88 files changed, 6026 insertions, 1626 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 0007fef4ed81..b112efc816f1 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -612,11 +612,14 @@ Type: vm ioctl | |||
612 | Parameters: none | 612 | Parameters: none |
613 | Returns: 0 on success, -1 on error | 613 | Returns: 0 on success, -1 on error |
614 | 614 | ||
615 | Creates an interrupt controller model in the kernel. On x86, creates a virtual | 615 | Creates an interrupt controller model in the kernel. |
616 | ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a | 616 | On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up |
617 | local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 | 617 | future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both |
618 | only go to the IOAPIC. On ARM/arm64, a GIC is | 618 | PIC and IOAPIC; GSI 16-23 only go to the IOAPIC. |
619 | created. On s390, a dummy irq routing table is created. | 619 | On ARM/arm64, a GICv2 is created. Any other GIC versions require the usage of |
620 | KVM_CREATE_DEVICE, which also supports creating a GICv2. Using | ||
621 | KVM_CREATE_DEVICE is preferred over KVM_CREATE_IRQCHIP for GICv2. | ||
622 | On s390, a dummy irq routing table is created. | ||
620 | 623 | ||
621 | Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled | 624 | Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled |
622 | before KVM_CREATE_IRQCHIP can be used. | 625 | before KVM_CREATE_IRQCHIP can be used. |
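For GICv2 guests the text above states that KVM_CREATE_DEVICE is preferred over the legacy ioctl. A minimal user-space sketch of that preferred path might look as follows; it assumes an already open VM file descriptor vm_fd and keeps error handling to a bare minimum:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /*
     * Sketch: create an in-kernel GICv2 with the preferred interface.
     * Returns the device fd (used for later attribute ioctls) or -1.
     * The legacy alternative would simply be
     * ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0).
     */
    static int create_vgic_v2(int vm_fd)
    {
            struct kvm_create_device dev = {
                    .type = KVM_DEV_TYPE_ARM_VGIC_V2,
            };

            if (ioctl(vm_fd, KVM_CREATE_DEVICE, &dev) < 0)
                    return -1;

            return dev.fd;
    }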
@@ -2312,7 +2315,7 @@ struct kvm_s390_interrupt { | |||
2312 | 2315 | ||
2313 | type can be one of the following: | 2316 | type can be one of the following: |
2314 | 2317 | ||
2315 | KVM_S390_SIGP_STOP (vcpu) - sigp restart | 2318 | KVM_S390_SIGP_STOP (vcpu) - sigp stop; optional flags in parm |
2316 | KVM_S390_PROGRAM_INT (vcpu) - program check; code in parm | 2319 | KVM_S390_PROGRAM_INT (vcpu) - program check; code in parm |
2317 | KVM_S390_SIGP_SET_PREFIX (vcpu) - sigp set prefix; prefix address in parm | 2320 | KVM_S390_SIGP_SET_PREFIX (vcpu) - sigp set prefix; prefix address in parm |
2318 | KVM_S390_RESTART (vcpu) - restart | 2321 | KVM_S390_RESTART (vcpu) - restart |
@@ -3225,3 +3228,23 @@ userspace from doing that. | |||
3225 | If the hcall number specified is not one that has an in-kernel | 3228 | If the hcall number specified is not one that has an in-kernel |
3226 | implementation, the KVM_ENABLE_CAP ioctl will fail with an EINVAL | 3229 | implementation, the KVM_ENABLE_CAP ioctl will fail with an EINVAL |
3227 | error. | 3230 | error. |
3231 | |||
3232 | 7.2 KVM_CAP_S390_USER_SIGP | ||
3233 | |||
3234 | Architectures: s390 | ||
3235 | Parameters: none | ||
3236 | |||
3237 | This capability controls which SIGP orders will be handled completely in user | ||
3238 | space. With this capability enabled, all fast orders will be handled completely | ||
3239 | in the kernel: | ||
3240 | - SENSE | ||
3241 | - SENSE RUNNING | ||
3242 | - EXTERNAL CALL | ||
3243 | - EMERGENCY SIGNAL | ||
3244 | - CONDITIONAL EMERGENCY SIGNAL | ||
3245 | |||
3246 | All other orders will be handled completely in user space. | ||
3247 | |||
3248 | Only privileged operation exceptions will be checked for in the kernel (or even | ||
3249 | in the hardware prior to interception). If this capability is not enabled, the | ||
3250 | old way of handling SIGP orders is used (partially in kernel and user space). | ||
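KVM_CAP_S390_USER_SIGP is a VM capability, so it is switched on through the regular KVM_ENABLE_CAP vm ioctl. A minimal sketch, assuming an open VM file descriptor vm_fd and that KVM_CHECK_EXTENSION has already reported the capability:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Ask KVM to forward all non-fast SIGP orders to user space. */
    static int enable_user_sigp(int vm_fd)
    {
            struct kvm_enable_cap cap = {
                    .cap = KVM_CAP_S390_USER_SIGP,
            };

            return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }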
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index df8b0c7540b6..3fb905429e8a 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -3,22 +3,42 @@ ARM Virtual Generic Interrupt Controller (VGIC) | |||
3 | 3 | ||
4 | Device types supported: | 4 | Device types supported: |
5 | KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0 | 5 | KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0 |
6 | KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0 | ||
6 | 7 | ||
7 | Only one VGIC instance may be instantiated through either this API or the | 8 | Only one VGIC instance may be instantiated through either this API or the |
8 | legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt | 9 | legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt |
9 | controller, requiring emulated user-space devices to inject interrupts to the | 10 | controller, requiring emulated user-space devices to inject interrupts to the |
10 | VGIC instead of directly to CPUs. | 11 | VGIC instead of directly to CPUs. |
11 | 12 | ||
13 | Creating a guest GICv3 device requires a host GICv3 as well. | ||
14 | GICv3 implementations with hardware compatibility support allow a guest GICv2 | ||
15 | as well. | ||
16 | |||
12 | Groups: | 17 | Groups: |
13 | KVM_DEV_ARM_VGIC_GRP_ADDR | 18 | KVM_DEV_ARM_VGIC_GRP_ADDR |
14 | Attributes: | 19 | Attributes: |
15 | KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit) | 20 | KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit) |
16 | Base address in the guest physical address space of the GIC distributor | 21 | Base address in the guest physical address space of the GIC distributor |
17 | register mappings. | 22 | register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2. |
23 | This address needs to be 4K aligned and the region covers 4 KByte. | ||
18 | 24 | ||
19 | KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit) | 25 | KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit) |
20 | Base address in the guest physical address space of the GIC virtual cpu | 26 | Base address in the guest physical address space of the GIC virtual cpu |
21 | interface register mappings. | 27 | interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2. |
28 | This address needs to be 4K aligned and the region covers 4 KByte. | ||
29 | |||
30 | KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit) | ||
31 | Base address in the guest physical address space of the GICv3 distributor | ||
32 | register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. | ||
33 | This address needs to be 64K aligned and the region covers 64 KByte. | ||
34 | |||
35 | KVM_VGIC_V3_ADDR_TYPE_REDIST (rw, 64-bit) | ||
36 | Base address in the guest physical address space of the GICv3 | ||
37 | redistributor register mappings. There are two 64K pages for each | ||
38 | VCPU and all of the redistributor pages are contiguous. | ||
39 | Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. | ||
40 | This address needs to be 64K aligned. | ||
41 | |||
22 | 42 | ||
23 | KVM_DEV_ARM_VGIC_GRP_DIST_REGS | 43 | KVM_DEV_ARM_VGIC_GRP_DIST_REGS |
24 | Attributes: | 44 | Attributes: |
@@ -36,6 +56,7 @@ Groups: | |||
36 | the register. | 56 | the register. |
37 | Limitations: | 57 | Limitations: |
38 | - Priorities are not implemented, and registers are RAZ/WI | 58 | - Priorities are not implemented, and registers are RAZ/WI |
59 | - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2. | ||
39 | Errors: | 60 | Errors: |
40 | -ENODEV: Getting or setting this register is not yet supported | 61 | -ENODEV: Getting or setting this register is not yet supported |
41 | -EBUSY: One or more VCPUs are running | 62 | -EBUSY: One or more VCPUs are running |
@@ -68,6 +89,7 @@ Groups: | |||
68 | 89 | ||
69 | Limitations: | 90 | Limitations: |
70 | - Priorities are not implemented, and registers are RAZ/WI | 91 | - Priorities are not implemented, and registers are RAZ/WI |
92 | - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2. | ||
71 | Errors: | 93 | Errors: |
72 | -ENODEV: Getting or setting this register is not yet supported | 94 | -ENODEV: Getting or setting this register is not yet supported |
73 | -EBUSY: One or more VCPUs are running | 95 | -EBUSY: One or more VCPUs are running |
@@ -81,3 +103,14 @@ Groups: | |||
81 | -EINVAL: Value set is out of the expected range | 103 | -EINVAL: Value set is out of the expected range |
82 | -EBUSY: Value has already been set, or GIC has already been initialized | 104 | -EBUSY: Value has already been set, or GIC has already been initialized |
83 | with default values. | 105 | with default values. |
106 | |||
107 | KVM_DEV_ARM_VGIC_GRP_CTRL | ||
108 | Attributes: | ||
109 | KVM_DEV_ARM_VGIC_CTRL_INIT | ||
110 | request the initialization of the VGIC, no additional parameter in | ||
111 | kvm_device_attr.addr. | ||
112 | Errors: | ||
113 | -ENXIO: VGIC not properly configured as required prior to calling | ||
114 | this attribute | ||
115 | -ENODEV: no online VCPU | ||
116 | -ENOMEM: memory shortage when allocating vgic internal data | ||
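Taken together, the attribute groups above suggest the following user-space flow for a GICv3 guest: create the device, program the distributor and redistributor base addresses, create the VCPUs, then request initialization. The sketch below illustrates that order; the guest physical addresses are made-up examples and error handling is reduced to early returns:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int set_vgic_attr(int dev_fd, __u32 group, __u64 attr_id, __u64 *val)
    {
            struct kvm_device_attr attr = {
                    .group = group,
                    .attr  = attr_id,
                    .addr  = (__u64)(unsigned long)val,
            };

            return ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &attr);
    }

    static int setup_vgic_v3(int vm_fd)
    {
            struct kvm_create_device dev = { .type = KVM_DEV_TYPE_ARM_VGIC_V3 };
            __u64 dist_base   = 0x08000000;   /* example GPA, 64K aligned */
            __u64 redist_base = 0x080a0000;   /* example GPA, 64K aligned */

            if (ioctl(vm_fd, KVM_CREATE_DEVICE, &dev) < 0)
                    return -1;

            if (set_vgic_attr(dev.fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
                              KVM_VGIC_V3_ADDR_TYPE_DIST, &dist_base) < 0)
                    return -1;
            if (set_vgic_attr(dev.fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
                              KVM_VGIC_V3_ADDR_TYPE_REDIST, &redist_base) < 0)
                    return -1;

            /* ... create the VCPUs here, then initialize the VGIC ... */
            return set_vgic_attr(dev.fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
                                 KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
    }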
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
index d426fc87fe93..5542c4641a3c 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -24,3 +24,62 @@ Returns: 0 | |||
24 | 24 | ||
25 | Clear the CMMA status for all guest pages, so any pages the guest marked | 25 | Clear the CMMA status for all guest pages, so any pages the guest marked |
26 | as unused are again used and may not be reclaimed by the host. | 26 | as unused are again used and may not be reclaimed by the host. |
27 | |||
28 | 1.3. ATTRIBUTE KVM_S390_VM_MEM_LIMIT_SIZE | ||
29 | Parameters: in attr->addr the address for the new limit of guest memory | ||
30 | Returns: -EFAULT if the given address is not accessible | ||
31 | -EINVAL if the virtual machine is of type UCONTROL | ||
32 | -E2BIG if the given guest memory is too big for that machine | ||
33 | -EBUSY if a vcpu is already defined | ||
34 | -ENOMEM if not enough memory is available for a new shadow guest mapping | ||
35 | 0 otherwise | ||
36 | |||
37 | Allows userspace to query the actual limit and set a new limit for | ||
38 | the maximum guest memory size. The limit will be rounded up to | ||
39 | 2048 MB, 4096 GB, 8192 TB respectively, as this limit is governed by | ||
40 | the number of page table levels. | ||
41 | |||
42 | 2. GROUP: KVM_S390_VM_CPU_MODEL | ||
43 | Architectures: s390 | ||
44 | |||
45 | 2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o) | ||
46 | |||
47 | Allows user space to retrieve machine and kvm specific cpu related information: | ||
48 | |||
49 | struct kvm_s390_vm_cpu_machine { | ||
50 | __u64 cpuid; # CPUID of host | ||
51 | __u32 ibc; # IBC level range offered by host | ||
52 | __u8 pad[4]; | ||
53 | __u64 fac_mask[256]; # set of cpu facilities enabled by KVM | ||
54 | __u64 fac_list[256]; # set of cpu facilities offered by host | ||
55 | } | ||
56 | |||
57 | Parameters: address of buffer to store the machine related cpu data | ||
58 | of type struct kvm_s390_vm_cpu_machine* | ||
59 | Returns: -EFAULT if the given address is not accessible from kernel space | ||
60 | -ENOMEM if not enough memory is available to process the ioctl | ||
61 | 0 in case of success | ||
62 | |||
63 | 2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w) | ||
64 | |||
65 | Allows user space to retrieve or request to change cpu related information for a vcpu: | ||
66 | |||
67 | struct kvm_s390_vm_cpu_processor { | ||
68 | __u64 cpuid; # CPUID currently (to be) used by this vcpu | ||
69 | __u16 ibc; # IBC level currently (to be) used by this vcpu | ||
70 | __u8 pad[6]; | ||
71 | __u64 fac_list[256]; # set of cpu facilities currently (to be) used | ||
72 | # by this vcpu | ||
73 | } | ||
74 | |||
75 | KVM does not enforce or limit the cpu model data in any form. Take the information | ||
76 | retrieved by means of KVM_S390_VM_CPU_MACHINE as a hint for reasonable configuration | ||
77 | setups. Instruction interceptions triggered by additionally set facility bits that | ||
78 | are not handled by KVM need to be implemented in the VM driver code. | ||
79 | |||
80 | Parameters: address of buffer to store/set the processor related cpu | ||
81 | data of type struct kvm_s390_vm_cpu_processor*. | ||
82 | Returns: -EBUSY in case 1 or more vcpus are already activated (only in write case) | ||
83 | -EFAULT if the given address is not accessible from kernel space | ||
84 | -ENOMEM if not enough memory is available to process the ioctl | ||
85 | 0 in case of success | ||
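The VM attributes documented here are reached through the KVM_GET_DEVICE_ATTR / KVM_SET_DEVICE_ATTR ioctls issued on the VM file descriptor. As a rough sketch (assuming vm_fd is open and the attribute is reported by KVM_HAS_DEVICE_ATTR), querying the host model could look like this:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Read the machine-wide CPU model data offered by the host. */
    static int get_cpu_machine(int vm_fd, struct kvm_s390_vm_cpu_machine *mach)
    {
            struct kvm_device_attr attr = {
                    .group = KVM_S390_VM_CPU_MODEL,
                    .attr  = KVM_S390_VM_CPU_MACHINE,
                    .addr  = (__u64)(unsigned long)mach,
            };

            return ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
    }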
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 3a67bec72d0c..25410b2d8bc1 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -96,6 +96,7 @@ extern char __kvm_hyp_code_end[]; | |||
96 | 96 | ||
97 | extern void __kvm_flush_vm_context(void); | 97 | extern void __kvm_flush_vm_context(void); |
98 | extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); | 98 | extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); |
99 | extern void __kvm_tlb_flush_vmid(struct kvm *kvm); | ||
99 | 100 | ||
100 | extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); | 101 | extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); |
101 | #endif | 102 | #endif |
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 7b0152321b20..a9c80a2ea1a7 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -23,6 +23,7 @@ | |||
23 | #include <asm/kvm_asm.h> | 23 | #include <asm/kvm_asm.h> |
24 | #include <asm/kvm_mmio.h> | 24 | #include <asm/kvm_mmio.h> |
25 | #include <asm/kvm_arm.h> | 25 | #include <asm/kvm_arm.h> |
26 | #include <asm/cputype.h> | ||
26 | 27 | ||
27 | unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); | 28 | unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); |
28 | unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu); | 29 | unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu); |
@@ -177,9 +178,9 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu) | |||
177 | return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; | 178 | return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; |
178 | } | 179 | } |
179 | 180 | ||
180 | static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) | 181 | static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) |
181 | { | 182 | { |
182 | return vcpu->arch.cp15[c0_MPIDR]; | 183 | return vcpu->arch.cp15[c0_MPIDR] & MPIDR_HWID_BITMASK; |
183 | } | 184 | } |
184 | 185 | ||
185 | static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) | 186 | static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) |
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 04b4ea0b550a..41008cd7c53f 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -68,6 +68,7 @@ struct kvm_arch { | |||
68 | 68 | ||
69 | /* Interrupt controller */ | 69 | /* Interrupt controller */ |
70 | struct vgic_dist vgic; | 70 | struct vgic_dist vgic; |
71 | int max_vcpus; | ||
71 | }; | 72 | }; |
72 | 73 | ||
73 | #define KVM_NR_MEM_OBJS 40 | 74 | #define KVM_NR_MEM_OBJS 40 |
@@ -144,6 +145,7 @@ struct kvm_vm_stat { | |||
144 | }; | 145 | }; |
145 | 146 | ||
146 | struct kvm_vcpu_stat { | 147 | struct kvm_vcpu_stat { |
148 | u32 halt_successful_poll; | ||
147 | u32 halt_wakeup; | 149 | u32 halt_wakeup; |
148 | }; | 150 | }; |
149 | 151 | ||
@@ -231,6 +233,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) | |||
231 | int kvm_perf_init(void); | 233 | int kvm_perf_init(void); |
232 | int kvm_perf_teardown(void); | 234 | int kvm_perf_teardown(void); |
233 | 235 | ||
236 | void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); | ||
237 | |||
238 | struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); | ||
239 | |||
234 | static inline void kvm_arch_hardware_disable(void) {} | 240 | static inline void kvm_arch_hardware_disable(void) {} |
235 | static inline void kvm_arch_hardware_unsetup(void) {} | 241 | static inline void kvm_arch_hardware_unsetup(void) {} |
236 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | 242 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} |
diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h
index adcc0d7d3175..3f83db2f6cf0 100644
--- a/arch/arm/include/asm/kvm_mmio.h
+++ b/arch/arm/include/asm/kvm_mmio.h
@@ -37,6 +37,7 @@ struct kvm_exit_mmio { | |||
37 | u8 data[8]; | 37 | u8 data[8]; |
38 | u32 len; | 38 | u32 len; |
39 | bool is_write; | 39 | bool is_write; |
40 | void *private; | ||
40 | }; | 41 | }; |
41 | 42 | ||
42 | static inline void kvm_prepare_mmio(struct kvm_run *run, | 43 | static inline void kvm_prepare_mmio(struct kvm_run *run, |
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 1bca8f8af442..37ca2a4c6f09 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -115,6 +115,27 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) | |||
115 | pmd_val(*pmd) |= L_PMD_S2_RDWR; | 115 | pmd_val(*pmd) |= L_PMD_S2_RDWR; |
116 | } | 116 | } |
117 | 117 | ||
118 | static inline void kvm_set_s2pte_readonly(pte_t *pte) | ||
119 | { | ||
120 | pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY; | ||
121 | } | ||
122 | |||
123 | static inline bool kvm_s2pte_readonly(pte_t *pte) | ||
124 | { | ||
125 | return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY; | ||
126 | } | ||
127 | |||
128 | static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) | ||
129 | { | ||
130 | pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY; | ||
131 | } | ||
132 | |||
133 | static inline bool kvm_s2pmd_readonly(pmd_t *pmd) | ||
134 | { | ||
135 | return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; | ||
136 | } | ||
137 | |||
138 | |||
118 | /* Open coded p*d_addr_end that can deal with 64bit addresses */ | 139 | /* Open coded p*d_addr_end that can deal with 64bit addresses */ |
119 | #define kvm_pgd_addr_end(addr, end) \ | 140 | #define kvm_pgd_addr_end(addr, end) \ |
120 | ({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ | 141 | ({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ |
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 423a5ac09d3a..a745a2a53853 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -129,6 +129,7 @@ | |||
129 | #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ | 129 | #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ |
130 | #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ | 130 | #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ |
131 | 131 | ||
132 | #define L_PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[1] */ | ||
132 | #define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ | 133 | #define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ |
133 | 134 | ||
134 | /* | 135 | /* |
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 09ee408c1a67..0db25bc32864 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -175,6 +175,8 @@ struct kvm_arch_memory_slot { | |||
175 | #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 | 175 | #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 |
176 | #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) | 176 | #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) |
177 | #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 | 177 | #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 |
178 | #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 | ||
179 | #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 | ||
178 | 180 | ||
179 | /* KVM_IRQ_LINE irq field index values */ | 181 | /* KVM_IRQ_LINE irq field index values */ |
180 | #define KVM_ARM_IRQ_TYPE_SHIFT 24 | 182 | #define KVM_ARM_IRQ_TYPE_SHIFT 24 |
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 3afee5f40f4f..338ace78ed18 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -21,8 +21,10 @@ config KVM | |||
21 | select PREEMPT_NOTIFIERS | 21 | select PREEMPT_NOTIFIERS |
22 | select ANON_INODES | 22 | select ANON_INODES |
23 | select HAVE_KVM_CPU_RELAX_INTERCEPT | 23 | select HAVE_KVM_CPU_RELAX_INTERCEPT |
24 | select HAVE_KVM_ARCH_TLB_FLUSH_ALL | ||
24 | select KVM_MMIO | 25 | select KVM_MMIO |
25 | select KVM_ARM_HOST | 26 | select KVM_ARM_HOST |
27 | select KVM_GENERIC_DIRTYLOG_READ_PROTECT | ||
26 | select SRCU | 28 | select SRCU |
27 | depends on ARM_VIRT_EXT && ARM_LPAE | 29 | depends on ARM_VIRT_EXT && ARM_LPAE |
28 | ---help--- | 30 | ---help--- |
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index f7057ed045b6..443b8bea43e9 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -22,4 +22,5 @@ obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o | |||
22 | obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o | 22 | obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o |
23 | obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o | 23 | obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o |
24 | obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o | 24 | obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o |
25 | obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o | ||
25 | obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o | 26 | obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o |
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 0b0d58a905c4..07e7eb1d7ab6 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -132,6 +132,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
132 | /* Mark the initial VMID generation invalid */ | 132 | /* Mark the initial VMID generation invalid */ |
133 | kvm->arch.vmid_gen = 0; | 133 | kvm->arch.vmid_gen = 0; |
134 | 134 | ||
135 | /* The maximum number of VCPUs is limited by the host's GIC model */ | ||
136 | kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus(); | ||
137 | |||
135 | return ret; | 138 | return ret; |
136 | out_free_stage2_pgd: | 139 | out_free_stage2_pgd: |
137 | kvm_free_stage2_pgd(kvm); | 140 | kvm_free_stage2_pgd(kvm); |
@@ -218,6 +221,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) | |||
218 | goto out; | 221 | goto out; |
219 | } | 222 | } |
220 | 223 | ||
224 | if (id >= kvm->arch.max_vcpus) { | ||
225 | err = -EINVAL; | ||
226 | goto out; | ||
227 | } | ||
228 | |||
221 | vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); | 229 | vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); |
222 | if (!vcpu) { | 230 | if (!vcpu) { |
223 | err = -ENOMEM; | 231 | err = -ENOMEM; |
@@ -241,9 +249,8 @@ out: | |||
241 | return ERR_PTR(err); | 249 | return ERR_PTR(err); |
242 | } | 250 | } |
243 | 251 | ||
244 | int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) | 252 | void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) |
245 | { | 253 | { |
246 | return 0; | ||
247 | } | 254 | } |
248 | 255 | ||
249 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | 256 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) |
@@ -777,9 +784,39 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
777 | } | 784 | } |
778 | } | 785 | } |
779 | 786 | ||
787 | /** | ||
788 | * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot | ||
789 | * @kvm: kvm instance | ||
790 | * @log: slot id and address to which we copy the log | ||
791 | * | ||
792 | * Steps 1-4 below provide general overview of dirty page logging. See | ||
793 | * kvm_get_dirty_log_protect() function description for additional details. | ||
794 | * | ||
795 | * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we | ||
796 | * always flush the TLB (step 4) even if previous step failed and the dirty | ||
797 | * bitmap may be corrupt. Regardless of previous outcome the KVM logging API | ||
798 | * does not preclude user space subsequent dirty log read. Flushing TLB ensures | ||
799 | * writes will be marked dirty for next log read. | ||
800 | * | ||
801 | * 1. Take a snapshot of the bit and clear it if needed. | ||
802 | * 2. Write protect the corresponding page. | ||
803 | * 3. Copy the snapshot to the userspace. | ||
804 | * 4. Flush TLB's if needed. | ||
805 | */ | ||
780 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) | 806 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) |
781 | { | 807 | { |
782 | return -EINVAL; | 808 | bool is_dirty = false; |
809 | int r; | ||
810 | |||
811 | mutex_lock(&kvm->slots_lock); | ||
812 | |||
813 | r = kvm_get_dirty_log_protect(kvm, log, &is_dirty); | ||
814 | |||
815 | if (is_dirty) | ||
816 | kvm_flush_remote_tlbs(kvm); | ||
817 | |||
818 | mutex_unlock(&kvm->slots_lock); | ||
819 | return r; | ||
783 | } | 820 | } |
784 | 821 | ||
785 | static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, | 822 | static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, |
@@ -811,7 +848,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
811 | switch (ioctl) { | 848 | switch (ioctl) { |
812 | case KVM_CREATE_IRQCHIP: { | 849 | case KVM_CREATE_IRQCHIP: { |
813 | if (vgic_present) | 850 | if (vgic_present) |
814 | return kvm_vgic_create(kvm); | 851 | return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); |
815 | else | 852 | else |
816 | return -ENXIO; | 853 | return -ENXIO; |
817 | } | 854 | } |
@@ -1035,6 +1072,19 @@ static void check_kvm_target_cpu(void *ret) | |||
1035 | *(int *)ret = kvm_target_cpu(); | 1072 | *(int *)ret = kvm_target_cpu(); |
1036 | } | 1073 | } |
1037 | 1074 | ||
1075 | struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) | ||
1076 | { | ||
1077 | struct kvm_vcpu *vcpu; | ||
1078 | int i; | ||
1079 | |||
1080 | mpidr &= MPIDR_HWID_BITMASK; | ||
1081 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
1082 | if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu)) | ||
1083 | return vcpu; | ||
1084 | } | ||
1085 | return NULL; | ||
1086 | } | ||
1087 | |||
1038 | /** | 1088 | /** |
1039 | * Initialize Hyp-mode and memory mappings on all CPUs. | 1089 | * Initialize Hyp-mode and memory mappings on all CPUs. |
1040 | */ | 1090 | */ |
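With kvm_vm_ioctl_get_dirty_log() wired up, user space on ARM can use the generic dirty-log flow: mark a memory slot with KVM_MEM_LOG_DIRTY_PAGES and then pull the bitmap with KVM_GET_DIRTY_LOG. A condensed sketch, assuming a previously registered slot description mr and a caller-allocated bitmap with one bit per page in the slot:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Enable logging on an existing slot and fetch the dirty bitmap once. */
    static int fetch_dirty_log(int vm_fd,
                               struct kvm_userspace_memory_region *mr,
                               void *bitmap)
    {
            struct kvm_dirty_log log = {
                    .slot = mr->slot,
                    .dirty_bitmap = bitmap,
            };

            mr->flags |= KVM_MEM_LOG_DIRTY_PAGES;
            if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, mr) < 0)
                    return -1;

            return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
    }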
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index a96a8043277c..95f12b2ccdcb 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -87,11 +87,13 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
87 | */ | 87 | */ |
88 | static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) | 88 | static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) |
89 | { | 89 | { |
90 | trace_kvm_wfi(*vcpu_pc(vcpu)); | 90 | if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) { |
91 | if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) | 91 | trace_kvm_wfx(*vcpu_pc(vcpu), true); |
92 | kvm_vcpu_on_spin(vcpu); | 92 | kvm_vcpu_on_spin(vcpu); |
93 | else | 93 | } else { |
94 | trace_kvm_wfx(*vcpu_pc(vcpu), false); | ||
94 | kvm_vcpu_block(vcpu); | 95 | kvm_vcpu_block(vcpu); |
96 | } | ||
95 | 97 | ||
96 | kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); | 98 | kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); |
97 | 99 | ||
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 01dcb0e752d9..79caf79b304a 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -66,6 +66,17 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) | |||
66 | bx lr | 66 | bx lr |
67 | ENDPROC(__kvm_tlb_flush_vmid_ipa) | 67 | ENDPROC(__kvm_tlb_flush_vmid_ipa) |
68 | 68 | ||
69 | /** | ||
70 | * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs | ||
71 | * | ||
72 | * Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address | ||
73 | * parameter | ||
74 | */ | ||
75 | |||
76 | ENTRY(__kvm_tlb_flush_vmid) | ||
77 | b __kvm_tlb_flush_vmid_ipa | ||
78 | ENDPROC(__kvm_tlb_flush_vmid) | ||
79 | |||
69 | /******************************************************************** | 80 | /******************************************************************** |
70 | * Flush TLBs and instruction caches of all CPUs inside the inner-shareable | 81 | * Flush TLBs and instruction caches of all CPUs inside the inner-shareable |
71 | * domain, for all VMIDs | 82 | * domain, for all VMIDs |
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 136662547ca6..3e6859bc3e11 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -45,6 +45,26 @@ static phys_addr_t hyp_idmap_vector; | |||
45 | #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) | 45 | #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) |
46 | 46 | ||
47 | #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) | 47 | #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) |
48 | #define kvm_pud_huge(_x) pud_huge(_x) | ||
49 | |||
50 | #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) | ||
51 | #define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1) | ||
52 | |||
53 | static bool memslot_is_logging(struct kvm_memory_slot *memslot) | ||
54 | { | ||
55 | return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY); | ||
56 | } | ||
57 | |||
58 | /** | ||
59 | * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8 | ||
60 | * @kvm: pointer to kvm structure. | ||
61 | * | ||
62 | * Interface to HYP function to flush all VM TLB entries | ||
63 | */ | ||
64 | void kvm_flush_remote_tlbs(struct kvm *kvm) | ||
65 | { | ||
66 | kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); | ||
67 | } | ||
48 | 68 | ||
49 | static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) | 69 | static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) |
50 | { | 70 | { |
@@ -78,6 +98,25 @@ static void kvm_flush_dcache_pud(pud_t pud) | |||
78 | __kvm_flush_dcache_pud(pud); | 98 | __kvm_flush_dcache_pud(pud); |
79 | } | 99 | } |
80 | 100 | ||
101 | /** | ||
102 | * stage2_dissolve_pmd() - clear and flush huge PMD entry | ||
103 | * @kvm: pointer to kvm structure. | ||
104 | * @addr: IPA | ||
105 | * @pmd: pmd pointer for IPA | ||
106 | * | ||
107 | * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. Marks all | ||
108 | * pages in the range dirty. | ||
109 | */ | ||
110 | static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) | ||
111 | { | ||
112 | if (!kvm_pmd_huge(*pmd)) | ||
113 | return; | ||
114 | |||
115 | pmd_clear(pmd); | ||
116 | kvm_tlb_flush_vmid_ipa(kvm, addr); | ||
117 | put_page(virt_to_page(pmd)); | ||
118 | } | ||
119 | |||
81 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | 120 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, |
82 | int min, int max) | 121 | int min, int max) |
83 | { | 122 | { |
@@ -819,10 +858,15 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache | |||
819 | } | 858 | } |
820 | 859 | ||
821 | static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, | 860 | static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, |
822 | phys_addr_t addr, const pte_t *new_pte, bool iomap) | 861 | phys_addr_t addr, const pte_t *new_pte, |
862 | unsigned long flags) | ||
823 | { | 863 | { |
824 | pmd_t *pmd; | 864 | pmd_t *pmd; |
825 | pte_t *pte, old_pte; | 865 | pte_t *pte, old_pte; |
866 | bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP; | ||
867 | bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE; | ||
868 | |||
869 | VM_BUG_ON(logging_active && !cache); | ||
826 | 870 | ||
827 | /* Create stage-2 page table mapping - Levels 0 and 1 */ | 871 | /* Create stage-2 page table mapping - Levels 0 and 1 */ |
828 | pmd = stage2_get_pmd(kvm, cache, addr); | 872 | pmd = stage2_get_pmd(kvm, cache, addr); |
@@ -834,6 +878,13 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, | |||
834 | return 0; | 878 | return 0; |
835 | } | 879 | } |
836 | 880 | ||
881 | /* | ||
882 | * While dirty page logging - dissolve huge PMD, then continue on to | ||
883 | * allocate page. | ||
884 | */ | ||
885 | if (logging_active) | ||
886 | stage2_dissolve_pmd(kvm, addr, pmd); | ||
887 | |||
837 | /* Create stage-2 page mappings - Level 2 */ | 888 | /* Create stage-2 page mappings - Level 2 */ |
838 | if (pmd_none(*pmd)) { | 889 | if (pmd_none(*pmd)) { |
839 | if (!cache) | 890 | if (!cache) |
@@ -890,7 +941,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, | |||
890 | if (ret) | 941 | if (ret) |
891 | goto out; | 942 | goto out; |
892 | spin_lock(&kvm->mmu_lock); | 943 | spin_lock(&kvm->mmu_lock); |
893 | ret = stage2_set_pte(kvm, &cache, addr, &pte, true); | 944 | ret = stage2_set_pte(kvm, &cache, addr, &pte, |
945 | KVM_S2PTE_FLAG_IS_IOMAP); | ||
894 | spin_unlock(&kvm->mmu_lock); | 946 | spin_unlock(&kvm->mmu_lock); |
895 | if (ret) | 947 | if (ret) |
896 | goto out; | 948 | goto out; |
@@ -957,6 +1009,165 @@ static bool kvm_is_device_pfn(unsigned long pfn) | |||
957 | return !pfn_valid(pfn); | 1009 | return !pfn_valid(pfn); |
958 | } | 1010 | } |
959 | 1011 | ||
1012 | /** | ||
1013 | * stage2_wp_ptes - write protect PMD range | ||
1014 | * @pmd: pointer to pmd entry | ||
1015 | * @addr: range start address | ||
1016 | * @end: range end address | ||
1017 | */ | ||
1018 | static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) | ||
1019 | { | ||
1020 | pte_t *pte; | ||
1021 | |||
1022 | pte = pte_offset_kernel(pmd, addr); | ||
1023 | do { | ||
1024 | if (!pte_none(*pte)) { | ||
1025 | if (!kvm_s2pte_readonly(pte)) | ||
1026 | kvm_set_s2pte_readonly(pte); | ||
1027 | } | ||
1028 | } while (pte++, addr += PAGE_SIZE, addr != end); | ||
1029 | } | ||
1030 | |||
1031 | /** | ||
1032 | * stage2_wp_pmds - write protect PUD range | ||
1033 | * @pud: pointer to pud entry | ||
1034 | * @addr: range start address | ||
1035 | * @end: range end address | ||
1036 | */ | ||
1037 | static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) | ||
1038 | { | ||
1039 | pmd_t *pmd; | ||
1040 | phys_addr_t next; | ||
1041 | |||
1042 | pmd = pmd_offset(pud, addr); | ||
1043 | |||
1044 | do { | ||
1045 | next = kvm_pmd_addr_end(addr, end); | ||
1046 | if (!pmd_none(*pmd)) { | ||
1047 | if (kvm_pmd_huge(*pmd)) { | ||
1048 | if (!kvm_s2pmd_readonly(pmd)) | ||
1049 | kvm_set_s2pmd_readonly(pmd); | ||
1050 | } else { | ||
1051 | stage2_wp_ptes(pmd, addr, next); | ||
1052 | } | ||
1053 | } | ||
1054 | } while (pmd++, addr = next, addr != end); | ||
1055 | } | ||
1056 | |||
1057 | /** | ||
1058 | * stage2_wp_puds - write protect PGD range | ||
1059 | * @pgd: pointer to pgd entry | ||
1060 | * @addr: range start address | ||
1061 | * @end: range end address | ||
1062 | * | ||
1063 | * Process PUD entries, for a huge PUD we cause a panic. | ||
1064 | */ | ||
1065 | static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) | ||
1066 | { | ||
1067 | pud_t *pud; | ||
1068 | phys_addr_t next; | ||
1069 | |||
1070 | pud = pud_offset(pgd, addr); | ||
1071 | do { | ||
1072 | next = kvm_pud_addr_end(addr, end); | ||
1073 | if (!pud_none(*pud)) { | ||
1074 | /* TODO:PUD not supported, revisit later if supported */ | ||
1075 | BUG_ON(kvm_pud_huge(*pud)); | ||
1076 | stage2_wp_pmds(pud, addr, next); | ||
1077 | } | ||
1078 | } while (pud++, addr = next, addr != end); | ||
1079 | } | ||
1080 | |||
1081 | /** | ||
1082 | * stage2_wp_range() - write protect stage2 memory region range | ||
1083 | * @kvm: The KVM pointer | ||
1084 | * @addr: Start address of range | ||
1085 | * @end: End address of range | ||
1086 | */ | ||
1087 | static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) | ||
1088 | { | ||
1089 | pgd_t *pgd; | ||
1090 | phys_addr_t next; | ||
1091 | |||
1092 | pgd = kvm->arch.pgd + pgd_index(addr); | ||
1093 | do { | ||
1094 | /* | ||
1095 | * Release kvm_mmu_lock periodically if the memory region is | ||
1096 | * large. Otherwise, we may see kernel panics with | ||
1097 | * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR, | ||
1098 | * CONFIG_LOCKDEP. Additionally, holding the lock too long | ||
1099 | * will also starve other vCPUs. | ||
1100 | */ | ||
1101 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) | ||
1102 | cond_resched_lock(&kvm->mmu_lock); | ||
1103 | |||
1104 | next = kvm_pgd_addr_end(addr, end); | ||
1105 | if (pgd_present(*pgd)) | ||
1106 | stage2_wp_puds(pgd, addr, next); | ||
1107 | } while (pgd++, addr = next, addr != end); | ||
1108 | } | ||
1109 | |||
1110 | /** | ||
1111 | * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot | ||
1112 | * @kvm: The KVM pointer | ||
1113 | * @slot: The memory slot to write protect | ||
1114 | * | ||
1115 | * Called to start logging dirty pages after memory region | ||
1116 | * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns | ||
1117 | * all present PMD and PTEs are write protected in the memory region. | ||
1118 | * Afterwards read of dirty page log can be called. | ||
1119 | * | ||
1120 | * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired, | ||
1121 | * serializing operations for VM memory regions. | ||
1122 | */ | ||
1123 | void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) | ||
1124 | { | ||
1125 | struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot); | ||
1126 | phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; | ||
1127 | phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; | ||
1128 | |||
1129 | spin_lock(&kvm->mmu_lock); | ||
1130 | stage2_wp_range(kvm, start, end); | ||
1131 | spin_unlock(&kvm->mmu_lock); | ||
1132 | kvm_flush_remote_tlbs(kvm); | ||
1133 | } | ||
1134 | |||
1135 | /** | ||
1136 | * kvm_mmu_write_protect_pt_masked() - write protect dirty pages | ||
1137 | * @kvm: The KVM pointer | ||
1138 | * @slot: The memory slot associated with mask | ||
1139 | * @gfn_offset: The gfn offset in memory slot | ||
1140 | * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory | ||
1141 | * slot to be write protected | ||
1142 | * | ||
1143 | * Walks bits set in mask write protects the associated pte's. Caller must | ||
1144 | * acquire kvm_mmu_lock. | ||
1145 | */ | ||
1146 | static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, | ||
1147 | struct kvm_memory_slot *slot, | ||
1148 | gfn_t gfn_offset, unsigned long mask) | ||
1149 | { | ||
1150 | phys_addr_t base_gfn = slot->base_gfn + gfn_offset; | ||
1151 | phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT; | ||
1152 | phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; | ||
1153 | |||
1154 | stage2_wp_range(kvm, start, end); | ||
1155 | } | ||
1156 | |||
1157 | /* | ||
1158 | * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected | ||
1159 | * dirty pages. | ||
1160 | * | ||
1161 | * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to | ||
1162 | * enable dirty logging for them. | ||
1163 | */ | ||
1164 | void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, | ||
1165 | struct kvm_memory_slot *slot, | ||
1166 | gfn_t gfn_offset, unsigned long mask) | ||
1167 | { | ||
1168 | kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); | ||
1169 | } | ||
1170 | |||
960 | static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, | 1171 | static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, |
961 | unsigned long size, bool uncached) | 1172 | unsigned long size, bool uncached) |
962 | { | 1173 | { |
@@ -977,6 +1188,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
977 | pfn_t pfn; | 1188 | pfn_t pfn; |
978 | pgprot_t mem_type = PAGE_S2; | 1189 | pgprot_t mem_type = PAGE_S2; |
979 | bool fault_ipa_uncached; | 1190 | bool fault_ipa_uncached; |
1191 | bool logging_active = memslot_is_logging(memslot); | ||
1192 | unsigned long flags = 0; | ||
980 | 1193 | ||
981 | write_fault = kvm_is_write_fault(vcpu); | 1194 | write_fault = kvm_is_write_fault(vcpu); |
982 | if (fault_status == FSC_PERM && !write_fault) { | 1195 | if (fault_status == FSC_PERM && !write_fault) { |
@@ -993,7 +1206,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
993 | return -EFAULT; | 1206 | return -EFAULT; |
994 | } | 1207 | } |
995 | 1208 | ||
996 | if (is_vm_hugetlb_page(vma)) { | 1209 | if (is_vm_hugetlb_page(vma) && !logging_active) { |
997 | hugetlb = true; | 1210 | hugetlb = true; |
998 | gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; | 1211 | gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; |
999 | } else { | 1212 | } else { |
@@ -1034,12 +1247,30 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
1034 | if (is_error_pfn(pfn)) | 1247 | if (is_error_pfn(pfn)) |
1035 | return -EFAULT; | 1248 | return -EFAULT; |
1036 | 1249 | ||
1037 | if (kvm_is_device_pfn(pfn)) | 1250 | if (kvm_is_device_pfn(pfn)) { |
1038 | mem_type = PAGE_S2_DEVICE; | 1251 | mem_type = PAGE_S2_DEVICE; |
1252 | flags |= KVM_S2PTE_FLAG_IS_IOMAP; | ||
1253 | } else if (logging_active) { | ||
1254 | /* | ||
1255 | * Faults on pages in a memslot with logging enabled | ||
1256 | * should not be mapped with huge pages (it introduces churn | ||
1257 | * and performance degradation), so force a pte mapping. | ||
1258 | */ | ||
1259 | force_pte = true; | ||
1260 | flags |= KVM_S2_FLAG_LOGGING_ACTIVE; | ||
1261 | |||
1262 | /* | ||
1263 | * Only actually map the page as writable if this was a write | ||
1264 | * fault. | ||
1265 | */ | ||
1266 | if (!write_fault) | ||
1267 | writable = false; | ||
1268 | } | ||
1039 | 1269 | ||
1040 | spin_lock(&kvm->mmu_lock); | 1270 | spin_lock(&kvm->mmu_lock); |
1041 | if (mmu_notifier_retry(kvm, mmu_seq)) | 1271 | if (mmu_notifier_retry(kvm, mmu_seq)) |
1042 | goto out_unlock; | 1272 | goto out_unlock; |
1273 | |||
1043 | if (!hugetlb && !force_pte) | 1274 | if (!hugetlb && !force_pte) |
1044 | hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); | 1275 | hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); |
1045 | 1276 | ||
@@ -1056,16 +1287,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
1056 | ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); | 1287 | ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); |
1057 | } else { | 1288 | } else { |
1058 | pte_t new_pte = pfn_pte(pfn, mem_type); | 1289 | pte_t new_pte = pfn_pte(pfn, mem_type); |
1290 | |||
1059 | if (writable) { | 1291 | if (writable) { |
1060 | kvm_set_s2pte_writable(&new_pte); | 1292 | kvm_set_s2pte_writable(&new_pte); |
1061 | kvm_set_pfn_dirty(pfn); | 1293 | kvm_set_pfn_dirty(pfn); |
1294 | mark_page_dirty(kvm, gfn); | ||
1062 | } | 1295 | } |
1063 | coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached); | 1296 | coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached); |
1064 | ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, | 1297 | ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); |
1065 | pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); | ||
1066 | } | 1298 | } |
1067 | 1299 | ||
1068 | |||
1069 | out_unlock: | 1300 | out_unlock: |
1070 | spin_unlock(&kvm->mmu_lock); | 1301 | spin_unlock(&kvm->mmu_lock); |
1071 | kvm_release_pfn_clean(pfn); | 1302 | kvm_release_pfn_clean(pfn); |
@@ -1215,7 +1446,14 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) | |||
1215 | { | 1446 | { |
1216 | pte_t *pte = (pte_t *)data; | 1447 | pte_t *pte = (pte_t *)data; |
1217 | 1448 | ||
1218 | stage2_set_pte(kvm, NULL, gpa, pte, false); | 1449 | /* |
1450 | * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE | ||
1451 | * flag clear because MMU notifiers will have unmapped a huge PMD before | ||
1452 | * calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and | ||
1453 | * therefore stage2_set_pte() never needs to clear out a huge PMD | ||
1454 | * through this calling path. | ||
1455 | */ | ||
1456 | stage2_set_pte(kvm, NULL, gpa, pte, 0); | ||
1219 | } | 1457 | } |
1220 | 1458 | ||
1221 | 1459 | ||
@@ -1348,6 +1586,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
1348 | const struct kvm_memory_slot *old, | 1586 | const struct kvm_memory_slot *old, |
1349 | enum kvm_mr_change change) | 1587 | enum kvm_mr_change change) |
1350 | { | 1588 | { |
1589 | /* | ||
1590 | * At this point memslot has been committed and there is an | ||
1591 | * allocated dirty_bitmap[], dirty pages will be be tracked while the | ||
1592 | * memory slot is write protected. | ||
1593 | */ | ||
1594 | if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) | ||
1595 | kvm_mmu_wp_memory_region(kvm, mem->slot); | ||
1351 | } | 1596 | } |
1352 | 1597 | ||
1353 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 1598 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
@@ -1360,7 +1605,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
1360 | bool writable = !(mem->flags & KVM_MEM_READONLY); | 1605 | bool writable = !(mem->flags & KVM_MEM_READONLY); |
1361 | int ret = 0; | 1606 | int ret = 0; |
1362 | 1607 | ||
1363 | if (change != KVM_MR_CREATE && change != KVM_MR_MOVE) | 1608 | if (change != KVM_MR_CREATE && change != KVM_MR_MOVE && |
1609 | change != KVM_MR_FLAGS_ONLY) | ||
1364 | return 0; | 1610 | return 0; |
1365 | 1611 | ||
1366 | /* | 1612 | /* |
@@ -1411,6 +1657,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
1411 | phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) + | 1657 | phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) + |
1412 | vm_start - vma->vm_start; | 1658 | vm_start - vma->vm_start; |
1413 | 1659 | ||
1660 | /* IO region dirty page logging not allowed */ | ||
1661 | if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) | ||
1662 | return -EINVAL; | ||
1663 | |||
1414 | ret = kvm_phys_addr_ioremap(kvm, gpa, pa, | 1664 | ret = kvm_phys_addr_ioremap(kvm, gpa, pa, |
1415 | vm_end - vm_start, | 1665 | vm_end - vm_start, |
1416 | writable); | 1666 | writable); |
@@ -1420,6 +1670,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
1420 | hva = vm_end; | 1670 | hva = vm_end; |
1421 | } while (hva < reg_end); | 1671 | } while (hva < reg_end); |
1422 | 1672 | ||
1673 | if (change == KVM_MR_FLAGS_ONLY) | ||
1674 | return ret; | ||
1675 | |||
1423 | spin_lock(&kvm->mmu_lock); | 1676 | spin_lock(&kvm->mmu_lock); |
1424 | if (ret) | 1677 | if (ret) |
1425 | unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); | 1678 | unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); |
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 58cb3248d277..02fa8eff6ae1 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -22,6 +22,7 @@ | |||
22 | #include <asm/cputype.h> | 22 | #include <asm/cputype.h> |
23 | #include <asm/kvm_emulate.h> | 23 | #include <asm/kvm_emulate.h> |
24 | #include <asm/kvm_psci.h> | 24 | #include <asm/kvm_psci.h> |
25 | #include <asm/kvm_host.h> | ||
25 | 26 | ||
26 | /* | 27 | /* |
27 | * This is an implementation of the Power State Coordination Interface | 28 | * This is an implementation of the Power State Coordination Interface |
@@ -66,25 +67,17 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) | |||
66 | static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) | 67 | static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) |
67 | { | 68 | { |
68 | struct kvm *kvm = source_vcpu->kvm; | 69 | struct kvm *kvm = source_vcpu->kvm; |
69 | struct kvm_vcpu *vcpu = NULL, *tmp; | 70 | struct kvm_vcpu *vcpu = NULL; |
70 | wait_queue_head_t *wq; | 71 | wait_queue_head_t *wq; |
71 | unsigned long cpu_id; | 72 | unsigned long cpu_id; |
72 | unsigned long context_id; | 73 | unsigned long context_id; |
73 | unsigned long mpidr; | ||
74 | phys_addr_t target_pc; | 74 | phys_addr_t target_pc; |
75 | int i; | ||
76 | 75 | ||
77 | cpu_id = *vcpu_reg(source_vcpu, 1); | 76 | cpu_id = *vcpu_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK; |
78 | if (vcpu_mode_is_32bit(source_vcpu)) | 77 | if (vcpu_mode_is_32bit(source_vcpu)) |
79 | cpu_id &= ~((u32) 0); | 78 | cpu_id &= ~((u32) 0); |
80 | 79 | ||
81 | kvm_for_each_vcpu(i, tmp, kvm) { | 80 | vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id); |
82 | mpidr = kvm_vcpu_get_mpidr(tmp); | ||
83 | if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) { | ||
84 | vcpu = tmp; | ||
85 | break; | ||
86 | } | ||
87 | } | ||
88 | 81 | ||
89 | /* | 82 | /* |
90 | * Make sure the caller requested a valid CPU and that the CPU is | 83 | * Make sure the caller requested a valid CPU and that the CPU is |
@@ -155,7 +148,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) | |||
155 | * then ON else OFF | 148 | * then ON else OFF |
156 | */ | 149 | */ |
157 | kvm_for_each_vcpu(i, tmp, kvm) { | 150 | kvm_for_each_vcpu(i, tmp, kvm) { |
158 | mpidr = kvm_vcpu_get_mpidr(tmp); | 151 | mpidr = kvm_vcpu_get_mpidr_aff(tmp); |
159 | if (((mpidr & target_affinity_mask) == target_affinity) && | 152 | if (((mpidr & target_affinity_mask) == target_affinity) && |
160 | !tmp->arch.pause) { | 153 | !tmp->arch.pause) { |
161 | return PSCI_0_2_AFFINITY_LEVEL_ON; | 154 | return PSCI_0_2_AFFINITY_LEVEL_ON; |
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index b6a6e7102201..881874b1a036 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -140,19 +140,22 @@ TRACE_EVENT(kvm_emulate_cp15_imp, | |||
140 | __entry->CRm, __entry->Op2) | 140 | __entry->CRm, __entry->Op2) |
141 | ); | 141 | ); |
142 | 142 | ||
143 | TRACE_EVENT(kvm_wfi, | 143 | TRACE_EVENT(kvm_wfx, |
144 | TP_PROTO(unsigned long vcpu_pc), | 144 | TP_PROTO(unsigned long vcpu_pc, bool is_wfe), |
145 | TP_ARGS(vcpu_pc), | 145 | TP_ARGS(vcpu_pc, is_wfe), |
146 | 146 | ||
147 | TP_STRUCT__entry( | 147 | TP_STRUCT__entry( |
148 | __field( unsigned long, vcpu_pc ) | 148 | __field( unsigned long, vcpu_pc ) |
149 | __field( bool, is_wfe ) | ||
149 | ), | 150 | ), |
150 | 151 | ||
151 | TP_fast_assign( | 152 | TP_fast_assign( |
152 | __entry->vcpu_pc = vcpu_pc; | 153 | __entry->vcpu_pc = vcpu_pc; |
154 | __entry->is_wfe = is_wfe; | ||
153 | ), | 155 | ), |
154 | 156 | ||
155 | TP_printk("guest executed wfi at: 0x%08lx", __entry->vcpu_pc) | 157 | TP_printk("guest executed wf%c at: 0x%08lx", |
158 | __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc) | ||
156 | ); | 159 | ); |
157 | 160 | ||
158 | TRACE_EVENT(kvm_unmap_hva, | 161 | TRACE_EVENT(kvm_unmap_hva, |
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 62167090937d..92bbae381598 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -96,6 +96,7 @@ | |||
96 | #define ESR_ELx_COND_SHIFT (20) | 96 | #define ESR_ELx_COND_SHIFT (20) |
97 | #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) | 97 | #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) |
98 | #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) | 98 | #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) |
99 | #define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) | ||
99 | 100 | ||
100 | #ifndef __ASSEMBLY__ | 101 | #ifndef __ASSEMBLY__ |
101 | #include <asm/types.h> | 102 | #include <asm/types.h> |
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 483842180f8f..4f7310fa77f0 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -126,6 +126,7 @@ extern char __kvm_hyp_vector[]; | |||
126 | 126 | ||
127 | extern void __kvm_flush_vm_context(void); | 127 | extern void __kvm_flush_vm_context(void); |
128 | extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); | 128 | extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); |
129 | extern void __kvm_tlb_flush_vmid(struct kvm *kvm); | ||
129 | 130 | ||
130 | extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); | 131 | extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); |
131 | 132 | ||
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 0163b5775ca5..17e92f05b1fe 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -29,6 +29,7 @@ | |||
29 | #include <asm/kvm_asm.h> | 29 | #include <asm/kvm_asm.h> |
30 | #include <asm/kvm_mmio.h> | 30 | #include <asm/kvm_mmio.h> |
31 | #include <asm/ptrace.h> | 31 | #include <asm/ptrace.h> |
32 | #include <asm/cputype.h> | ||
32 | 33 | ||
33 | unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); | 34 | unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); |
34 | unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); | 35 | unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); |
@@ -140,6 +141,11 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) | |||
140 | return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; | 141 | return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; |
141 | } | 142 | } |
142 | 143 | ||
144 | static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu) | ||
145 | { | ||
146 | return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK; | ||
147 | } | ||
148 | |||
143 | static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) | 149 | static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) |
144 | { | 150 | { |
145 | return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); | 151 | return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); |
@@ -201,9 +207,9 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) | |||
201 | return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; | 207 | return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; |
202 | } | 208 | } |
203 | 209 | ||
204 | static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) | 210 | static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) |
205 | { | 211 | { |
206 | return vcpu_sys_reg(vcpu, MPIDR_EL1); | 212 | return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; |
207 | } | 213 | } |
208 | 214 | ||
209 | static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) | 215 | static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) |
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index acd101a9014d..8ac3c70fe3c6 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -59,6 +59,9 @@ struct kvm_arch { | |||
59 | /* VTTBR value associated with above pgd and vmid */ | 59 | /* VTTBR value associated with above pgd and vmid */ |
60 | u64 vttbr; | 60 | u64 vttbr; |
61 | 61 | ||
62 | /* The maximum number of vCPUs depends on the used GIC model */ | ||
63 | int max_vcpus; | ||
64 | |||
62 | /* Interrupt controller */ | 65 | /* Interrupt controller */ |
63 | struct vgic_dist vgic; | 66 | struct vgic_dist vgic; |
64 | 67 | ||
@@ -159,6 +162,7 @@ struct kvm_vm_stat { | |||
159 | }; | 162 | }; |
160 | 163 | ||
161 | struct kvm_vcpu_stat { | 164 | struct kvm_vcpu_stat { |
165 | u32 halt_successful_poll; | ||
162 | u32 halt_wakeup; | 166 | u32 halt_wakeup; |
163 | }; | 167 | }; |
164 | 168 | ||
@@ -196,6 +200,7 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); | |||
196 | 200 | ||
197 | u64 kvm_call_hyp(void *hypfn, ...); | 201 | u64 kvm_call_hyp(void *hypfn, ...); |
198 | void force_vm_exit(const cpumask_t *mask); | 202 | void force_vm_exit(const cpumask_t *mask); |
203 | void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); | ||
199 | 204 | ||
200 | int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, | 205 | int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, |
201 | int exception_index); | 206 | int exception_index); |
@@ -203,6 +208,8 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, | |||
203 | int kvm_perf_init(void); | 208 | int kvm_perf_init(void); |
204 | int kvm_perf_teardown(void); | 209 | int kvm_perf_teardown(void); |
205 | 210 | ||
211 | struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); | ||
212 | |||
206 | static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, | 213 | static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, |
207 | phys_addr_t pgd_ptr, | 214 | phys_addr_t pgd_ptr, |
208 | unsigned long hyp_stack_ptr, | 215 | unsigned long hyp_stack_ptr, |
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
index fc2f689c0694..9f52beb7cb13 100644
--- a/arch/arm64/include/asm/kvm_mmio.h
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -40,6 +40,7 @@ struct kvm_exit_mmio { | |||
40 | u8 data[8]; | 40 | u8 data[8]; |
41 | u32 len; | 41 | u32 len; |
42 | bool is_write; | 42 | bool is_write; |
43 | void *private; | ||
43 | }; | 44 | }; |
44 | 45 | ||
45 | static inline void kvm_prepare_mmio(struct kvm_run *run, | 46 | static inline void kvm_prepare_mmio(struct kvm_run *run, |
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index adcf49547301..6458b5373142 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -118,6 +118,27 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) | |||
118 | pmd_val(*pmd) |= PMD_S2_RDWR; | 118 | pmd_val(*pmd) |= PMD_S2_RDWR; |
119 | } | 119 | } |
120 | 120 | ||
121 | static inline void kvm_set_s2pte_readonly(pte_t *pte) | ||
122 | { | ||
123 | pte_val(*pte) = (pte_val(*pte) & ~PTE_S2_RDWR) | PTE_S2_RDONLY; | ||
124 | } | ||
125 | |||
126 | static inline bool kvm_s2pte_readonly(pte_t *pte) | ||
127 | { | ||
128 | return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY; | ||
129 | } | ||
130 | |||
131 | static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) | ||
132 | { | ||
133 | pmd_val(*pmd) = (pmd_val(*pmd) & ~PMD_S2_RDWR) | PMD_S2_RDONLY; | ||
134 | } | ||
135 | |||
136 | static inline bool kvm_s2pmd_readonly(pmd_t *pmd) | ||
137 | { | ||
138 | return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY; | ||
139 | } | ||
140 | |||
141 | |||
121 | #define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end) | 142 | #define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end) |
122 | #define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) | 143 | #define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) |
123 | #define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) | 144 | #define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) |
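[Editor's note] The new kvm_set_s2pte_readonly()/kvm_s2pte_readonly() pair (and the PMD variants) exist so a stage-2 walker can write-protect guest memory, e.g. for dirty logging. A minimal sketch of the PTE-level leg of such a walker; stage2_wp_ptes() is a hypothetical name here, and addr/end are assumed to be IPAs covered by *pmd:

/* Sketch only -- simplified, no huge-page or pgd/pud handling shown. */
static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);

	do {
		if (!pte_none(*pte) && !kvm_s2pte_readonly(pte))
			kvm_set_s2pte_readonly(pte);
	} while (pte++, addr += PAGE_SIZE, addr != end);
}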
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 88174e0bfafe..5f930cc9ea83 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h | |||
@@ -119,6 +119,7 @@ | |||
119 | #define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ | 119 | #define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ |
120 | #define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ | 120 | #define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ |
121 | 121 | ||
122 | #define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */ | ||
122 | #define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ | 123 | #define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ |
123 | 124 | ||
124 | /* | 125 | /* |
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 8e38878c87c6..3ef77a466018 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h | |||
@@ -78,6 +78,13 @@ struct kvm_regs { | |||
78 | #define KVM_VGIC_V2_DIST_SIZE 0x1000 | 78 | #define KVM_VGIC_V2_DIST_SIZE 0x1000 |
79 | #define KVM_VGIC_V2_CPU_SIZE 0x2000 | 79 | #define KVM_VGIC_V2_CPU_SIZE 0x2000 |
80 | 80 | ||
81 | /* Supported VGICv3 address types */ | ||
82 | #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 | ||
83 | #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 | ||
84 | |||
85 | #define KVM_VGIC_V3_DIST_SIZE SZ_64K | ||
86 | #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) | ||
87 | |||
81 | #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ | 88 | #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ |
82 | #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ | 89 | #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ |
83 | #define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */ | 90 | #define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */ |
@@ -161,6 +168,8 @@ struct kvm_arch_memory_slot { | |||
161 | #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 | 168 | #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 |
162 | #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) | 169 | #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) |
163 | #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 | 170 | #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 |
171 | #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 | ||
172 | #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 | ||
164 | 173 | ||
165 | /* KVM_IRQ_LINE irq field index values */ | 174 | /* KVM_IRQ_LINE irq field index values */ |
166 | #define KVM_ARM_IRQ_TYPE_SHIFT 24 | 175 | #define KVM_ARM_IRQ_TYPE_SHIFT 24 |
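[Editor's note] With the two new address types, userspace places a GICv3 through KVM_CREATE_DEVICE plus KVM_SET_DEVICE_ATTR rather than KVM_CREATE_IRQCHIP. A hedged userspace sketch follows; vm_fd is the VM file descriptor obtained earlier, KVM_DEV_TYPE_ARM_VGIC_V3 is defined elsewhere in the series, the base addresses are arbitrary 64K-aligned examples, and error handling is omitted:

struct kvm_create_device cd = { .type = KVM_DEV_TYPE_ARM_VGIC_V3 };
__u64 dist_base = 0x08000000, redist_base = 0x080a0000;	/* example values */
struct kvm_device_attr attr = {
	.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
	.attr  = KVM_VGIC_V3_ADDR_TYPE_DIST,
	.addr  = (__u64)(unsigned long)&dist_base,
};

ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);		/* returns device fd in cd.fd */
ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);	/* place the distributor */

attr.attr = KVM_VGIC_V3_ADDR_TYPE_REDIST;
attr.addr = (__u64)(unsigned long)&redist_base;
ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);	/* place the redistributor region */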
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index a2ae19403abb..f7fa65d4c352 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c | |||
@@ -140,6 +140,7 @@ int main(void) | |||
140 | DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); | 140 | DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); |
141 | DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); | 141 | DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); |
142 | DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); | 142 | DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); |
143 | DEFINE(VGIC_V3_CPU_SRE, offsetof(struct vgic_cpu, vgic_v3.vgic_sre)); | ||
143 | DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr)); | 144 | DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr)); |
144 | DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr)); | 145 | DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr)); |
145 | DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr)); | 146 | DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr)); |
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index b334084d3675..f5590c81d95f 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig | |||
@@ -22,10 +22,12 @@ config KVM | |||
22 | select PREEMPT_NOTIFIERS | 22 | select PREEMPT_NOTIFIERS |
23 | select ANON_INODES | 23 | select ANON_INODES |
24 | select HAVE_KVM_CPU_RELAX_INTERCEPT | 24 | select HAVE_KVM_CPU_RELAX_INTERCEPT |
25 | select HAVE_KVM_ARCH_TLB_FLUSH_ALL | ||
25 | select KVM_MMIO | 26 | select KVM_MMIO |
26 | select KVM_ARM_HOST | 27 | select KVM_ARM_HOST |
27 | select KVM_ARM_VGIC | 28 | select KVM_ARM_VGIC |
28 | select KVM_ARM_TIMER | 29 | select KVM_ARM_TIMER |
30 | select KVM_GENERIC_DIRTYLOG_READ_PROTECT | ||
29 | select SRCU | 31 | select SRCU |
30 | ---help--- | 32 | ---help--- |
31 | Support hosting virtualized guest machines. | 33 | Support hosting virtualized guest machines. |
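[Editor's note] Selecting KVM_GENERIC_DIRTYLOG_READ_PROTECT means the generic KVM_GET_DIRTY_LOG path re-write-protects pages as it reports them, via an arch hook. A hedged sketch of that hook for this architecture; the hook name is the generic one, the body is an assumption, and stage2_wp_range() is a hypothetical helper built on the read-only accessors added to kvm_mmu.h above:

void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
					     struct kvm_memory_slot *slot,
					     gfn_t gfn_offset,
					     unsigned long mask)
{
	phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
	phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;

	stage2_wp_range(kvm, start, end);	/* hypothetical walker */
}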
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 32a096174b94..4e6e09ee4033 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile | |||
@@ -21,7 +21,9 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o | |||
21 | 21 | ||
22 | kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o | 22 | kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o |
23 | kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o | 23 | kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o |
24 | kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o | ||
24 | kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o | 25 | kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o |
25 | kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o | 26 | kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o |
27 | kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3-emul.o | ||
26 | kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o | 28 | kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o |
27 | kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o | 29 | kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o |
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 29b184a8f3f8..524fa25671fc 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c | |||
@@ -28,12 +28,18 @@ | |||
28 | #include <asm/kvm_mmu.h> | 28 | #include <asm/kvm_mmu.h> |
29 | #include <asm/kvm_psci.h> | 29 | #include <asm/kvm_psci.h> |
30 | 30 | ||
31 | #define CREATE_TRACE_POINTS | ||
32 | #include "trace.h" | ||
33 | |||
31 | typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); | 34 | typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); |
32 | 35 | ||
33 | static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) | 36 | static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) |
34 | { | 37 | { |
35 | int ret; | 38 | int ret; |
36 | 39 | ||
40 | trace_kvm_hvc_arm64(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), | ||
41 | kvm_vcpu_hvc_get_imm(vcpu)); | ||
42 | |||
37 | ret = kvm_psci_call(vcpu); | 43 | ret = kvm_psci_call(vcpu); |
38 | if (ret < 0) { | 44 | if (ret < 0) { |
39 | kvm_inject_undefined(vcpu); | 45 | kvm_inject_undefined(vcpu); |
@@ -63,10 +69,13 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
63 | */ | 69 | */ |
64 | static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) | 70 | static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) |
65 | { | 71 | { |
66 | if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) | 72 | if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { |
73 | trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); | ||
67 | kvm_vcpu_on_spin(vcpu); | 74 | kvm_vcpu_on_spin(vcpu); |
68 | else | 75 | } else { |
76 | trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); | ||
69 | kvm_vcpu_block(vcpu); | 77 | kvm_vcpu_block(vcpu); |
78 | } | ||
70 | 79 | ||
71 | kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); | 80 | kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); |
72 | 81 | ||
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 9bff671cc561..5befd010e232 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S | |||
@@ -1032,6 +1032,28 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) | |||
1032 | ret | 1032 | ret |
1033 | ENDPROC(__kvm_tlb_flush_vmid_ipa) | 1033 | ENDPROC(__kvm_tlb_flush_vmid_ipa) |
1034 | 1034 | ||
1035 | /** | ||
1036 | * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs | ||
1037 | * @struct kvm *kvm - pointer to kvm structure | ||
1038 | * | ||
1039 | * Invalidates all Stage 1 and 2 TLB entries for current VMID. | ||
1040 | */ | ||
1041 | ENTRY(__kvm_tlb_flush_vmid) | ||
1042 | dsb ishst | ||
1043 | |||
1044 | kern_hyp_va x0 | ||
1045 | ldr x2, [x0, #KVM_VTTBR] | ||
1046 | msr vttbr_el2, x2 | ||
1047 | isb | ||
1048 | |||
1049 | tlbi vmalls12e1is | ||
1050 | dsb ish | ||
1051 | isb | ||
1052 | |||
1053 | msr vttbr_el2, xzr | ||
1054 | ret | ||
1055 | ENDPROC(__kvm_tlb_flush_vmid) | ||
1056 | |||
1035 | ENTRY(__kvm_flush_vm_context) | 1057 | ENTRY(__kvm_flush_vm_context) |
1036 | dsb ishst | 1058 | dsb ishst |
1037 | tlbi alle1is | 1059 | tlbi alle1is |
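[Editor's note] __kvm_tlb_flush_vmid() pairs with the HAVE_KVM_ARCH_TLB_FLUSH_ALL select in the Kconfig hunk above: the C side can now invalidate the whole VMID after bulk stage-2 permission changes instead of flushing per IPA. A minimal sketch of the expected caller, assuming the arch overrides the generic kvm_flush_remote_tlbs() hook:

void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	/* runs at EL2 with VTTBR pointed at this VM, see the asm above */
	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
}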
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b96afdf6cee4..c370b4014799 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c | |||
@@ -113,6 +113,27 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, | |||
113 | return true; | 113 | return true; |
114 | } | 114 | } |
115 | 115 | ||
116 | /* | ||
117 | * Trap handler for the GICv3 SGI generation system register. | ||
118 | * Forward the request to the VGIC emulation. | ||
119 | * The cp15_64 code makes sure this automatically works | ||
120 | * for both AArch64 and AArch32 accesses. | ||
121 | */ | ||
122 | static bool access_gic_sgi(struct kvm_vcpu *vcpu, | ||
123 | const struct sys_reg_params *p, | ||
124 | const struct sys_reg_desc *r) | ||
125 | { | ||
126 | u64 val; | ||
127 | |||
128 | if (!p->is_write) | ||
129 | return read_from_write_only(vcpu, p); | ||
130 | |||
131 | val = *vcpu_reg(vcpu, p->Rt); | ||
132 | vgic_v3_dispatch_sgi(vcpu, val); | ||
133 | |||
134 | return true; | ||
135 | } | ||
136 | |||
116 | static bool trap_raz_wi(struct kvm_vcpu *vcpu, | 137 | static bool trap_raz_wi(struct kvm_vcpu *vcpu, |
117 | const struct sys_reg_params *p, | 138 | const struct sys_reg_params *p, |
118 | const struct sys_reg_desc *r) | 139 | const struct sys_reg_desc *r) |
@@ -200,10 +221,19 @@ static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) | |||
200 | 221 | ||
201 | static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) | 222 | static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) |
202 | { | 223 | { |
224 | u64 mpidr; | ||
225 | |||
203 | /* | 226 | /* |
204 | * Simply map the vcpu_id into the Aff0 field of the MPIDR. | 227 | * Map the vcpu_id into the first three affinity level fields of |
228 | * the MPIDR. We limit the number of VCPUs in level 0 due to a | ||
229 | * limitation to 16 CPUs in that level in the ICC_SGIxR registers | ||
230 | * of the GICv3 to be able to address each CPU directly when | ||
231 | * sending IPIs. | ||
205 | */ | 232 | */ |
206 | vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff); | 233 | mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0); |
234 | mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1); | ||
235 | mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2); | ||
236 | vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr; | ||
207 | } | 237 | } |
208 | 238 | ||
209 | /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ | 239 | /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ |
@@ -373,6 +403,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { | |||
373 | { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000), | 403 | { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000), |
374 | NULL, reset_val, VBAR_EL1, 0 }, | 404 | NULL, reset_val, VBAR_EL1, 0 }, |
375 | 405 | ||
406 | /* ICC_SGI1R_EL1 */ | ||
407 | { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1011), Op2(0b101), | ||
408 | access_gic_sgi }, | ||
376 | /* ICC_SRE_EL1 */ | 409 | /* ICC_SRE_EL1 */ |
377 | { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1100), Op2(0b101), | 410 | { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1100), Op2(0b101), |
378 | trap_raz_wi }, | 411 | trap_raz_wi }, |
@@ -605,6 +638,8 @@ static const struct sys_reg_desc cp14_64_regs[] = { | |||
605 | * register). | 638 | * register). |
606 | */ | 639 | */ |
607 | static const struct sys_reg_desc cp15_regs[] = { | 640 | static const struct sys_reg_desc cp15_regs[] = { |
641 | { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, | ||
642 | |||
608 | { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, | 643 | { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, |
609 | { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, | 644 | { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, |
610 | { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, | 645 | { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, |
@@ -652,6 +687,7 @@ static const struct sys_reg_desc cp15_regs[] = { | |||
652 | 687 | ||
653 | static const struct sys_reg_desc cp15_64_regs[] = { | 688 | static const struct sys_reg_desc cp15_64_regs[] = { |
654 | { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, | 689 | { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, |
690 | { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, | ||
655 | { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, | 691 | { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, |
656 | }; | 692 | }; |
657 | 693 | ||
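[Editor's note] The reset_mpidr() comment above alludes to the ICC_SGI1R_EL1 format: the register addresses its Aff0 targets with a 16-bit TargetList bitmap, which is why the mapping packs at most 16 vcpus per Aff0 group. A small illustrative sketch; the helper name is hypothetical and a real dispatcher would also match Aff1..Aff3 from the register:

/* true if the 16-bit TargetList of an SGI names this vcpu's Aff0 */
static bool sgi_targets_vcpu(u16 target_list, struct kvm_vcpu *vcpu)
{
	unsigned long aff0 = kvm_vcpu_get_mpidr_aff(vcpu) & 0x0f;

	return target_list & BIT(aff0);
}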
diff --git a/arch/arm64/kvm/trace.h b/arch/arm64/kvm/trace.h new file mode 100644 index 000000000000..157416e963f2 --- /dev/null +++ b/arch/arm64/kvm/trace.h | |||
@@ -0,0 +1,55 @@ | |||
1 | #if !defined(_TRACE_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ) | ||
2 | #define _TRACE_ARM64_KVM_H | ||
3 | |||
4 | #include <linux/tracepoint.h> | ||
5 | |||
6 | #undef TRACE_SYSTEM | ||
7 | #define TRACE_SYSTEM kvm | ||
8 | |||
9 | TRACE_EVENT(kvm_wfx_arm64, | ||
10 | TP_PROTO(unsigned long vcpu_pc, bool is_wfe), | ||
11 | TP_ARGS(vcpu_pc, is_wfe), | ||
12 | |||
13 | TP_STRUCT__entry( | ||
14 | __field(unsigned long, vcpu_pc) | ||
15 | __field(bool, is_wfe) | ||
16 | ), | ||
17 | |||
18 | TP_fast_assign( | ||
19 | __entry->vcpu_pc = vcpu_pc; | ||
20 | __entry->is_wfe = is_wfe; | ||
21 | ), | ||
22 | |||
23 | TP_printk("guest executed wf%c at: 0x%08lx", | ||
24 | __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc) | ||
25 | ); | ||
26 | |||
27 | TRACE_EVENT(kvm_hvc_arm64, | ||
28 | TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm), | ||
29 | TP_ARGS(vcpu_pc, r0, imm), | ||
30 | |||
31 | TP_STRUCT__entry( | ||
32 | __field(unsigned long, vcpu_pc) | ||
33 | __field(unsigned long, r0) | ||
34 | __field(unsigned long, imm) | ||
35 | ), | ||
36 | |||
37 | TP_fast_assign( | ||
38 | __entry->vcpu_pc = vcpu_pc; | ||
39 | __entry->r0 = r0; | ||
40 | __entry->imm = imm; | ||
41 | ), | ||
42 | |||
43 | TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)", | ||
44 | __entry->vcpu_pc, __entry->r0, __entry->imm) | ||
45 | ); | ||
46 | |||
47 | #endif /* _TRACE_ARM64_KVM_H */ | ||
48 | |||
49 | #undef TRACE_INCLUDE_PATH | ||
50 | #define TRACE_INCLUDE_PATH . | ||
51 | #undef TRACE_INCLUDE_FILE | ||
52 | #define TRACE_INCLUDE_FILE trace | ||
53 | |||
54 | /* This part must be outside protection */ | ||
55 | #include <trace/define_trace.h> | ||
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S index d16046999e06..617a012a0107 100644 --- a/arch/arm64/kvm/vgic-v3-switch.S +++ b/arch/arm64/kvm/vgic-v3-switch.S | |||
@@ -148,17 +148,18 @@ | |||
148 | * x0: Register pointing to VCPU struct | 148 | * x0: Register pointing to VCPU struct |
149 | */ | 149 | */ |
150 | .macro restore_vgic_v3_state | 150 | .macro restore_vgic_v3_state |
151 | // Disable SRE_EL1 access. Necessary, otherwise | ||
152 | // ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens... | ||
153 | msr_s ICC_SRE_EL1, xzr | ||
154 | isb | ||
155 | |||
156 | // Compute the address of struct vgic_cpu | 151 | // Compute the address of struct vgic_cpu |
157 | add x3, x0, #VCPU_VGIC_CPU | 152 | add x3, x0, #VCPU_VGIC_CPU |
158 | 153 | ||
159 | // Restore all interesting registers | 154 | // Restore all interesting registers |
160 | ldr w4, [x3, #VGIC_V3_CPU_HCR] | 155 | ldr w4, [x3, #VGIC_V3_CPU_HCR] |
161 | ldr w5, [x3, #VGIC_V3_CPU_VMCR] | 156 | ldr w5, [x3, #VGIC_V3_CPU_VMCR] |
157 | ldr w25, [x3, #VGIC_V3_CPU_SRE] | ||
158 | |||
159 | msr_s ICC_SRE_EL1, x25 | ||
160 | |||
161 | // make sure SRE is valid before writing the other registers | ||
162 | isb | ||
162 | 163 | ||
163 | msr_s ICH_HCR_EL2, x4 | 164 | msr_s ICH_HCR_EL2, x4 |
164 | msr_s ICH_VMCR_EL2, x5 | 165 | msr_s ICH_VMCR_EL2, x5 |
@@ -244,9 +245,12 @@ | |||
244 | dsb sy | 245 | dsb sy |
245 | 246 | ||
246 | // Prevent the guest from touching the GIC system registers | 247 | // Prevent the guest from touching the GIC system registers |
248 | // if SRE isn't enabled for GICv3 emulation | ||
249 | cbnz x25, 1f | ||
247 | mrs_s x5, ICC_SRE_EL2 | 250 | mrs_s x5, ICC_SRE_EL2 |
248 | and x5, x5, #~ICC_SRE_EL2_ENABLE | 251 | and x5, x5, #~ICC_SRE_EL2_ENABLE |
249 | msr_s ICC_SRE_EL2, x5 | 252 | msr_s ICC_SRE_EL2, x5 |
253 | 1: | ||
250 | .endm | 254 | .endm |
251 | 255 | ||
252 | ENTRY(__save_vgic_v3_state) | 256 | ENTRY(__save_vgic_v3_state) |
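[Editor's note] The switch code now restores ICC_SRE_EL1 from a per-vcpu vgic_sre value and only clears ICC_SRE_EL2.Enable when that value is zero. A hedged sketch of how vgic_sre is presumably chosen at vcpu init time, depending on which GIC model the VM emulates; the field and constant names here are assumptions for illustration:

if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
	vcpu->arch.vgic_cpu.vgic_v3.vgic_sre = ICC_SRE_EL1_SRE;	/* sysreg CPU interface */
else
	vcpu->arch.vgic_cpu.vgic_v3.vgic_sre = 0;		/* GICv2 guest: MMIO CPU interface */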
diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild index 1b3f5eb5fcdb..891002bbb995 100644 --- a/arch/ia64/include/uapi/asm/Kbuild +++ b/arch/ia64/include/uapi/asm/Kbuild | |||
@@ -18,7 +18,6 @@ header-y += intrinsics.h | |||
18 | header-y += ioctl.h | 18 | header-y += ioctl.h |
19 | header-y += ioctls.h | 19 | header-y += ioctls.h |
20 | header-y += ipcbuf.h | 20 | header-y += ipcbuf.h |
21 | header-y += kvm.h | ||
22 | header-y += kvm_para.h | 21 | header-y += kvm_para.h |
23 | header-y += mman.h | 22 | header-y += mman.h |
24 | header-y += msgbuf.h | 23 | header-y += msgbuf.h |
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index f2c249796ea8..ac4fc716062b 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h | |||
@@ -120,6 +120,7 @@ struct kvm_vcpu_stat { | |||
120 | u32 resvd_inst_exits; | 120 | u32 resvd_inst_exits; |
121 | u32 break_inst_exits; | 121 | u32 break_inst_exits; |
122 | u32 flush_dcache_exits; | 122 | u32 flush_dcache_exits; |
123 | u32 halt_successful_poll; | ||
123 | u32 halt_wakeup; | 124 | u32 halt_wakeup; |
124 | }; | 125 | }; |
125 | 126 | ||
diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S index d7279c03c517..4a68b176d6e4 100644 --- a/arch/mips/kvm/locore.S +++ b/arch/mips/kvm/locore.S | |||
@@ -434,7 +434,7 @@ __kvm_mips_return_to_guest: | |||
434 | /* Setup status register for running guest in UM */ | 434 | /* Setup status register for running guest in UM */ |
435 | .set at | 435 | .set at |
436 | or v1, v1, (ST0_EXL | KSU_USER | ST0_IE) | 436 | or v1, v1, (ST0_EXL | KSU_USER | ST0_IE) |
437 | and v1, v1, ~ST0_CU0 | 437 | and v1, v1, ~(ST0_CU0 | ST0_MX) |
438 | .set noat | 438 | .set noat |
439 | mtc0 v1, CP0_STATUS | 439 | mtc0 v1, CP0_STATUS |
440 | ehb | 440 | ehb |
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index e3b21e51ff7e..c9eccf5df912 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c | |||
@@ -15,9 +15,11 @@ | |||
15 | #include <linux/vmalloc.h> | 15 | #include <linux/vmalloc.h> |
16 | #include <linux/fs.h> | 16 | #include <linux/fs.h> |
17 | #include <linux/bootmem.h> | 17 | #include <linux/bootmem.h> |
18 | #include <asm/fpu.h> | ||
18 | #include <asm/page.h> | 19 | #include <asm/page.h> |
19 | #include <asm/cacheflush.h> | 20 | #include <asm/cacheflush.h> |
20 | #include <asm/mmu_context.h> | 21 | #include <asm/mmu_context.h> |
22 | #include <asm/pgtable.h> | ||
21 | 23 | ||
22 | #include <linux/kvm_host.h> | 24 | #include <linux/kvm_host.h> |
23 | 25 | ||
@@ -47,6 +49,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
47 | { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU }, | 49 | { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU }, |
48 | { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU }, | 50 | { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU }, |
49 | { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, | 51 | { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, |
52 | { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, | ||
50 | { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, | 53 | { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, |
51 | {NULL} | 54 | {NULL} |
52 | }; | 55 | }; |
@@ -378,6 +381,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
378 | vcpu->mmio_needed = 0; | 381 | vcpu->mmio_needed = 0; |
379 | } | 382 | } |
380 | 383 | ||
384 | lose_fpu(1); | ||
385 | |||
381 | local_irq_disable(); | 386 | local_irq_disable(); |
382 | /* Check if we have any exceptions/interrupts pending */ | 387 | /* Check if we have any exceptions/interrupts pending */ |
383 | kvm_mips_deliver_interrupts(vcpu, | 388 | kvm_mips_deliver_interrupts(vcpu, |
@@ -385,8 +390,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
385 | 390 | ||
386 | kvm_guest_enter(); | 391 | kvm_guest_enter(); |
387 | 392 | ||
393 | /* Disable hardware page table walking while in guest */ | ||
394 | htw_stop(); | ||
395 | |||
388 | r = __kvm_mips_vcpu_run(run, vcpu); | 396 | r = __kvm_mips_vcpu_run(run, vcpu); |
389 | 397 | ||
398 | /* Re-enable HTW before enabling interrupts */ | ||
399 | htw_start(); | ||
400 | |||
390 | kvm_guest_exit(); | 401 | kvm_guest_exit(); |
391 | local_irq_enable(); | 402 | local_irq_enable(); |
392 | 403 | ||
@@ -832,9 +843,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
832 | return -ENOIOCTLCMD; | 843 | return -ENOIOCTLCMD; |
833 | } | 844 | } |
834 | 845 | ||
835 | int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) | 846 | void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) |
836 | { | 847 | { |
837 | return 0; | ||
838 | } | 848 | } |
839 | 849 | ||
840 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 850 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
@@ -980,9 +990,6 @@ static void kvm_mips_set_c0_status(void) | |||
980 | { | 990 | { |
981 | uint32_t status = read_c0_status(); | 991 | uint32_t status = read_c0_status(); |
982 | 992 | ||
983 | if (cpu_has_fpu) | ||
984 | status |= (ST0_CU1); | ||
985 | |||
986 | if (cpu_has_dsp) | 993 | if (cpu_has_dsp) |
987 | status |= (ST0_MX); | 994 | status |= (ST0_MX); |
988 | 995 | ||
@@ -1002,6 +1009,9 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
1002 | enum emulation_result er = EMULATE_DONE; | 1009 | enum emulation_result er = EMULATE_DONE; |
1003 | int ret = RESUME_GUEST; | 1010 | int ret = RESUME_GUEST; |
1004 | 1011 | ||
1012 | /* re-enable HTW before enabling interrupts */ | ||
1013 | htw_start(); | ||
1014 | |||
1005 | /* Set a default exit reason */ | 1015 | /* Set a default exit reason */ |
1006 | run->exit_reason = KVM_EXIT_UNKNOWN; | 1016 | run->exit_reason = KVM_EXIT_UNKNOWN; |
1007 | run->ready_for_interrupt_injection = 1; | 1017 | run->ready_for_interrupt_injection = 1; |
@@ -1136,6 +1146,9 @@ skip_emul: | |||
1136 | } | 1146 | } |
1137 | } | 1147 | } |
1138 | 1148 | ||
1149 | /* Disable HTW before returning to guest or host */ | ||
1150 | htw_stop(); | ||
1151 | |||
1139 | return ret; | 1152 | return ret; |
1140 | } | 1153 | } |
1141 | 1154 | ||
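[Editor's note] Taken together, the MIPS hunks above change the run-loop bracketing: host FPU context is dropped with lose_fpu(1) instead of force-enabling CU1 for the guest, and the hardware page-table walker is stopped while guest code runs. Condensed for readability, with interrupt delivery and error handling elided:

lose_fpu(1);			/* guest must not inherit live host FPU state */
local_irq_disable();
/* ... deliver pending guest interrupts ... */
kvm_guest_enter();
htw_stop();			/* HTW would walk host tables, not the guest's */
r = __kvm_mips_vcpu_run(run, vcpu);
htw_start();			/* re-enable before interrupts come back on */
kvm_guest_exit();
local_irq_enable();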
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 7efd666a3fa7..8ef05121d3cd 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
@@ -107,6 +107,7 @@ struct kvm_vcpu_stat { | |||
107 | u32 emulated_inst_exits; | 107 | u32 emulated_inst_exits; |
108 | u32 dec_exits; | 108 | u32 dec_exits; |
109 | u32 ext_intr_exits; | 109 | u32 ext_intr_exits; |
110 | u32 halt_successful_poll; | ||
110 | u32 halt_wakeup; | 111 | u32 halt_wakeup; |
111 | u32 dbell_exits; | 112 | u32 dbell_exits; |
112 | u32 gdbell_exits; | 113 | u32 gdbell_exits; |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 888bf466d8c6..cfbcdc654201 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
@@ -52,6 +52,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
52 | { "dec", VCPU_STAT(dec_exits) }, | 52 | { "dec", VCPU_STAT(dec_exits) }, |
53 | { "ext_intr", VCPU_STAT(ext_intr_exits) }, | 53 | { "ext_intr", VCPU_STAT(ext_intr_exits) }, |
54 | { "queue_intr", VCPU_STAT(queue_intr) }, | 54 | { "queue_intr", VCPU_STAT(queue_intr) }, |
55 | { "halt_successful_poll", VCPU_STAT(halt_successful_poll), }, | ||
55 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, | 56 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, |
56 | { "pf_storage", VCPU_STAT(pf_storage) }, | 57 | { "pf_storage", VCPU_STAT(pf_storage) }, |
57 | { "sp_storage", VCPU_STAT(sp_storage) }, | 58 | { "sp_storage", VCPU_STAT(sp_storage) }, |
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 9b55dec2d6cc..6c1316a15a27 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
@@ -62,6 +62,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
62 | { "inst_emu", VCPU_STAT(emulated_inst_exits) }, | 62 | { "inst_emu", VCPU_STAT(emulated_inst_exits) }, |
63 | { "dec", VCPU_STAT(dec_exits) }, | 63 | { "dec", VCPU_STAT(dec_exits) }, |
64 | { "ext_intr", VCPU_STAT(ext_intr_exits) }, | 64 | { "ext_intr", VCPU_STAT(ext_intr_exits) }, |
65 | { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, | ||
65 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, | 66 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, |
66 | { "doorbell", VCPU_STAT(dbell_exits) }, | 67 | { "doorbell", VCPU_STAT(dbell_exits) }, |
67 | { "guest doorbell", VCPU_STAT(gdbell_exits) }, | 68 | { "guest doorbell", VCPU_STAT(gdbell_exits) }, |
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c45eaab752b0..27c0face86f4 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -623,9 +623,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) | |||
623 | return vcpu; | 623 | return vcpu; |
624 | } | 624 | } |
625 | 625 | ||
626 | int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) | 626 | void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) |
627 | { | 627 | { |
628 | return 0; | ||
629 | } | 628 | } |
630 | 629 | ||
631 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | 630 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) |
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 9cba74d5d853..d84559e31f32 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
@@ -35,11 +35,13 @@ | |||
35 | #define KVM_NR_IRQCHIPS 1 | 35 | #define KVM_NR_IRQCHIPS 1 |
36 | #define KVM_IRQCHIP_NUM_PINS 4096 | 36 | #define KVM_IRQCHIP_NUM_PINS 4096 |
37 | 37 | ||
38 | #define SIGP_CTRL_C 0x00800000 | 38 | #define SIGP_CTRL_C 0x80 |
39 | #define SIGP_CTRL_SCN_MASK 0x3f | ||
39 | 40 | ||
40 | struct sca_entry { | 41 | struct sca_entry { |
41 | atomic_t ctrl; | 42 | __u8 reserved0; |
42 | __u32 reserved; | 43 | __u8 sigp_ctrl; |
44 | __u16 reserved[3]; | ||
43 | __u64 sda; | 45 | __u64 sda; |
44 | __u64 reserved2[2]; | 46 | __u64 reserved2[2]; |
45 | } __attribute__((packed)); | 47 | } __attribute__((packed)); |
@@ -87,7 +89,8 @@ struct kvm_s390_sie_block { | |||
87 | atomic_t cpuflags; /* 0x0000 */ | 89 | atomic_t cpuflags; /* 0x0000 */ |
88 | __u32 : 1; /* 0x0004 */ | 90 | __u32 : 1; /* 0x0004 */ |
89 | __u32 prefix : 18; | 91 | __u32 prefix : 18; |
90 | __u32 : 13; | 92 | __u32 : 1; |
93 | __u32 ibc : 12; | ||
91 | __u8 reserved08[4]; /* 0x0008 */ | 94 | __u8 reserved08[4]; /* 0x0008 */ |
92 | #define PROG_IN_SIE (1<<0) | 95 | #define PROG_IN_SIE (1<<0) |
93 | __u32 prog0c; /* 0x000c */ | 96 | __u32 prog0c; /* 0x000c */ |
@@ -132,7 +135,9 @@ struct kvm_s390_sie_block { | |||
132 | __u8 reserved60; /* 0x0060 */ | 135 | __u8 reserved60; /* 0x0060 */ |
133 | __u8 ecb; /* 0x0061 */ | 136 | __u8 ecb; /* 0x0061 */ |
134 | __u8 ecb2; /* 0x0062 */ | 137 | __u8 ecb2; /* 0x0062 */ |
135 | __u8 reserved63[1]; /* 0x0063 */ | 138 | #define ECB3_AES 0x04 |
139 | #define ECB3_DEA 0x08 | ||
140 | __u8 ecb3; /* 0x0063 */ | ||
136 | __u32 scaol; /* 0x0064 */ | 141 | __u32 scaol; /* 0x0064 */ |
137 | __u8 reserved68[4]; /* 0x0068 */ | 142 | __u8 reserved68[4]; /* 0x0068 */ |
138 | __u32 todpr; /* 0x006c */ | 143 | __u32 todpr; /* 0x006c */ |
@@ -159,6 +164,7 @@ struct kvm_s390_sie_block { | |||
159 | __u64 tecmc; /* 0x00e8 */ | 164 | __u64 tecmc; /* 0x00e8 */ |
160 | __u8 reservedf0[12]; /* 0x00f0 */ | 165 | __u8 reservedf0[12]; /* 0x00f0 */ |
161 | #define CRYCB_FORMAT1 0x00000001 | 166 | #define CRYCB_FORMAT1 0x00000001 |
167 | #define CRYCB_FORMAT2 0x00000003 | ||
162 | __u32 crycbd; /* 0x00fc */ | 168 | __u32 crycbd; /* 0x00fc */ |
163 | __u64 gcr[16]; /* 0x0100 */ | 169 | __u64 gcr[16]; /* 0x0100 */ |
164 | __u64 gbea; /* 0x0180 */ | 170 | __u64 gbea; /* 0x0180 */ |
@@ -192,6 +198,7 @@ struct kvm_vcpu_stat { | |||
192 | u32 exit_stop_request; | 198 | u32 exit_stop_request; |
193 | u32 exit_validity; | 199 | u32 exit_validity; |
194 | u32 exit_instruction; | 200 | u32 exit_instruction; |
201 | u32 halt_successful_poll; | ||
195 | u32 halt_wakeup; | 202 | u32 halt_wakeup; |
196 | u32 instruction_lctl; | 203 | u32 instruction_lctl; |
197 | u32 instruction_lctlg; | 204 | u32 instruction_lctlg; |
@@ -378,14 +385,11 @@ struct kvm_s390_interrupt_info { | |||
378 | struct kvm_s390_emerg_info emerg; | 385 | struct kvm_s390_emerg_info emerg; |
379 | struct kvm_s390_extcall_info extcall; | 386 | struct kvm_s390_extcall_info extcall; |
380 | struct kvm_s390_prefix_info prefix; | 387 | struct kvm_s390_prefix_info prefix; |
388 | struct kvm_s390_stop_info stop; | ||
381 | struct kvm_s390_mchk_info mchk; | 389 | struct kvm_s390_mchk_info mchk; |
382 | }; | 390 | }; |
383 | }; | 391 | }; |
384 | 392 | ||
385 | /* for local_interrupt.action_flags */ | ||
386 | #define ACTION_STORE_ON_STOP (1<<0) | ||
387 | #define ACTION_STOP_ON_STOP (1<<1) | ||
388 | |||
389 | struct kvm_s390_irq_payload { | 393 | struct kvm_s390_irq_payload { |
390 | struct kvm_s390_io_info io; | 394 | struct kvm_s390_io_info io; |
391 | struct kvm_s390_ext_info ext; | 395 | struct kvm_s390_ext_info ext; |
@@ -393,6 +397,7 @@ struct kvm_s390_irq_payload { | |||
393 | struct kvm_s390_emerg_info emerg; | 397 | struct kvm_s390_emerg_info emerg; |
394 | struct kvm_s390_extcall_info extcall; | 398 | struct kvm_s390_extcall_info extcall; |
395 | struct kvm_s390_prefix_info prefix; | 399 | struct kvm_s390_prefix_info prefix; |
400 | struct kvm_s390_stop_info stop; | ||
396 | struct kvm_s390_mchk_info mchk; | 401 | struct kvm_s390_mchk_info mchk; |
397 | }; | 402 | }; |
398 | 403 | ||
@@ -401,7 +406,6 @@ struct kvm_s390_local_interrupt { | |||
401 | struct kvm_s390_float_interrupt *float_int; | 406 | struct kvm_s390_float_interrupt *float_int; |
402 | wait_queue_head_t *wq; | 407 | wait_queue_head_t *wq; |
403 | atomic_t *cpuflags; | 408 | atomic_t *cpuflags; |
404 | unsigned int action_bits; | ||
405 | DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS); | 409 | DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS); |
406 | struct kvm_s390_irq_payload irq; | 410 | struct kvm_s390_irq_payload irq; |
407 | unsigned long pending_irqs; | 411 | unsigned long pending_irqs; |
@@ -470,7 +474,6 @@ struct kvm_vcpu_arch { | |||
470 | }; | 474 | }; |
471 | struct gmap *gmap; | 475 | struct gmap *gmap; |
472 | struct kvm_guestdbg_info_arch guestdbg; | 476 | struct kvm_guestdbg_info_arch guestdbg; |
473 | #define KVM_S390_PFAULT_TOKEN_INVALID (-1UL) | ||
474 | unsigned long pfault_token; | 477 | unsigned long pfault_token; |
475 | unsigned long pfault_select; | 478 | unsigned long pfault_select; |
476 | unsigned long pfault_compare; | 479 | unsigned long pfault_compare; |
@@ -504,13 +507,39 @@ struct s390_io_adapter { | |||
504 | #define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8) | 507 | #define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8) |
505 | #define MAX_S390_ADAPTER_MAPS 256 | 508 | #define MAX_S390_ADAPTER_MAPS 256 |
506 | 509 | ||
510 | /* maximum size of facilities and facility mask is 2k bytes */ | ||
511 | #define S390_ARCH_FAC_LIST_SIZE_BYTE (1<<11) | ||
512 | #define S390_ARCH_FAC_LIST_SIZE_U64 \ | ||
513 | (S390_ARCH_FAC_LIST_SIZE_BYTE / sizeof(u64)) | ||
514 | #define S390_ARCH_FAC_MASK_SIZE_BYTE S390_ARCH_FAC_LIST_SIZE_BYTE | ||
515 | #define S390_ARCH_FAC_MASK_SIZE_U64 \ | ||
516 | (S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64)) | ||
517 | |||
518 | struct s390_model_fac { | ||
519 | /* facilities used in SIE context */ | ||
520 | __u64 sie[S390_ARCH_FAC_LIST_SIZE_U64]; | ||
521 | /* subset enabled by kvm */ | ||
522 | __u64 kvm[S390_ARCH_FAC_LIST_SIZE_U64]; | ||
523 | }; | ||
524 | |||
525 | struct kvm_s390_cpu_model { | ||
526 | struct s390_model_fac *fac; | ||
527 | struct cpuid cpu_id; | ||
528 | unsigned short ibc; | ||
529 | }; | ||
530 | |||
507 | struct kvm_s390_crypto { | 531 | struct kvm_s390_crypto { |
508 | struct kvm_s390_crypto_cb *crycb; | 532 | struct kvm_s390_crypto_cb *crycb; |
509 | __u32 crycbd; | 533 | __u32 crycbd; |
534 | __u8 aes_kw; | ||
535 | __u8 dea_kw; | ||
510 | }; | 536 | }; |
511 | 537 | ||
512 | struct kvm_s390_crypto_cb { | 538 | struct kvm_s390_crypto_cb { |
513 | __u8 reserved00[128]; /* 0x0000 */ | 539 | __u8 reserved00[72]; /* 0x0000 */ |
540 | __u8 dea_wrapping_key_mask[24]; /* 0x0048 */ | ||
541 | __u8 aes_wrapping_key_mask[32]; /* 0x0060 */ | ||
542 | __u8 reserved80[128]; /* 0x0080 */ | ||
514 | }; | 543 | }; |
515 | 544 | ||
516 | struct kvm_arch{ | 545 | struct kvm_arch{ |
@@ -523,12 +552,15 @@ struct kvm_arch{ | |||
523 | int use_irqchip; | 552 | int use_irqchip; |
524 | int use_cmma; | 553 | int use_cmma; |
525 | int user_cpu_state_ctrl; | 554 | int user_cpu_state_ctrl; |
555 | int user_sigp; | ||
526 | struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; | 556 | struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; |
527 | wait_queue_head_t ipte_wq; | 557 | wait_queue_head_t ipte_wq; |
528 | int ipte_lock_count; | 558 | int ipte_lock_count; |
529 | struct mutex ipte_mutex; | 559 | struct mutex ipte_mutex; |
530 | spinlock_t start_stop_lock; | 560 | spinlock_t start_stop_lock; |
561 | struct kvm_s390_cpu_model model; | ||
531 | struct kvm_s390_crypto crypto; | 562 | struct kvm_s390_crypto crypto; |
563 | u64 epoch; | ||
532 | }; | 564 | }; |
533 | 565 | ||
534 | #define KVM_HVA_ERR_BAD (-1UL) | 566 | #define KVM_HVA_ERR_BAD (-1UL) |
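[Editor's note] The new kvm_s390_cpu_model with its per-VM facility arrays is what the gaccess.c hunk further below switches to (test_vfacility() becomes test_kvm_facility()). A hedged sketch of what that helper presumably looks like on top of this structure:

/* availability of a facility for this guest, per the kvm-enabled subset */
static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
{
	return __test_facility(nr, kvm->arch.model.fac->kvm);
}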
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index edb453cfc2c6..f1096bab5199 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h | |||
@@ -31,7 +31,8 @@ struct sclp_cpu_entry { | |||
31 | u8 reserved0[2]; | 31 | u8 reserved0[2]; |
32 | u8 : 3; | 32 | u8 : 3; |
33 | u8 siif : 1; | 33 | u8 siif : 1; |
34 | u8 : 4; | 34 | u8 sigpif : 1; |
35 | u8 : 3; | ||
35 | u8 reserved2[10]; | 36 | u8 reserved2[10]; |
36 | u8 type; | 37 | u8 type; |
37 | u8 reserved1; | 38 | u8 reserved1; |
@@ -69,6 +70,7 @@ int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode); | |||
69 | unsigned long sclp_get_hsa_size(void); | 70 | unsigned long sclp_get_hsa_size(void); |
70 | void sclp_early_detect(void); | 71 | void sclp_early_detect(void); |
71 | int sclp_has_siif(void); | 72 | int sclp_has_siif(void); |
73 | int sclp_has_sigpif(void); | ||
72 | unsigned int sclp_get_ibc(void); | 74 | unsigned int sclp_get_ibc(void); |
73 | 75 | ||
74 | long _sclp_print_early(const char *); | 76 | long _sclp_print_early(const char *); |
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index 73f12d21af4d..f7054a892d9e 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h | |||
@@ -15,6 +15,7 @@ | |||
15 | #define __ASM_S390_SYSINFO_H | 15 | #define __ASM_S390_SYSINFO_H |
16 | 16 | ||
17 | #include <asm/bitsperlong.h> | 17 | #include <asm/bitsperlong.h> |
18 | #include <linux/uuid.h> | ||
18 | 19 | ||
19 | struct sysinfo_1_1_1 { | 20 | struct sysinfo_1_1_1 { |
20 | unsigned char p:1; | 21 | unsigned char p:1; |
@@ -116,10 +117,13 @@ struct sysinfo_3_2_2 { | |||
116 | char name[8]; | 117 | char name[8]; |
117 | unsigned int caf; | 118 | unsigned int caf; |
118 | char cpi[16]; | 119 | char cpi[16]; |
119 | char reserved_1[24]; | 120 | char reserved_1[3]; |
120 | 121 | char ext_name_encoding; | |
122 | unsigned int reserved_2; | ||
123 | uuid_be uuid; | ||
121 | } vm[8]; | 124 | } vm[8]; |
122 | char reserved_544[3552]; | 125 | char reserved_3[1504]; |
126 | char ext_names[8][256]; | ||
123 | }; | 127 | }; |
124 | 128 | ||
125 | extern int topology_max_mnest; | 129 | extern int topology_max_mnest; |
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 48eda3ab4944..9c77e60b9a26 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h | |||
@@ -57,10 +57,44 @@ struct kvm_s390_io_adapter_req { | |||
57 | 57 | ||
58 | /* kvm attr_group on vm fd */ | 58 | /* kvm attr_group on vm fd */ |
59 | #define KVM_S390_VM_MEM_CTRL 0 | 59 | #define KVM_S390_VM_MEM_CTRL 0 |
60 | #define KVM_S390_VM_TOD 1 | ||
61 | #define KVM_S390_VM_CRYPTO 2 | ||
62 | #define KVM_S390_VM_CPU_MODEL 3 | ||
60 | 63 | ||
61 | /* kvm attributes for mem_ctrl */ | 64 | /* kvm attributes for mem_ctrl */ |
62 | #define KVM_S390_VM_MEM_ENABLE_CMMA 0 | 65 | #define KVM_S390_VM_MEM_ENABLE_CMMA 0 |
63 | #define KVM_S390_VM_MEM_CLR_CMMA 1 | 66 | #define KVM_S390_VM_MEM_CLR_CMMA 1 |
67 | #define KVM_S390_VM_MEM_LIMIT_SIZE 2 | ||
68 | |||
69 | /* kvm attributes for KVM_S390_VM_TOD */ | ||
70 | #define KVM_S390_VM_TOD_LOW 0 | ||
71 | #define KVM_S390_VM_TOD_HIGH 1 | ||
72 | |||
73 | /* kvm attributes for KVM_S390_VM_CPU_MODEL */ | ||
74 | /* processor related attributes are r/w */ | ||
75 | #define KVM_S390_VM_CPU_PROCESSOR 0 | ||
76 | struct kvm_s390_vm_cpu_processor { | ||
77 | __u64 cpuid; | ||
78 | __u16 ibc; | ||
79 | __u8 pad[6]; | ||
80 | __u64 fac_list[256]; | ||
81 | }; | ||
82 | |||
83 | /* machine related attributes are r/o */ | ||
84 | #define KVM_S390_VM_CPU_MACHINE 1 | ||
85 | struct kvm_s390_vm_cpu_machine { | ||
86 | __u64 cpuid; | ||
87 | __u32 ibc; | ||
88 | __u8 pad[4]; | ||
89 | __u64 fac_mask[256]; | ||
90 | __u64 fac_list[256]; | ||
91 | }; | ||
92 | |||
93 | /* kvm attributes for crypto */ | ||
94 | #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0 | ||
95 | #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 | ||
96 | #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2 | ||
97 | #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3 | ||
64 | 98 | ||
65 | /* for KVM_GET_REGS and KVM_SET_REGS */ | 99 | /* for KVM_GET_REGS and KVM_SET_REGS */ |
66 | struct kvm_regs { | 100 | struct kvm_regs { |
@@ -107,6 +141,9 @@ struct kvm_guest_debug_arch { | |||
107 | struct kvm_hw_breakpoint __user *hw_bp; | 141 | struct kvm_hw_breakpoint __user *hw_bp; |
108 | }; | 142 | }; |
109 | 143 | ||
144 | /* for KVM_SYNC_PFAULT and KVM_REG_S390_PFTOKEN */ | ||
145 | #define KVM_S390_PFAULT_TOKEN_INVALID 0xffffffffffffffffULL | ||
146 | |||
110 | #define KVM_SYNC_PREFIX (1UL << 0) | 147 | #define KVM_SYNC_PREFIX (1UL << 0) |
111 | #define KVM_SYNC_GPRS (1UL << 1) | 148 | #define KVM_SYNC_GPRS (1UL << 1) |
112 | #define KVM_SYNC_ACRS (1UL << 2) | 149 | #define KVM_SYNC_ACRS (1UL << 2) |
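[Editor's note] The new KVM_S390_VM_CPU_MODEL attribute group is driven with the regular VM device-attribute ioctls on the VM file descriptor. A hedged userspace sketch for reading the host machine's model; vm_fd is assumed to be an already-created VM, and error handling is omitted:

struct kvm_s390_vm_cpu_machine machine;
struct kvm_device_attr attr = {
	.group = KVM_S390_VM_CPU_MODEL,
	.attr  = KVM_S390_VM_CPU_MACHINE,
	.addr  = (__u64)(unsigned long)&machine,
};

if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr) == 0)
	printf("cpuid: %016llx ibc: %x\n",
	       (unsigned long long)machine.cpuid, machine.ibc);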
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 85565f1ff474..99babea026ca 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c | |||
@@ -204,6 +204,33 @@ static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info) | |||
204 | } | 204 | } |
205 | } | 205 | } |
206 | 206 | ||
207 | static void print_ext_name(struct seq_file *m, int lvl, | ||
208 | struct sysinfo_3_2_2 *info) | ||
209 | { | ||
210 | if (info->vm[lvl].ext_name_encoding == 0) | ||
211 | return; | ||
212 | if (info->ext_names[lvl][0] == 0) | ||
213 | return; | ||
214 | switch (info->vm[lvl].ext_name_encoding) { | ||
215 | case 1: /* EBCDIC */ | ||
216 | EBCASC(info->ext_names[lvl], sizeof(info->ext_names[lvl])); | ||
217 | break; | ||
218 | case 2: /* UTF-8 */ | ||
219 | break; | ||
220 | default: | ||
221 | return; | ||
222 | } | ||
223 | seq_printf(m, "VM%02d Extended Name: %-.256s\n", lvl, | ||
224 | info->ext_names[lvl]); | ||
225 | } | ||
226 | |||
227 | static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info) | ||
228 | { | ||
229 | if (!memcmp(&info->vm[i].uuid, &NULL_UUID_BE, sizeof(uuid_be))) | ||
230 | return; | ||
231 | seq_printf(m, "VM%02d UUID: %pUb\n", i, &info->vm[i].uuid); | ||
232 | } | ||
233 | |||
207 | static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info) | 234 | static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info) |
208 | { | 235 | { |
209 | int i; | 236 | int i; |
@@ -221,6 +248,8 @@ static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info) | |||
221 | seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured); | 248 | seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured); |
222 | seq_printf(m, "VM%02d CPUs Standby: %d\n", i, info->vm[i].cpus_standby); | 249 | seq_printf(m, "VM%02d CPUs Standby: %d\n", i, info->vm[i].cpus_standby); |
223 | seq_printf(m, "VM%02d CPUs Reserved: %d\n", i, info->vm[i].cpus_reserved); | 250 | seq_printf(m, "VM%02d CPUs Reserved: %d\n", i, info->vm[i].cpus_reserved); |
251 | print_ext_name(m, i, info); | ||
252 | print_uuid(m, i, info); | ||
224 | } | 253 | } |
225 | } | 254 | } |
226 | 255 | ||
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 8a1be9017730..267523cac6de 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c | |||
@@ -357,8 +357,8 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, | |||
357 | union asce asce; | 357 | union asce asce; |
358 | 358 | ||
359 | ctlreg0.val = vcpu->arch.sie_block->gcr[0]; | 359 | ctlreg0.val = vcpu->arch.sie_block->gcr[0]; |
360 | edat1 = ctlreg0.edat && test_vfacility(8); | 360 | edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8); |
361 | edat2 = edat1 && test_vfacility(78); | 361 | edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78); |
362 | asce.val = get_vcpu_asce(vcpu); | 362 | asce.val = get_vcpu_asce(vcpu); |
363 | if (asce.r) | 363 | if (asce.r) |
364 | goto real_address; | 364 | goto real_address; |
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 81c77ab8102e..bebd2157edd0 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c | |||
@@ -68,18 +68,27 @@ static int handle_noop(struct kvm_vcpu *vcpu) | |||
68 | 68 | ||
69 | static int handle_stop(struct kvm_vcpu *vcpu) | 69 | static int handle_stop(struct kvm_vcpu *vcpu) |
70 | { | 70 | { |
71 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | ||
71 | int rc = 0; | 72 | int rc = 0; |
72 | unsigned int action_bits; | 73 | uint8_t flags, stop_pending; |
73 | 74 | ||
74 | vcpu->stat.exit_stop_request++; | 75 | vcpu->stat.exit_stop_request++; |
75 | trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits); | ||
76 | 76 | ||
77 | action_bits = vcpu->arch.local_int.action_bits; | 77 | /* delay the stop if any non-stop irq is pending */ |
78 | if (kvm_s390_vcpu_has_irq(vcpu, 1)) | ||
79 | return 0; | ||
80 | |||
81 | /* avoid races with the injection/SIGP STOP code */ | ||
82 | spin_lock(&li->lock); | ||
83 | flags = li->irq.stop.flags; | ||
84 | stop_pending = kvm_s390_is_stop_irq_pending(vcpu); | ||
85 | spin_unlock(&li->lock); | ||
78 | 86 | ||
79 | if (!(action_bits & ACTION_STOP_ON_STOP)) | 87 | trace_kvm_s390_stop_request(stop_pending, flags); |
88 | if (!stop_pending) | ||
80 | return 0; | 89 | return 0; |
81 | 90 | ||
82 | if (action_bits & ACTION_STORE_ON_STOP) { | 91 | if (flags & KVM_S390_STOP_FLAG_STORE_STATUS) { |
83 | rc = kvm_s390_vcpu_store_status(vcpu, | 92 | rc = kvm_s390_vcpu_store_status(vcpu, |
84 | KVM_S390_STORE_STATUS_NOADDR); | 93 | KVM_S390_STORE_STATUS_NOADDR); |
85 | if (rc) | 94 | if (rc) |
@@ -279,11 +288,13 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) | |||
279 | irq.type = KVM_S390_INT_CPU_TIMER; | 288 | irq.type = KVM_S390_INT_CPU_TIMER; |
280 | break; | 289 | break; |
281 | case EXT_IRQ_EXTERNAL_CALL: | 290 | case EXT_IRQ_EXTERNAL_CALL: |
282 | if (kvm_s390_si_ext_call_pending(vcpu)) | ||
283 | return 0; | ||
284 | irq.type = KVM_S390_INT_EXTERNAL_CALL; | 291 | irq.type = KVM_S390_INT_EXTERNAL_CALL; |
285 | irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr; | 292 | irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr; |
286 | break; | 293 | rc = kvm_s390_inject_vcpu(vcpu, &irq); |
294 | /* ignore if another external call is already pending */ | ||
295 | if (rc == -EBUSY) | ||
296 | return 0; | ||
297 | return rc; | ||
287 | default: | 298 | default: |
288 | return -EOPNOTSUPP; | 299 | return -EOPNOTSUPP; |
289 | } | 300 | } |
@@ -307,17 +318,19 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) | |||
307 | kvm_s390_get_regs_rre(vcpu, ®1, ®2); | 318 | kvm_s390_get_regs_rre(vcpu, ®1, ®2); |
308 | 319 | ||
309 | /* Make sure that the source is paged-in */ | 320 | /* Make sure that the source is paged-in */ |
310 | srcaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg2]); | 321 | rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2], |
311 | if (kvm_is_error_gpa(vcpu->kvm, srcaddr)) | 322 | &srcaddr, 0); |
312 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 323 | if (rc) |
324 | return kvm_s390_inject_prog_cond(vcpu, rc); | ||
313 | rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); | 325 | rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); |
314 | if (rc != 0) | 326 | if (rc != 0) |
315 | return rc; | 327 | return rc; |
316 | 328 | ||
317 | /* Make sure that the destination is paged-in */ | 329 | /* Make sure that the destination is paged-in */ |
318 | dstaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg1]); | 330 | rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1], |
319 | if (kvm_is_error_gpa(vcpu->kvm, dstaddr)) | 331 | &dstaddr, 1); |
320 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 332 | if (rc) |
333 | return kvm_s390_inject_prog_cond(vcpu, rc); | ||
321 | rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); | 334 | rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); |
322 | if (rc != 0) | 335 | if (rc != 0) |
323 | return rc; | 336 | return rc; |
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index f00f31e66cd8..073b5f387d1d 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/bitmap.h> | 19 | #include <linux/bitmap.h> |
20 | #include <asm/asm-offsets.h> | 20 | #include <asm/asm-offsets.h> |
21 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
22 | #include <asm/sclp.h> | ||
22 | #include "kvm-s390.h" | 23 | #include "kvm-s390.h" |
23 | #include "gaccess.h" | 24 | #include "gaccess.h" |
24 | #include "trace-s390.h" | 25 | #include "trace-s390.h" |
@@ -159,6 +160,12 @@ static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu) | |||
159 | if (psw_mchk_disabled(vcpu)) | 160 | if (psw_mchk_disabled(vcpu)) |
160 | active_mask &= ~IRQ_PEND_MCHK_MASK; | 161 | active_mask &= ~IRQ_PEND_MCHK_MASK; |
161 | 162 | ||
163 | /* | ||
164 | * STOP irqs will never be actively delivered. They are triggered via | ||
165 | * intercept requests and cleared when the stop intercept is performed. | ||
166 | */ | ||
167 | __clear_bit(IRQ_PEND_SIGP_STOP, &active_mask); | ||
168 | |||
162 | return active_mask; | 169 | return active_mask; |
163 | } | 170 | } |
164 | 171 | ||
@@ -186,9 +193,6 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) | |||
186 | LCTL_CR10 | LCTL_CR11); | 193 | LCTL_CR10 | LCTL_CR11); |
187 | vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT); | 194 | vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT); |
188 | } | 195 | } |
189 | |||
190 | if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) | ||
191 | atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); | ||
192 | } | 196 | } |
193 | 197 | ||
194 | static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) | 198 | static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) |
@@ -216,11 +220,18 @@ static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu) | |||
216 | vcpu->arch.sie_block->lctl |= LCTL_CR14; | 220 | vcpu->arch.sie_block->lctl |= LCTL_CR14; |
217 | } | 221 | } |
218 | 222 | ||
223 | static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu) | ||
224 | { | ||
225 | if (kvm_s390_is_stop_irq_pending(vcpu)) | ||
226 | __set_cpuflag(vcpu, CPUSTAT_STOP_INT); | ||
227 | } | ||
228 | |||
219 | /* Set interception request for non-deliverable local interrupts */ | 229 | /* Set interception request for non-deliverable local interrupts */ |
220 | static void set_intercept_indicators_local(struct kvm_vcpu *vcpu) | 230 | static void set_intercept_indicators_local(struct kvm_vcpu *vcpu) |
221 | { | 231 | { |
222 | set_intercept_indicators_ext(vcpu); | 232 | set_intercept_indicators_ext(vcpu); |
223 | set_intercept_indicators_mchk(vcpu); | 233 | set_intercept_indicators_mchk(vcpu); |
234 | set_intercept_indicators_stop(vcpu); | ||
224 | } | 235 | } |
225 | 236 | ||
226 | static void __set_intercept_indicator(struct kvm_vcpu *vcpu, | 237 | static void __set_intercept_indicator(struct kvm_vcpu *vcpu, |
@@ -392,18 +403,6 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu) | |||
392 | return rc ? -EFAULT : 0; | 403 | return rc ? -EFAULT : 0; |
393 | } | 404 | } |
394 | 405 | ||
395 | static int __must_check __deliver_stop(struct kvm_vcpu *vcpu) | ||
396 | { | ||
397 | VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); | ||
398 | vcpu->stat.deliver_stop_signal++; | ||
399 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_STOP, | ||
400 | 0, 0); | ||
401 | |||
402 | __set_cpuflag(vcpu, CPUSTAT_STOP_INT); | ||
403 | clear_bit(IRQ_PEND_SIGP_STOP, &vcpu->arch.local_int.pending_irqs); | ||
404 | return 0; | ||
405 | } | ||
406 | |||
407 | static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu) | 406 | static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu) |
408 | { | 407 | { |
409 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 408 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
@@ -705,7 +704,6 @@ static const deliver_irq_t deliver_irq_funcs[] = { | |||
705 | [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc, | 704 | [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc, |
706 | [IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer, | 705 | [IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer, |
707 | [IRQ_PEND_RESTART] = __deliver_restart, | 706 | [IRQ_PEND_RESTART] = __deliver_restart, |
708 | [IRQ_PEND_SIGP_STOP] = __deliver_stop, | ||
709 | [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix, | 707 | [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix, |
710 | [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init, | 708 | [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init, |
711 | }; | 709 | }; |
@@ -738,21 +736,20 @@ static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu, | |||
738 | return rc; | 736 | return rc; |
739 | } | 737 | } |
740 | 738 | ||
741 | /* Check whether SIGP interpretation facility has an external call pending */ | 739 | /* Check whether an external call is pending (deliverable or not) */ |
742 | int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu) | 740 | int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) |
743 | { | 741 | { |
744 | atomic_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl; | 742 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
743 | uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; | ||
745 | 744 | ||
746 | if (!psw_extint_disabled(vcpu) && | 745 | if (!sclp_has_sigpif()) |
747 | (vcpu->arch.sie_block->gcr[0] & 0x2000ul) && | 746 | return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); |
748 | (atomic_read(sigp_ctrl) & SIGP_CTRL_C) && | ||
749 | (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND)) | ||
750 | return 1; | ||
751 | 747 | ||
752 | return 0; | 748 | return (sigp_ctrl & SIGP_CTRL_C) && |
749 | (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND); | ||
753 | } | 750 | } |
754 | 751 | ||
755 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) | 752 | int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) |
756 | { | 753 | { |
757 | struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; | 754 | struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; |
758 | struct kvm_s390_interrupt_info *inti; | 755 | struct kvm_s390_interrupt_info *inti; |
@@ -773,7 +770,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) | |||
773 | if (!rc && kvm_cpu_has_pending_timer(vcpu)) | 770 | if (!rc && kvm_cpu_has_pending_timer(vcpu)) |
774 | rc = 1; | 771 | rc = 1; |
775 | 772 | ||
776 | if (!rc && kvm_s390_si_ext_call_pending(vcpu)) | 773 | /* external call pending and deliverable */ |
774 | if (!rc && kvm_s390_ext_call_pending(vcpu) && | ||
775 | !psw_extint_disabled(vcpu) && | ||
776 | (vcpu->arch.sie_block->gcr[0] & 0x2000ul)) | ||
777 | rc = 1; | ||
778 | |||
779 | if (!rc && !exclude_stop && kvm_s390_is_stop_irq_pending(vcpu)) | ||
777 | rc = 1; | 780 | rc = 1; |
778 | 781 | ||
779 | return rc; | 782 | return rc; |
@@ -804,14 +807,20 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) | |||
804 | return -EOPNOTSUPP; /* disabled wait */ | 807 | return -EOPNOTSUPP; /* disabled wait */ |
805 | } | 808 | } |
806 | 809 | ||
807 | __set_cpu_idle(vcpu); | ||
808 | if (!ckc_interrupts_enabled(vcpu)) { | 810 | if (!ckc_interrupts_enabled(vcpu)) { |
809 | VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); | 811 | VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); |
812 | __set_cpu_idle(vcpu); | ||
810 | goto no_timer; | 813 | goto no_timer; |
811 | } | 814 | } |
812 | 815 | ||
813 | now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; | 816 | now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; |
814 | sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); | 817 | sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); |
818 | |||
819 | /* underflow */ | ||
820 | if (vcpu->arch.sie_block->ckc < now) | ||
821 | return 0; | ||
822 | |||
823 | __set_cpu_idle(vcpu); | ||
815 | hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); | 824 | hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); |
816 | VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); | 825 | VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); |
817 | no_timer: | 826 | no_timer: |
@@ -820,7 +829,7 @@ no_timer: | |||
820 | __unset_cpu_idle(vcpu); | 829 | __unset_cpu_idle(vcpu); |
821 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 830 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
822 | 831 | ||
823 | hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); | 832 | hrtimer_cancel(&vcpu->arch.ckc_timer); |
824 | return 0; | 833 | return 0; |
825 | } | 834 | } |
826 | 835 | ||
@@ -840,10 +849,20 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) | |||
840 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) | 849 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) |
841 | { | 850 | { |
842 | struct kvm_vcpu *vcpu; | 851 | struct kvm_vcpu *vcpu; |
852 | u64 now, sltime; | ||
843 | 853 | ||
844 | vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); | 854 | vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); |
845 | kvm_s390_vcpu_wakeup(vcpu); | 855 | now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; |
856 | sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); | ||
846 | 857 | ||
858 | /* | ||
859 | * If the monotonic clock runs faster than the tod clock we might be | ||
860 | * woken up too early and have to go back to sleep to avoid deadlocks. | ||
861 | */ | ||
862 | if (vcpu->arch.sie_block->ckc > now && | ||
863 | hrtimer_forward_now(timer, ns_to_ktime(sltime))) | ||
864 | return HRTIMER_RESTART; | ||
865 | kvm_s390_vcpu_wakeup(vcpu); | ||
847 | return HRTIMER_NORESTART; | 866 | return HRTIMER_NORESTART; |
848 | } | 867 | } |
849 | 868 | ||
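The reworked kvm_s390_idle_wakeup() above re-checks the clock-comparator deadline when the hrtimer fires, because the timer now runs on the monotonic clock and may expire before the guest's TOD deadline has really passed; in that case it forwards the timer by the remaining time instead of waking the vcpu. A rough user-space sketch of that decision, with both clocks reduced to plain counters and all names and units invented (this is not the kernel hrtimer API):

/*
 * Toy model of the early-wakeup check: the deadline is tracked on the
 * "guest" clock, the timer fires on a different clock, so on expiry we
 * re-check the deadline and push the expiry out by the remaining time
 * if it has not been reached yet.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_timer {
	uint64_t guest_deadline;	/* plays the role of ckc + epoch */
	uint64_t host_expiry;		/* when the timer actually fires */
};

/* 1 = re-arm (HRTIMER_RESTART analogue), 0 = deliver the wakeup */
static int toy_idle_wakeup(struct toy_timer *t, uint64_t guest_now)
{
	if (t->guest_deadline > guest_now) {
		/* woken too early: forward the expiry by what is still missing */
		t->host_expiry += t->guest_deadline - guest_now;
		return 1;
	}
	return 0;
}

int main(void)
{
	struct toy_timer t = { .guest_deadline = 1000, .host_expiry = 990 };

	printf("woken at guest time 985:  %s\n",
	       toy_idle_wakeup(&t, 985) ? "re-armed" : "wake vcpu");
	printf("woken at guest time 1002: %s\n",
	       toy_idle_wakeup(&t, 1002) ? "re-armed" : "wake vcpu");
	return 0;
}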
@@ -859,8 +878,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) | |||
859 | 878 | ||
860 | /* clear pending external calls set by sigp interpretation facility */ | 879 | /* clear pending external calls set by sigp interpretation facility */ |
861 | atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags); | 880 | atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags); |
862 | atomic_clear_mask(SIGP_CTRL_C, | 881 | vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0; |
863 | &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); | ||
864 | } | 882 | } |
865 | 883 | ||
866 | int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) | 884 | int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) |
@@ -984,18 +1002,43 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | |||
984 | return 0; | 1002 | return 0; |
985 | } | 1003 | } |
986 | 1004 | ||
987 | int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | 1005 | static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id) |
1006 | { | ||
1007 | unsigned char new_val, old_val; | ||
1008 | uint8_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; | ||
1009 | |||
1010 | new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK); | ||
1011 | old_val = *sigp_ctrl & ~SIGP_CTRL_C; | ||
1012 | if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) { | ||
1013 | /* another external call is pending */ | ||
1014 | return -EBUSY; | ||
1015 | } | ||
1016 | atomic_set_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); | ||
1017 | return 0; | ||
1018 | } | ||
1019 | |||
1020 | static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | ||
988 | { | 1021 | { |
989 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 1022 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
990 | struct kvm_s390_extcall_info *extcall = &li->irq.extcall; | 1023 | struct kvm_s390_extcall_info *extcall = &li->irq.extcall; |
1024 | uint16_t src_id = irq->u.extcall.code; | ||
991 | 1025 | ||
992 | VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u", | 1026 | VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u", |
993 | irq->u.extcall.code); | 1027 | src_id); |
994 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL, | 1028 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL, |
995 | irq->u.extcall.code, 0, 2); | 1029 | src_id, 0, 2); |
1030 | |||
1031 | /* sending vcpu invalid */ | ||
1032 | if (src_id >= KVM_MAX_VCPUS || | ||
1033 | kvm_get_vcpu(vcpu->kvm, src_id) == NULL) | ||
1034 | return -EINVAL; | ||
996 | 1035 | ||
1036 | if (sclp_has_sigpif()) | ||
1037 | return __inject_extcall_sigpif(vcpu, src_id); | ||
1038 | |||
1039 | if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) | ||
1040 | return -EBUSY; | ||
997 | *extcall = irq->u.extcall; | 1041 | *extcall = irq->u.extcall; |
998 | set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); | ||
999 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); | 1042 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); |
1000 | return 0; | 1043 | return 0; |
1001 | } | 1044 | } |
@@ -1006,23 +1049,41 @@ static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | |||
1006 | struct kvm_s390_prefix_info *prefix = &li->irq.prefix; | 1049 | struct kvm_s390_prefix_info *prefix = &li->irq.prefix; |
1007 | 1050 | ||
1008 | VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)", | 1051 | VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)", |
1009 | prefix->address); | 1052 | irq->u.prefix.address); |
1010 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX, | 1053 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX, |
1011 | prefix->address, 0, 2); | 1054 | irq->u.prefix.address, 0, 2); |
1055 | |||
1056 | if (!is_vcpu_stopped(vcpu)) | ||
1057 | return -EBUSY; | ||
1012 | 1058 | ||
1013 | *prefix = irq->u.prefix; | 1059 | *prefix = irq->u.prefix; |
1014 | set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); | 1060 | set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); |
1015 | return 0; | 1061 | return 0; |
1016 | } | 1062 | } |
1017 | 1063 | ||
1064 | #define KVM_S390_STOP_SUPP_FLAGS (KVM_S390_STOP_FLAG_STORE_STATUS) | ||
1018 | static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | 1065 | static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) |
1019 | { | 1066 | { |
1020 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 1067 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
1068 | struct kvm_s390_stop_info *stop = &li->irq.stop; | ||
1069 | int rc = 0; | ||
1021 | 1070 | ||
1022 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2); | 1071 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2); |
1023 | 1072 | ||
1024 | li->action_bits |= ACTION_STOP_ON_STOP; | 1073 | if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS) |
1025 | set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); | 1074 | return -EINVAL; |
1075 | |||
1076 | if (is_vcpu_stopped(vcpu)) { | ||
1077 | if (irq->u.stop.flags & KVM_S390_STOP_FLAG_STORE_STATUS) | ||
1078 | rc = kvm_s390_store_status_unloaded(vcpu, | ||
1079 | KVM_S390_STORE_STATUS_NOADDR); | ||
1080 | return rc; | ||
1081 | } | ||
1082 | |||
1083 | if (test_and_set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs)) | ||
1084 | return -EBUSY; | ||
1085 | stop->flags = irq->u.stop.flags; | ||
1086 | __set_cpuflag(vcpu, CPUSTAT_STOP_INT); | ||
1026 | return 0; | 1087 | return 0; |
1027 | } | 1088 | } |
1028 | 1089 | ||
@@ -1042,14 +1103,13 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, | |||
1042 | struct kvm_s390_irq *irq) | 1103 | struct kvm_s390_irq *irq) |
1043 | { | 1104 | { |
1044 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 1105 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
1045 | struct kvm_s390_emerg_info *emerg = &li->irq.emerg; | ||
1046 | 1106 | ||
1047 | VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", | 1107 | VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", |
1048 | irq->u.emerg.code); | 1108 | irq->u.emerg.code); |
1049 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, | 1109 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, |
1050 | emerg->code, 0, 2); | 1110 | irq->u.emerg.code, 0, 2); |
1051 | 1111 | ||
1052 | set_bit(emerg->code, li->sigp_emerg_pending); | 1112 | set_bit(irq->u.emerg.code, li->sigp_emerg_pending); |
1053 | set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); | 1113 | set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); |
1054 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); | 1114 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); |
1055 | return 0; | 1115 | return 0; |
@@ -1061,9 +1121,9 @@ static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | |||
1061 | struct kvm_s390_mchk_info *mchk = &li->irq.mchk; | 1121 | struct kvm_s390_mchk_info *mchk = &li->irq.mchk; |
1062 | 1122 | ||
1063 | VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", | 1123 | VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", |
1064 | mchk->mcic); | 1124 | irq->u.mchk.mcic); |
1065 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0, | 1125 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0, |
1066 | mchk->mcic, 2); | 1126 | irq->u.mchk.mcic, 2); |
1067 | 1127 | ||
1068 | /* | 1128 | /* |
1069 | * Because repressible machine checks can be indicated along with | 1129 | * Because repressible machine checks can be indicated along with |
@@ -1121,7 +1181,6 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, | |||
1121 | 1181 | ||
1122 | if ((!schid && !cr6) || (schid && cr6)) | 1182 | if ((!schid && !cr6) || (schid && cr6)) |
1123 | return NULL; | 1183 | return NULL; |
1124 | mutex_lock(&kvm->lock); | ||
1125 | fi = &kvm->arch.float_int; | 1184 | fi = &kvm->arch.float_int; |
1126 | spin_lock(&fi->lock); | 1185 | spin_lock(&fi->lock); |
1127 | inti = NULL; | 1186 | inti = NULL; |
@@ -1149,7 +1208,6 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, | |||
1149 | if (list_empty(&fi->list)) | 1208 | if (list_empty(&fi->list)) |
1150 | atomic_set(&fi->active, 0); | 1209 | atomic_set(&fi->active, 0); |
1151 | spin_unlock(&fi->lock); | 1210 | spin_unlock(&fi->lock); |
1152 | mutex_unlock(&kvm->lock); | ||
1153 | return inti; | 1211 | return inti; |
1154 | } | 1212 | } |
1155 | 1213 | ||
@@ -1162,7 +1220,6 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) | |||
1162 | int sigcpu; | 1220 | int sigcpu; |
1163 | int rc = 0; | 1221 | int rc = 0; |
1164 | 1222 | ||
1165 | mutex_lock(&kvm->lock); | ||
1166 | fi = &kvm->arch.float_int; | 1223 | fi = &kvm->arch.float_int; |
1167 | spin_lock(&fi->lock); | 1224 | spin_lock(&fi->lock); |
1168 | if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) { | 1225 | if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) { |
@@ -1187,6 +1244,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) | |||
1187 | list_add_tail(&inti->list, &iter->list); | 1244 | list_add_tail(&inti->list, &iter->list); |
1188 | } | 1245 | } |
1189 | atomic_set(&fi->active, 1); | 1246 | atomic_set(&fi->active, 1); |
1247 | if (atomic_read(&kvm->online_vcpus) == 0) | ||
1248 | goto unlock_fi; | ||
1190 | sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); | 1249 | sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); |
1191 | if (sigcpu == KVM_MAX_VCPUS) { | 1250 | if (sigcpu == KVM_MAX_VCPUS) { |
1192 | do { | 1251 | do { |
@@ -1213,7 +1272,6 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) | |||
1213 | kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); | 1272 | kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); |
1214 | unlock_fi: | 1273 | unlock_fi: |
1215 | spin_unlock(&fi->lock); | 1274 | spin_unlock(&fi->lock); |
1216 | mutex_unlock(&kvm->lock); | ||
1217 | return rc; | 1275 | return rc; |
1218 | } | 1276 | } |
1219 | 1277 | ||
@@ -1221,6 +1279,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, | |||
1221 | struct kvm_s390_interrupt *s390int) | 1279 | struct kvm_s390_interrupt *s390int) |
1222 | { | 1280 | { |
1223 | struct kvm_s390_interrupt_info *inti; | 1281 | struct kvm_s390_interrupt_info *inti; |
1282 | int rc; | ||
1224 | 1283 | ||
1225 | inti = kzalloc(sizeof(*inti), GFP_KERNEL); | 1284 | inti = kzalloc(sizeof(*inti), GFP_KERNEL); |
1226 | if (!inti) | 1285 | if (!inti) |
@@ -1239,7 +1298,6 @@ int kvm_s390_inject_vm(struct kvm *kvm, | |||
1239 | inti->ext.ext_params = s390int->parm; | 1298 | inti->ext.ext_params = s390int->parm; |
1240 | break; | 1299 | break; |
1241 | case KVM_S390_INT_PFAULT_DONE: | 1300 | case KVM_S390_INT_PFAULT_DONE: |
1242 | inti->type = s390int->type; | ||
1243 | inti->ext.ext_params2 = s390int->parm64; | 1301 | inti->ext.ext_params2 = s390int->parm64; |
1244 | break; | 1302 | break; |
1245 | case KVM_S390_MCHK: | 1303 | case KVM_S390_MCHK: |
@@ -1268,7 +1326,10 @@ int kvm_s390_inject_vm(struct kvm *kvm, | |||
1268 | trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64, | 1326 | trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64, |
1269 | 2); | 1327 | 2); |
1270 | 1328 | ||
1271 | return __inject_vm(kvm, inti); | 1329 | rc = __inject_vm(kvm, inti); |
1330 | if (rc) | ||
1331 | kfree(inti); | ||
1332 | return rc; | ||
1272 | } | 1333 | } |
1273 | 1334 | ||
1274 | void kvm_s390_reinject_io_int(struct kvm *kvm, | 1335 | void kvm_s390_reinject_io_int(struct kvm *kvm, |
@@ -1290,13 +1351,16 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, | |||
1290 | case KVM_S390_SIGP_SET_PREFIX: | 1351 | case KVM_S390_SIGP_SET_PREFIX: |
1291 | irq->u.prefix.address = s390int->parm; | 1352 | irq->u.prefix.address = s390int->parm; |
1292 | break; | 1353 | break; |
1354 | case KVM_S390_SIGP_STOP: | ||
1355 | irq->u.stop.flags = s390int->parm; | ||
1356 | break; | ||
1293 | case KVM_S390_INT_EXTERNAL_CALL: | 1357 | case KVM_S390_INT_EXTERNAL_CALL: |
1294 | if (irq->u.extcall.code & 0xffff0000) | 1358 | if (s390int->parm & 0xffff0000) |
1295 | return -EINVAL; | 1359 | return -EINVAL; |
1296 | irq->u.extcall.code = s390int->parm; | 1360 | irq->u.extcall.code = s390int->parm; |
1297 | break; | 1361 | break; |
1298 | case KVM_S390_INT_EMERGENCY: | 1362 | case KVM_S390_INT_EMERGENCY: |
1299 | if (irq->u.emerg.code & 0xffff0000) | 1363 | if (s390int->parm & 0xffff0000) |
1300 | return -EINVAL; | 1364 | return -EINVAL; |
1301 | irq->u.emerg.code = s390int->parm; | 1365 | irq->u.emerg.code = s390int->parm; |
1302 | break; | 1366 | break; |
@@ -1307,6 +1371,23 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, | |||
1307 | return 0; | 1371 | return 0; |
1308 | } | 1372 | } |
1309 | 1373 | ||
1374 | int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu) | ||
1375 | { | ||
1376 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | ||
1377 | |||
1378 | return test_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); | ||
1379 | } | ||
1380 | |||
1381 | void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu) | ||
1382 | { | ||
1383 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | ||
1384 | |||
1385 | spin_lock(&li->lock); | ||
1386 | li->irq.stop.flags = 0; | ||
1387 | clear_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); | ||
1388 | spin_unlock(&li->lock); | ||
1389 | } | ||
1390 | |||
1310 | int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | 1391 | int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) |
1311 | { | 1392 | { |
1312 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 1393 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
@@ -1363,7 +1444,6 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) | |||
1363 | struct kvm_s390_float_interrupt *fi; | 1444 | struct kvm_s390_float_interrupt *fi; |
1364 | struct kvm_s390_interrupt_info *n, *inti = NULL; | 1445 | struct kvm_s390_interrupt_info *n, *inti = NULL; |
1365 | 1446 | ||
1366 | mutex_lock(&kvm->lock); | ||
1367 | fi = &kvm->arch.float_int; | 1447 | fi = &kvm->arch.float_int; |
1368 | spin_lock(&fi->lock); | 1448 | spin_lock(&fi->lock); |
1369 | list_for_each_entry_safe(inti, n, &fi->list, list) { | 1449 | list_for_each_entry_safe(inti, n, &fi->list, list) { |
@@ -1373,7 +1453,6 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) | |||
1373 | fi->irq_count = 0; | 1453 | fi->irq_count = 0; |
1374 | atomic_set(&fi->active, 0); | 1454 | atomic_set(&fi->active, 0); |
1375 | spin_unlock(&fi->lock); | 1455 | spin_unlock(&fi->lock); |
1376 | mutex_unlock(&kvm->lock); | ||
1377 | } | 1456 | } |
1378 | 1457 | ||
1379 | static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti, | 1458 | static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti, |
@@ -1413,7 +1492,6 @@ static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len) | |||
1413 | int ret = 0; | 1492 | int ret = 0; |
1414 | int n = 0; | 1493 | int n = 0; |
1415 | 1494 | ||
1416 | mutex_lock(&kvm->lock); | ||
1417 | fi = &kvm->arch.float_int; | 1495 | fi = &kvm->arch.float_int; |
1418 | spin_lock(&fi->lock); | 1496 | spin_lock(&fi->lock); |
1419 | 1497 | ||
@@ -1432,7 +1510,6 @@ static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len) | |||
1432 | } | 1510 | } |
1433 | 1511 | ||
1434 | spin_unlock(&fi->lock); | 1512 | spin_unlock(&fi->lock); |
1435 | mutex_unlock(&kvm->lock); | ||
1436 | 1513 | ||
1437 | return ret < 0 ? ret : n; | 1514 | return ret < 0 ? ret : n; |
1438 | } | 1515 | } |
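A recurring theme in the interrupt.c changes above is that, with the SIGP interpretation facility (sclp_has_sigpif()), an external call is recorded in the per-cpu sigp_ctrl byte of the SCA, and __inject_extcall_sigpif() claims that byte with a cmpxchg so that at most one external call can be pending per vcpu. A minimal user-space sketch of that claim-one-slot pattern, using C11 atomics in place of the kernel's cmpxchg() and with made-up bit values that do not match the architected SIGP layout:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define CTRL_PENDING   0x80u	/* stand-in for SIGP_CTRL_C, value assumed */
#define CTRL_SCN_MASK  0x3fu	/* stand-in for SIGP_CTRL_SCN_MASK, value assumed */

/* Try to record "src_id sent us an external call"; fail if one is already pending. */
static int inject_extcall(_Atomic uint8_t *ctrl, uint8_t src_id)
{
	uint8_t expected = atomic_load(ctrl) & (uint8_t)~CTRL_PENDING;	/* pending bit must be clear */
	uint8_t desired  = CTRL_PENDING | (src_id & CTRL_SCN_MASK);

	/* Fails if the pending bit was set before or got set concurrently. */
	if (!atomic_compare_exchange_strong(ctrl, &expected, desired))
		return -1;	/* the kernel returns -EBUSY here */
	return 0;
}

int main(void)
{
	_Atomic uint8_t sigp_ctrl = 0;

	printf("first inject (cpu 3):  %d\n", inject_extcall(&sigp_ctrl, 3));	/* 0: accepted */
	printf("second inject (cpu 5): %d\n", inject_extcall(&sigp_ctrl, 5));	/* -1: busy */
	return 0;
}

The compare-and-swap only succeeds while the pending flag is clear, which is what turns a second, concurrent injection into a busy indication that SIGP can report back to the caller.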
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3e09801e3104..0c3623927563 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/kvm.h> | 22 | #include <linux/kvm.h> |
23 | #include <linux/kvm_host.h> | 23 | #include <linux/kvm_host.h> |
24 | #include <linux/module.h> | 24 | #include <linux/module.h> |
25 | #include <linux/random.h> | ||
25 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
26 | #include <linux/timer.h> | 27 | #include <linux/timer.h> |
27 | #include <asm/asm-offsets.h> | 28 | #include <asm/asm-offsets.h> |
@@ -29,7 +30,6 @@ | |||
29 | #include <asm/pgtable.h> | 30 | #include <asm/pgtable.h> |
30 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
31 | #include <asm/switch_to.h> | 32 | #include <asm/switch_to.h> |
32 | #include <asm/facility.h> | ||
33 | #include <asm/sclp.h> | 33 | #include <asm/sclp.h> |
34 | #include "kvm-s390.h" | 34 | #include "kvm-s390.h" |
35 | #include "gaccess.h" | 35 | #include "gaccess.h" |
@@ -50,6 +50,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
50 | { "exit_instruction", VCPU_STAT(exit_instruction) }, | 50 | { "exit_instruction", VCPU_STAT(exit_instruction) }, |
51 | { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, | 51 | { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, |
52 | { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, | 52 | { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, |
53 | { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, | ||
53 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, | 54 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, |
54 | { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, | 55 | { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, |
55 | { "instruction_lctl", VCPU_STAT(instruction_lctl) }, | 56 | { "instruction_lctl", VCPU_STAT(instruction_lctl) }, |
@@ -98,15 +99,20 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
98 | { NULL } | 99 | { NULL } |
99 | }; | 100 | }; |
100 | 101 | ||
101 | unsigned long *vfacilities; | 102 | /* upper facilities limit for kvm */ |
102 | static struct gmap_notifier gmap_notifier; | 103 | unsigned long kvm_s390_fac_list_mask[] = { |
104 | 0xff82fffbf4fc2000UL, | ||
105 | 0x005c000000000000UL, | ||
106 | }; | ||
103 | 107 | ||
104 | /* test availability of vfacility */ | 108 | unsigned long kvm_s390_fac_list_mask_size(void) |
105 | int test_vfacility(unsigned long nr) | ||
106 | { | 109 | { |
107 | return __test_facility(nr, (void *) vfacilities); | 110 | BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64); |
111 | return ARRAY_SIZE(kvm_s390_fac_list_mask); | ||
108 | } | 112 | } |
109 | 113 | ||
114 | static struct gmap_notifier gmap_notifier; | ||
115 | |||
110 | /* Section: not file related */ | 116 | /* Section: not file related */ |
111 | int kvm_arch_hardware_enable(void) | 117 | int kvm_arch_hardware_enable(void) |
112 | { | 118 | { |
@@ -166,6 +172,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
166 | case KVM_CAP_S390_IRQCHIP: | 172 | case KVM_CAP_S390_IRQCHIP: |
167 | case KVM_CAP_VM_ATTRIBUTES: | 173 | case KVM_CAP_VM_ATTRIBUTES: |
168 | case KVM_CAP_MP_STATE: | 174 | case KVM_CAP_MP_STATE: |
175 | case KVM_CAP_S390_USER_SIGP: | ||
169 | r = 1; | 176 | r = 1; |
170 | break; | 177 | break; |
171 | case KVM_CAP_NR_VCPUS: | 178 | case KVM_CAP_NR_VCPUS: |
@@ -254,6 +261,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) | |||
254 | kvm->arch.use_irqchip = 1; | 261 | kvm->arch.use_irqchip = 1; |
255 | r = 0; | 262 | r = 0; |
256 | break; | 263 | break; |
264 | case KVM_CAP_S390_USER_SIGP: | ||
265 | kvm->arch.user_sigp = 1; | ||
266 | r = 0; | ||
267 | break; | ||
257 | default: | 268 | default: |
258 | r = -EINVAL; | 269 | r = -EINVAL; |
259 | break; | 270 | break; |
@@ -261,7 +272,24 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) | |||
261 | return r; | 272 | return r; |
262 | } | 273 | } |
263 | 274 | ||
264 | static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) | 275 | static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) |
276 | { | ||
277 | int ret; | ||
278 | |||
279 | switch (attr->attr) { | ||
280 | case KVM_S390_VM_MEM_LIMIT_SIZE: | ||
281 | ret = 0; | ||
282 | if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr)) | ||
283 | ret = -EFAULT; | ||
284 | break; | ||
285 | default: | ||
286 | ret = -ENXIO; | ||
287 | break; | ||
288 | } | ||
289 | return ret; | ||
290 | } | ||
291 | |||
292 | static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) | ||
265 | { | 293 | { |
266 | int ret; | 294 | int ret; |
267 | unsigned int idx; | 295 | unsigned int idx; |
@@ -283,6 +311,36 @@ static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) | |||
283 | mutex_unlock(&kvm->lock); | 311 | mutex_unlock(&kvm->lock); |
284 | ret = 0; | 312 | ret = 0; |
285 | break; | 313 | break; |
314 | case KVM_S390_VM_MEM_LIMIT_SIZE: { | ||
315 | unsigned long new_limit; | ||
316 | |||
317 | if (kvm_is_ucontrol(kvm)) | ||
318 | return -EINVAL; | ||
319 | |||
320 | if (get_user(new_limit, (u64 __user *)attr->addr)) | ||
321 | return -EFAULT; | ||
322 | |||
323 | if (new_limit > kvm->arch.gmap->asce_end) | ||
324 | return -E2BIG; | ||
325 | |||
326 | ret = -EBUSY; | ||
327 | mutex_lock(&kvm->lock); | ||
328 | if (atomic_read(&kvm->online_vcpus) == 0) { | ||
329 | /* gmap_alloc will round the limit up */ | ||
330 | struct gmap *new = gmap_alloc(current->mm, new_limit); | ||
331 | |||
332 | if (!new) { | ||
333 | ret = -ENOMEM; | ||
334 | } else { | ||
335 | gmap_free(kvm->arch.gmap); | ||
336 | new->private = kvm; | ||
337 | kvm->arch.gmap = new; | ||
338 | ret = 0; | ||
339 | } | ||
340 | } | ||
341 | mutex_unlock(&kvm->lock); | ||
342 | break; | ||
343 | } | ||
286 | default: | 344 | default: |
287 | ret = -ENXIO; | 345 | ret = -ENXIO; |
288 | break; | 346 | break; |
@@ -290,13 +348,276 @@ static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) | |||
290 | return ret; | 348 | return ret; |
291 | } | 349 | } |
292 | 350 | ||
351 | static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); | ||
352 | |||
353 | static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) | ||
354 | { | ||
355 | struct kvm_vcpu *vcpu; | ||
356 | int i; | ||
357 | |||
358 | if (!test_kvm_facility(kvm, 76)) | ||
359 | return -EINVAL; | ||
360 | |||
361 | mutex_lock(&kvm->lock); | ||
362 | switch (attr->attr) { | ||
363 | case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: | ||
364 | get_random_bytes( | ||
365 | kvm->arch.crypto.crycb->aes_wrapping_key_mask, | ||
366 | sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); | ||
367 | kvm->arch.crypto.aes_kw = 1; | ||
368 | break; | ||
369 | case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: | ||
370 | get_random_bytes( | ||
371 | kvm->arch.crypto.crycb->dea_wrapping_key_mask, | ||
372 | sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); | ||
373 | kvm->arch.crypto.dea_kw = 1; | ||
374 | break; | ||
375 | case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: | ||
376 | kvm->arch.crypto.aes_kw = 0; | ||
377 | memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0, | ||
378 | sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); | ||
379 | break; | ||
380 | case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: | ||
381 | kvm->arch.crypto.dea_kw = 0; | ||
382 | memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0, | ||
383 | sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); | ||
384 | break; | ||
385 | default: | ||
386 | mutex_unlock(&kvm->lock); | ||
387 | return -ENXIO; | ||
388 | } | ||
389 | |||
390 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
391 | kvm_s390_vcpu_crypto_setup(vcpu); | ||
392 | exit_sie(vcpu); | ||
393 | } | ||
394 | mutex_unlock(&kvm->lock); | ||
395 | return 0; | ||
396 | } | ||
397 | |||
398 | static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) | ||
399 | { | ||
400 | u8 gtod_high; | ||
401 | |||
402 | if (copy_from_user(&gtod_high, (void __user *)attr->addr, | ||
403 | sizeof(gtod_high))) | ||
404 | return -EFAULT; | ||
405 | |||
406 | if (gtod_high != 0) | ||
407 | return -EINVAL; | ||
408 | |||
409 | return 0; | ||
410 | } | ||
411 | |||
412 | static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) | ||
413 | { | ||
414 | struct kvm_vcpu *cur_vcpu; | ||
415 | unsigned int vcpu_idx; | ||
416 | u64 host_tod, gtod; | ||
417 | int r; | ||
418 | |||
419 | if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod))) | ||
420 | return -EFAULT; | ||
421 | |||
422 | r = store_tod_clock(&host_tod); | ||
423 | if (r) | ||
424 | return r; | ||
425 | |||
426 | mutex_lock(&kvm->lock); | ||
427 | kvm->arch.epoch = gtod - host_tod; | ||
428 | kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) { | ||
429 | cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch; | ||
430 | exit_sie(cur_vcpu); | ||
431 | } | ||
432 | mutex_unlock(&kvm->lock); | ||
433 | return 0; | ||
434 | } | ||
435 | |||
436 | static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) | ||
437 | { | ||
438 | int ret; | ||
439 | |||
440 | if (attr->flags) | ||
441 | return -EINVAL; | ||
442 | |||
443 | switch (attr->attr) { | ||
444 | case KVM_S390_VM_TOD_HIGH: | ||
445 | ret = kvm_s390_set_tod_high(kvm, attr); | ||
446 | break; | ||
447 | case KVM_S390_VM_TOD_LOW: | ||
448 | ret = kvm_s390_set_tod_low(kvm, attr); | ||
449 | break; | ||
450 | default: | ||
451 | ret = -ENXIO; | ||
452 | break; | ||
453 | } | ||
454 | return ret; | ||
455 | } | ||
456 | |||
457 | static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) | ||
458 | { | ||
459 | u8 gtod_high = 0; | ||
460 | |||
461 | if (copy_to_user((void __user *)attr->addr, &gtod_high, | ||
462 | sizeof(gtod_high))) | ||
463 | return -EFAULT; | ||
464 | |||
465 | return 0; | ||
466 | } | ||
467 | |||
468 | static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) | ||
469 | { | ||
470 | u64 host_tod, gtod; | ||
471 | int r; | ||
472 | |||
473 | r = store_tod_clock(&host_tod); | ||
474 | if (r) | ||
475 | return r; | ||
476 | |||
477 | gtod = host_tod + kvm->arch.epoch; | ||
478 | if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod))) | ||
479 | return -EFAULT; | ||
480 | |||
481 | return 0; | ||
482 | } | ||
483 | |||
484 | static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr) | ||
485 | { | ||
486 | int ret; | ||
487 | |||
488 | if (attr->flags) | ||
489 | return -EINVAL; | ||
490 | |||
491 | switch (attr->attr) { | ||
492 | case KVM_S390_VM_TOD_HIGH: | ||
493 | ret = kvm_s390_get_tod_high(kvm, attr); | ||
494 | break; | ||
495 | case KVM_S390_VM_TOD_LOW: | ||
496 | ret = kvm_s390_get_tod_low(kvm, attr); | ||
497 | break; | ||
498 | default: | ||
499 | ret = -ENXIO; | ||
500 | break; | ||
501 | } | ||
502 | return ret; | ||
503 | } | ||
504 | |||
505 | static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) | ||
506 | { | ||
507 | struct kvm_s390_vm_cpu_processor *proc; | ||
508 | int ret = 0; | ||
509 | |||
510 | mutex_lock(&kvm->lock); | ||
511 | if (atomic_read(&kvm->online_vcpus)) { | ||
512 | ret = -EBUSY; | ||
513 | goto out; | ||
514 | } | ||
515 | proc = kzalloc(sizeof(*proc), GFP_KERNEL); | ||
516 | if (!proc) { | ||
517 | ret = -ENOMEM; | ||
518 | goto out; | ||
519 | } | ||
520 | if (!copy_from_user(proc, (void __user *)attr->addr, | ||
521 | sizeof(*proc))) { | ||
522 | memcpy(&kvm->arch.model.cpu_id, &proc->cpuid, | ||
523 | sizeof(struct cpuid)); | ||
524 | kvm->arch.model.ibc = proc->ibc; | ||
525 | memcpy(kvm->arch.model.fac->kvm, proc->fac_list, | ||
526 | S390_ARCH_FAC_LIST_SIZE_BYTE); | ||
527 | } else | ||
528 | ret = -EFAULT; | ||
529 | kfree(proc); | ||
530 | out: | ||
531 | mutex_unlock(&kvm->lock); | ||
532 | return ret; | ||
533 | } | ||
534 | |||
535 | static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) | ||
536 | { | ||
537 | int ret = -ENXIO; | ||
538 | |||
539 | switch (attr->attr) { | ||
540 | case KVM_S390_VM_CPU_PROCESSOR: | ||
541 | ret = kvm_s390_set_processor(kvm, attr); | ||
542 | break; | ||
543 | } | ||
544 | return ret; | ||
545 | } | ||
546 | |||
547 | static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) | ||
548 | { | ||
549 | struct kvm_s390_vm_cpu_processor *proc; | ||
550 | int ret = 0; | ||
551 | |||
552 | proc = kzalloc(sizeof(*proc), GFP_KERNEL); | ||
553 | if (!proc) { | ||
554 | ret = -ENOMEM; | ||
555 | goto out; | ||
556 | } | ||
557 | memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid)); | ||
558 | proc->ibc = kvm->arch.model.ibc; | ||
559 | memcpy(&proc->fac_list, kvm->arch.model.fac->kvm, S390_ARCH_FAC_LIST_SIZE_BYTE); | ||
560 | if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) | ||
561 | ret = -EFAULT; | ||
562 | kfree(proc); | ||
563 | out: | ||
564 | return ret; | ||
565 | } | ||
566 | |||
567 | static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) | ||
568 | { | ||
569 | struct kvm_s390_vm_cpu_machine *mach; | ||
570 | int ret = 0; | ||
571 | |||
572 | mach = kzalloc(sizeof(*mach), GFP_KERNEL); | ||
573 | if (!mach) { | ||
574 | ret = -ENOMEM; | ||
575 | goto out; | ||
576 | } | ||
577 | get_cpu_id((struct cpuid *) &mach->cpuid); | ||
578 | mach->ibc = sclp_get_ibc(); | ||
579 | memcpy(&mach->fac_mask, kvm_s390_fac_list_mask, | ||
580 | kvm_s390_fac_list_mask_size() * sizeof(u64)); | ||
581 | memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, | ||
582 | S390_ARCH_FAC_LIST_SIZE_U64); | ||
583 | if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) | ||
584 | ret = -EFAULT; | ||
585 | kfree(mach); | ||
586 | out: | ||
587 | return ret; | ||
588 | } | ||
589 | |||
590 | static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) | ||
591 | { | ||
592 | int ret = -ENXIO; | ||
593 | |||
594 | switch (attr->attr) { | ||
595 | case KVM_S390_VM_CPU_PROCESSOR: | ||
596 | ret = kvm_s390_get_processor(kvm, attr); | ||
597 | break; | ||
598 | case KVM_S390_VM_CPU_MACHINE: | ||
599 | ret = kvm_s390_get_machine(kvm, attr); | ||
600 | break; | ||
601 | } | ||
602 | return ret; | ||
603 | } | ||
604 | |||
293 | static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) | 605 | static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) |
294 | { | 606 | { |
295 | int ret; | 607 | int ret; |
296 | 608 | ||
297 | switch (attr->group) { | 609 | switch (attr->group) { |
298 | case KVM_S390_VM_MEM_CTRL: | 610 | case KVM_S390_VM_MEM_CTRL: |
299 | ret = kvm_s390_mem_control(kvm, attr); | 611 | ret = kvm_s390_set_mem_control(kvm, attr); |
612 | break; | ||
613 | case KVM_S390_VM_TOD: | ||
614 | ret = kvm_s390_set_tod(kvm, attr); | ||
615 | break; | ||
616 | case KVM_S390_VM_CPU_MODEL: | ||
617 | ret = kvm_s390_set_cpu_model(kvm, attr); | ||
618 | break; | ||
619 | case KVM_S390_VM_CRYPTO: | ||
620 | ret = kvm_s390_vm_set_crypto(kvm, attr); | ||
300 | break; | 621 | break; |
301 | default: | 622 | default: |
302 | ret = -ENXIO; | 623 | ret = -ENXIO; |
@@ -308,7 +629,24 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) | |||
308 | 629 | ||
309 | static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) | 630 | static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) |
310 | { | 631 | { |
311 | return -ENXIO; | 632 | int ret; |
633 | |||
634 | switch (attr->group) { | ||
635 | case KVM_S390_VM_MEM_CTRL: | ||
636 | ret = kvm_s390_get_mem_control(kvm, attr); | ||
637 | break; | ||
638 | case KVM_S390_VM_TOD: | ||
639 | ret = kvm_s390_get_tod(kvm, attr); | ||
640 | break; | ||
641 | case KVM_S390_VM_CPU_MODEL: | ||
642 | ret = kvm_s390_get_cpu_model(kvm, attr); | ||
643 | break; | ||
644 | default: | ||
645 | ret = -ENXIO; | ||
646 | break; | ||
647 | } | ||
648 | |||
649 | return ret; | ||
312 | } | 650 | } |
313 | 651 | ||
314 | static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) | 652 | static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) |
@@ -320,6 +658,42 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) | |||
320 | switch (attr->attr) { | 658 | switch (attr->attr) { |
321 | case KVM_S390_VM_MEM_ENABLE_CMMA: | 659 | case KVM_S390_VM_MEM_ENABLE_CMMA: |
322 | case KVM_S390_VM_MEM_CLR_CMMA: | 660 | case KVM_S390_VM_MEM_CLR_CMMA: |
661 | case KVM_S390_VM_MEM_LIMIT_SIZE: | ||
662 | ret = 0; | ||
663 | break; | ||
664 | default: | ||
665 | ret = -ENXIO; | ||
666 | break; | ||
667 | } | ||
668 | break; | ||
669 | case KVM_S390_VM_TOD: | ||
670 | switch (attr->attr) { | ||
671 | case KVM_S390_VM_TOD_LOW: | ||
672 | case KVM_S390_VM_TOD_HIGH: | ||
673 | ret = 0; | ||
674 | break; | ||
675 | default: | ||
676 | ret = -ENXIO; | ||
677 | break; | ||
678 | } | ||
679 | break; | ||
680 | case KVM_S390_VM_CPU_MODEL: | ||
681 | switch (attr->attr) { | ||
682 | case KVM_S390_VM_CPU_PROCESSOR: | ||
683 | case KVM_S390_VM_CPU_MACHINE: | ||
684 | ret = 0; | ||
685 | break; | ||
686 | default: | ||
687 | ret = -ENXIO; | ||
688 | break; | ||
689 | } | ||
690 | break; | ||
691 | case KVM_S390_VM_CRYPTO: | ||
692 | switch (attr->attr) { | ||
693 | case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: | ||
694 | case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: | ||
695 | case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: | ||
696 | case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: | ||
323 | ret = 0; | 697 | ret = 0; |
324 | break; | 698 | break; |
325 | default: | 699 | default: |
@@ -401,9 +775,61 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
401 | return r; | 775 | return r; |
402 | } | 776 | } |
403 | 777 | ||
778 | static int kvm_s390_query_ap_config(u8 *config) | ||
779 | { | ||
780 | u32 fcn_code = 0x04000000UL; | ||
781 | u32 cc; | ||
782 | |||
783 | asm volatile( | ||
784 | "lgr 0,%1\n" | ||
785 | "lgr 2,%2\n" | ||
786 | ".long 0xb2af0000\n" /* PQAP(QCI) */ | ||
787 | "ipm %0\n" | ||
788 | "srl %0,28\n" | ||
789 | : "=r" (cc) | ||
790 | : "r" (fcn_code), "r" (config) | ||
791 | : "cc", "0", "2", "memory" | ||
792 | ); | ||
793 | |||
794 | return cc; | ||
795 | } | ||
796 | |||
797 | static int kvm_s390_apxa_installed(void) | ||
798 | { | ||
799 | u8 config[128]; | ||
800 | int cc; | ||
801 | |||
802 | if (test_facility(2) && test_facility(12)) { | ||
803 | cc = kvm_s390_query_ap_config(config); | ||
804 | |||
805 | if (cc) | ||
806 | pr_err("PQAP(QCI) failed with cc=%d", cc); | ||
807 | else | ||
808 | return config[0] & 0x40; | ||
809 | } | ||
810 | |||
811 | return 0; | ||
812 | } | ||
813 | |||
814 | static void kvm_s390_set_crycb_format(struct kvm *kvm) | ||
815 | { | ||
816 | kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; | ||
817 | |||
818 | if (kvm_s390_apxa_installed()) | ||
819 | kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; | ||
820 | else | ||
821 | kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; | ||
822 | } | ||
823 | |||
824 | static void kvm_s390_get_cpu_id(struct cpuid *cpu_id) | ||
825 | { | ||
826 | get_cpu_id(cpu_id); | ||
827 | cpu_id->version = 0xff; | ||
828 | } | ||
829 | |||
404 | static int kvm_s390_crypto_init(struct kvm *kvm) | 830 | static int kvm_s390_crypto_init(struct kvm *kvm) |
405 | { | 831 | { |
406 | if (!test_vfacility(76)) | 832 | if (!test_kvm_facility(kvm, 76)) |
407 | return 0; | 833 | return 0; |
408 | 834 | ||
409 | kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb), | 835 | kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb), |
@@ -411,15 +837,18 @@ static int kvm_s390_crypto_init(struct kvm *kvm) | |||
411 | if (!kvm->arch.crypto.crycb) | 837 | if (!kvm->arch.crypto.crycb) |
412 | return -ENOMEM; | 838 | return -ENOMEM; |
413 | 839 | ||
414 | kvm->arch.crypto.crycbd = (__u32) (unsigned long) kvm->arch.crypto.crycb | | 840 | kvm_s390_set_crycb_format(kvm); |
415 | CRYCB_FORMAT1; | 841 | |
842 | /* Disable AES/DEA protected key functions by default */ | ||
843 | kvm->arch.crypto.aes_kw = 0; | ||
844 | kvm->arch.crypto.dea_kw = 0; | ||
416 | 845 | ||
417 | return 0; | 846 | return 0; |
418 | } | 847 | } |
419 | 848 | ||
420 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | 849 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) |
421 | { | 850 | { |
422 | int rc; | 851 | int i, rc; |
423 | char debug_name[16]; | 852 | char debug_name[16]; |
424 | static unsigned long sca_offset; | 853 | static unsigned long sca_offset; |
425 | 854 | ||
@@ -454,6 +883,46 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
454 | if (!kvm->arch.dbf) | 883 | if (!kvm->arch.dbf) |
455 | goto out_nodbf; | 884 | goto out_nodbf; |
456 | 885 | ||
886 | /* | ||
887 | * The architectural maximum amount of facilities is 16 kbit. To store | ||
888 | * this amount, 2 kbyte of memory is required. Thus we need a full | ||
889 | * page to hold the active copy (arch.model.fac->sie) and the current | ||
890 | * facilities set (arch.model.fac->kvm). Its address size has to be | ||
891 | * 31 bits and word aligned. | ||
892 | */ | ||
893 | kvm->arch.model.fac = | ||
894 | (struct s390_model_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA); | ||
895 | if (!kvm->arch.model.fac) | ||
896 | goto out_nofac; | ||
897 | |||
898 | memcpy(kvm->arch.model.fac->kvm, S390_lowcore.stfle_fac_list, | ||
899 | S390_ARCH_FAC_LIST_SIZE_U64); | ||
900 | |||
901 | /* | ||
902 | * If this KVM host runs *not* in a LPAR, relax the facility bits | ||
903 | * of the kvm facility mask by all missing facilities. This will allow | ||
904 | * to determine the right CPU model by means of the remaining facilities. | ||
905 | * Live guest migration must prohibit the migration of KVMs running in | ||
906 | * a LPAR to non LPAR hosts. | ||
907 | */ | ||
908 | if (!MACHINE_IS_LPAR) | ||
909 | for (i = 0; i < kvm_s390_fac_list_mask_size(); i++) | ||
910 | kvm_s390_fac_list_mask[i] &= kvm->arch.model.fac->kvm[i]; | ||
911 | |||
912 | /* | ||
913 | * Apply the kvm facility mask to limit the kvm supported/tolerated | ||
914 | * facility list. | ||
915 | */ | ||
916 | for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { | ||
917 | if (i < kvm_s390_fac_list_mask_size()) | ||
918 | kvm->arch.model.fac->kvm[i] &= kvm_s390_fac_list_mask[i]; | ||
919 | else | ||
920 | kvm->arch.model.fac->kvm[i] = 0UL; | ||
921 | } | ||
922 | |||
923 | kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id); | ||
924 | kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff; | ||
925 | |||
457 | if (kvm_s390_crypto_init(kvm) < 0) | 926 | if (kvm_s390_crypto_init(kvm) < 0) |
458 | goto out_crypto; | 927 | goto out_crypto; |
459 | 928 | ||
@@ -477,6 +946,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
477 | 946 | ||
478 | kvm->arch.css_support = 0; | 947 | kvm->arch.css_support = 0; |
479 | kvm->arch.use_irqchip = 0; | 948 | kvm->arch.use_irqchip = 0; |
949 | kvm->arch.epoch = 0; | ||
480 | 950 | ||
481 | spin_lock_init(&kvm->arch.start_stop_lock); | 951 | spin_lock_init(&kvm->arch.start_stop_lock); |
482 | 952 | ||
@@ -484,6 +954,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
484 | out_nogmap: | 954 | out_nogmap: |
485 | kfree(kvm->arch.crypto.crycb); | 955 | kfree(kvm->arch.crypto.crycb); |
486 | out_crypto: | 956 | out_crypto: |
957 | free_page((unsigned long)kvm->arch.model.fac); | ||
958 | out_nofac: | ||
487 | debug_unregister(kvm->arch.dbf); | 959 | debug_unregister(kvm->arch.dbf); |
488 | out_nodbf: | 960 | out_nodbf: |
489 | free_page((unsigned long)(kvm->arch.sca)); | 961 | free_page((unsigned long)(kvm->arch.sca)); |
@@ -536,6 +1008,7 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
536 | void kvm_arch_destroy_vm(struct kvm *kvm) | 1008 | void kvm_arch_destroy_vm(struct kvm *kvm) |
537 | { | 1009 | { |
538 | kvm_free_vcpus(kvm); | 1010 | kvm_free_vcpus(kvm); |
1011 | free_page((unsigned long)kvm->arch.model.fac); | ||
539 | free_page((unsigned long)(kvm->arch.sca)); | 1012 | free_page((unsigned long)(kvm->arch.sca)); |
540 | debug_unregister(kvm->arch.dbf); | 1013 | debug_unregister(kvm->arch.dbf); |
541 | kfree(kvm->arch.crypto.crycb); | 1014 | kfree(kvm->arch.crypto.crycb); |
@@ -546,25 +1019,30 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
546 | } | 1019 | } |
547 | 1020 | ||
548 | /* Section: vcpu related */ | 1021 | /* Section: vcpu related */ |
1022 | static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) | ||
1023 | { | ||
1024 | vcpu->arch.gmap = gmap_alloc(current->mm, -1UL); | ||
1025 | if (!vcpu->arch.gmap) | ||
1026 | return -ENOMEM; | ||
1027 | vcpu->arch.gmap->private = vcpu->kvm; | ||
1028 | |||
1029 | return 0; | ||
1030 | } | ||
1031 | |||
549 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | 1032 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) |
550 | { | 1033 | { |
551 | vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; | 1034 | vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; |
552 | kvm_clear_async_pf_completion_queue(vcpu); | 1035 | kvm_clear_async_pf_completion_queue(vcpu); |
553 | if (kvm_is_ucontrol(vcpu->kvm)) { | ||
554 | vcpu->arch.gmap = gmap_alloc(current->mm, -1UL); | ||
555 | if (!vcpu->arch.gmap) | ||
556 | return -ENOMEM; | ||
557 | vcpu->arch.gmap->private = vcpu->kvm; | ||
558 | return 0; | ||
559 | } | ||
560 | |||
561 | vcpu->arch.gmap = vcpu->kvm->arch.gmap; | ||
562 | vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | | 1036 | vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | |
563 | KVM_SYNC_GPRS | | 1037 | KVM_SYNC_GPRS | |
564 | KVM_SYNC_ACRS | | 1038 | KVM_SYNC_ACRS | |
565 | KVM_SYNC_CRS | | 1039 | KVM_SYNC_CRS | |
566 | KVM_SYNC_ARCH0 | | 1040 | KVM_SYNC_ARCH0 | |
567 | KVM_SYNC_PFAULT; | 1041 | KVM_SYNC_PFAULT; |
1042 | |||
1043 | if (kvm_is_ucontrol(vcpu->kvm)) | ||
1044 | return __kvm_ucontrol_vcpu_init(vcpu); | ||
1045 | |||
568 | return 0; | 1046 | return 0; |
569 | } | 1047 | } |
570 | 1048 | ||
@@ -615,16 +1093,27 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) | |||
615 | kvm_s390_clear_local_irqs(vcpu); | 1093 | kvm_s390_clear_local_irqs(vcpu); |
616 | } | 1094 | } |
617 | 1095 | ||
618 | int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) | 1096 | void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) |
619 | { | 1097 | { |
620 | return 0; | 1098 | mutex_lock(&vcpu->kvm->lock); |
1099 | vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; | ||
1100 | mutex_unlock(&vcpu->kvm->lock); | ||
1101 | if (!kvm_is_ucontrol(vcpu->kvm)) | ||
1102 | vcpu->arch.gmap = vcpu->kvm->arch.gmap; | ||
621 | } | 1103 | } |
622 | 1104 | ||
623 | static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) | 1105 | static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) |
624 | { | 1106 | { |
625 | if (!test_vfacility(76)) | 1107 | if (!test_kvm_facility(vcpu->kvm, 76)) |
626 | return; | 1108 | return; |
627 | 1109 | ||
1110 | vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); | ||
1111 | |||
1112 | if (vcpu->kvm->arch.crypto.aes_kw) | ||
1113 | vcpu->arch.sie_block->ecb3 |= ECB3_AES; | ||
1114 | if (vcpu->kvm->arch.crypto.dea_kw) | ||
1115 | vcpu->arch.sie_block->ecb3 |= ECB3_DEA; | ||
1116 | |||
628 | vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; | 1117 | vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; |
629 | } | 1118 | } |
630 | 1119 | ||
@@ -654,14 +1143,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
654 | CPUSTAT_STOPPED | | 1143 | CPUSTAT_STOPPED | |
655 | CPUSTAT_GED); | 1144 | CPUSTAT_GED); |
656 | vcpu->arch.sie_block->ecb = 6; | 1145 | vcpu->arch.sie_block->ecb = 6; |
657 | if (test_vfacility(50) && test_vfacility(73)) | 1146 | if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73)) |
658 | vcpu->arch.sie_block->ecb |= 0x10; | 1147 | vcpu->arch.sie_block->ecb |= 0x10; |
659 | 1148 | ||
660 | vcpu->arch.sie_block->ecb2 = 8; | 1149 | vcpu->arch.sie_block->ecb2 = 8; |
661 | vcpu->arch.sie_block->eca = 0xD1002000U; | 1150 | vcpu->arch.sie_block->eca = 0xC1002000U; |
662 | if (sclp_has_siif()) | 1151 | if (sclp_has_siif()) |
663 | vcpu->arch.sie_block->eca |= 1; | 1152 | vcpu->arch.sie_block->eca |= 1; |
664 | vcpu->arch.sie_block->fac = (int) (long) vfacilities; | 1153 | if (sclp_has_sigpif()) |
1154 | vcpu->arch.sie_block->eca |= 0x10000000U; | ||
665 | vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE | | 1155 | vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE | |
666 | ICTL_TPROT; | 1156 | ICTL_TPROT; |
667 | 1157 | ||
@@ -670,10 +1160,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
670 | if (rc) | 1160 | if (rc) |
671 | return rc; | 1161 | return rc; |
672 | } | 1162 | } |
673 | hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | 1163 | hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
674 | vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; | 1164 | vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; |
675 | get_cpu_id(&vcpu->arch.cpu_id); | 1165 | |
676 | vcpu->arch.cpu_id.version = 0xff; | 1166 | mutex_lock(&vcpu->kvm->lock); |
1167 | vcpu->arch.cpu_id = vcpu->kvm->arch.model.cpu_id; | ||
1168 | memcpy(vcpu->kvm->arch.model.fac->sie, vcpu->kvm->arch.model.fac->kvm, | ||
1169 | S390_ARCH_FAC_LIST_SIZE_BYTE); | ||
1170 | vcpu->arch.sie_block->ibc = vcpu->kvm->arch.model.ibc; | ||
1171 | mutex_unlock(&vcpu->kvm->lock); | ||
677 | 1172 | ||
678 | kvm_s390_vcpu_crypto_setup(vcpu); | 1173 | kvm_s390_vcpu_crypto_setup(vcpu); |
679 | 1174 | ||
@@ -717,6 +1212,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | |||
717 | vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; | 1212 | vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; |
718 | set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); | 1213 | set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); |
719 | } | 1214 | } |
1215 | vcpu->arch.sie_block->fac = (int) (long) kvm->arch.model.fac->sie; | ||
720 | 1216 | ||
721 | spin_lock_init(&vcpu->arch.local_int.lock); | 1217 | spin_lock_init(&vcpu->arch.local_int.lock); |
722 | vcpu->arch.local_int.float_int = &kvm->arch.float_int; | 1218 | vcpu->arch.local_int.float_int = &kvm->arch.float_int; |
@@ -741,7 +1237,7 @@ out: | |||
741 | 1237 | ||
742 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 1238 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
743 | { | 1239 | { |
744 | return kvm_cpu_has_interrupt(vcpu); | 1240 | return kvm_s390_vcpu_has_irq(vcpu, 0); |
745 | } | 1241 | } |
746 | 1242 | ||
747 | void s390_vcpu_block(struct kvm_vcpu *vcpu) | 1243 | void s390_vcpu_block(struct kvm_vcpu *vcpu) |
@@ -869,6 +1365,8 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, | |||
869 | case KVM_REG_S390_PFTOKEN: | 1365 | case KVM_REG_S390_PFTOKEN: |
870 | r = get_user(vcpu->arch.pfault_token, | 1366 | r = get_user(vcpu->arch.pfault_token, |
871 | (u64 __user *)reg->addr); | 1367 | (u64 __user *)reg->addr); |
1368 | if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) | ||
1369 | kvm_clear_async_pf_completion_queue(vcpu); | ||
872 | break; | 1370 | break; |
873 | case KVM_REG_S390_PFCOMPARE: | 1371 | case KVM_REG_S390_PFCOMPARE: |
874 | r = get_user(vcpu->arch.pfault_compare, | 1372 | r = get_user(vcpu->arch.pfault_compare, |
@@ -1176,7 +1674,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) | |||
1176 | return 0; | 1674 | return 0; |
1177 | if (psw_extint_disabled(vcpu)) | 1675 | if (psw_extint_disabled(vcpu)) |
1178 | return 0; | 1676 | return 0; |
1179 | if (kvm_cpu_has_interrupt(vcpu)) | 1677 | if (kvm_s390_vcpu_has_irq(vcpu, 0)) |
1180 | return 0; | 1678 | return 0; |
1181 | if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul)) | 1679 | if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul)) |
1182 | return 0; | 1680 | return 0; |
@@ -1341,6 +1839,8 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1341 | vcpu->arch.pfault_token = kvm_run->s.regs.pft; | 1839 | vcpu->arch.pfault_token = kvm_run->s.regs.pft; |
1342 | vcpu->arch.pfault_select = kvm_run->s.regs.pfs; | 1840 | vcpu->arch.pfault_select = kvm_run->s.regs.pfs; |
1343 | vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; | 1841 | vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; |
1842 | if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) | ||
1843 | kvm_clear_async_pf_completion_queue(vcpu); | ||
1344 | } | 1844 | } |
1345 | kvm_run->kvm_dirty_regs = 0; | 1845 | kvm_run->kvm_dirty_regs = 0; |
1346 | } | 1846 | } |
@@ -1559,15 +2059,10 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) | |||
1559 | spin_lock(&vcpu->kvm->arch.start_stop_lock); | 2059 | spin_lock(&vcpu->kvm->arch.start_stop_lock); |
1560 | online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); | 2060 | online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); |
1561 | 2061 | ||
1562 | /* Need to lock access to action_bits to avoid a SIGP race condition */ | ||
1563 | spin_lock(&vcpu->arch.local_int.lock); | ||
1564 | atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); | ||
1565 | |||
1566 | /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ | 2062 | /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ |
1567 | vcpu->arch.local_int.action_bits &= | 2063 | kvm_s390_clear_stop_irq(vcpu); |
1568 | ~(ACTION_STOP_ON_STOP | ACTION_STORE_ON_STOP); | ||
1569 | spin_unlock(&vcpu->arch.local_int.lock); | ||
1570 | 2064 | ||
2065 | atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); | ||
1571 | __disable_ibs_on_vcpu(vcpu); | 2066 | __disable_ibs_on_vcpu(vcpu); |
1572 | 2067 | ||
1573 | for (i = 0; i < online_vcpus; i++) { | 2068 | for (i = 0; i < online_vcpus; i++) { |
@@ -1783,30 +2278,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
1783 | 2278 | ||
1784 | static int __init kvm_s390_init(void) | 2279 | static int __init kvm_s390_init(void) |
1785 | { | 2280 | { |
1786 | int ret; | 2281 | return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); |
1787 | ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); | ||
1788 | if (ret) | ||
1789 | return ret; | ||
1790 | |||
1791 | /* | ||
1792 | * guests can ask for up to 255+1 double words, we need a full page | ||
1793 | * to hold the maximum amount of facilities. On the other hand, we | ||
1794 | * only set facilities that are known to work in KVM. | ||
1795 | */ | ||
1796 | vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); | ||
1797 | if (!vfacilities) { | ||
1798 | kvm_exit(); | ||
1799 | return -ENOMEM; | ||
1800 | } | ||
1801 | memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); | ||
1802 | vfacilities[0] &= 0xff82fffbf47c2000UL; | ||
1803 | vfacilities[1] &= 0x005c000000000000UL; | ||
1804 | return 0; | ||
1805 | } | 2282 | } |
1806 | 2283 | ||
1807 | static void __exit kvm_s390_exit(void) | 2284 | static void __exit kvm_s390_exit(void) |
1808 | { | 2285 | { |
1809 | free_page((unsigned long) vfacilities); | ||
1810 | kvm_exit(); | 2286 | kvm_exit(); |
1811 | } | 2287 | } |
1812 | 2288 | ||
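The kvm-s390.c changes above replace the old global vfacilities page with a per-VM facility list: kvm_arch_init_vm() copies the host's STFLE bits and masks them with kvm_s390_fac_list_mask, clearing everything beyond the mask. A stand-alone sketch of that masking and of the MSB-first bit test used by test_facility(); the list size of four doublewords and the all-ones host list are illustrative only, while the two mask words are the values introduced by the patch:

#include <stdint.h>
#include <stdio.h>

#define FAC_LIST_U64 4	/* illustrative; the real S390_ARCH_FAC_LIST_SIZE_U64 is larger */

/* the two mask words added by the patch (kvm_s390_fac_list_mask) */
static const uint64_t fac_mask[] = {
	0xff82fffbf4fc2000ULL,
	0x005c000000000000ULL,
};

/* MSB-first numbering as in test_facility(): facility 0 is bit 63 of word 0 */
static int test_fac(const uint64_t *list, unsigned int nr)
{
	return (list[nr / 64] >> (63 - (nr % 64))) & 1;
}

int main(void)
{
	/* pretend the host reports every facility */
	uint64_t host[FAC_LIST_U64] = { ~0ULL, ~0ULL, ~0ULL, ~0ULL };
	uint64_t vm[FAC_LIST_U64];
	size_t i, mask_words = sizeof(fac_mask) / sizeof(fac_mask[0]);

	/* same shape as the loop in kvm_arch_init_vm(): mask, then clear the rest */
	for (i = 0; i < FAC_LIST_U64; i++)
		vm[i] = (i < mask_words) ? (host[i] & fac_mask[i]) : 0;

	printf("facility 76 (MSA ext. 3) visible to the VM: %d\n", test_fac(vm, 76));
	return 0;
}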
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index a8f3d9b71c11..985c2114d7ef 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -18,12 +18,10 @@ | |||
18 | #include <linux/hrtimer.h> | 18 | #include <linux/hrtimer.h> |
19 | #include <linux/kvm.h> | 19 | #include <linux/kvm.h> |
20 | #include <linux/kvm_host.h> | 20 | #include <linux/kvm_host.h> |
21 | #include <asm/facility.h> | ||
21 | 22 | ||
22 | typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); | 23 | typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); |
23 | 24 | ||
24 | /* declare vfacilities extern */ | ||
25 | extern unsigned long *vfacilities; | ||
26 | |||
27 | /* Transactional Memory Execution related macros */ | 25 | /* Transactional Memory Execution related macros */ |
28 | #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10)) | 26 | #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10)) |
29 | #define TDB_FORMAT1 1 | 27 | #define TDB_FORMAT1 1 |
@@ -127,6 +125,12 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) | |||
127 | vcpu->arch.sie_block->gpsw.mask |= cc << 44; | 125 | vcpu->arch.sie_block->gpsw.mask |= cc << 44; |
128 | } | 126 | } |
129 | 127 | ||
128 | /* test availability of facility in a kvm instance */ | ||
129 | static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr) | ||
130 | { | ||
131 | return __test_facility(nr, kvm->arch.model.fac->kvm); | ||
132 | } | ||
133 | |||
130 | /* are cpu states controlled by user space */ | 134 | /* are cpu states controlled by user space */ |
131 | static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) | 135 | static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) |
132 | { | 136 | { |
@@ -183,7 +187,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); | |||
183 | void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); | 187 | void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); |
184 | /* is cmma enabled */ | 188 | /* is cmma enabled */ |
185 | bool kvm_s390_cmma_enabled(struct kvm *kvm); | 189 | bool kvm_s390_cmma_enabled(struct kvm *kvm); |
186 | int test_vfacility(unsigned long nr); | 190 | unsigned long kvm_s390_fac_list_mask_size(void); |
191 | extern unsigned long kvm_s390_fac_list_mask[]; | ||
187 | 192 | ||
188 | /* implemented in diag.c */ | 193 | /* implemented in diag.c */ |
189 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); | 194 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); |
@@ -228,11 +233,13 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, | |||
228 | struct kvm_s390_irq *s390irq); | 233 | struct kvm_s390_irq *s390irq); |
229 | 234 | ||
230 | /* implemented in interrupt.c */ | 235 | /* implemented in interrupt.c */ |
231 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); | 236 | int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop); |
232 | int psw_extint_disabled(struct kvm_vcpu *vcpu); | 237 | int psw_extint_disabled(struct kvm_vcpu *vcpu); |
233 | void kvm_s390_destroy_adapters(struct kvm *kvm); | 238 | void kvm_s390_destroy_adapters(struct kvm *kvm); |
234 | int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu); | 239 | int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu); |
235 | extern struct kvm_device_ops kvm_flic_ops; | 240 | extern struct kvm_device_ops kvm_flic_ops; |
241 | int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu); | ||
242 | void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu); | ||
236 | 243 | ||
237 | /* implemented in guestdbg.c */ | 244 | /* implemented in guestdbg.c */ |
238 | void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); | 245 | void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); |
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 1be578d64dfc..bdd9b5b17e03 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
@@ -337,19 +337,24 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) | |||
337 | static int handle_stfl(struct kvm_vcpu *vcpu) | 337 | static int handle_stfl(struct kvm_vcpu *vcpu) |
338 | { | 338 | { |
339 | int rc; | 339 | int rc; |
340 | unsigned int fac; | ||
340 | 341 | ||
341 | vcpu->stat.instruction_stfl++; | 342 | vcpu->stat.instruction_stfl++; |
342 | 343 | ||
343 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | 344 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) |
344 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | 345 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); |
345 | 346 | ||
347 | /* | ||
348 | * We need to shift the lower 32 facility bits (bit 0-31) from a u64 | ||
349 | * into a u32 memory representation. They will remain bits 0-31. | ||
350 | */ | ||
351 | fac = *vcpu->kvm->arch.model.fac->sie >> 32; | ||
346 | rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list), | 352 | rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list), |
347 | vfacilities, 4); | 353 | &fac, sizeof(fac)); |
348 | if (rc) | 354 | if (rc) |
349 | return rc; | 355 | return rc; |
350 | VCPU_EVENT(vcpu, 5, "store facility list value %x", | 356 | VCPU_EVENT(vcpu, 5, "store facility list value %x", fac); |
351 | *(unsigned int *) vfacilities); | 357 | trace_kvm_s390_handle_stfl(vcpu, fac); |
352 | trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities); | ||
353 | return 0; | 358 | return 0; |
354 | } | 359 | } |
355 | 360 | ||
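As the new comment in handle_stfl() explains, facility bits 0-31 sit in the upper half of the first big-endian u64 word, so a right shift by 32 yields the 4-byte image that STFL stores into the lowcore. A standalone sketch of that conversion (the sample value is made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t fac_word0 = 0xfb6bfffbfcfff000ULL;		/* hypothetical facility word */
	uint32_t stfl_image = (uint32_t)(fac_word0 >> 32);	/* facility bits 0-31 */

	printf("STFL image: %#x\n", stfl_image);
	return 0;
}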
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 6651f9f73973..23b1e86b2122 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c | |||
@@ -26,15 +26,17 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, | |||
26 | struct kvm_s390_local_interrupt *li; | 26 | struct kvm_s390_local_interrupt *li; |
27 | int cpuflags; | 27 | int cpuflags; |
28 | int rc; | 28 | int rc; |
29 | int ext_call_pending; | ||
29 | 30 | ||
30 | li = &dst_vcpu->arch.local_int; | 31 | li = &dst_vcpu->arch.local_int; |
31 | 32 | ||
32 | cpuflags = atomic_read(li->cpuflags); | 33 | cpuflags = atomic_read(li->cpuflags); |
33 | if (!(cpuflags & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED))) | 34 | ext_call_pending = kvm_s390_ext_call_pending(dst_vcpu); |
35 | if (!(cpuflags & CPUSTAT_STOPPED) && !ext_call_pending) | ||
34 | rc = SIGP_CC_ORDER_CODE_ACCEPTED; | 36 | rc = SIGP_CC_ORDER_CODE_ACCEPTED; |
35 | else { | 37 | else { |
36 | *reg &= 0xffffffff00000000UL; | 38 | *reg &= 0xffffffff00000000UL; |
37 | if (cpuflags & CPUSTAT_ECALL_PEND) | 39 | if (ext_call_pending) |
38 | *reg |= SIGP_STATUS_EXT_CALL_PENDING; | 40 | *reg |= SIGP_STATUS_EXT_CALL_PENDING; |
39 | if (cpuflags & CPUSTAT_STOPPED) | 41 | if (cpuflags & CPUSTAT_STOPPED) |
40 | *reg |= SIGP_STATUS_STOPPED; | 42 | *reg |= SIGP_STATUS_STOPPED; |
@@ -96,7 +98,7 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, | |||
96 | } | 98 | } |
97 | 99 | ||
98 | static int __sigp_external_call(struct kvm_vcpu *vcpu, | 100 | static int __sigp_external_call(struct kvm_vcpu *vcpu, |
99 | struct kvm_vcpu *dst_vcpu) | 101 | struct kvm_vcpu *dst_vcpu, u64 *reg) |
100 | { | 102 | { |
101 | struct kvm_s390_irq irq = { | 103 | struct kvm_s390_irq irq = { |
102 | .type = KVM_S390_INT_EXTERNAL_CALL, | 104 | .type = KVM_S390_INT_EXTERNAL_CALL, |
@@ -105,45 +107,31 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, | |||
105 | int rc; | 107 | int rc; |
106 | 108 | ||
107 | rc = kvm_s390_inject_vcpu(dst_vcpu, &irq); | 109 | rc = kvm_s390_inject_vcpu(dst_vcpu, &irq); |
108 | if (!rc) | 110 | if (rc == -EBUSY) { |
111 | *reg &= 0xffffffff00000000UL; | ||
112 | *reg |= SIGP_STATUS_EXT_CALL_PENDING; | ||
113 | return SIGP_CC_STATUS_STORED; | ||
114 | } else if (rc == 0) { | ||
109 | VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", | 115 | VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", |
110 | dst_vcpu->vcpu_id); | 116 | dst_vcpu->vcpu_id); |
111 | |||
112 | return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; | ||
113 | } | ||
114 | |||
115 | static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action) | ||
116 | { | ||
117 | struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int; | ||
118 | int rc = SIGP_CC_ORDER_CODE_ACCEPTED; | ||
119 | |||
120 | spin_lock(&li->lock); | ||
121 | if (li->action_bits & ACTION_STOP_ON_STOP) { | ||
122 | /* another SIGP STOP is pending */ | ||
123 | rc = SIGP_CC_BUSY; | ||
124 | goto out; | ||
125 | } | 117 | } |
126 | if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { | ||
127 | if ((action & ACTION_STORE_ON_STOP) != 0) | ||
128 | rc = -ESHUTDOWN; | ||
129 | goto out; | ||
130 | } | ||
131 | set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); | ||
132 | li->action_bits |= action; | ||
133 | atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); | ||
134 | kvm_s390_vcpu_wakeup(dst_vcpu); | ||
135 | out: | ||
136 | spin_unlock(&li->lock); | ||
137 | 118 | ||
138 | return rc; | 119 | return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; |
139 | } | 120 | } |
140 | 121 | ||
141 | static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu) | 122 | static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu) |
142 | { | 123 | { |
124 | struct kvm_s390_irq irq = { | ||
125 | .type = KVM_S390_SIGP_STOP, | ||
126 | }; | ||
143 | int rc; | 127 | int rc; |
144 | 128 | ||
145 | rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP); | 129 | rc = kvm_s390_inject_vcpu(dst_vcpu, &irq); |
146 | VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", dst_vcpu->vcpu_id); | 130 | if (rc == -EBUSY) |
131 | rc = SIGP_CC_BUSY; | ||
132 | else if (rc == 0) | ||
133 | VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", | ||
134 | dst_vcpu->vcpu_id); | ||
147 | 135 | ||
148 | return rc; | 136 | return rc; |
149 | } | 137 | } |
@@ -151,20 +139,18 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu) | |||
151 | static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu, | 139 | static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu, |
152 | struct kvm_vcpu *dst_vcpu, u64 *reg) | 140 | struct kvm_vcpu *dst_vcpu, u64 *reg) |
153 | { | 141 | { |
142 | struct kvm_s390_irq irq = { | ||
143 | .type = KVM_S390_SIGP_STOP, | ||
144 | .u.stop.flags = KVM_S390_STOP_FLAG_STORE_STATUS, | ||
145 | }; | ||
154 | int rc; | 146 | int rc; |
155 | 147 | ||
156 | rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP | | 148 | rc = kvm_s390_inject_vcpu(dst_vcpu, &irq); |
157 | ACTION_STORE_ON_STOP); | 149 | if (rc == -EBUSY) |
158 | VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x", | 150 | rc = SIGP_CC_BUSY; |
159 | dst_vcpu->vcpu_id); | 151 | else if (rc == 0) |
160 | 152 | VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x", | |
161 | if (rc == -ESHUTDOWN) { | 153 | dst_vcpu->vcpu_id); |
162 | /* If the CPU has already been stopped, we still have | ||
163 | * to save the status when doing stop-and-store. This | ||
164 | * has to be done after unlocking all spinlocks. */ | ||
165 | rc = kvm_s390_store_status_unloaded(dst_vcpu, | ||
166 | KVM_S390_STORE_STATUS_NOADDR); | ||
167 | } | ||
168 | 154 | ||
169 | return rc; | 155 | return rc; |
170 | } | 156 | } |
@@ -197,41 +183,33 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) | |||
197 | static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, | 183 | static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, |
198 | u32 address, u64 *reg) | 184 | u32 address, u64 *reg) |
199 | { | 185 | { |
200 | struct kvm_s390_local_interrupt *li; | 186 | struct kvm_s390_irq irq = { |
187 | .type = KVM_S390_SIGP_SET_PREFIX, | ||
188 | .u.prefix.address = address & 0x7fffe000u, | ||
189 | }; | ||
201 | int rc; | 190 | int rc; |
202 | 191 | ||
203 | li = &dst_vcpu->arch.local_int; | ||
204 | |||
205 | /* | 192 | /* |
206 | * Make sure the new value is valid memory. We only need to check the | 193 | * Make sure the new value is valid memory. We only need to check the |
207 | * first page, since address is 8k aligned and memory pieces are always | 194 | * first page, since address is 8k aligned and memory pieces are always |
208 | * at least 1MB aligned and have at least a size of 1MB. | 195 | * at least 1MB aligned and have at least a size of 1MB. |
209 | */ | 196 | */ |
210 | address &= 0x7fffe000u; | 197 | if (kvm_is_error_gpa(vcpu->kvm, irq.u.prefix.address)) { |
211 | if (kvm_is_error_gpa(vcpu->kvm, address)) { | ||
212 | *reg &= 0xffffffff00000000UL; | 198 | *reg &= 0xffffffff00000000UL; |
213 | *reg |= SIGP_STATUS_INVALID_PARAMETER; | 199 | *reg |= SIGP_STATUS_INVALID_PARAMETER; |
214 | return SIGP_CC_STATUS_STORED; | 200 | return SIGP_CC_STATUS_STORED; |
215 | } | 201 | } |
216 | 202 | ||
217 | spin_lock(&li->lock); | 203 | rc = kvm_s390_inject_vcpu(dst_vcpu, &irq); |
218 | /* cpu must be in stopped state */ | 204 | if (rc == -EBUSY) { |
219 | if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { | ||
220 | *reg &= 0xffffffff00000000UL; | 205 | *reg &= 0xffffffff00000000UL; |
221 | *reg |= SIGP_STATUS_INCORRECT_STATE; | 206 | *reg |= SIGP_STATUS_INCORRECT_STATE; |
222 | rc = SIGP_CC_STATUS_STORED; | 207 | return SIGP_CC_STATUS_STORED; |
223 | goto out_li; | 208 | } else if (rc == 0) { |
209 | VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", | ||
210 | dst_vcpu->vcpu_id, irq.u.prefix.address); | ||
224 | } | 211 | } |
225 | 212 | ||
226 | li->irq.prefix.address = address; | ||
227 | set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); | ||
228 | kvm_s390_vcpu_wakeup(dst_vcpu); | ||
229 | rc = SIGP_CC_ORDER_CODE_ACCEPTED; | ||
230 | |||
231 | VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", dst_vcpu->vcpu_id, | ||
232 | address); | ||
233 | out_li: | ||
234 | spin_unlock(&li->lock); | ||
235 | return rc; | 213 | return rc; |
236 | } | 214 | } |
237 | 215 | ||
@@ -242,9 +220,7 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, | |||
242 | int flags; | 220 | int flags; |
243 | int rc; | 221 | int rc; |
244 | 222 | ||
245 | spin_lock(&dst_vcpu->arch.local_int.lock); | ||
246 | flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); | 223 | flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); |
247 | spin_unlock(&dst_vcpu->arch.local_int.lock); | ||
248 | if (!(flags & CPUSTAT_STOPPED)) { | 224 | if (!(flags & CPUSTAT_STOPPED)) { |
249 | *reg &= 0xffffffff00000000UL; | 225 | *reg &= 0xffffffff00000000UL; |
250 | *reg |= SIGP_STATUS_INCORRECT_STATE; | 226 | *reg |= SIGP_STATUS_INCORRECT_STATE; |
@@ -291,8 +267,9 @@ static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu, | |||
291 | /* handle (RE)START in user space */ | 267 | /* handle (RE)START in user space */ |
292 | int rc = -EOPNOTSUPP; | 268 | int rc = -EOPNOTSUPP; |
293 | 269 | ||
270 | /* make sure we don't race with STOP irq injection */ | ||
294 | spin_lock(&li->lock); | 271 | spin_lock(&li->lock); |
295 | if (li->action_bits & ACTION_STOP_ON_STOP) | 272 | if (kvm_s390_is_stop_irq_pending(dst_vcpu)) |
296 | rc = SIGP_CC_BUSY; | 273 | rc = SIGP_CC_BUSY; |
297 | spin_unlock(&li->lock); | 274 | spin_unlock(&li->lock); |
298 | 275 | ||
@@ -333,7 +310,7 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, | |||
333 | break; | 310 | break; |
334 | case SIGP_EXTERNAL_CALL: | 311 | case SIGP_EXTERNAL_CALL: |
335 | vcpu->stat.instruction_sigp_external_call++; | 312 | vcpu->stat.instruction_sigp_external_call++; |
336 | rc = __sigp_external_call(vcpu, dst_vcpu); | 313 | rc = __sigp_external_call(vcpu, dst_vcpu, status_reg); |
337 | break; | 314 | break; |
338 | case SIGP_EMERGENCY_SIGNAL: | 315 | case SIGP_EMERGENCY_SIGNAL: |
339 | vcpu->stat.instruction_sigp_emergency++; | 316 | vcpu->stat.instruction_sigp_emergency++; |
@@ -394,6 +371,53 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, | |||
394 | return rc; | 371 | return rc; |
395 | } | 372 | } |
396 | 373 | ||
374 | static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code) | ||
375 | { | ||
376 | if (!vcpu->kvm->arch.user_sigp) | ||
377 | return 0; | ||
378 | |||
379 | switch (order_code) { | ||
380 | case SIGP_SENSE: | ||
381 | case SIGP_EXTERNAL_CALL: | ||
382 | case SIGP_EMERGENCY_SIGNAL: | ||
383 | case SIGP_COND_EMERGENCY_SIGNAL: | ||
384 | case SIGP_SENSE_RUNNING: | ||
385 | return 0; | ||
386 | /* update counters as we're directly dropping to user space */ | ||
387 | case SIGP_STOP: | ||
388 | vcpu->stat.instruction_sigp_stop++; | ||
389 | break; | ||
390 | case SIGP_STOP_AND_STORE_STATUS: | ||
391 | vcpu->stat.instruction_sigp_stop_store_status++; | ||
392 | break; | ||
393 | case SIGP_STORE_STATUS_AT_ADDRESS: | ||
394 | vcpu->stat.instruction_sigp_store_status++; | ||
395 | break; | ||
396 | case SIGP_SET_PREFIX: | ||
397 | vcpu->stat.instruction_sigp_prefix++; | ||
398 | break; | ||
399 | case SIGP_START: | ||
400 | vcpu->stat.instruction_sigp_start++; | ||
401 | break; | ||
402 | case SIGP_RESTART: | ||
403 | vcpu->stat.instruction_sigp_restart++; | ||
404 | break; | ||
405 | case SIGP_INITIAL_CPU_RESET: | ||
406 | vcpu->stat.instruction_sigp_init_cpu_reset++; | ||
407 | break; | ||
408 | case SIGP_CPU_RESET: | ||
409 | vcpu->stat.instruction_sigp_cpu_reset++; | ||
410 | break; | ||
411 | default: | ||
412 | vcpu->stat.instruction_sigp_unknown++; | ||
413 | } | ||
414 | |||
415 | VCPU_EVENT(vcpu, 4, "sigp order %u: completely handled in user space", | ||
416 | order_code); | ||
417 | |||
418 | return 1; | ||
419 | } | ||
420 | |||
397 | int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) | 421 | int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) |
398 | { | 422 | { |
399 | int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; | 423 | int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; |
@@ -408,6 +432,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) | |||
408 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | 432 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); |
409 | 433 | ||
410 | order_code = kvm_s390_get_base_disp_rs(vcpu); | 434 | order_code = kvm_s390_get_base_disp_rs(vcpu); |
435 | if (handle_sigp_order_in_user_space(vcpu, order_code)) | ||
436 | return -EOPNOTSUPP; | ||
411 | 437 | ||
412 | if (r1 % 2) | 438 | if (r1 % 2) |
413 | parameter = vcpu->run->s.regs.gprs[r1]; | 439 | parameter = vcpu->run->s.regs.gprs[r1]; |
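The new handle_sigp_order_in_user_space() path only kicks in once userspace has enabled KVM_CAP_S390_USER_SIGP on the VM; the non-fast orders then return -EOPNOTSUPP and are completed by the VMM. A minimal userspace sketch of enabling the capability, assuming vm_fd was obtained via KVM_CREATE_VM:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int enable_user_sigp(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_S390_USER_SIGP,	/* no flags or args needed */
	};

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}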
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 647e9d6a4818..653a7ec09ef5 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h | |||
@@ -209,19 +209,21 @@ TRACE_EVENT(kvm_s390_request_resets, | |||
209 | * Trace point for a vcpu's stop requests. | 209 | * Trace point for a vcpu's stop requests. |
210 | */ | 210 | */ |
211 | TRACE_EVENT(kvm_s390_stop_request, | 211 | TRACE_EVENT(kvm_s390_stop_request, |
212 | TP_PROTO(unsigned int action_bits), | 212 | TP_PROTO(unsigned char stop_irq, unsigned char flags), |
213 | TP_ARGS(action_bits), | 213 | TP_ARGS(stop_irq, flags), |
214 | 214 | ||
215 | TP_STRUCT__entry( | 215 | TP_STRUCT__entry( |
216 | __field(unsigned int, action_bits) | 216 | __field(unsigned char, stop_irq) |
217 | __field(unsigned char, flags) | ||
217 | ), | 218 | ), |
218 | 219 | ||
219 | TP_fast_assign( | 220 | TP_fast_assign( |
220 | __entry->action_bits = action_bits; | 221 | __entry->stop_irq = stop_irq; |
222 | __entry->flags = flags; | ||
221 | ), | 223 | ), |
222 | 224 | ||
223 | TP_printk("stop request, action_bits = %08x", | 225 | TP_printk("stop request, stop irq = %u, flags = %08x", |
224 | __entry->action_bits) | 226 | __entry->stop_irq, __entry->flags) |
225 | ); | 227 | ); |
226 | 228 | ||
227 | 229 | ||
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index eb181178fe0b..57a9d94fe160 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -208,6 +208,7 @@ struct x86_emulate_ops { | |||
208 | 208 | ||
209 | void (*get_cpuid)(struct x86_emulate_ctxt *ctxt, | 209 | void (*get_cpuid)(struct x86_emulate_ctxt *ctxt, |
210 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); | 210 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); |
211 | void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); | ||
211 | }; | 212 | }; |
212 | 213 | ||
213 | typedef u32 __attribute__((vector_size(16))) sse128_t; | 214 | typedef u32 __attribute__((vector_size(16))) sse128_t; |
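The new set_nmi_mask() emulator op is what the emulate_iret_real() hunk in emulate.c below uses to drop NMI blocking when IRET is emulated. A sketch of how the callback would typically be wired up on the KVM side; the function below is an assumption for illustration, not a quote from the patch:

static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
{
	/* forward to the vendor-specific (VMX/SVM) NMI-masking hook */
	kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked);
}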
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d89c6b828c96..a236e39cc385 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -38,8 +38,6 @@ | |||
38 | #define KVM_PRIVATE_MEM_SLOTS 3 | 38 | #define KVM_PRIVATE_MEM_SLOTS 3 |
39 | #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) | 39 | #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) |
40 | 40 | ||
41 | #define KVM_MMIO_SIZE 16 | ||
42 | |||
43 | #define KVM_PIO_PAGE_OFFSET 1 | 41 | #define KVM_PIO_PAGE_OFFSET 1 |
44 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 | 42 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 |
45 | 43 | ||
@@ -51,7 +49,7 @@ | |||
51 | | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) | 49 | | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) |
52 | 50 | ||
53 | #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL | 51 | #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL |
54 | #define CR3_PCID_INVD (1UL << 63) | 52 | #define CR3_PCID_INVD BIT_64(63) |
55 | #define CR4_RESERVED_BITS \ | 53 | #define CR4_RESERVED_BITS \ |
56 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | 54 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ |
57 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | 55 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ |
@@ -160,6 +158,18 @@ enum { | |||
160 | #define DR7_FIXED_1 0x00000400 | 158 | #define DR7_FIXED_1 0x00000400 |
161 | #define DR7_VOLATILE 0xffff2bff | 159 | #define DR7_VOLATILE 0xffff2bff |
162 | 160 | ||
161 | #define PFERR_PRESENT_BIT 0 | ||
162 | #define PFERR_WRITE_BIT 1 | ||
163 | #define PFERR_USER_BIT 2 | ||
164 | #define PFERR_RSVD_BIT 3 | ||
165 | #define PFERR_FETCH_BIT 4 | ||
166 | |||
167 | #define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT) | ||
168 | #define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT) | ||
169 | #define PFERR_USER_MASK (1U << PFERR_USER_BIT) | ||
170 | #define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT) | ||
171 | #define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT) | ||
172 | |||
163 | /* apic attention bits */ | 173 | /* apic attention bits */ |
164 | #define KVM_APIC_CHECK_VAPIC 0 | 174 | #define KVM_APIC_CHECK_VAPIC 0 |
165 | /* | 175 | /* |
@@ -615,6 +625,8 @@ struct kvm_arch { | |||
615 | #ifdef CONFIG_KVM_MMU_AUDIT | 625 | #ifdef CONFIG_KVM_MMU_AUDIT |
616 | int audit_point; | 626 | int audit_point; |
617 | #endif | 627 | #endif |
628 | |||
629 | bool boot_vcpu_runs_old_kvmclock; | ||
618 | }; | 630 | }; |
619 | 631 | ||
620 | struct kvm_vm_stat { | 632 | struct kvm_vm_stat { |
@@ -643,6 +655,7 @@ struct kvm_vcpu_stat { | |||
643 | u32 irq_window_exits; | 655 | u32 irq_window_exits; |
644 | u32 nmi_window_exits; | 656 | u32 nmi_window_exits; |
645 | u32 halt_exits; | 657 | u32 halt_exits; |
658 | u32 halt_successful_poll; | ||
646 | u32 halt_wakeup; | 659 | u32 halt_wakeup; |
647 | u32 request_irq_exits; | 660 | u32 request_irq_exits; |
648 | u32 irq_exits; | 661 | u32 irq_exits; |
@@ -787,6 +800,31 @@ struct kvm_x86_ops { | |||
787 | int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); | 800 | int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); |
788 | 801 | ||
789 | void (*sched_in)(struct kvm_vcpu *kvm, int cpu); | 802 | void (*sched_in)(struct kvm_vcpu *kvm, int cpu); |
803 | |||
804 | /* | ||
805 | * Arch-specific dirty logging hooks. These hooks are only supposed to | ||
806 | * be valid if the specific arch has hardware-accelerated dirty logging | ||
807 | * mechanism. Currently only for PML on VMX. | ||
808 | * | ||
809 | * - slot_enable_log_dirty: | ||
810 | * called when enabling log dirty mode for the slot. | ||
811 | * - slot_disable_log_dirty: | ||
812 | * called when disabling log dirty mode for the slot. | ||
813 | * also called when slot is created with log dirty disabled. | ||
814 | * - flush_log_dirty: | ||
815 | * called before reporting dirty_bitmap to userspace. | ||
816 | * - enable_log_dirty_pt_masked: | ||
817 | * called when reenabling log dirty for the GFNs in the mask after | ||
818 | * corresponding bits are cleared in slot->dirty_bitmap. | ||
819 | */ | ||
820 | void (*slot_enable_log_dirty)(struct kvm *kvm, | ||
821 | struct kvm_memory_slot *slot); | ||
822 | void (*slot_disable_log_dirty)(struct kvm *kvm, | ||
823 | struct kvm_memory_slot *slot); | ||
824 | void (*flush_log_dirty)(struct kvm *kvm); | ||
825 | void (*enable_log_dirty_pt_masked)(struct kvm *kvm, | ||
826 | struct kvm_memory_slot *slot, | ||
827 | gfn_t offset, unsigned long mask); | ||
790 | }; | 828 | }; |
791 | 829 | ||
792 | struct kvm_arch_async_pf { | 830 | struct kvm_arch_async_pf { |
@@ -819,10 +857,17 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | |||
819 | u64 dirty_mask, u64 nx_mask, u64 x_mask); | 857 | u64 dirty_mask, u64 nx_mask, u64 x_mask); |
820 | 858 | ||
821 | void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); | 859 | void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
822 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); | 860 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, |
823 | void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, | 861 | struct kvm_memory_slot *memslot); |
824 | struct kvm_memory_slot *slot, | 862 | void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, |
825 | gfn_t gfn_offset, unsigned long mask); | 863 | struct kvm_memory_slot *memslot); |
864 | void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, | ||
865 | struct kvm_memory_slot *memslot); | ||
866 | void kvm_mmu_slot_set_dirty(struct kvm *kvm, | ||
867 | struct kvm_memory_slot *memslot); | ||
868 | void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, | ||
869 | struct kvm_memory_slot *slot, | ||
870 | gfn_t gfn_offset, unsigned long mask); | ||
826 | void kvm_mmu_zap_all(struct kvm *kvm); | 871 | void kvm_mmu_zap_all(struct kvm *kvm); |
827 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm); | 872 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm); |
828 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); | 873 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); |
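The new PFERR_*_BIT and PFERR_*_MASK pairs make page-fault error codes self-describing. A small sketch of composing and testing such a code, e.g. for a user-mode write to a present page (values follow directly from the definitions above):

u32 error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK | PFERR_USER_MASK;

bool write_fault = error_code & PFERR_WRITE_MASK;	/* true */
bool user_fault  = error_code & PFERR_USER_MASK;	/* true */
bool rsvd_fault  = error_code & PFERR_RSVD_MASK;	/* false */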
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 45afaee9555c..da772edd19ab 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -69,6 +69,7 @@ | |||
69 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 | 69 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 |
70 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 | 70 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 |
71 | #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 | 71 | #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 |
72 | #define SECONDARY_EXEC_ENABLE_PML 0x00020000 | ||
72 | #define SECONDARY_EXEC_XSAVES 0x00100000 | 73 | #define SECONDARY_EXEC_XSAVES 0x00100000 |
73 | 74 | ||
74 | 75 | ||
@@ -121,6 +122,7 @@ enum vmcs_field { | |||
121 | GUEST_LDTR_SELECTOR = 0x0000080c, | 122 | GUEST_LDTR_SELECTOR = 0x0000080c, |
122 | GUEST_TR_SELECTOR = 0x0000080e, | 123 | GUEST_TR_SELECTOR = 0x0000080e, |
123 | GUEST_INTR_STATUS = 0x00000810, | 124 | GUEST_INTR_STATUS = 0x00000810, |
125 | GUEST_PML_INDEX = 0x00000812, | ||
124 | HOST_ES_SELECTOR = 0x00000c00, | 126 | HOST_ES_SELECTOR = 0x00000c00, |
125 | HOST_CS_SELECTOR = 0x00000c02, | 127 | HOST_CS_SELECTOR = 0x00000c02, |
126 | HOST_SS_SELECTOR = 0x00000c04, | 128 | HOST_SS_SELECTOR = 0x00000c04, |
@@ -140,6 +142,8 @@ enum vmcs_field { | |||
140 | VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, | 142 | VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, |
141 | VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, | 143 | VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, |
142 | VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, | 144 | VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, |
145 | PML_ADDRESS = 0x0000200e, | ||
146 | PML_ADDRESS_HIGH = 0x0000200f, | ||
143 | TSC_OFFSET = 0x00002010, | 147 | TSC_OFFSET = 0x00002010, |
144 | TSC_OFFSET_HIGH = 0x00002011, | 148 | TSC_OFFSET_HIGH = 0x00002011, |
145 | VIRTUAL_APIC_PAGE_ADDR = 0x00002012, | 149 | VIRTUAL_APIC_PAGE_ADDR = 0x00002012, |
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 536240fa9a95..3ce079136c11 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h | |||
@@ -364,6 +364,9 @@ | |||
364 | #define MSR_IA32_UCODE_WRITE 0x00000079 | 364 | #define MSR_IA32_UCODE_WRITE 0x00000079 |
365 | #define MSR_IA32_UCODE_REV 0x0000008b | 365 | #define MSR_IA32_UCODE_REV 0x0000008b |
366 | 366 | ||
367 | #define MSR_IA32_SMM_MONITOR_CTL 0x0000009b | ||
368 | #define MSR_IA32_SMBASE 0x0000009e | ||
369 | |||
367 | #define MSR_IA32_PERF_STATUS 0x00000198 | 370 | #define MSR_IA32_PERF_STATUS 0x00000198 |
368 | #define MSR_IA32_PERF_CTL 0x00000199 | 371 | #define MSR_IA32_PERF_CTL 0x00000199 |
369 | #define INTEL_PERF_CTL_MASK 0xffff | 372 | #define INTEL_PERF_CTL_MASK 0xffff |
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index b813bf9da1e2..c5f1a1deb91a 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h | |||
@@ -56,6 +56,7 @@ | |||
56 | #define EXIT_REASON_MSR_READ 31 | 56 | #define EXIT_REASON_MSR_READ 31 |
57 | #define EXIT_REASON_MSR_WRITE 32 | 57 | #define EXIT_REASON_MSR_WRITE 32 |
58 | #define EXIT_REASON_INVALID_STATE 33 | 58 | #define EXIT_REASON_INVALID_STATE 33 |
59 | #define EXIT_REASON_MSR_LOAD_FAIL 34 | ||
59 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | 60 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 |
60 | #define EXIT_REASON_MONITOR_INSTRUCTION 39 | 61 | #define EXIT_REASON_MONITOR_INSTRUCTION 39 |
61 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 | 62 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 |
@@ -72,6 +73,7 @@ | |||
72 | #define EXIT_REASON_XSETBV 55 | 73 | #define EXIT_REASON_XSETBV 55 |
73 | #define EXIT_REASON_APIC_WRITE 56 | 74 | #define EXIT_REASON_APIC_WRITE 56 |
74 | #define EXIT_REASON_INVPCID 58 | 75 | #define EXIT_REASON_INVPCID 58 |
76 | #define EXIT_REASON_PML_FULL 62 | ||
75 | #define EXIT_REASON_XSAVES 63 | 77 | #define EXIT_REASON_XSAVES 63 |
76 | #define EXIT_REASON_XRSTORS 64 | 78 | #define EXIT_REASON_XRSTORS 64 |
77 | 79 | ||
@@ -116,10 +118,14 @@ | |||
116 | { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ | 118 | { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ |
117 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ | 119 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ |
118 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ | 120 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ |
121 | { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \ | ||
119 | { EXIT_REASON_INVD, "INVD" }, \ | 122 | { EXIT_REASON_INVD, "INVD" }, \ |
120 | { EXIT_REASON_INVVPID, "INVVPID" }, \ | 123 | { EXIT_REASON_INVVPID, "INVVPID" }, \ |
121 | { EXIT_REASON_INVPCID, "INVPCID" }, \ | 124 | { EXIT_REASON_INVPCID, "INVPCID" }, \ |
122 | { EXIT_REASON_XSAVES, "XSAVES" }, \ | 125 | { EXIT_REASON_XSAVES, "XSAVES" }, \ |
123 | { EXIT_REASON_XRSTORS, "XRSTORS" } | 126 | { EXIT_REASON_XRSTORS, "XRSTORS" } |
124 | 127 | ||
128 | #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 | ||
129 | #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 | ||
130 | |||
125 | #endif /* _UAPIVMX_H */ | 131 | #endif /* _UAPIVMX_H */ |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 7dc7ba577ecd..413a7bf9efbb 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -39,6 +39,7 @@ config KVM | |||
39 | select PERF_EVENTS | 39 | select PERF_EVENTS |
40 | select HAVE_KVM_MSI | 40 | select HAVE_KVM_MSI |
41 | select HAVE_KVM_CPU_RELAX_INTERCEPT | 41 | select HAVE_KVM_CPU_RELAX_INTERCEPT |
42 | select KVM_GENERIC_DIRTYLOG_READ_PROTECT | ||
42 | select KVM_VFIO | 43 | select KVM_VFIO |
43 | select SRCU | 44 | select SRCU |
44 | ---help--- | 45 | ---help--- |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index de12c1d379f1..e0b794a84c35 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -86,6 +86,7 @@ | |||
86 | #define DstAcc (OpAcc << DstShift) | 86 | #define DstAcc (OpAcc << DstShift) |
87 | #define DstDI (OpDI << DstShift) | 87 | #define DstDI (OpDI << DstShift) |
88 | #define DstMem64 (OpMem64 << DstShift) | 88 | #define DstMem64 (OpMem64 << DstShift) |
89 | #define DstMem16 (OpMem16 << DstShift) | ||
89 | #define DstImmUByte (OpImmUByte << DstShift) | 90 | #define DstImmUByte (OpImmUByte << DstShift) |
90 | #define DstDX (OpDX << DstShift) | 91 | #define DstDX (OpDX << DstShift) |
91 | #define DstAccLo (OpAccLo << DstShift) | 92 | #define DstAccLo (OpAccLo << DstShift) |
@@ -124,6 +125,7 @@ | |||
124 | #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ | 125 | #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ |
125 | #define Escape (5<<15) /* Escape to coprocessor instruction */ | 126 | #define Escape (5<<15) /* Escape to coprocessor instruction */ |
126 | #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */ | 127 | #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */ |
128 | #define ModeDual (7<<15) /* Different instruction for 32/64 bit */ | ||
127 | #define Sse (1<<18) /* SSE Vector instruction */ | 129 | #define Sse (1<<18) /* SSE Vector instruction */ |
128 | /* Generic ModRM decode. */ | 130 | /* Generic ModRM decode. */ |
129 | #define ModRM (1<<19) | 131 | #define ModRM (1<<19) |
@@ -165,10 +167,10 @@ | |||
165 | #define NoMod ((u64)1 << 47) /* Mod field is ignored */ | 167 | #define NoMod ((u64)1 << 47) /* Mod field is ignored */ |
166 | #define Intercept ((u64)1 << 48) /* Has valid intercept field */ | 168 | #define Intercept ((u64)1 << 48) /* Has valid intercept field */ |
167 | #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */ | 169 | #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */ |
168 | #define NoBigReal ((u64)1 << 50) /* No big real mode */ | ||
169 | #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */ | 170 | #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */ |
170 | #define NearBranch ((u64)1 << 52) /* Near branches */ | 171 | #define NearBranch ((u64)1 << 52) /* Near branches */ |
171 | #define No16 ((u64)1 << 53) /* No 16 bit operand */ | 172 | #define No16 ((u64)1 << 53) /* No 16 bit operand */ |
173 | #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */ | ||
172 | 174 | ||
173 | #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) | 175 | #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) |
174 | 176 | ||
@@ -213,6 +215,7 @@ struct opcode { | |||
213 | const struct gprefix *gprefix; | 215 | const struct gprefix *gprefix; |
214 | const struct escape *esc; | 216 | const struct escape *esc; |
215 | const struct instr_dual *idual; | 217 | const struct instr_dual *idual; |
218 | const struct mode_dual *mdual; | ||
216 | void (*fastop)(struct fastop *fake); | 219 | void (*fastop)(struct fastop *fake); |
217 | } u; | 220 | } u; |
218 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); | 221 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); |
@@ -240,6 +243,11 @@ struct instr_dual { | |||
240 | struct opcode mod3; | 243 | struct opcode mod3; |
241 | }; | 244 | }; |
242 | 245 | ||
246 | struct mode_dual { | ||
247 | struct opcode mode32; | ||
248 | struct opcode mode64; | ||
249 | }; | ||
250 | |||
243 | /* EFLAGS bit definitions. */ | 251 | /* EFLAGS bit definitions. */ |
244 | #define EFLG_ID (1<<21) | 252 | #define EFLG_ID (1<<21) |
245 | #define EFLG_VIP (1<<20) | 253 | #define EFLG_VIP (1<<20) |
@@ -262,6 +270,13 @@ struct instr_dual { | |||
262 | #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a | 270 | #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a |
263 | #define EFLG_RESERVED_ONE_MASK 2 | 271 | #define EFLG_RESERVED_ONE_MASK 2 |
264 | 272 | ||
273 | enum x86_transfer_type { | ||
274 | X86_TRANSFER_NONE, | ||
275 | X86_TRANSFER_CALL_JMP, | ||
276 | X86_TRANSFER_RET, | ||
277 | X86_TRANSFER_TASK_SWITCH, | ||
278 | }; | ||
279 | |||
265 | static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr) | 280 | static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr) |
266 | { | 281 | { |
267 | if (!(ctxt->regs_valid & (1 << nr))) { | 282 | if (!(ctxt->regs_valid & (1 << nr))) { |
@@ -669,9 +684,13 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, | |||
669 | } | 684 | } |
670 | if (addr.ea > lim) | 685 | if (addr.ea > lim) |
671 | goto bad; | 686 | goto bad; |
672 | *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea); | 687 | if (lim == 0xffffffff) |
673 | if (size > *max_size) | 688 | *max_size = ~0u; |
674 | goto bad; | 689 | else { |
690 | *max_size = (u64)lim + 1 - addr.ea; | ||
691 | if (size > *max_size) | ||
692 | goto bad; | ||
693 | } | ||
675 | la &= (u32)-1; | 694 | la &= (u32)-1; |
676 | break; | 695 | break; |
677 | } | 696 | } |
@@ -722,19 +741,26 @@ static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, | |||
722 | const struct desc_struct *cs_desc) | 741 | const struct desc_struct *cs_desc) |
723 | { | 742 | { |
724 | enum x86emul_mode mode = ctxt->mode; | 743 | enum x86emul_mode mode = ctxt->mode; |
744 | int rc; | ||
725 | 745 | ||
726 | #ifdef CONFIG_X86_64 | 746 | #ifdef CONFIG_X86_64 |
727 | if (ctxt->mode >= X86EMUL_MODE_PROT32 && cs_desc->l) { | 747 | if (ctxt->mode >= X86EMUL_MODE_PROT16) { |
728 | u64 efer = 0; | 748 | if (cs_desc->l) { |
749 | u64 efer = 0; | ||
729 | 750 | ||
730 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | 751 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); |
731 | if (efer & EFER_LMA) | 752 | if (efer & EFER_LMA) |
732 | mode = X86EMUL_MODE_PROT64; | 753 | mode = X86EMUL_MODE_PROT64; |
754 | } else | ||
755 | mode = X86EMUL_MODE_PROT32; /* temporary value */ | ||
733 | } | 756 | } |
734 | #endif | 757 | #endif |
735 | if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32) | 758 | if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32) |
736 | mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 759 | mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
737 | return assign_eip(ctxt, dst, mode); | 760 | rc = assign_eip(ctxt, dst, mode); |
761 | if (rc == X86EMUL_CONTINUE) | ||
762 | ctxt->mode = mode; | ||
763 | return rc; | ||
738 | } | 764 | } |
739 | 765 | ||
740 | static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) | 766 | static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) |
@@ -1057,8 +1083,6 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt) | |||
1057 | asm volatile("fnstcw %0": "+m"(fcw)); | 1083 | asm volatile("fnstcw %0": "+m"(fcw)); |
1058 | ctxt->ops->put_fpu(ctxt); | 1084 | ctxt->ops->put_fpu(ctxt); |
1059 | 1085 | ||
1060 | /* force 2 byte destination */ | ||
1061 | ctxt->dst.bytes = 2; | ||
1062 | ctxt->dst.val = fcw; | 1086 | ctxt->dst.val = fcw; |
1063 | 1087 | ||
1064 | return X86EMUL_CONTINUE; | 1088 | return X86EMUL_CONTINUE; |
@@ -1075,8 +1099,6 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt) | |||
1075 | asm volatile("fnstsw %0": "+m"(fsw)); | 1099 | asm volatile("fnstsw %0": "+m"(fsw)); |
1076 | ctxt->ops->put_fpu(ctxt); | 1100 | ctxt->ops->put_fpu(ctxt); |
1077 | 1101 | ||
1078 | /* force 2 byte destination */ | ||
1079 | ctxt->dst.bytes = 2; | ||
1080 | ctxt->dst.val = fsw; | 1102 | ctxt->dst.val = fsw; |
1081 | 1103 | ||
1082 | return X86EMUL_CONTINUE; | 1104 | return X86EMUL_CONTINUE; |
@@ -1223,6 +1245,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
1223 | else { | 1245 | else { |
1224 | modrm_ea += reg_read(ctxt, base_reg); | 1246 | modrm_ea += reg_read(ctxt, base_reg); |
1225 | adjust_modrm_seg(ctxt, base_reg); | 1247 | adjust_modrm_seg(ctxt, base_reg); |
1248 | /* Increment ESP on POP [ESP] */ | ||
1249 | if ((ctxt->d & IncSP) && | ||
1250 | base_reg == VCPU_REGS_RSP) | ||
1251 | modrm_ea += ctxt->op_bytes; | ||
1226 | } | 1252 | } |
1227 | if (index_reg != 4) | 1253 | if (index_reg != 4) |
1228 | modrm_ea += reg_read(ctxt, index_reg) << scale; | 1254 | modrm_ea += reg_read(ctxt, index_reg) << scale; |
@@ -1435,10 +1461,8 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | |||
1435 | ops->get_gdt(ctxt, dt); | 1461 | ops->get_gdt(ctxt, dt); |
1436 | } | 1462 | } |
1437 | 1463 | ||
1438 | /* allowed just for 8 bytes segments */ | 1464 | static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt, |
1439 | static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | 1465 | u16 selector, ulong *desc_addr_p) |
1440 | u16 selector, struct desc_struct *desc, | ||
1441 | ulong *desc_addr_p) | ||
1442 | { | 1466 | { |
1443 | struct desc_ptr dt; | 1467 | struct desc_ptr dt; |
1444 | u16 index = selector >> 3; | 1468 | u16 index = selector >> 3; |
@@ -1449,8 +1473,34 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1449 | if (dt.size < index * 8 + 7) | 1473 | if (dt.size < index * 8 + 7) |
1450 | return emulate_gp(ctxt, selector & 0xfffc); | 1474 | return emulate_gp(ctxt, selector & 0xfffc); |
1451 | 1475 | ||
1452 | *desc_addr_p = addr = dt.address + index * 8; | 1476 | addr = dt.address + index * 8; |
1453 | return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc, | 1477 | |
1478 | #ifdef CONFIG_X86_64 | ||
1479 | if (addr >> 32 != 0) { | ||
1480 | u64 efer = 0; | ||
1481 | |||
1482 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | ||
1483 | if (!(efer & EFER_LMA)) | ||
1484 | addr &= (u32)-1; | ||
1485 | } | ||
1486 | #endif | ||
1487 | |||
1488 | *desc_addr_p = addr; | ||
1489 | return X86EMUL_CONTINUE; | ||
1490 | } | ||
1491 | |||
1492 | /* allowed just for 8 bytes segments */ | ||
1493 | static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
1494 | u16 selector, struct desc_struct *desc, | ||
1495 | ulong *desc_addr_p) | ||
1496 | { | ||
1497 | int rc; | ||
1498 | |||
1499 | rc = get_descriptor_ptr(ctxt, selector, desc_addr_p); | ||
1500 | if (rc != X86EMUL_CONTINUE) | ||
1501 | return rc; | ||
1502 | |||
1503 | return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc), | ||
1454 | &ctxt->exception); | 1504 | &ctxt->exception); |
1455 | } | 1505 | } |
1456 | 1506 | ||
@@ -1458,16 +1508,13 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1458 | static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | 1508 | static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, |
1459 | u16 selector, struct desc_struct *desc) | 1509 | u16 selector, struct desc_struct *desc) |
1460 | { | 1510 | { |
1461 | struct desc_ptr dt; | 1511 | int rc; |
1462 | u16 index = selector >> 3; | ||
1463 | ulong addr; | 1512 | ulong addr; |
1464 | 1513 | ||
1465 | get_descriptor_table_ptr(ctxt, selector, &dt); | 1514 | rc = get_descriptor_ptr(ctxt, selector, &addr); |
1466 | 1515 | if (rc != X86EMUL_CONTINUE) | |
1467 | if (dt.size < index * 8 + 7) | 1516 | return rc; |
1468 | return emulate_gp(ctxt, selector & 0xfffc); | ||
1469 | 1517 | ||
1470 | addr = dt.address + index * 8; | ||
1471 | return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc, | 1518 | return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc, |
1472 | &ctxt->exception); | 1519 | &ctxt->exception); |
1473 | } | 1520 | } |
@@ -1475,7 +1522,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1475 | /* Does not support long mode */ | 1522 | /* Does not support long mode */ |
1476 | static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | 1523 | static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, |
1477 | u16 selector, int seg, u8 cpl, | 1524 | u16 selector, int seg, u8 cpl, |
1478 | bool in_task_switch, | 1525 | enum x86_transfer_type transfer, |
1479 | struct desc_struct *desc) | 1526 | struct desc_struct *desc) |
1480 | { | 1527 | { |
1481 | struct desc_struct seg_desc, old_desc; | 1528 | struct desc_struct seg_desc, old_desc; |
@@ -1529,11 +1576,15 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1529 | return ret; | 1576 | return ret; |
1530 | 1577 | ||
1531 | err_code = selector & 0xfffc; | 1578 | err_code = selector & 0xfffc; |
1532 | err_vec = in_task_switch ? TS_VECTOR : GP_VECTOR; | 1579 | err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR : |
1580 | GP_VECTOR; | ||
1533 | 1581 | ||
1534 | /* can't load system descriptor into segment selector */ | 1582 | /* can't load system descriptor into segment selector */ |
1535 | if (seg <= VCPU_SREG_GS && !seg_desc.s) | 1583 | if (seg <= VCPU_SREG_GS && !seg_desc.s) { |
1584 | if (transfer == X86_TRANSFER_CALL_JMP) | ||
1585 | return X86EMUL_UNHANDLEABLE; | ||
1536 | goto exception; | 1586 | goto exception; |
1587 | } | ||
1537 | 1588 | ||
1538 | if (!seg_desc.p) { | 1589 | if (!seg_desc.p) { |
1539 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | 1590 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; |
@@ -1605,10 +1656,13 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1605 | 1656 | ||
1606 | if (seg_desc.s) { | 1657 | if (seg_desc.s) { |
1607 | /* mark segment as accessed */ | 1658 | /* mark segment as accessed */ |
1608 | seg_desc.type |= 1; | 1659 | if (!(seg_desc.type & 1)) { |
1609 | ret = write_segment_descriptor(ctxt, selector, &seg_desc); | 1660 | seg_desc.type |= 1; |
1610 | if (ret != X86EMUL_CONTINUE) | 1661 | ret = write_segment_descriptor(ctxt, selector, |
1611 | return ret; | 1662 | &seg_desc); |
1663 | if (ret != X86EMUL_CONTINUE) | ||
1664 | return ret; | ||
1665 | } | ||
1612 | } else if (ctxt->mode == X86EMUL_MODE_PROT64) { | 1666 | } else if (ctxt->mode == X86EMUL_MODE_PROT64) { |
1613 | ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3, | 1667 | ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3, |
1614 | sizeof(base3), &ctxt->exception); | 1668 | sizeof(base3), &ctxt->exception); |
@@ -1631,7 +1685,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1631 | u16 selector, int seg) | 1685 | u16 selector, int seg) |
1632 | { | 1686 | { |
1633 | u8 cpl = ctxt->ops->cpl(ctxt); | 1687 | u8 cpl = ctxt->ops->cpl(ctxt); |
1634 | return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL); | 1688 | return __load_segment_descriptor(ctxt, selector, seg, cpl, |
1689 | X86_TRANSFER_NONE, NULL); | ||
1635 | } | 1690 | } |
1636 | 1691 | ||
1637 | static void write_register_operand(struct operand *op) | 1692 | static void write_register_operand(struct operand *op) |
@@ -1828,12 +1883,14 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) | |||
1828 | unsigned long selector; | 1883 | unsigned long selector; |
1829 | int rc; | 1884 | int rc; |
1830 | 1885 | ||
1831 | rc = emulate_pop(ctxt, &selector, ctxt->op_bytes); | 1886 | rc = emulate_pop(ctxt, &selector, 2); |
1832 | if (rc != X86EMUL_CONTINUE) | 1887 | if (rc != X86EMUL_CONTINUE) |
1833 | return rc; | 1888 | return rc; |
1834 | 1889 | ||
1835 | if (ctxt->modrm_reg == VCPU_SREG_SS) | 1890 | if (ctxt->modrm_reg == VCPU_SREG_SS) |
1836 | ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; | 1891 | ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; |
1892 | if (ctxt->op_bytes > 2) | ||
1893 | rsp_increment(ctxt, ctxt->op_bytes - 2); | ||
1837 | 1894 | ||
1838 | rc = load_segment_descriptor(ctxt, (u16)selector, seg); | 1895 | rc = load_segment_descriptor(ctxt, (u16)selector, seg); |
1839 | return rc; | 1896 | return rc; |
@@ -2007,6 +2064,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) | |||
2007 | 2064 | ||
2008 | ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */ | 2065 | ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */ |
2009 | ctxt->eflags |= EFLG_RESERVED_ONE_MASK; | 2066 | ctxt->eflags |= EFLG_RESERVED_ONE_MASK; |
2067 | ctxt->ops->set_nmi_mask(ctxt, false); | ||
2010 | 2068 | ||
2011 | return rc; | 2069 | return rc; |
2012 | } | 2070 | } |
@@ -2041,7 +2099,8 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) | |||
2041 | 2099 | ||
2042 | memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); | 2100 | memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); |
2043 | 2101 | ||
2044 | rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false, | 2102 | rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, |
2103 | X86_TRANSFER_CALL_JMP, | ||
2045 | &new_desc); | 2104 | &new_desc); |
2046 | if (rc != X86EMUL_CONTINUE) | 2105 | if (rc != X86EMUL_CONTINUE) |
2047 | return rc; | 2106 | return rc; |
@@ -2130,7 +2189,8 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) | |||
2130 | /* Outer-privilege level return is not implemented */ | 2189 | /* Outer-privilege level return is not implemented */ |
2131 | if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) | 2190 | if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) |
2132 | return X86EMUL_UNHANDLEABLE; | 2191 | return X86EMUL_UNHANDLEABLE; |
2133 | rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, false, | 2192 | rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, |
2193 | X86_TRANSFER_RET, | ||
2134 | &new_desc); | 2194 | &new_desc); |
2135 | if (rc != X86EMUL_CONTINUE) | 2195 | if (rc != X86EMUL_CONTINUE) |
2136 | return rc; | 2196 | return rc; |
@@ -2163,12 +2223,15 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) | |||
2163 | fastop(ctxt, em_cmp); | 2223 | fastop(ctxt, em_cmp); |
2164 | 2224 | ||
2165 | if (ctxt->eflags & EFLG_ZF) { | 2225 | if (ctxt->eflags & EFLG_ZF) { |
2166 | /* Success: write back to memory. */ | 2226 | /* Success: write back to memory; no update of EAX */ |
2227 | ctxt->src.type = OP_NONE; | ||
2167 | ctxt->dst.val = ctxt->src.orig_val; | 2228 | ctxt->dst.val = ctxt->src.orig_val; |
2168 | } else { | 2229 | } else { |
2169 | /* Failure: write the value we saw to EAX. */ | 2230 | /* Failure: write the value we saw to EAX. */ |
2170 | ctxt->dst.type = OP_REG; | 2231 | ctxt->src.type = OP_REG; |
2171 | ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); | 2232 | ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); |
2233 | ctxt->src.val = ctxt->dst.orig_val; | ||
2234 | /* Create write-cycle to dest by writing the same value */ | ||
2172 | ctxt->dst.val = ctxt->dst.orig_val; | 2235 | ctxt->dst.val = ctxt->dst.orig_val; |
2173 | } | 2236 | } |
2174 | return X86EMUL_CONTINUE; | 2237 | return X86EMUL_CONTINUE; |
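The em_cmpxchg() hunk aligns the emulator with hardware CMPXCHG behaviour: EAX is updated only on a mismatch, and the destination always sees a write cycle. A reference model of those semantics as a plain C helper, for illustration only (names are made up):

#include <stdbool.h>
#include <stdint.h>

static bool cmpxchg_model(uint32_t *dst, uint32_t *eax, uint32_t src)
{
	uint32_t old = *dst;

	if (old == *eax) {
		*dst = src;	/* success: memory updated, EAX untouched */
		return true;	/* ZF set */
	}
	*eax = old;		/* failure: EAX receives the old value */
	*dst = old;		/* write cycle with the unchanged value */
	return false;		/* ZF clear */
}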
@@ -2556,23 +2619,23 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | |||
2556 | * it is handled in a context of new task | 2619 | * it is handled in a context of new task |
2557 | */ | 2620 | */ |
2558 | ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, | 2621 | ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, |
2559 | true, NULL); | 2622 | X86_TRANSFER_TASK_SWITCH, NULL); |
2560 | if (ret != X86EMUL_CONTINUE) | 2623 | if (ret != X86EMUL_CONTINUE) |
2561 | return ret; | 2624 | return ret; |
2562 | ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, | 2625 | ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, |
2563 | true, NULL); | 2626 | X86_TRANSFER_TASK_SWITCH, NULL); |
2564 | if (ret != X86EMUL_CONTINUE) | 2627 | if (ret != X86EMUL_CONTINUE) |
2565 | return ret; | 2628 | return ret; |
2566 | ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, | 2629 | ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, |
2567 | true, NULL); | 2630 | X86_TRANSFER_TASK_SWITCH, NULL); |
2568 | if (ret != X86EMUL_CONTINUE) | 2631 | if (ret != X86EMUL_CONTINUE) |
2569 | return ret; | 2632 | return ret; |
2570 | ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, | 2633 | ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, |
2571 | true, NULL); | 2634 | X86_TRANSFER_TASK_SWITCH, NULL); |
2572 | if (ret != X86EMUL_CONTINUE) | 2635 | if (ret != X86EMUL_CONTINUE) |
2573 | return ret; | 2636 | return ret; |
2574 | ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, | 2637 | ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, |
2575 | true, NULL); | 2638 | X86_TRANSFER_TASK_SWITCH, NULL); |
2576 | if (ret != X86EMUL_CONTINUE) | 2639 | if (ret != X86EMUL_CONTINUE) |
2577 | return ret; | 2640 | return ret; |
2578 | 2641 | ||
@@ -2694,31 +2757,31 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
2694 | * it is handled in a context of new task | 2757 | * it is handled in a context of new task |
2695 | */ | 2758 | */ |
2696 | ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, | 2759 | ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, |
2697 | cpl, true, NULL); | 2760 | cpl, X86_TRANSFER_TASK_SWITCH, NULL); |
2698 | if (ret != X86EMUL_CONTINUE) | 2761 | if (ret != X86EMUL_CONTINUE) |
2699 | return ret; | 2762 | return ret; |
2700 | ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, | 2763 | ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, |
2701 | true, NULL); | 2764 | X86_TRANSFER_TASK_SWITCH, NULL); |
2702 | if (ret != X86EMUL_CONTINUE) | 2765 | if (ret != X86EMUL_CONTINUE) |
2703 | return ret; | 2766 | return ret; |
2704 | ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, | 2767 | ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, |
2705 | true, NULL); | 2768 | X86_TRANSFER_TASK_SWITCH, NULL); |
2706 | if (ret != X86EMUL_CONTINUE) | 2769 | if (ret != X86EMUL_CONTINUE) |
2707 | return ret; | 2770 | return ret; |
2708 | ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, | 2771 | ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, |
2709 | true, NULL); | 2772 | X86_TRANSFER_TASK_SWITCH, NULL); |
2710 | if (ret != X86EMUL_CONTINUE) | 2773 | if (ret != X86EMUL_CONTINUE) |
2711 | return ret; | 2774 | return ret; |
2712 | ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, | 2775 | ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, |
2713 | true, NULL); | 2776 | X86_TRANSFER_TASK_SWITCH, NULL); |
2714 | if (ret != X86EMUL_CONTINUE) | 2777 | if (ret != X86EMUL_CONTINUE) |
2715 | return ret; | 2778 | return ret; |
2716 | ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, | 2779 | ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, |
2717 | true, NULL); | 2780 | X86_TRANSFER_TASK_SWITCH, NULL); |
2718 | if (ret != X86EMUL_CONTINUE) | 2781 | if (ret != X86EMUL_CONTINUE) |
2719 | return ret; | 2782 | return ret; |
2720 | ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, | 2783 | ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, |
2721 | true, NULL); | 2784 | X86_TRANSFER_TASK_SWITCH, NULL); |
2722 | if (ret != X86EMUL_CONTINUE) | 2785 | if (ret != X86EMUL_CONTINUE) |
2723 | return ret; | 2786 | return ret; |
2724 | 2787 | ||
@@ -2739,7 +2802,6 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2739 | ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, | 2802 | ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, |
2740 | &ctxt->exception); | 2803 | &ctxt->exception); |
2741 | if (ret != X86EMUL_CONTINUE) | 2804 | if (ret != X86EMUL_CONTINUE) |
2742 | /* FIXME: need to provide precise fault address */ | ||
2743 | return ret; | 2805 | return ret; |
2744 | 2806 | ||
2745 | save_state_to_tss32(ctxt, &tss_seg); | 2807 | save_state_to_tss32(ctxt, &tss_seg); |
@@ -2748,13 +2810,11 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2748 | ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip, | 2810 | ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip, |
2749 | ldt_sel_offset - eip_offset, &ctxt->exception); | 2811 | ldt_sel_offset - eip_offset, &ctxt->exception); |
2750 | if (ret != X86EMUL_CONTINUE) | 2812 | if (ret != X86EMUL_CONTINUE) |
2751 | /* FIXME: need to provide precise fault address */ | ||
2752 | return ret; | 2813 | return ret; |
2753 | 2814 | ||
2754 | ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, | 2815 | ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, |
2755 | &ctxt->exception); | 2816 | &ctxt->exception); |
2756 | if (ret != X86EMUL_CONTINUE) | 2817 | if (ret != X86EMUL_CONTINUE) |
2757 | /* FIXME: need to provide precise fault address */ | ||
2758 | return ret; | 2818 | return ret; |
2759 | 2819 | ||
2760 | if (old_tss_sel != 0xffff) { | 2820 | if (old_tss_sel != 0xffff) { |
@@ -2765,7 +2825,6 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2765 | sizeof tss_seg.prev_task_link, | 2825 | sizeof tss_seg.prev_task_link, |
2766 | &ctxt->exception); | 2826 | &ctxt->exception); |
2767 | if (ret != X86EMUL_CONTINUE) | 2827 | if (ret != X86EMUL_CONTINUE) |
2768 | /* FIXME: need to provide precise fault address */ | ||
2769 | return ret; | 2828 | return ret; |
2770 | } | 2829 | } |
2771 | 2830 | ||
@@ -2999,15 +3058,16 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) | |||
2999 | struct desc_struct old_desc, new_desc; | 3058 | struct desc_struct old_desc, new_desc; |
3000 | const struct x86_emulate_ops *ops = ctxt->ops; | 3059 | const struct x86_emulate_ops *ops = ctxt->ops; |
3001 | int cpl = ctxt->ops->cpl(ctxt); | 3060 | int cpl = ctxt->ops->cpl(ctxt); |
3061 | enum x86emul_mode prev_mode = ctxt->mode; | ||
3002 | 3062 | ||
3003 | old_eip = ctxt->_eip; | 3063 | old_eip = ctxt->_eip; |
3004 | ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS); | 3064 | ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS); |
3005 | 3065 | ||
3006 | memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); | 3066 | memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); |
3007 | rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false, | 3067 | rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, |
3008 | &new_desc); | 3068 | X86_TRANSFER_CALL_JMP, &new_desc); |
3009 | if (rc != X86EMUL_CONTINUE) | 3069 | if (rc != X86EMUL_CONTINUE) |
3010 | return X86EMUL_CONTINUE; | 3070 | return rc; |
3011 | 3071 | ||
3012 | rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); | 3072 | rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); |
3013 | if (rc != X86EMUL_CONTINUE) | 3073 | if (rc != X86EMUL_CONTINUE) |
@@ -3022,11 +3082,14 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) | |||
3022 | rc = em_push(ctxt); | 3082 | rc = em_push(ctxt); |
3023 | /* If we failed, we tainted the memory, but the very least we should | 3083 | /* If we failed, we tainted the memory, but the very least we should |
3024 | restore cs */ | 3084 | restore cs */ |
3025 | if (rc != X86EMUL_CONTINUE) | 3085 | if (rc != X86EMUL_CONTINUE) { |
3086 | pr_warn_once("faulting far call emulation tainted memory\n"); | ||
3026 | goto fail; | 3087 | goto fail; |
3088 | } | ||
3027 | return rc; | 3089 | return rc; |
3028 | fail: | 3090 | fail: |
3029 | ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); | 3091 | ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); |
3092 | ctxt->mode = prev_mode; | ||
3030 | return rc; | 3093 | return rc; |
3031 | 3094 | ||
3032 | } | 3095 | } |
@@ -3477,6 +3540,12 @@ static int em_clflush(struct x86_emulate_ctxt *ctxt) | |||
3477 | return X86EMUL_CONTINUE; | 3540 | return X86EMUL_CONTINUE; |
3478 | } | 3541 | } |
3479 | 3542 | ||
3543 | static int em_movsxd(struct x86_emulate_ctxt *ctxt) | ||
3544 | { | ||
3545 | ctxt->dst.val = (s32) ctxt->src.val; | ||
3546 | return X86EMUL_CONTINUE; | ||
3547 | } | ||
3548 | |||
3480 | static bool valid_cr(int nr) | 3549 | static bool valid_cr(int nr) |
3481 | { | 3550 | { |
3482 | switch (nr) { | 3551 | switch (nr) { |
@@ -3676,6 +3745,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) | |||
3676 | #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } | 3745 | #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } |
3677 | #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } | 3746 | #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } |
3678 | #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) } | 3747 | #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) } |
3748 | #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) } | ||
3679 | #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } | 3749 | #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } |
3680 | #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } | 3750 | #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } |
3681 | #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } | 3751 | #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } |
@@ -3738,7 +3808,7 @@ static const struct opcode group1[] = { | |||
3738 | }; | 3808 | }; |
3739 | 3809 | ||
3740 | static const struct opcode group1A[] = { | 3810 | static const struct opcode group1A[] = { |
3741 | I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, | 3811 | I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N, |
3742 | }; | 3812 | }; |
3743 | 3813 | ||
3744 | static const struct opcode group2[] = { | 3814 | static const struct opcode group2[] = { |
@@ -3854,7 +3924,7 @@ static const struct gprefix pfx_0f_e7 = { | |||
3854 | }; | 3924 | }; |
3855 | 3925 | ||
3856 | static const struct escape escape_d9 = { { | 3926 | static const struct escape escape_d9 = { { |
3857 | N, N, N, N, N, N, N, I(DstMem, em_fnstcw), | 3927 | N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw), |
3858 | }, { | 3928 | }, { |
3859 | /* 0xC0 - 0xC7 */ | 3929 | /* 0xC0 - 0xC7 */ |
3860 | N, N, N, N, N, N, N, N, | 3930 | N, N, N, N, N, N, N, N, |
@@ -3896,7 +3966,7 @@ static const struct escape escape_db = { { | |||
3896 | } }; | 3966 | } }; |
3897 | 3967 | ||
3898 | static const struct escape escape_dd = { { | 3968 | static const struct escape escape_dd = { { |
3899 | N, N, N, N, N, N, N, I(DstMem, em_fnstsw), | 3969 | N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw), |
3900 | }, { | 3970 | }, { |
3901 | /* 0xC0 - 0xC7 */ | 3971 | /* 0xC0 - 0xC7 */ |
3902 | N, N, N, N, N, N, N, N, | 3972 | N, N, N, N, N, N, N, N, |
@@ -3920,6 +3990,10 @@ static const struct instr_dual instr_dual_0f_c3 = { | |||
3920 | I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N | 3990 | I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N |
3921 | }; | 3991 | }; |
3922 | 3992 | ||
3993 | static const struct mode_dual mode_dual_63 = { | ||
3994 | N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd) | ||
3995 | }; | ||
3996 | |||
3923 | static const struct opcode opcode_table[256] = { | 3997 | static const struct opcode opcode_table[256] = { |
3924 | /* 0x00 - 0x07 */ | 3998 | /* 0x00 - 0x07 */ |
3925 | F6ALU(Lock, em_add), | 3999 | F6ALU(Lock, em_add), |
@@ -3954,7 +4028,7 @@ static const struct opcode opcode_table[256] = { | |||
3954 | /* 0x60 - 0x67 */ | 4028 | /* 0x60 - 0x67 */ |
3955 | I(ImplicitOps | Stack | No64, em_pusha), | 4029 | I(ImplicitOps | Stack | No64, em_pusha), |
3956 | I(ImplicitOps | Stack | No64, em_popa), | 4030 | I(ImplicitOps | Stack | No64, em_popa), |
3957 | N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ , | 4031 | N, MD(ModRM, &mode_dual_63), |
3958 | N, N, N, N, | 4032 | N, N, N, N, |
3959 | /* 0x68 - 0x6F */ | 4033 | /* 0x68 - 0x6F */ |
3960 | I(SrcImm | Mov | Stack, em_push), | 4034 | I(SrcImm | Mov | Stack, em_push), |
@@ -4010,8 +4084,8 @@ static const struct opcode opcode_table[256] = { | |||
4010 | G(ByteOp, group11), G(0, group11), | 4084 | G(ByteOp, group11), G(0, group11), |
4011 | /* 0xC8 - 0xCF */ | 4085 | /* 0xC8 - 0xCF */ |
4012 | I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave), | 4086 | I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave), |
4013 | I(ImplicitOps | Stack | SrcImmU16, em_ret_far_imm), | 4087 | I(ImplicitOps | SrcImmU16, em_ret_far_imm), |
4014 | I(ImplicitOps | Stack, em_ret_far), | 4088 | I(ImplicitOps, em_ret_far), |
4015 | D(ImplicitOps), DI(SrcImmByte, intn), | 4089 | D(ImplicitOps), DI(SrcImmByte, intn), |
4016 | D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), | 4090 | D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), |
4017 | /* 0xD0 - 0xD7 */ | 4091 | /* 0xD0 - 0xD7 */ |
@@ -4108,7 +4182,7 @@ static const struct opcode twobyte_table[256] = { | |||
4108 | F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), | 4182 | F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), |
4109 | GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul), | 4183 | GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul), |
4110 | /* 0xB0 - 0xB7 */ | 4184 | /* 0xB0 - 0xB7 */ |
4111 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), | 4185 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg), |
4112 | I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), | 4186 | I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), |
4113 | F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), | 4187 | F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), |
4114 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), | 4188 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), |
@@ -4174,6 +4248,8 @@ static const struct opcode opcode_map_0f_38[256] = { | |||
4174 | #undef I | 4248 | #undef I |
4175 | #undef GP | 4249 | #undef GP |
4176 | #undef EXT | 4250 | #undef EXT |
4251 | #undef MD | ||
4252 | #undef ID | ||
4177 | 4253 | ||
4178 | #undef D2bv | 4254 | #undef D2bv |
4179 | #undef D2bvIP | 4255 | #undef D2bvIP |
@@ -4563,6 +4639,12 @@ done_prefixes: | |||
4563 | else | 4639 | else |
4564 | opcode = opcode.u.idual->mod012; | 4640 | opcode = opcode.u.idual->mod012; |
4565 | break; | 4641 | break; |
4642 | case ModeDual: | ||
4643 | if (ctxt->mode == X86EMUL_MODE_PROT64) | ||
4644 | opcode = opcode.u.mdual->mode64; | ||
4645 | else | ||
4646 | opcode = opcode.u.mdual->mode32; | ||
4647 | break; | ||
4566 | default: | 4648 | default: |
4567 | return EMULATION_FAILED; | 4649 | return EMULATION_FAILED; |
4568 | } | 4650 | } |
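
The new ModeDual flag lets one opcode slot carry two handlers selected by CPU mode at decode time; opcode 0x63 uses it so MOVSXD is only recognized in 64-bit mode, while outside 64-bit mode the slot stays N (ARPL, which the emulator does not handle). A hedged sketch of the same dispatch idea, with made-up names:

#include <stdio.h>

/* Illustrative only: a two-way opcode entry keyed on CPU mode,
 * mirroring the ModeDual/mode_dual_63 idea (names are hypothetical). */
typedef int (*handler_t)(void);

struct mode_dual {
	handler_t mode32;	/* used outside 64-bit mode (may be NULL) */
	handler_t mode64;	/* used in 64-bit mode */
};

static int do_movsxd(void) { puts("movsxd"); return 0; }

static int dispatch(const struct mode_dual *md, int long_mode)
{
	handler_t h = long_mode ? md->mode64 : md->mode32;

	if (!h)
		return -1;	/* undefined in this mode: emulation fails */
	return h();
}

int main(void)
{
	struct mode_dual op63 = { .mode32 = NULL, .mode64 = do_movsxd };

	if (dispatch(&op63, 1))
		puts("64-bit: failed");
	if (dispatch(&op63, 0))
		puts("32-bit: undefined, emulation fails");
	return 0;
}
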
@@ -4860,8 +4942,13 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
4860 | /* optimisation - avoid slow emulated read if Mov */ | 4942 | /* optimisation - avoid slow emulated read if Mov */ |
4861 | rc = segmented_read(ctxt, ctxt->dst.addr.mem, | 4943 | rc = segmented_read(ctxt, ctxt->dst.addr.mem, |
4862 | &ctxt->dst.val, ctxt->dst.bytes); | 4944 | &ctxt->dst.val, ctxt->dst.bytes); |
4863 | if (rc != X86EMUL_CONTINUE) | 4945 | if (rc != X86EMUL_CONTINUE) { |
4946 | if (!(ctxt->d & NoWrite) && | ||
4947 | rc == X86EMUL_PROPAGATE_FAULT && | ||
4948 | ctxt->exception.vector == PF_VECTOR) | ||
4949 | ctxt->exception.error_code |= PFERR_WRITE_MASK; | ||
4864 | goto done; | 4950 | goto done; |
4951 | } | ||
4865 | } | 4952 | } |
4866 | ctxt->dst.orig_val = ctxt->dst.val; | 4953 | ctxt->dst.orig_val = ctxt->dst.val; |
4867 | 4954 | ||
@@ -4899,11 +4986,6 @@ special_insn: | |||
4899 | goto threebyte_insn; | 4986 | goto threebyte_insn; |
4900 | 4987 | ||
4901 | switch (ctxt->b) { | 4988 | switch (ctxt->b) { |
4902 | case 0x63: /* movsxd */ | ||
4903 | if (ctxt->mode != X86EMUL_MODE_PROT64) | ||
4904 | goto cannot_emulate; | ||
4905 | ctxt->dst.val = (s32) ctxt->src.val; | ||
4906 | break; | ||
4907 | case 0x70 ... 0x7f: /* jcc (short) */ | 4989 | case 0x70 ... 0x7f: /* jcc (short) */ |
4908 | if (test_cc(ctxt->b, ctxt->eflags)) | 4990 | if (test_cc(ctxt->b, ctxt->eflags)) |
4909 | rc = jmp_rel(ctxt, ctxt->src.val); | 4991 | rc = jmp_rel(ctxt, ctxt->src.val); |
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 3c9195535ffc..c2e36d934af4 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h | |||
@@ -98,7 +98,7 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) | |||
98 | } | 98 | } |
99 | 99 | ||
100 | void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); | 100 | void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); |
101 | int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | 101 | bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, |
102 | int short_hand, unsigned int dest, int dest_mode); | 102 | int short_hand, unsigned int dest, int dest_mode); |
103 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); | 103 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); |
104 | void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, | 104 | void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, |
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c index 17b73eeac8a4..7dbced309ddb 100644 --- a/arch/x86/kvm/iommu.c +++ b/arch/x86/kvm/iommu.c | |||
@@ -138,7 +138,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) | |||
138 | 138 | ||
139 | gfn += page_size >> PAGE_SHIFT; | 139 | gfn += page_size >> PAGE_SHIFT; |
140 | 140 | ||
141 | 141 | cond_resched(); | |
142 | } | 142 | } |
143 | 143 | ||
144 | return 0; | 144 | return 0; |
@@ -306,6 +306,8 @@ static void kvm_iommu_put_pages(struct kvm *kvm, | |||
306 | kvm_unpin_pages(kvm, pfn, unmap_pages); | 306 | kvm_unpin_pages(kvm, pfn, unmap_pages); |
307 | 307 | ||
308 | gfn += unmap_pages; | 308 | gfn += unmap_pages; |
309 | |||
310 | cond_resched(); | ||
309 | } | 311 | } |
310 | } | 312 | } |
311 | 313 | ||
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d52dcf0776ea..e55b5fc344eb 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/page.h> | 33 | #include <asm/page.h> |
34 | #include <asm/current.h> | 34 | #include <asm/current.h> |
35 | #include <asm/apicdef.h> | 35 | #include <asm/apicdef.h> |
36 | #include <asm/delay.h> | ||
36 | #include <linux/atomic.h> | 37 | #include <linux/atomic.h> |
37 | #include <linux/jump_label.h> | 38 | #include <linux/jump_label.h> |
38 | #include "kvm_cache_regs.h" | 39 | #include "kvm_cache_regs.h" |
@@ -327,17 +328,24 @@ static u8 count_vectors(void *bitmap) | |||
327 | return count; | 328 | return count; |
328 | } | 329 | } |
329 | 330 | ||
330 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) | 331 | void __kvm_apic_update_irr(u32 *pir, void *regs) |
331 | { | 332 | { |
332 | u32 i, pir_val; | 333 | u32 i, pir_val; |
333 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
334 | 334 | ||
335 | for (i = 0; i <= 7; i++) { | 335 | for (i = 0; i <= 7; i++) { |
336 | pir_val = xchg(&pir[i], 0); | 336 | pir_val = xchg(&pir[i], 0); |
337 | if (pir_val) | 337 | if (pir_val) |
338 | *((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val; | 338 | *((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val; |
339 | } | 339 | } |
340 | } | 340 | } |
341 | EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); | ||
342 | |||
343 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) | ||
344 | { | ||
345 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
346 | |||
347 | __kvm_apic_update_irr(pir, apic->regs); | ||
348 | } | ||
341 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); | 349 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); |
342 | 350 | ||
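
Splitting out __kvm_apic_update_irr() lets callers merge a posted-interrupt request (PIR) bitmap into any APIC register page, not only the current vCPU's. The core operation is an atomic fetch-and-clear of each 32-bit PIR word followed by an OR into the matching IRR word. A standalone sketch of that merge (C11 atomics, illustrative layout):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NR_WORDS 8	/* 256 vectors / 32 bits per word */

/* Move pending bits from pir[] into irr[], clearing pir[] atomically so a
 * concurrent producer (the posting side) never loses an interrupt. */
static void merge_pir_into_irr(_Atomic uint32_t *pir, uint32_t *irr)
{
	for (int i = 0; i < NR_WORDS; i++) {
		uint32_t val = atomic_exchange(&pir[i], 0);

		if (val)
			irr[i] |= val;
	}
}

int main(void)
{
	_Atomic uint32_t pir[NR_WORDS] = {0};
	uint32_t irr[NR_WORDS] = {0};

	atomic_store(&pir[1], 1u << 5);		/* pretend vector 37 was posted */
	merge_pir_into_irr(pir, irr);
	printf("irr[1] = %#x, pir[1] = %#x\n", irr[1], (unsigned)atomic_load(&pir[1]));
	return 0;
}
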
343 | static inline void apic_set_irr(int vec, struct kvm_lapic *apic) | 351 | static inline void apic_set_irr(int vec, struct kvm_lapic *apic) |
@@ -405,7 +413,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic) | |||
405 | * because the processor can modify ISR under the hood. Instead | 413 | * because the processor can modify ISR under the hood. Instead |
406 | * just set SVI. | 414 | * just set SVI. |
407 | */ | 415 | */ |
408 | if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) | 416 | if (unlikely(kvm_x86_ops->hwapic_isr_update)) |
409 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec); | 417 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec); |
410 | else { | 418 | else { |
411 | ++apic->isr_count; | 419 | ++apic->isr_count; |
@@ -453,7 +461,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) | |||
453 | * on the other hand isr_count and highest_isr_cache are unused | 461 | * on the other hand isr_count and highest_isr_cache are unused |
454 | * and must be left alone. | 462 | * and must be left alone. |
455 | */ | 463 | */ |
456 | if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) | 464 | if (unlikely(kvm_x86_ops->hwapic_isr_update)) |
457 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, | 465 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, |
458 | apic_find_highest_isr(apic)); | 466 | apic_find_highest_isr(apic)); |
459 | else { | 467 | else { |
@@ -580,55 +588,48 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) | |||
580 | apic_update_ppr(apic); | 588 | apic_update_ppr(apic); |
581 | } | 589 | } |
582 | 590 | ||
583 | static int kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest) | 591 | static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest) |
584 | { | 592 | { |
585 | return dest == (apic_x2apic_mode(apic) ? | 593 | return dest == (apic_x2apic_mode(apic) ? |
586 | X2APIC_BROADCAST : APIC_BROADCAST); | 594 | X2APIC_BROADCAST : APIC_BROADCAST); |
587 | } | 595 | } |
588 | 596 | ||
589 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest) | 597 | static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest) |
590 | { | 598 | { |
591 | return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest); | 599 | return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest); |
592 | } | 600 | } |
593 | 601 | ||
594 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) | 602 | static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) |
595 | { | 603 | { |
596 | int result = 0; | ||
597 | u32 logical_id; | 604 | u32 logical_id; |
598 | 605 | ||
599 | if (kvm_apic_broadcast(apic, mda)) | 606 | if (kvm_apic_broadcast(apic, mda)) |
600 | return 1; | 607 | return true; |
601 | 608 | ||
602 | if (apic_x2apic_mode(apic)) { | 609 | logical_id = kvm_apic_get_reg(apic, APIC_LDR); |
603 | logical_id = kvm_apic_get_reg(apic, APIC_LDR); | ||
604 | return logical_id & mda; | ||
605 | } | ||
606 | 610 | ||
607 | logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR)); | 611 | if (apic_x2apic_mode(apic)) |
612 | return ((logical_id >> 16) == (mda >> 16)) | ||
613 | && (logical_id & mda & 0xffff) != 0; | ||
614 | |||
615 | logical_id = GET_APIC_LOGICAL_ID(logical_id); | ||
608 | 616 | ||
609 | switch (kvm_apic_get_reg(apic, APIC_DFR)) { | 617 | switch (kvm_apic_get_reg(apic, APIC_DFR)) { |
610 | case APIC_DFR_FLAT: | 618 | case APIC_DFR_FLAT: |
611 | if (logical_id & mda) | 619 | return (logical_id & mda) != 0; |
612 | result = 1; | ||
613 | break; | ||
614 | case APIC_DFR_CLUSTER: | 620 | case APIC_DFR_CLUSTER: |
615 | if (((logical_id >> 4) == (mda >> 0x4)) | 621 | return ((logical_id >> 4) == (mda >> 4)) |
616 | && (logical_id & mda & 0xf)) | 622 | && (logical_id & mda & 0xf) != 0; |
617 | result = 1; | ||
618 | break; | ||
619 | default: | 623 | default: |
620 | apic_debug("Bad DFR vcpu %d: %08x\n", | 624 | apic_debug("Bad DFR vcpu %d: %08x\n", |
621 | apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR)); | 625 | apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR)); |
622 | break; | 626 | return false; |
623 | } | 627 | } |
624 | |||
625 | return result; | ||
626 | } | 628 | } |
627 | 629 | ||
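
The rewritten kvm_apic_match_logical_addr() fixes x2APIC logical matching: an x2APIC logical ID is a 16-bit cluster number in the upper half plus a 16-bit member bitmask in the lower half, so the MDA matches only if the clusters are equal and the member bitmasks intersect, whereas the old code just ANDed the two words. Flat and cluster xAPIC modes keep their existing semantics. A user-space sketch under these assumptions:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* x2APIC: MDA = cluster_id[31:16] | member_bitmask[15:0] */
static bool x2apic_logical_match(uint32_t ldr, uint32_t mda)
{
	return (ldr >> 16) == (mda >> 16) && (ldr & mda & 0xffff) != 0;
}

/* xAPIC cluster model: ID = cluster[7:4] | member_bitmask[3:0] */
static bool xapic_cluster_match(uint8_t logical_id, uint8_t mda)
{
	return (logical_id >> 4) == (mda >> 4) && (logical_id & mda & 0xf) != 0;
}

int main(void)
{
	/* same cluster (1), overlapping members: match */
	printf("%d\n", x2apic_logical_match(0x00010003, 0x00010001));
	/* same member bit but different cluster: no match (the old AND said yes) */
	printf("%d\n", x2apic_logical_match(0x00010001, 0x00020001));
	printf("%d\n", xapic_cluster_match(0x12, 0x1f));
	return 0;
}
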
628 | int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | 630 | bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, |
629 | int short_hand, unsigned int dest, int dest_mode) | 631 | int short_hand, unsigned int dest, int dest_mode) |
630 | { | 632 | { |
631 | int result = 0; | ||
632 | struct kvm_lapic *target = vcpu->arch.apic; | 633 | struct kvm_lapic *target = vcpu->arch.apic; |
633 | 634 | ||
634 | apic_debug("target %p, source %p, dest 0x%x, " | 635 | apic_debug("target %p, source %p, dest 0x%x, " |
@@ -638,29 +639,21 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | |||
638 | ASSERT(target); | 639 | ASSERT(target); |
639 | switch (short_hand) { | 640 | switch (short_hand) { |
640 | case APIC_DEST_NOSHORT: | 641 | case APIC_DEST_NOSHORT: |
641 | if (dest_mode == 0) | 642 | if (dest_mode == APIC_DEST_PHYSICAL) |
642 | /* Physical mode. */ | 643 | return kvm_apic_match_physical_addr(target, dest); |
643 | result = kvm_apic_match_physical_addr(target, dest); | ||
644 | else | 644 | else |
645 | /* Logical mode. */ | 645 | return kvm_apic_match_logical_addr(target, dest); |
646 | result = kvm_apic_match_logical_addr(target, dest); | ||
647 | break; | ||
648 | case APIC_DEST_SELF: | 646 | case APIC_DEST_SELF: |
649 | result = (target == source); | 647 | return target == source; |
650 | break; | ||
651 | case APIC_DEST_ALLINC: | 648 | case APIC_DEST_ALLINC: |
652 | result = 1; | 649 | return true; |
653 | break; | ||
654 | case APIC_DEST_ALLBUT: | 650 | case APIC_DEST_ALLBUT: |
655 | result = (target != source); | 651 | return target != source; |
656 | break; | ||
657 | default: | 652 | default: |
658 | apic_debug("kvm: apic: Bad dest shorthand value %x\n", | 653 | apic_debug("kvm: apic: Bad dest shorthand value %x\n", |
659 | short_hand); | 654 | short_hand); |
660 | break; | 655 | return false; |
661 | } | 656 | } |
662 | |||
663 | return result; | ||
664 | } | 657 | } |
665 | 658 | ||
666 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | 659 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, |
@@ -693,7 +686,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | |||
693 | 686 | ||
694 | ret = true; | 687 | ret = true; |
695 | 688 | ||
696 | if (irq->dest_mode == 0) { /* physical mode */ | 689 | if (irq->dest_mode == APIC_DEST_PHYSICAL) { |
697 | if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) | 690 | if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) |
698 | goto out; | 691 | goto out; |
699 | 692 | ||
@@ -1076,25 +1069,72 @@ static void apic_timer_expired(struct kvm_lapic *apic) | |||
1076 | { | 1069 | { |
1077 | struct kvm_vcpu *vcpu = apic->vcpu; | 1070 | struct kvm_vcpu *vcpu = apic->vcpu; |
1078 | wait_queue_head_t *q = &vcpu->wq; | 1071 | wait_queue_head_t *q = &vcpu->wq; |
1072 | struct kvm_timer *ktimer = &apic->lapic_timer; | ||
1079 | 1073 | ||
1080 | /* | ||
1081 | * Note: KVM_REQ_PENDING_TIMER is implicitly checked in | ||
1082 | * vcpu_enter_guest. | ||
1083 | */ | ||
1084 | if (atomic_read(&apic->lapic_timer.pending)) | 1074 | if (atomic_read(&apic->lapic_timer.pending)) |
1085 | return; | 1075 | return; |
1086 | 1076 | ||
1087 | atomic_inc(&apic->lapic_timer.pending); | 1077 | atomic_inc(&apic->lapic_timer.pending); |
1088 | /* FIXME: this code should not know anything about vcpus */ | 1078 | kvm_set_pending_timer(vcpu); |
1089 | kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); | ||
1090 | 1079 | ||
1091 | if (waitqueue_active(q)) | 1080 | if (waitqueue_active(q)) |
1092 | wake_up_interruptible(q); | 1081 | wake_up_interruptible(q); |
1082 | |||
1083 | if (apic_lvtt_tscdeadline(apic)) | ||
1084 | ktimer->expired_tscdeadline = ktimer->tscdeadline; | ||
1085 | } | ||
1086 | |||
1087 | /* | ||
1088 | * On APICv, this test will cause a busy wait | ||
1089 | * during a higher-priority task. | ||
1090 | */ | ||
1091 | |||
1092 | static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) | ||
1093 | { | ||
1094 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
1095 | u32 reg = kvm_apic_get_reg(apic, APIC_LVTT); | ||
1096 | |||
1097 | if (kvm_apic_hw_enabled(apic)) { | ||
1098 | int vec = reg & APIC_VECTOR_MASK; | ||
1099 | void *bitmap = apic->regs + APIC_ISR; | ||
1100 | |||
1101 | if (kvm_x86_ops->deliver_posted_interrupt) | ||
1102 | bitmap = apic->regs + APIC_IRR; | ||
1103 | |||
1104 | if (apic_test_vector(vec, bitmap)) | ||
1105 | return true; | ||
1106 | } | ||
1107 | return false; | ||
1108 | } | ||
1109 | |||
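
lapic_timer_int_injected() checks whether the LVTT vector is already pending in IRR (when posted interrupts deliver directly) or in service in ISR; the APIC register page stores each 256-bit register as eight 32-bit words spaced 0x10 apart. A sketch of that bit test over a flat register page (the layout is the only assumption):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define APIC_ISR 0x100	/* offsets into the 4K APIC register page */
#define APIC_IRR 0x200

/* A 256-bit register laid out as 8 x 32-bit words, each at a 0x10 stride. */
static bool apic_test_vector(int vec, const uint8_t *page, unsigned int reg)
{
	const uint32_t *word = (const uint32_t *)(page + reg + (vec / 32) * 0x10);

	return (*word >> (vec % 32)) & 1;
}

int main(void)
{
	static uint8_t page[4096];
	int vec = 0xef;	/* example timer vector */

	*(uint32_t *)(page + APIC_IRR + (vec / 32) * 0x10) |= 1u << (vec % 32);
	printf("IRR has %#x: %d\n", vec, apic_test_vector(vec, page, APIC_IRR));
	printf("ISR has %#x: %d\n", vec, apic_test_vector(vec, page, APIC_ISR));
	return 0;
}
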
1110 | void wait_lapic_expire(struct kvm_vcpu *vcpu) | ||
1111 | { | ||
1112 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
1113 | u64 guest_tsc, tsc_deadline; | ||
1114 | |||
1115 | if (!kvm_vcpu_has_lapic(vcpu)) | ||
1116 | return; | ||
1117 | |||
1118 | if (apic->lapic_timer.expired_tscdeadline == 0) | ||
1119 | return; | ||
1120 | |||
1121 | if (!lapic_timer_int_injected(vcpu)) | ||
1122 | return; | ||
1123 | |||
1124 | tsc_deadline = apic->lapic_timer.expired_tscdeadline; | ||
1125 | apic->lapic_timer.expired_tscdeadline = 0; | ||
1126 | guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc()); | ||
1127 | trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); | ||
1128 | |||
1129 | /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ | ||
1130 | if (guest_tsc < tsc_deadline) | ||
1131 | __delay(tsc_deadline - guest_tsc); | ||
1093 | } | 1132 | } |
1094 | 1133 | ||
1095 | static void start_apic_timer(struct kvm_lapic *apic) | 1134 | static void start_apic_timer(struct kvm_lapic *apic) |
1096 | { | 1135 | { |
1097 | ktime_t now; | 1136 | ktime_t now; |
1137 | |||
1098 | atomic_set(&apic->lapic_timer.pending, 0); | 1138 | atomic_set(&apic->lapic_timer.pending, 0); |
1099 | 1139 | ||
1100 | if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { | 1140 | if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { |
@@ -1140,6 +1180,7 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
1140 | /* lapic timer in tsc deadline mode */ | 1180 | /* lapic timer in tsc deadline mode */ |
1141 | u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; | 1181 | u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; |
1142 | u64 ns = 0; | 1182 | u64 ns = 0; |
1183 | ktime_t expire; | ||
1143 | struct kvm_vcpu *vcpu = apic->vcpu; | 1184 | struct kvm_vcpu *vcpu = apic->vcpu; |
1144 | unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; | 1185 | unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; |
1145 | unsigned long flags; | 1186 | unsigned long flags; |
@@ -1154,8 +1195,10 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
1154 | if (likely(tscdeadline > guest_tsc)) { | 1195 | if (likely(tscdeadline > guest_tsc)) { |
1155 | ns = (tscdeadline - guest_tsc) * 1000000ULL; | 1196 | ns = (tscdeadline - guest_tsc) * 1000000ULL; |
1156 | do_div(ns, this_tsc_khz); | 1197 | do_div(ns, this_tsc_khz); |
1198 | expire = ktime_add_ns(now, ns); | ||
1199 | expire = ktime_sub_ns(expire, lapic_timer_advance_ns); | ||
1157 | hrtimer_start(&apic->lapic_timer.timer, | 1200 | hrtimer_start(&apic->lapic_timer.timer, |
1158 | ktime_add_ns(now, ns), HRTIMER_MODE_ABS); | 1201 | expire, HRTIMER_MODE_ABS); |
1159 | } else | 1202 | } else |
1160 | apic_timer_expired(apic); | 1203 | apic_timer_expired(apic); |
1161 | 1204 | ||
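
Together, the ktime_sub_ns(expire, lapic_timer_advance_ns) adjustment above and wait_lapic_expire() implement "arm the host timer early, then spin out the remainder": the hrtimer fires ahead of the guest's TSC deadline by a configurable margin, and just before entering the guest KVM busy-waits until the deadline so the injected timer interrupt lands closer to when the guest asked for it. A user-space analogue of the idea (clock_gettime standing in for the TSC, numbers purely illustrative):

#define _POSIX_C_SOURCE 200809L
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

int main(void)
{
	uint64_t advance_ns = 500000;		/* wake up 500us early */
	uint64_t deadline = now_ns() + 2000000;	/* requested expiry: +2ms */

	/* 1) arm the coarse timer early (here: sleep until deadline - advance) */
	struct timespec ts = { .tv_sec = 0, .tv_nsec = (long)(2000000 - advance_ns) };
	nanosleep(&ts, NULL);

	/* 2) burn the remaining time precisely, like wait_lapic_expire() */
	while (now_ns() < deadline)
		;	/* busy wait */

	printf("late by %lld ns\n", (long long)(now_ns() - deadline));
	return 0;
}
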
@@ -1745,7 +1788,9 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, | |||
1745 | if (kvm_x86_ops->hwapic_irr_update) | 1788 | if (kvm_x86_ops->hwapic_irr_update) |
1746 | kvm_x86_ops->hwapic_irr_update(vcpu, | 1789 | kvm_x86_ops->hwapic_irr_update(vcpu, |
1747 | apic_find_highest_irr(apic)); | 1790 | apic_find_highest_irr(apic)); |
1748 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); | 1791 | if (unlikely(kvm_x86_ops->hwapic_isr_update)) |
1792 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, | ||
1793 | apic_find_highest_isr(apic)); | ||
1749 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 1794 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
1750 | kvm_rtc_eoi_tracking_restore_one(vcpu); | 1795 | kvm_rtc_eoi_tracking_restore_one(vcpu); |
1751 | } | 1796 | } |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index c674fce53cf9..0bc6c656625b 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -14,6 +14,7 @@ struct kvm_timer { | |||
14 | u32 timer_mode; | 14 | u32 timer_mode; |
15 | u32 timer_mode_mask; | 15 | u32 timer_mode_mask; |
16 | u64 tscdeadline; | 16 | u64 tscdeadline; |
17 | u64 expired_tscdeadline; | ||
17 | atomic_t pending; /* accumulated triggered timers */ | 18 | atomic_t pending; /* accumulated triggered timers */ |
18 | }; | 19 | }; |
19 | 20 | ||
@@ -56,9 +57,8 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); | |||
56 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); | 57 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); |
57 | 58 | ||
58 | void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); | 59 | void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); |
60 | void __kvm_apic_update_irr(u32 *pir, void *regs); | ||
59 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); | 61 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); |
60 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest); | ||
61 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda); | ||
62 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, | 62 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, |
63 | unsigned long *dest_map); | 63 | unsigned long *dest_map); |
64 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); | 64 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); |
@@ -170,4 +170,6 @@ static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) | |||
170 | 170 | ||
171 | bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); | 171 | bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); |
172 | 172 | ||
173 | void wait_lapic_expire(struct kvm_vcpu *vcpu); | ||
174 | |||
173 | #endif | 175 | #endif |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f83fc6c5e0ba..cee759299a35 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -63,30 +63,16 @@ enum { | |||
63 | #undef MMU_DEBUG | 63 | #undef MMU_DEBUG |
64 | 64 | ||
65 | #ifdef MMU_DEBUG | 65 | #ifdef MMU_DEBUG |
66 | static bool dbg = 0; | ||
67 | module_param(dbg, bool, 0644); | ||
66 | 68 | ||
67 | #define pgprintk(x...) do { if (dbg) printk(x); } while (0) | 69 | #define pgprintk(x...) do { if (dbg) printk(x); } while (0) |
68 | #define rmap_printk(x...) do { if (dbg) printk(x); } while (0) | 70 | #define rmap_printk(x...) do { if (dbg) printk(x); } while (0) |
69 | 71 | #define MMU_WARN_ON(x) WARN_ON(x) | |
70 | #else | 72 | #else |
71 | |||
72 | #define pgprintk(x...) do { } while (0) | 73 | #define pgprintk(x...) do { } while (0) |
73 | #define rmap_printk(x...) do { } while (0) | 74 | #define rmap_printk(x...) do { } while (0) |
74 | 75 | #define MMU_WARN_ON(x) do { } while (0) | |
75 | #endif | ||
76 | |||
77 | #ifdef MMU_DEBUG | ||
78 | static bool dbg = 0; | ||
79 | module_param(dbg, bool, 0644); | ||
80 | #endif | ||
81 | |||
82 | #ifndef MMU_DEBUG | ||
83 | #define ASSERT(x) do { } while (0) | ||
84 | #else | ||
85 | #define ASSERT(x) \ | ||
86 | if (!(x)) { \ | ||
87 | printk(KERN_WARNING "assertion failed %s:%d: %s\n", \ | ||
88 | __FILE__, __LINE__, #x); \ | ||
89 | } | ||
90 | #endif | 76 | #endif |
91 | 77 | ||
92 | #define PTE_PREFETCH_NUM 8 | 78 | #define PTE_PREFETCH_NUM 8 |
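
The ASSERT() macro is gone; MMU_WARN_ON() compiles to WARN_ON() when MMU_DEBUG is defined and to a no-op otherwise, so release builds pay nothing for the checks. A minimal sketch of that pattern outside the kernel (warning text and function are stand-ins):

#include <stdio.h>

/* #define MMU_DEBUG 1 */	/* uncomment to enable the checks */

#ifdef MMU_DEBUG
#define MMU_WARN_ON(x)						\
	do {							\
		if (x)						\
			fprintf(stderr, "warning: %s:%d: %s\n",	\
				__FILE__, __LINE__, #x);	\
	} while (0)
#else
#define MMU_WARN_ON(x) do { } while (0)	/* no-op; x is not even evaluated */
#endif

int main(void)
{
	int valid_page = 0;

	MMU_WARN_ON(valid_page);	/* only warns in debug builds */
	return 0;
}
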
@@ -546,6 +532,11 @@ static bool spte_is_bit_cleared(u64 old_spte, u64 new_spte, u64 bit_mask) | |||
546 | return (old_spte & bit_mask) && !(new_spte & bit_mask); | 532 | return (old_spte & bit_mask) && !(new_spte & bit_mask); |
547 | } | 533 | } |
548 | 534 | ||
535 | static bool spte_is_bit_changed(u64 old_spte, u64 new_spte, u64 bit_mask) | ||
536 | { | ||
537 | return (old_spte & bit_mask) != (new_spte & bit_mask); | ||
538 | } | ||
539 | |||
549 | /* Rules for using mmu_spte_set: | 540 | /* Rules for using mmu_spte_set: |
550 | * Set the sptep from nonpresent to present. | 541 | * Set the sptep from nonpresent to present. |
551 | * Note: the sptep being assigned *must* be either not present | 542 | * Note: the sptep being assigned *must* be either not present |
@@ -596,6 +587,14 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) | |||
596 | if (!shadow_accessed_mask) | 587 | if (!shadow_accessed_mask) |
597 | return ret; | 588 | return ret; |
598 | 589 | ||
590 | /* | ||
591 | * Flush TLB when accessed/dirty bits are changed in the page tables, | ||
592 | * to guarantee consistency between TLB and page tables. | ||
593 | */ | ||
594 | if (spte_is_bit_changed(old_spte, new_spte, | ||
595 | shadow_accessed_mask | shadow_dirty_mask)) | ||
596 | ret = true; | ||
597 | |||
599 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask)) | 598 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask)) |
600 | kvm_set_pfn_accessed(spte_to_pfn(old_spte)); | 599 | kvm_set_pfn_accessed(spte_to_pfn(old_spte)); |
601 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask)) | 600 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask)) |
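
spte_is_bit_changed() is why mmu_spte_update() now requests a TLB flush whenever the accessed or dirty bits move in either direction, not only when they are cleared; otherwise a stale cached translation could keep A/D state the new SPTE no longer reflects. The test itself is a masked comparison, shown here in isolation (the bit positions are illustrative, not the real masks):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* True if any bit selected by mask differs between the two PTE values. */
static bool bits_changed(uint64_t old, uint64_t new, uint64_t mask)
{
	return (old & mask) != (new & mask);
}

int main(void)
{
	uint64_t accessed = 1ull << 5, dirty = 1ull << 6;	/* illustrative bits */

	printf("%d\n", bits_changed(accessed, 0, accessed | dirty));	/* cleared */
	printf("%d\n", bits_changed(0, dirty, accessed | dirty));	/* set */
	printf("%d\n", bits_changed(dirty, dirty, accessed | dirty));	/* unchanged */
	return 0;
}
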
@@ -1216,6 +1215,60 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, | |||
1216 | return flush; | 1215 | return flush; |
1217 | } | 1216 | } |
1218 | 1217 | ||
1218 | static bool spte_clear_dirty(struct kvm *kvm, u64 *sptep) | ||
1219 | { | ||
1220 | u64 spte = *sptep; | ||
1221 | |||
1222 | rmap_printk("rmap_clear_dirty: spte %p %llx\n", sptep, *sptep); | ||
1223 | |||
1224 | spte &= ~shadow_dirty_mask; | ||
1225 | |||
1226 | return mmu_spte_update(sptep, spte); | ||
1227 | } | ||
1228 | |||
1229 | static bool __rmap_clear_dirty(struct kvm *kvm, unsigned long *rmapp) | ||
1230 | { | ||
1231 | u64 *sptep; | ||
1232 | struct rmap_iterator iter; | ||
1233 | bool flush = false; | ||
1234 | |||
1235 | for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { | ||
1236 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); | ||
1237 | |||
1238 | flush |= spte_clear_dirty(kvm, sptep); | ||
1239 | sptep = rmap_get_next(&iter); | ||
1240 | } | ||
1241 | |||
1242 | return flush; | ||
1243 | } | ||
1244 | |||
1245 | static bool spte_set_dirty(struct kvm *kvm, u64 *sptep) | ||
1246 | { | ||
1247 | u64 spte = *sptep; | ||
1248 | |||
1249 | rmap_printk("rmap_set_dirty: spte %p %llx\n", sptep, *sptep); | ||
1250 | |||
1251 | spte |= shadow_dirty_mask; | ||
1252 | |||
1253 | return mmu_spte_update(sptep, spte); | ||
1254 | } | ||
1255 | |||
1256 | static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp) | ||
1257 | { | ||
1258 | u64 *sptep; | ||
1259 | struct rmap_iterator iter; | ||
1260 | bool flush = false; | ||
1261 | |||
1262 | for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { | ||
1263 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); | ||
1264 | |||
1265 | flush |= spte_set_dirty(kvm, sptep); | ||
1266 | sptep = rmap_get_next(&iter); | ||
1267 | } | ||
1268 | |||
1269 | return flush; | ||
1270 | } | ||
1271 | |||
1219 | /** | 1272 | /** |
1220 | * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages | 1273 | * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages |
1221 | * @kvm: kvm instance | 1274 | * @kvm: kvm instance |
@@ -1226,7 +1279,7 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, | |||
1226 | * Used when we do not need to care about huge page mappings: e.g. during dirty | 1279 | * Used when we do not need to care about huge page mappings: e.g. during dirty |
1227 | * logging we do not have any such mappings. | 1280 | * logging we do not have any such mappings. |
1228 | */ | 1281 | */ |
1229 | void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, | 1282 | static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, |
1230 | struct kvm_memory_slot *slot, | 1283 | struct kvm_memory_slot *slot, |
1231 | gfn_t gfn_offset, unsigned long mask) | 1284 | gfn_t gfn_offset, unsigned long mask) |
1232 | { | 1285 | { |
@@ -1242,6 +1295,53 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, | |||
1242 | } | 1295 | } |
1243 | } | 1296 | } |
1244 | 1297 | ||
1298 | /** | ||
1299 | * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages | ||
1300 | * @kvm: kvm instance | ||
1301 | * @slot: slot to clear D-bit | ||
1302 | * @gfn_offset: start of the BITS_PER_LONG pages we care about | ||
1303 | * @mask: indicates which pages we should clear D-bit | ||
1304 | * | ||
1305 | * Used for PML to re-log the dirty GPAs after userspace querying dirty_bitmap. | ||
1306 | */ | ||
1307 | void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, | ||
1308 | struct kvm_memory_slot *slot, | ||
1309 | gfn_t gfn_offset, unsigned long mask) | ||
1310 | { | ||
1311 | unsigned long *rmapp; | ||
1312 | |||
1313 | while (mask) { | ||
1314 | rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), | ||
1315 | PT_PAGE_TABLE_LEVEL, slot); | ||
1316 | __rmap_clear_dirty(kvm, rmapp); | ||
1317 | |||
1318 | /* clear the first set bit */ | ||
1319 | mask &= mask - 1; | ||
1320 | } | ||
1321 | } | ||
1322 | EXPORT_SYMBOL_GPL(kvm_mmu_clear_dirty_pt_masked); | ||
1323 | |||
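
kvm_mmu_clear_dirty_pt_masked() walks the dirty bitmap word one set bit at a time: __ffs(mask) finds the lowest dirty page and mask &= mask - 1 clears it, so only the pages userspace actually queried get their D-bit cleared. The same loop in isolation (__builtin_ctzl stands in for __ffs):

#include <stdint.h>
#include <stdio.h>

/* Visit every set bit of a BITS_PER_LONG-sized dirty mask, lowest first. */
static void for_each_dirty_offset(uint64_t base_gfn, uint64_t gfn_offset,
				  unsigned long mask)
{
	while (mask) {
		unsigned long bit = __builtin_ctzl(mask);	/* __ffs(mask) */

		printf("clear D-bit for gfn %llu\n",
		       (unsigned long long)(base_gfn + gfn_offset + bit));
		mask &= mask - 1;	/* clear the lowest set bit */
	}
}

int main(void)
{
	for_each_dirty_offset(0x1000, 64, 0x16);	/* bits 1, 2 and 4 set */
	return 0;
}
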
1324 | /** | ||
1325 | * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected | ||
1326 | * PT level pages. | ||
1327 | * | ||
1328 | * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to | ||
1329 | * enable dirty logging for them. | ||
1330 | * | ||
1331 | * Used when we do not need to care about huge page mappings: e.g. during dirty | ||
1332 | * logging we do not have any such mappings. | ||
1333 | */ | ||
1334 | void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, | ||
1335 | struct kvm_memory_slot *slot, | ||
1336 | gfn_t gfn_offset, unsigned long mask) | ||
1337 | { | ||
1338 | if (kvm_x86_ops->enable_log_dirty_pt_masked) | ||
1339 | kvm_x86_ops->enable_log_dirty_pt_masked(kvm, slot, gfn_offset, | ||
1340 | mask); | ||
1341 | else | ||
1342 | kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); | ||
1343 | } | ||
1344 | |||
1245 | static bool rmap_write_protect(struct kvm *kvm, u64 gfn) | 1345 | static bool rmap_write_protect(struct kvm *kvm, u64 gfn) |
1246 | { | 1346 | { |
1247 | struct kvm_memory_slot *slot; | 1347 | struct kvm_memory_slot *slot; |
@@ -1536,7 +1636,7 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr) | |||
1536 | 1636 | ||
1537 | static void kvm_mmu_free_page(struct kvm_mmu_page *sp) | 1637 | static void kvm_mmu_free_page(struct kvm_mmu_page *sp) |
1538 | { | 1638 | { |
1539 | ASSERT(is_empty_shadow_page(sp->spt)); | 1639 | MMU_WARN_ON(!is_empty_shadow_page(sp->spt)); |
1540 | hlist_del(&sp->hash_link); | 1640 | hlist_del(&sp->hash_link); |
1541 | list_del(&sp->link); | 1641 | list_del(&sp->link); |
1542 | free_page((unsigned long)sp->spt); | 1642 | free_page((unsigned long)sp->spt); |
@@ -2501,8 +2601,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2501 | } | 2601 | } |
2502 | } | 2602 | } |
2503 | 2603 | ||
2504 | if (pte_access & ACC_WRITE_MASK) | 2604 | if (pte_access & ACC_WRITE_MASK) { |
2505 | mark_page_dirty(vcpu->kvm, gfn); | 2605 | mark_page_dirty(vcpu->kvm, gfn); |
2606 | spte |= shadow_dirty_mask; | ||
2607 | } | ||
2506 | 2608 | ||
2507 | set_pte: | 2609 | set_pte: |
2508 | if (mmu_spte_update(sptep, spte)) | 2610 | if (mmu_spte_update(sptep, spte)) |
@@ -2818,6 +2920,18 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
2818 | */ | 2920 | */ |
2819 | gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); | 2921 | gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); |
2820 | 2922 | ||
2923 | /* | ||
2924 | * Theoretically we could also set dirty bit (and flush TLB) here in | ||
2925 | * order to eliminate unnecessary PML logging. See comments in | ||
2926 | * set_spte. But fast_page_fault is very unlikely to happen with PML | ||
2927 | * enabled, so we do not do this. This might result in the same GPA | ||
2928 | * to be logged in PML buffer again when the write really happens, and | ||
2929 | * eventually to be called by mark_page_dirty twice. But it's also no | ||
2930 | * harm. This also avoids the TLB flush needed after setting dirty bit | ||
2931 | * so non-PML cases won't be impacted. | ||
2932 | * | ||
2933 | * Compare with set_spte where instead shadow_dirty_mask is set. | ||
2934 | */ | ||
2821 | if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte) | 2935 | if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte) |
2822 | mark_page_dirty(vcpu->kvm, gfn); | 2936 | mark_page_dirty(vcpu->kvm, gfn); |
2823 | 2937 | ||
@@ -3041,7 +3155,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) | |||
3041 | for (i = 0; i < 4; ++i) { | 3155 | for (i = 0; i < 4; ++i) { |
3042 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 3156 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
3043 | 3157 | ||
3044 | ASSERT(!VALID_PAGE(root)); | 3158 | MMU_WARN_ON(VALID_PAGE(root)); |
3045 | spin_lock(&vcpu->kvm->mmu_lock); | 3159 | spin_lock(&vcpu->kvm->mmu_lock); |
3046 | make_mmu_pages_available(vcpu); | 3160 | make_mmu_pages_available(vcpu); |
3047 | sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), | 3161 | sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), |
@@ -3079,7 +3193,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
3079 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { | 3193 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { |
3080 | hpa_t root = vcpu->arch.mmu.root_hpa; | 3194 | hpa_t root = vcpu->arch.mmu.root_hpa; |
3081 | 3195 | ||
3082 | ASSERT(!VALID_PAGE(root)); | 3196 | MMU_WARN_ON(VALID_PAGE(root)); |
3083 | 3197 | ||
3084 | spin_lock(&vcpu->kvm->mmu_lock); | 3198 | spin_lock(&vcpu->kvm->mmu_lock); |
3085 | make_mmu_pages_available(vcpu); | 3199 | make_mmu_pages_available(vcpu); |
@@ -3104,7 +3218,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
3104 | for (i = 0; i < 4; ++i) { | 3218 | for (i = 0; i < 4; ++i) { |
3105 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 3219 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
3106 | 3220 | ||
3107 | ASSERT(!VALID_PAGE(root)); | 3221 | MMU_WARN_ON(VALID_PAGE(root)); |
3108 | if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { | 3222 | if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { |
3109 | pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i); | 3223 | pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i); |
3110 | if (!is_present_gpte(pdptr)) { | 3224 | if (!is_present_gpte(pdptr)) { |
@@ -3329,8 +3443,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | |||
3329 | if (r) | 3443 | if (r) |
3330 | return r; | 3444 | return r; |
3331 | 3445 | ||
3332 | ASSERT(vcpu); | 3446 | MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
3333 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); | ||
3334 | 3447 | ||
3335 | gfn = gva >> PAGE_SHIFT; | 3448 | gfn = gva >> PAGE_SHIFT; |
3336 | 3449 | ||
@@ -3396,8 +3509,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
3396 | int write = error_code & PFERR_WRITE_MASK; | 3509 | int write = error_code & PFERR_WRITE_MASK; |
3397 | bool map_writable; | 3510 | bool map_writable; |
3398 | 3511 | ||
3399 | ASSERT(vcpu); | 3512 | MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
3400 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); | ||
3401 | 3513 | ||
3402 | if (unlikely(error_code & PFERR_RSVD_MASK)) { | 3514 | if (unlikely(error_code & PFERR_RSVD_MASK)) { |
3403 | r = handle_mmio_page_fault(vcpu, gpa, error_code, true); | 3515 | r = handle_mmio_page_fault(vcpu, gpa, error_code, true); |
@@ -3718,7 +3830,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
3718 | update_permission_bitmask(vcpu, context, false); | 3830 | update_permission_bitmask(vcpu, context, false); |
3719 | update_last_pte_bitmap(vcpu, context); | 3831 | update_last_pte_bitmap(vcpu, context); |
3720 | 3832 | ||
3721 | ASSERT(is_pae(vcpu)); | 3833 | MMU_WARN_ON(!is_pae(vcpu)); |
3722 | context->page_fault = paging64_page_fault; | 3834 | context->page_fault = paging64_page_fault; |
3723 | context->gva_to_gpa = paging64_gva_to_gpa; | 3835 | context->gva_to_gpa = paging64_gva_to_gpa; |
3724 | context->sync_page = paging64_sync_page; | 3836 | context->sync_page = paging64_sync_page; |
@@ -3763,7 +3875,7 @@ static void paging32E_init_context(struct kvm_vcpu *vcpu, | |||
3763 | 3875 | ||
3764 | static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | 3876 | static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) |
3765 | { | 3877 | { |
3766 | struct kvm_mmu *context = vcpu->arch.walk_mmu; | 3878 | struct kvm_mmu *context = &vcpu->arch.mmu; |
3767 | 3879 | ||
3768 | context->base_role.word = 0; | 3880 | context->base_role.word = 0; |
3769 | context->page_fault = tdp_page_fault; | 3881 | context->page_fault = tdp_page_fault; |
@@ -3803,11 +3915,12 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
3803 | update_last_pte_bitmap(vcpu, context); | 3915 | update_last_pte_bitmap(vcpu, context); |
3804 | } | 3916 | } |
3805 | 3917 | ||
3806 | void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) | 3918 | void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) |
3807 | { | 3919 | { |
3808 | bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); | 3920 | bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); |
3809 | ASSERT(vcpu); | 3921 | struct kvm_mmu *context = &vcpu->arch.mmu; |
3810 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 3922 | |
3923 | MMU_WARN_ON(VALID_PAGE(context->root_hpa)); | ||
3811 | 3924 | ||
3812 | if (!is_paging(vcpu)) | 3925 | if (!is_paging(vcpu)) |
3813 | nonpaging_init_context(vcpu, context); | 3926 | nonpaging_init_context(vcpu, context); |
@@ -3818,19 +3931,19 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) | |||
3818 | else | 3931 | else |
3819 | paging32_init_context(vcpu, context); | 3932 | paging32_init_context(vcpu, context); |
3820 | 3933 | ||
3821 | vcpu->arch.mmu.base_role.nxe = is_nx(vcpu); | 3934 | context->base_role.nxe = is_nx(vcpu); |
3822 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); | 3935 | context->base_role.cr4_pae = !!is_pae(vcpu); |
3823 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); | 3936 | context->base_role.cr0_wp = is_write_protection(vcpu); |
3824 | vcpu->arch.mmu.base_role.smep_andnot_wp | 3937 | context->base_role.smep_andnot_wp |
3825 | = smep && !is_write_protection(vcpu); | 3938 | = smep && !is_write_protection(vcpu); |
3826 | } | 3939 | } |
3827 | EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); | 3940 | EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); |
3828 | 3941 | ||
3829 | void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, | 3942 | void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly) |
3830 | bool execonly) | ||
3831 | { | 3943 | { |
3832 | ASSERT(vcpu); | 3944 | struct kvm_mmu *context = &vcpu->arch.mmu; |
3833 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 3945 | |
3946 | MMU_WARN_ON(VALID_PAGE(context->root_hpa)); | ||
3834 | 3947 | ||
3835 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | 3948 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); |
3836 | 3949 | ||
@@ -3851,11 +3964,13 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); | |||
3851 | 3964 | ||
3852 | static void init_kvm_softmmu(struct kvm_vcpu *vcpu) | 3965 | static void init_kvm_softmmu(struct kvm_vcpu *vcpu) |
3853 | { | 3966 | { |
3854 | kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); | 3967 | struct kvm_mmu *context = &vcpu->arch.mmu; |
3855 | vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3; | 3968 | |
3856 | vcpu->arch.walk_mmu->get_cr3 = get_cr3; | 3969 | kvm_init_shadow_mmu(vcpu); |
3857 | vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read; | 3970 | context->set_cr3 = kvm_x86_ops->set_cr3; |
3858 | vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; | 3971 | context->get_cr3 = get_cr3; |
3972 | context->get_pdptr = kvm_pdptr_read; | ||
3973 | context->inject_page_fault = kvm_inject_page_fault; | ||
3859 | } | 3974 | } |
3860 | 3975 | ||
3861 | static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | 3976 | static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) |
@@ -3900,17 +4015,15 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
3900 | static void init_kvm_mmu(struct kvm_vcpu *vcpu) | 4015 | static void init_kvm_mmu(struct kvm_vcpu *vcpu) |
3901 | { | 4016 | { |
3902 | if (mmu_is_nested(vcpu)) | 4017 | if (mmu_is_nested(vcpu)) |
3903 | return init_kvm_nested_mmu(vcpu); | 4018 | init_kvm_nested_mmu(vcpu); |
3904 | else if (tdp_enabled) | 4019 | else if (tdp_enabled) |
3905 | return init_kvm_tdp_mmu(vcpu); | 4020 | init_kvm_tdp_mmu(vcpu); |
3906 | else | 4021 | else |
3907 | return init_kvm_softmmu(vcpu); | 4022 | init_kvm_softmmu(vcpu); |
3908 | } | 4023 | } |
3909 | 4024 | ||
3910 | void kvm_mmu_reset_context(struct kvm_vcpu *vcpu) | 4025 | void kvm_mmu_reset_context(struct kvm_vcpu *vcpu) |
3911 | { | 4026 | { |
3912 | ASSERT(vcpu); | ||
3913 | |||
3914 | kvm_mmu_unload(vcpu); | 4027 | kvm_mmu_unload(vcpu); |
3915 | init_kvm_mmu(vcpu); | 4028 | init_kvm_mmu(vcpu); |
3916 | } | 4029 | } |
@@ -4266,8 +4379,6 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) | |||
4266 | struct page *page; | 4379 | struct page *page; |
4267 | int i; | 4380 | int i; |
4268 | 4381 | ||
4269 | ASSERT(vcpu); | ||
4270 | |||
4271 | /* | 4382 | /* |
4272 | * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. | 4383 | * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. |
4273 | * Therefore we need to allocate shadow page tables in the first | 4384 | * Therefore we need to allocate shadow page tables in the first |
@@ -4286,8 +4397,6 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) | |||
4286 | 4397 | ||
4287 | int kvm_mmu_create(struct kvm_vcpu *vcpu) | 4398 | int kvm_mmu_create(struct kvm_vcpu *vcpu) |
4288 | { | 4399 | { |
4289 | ASSERT(vcpu); | ||
4290 | |||
4291 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; | 4400 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; |
4292 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 4401 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
4293 | vcpu->arch.mmu.translate_gpa = translate_gpa; | 4402 | vcpu->arch.mmu.translate_gpa = translate_gpa; |
@@ -4298,19 +4407,18 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) | |||
4298 | 4407 | ||
4299 | void kvm_mmu_setup(struct kvm_vcpu *vcpu) | 4408 | void kvm_mmu_setup(struct kvm_vcpu *vcpu) |
4300 | { | 4409 | { |
4301 | ASSERT(vcpu); | 4410 | MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
4302 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | ||
4303 | 4411 | ||
4304 | init_kvm_mmu(vcpu); | 4412 | init_kvm_mmu(vcpu); |
4305 | } | 4413 | } |
4306 | 4414 | ||
4307 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | 4415 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, |
4416 | struct kvm_memory_slot *memslot) | ||
4308 | { | 4417 | { |
4309 | struct kvm_memory_slot *memslot; | ||
4310 | gfn_t last_gfn; | 4418 | gfn_t last_gfn; |
4311 | int i; | 4419 | int i; |
4420 | bool flush = false; | ||
4312 | 4421 | ||
4313 | memslot = id_to_memslot(kvm->memslots, slot); | ||
4314 | last_gfn = memslot->base_gfn + memslot->npages - 1; | 4422 | last_gfn = memslot->base_gfn + memslot->npages - 1; |
4315 | 4423 | ||
4316 | spin_lock(&kvm->mmu_lock); | 4424 | spin_lock(&kvm->mmu_lock); |
@@ -4325,7 +4433,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
4325 | 4433 | ||
4326 | for (index = 0; index <= last_index; ++index, ++rmapp) { | 4434 | for (index = 0; index <= last_index; ++index, ++rmapp) { |
4327 | if (*rmapp) | 4435 | if (*rmapp) |
4328 | __rmap_write_protect(kvm, rmapp, false); | 4436 | flush |= __rmap_write_protect(kvm, rmapp, |
4437 | false); | ||
4329 | 4438 | ||
4330 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) | 4439 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) |
4331 | cond_resched_lock(&kvm->mmu_lock); | 4440 | cond_resched_lock(&kvm->mmu_lock); |
@@ -4352,8 +4461,124 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
4352 | * instead of PT_WRITABLE_MASK, that means it does not depend | 4461 | * instead of PT_WRITABLE_MASK, that means it does not depend |
4353 | * on PT_WRITABLE_MASK anymore. | 4462 | * on PT_WRITABLE_MASK anymore. |
4354 | */ | 4463 | */ |
4355 | kvm_flush_remote_tlbs(kvm); | 4464 | if (flush) |
4465 | kvm_flush_remote_tlbs(kvm); | ||
4466 | } | ||
4467 | |||
4468 | void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, | ||
4469 | struct kvm_memory_slot *memslot) | ||
4470 | { | ||
4471 | gfn_t last_gfn; | ||
4472 | unsigned long *rmapp; | ||
4473 | unsigned long last_index, index; | ||
4474 | bool flush = false; | ||
4475 | |||
4476 | last_gfn = memslot->base_gfn + memslot->npages - 1; | ||
4477 | |||
4478 | spin_lock(&kvm->mmu_lock); | ||
4479 | |||
4480 | rmapp = memslot->arch.rmap[PT_PAGE_TABLE_LEVEL - 1]; | ||
4481 | last_index = gfn_to_index(last_gfn, memslot->base_gfn, | ||
4482 | PT_PAGE_TABLE_LEVEL); | ||
4483 | |||
4484 | for (index = 0; index <= last_index; ++index, ++rmapp) { | ||
4485 | if (*rmapp) | ||
4486 | flush |= __rmap_clear_dirty(kvm, rmapp); | ||
4487 | |||
4488 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) | ||
4489 | cond_resched_lock(&kvm->mmu_lock); | ||
4490 | } | ||
4491 | |||
4492 | spin_unlock(&kvm->mmu_lock); | ||
4493 | |||
4494 | lockdep_assert_held(&kvm->slots_lock); | ||
4495 | |||
4496 | /* | ||
4497 | * It's also safe to flush TLBs out of mmu lock here as currently this | ||
4498 | * function is only used for dirty logging, in which case flushing TLB | ||
4499 | * out of mmu lock also guarantees no dirty pages will be lost in | ||
4500 | * dirty_bitmap. | ||
4501 | */ | ||
4502 | if (flush) | ||
4503 | kvm_flush_remote_tlbs(kvm); | ||
4504 | } | ||
4505 | EXPORT_SYMBOL_GPL(kvm_mmu_slot_leaf_clear_dirty); | ||
4506 | |||
4507 | void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, | ||
4508 | struct kvm_memory_slot *memslot) | ||
4509 | { | ||
4510 | gfn_t last_gfn; | ||
4511 | int i; | ||
4512 | bool flush = false; | ||
4513 | |||
4514 | last_gfn = memslot->base_gfn + memslot->npages - 1; | ||
4515 | |||
4516 | spin_lock(&kvm->mmu_lock); | ||
4517 | |||
4518 | for (i = PT_PAGE_TABLE_LEVEL + 1; /* skip rmap for 4K page */ | ||
4519 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | ||
4520 | unsigned long *rmapp; | ||
4521 | unsigned long last_index, index; | ||
4522 | |||
4523 | rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL]; | ||
4524 | last_index = gfn_to_index(last_gfn, memslot->base_gfn, i); | ||
4525 | |||
4526 | for (index = 0; index <= last_index; ++index, ++rmapp) { | ||
4527 | if (*rmapp) | ||
4528 | flush |= __rmap_write_protect(kvm, rmapp, | ||
4529 | false); | ||
4530 | |||
4531 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) | ||
4532 | cond_resched_lock(&kvm->mmu_lock); | ||
4533 | } | ||
4534 | } | ||
4535 | spin_unlock(&kvm->mmu_lock); | ||
4536 | |||
4537 | /* see kvm_mmu_slot_remove_write_access */ | ||
4538 | lockdep_assert_held(&kvm->slots_lock); | ||
4539 | |||
4540 | if (flush) | ||
4541 | kvm_flush_remote_tlbs(kvm); | ||
4542 | } | ||
4543 | EXPORT_SYMBOL_GPL(kvm_mmu_slot_largepage_remove_write_access); | ||
4544 | |||
4545 | void kvm_mmu_slot_set_dirty(struct kvm *kvm, | ||
4546 | struct kvm_memory_slot *memslot) | ||
4547 | { | ||
4548 | gfn_t last_gfn; | ||
4549 | int i; | ||
4550 | bool flush = false; | ||
4551 | |||
4552 | last_gfn = memslot->base_gfn + memslot->npages - 1; | ||
4553 | |||
4554 | spin_lock(&kvm->mmu_lock); | ||
4555 | |||
4556 | for (i = PT_PAGE_TABLE_LEVEL; | ||
4557 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | ||
4558 | unsigned long *rmapp; | ||
4559 | unsigned long last_index, index; | ||
4560 | |||
4561 | rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL]; | ||
4562 | last_index = gfn_to_index(last_gfn, memslot->base_gfn, i); | ||
4563 | |||
4564 | for (index = 0; index <= last_index; ++index, ++rmapp) { | ||
4565 | if (*rmapp) | ||
4566 | flush |= __rmap_set_dirty(kvm, rmapp); | ||
4567 | |||
4568 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) | ||
4569 | cond_resched_lock(&kvm->mmu_lock); | ||
4570 | } | ||
4571 | } | ||
4572 | |||
4573 | spin_unlock(&kvm->mmu_lock); | ||
4574 | |||
4575 | lockdep_assert_held(&kvm->slots_lock); | ||
4576 | |||
4577 | /* see kvm_mmu_slot_leaf_clear_dirty */ | ||
4578 | if (flush) | ||
4579 | kvm_flush_remote_tlbs(kvm); | ||
4356 | } | 4580 | } |
4581 | EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty); | ||
4357 | 4582 | ||
4358 | #define BATCH_ZAP_PAGES 10 | 4583 | #define BATCH_ZAP_PAGES 10 |
4359 | static void kvm_zap_obsolete_pages(struct kvm *kvm) | 4584 | static void kvm_zap_obsolete_pages(struct kvm *kvm) |
@@ -4606,8 +4831,6 @@ EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy); | |||
4606 | 4831 | ||
4607 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu) | 4832 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu) |
4608 | { | 4833 | { |
4609 | ASSERT(vcpu); | ||
4610 | |||
4611 | kvm_mmu_unload(vcpu); | 4834 | kvm_mmu_unload(vcpu); |
4612 | free_mmu_pages(vcpu); | 4835 | free_mmu_pages(vcpu); |
4613 | mmu_free_memory_caches(vcpu); | 4836 | mmu_free_memory_caches(vcpu); |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index bde8ee725754..c7d65637c851 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -44,18 +44,6 @@ | |||
44 | #define PT_DIRECTORY_LEVEL 2 | 44 | #define PT_DIRECTORY_LEVEL 2 |
45 | #define PT_PAGE_TABLE_LEVEL 1 | 45 | #define PT_PAGE_TABLE_LEVEL 1 |
46 | 46 | ||
47 | #define PFERR_PRESENT_BIT 0 | ||
48 | #define PFERR_WRITE_BIT 1 | ||
49 | #define PFERR_USER_BIT 2 | ||
50 | #define PFERR_RSVD_BIT 3 | ||
51 | #define PFERR_FETCH_BIT 4 | ||
52 | |||
53 | #define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT) | ||
54 | #define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT) | ||
55 | #define PFERR_USER_MASK (1U << PFERR_USER_BIT) | ||
56 | #define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT) | ||
57 | #define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT) | ||
58 | |||
59 | static inline u64 rsvd_bits(int s, int e) | 47 | static inline u64 rsvd_bits(int s, int e) |
60 | { | 48 | { |
61 | return ((1ULL << (e - s + 1)) - 1) << s; | 49 | return ((1ULL << (e - s + 1)) - 1) << s; |
@@ -81,9 +69,8 @@ enum { | |||
81 | }; | 69 | }; |
82 | 70 | ||
83 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); | 71 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); |
84 | void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); | 72 | void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); |
85 | void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, | 73 | void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); |
86 | bool execonly); | ||
87 | void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | 74 | void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
88 | bool ept); | 75 | bool ept); |
89 | 76 | ||
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 41dd0387cccb..a17d848c6d42 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -2003,8 +2003,8 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, | |||
2003 | 2003 | ||
2004 | static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) | 2004 | static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) |
2005 | { | 2005 | { |
2006 | kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu); | 2006 | WARN_ON(mmu_is_nested(vcpu)); |
2007 | 2007 | kvm_init_shadow_mmu(vcpu); | |
2008 | vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; | 2008 | vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; |
2009 | vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; | 2009 | vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; |
2010 | vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; | 2010 | vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index c2a34bb5ad93..7c7bc8bef21f 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -848,6 +848,24 @@ TRACE_EVENT(kvm_track_tsc, | |||
848 | 848 | ||
849 | #endif /* CONFIG_X86_64 */ | 849 | #endif /* CONFIG_X86_64 */ |
850 | 850 | ||
851 | /* | ||
852 | * Tracepoint for PML full VMEXIT. | ||
853 | */ | ||
854 | TRACE_EVENT(kvm_pml_full, | ||
855 | TP_PROTO(unsigned int vcpu_id), | ||
856 | TP_ARGS(vcpu_id), | ||
857 | |||
858 | TP_STRUCT__entry( | ||
859 | __field( unsigned int, vcpu_id ) | ||
860 | ), | ||
861 | |||
862 | TP_fast_assign( | ||
863 | __entry->vcpu_id = vcpu_id; | ||
864 | ), | ||
865 | |||
866 | TP_printk("vcpu %d: PML full", __entry->vcpu_id) | ||
867 | ); | ||
868 | |||
851 | TRACE_EVENT(kvm_ple_window, | 869 | TRACE_EVENT(kvm_ple_window, |
852 | TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), | 870 | TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), |
853 | TP_ARGS(grow, vcpu_id, new, old), | 871 | TP_ARGS(grow, vcpu_id, new, old), |
@@ -914,6 +932,26 @@ TRACE_EVENT(kvm_pvclock_update, | |||
914 | __entry->flags) | 932 | __entry->flags) |
915 | ); | 933 | ); |
916 | 934 | ||
935 | TRACE_EVENT(kvm_wait_lapic_expire, | ||
936 | TP_PROTO(unsigned int vcpu_id, s64 delta), | ||
937 | TP_ARGS(vcpu_id, delta), | ||
938 | |||
939 | TP_STRUCT__entry( | ||
940 | __field( unsigned int, vcpu_id ) | ||
941 | __field( s64, delta ) | ||
942 | ), | ||
943 | |||
944 | TP_fast_assign( | ||
945 | __entry->vcpu_id = vcpu_id; | ||
946 | __entry->delta = delta; | ||
947 | ), | ||
948 | |||
949 | TP_printk("vcpu %u: delta %lld (%s)", | ||
950 | __entry->vcpu_id, | ||
951 | __entry->delta, | ||
952 | __entry->delta < 0 ? "early" : "late") | ||
953 | ); | ||
954 | |||
917 | #endif /* _TRACE_KVM_H */ | 955 | #endif /* _TRACE_KVM_H */ |
918 | 956 | ||
919 | #undef TRACE_INCLUDE_PATH | 957 | #undef TRACE_INCLUDE_PATH |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d4c58d884838..3f73bfad0349 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <asm/perf_event.h> | 45 | #include <asm/perf_event.h> |
46 | #include <asm/debugreg.h> | 46 | #include <asm/debugreg.h> |
47 | #include <asm/kexec.h> | 47 | #include <asm/kexec.h> |
48 | #include <asm/apic.h> | ||
48 | 49 | ||
49 | #include "trace.h" | 50 | #include "trace.h" |
50 | 51 | ||
@@ -101,6 +102,9 @@ module_param(nested, bool, S_IRUGO); | |||
101 | 102 | ||
102 | static u64 __read_mostly host_xss; | 103 | static u64 __read_mostly host_xss; |
103 | 104 | ||
105 | static bool __read_mostly enable_pml = 1; | ||
106 | module_param_named(pml, enable_pml, bool, S_IRUGO); | ||
107 | |||
104 | #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) | 108 | #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) |
105 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) | 109 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) |
106 | #define KVM_VM_CR0_ALWAYS_ON \ | 110 | #define KVM_VM_CR0_ALWAYS_ON \ |
@@ -215,7 +219,12 @@ struct __packed vmcs12 { | |||
215 | u64 tsc_offset; | 219 | u64 tsc_offset; |
216 | u64 virtual_apic_page_addr; | 220 | u64 virtual_apic_page_addr; |
217 | u64 apic_access_addr; | 221 | u64 apic_access_addr; |
222 | u64 posted_intr_desc_addr; | ||
218 | u64 ept_pointer; | 223 | u64 ept_pointer; |
224 | u64 eoi_exit_bitmap0; | ||
225 | u64 eoi_exit_bitmap1; | ||
226 | u64 eoi_exit_bitmap2; | ||
227 | u64 eoi_exit_bitmap3; | ||
219 | u64 xss_exit_bitmap; | 228 | u64 xss_exit_bitmap; |
220 | u64 guest_physical_address; | 229 | u64 guest_physical_address; |
221 | u64 vmcs_link_pointer; | 230 | u64 vmcs_link_pointer; |
@@ -330,6 +339,7 @@ struct __packed vmcs12 { | |||
330 | u32 vmx_preemption_timer_value; | 339 | u32 vmx_preemption_timer_value; |
331 | u32 padding32[7]; /* room for future expansion */ | 340 | u32 padding32[7]; /* room for future expansion */ |
332 | u16 virtual_processor_id; | 341 | u16 virtual_processor_id; |
342 | u16 posted_intr_nv; | ||
333 | u16 guest_es_selector; | 343 | u16 guest_es_selector; |
334 | u16 guest_cs_selector; | 344 | u16 guest_cs_selector; |
335 | u16 guest_ss_selector; | 345 | u16 guest_ss_selector; |
@@ -338,6 +348,7 @@ struct __packed vmcs12 { | |||
338 | u16 guest_gs_selector; | 348 | u16 guest_gs_selector; |
339 | u16 guest_ldtr_selector; | 349 | u16 guest_ldtr_selector; |
340 | u16 guest_tr_selector; | 350 | u16 guest_tr_selector; |
351 | u16 guest_intr_status; | ||
341 | u16 host_es_selector; | 352 | u16 host_es_selector; |
342 | u16 host_cs_selector; | 353 | u16 host_cs_selector; |
343 | u16 host_ss_selector; | 354 | u16 host_ss_selector; |
@@ -401,6 +412,10 @@ struct nested_vmx { | |||
401 | */ | 412 | */ |
402 | struct page *apic_access_page; | 413 | struct page *apic_access_page; |
403 | struct page *virtual_apic_page; | 414 | struct page *virtual_apic_page; |
415 | struct page *pi_desc_page; | ||
416 | struct pi_desc *pi_desc; | ||
417 | bool pi_pending; | ||
418 | u16 posted_intr_nv; | ||
404 | u64 msr_ia32_feature_control; | 419 | u64 msr_ia32_feature_control; |
405 | 420 | ||
406 | struct hrtimer preemption_timer; | 421 | struct hrtimer preemption_timer; |
@@ -408,6 +423,23 @@ struct nested_vmx { | |||
408 | 423 | ||
409 | /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ | 424 | /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ |
410 | u64 vmcs01_debugctl; | 425 | u64 vmcs01_debugctl; |
426 | |||
427 | u32 nested_vmx_procbased_ctls_low; | ||
428 | u32 nested_vmx_procbased_ctls_high; | ||
429 | u32 nested_vmx_true_procbased_ctls_low; | ||
430 | u32 nested_vmx_secondary_ctls_low; | ||
431 | u32 nested_vmx_secondary_ctls_high; | ||
432 | u32 nested_vmx_pinbased_ctls_low; | ||
433 | u32 nested_vmx_pinbased_ctls_high; | ||
434 | u32 nested_vmx_exit_ctls_low; | ||
435 | u32 nested_vmx_exit_ctls_high; | ||
436 | u32 nested_vmx_true_exit_ctls_low; | ||
437 | u32 nested_vmx_entry_ctls_low; | ||
438 | u32 nested_vmx_entry_ctls_high; | ||
439 | u32 nested_vmx_true_entry_ctls_low; | ||
440 | u32 nested_vmx_misc_low; | ||
441 | u32 nested_vmx_misc_high; | ||
442 | u32 nested_vmx_ept_caps; | ||
411 | }; | 443 | }; |
412 | 444 | ||
413 | #define POSTED_INTR_ON 0 | 445 | #define POSTED_INTR_ON 0 |
@@ -511,6 +543,10 @@ struct vcpu_vmx { | |||
511 | /* Dynamic PLE window. */ | 543 | /* Dynamic PLE window. */ |
512 | int ple_window; | 544 | int ple_window; |
513 | bool ple_window_dirty; | 545 | bool ple_window_dirty; |
546 | |||
547 | /* Support for PML */ | ||
548 | #define PML_ENTITY_NUM 512 | ||
549 | struct page *pml_pg; | ||
514 | }; | 550 | }; |
515 | 551 | ||
516 | enum segment_cache_field { | 552 | enum segment_cache_field { |
@@ -594,6 +630,7 @@ static int max_shadow_read_write_fields = | |||
594 | 630 | ||
595 | static const unsigned short vmcs_field_to_offset_table[] = { | 631 | static const unsigned short vmcs_field_to_offset_table[] = { |
596 | FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), | 632 | FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), |
633 | FIELD(POSTED_INTR_NV, posted_intr_nv), | ||
597 | FIELD(GUEST_ES_SELECTOR, guest_es_selector), | 634 | FIELD(GUEST_ES_SELECTOR, guest_es_selector), |
598 | FIELD(GUEST_CS_SELECTOR, guest_cs_selector), | 635 | FIELD(GUEST_CS_SELECTOR, guest_cs_selector), |
599 | FIELD(GUEST_SS_SELECTOR, guest_ss_selector), | 636 | FIELD(GUEST_SS_SELECTOR, guest_ss_selector), |
@@ -602,6 +639,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { | |||
602 | FIELD(GUEST_GS_SELECTOR, guest_gs_selector), | 639 | FIELD(GUEST_GS_SELECTOR, guest_gs_selector), |
603 | FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector), | 640 | FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector), |
604 | FIELD(GUEST_TR_SELECTOR, guest_tr_selector), | 641 | FIELD(GUEST_TR_SELECTOR, guest_tr_selector), |
642 | FIELD(GUEST_INTR_STATUS, guest_intr_status), | ||
605 | FIELD(HOST_ES_SELECTOR, host_es_selector), | 643 | FIELD(HOST_ES_SELECTOR, host_es_selector), |
606 | FIELD(HOST_CS_SELECTOR, host_cs_selector), | 644 | FIELD(HOST_CS_SELECTOR, host_cs_selector), |
607 | FIELD(HOST_SS_SELECTOR, host_ss_selector), | 645 | FIELD(HOST_SS_SELECTOR, host_ss_selector), |
@@ -618,7 +656,12 @@ static const unsigned short vmcs_field_to_offset_table[] = { | |||
618 | FIELD64(TSC_OFFSET, tsc_offset), | 656 | FIELD64(TSC_OFFSET, tsc_offset), |
619 | FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), | 657 | FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), |
620 | FIELD64(APIC_ACCESS_ADDR, apic_access_addr), | 658 | FIELD64(APIC_ACCESS_ADDR, apic_access_addr), |
659 | FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr), | ||
621 | FIELD64(EPT_POINTER, ept_pointer), | 660 | FIELD64(EPT_POINTER, ept_pointer), |
661 | FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0), | ||
662 | FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1), | ||
663 | FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2), | ||
664 | FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3), | ||
622 | FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), | 665 | FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), |
623 | FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), | 666 | FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), |
624 | FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), | 667 | FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), |
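The FIELD/FIELD64 entries added above extend the table that maps VMCS field encodings to byte offsets inside struct vmcs12, so shadow-VMCS emulation can read and write the new posted-interrupt and EOI-exit-bitmap fields generically. A minimal standalone sketch of that offsetof-based lookup, using a simplified stand-in struct and hypothetical field encodings (not the architectural ones):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for struct vmcs12 (illustrative only). */
struct vmcs12_demo {
	uint64_t posted_intr_desc_addr;
	uint64_t eoi_exit_bitmap0;
	uint16_t guest_intr_status;
};

/* Hypothetical field encodings, not the real VMCS encodings. */
enum { F_POSTED_INTR_DESC = 0, F_EOI_EXIT_BITMAP0 = 1, F_GUEST_INTR_STATUS = 2 };

static const unsigned short field_to_offset[] = {
	[F_POSTED_INTR_DESC]  = offsetof(struct vmcs12_demo, posted_intr_desc_addr),
	[F_EOI_EXIT_BITMAP0]  = offsetof(struct vmcs12_demo, eoi_exit_bitmap0),
	[F_GUEST_INTR_STATUS] = offsetof(struct vmcs12_demo, guest_intr_status),
};

int main(void)
{
	struct vmcs12_demo v = { .eoi_exit_bitmap0 = 0xffULL };
	/* Generic 64-bit read through the offset table. */
	uint64_t val = *(uint64_t *)((char *)&v + field_to_offset[F_EOI_EXIT_BITMAP0]);

	printf("eoi_exit_bitmap0 = 0x%llx\n", (unsigned long long)val);
	return 0;
}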
@@ -766,6 +809,7 @@ static void kvm_cpu_vmxon(u64 addr); | |||
766 | static void kvm_cpu_vmxoff(void); | 809 | static void kvm_cpu_vmxoff(void); |
767 | static bool vmx_mpx_supported(void); | 810 | static bool vmx_mpx_supported(void); |
768 | static bool vmx_xsaves_supported(void); | 811 | static bool vmx_xsaves_supported(void); |
812 | static int vmx_vm_has_apicv(struct kvm *kvm); | ||
769 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | 813 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); |
770 | static void vmx_set_segment(struct kvm_vcpu *vcpu, | 814 | static void vmx_set_segment(struct kvm_vcpu *vcpu, |
771 | struct kvm_segment *var, int seg); | 815 | struct kvm_segment *var, int seg); |
@@ -793,6 +837,7 @@ static unsigned long *vmx_msr_bitmap_legacy; | |||
793 | static unsigned long *vmx_msr_bitmap_longmode; | 837 | static unsigned long *vmx_msr_bitmap_longmode; |
794 | static unsigned long *vmx_msr_bitmap_legacy_x2apic; | 838 | static unsigned long *vmx_msr_bitmap_legacy_x2apic; |
795 | static unsigned long *vmx_msr_bitmap_longmode_x2apic; | 839 | static unsigned long *vmx_msr_bitmap_longmode_x2apic; |
840 | static unsigned long *vmx_msr_bitmap_nested; | ||
796 | static unsigned long *vmx_vmread_bitmap; | 841 | static unsigned long *vmx_vmread_bitmap; |
797 | static unsigned long *vmx_vmwrite_bitmap; | 842 | static unsigned long *vmx_vmwrite_bitmap; |
798 | 843 | ||
@@ -959,16 +1004,6 @@ static inline bool cpu_has_vmx_ept_execute_only(void) | |||
959 | return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT; | 1004 | return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT; |
960 | } | 1005 | } |
961 | 1006 | ||
962 | static inline bool cpu_has_vmx_eptp_uncacheable(void) | ||
963 | { | ||
964 | return vmx_capability.ept & VMX_EPTP_UC_BIT; | ||
965 | } | ||
966 | |||
967 | static inline bool cpu_has_vmx_eptp_writeback(void) | ||
968 | { | ||
969 | return vmx_capability.ept & VMX_EPTP_WB_BIT; | ||
970 | } | ||
971 | |||
972 | static inline bool cpu_has_vmx_ept_2m_page(void) | 1007 | static inline bool cpu_has_vmx_ept_2m_page(void) |
973 | { | 1008 | { |
974 | return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT; | 1009 | return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT; |
@@ -1073,6 +1108,11 @@ static inline bool cpu_has_vmx_shadow_vmcs(void) | |||
1073 | SECONDARY_EXEC_SHADOW_VMCS; | 1108 | SECONDARY_EXEC_SHADOW_VMCS; |
1074 | } | 1109 | } |
1075 | 1110 | ||
1111 | static inline bool cpu_has_vmx_pml(void) | ||
1112 | { | ||
1113 | return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML; | ||
1114 | } | ||
1115 | |||
1076 | static inline bool report_flexpriority(void) | 1116 | static inline bool report_flexpriority(void) |
1077 | { | 1117 | { |
1078 | return flexpriority_enabled; | 1118 | return flexpriority_enabled; |
@@ -1112,6 +1152,26 @@ static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12) | |||
1112 | vmx_xsaves_supported(); | 1152 | vmx_xsaves_supported(); |
1113 | } | 1153 | } |
1114 | 1154 | ||
1155 | static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12) | ||
1156 | { | ||
1157 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); | ||
1158 | } | ||
1159 | |||
1160 | static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12) | ||
1161 | { | ||
1162 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT); | ||
1163 | } | ||
1164 | |||
1165 | static inline bool nested_cpu_has_vid(struct vmcs12 *vmcs12) | ||
1166 | { | ||
1167 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | ||
1168 | } | ||
1169 | |||
1170 | static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12) | ||
1171 | { | ||
1172 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR; | ||
1173 | } | ||
1174 | |||
1115 | static inline bool is_exception(u32 intr_info) | 1175 | static inline bool is_exception(u32 intr_info) |
1116 | { | 1176 | { |
1117 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) | 1177 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) |
@@ -2284,20 +2344,8 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) | |||
2284 | * if the corresponding bit in the (32-bit) control field *must* be on, and a | 2344 | * if the corresponding bit in the (32-bit) control field *must* be on, and a |
2285 | * bit in the high half is on if the corresponding bit in the control field | 2345 | * bit in the high half is on if the corresponding bit in the control field |
2286 | * may be on. See also vmx_control_verify(). | 2346 | * may be on. See also vmx_control_verify(). |
2287 | * TODO: allow these variables to be modified (downgraded) by module options | ||
2288 | * or other means. | ||
2289 | */ | 2347 | */ |
2290 | static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high; | 2348 | static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) |
2291 | static u32 nested_vmx_true_procbased_ctls_low; | ||
2292 | static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; | ||
2293 | static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; | ||
2294 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; | ||
2295 | static u32 nested_vmx_true_exit_ctls_low; | ||
2296 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; | ||
2297 | static u32 nested_vmx_true_entry_ctls_low; | ||
2298 | static u32 nested_vmx_misc_low, nested_vmx_misc_high; | ||
2299 | static u32 nested_vmx_ept_caps; | ||
2300 | static __init void nested_vmx_setup_ctls_msrs(void) | ||
2301 | { | 2349 | { |
2302 | /* | 2350 | /* |
2303 | * Note that as a general rule, the high half of the MSRs (bits in | 2351 | * Note that as a general rule, the high half of the MSRs (bits in |
@@ -2316,57 +2364,74 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2316 | 2364 | ||
2317 | /* pin-based controls */ | 2365 | /* pin-based controls */ |
2318 | rdmsr(MSR_IA32_VMX_PINBASED_CTLS, | 2366 | rdmsr(MSR_IA32_VMX_PINBASED_CTLS, |
2319 | nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high); | 2367 | vmx->nested.nested_vmx_pinbased_ctls_low, |
2320 | nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; | 2368 | vmx->nested.nested_vmx_pinbased_ctls_high); |
2321 | nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | | 2369 | vmx->nested.nested_vmx_pinbased_ctls_low |= |
2322 | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; | 2370 | PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; |
2323 | nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | | 2371 | vmx->nested.nested_vmx_pinbased_ctls_high &= |
2372 | PIN_BASED_EXT_INTR_MASK | | ||
2373 | PIN_BASED_NMI_EXITING | | ||
2374 | PIN_BASED_VIRTUAL_NMIS; | ||
2375 | vmx->nested.nested_vmx_pinbased_ctls_high |= | ||
2376 | PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | | ||
2324 | PIN_BASED_VMX_PREEMPTION_TIMER; | 2377 | PIN_BASED_VMX_PREEMPTION_TIMER; |
2378 | if (vmx_vm_has_apicv(vmx->vcpu.kvm)) | ||
2379 | vmx->nested.nested_vmx_pinbased_ctls_high |= | ||
2380 | PIN_BASED_POSTED_INTR; | ||
2325 | 2381 | ||
2326 | /* exit controls */ | 2382 | /* exit controls */ |
2327 | rdmsr(MSR_IA32_VMX_EXIT_CTLS, | 2383 | rdmsr(MSR_IA32_VMX_EXIT_CTLS, |
2328 | nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); | 2384 | vmx->nested.nested_vmx_exit_ctls_low, |
2329 | nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | 2385 | vmx->nested.nested_vmx_exit_ctls_high); |
2386 | vmx->nested.nested_vmx_exit_ctls_low = | ||
2387 | VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | ||
2330 | 2388 | ||
2331 | nested_vmx_exit_ctls_high &= | 2389 | vmx->nested.nested_vmx_exit_ctls_high &= |
2332 | #ifdef CONFIG_X86_64 | 2390 | #ifdef CONFIG_X86_64 |
2333 | VM_EXIT_HOST_ADDR_SPACE_SIZE | | 2391 | VM_EXIT_HOST_ADDR_SPACE_SIZE | |
2334 | #endif | 2392 | #endif |
2335 | VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; | 2393 | VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; |
2336 | nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | | 2394 | vmx->nested.nested_vmx_exit_ctls_high |= |
2395 | VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | | ||
2337 | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | | 2396 | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | |
2338 | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; | 2397 | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; |
2339 | 2398 | ||
2340 | if (vmx_mpx_supported()) | 2399 | if (vmx_mpx_supported()) |
2341 | nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; | 2400 | vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; |
2342 | 2401 | ||
2343 | /* We support free control of debug control saving. */ | 2402 | /* We support free control of debug control saving. */ |
2344 | nested_vmx_true_exit_ctls_low = nested_vmx_exit_ctls_low & | 2403 | vmx->nested.nested_vmx_true_exit_ctls_low = |
2404 | vmx->nested.nested_vmx_exit_ctls_low & | ||
2345 | ~VM_EXIT_SAVE_DEBUG_CONTROLS; | 2405 | ~VM_EXIT_SAVE_DEBUG_CONTROLS; |
2346 | 2406 | ||
2347 | /* entry controls */ | 2407 | /* entry controls */ |
2348 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, | 2408 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, |
2349 | nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); | 2409 | vmx->nested.nested_vmx_entry_ctls_low, |
2350 | nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | 2410 | vmx->nested.nested_vmx_entry_ctls_high); |
2351 | nested_vmx_entry_ctls_high &= | 2411 | vmx->nested.nested_vmx_entry_ctls_low = |
2412 | VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | ||
2413 | vmx->nested.nested_vmx_entry_ctls_high &= | ||
2352 | #ifdef CONFIG_X86_64 | 2414 | #ifdef CONFIG_X86_64 |
2353 | VM_ENTRY_IA32E_MODE | | 2415 | VM_ENTRY_IA32E_MODE | |
2354 | #endif | 2416 | #endif |
2355 | VM_ENTRY_LOAD_IA32_PAT; | 2417 | VM_ENTRY_LOAD_IA32_PAT; |
2356 | nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | | 2418 | vmx->nested.nested_vmx_entry_ctls_high |= |
2357 | VM_ENTRY_LOAD_IA32_EFER); | 2419 | (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); |
2358 | if (vmx_mpx_supported()) | 2420 | if (vmx_mpx_supported()) |
2359 | nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; | 2421 | vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; |
2360 | 2422 | ||
2361 | /* We support free control of debug control loading. */ | 2423 | /* We support free control of debug control loading. */ |
2362 | nested_vmx_true_entry_ctls_low = nested_vmx_entry_ctls_low & | 2424 | vmx->nested.nested_vmx_true_entry_ctls_low = |
2425 | vmx->nested.nested_vmx_entry_ctls_low & | ||
2363 | ~VM_ENTRY_LOAD_DEBUG_CONTROLS; | 2426 | ~VM_ENTRY_LOAD_DEBUG_CONTROLS; |
2364 | 2427 | ||
2365 | /* cpu-based controls */ | 2428 | /* cpu-based controls */ |
2366 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, | 2429 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, |
2367 | nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); | 2430 | vmx->nested.nested_vmx_procbased_ctls_low, |
2368 | nested_vmx_procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; | 2431 | vmx->nested.nested_vmx_procbased_ctls_high); |
2369 | nested_vmx_procbased_ctls_high &= | 2432 | vmx->nested.nested_vmx_procbased_ctls_low = |
2433 | CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; | ||
2434 | vmx->nested.nested_vmx_procbased_ctls_high &= | ||
2370 | CPU_BASED_VIRTUAL_INTR_PENDING | | 2435 | CPU_BASED_VIRTUAL_INTR_PENDING | |
2371 | CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | | 2436 | CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | |
2372 | CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | | 2437 | CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | |
@@ -2386,45 +2451,55 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2386 | * can use it to avoid exits to L1 - even when L0 runs L2 | 2451 | * can use it to avoid exits to L1 - even when L0 runs L2 |
2387 | * without MSR bitmaps. | 2452 | * without MSR bitmaps. |
2388 | */ | 2453 | */ |
2389 | nested_vmx_procbased_ctls_high |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | | 2454 | vmx->nested.nested_vmx_procbased_ctls_high |= |
2455 | CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | | ||
2390 | CPU_BASED_USE_MSR_BITMAPS; | 2456 | CPU_BASED_USE_MSR_BITMAPS; |
2391 | 2457 | ||
2392 | /* We support free control of CR3 access interception. */ | 2458 | /* We support free control of CR3 access interception. */ |
2393 | nested_vmx_true_procbased_ctls_low = nested_vmx_procbased_ctls_low & | 2459 | vmx->nested.nested_vmx_true_procbased_ctls_low = |
2460 | vmx->nested.nested_vmx_procbased_ctls_low & | ||
2394 | ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); | 2461 | ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); |
2395 | 2462 | ||
2396 | /* secondary cpu-based controls */ | 2463 | /* secondary cpu-based controls */ |
2397 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, | 2464 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, |
2398 | nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high); | 2465 | vmx->nested.nested_vmx_secondary_ctls_low, |
2399 | nested_vmx_secondary_ctls_low = 0; | 2466 | vmx->nested.nested_vmx_secondary_ctls_high); |
2400 | nested_vmx_secondary_ctls_high &= | 2467 | vmx->nested.nested_vmx_secondary_ctls_low = 0; |
2468 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
2401 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 2469 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
2470 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | ||
2471 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | ||
2472 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | ||
2402 | SECONDARY_EXEC_WBINVD_EXITING | | 2473 | SECONDARY_EXEC_WBINVD_EXITING | |
2403 | SECONDARY_EXEC_XSAVES; | 2474 | SECONDARY_EXEC_XSAVES; |
2404 | 2475 | ||
2405 | if (enable_ept) { | 2476 | if (enable_ept) { |
2406 | /* nested EPT: emulate EPT also to L1 */ | 2477 | /* nested EPT: emulate EPT also to L1 */ |
2407 | nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT | | 2478 | vmx->nested.nested_vmx_secondary_ctls_high |= |
2479 | SECONDARY_EXEC_ENABLE_EPT | | ||
2408 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 2480 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
2409 | nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | | 2481 | vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | |
2410 | VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | | 2482 | VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | |
2411 | VMX_EPT_INVEPT_BIT; | 2483 | VMX_EPT_INVEPT_BIT; |
2412 | nested_vmx_ept_caps &= vmx_capability.ept; | 2484 | vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept; |
2413 | /* | 2485 | /* |
2414 | * For nested guests, we don't do anything specific | 2486 | * For nested guests, we don't do anything specific |
2415 | * for single context invalidation. Hence, only advertise | 2487 | * for single context invalidation. Hence, only advertise |
2416 | * support for global context invalidation. | 2488 | * support for global context invalidation. |
2417 | */ | 2489 | */ |
2418 | nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT; | 2490 | vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT; |
2419 | } else | 2491 | } else |
2420 | nested_vmx_ept_caps = 0; | 2492 | vmx->nested.nested_vmx_ept_caps = 0; |
2421 | 2493 | ||
2422 | /* miscellaneous data */ | 2494 | /* miscellaneous data */ |
2423 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); | 2495 | rdmsr(MSR_IA32_VMX_MISC, |
2424 | nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA; | 2496 | vmx->nested.nested_vmx_misc_low, |
2425 | nested_vmx_misc_low |= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | | 2497 | vmx->nested.nested_vmx_misc_high); |
2498 | vmx->nested.nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA; | ||
2499 | vmx->nested.nested_vmx_misc_low |= | ||
2500 | VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | | ||
2426 | VMX_MISC_ACTIVITY_HLT; | 2501 | VMX_MISC_ACTIVITY_HLT; |
2427 | nested_vmx_misc_high = 0; | 2502 | vmx->nested.nested_vmx_misc_high = 0; |
2428 | } | 2503 | } |
2429 | 2504 | ||
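nested_vmx_setup_ctls_msrs (now operating on per-vcpu fields) builds the low/high pairs described above: a bit set in the low half must be 1 in the control word, a bit set in the high half may be 1. A small standalone sketch, with hypothetical bit values, of how such a pair is packed into a 64-bit control MSR and how a requested control word can be validated against it:

#include <stdint.h>
#include <stdio.h>

/* Pack the must-be-1 (low) and may-be-1 (high) halves into one MSR value. */
static uint64_t control_msr(uint32_t low, uint32_t high)
{
	return (uint64_t)high << 32 | low;
}

/*
 * A control word is acceptable if every must-be-1 bit is set and no bit
 * outside the may-be-1 mask is set.
 */
static int control_ok(uint32_t control, uint32_t low, uint32_t high)
{
	return (control & low) == low && (control & ~high) == 0;
}

int main(void)
{
	/* Hypothetical example: bits 0-1 must be set, bits 0-7 may be set. */
	uint32_t low = 0x3, high = 0xff;

	printf("msr = 0x%llx\n", (unsigned long long)control_msr(low, high));
	printf("0x0f  ok? %d\n", control_ok(0x0f, low, high));  /* 1 */
	printf("0x100 ok? %d\n", control_ok(0x100, low, high)); /* 0 */
	return 0;
}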
2430 | static inline bool vmx_control_verify(u32 control, u32 low, u32 high) | 2505 | static inline bool vmx_control_verify(u32 control, u32 low, u32 high) |
@@ -2443,6 +2518,8 @@ static inline u64 vmx_control_msr(u32 low, u32 high) | |||
2443 | /* Returns 0 on success, non-0 otherwise. */ | 2518 | /* Returns 0 on success, non-0 otherwise. */ |
2444 | static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | 2519 | static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) |
2445 | { | 2520 | { |
2521 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2522 | |||
2446 | switch (msr_index) { | 2523 | switch (msr_index) { |
2447 | case MSR_IA32_VMX_BASIC: | 2524 | case MSR_IA32_VMX_BASIC: |
2448 | /* | 2525 | /* |
@@ -2457,36 +2534,44 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2457 | break; | 2534 | break; |
2458 | case MSR_IA32_VMX_TRUE_PINBASED_CTLS: | 2535 | case MSR_IA32_VMX_TRUE_PINBASED_CTLS: |
2459 | case MSR_IA32_VMX_PINBASED_CTLS: | 2536 | case MSR_IA32_VMX_PINBASED_CTLS: |
2460 | *pdata = vmx_control_msr(nested_vmx_pinbased_ctls_low, | 2537 | *pdata = vmx_control_msr( |
2461 | nested_vmx_pinbased_ctls_high); | 2538 | vmx->nested.nested_vmx_pinbased_ctls_low, |
2539 | vmx->nested.nested_vmx_pinbased_ctls_high); | ||
2462 | break; | 2540 | break; |
2463 | case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: | 2541 | case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: |
2464 | *pdata = vmx_control_msr(nested_vmx_true_procbased_ctls_low, | 2542 | *pdata = vmx_control_msr( |
2465 | nested_vmx_procbased_ctls_high); | 2543 | vmx->nested.nested_vmx_true_procbased_ctls_low, |
2544 | vmx->nested.nested_vmx_procbased_ctls_high); | ||
2466 | break; | 2545 | break; |
2467 | case MSR_IA32_VMX_PROCBASED_CTLS: | 2546 | case MSR_IA32_VMX_PROCBASED_CTLS: |
2468 | *pdata = vmx_control_msr(nested_vmx_procbased_ctls_low, | 2547 | *pdata = vmx_control_msr( |
2469 | nested_vmx_procbased_ctls_high); | 2548 | vmx->nested.nested_vmx_procbased_ctls_low, |
2549 | vmx->nested.nested_vmx_procbased_ctls_high); | ||
2470 | break; | 2550 | break; |
2471 | case MSR_IA32_VMX_TRUE_EXIT_CTLS: | 2551 | case MSR_IA32_VMX_TRUE_EXIT_CTLS: |
2472 | *pdata = vmx_control_msr(nested_vmx_true_exit_ctls_low, | 2552 | *pdata = vmx_control_msr( |
2473 | nested_vmx_exit_ctls_high); | 2553 | vmx->nested.nested_vmx_true_exit_ctls_low, |
2554 | vmx->nested.nested_vmx_exit_ctls_high); | ||
2474 | break; | 2555 | break; |
2475 | case MSR_IA32_VMX_EXIT_CTLS: | 2556 | case MSR_IA32_VMX_EXIT_CTLS: |
2476 | *pdata = vmx_control_msr(nested_vmx_exit_ctls_low, | 2557 | *pdata = vmx_control_msr( |
2477 | nested_vmx_exit_ctls_high); | 2558 | vmx->nested.nested_vmx_exit_ctls_low, |
2559 | vmx->nested.nested_vmx_exit_ctls_high); | ||
2478 | break; | 2560 | break; |
2479 | case MSR_IA32_VMX_TRUE_ENTRY_CTLS: | 2561 | case MSR_IA32_VMX_TRUE_ENTRY_CTLS: |
2480 | *pdata = vmx_control_msr(nested_vmx_true_entry_ctls_low, | 2562 | *pdata = vmx_control_msr( |
2481 | nested_vmx_entry_ctls_high); | 2563 | vmx->nested.nested_vmx_true_entry_ctls_low, |
2564 | vmx->nested.nested_vmx_entry_ctls_high); | ||
2482 | break; | 2565 | break; |
2483 | case MSR_IA32_VMX_ENTRY_CTLS: | 2566 | case MSR_IA32_VMX_ENTRY_CTLS: |
2484 | *pdata = vmx_control_msr(nested_vmx_entry_ctls_low, | 2567 | *pdata = vmx_control_msr( |
2485 | nested_vmx_entry_ctls_high); | 2568 | vmx->nested.nested_vmx_entry_ctls_low, |
2569 | vmx->nested.nested_vmx_entry_ctls_high); | ||
2486 | break; | 2570 | break; |
2487 | case MSR_IA32_VMX_MISC: | 2571 | case MSR_IA32_VMX_MISC: |
2488 | *pdata = vmx_control_msr(nested_vmx_misc_low, | 2572 | *pdata = vmx_control_msr( |
2489 | nested_vmx_misc_high); | 2573 | vmx->nested.nested_vmx_misc_low, |
2574 | vmx->nested.nested_vmx_misc_high); | ||
2490 | break; | 2575 | break; |
2491 | /* | 2576 | /* |
2492 | * These MSRs specify bits which the guest must keep fixed (on or off) | 2577 | * These MSRs specify bits which the guest must keep fixed (on or off) |
@@ -2511,12 +2596,13 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2511 | *pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */ | 2596 | *pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */ |
2512 | break; | 2597 | break; |
2513 | case MSR_IA32_VMX_PROCBASED_CTLS2: | 2598 | case MSR_IA32_VMX_PROCBASED_CTLS2: |
2514 | *pdata = vmx_control_msr(nested_vmx_secondary_ctls_low, | 2599 | *pdata = vmx_control_msr( |
2515 | nested_vmx_secondary_ctls_high); | 2600 | vmx->nested.nested_vmx_secondary_ctls_low, |
2601 | vmx->nested.nested_vmx_secondary_ctls_high); | ||
2516 | break; | 2602 | break; |
2517 | case MSR_IA32_VMX_EPT_VPID_CAP: | 2603 | case MSR_IA32_VMX_EPT_VPID_CAP: |
2518 | /* Currently, no nested vpid support */ | 2604 | /* Currently, no nested vpid support */ |
2519 | *pdata = nested_vmx_ept_caps; | 2605 | *pdata = vmx->nested.nested_vmx_ept_caps; |
2520 | break; | 2606 | break; |
2521 | default: | 2607 | default: |
2522 | return 1; | 2608 | return 1; |
@@ -2929,7 +3015,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2929 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | 3015 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
2930 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 3016 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
2931 | SECONDARY_EXEC_SHADOW_VMCS | | 3017 | SECONDARY_EXEC_SHADOW_VMCS | |
2932 | SECONDARY_EXEC_XSAVES; | 3018 | SECONDARY_EXEC_XSAVES | |
3019 | SECONDARY_EXEC_ENABLE_PML; | ||
2933 | if (adjust_vmx_controls(min2, opt2, | 3020 | if (adjust_vmx_controls(min2, opt2, |
2934 | MSR_IA32_VMX_PROCBASED_CTLS2, | 3021 | MSR_IA32_VMX_PROCBASED_CTLS2, |
2935 | &_cpu_based_2nd_exec_control) < 0) | 3022 | &_cpu_based_2nd_exec_control) < 0) |
@@ -4159,6 +4246,52 @@ static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, | |||
4159 | } | 4246 | } |
4160 | } | 4247 | } |
4161 | 4248 | ||
4249 | /* | ||
4250 | * If an MSR access is allowed by L0, also check whether it is allowed by L1. | ||
4251 | * The corresponding intercept bit is cleared only if both L0 and L1 allow it. | ||
4250 | * If an MSR access is allowed by L0, also check whether it is allowed by L1. | ||
4252 | */ | ||
4253 | static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, | ||
4254 | unsigned long *msr_bitmap_nested, | ||
4255 | u32 msr, int type) | ||
4256 | { | ||
4257 | int f = sizeof(unsigned long); | ||
4258 | |||
4259 | if (!cpu_has_vmx_msr_bitmap()) { | ||
4260 | WARN_ON(1); | ||
4261 | return; | ||
4262 | } | ||
4263 | |||
4264 | /* | ||
4265 | * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals | ||
4266 | * have the write-low and read-high bitmap offsets the wrong way round. | ||
4267 | * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. | ||
4268 | */ | ||
4269 | if (msr <= 0x1fff) { | ||
4270 | if (type & MSR_TYPE_R && | ||
4271 | !test_bit(msr, msr_bitmap_l1 + 0x000 / f)) | ||
4272 | /* read-low */ | ||
4273 | __clear_bit(msr, msr_bitmap_nested + 0x000 / f); | ||
4274 | |||
4275 | if (type & MSR_TYPE_W && | ||
4276 | !test_bit(msr, msr_bitmap_l1 + 0x800 / f)) | ||
4277 | /* write-low */ | ||
4278 | __clear_bit(msr, msr_bitmap_nested + 0x800 / f); | ||
4279 | |||
4280 | } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { | ||
4281 | msr &= 0x1fff; | ||
4282 | if (type & MSR_TYPE_R && | ||
4283 | !test_bit(msr, msr_bitmap_l1 + 0x400 / f)) | ||
4284 | /* read-high */ | ||
4285 | __clear_bit(msr, msr_bitmap_nested + 0x400 / f); | ||
4286 | |||
4287 | if (type & MSR_TYPE_W && | ||
4288 | !test_bit(msr, msr_bitmap_l1 + 0xc00 / f)) | ||
4289 | /* write-high */ | ||
4290 | __clear_bit(msr, msr_bitmap_nested + 0xc00 / f); | ||
4291 | |||
4292 | } | ||
4293 | } | ||
4294 | |||
4162 | static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) | 4295 | static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) |
4163 | { | 4296 | { |
4164 | if (!longmode_only) | 4297 | if (!longmode_only) |
@@ -4197,6 +4330,64 @@ static int vmx_vm_has_apicv(struct kvm *kvm) | |||
4197 | return enable_apicv && irqchip_in_kernel(kvm); | 4330 | return enable_apicv && irqchip_in_kernel(kvm); |
4198 | } | 4331 | } |
4199 | 4332 | ||
4333 | static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) | ||
4334 | { | ||
4335 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4336 | int max_irr; | ||
4337 | void *vapic_page; | ||
4338 | u16 status; | ||
4339 | |||
4340 | if (vmx->nested.pi_desc && | ||
4341 | vmx->nested.pi_pending) { | ||
4342 | vmx->nested.pi_pending = false; | ||
4343 | if (!pi_test_and_clear_on(vmx->nested.pi_desc)) | ||
4344 | return 0; | ||
4345 | |||
4346 | max_irr = find_last_bit( | ||
4347 | (unsigned long *)vmx->nested.pi_desc->pir, 256); | ||
4348 | |||
4349 | if (max_irr == 256) | ||
4350 | return 0; | ||
4351 | |||
4352 | vapic_page = kmap(vmx->nested.virtual_apic_page); | ||
4353 | if (!vapic_page) { | ||
4354 | WARN_ON(1); | ||
4355 | return -ENOMEM; | ||
4356 | } | ||
4357 | __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); | ||
4358 | kunmap(vmx->nested.virtual_apic_page); | ||
4359 | |||
4360 | status = vmcs_read16(GUEST_INTR_STATUS); | ||
4361 | if ((u8)max_irr > ((u8)status & 0xff)) { | ||
4362 | status &= ~0xff; | ||
4363 | status |= (u8)max_irr; | ||
4364 | vmcs_write16(GUEST_INTR_STATUS, status); | ||
4365 | } | ||
4366 | } | ||
4367 | return 0; | ||
4368 | } | ||
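vmx_complete_nested_posted_interrupt scans the 256-bit posted-interrupt request array (PIR) for the highest pending vector and raises RVI, the low byte of GUEST_INTR_STATUS, if that vector is higher. A standalone userspace sketch of the same scan-and-compare, with a hypothetical starting RVI:

#include <stdint.h>
#include <stdio.h>

/* Find the highest set bit in a 256-bit PIR; return -1 if none is set. */
static int pir_highest_vector(const uint64_t pir[4])
{
	for (int word = 3; word >= 0; word--) {
		if (!pir[word])
			continue;
		for (int bit = 63; bit >= 0; bit--)
			if (pir[word] & (1ULL << bit))
				return word * 64 + bit;
	}
	return -1;
}

int main(void)
{
	uint64_t pir[4] = { 0 };
	uint16_t intr_status = 0x0020;	/* hypothetical: RVI currently 0x20 */

	pir[2] |= 1ULL << 10;		/* mark vector 0x8a as pending */

	int max_irr = pir_highest_vector(pir);
	if (max_irr >= 0 && (uint8_t)max_irr > (uint8_t)(intr_status & 0xff))
		intr_status = (uint16_t)((intr_status & ~0xff) | (uint8_t)max_irr);

	printf("max_irr=%d intr_status=0x%04x\n", max_irr, intr_status);
	return 0;
}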
4369 | |||
4370 | static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, | ||
4371 | int vector) | ||
4372 | { | ||
4373 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4374 | |||
4375 | if (is_guest_mode(vcpu) && | ||
4376 | vector == vmx->nested.posted_intr_nv) { | ||
4377 | /* the PIR and ON have been set by L1. */ | ||
4378 | if (vcpu->mode == IN_GUEST_MODE) | ||
4379 | apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), | ||
4380 | POSTED_INTR_VECTOR); | ||
4381 | /* | ||
4382 | * If a posted intr is not recognized by hardware, | ||
4383 | * we will accomplish it in the next vmentry. | ||
4384 | */ | ||
4385 | vmx->nested.pi_pending = true; | ||
4386 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
4387 | return 0; | ||
4388 | } | ||
4389 | return -1; | ||
4390 | } | ||
4200 | /* | 4391 | /* |
4201 | * Send interrupt to vcpu via posted interrupt way. | 4392 | * Send interrupt to vcpu via posted interrupt way. |
4202 | * 1. If target vcpu is running(non-root mode), send posted interrupt | 4393 | * 1. If target vcpu is running(non-root mode), send posted interrupt |
@@ -4209,6 +4400,10 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) | |||
4209 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4400 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4210 | int r; | 4401 | int r; |
4211 | 4402 | ||
4403 | r = vmx_deliver_nested_posted_interrupt(vcpu, vector); | ||
4404 | if (!r) | ||
4405 | return; | ||
4406 | |||
4212 | if (pi_test_and_set_pir(vector, &vmx->pi_desc)) | 4407 | if (pi_test_and_set_pir(vector, &vmx->pi_desc)) |
4213 | return; | 4408 | return; |
4214 | 4409 | ||
@@ -4360,6 +4555,9 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
4360 | a current VMCS12 | 4555 | a current VMCS12 |
4361 | */ | 4556 | */ |
4362 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; | 4557 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; |
4558 | /* PML is enabled/disabled when creating/destroying the vcpu */ | ||
4559 | exec_control &= ~SECONDARY_EXEC_ENABLE_PML; | ||
4560 | |||
4363 | return exec_control; | 4561 | return exec_control; |
4364 | } | 4562 | } |
4365 | 4563 | ||
@@ -4986,11 +5184,12 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
4986 | hypercall[2] = 0xc1; | 5184 | hypercall[2] = 0xc1; |
4987 | } | 5185 | } |
4988 | 5186 | ||
4989 | static bool nested_cr0_valid(struct vmcs12 *vmcs12, unsigned long val) | 5187 | static bool nested_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) |
4990 | { | 5188 | { |
4991 | unsigned long always_on = VMXON_CR0_ALWAYSON; | 5189 | unsigned long always_on = VMXON_CR0_ALWAYSON; |
5190 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
4992 | 5191 | ||
4993 | if (nested_vmx_secondary_ctls_high & | 5192 | if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high & |
4994 | SECONDARY_EXEC_UNRESTRICTED_GUEST && | 5193 | SECONDARY_EXEC_UNRESTRICTED_GUEST && |
4995 | nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST)) | 5194 | nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST)) |
4996 | always_on &= ~(X86_CR0_PE | X86_CR0_PG); | 5195 | always_on &= ~(X86_CR0_PE | X86_CR0_PG); |
@@ -5015,7 +5214,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) | |||
5015 | val = (val & ~vmcs12->cr0_guest_host_mask) | | 5214 | val = (val & ~vmcs12->cr0_guest_host_mask) | |
5016 | (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); | 5215 | (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); |
5017 | 5216 | ||
5018 | if (!nested_cr0_valid(vmcs12, val)) | 5217 | if (!nested_cr0_valid(vcpu, val)) |
5019 | return 1; | 5218 | return 1; |
5020 | 5219 | ||
5021 | if (kvm_set_cr0(vcpu, val)) | 5220 | if (kvm_set_cr0(vcpu, val)) |
@@ -5817,13 +6016,21 @@ static __init int hardware_setup(void) | |||
5817 | (unsigned long *)__get_free_page(GFP_KERNEL); | 6016 | (unsigned long *)__get_free_page(GFP_KERNEL); |
5818 | if (!vmx_msr_bitmap_longmode_x2apic) | 6017 | if (!vmx_msr_bitmap_longmode_x2apic) |
5819 | goto out4; | 6018 | goto out4; |
6019 | |||
6020 | if (nested) { | ||
6021 | vmx_msr_bitmap_nested = | ||
6022 | (unsigned long *)__get_free_page(GFP_KERNEL); | ||
6023 | if (!vmx_msr_bitmap_nested) | ||
6024 | goto out5; | ||
6025 | } | ||
6026 | |||
5820 | vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); | 6027 | vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); |
5821 | if (!vmx_vmread_bitmap) | 6028 | if (!vmx_vmread_bitmap) |
5822 | goto out5; | 6029 | goto out6; |
5823 | 6030 | ||
5824 | vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); | 6031 | vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); |
5825 | if (!vmx_vmwrite_bitmap) | 6032 | if (!vmx_vmwrite_bitmap) |
5826 | goto out6; | 6033 | goto out7; |
5827 | 6034 | ||
5828 | memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); | 6035 | memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); |
5829 | memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); | 6036 | memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); |
@@ -5839,10 +6046,12 @@ static __init int hardware_setup(void) | |||
5839 | 6046 | ||
5840 | memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); | 6047 | memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); |
5841 | memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); | 6048 | memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); |
6049 | if (nested) | ||
6050 | memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE); | ||
5842 | 6051 | ||
5843 | if (setup_vmcs_config(&vmcs_config) < 0) { | 6052 | if (setup_vmcs_config(&vmcs_config) < 0) { |
5844 | r = -EIO; | 6053 | r = -EIO; |
5845 | goto out7; | 6054 | goto out8; |
5846 | } | 6055 | } |
5847 | 6056 | ||
5848 | if (boot_cpu_has(X86_FEATURE_NX)) | 6057 | if (boot_cpu_has(X86_FEATURE_NX)) |
@@ -5868,16 +6077,16 @@ static __init int hardware_setup(void) | |||
5868 | if (!cpu_has_vmx_unrestricted_guest()) | 6077 | if (!cpu_has_vmx_unrestricted_guest()) |
5869 | enable_unrestricted_guest = 0; | 6078 | enable_unrestricted_guest = 0; |
5870 | 6079 | ||
5871 | if (!cpu_has_vmx_flexpriority()) { | 6080 | if (!cpu_has_vmx_flexpriority()) |
5872 | flexpriority_enabled = 0; | 6081 | flexpriority_enabled = 0; |
5873 | 6082 | ||
5874 | /* | 6083 | /* |
5875 | * set_apic_access_page_addr() is used to reload apic access | 6084 | * set_apic_access_page_addr() is used to reload apic access |
5876 | * page upon invalidation. No need to do anything if the | 6085 | * page upon invalidation. No need to do anything if not |
5877 | * processor does not have the APIC_ACCESS_ADDR VMCS field. | 6086 | * using the APIC_ACCESS_ADDR VMCS field. |
5878 | */ | 6087 | */ |
6088 | if (!flexpriority_enabled) | ||
5879 | kvm_x86_ops->set_apic_access_page_addr = NULL; | 6089 | kvm_x86_ops->set_apic_access_page_addr = NULL; |
5880 | } | ||
5881 | 6090 | ||
5882 | if (!cpu_has_vmx_tpr_shadow()) | 6091 | if (!cpu_has_vmx_tpr_shadow()) |
5883 | kvm_x86_ops->update_cr8_intercept = NULL; | 6092 | kvm_x86_ops->update_cr8_intercept = NULL; |
@@ -5895,13 +6104,11 @@ static __init int hardware_setup(void) | |||
5895 | kvm_x86_ops->update_cr8_intercept = NULL; | 6104 | kvm_x86_ops->update_cr8_intercept = NULL; |
5896 | else { | 6105 | else { |
5897 | kvm_x86_ops->hwapic_irr_update = NULL; | 6106 | kvm_x86_ops->hwapic_irr_update = NULL; |
6107 | kvm_x86_ops->hwapic_isr_update = NULL; | ||
5898 | kvm_x86_ops->deliver_posted_interrupt = NULL; | 6108 | kvm_x86_ops->deliver_posted_interrupt = NULL; |
5899 | kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; | 6109 | kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; |
5900 | } | 6110 | } |
5901 | 6111 | ||
5902 | if (nested) | ||
5903 | nested_vmx_setup_ctls_msrs(); | ||
5904 | |||
5905 | vmx_disable_intercept_for_msr(MSR_FS_BASE, false); | 6112 | vmx_disable_intercept_for_msr(MSR_FS_BASE, false); |
5906 | vmx_disable_intercept_for_msr(MSR_GS_BASE, false); | 6113 | vmx_disable_intercept_for_msr(MSR_GS_BASE, false); |
5907 | vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); | 6114 | vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); |
@@ -5945,12 +6152,29 @@ static __init int hardware_setup(void) | |||
5945 | 6152 | ||
5946 | update_ple_window_actual_max(); | 6153 | update_ple_window_actual_max(); |
5947 | 6154 | ||
6155 | /* | ||
6156 | * Only enable PML when hardware supports PML feature, and both EPT | ||
6157 | * and EPT A/D bit features are enabled -- PML depends on them to work. | ||
6158 | */ | ||
6159 | if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml()) | ||
6160 | enable_pml = 0; | ||
6161 | |||
6162 | if (!enable_pml) { | ||
6163 | kvm_x86_ops->slot_enable_log_dirty = NULL; | ||
6164 | kvm_x86_ops->slot_disable_log_dirty = NULL; | ||
6165 | kvm_x86_ops->flush_log_dirty = NULL; | ||
6166 | kvm_x86_ops->enable_log_dirty_pt_masked = NULL; | ||
6167 | } | ||
6168 | |||
5948 | return alloc_kvm_area(); | 6169 | return alloc_kvm_area(); |
5949 | 6170 | ||
5950 | out7: | 6171 | out8: |
5951 | free_page((unsigned long)vmx_vmwrite_bitmap); | 6172 | free_page((unsigned long)vmx_vmwrite_bitmap); |
5952 | out6: | 6173 | out7: |
5953 | free_page((unsigned long)vmx_vmread_bitmap); | 6174 | free_page((unsigned long)vmx_vmread_bitmap); |
6175 | out6: | ||
6176 | if (nested) | ||
6177 | free_page((unsigned long)vmx_msr_bitmap_nested); | ||
5954 | out5: | 6178 | out5: |
5955 | free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); | 6179 | free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); |
5956 | out4: | 6180 | out4: |
@@ -5977,6 +6201,8 @@ static __exit void hardware_unsetup(void) | |||
5977 | free_page((unsigned long)vmx_io_bitmap_a); | 6201 | free_page((unsigned long)vmx_io_bitmap_a); |
5978 | free_page((unsigned long)vmx_vmwrite_bitmap); | 6202 | free_page((unsigned long)vmx_vmwrite_bitmap); |
5979 | free_page((unsigned long)vmx_vmread_bitmap); | 6203 | free_page((unsigned long)vmx_vmread_bitmap); |
6204 | if (nested) | ||
6205 | free_page((unsigned long)vmx_msr_bitmap_nested); | ||
5980 | 6206 | ||
5981 | free_kvm_area(); | 6207 | free_kvm_area(); |
5982 | } | 6208 | } |
@@ -6143,6 +6369,13 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu, | |||
6143 | */ | 6369 | */ |
6144 | } | 6370 | } |
6145 | 6371 | ||
6372 | static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator) | ||
6373 | { | ||
6374 | /* TODO: handle the abort properly instead of simply resetting the guest. */ | ||
6375 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); | ||
6376 | pr_warn("kvm: nested vmx abort, indicator %d\n", indicator); | ||
6377 | } | ||
6378 | |||
6146 | static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) | 6379 | static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) |
6147 | { | 6380 | { |
6148 | struct vcpu_vmx *vmx = | 6381 | struct vcpu_vmx *vmx = |
@@ -6432,6 +6665,7 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) | |||
6432 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 6665 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
6433 | vmcs_write64(VMCS_LINK_POINTER, -1ull); | 6666 | vmcs_write64(VMCS_LINK_POINTER, -1ull); |
6434 | } | 6667 | } |
6668 | vmx->nested.posted_intr_nv = -1; | ||
6435 | kunmap(vmx->nested.current_vmcs12_page); | 6669 | kunmap(vmx->nested.current_vmcs12_page); |
6436 | nested_release_page(vmx->nested.current_vmcs12_page); | 6670 | nested_release_page(vmx->nested.current_vmcs12_page); |
6437 | vmx->nested.current_vmptr = -1ull; | 6671 | vmx->nested.current_vmptr = -1ull; |
@@ -6460,6 +6694,12 @@ static void free_nested(struct vcpu_vmx *vmx) | |||
6460 | nested_release_page(vmx->nested.virtual_apic_page); | 6694 | nested_release_page(vmx->nested.virtual_apic_page); |
6461 | vmx->nested.virtual_apic_page = NULL; | 6695 | vmx->nested.virtual_apic_page = NULL; |
6462 | } | 6696 | } |
6697 | if (vmx->nested.pi_desc_page) { | ||
6698 | kunmap(vmx->nested.pi_desc_page); | ||
6699 | nested_release_page(vmx->nested.pi_desc_page); | ||
6700 | vmx->nested.pi_desc_page = NULL; | ||
6701 | vmx->nested.pi_desc = NULL; | ||
6702 | } | ||
6463 | 6703 | ||
6464 | nested_free_all_saved_vmcss(vmx); | 6704 | nested_free_all_saved_vmcss(vmx); |
6465 | } | 6705 | } |
@@ -6893,6 +7133,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) | |||
6893 | /* Emulate the INVEPT instruction */ | 7133 | /* Emulate the INVEPT instruction */ |
6894 | static int handle_invept(struct kvm_vcpu *vcpu) | 7134 | static int handle_invept(struct kvm_vcpu *vcpu) |
6895 | { | 7135 | { |
7136 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
6896 | u32 vmx_instruction_info, types; | 7137 | u32 vmx_instruction_info, types; |
6897 | unsigned long type; | 7138 | unsigned long type; |
6898 | gva_t gva; | 7139 | gva_t gva; |
@@ -6901,8 +7142,9 @@ static int handle_invept(struct kvm_vcpu *vcpu) | |||
6901 | u64 eptp, gpa; | 7142 | u64 eptp, gpa; |
6902 | } operand; | 7143 | } operand; |
6903 | 7144 | ||
6904 | if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || | 7145 | if (!(vmx->nested.nested_vmx_secondary_ctls_high & |
6905 | !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) { | 7146 | SECONDARY_EXEC_ENABLE_EPT) || |
7147 | !(vmx->nested.nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) { | ||
6906 | kvm_queue_exception(vcpu, UD_VECTOR); | 7148 | kvm_queue_exception(vcpu, UD_VECTOR); |
6907 | return 1; | 7149 | return 1; |
6908 | } | 7150 | } |
@@ -6918,7 +7160,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) | |||
6918 | vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | 7160 | vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); |
6919 | type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); | 7161 | type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); |
6920 | 7162 | ||
6921 | types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; | 7163 | types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; |
6922 | 7164 | ||
6923 | if (!(types & (1UL << type))) { | 7165 | if (!(types & (1UL << type))) { |
6924 | nested_vmx_failValid(vcpu, | 7166 | nested_vmx_failValid(vcpu, |
@@ -6960,6 +7202,31 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) | |||
6960 | return 1; | 7202 | return 1; |
6961 | } | 7203 | } |
6962 | 7204 | ||
7205 | static int handle_pml_full(struct kvm_vcpu *vcpu) | ||
7206 | { | ||
7207 | unsigned long exit_qualification; | ||
7208 | |||
7209 | trace_kvm_pml_full(vcpu->vcpu_id); | ||
7210 | |||
7211 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
7212 | |||
7213 | /* | ||
7214 | * If the PML-buffer-full exit happened while executing iret from an NMI, | ||
7215 | * the "blocked by NMI" bit has to be set before the next VM entry. | ||
7216 | */ | ||
7217 | if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && | ||
7218 | cpu_has_virtual_nmis() && | ||
7219 | (exit_qualification & INTR_INFO_UNBLOCK_NMI)) | ||
7220 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
7221 | GUEST_INTR_STATE_NMI); | ||
7222 | |||
7223 | /* | ||
7224 | * The PML buffer was already flushed at the beginning of the VMEXIT; nothing | ||
7225 | * to do here, and no userspace involvement is needed for PML. | ||
7226 | */ | ||
7227 | return 1; | ||
7228 | } | ||
7229 | |||
6963 | /* | 7230 | /* |
6964 | * The exit handlers return 1 if the exit was handled fully and guest execution | 7231 | * The exit handlers return 1 if the exit was handled fully and guest execution |
6965 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 7232 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
@@ -7008,6 +7275,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
7008 | [EXIT_REASON_INVVPID] = handle_invvpid, | 7275 | [EXIT_REASON_INVVPID] = handle_invvpid, |
7009 | [EXIT_REASON_XSAVES] = handle_xsaves, | 7276 | [EXIT_REASON_XSAVES] = handle_xsaves, |
7010 | [EXIT_REASON_XRSTORS] = handle_xrstors, | 7277 | [EXIT_REASON_XRSTORS] = handle_xrstors, |
7278 | [EXIT_REASON_PML_FULL] = handle_pml_full, | ||
7011 | }; | 7279 | }; |
7012 | 7280 | ||
7013 | static const int kvm_vmx_max_exit_handlers = | 7281 | static const int kvm_vmx_max_exit_handlers = |
@@ -7275,6 +7543,10 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
7275 | case EXIT_REASON_APIC_ACCESS: | 7543 | case EXIT_REASON_APIC_ACCESS: |
7276 | return nested_cpu_has2(vmcs12, | 7544 | return nested_cpu_has2(vmcs12, |
7277 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | 7545 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); |
7546 | case EXIT_REASON_APIC_WRITE: | ||
7547 | case EXIT_REASON_EOI_INDUCED: | ||
7548 | /* apic_write and eoi_induced should exit unconditionally. */ | ||
7549 | return 1; | ||
7278 | case EXIT_REASON_EPT_VIOLATION: | 7550 | case EXIT_REASON_EPT_VIOLATION: |
7279 | /* | 7551 | /* |
7280 | * L0 always deals with the EPT violation. If nested EPT is | 7552 | * L0 always deals with the EPT violation. If nested EPT is |
@@ -7314,6 +7586,89 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) | |||
7314 | *info2 = vmcs_read32(VM_EXIT_INTR_INFO); | 7586 | *info2 = vmcs_read32(VM_EXIT_INTR_INFO); |
7315 | } | 7587 | } |
7316 | 7588 | ||
7589 | static int vmx_enable_pml(struct vcpu_vmx *vmx) | ||
7590 | { | ||
7591 | struct page *pml_pg; | ||
7592 | u32 exec_control; | ||
7593 | |||
7594 | pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
7595 | if (!pml_pg) | ||
7596 | return -ENOMEM; | ||
7597 | |||
7598 | vmx->pml_pg = pml_pg; | ||
7599 | |||
7600 | vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); | ||
7601 | vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); | ||
7602 | |||
7603 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
7604 | exec_control |= SECONDARY_EXEC_ENABLE_PML; | ||
7605 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
7606 | |||
7607 | return 0; | ||
7608 | } | ||
7609 | |||
7610 | static void vmx_disable_pml(struct vcpu_vmx *vmx) | ||
7611 | { | ||
7612 | u32 exec_control; | ||
7613 | |||
7614 | ASSERT(vmx->pml_pg); | ||
7615 | __free_page(vmx->pml_pg); | ||
7616 | vmx->pml_pg = NULL; | ||
7617 | |||
7618 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
7619 | exec_control &= ~SECONDARY_EXEC_ENABLE_PML; | ||
7620 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
7621 | } | ||
7622 | |||
7623 | static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx) | ||
7624 | { | ||
7625 | struct kvm *kvm = vmx->vcpu.kvm; | ||
7626 | u64 *pml_buf; | ||
7627 | u16 pml_idx; | ||
7628 | |||
7629 | pml_idx = vmcs_read16(GUEST_PML_INDEX); | ||
7630 | |||
7631 | /* Do nothing if PML buffer is empty */ | ||
7632 | if (pml_idx == (PML_ENTITY_NUM - 1)) | ||
7633 | return; | ||
7634 | |||
7635 | /* The PML index always points to the next available PML buffer entry */ | ||
7636 | if (pml_idx >= PML_ENTITY_NUM) | ||
7637 | pml_idx = 0; | ||
7638 | else | ||
7639 | pml_idx++; | ||
7640 | |||
7641 | pml_buf = page_address(vmx->pml_pg); | ||
7642 | for (; pml_idx < PML_ENTITY_NUM; pml_idx++) { | ||
7643 | u64 gpa; | ||
7644 | |||
7645 | gpa = pml_buf[pml_idx]; | ||
7646 | WARN_ON(gpa & (PAGE_SIZE - 1)); | ||
7647 | mark_page_dirty(kvm, gpa >> PAGE_SHIFT); | ||
7648 | } | ||
7649 | |||
7650 | /* reset PML index */ | ||
7651 | vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); | ||
7652 | } | ||
7653 | |||
7654 | /* | ||
7655 | * Flush all vcpus' PML buffers and propagate the logged GPAs into dirty_bitmap. | ||
7656 | * Called before reporting dirty_bitmap to userspace. | ||
7657 | */ | ||
7658 | static void kvm_flush_pml_buffers(struct kvm *kvm) | ||
7659 | { | ||
7660 | int i; | ||
7661 | struct kvm_vcpu *vcpu; | ||
7662 | /* | ||
7663 | * We only need to kick each vcpu out of guest mode here: the PML buffer | ||
7664 | * is flushed at the beginning of every VMEXIT, so only vcpus currently | ||
7665 | * running in guest mode can have unflushed GPAs sitting in their PML | ||
7666 | * buffers. | ||
7667 | */ | ||
7668 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
7669 | kvm_vcpu_kick(vcpu); | ||
7670 | } | ||
7671 | |||
7317 | /* | 7672 | /* |
7318 | * The guest has exited. See if we can fix it or if we need userspace | 7673 | * The guest has exited. See if we can fix it or if we need userspace |
7319 | * assistance. | 7674 | * assistance. |
@@ -7324,6 +7679,16 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
7324 | u32 exit_reason = vmx->exit_reason; | 7679 | u32 exit_reason = vmx->exit_reason; |
7325 | u32 vectoring_info = vmx->idt_vectoring_info; | 7680 | u32 vectoring_info = vmx->idt_vectoring_info; |
7326 | 7681 | ||
7682 | /* | ||
7683 | * Flush the PML buffer of logged GPAs; this keeps dirty_bitmap more | ||
7684 | * up to date. Another benefit: in kvm_vm_ioctl_get_dirty_log, before | ||
7685 | * querying dirty_bitmap, we only need to kick all vcpus out of guest | ||
7686 | * mode, because once a vcpu is back in root mode its PML buffer must | ||
7687 | * already have been flushed. | ||
7688 | */ | ||
7689 | if (enable_pml) | ||
7690 | vmx_flush_pml_buffer(vmx); | ||
7691 | |||
7327 | /* If guest state is invalid, start emulating */ | 7692 | /* If guest state is invalid, start emulating */ |
7328 | if (vmx->emulation_required) | 7693 | if (vmx->emulation_required) |
7329 | return handle_invalid_guest_state(vcpu); | 7694 | return handle_invalid_guest_state(vcpu); |
@@ -7471,9 +7836,6 @@ static void vmx_hwapic_isr_update(struct kvm *kvm, int isr) | |||
7471 | u16 status; | 7836 | u16 status; |
7472 | u8 old; | 7837 | u8 old; |
7473 | 7838 | ||
7474 | if (!vmx_vm_has_apicv(kvm)) | ||
7475 | return; | ||
7476 | |||
7477 | if (isr == -1) | 7839 | if (isr == -1) |
7478 | isr = 0; | 7840 | isr = 0; |
7479 | 7841 | ||
@@ -7973,6 +8335,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
7973 | { | 8335 | { |
7974 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 8336 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7975 | 8337 | ||
8338 | if (enable_pml) | ||
8339 | vmx_disable_pml(vmx); | ||
7976 | free_vpid(vmx); | 8340 | free_vpid(vmx); |
7977 | leave_guest_mode(vcpu); | 8341 | leave_guest_mode(vcpu); |
7978 | vmx_load_vmcs01(vcpu); | 8342 | vmx_load_vmcs01(vcpu); |
@@ -8040,9 +8404,25 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
8040 | goto free_vmcs; | 8404 | goto free_vmcs; |
8041 | } | 8405 | } |
8042 | 8406 | ||
8407 | if (nested) | ||
8408 | nested_vmx_setup_ctls_msrs(vmx); | ||
8409 | |||
8410 | vmx->nested.posted_intr_nv = -1; | ||
8043 | vmx->nested.current_vmptr = -1ull; | 8411 | vmx->nested.current_vmptr = -1ull; |
8044 | vmx->nested.current_vmcs12 = NULL; | 8412 | vmx->nested.current_vmcs12 = NULL; |
8045 | 8413 | ||
8414 | /* | ||
8415 | * If PML is turned on, failure to enable PML simply fails creation of | ||
8416 | * the vcpu. This lets us keep the PML logic simple (no need to handle | ||
8417 | * cases such as PML being enabled on only some of the guest's vcpus, | ||
8418 | * etc.). | ||
8419 | */ | ||
8420 | if (enable_pml) { | ||
8421 | err = vmx_enable_pml(vmx); | ||
8422 | if (err) | ||
8423 | goto free_vmcs; | ||
8424 | } | ||
8425 | |||
8046 | return &vmx->vcpu; | 8426 | return &vmx->vcpu; |
8047 | 8427 | ||
8048 | free_vmcs: | 8428 | free_vmcs: |
@@ -8184,9 +8564,10 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) | |||
8184 | 8564 | ||
8185 | static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) | 8565 | static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) |
8186 | { | 8566 | { |
8187 | kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, | 8567 | WARN_ON(mmu_is_nested(vcpu)); |
8188 | nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT); | 8568 | kvm_init_shadow_ept_mmu(vcpu, |
8189 | 8569 | to_vmx(vcpu)->nested.nested_vmx_ept_caps & | |
8570 | VMX_EPT_EXECUTE_ONLY_BIT); | ||
8190 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; | 8571 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; |
8191 | vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; | 8572 | vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; |
8192 | vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; | 8573 | vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; |
@@ -8199,6 +8580,18 @@ static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) | |||
8199 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; | 8580 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; |
8200 | } | 8581 | } |
8201 | 8582 | ||
8583 | static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, | ||
8584 | u16 error_code) | ||
8585 | { | ||
8586 | bool inequality, bit; | ||
8587 | |||
8588 | bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0; | ||
8589 | inequality = | ||
8590 | (error_code & vmcs12->page_fault_error_code_mask) != | ||
8591 | vmcs12->page_fault_error_code_match; | ||
8592 | return inequality ^ bit; | ||
8593 | } | ||
8594 | |||
8202 | static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, | 8595 | static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, |
8203 | struct x86_exception *fault) | 8596 | struct x86_exception *fault) |
8204 | { | 8597 | { |
@@ -8206,8 +8599,7 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, | |||
8206 | 8599 | ||
8207 | WARN_ON(!is_guest_mode(vcpu)); | 8600 | WARN_ON(!is_guest_mode(vcpu)); |
8208 | 8601 | ||
8209 | /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ | 8602 | if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) |
8210 | if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) | ||
8211 | nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, | 8603 | nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, |
8212 | vmcs_read32(VM_EXIT_INTR_INFO), | 8604 | vmcs_read32(VM_EXIT_INTR_INFO), |
8213 | vmcs_readl(EXIT_QUALIFICATION)); | 8605 | vmcs_readl(EXIT_QUALIFICATION)); |
@@ -8261,6 +8653,31 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
8261 | return false; | 8653 | return false; |
8262 | } | 8654 | } |
8263 | 8655 | ||
8656 | if (nested_cpu_has_posted_intr(vmcs12)) { | ||
8657 | if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64)) | ||
8658 | return false; | ||
8659 | |||
8660 | if (vmx->nested.pi_desc_page) { /* shouldn't happen */ | ||
8661 | kunmap(vmx->nested.pi_desc_page); | ||
8662 | nested_release_page(vmx->nested.pi_desc_page); | ||
8663 | } | ||
8664 | vmx->nested.pi_desc_page = | ||
8665 | nested_get_page(vcpu, vmcs12->posted_intr_desc_addr); | ||
8666 | if (!vmx->nested.pi_desc_page) | ||
8667 | return false; | ||
8668 | |||
8669 | vmx->nested.pi_desc = | ||
8670 | (struct pi_desc *)kmap(vmx->nested.pi_desc_page); | ||
8671 | if (!vmx->nested.pi_desc) { | ||
8672 | nested_release_page_clean(vmx->nested.pi_desc_page); | ||
8673 | return false; | ||
8674 | } | ||
8675 | vmx->nested.pi_desc = | ||
8676 | (struct pi_desc *)((void *)vmx->nested.pi_desc + | ||
8677 | (unsigned long)(vmcs12->posted_intr_desc_addr & | ||
8678 | (PAGE_SIZE - 1))); | ||
8679 | } | ||
8680 | |||
8264 | return true; | 8681 | return true; |
8265 | } | 8682 | } |
8266 | 8683 | ||
@@ -8286,6 +8703,310 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) | |||
8286 | ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL); | 8703 | ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL); |
8287 | } | 8704 | } |
8288 | 8705 | ||
8706 | static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu, | ||
8707 | struct vmcs12 *vmcs12) | ||
8708 | { | ||
8709 | int maxphyaddr; | ||
8710 | u64 addr; | ||
8711 | |||
8712 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) | ||
8713 | return 0; | ||
8714 | |||
8715 | if (vmcs12_read_any(vcpu, MSR_BITMAP, &addr)) { | ||
8716 | WARN_ON(1); | ||
8717 | return -EINVAL; | ||
8718 | } | ||
8719 | maxphyaddr = cpuid_maxphyaddr(vcpu); | ||
8720 | |||
8721 | if (!PAGE_ALIGNED(vmcs12->msr_bitmap) || | ||
8722 | ((addr + PAGE_SIZE) >> maxphyaddr)) | ||
8723 | return -EINVAL; | ||
8724 | |||
8725 | return 0; | ||
8726 | } | ||
8727 | |||
8728 | /* | ||
8729 | * Merge L0's and L1's MSR bitmaps; return false to indicate that | ||
8730 | * we do not use the hardware MSR bitmap. | ||
8731 | */ | ||
8732 | static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, | ||
8733 | struct vmcs12 *vmcs12) | ||
8734 | { | ||
8735 | int msr; | ||
8736 | struct page *page; | ||
8737 | unsigned long *msr_bitmap; | ||
8738 | |||
8739 | if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) | ||
8740 | return false; | ||
8741 | |||
8742 | page = nested_get_page(vcpu, vmcs12->msr_bitmap); | ||
8743 | if (!page) { | ||
8744 | WARN_ON(1); | ||
8745 | return false; | ||
8746 | } | ||
8747 | msr_bitmap = (unsigned long *)kmap(page); | ||
8748 | if (!msr_bitmap) { | ||
8749 | nested_release_page_clean(page); | ||
8750 | WARN_ON(1); | ||
8751 | return false; | ||
8752 | } | ||
8753 | |||
8754 | if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { | ||
8755 | if (nested_cpu_has_apic_reg_virt(vmcs12)) | ||
8756 | for (msr = 0x800; msr <= 0x8ff; msr++) | ||
8757 | nested_vmx_disable_intercept_for_msr( | ||
8758 | msr_bitmap, | ||
8759 | vmx_msr_bitmap_nested, | ||
8760 | msr, MSR_TYPE_R); | ||
8761 | /* TPR is allowed */ | ||
8762 | nested_vmx_disable_intercept_for_msr(msr_bitmap, | ||
8763 | vmx_msr_bitmap_nested, | ||
8764 | APIC_BASE_MSR + (APIC_TASKPRI >> 4), | ||
8765 | MSR_TYPE_R | MSR_TYPE_W); | ||
8766 | if (nested_cpu_has_vid(vmcs12)) { | ||
8767 | /* EOI and self-IPI are allowed */ | ||
8768 | nested_vmx_disable_intercept_for_msr( | ||
8769 | msr_bitmap, | ||
8770 | vmx_msr_bitmap_nested, | ||
8771 | APIC_BASE_MSR + (APIC_EOI >> 4), | ||
8772 | MSR_TYPE_W); | ||
8773 | nested_vmx_disable_intercept_for_msr( | ||
8774 | msr_bitmap, | ||
8775 | vmx_msr_bitmap_nested, | ||
8776 | APIC_BASE_MSR + (APIC_SELF_IPI >> 4), | ||
8777 | MSR_TYPE_W); | ||
8778 | } | ||
8779 | } else { | ||
8780 | /* | ||
8781 | * Enable the read intercept for all of the x2apic | ||
8782 | * MSRs. We should not rely on vmcs12 for any | ||
8783 | * optimizations here; it may have been modified | ||
8784 | * by L1. | ||
8785 | */ | ||
8786 | for (msr = 0x800; msr <= 0x8ff; msr++) | ||
8787 | __vmx_enable_intercept_for_msr( | ||
8788 | vmx_msr_bitmap_nested, | ||
8789 | msr, | ||
8790 | MSR_TYPE_R); | ||
8791 | |||
8792 | __vmx_enable_intercept_for_msr( | ||
8793 | vmx_msr_bitmap_nested, | ||
8794 | APIC_BASE_MSR + (APIC_TASKPRI >> 4), | ||
8795 | MSR_TYPE_W); | ||
8796 | __vmx_enable_intercept_for_msr( | ||
8797 | vmx_msr_bitmap_nested, | ||
8798 | APIC_BASE_MSR + (APIC_EOI >> 4), | ||
8799 | MSR_TYPE_W); | ||
8800 | __vmx_enable_intercept_for_msr( | ||
8801 | vmx_msr_bitmap_nested, | ||
8802 | APIC_BASE_MSR + (APIC_SELF_IPI >> 4), | ||
8803 | MSR_TYPE_W); | ||
8804 | } | ||
8805 | kunmap(page); | ||
8806 | nested_release_page_clean(page); | ||
8807 | |||
8808 | return true; | ||
8809 | } | ||
8810 | |||
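For reference, the helpers called above all operate on the architectural 4 KiB VMX MSR bitmap, which holds four 1024-byte sub-bitmaps (read-low, read-high, write-low, write-high), one bit per MSR. A rough standalone sketch of how an intercept bit is located, using a hypothetical msr_bitmap_clear_read() helper that is not part of the patch:

#include <stdint.h>
#include <string.h>

/* Clear the read-intercept bit for one MSR in a 4 KiB VMX MSR bitmap.
 * Layout per the SDM: read-low (bytes 0-1023), read-high (1024-2047),
 * write-low (2048-3071), write-high (3072-4095). */
static void msr_bitmap_clear_read(uint8_t bitmap[4096], uint32_t msr)
{
	uint32_t base;

	if (msr <= 0x1fff) {
		base = 0;			/* read bitmap, low MSRs  */
	} else if (msr - 0xc0000000u <= 0x1fff) {
		base = 1024;			/* read bitmap, high MSRs */
		msr &= 0x1fff;
	} else {
		return;				/* no bit for this MSR    */
	}

	bitmap[base + msr / 8] &= ~(1u << (msr % 8));
}

int main(void)
{
	static uint8_t bitmap[4096];

	memset(bitmap, 0xff, sizeof(bitmap));	/* start by intercepting everything */

	/* Let x2APIC MSR reads (0x800-0x8ff) through, as the apic_reg_virt
	 * branch above does for the nested bitmap. */
	for (uint32_t msr = 0x800; msr <= 0x8ff; msr++)
		msr_bitmap_clear_read(bitmap, msr);
	return 0;
}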
8811 | static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, | ||
8812 | struct vmcs12 *vmcs12) | ||
8813 | { | ||
8814 | if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && | ||
8815 | !nested_cpu_has_apic_reg_virt(vmcs12) && | ||
8816 | !nested_cpu_has_vid(vmcs12) && | ||
8817 | !nested_cpu_has_posted_intr(vmcs12)) | ||
8818 | return 0; | ||
8819 | |||
8820 | /* | ||
8821 | * If virtualize x2apic mode is enabled, | ||
8822 | * virtualize apic access must be disabled. | ||
8823 | */ | ||
8824 | if (nested_cpu_has_virt_x2apic_mode(vmcs12) && | ||
8825 | nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) | ||
8826 | return -EINVAL; | ||
8827 | |||
8828 | /* | ||
8829 | * If virtual interrupt delivery is enabled, | ||
8830 | * we must exit on external interrupts. | ||
8831 | */ | ||
8832 | if (nested_cpu_has_vid(vmcs12) && | ||
8833 | !nested_exit_on_intr(vcpu)) | ||
8834 | return -EINVAL; | ||
8835 | |||
8836 | /* | ||
8837 | * Bits 15:8 should be zero in posted_intr_nv; | ||
8838 | * the descriptor address has already been checked | ||
8839 | * in nested_get_vmcs12_pages. | ||
8840 | */ | ||
8841 | if (nested_cpu_has_posted_intr(vmcs12) && | ||
8842 | (!nested_cpu_has_vid(vmcs12) || | ||
8843 | !nested_exit_intr_ack_set(vcpu) || | ||
8844 | vmcs12->posted_intr_nv & 0xff00)) | ||
8845 | return -EINVAL; | ||
8846 | |||
8847 | /* tpr shadow is needed by all apicv features. */ | ||
8848 | if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) | ||
8849 | return -EINVAL; | ||
8850 | |||
8851 | return 0; | ||
8852 | } | ||
8853 | |||
8854 | static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, | ||
8855 | unsigned long count_field, | ||
8856 | unsigned long addr_field, | ||
8857 | int maxphyaddr) | ||
8858 | { | ||
8859 | u64 count, addr; | ||
8860 | |||
8861 | if (vmcs12_read_any(vcpu, count_field, &count) || | ||
8862 | vmcs12_read_any(vcpu, addr_field, &addr)) { | ||
8863 | WARN_ON(1); | ||
8864 | return -EINVAL; | ||
8865 | } | ||
8866 | if (count == 0) | ||
8867 | return 0; | ||
8868 | if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr || | ||
8869 | (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) { | ||
8870 | pr_warn_ratelimited( | ||
8871 | "nVMX: invalid MSR switch (0x%lx, %d, %llu, 0x%08llx)", | ||
8872 | addr_field, maxphyaddr, count, addr); | ||
8873 | return -EINVAL; | ||
8874 | } | ||
8875 | return 0; | ||
8876 | } | ||
8877 | |||
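The range check above boils down to: a non-empty MSR switch area must be 16-byte aligned, and both its first and last byte must fit within the guest's physical address width. A standalone sketch with made-up numbers (maxphyaddr = 36 and the function name are assumptions for illustration):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define MSR_ENTRY_SIZE 16u	/* sizeof(struct vmx_msr_entry) */

static bool msr_switch_area_ok(uint64_t addr, uint64_t count, int maxphyaddr)
{
	if (count == 0)
		return true;
	if (addr & 15)				/* must be 16-byte aligned  */
		return false;
	if (addr >> maxphyaddr)			/* start beyond phys width  */
		return false;
	if ((addr + count * MSR_ENTRY_SIZE - 1) >> maxphyaddr)
		return false;			/* end beyond phys width    */
	return true;
}

int main(void)
{
	assert(msr_switch_area_ok(0x1000, 4, 36));
	assert(!msr_switch_area_ok(0x1008, 4, 36));		/* misaligned     */
	assert(!msr_switch_area_ok((1ull << 36) - 16, 2, 36));	/* crosses limit  */
	return 0;
}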
8878 | static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu, | ||
8879 | struct vmcs12 *vmcs12) | ||
8880 | { | ||
8881 | int maxphyaddr; | ||
8882 | |||
8883 | if (vmcs12->vm_exit_msr_load_count == 0 && | ||
8884 | vmcs12->vm_exit_msr_store_count == 0 && | ||
8885 | vmcs12->vm_entry_msr_load_count == 0) | ||
8886 | return 0; /* Fast path */ | ||
8887 | maxphyaddr = cpuid_maxphyaddr(vcpu); | ||
8888 | if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT, | ||
8889 | VM_EXIT_MSR_LOAD_ADDR, maxphyaddr) || | ||
8890 | nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT, | ||
8891 | VM_EXIT_MSR_STORE_ADDR, maxphyaddr) || | ||
8892 | nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT, | ||
8893 | VM_ENTRY_MSR_LOAD_ADDR, maxphyaddr)) | ||
8894 | return -EINVAL; | ||
8895 | return 0; | ||
8896 | } | ||
8897 | |||
8898 | static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu, | ||
8899 | struct vmx_msr_entry *e) | ||
8900 | { | ||
8901 | /* x2APIC MSR accesses are not allowed */ | ||
8902 | if (apic_x2apic_mode(vcpu->arch.apic) && e->index >> 8 == 0x8) | ||
8903 | return -EINVAL; | ||
8904 | if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */ | ||
8905 | e->index == MSR_IA32_UCODE_REV) | ||
8906 | return -EINVAL; | ||
8907 | if (e->reserved != 0) | ||
8908 | return -EINVAL; | ||
8909 | return 0; | ||
8910 | } | ||
8911 | |||
8912 | static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu, | ||
8913 | struct vmx_msr_entry *e) | ||
8914 | { | ||
8915 | if (e->index == MSR_FS_BASE || | ||
8916 | e->index == MSR_GS_BASE || | ||
8917 | e->index == MSR_IA32_SMM_MONITOR_CTL || /* SMM is not supported */ | ||
8918 | nested_vmx_msr_check_common(vcpu, e)) | ||
8919 | return -EINVAL; | ||
8920 | return 0; | ||
8921 | } | ||
8922 | |||
8923 | static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu, | ||
8924 | struct vmx_msr_entry *e) | ||
8925 | { | ||
8926 | if (e->index == MSR_IA32_SMBASE || /* SMM is not supported */ | ||
8927 | nested_vmx_msr_check_common(vcpu, e)) | ||
8928 | return -EINVAL; | ||
8929 | return 0; | ||
8930 | } | ||
8931 | |||
8932 | /* | ||
8933 | * Load the guest's/host's MSRs at nested entry/exit. | ||
8934 | * Return 0 on success, or the 1-based index of the failing entry. | ||
8935 | */ | ||
8936 | static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) | ||
8937 | { | ||
8938 | u32 i; | ||
8939 | struct vmx_msr_entry e; | ||
8940 | struct msr_data msr; | ||
8941 | |||
8942 | msr.host_initiated = false; | ||
8943 | for (i = 0; i < count; i++) { | ||
8944 | if (kvm_read_guest(vcpu->kvm, gpa + i * sizeof(e), | ||
8945 | &e, sizeof(e))) { | ||
8946 | pr_warn_ratelimited( | ||
8947 | "%s cannot read MSR entry (%u, 0x%08llx)\n", | ||
8948 | __func__, i, gpa + i * sizeof(e)); | ||
8949 | goto fail; | ||
8950 | } | ||
8951 | if (nested_vmx_load_msr_check(vcpu, &e)) { | ||
8952 | pr_warn_ratelimited( | ||
8953 | "%s check failed (%u, 0x%x, 0x%x)\n", | ||
8954 | __func__, i, e.index, e.reserved); | ||
8955 | goto fail; | ||
8956 | } | ||
8957 | msr.index = e.index; | ||
8958 | msr.data = e.value; | ||
8959 | if (kvm_set_msr(vcpu, &msr)) { | ||
8960 | pr_warn_ratelimited( | ||
8961 | "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", | ||
8962 | __func__, i, e.index, e.value); | ||
8963 | goto fail; | ||
8964 | } | ||
8965 | } | ||
8966 | return 0; | ||
8967 | fail: | ||
8968 | return i + 1; | ||
8969 | } | ||
8970 | |||
8971 | static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) | ||
8972 | { | ||
8973 | u32 i; | ||
8974 | struct vmx_msr_entry e; | ||
8975 | |||
8976 | for (i = 0; i < count; i++) { | ||
8977 | if (kvm_read_guest(vcpu->kvm, | ||
8978 | gpa + i * sizeof(e), | ||
8979 | &e, 2 * sizeof(u32))) { | ||
8980 | pr_warn_ratelimited( | ||
8981 | "%s cannot read MSR entry (%u, 0x%08llx)\n", | ||
8982 | __func__, i, gpa + i * sizeof(e)); | ||
8983 | return -EINVAL; | ||
8984 | } | ||
8985 | if (nested_vmx_store_msr_check(vcpu, &e)) { | ||
8986 | pr_warn_ratelimited( | ||
8987 | "%s check failed (%u, 0x%x, 0x%x)\n", | ||
8988 | __func__, i, e.index, e.reserved); | ||
8989 | return -EINVAL; | ||
8990 | } | ||
8991 | if (kvm_get_msr(vcpu, e.index, &e.value)) { | ||
8992 | pr_warn_ratelimited( | ||
8993 | "%s cannot read MSR (%u, 0x%x)\n", | ||
8994 | __func__, i, e.index); | ||
8995 | return -EINVAL; | ||
8996 | } | ||
8997 | if (kvm_write_guest(vcpu->kvm, | ||
8998 | gpa + i * sizeof(e) + | ||
8999 | offsetof(struct vmx_msr_entry, value), | ||
9000 | &e.value, sizeof(e.value))) { | ||
9001 | pr_warn_ratelimited( | ||
9002 | "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", | ||
9003 | __func__, i, e.index, e.value); | ||
9004 | return -EINVAL; | ||
9005 | } | ||
9006 | } | ||
9007 | return 0; | ||
9008 | } | ||
9009 | |||
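Both walkers above iterate over 16-byte vmx_msr_entry records in guest memory; the store path deliberately reads only the first two u32s (index and reserved) and writes back just the 8-byte value field, which is why 2 * sizeof(u32) and offsetof() appear above. A standalone sketch of that layout arithmetic (not part of the patch; the struct mirrors the kernel's definition):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct vmx_msr_entry {
	uint32_t index;
	uint32_t reserved;	/* must be zero; rejected by the checks above */
	uint64_t value;
};

int main(void)
{
	assert(sizeof(struct vmx_msr_entry) == 16);
	assert(offsetof(struct vmx_msr_entry, value) == 8);

	/* Entry i of an area at guest-physical address gpa starts at
	 * gpa + i * sizeof(struct vmx_msr_entry). */
	uint64_t gpa = 0x1000, i = 3;
	assert(gpa + i * sizeof(struct vmx_msr_entry) == 0x1030);
	return 0;
}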
8289 | /* | 9010 | /* |
8290 | * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested | 9011 | * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested |
8291 | * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it | 9012 | * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it |
@@ -8365,8 +9086,23 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
8365 | 9086 | ||
8366 | exec_control = vmcs12->pin_based_vm_exec_control; | 9087 | exec_control = vmcs12->pin_based_vm_exec_control; |
8367 | exec_control |= vmcs_config.pin_based_exec_ctrl; | 9088 | exec_control |= vmcs_config.pin_based_exec_ctrl; |
8368 | exec_control &= ~(PIN_BASED_VMX_PREEMPTION_TIMER | | 9089 | exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; |
8369 | PIN_BASED_POSTED_INTR); | 9090 | |
9091 | if (nested_cpu_has_posted_intr(vmcs12)) { | ||
9092 | /* | ||
9093 | * Note that we use L0's vector here and in | ||
9094 | * vmx_deliver_nested_posted_interrupt. | ||
9095 | */ | ||
9096 | vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; | ||
9097 | vmx->nested.pi_pending = false; | ||
9098 | vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); | ||
9099 | vmcs_write64(POSTED_INTR_DESC_ADDR, | ||
9100 | page_to_phys(vmx->nested.pi_desc_page) + | ||
9101 | (unsigned long)(vmcs12->posted_intr_desc_addr & | ||
9102 | (PAGE_SIZE - 1))); | ||
9103 | } else | ||
9104 | exec_control &= ~PIN_BASED_POSTED_INTR; | ||
9105 | |||
8370 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); | 9106 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); |
8371 | 9107 | ||
8372 | vmx->nested.preemption_timer_expired = false; | 9108 | vmx->nested.preemption_timer_expired = false; |
@@ -8423,12 +9159,26 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
8423 | else | 9159 | else |
8424 | vmcs_write64(APIC_ACCESS_ADDR, | 9160 | vmcs_write64(APIC_ACCESS_ADDR, |
8425 | page_to_phys(vmx->nested.apic_access_page)); | 9161 | page_to_phys(vmx->nested.apic_access_page)); |
8426 | } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) { | 9162 | } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) && |
9163 | (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))) { | ||
8427 | exec_control |= | 9164 | exec_control |= |
8428 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 9165 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
8429 | kvm_vcpu_reload_apic_access_page(vcpu); | 9166 | kvm_vcpu_reload_apic_access_page(vcpu); |
8430 | } | 9167 | } |
8431 | 9168 | ||
9169 | if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { | ||
9170 | vmcs_write64(EOI_EXIT_BITMAP0, | ||
9171 | vmcs12->eoi_exit_bitmap0); | ||
9172 | vmcs_write64(EOI_EXIT_BITMAP1, | ||
9173 | vmcs12->eoi_exit_bitmap1); | ||
9174 | vmcs_write64(EOI_EXIT_BITMAP2, | ||
9175 | vmcs12->eoi_exit_bitmap2); | ||
9176 | vmcs_write64(EOI_EXIT_BITMAP3, | ||
9177 | vmcs12->eoi_exit_bitmap3); | ||
9178 | vmcs_write16(GUEST_INTR_STATUS, | ||
9179 | vmcs12->guest_intr_status); | ||
9180 | } | ||
9181 | |||
8432 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 9182 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
8433 | } | 9183 | } |
8434 | 9184 | ||
@@ -8462,11 +9212,17 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
8462 | vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); | 9212 | vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); |
8463 | } | 9213 | } |
8464 | 9214 | ||
9215 | if (cpu_has_vmx_msr_bitmap() && | ||
9216 | exec_control & CPU_BASED_USE_MSR_BITMAPS && | ||
9217 | nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) { | ||
9218 | vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_nested)); | ||
9219 | } else | ||
9220 | exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; | ||
9221 | |||
8465 | /* | 9222 | /* |
8466 | * Merging of IO and MSR bitmaps not currently supported. | 9223 | * Merging of the IO bitmap is not currently supported. |
8467 | * Rather, exit every time. | 9224 | * Rather, exit every time. |
8468 | */ | 9225 | */ |
8469 | exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; | ||
8470 | exec_control &= ~CPU_BASED_USE_IO_BITMAPS; | 9226 | exec_control &= ~CPU_BASED_USE_IO_BITMAPS; |
8471 | exec_control |= CPU_BASED_UNCOND_IO_EXITING; | 9227 | exec_control |= CPU_BASED_UNCOND_IO_EXITING; |
8472 | 9228 | ||
@@ -8582,6 +9338,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
8582 | int cpu; | 9338 | int cpu; |
8583 | struct loaded_vmcs *vmcs02; | 9339 | struct loaded_vmcs *vmcs02; |
8584 | bool ia32e; | 9340 | bool ia32e; |
9341 | u32 msr_entry_idx; | ||
8585 | 9342 | ||
8586 | if (!nested_vmx_check_permission(vcpu) || | 9343 | if (!nested_vmx_check_permission(vcpu) || |
8587 | !nested_vmx_check_vmcs12(vcpu)) | 9344 | !nested_vmx_check_vmcs12(vcpu)) |
@@ -8616,41 +9373,42 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
8616 | return 1; | 9373 | return 1; |
8617 | } | 9374 | } |
8618 | 9375 | ||
8619 | if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && | 9376 | if (!nested_get_vmcs12_pages(vcpu, vmcs12)) { |
8620 | !PAGE_ALIGNED(vmcs12->msr_bitmap)) { | ||
8621 | /*TODO: Also verify bits beyond physical address width are 0*/ | 9377 | /*TODO: Also verify bits beyond physical address width are 0*/ |
8622 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 9378 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
8623 | return 1; | 9379 | return 1; |
8624 | } | 9380 | } |
8625 | 9381 | ||
8626 | if (!nested_get_vmcs12_pages(vcpu, vmcs12)) { | 9382 | if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) { |
8627 | /*TODO: Also verify bits beyond physical address width are 0*/ | ||
8628 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 9383 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
8629 | return 1; | 9384 | return 1; |
8630 | } | 9385 | } |
8631 | 9386 | ||
8632 | if (vmcs12->vm_entry_msr_load_count > 0 || | 9387 | if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) { |
8633 | vmcs12->vm_exit_msr_load_count > 0 || | 9388 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
8634 | vmcs12->vm_exit_msr_store_count > 0) { | 9389 | return 1; |
8635 | pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n", | 9390 | } |
8636 | __func__); | 9391 | |
9392 | if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) { | ||
8637 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 9393 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
8638 | return 1; | 9394 | return 1; |
8639 | } | 9395 | } |
8640 | 9396 | ||
8641 | if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, | 9397 | if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, |
8642 | nested_vmx_true_procbased_ctls_low, | 9398 | vmx->nested.nested_vmx_true_procbased_ctls_low, |
8643 | nested_vmx_procbased_ctls_high) || | 9399 | vmx->nested.nested_vmx_procbased_ctls_high) || |
8644 | !vmx_control_verify(vmcs12->secondary_vm_exec_control, | 9400 | !vmx_control_verify(vmcs12->secondary_vm_exec_control, |
8645 | nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) || | 9401 | vmx->nested.nested_vmx_secondary_ctls_low, |
9402 | vmx->nested.nested_vmx_secondary_ctls_high) || | ||
8646 | !vmx_control_verify(vmcs12->pin_based_vm_exec_control, | 9403 | !vmx_control_verify(vmcs12->pin_based_vm_exec_control, |
8647 | nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) || | 9404 | vmx->nested.nested_vmx_pinbased_ctls_low, |
9405 | vmx->nested.nested_vmx_pinbased_ctls_high) || | ||
8648 | !vmx_control_verify(vmcs12->vm_exit_controls, | 9406 | !vmx_control_verify(vmcs12->vm_exit_controls, |
8649 | nested_vmx_true_exit_ctls_low, | 9407 | vmx->nested.nested_vmx_true_exit_ctls_low, |
8650 | nested_vmx_exit_ctls_high) || | 9408 | vmx->nested.nested_vmx_exit_ctls_high) || |
8651 | !vmx_control_verify(vmcs12->vm_entry_controls, | 9409 | !vmx_control_verify(vmcs12->vm_entry_controls, |
8652 | nested_vmx_true_entry_ctls_low, | 9410 | vmx->nested.nested_vmx_true_entry_ctls_low, |
8653 | nested_vmx_entry_ctls_high)) | 9411 | vmx->nested.nested_vmx_entry_ctls_high)) |
8654 | { | 9412 | { |
8655 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 9413 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
8656 | return 1; | 9414 | return 1; |
@@ -8663,7 +9421,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
8663 | return 1; | 9421 | return 1; |
8664 | } | 9422 | } |
8665 | 9423 | ||
8666 | if (!nested_cr0_valid(vmcs12, vmcs12->guest_cr0) || | 9424 | if (!nested_cr0_valid(vcpu, vmcs12->guest_cr0) || |
8667 | ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) { | 9425 | ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) { |
8668 | nested_vmx_entry_failure(vcpu, vmcs12, | 9426 | nested_vmx_entry_failure(vcpu, vmcs12, |
8669 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); | 9427 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); |
@@ -8739,10 +9497,21 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
8739 | 9497 | ||
8740 | vmx_segment_cache_clear(vmx); | 9498 | vmx_segment_cache_clear(vmx); |
8741 | 9499 | ||
8742 | vmcs12->launch_state = 1; | ||
8743 | |||
8744 | prepare_vmcs02(vcpu, vmcs12); | 9500 | prepare_vmcs02(vcpu, vmcs12); |
8745 | 9501 | ||
9502 | msr_entry_idx = nested_vmx_load_msr(vcpu, | ||
9503 | vmcs12->vm_entry_msr_load_addr, | ||
9504 | vmcs12->vm_entry_msr_load_count); | ||
9505 | if (msr_entry_idx) { | ||
9506 | leave_guest_mode(vcpu); | ||
9507 | vmx_load_vmcs01(vcpu); | ||
9508 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
9509 | EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx); | ||
9510 | return 1; | ||
9511 | } | ||
9512 | |||
9513 | vmcs12->launch_state = 1; | ||
9514 | |||
8746 | if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) | 9515 | if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) |
8747 | return kvm_emulate_halt(vcpu); | 9516 | return kvm_emulate_halt(vcpu); |
8748 | 9517 | ||
@@ -8869,9 +9638,10 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) | |||
8869 | if (vmx->nested.nested_run_pending) | 9638 | if (vmx->nested.nested_run_pending) |
8870 | return -EBUSY; | 9639 | return -EBUSY; |
8871 | nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); | 9640 | nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); |
9641 | return 0; | ||
8872 | } | 9642 | } |
8873 | 9643 | ||
8874 | return 0; | 9644 | return vmx_complete_nested_posted_interrupt(vcpu); |
8875 | } | 9645 | } |
8876 | 9646 | ||
8877 | static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) | 9647 | static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) |
@@ -8981,6 +9751,9 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
8981 | vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); | 9751 | vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); |
8982 | } | 9752 | } |
8983 | 9753 | ||
9754 | if (nested_cpu_has_vid(vmcs12)) | ||
9755 | vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS); | ||
9756 | |||
8984 | vmcs12->vm_entry_controls = | 9757 | vmcs12->vm_entry_controls = |
8985 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | | 9758 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | |
8986 | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); | 9759 | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); |
@@ -9172,6 +9945,13 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
9172 | 9945 | ||
9173 | kvm_set_dr(vcpu, 7, 0x400); | 9946 | kvm_set_dr(vcpu, 7, 0x400); |
9174 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); | 9947 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); |
9948 | |||
9949 | if (cpu_has_vmx_msr_bitmap()) | ||
9950 | vmx_set_msr_bitmap(vcpu); | ||
9951 | |||
9952 | if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, | ||
9953 | vmcs12->vm_exit_msr_load_count)) | ||
9954 | nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL); | ||
9175 | } | 9955 | } |
9176 | 9956 | ||
9177 | /* | 9957 | /* |
@@ -9193,6 +9973,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
9193 | prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, | 9973 | prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, |
9194 | exit_qualification); | 9974 | exit_qualification); |
9195 | 9975 | ||
9976 | if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr, | ||
9977 | vmcs12->vm_exit_msr_store_count)) | ||
9978 | nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL); | ||
9979 | |||
9196 | vmx_load_vmcs01(vcpu); | 9980 | vmx_load_vmcs01(vcpu); |
9197 | 9981 | ||
9198 | if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) | 9982 | if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) |
@@ -9235,6 +10019,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
9235 | nested_release_page(vmx->nested.virtual_apic_page); | 10019 | nested_release_page(vmx->nested.virtual_apic_page); |
9236 | vmx->nested.virtual_apic_page = NULL; | 10020 | vmx->nested.virtual_apic_page = NULL; |
9237 | } | 10021 | } |
10022 | if (vmx->nested.pi_desc_page) { | ||
10023 | kunmap(vmx->nested.pi_desc_page); | ||
10024 | nested_release_page(vmx->nested.pi_desc_page); | ||
10025 | vmx->nested.pi_desc_page = NULL; | ||
10026 | vmx->nested.pi_desc = NULL; | ||
10027 | } | ||
9238 | 10028 | ||
9239 | /* | 10029 | /* |
9240 | * We are now running in L2, mmu_notifier will force to reload the | 10030 | * We are now running in L2, mmu_notifier will force to reload the |
@@ -9301,6 +10091,31 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) | |||
9301 | shrink_ple_window(vcpu); | 10091 | shrink_ple_window(vcpu); |
9302 | } | 10092 | } |
9303 | 10093 | ||
10094 | static void vmx_slot_enable_log_dirty(struct kvm *kvm, | ||
10095 | struct kvm_memory_slot *slot) | ||
10096 | { | ||
10097 | kvm_mmu_slot_leaf_clear_dirty(kvm, slot); | ||
10098 | kvm_mmu_slot_largepage_remove_write_access(kvm, slot); | ||
10099 | } | ||
10100 | |||
10101 | static void vmx_slot_disable_log_dirty(struct kvm *kvm, | ||
10102 | struct kvm_memory_slot *slot) | ||
10103 | { | ||
10104 | kvm_mmu_slot_set_dirty(kvm, slot); | ||
10105 | } | ||
10106 | |||
10107 | static void vmx_flush_log_dirty(struct kvm *kvm) | ||
10108 | { | ||
10109 | kvm_flush_pml_buffers(kvm); | ||
10110 | } | ||
10111 | |||
10112 | static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, | ||
10113 | struct kvm_memory_slot *memslot, | ||
10114 | gfn_t offset, unsigned long mask) | ||
10115 | { | ||
10116 | kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); | ||
10117 | } | ||
10118 | |||
9304 | static struct kvm_x86_ops vmx_x86_ops = { | 10119 | static struct kvm_x86_ops vmx_x86_ops = { |
9305 | .cpu_has_kvm_support = cpu_has_kvm_support, | 10120 | .cpu_has_kvm_support = cpu_has_kvm_support, |
9306 | .disabled_by_bios = vmx_disabled_by_bios, | 10121 | .disabled_by_bios = vmx_disabled_by_bios, |
@@ -9409,6 +10224,11 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
9409 | .check_nested_events = vmx_check_nested_events, | 10224 | .check_nested_events = vmx_check_nested_events, |
9410 | 10225 | ||
9411 | .sched_in = vmx_sched_in, | 10226 | .sched_in = vmx_sched_in, |
10227 | |||
10228 | .slot_enable_log_dirty = vmx_slot_enable_log_dirty, | ||
10229 | .slot_disable_log_dirty = vmx_slot_disable_log_dirty, | ||
10230 | .flush_log_dirty = vmx_flush_log_dirty, | ||
10231 | .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, | ||
9412 | }; | 10232 | }; |
9413 | 10233 | ||
9414 | static int __init vmx_init(void) | 10234 | static int __init vmx_init(void) |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c259814200bd..bd7a70be41b3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -108,6 +108,10 @@ EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); | |||
108 | static u32 tsc_tolerance_ppm = 250; | 108 | static u32 tsc_tolerance_ppm = 250; |
109 | module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); | 109 | module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); |
110 | 110 | ||
111 | /* lapic timer advance (tscdeadline mode only) in nanoseconds */ | ||
112 | unsigned int lapic_timer_advance_ns = 0; | ||
113 | module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); | ||
114 | |||
111 | static bool backwards_tsc_observed = false; | 115 | static bool backwards_tsc_observed = false; |
112 | 116 | ||
113 | #define KVM_NR_SHARED_MSRS 16 | 117 | #define KVM_NR_SHARED_MSRS 16 |
@@ -141,6 +145,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
141 | { "irq_window", VCPU_STAT(irq_window_exits) }, | 145 | { "irq_window", VCPU_STAT(irq_window_exits) }, |
142 | { "nmi_window", VCPU_STAT(nmi_window_exits) }, | 146 | { "nmi_window", VCPU_STAT(nmi_window_exits) }, |
143 | { "halt_exits", VCPU_STAT(halt_exits) }, | 147 | { "halt_exits", VCPU_STAT(halt_exits) }, |
148 | { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, | ||
144 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, | 149 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, |
145 | { "hypercalls", VCPU_STAT(hypercalls) }, | 150 | { "hypercalls", VCPU_STAT(hypercalls) }, |
146 | { "request_irq", VCPU_STAT(request_irq_exits) }, | 151 | { "request_irq", VCPU_STAT(request_irq_exits) }, |
@@ -492,7 +497,7 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
492 | } | 497 | } |
493 | EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu); | 498 | EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu); |
494 | 499 | ||
495 | int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, | 500 | static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, |
496 | void *data, int offset, int len, u32 access) | 501 | void *data, int offset, int len, u32 access) |
497 | { | 502 | { |
498 | return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn, | 503 | return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn, |
@@ -643,7 +648,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) | |||
643 | } | 648 | } |
644 | } | 649 | } |
645 | 650 | ||
646 | int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | 651 | static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) |
647 | { | 652 | { |
648 | u64 xcr0 = xcr; | 653 | u64 xcr0 = xcr; |
649 | u64 old_xcr0 = vcpu->arch.xcr0; | 654 | u64 old_xcr0 = vcpu->arch.xcr0; |
@@ -1083,6 +1088,15 @@ static void update_pvclock_gtod(struct timekeeper *tk) | |||
1083 | } | 1088 | } |
1084 | #endif | 1089 | #endif |
1085 | 1090 | ||
1091 | void kvm_set_pending_timer(struct kvm_vcpu *vcpu) | ||
1092 | { | ||
1093 | /* | ||
1094 | * Note: KVM_REQ_PENDING_TIMER is implicitly checked in | ||
1095 | * vcpu_enter_guest. This function is only called from | ||
1096 | * the physical CPU that is running vcpu. | ||
1097 | */ | ||
1098 | kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); | ||
1099 | } | ||
1086 | 1100 | ||
1087 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | 1101 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) |
1088 | { | 1102 | { |
@@ -1180,7 +1194,7 @@ static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0); | |||
1180 | #endif | 1194 | #endif |
1181 | 1195 | ||
1182 | static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); | 1196 | static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); |
1183 | unsigned long max_tsc_khz; | 1197 | static unsigned long max_tsc_khz; |
1184 | 1198 | ||
1185 | static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) | 1199 | static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) |
1186 | { | 1200 | { |
@@ -1234,7 +1248,7 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) | |||
1234 | return tsc; | 1248 | return tsc; |
1235 | } | 1249 | } |
1236 | 1250 | ||
1237 | void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) | 1251 | static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) |
1238 | { | 1252 | { |
1239 | #ifdef CONFIG_X86_64 | 1253 | #ifdef CONFIG_X86_64 |
1240 | bool vcpus_matched; | 1254 | bool vcpus_matched; |
@@ -1529,7 +1543,8 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) | |||
1529 | &ka->master_cycle_now); | 1543 | &ka->master_cycle_now); |
1530 | 1544 | ||
1531 | ka->use_master_clock = host_tsc_clocksource && vcpus_matched | 1545 | ka->use_master_clock = host_tsc_clocksource && vcpus_matched |
1532 | && !backwards_tsc_observed; | 1546 | && !backwards_tsc_observed |
1547 | && !ka->boot_vcpu_runs_old_kvmclock; | ||
1533 | 1548 | ||
1534 | if (ka->use_master_clock) | 1549 | if (ka->use_master_clock) |
1535 | atomic_set(&kvm_guest_has_master_clock, 1); | 1550 | atomic_set(&kvm_guest_has_master_clock, 1); |
@@ -2161,8 +2176,20 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2161 | case MSR_KVM_SYSTEM_TIME_NEW: | 2176 | case MSR_KVM_SYSTEM_TIME_NEW: |
2162 | case MSR_KVM_SYSTEM_TIME: { | 2177 | case MSR_KVM_SYSTEM_TIME: { |
2163 | u64 gpa_offset; | 2178 | u64 gpa_offset; |
2179 | struct kvm_arch *ka = &vcpu->kvm->arch; | ||
2180 | |||
2164 | kvmclock_reset(vcpu); | 2181 | kvmclock_reset(vcpu); |
2165 | 2182 | ||
2183 | if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) { | ||
2184 | bool tmp = (msr == MSR_KVM_SYSTEM_TIME); | ||
2185 | |||
2186 | if (ka->boot_vcpu_runs_old_kvmclock != tmp) | ||
2187 | set_bit(KVM_REQ_MASTERCLOCK_UPDATE, | ||
2188 | &vcpu->requests); | ||
2189 | |||
2190 | ka->boot_vcpu_runs_old_kvmclock = tmp; | ||
2191 | } | ||
2192 | |||
2166 | vcpu->arch.time = data; | 2193 | vcpu->arch.time = data; |
2167 | kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); | 2194 | kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); |
2168 | 2195 | ||
@@ -2324,6 +2351,7 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2324 | { | 2351 | { |
2325 | return kvm_x86_ops->get_msr(vcpu, msr_index, pdata); | 2352 | return kvm_x86_ops->get_msr(vcpu, msr_index, pdata); |
2326 | } | 2353 | } |
2354 | EXPORT_SYMBOL_GPL(kvm_get_msr); | ||
2327 | 2355 | ||
2328 | static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | 2356 | static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) |
2329 | { | 2357 | { |
@@ -2738,6 +2766,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
2738 | case KVM_CAP_READONLY_MEM: | 2766 | case KVM_CAP_READONLY_MEM: |
2739 | case KVM_CAP_HYPERV_TIME: | 2767 | case KVM_CAP_HYPERV_TIME: |
2740 | case KVM_CAP_IOAPIC_POLARITY_IGNORED: | 2768 | case KVM_CAP_IOAPIC_POLARITY_IGNORED: |
2769 | case KVM_CAP_TSC_DEADLINE_TIMER: | ||
2741 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | 2770 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT |
2742 | case KVM_CAP_ASSIGN_DEV_IRQ: | 2771 | case KVM_CAP_ASSIGN_DEV_IRQ: |
2743 | case KVM_CAP_PCI_2_3: | 2772 | case KVM_CAP_PCI_2_3: |
@@ -2776,9 +2805,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
2776 | case KVM_CAP_TSC_CONTROL: | 2805 | case KVM_CAP_TSC_CONTROL: |
2777 | r = kvm_has_tsc_control; | 2806 | r = kvm_has_tsc_control; |
2778 | break; | 2807 | break; |
2779 | case KVM_CAP_TSC_DEADLINE_TIMER: | ||
2780 | r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER); | ||
2781 | break; | ||
2782 | default: | 2808 | default: |
2783 | r = 0; | 2809 | r = 0; |
2784 | break; | 2810 | break; |
@@ -3734,83 +3760,43 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, | |||
3734 | * @kvm: kvm instance | 3760 | * @kvm: kvm instance |
3735 | * @log: slot id and address to which we copy the log | 3761 | * @log: slot id and address to which we copy the log |
3736 | * | 3762 | * |
3737 | * We need to keep it in mind that VCPU threads can write to the bitmap | 3763 | * Steps 1-4 below provide a general overview of dirty page logging. See |
3738 | * concurrently. So, to avoid losing data, we keep the following order for | 3764 | * kvm_get_dirty_log_protect() function description for additional details. |
3739 | * each bit: | 3765 | * |
3766 | * We call kvm_get_dirty_log_protect() to handle steps 1-3; upon return we | ||
3767 | * always flush the TLB (step 4) even if a previous step failed and the dirty | ||
3768 | * bitmap may be corrupt. Regardless of the previous outcome, the KVM logging | ||
3769 | * API does not preclude a subsequent dirty log read from user space. Flushing | ||
3770 | * the TLB ensures that writes will be marked dirty for the next log read. | ||
3740 | * | 3771 | * |
3741 | * 1. Take a snapshot of the bit and clear it if needed. | 3772 | * 1. Take a snapshot of the bit and clear it if needed. |
3742 | * 2. Write protect the corresponding page. | 3773 | * 2. Write protect the corresponding page. |
3743 | * 3. Flush TLB's if needed. | 3774 | * 3. Copy the snapshot to the userspace. |
3744 | * 4. Copy the snapshot to the userspace. | 3775 | * 4. Flush TLB's if needed. |
3745 | * | ||
3746 | * Between 2 and 3, the guest may write to the page using the remaining TLB | ||
3747 | * entry. This is not a problem because the page will be reported dirty at | ||
3748 | * step 4 using the snapshot taken before and step 3 ensures that successive | ||
3749 | * writes will be logged for the next call. | ||
3750 | */ | 3776 | */ |
3751 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) | 3777 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) |
3752 | { | 3778 | { |
3753 | int r; | ||
3754 | struct kvm_memory_slot *memslot; | ||
3755 | unsigned long n, i; | ||
3756 | unsigned long *dirty_bitmap; | ||
3757 | unsigned long *dirty_bitmap_buffer; | ||
3758 | bool is_dirty = false; | 3779 | bool is_dirty = false; |
3780 | int r; | ||
3759 | 3781 | ||
3760 | mutex_lock(&kvm->slots_lock); | 3782 | mutex_lock(&kvm->slots_lock); |
3761 | 3783 | ||
3762 | r = -EINVAL; | 3784 | /* |
3763 | if (log->slot >= KVM_USER_MEM_SLOTS) | 3785 | * Flush potentially hardware-cached dirty pages to dirty_bitmap. |
3764 | goto out; | 3786 | */ |
3765 | 3787 | if (kvm_x86_ops->flush_log_dirty) | |
3766 | memslot = id_to_memslot(kvm->memslots, log->slot); | 3788 | kvm_x86_ops->flush_log_dirty(kvm); |
3767 | |||
3768 | dirty_bitmap = memslot->dirty_bitmap; | ||
3769 | r = -ENOENT; | ||
3770 | if (!dirty_bitmap) | ||
3771 | goto out; | ||
3772 | |||
3773 | n = kvm_dirty_bitmap_bytes(memslot); | ||
3774 | |||
3775 | dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long); | ||
3776 | memset(dirty_bitmap_buffer, 0, n); | ||
3777 | |||
3778 | spin_lock(&kvm->mmu_lock); | ||
3779 | |||
3780 | for (i = 0; i < n / sizeof(long); i++) { | ||
3781 | unsigned long mask; | ||
3782 | gfn_t offset; | ||
3783 | |||
3784 | if (!dirty_bitmap[i]) | ||
3785 | continue; | ||
3786 | |||
3787 | is_dirty = true; | ||
3788 | |||
3789 | mask = xchg(&dirty_bitmap[i], 0); | ||
3790 | dirty_bitmap_buffer[i] = mask; | ||
3791 | |||
3792 | offset = i * BITS_PER_LONG; | ||
3793 | kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask); | ||
3794 | } | ||
3795 | |||
3796 | spin_unlock(&kvm->mmu_lock); | ||
3797 | 3789 | ||
3798 | /* See the comments in kvm_mmu_slot_remove_write_access(). */ | 3790 | r = kvm_get_dirty_log_protect(kvm, log, &is_dirty); |
3799 | lockdep_assert_held(&kvm->slots_lock); | ||
3800 | 3791 | ||
3801 | /* | 3792 | /* |
3802 | * All the TLBs can be flushed out of mmu lock, see the comments in | 3793 | * All the TLBs can be flushed out of mmu lock, see the comments in |
3803 | * kvm_mmu_slot_remove_write_access(). | 3794 | * kvm_mmu_slot_remove_write_access(). |
3804 | */ | 3795 | */ |
3796 | lockdep_assert_held(&kvm->slots_lock); | ||
3805 | if (is_dirty) | 3797 | if (is_dirty) |
3806 | kvm_flush_remote_tlbs(kvm); | 3798 | kvm_flush_remote_tlbs(kvm); |
3807 | 3799 | ||
3808 | r = -EFAULT; | ||
3809 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) | ||
3810 | goto out; | ||
3811 | |||
3812 | r = 0; | ||
3813 | out: | ||
3814 | mutex_unlock(&kvm->slots_lock); | 3800 | mutex_unlock(&kvm->slots_lock); |
3815 | return r; | 3801 | return r; |
3816 | } | 3802 | } |
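From user space the reworked path is still driven by the existing KVM_GET_DIRTY_LOG ioctl on the VM file descriptor. A rough sketch of a caller-side helper, with the fd, slot id and slot size left to the caller, error handling kept minimal, and the bitmap sized as one bit per 4 KiB page rounded up to a multiple of 64 bits (matching what the kernel copies on 64-bit hosts):

#include <linux/kvm.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>

/* Fetch the dirty-page bitmap snapshot for one memslot; returns NULL on error. */
static void *fetch_dirty_bitmap(int vm_fd, uint32_t slot, uint64_t memory_size)
{
	size_t bits = memory_size / 4096;
	size_t bytes = ((bits + 63) / 64) * 8;
	void *bitmap = calloc(1, bytes);

	if (!bitmap)
		return NULL;

	struct kvm_dirty_log log = {
		.slot = slot,
		.dirty_bitmap = bitmap,
	};

	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
		free(bitmap);
		return NULL;
	}
	return bitmap;	/* caller scans the snapshot and frees it */
}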
@@ -4516,6 +4502,8 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, | |||
4516 | if (rc != X86EMUL_CONTINUE) | 4502 | if (rc != X86EMUL_CONTINUE) |
4517 | return rc; | 4503 | return rc; |
4518 | addr += now; | 4504 | addr += now; |
4505 | if (ctxt->mode != X86EMUL_MODE_PROT64) | ||
4506 | addr = (u32)addr; | ||
4519 | val += now; | 4507 | val += now; |
4520 | bytes -= now; | 4508 | bytes -= now; |
4521 | } | 4509 | } |
@@ -4984,6 +4972,11 @@ static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulon | |||
4984 | kvm_register_write(emul_to_vcpu(ctxt), reg, val); | 4972 | kvm_register_write(emul_to_vcpu(ctxt), reg, val); |
4985 | } | 4973 | } |
4986 | 4974 | ||
4975 | static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked) | ||
4976 | { | ||
4977 | kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked); | ||
4978 | } | ||
4979 | |||
4987 | static const struct x86_emulate_ops emulate_ops = { | 4980 | static const struct x86_emulate_ops emulate_ops = { |
4988 | .read_gpr = emulator_read_gpr, | 4981 | .read_gpr = emulator_read_gpr, |
4989 | .write_gpr = emulator_write_gpr, | 4982 | .write_gpr = emulator_write_gpr, |
@@ -5019,6 +5012,7 @@ static const struct x86_emulate_ops emulate_ops = { | |||
5019 | .put_fpu = emulator_put_fpu, | 5012 | .put_fpu = emulator_put_fpu, |
5020 | .intercept = emulator_intercept, | 5013 | .intercept = emulator_intercept, |
5021 | .get_cpuid = emulator_get_cpuid, | 5014 | .get_cpuid = emulator_get_cpuid, |
5015 | .set_nmi_mask = emulator_set_nmi_mask, | ||
5022 | }; | 5016 | }; |
5023 | 5017 | ||
5024 | static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) | 5018 | static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) |
@@ -6311,6 +6305,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6311 | } | 6305 | } |
6312 | 6306 | ||
6313 | trace_kvm_entry(vcpu->vcpu_id); | 6307 | trace_kvm_entry(vcpu->vcpu_id); |
6308 | wait_lapic_expire(vcpu); | ||
6314 | kvm_x86_ops->run(vcpu); | 6309 | kvm_x86_ops->run(vcpu); |
6315 | 6310 | ||
6316 | /* | 6311 | /* |
@@ -7041,15 +7036,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
7041 | return r; | 7036 | return r; |
7042 | } | 7037 | } |
7043 | 7038 | ||
7044 | int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) | 7039 | void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) |
7045 | { | 7040 | { |
7046 | int r; | ||
7047 | struct msr_data msr; | 7041 | struct msr_data msr; |
7048 | struct kvm *kvm = vcpu->kvm; | 7042 | struct kvm *kvm = vcpu->kvm; |
7049 | 7043 | ||
7050 | r = vcpu_load(vcpu); | 7044 | if (vcpu_load(vcpu)) |
7051 | if (r) | 7045 | return; |
7052 | return r; | ||
7053 | msr.data = 0x0; | 7046 | msr.data = 0x0; |
7054 | msr.index = MSR_IA32_TSC; | 7047 | msr.index = MSR_IA32_TSC; |
7055 | msr.host_initiated = true; | 7048 | msr.host_initiated = true; |
@@ -7058,8 +7051,6 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) | |||
7058 | 7051 | ||
7059 | schedule_delayed_work(&kvm->arch.kvmclock_sync_work, | 7052 | schedule_delayed_work(&kvm->arch.kvmclock_sync_work, |
7060 | KVMCLOCK_SYNC_PERIOD); | 7053 | KVMCLOCK_SYNC_PERIOD); |
7061 | |||
7062 | return r; | ||
7063 | } | 7054 | } |
7064 | 7055 | ||
7065 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | 7056 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) |
@@ -7549,12 +7540,62 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
7549 | return 0; | 7540 | return 0; |
7550 | } | 7541 | } |
7551 | 7542 | ||
7543 | static void kvm_mmu_slot_apply_flags(struct kvm *kvm, | ||
7544 | struct kvm_memory_slot *new) | ||
7545 | { | ||
7546 | /* Still write protect RO slot */ | ||
7547 | if (new->flags & KVM_MEM_READONLY) { | ||
7548 | kvm_mmu_slot_remove_write_access(kvm, new); | ||
7549 | return; | ||
7550 | } | ||
7551 | |||
7552 | /* | ||
7553 | * Call kvm_x86_ops dirty logging hooks when they are valid. | ||
7554 | * | ||
7555 | * kvm_x86_ops->slot_disable_log_dirty is called when: | ||
7556 | * | ||
7557 | * - KVM_MR_CREATE with dirty logging disabled | ||
7558 | * - KVM_MR_FLAGS_ONLY with dirty logging disabled in the new flags | ||
7559 | * | ||
7560 | * The reason is that, in the case of PML, we need to set the D-bit for any | ||
7561 | * slots with dirty logging disabled in order to eliminate unnecessary GPA | ||
7562 | * logging in the PML buffer (and a potential PML-buffer-full VMEXIT). This | ||
7563 | * guarantees that leaving PML enabled during the guest's lifetime won't add | ||
7564 | * any additional overhead from PML when the guest is running with dirty | ||
7565 | * logging disabled for its memory slots. | ||
7566 | * | ||
7567 | * kvm_x86_ops->slot_enable_log_dirty is called when switching new slot | ||
7568 | * to dirty logging mode. | ||
7569 | * | ||
7570 | * If kvm_x86_ops dirty logging hooks are invalid, use write protect. | ||
7571 | * | ||
7572 | * In case of write protect: | ||
7573 | * | ||
7574 | * Write protect all pages for dirty logging. | ||
7575 | * | ||
7576 | * All the sptes including the large sptes which point to this | ||
7577 | * slot are set to readonly. We can not create any new large | ||
7578 | * spte on this slot until the end of the logging. | ||
7579 | * | ||
7580 | * See the comments in fast_page_fault(). | ||
7581 | */ | ||
7582 | if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) { | ||
7583 | if (kvm_x86_ops->slot_enable_log_dirty) | ||
7584 | kvm_x86_ops->slot_enable_log_dirty(kvm, new); | ||
7585 | else | ||
7586 | kvm_mmu_slot_remove_write_access(kvm, new); | ||
7587 | } else { | ||
7588 | if (kvm_x86_ops->slot_disable_log_dirty) | ||
7589 | kvm_x86_ops->slot_disable_log_dirty(kvm, new); | ||
7590 | } | ||
7591 | } | ||
7592 | |||
7552 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 7593 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
7553 | struct kvm_userspace_memory_region *mem, | 7594 | struct kvm_userspace_memory_region *mem, |
7554 | const struct kvm_memory_slot *old, | 7595 | const struct kvm_memory_slot *old, |
7555 | enum kvm_mr_change change) | 7596 | enum kvm_mr_change change) |
7556 | { | 7597 | { |
7557 | 7598 | struct kvm_memory_slot *new; | |
7558 | int nr_mmu_pages = 0; | 7599 | int nr_mmu_pages = 0; |
7559 | 7600 | ||
7560 | if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) { | 7601 | if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) { |
@@ -7573,17 +7614,20 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
7573 | 7614 | ||
7574 | if (nr_mmu_pages) | 7615 | if (nr_mmu_pages) |
7575 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); | 7616 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); |
7617 | |||
7618 | /* It's OK to get 'new' slot here as it has already been installed */ | ||
7619 | new = id_to_memslot(kvm->memslots, mem->slot); | ||
7620 | |||
7576 | /* | 7621 | /* |
7577 | * Write protect all pages for dirty logging. | 7622 | * Set up write protection and/or dirty logging for the new slot. |
7578 | * | 7623 | * |
7579 | * All the sptes including the large sptes which point to this | 7624 | * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have |
7580 | * slot are set to readonly. We can not create any new large | 7625 | * been zapped so no dirty logging work is needed for the old slot. For |
7581 | * spte on this slot until the end of the logging. | 7626 | * KVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the |
7582 | * | 7627 | * new and it's also covered when dealing with the new slot. |
7583 | * See the comments in fast_page_fault(). | ||
7584 | */ | 7628 | */ |
7585 | if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) | 7629 | if (change != KVM_MR_DELETE) |
7586 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 7630 | kvm_mmu_slot_apply_flags(kvm, new); |
7587 | } | 7631 | } |
7588 | 7632 | ||
7589 | void kvm_arch_flush_shadow_all(struct kvm *kvm) | 7633 | void kvm_arch_flush_shadow_all(struct kvm *kvm) |
@@ -7837,3 +7881,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); | |||
7837 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); | 7881 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); |
7838 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); | 7882 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); |
7839 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); | 7883 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); |
7884 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full); | ||
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index cc1d61af6140..f5fef1868096 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -147,6 +147,7 @@ static inline void kvm_register_writel(struct kvm_vcpu *vcpu, | |||
147 | 147 | ||
148 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | 148 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); |
149 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | 149 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); |
150 | void kvm_set_pending_timer(struct kvm_vcpu *vcpu); | ||
150 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); | 151 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); |
151 | 152 | ||
152 | void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr); | 153 | void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr); |
@@ -170,5 +171,7 @@ extern u64 kvm_supported_xcr0(void); | |||
170 | 171 | ||
171 | extern unsigned int min_timer_period_us; | 172 | extern unsigned int min_timer_period_us; |
172 | 173 | ||
174 | extern unsigned int lapic_timer_advance_ns; | ||
175 | |||
173 | extern struct static_key kvm_no_apic_vcpu; | 176 | extern struct static_key kvm_no_apic_vcpu; |
174 | #endif | 177 | #endif |
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 1a146ccee701..2ab290bec655 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c | |||
@@ -481,15 +481,19 @@ out: | |||
481 | return tlist; | 481 | return tlist; |
482 | } | 482 | } |
483 | 483 | ||
484 | #define MPIDR_TO_SGI_AFFINITY(cluster_id, level) \ | ||
485 | (MPIDR_AFFINITY_LEVEL(cluster_id, level) \ | ||
486 | << ICC_SGI1R_AFFINITY_## level ##_SHIFT) | ||
487 | |||
484 | static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq) | 488 | static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq) |
485 | { | 489 | { |
486 | u64 val; | 490 | u64 val; |
487 | 491 | ||
488 | val = (MPIDR_AFFINITY_LEVEL(cluster_id, 3) << 48 | | 492 | val = (MPIDR_TO_SGI_AFFINITY(cluster_id, 3) | |
489 | MPIDR_AFFINITY_LEVEL(cluster_id, 2) << 32 | | 493 | MPIDR_TO_SGI_AFFINITY(cluster_id, 2) | |
490 | irq << 24 | | 494 | irq << ICC_SGI1R_SGI_ID_SHIFT | |
491 | MPIDR_AFFINITY_LEVEL(cluster_id, 1) << 16 | | 495 | MPIDR_TO_SGI_AFFINITY(cluster_id, 1) | |
492 | tlist); | 496 | tlist << ICC_SGI1R_TARGET_LIST_SHIFT); |
493 | 497 | ||
494 | pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val); | 498 | pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val); |
495 | gic_write_sgi1r(val); | 499 | gic_write_sgi1r(val); |
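The new MPIDR_TO_SGI_AFFINITY() macro simply shifts each affinity level of the cluster id into its ICC_SGI1R_EL1 field (target list at bit 0, Aff1 at 16, SGI ID at 24, Aff2 at 32, Aff3 at 48 per the GICv3 spec). A standalone sketch assembling one such value by hand, with made-up affinity values purely for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Made-up target: cluster Aff3.Aff2.Aff1 = 0.1.2, CPU with Aff0 == 3. */
	uint64_t aff3 = 0, aff2 = 1, aff1 = 2;
	uint64_t tlist = 1u << 3;	/* one bit per Aff0 value in the target list */
	uint64_t irq = 5;		/* SGI number, bits 27:24 */

	uint64_t val = (aff3 << 48) | (aff2 << 32) | (irq << 24) |
		       (aff1 << 16) | (tlist << 0);

	printf("ICC_SGI1R_EL1 = %#llx\n", (unsigned long long)val);
	return 0;
}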
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index daf6cd5079ec..1efa4fdb7fe2 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c | |||
@@ -54,6 +54,7 @@ static unsigned long sclp_hsa_size; | |||
54 | static unsigned int sclp_max_cpu; | 54 | static unsigned int sclp_max_cpu; |
55 | static struct sclp_ipl_info sclp_ipl_info; | 55 | static struct sclp_ipl_info sclp_ipl_info; |
56 | static unsigned char sclp_siif; | 56 | static unsigned char sclp_siif; |
57 | static unsigned char sclp_sigpif; | ||
57 | static u32 sclp_ibc; | 58 | static u32 sclp_ibc; |
58 | static unsigned int sclp_mtid; | 59 | static unsigned int sclp_mtid; |
59 | static unsigned int sclp_mtid_cp; | 60 | static unsigned int sclp_mtid_cp; |
@@ -140,6 +141,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) | |||
140 | if (boot_cpu_address != cpue->core_id) | 141 | if (boot_cpu_address != cpue->core_id) |
141 | continue; | 142 | continue; |
142 | sclp_siif = cpue->siif; | 143 | sclp_siif = cpue->siif; |
144 | sclp_sigpif = cpue->sigpif; | ||
143 | break; | 145 | break; |
144 | } | 146 | } |
145 | 147 | ||
@@ -186,6 +188,12 @@ int sclp_has_siif(void) | |||
186 | } | 188 | } |
187 | EXPORT_SYMBOL(sclp_has_siif); | 189 | EXPORT_SYMBOL(sclp_has_siif); |
188 | 190 | ||
191 | int sclp_has_sigpif(void) | ||
192 | { | ||
193 | return sclp_sigpif; | ||
194 | } | ||
195 | EXPORT_SYMBOL(sclp_has_sigpif); | ||
196 | |||
189 | unsigned int sclp_get_ibc(void) | 197 | unsigned int sclp_get_ibc(void) |
190 | { | 198 | { |
191 | return sclp_ibc; | 199 | return sclp_ibc; |
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ac4888dc86bc..7c55dd5dd2c9 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h | |||
@@ -33,10 +33,11 @@ | |||
33 | #define VGIC_V2_MAX_LRS (1 << 6) | 33 | #define VGIC_V2_MAX_LRS (1 << 6) |
34 | #define VGIC_V3_MAX_LRS 16 | 34 | #define VGIC_V3_MAX_LRS 16 |
35 | #define VGIC_MAX_IRQS 1024 | 35 | #define VGIC_MAX_IRQS 1024 |
36 | #define VGIC_V2_MAX_CPUS 8 | ||
36 | 37 | ||
37 | /* Sanity checks... */ | 38 | /* Sanity checks... */ |
38 | #if (KVM_MAX_VCPUS > 8) | 39 | #if (KVM_MAX_VCPUS > 255) |
39 | #error Invalid number of CPU interfaces | 40 | #error Too many KVM VCPUs, the VGIC only supports up to 255 VCPUs for now |
40 | #endif | 41 | #endif |
41 | 42 | ||
42 | #if (VGIC_NR_IRQS_LEGACY & 31) | 43 | #if (VGIC_NR_IRQS_LEGACY & 31) |
@@ -132,6 +133,18 @@ struct vgic_params { | |||
132 | unsigned int maint_irq; | 133 | unsigned int maint_irq; |
133 | /* Virtual control interface base address */ | 134 | /* Virtual control interface base address */ |
134 | void __iomem *vctrl_base; | 135 | void __iomem *vctrl_base; |
136 | int max_gic_vcpus; | ||
137 | /* Only needed for the legacy KVM_CREATE_IRQCHIP */ | ||
138 | bool can_emulate_gicv2; | ||
139 | }; | ||
140 | |||
141 | struct vgic_vm_ops { | ||
142 | bool (*handle_mmio)(struct kvm_vcpu *, struct kvm_run *, | ||
143 | struct kvm_exit_mmio *); | ||
144 | bool (*queue_sgi)(struct kvm_vcpu *, int irq); | ||
145 | void (*add_sgi_source)(struct kvm_vcpu *, int irq, int source); | ||
146 | int (*init_model)(struct kvm *); | ||
147 | int (*map_resources)(struct kvm *, const struct vgic_params *); | ||
135 | }; | 148 | }; |
136 | 149 | ||
137 | struct vgic_dist { | 150 | struct vgic_dist { |
@@ -140,6 +153,9 @@ struct vgic_dist { | |||
140 | bool in_kernel; | 153 | bool in_kernel; |
141 | bool ready; | 154 | bool ready; |
142 | 155 | ||
156 | /* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */ | ||
157 | u32 vgic_model; | ||
158 | |||
143 | int nr_cpus; | 159 | int nr_cpus; |
144 | int nr_irqs; | 160 | int nr_irqs; |
145 | 161 | ||
@@ -148,7 +164,11 @@ struct vgic_dist { | |||
148 | 164 | ||
149 | /* Distributor and vcpu interface mapping in the guest */ | 165 | /* Distributor and vcpu interface mapping in the guest */ |
150 | phys_addr_t vgic_dist_base; | 166 | phys_addr_t vgic_dist_base; |
151 | phys_addr_t vgic_cpu_base; | 167 | /* GICv2 and GICv3 use different mapped register blocks */ |
168 | union { | ||
169 | phys_addr_t vgic_cpu_base; | ||
170 | phys_addr_t vgic_redist_base; | ||
171 | }; | ||
152 | 172 | ||
153 | /* Distributor enabled */ | 173 | /* Distributor enabled */ |
154 | u32 enabled; | 174 | u32 enabled; |
@@ -210,8 +230,13 @@ struct vgic_dist { | |||
210 | */ | 230 | */ |
211 | struct vgic_bitmap *irq_spi_target; | 231 | struct vgic_bitmap *irq_spi_target; |
212 | 232 | ||
233 | /* Target MPIDR for each IRQ (needed for GICv3 IROUTERn) only */ | ||
234 | u32 *irq_spi_mpidr; | ||
235 | |||
213 | /* Bitmap indicating which CPU has something pending */ | 236 | /* Bitmap indicating which CPU has something pending */ |
214 | unsigned long *irq_pending_on_cpu; | 237 | unsigned long *irq_pending_on_cpu; |
238 | |||
239 | struct vgic_vm_ops vm_ops; | ||
215 | #endif | 240 | #endif |
216 | }; | 241 | }; |
217 | 242 | ||
@@ -229,6 +254,7 @@ struct vgic_v3_cpu_if { | |||
229 | #ifdef CONFIG_ARM_GIC_V3 | 254 | #ifdef CONFIG_ARM_GIC_V3 |
230 | u32 vgic_hcr; | 255 | u32 vgic_hcr; |
231 | u32 vgic_vmcr; | 256 | u32 vgic_vmcr; |
257 | u32 vgic_sre; /* Restored only, change ignored */ | ||
232 | u32 vgic_misr; /* Saved only */ | 258 | u32 vgic_misr; /* Saved only */ |
233 | u32 vgic_eisr; /* Saved only */ | 259 | u32 vgic_eisr; /* Saved only */ |
234 | u32 vgic_elrsr; /* Saved only */ | 260 | u32 vgic_elrsr; /* Saved only */ |
@@ -275,13 +301,15 @@ struct kvm_exit_mmio; | |||
275 | int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); | 301 | int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); |
276 | int kvm_vgic_hyp_init(void); | 302 | int kvm_vgic_hyp_init(void); |
277 | int kvm_vgic_map_resources(struct kvm *kvm); | 303 | int kvm_vgic_map_resources(struct kvm *kvm); |
278 | int kvm_vgic_create(struct kvm *kvm); | 304 | int kvm_vgic_get_max_vcpus(void); |
305 | int kvm_vgic_create(struct kvm *kvm, u32 type); | ||
279 | void kvm_vgic_destroy(struct kvm *kvm); | 306 | void kvm_vgic_destroy(struct kvm *kvm); |
280 | void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); | 307 | void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); |
281 | void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); | 308 | void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); |
282 | void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); | 309 | void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); |
283 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, | 310 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, |
284 | bool level); | 311 | bool level); |
312 | void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); | ||
285 | int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); | 313 | int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); |
286 | bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, | 314 | bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, |
287 | struct kvm_exit_mmio *mmio); | 315 | struct kvm_exit_mmio *mmio); |
@@ -327,7 +355,7 @@ static inline int kvm_vgic_map_resources(struct kvm *kvm) | |||
327 | return 0; | 355 | return 0; |
328 | } | 356 | } |
329 | 357 | ||
330 | static inline int kvm_vgic_create(struct kvm *kvm) | 358 | static inline int kvm_vgic_create(struct kvm *kvm, u32 type) |
331 | { | 359 | { |
332 | return 0; | 360 | return 0; |
333 | } | 361 | } |
@@ -379,6 +407,11 @@ static inline bool vgic_ready(struct kvm *kvm) | |||
379 | { | 407 | { |
380 | return true; | 408 | return true; |
381 | } | 409 | } |
410 | |||
411 | static inline int kvm_vgic_get_max_vcpus(void) | ||
412 | { | ||
413 | return KVM_MAX_VCPUS; | ||
414 | } | ||
382 | #endif | 415 | #endif |
383 | 416 | ||
384 | #endif | 417 | #endif |
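The vgic_vm_ops table added above turns the distributor emulation into a per-VM choice between the GICv2 and GICv3 models. As a rough sketch of the idea (the wrapper name below is invented for illustration and is not part of the patch), common vgic code can route a guest access through whichever ops the model-specific setup installed:

	/* Illustrative only: dispatch an MMIO access via the per-VM ops table. */
	static inline bool vgic_vm_handle_mmio(struct kvm_vcpu *vcpu,
					       struct kvm_run *run,
					       struct kvm_exit_mmio *mmio)
	{
		struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

		/* vm_ops is filled in by the GICv2 or GICv3 emulation setup code */
		return dist->vm_ops.handle_mmio(vcpu, run, mmio);
	}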
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 1e8b0cf30792..800544bc7bfd 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h | |||
@@ -33,6 +33,7 @@ | |||
33 | #define GICD_SETSPI_SR 0x0050 | 33 | #define GICD_SETSPI_SR 0x0050 |
34 | #define GICD_CLRSPI_SR 0x0058 | 34 | #define GICD_CLRSPI_SR 0x0058 |
35 | #define GICD_SEIR 0x0068 | 35 | #define GICD_SEIR 0x0068 |
36 | #define GICD_IGROUPR 0x0080 | ||
36 | #define GICD_ISENABLER 0x0100 | 37 | #define GICD_ISENABLER 0x0100 |
37 | #define GICD_ICENABLER 0x0180 | 38 | #define GICD_ICENABLER 0x0180 |
38 | #define GICD_ISPENDR 0x0200 | 39 | #define GICD_ISPENDR 0x0200 |
@@ -41,14 +42,37 @@ | |||
41 | #define GICD_ICACTIVER 0x0380 | 42 | #define GICD_ICACTIVER 0x0380 |
42 | #define GICD_IPRIORITYR 0x0400 | 43 | #define GICD_IPRIORITYR 0x0400 |
43 | #define GICD_ICFGR 0x0C00 | 44 | #define GICD_ICFGR 0x0C00 |
45 | #define GICD_IGRPMODR 0x0D00 | ||
46 | #define GICD_NSACR 0x0E00 | ||
44 | #define GICD_IROUTER 0x6000 | 47 | #define GICD_IROUTER 0x6000 |
48 | #define GICD_IDREGS 0xFFD0 | ||
45 | #define GICD_PIDR2 0xFFE8 | 49 | #define GICD_PIDR2 0xFFE8 |
46 | 50 | ||
51 | /* | ||
52 | * Those registers are actually from GICv2, but the spec demands that they | ||
53 | * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3). | ||
54 | */ | ||
55 | #define GICD_ITARGETSR 0x0800 | ||
56 | #define GICD_SGIR 0x0F00 | ||
57 | #define GICD_CPENDSGIR 0x0F10 | ||
58 | #define GICD_SPENDSGIR 0x0F20 | ||
59 | |||
47 | #define GICD_CTLR_RWP (1U << 31) | 60 | #define GICD_CTLR_RWP (1U << 31) |
61 | #define GICD_CTLR_DS (1U << 6) | ||
48 | #define GICD_CTLR_ARE_NS (1U << 4) | 62 | #define GICD_CTLR_ARE_NS (1U << 4) |
49 | #define GICD_CTLR_ENABLE_G1A (1U << 1) | 63 | #define GICD_CTLR_ENABLE_G1A (1U << 1) |
50 | #define GICD_CTLR_ENABLE_G1 (1U << 0) | 64 | #define GICD_CTLR_ENABLE_G1 (1U << 0) |
51 | 65 | ||
66 | /* | ||
67 | * In systems with a single security state (what we emulate in KVM) | ||
68 | * the meaning of the interrupt group enable bits is slightly different | ||
69 | */ | ||
70 | #define GICD_CTLR_ENABLE_SS_G1 (1U << 1) | ||
71 | #define GICD_CTLR_ENABLE_SS_G0 (1U << 0) | ||
72 | |||
73 | #define GICD_TYPER_LPIS (1U << 17) | ||
74 | #define GICD_TYPER_MBIS (1U << 16) | ||
75 | |||
52 | #define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) | 76 | #define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) |
53 | #define GICD_TYPER_IRQS(typer) ((((typer) & 0x1f) + 1) * 32) | 77 | #define GICD_TYPER_IRQS(typer) ((((typer) & 0x1f) + 1) * 32) |
54 | #define GICD_TYPER_LPIS (1U << 17) | 78 | #define GICD_TYPER_LPIS (1U << 17) |
@@ -60,6 +84,8 @@ | |||
60 | #define GIC_PIDR2_ARCH_GICv3 0x30 | 84 | #define GIC_PIDR2_ARCH_GICv3 0x30 |
61 | #define GIC_PIDR2_ARCH_GICv4 0x40 | 85 | #define GIC_PIDR2_ARCH_GICv4 0x40 |
62 | 86 | ||
87 | #define GIC_V3_DIST_SIZE 0x10000 | ||
88 | |||
63 | /* | 89 | /* |
64 | * Re-Distributor registers, offsets from RD_base | 90 | * Re-Distributor registers, offsets from RD_base |
65 | */ | 91 | */ |
@@ -78,6 +104,7 @@ | |||
78 | #define GICR_SYNCR 0x00C0 | 104 | #define GICR_SYNCR 0x00C0 |
79 | #define GICR_MOVLPIR 0x0100 | 105 | #define GICR_MOVLPIR 0x0100 |
80 | #define GICR_MOVALLR 0x0110 | 106 | #define GICR_MOVALLR 0x0110 |
107 | #define GICR_IDREGS GICD_IDREGS | ||
81 | #define GICR_PIDR2 GICD_PIDR2 | 108 | #define GICR_PIDR2 GICD_PIDR2 |
82 | 109 | ||
83 | #define GICR_CTLR_ENABLE_LPIS (1UL << 0) | 110 | #define GICR_CTLR_ENABLE_LPIS (1UL << 0) |
@@ -104,6 +131,7 @@ | |||
104 | /* | 131 | /* |
105 | * Re-Distributor registers, offsets from SGI_base | 132 | * Re-Distributor registers, offsets from SGI_base |
106 | */ | 133 | */ |
134 | #define GICR_IGROUPR0 GICD_IGROUPR | ||
107 | #define GICR_ISENABLER0 GICD_ISENABLER | 135 | #define GICR_ISENABLER0 GICD_ISENABLER |
108 | #define GICR_ICENABLER0 GICD_ICENABLER | 136 | #define GICR_ICENABLER0 GICD_ICENABLER |
109 | #define GICR_ISPENDR0 GICD_ISPENDR | 137 | #define GICR_ISPENDR0 GICD_ISPENDR |
@@ -112,11 +140,15 @@ | |||
112 | #define GICR_ICACTIVER0 GICD_ICACTIVER | 140 | #define GICR_ICACTIVER0 GICD_ICACTIVER |
113 | #define GICR_IPRIORITYR0 GICD_IPRIORITYR | 141 | #define GICR_IPRIORITYR0 GICD_IPRIORITYR |
114 | #define GICR_ICFGR0 GICD_ICFGR | 142 | #define GICR_ICFGR0 GICD_ICFGR |
143 | #define GICR_IGRPMODR0 GICD_IGRPMODR | ||
144 | #define GICR_NSACR GICD_NSACR | ||
115 | 145 | ||
116 | #define GICR_TYPER_PLPIS (1U << 0) | 146 | #define GICR_TYPER_PLPIS (1U << 0) |
117 | #define GICR_TYPER_VLPIS (1U << 1) | 147 | #define GICR_TYPER_VLPIS (1U << 1) |
118 | #define GICR_TYPER_LAST (1U << 4) | 148 | #define GICR_TYPER_LAST (1U << 4) |
119 | 149 | ||
150 | #define GIC_V3_REDIST_SIZE 0x20000 | ||
151 | |||
120 | #define LPI_PROP_GROUP1 (1 << 1) | 152 | #define LPI_PROP_GROUP1 (1 << 1) |
121 | #define LPI_PROP_ENABLED (1 << 0) | 153 | #define LPI_PROP_ENABLED (1 << 0) |
122 | 154 | ||
@@ -248,6 +280,18 @@ | |||
248 | #define ICC_SRE_EL2_SRE (1 << 0) | 280 | #define ICC_SRE_EL2_SRE (1 << 0) |
249 | #define ICC_SRE_EL2_ENABLE (1 << 3) | 281 | #define ICC_SRE_EL2_ENABLE (1 << 3) |
250 | 282 | ||
283 | #define ICC_SGI1R_TARGET_LIST_SHIFT 0 | ||
284 | #define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT) | ||
285 | #define ICC_SGI1R_AFFINITY_1_SHIFT 16 | ||
286 | #define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT) | ||
287 | #define ICC_SGI1R_SGI_ID_SHIFT 24 | ||
288 | #define ICC_SGI1R_SGI_ID_MASK (0xff << ICC_SGI1R_SGI_ID_SHIFT) | ||
289 | #define ICC_SGI1R_AFFINITY_2_SHIFT 32 | ||
290 | #define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT) | ||
291 | #define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40 | ||
292 | #define ICC_SGI1R_AFFINITY_3_SHIFT 48 | ||
293 | #define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT) | ||
294 | |||
251 | /* | 295 | /* |
252 | * System register definitions | 296 | * System register definitions |
253 | */ | 297 | */ |
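The new ICC_SGI1R_* shift/mask definitions describe how a write to ICC_SGI1R_EL1 encodes an SGI request. A minimal decoder sketch using only the macros above (the function itself is illustrative and not taken from the patch):

	static void decode_sgi1r(u64 reg)
	{
		u16 targets = reg & ICC_SGI1R_TARGET_LIST_MASK;
		u8 aff1 = (reg >> ICC_SGI1R_AFFINITY_1_SHIFT) & 0xff;
		u8 sgi = (reg >> ICC_SGI1R_SGI_ID_SHIFT) & 0xf;
		bool bcast = (reg >> ICC_SGI1R_IRQ_ROUTING_MODE_BIT) & 1;

		/* targets is a bitmap of Aff0 values within the Aff1 cluster;
		 * bcast set means "all but self", ignoring the target list. */
		pr_debug("SGI%u: aff1=%u targets=0x%04x%s\n",
			 sgi, aff1, targets, bcast ? " (broadcast)" : "");
	}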
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d189ee098aa2..d12b2104d19b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -33,10 +33,6 @@ | |||
33 | 33 | ||
34 | #include <asm/kvm_host.h> | 34 | #include <asm/kvm_host.h> |
35 | 35 | ||
36 | #ifndef KVM_MMIO_SIZE | ||
37 | #define KVM_MMIO_SIZE 8 | ||
38 | #endif | ||
39 | |||
40 | /* | 36 | /* |
41 | * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used | 37 | * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used |
42 | * in kvm, other bits are visible for userspace which are defined in | 38 | * in kvm, other bits are visible for userspace which are defined in |
@@ -600,6 +596,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext); | |||
600 | 596 | ||
601 | int kvm_get_dirty_log(struct kvm *kvm, | 597 | int kvm_get_dirty_log(struct kvm *kvm, |
602 | struct kvm_dirty_log *log, int *is_dirty); | 598 | struct kvm_dirty_log *log, int *is_dirty); |
599 | |||
600 | int kvm_get_dirty_log_protect(struct kvm *kvm, | ||
601 | struct kvm_dirty_log *log, bool *is_dirty); | ||
602 | |||
603 | void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, | ||
604 | struct kvm_memory_slot *slot, | ||
605 | gfn_t gfn_offset, | ||
606 | unsigned long mask); | ||
607 | |||
603 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | 608 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, |
604 | struct kvm_dirty_log *log); | 609 | struct kvm_dirty_log *log); |
605 | 610 | ||
@@ -641,7 +646,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); | |||
641 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); | 646 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); |
642 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id); | 647 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id); |
643 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); | 648 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); |
644 | int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); | 649 | void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); |
645 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); | 650 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); |
646 | 651 | ||
647 | int kvm_arch_hardware_enable(void); | 652 | int kvm_arch_hardware_enable(void); |
@@ -1031,6 +1036,8 @@ void kvm_unregister_device_ops(u32 type); | |||
1031 | 1036 | ||
1032 | extern struct kvm_device_ops kvm_mpic_ops; | 1037 | extern struct kvm_device_ops kvm_mpic_ops; |
1033 | extern struct kvm_device_ops kvm_xics_ops; | 1038 | extern struct kvm_device_ops kvm_xics_ops; |
1039 | extern struct kvm_device_ops kvm_arm_vgic_v2_ops; | ||
1040 | extern struct kvm_device_ops kvm_arm_vgic_v3_ops; | ||
1034 | 1041 | ||
1035 | #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT | 1042 | #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT |
1036 | 1043 | ||
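kvm_get_dirty_log_protect() hands each architecture a word-sized bitmap of freshly dirtied pages through the new kvm_arch_mmu_enable_log_dirty_pt_masked() hook. A sketch of what an arch-side implementation might look like, assuming a hypothetical arch_write_protect_gfn() helper (real architectures walk their own page-table structures here):

	void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
						     struct kvm_memory_slot *slot,
						     gfn_t gfn_offset,
						     unsigned long mask)
	{
		while (mask) {
			gfn_t gfn = slot->base_gfn + gfn_offset + __ffs(mask);

			/* arch-specific: remove write access from the mapping */
			arch_write_protect_gfn(kvm, gfn);	/* hypothetical helper */

			mask &= mask - 1;	/* clear the lowest set bit */
		}
	}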
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 86b399c66c3d..a44062da684b 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h | |||
@@ -37,6 +37,25 @@ TRACE_EVENT(kvm_userspace_exit, | |||
37 | __entry->errno < 0 ? -__entry->errno : __entry->reason) | 37 | __entry->errno < 0 ? -__entry->errno : __entry->reason) |
38 | ); | 38 | ); |
39 | 39 | ||
40 | TRACE_EVENT(kvm_vcpu_wakeup, | ||
41 | TP_PROTO(__u64 ns, bool waited), | ||
42 | TP_ARGS(ns, waited), | ||
43 | |||
44 | TP_STRUCT__entry( | ||
45 | __field( __u64, ns ) | ||
46 | __field( bool, waited ) | ||
47 | ), | ||
48 | |||
49 | TP_fast_assign( | ||
50 | __entry->ns = ns; | ||
51 | __entry->waited = waited; | ||
52 | ), | ||
53 | |||
54 | TP_printk("%s time %lld ns", | ||
55 | __entry->waited ? "wait" : "poll", | ||
56 | __entry->ns) | ||
57 | ); | ||
58 | |||
40 | #if defined(CONFIG_HAVE_KVM_IRQFD) | 59 | #if defined(CONFIG_HAVE_KVM_IRQFD) |
41 | TRACE_EVENT(kvm_set_irq, | 60 | TRACE_EVENT(kvm_set_irq, |
42 | TP_PROTO(unsigned int gsi, int level, int irq_source_id), | 61 | TP_PROTO(unsigned int gsi, int level, int irq_source_id), |
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a37fd1224f36..805570650062 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
@@ -491,6 +491,11 @@ struct kvm_s390_emerg_info { | |||
491 | __u16 code; | 491 | __u16 code; |
492 | }; | 492 | }; |
493 | 493 | ||
494 | #define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 | ||
495 | struct kvm_s390_stop_info { | ||
496 | __u32 flags; | ||
497 | }; | ||
498 | |||
494 | struct kvm_s390_mchk_info { | 499 | struct kvm_s390_mchk_info { |
495 | __u64 cr14; | 500 | __u64 cr14; |
496 | __u64 mcic; | 501 | __u64 mcic; |
@@ -509,6 +514,7 @@ struct kvm_s390_irq { | |||
509 | struct kvm_s390_emerg_info emerg; | 514 | struct kvm_s390_emerg_info emerg; |
510 | struct kvm_s390_extcall_info extcall; | 515 | struct kvm_s390_extcall_info extcall; |
511 | struct kvm_s390_prefix_info prefix; | 516 | struct kvm_s390_prefix_info prefix; |
517 | struct kvm_s390_stop_info stop; | ||
512 | struct kvm_s390_mchk_info mchk; | 518 | struct kvm_s390_mchk_info mchk; |
513 | char reserved[64]; | 519 | char reserved[64]; |
514 | } u; | 520 | } u; |
@@ -753,6 +759,7 @@ struct kvm_ppc_smmu_info { | |||
753 | #define KVM_CAP_PPC_FIXUP_HCALL 103 | 759 | #define KVM_CAP_PPC_FIXUP_HCALL 103 |
754 | #define KVM_CAP_PPC_ENABLE_HCALL 104 | 760 | #define KVM_CAP_PPC_ENABLE_HCALL 104 |
755 | #define KVM_CAP_CHECK_EXTENSION_VM 105 | 761 | #define KVM_CAP_CHECK_EXTENSION_VM 105 |
762 | #define KVM_CAP_S390_USER_SIGP 106 | ||
756 | 763 | ||
757 | #ifdef KVM_CAP_IRQ_ROUTING | 764 | #ifdef KVM_CAP_IRQ_ROUTING |
758 | 765 | ||
@@ -952,6 +959,8 @@ enum kvm_device_type { | |||
952 | #define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2 | 959 | #define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2 |
953 | KVM_DEV_TYPE_FLIC, | 960 | KVM_DEV_TYPE_FLIC, |
954 | #define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC | 961 | #define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC |
962 | KVM_DEV_TYPE_ARM_VGIC_V3, | ||
963 | #define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3 | ||
955 | KVM_DEV_TYPE_MAX, | 964 | KVM_DEV_TYPE_MAX, |
956 | }; | 965 | }; |
957 | 966 | ||
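The new stop member of struct kvm_s390_irq lets userspace attach flags to a SIGP STOP injection, for example to request that the CPU status be stored. A userspace-side sketch; the KVM_S390_IRQ vcpu ioctl used here is an assumption about the injection path and should be probed for before use:

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Ask KVM to deliver SIGP STOP AND STORE STATUS to this vcpu. */
	static int inject_stop_store_status(int vcpu_fd)
	{
		struct kvm_s390_irq irq;

		memset(&irq, 0, sizeof(irq));
		irq.type = KVM_S390_SIGP_STOP;
		irq.u.stop.flags = KVM_S390_STOP_FLAG_STORE_STATUS;

		/* KVM_S390_IRQ is assumed here; check availability first. */
		return ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
	}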
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index fc0c5e603eb4..e2c876d5a03b 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig | |||
@@ -37,3 +37,13 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT | |||
37 | 37 | ||
38 | config KVM_VFIO | 38 | config KVM_VFIO |
39 | bool | 39 | bool |
40 | |||
41 | config HAVE_KVM_ARCH_TLB_FLUSH_ALL | ||
42 | bool | ||
43 | |||
44 | config KVM_GENERIC_DIRTYLOG_READ_PROTECT | ||
45 | bool | ||
46 | |||
47 | config KVM_COMPAT | ||
48 | def_bool y | ||
49 | depends on COMPAT && !S390 | ||
diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c new file mode 100644 index 000000000000..19c6210f02cf --- /dev/null +++ b/virt/kvm/arm/vgic-v2-emul.c | |||
@@ -0,0 +1,847 @@ | |||
1 | /* | ||
2 | * Contains GICv2 specific emulation code, was in vgic.c before. | ||
3 | * | ||
4 | * Copyright (C) 2012 ARM Ltd. | ||
5 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
18 | */ | ||
19 | |||
20 | #include <linux/cpu.h> | ||
21 | #include <linux/kvm.h> | ||
22 | #include <linux/kvm_host.h> | ||
23 | #include <linux/interrupt.h> | ||
24 | #include <linux/io.h> | ||
25 | #include <linux/uaccess.h> | ||
26 | |||
27 | #include <linux/irqchip/arm-gic.h> | ||
28 | |||
29 | #include <asm/kvm_emulate.h> | ||
30 | #include <asm/kvm_arm.h> | ||
31 | #include <asm/kvm_mmu.h> | ||
32 | |||
33 | #include "vgic.h" | ||
34 | |||
35 | #define GICC_ARCH_VERSION_V2 0x2 | ||
36 | |||
37 | static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); | ||
38 | static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi) | ||
39 | { | ||
40 | return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi; | ||
41 | } | ||
42 | |||
43 | static bool handle_mmio_misc(struct kvm_vcpu *vcpu, | ||
44 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | ||
45 | { | ||
46 | u32 reg; | ||
47 | u32 word_offset = offset & 3; | ||
48 | |||
49 | switch (offset & ~3) { | ||
50 | case 0: /* GICD_CTLR */ | ||
51 | reg = vcpu->kvm->arch.vgic.enabled; | ||
52 | vgic_reg_access(mmio, ®, word_offset, | ||
53 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); | ||
54 | if (mmio->is_write) { | ||
55 | vcpu->kvm->arch.vgic.enabled = reg & 1; | ||
56 | vgic_update_state(vcpu->kvm); | ||
57 | return true; | ||
58 | } | ||
59 | break; | ||
60 | |||
61 | case 4: /* GICD_TYPER */ | ||
62 | reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; | ||
63 | reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1; | ||
64 | vgic_reg_access(mmio, ®, word_offset, | ||
65 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); | ||
66 | break; | ||
67 | |||
68 | case 8: /* GICD_IIDR */ | ||
69 | reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); | ||
70 | vgic_reg_access(mmio, ®, word_offset, | ||
71 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); | ||
72 | break; | ||
73 | } | ||
74 | |||
75 | return false; | ||
76 | } | ||
77 | |||
78 | static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, | ||
79 | struct kvm_exit_mmio *mmio, | ||
80 | phys_addr_t offset) | ||
81 | { | ||
82 | return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, | ||
83 | vcpu->vcpu_id, ACCESS_WRITE_SETBIT); | ||
84 | } | ||
85 | |||
86 | static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, | ||
87 | struct kvm_exit_mmio *mmio, | ||
88 | phys_addr_t offset) | ||
89 | { | ||
90 | return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, | ||
91 | vcpu->vcpu_id, ACCESS_WRITE_CLEARBIT); | ||
92 | } | ||
93 | |||
94 | static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, | ||
95 | struct kvm_exit_mmio *mmio, | ||
96 | phys_addr_t offset) | ||
97 | { | ||
98 | return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset, | ||
99 | vcpu->vcpu_id); | ||
100 | } | ||
101 | |||
102 | static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, | ||
103 | struct kvm_exit_mmio *mmio, | ||
104 | phys_addr_t offset) | ||
105 | { | ||
106 | return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset, | ||
107 | vcpu->vcpu_id); | ||
108 | } | ||
109 | |||
110 | static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, | ||
111 | struct kvm_exit_mmio *mmio, | ||
112 | phys_addr_t offset) | ||
113 | { | ||
114 | u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, | ||
115 | vcpu->vcpu_id, offset); | ||
116 | vgic_reg_access(mmio, reg, offset, | ||
117 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); | ||
118 | return false; | ||
119 | } | ||
120 | |||
121 | #define GICD_ITARGETSR_SIZE 32 | ||
122 | #define GICD_CPUTARGETS_BITS 8 | ||
123 | #define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS) | ||
124 | static u32 vgic_get_target_reg(struct kvm *kvm, int irq) | ||
125 | { | ||
126 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
127 | int i; | ||
128 | u32 val = 0; | ||
129 | |||
130 | irq -= VGIC_NR_PRIVATE_IRQS; | ||
131 | |||
132 | for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) | ||
133 | val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8); | ||
134 | |||
135 | return val; | ||
136 | } | ||
137 | |||
138 | static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq) | ||
139 | { | ||
140 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
141 | struct kvm_vcpu *vcpu; | ||
142 | int i, c; | ||
143 | unsigned long *bmap; | ||
144 | u32 target; | ||
145 | |||
146 | irq -= VGIC_NR_PRIVATE_IRQS; | ||
147 | |||
148 | /* | ||
149 | * Pick the LSB in each byte. This ensures we target exactly | ||
150 | * one vcpu per IRQ. If the byte is null, assume we target | ||
151 | * CPU0. | ||
152 | */ | ||
153 | for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) { | ||
154 | int shift = i * GICD_CPUTARGETS_BITS; | ||
155 | |||
156 | target = ffs((val >> shift) & 0xffU); | ||
157 | target = target ? (target - 1) : 0; | ||
158 | dist->irq_spi_cpu[irq + i] = target; | ||
159 | kvm_for_each_vcpu(c, vcpu, kvm) { | ||
160 | bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); | ||
161 | if (c == target) | ||
162 | set_bit(irq + i, bmap); | ||
163 | else | ||
164 | clear_bit(irq + i, bmap); | ||
165 | } | ||
166 | } | ||
167 | } | ||
168 | |||
169 | static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu, | ||
170 | struct kvm_exit_mmio *mmio, | ||
171 | phys_addr_t offset) | ||
172 | { | ||
173 | u32 reg; | ||
174 | |||
175 | /* We treat the banked interrupts targets as read-only */ | ||
176 | if (offset < 32) { | ||
177 | u32 roreg; | ||
178 | |||
179 | roreg = 1 << vcpu->vcpu_id; | ||
180 | roreg |= roreg << 8; | ||
181 | roreg |= roreg << 16; | ||
182 | |||
183 | vgic_reg_access(mmio, &roreg, offset, | ||
184 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); | ||
185 | return false; | ||
186 | } | ||
187 | |||
188 | reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U); | ||
189 | vgic_reg_access(mmio, ®, offset, | ||
190 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); | ||
191 | if (mmio->is_write) { | ||
192 | vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U); | ||
193 | vgic_update_state(vcpu->kvm); | ||
194 | return true; | ||
195 | } | ||
196 | |||
197 | return false; | ||
198 | } | ||
199 | |||
200 | static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, | ||
201 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | ||
202 | { | ||
203 | u32 *reg; | ||
204 | |||
205 | reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, | ||
206 | vcpu->vcpu_id, offset >> 1); | ||
207 | |||
208 | return vgic_handle_cfg_reg(reg, mmio, offset); | ||
209 | } | ||
210 | |||
211 | static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, | ||
212 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | ||
213 | { | ||
214 | u32 reg; | ||
215 | |||
216 | vgic_reg_access(mmio, ®, offset, | ||
217 | ACCESS_READ_RAZ | ACCESS_WRITE_VALUE); | ||
218 | if (mmio->is_write) { | ||
219 | vgic_dispatch_sgi(vcpu, reg); | ||
220 | vgic_update_state(vcpu->kvm); | ||
221 | return true; | ||
222 | } | ||
223 | |||
224 | return false; | ||
225 | } | ||
226 | |||
227 | /* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */ | ||
228 | static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, | ||
229 | struct kvm_exit_mmio *mmio, | ||
230 | phys_addr_t offset) | ||
231 | { | ||
232 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
233 | int sgi; | ||
234 | int min_sgi = (offset & ~0x3); | ||
235 | int max_sgi = min_sgi + 3; | ||
236 | int vcpu_id = vcpu->vcpu_id; | ||
237 | u32 reg = 0; | ||
238 | |||
239 | /* Copy source SGIs from distributor side */ | ||
240 | for (sgi = min_sgi; sgi <= max_sgi; sgi++) { | ||
241 | u8 sources = *vgic_get_sgi_sources(dist, vcpu_id, sgi); | ||
242 | |||
243 | reg |= ((u32)sources) << (8 * (sgi - min_sgi)); | ||
244 | } | ||
245 | |||
246 | mmio_data_write(mmio, ~0, reg); | ||
247 | return false; | ||
248 | } | ||
249 | |||
250 | static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, | ||
251 | struct kvm_exit_mmio *mmio, | ||
252 | phys_addr_t offset, bool set) | ||
253 | { | ||
254 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
255 | int sgi; | ||
256 | int min_sgi = (offset & ~0x3); | ||
257 | int max_sgi = min_sgi + 3; | ||
258 | int vcpu_id = vcpu->vcpu_id; | ||
259 | u32 reg; | ||
260 | bool updated = false; | ||
261 | |||
262 | reg = mmio_data_read(mmio, ~0); | ||
263 | |||
264 | /* Set or clear pending SGIs on the distributor */ | ||
265 | for (sgi = min_sgi; sgi <= max_sgi; sgi++) { | ||
266 | u8 mask = reg >> (8 * (sgi - min_sgi)); | ||
267 | u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi); | ||
268 | |||
269 | if (set) { | ||
270 | if ((*src & mask) != mask) | ||
271 | updated = true; | ||
272 | *src |= mask; | ||
273 | } else { | ||
274 | if (*src & mask) | ||
275 | updated = true; | ||
276 | *src &= ~mask; | ||
277 | } | ||
278 | } | ||
279 | |||
280 | if (updated) | ||
281 | vgic_update_state(vcpu->kvm); | ||
282 | |||
283 | return updated; | ||
284 | } | ||
285 | |||
286 | static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu, | ||
287 | struct kvm_exit_mmio *mmio, | ||
288 | phys_addr_t offset) | ||
289 | { | ||
290 | if (!mmio->is_write) | ||
291 | return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); | ||
292 | else | ||
293 | return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true); | ||
294 | } | ||
295 | |||
296 | static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, | ||
297 | struct kvm_exit_mmio *mmio, | ||
298 | phys_addr_t offset) | ||
299 | { | ||
300 | if (!mmio->is_write) | ||
301 | return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); | ||
302 | else | ||
303 | return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); | ||
304 | } | ||
305 | |||
306 | static const struct kvm_mmio_range vgic_dist_ranges[] = { | ||
307 | { | ||
308 | .base = GIC_DIST_CTRL, | ||
309 | .len = 12, | ||
310 | .bits_per_irq = 0, | ||
311 | .handle_mmio = handle_mmio_misc, | ||
312 | }, | ||
313 | { | ||
314 | .base = GIC_DIST_IGROUP, | ||
315 | .len = VGIC_MAX_IRQS / 8, | ||
316 | .bits_per_irq = 1, | ||
317 | .handle_mmio = handle_mmio_raz_wi, | ||
318 | }, | ||
319 | { | ||
320 | .base = GIC_DIST_ENABLE_SET, | ||
321 | .len = VGIC_MAX_IRQS / 8, | ||
322 | .bits_per_irq = 1, | ||
323 | .handle_mmio = handle_mmio_set_enable_reg, | ||
324 | }, | ||
325 | { | ||
326 | .base = GIC_DIST_ENABLE_CLEAR, | ||
327 | .len = VGIC_MAX_IRQS / 8, | ||
328 | .bits_per_irq = 1, | ||
329 | .handle_mmio = handle_mmio_clear_enable_reg, | ||
330 | }, | ||
331 | { | ||
332 | .base = GIC_DIST_PENDING_SET, | ||
333 | .len = VGIC_MAX_IRQS / 8, | ||
334 | .bits_per_irq = 1, | ||
335 | .handle_mmio = handle_mmio_set_pending_reg, | ||
336 | }, | ||
337 | { | ||
338 | .base = GIC_DIST_PENDING_CLEAR, | ||
339 | .len = VGIC_MAX_IRQS / 8, | ||
340 | .bits_per_irq = 1, | ||
341 | .handle_mmio = handle_mmio_clear_pending_reg, | ||
342 | }, | ||
343 | { | ||
344 | .base = GIC_DIST_ACTIVE_SET, | ||
345 | .len = VGIC_MAX_IRQS / 8, | ||
346 | .bits_per_irq = 1, | ||
347 | .handle_mmio = handle_mmio_raz_wi, | ||
348 | }, | ||
349 | { | ||
350 | .base = GIC_DIST_ACTIVE_CLEAR, | ||
351 | .len = VGIC_MAX_IRQS / 8, | ||
352 | .bits_per_irq = 1, | ||
353 | .handle_mmio = handle_mmio_raz_wi, | ||
354 | }, | ||
355 | { | ||
356 | .base = GIC_DIST_PRI, | ||
357 | .len = VGIC_MAX_IRQS, | ||
358 | .bits_per_irq = 8, | ||
359 | .handle_mmio = handle_mmio_priority_reg, | ||
360 | }, | ||
361 | { | ||
362 | .base = GIC_DIST_TARGET, | ||
363 | .len = VGIC_MAX_IRQS, | ||
364 | .bits_per_irq = 8, | ||
365 | .handle_mmio = handle_mmio_target_reg, | ||
366 | }, | ||
367 | { | ||
368 | .base = GIC_DIST_CONFIG, | ||
369 | .len = VGIC_MAX_IRQS / 4, | ||
370 | .bits_per_irq = 2, | ||
371 | .handle_mmio = handle_mmio_cfg_reg, | ||
372 | }, | ||
373 | { | ||
374 | .base = GIC_DIST_SOFTINT, | ||
375 | .len = 4, | ||
376 | .handle_mmio = handle_mmio_sgi_reg, | ||
377 | }, | ||
378 | { | ||
379 | .base = GIC_DIST_SGI_PENDING_CLEAR, | ||
380 | .len = VGIC_NR_SGIS, | ||
381 | .handle_mmio = handle_mmio_sgi_clear, | ||
382 | }, | ||
383 | { | ||
384 | .base = GIC_DIST_SGI_PENDING_SET, | ||
385 | .len = VGIC_NR_SGIS, | ||
386 | .handle_mmio = handle_mmio_sgi_set, | ||
387 | }, | ||
388 | {} | ||
389 | }; | ||
390 | |||
391 | static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, | ||
392 | struct kvm_exit_mmio *mmio) | ||
393 | { | ||
394 | unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base; | ||
395 | |||
396 | if (!is_in_range(mmio->phys_addr, mmio->len, base, | ||
397 | KVM_VGIC_V2_DIST_SIZE)) | ||
398 | return false; | ||
399 | |||
400 | /* GICv2 does not support accesses wider than 32 bits */ | ||
401 | if (mmio->len > 4) { | ||
402 | kvm_inject_dabt(vcpu, mmio->phys_addr); | ||
403 | return true; | ||
404 | } | ||
405 | |||
406 | return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base); | ||
407 | } | ||
408 | |||
409 | static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) | ||
410 | { | ||
411 | struct kvm *kvm = vcpu->kvm; | ||
412 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
413 | int nrcpus = atomic_read(&kvm->online_vcpus); | ||
414 | u8 target_cpus; | ||
415 | int sgi, mode, c, vcpu_id; | ||
416 | |||
417 | vcpu_id = vcpu->vcpu_id; | ||
418 | |||
419 | sgi = reg & 0xf; | ||
420 | target_cpus = (reg >> 16) & 0xff; | ||
421 | mode = (reg >> 24) & 3; | ||
422 | |||
423 | switch (mode) { | ||
424 | case 0: | ||
425 | if (!target_cpus) | ||
426 | return; | ||
427 | break; | ||
428 | |||
429 | case 1: | ||
430 | target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; | ||
431 | break; | ||
432 | |||
433 | case 2: | ||
434 | target_cpus = 1 << vcpu_id; | ||
435 | break; | ||
436 | } | ||
437 | |||
438 | kvm_for_each_vcpu(c, vcpu, kvm) { | ||
439 | if (target_cpus & 1) { | ||
440 | /* Flag the SGI as pending */ | ||
441 | vgic_dist_irq_set_pending(vcpu, sgi); | ||
442 | *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id; | ||
443 | kvm_debug("SGI%d from CPU%d to CPU%d\n", | ||
444 | sgi, vcpu_id, c); | ||
445 | } | ||
446 | |||
447 | target_cpus >>= 1; | ||
448 | } | ||
449 | } | ||
450 | |||
451 | static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq) | ||
452 | { | ||
453 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
454 | unsigned long sources; | ||
455 | int vcpu_id = vcpu->vcpu_id; | ||
456 | int c; | ||
457 | |||
458 | sources = *vgic_get_sgi_sources(dist, vcpu_id, irq); | ||
459 | |||
460 | for_each_set_bit(c, &sources, dist->nr_cpus) { | ||
461 | if (vgic_queue_irq(vcpu, c, irq)) | ||
462 | clear_bit(c, &sources); | ||
463 | } | ||
464 | |||
465 | *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources; | ||
466 | |||
467 | /* | ||
468 | * If the sources bitmap has been cleared it means that we | ||
469 | * could queue all the SGIs onto link registers (see the | ||
470 | * clear_bit above), and therefore we are done with them in | ||
471 | * our emulated gic and can get rid of them. | ||
472 | */ | ||
473 | if (!sources) { | ||
474 | vgic_dist_irq_clear_pending(vcpu, irq); | ||
475 | vgic_cpu_irq_clear(vcpu, irq); | ||
476 | return true; | ||
477 | } | ||
478 | |||
479 | return false; | ||
480 | } | ||
481 | |||
482 | /** | ||
483 | * vgic_v2_map_resources - Configure global VGIC state before running any VCPUs | ||
484 | * @kvm: pointer to the kvm struct | ||
485 | * | ||
486 | * Map the virtual CPU interface into the VM before running any VCPUs. We | ||
487 | * can't do this at creation time, because user space must first set the | ||
488 | * virtual CPU interface address in the guest physical address space. | ||
489 | */ | ||
490 | static int vgic_v2_map_resources(struct kvm *kvm, | ||
491 | const struct vgic_params *params) | ||
492 | { | ||
493 | int ret = 0; | ||
494 | |||
495 | if (!irqchip_in_kernel(kvm)) | ||
496 | return 0; | ||
497 | |||
498 | mutex_lock(&kvm->lock); | ||
499 | |||
500 | if (vgic_ready(kvm)) | ||
501 | goto out; | ||
502 | |||
503 | if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || | ||
504 | IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) { | ||
505 | kvm_err("Need to set vgic cpu and dist addresses first\n"); | ||
506 | ret = -ENXIO; | ||
507 | goto out; | ||
508 | } | ||
509 | |||
510 | /* | ||
511 | * Initialize the vgic if this hasn't already been done on demand by | ||
512 | * accessing the vgic state from userspace. | ||
513 | */ | ||
514 | ret = vgic_init(kvm); | ||
515 | if (ret) { | ||
516 | kvm_err("Unable to allocate maps\n"); | ||
517 | goto out; | ||
518 | } | ||
519 | |||
520 | ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, | ||
521 | params->vcpu_base, KVM_VGIC_V2_CPU_SIZE, | ||
522 | true); | ||
523 | if (ret) { | ||
524 | kvm_err("Unable to remap VGIC CPU to VCPU\n"); | ||
525 | goto out; | ||
526 | } | ||
527 | |||
528 | kvm->arch.vgic.ready = true; | ||
529 | out: | ||
530 | if (ret) | ||
531 | kvm_vgic_destroy(kvm); | ||
532 | mutex_unlock(&kvm->lock); | ||
533 | return ret; | ||
534 | } | ||
535 | |||
536 | static void vgic_v2_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source) | ||
537 | { | ||
538 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
539 | |||
540 | *vgic_get_sgi_sources(dist, vcpu->vcpu_id, irq) |= 1 << source; | ||
541 | } | ||
542 | |||
543 | static int vgic_v2_init_model(struct kvm *kvm) | ||
544 | { | ||
545 | int i; | ||
546 | |||
547 | for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4) | ||
548 | vgic_set_target_reg(kvm, 0, i); | ||
549 | |||
550 | return 0; | ||
551 | } | ||
552 | |||
553 | void vgic_v2_init_emulation(struct kvm *kvm) | ||
554 | { | ||
555 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
556 | |||
557 | dist->vm_ops.handle_mmio = vgic_v2_handle_mmio; | ||
558 | dist->vm_ops.queue_sgi = vgic_v2_queue_sgi; | ||
559 | dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source; | ||
560 | dist->vm_ops.init_model = vgic_v2_init_model; | ||
561 | dist->vm_ops.map_resources = vgic_v2_map_resources; | ||
562 | |||
563 | kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS; | ||
564 | } | ||
565 | |||
566 | static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, | ||
567 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | ||
568 | { | ||
569 | bool updated = false; | ||
570 | struct vgic_vmcr vmcr; | ||
571 | u32 *vmcr_field; | ||
572 | u32 reg; | ||
573 | |||
574 | vgic_get_vmcr(vcpu, &vmcr); | ||
575 | |||
576 | switch (offset & ~0x3) { | ||
577 | case GIC_CPU_CTRL: | ||
578 | vmcr_field = &vmcr.ctlr; | ||
579 | break; | ||
580 | case GIC_CPU_PRIMASK: | ||
581 | vmcr_field = &vmcr.pmr; | ||
582 | break; | ||
583 | case GIC_CPU_BINPOINT: | ||
584 | vmcr_field = &vmcr.bpr; | ||
585 | break; | ||
586 | case GIC_CPU_ALIAS_BINPOINT: | ||
587 | vmcr_field = &vmcr.abpr; | ||
588 | break; | ||
589 | default: | ||
590 | BUG(); | ||
591 | } | ||
592 | |||
593 | if (!mmio->is_write) { | ||
594 | reg = *vmcr_field; | ||
595 | mmio_data_write(mmio, ~0, reg); | ||
596 | } else { | ||
597 | reg = mmio_data_read(mmio, ~0); | ||
598 | if (reg != *vmcr_field) { | ||
599 | *vmcr_field = reg; | ||
600 | vgic_set_vmcr(vcpu, &vmcr); | ||
601 | updated = true; | ||
602 | } | ||
603 | } | ||
604 | return updated; | ||
605 | } | ||
606 | |||
607 | static bool handle_mmio_abpr(struct kvm_vcpu *vcpu, | ||
608 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | ||
609 | { | ||
610 | return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT); | ||
611 | } | ||
612 | |||
613 | static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, | ||
614 | struct kvm_exit_mmio *mmio, | ||
615 | phys_addr_t offset) | ||
616 | { | ||
617 | u32 reg; | ||
618 | |||
619 | if (mmio->is_write) | ||
620 | return false; | ||
621 | |||
622 | /* GICC_IIDR */ | ||
623 | reg = (PRODUCT_ID_KVM << 20) | | ||
624 | (GICC_ARCH_VERSION_V2 << 16) | | ||
625 | (IMPLEMENTER_ARM << 0); | ||
626 | mmio_data_write(mmio, ~0, reg); | ||
627 | return false; | ||
628 | } | ||
629 | |||
630 | /* | ||
631 | * CPU Interface Registers - these are not accessed by the VM, but by | ||
632 | * user space for saving and restoring VGIC state. | ||
633 | */ | ||
634 | static const struct kvm_mmio_range vgic_cpu_ranges[] = { | ||
635 | { | ||
636 | .base = GIC_CPU_CTRL, | ||
637 | .len = 12, | ||
638 | .handle_mmio = handle_cpu_mmio_misc, | ||
639 | }, | ||
640 | { | ||
641 | .base = GIC_CPU_ALIAS_BINPOINT, | ||
642 | .len = 4, | ||
643 | .handle_mmio = handle_mmio_abpr, | ||
644 | }, | ||
645 | { | ||
646 | .base = GIC_CPU_ACTIVEPRIO, | ||
647 | .len = 16, | ||
648 | .handle_mmio = handle_mmio_raz_wi, | ||
649 | }, | ||
650 | { | ||
651 | .base = GIC_CPU_IDENT, | ||
652 | .len = 4, | ||
653 | .handle_mmio = handle_cpu_mmio_ident, | ||
654 | }, | ||
655 | }; | ||
656 | |||
657 | static int vgic_attr_regs_access(struct kvm_device *dev, | ||
658 | struct kvm_device_attr *attr, | ||
659 | u32 *reg, bool is_write) | ||
660 | { | ||
661 | const struct kvm_mmio_range *r = NULL, *ranges; | ||
662 | phys_addr_t offset; | ||
663 | int ret, cpuid, c; | ||
664 | struct kvm_vcpu *vcpu, *tmp_vcpu; | ||
665 | struct vgic_dist *vgic; | ||
666 | struct kvm_exit_mmio mmio; | ||
667 | |||
668 | offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; | ||
669 | cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> | ||
670 | KVM_DEV_ARM_VGIC_CPUID_SHIFT; | ||
671 | |||
672 | mutex_lock(&dev->kvm->lock); | ||
673 | |||
674 | ret = vgic_init(dev->kvm); | ||
675 | if (ret) | ||
676 | goto out; | ||
677 | |||
678 | if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { | ||
679 | ret = -EINVAL; | ||
680 | goto out; | ||
681 | } | ||
682 | |||
683 | vcpu = kvm_get_vcpu(dev->kvm, cpuid); | ||
684 | vgic = &dev->kvm->arch.vgic; | ||
685 | |||
686 | mmio.len = 4; | ||
687 | mmio.is_write = is_write; | ||
688 | if (is_write) | ||
689 | mmio_data_write(&mmio, ~0, *reg); | ||
690 | switch (attr->group) { | ||
691 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
692 | mmio.phys_addr = vgic->vgic_dist_base + offset; | ||
693 | ranges = vgic_dist_ranges; | ||
694 | break; | ||
695 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: | ||
696 | mmio.phys_addr = vgic->vgic_cpu_base + offset; | ||
697 | ranges = vgic_cpu_ranges; | ||
698 | break; | ||
699 | default: | ||
700 | BUG(); | ||
701 | } | ||
702 | r = vgic_find_range(ranges, &mmio, offset); | ||
703 | |||
704 | if (unlikely(!r || !r->handle_mmio)) { | ||
705 | ret = -ENXIO; | ||
706 | goto out; | ||
707 | } | ||
708 | |||
709 | |||
710 | spin_lock(&vgic->lock); | ||
711 | |||
712 | /* | ||
713 | * Ensure that no other VCPU is running by checking the vcpu->cpu | ||
714 | * field. If no other VCPUs are running we can safely access the VGIC | ||
715 | * state, because even if another VCPU is run after this point, that | ||
716 | * VCPU will not touch the vgic state, because it will block on | ||
717 | * getting the vgic->lock in kvm_vgic_sync_hwstate(). | ||
718 | */ | ||
719 | kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) { | ||
720 | if (unlikely(tmp_vcpu->cpu != -1)) { | ||
721 | ret = -EBUSY; | ||
722 | goto out_vgic_unlock; | ||
723 | } | ||
724 | } | ||
725 | |||
726 | /* | ||
727 | * Move all pending IRQs from the LRs on all VCPUs so the pending | ||
728 | * state can be properly represented in the register state accessible | ||
729 | * through this API. | ||
730 | */ | ||
731 | kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) | ||
732 | vgic_unqueue_irqs(tmp_vcpu); | ||
733 | |||
734 | offset -= r->base; | ||
735 | r->handle_mmio(vcpu, &mmio, offset); | ||
736 | |||
737 | if (!is_write) | ||
738 | *reg = mmio_data_read(&mmio, ~0); | ||
739 | |||
740 | ret = 0; | ||
741 | out_vgic_unlock: | ||
742 | spin_unlock(&vgic->lock); | ||
743 | out: | ||
744 | mutex_unlock(&dev->kvm->lock); | ||
745 | return ret; | ||
746 | } | ||
747 | |||
748 | static int vgic_v2_create(struct kvm_device *dev, u32 type) | ||
749 | { | ||
750 | return kvm_vgic_create(dev->kvm, type); | ||
751 | } | ||
752 | |||
753 | static void vgic_v2_destroy(struct kvm_device *dev) | ||
754 | { | ||
755 | kfree(dev); | ||
756 | } | ||
757 | |||
758 | static int vgic_v2_set_attr(struct kvm_device *dev, | ||
759 | struct kvm_device_attr *attr) | ||
760 | { | ||
761 | int ret; | ||
762 | |||
763 | ret = vgic_set_common_attr(dev, attr); | ||
764 | if (ret != -ENXIO) | ||
765 | return ret; | ||
766 | |||
767 | switch (attr->group) { | ||
768 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
769 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { | ||
770 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
771 | u32 reg; | ||
772 | |||
773 | if (get_user(reg, uaddr)) | ||
774 | return -EFAULT; | ||
775 | |||
776 | return vgic_attr_regs_access(dev, attr, ®, true); | ||
777 | } | ||
778 | |||
779 | } | ||
780 | |||
781 | return -ENXIO; | ||
782 | } | ||
783 | |||
784 | static int vgic_v2_get_attr(struct kvm_device *dev, | ||
785 | struct kvm_device_attr *attr) | ||
786 | { | ||
787 | int ret; | ||
788 | |||
789 | ret = vgic_get_common_attr(dev, attr); | ||
790 | if (ret != -ENXIO) | ||
791 | return ret; | ||
792 | |||
793 | switch (attr->group) { | ||
794 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
795 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { | ||
796 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
797 | u32 reg = 0; | ||
798 | |||
799 | ret = vgic_attr_regs_access(dev, attr, ®, false); | ||
800 | if (ret) | ||
801 | return ret; | ||
802 | return put_user(reg, uaddr); | ||
803 | } | ||
804 | |||
805 | } | ||
806 | |||
807 | return -ENXIO; | ||
808 | } | ||
809 | |||
810 | static int vgic_v2_has_attr(struct kvm_device *dev, | ||
811 | struct kvm_device_attr *attr) | ||
812 | { | ||
813 | phys_addr_t offset; | ||
814 | |||
815 | switch (attr->group) { | ||
816 | case KVM_DEV_ARM_VGIC_GRP_ADDR: | ||
817 | switch (attr->attr) { | ||
818 | case KVM_VGIC_V2_ADDR_TYPE_DIST: | ||
819 | case KVM_VGIC_V2_ADDR_TYPE_CPU: | ||
820 | return 0; | ||
821 | } | ||
822 | break; | ||
823 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
824 | offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; | ||
825 | return vgic_has_attr_regs(vgic_dist_ranges, offset); | ||
826 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: | ||
827 | offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; | ||
828 | return vgic_has_attr_regs(vgic_cpu_ranges, offset); | ||
829 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: | ||
830 | return 0; | ||
831 | case KVM_DEV_ARM_VGIC_GRP_CTRL: | ||
832 | switch (attr->attr) { | ||
833 | case KVM_DEV_ARM_VGIC_CTRL_INIT: | ||
834 | return 0; | ||
835 | } | ||
836 | } | ||
837 | return -ENXIO; | ||
838 | } | ||
839 | |||
840 | struct kvm_device_ops kvm_arm_vgic_v2_ops = { | ||
841 | .name = "kvm-arm-vgic-v2", | ||
842 | .create = vgic_v2_create, | ||
843 | .destroy = vgic_v2_destroy, | ||
844 | .set_attr = vgic_v2_set_attr, | ||
845 | .get_attr = vgic_v2_get_attr, | ||
846 | .has_attr = vgic_v2_has_attr, | ||
847 | }; | ||
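For reference, vgic_dispatch_sgi() above decodes a guest write to GICD_SGIR as: SGI number in bits [3:0], CPU target list in bits [23:16], and the target filter in bits [25:24]. A guest-side sketch of composing such a value (the helper name is invented for illustration):

	static u32 make_sgir(u8 filter, u8 target_list, u8 sgi)
	{
		return ((u32)(filter & 0x3) << 24) |	/* 0: list, 1: all but self, 2: self */
		       ((u32)target_list << 16) |	/* one bit per target CPU interface */
		       (sgi & 0xf);			/* SGI number 0..15 */
	}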
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index 2935405ad22f..a0a7b5d1a070 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c | |||
@@ -229,12 +229,16 @@ int vgic_v2_probe(struct device_node *vgic_node, | |||
229 | goto out_unmap; | 229 | goto out_unmap; |
230 | } | 230 | } |
231 | 231 | ||
232 | vgic->can_emulate_gicv2 = true; | ||
233 | kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2); | ||
234 | |||
232 | vgic->vcpu_base = vcpu_res.start; | 235 | vgic->vcpu_base = vcpu_res.start; |
233 | 236 | ||
234 | kvm_info("%s@%llx IRQ%d\n", vgic_node->name, | 237 | kvm_info("%s@%llx IRQ%d\n", vgic_node->name, |
235 | vctrl_res.start, vgic->maint_irq); | 238 | vctrl_res.start, vgic->maint_irq); |
236 | 239 | ||
237 | vgic->type = VGIC_V2; | 240 | vgic->type = VGIC_V2; |
241 | vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS; | ||
238 | *ops = &vgic_v2_ops; | 242 | *ops = &vgic_v2_ops; |
239 | *params = vgic; | 243 | *params = vgic; |
240 | goto out; | 244 | goto out; |
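With kvm_arm_vgic_v2_ops registered from vgic_v2_probe(), userspace can instantiate the GICv2 through the generic device API rather than relying on the legacy KVM_CREATE_IRQCHIP path that can_emulate_gicv2 gates. A minimal userspace sketch (error handling trimmed; vm_fd is assumed to be an open VM file descriptor and <linux/kvm.h> to be included):

	struct kvm_create_device cd = {
		.type = KVM_DEV_TYPE_ARM_VGIC_V2,
	};

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) == 0) {
		/* cd.fd is the device fd, used later with KVM_SET_DEVICE_ATTR
		 * to set the distributor and CPU interface base addresses. */
	}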
diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c new file mode 100644 index 000000000000..b3f154631515 --- /dev/null +++ b/virt/kvm/arm/vgic-v3-emul.c | |||
@@ -0,0 +1,1036 @@ | |||
1 | /* | ||
2 | * GICv3 distributor and redistributor emulation | ||
3 | * | ||
4 | * GICv3 emulation is currently only supported on a GICv3 host (because | ||
5 | * we rely on the hardware's CPU interface virtualization support), but | ||
6 | * supports both hardware with or without the optional GICv2 backwards | ||
7 | * compatibility features. | ||
8 | * | ||
9 | * Limitations of the emulation: | ||
10 | * (RAZ/WI: read as zero, write ignore, RAO/WI: read as one, write ignore) | ||
11 | * - We do not support LPIs (yet). TYPER.LPIS is reported as 0 and is RAZ/WI. | ||
12 | * - We do not support the message based interrupts (MBIs) triggered by | ||
13 | * writes to the GICD_{SET,CLR}SPI_* registers. TYPER.MBIS is reported as 0. | ||
14 | * - We do not support the (optional) backwards compatibility feature. | ||
15 | * GICD_CTLR.ARE resets to 1 and is RAO/WI. If the _host_ GIC supports | ||
16 | * the compatibility feature, you can use a GICv2 in the guest, though. | ||
17 | * - We only support a single security state. GICD_CTLR.DS is 1 and is RAO/WI. | ||
18 | * - Priorities are not emulated (same as the GICv2 emulation). Linux | ||
19 | * as a guest is fine with this, because it does not use priorities. | ||
20 | * - We only support Group1 interrupts. Again Linux uses only those. | ||
21 | * | ||
22 | * Copyright (C) 2014 ARM Ltd. | ||
23 | * Author: Andre Przywara <andre.przywara@arm.com> | ||
24 | * | ||
25 | * This program is free software; you can redistribute it and/or modify | ||
26 | * it under the terms of the GNU General Public License version 2 as | ||
27 | * published by the Free Software Foundation. | ||
28 | * | ||
29 | * This program is distributed in the hope that it will be useful, | ||
30 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
31 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
32 | * GNU General Public License for more details. | ||
33 | * | ||
34 | * You should have received a copy of the GNU General Public License | ||
35 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
36 | */ | ||
37 | |||
38 | #include <linux/cpu.h> | ||
39 | #include <linux/kvm.h> | ||
40 | #include <linux/kvm_host.h> | ||
41 | #include <linux/interrupt.h> | ||
42 | |||
43 | #include <linux/irqchip/arm-gic-v3.h> | ||
44 | #include <kvm/arm_vgic.h> | ||
45 | |||
46 | #include <asm/kvm_emulate.h> | ||
47 | #include <asm/kvm_arm.h> | ||
48 | #include <asm/kvm_mmu.h> | ||
49 | |||
50 | #include "vgic.h" | ||
51 | |||
52 | static bool handle_mmio_rao_wi(struct kvm_vcpu *vcpu, | ||
53 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | ||
54 | { | ||
55 | u32 reg = 0xffffffff; | ||
56 | |||
57 | vgic_reg_access(mmio, ®, offset, | ||
58 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); | ||
59 | |||
60 | return false; | ||
61 | } | ||
62 | |||
63 | static bool handle_mmio_ctlr(struct kvm_vcpu *vcpu, | ||
64 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | ||
65 | { | ||
66 | u32 reg = 0; | ||
67 | |||
68 | /* | ||
69 | * Force ARE and DS to 1, the guest cannot change this. | ||
70 | * For the time being we only support Group1 interrupts. | ||
71 | */ | ||
72 | if (vcpu->kvm->arch.vgic.enabled) | ||
73 | reg = GICD_CTLR_ENABLE_SS_G1; | ||
74 | reg |= GICD_CTLR_ARE_NS | GICD_CTLR_DS; | ||
75 | |||
76 | vgic_reg_access(mmio, ®, offset, | ||
77 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); | ||
78 | if (mmio->is_write) { | ||
79 | if (reg & GICD_CTLR_ENABLE_SS_G0) | ||
80 | kvm_info("guest tried to enable unsupported Group0 interrupts\n"); | ||
81 | vcpu->kvm->arch.vgic.enabled = !!(reg & GICD_CTLR_ENABLE_SS_G1); | ||
82 | vgic_update_state(vcpu->kvm); | ||
83 | return true; | ||
84 | } | ||
85 | return false; | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * As this implementation does not provide compatibility | ||
90 | * with GICv2 (ARE==1), we report zero CPUs in bits [5..7]. | ||
91 | * Also LPIs and MBIs are not supported, so we set the respective bits to 0. | ||
92 | * Also we report at most 2**10=1024 interrupt IDs (to match 1024 SPIs). | ||
93 | */ | ||
94 | #define INTERRUPT_ID_BITS 10 | ||
95 | static bool handle_mmio_typer(struct kvm_vcpu *vcpu, | ||
96 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | ||
97 | { | ||
98 | u32 reg; | ||
99 | |||
100 | reg = (min(vcpu->kvm->arch.vgic.nr_irqs, 1024) >> 5) - 1; | ||
101 | |||
102 | reg |= (INTERRUPT_ID_BITS - 1) << 19; | ||
103 | |||
104 | vgic_reg_access(mmio, ®, offset, | ||
105 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); | ||
106 | |||
107 | return false; | ||
108 | } | ||
109 | |||
110 | static bool handle_mmio_iidr(struct kvm_vcpu *vcpu, | ||
111 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | ||
112 | { | ||
113 | u32 reg; | ||
114 | |||
115 | reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); | ||
116 | vgic_reg_access(mmio, ®, offset, | ||
117 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); | ||
118 | |||
119 | return false; | ||
120 | } | ||
121 | |||
122 | static bool handle_mmio_set_enable_reg_dist(struct kvm_vcpu *vcpu, | ||
123 | struct kvm_exit_mmio *mmio, | ||
124 | phys_addr_t offset) | ||
125 | { | ||
126 | if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) | ||
127 | return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, | ||
128 | vcpu->vcpu_id, | ||
129 | ACCESS_WRITE_SETBIT); | ||
130 | |||
131 | vgic_reg_access(mmio, NULL, offset, | ||
132 | ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); | ||
133 | return false; | ||
134 | } | ||
135 | |||
136 | static bool handle_mmio_clear_enable_reg_dist(struct kvm_vcpu *vcpu, | ||
137 | struct kvm_exit_mmio *mmio, | ||
138 | phys_addr_t offset) | ||
139 | { | ||
140 | if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) | ||
141 | return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, | ||
142 | vcpu->vcpu_id, | ||
143 | ACCESS_WRITE_CLEARBIT); | ||
144 | |||
145 | vgic_reg_access(mmio, NULL, offset, | ||
146 | ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); | ||
147 | return false; | ||
148 | } | ||
149 | |||
150 | static bool handle_mmio_set_pending_reg_dist(struct kvm_vcpu *vcpu, | ||
151 | struct kvm_exit_mmio *mmio, | ||
152 | phys_addr_t offset) | ||
153 | { | ||
154 | if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) | ||
155 | return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset, | ||
156 | vcpu->vcpu_id); | ||
157 | |||
158 | vgic_reg_access(mmio, NULL, offset, | ||
159 | ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); | ||
160 | return false; | ||
161 | } | ||
162 | |||
163 | static bool handle_mmio_clear_pending_reg_dist(struct kvm_vcpu *vcpu, | ||
164 | struct kvm_exit_mmio *mmio, | ||
165 | phys_addr_t offset) | ||
166 | { | ||
167 | if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) | ||
168 | return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset, | ||
169 | vcpu->vcpu_id); | ||
170 | |||
171 | vgic_reg_access(mmio, NULL, offset, | ||
172 | ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); | ||
173 | return false; | ||
174 | } | ||
175 | |||
176 | static bool handle_mmio_priority_reg_dist(struct kvm_vcpu *vcpu, | ||
177 | struct kvm_exit_mmio *mmio, | ||
178 | phys_addr_t offset) | ||
179 | { | ||
180 | u32 *reg; | ||
181 | |||
182 | if (unlikely(offset < VGIC_NR_PRIVATE_IRQS)) { | ||
183 | vgic_reg_access(mmio, NULL, offset, | ||
184 | ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); | ||
185 | return false; | ||
186 | } | ||
187 | |||
188 | reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, | ||
189 | vcpu->vcpu_id, offset); | ||
190 | vgic_reg_access(mmio, reg, offset, | ||
191 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); | ||
192 | return false; | ||
193 | } | ||
194 | |||
195 | static bool handle_mmio_cfg_reg_dist(struct kvm_vcpu *vcpu, | ||
196 | struct kvm_exit_mmio *mmio, | ||
197 | phys_addr_t offset) | ||
198 | { | ||
199 | u32 *reg; | ||
200 | |||
201 | if (unlikely(offset < VGIC_NR_PRIVATE_IRQS / 4)) { | ||
202 | vgic_reg_access(mmio, NULL, offset, | ||
203 | ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); | ||
204 | return false; | ||
205 | } | ||
206 | |||
207 | reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, | ||
208 | vcpu->vcpu_id, offset >> 1); | ||
209 | |||
210 | return vgic_handle_cfg_reg(reg, mmio, offset); | ||
211 | } | ||
212 | |||
213 | /* | ||
214 | * We use a compressed version of the MPIDR (all 32 bits in one 32-bit word) | ||
215 | * when we store the target MPIDR written by the guest. | ||
216 | */ | ||
217 | static u32 compress_mpidr(unsigned long mpidr) | ||
218 | { | ||
219 | u32 ret; | ||
220 | |||
221 | ret = MPIDR_AFFINITY_LEVEL(mpidr, 0); | ||
222 | ret |= MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8; | ||
223 | ret |= MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16; | ||
224 | ret |= MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24; | ||
225 | |||
226 | return ret; | ||
227 | } | ||
228 | |||
229 | static unsigned long uncompress_mpidr(u32 value) | ||
230 | { | ||
231 | unsigned long mpidr; | ||
232 | |||
233 | mpidr = ((value >> 0) & 0xFF) << MPIDR_LEVEL_SHIFT(0); | ||
234 | mpidr |= ((value >> 8) & 0xFF) << MPIDR_LEVEL_SHIFT(1); | ||
235 | mpidr |= ((value >> 16) & 0xFF) << MPIDR_LEVEL_SHIFT(2); | ||
236 | mpidr |= (u64)((value >> 24) & 0xFF) << MPIDR_LEVEL_SHIFT(3); | ||
237 | |||
238 | return mpidr; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * Lookup the given MPIDR value to get the vcpu_id (if there is one) | ||
243 | * and store that in the irq_spi_cpu[] array. | ||
244 | * This limits the number of VCPUs to 255 for now; extending the data | ||
245 | * type (or storing kvm_vcpu pointers) should lift the limit. | ||
246 | * Store the original MPIDR value in an extra array to support read-as-written. | ||
247 | * Unallocated MPIDRs are translated to a special value and caught | ||
248 | * before any array accesses. | ||
249 | */ | ||
250 | static bool handle_mmio_route_reg(struct kvm_vcpu *vcpu, | ||
251 | struct kvm_exit_mmio *mmio, | ||
252 | phys_addr_t offset) | ||
253 | { | ||
254 | struct kvm *kvm = vcpu->kvm; | ||
255 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
256 | int spi; | ||
257 | u32 reg; | ||
258 | int vcpu_id; | ||
259 | unsigned long *bmap, mpidr; | ||
260 | |||
261 | /* | ||
262 | * The upper 32 bits of each 64 bit register are zero, | ||
263 | * as we don't support Aff3. | ||
264 | */ | ||
265 | if ((offset & 4)) { | ||
266 | vgic_reg_access(mmio, NULL, offset, | ||
267 | ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); | ||
268 | return false; | ||
269 | } | ||
270 | |||
271 | /* This region only covers SPIs, so no handling of private IRQs here. */ | ||
272 | spi = offset / 8; | ||
273 | |||
274 | /* get the stored MPIDR for this IRQ */ | ||
275 | mpidr = uncompress_mpidr(dist->irq_spi_mpidr[spi]); | ||
276 | reg = mpidr; | ||
277 | |||
278 | vgic_reg_access(mmio, ®, offset, | ||
279 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); | ||
280 | |||
281 | if (!mmio->is_write) | ||
282 | return false; | ||
283 | |||
284 | /* | ||
285 | * Now clear the currently assigned vCPU from the map, making room | ||
286 | * for the new one to be written below | ||
287 | */ | ||
288 | vcpu = kvm_mpidr_to_vcpu(kvm, mpidr); | ||
289 | if (likely(vcpu)) { | ||
290 | vcpu_id = vcpu->vcpu_id; | ||
291 | bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]); | ||
292 | __clear_bit(spi, bmap); | ||
293 | } | ||
294 | |||
295 | dist->irq_spi_mpidr[spi] = compress_mpidr(reg); | ||
296 | vcpu = kvm_mpidr_to_vcpu(kvm, reg & MPIDR_HWID_BITMASK); | ||
297 | |||
298 | /* | ||
299 | * The spec says that non-existent MPIDR values should not be | ||
300 | * forwarded to any existent (v)CPU, but should be able to become | ||
301 | * pending anyway. We simply keep the irq_spi_target[] array empty, so | ||
302 | * the interrupt will never be injected. | ||
303 | * irq_spi_cpu[irq] gets a magic value in this case. | ||
304 | */ | ||
305 | if (likely(vcpu)) { | ||
306 | vcpu_id = vcpu->vcpu_id; | ||
307 | dist->irq_spi_cpu[spi] = vcpu_id; | ||
308 | bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]); | ||
309 | __set_bit(spi, bmap); | ||
310 | } else { | ||
311 | dist->irq_spi_cpu[spi] = VCPU_NOT_ALLOCATED; | ||
312 | } | ||
313 | |||
314 | vgic_update_state(kvm); | ||
315 | |||
316 | return true; | ||
317 | } | ||
318 | |||
319 | /* | ||
320 | * We should be careful about promising too much when a guest reads | ||
321 | * this register. Don't claim to be like any hardware implementation, | ||
322 | * but just report the GIC as version 3 - which is what a Linux guest | ||
323 | * would check. | ||
324 | */ | ||
325 | static bool handle_mmio_idregs(struct kvm_vcpu *vcpu, | ||
326 | struct kvm_exit_mmio *mmio, | ||
327 | phys_addr_t offset) | ||
328 | { | ||
329 | u32 reg = 0; | ||
330 | |||
331 | switch (offset + GICD_IDREGS) { | ||
332 | case GICD_PIDR2: | ||
333 | reg = 0x3b; | ||
334 | break; | ||
335 | } | ||
336 | |||
337 | vgic_reg_access(mmio, ®, offset, | ||
338 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); | ||
339 | |||
340 | return false; | ||
341 | } | ||
342 | |||
343 | static const struct kvm_mmio_range vgic_v3_dist_ranges[] = { | ||
344 | { | ||
345 | .base = GICD_CTLR, | ||
346 | .len = 0x04, | ||
347 | .bits_per_irq = 0, | ||
348 | .handle_mmio = handle_mmio_ctlr, | ||
349 | }, | ||
350 | { | ||
351 | .base = GICD_TYPER, | ||
352 | .len = 0x04, | ||
353 | .bits_per_irq = 0, | ||
354 | .handle_mmio = handle_mmio_typer, | ||
355 | }, | ||
356 | { | ||
357 | .base = GICD_IIDR, | ||
358 | .len = 0x04, | ||
359 | .bits_per_irq = 0, | ||
360 | .handle_mmio = handle_mmio_iidr, | ||
361 | }, | ||
362 | { | ||
363 | /* this register is optional, it is RAZ/WI if not implemented */ | ||
364 | .base = GICD_STATUSR, | ||
365 | .len = 0x04, | ||
366 | .bits_per_irq = 0, | ||
367 | .handle_mmio = handle_mmio_raz_wi, | ||
368 | }, | ||
369 | { | ||
370 | /* this write only register is WI when TYPER.MBIS=0 */ | ||
371 | .base = GICD_SETSPI_NSR, | ||
372 | .len = 0x04, | ||
373 | .bits_per_irq = 0, | ||
374 | .handle_mmio = handle_mmio_raz_wi, | ||
375 | }, | ||
376 | { | ||
377 | /* this write only register is WI when TYPER.MBIS=0 */ | ||
378 | .base = GICD_CLRSPI_NSR, | ||
379 | .len = 0x04, | ||
380 | .bits_per_irq = 0, | ||
381 | .handle_mmio = handle_mmio_raz_wi, | ||
382 | }, | ||
383 | { | ||
384 | /* this is RAZ/WI when DS=1 */ | ||
385 | .base = GICD_SETSPI_SR, | ||
386 | .len = 0x04, | ||
387 | .bits_per_irq = 0, | ||
388 | .handle_mmio = handle_mmio_raz_wi, | ||
389 | }, | ||
390 | { | ||
391 | /* this is RAZ/WI when DS=1 */ | ||
392 | .base = GICD_CLRSPI_SR, | ||
393 | .len = 0x04, | ||
394 | .bits_per_irq = 0, | ||
395 | .handle_mmio = handle_mmio_raz_wi, | ||
396 | }, | ||
397 | { | ||
398 | .base = GICD_IGROUPR, | ||
399 | .len = 0x80, | ||
400 | .bits_per_irq = 1, | ||
401 | .handle_mmio = handle_mmio_rao_wi, | ||
402 | }, | ||
403 | { | ||
404 | .base = GICD_ISENABLER, | ||
405 | .len = 0x80, | ||
406 | .bits_per_irq = 1, | ||
407 | .handle_mmio = handle_mmio_set_enable_reg_dist, | ||
408 | }, | ||
409 | { | ||
410 | .base = GICD_ICENABLER, | ||
411 | .len = 0x80, | ||
412 | .bits_per_irq = 1, | ||
413 | .handle_mmio = handle_mmio_clear_enable_reg_dist, | ||
414 | }, | ||
415 | { | ||
416 | .base = GICD_ISPENDR, | ||
417 | .len = 0x80, | ||
418 | .bits_per_irq = 1, | ||
419 | .handle_mmio = handle_mmio_set_pending_reg_dist, | ||
420 | }, | ||
421 | { | ||
422 | .base = GICD_ICPENDR, | ||
423 | .len = 0x80, | ||
424 | .bits_per_irq = 1, | ||
425 | .handle_mmio = handle_mmio_clear_pending_reg_dist, | ||
426 | }, | ||
427 | { | ||
428 | .base = GICD_ISACTIVER, | ||
429 | .len = 0x80, | ||
430 | .bits_per_irq = 1, | ||
431 | .handle_mmio = handle_mmio_raz_wi, | ||
432 | }, | ||
433 | { | ||
434 | .base = GICD_ICACTIVER, | ||
435 | .len = 0x80, | ||
436 | .bits_per_irq = 1, | ||
437 | .handle_mmio = handle_mmio_raz_wi, | ||
438 | }, | ||
439 | { | ||
440 | .base = GICD_IPRIORITYR, | ||
441 | .len = 0x400, | ||
442 | .bits_per_irq = 8, | ||
443 | .handle_mmio = handle_mmio_priority_reg_dist, | ||
444 | }, | ||
445 | { | ||
446 | /* TARGETSRn is RES0 when ARE=1 */ | ||
447 | .base = GICD_ITARGETSR, | ||
448 | .len = 0x400, | ||
449 | .bits_per_irq = 8, | ||
450 | .handle_mmio = handle_mmio_raz_wi, | ||
451 | }, | ||
452 | { | ||
453 | .base = GICD_ICFGR, | ||
454 | .len = 0x100, | ||
455 | .bits_per_irq = 2, | ||
456 | .handle_mmio = handle_mmio_cfg_reg_dist, | ||
457 | }, | ||
458 | { | ||
459 | /* this is RAZ/WI when DS=1 */ | ||
460 | .base = GICD_IGRPMODR, | ||
461 | .len = 0x80, | ||
462 | .bits_per_irq = 1, | ||
463 | .handle_mmio = handle_mmio_raz_wi, | ||
464 | }, | ||
465 | { | ||
466 | /* this is RAZ/WI when DS=1 */ | ||
467 | .base = GICD_NSACR, | ||
468 | .len = 0x100, | ||
469 | .bits_per_irq = 2, | ||
470 | .handle_mmio = handle_mmio_raz_wi, | ||
471 | }, | ||
472 | { | ||
473 | /* this is RAZ/WI when ARE=1 */ | ||
474 | .base = GICD_SGIR, | ||
475 | .len = 0x04, | ||
476 | .handle_mmio = handle_mmio_raz_wi, | ||
477 | }, | ||
478 | { | ||
479 | /* this is RAZ/WI when ARE=1 */ | ||
480 | .base = GICD_CPENDSGIR, | ||
481 | .len = 0x10, | ||
482 | .handle_mmio = handle_mmio_raz_wi, | ||
483 | }, | ||
484 | { | ||
485 | /* this is RAZ/WI when ARE=1 */ | ||
486 | .base = GICD_SPENDSGIR, | ||
487 | .len = 0x10, | ||
488 | .handle_mmio = handle_mmio_raz_wi, | ||
489 | }, | ||
490 | { | ||
491 | .base = GICD_IROUTER + 0x100, | ||
492 | .len = 0x1ee0, | ||
493 | .bits_per_irq = 64, | ||
494 | .handle_mmio = handle_mmio_route_reg, | ||
495 | }, | ||
496 | { | ||
497 | .base = GICD_IDREGS, | ||
498 | .len = 0x30, | ||
499 | .bits_per_irq = 0, | ||
500 | .handle_mmio = handle_mmio_idregs, | ||
501 | }, | ||
502 | {}, | ||
503 | }; | ||
504 | |||
505 | static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu, | ||
506 | struct kvm_exit_mmio *mmio, | ||
507 | phys_addr_t offset) | ||
508 | { | ||
509 | struct kvm_vcpu *redist_vcpu = mmio->private; | ||
510 | |||
511 | return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, | ||
512 | redist_vcpu->vcpu_id, | ||
513 | ACCESS_WRITE_SETBIT); | ||
514 | } | ||
515 | |||
516 | static bool handle_mmio_clear_enable_reg_redist(struct kvm_vcpu *vcpu, | ||
517 | struct kvm_exit_mmio *mmio, | ||
518 | phys_addr_t offset) | ||
519 | { | ||
520 | struct kvm_vcpu *redist_vcpu = mmio->private; | ||
521 | |||
522 | return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, | ||
523 | redist_vcpu->vcpu_id, | ||
524 | ACCESS_WRITE_CLEARBIT); | ||
525 | } | ||
526 | |||
527 | static bool handle_mmio_set_pending_reg_redist(struct kvm_vcpu *vcpu, | ||
528 | struct kvm_exit_mmio *mmio, | ||
529 | phys_addr_t offset) | ||
530 | { | ||
531 | struct kvm_vcpu *redist_vcpu = mmio->private; | ||
532 | |||
533 | return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset, | ||
534 | redist_vcpu->vcpu_id); | ||
535 | } | ||
536 | |||
537 | static bool handle_mmio_clear_pending_reg_redist(struct kvm_vcpu *vcpu, | ||
538 | struct kvm_exit_mmio *mmio, | ||
539 | phys_addr_t offset) | ||
540 | { | ||
541 | struct kvm_vcpu *redist_vcpu = mmio->private; | ||
542 | |||
543 | return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset, | ||
544 | redist_vcpu->vcpu_id); | ||
545 | } | ||
546 | |||
547 | static bool handle_mmio_priority_reg_redist(struct kvm_vcpu *vcpu, | ||
548 | struct kvm_exit_mmio *mmio, | ||
549 | phys_addr_t offset) | ||
550 | { | ||
551 | struct kvm_vcpu *redist_vcpu = mmio->private; | ||
552 | u32 *reg; | ||
553 | |||
554 | reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, | ||
555 | redist_vcpu->vcpu_id, offset); | ||
556 | vgic_reg_access(mmio, reg, offset, | ||
557 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); | ||
558 | return false; | ||
559 | } | ||
560 | |||
561 | static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu, | ||
562 | struct kvm_exit_mmio *mmio, | ||
563 | phys_addr_t offset) | ||
564 | { | ||
565 | struct kvm_vcpu *redist_vcpu = mmio->private; | ||
566 | |||
567 | u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, | ||
568 | redist_vcpu->vcpu_id, offset >> 1); | ||
569 | |||
570 | return vgic_handle_cfg_reg(reg, mmio, offset); | ||
571 | } | ||
572 | |||
573 | static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = { | ||
574 | { | ||
575 | .base = GICR_IGROUPR0, | ||
576 | .len = 0x04, | ||
577 | .bits_per_irq = 1, | ||
578 | .handle_mmio = handle_mmio_rao_wi, | ||
579 | }, | ||
580 | { | ||
581 | .base = GICR_ISENABLER0, | ||
582 | .len = 0x04, | ||
583 | .bits_per_irq = 1, | ||
584 | .handle_mmio = handle_mmio_set_enable_reg_redist, | ||
585 | }, | ||
586 | { | ||
587 | .base = GICR_ICENABLER0, | ||
588 | .len = 0x04, | ||
589 | .bits_per_irq = 1, | ||
590 | .handle_mmio = handle_mmio_clear_enable_reg_redist, | ||
591 | }, | ||
592 | { | ||
593 | .base = GICR_ISPENDR0, | ||
594 | .len = 0x04, | ||
595 | .bits_per_irq = 1, | ||
596 | .handle_mmio = handle_mmio_set_pending_reg_redist, | ||
597 | }, | ||
598 | { | ||
599 | .base = GICR_ICPENDR0, | ||
600 | .len = 0x04, | ||
601 | .bits_per_irq = 1, | ||
602 | .handle_mmio = handle_mmio_clear_pending_reg_redist, | ||
603 | }, | ||
604 | { | ||
605 | .base = GICR_ISACTIVER0, | ||
606 | .len = 0x04, | ||
607 | .bits_per_irq = 1, | ||
608 | .handle_mmio = handle_mmio_raz_wi, | ||
609 | }, | ||
610 | { | ||
611 | .base = GICR_ICACTIVER0, | ||
612 | .len = 0x04, | ||
613 | .bits_per_irq = 1, | ||
614 | .handle_mmio = handle_mmio_raz_wi, | ||
615 | }, | ||
616 | { | ||
617 | .base = GICR_IPRIORITYR0, | ||
618 | .len = 0x20, | ||
619 | .bits_per_irq = 8, | ||
620 | .handle_mmio = handle_mmio_priority_reg_redist, | ||
621 | }, | ||
622 | { | ||
623 | .base = GICR_ICFGR0, | ||
624 | .len = 0x08, | ||
625 | .bits_per_irq = 2, | ||
626 | .handle_mmio = handle_mmio_cfg_reg_redist, | ||
627 | }, | ||
628 | { | ||
629 | .base = GICR_IGRPMODR0, | ||
630 | .len = 0x04, | ||
631 | .bits_per_irq = 1, | ||
632 | .handle_mmio = handle_mmio_raz_wi, | ||
633 | }, | ||
634 | { | ||
635 | .base = GICR_NSACR, | ||
636 | .len = 0x04, | ||
637 | .handle_mmio = handle_mmio_raz_wi, | ||
638 | }, | ||
639 | {}, | ||
640 | }; | ||
641 | |||
642 | static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu, | ||
643 | struct kvm_exit_mmio *mmio, | ||
644 | phys_addr_t offset) | ||
645 | { | ||
646 | /* since we don't support LPIs, this register is zero for now */ | ||
647 | vgic_reg_access(mmio, NULL, offset, | ||
648 | ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); | ||
649 | return false; | ||
650 | } | ||
651 | |||
652 | static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, | ||
653 | struct kvm_exit_mmio *mmio, | ||
654 | phys_addr_t offset) | ||
655 | { | ||
656 | u32 reg; | ||
657 | u64 mpidr; | ||
658 | struct kvm_vcpu *redist_vcpu = mmio->private; | ||
659 | int target_vcpu_id = redist_vcpu->vcpu_id; | ||
660 | |||
661 | /* the upper 32 bits contain the affinity value */ | ||
662 | if ((offset & ~3) == 4) { | ||
663 | mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu); | ||
664 | reg = compress_mpidr(mpidr); | ||
665 | |||
666 | vgic_reg_access(mmio, ®, offset, | ||
667 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); | ||
668 | return false; | ||
669 | } | ||
670 | |||
671 | reg = redist_vcpu->vcpu_id << 8; | ||
672 | if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) | ||
673 | reg |= GICR_TYPER_LAST; | ||
674 | vgic_reg_access(mmio, ®, offset, | ||
675 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); | ||
676 | return false; | ||
677 | } | ||
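A minimal sketch of how the low word of GICR_TYPER is built by the handler above (standalone userspace C; GICR_TYPER_LAST is assumed to be bit 4, matching the GICv3 architecture):

    #include <stdint.h>
    #include <stdio.h>

    #define GICR_TYPER_LAST (1U << 4)   /* assumed definition: "last redistributor" flag */

    static uint32_t typer_low(int vcpu_id, int nr_vcpus)
    {
        uint32_t reg = (uint32_t)vcpu_id << 8;      /* processor number field */

        if (vcpu_id == nr_vcpus - 1)                /* final redistributor in the region */
            reg |= GICR_TYPER_LAST;
        return reg;
    }

    int main(void)
    {
        printf("vcpu 0 of 4: 0x%03x\n", typer_low(0, 4));   /* 0x000 */
        printf("vcpu 3 of 4: 0x%03x\n", typer_low(3, 4));   /* 0x310, LAST set */
        return 0;
    }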
678 | |||
679 | static const struct kvm_mmio_range vgic_redist_ranges[] = { | ||
680 | { | ||
681 | .base = GICR_CTLR, | ||
682 | .len = 0x04, | ||
683 | .bits_per_irq = 0, | ||
684 | .handle_mmio = handle_mmio_ctlr_redist, | ||
685 | }, | ||
686 | { | ||
687 | .base = GICR_TYPER, | ||
688 | .len = 0x08, | ||
689 | .bits_per_irq = 0, | ||
690 | .handle_mmio = handle_mmio_typer_redist, | ||
691 | }, | ||
692 | { | ||
693 | .base = GICR_IIDR, | ||
694 | .len = 0x04, | ||
695 | .bits_per_irq = 0, | ||
696 | .handle_mmio = handle_mmio_iidr, | ||
697 | }, | ||
698 | { | ||
699 | .base = GICR_WAKER, | ||
700 | .len = 0x04, | ||
701 | .bits_per_irq = 0, | ||
702 | .handle_mmio = handle_mmio_raz_wi, | ||
703 | }, | ||
704 | { | ||
705 | .base = GICR_IDREGS, | ||
706 | .len = 0x30, | ||
707 | .bits_per_irq = 0, | ||
708 | .handle_mmio = handle_mmio_idregs, | ||
709 | }, | ||
710 | {}, | ||
711 | }; | ||
712 | |||
713 | /* | ||
714 | * This function splits accesses between the distributor and the two | ||
715 | * redistributor frames (RD and SGI). As each redistributor is accessible | ||
716 | * from any CPU, we have to determine the affected VCPU by taking the faulting | ||
717 | * address into account. We then pass this VCPU to the handler function via | ||
718 | * the private parameter. | ||
719 | */ | ||
720 | #define SGI_BASE_OFFSET SZ_64K | ||
721 | static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, | ||
722 | struct kvm_exit_mmio *mmio) | ||
723 | { | ||
724 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
725 | unsigned long dbase = dist->vgic_dist_base; | ||
726 | unsigned long rdbase = dist->vgic_redist_base; | ||
727 | int nrcpus = atomic_read(&vcpu->kvm->online_vcpus); | ||
728 | int vcpu_id; | ||
729 | const struct kvm_mmio_range *mmio_range; | ||
730 | |||
731 | if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) { | ||
732 | return vgic_handle_mmio_range(vcpu, run, mmio, | ||
733 | vgic_v3_dist_ranges, dbase); | ||
734 | } | ||
735 | |||
736 | if (!is_in_range(mmio->phys_addr, mmio->len, rdbase, | ||
737 | GIC_V3_REDIST_SIZE * nrcpus)) | ||
738 | return false; | ||
739 | |||
740 | vcpu_id = (mmio->phys_addr - rdbase) / GIC_V3_REDIST_SIZE; | ||
741 | rdbase += (vcpu_id * GIC_V3_REDIST_SIZE); | ||
742 | mmio->private = kvm_get_vcpu(vcpu->kvm, vcpu_id); | ||
743 | |||
744 | if (mmio->phys_addr >= rdbase + SGI_BASE_OFFSET) { | ||
745 | rdbase += SGI_BASE_OFFSET; | ||
746 | mmio_range = vgic_redist_sgi_ranges; | ||
747 | } else { | ||
748 | mmio_range = vgic_redist_ranges; | ||
749 | } | ||
750 | return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase); | ||
751 | } | ||
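The address arithmetic above can be illustrated with a small standalone sketch (userspace C; the base and faulting addresses are invented, and GIC_V3_REDIST_SIZE is assumed to be two 64K frames per vCPU, an RD frame followed by an SGI frame):

    #include <stdint.h>
    #include <stdio.h>

    #define SZ_64K          0x10000ULL
    #define REDIST_SIZE     (2 * SZ_64K)    /* assumption: one RD frame + one SGI frame */
    #define SGI_BASE_OFFSET SZ_64K

    int main(void)
    {
        uint64_t rdbase = 0x08100000ULL;    /* hypothetical redistributor region base */
        uint64_t addr   = 0x08134100ULL;    /* hypothetical faulting address */

        int vcpu_id = (addr - rdbase) / REDIST_SIZE;
        uint64_t frame = rdbase + vcpu_id * REDIST_SIZE;
        int sgi_frame = addr >= frame + SGI_BASE_OFFSET;
        uint64_t offset = addr - frame - (sgi_frame ? SGI_BASE_OFFSET : 0);

        printf("vcpu %d, %s frame, offset 0x%llx\n", vcpu_id,
               sgi_frame ? "SGI" : "RD", (unsigned long long)offset);
        return 0;
    }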
752 | |||
753 | static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq) | ||
754 | { | ||
755 | if (vgic_queue_irq(vcpu, 0, irq)) { | ||
756 | vgic_dist_irq_clear_pending(vcpu, irq); | ||
757 | vgic_cpu_irq_clear(vcpu, irq); | ||
758 | return true; | ||
759 | } | ||
760 | |||
761 | return false; | ||
762 | } | ||
763 | |||
764 | static int vgic_v3_map_resources(struct kvm *kvm, | ||
765 | const struct vgic_params *params) | ||
766 | { | ||
767 | int ret = 0; | ||
768 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
769 | |||
770 | if (!irqchip_in_kernel(kvm)) | ||
771 | return 0; | ||
772 | |||
773 | mutex_lock(&kvm->lock); | ||
774 | |||
775 | if (vgic_ready(kvm)) | ||
776 | goto out; | ||
777 | |||
778 | if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || | ||
779 | IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) { | ||
780 | kvm_err("Need to set vgic distributor addresses first\n"); | ||
781 | ret = -ENXIO; | ||
782 | goto out; | ||
783 | } | ||
784 | |||
785 | /* | ||
786 | * For a VGICv3 we require userspace to explicitly initialize | ||
787 | * the VGIC before it can be used. | ||
788 | */ | ||
789 | if (!vgic_initialized(kvm)) { | ||
790 | ret = -EBUSY; | ||
791 | goto out; | ||
792 | } | ||
793 | |||
794 | kvm->arch.vgic.ready = true; | ||
795 | out: | ||
796 | if (ret) | ||
797 | kvm_vgic_destroy(kvm); | ||
798 | mutex_unlock(&kvm->lock); | ||
799 | return ret; | ||
800 | } | ||
801 | |||
802 | static int vgic_v3_init_model(struct kvm *kvm) | ||
803 | { | ||
804 | int i; | ||
805 | u32 mpidr; | ||
806 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
807 | int nr_spis = dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; | ||
808 | |||
809 | dist->irq_spi_mpidr = kcalloc(nr_spis, sizeof(dist->irq_spi_mpidr[0]), | ||
810 | GFP_KERNEL); | ||
811 | |||
812 | if (!dist->irq_spi_mpidr) | ||
813 | return -ENOMEM; | ||
814 | |||
815 | /* Initialize the target VCPUs for each IRQ to VCPU 0 */ | ||
816 | mpidr = compress_mpidr(kvm_vcpu_get_mpidr_aff(kvm_get_vcpu(kvm, 0))); | ||
817 | for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i++) { | ||
818 | dist->irq_spi_cpu[i - VGIC_NR_PRIVATE_IRQS] = 0; | ||
819 | dist->irq_spi_mpidr[i - VGIC_NR_PRIVATE_IRQS] = mpidr; | ||
820 | vgic_bitmap_set_irq_val(dist->irq_spi_target, 0, i, 1); | ||
821 | } | ||
822 | |||
823 | return 0; | ||
824 | } | ||
825 | |||
826 | /* GICv3 does not keep track of SGI sources anymore. */ | ||
827 | static void vgic_v3_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source) | ||
828 | { | ||
829 | } | ||
830 | |||
831 | void vgic_v3_init_emulation(struct kvm *kvm) | ||
832 | { | ||
833 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
834 | |||
835 | dist->vm_ops.handle_mmio = vgic_v3_handle_mmio; | ||
836 | dist->vm_ops.queue_sgi = vgic_v3_queue_sgi; | ||
837 | dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source; | ||
838 | dist->vm_ops.init_model = vgic_v3_init_model; | ||
839 | dist->vm_ops.map_resources = vgic_v3_map_resources; | ||
840 | |||
841 | kvm->arch.max_vcpus = KVM_MAX_VCPUS; | ||
842 | } | ||
843 | |||
844 | /* | ||
845 | * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI | ||
846 | * generation register ICC_SGI1R_EL1) with a given VCPU. | ||
847 | * If the VCPU's MPIDR matches, return the level0 affinity, otherwise | ||
848 | * return -1. | ||
849 | */ | ||
850 | static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) | ||
851 | { | ||
852 | unsigned long affinity; | ||
853 | int level0; | ||
854 | |||
855 | /* | ||
856 | * Split the current VCPU's MPIDR into affinity level 0 and the | ||
857 | * rest, as this is what we have to compare against. | ||
858 | */ | ||
859 | affinity = kvm_vcpu_get_mpidr_aff(vcpu); | ||
860 | level0 = MPIDR_AFFINITY_LEVEL(affinity, 0); | ||
861 | affinity &= ~MPIDR_LEVEL_MASK; | ||
862 | |||
863 | /* bail out if the upper three levels don't match */ | ||
864 | if (sgi_aff != affinity) | ||
865 | return -1; | ||
866 | |||
867 | /* Is this VCPU's bit set in the mask? */ | ||
868 | if (!(sgi_cpu_mask & BIT(level0))) | ||
869 | return -1; | ||
870 | |||
871 | return level0; | ||
872 | } | ||
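A minimal standalone illustration of that matching rule (userspace C, not the kernel helpers; it assumes eight bits per affinity level and an Aff0 value below 16, as required by the 16-bit target list):

    #include <stdint.h>
    #include <stdio.h>

    /* Strip Aff0, compare Aff1-3, then test the Aff0 bit in the target list. */
    static int match(uint64_t sgi_aff123, uint16_t target_list, uint64_t vcpu_mpidr)
    {
        int aff0 = vcpu_mpidr & 0xff;           /* affinity level 0 */
        uint64_t rest = vcpu_mpidr & ~0xffULL;  /* affinity levels 1-3 */

        if (rest != sgi_aff123)
            return -1;
        if (!(target_list & (1u << aff0)))
            return -1;
        return aff0;
    }

    int main(void)
    {
        /* vCPU with Aff1=1, Aff0=2; SGI targets Aff1=1, target list 0b0100 */
        printf("%d\n", match(0x100, 0x4, 0x102));   /* matches: prints 2 */
        printf("%d\n", match(0x200, 0x4, 0x102));   /* Aff1 mismatch: -1 */
        return 0;
    }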
873 | |||
874 | #define SGI_AFFINITY_LEVEL(reg, level) \ | ||
875 | ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \ | ||
876 | >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) | ||
877 | |||
878 | /** | ||
879 | * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs | ||
880 | * @vcpu: The VCPU requesting a SGI | ||
881 | * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU | ||
882 | * | ||
883 | * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register. | ||
884 | * This will trap in sys_regs.c and call this function. | ||
885 | * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the | ||
886 | * target processors as well as a bitmask of 16 Aff0 CPUs. | ||
887 | * If the interrupt routing mode bit is not set, we iterate over all VCPUs to | ||
888 | * check for matching ones. If this bit is set, we signal all VCPUs except | ||
889 | * the calling one (broadcast). | ||
890 | */ | ||
891 | void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) | ||
892 | { | ||
893 | struct kvm *kvm = vcpu->kvm; | ||
894 | struct kvm_vcpu *c_vcpu; | ||
895 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
896 | u16 target_cpus; | ||
897 | u64 mpidr; | ||
898 | int sgi, c; | ||
899 | int vcpu_id = vcpu->vcpu_id; | ||
900 | bool broadcast; | ||
901 | int updated = 0; | ||
902 | |||
903 | sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; | ||
904 | broadcast = reg & BIT(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); | ||
905 | target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT; | ||
906 | mpidr = SGI_AFFINITY_LEVEL(reg, 3); | ||
907 | mpidr |= SGI_AFFINITY_LEVEL(reg, 2); | ||
908 | mpidr |= SGI_AFFINITY_LEVEL(reg, 1); | ||
909 | |||
910 | /* | ||
911 | * We take the dist lock here, because we come from the sysregs | ||
912 | * code path and not from the MMIO one (which already takes the lock). | ||
913 | */ | ||
914 | spin_lock(&dist->lock); | ||
915 | |||
916 | /* | ||
917 | * We iterate over all VCPUs to find the MPIDRs matching the request. | ||
918 | * If we have handled one CPU, we clear its bit to detect early | ||
919 | * if we are already finished. This avoids iterating through all | ||
920 | * VCPUs when most of the time we just signal a single VCPU. | ||
921 | */ | ||
922 | kvm_for_each_vcpu(c, c_vcpu, kvm) { | ||
923 | |||
924 | /* Exit early if we have dealt with all requested CPUs */ | ||
925 | if (!broadcast && target_cpus == 0) | ||
926 | break; | ||
927 | |||
928 | /* Don't signal the calling VCPU */ | ||
929 | if (broadcast && c == vcpu_id) | ||
930 | continue; | ||
931 | |||
932 | if (!broadcast) { | ||
933 | int level0; | ||
934 | |||
935 | level0 = match_mpidr(mpidr, target_cpus, c_vcpu); | ||
936 | if (level0 == -1) | ||
937 | continue; | ||
938 | |||
939 | /* remove this matching VCPU from the mask */ | ||
940 | target_cpus &= ~BIT(level0); | ||
941 | } | ||
942 | |||
943 | /* Flag the SGI as pending */ | ||
944 | vgic_dist_irq_set_pending(c_vcpu, sgi); | ||
945 | updated = 1; | ||
946 | kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); | ||
947 | } | ||
948 | if (updated) | ||
949 | vgic_update_state(vcpu->kvm); | ||
950 | spin_unlock(&dist->lock); | ||
951 | if (updated) | ||
952 | vgic_kick_vcpus(vcpu->kvm); | ||
953 | } | ||
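To show what arrives in this function, here is a small standalone decoder for a made-up ICC_SGI1R_EL1 value (userspace C; the field positions follow the GICv3 architecture definition behind the masks used above):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* SGI 5, broadcast off, Aff2=0, Aff1=1, target list = CPUs 0 and 3 */
        uint64_t reg = (5ULL << 24) | (1ULL << 16) | 0x9;

        unsigned int sgi     = (reg >> 24) & 0xf;    /* SGI INTID */
        unsigned int irm     = (reg >> 40) & 0x1;    /* interrupt routing mode */
        unsigned int aff1    = (reg >> 16) & 0xff;   /* affinity level 1 */
        unsigned int targets =  reg        & 0xffff; /* Aff0 target list */

        printf("SGI%u, routing=%s, Aff1=%u, target list=0x%x\n",
               sgi, irm ? "broadcast" : "targeted", aff1, targets);
        return 0;
    }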
954 | |||
955 | static int vgic_v3_create(struct kvm_device *dev, u32 type) | ||
956 | { | ||
957 | return kvm_vgic_create(dev->kvm, type); | ||
958 | } | ||
959 | |||
960 | static void vgic_v3_destroy(struct kvm_device *dev) | ||
961 | { | ||
962 | kfree(dev); | ||
963 | } | ||
964 | |||
965 | static int vgic_v3_set_attr(struct kvm_device *dev, | ||
966 | struct kvm_device_attr *attr) | ||
967 | { | ||
968 | int ret; | ||
969 | |||
970 | ret = vgic_set_common_attr(dev, attr); | ||
971 | if (ret != -ENXIO) | ||
972 | return ret; | ||
973 | |||
974 | switch (attr->group) { | ||
975 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
976 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: | ||
977 | return -ENXIO; | ||
978 | } | ||
979 | |||
980 | return -ENXIO; | ||
981 | } | ||
982 | |||
983 | static int vgic_v3_get_attr(struct kvm_device *dev, | ||
984 | struct kvm_device_attr *attr) | ||
985 | { | ||
986 | int ret; | ||
987 | |||
988 | ret = vgic_get_common_attr(dev, attr); | ||
989 | if (ret != -ENXIO) | ||
990 | return ret; | ||
991 | |||
992 | switch (attr->group) { | ||
993 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
994 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: | ||
995 | return -ENXIO; | ||
996 | } | ||
997 | |||
998 | return -ENXIO; | ||
999 | } | ||
1000 | |||
1001 | static int vgic_v3_has_attr(struct kvm_device *dev, | ||
1002 | struct kvm_device_attr *attr) | ||
1003 | { | ||
1004 | switch (attr->group) { | ||
1005 | case KVM_DEV_ARM_VGIC_GRP_ADDR: | ||
1006 | switch (attr->attr) { | ||
1007 | case KVM_VGIC_V2_ADDR_TYPE_DIST: | ||
1008 | case KVM_VGIC_V2_ADDR_TYPE_CPU: | ||
1009 | return -ENXIO; | ||
1010 | case KVM_VGIC_V3_ADDR_TYPE_DIST: | ||
1011 | case KVM_VGIC_V3_ADDR_TYPE_REDIST: | ||
1012 | return 0; | ||
1013 | } | ||
1014 | break; | ||
1015 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
1016 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: | ||
1017 | return -ENXIO; | ||
1018 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: | ||
1019 | return 0; | ||
1020 | case KVM_DEV_ARM_VGIC_GRP_CTRL: | ||
1021 | switch (attr->attr) { | ||
1022 | case KVM_DEV_ARM_VGIC_CTRL_INIT: | ||
1023 | return 0; | ||
1024 | } | ||
1025 | } | ||
1026 | return -ENXIO; | ||
1027 | } | ||
1028 | |||
1029 | struct kvm_device_ops kvm_arm_vgic_v3_ops = { | ||
1030 | .name = "kvm-arm-vgic-v3", | ||
1031 | .create = vgic_v3_create, | ||
1032 | .destroy = vgic_v3_destroy, | ||
1033 | .set_attr = vgic_v3_set_attr, | ||
1034 | .get_attr = vgic_v3_get_attr, | ||
1035 | .has_attr = vgic_v3_has_attr, | ||
1036 | }; | ||
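These device ops are what userspace reaches via KVM_CREATE_DEVICE. A hedged sketch (arm64 userspace, minimal error handling, capability checks omitted) of creating the GICv3 device and probing the INIT control attribute reported by vgic_v3_has_attr() above:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
        int kvm = open("/dev/kvm", O_RDWR);
        int vm = ioctl(kvm, KVM_CREATE_VM, 0);

        struct kvm_create_device cd = {
            .type = KVM_DEV_TYPE_ARM_VGIC_V3,
        };
        if (ioctl(vm, KVM_CREATE_DEVICE, &cd) < 0) {
            perror("KVM_CREATE_DEVICE");
            return 1;
        }

        struct kvm_device_attr attr = {
            .group = KVM_DEV_ARM_VGIC_GRP_CTRL,
            .attr  = KVM_DEV_ARM_VGIC_CTRL_INIT,
        };
        /* returns 0 when the attribute is supported by the device */
        printf("INIT attr %ssupported\n",
               ioctl(cd.fd, KVM_HAS_DEVICE_ATTR, &attr) ? "not " : "");
        return 0;
    }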
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 1c2c8eef0599..3a62d8a9a2c6 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #define GICH_LR_VIRTUALID (0x3ffUL << 0) | 34 | #define GICH_LR_VIRTUALID (0x3ffUL << 0) |
35 | #define GICH_LR_PHYSID_CPUID_SHIFT (10) | 35 | #define GICH_LR_PHYSID_CPUID_SHIFT (10) |
36 | #define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) | 36 | #define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) |
37 | #define ICH_LR_VIRTUALID_MASK (BIT_ULL(32) - 1) | ||
37 | 38 | ||
38 | /* | 39 | /* |
39 | * LRs are stored in reverse order in memory. make sure we index them | 40 | * LRs are stored in reverse order in memory. make sure we index them |
@@ -48,12 +49,17 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) | |||
48 | struct vgic_lr lr_desc; | 49 | struct vgic_lr lr_desc; |
49 | u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)]; | 50 | u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)]; |
50 | 51 | ||
51 | lr_desc.irq = val & GICH_LR_VIRTUALID; | 52 | if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) |
52 | if (lr_desc.irq <= 15) | 53 | lr_desc.irq = val & ICH_LR_VIRTUALID_MASK; |
53 | lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; | ||
54 | else | 54 | else |
55 | lr_desc.source = 0; | 55 | lr_desc.irq = val & GICH_LR_VIRTUALID; |
56 | lr_desc.state = 0; | 56 | |
57 | lr_desc.source = 0; | ||
58 | if (lr_desc.irq <= 15 && | ||
59 | vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) | ||
60 | lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; | ||
61 | |||
62 | lr_desc.state = 0; | ||
57 | 63 | ||
58 | if (val & ICH_LR_PENDING_BIT) | 64 | if (val & ICH_LR_PENDING_BIT) |
59 | lr_desc.state |= LR_STATE_PENDING; | 65 | lr_desc.state |= LR_STATE_PENDING; |
@@ -68,8 +74,20 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) | |||
68 | static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, | 74 | static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, |
69 | struct vgic_lr lr_desc) | 75 | struct vgic_lr lr_desc) |
70 | { | 76 | { |
71 | u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | | 77 | u64 lr_val; |
72 | lr_desc.irq); | 78 | |
79 | lr_val = lr_desc.irq; | ||
80 | |||
81 | /* | ||
82 | * Currently all guest IRQs are Group1, as Group0 would result | ||
83 | * in a FIQ in the guest, which it wouldn't expect. | ||
84 | * Eventually we want to make this configurable, so we may revisit | ||
85 | * this in the future. | ||
86 | */ | ||
87 | if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) | ||
88 | lr_val |= ICH_LR_GROUP; | ||
89 | else | ||
90 | lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT; | ||
73 | 91 | ||
74 | if (lr_desc.state & LR_STATE_PENDING) | 92 | if (lr_desc.state & LR_STATE_PENDING) |
75 | lr_val |= ICH_LR_PENDING_BIT; | 93 | lr_val |= ICH_LR_PENDING_BIT; |
@@ -145,15 +163,27 @@ static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) | |||
145 | 163 | ||
146 | static void vgic_v3_enable(struct kvm_vcpu *vcpu) | 164 | static void vgic_v3_enable(struct kvm_vcpu *vcpu) |
147 | { | 165 | { |
166 | struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; | ||
167 | |||
148 | /* | 168 | /* |
149 | * By forcing VMCR to zero, the GIC will restore the binary | 169 | * By forcing VMCR to zero, the GIC will restore the binary |
150 | * points to their reset values. Anything else resets to zero | 170 | * points to their reset values. Anything else resets to zero |
151 | * anyway. | 171 | * anyway. |
152 | */ | 172 | */ |
153 | vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0; | 173 | vgic_v3->vgic_vmcr = 0; |
174 | |||
175 | /* | ||
176 | * If we are emulating a GICv3, we do it in a non-GICv2-compatible | ||
177 | * way, so we force SRE to 1 to demonstrate this to the guest. | ||
178 | * This goes with the spec allowing the value to be RAO/WI. | ||
179 | */ | ||
180 | if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) | ||
181 | vgic_v3->vgic_sre = ICC_SRE_EL1_SRE; | ||
182 | else | ||
183 | vgic_v3->vgic_sre = 0; | ||
154 | 184 | ||
155 | /* Get the show on the road... */ | 185 | /* Get the show on the road... */ |
156 | vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN; | 186 | vgic_v3->vgic_hcr = ICH_HCR_EN; |
157 | } | 187 | } |
158 | 188 | ||
159 | static const struct vgic_ops vgic_v3_ops = { | 189 | static const struct vgic_ops vgic_v3_ops = { |
@@ -205,35 +235,37 @@ int vgic_v3_probe(struct device_node *vgic_node, | |||
205 | * maximum of 16 list registers. Just ignore bit 4... | 235 | * maximum of 16 list registers. Just ignore bit 4... |
206 | */ | 236 | */ |
207 | vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1; | 237 | vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1; |
238 | vgic->can_emulate_gicv2 = false; | ||
208 | 239 | ||
209 | if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx)) | 240 | if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx)) |
210 | gicv_idx = 1; | 241 | gicv_idx = 1; |
211 | 242 | ||
212 | gicv_idx += 3; /* Also skip GICD, GICC, GICH */ | 243 | gicv_idx += 3; /* Also skip GICD, GICC, GICH */ |
213 | if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) { | 244 | if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) { |
214 | kvm_err("Cannot obtain GICV region\n"); | 245 | kvm_info("GICv3: no GICV resource entry\n"); |
215 | ret = -ENXIO; | 246 | vgic->vcpu_base = 0; |
216 | goto out; | 247 | } else if (!PAGE_ALIGNED(vcpu_res.start)) { |
217 | } | 248 | pr_warn("GICV physical address 0x%llx not page aligned\n", |
218 | |||
219 | if (!PAGE_ALIGNED(vcpu_res.start)) { | ||
220 | kvm_err("GICV physical address 0x%llx not page aligned\n", | ||
221 | (unsigned long long)vcpu_res.start); | 249 | (unsigned long long)vcpu_res.start); |
222 | ret = -ENXIO; | 250 | vgic->vcpu_base = 0; |
223 | goto out; | 251 | } else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { |
224 | } | 252 | pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n", |
225 | |||
226 | if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { | ||
227 | kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", | ||
228 | (unsigned long long)resource_size(&vcpu_res), | 253 | (unsigned long long)resource_size(&vcpu_res), |
229 | PAGE_SIZE); | 254 | PAGE_SIZE); |
230 | ret = -ENXIO; | 255 | vgic->vcpu_base = 0; |
231 | goto out; | 256 | } else { |
257 | vgic->vcpu_base = vcpu_res.start; | ||
258 | vgic->can_emulate_gicv2 = true; | ||
259 | kvm_register_device_ops(&kvm_arm_vgic_v2_ops, | ||
260 | KVM_DEV_TYPE_ARM_VGIC_V2); | ||
232 | } | 261 | } |
262 | if (vgic->vcpu_base == 0) | ||
263 | kvm_info("disabling GICv2 emulation\n"); | ||
264 | kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3); | ||
233 | 265 | ||
234 | vgic->vcpu_base = vcpu_res.start; | ||
235 | vgic->vctrl_base = NULL; | 266 | vgic->vctrl_base = NULL; |
236 | vgic->type = VGIC_V3; | 267 | vgic->type = VGIC_V3; |
268 | vgic->max_gic_vcpus = KVM_MAX_VCPUS; | ||
237 | 269 | ||
238 | kvm_info("%s@%llx IRQ%d\n", vgic_node->name, | 270 | kvm_info("%s@%llx IRQ%d\n", vgic_node->name, |
239 | vcpu_res.start, vgic->maint_irq); | 271 | vcpu_res.start, vgic->maint_irq); |
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 03affc7bf453..0cc6ab6005a0 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c | |||
@@ -75,37 +75,31 @@ | |||
75 | * inactive as long as the external input line is held high. | 75 | * inactive as long as the external input line is held high. |
76 | */ | 76 | */ |
77 | 77 | ||
78 | #define VGIC_ADDR_UNDEF (-1) | 78 | #include "vgic.h" |
79 | #define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) | 79 | |
80 | |||
81 | #define PRODUCT_ID_KVM 0x4b /* ASCII code K */ | ||
82 | #define IMPLEMENTER_ARM 0x43b | ||
83 | #define GICC_ARCH_VERSION_V2 0x2 | ||
84 | |||
85 | #define ACCESS_READ_VALUE (1 << 0) | ||
86 | #define ACCESS_READ_RAZ (0 << 0) | ||
87 | #define ACCESS_READ_MASK(x) ((x) & (1 << 0)) | ||
88 | #define ACCESS_WRITE_IGNORED (0 << 1) | ||
89 | #define ACCESS_WRITE_SETBIT (1 << 1) | ||
90 | #define ACCESS_WRITE_CLEARBIT (2 << 1) | ||
91 | #define ACCESS_WRITE_VALUE (3 << 1) | ||
92 | #define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) | ||
93 | |||
94 | static int vgic_init(struct kvm *kvm); | ||
95 | static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); | 80 | static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); |
96 | static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); | 81 | static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); |
97 | static void vgic_update_state(struct kvm *kvm); | ||
98 | static void vgic_kick_vcpus(struct kvm *kvm); | ||
99 | static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi); | ||
100 | static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); | ||
101 | static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); | 82 | static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); |
102 | static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); | 83 | static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); |
103 | static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); | ||
104 | static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); | ||
105 | 84 | ||
106 | static const struct vgic_ops *vgic_ops; | 85 | static const struct vgic_ops *vgic_ops; |
107 | static const struct vgic_params *vgic; | 86 | static const struct vgic_params *vgic; |
108 | 87 | ||
88 | static void add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source) | ||
89 | { | ||
90 | vcpu->kvm->arch.vgic.vm_ops.add_sgi_source(vcpu, irq, source); | ||
91 | } | ||
92 | |||
93 | static bool queue_sgi(struct kvm_vcpu *vcpu, int irq) | ||
94 | { | ||
95 | return vcpu->kvm->arch.vgic.vm_ops.queue_sgi(vcpu, irq); | ||
96 | } | ||
97 | |||
98 | int kvm_vgic_map_resources(struct kvm *kvm) | ||
99 | { | ||
100 | return kvm->arch.vgic.vm_ops.map_resources(kvm, vgic); | ||
101 | } | ||
102 | |||
109 | /* | 103 | /* |
110 | * struct vgic_bitmap contains a bitmap made of unsigned longs, but | 104 | * struct vgic_bitmap contains a bitmap made of unsigned longs, but |
111 | * extracts u32s out of them. | 105 | * extracts u32s out of them. |
@@ -160,8 +154,7 @@ static unsigned long *u64_to_bitmask(u64 *val) | |||
160 | return (unsigned long *)val; | 154 | return (unsigned long *)val; |
161 | } | 155 | } |
162 | 156 | ||
163 | static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, | 157 | u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) |
164 | int cpuid, u32 offset) | ||
165 | { | 158 | { |
166 | offset >>= 2; | 159 | offset >>= 2; |
167 | if (!offset) | 160 | if (!offset) |
@@ -179,8 +172,8 @@ static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, | |||
179 | return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared); | 172 | return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared); |
180 | } | 173 | } |
181 | 174 | ||
182 | static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, | 175 | void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, |
183 | int irq, int val) | 176 | int irq, int val) |
184 | { | 177 | { |
185 | unsigned long *reg; | 178 | unsigned long *reg; |
186 | 179 | ||
@@ -202,7 +195,7 @@ static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) | |||
202 | return x->private + cpuid; | 195 | return x->private + cpuid; |
203 | } | 196 | } |
204 | 197 | ||
205 | static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) | 198 | unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) |
206 | { | 199 | { |
207 | return x->shared; | 200 | return x->shared; |
208 | } | 201 | } |
@@ -229,7 +222,7 @@ static void vgic_free_bytemap(struct vgic_bytemap *b) | |||
229 | b->shared = NULL; | 222 | b->shared = NULL; |
230 | } | 223 | } |
231 | 224 | ||
232 | static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) | 225 | u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) |
233 | { | 226 | { |
234 | u32 *reg; | 227 | u32 *reg; |
235 | 228 | ||
@@ -326,14 +319,14 @@ static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) | |||
326 | return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq); | 319 | return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq); |
327 | } | 320 | } |
328 | 321 | ||
329 | static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq) | 322 | void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq) |
330 | { | 323 | { |
331 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 324 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
332 | 325 | ||
333 | vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1); | 326 | vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1); |
334 | } | 327 | } |
335 | 328 | ||
336 | static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq) | 329 | void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq) |
337 | { | 330 | { |
338 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 331 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
339 | 332 | ||
@@ -349,7 +342,7 @@ static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) | |||
349 | vcpu->arch.vgic_cpu.pending_shared); | 342 | vcpu->arch.vgic_cpu.pending_shared); |
350 | } | 343 | } |
351 | 344 | ||
352 | static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) | 345 | void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) |
353 | { | 346 | { |
354 | if (irq < VGIC_NR_PRIVATE_IRQS) | 347 | if (irq < VGIC_NR_PRIVATE_IRQS) |
355 | clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); | 348 | clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); |
@@ -363,16 +356,6 @@ static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq) | |||
363 | return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq); | 356 | return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq); |
364 | } | 357 | } |
365 | 358 | ||
366 | static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) | ||
367 | { | ||
368 | return le32_to_cpu(*((u32 *)mmio->data)) & mask; | ||
369 | } | ||
370 | |||
371 | static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) | ||
372 | { | ||
373 | *((u32 *)mmio->data) = cpu_to_le32(value) & mask; | ||
374 | } | ||
375 | |||
376 | /** | 359 | /** |
377 | * vgic_reg_access - access vgic register | 360 | * vgic_reg_access - access vgic register |
378 | * @mmio: pointer to the data describing the mmio access | 361 | * @mmio: pointer to the data describing the mmio access |
@@ -384,8 +367,8 @@ static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) | |||
384 | * modes defined for vgic register access | 367 | * modes defined for vgic register access |
385 | * (read,raz,write-ignored,setbit,clearbit,write) | 368 | * (read,raz,write-ignored,setbit,clearbit,write) |
386 | */ | 369 | */ |
387 | static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, | 370 | void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, |
388 | phys_addr_t offset, int mode) | 371 | phys_addr_t offset, int mode) |
389 | { | 372 | { |
390 | int word_offset = (offset & 3) * 8; | 373 | int word_offset = (offset & 3) * 8; |
391 | u32 mask = (1UL << (mmio->len * 8)) - 1; | 374 | u32 mask = (1UL << (mmio->len * 8)) - 1; |
@@ -434,107 +417,58 @@ static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, | |||
434 | } | 417 | } |
435 | } | 418 | } |
436 | 419 | ||
437 | static bool handle_mmio_misc(struct kvm_vcpu *vcpu, | 420 | bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, |
438 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | 421 | phys_addr_t offset) |
439 | { | ||
440 | u32 reg; | ||
441 | u32 word_offset = offset & 3; | ||
442 | |||
443 | switch (offset & ~3) { | ||
444 | case 0: /* GICD_CTLR */ | ||
445 | reg = vcpu->kvm->arch.vgic.enabled; | ||
446 | vgic_reg_access(mmio, ®, word_offset, | ||
447 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); | ||
448 | if (mmio->is_write) { | ||
449 | vcpu->kvm->arch.vgic.enabled = reg & 1; | ||
450 | vgic_update_state(vcpu->kvm); | ||
451 | return true; | ||
452 | } | ||
453 | break; | ||
454 | |||
455 | case 4: /* GICD_TYPER */ | ||
456 | reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; | ||
457 | reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1; | ||
458 | vgic_reg_access(mmio, ®, word_offset, | ||
459 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); | ||
460 | break; | ||
461 | |||
462 | case 8: /* GICD_IIDR */ | ||
463 | reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); | ||
464 | vgic_reg_access(mmio, ®, word_offset, | ||
465 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); | ||
466 | break; | ||
467 | } | ||
468 | |||
469 | return false; | ||
470 | } | ||
471 | |||
472 | static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, | ||
473 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | ||
474 | { | 422 | { |
475 | vgic_reg_access(mmio, NULL, offset, | 423 | vgic_reg_access(mmio, NULL, offset, |
476 | ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); | 424 | ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); |
477 | return false; | 425 | return false; |
478 | } | 426 | } |
479 | 427 | ||
480 | static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, | 428 | bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, |
481 | struct kvm_exit_mmio *mmio, | 429 | phys_addr_t offset, int vcpu_id, int access) |
482 | phys_addr_t offset) | ||
483 | { | 430 | { |
484 | u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, | 431 | u32 *reg; |
485 | vcpu->vcpu_id, offset); | 432 | int mode = ACCESS_READ_VALUE | access; |
486 | vgic_reg_access(mmio, reg, offset, | 433 | struct kvm_vcpu *target_vcpu = kvm_get_vcpu(kvm, vcpu_id); |
487 | ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); | ||
488 | if (mmio->is_write) { | ||
489 | vgic_update_state(vcpu->kvm); | ||
490 | return true; | ||
491 | } | ||
492 | |||
493 | return false; | ||
494 | } | ||
495 | 434 | ||
496 | static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, | 435 | reg = vgic_bitmap_get_reg(&kvm->arch.vgic.irq_enabled, vcpu_id, offset); |
497 | struct kvm_exit_mmio *mmio, | 436 | vgic_reg_access(mmio, reg, offset, mode); |
498 | phys_addr_t offset) | ||
499 | { | ||
500 | u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, | ||
501 | vcpu->vcpu_id, offset); | ||
502 | vgic_reg_access(mmio, reg, offset, | ||
503 | ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); | ||
504 | if (mmio->is_write) { | 437 | if (mmio->is_write) { |
505 | if (offset < 4) /* Force SGI enabled */ | 438 | if (access & ACCESS_WRITE_CLEARBIT) { |
506 | *reg |= 0xffff; | 439 | if (offset < 4) /* Force SGI enabled */ |
507 | vgic_retire_disabled_irqs(vcpu); | 440 | *reg |= 0xffff; |
508 | vgic_update_state(vcpu->kvm); | 441 | vgic_retire_disabled_irqs(target_vcpu); |
442 | } | ||
443 | vgic_update_state(kvm); | ||
509 | return true; | 444 | return true; |
510 | } | 445 | } |
511 | 446 | ||
512 | return false; | 447 | return false; |
513 | } | 448 | } |
514 | 449 | ||
515 | static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, | 450 | bool vgic_handle_set_pending_reg(struct kvm *kvm, |
516 | struct kvm_exit_mmio *mmio, | 451 | struct kvm_exit_mmio *mmio, |
517 | phys_addr_t offset) | 452 | phys_addr_t offset, int vcpu_id) |
518 | { | 453 | { |
519 | u32 *reg, orig; | 454 | u32 *reg, orig; |
520 | u32 level_mask; | 455 | u32 level_mask; |
521 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 456 | int mode = ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT; |
457 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
522 | 458 | ||
523 | reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset); | 459 | reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu_id, offset); |
524 | level_mask = (~(*reg)); | 460 | level_mask = (~(*reg)); |
525 | 461 | ||
526 | /* Mark both level and edge triggered irqs as pending */ | 462 | /* Mark both level and edge triggered irqs as pending */ |
527 | reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); | 463 | reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset); |
528 | orig = *reg; | 464 | orig = *reg; |
529 | vgic_reg_access(mmio, reg, offset, | 465 | vgic_reg_access(mmio, reg, offset, mode); |
530 | ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); | ||
531 | 466 | ||
532 | if (mmio->is_write) { | 467 | if (mmio->is_write) { |
533 | /* Set the soft-pending flag only for level-triggered irqs */ | 468 | /* Set the soft-pending flag only for level-triggered irqs */ |
534 | reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, | 469 | reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, |
535 | vcpu->vcpu_id, offset); | 470 | vcpu_id, offset); |
536 | vgic_reg_access(mmio, reg, offset, | 471 | vgic_reg_access(mmio, reg, offset, mode); |
537 | ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); | ||
538 | *reg &= level_mask; | 472 | *reg &= level_mask; |
539 | 473 | ||
540 | /* Ignore writes to SGIs */ | 474 | /* Ignore writes to SGIs */ |
@@ -543,31 +477,30 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, | |||
543 | *reg |= orig & 0xffff; | 477 | *reg |= orig & 0xffff; |
544 | } | 478 | } |
545 | 479 | ||
546 | vgic_update_state(vcpu->kvm); | 480 | vgic_update_state(kvm); |
547 | return true; | 481 | return true; |
548 | } | 482 | } |
549 | 483 | ||
550 | return false; | 484 | return false; |
551 | } | 485 | } |
552 | 486 | ||
553 | static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, | 487 | bool vgic_handle_clear_pending_reg(struct kvm *kvm, |
554 | struct kvm_exit_mmio *mmio, | 488 | struct kvm_exit_mmio *mmio, |
555 | phys_addr_t offset) | 489 | phys_addr_t offset, int vcpu_id) |
556 | { | 490 | { |
557 | u32 *level_active; | 491 | u32 *level_active; |
558 | u32 *reg, orig; | 492 | u32 *reg, orig; |
559 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 493 | int mode = ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT; |
494 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
560 | 495 | ||
561 | reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); | 496 | reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset); |
562 | orig = *reg; | 497 | orig = *reg; |
563 | vgic_reg_access(mmio, reg, offset, | 498 | vgic_reg_access(mmio, reg, offset, mode); |
564 | ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); | ||
565 | if (mmio->is_write) { | 499 | if (mmio->is_write) { |
566 | /* Re-set level triggered level-active interrupts */ | 500 | /* Re-set level triggered level-active interrupts */ |
567 | level_active = vgic_bitmap_get_reg(&dist->irq_level, | 501 | level_active = vgic_bitmap_get_reg(&dist->irq_level, |
568 | vcpu->vcpu_id, offset); | 502 | vcpu_id, offset); |
569 | reg = vgic_bitmap_get_reg(&dist->irq_pending, | 503 | reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset); |
570 | vcpu->vcpu_id, offset); | ||
571 | *reg |= *level_active; | 504 | *reg |= *level_active; |
572 | 505 | ||
573 | /* Ignore writes to SGIs */ | 506 | /* Ignore writes to SGIs */ |
@@ -578,101 +511,12 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, | |||
578 | 511 | ||
579 | /* Clear soft-pending flags */ | 512 | /* Clear soft-pending flags */ |
580 | reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, | 513 | reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, |
581 | vcpu->vcpu_id, offset); | 514 | vcpu_id, offset); |
582 | vgic_reg_access(mmio, reg, offset, | 515 | vgic_reg_access(mmio, reg, offset, mode); |
583 | ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); | ||
584 | 516 | ||
585 | vgic_update_state(vcpu->kvm); | 517 | vgic_update_state(kvm); |
586 | return true; | 518 | return true; |
587 | } | 519 | } |
588 | |||
589 | return false; | ||
590 | } | ||
591 | |||
592 | static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, | ||
593 | struct kvm_exit_mmio *mmio, | ||
594 | phys_addr_t offset) | ||
595 | { | ||
596 | u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, | ||
597 | vcpu->vcpu_id, offset); | ||
598 | vgic_reg_access(mmio, reg, offset, | ||
599 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); | ||
600 | return false; | ||
601 | } | ||
602 | |||
603 | #define GICD_ITARGETSR_SIZE 32 | ||
604 | #define GICD_CPUTARGETS_BITS 8 | ||
605 | #define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS) | ||
606 | static u32 vgic_get_target_reg(struct kvm *kvm, int irq) | ||
607 | { | ||
608 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
609 | int i; | ||
610 | u32 val = 0; | ||
611 | |||
612 | irq -= VGIC_NR_PRIVATE_IRQS; | ||
613 | |||
614 | for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) | ||
615 | val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8); | ||
616 | |||
617 | return val; | ||
618 | } | ||
619 | |||
620 | static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq) | ||
621 | { | ||
622 | struct vgic_dist *dist = &kvm->arch.vgic; | ||
623 | struct kvm_vcpu *vcpu; | ||
624 | int i, c; | ||
625 | unsigned long *bmap; | ||
626 | u32 target; | ||
627 | |||
628 | irq -= VGIC_NR_PRIVATE_IRQS; | ||
629 | |||
630 | /* | ||
631 | * Pick the LSB in each byte. This ensures we target exactly | ||
632 | * one vcpu per IRQ. If the byte is null, assume we target | ||
633 | * CPU0. | ||
634 | */ | ||
635 | for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) { | ||
636 | int shift = i * GICD_CPUTARGETS_BITS; | ||
637 | target = ffs((val >> shift) & 0xffU); | ||
638 | target = target ? (target - 1) : 0; | ||
639 | dist->irq_spi_cpu[irq + i] = target; | ||
640 | kvm_for_each_vcpu(c, vcpu, kvm) { | ||
641 | bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); | ||
642 | if (c == target) | ||
643 | set_bit(irq + i, bmap); | ||
644 | else | ||
645 | clear_bit(irq + i, bmap); | ||
646 | } | ||
647 | } | ||
648 | } | ||
649 | |||
650 | static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu, | ||
651 | struct kvm_exit_mmio *mmio, | ||
652 | phys_addr_t offset) | ||
653 | { | ||
654 | u32 reg; | ||
655 | |||
656 | /* We treat the banked interrupts targets as read-only */ | ||
657 | if (offset < 32) { | ||
658 | u32 roreg = 1 << vcpu->vcpu_id; | ||
659 | roreg |= roreg << 8; | ||
660 | roreg |= roreg << 16; | ||
661 | |||
662 | vgic_reg_access(mmio, &roreg, offset, | ||
663 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); | ||
664 | return false; | ||
665 | } | ||
666 | |||
667 | reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U); | ||
668 | vgic_reg_access(mmio, ®, offset, | ||
669 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); | ||
670 | if (mmio->is_write) { | ||
671 | vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U); | ||
672 | vgic_update_state(vcpu->kvm); | ||
673 | return true; | ||
674 | } | ||
675 | |||
676 | return false; | 520 | return false; |
677 | } | 521 | } |
678 | 522 | ||
@@ -711,14 +555,10 @@ static u16 vgic_cfg_compress(u32 val) | |||
711 | * LSB is always 0. As such, we only keep the upper bit, and use the | 555 | * LSB is always 0. As such, we only keep the upper bit, and use the |
712 | * two above functions to compress/expand the bits | 556 | * two above functions to compress/expand the bits |
713 | */ | 557 | */ |
714 | static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, | 558 | bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, |
715 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | 559 | phys_addr_t offset) |
716 | { | 560 | { |
717 | u32 val; | 561 | u32 val; |
718 | u32 *reg; | ||
719 | |||
720 | reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, | ||
721 | vcpu->vcpu_id, offset >> 1); | ||
722 | 562 | ||
723 | if (offset & 4) | 563 | if (offset & 4) |
724 | val = *reg >> 16; | 564 | val = *reg >> 16; |
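As described in the comment above, only the upper bit of each two-bit configuration field needs to be stored. A hypothetical standalone sketch of that compression (userspace C, mirroring the idea of the vgic_cfg_expand/vgic_cfg_compress helpers rather than their exact code):

    #include <stdint.h>
    #include <stdio.h>

    /* Keep bit 1 of each 2-bit ICFGR field; bit 0 is always zero. */
    static uint16_t cfg_compress(uint32_t val)
    {
        uint16_t res = 0;
        int i;

        for (i = 0; i < 16; i++)
            res |= ((val >> (2 * i + 1)) & 1) << i;
        return res;
    }

    static uint32_t cfg_expand(uint16_t val)
    {
        uint32_t res = 0;
        int i;

        for (i = 0; i < 16; i++)
            res |= (uint32_t)((val >> i) & 1) << (2 * i + 1);
        return res;
    }

    int main(void)
    {
        uint32_t icfgr = 0x0000000a;    /* IRQs 0 and 1 edge-triggered (0b10 each) */

        printf("compressed: 0x%x\n", cfg_compress(icfgr)); /* 0x3 */
        printf("expanded:   0x%x\n", cfg_expand(0x3));     /* 0xa */
        return 0;
    }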
@@ -747,21 +587,6 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, | |||
747 | return false; | 587 | return false; |
748 | } | 588 | } |
749 | 589 | ||
750 | static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, | ||
751 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | ||
752 | { | ||
753 | u32 reg; | ||
754 | vgic_reg_access(mmio, ®, offset, | ||
755 | ACCESS_READ_RAZ | ACCESS_WRITE_VALUE); | ||
756 | if (mmio->is_write) { | ||
757 | vgic_dispatch_sgi(vcpu, reg); | ||
758 | vgic_update_state(vcpu->kvm); | ||
759 | return true; | ||
760 | } | ||
761 | |||
762 | return false; | ||
763 | } | ||
764 | |||
765 | /** | 590 | /** |
766 | * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor | 591 | * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor |
767 | * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs | 592 | * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs |
@@ -774,11 +599,9 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, | |||
774 | * to the distributor but the active state stays in the LRs, because we don't | 599 | * to the distributor but the active state stays in the LRs, because we don't |
775 | * track the active state on the distributor side. | 600 | * track the active state on the distributor side. |
776 | */ | 601 | */ |
777 | static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) | 602 | void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) |
778 | { | 603 | { |
779 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
780 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | 604 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; |
781 | int vcpu_id = vcpu->vcpu_id; | ||
782 | int i; | 605 | int i; |
783 | 606 | ||
784 | for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { | 607 | for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { |
@@ -805,7 +628,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) | |||
805 | */ | 628 | */ |
806 | vgic_dist_irq_set_pending(vcpu, lr.irq); | 629 | vgic_dist_irq_set_pending(vcpu, lr.irq); |
807 | if (lr.irq < VGIC_NR_SGIS) | 630 | if (lr.irq < VGIC_NR_SGIS) |
808 | *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source; | 631 | add_sgi_source(vcpu, lr.irq, lr.source); |
809 | lr.state &= ~LR_STATE_PENDING; | 632 | lr.state &= ~LR_STATE_PENDING; |
810 | vgic_set_lr(vcpu, i, lr); | 633 | vgic_set_lr(vcpu, i, lr); |
811 | 634 | ||
@@ -824,188 +647,12 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) | |||
824 | } | 647 | } |
825 | } | 648 | } |
826 | 649 | ||
827 | /* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */ | 650 | const |
828 | static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, | 651 | struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, |
829 | struct kvm_exit_mmio *mmio, | ||
830 | phys_addr_t offset) | ||
831 | { | ||
832 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
833 | int sgi; | ||
834 | int min_sgi = (offset & ~0x3); | ||
835 | int max_sgi = min_sgi + 3; | ||
836 | int vcpu_id = vcpu->vcpu_id; | ||
837 | u32 reg = 0; | ||
838 | |||
839 | /* Copy source SGIs from distributor side */ | ||
840 | for (sgi = min_sgi; sgi <= max_sgi; sgi++) { | ||
841 | int shift = 8 * (sgi - min_sgi); | ||
842 | reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift; | ||
843 | } | ||
844 | |||
845 | mmio_data_write(mmio, ~0, reg); | ||
846 | return false; | ||
847 | } | ||
848 | |||
849 | static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, | ||
850 | struct kvm_exit_mmio *mmio, | ||
851 | phys_addr_t offset, bool set) | ||
852 | { | ||
853 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
854 | int sgi; | ||
855 | int min_sgi = (offset & ~0x3); | ||
856 | int max_sgi = min_sgi + 3; | ||
857 | int vcpu_id = vcpu->vcpu_id; | ||
858 | u32 reg; | ||
859 | bool updated = false; | ||
860 | |||
861 | reg = mmio_data_read(mmio, ~0); | ||
862 | |||
863 | /* Clear pending SGIs on the distributor */ | ||
864 | for (sgi = min_sgi; sgi <= max_sgi; sgi++) { | ||
865 | u8 mask = reg >> (8 * (sgi - min_sgi)); | ||
866 | u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi); | ||
867 | if (set) { | ||
868 | if ((*src & mask) != mask) | ||
869 | updated = true; | ||
870 | *src |= mask; | ||
871 | } else { | ||
872 | if (*src & mask) | ||
873 | updated = true; | ||
874 | *src &= ~mask; | ||
875 | } | ||
876 | } | ||
877 | |||
878 | if (updated) | ||
879 | vgic_update_state(vcpu->kvm); | ||
880 | |||
881 | return updated; | ||
882 | } | ||
883 | |||
884 | static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu, | ||
885 | struct kvm_exit_mmio *mmio, | ||
886 | phys_addr_t offset) | ||
887 | { | ||
888 | if (!mmio->is_write) | ||
889 | return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); | ||
890 | else | ||
891 | return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true); | ||
892 | } | ||
893 | |||
894 | static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, | ||
895 | struct kvm_exit_mmio *mmio, | ||
896 | phys_addr_t offset) | ||
897 | { | ||
898 | if (!mmio->is_write) | ||
899 | return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); | ||
900 | else | ||
901 | return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); | ||
902 | } | ||
903 | |||
904 | /* | ||
905 | * I would have liked to use the kvm_bus_io_*() API instead, but it | ||
906 | * cannot cope with banked registers (only the VM pointer is passed | ||
907 | * around, and we need the vcpu). One of these days, someone please | ||
908 | * fix it! | ||
909 | */ | ||
910 | struct mmio_range { | ||
911 | phys_addr_t base; | ||
912 | unsigned long len; | ||
913 | int bits_per_irq; | ||
914 | bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, | ||
915 | phys_addr_t offset); | ||
916 | }; | ||
917 | |||
918 | static const struct mmio_range vgic_dist_ranges[] = { | ||
919 | { | ||
920 | .base = GIC_DIST_CTRL, | ||
921 | .len = 12, | ||
922 | .bits_per_irq = 0, | ||
923 | .handle_mmio = handle_mmio_misc, | ||
924 | }, | ||
925 | { | ||
926 | .base = GIC_DIST_IGROUP, | ||
927 | .len = VGIC_MAX_IRQS / 8, | ||
928 | .bits_per_irq = 1, | ||
929 | .handle_mmio = handle_mmio_raz_wi, | ||
930 | }, | ||
931 | { | ||
932 | .base = GIC_DIST_ENABLE_SET, | ||
933 | .len = VGIC_MAX_IRQS / 8, | ||
934 | .bits_per_irq = 1, | ||
935 | .handle_mmio = handle_mmio_set_enable_reg, | ||
936 | }, | ||
937 | { | ||
938 | .base = GIC_DIST_ENABLE_CLEAR, | ||
939 | .len = VGIC_MAX_IRQS / 8, | ||
940 | .bits_per_irq = 1, | ||
941 | .handle_mmio = handle_mmio_clear_enable_reg, | ||
942 | }, | ||
943 | { | ||
944 | .base = GIC_DIST_PENDING_SET, | ||
945 | .len = VGIC_MAX_IRQS / 8, | ||
946 | .bits_per_irq = 1, | ||
947 | .handle_mmio = handle_mmio_set_pending_reg, | ||
948 | }, | ||
949 | { | ||
950 | .base = GIC_DIST_PENDING_CLEAR, | ||
951 | .len = VGIC_MAX_IRQS / 8, | ||
952 | .bits_per_irq = 1, | ||
953 | .handle_mmio = handle_mmio_clear_pending_reg, | ||
954 | }, | ||
955 | { | ||
956 | .base = GIC_DIST_ACTIVE_SET, | ||
957 | .len = VGIC_MAX_IRQS / 8, | ||
958 | .bits_per_irq = 1, | ||
959 | .handle_mmio = handle_mmio_raz_wi, | ||
960 | }, | ||
961 | { | ||
962 | .base = GIC_DIST_ACTIVE_CLEAR, | ||
963 | .len = VGIC_MAX_IRQS / 8, | ||
964 | .bits_per_irq = 1, | ||
965 | .handle_mmio = handle_mmio_raz_wi, | ||
966 | }, | ||
967 | { | ||
968 | .base = GIC_DIST_PRI, | ||
969 | .len = VGIC_MAX_IRQS, | ||
970 | .bits_per_irq = 8, | ||
971 | .handle_mmio = handle_mmio_priority_reg, | ||
972 | }, | ||
973 | { | ||
974 | .base = GIC_DIST_TARGET, | ||
975 | .len = VGIC_MAX_IRQS, | ||
976 | .bits_per_irq = 8, | ||
977 | .handle_mmio = handle_mmio_target_reg, | ||
978 | }, | ||
979 | { | ||
980 | .base = GIC_DIST_CONFIG, | ||
981 | .len = VGIC_MAX_IRQS / 4, | ||
982 | .bits_per_irq = 2, | ||
983 | .handle_mmio = handle_mmio_cfg_reg, | ||
984 | }, | ||
985 | { | ||
986 | .base = GIC_DIST_SOFTINT, | ||
987 | .len = 4, | ||
988 | .handle_mmio = handle_mmio_sgi_reg, | ||
989 | }, | ||
990 | { | ||
991 | .base = GIC_DIST_SGI_PENDING_CLEAR, | ||
992 | .len = VGIC_NR_SGIS, | ||
993 | .handle_mmio = handle_mmio_sgi_clear, | ||
994 | }, | ||
995 | { | ||
996 | .base = GIC_DIST_SGI_PENDING_SET, | ||
997 | .len = VGIC_NR_SGIS, | ||
998 | .handle_mmio = handle_mmio_sgi_set, | ||
999 | }, | ||
1000 | {} | ||
1001 | }; | ||
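
vgic_dist_ranges above drives a table-based MMIO dispatch: each entry describes one register block by base and length, and a zero-length sentinel terminates the walk. A simplified, self-contained sketch of that lookup (types and handler names here are illustrative, not the kernel's):

#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct range {
        uint64_t base;
        unsigned long len;
        bool (*handle)(uint64_t offset);
};

static bool handle_ctrl(uint64_t off)
{
        printf("CTRL   +0x%llx\n", (unsigned long long)off);
        return true;
}

static bool handle_enable(uint64_t off)
{
        printf("ENABLE +0x%llx\n", (unsigned long long)off);
        return true;
}

static const struct range ranges[] = {
        { .base = 0x000, .len = 12,  .handle = handle_ctrl },
        { .base = 0x100, .len = 128, .handle = handle_enable },
        { 0 }                   /* sentinel: len == 0 terminates the walk */
};

/* walk the table until the whole access falls inside one entry */
static const struct range *find_range(const struct range *r,
                                      uint64_t offset, unsigned int len)
{
        for (; r->len; r++)
                if (offset >= r->base && offset + len <= r->base + r->len)
                        return r;
        return NULL;
}

int main(void)
{
        const struct range *r = find_range(ranges, 0x104, 4);

        if (r)
                r->handle(0x104 - r->base); /* handler sees a range-relative offset */
        return 0;
}
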
1002 | |||
1003 | static const | ||
1004 | struct mmio_range *find_matching_range(const struct mmio_range *ranges, | ||
1005 | struct kvm_exit_mmio *mmio, | 652 | struct kvm_exit_mmio *mmio, |
1006 | phys_addr_t offset) | 653 | phys_addr_t offset) |
1007 | { | 654 | { |
1008 | const struct mmio_range *r = ranges; | 655 | const struct kvm_mmio_range *r = ranges; |
1009 | 656 | ||
1010 | while (r->len) { | 657 | while (r->len) { |
1011 | if (offset >= r->base && | 658 | if (offset >= r->base && |
@@ -1018,7 +665,7 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges, | |||
1018 | } | 665 | } |
1019 | 666 | ||
1020 | static bool vgic_validate_access(const struct vgic_dist *dist, | 667 | static bool vgic_validate_access(const struct vgic_dist *dist, |
1021 | const struct mmio_range *range, | 668 | const struct kvm_mmio_range *range, |
1022 | unsigned long offset) | 669 | unsigned long offset) |
1023 | { | 670 | { |
1024 | int irq; | 671 | int irq; |
@@ -1033,37 +680,76 @@ static bool vgic_validate_access(const struct vgic_dist *dist, | |||
1033 | return true; | 680 | return true; |
1034 | } | 681 | } |
1035 | 682 | ||
683 | /* | ||
684 | * Call the respective handler function for the given range. | ||
685 | * We split any 64-bit access into two consecutive 32-bit | ||
686 | * handler calls and merge the results afterwards. | ||
687 | * We do this in little-endian order regardless of the host's or | ||
688 | * guest's endianness, because the GIC is always LE and the rest of | ||
689 | * the code (vgic_reg_access) already stores register data in LE order. | ||
690 | * At this point the handler function has already been identified, so | ||
691 | * range points to that single entry and offset is relative to it. | ||
692 | */ | ||
693 | static bool call_range_handler(struct kvm_vcpu *vcpu, | ||
694 | struct kvm_exit_mmio *mmio, | ||
695 | unsigned long offset, | ||
696 | const struct kvm_mmio_range *range) | ||
697 | { | ||
698 | u32 *data32 = (void *)mmio->data; | ||
699 | struct kvm_exit_mmio mmio32; | ||
700 | bool ret; | ||
701 | |||
702 | if (likely(mmio->len <= 4)) | ||
703 | return range->handle_mmio(vcpu, mmio, offset); | ||
704 | |||
705 | /* | ||
706 | * Any access wider than 4 bytes that we currently handle in KVM | ||
707 | * is an 8-byte access caused by a 64-bit load or store. | ||
708 | */ | ||
709 | |||
710 | mmio32.len = 4; | ||
711 | mmio32.is_write = mmio->is_write; | ||
712 | mmio32.private = mmio->private; | ||
713 | |||
714 | mmio32.phys_addr = mmio->phys_addr + 4; | ||
715 | if (mmio->is_write) | ||
716 | *(u32 *)mmio32.data = data32[1]; | ||
717 | ret = range->handle_mmio(vcpu, &mmio32, offset + 4); | ||
718 | if (!mmio->is_write) | ||
719 | data32[1] = *(u32 *)mmio32.data; | ||
720 | |||
721 | mmio32.phys_addr = mmio->phys_addr; | ||
722 | if (mmio->is_write) | ||
723 | *(u32 *)mmio32.data = data32[0]; | ||
724 | ret |= range->handle_mmio(vcpu, &mmio32, offset); | ||
725 | if (!mmio->is_write) | ||
726 | data32[0] = *(u32 *)mmio32.data; | ||
727 | |||
728 | return ret; | ||
729 | } | ||
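
As the comment above explains, call_range_handler() turns one 8-byte access into two 4-byte handler calls and merges the results. The following standalone sketch mirrors that splitting with plain C types; handle32() and handle64() are stand-in names, not kernel functions:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* stand-in for range->handle_mmio(): a 4-byte handler that just logs */
static void handle32(uint64_t offset, uint32_t *word, int is_write)
{
        printf("%s 4 bytes at offset 0x%llx: 0x%08x\n",
               is_write ? "write" : "read",
               (unsigned long long)offset, *word);
}

/* split one raw 8-byte payload into its two 32-bit words, upper half first */
static void handle64(uint64_t offset, uint8_t data[8], int is_write)
{
        uint32_t lo, hi;

        memcpy(&lo, data, 4);
        memcpy(&hi, data + 4, 4);

        handle32(offset + 4, &hi, is_write);
        handle32(offset, &lo, is_write);

        if (!is_write) {
                /* on a read, merge the two results back into the buffer */
                memcpy(data, &lo, 4);
                memcpy(data + 4, &hi, 4);
        }
}

int main(void)
{
        uint8_t buf[8] = { 0x78, 0x56, 0x34, 0x12, 0xef, 0xbe, 0xad, 0xde };

        handle64(0x8, buf, 1);
        return 0;
}
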
730 | |||
1036 | /** | 731 | /** |
1037 | * vgic_handle_mmio - handle an in-kernel MMIO access | 732 | * vgic_handle_mmio_range - handle an in-kernel MMIO access |
1038 | * @vcpu: pointer to the vcpu performing the access | 733 | * @vcpu: pointer to the vcpu performing the access |
1039 | * @run: pointer to the kvm_run structure | 734 | * @run: pointer to the kvm_run structure |
1040 | * @mmio: pointer to the data describing the access | 735 | * @mmio: pointer to the data describing the access |
736 | * @ranges: array of MMIO ranges in a given region | ||
737 | * @mmio_base: base address of that region | ||
1041 | * | 738 | * |
1042 | * returns true if the MMIO access has been performed in kernel space, | 739 | * returns true if the MMIO access could be performed |
1043 | * and false if it needs to be emulated in user space. | ||
1044 | */ | 740 | */ |
1045 | bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, | 741 | bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, |
1046 | struct kvm_exit_mmio *mmio) | 742 | struct kvm_exit_mmio *mmio, |
743 | const struct kvm_mmio_range *ranges, | ||
744 | unsigned long mmio_base) | ||
1047 | { | 745 | { |
1048 | const struct mmio_range *range; | 746 | const struct kvm_mmio_range *range; |
1049 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 747 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
1050 | unsigned long base = dist->vgic_dist_base; | ||
1051 | bool updated_state; | 748 | bool updated_state; |
1052 | unsigned long offset; | 749 | unsigned long offset; |
1053 | 750 | ||
1054 | if (!irqchip_in_kernel(vcpu->kvm) || | 751 | offset = mmio->phys_addr - mmio_base; |
1055 | mmio->phys_addr < base || | 752 | range = vgic_find_range(ranges, mmio, offset); |
1056 | (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE)) | ||
1057 | return false; | ||
1058 | |||
1059 | /* We don't support ldrd / strd or ldm / stm to the emulated vgic */ | ||
1060 | if (mmio->len > 4) { | ||
1061 | kvm_inject_dabt(vcpu, mmio->phys_addr); | ||
1062 | return true; | ||
1063 | } | ||
1064 | |||
1065 | offset = mmio->phys_addr - base; | ||
1066 | range = find_matching_range(vgic_dist_ranges, mmio, offset); | ||
1067 | if (unlikely(!range || !range->handle_mmio)) { | 753 | if (unlikely(!range || !range->handle_mmio)) { |
1068 | pr_warn("Unhandled access %d %08llx %d\n", | 754 | pr_warn("Unhandled access %d %08llx %d\n", |
1069 | mmio->is_write, mmio->phys_addr, mmio->len); | 755 | mmio->is_write, mmio->phys_addr, mmio->len); |
@@ -1071,12 +757,12 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, | |||
1071 | } | 757 | } |
1072 | 758 | ||
1073 | spin_lock(&vcpu->kvm->arch.vgic.lock); | 759 | spin_lock(&vcpu->kvm->arch.vgic.lock); |
1074 | offset = mmio->phys_addr - range->base - base; | 760 | offset -= range->base; |
1075 | if (vgic_validate_access(dist, range, offset)) { | 761 | if (vgic_validate_access(dist, range, offset)) { |
1076 | updated_state = range->handle_mmio(vcpu, mmio, offset); | 762 | updated_state = call_range_handler(vcpu, mmio, offset, range); |
1077 | } else { | 763 | } else { |
1078 | vgic_reg_access(mmio, NULL, offset, | 764 | if (!mmio->is_write) |
1079 | ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); | 765 | memset(mmio->data, 0, mmio->len); |
1080 | updated_state = false; | 766 | updated_state = false; |
1081 | } | 767 | } |
1082 | spin_unlock(&vcpu->kvm->arch.vgic.lock); | 768 | spin_unlock(&vcpu->kvm->arch.vgic.lock); |
@@ -1089,50 +775,28 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, | |||
1089 | return true; | 775 | return true; |
1090 | } | 776 | } |
1091 | 777 | ||
1092 | static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi) | 778 | /** |
1093 | { | 779 | * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation |
1094 | return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi; | 780 | * @vcpu: pointer to the vcpu performing the access |
1095 | } | 781 | * @run: pointer to the kvm_run structure |
1096 | 782 | * @mmio: pointer to the data describing the access | |
1097 | static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) | 783 | * |
784 | * returns true if the MMIO access has been performed in kernel space, | ||
785 | * and false if it needs to be emulated in user space. | ||
786 | * Calls the actual handling routine for the selected VGIC model. | ||
787 | */ | ||
788 | bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, | ||
789 | struct kvm_exit_mmio *mmio) | ||
1098 | { | 790 | { |
1099 | struct kvm *kvm = vcpu->kvm; | 791 | if (!irqchip_in_kernel(vcpu->kvm)) |
1100 | struct vgic_dist *dist = &kvm->arch.vgic; | 792 | return false; |
1101 | int nrcpus = atomic_read(&kvm->online_vcpus); | ||
1102 | u8 target_cpus; | ||
1103 | int sgi, mode, c, vcpu_id; | ||
1104 | |||
1105 | vcpu_id = vcpu->vcpu_id; | ||
1106 | |||
1107 | sgi = reg & 0xf; | ||
1108 | target_cpus = (reg >> 16) & 0xff; | ||
1109 | mode = (reg >> 24) & 3; | ||
1110 | |||
1111 | switch (mode) { | ||
1112 | case 0: | ||
1113 | if (!target_cpus) | ||
1114 | return; | ||
1115 | break; | ||
1116 | |||
1117 | case 1: | ||
1118 | target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; | ||
1119 | break; | ||
1120 | |||
1121 | case 2: | ||
1122 | target_cpus = 1 << vcpu_id; | ||
1123 | break; | ||
1124 | } | ||
1125 | |||
1126 | kvm_for_each_vcpu(c, vcpu, kvm) { | ||
1127 | if (target_cpus & 1) { | ||
1128 | /* Flag the SGI as pending */ | ||
1129 | vgic_dist_irq_set_pending(vcpu, sgi); | ||
1130 | *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id; | ||
1131 | kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); | ||
1132 | } | ||
1133 | 793 | ||
1134 | target_cpus >>= 1; | 794 | /* |
1135 | } | 795 | * This will currently call either vgic_v2_handle_mmio() or |
796 | * vgic_v3_handle_mmio(), which in turn will call | ||
797 | * vgic_handle_mmio_range() defined above. | ||
798 | */ | ||
799 | return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio); | ||
1136 | } | 800 | } |
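
The new vgic_handle_mmio() is only a dispatcher through a per-VM function pointer that is filled in when the VGIC model is chosen. A minimal sketch of that indirection, with simplified stand-in types (struct mmio_access and init_model are illustrative only):

#include <stdbool.h>
#include <stdio.h>

struct mmio_access {
        unsigned long addr;
        int len;
        bool is_write;
};

struct vgic_vm_ops {
        bool (*handle_mmio)(struct mmio_access *acc);
};

static bool v2_handle_mmio(struct mmio_access *acc)
{
        printf("GICv2 emulation handles access at 0x%lx\n", acc->addr);
        return true;
}

static bool v3_handle_mmio(struct mmio_access *acc)
{
        printf("GICv3 emulation handles access at 0x%lx\n", acc->addr);
        return true;
}

static struct vgic_vm_ops vm_ops;

/* chosen once, when the guest's interrupt controller model is created */
static void init_model(int gic_version)
{
        vm_ops.handle_mmio = (gic_version == 2) ? v2_handle_mmio : v3_handle_mmio;
}

int main(void)
{
        struct mmio_access acc = { .addr = 0x8010000UL, .len = 4, .is_write = false };

        init_model(2);
        /* the common entry point never looks at the model again */
        vm_ops.handle_mmio(&acc);
        return 0;
}
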
1137 | 801 | ||
1138 | static int vgic_nr_shared_irqs(struct vgic_dist *dist) | 802 | static int vgic_nr_shared_irqs(struct vgic_dist *dist) |
@@ -1173,7 +837,7 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) | |||
1173 | * Update the interrupt state and determine which CPUs have pending | 837 | * Update the interrupt state and determine which CPUs have pending |
1174 | * interrupts. Must be called with distributor lock held. | 838 | * interrupts. Must be called with distributor lock held. |
1175 | */ | 839 | */ |
1176 | static void vgic_update_state(struct kvm *kvm) | 840 | void vgic_update_state(struct kvm *kvm) |
1177 | { | 841 | { |
1178 | struct vgic_dist *dist = &kvm->arch.vgic; | 842 | struct vgic_dist *dist = &kvm->arch.vgic; |
1179 | struct kvm_vcpu *vcpu; | 843 | struct kvm_vcpu *vcpu; |
@@ -1234,12 +898,12 @@ static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) | |||
1234 | vgic_ops->disable_underflow(vcpu); | 898 | vgic_ops->disable_underflow(vcpu); |
1235 | } | 899 | } |
1236 | 900 | ||
1237 | static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) | 901 | void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) |
1238 | { | 902 | { |
1239 | vgic_ops->get_vmcr(vcpu, vmcr); | 903 | vgic_ops->get_vmcr(vcpu, vmcr); |
1240 | } | 904 | } |
1241 | 905 | ||
1242 | static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) | 906 | void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) |
1243 | { | 907 | { |
1244 | vgic_ops->set_vmcr(vcpu, vmcr); | 908 | vgic_ops->set_vmcr(vcpu, vmcr); |
1245 | } | 909 | } |
@@ -1288,8 +952,9 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) | |||
1288 | /* | 952 | /* |
1289 | * Queue an interrupt to a CPU virtual interface. Return true on success, | 953 | * Queue an interrupt to a CPU virtual interface. Return true on success, |
1290 | * or false if it wasn't possible to queue it. | 954 | * or false if it wasn't possible to queue it. |
955 | * sgi_source_id must be zero for non-SGI interrupts. | ||
1291 | */ | 956 | */ |
1292 | static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) | 957 | bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) |
1293 | { | 958 | { |
1294 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | 959 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; |
1295 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 960 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
@@ -1338,37 +1003,6 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) | |||
1338 | return true; | 1003 | return true; |
1339 | } | 1004 | } |
1340 | 1005 | ||
1341 | static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) | ||
1342 | { | ||
1343 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
1344 | unsigned long sources; | ||
1345 | int vcpu_id = vcpu->vcpu_id; | ||
1346 | int c; | ||
1347 | |||
1348 | sources = *vgic_get_sgi_sources(dist, vcpu_id, irq); | ||
1349 | |||
1350 | for_each_set_bit(c, &sources, dist->nr_cpus) { | ||
1351 | if (vgic_queue_irq(vcpu, c, irq)) | ||
1352 | clear_bit(c, &sources); | ||
1353 | } | ||
1354 | |||
1355 | *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources; | ||
1356 | |||
1357 | /* | ||
1358 | * If the sources bitmap has been cleared it means that we | ||
1359 | * could queue all the SGIs onto link registers (see the | ||
1360 | * clear_bit above), and therefore we are done with them in | ||
1361 | * our emulated gic and can get rid of them. | ||
1362 | */ | ||
1363 | if (!sources) { | ||
1364 | vgic_dist_irq_clear_pending(vcpu, irq); | ||
1365 | vgic_cpu_irq_clear(vcpu, irq); | ||
1366 | return true; | ||
1367 | } | ||
1368 | |||
1369 | return false; | ||
1370 | } | ||
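
The removed vgic_queue_sgi() (now living in the model-specific code) drains a per-SGI source bitmap: each set bit is a sending CPU, bits are cleared as their SGIs are queued into list registers, and the SGI is finished only when no bits remain. A rough userspace sketch of that loop, assuming a toy try_queue() predicate in place of vgic_queue_irq():

#include <stdbool.h>
#include <stdio.h>

/* toy stand-in for vgic_queue_irq(): pretend only sources 0-3 fit into an LR */
static bool try_queue(int source_cpu, int irq)
{
        (void)irq;
        return source_cpu < 4;
}

/* drain the per-SGI source mask; true means nothing is left pending */
static bool queue_sgi(unsigned long *sources, int irq)
{
        unsigned long pending = *sources;
        int c;

        for (c = 0; c < 8; c++) {
                if (!(pending & (1UL << c)))
                        continue;
                if (try_queue(c, irq))
                        pending &= ~(1UL << c);
        }

        *sources = pending;
        return pending == 0;
}

int main(void)
{
        unsigned long sources = 0x31;   /* CPUs 0, 4 and 5 sent this SGI */

        printf("fully queued: %d, remaining sources: 0x%lx\n",
               queue_sgi(&sources, 1), sources);
        return 0;
}
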
1371 | |||
1372 | static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) | 1006 | static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) |
1373 | { | 1007 | { |
1374 | if (!vgic_can_sample_irq(vcpu, irq)) | 1008 | if (!vgic_can_sample_irq(vcpu, irq)) |
@@ -1413,7 +1047,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) | |||
1413 | 1047 | ||
1414 | /* SGIs */ | 1048 | /* SGIs */ |
1415 | for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) { | 1049 | for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) { |
1416 | if (!vgic_queue_sgi(vcpu, i)) | 1050 | if (!queue_sgi(vcpu, i)) |
1417 | overflow = 1; | 1051 | overflow = 1; |
1418 | } | 1052 | } |
1419 | 1053 | ||
@@ -1575,7 +1209,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) | |||
1575 | return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); | 1209 | return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); |
1576 | } | 1210 | } |
1577 | 1211 | ||
1578 | static void vgic_kick_vcpus(struct kvm *kvm) | 1212 | void vgic_kick_vcpus(struct kvm *kvm) |
1579 | { | 1213 | { |
1580 | struct kvm_vcpu *vcpu; | 1214 | struct kvm_vcpu *vcpu; |
1581 | int c; | 1215 | int c; |
@@ -1615,7 +1249,7 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, | |||
1615 | struct kvm_vcpu *vcpu; | 1249 | struct kvm_vcpu *vcpu; |
1616 | int edge_triggered, level_triggered; | 1250 | int edge_triggered, level_triggered; |
1617 | int enabled; | 1251 | int enabled; |
1618 | bool ret = true; | 1252 | bool ret = true, can_inject = true; |
1619 | 1253 | ||
1620 | spin_lock(&dist->lock); | 1254 | spin_lock(&dist->lock); |
1621 | 1255 | ||
@@ -1630,6 +1264,11 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, | |||
1630 | 1264 | ||
1631 | if (irq_num >= VGIC_NR_PRIVATE_IRQS) { | 1265 | if (irq_num >= VGIC_NR_PRIVATE_IRQS) { |
1632 | cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS]; | 1266 | cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS]; |
1267 | if (cpuid == VCPU_NOT_ALLOCATED) { | ||
1268 | /* Pretend we use CPU0, and prevent injection */ | ||
1269 | cpuid = 0; | ||
1270 | can_inject = false; | ||
1271 | } | ||
1633 | vcpu = kvm_get_vcpu(kvm, cpuid); | 1272 | vcpu = kvm_get_vcpu(kvm, cpuid); |
1634 | } | 1273 | } |
1635 | 1274 | ||
@@ -1652,7 +1291,7 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, | |||
1652 | 1291 | ||
1653 | enabled = vgic_irq_is_enabled(vcpu, irq_num); | 1292 | enabled = vgic_irq_is_enabled(vcpu, irq_num); |
1654 | 1293 | ||
1655 | if (!enabled) { | 1294 | if (!enabled || !can_inject) { |
1656 | ret = false; | 1295 | ret = false; |
1657 | goto out; | 1296 | goto out; |
1658 | } | 1297 | } |
@@ -1698,6 +1337,16 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, | |||
1698 | int vcpu_id; | 1337 | int vcpu_id; |
1699 | 1338 | ||
1700 | if (unlikely(!vgic_initialized(kvm))) { | 1339 | if (unlikely(!vgic_initialized(kvm))) { |
1340 | /* | ||
1341 | * We only provide the automatic initialization of the VGIC | ||
1342 | * for the legacy case of a GICv2. Any other type must be | ||
1343 | * explicitly initialized once it has been set up via the | ||
1344 | * respective KVM device call. | ||
1345 | */ | ||
1346 | if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) { | ||
1347 | ret = -EBUSY; | ||
1348 | goto out; | ||
1349 | } | ||
1701 | mutex_lock(&kvm->lock); | 1350 | mutex_lock(&kvm->lock); |
1702 | ret = vgic_init(kvm); | 1351 | ret = vgic_init(kvm); |
1703 | mutex_unlock(&kvm->lock); | 1352 | mutex_unlock(&kvm->lock); |
@@ -1762,6 +1411,17 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) | |||
1762 | return 0; | 1411 | return 0; |
1763 | } | 1412 | } |
1764 | 1413 | ||
1414 | /** | ||
1415 | * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW | ||
1416 | * | ||
1417 | * The host's GIC naturally limits the maximum number of VCPUs a guest | ||
1418 | * can use. | ||
1419 | */ | ||
1420 | int kvm_vgic_get_max_vcpus(void) | ||
1421 | { | ||
1422 | return vgic->max_gic_vcpus; | ||
1423 | } | ||
1424 | |||
1765 | void kvm_vgic_destroy(struct kvm *kvm) | 1425 | void kvm_vgic_destroy(struct kvm *kvm) |
1766 | { | 1426 | { |
1767 | struct vgic_dist *dist = &kvm->arch.vgic; | 1427 | struct vgic_dist *dist = &kvm->arch.vgic; |
@@ -1784,6 +1444,7 @@ void kvm_vgic_destroy(struct kvm *kvm) | |||
1784 | } | 1444 | } |
1785 | kfree(dist->irq_sgi_sources); | 1445 | kfree(dist->irq_sgi_sources); |
1786 | kfree(dist->irq_spi_cpu); | 1446 | kfree(dist->irq_spi_cpu); |
1447 | kfree(dist->irq_spi_mpidr); | ||
1787 | kfree(dist->irq_spi_target); | 1448 | kfree(dist->irq_spi_target); |
1788 | kfree(dist->irq_pending_on_cpu); | 1449 | kfree(dist->irq_pending_on_cpu); |
1789 | dist->irq_sgi_sources = NULL; | 1450 | dist->irq_sgi_sources = NULL; |
@@ -1797,7 +1458,7 @@ void kvm_vgic_destroy(struct kvm *kvm) | |||
1797 | * Allocate and initialize the various data structures. Must be called | 1458 | * Allocate and initialize the various data structures. Must be called |
1798 | * with kvm->lock held! | 1459 | * with kvm->lock held! |
1799 | */ | 1460 | */ |
1800 | static int vgic_init(struct kvm *kvm) | 1461 | int vgic_init(struct kvm *kvm) |
1801 | { | 1462 | { |
1802 | struct vgic_dist *dist = &kvm->arch.vgic; | 1463 | struct vgic_dist *dist = &kvm->arch.vgic; |
1803 | struct kvm_vcpu *vcpu; | 1464 | struct kvm_vcpu *vcpu; |
@@ -1809,7 +1470,7 @@ static int vgic_init(struct kvm *kvm) | |||
1809 | 1470 | ||
1810 | nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); | 1471 | nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); |
1811 | if (!nr_cpus) /* No vcpus? Can't be good... */ | 1472 | if (!nr_cpus) /* No vcpus? Can't be good... */ |
1812 | return -EINVAL; | 1473 | return -ENODEV; |
1813 | 1474 | ||
1814 | /* | 1475 | /* |
1815 | * If nobody configured the number of interrupts, use the | 1476 | * If nobody configured the number of interrupts, use the |
@@ -1852,8 +1513,9 @@ static int vgic_init(struct kvm *kvm) | |||
1852 | if (ret) | 1513 | if (ret) |
1853 | goto out; | 1514 | goto out; |
1854 | 1515 | ||
1855 | for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) | 1516 | ret = kvm->arch.vgic.vm_ops.init_model(kvm); |
1856 | vgic_set_target_reg(kvm, 0, i); | 1517 | if (ret) |
1518 | goto out; | ||
1857 | 1519 | ||
1858 | kvm_for_each_vcpu(vcpu_id, vcpu, kvm) { | 1520 | kvm_for_each_vcpu(vcpu_id, vcpu, kvm) { |
1859 | ret = vgic_vcpu_init_maps(vcpu, nr_irqs); | 1521 | ret = vgic_vcpu_init_maps(vcpu, nr_irqs); |
@@ -1882,72 +1544,49 @@ out: | |||
1882 | return ret; | 1544 | return ret; |
1883 | } | 1545 | } |
1884 | 1546 | ||
1885 | /** | 1547 | static int init_vgic_model(struct kvm *kvm, int type) |
1886 | * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs | ||
1887 | * @kvm: pointer to the kvm struct | ||
1888 | * | ||
1889 | * Map the virtual CPU interface into the VM before running any VCPUs. We | ||
1890 | * can't do this at creation time, because user space must first set the | ||
1891 | * virtual CPU interface address in the guest physical address space. | ||
1892 | */ | ||
1893 | int kvm_vgic_map_resources(struct kvm *kvm) | ||
1894 | { | 1548 | { |
1895 | int ret = 0; | 1549 | switch (type) { |
1896 | 1550 | case KVM_DEV_TYPE_ARM_VGIC_V2: | |
1897 | if (!irqchip_in_kernel(kvm)) | 1551 | vgic_v2_init_emulation(kvm); |
1898 | return 0; | 1552 | break; |
1899 | 1553 | #ifdef CONFIG_ARM_GIC_V3 | |
1900 | mutex_lock(&kvm->lock); | 1554 | case KVM_DEV_TYPE_ARM_VGIC_V3: |
1901 | 1555 | vgic_v3_init_emulation(kvm); | |
1902 | if (vgic_ready(kvm)) | 1556 | break; |
1903 | goto out; | 1557 | #endif |
1904 | 1558 | default: | |
1905 | if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || | 1559 | return -ENODEV; |
1906 | IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) { | ||
1907 | kvm_err("Need to set vgic cpu and dist addresses first\n"); | ||
1908 | ret = -ENXIO; | ||
1909 | goto out; | ||
1910 | } | ||
1911 | |||
1912 | /* | ||
1913 | * Initialize the vgic if this hasn't already been done on demand by | ||
1914 | * accessing the vgic state from userspace. | ||
1915 | */ | ||
1916 | ret = vgic_init(kvm); | ||
1917 | if (ret) { | ||
1918 | kvm_err("Unable to allocate maps\n"); | ||
1919 | goto out; | ||
1920 | } | 1560 | } |
1921 | 1561 | ||
1922 | ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, | 1562 | if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) |
1923 | vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE, | 1563 | return -E2BIG; |
1924 | true); | ||
1925 | if (ret) { | ||
1926 | kvm_err("Unable to remap VGIC CPU to VCPU\n"); | ||
1927 | goto out; | ||
1928 | } | ||
1929 | 1564 | ||
1930 | kvm->arch.vgic.ready = true; | 1565 | return 0; |
1931 | out: | ||
1932 | if (ret) | ||
1933 | kvm_vgic_destroy(kvm); | ||
1934 | mutex_unlock(&kvm->lock); | ||
1935 | return ret; | ||
1936 | } | 1566 | } |
1937 | 1567 | ||
1938 | int kvm_vgic_create(struct kvm *kvm) | 1568 | int kvm_vgic_create(struct kvm *kvm, u32 type) |
1939 | { | 1569 | { |
1940 | int i, vcpu_lock_idx = -1, ret; | 1570 | int i, vcpu_lock_idx = -1, ret; |
1941 | struct kvm_vcpu *vcpu; | 1571 | struct kvm_vcpu *vcpu; |
1942 | 1572 | ||
1943 | mutex_lock(&kvm->lock); | 1573 | mutex_lock(&kvm->lock); |
1944 | 1574 | ||
1945 | if (kvm->arch.vgic.vctrl_base) { | 1575 | if (irqchip_in_kernel(kvm)) { |
1946 | ret = -EEXIST; | 1576 | ret = -EEXIST; |
1947 | goto out; | 1577 | goto out; |
1948 | } | 1578 | } |
1949 | 1579 | ||
1950 | /* | 1580 | /* |
1581 | * This function is also called by the KVM_CREATE_IRQCHIP handler, | ||
1582 | * which has not yet had a chance to check the availability of the | ||
1583 | * GICv2 emulation, so check it here again. KVM_CREATE_DEVICE | ||
1584 | * already does the proper checks. | ||
1585 | */ | ||
1586 | if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) | ||
1587 | return -ENODEV; | ||
1588 | |||
1589 | /* | ||
1951 | * Any time a vcpu is run, vcpu_load is called which tries to grab the | 1590 | * Any time a vcpu is run, vcpu_load is called which tries to grab the |
1952 | * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure | 1591 | * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure |
1953 | * that no other VCPUs are run while we create the vgic. | 1592 | * that no other VCPUs are run while we create the vgic. |
@@ -1965,11 +1604,17 @@ int kvm_vgic_create(struct kvm *kvm) | |||
1965 | } | 1604 | } |
1966 | ret = 0; | 1605 | ret = 0; |
1967 | 1606 | ||
1607 | ret = init_vgic_model(kvm, type); | ||
1608 | if (ret) | ||
1609 | goto out_unlock; | ||
1610 | |||
1968 | spin_lock_init(&kvm->arch.vgic.lock); | 1611 | spin_lock_init(&kvm->arch.vgic.lock); |
1969 | kvm->arch.vgic.in_kernel = true; | 1612 | kvm->arch.vgic.in_kernel = true; |
1613 | kvm->arch.vgic.vgic_model = type; | ||
1970 | kvm->arch.vgic.vctrl_base = vgic->vctrl_base; | 1614 | kvm->arch.vgic.vctrl_base = vgic->vctrl_base; |
1971 | kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; | 1615 | kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; |
1972 | kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; | 1616 | kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; |
1617 | kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF; | ||
1973 | 1618 | ||
1974 | out_unlock: | 1619 | out_unlock: |
1975 | for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { | 1620 | for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { |
@@ -2022,7 +1667,7 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, | |||
2022 | /** | 1667 | /** |
2023 | * kvm_vgic_addr - set or get vgic VM base addresses | 1668 | * kvm_vgic_addr - set or get vgic VM base addresses |
2024 | * @kvm: pointer to the vm struct | 1669 | * @kvm: pointer to the vm struct |
2025 | * @type: the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX | 1670 | * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX |
2026 | * @addr: pointer to address value | 1671 | * @addr: pointer to address value |
2027 | * @write: if true set the address in the VM address space, if false read the | 1672 | * @write: if true set the address in the VM address space, if false read the |
2028 | * address | 1673 | * address |
@@ -2036,216 +1681,64 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) | |||
2036 | { | 1681 | { |
2037 | int r = 0; | 1682 | int r = 0; |
2038 | struct vgic_dist *vgic = &kvm->arch.vgic; | 1683 | struct vgic_dist *vgic = &kvm->arch.vgic; |
1684 | int type_needed; | ||
1685 | phys_addr_t *addr_ptr, block_size; | ||
1686 | phys_addr_t alignment; | ||
2039 | 1687 | ||
2040 | mutex_lock(&kvm->lock); | 1688 | mutex_lock(&kvm->lock); |
2041 | switch (type) { | 1689 | switch (type) { |
2042 | case KVM_VGIC_V2_ADDR_TYPE_DIST: | 1690 | case KVM_VGIC_V2_ADDR_TYPE_DIST: |
2043 | if (write) { | 1691 | type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; |
2044 | r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, | 1692 | addr_ptr = &vgic->vgic_dist_base; |
2045 | *addr, KVM_VGIC_V2_DIST_SIZE); | 1693 | block_size = KVM_VGIC_V2_DIST_SIZE; |
2046 | } else { | 1694 | alignment = SZ_4K; |
2047 | *addr = vgic->vgic_dist_base; | ||
2048 | } | ||
2049 | break; | 1695 | break; |
2050 | case KVM_VGIC_V2_ADDR_TYPE_CPU: | 1696 | case KVM_VGIC_V2_ADDR_TYPE_CPU: |
2051 | if (write) { | 1697 | type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; |
2052 | r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, | 1698 | addr_ptr = &vgic->vgic_cpu_base; |
2053 | *addr, KVM_VGIC_V2_CPU_SIZE); | 1699 | block_size = KVM_VGIC_V2_CPU_SIZE; |
2054 | } else { | 1700 | alignment = SZ_4K; |
2055 | *addr = vgic->vgic_cpu_base; | ||
2056 | } | ||
2057 | break; | 1701 | break; |
2058 | default: | 1702 | #ifdef CONFIG_ARM_GIC_V3 |
2059 | r = -ENODEV; | 1703 | case KVM_VGIC_V3_ADDR_TYPE_DIST: |
2060 | } | 1704 | type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; |
2061 | 1705 | addr_ptr = &vgic->vgic_dist_base; | |
2062 | mutex_unlock(&kvm->lock); | 1706 | block_size = KVM_VGIC_V3_DIST_SIZE; |
2063 | return r; | 1707 | alignment = SZ_64K; |
2064 | } | ||
2065 | |||
2066 | static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, | ||
2067 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | ||
2068 | { | ||
2069 | bool updated = false; | ||
2070 | struct vgic_vmcr vmcr; | ||
2071 | u32 *vmcr_field; | ||
2072 | u32 reg; | ||
2073 | |||
2074 | vgic_get_vmcr(vcpu, &vmcr); | ||
2075 | |||
2076 | switch (offset & ~0x3) { | ||
2077 | case GIC_CPU_CTRL: | ||
2078 | vmcr_field = &vmcr.ctlr; | ||
2079 | break; | ||
2080 | case GIC_CPU_PRIMASK: | ||
2081 | vmcr_field = &vmcr.pmr; | ||
2082 | break; | 1708 | break; |
2083 | case GIC_CPU_BINPOINT: | 1709 | case KVM_VGIC_V3_ADDR_TYPE_REDIST: |
2084 | vmcr_field = &vmcr.bpr; | 1710 | type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; |
2085 | break; | 1711 | addr_ptr = &vgic->vgic_redist_base; |
2086 | case GIC_CPU_ALIAS_BINPOINT: | 1712 | block_size = KVM_VGIC_V3_REDIST_SIZE; |
2087 | vmcr_field = &vmcr.abpr; | 1713 | alignment = SZ_64K; |
2088 | break; | 1714 | break; |
1715 | #endif | ||
2089 | default: | 1716 | default: |
2090 | BUG(); | 1717 | r = -ENODEV; |
2091 | } | ||
2092 | |||
2093 | if (!mmio->is_write) { | ||
2094 | reg = *vmcr_field; | ||
2095 | mmio_data_write(mmio, ~0, reg); | ||
2096 | } else { | ||
2097 | reg = mmio_data_read(mmio, ~0); | ||
2098 | if (reg != *vmcr_field) { | ||
2099 | *vmcr_field = reg; | ||
2100 | vgic_set_vmcr(vcpu, &vmcr); | ||
2101 | updated = true; | ||
2102 | } | ||
2103 | } | ||
2104 | return updated; | ||
2105 | } | ||
2106 | |||
2107 | static bool handle_mmio_abpr(struct kvm_vcpu *vcpu, | ||
2108 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | ||
2109 | { | ||
2110 | return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT); | ||
2111 | } | ||
2112 | |||
2113 | static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, | ||
2114 | struct kvm_exit_mmio *mmio, | ||
2115 | phys_addr_t offset) | ||
2116 | { | ||
2117 | u32 reg; | ||
2118 | |||
2119 | if (mmio->is_write) | ||
2120 | return false; | ||
2121 | |||
2122 | /* GICC_IIDR */ | ||
2123 | reg = (PRODUCT_ID_KVM << 20) | | ||
2124 | (GICC_ARCH_VERSION_V2 << 16) | | ||
2125 | (IMPLEMENTER_ARM << 0); | ||
2126 | mmio_data_write(mmio, ~0, reg); | ||
2127 | return false; | ||
2128 | } | ||
2129 | |||
2130 | /* | ||
2131 | * CPU Interface Register accesses - these are not accessed by the VM, but by | ||
2132 | * user space for saving and restoring VGIC state. | ||
2133 | */ | ||
2134 | static const struct mmio_range vgic_cpu_ranges[] = { | ||
2135 | { | ||
2136 | .base = GIC_CPU_CTRL, | ||
2137 | .len = 12, | ||
2138 | .handle_mmio = handle_cpu_mmio_misc, | ||
2139 | }, | ||
2140 | { | ||
2141 | .base = GIC_CPU_ALIAS_BINPOINT, | ||
2142 | .len = 4, | ||
2143 | .handle_mmio = handle_mmio_abpr, | ||
2144 | }, | ||
2145 | { | ||
2146 | .base = GIC_CPU_ACTIVEPRIO, | ||
2147 | .len = 16, | ||
2148 | .handle_mmio = handle_mmio_raz_wi, | ||
2149 | }, | ||
2150 | { | ||
2151 | .base = GIC_CPU_IDENT, | ||
2152 | .len = 4, | ||
2153 | .handle_mmio = handle_cpu_mmio_ident, | ||
2154 | }, | ||
2155 | }; | ||
2156 | |||
2157 | static int vgic_attr_regs_access(struct kvm_device *dev, | ||
2158 | struct kvm_device_attr *attr, | ||
2159 | u32 *reg, bool is_write) | ||
2160 | { | ||
2161 | const struct mmio_range *r = NULL, *ranges; | ||
2162 | phys_addr_t offset; | ||
2163 | int ret, cpuid, c; | ||
2164 | struct kvm_vcpu *vcpu, *tmp_vcpu; | ||
2165 | struct vgic_dist *vgic; | ||
2166 | struct kvm_exit_mmio mmio; | ||
2167 | |||
2168 | offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; | ||
2169 | cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> | ||
2170 | KVM_DEV_ARM_VGIC_CPUID_SHIFT; | ||
2171 | |||
2172 | mutex_lock(&dev->kvm->lock); | ||
2173 | |||
2174 | ret = vgic_init(dev->kvm); | ||
2175 | if (ret) | ||
2176 | goto out; | ||
2177 | |||
2178 | if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { | ||
2179 | ret = -EINVAL; | ||
2180 | goto out; | 1718 | goto out; |
2181 | } | 1719 | } |
2182 | 1720 | ||
2183 | vcpu = kvm_get_vcpu(dev->kvm, cpuid); | 1721 | if (vgic->vgic_model != type_needed) { |
2184 | vgic = &dev->kvm->arch.vgic; | 1722 | r = -ENODEV; |
2185 | |||
2186 | mmio.len = 4; | ||
2187 | mmio.is_write = is_write; | ||
2188 | if (is_write) | ||
2189 | mmio_data_write(&mmio, ~0, *reg); | ||
2190 | switch (attr->group) { | ||
2191 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
2192 | mmio.phys_addr = vgic->vgic_dist_base + offset; | ||
2193 | ranges = vgic_dist_ranges; | ||
2194 | break; | ||
2195 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: | ||
2196 | mmio.phys_addr = vgic->vgic_cpu_base + offset; | ||
2197 | ranges = vgic_cpu_ranges; | ||
2198 | break; | ||
2199 | default: | ||
2200 | BUG(); | ||
2201 | } | ||
2202 | r = find_matching_range(ranges, &mmio, offset); | ||
2203 | |||
2204 | if (unlikely(!r || !r->handle_mmio)) { | ||
2205 | ret = -ENXIO; | ||
2206 | goto out; | 1723 | goto out; |
2207 | } | 1724 | } |
2208 | 1725 | ||
2209 | 1726 | if (write) { | |
2210 | spin_lock(&vgic->lock); | 1727 | if (!IS_ALIGNED(*addr, alignment)) |
2211 | 1728 | r = -EINVAL; | |
2212 | /* | 1729 | else |
2213 | * Ensure that no other VCPU is running by checking the vcpu->cpu | 1730 | r = vgic_ioaddr_assign(kvm, addr_ptr, *addr, |
2214 | * field. If no other VCPUs are running we can safely access the VGIC | ||
2215 | * state, because even if another VCPU is run after this point, that | ||
2216 | * VCPU will not touch the vgic state, because it will block on | 1733 | *addr = *addr_ptr; |
2217 | * getting the vgic->lock in kvm_vgic_sync_hwstate(). | ||
2218 | */ | ||
2219 | kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) { | ||
2220 | if (unlikely(tmp_vcpu->cpu != -1)) { | ||
2221 | ret = -EBUSY; | ||
2222 | goto out_vgic_unlock; | ||
2223 | } | ||
2224 | } | 1734 | } |
2225 | 1735 | ||
2226 | /* | ||
2227 | * Move all pending IRQs from the LRs on all VCPUs so the pending | ||
2228 | * state can be properly represented in the register state accessible | ||
2229 | * through this API. | ||
2230 | */ | ||
2231 | kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) | ||
2232 | vgic_unqueue_irqs(tmp_vcpu); | ||
2233 | |||
2234 | offset -= r->base; | ||
2235 | r->handle_mmio(vcpu, &mmio, offset); | ||
2236 | |||
2237 | if (!is_write) | ||
2238 | *reg = mmio_data_read(&mmio, ~0); | ||
2239 | |||
2240 | ret = 0; | ||
2241 | out_vgic_unlock: | ||
2242 | spin_unlock(&vgic->lock); | ||
2243 | out: | 1736 | out: |
2244 | mutex_unlock(&dev->kvm->lock); | 1737 | mutex_unlock(&kvm->lock); |
2245 | return ret; | 1738 | return r; |
2246 | } | 1739 | } |
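
The reworked kvm_vgic_addr() above reduces each address type to a tuple of target pointer, region size and required alignment, then runs one common validation path. A condensed sketch of that shape (the constants and the set_vgic_addr name are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <errno.h>

#define SZ_4K   0x1000UL
#define SZ_64K  0x10000UL

enum addr_type { DIST_V2, CPU_V2, DIST_V3, REDIST_V3 };

static uint64_t dist_base = ~0ULL, cpu_base = ~0ULL, redist_base = ~0ULL;

static int set_vgic_addr(enum addr_type type, uint64_t addr)
{
        uint64_t *addr_ptr;
        unsigned long alignment;

        /* one switch fills in the per-type parameters... */
        switch (type) {
        case DIST_V2:   addr_ptr = &dist_base;   alignment = SZ_4K;  break;
        case CPU_V2:    addr_ptr = &cpu_base;    alignment = SZ_4K;  break;
        case DIST_V3:   addr_ptr = &dist_base;   alignment = SZ_64K; break;
        case REDIST_V3: addr_ptr = &redist_base; alignment = SZ_64K; break;
        default:        return -ENODEV;
        }

        /* ...and one common path validates and assigns */
        if (addr & (alignment - 1))     /* same test IS_ALIGNED() performs */
                return -EINVAL;

        *addr_ptr = addr;
        return 0;
}

int main(void)
{
        printf("%d\n", set_vgic_addr(DIST_V3, 0x8000000));  /* 0: 64K aligned */
        printf("%d\n", set_vgic_addr(DIST_V3, 0x8001000));  /* -EINVAL */
        return 0;
}
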
2247 | 1740 | ||
2248 | static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | 1741 | int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) |
2249 | { | 1742 | { |
2250 | int r; | 1743 | int r; |
2251 | 1744 | ||
@@ -2261,17 +1754,6 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | |||
2261 | r = kvm_vgic_addr(dev->kvm, type, &addr, true); | 1754 | r = kvm_vgic_addr(dev->kvm, type, &addr, true); |
2262 | return (r == -ENODEV) ? -ENXIO : r; | 1755 | return (r == -ENODEV) ? -ENXIO : r; |
2263 | } | 1756 | } |
2264 | |||
2265 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
2266 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { | ||
2267 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
2268 | u32 reg; | ||
2269 | |||
2270 | if (get_user(reg, uaddr)) | ||
2271 | return -EFAULT; | ||
2272 | |||
2273 | return vgic_attr_regs_access(dev, attr, ®, true); | ||
2274 | } | ||
2275 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { | 1757 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { |
2276 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | 1758 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; |
2277 | u32 val; | 1759 | u32 val; |
@@ -2302,13 +1784,20 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | |||
2302 | 1784 | ||
2303 | return ret; | 1785 | return ret; |
2304 | } | 1786 | } |
2305 | 1787 | case KVM_DEV_ARM_VGIC_GRP_CTRL: { | |
1788 | switch (attr->attr) { | ||
1789 | case KVM_DEV_ARM_VGIC_CTRL_INIT: | ||
1790 | r = vgic_init(dev->kvm); | ||
1791 | return r; | ||
1792 | } | ||
1793 | break; | ||
1794 | } | ||
2306 | } | 1795 | } |
2307 | 1796 | ||
2308 | return -ENXIO; | 1797 | return -ENXIO; |
2309 | } | 1798 | } |
2310 | 1799 | ||
2311 | static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | 1800 | int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) |
2312 | { | 1801 | { |
2313 | int r = -ENXIO; | 1802 | int r = -ENXIO; |
2314 | 1803 | ||
@@ -2326,20 +1815,9 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | |||
2326 | return -EFAULT; | 1815 | return -EFAULT; |
2327 | break; | 1816 | break; |
2328 | } | 1817 | } |
2329 | |||
2330 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
2331 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { | ||
2332 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
2333 | u32 reg = 0; | ||
2334 | |||
2335 | r = vgic_attr_regs_access(dev, attr, ®, false); | ||
2336 | if (r) | ||
2337 | return r; | ||
2338 | r = put_user(reg, uaddr); | ||
2339 | break; | ||
2340 | } | ||
2341 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { | 1818 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { |
2342 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | 1819 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; |
1820 | |||
2343 | r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr); | 1821 | r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr); |
2344 | break; | 1822 | break; |
2345 | } | 1823 | } |
@@ -2349,61 +1827,17 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | |||
2349 | return r; | 1827 | return r; |
2350 | } | 1828 | } |
2351 | 1829 | ||
2352 | static int vgic_has_attr_regs(const struct mmio_range *ranges, | 1830 | int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset) |
2353 | phys_addr_t offset) | ||
2354 | { | 1831 | { |
2355 | struct kvm_exit_mmio dev_attr_mmio; | 1832 | struct kvm_exit_mmio dev_attr_mmio; |
2356 | 1833 | ||
2357 | dev_attr_mmio.len = 4; | 1834 | dev_attr_mmio.len = 4; |
2358 | if (find_matching_range(ranges, &dev_attr_mmio, offset)) | 1835 | if (vgic_find_range(ranges, &dev_attr_mmio, offset)) |
2359 | return 0; | 1836 | return 0; |
2360 | else | 1837 | else |
2361 | return -ENXIO; | 1838 | return -ENXIO; |
2362 | } | 1839 | } |
2363 | 1840 | ||
2364 | static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | ||
2365 | { | ||
2366 | phys_addr_t offset; | ||
2367 | |||
2368 | switch (attr->group) { | ||
2369 | case KVM_DEV_ARM_VGIC_GRP_ADDR: | ||
2370 | switch (attr->attr) { | ||
2371 | case KVM_VGIC_V2_ADDR_TYPE_DIST: | ||
2372 | case KVM_VGIC_V2_ADDR_TYPE_CPU: | ||
2373 | return 0; | ||
2374 | } | ||
2375 | break; | ||
2376 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
2377 | offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; | ||
2378 | return vgic_has_attr_regs(vgic_dist_ranges, offset); | ||
2379 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: | ||
2380 | offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; | ||
2381 | return vgic_has_attr_regs(vgic_cpu_ranges, offset); | ||
2382 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: | ||
2383 | return 0; | ||
2384 | } | ||
2385 | return -ENXIO; | ||
2386 | } | ||
2387 | |||
2388 | static void vgic_destroy(struct kvm_device *dev) | ||
2389 | { | ||
2390 | kfree(dev); | ||
2391 | } | ||
2392 | |||
2393 | static int vgic_create(struct kvm_device *dev, u32 type) | ||
2394 | { | ||
2395 | return kvm_vgic_create(dev->kvm); | ||
2396 | } | ||
2397 | |||
2398 | static struct kvm_device_ops kvm_arm_vgic_v2_ops = { | ||
2399 | .name = "kvm-arm-vgic", | ||
2400 | .create = vgic_create, | ||
2401 | .destroy = vgic_destroy, | ||
2402 | .set_attr = vgic_set_attr, | ||
2403 | .get_attr = vgic_get_attr, | ||
2404 | .has_attr = vgic_has_attr, | ||
2405 | }; | ||
2406 | |||
2407 | static void vgic_init_maintenance_interrupt(void *info) | 1841 | static void vgic_init_maintenance_interrupt(void *info) |
2408 | { | 1842 | { |
2409 | enable_percpu_irq(vgic->maint_irq, 0); | 1843 | enable_percpu_irq(vgic->maint_irq, 0); |
@@ -2474,8 +1908,7 @@ int kvm_vgic_hyp_init(void) | |||
2474 | 1908 | ||
2475 | on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); | 1909 | on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); |
2476 | 1910 | ||
2477 | return kvm_register_device_ops(&kvm_arm_vgic_v2_ops, | 1911 | return 0; |
2478 | KVM_DEV_TYPE_ARM_VGIC_V2); | ||
2479 | 1912 | ||
2480 | out_free_irq: | 1913 | out_free_irq: |
2481 | free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); | 1914 | free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); |
diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h new file mode 100644 index 000000000000..1e83bdf5f499 --- /dev/null +++ b/virt/kvm/arm/vgic.h | |||
@@ -0,0 +1,123 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2012-2014 ARM Ltd. | ||
3 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
4 | * | ||
5 | * Derived from virt/kvm/arm/vgic.c | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
18 | */ | ||
19 | |||
20 | #ifndef __KVM_VGIC_H__ | ||
21 | #define __KVM_VGIC_H__ | ||
22 | |||
23 | #define VGIC_ADDR_UNDEF (-1) | ||
24 | #define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) | ||
25 | |||
26 | #define PRODUCT_ID_KVM 0x4b /* ASCII code K */ | ||
27 | #define IMPLEMENTER_ARM 0x43b | ||
28 | |||
29 | #define ACCESS_READ_VALUE (1 << 0) | ||
30 | #define ACCESS_READ_RAZ (0 << 0) | ||
31 | #define ACCESS_READ_MASK(x) ((x) & (1 << 0)) | ||
32 | #define ACCESS_WRITE_IGNORED (0 << 1) | ||
33 | #define ACCESS_WRITE_SETBIT (1 << 1) | ||
34 | #define ACCESS_WRITE_CLEARBIT (2 << 1) | ||
35 | #define ACCESS_WRITE_VALUE (3 << 1) | ||
36 | #define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) | ||
37 | |||
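
The ACCESS_* flags above encode the read behaviour in bit 0 and the write behaviour in bits 1-2, so a single mode argument can carry both. A tiny sketch of how the two masks pull a combined mode apart again, mirroring what vgic_reg_access() is expected to do with them:

#include <stdio.h>

#define ACCESS_READ_VALUE       (1 << 0)
#define ACCESS_READ_RAZ         (0 << 0)
#define ACCESS_READ_MASK(x)     ((x) & (1 << 0))
#define ACCESS_WRITE_IGNORED    (0 << 1)
#define ACCESS_WRITE_SETBIT     (1 << 1)
#define ACCESS_WRITE_CLEARBIT   (2 << 1)
#define ACCESS_WRITE_VALUE      (3 << 1)
#define ACCESS_WRITE_MASK(x)    ((x) & (3 << 1))

int main(void)
{
        int mode = ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT;

        printf("reads return the value: %d\n",
               ACCESS_READ_MASK(mode) == ACCESS_READ_VALUE);
        printf("writes set bits:        %d\n",
               ACCESS_WRITE_MASK(mode) == ACCESS_WRITE_SETBIT);
        return 0;
}
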
38 | #define VCPU_NOT_ALLOCATED ((u8)-1) | ||
39 | |||
40 | unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x); | ||
41 | |||
42 | void vgic_update_state(struct kvm *kvm); | ||
43 | int vgic_init_common_maps(struct kvm *kvm); | ||
44 | |||
45 | u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset); | ||
46 | u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset); | ||
47 | |||
48 | void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq); | ||
49 | void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq); | ||
50 | void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq); | ||
51 | void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, | ||
52 | int irq, int val); | ||
53 | |||
54 | void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); | ||
55 | void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); | ||
56 | |||
57 | bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq); | ||
58 | void vgic_unqueue_irqs(struct kvm_vcpu *vcpu); | ||
59 | |||
60 | void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, | ||
61 | phys_addr_t offset, int mode); | ||
62 | bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, | ||
63 | phys_addr_t offset); | ||
64 | |||
65 | static inline | ||
66 | u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) | ||
67 | { | ||
68 | return le32_to_cpu(*((u32 *)mmio->data)) & mask; | ||
69 | } | ||
70 | |||
71 | static inline | ||
72 | void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) | ||
73 | { | ||
74 | *((u32 *)mmio->data) = cpu_to_le32(value) & mask; | ||
75 | } | ||
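
mmio_data_read()/mmio_data_write() above are little-endian accessors over the first word of mmio->data, with a caller-supplied mask. A standalone equivalent without the kernel's le32 helpers, for illustration only:

#include <stdint.h>
#include <stdio.h>

/* interpret the first four data bytes as a little-endian word, then mask */
static uint32_t data_read_le(const uint8_t *data, uint32_t mask)
{
        uint32_t v = (uint32_t)data[0] | ((uint32_t)data[1] << 8) |
                     ((uint32_t)data[2] << 16) | ((uint32_t)data[3] << 24);

        return v & mask;
}

/* store a masked value back as little-endian bytes */
static void data_write_le(uint8_t *data, uint32_t mask, uint32_t value)
{
        uint32_t v = value & mask;

        data[0] = v;
        data[1] = v >> 8;
        data[2] = v >> 16;
        data[3] = v >> 24;
}

int main(void)
{
        uint8_t buf[4];

        data_write_le(buf, ~0u, 0x12345678);
        printf("0x%08x\n", data_read_le(buf, 0xffff));  /* prints 0x00005678 */
        return 0;
}
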
76 | |||
77 | struct kvm_mmio_range { | ||
78 | phys_addr_t base; | ||
79 | unsigned long len; | ||
80 | int bits_per_irq; | ||
81 | bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, | ||
82 | phys_addr_t offset); | ||
83 | }; | ||
84 | |||
85 | static inline bool is_in_range(phys_addr_t addr, unsigned long len, | ||
86 | phys_addr_t baseaddr, unsigned long size) | ||
87 | { | ||
88 | return (addr >= baseaddr) && (addr + len <= baseaddr + size); | ||
89 | } | ||
90 | |||
91 | const | ||
92 | struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, | ||
93 | struct kvm_exit_mmio *mmio, | ||
94 | phys_addr_t offset); | ||
95 | |||
96 | bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, | ||
97 | struct kvm_exit_mmio *mmio, | ||
98 | const struct kvm_mmio_range *ranges, | ||
99 | unsigned long mmio_base); | ||
100 | |||
101 | bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, | ||
102 | phys_addr_t offset, int vcpu_id, int access); | ||
103 | |||
104 | bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, | ||
105 | phys_addr_t offset, int vcpu_id); | ||
106 | |||
107 | bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, | ||
108 | phys_addr_t offset, int vcpu_id); | ||
109 | |||
110 | bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, | ||
111 | phys_addr_t offset); | ||
112 | |||
113 | void vgic_kick_vcpus(struct kvm *kvm); | ||
114 | |||
115 | int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset); | ||
116 | int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); | ||
117 | int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); | ||
118 | |||
119 | int vgic_init(struct kvm *kvm); | ||
120 | void vgic_v2_init_emulation(struct kvm *kvm); | ||
121 | void vgic_v3_init_emulation(struct kvm *kvm); | ||
122 | |||
123 | #endif | ||
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 458b9b14b15c..a1093700f3a4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -66,6 +66,9 @@ | |||
66 | MODULE_AUTHOR("Qumranet"); | 66 | MODULE_AUTHOR("Qumranet"); |
67 | MODULE_LICENSE("GPL"); | 67 | MODULE_LICENSE("GPL"); |
68 | 68 | ||
69 | unsigned int halt_poll_ns = 0; | ||
70 | module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); | ||
71 | |||
69 | /* | 72 | /* |
70 | * Ordering of locks: | 73 | * Ordering of locks: |
71 | * | 74 | * |
@@ -89,7 +92,7 @@ struct dentry *kvm_debugfs_dir; | |||
89 | 92 | ||
90 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | 93 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, |
91 | unsigned long arg); | 94 | unsigned long arg); |
92 | #ifdef CONFIG_COMPAT | 95 | #ifdef CONFIG_KVM_COMPAT |
93 | static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl, | 96 | static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl, |
94 | unsigned long arg); | 97 | unsigned long arg); |
95 | #endif | 98 | #endif |
@@ -176,6 +179,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
176 | return called; | 179 | return called; |
177 | } | 180 | } |
178 | 181 | ||
182 | #ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL | ||
179 | void kvm_flush_remote_tlbs(struct kvm *kvm) | 183 | void kvm_flush_remote_tlbs(struct kvm *kvm) |
180 | { | 184 | { |
181 | long dirty_count = kvm->tlbs_dirty; | 185 | long dirty_count = kvm->tlbs_dirty; |
@@ -186,6 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) | |||
186 | cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); | 190 | cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); |
187 | } | 191 | } |
188 | EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); | 192 | EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); |
193 | #endif | ||
189 | 194 | ||
190 | void kvm_reload_remote_mmus(struct kvm *kvm) | 195 | void kvm_reload_remote_mmus(struct kvm *kvm) |
191 | { | 196 | { |
@@ -673,6 +678,7 @@ static void update_memslots(struct kvm_memslots *slots, | |||
673 | if (!new->npages) { | 678 | if (!new->npages) { |
674 | WARN_ON(!mslots[i].npages); | 679 | WARN_ON(!mslots[i].npages); |
675 | new->base_gfn = 0; | 680 | new->base_gfn = 0; |
681 | new->flags = 0; | ||
676 | if (mslots[i].npages) | 682 | if (mslots[i].npages) |
677 | slots->used_slots--; | 683 | slots->used_slots--; |
678 | } else { | 684 | } else { |
@@ -993,6 +999,86 @@ out: | |||
993 | } | 999 | } |
994 | EXPORT_SYMBOL_GPL(kvm_get_dirty_log); | 1000 | EXPORT_SYMBOL_GPL(kvm_get_dirty_log); |
995 | 1001 | ||
1002 | #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT | ||
1003 | /** | ||
1004 | * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages | ||
1005 | * are dirty write protect them for next write. | ||
1006 | * @kvm: pointer to kvm instance | ||
1007 | * @log: slot id and address to which we copy the log | ||
1008 | * @is_dirty: flag set if any page is dirty | ||
1009 | * | ||
1010 | * We need to keep in mind that VCPU threads can write to the bitmap | ||
1011 | * concurrently. So, to avoid losing track of dirty pages we keep the | ||
1012 | * following order: | ||
1013 | * | ||
1014 | * 1. Take a snapshot of the bit and clear it if needed. | ||
1015 | * 2. Write protect the corresponding page. | ||
1016 | * 3. Copy the snapshot to the userspace. | ||
1017 | * 4. Upon return caller flushes TLB's if needed. | ||
1018 | * | ||
1019 | * Between 2 and 4, the guest may write to the page using the remaining TLB | ||
1020 | * entry. This is not a problem because the page is reported dirty using | ||
1021 | * the snapshot taken before, and step 4 ensures that writes done after | ||
1022 | * exiting to userspace will be logged for the next call. | ||
1023 | * | ||
1024 | */ | ||
1025 | int kvm_get_dirty_log_protect(struct kvm *kvm, | ||
1026 | struct kvm_dirty_log *log, bool *is_dirty) | ||
1027 | { | ||
1028 | struct kvm_memory_slot *memslot; | ||
1029 | int r, i; | ||
1030 | unsigned long n; | ||
1031 | unsigned long *dirty_bitmap; | ||
1032 | unsigned long *dirty_bitmap_buffer; | ||
1033 | |||
1034 | r = -EINVAL; | ||
1035 | if (log->slot >= KVM_USER_MEM_SLOTS) | ||
1036 | goto out; | ||
1037 | |||
1038 | memslot = id_to_memslot(kvm->memslots, log->slot); | ||
1039 | |||
1040 | dirty_bitmap = memslot->dirty_bitmap; | ||
1041 | r = -ENOENT; | ||
1042 | if (!dirty_bitmap) | ||
1043 | goto out; | ||
1044 | |||
1045 | n = kvm_dirty_bitmap_bytes(memslot); | ||
1046 | |||
1047 | dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long); | ||
1048 | memset(dirty_bitmap_buffer, 0, n); | ||
1049 | |||
1050 | spin_lock(&kvm->mmu_lock); | ||
1051 | *is_dirty = false; | ||
1052 | for (i = 0; i < n / sizeof(long); i++) { | ||
1053 | unsigned long mask; | ||
1054 | gfn_t offset; | ||
1055 | |||
1056 | if (!dirty_bitmap[i]) | ||
1057 | continue; | ||
1058 | |||
1059 | *is_dirty = true; | ||
1060 | |||
1061 | mask = xchg(&dirty_bitmap[i], 0); | ||
1062 | dirty_bitmap_buffer[i] = mask; | ||
1063 | |||
1064 | offset = i * BITS_PER_LONG; | ||
1065 | kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, | ||
1066 | mask); | ||
1067 | } | ||
1068 | |||
1069 | spin_unlock(&kvm->mmu_lock); | ||
1070 | |||
1071 | r = -EFAULT; | ||
1072 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) | ||
1073 | goto out; | ||
1074 | |||
1075 | r = 0; | ||
1076 | out: | ||
1077 | return r; | ||
1078 | } | ||
1079 | EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); | ||
1080 | #endif | ||
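
kvm_get_dirty_log_protect() relies on atomically grabbing and clearing each bitmap word (the xchg() above) so that pages dirtied concurrently are reported in the next round instead of being lost. A userspace sketch of just that snapshot step, with the write-protect call reduced to a comment:

#include <stdatomic.h>
#include <stdio.h>

#define WORDS 4

static _Atomic unsigned long dirty_bitmap[WORDS];
static unsigned long snapshot[WORDS];

static int snapshot_and_clear(void)
{
        int i, any_dirty = 0;

        for (i = 0; i < WORDS; i++) {
                if (!atomic_load(&dirty_bitmap[i]))
                        continue;
                /* like xchg(&dirty_bitmap[i], 0) in the kernel code */
                snapshot[i] = atomic_exchange(&dirty_bitmap[i], 0);
                any_dirty = 1;
                /* here the kernel also write-protects the pages in this word */
        }
        return any_dirty;
}

int main(void)
{
        atomic_store(&dirty_bitmap[1], 0x9UL);  /* bits 0 and 3 of word 1 */
        printf("dirty: %d, word1 snapshot: 0x%lx\n",
               snapshot_and_clear(), snapshot[1]);
        return 0;
}
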
1081 | |||
996 | bool kvm_largepages_enabled(void) | 1082 | bool kvm_largepages_enabled(void) |
997 | { | 1083 | { |
998 | return largepages_enabled; | 1084 | return largepages_enabled; |
@@ -1551,6 +1637,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, | |||
1551 | } | 1637 | } |
1552 | return 0; | 1638 | return 0; |
1553 | } | 1639 | } |
1640 | EXPORT_SYMBOL_GPL(kvm_write_guest); | ||
1554 | 1641 | ||
1555 | int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 1642 | int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, |
1556 | gpa_t gpa, unsigned long len) | 1643 | gpa_t gpa, unsigned long len) |
@@ -1687,29 +1774,60 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | |||
1687 | } | 1774 | } |
1688 | EXPORT_SYMBOL_GPL(mark_page_dirty); | 1775 | EXPORT_SYMBOL_GPL(mark_page_dirty); |
1689 | 1776 | ||
1777 | static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu) | ||
1778 | { | ||
1779 | if (kvm_arch_vcpu_runnable(vcpu)) { | ||
1780 | kvm_make_request(KVM_REQ_UNHALT, vcpu); | ||
1781 | return -EINTR; | ||
1782 | } | ||
1783 | if (kvm_cpu_has_pending_timer(vcpu)) | ||
1784 | return -EINTR; | ||
1785 | if (signal_pending(current)) | ||
1786 | return -EINTR; | ||
1787 | |||
1788 | return 0; | ||
1789 | } | ||
1790 | |||
1690 | /* | 1791 | /* |
1691 | * The vCPU has executed a HLT instruction with in-kernel mode enabled. | 1792 | * The vCPU has executed a HLT instruction with in-kernel mode enabled. |
1692 | */ | 1793 | */ |
1693 | void kvm_vcpu_block(struct kvm_vcpu *vcpu) | 1794 | void kvm_vcpu_block(struct kvm_vcpu *vcpu) |
1694 | { | 1795 | { |
1796 | ktime_t start, cur; | ||
1695 | DEFINE_WAIT(wait); | 1797 | DEFINE_WAIT(wait); |
1798 | bool waited = false; | ||
1799 | |||
1800 | start = cur = ktime_get(); | ||
1801 | if (halt_poll_ns) { | ||
1802 | ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns); | ||
1803 | do { | ||
1804 | /* | ||
1805 | * This sets KVM_REQ_UNHALT if an interrupt | ||
1806 | * arrives. | ||
1807 | */ | ||
1808 | if (kvm_vcpu_check_block(vcpu) < 0) { | ||
1809 | ++vcpu->stat.halt_successful_poll; | ||
1810 | goto out; | ||
1811 | } | ||
1812 | cur = ktime_get(); | ||
1813 | } while (single_task_running() && ktime_before(cur, stop)); | ||
1814 | } | ||
1696 | 1815 | ||
1697 | for (;;) { | 1816 | for (;;) { |
1698 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | 1817 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); |
1699 | 1818 | ||
1700 | if (kvm_arch_vcpu_runnable(vcpu)) { | 1819 | if (kvm_vcpu_check_block(vcpu) < 0) |
1701 | kvm_make_request(KVM_REQ_UNHALT, vcpu); | ||
1702 | break; | ||
1703 | } | ||
1704 | if (kvm_cpu_has_pending_timer(vcpu)) | ||
1705 | break; | ||
1706 | if (signal_pending(current)) | ||
1707 | break; | 1820 | break; |
1708 | 1821 | ||
1822 | waited = true; | ||
1709 | schedule(); | 1823 | schedule(); |
1710 | } | 1824 | } |
1711 | 1825 | ||
1712 | finish_wait(&vcpu->wq, &wait); | 1826 | finish_wait(&vcpu->wq, &wait); |
1827 | cur = ktime_get(); | ||
1828 | |||
1829 | out: | ||
1830 | trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited); | ||
1713 | } | 1831 | } |
1714 | EXPORT_SYMBOL_GPL(kvm_vcpu_block); | 1832 | EXPORT_SYMBOL_GPL(kvm_vcpu_block); |
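
The halt_poll_ns change turns kvm_vcpu_block() into a poll-then-sleep sequence: busy-check the wakeup conditions for a bounded time and only fall back to the waitqueue if nothing arrives. A self-contained sketch of that pattern; wakeup_pending() and vcpu_block() are stand-ins, not KVM APIs:

#define _POSIX_C_SOURCE 199309L
#include <stdint.h>
#include <stdbool.h>
#include <time.h>
#include <stdio.h>

static unsigned int halt_poll_ns = 500000;      /* 500us, like the module param */

static uint64_t now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

static bool wakeup_pending(void)
{
        return false;   /* stand-in for kvm_vcpu_check_block() < 0 */
}

static void vcpu_block(void)
{
        uint64_t start = now_ns();

        if (halt_poll_ns) {
                uint64_t stop = start + halt_poll_ns;

                do {
                        if (wakeup_pending()) {
                                printf("woken by polling after %lluns\n",
                                       (unsigned long long)(now_ns() - start));
                                return;         /* successful poll, no sleep */
                        }
                } while (now_ns() < stop);
        }

        /* polling failed; a real implementation now sleeps on a waitqueue */
        printf("polled %uns without luck, blocking\n", halt_poll_ns);
}

int main(void)
{
        vcpu_block();
        return 0;
}
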
1715 | 1833 | ||
@@ -1892,7 +2010,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp) | |||
1892 | static struct file_operations kvm_vcpu_fops = { | 2010 | static struct file_operations kvm_vcpu_fops = { |
1893 | .release = kvm_vcpu_release, | 2011 | .release = kvm_vcpu_release, |
1894 | .unlocked_ioctl = kvm_vcpu_ioctl, | 2012 | .unlocked_ioctl = kvm_vcpu_ioctl, |
1895 | #ifdef CONFIG_COMPAT | 2013 | #ifdef CONFIG_KVM_COMPAT |
1896 | .compat_ioctl = kvm_vcpu_compat_ioctl, | 2014 | .compat_ioctl = kvm_vcpu_compat_ioctl, |
1897 | #endif | 2015 | #endif |
1898 | .mmap = kvm_vcpu_mmap, | 2016 | .mmap = kvm_vcpu_mmap, |
@@ -2182,7 +2300,7 @@ out: | |||
2182 | return r; | 2300 | return r; |
2183 | } | 2301 | } |
2184 | 2302 | ||
2185 | #ifdef CONFIG_COMPAT | 2303 | #ifdef CONFIG_KVM_COMPAT |
2186 | static long kvm_vcpu_compat_ioctl(struct file *filp, | 2304 | static long kvm_vcpu_compat_ioctl(struct file *filp, |
2187 | unsigned int ioctl, unsigned long arg) | 2305 | unsigned int ioctl, unsigned long arg) |
2188 | { | 2306 | { |
@@ -2274,7 +2392,7 @@ static int kvm_device_release(struct inode *inode, struct file *filp) | |||
2274 | 2392 | ||
2275 | static const struct file_operations kvm_device_fops = { | 2393 | static const struct file_operations kvm_device_fops = { |
2276 | .unlocked_ioctl = kvm_device_ioctl, | 2394 | .unlocked_ioctl = kvm_device_ioctl, |
2277 | #ifdef CONFIG_COMPAT | 2395 | #ifdef CONFIG_KVM_COMPAT |
2278 | .compat_ioctl = kvm_device_ioctl, | 2396 | .compat_ioctl = kvm_device_ioctl, |
2279 | #endif | 2397 | #endif |
2280 | .release = kvm_device_release, | 2398 | .release = kvm_device_release, |
@@ -2561,7 +2679,7 @@ out: | |||
2561 | return r; | 2679 | return r; |
2562 | } | 2680 | } |
2563 | 2681 | ||
2564 | #ifdef CONFIG_COMPAT | 2682 | #ifdef CONFIG_KVM_COMPAT |
2565 | struct compat_kvm_dirty_log { | 2683 | struct compat_kvm_dirty_log { |
2566 | __u32 slot; | 2684 | __u32 slot; |
2567 | __u32 padding1; | 2685 | __u32 padding1; |
@@ -2608,7 +2726,7 @@ out: | |||
2608 | static struct file_operations kvm_vm_fops = { | 2726 | static struct file_operations kvm_vm_fops = { |
2609 | .release = kvm_vm_release, | 2727 | .release = kvm_vm_release, |
2610 | .unlocked_ioctl = kvm_vm_ioctl, | 2728 | .unlocked_ioctl = kvm_vm_ioctl, |
2611 | #ifdef CONFIG_COMPAT | 2729 | #ifdef CONFIG_KVM_COMPAT |
2612 | .compat_ioctl = kvm_vm_compat_ioctl, | 2730 | .compat_ioctl = kvm_vm_compat_ioctl, |
2613 | #endif | 2731 | #endif |
2614 | .llseek = noop_llseek, | 2732 | .llseek = noop_llseek, |