author     Linus Torvalds <torvalds@linux-foundation.org>   2016-03-16 12:55:35 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-03-16 12:55:35 -0400
commit     10dc3747661bea9215417b659449bb7b8ed3df2c (patch)
tree       d943974b4941203a7db2fabe4896852cf0f16bc4
parent     047486d8e7c2a7e8d75b068b69cb67b47364f5d4 (diff)
parent     f958ee745f70b60d0e41927cab2c073104bc70c2 (diff)

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
 "One of the largest releases for KVM... Hardly any generic changes,
  but lots of architecture-specific updates.

  ARM:
   - VHE support so that we can run the kernel at EL2 on ARMv8.1 systems
   - PMU support for guests
   - 32-bit world switch rewritten in C
   - various optimizations to the vgic save/restore code

  PPC:
   - enabled KVM-VFIO integration ("VFIO device")
   - optimizations to speed up IPIs between vcpus
   - in-kernel handling of IOMMU hypercalls
   - support for dynamic DMA windows (DDW)

  s390:
   - provide the floating point registers via sync regs
   - separated instruction vs. data accesses
   - dirty log improvements for huge guests
   - bugfixes and documentation improvements

  x86:
   - Hyper-V VMBus hypercall userspace exit
   - alternative implementation of lowest-priority interrupts using
     vector hashing (for better VT-d posted interrupt support)
   - fixed guest debugging with nested virtualization
   - improved interrupt tracking in the in-kernel IOAPIC
   - generic infrastructure for tracking writes to guest memory -
     currently its only use is to speed up the legacy shadow paging
     (pre-EPT) case, but in the future it will be used for virtual GPUs
     as well
   - much cleanup (LAPIC, kvmclock, MMU, PIT), including ubsan fixes"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (217 commits)
KVM: x86: remove eager_fpu field of struct kvm_vcpu_arch
KVM: x86: disable MPX if host did not enable MPX XSAVE features
arm64: KVM: vgic-v3: Only wipe LRs on vcpu exit
arm64: KVM: vgic-v3: Reset LRs at boot time
arm64: KVM: vgic-v3: Do not save an LR known to be empty
arm64: KVM: vgic-v3: Save maintenance interrupt state only if required
arm64: KVM: vgic-v3: Avoid accessing ICH registers
KVM: arm/arm64: vgic-v2: Make GICD_SGIR quicker to hit
KVM: arm/arm64: vgic-v2: Only wipe LRs on vcpu exit
KVM: arm/arm64: vgic-v2: Reset LRs at boot time
KVM: arm/arm64: vgic-v2: Do not save an LR known to be empty
KVM: arm/arm64: vgic-v2: Move GICH_ELRSR saving to its own function
KVM: arm/arm64: vgic-v2: Save maintenance interrupt state only if required
KVM: arm/arm64: vgic-v2: Avoid accessing GICH registers
KVM: s390: allocate only one DMA page per VM
KVM: s390: enable STFLE interpretation only if enabled for the guest
KVM: s390: wake up when the VCPU cpu timer expires
KVM: s390: step the VCPU timer while in enabled wait
KVM: s390: protect VCPU cpu timer with a seqcount
KVM: s390: step VCPU cpu timer during kvm_run ioctl
...
142 files changed, 6757 insertions(+), 2939 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 07e4cdf02407..4d0542c5206b 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2507,8 +2507,9 @@ struct kvm_create_device {
 
 4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
 
-Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
-Type: device ioctl, vm ioctl
+Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
+  KVM_CAP_VCPU_ATTRIBUTES for vcpu device
+Type: device ioctl, vm ioctl, vcpu ioctl
 Parameters: struct kvm_device_attr
 Returns: 0 on success, -1 on error
 Errors:
@@ -2533,8 +2534,9 @@ struct kvm_device_attr {
 
 4.81 KVM_HAS_DEVICE_ATTR
 
-Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
-Type: device ioctl, vm ioctl
+Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
+  KVM_CAP_VCPU_ATTRIBUTES for vcpu device
+Type: device ioctl, vm ioctl, vcpu ioctl
 Parameters: struct kvm_device_attr
 Returns: 0 on success, -1 on error
 Errors:
@@ -2577,6 +2579,8 @@ Possible features:
 	  Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
 	- KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU.
 	  Depends on KVM_CAP_ARM_PSCI_0_2.
+	- KVM_ARM_VCPU_PMU_V3: Emulate PMUv3 for the CPU.
+	  Depends on KVM_CAP_ARM_PMU_V3.
 
 
 4.83 KVM_ARM_PREFERRED_TARGET
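
A quick illustration of the new KVM_ARM_VCPU_PMU_V3 feature bit (a minimal
userspace sketch, not part of this commit; vm_fd/vcpu_fd are assumed to come
from KVM_CREATE_VM/KVM_CREATE_VCPU, requires <linux/kvm.h>, <string.h> and
<sys/ioctl.h>, error handling elided):

	struct kvm_vcpu_init init;

	memset(&init, 0, sizeof(init));
	/* Ask KVM for the preferred target CPU type first. */
	ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init);
	/* Request PMUv3 emulation; only valid if KVM_CAP_ARM_PMU_V3
	 * is advertised by KVM_CHECK_EXTENSION. */
	init.features[KVM_ARM_VCPU_PMU_V3 / 32] |= 1U << (KVM_ARM_VCPU_PMU_V3 % 32);
	ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);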
@@ -3035,6 +3039,87 @@ Returns: 0 on success, -1 on error
 
 Queues an SMI on the thread's vcpu.
 
+4.97 KVM_CAP_PPC_MULTITCE
+
+Capability: KVM_CAP_PPC_MULTITCE
+Architectures: ppc
+Type: vm
+
+This capability means the kernel is capable of handling hypercalls
+H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user
+space. This significantly accelerates DMA operations for PPC KVM guests.
+User space should expect that its handlers for these hypercalls
+are not going to be called if user space previously registered LIOBN
+in KVM (via KVM_CREATE_SPAPR_TCE or similar calls).
+
+In order to enable H_PUT_TCE_INDIRECT and H_STUFF_TCE use in the guest,
+user space might have to advertise it for the guest. For example,
+IBM pSeries (sPAPR) guest starts using them if "hcall-multi-tce" is
+present in the "ibm,hypertas-functions" device-tree property.
+
+The hypercalls mentioned above may or may not be processed successfully
+in the kernel based fast path. If they can not be handled by the kernel,
+they will get passed on to user space. So user space still has to have
+an implementation for these despite the in kernel acceleration.
+
+This capability is always enabled.
+
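A minimal sketch of probing this capability from a VMM (kvm_fd is assumed to
be the open /dev/kvm file descriptor; not part of this commit):

	if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_MULTITCE) > 0) {
		/* The kernel fast path exists; advertise "hcall-multi-tce"
		 * in the guest's "ibm,hypertas-functions" property so the
		 * guest starts issuing H_PUT_TCE_INDIRECT/H_STUFF_TCE. */
	}
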
+4.98 KVM_CREATE_SPAPR_TCE_64
+
+Capability: KVM_CAP_SPAPR_TCE_64
+Architectures: powerpc
+Type: vm ioctl
+Parameters: struct kvm_create_spapr_tce_64 (in)
+Returns: file descriptor for manipulating the created TCE table
+
+This is an extension for KVM_CAP_SPAPR_TCE which only supports 32bit
+windows, described in 4.62 KVM_CREATE_SPAPR_TCE
+
+This capability uses extended struct in ioctl interface:
+
+/* for KVM_CAP_SPAPR_TCE_64 */
+struct kvm_create_spapr_tce_64 {
+	__u64 liobn;
+	__u32 page_shift;
+	__u32 flags;
+	__u64 offset;	/* in pages */
+	__u64 size;	/* in pages */
+};
+
+The aim of extension is to support an additional bigger DMA window with
+a variable page size.
+KVM_CREATE_SPAPR_TCE_64 receives a 64bit window size, an IOMMU page shift and
+a bus offset of the corresponding DMA window, @size and @offset are numbers
+of IOMMU pages.
+
+@flags are not used at the moment.
+
+The rest of functionality is identical to KVM_CREATE_SPAPR_TCE.
+
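A hedged usage sketch (the LIOBN, page size and offset values below are
illustrative assumptions, not from this commit; vm_fd comes from
KVM_CREATE_VM):

	struct kvm_create_spapr_tce_64 args = {
		.liobn      = 0x80000001,		/* hypothetical LIOBN */
		.page_shift = 16,			/* 64K IOMMU pages */
		.flags      = 0,			/* unused, must be 0 */
		.offset     = (1ULL << 59) >> 16,	/* bus offset, in IOMMU pages */
		.size       = 1ULL << 20,		/* 1M pages = 64G window */
	};
	int tce_fd = ioctl(vm_fd, KVM_CREATE_SPAPR_TCE_64, &args);
	/* tce_fd < 0 on error; otherwise it manipulates the TCE table. */
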
+4.98 KVM_REINJECT_CONTROL
+
+Capability: KVM_CAP_REINJECT_CONTROL
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_reinject_control (in)
+Returns: 0 on success,
+         -EFAULT if struct kvm_reinject_control cannot be read,
+         -ENXIO if KVM_CREATE_PIT or KVM_CREATE_PIT2 didn't succeed earlier.
+
+i8254 (PIT) has two modes, reinject and !reinject. The default is reinject,
+where KVM queues elapsed i8254 ticks and monitors completion of interrupt from
+vector(s) that i8254 injects. Reinject mode dequeues a tick and injects its
+interrupt whenever there isn't a pending interrupt from i8254.
+!reinject mode injects an interrupt as soon as a tick arrives.
+
+struct kvm_reinject_control {
+	__u8 pit_reinject;
+	__u8 reserved[31];
+};
+
+pit_reinject = 0 (!reinject mode) is recommended, unless running an old
+operating system that uses the PIT for timing (e.g. Linux 2.4.x).
+
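For example, a VMM that created an in-kernel PIT might switch it to the
recommended !reinject mode like this (a sketch assuming vm_fd and a prior
successful KVM_CREATE_PIT2):

	struct kvm_reinject_control control = {
		.pit_reinject = 0,	/* 0 = !reinject, 1 = reinject (default) */
	};
	if (ioctl(vm_fd, KVM_REINJECT_CONTROL, &control) < 0)
		perror("KVM_REINJECT_CONTROL");
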
 5. The kvm_run structure
 ------------------------
 
@@ -3339,6 +3424,7 @@ EOI was received.
 
 struct kvm_hyperv_exit {
 #define KVM_EXIT_HYPERV_SYNIC          1
+#define KVM_EXIT_HYPERV_HCALL          2
 	__u32 type;
 	union {
 		struct {
@@ -3347,6 +3433,11 @@ EOI was received.
 			__u64 evt_page;
 			__u64 msg_page;
 		} synic;
+		struct {
+			__u64 input;
+			__u64 result;
+			__u64 params[2];
+		} hcall;
 	} u;
 };
 /* KVM_EXIT_HYPERV */
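
A sketch of how a VMM's run loop might consume the new exit type (run is the
mmap'ed struct kvm_run for the vcpu; do_vmbus_hcall() is a hypothetical
userspace handler, not a KVM symbol):

	case KVM_EXIT_HYPERV:
		if (run->hyperv.type == KVM_EXIT_HYPERV_HCALL) {
			uint16_t code = run->hyperv.u.hcall.input & 0xffff;

			do_vmbus_hcall(code, run->hyperv.u.hcall.params);
			run->hyperv.u.hcall.result = 0;	/* HV_STATUS_SUCCESS */
		}
		break;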
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
index d1ad9d5cae46..e3e314cb83e8 100644
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -88,6 +88,8 @@ struct kvm_s390_io_adapter_req {
       perform a gmap translation for the guest address provided in addr,
       pin a userspace page for the translated address and add it to the
       list of mappings
+      Note: A new mapping will be created unconditionally; therefore,
+            the calling code should avoid making duplicate mappings.
 
     KVM_S390_IO_ADAPTER_UNMAP
       release a userspace page for the translated address specified in addr
diff --git a/Documentation/virtual/kvm/devices/vcpu.txt b/Documentation/virtual/kvm/devices/vcpu.txt
new file mode 100644
index 000000000000..c04165868faf
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/vcpu.txt
@@ -0,0 +1,33 @@
+Generic vcpu interface
+====================================
+
+The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
+KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
+kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
+
+The groups and attributes per virtual cpu, if any, are architecture specific.
+
+1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
+Architectures: ARM64
+
+1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
+Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
+            pointer to an int
+Returns: -EBUSY: The PMU overflow interrupt is already set
+         -ENXIO: The overflow interrupt not set when attempting to get it
+         -ENODEV: PMUv3 not supported
+         -EINVAL: Invalid PMU overflow interrupt number supplied
+
+A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
+number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
+type must be same for each vcpu. As a PPI, the interrupt number is the same for
+all vcpus, while as an SPI it must be a separate number per vcpu.
+
+1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
+Parameters: no additional parameter in kvm_device_attr.addr
+Returns: -ENODEV: PMUv3 not supported
+         -ENXIO: PMUv3 not properly configured as required prior to calling this
+                 attribute
+         -EBUSY: PMUv3 already initialized
+
+Request the initialization of the PMUv3.
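
Tying the two attributes together, userspace would typically do something
like this (a sketch; vcpu_fd and the PPI number 23 are assumptions, error
handling elided):

	int irq = 23;	/* hypothetical PPI, identical on every vcpu */
	struct kvm_device_attr attr = {
		.group = KVM_ARM_VCPU_PMU_V3_CTRL,
		.attr  = KVM_ARM_VCPU_PMU_V3_IRQ,
		.addr  = (__u64)(unsigned long)&irq,	/* pointer to an int */
	};

	ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);	/* set the overflow IRQ */

	attr.attr = KVM_ARM_VCPU_PMU_V3_INIT;
	attr.addr = 0;					/* no parameter */
	ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);	/* then initialize */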
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
index f083a168eb35..a9ea8774a45f 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -84,3 +84,55 @@ Returns: -EBUSY in case 1 or more vcpus are already activated (only in write
          -EFAULT if the given address is not accessible from kernel space
          -ENOMEM if not enough memory is available to process the ioctl
          0 in case of success
+
+3. GROUP: KVM_S390_VM_TOD
+Architectures: s390
+
+3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH
+
+Allows user space to set/get the TOD clock extension (u8).
+
+Parameters: address of a buffer in user space to store the data (u8) to
+Returns: -EFAULT if the given address is not accessible from kernel space
+         -EINVAL if setting the TOD clock extension to != 0 is not supported
+
+3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW
+
+Allows user space to set/get bits 0-63 of the TOD clock register as defined in
+the POP (u64).
+
+Parameters: address of a buffer in user space to store the data (u64) to
+Returns: -EFAULT if the given address is not accessible from kernel space
+
+4. GROUP: KVM_S390_VM_CRYPTO
+Architectures: s390
+
+4.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o)
+
+Allows user space to enable aes key wrapping, including generating a new
+wrapping key.
+
+Parameters: none
+Returns: 0
+
+4.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o)
+
+Allows user space to enable dea key wrapping, including generating a new
+wrapping key.
+
+Parameters: none
+Returns: 0
+
+4.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o)
+
+Allows user space to disable aes key wrapping, clearing the wrapping key.
+
+Parameters: none
+Returns: 0
+
+4.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o)
+
+Allows user space to disable dea key wrapping, clearing the wrapping key.
+
+Parameters: none
+Returns: 0
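
As an illustration of the read side of these VM attributes (a sketch; vm_fd
is assumed, error handling elided):

	__u64 tod_low;
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_TOD,
		.attr  = KVM_S390_VM_TOD_LOW,
		.addr  = (__u64)(unsigned long)&tod_low,
	};

	if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr) == 0)
		/* tod_low now holds bits 0-63 of the guest TOD clock */;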
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index c81731096a43..481b6a9c25d5 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -392,11 +392,11 @@ To instantiate a large spte, four constraints must be satisfied:
   write-protected pages
 - the guest page must be wholly contained by a single memory slot
 
-To check the last two conditions, the mmu maintains a ->write_count set of
+To check the last two conditions, the mmu maintains a ->disallow_lpage set of
 arrays for each memory slot and large page size. Every write protected page
-causes its write_count to be incremented, thus preventing instantiation of
+causes its disallow_lpage to be incremented, thus preventing instantiation of
 a large spte. The frames at the end of an unaligned memory slot have
-artificially inflated ->write_counts so they can never be instantiated.
+artificially inflated ->disallow_lpages so they can never be instantiated.
 
 Zapping all pages (page generation count)
 =========================================
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 194c91b610ff..15d58b42d5a1 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -19,38 +19,7 @@
 #ifndef __ARM_KVM_ASM_H__
 #define __ARM_KVM_ASM_H__
 
-/* 0 is reserved as an invalid value. */
-#define c0_MPIDR	1	/* MultiProcessor ID Register */
-#define c0_CSSELR	2	/* Cache Size Selection Register */
-#define c1_SCTLR	3	/* System Control Register */
-#define c1_ACTLR	4	/* Auxiliary Control Register */
-#define c1_CPACR	5	/* Coprocessor Access Control */
-#define c2_TTBR0	6	/* Translation Table Base Register 0 */
-#define c2_TTBR0_high	7	/* TTBR0 top 32 bits */
-#define c2_TTBR1	8	/* Translation Table Base Register 1 */
-#define c2_TTBR1_high	9	/* TTBR1 top 32 bits */
-#define c2_TTBCR	10	/* Translation Table Base Control R. */
-#define c3_DACR		11	/* Domain Access Control Register */
-#define c5_DFSR		12	/* Data Fault Status Register */
-#define c5_IFSR		13	/* Instruction Fault Status Register */
-#define c5_ADFSR	14	/* Auxilary Data Fault Status R */
-#define c5_AIFSR	15	/* Auxilary Instrunction Fault Status R */
-#define c6_DFAR		16	/* Data Fault Address Register */
-#define c6_IFAR		17	/* Instruction Fault Address Register */
-#define c7_PAR		18	/* Physical Address Register */
-#define c7_PAR_high	19	/* PAR top 32 bits */
-#define c9_L2CTLR	20	/* Cortex A15/A7 L2 Control Register */
-#define c10_PRRR	21	/* Primary Region Remap Register */
-#define c10_NMRR	22	/* Normal Memory Remap Register */
-#define c12_VBAR	23	/* Vector Base Address Register */
-#define c13_CID		24	/* Context ID Register */
-#define c13_TID_URW	25	/* Thread ID, User R/W */
-#define c13_TID_URO	26	/* Thread ID, User R/O */
-#define c13_TID_PRIV	27	/* Thread ID, Privileged */
-#define c14_CNTKCTL	28	/* Timer Control Register (PL1) */
-#define c10_AMAIR0	29	/* Auxilary Memory Attribute Indirection Reg0 */
-#define c10_AMAIR1	30	/* Auxilary Memory Attribute Indirection Reg1 */
-#define NR_CP15_REGS	31	/* Number of regs (incl. invalid) */
+#include <asm/virt.h>
 
 #define ARM_EXCEPTION_RESET		0
 #define ARM_EXCEPTION_UNDEFINED		1
@@ -86,19 +55,15 @@ struct kvm_vcpu;
 extern char __kvm_hyp_init[];
 extern char __kvm_hyp_init_end[];
 
-extern char __kvm_hyp_exit[];
-extern char __kvm_hyp_exit_end[];
-
 extern char __kvm_hyp_vector[];
 
-extern char __kvm_hyp_code_start[];
-extern char __kvm_hyp_code_end[];
-
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+extern void __init_stage2_translation(void);
 #endif
 
 #endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 3095df091ff8..ee5328fc4b06 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -68,12 +68,12 @@ static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
 
 static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu)
 {
-	return &vcpu->arch.regs.usr_regs.ARM_pc;
+	return &vcpu->arch.ctxt.gp_regs.usr_regs.ARM_pc;
 }
 
 static inline unsigned long *vcpu_cpsr(struct kvm_vcpu *vcpu)
 {
-	return &vcpu->arch.regs.usr_regs.ARM_cpsr;
+	return &vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr;
 }
 
 static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
@@ -83,13 +83,13 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
 
 static inline bool mode_has_spsr(struct kvm_vcpu *vcpu)
 {
-	unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK;
+	unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK;
 	return (cpsr_mode > USR_MODE && cpsr_mode < SYSTEM_MODE);
 }
 
 static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
 {
-	unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK;
+	unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK;
 	return cpsr_mode > USR_MODE;;
 }
 
@@ -108,11 +108,6 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(struct kvm_vcpu *vcpu)
 	return ((phys_addr_t)vcpu->arch.fault.hpfar & HPFAR_MASK) << 8;
 }
 
-static inline unsigned long kvm_vcpu_get_hyp_pc(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.fault.hyp_pc;
-}
-
 static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_get_hsr(vcpu) & HSR_ISV;
@@ -143,6 +138,11 @@ static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW;
 }
 
+static inline bool kvm_vcpu_dabt_is_cm(struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & HSR_DABT_CM);
+}
+
 /* Get Access Size from a data abort */
 static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu)
 {
@@ -192,7 +192,7 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
 
 static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.cp15[c0_MPIDR] & MPIDR_HWID_BITMASK;
+	return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK;
 }
 
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index f9f27792d8ed..385070180c25 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -85,20 +85,61 @@ struct kvm_vcpu_fault_info {
 	u32 hsr;		/* Hyp Syndrome Register */
 	u32 hxfar;		/* Hyp Data/Inst. Fault Address Register */
 	u32 hpfar;		/* Hyp IPA Fault Address Register */
-	u32 hyp_pc;		/* PC when exception was taken from Hyp mode */
 };
 
-typedef struct vfp_hard_struct kvm_cpu_context_t;
+/*
+ * 0 is reserved as an invalid value.
+ * Order should be kept in sync with the save/restore code.
+ */
+enum vcpu_sysreg {
+	__INVALID_SYSREG__,
+	c0_MPIDR,		/* MultiProcessor ID Register */
+	c0_CSSELR,		/* Cache Size Selection Register */
+	c1_SCTLR,		/* System Control Register */
+	c1_ACTLR,		/* Auxiliary Control Register */
+	c1_CPACR,		/* Coprocessor Access Control */
+	c2_TTBR0,		/* Translation Table Base Register 0 */
+	c2_TTBR0_high,		/* TTBR0 top 32 bits */
+	c2_TTBR1,		/* Translation Table Base Register 1 */
+	c2_TTBR1_high,		/* TTBR1 top 32 bits */
+	c2_TTBCR,		/* Translation Table Base Control R. */
+	c3_DACR,		/* Domain Access Control Register */
+	c5_DFSR,		/* Data Fault Status Register */
+	c5_IFSR,		/* Instruction Fault Status Register */
+	c5_ADFSR,		/* Auxilary Data Fault Status R */
+	c5_AIFSR,		/* Auxilary Instrunction Fault Status R */
+	c6_DFAR,		/* Data Fault Address Register */
+	c6_IFAR,		/* Instruction Fault Address Register */
+	c7_PAR,			/* Physical Address Register */
+	c7_PAR_high,		/* PAR top 32 bits */
+	c9_L2CTLR,		/* Cortex A15/A7 L2 Control Register */
+	c10_PRRR,		/* Primary Region Remap Register */
+	c10_NMRR,		/* Normal Memory Remap Register */
+	c12_VBAR,		/* Vector Base Address Register */
+	c13_CID,		/* Context ID Register */
+	c13_TID_URW,		/* Thread ID, User R/W */
+	c13_TID_URO,		/* Thread ID, User R/O */
+	c13_TID_PRIV,		/* Thread ID, Privileged */
+	c14_CNTKCTL,		/* Timer Control Register (PL1) */
+	c10_AMAIR0,		/* Auxilary Memory Attribute Indirection Reg0 */
+	c10_AMAIR1,		/* Auxilary Memory Attribute Indirection Reg1 */
+	NR_CP15_REGS		/* Number of regs (incl. invalid) */
+};
+
+struct kvm_cpu_context {
+	struct kvm_regs	gp_regs;
+	struct vfp_hard_struct vfp;
+	u32 cp15[NR_CP15_REGS];
+};
+
+typedef struct kvm_cpu_context kvm_cpu_context_t;
 
 struct kvm_vcpu_arch {
-	struct kvm_regs regs;
+	struct kvm_cpu_context ctxt;
 
 	int target; /* Processor target */
 	DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
 
-	/* System control coprocessor (cp15) */
-	u32 cp15[NR_CP15_REGS];
-
 	/* The CPU type we expose to the VM */
 	u32 midr;
 
@@ -111,9 +152,6 @@ struct kvm_vcpu_arch {
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
 
-	/* Floating point registers (VFP and Advanced SIMD/NEON) */
-	struct vfp_hard_struct vfp_guest;
-
 	/* Host FP context */
 	kvm_cpu_context_t *host_cpu_context;
 
@@ -158,12 +196,14 @@ struct kvm_vcpu_stat {
 	u64 exits;
 };
 
+#define vcpu_cp15(v,r)	(v)->arch.ctxt.cp15[r]
+
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
-u64 kvm_call_hyp(void *hypfn, ...);
+unsigned long kvm_call_hyp(void *hypfn, ...);
 void force_vm_exit(const cpumask_t *mask);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
@@ -220,6 +260,11 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 	kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
 }
 
+static inline void __cpu_init_stage2(void)
+{
+	kvm_call_hyp(__init_stage2_translation);
+}
+
 static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 {
 	return 0;
@@ -242,5 +287,20 @@ static inline void kvm_arm_init_debug(void) {}
 static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
+static inline int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
+					     struct kvm_device_attr *attr)
+{
+	return -ENXIO;
+}
+static inline int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
+					     struct kvm_device_attr *attr)
+{
+	return -ENXIO;
+}
+static inline int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
+					     struct kvm_device_attr *attr)
+{
+	return -ENXIO;
+}
 
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
new file mode 100644
index 000000000000..f0e860761380
--- /dev/null
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_HYP_H__
+#define __ARM_KVM_HYP_H__
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_mmu.h>
+#include <asm/vfp.h>
+
+#define __hyp_text __section(.hyp.text) notrace
+
+#define kern_hyp_va(v) (v)
+#define hyp_kern_va(v) (v)
+
+#define __ACCESS_CP15(CRn, Op1, CRm, Op2)	\
+	"mrc", "mcr", __stringify(p15, Op1, %0, CRn, CRm, Op2), u32
+#define __ACCESS_CP15_64(Op1, CRm)		\
+	"mrrc", "mcrr", __stringify(p15, Op1, %Q0, %R0, CRm), u64
+#define __ACCESS_VFP(CRn)			\
+	"mrc", "mcr", __stringify(p10, 7, %0, CRn, cr0, 0), u32
+
+#define __write_sysreg(v, r, w, c, t)	asm volatile(w " " c : : "r" ((t)(v)))
+#define write_sysreg(v, ...)		__write_sysreg(v, __VA_ARGS__)
+
+#define __read_sysreg(r, w, c, t) ({				\
+	t __val;						\
+	asm volatile(r " " c : "=r" (__val));			\
+	__val;							\
+})
+#define read_sysreg(...)		__read_sysreg(__VA_ARGS__)
+
+#define write_special(v, r)					\
+	asm volatile("msr " __stringify(r) ", %0" : : "r" (v))
+#define read_special(r) ({					\
+	u32 __val;						\
+	asm volatile("mrs %0, " __stringify(r) : "=r" (__val));	\
+	__val;							\
+})
+
+#define TTBR0		__ACCESS_CP15_64(0, c2)
+#define TTBR1		__ACCESS_CP15_64(1, c2)
+#define VTTBR		__ACCESS_CP15_64(6, c2)
+#define PAR		__ACCESS_CP15_64(0, c7)
+#define CNTV_CVAL	__ACCESS_CP15_64(3, c14)
+#define CNTVOFF		__ACCESS_CP15_64(4, c14)
+
+#define MIDR		__ACCESS_CP15(c0, 0, c0, 0)
+#define CSSELR		__ACCESS_CP15(c0, 2, c0, 0)
+#define VPIDR		__ACCESS_CP15(c0, 4, c0, 0)
+#define VMPIDR		__ACCESS_CP15(c0, 4, c0, 5)
+#define SCTLR		__ACCESS_CP15(c1, 0, c0, 0)
+#define CPACR		__ACCESS_CP15(c1, 0, c0, 2)
+#define HCR		__ACCESS_CP15(c1, 4, c1, 0)
+#define HDCR		__ACCESS_CP15(c1, 4, c1, 1)
+#define HCPTR		__ACCESS_CP15(c1, 4, c1, 2)
+#define HSTR		__ACCESS_CP15(c1, 4, c1, 3)
+#define TTBCR		__ACCESS_CP15(c2, 0, c0, 2)
+#define HTCR		__ACCESS_CP15(c2, 4, c0, 2)
+#define VTCR		__ACCESS_CP15(c2, 4, c1, 2)
+#define DACR		__ACCESS_CP15(c3, 0, c0, 0)
+#define DFSR		__ACCESS_CP15(c5, 0, c0, 0)
+#define IFSR		__ACCESS_CP15(c5, 0, c0, 1)
+#define ADFSR		__ACCESS_CP15(c5, 0, c1, 0)
+#define AIFSR		__ACCESS_CP15(c5, 0, c1, 1)
+#define HSR		__ACCESS_CP15(c5, 4, c2, 0)
+#define DFAR		__ACCESS_CP15(c6, 0, c0, 0)
+#define IFAR		__ACCESS_CP15(c6, 0, c0, 2)
+#define HDFAR		__ACCESS_CP15(c6, 4, c0, 0)
+#define HIFAR		__ACCESS_CP15(c6, 4, c0, 2)
+#define HPFAR		__ACCESS_CP15(c6, 4, c0, 4)
+#define ICIALLUIS	__ACCESS_CP15(c7, 0, c1, 0)
+#define ATS1CPR		__ACCESS_CP15(c7, 0, c8, 0)
+#define TLBIALLIS	__ACCESS_CP15(c8, 0, c3, 0)
+#define TLBIALLNSNHIS	__ACCESS_CP15(c8, 4, c3, 4)
+#define PRRR		__ACCESS_CP15(c10, 0, c2, 0)
+#define NMRR		__ACCESS_CP15(c10, 0, c2, 1)
+#define AMAIR0		__ACCESS_CP15(c10, 0, c3, 0)
+#define AMAIR1		__ACCESS_CP15(c10, 0, c3, 1)
+#define VBAR		__ACCESS_CP15(c12, 0, c0, 0)
+#define CID		__ACCESS_CP15(c13, 0, c0, 1)
+#define TID_URW		__ACCESS_CP15(c13, 0, c0, 2)
+#define TID_URO		__ACCESS_CP15(c13, 0, c0, 3)
+#define TID_PRIV	__ACCESS_CP15(c13, 0, c0, 4)
+#define HTPIDR		__ACCESS_CP15(c13, 4, c0, 2)
+#define CNTKCTL		__ACCESS_CP15(c14, 0, c1, 0)
+#define CNTV_CTL	__ACCESS_CP15(c14, 0, c3, 1)
+#define CNTHCTL		__ACCESS_CP15(c14, 4, c1, 0)
+
+#define VFP_FPEXC	__ACCESS_VFP(FPEXC)
+
+/* AArch64 compatibility macros, only for the timer so far */
+#define read_sysreg_el0(r)	read_sysreg(r##_el0)
+#define write_sysreg_el0(v, r)	write_sysreg(v, r##_el0)
+
+#define cntv_ctl_el0	CNTV_CTL
+#define cntv_cval_el0	CNTV_CVAL
+#define cntvoff_el2	CNTVOFF
+#define cnthctl_el2	CNTHCTL
+
+void __timer_save_state(struct kvm_vcpu *vcpu);
+void __timer_restore_state(struct kvm_vcpu *vcpu);
+
+void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
+void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
+
+void __sysreg_save_state(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
+
+void asmlinkage __vfp_save_state(struct vfp_hard_struct *vfp);
+void asmlinkage __vfp_restore_state(struct vfp_hard_struct *vfp);
+static inline bool __vfp_enabled(void)
+{
+	return !(read_sysreg(HCPTR) & (HCPTR_TCP(11) | HCPTR_TCP(10)));
+}
+
+void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt);
+void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt);
+
+int asmlinkage __guest_enter(struct kvm_vcpu *vcpu,
+			     struct kvm_cpu_context *host);
+int asmlinkage __hyp_do_panic(const char *, int, u32);
+
+#endif /* __ARM_KVM_HYP_H__ */
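
To see how the accessor macros in this new header fit together: each register
name expands to an argument pack (read mnemonic, write mnemonic, coprocessor
encoding string, C type), which read_sysreg()/write_sysreg() splice into
inline assembly. A rough illustration of the expansion for SCTLR (an
editorial sketch, not part of the patch):

	u32 val = read_sysreg(SCTLR);
	/* ... expands to roughly: */
	u32 val = ({
		u32 __val;
		asm volatile("mrc " "p15, 0, %0, c1, c0, 0" : "=r" (__val));
		__val;
	});

	write_sysreg(val, SCTLR);
	/* ... expands to roughly: */
	asm volatile("mcr " "p15, 0, %0, c1, c0, 0" : : "r" ((u32)(val)));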
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index a520b7987a29..da44be9db4fa 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -179,7 +179,7 @@ struct kvm;
 
 static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 {
-	return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
+	return (vcpu_cp15(vcpu, c1_SCTLR) & 0b101) == 0b101;
 }
 
 static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h
index 4371f45c5784..d4ceaf5f299b 100644
--- a/arch/arm/include/asm/virt.h
+++ b/arch/arm/include/asm/virt.h
@@ -74,6 +74,15 @@ static inline bool is_hyp_mode_mismatched(void)
 {
 	return !!(__boot_cpu_mode & BOOT_CPU_MODE_MISMATCH);
 }
+
+static inline bool is_kernel_in_hyp_mode(void)
+{
+	return false;
+}
+
+/* The section containing the hypervisor text */
+extern char __hyp_text_start[];
+extern char __hyp_text_end[];
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 871b8267d211..27d05813ff09 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -170,41 +170,11 @@ int main(void)
   DEFINE(CACHE_WRITEBACK_GRANULE, __CACHE_WRITEBACK_GRANULE);
   BLANK();
 #ifdef CONFIG_KVM_ARM_HOST
-  DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
-  DEFINE(VCPU_MIDR,		offsetof(struct kvm_vcpu, arch.midr));
-  DEFINE(VCPU_CP15,		offsetof(struct kvm_vcpu, arch.cp15));
-  DEFINE(VCPU_VFP_GUEST,	offsetof(struct kvm_vcpu, arch.vfp_guest));
-  DEFINE(VCPU_VFP_HOST,		offsetof(struct kvm_vcpu, arch.host_cpu_context));
-  DEFINE(VCPU_REGS,		offsetof(struct kvm_vcpu, arch.regs));
-  DEFINE(VCPU_USR_REGS,		offsetof(struct kvm_vcpu, arch.regs.usr_regs));
-  DEFINE(VCPU_SVC_REGS,		offsetof(struct kvm_vcpu, arch.regs.svc_regs));
-  DEFINE(VCPU_ABT_REGS,		offsetof(struct kvm_vcpu, arch.regs.abt_regs));
-  DEFINE(VCPU_UND_REGS,		offsetof(struct kvm_vcpu, arch.regs.und_regs));
-  DEFINE(VCPU_IRQ_REGS,		offsetof(struct kvm_vcpu, arch.regs.irq_regs));
-  DEFINE(VCPU_FIQ_REGS,		offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
-  DEFINE(VCPU_PC,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc));
-  DEFINE(VCPU_CPSR,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr));
-  DEFINE(VCPU_HCR,		offsetof(struct kvm_vcpu, arch.hcr));
-  DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
-  DEFINE(VCPU_HSR,		offsetof(struct kvm_vcpu, arch.fault.hsr));
-  DEFINE(VCPU_HxFAR,		offsetof(struct kvm_vcpu, arch.fault.hxfar));
-  DEFINE(VCPU_HPFAR,		offsetof(struct kvm_vcpu, arch.fault.hpfar));
-  DEFINE(VCPU_HYP_PC,		offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
-  DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
-  DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
-  DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
-  DEFINE(VGIC_V2_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
-  DEFINE(VGIC_V2_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
-  DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
-  DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
-  DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
-  DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
-  DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
-  DEFINE(VCPU_TIMER_CNTV_CVAL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
-  DEFINE(KVM_TIMER_CNTVOFF,	offsetof(struct kvm, arch.timer.cntvoff));
-  DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
-  DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
-  DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
+  DEFINE(VCPU_GUEST_CTXT,	offsetof(struct kvm_vcpu, arch.ctxt));
+  DEFINE(VCPU_HOST_CTXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
+  DEFINE(CPU_CTXT_VFP,		offsetof(struct kvm_cpu_context, vfp));
+  DEFINE(CPU_CTXT_GP_REGS,	offsetof(struct kvm_cpu_context, gp_regs));
+  DEFINE(GP_REGS_USR,		offsetof(struct kvm_regs, usr_regs));
 #endif
   BLANK();
 #ifdef CONFIG_VDSO
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 8b60fde5ce48..b4139cbbbdd9 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -18,6 +18,11 @@
 		*(.proc.info.init)					\
 		VMLINUX_SYMBOL(__proc_info_end) = .;
 
+#define HYPERVISOR_TEXT							\
+		VMLINUX_SYMBOL(__hyp_text_start) = .;			\
+		*(.hyp.text)						\
+		VMLINUX_SYMBOL(__hyp_text_end) = .;
+
 #define IDMAP_TEXT							\
 	ALIGN_FUNCTION();						\
 	VMLINUX_SYMBOL(__idmap_text_start) = .;				\
@@ -108,6 +113,7 @@ SECTIONS
 			TEXT_TEXT
 			SCHED_TEXT
 			LOCK_TEXT
+			HYPERVISOR_TEXT
 			KPROBES_TEXT
 			*(.gnu.warning)
 			*(.glue_7)
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index c5eef02c52ba..eb1bf4309c13 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -17,6 +17,7 @@ AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
 KVM := ../../../virt/kvm
 kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
 
+obj-$(CONFIG_KVM_ARM_HOST) += hyp/
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 08e49c423c24..76552b51c7ae 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -28,6 +28,7 @@
 #include <linux/sched.h>
 #include <linux/kvm.h>
 #include <trace/events/kvm.h>
+#include <kvm/arm_pmu.h>
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -265,6 +266,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 	kvm_mmu_free_memory_caches(vcpu);
 	kvm_timer_vcpu_terminate(vcpu);
 	kvm_vgic_vcpu_destroy(vcpu);
+	kvm_pmu_vcpu_destroy(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
@@ -320,6 +322,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	vcpu->cpu = -1;
 
 	kvm_arm_set_running_vcpu(NULL);
+	kvm_timer_vcpu_put(vcpu);
 }
 
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
@@ -577,6 +580,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 * non-preemptible context.
 		 */
 		preempt_disable();
+		kvm_pmu_flush_hwstate(vcpu);
 		kvm_timer_flush_hwstate(vcpu);
 		kvm_vgic_flush_hwstate(vcpu);
 
@@ -593,6 +597,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
 			vcpu->arch.power_off || vcpu->arch.pause) {
 			local_irq_enable();
+			kvm_pmu_sync_hwstate(vcpu);
 			kvm_timer_sync_hwstate(vcpu);
 			kvm_vgic_sync_hwstate(vcpu);
 			preempt_enable();
@@ -642,10 +647,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
 
 		/*
-		 * We must sync the timer state before the vgic state so that
-		 * the vgic can properly sample the updated state of the
+		 * We must sync the PMU and timer state before the vgic state so
+		 * that the vgic can properly sample the updated state of the
 		 * interrupt line.
 		 */
+		kvm_pmu_sync_hwstate(vcpu);
 		kvm_timer_sync_hwstate(vcpu);
 
 		kvm_vgic_sync_hwstate(vcpu);
@@ -823,11 +829,54 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
+				 struct kvm_device_attr *attr)
+{
+	int ret = -ENXIO;
+
+	switch (attr->group) {
+	default:
+		ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
+		break;
+	}
+
+	return ret;
+}
+
+static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
+				 struct kvm_device_attr *attr)
+{
+	int ret = -ENXIO;
+
+	switch (attr->group) {
+	default:
+		ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
+		break;
+	}
+
+	return ret;
+}
+
+static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
+				 struct kvm_device_attr *attr)
+{
+	int ret = -ENXIO;
+
+	switch (attr->group) {
+	default:
+		ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
+		break;
+	}
+
+	return ret;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
 	struct kvm_vcpu *vcpu = filp->private_data;
 	void __user *argp = (void __user *)arg;
+	struct kvm_device_attr attr;
 
 	switch (ioctl) {
 	case KVM_ARM_VCPU_INIT: {
@@ -870,6 +919,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 			return -E2BIG;
 		return kvm_arm_copy_reg_indices(vcpu, user_list->reg);
 	}
+	case KVM_SET_DEVICE_ATTR: {
+		if (copy_from_user(&attr, argp, sizeof(attr)))
+			return -EFAULT;
+		return kvm_arm_vcpu_set_attr(vcpu, &attr);
+	}
+	case KVM_GET_DEVICE_ATTR: {
+		if (copy_from_user(&attr, argp, sizeof(attr)))
+			return -EFAULT;
+		return kvm_arm_vcpu_get_attr(vcpu, &attr);
+	}
+	case KVM_HAS_DEVICE_ATTR: {
+		if (copy_from_user(&attr, argp, sizeof(attr)))
+			return -EFAULT;
+		return kvm_arm_vcpu_has_attr(vcpu, &attr);
+	}
 	default:
 		return -EINVAL;
 	}
@@ -967,6 +1031,11 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
967 | } | 1031 | } |
968 | } | 1032 | } |
969 | 1033 | ||
1034 | static void cpu_init_stage2(void *dummy) | ||
1035 | { | ||
1036 | __cpu_init_stage2(); | ||
1037 | } | ||
1038 | |||
970 | static void cpu_init_hyp_mode(void *dummy) | 1039 | static void cpu_init_hyp_mode(void *dummy) |
971 | { | 1040 | { |
972 | phys_addr_t boot_pgd_ptr; | 1041 | phys_addr_t boot_pgd_ptr; |
@@ -985,6 +1054,7 @@ static void cpu_init_hyp_mode(void *dummy) | |||
985 | vector_ptr = (unsigned long)__kvm_hyp_vector; | 1054 | vector_ptr = (unsigned long)__kvm_hyp_vector; |
986 | 1055 | ||
987 | __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); | 1056 | __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); |
1057 | __cpu_init_stage2(); | ||
988 | 1058 | ||
989 | kvm_arm_init_debug(); | 1059 | kvm_arm_init_debug(); |
990 | } | 1060 | } |
@@ -1035,6 +1105,82 @@ static inline void hyp_cpu_pm_init(void) | |||
1035 | } | 1105 | } |
1036 | #endif | 1106 | #endif |
1037 | 1107 | ||
1108 | static void teardown_common_resources(void) | ||
1109 | { | ||
1110 | free_percpu(kvm_host_cpu_state); | ||
1111 | } | ||
1112 | |||
1113 | static int init_common_resources(void) | ||
1114 | { | ||
1115 | kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t); | ||
1116 | if (!kvm_host_cpu_state) { | ||
1117 | kvm_err("Cannot allocate host CPU state\n"); | ||
1118 | return -ENOMEM; | ||
1119 | } | ||
1120 | |||
1121 | return 0; | ||
1122 | } | ||
1123 | |||
1124 | static int init_subsystems(void) | ||
1125 | { | ||
1126 | int err; | ||
1127 | |||
1128 | /* | ||
1129 | * Init HYP view of VGIC | ||
1130 | */ | ||
1131 | err = kvm_vgic_hyp_init(); | ||
1132 | switch (err) { | ||
1133 | case 0: | ||
1134 | vgic_present = true; | ||
1135 | break; | ||
1136 | case -ENODEV: | ||
1137 | case -ENXIO: | ||
1138 | vgic_present = false; | ||
1139 | break; | ||
1140 | default: | ||
1141 | return err; | ||
1142 | } | ||
1143 | |||
1144 | /* | ||
1145 | * Init HYP architected timer support | ||
1146 | */ | ||
1147 | err = kvm_timer_hyp_init(); | ||
1148 | if (err) | ||
1149 | return err; | ||
1150 | |||
1151 | kvm_perf_init(); | ||
1152 | kvm_coproc_table_init(); | ||
1153 | |||
1154 | return 0; | ||
1155 | } | ||
1156 | |||
1157 | static void teardown_hyp_mode(void) | ||
1158 | { | ||
1159 | int cpu; | ||
1160 | |||
1161 | if (is_kernel_in_hyp_mode()) | ||
1162 | return; | ||
1163 | |||
1164 | free_hyp_pgds(); | ||
1165 | for_each_possible_cpu(cpu) | ||
1166 | free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); | ||
1167 | } | ||
1168 | |||
1169 | static int init_vhe_mode(void) | ||
1170 | { | ||
1171 | /* | ||
1172 | * Execute the init code on each CPU. | ||
1173 | */ | ||
1174 | on_each_cpu(cpu_init_stage2, NULL, 1); | ||
1175 | |||
1176 | /* set size of VMID supported by CPU */ | ||
1177 | kvm_vmid_bits = kvm_get_vmid_bits(); | ||
1178 | kvm_info("%d-bit VMID\n", kvm_vmid_bits); | ||
1179 | |||
1180 | kvm_info("VHE mode initialized successfully\n"); | ||
1181 | return 0; | ||
1182 | } | ||
1183 | |||
1038 | /** | 1184 | /** |
1039 | * Inits Hyp-mode on all online CPUs | 1185 | * Inits Hyp-mode on all online CPUs |
1040 | */ | 1186 | */ |
@@ -1065,7 +1211,7 @@ static int init_hyp_mode(void) | |||
1065 | stack_page = __get_free_page(GFP_KERNEL); | 1211 | stack_page = __get_free_page(GFP_KERNEL); |
1066 | if (!stack_page) { | 1212 | if (!stack_page) { |
1067 | err = -ENOMEM; | 1213 | err = -ENOMEM; |
1068 | goto out_free_stack_pages; | 1214 | goto out_err; |
1069 | } | 1215 | } |
1070 | 1216 | ||
1071 | per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page; | 1217 | per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page; |
@@ -1074,16 +1220,16 @@ static int init_hyp_mode(void) | |||
1074 | /* | 1220 | /* |
1075 | * Map the Hyp-code called directly from the host | 1221 | * Map the Hyp-code called directly from the host |
1076 | */ | 1222 | */ |
1077 | err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end); | 1223 | err = create_hyp_mappings(__hyp_text_start, __hyp_text_end); |
1078 | if (err) { | 1224 | if (err) { |
1079 | kvm_err("Cannot map world-switch code\n"); | 1225 | kvm_err("Cannot map world-switch code\n"); |
1080 | goto out_free_mappings; | 1226 | goto out_err; |
1081 | } | 1227 | } |
1082 | 1228 | ||
1083 | err = create_hyp_mappings(__start_rodata, __end_rodata); | 1229 | err = create_hyp_mappings(__start_rodata, __end_rodata); |
1084 | if (err) { | 1230 | if (err) { |
1085 | kvm_err("Cannot map rodata section\n"); | 1231 | kvm_err("Cannot map rodata section\n"); |
1086 | goto out_free_mappings; | 1232 | goto out_err; |
1087 | } | 1233 | } |
1088 | 1234 | ||
1089 | /* | 1235 | /* |
@@ -1095,20 +1241,10 @@ static int init_hyp_mode(void) | |||
1095 | 1241 | ||
1096 | if (err) { | 1242 | if (err) { |
1097 | kvm_err("Cannot map hyp stack\n"); | 1243 | kvm_err("Cannot map hyp stack\n"); |
1098 | goto out_free_mappings; | 1244 | goto out_err; |
1099 | } | 1245 | } |
1100 | } | 1246 | } |
1101 | 1247 | ||
1102 | /* | ||
1103 | * Map the host CPU structures | ||
1104 | */ | ||
1105 | kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t); | ||
1106 | if (!kvm_host_cpu_state) { | ||
1107 | err = -ENOMEM; | ||
1108 | kvm_err("Cannot allocate host CPU state\n"); | ||
1109 | goto out_free_mappings; | ||
1110 | } | ||
1111 | |||
1112 | for_each_possible_cpu(cpu) { | 1248 | for_each_possible_cpu(cpu) { |
1113 | kvm_cpu_context_t *cpu_ctxt; | 1249 | kvm_cpu_context_t *cpu_ctxt; |
1114 | 1250 | ||
@@ -1117,7 +1253,7 @@ static int init_hyp_mode(void) | |||
1117 | 1253 | ||
1118 | if (err) { | 1254 | if (err) { |
1119 | kvm_err("Cannot map host CPU state: %d\n", err); | 1255 | kvm_err("Cannot map host CPU state: %d\n", err); |
1120 | goto out_free_context; | 1256 | goto out_err; |
1121 | } | 1257 | } |
1122 | } | 1258 | } |
1123 | 1259 | ||
@@ -1126,34 +1262,22 @@ static int init_hyp_mode(void) | |||
1126 | */ | 1262 | */ |
1127 | on_each_cpu(cpu_init_hyp_mode, NULL, 1); | 1263 | on_each_cpu(cpu_init_hyp_mode, NULL, 1); |
1128 | 1264 | ||
1129 | /* | ||
1130 | * Init HYP view of VGIC | ||
1131 | */ | ||
1132 | err = kvm_vgic_hyp_init(); | ||
1133 | switch (err) { | ||
1134 | case 0: | ||
1135 | vgic_present = true; | ||
1136 | break; | ||
1137 | case -ENODEV: | ||
1138 | case -ENXIO: | ||
1139 | vgic_present = false; | ||
1140 | break; | ||
1141 | default: | ||
1142 | goto out_free_context; | ||
1143 | } | ||
1144 | |||
1145 | /* | ||
1146 | * Init HYP architected timer support | ||
1147 | */ | ||
1148 | err = kvm_timer_hyp_init(); | ||
1149 | if (err) | ||
1150 | goto out_free_context; | ||
1151 | |||
1152 | #ifndef CONFIG_HOTPLUG_CPU | 1265 | #ifndef CONFIG_HOTPLUG_CPU |
1153 | free_boot_hyp_pgd(); | 1266 | free_boot_hyp_pgd(); |
1154 | #endif | 1267 | #endif |
1155 | 1268 | ||
1156 | kvm_perf_init(); | 1269 | cpu_notifier_register_begin(); |
1270 | |||
1271 | err = __register_cpu_notifier(&hyp_init_cpu_nb); | ||
1272 | |||
1273 | cpu_notifier_register_done(); | ||
1274 | |||
1275 | if (err) { | ||
1276 | kvm_err("Cannot register HYP init CPU notifier (%d)\n", err); | ||
1277 | goto out_err; | ||
1278 | } | ||
1279 | |||
1280 | hyp_cpu_pm_init(); | ||
1157 | 1281 | ||
1158 | /* set size of VMID supported by CPU */ | 1282 | /* set size of VMID supported by CPU */ |
1159 | kvm_vmid_bits = kvm_get_vmid_bits(); | 1283 | kvm_vmid_bits = kvm_get_vmid_bits(); |
@@ -1162,14 +1286,9 @@ static int init_hyp_mode(void) | |||
1162 | kvm_info("Hyp mode initialized successfully\n"); | 1286 | kvm_info("Hyp mode initialized successfully\n"); |
1163 | 1287 | ||
1164 | return 0; | 1288 | return 0; |
1165 | out_free_context: | 1289 | |
1166 | free_percpu(kvm_host_cpu_state); | ||
1167 | out_free_mappings: | ||
1168 | free_hyp_pgds(); | ||
1169 | out_free_stack_pages: | ||
1170 | for_each_possible_cpu(cpu) | ||
1171 | free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); | ||
1172 | out_err: | 1290 | out_err: |
1291 | teardown_hyp_mode(); | ||
1173 | kvm_err("error initializing Hyp mode: %d\n", err); | 1292 | kvm_err("error initializing Hyp mode: %d\n", err); |
1174 | return err; | 1293 | return err; |
1175 | } | 1294 | } |
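The rewritten error handling funnels every failure to the single out_err label and lets teardown_hyp_mode() undo whatever was actually set up. That only works because each cleanup step is harmless on untouched state: free_page(0) is a no-op, and a per-cpu stack slot that was never allocated still reads as zero. The idiom in isolation, as a sketch:

    /* Single-exit cleanup: every step must be a no-op on unset state. */
    static unsigned long stack_page;        /* stays 0 until allocated */

    static void teardown(void)
    {
            free_page(stack_page);          /* free_page(0) does nothing */
            stack_page = 0;
    }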
@@ -1213,26 +1332,27 @@ int kvm_arch_init(void *opaque) | |||
1213 | } | 1332 | } |
1214 | } | 1333 | } |
1215 | 1334 | ||
1216 | cpu_notifier_register_begin(); | 1335 | err = init_common_resources(); |
1217 | |||
1218 | err = init_hyp_mode(); | ||
1219 | if (err) | 1336 | if (err) |
1220 | goto out_err; | 1337 | return err; |
1221 | 1338 | ||
1222 | err = __register_cpu_notifier(&hyp_init_cpu_nb); | 1339 | if (is_kernel_in_hyp_mode()) |
1223 | if (err) { | 1340 | err = init_vhe_mode(); |
1224 | kvm_err("Cannot register HYP init CPU notifier (%d)\n", err); | 1341 | else |
1342 | err = init_hyp_mode(); | ||
1343 | if (err) | ||
1225 | goto out_err; | 1344 | goto out_err; |
1226 | } | ||
1227 | |||
1228 | cpu_notifier_register_done(); | ||
1229 | 1345 | ||
1230 | hyp_cpu_pm_init(); | 1346 | err = init_subsystems(); |
1347 | if (err) | ||
1348 | goto out_hyp; | ||
1231 | 1349 | ||
1232 | kvm_coproc_table_init(); | ||
1233 | return 0; | 1350 | return 0; |
1351 | |||
1352 | out_hyp: | ||
1353 | teardown_hyp_mode(); | ||
1234 | out_err: | 1354 | out_err: |
1235 | cpu_notifier_register_done(); | 1355 | teardown_common_resources(); |
1236 | return err; | 1356 | return err; |
1237 | } | 1357 | } |
1238 | 1358 | ||
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index f3d88dc388bc..1bb2b79c01ff 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c | |||
@@ -16,6 +16,8 @@ | |||
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | 17 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | */ | 18 | */ |
19 | |||
20 | #include <linux/bsearch.h> | ||
19 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
20 | #include <linux/kvm_host.h> | 22 | #include <linux/kvm_host.h> |
21 | #include <linux/uaccess.h> | 23 | #include <linux/uaccess.h> |
@@ -54,8 +56,8 @@ static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu, | |||
54 | const struct coproc_reg *r, | 56 | const struct coproc_reg *r, |
55 | u64 val) | 57 | u64 val) |
56 | { | 58 | { |
57 | vcpu->arch.cp15[r->reg] = val & 0xffffffff; | 59 | vcpu_cp15(vcpu, r->reg) = val & 0xffffffff; |
58 | vcpu->arch.cp15[r->reg + 1] = val >> 32; | 60 | vcpu_cp15(vcpu, r->reg + 1) = val >> 32; |
59 | } | 61 | } |
60 | 62 | ||
61 | static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu, | 63 | static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu, |
@@ -63,9 +65,9 @@ static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu, | |||
63 | { | 65 | { |
64 | u64 val; | 66 | u64 val; |
65 | 67 | ||
66 | val = vcpu->arch.cp15[r->reg + 1]; | 68 | val = vcpu_cp15(vcpu, r->reg + 1); |
67 | val = val << 32; | 69 | val = val << 32; |
68 | val = val | vcpu->arch.cp15[r->reg]; | 70 | val = val | vcpu_cp15(vcpu, r->reg); |
69 | return val; | 71 | return val; |
70 | } | 72 | } |
71 | 73 | ||
@@ -104,7 +106,7 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) | |||
104 | * vcpu_id, but we read the 'U' bit from the underlying | 106 | * vcpu_id, but we read the 'U' bit from the underlying |
105 | * hardware directly. | 107 | * hardware directly. |
106 | */ | 108 | */ |
107 | vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) | | 109 | vcpu_cp15(vcpu, c0_MPIDR) = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) | |
108 | ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) | | 110 | ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) | |
109 | (vcpu->vcpu_id & 3)); | 111 | (vcpu->vcpu_id & 3)); |
110 | } | 112 | } |
@@ -117,7 +119,7 @@ static bool access_actlr(struct kvm_vcpu *vcpu, | |||
117 | if (p->is_write) | 119 | if (p->is_write) |
118 | return ignore_write(vcpu, p); | 120 | return ignore_write(vcpu, p); |
119 | 121 | ||
120 | *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; | 122 | *vcpu_reg(vcpu, p->Rt1) = vcpu_cp15(vcpu, c1_ACTLR); |
121 | return true; | 123 | return true; |
122 | } | 124 | } |
123 | 125 | ||
@@ -139,7 +141,7 @@ static bool access_l2ctlr(struct kvm_vcpu *vcpu, | |||
139 | if (p->is_write) | 141 | if (p->is_write) |
140 | return ignore_write(vcpu, p); | 142 | return ignore_write(vcpu, p); |
141 | 143 | ||
142 | *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; | 144 | *vcpu_reg(vcpu, p->Rt1) = vcpu_cp15(vcpu, c9_L2CTLR); |
143 | return true; | 145 | return true; |
144 | } | 146 | } |
145 | 147 | ||
@@ -156,7 +158,7 @@ static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) | |||
156 | ncores = min(ncores, 3U); | 158 | ncores = min(ncores, 3U); |
157 | l2ctlr |= (ncores & 3) << 24; | 159 | l2ctlr |= (ncores & 3) << 24; |
158 | 160 | ||
159 | vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; | 161 | vcpu_cp15(vcpu, c9_L2CTLR) = l2ctlr; |
160 | } | 162 | } |
161 | 163 | ||
162 | static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) | 164 | static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) |
@@ -171,7 +173,7 @@ static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) | |||
171 | else | 173 | else |
172 | actlr &= ~(1U << 6); | 174 | actlr &= ~(1U << 6); |
173 | 175 | ||
174 | vcpu->arch.cp15[c1_ACTLR] = actlr; | 176 | vcpu_cp15(vcpu, c1_ACTLR) = actlr; |
175 | } | 177 | } |
176 | 178 | ||
177 | /* | 179 | /* |
@@ -218,9 +220,9 @@ bool access_vm_reg(struct kvm_vcpu *vcpu, | |||
218 | 220 | ||
219 | BUG_ON(!p->is_write); | 221 | BUG_ON(!p->is_write); |
220 | 222 | ||
221 | vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1); | 223 | vcpu_cp15(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt1); |
222 | if (p->is_64bit) | 224 | if (p->is_64bit) |
223 | vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2); | 225 | vcpu_cp15(vcpu, r->reg + 1) = *vcpu_reg(vcpu, p->Rt2); |
224 | 226 | ||
225 | kvm_toggle_cache(vcpu, was_enabled); | 227 | kvm_toggle_cache(vcpu, was_enabled); |
226 | return true; | 228 | return true; |
@@ -381,17 +383,26 @@ static const struct coproc_reg cp15_regs[] = { | |||
381 | { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, | 383 | { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, |
382 | }; | 384 | }; |
383 | 385 | ||
386 | static int check_reg_table(const struct coproc_reg *table, unsigned int n) | ||
387 | { | ||
388 | unsigned int i; | ||
389 | |||
390 | for (i = 1; i < n; i++) { | ||
391 | if (cmp_reg(&table[i-1], &table[i]) >= 0) { | ||
392 | kvm_err("reg table %p out of order (%d)\n", table, i - 1); | ||
393 | return 1; | ||
394 | } | ||
395 | } | ||
396 | |||
397 | return 0; | ||
398 | } | ||
399 | |||
384 | /* Target specific emulation tables */ | 400 | /* Target specific emulation tables */ |
385 | static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS]; | 401 | static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS]; |
386 | 402 | ||
387 | void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table) | 403 | void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table) |
388 | { | 404 | { |
389 | unsigned int i; | 405 | BUG_ON(check_reg_table(table->table, table->num)); |
390 | |||
391 | for (i = 1; i < table->num; i++) | ||
392 | BUG_ON(cmp_reg(&table->table[i-1], | ||
393 | &table->table[i]) >= 0); | ||
394 | |||
395 | target_tables[table->target] = table; | 406 | target_tables[table->target] = table; |
396 | } | 407 | } |
397 | 408 | ||
@@ -405,29 +416,32 @@ static const struct coproc_reg *get_target_table(unsigned target, size_t *num) | |||
405 | return table->table; | 416 | return table->table; |
406 | } | 417 | } |
407 | 418 | ||
419 | #define reg_to_match_value(x) \ | ||
420 | ({ \ | ||
421 | unsigned long val; \ | ||
422 | val = (x)->CRn << 11; \ | ||
423 | val |= (x)->CRm << 7; \ | ||
424 | val |= (x)->Op1 << 4; \ | ||
425 | val |= (x)->Op2 << 1; \ | ||
426 | val |= !(x)->is_64bit; \ | ||
427 | val; \ | ||
428 | }) | ||
429 | |||
430 | static int match_reg(const void *key, const void *elt) | ||
431 | { | ||
432 | const unsigned long pval = (unsigned long)key; | ||
433 | const struct coproc_reg *r = elt; | ||
434 | |||
435 | return pval - reg_to_match_value(r); | ||
436 | } | ||
437 | |||
408 | static const struct coproc_reg *find_reg(const struct coproc_params *params, | 438 | static const struct coproc_reg *find_reg(const struct coproc_params *params, |
409 | const struct coproc_reg table[], | 439 | const struct coproc_reg table[], |
410 | unsigned int num) | 440 | unsigned int num) |
411 | { | 441 | { |
412 | unsigned int i; | 442 | unsigned long pval = reg_to_match_value(params); |
413 | |||
414 | for (i = 0; i < num; i++) { | ||
415 | const struct coproc_reg *r = &table[i]; | ||
416 | |||
417 | if (params->is_64bit != r->is_64) | ||
418 | continue; | ||
419 | if (params->CRn != r->CRn) | ||
420 | continue; | ||
421 | if (params->CRm != r->CRm) | ||
422 | continue; | ||
423 | if (params->Op1 != r->Op1) | ||
424 | continue; | ||
425 | if (params->Op2 != r->Op2) | ||
426 | continue; | ||
427 | 443 | ||
428 | return r; | 444 | return bsearch((void *)pval, table, num, sizeof(table[0]), match_reg); |
429 | } | ||
430 | return NULL; | ||
431 | } | 445 | } |
432 | 446 | ||
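find_reg() now delegates to bsearch(), which is only correct if the table is sorted by exactly the key reg_to_match_value() packs: CRn, then CRm, then Op1, then Op2, then the inverted 64-bit flag, matching the field order cmp_reg() compares. check_reg_table() asserts that invariant when tables are registered. A standalone sketch of the same pack-and-bsearch pattern, with deliberately simplified two-field keys rather than the widths used above:

    #include <stdio.h>
    #include <stdlib.h>

    struct reg { unsigned int crn, crm; };  /* simplified two-field key */

    static unsigned long pack(const struct reg *r)
    {
            return ((unsigned long)r->crn << 8) | r->crm;
    }

    static int match(const void *key, const void *elt)
    {
            /* bsearch() hands us (key, table element), like match_reg() above */
            return (long)((unsigned long)key - pack(elt));
    }

    int main(void)
    {
            /* Must be pre-sorted by pack() value, as check_reg_table() asserts */
            struct reg table[] = { { 0, 1 }, { 1, 0 }, { 1, 2 } };
            struct reg want = { 1, 2 };
            struct reg *r = bsearch((void *)pack(&want), table, 3,
                                    sizeof(table[0]), match);

            printf("found: %u.%u\n", r->crn, r->crm);   /* prints "found: 1.2" */
            return 0;
    }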
433 | static int emulate_cp15(struct kvm_vcpu *vcpu, | 447 | static int emulate_cp15(struct kvm_vcpu *vcpu, |
@@ -645,6 +659,9 @@ static struct coproc_reg invariant_cp15[] = { | |||
645 | { CRn( 0), CRm( 0), Op1( 0), Op2( 3), is32, NULL, get_TLBTR }, | 659 | { CRn( 0), CRm( 0), Op1( 0), Op2( 3), is32, NULL, get_TLBTR }, |
646 | { CRn( 0), CRm( 0), Op1( 0), Op2( 6), is32, NULL, get_REVIDR }, | 660 | { CRn( 0), CRm( 0), Op1( 0), Op2( 6), is32, NULL, get_REVIDR }, |
647 | 661 | ||
662 | { CRn( 0), CRm( 0), Op1( 1), Op2( 1), is32, NULL, get_CLIDR }, | ||
663 | { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR }, | ||
664 | |||
648 | { CRn( 0), CRm( 1), Op1( 0), Op2( 0), is32, NULL, get_ID_PFR0 }, | 665 | { CRn( 0), CRm( 1), Op1( 0), Op2( 0), is32, NULL, get_ID_PFR0 }, |
649 | { CRn( 0), CRm( 1), Op1( 0), Op2( 1), is32, NULL, get_ID_PFR1 }, | 666 | { CRn( 0), CRm( 1), Op1( 0), Op2( 1), is32, NULL, get_ID_PFR1 }, |
650 | { CRn( 0), CRm( 1), Op1( 0), Op2( 2), is32, NULL, get_ID_DFR0 }, | 667 | { CRn( 0), CRm( 1), Op1( 0), Op2( 2), is32, NULL, get_ID_DFR0 }, |
@@ -660,9 +677,6 @@ static struct coproc_reg invariant_cp15[] = { | |||
660 | { CRn( 0), CRm( 2), Op1( 0), Op2( 3), is32, NULL, get_ID_ISAR3 }, | 677 | { CRn( 0), CRm( 2), Op1( 0), Op2( 3), is32, NULL, get_ID_ISAR3 }, |
661 | { CRn( 0), CRm( 2), Op1( 0), Op2( 4), is32, NULL, get_ID_ISAR4 }, | 678 | { CRn( 0), CRm( 2), Op1( 0), Op2( 4), is32, NULL, get_ID_ISAR4 }, |
662 | { CRn( 0), CRm( 2), Op1( 0), Op2( 5), is32, NULL, get_ID_ISAR5 }, | 679 | { CRn( 0), CRm( 2), Op1( 0), Op2( 5), is32, NULL, get_ID_ISAR5 }, |
663 | |||
664 | { CRn( 0), CRm( 0), Op1( 1), Op2( 1), is32, NULL, get_CLIDR }, | ||
665 | { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR }, | ||
666 | }; | 680 | }; |
667 | 681 | ||
668 | /* | 682 | /* |
@@ -901,7 +915,7 @@ static int vfp_get_reg(const struct kvm_vcpu *vcpu, u64 id, void __user *uaddr) | |||
901 | if (vfpid < num_fp_regs()) { | 915 | if (vfpid < num_fp_regs()) { |
902 | if (KVM_REG_SIZE(id) != 8) | 916 | if (KVM_REG_SIZE(id) != 8) |
903 | return -ENOENT; | 917 | return -ENOENT; |
904 | return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpregs[vfpid], | 918 | return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpregs[vfpid], |
905 | id); | 919 | id); |
906 | } | 920 | } |
907 | 921 | ||
@@ -911,13 +925,13 @@ static int vfp_get_reg(const struct kvm_vcpu *vcpu, u64 id, void __user *uaddr) | |||
911 | 925 | ||
912 | switch (vfpid) { | 926 | switch (vfpid) { |
913 | case KVM_REG_ARM_VFP_FPEXC: | 927 | case KVM_REG_ARM_VFP_FPEXC: |
914 | return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpexc, id); | 928 | return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpexc, id); |
915 | case KVM_REG_ARM_VFP_FPSCR: | 929 | case KVM_REG_ARM_VFP_FPSCR: |
916 | return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpscr, id); | 930 | return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpscr, id); |
917 | case KVM_REG_ARM_VFP_FPINST: | 931 | case KVM_REG_ARM_VFP_FPINST: |
918 | return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpinst, id); | 932 | return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpinst, id); |
919 | case KVM_REG_ARM_VFP_FPINST2: | 933 | case KVM_REG_ARM_VFP_FPINST2: |
920 | return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpinst2, id); | 934 | return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpinst2, id); |
921 | case KVM_REG_ARM_VFP_MVFR0: | 935 | case KVM_REG_ARM_VFP_MVFR0: |
922 | val = fmrx(MVFR0); | 936 | val = fmrx(MVFR0); |
923 | return reg_to_user(uaddr, &val, id); | 937 | return reg_to_user(uaddr, &val, id); |
@@ -945,7 +959,7 @@ static int vfp_set_reg(struct kvm_vcpu *vcpu, u64 id, const void __user *uaddr) | |||
945 | if (vfpid < num_fp_regs()) { | 959 | if (vfpid < num_fp_regs()) { |
946 | if (KVM_REG_SIZE(id) != 8) | 960 | if (KVM_REG_SIZE(id) != 8) |
947 | return -ENOENT; | 961 | return -ENOENT; |
948 | return reg_from_user(&vcpu->arch.vfp_guest.fpregs[vfpid], | 962 | return reg_from_user(&vcpu->arch.ctxt.vfp.fpregs[vfpid], |
949 | uaddr, id); | 963 | uaddr, id); |
950 | } | 964 | } |
951 | 965 | ||
@@ -955,13 +969,13 @@ static int vfp_set_reg(struct kvm_vcpu *vcpu, u64 id, const void __user *uaddr) | |||
955 | 969 | ||
956 | switch (vfpid) { | 970 | switch (vfpid) { |
957 | case KVM_REG_ARM_VFP_FPEXC: | 971 | case KVM_REG_ARM_VFP_FPEXC: |
958 | return reg_from_user(&vcpu->arch.vfp_guest.fpexc, uaddr, id); | 972 | return reg_from_user(&vcpu->arch.ctxt.vfp.fpexc, uaddr, id); |
959 | case KVM_REG_ARM_VFP_FPSCR: | 973 | case KVM_REG_ARM_VFP_FPSCR: |
960 | return reg_from_user(&vcpu->arch.vfp_guest.fpscr, uaddr, id); | 974 | return reg_from_user(&vcpu->arch.ctxt.vfp.fpscr, uaddr, id); |
961 | case KVM_REG_ARM_VFP_FPINST: | 975 | case KVM_REG_ARM_VFP_FPINST: |
962 | return reg_from_user(&vcpu->arch.vfp_guest.fpinst, uaddr, id); | 976 | return reg_from_user(&vcpu->arch.ctxt.vfp.fpinst, uaddr, id); |
963 | case KVM_REG_ARM_VFP_FPINST2: | 977 | case KVM_REG_ARM_VFP_FPINST2: |
964 | return reg_from_user(&vcpu->arch.vfp_guest.fpinst2, uaddr, id); | 978 | return reg_from_user(&vcpu->arch.ctxt.vfp.fpinst2, uaddr, id); |
965 | /* These are invariant. */ | 979 | /* These are invariant. */ |
966 | case KVM_REG_ARM_VFP_MVFR0: | 980 | case KVM_REG_ARM_VFP_MVFR0: |
967 | if (reg_from_user(&val, uaddr, id)) | 981 | if (reg_from_user(&val, uaddr, id)) |
@@ -1030,7 +1044,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | |||
1030 | val = vcpu_cp15_reg64_get(vcpu, r); | 1044 | val = vcpu_cp15_reg64_get(vcpu, r); |
1031 | ret = reg_to_user(uaddr, &val, reg->id); | 1045 | ret = reg_to_user(uaddr, &val, reg->id); |
1032 | } else if (KVM_REG_SIZE(reg->id) == 4) { | 1046 | } else if (KVM_REG_SIZE(reg->id) == 4) { |
1033 | ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id); | 1047 | ret = reg_to_user(uaddr, &vcpu_cp15(vcpu, r->reg), reg->id); |
1034 | } | 1048 | } |
1035 | 1049 | ||
1036 | return ret; | 1050 | return ret; |
@@ -1060,7 +1074,7 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | |||
1060 | if (!ret) | 1074 | if (!ret) |
1061 | vcpu_cp15_reg64_set(vcpu, r, val); | 1075 | vcpu_cp15_reg64_set(vcpu, r, val); |
1062 | } else if (KVM_REG_SIZE(reg->id) == 4) { | 1076 | } else if (KVM_REG_SIZE(reg->id) == 4) { |
1063 | ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id); | 1077 | ret = reg_from_user(&vcpu_cp15(vcpu, r->reg), uaddr, reg->id); |
1064 | } | 1078 | } |
1065 | 1079 | ||
1066 | return ret; | 1080 | return ret; |
@@ -1096,7 +1110,7 @@ static int write_demux_regids(u64 __user *uindices) | |||
1096 | static u64 cp15_to_index(const struct coproc_reg *reg) | 1110 | static u64 cp15_to_index(const struct coproc_reg *reg) |
1097 | { | 1111 | { |
1098 | u64 val = KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT); | 1112 | u64 val = KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT); |
1099 | if (reg->is_64) { | 1113 | if (reg->is_64bit) { |
1100 | val |= KVM_REG_SIZE_U64; | 1114 | val |= KVM_REG_SIZE_U64; |
1101 | val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT); | 1115 | val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT); |
1102 | /* | 1116 | /* |
@@ -1210,8 +1224,8 @@ void kvm_coproc_table_init(void) | |||
1210 | unsigned int i; | 1224 | unsigned int i; |
1211 | 1225 | ||
1212 | /* Make sure tables are unique and in order. */ | 1226 | /* Make sure tables are unique and in order. */ |
1213 | for (i = 1; i < ARRAY_SIZE(cp15_regs); i++) | 1227 | BUG_ON(check_reg_table(cp15_regs, ARRAY_SIZE(cp15_regs))); |
1214 | BUG_ON(cmp_reg(&cp15_regs[i-1], &cp15_regs[i]) >= 0); | 1228 | BUG_ON(check_reg_table(invariant_cp15, ARRAY_SIZE(invariant_cp15))); |
1215 | 1229 | ||
1216 | /* We abuse the reset function to overwrite the table itself. */ | 1230 | /* We abuse the reset function to overwrite the table itself. */ |
1217 | for (i = 0; i < ARRAY_SIZE(invariant_cp15); i++) | 1231 | for (i = 0; i < ARRAY_SIZE(invariant_cp15); i++) |
@@ -1248,7 +1262,7 @@ void kvm_reset_coprocs(struct kvm_vcpu *vcpu) | |||
1248 | const struct coproc_reg *table; | 1262 | const struct coproc_reg *table; |
1249 | 1263 | ||
1250 | /* Catch someone adding a register without putting in reset entry. */ | 1264 | /* Catch someone adding a register without putting in reset entry. */ |
1251 | memset(vcpu->arch.cp15, 0x42, sizeof(vcpu->arch.cp15)); | 1265 | memset(vcpu->arch.ctxt.cp15, 0x42, sizeof(vcpu->arch.ctxt.cp15)); |
1252 | 1266 | ||
1253 | /* Generic chip reset first (so target could override). */ | 1267 | /* Generic chip reset first (so target could override). */ |
1254 | reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs)); | 1268 | reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs)); |
@@ -1257,6 +1271,6 @@ void kvm_reset_coprocs(struct kvm_vcpu *vcpu) | |||
1257 | reset_coproc_regs(vcpu, table, num); | 1271 | reset_coproc_regs(vcpu, table, num); |
1258 | 1272 | ||
1259 | for (num = 1; num < NR_CP15_REGS; num++) | 1273 | for (num = 1; num < NR_CP15_REGS; num++) |
1260 | if (vcpu->arch.cp15[num] == 0x42424242) | 1274 | if (vcpu_cp15(vcpu, num) == 0x42424242) |
1261 | panic("Didn't reset vcpu->arch.cp15[%zi]", num); | 1275 | panic("Didn't reset vcpu_cp15(vcpu, %zi)", num); |
1262 | } | 1276 | } |
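kvm_reset_coprocs() uses a poison-and-verify pattern: the whole cp15 array is filled with 0x42 bytes up front, so after all reset hooks have run, any slot still holding 0x42424242 unmasks a table entry that never got a reset function, and the panic() names it. The same idea reduced to a standalone sketch:

    #include <assert.h>
    #include <string.h>

    #define NR_REGS 4

    int main(void)
    {
            unsigned int regs[NR_REGS], i;

            memset(regs, 0x42, sizeof(regs));       /* poison every slot */

            for (i = 0; i < NR_REGS; i++)
                    regs[i] = 0;                    /* "reset" each register */

            for (i = 0; i < NR_REGS; i++)
                    assert(regs[i] != 0x42424242);  /* a miss would trip here */
            return 0;
    }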
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index 88d24a3a9778..eef1759c2b65 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h | |||
@@ -37,7 +37,7 @@ struct coproc_reg { | |||
37 | unsigned long Op1; | 37 | unsigned long Op1; |
38 | unsigned long Op2; | 38 | unsigned long Op2; |
39 | 39 | ||
40 | bool is_64; | 40 | bool is_64bit; |
41 | 41 | ||
42 | /* Trapped access from guest, if non-NULL. */ | 42 | /* Trapped access from guest, if non-NULL. */ |
43 | bool (*access)(struct kvm_vcpu *, | 43 | bool (*access)(struct kvm_vcpu *, |
@@ -47,7 +47,7 @@ struct coproc_reg { | |||
47 | /* Initialization for vcpu. */ | 47 | /* Initialization for vcpu. */ |
48 | void (*reset)(struct kvm_vcpu *, const struct coproc_reg *); | 48 | void (*reset)(struct kvm_vcpu *, const struct coproc_reg *); |
49 | 49 | ||
50 | /* Index into vcpu->arch.cp15[], or 0 if we don't need to save it. */ | 50 | /* Index into vcpu_cp15(vcpu, ...), or 0 if we don't need to save it. */ |
51 | unsigned long reg; | 51 | unsigned long reg; |
52 | 52 | ||
53 | /* Value (usually reset value) */ | 53 | /* Value (usually reset value) */ |
@@ -104,25 +104,25 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu, | |||
104 | const struct coproc_reg *r) | 104 | const struct coproc_reg *r) |
105 | { | 105 | { |
106 | BUG_ON(!r->reg); | 106 | BUG_ON(!r->reg); |
107 | BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.cp15)); | 107 | BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.ctxt.cp15)); |
108 | vcpu->arch.cp15[r->reg] = 0xdecafbad; | 108 | vcpu_cp15(vcpu, r->reg) = 0xdecafbad; |
109 | } | 109 | } |
110 | 110 | ||
111 | static inline void reset_val(struct kvm_vcpu *vcpu, const struct coproc_reg *r) | 111 | static inline void reset_val(struct kvm_vcpu *vcpu, const struct coproc_reg *r) |
112 | { | 112 | { |
113 | BUG_ON(!r->reg); | 113 | BUG_ON(!r->reg); |
114 | BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.cp15)); | 114 | BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.ctxt.cp15)); |
115 | vcpu->arch.cp15[r->reg] = r->val; | 115 | vcpu_cp15(vcpu, r->reg) = r->val; |
116 | } | 116 | } |
117 | 117 | ||
118 | static inline void reset_unknown64(struct kvm_vcpu *vcpu, | 118 | static inline void reset_unknown64(struct kvm_vcpu *vcpu, |
119 | const struct coproc_reg *r) | 119 | const struct coproc_reg *r) |
120 | { | 120 | { |
121 | BUG_ON(!r->reg); | 121 | BUG_ON(!r->reg); |
122 | BUG_ON(r->reg + 1 >= ARRAY_SIZE(vcpu->arch.cp15)); | 122 | BUG_ON(r->reg + 1 >= ARRAY_SIZE(vcpu->arch.ctxt.cp15)); |
123 | 123 | ||
124 | vcpu->arch.cp15[r->reg] = 0xdecafbad; | 124 | vcpu_cp15(vcpu, r->reg) = 0xdecafbad; |
125 | vcpu->arch.cp15[r->reg+1] = 0xd0c0ffee; | 125 | vcpu_cp15(vcpu, r->reg+1) = 0xd0c0ffee; |
126 | } | 126 | } |
127 | 127 | ||
128 | static inline int cmp_reg(const struct coproc_reg *i1, | 128 | static inline int cmp_reg(const struct coproc_reg *i1, |
@@ -141,7 +141,7 @@ static inline int cmp_reg(const struct coproc_reg *i1, | |||
141 | return i1->Op1 - i2->Op1; | 141 | return i1->Op1 - i2->Op1; |
142 | if (i1->Op2 != i2->Op2) | 142 | if (i1->Op2 != i2->Op2) |
143 | return i1->Op2 - i2->Op2; | 143 | return i1->Op2 - i2->Op2; |
144 | return i2->is_64 - i1->is_64; | 144 | return i2->is_64bit - i1->is_64bit; |
145 | } | 145 | } |
146 | 146 | ||
147 | 147 | ||
@@ -150,8 +150,8 @@ static inline int cmp_reg(const struct coproc_reg *i1, | |||
150 | #define CRm64(_x) .CRn = _x, .CRm = 0 | 150 | #define CRm64(_x) .CRn = _x, .CRm = 0 |
151 | #define Op1(_x) .Op1 = _x | 151 | #define Op1(_x) .Op1 = _x |
152 | #define Op2(_x) .Op2 = _x | 152 | #define Op2(_x) .Op2 = _x |
153 | #define is64 .is_64 = true | 153 | #define is64 .is_64bit = true |
154 | #define is32 .is_64 = false | 154 | #define is32 .is_64bit = false |
155 | 155 | ||
156 | bool access_vm_reg(struct kvm_vcpu *vcpu, | 156 | bool access_vm_reg(struct kvm_vcpu *vcpu, |
157 | const struct coproc_params *p, | 157 | const struct coproc_params *p, |
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index dc99159857b4..a494def3f195 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c | |||
@@ -112,7 +112,7 @@ static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][15] = { | |||
112 | */ | 112 | */ |
113 | unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) | 113 | unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) |
114 | { | 114 | { |
115 | unsigned long *reg_array = (unsigned long *)&vcpu->arch.regs; | 115 | unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs; |
116 | unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK; | 116 | unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK; |
117 | 117 | ||
118 | switch (mode) { | 118 | switch (mode) { |
@@ -147,15 +147,15 @@ unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu) | |||
147 | unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK; | 147 | unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK; |
148 | switch (mode) { | 148 | switch (mode) { |
149 | case SVC_MODE: | 149 | case SVC_MODE: |
150 | return &vcpu->arch.regs.KVM_ARM_SVC_spsr; | 150 | return &vcpu->arch.ctxt.gp_regs.KVM_ARM_SVC_spsr; |
151 | case ABT_MODE: | 151 | case ABT_MODE: |
152 | return &vcpu->arch.regs.KVM_ARM_ABT_spsr; | 152 | return &vcpu->arch.ctxt.gp_regs.KVM_ARM_ABT_spsr; |
153 | case UND_MODE: | 153 | case UND_MODE: |
154 | return &vcpu->arch.regs.KVM_ARM_UND_spsr; | 154 | return &vcpu->arch.ctxt.gp_regs.KVM_ARM_UND_spsr; |
155 | case IRQ_MODE: | 155 | case IRQ_MODE: |
156 | return &vcpu->arch.regs.KVM_ARM_IRQ_spsr; | 156 | return &vcpu->arch.ctxt.gp_regs.KVM_ARM_IRQ_spsr; |
157 | case FIQ_MODE: | 157 | case FIQ_MODE: |
158 | return &vcpu->arch.regs.KVM_ARM_FIQ_spsr; | 158 | return &vcpu->arch.ctxt.gp_regs.KVM_ARM_FIQ_spsr; |
159 | default: | 159 | default: |
160 | BUG(); | 160 | BUG(); |
161 | } | 161 | } |
@@ -266,8 +266,8 @@ void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) | |||
266 | 266 | ||
267 | static u32 exc_vector_base(struct kvm_vcpu *vcpu) | 267 | static u32 exc_vector_base(struct kvm_vcpu *vcpu) |
268 | { | 268 | { |
269 | u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; | 269 | u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); |
270 | u32 vbar = vcpu->arch.cp15[c12_VBAR]; | 270 | u32 vbar = vcpu_cp15(vcpu, c12_VBAR); |
271 | 271 | ||
272 | if (sctlr & SCTLR_V) | 272 | if (sctlr & SCTLR_V) |
273 | return 0xffff0000; | 273 | return 0xffff0000; |
@@ -282,7 +282,7 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu) | |||
282 | static void kvm_update_psr(struct kvm_vcpu *vcpu, unsigned long mode) | 282 | static void kvm_update_psr(struct kvm_vcpu *vcpu, unsigned long mode) |
283 | { | 283 | { |
284 | unsigned long cpsr = *vcpu_cpsr(vcpu); | 284 | unsigned long cpsr = *vcpu_cpsr(vcpu); |
285 | u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; | 285 | u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); |
286 | 286 | ||
287 | *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | mode; | 287 | *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | mode; |
288 | 288 | ||
@@ -357,22 +357,22 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) | |||
357 | 357 | ||
358 | if (is_pabt) { | 358 | if (is_pabt) { |
359 | /* Set IFAR and IFSR */ | 359 | /* Set IFAR and IFSR */ |
360 | vcpu->arch.cp15[c6_IFAR] = addr; | 360 | vcpu_cp15(vcpu, c6_IFAR) = addr; |
361 | is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31); | 361 | is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); |
362 | /* Always give debug fault for now - should give guest a clue */ | 362 | /* Always give debug fault for now - should give guest a clue */ |
363 | if (is_lpae) | 363 | if (is_lpae) |
364 | vcpu->arch.cp15[c5_IFSR] = 1 << 9 | 0x22; | 364 | vcpu_cp15(vcpu, c5_IFSR) = 1 << 9 | 0x22; |
365 | else | 365 | else |
366 | vcpu->arch.cp15[c5_IFSR] = 2; | 366 | vcpu_cp15(vcpu, c5_IFSR) = 2; |
367 | } else { /* !iabt */ | 367 | } else { /* !iabt */ |
368 | /* Set DFAR and DFSR */ | 368 | /* Set DFAR and DFSR */ |
369 | vcpu->arch.cp15[c6_DFAR] = addr; | 369 | vcpu_cp15(vcpu, c6_DFAR) = addr; |
370 | is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31); | 370 | is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); |
371 | /* Always give debug fault for now - should give guest a clue */ | 371 | /* Always give debug fault for now - should give guest a clue */ |
372 | if (is_lpae) | 372 | if (is_lpae) |
373 | vcpu->arch.cp15[c5_DFSR] = 1 << 9 | 0x22; | 373 | vcpu_cp15(vcpu, c5_DFSR) = 1 << 9 | 0x22; |
374 | else | 374 | else |
375 | vcpu->arch.cp15[c5_DFSR] = 2; | 375 | vcpu_cp15(vcpu, c5_DFSR) = 2; |
376 | } | 376 | } |
377 | 377 | ||
378 | } | 378 | } |
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 99361f11354a..9093ed0f8b2a 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c | |||
@@ -25,7 +25,6 @@ | |||
25 | #include <asm/cputype.h> | 25 | #include <asm/cputype.h> |
26 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
27 | #include <asm/kvm.h> | 27 | #include <asm/kvm.h> |
28 | #include <asm/kvm_asm.h> | ||
29 | #include <asm/kvm_emulate.h> | 28 | #include <asm/kvm_emulate.h> |
30 | #include <asm/kvm_coproc.h> | 29 | #include <asm/kvm_coproc.h> |
31 | 30 | ||
@@ -55,7 +54,7 @@ static u64 core_reg_offset_from_id(u64 id) | |||
55 | static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | 54 | static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) |
56 | { | 55 | { |
57 | u32 __user *uaddr = (u32 __user *)(long)reg->addr; | 56 | u32 __user *uaddr = (u32 __user *)(long)reg->addr; |
58 | struct kvm_regs *regs = &vcpu->arch.regs; | 57 | struct kvm_regs *regs = &vcpu->arch.ctxt.gp_regs; |
59 | u64 off; | 58 | u64 off; |
60 | 59 | ||
61 | if (KVM_REG_SIZE(reg->id) != 4) | 60 | if (KVM_REG_SIZE(reg->id) != 4) |
@@ -72,7 +71,7 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | |||
72 | static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | 71 | static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) |
73 | { | 72 | { |
74 | u32 __user *uaddr = (u32 __user *)(long)reg->addr; | 73 | u32 __user *uaddr = (u32 __user *)(long)reg->addr; |
75 | struct kvm_regs *regs = &vcpu->arch.regs; | 74 | struct kvm_regs *regs = &vcpu->arch.ctxt.gp_regs; |
76 | u64 off, val; | 75 | u64 off, val; |
77 | 76 | ||
78 | if (KVM_REG_SIZE(reg->id) != 4) | 77 | if (KVM_REG_SIZE(reg->id) != 4) |
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 3ede90d8b20b..3f1ef0dbc899 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c | |||
@@ -147,13 +147,6 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, | |||
147 | switch (exception_index) { | 147 | switch (exception_index) { |
148 | case ARM_EXCEPTION_IRQ: | 148 | case ARM_EXCEPTION_IRQ: |
149 | return 1; | 149 | return 1; |
150 | case ARM_EXCEPTION_UNDEFINED: | ||
151 | kvm_err("Undefined exception in Hyp mode at: %#08lx\n", | ||
152 | kvm_vcpu_get_hyp_pc(vcpu)); | ||
153 | BUG(); | ||
154 | panic("KVM: Hypervisor undefined exception!\n"); | ||
155 | case ARM_EXCEPTION_DATA_ABORT: | ||
156 | case ARM_EXCEPTION_PREF_ABORT: | ||
157 | case ARM_EXCEPTION_HVC: | 150 | case ARM_EXCEPTION_HVC: |
158 | /* | 151 | /* |
159 | * See ARM ARM B1.14.1: "Hyp traps on instructions | 152 | * See ARM ARM B1.14.1: "Hyp traps on instructions |
diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile new file mode 100644 index 000000000000..8dfa5f7f9290 --- /dev/null +++ b/arch/arm/kvm/hyp/Makefile | |||
@@ -0,0 +1,17 @@ | |||
1 | # | ||
2 | # Makefile for Kernel-based Virtual Machine module, HYP part | ||
3 | # | ||
4 | |||
5 | KVM=../../../../virt/kvm | ||
6 | |||
7 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o | ||
8 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o | ||
9 | |||
10 | obj-$(CONFIG_KVM_ARM_HOST) += tlb.o | ||
11 | obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o | ||
12 | obj-$(CONFIG_KVM_ARM_HOST) += vfp.o | ||
13 | obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o | ||
14 | obj-$(CONFIG_KVM_ARM_HOST) += entry.o | ||
15 | obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o | ||
16 | obj-$(CONFIG_KVM_ARM_HOST) += switch.o | ||
17 | obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o | ||
diff --git a/arch/arm/kvm/hyp/banked-sr.c b/arch/arm/kvm/hyp/banked-sr.c new file mode 100644 index 000000000000..111bda8cdebd --- /dev/null +++ b/arch/arm/kvm/hyp/banked-sr.c | |||
@@ -0,0 +1,77 @@ | |||
1 | /* | ||
2 | * Original code: | ||
3 | * Copyright (C) 2012 - Virtual Open Systems and Columbia University | ||
4 | * Author: Christoffer Dall <c.dall@virtualopensystems.com> | ||
5 | * | ||
6 | * Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #include <asm/kvm_hyp.h> | ||
22 | |||
23 | __asm__(".arch_extension virt"); | ||
24 | |||
25 | void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt) | ||
26 | { | ||
27 | ctxt->gp_regs.usr_regs.ARM_sp = read_special(SP_usr); | ||
28 | ctxt->gp_regs.usr_regs.ARM_pc = read_special(ELR_hyp); | ||
29 | ctxt->gp_regs.usr_regs.ARM_cpsr = read_special(SPSR); | ||
30 | ctxt->gp_regs.KVM_ARM_SVC_sp = read_special(SP_svc); | ||
31 | ctxt->gp_regs.KVM_ARM_SVC_lr = read_special(LR_svc); | ||
32 | ctxt->gp_regs.KVM_ARM_SVC_spsr = read_special(SPSR_svc); | ||
33 | ctxt->gp_regs.KVM_ARM_ABT_sp = read_special(SP_abt); | ||
34 | ctxt->gp_regs.KVM_ARM_ABT_lr = read_special(LR_abt); | ||
35 | ctxt->gp_regs.KVM_ARM_ABT_spsr = read_special(SPSR_abt); | ||
36 | ctxt->gp_regs.KVM_ARM_UND_sp = read_special(SP_und); | ||
37 | ctxt->gp_regs.KVM_ARM_UND_lr = read_special(LR_und); | ||
38 | ctxt->gp_regs.KVM_ARM_UND_spsr = read_special(SPSR_und); | ||
39 | ctxt->gp_regs.KVM_ARM_IRQ_sp = read_special(SP_irq); | ||
40 | ctxt->gp_regs.KVM_ARM_IRQ_lr = read_special(LR_irq); | ||
41 | ctxt->gp_regs.KVM_ARM_IRQ_spsr = read_special(SPSR_irq); | ||
42 | ctxt->gp_regs.KVM_ARM_FIQ_r8 = read_special(R8_fiq); | ||
43 | ctxt->gp_regs.KVM_ARM_FIQ_r9 = read_special(R9_fiq); | ||
44 | ctxt->gp_regs.KVM_ARM_FIQ_r10 = read_special(R10_fiq); | ||
45 | ctxt->gp_regs.KVM_ARM_FIQ_fp = read_special(R11_fiq); | ||
46 | ctxt->gp_regs.KVM_ARM_FIQ_ip = read_special(R12_fiq); | ||
47 | ctxt->gp_regs.KVM_ARM_FIQ_sp = read_special(SP_fiq); | ||
48 | ctxt->gp_regs.KVM_ARM_FIQ_lr = read_special(LR_fiq); | ||
49 | ctxt->gp_regs.KVM_ARM_FIQ_spsr = read_special(SPSR_fiq); | ||
50 | } | ||
51 | |||
52 | void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt) | ||
53 | { | ||
54 | write_special(ctxt->gp_regs.usr_regs.ARM_sp, SP_usr); | ||
55 | write_special(ctxt->gp_regs.usr_regs.ARM_pc, ELR_hyp); | ||
56 | write_special(ctxt->gp_regs.usr_regs.ARM_cpsr, SPSR_cxsf); | ||
57 | write_special(ctxt->gp_regs.KVM_ARM_SVC_sp, SP_svc); | ||
58 | write_special(ctxt->gp_regs.KVM_ARM_SVC_lr, LR_svc); | ||
59 | write_special(ctxt->gp_regs.KVM_ARM_SVC_spsr, SPSR_svc); | ||
60 | write_special(ctxt->gp_regs.KVM_ARM_ABT_sp, SP_abt); | ||
61 | write_special(ctxt->gp_regs.KVM_ARM_ABT_lr, LR_abt); | ||
62 | write_special(ctxt->gp_regs.KVM_ARM_ABT_spsr, SPSR_abt); | ||
63 | write_special(ctxt->gp_regs.KVM_ARM_UND_sp, SP_und); | ||
64 | write_special(ctxt->gp_regs.KVM_ARM_UND_lr, LR_und); | ||
65 | write_special(ctxt->gp_regs.KVM_ARM_UND_spsr, SPSR_und); | ||
66 | write_special(ctxt->gp_regs.KVM_ARM_IRQ_sp, SP_irq); | ||
67 | write_special(ctxt->gp_regs.KVM_ARM_IRQ_lr, LR_irq); | ||
68 | write_special(ctxt->gp_regs.KVM_ARM_IRQ_spsr, SPSR_irq); | ||
69 | write_special(ctxt->gp_regs.KVM_ARM_FIQ_r8, R8_fiq); | ||
70 | write_special(ctxt->gp_regs.KVM_ARM_FIQ_r9, R9_fiq); | ||
71 | write_special(ctxt->gp_regs.KVM_ARM_FIQ_r10, R10_fiq); | ||
72 | write_special(ctxt->gp_regs.KVM_ARM_FIQ_fp, R11_fiq); | ||
73 | write_special(ctxt->gp_regs.KVM_ARM_FIQ_ip, R12_fiq); | ||
74 | write_special(ctxt->gp_regs.KVM_ARM_FIQ_sp, SP_fiq); | ||
75 | write_special(ctxt->gp_regs.KVM_ARM_FIQ_lr, LR_fiq); | ||
76 | write_special(ctxt->gp_regs.KVM_ARM_FIQ_spsr, SPSR_fiq); | ||
77 | } | ||
diff --git a/arch/arm/kvm/hyp/cp15-sr.c b/arch/arm/kvm/hyp/cp15-sr.c new file mode 100644 index 000000000000..c4782812714c --- /dev/null +++ b/arch/arm/kvm/hyp/cp15-sr.c | |||
@@ -0,0 +1,84 @@ | |||
1 | /* | ||
2 | * Original code: | ||
3 | * Copyright (C) 2012 - Virtual Open Systems and Columbia University | ||
4 | * Author: Christoffer Dall <c.dall@virtualopensystems.com> | ||
5 | * | ||
6 | * Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #include <asm/kvm_hyp.h> | ||
22 | |||
23 | static u64 *cp15_64(struct kvm_cpu_context *ctxt, int idx) | ||
24 | { | ||
25 | return (u64 *)(ctxt->cp15 + idx); | ||
26 | } | ||
27 | |||
28 | void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) | ||
29 | { | ||
30 | ctxt->cp15[c0_MPIDR] = read_sysreg(VMPIDR); | ||
31 | ctxt->cp15[c0_CSSELR] = read_sysreg(CSSELR); | ||
32 | ctxt->cp15[c1_SCTLR] = read_sysreg(SCTLR); | ||
33 | ctxt->cp15[c1_CPACR] = read_sysreg(CPACR); | ||
34 | *cp15_64(ctxt, c2_TTBR0) = read_sysreg(TTBR0); | ||
35 | *cp15_64(ctxt, c2_TTBR1) = read_sysreg(TTBR1); | ||
36 | ctxt->cp15[c2_TTBCR] = read_sysreg(TTBCR); | ||
37 | ctxt->cp15[c3_DACR] = read_sysreg(DACR); | ||
38 | ctxt->cp15[c5_DFSR] = read_sysreg(DFSR); | ||
39 | ctxt->cp15[c5_IFSR] = read_sysreg(IFSR); | ||
40 | ctxt->cp15[c5_ADFSR] = read_sysreg(ADFSR); | ||
41 | ctxt->cp15[c5_AIFSR] = read_sysreg(AIFSR); | ||
42 | ctxt->cp15[c6_DFAR] = read_sysreg(DFAR); | ||
43 | ctxt->cp15[c6_IFAR] = read_sysreg(IFAR); | ||
44 | *cp15_64(ctxt, c7_PAR) = read_sysreg(PAR); | ||
45 | ctxt->cp15[c10_PRRR] = read_sysreg(PRRR); | ||
46 | ctxt->cp15[c10_NMRR] = read_sysreg(NMRR); | ||
47 | ctxt->cp15[c10_AMAIR0] = read_sysreg(AMAIR0); | ||
48 | ctxt->cp15[c10_AMAIR1] = read_sysreg(AMAIR1); | ||
49 | ctxt->cp15[c12_VBAR] = read_sysreg(VBAR); | ||
50 | ctxt->cp15[c13_CID] = read_sysreg(CID); | ||
51 | ctxt->cp15[c13_TID_URW] = read_sysreg(TID_URW); | ||
52 | ctxt->cp15[c13_TID_URO] = read_sysreg(TID_URO); | ||
53 | ctxt->cp15[c13_TID_PRIV] = read_sysreg(TID_PRIV); | ||
54 | ctxt->cp15[c14_CNTKCTL] = read_sysreg(CNTKCTL); | ||
55 | } | ||
56 | |||
57 | void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) | ||
58 | { | ||
59 | write_sysreg(ctxt->cp15[c0_MPIDR], VMPIDR); | ||
60 | write_sysreg(ctxt->cp15[c0_CSSELR], CSSELR); | ||
61 | write_sysreg(ctxt->cp15[c1_SCTLR], SCTLR); | ||
62 | write_sysreg(ctxt->cp15[c1_CPACR], CPACR); | ||
63 | write_sysreg(*cp15_64(ctxt, c2_TTBR0), TTBR0); | ||
64 | write_sysreg(*cp15_64(ctxt, c2_TTBR1), TTBR1); | ||
65 | write_sysreg(ctxt->cp15[c2_TTBCR], TTBCR); | ||
66 | write_sysreg(ctxt->cp15[c3_DACR], DACR); | ||
67 | write_sysreg(ctxt->cp15[c5_DFSR], DFSR); | ||
68 | write_sysreg(ctxt->cp15[c5_IFSR], IFSR); | ||
69 | write_sysreg(ctxt->cp15[c5_ADFSR], ADFSR); | ||
70 | write_sysreg(ctxt->cp15[c5_AIFSR], AIFSR); | ||
71 | write_sysreg(ctxt->cp15[c6_DFAR], DFAR); | ||
72 | write_sysreg(ctxt->cp15[c6_IFAR], IFAR); | ||
73 | write_sysreg(*cp15_64(ctxt, c7_PAR), PAR); | ||
74 | write_sysreg(ctxt->cp15[c10_PRRR], PRRR); | ||
75 | write_sysreg(ctxt->cp15[c10_NMRR], NMRR); | ||
76 | write_sysreg(ctxt->cp15[c10_AMAIR0], AMAIR0); | ||
77 | write_sysreg(ctxt->cp15[c10_AMAIR1], AMAIR1); | ||
78 | write_sysreg(ctxt->cp15[c12_VBAR], VBAR); | ||
79 | write_sysreg(ctxt->cp15[c13_CID], CID); | ||
80 | write_sysreg(ctxt->cp15[c13_TID_URW], TID_URW); | ||
81 | write_sysreg(ctxt->cp15[c13_TID_URO], TID_URO); | ||
82 | write_sysreg(ctxt->cp15[c13_TID_PRIV], TID_PRIV); | ||
83 | write_sysreg(ctxt->cp15[c14_CNTKCTL], CNTKCTL); | ||
84 | } | ||
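cp15_64() exposes two adjacent 32-bit cp15 slots as one 64-bit value, which is how the 64-bit registers (TTBR0/TTBR1, PAR) are stored: low word at idx, high word at idx + 1. That is the same little-endian pairing vcpu_cp15_reg64_set()/vcpu_cp15_reg64_get() in coproc.c assume. A standalone sketch of the aliasing (the kernel builds with -fno-strict-aliasing, which this cast relies on):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t cp15[2];
            uint64_t *wide = (uint64_t *)cp15;      /* alias two slots as one u64 */

            *wide = 0x1122334455667788ULL;
            /* little-endian: low word lands first, high word second */
            printf("%x %x\n", cp15[0], cp15[1]);    /* 55667788 11223344 */
            return 0;
    }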
diff --git a/arch/arm/kvm/hyp/entry.S b/arch/arm/kvm/hyp/entry.S new file mode 100644 index 000000000000..21c238871c9e --- /dev/null +++ b/arch/arm/kvm/hyp/entry.S | |||
@@ -0,0 +1,101 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2016 - ARM Ltd | ||
3 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | */ | ||
17 | |||
18 | #include <linux/linkage.h> | ||
19 | #include <asm/asm-offsets.h> | ||
20 | #include <asm/kvm_arm.h> | ||
21 | |||
22 | .arch_extension virt | ||
23 | |||
24 | .text | ||
25 | .pushsection .hyp.text, "ax" | ||
26 | |||
27 | #define USR_REGS_OFFSET (CPU_CTXT_GP_REGS + GP_REGS_USR) | ||
28 | |||
29 | /* int __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host) */ | ||
30 | ENTRY(__guest_enter) | ||
31 | @ Save host registers | ||
32 | add r1, r1, #(USR_REGS_OFFSET + S_R4) | ||
33 | stm r1!, {r4-r12} | ||
34 | str lr, [r1, #4] @ Skip SP_usr (already saved) | ||
35 | |||
36 | @ Restore guest registers | ||
37 | add r0, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0) | ||
38 | ldr lr, [r0, #S_LR] | ||
39 | ldm r0, {r0-r12} | ||
40 | |||
41 | clrex | ||
42 | eret | ||
43 | ENDPROC(__guest_enter) | ||
44 | |||
45 | ENTRY(__guest_exit) | ||
46 | /* | ||
47 | * return convention: | ||
48 | * guest r0, r1, r2 saved on the stack | ||
49 | * r0: vcpu pointer | ||
50 | * r1: exception code | ||
51 | */ | ||
52 | |||
53 | add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R3) | ||
54 | stm r2!, {r3-r12} | ||
55 | str lr, [r2, #4] | ||
56 | add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0) | ||
57 | pop {r3, r4, r5} @ r0, r1, r2 | ||
58 | stm r2, {r3-r5} | ||
59 | |||
60 | ldr r0, [r0, #VCPU_HOST_CTXT] | ||
61 | add r0, r0, #(USR_REGS_OFFSET + S_R4) | ||
62 | ldm r0!, {r4-r12} | ||
63 | ldr lr, [r0, #4] | ||
64 | |||
65 | mov r0, r1 | ||
66 | bx lr | ||
67 | ENDPROC(__guest_exit) | ||
68 | |||
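Seen from the host, the __guest_enter/__guest_exit pair behaves like a single C function call: __guest_enter saves the host's callee-saved registers, loads the guest's, and erets into the guest; the exception vectors later funnel into __guest_exit, which restores the host registers and returns with the exception code in r0. The C-level view, matching the prototype in the comment above __guest_enter:

    /* Provided by the hyp entry code; returns an ARM_EXCEPTION_* code. */
    int __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host);

    /* typical call site, see __guest_run() in hyp/switch.c further down:
     *         exit_code = __guest_enter(vcpu, host_ctxt);
     */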
69 | /* | ||
70 | * If VFPv3 support is not available, then we will not switch the VFP | ||
71 | * registers; however, cp10 and cp11 accesses will still trap and fall back | ||
72 | * to the regular coprocessor emulation code, which currently will | ||
73 | * inject an undefined exception to the guest. | ||
74 | */ | ||
75 | #ifdef CONFIG_VFPv3 | ||
76 | ENTRY(__vfp_guest_restore) | ||
77 | push {r3, r4, lr} | ||
78 | |||
79 | @ NEON/VFP used. Turn on VFP access. | ||
80 | mrc p15, 4, r1, c1, c1, 2 @ HCPTR | ||
81 | bic r1, r1, #(HCPTR_TCP(10) | HCPTR_TCP(11)) | ||
82 | mcr p15, 4, r1, c1, c1, 2 @ HCPTR | ||
83 | isb | ||
84 | |||
85 | @ Switch VFP/NEON hardware state to the guest's | ||
86 | mov r4, r0 | ||
87 | ldr r0, [r0, #VCPU_HOST_CTXT] | ||
88 | add r0, r0, #CPU_CTXT_VFP | ||
89 | bl __vfp_save_state | ||
90 | add r0, r4, #(VCPU_GUEST_CTXT + CPU_CTXT_VFP) | ||
91 | bl __vfp_restore_state | ||
92 | |||
93 | pop {r3, r4, lr} | ||
94 | pop {r0, r1, r2} | ||
95 | clrex | ||
96 | eret | ||
97 | ENDPROC(__vfp_guest_restore) | ||
98 | #endif | ||
99 | |||
100 | .popsection | ||
101 | |||
diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S new file mode 100644 index 000000000000..78091383a5d9 --- /dev/null +++ b/arch/arm/kvm/hyp/hyp-entry.S | |||
@@ -0,0 +1,169 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2012 - Virtual Open Systems and Columbia University | ||
3 | * Author: Christoffer Dall <c.dall@virtualopensystems.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License, version 2, as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/linkage.h> | ||
20 | #include <asm/kvm_arm.h> | ||
21 | #include <asm/kvm_asm.h> | ||
22 | |||
23 | .arch_extension virt | ||
24 | |||
25 | .text | ||
26 | .pushsection .hyp.text, "ax" | ||
27 | |||
28 | .macro load_vcpu reg | ||
29 | mrc p15, 4, \reg, c13, c0, 2 @ HTPIDR | ||
30 | .endm | ||
31 | |||
32 | /******************************************************************** | ||
33 | * Hypervisor exception vector and handlers | ||
34 | * | ||
35 | * | ||
36 | * The KVM/ARM Hypervisor ABI is defined as follows: | ||
37 | * | ||
38 | * Entry to Hyp mode from the host kernel will happen _only_ when an HVC | ||
39 | * instruction is issued since all traps are disabled when running the host | ||
40 | * kernel as per the Hyp-mode initialization at boot time. | ||
41 | * | ||
42 | * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc | ||
43 | * below) when the HVC instruction is executed in SVC mode (i.e. a guest or the | ||
44 | * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC | ||
45 | * instructions are executed from within Hyp-mode. | ||
46 | * | ||
47 | * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode): | ||
48 | * Switching to Hyp mode is done through a simple HVC #0 instruction. The | ||
49 | * exception vector code will check that the HVC comes from VMID==0. | ||
50 | * - r0 contains a pointer to a HYP function | ||
51 | * - r1, r2, and r3 contain arguments to the above function. | ||
52 | * - The HYP function will be called with its arguments in r0, r1 and r2. | ||
53 | * On HYP function return, we return directly to SVC. | ||
54 | * | ||
55 | * Note that the above is used to execute code in Hyp-mode from a host-kernel | ||
56 | * point of view, and is a different concept from performing a world-switch and | ||
57 | * executing guest code in SVC mode (with a VMID != 0). | ||
58 | */ | ||
59 | |||
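Concretely, the host-to-Hyp ABI described above means host code places a Hyp function pointer in r0 and up to three arguments in r1-r3, then issues HVC #0. In this tree that is wrapped by the kvm_call_hyp() trampoline; a sketch of its use, with the exact prototype hedged since it is not part of this hunk:

    /* HVC #0 with r0 = HYP function, r1-r3 = its arguments (see above). */
    unsigned long kvm_call_hyp(void *hypfn, ...);

    /* e.g. flushing a VM's stage-2 TLB from host context:
     *         kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
     */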
60 | .align 5 | ||
61 | __kvm_hyp_vector: | ||
62 | .global __kvm_hyp_vector | ||
63 | |||
64 | @ Hyp-mode exception vector | ||
65 | W(b) hyp_reset | ||
66 | W(b) hyp_undef | ||
67 | W(b) hyp_svc | ||
68 | W(b) hyp_pabt | ||
69 | W(b) hyp_dabt | ||
70 | W(b) hyp_hvc | ||
71 | W(b) hyp_irq | ||
72 | W(b) hyp_fiq | ||
73 | |||
74 | .macro invalid_vector label, cause | ||
75 | .align | ||
76 | \label: mov r0, #\cause | ||
77 | b __hyp_panic | ||
78 | .endm | ||
79 | |||
80 | invalid_vector hyp_reset ARM_EXCEPTION_RESET | ||
81 | invalid_vector hyp_undef ARM_EXCEPTION_UNDEFINED | ||
82 | invalid_vector hyp_svc ARM_EXCEPTION_SOFTWARE | ||
83 | invalid_vector hyp_pabt ARM_EXCEPTION_PREF_ABORT | ||
84 | invalid_vector hyp_dabt ARM_EXCEPTION_DATA_ABORT | ||
85 | invalid_vector hyp_fiq ARM_EXCEPTION_FIQ | ||
86 | |||
87 | ENTRY(__hyp_do_panic) | ||
88 | mrs lr, cpsr | ||
89 | bic lr, lr, #MODE_MASK | ||
90 | orr lr, lr, #SVC_MODE | ||
91 | THUMB( orr lr, lr, #PSR_T_BIT ) | ||
92 | msr spsr_cxsf, lr | ||
93 | ldr lr, =panic | ||
94 | msr ELR_hyp, lr | ||
95 | ldr lr, =kvm_call_hyp | ||
96 | clrex | ||
97 | eret | ||
98 | ENDPROC(__hyp_do_panic) | ||
99 | |||
100 | hyp_hvc: | ||
101 | /* | ||
102 | * Getting here is either because of a trap from a guest, | ||
103 | * or from executing HVC from the host kernel, which means | ||
104 | * "do something in Hyp mode". | ||
105 | */ | ||
106 | push {r0, r1, r2} | ||
107 | |||
108 | @ Check syndrome register | ||
109 | mrc p15, 4, r1, c5, c2, 0 @ HSR | ||
110 | lsr r0, r1, #HSR_EC_SHIFT | ||
111 | cmp r0, #HSR_EC_HVC | ||
112 | bne guest_trap @ Not HVC instr. | ||
113 | |||
114 | /* | ||
115 | * Let's check if the HVC came from VMID 0 and allow a simple | ||
116 | * switch to Hyp mode. | ||
117 | */ | ||
118 | mrrc p15, 6, r0, r2, c2 | ||
119 | lsr r2, r2, #16 | ||
120 | and r2, r2, #0xff | ||
121 | cmp r2, #0 | ||
122 | bne guest_trap @ Guest called HVC | ||
123 | |||
124 | /* | ||
125 | * Getting here means the host called HVC; we shift the parameters | ||
126 | * and branch to the Hyp function. | ||
127 | */ | ||
128 | pop {r0, r1, r2} | ||
129 | |||
130 | /* Check for __hyp_get_vectors */ | ||
131 | cmp r0, #-1 | ||
132 | mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR | ||
133 | beq 1f | ||
134 | |||
135 | push {lr} | ||
136 | |||
137 | mov lr, r0 | ||
138 | mov r0, r1 | ||
139 | mov r1, r2 | ||
140 | mov r2, r3 | ||
141 | |||
142 | THUMB( orr lr, #1) | ||
143 | blx lr @ Call the HYP function | ||
144 | |||
145 | pop {lr} | ||
146 | 1: eret | ||
147 | |||
148 | guest_trap: | ||
149 | load_vcpu r0 @ Load VCPU pointer to r0 | ||
150 | |||
151 | #ifdef CONFIG_VFPv3 | ||
152 | @ Check for a VFP access | ||
153 | lsr r1, r1, #HSR_EC_SHIFT | ||
154 | cmp r1, #HSR_EC_CP_0_13 | ||
155 | beq __vfp_guest_restore | ||
156 | #endif | ||
157 | |||
158 | mov r1, #ARM_EXCEPTION_HVC | ||
159 | b __guest_exit | ||
160 | |||
161 | hyp_irq: | ||
162 | push {r0, r1, r2} | ||
163 | mov r1, #ARM_EXCEPTION_IRQ | ||
164 | load_vcpu r0 @ Load VCPU pointer to r0 | ||
165 | b __guest_exit | ||
166 | |||
167 | .ltorg | ||
168 | |||
169 | .popsection | ||
diff --git a/arch/arm/kvm/hyp/s2-setup.c b/arch/arm/kvm/hyp/s2-setup.c new file mode 100644 index 000000000000..7be39af2ed6c --- /dev/null +++ b/arch/arm/kvm/hyp/s2-setup.c | |||
@@ -0,0 +1,33 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2016 - ARM Ltd | ||
3 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | */ | ||
17 | |||
18 | #include <linux/types.h> | ||
19 | #include <asm/kvm_arm.h> | ||
20 | #include <asm/kvm_asm.h> | ||
21 | #include <asm/kvm_hyp.h> | ||
22 | |||
23 | void __hyp_text __init_stage2_translation(void) | ||
24 | { | ||
25 | u64 val; | ||
26 | |||
27 | val = read_sysreg(VTCR) & ~VTCR_MASK; | ||
28 | |||
29 | val |= read_sysreg(HTCR) & VTCR_HTCR_SH; | ||
30 | val |= KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S; | ||
31 | |||
32 | write_sysreg(val, VTCR); | ||
33 | } | ||
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c new file mode 100644 index 000000000000..b13caa90cd44 --- /dev/null +++ b/arch/arm/kvm/hyp/switch.c | |||
@@ -0,0 +1,232 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015 - ARM Ltd | ||
3 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | */ | ||
17 | |||
18 | #include <asm/kvm_asm.h> | ||
19 | #include <asm/kvm_hyp.h> | ||
20 | |||
21 | __asm__(".arch_extension virt"); | ||
22 | |||
23 | /* | ||
24 | * Activate the traps, saving the host's fpexc register before | ||
25 | * overwriting it. We'll restore it on VM exit. | ||
26 | */ | ||
27 | static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host) | ||
28 | { | ||
29 | u32 val; | ||
30 | |||
31 | /* | ||
32 | * We are about to set HCPTR.TCP10/11 to trap all floating point | ||
33 | * register accesses to HYP; however, the ARM ARM clearly states that | ||
34 | * traps are only taken to HYP if the operation would not otherwise | ||
35 | * trap to SVC. Therefore, for 32-bit guests, always make sure that | ||
36 | * FPEXC.EN is set when setting the TCP bits, to prevent traps to SVC. | ||
37 | */ | ||
38 | val = read_sysreg(VFP_FPEXC); | ||
39 | *fpexc_host = val; | ||
40 | if (!(val & FPEXC_EN)) { | ||
41 | write_sysreg(val | FPEXC_EN, VFP_FPEXC); | ||
42 | isb(); | ||
43 | } | ||
44 | |||
45 | write_sysreg(vcpu->arch.hcr | vcpu->arch.irq_lines, HCR); | ||
46 | /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ | ||
47 | write_sysreg(HSTR_T(15), HSTR); | ||
48 | write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR); | ||
49 | val = read_sysreg(HDCR); | ||
50 | write_sysreg(val | HDCR_TPM | HDCR_TPMCR, HDCR); | ||
51 | } | ||
52 | |||
53 | static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) | ||
54 | { | ||
55 | u32 val; | ||
56 | |||
57 | write_sysreg(0, HCR); | ||
58 | write_sysreg(0, HSTR); | ||
59 | val = read_sysreg(HDCR); | ||
60 | write_sysreg(val & ~(HDCR_TPM | HDCR_TPMCR), HDCR); | ||
61 | write_sysreg(0, HCPTR); | ||
62 | } | ||
63 | |||
64 | static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) | ||
65 | { | ||
66 | struct kvm *kvm = kern_hyp_va(vcpu->kvm); | ||
67 | write_sysreg(kvm->arch.vttbr, VTTBR); | ||
68 | write_sysreg(vcpu->arch.midr, VPIDR); | ||
69 | } | ||
70 | |||
71 | static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) | ||
72 | { | ||
73 | write_sysreg(0, VTTBR); | ||
74 | write_sysreg(read_sysreg(MIDR), VPIDR); | ||
75 | } | ||
76 | |||
77 | static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) | ||
78 | { | ||
79 | __vgic_v2_save_state(vcpu); | ||
80 | } | ||
81 | |||
82 | static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) | ||
83 | { | ||
84 | __vgic_v2_restore_state(vcpu); | ||
85 | } | ||
86 | |||
87 | static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) | ||
88 | { | ||
89 | u32 hsr = read_sysreg(HSR); | ||
90 | u8 ec = hsr >> HSR_EC_SHIFT; | ||
91 | u32 hpfar, far; | ||
92 | |||
93 | vcpu->arch.fault.hsr = hsr; | ||
94 | |||
95 | if (ec == HSR_EC_IABT) | ||
96 | far = read_sysreg(HIFAR); | ||
97 | else if (ec == HSR_EC_DABT) | ||
98 | far = read_sysreg(HDFAR); | ||
99 | else | ||
100 | return true; | ||
101 | |||
102 | /* | ||
103 | * B3.13.5 Reporting exceptions taken to the Non-secure PL2 mode: | ||
104 | * | ||
105 | * Abort on the stage 2 translation for a memory access from a | ||
106 | * Non-secure PL1 or PL0 mode: | ||
107 | * | ||
108 | * For any Access flag fault or Translation fault, and also for any | ||
109 | * Permission fault on the stage 2 translation of a memory access | ||
110 | * made as part of a translation table walk for a stage 1 translation, | ||
111 | * the HPFAR holds the IPA that caused the fault. Otherwise, the HPFAR | ||
112 | * is UNKNOWN. | ||
113 | */ | ||
114 | if (!(hsr & HSR_DABT_S1PTW) && (hsr & HSR_FSC_TYPE) == FSC_PERM) { | ||
115 | u64 par, tmp; | ||
116 | |||
117 | par = read_sysreg(PAR); | ||
118 | write_sysreg(far, ATS1CPR); | ||
119 | isb(); | ||
120 | |||
121 | tmp = read_sysreg(PAR); | ||
122 | write_sysreg(par, PAR); | ||
123 | |||
124 | if (unlikely(tmp & 1)) | ||
125 | return false; /* Translation failed, back to guest */ | ||
126 | |||
127 | hpfar = ((tmp >> 12) & ((1UL << 28) - 1)) << 4; | ||
128 | } else { | ||
129 | hpfar = read_sysreg(HPFAR); | ||
130 | } | ||
131 | |||
132 | vcpu->arch.fault.hxfar = far; | ||
133 | vcpu->arch.fault.hpfar = hpfar; | ||
134 | return true; | ||
135 | } | ||
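The PAR-to-HPFAR repacking above is terse; as a hedged restatement (assuming an LPAE PAR with the physical address in bits [39:12], and HPFAR carrying IPA[39:12] in bits [31:4]), the same arithmetic reads:

	/* sketch only: equivalent to the shift/mask line above */
	u64 pa    = tmp & GENMASK_ULL(39, 12);	/* PAR.PA */
	u32 hpfar = ((u32)(pa >> 12)) << 4;	/* -> HPFAR[31:4] */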
136 | |||
137 | static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) | ||
138 | { | ||
139 | struct kvm_cpu_context *host_ctxt; | ||
140 | struct kvm_cpu_context *guest_ctxt; | ||
141 | bool fp_enabled; | ||
142 | u64 exit_code; | ||
143 | u32 fpexc; | ||
144 | |||
145 | vcpu = kern_hyp_va(vcpu); | ||
146 | write_sysreg(vcpu, HTPIDR); | ||
147 | |||
148 | host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); | ||
149 | guest_ctxt = &vcpu->arch.ctxt; | ||
150 | |||
151 | __sysreg_save_state(host_ctxt); | ||
152 | __banked_save_state(host_ctxt); | ||
153 | |||
154 | __activate_traps(vcpu, &fpexc); | ||
155 | __activate_vm(vcpu); | ||
156 | |||
157 | __vgic_restore_state(vcpu); | ||
158 | __timer_restore_state(vcpu); | ||
159 | |||
160 | __sysreg_restore_state(guest_ctxt); | ||
161 | __banked_restore_state(guest_ctxt); | ||
162 | |||
163 | /* Jump in the fire! */ | ||
164 | again: | ||
165 | exit_code = __guest_enter(vcpu, host_ctxt); | ||
166 | /* And we're baaack! */ | ||
167 | |||
168 | if (exit_code == ARM_EXCEPTION_HVC && !__populate_fault_info(vcpu)) | ||
169 | goto again; | ||
170 | |||
171 | fp_enabled = __vfp_enabled(); | ||
172 | |||
173 | __banked_save_state(guest_ctxt); | ||
174 | __sysreg_save_state(guest_ctxt); | ||
175 | __timer_save_state(vcpu); | ||
176 | __vgic_save_state(vcpu); | ||
177 | |||
178 | __deactivate_traps(vcpu); | ||
179 | __deactivate_vm(vcpu); | ||
180 | |||
181 | __banked_restore_state(host_ctxt); | ||
182 | __sysreg_restore_state(host_ctxt); | ||
183 | |||
184 | if (fp_enabled) { | ||
185 | __vfp_save_state(&guest_ctxt->vfp); | ||
186 | __vfp_restore_state(&host_ctxt->vfp); | ||
187 | } | ||
188 | |||
189 | write_sysreg(fpexc, VFP_FPEXC); | ||
190 | |||
191 | return exit_code; | ||
192 | } | ||
193 | |||
194 | __alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu); | ||
195 | |||
196 | static const char * const __hyp_panic_string[] = { | ||
197 | [ARM_EXCEPTION_RESET] = "\nHYP panic: RST PC:%08x CPSR:%08x", | ||
198 | [ARM_EXCEPTION_UNDEFINED] = "\nHYP panic: UNDEF PC:%08x CPSR:%08x", | ||
199 | [ARM_EXCEPTION_SOFTWARE] = "\nHYP panic: SVC PC:%08x CPSR:%08x", | ||
200 | [ARM_EXCEPTION_PREF_ABORT] = "\nHYP panic: PABRT PC:%08x CPSR:%08x", | ||
201 | [ARM_EXCEPTION_DATA_ABORT] = "\nHYP panic: DABRT PC:%08x ADDR:%08x", | ||
202 | [ARM_EXCEPTION_IRQ] = "\nHYP panic: IRQ PC:%08x CPSR:%08x", | ||
203 | [ARM_EXCEPTION_FIQ] = "\nHYP panic: FIQ PC:%08x CPSR:%08x", | ||
204 | [ARM_EXCEPTION_HVC] = "\nHYP panic: HVC PC:%08x CPSR:%08x", | ||
205 | }; | ||
206 | |||
207 | void __hyp_text __noreturn __hyp_panic(int cause) | ||
208 | { | ||
209 | u32 elr = read_special(ELR_hyp); | ||
210 | u32 val; | ||
211 | |||
212 | if (cause == ARM_EXCEPTION_DATA_ABORT) | ||
213 | val = read_sysreg(HDFAR); | ||
214 | else | ||
215 | val = read_special(SPSR); | ||
216 | |||
217 | if (read_sysreg(VTTBR)) { | ||
218 | struct kvm_vcpu *vcpu; | ||
219 | struct kvm_cpu_context *host_ctxt; | ||
220 | |||
221 | vcpu = (struct kvm_vcpu *)read_sysreg(HTPIDR); | ||
222 | host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); | ||
223 | __deactivate_traps(vcpu); | ||
224 | __deactivate_vm(vcpu); | ||
225 | __sysreg_restore_state(host_ctxt); | ||
226 | } | ||
227 | |||
228 | /* Call panic for real */ | ||
229 | __hyp_do_panic(__hyp_panic_string[cause], elr, val); | ||
230 | |||
231 | unreachable(); | ||
232 | } | ||
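A note on the __alias(__guest_run) declaration above: the world-switch stays a static function in the .hyp.text section, and __alias merely exports it under the ABI symbol __kvm_vcpu_run that the host expects. In the kernel this boils down to GCC's alias attribute; roughly (see include/linux/compiler-gcc.h):

	#define __alias(symbol)	__attribute__((alias(#symbol)))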
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c new file mode 100644 index 000000000000..a2636001e616 --- /dev/null +++ b/arch/arm/kvm/hyp/tlb.c | |||
@@ -0,0 +1,70 @@ | |||
1 | /* | ||
2 | * Original code: | ||
3 | * Copyright (C) 2012 - Virtual Open Systems and Columbia University | ||
4 | * Author: Christoffer Dall <c.dall@virtualopensystems.com> | ||
5 | * | ||
6 | * Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | */ | ||
20 | |||
21 | #include <asm/kvm_hyp.h> | ||
22 | |||
23 | /** | ||
24 | * Flush per-VMID TLBs | ||
25 | * | ||
26 | * __kvm_tlb_flush_vmid(struct kvm *kvm); | ||
27 | * | ||
28 | * We rely on the hardware to broadcast the TLB invalidation to all CPUs | ||
29 | * inside the inner-shareable domain (which is the case for all v7 | ||
30 | * implementations). If we come across a non-IS SMP implementation, we'll | ||
31 | * have to use an IPI based mechanism. Until then, we stick to the simple | ||
32 | * hardware assisted version. | ||
33 | * | ||
34 | * As v7 does not support flushing per IPA, just nuke the whole TLB | ||
35 | * instead, ignoring the ipa value. | ||
36 | */ | ||
37 | static void __hyp_text __tlb_flush_vmid(struct kvm *kvm) | ||
38 | { | ||
39 | dsb(ishst); | ||
40 | |||
41 | /* Switch to requested VMID */ | ||
42 | kvm = kern_hyp_va(kvm); | ||
43 | write_sysreg(kvm->arch.vttbr, VTTBR); | ||
44 | isb(); | ||
45 | |||
46 | write_sysreg(0, TLBIALLIS); | ||
47 | dsb(ish); | ||
48 | isb(); | ||
49 | |||
50 | write_sysreg(0, VTTBR); | ||
51 | } | ||
52 | |||
53 | __alias(__tlb_flush_vmid) void __kvm_tlb_flush_vmid(struct kvm *kvm); | ||
54 | |||
55 | static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) | ||
56 | { | ||
57 | __tlb_flush_vmid(kvm); | ||
58 | } | ||
59 | |||
60 | __alias(__tlb_flush_vmid_ipa) void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, | ||
61 | phys_addr_t ipa); | ||
62 | |||
63 | static void __hyp_text __tlb_flush_vm_context(void) | ||
64 | { | ||
65 | write_sysreg(0, TLBIALLNSNHIS); | ||
66 | write_sysreg(0, ICIALLUIS); | ||
67 | dsb(ish); | ||
68 | } | ||
69 | |||
70 | __alias(__tlb_flush_vm_context) void __kvm_flush_vm_context(void); | ||
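These __alias'd entry points are reached from the host through kvm_call_hyp. A minimal sketch of the host-side wrapper, mirroring the pattern used in arch/arm/kvm/mmu.c:

	static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
	{
		/* hvc #0 into Hyp mode; __kvm_tlb_flush_vmid_ipa runs there */
		if (kvm)
			kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
	}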
diff --git a/arch/arm/kvm/hyp/vfp.S b/arch/arm/kvm/hyp/vfp.S new file mode 100644 index 000000000000..7c297e87eb8b --- /dev/null +++ b/arch/arm/kvm/hyp/vfp.S | |||
@@ -0,0 +1,68 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2012 - Virtual Open Systems and Columbia University | ||
3 | * Author: Christoffer Dall <c.dall@virtualopensystems.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | */ | ||
17 | |||
18 | #include <linux/linkage.h> | ||
19 | #include <asm/vfpmacros.h> | ||
20 | |||
21 | .text | ||
22 | .pushsection .hyp.text, "ax" | ||
23 | |||
24 | /* void __vfp_save_state(struct vfp_hard_struct *vfp); */ | ||
25 | ENTRY(__vfp_save_state) | ||
26 | push {r4, r5} | ||
27 | VFPFMRX r1, FPEXC | ||
28 | |||
29 | @ Make sure VFP is really enabled so we can touch the registers. | ||
30 | orr r5, r1, #FPEXC_EN | ||
31 | tst r5, #FPEXC_EX @ Check for VFP Subarchitecture | ||
32 | bic r5, r5, #FPEXC_EX @ FPEXC_EX disable | ||
33 | VFPFMXR FPEXC, r5 | ||
34 | isb | ||
35 | |||
36 | VFPFMRX r2, FPSCR | ||
37 | beq 1f | ||
38 | |||
39 | @ If FPEXC_EX is 0, then FPINST/FPINST2 reads are unpredictable, so | ||
40 | @ we only need to save them if FPEXC_EX is set. | ||
41 | VFPFMRX r3, FPINST | ||
42 | tst r5, #FPEXC_FP2V | ||
43 | VFPFMRX r4, FPINST2, ne @ vmrsne | ||
44 | 1: | ||
45 | VFPFSTMIA r0, r5 @ Save VFP registers | ||
46 | stm r0, {r1-r4} @ Save FPEXC, FPSCR, FPINST, FPINST2 | ||
47 | pop {r4, r5} | ||
48 | bx lr | ||
49 | ENDPROC(__vfp_save_state) | ||
50 | |||
51 | /* void __vfp_restore_state(struct vfp_hard_struct *vfp); | ||
52 | * Assume FPEXC_EN is on and FPEXC_EX is off */ | ||
53 | ENTRY(__vfp_restore_state) | ||
54 | VFPFLDMIA r0, r1 @ Load VFP registers | ||
55 | ldm r0, {r0-r3} @ Load FPEXC, FPSCR, FPINST, FPINST2 | ||
56 | |||
57 | VFPFMXR FPSCR, r1 | ||
58 | tst r0, #FPEXC_EX @ Check for VFP Subarchitecture | ||
59 | beq 1f | ||
60 | VFPFMXR FPINST, r2 | ||
61 | tst r0, #FPEXC_FP2V | ||
62 | VFPFMXR FPINST2, r3, ne | ||
63 | 1: | ||
64 | VFPFMXR FPEXC, r0 @ FPEXC (last, in case !EN) | ||
65 | bx lr | ||
66 | ENDPROC(__vfp_restore_state) | ||
67 | |||
68 | .popsection | ||
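Both routines follow one architectural rule: FPINST and FPINST2 are only readable when FPEXC.EX is set (and, for FPINST2, when FPEXC.FP2V is also set). A C restatement of the save side, with read_fpinst()/read_fpinst2() as purely hypothetical accessors:

	if (fpexc & FPEXC_EX) {
		vfp->fpinst = read_fpinst();		/* hypothetical helper */
		if (fpexc & FPEXC_FP2V)
			vfp->fpinst2 = read_fpinst2();	/* hypothetical helper */
	}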
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index 3988e72d16ff..1f9ae17476f9 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S | |||
@@ -84,14 +84,6 @@ __do_hyp_init: | |||
84 | orr r0, r0, r1 | 84 | orr r0, r0, r1 |
85 | mcr p15, 4, r0, c2, c0, 2 @ HTCR | 85 | mcr p15, 4, r0, c2, c0, 2 @ HTCR |
86 | 86 | ||
87 | mrc p15, 4, r1, c2, c1, 2 @ VTCR | ||
88 | ldr r2, =VTCR_MASK | ||
89 | bic r1, r1, r2 | ||
90 | bic r0, r0, #(~VTCR_HTCR_SH) @ clear non-reusable HTCR bits | ||
91 | orr r1, r0, r1 | ||
92 | orr r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S) | ||
93 | mcr p15, 4, r1, c2, c1, 2 @ VTCR | ||
94 | |||
95 | @ Use the same memory attributes for hyp. accesses as the kernel | 87 | @ Use the same memory attributes for hyp. accesses as the kernel |
96 | @ (copy MAIRx to HMAIRx). | 88 | @ (copy MAIRx to HMAIRx) |
97 | mrc p15, 0, r0, c10, c2, 0 | 89 | mrc p15, 0, r0, c10, c2, 0 |
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 900ef6dd8f72..b1bd316f14c0 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S | |||
@@ -17,211 +17,14 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/linkage.h> | 19 | #include <linux/linkage.h> |
20 | #include <linux/const.h> | ||
21 | #include <asm/unified.h> | ||
22 | #include <asm/page.h> | ||
23 | #include <asm/ptrace.h> | ||
24 | #include <asm/asm-offsets.h> | ||
25 | #include <asm/kvm_asm.h> | ||
26 | #include <asm/kvm_arm.h> | ||
27 | #include <asm/vfpmacros.h> | ||
28 | #include "interrupts_head.S" | ||
29 | 20 | ||
30 | .text | 21 | .text |
31 | 22 | ||
32 | __kvm_hyp_code_start: | ||
33 | .globl __kvm_hyp_code_start | ||
34 | |||
35 | /******************************************************************** | ||
36 | * Flush per-VMID TLBs | ||
37 | * | ||
38 | * void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); | ||
39 | * | ||
40 | * We rely on the hardware to broadcast the TLB invalidation to all CPUs | ||
41 | * inside the inner-shareable domain (which is the case for all v7 | ||
42 | * implementations). If we come across a non-IS SMP implementation, we'll | ||
43 | * have to use an IPI based mechanism. Until then, we stick to the simple | ||
44 | * hardware assisted version. | ||
45 | * | ||
46 | * As v7 does not support flushing per IPA, just nuke the whole TLB | ||
47 | * instead, ignoring the ipa value. | ||
48 | */ | ||
49 | ENTRY(__kvm_tlb_flush_vmid_ipa) | ||
50 | push {r2, r3} | ||
51 | |||
52 | dsb ishst | ||
53 | add r0, r0, #KVM_VTTBR | ||
54 | ldrd r2, r3, [r0] | ||
55 | mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR | ||
56 | isb | ||
57 | mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored) | ||
58 | dsb ish | ||
59 | isb | ||
60 | mov r2, #0 | ||
61 | mov r3, #0 | ||
62 | mcrr p15, 6, r2, r3, c2 @ Back to VMID #0 | ||
63 | isb @ Not necessary if followed by eret | ||
64 | |||
65 | pop {r2, r3} | ||
66 | bx lr | ||
67 | ENDPROC(__kvm_tlb_flush_vmid_ipa) | ||
68 | |||
69 | /** | ||
70 | * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs | ||
71 | * | ||
72 | * Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address | ||
73 | * parameter | ||
74 | */ | ||
75 | |||
76 | ENTRY(__kvm_tlb_flush_vmid) | ||
77 | b __kvm_tlb_flush_vmid_ipa | ||
78 | ENDPROC(__kvm_tlb_flush_vmid) | ||
79 | |||
80 | /******************************************************************** | ||
81 | * Flush TLBs and instruction caches of all CPUs inside the inner-shareable | ||
82 | * domain, for all VMIDs | ||
83 | * | ||
84 | * void __kvm_flush_vm_context(void); | ||
85 | */ | ||
86 | ENTRY(__kvm_flush_vm_context) | ||
87 | mov r0, #0 @ rn parameter for c15 flushes is SBZ | ||
88 | |||
89 | /* Invalidate NS Non-Hyp TLB Inner Shareable (TLBIALLNSNHIS) */ | ||
90 | mcr p15, 4, r0, c8, c3, 4 | ||
91 | /* Invalidate instruction caches Inner Shareable (ICIALLUIS) */ | ||
92 | mcr p15, 0, r0, c7, c1, 0 | ||
93 | dsb ish | ||
94 | isb @ Not necessary if followed by eret | ||
95 | |||
96 | bx lr | ||
97 | ENDPROC(__kvm_flush_vm_context) | ||
98 | |||
99 | |||
100 | /******************************************************************** | ||
101 | * Hypervisor world-switch code | ||
102 | * | ||
103 | * | ||
104 | * int __kvm_vcpu_run(struct kvm_vcpu *vcpu) | ||
105 | */ | ||
106 | ENTRY(__kvm_vcpu_run) | ||
107 | @ Save the vcpu pointer | ||
108 | mcr p15, 4, vcpu, c13, c0, 2 @ HTPIDR | ||
109 | |||
110 | save_host_regs | ||
111 | |||
112 | restore_vgic_state | ||
113 | restore_timer_state | ||
114 | |||
115 | @ Store hardware CP15 state and load guest state | ||
116 | read_cp15_state store_to_vcpu = 0 | ||
117 | write_cp15_state read_from_vcpu = 1 | ||
118 | |||
119 | @ If the host kernel has not been configured with VFPv3 support, | ||
120 | @ then it is safer if we deny guests from using it as well. | ||
121 | #ifdef CONFIG_VFPv3 | ||
122 | @ Set FPEXC_EN so the guest doesn't trap floating point instructions | ||
123 | VFPFMRX r2, FPEXC @ VMRS | ||
124 | push {r2} | ||
125 | orr r2, r2, #FPEXC_EN | ||
126 | VFPFMXR FPEXC, r2 @ VMSR | ||
127 | #endif | ||
128 | |||
129 | @ Configure Hyp-role | ||
130 | configure_hyp_role vmentry | ||
131 | |||
132 | @ Trap coprocessor CRx accesses | ||
133 | set_hstr vmentry | ||
134 | set_hcptr vmentry, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)) | ||
135 | set_hdcr vmentry | ||
136 | |||
137 | @ Write configured ID register into MIDR alias | ||
138 | ldr r1, [vcpu, #VCPU_MIDR] | ||
139 | mcr p15, 4, r1, c0, c0, 0 | ||
140 | |||
141 | @ Write guest view of MPIDR into VMPIDR | ||
142 | ldr r1, [vcpu, #CP15_OFFSET(c0_MPIDR)] | ||
143 | mcr p15, 4, r1, c0, c0, 5 | ||
144 | |||
145 | @ Set up guest memory translation | ||
146 | ldr r1, [vcpu, #VCPU_KVM] | ||
147 | add r1, r1, #KVM_VTTBR | ||
148 | ldrd r2, r3, [r1] | ||
149 | mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR | ||
150 | |||
151 | @ We're all done, just restore the GPRs and go to the guest | ||
152 | restore_guest_regs | ||
153 | clrex @ Clear exclusive monitor | ||
154 | eret | ||
155 | |||
156 | __kvm_vcpu_return: | ||
157 | /* | ||
158 | * return convention: | ||
159 | * guest r0, r1, r2 saved on the stack | ||
160 | * r0: vcpu pointer | ||
161 | * r1: exception code | ||
162 | */ | ||
163 | save_guest_regs | ||
164 | |||
165 | @ Set VMID == 0 | ||
166 | mov r2, #0 | ||
167 | mov r3, #0 | ||
168 | mcrr p15, 6, r2, r3, c2 @ Write VTTBR | ||
169 | |||
170 | @ Don't trap coprocessor accesses for host kernel | ||
171 | set_hstr vmexit | ||
172 | set_hdcr vmexit | ||
173 | set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)), after_vfp_restore | ||
174 | |||
175 | #ifdef CONFIG_VFPv3 | ||
176 | @ Switch VFP/NEON hardware state to the host's | ||
177 | add r7, vcpu, #VCPU_VFP_GUEST | ||
178 | store_vfp_state r7 | ||
179 | add r7, vcpu, #VCPU_VFP_HOST | ||
180 | ldr r7, [r7] | ||
181 | restore_vfp_state r7 | ||
182 | |||
183 | after_vfp_restore: | ||
184 | @ Restore FPEXC_EN which we clobbered on entry | ||
185 | pop {r2} | ||
186 | VFPFMXR FPEXC, r2 | ||
187 | #else | ||
188 | after_vfp_restore: | ||
189 | #endif | ||
190 | |||
191 | @ Reset Hyp-role | ||
192 | configure_hyp_role vmexit | ||
193 | |||
194 | @ Let host read hardware MIDR | ||
195 | mrc p15, 0, r2, c0, c0, 0 | ||
196 | mcr p15, 4, r2, c0, c0, 0 | ||
197 | |||
198 | @ Back to hardware MPIDR | ||
199 | mrc p15, 0, r2, c0, c0, 5 | ||
200 | mcr p15, 4, r2, c0, c0, 5 | ||
201 | |||
202 | @ Store guest CP15 state and restore host state | ||
203 | read_cp15_state store_to_vcpu = 1 | ||
204 | write_cp15_state read_from_vcpu = 0 | ||
205 | |||
206 | save_timer_state | ||
207 | save_vgic_state | ||
208 | |||
209 | restore_host_regs | ||
210 | clrex @ Clear exclusive monitor | ||
211 | #ifndef CONFIG_CPU_ENDIAN_BE8 | ||
212 | mov r0, r1 @ Return the return code | ||
213 | mov r1, #0 @ Clear upper bits in return value | ||
214 | #else | ||
215 | @ r1 already has return code | ||
216 | mov r0, #0 @ Clear upper bits in return value | ||
217 | #endif /* CONFIG_CPU_ENDIAN_BE8 */ | ||
218 | bx lr @ return to IOCTL | ||
219 | |||
220 | /******************************************************************** | 23 | /******************************************************************** |
221 | * Call function in Hyp mode | 24 | * Call function in Hyp mode |
222 | * | 25 | * |
223 | * | 26 | * |
224 | * u64 kvm_call_hyp(void *hypfn, ...); | 27 | * unsigned long kvm_call_hyp(void *hypfn, ...); |
225 | * | 28 | * |
226 | * This is not really a variadic function in the classic C-way and care must | 29 | * This is not really a variadic function in the classic C-way and care must |
227 | * be taken when calling this to ensure parameters are passed in registers | 30 | * be taken when calling this to ensure parameters are passed in registers |
@@ -232,7 +35,7 @@ after_vfp_restore: | |||
232 | * passed as r0, r1, and r2 (a maximum of 3 arguments in addition to the | 35 | * passed as r0, r1, and r2 (a maximum of 3 arguments in addition to the |
233 | * function pointer can be passed). The function being called must be mapped | 36 | * function pointer can be passed). The function being called must be mapped |
234 | * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are | 37 | * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are |
235 | * passed in r0 and r1. | 38 | * passed in r0 (strictly 32-bit). |
236 | * | 39 | * |
237 | * A function pointer with a value of 0xffffffff has a special meaning, | 40 | * A function pointer with a value of 0xffffffff has a special meaning, |
238 | * and is used to implement __hyp_get_vectors in the same way as in | 41 | * and is used to implement __hyp_get_vectors in the same way as in |
@@ -246,281 +49,4 @@ after_vfp_restore: | |||
246 | ENTRY(kvm_call_hyp) | 49 | ENTRY(kvm_call_hyp) |
247 | hvc #0 | 50 | hvc #0 |
248 | bx lr | 51 | bx lr |
249 | 52 | ENDPROC(kvm_call_hyp) | |
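Concretely, the convention above means the host drives the world switch with a call such as the following (the function pointer travels in r0, vcpu in r1; at Hyp the arguments are shifted down so the callee sees vcpu in r0, and the 32-bit result comes back in r0):

	ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);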
250 | /******************************************************************** | ||
251 | * Hypervisor exception vector and handlers | ||
252 | * | ||
253 | * | ||
254 | * The KVM/ARM Hypervisor ABI is defined as follows: | ||
255 | * | ||
256 | * Entry to Hyp mode from the host kernel will happen _only_ when an HVC | ||
257 | * instruction is issued since all traps are disabled when running the host | ||
258 | * kernel as per the Hyp-mode initialization at boot time. | ||
259 | * | ||
260 | * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc | ||
261 | * below) when the HVC instruction is called from SVC mode (i.e. a guest or the | ||
262 | * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC | ||
263 | * instructions are called from within Hyp-mode. | ||
264 | * | ||
265 | * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode): | ||
266 | * Switching to Hyp mode is done through a simple HVC #0 instruction. The | ||
267 | * exception vector code will check that the HVC comes from VMID==0 and if | ||
268 | * so will push the necessary state (SPSR, lr_usr) on the Hyp stack. | ||
269 | * - r0 contains a pointer to a HYP function | ||
270 | * - r1, r2, and r3 contain arguments to the above function. | ||
271 | * - The HYP function will be called with its arguments in r0, r1 and r2. | ||
272 | * On HYP function return, we return directly to SVC. | ||
273 | * | ||
274 | * Note that the above is used to execute code in Hyp-mode from a host-kernel | ||
275 | * point of view, and is a different concept from performing a world-switch and | ||
276 | * executing guest code SVC mode (with a VMID != 0). | ||
277 | */ | ||
278 | |||
279 | /* Handle undef, svc, pabt, or dabt by crashing with a user notice */ | ||
280 | .macro bad_exception exception_code, panic_str | ||
281 | push {r0-r2} | ||
282 | mrrc p15, 6, r0, r1, c2 @ Read VTTBR | ||
283 | lsr r1, r1, #16 | ||
284 | ands r1, r1, #0xff | ||
285 | beq 99f | ||
286 | |||
287 | load_vcpu @ Load VCPU pointer | ||
288 | .if \exception_code == ARM_EXCEPTION_DATA_ABORT | ||
289 | mrc p15, 4, r2, c5, c2, 0 @ HSR | ||
290 | mrc p15, 4, r1, c6, c0, 0 @ HDFAR | ||
291 | str r2, [vcpu, #VCPU_HSR] | ||
292 | str r1, [vcpu, #VCPU_HxFAR] | ||
293 | .endif | ||
294 | .if \exception_code == ARM_EXCEPTION_PREF_ABORT | ||
295 | mrc p15, 4, r2, c5, c2, 0 @ HSR | ||
296 | mrc p15, 4, r1, c6, c0, 2 @ HIFAR | ||
297 | str r2, [vcpu, #VCPU_HSR] | ||
298 | str r1, [vcpu, #VCPU_HxFAR] | ||
299 | .endif | ||
300 | mov r1, #\exception_code | ||
301 | b __kvm_vcpu_return | ||
302 | |||
303 | @ We were in the host already. Let's craft a panic-ing return to SVC. | ||
304 | 99: mrs r2, cpsr | ||
305 | bic r2, r2, #MODE_MASK | ||
306 | orr r2, r2, #SVC_MODE | ||
307 | THUMB( orr r2, r2, #PSR_T_BIT ) | ||
308 | msr spsr_cxsf, r2 | ||
309 | mrs r1, ELR_hyp | ||
310 | ldr r2, =panic | ||
311 | msr ELR_hyp, r2 | ||
312 | ldr r0, =\panic_str | ||
313 | clrex @ Clear exclusive monitor | ||
314 | eret | ||
315 | .endm | ||
316 | |||
317 | .text | ||
318 | |||
319 | .align 5 | ||
320 | __kvm_hyp_vector: | ||
321 | .globl __kvm_hyp_vector | ||
322 | |||
323 | @ Hyp-mode exception vector | ||
324 | W(b) hyp_reset | ||
325 | W(b) hyp_undef | ||
326 | W(b) hyp_svc | ||
327 | W(b) hyp_pabt | ||
328 | W(b) hyp_dabt | ||
329 | W(b) hyp_hvc | ||
330 | W(b) hyp_irq | ||
331 | W(b) hyp_fiq | ||
332 | |||
333 | .align | ||
334 | hyp_reset: | ||
335 | b hyp_reset | ||
336 | |||
337 | .align | ||
338 | hyp_undef: | ||
339 | bad_exception ARM_EXCEPTION_UNDEFINED, und_die_str | ||
340 | |||
341 | .align | ||
342 | hyp_svc: | ||
343 | bad_exception ARM_EXCEPTION_HVC, svc_die_str | ||
344 | |||
345 | .align | ||
346 | hyp_pabt: | ||
347 | bad_exception ARM_EXCEPTION_PREF_ABORT, pabt_die_str | ||
348 | |||
349 | .align | ||
350 | hyp_dabt: | ||
351 | bad_exception ARM_EXCEPTION_DATA_ABORT, dabt_die_str | ||
352 | |||
353 | .align | ||
354 | hyp_hvc: | ||
355 | /* | ||
356 | * Getting here is either because of a trap from a guest or from calling | ||
357 | * HVC from the host kernel, which means "switch to Hyp mode". | ||
358 | */ | ||
359 | push {r0, r1, r2} | ||
360 | |||
361 | @ Check syndrome register | ||
362 | mrc p15, 4, r1, c5, c2, 0 @ HSR | ||
363 | lsr r0, r1, #HSR_EC_SHIFT | ||
364 | cmp r0, #HSR_EC_HVC | ||
365 | bne guest_trap @ Not HVC instr. | ||
366 | |||
367 | /* | ||
368 | * Let's check if the HVC came from VMID 0 and allow simple | ||
369 | * switch to Hyp mode | ||
370 | */ | ||
371 | mrrc p15, 6, r0, r2, c2 | ||
372 | lsr r2, r2, #16 | ||
373 | and r2, r2, #0xff | ||
374 | cmp r2, #0 | ||
375 | bne guest_trap @ Guest called HVC | ||
376 | |||
377 | /* | ||
378 | * Getting here means host called HVC, we shift parameters and branch | ||
379 | * to Hyp function. | ||
380 | */ | ||
381 | pop {r0, r1, r2} | ||
382 | |||
383 | /* Check for __hyp_get_vectors */ | ||
384 | cmp r0, #-1 | ||
385 | mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR | ||
386 | beq 1f | ||
387 | |||
388 | push {lr} | ||
389 | mrs lr, SPSR | ||
390 | push {lr} | ||
391 | |||
392 | mov lr, r0 | ||
393 | mov r0, r1 | ||
394 | mov r1, r2 | ||
395 | mov r2, r3 | ||
396 | |||
397 | THUMB( orr lr, #1) | ||
398 | blx lr @ Call the HYP function | ||
399 | |||
400 | pop {lr} | ||
401 | msr SPSR_csxf, lr | ||
402 | pop {lr} | ||
403 | 1: eret | ||
404 | |||
405 | guest_trap: | ||
406 | load_vcpu @ Load VCPU pointer to r0 | ||
407 | str r1, [vcpu, #VCPU_HSR] | ||
408 | |||
409 | @ Check if we need the fault information | ||
410 | lsr r1, r1, #HSR_EC_SHIFT | ||
411 | #ifdef CONFIG_VFPv3 | ||
412 | cmp r1, #HSR_EC_CP_0_13 | ||
413 | beq switch_to_guest_vfp | ||
414 | #endif | ||
415 | cmp r1, #HSR_EC_IABT | ||
416 | mrceq p15, 4, r2, c6, c0, 2 @ HIFAR | ||
417 | beq 2f | ||
418 | cmp r1, #HSR_EC_DABT | ||
419 | bne 1f | ||
420 | mrc p15, 4, r2, c6, c0, 0 @ HDFAR | ||
421 | |||
422 | 2: str r2, [vcpu, #VCPU_HxFAR] | ||
423 | |||
424 | /* | ||
425 | * B3.13.5 Reporting exceptions taken to the Non-secure PL2 mode: | ||
426 | * | ||
427 | * Abort on the stage 2 translation for a memory access from a | ||
428 | * Non-secure PL1 or PL0 mode: | ||
429 | * | ||
430 | * For any Access flag fault or Translation fault, and also for any | ||
431 | * Permission fault on the stage 2 translation of a memory access | ||
432 | * made as part of a translation table walk for a stage 1 translation, | ||
433 | * the HPFAR holds the IPA that caused the fault. Otherwise, the HPFAR | ||
434 | * is UNKNOWN. | ||
435 | */ | ||
436 | |||
437 | /* Check for permission fault, and S1PTW */ | ||
438 | mrc p15, 4, r1, c5, c2, 0 @ HSR | ||
439 | and r0, r1, #HSR_FSC_TYPE | ||
440 | cmp r0, #FSC_PERM | ||
441 | tsteq r1, #(1 << 7) @ S1PTW | ||
442 | mrcne p15, 4, r2, c6, c0, 4 @ HPFAR | ||
443 | bne 3f | ||
444 | |||
445 | /* Preserve PAR */ | ||
446 | mrrc p15, 0, r0, r1, c7 @ PAR | ||
447 | push {r0, r1} | ||
448 | |||
449 | /* Resolve IPA using the xFAR */ | ||
450 | mcr p15, 0, r2, c7, c8, 0 @ ATS1CPR | ||
451 | isb | ||
452 | mrrc p15, 0, r0, r1, c7 @ PAR | ||
453 | tst r0, #1 | ||
454 | bne 4f @ Failed translation | ||
455 | ubfx r2, r0, #12, #20 | ||
456 | lsl r2, r2, #4 | ||
457 | orr r2, r2, r1, lsl #24 | ||
458 | |||
459 | /* Restore PAR */ | ||
460 | pop {r0, r1} | ||
461 | mcrr p15, 0, r0, r1, c7 @ PAR | ||
462 | |||
463 | 3: load_vcpu @ Load VCPU pointer to r0 | ||
464 | str r2, [r0, #VCPU_HPFAR] | ||
465 | |||
466 | 1: mov r1, #ARM_EXCEPTION_HVC | ||
467 | b __kvm_vcpu_return | ||
468 | |||
469 | 4: pop {r0, r1} @ Failed translation, return to guest | ||
470 | mcrr p15, 0, r0, r1, c7 @ PAR | ||
471 | clrex | ||
472 | pop {r0, r1, r2} | ||
473 | eret | ||
474 | |||
475 | /* | ||
476 | * If VFPv3 support is not available, then we will not switch the VFP | ||
477 | * registers; however cp10 and cp11 accesses will still trap and fallback | ||
478 | * to the regular coprocessor emulation code, which currently will | ||
479 | * inject an undefined exception to the guest. | ||
480 | */ | ||
481 | #ifdef CONFIG_VFPv3 | ||
482 | switch_to_guest_vfp: | ||
483 | push {r3-r7} | ||
484 | |||
485 | @ NEON/VFP used. Turn on VFP access. | ||
486 | set_hcptr vmtrap, (HCPTR_TCP(10) | HCPTR_TCP(11)) | ||
487 | |||
488 | @ Switch VFP/NEON hardware state to the guest's | ||
489 | add r7, r0, #VCPU_VFP_HOST | ||
490 | ldr r7, [r7] | ||
491 | store_vfp_state r7 | ||
492 | add r7, r0, #VCPU_VFP_GUEST | ||
493 | restore_vfp_state r7 | ||
494 | |||
495 | pop {r3-r7} | ||
496 | pop {r0-r2} | ||
497 | clrex | ||
498 | eret | ||
499 | #endif | ||
500 | |||
501 | .align | ||
502 | hyp_irq: | ||
503 | push {r0, r1, r2} | ||
504 | mov r1, #ARM_EXCEPTION_IRQ | ||
505 | load_vcpu @ Load VCPU pointer to r0 | ||
506 | b __kvm_vcpu_return | ||
507 | |||
508 | .align | ||
509 | hyp_fiq: | ||
510 | b hyp_fiq | ||
511 | |||
512 | .ltorg | ||
513 | |||
514 | __kvm_hyp_code_end: | ||
515 | .globl __kvm_hyp_code_end | ||
516 | |||
517 | .section ".rodata" | ||
518 | |||
519 | und_die_str: | ||
520 | .ascii "unexpected undefined exception in Hyp mode at: %#08x\n" | ||
521 | pabt_die_str: | ||
522 | .ascii "unexpected prefetch abort in Hyp mode at: %#08x\n" | ||
523 | dabt_die_str: | ||
524 | .ascii "unexpected data abort in Hyp mode at: %#08x\n" | ||
525 | svc_die_str: | ||
526 | .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x\n" | ||
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S deleted file mode 100644 index 51a59504bef4..000000000000 --- a/arch/arm/kvm/interrupts_head.S +++ /dev/null | |||
@@ -1,648 +0,0 @@ | |||
1 | #include <linux/irqchip/arm-gic.h> | ||
2 | #include <asm/assembler.h> | ||
3 | |||
4 | #define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4)) | ||
5 | #define VCPU_USR_SP (VCPU_USR_REG(13)) | ||
6 | #define VCPU_USR_LR (VCPU_USR_REG(14)) | ||
7 | #define CP15_OFFSET(_cp15_reg_idx) (VCPU_CP15 + (_cp15_reg_idx * 4)) | ||
8 | |||
9 | /* | ||
10 | * Many of these macros need to access the VCPU structure, which is always | ||
11 | * held in r0. These macros should never clobber r1, as it is used to hold the | ||
12 | * exception code on the return path (except of course the macro that switches | ||
13 | * all the registers before the final jump to the VM). | ||
14 | */ | ||
15 | vcpu .req r0 @ vcpu pointer always in r0 | ||
16 | |||
17 | /* Clobbers {r2-r6} */ | ||
18 | .macro store_vfp_state vfp_base | ||
19 | @ The VFPFMRX and VFPFMXR macros are the VMRS and VMSR instructions | ||
20 | VFPFMRX r2, FPEXC | ||
21 | @ Make sure VFP is enabled so we can touch the registers. | ||
22 | orr r6, r2, #FPEXC_EN | ||
23 | VFPFMXR FPEXC, r6 | ||
24 | |||
25 | VFPFMRX r3, FPSCR | ||
26 | tst r2, #FPEXC_EX @ Check for VFP Subarchitecture | ||
27 | beq 1f | ||
28 | @ If FPEXC_EX is 0, then FPINST/FPINST2 reads are unpredictable, so | ||
29 | @ we only need to save them if FPEXC_EX is set. | ||
30 | VFPFMRX r4, FPINST | ||
31 | tst r2, #FPEXC_FP2V | ||
32 | VFPFMRX r5, FPINST2, ne @ vmrsne | ||
33 | bic r6, r2, #FPEXC_EX @ FPEXC_EX disable | ||
34 | VFPFMXR FPEXC, r6 | ||
35 | 1: | ||
36 | VFPFSTMIA \vfp_base, r6 @ Save VFP registers | ||
37 | stm \vfp_base, {r2-r5} @ Save FPEXC, FPSCR, FPINST, FPINST2 | ||
38 | .endm | ||
39 | |||
40 | /* Assume FPEXC_EN is on and FPEXC_EX is off, clobbers {r2-r6} */ | ||
41 | .macro restore_vfp_state vfp_base | ||
42 | VFPFLDMIA \vfp_base, r6 @ Load VFP registers | ||
43 | ldm \vfp_base, {r2-r5} @ Load FPEXC, FPSCR, FPINST, FPINST2 | ||
44 | |||
45 | VFPFMXR FPSCR, r3 | ||
46 | tst r2, #FPEXC_EX @ Check for VFP Subarchitecture | ||
47 | beq 1f | ||
48 | VFPFMXR FPINST, r4 | ||
49 | tst r2, #FPEXC_FP2V | ||
50 | VFPFMXR FPINST2, r5, ne | ||
51 | 1: | ||
52 | VFPFMXR FPEXC, r2 @ FPEXC (last, in case !EN) | ||
53 | .endm | ||
54 | |||
55 | /* These are simply for the macros to work - values don't have meaning */ | ||
56 | .equ usr, 0 | ||
57 | .equ svc, 1 | ||
58 | .equ abt, 2 | ||
59 | .equ und, 3 | ||
60 | .equ irq, 4 | ||
61 | .equ fiq, 5 | ||
62 | |||
63 | .macro push_host_regs_mode mode | ||
64 | mrs r2, SP_\mode | ||
65 | mrs r3, LR_\mode | ||
66 | mrs r4, SPSR_\mode | ||
67 | push {r2, r3, r4} | ||
68 | .endm | ||
69 | |||
70 | /* | ||
71 | * Store all host persistent registers on the stack. | ||
72 | * Clobbers all registers, in all modes, except r0 and r1. | ||
73 | */ | ||
74 | .macro save_host_regs | ||
75 | /* Hyp regs. Only ELR_hyp (SPSR_hyp already saved) */ | ||
76 | mrs r2, ELR_hyp | ||
77 | push {r2} | ||
78 | |||
79 | /* usr regs */ | ||
80 | push {r4-r12} @ r0-r3 are always clobbered | ||
81 | mrs r2, SP_usr | ||
82 | mov r3, lr | ||
83 | push {r2, r3} | ||
84 | |||
85 | push_host_regs_mode svc | ||
86 | push_host_regs_mode abt | ||
87 | push_host_regs_mode und | ||
88 | push_host_regs_mode irq | ||
89 | |||
90 | /* fiq regs */ | ||
91 | mrs r2, r8_fiq | ||
92 | mrs r3, r9_fiq | ||
93 | mrs r4, r10_fiq | ||
94 | mrs r5, r11_fiq | ||
95 | mrs r6, r12_fiq | ||
96 | mrs r7, SP_fiq | ||
97 | mrs r8, LR_fiq | ||
98 | mrs r9, SPSR_fiq | ||
99 | push {r2-r9} | ||
100 | .endm | ||
101 | |||
102 | .macro pop_host_regs_mode mode | ||
103 | pop {r2, r3, r4} | ||
104 | msr SP_\mode, r2 | ||
105 | msr LR_\mode, r3 | ||
106 | msr SPSR_\mode, r4 | ||
107 | .endm | ||
108 | |||
109 | /* | ||
110 | * Restore all host registers from the stack. | ||
111 | * Clobbers all registers, in all modes, except r0 and r1. | ||
112 | */ | ||
113 | .macro restore_host_regs | ||
114 | pop {r2-r9} | ||
115 | msr r8_fiq, r2 | ||
116 | msr r9_fiq, r3 | ||
117 | msr r10_fiq, r4 | ||
118 | msr r11_fiq, r5 | ||
119 | msr r12_fiq, r6 | ||
120 | msr SP_fiq, r7 | ||
121 | msr LR_fiq, r8 | ||
122 | msr SPSR_fiq, r9 | ||
123 | |||
124 | pop_host_regs_mode irq | ||
125 | pop_host_regs_mode und | ||
126 | pop_host_regs_mode abt | ||
127 | pop_host_regs_mode svc | ||
128 | |||
129 | pop {r2, r3} | ||
130 | msr SP_usr, r2 | ||
131 | mov lr, r3 | ||
132 | pop {r4-r12} | ||
133 | |||
134 | pop {r2} | ||
135 | msr ELR_hyp, r2 | ||
136 | .endm | ||
137 | |||
138 | /* | ||
139 | * Restore SP, LR and SPSR for a given mode. offset is the offset of | ||
140 | * this mode's registers from the VCPU base. | ||
141 | * | ||
142 | * Assumes vcpu pointer in vcpu reg | ||
143 | * | ||
144 | * Clobbers r1, r2, r3, r4. | ||
145 | */ | ||
146 | .macro restore_guest_regs_mode mode, offset | ||
147 | add r1, vcpu, \offset | ||
148 | ldm r1, {r2, r3, r4} | ||
149 | msr SP_\mode, r2 | ||
150 | msr LR_\mode, r3 | ||
151 | msr SPSR_\mode, r4 | ||
152 | .endm | ||
153 | |||
154 | /* | ||
155 | * Restore all guest registers from the vcpu struct. | ||
156 | * | ||
157 | * Assumes vcpu pointer in vcpu reg | ||
158 | * | ||
159 | * Clobbers *all* registers. | ||
160 | */ | ||
161 | .macro restore_guest_regs | ||
162 | restore_guest_regs_mode svc, #VCPU_SVC_REGS | ||
163 | restore_guest_regs_mode abt, #VCPU_ABT_REGS | ||
164 | restore_guest_regs_mode und, #VCPU_UND_REGS | ||
165 | restore_guest_regs_mode irq, #VCPU_IRQ_REGS | ||
166 | |||
167 | add r1, vcpu, #VCPU_FIQ_REGS | ||
168 | ldm r1, {r2-r9} | ||
169 | msr r8_fiq, r2 | ||
170 | msr r9_fiq, r3 | ||
171 | msr r10_fiq, r4 | ||
172 | msr r11_fiq, r5 | ||
173 | msr r12_fiq, r6 | ||
174 | msr SP_fiq, r7 | ||
175 | msr LR_fiq, r8 | ||
176 | msr SPSR_fiq, r9 | ||
177 | |||
178 | @ Load return state | ||
179 | ldr r2, [vcpu, #VCPU_PC] | ||
180 | ldr r3, [vcpu, #VCPU_CPSR] | ||
181 | msr ELR_hyp, r2 | ||
182 | msr SPSR_cxsf, r3 | ||
183 | |||
184 | @ Load user registers | ||
185 | ldr r2, [vcpu, #VCPU_USR_SP] | ||
186 | ldr r3, [vcpu, #VCPU_USR_LR] | ||
187 | msr SP_usr, r2 | ||
188 | mov lr, r3 | ||
189 | add vcpu, vcpu, #(VCPU_USR_REGS) | ||
190 | ldm vcpu, {r0-r12} | ||
191 | .endm | ||
192 | |||
193 | /* | ||
194 | * Save SP, LR and SPSR for a given mode. offset is the offset of | ||
195 | * this mode's registers from the VCPU base. | ||
196 | * | ||
197 | * Assumes vcpu pointer in vcpu reg | ||
198 | * | ||
199 | * Clobbers r2, r3, r4, r5. | ||
200 | */ | ||
201 | .macro save_guest_regs_mode mode, offset | ||
202 | add r2, vcpu, \offset | ||
203 | mrs r3, SP_\mode | ||
204 | mrs r4, LR_\mode | ||
205 | mrs r5, SPSR_\mode | ||
206 | stm r2, {r3, r4, r5} | ||
207 | .endm | ||
208 | |||
209 | /* | ||
210 | * Save all guest registers to the vcpu struct | ||
211 | * Expects guest's r0, r1, r2 on the stack. | ||
212 | * | ||
213 | * Assumes vcpu pointer in vcpu reg | ||
214 | * | ||
215 | * Clobbers r2, r3, r4, r5. | ||
216 | */ | ||
217 | .macro save_guest_regs | ||
218 | @ Store usr registers | ||
219 | add r2, vcpu, #VCPU_USR_REG(3) | ||
220 | stm r2, {r3-r12} | ||
221 | add r2, vcpu, #VCPU_USR_REG(0) | ||
222 | pop {r3, r4, r5} @ r0, r1, r2 | ||
223 | stm r2, {r3, r4, r5} | ||
224 | mrs r2, SP_usr | ||
225 | mov r3, lr | ||
226 | str r2, [vcpu, #VCPU_USR_SP] | ||
227 | str r3, [vcpu, #VCPU_USR_LR] | ||
228 | |||
229 | @ Store return state | ||
230 | mrs r2, ELR_hyp | ||
231 | mrs r3, spsr | ||
232 | str r2, [vcpu, #VCPU_PC] | ||
233 | str r3, [vcpu, #VCPU_CPSR] | ||
234 | |||
235 | @ Store other guest registers | ||
236 | save_guest_regs_mode svc, #VCPU_SVC_REGS | ||
237 | save_guest_regs_mode abt, #VCPU_ABT_REGS | ||
238 | save_guest_regs_mode und, #VCPU_UND_REGS | ||
239 | save_guest_regs_mode irq, #VCPU_IRQ_REGS | ||
240 | .endm | ||
241 | |||
242 | /* Reads cp15 registers from hardware and stores them in memory | ||
243 | * @store_to_vcpu: If 0, registers are written in-order to the stack, | ||
244 | * otherwise to the VCPU struct pointed to by vcpup | ||
245 | * | ||
246 | * Assumes vcpu pointer in vcpu reg | ||
247 | * | ||
248 | * Clobbers r2 - r12 | ||
249 | */ | ||
250 | .macro read_cp15_state store_to_vcpu | ||
251 | mrc p15, 0, r2, c1, c0, 0 @ SCTLR | ||
252 | mrc p15, 0, r3, c1, c0, 2 @ CPACR | ||
253 | mrc p15, 0, r4, c2, c0, 2 @ TTBCR | ||
254 | mrc p15, 0, r5, c3, c0, 0 @ DACR | ||
255 | mrrc p15, 0, r6, r7, c2 @ TTBR 0 | ||
256 | mrrc p15, 1, r8, r9, c2 @ TTBR 1 | ||
257 | mrc p15, 0, r10, c10, c2, 0 @ PRRR | ||
258 | mrc p15, 0, r11, c10, c2, 1 @ NMRR | ||
259 | mrc p15, 2, r12, c0, c0, 0 @ CSSELR | ||
260 | |||
261 | .if \store_to_vcpu == 0 | ||
262 | push {r2-r12} @ Push CP15 registers | ||
263 | .else | ||
264 | str r2, [vcpu, #CP15_OFFSET(c1_SCTLR)] | ||
265 | str r3, [vcpu, #CP15_OFFSET(c1_CPACR)] | ||
266 | str r4, [vcpu, #CP15_OFFSET(c2_TTBCR)] | ||
267 | str r5, [vcpu, #CP15_OFFSET(c3_DACR)] | ||
268 | add r2, vcpu, #CP15_OFFSET(c2_TTBR0) | ||
269 | strd r6, r7, [r2] | ||
270 | add r2, vcpu, #CP15_OFFSET(c2_TTBR1) | ||
271 | strd r8, r9, [r2] | ||
272 | str r10, [vcpu, #CP15_OFFSET(c10_PRRR)] | ||
273 | str r11, [vcpu, #CP15_OFFSET(c10_NMRR)] | ||
274 | str r12, [vcpu, #CP15_OFFSET(c0_CSSELR)] | ||
275 | .endif | ||
276 | |||
277 | mrc p15, 0, r2, c13, c0, 1 @ CID | ||
278 | mrc p15, 0, r3, c13, c0, 2 @ TID_URW | ||
279 | mrc p15, 0, r4, c13, c0, 3 @ TID_URO | ||
280 | mrc p15, 0, r5, c13, c0, 4 @ TID_PRIV | ||
281 | mrc p15, 0, r6, c5, c0, 0 @ DFSR | ||
282 | mrc p15, 0, r7, c5, c0, 1 @ IFSR | ||
283 | mrc p15, 0, r8, c5, c1, 0 @ ADFSR | ||
284 | mrc p15, 0, r9, c5, c1, 1 @ AIFSR | ||
285 | mrc p15, 0, r10, c6, c0, 0 @ DFAR | ||
286 | mrc p15, 0, r11, c6, c0, 2 @ IFAR | ||
287 | mrc p15, 0, r12, c12, c0, 0 @ VBAR | ||
288 | |||
289 | .if \store_to_vcpu == 0 | ||
290 | push {r2-r12} @ Push CP15 registers | ||
291 | .else | ||
292 | str r2, [vcpu, #CP15_OFFSET(c13_CID)] | ||
293 | str r3, [vcpu, #CP15_OFFSET(c13_TID_URW)] | ||
294 | str r4, [vcpu, #CP15_OFFSET(c13_TID_URO)] | ||
295 | str r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)] | ||
296 | str r6, [vcpu, #CP15_OFFSET(c5_DFSR)] | ||
297 | str r7, [vcpu, #CP15_OFFSET(c5_IFSR)] | ||
298 | str r8, [vcpu, #CP15_OFFSET(c5_ADFSR)] | ||
299 | str r9, [vcpu, #CP15_OFFSET(c5_AIFSR)] | ||
300 | str r10, [vcpu, #CP15_OFFSET(c6_DFAR)] | ||
301 | str r11, [vcpu, #CP15_OFFSET(c6_IFAR)] | ||
302 | str r12, [vcpu, #CP15_OFFSET(c12_VBAR)] | ||
303 | .endif | ||
304 | |||
305 | mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL | ||
306 | mrrc p15, 0, r4, r5, c7 @ PAR | ||
307 | mrc p15, 0, r6, c10, c3, 0 @ AMAIR0 | ||
308 | mrc p15, 0, r7, c10, c3, 1 @ AMAIR1 | ||
309 | |||
310 | .if \store_to_vcpu == 0 | ||
311 | push {r2,r4-r7} | ||
312 | .else | ||
313 | str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] | ||
314 | add r12, vcpu, #CP15_OFFSET(c7_PAR) | ||
315 | strd r4, r5, [r12] | ||
316 | str r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)] | ||
317 | str r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)] | ||
318 | .endif | ||
319 | .endm | ||
320 | |||
321 | /* | ||
322 | * Reads cp15 registers from memory and writes them to hardware | ||
323 | * @read_from_vcpu: If 0, registers are read in-order from the stack, | ||
324 | * otherwise from the VCPU struct pointed to by vcpup | ||
325 | * | ||
326 | * Assumes vcpu pointer in vcpu reg | ||
327 | */ | ||
328 | .macro write_cp15_state read_from_vcpu | ||
329 | .if \read_from_vcpu == 0 | ||
330 | pop {r2,r4-r7} | ||
331 | .else | ||
332 | ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] | ||
333 | add r12, vcpu, #CP15_OFFSET(c7_PAR) | ||
334 | ldrd r4, r5, [r12] | ||
335 | ldr r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)] | ||
336 | ldr r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)] | ||
337 | .endif | ||
338 | |||
339 | mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL | ||
340 | mcrr p15, 0, r4, r5, c7 @ PAR | ||
341 | mcr p15, 0, r6, c10, c3, 0 @ AMAIR0 | ||
342 | mcr p15, 0, r7, c10, c3, 1 @ AMAIR1 | ||
343 | |||
344 | .if \read_from_vcpu == 0 | ||
345 | pop {r2-r12} | ||
346 | .else | ||
347 | ldr r2, [vcpu, #CP15_OFFSET(c13_CID)] | ||
348 | ldr r3, [vcpu, #CP15_OFFSET(c13_TID_URW)] | ||
349 | ldr r4, [vcpu, #CP15_OFFSET(c13_TID_URO)] | ||
350 | ldr r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)] | ||
351 | ldr r6, [vcpu, #CP15_OFFSET(c5_DFSR)] | ||
352 | ldr r7, [vcpu, #CP15_OFFSET(c5_IFSR)] | ||
353 | ldr r8, [vcpu, #CP15_OFFSET(c5_ADFSR)] | ||
354 | ldr r9, [vcpu, #CP15_OFFSET(c5_AIFSR)] | ||
355 | ldr r10, [vcpu, #CP15_OFFSET(c6_DFAR)] | ||
356 | ldr r11, [vcpu, #CP15_OFFSET(c6_IFAR)] | ||
357 | ldr r12, [vcpu, #CP15_OFFSET(c12_VBAR)] | ||
358 | .endif | ||
359 | |||
360 | mcr p15, 0, r2, c13, c0, 1 @ CID | ||
361 | mcr p15, 0, r3, c13, c0, 2 @ TID_URW | ||
362 | mcr p15, 0, r4, c13, c0, 3 @ TID_URO | ||
363 | mcr p15, 0, r5, c13, c0, 4 @ TID_PRIV | ||
364 | mcr p15, 0, r6, c5, c0, 0 @ DFSR | ||
365 | mcr p15, 0, r7, c5, c0, 1 @ IFSR | ||
366 | mcr p15, 0, r8, c5, c1, 0 @ ADFSR | ||
367 | mcr p15, 0, r9, c5, c1, 1 @ AIFSR | ||
368 | mcr p15, 0, r10, c6, c0, 0 @ DFAR | ||
369 | mcr p15, 0, r11, c6, c0, 2 @ IFAR | ||
370 | mcr p15, 0, r12, c12, c0, 0 @ VBAR | ||
371 | |||
372 | .if \read_from_vcpu == 0 | ||
373 | pop {r2-r12} | ||
374 | .else | ||
375 | ldr r2, [vcpu, #CP15_OFFSET(c1_SCTLR)] | ||
376 | ldr r3, [vcpu, #CP15_OFFSET(c1_CPACR)] | ||
377 | ldr r4, [vcpu, #CP15_OFFSET(c2_TTBCR)] | ||
378 | ldr r5, [vcpu, #CP15_OFFSET(c3_DACR)] | ||
379 | add r12, vcpu, #CP15_OFFSET(c2_TTBR0) | ||
380 | ldrd r6, r7, [r12] | ||
381 | add r12, vcpu, #CP15_OFFSET(c2_TTBR1) | ||
382 | ldrd r8, r9, [r12] | ||
383 | ldr r10, [vcpu, #CP15_OFFSET(c10_PRRR)] | ||
384 | ldr r11, [vcpu, #CP15_OFFSET(c10_NMRR)] | ||
385 | ldr r12, [vcpu, #CP15_OFFSET(c0_CSSELR)] | ||
386 | .endif | ||
387 | |||
388 | mcr p15, 0, r2, c1, c0, 0 @ SCTLR | ||
389 | mcr p15, 0, r3, c1, c0, 2 @ CPACR | ||
390 | mcr p15, 0, r4, c2, c0, 2 @ TTBCR | ||
391 | mcr p15, 0, r5, c3, c0, 0 @ DACR | ||
392 | mcrr p15, 0, r6, r7, c2 @ TTBR 0 | ||
393 | mcrr p15, 1, r8, r9, c2 @ TTBR 1 | ||
394 | mcr p15, 0, r10, c10, c2, 0 @ PRRR | ||
395 | mcr p15, 0, r11, c10, c2, 1 @ NMRR | ||
396 | mcr p15, 2, r12, c0, c0, 0 @ CSSELR | ||
397 | .endm | ||
398 | |||
399 | /* | ||
400 | * Save the VGIC CPU state into memory | ||
401 | * | ||
402 | * Assumes vcpu pointer in vcpu reg | ||
403 | */ | ||
404 | .macro save_vgic_state | ||
405 | /* Get VGIC VCTRL base into r2 */ | ||
406 | ldr r2, [vcpu, #VCPU_KVM] | ||
407 | ldr r2, [r2, #KVM_VGIC_VCTRL] | ||
408 | cmp r2, #0 | ||
409 | beq 2f | ||
410 | |||
411 | /* Compute the address of struct vgic_cpu */ | ||
412 | add r11, vcpu, #VCPU_VGIC_CPU | ||
413 | |||
414 | /* Save all interesting registers */ | ||
415 | ldr r4, [r2, #GICH_VMCR] | ||
416 | ldr r5, [r2, #GICH_MISR] | ||
417 | ldr r6, [r2, #GICH_EISR0] | ||
418 | ldr r7, [r2, #GICH_EISR1] | ||
419 | ldr r8, [r2, #GICH_ELRSR0] | ||
420 | ldr r9, [r2, #GICH_ELRSR1] | ||
421 | ldr r10, [r2, #GICH_APR] | ||
422 | ARM_BE8(rev r4, r4 ) | ||
423 | ARM_BE8(rev r5, r5 ) | ||
424 | ARM_BE8(rev r6, r6 ) | ||
425 | ARM_BE8(rev r7, r7 ) | ||
426 | ARM_BE8(rev r8, r8 ) | ||
427 | ARM_BE8(rev r9, r9 ) | ||
428 | ARM_BE8(rev r10, r10 ) | ||
429 | |||
430 | str r4, [r11, #VGIC_V2_CPU_VMCR] | ||
431 | str r5, [r11, #VGIC_V2_CPU_MISR] | ||
432 | #ifdef CONFIG_CPU_ENDIAN_BE8 | ||
433 | str r6, [r11, #(VGIC_V2_CPU_EISR + 4)] | ||
434 | str r7, [r11, #VGIC_V2_CPU_EISR] | ||
435 | str r8, [r11, #(VGIC_V2_CPU_ELRSR + 4)] | ||
436 | str r9, [r11, #VGIC_V2_CPU_ELRSR] | ||
437 | #else | ||
438 | str r6, [r11, #VGIC_V2_CPU_EISR] | ||
439 | str r7, [r11, #(VGIC_V2_CPU_EISR + 4)] | ||
440 | str r8, [r11, #VGIC_V2_CPU_ELRSR] | ||
441 | str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)] | ||
442 | #endif | ||
443 | str r10, [r11, #VGIC_V2_CPU_APR] | ||
444 | |||
445 | /* Clear GICH_HCR */ | ||
446 | mov r5, #0 | ||
447 | str r5, [r2, #GICH_HCR] | ||
448 | |||
449 | /* Save list registers */ | ||
450 | add r2, r2, #GICH_LR0 | ||
451 | add r3, r11, #VGIC_V2_CPU_LR | ||
452 | ldr r4, [r11, #VGIC_CPU_NR_LR] | ||
453 | 1: ldr r6, [r2], #4 | ||
454 | ARM_BE8(rev r6, r6 ) | ||
455 | str r6, [r3], #4 | ||
456 | subs r4, r4, #1 | ||
457 | bne 1b | ||
458 | 2: | ||
459 | .endm | ||
460 | |||
461 | /* | ||
462 | * Restore the VGIC CPU state from memory | ||
463 | * | ||
464 | * Assumes vcpu pointer in vcpu reg | ||
465 | */ | ||
466 | .macro restore_vgic_state | ||
467 | /* Get VGIC VCTRL base into r2 */ | ||
468 | ldr r2, [vcpu, #VCPU_KVM] | ||
469 | ldr r2, [r2, #KVM_VGIC_VCTRL] | ||
470 | cmp r2, #0 | ||
471 | beq 2f | ||
472 | |||
473 | /* Compute the address of struct vgic_cpu */ | ||
474 | add r11, vcpu, #VCPU_VGIC_CPU | ||
475 | |||
476 | /* We only restore a minimal set of registers */ | ||
477 | ldr r3, [r11, #VGIC_V2_CPU_HCR] | ||
478 | ldr r4, [r11, #VGIC_V2_CPU_VMCR] | ||
479 | ldr r8, [r11, #VGIC_V2_CPU_APR] | ||
480 | ARM_BE8(rev r3, r3 ) | ||
481 | ARM_BE8(rev r4, r4 ) | ||
482 | ARM_BE8(rev r8, r8 ) | ||
483 | |||
484 | str r3, [r2, #GICH_HCR] | ||
485 | str r4, [r2, #GICH_VMCR] | ||
486 | str r8, [r2, #GICH_APR] | ||
487 | |||
488 | /* Restore list registers */ | ||
489 | add r2, r2, #GICH_LR0 | ||
490 | add r3, r11, #VGIC_V2_CPU_LR | ||
491 | ldr r4, [r11, #VGIC_CPU_NR_LR] | ||
492 | 1: ldr r6, [r3], #4 | ||
493 | ARM_BE8(rev r6, r6 ) | ||
494 | str r6, [r2], #4 | ||
495 | subs r4, r4, #1 | ||
496 | bne 1b | ||
497 | 2: | ||
498 | .endm | ||
499 | |||
500 | #define CNTHCTL_PL1PCTEN (1 << 0) | ||
501 | #define CNTHCTL_PL1PCEN (1 << 1) | ||
502 | |||
503 | /* | ||
504 | * Save the timer state onto the VCPU and allow physical timer/counter access | ||
505 | * for the host. | ||
506 | * | ||
507 | * Assumes vcpu pointer in vcpu reg | ||
508 | * Clobbers r2-r5 | ||
509 | */ | ||
510 | .macro save_timer_state | ||
511 | ldr r4, [vcpu, #VCPU_KVM] | ||
512 | ldr r2, [r4, #KVM_TIMER_ENABLED] | ||
513 | cmp r2, #0 | ||
514 | beq 1f | ||
515 | |||
516 | mrc p15, 0, r2, c14, c3, 1 @ CNTV_CTL | ||
517 | str r2, [vcpu, #VCPU_TIMER_CNTV_CTL] | ||
518 | |||
519 | isb | ||
520 | |||
521 | mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL | ||
522 | ldr r4, =VCPU_TIMER_CNTV_CVAL | ||
523 | add r5, vcpu, r4 | ||
524 | strd r2, r3, [r5] | ||
525 | |||
526 | @ Ensure host CNTVCT == CNTPCT | ||
527 | mov r2, #0 | ||
528 | mcrr p15, 4, r2, r2, c14 @ CNTVOFF | ||
529 | |||
530 | 1: | ||
531 | mov r2, #0 @ Clear ENABLE | ||
532 | mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL | ||
533 | |||
534 | @ Allow physical timer/counter access for the host | ||
535 | mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL | ||
536 | orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN) | ||
537 | mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL | ||
538 | .endm | ||
539 | |||
540 | /* | ||
541 | * Load the timer state from the VCPU and deny physical timer/counter access | ||
542 | * for the host. | ||
543 | * | ||
544 | * Assumes vcpu pointer in vcpu reg | ||
545 | * Clobbers r2-r5 | ||
546 | */ | ||
547 | .macro restore_timer_state | ||
548 | @ Disallow physical timer access for the guest | ||
549 | @ Physical counter access is allowed | ||
550 | mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL | ||
551 | orr r2, r2, #CNTHCTL_PL1PCTEN | ||
552 | bic r2, r2, #CNTHCTL_PL1PCEN | ||
553 | mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL | ||
554 | |||
555 | ldr r4, [vcpu, #VCPU_KVM] | ||
556 | ldr r2, [r4, #KVM_TIMER_ENABLED] | ||
557 | cmp r2, #0 | ||
558 | beq 1f | ||
559 | |||
560 | ldr r2, [r4, #KVM_TIMER_CNTVOFF] | ||
561 | ldr r3, [r4, #(KVM_TIMER_CNTVOFF + 4)] | ||
562 | mcrr p15, 4, rr_lo_hi(r2, r3), c14 @ CNTVOFF | ||
563 | |||
564 | ldr r4, =VCPU_TIMER_CNTV_CVAL | ||
565 | add r5, vcpu, r4 | ||
566 | ldrd r2, r3, [r5] | ||
567 | mcrr p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL | ||
568 | isb | ||
569 | |||
570 | ldr r2, [vcpu, #VCPU_TIMER_CNTV_CTL] | ||
571 | and r2, r2, #3 | ||
572 | mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL | ||
573 | 1: | ||
574 | .endm | ||
575 | |||
576 | .equ vmentry, 0 | ||
577 | .equ vmexit, 1 | ||
578 | |||
579 | /* Configures the HSTR (Hyp System Trap Register) on entry/return | ||
580 | * (hardware reset value is 0) */ | ||
581 | .macro set_hstr operation | ||
582 | mrc p15, 4, r2, c1, c1, 3 | ||
583 | ldr r3, =HSTR_T(15) | ||
584 | .if \operation == vmentry | ||
585 | orr r2, r2, r3 @ Trap CR{15} | ||
586 | .else | ||
587 | bic r2, r2, r3 @ Don't trap any CRx accesses | ||
588 | .endif | ||
589 | mcr p15, 4, r2, c1, c1, 3 | ||
590 | .endm | ||
591 | |||
592 | /* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return | ||
593 | * (hardware reset value is 0). Keep previous value in r2. | ||
594 | * An ISB is emitted on vmexit/vmtrap, but executed on vmexit only if | ||
595 | * VFP wasn't already enabled (always executed on vmtrap). | ||
596 | * If a label is specified with vmexit, it is branched to if VFP wasn't | ||
597 | * enabled. | ||
598 | */ | ||
599 | .macro set_hcptr operation, mask, label = none | ||
600 | mrc p15, 4, r2, c1, c1, 2 | ||
601 | ldr r3, =\mask | ||
602 | .if \operation == vmentry | ||
603 | orr r3, r2, r3 @ Trap coproc-accesses defined in mask | ||
604 | .else | ||
605 | bic r3, r2, r3 @ Don't trap defined coproc-accesses | ||
606 | .endif | ||
607 | mcr p15, 4, r3, c1, c1, 2 | ||
608 | .if \operation != vmentry | ||
609 | .if \operation == vmexit | ||
610 | tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11)) | ||
611 | beq 1f | ||
612 | .endif | ||
613 | isb | ||
614 | .if \label != none | ||
615 | b \label | ||
616 | .endif | ||
617 | 1: | ||
618 | .endif | ||
619 | .endm | ||
620 | |||
621 | /* Configures the HDCR (Hyp Debug Configuration Register) on entry/return | ||
622 | * (hardware reset value is 0) */ | ||
623 | .macro set_hdcr operation | ||
624 | mrc p15, 4, r2, c1, c1, 1 | ||
625 | ldr r3, =(HDCR_TPM|HDCR_TPMCR) | ||
626 | .if \operation == vmentry | ||
627 | orr r2, r2, r3 @ Trap some perfmon accesses | ||
628 | .else | ||
629 | bic r2, r2, r3 @ Don't trap any perfmon accesses | ||
630 | .endif | ||
631 | mcr p15, 4, r2, c1, c1, 1 | ||
632 | .endm | ||
633 | |||
634 | /* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */ | ||
635 | .macro configure_hyp_role operation | ||
636 | .if \operation == vmentry | ||
637 | ldr r2, [vcpu, #VCPU_HCR] | ||
638 | ldr r3, [vcpu, #VCPU_IRQ_LINES] | ||
639 | orr r2, r2, r3 | ||
640 | .else | ||
641 | mov r2, #0 | ||
642 | .endif | ||
643 | mcr p15, 4, r2, c1, c1, 0 @ HCR | ||
644 | .endm | ||
645 | |||
646 | .macro load_vcpu | ||
647 | mrc p15, 4, vcpu, c13, c0, 2 @ HTPIDR | ||
648 | .endm | ||
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index aba61fd3697a..58dbd5c439df 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/kvm_mmio.h> | 28 | #include <asm/kvm_mmio.h> |
29 | #include <asm/kvm_asm.h> | 29 | #include <asm/kvm_asm.h> |
30 | #include <asm/kvm_emulate.h> | 30 | #include <asm/kvm_emulate.h> |
31 | #include <asm/virt.h> | ||
31 | 32 | ||
32 | #include "trace.h" | 33 | #include "trace.h" |
33 | 34 | ||
@@ -598,6 +599,9 @@ int create_hyp_mappings(void *from, void *to) | |||
598 | unsigned long start = KERN_TO_HYP((unsigned long)from); | 599 | unsigned long start = KERN_TO_HYP((unsigned long)from); |
599 | unsigned long end = KERN_TO_HYP((unsigned long)to); | 600 | unsigned long end = KERN_TO_HYP((unsigned long)to); |
600 | 601 | ||
602 | if (is_kernel_in_hyp_mode()) | ||
603 | return 0; | ||
604 | |||
601 | start = start & PAGE_MASK; | 605 | start = start & PAGE_MASK; |
602 | end = PAGE_ALIGN(end); | 606 | end = PAGE_ALIGN(end); |
603 | 607 | ||
@@ -630,6 +634,9 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) | |||
630 | unsigned long start = KERN_TO_HYP((unsigned long)from); | 634 | unsigned long start = KERN_TO_HYP((unsigned long)from); |
631 | unsigned long end = KERN_TO_HYP((unsigned long)to); | 635 | unsigned long end = KERN_TO_HYP((unsigned long)to); |
632 | 636 | ||
637 | if (is_kernel_in_hyp_mode()) | ||
638 | return 0; | ||
639 | |||
633 | /* Check for a valid kernel IO mapping */ | 640 | /* Check for a valid kernel IO mapping */ |
634 | if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) | 641 | if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) |
635 | return -EINVAL; | 642 | return -EINVAL; |
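Both early returns above hinge on is_kernel_in_hyp_mode(): with VHE there is no separate Hyp address space to populate, so the mappings are simply skipped. On 32-bit ARM the helper is constant-false; the arm64 variant reads CurrentEL, roughly (sketch of asm/virt.h):

	static inline bool is_kernel_in_hyp_mode(void)
	{
		u64 el;

		asm("mrs %0, CurrentEL" : "=r" (el));
		return el == CurrentEL_EL2;
	}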
@@ -1431,6 +1438,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
1431 | } | 1438 | } |
1432 | 1439 | ||
1433 | /* | 1440 | /* |
1441 | * Check for a cache maintenance operation. Since we | ||
1442 | * ended-up here, we know it is outside of any memory | ||
1443 | * slot. But we can't find out if that is for a device, | ||
1444 | * or if the guest is just being stupid. The only thing | ||
1445 | * we know for sure is that this range cannot be cached. | ||
1446 | * | ||
1447 | * So let's assume that the guest is just being | ||
1448 | * cautious, and skip the instruction. | ||
1449 | */ | ||
1450 | if (kvm_vcpu_dabt_is_cm(vcpu)) { | ||
1451 | kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); | ||
1452 | ret = 1; | ||
1453 | goto out_unlock; | ||
1454 | } | ||
1455 | |||
1456 | /* | ||
1434 | * The IPA is reported as [MAX:12], so we need to | 1457 | * The IPA is reported as [MAX:12], so we need to |
1435 | * complement it with the bottom 12 bits from the | 1458 | * complement it with the bottom 12 bits from the |
1436 | * faulting VA. This is always 12 bits, irrespective | 1459 | * faulting VA. This is always 12 bits, irrespective |
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index eeb85858d6bb..0048b5a62a50 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c | |||
@@ -71,7 +71,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | |||
71 | } | 71 | } |
72 | 72 | ||
73 | /* Reset core registers */ | 73 | /* Reset core registers */ |
74 | memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs)); | 74 | memcpy(&vcpu->arch.ctxt.gp_regs, reset_regs, sizeof(vcpu->arch.ctxt.gp_regs)); |
75 | 75 | ||
76 | /* Reset CP15 registers */ | 76 | /* Reset CP15 registers */ |
77 | kvm_reset_coprocs(vcpu); | 77 | kvm_reset_coprocs(vcpu); |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8cc62289a63e..cf118d93290d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig | |||
@@ -750,6 +750,19 @@ config ARM64_LSE_ATOMICS | |||
750 | not support these instructions and requires the kernel to be | 750 | not support these instructions and requires the kernel to be |
751 | built with binutils >= 2.25. | 751 | built with binutils >= 2.25. |
752 | 752 | ||
753 | config ARM64_VHE | ||
754 | bool "Enable support for Virtualization Host Extensions (VHE)" | ||
755 | default y | ||
756 | help | ||
757 | Virtualization Host Extensions (VHE) allow the kernel to run | ||
758 | directly at EL2 (instead of EL1) on processors that support | ||
759 | it. This leads to better performance for KVM, as they reduce | ||
760 | the cost of the world switch. | ||
761 | |||
762 | Selecting this option allows the VHE feature to be detected | ||
763 | at runtime, and does not affect processors that do not | ||
764 | implement this feature. | ||
765 | |||
753 | endmenu | 766 | endmenu |
754 | 767 | ||
755 | endmenu | 768 | endmenu |
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8f271b83f910..a5c769b1c65b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h | |||
@@ -30,8 +30,12 @@ | |||
30 | #define ARM64_HAS_LSE_ATOMICS 5 | 30 | #define ARM64_HAS_LSE_ATOMICS 5 |
31 | #define ARM64_WORKAROUND_CAVIUM_23154 6 | 31 | #define ARM64_WORKAROUND_CAVIUM_23154 6 |
32 | #define ARM64_WORKAROUND_834220 7 | 32 | #define ARM64_WORKAROUND_834220 7 |
33 | #define ARM64_HAS_NO_HW_PREFETCH 8 | ||
34 | #define ARM64_HAS_UAO 9 | ||
35 | #define ARM64_ALT_PAN_NOT_UAO 10 | ||
36 | #define ARM64_HAS_VIRT_HOST_EXTN 11 | ||
33 | 37 | ||
34 | #define ARM64_NCAPS 8 | 38 | #define ARM64_NCAPS 12 |
35 | 39 | ||
36 | #ifndef __ASSEMBLY__ | 40 | #ifndef __ASSEMBLY__ |
37 | 41 | ||
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index 9732908bfc8a..115ea2a64520 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h | |||
@@ -18,6 +18,7 @@ | |||
18 | 18 | ||
19 | #include <asm/cputype.h> | 19 | #include <asm/cputype.h> |
20 | #include <asm/cpufeature.h> | 20 | #include <asm/cpufeature.h> |
21 | #include <asm/virt.h> | ||
21 | 22 | ||
22 | #ifdef __KERNEL__ | 23 | #ifdef __KERNEL__ |
23 | 24 | ||
@@ -35,10 +36,21 @@ struct arch_hw_breakpoint { | |||
35 | struct arch_hw_breakpoint_ctrl ctrl; | 36 | struct arch_hw_breakpoint_ctrl ctrl; |
36 | }; | 37 | }; |
37 | 38 | ||
39 | /* Privilege Levels */ | ||
40 | #define AARCH64_BREAKPOINT_EL1 1 | ||
41 | #define AARCH64_BREAKPOINT_EL0 2 | ||
42 | |||
43 | #define DBG_HMC_HYP (1 << 13) | ||
44 | |||
38 | static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl) | 45 | static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl) |
39 | { | 46 | { |
40 | return (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) | | 47 | u32 val = (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) | |
41 | ctrl.enabled; | 48 | ctrl.enabled; |
49 | |||
50 | if (is_kernel_in_hyp_mode() && ctrl.privilege == AARCH64_BREAKPOINT_EL1) | ||
51 | val |= DBG_HMC_HYP; | ||
52 | |||
53 | return val; | ||
42 | } | 54 | } |
43 | 55 | ||
44 | static inline void decode_ctrl_reg(u32 reg, | 56 | static inline void decode_ctrl_reg(u32 reg, |
@@ -61,10 +73,6 @@ static inline void decode_ctrl_reg(u32 reg, | |||
61 | #define ARM_BREAKPOINT_STORE 2 | 73 | #define ARM_BREAKPOINT_STORE 2 |
62 | #define AARCH64_ESR_ACCESS_MASK (1 << 6) | 74 | #define AARCH64_ESR_ACCESS_MASK (1 << 6) |
63 | 75 | ||
64 | /* Privilege Levels */ | ||
65 | #define AARCH64_BREAKPOINT_EL1 1 | ||
66 | #define AARCH64_BREAKPOINT_EL0 2 | ||
67 | |||
68 | /* Lengths */ | 76 | /* Lengths */ |
69 | #define ARM_BREAKPOINT_LEN_1 0x1 | 77 | #define ARM_BREAKPOINT_LEN_1 0x1 |
70 | #define ARM_BREAKPOINT_LEN_2 0x3 | 78 | #define ARM_BREAKPOINT_LEN_2 0x3 |
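The point of relocating the privilege-level constants and adding DBG_HMC_HYP: on a VHE host the kernel runs at EL2, so a "kernel" (EL1-privilege) breakpoint must also set the HMC bit to match at the right exception level. A worked sketch, not part of the diff (ARM_BREAKPOINT_EXECUTE and ARM_BREAKPOINT_LEN_4 are pre-existing constants from this same header):

    /* An enabled, 4-byte execute breakpoint at kernel privilege.
     * Non-VHE: val == (0xf << 5) | (0 << 3) | (1 << 1) | 1 == 0x1e3.
     * VHE: encode_ctrl_reg() also ORs in DBG_HMC_HYP (bit 13), giving
     * 0x21e3, so the match happens at EL2 where the kernel now runs. */
    struct arch_hw_breakpoint_ctrl ctrl = {
        .len       = ARM_BREAKPOINT_LEN_4,    /* 0xf */
        .type      = ARM_BREAKPOINT_EXECUTE,  /* 0 */
        .privilege = AARCH64_BREAKPOINT_EL1,
        .enabled   = 1,
    };
    u32 val = encode_ctrl_reg(ctrl);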
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index d201d4b396d1..b56a0a81e4cb 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <asm/types.h> | 23 | #include <asm/types.h> |
24 | 24 | ||
25 | /* Hyp Configuration Register (HCR) bits */ | 25 | /* Hyp Configuration Register (HCR) bits */ |
26 | #define HCR_E2H (UL(1) << 34) | ||
26 | #define HCR_ID (UL(1) << 33) | 27 | #define HCR_ID (UL(1) << 33) |
27 | #define HCR_CD (UL(1) << 32) | 28 | #define HCR_CD (UL(1) << 32) |
28 | #define HCR_RW_SHIFT 31 | 29 | #define HCR_RW_SHIFT 31 |
@@ -81,7 +82,7 @@ | |||
81 | HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) | 82 | HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) |
82 | #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) | 83 | #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) |
83 | #define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) | 84 | #define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) |
84 | 85 | #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) | |
85 | 86 | ||
86 | /* Hyp System Control Register (SCTLR_EL2) bits */ | 87 | /* Hyp System Control Register (SCTLR_EL2) bits */ |
87 | #define SCTLR_EL2_EE (1 << 25) | 88 | #define SCTLR_EL2_EE (1 << 25) |
@@ -216,4 +217,7 @@ | |||
216 | ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ | 217 | ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ |
217 | ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) | 218 | ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) |
218 | 219 | ||
220 | #define CPACR_EL1_FPEN (3 << 20) | ||
221 | #define CPACR_EL1_TTA (1 << 28) | ||
222 | |||
219 | #endif /* __ARM64_KVM_ARM_H__ */ | 223 | #endif /* __ARM64_KVM_ARM_H__ */ |
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 52b777b7d407..2d02ba67478c 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h | |||
@@ -35,9 +35,6 @@ extern char __kvm_hyp_init_end[]; | |||
35 | 35 | ||
36 | extern char __kvm_hyp_vector[]; | 36 | extern char __kvm_hyp_vector[]; |
37 | 37 | ||
38 | #define __kvm_hyp_code_start __hyp_text_start | ||
39 | #define __kvm_hyp_code_end __hyp_text_end | ||
40 | |||
41 | extern void __kvm_flush_vm_context(void); | 38 | extern void __kvm_flush_vm_context(void); |
42 | extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); | 39 | extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); |
43 | extern void __kvm_tlb_flush_vmid(struct kvm *kvm); | 40 | extern void __kvm_tlb_flush_vmid(struct kvm *kvm); |
@@ -45,9 +42,12 @@ extern void __kvm_tlb_flush_vmid(struct kvm *kvm); | |||
45 | extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); | 42 | extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); |
46 | 43 | ||
47 | extern u64 __vgic_v3_get_ich_vtr_el2(void); | 44 | extern u64 __vgic_v3_get_ich_vtr_el2(void); |
45 | extern void __vgic_v3_init_lrs(void); | ||
48 | 46 | ||
49 | extern u32 __kvm_get_mdcr_el2(void); | 47 | extern u32 __kvm_get_mdcr_el2(void); |
50 | 48 | ||
49 | extern void __init_stage2_translation(void); | ||
50 | |||
51 | #endif | 51 | #endif |
52 | 52 | ||
53 | #endif /* __ARM_KVM_ASM_H__ */ | 53 | #endif /* __ARM_KVM_ASM_H__ */ |
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 779a5872a2c5..40bc1681b6d5 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <asm/kvm_mmio.h> | 29 | #include <asm/kvm_mmio.h> |
30 | #include <asm/ptrace.h> | 30 | #include <asm/ptrace.h> |
31 | #include <asm/cputype.h> | 31 | #include <asm/cputype.h> |
32 | #include <asm/virt.h> | ||
32 | 33 | ||
33 | unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); | 34 | unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); |
34 | unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); | 35 | unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); |
@@ -43,6 +44,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); | |||
43 | static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) | 44 | static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) |
44 | { | 45 | { |
45 | vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; | 46 | vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; |
47 | if (is_kernel_in_hyp_mode()) | ||
48 | vcpu->arch.hcr_el2 |= HCR_E2H; | ||
46 | if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) | 49 | if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) |
47 | vcpu->arch.hcr_el2 &= ~HCR_RW; | 50 | vcpu->arch.hcr_el2 &= ~HCR_RW; |
48 | } | 51 | } |
@@ -189,6 +192,11 @@ static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) | |||
189 | return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); | 192 | return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); |
190 | } | 193 | } |
191 | 194 | ||
195 | static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) | ||
196 | { | ||
197 | return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM); | ||
198 | } | ||
199 | |||
192 | static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) | 200 | static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) |
193 | { | 201 | { |
194 | return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); | 202 | return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); |
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 689d4c95e12f..71fa6fe9d54a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h | |||
@@ -25,7 +25,9 @@ | |||
25 | #include <linux/types.h> | 25 | #include <linux/types.h> |
26 | #include <linux/kvm_types.h> | 26 | #include <linux/kvm_types.h> |
27 | #include <asm/kvm.h> | 27 | #include <asm/kvm.h> |
28 | #include <asm/kvm_asm.h> | ||
28 | #include <asm/kvm_mmio.h> | 29 | #include <asm/kvm_mmio.h> |
30 | #include <asm/kvm_perf_event.h> | ||
29 | 31 | ||
30 | #define __KVM_HAVE_ARCH_INTC_INITIALIZED | 32 | #define __KVM_HAVE_ARCH_INTC_INITIALIZED |
31 | 33 | ||
@@ -36,10 +38,11 @@ | |||
36 | 38 | ||
37 | #include <kvm/arm_vgic.h> | 39 | #include <kvm/arm_vgic.h> |
38 | #include <kvm/arm_arch_timer.h> | 40 | #include <kvm/arm_arch_timer.h> |
41 | #include <kvm/arm_pmu.h> | ||
39 | 42 | ||
40 | #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS | 43 | #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS |
41 | 44 | ||
42 | #define KVM_VCPU_MAX_FEATURES 3 | 45 | #define KVM_VCPU_MAX_FEATURES 4 |
43 | 46 | ||
44 | int __attribute_const__ kvm_target_cpu(void); | 47 | int __attribute_const__ kvm_target_cpu(void); |
45 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu); | 48 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu); |
@@ -114,6 +117,21 @@ enum vcpu_sysreg { | |||
114 | MDSCR_EL1, /* Monitor Debug System Control Register */ | 117 | MDSCR_EL1, /* Monitor Debug System Control Register */ |
115 | MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */ | 118 | MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */ |
116 | 119 | ||
120 | /* Performance Monitors Registers */ | ||
121 | PMCR_EL0, /* Control Register */ | ||
122 | PMSELR_EL0, /* Event Counter Selection Register */ | ||
123 | PMEVCNTR0_EL0, /* Event Counter Register (0-30) */ | ||
124 | PMEVCNTR30_EL0 = PMEVCNTR0_EL0 + 30, | ||
125 | PMCCNTR_EL0, /* Cycle Counter Register */ | ||
126 | PMEVTYPER0_EL0, /* Event Type Register (0-30) */ | ||
127 | PMEVTYPER30_EL0 = PMEVTYPER0_EL0 + 30, | ||
128 | PMCCFILTR_EL0, /* Cycle Count Filter Register */ | ||
129 | PMCNTENSET_EL0, /* Count Enable Set Register */ | ||
130 | PMINTENSET_EL1, /* Interrupt Enable Set Register */ | ||
131 | PMOVSSET_EL0, /* Overflow Flag Status Set Register */ | ||
132 | PMSWINC_EL0, /* Software Increment Register */ | ||
133 | PMUSERENR_EL0, /* User Enable Register */ | ||
134 | |||
117 | /* 32bit specific registers. Keep them at the end of the range */ | 135 | /* 32bit specific registers. Keep them at the end of the range */ |
118 | DACR32_EL2, /* Domain Access Control Register */ | 136 | DACR32_EL2, /* Domain Access Control Register */ |
119 | IFSR32_EL2, /* Instruction Fault Status Register */ | 137 | IFSR32_EL2, /* Instruction Fault Status Register */ |
@@ -211,6 +229,7 @@ struct kvm_vcpu_arch { | |||
211 | /* VGIC state */ | 229 | /* VGIC state */ |
212 | struct vgic_cpu vgic_cpu; | 230 | struct vgic_cpu vgic_cpu; |
213 | struct arch_timer_cpu timer_cpu; | 231 | struct arch_timer_cpu timer_cpu; |
232 | struct kvm_pmu pmu; | ||
214 | 233 | ||
215 | /* | 234 | /* |
216 | * Anything that is not used directly from assembly code goes | 235 | * Anything that is not used directly from assembly code goes |
@@ -342,5 +361,18 @@ void kvm_arm_init_debug(void); | |||
342 | void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); | 361 | void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); |
343 | void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); | 362 | void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); |
344 | void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); | 363 | void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); |
364 | int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, | ||
365 | struct kvm_device_attr *attr); | ||
366 | int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, | ||
367 | struct kvm_device_attr *attr); | ||
368 | int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, | ||
369 | struct kvm_device_attr *attr); | ||
370 | |||
371 | #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) | ||
372 | |||
373 | static inline void __cpu_init_stage2(void) | ||
374 | { | ||
375 | kvm_call_hyp(__init_stage2_translation); | ||
376 | } | ||
345 | 377 | ||
346 | #endif /* __ARM64_KVM_HOST_H__ */ | 378 | #endif /* __ARM64_KVM_HOST_H__ */ |
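Worth noting in the kvm_host.h hunk above: the PMU state is laid out as a contiguous block inside the per-vcpu sysreg file, so the 31 event counters are addressed by arithmetic on the enum rather than by separate fields. A minimal sketch of how such a counter is reached (the helper name is hypothetical; sys_regs is the array in this header's kvm_cpu_context):

    /* Event counter n sits at PMEVCNTR0_EL0 + n in the sysreg file. */
    static u64 guest_evcntr(struct kvm_vcpu *vcpu, int n)
    {
        return vcpu->arch.ctxt.sys_regs[PMEVCNTR0_EL0 + n];
    }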
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h new file mode 100644 index 000000000000..a46b019ebcf5 --- /dev/null +++ b/arch/arm64/include/asm/kvm_hyp.h | |||
@@ -0,0 +1,181 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015 - ARM Ltd | ||
3 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | */ | ||
17 | |||
18 | #ifndef __ARM64_KVM_HYP_H__ | ||
19 | #define __ARM64_KVM_HYP_H__ | ||
20 | |||
21 | #include <linux/compiler.h> | ||
22 | #include <linux/kvm_host.h> | ||
23 | #include <asm/kvm_mmu.h> | ||
24 | #include <asm/kvm_perf_event.h> | ||
25 | #include <asm/sysreg.h> | ||
26 | |||
27 | #define __hyp_text __section(.hyp.text) notrace | ||
28 | |||
29 | static inline unsigned long __kern_hyp_va(unsigned long v) | ||
30 | { | ||
31 | asm volatile(ALTERNATIVE("and %0, %0, %1", | ||
32 | "nop", | ||
33 | ARM64_HAS_VIRT_HOST_EXTN) | ||
34 | : "+r" (v) : "i" (HYP_PAGE_OFFSET_MASK)); | ||
35 | return v; | ||
36 | } | ||
37 | |||
38 | #define kern_hyp_va(v) (typeof(v))(__kern_hyp_va((unsigned long)(v))) | ||
39 | |||
40 | static inline unsigned long __hyp_kern_va(unsigned long v) | ||
41 | { | ||
42 | u64 offset = PAGE_OFFSET - HYP_PAGE_OFFSET; | ||
43 | asm volatile(ALTERNATIVE("add %0, %0, %1", | ||
44 | "nop", | ||
45 | ARM64_HAS_VIRT_HOST_EXTN) | ||
46 | : "+r" (v) : "r" (offset)); | ||
47 | return v; | ||
48 | } | ||
49 | |||
50 | #define hyp_kern_va(v) (typeof(v))(__hyp_kern_va((unsigned long)(v))) | ||
51 | |||
52 | #define read_sysreg_elx(r,nvh,vh) \ | ||
53 | ({ \ | ||
54 | u64 reg; \ | ||
55 | asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\ | ||
56 | "mrs_s %0, " __stringify(r##vh),\ | ||
57 | ARM64_HAS_VIRT_HOST_EXTN) \ | ||
58 | : "=r" (reg)); \ | ||
59 | reg; \ | ||
60 | }) | ||
61 | |||
62 | #define write_sysreg_elx(v,r,nvh,vh) \ | ||
63 | do { \ | ||
64 | u64 __val = (u64)(v); \ | ||
65 | asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\ | ||
66 | "msr_s " __stringify(r##vh) ", %x0",\ | ||
67 | ARM64_HAS_VIRT_HOST_EXTN) \ | ||
68 | : : "rZ" (__val)); \ | ||
69 | } while (0) | ||
70 | |||
71 | /* | ||
72 | * Unified accessors for registers that have a different encoding | ||
73 | * between VHE and non-VHE. They must be specified without their "ELx" | ||
74 | * encoding. | ||
75 | */ | ||
76 | #define read_sysreg_el2(r) \ | ||
77 | ({ \ | ||
78 | u64 reg; \ | ||
79 | asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##_EL2),\ | ||
80 | "mrs %0, " __stringify(r##_EL1),\ | ||
81 | ARM64_HAS_VIRT_HOST_EXTN) \ | ||
82 | : "=r" (reg)); \ | ||
83 | reg; \ | ||
84 | }) | ||
85 | |||
86 | #define write_sysreg_el2(v,r) \ | ||
87 | do { \ | ||
88 | u64 __val = (u64)(v); \ | ||
89 | asm volatile(ALTERNATIVE("msr " __stringify(r##_EL2) ", %x0",\ | ||
90 | "msr " __stringify(r##_EL1) ", %x0",\ | ||
91 | ARM64_HAS_VIRT_HOST_EXTN) \ | ||
92 | : : "rZ" (__val)); \ | ||
93 | } while (0) | ||
94 | |||
95 | #define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02) | ||
96 | #define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02) | ||
97 | #define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12) | ||
98 | #define write_sysreg_el1(v,r) write_sysreg_elx(v, r, _EL1, _EL12) | ||
99 | |||
100 | /* The VHE specific system registers and their encoding */ | ||
101 | #define sctlr_EL12 sys_reg(3, 5, 1, 0, 0) | ||
102 | #define cpacr_EL12 sys_reg(3, 5, 1, 0, 2) | ||
103 | #define ttbr0_EL12 sys_reg(3, 5, 2, 0, 0) | ||
104 | #define ttbr1_EL12 sys_reg(3, 5, 2, 0, 1) | ||
105 | #define tcr_EL12 sys_reg(3, 5, 2, 0, 2) | ||
106 | #define afsr0_EL12 sys_reg(3, 5, 5, 1, 0) | ||
107 | #define afsr1_EL12 sys_reg(3, 5, 5, 1, 1) | ||
108 | #define esr_EL12 sys_reg(3, 5, 5, 2, 0) | ||
109 | #define far_EL12 sys_reg(3, 5, 6, 0, 0) | ||
110 | #define mair_EL12 sys_reg(3, 5, 10, 2, 0) | ||
111 | #define amair_EL12 sys_reg(3, 5, 10, 3, 0) | ||
112 | #define vbar_EL12 sys_reg(3, 5, 12, 0, 0) | ||
113 | #define contextidr_EL12 sys_reg(3, 5, 13, 0, 1) | ||
114 | #define cntkctl_EL12 sys_reg(3, 5, 14, 1, 0) | ||
115 | #define cntp_tval_EL02 sys_reg(3, 5, 14, 2, 0) | ||
116 | #define cntp_ctl_EL02 sys_reg(3, 5, 14, 2, 1) | ||
117 | #define cntp_cval_EL02 sys_reg(3, 5, 14, 2, 2) | ||
118 | #define cntv_tval_EL02 sys_reg(3, 5, 14, 3, 0) | ||
119 | #define cntv_ctl_EL02 sys_reg(3, 5, 14, 3, 1) | ||
120 | #define cntv_cval_EL02 sys_reg(3, 5, 14, 3, 2) | ||
121 | #define spsr_EL12 sys_reg(3, 5, 4, 0, 0) | ||
122 | #define elr_EL12 sys_reg(3, 5, 4, 0, 1) | ||
123 | |||
124 | /** | ||
125 | * hyp_alternate_select - Generates patchable code sequences that are | ||
126 | * used to switch between two implementations of a function, depending | ||
127 | * on the availability of a feature. | ||
128 | * | ||
129 | * @fname: a symbol name that will be defined as a function returning a | ||
130 | * function pointer whose type will match @orig and @alt | ||
131 | * @orig: A pointer to the default function, as returned by @fname when | ||
132 | * @cond doesn't hold | ||
133 | * @alt: A pointer to the alternate function, as returned by @fname | ||
134 | * when @cond holds | ||
135 | * @cond: a CPU feature (as described in asm/cpufeature.h) | ||
136 | */ | ||
137 | #define hyp_alternate_select(fname, orig, alt, cond) \ | ||
138 | typeof(orig) * __hyp_text fname(void) \ | ||
139 | { \ | ||
140 | typeof(alt) *val = orig; \ | ||
141 | asm volatile(ALTERNATIVE("nop \n", \ | ||
142 | "mov %0, %1 \n", \ | ||
143 | cond) \ | ||
144 | : "+r" (val) : "r" (alt)); \ | ||
145 | return val; \ | ||
146 | } | ||
147 | |||
148 | void __vgic_v2_save_state(struct kvm_vcpu *vcpu); | ||
149 | void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); | ||
150 | |||
151 | void __vgic_v3_save_state(struct kvm_vcpu *vcpu); | ||
152 | void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); | ||
153 | |||
154 | void __timer_save_state(struct kvm_vcpu *vcpu); | ||
155 | void __timer_restore_state(struct kvm_vcpu *vcpu); | ||
156 | |||
157 | void __sysreg_save_host_state(struct kvm_cpu_context *ctxt); | ||
158 | void __sysreg_restore_host_state(struct kvm_cpu_context *ctxt); | ||
159 | void __sysreg_save_guest_state(struct kvm_cpu_context *ctxt); | ||
160 | void __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt); | ||
161 | void __sysreg32_save_state(struct kvm_vcpu *vcpu); | ||
162 | void __sysreg32_restore_state(struct kvm_vcpu *vcpu); | ||
163 | |||
164 | void __debug_save_state(struct kvm_vcpu *vcpu, | ||
165 | struct kvm_guest_debug_arch *dbg, | ||
166 | struct kvm_cpu_context *ctxt); | ||
167 | void __debug_restore_state(struct kvm_vcpu *vcpu, | ||
168 | struct kvm_guest_debug_arch *dbg, | ||
169 | struct kvm_cpu_context *ctxt); | ||
170 | void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); | ||
171 | void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); | ||
172 | |||
173 | void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); | ||
174 | void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); | ||
175 | bool __fpsimd_enabled(void); | ||
176 | |||
177 | u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); | ||
178 | void __noreturn __hyp_do_panic(unsigned long, ...); | ||
179 | |||
180 | #endif /* __ARM64_KVM_HYP_H__ */ | ||
181 | |||
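The unified accessors above are the heart of the VHE rework: the same C statement reads a guest EL1 register through its _EL1 encoding on non-VHE hardware and through the _EL12 alias on VHE (where a plain _EL1 access from EL2 would hit the EL2 register instead), with the choice patched in at boot via the alternatives framework. A sketch of the intended use, modelled on the sysreg save/restore code elsewhere in this series (function name hypothetical):

    static void __hyp_text __save_guest_sctlr(struct kvm_cpu_context *ctxt)
    {
        /* Patched at boot: "mrs Xn, sctlr_el1" or "mrs_s Xn, sctlr_EL12". */
        ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr);
    }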
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 736433912a1e..9a9318adefa6 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h | |||
@@ -23,13 +23,16 @@ | |||
23 | #include <asm/cpufeature.h> | 23 | #include <asm/cpufeature.h> |
24 | 24 | ||
25 | /* | 25 | /* |
26 | * As we only have the TTBR0_EL2 register, we cannot express | 26 | * As ARMv8.0 only has the TTBR0_EL2 register, we cannot express |
27 | * "negative" addresses. This makes it impossible to directly share | 27 | * "negative" addresses. This makes it impossible to directly share |
28 | * mappings with the kernel. | 28 | * mappings with the kernel. |
29 | * | 29 | * |
30 | * Instead, give the HYP mode its own VA region at a fixed offset from | 30 | * Instead, give the HYP mode its own VA region at a fixed offset from |
31 | * the kernel by just masking the top bits (which are all ones for a | 31 | * the kernel by just masking the top bits (which are all ones for a |
32 | * kernel address). | 32 | * kernel address). |
33 | * | ||
34 | * ARMv8.1 (using VHE) does have a TTBR1_EL2, and doesn't use these | ||
35 | * macros (the entire kernel runs at EL2). | ||
33 | */ | 36 | */ |
34 | #define HYP_PAGE_OFFSET_SHIFT VA_BITS | 37 | #define HYP_PAGE_OFFSET_SHIFT VA_BITS |
35 | #define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1) | 38 | #define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1) |
@@ -56,12 +59,19 @@ | |||
56 | 59 | ||
57 | #ifdef __ASSEMBLY__ | 60 | #ifdef __ASSEMBLY__ |
58 | 61 | ||
62 | #include <asm/alternative.h> | ||
63 | #include <asm/cpufeature.h> | ||
64 | |||
59 | /* | 65 | /* |
60 | * Convert a kernel VA into a HYP VA. | 66 | * Convert a kernel VA into a HYP VA. |
61 | * reg: VA to be converted. | 67 | * reg: VA to be converted. |
62 | */ | 68 | */ |
63 | .macro kern_hyp_va reg | 69 | .macro kern_hyp_va reg |
70 | alternative_if_not ARM64_HAS_VIRT_HOST_EXTN | ||
64 | and \reg, \reg, #HYP_PAGE_OFFSET_MASK | 71 | and \reg, \reg, #HYP_PAGE_OFFSET_MASK |
72 | alternative_else | ||
73 | nop | ||
74 | alternative_endif | ||
65 | .endm | 75 | .endm |
66 | 76 | ||
67 | #else | 77 | #else |
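A worked example of the masking described in the comment above, assuming VA_BITS == 39 (the actual values depend on the configured VA size):

    /*
     * HYP_PAGE_OFFSET_MASK = (1UL << 39) - 1 = 0x0000007fffffffff
     * kernel VA   0xffffffc000080000
     * & mask  ->  0x0000004000080000   (HYP VA: same low bits, top cleared)
     *
     * On VHE the kern_hyp_va macro is patched to a NOP and kernel VAs
     * are used at EL2 unchanged, since TTBR1_EL2 exists there.
     */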
diff --git a/arch/arm64/include/asm/kvm_perf_event.h b/arch/arm64/include/asm/kvm_perf_event.h new file mode 100644 index 000000000000..c18fdebb8f66 --- /dev/null +++ b/arch/arm64/include/asm/kvm_perf_event.h | |||
@@ -0,0 +1,68 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2012 ARM Ltd. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __ASM_KVM_PERF_EVENT_H | ||
18 | #define __ASM_KVM_PERF_EVENT_H | ||
19 | |||
20 | #define ARMV8_PMU_MAX_COUNTERS 32 | ||
21 | #define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) | ||
22 | |||
23 | /* | ||
24 | * Per-CPU PMCR: config reg | ||
25 | */ | ||
26 | #define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */ | ||
27 | #define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */ | ||
28 | #define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */ | ||
29 | #define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ | ||
30 | #define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */ | ||
31 | #define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug */ | ||
32 | /* Determines which bit of PMCCNTR_EL0 generates an overflow */ | ||
33 | #define ARMV8_PMU_PMCR_LC (1 << 6) | ||
34 | #define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ | ||
35 | #define ARMV8_PMU_PMCR_N_MASK 0x1f | ||
36 | #define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */ | ||
37 | |||
38 | /* | ||
39 | * PMOVSR: counters overflow flag status reg | ||
40 | */ | ||
41 | #define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */ | ||
42 | #define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK | ||
43 | |||
44 | /* | ||
45 | * PMXEVTYPER: Event selection reg | ||
46 | */ | ||
47 | #define ARMV8_PMU_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */ | ||
48 | #define ARMV8_PMU_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */ | ||
49 | |||
50 | #define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0 /* Software increment event */ | ||
51 | |||
52 | /* | ||
53 | * Event filters for PMUv3 | ||
54 | */ | ||
55 | #define ARMV8_PMU_EXCLUDE_EL1 (1 << 31) | ||
56 | #define ARMV8_PMU_EXCLUDE_EL0 (1 << 30) | ||
57 | #define ARMV8_PMU_INCLUDE_EL2 (1 << 27) | ||
58 | |||
59 | /* | ||
60 | * PMUSERENR: user enable reg | ||
61 | */ | ||
62 | #define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */ | ||
63 | #define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */ | ||
64 | #define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ | ||
65 | #define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ | ||
66 | #define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ | ||
67 | |||
68 | #endif | ||
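As a small usage illustration of the new header (not in the diff): the number of implemented event counters is published in PMCR_EL0's N field and is extracted with the shift/mask pair defined above:

    static inline u32 pmcr_n(u64 pmcr)
    {
        return (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
    }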
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 7a5df5252dd7..9f22dd607958 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h | |||
@@ -23,6 +23,8 @@ | |||
23 | 23 | ||
24 | #ifndef __ASSEMBLY__ | 24 | #ifndef __ASSEMBLY__ |
25 | 25 | ||
26 | #include <asm/ptrace.h> | ||
27 | |||
26 | /* | 28 | /* |
27 | * __boot_cpu_mode records what mode CPUs were booted in. | 29 | * __boot_cpu_mode records what mode CPUs were booted in. |
28 | * A correctly-implemented bootloader must start all CPUs in the same mode: | 30 | * A correctly-implemented bootloader must start all CPUs in the same mode: |
@@ -50,6 +52,14 @@ static inline bool is_hyp_mode_mismatched(void) | |||
50 | return __boot_cpu_mode[0] != __boot_cpu_mode[1]; | 52 | return __boot_cpu_mode[0] != __boot_cpu_mode[1]; |
51 | } | 53 | } |
52 | 54 | ||
55 | static inline bool is_kernel_in_hyp_mode(void) | ||
56 | { | ||
57 | u64 el; | ||
58 | |||
59 | asm("mrs %0, CurrentEL" : "=r" (el)); | ||
60 | return el == CurrentEL_EL2; | ||
61 | } | ||
62 | |||
53 | /* The section containing the hypervisor text */ | 63 | /* The section containing the hypervisor text */ |
54 | extern char __hyp_text_start[]; | 64 | extern char __hyp_text_start[]; |
55 | extern char __hyp_text_end[]; | 65 | extern char __hyp_text_end[]; |
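The new asm/ptrace.h include supplies the CurrentEL encodings compared against in is_kernel_in_hyp_mode(). For reference (values from that header), the exception level sits in CurrentEL bits [3:2], so the test reduces to "does CurrentEL read back as 0x8":

    #define CurrentEL_EL1 (1 << 2)   /* 0x4 */
    #define CurrentEL_EL2 (2 << 2)   /* 0x8 */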
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 2d4ca4bb0dd3..f209ea151dca 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h | |||
@@ -94,6 +94,7 @@ struct kvm_regs { | |||
94 | #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ | 94 | #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ |
95 | #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ | 95 | #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ |
96 | #define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */ | 96 | #define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */ |
97 | #define KVM_ARM_VCPU_PMU_V3 3 /* Support guest PMUv3 */ | ||
97 | 98 | ||
98 | struct kvm_vcpu_init { | 99 | struct kvm_vcpu_init { |
99 | __u32 target; | 100 | __u32 target; |
@@ -204,6 +205,11 @@ struct kvm_arch_memory_slot { | |||
204 | #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 | 205 | #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 |
205 | #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 | 206 | #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 |
206 | 207 | ||
208 | /* Device Control API on vcpu fd */ | ||
209 | #define KVM_ARM_VCPU_PMU_V3_CTRL 0 | ||
210 | #define KVM_ARM_VCPU_PMU_V3_IRQ 0 | ||
211 | #define KVM_ARM_VCPU_PMU_V3_INIT 1 | ||
212 | |||
207 | /* KVM_IRQ_LINE irq field index values */ | 213 | /* KVM_IRQ_LINE irq field index values */ |
208 | #define KVM_ARM_IRQ_TYPE_SHIFT 24 | 214 | #define KVM_ARM_IRQ_TYPE_SHIFT 24 |
209 | #define KVM_ARM_IRQ_TYPE_MASK 0xff | 215 | #define KVM_ARM_IRQ_TYPE_MASK 0xff |
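The three KVM_ARM_VCPU_PMU_V3_* constants define the first device-control group usable on a vcpu fd. A hedged userspace sketch of the intended attach-then-init sequence (vcpu_fd and irq_num are placeholders; error handling elided):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int enable_guest_pmu(int vcpu_fd, int irq_num)
    {
        __u64 irq = irq_num;
        struct kvm_device_attr attr = {
            .group = KVM_ARM_VCPU_PMU_V3_CTRL,
            .attr  = KVM_ARM_VCPU_PMU_V3_IRQ,
            .addr  = (__u64)(unsigned long)&irq,  /* PPI number for the PMU */
        };

        if (ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
            return -1;

        attr.attr = KVM_ARM_VCPU_PMU_V3_INIT;
        attr.addr = 0;
        return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
    }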
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index fffa4ac6c25a..b0ab4e93db0d 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c | |||
@@ -110,9 +110,6 @@ int main(void) | |||
110 | DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); | 110 | DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); |
111 | DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); | 111 | DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); |
112 | DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); | 112 | DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); |
113 | DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); | ||
114 | DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); | ||
115 | DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); | ||
116 | DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); | 113 | DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); |
117 | #endif | 114 | #endif |
118 | #ifdef CONFIG_CPU_PM | 115 | #ifdef CONFIG_CPU_PM |
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5c90aa490a2b..ba745199297e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <asm/cpu_ops.h> | 26 | #include <asm/cpu_ops.h> |
27 | #include <asm/processor.h> | 27 | #include <asm/processor.h> |
28 | #include <asm/sysreg.h> | 28 | #include <asm/sysreg.h> |
29 | #include <asm/virt.h> | ||
29 | 30 | ||
30 | unsigned long elf_hwcap __read_mostly; | 31 | unsigned long elf_hwcap __read_mostly; |
31 | EXPORT_SYMBOL_GPL(elf_hwcap); | 32 | EXPORT_SYMBOL_GPL(elf_hwcap); |
@@ -621,6 +622,11 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry) | |||
621 | return has_sre; | 622 | return has_sre; |
622 | } | 623 | } |
623 | 624 | ||
625 | static bool runs_at_el2(const struct arm64_cpu_capabilities *entry) | ||
626 | { | ||
627 | return is_kernel_in_hyp_mode(); | ||
628 | } | ||
629 | |||
624 | static const struct arm64_cpu_capabilities arm64_features[] = { | 630 | static const struct arm64_cpu_capabilities arm64_features[] = { |
625 | { | 631 | { |
626 | .desc = "GIC system register CPU interface", | 632 | .desc = "GIC system register CPU interface", |
@@ -651,6 +657,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = { | |||
651 | .min_field_value = 2, | 657 | .min_field_value = 2, |
652 | }, | 658 | }, |
653 | #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ | 659 | #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ |
660 | { | ||
661 | .desc = "Virtualization Host Extensions", | ||
662 | .capability = ARM64_HAS_VIRT_HOST_EXTN, | ||
663 | .matches = runs_at_el2, | ||
664 | }, | ||
654 | {}, | 665 | {}, |
655 | }; | 666 | }; |
656 | 667 | ||
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 917d98108b3f..6f2f37743d3b 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <asm/cache.h> | 30 | #include <asm/cache.h> |
31 | #include <asm/cputype.h> | 31 | #include <asm/cputype.h> |
32 | #include <asm/kernel-pgtable.h> | 32 | #include <asm/kernel-pgtable.h> |
33 | #include <asm/kvm_arm.h> | ||
33 | #include <asm/memory.h> | 34 | #include <asm/memory.h> |
34 | #include <asm/pgtable-hwdef.h> | 35 | #include <asm/pgtable-hwdef.h> |
35 | #include <asm/pgtable.h> | 36 | #include <asm/pgtable.h> |
@@ -464,9 +465,27 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 | |||
464 | isb | 465 | isb |
465 | ret | 466 | ret |
466 | 467 | ||
468 | 2: | ||
469 | #ifdef CONFIG_ARM64_VHE | ||
470 | /* | ||
471 | * Check for VHE being present. For the rest of the EL2 setup, | ||
472 | * x2 being non-zero indicates that we do have VHE, and that the | ||
473 | * kernel is intended to run at EL2. | ||
474 | */ | ||
475 | mrs x2, id_aa64mmfr1_el1 | ||
476 | ubfx x2, x2, #8, #4 | ||
477 | #else | ||
478 | mov x2, xzr | ||
479 | #endif | ||
480 | |||
467 | /* Hyp configuration. */ | 481 | /* Hyp configuration. */ |
468 | 2: mov x0, #(1 << 31) // 64-bit EL1 | 482 | mov x0, #HCR_RW // 64-bit EL1 |
483 | cbz x2, set_hcr | ||
484 | orr x0, x0, #HCR_TGE // Enable Host Extensions | ||
485 | orr x0, x0, #HCR_E2H | ||
486 | set_hcr: | ||
469 | msr hcr_el2, x0 | 487 | msr hcr_el2, x0 |
488 | isb | ||
470 | 489 | ||
471 | /* Generic timers. */ | 490 | /* Generic timers. */ |
472 | mrs x0, cnthctl_el2 | 491 | mrs x0, cnthctl_el2 |
@@ -526,6 +545,13 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems | |||
526 | /* Stage-2 translation */ | 545 | /* Stage-2 translation */ |
527 | msr vttbr_el2, xzr | 546 | msr vttbr_el2, xzr |
528 | 547 | ||
548 | cbz x2, install_el2_stub | ||
549 | |||
550 | mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 | ||
551 | isb | ||
552 | ret | ||
553 | |||
554 | install_el2_stub: | ||
529 | /* Hypervisor stub */ | 555 | /* Hypervisor stub */ |
530 | adrp x0, __hyp_stub_vectors | 556 | adrp x0, __hyp_stub_vectors |
531 | add x0, x0, #:lo12:__hyp_stub_vectors | 557 | add x0, x0, #:lo12:__hyp_stub_vectors |
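The "ubfx x2, x2, #8, #4" above extracts ID_AA64MMFR1_EL1 bits [11:8], the VH field. A rough C equivalent of the probe (a sketch only; the boot path must do this in assembly, before the C environment exists):

    static bool cpu_has_vhe(void)
    {
        return ((read_sysreg(id_aa64mmfr1_el1) >> 8) & 0xf) != 0;
    }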
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f7ab14c4d5df..1b52269ffa87 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c | |||
@@ -20,6 +20,7 @@ | |||
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <asm/irq_regs.h> | 22 | #include <asm/irq_regs.h> |
23 | #include <asm/virt.h> | ||
23 | 24 | ||
24 | #include <linux/of.h> | 25 | #include <linux/of.h> |
25 | #include <linux/perf/arm_pmu.h> | 26 | #include <linux/perf/arm_pmu.h> |
@@ -691,9 +692,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, | |||
691 | 692 | ||
692 | if (attr->exclude_idle) | 693 | if (attr->exclude_idle) |
693 | return -EPERM; | 694 | return -EPERM; |
695 | if (is_kernel_in_hyp_mode() && | ||
696 | attr->exclude_kernel != attr->exclude_hv) | ||
697 | return -EINVAL; | ||
694 | if (attr->exclude_user) | 698 | if (attr->exclude_user) |
695 | config_base |= ARMV8_EXCLUDE_EL0; | 699 | config_base |= ARMV8_EXCLUDE_EL0; |
696 | if (attr->exclude_kernel) | 700 | if (!is_kernel_in_hyp_mode() && attr->exclude_kernel) |
697 | config_base |= ARMV8_EXCLUDE_EL1; | 701 | config_base |= ARMV8_EXCLUDE_EL1; |
698 | if (!attr->exclude_hv) | 702 | if (!attr->exclude_hv) |
699 | config_base |= ARMV8_INCLUDE_EL2; | 703 | config_base |= ARMV8_INCLUDE_EL2; |
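Since a VHE host kernel runs at EL2, "kernel" and "hypervisor" cycles are the same thing: the new checks reject attributes whose two exclude bits disagree, and skip the EL1 exclude on VHE. A sketch of a filter specification valid on both host types (standard perf_event_attr fields):

    struct perf_event_attr attr = {
        .type           = PERF_TYPE_HARDWARE,
        .config         = PERF_COUNT_HW_CPU_CYCLES,
        .exclude_kernel = 1,
        .exclude_hv     = 1,    /* must match exclude_kernel on a VHE host */
    };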
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index a5272c07d1cb..de7450df7629 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig | |||
@@ -36,6 +36,7 @@ config KVM | |||
36 | select HAVE_KVM_EVENTFD | 36 | select HAVE_KVM_EVENTFD |
37 | select HAVE_KVM_IRQFD | 37 | select HAVE_KVM_IRQFD |
38 | select KVM_ARM_VGIC_V3 | 38 | select KVM_ARM_VGIC_V3 |
39 | select KVM_ARM_PMU if HW_PERF_EVENTS | ||
39 | ---help--- | 40 | ---help--- |
40 | Support hosting virtualized guest machines. | 41 | Support hosting virtualized guest machines. |
41 | We don't support KVM with 16K page tables yet, due to the multiple | 42 | We don't support KVM with 16K page tables yet, due to the multiple |
@@ -48,6 +49,12 @@ config KVM_ARM_HOST | |||
48 | ---help--- | 49 | ---help--- |
49 | Provides host support for ARM processors. | 50 | Provides host support for ARM processors. |
50 | 51 | ||
52 | config KVM_ARM_PMU | ||
53 | bool | ||
54 | ---help--- | ||
55 | Adds support for a virtual Performance Monitoring Unit (PMU) in | ||
56 | virtual machines. | ||
57 | |||
51 | source drivers/vhost/Kconfig | 58 | source drivers/vhost/Kconfig |
52 | 59 | ||
53 | endif # VIRTUALIZATION | 60 | endif # VIRTUALIZATION |
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index caee9ee8e12a..122cff482ac4 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile | |||
@@ -26,3 +26,4 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o | |||
26 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o | 26 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o |
27 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o | 27 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o |
28 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o | 28 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o |
29 | kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o | ||
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 9e54ad7c240a..32fad75bb9ff 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c | |||
@@ -380,3 +380,54 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
380 | } | 380 | } |
381 | return 0; | 381 | return 0; |
382 | } | 382 | } |
383 | |||
384 | int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, | ||
385 | struct kvm_device_attr *attr) | ||
386 | { | ||
387 | int ret; | ||
388 | |||
389 | switch (attr->group) { | ||
390 | case KVM_ARM_VCPU_PMU_V3_CTRL: | ||
391 | ret = kvm_arm_pmu_v3_set_attr(vcpu, attr); | ||
392 | break; | ||
393 | default: | ||
394 | ret = -ENXIO; | ||
395 | break; | ||
396 | } | ||
397 | |||
398 | return ret; | ||
399 | } | ||
400 | |||
401 | int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, | ||
402 | struct kvm_device_attr *attr) | ||
403 | { | ||
404 | int ret; | ||
405 | |||
406 | switch (attr->group) { | ||
407 | case KVM_ARM_VCPU_PMU_V3_CTRL: | ||
408 | ret = kvm_arm_pmu_v3_get_attr(vcpu, attr); | ||
409 | break; | ||
410 | default: | ||
411 | ret = -ENXIO; | ||
412 | break; | ||
413 | } | ||
414 | |||
415 | return ret; | ||
416 | } | ||
417 | |||
418 | int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, | ||
419 | struct kvm_device_attr *attr) | ||
420 | { | ||
421 | int ret; | ||
422 | |||
423 | switch (attr->group) { | ||
424 | case KVM_ARM_VCPU_PMU_V3_CTRL: | ||
425 | ret = kvm_arm_pmu_v3_has_attr(vcpu, attr); | ||
426 | break; | ||
427 | default: | ||
428 | ret = -ENXIO; | ||
429 | break; | ||
430 | } | ||
431 | |||
432 | return ret; | ||
433 | } | ||
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index d073b5a216f7..7d8747c6427c 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S | |||
@@ -87,26 +87,13 @@ __do_hyp_init: | |||
87 | #endif | 87 | #endif |
88 | /* | 88 | /* |
89 | * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in | 89 | * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in |
90 | * TCR_EL2 and VTCR_EL2. | 90 | * TCR_EL2. |
91 | */ | 91 | */ |
92 | mrs x5, ID_AA64MMFR0_EL1 | 92 | mrs x5, ID_AA64MMFR0_EL1 |
93 | bfi x4, x5, #16, #3 | 93 | bfi x4, x5, #16, #3 |
94 | 94 | ||
95 | msr tcr_el2, x4 | 95 | msr tcr_el2, x4 |
96 | 96 | ||
97 | ldr x4, =VTCR_EL2_FLAGS | ||
98 | bfi x4, x5, #16, #3 | ||
99 | /* | ||
100 | * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in | ||
101 | * VTCR_EL2. | ||
102 | */ | ||
103 | mrs x5, ID_AA64MMFR1_EL1 | ||
104 | ubfx x5, x5, #5, #1 | ||
105 | lsl x5, x5, #VTCR_EL2_VS | ||
106 | orr x4, x4, x5 | ||
107 | |||
108 | msr vtcr_el2, x4 | ||
109 | |||
110 | mrs x4, mair_el1 | 97 | mrs x4, mair_el1 |
111 | msr mair_el2, x4 | 98 | msr mair_el2, x4 |
112 | isb | 99 | isb |
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 0ccdcbbef3c2..0689a74e6ba0 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S | |||
@@ -17,7 +17,9 @@ | |||
17 | 17 | ||
18 | #include <linux/linkage.h> | 18 | #include <linux/linkage.h> |
19 | 19 | ||
20 | #include <asm/alternative.h> | ||
20 | #include <asm/assembler.h> | 21 | #include <asm/assembler.h> |
22 | #include <asm/cpufeature.h> | ||
21 | 23 | ||
22 | /* | 24 | /* |
23 | * u64 kvm_call_hyp(void *hypfn, ...); | 25 | * u64 kvm_call_hyp(void *hypfn, ...); |
@@ -38,6 +40,11 @@ | |||
38 | * arch/arm64/kernel/hyp_stub.S. | 40 | * arch/arm64/kernel/hyp_stub.S. |
39 | */ | 41 | */ |
40 | ENTRY(kvm_call_hyp) | 42 | ENTRY(kvm_call_hyp) |
43 | alternative_if_not ARM64_HAS_VIRT_HOST_EXTN | ||
41 | hvc #0 | 44 | hvc #0 |
42 | ret | 45 | ret |
46 | alternative_else | ||
47 | b __vhe_hyp_call | ||
48 | nop | ||
49 | alternative_endif | ||
43 | ENDPROC(kvm_call_hyp) | 50 | ENDPROC(kvm_call_hyp) |
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 826032bc3945..b6a8fc5ad1af 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile | |||
@@ -2,9 +2,12 @@ | |||
2 | # Makefile for Kernel-based Virtual Machine module, HYP part | 2 | # Makefile for Kernel-based Virtual Machine module, HYP part |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o | 5 | KVM=../../../../virt/kvm |
6 | |||
7 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o | ||
8 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o | ||
9 | |||
6 | obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o | 10 | obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o |
7 | obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o | ||
8 | obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o | 11 | obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o |
9 | obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o | 12 | obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o |
10 | obj-$(CONFIG_KVM_ARM_HOST) += entry.o | 13 | obj-$(CONFIG_KVM_ARM_HOST) += entry.o |
@@ -12,3 +15,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += switch.o | |||
12 | obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o | 15 | obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o |
13 | obj-$(CONFIG_KVM_ARM_HOST) += tlb.o | 16 | obj-$(CONFIG_KVM_ARM_HOST) += tlb.o |
14 | obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o | 17 | obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o |
18 | obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o | ||
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index c9c1e97501a9..053cf8b057c1 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c | |||
@@ -19,9 +19,7 @@ | |||
19 | #include <linux/kvm_host.h> | 19 | #include <linux/kvm_host.h> |
20 | 20 | ||
21 | #include <asm/kvm_asm.h> | 21 | #include <asm/kvm_asm.h> |
22 | #include <asm/kvm_mmu.h> | 22 | #include <asm/kvm_hyp.h> |
23 | |||
24 | #include "hyp.h" | ||
25 | 23 | ||
26 | #define read_debug(r,n) read_sysreg(r##n##_el1) | 24 | #define read_debug(r,n) read_sysreg(r##n##_el1) |
27 | #define write_debug(v,r,n) write_sysreg(v, r##n##_el1) | 25 | #define write_debug(v,r,n) write_sysreg(v, r##n##_el1) |
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index fd0fbe9b7e6a..ce9e5e5f28cf 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S | |||
@@ -130,9 +130,15 @@ ENDPROC(__guest_exit) | |||
130 | ENTRY(__fpsimd_guest_restore) | 130 | ENTRY(__fpsimd_guest_restore) |
131 | stp x4, lr, [sp, #-16]! | 131 | stp x4, lr, [sp, #-16]! |
132 | 132 | ||
133 | alternative_if_not ARM64_HAS_VIRT_HOST_EXTN | ||
133 | mrs x2, cptr_el2 | 134 | mrs x2, cptr_el2 |
134 | bic x2, x2, #CPTR_EL2_TFP | 135 | bic x2, x2, #CPTR_EL2_TFP |
135 | msr cptr_el2, x2 | 136 | msr cptr_el2, x2 |
137 | alternative_else | ||
138 | mrs x2, cpacr_el1 | ||
139 | orr x2, x2, #CPACR_EL1_FPEN | ||
140 | msr cpacr_el1, x2 | ||
141 | alternative_endif | ||
136 | isb | 142 | isb |
137 | 143 | ||
138 | mrs x3, tpidr_el2 | 144 | mrs x3, tpidr_el2 |
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 93e8d983c0bd..3488894397ff 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S | |||
@@ -19,7 +19,6 @@ | |||
19 | 19 | ||
20 | #include <asm/alternative.h> | 20 | #include <asm/alternative.h> |
21 | #include <asm/assembler.h> | 21 | #include <asm/assembler.h> |
22 | #include <asm/asm-offsets.h> | ||
23 | #include <asm/cpufeature.h> | 22 | #include <asm/cpufeature.h> |
24 | #include <asm/kvm_arm.h> | 23 | #include <asm/kvm_arm.h> |
25 | #include <asm/kvm_asm.h> | 24 | #include <asm/kvm_asm.h> |
@@ -38,10 +37,42 @@ | |||
38 | ldp x0, x1, [sp], #16 | 37 | ldp x0, x1, [sp], #16 |
39 | .endm | 38 | .endm |
40 | 39 | ||
40 | .macro do_el2_call | ||
41 | /* | ||
42 | * Shuffle the parameters before calling the function | ||
43 | * pointed to in x0. Assumes parameters in x[1,2,3]. | ||
44 | */ | ||
45 | sub sp, sp, #16 | ||
46 | str lr, [sp] | ||
47 | mov lr, x0 | ||
48 | mov x0, x1 | ||
49 | mov x1, x2 | ||
50 | mov x2, x3 | ||
51 | blr lr | ||
52 | ldr lr, [sp] | ||
53 | add sp, sp, #16 | ||
54 | .endm | ||
55 | |||
56 | ENTRY(__vhe_hyp_call) | ||
57 | do_el2_call | ||
58 | /* | ||
59 | * We used to rely on having an exception return to get | ||
60 | * an implicit isb. In the E2H case, we don't have it anymore. | ||
61 | * Rather than changing all the leaf functions, just do it here | ||
62 | * before returning to the rest of the kernel. | ||
63 | */ | ||
64 | isb | ||
65 | ret | ||
66 | ENDPROC(__vhe_hyp_call) | ||
67 | |||
41 | el1_sync: // Guest trapped into EL2 | 68 | el1_sync: // Guest trapped into EL2 |
42 | save_x0_to_x3 | 69 | save_x0_to_x3 |
43 | 70 | ||
71 | alternative_if_not ARM64_HAS_VIRT_HOST_EXTN | ||
44 | mrs x1, esr_el2 | 72 | mrs x1, esr_el2 |
73 | alternative_else | ||
74 | mrs x1, esr_el1 | ||
75 | alternative_endif | ||
45 | lsr x2, x1, #ESR_ELx_EC_SHIFT | 76 | lsr x2, x1, #ESR_ELx_EC_SHIFT |
46 | 77 | ||
47 | cmp x2, #ESR_ELx_EC_HVC64 | 78 | cmp x2, #ESR_ELx_EC_HVC64 |
@@ -58,19 +89,13 @@ el1_sync: // Guest trapped into EL2 | |||
58 | mrs x0, vbar_el2 | 89 | mrs x0, vbar_el2 |
59 | b 2f | 90 | b 2f |
60 | 91 | ||
61 | 1: stp lr, xzr, [sp, #-16]! | 92 | 1: |
62 | |||
63 | /* | 93 | /* |
64 | * Compute the function address in EL2, and shuffle the parameters. | 94 | * Perform the EL2 call |
65 | */ | 95 | */ |
66 | kern_hyp_va x0 | 96 | kern_hyp_va x0 |
67 | mov lr, x0 | 97 | do_el2_call |
68 | mov x0, x1 | ||
69 | mov x1, x2 | ||
70 | mov x2, x3 | ||
71 | blr lr | ||
72 | 98 | ||
73 | ldp lr, xzr, [sp], #16 | ||
74 | 2: eret | 99 | 2: eret |
75 | 100 | ||
76 | el1_trap: | 101 | el1_trap: |
@@ -83,72 +108,10 @@ el1_trap: | |||
83 | cmp x2, #ESR_ELx_EC_FP_ASIMD | 108 | cmp x2, #ESR_ELx_EC_FP_ASIMD |
84 | b.eq __fpsimd_guest_restore | 109 | b.eq __fpsimd_guest_restore |
85 | 110 | ||
86 | cmp x2, #ESR_ELx_EC_DABT_LOW | 111 | mrs x0, tpidr_el2 |
87 | mov x0, #ESR_ELx_EC_IABT_LOW | ||
88 | ccmp x2, x0, #4, ne | ||
89 | b.ne 1f // Not an abort we care about | ||
90 | |||
91 | /* This is an abort. Check for permission fault */ | ||
92 | alternative_if_not ARM64_WORKAROUND_834220 | ||
93 | and x2, x1, #ESR_ELx_FSC_TYPE | ||
94 | cmp x2, #FSC_PERM | ||
95 | b.ne 1f // Not a permission fault | ||
96 | alternative_else | ||
97 | nop // Use the permission fault path to | ||
98 | nop // check for a valid S1 translation, | ||
99 | nop // regardless of the ESR value. | ||
100 | alternative_endif | ||
101 | |||
102 | /* | ||
103 | * Check for Stage-1 page table walk, which is guaranteed | ||
104 | * to give a valid HPFAR_EL2. | ||
105 | */ | ||
106 | tbnz x1, #7, 1f // S1PTW is set | ||
107 | |||
108 | /* Preserve PAR_EL1 */ | ||
109 | mrs x3, par_el1 | ||
110 | stp x3, xzr, [sp, #-16]! | ||
111 | |||
112 | /* | ||
113 | * Permission fault, HPFAR_EL2 is invalid. | ||
114 | * Resolve the IPA the hard way using the guest VA. | ||
115 | * Stage-1 translation already validated the memory access rights. | ||
116 | * As such, we can use the EL1 translation regime, and don't have | ||
117 | * to distinguish between EL0 and EL1 access. | ||
118 | */ | ||
119 | mrs x2, far_el2 | ||
120 | at s1e1r, x2 | ||
121 | isb | ||
122 | |||
123 | /* Read result */ | ||
124 | mrs x3, par_el1 | ||
125 | ldp x0, xzr, [sp], #16 // Restore PAR_EL1 from the stack | ||
126 | msr par_el1, x0 | ||
127 | tbnz x3, #0, 3f // Bail out if we failed the translation | ||
128 | ubfx x3, x3, #12, #36 // Extract IPA | ||
129 | lsl x3, x3, #4 // and present it like HPFAR | ||
130 | b 2f | ||
131 | |||
132 | 1: mrs x3, hpfar_el2 | ||
133 | mrs x2, far_el2 | ||
134 | |||
135 | 2: mrs x0, tpidr_el2 | ||
136 | str w1, [x0, #VCPU_ESR_EL2] | ||
137 | str x2, [x0, #VCPU_FAR_EL2] | ||
138 | str x3, [x0, #VCPU_HPFAR_EL2] | ||
139 | |||
140 | mov x1, #ARM_EXCEPTION_TRAP | 112 | mov x1, #ARM_EXCEPTION_TRAP |
141 | b __guest_exit | 113 | b __guest_exit |
142 | 114 | ||
143 | /* | ||
144 | * Translation failed. Just return to the guest and | ||
145 | * let it fault again. Another CPU is probably playing | ||
146 | * behind our back. | ||
147 | */ | ||
148 | 3: restore_x0_to_x3 | ||
149 | |||
150 | eret | ||
151 | |||
152 | el1_irq: | 115 | el1_irq: |
153 | save_x0_to_x3 | 116 | save_x0_to_x3 |
154 | mrs x0, tpidr_el2 | 117 | mrs x0, tpidr_el2 |
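The large deletion above is the AT S1E1R sequence that resolved the IPA for permission faults (where HPFAR_EL2 is not guaranteed valid); this series moves that logic into C in the hyp switch code. A hedged transliteration of the removed assembly, to make the relocated logic easier to follow (a sketch, not the exact kernel function; the bit positions come straight from the deleted "ubfx x3, x3, #12, #36 / lsl x3, x3, #4"):

    static bool translate_far_to_hpfar(u64 far, u64 *hpfar)
    {
        u64 par = read_sysreg(par_el1);    /* preserve PAR_EL1 */
        u64 tmp;

        asm volatile("at s1e1r, %0" : : "r" (far));
        isb();

        tmp = read_sysreg(par_el1);
        write_sysreg(par, par_el1);        /* restore PAR_EL1 */

        if (tmp & 1)                       /* PAR.F set: walk failed */
            return false;                  /* let the guest fault again */

        *hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4;
        return true;
    }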
diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h deleted file mode 100644 index fb275178b6af..000000000000 --- a/arch/arm64/kvm/hyp/hyp.h +++ /dev/null | |||
@@ -1,90 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015 - ARM Ltd | ||
3 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | */ | ||
17 | |||
18 | #ifndef __ARM64_KVM_HYP_H__ | ||
19 | #define __ARM64_KVM_HYP_H__ | ||
20 | |||
21 | #include <linux/compiler.h> | ||
22 | #include <linux/kvm_host.h> | ||
23 | #include <asm/kvm_mmu.h> | ||
24 | #include <asm/sysreg.h> | ||
25 | |||
26 | #define __hyp_text __section(.hyp.text) notrace | ||
27 | |||
28 | #define kern_hyp_va(v) (typeof(v))((unsigned long)(v) & HYP_PAGE_OFFSET_MASK) | ||
29 | #define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \ | ||
30 | + PAGE_OFFSET) | ||
31 | |||
32 | /** | ||
33 | * hyp_alternate_select - Generates patchable code sequences that are | ||
34 | * used to switch between two implementations of a function, depending | ||
35 | * on the availability of a feature. | ||
36 | * | ||
37 | * @fname: a symbol name that will be defined as a function returning a | ||
38 | * function pointer whose type will match @orig and @alt | ||
39 | * @orig: A pointer to the default function, as returned by @fname when | ||
40 | * @cond doesn't hold | ||
41 | * @alt: A pointer to the alternate function, as returned by @fname | ||
42 | * when @cond holds | ||
43 | * @cond: a CPU feature (as described in asm/cpufeature.h) | ||
44 | */ | ||
45 | #define hyp_alternate_select(fname, orig, alt, cond) \ | ||
46 | typeof(orig) * __hyp_text fname(void) \ | ||
47 | { \ | ||
48 | typeof(alt) *val = orig; \ | ||
49 | asm volatile(ALTERNATIVE("nop \n", \ | ||
50 | "mov %0, %1 \n", \ | ||
51 | cond) \ | ||
52 | : "+r" (val) : "r" (alt)); \ | ||
53 | return val; \ | ||
54 | } | ||
55 | |||
56 | void __vgic_v2_save_state(struct kvm_vcpu *vcpu); | ||
57 | void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); | ||
58 | |||
59 | void __vgic_v3_save_state(struct kvm_vcpu *vcpu); | ||
60 | void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); | ||
61 | |||
62 | void __timer_save_state(struct kvm_vcpu *vcpu); | ||
63 | void __timer_restore_state(struct kvm_vcpu *vcpu); | ||
64 | |||
65 | void __sysreg_save_state(struct kvm_cpu_context *ctxt); | ||
66 | void __sysreg_restore_state(struct kvm_cpu_context *ctxt); | ||
67 | void __sysreg32_save_state(struct kvm_vcpu *vcpu); | ||
68 | void __sysreg32_restore_state(struct kvm_vcpu *vcpu); | ||
69 | |||
70 | void __debug_save_state(struct kvm_vcpu *vcpu, | ||
71 | struct kvm_guest_debug_arch *dbg, | ||
72 | struct kvm_cpu_context *ctxt); | ||
73 | void __debug_restore_state(struct kvm_vcpu *vcpu, | ||
74 | struct kvm_guest_debug_arch *dbg, | ||
75 | struct kvm_cpu_context *ctxt); | ||
76 | void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); | ||
77 | void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); | ||
78 | |||
79 | void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); | ||
80 | void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); | ||
81 | static inline bool __fpsimd_enabled(void) | ||
82 | { | ||
83 | return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); | ||
84 | } | ||
85 | |||
86 | u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); | ||
87 | void __noreturn __hyp_do_panic(unsigned long, ...); | ||
88 | |||
89 | #endif /* __ARM64_KVM_HYP_H__ */ | ||
90 | |||
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c new file mode 100644 index 000000000000..bfc54fd82797 --- /dev/null +++ b/arch/arm64/kvm/hyp/s2-setup.c | |||
@@ -0,0 +1,43 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2016 - ARM Ltd | ||
3 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | */ | ||
17 | |||
18 | #include <linux/types.h> | ||
19 | #include <asm/kvm_arm.h> | ||
20 | #include <asm/kvm_asm.h> | ||
21 | #include <asm/kvm_hyp.h> | ||
22 | |||
23 | void __hyp_text __init_stage2_translation(void) | ||
24 | { | ||
25 | u64 val = VTCR_EL2_FLAGS; | ||
26 | u64 tmp; | ||
27 | |||
28 | /* | ||
29 | * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS | ||
30 | * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while | ||
31 | * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2... | ||
32 | */ | ||
33 | val |= (read_sysreg(id_aa64mmfr0_el1) & 7) << 16; | ||
34 | |||
35 | /* | ||
36 | * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS | ||
37 | * bit in VTCR_EL2. | ||
38 | */ | ||
39 | tmp = (read_sysreg(id_aa64mmfr1_el1) >> 4) & 0xf; | ||
40 | val |= (tmp == 2) ? VTCR_EL2_VS : 0; | ||
41 | |||
42 | write_sysreg(val, vtcr_el2); | ||
43 | } | ||
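A worked instance of the arithmetic above (assumed CPU: PARange == 0b0101, i.e. 48-bit PA, and VMIDBits == 2, i.e. 16-bit VMIDs):

    /*
     * (mmfr0 & 7) << 16        -> PS = 5 in VTCR_EL2[18:16]
     * (mmfr1 >> 4) & 0xf == 2  -> VTCR_EL2_VS set (16-bit VMIDs)
     * val = VTCR_EL2_FLAGS | (5 << 16) | VTCR_EL2_VS
     *
     * Any spill of a 4-bit PARange past the 3-bit PS field would land
     * in bit 19, which is RES0 -- hence the comment that the width
     * mismatch is harmless.
     */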
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index f0e7bdfae134..437cfad5e3d8 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c | |||
@@ -15,7 +15,53 @@ | |||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | 15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include "hyp.h" | 18 | #include <linux/types.h> |
19 | #include <asm/kvm_asm.h> | ||
20 | #include <asm/kvm_hyp.h> | ||
21 | |||
22 | static bool __hyp_text __fpsimd_enabled_nvhe(void) | ||
23 | { | ||
24 | return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); | ||
25 | } | ||
26 | |||
27 | static bool __hyp_text __fpsimd_enabled_vhe(void) | ||
28 | { | ||
29 | return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN); | ||
30 | } | ||
31 | |||
32 | static hyp_alternate_select(__fpsimd_is_enabled, | ||
33 | __fpsimd_enabled_nvhe, __fpsimd_enabled_vhe, | ||
34 | ARM64_HAS_VIRT_HOST_EXTN); | ||
35 | |||
36 | bool __hyp_text __fpsimd_enabled(void) | ||
37 | { | ||
38 | return __fpsimd_is_enabled()(); | ||
39 | } | ||
40 | |||
41 | static void __hyp_text __activate_traps_vhe(void) | ||
42 | { | ||
43 | u64 val; | ||
44 | |||
45 | val = read_sysreg(cpacr_el1); | ||
46 | val |= CPACR_EL1_TTA; | ||
47 | val &= ~CPACR_EL1_FPEN; | ||
48 | write_sysreg(val, cpacr_el1); | ||
49 | |||
50 | write_sysreg(__kvm_hyp_vector, vbar_el1); | ||
51 | } | ||
52 | |||
53 | static void __hyp_text __activate_traps_nvhe(void) | ||
54 | { | ||
55 | u64 val; | ||
56 | |||
57 | val = CPTR_EL2_DEFAULT; | ||
58 | val |= CPTR_EL2_TTA | CPTR_EL2_TFP; | ||
59 | write_sysreg(val, cptr_el2); | ||
60 | } | ||
61 | |||
62 | static hyp_alternate_select(__activate_traps_arch, | ||
63 | __activate_traps_nvhe, __activate_traps_vhe, | ||
64 | ARM64_HAS_VIRT_HOST_EXTN); | ||
19 | 65 | ||
20 | static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) | 66 | static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) |
21 | { | 67 | { |
@@ -36,20 +82,37 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) | |||
36 | write_sysreg(val, hcr_el2); | 82 | write_sysreg(val, hcr_el2); |
37 | /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ | 83 | /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ |
38 | write_sysreg(1 << 15, hstr_el2); | 84 | write_sysreg(1 << 15, hstr_el2); |
85 | /* Make sure we trap PMU access from EL0 to EL2 */ | ||
86 | write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); | ||
87 | write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); | ||
88 | __activate_traps_arch()(); | ||
89 | } | ||
39 | 90 | ||
40 | val = CPTR_EL2_DEFAULT; | 91 | static void __hyp_text __deactivate_traps_vhe(void) |
41 | val |= CPTR_EL2_TTA | CPTR_EL2_TFP; | 92 | { |
42 | write_sysreg(val, cptr_el2); | 93 | extern char vectors[]; /* kernel exception vectors */ |
43 | 94 | ||
44 | write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); | 95 | write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); |
96 | write_sysreg(CPACR_EL1_FPEN, cpacr_el1); | ||
97 | write_sysreg(vectors, vbar_el1); | ||
45 | } | 98 | } |
46 | 99 | ||
47 | static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) | 100 | static void __hyp_text __deactivate_traps_nvhe(void) |
48 | { | 101 | { |
49 | write_sysreg(HCR_RW, hcr_el2); | 102 | write_sysreg(HCR_RW, hcr_el2); |
103 | write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); | ||
104 | } | ||
105 | |||
106 | static hyp_alternate_select(__deactivate_traps_arch, | ||
107 | __deactivate_traps_nvhe, __deactivate_traps_vhe, | ||
108 | ARM64_HAS_VIRT_HOST_EXTN); | ||
109 | |||
110 | static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) | ||
111 | { | ||
112 | __deactivate_traps_arch()(); | ||
50 | write_sysreg(0, hstr_el2); | 113 | write_sysreg(0, hstr_el2); |
51 | write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2); | 114 | write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2); |
52 | write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); | 115 | write_sysreg(0, pmuserenr_el0); |
53 | } | 116 | } |
54 | 117 | ||
55 | static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) | 118 | static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) |
@@ -89,6 +152,86 @@ static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) | |||
89 | __vgic_call_restore_state()(vcpu); | 152 | __vgic_call_restore_state()(vcpu); |
90 | } | 153 | } |
91 | 154 | ||
155 | static bool __hyp_text __true_value(void) | ||
156 | { | ||
157 | return true; | ||
158 | } | ||
159 | |||
160 | static bool __hyp_text __false_value(void) | ||
161 | { | ||
162 | return false; | ||
163 | } | ||
164 | |||
165 | static hyp_alternate_select(__check_arm_834220, | ||
166 | __false_value, __true_value, | ||
167 | ARM64_WORKAROUND_834220); | ||
168 | |||
169 | static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) | ||
170 | { | ||
171 | u64 par, tmp; | ||
172 | |||
173 | /* | ||
174 | * Resolve the IPA the hard way using the guest VA. | ||
175 | * | ||
176 | * Stage-1 translation already validated the memory access | ||
177 | * rights. As such, we can use the EL1 translation regime, and | ||
178 | * don't have to distinguish between EL0 and EL1 access. | ||
179 | * | ||
180 | * We do need to save/restore PAR_EL1 though, as we haven't | ||
181 | * saved the guest context yet, and we may return early... | ||
182 | */ | ||
183 | par = read_sysreg(par_el1); | ||
184 | asm volatile("at s1e1r, %0" : : "r" (far)); | ||
185 | isb(); | ||
186 | |||
187 | tmp = read_sysreg(par_el1); | ||
188 | write_sysreg(par, par_el1); | ||
189 | |||
190 | if (unlikely(tmp & 1)) | ||
191 | return false; /* Translation failed, back to guest */ | ||
192 | |||
193 | /* Convert PAR to HPFAR format */ | ||
194 | *hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4; | ||
195 | return true; | ||
196 | } | ||
197 | |||
198 | static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) | ||
199 | { | ||
200 | u64 esr = read_sysreg_el2(esr); | ||
201 | u8 ec = esr >> ESR_ELx_EC_SHIFT; | ||
202 | u64 hpfar, far; | ||
203 | |||
204 | vcpu->arch.fault.esr_el2 = esr; | ||
205 | |||
206 | if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) | ||
207 | return true; | ||
208 | |||
209 | far = read_sysreg_el2(far); | ||
210 | |||
211 | /* | ||
212 | * The HPFAR can be invalid if the stage 2 fault did not | ||
213 | * happen during a stage 1 page table walk (the ESR_EL2.S1PTW | ||
214 | * bit is clear) and one of the two following cases is true: | ||
215 | * 1. The fault was due to a permission fault | ||
216 | * 2. The processor carries erratum 834220 | ||
217 | * | ||
218 | * Therefore, for all non S1PTW faults where we either have a | ||
219 | * permission fault or the errata workaround is enabled, we | ||
220 | * resolve the IPA using the AT instruction. | ||
221 | */ | ||
222 | if (!(esr & ESR_ELx_S1PTW) && | ||
223 | (__check_arm_834220()() || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { | ||
224 | if (!__translate_far_to_hpfar(far, &hpfar)) | ||
225 | return false; | ||
226 | } else { | ||
227 | hpfar = read_sysreg(hpfar_el2); | ||
228 | } | ||
229 | |||
230 | vcpu->arch.fault.far_el2 = far; | ||
231 | vcpu->arch.fault.hpfar_el2 = hpfar; | ||
232 | return true; | ||
233 | } | ||
234 | |||
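To make the shift arithmetic in __translate_far_to_hpfar() easier to follow, here is a standalone sketch of the PAR_EL1-to-HPFAR_EL2 conversion. The input value is invented; the assumed layout is the architectural one the comment relies on (PAR_EL1 carries PA[47:12] in place, HPFAR_EL2 wants the faulting page at bits [39:4]).

#include <stdint.h>
#include <stdio.h>

static int par_to_hpfar(uint64_t par, uint64_t *hpfar)
{
	if (par & 1)	/* PAR_EL1.F set: the AT walk aborted */
		return 0;

	/* Keep PA[47:12] (36 bits) and re-base it at bit 4. */
	*hpfar = ((par >> 12) & ((1ULL << 36) - 1)) << 4;
	return 1;
}

int main(void)
{
	uint64_t hpfar;

	if (par_to_hpfar(0x89abc000ULL, &hpfar))
		printf("HPFAR_EL2 = %#llx\n", (unsigned long long)hpfar);
	return 0;
}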
92 | static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) | 235 | static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) |
93 | { | 236 | { |
94 | struct kvm_cpu_context *host_ctxt; | 237 | struct kvm_cpu_context *host_ctxt; |
@@ -102,7 +245,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) | |||
102 | host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); | 245 | host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); |
103 | guest_ctxt = &vcpu->arch.ctxt; | 246 | guest_ctxt = &vcpu->arch.ctxt; |
104 | 247 | ||
105 | __sysreg_save_state(host_ctxt); | 248 | __sysreg_save_host_state(host_ctxt); |
106 | __debug_cond_save_host_state(vcpu); | 249 | __debug_cond_save_host_state(vcpu); |
107 | 250 | ||
108 | __activate_traps(vcpu); | 251 | __activate_traps(vcpu); |
@@ -116,16 +259,20 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) | |||
116 | * to Cortex-A57 erratum #852523. | 259 | * to Cortex-A57 erratum #852523. |
117 | */ | 260 | */ |
118 | __sysreg32_restore_state(vcpu); | 261 | __sysreg32_restore_state(vcpu); |
119 | __sysreg_restore_state(guest_ctxt); | 262 | __sysreg_restore_guest_state(guest_ctxt); |
120 | __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); | 263 | __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); |
121 | 264 | ||
122 | /* Jump in the fire! */ | 265 | /* Jump in the fire! */ |
266 | again: | ||
123 | exit_code = __guest_enter(vcpu, host_ctxt); | 267 | exit_code = __guest_enter(vcpu, host_ctxt); |
124 | /* And we're baaack! */ | 268 | /* And we're baaack! */ |
125 | 269 | ||
270 | if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu)) | ||
271 | goto again; | ||
272 | |||
126 | fp_enabled = __fpsimd_enabled(); | 273 | fp_enabled = __fpsimd_enabled(); |
127 | 274 | ||
128 | __sysreg_save_state(guest_ctxt); | 275 | __sysreg_save_guest_state(guest_ctxt); |
129 | __sysreg32_save_state(vcpu); | 276 | __sysreg32_save_state(vcpu); |
130 | __timer_save_state(vcpu); | 277 | __timer_save_state(vcpu); |
131 | __vgic_save_state(vcpu); | 278 | __vgic_save_state(vcpu); |
@@ -133,7 +280,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) | |||
133 | __deactivate_traps(vcpu); | 280 | __deactivate_traps(vcpu); |
134 | __deactivate_vm(vcpu); | 281 | __deactivate_vm(vcpu); |
135 | 282 | ||
136 | __sysreg_restore_state(host_ctxt); | 283 | __sysreg_restore_host_state(host_ctxt); |
137 | 284 | ||
138 | if (fp_enabled) { | 285 | if (fp_enabled) { |
139 | __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); | 286 | __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); |
@@ -150,11 +297,34 @@ __alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu); | |||
150 | 297 | ||
151 | static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; | 298 | static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; |
152 | 299 | ||
153 | void __hyp_text __noreturn __hyp_panic(void) | 300 | static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par) |
154 | { | 301 | { |
155 | unsigned long str_va = (unsigned long)__hyp_panic_string; | 302 | unsigned long str_va = (unsigned long)__hyp_panic_string; |
156 | u64 spsr = read_sysreg(spsr_el2); | 303 | |
157 | u64 elr = read_sysreg(elr_el2); | 304 | __hyp_do_panic(hyp_kern_va(str_va), |
305 | spsr, elr, | ||
306 | read_sysreg(esr_el2), read_sysreg_el2(far), | ||
307 | read_sysreg(hpfar_el2), par, | ||
308 | (void *)read_sysreg(tpidr_el2)); | ||
309 | } | ||
310 | |||
311 | static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par) | ||
312 | { | ||
313 | panic(__hyp_panic_string, | ||
314 | spsr, elr, | ||
315 | read_sysreg_el2(esr), read_sysreg_el2(far), | ||
316 | read_sysreg(hpfar_el2), par, | ||
317 | (void *)read_sysreg(tpidr_el2)); | ||
318 | } | ||
319 | |||
320 | static hyp_alternate_select(__hyp_call_panic, | ||
321 | __hyp_call_panic_nvhe, __hyp_call_panic_vhe, | ||
322 | ARM64_HAS_VIRT_HOST_EXTN); | ||
323 | |||
324 | void __hyp_text __noreturn __hyp_panic(void) | ||
325 | { | ||
326 | u64 spsr = read_sysreg_el2(spsr); | ||
327 | u64 elr = read_sysreg_el2(elr); | ||
158 | u64 par = read_sysreg(par_el1); | 328 | u64 par = read_sysreg(par_el1); |
159 | 329 | ||
160 | if (read_sysreg(vttbr_el2)) { | 330 | if (read_sysreg(vttbr_el2)) { |
@@ -165,15 +335,11 @@ void __hyp_text __noreturn __hyp_panic(void) | |||
165 | host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); | 335 | host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); |
166 | __deactivate_traps(vcpu); | 336 | __deactivate_traps(vcpu); |
167 | __deactivate_vm(vcpu); | 337 | __deactivate_vm(vcpu); |
168 | __sysreg_restore_state(host_ctxt); | 338 | __sysreg_restore_host_state(host_ctxt); |
169 | } | 339 | } |
170 | 340 | ||
171 | /* Call panic for real */ | 341 | /* Call panic for real */ |
172 | __hyp_do_panic(hyp_kern_va(str_va), | 342 | __hyp_call_panic()(spsr, elr, par); |
173 | spsr, elr, | ||
174 | read_sysreg(esr_el2), read_sysreg(far_el2), | ||
175 | read_sysreg(hpfar_el2), par, | ||
176 | (void *)read_sysreg(tpidr_el2)); | ||
177 | 343 | ||
178 | unreachable(); | 344 | unreachable(); |
179 | } | 345 | } |
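The hyp_alternate_select() construct used throughout this file is worth a moment: it builds a tiny selector that resolves, per CPU capability, to either the non-VHE or the VHE flavour of a helper, hence the __activate_traps_arch()() double call. A user-space model of the idea, with a plain conditional standing in for the boot-time alternatives patching and purely illustrative names:

#include <stdbool.h>
#include <stdio.h>

typedef void (*trap_fn)(void);

static void activate_traps_nvhe(void) { puts("program CPTR_EL2"); }
static void activate_traps_vhe(void)  { puts("program CPACR_EL1"); }

static bool cpu_has_vhe;	/* stands in for ARM64_HAS_VIRT_HOST_EXTN */

/* Models: hyp_alternate_select(__activate_traps_arch,
 *                              __activate_traps_nvhe, __activate_traps_vhe,
 *                              ARM64_HAS_VIRT_HOST_EXTN); */
static trap_fn activate_traps_arch(void)
{
	return cpu_has_vhe ? activate_traps_vhe : activate_traps_nvhe;
}

int main(void)
{
	cpu_has_vhe = true;
	activate_traps_arch()();	/* first call selects, second runs */
	return 0;
}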
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 425630980229..0f7c40eb3f53 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -19,75 +19,122 @@ | |||
19 | #include <linux/kvm_host.h> | 19 | #include <linux/kvm_host.h> |
20 | 20 | ||
21 | #include <asm/kvm_asm.h> | 21 | #include <asm/kvm_asm.h> |
22 | #include <asm/kvm_mmu.h> | 22 | #include <asm/kvm_hyp.h> |
23 | 23 | ||
24 | #include "hyp.h" | 24 | /* Yes, this does nothing, on purpose */ |
25 | static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { } | ||
25 | 26 | ||
26 | /* ctxt is already in the HYP VA space */ | 27 | /* |
27 | void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) | 28 | * Non-VHE: Both host and guest must save everything. |
29 | * | ||
30 | * VHE: Host must save tpidr*_el[01], actlr_el1, sp0, pc, pstate, and | ||
31 | * guest must save everything. | ||
32 | */ | ||
33 | |||
34 | static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) | ||
28 | { | 35 | { |
29 | ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); | ||
30 | ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); | ||
31 | ctxt->sys_regs[SCTLR_EL1] = read_sysreg(sctlr_el1); | ||
32 | ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); | 36 | ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); |
33 | ctxt->sys_regs[CPACR_EL1] = read_sysreg(cpacr_el1); | ||
34 | ctxt->sys_regs[TTBR0_EL1] = read_sysreg(ttbr0_el1); | ||
35 | ctxt->sys_regs[TTBR1_EL1] = read_sysreg(ttbr1_el1); | ||
36 | ctxt->sys_regs[TCR_EL1] = read_sysreg(tcr_el1); | ||
37 | ctxt->sys_regs[ESR_EL1] = read_sysreg(esr_el1); | ||
38 | ctxt->sys_regs[AFSR0_EL1] = read_sysreg(afsr0_el1); | ||
39 | ctxt->sys_regs[AFSR1_EL1] = read_sysreg(afsr1_el1); | ||
40 | ctxt->sys_regs[FAR_EL1] = read_sysreg(far_el1); | ||
41 | ctxt->sys_regs[MAIR_EL1] = read_sysreg(mair_el1); | ||
42 | ctxt->sys_regs[VBAR_EL1] = read_sysreg(vbar_el1); | ||
43 | ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg(contextidr_el1); | ||
44 | ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); | 37 | ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); |
45 | ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); | 38 | ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); |
46 | ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); | 39 | ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); |
47 | ctxt->sys_regs[AMAIR_EL1] = read_sysreg(amair_el1); | 40 | ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); |
48 | ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg(cntkctl_el1); | 41 | ctxt->gp_regs.regs.pc = read_sysreg_el2(elr); |
42 | ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr); | ||
43 | } | ||
44 | |||
45 | static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) | ||
46 | { | ||
47 | ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); | ||
48 | ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); | ||
49 | ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr); | ||
50 | ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr); | ||
51 | ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0); | ||
52 | ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1); | ||
53 | ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(tcr); | ||
54 | ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(esr); | ||
55 | ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(afsr0); | ||
56 | ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(afsr1); | ||
57 | ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(far); | ||
58 | ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(mair); | ||
59 | ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(vbar); | ||
60 | ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(contextidr); | ||
61 | ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair); | ||
62 | ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl); | ||
49 | ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); | 63 | ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); |
50 | ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); | 64 | ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); |
51 | 65 | ||
52 | ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); | ||
53 | ctxt->gp_regs.regs.pc = read_sysreg(elr_el2); | ||
54 | ctxt->gp_regs.regs.pstate = read_sysreg(spsr_el2); | ||
55 | ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); | 66 | ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); |
56 | ctxt->gp_regs.elr_el1 = read_sysreg(elr_el1); | 67 | ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr); |
57 | ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg(spsr_el1); | 68 | ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr); |
69 | } | ||
70 | |||
71 | static hyp_alternate_select(__sysreg_call_save_host_state, | ||
72 | __sysreg_save_state, __sysreg_do_nothing, | ||
73 | ARM64_HAS_VIRT_HOST_EXTN); | ||
74 | |||
75 | void __hyp_text __sysreg_save_host_state(struct kvm_cpu_context *ctxt) | ||
76 | { | ||
77 | __sysreg_call_save_host_state()(ctxt); | ||
78 | __sysreg_save_common_state(ctxt); | ||
79 | } | ||
80 | |||
81 | void __hyp_text __sysreg_save_guest_state(struct kvm_cpu_context *ctxt) | ||
82 | { | ||
83 | __sysreg_save_state(ctxt); | ||
84 | __sysreg_save_common_state(ctxt); | ||
58 | } | 85 | } |
59 | 86 | ||
60 | void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) | 87 | static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) |
61 | { | 88 | { |
62 | write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); | ||
63 | write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); | ||
64 | write_sysreg(ctxt->sys_regs[SCTLR_EL1], sctlr_el1); | ||
65 | write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); | 89 | write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); |
66 | write_sysreg(ctxt->sys_regs[CPACR_EL1], cpacr_el1); | ||
67 | write_sysreg(ctxt->sys_regs[TTBR0_EL1], ttbr0_el1); | ||
68 | write_sysreg(ctxt->sys_regs[TTBR1_EL1], ttbr1_el1); | ||
69 | write_sysreg(ctxt->sys_regs[TCR_EL1], tcr_el1); | ||
70 | write_sysreg(ctxt->sys_regs[ESR_EL1], esr_el1); | ||
71 | write_sysreg(ctxt->sys_regs[AFSR0_EL1], afsr0_el1); | ||
72 | write_sysreg(ctxt->sys_regs[AFSR1_EL1], afsr1_el1); | ||
73 | write_sysreg(ctxt->sys_regs[FAR_EL1], far_el1); | ||
74 | write_sysreg(ctxt->sys_regs[MAIR_EL1], mair_el1); | ||
75 | write_sysreg(ctxt->sys_regs[VBAR_EL1], vbar_el1); | ||
76 | write_sysreg(ctxt->sys_regs[CONTEXTIDR_EL1], contextidr_el1); | ||
77 | write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); | 90 | write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); |
78 | write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); | 91 | write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); |
79 | write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); | 92 | write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); |
80 | write_sysreg(ctxt->sys_regs[AMAIR_EL1], amair_el1); | 93 | write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); |
81 | write_sysreg(ctxt->sys_regs[CNTKCTL_EL1], cntkctl_el1); | 94 | write_sysreg_el2(ctxt->gp_regs.regs.pc, elr); |
82 | write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); | 95 | write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr); |
83 | write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); | 96 | } |
84 | 97 | ||
85 | write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); | 98 | static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) |
86 | write_sysreg(ctxt->gp_regs.regs.pc, elr_el2); | 99 | { |
87 | write_sysreg(ctxt->gp_regs.regs.pstate, spsr_el2); | 100 | write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); |
88 | write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); | 101 | write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); |
89 | write_sysreg(ctxt->gp_regs.elr_el1, elr_el1); | 102 | write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr); |
90 | write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1); | 103 | write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr); |
104 | write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0); | ||
105 | write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1); | ||
106 | write_sysreg_el1(ctxt->sys_regs[TCR_EL1], tcr); | ||
107 | write_sysreg_el1(ctxt->sys_regs[ESR_EL1], esr); | ||
108 | write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], afsr0); | ||
109 | write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], afsr1); | ||
110 | write_sysreg_el1(ctxt->sys_regs[FAR_EL1], far); | ||
111 | write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], mair); | ||
112 | write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], vbar); | ||
113 | write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],contextidr); | ||
114 | write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair); | ||
115 | write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl); | ||
116 | write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); | ||
117 | write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); | ||
118 | |||
119 | write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); | ||
120 | write_sysreg_el1(ctxt->gp_regs.elr_el1, elr); | ||
121 | write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr); | ||
122 | } | ||
123 | |||
124 | static hyp_alternate_select(__sysreg_call_restore_host_state, | ||
125 | __sysreg_restore_state, __sysreg_do_nothing, | ||
126 | ARM64_HAS_VIRT_HOST_EXTN); | ||
127 | |||
128 | void __hyp_text __sysreg_restore_host_state(struct kvm_cpu_context *ctxt) | ||
129 | { | ||
130 | __sysreg_call_restore_host_state()(ctxt); | ||
131 | __sysreg_restore_common_state(ctxt); | ||
132 | } | ||
133 | |||
134 | void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt) | ||
135 | { | ||
136 | __sysreg_restore_state(ctxt); | ||
137 | __sysreg_restore_common_state(ctxt); | ||
91 | } | 138 | } |
92 | 139 | ||
93 | void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) | 140 | void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) |
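The host/guest split above boils down to a composition rule: the common state is always saved, the full EL1 state only when the world switch can clobber it (always for the guest, only on non-VHE hosts). A reduced sketch of that structure, with the context trimmed to two flags and a runtime conditional in place of hyp_alternate_select():

#include <stdbool.h>
#include <stdio.h>

struct cpu_context { int common; int el1; };

static bool cpu_has_vhe;	/* ARM64_HAS_VIRT_HOST_EXTN stand-in */

static void save_common_state(struct cpu_context *ctxt) { ctxt->common = 1; }
static void save_el1_state(struct cpu_context *ctxt)    { ctxt->el1 = 1; }
static void save_nothing(struct cpu_context *ctxt)      { (void)ctxt; }

static void save_host_state(struct cpu_context *ctxt)
{
	/* A VHE host keeps running on its own EL1 state, so the
	 * expensive part can be skipped. */
	(cpu_has_vhe ? save_nothing : save_el1_state)(ctxt);
	save_common_state(ctxt);
}

static void save_guest_state(struct cpu_context *ctxt)
{
	save_el1_state(ctxt);	/* guest state must always be captured */
	save_common_state(ctxt);
}

int main(void)
{
	struct cpu_context host = { 0 }, guest = { 0 };

	cpu_has_vhe = true;
	save_host_state(&host);
	save_guest_state(&guest);
	printf("host el1 saved: %d, guest el1 saved: %d\n",
	       host.el1, guest.el1);
	return 0;
}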
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 2a7e0d838698..be8177cdd3bf 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -15,7 +15,7 @@ | |||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | 15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include "hyp.h" | 18 | #include <asm/kvm_hyp.h> |
19 | 19 | ||
20 | static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) | 20 | static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) |
21 | { | 21 | { |
diff --git a/arch/arm64/kvm/hyp/vgic-v2-sr.c b/arch/arm64/kvm/hyp/vgic-v2-sr.c
deleted file mode 100644
index e71761238cfc..000000000000
--- a/arch/arm64/kvm/hyp/vgic-v2-sr.c
+++ /dev/null
@@ -1,84 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2012-2015 - ARM Ltd | ||
3 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | */ | ||
17 | |||
18 | #include <linux/compiler.h> | ||
19 | #include <linux/irqchip/arm-gic.h> | ||
20 | #include <linux/kvm_host.h> | ||
21 | |||
22 | #include <asm/kvm_mmu.h> | ||
23 | |||
24 | #include "hyp.h" | ||
25 | |||
26 | /* vcpu is already in the HYP VA space */ | ||
27 | void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) | ||
28 | { | ||
29 | struct kvm *kvm = kern_hyp_va(vcpu->kvm); | ||
30 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | ||
31 | struct vgic_dist *vgic = &kvm->arch.vgic; | ||
32 | void __iomem *base = kern_hyp_va(vgic->vctrl_base); | ||
33 | u32 eisr0, eisr1, elrsr0, elrsr1; | ||
34 | int i, nr_lr; | ||
35 | |||
36 | if (!base) | ||
37 | return; | ||
38 | |||
39 | nr_lr = vcpu->arch.vgic_cpu.nr_lr; | ||
40 | cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR); | ||
41 | cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR); | ||
42 | eisr0 = readl_relaxed(base + GICH_EISR0); | ||
43 | elrsr0 = readl_relaxed(base + GICH_ELRSR0); | ||
44 | if (unlikely(nr_lr > 32)) { | ||
45 | eisr1 = readl_relaxed(base + GICH_EISR1); | ||
46 | elrsr1 = readl_relaxed(base + GICH_ELRSR1); | ||
47 | } else { | ||
48 | eisr1 = elrsr1 = 0; | ||
49 | } | ||
50 | #ifdef CONFIG_CPU_BIG_ENDIAN | ||
51 | cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1; | ||
52 | cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1; | ||
53 | #else | ||
54 | cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0; | ||
55 | cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; | ||
56 | #endif | ||
57 | cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); | ||
58 | |||
59 | writel_relaxed(0, base + GICH_HCR); | ||
60 | |||
61 | for (i = 0; i < nr_lr; i++) | ||
62 | cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); | ||
63 | } | ||
64 | |||
65 | /* vcpu is already in the HYP VA space */ | ||
66 | void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) | ||
67 | { | ||
68 | struct kvm *kvm = kern_hyp_va(vcpu->kvm); | ||
69 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | ||
70 | struct vgic_dist *vgic = &kvm->arch.vgic; | ||
71 | void __iomem *base = kern_hyp_va(vgic->vctrl_base); | ||
72 | int i, nr_lr; | ||
73 | |||
74 | if (!base) | ||
75 | return; | ||
76 | |||
77 | writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); | ||
78 | writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR); | ||
79 | writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); | ||
80 | |||
81 | nr_lr = vcpu->arch.vgic_cpu.nr_lr; | ||
82 | for (i = 0; i < nr_lr; i++) | ||
83 | writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4)); | ||
84 | } | ||
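The file deleted above (its save/restore logic presumably relocated to shared arm/arm64 code elsewhere in this series) assembled the 64-bit EISR/ELRSR status from two 32-bit GICH registers, with the half order depending on kernel endianness. The combine step, modelled standalone:

#include <stdint.h>
#include <stdio.h>

static uint64_t combine_status(uint32_t word0, uint32_t word1, int big_endian)
{
	/* word0 covers LRs 0-31, word1 LRs 32+; on big-endian kernels
	 * word0 lands in the high half, on little-endian in the low. */
	if (big_endian)
		return ((uint64_t)word0 << 32) | word1;
	return ((uint64_t)word1 << 32) | word0;
}

int main(void)
{
	printf("elrsr = %#llx\n",
	       (unsigned long long)combine_status(0xffffffff, 0x3, 0));
	return 0;
}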
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 5dd2a26444ec..fff7cd42b3a3 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -19,9 +19,7 @@ | |||
19 | #include <linux/irqchip/arm-gic-v3.h> | 19 | #include <linux/irqchip/arm-gic-v3.h> |
20 | #include <linux/kvm_host.h> | 20 | #include <linux/kvm_host.h> |
21 | 21 | ||
22 | #include <asm/kvm_mmu.h> | 22 | #include <asm/kvm_hyp.h> |
23 | |||
24 | #include "hyp.h" | ||
25 | 23 | ||
26 | #define vtr_to_max_lr_idx(v) ((v) & 0xf) | 24 | #define vtr_to_max_lr_idx(v) ((v) & 0xf) |
27 | #define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1) | 25 | #define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1) |
@@ -39,12 +37,133 @@ | |||
39 | asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\ | 37 | asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\ |
40 | } while (0) | 38 | } while (0) |
41 | 39 | ||
42 | /* vcpu is already in the HYP VA space */ | 40 | static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) |
41 | { | ||
42 | switch (lr & 0xf) { | ||
43 | case 0: | ||
44 | return read_gicreg(ICH_LR0_EL2); | ||
45 | case 1: | ||
46 | return read_gicreg(ICH_LR1_EL2); | ||
47 | case 2: | ||
48 | return read_gicreg(ICH_LR2_EL2); | ||
49 | case 3: | ||
50 | return read_gicreg(ICH_LR3_EL2); | ||
51 | case 4: | ||
52 | return read_gicreg(ICH_LR4_EL2); | ||
53 | case 5: | ||
54 | return read_gicreg(ICH_LR5_EL2); | ||
55 | case 6: | ||
56 | return read_gicreg(ICH_LR6_EL2); | ||
57 | case 7: | ||
58 | return read_gicreg(ICH_LR7_EL2); | ||
59 | case 8: | ||
60 | return read_gicreg(ICH_LR8_EL2); | ||
61 | case 9: | ||
62 | return read_gicreg(ICH_LR9_EL2); | ||
63 | case 10: | ||
64 | return read_gicreg(ICH_LR10_EL2); | ||
65 | case 11: | ||
66 | return read_gicreg(ICH_LR11_EL2); | ||
67 | case 12: | ||
68 | return read_gicreg(ICH_LR12_EL2); | ||
69 | case 13: | ||
70 | return read_gicreg(ICH_LR13_EL2); | ||
71 | case 14: | ||
72 | return read_gicreg(ICH_LR14_EL2); | ||
73 | case 15: | ||
74 | return read_gicreg(ICH_LR15_EL2); | ||
75 | } | ||
76 | |||
77 | unreachable(); | ||
78 | } | ||
79 | |||
80 | static void __hyp_text __gic_v3_set_lr(u64 val, int lr) | ||
81 | { | ||
82 | switch (lr & 0xf) { | ||
83 | case 0: | ||
84 | write_gicreg(val, ICH_LR0_EL2); | ||
85 | break; | ||
86 | case 1: | ||
87 | write_gicreg(val, ICH_LR1_EL2); | ||
88 | break; | ||
89 | case 2: | ||
90 | write_gicreg(val, ICH_LR2_EL2); | ||
91 | break; | ||
92 | case 3: | ||
93 | write_gicreg(val, ICH_LR3_EL2); | ||
94 | break; | ||
95 | case 4: | ||
96 | write_gicreg(val, ICH_LR4_EL2); | ||
97 | break; | ||
98 | case 5: | ||
99 | write_gicreg(val, ICH_LR5_EL2); | ||
100 | break; | ||
101 | case 6: | ||
102 | write_gicreg(val, ICH_LR6_EL2); | ||
103 | break; | ||
104 | case 7: | ||
105 | write_gicreg(val, ICH_LR7_EL2); | ||
106 | break; | ||
107 | case 8: | ||
108 | write_gicreg(val, ICH_LR8_EL2); | ||
109 | break; | ||
110 | case 9: | ||
111 | write_gicreg(val, ICH_LR9_EL2); | ||
112 | break; | ||
113 | case 10: | ||
114 | write_gicreg(val, ICH_LR10_EL2); | ||
115 | break; | ||
116 | case 11: | ||
117 | write_gicreg(val, ICH_LR11_EL2); | ||
118 | break; | ||
119 | case 12: | ||
120 | write_gicreg(val, ICH_LR12_EL2); | ||
121 | break; | ||
122 | case 13: | ||
123 | write_gicreg(val, ICH_LR13_EL2); | ||
124 | break; | ||
125 | case 14: | ||
126 | write_gicreg(val, ICH_LR14_EL2); | ||
127 | break; | ||
128 | case 15: | ||
129 | write_gicreg(val, ICH_LR15_EL2); | ||
130 | break; | ||
131 | } | ||
132 | } | ||
133 | |||
134 | static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, int nr_lr) | ||
135 | { | ||
136 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; | ||
137 | int i; | ||
138 | bool expect_mi; | ||
139 | |||
140 | expect_mi = !!(cpu_if->vgic_hcr & ICH_HCR_UIE); | ||
141 | |||
142 | for (i = 0; i < nr_lr; i++) { | ||
143 | if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) | ||
144 | continue; | ||
145 | |||
146 | expect_mi |= (!(cpu_if->vgic_lr[i] & ICH_LR_HW) && | ||
147 | (cpu_if->vgic_lr[i] & ICH_LR_EOI)); | ||
148 | } | ||
149 | |||
150 | if (expect_mi) { | ||
151 | cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2); | ||
152 | |||
153 | if (cpu_if->vgic_misr & ICH_MISR_EOI) | ||
154 | cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2); | ||
155 | else | ||
156 | cpu_if->vgic_eisr = 0; | ||
157 | } else { | ||
158 | cpu_if->vgic_misr = 0; | ||
159 | cpu_if->vgic_eisr = 0; | ||
160 | } | ||
161 | } | ||
162 | |||
43 | void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) | 163 | void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) |
44 | { | 164 | { |
45 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; | 165 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; |
46 | u64 val; | 166 | u64 val; |
47 | u32 max_lr_idx, nr_pri_bits; | ||
48 | 167 | ||
49 | /* | 168 | /* |
50 | * Make sure stores to the GIC via the memory mapped interface | 169 | * Make sure stores to the GIC via the memory mapped interface |
@@ -53,68 +172,66 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) | |||
53 | dsb(st); | 172 | dsb(st); |
54 | 173 | ||
55 | cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); | 174 | cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); |
56 | cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2); | ||
57 | cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2); | ||
58 | cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); | ||
59 | 175 | ||
60 | write_gicreg(0, ICH_HCR_EL2); | 176 | if (vcpu->arch.vgic_cpu.live_lrs) { |
61 | val = read_gicreg(ICH_VTR_EL2); | 177 | int i; |
62 | max_lr_idx = vtr_to_max_lr_idx(val); | 178 | u32 max_lr_idx, nr_pri_bits; |
63 | nr_pri_bits = vtr_to_nr_pri_bits(val); | ||
64 | 179 | ||
65 | switch (max_lr_idx) { | 180 | cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); |
66 | case 15: | ||
67 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2); | ||
68 | case 14: | ||
69 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2); | ||
70 | case 13: | ||
71 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2); | ||
72 | case 12: | ||
73 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2); | ||
74 | case 11: | ||
75 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2); | ||
76 | case 10: | ||
77 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2); | ||
78 | case 9: | ||
79 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2); | ||
80 | case 8: | ||
81 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2); | ||
82 | case 7: | ||
83 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2); | ||
84 | case 6: | ||
85 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2); | ||
86 | case 5: | ||
87 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2); | ||
88 | case 4: | ||
89 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2); | ||
90 | case 3: | ||
91 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2); | ||
92 | case 2: | ||
93 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2); | ||
94 | case 1: | ||
95 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2); | ||
96 | case 0: | ||
97 | cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2); | ||
98 | } | ||
99 | 181 | ||
100 | switch (nr_pri_bits) { | 182 | write_gicreg(0, ICH_HCR_EL2); |
101 | case 7: | 183 | val = read_gicreg(ICH_VTR_EL2); |
102 | cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); | 184 | max_lr_idx = vtr_to_max_lr_idx(val); |
103 | cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); | 185 | nr_pri_bits = vtr_to_nr_pri_bits(val); |
104 | case 6: | ||
105 | cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2); | ||
106 | default: | ||
107 | cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); | ||
108 | } | ||
109 | 186 | ||
110 | switch (nr_pri_bits) { | 187 | save_maint_int_state(vcpu, max_lr_idx + 1); |
111 | case 7: | 188 | |
112 | cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); | 189 | for (i = 0; i <= max_lr_idx; i++) { |
113 | cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); | 190 | if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) |
114 | case 6: | 191 | continue; |
115 | cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2); | 192 | |
116 | default: | 193 | if (cpu_if->vgic_elrsr & (1 << i)) { |
117 | cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); | 194 | cpu_if->vgic_lr[i] &= ~ICH_LR_STATE; |
195 | continue; | ||
196 | } | ||
197 | |||
198 | cpu_if->vgic_lr[i] = __gic_v3_get_lr(i); | ||
199 | __gic_v3_set_lr(0, i); | ||
200 | } | ||
201 | |||
202 | switch (nr_pri_bits) { | ||
203 | case 7: | ||
204 | cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); | ||
205 | cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); | ||
206 | case 6: | ||
207 | cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2); | ||
208 | default: | ||
209 | cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); | ||
210 | } | ||
211 | |||
212 | switch (nr_pri_bits) { | ||
213 | case 7: | ||
214 | cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); | ||
215 | cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); | ||
216 | case 6: | ||
217 | cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2); | ||
218 | default: | ||
219 | cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); | ||
220 | } | ||
221 | |||
222 | vcpu->arch.vgic_cpu.live_lrs = 0; | ||
223 | } else { | ||
224 | cpu_if->vgic_misr = 0; | ||
225 | cpu_if->vgic_eisr = 0; | ||
226 | cpu_if->vgic_elrsr = 0xffff; | ||
227 | cpu_if->vgic_ap0r[0] = 0; | ||
228 | cpu_if->vgic_ap0r[1] = 0; | ||
229 | cpu_if->vgic_ap0r[2] = 0; | ||
230 | cpu_if->vgic_ap0r[3] = 0; | ||
231 | cpu_if->vgic_ap1r[0] = 0; | ||
232 | cpu_if->vgic_ap1r[1] = 0; | ||
233 | cpu_if->vgic_ap1r[2] = 0; | ||
234 | cpu_if->vgic_ap1r[3] = 0; | ||
118 | } | 235 | } |
119 | 236 | ||
120 | val = read_gicreg(ICC_SRE_EL2); | 237 | val = read_gicreg(ICC_SRE_EL2); |
@@ -128,6 +245,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) | |||
128 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; | 245 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; |
129 | u64 val; | 246 | u64 val; |
130 | u32 max_lr_idx, nr_pri_bits; | 247 | u32 max_lr_idx, nr_pri_bits; |
248 | u16 live_lrs = 0; | ||
249 | int i; | ||
131 | 250 | ||
132 | /* | 251 | /* |
133 | * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a | 252 | * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a |
@@ -140,66 +259,46 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) | |||
140 | write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1); | 259 | write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1); |
141 | isb(); | 260 | isb(); |
142 | 261 | ||
143 | write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); | ||
144 | write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); | ||
145 | |||
146 | val = read_gicreg(ICH_VTR_EL2); | 262 | val = read_gicreg(ICH_VTR_EL2); |
147 | max_lr_idx = vtr_to_max_lr_idx(val); | 263 | max_lr_idx = vtr_to_max_lr_idx(val); |
148 | nr_pri_bits = vtr_to_nr_pri_bits(val); | 264 | nr_pri_bits = vtr_to_nr_pri_bits(val); |
149 | 265 | ||
150 | switch (nr_pri_bits) { | 266 | for (i = 0; i <= max_lr_idx; i++) { |
151 | case 7: | 267 | if (cpu_if->vgic_lr[i] & ICH_LR_STATE) |
152 | write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); | 268 | live_lrs |= (1 << i); |
153 | write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); | ||
154 | case 6: | ||
155 | write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2); | ||
156 | default: | ||
157 | write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); | ||
158 | } | 269 | } |
159 | 270 | ||
160 | switch (nr_pri_bits) { | 271 | write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); |
161 | case 7: | ||
162 | write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); | ||
163 | write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); | ||
164 | case 6: | ||
165 | write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); | ||
166 | default: | ||
167 | write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); | ||
168 | } | ||
169 | 272 | ||
170 | switch (max_lr_idx) { | 273 | if (live_lrs) { |
171 | case 15: | 274 | write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); |
172 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2); | 275 | |
173 | case 14: | 276 | switch (nr_pri_bits) { |
174 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2); | 277 | case 7: |
175 | case 13: | 278 | write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); |
176 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2); | 279 | write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); |
177 | case 12: | 280 | case 6: |
178 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2); | 281 | write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2); |
179 | case 11: | 282 | default: |
180 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2); | 283 | write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); |
181 | case 10: | 284 | } |
182 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2); | 285 | |
183 | case 9: | 286 | switch (nr_pri_bits) { |
184 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2); | 287 | case 7: |
185 | case 8: | 288 | write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); |
186 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2); | 289 | write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); |
187 | case 7: | 290 | case 6: |
188 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2); | 291 | write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); |
189 | case 6: | 292 | default: |
190 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2); | 293 | write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); |
191 | case 5: | 294 | } |
192 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2); | 295 | |
193 | case 4: | 296 | for (i = 0; i <= max_lr_idx; i++) { |
194 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2); | 297 | if (!(live_lrs & (1 << i))) |
195 | case 3: | 298 | continue; |
196 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2); | 299 | |
197 | case 2: | 300 | __gic_v3_set_lr(cpu_if->vgic_lr[i], i); |
198 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2); | 301 | } |
199 | case 1: | ||
200 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2); | ||
201 | case 0: | ||
202 | write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2); | ||
203 | } | 302 | } |
204 | 303 | ||
205 | /* | 304 | /* |
@@ -209,6 +308,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) | |||
209 | */ | 308 | */ |
210 | isb(); | 309 | isb(); |
211 | dsb(sy); | 310 | dsb(sy); |
311 | vcpu->arch.vgic_cpu.live_lrs = live_lrs; | ||
212 | 312 | ||
213 | /* | 313 | /* |
214 | * Prevent the guest from touching the GIC system registers if | 314 | * Prevent the guest from touching the GIC system registers if |
@@ -220,6 +320,15 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) | |||
220 | } | 320 | } |
221 | } | 321 | } |
222 | 322 | ||
323 | void __hyp_text __vgic_v3_init_lrs(void) | ||
324 | { | ||
325 | int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2)); | ||
326 | int i; | ||
327 | |||
328 | for (i = 0; i <= max_lr_idx; i++) | ||
329 | __gic_v3_set_lr(0, i); | ||
330 | } | ||
331 | |||
223 | static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) | 332 | static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) |
224 | { | 333 | { |
225 | return read_gicreg(ICH_VTR_EL2); | 334 | return read_gicreg(ICH_VTR_EL2); |
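The new live_lrs bookkeeping is the heart of this change: restore records which list registers were actually programmed, and save touches only those, letting ELRSR short-circuit LRs the GIC has already drained. A simplified standalone model (the sizes, the bit layout, and the ICH_LR_STATE placeholder are illustrative):

#include <stdint.h>
#include <stdio.h>

#define NR_LR		16
#define ICH_LR_STATE	(3ULL << 62)	/* pending/active bits */

static uint64_t hw_lr[NR_LR];		/* stand-in for ICH_LRn_EL2 */

static uint16_t restore_lrs(const uint64_t *lr)
{
	uint16_t live = 0;
	int i;

	for (i = 0; i < NR_LR; i++) {
		if (!(lr[i] & ICH_LR_STATE))
			continue;	/* empty LR: nothing to program */
		hw_lr[i] = lr[i];
		live |= 1 << i;
	}
	return live;
}

static void save_lrs(uint64_t *lr, uint16_t live, uint16_t elrsr)
{
	int i;

	for (i = 0; i < NR_LR; i++) {
		if (!(live & (1 << i)))
			continue;		/* never programmed: skip */
		if (elrsr & (1 << i)) {
			lr[i] &= ~ICH_LR_STATE;	/* drained by the GIC */
			continue;
		}
		lr[i] = hw_lr[i];		/* still live: read it back */
		hw_lr[i] = 0;
	}
}

int main(void)
{
	uint64_t lr[NR_LR] = { [0] = ICH_LR_STATE | 27 };
	uint16_t live = restore_lrs(lr);

	save_lrs(lr, live, 0);		/* pretend nothing was drained */
	printf("live_lrs = %#x, lr0 = %#llx\n", (unsigned)live,
	       (unsigned long long)lr[0]);
	return 0;
}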
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index f34745cb3d23..9677bf069bcc 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -77,7 +77,11 @@ int kvm_arch_dev_ioctl_check_extension(long ext) | |||
77 | case KVM_CAP_GUEST_DEBUG_HW_WPS: | 77 | case KVM_CAP_GUEST_DEBUG_HW_WPS: |
78 | r = get_num_wrps(); | 78 | r = get_num_wrps(); |
79 | break; | 79 | break; |
80 | case KVM_CAP_ARM_PMU_V3: | ||
81 | r = kvm_arm_support_pmu_v3(); | ||
82 | break; | ||
80 | case KVM_CAP_SET_GUEST_DEBUG: | 83 | case KVM_CAP_SET_GUEST_DEBUG: |
84 | case KVM_CAP_VCPU_ATTRIBUTES: | ||
81 | r = 1; | 85 | r = 1; |
82 | break; | 86 | break; |
83 | default: | 87 | default: |
@@ -120,6 +124,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | |||
120 | /* Reset system registers */ | 124 | /* Reset system registers */ |
121 | kvm_reset_sys_regs(vcpu); | 125 | kvm_reset_sys_regs(vcpu); |
122 | 126 | ||
127 | /* Reset PMU */ | ||
128 | kvm_pmu_vcpu_reset(vcpu); | ||
129 | |||
123 | /* Reset timer */ | 130 | /* Reset timer */ |
124 | return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); | 131 | return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); |
125 | } | 132 | } |
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 2e90371cfb37..61ba59104845 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -20,6 +20,7 @@ | |||
20 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | 20 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <linux/bsearch.h> | ||
23 | #include <linux/kvm_host.h> | 24 | #include <linux/kvm_host.h> |
24 | #include <linux/mm.h> | 25 | #include <linux/mm.h> |
25 | #include <linux/uaccess.h> | 26 | #include <linux/uaccess.h> |
@@ -34,6 +35,7 @@ | |||
34 | #include <asm/kvm_emulate.h> | 35 | #include <asm/kvm_emulate.h> |
35 | #include <asm/kvm_host.h> | 36 | #include <asm/kvm_host.h> |
36 | #include <asm/kvm_mmu.h> | 37 | #include <asm/kvm_mmu.h> |
38 | #include <asm/perf_event.h> | ||
37 | 39 | ||
38 | #include <trace/events/kvm.h> | 40 | #include <trace/events/kvm.h> |
39 | 41 | ||
@@ -439,6 +441,344 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) | |||
439 | vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr; | 441 | vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr; |
440 | } | 442 | } |
441 | 443 | ||
444 | static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) | ||
445 | { | ||
446 | u64 pmcr, val; | ||
447 | |||
448 | asm volatile("mrs %0, pmcr_el0\n" : "=r" (pmcr)); | ||
449 | /* Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN, | ||
450 | * except for PMCR.E, which resets to zero. | ||
451 | */ | ||
452 | val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) | ||
453 | | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); | ||
454 | vcpu_sys_reg(vcpu, PMCR_EL0) = val; | ||
455 | } | ||
456 | |||
457 | static bool pmu_access_el0_disabled(struct kvm_vcpu *vcpu) | ||
458 | { | ||
459 | u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); | ||
460 | |||
461 | return !((reg & ARMV8_PMU_USERENR_EN) || vcpu_mode_priv(vcpu)); | ||
462 | } | ||
463 | |||
464 | static bool pmu_write_swinc_el0_disabled(struct kvm_vcpu *vcpu) | ||
465 | { | ||
466 | u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); | ||
467 | |||
468 | return !((reg & (ARMV8_PMU_USERENR_SW | ARMV8_PMU_USERENR_EN)) | ||
469 | || vcpu_mode_priv(vcpu)); | ||
470 | } | ||
471 | |||
472 | static bool pmu_access_cycle_counter_el0_disabled(struct kvm_vcpu *vcpu) | ||
473 | { | ||
474 | u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); | ||
475 | |||
476 | return !((reg & (ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_EN)) | ||
477 | || vcpu_mode_priv(vcpu)); | ||
478 | } | ||
479 | |||
480 | static bool pmu_access_event_counter_el0_disabled(struct kvm_vcpu *vcpu) | ||
481 | { | ||
482 | u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); | ||
483 | |||
484 | return !((reg & (ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_EN)) | ||
485 | || vcpu_mode_priv(vcpu)); | ||
486 | } | ||
487 | |||
488 | static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
489 | const struct sys_reg_desc *r) | ||
490 | { | ||
491 | u64 val; | ||
492 | |||
493 | if (!kvm_arm_pmu_v3_ready(vcpu)) | ||
494 | return trap_raz_wi(vcpu, p, r); | ||
495 | |||
496 | if (pmu_access_el0_disabled(vcpu)) | ||
497 | return false; | ||
498 | |||
499 | if (p->is_write) { | ||
500 | /* Only update writeable bits of PMCR */ | ||
501 | val = vcpu_sys_reg(vcpu, PMCR_EL0); | ||
502 | val &= ~ARMV8_PMU_PMCR_MASK; | ||
503 | val |= p->regval & ARMV8_PMU_PMCR_MASK; | ||
504 | vcpu_sys_reg(vcpu, PMCR_EL0) = val; | ||
505 | kvm_pmu_handle_pmcr(vcpu, val); | ||
506 | } else { | ||
507 | /* PMCR.P & PMCR.C are RAZ */ | ||
508 | val = vcpu_sys_reg(vcpu, PMCR_EL0) | ||
509 | & ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C); | ||
510 | p->regval = val; | ||
511 | } | ||
512 | |||
513 | return true; | ||
514 | } | ||
515 | |||
516 | static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
517 | const struct sys_reg_desc *r) | ||
518 | { | ||
519 | if (!kvm_arm_pmu_v3_ready(vcpu)) | ||
520 | return trap_raz_wi(vcpu, p, r); | ||
521 | |||
522 | if (pmu_access_event_counter_el0_disabled(vcpu)) | ||
523 | return false; | ||
524 | |||
525 | if (p->is_write) | ||
526 | vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval; | ||
527 | else | ||
528 | /* return PMSELR.SEL field */ | ||
529 | p->regval = vcpu_sys_reg(vcpu, PMSELR_EL0) | ||
530 | & ARMV8_PMU_COUNTER_MASK; | ||
531 | |||
532 | return true; | ||
533 | } | ||
534 | |||
535 | static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
536 | const struct sys_reg_desc *r) | ||
537 | { | ||
538 | u64 pmceid; | ||
539 | |||
540 | if (!kvm_arm_pmu_v3_ready(vcpu)) | ||
541 | return trap_raz_wi(vcpu, p, r); | ||
542 | |||
543 | BUG_ON(p->is_write); | ||
544 | |||
545 | if (pmu_access_el0_disabled(vcpu)) | ||
546 | return false; | ||
547 | |||
548 | if (!(p->Op2 & 1)) | ||
549 | asm volatile("mrs %0, pmceid0_el0\n" : "=r" (pmceid)); | ||
550 | else | ||
551 | asm volatile("mrs %0, pmceid1_el0\n" : "=r" (pmceid)); | ||
552 | |||
553 | p->regval = pmceid; | ||
554 | |||
555 | return true; | ||
556 | } | ||
557 | |||
558 | static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx) | ||
559 | { | ||
560 | u64 pmcr, val; | ||
561 | |||
562 | pmcr = vcpu_sys_reg(vcpu, PMCR_EL0); | ||
563 | val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; | ||
564 | if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) | ||
565 | return false; | ||
566 | |||
567 | return true; | ||
568 | } | ||
569 | |||
570 | static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, | ||
571 | struct sys_reg_params *p, | ||
572 | const struct sys_reg_desc *r) | ||
573 | { | ||
574 | u64 idx; | ||
575 | |||
576 | if (!kvm_arm_pmu_v3_ready(vcpu)) | ||
577 | return trap_raz_wi(vcpu, p, r); | ||
578 | |||
579 | if (r->CRn == 9 && r->CRm == 13) { | ||
580 | if (r->Op2 == 2) { | ||
581 | /* PMXEVCNTR_EL0 */ | ||
582 | if (pmu_access_event_counter_el0_disabled(vcpu)) | ||
583 | return false; | ||
584 | |||
585 | idx = vcpu_sys_reg(vcpu, PMSELR_EL0) | ||
586 | & ARMV8_PMU_COUNTER_MASK; | ||
587 | } else if (r->Op2 == 0) { | ||
588 | /* PMCCNTR_EL0 */ | ||
589 | if (pmu_access_cycle_counter_el0_disabled(vcpu)) | ||
590 | return false; | ||
591 | |||
592 | idx = ARMV8_PMU_CYCLE_IDX; | ||
593 | } else { | ||
594 | BUG(); | ||
595 | } | ||
596 | } else if (r->CRn == 14 && (r->CRm & 12) == 8) { | ||
597 | /* PMEVCNTRn_EL0 */ | ||
598 | if (pmu_access_event_counter_el0_disabled(vcpu)) | ||
599 | return false; | ||
600 | |||
601 | idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); | ||
602 | } else { | ||
603 | BUG(); | ||
604 | } | ||
605 | |||
606 | if (!pmu_counter_idx_valid(vcpu, idx)) | ||
607 | return false; | ||
608 | |||
609 | if (p->is_write) { | ||
610 | if (pmu_access_el0_disabled(vcpu)) | ||
611 | return false; | ||
612 | |||
613 | kvm_pmu_set_counter_value(vcpu, idx, p->regval); | ||
614 | } else { | ||
615 | p->regval = kvm_pmu_get_counter_value(vcpu, idx); | ||
616 | } | ||
617 | |||
618 | return true; | ||
619 | } | ||
620 | |||
621 | static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
622 | const struct sys_reg_desc *r) | ||
623 | { | ||
624 | u64 idx, reg; | ||
625 | |||
626 | if (!kvm_arm_pmu_v3_ready(vcpu)) | ||
627 | return trap_raz_wi(vcpu, p, r); | ||
628 | |||
629 | if (pmu_access_el0_disabled(vcpu)) | ||
630 | return false; | ||
631 | |||
632 | if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) { | ||
633 | /* PMXEVTYPER_EL0 */ | ||
634 | idx = vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK; | ||
635 | reg = PMEVTYPER0_EL0 + idx; | ||
636 | } else if (r->CRn == 14 && (r->CRm & 12) == 12) { | ||
637 | idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); | ||
638 | if (idx == ARMV8_PMU_CYCLE_IDX) | ||
639 | reg = PMCCFILTR_EL0; | ||
640 | else | ||
641 | /* PMEVTYPERn_EL0 */ | ||
642 | reg = PMEVTYPER0_EL0 + idx; | ||
643 | } else { | ||
644 | BUG(); | ||
645 | } | ||
646 | |||
647 | if (!pmu_counter_idx_valid(vcpu, idx)) | ||
648 | return false; | ||
649 | |||
650 | if (p->is_write) { | ||
651 | kvm_pmu_set_counter_event_type(vcpu, p->regval, idx); | ||
652 | vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK; | ||
653 | } else { | ||
654 | p->regval = vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK; | ||
655 | } | ||
656 | |||
657 | return true; | ||
658 | } | ||
659 | |||
660 | static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
661 | const struct sys_reg_desc *r) | ||
662 | { | ||
663 | u64 val, mask; | ||
664 | |||
665 | if (!kvm_arm_pmu_v3_ready(vcpu)) | ||
666 | return trap_raz_wi(vcpu, p, r); | ||
667 | |||
668 | if (pmu_access_el0_disabled(vcpu)) | ||
669 | return false; | ||
670 | |||
671 | mask = kvm_pmu_valid_counter_mask(vcpu); | ||
672 | if (p->is_write) { | ||
673 | val = p->regval & mask; | ||
674 | if (r->Op2 & 0x1) { | ||
675 | /* accessing PMCNTENSET_EL0 */ | ||
676 | vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; | ||
677 | kvm_pmu_enable_counter(vcpu, val); | ||
678 | } else { | ||
679 | /* accessing PMCNTENCLR_EL0 */ | ||
680 | vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; | ||
681 | kvm_pmu_disable_counter(vcpu, val); | ||
682 | } | ||
683 | } else { | ||
684 | p->regval = vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask; | ||
685 | } | ||
686 | |||
687 | return true; | ||
688 | } | ||
689 | |||
690 | static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
691 | const struct sys_reg_desc *r) | ||
692 | { | ||
693 | u64 mask = kvm_pmu_valid_counter_mask(vcpu); | ||
694 | |||
695 | if (!kvm_arm_pmu_v3_ready(vcpu)) | ||
696 | return trap_raz_wi(vcpu, p, r); | ||
697 | |||
698 | if (!vcpu_mode_priv(vcpu)) | ||
699 | return false; | ||
700 | |||
701 | if (p->is_write) { | ||
702 | u64 val = p->regval & mask; | ||
703 | |||
704 | if (r->Op2 & 0x1) | ||
705 | /* accessing PMINTENSET_EL1 */ | ||
706 | vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val; | ||
707 | else | ||
708 | /* accessing PMINTENCLR_EL1 */ | ||
709 | vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; | ||
710 | } else { | ||
711 | p->regval = vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask; | ||
712 | } | ||
713 | |||
714 | return true; | ||
715 | } | ||
716 | |||
717 | static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
718 | const struct sys_reg_desc *r) | ||
719 | { | ||
720 | u64 mask = kvm_pmu_valid_counter_mask(vcpu); | ||
721 | |||
722 | if (!kvm_arm_pmu_v3_ready(vcpu)) | ||
723 | return trap_raz_wi(vcpu, p, r); | ||
724 | |||
725 | if (pmu_access_el0_disabled(vcpu)) | ||
726 | return false; | ||
727 | |||
728 | if (p->is_write) { | ||
729 | if (r->CRm & 0x2) | ||
730 | /* accessing PMOVSSET_EL0 */ | ||
731 | kvm_pmu_overflow_set(vcpu, p->regval & mask); | ||
732 | else | ||
733 | /* accessing PMOVSCLR_EL0 */ | ||
734 | vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); | ||
735 | } else { | ||
736 | p->regval = vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask; | ||
737 | } | ||
738 | |||
739 | return true; | ||
740 | } | ||
741 | |||
742 | static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
743 | const struct sys_reg_desc *r) | ||
744 | { | ||
745 | u64 mask; | ||
746 | |||
747 | if (!kvm_arm_pmu_v3_ready(vcpu)) | ||
748 | return trap_raz_wi(vcpu, p, r); | ||
749 | |||
750 | if (pmu_write_swinc_el0_disabled(vcpu)) | ||
751 | return false; | ||
752 | |||
753 | if (p->is_write) { | ||
754 | mask = kvm_pmu_valid_counter_mask(vcpu); | ||
755 | kvm_pmu_software_increment(vcpu, p->regval & mask); | ||
756 | return true; | ||
757 | } | ||
758 | |||
759 | return false; | ||
760 | } | ||
761 | |||
762 | static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
763 | const struct sys_reg_desc *r) | ||
764 | { | ||
765 | if (!kvm_arm_pmu_v3_ready(vcpu)) | ||
766 | return trap_raz_wi(vcpu, p, r); | ||
767 | |||
768 | if (p->is_write) { | ||
769 | if (!vcpu_mode_priv(vcpu)) | ||
770 | return false; | ||
771 | |||
772 | vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval | ||
773 | & ARMV8_PMU_USERENR_MASK; | ||
774 | } else { | ||
775 | p->regval = vcpu_sys_reg(vcpu, PMUSERENR_EL0) | ||
776 | & ARMV8_PMU_USERENR_MASK; | ||
777 | } | ||
778 | |||
779 | return true; | ||
780 | } | ||
781 | |||
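All the pmu_*_el0_disabled() helpers above share one shape: an EL0 access is permitted only if the matching PMUSERENR enable bit (or the global EN bit) is set, or the vcpu is privileged. A reduced sketch of the cycle-counter variant, with the vcpu narrowed to the two fields the check needs:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PMU_USERENR_EN	(1U << 0)	/* global EL0 enable */
#define PMU_USERENR_CR	(1U << 2)	/* cycle counter read enable */

struct vcpu { uint64_t pmuserenr; bool priv; };

static bool cycle_counter_el0_disabled(const struct vcpu *v)
{
	return !((v->pmuserenr & (PMU_USERENR_CR | PMU_USERENR_EN)) ||
		 v->priv);
}

int main(void)
{
	struct vcpu v = { .pmuserenr = PMU_USERENR_CR, .priv = false };

	printf("EL0 cycle counter read %s\n",
	       cycle_counter_el0_disabled(&v) ? "traps" : "allowed");
	return 0;
}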
442 | /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ | 782 | /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ |
443 | #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ | 783 | #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ |
444 | /* DBGBVRn_EL1 */ \ | 784 | /* DBGBVRn_EL1 */ \ |
@@ -454,6 +794,20 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) | |||
454 | { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ | 794 | { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ |
455 | trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr } | 795 | trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr } |
456 | 796 | ||
797 | /* Macro to expand the PMEVCNTRn_EL0 register */ | ||
798 | #define PMU_PMEVCNTR_EL0(n) \ | ||
799 | /* PMEVCNTRn_EL0 */ \ | ||
800 | { Op0(0b11), Op1(0b011), CRn(0b1110), \ | ||
801 | CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ | ||
802 | access_pmu_evcntr, reset_unknown, (PMEVCNTR0_EL0 + n), } | ||
803 | |||
804 | /* Macro to expand the PMEVTYPERn_EL0 register */ | ||
805 | #define PMU_PMEVTYPER_EL0(n) \ | ||
806 | /* PMEVTYPERn_EL0 */ \ | ||
807 | { Op0(0b11), Op1(0b011), CRn(0b1110), \ | ||
808 | CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ | ||
809 | access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } | ||
810 | |||
457 | /* | 811 | /* |
458 | * Architected system registers. | 812 | * Architected system registers. |
459 | * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 | 813 | * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 |
@@ -583,10 +937,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { | |||
583 | 937 | ||
584 | /* PMINTENSET_EL1 */ | 938 | /* PMINTENSET_EL1 */ |
585 | { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), | 939 | { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), |
586 | trap_raz_wi }, | 940 | access_pminten, reset_unknown, PMINTENSET_EL1 }, |
587 | /* PMINTENCLR_EL1 */ | 941 | /* PMINTENCLR_EL1 */ |
588 | { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010), | 942 | { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010), |
589 | trap_raz_wi }, | 943 | access_pminten, NULL, PMINTENSET_EL1 }, |
590 | 944 | ||
591 | /* MAIR_EL1 */ | 945 | /* MAIR_EL1 */ |
592 | { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), | 946 | { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), |
@@ -623,43 +977,46 @@ static const struct sys_reg_desc sys_reg_descs[] = { | |||
623 | 977 | ||
624 | /* PMCR_EL0 */ | 978 | /* PMCR_EL0 */ |
625 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000), | 979 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000), |
626 | trap_raz_wi }, | 980 | access_pmcr, reset_pmcr, }, |
627 | /* PMCNTENSET_EL0 */ | 981 | /* PMCNTENSET_EL0 */ |
628 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001), | 982 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001), |
629 | trap_raz_wi }, | 983 | access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, |
630 | /* PMCNTENCLR_EL0 */ | 984 | /* PMCNTENCLR_EL0 */ |
631 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010), | 985 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010), |
632 | trap_raz_wi }, | 986 | access_pmcnten, NULL, PMCNTENSET_EL0 }, |
633 | /* PMOVSCLR_EL0 */ | 987 | /* PMOVSCLR_EL0 */ |
634 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011), | 988 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011), |
635 | trap_raz_wi }, | 989 | access_pmovs, NULL, PMOVSSET_EL0 }, |
636 | /* PMSWINC_EL0 */ | 990 | /* PMSWINC_EL0 */ |
637 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100), | 991 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100), |
638 | trap_raz_wi }, | 992 | access_pmswinc, reset_unknown, PMSWINC_EL0 }, |
639 | /* PMSELR_EL0 */ | 993 | /* PMSELR_EL0 */ |
640 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101), | 994 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101), |
641 | trap_raz_wi }, | 995 | access_pmselr, reset_unknown, PMSELR_EL0 }, |
642 | /* PMCEID0_EL0 */ | 996 | /* PMCEID0_EL0 */ |
643 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110), | 997 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110), |
644 | trap_raz_wi }, | 998 | access_pmceid }, |
645 | /* PMCEID1_EL0 */ | 999 | /* PMCEID1_EL0 */ |
646 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111), | 1000 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111), |
647 | trap_raz_wi }, | 1001 | access_pmceid }, |
648 | /* PMCCNTR_EL0 */ | 1002 | /* PMCCNTR_EL0 */ |
649 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000), | 1003 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000), |
650 | trap_raz_wi }, | 1004 | access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 }, |
651 | /* PMXEVTYPER_EL0 */ | 1005 | /* PMXEVTYPER_EL0 */ |
652 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001), | 1006 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001), |
653 | trap_raz_wi }, | 1007 | access_pmu_evtyper }, |
654 | /* PMXEVCNTR_EL0 */ | 1008 | /* PMXEVCNTR_EL0 */ |
655 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010), | 1009 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010), |
656 | trap_raz_wi }, | 1010 | access_pmu_evcntr }, |
657 | /* PMUSERENR_EL0 */ | 1011 | /* PMUSERENR_EL0 |
1012 | * This register resets as UNKNOWN in 64-bit mode while it resets as zero | ||
1013 | * in 32-bit mode. Here we choose to reset it as zero for consistency. | ||
1014 | */ | ||
658 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000), | 1015 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000), |
659 | trap_raz_wi }, | 1016 | access_pmuserenr, reset_val, PMUSERENR_EL0, 0 }, |
660 | /* PMOVSSET_EL0 */ | 1017 | /* PMOVSSET_EL0 */ |
661 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011), | 1018 | { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011), |
662 | trap_raz_wi }, | 1019 | access_pmovs, reset_unknown, PMOVSSET_EL0 }, |
663 | 1020 | ||
664 | /* TPIDR_EL0 */ | 1021 | /* TPIDR_EL0 */ |
665 | { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010), | 1022 | { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010), |
@@ -668,6 +1025,77 @@ static const struct sys_reg_desc sys_reg_descs[] = { | |||
668 | { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011), | 1025 | { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011), |
669 | NULL, reset_unknown, TPIDRRO_EL0 }, | 1026 | NULL, reset_unknown, TPIDRRO_EL0 }, |
670 | 1027 | ||
1028 | /* PMEVCNTRn_EL0 */ | ||
1029 | PMU_PMEVCNTR_EL0(0), | ||
1030 | PMU_PMEVCNTR_EL0(1), | ||
1031 | PMU_PMEVCNTR_EL0(2), | ||
1032 | PMU_PMEVCNTR_EL0(3), | ||
1033 | PMU_PMEVCNTR_EL0(4), | ||
1034 | PMU_PMEVCNTR_EL0(5), | ||
1035 | PMU_PMEVCNTR_EL0(6), | ||
1036 | PMU_PMEVCNTR_EL0(7), | ||
1037 | PMU_PMEVCNTR_EL0(8), | ||
1038 | PMU_PMEVCNTR_EL0(9), | ||
1039 | PMU_PMEVCNTR_EL0(10), | ||
1040 | PMU_PMEVCNTR_EL0(11), | ||
1041 | PMU_PMEVCNTR_EL0(12), | ||
1042 | PMU_PMEVCNTR_EL0(13), | ||
1043 | PMU_PMEVCNTR_EL0(14), | ||
1044 | PMU_PMEVCNTR_EL0(15), | ||
1045 | PMU_PMEVCNTR_EL0(16), | ||
1046 | PMU_PMEVCNTR_EL0(17), | ||
1047 | PMU_PMEVCNTR_EL0(18), | ||
1048 | PMU_PMEVCNTR_EL0(19), | ||
1049 | PMU_PMEVCNTR_EL0(20), | ||
1050 | PMU_PMEVCNTR_EL0(21), | ||
1051 | PMU_PMEVCNTR_EL0(22), | ||
1052 | PMU_PMEVCNTR_EL0(23), | ||
1053 | PMU_PMEVCNTR_EL0(24), | ||
1054 | PMU_PMEVCNTR_EL0(25), | ||
1055 | PMU_PMEVCNTR_EL0(26), | ||
1056 | PMU_PMEVCNTR_EL0(27), | ||
1057 | PMU_PMEVCNTR_EL0(28), | ||
1058 | PMU_PMEVCNTR_EL0(29), | ||
1059 | PMU_PMEVCNTR_EL0(30), | ||
1060 | /* PMEVTYPERn_EL0 */ | ||
1061 | PMU_PMEVTYPER_EL0(0), | ||
1062 | PMU_PMEVTYPER_EL0(1), | ||
1063 | PMU_PMEVTYPER_EL0(2), | ||
1064 | PMU_PMEVTYPER_EL0(3), | ||
1065 | PMU_PMEVTYPER_EL0(4), | ||
1066 | PMU_PMEVTYPER_EL0(5), | ||
1067 | PMU_PMEVTYPER_EL0(6), | ||
1068 | PMU_PMEVTYPER_EL0(7), | ||
1069 | PMU_PMEVTYPER_EL0(8), | ||
1070 | PMU_PMEVTYPER_EL0(9), | ||
1071 | PMU_PMEVTYPER_EL0(10), | ||
1072 | PMU_PMEVTYPER_EL0(11), | ||
1073 | PMU_PMEVTYPER_EL0(12), | ||
1074 | PMU_PMEVTYPER_EL0(13), | ||
1075 | PMU_PMEVTYPER_EL0(14), | ||
1076 | PMU_PMEVTYPER_EL0(15), | ||
1077 | PMU_PMEVTYPER_EL0(16), | ||
1078 | PMU_PMEVTYPER_EL0(17), | ||
1079 | PMU_PMEVTYPER_EL0(18), | ||
1080 | PMU_PMEVTYPER_EL0(19), | ||
1081 | PMU_PMEVTYPER_EL0(20), | ||
1082 | PMU_PMEVTYPER_EL0(21), | ||
1083 | PMU_PMEVTYPER_EL0(22), | ||
1084 | PMU_PMEVTYPER_EL0(23), | ||
1085 | PMU_PMEVTYPER_EL0(24), | ||
1086 | PMU_PMEVTYPER_EL0(25), | ||
1087 | PMU_PMEVTYPER_EL0(26), | ||
1088 | PMU_PMEVTYPER_EL0(27), | ||
1089 | PMU_PMEVTYPER_EL0(28), | ||
1090 | PMU_PMEVTYPER_EL0(29), | ||
1091 | PMU_PMEVTYPER_EL0(30), | ||
1092 | /* PMCCFILTR_EL0 | ||
1093 | * This register resets as unknown in 64bit mode while it resets as zero | ||
1094 | * in 32bit mode. Here we choose to reset it as zero for consistency. | ||
1095 | */ | ||
1096 | { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b1111), Op2(0b111), | ||
1097 | access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 }, | ||
1098 | |||
671 | /* DACR32_EL2 */ | 1099 | /* DACR32_EL2 */ |
672 | { Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000), | 1100 | { Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000), |
673 | NULL, reset_unknown, DACR32_EL2 }, | 1101 | NULL, reset_unknown, DACR32_EL2 }, |
@@ -857,6 +1285,20 @@ static const struct sys_reg_desc cp14_64_regs[] = { | |||
857 | { Op1( 0), CRm( 2), .access = trap_raz_wi }, | 1285 | { Op1( 0), CRm( 2), .access = trap_raz_wi }, |
858 | }; | 1286 | }; |
859 | 1287 | ||
1288 | /* Macro to expand the PMEVCNTRn register */ | ||
1289 | #define PMU_PMEVCNTR(n) \ | ||
1290 | /* PMEVCNTRn */ \ | ||
1291 | { Op1(0), CRn(0b1110), \ | ||
1292 | CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ | ||
1293 | access_pmu_evcntr } | ||
1294 | |||
1295 | /* Macro to expand the PMEVTYPERn register */ | ||
1296 | #define PMU_PMEVTYPER(n) \ | ||
1297 | /* PMEVTYPERn */ \ | ||
1298 | { Op1(0), CRn(0b1110), \ | ||
1299 | CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ | ||
1300 | access_pmu_evtyper } | ||
1301 | |||
860 | /* | 1302 | /* |
861 | * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding, | 1303 | * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding, |
862 | * depending on the way they are accessed (as a 32bit or a 64bit | 1304 | * depending on the way they are accessed (as a 32bit or a 64bit |
@@ -885,19 +1327,21 @@ static const struct sys_reg_desc cp15_regs[] = { | |||
885 | { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw }, | 1327 | { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw }, |
886 | 1328 | ||
887 | /* PMU */ | 1329 | /* PMU */ |
888 | { Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi }, | 1330 | { Op1( 0), CRn( 9), CRm(12), Op2( 0), access_pmcr }, |
889 | { Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi }, | 1331 | { Op1( 0), CRn( 9), CRm(12), Op2( 1), access_pmcnten }, |
890 | { Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi }, | 1332 | { Op1( 0), CRn( 9), CRm(12), Op2( 2), access_pmcnten }, |
891 | { Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi }, | 1333 | { Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmovs }, |
892 | { Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi }, | 1334 | { Op1( 0), CRn( 9), CRm(12), Op2( 4), access_pmswinc }, |
893 | { Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi }, | 1335 | { Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmselr }, |
894 | { Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi }, | 1336 | { Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid }, |
895 | { Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi }, | 1337 | { Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid }, |
896 | { Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi }, | 1338 | { Op1( 0), CRn( 9), CRm(13), Op2( 0), access_pmu_evcntr }, |
897 | { Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi }, | 1339 | { Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_evtyper }, |
898 | { Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi }, | 1340 | { Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_evcntr }, |
899 | { Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi }, | 1341 | { Op1( 0), CRn( 9), CRm(14), Op2( 0), access_pmuserenr }, |
900 | { Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi }, | 1342 | { Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pminten }, |
1343 | { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten }, | ||
1344 | { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs }, | ||
901 | 1345 | ||
902 | { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR }, | 1346 | { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR }, |
903 | { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR }, | 1347 | { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR }, |
@@ -908,10 +1352,78 @@ static const struct sys_reg_desc cp15_regs[] = { | |||
908 | { Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi }, | 1352 | { Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi }, |
909 | 1353 | ||
910 | { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, | 1354 | { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, |
1355 | |||
1356 | /* PMEVCNTRn */ | ||
1357 | PMU_PMEVCNTR(0), | ||
1358 | PMU_PMEVCNTR(1), | ||
1359 | PMU_PMEVCNTR(2), | ||
1360 | PMU_PMEVCNTR(3), | ||
1361 | PMU_PMEVCNTR(4), | ||
1362 | PMU_PMEVCNTR(5), | ||
1363 | PMU_PMEVCNTR(6), | ||
1364 | PMU_PMEVCNTR(7), | ||
1365 | PMU_PMEVCNTR(8), | ||
1366 | PMU_PMEVCNTR(9), | ||
1367 | PMU_PMEVCNTR(10), | ||
1368 | PMU_PMEVCNTR(11), | ||
1369 | PMU_PMEVCNTR(12), | ||
1370 | PMU_PMEVCNTR(13), | ||
1371 | PMU_PMEVCNTR(14), | ||
1372 | PMU_PMEVCNTR(15), | ||
1373 | PMU_PMEVCNTR(16), | ||
1374 | PMU_PMEVCNTR(17), | ||
1375 | PMU_PMEVCNTR(18), | ||
1376 | PMU_PMEVCNTR(19), | ||
1377 | PMU_PMEVCNTR(20), | ||
1378 | PMU_PMEVCNTR(21), | ||
1379 | PMU_PMEVCNTR(22), | ||
1380 | PMU_PMEVCNTR(23), | ||
1381 | PMU_PMEVCNTR(24), | ||
1382 | PMU_PMEVCNTR(25), | ||
1383 | PMU_PMEVCNTR(26), | ||
1384 | PMU_PMEVCNTR(27), | ||
1385 | PMU_PMEVCNTR(28), | ||
1386 | PMU_PMEVCNTR(29), | ||
1387 | PMU_PMEVCNTR(30), | ||
1388 | /* PMEVTYPERn */ | ||
1389 | PMU_PMEVTYPER(0), | ||
1390 | PMU_PMEVTYPER(1), | ||
1391 | PMU_PMEVTYPER(2), | ||
1392 | PMU_PMEVTYPER(3), | ||
1393 | PMU_PMEVTYPER(4), | ||
1394 | PMU_PMEVTYPER(5), | ||
1395 | PMU_PMEVTYPER(6), | ||
1396 | PMU_PMEVTYPER(7), | ||
1397 | PMU_PMEVTYPER(8), | ||
1398 | PMU_PMEVTYPER(9), | ||
1399 | PMU_PMEVTYPER(10), | ||
1400 | PMU_PMEVTYPER(11), | ||
1401 | PMU_PMEVTYPER(12), | ||
1402 | PMU_PMEVTYPER(13), | ||
1403 | PMU_PMEVTYPER(14), | ||
1404 | PMU_PMEVTYPER(15), | ||
1405 | PMU_PMEVTYPER(16), | ||
1406 | PMU_PMEVTYPER(17), | ||
1407 | PMU_PMEVTYPER(18), | ||
1408 | PMU_PMEVTYPER(19), | ||
1409 | PMU_PMEVTYPER(20), | ||
1410 | PMU_PMEVTYPER(21), | ||
1411 | PMU_PMEVTYPER(22), | ||
1412 | PMU_PMEVTYPER(23), | ||
1413 | PMU_PMEVTYPER(24), | ||
1414 | PMU_PMEVTYPER(25), | ||
1415 | PMU_PMEVTYPER(26), | ||
1416 | PMU_PMEVTYPER(27), | ||
1417 | PMU_PMEVTYPER(28), | ||
1418 | PMU_PMEVTYPER(29), | ||
1419 | PMU_PMEVTYPER(30), | ||
1420 | /* PMCCFILTR */ | ||
1421 | { Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper }, | ||
911 | }; | 1422 | }; |
912 | 1423 | ||
913 | static const struct sys_reg_desc cp15_64_regs[] = { | 1424 | static const struct sys_reg_desc cp15_64_regs[] = { |
914 | { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, | 1425 | { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, |
1426 | { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr }, | ||
915 | { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, | 1427 | { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, |
916 | { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, | 1428 | { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, |
917 | }; | 1429 | }; |
@@ -942,29 +1454,32 @@ static const struct sys_reg_desc *get_target_table(unsigned target, | |||
942 | } | 1454 | } |
943 | } | 1455 | } |
944 | 1456 | ||
1457 | #define reg_to_match_value(x) \ | ||
1458 | ({ \ | ||
1459 | unsigned long val; \ | ||
1460 | val = (x)->Op0 << 14; \ | ||
1461 | val |= (x)->Op1 << 11; \ | ||
1462 | val |= (x)->CRn << 7; \ | ||
1463 | val |= (x)->CRm << 3; \ | ||
1464 | val |= (x)->Op2; \ | ||
1465 | val; \ | ||
1466 | }) | ||
1467 | |||
1468 | static int match_sys_reg(const void *key, const void *elt) | ||
1469 | { | ||
1470 | const unsigned long pval = (unsigned long)key; | ||
1471 | const struct sys_reg_desc *r = elt; | ||
1472 | |||
1473 | return pval - reg_to_match_value(r); | ||
1474 | } | ||
1475 | |||
945 | static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, | 1476 | static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, |
946 | const struct sys_reg_desc table[], | 1477 | const struct sys_reg_desc table[], |
947 | unsigned int num) | 1478 | unsigned int num) |
948 | { | 1479 | { |
949 | unsigned int i; | 1480 | unsigned long pval = reg_to_match_value(params); |
950 | |||
951 | for (i = 0; i < num; i++) { | ||
952 | const struct sys_reg_desc *r = &table[i]; | ||
953 | |||
954 | if (params->Op0 != r->Op0) | ||
955 | continue; | ||
956 | if (params->Op1 != r->Op1) | ||
957 | continue; | ||
958 | if (params->CRn != r->CRn) | ||
959 | continue; | ||
960 | if (params->CRm != r->CRm) | ||
961 | continue; | ||
962 | if (params->Op2 != r->Op2) | ||
963 | continue; | ||
964 | 1481 | ||
965 | return r; | 1482 | return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg); |
966 | } | ||
967 | return NULL; | ||
968 | } | 1483 | } |
969 | 1484 | ||
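find_reg() now packs the five encoding fields of the lookup key into a single integer, most-significant field first, so plain integer comparison agrees with the table's "sorted ascending by Op0, Op1, CRn, CRm, Op2" invariant and the linear scan can be replaced by bsearch(). A self-contained sketch of the idea (field widths taken from reg_to_match_value() above; this comparator avoids the subtraction trick, which the kernel version can use safely only because the packed values fit in 16 bits):

#include <stdio.h>
#include <stdlib.h>

struct enc { unsigned op0, op1, crn, crm, op2; };

/* pack most-significant field first, mirroring reg_to_match_value() */
static unsigned long pack(const struct enc *e)
{
	return ((unsigned long)e->op0 << 14) | (e->op1 << 11) |
	       (e->crn << 7) | (e->crm << 3) | e->op2;
}

static int cmp(const void *key, const void *elt)
{
	unsigned long k = (unsigned long)key;
	unsigned long v = pack(elt);

	return (k > v) - (k < v);
}

int main(void)
{
	/* the table must stay sorted by the same packed order */
	struct enc table[] = {
		{ 2, 0, 0, 0, 5 }, { 3, 0, 9, 14, 1 }, { 3, 3, 9, 12, 0 },
	};
	struct enc key = { 3, 0, 9, 14, 1 };
	struct enc *r = bsearch((void *)pack(&key), table, 3,
				sizeof(table[0]), cmp);

	printf("%s\n", r ? "found" : "not found");
	return 0;
}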
970 | int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) | 1485 | int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) |
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 2aa79c864e91..7529aab068f5 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h | |||
@@ -33,8 +33,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) | |||
33 | } | 33 | } |
34 | #endif | 34 | #endif |
35 | 35 | ||
36 | #define SPAPR_TCE_SHIFT 12 | ||
37 | |||
38 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | 36 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE |
39 | #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ | 37 | #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ |
40 | #endif | 38 | #endif |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index c98afa538b3a..d7b343170453 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
@@ -182,7 +182,10 @@ struct kvmppc_spapr_tce_table { | |||
182 | struct list_head list; | 182 | struct list_head list; |
183 | struct kvm *kvm; | 183 | struct kvm *kvm; |
184 | u64 liobn; | 184 | u64 liobn; |
185 | u32 window_size; | 185 | struct rcu_head rcu; |
186 | u32 page_shift; | ||
187 | u64 offset; /* in pages */ | ||
188 | u64 size; /* window size in pages */ | ||
186 | struct page *pages[0]; | 189 | struct page *pages[0]; |
187 | }; | 190 | }; |
188 | 191 | ||
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 2241d5357129..2544edabe7f3 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
@@ -165,9 +165,25 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, | |||
165 | extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); | 165 | extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); |
166 | 166 | ||
167 | extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | 167 | extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, |
168 | struct kvm_create_spapr_tce *args); | 168 | struct kvm_create_spapr_tce_64 *args); |
169 | extern struct kvmppc_spapr_tce_table *kvmppc_find_table( | ||
170 | struct kvm_vcpu *vcpu, unsigned long liobn); | ||
171 | extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt, | ||
172 | unsigned long ioba, unsigned long npages); | ||
173 | extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt, | ||
174 | unsigned long tce); | ||
175 | extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, | ||
176 | unsigned long *ua, unsigned long **prmap); | ||
177 | extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt, | ||
178 | unsigned long idx, unsigned long tce); | ||
169 | extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | 179 | extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, |
170 | unsigned long ioba, unsigned long tce); | 180 | unsigned long ioba, unsigned long tce); |
181 | extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, | ||
182 | unsigned long liobn, unsigned long ioba, | ||
183 | unsigned long tce_list, unsigned long npages); | ||
184 | extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, | ||
185 | unsigned long liobn, unsigned long ioba, | ||
186 | unsigned long tce_value, unsigned long npages); | ||
171 | extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | 187 | extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, |
172 | unsigned long ioba); | 188 | unsigned long ioba); |
173 | extern struct page *kvm_alloc_hpt(unsigned long nr_pages); | 189 | extern struct page *kvm_alloc_hpt(unsigned long nr_pages); |
@@ -437,6 +453,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) | |||
437 | { | 453 | { |
438 | return vcpu->arch.irq_type == KVMPPC_IRQ_XICS; | 454 | return vcpu->arch.irq_type == KVMPPC_IRQ_XICS; |
439 | } | 455 | } |
456 | extern void kvmppc_alloc_host_rm_ops(void); | ||
457 | extern void kvmppc_free_host_rm_ops(void); | ||
440 | extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); | 458 | extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); |
441 | extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); | 459 | extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); |
442 | extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); | 460 | extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); |
@@ -445,7 +463,11 @@ extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu); | |||
445 | extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); | 463 | extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); |
446 | extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, | 464 | extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, |
447 | struct kvm_vcpu *vcpu, u32 cpu); | 465 | struct kvm_vcpu *vcpu, u32 cpu); |
466 | extern void kvmppc_xics_ipi_action(void); | ||
467 | extern int h_ipi_redirect; | ||
448 | #else | 468 | #else |
469 | static inline void kvmppc_alloc_host_rm_ops(void) {}; | ||
470 | static inline void kvmppc_free_host_rm_ops(void) {}; | ||
449 | static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) | 471 | static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) |
450 | { return 0; } | 472 | { return 0; } |
451 | static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } | 473 | static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } |
@@ -459,6 +481,33 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) | |||
459 | { return 0; } | 481 | { return 0; } |
460 | #endif | 482 | #endif |
461 | 483 | ||
484 | /* | ||
485 | * Host-side operations we want to set up while the guest is running | ||
486 | * in real mode and operating on the XICS. | ||
487 | * Currently only VCPU wakeup is supported. | ||
488 | */ | ||
489 | |||
490 | union kvmppc_rm_state { | ||
491 | unsigned long raw; | ||
492 | struct { | ||
493 | u32 in_host; | ||
494 | u32 rm_action; | ||
495 | }; | ||
496 | }; | ||
497 | |||
498 | struct kvmppc_host_rm_core { | ||
499 | union kvmppc_rm_state rm_state; | ||
500 | void *rm_data; | ||
501 | char pad[112]; | ||
502 | }; | ||
503 | |||
504 | struct kvmppc_host_rm_ops { | ||
505 | struct kvmppc_host_rm_core *rm_core; | ||
506 | void (*vcpu_kick)(struct kvm_vcpu *vcpu); | ||
507 | }; | ||
508 | |||
509 | extern struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv; | ||
510 | |||
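A note on the 112-byte pad: on a 64-bit build the union (8 bytes) plus the rm_data pointer (8 bytes) plus the pad make each per-core slot exactly 128 bytes, presumably so that every core gets its own 128-byte POWER cache line and false sharing is avoided when cores update their rm_state concurrently. A hedged compile-time check of that reading (an assumption about intent, not something the patch itself asserts):

/* one slot per core; sized to a full 128-byte cache line (assumed intent) */
_Static_assert(sizeof(union kvmppc_rm_state) + sizeof(void *) + 112 == 128,
	       "kvmppc_host_rm_core expected to span one 128-byte cache line");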
462 | static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu) | 511 | static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu) |
463 | { | 512 | { |
464 | #ifdef CONFIG_KVM_BOOKE_HV | 513 | #ifdef CONFIG_KVM_BOOKE_HV |
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index ac9fb114e25d..47897a30982d 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h | |||
@@ -78,6 +78,9 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, | |||
78 | } | 78 | } |
79 | return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift); | 79 | return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift); |
80 | } | 80 | } |
81 | |||
82 | unsigned long vmalloc_to_phys(void *vmalloc_addr); | ||
83 | |||
81 | #endif /* __ASSEMBLY__ */ | 84 | #endif /* __ASSEMBLY__ */ |
82 | 85 | ||
83 | #endif /* _ASM_POWERPC_PGTABLE_H */ | 86 | #endif /* _ASM_POWERPC_PGTABLE_H */ |
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 825663c30945..78083ed20792 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h | |||
@@ -114,6 +114,9 @@ extern int cpu_to_core_id(int cpu); | |||
114 | #define PPC_MSG_TICK_BROADCAST 2 | 114 | #define PPC_MSG_TICK_BROADCAST 2 |
115 | #define PPC_MSG_DEBUGGER_BREAK 3 | 115 | #define PPC_MSG_DEBUGGER_BREAK 3 |
116 | 116 | ||
117 | /* This is only used by the powernv kernel */ | ||
118 | #define PPC_MSG_RM_HOST_ACTION 4 | ||
119 | |||
117 | /* for irq controllers that have dedicated ipis per message (4) */ | 120 | /* for irq controllers that have dedicated ipis per message (4) */ |
118 | extern int smp_request_message_ipi(int virq, int message); | 121 | extern int smp_request_message_ipi(int virq, int message); |
119 | extern const char *smp_ipi_name[]; | 122 | extern const char *smp_ipi_name[]; |
@@ -121,6 +124,7 @@ extern const char *smp_ipi_name[]; | |||
121 | /* for irq controllers with only a single ipi */ | 124 | /* for irq controllers with only a single ipi */ |
122 | extern void smp_muxed_ipi_set_data(int cpu, unsigned long data); | 125 | extern void smp_muxed_ipi_set_data(int cpu, unsigned long data); |
123 | extern void smp_muxed_ipi_message_pass(int cpu, int msg); | 126 | extern void smp_muxed_ipi_message_pass(int cpu, int msg); |
127 | extern void smp_muxed_ipi_set_message(int cpu, int msg); | ||
124 | extern irqreturn_t smp_ipi_demux(void); | 128 | extern irqreturn_t smp_ipi_demux(void); |
125 | 129 | ||
126 | void smp_init_pSeries(void); | 130 | void smp_init_pSeries(void); |
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 0e25bdb190bb..254604856e69 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h | |||
@@ -30,6 +30,7 @@ | |||
30 | #ifdef CONFIG_PPC_ICP_NATIVE | 30 | #ifdef CONFIG_PPC_ICP_NATIVE |
31 | extern int icp_native_init(void); | 31 | extern int icp_native_init(void); |
32 | extern void icp_native_flush_interrupt(void); | 32 | extern void icp_native_flush_interrupt(void); |
33 | extern void icp_native_cause_ipi_rm(int cpu); | ||
33 | #else | 34 | #else |
34 | static inline int icp_native_init(void) { return -ENODEV; } | 35 | static inline int icp_native_init(void) { return -ENODEV; } |
35 | #endif | 36 | #endif |
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index ab4d4732c492..c93cf35ce379 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h | |||
@@ -333,6 +333,15 @@ struct kvm_create_spapr_tce { | |||
333 | __u32 window_size; | 333 | __u32 window_size; |
334 | }; | 334 | }; |
335 | 335 | ||
336 | /* for KVM_CAP_SPAPR_TCE_64 */ | ||
337 | struct kvm_create_spapr_tce_64 { | ||
338 | __u64 liobn; | ||
339 | __u32 page_shift; | ||
340 | __u32 flags; | ||
341 | __u64 offset; /* in pages */ | ||
342 | __u64 size; /* in pages */ | ||
343 | }; | ||
344 | |||
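A hedged user-space sketch of how the new structure might be used, assuming the matching KVM_CREATE_SPAPR_TCE_64 vm ioctl introduced by this series; the LIOBN, page size, and window geometry below are illustrative values only:

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>

int create_dma64_window(int vm_fd)
{
	struct kvm_create_spapr_tce_64 args = {
		.liobn      = 0x80000002,        /* made-up LIOBN */
		.page_shift = 16,                /* 64K IOMMU pages */
		.flags      = 0,
		.offset     = 1ULL << (59 - 16), /* bus offset 1<<59, in pages */
		.size       = 32768,             /* 32768 * 64K = 2GB window */
	};
	int fd = ioctl(vm_fd, KVM_CREATE_SPAPR_TCE_64, &args);

	if (fd < 0)
		perror("KVM_CREATE_SPAPR_TCE_64");
	return fd;	/* the returned fd backs the TCE table pages */
}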
336 | /* for KVM_ALLOCATE_RMA */ | 345 | /* for KVM_ALLOCATE_RMA */ |
337 | struct kvm_allocate_rma { | 346 | struct kvm_allocate_rma { |
338 | __u64 rma_size; | 347 | __u64 rma_size; |
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index cc13d4c83291..b7dea05f0725 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c | |||
@@ -206,7 +206,7 @@ int smp_request_message_ipi(int virq, int msg) | |||
206 | 206 | ||
207 | #ifdef CONFIG_PPC_SMP_MUXED_IPI | 207 | #ifdef CONFIG_PPC_SMP_MUXED_IPI |
208 | struct cpu_messages { | 208 | struct cpu_messages { |
209 | int messages; /* current messages */ | 209 | long messages; /* current messages */ |
210 | unsigned long data; /* data for cause ipi */ | 210 | unsigned long data; /* data for cause ipi */ |
211 | }; | 211 | }; |
212 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message); | 212 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message); |
@@ -218,7 +218,7 @@ void smp_muxed_ipi_set_data(int cpu, unsigned long data) | |||
218 | info->data = data; | 218 | info->data = data; |
219 | } | 219 | } |
220 | 220 | ||
221 | void smp_muxed_ipi_message_pass(int cpu, int msg) | 221 | void smp_muxed_ipi_set_message(int cpu, int msg) |
222 | { | 222 | { |
223 | struct cpu_messages *info = &per_cpu(ipi_message, cpu); | 223 | struct cpu_messages *info = &per_cpu(ipi_message, cpu); |
224 | char *message = (char *)&info->messages; | 224 | char *message = (char *)&info->messages; |
@@ -228,6 +228,13 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) | |||
228 | */ | 228 | */ |
229 | smp_mb(); | 229 | smp_mb(); |
230 | message[msg] = 1; | 230 | message[msg] = 1; |
231 | } | ||
232 | |||
233 | void smp_muxed_ipi_message_pass(int cpu, int msg) | ||
234 | { | ||
235 | struct cpu_messages *info = &per_cpu(ipi_message, cpu); | ||
236 | |||
237 | smp_muxed_ipi_set_message(cpu, msg); | ||
231 | /* | 238 | /* |
232 | * cause_ipi functions are required to include a full barrier | 239 | * cause_ipi functions are required to include a full barrier |
233 | * before doing whatever causes the IPI. | 240 | * before doing whatever causes the IPI. |
@@ -236,20 +243,31 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) | |||
236 | } | 243 | } |
237 | 244 | ||
238 | #ifdef __BIG_ENDIAN__ | 245 | #ifdef __BIG_ENDIAN__ |
239 | #define IPI_MESSAGE(A) (1 << (24 - 8 * (A))) | 246 | #define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A))) |
240 | #else | 247 | #else |
241 | #define IPI_MESSAGE(A) (1 << (8 * (A))) | 248 | #define IPI_MESSAGE(A) (1uL << (8 * (A))) |
242 | #endif | 249 | #endif |
243 | 250 | ||
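The widening from int to long matters here because smp_muxed_ipi_set_message() stores a whole byte (message[msg] = 1) into the word; IPI_MESSAGE(A) must reproduce that byte's value when the word is read back as a long, and which bit that is depends on endianness and word size. On big-endian the lowest-numbered byte is the most significant, hence the (BITS_PER_LONG - 8) base shift replacing the old 32-bit-only 24. A small host-side sketch (illustrative, not part of the patch):

#include <stdio.h>

int main(void)
{
	unsigned long messages = 0;
	char *message = (char *)&messages;
	int msg = 0;	/* e.g. PPC_MSG_CALL_FUNCTION */

	message[msg] = 1;
	printf("messages = 0x%016lx\n", messages);
	/* prints 0x0100000000000000 on big-endian 64-bit, 0x...01 on little-endian */
	return 0;
}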
244 | irqreturn_t smp_ipi_demux(void) | 251 | irqreturn_t smp_ipi_demux(void) |
245 | { | 252 | { |
246 | struct cpu_messages *info = this_cpu_ptr(&ipi_message); | 253 | struct cpu_messages *info = this_cpu_ptr(&ipi_message); |
247 | unsigned int all; | 254 | unsigned long all; |
248 | 255 | ||
249 | mb(); /* order any irq clear */ | 256 | mb(); /* order any irq clear */ |
250 | 257 | ||
251 | do { | 258 | do { |
252 | all = xchg(&info->messages, 0); | 259 | all = xchg(&info->messages, 0); |
260 | #if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) | ||
261 | /* | ||
262 | * Must check for PPC_MSG_RM_HOST_ACTION messages | ||
263 | * before PPC_MSG_CALL_FUNCTION messages because when | ||
264 | * a VM is destroyed, we call kick_all_cpus_sync() | ||
265 | * to ensure that any pending PPC_MSG_RM_HOST_ACTION | ||
266 | * messages have completed before we free any VCPUs. | ||
267 | */ | ||
268 | if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION)) | ||
269 | kvmppc_xics_ipi_action(); | ||
270 | #endif | ||
253 | if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION)) | 271 | if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION)) |
254 | generic_smp_call_function_interrupt(); | 272 | generic_smp_call_function_interrupt(); |
255 | if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE)) | 273 | if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE)) |
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 0570eef83fba..7f7b6d86ac73 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile | |||
@@ -8,7 +8,7 @@ ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm | |||
8 | KVM := ../../../virt/kvm | 8 | KVM := ../../../virt/kvm |
9 | 9 | ||
10 | common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ | 10 | common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ |
11 | $(KVM)/eventfd.o | 11 | $(KVM)/eventfd.o $(KVM)/vfio.o |
12 | 12 | ||
13 | CFLAGS_e500_mmu.o := -I. | 13 | CFLAGS_e500_mmu.o := -I. |
14 | CFLAGS_e500_mmu_host.o := -I. | 14 | CFLAGS_e500_mmu_host.o := -I. |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 638c6d9be9e0..b34220d2aa42 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
@@ -807,7 +807,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) | |||
807 | { | 807 | { |
808 | 808 | ||
809 | #ifdef CONFIG_PPC64 | 809 | #ifdef CONFIG_PPC64 |
810 | INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); | 810 | INIT_LIST_HEAD_RCU(&kvm->arch.spapr_tce_tables); |
811 | INIT_LIST_HEAD(&kvm->arch.rtas_tokens); | 811 | INIT_LIST_HEAD(&kvm->arch.rtas_tokens); |
812 | #endif | 812 | #endif |
813 | 813 | ||
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 54cf9bc94dad..2c2d1030843a 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c | |||
@@ -14,6 +14,7 @@ | |||
14 | * | 14 | * |
15 | * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | 15 | * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> |
16 | * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> | 16 | * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> |
17 | * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com> | ||
17 | */ | 18 | */ |
18 | 19 | ||
19 | #include <linux/types.h> | 20 | #include <linux/types.h> |
@@ -36,28 +37,69 @@ | |||
36 | #include <asm/ppc-opcode.h> | 37 | #include <asm/ppc-opcode.h> |
37 | #include <asm/kvm_host.h> | 38 | #include <asm/kvm_host.h> |
38 | #include <asm/udbg.h> | 39 | #include <asm/udbg.h> |
40 | #include <asm/iommu.h> | ||
41 | #include <asm/tce.h> | ||
39 | 42 | ||
40 | #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) | 43 | static unsigned long kvmppc_tce_pages(unsigned long iommu_pages) |
44 | { | ||
45 | return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; | ||
46 | } | ||
41 | 47 | ||
42 | static long kvmppc_stt_npages(unsigned long window_size) | 48 | static unsigned long kvmppc_stt_pages(unsigned long tce_pages) |
43 | { | 49 | { |
44 | return ALIGN((window_size >> SPAPR_TCE_SHIFT) | 50 | unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) + |
45 | * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; | 51 | (tce_pages * sizeof(struct page *)); |
52 | |||
53 | return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE; | ||
46 | } | 54 | } |
47 | 55 | ||
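To make the sizing concrete, a worked example under the assumption of 64K host pages (typical for ppc64 server kernels): a 2GB DMA window of 64K IOMMU pages holds 32768 TCEs, i.e. 256KB of table, so kvmppc_tce_pages() returns 4; kvmppc_stt_pages() then adds the pages backing the descriptor and its page-pointer array so that the memlock accounting below covers everything that gets pinned.

#include <stdio.h>

#define HOST_PAGE_SIZE (64UL * 1024)	/* assumed 64K host pages */

static unsigned long tce_pages(unsigned long iommu_pages)
{
	/* mirrors kvmppc_tce_pages(): one 8-byte TCE per IOMMU page,
	 * rounded up to whole host pages (64-bit build assumed) */
	return (iommu_pages * sizeof(unsigned long) + HOST_PAGE_SIZE - 1) /
	       HOST_PAGE_SIZE;
}

int main(void)
{
	printf("%lu\n", tce_pages(32768));	/* prints 4 */
	return 0;
}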
48 | static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt) | 56 | static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) |
49 | { | 57 | { |
50 | struct kvm *kvm = stt->kvm; | 58 | long ret = 0; |
51 | int i; | ||
52 | 59 | ||
53 | mutex_lock(&kvm->lock); | 60 | if (!current || !current->mm) |
54 | list_del(&stt->list); | 61 | return ret; /* process exited */ |
55 | for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++) | 62 | |
63 | down_write(¤t->mm->mmap_sem); | ||
64 | |||
65 | if (inc) { | ||
66 | unsigned long locked, lock_limit; | ||
67 | |||
68 | locked = current->mm->locked_vm + stt_pages; | ||
69 | lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; | ||
70 | if (locked > lock_limit && !capable(CAP_IPC_LOCK)) | ||
71 | ret = -ENOMEM; | ||
72 | else | ||
73 | current->mm->locked_vm += stt_pages; | ||
74 | } else { | ||
75 | if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm)) | ||
76 | stt_pages = current->mm->locked_vm; | ||
77 | |||
78 | current->mm->locked_vm -= stt_pages; | ||
79 | } | ||
80 | |||
81 | pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid, | ||
82 | inc ? '+' : '-', | ||
83 | stt_pages << PAGE_SHIFT, | ||
84 | current->mm->locked_vm << PAGE_SHIFT, | ||
85 | rlimit(RLIMIT_MEMLOCK), | ||
86 | ret ? " - exceeded" : ""); | ||
87 | |||
88 | up_write(¤t->mm->mmap_sem); | ||
89 | |||
90 | return ret; | ||
91 | } | ||
92 | |||
93 | static void release_spapr_tce_table(struct rcu_head *head) | ||
94 | { | ||
95 | struct kvmppc_spapr_tce_table *stt = container_of(head, | ||
96 | struct kvmppc_spapr_tce_table, rcu); | ||
97 | unsigned long i, npages = kvmppc_tce_pages(stt->size); | ||
98 | |||
99 | for (i = 0; i < npages; i++) | ||
56 | __free_page(stt->pages[i]); | 100 | __free_page(stt->pages[i]); |
57 | kfree(stt); | ||
58 | mutex_unlock(&kvm->lock); | ||
59 | 101 | ||
60 | kvm_put_kvm(kvm); | 102 | kfree(stt); |
61 | } | 103 | } |
62 | 104 | ||
63 | static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 105 | static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
@@ -65,7 +107,7 @@ static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
65 | struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data; | 107 | struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data; |
66 | struct page *page; | 108 | struct page *page; |
67 | 109 | ||
68 | if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size)) | 110 | if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) |
69 | return VM_FAULT_SIGBUS; | 111 | return VM_FAULT_SIGBUS; |
70 | 112 | ||
71 | page = stt->pages[vmf->pgoff]; | 113 | page = stt->pages[vmf->pgoff]; |
@@ -88,7 +130,14 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) | |||
88 | { | 130 | { |
89 | struct kvmppc_spapr_tce_table *stt = filp->private_data; | 131 | struct kvmppc_spapr_tce_table *stt = filp->private_data; |
90 | 132 | ||
91 | release_spapr_tce_table(stt); | 133 | list_del_rcu(&stt->list); |
134 | |||
135 | kvm_put_kvm(stt->kvm); | ||
136 | |||
137 | kvmppc_account_memlimit( | ||
138 | kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); | ||
139 | call_rcu(&stt->rcu, release_spapr_tce_table); | ||
140 | |||
92 | return 0; | 141 | return 0; |
93 | } | 142 | } |
94 | 143 | ||
@@ -98,20 +147,29 @@ static const struct file_operations kvm_spapr_tce_fops = { | |||
98 | }; | 147 | }; |
99 | 148 | ||
100 | long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | 149 | long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, |
101 | struct kvm_create_spapr_tce *args) | 150 | struct kvm_create_spapr_tce_64 *args) |
102 | { | 151 | { |
103 | struct kvmppc_spapr_tce_table *stt = NULL; | 152 | struct kvmppc_spapr_tce_table *stt = NULL; |
104 | long npages; | 153 | unsigned long npages, size; |
105 | int ret = -ENOMEM; | 154 | int ret = -ENOMEM; |
106 | int i; | 155 | int i; |
107 | 156 | ||
157 | if (!args->size) | ||
158 | return -EINVAL; | ||
159 | |||
108 | /* Check this LIOBN hasn't been previously allocated */ | 160 | /* Check this LIOBN hasn't been previously allocated */ |
109 | list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { | 161 | list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { |
110 | if (stt->liobn == args->liobn) | 162 | if (stt->liobn == args->liobn) |
111 | return -EBUSY; | 163 | return -EBUSY; |
112 | } | 164 | } |
113 | 165 | ||
114 | npages = kvmppc_stt_npages(args->window_size); | 166 | size = args->size; |
167 | npages = kvmppc_tce_pages(size); | ||
168 | ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); | ||
169 | if (ret) { | ||
170 | stt = NULL; | ||
171 | goto fail; | ||
172 | } | ||
115 | 173 | ||
116 | stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), | 174 | stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), |
117 | GFP_KERNEL); | 175 | GFP_KERNEL); |
@@ -119,7 +177,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
119 | goto fail; | 177 | goto fail; |
120 | 178 | ||
121 | stt->liobn = args->liobn; | 179 | stt->liobn = args->liobn; |
122 | stt->window_size = args->window_size; | 180 | stt->page_shift = args->page_shift; |
181 | stt->offset = args->offset; | ||
182 | stt->size = size; | ||
123 | stt->kvm = kvm; | 183 | stt->kvm = kvm; |
124 | 184 | ||
125 | for (i = 0; i < npages; i++) { | 185 | for (i = 0; i < npages; i++) { |
@@ -131,7 +191,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
131 | kvm_get_kvm(kvm); | 191 | kvm_get_kvm(kvm); |
132 | 192 | ||
133 | mutex_lock(&kvm->lock); | 193 | mutex_lock(&kvm->lock); |
134 | list_add(&stt->list, &kvm->arch.spapr_tce_tables); | 194 | list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); |
135 | 195 | ||
136 | mutex_unlock(&kvm->lock); | 196 | mutex_unlock(&kvm->lock); |
137 | 197 | ||
@@ -148,3 +208,59 @@ fail: | |||
148 | } | 208 | } |
149 | return ret; | 209 | return ret; |
150 | } | 210 | } |
211 | |||
212 | long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, | ||
213 | unsigned long liobn, unsigned long ioba, | ||
214 | unsigned long tce_list, unsigned long npages) | ||
215 | { | ||
216 | struct kvmppc_spapr_tce_table *stt; | ||
217 | long i, ret = H_SUCCESS, idx; | ||
218 | unsigned long entry, ua = 0; | ||
219 | u64 __user *tces, tce; | ||
220 | |||
221 | stt = kvmppc_find_table(vcpu, liobn); | ||
222 | if (!stt) | ||
223 | return H_TOO_HARD; | ||
224 | |||
225 | entry = ioba >> stt->page_shift; | ||
226 | /* | ||
227 | * The SPAPR spec says that the maximum size of the list is 512 TCEs, | ||
228 | * so the whole list (512 * sizeof(u64) = 4096 bytes) fits in one 4K page | ||
229 | */ | ||
230 | if (npages > 512) | ||
231 | return H_PARAMETER; | ||
232 | |||
233 | if (tce_list & (SZ_4K - 1)) | ||
234 | return H_PARAMETER; | ||
235 | |||
236 | ret = kvmppc_ioba_validate(stt, ioba, npages); | ||
237 | if (ret != H_SUCCESS) | ||
238 | return ret; | ||
239 | |||
240 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
241 | if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { | ||
242 | ret = H_TOO_HARD; | ||
243 | goto unlock_exit; | ||
244 | } | ||
245 | tces = (u64 __user *) ua; | ||
246 | |||
247 | for (i = 0; i < npages; ++i) { | ||
248 | if (get_user(tce, tces + i)) { | ||
249 | ret = H_TOO_HARD; | ||
250 | goto unlock_exit; | ||
251 | } | ||
252 | tce = be64_to_cpu(tce); | ||
253 | |||
254 | ret = kvmppc_tce_validate(stt, tce); | ||
255 | if (ret != H_SUCCESS) | ||
256 | goto unlock_exit; | ||
257 | |||
258 | kvmppc_tce_put(stt, entry + i, tce); | ||
259 | } | ||
260 | |||
261 | unlock_exit: | ||
262 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
263 | |||
264 | return ret; | ||
265 | } | ||
266 | EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect); | ||
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 89e96b3e0039..44be73e6aa26 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c | |||
@@ -14,6 +14,7 @@ | |||
14 | * | 14 | * |
15 | * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | 15 | * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> |
16 | * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> | 16 | * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> |
17 | * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com> | ||
17 | */ | 18 | */ |
18 | 19 | ||
19 | #include <linux/types.h> | 20 | #include <linux/types.h> |
@@ -30,76 +31,321 @@ | |||
30 | #include <asm/kvm_ppc.h> | 31 | #include <asm/kvm_ppc.h> |
31 | #include <asm/kvm_book3s.h> | 32 | #include <asm/kvm_book3s.h> |
32 | #include <asm/mmu-hash64.h> | 33 | #include <asm/mmu-hash64.h> |
34 | #include <asm/mmu_context.h> | ||
33 | #include <asm/hvcall.h> | 35 | #include <asm/hvcall.h> |
34 | #include <asm/synch.h> | 36 | #include <asm/synch.h> |
35 | #include <asm/ppc-opcode.h> | 37 | #include <asm/ppc-opcode.h> |
36 | #include <asm/kvm_host.h> | 38 | #include <asm/kvm_host.h> |
37 | #include <asm/udbg.h> | 39 | #include <asm/udbg.h> |
40 | #include <asm/iommu.h> | ||
41 | #include <asm/tce.h> | ||
38 | 43 | ||
39 | #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) | 44 | #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) |
40 | 45 | ||
41 | /* WARNING: This will be called in real-mode on HV KVM and virtual | 46 | /* |
47 | * Finds a TCE table descriptor by LIOBN. | ||
48 | * | ||
49 | * WARNING: This will be called in real or virtual mode on HV KVM and virtual | ||
42 | * mode on PR KVM | 50 | * mode on PR KVM |
43 | */ | 51 | */ |
44 | long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | 52 | struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu, |
45 | unsigned long ioba, unsigned long tce) | 53 | unsigned long liobn) |
46 | { | 54 | { |
47 | struct kvm *kvm = vcpu->kvm; | 55 | struct kvm *kvm = vcpu->kvm; |
48 | struct kvmppc_spapr_tce_table *stt; | 56 | struct kvmppc_spapr_tce_table *stt; |
49 | 57 | ||
58 | list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list) | ||
59 | if (stt->liobn == liobn) | ||
60 | return stt; | ||
61 | |||
62 | return NULL; | ||
63 | } | ||
64 | EXPORT_SYMBOL_GPL(kvmppc_find_table); | ||
65 | |||
66 | /* | ||
67 | * Validates IO address. | ||
68 | * | ||
69 | * WARNING: This will be called in real-mode on HV KVM and virtual | ||
70 | * mode on PR KVM | ||
71 | */ | ||
72 | long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt, | ||
73 | unsigned long ioba, unsigned long npages) | ||
74 | { | ||
75 | unsigned long mask = (1ULL << stt->page_shift) - 1; | ||
76 | unsigned long idx = ioba >> stt->page_shift; | ||
77 | |||
78 | if ((ioba & mask) || (idx < stt->offset) || | ||
79 | (idx - stt->offset + npages > stt->size) || | ||
80 | (idx + npages < idx)) | ||
81 | return H_PARAMETER; | ||
82 | |||
83 | return H_SUCCESS; | ||
84 | } | ||
85 | EXPORT_SYMBOL_GPL(kvmppc_ioba_validate); | ||
86 | |||
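The four conditions above reject, in order: an ioba not aligned to the IOMMU page size, an index before the window start, a range running past the window end, and an npages large enough to wrap idx. A standalone harness with made-up window geometry (64-bit build assumed):

#include <stdio.h>

struct window { unsigned page_shift; unsigned long offset, size; };

static int ioba_ok(const struct window *w, unsigned long ioba,
		   unsigned long npages)
{
	unsigned long mask = (1UL << w->page_shift) - 1;
	unsigned long idx = ioba >> w->page_shift;

	/* same four checks as kvmppc_ioba_validate(), in the same order */
	return !((ioba & mask) || idx < w->offset ||
		 idx - w->offset + npages > w->size ||
		 idx + npages < idx);	/* overflow */
}

int main(void)
{
	struct window w = { 16, 0x400000, 32768 };

	printf("%d\n", ioba_ok(&w, 0x400000UL << 16, 1));	   /* 1: first page */
	printf("%d\n", ioba_ok(&w, (0x400000UL << 16) + 4096, 1)); /* 0: unaligned */
	return 0;
}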
87 | /* | ||
88 | * Validates TCE address. | ||
89 | * At the moment only the flags and the page mask are validated. | ||
90 | * As the host kernel does not access those addresses (it just puts them | ||
91 | * into the table and user space is expected to process them), we can skip | ||
92 | * other checks (such as whether the TCE is a guest RAM address or whether | ||
93 | * the page was actually allocated). | ||
94 | * | ||
95 | * WARNING: This will be called in real-mode on HV KVM and virtual | ||
96 | * mode on PR KVM | ||
97 | */ | ||
98 | long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce) | ||
99 | { | ||
100 | unsigned long page_mask = ~((1ULL << stt->page_shift) - 1); | ||
101 | unsigned long mask = ~(page_mask | TCE_PCI_WRITE | TCE_PCI_READ); | ||
102 | |||
103 | if (tce & mask) | ||
104 | return H_PARAMETER; | ||
105 | |||
106 | return H_SUCCESS; | ||
107 | } | ||
108 | EXPORT_SYMBOL_GPL(kvmppc_tce_validate); | ||
109 | |||
110 | /* Note on the use of page_address() in real mode. | ||
111 | * | ||
112 | * It is safe to use page_address() in real mode on ppc64 because | ||
113 | * page_address() is always defined as lowmem_page_address() | ||
114 | * which returns __va(PFN_PHYS(page_to_pfn(page))), an arithmetic | ||
115 | * operation that does not access the page struct. | ||
116 | * | ||
117 | * Theoretically page_address() could be defined differently, | ||
118 | * but either WANT_PAGE_VIRTUAL or HASHED_PAGE_VIRTUAL | ||
119 | * would have to be enabled. | ||
120 | * WANT_PAGE_VIRTUAL is never enabled on ppc32/ppc64, | ||
121 | * HASHED_PAGE_VIRTUAL could be enabled for ppc32 only and only | ||
122 | * if CONFIG_HIGHMEM is defined. As CONFIG_SPARSEMEM_VMEMMAP | ||
123 | * is not expected to be enabled on ppc32, page_address() | ||
124 | * is safe for ppc32 as well. | ||
125 | * | ||
126 | * WARNING: This will be called in real-mode on HV KVM and virtual | ||
127 | * mode on PR KVM | ||
128 | */ | ||
129 | static u64 *kvmppc_page_address(struct page *page) | ||
130 | { | ||
131 | #if defined(HASHED_PAGE_VIRTUAL) || defined(WANT_PAGE_VIRTUAL) | ||
132 | #error TODO: fix to avoid page_address() here | ||
133 | #endif | ||
134 | return (u64 *) page_address(page); | ||
135 | } | ||
136 | |||
137 | /* | ||
138 | * Handles TCE requests for emulated devices. | ||
139 | * Puts guest TCE values into the table and expects user space to convert them. | ||
140 | * Called in both real and virtual modes. | ||
141 | * Cannot fail, so kvmppc_tce_validate() must be called before it. | ||
142 | * | ||
143 | * WARNING: This will be called in real-mode on HV KVM and virtual | ||
144 | * mode on PR KVM | ||
145 | */ | ||
146 | void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, | ||
147 | unsigned long idx, unsigned long tce) | ||
148 | { | ||
149 | struct page *page; | ||
150 | u64 *tbl; | ||
151 | |||
152 | idx -= stt->offset; | ||
153 | page = stt->pages[idx / TCES_PER_PAGE]; | ||
154 | tbl = kvmppc_page_address(page); | ||
155 | |||
156 | tbl[idx % TCES_PER_PAGE] = tce; | ||
157 | } | ||
158 | EXPORT_SYMBOL_GPL(kvmppc_tce_put); | ||
159 | |||
160 | long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, | ||
161 | unsigned long *ua, unsigned long **prmap) | ||
162 | { | ||
163 | unsigned long gfn = gpa >> PAGE_SHIFT; | ||
164 | struct kvm_memory_slot *memslot; | ||
165 | |||
166 | memslot = search_memslots(kvm_memslots(kvm), gfn); | ||
167 | if (!memslot) | ||
168 | return -EINVAL; | ||
169 | |||
170 | *ua = __gfn_to_hva_memslot(memslot, gfn) | | ||
171 | (gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); | ||
172 | |||
173 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | ||
174 | if (prmap) | ||
175 | *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; | ||
176 | #endif | ||
177 | |||
178 | return 0; | ||
179 | } | ||
180 | EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); | ||
181 | |||
182 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | ||
183 | long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | ||
184 | unsigned long ioba, unsigned long tce) | ||
185 | { | ||
186 | struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn); | ||
187 | long ret; | ||
188 | |||
50 | /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ | 189 | /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ |
51 | /* liobn, ioba, tce); */ | 190 | /* liobn, ioba, tce); */ |
52 | 191 | ||
53 | list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { | 192 | if (!stt) |
54 | if (stt->liobn == liobn) { | 193 | return H_TOO_HARD; |
55 | unsigned long idx = ioba >> SPAPR_TCE_SHIFT; | 194 | |
56 | struct page *page; | 195 | ret = kvmppc_ioba_validate(stt, ioba, 1); |
57 | u64 *tbl; | 196 | if (ret != H_SUCCESS) |
58 | 197 | return ret; | |
59 | /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */ | ||
60 | /* liobn, stt, stt->window_size); */ | ||
61 | if (ioba >= stt->window_size) | ||
62 | return H_PARAMETER; | ||
63 | |||
64 | page = stt->pages[idx / TCES_PER_PAGE]; | ||
65 | tbl = (u64 *)page_address(page); | ||
66 | |||
67 | /* FIXME: Need to validate the TCE itself */ | ||
68 | /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */ | ||
69 | tbl[idx % TCES_PER_PAGE] = tce; | ||
70 | return H_SUCCESS; | ||
71 | } | ||
72 | } | ||
73 | 198 | ||
74 | /* Didn't find the liobn, punt it to userspace */ | 199 | ret = kvmppc_tce_validate(stt, tce); |
75 | return H_TOO_HARD; | 200 | if (ret != H_SUCCESS) |
201 | return ret; | ||
202 | |||
203 | kvmppc_tce_put(stt, ioba >> stt->page_shift, tce); | ||
204 | |||
205 | return H_SUCCESS; | ||
76 | } | 206 | } |
77 | EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); | 207 | EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); |
78 | 208 | ||
79 | long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | 209 | static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, |
80 | unsigned long ioba) | 210 | unsigned long ua, unsigned long *phpa) |
211 | { | ||
212 | pte_t *ptep, pte; | ||
213 | unsigned shift = 0; | ||
214 | |||
215 | ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift); | ||
216 | if (!ptep || !pte_present(*ptep)) | ||
217 | return -ENXIO; | ||
218 | pte = *ptep; | ||
219 | |||
220 | if (!shift) | ||
221 | shift = PAGE_SHIFT; | ||
222 | |||
223 | /* Avoid handling anything potentially complicated in real mode */ | ||
224 | if (shift > PAGE_SHIFT) | ||
225 | return -EAGAIN; | ||
226 | |||
227 | if (!pte_young(pte)) | ||
228 | return -EAGAIN; | ||
229 | |||
230 | *phpa = (pte_pfn(pte) << PAGE_SHIFT) | (ua & ((1ULL << shift) - 1)) | | ||
231 | (ua & ~PAGE_MASK); | ||
232 | |||
233 | return 0; | ||
234 | } | ||
235 | |||
236 | long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, | ||
237 | unsigned long liobn, unsigned long ioba, | ||
238 | unsigned long tce_list, unsigned long npages) | ||
81 | { | 239 | { |
82 | struct kvm *kvm = vcpu->kvm; | ||
83 | struct kvmppc_spapr_tce_table *stt; | 240 | struct kvmppc_spapr_tce_table *stt; |
241 | long i, ret = H_SUCCESS; | ||
242 | unsigned long tces, entry, ua = 0; | ||
243 | unsigned long *rmap = NULL; | ||
244 | |||
245 | stt = kvmppc_find_table(vcpu, liobn); | ||
246 | if (!stt) | ||
247 | return H_TOO_HARD; | ||
248 | |||
249 | entry = ioba >> stt->page_shift; | ||
250 | /* | ||
251 | * The spec says that the maximum size of the list is 512 TCEs, | ||
252 | * so the whole list fits within a single 4K page | ||
253 | */ | ||
254 | if (npages > 512) | ||
255 | return H_PARAMETER; | ||
256 | |||
257 | if (tce_list & (SZ_4K - 1)) | ||
258 | return H_PARAMETER; | ||
259 | |||
260 | ret = kvmppc_ioba_validate(stt, ioba, npages); | ||
261 | if (ret != H_SUCCESS) | ||
262 | return ret; | ||
84 | 263 | ||
85 | list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { | 264 | if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) |
86 | if (stt->liobn == liobn) { | 265 | return H_TOO_HARD; |
87 | unsigned long idx = ioba >> SPAPR_TCE_SHIFT; | ||
88 | struct page *page; | ||
89 | u64 *tbl; | ||
90 | 266 | ||
91 | if (ioba >= stt->window_size) | 267 | rmap = (void *) vmalloc_to_phys(rmap); |
92 | return H_PARAMETER; | ||
93 | 268 | ||
94 | page = stt->pages[idx / TCES_PER_PAGE]; | 269 | /* |
95 | tbl = (u64 *)page_address(page); | 270 | * Synchronize with the MMU notifier callbacks in |
271 | * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.). | ||
272 | * While we have the rmap lock, code running on other CPUs | ||
273 | * cannot finish unmapping the host real page that backs | ||
274 | * this guest real page, so we are OK to access the host | ||
275 | * real page. | ||
276 | */ | ||
277 | lock_rmap(rmap); | ||
278 | if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) { | ||
279 | ret = H_TOO_HARD; | ||
280 | goto unlock_exit; | ||
281 | } | ||
282 | |||
283 | for (i = 0; i < npages; ++i) { | ||
284 | unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); | ||
285 | |||
286 | ret = kvmppc_tce_validate(stt, tce); | ||
287 | if (ret != H_SUCCESS) | ||
288 | goto unlock_exit; | ||
96 | 289 | ||
97 | vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE]; | 290 | kvmppc_tce_put(stt, entry + i, tce); |
98 | return H_SUCCESS; | ||
99 | } | ||
100 | } | 291 | } |
101 | 292 | ||
102 | /* Didn't find the liobn, punt it to userspace */ | 293 | unlock_exit: |
103 | return H_TOO_HARD; | 294 | unlock_rmap(rmap); |
295 | |||
296 | return ret; | ||
297 | } | ||
298 | |||
299 | long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, | ||
300 | unsigned long liobn, unsigned long ioba, | ||
301 | unsigned long tce_value, unsigned long npages) | ||
302 | { | ||
303 | struct kvmppc_spapr_tce_table *stt; | ||
304 | long i, ret; | ||
305 | |||
306 | stt = kvmppc_find_table(vcpu, liobn); | ||
307 | if (!stt) | ||
308 | return H_TOO_HARD; | ||
309 | |||
310 | ret = kvmppc_ioba_validate(stt, ioba, npages); | ||
311 | if (ret != H_SUCCESS) | ||
312 | return ret; | ||
313 | |||
314 | /* Check permission bits only, to allow userspace to poison TCEs for debugging */ | ||
315 | if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) | ||
316 | return H_PARAMETER; | ||
317 | |||
318 | for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) | ||
319 | kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); | ||
320 | |||
321 | return H_SUCCESS; | ||
322 | } | ||
323 | EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce); | ||
324 | |||
325 | long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | ||
326 | unsigned long ioba) | ||
327 | { | ||
328 | struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn); | ||
329 | long ret; | ||
330 | unsigned long idx; | ||
331 | struct page *page; | ||
332 | u64 *tbl; | ||
333 | |||
334 | if (!stt) | ||
335 | return H_TOO_HARD; | ||
336 | |||
337 | ret = kvmppc_ioba_validate(stt, ioba, 1); | ||
338 | if (ret != H_SUCCESS) | ||
339 | return ret; | ||
340 | |||
341 | idx = (ioba >> stt->page_shift) - stt->offset; | ||
342 | page = stt->pages[idx / TCES_PER_PAGE]; | ||
343 | tbl = (u64 *)page_address(page); | ||
344 | |||
345 | vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE]; | ||
346 | |||
347 | return H_SUCCESS; | ||
104 | } | 348 | } |
105 | EXPORT_SYMBOL_GPL(kvmppc_h_get_tce); | 349 | EXPORT_SYMBOL_GPL(kvmppc_h_get_tce); |
350 | |||
351 | #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ | ||
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f1187bb6dd4d..84fb4fcfaa41 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
@@ -81,6 +81,17 @@ static int target_smt_mode; | |||
81 | module_param(target_smt_mode, int, S_IRUGO | S_IWUSR); | 81 | module_param(target_smt_mode, int, S_IRUGO | S_IWUSR); |
82 | MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)"); | 82 | MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)"); |
83 | 83 | ||
84 | #ifdef CONFIG_KVM_XICS | ||
85 | static struct kernel_param_ops module_param_ops = { | ||
86 | .set = param_set_int, | ||
87 | .get = param_get_int, | ||
88 | }; | ||
89 | |||
90 | module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, | ||
91 | S_IRUGO | S_IWUSR); | ||
92 | MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); | ||
93 | #endif | ||
94 | |||
84 | static void kvmppc_end_cede(struct kvm_vcpu *vcpu); | 95 | static void kvmppc_end_cede(struct kvm_vcpu *vcpu); |
85 | static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); | 96 | static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); |
86 | 97 | ||
@@ -768,7 +779,31 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
768 | if (kvmppc_xics_enabled(vcpu)) { | 779 | if (kvmppc_xics_enabled(vcpu)) { |
769 | ret = kvmppc_xics_hcall(vcpu, req); | 780 | ret = kvmppc_xics_hcall(vcpu, req); |
770 | break; | 781 | break; |
771 | } /* fallthrough */ | 782 | } |
783 | return RESUME_HOST; | ||
784 | case H_PUT_TCE: | ||
785 | ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4), | ||
786 | kvmppc_get_gpr(vcpu, 5), | ||
787 | kvmppc_get_gpr(vcpu, 6)); | ||
788 | if (ret == H_TOO_HARD) | ||
789 | return RESUME_HOST; | ||
790 | break; | ||
791 | case H_PUT_TCE_INDIRECT: | ||
792 | ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4), | ||
793 | kvmppc_get_gpr(vcpu, 5), | ||
794 | kvmppc_get_gpr(vcpu, 6), | ||
795 | kvmppc_get_gpr(vcpu, 7)); | ||
796 | if (ret == H_TOO_HARD) | ||
797 | return RESUME_HOST; | ||
798 | break; | ||
799 | case H_STUFF_TCE: | ||
800 | ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4), | ||
801 | kvmppc_get_gpr(vcpu, 5), | ||
802 | kvmppc_get_gpr(vcpu, 6), | ||
803 | kvmppc_get_gpr(vcpu, 7)); | ||
804 | if (ret == H_TOO_HARD) | ||
805 | return RESUME_HOST; | ||
806 | break; | ||
772 | default: | 807 | default: |
773 | return RESUME_HOST; | 808 | return RESUME_HOST; |
774 | } | 809 | } |
@@ -2279,6 +2314,46 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) | |||
2279 | } | 2314 | } |
2280 | 2315 | ||
2281 | /* | 2316 | /* |
2317 | * Clear core from the list of active host cores as we are about to | ||
2318 | * enter the guest. Only do this if it is the primary thread of the | ||
2319 | * core (not if a subcore) that is entering the guest. | ||
2320 | */ | ||
2321 | static inline void kvmppc_clear_host_core(int cpu) | ||
2322 | { | ||
2323 | int core; | ||
2324 | |||
2325 | if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu)) | ||
2326 | return; | ||
2327 | /* | ||
2328 | * Memory barrier can be omitted here as we will do a smp_wmb() | ||
2329 | * later in kvmppc_start_thread and we need to ensure that state is | ||
2330 | * visible to other CPUs only after we enter the guest. | ||
2331 | */ | ||
2332 | core = cpu >> threads_shift; | ||
2333 | kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0; | ||
2334 | } | ||
2335 | |||
2336 | /* | ||
2337 | * Advertise this core as an active host core since we exited the guest. | ||
2338 | * Only need to do this if it is the primary thread of the core that is | ||
2339 | * exiting. | ||
2340 | */ | ||
2341 | static inline void kvmppc_set_host_core(int cpu) | ||
2342 | { | ||
2343 | int core; | ||
2344 | |||
2345 | if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu)) | ||
2346 | return; | ||
2347 | |||
2348 | /* | ||
2349 | * Memory barrier can be omitted here because we do a spin_unlock | ||
2350 | * immediately after this, which provides the memory barrier. | ||
2351 | */ | ||
2352 | core = cpu >> threads_shift; | ||
2353 | kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1; | ||
2354 | } | ||
2355 | |||
2356 | /* | ||
2282 | * Run a set of guest threads on a physical core. | 2357 | * Run a set of guest threads on a physical core. |
2283 | * Called with vc->lock held. | 2358 | * Called with vc->lock held. |
2284 | */ | 2359 | */ |
@@ -2390,6 +2465,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
2390 | } | 2465 | } |
2391 | } | 2466 | } |
2392 | 2467 | ||
2468 | kvmppc_clear_host_core(pcpu); | ||
2469 | |||
2393 | /* Start all the threads */ | 2470 | /* Start all the threads */ |
2394 | active = 0; | 2471 | active = 0; |
2395 | for (sub = 0; sub < core_info.n_subcores; ++sub) { | 2472 | for (sub = 0; sub < core_info.n_subcores; ++sub) { |
@@ -2486,6 +2563,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
2486 | kvmppc_ipi_thread(pcpu + i); | 2563 | kvmppc_ipi_thread(pcpu + i); |
2487 | } | 2564 | } |
2488 | 2565 | ||
2566 | kvmppc_set_host_core(pcpu); | ||
2567 | |||
2489 | spin_unlock(&vc->lock); | 2568 | spin_unlock(&vc->lock); |
2490 | 2569 | ||
2491 | /* make sure updates to secondary vcpu structs are visible now */ | 2570 | /* make sure updates to secondary vcpu structs are visible now */ |
@@ -2983,6 +3062,114 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) | |||
2983 | goto out_srcu; | 3062 | goto out_srcu; |
2984 | } | 3063 | } |
2985 | 3064 | ||
3065 | #ifdef CONFIG_KVM_XICS | ||
3066 | static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action, | ||
3067 | void *hcpu) | ||
3068 | { | ||
3069 | unsigned long cpu = (long)hcpu; | ||
3070 | |||
3071 | switch (action) { | ||
3072 | case CPU_UP_PREPARE: | ||
3073 | case CPU_UP_PREPARE_FROZEN: | ||
3074 | kvmppc_set_host_core(cpu); | ||
3075 | break; | ||
3076 | |||
3077 | #ifdef CONFIG_HOTPLUG_CPU | ||
3078 | case CPU_DEAD: | ||
3079 | case CPU_DEAD_FROZEN: | ||
3080 | case CPU_UP_CANCELED: | ||
3081 | case CPU_UP_CANCELED_FROZEN: | ||
3082 | kvmppc_clear_host_core(cpu); | ||
3083 | break; | ||
3084 | #endif | ||
3085 | default: | ||
3086 | break; | ||
3087 | } | ||
3088 | |||
3089 | return NOTIFY_OK; | ||
3090 | } | ||
3091 | |||
3092 | static struct notifier_block kvmppc_cpu_notifier = { | ||
3093 | .notifier_call = kvmppc_cpu_notify, | ||
3094 | }; | ||
3095 | |||
3096 | /* | ||
3097 | * Allocate a per-core structure for managing state about which cores are | ||
3098 | * running in the host versus the guest and for exchanging data between | ||
3099 | * real mode KVM and CPUs running in the host. | ||
3100 | * This is only done for the first VM. | ||
3101 | * The allocated structure stays even if all VMs have stopped. | ||
3102 | * It is only freed when the kvm-hv module is unloaded. | ||
3103 | * It's OK for this routine to fail; we just don't support host | ||
3104 | * core operations like redirecting H_IPI wakeups. | ||
3105 | */ | ||
3106 | void kvmppc_alloc_host_rm_ops(void) | ||
3107 | { | ||
3108 | struct kvmppc_host_rm_ops *ops; | ||
3109 | unsigned long l_ops; | ||
3110 | int cpu, core; | ||
3111 | int size; | ||
3112 | |||
3113 | /* Not the first time here? */ | ||
3114 | if (kvmppc_host_rm_ops_hv != NULL) | ||
3115 | return; | ||
3116 | |||
3117 | ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL); | ||
3118 | if (!ops) | ||
3119 | return; | ||
3120 | |||
3121 | size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core); | ||
3122 | ops->rm_core = kzalloc(size, GFP_KERNEL); | ||
3123 | |||
3124 | if (!ops->rm_core) { | ||
3125 | kfree(ops); | ||
3126 | return; | ||
3127 | } | ||
3128 | |||
3129 | get_online_cpus(); | ||
3130 | |||
3131 | for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) { | ||
3132 | if (!cpu_online(cpu)) | ||
3133 | continue; | ||
3134 | |||
3135 | core = cpu >> threads_shift; | ||
3136 | ops->rm_core[core].rm_state.in_host = 1; | ||
3137 | } | ||
3138 | |||
3139 | ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv; | ||
3140 | |||
3141 | /* | ||
3142 | * Make the contents of the kvmppc_host_rm_ops structure visible | ||
3143 | * to other CPUs before we assign it to the global variable. | ||
3144 | * Do an atomic assignment (no locks used here), but if someone | ||
3145 | * beats us to it, just free our copy and return. | ||
3146 | */ | ||
3147 | smp_wmb(); | ||
3148 | l_ops = (unsigned long) ops; | ||
3149 | |||
3150 | if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) { | ||
3151 | put_online_cpus(); | ||
3152 | kfree(ops->rm_core); | ||
3153 | kfree(ops); | ||
3154 | return; | ||
3155 | } | ||
3156 | |||
3157 | register_cpu_notifier(&kvmppc_cpu_notifier); | ||
3158 | |||
3159 | put_online_cpus(); | ||
3160 | } | ||
3161 | |||
3162 | void kvmppc_free_host_rm_ops(void) | ||
3163 | { | ||
3164 | if (kvmppc_host_rm_ops_hv) { | ||
3165 | unregister_cpu_notifier(&kvmppc_cpu_notifier); | ||
3166 | kfree(kvmppc_host_rm_ops_hv->rm_core); | ||
3167 | kfree(kvmppc_host_rm_ops_hv); | ||
3168 | kvmppc_host_rm_ops_hv = NULL; | ||
3169 | } | ||
3170 | } | ||
3171 | #endif | ||
3172 | |||
2986 | static int kvmppc_core_init_vm_hv(struct kvm *kvm) | 3173 | static int kvmppc_core_init_vm_hv(struct kvm *kvm) |
2987 | { | 3174 | { |
2988 | unsigned long lpcr, lpid; | 3175 | unsigned long lpcr, lpid; |
@@ -2995,6 +3182,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) | |||
2995 | return -ENOMEM; | 3182 | return -ENOMEM; |
2996 | kvm->arch.lpid = lpid; | 3183 | kvm->arch.lpid = lpid; |
2997 | 3184 | ||
3185 | kvmppc_alloc_host_rm_ops(); | ||
3186 | |||
2998 | /* | 3187 | /* |
2999 | * Since we don't flush the TLB when tearing down a VM, | 3188 | * Since we don't flush the TLB when tearing down a VM, |
3000 | * and this lpid might have previously been used, | 3189 | * and this lpid might have previously been used, |
@@ -3228,6 +3417,7 @@ static int kvmppc_book3s_init_hv(void) | |||
3228 | 3417 | ||
3229 | static void kvmppc_book3s_exit_hv(void) | 3418 | static void kvmppc_book3s_exit_hv(void) |
3230 | { | 3419 | { |
3420 | kvmppc_free_host_rm_ops(); | ||
3231 | kvmppc_hv_ops = NULL; | 3421 | kvmppc_hv_ops = NULL; |
3232 | } | 3422 | } |
3233 | 3423 | ||
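A note on kvmppc_alloc_host_rm_ops() above: it publishes the freshly built structure with an smp_wmb() followed by cmpxchg64(), so concurrent first-VM creations race safely: exactly one caller installs its copy and any loser frees its own. A generic sketch of this publish-once pattern, using kernel primitives but hypothetical names (this is not the kernel code itself):

	/* Hypothetical ops structure; stands in for kvmppc_host_rm_ops. */
	struct my_ops {
		void (*vcpu_kick)(void *vcpu);
	};

	static struct my_ops *global_ops;	/* shared, read lock-free */

	static void publish_once(void)
	{
		struct my_ops *ops;

		if (READ_ONCE(global_ops))	/* fast path: already installed */
			return;

		ops = kzalloc(sizeof(*ops), GFP_KERNEL);
		if (!ops)
			return;			/* failure is tolerated, as above */

		/* ... fill in ops ... */

		smp_wmb();	/* make initialization visible before the pointer */
		if (cmpxchg(&global_ops, NULL, ops) != NULL)
			kfree(ops);		/* lost the race; keep the winner's */
	}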
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index fd7006bf6b1a..5f0380db3eab 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c | |||
@@ -283,3 +283,6 @@ void kvmhv_commence_exit(int trap) | |||
283 | kvmhv_interrupt_vcore(vc, ee); | 283 | kvmhv_interrupt_vcore(vc, ee); |
284 | } | 284 | } |
285 | } | 285 | } |
286 | |||
287 | struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv; | ||
288 | EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv); | ||
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 24f58076d49e..980d8a6f7284 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c | |||
@@ -17,12 +17,16 @@ | |||
17 | #include <asm/xics.h> | 17 | #include <asm/xics.h> |
18 | #include <asm/debug.h> | 18 | #include <asm/debug.h> |
19 | #include <asm/synch.h> | 19 | #include <asm/synch.h> |
20 | #include <asm/cputhreads.h> | ||
20 | #include <asm/ppc-opcode.h> | 21 | #include <asm/ppc-opcode.h> |
21 | 22 | ||
22 | #include "book3s_xics.h" | 23 | #include "book3s_xics.h" |
23 | 24 | ||
24 | #define DEBUG_PASSUP | 25 | #define DEBUG_PASSUP |
25 | 26 | ||
27 | int h_ipi_redirect = 1; | ||
28 | EXPORT_SYMBOL(h_ipi_redirect); | ||
29 | |||
26 | static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | 30 | static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, |
27 | u32 new_irq); | 31 | u32 new_irq); |
28 | 32 | ||
@@ -50,11 +54,84 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics, | |||
50 | 54 | ||
51 | /* -- ICP routines -- */ | 55 | /* -- ICP routines -- */ |
52 | 56 | ||
57 | #ifdef CONFIG_SMP | ||
58 | static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) | ||
59 | { | ||
60 | int hcpu; | ||
61 | |||
62 | hcpu = hcore << threads_shift; | ||
63 | kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu; | ||
64 | smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION); | ||
65 | icp_native_cause_ipi_rm(hcpu); | ||
66 | } | ||
67 | #else | ||
68 | static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { } | ||
69 | #endif | ||
70 | |||
71 | /* | ||
72 | * We start the search from our current CPU Id in the core map | ||
73 | * and go in a circle until we get back to our Id, looking for a | ||
74 | * core that is running in host context and that hasn't already | ||
75 | * been targeted for another rm_host_ops action. | ||
76 | * | ||
77 | * In the future, we could consider using a fairer algorithm (one | ||
78 | * that distributes the IPIs better). | ||
79 | * | ||
80 | * Returns -1 if no CPU could be found in the host. | ||
81 | * Otherwise, returns a CPU Id which has been reserved for use. | ||
82 | */ | ||
83 | static inline int grab_next_hostcore(int start, | ||
84 | struct kvmppc_host_rm_core *rm_core, int max, int action) | ||
85 | { | ||
86 | bool success; | ||
87 | int core; | ||
88 | union kvmppc_rm_state old, new; | ||
89 | |||
90 | for (core = start + 1; core < max; core++) { | ||
91 | old = new = READ_ONCE(rm_core[core].rm_state); | ||
92 | |||
93 | if (!old.in_host || old.rm_action) | ||
94 | continue; | ||
95 | |||
96 | /* Try to grab this host core if not taken already. */ | ||
97 | new.rm_action = action; | ||
98 | |||
99 | success = cmpxchg64(&rm_core[core].rm_state.raw, | ||
100 | old.raw, new.raw) == old.raw; | ||
101 | if (success) { | ||
102 | /* | ||
103 | * Make sure that the store to the rm_action is made | ||
104 | * visible before we return to caller (and the | ||
105 | * subsequent store to rm_data) to synchronize with | ||
106 | * the IPI handler. | ||
107 | */ | ||
108 | smp_wmb(); | ||
109 | return core; | ||
110 | } | ||
111 | } | ||
112 | |||
113 | return -1; | ||
114 | } | ||
115 | |||
116 | static inline int find_available_hostcore(int action) | ||
117 | { | ||
118 | int core; | ||
119 | int my_core = smp_processor_id() >> threads_shift; | ||
120 | struct kvmppc_host_rm_core *rm_core = kvmppc_host_rm_ops_hv->rm_core; | ||
121 | |||
122 | core = grab_next_hostcore(my_core, rm_core, cpu_nr_cores(), action); | ||
123 | if (core == -1) | ||
124 | core = grab_next_hostcore(core, rm_core, my_core, action); | ||
125 | |||
126 | return core; | ||
127 | } | ||
128 | |||
53 | static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, | 129 | static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, |
54 | struct kvm_vcpu *this_vcpu) | 130 | struct kvm_vcpu *this_vcpu) |
55 | { | 131 | { |
56 | struct kvmppc_icp *this_icp = this_vcpu->arch.icp; | 132 | struct kvmppc_icp *this_icp = this_vcpu->arch.icp; |
57 | int cpu; | 133 | int cpu; |
134 | int hcore; | ||
58 | 135 | ||
59 | /* Mark the target VCPU as having an interrupt pending */ | 136 | /* Mark the target VCPU as having an interrupt pending */ |
60 | vcpu->stat.queue_intr++; | 137 | vcpu->stat.queue_intr++; |
@@ -66,11 +143,22 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, | |||
66 | return; | 143 | return; |
67 | } | 144 | } |
68 | 145 | ||
69 | /* Check if the core is loaded, if not, too hard */ | 146 | /* |
147 | * Check if the core is loaded; | ||
148 | * if not, find an available host core to post to so it can wake the VCPU; | ||
149 | * if we can't find one, set up state to eventually return too hard. | ||
150 | */ | ||
70 | cpu = vcpu->arch.thread_cpu; | 151 | cpu = vcpu->arch.thread_cpu; |
71 | if (cpu < 0 || cpu >= nr_cpu_ids) { | 152 | if (cpu < 0 || cpu >= nr_cpu_ids) { |
72 | this_icp->rm_action |= XICS_RM_KICK_VCPU; | 153 | hcore = -1; |
73 | this_icp->rm_kick_target = vcpu; | 154 | if (kvmppc_host_rm_ops_hv && h_ipi_redirect) |
155 | hcore = find_available_hostcore(XICS_RM_KICK_VCPU); | ||
156 | if (hcore != -1) { | ||
157 | icp_send_hcore_msg(hcore, vcpu); | ||
158 | } else { | ||
159 | this_icp->rm_action |= XICS_RM_KICK_VCPU; | ||
160 | this_icp->rm_kick_target = vcpu; | ||
161 | } | ||
74 | return; | 162 | return; |
75 | } | 163 | } |
76 | 164 | ||
@@ -623,3 +711,40 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) | |||
623 | bail: | 711 | bail: |
624 | return check_too_hard(xics, icp); | 712 | return check_too_hard(xics, icp); |
625 | } | 713 | } |
714 | |||
715 | /* --- Non-real mode XICS-related built-in routines --- */ | ||
716 | |||
717 | /** | ||
718 | * Host Operations poked by RM KVM | ||
719 | */ | ||
720 | static void rm_host_ipi_action(int action, void *data) | ||
721 | { | ||
722 | switch (action) { | ||
723 | case XICS_RM_KICK_VCPU: | ||
724 | kvmppc_host_rm_ops_hv->vcpu_kick(data); | ||
725 | break; | ||
726 | default: | ||
727 | WARN(1, "Unexpected rm_action=%d data=%p\n", action, data); | ||
728 | break; | ||
729 | } | ||
730 | |||
731 | } | ||
732 | |||
733 | void kvmppc_xics_ipi_action(void) | ||
734 | { | ||
735 | int core; | ||
736 | unsigned int cpu = smp_processor_id(); | ||
737 | struct kvmppc_host_rm_core *rm_corep; | ||
738 | |||
739 | core = cpu >> threads_shift; | ||
740 | rm_corep = &kvmppc_host_rm_ops_hv->rm_core[core]; | ||
741 | |||
742 | if (rm_corep->rm_data) { | ||
743 | rm_host_ipi_action(rm_corep->rm_state.rm_action, | ||
744 | rm_corep->rm_data); | ||
745 | /* Order these stores against the real mode KVM */ | ||
746 | rm_corep->rm_data = NULL; | ||
747 | smp_wmb(); | ||
748 | rm_corep->rm_state.rm_action = 0; | ||
749 | } | ||
750 | } | ||
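grab_next_hostcore() above depends on rm_state being readable and compare-and-swappable as a single 64-bit word, so the in_host check and the rm_action claim happen atomically. A sketch of the presumed layout and claim step; the union layout here is an assumption inferred from how .raw is used in the hunk:

	/* Assumed layout: two 32-bit fields overlaid on one 64-bit word so a
	 * single cmpxchg64() can test in_host and set rm_action atomically. */
	union rm_state_sketch {
		u64 raw;
		struct {
			u32 in_host;	/* core is running host code */
			u32 rm_action;	/* pending action, 0 when free */
		};
	};

	/* Claim a core for 'action' iff it is in the host and currently idle. */
	static bool try_claim_core(union rm_state_sketch *st, u32 action)
	{
		union rm_state_sketch old, new;

		old.raw = READ_ONCE(st->raw);
		if (!old.in_host || old.rm_action)
			return false;
		new = old;
		new.rm_action = action;
		return cmpxchg64(&st->raw, old.raw, new.raw) == old.raw;
	}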
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 25ae2c9913c3..85b32f16fa74 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S | |||
@@ -2020,8 +2020,8 @@ hcall_real_table: | |||
2020 | .long 0 /* 0x12c */ | 2020 | .long 0 /* 0x12c */ |
2021 | .long 0 /* 0x130 */ | 2021 | .long 0 /* 0x130 */ |
2022 | .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table | 2022 | .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table |
2023 | .long 0 /* 0x138 */ | 2023 | .long DOTSYM(kvmppc_h_stuff_tce) - hcall_real_table |
2024 | .long 0 /* 0x13c */ | 2024 | .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table |
2025 | .long 0 /* 0x140 */ | 2025 | .long 0 /* 0x140 */ |
2026 | .long 0 /* 0x144 */ | 2026 | .long 0 /* 0x144 */ |
2027 | .long 0 /* 0x148 */ | 2027 | .long 0 /* 0x148 */ |
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index f2c75a1e0536..02176fd52f84 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c | |||
@@ -280,6 +280,37 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu) | |||
280 | return EMULATE_DONE; | 280 | return EMULATE_DONE; |
281 | } | 281 | } |
282 | 282 | ||
283 | static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu) | ||
284 | { | ||
285 | unsigned long liobn = kvmppc_get_gpr(vcpu, 4); | ||
286 | unsigned long ioba = kvmppc_get_gpr(vcpu, 5); | ||
287 | unsigned long tce = kvmppc_get_gpr(vcpu, 6); | ||
288 | unsigned long npages = kvmppc_get_gpr(vcpu, 7); | ||
289 | long rc; | ||
290 | |||
291 | rc = kvmppc_h_put_tce_indirect(vcpu, liobn, ioba, | ||
292 | tce, npages); | ||
293 | if (rc == H_TOO_HARD) | ||
294 | return EMULATE_FAIL; | ||
295 | kvmppc_set_gpr(vcpu, 3, rc); | ||
296 | return EMULATE_DONE; | ||
297 | } | ||
298 | |||
299 | static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu) | ||
300 | { | ||
301 | unsigned long liobn = kvmppc_get_gpr(vcpu, 4); | ||
302 | unsigned long ioba = kvmppc_get_gpr(vcpu, 5); | ||
303 | unsigned long tce_value = kvmppc_get_gpr(vcpu, 6); | ||
304 | unsigned long npages = kvmppc_get_gpr(vcpu, 7); | ||
305 | long rc; | ||
306 | |||
307 | rc = kvmppc_h_stuff_tce(vcpu, liobn, ioba, tce_value, npages); | ||
308 | if (rc == H_TOO_HARD) | ||
309 | return EMULATE_FAIL; | ||
310 | kvmppc_set_gpr(vcpu, 3, rc); | ||
311 | return EMULATE_DONE; | ||
312 | } | ||
313 | |||
283 | static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) | 314 | static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) |
284 | { | 315 | { |
285 | long rc = kvmppc_xics_hcall(vcpu, cmd); | 316 | long rc = kvmppc_xics_hcall(vcpu, cmd); |
@@ -306,6 +337,10 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) | |||
306 | return kvmppc_h_pr_bulk_remove(vcpu); | 337 | return kvmppc_h_pr_bulk_remove(vcpu); |
307 | case H_PUT_TCE: | 338 | case H_PUT_TCE: |
308 | return kvmppc_h_pr_put_tce(vcpu); | 339 | return kvmppc_h_pr_put_tce(vcpu); |
340 | case H_PUT_TCE_INDIRECT: | ||
341 | return kvmppc_h_pr_put_tce_indirect(vcpu); | ||
342 | case H_STUFF_TCE: | ||
343 | return kvmppc_h_pr_stuff_tce(vcpu); | ||
309 | case H_CEDE: | 344 | case H_CEDE: |
310 | kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE); | 345 | kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE); |
311 | kvm_vcpu_block(vcpu); | 346 | kvm_vcpu_block(vcpu); |
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index a3b182dcb823..19aa59b0850c 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/tlbflush.h> | 33 | #include <asm/tlbflush.h> |
34 | #include <asm/cputhreads.h> | 34 | #include <asm/cputhreads.h> |
35 | #include <asm/irqflags.h> | 35 | #include <asm/irqflags.h> |
36 | #include <asm/iommu.h> | ||
36 | #include "timing.h" | 37 | #include "timing.h" |
37 | #include "irq.h" | 38 | #include "irq.h" |
38 | #include "../mm/mmu_decl.h" | 39 | #include "../mm/mmu_decl.h" |
@@ -437,6 +438,16 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
437 | unsigned int i; | 438 | unsigned int i; |
438 | struct kvm_vcpu *vcpu; | 439 | struct kvm_vcpu *vcpu; |
439 | 440 | ||
441 | #ifdef CONFIG_KVM_XICS | ||
442 | /* | ||
443 | * We call kick_all_cpus_sync() to ensure that all | ||
444 | * CPUs have executed any pending IPIs before we | ||
445 | * continue and free VCPUs structures below. | ||
446 | */ | ||
447 | if (is_kvmppc_hv_enabled(kvm)) | ||
448 | kick_all_cpus_sync(); | ||
449 | #endif | ||
450 | |||
440 | kvm_for_each_vcpu(i, vcpu, kvm) | 451 | kvm_for_each_vcpu(i, vcpu, kvm) |
441 | kvm_arch_vcpu_free(vcpu); | 452 | kvm_arch_vcpu_free(vcpu); |
442 | 453 | ||
@@ -509,6 +520,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
509 | 520 | ||
510 | #ifdef CONFIG_PPC_BOOK3S_64 | 521 | #ifdef CONFIG_PPC_BOOK3S_64 |
511 | case KVM_CAP_SPAPR_TCE: | 522 | case KVM_CAP_SPAPR_TCE: |
523 | case KVM_CAP_SPAPR_TCE_64: | ||
512 | case KVM_CAP_PPC_ALLOC_HTAB: | 524 | case KVM_CAP_PPC_ALLOC_HTAB: |
513 | case KVM_CAP_PPC_RTAS: | 525 | case KVM_CAP_PPC_RTAS: |
514 | case KVM_CAP_PPC_FIXUP_HCALL: | 526 | case KVM_CAP_PPC_FIXUP_HCALL: |
@@ -569,6 +581,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
569 | case KVM_CAP_PPC_GET_SMMU_INFO: | 581 | case KVM_CAP_PPC_GET_SMMU_INFO: |
570 | r = 1; | 582 | r = 1; |
571 | break; | 583 | break; |
584 | case KVM_CAP_SPAPR_MULTITCE: | ||
585 | r = 1; | ||
586 | break; | ||
572 | #endif | 587 | #endif |
573 | default: | 588 | default: |
574 | r = 0; | 589 | r = 0; |
@@ -1331,13 +1346,34 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
1331 | break; | 1346 | break; |
1332 | } | 1347 | } |
1333 | #ifdef CONFIG_PPC_BOOK3S_64 | 1348 | #ifdef CONFIG_PPC_BOOK3S_64 |
1349 | case KVM_CREATE_SPAPR_TCE_64: { | ||
1350 | struct kvm_create_spapr_tce_64 create_tce_64; | ||
1351 | |||
1352 | r = -EFAULT; | ||
1353 | if (copy_from_user(&create_tce_64, argp, sizeof(create_tce_64))) | ||
1354 | goto out; | ||
1355 | if (create_tce_64.flags) { | ||
1356 | r = -EINVAL; | ||
1357 | goto out; | ||
1358 | } | ||
1359 | r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64); | ||
1360 | goto out; | ||
1361 | } | ||
1334 | case KVM_CREATE_SPAPR_TCE: { | 1362 | case KVM_CREATE_SPAPR_TCE: { |
1335 | struct kvm_create_spapr_tce create_tce; | 1363 | struct kvm_create_spapr_tce create_tce; |
1364 | struct kvm_create_spapr_tce_64 create_tce_64; | ||
1336 | 1365 | ||
1337 | r = -EFAULT; | 1366 | r = -EFAULT; |
1338 | if (copy_from_user(&create_tce, argp, sizeof(create_tce))) | 1367 | if (copy_from_user(&create_tce, argp, sizeof(create_tce))) |
1339 | goto out; | 1368 | goto out; |
1340 | r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); | 1369 | |
1370 | create_tce_64.liobn = create_tce.liobn; | ||
1371 | create_tce_64.page_shift = IOMMU_PAGE_SHIFT_4K; | ||
1372 | create_tce_64.offset = 0; | ||
1373 | create_tce_64.size = create_tce.window_size >> | ||
1374 | IOMMU_PAGE_SHIFT_4K; | ||
1375 | create_tce_64.flags = 0; | ||
1376 | r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64); | ||
1341 | goto out; | 1377 | goto out; |
1342 | } | 1378 | } |
1343 | case KVM_PPC_GET_SMMU_INFO: { | 1379 | case KVM_PPC_GET_SMMU_INFO: { |
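The compat path above rewrites a legacy KVM_CREATE_SPAPR_TCE request into the 64-bit form: 4K IOMMU pages, zero bus offset, and a size derived from window_size. From userspace, the new ioctl would be driven roughly as below (a sketch assuming the kvm_create_spapr_tce_64 fields introduced by this series; error handling trimmed):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Create a 1GB window at bus offset 0 with 64K IOMMU pages.
	 * Returns the TCE table fd on success, negative on error. */
	static int create_tce_window_sketch(int vm_fd, uint64_t liobn)
	{
		struct kvm_create_spapr_tce_64 args = {
			.liobn		= liobn,
			.page_shift	= 16,			/* 64K pages */
			.flags		= 0,			/* must be zero */
			.offset		= 0,			/* in pages */
			.size		= (1ULL << 30) >> 16,	/* window, in pages */
		};

		return ioctl(vm_fd, KVM_CREATE_SPAPR_TCE_64, &args);
	}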
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 83dfd7925c72..de37ff445362 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c | |||
@@ -243,3 +243,11 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr) | |||
243 | } | 243 | } |
244 | #endif /* CONFIG_DEBUG_VM */ | 244 | #endif /* CONFIG_DEBUG_VM */ |
245 | 245 | ||
246 | unsigned long vmalloc_to_phys(void *va) | ||
247 | { | ||
248 | unsigned long pfn = vmalloc_to_pfn(va); | ||
249 | |||
250 | BUG_ON(!pfn); | ||
251 | return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va); | ||
252 | } | ||
253 | EXPORT_SYMBOL_GPL(vmalloc_to_phys); | ||
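vmalloc_to_phys() moves to common code here because real-mode KVM needs the physical address behind a vmalloc'ed buffer, just as the hv-24x7 code (whose private copy is deleted below) already did. A minimal usage sketch:

	void *buf = vzalloc(PAGE_SIZE);

	if (buf) {
		unsigned long phys = vmalloc_to_phys(buf);

		/* ... hand 'phys' to firmware/hypervisor/real-mode code ... */
		vfree(buf);
	}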
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 9f9dfda9ed2c..3b09ecfd0aee 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c | |||
@@ -493,14 +493,6 @@ static size_t event_to_attr_ct(struct hv_24x7_event_data *event) | |||
493 | } | 493 | } |
494 | } | 494 | } |
495 | 495 | ||
496 | static unsigned long vmalloc_to_phys(void *v) | ||
497 | { | ||
498 | struct page *p = vmalloc_to_page(v); | ||
499 | |||
500 | BUG_ON(!p); | ||
501 | return page_to_phys(p) + offset_in_page(v); | ||
502 | } | ||
503 | |||
504 | /* */ | 496 | /* */ |
505 | struct event_uniq { | 497 | struct event_uniq { |
506 | struct rb_node node; | 498 | struct rb_node node; |
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index eae32654bdf2..afdf62f2a695 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c | |||
@@ -159,6 +159,27 @@ static void icp_native_cause_ipi(int cpu, unsigned long data) | |||
159 | icp_native_set_qirr(cpu, IPI_PRIORITY); | 159 | icp_native_set_qirr(cpu, IPI_PRIORITY); |
160 | } | 160 | } |
161 | 161 | ||
162 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | ||
163 | void icp_native_cause_ipi_rm(int cpu) | ||
164 | { | ||
165 | /* | ||
166 | * Currently not used to send IPIs to another CPU | ||
167 | * on the same core. The only caller is KVM real mode. | ||
168 | * It needs the physical address of the XICS to have been | ||
169 | * saved previously in kvm_hstate in the paca. | ||
170 | */ | ||
171 | unsigned long xics_phys; | ||
172 | |||
173 | /* | ||
174 | * Just like the cause_ipi functions, it is required to | ||
175 | * include a full barrier (out8 includes a sync) before | ||
176 | * causing the IPI. | ||
177 | */ | ||
178 | xics_phys = paca[cpu].kvm_hstate.xics_phys; | ||
179 | out_rm8((u8 *)(xics_phys + XICS_MFRR), IPI_PRIORITY); | ||
180 | } | ||
181 | #endif | ||
182 | |||
162 | /* | 183 | /* |
163 | * Called when an interrupt is received on an off-line CPU to | 184 | * Called when an interrupt is received on an off-line CPU to |
164 | * clear the interrupt, so that the CPU can go back to nap mode. | 185 | * clear the interrupt, so that the CPU can go back to nap mode. |
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b0c8ad0799c7..6da41fab70fb 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/kvm_types.h> | 20 | #include <linux/kvm_types.h> |
21 | #include <linux/kvm_host.h> | 21 | #include <linux/kvm_host.h> |
22 | #include <linux/kvm.h> | 22 | #include <linux/kvm.h> |
23 | #include <linux/seqlock.h> | ||
23 | #include <asm/debug.h> | 24 | #include <asm/debug.h> |
24 | #include <asm/cpu.h> | 25 | #include <asm/cpu.h> |
25 | #include <asm/fpu/api.h> | 26 | #include <asm/fpu/api.h> |
@@ -229,17 +230,11 @@ struct kvm_s390_itdb { | |||
229 | __u8 data[256]; | 230 | __u8 data[256]; |
230 | } __packed; | 231 | } __packed; |
231 | 232 | ||
232 | struct kvm_s390_vregs { | ||
233 | __vector128 vrs[32]; | ||
234 | __u8 reserved200[512]; /* for future vector expansion */ | ||
235 | } __packed; | ||
236 | |||
237 | struct sie_page { | 233 | struct sie_page { |
238 | struct kvm_s390_sie_block sie_block; | 234 | struct kvm_s390_sie_block sie_block; |
239 | __u8 reserved200[1024]; /* 0x0200 */ | 235 | __u8 reserved200[1024]; /* 0x0200 */ |
240 | struct kvm_s390_itdb itdb; /* 0x0600 */ | 236 | struct kvm_s390_itdb itdb; /* 0x0600 */ |
241 | __u8 reserved700[1280]; /* 0x0700 */ | 237 | __u8 reserved700[2304]; /* 0x0700 */ |
242 | struct kvm_s390_vregs vregs; /* 0x0c00 */ | ||
243 | } __packed; | 238 | } __packed; |
244 | 239 | ||
245 | struct kvm_vcpu_stat { | 240 | struct kvm_vcpu_stat { |
@@ -558,6 +553,15 @@ struct kvm_vcpu_arch { | |||
558 | unsigned long pfault_token; | 553 | unsigned long pfault_token; |
559 | unsigned long pfault_select; | 554 | unsigned long pfault_select; |
560 | unsigned long pfault_compare; | 555 | unsigned long pfault_compare; |
556 | bool cputm_enabled; | ||
557 | /* | ||
558 | * The seqcount protects updates to cputm_start and sie_block.cputm, | ||
559 | * this way we can have non-blocking reads with consistent values. | ||
560 | * Only the owning VCPU thread (vcpu->cpu) is allowed to change these | ||
561 | * values and to start/stop/enable/disable cpu timer accounting. | ||
562 | */ | ||
563 | seqcount_t cputm_seqcount; | ||
564 | __u64 cputm_start; | ||
561 | }; | 565 | }; |
562 | 566 | ||
563 | struct kvm_vm_stat { | 567 | struct kvm_vm_stat { |
@@ -596,15 +600,11 @@ struct s390_io_adapter { | |||
596 | #define S390_ARCH_FAC_MASK_SIZE_U64 \ | 600 | #define S390_ARCH_FAC_MASK_SIZE_U64 \ |
597 | (S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64)) | 601 | (S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64)) |
598 | 602 | ||
599 | struct kvm_s390_fac { | ||
600 | /* facility list requested by guest */ | ||
601 | __u64 list[S390_ARCH_FAC_LIST_SIZE_U64]; | ||
602 | /* facility mask supported by kvm & hosting machine */ | ||
603 | __u64 mask[S390_ARCH_FAC_LIST_SIZE_U64]; | ||
604 | }; | ||
605 | |||
606 | struct kvm_s390_cpu_model { | 603 | struct kvm_s390_cpu_model { |
607 | struct kvm_s390_fac *fac; | 604 | /* facility mask supported by kvm & hosting machine */ |
605 | __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64]; | ||
606 | /* facility list requested by guest (in dma page) */ | ||
607 | __u64 *fac_list; | ||
608 | struct cpuid cpu_id; | 608 | struct cpuid cpu_id; |
609 | unsigned short ibc; | 609 | unsigned short ibc; |
610 | }; | 610 | }; |
@@ -623,6 +623,16 @@ struct kvm_s390_crypto_cb { | |||
623 | __u8 reserved80[128]; /* 0x0080 */ | 623 | __u8 reserved80[128]; /* 0x0080 */ |
624 | }; | 624 | }; |
625 | 625 | ||
626 | /* | ||
627 | * sie_page2 has to be allocated as DMA because fac_list and crycb need | ||
628 | * 31-bit addresses in the sie control block. | ||
629 | */ | ||
630 | struct sie_page2 { | ||
631 | __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */ | ||
632 | struct kvm_s390_crypto_cb crycb; /* 0x0800 */ | ||
633 | u8 reserved900[0x1000 - 0x900]; /* 0x0900 */ | ||
634 | } __packed; | ||
635 | |||
626 | struct kvm_arch{ | 636 | struct kvm_arch{ |
627 | void *sca; | 637 | void *sca; |
628 | int use_esca; | 638 | int use_esca; |
@@ -643,6 +653,7 @@ struct kvm_arch{ | |||
643 | int ipte_lock_count; | 653 | int ipte_lock_count; |
644 | struct mutex ipte_mutex; | 654 | struct mutex ipte_mutex; |
645 | spinlock_t start_stop_lock; | 655 | spinlock_t start_stop_lock; |
656 | struct sie_page2 *sie_page2; | ||
646 | struct kvm_s390_cpu_model model; | 657 | struct kvm_s390_cpu_model model; |
647 | struct kvm_s390_crypto crypto; | 658 | struct kvm_s390_crypto crypto; |
648 | u64 epoch; | 659 | u64 epoch; |
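The cputm_seqcount comment above implies the reader side: only the VCPU thread writes cputm_start and sie_block->cputm, so other readers loop on the sequence counter instead of taking a lock. A simplified reader sketch with the standard seqcount API; the subtraction assumes the CPU timer counts down while the VCPU runs, so treat it as illustrative:

	static __u64 read_cpu_timer_sketch(struct kvm_vcpu *vcpu)
	{
		unsigned int seq;
		__u64 value;

		do {
			seq = read_seqcount_begin(&vcpu->arch.cputm_seqcount);
			value = vcpu->arch.sie_block->cputm;
			if (vcpu->arch.cputm_enabled)	/* timer is ticking */
				value -= get_tod_clock_fast() -
					 vcpu->arch.cputm_start;
		} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq));

		return value;
	}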
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index fe84bd5fe7ce..347fe5afa419 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h | |||
@@ -154,6 +154,7 @@ struct kvm_guest_debug_arch { | |||
154 | #define KVM_SYNC_PFAULT (1UL << 5) | 154 | #define KVM_SYNC_PFAULT (1UL << 5) |
155 | #define KVM_SYNC_VRS (1UL << 6) | 155 | #define KVM_SYNC_VRS (1UL << 6) |
156 | #define KVM_SYNC_RICCB (1UL << 7) | 156 | #define KVM_SYNC_RICCB (1UL << 7) |
157 | #define KVM_SYNC_FPRS (1UL << 8) | ||
157 | /* definition of registers in kvm_run */ | 158 | /* definition of registers in kvm_run */ |
158 | struct kvm_sync_regs { | 159 | struct kvm_sync_regs { |
159 | __u64 prefix; /* prefix register */ | 160 | __u64 prefix; /* prefix register */ |
@@ -168,9 +169,12 @@ struct kvm_sync_regs { | |||
168 | __u64 pft; /* pfault token [PFAULT] */ | 169 | __u64 pft; /* pfault token [PFAULT] */ |
169 | __u64 pfs; /* pfault select [PFAULT] */ | 170 | __u64 pfs; /* pfault select [PFAULT] */ |
170 | __u64 pfc; /* pfault compare [PFAULT] */ | 171 | __u64 pfc; /* pfault compare [PFAULT] */ |
171 | __u64 vrs[32][2]; /* vector registers */ | 172 | union { |
173 | __u64 vrs[32][2]; /* vector registers (KVM_SYNC_VRS) */ | ||
174 | __u64 fprs[16]; /* fp registers (KVM_SYNC_FPRS) */ | ||
175 | }; | ||
172 | __u8 reserved[512]; /* for future vector expansion */ | 176 | __u8 reserved[512]; /* for future vector expansion */ |
173 | __u32 fpc; /* only valid with vector registers */ | 177 | __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ |
174 | __u8 padding[52]; /* riccb needs to be 64byte aligned */ | 178 | __u8 padding[52]; /* riccb needs to be 64byte aligned */ |
175 | __u8 riccb[64]; /* runtime instrumentation controls block */ | 179 | __u8 riccb[64]; /* runtime instrumentation controls block */ |
176 | }; | 180 | }; |
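With KVM_SYNC_FPRS the same sync_regs area now carries either vector or floating-point registers, discriminated by the sync flags. A hedged userspace sketch of consuming the union on an s390 host; kvm_valid_regs is the standard kvm_run field advertising which register blocks are valid:

	#include <string.h>
	#include <linux/kvm.h>

	/* Copy out guest FP state from the run structure after KVM_RUN. */
	static void fetch_fp_state_sketch(struct kvm_run *run,
					  __u64 *vrs_out, __u64 *fprs_out)
	{
		if (run->kvm_valid_regs & KVM_SYNC_VRS)
			memcpy(vrs_out, run->s.regs.vrs, sizeof(run->s.regs.vrs));
		else if (run->kvm_valid_regs & KVM_SYNC_FPRS)
			memcpy(fprs_out, run->s.regs.fprs, sizeof(run->s.regs.fprs));
		/* run->s.regs.fpc is valid in either case */
	}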
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h index ee69c0854c88..5dbaa72baa64 100644 --- a/arch/s390/include/uapi/asm/sie.h +++ b/arch/s390/include/uapi/asm/sie.h | |||
@@ -7,6 +7,7 @@ | |||
7 | { 0x9c, "DIAG (0x9c) time slice end directed" }, \ | 7 | { 0x9c, "DIAG (0x9c) time slice end directed" }, \ |
8 | { 0x204, "DIAG (0x204) logical-cpu utilization" }, \ | 8 | { 0x204, "DIAG (0x204) logical-cpu utilization" }, \ |
9 | { 0x258, "DIAG (0x258) page-reference services" }, \ | 9 | { 0x258, "DIAG (0x258) page-reference services" }, \ |
10 | { 0x288, "DIAG (0x288) watchdog functions" }, \ | ||
10 | { 0x308, "DIAG (0x308) ipl functions" }, \ | 11 | { 0x308, "DIAG (0x308) ipl functions" }, \ |
11 | { 0x500, "DIAG (0x500) KVM virtio functions" }, \ | 12 | { 0x500, "DIAG (0x500) KVM virtio functions" }, \ |
12 | { 0x501, "DIAG (0x501) KVM breakpoint" } | 13 | { 0x501, "DIAG (0x501) KVM breakpoint" } |
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index d30db40437dc..66938d283b77 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c | |||
@@ -373,7 +373,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu) | |||
373 | } | 373 | } |
374 | 374 | ||
375 | static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, | 375 | static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, |
376 | int write) | 376 | enum gacc_mode mode) |
377 | { | 377 | { |
378 | union alet alet; | 378 | union alet alet; |
379 | struct ale ale; | 379 | struct ale ale; |
@@ -454,7 +454,7 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, | |||
454 | } | 454 | } |
455 | } | 455 | } |
456 | 456 | ||
457 | if (ale.fo == 1 && write) | 457 | if (ale.fo == 1 && mode == GACC_STORE) |
458 | return PGM_PROTECTION; | 458 | return PGM_PROTECTION; |
459 | 459 | ||
460 | asce->val = aste.asce; | 460 | asce->val = aste.asce; |
@@ -477,25 +477,28 @@ enum { | |||
477 | }; | 477 | }; |
478 | 478 | ||
479 | static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, | 479 | static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, |
480 | ar_t ar, int write) | 480 | ar_t ar, enum gacc_mode mode) |
481 | { | 481 | { |
482 | int rc; | 482 | int rc; |
483 | psw_t *psw = &vcpu->arch.sie_block->gpsw; | 483 | struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); |
484 | struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; | 484 | struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; |
485 | struct trans_exc_code_bits *tec_bits; | 485 | struct trans_exc_code_bits *tec_bits; |
486 | 486 | ||
487 | memset(pgm, 0, sizeof(*pgm)); | 487 | memset(pgm, 0, sizeof(*pgm)); |
488 | tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; | 488 | tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; |
489 | tec_bits->fsi = write ? FSI_STORE : FSI_FETCH; | 489 | tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; |
490 | tec_bits->as = psw_bits(*psw).as; | 490 | tec_bits->as = psw.as; |
491 | 491 | ||
492 | if (!psw_bits(*psw).t) { | 492 | if (!psw.t) { |
493 | asce->val = 0; | 493 | asce->val = 0; |
494 | asce->r = 1; | 494 | asce->r = 1; |
495 | return 0; | 495 | return 0; |
496 | } | 496 | } |
497 | 497 | ||
498 | switch (psw_bits(vcpu->arch.sie_block->gpsw).as) { | 498 | if (mode == GACC_IFETCH) |
499 | psw.as = psw.as == PSW_AS_HOME ? PSW_AS_HOME : PSW_AS_PRIMARY; | ||
500 | |||
501 | switch (psw.as) { | ||
499 | case PSW_AS_PRIMARY: | 502 | case PSW_AS_PRIMARY: |
500 | asce->val = vcpu->arch.sie_block->gcr[1]; | 503 | asce->val = vcpu->arch.sie_block->gcr[1]; |
501 | return 0; | 504 | return 0; |
@@ -506,7 +509,7 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, | |||
506 | asce->val = vcpu->arch.sie_block->gcr[13]; | 509 | asce->val = vcpu->arch.sie_block->gcr[13]; |
507 | return 0; | 510 | return 0; |
508 | case PSW_AS_ACCREG: | 511 | case PSW_AS_ACCREG: |
509 | rc = ar_translation(vcpu, asce, ar, write); | 512 | rc = ar_translation(vcpu, asce, ar, mode); |
510 | switch (rc) { | 513 | switch (rc) { |
511 | case PGM_ALEN_TRANSLATION: | 514 | case PGM_ALEN_TRANSLATION: |
512 | case PGM_ALE_SEQUENCE: | 515 | case PGM_ALE_SEQUENCE: |
@@ -538,7 +541,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) | |||
538 | * @gva: guest virtual address | 541 | * @gva: guest virtual address |
539 | * @gpa: points to where guest physical (absolute) address should be stored | 542 | * @gpa: points to where guest physical (absolute) address should be stored |
540 | * @asce: effective asce | 543 | * @asce: effective asce |
541 | * @write: indicates if access is a write access | 544 | * @mode: indicates the access mode to be used |
542 | * | 545 | * |
543 | * Translate a guest virtual address into a guest absolute address by means | 546 | * Translate a guest virtual address into a guest absolute address by means |
544 | * of dynamic address translation as specified by the architecture. | 547 | * of dynamic address translation as specified by the architecture. |
@@ -554,7 +557,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) | |||
554 | */ | 557 | */ |
555 | static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, | 558 | static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, |
556 | unsigned long *gpa, const union asce asce, | 559 | unsigned long *gpa, const union asce asce, |
557 | int write) | 560 | enum gacc_mode mode) |
558 | { | 561 | { |
559 | union vaddress vaddr = {.addr = gva}; | 562 | union vaddress vaddr = {.addr = gva}; |
560 | union raddress raddr = {.addr = gva}; | 563 | union raddress raddr = {.addr = gva}; |
@@ -699,7 +702,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, | |||
699 | real_address: | 702 | real_address: |
700 | raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr); | 703 | raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr); |
701 | absolute_address: | 704 | absolute_address: |
702 | if (write && dat_protection) | 705 | if (mode == GACC_STORE && dat_protection) |
703 | return PGM_PROTECTION; | 706 | return PGM_PROTECTION; |
704 | if (kvm_is_error_gpa(vcpu->kvm, raddr.addr)) | 707 | if (kvm_is_error_gpa(vcpu->kvm, raddr.addr)) |
705 | return PGM_ADDRESSING; | 708 | return PGM_ADDRESSING; |
@@ -728,7 +731,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, | |||
728 | 731 | ||
729 | static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, | 732 | static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, |
730 | unsigned long *pages, unsigned long nr_pages, | 733 | unsigned long *pages, unsigned long nr_pages, |
731 | const union asce asce, int write) | 734 | const union asce asce, enum gacc_mode mode) |
732 | { | 735 | { |
733 | struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; | 736 | struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; |
734 | psw_t *psw = &vcpu->arch.sie_block->gpsw; | 737 | psw_t *psw = &vcpu->arch.sie_block->gpsw; |
@@ -740,13 +743,13 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, | |||
740 | while (nr_pages) { | 743 | while (nr_pages) { |
741 | ga = kvm_s390_logical_to_effective(vcpu, ga); | 744 | ga = kvm_s390_logical_to_effective(vcpu, ga); |
742 | tec_bits->addr = ga >> PAGE_SHIFT; | 745 | tec_bits->addr = ga >> PAGE_SHIFT; |
743 | if (write && lap_enabled && is_low_address(ga)) { | 746 | if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) { |
744 | pgm->code = PGM_PROTECTION; | 747 | pgm->code = PGM_PROTECTION; |
745 | return pgm->code; | 748 | return pgm->code; |
746 | } | 749 | } |
747 | ga &= PAGE_MASK; | 750 | ga &= PAGE_MASK; |
748 | if (psw_bits(*psw).t) { | 751 | if (psw_bits(*psw).t) { |
749 | rc = guest_translate(vcpu, ga, pages, asce, write); | 752 | rc = guest_translate(vcpu, ga, pages, asce, mode); |
750 | if (rc < 0) | 753 | if (rc < 0) |
751 | return rc; | 754 | return rc; |
752 | if (rc == PGM_PROTECTION) | 755 | if (rc == PGM_PROTECTION) |
@@ -768,7 +771,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, | |||
768 | } | 771 | } |
769 | 772 | ||
770 | int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, | 773 | int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, |
771 | unsigned long len, int write) | 774 | unsigned long len, enum gacc_mode mode) |
772 | { | 775 | { |
773 | psw_t *psw = &vcpu->arch.sie_block->gpsw; | 776 | psw_t *psw = &vcpu->arch.sie_block->gpsw; |
774 | unsigned long _len, nr_pages, gpa, idx; | 777 | unsigned long _len, nr_pages, gpa, idx; |
@@ -780,7 +783,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, | |||
780 | 783 | ||
781 | if (!len) | 784 | if (!len) |
782 | return 0; | 785 | return 0; |
783 | rc = get_vcpu_asce(vcpu, &asce, ar, write); | 786 | rc = get_vcpu_asce(vcpu, &asce, ar, mode); |
784 | if (rc) | 787 | if (rc) |
785 | return rc; | 788 | return rc; |
786 | nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; | 789 | nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; |
@@ -792,11 +795,11 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, | |||
792 | need_ipte_lock = psw_bits(*psw).t && !asce.r; | 795 | need_ipte_lock = psw_bits(*psw).t && !asce.r; |
793 | if (need_ipte_lock) | 796 | if (need_ipte_lock) |
794 | ipte_lock(vcpu); | 797 | ipte_lock(vcpu); |
795 | rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write); | 798 | rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode); |
796 | for (idx = 0; idx < nr_pages && !rc; idx++) { | 799 | for (idx = 0; idx < nr_pages && !rc; idx++) { |
797 | gpa = *(pages + idx) + (ga & ~PAGE_MASK); | 800 | gpa = *(pages + idx) + (ga & ~PAGE_MASK); |
798 | _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); | 801 | _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); |
799 | if (write) | 802 | if (mode == GACC_STORE) |
800 | rc = kvm_write_guest(vcpu->kvm, gpa, data, _len); | 803 | rc = kvm_write_guest(vcpu->kvm, gpa, data, _len); |
801 | else | 804 | else |
802 | rc = kvm_read_guest(vcpu->kvm, gpa, data, _len); | 805 | rc = kvm_read_guest(vcpu->kvm, gpa, data, _len); |
@@ -812,7 +815,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, | |||
812 | } | 815 | } |
813 | 816 | ||
814 | int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, | 817 | int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, |
815 | void *data, unsigned long len, int write) | 818 | void *data, unsigned long len, enum gacc_mode mode) |
816 | { | 819 | { |
817 | unsigned long _len, gpa; | 820 | unsigned long _len, gpa; |
818 | int rc = 0; | 821 | int rc = 0; |
@@ -820,7 +823,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, | |||
820 | while (len && !rc) { | 823 | while (len && !rc) { |
821 | gpa = kvm_s390_real_to_abs(vcpu, gra); | 824 | gpa = kvm_s390_real_to_abs(vcpu, gra); |
822 | _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); | 825 | _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); |
823 | if (write) | 826 | if (mode) |
824 | rc = write_guest_abs(vcpu, gpa, data, _len); | 827 | rc = write_guest_abs(vcpu, gpa, data, _len); |
825 | else | 828 | else |
826 | rc = read_guest_abs(vcpu, gpa, data, _len); | 829 | rc = read_guest_abs(vcpu, gpa, data, _len); |
@@ -841,7 +844,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, | |||
841 | * has to take care of this. | 844 | * has to take care of this. |
842 | */ | 845 | */ |
843 | int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, | 846 | int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, |
844 | unsigned long *gpa, int write) | 847 | unsigned long *gpa, enum gacc_mode mode) |
845 | { | 848 | { |
846 | struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; | 849 | struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; |
847 | psw_t *psw = &vcpu->arch.sie_block->gpsw; | 850 | psw_t *psw = &vcpu->arch.sie_block->gpsw; |
@@ -851,19 +854,19 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, | |||
851 | 854 | ||
852 | gva = kvm_s390_logical_to_effective(vcpu, gva); | 855 | gva = kvm_s390_logical_to_effective(vcpu, gva); |
853 | tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; | 856 | tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; |
854 | rc = get_vcpu_asce(vcpu, &asce, ar, write); | 857 | rc = get_vcpu_asce(vcpu, &asce, ar, mode); |
855 | tec->addr = gva >> PAGE_SHIFT; | 858 | tec->addr = gva >> PAGE_SHIFT; |
856 | if (rc) | 859 | if (rc) |
857 | return rc; | 860 | return rc; |
858 | if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { | 861 | if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { |
859 | if (write) { | 862 | if (mode == GACC_STORE) { |
860 | rc = pgm->code = PGM_PROTECTION; | 863 | rc = pgm->code = PGM_PROTECTION; |
861 | return rc; | 864 | return rc; |
862 | } | 865 | } |
863 | } | 866 | } |
864 | 867 | ||
865 | if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ | 868 | if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ |
866 | rc = guest_translate(vcpu, gva, gpa, asce, write); | 869 | rc = guest_translate(vcpu, gva, gpa, asce, mode); |
867 | if (rc > 0) { | 870 | if (rc > 0) { |
868 | if (rc == PGM_PROTECTION) | 871 | if (rc == PGM_PROTECTION) |
869 | tec->b61 = 1; | 872 | tec->b61 = 1; |
@@ -883,7 +886,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, | |||
883 | * check_gva_range - test a range of guest virtual addresses for accessibility | 886 | * check_gva_range - test a range of guest virtual addresses for accessibility |
884 | */ | 887 | */ |
885 | int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, | 888 | int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, |
886 | unsigned long length, int is_write) | 889 | unsigned long length, enum gacc_mode mode) |
887 | { | 890 | { |
888 | unsigned long gpa; | 891 | unsigned long gpa; |
889 | unsigned long currlen; | 892 | unsigned long currlen; |
@@ -892,7 +895,7 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, | |||
892 | ipte_lock(vcpu); | 895 | ipte_lock(vcpu); |
893 | while (length > 0 && !rc) { | 896 | while (length > 0 && !rc) { |
894 | currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE)); | 897 | currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE)); |
895 | rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write); | 898 | rc = guest_translate_address(vcpu, gva, ar, &gpa, mode); |
896 | gva += currlen; | 899 | gva += currlen; |
897 | length -= currlen; | 900 | length -= currlen; |
898 | } | 901 | } |
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index ef03726cc661..df0a79dd8159 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h | |||
@@ -155,16 +155,22 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data, | |||
155 | return kvm_read_guest(vcpu->kvm, gpa, data, len); | 155 | return kvm_read_guest(vcpu->kvm, gpa, data, len); |
156 | } | 156 | } |
157 | 157 | ||
158 | enum gacc_mode { | ||
159 | GACC_FETCH, | ||
160 | GACC_STORE, | ||
161 | GACC_IFETCH, | ||
162 | }; | ||
163 | |||
158 | int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, | 164 | int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, |
159 | ar_t ar, unsigned long *gpa, int write); | 165 | ar_t ar, unsigned long *gpa, enum gacc_mode mode); |
160 | int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, | 166 | int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, |
161 | unsigned long length, int is_write); | 167 | unsigned long length, enum gacc_mode mode); |
162 | 168 | ||
163 | int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, | 169 | int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, |
164 | unsigned long len, int write); | 170 | unsigned long len, enum gacc_mode mode); |
165 | 171 | ||
166 | int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, | 172 | int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, |
167 | void *data, unsigned long len, int write); | 173 | void *data, unsigned long len, enum gacc_mode mode); |
168 | 174 | ||
169 | /** | 175 | /** |
170 | * write_guest - copy data from kernel space to guest space | 176 | * write_guest - copy data from kernel space to guest space |
@@ -215,7 +221,7 @@ static inline __must_check | |||
215 | int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, | 221 | int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, |
216 | unsigned long len) | 222 | unsigned long len) |
217 | { | 223 | { |
218 | return access_guest(vcpu, ga, ar, data, len, 1); | 224 | return access_guest(vcpu, ga, ar, data, len, GACC_STORE); |
219 | } | 225 | } |
220 | 226 | ||
221 | /** | 227 | /** |
@@ -235,7 +241,27 @@ static inline __must_check | |||
235 | int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, | 241 | int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, |
236 | unsigned long len) | 242 | unsigned long len) |
237 | { | 243 | { |
238 | return access_guest(vcpu, ga, ar, data, len, 0); | 244 | return access_guest(vcpu, ga, ar, data, len, GACC_FETCH); |
245 | } | ||
246 | |||
247 | /** | ||
248 | * read_guest_instr - copy instruction data from guest space to kernel space | ||
249 | * @vcpu: virtual cpu | ||
250 | * @data: destination address in kernel space | ||
251 | * @len: number of bytes to copy | ||
252 | * | ||
253 | * Copy @len bytes from the current psw address (guest space) to @data (kernel | ||
254 | * space). | ||
255 | * | ||
256 | * The behaviour of read_guest_instr is identical to read_guest, except that | ||
257 | * instruction data is read from primary space unless the PSW designates | ||
258 | * home-space mode, matching how the CPU itself fetches instructions. | ||
259 | */ | ||
260 | static inline __must_check | ||
261 | int read_guest_instr(struct kvm_vcpu *vcpu, void *data, unsigned long len) | ||
262 | { | ||
263 | return access_guest(vcpu, vcpu->arch.sie_block->gpsw.addr, 0, data, len, | ||
264 | GACC_IFETCH); | ||
239 | } | 265 | } |
240 | 266 | ||
241 | /** | 267 | /** |
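GACC_IFETCH exists because s390 fetches instructions from the primary address space unless the PSW is in home-space mode, independent of the data-access mode. A sketch of an intercept handler re-reading the guest's current instruction with the new helper; kvm_s390_get_ilen() is introduced further down in this series, and the decode step is left out:

	/* Sketch: re-fetch the intercepted instruction text for emulation. */
	static int refetch_insn_sketch(struct kvm_vcpu *vcpu)
	{
		u8 insn[6];	/* maximum s390 instruction length */
		u8 ilen = kvm_s390_get_ilen(vcpu);
		int rc;

		rc = read_guest_instr(vcpu, insn, ilen);
		if (rc)
			return kvm_s390_inject_prog_cond(vcpu, rc);

		/* ... decode insn[0..ilen-1] ... */
		return 0;
	}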
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index d53c10753c46..2e6b54e4d3f9 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c | |||
@@ -38,17 +38,32 @@ static const intercept_handler_t instruction_handlers[256] = { | |||
38 | [0xeb] = kvm_s390_handle_eb, | 38 | [0xeb] = kvm_s390_handle_eb, |
39 | }; | 39 | }; |
40 | 40 | ||
41 | void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc) | 41 | u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) |
42 | { | 42 | { |
43 | struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; | 43 | struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; |
44 | u8 ilen = 0; | ||
44 | 45 | ||
45 | /* Use the length of the EXECUTE instruction if necessary */ | 46 | switch (vcpu->arch.sie_block->icptcode) { |
46 | if (sie_block->icptstatus & 1) { | 47 | case ICPT_INST: |
47 | ilc = (sie_block->icptstatus >> 4) & 0x6; | 48 | case ICPT_INSTPROGI: |
48 | if (!ilc) | 49 | case ICPT_OPEREXC: |
49 | ilc = 4; | 50 | case ICPT_PARTEXEC: |
51 | case ICPT_IOINST: | ||
52 | /* instruction only stored for these icptcodes */ | ||
53 | ilen = insn_length(vcpu->arch.sie_block->ipa >> 8); | ||
54 | /* Use the length of the EXECUTE instruction if necessary */ | ||
55 | if (sie_block->icptstatus & 1) { | ||
56 | ilen = (sie_block->icptstatus >> 4) & 0x6; | ||
57 | if (!ilen) | ||
58 | ilen = 4; | ||
59 | } | ||
60 | break; | ||
61 | case ICPT_PROGI: | ||
62 | /* bits 1+2 of pgmilc are the ilc, so we directly get ilen */ | ||
63 | ilen = vcpu->arch.sie_block->pgmilc & 0x6; | ||
64 | break; | ||
50 | } | 65 | } |
51 | sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc); | 66 | return ilen; |
52 | } | 67 | } |
53 | 68 | ||
54 | static int handle_noop(struct kvm_vcpu *vcpu) | 69 | static int handle_noop(struct kvm_vcpu *vcpu) |
@@ -121,11 +136,13 @@ static int handle_instruction(struct kvm_vcpu *vcpu) | |||
121 | return -EOPNOTSUPP; | 136 | return -EOPNOTSUPP; |
122 | } | 137 | } |
123 | 138 | ||
124 | static void __extract_prog_irq(struct kvm_vcpu *vcpu, | 139 | static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu) |
125 | struct kvm_s390_pgm_info *pgm_info) | ||
126 | { | 140 | { |
127 | memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info)); | 141 | struct kvm_s390_pgm_info pgm_info = { |
128 | pgm_info->code = vcpu->arch.sie_block->iprcc; | 142 | .code = vcpu->arch.sie_block->iprcc, |
143 | /* the PSW has already been rewound */ | ||
144 | .flags = KVM_S390_PGM_FLAGS_NO_REWIND, | ||
145 | }; | ||
129 | 146 | ||
130 | switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) { | 147 | switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) { |
131 | case PGM_AFX_TRANSLATION: | 148 | case PGM_AFX_TRANSLATION: |
@@ -138,7 +155,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, | |||
138 | case PGM_PRIMARY_AUTHORITY: | 155 | case PGM_PRIMARY_AUTHORITY: |
139 | case PGM_SECONDARY_AUTHORITY: | 156 | case PGM_SECONDARY_AUTHORITY: |
140 | case PGM_SPACE_SWITCH: | 157 | case PGM_SPACE_SWITCH: |
141 | pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; | 158 | pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc; |
142 | break; | 159 | break; |
143 | case PGM_ALEN_TRANSLATION: | 160 | case PGM_ALEN_TRANSLATION: |
144 | case PGM_ALE_SEQUENCE: | 161 | case PGM_ALE_SEQUENCE: |
@@ -146,7 +163,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, | |||
146 | case PGM_ASTE_SEQUENCE: | 163 | case PGM_ASTE_SEQUENCE: |
147 | case PGM_ASTE_VALIDITY: | 164 | case PGM_ASTE_VALIDITY: |
148 | case PGM_EXTENDED_AUTHORITY: | 165 | case PGM_EXTENDED_AUTHORITY: |
149 | pgm_info->exc_access_id = vcpu->arch.sie_block->eai; | 166 | pgm_info.exc_access_id = vcpu->arch.sie_block->eai; |
150 | break; | 167 | break; |
151 | case PGM_ASCE_TYPE: | 168 | case PGM_ASCE_TYPE: |
152 | case PGM_PAGE_TRANSLATION: | 169 | case PGM_PAGE_TRANSLATION: |
@@ -154,32 +171,33 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, | |||
154 | case PGM_REGION_SECOND_TRANS: | 171 | case PGM_REGION_SECOND_TRANS: |
155 | case PGM_REGION_THIRD_TRANS: | 172 | case PGM_REGION_THIRD_TRANS: |
156 | case PGM_SEGMENT_TRANSLATION: | 173 | case PGM_SEGMENT_TRANSLATION: |
157 | pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; | 174 | pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc; |
158 | pgm_info->exc_access_id = vcpu->arch.sie_block->eai; | 175 | pgm_info.exc_access_id = vcpu->arch.sie_block->eai; |
159 | pgm_info->op_access_id = vcpu->arch.sie_block->oai; | 176 | pgm_info.op_access_id = vcpu->arch.sie_block->oai; |
160 | break; | 177 | break; |
161 | case PGM_MONITOR: | 178 | case PGM_MONITOR: |
162 | pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn; | 179 | pgm_info.mon_class_nr = vcpu->arch.sie_block->mcn; |
163 | pgm_info->mon_code = vcpu->arch.sie_block->tecmc; | 180 | pgm_info.mon_code = vcpu->arch.sie_block->tecmc; |
164 | break; | 181 | break; |
165 | case PGM_VECTOR_PROCESSING: | 182 | case PGM_VECTOR_PROCESSING: |
166 | case PGM_DATA: | 183 | case PGM_DATA: |
167 | pgm_info->data_exc_code = vcpu->arch.sie_block->dxc; | 184 | pgm_info.data_exc_code = vcpu->arch.sie_block->dxc; |
168 | break; | 185 | break; |
169 | case PGM_PROTECTION: | 186 | case PGM_PROTECTION: |
170 | pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; | 187 | pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc; |
171 | pgm_info->exc_access_id = vcpu->arch.sie_block->eai; | 188 | pgm_info.exc_access_id = vcpu->arch.sie_block->eai; |
172 | break; | 189 | break; |
173 | default: | 190 | default: |
174 | break; | 191 | break; |
175 | } | 192 | } |
176 | 193 | ||
177 | if (vcpu->arch.sie_block->iprcc & PGM_PER) { | 194 | if (vcpu->arch.sie_block->iprcc & PGM_PER) { |
178 | pgm_info->per_code = vcpu->arch.sie_block->perc; | 195 | pgm_info.per_code = vcpu->arch.sie_block->perc; |
179 | pgm_info->per_atmid = vcpu->arch.sie_block->peratmid; | 196 | pgm_info.per_atmid = vcpu->arch.sie_block->peratmid; |
180 | pgm_info->per_address = vcpu->arch.sie_block->peraddr; | 197 | pgm_info.per_address = vcpu->arch.sie_block->peraddr; |
181 | pgm_info->per_access_id = vcpu->arch.sie_block->peraid; | 198 | pgm_info.per_access_id = vcpu->arch.sie_block->peraid; |
182 | } | 199 | } |
200 | return kvm_s390_inject_prog_irq(vcpu, &pgm_info); | ||
183 | } | 201 | } |
184 | 202 | ||
185 | /* | 203 | /* |
@@ -208,7 +226,6 @@ static int handle_itdb(struct kvm_vcpu *vcpu) | |||
208 | 226 | ||
209 | static int handle_prog(struct kvm_vcpu *vcpu) | 227 | static int handle_prog(struct kvm_vcpu *vcpu) |
210 | { | 228 | { |
211 | struct kvm_s390_pgm_info pgm_info; | ||
212 | psw_t psw; | 229 | psw_t psw; |
213 | int rc; | 230 | int rc; |
214 | 231 | ||
@@ -234,8 +251,7 @@ static int handle_prog(struct kvm_vcpu *vcpu) | |||
234 | if (rc) | 251 | if (rc) |
235 | return rc; | 252 | return rc; |
236 | 253 | ||
237 | __extract_prog_irq(vcpu, &pgm_info); | 254 | return inject_prog_on_prog_intercept(vcpu); |
238 | return kvm_s390_inject_prog_irq(vcpu, &pgm_info); | ||
239 | } | 255 | } |
240 | 256 | ||
241 | /** | 257 | /** |
@@ -302,7 +318,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) | |||
302 | 318 | ||
303 | /* Make sure that the source is paged-in */ | 319 | /* Make sure that the source is paged-in */ |
304 | rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2], | 320 | rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2], |
305 | reg2, &srcaddr, 0); | 321 | reg2, &srcaddr, GACC_FETCH); |
306 | if (rc) | 322 | if (rc) |
307 | return kvm_s390_inject_prog_cond(vcpu, rc); | 323 | return kvm_s390_inject_prog_cond(vcpu, rc); |
308 | rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); | 324 | rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); |
@@ -311,14 +327,14 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) | |||
311 | 327 | ||
312 | /* Make sure that the destination is paged-in */ | 328 | /* Make sure that the destination is paged-in */ |
313 | rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1], | 329 | rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1], |
314 | reg1, &dstaddr, 1); | 330 | reg1, &dstaddr, GACC_STORE); |
315 | if (rc) | 331 | if (rc) |
316 | return kvm_s390_inject_prog_cond(vcpu, rc); | 332 | return kvm_s390_inject_prog_cond(vcpu, rc); |
317 | rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); | 333 | rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); |
318 | if (rc != 0) | 334 | if (rc != 0) |
319 | return rc; | 335 | return rc; |
320 | 336 | ||
321 | kvm_s390_rewind_psw(vcpu, 4); | 337 | kvm_s390_retry_instr(vcpu); |
322 | 338 | ||
323 | return 0; | 339 | return 0; |
324 | } | 340 | } |
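A note on the GACC_* constants introduced above: they replace bare 0/1 mode arguments to guest_translate_address(). The enum itself is defined outside this excerpt (presumably in gaccess.h); judging from the callers here and the read_guest_instr() helper that appears further down, its shape is plausibly:

    /* assumed layout, not shown in this diff */
    enum gacc_mode {
            GACC_FETCH,     /* data read, e.g. the MVPG source above */
            GACC_STORE,     /* data write, e.g. the MVPG destination */
            GACC_IFETCH,    /* instruction fetch, as in read_guest_instr() */
    };

Named modes let the translation code pick the correct protection check and exception type, which the old 0/1 flag could not express for instruction fetches.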
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 9ffc73221792..704809d91ddd 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
@@ -182,8 +182,9 @@ static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu) | |||
182 | 182 | ||
183 | static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu) | 183 | static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu) |
184 | { | 184 | { |
185 | return (vcpu->arch.sie_block->cputm >> 63) && | 185 | if (!cpu_timer_interrupts_enabled(vcpu)) |
186 | cpu_timer_interrupts_enabled(vcpu); | 186 | return 0; |
187 | return kvm_s390_get_cpu_timer(vcpu) >> 63; | ||
187 | } | 188 | } |
188 | 189 | ||
189 | static inline int is_ioirq(unsigned long irq_type) | 190 | static inline int is_ioirq(unsigned long irq_type) |
@@ -335,23 +336,6 @@ static void set_intercept_indicators(struct kvm_vcpu *vcpu) | |||
335 | set_intercept_indicators_stop(vcpu); | 336 | set_intercept_indicators_stop(vcpu); |
336 | } | 337 | } |
337 | 338 | ||
338 | static u16 get_ilc(struct kvm_vcpu *vcpu) | ||
339 | { | ||
340 | switch (vcpu->arch.sie_block->icptcode) { | ||
341 | case ICPT_INST: | ||
342 | case ICPT_INSTPROGI: | ||
343 | case ICPT_OPEREXC: | ||
344 | case ICPT_PARTEXEC: | ||
345 | case ICPT_IOINST: | ||
346 | /* last instruction only stored for these icptcodes */ | ||
347 | return insn_length(vcpu->arch.sie_block->ipa >> 8); | ||
348 | case ICPT_PROGI: | ||
349 | return vcpu->arch.sie_block->pgmilc; | ||
350 | default: | ||
351 | return 0; | ||
352 | } | ||
353 | } | ||
354 | |||
355 | static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu) | 339 | static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu) |
356 | { | 340 | { |
357 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 341 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
@@ -588,7 +572,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) | |||
588 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 572 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
589 | struct kvm_s390_pgm_info pgm_info; | 573 | struct kvm_s390_pgm_info pgm_info; |
590 | int rc = 0, nullifying = false; | 574 | int rc = 0, nullifying = false; |
591 | u16 ilc = get_ilc(vcpu); | 575 | u16 ilen; |
592 | 576 | ||
593 | spin_lock(&li->lock); | 577 | spin_lock(&li->lock); |
594 | pgm_info = li->irq.pgm; | 578 | pgm_info = li->irq.pgm; |
@@ -596,8 +580,9 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) | |||
596 | memset(&li->irq.pgm, 0, sizeof(pgm_info)); | 580 | memset(&li->irq.pgm, 0, sizeof(pgm_info)); |
597 | spin_unlock(&li->lock); | 581 | spin_unlock(&li->lock); |
598 | 582 | ||
599 | VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d", | 583 | ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK; |
600 | pgm_info.code, ilc); | 584 | VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d", |
585 | pgm_info.code, ilen); | ||
601 | vcpu->stat.deliver_program_int++; | 586 | vcpu->stat.deliver_program_int++; |
602 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, | 587 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, |
603 | pgm_info.code, 0); | 588 | pgm_info.code, 0); |
@@ -681,10 +666,11 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) | |||
681 | (u8 *) __LC_PER_ACCESS_ID); | 666 | (u8 *) __LC_PER_ACCESS_ID); |
682 | } | 667 | } |
683 | 668 | ||
684 | if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST) | 669 | if (nullifying && !(pgm_info.flags & KVM_S390_PGM_FLAGS_NO_REWIND)) |
685 | kvm_s390_rewind_psw(vcpu, ilc); | 670 | kvm_s390_rewind_psw(vcpu, ilen); |
686 | 671 | ||
687 | rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); | 672 | /* bit 1+2 of the target are the ilc, so we can directly use ilen */ |
673 | rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC); | ||
688 | rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea, | 674 | rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea, |
689 | (u64 *) __LC_LAST_BREAK); | 675 | (u64 *) __LC_LAST_BREAK); |
690 | rc |= put_guest_lc(vcpu, pgm_info.code, | 676 | rc |= put_guest_lc(vcpu, pgm_info.code, |
@@ -923,9 +909,35 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | |||
923 | return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu); | 909 | return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu); |
924 | } | 910 | } |
925 | 911 | ||
912 | static u64 __calculate_sltime(struct kvm_vcpu *vcpu) | ||
913 | { | ||
914 | u64 now, cputm, sltime = 0; | ||
915 | |||
916 | if (ckc_interrupts_enabled(vcpu)) { | ||
917 | now = kvm_s390_get_tod_clock_fast(vcpu->kvm); | ||
918 | sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); | ||
919 | /* already expired or overflow? */ | ||
920 | if (!sltime || vcpu->arch.sie_block->ckc <= now) | ||
921 | return 0; | ||
922 | if (cpu_timer_interrupts_enabled(vcpu)) { | ||
923 | cputm = kvm_s390_get_cpu_timer(vcpu); | ||
924 | /* already expired? */ | ||
925 | if (cputm >> 63) | ||
926 | return 0; | ||
927 | return min(sltime, tod_to_ns(cputm)); | ||
928 | } | ||
929 | } else if (cpu_timer_interrupts_enabled(vcpu)) { | ||
930 | sltime = kvm_s390_get_cpu_timer(vcpu); | ||
931 | /* already expired? */ | ||
932 | if (sltime >> 63) | ||
933 | return 0; | ||
934 | } | ||
935 | return sltime; | ||
936 | } | ||
937 | |||
926 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) | 938 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) |
927 | { | 939 | { |
928 | u64 now, sltime; | 940 | u64 sltime; |
929 | 941 | ||
930 | vcpu->stat.exit_wait_state++; | 942 | vcpu->stat.exit_wait_state++; |
931 | 943 | ||
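A note on the ">> 63" tests in __calculate_sltime() above: the s390 CPU timer counts down and is interpreted as a signed quantity, so once it decrements past zero its most significant bit becomes set. The shift is therefore just an "already expired?" check, as in this minimal sketch (illustrative, not part of the patch):

    static inline int cpu_timer_expired(u64 cputm)
    {
            return cputm >> 63;     /* sign bit of the down-counting timer */
    }

    /* e.g. a timer of 5 units, after 10 units elapse, wraps to
     * 0xfffffffffffffffb, whose bit 63 is set -> expired */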
@@ -938,22 +950,20 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) | |||
938 | return -EOPNOTSUPP; /* disabled wait */ | 950 | return -EOPNOTSUPP; /* disabled wait */ |
939 | } | 951 | } |
940 | 952 | ||
941 | if (!ckc_interrupts_enabled(vcpu)) { | 953 | if (!ckc_interrupts_enabled(vcpu) && |
954 | !cpu_timer_interrupts_enabled(vcpu)) { | ||
942 | VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); | 955 | VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); |
943 | __set_cpu_idle(vcpu); | 956 | __set_cpu_idle(vcpu); |
944 | goto no_timer; | 957 | goto no_timer; |
945 | } | 958 | } |
946 | 959 | ||
947 | now = kvm_s390_get_tod_clock_fast(vcpu->kvm); | 960 | sltime = __calculate_sltime(vcpu); |
948 | sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); | 961 | if (!sltime) |
949 | |||
950 | /* underflow */ | ||
951 | if (vcpu->arch.sie_block->ckc < now) | ||
952 | return 0; | 962 | return 0; |
953 | 963 | ||
954 | __set_cpu_idle(vcpu); | 964 | __set_cpu_idle(vcpu); |
955 | hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); | 965 | hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); |
956 | VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime); | 966 | VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime); |
957 | no_timer: | 967 | no_timer: |
958 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 968 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
959 | kvm_vcpu_block(vcpu); | 969 | kvm_vcpu_block(vcpu); |
@@ -980,18 +990,16 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) | |||
980 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) | 990 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) |
981 | { | 991 | { |
982 | struct kvm_vcpu *vcpu; | 992 | struct kvm_vcpu *vcpu; |
983 | u64 now, sltime; | 993 | u64 sltime; |
984 | 994 | ||
985 | vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); | 995 | vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); |
986 | now = kvm_s390_get_tod_clock_fast(vcpu->kvm); | 996 | sltime = __calculate_sltime(vcpu); |
987 | sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); | ||
988 | 997 | ||
989 | /* | 998 | /* |
990 | * If the monotonic clock runs faster than the tod clock we might be | 999 | * If the monotonic clock runs faster than the tod clock we might be |
991 | * woken up too early and have to go back to sleep to avoid deadlocks. | 1000 | * woken up too early and have to go back to sleep to avoid deadlocks. |
992 | */ | 1001 | */ |
993 | if (vcpu->arch.sie_block->ckc > now && | 1002 | if (sltime && hrtimer_forward_now(timer, ns_to_ktime(sltime))) |
994 | hrtimer_forward_now(timer, ns_to_ktime(sltime))) | ||
995 | return HRTIMER_RESTART; | 1003 | return HRTIMER_RESTART; |
996 | kvm_s390_vcpu_wakeup(vcpu); | 1004 | kvm_s390_vcpu_wakeup(vcpu); |
997 | return HRTIMER_NORESTART; | 1005 | return HRTIMER_NORESTART; |
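The rewritten wakeup callback above relies on a common hrtimer idiom: when the callback finds it fired too early (here because the monotonic clock can outrun the TOD clock), hrtimer_forward_now() pushes the expiry forward by the remaining interval and the callback asks to be re-armed. The generic shape of the pattern, with the two lower-case helpers being hypothetical stand-ins:

    static enum hrtimer_restart wakeup_cb(struct hrtimer *timer)
    {
            u64 remaining = compute_remaining_ns();    /* hypothetical */

            if (remaining &&
                hrtimer_forward_now(timer, ns_to_ktime(remaining)))
                    return HRTIMER_RESTART;    /* woke too early, re-arm */
            do_wakeup();                       /* hypothetical */
            return HRTIMER_NORESTART;
    }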
@@ -1059,8 +1067,16 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | |||
1059 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, | 1067 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, |
1060 | irq->u.pgm.code, 0); | 1068 | irq->u.pgm.code, 0); |
1061 | 1069 | ||
1070 | if (!(irq->u.pgm.flags & KVM_S390_PGM_FLAGS_ILC_VALID)) { | ||
1071 | /* auto detection if no valid ILC was given */ | ||
1072 | irq->u.pgm.flags &= ~KVM_S390_PGM_FLAGS_ILC_MASK; | ||
1073 | irq->u.pgm.flags |= kvm_s390_get_ilen(vcpu); | ||
1074 | irq->u.pgm.flags |= KVM_S390_PGM_FLAGS_ILC_VALID; | ||
1075 | } | ||
1076 | |||
1062 | if (irq->u.pgm.code == PGM_PER) { | 1077 | if (irq->u.pgm.code == PGM_PER) { |
1063 | li->irq.pgm.code |= PGM_PER; | 1078 | li->irq.pgm.code |= PGM_PER; |
1079 | li->irq.pgm.flags = irq->u.pgm.flags; | ||
1064 | /* only modify PER related information */ | 1080 | /* only modify PER related information */ |
1065 | li->irq.pgm.per_address = irq->u.pgm.per_address; | 1081 | li->irq.pgm.per_address = irq->u.pgm.per_address; |
1066 | li->irq.pgm.per_code = irq->u.pgm.per_code; | 1082 | li->irq.pgm.per_code = irq->u.pgm.per_code; |
@@ -1069,6 +1085,7 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | |||
1069 | } else if (!(irq->u.pgm.code & PGM_PER)) { | 1085 | } else if (!(irq->u.pgm.code & PGM_PER)) { |
1070 | li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) | | 1086 | li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) | |
1071 | irq->u.pgm.code; | 1087 | irq->u.pgm.code; |
1088 | li->irq.pgm.flags = irq->u.pgm.flags; | ||
1072 | /* only modify non-PER information */ | 1089 | /* only modify non-PER information */ |
1073 | li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code; | 1090 | li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code; |
1074 | li->irq.pgm.mon_code = irq->u.pgm.mon_code; | 1091 | li->irq.pgm.mon_code = irq->u.pgm.mon_code; |
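With the flag handling added to __inject_prog() above, callers may specify the instruction length of an injected program interrupt explicitly; the kernel falls back to auto-detection only when KVM_S390_PGM_FLAGS_ILC_VALID is absent. A hedged example with made-up interrupt parameters; note that the length in bytes (2, 4 or 6) lands directly in the ILC mask bits:

    struct kvm_s390_irq irq = {
            .type = KVM_S390_PROGRAM_INT,
            .u.pgm.code = PGM_OPERATION,    /* example interrupt code */
            /* 4-byte instruction, marked valid to skip auto-detection */
            .u.pgm.flags = 4 | KVM_S390_PGM_FLAGS_ILC_VALID,
    };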
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 03dfe9c667f4..e196582fe87d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -158,6 +158,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, | |||
158 | kvm->arch.epoch -= *delta; | 158 | kvm->arch.epoch -= *delta; |
159 | kvm_for_each_vcpu(i, vcpu, kvm) { | 159 | kvm_for_each_vcpu(i, vcpu, kvm) { |
160 | vcpu->arch.sie_block->epoch -= *delta; | 160 | vcpu->arch.sie_block->epoch -= *delta; |
161 | if (vcpu->arch.cputm_enabled) | ||
162 | vcpu->arch.cputm_start += *delta; | ||
161 | } | 163 | } |
162 | } | 164 | } |
163 | return NOTIFY_OK; | 165 | return NOTIFY_OK; |
@@ -274,7 +276,6 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, | |||
274 | unsigned long address; | 276 | unsigned long address; |
275 | struct gmap *gmap = kvm->arch.gmap; | 277 | struct gmap *gmap = kvm->arch.gmap; |
276 | 278 | ||
277 | down_read(&gmap->mm->mmap_sem); | ||
278 | /* Loop over all guest pages */ | 279 | /* Loop over all guest pages */ |
279 | last_gfn = memslot->base_gfn + memslot->npages; | 280 | last_gfn = memslot->base_gfn + memslot->npages; |
280 | for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { | 281 | for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { |
@@ -282,8 +283,10 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, | |||
282 | 283 | ||
283 | if (gmap_test_and_clear_dirty(address, gmap)) | 284 | if (gmap_test_and_clear_dirty(address, gmap)) |
284 | mark_page_dirty(kvm, cur_gfn); | 285 | mark_page_dirty(kvm, cur_gfn); |
286 | if (fatal_signal_pending(current)) | ||
287 | return; | ||
288 | cond_resched(); | ||
285 | } | 289 | } |
286 | up_read(&gmap->mm->mmap_sem); | ||
287 | } | 290 | } |
288 | 291 | ||
289 | /* Section: vm related */ | 292 | /* Section: vm related */ |
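The dirty-log change above turns a long scan under mmap_sem into a cooperative loop (the semaphore handling disappears from this function): the task yields between pages and bails out early when it is being killed, so syncing the dirty log of a huge guest no longer stalls. The general shape of the pattern, with process() a hypothetical per-item step:

    for (cur = first; cur <= last; cur++) {
            process(cur);
            if (fatal_signal_pending(current))
                    return;
            cond_resched();
    }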
@@ -352,8 +355,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) | |||
352 | if (atomic_read(&kvm->online_vcpus)) { | 355 | if (atomic_read(&kvm->online_vcpus)) { |
353 | r = -EBUSY; | 356 | r = -EBUSY; |
354 | } else if (MACHINE_HAS_VX) { | 357 | } else if (MACHINE_HAS_VX) { |
355 | set_kvm_facility(kvm->arch.model.fac->mask, 129); | 358 | set_kvm_facility(kvm->arch.model.fac_mask, 129); |
356 | set_kvm_facility(kvm->arch.model.fac->list, 129); | 359 | set_kvm_facility(kvm->arch.model.fac_list, 129); |
357 | r = 0; | 360 | r = 0; |
358 | } else | 361 | } else |
359 | r = -EINVAL; | 362 | r = -EINVAL; |
@@ -367,8 +370,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) | |||
367 | if (atomic_read(&kvm->online_vcpus)) { | 370 | if (atomic_read(&kvm->online_vcpus)) { |
368 | r = -EBUSY; | 371 | r = -EBUSY; |
369 | } else if (test_facility(64)) { | 372 | } else if (test_facility(64)) { |
370 | set_kvm_facility(kvm->arch.model.fac->mask, 64); | 373 | set_kvm_facility(kvm->arch.model.fac_mask, 64); |
371 | set_kvm_facility(kvm->arch.model.fac->list, 64); | 374 | set_kvm_facility(kvm->arch.model.fac_list, 64); |
372 | r = 0; | 375 | r = 0; |
373 | } | 376 | } |
374 | mutex_unlock(&kvm->lock); | 377 | mutex_unlock(&kvm->lock); |
@@ -651,7 +654,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) | |||
651 | memcpy(&kvm->arch.model.cpu_id, &proc->cpuid, | 654 | memcpy(&kvm->arch.model.cpu_id, &proc->cpuid, |
652 | sizeof(struct cpuid)); | 655 | sizeof(struct cpuid)); |
653 | kvm->arch.model.ibc = proc->ibc; | 656 | kvm->arch.model.ibc = proc->ibc; |
654 | memcpy(kvm->arch.model.fac->list, proc->fac_list, | 657 | memcpy(kvm->arch.model.fac_list, proc->fac_list, |
655 | S390_ARCH_FAC_LIST_SIZE_BYTE); | 658 | S390_ARCH_FAC_LIST_SIZE_BYTE); |
656 | } else | 659 | } else |
657 | ret = -EFAULT; | 660 | ret = -EFAULT; |
@@ -685,7 +688,8 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) | |||
685 | } | 688 | } |
686 | memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid)); | 689 | memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid)); |
687 | proc->ibc = kvm->arch.model.ibc; | 690 | proc->ibc = kvm->arch.model.ibc; |
688 | memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE); | 691 | memcpy(&proc->fac_list, kvm->arch.model.fac_list, |
692 | S390_ARCH_FAC_LIST_SIZE_BYTE); | ||
689 | if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) | 693 | if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) |
690 | ret = -EFAULT; | 694 | ret = -EFAULT; |
691 | kfree(proc); | 695 | kfree(proc); |
@@ -705,7 +709,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) | |||
705 | } | 709 | } |
706 | get_cpu_id((struct cpuid *) &mach->cpuid); | 710 | get_cpu_id((struct cpuid *) &mach->cpuid); |
707 | mach->ibc = sclp.ibc; | 711 | mach->ibc = sclp.ibc; |
708 | memcpy(&mach->fac_mask, kvm->arch.model.fac->mask, | 712 | memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, |
709 | S390_ARCH_FAC_LIST_SIZE_BYTE); | 713 | S390_ARCH_FAC_LIST_SIZE_BYTE); |
710 | memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, | 714 | memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, |
711 | S390_ARCH_FAC_LIST_SIZE_BYTE); | 715 | S390_ARCH_FAC_LIST_SIZE_BYTE); |
@@ -1082,16 +1086,12 @@ static void kvm_s390_get_cpu_id(struct cpuid *cpu_id) | |||
1082 | cpu_id->version = 0xff; | 1086 | cpu_id->version = 0xff; |
1083 | } | 1087 | } |
1084 | 1088 | ||
1085 | static int kvm_s390_crypto_init(struct kvm *kvm) | 1089 | static void kvm_s390_crypto_init(struct kvm *kvm) |
1086 | { | 1090 | { |
1087 | if (!test_kvm_facility(kvm, 76)) | 1091 | if (!test_kvm_facility(kvm, 76)) |
1088 | return 0; | 1092 | return; |
1089 | |||
1090 | kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb), | ||
1091 | GFP_KERNEL | GFP_DMA); | ||
1092 | if (!kvm->arch.crypto.crycb) | ||
1093 | return -ENOMEM; | ||
1094 | 1093 | ||
1094 | kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; | ||
1095 | kvm_s390_set_crycb_format(kvm); | 1095 | kvm_s390_set_crycb_format(kvm); |
1096 | 1096 | ||
1097 | /* Enable AES/DEA protected key functions by default */ | 1097 | /* Enable AES/DEA protected key functions by default */ |
@@ -1101,8 +1101,6 @@ static int kvm_s390_crypto_init(struct kvm *kvm) | |||
1101 | sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); | 1101 | sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); |
1102 | get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, | 1102 | get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, |
1103 | sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); | 1103 | sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); |
1104 | |||
1105 | return 0; | ||
1106 | } | 1104 | } |
1107 | 1105 | ||
1108 | static void sca_dispose(struct kvm *kvm) | 1106 | static void sca_dispose(struct kvm *kvm) |
@@ -1156,37 +1154,30 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
1156 | if (!kvm->arch.dbf) | 1154 | if (!kvm->arch.dbf) |
1157 | goto out_err; | 1155 | goto out_err; |
1158 | 1156 | ||
1159 | /* | 1157 | kvm->arch.sie_page2 = |
1160 | * The architectural maximum amount of facilities is 16 kbit. To store | 1158 | (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA); |
1161 | * this amount, 2 kbyte of memory is required. Thus we need a full | 1159 | if (!kvm->arch.sie_page2) |
1162 | * page to hold the guest facility list (arch.model.fac->list) and the | ||
1163 | * facility mask (arch.model.fac->mask). Its address size has to be | ||
1164 | * 31 bits and word aligned. | ||
1165 | */ | ||
1166 | kvm->arch.model.fac = | ||
1167 | (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA); | ||
1168 | if (!kvm->arch.model.fac) | ||
1169 | goto out_err; | 1160 | goto out_err; |
1170 | 1161 | ||
1171 | /* Populate the facility mask initially. */ | 1162 | /* Populate the facility mask initially. */ |
1172 | memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list, | 1163 | memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list, |
1173 | S390_ARCH_FAC_LIST_SIZE_BYTE); | 1164 | S390_ARCH_FAC_LIST_SIZE_BYTE); |
1174 | for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { | 1165 | for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { |
1175 | if (i < kvm_s390_fac_list_mask_size()) | 1166 | if (i < kvm_s390_fac_list_mask_size()) |
1176 | kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i]; | 1167 | kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i]; |
1177 | else | 1168 | else |
1178 | kvm->arch.model.fac->mask[i] = 0UL; | 1169 | kvm->arch.model.fac_mask[i] = 0UL; |
1179 | } | 1170 | } |
1180 | 1171 | ||
1181 | /* Populate the facility list initially. */ | 1172 | /* Populate the facility list initially. */ |
1182 | memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask, | 1173 | kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; |
1174 | memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, | ||
1183 | S390_ARCH_FAC_LIST_SIZE_BYTE); | 1175 | S390_ARCH_FAC_LIST_SIZE_BYTE); |
1184 | 1176 | ||
1185 | kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id); | 1177 | kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id); |
1186 | kvm->arch.model.ibc = sclp.ibc & 0x0fff; | 1178 | kvm->arch.model.ibc = sclp.ibc & 0x0fff; |
1187 | 1179 | ||
1188 | if (kvm_s390_crypto_init(kvm) < 0) | 1180 | kvm_s390_crypto_init(kvm); |
1189 | goto out_err; | ||
1190 | 1181 | ||
1191 | spin_lock_init(&kvm->arch.float_int.lock); | 1182 | spin_lock_init(&kvm->arch.float_int.lock); |
1192 | for (i = 0; i < FIRQ_LIST_COUNT; i++) | 1183 | for (i = 0; i < FIRQ_LIST_COUNT; i++) |
@@ -1222,8 +1213,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
1222 | 1213 | ||
1223 | return 0; | 1214 | return 0; |
1224 | out_err: | 1215 | out_err: |
1225 | kfree(kvm->arch.crypto.crycb); | 1216 | free_page((unsigned long)kvm->arch.sie_page2); |
1226 | free_page((unsigned long)kvm->arch.model.fac); | ||
1227 | debug_unregister(kvm->arch.dbf); | 1217 | debug_unregister(kvm->arch.dbf); |
1228 | sca_dispose(kvm); | 1218 | sca_dispose(kvm); |
1229 | KVM_EVENT(3, "creation of vm failed: %d", rc); | 1219 | KVM_EVENT(3, "creation of vm failed: %d", rc); |
@@ -1269,10 +1259,9 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
1269 | void kvm_arch_destroy_vm(struct kvm *kvm) | 1259 | void kvm_arch_destroy_vm(struct kvm *kvm) |
1270 | { | 1260 | { |
1271 | kvm_free_vcpus(kvm); | 1261 | kvm_free_vcpus(kvm); |
1272 | free_page((unsigned long)kvm->arch.model.fac); | ||
1273 | sca_dispose(kvm); | 1262 | sca_dispose(kvm); |
1274 | debug_unregister(kvm->arch.dbf); | 1263 | debug_unregister(kvm->arch.dbf); |
1275 | kfree(kvm->arch.crypto.crycb); | 1264 | free_page((unsigned long)kvm->arch.sie_page2); |
1276 | if (!kvm_is_ucontrol(kvm)) | 1265 | if (!kvm_is_ucontrol(kvm)) |
1277 | gmap_free(kvm->arch.gmap); | 1266 | gmap_free(kvm->arch.gmap); |
1278 | kvm_s390_destroy_adapters(kvm); | 1267 | kvm_s390_destroy_adapters(kvm); |
@@ -1414,8 +1403,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
1414 | KVM_SYNC_PFAULT; | 1403 | KVM_SYNC_PFAULT; |
1415 | if (test_kvm_facility(vcpu->kvm, 64)) | 1404 | if (test_kvm_facility(vcpu->kvm, 64)) |
1416 | vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; | 1405 | vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; |
1417 | if (test_kvm_facility(vcpu->kvm, 129)) | 1406 | /* fprs can be synchronized via vrs, even if the guest has no vx. With |
1407 | * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. | ||
1408 | */ | ||
1409 | if (MACHINE_HAS_VX) | ||
1418 | vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; | 1410 | vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; |
1411 | else | ||
1412 | vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; | ||
1419 | 1413 | ||
1420 | if (kvm_is_ucontrol(vcpu->kvm)) | 1414 | if (kvm_is_ucontrol(vcpu->kvm)) |
1421 | return __kvm_ucontrol_vcpu_init(vcpu); | 1415 | return __kvm_ucontrol_vcpu_init(vcpu); |
@@ -1423,6 +1417,93 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
1423 | return 0; | 1417 | return 0; |
1424 | } | 1418 | } |
1425 | 1419 | ||
1420 | /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ | ||
1421 | static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) | ||
1422 | { | ||
1423 | WARN_ON_ONCE(vcpu->arch.cputm_start != 0); | ||
1424 | raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); | ||
1425 | vcpu->arch.cputm_start = get_tod_clock_fast(); | ||
1426 | raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); | ||
1427 | } | ||
1428 | |||
1429 | /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ | ||
1430 | static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) | ||
1431 | { | ||
1432 | WARN_ON_ONCE(vcpu->arch.cputm_start == 0); | ||
1433 | raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); | ||
1434 | vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; | ||
1435 | vcpu->arch.cputm_start = 0; | ||
1436 | raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); | ||
1437 | } | ||
1438 | |||
1439 | /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ | ||
1440 | static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) | ||
1441 | { | ||
1442 | WARN_ON_ONCE(vcpu->arch.cputm_enabled); | ||
1443 | vcpu->arch.cputm_enabled = true; | ||
1444 | __start_cpu_timer_accounting(vcpu); | ||
1445 | } | ||
1446 | |||
1447 | /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ | ||
1448 | static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) | ||
1449 | { | ||
1450 | WARN_ON_ONCE(!vcpu->arch.cputm_enabled); | ||
1451 | __stop_cpu_timer_accounting(vcpu); | ||
1452 | vcpu->arch.cputm_enabled = false; | ||
1453 | } | ||
1454 | |||
1455 | static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) | ||
1456 | { | ||
1457 | preempt_disable(); /* protect from TOD sync and vcpu_load/put */ | ||
1458 | __enable_cpu_timer_accounting(vcpu); | ||
1459 | preempt_enable(); | ||
1460 | } | ||
1461 | |||
1462 | static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) | ||
1463 | { | ||
1464 | preempt_disable(); /* protect from TOD sync and vcpu_load/put */ | ||
1465 | __disable_cpu_timer_accounting(vcpu); | ||
1466 | preempt_enable(); | ||
1467 | } | ||
1468 | |||
1469 | /* set the cpu timer - may only be called from the VCPU thread itself */ | ||
1470 | void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) | ||
1471 | { | ||
1472 | preempt_disable(); /* protect from TOD sync and vcpu_load/put */ | ||
1473 | raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); | ||
1474 | if (vcpu->arch.cputm_enabled) | ||
1475 | vcpu->arch.cputm_start = get_tod_clock_fast(); | ||
1476 | vcpu->arch.sie_block->cputm = cputm; | ||
1477 | raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); | ||
1478 | preempt_enable(); | ||
1479 | } | ||
1480 | |||
1481 | /* update and get the cpu timer - can also be called from other VCPU threads */ | ||
1482 | __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) | ||
1483 | { | ||
1484 | unsigned int seq; | ||
1485 | __u64 value; | ||
1486 | |||
1487 | if (unlikely(!vcpu->arch.cputm_enabled)) | ||
1488 | return vcpu->arch.sie_block->cputm; | ||
1489 | |||
1490 | preempt_disable(); /* protect from TOD sync and vcpu_load/put */ | ||
1491 | do { | ||
1492 | seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); | ||
1493 | /* | ||
1494 | * If the writer would ever execute a read in the critical | ||
1495 | * section, e.g. in irq context, we have a deadlock. | ||
1496 | */ | ||
1497 | WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); | ||
1498 | value = vcpu->arch.sie_block->cputm; | ||
1499 | /* if cputm_start is 0, accounting is being started/stopped */ | ||
1500 | if (likely(vcpu->arch.cputm_start)) | ||
1501 | value -= get_tod_clock_fast() - vcpu->arch.cputm_start; | ||
1502 | } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); | ||
1503 | preempt_enable(); | ||
1504 | return value; | ||
1505 | } | ||
1506 | |||
1426 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 1507 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
1427 | { | 1508 | { |
1428 | /* Save host register state */ | 1509 | /* Save host register state */ |
@@ -1430,10 +1511,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
1430 | vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; | 1511 | vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; |
1431 | vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; | 1512 | vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; |
1432 | 1513 | ||
1433 | /* Depending on MACHINE_HAS_VX, data stored to vrs either | 1514 | if (MACHINE_HAS_VX) |
1434 | * has vector register or floating point register format. | 1515 | current->thread.fpu.regs = vcpu->run->s.regs.vrs; |
1435 | */ | 1516 | else |
1436 | current->thread.fpu.regs = vcpu->run->s.regs.vrs; | 1517 | current->thread.fpu.regs = vcpu->run->s.regs.fprs; |
1437 | current->thread.fpu.fpc = vcpu->run->s.regs.fpc; | 1518 | current->thread.fpu.fpc = vcpu->run->s.regs.fpc; |
1438 | if (test_fp_ctl(current->thread.fpu.fpc)) | 1519 | if (test_fp_ctl(current->thread.fpu.fpc)) |
1439 | /* User space provided an invalid FPC, let's clear it */ | 1520 | /* User space provided an invalid FPC, let's clear it */ |
@@ -1443,10 +1524,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
1443 | restore_access_regs(vcpu->run->s.regs.acrs); | 1524 | restore_access_regs(vcpu->run->s.regs.acrs); |
1444 | gmap_enable(vcpu->arch.gmap); | 1525 | gmap_enable(vcpu->arch.gmap); |
1445 | atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); | 1526 | atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); |
1527 | if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) | ||
1528 | __start_cpu_timer_accounting(vcpu); | ||
1529 | vcpu->cpu = cpu; | ||
1446 | } | 1530 | } |
1447 | 1531 | ||
1448 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 1532 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
1449 | { | 1533 | { |
1534 | vcpu->cpu = -1; | ||
1535 | if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) | ||
1536 | __stop_cpu_timer_accounting(vcpu); | ||
1450 | atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); | 1537 | atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); |
1451 | gmap_disable(vcpu->arch.gmap); | 1538 | gmap_disable(vcpu->arch.gmap); |
1452 | 1539 | ||
@@ -1468,7 +1555,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) | |||
1468 | vcpu->arch.sie_block->gpsw.mask = 0UL; | 1555 | vcpu->arch.sie_block->gpsw.mask = 0UL; |
1469 | vcpu->arch.sie_block->gpsw.addr = 0UL; | 1556 | vcpu->arch.sie_block->gpsw.addr = 0UL; |
1470 | kvm_s390_set_prefix(vcpu, 0); | 1557 | kvm_s390_set_prefix(vcpu, 0); |
1471 | vcpu->arch.sie_block->cputm = 0UL; | 1558 | kvm_s390_set_cpu_timer(vcpu, 0); |
1472 | vcpu->arch.sie_block->ckc = 0UL; | 1559 | vcpu->arch.sie_block->ckc = 0UL; |
1473 | vcpu->arch.sie_block->todpr = 0; | 1560 | vcpu->arch.sie_block->todpr = 0; |
1474 | memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); | 1561 | memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); |
@@ -1538,7 +1625,8 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) | |||
1538 | 1625 | ||
1539 | vcpu->arch.cpu_id = model->cpu_id; | 1626 | vcpu->arch.cpu_id = model->cpu_id; |
1540 | vcpu->arch.sie_block->ibc = model->ibc; | 1627 | vcpu->arch.sie_block->ibc = model->ibc; |
1541 | vcpu->arch.sie_block->fac = (int) (long) model->fac->list; | 1628 | if (test_kvm_facility(vcpu->kvm, 7)) |
1629 | vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; | ||
1542 | } | 1630 | } |
1543 | 1631 | ||
1544 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | 1632 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) |
@@ -1616,6 +1704,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | |||
1616 | vcpu->arch.local_int.float_int = &kvm->arch.float_int; | 1704 | vcpu->arch.local_int.float_int = &kvm->arch.float_int; |
1617 | vcpu->arch.local_int.wq = &vcpu->wq; | 1705 | vcpu->arch.local_int.wq = &vcpu->wq; |
1618 | vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; | 1706 | vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; |
1707 | seqcount_init(&vcpu->arch.cputm_seqcount); | ||
1619 | 1708 | ||
1620 | rc = kvm_vcpu_init(vcpu, kvm, id); | 1709 | rc = kvm_vcpu_init(vcpu, kvm, id); |
1621 | if (rc) | 1710 | if (rc) |
@@ -1715,7 +1804,7 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, | |||
1715 | (u64 __user *)reg->addr); | 1804 | (u64 __user *)reg->addr); |
1716 | break; | 1805 | break; |
1717 | case KVM_REG_S390_CPU_TIMER: | 1806 | case KVM_REG_S390_CPU_TIMER: |
1718 | r = put_user(vcpu->arch.sie_block->cputm, | 1807 | r = put_user(kvm_s390_get_cpu_timer(vcpu), |
1719 | (u64 __user *)reg->addr); | 1808 | (u64 __user *)reg->addr); |
1720 | break; | 1809 | break; |
1721 | case KVM_REG_S390_CLOCK_COMP: | 1810 | case KVM_REG_S390_CLOCK_COMP: |
@@ -1753,6 +1842,7 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, | |||
1753 | struct kvm_one_reg *reg) | 1842 | struct kvm_one_reg *reg) |
1754 | { | 1843 | { |
1755 | int r = -EINVAL; | 1844 | int r = -EINVAL; |
1845 | __u64 val; | ||
1756 | 1846 | ||
1757 | switch (reg->id) { | 1847 | switch (reg->id) { |
1758 | case KVM_REG_S390_TODPR: | 1848 | case KVM_REG_S390_TODPR: |
@@ -1764,8 +1854,9 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, | |||
1764 | (u64 __user *)reg->addr); | 1854 | (u64 __user *)reg->addr); |
1765 | break; | 1855 | break; |
1766 | case KVM_REG_S390_CPU_TIMER: | 1856 | case KVM_REG_S390_CPU_TIMER: |
1767 | r = get_user(vcpu->arch.sie_block->cputm, | 1857 | r = get_user(val, (u64 __user *)reg->addr); |
1768 | (u64 __user *)reg->addr); | 1858 | if (!r) |
1859 | kvm_s390_set_cpu_timer(vcpu, val); | ||
1769 | break; | 1860 | break; |
1770 | case KVM_REG_S390_CLOCK_COMP: | 1861 | case KVM_REG_S390_CLOCK_COMP: |
1771 | r = get_user(vcpu->arch.sie_block->ckc, | 1862 | r = get_user(vcpu->arch.sie_block->ckc, |
@@ -2158,8 +2249,10 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) | |||
2158 | 2249 | ||
2159 | static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) | 2250 | static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) |
2160 | { | 2251 | { |
2161 | psw_t *psw = &vcpu->arch.sie_block->gpsw; | 2252 | struct kvm_s390_pgm_info pgm_info = { |
2162 | u8 opcode; | 2253 | .code = PGM_ADDRESSING, |
2254 | }; | ||
2255 | u8 opcode, ilen; | ||
2163 | int rc; | 2256 | int rc; |
2164 | 2257 | ||
2165 | VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); | 2258 | VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); |
@@ -2173,12 +2266,21 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) | |||
2173 | * to look up the current opcode to get the length of the instruction | 2266 | * to look up the current opcode to get the length of the instruction |
2174 | * to be able to forward the PSW. | 2267 | * to be able to forward the PSW. |
2175 | */ | 2268 | */ |
2176 | rc = read_guest(vcpu, psw->addr, 0, &opcode, 1); | 2269 | rc = read_guest_instr(vcpu, &opcode, 1); |
2177 | if (rc) | 2270 | ilen = insn_length(opcode); |
2178 | return kvm_s390_inject_prog_cond(vcpu, rc); | 2271 | if (rc < 0) { |
2179 | psw->addr = __rewind_psw(*psw, -insn_length(opcode)); | 2272 | return rc; |
2180 | 2273 | } else if (rc) { | |
2181 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 2274 | /* Instruction-Fetching Exceptions - we can't detect the ilen. |
2275 | * Forward by arbitrary ilc, injection will take care of | ||
2276 | * nullification if necessary. | ||
2277 | */ | ||
2278 | pgm_info = vcpu->arch.pgm; | ||
2279 | ilen = 4; | ||
2280 | } | ||
2281 | pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; | ||
2282 | kvm_s390_forward_psw(vcpu, ilen); | ||
2283 | return kvm_s390_inject_prog_irq(vcpu, &pgm_info); | ||
2182 | } | 2284 | } |
2183 | 2285 | ||
2184 | static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) | 2286 | static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) |
@@ -2244,10 +2346,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
2244 | */ | 2346 | */ |
2245 | local_irq_disable(); | 2347 | local_irq_disable(); |
2246 | __kvm_guest_enter(); | 2348 | __kvm_guest_enter(); |
2349 | __disable_cpu_timer_accounting(vcpu); | ||
2247 | local_irq_enable(); | 2350 | local_irq_enable(); |
2248 | exit_reason = sie64a(vcpu->arch.sie_block, | 2351 | exit_reason = sie64a(vcpu->arch.sie_block, |
2249 | vcpu->run->s.regs.gprs); | 2352 | vcpu->run->s.regs.gprs); |
2250 | local_irq_disable(); | 2353 | local_irq_disable(); |
2354 | __enable_cpu_timer_accounting(vcpu); | ||
2251 | __kvm_guest_exit(); | 2355 | __kvm_guest_exit(); |
2252 | local_irq_enable(); | 2356 | local_irq_enable(); |
2253 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 2357 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
@@ -2271,7 +2375,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2271 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | 2375 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
2272 | } | 2376 | } |
2273 | if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { | 2377 | if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { |
2274 | vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm; | 2378 | kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); |
2275 | vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; | 2379 | vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; |
2276 | vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; | 2380 | vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; |
2277 | vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; | 2381 | vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; |
@@ -2293,7 +2397,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2293 | kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; | 2397 | kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; |
2294 | kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); | 2398 | kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); |
2295 | memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); | 2399 | memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); |
2296 | kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm; | 2400 | kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); |
2297 | kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; | 2401 | kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; |
2298 | kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; | 2402 | kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; |
2299 | kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; | 2403 | kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; |
@@ -2325,6 +2429,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2325 | } | 2429 | } |
2326 | 2430 | ||
2327 | sync_regs(vcpu, kvm_run); | 2431 | sync_regs(vcpu, kvm_run); |
2432 | enable_cpu_timer_accounting(vcpu); | ||
2328 | 2433 | ||
2329 | might_fault(); | 2434 | might_fault(); |
2330 | rc = __vcpu_run(vcpu); | 2435 | rc = __vcpu_run(vcpu); |
@@ -2344,6 +2449,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2344 | rc = 0; | 2449 | rc = 0; |
2345 | } | 2450 | } |
2346 | 2451 | ||
2452 | disable_cpu_timer_accounting(vcpu); | ||
2347 | store_regs(vcpu, kvm_run); | 2453 | store_regs(vcpu, kvm_run); |
2348 | 2454 | ||
2349 | if (vcpu->sigset_active) | 2455 | if (vcpu->sigset_active) |
@@ -2364,7 +2470,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) | |||
2364 | unsigned char archmode = 1; | 2470 | unsigned char archmode = 1; |
2365 | freg_t fprs[NUM_FPRS]; | 2471 | freg_t fprs[NUM_FPRS]; |
2366 | unsigned int px; | 2472 | unsigned int px; |
2367 | u64 clkcomp; | 2473 | u64 clkcomp, cputm; |
2368 | int rc; | 2474 | int rc; |
2369 | 2475 | ||
2370 | px = kvm_s390_get_prefix(vcpu); | 2476 | px = kvm_s390_get_prefix(vcpu); |
@@ -2386,7 +2492,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) | |||
2386 | fprs, 128); | 2492 | fprs, 128); |
2387 | } else { | 2493 | } else { |
2388 | rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, | 2494 | rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, |
2389 | vcpu->run->s.regs.vrs, 128); | 2495 | vcpu->run->s.regs.fprs, 128); |
2390 | } | 2496 | } |
2391 | rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, | 2497 | rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, |
2392 | vcpu->run->s.regs.gprs, 128); | 2498 | vcpu->run->s.regs.gprs, 128); |
@@ -2398,8 +2504,9 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) | |||
2398 | &vcpu->run->s.regs.fpc, 4); | 2504 | &vcpu->run->s.regs.fpc, 4); |
2399 | rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, | 2505 | rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, |
2400 | &vcpu->arch.sie_block->todpr, 4); | 2506 | &vcpu->arch.sie_block->todpr, 4); |
2507 | cputm = kvm_s390_get_cpu_timer(vcpu); | ||
2401 | rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, | 2508 | rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, |
2402 | &vcpu->arch.sie_block->cputm, 8); | 2509 | &cputm, 8); |
2403 | clkcomp = vcpu->arch.sie_block->ckc >> 8; | 2510 | clkcomp = vcpu->arch.sie_block->ckc >> 8; |
2404 | rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, | 2511 | rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, |
2405 | &clkcomp, 8); | 2512 | &clkcomp, 8); |
@@ -2605,7 +2712,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, | |||
2605 | switch (mop->op) { | 2712 | switch (mop->op) { |
2606 | case KVM_S390_MEMOP_LOGICAL_READ: | 2713 | case KVM_S390_MEMOP_LOGICAL_READ: |
2607 | if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { | 2714 | if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { |
2608 | r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false); | 2715 | r = check_gva_range(vcpu, mop->gaddr, mop->ar, |
2716 | mop->size, GACC_FETCH); | ||
2609 | break; | 2717 | break; |
2610 | } | 2718 | } |
2611 | r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); | 2719 | r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); |
@@ -2616,7 +2724,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, | |||
2616 | break; | 2724 | break; |
2617 | case KVM_S390_MEMOP_LOGICAL_WRITE: | 2725 | case KVM_S390_MEMOP_LOGICAL_WRITE: |
2618 | if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { | 2726 | if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { |
2619 | r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true); | 2727 | r = check_gva_range(vcpu, mop->gaddr, mop->ar, |
2728 | mop->size, GACC_STORE); | ||
2620 | break; | 2729 | break; |
2621 | } | 2730 | } |
2622 | if (copy_from_user(tmpbuf, uaddr, mop->size)) { | 2731 | if (copy_from_user(tmpbuf, uaddr, mop->size)) { |
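The GACC_FETCH/GACC_STORE arguments above feed the MEMOP check-only path, which translates and access-checks a guest range without copying any data. From userspace this looks roughly as follows (illustrative values; the ioctl returns 0 when the whole range is accessible):

    struct kvm_s390_mem_op op = {
            .gaddr = guest_addr,                   /* guest logical address */
            .size  = len,
            .op    = KVM_S390_MEMOP_LOGICAL_WRITE,
            .flags = KVM_S390_MEMOP_F_CHECK_ONLY,  /* translate only */
    };
    ret = ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);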
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index df1abada1f36..8621ab00ec8e 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/kvm.h> | 19 | #include <linux/kvm.h> |
20 | #include <linux/kvm_host.h> | 20 | #include <linux/kvm_host.h> |
21 | #include <asm/facility.h> | 21 | #include <asm/facility.h> |
22 | #include <asm/processor.h> | ||
22 | 23 | ||
23 | typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); | 24 | typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); |
24 | 25 | ||
@@ -53,6 +54,11 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) | |||
53 | return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; | 54 | return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; |
54 | } | 55 | } |
55 | 56 | ||
57 | static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) | ||
58 | { | ||
59 | return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_WAIT; | ||
60 | } | ||
61 | |||
56 | static inline int kvm_is_ucontrol(struct kvm *kvm) | 62 | static inline int kvm_is_ucontrol(struct kvm *kvm) |
57 | { | 63 | { |
58 | #ifdef CONFIG_KVM_S390_UCONTROL | 64 | #ifdef CONFIG_KVM_S390_UCONTROL |
@@ -154,8 +160,8 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) | |||
154 | /* test availability of facility in a kvm instance */ | 160 | /* test availability of facility in a kvm instance */ |
155 | static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr) | 161 | static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr) |
156 | { | 162 | { |
157 | return __test_facility(nr, kvm->arch.model.fac->mask) && | 163 | return __test_facility(nr, kvm->arch.model.fac_mask) && |
158 | __test_facility(nr, kvm->arch.model.fac->list); | 164 | __test_facility(nr, kvm->arch.model.fac_list); |
159 | } | 165 | } |
160 | 166 | ||
161 | static inline int set_kvm_facility(u64 *fac_list, unsigned long nr) | 167 | static inline int set_kvm_facility(u64 *fac_list, unsigned long nr) |
@@ -212,8 +218,22 @@ int kvm_s390_reinject_io_int(struct kvm *kvm, | |||
212 | int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); | 218 | int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); |
213 | 219 | ||
214 | /* implemented in intercept.c */ | 220 | /* implemented in intercept.c */ |
215 | void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc); | 221 | u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu); |
216 | int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); | 222 | int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); |
223 | static inline void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilen) | ||
224 | { | ||
225 | struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; | ||
226 | |||
227 | sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilen); | ||
228 | } | ||
229 | static inline void kvm_s390_forward_psw(struct kvm_vcpu *vcpu, int ilen) | ||
230 | { | ||
231 | kvm_s390_rewind_psw(vcpu, -ilen); | ||
232 | } | ||
233 | static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu) | ||
234 | { | ||
235 | kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu)); | ||
236 | } | ||
217 | 237 | ||
218 | /* implemented in priv.c */ | 238 | /* implemented in priv.c */ |
219 | int is_valid_psw(psw_t *psw); | 239 | int is_valid_psw(psw_t *psw); |
@@ -248,6 +268,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); | |||
248 | void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); | 268 | void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); |
249 | unsigned long kvm_s390_fac_list_mask_size(void); | 269 | unsigned long kvm_s390_fac_list_mask_size(void); |
250 | extern unsigned long kvm_s390_fac_list_mask[]; | 270 | extern unsigned long kvm_s390_fac_list_mask[]; |
271 | void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm); | ||
272 | __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu); | ||
251 | 273 | ||
252 | /* implemented in diag.c */ | 274 | /* implemented in diag.c */ |
253 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); | 275 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); |
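The three inline helpers added to the header above are all expressed through one PSW rewind primitive. A worked example of their effect, assuming a 4-byte instruction was just intercepted (the guest PSW then already points past it):

    /* illustrative only: gpsw.addr == 0x1004, kvm_s390_get_ilen() == 4 */
    kvm_s390_retry_instr(vcpu);       /* gpsw.addr -> 0x1000, re-execute */
    kvm_s390_forward_psw(vcpu, 4);    /* gpsw.addr -> addr + 4, step over */

Going through __rewind_psw() rather than plain subtraction keeps the address wrapping correctly within the current addressing mode (24, 31 or 64 bit).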
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index ed74e86d9b9e..f218ccf016c8 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
@@ -173,7 +173,7 @@ static int handle_skey(struct kvm_vcpu *vcpu) | |||
173 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | 173 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) |
174 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | 174 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); |
175 | 175 | ||
176 | kvm_s390_rewind_psw(vcpu, 4); | 176 | kvm_s390_retry_instr(vcpu); |
177 | VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); | 177 | VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); |
178 | return 0; | 178 | return 0; |
179 | } | 179 | } |
@@ -184,7 +184,7 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu) | |||
184 | if (psw_bits(vcpu->arch.sie_block->gpsw).p) | 184 | if (psw_bits(vcpu->arch.sie_block->gpsw).p) |
185 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | 185 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); |
186 | wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu)); | 186 | wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu)); |
187 | kvm_s390_rewind_psw(vcpu, 4); | 187 | kvm_s390_retry_instr(vcpu); |
188 | VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation"); | 188 | VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation"); |
189 | return 0; | 189 | return 0; |
190 | } | 190 | } |
@@ -354,7 +354,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu) | |||
354 | * We need to shift the lower 32 facility bits (bit 0-31) from a u64 | 354 | * We need to shift the lower 32 facility bits (bit 0-31) from a u64 |
355 | * into a u32 memory representation. They will remain bits 0-31. | 355 | * into a u32 memory representation. They will remain bits 0-31. |
356 | */ | 356 | */ |
357 | fac = *vcpu->kvm->arch.model.fac->list >> 32; | 357 | fac = *vcpu->kvm->arch.model.fac_list >> 32; |
358 | rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list), | 358 | rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list), |
359 | &fac, sizeof(fac)); | 359 | &fac, sizeof(fac)); |
360 | if (rc) | 360 | if (rc) |
@@ -759,8 +759,8 @@ static int handle_essa(struct kvm_vcpu *vcpu) | |||
759 | if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6) | 759 | if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6) |
760 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 760 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
761 | 761 | ||
762 | /* Rewind PSW to repeat the ESSA instruction */ | 762 | /* Retry the ESSA instruction */ |
763 | kvm_s390_rewind_psw(vcpu, 4); | 763 | kvm_s390_retry_instr(vcpu); |
764 | vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ | 764 | vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ |
765 | cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); | 765 | cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); |
766 | down_read(&gmap->mm->mmap_sem); | 766 | down_read(&gmap->mm->mmap_sem); |
@@ -981,11 +981,12 @@ static int handle_tprot(struct kvm_vcpu *vcpu) | |||
981 | return -EOPNOTSUPP; | 981 | return -EOPNOTSUPP; |
982 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) | 982 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) |
983 | ipte_lock(vcpu); | 983 | ipte_lock(vcpu); |
984 | ret = guest_translate_address(vcpu, address1, ar, &gpa, 1); | 984 | ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE); |
985 | if (ret == PGM_PROTECTION) { | 985 | if (ret == PGM_PROTECTION) { |
986 | /* Write protected? Try again with read-only... */ | 986 | /* Write protected? Try again with read-only... */ |
987 | cc = 1; | 987 | cc = 1; |
988 | ret = guest_translate_address(vcpu, address1, ar, &gpa, 0); | 988 | ret = guest_translate_address(vcpu, address1, ar, &gpa, |
989 | GACC_FETCH); | ||
989 | } | 990 | } |
990 | if (ret) { | 991 | if (ret) { |
991 | if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) { | 992 | if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) { |
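The two-step translation in handle_tprot() above mirrors TPROT's condition codes: it first requests store access and, on a protection exception, retries read-only to tell "write protected" apart from "inaccessible". Summarized (consult the Principles of Operation for the authoritative definition):

    /* TPROT condition codes:
     *   cc 0 - fetch and store both permitted
     *   cc 1 - fetch permitted, store not permitted (the retry case above)
     *   cc 2 - neither fetch nor store permitted
     *   cc 3 - translation not available
     */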
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 44adbb819041..01c8b501cb6d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <asm/mtrr.h> | 32 | #include <asm/mtrr.h> |
33 | #include <asm/msr-index.h> | 33 | #include <asm/msr-index.h> |
34 | #include <asm/asm.h> | 34 | #include <asm/asm.h> |
35 | #include <asm/kvm_page_track.h> | ||
35 | 36 | ||
36 | #define KVM_MAX_VCPUS 255 | 37 | #define KVM_MAX_VCPUS 255 |
37 | #define KVM_SOFT_MAX_VCPUS 160 | 38 | #define KVM_SOFT_MAX_VCPUS 160 |
@@ -214,6 +215,14 @@ struct kvm_mmu_memory_cache { | |||
214 | void *objects[KVM_NR_MEM_OBJS]; | 215 | void *objects[KVM_NR_MEM_OBJS]; |
215 | }; | 216 | }; |
216 | 217 | ||
218 | /* | ||
219 | * the pages used as guest page table on soft mmu are tracked by | ||
220 | * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used | ||
221 | * by indirect shadow page can not be more than 15 bits. | ||
222 | * | ||
223 | * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access, | ||
224 | * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp. | ||
225 | */ | ||
217 | union kvm_mmu_page_role { | 226 | union kvm_mmu_page_role { |
218 | unsigned word; | 227 | unsigned word; |
219 | struct { | 228 | struct { |
@@ -276,7 +285,7 @@ struct kvm_mmu_page { | |||
276 | #endif | 285 | #endif |
277 | 286 | ||
278 | /* Number of writes since the last time traversal visited this page. */ | 287 | /* Number of writes since the last time traversal visited this page. */ |
279 | int write_flooding_count; | 288 | atomic_t write_flooding_count; |
280 | }; | 289 | }; |
281 | 290 | ||
282 | struct kvm_pio_request { | 291 | struct kvm_pio_request { |
@@ -338,12 +347,8 @@ struct kvm_mmu { | |||
338 | 347 | ||
339 | struct rsvd_bits_validate guest_rsvd_check; | 348 | struct rsvd_bits_validate guest_rsvd_check; |
340 | 349 | ||
341 | /* | 350 | /* Can have large pages at levels 2..last_nonleaf_level-1. */ |
342 | * Bitmap: bit set = last pte in walk | 351 | u8 last_nonleaf_level; |
343 | * index[0:1]: level (zero-based) | ||
344 | * index[2]: pte.ps | ||
345 | */ | ||
346 | u8 last_pte_bitmap; | ||
347 | 352 | ||
348 | bool nx; | 353 | bool nx; |
349 | 354 | ||
@@ -498,7 +503,6 @@ struct kvm_vcpu_arch { | |||
498 | struct kvm_mmu_memory_cache mmu_page_header_cache; | 503 | struct kvm_mmu_memory_cache mmu_page_header_cache; |
499 | 504 | ||
500 | struct fpu guest_fpu; | 505 | struct fpu guest_fpu; |
501 | bool eager_fpu; | ||
502 | u64 xcr0; | 506 | u64 xcr0; |
503 | u64 guest_supported_xcr0; | 507 | u64 guest_supported_xcr0; |
504 | u32 guest_xstate_size; | 508 | u32 guest_xstate_size; |
@@ -644,12 +648,13 @@ struct kvm_vcpu_arch { | |||
644 | }; | 648 | }; |
645 | 649 | ||
646 | struct kvm_lpage_info { | 650 | struct kvm_lpage_info { |
647 | int write_count; | 651 | int disallow_lpage; |
648 | }; | 652 | }; |
649 | 653 | ||
650 | struct kvm_arch_memory_slot { | 654 | struct kvm_arch_memory_slot { |
651 | struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; | 655 | struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; |
652 | struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; | 656 | struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; |
657 | unsigned short *gfn_track[KVM_PAGE_TRACK_MAX]; | ||
653 | }; | 658 | }; |
654 | 659 | ||
655 | /* | 660 | /* |
@@ -694,6 +699,8 @@ struct kvm_arch { | |||
694 | */ | 699 | */ |
695 | struct list_head active_mmu_pages; | 700 | struct list_head active_mmu_pages; |
696 | struct list_head zapped_obsolete_pages; | 701 | struct list_head zapped_obsolete_pages; |
702 | struct kvm_page_track_notifier_node mmu_sp_tracker; | ||
703 | struct kvm_page_track_notifier_head track_notifier_head; | ||
697 | 704 | ||
698 | struct list_head assigned_dev_head; | 705 | struct list_head assigned_dev_head; |
699 | struct iommu_domain *iommu_domain; | 706 | struct iommu_domain *iommu_domain; |
@@ -754,6 +761,8 @@ struct kvm_arch { | |||
754 | 761 | ||
755 | bool irqchip_split; | 762 | bool irqchip_split; |
756 | u8 nr_reserved_ioapic_pins; | 763 | u8 nr_reserved_ioapic_pins; |
764 | |||
765 | bool disabled_lapic_found; | ||
757 | }; | 766 | }; |
758 | 767 | ||
759 | struct kvm_vm_stat { | 768 | struct kvm_vm_stat { |
@@ -988,6 +997,8 @@ void kvm_mmu_module_exit(void); | |||
988 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu); | 997 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu); |
989 | int kvm_mmu_create(struct kvm_vcpu *vcpu); | 998 | int kvm_mmu_create(struct kvm_vcpu *vcpu); |
990 | void kvm_mmu_setup(struct kvm_vcpu *vcpu); | 999 | void kvm_mmu_setup(struct kvm_vcpu *vcpu); |
1000 | void kvm_mmu_init_vm(struct kvm *kvm); | ||
1001 | void kvm_mmu_uninit_vm(struct kvm *kvm); | ||
991 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 1002 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
992 | u64 dirty_mask, u64 nx_mask, u64 x_mask); | 1003 | u64 dirty_mask, u64 nx_mask, u64 x_mask); |
993 | 1004 | ||
@@ -1127,8 +1138,6 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); | |||
1127 | 1138 | ||
1128 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); | 1139 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); |
1129 | 1140 | ||
1130 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | ||
1131 | const u8 *new, int bytes); | ||
1132 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); | 1141 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); |
1133 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); | 1142 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); |
1134 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); | 1143 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h new file mode 100644 index 000000000000..c2b8d24a235c --- /dev/null +++ b/arch/x86/include/asm/kvm_page_track.h | |||
@@ -0,0 +1,61 @@ | |||
1 | #ifndef _ASM_X86_KVM_PAGE_TRACK_H | ||
2 | #define _ASM_X86_KVM_PAGE_TRACK_H | ||
3 | |||
4 | enum kvm_page_track_mode { | ||
5 | KVM_PAGE_TRACK_WRITE, | ||
6 | KVM_PAGE_TRACK_MAX, | ||
7 | }; | ||
8 | |||
9 | /* | ||
10 | * The notifier represented by @kvm_page_track_notifier_node is linked into | ||
11 | * the head, which is notified when the guest triggers a tracked event. | ||
12 | * | ||
13 | * Write access on the head is protected by kvm->mmu_lock, read access | ||
14 | * is protected by track_srcu. | ||
15 | */ | ||
16 | struct kvm_page_track_notifier_head { | ||
17 | struct srcu_struct track_srcu; | ||
18 | struct hlist_head track_notifier_list; | ||
19 | }; | ||
20 | |||
21 | struct kvm_page_track_notifier_node { | ||
22 | struct hlist_node node; | ||
23 | |||
24 | /* | ||
25 | * Called when the guest writes to a write-tracked page, after | ||
26 | * write emulation for that access has finished. | ||
27 | * | ||
28 | * @vcpu: the vcpu where the write access happened. | ||
29 | * @gpa: the physical address written by the guest. | ||
30 | * @new: the data written to the address. | ||
31 | * @bytes: the length of the written data. | ||
32 | */ | ||
33 | void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, | ||
34 | int bytes); | ||
35 | }; | ||
36 | |||
37 | void kvm_page_track_init(struct kvm *kvm); | ||
38 | |||
39 | void kvm_page_track_free_memslot(struct kvm_memory_slot *free, | ||
40 | struct kvm_memory_slot *dont); | ||
41 | int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, | ||
42 | unsigned long npages); | ||
43 | |||
44 | void kvm_slot_page_track_add_page(struct kvm *kvm, | ||
45 | struct kvm_memory_slot *slot, gfn_t gfn, | ||
46 | enum kvm_page_track_mode mode); | ||
47 | void kvm_slot_page_track_remove_page(struct kvm *kvm, | ||
48 | struct kvm_memory_slot *slot, gfn_t gfn, | ||
49 | enum kvm_page_track_mode mode); | ||
50 | bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, | ||
51 | enum kvm_page_track_mode mode); | ||
52 | |||
53 | void | ||
54 | kvm_page_track_register_notifier(struct kvm *kvm, | ||
55 | struct kvm_page_track_notifier_node *n); | ||
56 | void | ||
57 | kvm_page_track_unregister_notifier(struct kvm *kvm, | ||
58 | struct kvm_page_track_notifier_node *n); | ||
59 | void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, | ||
60 | int bytes); | ||
61 | #endif | ||
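The header above is the entire public surface of the new write-tracking facility; the in-tree user added by this merge is the shadow MMU's mmu_sp_tracker. A hypothetical out-of-MMU consumer would look roughly like this (my_tracker, my_track_write and my_setup are illustrative names, not part of the API; the locking around add_page is an assumption, see the mmu_lock/track_srcu comment above):

    #include <asm/kvm_page_track.h>

    static void my_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                               const u8 *new, int bytes)
    {
            /* Runs after write emulation completes for a tracked gfn. */
    }

    static struct kvm_page_track_notifier_node my_tracker = {
            .track_write = my_track_write,
    };

    static void my_setup(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn)
    {
            kvm_page_track_register_notifier(kvm, &my_tracker);
            /* Write-protect one gfn so my_track_write() fires on writes. */
            kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
    }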
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 7956412d09bd..9b1a91834ac8 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h | |||
@@ -226,7 +226,9 @@ | |||
226 | (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) | 226 | (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) |
227 | 227 | ||
228 | /* Declare the various hypercall operations. */ | 228 | /* Declare the various hypercall operations. */ |
229 | #define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT 0x0008 | 229 | #define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 |
230 | #define HVCALL_POST_MESSAGE 0x005c | ||
231 | #define HVCALL_SIGNAL_EVENT 0x005d | ||
230 | 232 | ||
231 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001 | 233 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001 |
232 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12 | 234 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12 |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index a1ff508bb423..464fa477afbf 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -13,9 +13,10 @@ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o | |||
13 | 13 | ||
14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | 14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ |
15 | i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ | 15 | i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ |
16 | hyperv.o | 16 | hyperv.o page_track.o |
17 | 17 | ||
18 | kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o | 18 | kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o |
19 | |||
19 | kvm-intel-y += vmx.o pmu_intel.o | 20 | kvm-intel-y += vmx.o pmu_intel.o |
20 | kvm-amd-y += svm.o pmu_amd.o | 21 | kvm-amd-y += svm.o pmu_amd.o |
21 | 22 | ||
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c index 9dc091acd5fb..308b8597c691 100644 --- a/arch/x86/kvm/assigned-dev.c +++ b/arch/x86/kvm/assigned-dev.c | |||
@@ -51,11 +51,9 @@ struct kvm_assigned_dev_kernel { | |||
51 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, | 51 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, |
52 | int assigned_dev_id) | 52 | int assigned_dev_id) |
53 | { | 53 | { |
54 | struct list_head *ptr; | ||
55 | struct kvm_assigned_dev_kernel *match; | 54 | struct kvm_assigned_dev_kernel *match; |
56 | 55 | ||
57 | list_for_each(ptr, head) { | 56 | list_for_each_entry(match, head, list) { |
58 | match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); | ||
59 | if (match->assigned_dev_id == assigned_dev_id) | 57 | if (match->assigned_dev_id == assigned_dev_id) |
60 | return match; | 58 | return match; |
61 | } | 59 | } |
@@ -373,14 +371,10 @@ static void kvm_free_assigned_device(struct kvm *kvm, | |||
373 | 371 | ||
374 | void kvm_free_all_assigned_devices(struct kvm *kvm) | 372 | void kvm_free_all_assigned_devices(struct kvm *kvm) |
375 | { | 373 | { |
376 | struct list_head *ptr, *ptr2; | 374 | struct kvm_assigned_dev_kernel *assigned_dev, *tmp; |
377 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
378 | |||
379 | list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { | ||
380 | assigned_dev = list_entry(ptr, | ||
381 | struct kvm_assigned_dev_kernel, | ||
382 | list); | ||
383 | 375 | ||
376 | list_for_each_entry_safe(assigned_dev, tmp, | ||
377 | &kvm->arch.assigned_dev_head, list) { | ||
384 | kvm_free_assigned_device(kvm, assigned_dev); | 378 | kvm_free_assigned_device(kvm, assigned_dev); |
385 | } | 379 | } |
386 | } | 380 | } |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 6525e926f566..0029644bf09c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -46,11 +46,18 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted) | |||
46 | return ret; | 46 | return ret; |
47 | } | 47 | } |
48 | 48 | ||
49 | bool kvm_mpx_supported(void) | ||
50 | { | ||
51 | return ((host_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)) | ||
52 | && kvm_x86_ops->mpx_supported()); | ||
53 | } | ||
54 | EXPORT_SYMBOL_GPL(kvm_mpx_supported); | ||
55 | |||
49 | u64 kvm_supported_xcr0(void) | 56 | u64 kvm_supported_xcr0(void) |
50 | { | 57 | { |
51 | u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0; | 58 | u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0; |
52 | 59 | ||
53 | if (!kvm_x86_ops->mpx_supported()) | 60 | if (!kvm_mpx_supported()) |
54 | xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); | 61 | xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); |
55 | 62 | ||
56 | return xcr0; | 63 | return xcr0; |
@@ -97,8 +104,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) | |||
97 | if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) | 104 | if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) |
98 | best->ebx = xstate_required_size(vcpu->arch.xcr0, true); | 105 | best->ebx = xstate_required_size(vcpu->arch.xcr0, true); |
99 | 106 | ||
100 | vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu); | 107 | if (use_eager_fpu()) |
101 | if (vcpu->arch.eager_fpu) | ||
102 | kvm_x86_ops->fpu_activate(vcpu); | 108 | kvm_x86_ops->fpu_activate(vcpu); |
103 | 109 | ||
104 | /* | 110 | /* |
@@ -295,7 +301,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
295 | #endif | 301 | #endif |
296 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; | 302 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; |
297 | unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; | 303 | unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; |
298 | unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0; | 304 | unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0; |
299 | unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; | 305 | unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; |
300 | 306 | ||
301 | /* cpuid 1.edx */ | 307 | /* cpuid 1.edx */ |
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index c8eda1498121..66a6581724ad 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <asm/cpu.h> | 5 | #include <asm/cpu.h> |
6 | 6 | ||
7 | int kvm_update_cpuid(struct kvm_vcpu *vcpu); | 7 | int kvm_update_cpuid(struct kvm_vcpu *vcpu); |
8 | bool kvm_mpx_supported(void); | ||
8 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | 9 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, |
9 | u32 function, u32 index); | 10 | u32 function, u32 index); |
10 | int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, | 11 | int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, |
@@ -135,14 +136,6 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) | |||
135 | return best && (best->ebx & bit(X86_FEATURE_RTM)); | 136 | return best && (best->ebx & bit(X86_FEATURE_RTM)); |
136 | } | 137 | } |
137 | 138 | ||
138 | static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) | ||
139 | { | ||
140 | struct kvm_cpuid_entry2 *best; | ||
141 | |||
142 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
143 | return best && (best->ebx & bit(X86_FEATURE_MPX)); | ||
144 | } | ||
145 | |||
146 | static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) | 139 | static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) |
147 | { | 140 | { |
148 | struct kvm_cpuid_entry2 *best; | 141 | struct kvm_cpuid_entry2 *best; |
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index c58ba67175ac..5ff3485acb60 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c | |||
@@ -1043,6 +1043,27 @@ bool kvm_hv_hypercall_enabled(struct kvm *kvm) | |||
1043 | return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; | 1043 | return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; |
1044 | } | 1044 | } |
1045 | 1045 | ||
1046 | static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) | ||
1047 | { | ||
1048 | bool longmode; | ||
1049 | |||
1050 | longmode = is_64_bit_mode(vcpu); | ||
1051 | if (longmode) | ||
1052 | kvm_register_write(vcpu, VCPU_REGS_RAX, result); | ||
1053 | else { | ||
1054 | kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32); | ||
1055 | kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff); | ||
1056 | } | ||
1057 | } | ||
1058 | |||
1059 | static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) | ||
1060 | { | ||
1061 | struct kvm_run *run = vcpu->run; | ||
1062 | |||
1063 | kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result); | ||
1064 | return 1; | ||
1065 | } | ||
1066 | |||
1046 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | 1067 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) |
1047 | { | 1068 | { |
1048 | u64 param, ingpa, outgpa, ret; | 1069 | u64 param, ingpa, outgpa, ret; |
@@ -1055,7 +1076,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | |||
1055 | */ | 1076 | */ |
1056 | if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { | 1077 | if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { |
1057 | kvm_queue_exception(vcpu, UD_VECTOR); | 1078 | kvm_queue_exception(vcpu, UD_VECTOR); |
1058 | return 0; | 1079 | return 1; |
1059 | } | 1080 | } |
1060 | 1081 | ||
1061 | longmode = is_64_bit_mode(vcpu); | 1082 | longmode = is_64_bit_mode(vcpu); |
@@ -1083,22 +1104,33 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | |||
1083 | 1104 | ||
1084 | trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); | 1105 | trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); |
1085 | 1106 | ||
1107 | /* Hypercall continuation is not supported yet */ | ||
1108 | if (rep_cnt || rep_idx) { | ||
1109 | res = HV_STATUS_INVALID_HYPERCALL_CODE; | ||
1110 | goto set_result; | ||
1111 | } | ||
1112 | |||
1086 | switch (code) { | 1113 | switch (code) { |
1087 | case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: | 1114 | case HVCALL_NOTIFY_LONG_SPIN_WAIT: |
1088 | kvm_vcpu_on_spin(vcpu); | 1115 | kvm_vcpu_on_spin(vcpu); |
1089 | break; | 1116 | break; |
1117 | case HVCALL_POST_MESSAGE: | ||
1118 | case HVCALL_SIGNAL_EVENT: | ||
1119 | vcpu->run->exit_reason = KVM_EXIT_HYPERV; | ||
1120 | vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL; | ||
1121 | vcpu->run->hyperv.u.hcall.input = param; | ||
1122 | vcpu->run->hyperv.u.hcall.params[0] = ingpa; | ||
1123 | vcpu->run->hyperv.u.hcall.params[1] = outgpa; | ||
1124 | vcpu->arch.complete_userspace_io = | ||
1125 | kvm_hv_hypercall_complete_userspace; | ||
1126 | return 0; | ||
1090 | default: | 1127 | default: |
1091 | res = HV_STATUS_INVALID_HYPERCALL_CODE; | 1128 | res = HV_STATUS_INVALID_HYPERCALL_CODE; |
1092 | break; | 1129 | break; |
1093 | } | 1130 | } |
1094 | 1131 | ||
1132 | set_result: | ||
1095 | ret = res | (((u64)rep_done & 0xfff) << 32); | 1133 | ret = res | (((u64)rep_done & 0xfff) << 32); |
1096 | if (longmode) { | 1134 | kvm_hv_hypercall_set_result(vcpu, ret); |
1097 | kvm_register_write(vcpu, VCPU_REGS_RAX, ret); | ||
1098 | } else { | ||
1099 | kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); | ||
1100 | kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); | ||
1101 | } | ||
1102 | |||
1103 | return 1; | 1135 | return 1; |
1104 | } | 1136 | } |
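With this change, HVCALL_POST_MESSAGE and HVCALL_SIGNAL_EVENT are forwarded to userspace instead of failing in the kernel; the VMM completes them by filling in run->hyperv.u.hcall.result, which kvm_hv_hypercall_complete_userspace() then writes back to the guest (RAX, or RDX:RAX outside long mode). A sketch of the userspace side, assuming the usual mmap'ed kvm_run named run inside the VMM's exit loop:

    switch (run->exit_reason) {
    case KVM_EXIT_HYPERV:
            if (run->hyperv.type == KVM_EXIT_HYPERV_HCALL) {
                    __u64 input  = run->hyperv.u.hcall.input;      /* code + flags */
                    __u64 ingpa  = run->hyperv.u.hcall.params[0];
                    __u64 outgpa = run->hyperv.u.hcall.params[1];

                    /* Emulate HVCALL_POST_MESSAGE / HVCALL_SIGNAL_EVENT here,
                     * then report status; 0 is HV_STATUS_SUCCESS. */
                    run->hyperv.u.hcall.result = 0;
            }
            break;
    }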
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index b0ea42b78ccd..a4bf5b45d65a 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -51,32 +51,9 @@ | |||
51 | #define RW_STATE_WORD0 3 | 51 | #define RW_STATE_WORD0 3 |
52 | #define RW_STATE_WORD1 4 | 52 | #define RW_STATE_WORD1 4 |
53 | 53 | ||
54 | /* Compute with 96 bit intermediate result: (a*b)/c */ | 54 | static void pit_set_gate(struct kvm_pit *pit, int channel, u32 val) |
55 | static u64 muldiv64(u64 a, u32 b, u32 c) | ||
56 | { | 55 | { |
57 | union { | 56 | struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel]; |
58 | u64 ll; | ||
59 | struct { | ||
60 | u32 low, high; | ||
61 | } l; | ||
62 | } u, res; | ||
63 | u64 rl, rh; | ||
64 | |||
65 | u.ll = a; | ||
66 | rl = (u64)u.l.low * (u64)b; | ||
67 | rh = (u64)u.l.high * (u64)b; | ||
68 | rh += (rl >> 32); | ||
69 | res.l.high = div64_u64(rh, c); | ||
70 | res.l.low = div64_u64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c); | ||
71 | return res.ll; | ||
72 | } | ||
73 | |||
74 | static void pit_set_gate(struct kvm *kvm, int channel, u32 val) | ||
75 | { | ||
76 | struct kvm_kpit_channel_state *c = | ||
77 | &kvm->arch.vpit->pit_state.channels[channel]; | ||
78 | |||
79 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | ||
80 | 57 | ||
81 | switch (c->mode) { | 58 | switch (c->mode) { |
82 | default: | 59 | default: |
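The hunk above drops the open-coded muldiv64(): its job, computing (a * b) / c with a 96-bit intermediate, is already provided by mul_u64_u32_div() from <linux/math64.h>, which the remaining callers switch to. The two conversions in this file become (both lines appear verbatim in later hunks):

    #include <linux/math64.h>

    /* PIT count -> nanoseconds (create_pit_timer): */
    interval = mul_u64_u32_div(val, NSEC_PER_SEC, KVM_PIT_FREQ);

    /* elapsed nanoseconds -> PIT count (pit_get_count/pit_get_out): */
    d = mul_u64_u32_div(t, KVM_PIT_FREQ, NSEC_PER_SEC);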
@@ -97,18 +74,16 @@ static void pit_set_gate(struct kvm *kvm, int channel, u32 val) | |||
97 | c->gate = val; | 74 | c->gate = val; |
98 | } | 75 | } |
99 | 76 | ||
100 | static int pit_get_gate(struct kvm *kvm, int channel) | 77 | static int pit_get_gate(struct kvm_pit *pit, int channel) |
101 | { | 78 | { |
102 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | 79 | return pit->pit_state.channels[channel].gate; |
103 | |||
104 | return kvm->arch.vpit->pit_state.channels[channel].gate; | ||
105 | } | 80 | } |
106 | 81 | ||
107 | static s64 __kpit_elapsed(struct kvm *kvm) | 82 | static s64 __kpit_elapsed(struct kvm_pit *pit) |
108 | { | 83 | { |
109 | s64 elapsed; | 84 | s64 elapsed; |
110 | ktime_t remaining; | 85 | ktime_t remaining; |
111 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; | 86 | struct kvm_kpit_state *ps = &pit->pit_state; |
112 | 87 | ||
113 | if (!ps->period) | 88 | if (!ps->period) |
114 | return 0; | 89 | return 0; |
@@ -128,26 +103,23 @@ static s64 __kpit_elapsed(struct kvm *kvm) | |||
128 | return elapsed; | 103 | return elapsed; |
129 | } | 104 | } |
130 | 105 | ||
131 | static s64 kpit_elapsed(struct kvm *kvm, struct kvm_kpit_channel_state *c, | 106 | static s64 kpit_elapsed(struct kvm_pit *pit, struct kvm_kpit_channel_state *c, |
132 | int channel) | 107 | int channel) |
133 | { | 108 | { |
134 | if (channel == 0) | 109 | if (channel == 0) |
135 | return __kpit_elapsed(kvm); | 110 | return __kpit_elapsed(pit); |
136 | 111 | ||
137 | return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); | 112 | return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); |
138 | } | 113 | } |
139 | 114 | ||
140 | static int pit_get_count(struct kvm *kvm, int channel) | 115 | static int pit_get_count(struct kvm_pit *pit, int channel) |
141 | { | 116 | { |
142 | struct kvm_kpit_channel_state *c = | 117 | struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel]; |
143 | &kvm->arch.vpit->pit_state.channels[channel]; | ||
144 | s64 d, t; | 118 | s64 d, t; |
145 | int counter; | 119 | int counter; |
146 | 120 | ||
147 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | 121 | t = kpit_elapsed(pit, c, channel); |
148 | 122 | d = mul_u64_u32_div(t, KVM_PIT_FREQ, NSEC_PER_SEC); | |
149 | t = kpit_elapsed(kvm, c, channel); | ||
150 | d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); | ||
151 | 123 | ||
152 | switch (c->mode) { | 124 | switch (c->mode) { |
153 | case 0: | 125 | case 0: |
@@ -167,17 +139,14 @@ static int pit_get_count(struct kvm *kvm, int channel) | |||
167 | return counter; | 139 | return counter; |
168 | } | 140 | } |
169 | 141 | ||
170 | static int pit_get_out(struct kvm *kvm, int channel) | 142 | static int pit_get_out(struct kvm_pit *pit, int channel) |
171 | { | 143 | { |
172 | struct kvm_kpit_channel_state *c = | 144 | struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel]; |
173 | &kvm->arch.vpit->pit_state.channels[channel]; | ||
174 | s64 d, t; | 145 | s64 d, t; |
175 | int out; | 146 | int out; |
176 | 147 | ||
177 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | 148 | t = kpit_elapsed(pit, c, channel); |
178 | 149 | d = mul_u64_u32_div(t, KVM_PIT_FREQ, NSEC_PER_SEC); | |
179 | t = kpit_elapsed(kvm, c, channel); | ||
180 | d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); | ||
181 | 150 | ||
182 | switch (c->mode) { | 151 | switch (c->mode) { |
183 | default: | 152 | default: |
@@ -202,29 +171,23 @@ static int pit_get_out(struct kvm *kvm, int channel) | |||
202 | return out; | 171 | return out; |
203 | } | 172 | } |
204 | 173 | ||
205 | static void pit_latch_count(struct kvm *kvm, int channel) | 174 | static void pit_latch_count(struct kvm_pit *pit, int channel) |
206 | { | 175 | { |
207 | struct kvm_kpit_channel_state *c = | 176 | struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel]; |
208 | &kvm->arch.vpit->pit_state.channels[channel]; | ||
209 | |||
210 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | ||
211 | 177 | ||
212 | if (!c->count_latched) { | 178 | if (!c->count_latched) { |
213 | c->latched_count = pit_get_count(kvm, channel); | 179 | c->latched_count = pit_get_count(pit, channel); |
214 | c->count_latched = c->rw_mode; | 180 | c->count_latched = c->rw_mode; |
215 | } | 181 | } |
216 | } | 182 | } |
217 | 183 | ||
218 | static void pit_latch_status(struct kvm *kvm, int channel) | 184 | static void pit_latch_status(struct kvm_pit *pit, int channel) |
219 | { | 185 | { |
220 | struct kvm_kpit_channel_state *c = | 186 | struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel]; |
221 | &kvm->arch.vpit->pit_state.channels[channel]; | ||
222 | |||
223 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | ||
224 | 187 | ||
225 | if (!c->status_latched) { | 188 | if (!c->status_latched) { |
226 | /* TODO: Return NULL COUNT (bit 6). */ | 189 | /* TODO: Return NULL COUNT (bit 6). */ |
227 | c->status = ((pit_get_out(kvm, channel) << 7) | | 190 | c->status = ((pit_get_out(pit, channel) << 7) | |
228 | (c->rw_mode << 4) | | 191 | (c->rw_mode << 4) | |
229 | (c->mode << 1) | | 192 | (c->mode << 1) | |
230 | c->bcd); | 193 | c->bcd); |
@@ -232,26 +195,24 @@ static void pit_latch_status(struct kvm *kvm, int channel) | |||
232 | } | 195 | } |
233 | } | 196 | } |
234 | 197 | ||
198 | static inline struct kvm_pit *pit_state_to_pit(struct kvm_kpit_state *ps) | ||
199 | { | ||
200 | return container_of(ps, struct kvm_pit, pit_state); | ||
201 | } | ||
202 | |||
235 | static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) | 203 | static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) |
236 | { | 204 | { |
237 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, | 205 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, |
238 | irq_ack_notifier); | 206 | irq_ack_notifier); |
239 | int value; | 207 | struct kvm_pit *pit = pit_state_to_pit(ps); |
240 | 208 | ||
241 | spin_lock(&ps->inject_lock); | 209 | atomic_set(&ps->irq_ack, 1); |
242 | value = atomic_dec_return(&ps->pending); | 210 | /* irq_ack should be set before pending is read. Order accesses with |
243 | if (value < 0) | 211 | * inc(pending) in pit_timer_fn and xchg(irq_ack, 0) in pit_do_work. |
244 | /* spurious acks can be generated if, for example, the | 212 | */ |
245 | * PIC is being reset. Handle it gracefully here | 213 | smp_mb(); |
246 | */ | 214 | if (atomic_dec_if_positive(&ps->pending) > 0) |
247 | atomic_inc(&ps->pending); | 215 | queue_kthread_work(&pit->worker, &pit->expired); |
248 | else if (value > 0) | ||
249 | /* in this case, we had multiple outstanding pit interrupts | ||
250 | * that we needed to inject. Reinject | ||
251 | */ | ||
252 | queue_kthread_work(&ps->pit->worker, &ps->pit->expired); | ||
253 | ps->irq_ack = 1; | ||
254 | spin_unlock(&ps->inject_lock); | ||
255 | } | 216 | } |
256 | 217 | ||
257 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | 218 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) |
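The inject_lock is gone: irq_ack and pending become atomics, and the smp_mb() orders the irq_ack store before the pending read, pairing with the implicit full barrier of atomic_xchg() in pit_do_work() (shown in a later hunk). Schematically, a sketch of the pairing rather than new code:

    /*
     *   kvm_pit_ack_irq()                     pit_do_work()
     *   -----------------                     -------------
     *   atomic_set(&ps->irq_ack, 1);          if (!atomic_xchg(&ps->irq_ack, 0))
     *   smp_mb();                                     return;  /* not acked yet */
     *   if (atomic_dec_if_positive(           inject the PIT interrupt; a
     *                  &ps->pending) > 0)     pending ack or the next timer
     *           queue_kthread_work(...);      tick re-queues the work
     */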
@@ -282,45 +243,36 @@ static void pit_do_work(struct kthread_work *work) | |||
282 | struct kvm_vcpu *vcpu; | 243 | struct kvm_vcpu *vcpu; |
283 | int i; | 244 | int i; |
284 | struct kvm_kpit_state *ps = &pit->pit_state; | 245 | struct kvm_kpit_state *ps = &pit->pit_state; |
285 | int inject = 0; | ||
286 | 246 | ||
287 | /* Try to inject pending interrupts when | 247 | if (atomic_read(&ps->reinject) && !atomic_xchg(&ps->irq_ack, 0)) |
288 | * last one has been acked. | 248 | return; |
249 | |||
250 | kvm_set_irq(kvm, pit->irq_source_id, 0, 1, false); | ||
251 | kvm_set_irq(kvm, pit->irq_source_id, 0, 0, false); | ||
252 | |||
253 | /* | ||
254 | * Provides NMI watchdog support via Virtual Wire mode. | ||
255 | * The route is: PIT -> LVT0 in NMI mode. | ||
256 | * | ||
257 | * Note: Our Virtual Wire implementation does not follow | ||
258 | * the MP specification. We propagate a PIT interrupt to all | ||
259 | * VCPUs and only when LVT0 is in NMI mode. The interrupt can | ||
260 | * also be simultaneously delivered through PIC and IOAPIC. | ||
289 | */ | 261 | */ |
290 | spin_lock(&ps->inject_lock); | 262 | if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0) |
291 | if (ps->irq_ack) { | 263 | kvm_for_each_vcpu(i, vcpu, kvm) |
292 | ps->irq_ack = 0; | 264 | kvm_apic_nmi_wd_deliver(vcpu); |
293 | inject = 1; | ||
294 | } | ||
295 | spin_unlock(&ps->inject_lock); | ||
296 | if (inject) { | ||
297 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false); | ||
298 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false); | ||
299 | |||
300 | /* | ||
301 | * Provides NMI watchdog support via Virtual Wire mode. | ||
302 | * The route is: PIT -> PIC -> LVT0 in NMI mode. | ||
303 | * | ||
304 | * Note: Our Virtual Wire implementation is simplified, only | ||
305 | * propagating PIT interrupts to all VCPUs when they have set | ||
306 | * LVT0 to NMI delivery. Other PIC interrupts are just sent to | ||
307 | * VCPU0, and only if its LVT0 is in EXTINT mode. | ||
308 | */ | ||
309 | if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0) | ||
310 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
311 | kvm_apic_nmi_wd_deliver(vcpu); | ||
312 | } | ||
313 | } | 265 | } |
314 | 266 | ||
315 | static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) | 267 | static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) |
316 | { | 268 | { |
317 | struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer); | 269 | struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer); |
318 | struct kvm_pit *pt = ps->kvm->arch.vpit; | 270 | struct kvm_pit *pt = pit_state_to_pit(ps); |
319 | 271 | ||
320 | if (ps->reinject || !atomic_read(&ps->pending)) { | 272 | if (atomic_read(&ps->reinject)) |
321 | atomic_inc(&ps->pending); | 273 | atomic_inc(&ps->pending); |
322 | queue_kthread_work(&pt->worker, &pt->expired); | 274 | |
323 | } | 275 | queue_kthread_work(&pt->worker, &pt->expired); |
324 | 276 | ||
325 | if (ps->is_periodic) { | 277 | if (ps->is_periodic) { |
326 | hrtimer_add_expires_ns(&ps->timer, ps->period); | 278 | hrtimer_add_expires_ns(&ps->timer, ps->period); |
@@ -329,30 +281,54 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) | |||
329 | return HRTIMER_NORESTART; | 281 | return HRTIMER_NORESTART; |
330 | } | 282 | } |
331 | 283 | ||
332 | static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) | 284 | static inline void kvm_pit_reset_reinject(struct kvm_pit *pit) |
333 | { | 285 | { |
334 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; | 286 | atomic_set(&pit->pit_state.pending, 0); |
287 | atomic_set(&pit->pit_state.irq_ack, 1); | ||
288 | } | ||
289 | |||
290 | void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject) | ||
291 | { | ||
292 | struct kvm_kpit_state *ps = &pit->pit_state; | ||
293 | struct kvm *kvm = pit->kvm; | ||
294 | |||
295 | if (atomic_read(&ps->reinject) == reinject) | ||
296 | return; | ||
297 | |||
298 | if (reinject) { | ||
299 | /* The initial state is preserved while ps->reinject == 0. */ | ||
300 | kvm_pit_reset_reinject(pit); | ||
301 | kvm_register_irq_ack_notifier(kvm, &ps->irq_ack_notifier); | ||
302 | kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); | ||
303 | } else { | ||
304 | kvm_unregister_irq_ack_notifier(kvm, &ps->irq_ack_notifier); | ||
305 | kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier); | ||
306 | } | ||
307 | |||
308 | atomic_set(&ps->reinject, reinject); | ||
309 | } | ||
310 | |||
311 | static void create_pit_timer(struct kvm_pit *pit, u32 val, int is_period) | ||
312 | { | ||
313 | struct kvm_kpit_state *ps = &pit->pit_state; | ||
314 | struct kvm *kvm = pit->kvm; | ||
335 | s64 interval; | 315 | s64 interval; |
336 | 316 | ||
337 | if (!ioapic_in_kernel(kvm) || | 317 | if (!ioapic_in_kernel(kvm) || |
338 | ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) | 318 | ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) |
339 | return; | 319 | return; |
340 | 320 | ||
341 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); | 321 | interval = mul_u64_u32_div(val, NSEC_PER_SEC, KVM_PIT_FREQ); |
342 | 322 | ||
343 | pr_debug("create pit timer, interval is %llu nsec\n", interval); | 323 | pr_debug("create pit timer, interval is %llu nsec\n", interval); |
344 | 324 | ||
345 | /* TODO: the new value only takes effect after the timer is retriggered */ | 325 |
346 | hrtimer_cancel(&ps->timer); | 326 | hrtimer_cancel(&ps->timer); |
347 | flush_kthread_work(&ps->pit->expired); | 327 | flush_kthread_work(&pit->expired); |
348 | ps->period = interval; | 328 | ps->period = interval; |
349 | ps->is_periodic = is_period; | 329 | ps->is_periodic = is_period; |
350 | 330 | ||
351 | ps->timer.function = pit_timer_fn; | 331 | kvm_pit_reset_reinject(pit); |
352 | ps->kvm = ps->pit->kvm; | ||
353 | |||
354 | atomic_set(&ps->pending, 0); | ||
355 | ps->irq_ack = 1; | ||
356 | 332 | ||
357 | /* | 333 | /* |
358 | * Do not allow the guest to program periodic timers with small | 334 | * Do not allow the guest to program periodic timers with small |
@@ -375,11 +351,9 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) | |||
375 | HRTIMER_MODE_ABS); | 351 | HRTIMER_MODE_ABS); |
376 | } | 352 | } |
377 | 353 | ||
378 | static void pit_load_count(struct kvm *kvm, int channel, u32 val) | 354 | static void pit_load_count(struct kvm_pit *pit, int channel, u32 val) |
379 | { | 355 | { |
380 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; | 356 | struct kvm_kpit_state *ps = &pit->pit_state; |
381 | |||
382 | WARN_ON(!mutex_is_locked(&ps->lock)); | ||
383 | 357 | ||
384 | pr_debug("load_count val is %d, channel is %d\n", val, channel); | 358 | pr_debug("load_count val is %d, channel is %d\n", val, channel); |
385 | 359 | ||
@@ -404,29 +378,33 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) | |||
404 | case 1: | 378 | case 1: |
405 | /* FIXME: enhance mode 4 precision */ | 379 | /* FIXME: enhance mode 4 precision */ |
406 | case 4: | 380 | case 4: |
407 | create_pit_timer(kvm, val, 0); | 381 | create_pit_timer(pit, val, 0); |
408 | break; | 382 | break; |
409 | case 2: | 383 | case 2: |
410 | case 3: | 384 | case 3: |
411 | create_pit_timer(kvm, val, 1); | 385 | create_pit_timer(pit, val, 1); |
412 | break; | 386 | break; |
413 | default: | 387 | default: |
414 | destroy_pit_timer(kvm->arch.vpit); | 388 | destroy_pit_timer(pit); |
415 | } | 389 | } |
416 | } | 390 | } |
417 | 391 | ||
418 | void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start) | 392 | void kvm_pit_load_count(struct kvm_pit *pit, int channel, u32 val, |
393 | int hpet_legacy_start) | ||
419 | { | 394 | { |
420 | u8 saved_mode; | 395 | u8 saved_mode; |
396 | |||
397 | WARN_ON_ONCE(!mutex_is_locked(&pit->pit_state.lock)); | ||
398 | |||
421 | if (hpet_legacy_start) { | 399 | if (hpet_legacy_start) { |
422 | /* save existing mode for later reenablement */ | 400 | /* save existing mode for later reenablement */ |
423 | WARN_ON(channel != 0); | 401 | WARN_ON(channel != 0); |
424 | saved_mode = kvm->arch.vpit->pit_state.channels[0].mode; | 402 | saved_mode = pit->pit_state.channels[0].mode; |
425 | kvm->arch.vpit->pit_state.channels[0].mode = 0xff; /* disable timer */ | 403 | pit->pit_state.channels[0].mode = 0xff; /* disable timer */ |
426 | pit_load_count(kvm, channel, val); | 404 | pit_load_count(pit, channel, val); |
427 | kvm->arch.vpit->pit_state.channels[0].mode = saved_mode; | 405 | pit->pit_state.channels[0].mode = saved_mode; |
428 | } else { | 406 | } else { |
429 | pit_load_count(kvm, channel, val); | 407 | pit_load_count(pit, channel, val); |
430 | } | 408 | } |
431 | } | 409 | } |
432 | 410 | ||
@@ -452,7 +430,6 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu, | |||
452 | { | 430 | { |
453 | struct kvm_pit *pit = dev_to_pit(this); | 431 | struct kvm_pit *pit = dev_to_pit(this); |
454 | struct kvm_kpit_state *pit_state = &pit->pit_state; | 432 | struct kvm_kpit_state *pit_state = &pit->pit_state; |
455 | struct kvm *kvm = pit->kvm; | ||
456 | int channel, access; | 433 | int channel, access; |
457 | struct kvm_kpit_channel_state *s; | 434 | struct kvm_kpit_channel_state *s; |
458 | u32 val = *(u32 *) data; | 435 | u32 val = *(u32 *) data; |
@@ -476,9 +453,9 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu, | |||
476 | s = &pit_state->channels[channel]; | 453 | s = &pit_state->channels[channel]; |
477 | if (val & (2 << channel)) { | 454 | if (val & (2 << channel)) { |
478 | if (!(val & 0x20)) | 455 | if (!(val & 0x20)) |
479 | pit_latch_count(kvm, channel); | 456 | pit_latch_count(pit, channel); |
480 | if (!(val & 0x10)) | 457 | if (!(val & 0x10)) |
481 | pit_latch_status(kvm, channel); | 458 | pit_latch_status(pit, channel); |
482 | } | 459 | } |
483 | } | 460 | } |
484 | } else { | 461 | } else { |
@@ -486,7 +463,7 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu, | |||
486 | s = &pit_state->channels[channel]; | 463 | s = &pit_state->channels[channel]; |
487 | access = (val >> 4) & KVM_PIT_CHANNEL_MASK; | 464 | access = (val >> 4) & KVM_PIT_CHANNEL_MASK; |
488 | if (access == 0) { | 465 | if (access == 0) { |
489 | pit_latch_count(kvm, channel); | 466 | pit_latch_count(pit, channel); |
490 | } else { | 467 | } else { |
491 | s->rw_mode = access; | 468 | s->rw_mode = access; |
492 | s->read_state = access; | 469 | s->read_state = access; |
@@ -503,17 +480,17 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu, | |||
503 | switch (s->write_state) { | 480 | switch (s->write_state) { |
504 | default: | 481 | default: |
505 | case RW_STATE_LSB: | 482 | case RW_STATE_LSB: |
506 | pit_load_count(kvm, addr, val); | 483 | pit_load_count(pit, addr, val); |
507 | break; | 484 | break; |
508 | case RW_STATE_MSB: | 485 | case RW_STATE_MSB: |
509 | pit_load_count(kvm, addr, val << 8); | 486 | pit_load_count(pit, addr, val << 8); |
510 | break; | 487 | break; |
511 | case RW_STATE_WORD0: | 488 | case RW_STATE_WORD0: |
512 | s->write_latch = val; | 489 | s->write_latch = val; |
513 | s->write_state = RW_STATE_WORD1; | 490 | s->write_state = RW_STATE_WORD1; |
514 | break; | 491 | break; |
515 | case RW_STATE_WORD1: | 492 | case RW_STATE_WORD1: |
516 | pit_load_count(kvm, addr, s->write_latch | (val << 8)); | 493 | pit_load_count(pit, addr, s->write_latch | (val << 8)); |
517 | s->write_state = RW_STATE_WORD0; | 494 | s->write_state = RW_STATE_WORD0; |
518 | break; | 495 | break; |
519 | } | 496 | } |
@@ -529,7 +506,6 @@ static int pit_ioport_read(struct kvm_vcpu *vcpu, | |||
529 | { | 506 | { |
530 | struct kvm_pit *pit = dev_to_pit(this); | 507 | struct kvm_pit *pit = dev_to_pit(this); |
531 | struct kvm_kpit_state *pit_state = &pit->pit_state; | 508 | struct kvm_kpit_state *pit_state = &pit->pit_state; |
532 | struct kvm *kvm = pit->kvm; | ||
533 | int ret, count; | 509 | int ret, count; |
534 | struct kvm_kpit_channel_state *s; | 510 | struct kvm_kpit_channel_state *s; |
535 | if (!pit_in_range(addr)) | 511 | if (!pit_in_range(addr)) |
@@ -566,20 +542,20 @@ static int pit_ioport_read(struct kvm_vcpu *vcpu, | |||
566 | switch (s->read_state) { | 542 | switch (s->read_state) { |
567 | default: | 543 | default: |
568 | case RW_STATE_LSB: | 544 | case RW_STATE_LSB: |
569 | count = pit_get_count(kvm, addr); | 545 | count = pit_get_count(pit, addr); |
570 | ret = count & 0xff; | 546 | ret = count & 0xff; |
571 | break; | 547 | break; |
572 | case RW_STATE_MSB: | 548 | case RW_STATE_MSB: |
573 | count = pit_get_count(kvm, addr); | 549 | count = pit_get_count(pit, addr); |
574 | ret = (count >> 8) & 0xff; | 550 | ret = (count >> 8) & 0xff; |
575 | break; | 551 | break; |
576 | case RW_STATE_WORD0: | 552 | case RW_STATE_WORD0: |
577 | count = pit_get_count(kvm, addr); | 553 | count = pit_get_count(pit, addr); |
578 | ret = count & 0xff; | 554 | ret = count & 0xff; |
579 | s->read_state = RW_STATE_WORD1; | 555 | s->read_state = RW_STATE_WORD1; |
580 | break; | 556 | break; |
581 | case RW_STATE_WORD1: | 557 | case RW_STATE_WORD1: |
582 | count = pit_get_count(kvm, addr); | 558 | count = pit_get_count(pit, addr); |
583 | ret = (count >> 8) & 0xff; | 559 | ret = (count >> 8) & 0xff; |
584 | s->read_state = RW_STATE_WORD0; | 560 | s->read_state = RW_STATE_WORD0; |
585 | break; | 561 | break; |
@@ -600,14 +576,13 @@ static int speaker_ioport_write(struct kvm_vcpu *vcpu, | |||
600 | { | 576 | { |
601 | struct kvm_pit *pit = speaker_to_pit(this); | 577 | struct kvm_pit *pit = speaker_to_pit(this); |
602 | struct kvm_kpit_state *pit_state = &pit->pit_state; | 578 | struct kvm_kpit_state *pit_state = &pit->pit_state; |
603 | struct kvm *kvm = pit->kvm; | ||
604 | u32 val = *(u32 *) data; | 579 | u32 val = *(u32 *) data; |
605 | if (addr != KVM_SPEAKER_BASE_ADDRESS) | 580 | if (addr != KVM_SPEAKER_BASE_ADDRESS) |
606 | return -EOPNOTSUPP; | 581 | return -EOPNOTSUPP; |
607 | 582 | ||
608 | mutex_lock(&pit_state->lock); | 583 | mutex_lock(&pit_state->lock); |
609 | pit_state->speaker_data_on = (val >> 1) & 1; | 584 | pit_state->speaker_data_on = (val >> 1) & 1; |
610 | pit_set_gate(kvm, 2, val & 1); | 585 | pit_set_gate(pit, 2, val & 1); |
611 | mutex_unlock(&pit_state->lock); | 586 | mutex_unlock(&pit_state->lock); |
612 | return 0; | 587 | return 0; |
613 | } | 588 | } |
@@ -618,7 +593,6 @@ static int speaker_ioport_read(struct kvm_vcpu *vcpu, | |||
618 | { | 593 | { |
619 | struct kvm_pit *pit = speaker_to_pit(this); | 594 | struct kvm_pit *pit = speaker_to_pit(this); |
620 | struct kvm_kpit_state *pit_state = &pit->pit_state; | 595 | struct kvm_kpit_state *pit_state = &pit->pit_state; |
621 | struct kvm *kvm = pit->kvm; | ||
622 | unsigned int refresh_clock; | 596 | unsigned int refresh_clock; |
623 | int ret; | 597 | int ret; |
624 | if (addr != KVM_SPEAKER_BASE_ADDRESS) | 598 | if (addr != KVM_SPEAKER_BASE_ADDRESS) |
@@ -628,8 +602,8 @@ static int speaker_ioport_read(struct kvm_vcpu *vcpu, | |||
628 | refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1; | 602 | refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1; |
629 | 603 | ||
630 | mutex_lock(&pit_state->lock); | 604 | mutex_lock(&pit_state->lock); |
631 | ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) | | 605 | ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(pit, 2) | |
632 | (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4)); | 606 | (pit_get_out(pit, 2) << 5) | (refresh_clock << 4)); |
633 | if (len > sizeof(ret)) | 607 | if (len > sizeof(ret)) |
634 | len = sizeof(ret); | 608 | len = sizeof(ret); |
635 | memcpy(data, (char *)&ret, len); | 609 | memcpy(data, (char *)&ret, len); |
@@ -637,33 +611,28 @@ static int speaker_ioport_read(struct kvm_vcpu *vcpu, | |||
637 | return 0; | 611 | return 0; |
638 | } | 612 | } |
639 | 613 | ||
640 | void kvm_pit_reset(struct kvm_pit *pit) | 614 | static void kvm_pit_reset(struct kvm_pit *pit) |
641 | { | 615 | { |
642 | int i; | 616 | int i; |
643 | struct kvm_kpit_channel_state *c; | 617 | struct kvm_kpit_channel_state *c; |
644 | 618 | ||
645 | mutex_lock(&pit->pit_state.lock); | ||
646 | pit->pit_state.flags = 0; | 619 | pit->pit_state.flags = 0; |
647 | for (i = 0; i < 3; i++) { | 620 | for (i = 0; i < 3; i++) { |
648 | c = &pit->pit_state.channels[i]; | 621 | c = &pit->pit_state.channels[i]; |
649 | c->mode = 0xff; | 622 | c->mode = 0xff; |
650 | c->gate = (i != 2); | 623 | c->gate = (i != 2); |
651 | pit_load_count(pit->kvm, i, 0); | 624 | pit_load_count(pit, i, 0); |
652 | } | 625 | } |
653 | mutex_unlock(&pit->pit_state.lock); | ||
654 | 626 | ||
655 | atomic_set(&pit->pit_state.pending, 0); | 627 | kvm_pit_reset_reinject(pit); |
656 | pit->pit_state.irq_ack = 1; | ||
657 | } | 628 | } |
658 | 629 | ||
659 | static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) | 630 | static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) |
660 | { | 631 | { |
661 | struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier); | 632 | struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier); |
662 | 633 | ||
663 | if (!mask) { | 634 | if (!mask) |
664 | atomic_set(&pit->pit_state.pending, 0); | 635 | kvm_pit_reset_reinject(pit); |
665 | pit->pit_state.irq_ack = 1; | ||
666 | } | ||
667 | } | 636 | } |
668 | 637 | ||
669 | static const struct kvm_io_device_ops pit_dev_ops = { | 638 | static const struct kvm_io_device_ops pit_dev_ops = { |
@@ -690,14 +659,10 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
690 | return NULL; | 659 | return NULL; |
691 | 660 | ||
692 | pit->irq_source_id = kvm_request_irq_source_id(kvm); | 661 | pit->irq_source_id = kvm_request_irq_source_id(kvm); |
693 | if (pit->irq_source_id < 0) { | 662 | if (pit->irq_source_id < 0) |
694 | kfree(pit); | 663 | goto fail_request; |
695 | return NULL; | ||
696 | } | ||
697 | 664 | ||
698 | mutex_init(&pit->pit_state.lock); | 665 | mutex_init(&pit->pit_state.lock); |
699 | mutex_lock(&pit->pit_state.lock); | ||
700 | spin_lock_init(&pit->pit_state.inject_lock); | ||
701 | 666 | ||
702 | pid = get_pid(task_tgid(current)); | 667 | pid = get_pid(task_tgid(current)); |
703 | pid_nr = pid_vnr(pid); | 668 | pid_nr = pid_vnr(pid); |
@@ -706,36 +671,30 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
706 | init_kthread_worker(&pit->worker); | 671 | init_kthread_worker(&pit->worker); |
707 | pit->worker_task = kthread_run(kthread_worker_fn, &pit->worker, | 672 | pit->worker_task = kthread_run(kthread_worker_fn, &pit->worker, |
708 | "kvm-pit/%d", pid_nr); | 673 | "kvm-pit/%d", pid_nr); |
709 | if (IS_ERR(pit->worker_task)) { | 674 | if (IS_ERR(pit->worker_task)) |
710 | mutex_unlock(&pit->pit_state.lock); | 675 | goto fail_kthread; |
711 | kvm_free_irq_source_id(kvm, pit->irq_source_id); | 676 | |
712 | kfree(pit); | ||
713 | return NULL; | ||
714 | } | ||
715 | init_kthread_work(&pit->expired, pit_do_work); | 677 | init_kthread_work(&pit->expired, pit_do_work); |
716 | 678 | ||
717 | kvm->arch.vpit = pit; | ||
718 | pit->kvm = kvm; | 679 | pit->kvm = kvm; |
719 | 680 | ||
720 | pit_state = &pit->pit_state; | 681 | pit_state = &pit->pit_state; |
721 | pit_state->pit = pit; | ||
722 | hrtimer_init(&pit_state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 682 | hrtimer_init(&pit_state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
683 | pit_state->timer.function = pit_timer_fn; | ||
684 | |||
723 | pit_state->irq_ack_notifier.gsi = 0; | 685 | pit_state->irq_ack_notifier.gsi = 0; |
724 | pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; | 686 | pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; |
725 | kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); | 687 | pit->mask_notifier.func = pit_mask_notifer; |
726 | pit_state->reinject = true; | ||
727 | mutex_unlock(&pit->pit_state.lock); | ||
728 | 688 | ||
729 | kvm_pit_reset(pit); | 689 | kvm_pit_reset(pit); |
730 | 690 | ||
731 | pit->mask_notifier.func = pit_mask_notifer; | 691 | kvm_pit_set_reinject(pit, true); |
732 | kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); | ||
733 | 692 | ||
734 | kvm_iodevice_init(&pit->dev, &pit_dev_ops); | 693 | kvm_iodevice_init(&pit->dev, &pit_dev_ops); |
735 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, | 694 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, |
736 | KVM_PIT_MEM_LENGTH, &pit->dev); | 695 | KVM_PIT_MEM_LENGTH, &pit->dev); |
737 | if (ret < 0) | 696 | if (ret < 0) |
738 | goto fail; | 697 | goto fail_register_pit; |
739 | 698 | ||
740 | if (flags & KVM_PIT_SPEAKER_DUMMY) { | 699 | if (flags & KVM_PIT_SPEAKER_DUMMY) { |
741 | kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); | 700 | kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); |
@@ -743,42 +702,35 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
743 | KVM_SPEAKER_BASE_ADDRESS, 4, | 702 | KVM_SPEAKER_BASE_ADDRESS, 4, |
744 | &pit->speaker_dev); | 703 | &pit->speaker_dev); |
745 | if (ret < 0) | 704 | if (ret < 0) |
746 | goto fail_unregister; | 705 | goto fail_register_speaker; |
747 | } | 706 | } |
748 | 707 | ||
749 | return pit; | 708 | return pit; |
750 | 709 | ||
751 | fail_unregister: | 710 | fail_register_speaker: |
752 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev); | 711 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev); |
753 | 712 | fail_register_pit: | |
754 | fail: | 713 | kvm_pit_set_reinject(pit, false); |
755 | kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier); | ||
756 | kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); | ||
757 | kvm_free_irq_source_id(kvm, pit->irq_source_id); | ||
758 | kthread_stop(pit->worker_task); | 714 | kthread_stop(pit->worker_task); |
715 | fail_kthread: | ||
716 | kvm_free_irq_source_id(kvm, pit->irq_source_id); | ||
717 | fail_request: | ||
759 | kfree(pit); | 718 | kfree(pit); |
760 | return NULL; | 719 | return NULL; |
761 | } | 720 | } |
762 | 721 | ||
763 | void kvm_free_pit(struct kvm *kvm) | 722 | void kvm_free_pit(struct kvm *kvm) |
764 | { | 723 | { |
765 | struct hrtimer *timer; | 724 | struct kvm_pit *pit = kvm->arch.vpit; |
766 | 725 | ||
767 | if (kvm->arch.vpit) { | 726 | if (pit) { |
768 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &kvm->arch.vpit->dev); | 727 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev); |
769 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | 728 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev); |
770 | &kvm->arch.vpit->speaker_dev); | 729 | kvm_pit_set_reinject(pit, false); |
771 | kvm_unregister_irq_mask_notifier(kvm, 0, | 730 | hrtimer_cancel(&pit->pit_state.timer); |
772 | &kvm->arch.vpit->mask_notifier); | 731 | flush_kthread_work(&pit->expired); |
773 | kvm_unregister_irq_ack_notifier(kvm, | 732 | kthread_stop(pit->worker_task); |
774 | &kvm->arch.vpit->pit_state.irq_ack_notifier); | 733 | kvm_free_irq_source_id(kvm, pit->irq_source_id); |
775 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 734 | kfree(pit); |
776 | timer = &kvm->arch.vpit->pit_state.timer; | ||
777 | hrtimer_cancel(timer); | ||
778 | flush_kthread_work(&kvm->arch.vpit->expired); | ||
779 | kthread_stop(kvm->arch.vpit->worker_task); | ||
780 | kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); | ||
781 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | ||
782 | kfree(kvm->arch.vpit); | ||
783 | } | 735 | } |
784 | } | 736 | } |
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index c84990b42b5b..2f5af0798326 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
@@ -22,19 +22,18 @@ struct kvm_kpit_channel_state { | |||
22 | }; | 22 | }; |
23 | 23 | ||
24 | struct kvm_kpit_state { | 24 | struct kvm_kpit_state { |
25 | /* All members before "struct mutex lock" are protected by the lock. */ | ||
25 | struct kvm_kpit_channel_state channels[3]; | 26 | struct kvm_kpit_channel_state channels[3]; |
26 | u32 flags; | 27 | u32 flags; |
27 | bool is_periodic; | 28 | bool is_periodic; |
28 | s64 period; /* unit: ns */ | 29 | s64 period; /* unit: ns */ |
29 | struct hrtimer timer; | 30 | struct hrtimer timer; |
30 | atomic_t pending; /* accumulated triggered timers */ | ||
31 | bool reinject; | ||
32 | struct kvm *kvm; | ||
33 | u32 speaker_data_on; | 31 | u32 speaker_data_on; |
32 | |||
34 | struct mutex lock; | 33 | struct mutex lock; |
35 | struct kvm_pit *pit; | 34 | atomic_t reinject; |
36 | spinlock_t inject_lock; | 35 | atomic_t pending; /* accumulated triggered timers */ |
37 | unsigned long irq_ack; | 36 | atomic_t irq_ack; |
38 | struct kvm_irq_ack_notifier irq_ack_notifier; | 37 | struct kvm_irq_ack_notifier irq_ack_notifier; |
39 | }; | 38 | }; |
40 | 39 | ||
@@ -57,9 +56,11 @@ struct kvm_pit { | |||
57 | #define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 | 56 | #define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 |
58 | #define KVM_PIT_CHANNEL_MASK 0x3 | 57 | #define KVM_PIT_CHANNEL_MASK 0x3 |
59 | 58 | ||
60 | void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start); | ||
61 | struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); | 59 | struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); |
62 | void kvm_free_pit(struct kvm *kvm); | 60 | void kvm_free_pit(struct kvm *kvm); |
63 | void kvm_pit_reset(struct kvm_pit *pit); | 61 | |
62 | void kvm_pit_load_count(struct kvm_pit *pit, int channel, u32 val, | ||
63 | int hpet_legacy_start); | ||
64 | void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject); | ||
64 | 65 | ||
65 | #endif | 66 | #endif |
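kvm_pit_set_reinject() replaces the old ad-hoc reinject bookkeeping and now (un)registers the ack and mask notifiers as a pair; it is the natural backend for the KVM_REINJECT_CONTROL ioctl. From userspace that looks like the following sketch, assuming an open VM fd named vm_fd:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Switch the in-kernel PIT to "discard" mode (no IRQ reinjection): */
    struct kvm_reinject_control control = { .pit_reinject = 0 };
    ioctl(vm_fd, KVM_REINJECT_CONTROL, &control);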
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 1facfd60b04a..9db47090ead0 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c | |||
@@ -94,7 +94,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, | |||
94 | static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) | 94 | static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) |
95 | { | 95 | { |
96 | ioapic->rtc_status.pending_eoi = 0; | 96 | ioapic->rtc_status.pending_eoi = 0; |
97 | bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS); | 97 | bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPUS); |
98 | } | 98 | } |
99 | 99 | ||
100 | static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); | 100 | static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); |
@@ -117,16 +117,16 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) | |||
117 | return; | 117 | return; |
118 | 118 | ||
119 | new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); | 119 | new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); |
120 | old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); | 120 | old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); |
121 | 121 | ||
122 | if (new_val == old_val) | 122 | if (new_val == old_val) |
123 | return; | 123 | return; |
124 | 124 | ||
125 | if (new_val) { | 125 | if (new_val) { |
126 | __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); | 126 | __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); |
127 | ioapic->rtc_status.pending_eoi++; | 127 | ioapic->rtc_status.pending_eoi++; |
128 | } else { | 128 | } else { |
129 | __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); | 129 | __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); |
130 | ioapic->rtc_status.pending_eoi--; | 130 | ioapic->rtc_status.pending_eoi--; |
131 | rtc_status_pending_eoi_check_valid(ioapic); | 131 | rtc_status_pending_eoi_check_valid(ioapic); |
132 | } | 132 | } |
@@ -156,7 +156,8 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) | |||
156 | 156 | ||
157 | static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu) | 157 | static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu) |
158 | { | 158 | { |
159 | if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) { | 159 | if (test_and_clear_bit(vcpu->vcpu_id, |
160 | ioapic->rtc_status.dest_map.map)) { | ||
160 | --ioapic->rtc_status.pending_eoi; | 161 | --ioapic->rtc_status.pending_eoi; |
161 | rtc_status_pending_eoi_check_valid(ioapic); | 162 | rtc_status_pending_eoi_check_valid(ioapic); |
162 | } | 163 | } |
@@ -236,10 +237,17 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) | |||
236 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) | 237 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) |
237 | { | 238 | { |
238 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; | 239 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; |
240 | struct dest_map *dest_map = &ioapic->rtc_status.dest_map; | ||
239 | union kvm_ioapic_redirect_entry *e; | 241 | union kvm_ioapic_redirect_entry *e; |
240 | int index; | 242 | int index; |
241 | 243 | ||
242 | spin_lock(&ioapic->lock); | 244 | spin_lock(&ioapic->lock); |
245 | |||
246 | /* Make sure we see any missing RTC EOI */ | ||
247 | if (test_bit(vcpu->vcpu_id, dest_map->map)) | ||
248 | __set_bit(dest_map->vectors[vcpu->vcpu_id], | ||
249 | ioapic_handled_vectors); | ||
250 | |||
243 | for (index = 0; index < IOAPIC_NUM_PINS; index++) { | 251 | for (index = 0; index < IOAPIC_NUM_PINS; index++) { |
244 | e = &ioapic->redirtbl[index]; | 252 | e = &ioapic->redirtbl[index]; |
245 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || | 253 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || |
@@ -346,7 +354,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) | |||
346 | */ | 354 | */ |
347 | BUG_ON(ioapic->rtc_status.pending_eoi != 0); | 355 | BUG_ON(ioapic->rtc_status.pending_eoi != 0); |
348 | ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, | 356 | ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, |
349 | ioapic->rtc_status.dest_map); | 357 | &ioapic->rtc_status.dest_map); |
350 | ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret); | 358 | ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret); |
351 | } else | 359 | } else |
352 | ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); | 360 | ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); |
@@ -407,8 +415,14 @@ static void kvm_ioapic_eoi_inject_work(struct work_struct *work) | |||
407 | static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, | 415 | static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, |
408 | struct kvm_ioapic *ioapic, int vector, int trigger_mode) | 416 | struct kvm_ioapic *ioapic, int vector, int trigger_mode) |
409 | { | 417 | { |
410 | int i; | 418 | struct dest_map *dest_map = &ioapic->rtc_status.dest_map; |
411 | struct kvm_lapic *apic = vcpu->arch.apic; | 419 | struct kvm_lapic *apic = vcpu->arch.apic; |
420 | int i; | ||
421 | |||
422 | /* RTC special handling */ | ||
423 | if (test_bit(vcpu->vcpu_id, dest_map->map) && | ||
424 | vector == dest_map->vectors[vcpu->vcpu_id]) | ||
425 | rtc_irq_eoi(ioapic, vcpu); | ||
412 | 426 | ||
413 | for (i = 0; i < IOAPIC_NUM_PINS; i++) { | 427 | for (i = 0; i < IOAPIC_NUM_PINS; i++) { |
414 | union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; | 428 | union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; |
@@ -416,8 +430,6 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, | |||
416 | if (ent->fields.vector != vector) | 430 | if (ent->fields.vector != vector) |
417 | continue; | 431 | continue; |
418 | 432 | ||
419 | if (i == RTC_GSI) | ||
420 | rtc_irq_eoi(ioapic, vcpu); | ||
421 | /* | 433 | /* |
422 | * We are dropping lock while calling ack notifiers because ack | 434 | * We are dropping lock while calling ack notifiers because ack |
423 | * notifier callbacks for assigned devices call into IOAPIC | 435 | * notifier callbacks for assigned devices call into IOAPIC |
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 2d16dc251d81..7d2692a49657 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h | |||
@@ -40,9 +40,21 @@ struct kvm_vcpu; | |||
40 | #define RTC_GSI -1U | 40 | #define RTC_GSI -1U |
41 | #endif | 41 | #endif |
42 | 42 | ||
43 | struct dest_map { | ||
44 | /* vcpu bitmap where IRQ has been sent */ | ||
45 | DECLARE_BITMAP(map, KVM_MAX_VCPUS); | ||
46 | |||
47 | /* | ||
48 | * Vector sent to a given vcpu, only valid when | ||
49 | * the vcpu's bit in map is set | ||
50 | */ | ||
51 | u8 vectors[KVM_MAX_VCPUS]; | ||
52 | }; | ||
53 | |||
54 | |||
43 | struct rtc_status { | 55 | struct rtc_status { |
44 | int pending_eoi; | 56 | int pending_eoi; |
45 | DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS); | 57 | struct dest_map dest_map; |
46 | }; | 58 | }; |
47 | 59 | ||
48 | union kvm_ioapic_redirect_entry { | 60 | union kvm_ioapic_redirect_entry { |
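struct dest_map extends the old plain bitmap with the vector actually delivered to each vcpu; that is what lets the EOI paths in ioapic.c above match "this EOI is the RTC one" instead of keying on a redirection-table pin. Delivery records both halves together, roughly as below (the writer side is an assumption about the lapic change elsewhere in this merge; the reader side appears verbatim in __kvm_ioapic_update_eoi above):

    /* On successful delivery of the RTC interrupt to @vcpu: */
    __set_bit(vcpu->vcpu_id, dest_map->map);
    dest_map->vectors[vcpu->vcpu_id] = irq->vector;

    /* On EOI, check both before accounting the RTC EOI: */
    if (test_bit(vcpu->vcpu_id, dest_map->map) &&
        vector == dest_map->vectors[vcpu->vcpu_id])
            rtc_irq_eoi(ioapic, vcpu);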
@@ -118,7 +130,8 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, | |||
118 | int level, bool line_status); | 130 | int level, bool line_status); |
119 | void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); | 131 | void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); |
120 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | 132 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, |
121 | struct kvm_lapic_irq *irq, unsigned long *dest_map); | 133 | struct kvm_lapic_irq *irq, |
134 | struct dest_map *dest_map); | ||
122 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | 135 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); |
123 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | 136 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); |
124 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, | 137 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, |
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 3982b479bb5f..95fcc7b13866 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
@@ -33,7 +33,10 @@ | |||
33 | */ | 33 | */ |
34 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | 34 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) |
35 | { | 35 | { |
36 | return apic_has_pending_timer(vcpu); | 36 | if (lapic_in_kernel(vcpu)) |
37 | return apic_has_pending_timer(vcpu); | ||
38 | |||
39 | return 0; | ||
37 | } | 40 | } |
38 | EXPORT_SYMBOL(kvm_cpu_has_pending_timer); | 41 | EXPORT_SYMBOL(kvm_cpu_has_pending_timer); |
39 | 42 | ||
@@ -137,8 +140,8 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); | |||
137 | 140 | ||
138 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) | 141 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) |
139 | { | 142 | { |
140 | kvm_inject_apic_timer_irqs(vcpu); | 143 | if (lapic_in_kernel(vcpu)) |
141 | /* TODO: PIT, RTC etc. */ | 144 | kvm_inject_apic_timer_irqs(vcpu); |
142 | } | 145 | } |
143 | EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); | 146 | EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); |
144 | 147 | ||
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index ae5c78f2337d..61ebdc13a29a 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -109,14 +109,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm) | |||
109 | return ret; | 109 | return ret; |
110 | } | 110 | } |
111 | 111 | ||
112 | static inline int lapic_in_kernel(struct kvm_vcpu *vcpu) | ||
113 | { | ||
114 | /* Same as irqchip_in_kernel(vcpu->kvm), but with less | ||
115 | * pointer chasing and no unnecessary memory barriers. | ||
116 | */ | ||
117 | return vcpu->arch.apic != NULL; | ||
118 | } | ||
119 | |||
120 | void kvm_pic_reset(struct kvm_kpic_state *s); | 112 | void kvm_pic_reset(struct kvm_kpic_state *s); |
121 | 113 | ||
122 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); | 114 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 8fc89efb5250..54ead79e444b 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include "lapic.h" | 34 | #include "lapic.h" |
35 | 35 | ||
36 | #include "hyperv.h" | 36 | #include "hyperv.h" |
37 | #include "x86.h" | ||
37 | 38 | ||
38 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, | 39 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, |
39 | struct kvm *kvm, int irq_source_id, int level, | 40 | struct kvm *kvm, int irq_source_id, int level, |
@@ -53,10 +54,12 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, | |||
53 | } | 54 | } |
54 | 55 | ||
55 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | 56 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, |
56 | struct kvm_lapic_irq *irq, unsigned long *dest_map) | 57 | struct kvm_lapic_irq *irq, struct dest_map *dest_map) |
57 | { | 58 | { |
58 | int i, r = -1; | 59 | int i, r = -1; |
59 | struct kvm_vcpu *vcpu, *lowest = NULL; | 60 | struct kvm_vcpu *vcpu, *lowest = NULL; |
61 | unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; | ||
62 | unsigned int dest_vcpus = 0; | ||
60 | 63 | ||
61 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && | 64 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && |
62 | kvm_lowest_prio_delivery(irq)) { | 65 | kvm_lowest_prio_delivery(irq)) { |
@@ -67,6 +70,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
67 | if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) | 70 | if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) |
68 | return r; | 71 | return r; |
69 | 72 | ||
73 | memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); | ||
74 | |||
70 | kvm_for_each_vcpu(i, vcpu, kvm) { | 75 | kvm_for_each_vcpu(i, vcpu, kvm) { |
71 | if (!kvm_apic_present(vcpu)) | 76 | if (!kvm_apic_present(vcpu)) |
72 | continue; | 77 | continue; |
@@ -80,13 +85,25 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
80 | r = 0; | 85 | r = 0; |
81 | r += kvm_apic_set_irq(vcpu, irq, dest_map); | 86 | r += kvm_apic_set_irq(vcpu, irq, dest_map); |
82 | } else if (kvm_lapic_enabled(vcpu)) { | 87 | } else if (kvm_lapic_enabled(vcpu)) { |
83 | if (!lowest) | 88 | if (!kvm_vector_hashing_enabled()) { |
84 | lowest = vcpu; | 89 | if (!lowest) |
85 | else if (kvm_apic_compare_prio(vcpu, lowest) < 0) | 90 | lowest = vcpu; |
86 | lowest = vcpu; | 91 | else if (kvm_apic_compare_prio(vcpu, lowest) < 0) |
92 | lowest = vcpu; | ||
93 | } else { | ||
94 | __set_bit(i, dest_vcpu_bitmap); | ||
95 | dest_vcpus++; | ||
96 | } | ||
87 | } | 97 | } |
88 | } | 98 | } |
89 | 99 | ||
100 | if (dest_vcpus != 0) { | ||
101 | int idx = kvm_vector_to_index(irq->vector, dest_vcpus, | ||
102 | dest_vcpu_bitmap, KVM_MAX_VCPUS); | ||
103 | |||
104 | lowest = kvm_get_vcpu(kvm, idx); | ||
105 | } | ||
106 | |||
90 | if (lowest) | 107 | if (lowest) |
91 | r = kvm_apic_set_irq(lowest, irq, dest_map); | 108 | r = kvm_apic_set_irq(lowest, irq, dest_map); |
92 | 109 | ||
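With vector hashing enabled, the lowest-priority scan above gives way to a deterministic pick: candidates are collected into dest_vcpu_bitmap and the guest vector is hashed onto them. A worked example of the rule implemented by kvm_vector_to_index() (defined in the lapic.c hunk below); the values are made up:

	static int example_pick(void)
	{
		DECLARE_BITMAP(candidates, KVM_MAX_VCPUS) = { 0 };

		__set_bit(1, candidates);
		__set_bit(4, candidates);
		__set_bit(6, candidates);

		/* 3 candidates, vector 34: 34 % 3 == 1, so take the
		 * second set bit of the bitmap -> vCPU 4 */
		return kvm_vector_to_index(34, 3, candidates, KVM_MAX_VCPUS);
	}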
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3a045f39ed81..443d2a57ad3d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -281,7 +281,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) | |||
281 | struct kvm_cpuid_entry2 *feat; | 281 | struct kvm_cpuid_entry2 *feat; |
282 | u32 v = APIC_VERSION; | 282 | u32 v = APIC_VERSION; |
283 | 283 | ||
284 | if (!kvm_vcpu_has_lapic(vcpu)) | 284 | if (!lapic_in_kernel(vcpu)) |
285 | return; | 285 | return; |
286 | 286 | ||
287 | feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); | 287 | feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); |
@@ -475,26 +475,20 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) | |||
475 | 475 | ||
476 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) | 476 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) |
477 | { | 477 | { |
478 | int highest_irr; | ||
479 | |||
480 | /* This may race with setting of irr in __apic_accept_irq() and | 478 | /* This may race with setting of irr in __apic_accept_irq() and |
481 | * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq | 479 | * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq |
482 | * will cause vmexit immediately and the value will be recalculated | 480 | * will cause vmexit immediately and the value will be recalculated |
483 | * on the next vmentry. | 481 | * on the next vmentry. |
484 | */ | 482 | */ |
485 | if (!kvm_vcpu_has_lapic(vcpu)) | 483 | return apic_find_highest_irr(vcpu->arch.apic); |
486 | return 0; | ||
487 | highest_irr = apic_find_highest_irr(vcpu->arch.apic); | ||
488 | |||
489 | return highest_irr; | ||
490 | } | 484 | } |
491 | 485 | ||
492 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | 486 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, |
493 | int vector, int level, int trig_mode, | 487 | int vector, int level, int trig_mode, |
494 | unsigned long *dest_map); | 488 | struct dest_map *dest_map); |
495 | 489 | ||
496 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, | 490 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, |
497 | unsigned long *dest_map) | 491 | struct dest_map *dest_map) |
498 | { | 492 | { |
499 | struct kvm_lapic *apic = vcpu->arch.apic; | 493 | struct kvm_lapic *apic = vcpu->arch.apic; |
500 | 494 | ||
@@ -675,8 +669,33 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | |||
675 | } | 669 | } |
676 | } | 670 | } |
677 | 671 | ||
672 | int kvm_vector_to_index(u32 vector, u32 dest_vcpus, | ||
673 | const unsigned long *bitmap, u32 bitmap_size) | ||
674 | { | ||
675 | u32 mod; | ||
676 | int i, idx = -1; | ||
677 | |||
678 | mod = vector % dest_vcpus; | ||
679 | |||
680 | for (i = 0; i <= mod; i++) { | ||
681 | idx = find_next_bit(bitmap, bitmap_size, idx + 1); | ||
682 | BUG_ON(idx == bitmap_size); | ||
683 | } | ||
684 | |||
685 | return idx; | ||
686 | } | ||
687 | |||
688 | static void kvm_apic_disabled_lapic_found(struct kvm *kvm) | ||
689 | { | ||
690 | if (!kvm->arch.disabled_lapic_found) { | ||
691 | kvm->arch.disabled_lapic_found = true; | ||
692 | printk(KERN_INFO | ||
693 | "Disabled LAPIC found during irq injection\n"); | ||
694 | } | ||
695 | } | ||
696 | |||
678 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | 697 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, |
679 | struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map) | 698 | struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map) |
680 | { | 699 | { |
681 | struct kvm_apic_map *map; | 700 | struct kvm_apic_map *map; |
682 | unsigned long bitmap = 1; | 701 | unsigned long bitmap = 1; |
@@ -727,21 +746,42 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | |||
727 | 746 | ||
728 | dst = map->logical_map[cid]; | 747 | dst = map->logical_map[cid]; |
729 | 748 | ||
730 | if (kvm_lowest_prio_delivery(irq)) { | 749 | if (!kvm_lowest_prio_delivery(irq)) |
750 | goto set_irq; | ||
751 | |||
752 | if (!kvm_vector_hashing_enabled()) { | ||
731 | int l = -1; | 753 | int l = -1; |
732 | for_each_set_bit(i, &bitmap, 16) { | 754 | for_each_set_bit(i, &bitmap, 16) { |
733 | if (!dst[i]) | 755 | if (!dst[i]) |
734 | continue; | 756 | continue; |
735 | if (l < 0) | 757 | if (l < 0) |
736 | l = i; | 758 | l = i; |
737 | else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0) | 759 | else if (kvm_apic_compare_prio(dst[i]->vcpu, |
760 | dst[l]->vcpu) < 0) | ||
738 | l = i; | 761 | l = i; |
739 | } | 762 | } |
740 | |||
741 | bitmap = (l >= 0) ? 1 << l : 0; | 763 | bitmap = (l >= 0) ? 1 << l : 0; |
764 | } else { | ||
765 | int idx; | ||
766 | unsigned int dest_vcpus; | ||
767 | |||
768 | dest_vcpus = hweight16(bitmap); | ||
769 | if (dest_vcpus == 0) | ||
770 | goto out; | ||
771 | |||
772 | idx = kvm_vector_to_index(irq->vector, | ||
773 | dest_vcpus, &bitmap, 16); | ||
774 | |||
775 | if (!dst[idx]) { | ||
776 | kvm_apic_disabled_lapic_found(kvm); | ||
777 | goto out; | ||
778 | } | ||
779 | |||
780 | bitmap = (idx >= 0) ? 1 << idx : 0; | ||
742 | } | 781 | } |
743 | } | 782 | } |
744 | 783 | ||
784 | set_irq: | ||
745 | for_each_set_bit(i, &bitmap, 16) { | 785 | for_each_set_bit(i, &bitmap, 16) { |
746 | if (!dst[i]) | 786 | if (!dst[i]) |
747 | continue; | 787 | continue; |
@@ -754,6 +794,20 @@ out: | |||
754 | return ret; | 794 | return ret; |
755 | } | 795 | } |
756 | 796 | ||
797 | /* | ||
798 | * This routine tries to handle interrupts in posted mode; here is how | ||
799 | * it deals with different cases: | ||
800 | * - For single-destination interrupts, handle it in posted mode | ||
801 | * - Else if vector hashing is enabled and it is a lowest-priority | ||
802 | * interrupt, handle it in posted mode and use the following mechanism | ||
803 | * to find the destination vCPU. | ||
804 | * 1. For lowest-priority interrupts, store all the possible | ||
805 | * destination vCPUs in an array. | ||
806 | * 2. Use "guest vector % max number of destination vCPUs" to find | ||
807 | * the right destination vCPU in the array for the lowest-priority | ||
808 | * interrupt. | ||
809 | * - Otherwise, use remapped mode to inject the interrupt. | ||
810 | */ | ||
757 | bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, | 811 | bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, |
758 | struct kvm_vcpu **dest_vcpu) | 812 | struct kvm_vcpu **dest_vcpu) |
759 | { | 813 | { |
@@ -795,16 +849,37 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, | |||
795 | if (cid >= ARRAY_SIZE(map->logical_map)) | 849 | if (cid >= ARRAY_SIZE(map->logical_map)) |
796 | goto out; | 850 | goto out; |
797 | 851 | ||
798 | for_each_set_bit(i, &bitmap, 16) { | 852 | if (kvm_vector_hashing_enabled() && |
799 | dst = map->logical_map[cid][i]; | 853 | kvm_lowest_prio_delivery(irq)) { |
800 | if (++r == 2) | 854 | int idx; |
855 | unsigned int dest_vcpus; | ||
856 | |||
857 | dest_vcpus = hweight16(bitmap); | ||
858 | if (dest_vcpus == 0) | ||
801 | goto out; | 859 | goto out; |
802 | } | ||
803 | 860 | ||
804 | if (dst && kvm_apic_present(dst->vcpu)) | 861 | idx = kvm_vector_to_index(irq->vector, dest_vcpus, |
862 | &bitmap, 16); | ||
863 | |||
864 | dst = map->logical_map[cid][idx]; | ||
865 | if (!dst) { | ||
866 | kvm_apic_disabled_lapic_found(kvm); | ||
867 | goto out; | ||
868 | } | ||
869 | |||
805 | *dest_vcpu = dst->vcpu; | 870 | *dest_vcpu = dst->vcpu; |
806 | else | 871 | } else { |
807 | goto out; | 872 | for_each_set_bit(i, &bitmap, 16) { |
873 | dst = map->logical_map[cid][i]; | ||
874 | if (++r == 2) | ||
875 | goto out; | ||
876 | } | ||
877 | |||
878 | if (dst && kvm_apic_present(dst->vcpu)) | ||
879 | *dest_vcpu = dst->vcpu; | ||
880 | else | ||
881 | goto out; | ||
882 | } | ||
808 | } | 883 | } |
809 | 884 | ||
810 | ret = true; | 885 | ret = true; |
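Per the comment added above, a lowest-priority interrupt can now be handled in posted mode whenever hashing resolves it to exactly one vCPU. A hedged sketch of how a posted-interrupt caller might use the fast path (the caller shape is assumed, not part of this patch):

	static bool example_can_post(struct kvm *kvm, struct kvm_lapic_irq *irq,
				     struct kvm_vcpu **dest)
	{
		/* one computable destination -> posted mode; anything
		 * else falls back to remapped mode */
		return kvm_intr_is_single_vcpu_fast(kvm, irq, dest);
	}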
@@ -819,7 +894,7 @@ out: | |||
819 | */ | 894 | */ |
820 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | 895 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, |
821 | int vector, int level, int trig_mode, | 896 | int vector, int level, int trig_mode, |
822 | unsigned long *dest_map) | 897 | struct dest_map *dest_map) |
823 | { | 898 | { |
824 | int result = 0; | 899 | int result = 0; |
825 | struct kvm_vcpu *vcpu = apic->vcpu; | 900 | struct kvm_vcpu *vcpu = apic->vcpu; |
@@ -839,8 +914,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
839 | 914 | ||
840 | result = 1; | 915 | result = 1; |
841 | 916 | ||
842 | if (dest_map) | 917 | if (dest_map) { |
843 | __set_bit(vcpu->vcpu_id, dest_map); | 918 | __set_bit(vcpu->vcpu_id, dest_map->map); |
919 | dest_map->vectors[vcpu->vcpu_id] = vector; | ||
920 | } | ||
844 | 921 | ||
845 | if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { | 922 | if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { |
846 | if (trig_mode) | 923 | if (trig_mode) |
@@ -1239,7 +1316,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) | |||
1239 | struct kvm_lapic *apic = vcpu->arch.apic; | 1316 | struct kvm_lapic *apic = vcpu->arch.apic; |
1240 | u64 guest_tsc, tsc_deadline; | 1317 | u64 guest_tsc, tsc_deadline; |
1241 | 1318 | ||
1242 | if (!kvm_vcpu_has_lapic(vcpu)) | 1319 | if (!lapic_in_kernel(vcpu)) |
1243 | return; | 1320 | return; |
1244 | 1321 | ||
1245 | if (apic->lapic_timer.expired_tscdeadline == 0) | 1322 | if (apic->lapic_timer.expired_tscdeadline == 0) |
@@ -1515,8 +1592,7 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, | |||
1515 | 1592 | ||
1516 | void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) | 1593 | void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) |
1517 | { | 1594 | { |
1518 | if (kvm_vcpu_has_lapic(vcpu)) | 1595 | apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); |
1519 | apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); | ||
1520 | } | 1596 | } |
1521 | EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); | 1597 | EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); |
1522 | 1598 | ||
@@ -1566,7 +1642,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) | |||
1566 | { | 1642 | { |
1567 | struct kvm_lapic *apic = vcpu->arch.apic; | 1643 | struct kvm_lapic *apic = vcpu->arch.apic; |
1568 | 1644 | ||
1569 | if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || | 1645 | if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) || |
1570 | apic_lvtt_period(apic)) | 1646 | apic_lvtt_period(apic)) |
1571 | return 0; | 1647 | return 0; |
1572 | 1648 | ||
@@ -1577,7 +1653,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) | |||
1577 | { | 1653 | { |
1578 | struct kvm_lapic *apic = vcpu->arch.apic; | 1654 | struct kvm_lapic *apic = vcpu->arch.apic; |
1579 | 1655 | ||
1580 | if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || | 1656 | if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) || |
1581 | apic_lvtt_period(apic)) | 1657 | apic_lvtt_period(apic)) |
1582 | return; | 1658 | return; |
1583 | 1659 | ||
@@ -1590,9 +1666,6 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) | |||
1590 | { | 1666 | { |
1591 | struct kvm_lapic *apic = vcpu->arch.apic; | 1667 | struct kvm_lapic *apic = vcpu->arch.apic; |
1592 | 1668 | ||
1593 | if (!kvm_vcpu_has_lapic(vcpu)) | ||
1594 | return; | ||
1595 | |||
1596 | apic_set_tpr(apic, ((cr8 & 0x0f) << 4) | 1669 | apic_set_tpr(apic, ((cr8 & 0x0f) << 4) |
1597 | | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4)); | 1670 | | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4)); |
1598 | } | 1671 | } |
@@ -1601,9 +1674,6 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) | |||
1601 | { | 1674 | { |
1602 | u64 tpr; | 1675 | u64 tpr; |
1603 | 1676 | ||
1604 | if (!kvm_vcpu_has_lapic(vcpu)) | ||
1605 | return 0; | ||
1606 | |||
1607 | tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI); | 1677 | tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI); |
1608 | 1678 | ||
1609 | return (tpr & 0xf0) >> 4; | 1679 | return (tpr & 0xf0) >> 4; |
@@ -1728,8 +1798,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) | |||
1728 | { | 1798 | { |
1729 | struct kvm_lapic *apic = vcpu->arch.apic; | 1799 | struct kvm_lapic *apic = vcpu->arch.apic; |
1730 | 1800 | ||
1731 | if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) && | 1801 | if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT)) |
1732 | apic_lvt_enabled(apic, APIC_LVTT)) | ||
1733 | return atomic_read(&apic->lapic_timer.pending); | 1802 | return atomic_read(&apic->lapic_timer.pending); |
1734 | 1803 | ||
1735 | return 0; | 1804 | return 0; |
@@ -1826,7 +1895,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) | |||
1826 | struct kvm_lapic *apic = vcpu->arch.apic; | 1895 | struct kvm_lapic *apic = vcpu->arch.apic; |
1827 | int highest_irr; | 1896 | int highest_irr; |
1828 | 1897 | ||
1829 | if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic)) | 1898 | if (!apic_enabled(apic)) |
1830 | return -1; | 1899 | return -1; |
1831 | 1900 | ||
1832 | apic_update_ppr(apic); | 1901 | apic_update_ppr(apic); |
@@ -1854,9 +1923,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) | |||
1854 | { | 1923 | { |
1855 | struct kvm_lapic *apic = vcpu->arch.apic; | 1924 | struct kvm_lapic *apic = vcpu->arch.apic; |
1856 | 1925 | ||
1857 | if (!kvm_vcpu_has_lapic(vcpu)) | ||
1858 | return; | ||
1859 | |||
1860 | if (atomic_read(&apic->lapic_timer.pending) > 0) { | 1926 | if (atomic_read(&apic->lapic_timer.pending) > 0) { |
1861 | kvm_apic_local_deliver(apic, APIC_LVTT); | 1927 | kvm_apic_local_deliver(apic, APIC_LVTT); |
1862 | if (apic_lvtt_tscdeadline(apic)) | 1928 | if (apic_lvtt_tscdeadline(apic)) |
@@ -1932,7 +1998,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | |||
1932 | { | 1998 | { |
1933 | struct hrtimer *timer; | 1999 | struct hrtimer *timer; |
1934 | 2000 | ||
1935 | if (!kvm_vcpu_has_lapic(vcpu)) | 2001 | if (!lapic_in_kernel(vcpu)) |
1936 | return; | 2002 | return; |
1937 | 2003 | ||
1938 | timer = &vcpu->arch.apic->lapic_timer.timer; | 2004 | timer = &vcpu->arch.apic->lapic_timer.timer; |
@@ -2105,7 +2171,7 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) | |||
2105 | { | 2171 | { |
2106 | struct kvm_lapic *apic = vcpu->arch.apic; | 2172 | struct kvm_lapic *apic = vcpu->arch.apic; |
2107 | 2173 | ||
2108 | if (!kvm_vcpu_has_lapic(vcpu)) | 2174 | if (!lapic_in_kernel(vcpu)) |
2109 | return 1; | 2175 | return 1; |
2110 | 2176 | ||
2111 | /* if this is ICR write vector before command */ | 2177 | /* if this is ICR write vector before command */ |
@@ -2119,7 +2185,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) | |||
2119 | struct kvm_lapic *apic = vcpu->arch.apic; | 2185 | struct kvm_lapic *apic = vcpu->arch.apic; |
2120 | u32 low, high = 0; | 2186 | u32 low, high = 0; |
2121 | 2187 | ||
2122 | if (!kvm_vcpu_has_lapic(vcpu)) | 2188 | if (!lapic_in_kernel(vcpu)) |
2123 | return 1; | 2189 | return 1; |
2124 | 2190 | ||
2125 | if (apic_reg_read(apic, reg, 4, &low)) | 2191 | if (apic_reg_read(apic, reg, 4, &low)) |
@@ -2151,7 +2217,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) | |||
2151 | u8 sipi_vector; | 2217 | u8 sipi_vector; |
2152 | unsigned long pe; | 2218 | unsigned long pe; |
2153 | 2219 | ||
2154 | if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events) | 2220 | if (!lapic_in_kernel(vcpu) || !apic->pending_events) |
2155 | return; | 2221 | return; |
2156 | 2222 | ||
2157 | /* | 2223 | /* |
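kvm_apic_disabled_lapic_found() deliberately logs once per VM via a bool in kvm->arch instead of printk_ratelimited(): hashing onto a disabled LAPIC is a guest-configuration oddity worth exactly one line. The same one-shot pattern in isolation (names are assumed):

	/* Sketch of the one-shot pattern; the flag is never cleared, so
	 * the message fires at most once over the VM's lifetime.
	 */
	static void example_warn_once(bool *flag, const char *msg)
	{
		if (!*flag) {
			*flag = true;
			printk(KERN_INFO "%s\n", msg);
		}
	}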
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 41bdb35b4b67..f71183e502ee 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -42,6 +42,9 @@ struct kvm_lapic { | |||
42 | unsigned long pending_events; | 42 | unsigned long pending_events; |
43 | unsigned int sipi_vector; | 43 | unsigned int sipi_vector; |
44 | }; | 44 | }; |
45 | |||
46 | struct dest_map; | ||
47 | |||
45 | int kvm_create_lapic(struct kvm_vcpu *vcpu); | 48 | int kvm_create_lapic(struct kvm_vcpu *vcpu); |
46 | void kvm_free_lapic(struct kvm_vcpu *vcpu); | 49 | void kvm_free_lapic(struct kvm_vcpu *vcpu); |
47 | 50 | ||
@@ -60,11 +63,11 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu); | |||
60 | void __kvm_apic_update_irr(u32 *pir, void *regs); | 63 | void __kvm_apic_update_irr(u32 *pir, void *regs); |
61 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); | 64 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); |
62 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, | 65 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, |
63 | unsigned long *dest_map); | 66 | struct dest_map *dest_map); |
64 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); | 67 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); |
65 | 68 | ||
66 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | 69 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, |
67 | struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); | 70 | struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map); |
68 | 71 | ||
69 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); | 72 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); |
70 | int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); | 73 | int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); |
@@ -103,7 +106,7 @@ static inline u32 kvm_apic_get_reg(struct kvm_lapic *apic, int reg_off) | |||
103 | 106 | ||
104 | extern struct static_key kvm_no_apic_vcpu; | 107 | extern struct static_key kvm_no_apic_vcpu; |
105 | 108 | ||
106 | static inline bool kvm_vcpu_has_lapic(struct kvm_vcpu *vcpu) | 109 | static inline bool lapic_in_kernel(struct kvm_vcpu *vcpu) |
107 | { | 110 | { |
108 | if (static_key_false(&kvm_no_apic_vcpu)) | 111 | if (static_key_false(&kvm_no_apic_vcpu)) |
109 | return vcpu->arch.apic; | 112 | return vcpu->arch.apic; |
@@ -130,7 +133,7 @@ static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic) | |||
130 | 133 | ||
131 | static inline bool kvm_apic_present(struct kvm_vcpu *vcpu) | 134 | static inline bool kvm_apic_present(struct kvm_vcpu *vcpu) |
132 | { | 135 | { |
133 | return kvm_vcpu_has_lapic(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic); | 136 | return lapic_in_kernel(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic); |
134 | } | 137 | } |
135 | 138 | ||
136 | static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu) | 139 | static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu) |
@@ -150,7 +153,7 @@ static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu) | |||
150 | 153 | ||
151 | static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) | 154 | static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) |
152 | { | 155 | { |
153 | return kvm_vcpu_has_lapic(vcpu) && vcpu->arch.apic->pending_events; | 156 | return lapic_in_kernel(vcpu) && vcpu->arch.apic->pending_events; |
154 | } | 157 | } |
155 | 158 | ||
156 | static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) | 159 | static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) |
@@ -161,7 +164,7 @@ static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) | |||
161 | 164 | ||
162 | static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) | 165 | static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) |
163 | { | 166 | { |
164 | return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); | 167 | return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); |
165 | } | 168 | } |
166 | 169 | ||
167 | static inline int kvm_apic_id(struct kvm_lapic *apic) | 170 | static inline int kvm_apic_id(struct kvm_lapic *apic) |
@@ -175,4 +178,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu); | |||
175 | 178 | ||
176 | bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, | 179 | bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, |
177 | struct kvm_vcpu **dest_vcpu); | 180 | struct kvm_vcpu **dest_vcpu); |
181 | int kvm_vector_to_index(u32 vector, u32 dest_vcpus, | ||
182 | const unsigned long *bitmap, u32 bitmap_size); | ||
178 | #endif | 183 | #endif |
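The rename kvm_vcpu_has_lapic() -> lapic_in_kernel() keeps the static-key fast path: with kvm_no_apic_vcpu off (the common case of every vCPU having an in-kernel LAPIC), the branch is patched out entirely. The hunk above truncates the helper's body; reconstructed from the pre-rename function, it plausibly reads:

	static inline bool lapic_in_kernel(struct kvm_vcpu *vcpu)
	{
		/* slow path only when some vCPU lacks an in-kernel LAPIC */
		if (static_key_false(&kvm_no_apic_vcpu))
			return vcpu->arch.apic;
		return true;
	}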
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1e7a49bfc94f..c512f095cdac 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <asm/cmpxchg.h> | 41 | #include <asm/cmpxchg.h> |
42 | #include <asm/io.h> | 42 | #include <asm/io.h> |
43 | #include <asm/vmx.h> | 43 | #include <asm/vmx.h> |
44 | #include <asm/kvm_page_track.h> | ||
44 | 45 | ||
45 | /* | 46 | /* |
46 | * When setting this variable to true it enables Two-Dimensional-Paging | 47 | * When setting this variable to true it enables Two-Dimensional-Paging |
@@ -776,62 +777,85 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, | |||
776 | return &slot->arch.lpage_info[level - 2][idx]; | 777 | return &slot->arch.lpage_info[level - 2][idx]; |
777 | } | 778 | } |
778 | 779 | ||
780 | static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot, | ||
781 | gfn_t gfn, int count) | ||
782 | { | ||
783 | struct kvm_lpage_info *linfo; | ||
784 | int i; | ||
785 | |||
786 | for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { | ||
787 | linfo = lpage_info_slot(gfn, slot, i); | ||
788 | linfo->disallow_lpage += count; | ||
789 | WARN_ON(linfo->disallow_lpage < 0); | ||
790 | } | ||
791 | } | ||
792 | |||
793 | void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn) | ||
794 | { | ||
795 | update_gfn_disallow_lpage_count(slot, gfn, 1); | ||
796 | } | ||
797 | |||
798 | void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn) | ||
799 | { | ||
800 | update_gfn_disallow_lpage_count(slot, gfn, -1); | ||
801 | } | ||
802 | |||
779 | static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) | 803 | static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) |
780 | { | 804 | { |
781 | struct kvm_memslots *slots; | 805 | struct kvm_memslots *slots; |
782 | struct kvm_memory_slot *slot; | 806 | struct kvm_memory_slot *slot; |
783 | struct kvm_lpage_info *linfo; | ||
784 | gfn_t gfn; | 807 | gfn_t gfn; |
785 | int i; | ||
786 | 808 | ||
809 | kvm->arch.indirect_shadow_pages++; | ||
787 | gfn = sp->gfn; | 810 | gfn = sp->gfn; |
788 | slots = kvm_memslots_for_spte_role(kvm, sp->role); | 811 | slots = kvm_memslots_for_spte_role(kvm, sp->role); |
789 | slot = __gfn_to_memslot(slots, gfn); | 812 | slot = __gfn_to_memslot(slots, gfn); |
790 | for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { | 813 | |
791 | linfo = lpage_info_slot(gfn, slot, i); | 814 | /* the non-leaf shadow pages are keeping readonly. */ |
792 | linfo->write_count += 1; | 815 | /* the non-leaf shadow pages are kept read-only. */ |
793 | } | 816 | return kvm_slot_page_track_add_page(kvm, slot, gfn, |
794 | kvm->arch.indirect_shadow_pages++; | 817 | KVM_PAGE_TRACK_WRITE); |
818 | |||
819 | kvm_mmu_gfn_disallow_lpage(slot, gfn); | ||
795 | } | 820 | } |
796 | 821 | ||
797 | static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) | 822 | static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) |
798 | { | 823 | { |
799 | struct kvm_memslots *slots; | 824 | struct kvm_memslots *slots; |
800 | struct kvm_memory_slot *slot; | 825 | struct kvm_memory_slot *slot; |
801 | struct kvm_lpage_info *linfo; | ||
802 | gfn_t gfn; | 826 | gfn_t gfn; |
803 | int i; | ||
804 | 827 | ||
828 | kvm->arch.indirect_shadow_pages--; | ||
805 | gfn = sp->gfn; | 829 | gfn = sp->gfn; |
806 | slots = kvm_memslots_for_spte_role(kvm, sp->role); | 830 | slots = kvm_memslots_for_spte_role(kvm, sp->role); |
807 | slot = __gfn_to_memslot(slots, gfn); | 831 | slot = __gfn_to_memslot(slots, gfn); |
808 | for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { | 832 | if (sp->role.level > PT_PAGE_TABLE_LEVEL) |
809 | linfo = lpage_info_slot(gfn, slot, i); | 833 | return kvm_slot_page_track_remove_page(kvm, slot, gfn, |
810 | linfo->write_count -= 1; | 834 | KVM_PAGE_TRACK_WRITE); |
811 | WARN_ON(linfo->write_count < 0); | 835 | |
812 | } | 836 | kvm_mmu_gfn_allow_lpage(slot, gfn); |
813 | kvm->arch.indirect_shadow_pages--; | ||
814 | } | 837 | } |
815 | 838 | ||
816 | static int __has_wrprotected_page(gfn_t gfn, int level, | 839 | static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, |
817 | struct kvm_memory_slot *slot) | 840 | struct kvm_memory_slot *slot) |
818 | { | 841 | { |
819 | struct kvm_lpage_info *linfo; | 842 | struct kvm_lpage_info *linfo; |
820 | 843 | ||
821 | if (slot) { | 844 | if (slot) { |
822 | linfo = lpage_info_slot(gfn, slot, level); | 845 | linfo = lpage_info_slot(gfn, slot, level); |
823 | return linfo->write_count; | 846 | return !!linfo->disallow_lpage; |
824 | } | 847 | } |
825 | 848 | ||
826 | return 1; | 849 | return true; |
827 | } | 850 | } |
828 | 851 | ||
829 | static int has_wrprotected_page(struct kvm_vcpu *vcpu, gfn_t gfn, int level) | 852 | static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn, |
853 | int level) | ||
830 | { | 854 | { |
831 | struct kvm_memory_slot *slot; | 855 | struct kvm_memory_slot *slot; |
832 | 856 | ||
833 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); | 857 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); |
834 | return __has_wrprotected_page(gfn, level, slot); | 858 | return __mmu_gfn_lpage_is_disallowed(gfn, level, slot); |
835 | } | 859 | } |
836 | 860 | ||
837 | static int host_mapping_level(struct kvm *kvm, gfn_t gfn) | 861 | static int host_mapping_level(struct kvm *kvm, gfn_t gfn) |
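The rename write_count -> disallow_lpage turns the field into a counter of independent reasons a gfn must stay 4K-mapped: shadow pages, and now write tracking as well. A sketch of why a counter rather than a flag, with assumed call sites:

	/* Sketch (assumed call sites): two independent reasons can pin
	 * a gfn to 4K mappings, so a bool would break when they overlap.
	 */
	static void example_lifecycle(struct kvm_memory_slot *slot, gfn_t gfn)
	{
		kvm_mmu_gfn_disallow_lpage(slot, gfn);	/* shadow page created */
		kvm_mmu_gfn_disallow_lpage(slot, gfn);	/* gfn write-tracked   */
		kvm_mmu_gfn_allow_lpage(slot, gfn);	/* shadow page zapped  */
		/* count is still 1: large pages stay disallowed until
		 * the tracker drops its reference too */
		kvm_mmu_gfn_allow_lpage(slot, gfn);	/* back to 0 */
	}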
@@ -897,7 +921,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, | |||
897 | max_level = min(kvm_x86_ops->get_lpage_level(), host_level); | 921 | max_level = min(kvm_x86_ops->get_lpage_level(), host_level); |
898 | 922 | ||
899 | for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) | 923 | for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) |
900 | if (__has_wrprotected_page(large_gfn, level, slot)) | 924 | if (__mmu_gfn_lpage_is_disallowed(large_gfn, level, slot)) |
901 | break; | 925 | break; |
902 | 926 | ||
903 | return level - 1; | 927 | return level - 1; |
@@ -1323,23 +1347,29 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, | |||
1323 | kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); | 1347 | kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); |
1324 | } | 1348 | } |
1325 | 1349 | ||
1326 | static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) | 1350 | bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, |
1351 | struct kvm_memory_slot *slot, u64 gfn) | ||
1327 | { | 1352 | { |
1328 | struct kvm_memory_slot *slot; | ||
1329 | struct kvm_rmap_head *rmap_head; | 1353 | struct kvm_rmap_head *rmap_head; |
1330 | int i; | 1354 | int i; |
1331 | bool write_protected = false; | 1355 | bool write_protected = false; |
1332 | 1356 | ||
1333 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); | ||
1334 | |||
1335 | for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { | 1357 | for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { |
1336 | rmap_head = __gfn_to_rmap(gfn, i, slot); | 1358 | rmap_head = __gfn_to_rmap(gfn, i, slot); |
1337 | write_protected |= __rmap_write_protect(vcpu->kvm, rmap_head, true); | 1359 | write_protected |= __rmap_write_protect(kvm, rmap_head, true); |
1338 | } | 1360 | } |
1339 | 1361 | ||
1340 | return write_protected; | 1362 | return write_protected; |
1341 | } | 1363 | } |
1342 | 1364 | ||
1365 | static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) | ||
1366 | { | ||
1367 | struct kvm_memory_slot *slot; | ||
1368 | |||
1369 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); | ||
1370 | return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn); | ||
1371 | } | ||
1372 | |||
1343 | static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head) | 1373 | static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head) |
1344 | { | 1374 | { |
1345 | u64 *sptep; | 1375 | u64 *sptep; |
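Splitting kvm_mmu_slot_gfn_write_protect() out of rmap_write_protect() lets slot-based callers, such as the page-track machinery this series adds, write-protect a gfn without a vCPU in hand. A hedged caller sketch, modelled on the page-track add path:

	static void example_track_protect(struct kvm *kvm,
					  struct kvm_memory_slot *slot,
					  gfn_t gfn)
	{
		if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
			kvm_flush_remote_tlbs(kvm);  /* drop stale writable TLBs */
	}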
@@ -1754,7 +1784,7 @@ static void mark_unsync(u64 *spte) | |||
1754 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, | 1784 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, |
1755 | struct kvm_mmu_page *sp) | 1785 | struct kvm_mmu_page *sp) |
1756 | { | 1786 | { |
1757 | return 1; | 1787 | return 0; |
1758 | } | 1788 | } |
1759 | 1789 | ||
1760 | static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | 1790 | static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) |
@@ -1840,13 +1870,16 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, | |||
1840 | return nr_unsync_leaf; | 1870 | return nr_unsync_leaf; |
1841 | } | 1871 | } |
1842 | 1872 | ||
1873 | #define INVALID_INDEX (-1) | ||
1874 | |||
1843 | static int mmu_unsync_walk(struct kvm_mmu_page *sp, | 1875 | static int mmu_unsync_walk(struct kvm_mmu_page *sp, |
1844 | struct kvm_mmu_pages *pvec) | 1876 | struct kvm_mmu_pages *pvec) |
1845 | { | 1877 | { |
1878 | pvec->nr = 0; | ||
1846 | if (!sp->unsync_children) | 1879 | if (!sp->unsync_children) |
1847 | return 0; | 1880 | return 0; |
1848 | 1881 | ||
1849 | mmu_pages_add(pvec, sp, 0); | 1882 | mmu_pages_add(pvec, sp, INVALID_INDEX); |
1850 | return __mmu_unsync_walk(sp, pvec); | 1883 | return __mmu_unsync_walk(sp, pvec); |
1851 | } | 1884 | } |
1852 | 1885 | ||
@@ -1883,37 +1916,35 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, | |||
1883 | if ((_sp)->role.direct || (_sp)->role.invalid) {} else | 1916 | if ((_sp)->role.direct || (_sp)->role.invalid) {} else |
1884 | 1917 | ||
1885 | /* @sp->gfn should be write-protected at the call site */ | 1918 | /* @sp->gfn should be write-protected at the call site */ |
1886 | static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1919 | static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
1887 | struct list_head *invalid_list, bool clear_unsync) | 1920 | struct list_head *invalid_list) |
1888 | { | 1921 | { |
1889 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { | 1922 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { |
1890 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); | 1923 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); |
1891 | return 1; | 1924 | return false; |
1892 | } | 1925 | } |
1893 | 1926 | ||
1894 | if (clear_unsync) | 1927 | if (vcpu->arch.mmu.sync_page(vcpu, sp) == 0) { |
1895 | kvm_unlink_unsync_page(vcpu->kvm, sp); | ||
1896 | |||
1897 | if (vcpu->arch.mmu.sync_page(vcpu, sp)) { | ||
1898 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); | 1928 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); |
1899 | return 1; | 1929 | return false; |
1900 | } | 1930 | } |
1901 | 1931 | ||
1902 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | 1932 | return true; |
1903 | return 0; | ||
1904 | } | 1933 | } |
1905 | 1934 | ||
1906 | static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, | 1935 | static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, |
1907 | struct kvm_mmu_page *sp) | 1936 | struct list_head *invalid_list, |
1937 | bool remote_flush, bool local_flush) | ||
1908 | { | 1938 | { |
1909 | LIST_HEAD(invalid_list); | 1939 | if (!list_empty(invalid_list)) { |
1910 | int ret; | 1940 | kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list); |
1911 | 1941 | return; | |
1912 | ret = __kvm_sync_page(vcpu, sp, &invalid_list, false); | 1942 | } |
1913 | if (ret) | ||
1914 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
1915 | 1943 | ||
1916 | return ret; | 1944 | if (remote_flush) |
1945 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
1946 | else if (local_flush) | ||
1947 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | ||
1917 | } | 1948 | } |
1918 | 1949 | ||
1919 | #ifdef CONFIG_KVM_MMU_AUDIT | 1950 | #ifdef CONFIG_KVM_MMU_AUDIT |
@@ -1923,46 +1954,38 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { } | |||
1923 | static void mmu_audit_disable(void) { } | 1954 | static void mmu_audit_disable(void) { } |
1924 | #endif | 1955 | #endif |
1925 | 1956 | ||
1926 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1957 | static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
1927 | struct list_head *invalid_list) | 1958 | struct list_head *invalid_list) |
1928 | { | 1959 | { |
1929 | return __kvm_sync_page(vcpu, sp, invalid_list, true); | 1960 | kvm_unlink_unsync_page(vcpu->kvm, sp); |
1961 | return __kvm_sync_page(vcpu, sp, invalid_list); | ||
1930 | } | 1962 | } |
1931 | 1963 | ||
1932 | /* @gfn should be write-protected at the call site */ | 1964 | /* @gfn should be write-protected at the call site */ |
1933 | static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | 1965 | static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, |
1966 | struct list_head *invalid_list) | ||
1934 | { | 1967 | { |
1935 | struct kvm_mmu_page *s; | 1968 | struct kvm_mmu_page *s; |
1936 | LIST_HEAD(invalid_list); | 1969 | bool ret = false; |
1937 | bool flush = false; | ||
1938 | 1970 | ||
1939 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { | 1971 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { |
1940 | if (!s->unsync) | 1972 | if (!s->unsync) |
1941 | continue; | 1973 | continue; |
1942 | 1974 | ||
1943 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); | 1975 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); |
1944 | kvm_unlink_unsync_page(vcpu->kvm, s); | 1976 | ret |= kvm_sync_page(vcpu, s, invalid_list); |
1945 | if ((s->role.cr4_pae != !!is_pae(vcpu)) || | ||
1946 | (vcpu->arch.mmu.sync_page(vcpu, s))) { | ||
1947 | kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list); | ||
1948 | continue; | ||
1949 | } | ||
1950 | flush = true; | ||
1951 | } | 1977 | } |
1952 | 1978 | ||
1953 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 1979 | return ret; |
1954 | if (flush) | ||
1955 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | ||
1956 | } | 1980 | } |
1957 | 1981 | ||
1958 | struct mmu_page_path { | 1982 | struct mmu_page_path { |
1959 | struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; | 1983 | struct kvm_mmu_page *parent[PT64_ROOT_LEVEL]; |
1960 | unsigned int idx[PT64_ROOT_LEVEL-1]; | 1984 | unsigned int idx[PT64_ROOT_LEVEL]; |
1961 | }; | 1985 | }; |
1962 | 1986 | ||
1963 | #define for_each_sp(pvec, sp, parents, i) \ | 1987 | #define for_each_sp(pvec, sp, parents, i) \ |
1964 | for (i = mmu_pages_next(&pvec, &parents, -1), \ | 1988 | for (i = mmu_pages_first(&pvec, &parents); \ |
1965 | sp = pvec.page[i].sp; \ | ||
1966 | i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ | 1989 | i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ |
1967 | i = mmu_pages_next(&pvec, &parents, i)) | 1990 | i = mmu_pages_next(&pvec, &parents, i)) |
1968 | 1991 | ||
@@ -1974,19 +1997,43 @@ static int mmu_pages_next(struct kvm_mmu_pages *pvec, | |||
1974 | 1997 | ||
1975 | for (n = i+1; n < pvec->nr; n++) { | 1998 | for (n = i+1; n < pvec->nr; n++) { |
1976 | struct kvm_mmu_page *sp = pvec->page[n].sp; | 1999 | struct kvm_mmu_page *sp = pvec->page[n].sp; |
2000 | unsigned idx = pvec->page[n].idx; | ||
2001 | int level = sp->role.level; | ||
1977 | 2002 | ||
1978 | if (sp->role.level == PT_PAGE_TABLE_LEVEL) { | 2003 | parents->idx[level-1] = idx; |
1979 | parents->idx[0] = pvec->page[n].idx; | 2004 | if (level == PT_PAGE_TABLE_LEVEL) |
1980 | return n; | 2005 | break; |
1981 | } | ||
1982 | 2006 | ||
1983 | parents->parent[sp->role.level-2] = sp; | 2007 | parents->parent[level-2] = sp; |
1984 | parents->idx[sp->role.level-1] = pvec->page[n].idx; | ||
1985 | } | 2008 | } |
1986 | 2009 | ||
1987 | return n; | 2010 | return n; |
1988 | } | 2011 | } |
1989 | 2012 | ||
2013 | static int mmu_pages_first(struct kvm_mmu_pages *pvec, | ||
2014 | struct mmu_page_path *parents) | ||
2015 | { | ||
2016 | struct kvm_mmu_page *sp; | ||
2017 | int level; | ||
2018 | |||
2019 | if (pvec->nr == 0) | ||
2020 | return 0; | ||
2021 | |||
2022 | WARN_ON(pvec->page[0].idx != INVALID_INDEX); | ||
2023 | |||
2024 | sp = pvec->page[0].sp; | ||
2025 | level = sp->role.level; | ||
2026 | WARN_ON(level == PT_PAGE_TABLE_LEVEL); | ||
2027 | |||
2028 | parents->parent[level-2] = sp; | ||
2029 | |||
2030 | /* Also set up a sentinel. Further entries in pvec are all | ||
2031 | * children of sp, so this element is never overwritten. | ||
2032 | */ | ||
2033 | parents->parent[level-1] = NULL; | ||
2034 | return mmu_pages_next(pvec, parents, 0); | ||
2035 | } | ||
2036 | |||
1990 | static void mmu_pages_clear_parents(struct mmu_page_path *parents) | 2037 | static void mmu_pages_clear_parents(struct mmu_page_path *parents) |
1991 | { | 2038 | { |
1992 | struct kvm_mmu_page *sp; | 2039 | struct kvm_mmu_page *sp; |
@@ -1994,22 +2041,14 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents) | |||
1994 | 2041 | ||
1995 | do { | 2042 | do { |
1996 | unsigned int idx = parents->idx[level]; | 2043 | unsigned int idx = parents->idx[level]; |
1997 | |||
1998 | sp = parents->parent[level]; | 2044 | sp = parents->parent[level]; |
1999 | if (!sp) | 2045 | if (!sp) |
2000 | return; | 2046 | return; |
2001 | 2047 | ||
2048 | WARN_ON(idx == INVALID_INDEX); | ||
2002 | clear_unsync_child_bit(sp, idx); | 2049 | clear_unsync_child_bit(sp, idx); |
2003 | level++; | 2050 | level++; |
2004 | } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); | 2051 | } while (!sp->unsync_children); |
2005 | } | ||
2006 | |||
2007 | static void kvm_mmu_pages_init(struct kvm_mmu_page *parent, | ||
2008 | struct mmu_page_path *parents, | ||
2009 | struct kvm_mmu_pages *pvec) | ||
2010 | { | ||
2011 | parents->parent[parent->role.level-1] = NULL; | ||
2012 | pvec->nr = 0; | ||
2013 | } | 2052 | } |
2014 | 2053 | ||
2015 | static void mmu_sync_children(struct kvm_vcpu *vcpu, | 2054 | static void mmu_sync_children(struct kvm_vcpu *vcpu, |
@@ -2020,30 +2059,36 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, | |||
2020 | struct mmu_page_path parents; | 2059 | struct mmu_page_path parents; |
2021 | struct kvm_mmu_pages pages; | 2060 | struct kvm_mmu_pages pages; |
2022 | LIST_HEAD(invalid_list); | 2061 | LIST_HEAD(invalid_list); |
2062 | bool flush = false; | ||
2023 | 2063 | ||
2024 | kvm_mmu_pages_init(parent, &parents, &pages); | ||
2025 | while (mmu_unsync_walk(parent, &pages)) { | 2064 | while (mmu_unsync_walk(parent, &pages)) { |
2026 | bool protected = false; | 2065 | bool protected = false; |
2027 | 2066 | ||
2028 | for_each_sp(pages, sp, parents, i) | 2067 | for_each_sp(pages, sp, parents, i) |
2029 | protected |= rmap_write_protect(vcpu, sp->gfn); | 2068 | protected |= rmap_write_protect(vcpu, sp->gfn); |
2030 | 2069 | ||
2031 | if (protected) | 2070 | if (protected) { |
2032 | kvm_flush_remote_tlbs(vcpu->kvm); | 2071 | kvm_flush_remote_tlbs(vcpu->kvm); |
2072 | flush = false; | ||
2073 | } | ||
2033 | 2074 | ||
2034 | for_each_sp(pages, sp, parents, i) { | 2075 | for_each_sp(pages, sp, parents, i) { |
2035 | kvm_sync_page(vcpu, sp, &invalid_list); | 2076 | flush |= kvm_sync_page(vcpu, sp, &invalid_list); |
2036 | mmu_pages_clear_parents(&parents); | 2077 | mmu_pages_clear_parents(&parents); |
2037 | } | 2078 | } |
2038 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 2079 | if (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)) { |
2039 | cond_resched_lock(&vcpu->kvm->mmu_lock); | 2080 | kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); |
2040 | kvm_mmu_pages_init(parent, &parents, &pages); | 2081 | cond_resched_lock(&vcpu->kvm->mmu_lock); |
2082 | flush = false; | ||
2083 | } | ||
2041 | } | 2084 | } |
2085 | |||
2086 | kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); | ||
2042 | } | 2087 | } |
2043 | 2088 | ||
2044 | static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) | 2089 | static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) |
2045 | { | 2090 | { |
2046 | sp->write_flooding_count = 0; | 2091 | atomic_set(&sp->write_flooding_count, 0); |
2047 | } | 2092 | } |
2048 | 2093 | ||
2049 | static void clear_sp_write_flooding_count(u64 *spte) | 2094 | static void clear_sp_write_flooding_count(u64 *spte) |
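mmu_sync_children() now batches its work: sync results accumulate in invalid_list plus a flush flag, and kvm_mmu_flush_or_zap() resolves both in one step (committing queued zaps already flushes remotely, so the flags become moot). The caller pattern, with the surrounding function assumed:

	/* kvm_sync_page()/kvm_mmu_flush_or_zap() are from the patch;
	 * the wrapper is an assumed sketch.
	 */
	static void example_sync_batch(struct kvm_vcpu *vcpu,
				       struct kvm_mmu_page *sp)
	{
		LIST_HEAD(invalid_list);
		bool flush = false;

		flush |= kvm_sync_page(vcpu, sp, &invalid_list);

		/* one commit: zapping implies a remote flush, otherwise
		 * honor the accumulated local-flush flag */
		kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
	}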
@@ -2069,6 +2114,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
2069 | unsigned quadrant; | 2114 | unsigned quadrant; |
2070 | struct kvm_mmu_page *sp; | 2115 | struct kvm_mmu_page *sp; |
2071 | bool need_sync = false; | 2116 | bool need_sync = false; |
2117 | bool flush = false; | ||
2118 | LIST_HEAD(invalid_list); | ||
2072 | 2119 | ||
2073 | role = vcpu->arch.mmu.base_role; | 2120 | role = vcpu->arch.mmu.base_role; |
2074 | role.level = level; | 2121 | role.level = level; |
@@ -2092,8 +2139,16 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
2092 | if (sp->role.word != role.word) | 2139 | if (sp->role.word != role.word) |
2093 | continue; | 2140 | continue; |
2094 | 2141 | ||
2095 | if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) | 2142 | if (sp->unsync) { |
2096 | break; | 2143 | /* The page is good, but __kvm_sync_page might still end |
2144 | * up zapping it. If so, break in order to rebuild it. | ||
2145 | */ | ||
2146 | if (!__kvm_sync_page(vcpu, sp, &invalid_list)) | ||
2147 | break; | ||
2148 | |||
2149 | WARN_ON(!list_empty(&invalid_list)); | ||
2150 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | ||
2151 | } | ||
2097 | 2152 | ||
2098 | if (sp->unsync_children) | 2153 | if (sp->unsync_children) |
2099 | kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); | 2154 | kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); |
@@ -2112,16 +2167,24 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
2112 | hlist_add_head(&sp->hash_link, | 2167 | hlist_add_head(&sp->hash_link, |
2113 | &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); | 2168 | &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); |
2114 | if (!direct) { | 2169 | if (!direct) { |
2115 | if (rmap_write_protect(vcpu, gfn)) | 2170 | /* |
2171 | * we should do write protection before syncing pages | ||
2172 | * otherwise the content of the synced shadow page may | ||
2173 | * be inconsistent with guest page table. | ||
2174 | */ | ||
2175 | account_shadowed(vcpu->kvm, sp); | ||
2176 | if (level == PT_PAGE_TABLE_LEVEL && | ||
2177 | rmap_write_protect(vcpu, gfn)) | ||
2116 | kvm_flush_remote_tlbs(vcpu->kvm); | 2178 | kvm_flush_remote_tlbs(vcpu->kvm); |
2117 | if (level > PT_PAGE_TABLE_LEVEL && need_sync) | ||
2118 | kvm_sync_pages(vcpu, gfn); | ||
2119 | 2179 | ||
2120 | account_shadowed(vcpu->kvm, sp); | 2180 | if (level > PT_PAGE_TABLE_LEVEL && need_sync) |
2181 | flush |= kvm_sync_pages(vcpu, gfn, &invalid_list); | ||
2121 | } | 2182 | } |
2122 | sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; | 2183 | sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; |
2123 | clear_page(sp->spt); | 2184 | clear_page(sp->spt); |
2124 | trace_kvm_mmu_get_page(sp, true); | 2185 | trace_kvm_mmu_get_page(sp, true); |
2186 | |||
2187 | kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); | ||
2125 | return sp; | 2188 | return sp; |
2126 | } | 2189 | } |
2127 | 2190 | ||
@@ -2269,7 +2332,6 @@ static int mmu_zap_unsync_children(struct kvm *kvm, | |||
2269 | if (parent->role.level == PT_PAGE_TABLE_LEVEL) | 2332 | if (parent->role.level == PT_PAGE_TABLE_LEVEL) |
2270 | return 0; | 2333 | return 0; |
2271 | 2334 | ||
2272 | kvm_mmu_pages_init(parent, &parents, &pages); | ||
2273 | while (mmu_unsync_walk(parent, &pages)) { | 2335 | while (mmu_unsync_walk(parent, &pages)) { |
2274 | struct kvm_mmu_page *sp; | 2336 | struct kvm_mmu_page *sp; |
2275 | 2337 | ||
@@ -2278,7 +2340,6 @@ static int mmu_zap_unsync_children(struct kvm *kvm, | |||
2278 | mmu_pages_clear_parents(&parents); | 2340 | mmu_pages_clear_parents(&parents); |
2279 | zapped++; | 2341 | zapped++; |
2280 | } | 2342 | } |
2281 | kvm_mmu_pages_init(parent, &parents, &pages); | ||
2282 | } | 2343 | } |
2283 | 2344 | ||
2284 | return zapped; | 2345 | return zapped; |
@@ -2354,8 +2415,8 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, | |||
2354 | if (list_empty(&kvm->arch.active_mmu_pages)) | 2415 | if (list_empty(&kvm->arch.active_mmu_pages)) |
2355 | return false; | 2416 | return false; |
2356 | 2417 | ||
2357 | sp = list_entry(kvm->arch.active_mmu_pages.prev, | 2418 | sp = list_last_entry(&kvm->arch.active_mmu_pages, |
2358 | struct kvm_mmu_page, link); | 2419 | struct kvm_mmu_page, link); |
2359 | kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); | 2420 | kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); |
2360 | 2421 | ||
2361 | return true; | 2422 | return true; |
@@ -2408,7 +2469,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
2408 | } | 2469 | } |
2409 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); | 2470 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); |
2410 | 2471 | ||
2411 | static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 2472 | static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
2412 | { | 2473 | { |
2413 | trace_kvm_mmu_unsync_page(sp); | 2474 | trace_kvm_mmu_unsync_page(sp); |
2414 | ++vcpu->kvm->stat.mmu_unsync; | 2475 | ++vcpu->kvm->stat.mmu_unsync; |
@@ -2417,37 +2478,26 @@ static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
2417 | kvm_mmu_mark_parents_unsync(sp); | 2478 | kvm_mmu_mark_parents_unsync(sp); |
2418 | } | 2479 | } |
2419 | 2480 | ||
2420 | static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | 2481 | static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, |
2482 | bool can_unsync) | ||
2421 | { | 2483 | { |
2422 | struct kvm_mmu_page *s; | 2484 | struct kvm_mmu_page *sp; |
2423 | |||
2424 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { | ||
2425 | if (s->unsync) | ||
2426 | continue; | ||
2427 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); | ||
2428 | __kvm_unsync_page(vcpu, s); | ||
2429 | } | ||
2430 | } | ||
2431 | 2485 | ||
2432 | static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | 2486 | if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE)) |
2433 | bool can_unsync) | 2487 | return true; |
2434 | { | ||
2435 | struct kvm_mmu_page *s; | ||
2436 | bool need_unsync = false; | ||
2437 | 2488 | ||
2438 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { | 2489 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { |
2439 | if (!can_unsync) | 2490 | if (!can_unsync) |
2440 | return 1; | 2491 | return true; |
2441 | 2492 | ||
2442 | if (s->role.level != PT_PAGE_TABLE_LEVEL) | 2493 | if (sp->unsync) |
2443 | return 1; | 2494 | continue; |
2444 | 2495 | ||
2445 | if (!s->unsync) | 2496 | WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL); |
2446 | need_unsync = true; | 2497 | kvm_unsync_page(vcpu, sp); |
2447 | } | 2498 | } |
2448 | if (need_unsync) | 2499 | |
2449 | kvm_unsync_pages(vcpu, gfn); | 2500 | return false; |
2450 | return 0; | ||
2451 | } | 2501 | } |
2452 | 2502 | ||
2453 | static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) | 2503 | static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) |
@@ -2503,7 +2553,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2503 | * be fixed if guest refault. | 2553 | * be fixed if guest refault. |
2504 | */ | 2554 | */ |
2505 | if (level > PT_PAGE_TABLE_LEVEL && | 2555 | if (level > PT_PAGE_TABLE_LEVEL && |
2506 | has_wrprotected_page(vcpu, gfn, level)) | 2556 | mmu_gfn_lpage_is_disallowed(vcpu, gfn, level)) |
2507 | goto done; | 2557 | goto done; |
2508 | 2558 | ||
2509 | spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; | 2559 | spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; |
@@ -2768,7 +2818,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, | |||
2768 | if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && | 2818 | if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && |
2769 | level == PT_PAGE_TABLE_LEVEL && | 2819 | level == PT_PAGE_TABLE_LEVEL && |
2770 | PageTransCompound(pfn_to_page(pfn)) && | 2820 | PageTransCompound(pfn_to_page(pfn)) && |
2771 | !has_wrprotected_page(vcpu, gfn, PT_DIRECTORY_LEVEL)) { | 2821 | !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) { |
2772 | unsigned long mask; | 2822 | unsigned long mask; |
2773 | /* | 2823 | /* |
2774 | * mmu_notifier_retry was successful and we hold the | 2824 | * mmu_notifier_retry was successful and we hold the |
@@ -2796,20 +2846,16 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, | |||
2796 | static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, | 2846 | static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, |
2797 | kvm_pfn_t pfn, unsigned access, int *ret_val) | 2847 | kvm_pfn_t pfn, unsigned access, int *ret_val) |
2798 | { | 2848 | { |
2799 | bool ret = true; | ||
2800 | |||
2801 | /* The pfn is invalid, report the error! */ | 2849 | /* The pfn is invalid, report the error! */ |
2802 | if (unlikely(is_error_pfn(pfn))) { | 2850 | if (unlikely(is_error_pfn(pfn))) { |
2803 | *ret_val = kvm_handle_bad_page(vcpu, gfn, pfn); | 2851 | *ret_val = kvm_handle_bad_page(vcpu, gfn, pfn); |
2804 | goto exit; | 2852 | return true; |
2805 | } | 2853 | } |
2806 | 2854 | ||
2807 | if (unlikely(is_noslot_pfn(pfn))) | 2855 | if (unlikely(is_noslot_pfn(pfn))) |
2808 | vcpu_cache_mmio_info(vcpu, gva, gfn, access); | 2856 | vcpu_cache_mmio_info(vcpu, gva, gfn, access); |
2809 | 2857 | ||
2810 | ret = false; | 2858 | return false; |
2811 | exit: | ||
2812 | return ret; | ||
2813 | } | 2859 | } |
2814 | 2860 | ||
2815 | static bool page_fault_can_be_fast(u32 error_code) | 2861 | static bool page_fault_can_be_fast(u32 error_code) |
@@ -3273,7 +3319,7 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level) | |||
3273 | return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level); | 3319 | return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level); |
3274 | } | 3320 | } |
3275 | 3321 | ||
3276 | static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) | 3322 | static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) |
3277 | { | 3323 | { |
3278 | if (direct) | 3324 | if (direct) |
3279 | return vcpu_match_mmio_gpa(vcpu, addr); | 3325 | return vcpu_match_mmio_gpa(vcpu, addr); |
@@ -3332,7 +3378,7 @@ int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) | |||
3332 | u64 spte; | 3378 | u64 spte; |
3333 | bool reserved; | 3379 | bool reserved; |
3334 | 3380 | ||
3335 | if (quickly_check_mmio_pf(vcpu, addr, direct)) | 3381 | if (mmio_info_in_cache(vcpu, addr, direct)) |
3336 | return RET_MMIO_PF_EMULATE; | 3382 | return RET_MMIO_PF_EMULATE; |
3337 | 3383 | ||
3338 | reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte); | 3384 | reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte); |
@@ -3362,20 +3408,53 @@ int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) | |||
3362 | } | 3408 | } |
3363 | EXPORT_SYMBOL_GPL(handle_mmio_page_fault); | 3409 | EXPORT_SYMBOL_GPL(handle_mmio_page_fault); |
3364 | 3410 | ||
3411 | static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu, | ||
3412 | u32 error_code, gfn_t gfn) | ||
3413 | { | ||
3414 | if (unlikely(error_code & PFERR_RSVD_MASK)) | ||
3415 | return false; | ||
3416 | |||
3417 | if (!(error_code & PFERR_PRESENT_MASK) || | ||
3418 | !(error_code & PFERR_WRITE_MASK)) | ||
3419 | return false; | ||
3420 | |||
3421 | /* | ||
3422 | * The guest is writing a page which is write-tracked; this | ||
3423 | * cannot be fixed by the page fault handler. | ||
3424 | */ | ||
3425 | if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE)) | ||
3426 | return true; | ||
3427 | |||
3428 | return false; | ||
3429 | } | ||
3430 | |||
3431 | static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) | ||
3432 | { | ||
3433 | struct kvm_shadow_walk_iterator iterator; | ||
3434 | u64 spte; | ||
3435 | |||
3436 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
3437 | return; | ||
3438 | |||
3439 | walk_shadow_page_lockless_begin(vcpu); | ||
3440 | for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { | ||
3441 | clear_sp_write_flooding_count(iterator.sptep); | ||
3442 | if (!is_shadow_present_pte(spte)) | ||
3443 | break; | ||
3444 | } | ||
3445 | walk_shadow_page_lockless_end(vcpu); | ||
3446 | } | ||
3447 | |||
3365 | static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | 3448 | static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, |
3366 | u32 error_code, bool prefault) | 3449 | u32 error_code, bool prefault) |
3367 | { | 3450 | { |
3368 | gfn_t gfn; | 3451 | gfn_t gfn = gva >> PAGE_SHIFT; |
3369 | int r; | 3452 | int r; |
3370 | 3453 | ||
3371 | pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); | 3454 | pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); |
3372 | 3455 | ||
3373 | if (unlikely(error_code & PFERR_RSVD_MASK)) { | 3456 | if (page_fault_handle_page_track(vcpu, error_code, gfn)) |
3374 | r = handle_mmio_page_fault(vcpu, gva, true); | 3457 | return 1; |
3375 | |||
3376 | if (likely(r != RET_MMIO_PF_INVALID)) | ||
3377 | return r; | ||
3378 | } | ||
3379 | 3458 | ||
3380 | r = mmu_topup_memory_caches(vcpu); | 3459 | r = mmu_topup_memory_caches(vcpu); |
3381 | if (r) | 3460 | if (r) |
@@ -3383,7 +3462,6 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | |||
3383 | 3462 | ||
3384 | MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 3463 | MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
3385 | 3464 | ||
3386 | gfn = gva >> PAGE_SHIFT; | ||
3387 | 3465 | ||
3388 | return nonpaging_map(vcpu, gva & PAGE_MASK, | 3466 | return nonpaging_map(vcpu, gva & PAGE_MASK, |
3389 | error_code, gfn, prefault); | 3467 | error_code, gfn, prefault); |
@@ -3460,12 +3538,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
3460 | 3538 | ||
3461 | MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 3539 | MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
3462 | 3540 | ||
3463 | if (unlikely(error_code & PFERR_RSVD_MASK)) { | 3541 | if (page_fault_handle_page_track(vcpu, error_code, gfn)) |
3464 | r = handle_mmio_page_fault(vcpu, gpa, true); | 3542 | return 1; |
3465 | |||
3466 | if (likely(r != RET_MMIO_PF_INVALID)) | ||
3467 | return r; | ||
3468 | } | ||
3469 | 3543 | ||
3470 | r = mmu_topup_memory_caches(vcpu); | 3544 | r = mmu_topup_memory_caches(vcpu); |
3471 | if (r) | 3545 | if (r) |
@@ -3558,13 +3632,24 @@ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, | |||
3558 | return false; | 3632 | return false; |
3559 | } | 3633 | } |
3560 | 3634 | ||
3561 | static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) | 3635 | static inline bool is_last_gpte(struct kvm_mmu *mmu, |
3636 | unsigned level, unsigned gpte) | ||
3562 | { | 3637 | { |
3563 | unsigned index; | 3638 | /* |
3639 | * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set | ||
3640 | * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means | ||
3641 | * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then. | ||
3642 | */ | ||
3643 | gpte |= level - PT_PAGE_TABLE_LEVEL - 1; | ||
3564 | 3644 | ||
3565 | index = level - 1; | 3645 | /* |
3566 | index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2); | 3646 | * The RHS has bit 7 set iff level < mmu->last_nonleaf_level. |
3567 | return mmu->last_pte_bitmap & (1 << index); | 3647 | * If it is clear, there are no large pages at this level, so clear |
3648 | * PT_PAGE_SIZE_MASK in gpte if that is the case. | ||
3649 | */ | ||
3650 | gpte &= level - mmu->last_nonleaf_level; | ||
3651 | |||
3652 | return gpte & PT_PAGE_SIZE_MASK; | ||
3568 | } | 3653 | } |
3569 | 3654 | ||
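The rewritten is_last_gpte() above replaces the old last_pte_bitmap lookup with branch-free arithmetic that relies on unsigned wrap-around, and the companion update_last_nonleaf_level() further below simply sets the threshold to the root level, bumping it for 32-bit PSE paging, so the PS bit is honored exactly at the levels where large pages can exist. A self-contained sketch of the same trick, with the two relevant constants mirrored from the kernel headers (PT_PAGE_TABLE_LEVEL == 1, PT_PAGE_SIZE_MASK == bit 7), shows how the two subtractions select the PS bit:

    #include <assert.h>

    #define PT_PAGE_TABLE_LEVEL 1
    #define PT_PAGE_SIZE_MASK   (1u << 7)

    /* gpte is the guest PTE; last_nonleaf_level mirrors mmu->last_nonleaf_level. */
    static unsigned int is_last(unsigned int level, unsigned int last_nonleaf_level,
                                unsigned int gpte)
    {
            gpte |= level - PT_PAGE_TABLE_LEVEL - 1;  /* all-ones iff level == 1 */
            gpte &= level - last_nonleaf_level;       /* all-ones iff level < last */
            return gpte & PT_PAGE_SIZE_MASK;
    }

    int main(void)
    {
            assert(is_last(1, 2, 0));                  /* 4K leaf always terminates */
            assert(is_last(2, 3, PT_PAGE_SIZE_MASK));  /* large page where allowed */
            assert(!is_last(2, 2, PT_PAGE_SIZE_MASK)); /* PS ignored: no large pages */
            return 0;
    }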
3570 | #define PTTYPE_EPT 18 /* arbitrary */ | 3655 | #define PTTYPE_EPT 18 /* arbitrary */ |
@@ -3838,22 +3923,13 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, | |||
3838 | } | 3923 | } |
3839 | } | 3924 | } |
3840 | 3925 | ||
3841 | static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) | 3926 | static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) |
3842 | { | 3927 | { |
3843 | u8 map; | 3928 | unsigned root_level = mmu->root_level; |
3844 | unsigned level, root_level = mmu->root_level; | 3929 | |
3845 | const unsigned ps_set_index = 1 << 2; /* bit 2 of index: ps */ | 3930 | mmu->last_nonleaf_level = root_level; |
3846 | 3931 | if (root_level == PT32_ROOT_LEVEL && is_pse(vcpu)) | |
3847 | if (root_level == PT32E_ROOT_LEVEL) | 3932 | mmu->last_nonleaf_level++; |
3848 | --root_level; | ||
3849 | /* PT_PAGE_TABLE_LEVEL always terminates */ | ||
3850 | map = 1 | (1 << ps_set_index); | ||
3851 | for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) { | ||
3852 | if (level <= PT_PDPE_LEVEL | ||
3853 | && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu))) | ||
3854 | map |= 1 << (ps_set_index | (level - 1)); | ||
3855 | } | ||
3856 | mmu->last_pte_bitmap = map; | ||
3857 | } | 3933 | } |
3858 | 3934 | ||
3859 | static void paging64_init_context_common(struct kvm_vcpu *vcpu, | 3935 | static void paging64_init_context_common(struct kvm_vcpu *vcpu, |
@@ -3865,7 +3941,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
3865 | 3941 | ||
3866 | reset_rsvds_bits_mask(vcpu, context); | 3942 | reset_rsvds_bits_mask(vcpu, context); |
3867 | update_permission_bitmask(vcpu, context, false); | 3943 | update_permission_bitmask(vcpu, context, false); |
3868 | update_last_pte_bitmap(vcpu, context); | 3944 | update_last_nonleaf_level(vcpu, context); |
3869 | 3945 | ||
3870 | MMU_WARN_ON(!is_pae(vcpu)); | 3946 | MMU_WARN_ON(!is_pae(vcpu)); |
3871 | context->page_fault = paging64_page_fault; | 3947 | context->page_fault = paging64_page_fault; |
@@ -3892,7 +3968,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu, | |||
3892 | 3968 | ||
3893 | reset_rsvds_bits_mask(vcpu, context); | 3969 | reset_rsvds_bits_mask(vcpu, context); |
3894 | update_permission_bitmask(vcpu, context, false); | 3970 | update_permission_bitmask(vcpu, context, false); |
3895 | update_last_pte_bitmap(vcpu, context); | 3971 | update_last_nonleaf_level(vcpu, context); |
3896 | 3972 | ||
3897 | context->page_fault = paging32_page_fault; | 3973 | context->page_fault = paging32_page_fault; |
3898 | context->gva_to_gpa = paging32_gva_to_gpa; | 3974 | context->gva_to_gpa = paging32_gva_to_gpa; |
@@ -3950,7 +4026,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
3950 | } | 4026 | } |
3951 | 4027 | ||
3952 | update_permission_bitmask(vcpu, context, false); | 4028 | update_permission_bitmask(vcpu, context, false); |
3953 | update_last_pte_bitmap(vcpu, context); | 4029 | update_last_nonleaf_level(vcpu, context); |
3954 | reset_tdp_shadow_zero_bits_mask(vcpu, context); | 4030 | reset_tdp_shadow_zero_bits_mask(vcpu, context); |
3955 | } | 4031 | } |
3956 | 4032 | ||
@@ -4056,7 +4132,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
4056 | } | 4132 | } |
4057 | 4133 | ||
4058 | update_permission_bitmask(vcpu, g_context, false); | 4134 | update_permission_bitmask(vcpu, g_context, false); |
4059 | update_last_pte_bitmap(vcpu, g_context); | 4135 | update_last_nonleaf_level(vcpu, g_context); |
4060 | } | 4136 | } |
4061 | 4137 | ||
4062 | static void init_kvm_mmu(struct kvm_vcpu *vcpu) | 4138 | static void init_kvm_mmu(struct kvm_vcpu *vcpu) |
@@ -4127,18 +4203,6 @@ static bool need_remote_flush(u64 old, u64 new) | |||
4127 | return (old & ~new & PT64_PERM_MASK) != 0; | 4203 | return (old & ~new & PT64_PERM_MASK) != 0; |
4128 | } | 4204 | } |
4129 | 4205 | ||
4130 | static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, | ||
4131 | bool remote_flush, bool local_flush) | ||
4132 | { | ||
4133 | if (zap_page) | ||
4134 | return; | ||
4135 | |||
4136 | if (remote_flush) | ||
4137 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
4138 | else if (local_flush) | ||
4139 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | ||
4140 | } | ||
4141 | |||
4142 | static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, | 4206 | static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, |
4143 | const u8 *new, int *bytes) | 4207 | const u8 *new, int *bytes) |
4144 | { | 4208 | { |
@@ -4188,7 +4252,8 @@ static bool detect_write_flooding(struct kvm_mmu_page *sp) | |||
4188 | if (sp->role.level == PT_PAGE_TABLE_LEVEL) | 4252 | if (sp->role.level == PT_PAGE_TABLE_LEVEL) |
4189 | return false; | 4253 | return false; |
4190 | 4254 | ||
4191 | return ++sp->write_flooding_count >= 3; | 4255 | atomic_inc(&sp->write_flooding_count); |
4256 | return atomic_read(&sp->write_flooding_count) >= 3; | ||
4192 | } | 4257 | } |
4193 | 4258 | ||
4194 | /* | 4259 | /* |
@@ -4250,15 +4315,15 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte) | |||
4250 | return spte; | 4315 | return spte; |
4251 | } | 4316 | } |
4252 | 4317 | ||
4253 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 4318 | static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
4254 | const u8 *new, int bytes) | 4319 | const u8 *new, int bytes) |
4255 | { | 4320 | { |
4256 | gfn_t gfn = gpa >> PAGE_SHIFT; | 4321 | gfn_t gfn = gpa >> PAGE_SHIFT; |
4257 | struct kvm_mmu_page *sp; | 4322 | struct kvm_mmu_page *sp; |
4258 | LIST_HEAD(invalid_list); | 4323 | LIST_HEAD(invalid_list); |
4259 | u64 entry, gentry, *spte; | 4324 | u64 entry, gentry, *spte; |
4260 | int npte; | 4325 | int npte; |
4261 | bool remote_flush, local_flush, zap_page; | 4326 | bool remote_flush, local_flush; |
4262 | union kvm_mmu_page_role mask = { }; | 4327 | union kvm_mmu_page_role mask = { }; |
4263 | 4328 | ||
4264 | mask.cr0_wp = 1; | 4329 | mask.cr0_wp = 1; |
@@ -4275,7 +4340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
4275 | if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) | 4340 | if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) |
4276 | return; | 4341 | return; |
4277 | 4342 | ||
4278 | zap_page = remote_flush = local_flush = false; | 4343 | remote_flush = local_flush = false; |
4279 | 4344 | ||
4280 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 4345 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
4281 | 4346 | ||
@@ -4295,8 +4360,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
4295 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { | 4360 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { |
4296 | if (detect_write_misaligned(sp, gpa, bytes) || | 4361 | if (detect_write_misaligned(sp, gpa, bytes) || |
4297 | detect_write_flooding(sp)) { | 4362 | detect_write_flooding(sp)) { |
4298 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, | 4363 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); |
4299 | &invalid_list); | ||
4300 | ++vcpu->kvm->stat.mmu_flooded; | 4364 | ++vcpu->kvm->stat.mmu_flooded; |
4301 | continue; | 4365 | continue; |
4302 | } | 4366 | } |
@@ -4318,8 +4382,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
4318 | ++spte; | 4382 | ++spte; |
4319 | } | 4383 | } |
4320 | } | 4384 | } |
4321 | mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); | 4385 | kvm_mmu_flush_or_zap(vcpu, &invalid_list, remote_flush, local_flush); |
4322 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
4323 | kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); | 4386 | kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); |
4324 | spin_unlock(&vcpu->kvm->mmu_lock); | 4387 | spin_unlock(&vcpu->kvm->mmu_lock); |
4325 | } | 4388 | } |
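kvm_mmu_flush_or_zap() is introduced earlier in the patch, outside this excerpt. Folding the old zap_page bookkeeping into it works because a non-empty invalid_list implies a zap happened, and kvm_mmu_commit_zap_page() already performs the remote TLB flush. A sketch of the helper's presumed shape, under that assumption and within mmu.c context:

    static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu,
                                     struct list_head *invalid_list,
                                     bool remote_flush, bool local_flush)
    {
            if (!list_empty(invalid_list)) {
                    /* Committing the zap already flushes remote TLBs. */
                    kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list);
                    return;
            }

            if (remote_flush)
                    kvm_flush_remote_tlbs(vcpu->kvm);
            else if (local_flush)
                    kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
    }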
@@ -4356,32 +4419,34 @@ static void make_mmu_pages_available(struct kvm_vcpu *vcpu) | |||
4356 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 4419 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
4357 | } | 4420 | } |
4358 | 4421 | ||
4359 | static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr) | ||
4360 | { | ||
4361 | if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu)) | ||
4362 | return vcpu_match_mmio_gpa(vcpu, addr); | ||
4363 | |||
4364 | return vcpu_match_mmio_gva(vcpu, addr); | ||
4365 | } | ||
4366 | |||
4367 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, | 4422 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, |
4368 | void *insn, int insn_len) | 4423 | void *insn, int insn_len) |
4369 | { | 4424 | { |
4370 | int r, emulation_type = EMULTYPE_RETRY; | 4425 | int r, emulation_type = EMULTYPE_RETRY; |
4371 | enum emulation_result er; | 4426 | enum emulation_result er; |
4427 | bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu); | ||
4428 | |||
4429 | if (unlikely(error_code & PFERR_RSVD_MASK)) { | ||
4430 | r = handle_mmio_page_fault(vcpu, cr2, direct); | ||
4431 | if (r == RET_MMIO_PF_EMULATE) { | ||
4432 | emulation_type = 0; | ||
4433 | goto emulate; | ||
4434 | } | ||
4435 | if (r == RET_MMIO_PF_RETRY) | ||
4436 | return 1; | ||
4437 | if (r < 0) | ||
4438 | return r; | ||
4439 | } | ||
4372 | 4440 | ||
4373 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false); | 4441 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false); |
4374 | if (r < 0) | 4442 | if (r < 0) |
4375 | goto out; | 4443 | return r; |
4376 | 4444 | if (!r) | |
4377 | if (!r) { | 4445 | return 1; |
4378 | r = 1; | ||
4379 | goto out; | ||
4380 | } | ||
4381 | 4446 | ||
4382 | if (is_mmio_page_fault(vcpu, cr2)) | 4447 | if (mmio_info_in_cache(vcpu, cr2, direct)) |
4383 | emulation_type = 0; | 4448 | emulation_type = 0; |
4384 | 4449 | emulate: | |
4385 | er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); | 4450 | er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); |
4386 | 4451 | ||
4387 | switch (er) { | 4452 | switch (er) { |
@@ -4395,8 +4460,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, | |||
4395 | default: | 4460 | default: |
4396 | BUG(); | 4461 | BUG(); |
4397 | } | 4462 | } |
4398 | out: | ||
4399 | return r; | ||
4400 | } | 4463 | } |
4401 | EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); | 4464 | EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); |
4402 | 4465 | ||
@@ -4465,6 +4528,21 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu) | |||
4465 | init_kvm_mmu(vcpu); | 4528 | init_kvm_mmu(vcpu); |
4466 | } | 4529 | } |
4467 | 4530 | ||
4531 | void kvm_mmu_init_vm(struct kvm *kvm) | ||
4532 | { | ||
4533 | struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; | ||
4534 | |||
4535 | node->track_write = kvm_mmu_pte_write; | ||
4536 | kvm_page_track_register_notifier(kvm, node); | ||
4537 | } | ||
4538 | |||
4539 | void kvm_mmu_uninit_vm(struct kvm *kvm) | ||
4540 | { | ||
4541 | struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; | ||
4542 | |||
4543 | kvm_page_track_unregister_notifier(kvm, node); | ||
4544 | } | ||
4545 | |||
4468 | /* The return value indicates if tlb flush on all vcpus is needed. */ | 4546 | /* The return value indicates if tlb flush on all vcpus is needed. */ |
4469 | typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); | 4547 | typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); |
4470 | 4548 | ||
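With kvm_mmu_init_vm() registering kvm_mmu_pte_write() as an ordinary page-track notifier, the MMU no longer needs a dedicated hook in the emulator: the emulated-write path only has to call kvm_page_track_write() and every registered track_write callback fans out from there. The exact call site is outside this excerpt; a sketch of the expected shape in the emulator's physical-write path, assuming x86.c context:

    static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
                                   const void *val, int bytes)
    {
            int ret;

            ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
            if (ret < 0)
                    return 0;

            /* Fan the write out to every registered page-track notifier. */
            kvm_page_track_write(vcpu, gpa, val, bytes);
            return 1;
    }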
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 55ffb7b0f95e..58fe98a0a526 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -174,4 +174,9 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
174 | 174 | ||
175 | void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); | 175 | void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); |
176 | void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); | 176 | void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); |
177 | |||
178 | void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); | ||
179 | void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); | ||
180 | bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, | ||
181 | struct kvm_memory_slot *slot, u64 gfn); | ||
177 | #endif | 182 | #endif |
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c new file mode 100644 index 000000000000..11f76436f74f --- /dev/null +++ b/arch/x86/kvm/page_track.c | |||
@@ -0,0 +1,222 @@ | |||
1 | /* | ||
2 | * Support KVM guest page tracking | ||
3 | * | ||
4 | * This feature allows us to track page access in the guest. Currently, | ||
5 | * only write access is tracked. | ||
6 | * | ||
7 | * Copyright(C) 2015 Intel Corporation. | ||
8 | * | ||
9 | * Author: | ||
10 | * Xiao Guangrong <guangrong.xiao@linux.intel.com> | ||
11 | * | ||
12 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
13 | * the COPYING file in the top-level directory. | ||
14 | */ | ||
15 | |||
16 | #include <linux/kvm_host.h> | ||
17 | #include <asm/kvm_host.h> | ||
18 | #include <asm/kvm_page_track.h> | ||
19 | |||
20 | #include "mmu.h" | ||
21 | |||
22 | void kvm_page_track_free_memslot(struct kvm_memory_slot *free, | ||
23 | struct kvm_memory_slot *dont) | ||
24 | { | ||
25 | int i; | ||
26 | |||
27 | for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) | ||
28 | if (!dont || free->arch.gfn_track[i] != | ||
29 | dont->arch.gfn_track[i]) { | ||
30 | kvfree(free->arch.gfn_track[i]); | ||
31 | free->arch.gfn_track[i] = NULL; | ||
32 | } | ||
33 | } | ||
34 | |||
35 | int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, | ||
36 | unsigned long npages) | ||
37 | { | ||
38 | int i; | ||
39 | |||
40 | for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { | ||
41 | slot->arch.gfn_track[i] = kvm_kvzalloc(npages * | ||
42 | sizeof(*slot->arch.gfn_track[i])); | ||
43 | if (!slot->arch.gfn_track[i]) | ||
44 | goto track_free; | ||
45 | } | ||
46 | |||
47 | return 0; | ||
48 | |||
49 | track_free: | ||
50 | kvm_page_track_free_memslot(slot, NULL); | ||
51 | return -ENOMEM; | ||
52 | } | ||
53 | |||
54 | static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode) | ||
55 | { | ||
56 | if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX) | ||
57 | return false; | ||
58 | |||
59 | return true; | ||
60 | } | ||
61 | |||
62 | static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn, | ||
63 | enum kvm_page_track_mode mode, short count) | ||
64 | { | ||
65 | int index, val; | ||
66 | |||
67 | index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL); | ||
68 | |||
69 | val = slot->arch.gfn_track[mode][index]; | ||
70 | |||
71 | if (WARN_ON(val + count < 0 || val + count > USHRT_MAX)) | ||
72 | return; | ||
73 | |||
74 | slot->arch.gfn_track[mode][index] += count; | ||
75 | } | ||
76 | |||
77 | /* | ||
78 | * Add a guest page to the tracking pool so that corresponding accesses | ||
79 | * to that page will be intercepted. | ||
80 | * | ||
81 | * It should be called under the protection of mmu-lock and of either | ||
82 | * kvm->srcu or kvm->slots_lock. | ||
83 | * | ||
84 | * @kvm: the guest instance we are interested in. | ||
85 | * @slot: the memslot that @gfn belongs to. | ||
86 | * @gfn: the guest page. | ||
87 | * @mode: tracking mode, currently only write track is supported. | ||
88 | */ | ||
89 | void kvm_slot_page_track_add_page(struct kvm *kvm, | ||
90 | struct kvm_memory_slot *slot, gfn_t gfn, | ||
91 | enum kvm_page_track_mode mode) | ||
92 | { | ||
93 | |||
94 | if (WARN_ON(!page_track_mode_is_valid(mode))) | ||
95 | return; | ||
96 | |||
97 | update_gfn_track(slot, gfn, mode, 1); | ||
98 | |||
99 | /* | ||
100 | * A new tracker stops large page mapping for the | ||
101 | * tracked page. | ||
102 | */ | ||
103 | kvm_mmu_gfn_disallow_lpage(slot, gfn); | ||
104 | |||
105 | if (mode == KVM_PAGE_TRACK_WRITE) | ||
106 | if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn)) | ||
107 | kvm_flush_remote_tlbs(kvm); | ||
108 | } | ||
109 | |||
110 | /* | ||
111 | * Remove the guest page from the tracking pool, which stops the interception | ||
112 | * of corresponding accesses to that page. It is the opposite of | ||
113 | * kvm_slot_page_track_add_page(). | ||
114 | * | ||
115 | * It should be called under the protection of mmu-lock and of either | ||
116 | * kvm->srcu or kvm->slots_lock. | ||
117 | * | ||
118 | * @kvm: the guest instance we are interested in. | ||
119 | * @slot: the memslot that @gfn belongs to. | ||
120 | * @gfn: the guest page. | ||
121 | * @mode: tracking mode, currently only write track is supported. | ||
122 | */ | ||
123 | void kvm_slot_page_track_remove_page(struct kvm *kvm, | ||
124 | struct kvm_memory_slot *slot, gfn_t gfn, | ||
125 | enum kvm_page_track_mode mode) | ||
126 | { | ||
127 | if (WARN_ON(!page_track_mode_is_valid(mode))) | ||
128 | return; | ||
129 | |||
130 | update_gfn_track(slot, gfn, mode, -1); | ||
131 | |||
132 | /* | ||
133 | * allow large page mapping for the tracked page | ||
134 | * after the tracker is gone. | ||
135 | */ | ||
136 | kvm_mmu_gfn_allow_lpage(slot, gfn); | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Check whether the corresponding access to the specified guest page is tracked. | ||
141 | */ | ||
142 | bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, | ||
143 | enum kvm_page_track_mode mode) | ||
144 | { | ||
145 | struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); | ||
146 | int index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL); | ||
147 | |||
148 | if (WARN_ON(!page_track_mode_is_valid(mode))) | ||
149 | return false; | ||
150 | |||
151 | return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]); | ||
152 | } | ||
153 | |||
154 | void kvm_page_track_init(struct kvm *kvm) | ||
155 | { | ||
156 | struct kvm_page_track_notifier_head *head; | ||
157 | |||
158 | head = &kvm->arch.track_notifier_head; | ||
159 | init_srcu_struct(&head->track_srcu); | ||
160 | INIT_HLIST_HEAD(&head->track_notifier_list); | ||
161 | } | ||
162 | |||
163 | /* | ||
164 | * Register the notifier so that intercepted events on the tracked | ||
165 | * guest pages can be received. | ||
166 | */ | ||
167 | void | ||
168 | kvm_page_track_register_notifier(struct kvm *kvm, | ||
169 | struct kvm_page_track_notifier_node *n) | ||
170 | { | ||
171 | struct kvm_page_track_notifier_head *head; | ||
172 | |||
173 | head = &kvm->arch.track_notifier_head; | ||
174 | |||
175 | spin_lock(&kvm->mmu_lock); | ||
176 | hlist_add_head_rcu(&n->node, &head->track_notifier_list); | ||
177 | spin_unlock(&kvm->mmu_lock); | ||
178 | } | ||
179 | |||
180 | /* | ||
181 | * Stop receiving intercepted events. It is the opposite of | ||
182 | * kvm_page_track_register_notifier(). | ||
183 | */ | ||
184 | void | ||
185 | kvm_page_track_unregister_notifier(struct kvm *kvm, | ||
186 | struct kvm_page_track_notifier_node *n) | ||
187 | { | ||
188 | struct kvm_page_track_notifier_head *head; | ||
189 | |||
190 | head = &kvm->arch.track_notifier_head; | ||
191 | |||
192 | spin_lock(&kvm->mmu_lock); | ||
193 | hlist_del_rcu(&n->node); | ||
194 | spin_unlock(&kvm->mmu_lock); | ||
195 | synchronize_srcu(&head->track_srcu); | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | * Notify the nodes that a write access has been intercepted and that | ||
200 | * write emulation has finished. | ||
201 | * | ||
202 | * Each node must figure out by itself whether the written page is one | ||
203 | * it is interested in. | ||
204 | */ | ||
205 | void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, | ||
206 | int bytes) | ||
207 | { | ||
208 | struct kvm_page_track_notifier_head *head; | ||
209 | struct kvm_page_track_notifier_node *n; | ||
210 | int idx; | ||
211 | |||
212 | head = &vcpu->kvm->arch.track_notifier_head; | ||
213 | |||
214 | if (hlist_empty(&head->track_notifier_list)) | ||
215 | return; | ||
216 | |||
217 | idx = srcu_read_lock(&head->track_srcu); | ||
218 | hlist_for_each_entry_rcu(n, &head->track_notifier_list, node) | ||
219 | if (n->track_write) | ||
220 | n->track_write(vcpu, gpa, new, bytes); | ||
221 | srcu_read_unlock(&head->track_srcu, idx); | ||
222 | } | ||
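Taken together, the new file gives out-of-MMU consumers (the virtual-GPU use case named in the merge description) a small API surface: register a notifier node with a track_write callback, then pin the gfns of interest. A hypothetical consumer, with the "gpu_" names invented purely for illustration:

    #include <linux/kvm_host.h>
    #include <asm/kvm_page_track.h>

    /* Hypothetical consumer; the "gpu_" names are illustrative only. */
    static void gpu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                                const u8 *new, int bytes)
    {
            /* Runs after write emulation completes; filter for our own pages. */
    }

    static struct kvm_page_track_notifier_node gpu_node = {
            .track_write = gpu_track_write,
    };

    static void gpu_start_tracking(struct kvm *kvm, struct kvm_memory_slot *slot,
                                   gfn_t gfn)
    {
            kvm_page_track_register_notifier(kvm, &gpu_node);

            /* Must hold mmu-lock and kvm->srcu or kvm->slots_lock, per above. */
            kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
    }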
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 2ce4f05e81d3..e159a8185ad9 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -189,8 +189,11 @@ static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte) | |||
189 | ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) | | 189 | ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) | |
190 | ACC_USER_MASK; | 190 | ACC_USER_MASK; |
191 | #else | 191 | #else |
192 | access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; | 192 | BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK); |
193 | access &= ~(gpte >> PT64_NX_SHIFT); | 193 | BUILD_BUG_ON(ACC_EXEC_MASK != 1); |
194 | access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK); | ||
195 | /* Combine NX with P (which is set here) to get ACC_EXEC_MASK. */ | ||
196 | access ^= (gpte >> PT64_NX_SHIFT); | ||
194 | #endif | 197 | #endif |
195 | 198 | ||
196 | return access; | 199 | return access; |
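The non-EPT branch above folds NX into the access bits without a conditional: the BUILD_BUG_ONs assert that ACC_EXEC_MASK equals PT_PRESENT_MASK (bit 0), so XOR-ing in the NX bit shifted down to bit 0 clears exec exactly when NX is set, given that the walker only reaches present PTEs. A standalone arithmetic check of the trick, with the constants mirrored from the kernel:

    #include <assert.h>
    #include <stdint.h>

    #define PT_PRESENT_MASK  (1ull << 0)
    #define PT_WRITABLE_MASK (1ull << 1)
    #define PT_USER_MASK     (1ull << 2)
    #define PT64_NX_SHIFT    63
    #define ACC_EXEC_MASK    1u  /* == PT_PRESENT_MASK, per the BUILD_BUG_ONs */

    static unsigned int gpte_access(uint64_t gpte)
    {
            unsigned int access;

            access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK);
            access ^= (gpte >> PT64_NX_SHIFT);  /* P=1, NX=1 -> exec cleared */
            return access;
    }

    int main(void)
    {
            assert(gpte_access(PT_PRESENT_MASK) & ACC_EXEC_MASK);
            assert(!(gpte_access(PT_PRESENT_MASK | (1ull << PT64_NX_SHIFT)) &
                     ACC_EXEC_MASK));
            return 0;
    }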
@@ -702,24 +705,17 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
702 | 705 | ||
703 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); | 706 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
704 | 707 | ||
705 | if (unlikely(error_code & PFERR_RSVD_MASK)) { | ||
706 | r = handle_mmio_page_fault(vcpu, addr, mmu_is_nested(vcpu)); | ||
707 | if (likely(r != RET_MMIO_PF_INVALID)) | ||
708 | return r; | ||
709 | |||
710 | /* | ||
711 | * page fault with PFEC.RSVD = 1 is caused by shadow | ||
712 | * page fault, should not be used to walk guest page | ||
713 | * table. | ||
714 | */ | ||
715 | error_code &= ~PFERR_RSVD_MASK; | ||
716 | }; | ||
717 | |||
718 | r = mmu_topup_memory_caches(vcpu); | 708 | r = mmu_topup_memory_caches(vcpu); |
719 | if (r) | 709 | if (r) |
720 | return r; | 710 | return r; |
721 | 711 | ||
722 | /* | 712 | /* |
713 | * If PFEC.RSVD is set, this is a shadow page fault. | ||
714 | * The bit needs to be cleared before walking guest page tables. | ||
715 | */ | ||
716 | error_code &= ~PFERR_RSVD_MASK; | ||
717 | |||
718 | /* | ||
723 | * Look up the guest pte for the faulting address. | 719 | * Look up the guest pte for the faulting address. |
724 | */ | 720 | */ |
725 | r = FNAME(walk_addr)(&walker, vcpu, addr, error_code); | 721 | r = FNAME(walk_addr)(&walker, vcpu, addr, error_code); |
@@ -735,6 +731,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
735 | return 0; | 731 | return 0; |
736 | } | 732 | } |
737 | 733 | ||
734 | if (page_fault_handle_page_track(vcpu, error_code, walker.gfn)) { | ||
735 | shadow_page_table_clear_flood(vcpu, addr); | ||
736 | return 1; | ||
737 | } | ||
738 | |||
738 | vcpu->arch.write_fault_to_shadow_pgtable = false; | 739 | vcpu->arch.write_fault_to_shadow_pgtable = false; |
739 | 740 | ||
740 | is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, | 741 | is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, |
@@ -945,7 +946,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
945 | 946 | ||
946 | if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte, | 947 | if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte, |
947 | sizeof(pt_element_t))) | 948 | sizeof(pt_element_t))) |
948 | return -EINVAL; | 949 | return 0; |
949 | 950 | ||
950 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { | 951 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { |
951 | vcpu->kvm->tlbs_dirty++; | 952 | vcpu->kvm->tlbs_dirty++; |
@@ -977,7 +978,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
977 | host_writable); | 978 | host_writable); |
978 | } | 979 | } |
979 | 980 | ||
980 | return !nr_present; | 981 | return nr_present; |
981 | } | 982 | } |
982 | 983 | ||
983 | #undef pt_element_t | 984 | #undef pt_element_t |
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 31aa2c85dc97..06ce377dcbc9 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
@@ -257,7 +257,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) | |||
257 | 257 | ||
258 | void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) | 258 | void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) |
259 | { | 259 | { |
260 | if (vcpu->arch.apic) | 260 | if (lapic_in_kernel(vcpu)) |
261 | kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC); | 261 | kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC); |
262 | } | 262 | } |
263 | 263 | ||
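The open-coded vcpu->arch.apic NULL checks in pmu.c and x86.c are replaced by lapic_in_kernel(). The helper itself is defined outside this excerpt; semantically it is equivalent to the test it replaces, though the in-tree version may additionally use a static key to skip the load on hosts where every vCPU has an in-kernel LAPIC. A minimal equivalent, as an assumption rather than the actual definition:

    /* Minimal sketch of the helper; the in-tree version may differ. */
    static inline bool lapic_in_kernel(struct kvm_vcpu *vcpu)
    {
            return vcpu->arch.apic != NULL;
    }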
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c13a64b7d789..95070386d599 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1858,8 +1858,7 @@ static int halt_interception(struct vcpu_svm *svm) | |||
1858 | static int vmmcall_interception(struct vcpu_svm *svm) | 1858 | static int vmmcall_interception(struct vcpu_svm *svm) |
1859 | { | 1859 | { |
1860 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 1860 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
1861 | kvm_emulate_hypercall(&svm->vcpu); | 1861 | return kvm_emulate_hypercall(&svm->vcpu); |
1862 | return 1; | ||
1863 | } | 1862 | } |
1864 | 1863 | ||
1865 | static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) | 1864 | static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) |
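vmmcall_interception() here, like handle_vmcall() in the vmx.c hunk further below, now forwards kvm_emulate_hypercall()'s return value instead of always returning 1. In the exit-handler convention, a positive return re-enters the guest and 0 completes the exit in userspace, which is what the new Hyper-V VMBus hypercall exit relies on. A sketch of that convention at a dispatch site (illustrative only; the names are hypothetical, not KVM's actual dispatcher):

    /* Hypothetical dispatcher wrapper showing the return-value contract. */
    static int run_exit_handler(struct kvm_vcpu *vcpu,
                                int (*handler)(struct kvm_vcpu *))
    {
            int r = handler(vcpu);  /* e.g. a hypercall interception */

            if (r > 0)
                    return 1;       /* resume the guest */
            return r;               /* 0: finish the exit in userspace */
    }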
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index ad9f6a23f139..2f1ea2f61e1f 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -996,11 +996,13 @@ TRACE_EVENT(kvm_enter_smm, | |||
996 | * Tracepoint for VT-d posted-interrupts. | 996 | * Tracepoint for VT-d posted-interrupts. |
997 | */ | 997 | */ |
998 | TRACE_EVENT(kvm_pi_irte_update, | 998 | TRACE_EVENT(kvm_pi_irte_update, |
999 | TP_PROTO(unsigned int vcpu_id, unsigned int gsi, | 999 | TP_PROTO(unsigned int host_irq, unsigned int vcpu_id, |
1000 | unsigned int gvec, u64 pi_desc_addr, bool set), | 1000 | unsigned int gsi, unsigned int gvec, |
1001 | TP_ARGS(vcpu_id, gsi, gvec, pi_desc_addr, set), | 1001 | u64 pi_desc_addr, bool set), |
1002 | TP_ARGS(host_irq, vcpu_id, gsi, gvec, pi_desc_addr, set), | ||
1002 | 1003 | ||
1003 | TP_STRUCT__entry( | 1004 | TP_STRUCT__entry( |
1005 | __field( unsigned int, host_irq ) | ||
1004 | __field( unsigned int, vcpu_id ) | 1006 | __field( unsigned int, vcpu_id ) |
1005 | __field( unsigned int, gsi ) | 1007 | __field( unsigned int, gsi ) |
1006 | __field( unsigned int, gvec ) | 1008 | __field( unsigned int, gvec ) |
@@ -1009,6 +1011,7 @@ TRACE_EVENT(kvm_pi_irte_update, | |||
1009 | ), | 1011 | ), |
1010 | 1012 | ||
1011 | TP_fast_assign( | 1013 | TP_fast_assign( |
1014 | __entry->host_irq = host_irq; | ||
1012 | __entry->vcpu_id = vcpu_id; | 1015 | __entry->vcpu_id = vcpu_id; |
1013 | __entry->gsi = gsi; | 1016 | __entry->gsi = gsi; |
1014 | __entry->gvec = gvec; | 1017 | __entry->gvec = gvec; |
@@ -1016,9 +1019,10 @@ TRACE_EVENT(kvm_pi_irte_update, | |||
1016 | __entry->set = set; | 1019 | __entry->set = set; |
1017 | ), | 1020 | ), |
1018 | 1021 | ||
1019 | TP_printk("VT-d PI is %s for this irq, vcpu %u, gsi: 0x%x, " | 1022 | TP_printk("VT-d PI is %s for irq %u, vcpu %u, gsi: 0x%x, " |
1020 | "gvec: 0x%x, pi_desc_addr: 0x%llx", | 1023 | "gvec: 0x%x, pi_desc_addr: 0x%llx", |
1021 | __entry->set ? "enabled and being updated" : "disabled", | 1024 | __entry->set ? "enabled and being updated" : "disabled", |
1025 | __entry->host_irq, | ||
1022 | __entry->vcpu_id, | 1026 | __entry->vcpu_id, |
1023 | __entry->gsi, | 1027 | __entry->gsi, |
1024 | __entry->gvec, | 1028 | __entry->gvec, |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9bd8f44baded..5e45c2731a5d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -863,7 +863,6 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); | |||
863 | static u64 construct_eptp(unsigned long root_hpa); | 863 | static u64 construct_eptp(unsigned long root_hpa); |
864 | static void kvm_cpu_vmxon(u64 addr); | 864 | static void kvm_cpu_vmxon(u64 addr); |
865 | static void kvm_cpu_vmxoff(void); | 865 | static void kvm_cpu_vmxoff(void); |
866 | static bool vmx_mpx_supported(void); | ||
867 | static bool vmx_xsaves_supported(void); | 866 | static bool vmx_xsaves_supported(void); |
868 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | 867 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); |
869 | static void vmx_set_segment(struct kvm_vcpu *vcpu, | 868 | static void vmx_set_segment(struct kvm_vcpu *vcpu, |
@@ -963,25 +962,36 @@ static const u32 vmx_msr_index[] = { | |||
963 | MSR_EFER, MSR_TSC_AUX, MSR_STAR, | 962 | MSR_EFER, MSR_TSC_AUX, MSR_STAR, |
964 | }; | 963 | }; |
965 | 964 | ||
966 | static inline bool is_page_fault(u32 intr_info) | 965 | static inline bool is_exception_n(u32 intr_info, u8 vector) |
967 | { | 966 | { |
968 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 967 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
969 | INTR_INFO_VALID_MASK)) == | 968 | INTR_INFO_VALID_MASK)) == |
970 | (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); | 969 | (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK); |
970 | } | ||
971 | |||
972 | static inline bool is_debug(u32 intr_info) | ||
973 | { | ||
974 | return is_exception_n(intr_info, DB_VECTOR); | ||
975 | } | ||
976 | |||
977 | static inline bool is_breakpoint(u32 intr_info) | ||
978 | { | ||
979 | return is_exception_n(intr_info, BP_VECTOR); | ||
980 | } | ||
981 | |||
982 | static inline bool is_page_fault(u32 intr_info) | ||
983 | { | ||
984 | return is_exception_n(intr_info, PF_VECTOR); | ||
971 | } | 985 | } |
972 | 986 | ||
973 | static inline bool is_no_device(u32 intr_info) | 987 | static inline bool is_no_device(u32 intr_info) |
974 | { | 988 | { |
975 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 989 | return is_exception_n(intr_info, NM_VECTOR); |
976 | INTR_INFO_VALID_MASK)) == | ||
977 | (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); | ||
978 | } | 990 | } |
979 | 991 | ||
980 | static inline bool is_invalid_opcode(u32 intr_info) | 992 | static inline bool is_invalid_opcode(u32 intr_info) |
981 | { | 993 | { |
982 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 994 | return is_exception_n(intr_info, UD_VECTOR); |
983 | INTR_INFO_VALID_MASK)) == | ||
984 | (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); | ||
985 | } | 995 | } |
986 | 996 | ||
987 | static inline bool is_external_interrupt(u32 intr_info) | 997 | static inline bool is_external_interrupt(u32 intr_info) |
@@ -2605,7 +2615,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
2605 | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | | 2615 | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | |
2606 | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; | 2616 | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; |
2607 | 2617 | ||
2608 | if (vmx_mpx_supported()) | 2618 | if (kvm_mpx_supported()) |
2609 | vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; | 2619 | vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; |
2610 | 2620 | ||
2611 | /* We support free control of debug control saving. */ | 2621 | /* We support free control of debug control saving. */ |
@@ -2626,7 +2636,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
2626 | VM_ENTRY_LOAD_IA32_PAT; | 2636 | VM_ENTRY_LOAD_IA32_PAT; |
2627 | vmx->nested.nested_vmx_entry_ctls_high |= | 2637 | vmx->nested.nested_vmx_entry_ctls_high |= |
2628 | (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); | 2638 | (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); |
2629 | if (vmx_mpx_supported()) | 2639 | if (kvm_mpx_supported()) |
2630 | vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; | 2640 | vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; |
2631 | 2641 | ||
2632 | /* We support free control of debug control loading. */ | 2642 | /* We support free control of debug control loading. */ |
@@ -2870,7 +2880,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2870 | msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); | 2880 | msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); |
2871 | break; | 2881 | break; |
2872 | case MSR_IA32_BNDCFGS: | 2882 | case MSR_IA32_BNDCFGS: |
2873 | if (!vmx_mpx_supported()) | 2883 | if (!kvm_mpx_supported()) |
2874 | return 1; | 2884 | return 1; |
2875 | msr_info->data = vmcs_read64(GUEST_BNDCFGS); | 2885 | msr_info->data = vmcs_read64(GUEST_BNDCFGS); |
2876 | break; | 2886 | break; |
@@ -2947,7 +2957,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2947 | vmcs_writel(GUEST_SYSENTER_ESP, data); | 2957 | vmcs_writel(GUEST_SYSENTER_ESP, data); |
2948 | break; | 2958 | break; |
2949 | case MSR_IA32_BNDCFGS: | 2959 | case MSR_IA32_BNDCFGS: |
2950 | if (!vmx_mpx_supported()) | 2960 | if (!kvm_mpx_supported()) |
2951 | return 1; | 2961 | return 1; |
2952 | vmcs_write64(GUEST_BNDCFGS, data); | 2962 | vmcs_write64(GUEST_BNDCFGS, data); |
2953 | break; | 2963 | break; |
@@ -3420,7 +3430,7 @@ static void init_vmcs_shadow_fields(void) | |||
3420 | for (i = j = 0; i < max_shadow_read_write_fields; i++) { | 3430 | for (i = j = 0; i < max_shadow_read_write_fields; i++) { |
3421 | switch (shadow_read_write_fields[i]) { | 3431 | switch (shadow_read_write_fields[i]) { |
3422 | case GUEST_BNDCFGS: | 3432 | case GUEST_BNDCFGS: |
3423 | if (!vmx_mpx_supported()) | 3433 | if (!kvm_mpx_supported()) |
3424 | continue; | 3434 | continue; |
3425 | break; | 3435 | break; |
3426 | default: | 3436 | default: |
@@ -5629,11 +5639,8 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
5629 | } | 5639 | } |
5630 | 5640 | ||
5631 | if (vcpu->guest_debug == 0) { | 5641 | if (vcpu->guest_debug == 0) { |
5632 | u32 cpu_based_vm_exec_control; | 5642 | vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, |
5633 | 5643 | CPU_BASED_MOV_DR_EXITING); | |
5634 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
5635 | cpu_based_vm_exec_control &= ~CPU_BASED_MOV_DR_EXITING; | ||
5636 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
5637 | 5644 | ||
5638 | /* | 5645 | /* |
5639 | * No more DR vmexits; force a reload of the debug registers | 5646 | * No more DR vmexits; force a reload of the debug registers |
@@ -5670,8 +5677,6 @@ static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) | |||
5670 | 5677 | ||
5671 | static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) | 5678 | static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) |
5672 | { | 5679 | { |
5673 | u32 cpu_based_vm_exec_control; | ||
5674 | |||
5675 | get_debugreg(vcpu->arch.db[0], 0); | 5680 | get_debugreg(vcpu->arch.db[0], 0); |
5676 | get_debugreg(vcpu->arch.db[1], 1); | 5681 | get_debugreg(vcpu->arch.db[1], 1); |
5677 | get_debugreg(vcpu->arch.db[2], 2); | 5682 | get_debugreg(vcpu->arch.db[2], 2); |
@@ -5680,10 +5685,7 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) | |||
5680 | vcpu->arch.dr7 = vmcs_readl(GUEST_DR7); | 5685 | vcpu->arch.dr7 = vmcs_readl(GUEST_DR7); |
5681 | 5686 | ||
5682 | vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; | 5687 | vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; |
5683 | 5688 | vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING); | |
5684 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
5685 | cpu_based_vm_exec_control |= CPU_BASED_MOV_DR_EXITING; | ||
5686 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
5687 | } | 5689 | } |
5688 | 5690 | ||
5689 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) | 5691 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) |
@@ -5768,8 +5770,7 @@ static int handle_halt(struct kvm_vcpu *vcpu) | |||
5768 | 5770 | ||
5769 | static int handle_vmcall(struct kvm_vcpu *vcpu) | 5771 | static int handle_vmcall(struct kvm_vcpu *vcpu) |
5770 | { | 5772 | { |
5771 | kvm_emulate_hypercall(vcpu); | 5773 | return kvm_emulate_hypercall(vcpu); |
5772 | return 1; | ||
5773 | } | 5774 | } |
5774 | 5775 | ||
5775 | static int handle_invd(struct kvm_vcpu *vcpu) | 5776 | static int handle_invd(struct kvm_vcpu *vcpu) |
@@ -6456,8 +6457,8 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) | |||
6456 | 6457 | ||
6457 | if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { | 6458 | if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { |
6458 | /* Recycle the least recently used VMCS. */ | 6459 | /* Recycle the least recently used VMCS. */ |
6459 | item = list_entry(vmx->nested.vmcs02_pool.prev, | 6460 | item = list_last_entry(&vmx->nested.vmcs02_pool, |
6460 | struct vmcs02_list, list); | 6461 | struct vmcs02_list, list); |
6461 | item->vmptr = vmx->nested.current_vmptr; | 6462 | item->vmptr = vmx->nested.current_vmptr; |
6462 | list_move(&item->list, &vmx->nested.vmcs02_pool); | 6463 | list_move(&item->list, &vmx->nested.vmcs02_pool); |
6463 | return &item->vmcs02; | 6464 | return &item->vmcs02; |
@@ -7773,6 +7774,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
7773 | else if (is_no_device(intr_info) && | 7774 | else if (is_no_device(intr_info) && |
7774 | !(vmcs12->guest_cr0 & X86_CR0_TS)) | 7775 | !(vmcs12->guest_cr0 & X86_CR0_TS)) |
7775 | return false; | 7776 | return false; |
7777 | else if (is_debug(intr_info) && | ||
7778 | vcpu->guest_debug & | ||
7779 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) | ||
7780 | return false; | ||
7781 | else if (is_breakpoint(intr_info) && | ||
7782 | vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | ||
7783 | return false; | ||
7776 | return vmcs12->exception_bitmap & | 7784 | return vmcs12->exception_bitmap & |
7777 | (1u << (intr_info & INTR_INFO_VECTOR_MASK)); | 7785 | (1u << (intr_info & INTR_INFO_VECTOR_MASK)); |
7778 | case EXIT_REASON_EXTERNAL_INTERRUPT: | 7786 | case EXIT_REASON_EXTERNAL_INTERRUPT: |
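The two new cases keep #DB and #BP exits in L0 whenever host userspace has claimed them through KVM_SET_GUEST_DEBUG, instead of reflecting them to the L1 hypervisor via its exception bitmap; this is the "fixed guest debugging with nested virtualization" item from the merge description. The userspace side that sets those guest_debug flags looks roughly like this:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Ask KVM to intercept software breakpoints on this vCPU for the host
     * debugger, even while a nested (L2) guest is running. */
    static int enable_sw_breakpoints(int vcpu_fd)
    {
            struct kvm_guest_debug dbg = {
                    .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP,
            };

            return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
    }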
@@ -10277,7 +10285,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10277 | vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); | 10285 | vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); |
10278 | vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); | 10286 | vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); |
10279 | vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); | 10287 | vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); |
10280 | if (vmx_mpx_supported()) | 10288 | if (kvm_mpx_supported()) |
10281 | vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); | 10289 | vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); |
10282 | if (nested_cpu_has_xsaves(vmcs12)) | 10290 | if (nested_cpu_has_xsaves(vmcs12)) |
10283 | vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); | 10291 | vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); |
@@ -10785,13 +10793,26 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, | |||
10785 | */ | 10793 | */ |
10786 | 10794 | ||
10787 | kvm_set_msi_irq(e, &irq); | 10795 | kvm_set_msi_irq(e, &irq); |
10788 | if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) | 10796 | if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { |
10797 | /* | ||
10798 | * Make sure the IRTE is in remapped mode if | ||
10799 | * we don't handle it in posted mode. | ||
10800 | */ | ||
10801 | ret = irq_set_vcpu_affinity(host_irq, NULL); | ||
10802 | if (ret < 0) { | ||
10803 | printk(KERN_INFO | ||
10804 | "failed to back to remapped mode, irq: %u\n", | ||
10805 | host_irq); | ||
10806 | goto out; | ||
10807 | } | ||
10808 | |||
10789 | continue; | 10809 | continue; |
10810 | } | ||
10790 | 10811 | ||
10791 | vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); | 10812 | vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); |
10792 | vcpu_info.vector = irq.vector; | 10813 | vcpu_info.vector = irq.vector; |
10793 | 10814 | ||
10794 | trace_kvm_pi_irte_update(vcpu->vcpu_id, e->gsi, | 10815 | trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi, |
10795 | vcpu_info.vector, vcpu_info.pi_desc_addr, set); | 10816 | vcpu_info.vector, vcpu_info.pi_desc_addr, set); |
10796 | 10817 | ||
10797 | if (set) | 10818 | if (set) |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eaf6ee8c28b8..7236bd3a4c3d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -123,6 +123,9 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); | |||
123 | unsigned int __read_mostly lapic_timer_advance_ns = 0; | 123 | unsigned int __read_mostly lapic_timer_advance_ns = 0; |
124 | module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); | 124 | module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); |
125 | 125 | ||
126 | static bool __read_mostly vector_hashing = true; | ||
127 | module_param(vector_hashing, bool, S_IRUGO); | ||
128 | |||
126 | static bool __read_mostly backwards_tsc_observed = false; | 129 | static bool __read_mostly backwards_tsc_observed = false; |
127 | 130 | ||
128 | #define KVM_NR_SHARED_MSRS 16 | 131 | #define KVM_NR_SHARED_MSRS 16 |
@@ -1196,17 +1199,11 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | |||
1196 | 1199 | ||
1197 | static uint32_t div_frac(uint32_t dividend, uint32_t divisor) | 1200 | static uint32_t div_frac(uint32_t dividend, uint32_t divisor) |
1198 | { | 1201 | { |
1199 | uint32_t quotient, remainder; | 1202 | do_shl32_div32(dividend, divisor); |
1200 | 1203 | return dividend; | |
1201 | /* Don't try to replace with do_div(), this one calculates | ||
1202 | * "(dividend << 32) / divisor" */ | ||
1203 | __asm__ ( "divl %4" | ||
1204 | : "=a" (quotient), "=d" (remainder) | ||
1205 | : "0" (0), "1" (dividend), "r" (divisor) ); | ||
1206 | return quotient; | ||
1207 | } | 1204 | } |
1208 | 1205 | ||
1209 | static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, | 1206 | static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz, |
1210 | s8 *pshift, u32 *pmultiplier) | 1207 | s8 *pshift, u32 *pmultiplier) |
1211 | { | 1208 | { |
1212 | uint64_t scaled64; | 1209 | uint64_t scaled64; |
@@ -1214,8 +1211,8 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, | |||
1214 | uint64_t tps64; | 1211 | uint64_t tps64; |
1215 | uint32_t tps32; | 1212 | uint32_t tps32; |
1216 | 1213 | ||
1217 | tps64 = base_khz * 1000LL; | 1214 | tps64 = base_hz; |
1218 | scaled64 = scaled_khz * 1000LL; | 1215 | scaled64 = scaled_hz; |
1219 | while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) { | 1216 | while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) { |
1220 | tps64 >>= 1; | 1217 | tps64 >>= 1; |
1221 | shift--; | 1218 | shift--; |
@@ -1233,8 +1230,8 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, | |||
1233 | *pshift = shift; | 1230 | *pshift = shift; |
1234 | *pmultiplier = div_frac(scaled64, tps32); | 1231 | *pmultiplier = div_frac(scaled64, tps32); |
1235 | 1232 | ||
1236 | pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n", | 1233 | pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n", |
1237 | __func__, base_khz, scaled_khz, shift, *pmultiplier); | 1234 | __func__, base_hz, scaled_hz, shift, *pmultiplier); |
1238 | } | 1235 | } |
1239 | 1236 | ||
1240 | #ifdef CONFIG_X86_64 | 1237 | #ifdef CONFIG_X86_64 |
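do_shl32_div32() is defined elsewhere in the patch (presumably as a macro or by-reference helper, since the caller reads the quotient back out of dividend). Like the removed inline divl, it computes the 32-bit quotient of (dividend << 32) / divisor, an operation plain do_div() cannot express. Switching kvm_get_time_scale() to 64-bit Hz arguments moves the *1000 out to the call sites in 64-bit arithmetic, so kHz-based values no longer risk 32-bit overflow. A portable restatement of the division, valid only when the quotient fits in 32 bits, which the surrounding shift loop guarantees:

    #include <stdint.h>

    /* (dividend << 32) / divisor, quotient assumed to fit in 32 bits. */
    static uint32_t shl32_div32(uint32_t dividend, uint32_t divisor)
    {
            return (uint32_t)(((uint64_t)dividend << 32) / divisor);
    }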
@@ -1293,23 +1290,23 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) | |||
1293 | return 0; | 1290 | return 0; |
1294 | } | 1291 | } |
1295 | 1292 | ||
1296 | static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) | 1293 | static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) |
1297 | { | 1294 | { |
1298 | u32 thresh_lo, thresh_hi; | 1295 | u32 thresh_lo, thresh_hi; |
1299 | int use_scaling = 0; | 1296 | int use_scaling = 0; |
1300 | 1297 | ||
1301 | /* tsc_khz can be zero if TSC calibration fails */ | 1298 | /* tsc_khz can be zero if TSC calibration fails */ |
1302 | if (this_tsc_khz == 0) { | 1299 | if (user_tsc_khz == 0) { |
1303 | /* set tsc_scaling_ratio to a safe value */ | 1300 | /* set tsc_scaling_ratio to a safe value */ |
1304 | vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; | 1301 | vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; |
1305 | return -1; | 1302 | return -1; |
1306 | } | 1303 | } |
1307 | 1304 | ||
1308 | /* Compute a scale to convert nanoseconds in TSC cycles */ | 1305 | /* Compute a scale to convert nanoseconds in TSC cycles */ |
1309 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, | 1306 | kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC, |
1310 | &vcpu->arch.virtual_tsc_shift, | 1307 | &vcpu->arch.virtual_tsc_shift, |
1311 | &vcpu->arch.virtual_tsc_mult); | 1308 | &vcpu->arch.virtual_tsc_mult); |
1312 | vcpu->arch.virtual_tsc_khz = this_tsc_khz; | 1309 | vcpu->arch.virtual_tsc_khz = user_tsc_khz; |
1313 | 1310 | ||
1314 | /* | 1311 | /* |
1315 | * Compute the variation in TSC rate which is acceptable | 1312 | * Compute the variation in TSC rate which is acceptable |
@@ -1319,11 +1316,11 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) | |||
1319 | */ | 1316 | */ |
1320 | thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm); | 1317 | thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm); |
1321 | thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm); | 1318 | thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm); |
1322 | if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) { | 1319 | if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) { |
1323 | pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi); | 1320 | pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi); |
1324 | use_scaling = 1; | 1321 | use_scaling = 1; |
1325 | } | 1322 | } |
1326 | return set_tsc_khz(vcpu, this_tsc_khz, use_scaling); | 1323 | return set_tsc_khz(vcpu, user_tsc_khz, use_scaling); |
1327 | } | 1324 | } |
1328 | 1325 | ||
1329 | static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) | 1326 | static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) |
@@ -1716,7 +1713,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) | |||
1716 | 1713 | ||
1717 | static int kvm_guest_time_update(struct kvm_vcpu *v) | 1714 | static int kvm_guest_time_update(struct kvm_vcpu *v) |
1718 | { | 1715 | { |
1719 | unsigned long flags, this_tsc_khz, tgt_tsc_khz; | 1716 | unsigned long flags, tgt_tsc_khz; |
1720 | struct kvm_vcpu_arch *vcpu = &v->arch; | 1717 | struct kvm_vcpu_arch *vcpu = &v->arch; |
1721 | struct kvm_arch *ka = &v->kvm->arch; | 1718 | struct kvm_arch *ka = &v->kvm->arch; |
1722 | s64 kernel_ns; | 1719 | s64 kernel_ns; |
@@ -1742,8 +1739,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1742 | 1739 | ||
1743 | /* Keep irq disabled to prevent changes to the clock */ | 1740 | /* Keep irq disabled to prevent changes to the clock */ |
1744 | local_irq_save(flags); | 1741 | local_irq_save(flags); |
1745 | this_tsc_khz = __this_cpu_read(cpu_tsc_khz); | 1742 | tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz); |
1746 | if (unlikely(this_tsc_khz == 0)) { | 1743 | if (unlikely(tgt_tsc_khz == 0)) { |
1747 | local_irq_restore(flags); | 1744 | local_irq_restore(flags); |
1748 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); | 1745 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); |
1749 | return 1; | 1746 | return 1; |
@@ -1778,13 +1775,14 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1778 | if (!vcpu->pv_time_enabled) | 1775 | if (!vcpu->pv_time_enabled) |
1779 | return 0; | 1776 | return 0; |
1780 | 1777 | ||
1781 | if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { | 1778 | if (kvm_has_tsc_control) |
1782 | tgt_tsc_khz = kvm_has_tsc_control ? | 1779 | tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz); |
1783 | vcpu->virtual_tsc_khz : this_tsc_khz; | 1780 | |
1784 | kvm_get_time_scale(NSEC_PER_SEC / 1000, tgt_tsc_khz, | 1781 | if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) { |
1782 | kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL, | ||
1785 | &vcpu->hv_clock.tsc_shift, | 1783 | &vcpu->hv_clock.tsc_shift, |
1786 | &vcpu->hv_clock.tsc_to_system_mul); | 1784 | &vcpu->hv_clock.tsc_to_system_mul); |
1787 | vcpu->hw_tsc_khz = this_tsc_khz; | 1785 | vcpu->hw_tsc_khz = tgt_tsc_khz; |
1788 | } | 1786 | } |
1789 | 1787 | ||
1790 | /* With all the info we got, fill in the values */ | 1788 | /* With all the info we got, fill in the values */ |
@@ -2987,7 +2985,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2987 | kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); | 2985 | kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); |
2988 | 2986 | ||
2989 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR && | 2987 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR && |
2990 | kvm_vcpu_has_lapic(vcpu)) | 2988 | lapic_in_kernel(vcpu)) |
2991 | vcpu->arch.apic->sipi_vector = events->sipi_vector; | 2989 | vcpu->arch.apic->sipi_vector = events->sipi_vector; |
2992 | 2990 | ||
2993 | if (events->flags & KVM_VCPUEVENT_VALID_SMM) { | 2991 | if (events->flags & KVM_VCPUEVENT_VALID_SMM) { |
@@ -3000,7 +2998,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
3000 | vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; | 2998 | vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; |
3001 | else | 2999 | else |
3002 | vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; | 3000 | vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; |
3003 | if (kvm_vcpu_has_lapic(vcpu)) { | 3001 | if (lapic_in_kernel(vcpu)) { |
3004 | if (events->smi.latched_init) | 3002 | if (events->smi.latched_init) |
3005 | set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); | 3003 | set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); |
3006 | else | 3004 | else |
@@ -3240,7 +3238,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3240 | switch (ioctl) { | 3238 | switch (ioctl) { |
3241 | case KVM_GET_LAPIC: { | 3239 | case KVM_GET_LAPIC: { |
3242 | r = -EINVAL; | 3240 | r = -EINVAL; |
3243 | if (!vcpu->arch.apic) | 3241 | if (!lapic_in_kernel(vcpu)) |
3244 | goto out; | 3242 | goto out; |
3245 | u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 3243 | u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
3246 | 3244 | ||
@@ -3258,7 +3256,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3258 | } | 3256 | } |
3259 | case KVM_SET_LAPIC: { | 3257 | case KVM_SET_LAPIC: { |
3260 | r = -EINVAL; | 3258 | r = -EINVAL; |
3261 | if (!vcpu->arch.apic) | 3259 | if (!lapic_in_kernel(vcpu)) |
3262 | goto out; | 3260 | goto out; |
3263 | u.lapic = memdup_user(argp, sizeof(*u.lapic)); | 3261 | u.lapic = memdup_user(argp, sizeof(*u.lapic)); |
3264 | if (IS_ERR(u.lapic)) | 3262 | if (IS_ERR(u.lapic)) |
@@ -3605,20 +3603,26 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
3605 | 3603 | ||
3606 | static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) | 3604 | static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) |
3607 | { | 3605 | { |
3608 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 3606 | struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state; |
3609 | memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state)); | 3607 | |
3610 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 3608 | BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels)); |
3609 | |||
3610 | mutex_lock(&kps->lock); | ||
3611 | memcpy(ps, &kps->channels, sizeof(*ps)); | ||
3612 | mutex_unlock(&kps->lock); | ||
3611 | return 0; | 3613 | return 0; |
3612 | } | 3614 | } |
3613 | 3615 | ||
3614 | static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) | 3616 | static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) |
3615 | { | 3617 | { |
3616 | int i; | 3618 | int i; |
3617 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 3619 | struct kvm_pit *pit = kvm->arch.vpit; |
3618 | memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); | 3620 | |
3621 | mutex_lock(&pit->pit_state.lock); | ||
3622 | memcpy(&pit->pit_state.channels, ps, sizeof(*ps)); | ||
3619 | for (i = 0; i < 3; i++) | 3623 | for (i = 0; i < 3; i++) |
3620 | kvm_pit_load_count(kvm, i, ps->channels[i].count, 0); | 3624 | kvm_pit_load_count(pit, i, ps->channels[i].count, 0); |
3621 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 3625 | mutex_unlock(&pit->pit_state.lock); |
3622 | return 0; | 3626 | return 0; |
3623 | } | 3627 | } |
3624 | 3628 | ||
@@ -3638,29 +3642,39 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) | |||
3638 | int start = 0; | 3642 | int start = 0; |
3639 | int i; | 3643 | int i; |
3640 | u32 prev_legacy, cur_legacy; | 3644 | u32 prev_legacy, cur_legacy; |
3641 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 3645 | struct kvm_pit *pit = kvm->arch.vpit; |
3642 | prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; | 3646 | |
3647 | mutex_lock(&pit->pit_state.lock); | ||
3648 | prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; | ||
3643 | cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY; | 3649 | cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY; |
3644 | if (!prev_legacy && cur_legacy) | 3650 | if (!prev_legacy && cur_legacy) |
3645 | start = 1; | 3651 | start = 1; |
3646 | memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels, | 3652 | memcpy(&pit->pit_state.channels, &ps->channels, |
3647 | sizeof(kvm->arch.vpit->pit_state.channels)); | 3653 | sizeof(pit->pit_state.channels)); |
3648 | kvm->arch.vpit->pit_state.flags = ps->flags; | 3654 | pit->pit_state.flags = ps->flags; |
3649 | for (i = 0; i < 3; i++) | 3655 | for (i = 0; i < 3; i++) |
3650 | kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count, | 3656 | kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count, |
3651 | start && i == 0); | 3657 | start && i == 0); |
3652 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 3658 | mutex_unlock(&pit->pit_state.lock); |
3653 | return 0; | 3659 | return 0; |
3654 | } | 3660 | } |
3655 | 3661 | ||
3656 | static int kvm_vm_ioctl_reinject(struct kvm *kvm, | 3662 | static int kvm_vm_ioctl_reinject(struct kvm *kvm, |
3657 | struct kvm_reinject_control *control) | 3663 | struct kvm_reinject_control *control) |
3658 | { | 3664 | { |
3659 | if (!kvm->arch.vpit) | 3665 | struct kvm_pit *pit = kvm->arch.vpit; |
3666 | |||
3667 | if (!pit) | ||
3660 | return -ENXIO; | 3668 | return -ENXIO; |
3661 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 3669 | |
3662 | kvm->arch.vpit->pit_state.reinject = control->pit_reinject; | 3670 | /* pit->pit_state.lock was overloaded to prevent userspace from getting |
3663 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 3671 | * an inconsistent state after running multiple KVM_REINJECT_CONTROL |
3672 | * ioctls in parallel. Use a separate lock if that ioctl isn't rare. | ||
3673 | */ | ||
3674 | mutex_lock(&pit->pit_state.lock); | ||
3675 | kvm_pit_set_reinject(pit, control->pit_reinject); | ||
3676 | mutex_unlock(&pit->pit_state.lock); | ||
3677 | |||
3664 | return 0; | 3678 | return 0; |
3665 | } | 3679 | } |
3666 | 3680 | ||
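For context, KVM_REINJECT_CONTROL is a VM ioctl; a hedged userspace sketch of driving the path above (the ioctl number and struct kvm_reinject_control are real UAPI, the fd plumbing is illustrative):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int set_pit_reinject(int vm_fd, int on)
    {
            struct kvm_reinject_control ctl = { .pit_reinject = on };

            /* Fails with ENXIO when the VM has no in-kernel PIT. */
            return ioctl(vm_fd, KVM_REINJECT_CONTROL, &ctl);
    }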
@@ -4093,7 +4107,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, | |||
4093 | 4107 | ||
4094 | do { | 4108 | do { |
4095 | n = min(len, 8); | 4109 | n = min(len, 8); |
4096 | if (!(vcpu->arch.apic && | 4110 | if (!(lapic_in_kernel(vcpu) && |
4097 | !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v)) | 4111 | !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v)) |
4098 | && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v)) | 4112 | && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v)) |
4099 | break; | 4113 | break; |
@@ -4113,7 +4127,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | |||
4113 | 4127 | ||
4114 | do { | 4128 | do { |
4115 | n = min(len, 8); | 4129 | n = min(len, 8); |
4116 | if (!(vcpu->arch.apic && | 4130 | if (!(lapic_in_kernel(vcpu) && |
4117 | !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev, | 4131 | !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev, |
4118 | addr, n, v)) | 4132 | addr, n, v)) |
4119 | && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) | 4133 | && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) |
@@ -4346,7 +4360,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
4346 | ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes); | 4360 | ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes); |
4347 | if (ret < 0) | 4361 | if (ret < 0) |
4348 | return 0; | 4362 | return 0; |
4349 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); | 4363 | kvm_page_track_write(vcpu, gpa, val, bytes); |
4350 | return 1; | 4364 | return 1; |
4351 | } | 4365 | } |
4352 | 4366 | ||
@@ -4604,7 +4618,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, | |||
4604 | return X86EMUL_CMPXCHG_FAILED; | 4618 | return X86EMUL_CMPXCHG_FAILED; |
4605 | 4619 | ||
4606 | kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); | 4620 | kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); |
4607 | kvm_mmu_pte_write(vcpu, gpa, new, bytes); | 4621 | kvm_page_track_write(vcpu, gpa, new, bytes); |
4608 | 4622 | ||
4609 | return X86EMUL_CONTINUE; | 4623 | return X86EMUL_CONTINUE; |
4610 | 4624 | ||
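Both hunks above reroute emulated guest writes from the shadow-MMU-specific kvm_mmu_pte_write() to the generic kvm_page_track_write() hook, so any registered tracker (the shadow MMU today, virtual GPUs later, per the merge description) is notified. A hedged consumer sketch, assuming the notifier API introduced alongside these hooks (a kvm_page_track_notifier_node with a track_write callback, registered via kvm_page_track_register_notifier()):

    #include <linux/kvm_host.h>
    #include <asm/kvm_page_track.h>

    static void demo_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                                 const u8 *new, int bytes)
    {
            /* Runs from kvm_page_track_write() whenever emulation writes
             * a tracked gfn; 'new' holds the bytes just written. */
    }

    static struct kvm_page_track_notifier_node demo_node = {
            .track_write = demo_track_write,
    };

    /* during VM setup: kvm_page_track_register_notifier(kvm, &demo_node); */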
@@ -6010,7 +6024,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) | |||
6010 | if (!kvm_x86_ops->update_cr8_intercept) | 6024 | if (!kvm_x86_ops->update_cr8_intercept) |
6011 | return; | 6025 | return; |
6012 | 6026 | ||
6013 | if (!vcpu->arch.apic) | 6027 | if (!lapic_in_kernel(vcpu)) |
6014 | return; | 6028 | return; |
6015 | 6029 | ||
6016 | if (vcpu->arch.apicv_active) | 6030 | if (vcpu->arch.apicv_active) |
@@ -7038,7 +7052,7 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | |||
7038 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | 7052 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, |
7039 | struct kvm_mp_state *mp_state) | 7053 | struct kvm_mp_state *mp_state) |
7040 | { | 7054 | { |
7041 | if (!kvm_vcpu_has_lapic(vcpu) && | 7055 | if (!lapic_in_kernel(vcpu) && |
7042 | mp_state->mp_state != KVM_MP_STATE_RUNNABLE) | 7056 | mp_state->mp_state != KVM_MP_STATE_RUNNABLE) |
7043 | return -EINVAL; | 7057 | return -EINVAL; |
7044 | 7058 | ||
@@ -7314,7 +7328,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | |||
7314 | * fpu_counter rolls over to 0 after every 255 runs; a guest that uses | 7328 | * fpu_counter rolls over to 0 after every 255 runs; a guest that uses |
7315 | * the FPU only in bursts will then revert to loading it on demand. | 7329 | * the FPU only in bursts will then revert to loading it on demand. |
7316 | */ | 7330 | */ |
7317 | if (!vcpu->arch.eager_fpu) { | 7331 | if (!use_eager_fpu()) { |
7318 | if (++vcpu->fpu_counter < 5) | 7332 | if (++vcpu->fpu_counter < 5) |
7319 | kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); | 7333 | kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); |
7320 | } | 7334 | } |
@@ -7593,6 +7607,7 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) | |||
7593 | } | 7607 | } |
7594 | 7608 | ||
7595 | struct static_key kvm_no_apic_vcpu __read_mostly; | 7609 | struct static_key kvm_no_apic_vcpu __read_mostly; |
7610 | EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu); | ||
7596 | 7611 | ||
7597 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | 7612 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) |
7598 | { | 7613 | { |
@@ -7724,6 +7739,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
7724 | INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); | 7739 | INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); |
7725 | INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); | 7740 | INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); |
7726 | 7741 | ||
7742 | kvm_page_track_init(kvm); | ||
7743 | kvm_mmu_init_vm(kvm); | ||
7744 | |||
7727 | return 0; | 7745 | return 0; |
7728 | } | 7746 | } |
7729 | 7747 | ||
@@ -7850,6 +7868,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
7850 | kfree(kvm->arch.vioapic); | 7868 | kfree(kvm->arch.vioapic); |
7851 | kvm_free_vcpus(kvm); | 7869 | kvm_free_vcpus(kvm); |
7852 | kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); | 7870 | kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); |
7871 | kvm_mmu_uninit_vm(kvm); | ||
7853 | } | 7872 | } |
7854 | 7873 | ||
7855 | void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, | 7874 | void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, |
@@ -7871,6 +7890,8 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, | |||
7871 | free->arch.lpage_info[i - 1] = NULL; | 7890 | free->arch.lpage_info[i - 1] = NULL; |
7872 | } | 7891 | } |
7873 | } | 7892 | } |
7893 | |||
7894 | kvm_page_track_free_memslot(free, dont); | ||
7874 | } | 7895 | } |
7875 | 7896 | ||
7876 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | 7897 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, |
@@ -7879,6 +7900,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
7879 | int i; | 7900 | int i; |
7880 | 7901 | ||
7881 | for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { | 7902 | for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { |
7903 | struct kvm_lpage_info *linfo; | ||
7882 | unsigned long ugfn; | 7904 | unsigned long ugfn; |
7883 | int lpages; | 7905 | int lpages; |
7884 | int level = i + 1; | 7906 | int level = i + 1; |
@@ -7893,15 +7915,16 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
7893 | if (i == 0) | 7915 | if (i == 0) |
7894 | continue; | 7916 | continue; |
7895 | 7917 | ||
7896 | slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages * | 7918 | linfo = kvm_kvzalloc(lpages * sizeof(*linfo)); |
7897 | sizeof(*slot->arch.lpage_info[i - 1])); | 7919 | if (!linfo) |
7898 | if (!slot->arch.lpage_info[i - 1]) | ||
7899 | goto out_free; | 7920 | goto out_free; |
7900 | 7921 | ||
7922 | slot->arch.lpage_info[i - 1] = linfo; | ||
7923 | |||
7901 | if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) | 7924 | if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) |
7902 | slot->arch.lpage_info[i - 1][0].write_count = 1; | 7925 | linfo[0].disallow_lpage = 1; |
7903 | if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) | 7926 | if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) |
7904 | slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1; | 7927 | linfo[lpages - 1].disallow_lpage = 1; |
7905 | ugfn = slot->userspace_addr >> PAGE_SHIFT; | 7928 | ugfn = slot->userspace_addr >> PAGE_SHIFT; |
7906 | /* | 7929 | /* |
7907 | * If the gfn and userspace address are not aligned wrt each | 7930 | * If the gfn and userspace address are not aligned wrt each |
@@ -7913,10 +7936,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
7913 | unsigned long j; | 7936 | unsigned long j; |
7914 | 7937 | ||
7915 | for (j = 0; j < lpages; ++j) | 7938 | for (j = 0; j < lpages; ++j) |
7916 | slot->arch.lpage_info[i - 1][j].write_count = 1; | 7939 | linfo[j].disallow_lpage = 1; |
7917 | } | 7940 | } |
7918 | } | 7941 | } |
7919 | 7942 | ||
7943 | if (kvm_page_track_create_memslot(slot, npages)) | ||
7944 | goto out_free; | ||
7945 | |||
7920 | return 0; | 7946 | return 0; |
7921 | 7947 | ||
7922 | out_free: | 7948 | out_free: |
@@ -8370,6 +8396,12 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, | |||
8370 | return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set); | 8396 | return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set); |
8371 | } | 8397 | } |
8372 | 8398 | ||
8399 | bool kvm_vector_hashing_enabled(void) | ||
8400 | { | ||
8401 | return vector_hashing; | ||
8402 | } | ||
8403 | EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled); | ||
8404 | |||
8373 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); | 8405 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); |
8374 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); | 8406 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); |
8375 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); | 8407 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index f2afa5fe48a6..007940faa5c6 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -179,6 +179,7 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); | |||
179 | int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); | 179 | int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); |
180 | bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, | 180 | bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, |
181 | int page_num); | 181 | int page_num); |
182 | bool kvm_vector_hashing_enabled(void); | ||
182 | 183 | ||
183 | #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ | 184 | #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ |
184 | | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \ | 185 | | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \ |
@@ -192,4 +193,19 @@ extern unsigned int min_timer_period_us; | |||
192 | extern unsigned int lapic_timer_advance_ns; | 193 | extern unsigned int lapic_timer_advance_ns; |
193 | 194 | ||
194 | extern struct static_key kvm_no_apic_vcpu; | 195 | extern struct static_key kvm_no_apic_vcpu; |
196 | |||
197 | /* Same "calling convention" as do_div: | ||
198 | * - divide (n << 32) by base | ||
199 | * - put result in n | ||
200 | * - return remainder | ||
201 | */ | ||
202 | #define do_shl32_div32(n, base) \ | ||
203 | ({ \ | ||
204 | u32 __quot, __rem; \ | ||
205 | asm("divl %2" : "=a" (__quot), "=d" (__rem) \ | ||
206 | : "rm" (base), "0" (0), "1" ((u32) n)); \ | ||
207 | n = __quot; \ | ||
208 | __rem; \ | ||
209 | }) | ||
210 | |||
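do_shl32_div32() computes a 32.32 fixed-point quotient with a single divl. One constraint is implicit: the 32-bit quotient must not overflow, so the caller has to guarantee n < base or the CPU raises a divide error. A usage sketch with illustrative numbers:

    u32 n = 1000000;      /* numerator, e.g. a 1 MHz reference  */
    u32 base = 19200000;  /* denominator, e.g. a 19.2 MHz clock */
    u32 rem;

    rem = do_shl32_div32(n, base);      /* requires n < base            */
    /* n   == (1000000ULL << 32) / 19200000   (~0.0521 in 32.32 format) */
    /* rem == (1000000ULL << 32) % 19200000                             */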
195 | #endif | 211 | #endif |
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index f0dd9d42bc7b..5152b3898155 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c | |||
@@ -75,7 +75,7 @@ static int arch_timer_ppi[MAX_TIMER_PPI]; | |||
75 | 75 | ||
76 | static struct clock_event_device __percpu *arch_timer_evt; | 76 | static struct clock_event_device __percpu *arch_timer_evt; |
77 | 77 | ||
78 | static bool arch_timer_use_virtual = true; | 78 | static enum ppi_nr arch_timer_uses_ppi = VIRT_PPI; |
79 | static bool arch_timer_c3stop; | 79 | static bool arch_timer_c3stop; |
80 | static bool arch_timer_mem_use_virtual; | 80 | static bool arch_timer_mem_use_virtual; |
81 | 81 | ||
@@ -271,16 +271,22 @@ static void __arch_timer_setup(unsigned type, | |||
271 | clk->name = "arch_sys_timer"; | 271 | clk->name = "arch_sys_timer"; |
272 | clk->rating = 450; | 272 | clk->rating = 450; |
273 | clk->cpumask = cpumask_of(smp_processor_id()); | 273 | clk->cpumask = cpumask_of(smp_processor_id()); |
274 | if (arch_timer_use_virtual) { | 274 | clk->irq = arch_timer_ppi[arch_timer_uses_ppi]; |
275 | clk->irq = arch_timer_ppi[VIRT_PPI]; | 275 | switch (arch_timer_uses_ppi) { |
276 | case VIRT_PPI: | ||
276 | clk->set_state_shutdown = arch_timer_shutdown_virt; | 277 | clk->set_state_shutdown = arch_timer_shutdown_virt; |
277 | clk->set_state_oneshot_stopped = arch_timer_shutdown_virt; | 278 | clk->set_state_oneshot_stopped = arch_timer_shutdown_virt; |
278 | clk->set_next_event = arch_timer_set_next_event_virt; | 279 | clk->set_next_event = arch_timer_set_next_event_virt; |
279 | } else { | 280 | break; |
280 | clk->irq = arch_timer_ppi[PHYS_SECURE_PPI]; | 281 | case PHYS_SECURE_PPI: |
282 | case PHYS_NONSECURE_PPI: | ||
283 | case HYP_PPI: | ||
281 | clk->set_state_shutdown = arch_timer_shutdown_phys; | 284 | clk->set_state_shutdown = arch_timer_shutdown_phys; |
282 | clk->set_state_oneshot_stopped = arch_timer_shutdown_phys; | 285 | clk->set_state_oneshot_stopped = arch_timer_shutdown_phys; |
283 | clk->set_next_event = arch_timer_set_next_event_phys; | 286 | clk->set_next_event = arch_timer_set_next_event_phys; |
287 | break; | ||
288 | default: | ||
289 | BUG(); | ||
284 | } | 290 | } |
285 | } else { | 291 | } else { |
286 | clk->features |= CLOCK_EVT_FEAT_DYNIRQ; | 292 | clk->features |= CLOCK_EVT_FEAT_DYNIRQ; |
@@ -350,17 +356,20 @@ static void arch_counter_set_user_access(void) | |||
350 | arch_timer_set_cntkctl(cntkctl); | 356 | arch_timer_set_cntkctl(cntkctl); |
351 | } | 357 | } |
352 | 358 | ||
359 | static bool arch_timer_has_nonsecure_ppi(void) | ||
360 | { | ||
361 | return (arch_timer_uses_ppi == PHYS_SECURE_PPI && | ||
362 | arch_timer_ppi[PHYS_NONSECURE_PPI]); | ||
363 | } | ||
364 | |||
353 | static int arch_timer_setup(struct clock_event_device *clk) | 365 | static int arch_timer_setup(struct clock_event_device *clk) |
354 | { | 366 | { |
355 | __arch_timer_setup(ARCH_CP15_TIMER, clk); | 367 | __arch_timer_setup(ARCH_CP15_TIMER, clk); |
356 | 368 | ||
357 | if (arch_timer_use_virtual) | 369 | enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], 0); |
358 | enable_percpu_irq(arch_timer_ppi[VIRT_PPI], 0); | 370 | |
359 | else { | 371 | if (arch_timer_has_nonsecure_ppi()) |
360 | enable_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], 0); | 372 | enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0); |
361 | if (arch_timer_ppi[PHYS_NONSECURE_PPI]) | ||
362 | enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0); | ||
363 | } | ||
364 | 373 | ||
365 | arch_counter_set_user_access(); | 374 | arch_counter_set_user_access(); |
366 | if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM)) | 375 | if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM)) |
@@ -402,7 +411,7 @@ static void arch_timer_banner(unsigned type) | |||
402 | (unsigned long)arch_timer_rate / 1000000, | 411 | (unsigned long)arch_timer_rate / 1000000, |
403 | (unsigned long)(arch_timer_rate / 10000) % 100, | 412 | (unsigned long)(arch_timer_rate / 10000) % 100, |
404 | type & ARCH_CP15_TIMER ? | 413 | type & ARCH_CP15_TIMER ? |
405 | arch_timer_use_virtual ? "virt" : "phys" : | 414 | (arch_timer_uses_ppi == VIRT_PPI) ? "virt" : "phys" : |
406 | "", | 415 | "", |
407 | type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ? "/" : "", | 416 | type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ? "/" : "", |
408 | type & ARCH_MEM_TIMER ? | 417 | type & ARCH_MEM_TIMER ? |
@@ -472,7 +481,7 @@ static void __init arch_counter_register(unsigned type) | |||
472 | 481 | ||
473 | /* Register the CP15 based counter if we have one */ | 482 | /* Register the CP15 based counter if we have one */ |
474 | if (type & ARCH_CP15_TIMER) { | 483 | if (type & ARCH_CP15_TIMER) { |
475 | if (IS_ENABLED(CONFIG_ARM64) || arch_timer_use_virtual) | 484 | if (IS_ENABLED(CONFIG_ARM64) || arch_timer_uses_ppi == VIRT_PPI) |
476 | arch_timer_read_counter = arch_counter_get_cntvct; | 485 | arch_timer_read_counter = arch_counter_get_cntvct; |
477 | else | 486 | else |
478 | arch_timer_read_counter = arch_counter_get_cntpct; | 487 | arch_timer_read_counter = arch_counter_get_cntpct; |
@@ -502,13 +511,9 @@ static void arch_timer_stop(struct clock_event_device *clk) | |||
502 | pr_debug("arch_timer_teardown disable IRQ%d cpu #%d\n", | 511 | pr_debug("arch_timer_teardown disable IRQ%d cpu #%d\n", |
503 | clk->irq, smp_processor_id()); | 512 | clk->irq, smp_processor_id()); |
504 | 513 | ||
505 | if (arch_timer_use_virtual) | 514 | disable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi]); |
506 | disable_percpu_irq(arch_timer_ppi[VIRT_PPI]); | 515 | if (arch_timer_has_nonsecure_ppi()) |
507 | else { | 516 | disable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI]); |
508 | disable_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI]); | ||
509 | if (arch_timer_ppi[PHYS_NONSECURE_PPI]) | ||
510 | disable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI]); | ||
511 | } | ||
512 | 517 | ||
513 | clk->set_state_shutdown(clk); | 518 | clk->set_state_shutdown(clk); |
514 | } | 519 | } |
@@ -574,12 +579,14 @@ static int __init arch_timer_register(void) | |||
574 | goto out; | 579 | goto out; |
575 | } | 580 | } |
576 | 581 | ||
577 | if (arch_timer_use_virtual) { | 582 | ppi = arch_timer_ppi[arch_timer_uses_ppi]; |
578 | ppi = arch_timer_ppi[VIRT_PPI]; | 583 | switch (arch_timer_uses_ppi) { |
584 | case VIRT_PPI: | ||
579 | err = request_percpu_irq(ppi, arch_timer_handler_virt, | 585 | err = request_percpu_irq(ppi, arch_timer_handler_virt, |
580 | "arch_timer", arch_timer_evt); | 586 | "arch_timer", arch_timer_evt); |
581 | } else { | 587 | break; |
582 | ppi = arch_timer_ppi[PHYS_SECURE_PPI]; | 588 | case PHYS_SECURE_PPI: |
589 | case PHYS_NONSECURE_PPI: | ||
583 | err = request_percpu_irq(ppi, arch_timer_handler_phys, | 590 | err = request_percpu_irq(ppi, arch_timer_handler_phys, |
584 | "arch_timer", arch_timer_evt); | 591 | "arch_timer", arch_timer_evt); |
585 | if (!err && arch_timer_ppi[PHYS_NONSECURE_PPI]) { | 592 | if (!err && arch_timer_ppi[PHYS_NONSECURE_PPI]) { |
@@ -590,6 +597,13 @@ static int __init arch_timer_register(void) | |||
590 | free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], | 597 | free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], |
591 | arch_timer_evt); | 598 | arch_timer_evt); |
592 | } | 599 | } |
600 | break; | ||
601 | case HYP_PPI: | ||
602 | err = request_percpu_irq(ppi, arch_timer_handler_phys, | ||
603 | "arch_timer", arch_timer_evt); | ||
604 | break; | ||
605 | default: | ||
606 | BUG(); | ||
593 | } | 607 | } |
594 | 608 | ||
595 | if (err) { | 609 | if (err) { |
@@ -614,15 +628,10 @@ static int __init arch_timer_register(void) | |||
614 | out_unreg_notify: | 628 | out_unreg_notify: |
615 | unregister_cpu_notifier(&arch_timer_cpu_nb); | 629 | unregister_cpu_notifier(&arch_timer_cpu_nb); |
616 | out_free_irq: | 630 | out_free_irq: |
617 | if (arch_timer_use_virtual) | 631 | free_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], arch_timer_evt); |
618 | free_percpu_irq(arch_timer_ppi[VIRT_PPI], arch_timer_evt); | 632 | if (arch_timer_has_nonsecure_ppi()) |
619 | else { | 633 | free_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], |
620 | free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], | ||
621 | arch_timer_evt); | 634 | arch_timer_evt); |
622 | if (arch_timer_ppi[PHYS_NONSECURE_PPI]) | ||
623 | free_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], | ||
624 | arch_timer_evt); | ||
625 | } | ||
626 | 635 | ||
627 | out_free: | 636 | out_free: |
628 | free_percpu(arch_timer_evt); | 637 | free_percpu(arch_timer_evt); |
@@ -709,12 +718,25 @@ static void __init arch_timer_init(void) | |||
709 | * | 718 | * |
710 | * If no interrupt provided for virtual timer, we'll have to | 719 | * If no interrupt provided for virtual timer, we'll have to |
711 | * stick to the physical timer. It'd better be accessible... | 720 | * stick to the physical timer. It'd better be accessible... |
721 | * | ||
722 | * On ARMv8.1 with the VH extensions, the kernel runs in HYP. | ||
723 | * Under VHE, accesses to the CNTP_*_EL1 registers are silently | ||
724 | * redirected to their CNTHP_*_EL2 counterparts, which use a | ||
725 | * different PPI number. | ||
712 | */ | 726 | */ |
713 | if (is_hyp_mode_available() || !arch_timer_ppi[VIRT_PPI]) { | 727 | if (is_hyp_mode_available() || !arch_timer_ppi[VIRT_PPI]) { |
714 | arch_timer_use_virtual = false; | 728 | bool has_ppi; |
729 | |||
730 | if (is_kernel_in_hyp_mode()) { | ||
731 | arch_timer_uses_ppi = HYP_PPI; | ||
732 | has_ppi = !!arch_timer_ppi[HYP_PPI]; | ||
733 | } else { | ||
734 | arch_timer_uses_ppi = PHYS_SECURE_PPI; | ||
735 | has_ppi = (!!arch_timer_ppi[PHYS_SECURE_PPI] || | ||
736 | !!arch_timer_ppi[PHYS_NONSECURE_PPI]); | ||
737 | } | ||
715 | 738 | ||
716 | if (!arch_timer_ppi[PHYS_SECURE_PPI] || | 739 | if (!has_ppi) { |
717 | !arch_timer_ppi[PHYS_NONSECURE_PPI]) { | ||
718 | pr_warn("arch_timer: No interrupt available, giving up\n"); | 740 | pr_warn("arch_timer: No interrupt available, giving up\n"); |
719 | return; | 741 | return; |
720 | } | 742 | } |
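The resulting PPI selection collapses to a small decision tree; restated as a standalone helper (is_kernel_in_hyp_mode(), is_hyp_mode_available() and the ppi_nr values are real, the helper itself is illustrative):

    static enum ppi_nr pick_arch_timer_ppi(void)
    {
            if (!is_hyp_mode_available() && arch_timer_ppi[VIRT_PPI])
                    return VIRT_PPI;    /* default: the virtual timer        */
            if (is_kernel_in_hyp_mode())
                    return HYP_PPI;     /* VHE: CNTP_* redirects to CNTHP_*  */
            return PHYS_SECURE_PPI;     /* leave the virtual timer to guests */
    }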
@@ -747,7 +769,7 @@ static void __init arch_timer_of_init(struct device_node *np) | |||
747 | */ | 769 | */ |
748 | if (IS_ENABLED(CONFIG_ARM) && | 770 | if (IS_ENABLED(CONFIG_ARM) && |
749 | of_property_read_bool(np, "arm,cpu-registers-not-fw-configured")) | 771 | of_property_read_bool(np, "arm,cpu-registers-not-fw-configured")) |
750 | arch_timer_use_virtual = false; | 772 | arch_timer_uses_ppi = PHYS_SECURE_PPI; |
751 | 773 | ||
752 | arch_timer_init(); | 774 | arch_timer_init(); |
753 | } | 775 | } |
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 4ebc796b4f33..2f8c0f40930b 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h | |||
@@ -256,12 +256,6 @@ struct hv_monitor_page { | |||
256 | u8 rsvdz4[1984]; | 256 | u8 rsvdz4[1984]; |
257 | }; | 257 | }; |
258 | 258 | ||
259 | /* Declare the various hypercall operations. */ | ||
260 | enum hv_call_code { | ||
261 | HVCALL_POST_MESSAGE = 0x005c, | ||
262 | HVCALL_SIGNAL_EVENT = 0x005d, | ||
263 | }; | ||
264 | |||
265 | /* Definition of the hv_post_message hypercall input structure. */ | 259 | /* Definition of the hv_post_message hypercall input structure. */ |
266 | struct hv_input_post_message { | 260 | struct hv_input_post_message { |
267 | union hv_connection_id connectionid; | 261 | union hv_connection_id connectionid; |
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 1800227af9d6..b651aed9dc6b 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h | |||
@@ -55,6 +55,9 @@ struct arch_timer_cpu { | |||
55 | 55 | ||
56 | /* VGIC mapping */ | 56 | /* VGIC mapping */ |
57 | struct irq_phys_map *map; | 57 | struct irq_phys_map *map; |
58 | |||
59 | /* Active IRQ state caching */ | ||
60 | bool active_cleared_last; | ||
58 | }; | 61 | }; |
59 | 62 | ||
60 | int kvm_timer_hyp_init(void); | 63 | int kvm_timer_hyp_init(void); |
@@ -74,4 +77,6 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu); | |||
74 | void kvm_timer_schedule(struct kvm_vcpu *vcpu); | 77 | void kvm_timer_schedule(struct kvm_vcpu *vcpu); |
75 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu); | 78 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu); |
76 | 79 | ||
80 | void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu); | ||
81 | |||
77 | #endif | 82 | #endif |
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h new file mode 100644 index 000000000000..fe389ac31489 --- /dev/null +++ b/include/kvm/arm_pmu.h | |||
@@ -0,0 +1,110 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015 Linaro Ltd. | ||
3 | * Author: Shannon Zhao <shannon.zhao@linaro.org> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | */ | ||
17 | |||
18 | #ifndef __ASM_ARM_KVM_PMU_H | ||
19 | #define __ASM_ARM_KVM_PMU_H | ||
20 | |||
21 | #ifdef CONFIG_KVM_ARM_PMU | ||
22 | |||
23 | #include <linux/perf_event.h> | ||
24 | #include <asm/perf_event.h> | ||
25 | |||
26 | #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) | ||
27 | |||
28 | struct kvm_pmc { | ||
29 | u8 idx; /* index into the pmu->pmc array */ | ||
30 | struct perf_event *perf_event; | ||
31 | u64 bitmask; | ||
32 | }; | ||
33 | |||
34 | struct kvm_pmu { | ||
35 | int irq_num; | ||
36 | struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; | ||
37 | bool ready; | ||
38 | bool irq_level; | ||
39 | }; | ||
40 | |||
41 | #define kvm_arm_pmu_v3_ready(v) ((v)->arch.pmu.ready) | ||
42 | #define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS) | ||
43 | u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); | ||
44 | void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); | ||
45 | u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu); | ||
46 | void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu); | ||
47 | void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu); | ||
48 | void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val); | ||
49 | void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val); | ||
50 | void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val); | ||
51 | void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu); | ||
52 | void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu); | ||
53 | void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val); | ||
54 | void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val); | ||
55 | void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, | ||
56 | u64 select_idx); | ||
57 | bool kvm_arm_support_pmu_v3(void); | ||
58 | int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, | ||
59 | struct kvm_device_attr *attr); | ||
60 | int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, | ||
61 | struct kvm_device_attr *attr); | ||
62 | int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, | ||
63 | struct kvm_device_attr *attr); | ||
64 | #else | ||
65 | struct kvm_pmu { | ||
66 | }; | ||
67 | |||
68 | #define kvm_arm_pmu_v3_ready(v) (false) | ||
69 | #define kvm_arm_pmu_irq_initialized(v) (false) | ||
70 | static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, | ||
71 | u64 select_idx) | ||
72 | { | ||
73 | return 0; | ||
74 | } | ||
75 | static inline void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, | ||
76 | u64 select_idx, u64 val) {} | ||
77 | static inline u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) | ||
78 | { | ||
79 | return 0; | ||
80 | } | ||
81 | static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {} | ||
82 | static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {} | ||
83 | static inline void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val) {} | ||
84 | static inline void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) {} | ||
85 | static inline void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) {} | ||
86 | static inline void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) {} | ||
87 | static inline void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {} | ||
88 | static inline void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) {} | ||
89 | static inline void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) {} | ||
90 | static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, | ||
91 | u64 data, u64 select_idx) {} | ||
92 | static inline bool kvm_arm_support_pmu_v3(void) { return false; } | ||
93 | static inline int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, | ||
94 | struct kvm_device_attr *attr) | ||
95 | { | ||
96 | return -ENXIO; | ||
97 | } | ||
98 | static inline int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, | ||
99 | struct kvm_device_attr *attr) | ||
100 | { | ||
101 | return -ENXIO; | ||
102 | } | ||
103 | static inline int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, | ||
104 | struct kvm_device_attr *attr) | ||
105 | { | ||
106 | return -ENXIO; | ||
107 | } | ||
108 | #endif | ||
109 | |||
110 | #endif | ||
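The !CONFIG_KVM_ARM_PMU half of the header provides inert stubs so call sites need no #ifdefs; for example (illustrative call site):

    /* Compiles either way; a no-op when CONFIG_KVM_ARM_PMU is not set. */
    kvm_pmu_flush_hwstate(vcpu);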
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 13a3d537811b..281caf847fad 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h | |||
@@ -279,12 +279,6 @@ struct vgic_v2_cpu_if { | |||
279 | u32 vgic_lr[VGIC_V2_MAX_LRS]; | 279 | u32 vgic_lr[VGIC_V2_MAX_LRS]; |
280 | }; | 280 | }; |
281 | 281 | ||
282 | /* | ||
283 | * LRs are stored in reverse order in memory. make sure we index them | ||
284 | * correctly. | ||
285 | */ | ||
286 | #define VGIC_V3_LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr) | ||
287 | |||
288 | struct vgic_v3_cpu_if { | 282 | struct vgic_v3_cpu_if { |
289 | #ifdef CONFIG_KVM_ARM_VGIC_V3 | 283 | #ifdef CONFIG_KVM_ARM_VGIC_V3 |
290 | u32 vgic_hcr; | 284 | u32 vgic_hcr; |
@@ -321,6 +315,8 @@ struct vgic_cpu { | |||
321 | 315 | ||
322 | /* Protected by the distributor's irq_phys_map_lock */ | 316 | /* Protected by the distributor's irq_phys_map_lock */ |
323 | struct list_head irq_phys_map_list; | 317 | struct list_head irq_phys_map_list; |
318 | |||
319 | u64 live_lrs; | ||
324 | }; | 320 | }; |
325 | 321 | ||
326 | #define LR_EMPTY 0xff | 322 | #define LR_EMPTY 0xff |
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index d6f83222a6a1..aa69253ecc7d 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h | |||
@@ -359,14 +359,15 @@ TRACE_EVENT( | |||
359 | #endif | 359 | #endif |
360 | 360 | ||
361 | TRACE_EVENT(kvm_halt_poll_ns, | 361 | TRACE_EVENT(kvm_halt_poll_ns, |
362 | TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), | 362 | TP_PROTO(bool grow, unsigned int vcpu_id, unsigned int new, |
363 | unsigned int old), | ||
363 | TP_ARGS(grow, vcpu_id, new, old), | 364 | TP_ARGS(grow, vcpu_id, new, old), |
364 | 365 | ||
365 | TP_STRUCT__entry( | 366 | TP_STRUCT__entry( |
366 | __field(bool, grow) | 367 | __field(bool, grow) |
367 | __field(unsigned int, vcpu_id) | 368 | __field(unsigned int, vcpu_id) |
368 | __field(int, new) | 369 | __field(unsigned int, new) |
369 | __field(int, old) | 370 | __field(unsigned int, old) |
370 | ), | 371 | ), |
371 | 372 | ||
372 | TP_fast_assign( | 373 | TP_fast_assign( |
@@ -376,7 +377,7 @@ TRACE_EVENT(kvm_halt_poll_ns, | |||
376 | __entry->old = old; | 377 | __entry->old = old; |
377 | ), | 378 | ), |
378 | 379 | ||
379 | TP_printk("vcpu %u: halt_poll_ns %d (%s %d)", | 380 | TP_printk("vcpu %u: halt_poll_ns %u (%s %u)", |
380 | __entry->vcpu_id, | 381 | __entry->vcpu_id, |
381 | __entry->new, | 382 | __entry->new, |
382 | __entry->grow ? "grow" : "shrink", | 383 | __entry->grow ? "grow" : "shrink", |
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 9da905157cee..a7f1f8032ec1 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
@@ -157,6 +157,7 @@ struct kvm_s390_skeys { | |||
157 | 157 | ||
158 | struct kvm_hyperv_exit { | 158 | struct kvm_hyperv_exit { |
159 | #define KVM_EXIT_HYPERV_SYNIC 1 | 159 | #define KVM_EXIT_HYPERV_SYNIC 1 |
160 | #define KVM_EXIT_HYPERV_HCALL 2 | ||
160 | __u32 type; | 161 | __u32 type; |
161 | union { | 162 | union { |
162 | struct { | 163 | struct { |
@@ -165,6 +166,11 @@ struct kvm_hyperv_exit { | |||
165 | __u64 evt_page; | 166 | __u64 evt_page; |
166 | __u64 msg_page; | 167 | __u64 msg_page; |
167 | } synic; | 168 | } synic; |
169 | struct { | ||
170 | __u64 input; | ||
171 | __u64 result; | ||
172 | __u64 params[2]; | ||
173 | } hcall; | ||
168 | } u; | 174 | } u; |
169 | }; | 175 | }; |
170 | 176 | ||
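KVM_EXIT_HYPERV_HCALL reuses the existing KVM_EXIT_HYPERV exit reason and hands userspace the hypercall input and parameters, expecting the result slot to be filled before the vcpu is resumed. A hedged VMM-side sketch (kvm_run's 'hyperv' member and the exit reason are existing UAPI; the dispatch is illustrative):

    case KVM_EXIT_HYPERV:
            if (run->hyperv.type == KVM_EXIT_HYPERV_HCALL) {
                    __u16 code = run->hyperv.u.hcall.input & 0xffff;

                    /* emulate hypercall 'code' using u.hcall.params[]... */
                    run->hyperv.u.hcall.result = 0;  /* HV_STATUS_SUCCESS */
            }
            break;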
@@ -541,7 +547,13 @@ struct kvm_s390_pgm_info { | |||
541 | __u8 exc_access_id; | 547 | __u8 exc_access_id; |
542 | __u8 per_access_id; | 548 | __u8 per_access_id; |
543 | __u8 op_access_id; | 549 | __u8 op_access_id; |
544 | __u8 pad[3]; | 550 | #define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 |
551 | #define KVM_S390_PGM_FLAGS_ILC_0 0x02 | ||
552 | #define KVM_S390_PGM_FLAGS_ILC_1 0x04 | ||
553 | #define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 | ||
554 | #define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 | ||
555 | __u8 flags; | ||
556 | __u8 pad[2]; | ||
545 | }; | 557 | }; |
546 | 558 | ||
547 | struct kvm_s390_prefix_info { | 559 | struct kvm_s390_prefix_info { |
@@ -850,6 +862,9 @@ struct kvm_ppc_smmu_info { | |||
850 | #define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 | 862 | #define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 |
851 | #define KVM_CAP_HYPERV_SYNIC 123 | 863 | #define KVM_CAP_HYPERV_SYNIC 123 |
852 | #define KVM_CAP_S390_RI 124 | 864 | #define KVM_CAP_S390_RI 124 |
865 | #define KVM_CAP_SPAPR_TCE_64 125 | ||
866 | #define KVM_CAP_ARM_PMU_V3 126 | ||
867 | #define KVM_CAP_VCPU_ATTRIBUTES 127 | ||
853 | 868 | ||
854 | #ifdef KVM_CAP_IRQ_ROUTING | 869 | #ifdef KVM_CAP_IRQ_ROUTING |
855 | 870 | ||
@@ -1142,6 +1157,8 @@ struct kvm_s390_ucas_mapping { | |||
1142 | /* Available with KVM_CAP_PPC_ALLOC_HTAB */ | 1157 | /* Available with KVM_CAP_PPC_ALLOC_HTAB */ |
1143 | #define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32) | 1158 | #define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32) |
1144 | #define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) | 1159 | #define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) |
1160 | #define KVM_CREATE_SPAPR_TCE_64 _IOW(KVMIO, 0xa8, \ | ||
1161 | struct kvm_create_spapr_tce_64) | ||
1145 | /* Available with KVM_CAP_RMA */ | 1162 | /* Available with KVM_CAP_RMA */ |
1146 | #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) | 1163 | #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) |
1147 | /* Available with KVM_CAP_PPC_HTAB_FD */ | 1164 | /* Available with KVM_CAP_PPC_HTAB_FD */ |
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index ea6064696fe4..a9ad4fe3f68f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c | |||
@@ -34,6 +34,11 @@ static struct timecounter *timecounter; | |||
34 | static struct workqueue_struct *wqueue; | 34 | static struct workqueue_struct *wqueue; |
35 | static unsigned int host_vtimer_irq; | 35 | static unsigned int host_vtimer_irq; |
36 | 36 | ||
37 | void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) | ||
38 | { | ||
39 | vcpu->arch.timer_cpu.active_cleared_last = false; | ||
40 | } | ||
41 | |||
37 | static cycle_t kvm_phys_timer_read(void) | 42 | static cycle_t kvm_phys_timer_read(void) |
38 | { | 43 | { |
39 | return timecounter->cc->read(timecounter->cc); | 44 | return timecounter->cc->read(timecounter->cc); |
@@ -130,6 +135,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) | |||
130 | 135 | ||
131 | BUG_ON(!vgic_initialized(vcpu->kvm)); | 136 | BUG_ON(!vgic_initialized(vcpu->kvm)); |
132 | 137 | ||
138 | timer->active_cleared_last = false; | ||
133 | timer->irq.level = new_level; | 139 | timer->irq.level = new_level; |
134 | trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq, | 140 | trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq, |
135 | timer->irq.level); | 141 | timer->irq.level); |
@@ -245,10 +251,35 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | |||
245 | else | 251 | else |
246 | phys_active = false; | 252 | phys_active = false; |
247 | 253 | ||
254 | /* | ||
255 | * We want to avoid hitting the (re)distributor as much as | ||
256 | * possible, as this is a potentially expensive MMIO access | ||
257 | * (not to mention locks in the irq layer), and a solution for | ||
258 | * this is to cache the "active" state in memory. | ||
259 | * | ||
260 | * Things to consider: we cannot cache an "active set" state, | ||
261 | * because the HW can change this behind our back (it becomes | ||
262 | * "clear" in the HW). We must then restrict the caching to | ||
263 | * the "clear" state. | ||
264 | * | ||
265 | * The cache is invalidated on: | ||
266 | * - vcpu put, indicating that the HW cannot be trusted to be | ||
267 | * in a sane state on the next vcpu load, | ||
268 | * - any change in the interrupt state | ||
269 | * | ||
270 | * Usage conditions: | ||
271 | * - cached value is "active clear" | ||
272 | * - value to be programmed is "active clear" | ||
273 | */ | ||
274 | if (timer->active_cleared_last && !phys_active) | ||
275 | return; | ||
276 | |||
248 | ret = irq_set_irqchip_state(timer->map->irq, | 277 | ret = irq_set_irqchip_state(timer->map->irq, |
249 | IRQCHIP_STATE_ACTIVE, | 278 | IRQCHIP_STATE_ACTIVE, |
250 | phys_active); | 279 | phys_active); |
251 | WARN_ON(ret); | 280 | WARN_ON(ret); |
281 | |||
282 | timer->active_cleared_last = !phys_active; | ||
252 | } | 283 | } |
253 | 284 | ||
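The protocol described in the comment above reduces to three rules (an illustrative summary, not code from the patch):

    /* vcpu put:          active_cleared_last = false  (HW no longer trusted) */
    /* irq level change:  active_cleared_last = false  (state moved under us) */
    /* flush_hwstate:     cached clear && want clear -> skip the GIC MMIO;    */
    /*                    otherwise program the GIC and cache (!phys_active)  */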
254 | /** | 285 | /** |
diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c index 1051e5d7320f..ea00d69e7078 100644 --- a/arch/arm64/kvm/hyp/timer-sr.c +++ b/virt/kvm/arm/hyp/timer-sr.c | |||
@@ -19,9 +19,7 @@ | |||
19 | #include <linux/compiler.h> | 19 | #include <linux/compiler.h> |
20 | #include <linux/kvm_host.h> | 20 | #include <linux/kvm_host.h> |
21 | 21 | ||
22 | #include <asm/kvm_mmu.h> | 22 | #include <asm/kvm_hyp.h> |
23 | |||
24 | #include "hyp.h" | ||
25 | 23 | ||
26 | /* vcpu is already in the HYP VA space */ | 24 | /* vcpu is already in the HYP VA space */ |
27 | void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) | 25 | void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) |
@@ -31,12 +29,12 @@ void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) | |||
31 | u64 val; | 29 | u64 val; |
32 | 30 | ||
33 | if (kvm->arch.timer.enabled) { | 31 | if (kvm->arch.timer.enabled) { |
34 | timer->cntv_ctl = read_sysreg(cntv_ctl_el0); | 32 | timer->cntv_ctl = read_sysreg_el0(cntv_ctl); |
35 | timer->cntv_cval = read_sysreg(cntv_cval_el0); | 33 | timer->cntv_cval = read_sysreg_el0(cntv_cval); |
36 | } | 34 | } |
37 | 35 | ||
38 | /* Disable the virtual timer */ | 36 | /* Disable the virtual timer */ |
39 | write_sysreg(0, cntv_ctl_el0); | 37 | write_sysreg_el0(0, cntv_ctl); |
40 | 38 | ||
41 | /* Allow physical timer/counter access for the host */ | 39 | /* Allow physical timer/counter access for the host */ |
42 | val = read_sysreg(cnthctl_el2); | 40 | val = read_sysreg(cnthctl_el2); |
@@ -64,8 +62,8 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) | |||
64 | 62 | ||
65 | if (kvm->arch.timer.enabled) { | 63 | if (kvm->arch.timer.enabled) { |
66 | write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); | 64 | write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); |
67 | write_sysreg(timer->cntv_cval, cntv_cval_el0); | 65 | write_sysreg_el0(timer->cntv_cval, cntv_cval); |
68 | isb(); | 66 | isb(); |
69 | write_sysreg(timer->cntv_ctl, cntv_ctl_el0); | 67 | write_sysreg_el0(timer->cntv_ctl, cntv_ctl); |
70 | } | 68 | } |
71 | } | 69 | } |
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c new file mode 100644 index 000000000000..674bdf8ecf4f --- /dev/null +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c | |||
@@ -0,0 +1,170 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2012-2015 - ARM Ltd | ||
3 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | */ | ||
17 | |||
18 | #include <linux/compiler.h> | ||
19 | #include <linux/irqchip/arm-gic.h> | ||
20 | #include <linux/kvm_host.h> | ||
21 | |||
22 | #include <asm/kvm_hyp.h> | ||
23 | |||
24 | static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, | ||
25 | void __iomem *base) | ||
26 | { | ||
27 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | ||
28 | int nr_lr = vcpu->arch.vgic_cpu.nr_lr; | ||
29 | u32 eisr0, eisr1; | ||
30 | int i; | ||
31 | bool expect_mi; | ||
32 | |||
33 | expect_mi = !!(cpu_if->vgic_hcr & GICH_HCR_UIE); | ||
34 | |||
35 | for (i = 0; i < nr_lr; i++) { | ||
36 | if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) | ||
37 | continue; | ||
38 | |||
39 | expect_mi |= (!(cpu_if->vgic_lr[i] & GICH_LR_HW) && | ||
40 | (cpu_if->vgic_lr[i] & GICH_LR_EOI)); | ||
41 | } | ||
42 | |||
43 | if (expect_mi) { | ||
44 | cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR); | ||
45 | |||
46 | if (cpu_if->vgic_misr & GICH_MISR_EOI) { | ||
47 | eisr0 = readl_relaxed(base + GICH_EISR0); | ||
48 | if (unlikely(nr_lr > 32)) | ||
49 | eisr1 = readl_relaxed(base + GICH_EISR1); | ||
50 | else | ||
51 | eisr1 = 0; | ||
52 | } else { | ||
53 | eisr0 = eisr1 = 0; | ||
54 | } | ||
55 | } else { | ||
56 | cpu_if->vgic_misr = 0; | ||
57 | eisr0 = eisr1 = 0; | ||
58 | } | ||
59 | |||
60 | #ifdef CONFIG_CPU_BIG_ENDIAN | ||
61 | cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1; | ||
62 | #else | ||
63 | cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0; | ||
64 | #endif | ||
65 | } | ||
66 | |||
67 | static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) | ||
68 | { | ||
69 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | ||
70 | int nr_lr = vcpu->arch.vgic_cpu.nr_lr; | ||
71 | u32 elrsr0, elrsr1; | ||
72 | |||
73 | elrsr0 = readl_relaxed(base + GICH_ELRSR0); | ||
74 | if (unlikely(nr_lr > 32)) | ||
75 | elrsr1 = readl_relaxed(base + GICH_ELRSR1); | ||
76 | else | ||
77 | elrsr1 = 0; | ||
78 | |||
79 | #ifdef CONFIG_CPU_BIG_ENDIAN | ||
80 | cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1; | ||
81 | #else | ||
82 | cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; | ||
83 | #endif | ||
84 | } | ||
85 | |||
86 | static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) | ||
87 | { | ||
88 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | ||
89 | int nr_lr = vcpu->arch.vgic_cpu.nr_lr; | ||
90 | int i; | ||
91 | |||
92 | for (i = 0; i < nr_lr; i++) { | ||
93 | if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) | ||
94 | continue; | ||
95 | |||
96 | if (cpu_if->vgic_elrsr & (1UL << i)) { | ||
97 | cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; | ||
98 | continue; | ||
99 | } | ||
100 | |||
101 | cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); | ||
102 | writel_relaxed(0, base + GICH_LR0 + (i * 4)); | ||
103 | } | ||
104 | } | ||
105 | |||
106 | /* vcpu is already in the HYP VA space */ | ||
107 | void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) | ||
108 | { | ||
109 | struct kvm *kvm = kern_hyp_va(vcpu->kvm); | ||
110 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | ||
111 | struct vgic_dist *vgic = &kvm->arch.vgic; | ||
112 | void __iomem *base = kern_hyp_va(vgic->vctrl_base); | ||
113 | |||
114 | if (!base) | ||
115 | return; | ||
116 | |||
117 | cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR); | ||
118 | |||
119 | if (vcpu->arch.vgic_cpu.live_lrs) { | ||
120 | cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); | ||
121 | |||
122 | save_maint_int_state(vcpu, base); | ||
123 | save_elrsr(vcpu, base); | ||
124 | save_lrs(vcpu, base); | ||
125 | |||
126 | writel_relaxed(0, base + GICH_HCR); | ||
127 | |||
128 | vcpu->arch.vgic_cpu.live_lrs = 0; | ||
129 | } else { | ||
130 | cpu_if->vgic_eisr = 0; | ||
131 | cpu_if->vgic_elrsr = ~0UL; | ||
132 | cpu_if->vgic_misr = 0; | ||
133 | cpu_if->vgic_apr = 0; | ||
134 | } | ||
135 | } | ||
136 | |||
137 | /* vcpu is already in the HYP VA space */ | ||
138 | void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) | ||
139 | { | ||
140 | struct kvm *kvm = kern_hyp_va(vcpu->kvm); | ||
141 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | ||
142 | struct vgic_dist *vgic = &kvm->arch.vgic; | ||
143 | void __iomem *base = kern_hyp_va(vgic->vctrl_base); | ||
144 | int i, nr_lr; | ||
145 | u64 live_lrs = 0; | ||
146 | |||
147 | if (!base) | ||
148 | return; | ||
149 | |||
150 | nr_lr = vcpu->arch.vgic_cpu.nr_lr; | ||
151 | |||
152 | for (i = 0; i < nr_lr; i++) | ||
153 | if (cpu_if->vgic_lr[i] & GICH_LR_STATE) | ||
154 | live_lrs |= 1UL << i; | ||
155 | |||
156 | if (live_lrs) { | ||
157 | writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); | ||
158 | writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); | ||
159 | for (i = 0; i < nr_lr; i++) { | ||
160 | if (!(live_lrs & (1UL << i))) | ||
161 | continue; | ||
162 | |||
163 | writel_relaxed(cpu_if->vgic_lr[i], | ||
164 | base + GICH_LR0 + (i * 4)); | ||
165 | } | ||
166 | } | ||
167 | |||
168 | writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR); | ||
169 | vcpu->arch.vgic_cpu.live_lrs = live_lrs; | ||
170 | } | ||
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c new file mode 100644 index 000000000000..b5754c6c5508 --- /dev/null +++ b/virt/kvm/arm/pmu.c | |||
@@ -0,0 +1,529 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015 Linaro Ltd. | ||
3 | * Author: Shannon Zhao <shannon.zhao@linaro.org> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | */ | ||
17 | |||
18 | #include <linux/cpu.h> | ||
19 | #include <linux/kvm.h> | ||
20 | #include <linux/kvm_host.h> | ||
21 | #include <linux/perf_event.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | #include <asm/kvm_emulate.h> | ||
24 | #include <kvm/arm_pmu.h> | ||
25 | #include <kvm/arm_vgic.h> | ||
26 | |||
27 | /** | ||
28 | * kvm_pmu_get_counter_value - get PMU counter value | ||
29 | * @vcpu: The vcpu pointer | ||
30 | * @select_idx: The counter index | ||
31 | */ | ||
32 | u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) | ||
33 | { | ||
34 | u64 counter, reg, enabled, running; | ||
35 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
36 | struct kvm_pmc *pmc = &pmu->pmc[select_idx]; | ||
37 | |||
38 | reg = (select_idx == ARMV8_PMU_CYCLE_IDX) | ||
39 | ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; | ||
40 | counter = vcpu_sys_reg(vcpu, reg); | ||
41 | |||
42 | /* The real counter value is the value of the counter register plus ||
43 | * whatever the backing perf event has counted since it was created. ||
44 | */ ||
45 | if (pmc->perf_event) | ||
46 | counter += perf_event_read_value(pmc->perf_event, &enabled, | ||
47 | &running); | ||
48 | |||
49 | return counter & pmc->bitmask; | ||
50 | } | ||
51 | |||
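A worked example with illustrative numbers: if the guest last programmed the counter so that vcpu_sys_reg() holds 1000, and the backing perf event has counted 250 events since it was created, the guest reads 1000 + 250 = 1250, masked by pmc->bitmask (0xffffffff for a 32-bit counter).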
52 | /** | ||
53 | * kvm_pmu_set_counter_value - set PMU counter value | ||
54 | * @vcpu: The vcpu pointer | ||
55 | * @select_idx: The counter index | ||
56 | * @val: The counter value | ||
57 | */ | ||
58 | void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) | ||
59 | { | ||
60 | u64 reg; | ||
61 | |||
62 | reg = (select_idx == ARMV8_PMU_CYCLE_IDX) | ||
63 | ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; | ||
64 | vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); | ||
65 | } | ||
66 | |||
67 | /** | ||
68 | * kvm_pmu_stop_counter - stop PMU counter | ||
69 | * @pmc: The PMU counter pointer | ||
70 | * | ||
71 | * If this counter has been configured to monitor some event, release it here. | ||
72 | */ | ||
73 | static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) | ||
74 | { | ||
75 | u64 counter, reg; | ||
76 | |||
77 | if (pmc->perf_event) { | ||
78 | counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); | ||
79 | reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) | ||
80 | ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; | ||
81 | vcpu_sys_reg(vcpu, reg) = counter; | ||
82 | perf_event_disable(pmc->perf_event); | ||
83 | perf_event_release_kernel(pmc->perf_event); | ||
84 | pmc->perf_event = NULL; | ||
85 | } | ||
86 | } | ||
87 | |||
88 | /** | ||
89 | * kvm_pmu_vcpu_reset - reset pmu state for cpu | ||
90 | * @vcpu: The vcpu pointer | ||
91 | * | ||
92 | */ | ||
93 | void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) | ||
94 | { | ||
95 | int i; | ||
96 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
97 | |||
98 | for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { | ||
99 | kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); | ||
100 | pmu->pmc[i].idx = i; | ||
101 | pmu->pmc[i].bitmask = 0xffffffffUL; | ||
102 | } | ||
103 | } | ||
104 | |||
105 | /** | ||
106 | * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu | ||
107 | * @vcpu: The vcpu pointer | ||
108 | * | ||
109 | */ | ||
110 | void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) | ||
111 | { | ||
112 | int i; | ||
113 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
114 | |||
115 | for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { | ||
116 | struct kvm_pmc *pmc = &pmu->pmc[i]; | ||
117 | |||
118 | if (pmc->perf_event) { | ||
119 | perf_event_disable(pmc->perf_event); | ||
120 | perf_event_release_kernel(pmc->perf_event); | ||
121 | pmc->perf_event = NULL; | ||
122 | } | ||
123 | } | ||
124 | } | ||
125 | |||
126 | u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) | ||
127 | { | ||
128 | u64 val = vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT; | ||
129 | |||
130 | val &= ARMV8_PMU_PMCR_N_MASK; | ||
131 | if (val == 0) | ||
132 | return BIT(ARMV8_PMU_CYCLE_IDX); | ||
133 | else | ||
134 | return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX); | ||
135 | } | ||
136 | |||
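Worked example: with ARMV8_PMU_CYCLE_IDX == 31, a PMCR_EL0.N field of 4 (four event counters) yields GENMASK(3, 0) | BIT(31) == 0x8000000f, i.e. counters 0-3 plus the cycle counter; N == 0 yields BIT(31) alone, since only the cycle counter exists.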
137 | /** | ||
138 | * kvm_pmu_enable_counter - enable selected PMU counter | ||
139 | * @vcpu: The vcpu pointer | ||
140 | * @val: the value guest writes to PMCNTENSET register | ||
141 | * | ||
142 | * Call perf_event_enable to start counting the perf event | ||
143 | */ | ||
144 | void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) | ||
145 | { | ||
146 | int i; | ||
147 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
148 | struct kvm_pmc *pmc; | ||
149 | |||
150 | if (!(vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) | ||
151 | return; | ||
152 | |||
153 | for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { | ||
154 | if (!(val & BIT(i))) | ||
155 | continue; | ||
156 | |||
157 | pmc = &pmu->pmc[i]; | ||
158 | if (pmc->perf_event) { | ||
159 | perf_event_enable(pmc->perf_event); | ||
160 | if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) | ||
161 | kvm_debug("fail to enable perf event\n"); | ||
162 | } | ||
163 | } | ||
164 | } | ||
165 | |||
166 | /** | ||
167 | * kvm_pmu_disable_counter - disable selected PMU counter | ||
168 | * @vcpu: The vcpu pointer | ||
169 | * @val: the value guest writes to PMCNTENCLR register | ||
170 | * | ||
171 | * Call perf_event_disable to stop counting the perf event | ||
172 | */ | ||
173 | void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val) | ||
174 | { | ||
175 | int i; | ||
176 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
177 | struct kvm_pmc *pmc; | ||
178 | |||
179 | if (!val) | ||
180 | return; | ||
181 | |||
182 | for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { | ||
183 | if (!(val & BIT(i))) | ||
184 | continue; | ||
185 | |||
186 | pmc = &pmu->pmc[i]; | ||
187 | if (pmc->perf_event) | ||
188 | perf_event_disable(pmc->perf_event); | ||
189 | } | ||
190 | } | ||
191 | |||
192 | static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) | ||
193 | { | ||
194 | u64 reg = 0; | ||
195 | |||
196 | if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) | ||
197 | reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0); | ||
198 | reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0); | ||
199 | reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1); | ||
200 | reg &= kvm_pmu_valid_counter_mask(vcpu); | ||
201 | |||
202 | return reg; | ||
203 | } | ||
204 | |||
205 | /** | ||
206 | * kvm_pmu_overflow_set - set PMU overflow interrupt | ||
207 | * @vcpu: The vcpu pointer | ||
208 | * @val: the value guest writes to PMOVSSET register | ||
209 | */ | ||
210 | void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) | ||
211 | { | ||
212 | u64 reg; | ||
213 | |||
214 | if (val == 0) | ||
215 | return; | ||
216 | |||
217 | vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val; | ||
218 | reg = kvm_pmu_overflow_status(vcpu); | ||
219 | if (reg != 0) | ||
220 | kvm_vcpu_kick(vcpu); | ||
221 | } | ||
222 | |||
223 | static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) | ||
224 | { | ||
225 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
226 | bool overflow; | ||
227 | |||
228 | if (!kvm_arm_pmu_v3_ready(vcpu)) | ||
229 | return; | ||
230 | |||
231 | overflow = !!kvm_pmu_overflow_status(vcpu); | ||
232 | if (pmu->irq_level != overflow) { | ||
233 | pmu->irq_level = overflow; | ||
234 | kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, | ||
235 | pmu->irq_num, overflow); | ||
236 | } | ||
237 | } | ||
238 | |||
239 | /** | ||
240 | * kvm_pmu_flush_hwstate - flush pmu state to cpu | ||
241 | * @vcpu: The vcpu pointer | ||
242 | * | ||
243 | * Check if the PMU has overflowed while we were running in the host, and inject | ||
244 | * an interrupt if that was the case. | ||
245 | */ | ||
246 | void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) | ||
247 | { | ||
248 | kvm_pmu_update_state(vcpu); | ||
249 | } | ||
250 | |||
251 | /** | ||
252 | * kvm_pmu_sync_hwstate - sync pmu state from cpu | ||
253 | * @vcpu: The vcpu pointer | ||
254 | * | ||
255 | * Check if the PMU has overflowed while we were running in the guest, and | ||
256 | * inject an interrupt if that was the case. | ||
257 | */ | ||
258 | void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) | ||
259 | { | ||
260 | kvm_pmu_update_state(vcpu); | ||
261 | } | ||
262 | |||
263 | static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) | ||
264 | { | ||
265 | struct kvm_pmu *pmu; | ||
266 | struct kvm_vcpu_arch *vcpu_arch; | ||
267 | |||
268 | pmc -= pmc->idx; | ||
269 | pmu = container_of(pmc, struct kvm_pmu, pmc[0]); | ||
270 | vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu); | ||
271 | return container_of(vcpu_arch, struct kvm_vcpu, arch); | ||
272 | } | ||
273 | |||
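The back-pointer above relies on array layout: pmc->idx is the element's index within pmu->pmc[], so `pmc -= pmc->idx` rewinds to pmc[0], after which container_of() climbs from the array to the pmu, the vcpu_arch, and finally the vcpu. A self-contained sketch of the same pattern, with stand-in struct names (pmc/pmu/vm here are illustrative, not the kernel's):

	#include <stddef.h>
	#include <stdio.h>

	/* Illustrative stand-ins for the KVM structures. */
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct pmc { int idx; };
	struct pmu { struct pmc pmc[4]; };
	struct vm  { int id; struct pmu pmu; };

	static struct vm *pmc_to_vm(struct pmc *pmc)
	{
		struct pmu *pmu;

		pmc -= pmc->idx;	/* rewind to pmc[0] */
		pmu = container_of(pmc, struct pmu, pmc[0]);
		return container_of(pmu, struct vm, pmu);
	}

	int main(void)
	{
		struct vm vm = { .id = 42 };
		int i;

		for (i = 0; i < 4; i++)
			vm.pmu.pmc[i].idx = i;

		printf("%d\n", pmc_to_vm(&vm.pmu.pmc[3])->id); /* prints 42 */
		return 0;
	}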
274 | /** | ||
275 | * When the perf event overflows, call kvm_pmu_overflow_set to set the overflow status. | ||
276 | */ | ||
277 | static void kvm_pmu_perf_overflow(struct perf_event *perf_event, | ||
278 | struct perf_sample_data *data, | ||
279 | struct pt_regs *regs) | ||
280 | { | ||
281 | struct kvm_pmc *pmc = perf_event->overflow_handler_context; | ||
282 | struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); | ||
283 | int idx = pmc->idx; | ||
284 | |||
285 | kvm_pmu_overflow_set(vcpu, BIT(idx)); | ||
286 | } | ||
287 | |||
288 | /** | ||
289 | * kvm_pmu_software_increment - do software increment | ||
290 | * @vcpu: The vcpu pointer | ||
291 | * @val: the value the guest writes to the PMSWINC register | ||
292 | */ | ||
293 | void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) | ||
294 | { | ||
295 | int i; | ||
296 | u64 type, enable, reg; | ||
297 | |||
298 | if (val == 0) | ||
299 | return; | ||
300 | |||
301 | enable = vcpu_sys_reg(vcpu, PMCNTENSET_EL0); | ||
302 | for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { | ||
303 | if (!(val & BIT(i))) | ||
304 | continue; | ||
305 | type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) | ||
306 | & ARMV8_PMU_EVTYPE_EVENT; | ||
307 | if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR) | ||
308 | && (enable & BIT(i))) { | ||
309 | reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; | ||
310 | reg = lower_32_bits(reg); | ||
311 | vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; | ||
312 | if (!reg) | ||
313 | kvm_pmu_overflow_set(vcpu, BIT(i)); | ||
314 | } | ||
315 | } | ||
316 | } | ||
317 | |||
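Event counters other than the cycle counter are 32 bits wide, so the increment is truncated with lower_32_bits() and a result of zero means the counter wrapped, which is what marks the overflow. A minimal sketch of that wrap test, assuming a plain integer counter:

	#include <stdint.h>

	/* Illustrative only: the 32-bit wrap test used above for a software
	 * increment. Returns the new counter value; *overflow is set when
	 * the increment wrapped to zero. */
	static uint64_t sw_incr(uint64_t counter, int *overflow)
	{
		uint32_t val = (uint32_t)(counter + 1);	/* lower_32_bits() */

		*overflow = (val == 0);
		return val;
	}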
318 | /** | ||
319 | * kvm_pmu_handle_pmcr - handle PMCR register | ||
320 | * @vcpu: The vcpu pointer | ||
321 | * @val: the value the guest writes to the PMCR register | ||
322 | */ | ||
323 | void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) | ||
324 | { | ||
325 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
326 | struct kvm_pmc *pmc; | ||
327 | u64 mask; | ||
328 | int i; | ||
329 | |||
330 | mask = kvm_pmu_valid_counter_mask(vcpu); | ||
331 | if (val & ARMV8_PMU_PMCR_E) { | ||
332 | kvm_pmu_enable_counter(vcpu, | ||
333 | vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); | ||
334 | } else { | ||
335 | kvm_pmu_disable_counter(vcpu, mask); | ||
336 | } | ||
337 | |||
338 | if (val & ARMV8_PMU_PMCR_C) | ||
339 | kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); | ||
340 | |||
341 | if (val & ARMV8_PMU_PMCR_P) { | ||
342 | for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) | ||
343 | kvm_pmu_set_counter_value(vcpu, i, 0); | ||
344 | } | ||
345 | |||
346 | if (val & ARMV8_PMU_PMCR_LC) { | ||
347 | pmc = &pmu->pmc[ARMV8_PMU_CYCLE_IDX]; | ||
348 | pmc->bitmask = 0xffffffffffffffffUL; | ||
349 | } | ||
350 | } | ||
351 | |||
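The PMCR_EL0 bits acted on above sit at E = bit 0 (global enable), P = bit 1 (reset event counters), C = bit 2 (reset the cycle counter), and LC = bit 6 (cycle counter overflows at 64 bits, hence the widened bitmask). A small decoding sketch, with illustrative macro names:

	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative only: the PMCR_EL0 bits handled above, per the
	 * ARMv8 ARM. Bit positions are E=0, P=1, C=2, LC=6. */
	#define PMCR_E	(1u << 0)	/* global counter enable */
	#define PMCR_P	(1u << 1)	/* reset all event counters */
	#define PMCR_C	(1u << 2)	/* reset the cycle counter */
	#define PMCR_LC	(1u << 6)	/* cycle counter overflows at 64 bits */

	static void decode_pmcr(uint64_t val)
	{
		printf("counters %s\n", (val & PMCR_E) ? "enabled" : "disabled");
		if (val & PMCR_P)
			printf("event counters reset\n");
		if (val & PMCR_C)
			printf("cycle counter reset\n");
		if (val & PMCR_LC)
			printf("cycle counter in 64-bit mode\n");
	}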
352 | static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) | ||
353 | { | ||
354 | return (vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && | ||
355 | (vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); | ||
356 | } | ||
357 | |||
358 | /** | ||
359 | * kvm_pmu_set_counter_event_type - set selected counter to monitor some event | ||
360 | * @vcpu: The vcpu pointer | ||
361 | * @data: The data guest writes to PMXEVTYPER_EL0 | ||
362 | * @select_idx: The index of the selected counter | ||
363 | * | ||
364 | * When the OS accesses PMXEVTYPER_EL0, it wants a PMC to count an event | ||
365 | * with the given hardware event number. Call into the perf_event API to | ||
366 | * emulate this action and create a kernel perf event for it. | ||
367 | */ | ||
368 | void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, | ||
369 | u64 select_idx) | ||
370 | { | ||
371 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
372 | struct kvm_pmc *pmc = &pmu->pmc[select_idx]; | ||
373 | struct perf_event *event; | ||
374 | struct perf_event_attr attr; | ||
375 | u64 eventsel, counter; | ||
376 | |||
377 | kvm_pmu_stop_counter(vcpu, pmc); | ||
378 | eventsel = data & ARMV8_PMU_EVTYPE_EVENT; | ||
379 | |||
380 | /* Software increment event doesn't need to be backed by a perf event */ | ||
381 | if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR) | ||
382 | return; | ||
383 | |||
384 | memset(&attr, 0, sizeof(struct perf_event_attr)); | ||
385 | attr.type = PERF_TYPE_RAW; | ||
386 | attr.size = sizeof(attr); | ||
387 | attr.pinned = 1; | ||
388 | attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, select_idx); | ||
389 | attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0; | ||
390 | attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; | ||
391 | attr.exclude_hv = 1; /* Don't count EL2 events */ | ||
392 | attr.exclude_host = 1; /* Don't count host events */ | ||
393 | attr.config = eventsel; | ||
394 | |||
395 | counter = kvm_pmu_get_counter_value(vcpu, select_idx); | ||
396 | /* The initial sample period (overflow count) of an event. */ | ||
397 | attr.sample_period = (-counter) & pmc->bitmask; | ||
398 | |||
399 | event = perf_event_create_kernel_counter(&attr, -1, current, | ||
400 | kvm_pmu_perf_overflow, pmc); | ||
401 | if (IS_ERR(event)) { | ||
402 | pr_err_once("kvm: pmu event creation failed %ld\n", | ||
403 | PTR_ERR(event)); | ||
404 | return; | ||
405 | } | ||
406 | |||
407 | pmc->perf_event = event; | ||
408 | } | ||
409 | |||
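perf counts upward and fires after sample_period events, while the guest expects an interrupt when its counter wraps past the bitmask; `(-counter) & bitmask` converts one into the other. For example, a 32-bit counter at 0xfffffff0 yields a sample period of (-0xfffffff0) & 0xffffffff = 0x10, i.e. perf fires after exactly the 16 events the guest counter needs to overflow. A one-function sketch:

	#include <stdint.h>

	/* Illustrative only: events remaining until the guest counter
	 * wraps, as computed for attr.sample_period above. */
	static uint64_t sample_period(uint64_t counter, uint64_t bitmask)
	{
		return (-counter) & bitmask;	/* e.g. 0xfffffff0 -> 0x10 */
	}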
410 | bool kvm_arm_support_pmu_v3(void) | ||
411 | { | ||
412 | /* | ||
413 | * Check if HW_PERF_EVENTS is supported by checking the number of | ||
414 | * hardware performance counters. This ensures that a physical PMU | ||
415 | * is present and that CONFIG_PERF_EVENTS is selected. | ||
416 | */ | ||
417 | return (perf_num_counters() > 0); | ||
418 | } | ||
419 | |||
420 | static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) | ||
421 | { | ||
422 | if (!kvm_arm_support_pmu_v3()) | ||
423 | return -ENODEV; | ||
424 | |||
425 | if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features) || | ||
426 | !kvm_arm_pmu_irq_initialized(vcpu)) | ||
427 | return -ENXIO; | ||
428 | |||
429 | if (kvm_arm_pmu_v3_ready(vcpu)) | ||
430 | return -EBUSY; | ||
431 | |||
432 | kvm_pmu_vcpu_reset(vcpu); | ||
433 | vcpu->arch.pmu.ready = true; | ||
434 | |||
435 | return 0; | ||
436 | } | ||
437 | |||
438 | static bool irq_is_valid(struct kvm *kvm, int irq, bool is_ppi) | ||
439 | { | ||
440 | int i; | ||
441 | struct kvm_vcpu *vcpu; | ||
442 | |||
443 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
444 | if (!kvm_arm_pmu_irq_initialized(vcpu)) | ||
445 | continue; | ||
446 | |||
447 | if (is_ppi) { | ||
448 | if (vcpu->arch.pmu.irq_num != irq) | ||
449 | return false; | ||
450 | } else { | ||
451 | if (vcpu->arch.pmu.irq_num == irq) | ||
452 | return false; | ||
453 | } | ||
454 | } | ||
455 | |||
456 | return true; | ||
457 | } | ||
458 | |||
459 | |||
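The rule enforced above: when the chosen interrupt is a PPI it must carry the same number on every vcpu, and when it is an SPI no two vcpus may share a number. A standalone sketch of the same check over a plain array, assuming -1 marks a vcpu whose interrupt is not yet configured:

	#include <stdbool.h>

	/* Illustrative only: the consistency rule from irq_is_valid() over
	 * an array of per-vcpu PMU interrupt numbers; -1 means "not set". */
	static bool irq_ok(const int *vcpu_irq, int nr_vcpus, int irq, bool is_ppi)
	{
		int i;

		for (i = 0; i < nr_vcpus; i++) {
			if (vcpu_irq[i] < 0)
				continue;
			if (is_ppi && vcpu_irq[i] != irq)
				return false;	/* PPIs: one number for all vcpus */
			if (!is_ppi && vcpu_irq[i] == irq)
				return false;	/* SPIs: one number per vcpu */
		}
		return true;
	}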
460 | int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) | ||
461 | { | ||
462 | switch (attr->attr) { | ||
463 | case KVM_ARM_VCPU_PMU_V3_IRQ: { | ||
464 | int __user *uaddr = (int __user *)(long)attr->addr; | ||
465 | int irq; | ||
466 | |||
467 | if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) | ||
468 | return -ENODEV; | ||
469 | |||
470 | if (get_user(irq, uaddr)) | ||
471 | return -EFAULT; | ||
472 | |||
473 | /* | ||
474 | * The PMU overflow interrupt could be a PPI or SPI, but for one | ||
475 | * VM the interrupt type must be the same for each vcpu. As a PPI, | ||
476 | * the interrupt number is the same for all vcpus, while as an | ||
477 | * SPI it must be a separate number per vcpu. | ||
478 | */ | ||
479 | if (irq < VGIC_NR_SGIS || irq >= vcpu->kvm->arch.vgic.nr_irqs || | ||
480 | !irq_is_valid(vcpu->kvm, irq, irq < VGIC_NR_PRIVATE_IRQS)) | ||
481 | return -EINVAL; | ||
482 | |||
483 | if (kvm_arm_pmu_irq_initialized(vcpu)) | ||
484 | return -EBUSY; | ||
485 | |||
486 | kvm_debug("Set kvm ARM PMU irq: %d\n", irq); | ||
487 | vcpu->arch.pmu.irq_num = irq; | ||
488 | return 0; | ||
489 | } | ||
490 | case KVM_ARM_VCPU_PMU_V3_INIT: | ||
491 | return kvm_arm_pmu_v3_init(vcpu); | ||
492 | } | ||
493 | |||
494 | return -ENXIO; | ||
495 | } | ||
496 | |||
497 | int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) | ||
498 | { | ||
499 | switch (attr->attr) { | ||
500 | case KVM_ARM_VCPU_PMU_V3_IRQ: { | ||
501 | int __user *uaddr = (int __user *)(long)attr->addr; | ||
502 | int irq; | ||
503 | |||
504 | if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) | ||
505 | return -ENODEV; | ||
506 | |||
507 | if (!kvm_arm_pmu_irq_initialized(vcpu)) | ||
508 | return -ENXIO; | ||
509 | |||
510 | irq = vcpu->arch.pmu.irq_num; | ||
511 | return put_user(irq, uaddr); | ||
512 | } | ||
513 | } | ||
514 | |||
515 | return -ENXIO; | ||
516 | } | ||
517 | |||
518 | int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) | ||
519 | { | ||
520 | switch (attr->attr) { | ||
521 | case KVM_ARM_VCPU_PMU_V3_IRQ: | ||
522 | case KVM_ARM_VCPU_PMU_V3_INIT: | ||
523 | if (kvm_arm_support_pmu_v3() && | ||
524 | test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) | ||
525 | return 0; | ||
526 | } | ||
527 | |||
528 | return -ENXIO; | ||
529 | } | ||
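Taken together, the three handlers give userspace a two-step bring-up: point KVM_ARM_VCPU_PMU_V3_IRQ at an interrupt, then issue KVM_ARM_VCPU_PMU_V3_INIT. A hedged sketch of the VMM side, assuming an already-created vcpu file descriptor and the vcpu-level KVM_SET_DEVICE_ATTR ioctl with the KVM_ARM_VCPU_PMU_V3_CTRL attribute group this series wires up:

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Illustrative only: configure the PMU overflow interrupt, then
	 * initialize the vcpu PMU. Assumes the KVM_ARM_VCPU_PMU_V3_CTRL
	 * group from this series and an already-created vcpu_fd. */
	static int vcpu_pmu_setup(int vcpu_fd, int irq)
	{
		struct kvm_device_attr attr = {
			.group	= KVM_ARM_VCPU_PMU_V3_CTRL,
			.attr	= KVM_ARM_VCPU_PMU_V3_IRQ,
			.addr	= (uint64_t)(unsigned long)&irq,
		};

		if (ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr))
			return -1;

		attr.attr = KVM_ARM_VCPU_PMU_V3_INIT;
		attr.addr = 0;
		return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
	}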
diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c index 13907970d11c..1b0bee095427 100644 --- a/virt/kvm/arm/vgic-v2-emul.c +++ b/virt/kvm/arm/vgic-v2-emul.c | |||
@@ -321,6 +321,11 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, | |||
321 | 321 | ||
322 | static const struct vgic_io_range vgic_dist_ranges[] = { | 322 | static const struct vgic_io_range vgic_dist_ranges[] = { |
323 | { | 323 | { |
324 | .base = GIC_DIST_SOFTINT, | ||
325 | .len = 4, | ||
326 | .handle_mmio = handle_mmio_sgi_reg, | ||
327 | }, | ||
328 | { | ||
324 | .base = GIC_DIST_CTRL, | 329 | .base = GIC_DIST_CTRL, |
325 | .len = 12, | 330 | .len = 12, |
326 | .bits_per_irq = 0, | 331 | .bits_per_irq = 0, |
@@ -387,11 +392,6 @@ static const struct vgic_io_range vgic_dist_ranges[] = { | |||
387 | .handle_mmio = handle_mmio_cfg_reg, | 392 | .handle_mmio = handle_mmio_cfg_reg, |
388 | }, | 393 | }, |
389 | { | 394 | { |
390 | .base = GIC_DIST_SOFTINT, | ||
391 | .len = 4, | ||
392 | .handle_mmio = handle_mmio_sgi_reg, | ||
393 | }, | ||
394 | { | ||
395 | .base = GIC_DIST_SGI_PENDING_CLEAR, | 395 | .base = GIC_DIST_SGI_PENDING_CLEAR, |
396 | .len = VGIC_NR_SGIS, | 396 | .len = VGIC_NR_SGIS, |
397 | .handle_mmio = handle_mmio_sgi_clear, | 397 | .handle_mmio = handle_mmio_sgi_clear, |
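The hunk above moves the GIC_DIST_SOFTINT (GICD_SGIR) entry from the middle of vgic_dist_ranges to the front. The range table is matched linearly on each distributor access, and GICD_SGIR is the register a guest hits for every IPI, so listing it first shortens the hottest lookup. A generic sketch of such first-match dispatch, with illustrative names (io_range/dispatch are not the kernel's):

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdint.h>

	/* Illustrative only: first-match linear dispatch over a range
	 * table, which is why hot registers benefit from coming first. */
	struct io_range {
		uint64_t base, len;
		bool (*handle)(uint64_t offset);
	};

	static bool dispatch(const struct io_range *ranges, size_t n,
			     uint64_t addr)
	{
		size_t i;

		for (i = 0; i < n; i++)
			if (addr >= ranges[i].base &&
			    addr < ranges[i].base + ranges[i].len)
				return ranges[i].handle(addr - ranges[i].base);
		return false;
	}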
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index ff02f08df74d..67ec334ce1d0 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c | |||
@@ -176,6 +176,15 @@ static const struct vgic_ops vgic_v2_ops = { | |||
176 | 176 | ||
177 | static struct vgic_params vgic_v2_params; | 177 | static struct vgic_params vgic_v2_params; |
178 | 178 | ||
179 | static void vgic_cpu_init_lrs(void *params) | ||
180 | { | ||
181 | struct vgic_params *vgic = params; | ||
182 | int i; | ||
183 | |||
184 | for (i = 0; i < vgic->nr_lr; i++) | ||
185 | writel_relaxed(0, vgic->vctrl_base + GICH_LR0 + (i * 4)); | ||
186 | } | ||
187 | |||
179 | /** | 188 | /** |
180 | * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT | 189 | * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT |
181 | * @node: pointer to the DT node | 190 | * @node: pointer to the DT node |
@@ -257,6 +266,9 @@ int vgic_v2_probe(struct device_node *vgic_node, | |||
257 | 266 | ||
258 | vgic->type = VGIC_V2; | 267 | vgic->type = VGIC_V2; |
259 | vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS; | 268 | vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS; |
269 | |||
270 | on_each_cpu(vgic_cpu_init_lrs, vgic, 1); | ||
271 | |||
260 | *ops = &vgic_v2_ops; | 272 | *ops = &vgic_v2_ops; |
261 | *params = vgic; | 273 | *params = vgic; |
262 | goto out; | 274 | goto out; |
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 453eafd4dd6e..999bdc6d9d9f 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c | |||
@@ -42,7 +42,7 @@ static u32 ich_vtr_el2; | |||
42 | static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) | 42 | static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) |
43 | { | 43 | { |
44 | struct vgic_lr lr_desc; | 44 | struct vgic_lr lr_desc; |
45 | u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)]; | 45 | u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr]; |
46 | 46 | ||
47 | if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) | 47 | if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) |
48 | lr_desc.irq = val & ICH_LR_VIRTUALID_MASK; | 48 | lr_desc.irq = val & ICH_LR_VIRTUALID_MASK; |
@@ -106,7 +106,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, | |||
106 | lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT; | 106 | lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT; |
107 | } | 107 | } |
108 | 108 | ||
109 | vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)] = lr_val; | 109 | vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = lr_val; |
110 | 110 | ||
111 | if (!(lr_desc.state & LR_STATE_MASK)) | 111 | if (!(lr_desc.state & LR_STATE_MASK)) |
112 | vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); | 112 | vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); |
@@ -216,6 +216,11 @@ static const struct vgic_ops vgic_v3_ops = { | |||
216 | 216 | ||
217 | static struct vgic_params vgic_v3_params; | 217 | static struct vgic_params vgic_v3_params; |
218 | 218 | ||
219 | static void vgic_cpu_init_lrs(void *params) | ||
220 | { | ||
221 | kvm_call_hyp(__vgic_v3_init_lrs); | ||
222 | } | ||
223 | |||
219 | /** | 224 | /** |
220 | * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT | 225 | * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT |
221 | * @node: pointer to the DT node | 226 | * @node: pointer to the DT node |
@@ -284,6 +289,8 @@ int vgic_v3_probe(struct device_node *vgic_node, | |||
284 | kvm_info("%s@%llx IRQ%d\n", vgic_node->name, | 289 | kvm_info("%s@%llx IRQ%d\n", vgic_node->name, |
285 | vcpu_res.start, vgic->maint_irq); | 290 | vcpu_res.start, vgic->maint_irq); |
286 | 291 | ||
292 | on_each_cpu(vgic_cpu_init_lrs, vgic, 1); | ||
293 | |||
287 | *ops = &vgic_v3_ops; | 294 | *ops = &vgic_v3_ops; |
288 | *params = vgic; | 295 | *params = vgic; |
289 | 296 | ||
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 65da997b430a..f0d061f92674 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c | |||
@@ -109,8 +109,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) | |||
109 | /* cancel outstanding work queue item */ | 109 | /* cancel outstanding work queue item */ |
110 | while (!list_empty(&vcpu->async_pf.queue)) { | 110 | while (!list_empty(&vcpu->async_pf.queue)) { |
111 | struct kvm_async_pf *work = | 111 | struct kvm_async_pf *work = |
112 | list_entry(vcpu->async_pf.queue.next, | 112 | list_first_entry(&vcpu->async_pf.queue, |
113 | typeof(*work), queue); | 113 | typeof(*work), queue); |
114 | list_del(&work->queue); | 114 | list_del(&work->queue); |
115 | 115 | ||
116 | #ifdef CONFIG_KVM_ASYNC_PF_SYNC | 116 | #ifdef CONFIG_KVM_ASYNC_PF_SYNC |
@@ -127,8 +127,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) | |||
127 | spin_lock(&vcpu->async_pf.lock); | 127 | spin_lock(&vcpu->async_pf.lock); |
128 | while (!list_empty(&vcpu->async_pf.done)) { | 128 | while (!list_empty(&vcpu->async_pf.done)) { |
129 | struct kvm_async_pf *work = | 129 | struct kvm_async_pf *work = |
130 | list_entry(vcpu->async_pf.done.next, | 130 | list_first_entry(&vcpu->async_pf.done, |
131 | typeof(*work), link); | 131 | typeof(*work), link); |
132 | list_del(&work->link); | 132 | list_del(&work->link); |
133 | kmem_cache_free(async_pf_cache, work); | 133 | kmem_cache_free(async_pf_cache, work); |
134 | } | 134 | } |
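list_first_entry(head, type, member) is defined as list_entry((head)->next, type, member), so both hunks are behavior-preserving cleanups that name the intent — take the first element — instead of spelling out head->next. A minimal sketch of the macros involved, assuming the usual container_of definition:

	#include <stddef.h>

	/* Illustrative only: the list.h macros behind the cleanup above.
	 * list_first_entry() is just list_entry() on head->next, spelled
	 * to say "first element" rather than "this pointer's container". */
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))
	#define list_entry(ptr, type, member) \
		container_of(ptr, type, member)
	#define list_first_entry(head, type, member) \
		list_entry((head)->next, type, member)

	struct list_head { struct list_head *next, *prev; };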
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5af50c3ddd53..7ba1d10ffed2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -72,11 +72,11 @@ module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); | |||
72 | 72 | ||
73 | /* Default doubles per-vcpu halt_poll_ns. */ | 73 | /* Default doubles per-vcpu halt_poll_ns. */ |
74 | static unsigned int halt_poll_ns_grow = 2; | 74 | static unsigned int halt_poll_ns_grow = 2; |
75 | module_param(halt_poll_ns_grow, int, S_IRUGO); | 75 | module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR); |
76 | 76 | ||
77 | /* Default resets per-vcpu halt_poll_ns . */ | 77 | /* Default resets per-vcpu halt_poll_ns . */ |
78 | static unsigned int halt_poll_ns_shrink; | 78 | static unsigned int halt_poll_ns_shrink; |
79 | module_param(halt_poll_ns_shrink, int, S_IRUGO); | 79 | module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR); |
80 | 80 | ||
81 | /* | 81 | /* |
82 | * Ordering of locks: | 82 | * Ordering of locks: |
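Both parameters are unsigned int, so the declared module_param type becomes uint to match, and the added S_IWUSR lets root retune them through sysfs at runtime — which is also why the readers further down switch to READ_ONCE for a single consistent snapshot. A sketch of the corrected declaration pattern, with a made-up parameter name:

	#include <linux/module.h>

	/* Illustrative only: a parameter whose module_param type ("uint")
	 * matches the C type of the variable, exported read/write for root. */
	static unsigned int demo_grow = 2;
	module_param(demo_grow, uint, S_IRUGO | S_IWUSR);
	MODULE_PARM_DESC(demo_grow, "poll-window growth factor (0 disables growing)");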
@@ -619,13 +619,10 @@ void *kvm_kvzalloc(unsigned long size) | |||
619 | 619 | ||
620 | static void kvm_destroy_devices(struct kvm *kvm) | 620 | static void kvm_destroy_devices(struct kvm *kvm) |
621 | { | 621 | { |
622 | struct list_head *node, *tmp; | 622 | struct kvm_device *dev, *tmp; |
623 | 623 | ||
624 | list_for_each_safe(node, tmp, &kvm->devices) { | 624 | list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) { |
625 | struct kvm_device *dev = | 625 | list_del(&dev->vm_node); |
626 | list_entry(node, struct kvm_device, vm_node); | ||
627 | |||
628 | list_del(node); | ||
629 | dev->ops->destroy(dev); | 626 | dev->ops->destroy(dev); |
630 | } | 627 | } |
631 | } | 628 | } |
@@ -1436,11 +1433,17 @@ kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, | |||
1436 | { | 1433 | { |
1437 | unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); | 1434 | unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); |
1438 | 1435 | ||
1439 | if (addr == KVM_HVA_ERR_RO_BAD) | 1436 | if (addr == KVM_HVA_ERR_RO_BAD) { |
1437 | if (writable) | ||
1438 | *writable = false; | ||
1440 | return KVM_PFN_ERR_RO_FAULT; | 1439 | return KVM_PFN_ERR_RO_FAULT; |
1440 | } | ||
1441 | 1441 | ||
1442 | if (kvm_is_error_hva(addr)) | 1442 | if (kvm_is_error_hva(addr)) { |
1443 | if (writable) | ||
1444 | *writable = false; | ||
1443 | return KVM_PFN_NOSLOT; | 1445 | return KVM_PFN_NOSLOT; |
1446 | } | ||
1444 | 1447 | ||
1445 | /* Do not map writable pfn in the readonly memslot. */ | 1448 | /* Do not map writable pfn in the readonly memslot. */ |
1446 | if (writable && memslot_is_readonly(slot)) { | 1449 | if (writable && memslot_is_readonly(slot)) { |
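The fix initializes *writable on the two early-error returns as well, not just on the success paths; a caller that passed a non-NULL pointer would otherwise read an indeterminate value. A minimal sketch of the optional out-parameter pattern, with illustrative names:

	#include <stdbool.h>

	/* Illustrative only: the optional out-parameter pattern used above;
	 * every return path must store through the pointer when given. */
	static int lookup(unsigned long addr, bool *writable)
	{
		if (addr == 0) {		/* error path */
			if (writable)
				*writable = false;
			return -1;
		}
		if (writable)
			*writable = true;	/* success path */
		return 0;
	}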
@@ -1942,14 +1945,15 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); | |||
1942 | 1945 | ||
1943 | static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) | 1946 | static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) |
1944 | { | 1947 | { |
1945 | int old, val; | 1948 | unsigned int old, val, grow; |
1946 | 1949 | ||
1947 | old = val = vcpu->halt_poll_ns; | 1950 | old = val = vcpu->halt_poll_ns; |
1951 | grow = READ_ONCE(halt_poll_ns_grow); | ||
1948 | /* 10us base */ | 1952 | /* 10us base */ |
1949 | if (val == 0 && halt_poll_ns_grow) | 1953 | if (val == 0 && grow) |
1950 | val = 10000; | 1954 | val = 10000; |
1951 | else | 1955 | else |
1952 | val *= halt_poll_ns_grow; | 1956 | val *= grow; |
1953 | 1957 | ||
1954 | if (val > halt_poll_ns) | 1958 | if (val > halt_poll_ns) |
1955 | val = halt_poll_ns; | 1959 | val = halt_poll_ns; |
@@ -1960,13 +1964,14 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) | |||
1960 | 1964 | ||
1961 | static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) | 1965 | static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) |
1962 | { | 1966 | { |
1963 | int old, val; | 1967 | unsigned int old, val, shrink; |
1964 | 1968 | ||
1965 | old = val = vcpu->halt_poll_ns; | 1969 | old = val = vcpu->halt_poll_ns; |
1966 | if (halt_poll_ns_shrink == 0) | 1970 | shrink = READ_ONCE(halt_poll_ns_shrink); |
1971 | if (shrink == 0) | ||
1967 | val = 0; | 1972 | val = 0; |
1968 | else | 1973 | else |
1969 | val /= halt_poll_ns_shrink; | 1974 | val /= shrink; |
1970 | 1975 | ||
1971 | vcpu->halt_poll_ns = val; | 1976 | vcpu->halt_poll_ns = val; |
1972 | trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); | 1977 | trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); |
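With the defaults (grow = 2, shrink = 0) a vcpu's poll window starts at 0, jumps to 10000 ns on the first growth, doubles on each one after that up to the global halt_poll_ns cap, and collapses straight back to 0 on a shrink. A sketch of the same arithmetic with the (now runtime-writable) parameters passed in explicitly:

	/* Illustrative only: the grow/shrink arithmetic above, with the
	 * module parameters as arguments. cap is the global halt_poll_ns. */
	static unsigned int grow_poll(unsigned int val, unsigned int grow,
				      unsigned int cap)
	{
		if (val == 0 && grow)
			val = 10000;		/* 10us base */
		else
			val *= grow;
		return val > cap ? cap : val;
	}

	static unsigned int shrink_poll(unsigned int val, unsigned int shrink)
	{
		return shrink ? val / shrink : 0;
	}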