author     Linus Torvalds <torvalds@linux-foundation.org>  2016-03-16 12:55:35 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-03-16 12:55:35 -0400
commit     10dc3747661bea9215417b659449bb7b8ed3df2c (patch)
tree       d943974b4941203a7db2fabe4896852cf0f16bc4
parent     047486d8e7c2a7e8d75b068b69cb67b47364f5d4 (diff)
parent     f958ee745f70b60d0e41927cab2c073104bc70c2 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
 "One of the largest releases for KVM... Hardly any generic changes, but
  lots of architecture-specific updates.

  ARM:
   - VHE support so that we can run the kernel at EL2 on ARMv8.1 systems
   - PMU support for guests
   - 32bit world switch rewritten in C
   - various optimizations to the vgic save/restore code

  PPC:
   - enabled KVM-VFIO integration ("VFIO device")
   - optimizations to speed up IPIs between vcpus
   - in-kernel handling of IOMMU hypercalls
   - support for dynamic DMA windows (DDW)

  s390:
   - provide the floating point registers via sync regs
   - separated instruction vs. data accesses
   - dirty log improvements for huge guests
   - bugfixes and documentation improvements

  x86:
   - Hyper-V VMBus hypercall userspace exit
   - alternative implementation of lowest-priority interrupts using vector
     hashing (for better VT-d posted interrupt support)
   - fixed guest debugging with nested virtualization
   - improved interrupt tracking in the in-kernel IOAPIC
   - generic infrastructure for tracking writes to guest memory; currently
     its only use is to speed up the legacy shadow paging (pre-EPT) case,
     but in the future it will be used for virtual GPUs as well
   - much cleanup (LAPIC, kvmclock, MMU, PIT), including ubsan fixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (217 commits)
  KVM: x86: remove eager_fpu field of struct kvm_vcpu_arch
  KVM: x86: disable MPX if host did not enable MPX XSAVE features
  arm64: KVM: vgic-v3: Only wipe LRs on vcpu exit
  arm64: KVM: vgic-v3: Reset LRs at boot time
  arm64: KVM: vgic-v3: Do not save an LR known to be empty
  arm64: KVM: vgic-v3: Save maintenance interrupt state only if required
  arm64: KVM: vgic-v3: Avoid accessing ICH registers
  KVM: arm/arm64: vgic-v2: Make GICD_SGIR quicker to hit
  KVM: arm/arm64: vgic-v2: Only wipe LRs on vcpu exit
  KVM: arm/arm64: vgic-v2: Reset LRs at boot time
  KVM: arm/arm64: vgic-v2: Do not save an LR known to be empty
  KVM: arm/arm64: vgic-v2: Move GICH_ELRSR saving to its own function
  KVM: arm/arm64: vgic-v2: Save maintenance interrupt state only if required
  KVM: arm/arm64: vgic-v2: Avoid accessing GICH registers
  KVM: s390: allocate only one DMA page per VM
  KVM: s390: enable STFLE interpretation only if enabled for the guest
  KVM: s390: wake up when the VCPU cpu timer expires
  KVM: s390: step the VCPU timer while in enabled wait
  KVM: s390: protect VCPU cpu timer with a seqcount
  KVM: s390: step VCPU cpu timer during kvm_run ioctl
  ...
-rw-r--r--  Documentation/virtual/kvm/api.txt  99
-rw-r--r--  Documentation/virtual/kvm/devices/s390_flic.txt  2
-rw-r--r--  Documentation/virtual/kvm/devices/vcpu.txt  33
-rw-r--r--  Documentation/virtual/kvm/devices/vm.txt  52
-rw-r--r--  Documentation/virtual/kvm/mmu.txt  6
-rw-r--r--  arch/arm/include/asm/kvm_asm.h  41
-rw-r--r--  arch/arm/include/asm/kvm_emulate.h  20
-rw-r--r--  arch/arm/include/asm/kvm_host.h  80
-rw-r--r--  arch/arm/include/asm/kvm_hyp.h  139
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h  2
-rw-r--r--  arch/arm/include/asm/virt.h  9
-rw-r--r--  arch/arm/kernel/asm-offsets.c  40
-rw-r--r--  arch/arm/kernel/vmlinux.lds.S  6
-rw-r--r--  arch/arm/kvm/Makefile  1
-rw-r--r--  arch/arm/kvm/arm.c  244
-rw-r--r--  arch/arm/kvm/coproc.c  126
-rw-r--r--  arch/arm/kvm/coproc.h  24
-rw-r--r--  arch/arm/kvm/emulate.c  34
-rw-r--r--  arch/arm/kvm/guest.c  5
-rw-r--r--  arch/arm/kvm/handle_exit.c  7
-rw-r--r--  arch/arm/kvm/hyp/Makefile  17
-rw-r--r--  arch/arm/kvm/hyp/banked-sr.c  77
-rw-r--r--  arch/arm/kvm/hyp/cp15-sr.c  84
-rw-r--r--  arch/arm/kvm/hyp/entry.S  101
-rw-r--r--  arch/arm/kvm/hyp/hyp-entry.S  169
-rw-r--r--  arch/arm/kvm/hyp/s2-setup.c  33
-rw-r--r--  arch/arm/kvm/hyp/switch.c  232
-rw-r--r--  arch/arm/kvm/hyp/tlb.c  70
-rw-r--r--  arch/arm/kvm/hyp/vfp.S  68
-rw-r--r--  arch/arm/kvm/init.S  8
-rw-r--r--  arch/arm/kvm/interrupts.S  480
-rw-r--r--  arch/arm/kvm/interrupts_head.S  648
-rw-r--r--  arch/arm/kvm/mmu.c  23
-rw-r--r--  arch/arm/kvm/reset.c  2
-rw-r--r--  arch/arm64/Kconfig  13
-rw-r--r--  arch/arm64/include/asm/cpufeature.h  6
-rw-r--r--  arch/arm64/include/asm/hw_breakpoint.h  18
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h  6
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h  6
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h  8
-rw-r--r--  arch/arm64/include/asm/kvm_host.h  34
-rw-r--r--  arch/arm64/include/asm/kvm_hyp.h  181
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h  12
-rw-r--r--  arch/arm64/include/asm/kvm_perf_event.h  68
-rw-r--r--  arch/arm64/include/asm/virt.h  10
-rw-r--r--  arch/arm64/include/uapi/asm/kvm.h  6
-rw-r--r--  arch/arm64/kernel/asm-offsets.c  3
-rw-r--r--  arch/arm64/kernel/cpufeature.c  11
-rw-r--r--  arch/arm64/kernel/head.S  28
-rw-r--r--  arch/arm64/kernel/perf_event.c  6
-rw-r--r--  arch/arm64/kvm/Kconfig  7
-rw-r--r--  arch/arm64/kvm/Makefile  1
-rw-r--r--  arch/arm64/kvm/guest.c  51
-rw-r--r--  arch/arm64/kvm/hyp-init.S  15
-rw-r--r--  arch/arm64/kvm/hyp.S  7
-rw-r--r--  arch/arm64/kvm/hyp/Makefile  8
-rw-r--r--  arch/arm64/kvm/hyp/debug-sr.c  4
-rw-r--r--  arch/arm64/kvm/hyp/entry.S  6
-rw-r--r--  arch/arm64/kvm/hyp/hyp-entry.S  109
-rw-r--r--  arch/arm64/kvm/hyp/hyp.h  90
-rw-r--r--  arch/arm64/kvm/hyp/s2-setup.c  43
-rw-r--r--  arch/arm64/kvm/hyp/switch.c  206
-rw-r--r--  arch/arm64/kvm/hyp/sysreg-sr.c  149
-rw-r--r--  arch/arm64/kvm/hyp/tlb.c  2
-rw-r--r--  arch/arm64/kvm/hyp/vgic-v2-sr.c  84
-rw-r--r--  arch/arm64/kvm/hyp/vgic-v3-sr.c  341
-rw-r--r--  arch/arm64/kvm/reset.c  7
-rw-r--r--  arch/arm64/kvm/sys_regs.c  609
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h  2
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h  5
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h  51
-rw-r--r--  arch/powerpc/include/asm/pgtable.h  3
-rw-r--r--  arch/powerpc/include/asm/smp.h  4
-rw-r--r--  arch/powerpc/include/asm/xics.h  1
-rw-r--r--  arch/powerpc/include/uapi/asm/kvm.h  9
-rw-r--r--  arch/powerpc/kernel/smp.c  28
-rw-r--r--  arch/powerpc/kvm/Makefile  2
-rw-r--r--  arch/powerpc/kvm/book3s.c  2
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio.c  156
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio_hv.c  330
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c  192
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c  3
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xics.c  131
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S  4
-rw-r--r--  arch/powerpc/kvm/book3s_pr_papr.c  35
-rw-r--r--  arch/powerpc/kvm/powerpc.c  38
-rw-r--r--  arch/powerpc/mm/pgtable.c  8
-rw-r--r--  arch/powerpc/perf/hv-24x7.c  8
-rw-r--r--  arch/powerpc/sysdev/xics/icp-native.c  21
-rw-r--r--  arch/s390/include/asm/kvm_host.h  41
-rw-r--r--  arch/s390/include/uapi/asm/kvm.h  8
-rw-r--r--  arch/s390/include/uapi/asm/sie.h  1
-rw-r--r--  arch/s390/kvm/gaccess.c  57
-rw-r--r--  arch/s390/kvm/gaccess.h  38
-rw-r--r--  arch/s390/kvm/intercept.c  78
-rw-r--r--  arch/s390/kvm/interrupt.c  93
-rw-r--r--  arch/s390/kvm/kvm-s390.c  235
-rw-r--r--  arch/s390/kvm/kvm-s390.h  28
-rw-r--r--  arch/s390/kvm/priv.c  15
-rw-r--r--  arch/x86/include/asm/kvm_host.h  31
-rw-r--r--  arch/x86/include/asm/kvm_page_track.h  61
-rw-r--r--  arch/x86/include/uapi/asm/hyperv.h  4
-rw-r--r--  arch/x86/kvm/Makefile  3
-rw-r--r--  arch/x86/kvm/assigned-dev.c  14
-rw-r--r--  arch/x86/kvm/cpuid.c  14
-rw-r--r--  arch/x86/kvm/cpuid.h  9
-rw-r--r--  arch/x86/kvm/hyperv.c  50
-rw-r--r--  arch/x86/kvm/i8254.c  350
-rw-r--r--  arch/x86/kvm/i8254.h  17
-rw-r--r--  arch/x86/kvm/ioapic.c  30
-rw-r--r--  arch/x86/kvm/ioapic.h  17
-rw-r--r--  arch/x86/kvm/irq.c  9
-rw-r--r--  arch/x86/kvm/irq.h  8
-rw-r--r--  arch/x86/kvm/irq_comm.c  27
-rw-r--r--  arch/x86/kvm/lapic.c  156
-rw-r--r--  arch/x86/kvm/lapic.h  17
-rw-r--r--  arch/x86/kvm/mmu.c  502
-rw-r--r--  arch/x86/kvm/mmu.h  5
-rw-r--r--  arch/x86/kvm/page_track.c  222
-rw-r--r--  arch/x86/kvm/paging_tmpl.h  35
-rw-r--r--  arch/x86/kvm/pmu.c  2
-rw-r--r--  arch/x86/kvm/svm.c  3
-rw-r--r--  arch/x86/kvm/trace.h  12
-rw-r--r--  arch/x86/kvm/vmx.c  85
-rw-r--r--  arch/x86/kvm/x86.c  158
-rw-r--r--  arch/x86/kvm/x86.h  16
-rw-r--r--  drivers/clocksource/arm_arch_timer.c  96
-rw-r--r--  drivers/hv/hyperv_vmbus.h  6
-rw-r--r--  include/kvm/arm_arch_timer.h  5
-rw-r--r--  include/kvm/arm_pmu.h  110
-rw-r--r--  include/kvm/arm_vgic.h  8
-rw-r--r--  include/trace/events/kvm.h  9
-rw-r--r--  include/uapi/linux/kvm.h  19
-rw-r--r--  virt/kvm/arm/arch_timer.c  31
-rw-r--r--  virt/kvm/arm/hyp/timer-sr.c (renamed from arch/arm64/kvm/hyp/timer-sr.c)  14
-rw-r--r--  virt/kvm/arm/hyp/vgic-v2-sr.c  170
-rw-r--r--  virt/kvm/arm/pmu.c  529
-rw-r--r--  virt/kvm/arm/vgic-v2-emul.c  10
-rw-r--r--  virt/kvm/arm/vgic-v2.c  12
-rw-r--r--  virt/kvm/arm/vgic-v3.c  11
-rw-r--r--  virt/kvm/async_pf.c  8
-rw-r--r--  virt/kvm/kvm_main.c  37
142 files changed, 6757 insertions, 2939 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 07e4cdf02407..4d0542c5206b 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2507,8 +2507,9 @@ struct kvm_create_device {
2507 2507
25084.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR 25084.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
2509 2509
2510Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device 2510Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
2511Type: device ioctl, vm ioctl 2511 KVM_CAP_VCPU_ATTRIBUTES for vcpu device
2512Type: device ioctl, vm ioctl, vcpu ioctl
2512Parameters: struct kvm_device_attr 2513Parameters: struct kvm_device_attr
2513Returns: 0 on success, -1 on error 2514Returns: 0 on success, -1 on error
2514Errors: 2515Errors:
@@ -2533,8 +2534,9 @@ struct kvm_device_attr {
2533 2534
25344.81 KVM_HAS_DEVICE_ATTR 25354.81 KVM_HAS_DEVICE_ATTR
2535 2536
2536Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device 2537Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
2537Type: device ioctl, vm ioctl 2538 KVM_CAP_VCPU_ATTRIBUTES for vcpu device
2539Type: device ioctl, vm ioctl, vcpu ioctl
2538Parameters: struct kvm_device_attr 2540Parameters: struct kvm_device_attr
2539Returns: 0 on success, -1 on error 2541Returns: 0 on success, -1 on error
2540Errors: 2542Errors:
@@ -2577,6 +2579,8 @@ Possible features:
2577 Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). 2579 Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
2578 - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU. 2580 - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU.
2579 Depends on KVM_CAP_ARM_PSCI_0_2. 2581 Depends on KVM_CAP_ARM_PSCI_0_2.
2582 - KVM_ARM_VCPU_PMU_V3: Emulate PMUv3 for the CPU.
2583 Depends on KVM_CAP_ARM_PMU_V3.
2580 2584
2581 2585
25824.83 KVM_ARM_PREFERRED_TARGET 25864.83 KVM_ARM_PREFERRED_TARGET
@@ -3035,6 +3039,87 @@ Returns: 0 on success, -1 on error
3035 3039
3036Queues an SMI on the thread's vcpu. 3040Queues an SMI on the thread's vcpu.
3037 3041
30424.97 KVM_CAP_PPC_MULTITCE
3043
3044Capability: KVM_CAP_PPC_MULTITCE
3045Architectures: ppc
3046Type: vm
3047
3048This capability means the kernel is capable of handling hypercalls
3049H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user
3050space. This significantly accelerates DMA operations for PPC KVM guests.
3051User space should expect that its handlers for these hypercalls
3052are not going to be called if user space previously registered LIOBN
3053in KVM (via KVM_CREATE_SPAPR_TCE or similar calls).
3054
3055In order to enable H_PUT_TCE_INDIRECT and H_STUFF_TCE use in the guest,
3056user space might have to advertise it for the guest. For example,
3057IBM pSeries (sPAPR) guest starts using them if "hcall-multi-tce" is
3058present in the "ibm,hypertas-functions" device-tree property.
3059
3060The hypercalls mentioned above may or may not be processed successfully
3061in the kernel based fast path. If they can not be handled by the kernel,
3062they will get passed on to user space. So user space still has to have
3063an implementation for these despite the in kernel acceleration.
3064
3065This capability is always enabled.
3066
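
As an illustration (not from the patch itself), a VMM that wants to know whether the kernel fast path for these hypercalls exists might probe the capability with KVM_CHECK_EXTENSION; the function name and error handling below are placeholders, and since the capability is always enabled the check is mostly informational.

    /* Illustrative sketch: probe KVM_CAP_PPC_MULTITCE from userspace. */
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int ppc_multitce_supported(void)
    {
            int kvm_fd = open("/dev/kvm", O_RDWR);
            int ret;

            if (kvm_fd < 0)
                    return 0;
            /* > 0 means the kernel may handle H_PUT_TCE_INDIRECT/H_STUFF_TCE
             * itself; userspace handlers are still required as a fallback. */
            ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_MULTITCE);
            close(kvm_fd);
            return ret > 0;
    }
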
30674.98 KVM_CREATE_SPAPR_TCE_64
3068
3069Capability: KVM_CAP_SPAPR_TCE_64
3070Architectures: powerpc
3071Type: vm ioctl
3072Parameters: struct kvm_create_spapr_tce_64 (in)
3073Returns: file descriptor for manipulating the created TCE table
3074
3075This is an extension for KVM_CAP_SPAPR_TCE which only supports 32bit
3076windows, described in 4.62 KVM_CREATE_SPAPR_TCE
3077
3078This capability uses extended struct in ioctl interface:
3079
3080/* for KVM_CAP_SPAPR_TCE_64 */
3081struct kvm_create_spapr_tce_64 {
3082 __u64 liobn;
3083 __u32 page_shift;
3084 __u32 flags;
3085 __u64 offset; /* in pages */
3086 __u64 size; /* in pages */
3087};
3088
3089The aim of extension is to support an additional bigger DMA window with
3090a variable page size.
3091KVM_CREATE_SPAPR_TCE_64 receives a 64bit window size, an IOMMU page shift and
3092a bus offset of the corresponding DMA window, @size and @offset are numbers
3093of IOMMU pages.
3094
3095@flags are not used at the moment.
3096
3097The rest of functionality is identical to KVM_CREATE_SPAPR_TCE.
3098
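
As an illustration (not from the patch itself), a VMM could create such a 64-bit window as sketched below; the liobn, page shift, bus offset and window size are placeholder values that a real VMM would derive from the device tree and the guest's dynamic-DMA-window negotiation.

    /* Illustrative sketch: create a large DMA window on a VM fd. */
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int create_huge_dma_window(int vm_fd)
    {
            struct kvm_create_spapr_tce_64 args = {
                    .liobn      = 0x80000001,           /* placeholder LIOBN */
                    .page_shift = 24,                   /* 16MB IOMMU pages */
                    .flags      = 0,                    /* unused, must be 0 */
                    .offset     = (1ULL << 59) >> 24,   /* bus offset, in pages */
                    .size       = 1ULL << 15,           /* window size, in pages */
            };

            /* Returns a file descriptor for the TCE table on success. */
            return ioctl(vm_fd, KVM_CREATE_SPAPR_TCE_64, &args);
    }
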
30994.98 KVM_REINJECT_CONTROL
3100
3101Capability: KVM_CAP_REINJECT_CONTROL
3102Architectures: x86
3103Type: vm ioctl
3104Parameters: struct kvm_reinject_control (in)
3105Returns: 0 on success,
3106 -EFAULT if struct kvm_reinject_control cannot be read,
3107 -ENXIO if KVM_CREATE_PIT or KVM_CREATE_PIT2 didn't succeed earlier.
3108
3109i8254 (PIT) has two modes, reinject and !reinject. The default is reinject,
3110where KVM queues elapsed i8254 ticks and monitors completion of interrupt from
3111vector(s) that i8254 injects. Reinject mode dequeues a tick and injects its
3112interrupt whenever there isn't a pending interrupt from i8254.
3113!reinject mode injects an interrupt as soon as a tick arrives.
3114
3115struct kvm_reinject_control {
3116 __u8 pit_reinject;
3117 __u8 reserved[31];
3118};
3119
3120pit_reinject = 0 (!reinject mode) is recommended, unless running an old
3121operating system that uses the PIT for timing (e.g. Linux 2.4.x).
3122
30385. The kvm_run structure 31235. The kvm_run structure
3039------------------------ 3124------------------------
3040 3125
@@ -3339,6 +3424,7 @@ EOI was received.
3339 3424
3340 struct kvm_hyperv_exit { 3425 struct kvm_hyperv_exit {
3341#define KVM_EXIT_HYPERV_SYNIC 1 3426#define KVM_EXIT_HYPERV_SYNIC 1
3427#define KVM_EXIT_HYPERV_HCALL 2
3342 __u32 type; 3428 __u32 type;
3343 union { 3429 union {
3344 struct { 3430 struct {
@@ -3347,6 +3433,11 @@ EOI was received.
3347 __u64 evt_page; 3433 __u64 evt_page;
3348 __u64 msg_page; 3434 __u64 msg_page;
3349 } synic; 3435 } synic;
3436 struct {
3437 __u64 input;
3438 __u64 result;
3439 __u64 params[2];
3440 } hcall;
3350 } u; 3441 } u;
3351 }; 3442 };
3352 /* KVM_EXIT_HYPERV */ 3443 /* KVM_EXIT_HYPERV */
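
As an illustration of the new exit type (not from the patch itself), a VMM's run loop could dispatch KVM_EXIT_HYPERV_HCALL roughly as below; handle_vmbus_hcall() is a hypothetical helper, and the VMM writes its result back into the run structure before re-entering the guest.

    /* Illustrative sketch: handle the Hyper-V hypercall userspace exit. */
    #include <linux/kvm.h>

    __u64 handle_vmbus_hcall(__u64 input, __u64 params[2]);  /* hypothetical */

    static void handle_hyperv_exit(struct kvm_run *run)
    {
            struct kvm_hyperv_exit *hv = &run->hyperv;

            if (hv->type == KVM_EXIT_HYPERV_HCALL) {
                    /* input encodes the hypercall code, params[] its arguments */
                    hv->u.hcall.result = handle_vmbus_hcall(hv->u.hcall.input,
                                                            hv->u.hcall.params);
            }
    }
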
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
index d1ad9d5cae46..e3e314cb83e8 100644
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -88,6 +88,8 @@ struct kvm_s390_io_adapter_req {
88 perform a gmap translation for the guest address provided in addr, 88 perform a gmap translation for the guest address provided in addr,
89 pin a userspace page for the translated address and add it to the 89 pin a userspace page for the translated address and add it to the
90 list of mappings 90 list of mappings
91 Note: A new mapping will be created unconditionally; therefore,
92 the calling code should avoid making duplicate mappings.
91 93
92 KVM_S390_IO_ADAPTER_UNMAP 94 KVM_S390_IO_ADAPTER_UNMAP
93 release a userspace page for the translated address specified in addr 95 release a userspace page for the translated address specified in addr
diff --git a/Documentation/virtual/kvm/devices/vcpu.txt b/Documentation/virtual/kvm/devices/vcpu.txt
new file mode 100644
index 000000000000..c04165868faf
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/vcpu.txt
@@ -0,0 +1,33 @@
1Generic vcpu interface
2====================================
3
4The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
5KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
6kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
7
8The groups and attributes per virtual cpu, if any, are architecture specific.
9
101. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
11Architectures: ARM64
12
131.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
14Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
15 pointer to an int
16Returns: -EBUSY: The PMU overflow interrupt is already set
17 -ENXIO: The overflow interrupt not set when attempting to get it
18 -ENODEV: PMUv3 not supported
19 -EINVAL: Invalid PMU overflow interrupt number supplied
20
21A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
22number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
23type must be same for each vcpu. As a PPI, the interrupt number is the same for
24all vcpus, while as an SPI it must be a separate number per vcpu.
25
261.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
27Parameters: no additional parameter in kvm_device_attr.addr
28Returns: -ENODEV: PMUv3 not supported
29 -ENXIO: PMUv3 not properly configured as required prior to calling this
30 attribute
31 -EBUSY: PMUv3 already initialized
32
33Request the initialization of the PMUv3.
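
As an illustration (not from the patch itself), the two attributes are typically used back to back on the vcpu fd: first set the overflow interrupt, then request initialization. The PPI number below is a placeholder chosen by the VMM.

    /* Illustrative sketch: enable PMUv3 emulation for one vcpu. */
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int vcpu_enable_pmu(int vcpu_fd)
    {
            int irq = 23;                           /* PPI: same number on every vcpu */
            struct kvm_device_attr attr = {
                    .group = KVM_ARM_VCPU_PMU_V3_CTRL,
                    .attr  = KVM_ARM_VCPU_PMU_V3_IRQ,
                    .addr  = (__u64)(unsigned long)&irq,    /* pointer to an int */
            };

            if (ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr))
                    return -1;

            attr.attr = KVM_ARM_VCPU_PMU_V3_INIT;   /* no payload for INIT */
            attr.addr = 0;
            return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
    }
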
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
index f083a168eb35..a9ea8774a45f 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -84,3 +84,55 @@ Returns: -EBUSY in case 1 or more vcpus are already activated (only in write
84 -EFAULT if the given address is not accessible from kernel space 84 -EFAULT if the given address is not accessible from kernel space
85 -ENOMEM if not enough memory is available to process the ioctl 85 -ENOMEM if not enough memory is available to process the ioctl
86 0 in case of success 86 0 in case of success
87
883. GROUP: KVM_S390_VM_TOD
89Architectures: s390
90
913.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH
92
93Allows user space to set/get the TOD clock extension (u8).
94
95Parameters: address of a buffer in user space to store the data (u8) to
96Returns: -EFAULT if the given address is not accessible from kernel space
97 -EINVAL if setting the TOD clock extension to != 0 is not supported
98
993.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW
100
101Allows user space to set/get bits 0-63 of the TOD clock register as defined in
102the POP (u64).
103
104Parameters: address of a buffer in user space to store the data (u64) to
105Returns: -EFAULT if the given address is not accessible from kernel space
106
1074. GROUP: KVM_S390_VM_CRYPTO
108Architectures: s390
109
1104.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o)
111
112Allows user space to enable aes key wrapping, including generating a new
113wrapping key.
114
115Parameters: none
116Returns: 0
117
1184.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o)
119
120Allows user space to enable dea key wrapping, including generating a new
121wrapping key.
122
123Parameters: none
124Returns: 0
125
1264.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o)
127
128Allows user space to disable aes key wrapping, clearing the wrapping key.
129
130Parameters: none
131Returns: 0
132
1334.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o)
134
135Allows user space to disable dea key wrapping, clearing the wrapping key.
136
137Parameters: none
138Returns: 0
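
As an illustration (not from the patch itself), these write-only crypto attributes take no payload; a VMM could enable AES key wrapping as sketched below, probing availability first with KVM_HAS_DEVICE_ATTR.

    /* Illustrative sketch: enable AES key wrapping on an s390 VM fd. */
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int enable_aes_key_wrapping(int vm_fd)
    {
            struct kvm_device_attr attr = {
                    .group = KVM_S390_VM_CRYPTO,
                    .attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,  /* w/o, no payload */
            };

            if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr))
                    return -1;      /* crypto attributes not available */
            return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
    }
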
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index c81731096a43..481b6a9c25d5 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -392,11 +392,11 @@ To instantiate a large spte, four constraints must be satisfied:
392 write-protected pages 392 write-protected pages
393- the guest page must be wholly contained by a single memory slot 393- the guest page must be wholly contained by a single memory slot
394 394
395To check the last two conditions, the mmu maintains a ->write_count set of 395To check the last two conditions, the mmu maintains a ->disallow_lpage set of
396arrays for each memory slot and large page size. Every write protected page 396arrays for each memory slot and large page size. Every write protected page
397causes its write_count to be incremented, thus preventing instantiation of 397causes its disallow_lpage to be incremented, thus preventing instantiation of
398a large spte. The frames at the end of an unaligned memory slot have 398a large spte. The frames at the end of an unaligned memory slot have
399artificially inflated ->write_counts so they can never be instantiated. 399artificially inflated ->disallow_lpages so they can never be instantiated.
400 400
401Zapping all pages (page generation count) 401Zapping all pages (page generation count)
402========================================= 402=========================================
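
A minimal sketch of the idea described above, not the kernel's actual code: a large mapping at a given gfn is allowed only while the per-slot, per-level counter is zero; lpage_info_slot() here stands in for the lookup into the per-memslot arrays.

    static bool large_page_allowed(struct kvm_memory_slot *slot, gfn_t gfn, int level)
    {
            struct kvm_lpage_info *linfo = lpage_info_slot(gfn, slot, level);

            /* Non-zero disallow_lpage vetoes a large spte at this gfn. */
            return linfo->disallow_lpage == 0;
    }
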
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 194c91b610ff..15d58b42d5a1 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -19,38 +19,7 @@
19#ifndef __ARM_KVM_ASM_H__ 19#ifndef __ARM_KVM_ASM_H__
20#define __ARM_KVM_ASM_H__ 20#define __ARM_KVM_ASM_H__
21 21
22/* 0 is reserved as an invalid value. */ 22#include <asm/virt.h>
23#define c0_MPIDR 1 /* MultiProcessor ID Register */
24#define c0_CSSELR 2 /* Cache Size Selection Register */
25#define c1_SCTLR 3 /* System Control Register */
26#define c1_ACTLR 4 /* Auxiliary Control Register */
27#define c1_CPACR 5 /* Coprocessor Access Control */
28#define c2_TTBR0 6 /* Translation Table Base Register 0 */
29#define c2_TTBR0_high 7 /* TTBR0 top 32 bits */
30#define c2_TTBR1 8 /* Translation Table Base Register 1 */
31#define c2_TTBR1_high 9 /* TTBR1 top 32 bits */
32#define c2_TTBCR 10 /* Translation Table Base Control R. */
33#define c3_DACR 11 /* Domain Access Control Register */
34#define c5_DFSR 12 /* Data Fault Status Register */
35#define c5_IFSR 13 /* Instruction Fault Status Register */
36#define c5_ADFSR 14 /* Auxilary Data Fault Status R */
37#define c5_AIFSR 15 /* Auxilary Instrunction Fault Status R */
38#define c6_DFAR 16 /* Data Fault Address Register */
39#define c6_IFAR 17 /* Instruction Fault Address Register */
40#define c7_PAR 18 /* Physical Address Register */
41#define c7_PAR_high 19 /* PAR top 32 bits */
42#define c9_L2CTLR 20 /* Cortex A15/A7 L2 Control Register */
43#define c10_PRRR 21 /* Primary Region Remap Register */
44#define c10_NMRR 22 /* Normal Memory Remap Register */
45#define c12_VBAR 23 /* Vector Base Address Register */
46#define c13_CID 24 /* Context ID Register */
47#define c13_TID_URW 25 /* Thread ID, User R/W */
48#define c13_TID_URO 26 /* Thread ID, User R/O */
49#define c13_TID_PRIV 27 /* Thread ID, Privileged */
50#define c14_CNTKCTL 28 /* Timer Control Register (PL1) */
51#define c10_AMAIR0 29 /* Auxilary Memory Attribute Indirection Reg0 */
52#define c10_AMAIR1 30 /* Auxilary Memory Attribute Indirection Reg1 */
53#define NR_CP15_REGS 31 /* Number of regs (incl. invalid) */
54 23
55#define ARM_EXCEPTION_RESET 0 24#define ARM_EXCEPTION_RESET 0
56#define ARM_EXCEPTION_UNDEFINED 1 25#define ARM_EXCEPTION_UNDEFINED 1
@@ -86,19 +55,15 @@ struct kvm_vcpu;
86extern char __kvm_hyp_init[]; 55extern char __kvm_hyp_init[];
87extern char __kvm_hyp_init_end[]; 56extern char __kvm_hyp_init_end[];
88 57
89extern char __kvm_hyp_exit[];
90extern char __kvm_hyp_exit_end[];
91
92extern char __kvm_hyp_vector[]; 58extern char __kvm_hyp_vector[];
93 59
94extern char __kvm_hyp_code_start[];
95extern char __kvm_hyp_code_end[];
96
97extern void __kvm_flush_vm_context(void); 60extern void __kvm_flush_vm_context(void);
98extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); 61extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
99extern void __kvm_tlb_flush_vmid(struct kvm *kvm); 62extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
100 63
101extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); 64extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
65
66extern void __init_stage2_translation(void);
102#endif 67#endif
103 68
104#endif /* __ARM_KVM_ASM_H__ */ 69#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 3095df091ff8..ee5328fc4b06 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -68,12 +68,12 @@ static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
68 68
69static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu) 69static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu)
70{ 70{
71 return &vcpu->arch.regs.usr_regs.ARM_pc; 71 return &vcpu->arch.ctxt.gp_regs.usr_regs.ARM_pc;
72} 72}
73 73
74static inline unsigned long *vcpu_cpsr(struct kvm_vcpu *vcpu) 74static inline unsigned long *vcpu_cpsr(struct kvm_vcpu *vcpu)
75{ 75{
76 return &vcpu->arch.regs.usr_regs.ARM_cpsr; 76 return &vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr;
77} 77}
78 78
79static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) 79static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
@@ -83,13 +83,13 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
83 83
84static inline bool mode_has_spsr(struct kvm_vcpu *vcpu) 84static inline bool mode_has_spsr(struct kvm_vcpu *vcpu)
85{ 85{
86 unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK; 86 unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK;
87 return (cpsr_mode > USR_MODE && cpsr_mode < SYSTEM_MODE); 87 return (cpsr_mode > USR_MODE && cpsr_mode < SYSTEM_MODE);
88} 88}
89 89
90static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu) 90static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
91{ 91{
92 unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK; 92 unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK;
93 return cpsr_mode > USR_MODE;; 93 return cpsr_mode > USR_MODE;;
94} 94}
95 95
@@ -108,11 +108,6 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(struct kvm_vcpu *vcpu)
108 return ((phys_addr_t)vcpu->arch.fault.hpfar & HPFAR_MASK) << 8; 108 return ((phys_addr_t)vcpu->arch.fault.hpfar & HPFAR_MASK) << 8;
109} 109}
110 110
111static inline unsigned long kvm_vcpu_get_hyp_pc(struct kvm_vcpu *vcpu)
112{
113 return vcpu->arch.fault.hyp_pc;
114}
115
116static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu) 111static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu)
117{ 112{
118 return kvm_vcpu_get_hsr(vcpu) & HSR_ISV; 113 return kvm_vcpu_get_hsr(vcpu) & HSR_ISV;
@@ -143,6 +138,11 @@ static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu)
143 return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW; 138 return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW;
144} 139}
145 140
141static inline bool kvm_vcpu_dabt_is_cm(struct kvm_vcpu *vcpu)
142{
143 return !!(kvm_vcpu_get_hsr(vcpu) & HSR_DABT_CM);
144}
145
146/* Get Access Size from a data abort */ 146/* Get Access Size from a data abort */
147static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu) 147static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu)
148{ 148{
@@ -192,7 +192,7 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
192 192
193static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) 193static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
194{ 194{
195 return vcpu->arch.cp15[c0_MPIDR] & MPIDR_HWID_BITMASK; 195 return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK;
196} 196}
197 197
198static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) 198static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index f9f27792d8ed..385070180c25 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -85,20 +85,61 @@ struct kvm_vcpu_fault_info {
85 u32 hsr; /* Hyp Syndrome Register */ 85 u32 hsr; /* Hyp Syndrome Register */
86 u32 hxfar; /* Hyp Data/Inst. Fault Address Register */ 86 u32 hxfar; /* Hyp Data/Inst. Fault Address Register */
87 u32 hpfar; /* Hyp IPA Fault Address Register */ 87 u32 hpfar; /* Hyp IPA Fault Address Register */
88 u32 hyp_pc; /* PC when exception was taken from Hyp mode */
89}; 88};
90 89
91typedef struct vfp_hard_struct kvm_cpu_context_t; 90/*
91 * 0 is reserved as an invalid value.
92 * Order should be kept in sync with the save/restore code.
93 */
94enum vcpu_sysreg {
95 __INVALID_SYSREG__,
96 c0_MPIDR, /* MultiProcessor ID Register */
97 c0_CSSELR, /* Cache Size Selection Register */
98 c1_SCTLR, /* System Control Register */
99 c1_ACTLR, /* Auxiliary Control Register */
100 c1_CPACR, /* Coprocessor Access Control */
101 c2_TTBR0, /* Translation Table Base Register 0 */
102 c2_TTBR0_high, /* TTBR0 top 32 bits */
103 c2_TTBR1, /* Translation Table Base Register 1 */
104 c2_TTBR1_high, /* TTBR1 top 32 bits */
105 c2_TTBCR, /* Translation Table Base Control R. */
106 c3_DACR, /* Domain Access Control Register */
107 c5_DFSR, /* Data Fault Status Register */
108 c5_IFSR, /* Instruction Fault Status Register */
109 c5_ADFSR, /* Auxilary Data Fault Status R */
110 c5_AIFSR, /* Auxilary Instrunction Fault Status R */
111 c6_DFAR, /* Data Fault Address Register */
112 c6_IFAR, /* Instruction Fault Address Register */
113 c7_PAR, /* Physical Address Register */
114 c7_PAR_high, /* PAR top 32 bits */
115 c9_L2CTLR, /* Cortex A15/A7 L2 Control Register */
116 c10_PRRR, /* Primary Region Remap Register */
117 c10_NMRR, /* Normal Memory Remap Register */
118 c12_VBAR, /* Vector Base Address Register */
119 c13_CID, /* Context ID Register */
120 c13_TID_URW, /* Thread ID, User R/W */
121 c13_TID_URO, /* Thread ID, User R/O */
122 c13_TID_PRIV, /* Thread ID, Privileged */
123 c14_CNTKCTL, /* Timer Control Register (PL1) */
124 c10_AMAIR0, /* Auxilary Memory Attribute Indirection Reg0 */
125 c10_AMAIR1, /* Auxilary Memory Attribute Indirection Reg1 */
126 NR_CP15_REGS /* Number of regs (incl. invalid) */
127};
128
129struct kvm_cpu_context {
130 struct kvm_regs gp_regs;
131 struct vfp_hard_struct vfp;
132 u32 cp15[NR_CP15_REGS];
133};
134
135typedef struct kvm_cpu_context kvm_cpu_context_t;
92 136
93struct kvm_vcpu_arch { 137struct kvm_vcpu_arch {
94 struct kvm_regs regs; 138 struct kvm_cpu_context ctxt;
95 139
96 int target; /* Processor target */ 140 int target; /* Processor target */
97 DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); 141 DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
98 142
99 /* System control coprocessor (cp15) */
100 u32 cp15[NR_CP15_REGS];
101
102 /* The CPU type we expose to the VM */ 143 /* The CPU type we expose to the VM */
103 u32 midr; 144 u32 midr;
104 145
@@ -111,9 +152,6 @@ struct kvm_vcpu_arch {
111 /* Exception Information */ 152 /* Exception Information */
112 struct kvm_vcpu_fault_info fault; 153 struct kvm_vcpu_fault_info fault;
113 154
114 /* Floating point registers (VFP and Advanced SIMD/NEON) */
115 struct vfp_hard_struct vfp_guest;
116
117 /* Host FP context */ 155 /* Host FP context */
118 kvm_cpu_context_t *host_cpu_context; 156 kvm_cpu_context_t *host_cpu_context;
119 157
@@ -158,12 +196,14 @@ struct kvm_vcpu_stat {
158 u64 exits; 196 u64 exits;
159}; 197};
160 198
199#define vcpu_cp15(v,r) (v)->arch.ctxt.cp15[r]
200
161int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); 201int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
162unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); 202unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
163int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); 203int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
164int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); 204int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
165int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); 205int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
166u64 kvm_call_hyp(void *hypfn, ...); 206unsigned long kvm_call_hyp(void *hypfn, ...);
167void force_vm_exit(const cpumask_t *mask); 207void force_vm_exit(const cpumask_t *mask);
168 208
169#define KVM_ARCH_WANT_MMU_NOTIFIER 209#define KVM_ARCH_WANT_MMU_NOTIFIER
@@ -220,6 +260,11 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
220 kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); 260 kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
221} 261}
222 262
263static inline void __cpu_init_stage2(void)
264{
265 kvm_call_hyp(__init_stage2_translation);
266}
267
223static inline int kvm_arch_dev_ioctl_check_extension(long ext) 268static inline int kvm_arch_dev_ioctl_check_extension(long ext)
224{ 269{
225 return 0; 270 return 0;
@@ -242,5 +287,20 @@ static inline void kvm_arm_init_debug(void) {}
242static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} 287static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
243static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} 288static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
244static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} 289static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
290static inline int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
291 struct kvm_device_attr *attr)
292{
293 return -ENXIO;
294}
295static inline int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
296 struct kvm_device_attr *attr)
297{
298 return -ENXIO;
299}
300static inline int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
301 struct kvm_device_attr *attr)
302{
303 return -ENXIO;
304}
245 305
246#endif /* __ARM_KVM_HOST_H__ */ 306#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
new file mode 100644
index 000000000000..f0e860761380
--- /dev/null
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -0,0 +1,139 @@
1/*
2 * Copyright (C) 2015 - ARM Ltd
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __ARM_KVM_HYP_H__
19#define __ARM_KVM_HYP_H__
20
21#include <linux/compiler.h>
22#include <linux/kvm_host.h>
23#include <asm/kvm_mmu.h>
24#include <asm/vfp.h>
25
26#define __hyp_text __section(.hyp.text) notrace
27
28#define kern_hyp_va(v) (v)
29#define hyp_kern_va(v) (v)
30
31#define __ACCESS_CP15(CRn, Op1, CRm, Op2) \
32 "mrc", "mcr", __stringify(p15, Op1, %0, CRn, CRm, Op2), u32
33#define __ACCESS_CP15_64(Op1, CRm) \
34 "mrrc", "mcrr", __stringify(p15, Op1, %Q0, %R0, CRm), u64
35#define __ACCESS_VFP(CRn) \
36 "mrc", "mcr", __stringify(p10, 7, %0, CRn, cr0, 0), u32
37
38#define __write_sysreg(v, r, w, c, t) asm volatile(w " " c : : "r" ((t)(v)))
39#define write_sysreg(v, ...) __write_sysreg(v, __VA_ARGS__)
40
41#define __read_sysreg(r, w, c, t) ({ \
42 t __val; \
43 asm volatile(r " " c : "=r" (__val)); \
44 __val; \
45})
46#define read_sysreg(...) __read_sysreg(__VA_ARGS__)
47
48#define write_special(v, r) \
49 asm volatile("msr " __stringify(r) ", %0" : : "r" (v))
50#define read_special(r) ({ \
51 u32 __val; \
52 asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
53 __val; \
54})
55
56#define TTBR0 __ACCESS_CP15_64(0, c2)
57#define TTBR1 __ACCESS_CP15_64(1, c2)
58#define VTTBR __ACCESS_CP15_64(6, c2)
59#define PAR __ACCESS_CP15_64(0, c7)
60#define CNTV_CVAL __ACCESS_CP15_64(3, c14)
61#define CNTVOFF __ACCESS_CP15_64(4, c14)
62
63#define MIDR __ACCESS_CP15(c0, 0, c0, 0)
64#define CSSELR __ACCESS_CP15(c0, 2, c0, 0)
65#define VPIDR __ACCESS_CP15(c0, 4, c0, 0)
66#define VMPIDR __ACCESS_CP15(c0, 4, c0, 5)
67#define SCTLR __ACCESS_CP15(c1, 0, c0, 0)
68#define CPACR __ACCESS_CP15(c1, 0, c0, 2)
69#define HCR __ACCESS_CP15(c1, 4, c1, 0)
70#define HDCR __ACCESS_CP15(c1, 4, c1, 1)
71#define HCPTR __ACCESS_CP15(c1, 4, c1, 2)
72#define HSTR __ACCESS_CP15(c1, 4, c1, 3)
73#define TTBCR __ACCESS_CP15(c2, 0, c0, 2)
74#define HTCR __ACCESS_CP15(c2, 4, c0, 2)
75#define VTCR __ACCESS_CP15(c2, 4, c1, 2)
76#define DACR __ACCESS_CP15(c3, 0, c0, 0)
77#define DFSR __ACCESS_CP15(c5, 0, c0, 0)
78#define IFSR __ACCESS_CP15(c5, 0, c0, 1)
79#define ADFSR __ACCESS_CP15(c5, 0, c1, 0)
80#define AIFSR __ACCESS_CP15(c5, 0, c1, 1)
81#define HSR __ACCESS_CP15(c5, 4, c2, 0)
82#define DFAR __ACCESS_CP15(c6, 0, c0, 0)
83#define IFAR __ACCESS_CP15(c6, 0, c0, 2)
84#define HDFAR __ACCESS_CP15(c6, 4, c0, 0)
85#define HIFAR __ACCESS_CP15(c6, 4, c0, 2)
86#define HPFAR __ACCESS_CP15(c6, 4, c0, 4)
87#define ICIALLUIS __ACCESS_CP15(c7, 0, c1, 0)
88#define ATS1CPR __ACCESS_CP15(c7, 0, c8, 0)
89#define TLBIALLIS __ACCESS_CP15(c8, 0, c3, 0)
90#define TLBIALLNSNHIS __ACCESS_CP15(c8, 4, c3, 4)
91#define PRRR __ACCESS_CP15(c10, 0, c2, 0)
92#define NMRR __ACCESS_CP15(c10, 0, c2, 1)
93#define AMAIR0 __ACCESS_CP15(c10, 0, c3, 0)
94#define AMAIR1 __ACCESS_CP15(c10, 0, c3, 1)
95#define VBAR __ACCESS_CP15(c12, 0, c0, 0)
96#define CID __ACCESS_CP15(c13, 0, c0, 1)
97#define TID_URW __ACCESS_CP15(c13, 0, c0, 2)
98#define TID_URO __ACCESS_CP15(c13, 0, c0, 3)
99#define TID_PRIV __ACCESS_CP15(c13, 0, c0, 4)
100#define HTPIDR __ACCESS_CP15(c13, 4, c0, 2)
101#define CNTKCTL __ACCESS_CP15(c14, 0, c1, 0)
102#define CNTV_CTL __ACCESS_CP15(c14, 0, c3, 1)
103#define CNTHCTL __ACCESS_CP15(c14, 4, c1, 0)
104
105#define VFP_FPEXC __ACCESS_VFP(FPEXC)
106
107/* AArch64 compatibility macros, only for the timer so far */
108#define read_sysreg_el0(r) read_sysreg(r##_el0)
109#define write_sysreg_el0(v, r) write_sysreg(v, r##_el0)
110
111#define cntv_ctl_el0 CNTV_CTL
112#define cntv_cval_el0 CNTV_CVAL
113#define cntvoff_el2 CNTVOFF
114#define cnthctl_el2 CNTHCTL
115
116void __timer_save_state(struct kvm_vcpu *vcpu);
117void __timer_restore_state(struct kvm_vcpu *vcpu);
118
119void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
120void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
121
122void __sysreg_save_state(struct kvm_cpu_context *ctxt);
123void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
124
125void asmlinkage __vfp_save_state(struct vfp_hard_struct *vfp);
126void asmlinkage __vfp_restore_state(struct vfp_hard_struct *vfp);
127static inline bool __vfp_enabled(void)
128{
129 return !(read_sysreg(HCPTR) & (HCPTR_TCP(11) | HCPTR_TCP(10)));
130}
131
132void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt);
133void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt);
134
135int asmlinkage __guest_enter(struct kvm_vcpu *vcpu,
136 struct kvm_cpu_context *host);
137int asmlinkage __hyp_do_panic(const char *, int, u32);
138
139#endif /* __ARM_KVM_HYP_H__ */
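
As an illustration of the accessor macros defined in this new header (the function below is not part of the patch), read_sysreg()/write_sysreg() expand to the mrc/mcr encodings named by the register macros, and the 64-bit accessors use mrrc/mcrr:

    static void __hyp_text example_save_restore(struct kvm_cpu_context *ctxt)
    {
            u32 sctlr = read_sysreg(SCTLR);         /* mrc p15, 0, %0, c1, c0, 0 */
            u64 par   = read_sysreg(PAR);           /* mrrc p15, 0, %Q0, %R0, c7 */

            ctxt->cp15[c1_SCTLR] = sctlr;           /* stash into the vcpu context */
            write_sysreg(par, PAR);                 /* mcrr p15, 0, %Q0, %R0, c7 */
    }
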
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index a520b7987a29..da44be9db4fa 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -179,7 +179,7 @@ struct kvm;
179 179
180static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) 180static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
181{ 181{
182 return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101; 182 return (vcpu_cp15(vcpu, c1_SCTLR) & 0b101) == 0b101;
183} 183}
184 184
185static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, 185static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h
index 4371f45c5784..d4ceaf5f299b 100644
--- a/arch/arm/include/asm/virt.h
+++ b/arch/arm/include/asm/virt.h
@@ -74,6 +74,15 @@ static inline bool is_hyp_mode_mismatched(void)
74{ 74{
75 return !!(__boot_cpu_mode & BOOT_CPU_MODE_MISMATCH); 75 return !!(__boot_cpu_mode & BOOT_CPU_MODE_MISMATCH);
76} 76}
77
78static inline bool is_kernel_in_hyp_mode(void)
79{
80 return false;
81}
82
83/* The section containing the hypervisor text */
84extern char __hyp_text_start[];
85extern char __hyp_text_end[];
77#endif 86#endif
78 87
79#endif /* __ASSEMBLY__ */ 88#endif /* __ASSEMBLY__ */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 871b8267d211..27d05813ff09 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -170,41 +170,11 @@ int main(void)
170 DEFINE(CACHE_WRITEBACK_GRANULE, __CACHE_WRITEBACK_GRANULE); 170 DEFINE(CACHE_WRITEBACK_GRANULE, __CACHE_WRITEBACK_GRANULE);
171 BLANK(); 171 BLANK();
172#ifdef CONFIG_KVM_ARM_HOST 172#ifdef CONFIG_KVM_ARM_HOST
173 DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); 173 DEFINE(VCPU_GUEST_CTXT, offsetof(struct kvm_vcpu, arch.ctxt));
174 DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.midr)); 174 DEFINE(VCPU_HOST_CTXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
175 DEFINE(VCPU_CP15, offsetof(struct kvm_vcpu, arch.cp15)); 175 DEFINE(CPU_CTXT_VFP, offsetof(struct kvm_cpu_context, vfp));
176 DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest)); 176 DEFINE(CPU_CTXT_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
177 DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.host_cpu_context)); 177 DEFINE(GP_REGS_USR, offsetof(struct kvm_regs, usr_regs));
178 DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs));
179 DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs));
180 DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs));
181 DEFINE(VCPU_ABT_REGS, offsetof(struct kvm_vcpu, arch.regs.abt_regs));
182 DEFINE(VCPU_UND_REGS, offsetof(struct kvm_vcpu, arch.regs.und_regs));
183 DEFINE(VCPU_IRQ_REGS, offsetof(struct kvm_vcpu, arch.regs.irq_regs));
184 DEFINE(VCPU_FIQ_REGS, offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
185 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc));
186 DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr));
187 DEFINE(VCPU_HCR, offsetof(struct kvm_vcpu, arch.hcr));
188 DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
189 DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.fault.hsr));
190 DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar));
191 DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.fault.hpfar));
192 DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
193 DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
194 DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
195 DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
196 DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
197 DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
198 DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
199 DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
200 DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
201 DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr));
202 DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
203 DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
204 DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff));
205 DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled));
206 DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base));
207 DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
208#endif 178#endif
209 BLANK(); 179 BLANK();
210#ifdef CONFIG_VDSO 180#ifdef CONFIG_VDSO
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 8b60fde5ce48..b4139cbbbdd9 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -18,6 +18,11 @@
18 *(.proc.info.init) \ 18 *(.proc.info.init) \
19 VMLINUX_SYMBOL(__proc_info_end) = .; 19 VMLINUX_SYMBOL(__proc_info_end) = .;
20 20
21#define HYPERVISOR_TEXT \
22 VMLINUX_SYMBOL(__hyp_text_start) = .; \
23 *(.hyp.text) \
24 VMLINUX_SYMBOL(__hyp_text_end) = .;
25
21#define IDMAP_TEXT \ 26#define IDMAP_TEXT \
22 ALIGN_FUNCTION(); \ 27 ALIGN_FUNCTION(); \
23 VMLINUX_SYMBOL(__idmap_text_start) = .; \ 28 VMLINUX_SYMBOL(__idmap_text_start) = .; \
@@ -108,6 +113,7 @@ SECTIONS
108 TEXT_TEXT 113 TEXT_TEXT
109 SCHED_TEXT 114 SCHED_TEXT
110 LOCK_TEXT 115 LOCK_TEXT
116 HYPERVISOR_TEXT
111 KPROBES_TEXT 117 KPROBES_TEXT
112 *(.gnu.warning) 118 *(.gnu.warning)
113 *(.glue_7) 119 *(.glue_7)
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index c5eef02c52ba..eb1bf4309c13 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -17,6 +17,7 @@ AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
17KVM := ../../../virt/kvm 17KVM := ../../../virt/kvm
18kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o 18kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
19 19
20obj-$(CONFIG_KVM_ARM_HOST) += hyp/
20obj-y += kvm-arm.o init.o interrupts.o 21obj-y += kvm-arm.o init.o interrupts.o
21obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o 22obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
22obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o 23obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 08e49c423c24..76552b51c7ae 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -28,6 +28,7 @@
28#include <linux/sched.h> 28#include <linux/sched.h>
29#include <linux/kvm.h> 29#include <linux/kvm.h>
30#include <trace/events/kvm.h> 30#include <trace/events/kvm.h>
31#include <kvm/arm_pmu.h>
31 32
32#define CREATE_TRACE_POINTS 33#define CREATE_TRACE_POINTS
33#include "trace.h" 34#include "trace.h"
@@ -265,6 +266,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
265 kvm_mmu_free_memory_caches(vcpu); 266 kvm_mmu_free_memory_caches(vcpu);
266 kvm_timer_vcpu_terminate(vcpu); 267 kvm_timer_vcpu_terminate(vcpu);
267 kvm_vgic_vcpu_destroy(vcpu); 268 kvm_vgic_vcpu_destroy(vcpu);
269 kvm_pmu_vcpu_destroy(vcpu);
268 kmem_cache_free(kvm_vcpu_cache, vcpu); 270 kmem_cache_free(kvm_vcpu_cache, vcpu);
269} 271}
270 272
@@ -320,6 +322,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
320 vcpu->cpu = -1; 322 vcpu->cpu = -1;
321 323
322 kvm_arm_set_running_vcpu(NULL); 324 kvm_arm_set_running_vcpu(NULL);
325 kvm_timer_vcpu_put(vcpu);
323} 326}
324 327
325int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 328int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
@@ -577,6 +580,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
577 * non-preemptible context. 580 * non-preemptible context.
578 */ 581 */
579 preempt_disable(); 582 preempt_disable();
583 kvm_pmu_flush_hwstate(vcpu);
580 kvm_timer_flush_hwstate(vcpu); 584 kvm_timer_flush_hwstate(vcpu);
581 kvm_vgic_flush_hwstate(vcpu); 585 kvm_vgic_flush_hwstate(vcpu);
582 586
@@ -593,6 +597,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
593 if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || 597 if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
594 vcpu->arch.power_off || vcpu->arch.pause) { 598 vcpu->arch.power_off || vcpu->arch.pause) {
595 local_irq_enable(); 599 local_irq_enable();
600 kvm_pmu_sync_hwstate(vcpu);
596 kvm_timer_sync_hwstate(vcpu); 601 kvm_timer_sync_hwstate(vcpu);
597 kvm_vgic_sync_hwstate(vcpu); 602 kvm_vgic_sync_hwstate(vcpu);
598 preempt_enable(); 603 preempt_enable();
@@ -642,10 +647,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
642 trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); 647 trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
643 648
644 /* 649 /*
645 * We must sync the timer state before the vgic state so that 650 * We must sync the PMU and timer state before the vgic state so
646 * the vgic can properly sample the updated state of the 651 * that the vgic can properly sample the updated state of the
647 * interrupt line. 652 * interrupt line.
648 */ 653 */
654 kvm_pmu_sync_hwstate(vcpu);
649 kvm_timer_sync_hwstate(vcpu); 655 kvm_timer_sync_hwstate(vcpu);
650 656
651 kvm_vgic_sync_hwstate(vcpu); 657 kvm_vgic_sync_hwstate(vcpu);
@@ -823,11 +829,54 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
823 return 0; 829 return 0;
824} 830}
825 831
832static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
833 struct kvm_device_attr *attr)
834{
835 int ret = -ENXIO;
836
837 switch (attr->group) {
838 default:
839 ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
840 break;
841 }
842
843 return ret;
844}
845
846static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
847 struct kvm_device_attr *attr)
848{
849 int ret = -ENXIO;
850
851 switch (attr->group) {
852 default:
853 ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
854 break;
855 }
856
857 return ret;
858}
859
860static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
861 struct kvm_device_attr *attr)
862{
863 int ret = -ENXIO;
864
865 switch (attr->group) {
866 default:
867 ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
868 break;
869 }
870
871 return ret;
872}
873
826long kvm_arch_vcpu_ioctl(struct file *filp, 874long kvm_arch_vcpu_ioctl(struct file *filp,
827 unsigned int ioctl, unsigned long arg) 875 unsigned int ioctl, unsigned long arg)
828{ 876{
829 struct kvm_vcpu *vcpu = filp->private_data; 877 struct kvm_vcpu *vcpu = filp->private_data;
830 void __user *argp = (void __user *)arg; 878 void __user *argp = (void __user *)arg;
879 struct kvm_device_attr attr;
831 880
832 switch (ioctl) { 881 switch (ioctl) {
833 case KVM_ARM_VCPU_INIT: { 882 case KVM_ARM_VCPU_INIT: {
@@ -870,6 +919,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
870 return -E2BIG; 919 return -E2BIG;
871 return kvm_arm_copy_reg_indices(vcpu, user_list->reg); 920 return kvm_arm_copy_reg_indices(vcpu, user_list->reg);
872 } 921 }
922 case KVM_SET_DEVICE_ATTR: {
923 if (copy_from_user(&attr, argp, sizeof(attr)))
924 return -EFAULT;
925 return kvm_arm_vcpu_set_attr(vcpu, &attr);
926 }
927 case KVM_GET_DEVICE_ATTR: {
928 if (copy_from_user(&attr, argp, sizeof(attr)))
929 return -EFAULT;
930 return kvm_arm_vcpu_get_attr(vcpu, &attr);
931 }
932 case KVM_HAS_DEVICE_ATTR: {
933 if (copy_from_user(&attr, argp, sizeof(attr)))
934 return -EFAULT;
935 return kvm_arm_vcpu_has_attr(vcpu, &attr);
936 }
873 default: 937 default:
874 return -EINVAL; 938 return -EINVAL;
875 } 939 }
@@ -967,6 +1031,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
967 } 1031 }
968} 1032}
969 1033
1034static void cpu_init_stage2(void *dummy)
1035{
1036 __cpu_init_stage2();
1037}
1038
970static void cpu_init_hyp_mode(void *dummy) 1039static void cpu_init_hyp_mode(void *dummy)
971{ 1040{
972 phys_addr_t boot_pgd_ptr; 1041 phys_addr_t boot_pgd_ptr;
@@ -985,6 +1054,7 @@ static void cpu_init_hyp_mode(void *dummy)
985 vector_ptr = (unsigned long)__kvm_hyp_vector; 1054 vector_ptr = (unsigned long)__kvm_hyp_vector;
986 1055
987 __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); 1056 __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
1057 __cpu_init_stage2();
988 1058
989 kvm_arm_init_debug(); 1059 kvm_arm_init_debug();
990} 1060}
@@ -1035,6 +1105,82 @@ static inline void hyp_cpu_pm_init(void)
1035} 1105}
1036#endif 1106#endif
1037 1107
1108static void teardown_common_resources(void)
1109{
1110 free_percpu(kvm_host_cpu_state);
1111}
1112
1113static int init_common_resources(void)
1114{
1115 kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
1116 if (!kvm_host_cpu_state) {
1117 kvm_err("Cannot allocate host CPU state\n");
1118 return -ENOMEM;
1119 }
1120
1121 return 0;
1122}
1123
1124static int init_subsystems(void)
1125{
1126 int err;
1127
1128 /*
1129 * Init HYP view of VGIC
1130 */
1131 err = kvm_vgic_hyp_init();
1132 switch (err) {
1133 case 0:
1134 vgic_present = true;
1135 break;
1136 case -ENODEV:
1137 case -ENXIO:
1138 vgic_present = false;
1139 break;
1140 default:
1141 return err;
1142 }
1143
1144 /*
1145 * Init HYP architected timer support
1146 */
1147 err = kvm_timer_hyp_init();
1148 if (err)
1149 return err;
1150
1151 kvm_perf_init();
1152 kvm_coproc_table_init();
1153
1154 return 0;
1155}
1156
1157static void teardown_hyp_mode(void)
1158{
1159 int cpu;
1160
1161 if (is_kernel_in_hyp_mode())
1162 return;
1163
1164 free_hyp_pgds();
1165 for_each_possible_cpu(cpu)
1166 free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
1167}
1168
1169static int init_vhe_mode(void)
1170{
1171 /*
1172 * Execute the init code on each CPU.
1173 */
1174 on_each_cpu(cpu_init_stage2, NULL, 1);
1175
1176 /* set size of VMID supported by CPU */
1177 kvm_vmid_bits = kvm_get_vmid_bits();
1178 kvm_info("%d-bit VMID\n", kvm_vmid_bits);
1179
1180 kvm_info("VHE mode initialized successfully\n");
1181 return 0;
1182}
1183
1038/** 1184/**
1039 * Inits Hyp-mode on all online CPUs 1185 * Inits Hyp-mode on all online CPUs
1040 */ 1186 */
@@ -1065,7 +1211,7 @@ static int init_hyp_mode(void)
1065 stack_page = __get_free_page(GFP_KERNEL); 1211 stack_page = __get_free_page(GFP_KERNEL);
1066 if (!stack_page) { 1212 if (!stack_page) {
1067 err = -ENOMEM; 1213 err = -ENOMEM;
1068 goto out_free_stack_pages; 1214 goto out_err;
1069 } 1215 }
1070 1216
1071 per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page; 1217 per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
@@ -1074,16 +1220,16 @@ static int init_hyp_mode(void)
1074 /* 1220 /*
1075 * Map the Hyp-code called directly from the host 1221 * Map the Hyp-code called directly from the host
1076 */ 1222 */
1077 err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end); 1223 err = create_hyp_mappings(__hyp_text_start, __hyp_text_end);
1078 if (err) { 1224 if (err) {
1079 kvm_err("Cannot map world-switch code\n"); 1225 kvm_err("Cannot map world-switch code\n");
1080 goto out_free_mappings; 1226 goto out_err;
1081 } 1227 }
1082 1228
1083 err = create_hyp_mappings(__start_rodata, __end_rodata); 1229 err = create_hyp_mappings(__start_rodata, __end_rodata);
1084 if (err) { 1230 if (err) {
1085 kvm_err("Cannot map rodata section\n"); 1231 kvm_err("Cannot map rodata section\n");
1086 goto out_free_mappings; 1232 goto out_err;
1087 } 1233 }
1088 1234
1089 /* 1235 /*
@@ -1095,20 +1241,10 @@ static int init_hyp_mode(void)
1095 1241
1096 if (err) { 1242 if (err) {
1097 kvm_err("Cannot map hyp stack\n"); 1243 kvm_err("Cannot map hyp stack\n");
1098 goto out_free_mappings; 1244 goto out_err;
1099 } 1245 }
1100 } 1246 }
1101 1247
1102 /*
1103 * Map the host CPU structures
1104 */
1105 kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
1106 if (!kvm_host_cpu_state) {
1107 err = -ENOMEM;
1108 kvm_err("Cannot allocate host CPU state\n");
1109 goto out_free_mappings;
1110 }
1111
1112 for_each_possible_cpu(cpu) { 1248 for_each_possible_cpu(cpu) {
1113 kvm_cpu_context_t *cpu_ctxt; 1249 kvm_cpu_context_t *cpu_ctxt;
1114 1250
@@ -1117,7 +1253,7 @@ static int init_hyp_mode(void)
1117 1253
1118 if (err) { 1254 if (err) {
1119 kvm_err("Cannot map host CPU state: %d\n", err); 1255 kvm_err("Cannot map host CPU state: %d\n", err);
1120 goto out_free_context; 1256 goto out_err;
1121 } 1257 }
1122 } 1258 }
1123 1259
@@ -1126,34 +1262,22 @@ static int init_hyp_mode(void)
1126 */ 1262 */
1127 on_each_cpu(cpu_init_hyp_mode, NULL, 1); 1263 on_each_cpu(cpu_init_hyp_mode, NULL, 1);
1128 1264
1129 /*
1130 * Init HYP view of VGIC
1131 */
1132 err = kvm_vgic_hyp_init();
1133 switch (err) {
1134 case 0:
1135 vgic_present = true;
1136 break;
1137 case -ENODEV:
1138 case -ENXIO:
1139 vgic_present = false;
1140 break;
1141 default:
1142 goto out_free_context;
1143 }
1144
1145 /*
1146 * Init HYP architected timer support
1147 */
1148 err = kvm_timer_hyp_init();
1149 if (err)
1150 goto out_free_context;
1151
1152#ifndef CONFIG_HOTPLUG_CPU 1265#ifndef CONFIG_HOTPLUG_CPU
1153 free_boot_hyp_pgd(); 1266 free_boot_hyp_pgd();
1154#endif 1267#endif
1155 1268
1156 kvm_perf_init(); 1269 cpu_notifier_register_begin();
1270
1271 err = __register_cpu_notifier(&hyp_init_cpu_nb);
1272
1273 cpu_notifier_register_done();
1274
1275 if (err) {
1276 kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
1277 goto out_err;
1278 }
1279
1280 hyp_cpu_pm_init();
1157 1281
1158 /* set size of VMID supported by CPU */ 1282 /* set size of VMID supported by CPU */
1159 kvm_vmid_bits = kvm_get_vmid_bits(); 1283 kvm_vmid_bits = kvm_get_vmid_bits();
@@ -1162,14 +1286,9 @@ static int init_hyp_mode(void)
1162 kvm_info("Hyp mode initialized successfully\n"); 1286 kvm_info("Hyp mode initialized successfully\n");
1163 1287
1164 return 0; 1288 return 0;
1165out_free_context: 1289
1166 free_percpu(kvm_host_cpu_state);
1167out_free_mappings:
1168 free_hyp_pgds();
1169out_free_stack_pages:
1170 for_each_possible_cpu(cpu)
1171 free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
1172out_err: 1290out_err:
1291 teardown_hyp_mode();
1173 kvm_err("error initializing Hyp mode: %d\n", err); 1292 kvm_err("error initializing Hyp mode: %d\n", err);
1174 return err; 1293 return err;
1175} 1294}
@@ -1213,26 +1332,27 @@ int kvm_arch_init(void *opaque)
1213 } 1332 }
1214 } 1333 }
1215 1334
1216 cpu_notifier_register_begin(); 1335 err = init_common_resources();
1217
1218 err = init_hyp_mode();
1219 if (err) 1336 if (err)
1220 goto out_err; 1337 return err;
1221 1338
1222 err = __register_cpu_notifier(&hyp_init_cpu_nb); 1339 if (is_kernel_in_hyp_mode())
1223 if (err) { 1340 err = init_vhe_mode();
1224 kvm_err("Cannot register HYP init CPU notifier (%d)\n", err); 1341 else
1342 err = init_hyp_mode();
1343 if (err)
1225 goto out_err; 1344 goto out_err;
1226 }
1227
1228 cpu_notifier_register_done();
1229 1345
1230 hyp_cpu_pm_init(); 1346 err = init_subsystems();
1347 if (err)
1348 goto out_hyp;
1231 1349
1232 kvm_coproc_table_init();
1233 return 0; 1350 return 0;
1351
1352out_hyp:
1353 teardown_hyp_mode();
1234out_err: 1354out_err:
1235 cpu_notifier_register_done(); 1355 teardown_common_resources();
1236 return err; 1356 return err;
1237} 1357}
1238 1358
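
The reworked kvm_arch_init() above splits setup into init_common_resources(), a mode-specific step (init_vhe_mode() when the kernel already runs in HYP, init_hyp_mode() otherwise) and init_subsystems(), and collapses the old chain of out_free_* labels into a single unwind path (out_hyp calls teardown_hyp_mode(), out_err calls teardown_common_resources()). A self-contained sketch of that goto-unwind pattern, with stub functions standing in for the real helpers, could look like this:

#include <stdio.h>

/* Stubs standing in for init_common_resources(), init_hyp_mode() /
 * init_vhe_mode() and init_subsystems(); each returns 0 on success. */
static int step_common(void)     { return 0; }
static int step_mode(void)       { return 0; }
static int step_subsystems(void) { return 0; }
static void undo_mode(void)      { puts("undo mode setup"); }
static void undo_common(void)    { puts("undo common setup"); }

static int arch_init_like(void)
{
	int err;

	err = step_common();
	if (err)
		return err;		/* nothing to unwind yet */

	err = step_mode();
	if (err)
		goto out_common;

	err = step_subsystems();
	if (err)
		goto out_mode;		/* unwind in reverse order of setup */

	return 0;

out_mode:
	undo_mode();
out_common:
	undo_common();
	return err;
}

int main(void)
{
	return arch_init_like();
}
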
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index f3d88dc388bc..1bb2b79c01ff 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -16,6 +16,8 @@
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 */ 18 */
19
20#include <linux/bsearch.h>
19#include <linux/mm.h> 21#include <linux/mm.h>
20#include <linux/kvm_host.h> 22#include <linux/kvm_host.h>
21#include <linux/uaccess.h> 23#include <linux/uaccess.h>
@@ -54,8 +56,8 @@ static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu,
54 const struct coproc_reg *r, 56 const struct coproc_reg *r,
55 u64 val) 57 u64 val)
56{ 58{
57 vcpu->arch.cp15[r->reg] = val & 0xffffffff; 59 vcpu_cp15(vcpu, r->reg) = val & 0xffffffff;
58 vcpu->arch.cp15[r->reg + 1] = val >> 32; 60 vcpu_cp15(vcpu, r->reg + 1) = val >> 32;
59} 61}
60 62
61static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu, 63static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu,
@@ -63,9 +65,9 @@ static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu,
63{ 65{
64 u64 val; 66 u64 val;
65 67
66 val = vcpu->arch.cp15[r->reg + 1]; 68 val = vcpu_cp15(vcpu, r->reg + 1);
67 val = val << 32; 69 val = val << 32;
68 val = val | vcpu->arch.cp15[r->reg]; 70 val = val | vcpu_cp15(vcpu, r->reg);
69 return val; 71 return val;
70} 72}
71 73
@@ -104,7 +106,7 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
104 * vcpu_id, but we read the 'U' bit from the underlying 106 * vcpu_id, but we read the 'U' bit from the underlying
105 * hardware directly. 107 * hardware directly.
106 */ 108 */
107 vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) | 109 vcpu_cp15(vcpu, c0_MPIDR) = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) |
108 ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) | 110 ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) |
109 (vcpu->vcpu_id & 3)); 111 (vcpu->vcpu_id & 3));
110} 112}
@@ -117,7 +119,7 @@ static bool access_actlr(struct kvm_vcpu *vcpu,
117 if (p->is_write) 119 if (p->is_write)
118 return ignore_write(vcpu, p); 120 return ignore_write(vcpu, p);
119 121
120 *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; 122 *vcpu_reg(vcpu, p->Rt1) = vcpu_cp15(vcpu, c1_ACTLR);
121 return true; 123 return true;
122} 124}
123 125
@@ -139,7 +141,7 @@ static bool access_l2ctlr(struct kvm_vcpu *vcpu,
139 if (p->is_write) 141 if (p->is_write)
140 return ignore_write(vcpu, p); 142 return ignore_write(vcpu, p);
141 143
142 *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; 144 *vcpu_reg(vcpu, p->Rt1) = vcpu_cp15(vcpu, c9_L2CTLR);
143 return true; 145 return true;
144} 146}
145 147
@@ -156,7 +158,7 @@ static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
156 ncores = min(ncores, 3U); 158 ncores = min(ncores, 3U);
157 l2ctlr |= (ncores & 3) << 24; 159 l2ctlr |= (ncores & 3) << 24;
158 160
159 vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; 161 vcpu_cp15(vcpu, c9_L2CTLR) = l2ctlr;
160} 162}
161 163
162static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) 164static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
@@ -171,7 +173,7 @@ static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
171 else 173 else
172 actlr &= ~(1U << 6); 174 actlr &= ~(1U << 6);
173 175
174 vcpu->arch.cp15[c1_ACTLR] = actlr; 176 vcpu_cp15(vcpu, c1_ACTLR) = actlr;
175} 177}
176 178
177/* 179/*
@@ -218,9 +220,9 @@ bool access_vm_reg(struct kvm_vcpu *vcpu,
218 220
219 BUG_ON(!p->is_write); 221 BUG_ON(!p->is_write);
220 222
221 vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1); 223 vcpu_cp15(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt1);
222 if (p->is_64bit) 224 if (p->is_64bit)
223 vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2); 225 vcpu_cp15(vcpu, r->reg + 1) = *vcpu_reg(vcpu, p->Rt2);
224 226
225 kvm_toggle_cache(vcpu, was_enabled); 227 kvm_toggle_cache(vcpu, was_enabled);
226 return true; 228 return true;
@@ -381,17 +383,26 @@ static const struct coproc_reg cp15_regs[] = {
381 { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, 383 { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
382}; 384};
383 385
386static int check_reg_table(const struct coproc_reg *table, unsigned int n)
387{
388 unsigned int i;
389
390 for (i = 1; i < n; i++) {
391 if (cmp_reg(&table[i-1], &table[i]) >= 0) {
392 kvm_err("reg table %p out of order (%d)\n", table, i - 1);
393 return 1;
394 }
395 }
396
397 return 0;
398}
399
384/* Target specific emulation tables */ 400/* Target specific emulation tables */
385static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS]; 401static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
386 402
387void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table) 403void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
388{ 404{
389 unsigned int i; 405 BUG_ON(check_reg_table(table->table, table->num));
390
391 for (i = 1; i < table->num; i++)
392 BUG_ON(cmp_reg(&table->table[i-1],
393 &table->table[i]) >= 0);
394
395 target_tables[table->target] = table; 406 target_tables[table->target] = table;
396} 407}
397 408
@@ -405,29 +416,32 @@ static const struct coproc_reg *get_target_table(unsigned target, size_t *num)
405 return table->table; 416 return table->table;
406} 417}
407 418
419#define reg_to_match_value(x) \
420 ({ \
421 unsigned long val; \
422 val = (x)->CRn << 11; \
423 val |= (x)->CRm << 7; \
424 val |= (x)->Op1 << 4; \
425 val |= (x)->Op2 << 1; \
426 val |= !(x)->is_64bit; \
427 val; \
428 })
429
430static int match_reg(const void *key, const void *elt)
431{
432 const unsigned long pval = (unsigned long)key;
433 const struct coproc_reg *r = elt;
434
435 return pval - reg_to_match_value(r);
436}
437
408static const struct coproc_reg *find_reg(const struct coproc_params *params, 438static const struct coproc_reg *find_reg(const struct coproc_params *params,
409 const struct coproc_reg table[], 439 const struct coproc_reg table[],
410 unsigned int num) 440 unsigned int num)
411{ 441{
412 unsigned int i; 442 unsigned long pval = reg_to_match_value(params);
413
414 for (i = 0; i < num; i++) {
415 const struct coproc_reg *r = &table[i];
416
417 if (params->is_64bit != r->is_64)
418 continue;
419 if (params->CRn != r->CRn)
420 continue;
421 if (params->CRm != r->CRm)
422 continue;
423 if (params->Op1 != r->Op1)
424 continue;
425 if (params->Op2 != r->Op2)
426 continue;
427 443
428 return r; 444 return bsearch((void *)pval, table, num, sizeof(table[0]), match_reg);
429 }
430 return NULL;
431} 445}
432 446
433static int emulate_cp15(struct kvm_vcpu *vcpu, 447static int emulate_cp15(struct kvm_vcpu *vcpu,
@@ -645,6 +659,9 @@ static struct coproc_reg invariant_cp15[] = {
645 { CRn( 0), CRm( 0), Op1( 0), Op2( 3), is32, NULL, get_TLBTR }, 659 { CRn( 0), CRm( 0), Op1( 0), Op2( 3), is32, NULL, get_TLBTR },
646 { CRn( 0), CRm( 0), Op1( 0), Op2( 6), is32, NULL, get_REVIDR }, 660 { CRn( 0), CRm( 0), Op1( 0), Op2( 6), is32, NULL, get_REVIDR },
647 661
662 { CRn( 0), CRm( 0), Op1( 1), Op2( 1), is32, NULL, get_CLIDR },
663 { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
664
648 { CRn( 0), CRm( 1), Op1( 0), Op2( 0), is32, NULL, get_ID_PFR0 }, 665 { CRn( 0), CRm( 1), Op1( 0), Op2( 0), is32, NULL, get_ID_PFR0 },
649 { CRn( 0), CRm( 1), Op1( 0), Op2( 1), is32, NULL, get_ID_PFR1 }, 666 { CRn( 0), CRm( 1), Op1( 0), Op2( 1), is32, NULL, get_ID_PFR1 },
650 { CRn( 0), CRm( 1), Op1( 0), Op2( 2), is32, NULL, get_ID_DFR0 }, 667 { CRn( 0), CRm( 1), Op1( 0), Op2( 2), is32, NULL, get_ID_DFR0 },
@@ -660,9 +677,6 @@ static struct coproc_reg invariant_cp15[] = {
660 { CRn( 0), CRm( 2), Op1( 0), Op2( 3), is32, NULL, get_ID_ISAR3 }, 677 { CRn( 0), CRm( 2), Op1( 0), Op2( 3), is32, NULL, get_ID_ISAR3 },
661 { CRn( 0), CRm( 2), Op1( 0), Op2( 4), is32, NULL, get_ID_ISAR4 }, 678 { CRn( 0), CRm( 2), Op1( 0), Op2( 4), is32, NULL, get_ID_ISAR4 },
662 { CRn( 0), CRm( 2), Op1( 0), Op2( 5), is32, NULL, get_ID_ISAR5 }, 679 { CRn( 0), CRm( 2), Op1( 0), Op2( 5), is32, NULL, get_ID_ISAR5 },
663
664 { CRn( 0), CRm( 0), Op1( 1), Op2( 1), is32, NULL, get_CLIDR },
665 { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
666}; 680};
667 681
668/* 682/*
@@ -901,7 +915,7 @@ static int vfp_get_reg(const struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
901 if (vfpid < num_fp_regs()) { 915 if (vfpid < num_fp_regs()) {
902 if (KVM_REG_SIZE(id) != 8) 916 if (KVM_REG_SIZE(id) != 8)
903 return -ENOENT; 917 return -ENOENT;
904 return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpregs[vfpid], 918 return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpregs[vfpid],
905 id); 919 id);
906 } 920 }
907 921
@@ -911,13 +925,13 @@ static int vfp_get_reg(const struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
911 925
912 switch (vfpid) { 926 switch (vfpid) {
913 case KVM_REG_ARM_VFP_FPEXC: 927 case KVM_REG_ARM_VFP_FPEXC:
914 return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpexc, id); 928 return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpexc, id);
915 case KVM_REG_ARM_VFP_FPSCR: 929 case KVM_REG_ARM_VFP_FPSCR:
916 return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpscr, id); 930 return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpscr, id);
917 case KVM_REG_ARM_VFP_FPINST: 931 case KVM_REG_ARM_VFP_FPINST:
918 return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpinst, id); 932 return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpinst, id);
919 case KVM_REG_ARM_VFP_FPINST2: 933 case KVM_REG_ARM_VFP_FPINST2:
920 return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpinst2, id); 934 return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpinst2, id);
921 case KVM_REG_ARM_VFP_MVFR0: 935 case KVM_REG_ARM_VFP_MVFR0:
922 val = fmrx(MVFR0); 936 val = fmrx(MVFR0);
923 return reg_to_user(uaddr, &val, id); 937 return reg_to_user(uaddr, &val, id);
@@ -945,7 +959,7 @@ static int vfp_set_reg(struct kvm_vcpu *vcpu, u64 id, const void __user *uaddr)
945 if (vfpid < num_fp_regs()) { 959 if (vfpid < num_fp_regs()) {
946 if (KVM_REG_SIZE(id) != 8) 960 if (KVM_REG_SIZE(id) != 8)
947 return -ENOENT; 961 return -ENOENT;
948 return reg_from_user(&vcpu->arch.vfp_guest.fpregs[vfpid], 962 return reg_from_user(&vcpu->arch.ctxt.vfp.fpregs[vfpid],
949 uaddr, id); 963 uaddr, id);
950 } 964 }
951 965
@@ -955,13 +969,13 @@ static int vfp_set_reg(struct kvm_vcpu *vcpu, u64 id, const void __user *uaddr)
955 969
956 switch (vfpid) { 970 switch (vfpid) {
957 case KVM_REG_ARM_VFP_FPEXC: 971 case KVM_REG_ARM_VFP_FPEXC:
958 return reg_from_user(&vcpu->arch.vfp_guest.fpexc, uaddr, id); 972 return reg_from_user(&vcpu->arch.ctxt.vfp.fpexc, uaddr, id);
959 case KVM_REG_ARM_VFP_FPSCR: 973 case KVM_REG_ARM_VFP_FPSCR:
960 return reg_from_user(&vcpu->arch.vfp_guest.fpscr, uaddr, id); 974 return reg_from_user(&vcpu->arch.ctxt.vfp.fpscr, uaddr, id);
961 case KVM_REG_ARM_VFP_FPINST: 975 case KVM_REG_ARM_VFP_FPINST:
962 return reg_from_user(&vcpu->arch.vfp_guest.fpinst, uaddr, id); 976 return reg_from_user(&vcpu->arch.ctxt.vfp.fpinst, uaddr, id);
963 case KVM_REG_ARM_VFP_FPINST2: 977 case KVM_REG_ARM_VFP_FPINST2:
964 return reg_from_user(&vcpu->arch.vfp_guest.fpinst2, uaddr, id); 978 return reg_from_user(&vcpu->arch.ctxt.vfp.fpinst2, uaddr, id);
965 /* These are invariant. */ 979 /* These are invariant. */
966 case KVM_REG_ARM_VFP_MVFR0: 980 case KVM_REG_ARM_VFP_MVFR0:
967 if (reg_from_user(&val, uaddr, id)) 981 if (reg_from_user(&val, uaddr, id))
@@ -1030,7 +1044,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
1030 val = vcpu_cp15_reg64_get(vcpu, r); 1044 val = vcpu_cp15_reg64_get(vcpu, r);
1031 ret = reg_to_user(uaddr, &val, reg->id); 1045 ret = reg_to_user(uaddr, &val, reg->id);
1032 } else if (KVM_REG_SIZE(reg->id) == 4) { 1046 } else if (KVM_REG_SIZE(reg->id) == 4) {
1033 ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id); 1047 ret = reg_to_user(uaddr, &vcpu_cp15(vcpu, r->reg), reg->id);
1034 } 1048 }
1035 1049
1036 return ret; 1050 return ret;
@@ -1060,7 +1074,7 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
1060 if (!ret) 1074 if (!ret)
1061 vcpu_cp15_reg64_set(vcpu, r, val); 1075 vcpu_cp15_reg64_set(vcpu, r, val);
1062 } else if (KVM_REG_SIZE(reg->id) == 4) { 1076 } else if (KVM_REG_SIZE(reg->id) == 4) {
1063 ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id); 1077 ret = reg_from_user(&vcpu_cp15(vcpu, r->reg), uaddr, reg->id);
1064 } 1078 }
1065 1079
1066 return ret; 1080 return ret;
@@ -1096,7 +1110,7 @@ static int write_demux_regids(u64 __user *uindices)
1096static u64 cp15_to_index(const struct coproc_reg *reg) 1110static u64 cp15_to_index(const struct coproc_reg *reg)
1097{ 1111{
1098 u64 val = KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT); 1112 u64 val = KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT);
1099 if (reg->is_64) { 1113 if (reg->is_64bit) {
1100 val |= KVM_REG_SIZE_U64; 1114 val |= KVM_REG_SIZE_U64;
1101 val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT); 1115 val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
1102 /* 1116 /*
@@ -1210,8 +1224,8 @@ void kvm_coproc_table_init(void)
1210 unsigned int i; 1224 unsigned int i;
1211 1225
1212 /* Make sure tables are unique and in order. */ 1226 /* Make sure tables are unique and in order. */
1213 for (i = 1; i < ARRAY_SIZE(cp15_regs); i++) 1227 BUG_ON(check_reg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
1214 BUG_ON(cmp_reg(&cp15_regs[i-1], &cp15_regs[i]) >= 0); 1228 BUG_ON(check_reg_table(invariant_cp15, ARRAY_SIZE(invariant_cp15)));
1215 1229
1216 /* We abuse the reset function to overwrite the table itself. */ 1230 /* We abuse the reset function to overwrite the table itself. */
1217 for (i = 0; i < ARRAY_SIZE(invariant_cp15); i++) 1231 for (i = 0; i < ARRAY_SIZE(invariant_cp15); i++)
@@ -1248,7 +1262,7 @@ void kvm_reset_coprocs(struct kvm_vcpu *vcpu)
1248 const struct coproc_reg *table; 1262 const struct coproc_reg *table;
1249 1263
1250 /* Catch someone adding a register without putting in reset entry. */ 1264 /* Catch someone adding a register without putting in reset entry. */
1251 memset(vcpu->arch.cp15, 0x42, sizeof(vcpu->arch.cp15)); 1265 memset(vcpu->arch.ctxt.cp15, 0x42, sizeof(vcpu->arch.ctxt.cp15));
1252 1266
1253 /* Generic chip reset first (so target could override). */ 1267 /* Generic chip reset first (so target could override). */
1254 reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs)); 1268 reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs));
@@ -1257,6 +1271,6 @@ void kvm_reset_coprocs(struct kvm_vcpu *vcpu)
1257 reset_coproc_regs(vcpu, table, num); 1271 reset_coproc_regs(vcpu, table, num);
1258 1272
1259 for (num = 1; num < NR_CP15_REGS; num++) 1273 for (num = 1; num < NR_CP15_REGS; num++)
1260 if (vcpu->arch.cp15[num] == 0x42424242) 1274 if (vcpu_cp15(vcpu, num) == 0x42424242)
1261 panic("Didn't reset vcpu->arch.cp15[%zi]", num); 1275 panic("Didn't reset vcpu_cp15(vcpu, %zi)", num);
1262} 1276}
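
find_reg() now packs the (CRn, CRm, Op1, Op2, is_64bit) tuple into a single integer key via reg_to_match_value() and binary-searches the table with bsearch(); check_reg_table() enforces at registration time that the table is sorted by exactly that key (cmp_reg() puts 64-bit entries before 32-bit ones, which matches the inverted is_64bit bit in the key). A stand-alone user-space analogue of the lookup, with illustrative table contents rather than the kernel's, is sketched below:

#include <stdio.h>
#include <stdlib.h>

struct reg_desc {
	unsigned long CRn, CRm, Op1, Op2;
	int is_64bit;
	const char *name;
};

/* Same packing as reg_to_match_value(): 64-bit entries get a 0 low bit. */
#define reg_key(x) \
	(((x)->CRn << 11) | ((x)->CRm << 7) | ((x)->Op1 << 4) | \
	 ((x)->Op2 << 1) | !(x)->is_64bit)

/* Must be kept sorted by reg_key(); note the 64-bit TTBR0 entry sorts
 * before the 32-bit TTBCR entry with the same CRn. */
static const struct reg_desc table[] = {
	{ 0, 0, 0, 0, 0, "MIDR"  },
	{ 1, 0, 0, 0, 0, "SCTLR" },
	{ 2, 0, 0, 0, 1, "TTBR0" },
	{ 2, 0, 0, 2, 0, "TTBCR" },
};

static int match_reg(const void *key, const void *elt)
{
	unsigned long pval = (unsigned long)key;
	const struct reg_desc *r = elt;

	return (int)(pval - reg_key(r));
}

int main(void)
{
	struct reg_desc params = { .CRn = 2, .Op2 = 2 };	/* a 32-bit access */
	const struct reg_desc *r;

	r = bsearch((void *)reg_key(&params), table,
		    sizeof(table) / sizeof(table[0]), sizeof(table[0]),
		    match_reg);
	printf("%s\n", r ? r->name : "not found");	/* prints TTBCR */
	return 0;
}
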
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 88d24a3a9778..eef1759c2b65 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -37,7 +37,7 @@ struct coproc_reg {
37 unsigned long Op1; 37 unsigned long Op1;
38 unsigned long Op2; 38 unsigned long Op2;
39 39
40 bool is_64; 40 bool is_64bit;
41 41
42 /* Trapped access from guest, if non-NULL. */ 42 /* Trapped access from guest, if non-NULL. */
43 bool (*access)(struct kvm_vcpu *, 43 bool (*access)(struct kvm_vcpu *,
@@ -47,7 +47,7 @@ struct coproc_reg {
47 /* Initialization for vcpu. */ 47 /* Initialization for vcpu. */
48 void (*reset)(struct kvm_vcpu *, const struct coproc_reg *); 48 void (*reset)(struct kvm_vcpu *, const struct coproc_reg *);
49 49
50 /* Index into vcpu->arch.cp15[], or 0 if we don't need to save it. */ 50 /* Index into vcpu_cp15(vcpu, ...), or 0 if we don't need to save it. */
51 unsigned long reg; 51 unsigned long reg;
52 52
53 /* Value (usually reset value) */ 53 /* Value (usually reset value) */
@@ -104,25 +104,25 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu,
104 const struct coproc_reg *r) 104 const struct coproc_reg *r)
105{ 105{
106 BUG_ON(!r->reg); 106 BUG_ON(!r->reg);
107 BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.cp15)); 107 BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.ctxt.cp15));
108 vcpu->arch.cp15[r->reg] = 0xdecafbad; 108 vcpu_cp15(vcpu, r->reg) = 0xdecafbad;
109} 109}
110 110
111static inline void reset_val(struct kvm_vcpu *vcpu, const struct coproc_reg *r) 111static inline void reset_val(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
112{ 112{
113 BUG_ON(!r->reg); 113 BUG_ON(!r->reg);
114 BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.cp15)); 114 BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.ctxt.cp15));
115 vcpu->arch.cp15[r->reg] = r->val; 115 vcpu_cp15(vcpu, r->reg) = r->val;
116} 116}
117 117
118static inline void reset_unknown64(struct kvm_vcpu *vcpu, 118static inline void reset_unknown64(struct kvm_vcpu *vcpu,
119 const struct coproc_reg *r) 119 const struct coproc_reg *r)
120{ 120{
121 BUG_ON(!r->reg); 121 BUG_ON(!r->reg);
122 BUG_ON(r->reg + 1 >= ARRAY_SIZE(vcpu->arch.cp15)); 122 BUG_ON(r->reg + 1 >= ARRAY_SIZE(vcpu->arch.ctxt.cp15));
123 123
124 vcpu->arch.cp15[r->reg] = 0xdecafbad; 124 vcpu_cp15(vcpu, r->reg) = 0xdecafbad;
125 vcpu->arch.cp15[r->reg+1] = 0xd0c0ffee; 125 vcpu_cp15(vcpu, r->reg+1) = 0xd0c0ffee;
126} 126}
127 127
128static inline int cmp_reg(const struct coproc_reg *i1, 128static inline int cmp_reg(const struct coproc_reg *i1,
@@ -141,7 +141,7 @@ static inline int cmp_reg(const struct coproc_reg *i1,
141 return i1->Op1 - i2->Op1; 141 return i1->Op1 - i2->Op1;
142 if (i1->Op2 != i2->Op2) 142 if (i1->Op2 != i2->Op2)
143 return i1->Op2 - i2->Op2; 143 return i1->Op2 - i2->Op2;
144 return i2->is_64 - i1->is_64; 144 return i2->is_64bit - i1->is_64bit;
145} 145}
146 146
147 147
@@ -150,8 +150,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,
150#define CRm64(_x) .CRn = _x, .CRm = 0 150#define CRm64(_x) .CRn = _x, .CRm = 0
151#define Op1(_x) .Op1 = _x 151#define Op1(_x) .Op1 = _x
152#define Op2(_x) .Op2 = _x 152#define Op2(_x) .Op2 = _x
153#define is64 .is_64 = true 153#define is64 .is_64bit = true
154#define is32 .is_64 = false 154#define is32 .is_64bit = false
155 155
156bool access_vm_reg(struct kvm_vcpu *vcpu, 156bool access_vm_reg(struct kvm_vcpu *vcpu,
157 const struct coproc_params *p, 157 const struct coproc_params *p,
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index dc99159857b4..a494def3f195 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -112,7 +112,7 @@ static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][15] = {
112 */ 112 */
113unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) 113unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
114{ 114{
115 unsigned long *reg_array = (unsigned long *)&vcpu->arch.regs; 115 unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs;
116 unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK; 116 unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
117 117
118 switch (mode) { 118 switch (mode) {
@@ -147,15 +147,15 @@ unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
147 unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK; 147 unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
148 switch (mode) { 148 switch (mode) {
149 case SVC_MODE: 149 case SVC_MODE:
150 return &vcpu->arch.regs.KVM_ARM_SVC_spsr; 150 return &vcpu->arch.ctxt.gp_regs.KVM_ARM_SVC_spsr;
151 case ABT_MODE: 151 case ABT_MODE:
152 return &vcpu->arch.regs.KVM_ARM_ABT_spsr; 152 return &vcpu->arch.ctxt.gp_regs.KVM_ARM_ABT_spsr;
153 case UND_MODE: 153 case UND_MODE:
154 return &vcpu->arch.regs.KVM_ARM_UND_spsr; 154 return &vcpu->arch.ctxt.gp_regs.KVM_ARM_UND_spsr;
155 case IRQ_MODE: 155 case IRQ_MODE:
156 return &vcpu->arch.regs.KVM_ARM_IRQ_spsr; 156 return &vcpu->arch.ctxt.gp_regs.KVM_ARM_IRQ_spsr;
157 case FIQ_MODE: 157 case FIQ_MODE:
158 return &vcpu->arch.regs.KVM_ARM_FIQ_spsr; 158 return &vcpu->arch.ctxt.gp_regs.KVM_ARM_FIQ_spsr;
159 default: 159 default:
160 BUG(); 160 BUG();
161 } 161 }
@@ -266,8 +266,8 @@ void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
266 266
267static u32 exc_vector_base(struct kvm_vcpu *vcpu) 267static u32 exc_vector_base(struct kvm_vcpu *vcpu)
268{ 268{
269 u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; 269 u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
270 u32 vbar = vcpu->arch.cp15[c12_VBAR]; 270 u32 vbar = vcpu_cp15(vcpu, c12_VBAR);
271 271
272 if (sctlr & SCTLR_V) 272 if (sctlr & SCTLR_V)
273 return 0xffff0000; 273 return 0xffff0000;
@@ -282,7 +282,7 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu)
282static void kvm_update_psr(struct kvm_vcpu *vcpu, unsigned long mode) 282static void kvm_update_psr(struct kvm_vcpu *vcpu, unsigned long mode)
283{ 283{
284 unsigned long cpsr = *vcpu_cpsr(vcpu); 284 unsigned long cpsr = *vcpu_cpsr(vcpu);
285 u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; 285 u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
286 286
287 *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | mode; 287 *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | mode;
288 288
@@ -357,22 +357,22 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
357 357
358 if (is_pabt) { 358 if (is_pabt) {
359 /* Set IFAR and IFSR */ 359 /* Set IFAR and IFSR */
360 vcpu->arch.cp15[c6_IFAR] = addr; 360 vcpu_cp15(vcpu, c6_IFAR) = addr;
361 is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31); 361 is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
362 /* Always give debug fault for now - should give guest a clue */ 362 /* Always give debug fault for now - should give guest a clue */
363 if (is_lpae) 363 if (is_lpae)
364 vcpu->arch.cp15[c5_IFSR] = 1 << 9 | 0x22; 364 vcpu_cp15(vcpu, c5_IFSR) = 1 << 9 | 0x22;
365 else 365 else
366 vcpu->arch.cp15[c5_IFSR] = 2; 366 vcpu_cp15(vcpu, c5_IFSR) = 2;
367 } else { /* !iabt */ 367 } else { /* !iabt */
368 /* Set DFAR and DFSR */ 368 /* Set DFAR and DFSR */
369 vcpu->arch.cp15[c6_DFAR] = addr; 369 vcpu_cp15(vcpu, c6_DFAR) = addr;
370 is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31); 370 is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
371 /* Always give debug fault for now - should give guest a clue */ 371 /* Always give debug fault for now - should give guest a clue */
372 if (is_lpae) 372 if (is_lpae)
373 vcpu->arch.cp15[c5_DFSR] = 1 << 9 | 0x22; 373 vcpu_cp15(vcpu, c5_DFSR) = 1 << 9 | 0x22;
374 else 374 else
375 vcpu->arch.cp15[c5_DFSR] = 2; 375 vcpu_cp15(vcpu, c5_DFSR) = 2;
376 } 376 }
377 377
378} 378}
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 99361f11354a..9093ed0f8b2a 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -25,7 +25,6 @@
25#include <asm/cputype.h> 25#include <asm/cputype.h>
26#include <asm/uaccess.h> 26#include <asm/uaccess.h>
27#include <asm/kvm.h> 27#include <asm/kvm.h>
28#include <asm/kvm_asm.h>
29#include <asm/kvm_emulate.h> 28#include <asm/kvm_emulate.h>
30#include <asm/kvm_coproc.h> 29#include <asm/kvm_coproc.h>
31 30
@@ -55,7 +54,7 @@ static u64 core_reg_offset_from_id(u64 id)
55static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) 54static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
56{ 55{
57 u32 __user *uaddr = (u32 __user *)(long)reg->addr; 56 u32 __user *uaddr = (u32 __user *)(long)reg->addr;
58 struct kvm_regs *regs = &vcpu->arch.regs; 57 struct kvm_regs *regs = &vcpu->arch.ctxt.gp_regs;
59 u64 off; 58 u64 off;
60 59
61 if (KVM_REG_SIZE(reg->id) != 4) 60 if (KVM_REG_SIZE(reg->id) != 4)
@@ -72,7 +71,7 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
72static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) 71static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
73{ 72{
74 u32 __user *uaddr = (u32 __user *)(long)reg->addr; 73 u32 __user *uaddr = (u32 __user *)(long)reg->addr;
75 struct kvm_regs *regs = &vcpu->arch.regs; 74 struct kvm_regs *regs = &vcpu->arch.ctxt.gp_regs;
76 u64 off, val; 75 u64 off, val;
77 76
78 if (KVM_REG_SIZE(reg->id) != 4) 77 if (KVM_REG_SIZE(reg->id) != 4)
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 3ede90d8b20b..3f1ef0dbc899 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -147,13 +147,6 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
147 switch (exception_index) { 147 switch (exception_index) {
148 case ARM_EXCEPTION_IRQ: 148 case ARM_EXCEPTION_IRQ:
149 return 1; 149 return 1;
150 case ARM_EXCEPTION_UNDEFINED:
151 kvm_err("Undefined exception in Hyp mode at: %#08lx\n",
152 kvm_vcpu_get_hyp_pc(vcpu));
153 BUG();
154 panic("KVM: Hypervisor undefined exception!\n");
155 case ARM_EXCEPTION_DATA_ABORT:
156 case ARM_EXCEPTION_PREF_ABORT:
157 case ARM_EXCEPTION_HVC: 150 case ARM_EXCEPTION_HVC:
158 /* 151 /*
159 * See ARM ARM B1.14.1: "Hyp traps on instructions 152 * See ARM ARM B1.14.1: "Hyp traps on instructions
diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
new file mode 100644
index 000000000000..8dfa5f7f9290
--- /dev/null
+++ b/arch/arm/kvm/hyp/Makefile
@@ -0,0 +1,17 @@
1#
2# Makefile for Kernel-based Virtual Machine module, HYP part
3#
4
5KVM=../../../../virt/kvm
6
7obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
8obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
9
10obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
11obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o
12obj-$(CONFIG_KVM_ARM_HOST) += vfp.o
13obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o
14obj-$(CONFIG_KVM_ARM_HOST) += entry.o
15obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
16obj-$(CONFIG_KVM_ARM_HOST) += switch.o
17obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
diff --git a/arch/arm/kvm/hyp/banked-sr.c b/arch/arm/kvm/hyp/banked-sr.c
new file mode 100644
index 000000000000..111bda8cdebd
--- /dev/null
+++ b/arch/arm/kvm/hyp/banked-sr.c
@@ -0,0 +1,77 @@
1/*
2 * Original code:
3 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
4 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
5 *
6 * Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21#include <asm/kvm_hyp.h>
22
23__asm__(".arch_extension virt");
24
25void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt)
26{
27 ctxt->gp_regs.usr_regs.ARM_sp = read_special(SP_usr);
28 ctxt->gp_regs.usr_regs.ARM_pc = read_special(ELR_hyp);
29 ctxt->gp_regs.usr_regs.ARM_cpsr = read_special(SPSR);
30 ctxt->gp_regs.KVM_ARM_SVC_sp = read_special(SP_svc);
31 ctxt->gp_regs.KVM_ARM_SVC_lr = read_special(LR_svc);
32 ctxt->gp_regs.KVM_ARM_SVC_spsr = read_special(SPSR_svc);
33 ctxt->gp_regs.KVM_ARM_ABT_sp = read_special(SP_abt);
34 ctxt->gp_regs.KVM_ARM_ABT_lr = read_special(LR_abt);
35 ctxt->gp_regs.KVM_ARM_ABT_spsr = read_special(SPSR_abt);
36 ctxt->gp_regs.KVM_ARM_UND_sp = read_special(SP_und);
37 ctxt->gp_regs.KVM_ARM_UND_lr = read_special(LR_und);
38 ctxt->gp_regs.KVM_ARM_UND_spsr = read_special(SPSR_und);
39 ctxt->gp_regs.KVM_ARM_IRQ_sp = read_special(SP_irq);
40 ctxt->gp_regs.KVM_ARM_IRQ_lr = read_special(LR_irq);
41 ctxt->gp_regs.KVM_ARM_IRQ_spsr = read_special(SPSR_irq);
42 ctxt->gp_regs.KVM_ARM_FIQ_r8 = read_special(R8_fiq);
43 ctxt->gp_regs.KVM_ARM_FIQ_r9 = read_special(R9_fiq);
44 ctxt->gp_regs.KVM_ARM_FIQ_r10 = read_special(R10_fiq);
45 ctxt->gp_regs.KVM_ARM_FIQ_fp = read_special(R11_fiq);
46 ctxt->gp_regs.KVM_ARM_FIQ_ip = read_special(R12_fiq);
47 ctxt->gp_regs.KVM_ARM_FIQ_sp = read_special(SP_fiq);
48 ctxt->gp_regs.KVM_ARM_FIQ_lr = read_special(LR_fiq);
49 ctxt->gp_regs.KVM_ARM_FIQ_spsr = read_special(SPSR_fiq);
50}
51
52void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt)
53{
54 write_special(ctxt->gp_regs.usr_regs.ARM_sp, SP_usr);
55 write_special(ctxt->gp_regs.usr_regs.ARM_pc, ELR_hyp);
56 write_special(ctxt->gp_regs.usr_regs.ARM_cpsr, SPSR_cxsf);
57 write_special(ctxt->gp_regs.KVM_ARM_SVC_sp, SP_svc);
58 write_special(ctxt->gp_regs.KVM_ARM_SVC_lr, LR_svc);
59 write_special(ctxt->gp_regs.KVM_ARM_SVC_spsr, SPSR_svc);
60 write_special(ctxt->gp_regs.KVM_ARM_ABT_sp, SP_abt);
61 write_special(ctxt->gp_regs.KVM_ARM_ABT_lr, LR_abt);
62 write_special(ctxt->gp_regs.KVM_ARM_ABT_spsr, SPSR_abt);
63 write_special(ctxt->gp_regs.KVM_ARM_UND_sp, SP_und);
64 write_special(ctxt->gp_regs.KVM_ARM_UND_lr, LR_und);
65 write_special(ctxt->gp_regs.KVM_ARM_UND_spsr, SPSR_und);
66 write_special(ctxt->gp_regs.KVM_ARM_IRQ_sp, SP_irq);
67 write_special(ctxt->gp_regs.KVM_ARM_IRQ_lr, LR_irq);
68 write_special(ctxt->gp_regs.KVM_ARM_IRQ_spsr, SPSR_irq);
69 write_special(ctxt->gp_regs.KVM_ARM_FIQ_r8, R8_fiq);
70 write_special(ctxt->gp_regs.KVM_ARM_FIQ_r9, R9_fiq);
71 write_special(ctxt->gp_regs.KVM_ARM_FIQ_r10, R10_fiq);
72 write_special(ctxt->gp_regs.KVM_ARM_FIQ_fp, R11_fiq);
73 write_special(ctxt->gp_regs.KVM_ARM_FIQ_ip, R12_fiq);
74 write_special(ctxt->gp_regs.KVM_ARM_FIQ_sp, SP_fiq);
75 write_special(ctxt->gp_regs.KVM_ARM_FIQ_lr, LR_fiq);
76 write_special(ctxt->gp_regs.KVM_ARM_FIQ_spsr, SPSR_fiq);
77}
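
__banked_save_state() and __banked_restore_state() rely on read_special() and write_special(), which come from asm/kvm_hyp.h and are not part of this hunk. As a hedged sketch (an assumption, not the header's verbatim contents), such accessors can be built from GCC extended inline asm plus the Virtualization Extensions' banked-register mrs/msr forms, which is also why the file declares ".arch_extension virt":

#include <stdint.h>

/* Turn a banked-register name (SP_usr, SPSR_svc, ...) into the string
 * used in the asm template.  Builds only with an ARMv7 toolchain and
 * ".arch_extension virt" in effect. */
#define __str(s)	#s

/* uint32_t val = read_special(SP_usr); */
#define read_special(r) ({					\
	uint32_t __val;						\
	asm volatile("mrs %0, " __str(r) : "=r" (__val));	\
	__val;							\
})

/* write_special(val, SPSR_svc); */
#define write_special(v, r)					\
	asm volatile("msr " __str(r) ", %0" : : "r" (v))
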
diff --git a/arch/arm/kvm/hyp/cp15-sr.c b/arch/arm/kvm/hyp/cp15-sr.c
new file mode 100644
index 000000000000..c4782812714c
--- /dev/null
+++ b/arch/arm/kvm/hyp/cp15-sr.c
@@ -0,0 +1,84 @@
1/*
2 * Original code:
3 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
4 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
5 *
6 * Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21#include <asm/kvm_hyp.h>
22
23static u64 *cp15_64(struct kvm_cpu_context *ctxt, int idx)
24{
25 return (u64 *)(ctxt->cp15 + idx);
26}
27
28void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
29{
30 ctxt->cp15[c0_MPIDR] = read_sysreg(VMPIDR);
31 ctxt->cp15[c0_CSSELR] = read_sysreg(CSSELR);
32 ctxt->cp15[c1_SCTLR] = read_sysreg(SCTLR);
33 ctxt->cp15[c1_CPACR] = read_sysreg(CPACR);
34 *cp15_64(ctxt, c2_TTBR0) = read_sysreg(TTBR0);
35 *cp15_64(ctxt, c2_TTBR1) = read_sysreg(TTBR1);
36 ctxt->cp15[c2_TTBCR] = read_sysreg(TTBCR);
37 ctxt->cp15[c3_DACR] = read_sysreg(DACR);
38 ctxt->cp15[c5_DFSR] = read_sysreg(DFSR);
39 ctxt->cp15[c5_IFSR] = read_sysreg(IFSR);
40 ctxt->cp15[c5_ADFSR] = read_sysreg(ADFSR);
41 ctxt->cp15[c5_AIFSR] = read_sysreg(AIFSR);
42 ctxt->cp15[c6_DFAR] = read_sysreg(DFAR);
43 ctxt->cp15[c6_IFAR] = read_sysreg(IFAR);
44 *cp15_64(ctxt, c7_PAR) = read_sysreg(PAR);
45 ctxt->cp15[c10_PRRR] = read_sysreg(PRRR);
46 ctxt->cp15[c10_NMRR] = read_sysreg(NMRR);
47 ctxt->cp15[c10_AMAIR0] = read_sysreg(AMAIR0);
48 ctxt->cp15[c10_AMAIR1] = read_sysreg(AMAIR1);
49 ctxt->cp15[c12_VBAR] = read_sysreg(VBAR);
50 ctxt->cp15[c13_CID] = read_sysreg(CID);
51 ctxt->cp15[c13_TID_URW] = read_sysreg(TID_URW);
52 ctxt->cp15[c13_TID_URO] = read_sysreg(TID_URO);
53 ctxt->cp15[c13_TID_PRIV] = read_sysreg(TID_PRIV);
54 ctxt->cp15[c14_CNTKCTL] = read_sysreg(CNTKCTL);
55}
56
57void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
58{
59 write_sysreg(ctxt->cp15[c0_MPIDR], VMPIDR);
60 write_sysreg(ctxt->cp15[c0_CSSELR], CSSELR);
61 write_sysreg(ctxt->cp15[c1_SCTLR], SCTLR);
62 write_sysreg(ctxt->cp15[c1_CPACR], CPACR);
63 write_sysreg(*cp15_64(ctxt, c2_TTBR0), TTBR0);
64 write_sysreg(*cp15_64(ctxt, c2_TTBR1), TTBR1);
65 write_sysreg(ctxt->cp15[c2_TTBCR], TTBCR);
66 write_sysreg(ctxt->cp15[c3_DACR], DACR);
67 write_sysreg(ctxt->cp15[c5_DFSR], DFSR);
68 write_sysreg(ctxt->cp15[c5_IFSR], IFSR);
69 write_sysreg(ctxt->cp15[c5_ADFSR], ADFSR);
70 write_sysreg(ctxt->cp15[c5_AIFSR], AIFSR);
71 write_sysreg(ctxt->cp15[c6_DFAR], DFAR);
72 write_sysreg(ctxt->cp15[c6_IFAR], IFAR);
73 write_sysreg(*cp15_64(ctxt, c7_PAR), PAR);
74 write_sysreg(ctxt->cp15[c10_PRRR], PRRR);
75 write_sysreg(ctxt->cp15[c10_NMRR], NMRR);
76 write_sysreg(ctxt->cp15[c10_AMAIR0], AMAIR0);
77 write_sysreg(ctxt->cp15[c10_AMAIR1], AMAIR1);
78 write_sysreg(ctxt->cp15[c12_VBAR], VBAR);
79 write_sysreg(ctxt->cp15[c13_CID], CID);
80 write_sysreg(ctxt->cp15[c13_TID_URW], TID_URW);
81 write_sysreg(ctxt->cp15[c13_TID_URO], TID_URO);
82 write_sysreg(ctxt->cp15[c13_TID_PRIV], TID_PRIV);
83 write_sysreg(ctxt->cp15[c14_CNTKCTL], CNTKCTL);
84}
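
cp15_64() reinterprets two consecutive 32-bit cp15 slots as one 64-bit register so that TTBR0, TTBR1 and PAR can be saved and restored with a single access. On a little-endian host this 64-bit view and the word-at-a-time split used by vcpu_cp15_reg64_set() in coproc.c (low word at index reg, high word at reg + 1) describe the same value; a small plain-C demo of that pairing, with the endianness assumption stated in the comment:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Little-endian layout assumed: the low 32 bits of a 64-bit register
 * live at the lower array index, the high 32 bits at index + 1, so a
 * 64-bit view of &cp15[idx] sees the whole value. */
int main(void)
{
	uint32_t cp15[4] = { 0 };
	uint64_t ttbr0 = 0x1122334455667788ULL;
	uint64_t as64;

	/* What vcpu_cp15_reg64_set() does, one 32-bit half at a time. */
	cp15[2] = (uint32_t)(ttbr0 & 0xffffffff);
	cp15[3] = (uint32_t)(ttbr0 >> 32);

	/* What cp15_64() relies on: the same storage read back as 64 bits. */
	memcpy(&as64, &cp15[2], sizeof(as64));
	printf("%d\n", as64 == ttbr0);	/* prints 1 on a little-endian host */
	return 0;
}
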
diff --git a/arch/arm/kvm/hyp/entry.S b/arch/arm/kvm/hyp/entry.S
new file mode 100644
index 000000000000..21c238871c9e
--- /dev/null
+++ b/arch/arm/kvm/hyp/entry.S
@@ -0,0 +1,101 @@
1/*
2 * Copyright (C) 2016 - ARM Ltd
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16*/
17
18#include <linux/linkage.h>
19#include <asm/asm-offsets.h>
20#include <asm/kvm_arm.h>
21
22 .arch_extension virt
23
24 .text
25 .pushsection .hyp.text, "ax"
26
27#define USR_REGS_OFFSET (CPU_CTXT_GP_REGS + GP_REGS_USR)
28
29/* int __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host) */
30ENTRY(__guest_enter)
31 @ Save host registers
32 add r1, r1, #(USR_REGS_OFFSET + S_R4)
33 stm r1!, {r4-r12}
34 str lr, [r1, #4] @ Skip SP_usr (already saved)
35
36 @ Restore guest registers
37 add r0, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0)
38 ldr lr, [r0, #S_LR]
39 ldm r0, {r0-r12}
40
41 clrex
42 eret
43ENDPROC(__guest_enter)
44
45ENTRY(__guest_exit)
46 /*
47 * return convention:
48 * guest r0, r1, r2 saved on the stack
49 * r0: vcpu pointer
50 * r1: exception code
51 */
52
53 add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R3)
54 stm r2!, {r3-r12}
55 str lr, [r2, #4]
56 add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0)
57 pop {r3, r4, r5} @ r0, r1, r2
58 stm r2, {r3-r5}
59
60 ldr r0, [r0, #VCPU_HOST_CTXT]
61 add r0, r0, #(USR_REGS_OFFSET + S_R4)
62 ldm r0!, {r4-r12}
63 ldr lr, [r0, #4]
64
65 mov r0, r1
66 bx lr
67ENDPROC(__guest_exit)
68
69/*
70 * If VFPv3 support is not available, then we will not switch the VFP
 71 * registers; however cp10 and cp11 accesses will still trap and fall back
72 * to the regular coprocessor emulation code, which currently will
73 * inject an undefined exception to the guest.
74 */
75#ifdef CONFIG_VFPv3
76ENTRY(__vfp_guest_restore)
77 push {r3, r4, lr}
78
79 @ NEON/VFP used. Turn on VFP access.
80 mrc p15, 4, r1, c1, c1, 2 @ HCPTR
81 bic r1, r1, #(HCPTR_TCP(10) | HCPTR_TCP(11))
82 mcr p15, 4, r1, c1, c1, 2 @ HCPTR
83 isb
84
85 @ Switch VFP/NEON hardware state to the guest's
86 mov r4, r0
87 ldr r0, [r0, #VCPU_HOST_CTXT]
88 add r0, r0, #CPU_CTXT_VFP
89 bl __vfp_save_state
90 add r0, r4, #(VCPU_GUEST_CTXT + CPU_CTXT_VFP)
91 bl __vfp_restore_state
92
93 pop {r3, r4, lr}
94 pop {r0, r1, r2}
95 clrex
96 eret
97ENDPROC(__vfp_guest_restore)
98#endif
99
100 .popsection
101
diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S
new file mode 100644
index 000000000000..78091383a5d9
--- /dev/null
+++ b/arch/arm/kvm/hyp/hyp-entry.S
@@ -0,0 +1,169 @@
1/*
2 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
3 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19#include <linux/linkage.h>
20#include <asm/kvm_arm.h>
21#include <asm/kvm_asm.h>
22
23 .arch_extension virt
24
25 .text
26 .pushsection .hyp.text, "ax"
27
28.macro load_vcpu reg
29 mrc p15, 4, \reg, c13, c0, 2 @ HTPIDR
30.endm
31
32/********************************************************************
33 * Hypervisor exception vector and handlers
34 *
35 *
36 * The KVM/ARM Hypervisor ABI is defined as follows:
37 *
38 * Entry to Hyp mode from the host kernel will happen _only_ when an HVC
39 * instruction is issued since all traps are disabled when running the host
40 * kernel as per the Hyp-mode initialization at boot time.
41 *
42 * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc
43 * below) when the HVC instruction is called from SVC mode (i.e. a guest or the
44 * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC
45 * instructions are called from within Hyp-mode.
46 *
47 * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode):
48 * Switching to Hyp mode is done through a simple HVC #0 instruction. The
49 * exception vector code will check that the HVC comes from VMID==0.
50 * - r0 contains a pointer to a HYP function
51 * - r1, r2, and r3 contain arguments to the above function.
52 * - The HYP function will be called with its arguments in r0, r1 and r2.
53 * On HYP function return, we return directly to SVC.
54 *
55 * Note that the above is used to execute code in Hyp-mode from a host-kernel
56 * point of view, and is a different concept from performing a world-switch and
 57 * executing guest code in SVC mode (with a VMID != 0).
58 */
59
60 .align 5
61__kvm_hyp_vector:
62 .global __kvm_hyp_vector
63
64 @ Hyp-mode exception vector
65 W(b) hyp_reset
66 W(b) hyp_undef
67 W(b) hyp_svc
68 W(b) hyp_pabt
69 W(b) hyp_dabt
70 W(b) hyp_hvc
71 W(b) hyp_irq
72 W(b) hyp_fiq
73
74.macro invalid_vector label, cause
75 .align
76\label: mov r0, #\cause
77 b __hyp_panic
78.endm
79
80 invalid_vector hyp_reset ARM_EXCEPTION_RESET
81 invalid_vector hyp_undef ARM_EXCEPTION_UNDEFINED
82 invalid_vector hyp_svc ARM_EXCEPTION_SOFTWARE
83 invalid_vector hyp_pabt ARM_EXCEPTION_PREF_ABORT
84 invalid_vector hyp_dabt ARM_EXCEPTION_DATA_ABORT
85 invalid_vector hyp_fiq ARM_EXCEPTION_FIQ
86
87ENTRY(__hyp_do_panic)
88 mrs lr, cpsr
89 bic lr, lr, #MODE_MASK
90 orr lr, lr, #SVC_MODE
91THUMB( orr lr, lr, #PSR_T_BIT )
92 msr spsr_cxsf, lr
93 ldr lr, =panic
94 msr ELR_hyp, lr
95 ldr lr, =kvm_call_hyp
96 clrex
97 eret
98ENDPROC(__hyp_do_panic)
99
100hyp_hvc:
101 /*
102 * Getting here is either because of a trap from a guest,
103 * or from executing HVC from the host kernel, which means
104 * "do something in Hyp mode".
105 */
106 push {r0, r1, r2}
107
108 @ Check syndrome register
109 mrc p15, 4, r1, c5, c2, 0 @ HSR
110 lsr r0, r1, #HSR_EC_SHIFT
111 cmp r0, #HSR_EC_HVC
112 bne guest_trap @ Not HVC instr.
113
114 /*
115 * Let's check if the HVC came from VMID 0 and allow simple
116 * switch to Hyp mode
117 */
118 mrrc p15, 6, r0, r2, c2
119 lsr r2, r2, #16
120 and r2, r2, #0xff
121 cmp r2, #0
122 bne guest_trap @ Guest called HVC
123
124 /*
125 * Getting here means host called HVC, we shift parameters and branch
126 * to Hyp function.
127 */
128 pop {r0, r1, r2}
129
130 /* Check for __hyp_get_vectors */
131 cmp r0, #-1
132 mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR
133 beq 1f
134
135 push {lr}
136
137 mov lr, r0
138 mov r0, r1
139 mov r1, r2
140 mov r2, r3
141
142THUMB( orr lr, #1)
143 blx lr @ Call the HYP function
144
145 pop {lr}
1461: eret
147
148guest_trap:
149 load_vcpu r0 @ Load VCPU pointer to r0
150
151#ifdef CONFIG_VFPv3
152 @ Check for a VFP access
153 lsr r1, r1, #HSR_EC_SHIFT
154 cmp r1, #HSR_EC_CP_0_13
155 beq __vfp_guest_restore
156#endif
157
158 mov r1, #ARM_EXCEPTION_HVC
159 b __guest_exit
160
161hyp_irq:
162 push {r0, r1, r2}
163 mov r1, #ARM_EXCEPTION_IRQ
164 load_vcpu r0 @ Load VCPU pointer to r0
165 b __guest_exit
166
167 .ltorg
168
169 .popsection
diff --git a/arch/arm/kvm/hyp/s2-setup.c b/arch/arm/kvm/hyp/s2-setup.c
new file mode 100644
index 000000000000..7be39af2ed6c
--- /dev/null
+++ b/arch/arm/kvm/hyp/s2-setup.c
@@ -0,0 +1,33 @@
1/*
2 * Copyright (C) 2016 - ARM Ltd
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <linux/types.h>
19#include <asm/kvm_arm.h>
20#include <asm/kvm_asm.h>
21#include <asm/kvm_hyp.h>
22
23void __hyp_text __init_stage2_translation(void)
24{
25 u64 val;
26
27 val = read_sysreg(VTCR) & ~VTCR_MASK;
28
29 val |= read_sysreg(HTCR) & VTCR_HTCR_SH;
30 val |= KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S;
31
32 write_sysreg(val, VTCR);
33}
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
new file mode 100644
index 000000000000..b13caa90cd44
--- /dev/null
+++ b/arch/arm/kvm/hyp/switch.c
@@ -0,0 +1,232 @@
1/*
2 * Copyright (C) 2015 - ARM Ltd
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <asm/kvm_asm.h>
19#include <asm/kvm_hyp.h>
20
21__asm__(".arch_extension virt");
22
23/*
24 * Activate the traps, saving the host's fpexc register before
25 * overwriting it. We'll restore it on VM exit.
26 */
27static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host)
28{
29 u32 val;
30
31 /*
32 * We are about to set HCPTR.TCP10/11 to trap all floating point
33 * register accesses to HYP, however, the ARM ARM clearly states that
34 * traps are only taken to HYP if the operation would not otherwise
35 * trap to SVC. Therefore, always make sure that for 32-bit guests,
36 * we set FPEXC.EN to prevent traps to SVC, when setting the TCP bits.
37 */
38 val = read_sysreg(VFP_FPEXC);
39 *fpexc_host = val;
40 if (!(val & FPEXC_EN)) {
41 write_sysreg(val | FPEXC_EN, VFP_FPEXC);
42 isb();
43 }
44
45 write_sysreg(vcpu->arch.hcr | vcpu->arch.irq_lines, HCR);
46 /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
47 write_sysreg(HSTR_T(15), HSTR);
48 write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR);
49 val = read_sysreg(HDCR);
50 write_sysreg(val | HDCR_TPM | HDCR_TPMCR, HDCR);
51}
52
53static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
54{
55 u32 val;
56
57 write_sysreg(0, HCR);
58 write_sysreg(0, HSTR);
59 val = read_sysreg(HDCR);
60 write_sysreg(val & ~(HDCR_TPM | HDCR_TPMCR), HDCR);
61 write_sysreg(0, HCPTR);
62}
63
64static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
65{
66 struct kvm *kvm = kern_hyp_va(vcpu->kvm);
67 write_sysreg(kvm->arch.vttbr, VTTBR);
68 write_sysreg(vcpu->arch.midr, VPIDR);
69}
70
71static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
72{
73 write_sysreg(0, VTTBR);
74 write_sysreg(read_sysreg(MIDR), VPIDR);
75}
76
77static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
78{
79 __vgic_v2_save_state(vcpu);
80}
81
82static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
83{
84 __vgic_v2_restore_state(vcpu);
85}
86
87static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
88{
89 u32 hsr = read_sysreg(HSR);
90 u8 ec = hsr >> HSR_EC_SHIFT;
91 u32 hpfar, far;
92
93 vcpu->arch.fault.hsr = hsr;
94
95 if (ec == HSR_EC_IABT)
96 far = read_sysreg(HIFAR);
97 else if (ec == HSR_EC_DABT)
98 far = read_sysreg(HDFAR);
99 else
100 return true;
101
102 /*
103 * B3.13.5 Reporting exceptions taken to the Non-secure PL2 mode:
104 *
105 * Abort on the stage 2 translation for a memory access from a
106 * Non-secure PL1 or PL0 mode:
107 *
108 * For any Access flag fault or Translation fault, and also for any
109 * Permission fault on the stage 2 translation of a memory access
110 * made as part of a translation table walk for a stage 1 translation,
111 * the HPFAR holds the IPA that caused the fault. Otherwise, the HPFAR
112 * is UNKNOWN.
113 */
114 if (!(hsr & HSR_DABT_S1PTW) && (hsr & HSR_FSC_TYPE) == FSC_PERM) {
115 u64 par, tmp;
116
117 par = read_sysreg(PAR);
118 write_sysreg(far, ATS1CPR);
119 isb();
120
121 tmp = read_sysreg(PAR);
122 write_sysreg(par, PAR);
123
124 if (unlikely(tmp & 1))
125 return false; /* Translation failed, back to guest */
126
127 hpfar = ((tmp >> 12) & ((1UL << 28) - 1)) << 4;
128 } else {
129 hpfar = read_sysreg(HPFAR);
130 }
131
132 vcpu->arch.fault.hxfar = far;
133 vcpu->arch.fault.hpfar = hpfar;
134 return true;
135}
136
137static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
138{
139 struct kvm_cpu_context *host_ctxt;
140 struct kvm_cpu_context *guest_ctxt;
141 bool fp_enabled;
142 u64 exit_code;
143 u32 fpexc;
144
145 vcpu = kern_hyp_va(vcpu);
146 write_sysreg(vcpu, HTPIDR);
147
148 host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
149 guest_ctxt = &vcpu->arch.ctxt;
150
151 __sysreg_save_state(host_ctxt);
152 __banked_save_state(host_ctxt);
153
154 __activate_traps(vcpu, &fpexc);
155 __activate_vm(vcpu);
156
157 __vgic_restore_state(vcpu);
158 __timer_restore_state(vcpu);
159
160 __sysreg_restore_state(guest_ctxt);
161 __banked_restore_state(guest_ctxt);
162
163 /* Jump in the fire! */
164again:
165 exit_code = __guest_enter(vcpu, host_ctxt);
166 /* And we're baaack! */
167
168 if (exit_code == ARM_EXCEPTION_HVC && !__populate_fault_info(vcpu))
169 goto again;
170
171 fp_enabled = __vfp_enabled();
172
173 __banked_save_state(guest_ctxt);
174 __sysreg_save_state(guest_ctxt);
175 __timer_save_state(vcpu);
176 __vgic_save_state(vcpu);
177
178 __deactivate_traps(vcpu);
179 __deactivate_vm(vcpu);
180
181 __banked_restore_state(host_ctxt);
182 __sysreg_restore_state(host_ctxt);
183
184 if (fp_enabled) {
185 __vfp_save_state(&guest_ctxt->vfp);
186 __vfp_restore_state(&host_ctxt->vfp);
187 }
188
189 write_sysreg(fpexc, VFP_FPEXC);
190
191 return exit_code;
192}
193
194__alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
195
196static const char * const __hyp_panic_string[] = {
197 [ARM_EXCEPTION_RESET] = "\nHYP panic: RST PC:%08x CPSR:%08x",
198 [ARM_EXCEPTION_UNDEFINED] = "\nHYP panic: UNDEF PC:%08x CPSR:%08x",
199 [ARM_EXCEPTION_SOFTWARE] = "\nHYP panic: SVC PC:%08x CPSR:%08x",
200 [ARM_EXCEPTION_PREF_ABORT] = "\nHYP panic: PABRT PC:%08x CPSR:%08x",
201 [ARM_EXCEPTION_DATA_ABORT] = "\nHYP panic: DABRT PC:%08x ADDR:%08x",
202 [ARM_EXCEPTION_IRQ] = "\nHYP panic: IRQ PC:%08x CPSR:%08x",
203 [ARM_EXCEPTION_FIQ] = "\nHYP panic: FIQ PC:%08x CPSR:%08x",
204 [ARM_EXCEPTION_HVC] = "\nHYP panic: HVC PC:%08x CPSR:%08x",
205};
206
207void __hyp_text __noreturn __hyp_panic(int cause)
208{
209 u32 elr = read_special(ELR_hyp);
210 u32 val;
211
212 if (cause == ARM_EXCEPTION_DATA_ABORT)
213 val = read_sysreg(HDFAR);
214 else
215 val = read_special(SPSR);
216
217 if (read_sysreg(VTTBR)) {
218 struct kvm_vcpu *vcpu;
219 struct kvm_cpu_context *host_ctxt;
220
221 vcpu = (struct kvm_vcpu *)read_sysreg(HTPIDR);
222 host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
223 __deactivate_traps(vcpu);
224 __deactivate_vm(vcpu);
225 __sysreg_restore_state(host_ctxt);
226 }
227
228 /* Call panic for real */
229 __hyp_do_panic(__hyp_panic_string[cause], elr, val);
230
231 unreachable();
232}
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c
new file mode 100644
index 000000000000..a2636001e616
--- /dev/null
+++ b/arch/arm/kvm/hyp/tlb.c
@@ -0,0 +1,70 @@
1/*
2 * Original code:
3 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
4 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
5 *
6 * Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21#include <asm/kvm_hyp.h>
22
23/**
24 * Flush per-VMID TLBs
25 *
26 * __kvm_tlb_flush_vmid(struct kvm *kvm);
27 *
28 * We rely on the hardware to broadcast the TLB invalidation to all CPUs
29 * inside the inner-shareable domain (which is the case for all v7
30 * implementations). If we come across a non-IS SMP implementation, we'll
31 * have to use an IPI based mechanism. Until then, we stick to the simple
32 * hardware assisted version.
33 *
34 * As v7 does not support flushing per IPA, just nuke the whole TLB
35 * instead, ignoring the ipa value.
36 */
37static void __hyp_text __tlb_flush_vmid(struct kvm *kvm)
38{
39 dsb(ishst);
40
41 /* Switch to requested VMID */
42 kvm = kern_hyp_va(kvm);
43 write_sysreg(kvm->arch.vttbr, VTTBR);
44 isb();
45
46 write_sysreg(0, TLBIALLIS);
47 dsb(ish);
48 isb();
49
50 write_sysreg(0, VTTBR);
51}
52
53__alias(__tlb_flush_vmid) void __kvm_tlb_flush_vmid(struct kvm *kvm);
54
55static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
56{
57 __tlb_flush_vmid(kvm);
58}
59
60__alias(__tlb_flush_vmid_ipa) void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm,
61 phys_addr_t ipa);
62
63static void __hyp_text __tlb_flush_vm_context(void)
64{
65 write_sysreg(0, TLBIALLNSNHIS);
66 write_sysreg(0, ICIALLUIS);
67 dsb(ish);
68}
69
70__alias(__tlb_flush_vm_context) void __kvm_flush_vm_context(void);
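
tlb.c follows the same pattern as switch.c above: the workers are static __hyp_text functions, and __alias() then publishes them under the ABI names (__kvm_tlb_flush_vmid() and friends) that the rest of KVM links against. __alias() is essentially the compiler's alias attribute; a minimal user-space illustration of that mechanism:

#include <stdio.h>

/* The real work lives in a static function... */
static void real_impl(int x)
{
	printf("flush %d\n", x);
}

/* ...and the public symbol is just another name for the same code. */
void public_name(int x) __attribute__((alias("real_impl")));

int main(void)
{
	public_name(42);	/* ends up in real_impl() */
	return 0;
}
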
diff --git a/arch/arm/kvm/hyp/vfp.S b/arch/arm/kvm/hyp/vfp.S
new file mode 100644
index 000000000000..7c297e87eb8b
--- /dev/null
+++ b/arch/arm/kvm/hyp/vfp.S
@@ -0,0 +1,68 @@
1/*
2 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
3 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <linux/linkage.h>
19#include <asm/vfpmacros.h>
20
21 .text
22 .pushsection .hyp.text, "ax"
23
24/* void __vfp_save_state(struct vfp_hard_struct *vfp); */
25ENTRY(__vfp_save_state)
26 push {r4, r5}
27 VFPFMRX r1, FPEXC
28
 29 @ Make sure VFP is *really* enabled so we can touch the registers.
30 orr r5, r1, #FPEXC_EN
31 tst r5, #FPEXC_EX @ Check for VFP Subarchitecture
32 bic r5, r5, #FPEXC_EX @ FPEXC_EX disable
33 VFPFMXR FPEXC, r5
34 isb
35
36 VFPFMRX r2, FPSCR
37 beq 1f
38
 39 @ If FPEXC_EX is 0, then FPINST/FPINST2 reads are unpredictable, so
40 @ we only need to save them if FPEXC_EX is set.
41 VFPFMRX r3, FPINST
42 tst r5, #FPEXC_FP2V
43 VFPFMRX r4, FPINST2, ne @ vmrsne
441:
45 VFPFSTMIA r0, r5 @ Save VFP registers
46 stm r0, {r1-r4} @ Save FPEXC, FPSCR, FPINST, FPINST2
47 pop {r4, r5}
48 bx lr
49ENDPROC(__vfp_save_state)
50
51/* void __vfp_restore_state(struct vfp_hard_struct *vfp);
52 * Assume FPEXC_EN is on and FPEXC_EX is off */
53ENTRY(__vfp_restore_state)
54 VFPFLDMIA r0, r1 @ Load VFP registers
55 ldm r0, {r0-r3} @ Load FPEXC, FPSCR, FPINST, FPINST2
56
57 VFPFMXR FPSCR, r1
58 tst r0, #FPEXC_EX @ Check for VFP Subarchitecture
59 beq 1f
60 VFPFMXR FPINST, r2
61 tst r0, #FPEXC_FP2V
62 VFPFMXR FPINST2, r3, ne
631:
64 VFPFMXR FPEXC, r0 @ FPEXC (last, in case !EN)
65 bx lr
66ENDPROC(__vfp_restore_state)
67
68 .popsection
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 3988e72d16ff..1f9ae17476f9 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -84,14 +84,6 @@ __do_hyp_init:
84 orr r0, r0, r1 84 orr r0, r0, r1
85 mcr p15, 4, r0, c2, c0, 2 @ HTCR 85 mcr p15, 4, r0, c2, c0, 2 @ HTCR
86 86
87 mrc p15, 4, r1, c2, c1, 2 @ VTCR
88 ldr r2, =VTCR_MASK
89 bic r1, r1, r2
90 bic r0, r0, #(~VTCR_HTCR_SH) @ clear non-reusable HTCR bits
91 orr r1, r0, r1
92 orr r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S)
93 mcr p15, 4, r1, c2, c1, 2 @ VTCR
94
95 @ Use the same memory attributes for hyp. accesses as the kernel 87 @ Use the same memory attributes for hyp. accesses as the kernel
 96 @ (copy MAIRx to HMAIRx). 88 @ (copy MAIRx to HMAIRx).
97 mrc p15, 0, r0, c10, c2, 0 89 mrc p15, 0, r0, c10, c2, 0
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 900ef6dd8f72..b1bd316f14c0 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -17,211 +17,14 @@
17 */ 17 */
18 18
19#include <linux/linkage.h> 19#include <linux/linkage.h>
20#include <linux/const.h>
21#include <asm/unified.h>
22#include <asm/page.h>
23#include <asm/ptrace.h>
24#include <asm/asm-offsets.h>
25#include <asm/kvm_asm.h>
26#include <asm/kvm_arm.h>
27#include <asm/vfpmacros.h>
28#include "interrupts_head.S"
29 20
30 .text 21 .text
31 22
32__kvm_hyp_code_start:
33 .globl __kvm_hyp_code_start
34
35/********************************************************************
36 * Flush per-VMID TLBs
37 *
38 * void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
39 *
40 * We rely on the hardware to broadcast the TLB invalidation to all CPUs
41 * inside the inner-shareable domain (which is the case for all v7
42 * implementations). If we come across a non-IS SMP implementation, we'll
43 * have to use an IPI based mechanism. Until then, we stick to the simple
44 * hardware assisted version.
45 *
46 * As v7 does not support flushing per IPA, just nuke the whole TLB
47 * instead, ignoring the ipa value.
48 */
49ENTRY(__kvm_tlb_flush_vmid_ipa)
50 push {r2, r3}
51
52 dsb ishst
53 add r0, r0, #KVM_VTTBR
54 ldrd r2, r3, [r0]
55 mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR
56 isb
57 mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored)
58 dsb ish
59 isb
60 mov r2, #0
61 mov r3, #0
62 mcrr p15, 6, r2, r3, c2 @ Back to VMID #0
63 isb @ Not necessary if followed by eret
64
65 pop {r2, r3}
66 bx lr
67ENDPROC(__kvm_tlb_flush_vmid_ipa)
68
69/**
70 * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
71 *
72 * Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address
73 * parameter
74 */
75
76ENTRY(__kvm_tlb_flush_vmid)
77 b __kvm_tlb_flush_vmid_ipa
78ENDPROC(__kvm_tlb_flush_vmid)
79
80/********************************************************************
81 * Flush TLBs and instruction caches of all CPUs inside the inner-shareable
82 * domain, for all VMIDs
83 *
84 * void __kvm_flush_vm_context(void);
85 */
86ENTRY(__kvm_flush_vm_context)
87 mov r0, #0 @ rn parameter for c15 flushes is SBZ
88
89 /* Invalidate NS Non-Hyp TLB Inner Shareable (TLBIALLNSNHIS) */
90 mcr p15, 4, r0, c8, c3, 4
91 /* Invalidate instruction caches Inner Shareable (ICIALLUIS) */
92 mcr p15, 0, r0, c7, c1, 0
93 dsb ish
94 isb @ Not necessary if followed by eret
95
96 bx lr
97ENDPROC(__kvm_flush_vm_context)
98
99
100/********************************************************************
101 * Hypervisor world-switch code
102 *
103 *
104 * int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
105 */
106ENTRY(__kvm_vcpu_run)
107 @ Save the vcpu pointer
108 mcr p15, 4, vcpu, c13, c0, 2 @ HTPIDR
109
110 save_host_regs
111
112 restore_vgic_state
113 restore_timer_state
114
115 @ Store hardware CP15 state and load guest state
116 read_cp15_state store_to_vcpu = 0
117 write_cp15_state read_from_vcpu = 1
118
119 @ If the host kernel has not been configured with VFPv3 support,
 120 @ then it is safer if we prevent guests from using it as well.
121#ifdef CONFIG_VFPv3
122 @ Set FPEXC_EN so the guest doesn't trap floating point instructions
123 VFPFMRX r2, FPEXC @ VMRS
124 push {r2}
125 orr r2, r2, #FPEXC_EN
126 VFPFMXR FPEXC, r2 @ VMSR
127#endif
128
129 @ Configure Hyp-role
130 configure_hyp_role vmentry
131
132 @ Trap coprocessor CRx accesses
133 set_hstr vmentry
134 set_hcptr vmentry, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11))
135 set_hdcr vmentry
136
137 @ Write configured ID register into MIDR alias
138 ldr r1, [vcpu, #VCPU_MIDR]
139 mcr p15, 4, r1, c0, c0, 0
140
141 @ Write guest view of MPIDR into VMPIDR
142 ldr r1, [vcpu, #CP15_OFFSET(c0_MPIDR)]
143 mcr p15, 4, r1, c0, c0, 5
144
145 @ Set up guest memory translation
146 ldr r1, [vcpu, #VCPU_KVM]
147 add r1, r1, #KVM_VTTBR
148 ldrd r2, r3, [r1]
149 mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR
150
151 @ We're all done, just restore the GPRs and go to the guest
152 restore_guest_regs
153 clrex @ Clear exclusive monitor
154 eret
155
156__kvm_vcpu_return:
157 /*
158 * return convention:
159 * guest r0, r1, r2 saved on the stack
160 * r0: vcpu pointer
161 * r1: exception code
162 */
163 save_guest_regs
164
165 @ Set VMID == 0
166 mov r2, #0
167 mov r3, #0
168 mcrr p15, 6, r2, r3, c2 @ Write VTTBR
169
170 @ Don't trap coprocessor accesses for host kernel
171 set_hstr vmexit
172 set_hdcr vmexit
173 set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)), after_vfp_restore
174
175#ifdef CONFIG_VFPv3
176 @ Switch VFP/NEON hardware state to the host's
177 add r7, vcpu, #VCPU_VFP_GUEST
178 store_vfp_state r7
179 add r7, vcpu, #VCPU_VFP_HOST
180 ldr r7, [r7]
181 restore_vfp_state r7
182
183after_vfp_restore:
184 @ Restore FPEXC_EN which we clobbered on entry
185 pop {r2}
186 VFPFMXR FPEXC, r2
187#else
188after_vfp_restore:
189#endif
190
191 @ Reset Hyp-role
192 configure_hyp_role vmexit
193
194 @ Let host read hardware MIDR
195 mrc p15, 0, r2, c0, c0, 0
196 mcr p15, 4, r2, c0, c0, 0
197
198 @ Back to hardware MPIDR
199 mrc p15, 0, r2, c0, c0, 5
200 mcr p15, 4, r2, c0, c0, 5
201
202 @ Store guest CP15 state and restore host state
203 read_cp15_state store_to_vcpu = 1
204 write_cp15_state read_from_vcpu = 0
205
206 save_timer_state
207 save_vgic_state
208
209 restore_host_regs
210 clrex @ Clear exclusive monitor
211#ifndef CONFIG_CPU_ENDIAN_BE8
212 mov r0, r1 @ Return the return code
213 mov r1, #0 @ Clear upper bits in return value
214#else
215 @ r1 already has return code
216 mov r0, #0 @ Clear upper bits in return value
217#endif /* CONFIG_CPU_ENDIAN_BE8 */
218 bx lr @ return to IOCTL
219
220/******************************************************************** 23/********************************************************************
221 * Call function in Hyp mode 24 * Call function in Hyp mode
222 * 25 *
223 * 26 *
224 * u64 kvm_call_hyp(void *hypfn, ...); 27 * unsigned long kvm_call_hyp(void *hypfn, ...);
225 * 28 *
226 * This is not really a variadic function in the classic C-way and care must 29 * This is not really a variadic function in the classic C-way and care must
227 * be taken when calling this to ensure parameters are passed in registers 30 * be taken when calling this to ensure parameters are passed in registers
@@ -232,7 +35,7 @@ after_vfp_restore:
232 * passed as r0, r1, and r2 (a maximum of 3 arguments in addition to the 35 * passed as r0, r1, and r2 (a maximum of 3 arguments in addition to the
233 * function pointer can be passed). The function being called must be mapped 36 * function pointer can be passed). The function being called must be mapped
234 * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are 37 * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are
235 * passed in r0 and r1. 38 * passed in r0 (strictly 32bit).
236 * 39 *
237 * A function pointer with a value of 0xffffffff has a special meaning, 40 * A function pointer with a value of 0xffffffff has a special meaning,
238 * and is used to implement __hyp_get_vectors in the same way as in 41 * and is used to implement __hyp_get_vectors in the same way as in
@@ -246,281 +49,4 @@ after_vfp_restore:
246ENTRY(kvm_call_hyp) 49ENTRY(kvm_call_hyp)
247 hvc #0 50 hvc #0
248 bx lr 51 bx lr
249 52ENDPROC(kvm_call_hyp)
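A hedged usage sketch (not part of this diff; call sites of this shape exist in arch/arm/kvm, and kvm, ipa, vcpu and ret are assumed locals of the obvious types): the caller passes the HYP function pointer first, followed by up to three arguments, which land in r0-r3 as described above; the result comes back in r0.

	/* Flush the guest TLB for one VM (two arguments after the fn pointer). */
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);

	/* Enter the guest; the exit code is returned in r0. */
	ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);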
250/********************************************************************
251 * Hypervisor exception vector and handlers
252 *
253 *
254 * The KVM/ARM Hypervisor ABI is defined as follows:
255 *
256 * Entry to Hyp mode from the host kernel will happen _only_ when an HVC
257 * instruction is issued since all traps are disabled when running the host
258 * kernel as per the Hyp-mode initialization at boot time.
259 *
260 * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc
261 * below) when the HVC instruction is called from SVC mode (i.e. a guest or the
262 * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC
263 * instructions are called from within Hyp-mode.
264 *
265 * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode):
266 * Switching to Hyp mode is done through a simple HVC #0 instruction. The
267 * exception vector code will check that the HVC comes from VMID==0 and if
268 * so will push the necessary state (SPSR, lr_usr) on the Hyp stack.
269 * - r0 contains a pointer to a HYP function
270 * - r1, r2, and r3 contain arguments to the above function.
271 * - The HYP function will be called with its arguments in r0, r1 and r2.
272 * On HYP function return, we return directly to SVC.
273 *
274 * Note that the above is used to execute code in Hyp-mode from a host-kernel
275 * point of view, and is a different concept from performing a world-switch and
276 * executing guest code SVC mode (with a VMID != 0).
277 */
278
279/* Handle undef, svc, pabt, or dabt by crashing with a user notice */
280.macro bad_exception exception_code, panic_str
281 push {r0-r2}
282 mrrc p15, 6, r0, r1, c2 @ Read VTTBR
283 lsr r1, r1, #16
284 ands r1, r1, #0xff
285 beq 99f
286
287 load_vcpu @ Load VCPU pointer
288 .if \exception_code == ARM_EXCEPTION_DATA_ABORT
289 mrc p15, 4, r2, c5, c2, 0 @ HSR
290 mrc p15, 4, r1, c6, c0, 0 @ HDFAR
291 str r2, [vcpu, #VCPU_HSR]
292 str r1, [vcpu, #VCPU_HxFAR]
293 .endif
294 .if \exception_code == ARM_EXCEPTION_PREF_ABORT
295 mrc p15, 4, r2, c5, c2, 0 @ HSR
296 mrc p15, 4, r1, c6, c0, 2 @ HIFAR
297 str r2, [vcpu, #VCPU_HSR]
298 str r1, [vcpu, #VCPU_HxFAR]
299 .endif
300 mov r1, #\exception_code
301 b __kvm_vcpu_return
302
303 @ We were in the host already. Let's craft a panic-ing return to SVC.
30499: mrs r2, cpsr
305 bic r2, r2, #MODE_MASK
306 orr r2, r2, #SVC_MODE
307THUMB( orr r2, r2, #PSR_T_BIT )
308 msr spsr_cxsf, r2
309 mrs r1, ELR_hyp
310 ldr r2, =panic
311 msr ELR_hyp, r2
312 ldr r0, =\panic_str
313 clrex @ Clear exclusive monitor
314 eret
315.endm
316
317 .text
318
319 .align 5
320__kvm_hyp_vector:
321 .globl __kvm_hyp_vector
322
323 @ Hyp-mode exception vector
324 W(b) hyp_reset
325 W(b) hyp_undef
326 W(b) hyp_svc
327 W(b) hyp_pabt
328 W(b) hyp_dabt
329 W(b) hyp_hvc
330 W(b) hyp_irq
331 W(b) hyp_fiq
332
333 .align
334hyp_reset:
335 b hyp_reset
336
337 .align
338hyp_undef:
339 bad_exception ARM_EXCEPTION_UNDEFINED, und_die_str
340
341 .align
342hyp_svc:
343 bad_exception ARM_EXCEPTION_HVC, svc_die_str
344
345 .align
346hyp_pabt:
347 bad_exception ARM_EXCEPTION_PREF_ABORT, pabt_die_str
348
349 .align
350hyp_dabt:
351 bad_exception ARM_EXCEPTION_DATA_ABORT, dabt_die_str
352
353 .align
354hyp_hvc:
355 /*
 356 * Getting here is either because of a trap from a guest or from calling
357 * HVC from the host kernel, which means "switch to Hyp mode".
358 */
359 push {r0, r1, r2}
360
361 @ Check syndrome register
362 mrc p15, 4, r1, c5, c2, 0 @ HSR
363 lsr r0, r1, #HSR_EC_SHIFT
364 cmp r0, #HSR_EC_HVC
365 bne guest_trap @ Not HVC instr.
366
367 /*
368 * Let's check if the HVC came from VMID 0 and allow simple
369 * switch to Hyp mode
370 */
371 mrrc p15, 6, r0, r2, c2
372 lsr r2, r2, #16
373 and r2, r2, #0xff
374 cmp r2, #0
375 bne guest_trap @ Guest called HVC
376
377 /*
 378 * Getting here means the host called HVC; we shift parameters and branch
379 * to Hyp function.
380 */
381 pop {r0, r1, r2}
382
383 /* Check for __hyp_get_vectors */
384 cmp r0, #-1
385 mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR
386 beq 1f
387
388 push {lr}
389 mrs lr, SPSR
390 push {lr}
391
392 mov lr, r0
393 mov r0, r1
394 mov r1, r2
395 mov r2, r3
396
397THUMB( orr lr, #1)
398 blx lr @ Call the HYP function
399
400 pop {lr}
401 msr SPSR_csxf, lr
402 pop {lr}
4031: eret
404
405guest_trap:
406 load_vcpu @ Load VCPU pointer to r0
407 str r1, [vcpu, #VCPU_HSR]
408
409 @ Check if we need the fault information
410 lsr r1, r1, #HSR_EC_SHIFT
411#ifdef CONFIG_VFPv3
412 cmp r1, #HSR_EC_CP_0_13
413 beq switch_to_guest_vfp
414#endif
415 cmp r1, #HSR_EC_IABT
416 mrceq p15, 4, r2, c6, c0, 2 @ HIFAR
417 beq 2f
418 cmp r1, #HSR_EC_DABT
419 bne 1f
420 mrc p15, 4, r2, c6, c0, 0 @ HDFAR
421
4222: str r2, [vcpu, #VCPU_HxFAR]
423
424 /*
425 * B3.13.5 Reporting exceptions taken to the Non-secure PL2 mode:
426 *
427 * Abort on the stage 2 translation for a memory access from a
428 * Non-secure PL1 or PL0 mode:
429 *
430 * For any Access flag fault or Translation fault, and also for any
431 * Permission fault on the stage 2 translation of a memory access
432 * made as part of a translation table walk for a stage 1 translation,
433 * the HPFAR holds the IPA that caused the fault. Otherwise, the HPFAR
434 * is UNKNOWN.
435 */
436
437 /* Check for permission fault, and S1PTW */
438 mrc p15, 4, r1, c5, c2, 0 @ HSR
439 and r0, r1, #HSR_FSC_TYPE
440 cmp r0, #FSC_PERM
441 tsteq r1, #(1 << 7) @ S1PTW
442 mrcne p15, 4, r2, c6, c0, 4 @ HPFAR
443 bne 3f
444
445 /* Preserve PAR */
446 mrrc p15, 0, r0, r1, c7 @ PAR
447 push {r0, r1}
448
449 /* Resolve IPA using the xFAR */
450 mcr p15, 0, r2, c7, c8, 0 @ ATS1CPR
451 isb
452 mrrc p15, 0, r0, r1, c7 @ PAR
453 tst r0, #1
454 bne 4f @ Failed translation
455 ubfx r2, r0, #12, #20
456 lsl r2, r2, #4
457 orr r2, r2, r1, lsl #24
458
459 /* Restore PAR */
460 pop {r0, r1}
461 mcrr p15, 0, r0, r1, c7 @ PAR
462
4633: load_vcpu @ Load VCPU pointer to r0
464 str r2, [r0, #VCPU_HPFAR]
465
4661: mov r1, #ARM_EXCEPTION_HVC
467 b __kvm_vcpu_return
468
4694: pop {r0, r1} @ Failed translation, return to guest
470 mcrr p15, 0, r0, r1, c7 @ PAR
471 clrex
472 pop {r0, r1, r2}
473 eret
474
475/*
476 * If VFPv3 support is not available, then we will not switch the VFP
 477 * registers; however cp10 and cp11 accesses will still trap and fall back
478 * to the regular coprocessor emulation code, which currently will
479 * inject an undefined exception to the guest.
480 */
481#ifdef CONFIG_VFPv3
482switch_to_guest_vfp:
483 push {r3-r7}
484
485 @ NEON/VFP used. Turn on VFP access.
486 set_hcptr vmtrap, (HCPTR_TCP(10) | HCPTR_TCP(11))
487
488 @ Switch VFP/NEON hardware state to the guest's
489 add r7, r0, #VCPU_VFP_HOST
490 ldr r7, [r7]
491 store_vfp_state r7
492 add r7, r0, #VCPU_VFP_GUEST
493 restore_vfp_state r7
494
495 pop {r3-r7}
496 pop {r0-r2}
497 clrex
498 eret
499#endif
500
501 .align
502hyp_irq:
503 push {r0, r1, r2}
504 mov r1, #ARM_EXCEPTION_IRQ
505 load_vcpu @ Load VCPU pointer to r0
506 b __kvm_vcpu_return
507
508 .align
509hyp_fiq:
510 b hyp_fiq
511
512 .ltorg
513
514__kvm_hyp_code_end:
515 .globl __kvm_hyp_code_end
516
517 .section ".rodata"
518
519und_die_str:
520 .ascii "unexpected undefined exception in Hyp mode at: %#08x\n"
521pabt_die_str:
522 .ascii "unexpected prefetch abort in Hyp mode at: %#08x\n"
523dabt_die_str:
524 .ascii "unexpected data abort in Hyp mode at: %#08x\n"
525svc_die_str:
526 .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x\n"
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
deleted file mode 100644
index 51a59504bef4..000000000000
--- a/arch/arm/kvm/interrupts_head.S
+++ /dev/null
@@ -1,648 +0,0 @@
1#include <linux/irqchip/arm-gic.h>
2#include <asm/assembler.h>
3
4#define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4))
5#define VCPU_USR_SP (VCPU_USR_REG(13))
6#define VCPU_USR_LR (VCPU_USR_REG(14))
7#define CP15_OFFSET(_cp15_reg_idx) (VCPU_CP15 + (_cp15_reg_idx * 4))
8
9/*
10 * Many of these macros need to access the VCPU structure, which is always
11 * held in r0. These macros should never clobber r1, as it is used to hold the
12 * exception code on the return path (except of course the macro that switches
13 * all the registers before the final jump to the VM).
14 */
15vcpu .req r0 @ vcpu pointer always in r0
16
17/* Clobbers {r2-r6} */
18.macro store_vfp_state vfp_base
19 @ The VFPFMRX and VFPFMXR macros are the VMRS and VMSR instructions
20 VFPFMRX r2, FPEXC
21 @ Make sure VFP is enabled so we can touch the registers.
22 orr r6, r2, #FPEXC_EN
23 VFPFMXR FPEXC, r6
24
25 VFPFMRX r3, FPSCR
26 tst r2, #FPEXC_EX @ Check for VFP Subarchitecture
27 beq 1f
 28 @ If FPEXC_EX is 0, then FPINST/FPINST2 reads are unpredictable, so
29 @ we only need to save them if FPEXC_EX is set.
30 VFPFMRX r4, FPINST
31 tst r2, #FPEXC_FP2V
32 VFPFMRX r5, FPINST2, ne @ vmrsne
33 bic r6, r2, #FPEXC_EX @ FPEXC_EX disable
34 VFPFMXR FPEXC, r6
351:
36 VFPFSTMIA \vfp_base, r6 @ Save VFP registers
37 stm \vfp_base, {r2-r5} @ Save FPEXC, FPSCR, FPINST, FPINST2
38.endm
39
40/* Assume FPEXC_EN is on and FPEXC_EX is off, clobbers {r2-r6} */
41.macro restore_vfp_state vfp_base
42 VFPFLDMIA \vfp_base, r6 @ Load VFP registers
43 ldm \vfp_base, {r2-r5} @ Load FPEXC, FPSCR, FPINST, FPINST2
44
45 VFPFMXR FPSCR, r3
46 tst r2, #FPEXC_EX @ Check for VFP Subarchitecture
47 beq 1f
48 VFPFMXR FPINST, r4
49 tst r2, #FPEXC_FP2V
50 VFPFMXR FPINST2, r5, ne
511:
52 VFPFMXR FPEXC, r2 @ FPEXC (last, in case !EN)
53.endm
54
 55/* These are simply for the macros to work - the values don't have meaning */
56.equ usr, 0
57.equ svc, 1
58.equ abt, 2
59.equ und, 3
60.equ irq, 4
61.equ fiq, 5
62
63.macro push_host_regs_mode mode
64 mrs r2, SP_\mode
65 mrs r3, LR_\mode
66 mrs r4, SPSR_\mode
67 push {r2, r3, r4}
68.endm
69
70/*
71 * Store all host persistent registers on the stack.
72 * Clobbers all registers, in all modes, except r0 and r1.
73 */
74.macro save_host_regs
75 /* Hyp regs. Only ELR_hyp (SPSR_hyp already saved) */
76 mrs r2, ELR_hyp
77 push {r2}
78
79 /* usr regs */
80 push {r4-r12} @ r0-r3 are always clobbered
81 mrs r2, SP_usr
82 mov r3, lr
83 push {r2, r3}
84
85 push_host_regs_mode svc
86 push_host_regs_mode abt
87 push_host_regs_mode und
88 push_host_regs_mode irq
89
90 /* fiq regs */
91 mrs r2, r8_fiq
92 mrs r3, r9_fiq
93 mrs r4, r10_fiq
94 mrs r5, r11_fiq
95 mrs r6, r12_fiq
96 mrs r7, SP_fiq
97 mrs r8, LR_fiq
98 mrs r9, SPSR_fiq
99 push {r2-r9}
100.endm
101
102.macro pop_host_regs_mode mode
103 pop {r2, r3, r4}
104 msr SP_\mode, r2
105 msr LR_\mode, r3
106 msr SPSR_\mode, r4
107.endm
108
109/*
110 * Restore all host registers from the stack.
111 * Clobbers all registers, in all modes, except r0 and r1.
112 */
113.macro restore_host_regs
114 pop {r2-r9}
115 msr r8_fiq, r2
116 msr r9_fiq, r3
117 msr r10_fiq, r4
118 msr r11_fiq, r5
119 msr r12_fiq, r6
120 msr SP_fiq, r7
121 msr LR_fiq, r8
122 msr SPSR_fiq, r9
123
124 pop_host_regs_mode irq
125 pop_host_regs_mode und
126 pop_host_regs_mode abt
127 pop_host_regs_mode svc
128
129 pop {r2, r3}
130 msr SP_usr, r2
131 mov lr, r3
132 pop {r4-r12}
133
134 pop {r2}
135 msr ELR_hyp, r2
136.endm
137
138/*
139 * Restore SP, LR and SPSR for a given mode. offset is the offset of
140 * this mode's registers from the VCPU base.
141 *
142 * Assumes vcpu pointer in vcpu reg
143 *
144 * Clobbers r1, r2, r3, r4.
145 */
146.macro restore_guest_regs_mode mode, offset
147 add r1, vcpu, \offset
148 ldm r1, {r2, r3, r4}
149 msr SP_\mode, r2
150 msr LR_\mode, r3
151 msr SPSR_\mode, r4
152.endm
153
154/*
155 * Restore all guest registers from the vcpu struct.
156 *
157 * Assumes vcpu pointer in vcpu reg
158 *
159 * Clobbers *all* registers.
160 */
161.macro restore_guest_regs
162 restore_guest_regs_mode svc, #VCPU_SVC_REGS
163 restore_guest_regs_mode abt, #VCPU_ABT_REGS
164 restore_guest_regs_mode und, #VCPU_UND_REGS
165 restore_guest_regs_mode irq, #VCPU_IRQ_REGS
166
167 add r1, vcpu, #VCPU_FIQ_REGS
168 ldm r1, {r2-r9}
169 msr r8_fiq, r2
170 msr r9_fiq, r3
171 msr r10_fiq, r4
172 msr r11_fiq, r5
173 msr r12_fiq, r6
174 msr SP_fiq, r7
175 msr LR_fiq, r8
176 msr SPSR_fiq, r9
177
178 @ Load return state
179 ldr r2, [vcpu, #VCPU_PC]
180 ldr r3, [vcpu, #VCPU_CPSR]
181 msr ELR_hyp, r2
182 msr SPSR_cxsf, r3
183
184 @ Load user registers
185 ldr r2, [vcpu, #VCPU_USR_SP]
186 ldr r3, [vcpu, #VCPU_USR_LR]
187 msr SP_usr, r2
188 mov lr, r3
189 add vcpu, vcpu, #(VCPU_USR_REGS)
190 ldm vcpu, {r0-r12}
191.endm
192
193/*
194 * Save SP, LR and SPSR for a given mode. offset is the offset of
195 * this mode's registers from the VCPU base.
196 *
197 * Assumes vcpu pointer in vcpu reg
198 *
199 * Clobbers r2, r3, r4, r5.
200 */
201.macro save_guest_regs_mode mode, offset
202 add r2, vcpu, \offset
203 mrs r3, SP_\mode
204 mrs r4, LR_\mode
205 mrs r5, SPSR_\mode
206 stm r2, {r3, r4, r5}
207.endm
208
209/*
210 * Save all guest registers to the vcpu struct
211 * Expects guest's r0, r1, r2 on the stack.
212 *
213 * Assumes vcpu pointer in vcpu reg
214 *
215 * Clobbers r2, r3, r4, r5.
216 */
217.macro save_guest_regs
218 @ Store usr registers
219 add r2, vcpu, #VCPU_USR_REG(3)
220 stm r2, {r3-r12}
221 add r2, vcpu, #VCPU_USR_REG(0)
222 pop {r3, r4, r5} @ r0, r1, r2
223 stm r2, {r3, r4, r5}
224 mrs r2, SP_usr
225 mov r3, lr
226 str r2, [vcpu, #VCPU_USR_SP]
227 str r3, [vcpu, #VCPU_USR_LR]
228
229 @ Store return state
230 mrs r2, ELR_hyp
231 mrs r3, spsr
232 str r2, [vcpu, #VCPU_PC]
233 str r3, [vcpu, #VCPU_CPSR]
234
235 @ Store other guest registers
236 save_guest_regs_mode svc, #VCPU_SVC_REGS
237 save_guest_regs_mode abt, #VCPU_ABT_REGS
238 save_guest_regs_mode und, #VCPU_UND_REGS
239 save_guest_regs_mode irq, #VCPU_IRQ_REGS
240.endm
241
242/* Reads cp15 registers from hardware and stores them in memory
243 * @store_to_vcpu: If 0, registers are written in-order to the stack,
244 * otherwise to the VCPU struct pointed to by vcpup
245 *
246 * Assumes vcpu pointer in vcpu reg
247 *
248 * Clobbers r2 - r12
249 */
250.macro read_cp15_state store_to_vcpu
251 mrc p15, 0, r2, c1, c0, 0 @ SCTLR
252 mrc p15, 0, r3, c1, c0, 2 @ CPACR
253 mrc p15, 0, r4, c2, c0, 2 @ TTBCR
254 mrc p15, 0, r5, c3, c0, 0 @ DACR
255 mrrc p15, 0, r6, r7, c2 @ TTBR 0
256 mrrc p15, 1, r8, r9, c2 @ TTBR 1
257 mrc p15, 0, r10, c10, c2, 0 @ PRRR
258 mrc p15, 0, r11, c10, c2, 1 @ NMRR
259 mrc p15, 2, r12, c0, c0, 0 @ CSSELR
260
261 .if \store_to_vcpu == 0
262 push {r2-r12} @ Push CP15 registers
263 .else
264 str r2, [vcpu, #CP15_OFFSET(c1_SCTLR)]
265 str r3, [vcpu, #CP15_OFFSET(c1_CPACR)]
266 str r4, [vcpu, #CP15_OFFSET(c2_TTBCR)]
267 str r5, [vcpu, #CP15_OFFSET(c3_DACR)]
268 add r2, vcpu, #CP15_OFFSET(c2_TTBR0)
269 strd r6, r7, [r2]
270 add r2, vcpu, #CP15_OFFSET(c2_TTBR1)
271 strd r8, r9, [r2]
272 str r10, [vcpu, #CP15_OFFSET(c10_PRRR)]
273 str r11, [vcpu, #CP15_OFFSET(c10_NMRR)]
274 str r12, [vcpu, #CP15_OFFSET(c0_CSSELR)]
275 .endif
276
277 mrc p15, 0, r2, c13, c0, 1 @ CID
278 mrc p15, 0, r3, c13, c0, 2 @ TID_URW
279 mrc p15, 0, r4, c13, c0, 3 @ TID_URO
280 mrc p15, 0, r5, c13, c0, 4 @ TID_PRIV
281 mrc p15, 0, r6, c5, c0, 0 @ DFSR
282 mrc p15, 0, r7, c5, c0, 1 @ IFSR
283 mrc p15, 0, r8, c5, c1, 0 @ ADFSR
284 mrc p15, 0, r9, c5, c1, 1 @ AIFSR
285 mrc p15, 0, r10, c6, c0, 0 @ DFAR
286 mrc p15, 0, r11, c6, c0, 2 @ IFAR
287 mrc p15, 0, r12, c12, c0, 0 @ VBAR
288
289 .if \store_to_vcpu == 0
290 push {r2-r12} @ Push CP15 registers
291 .else
292 str r2, [vcpu, #CP15_OFFSET(c13_CID)]
293 str r3, [vcpu, #CP15_OFFSET(c13_TID_URW)]
294 str r4, [vcpu, #CP15_OFFSET(c13_TID_URO)]
295 str r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)]
296 str r6, [vcpu, #CP15_OFFSET(c5_DFSR)]
297 str r7, [vcpu, #CP15_OFFSET(c5_IFSR)]
298 str r8, [vcpu, #CP15_OFFSET(c5_ADFSR)]
299 str r9, [vcpu, #CP15_OFFSET(c5_AIFSR)]
300 str r10, [vcpu, #CP15_OFFSET(c6_DFAR)]
301 str r11, [vcpu, #CP15_OFFSET(c6_IFAR)]
302 str r12, [vcpu, #CP15_OFFSET(c12_VBAR)]
303 .endif
304
305 mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL
306 mrrc p15, 0, r4, r5, c7 @ PAR
307 mrc p15, 0, r6, c10, c3, 0 @ AMAIR0
308 mrc p15, 0, r7, c10, c3, 1 @ AMAIR1
309
310 .if \store_to_vcpu == 0
311 push {r2,r4-r7}
312 .else
313 str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
314 add r12, vcpu, #CP15_OFFSET(c7_PAR)
315 strd r4, r5, [r12]
316 str r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
317 str r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
318 .endif
319.endm
320
321/*
322 * Reads cp15 registers from memory and writes them to hardware
323 * @read_from_vcpu: If 0, registers are read in-order from the stack,
324 * otherwise from the VCPU struct pointed to by vcpup
325 *
326 * Assumes vcpu pointer in vcpu reg
327 */
328.macro write_cp15_state read_from_vcpu
329 .if \read_from_vcpu == 0
330 pop {r2,r4-r7}
331 .else
332 ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
333 add r12, vcpu, #CP15_OFFSET(c7_PAR)
334 ldrd r4, r5, [r12]
335 ldr r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
336 ldr r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
337 .endif
338
339 mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL
340 mcrr p15, 0, r4, r5, c7 @ PAR
341 mcr p15, 0, r6, c10, c3, 0 @ AMAIR0
342 mcr p15, 0, r7, c10, c3, 1 @ AMAIR1
343
344 .if \read_from_vcpu == 0
345 pop {r2-r12}
346 .else
347 ldr r2, [vcpu, #CP15_OFFSET(c13_CID)]
348 ldr r3, [vcpu, #CP15_OFFSET(c13_TID_URW)]
349 ldr r4, [vcpu, #CP15_OFFSET(c13_TID_URO)]
350 ldr r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)]
351 ldr r6, [vcpu, #CP15_OFFSET(c5_DFSR)]
352 ldr r7, [vcpu, #CP15_OFFSET(c5_IFSR)]
353 ldr r8, [vcpu, #CP15_OFFSET(c5_ADFSR)]
354 ldr r9, [vcpu, #CP15_OFFSET(c5_AIFSR)]
355 ldr r10, [vcpu, #CP15_OFFSET(c6_DFAR)]
356 ldr r11, [vcpu, #CP15_OFFSET(c6_IFAR)]
357 ldr r12, [vcpu, #CP15_OFFSET(c12_VBAR)]
358 .endif
359
360 mcr p15, 0, r2, c13, c0, 1 @ CID
361 mcr p15, 0, r3, c13, c0, 2 @ TID_URW
362 mcr p15, 0, r4, c13, c0, 3 @ TID_URO
363 mcr p15, 0, r5, c13, c0, 4 @ TID_PRIV
364 mcr p15, 0, r6, c5, c0, 0 @ DFSR
365 mcr p15, 0, r7, c5, c0, 1 @ IFSR
366 mcr p15, 0, r8, c5, c1, 0 @ ADFSR
367 mcr p15, 0, r9, c5, c1, 1 @ AIFSR
368 mcr p15, 0, r10, c6, c0, 0 @ DFAR
369 mcr p15, 0, r11, c6, c0, 2 @ IFAR
370 mcr p15, 0, r12, c12, c0, 0 @ VBAR
371
372 .if \read_from_vcpu == 0
373 pop {r2-r12}
374 .else
375 ldr r2, [vcpu, #CP15_OFFSET(c1_SCTLR)]
376 ldr r3, [vcpu, #CP15_OFFSET(c1_CPACR)]
377 ldr r4, [vcpu, #CP15_OFFSET(c2_TTBCR)]
378 ldr r5, [vcpu, #CP15_OFFSET(c3_DACR)]
379 add r12, vcpu, #CP15_OFFSET(c2_TTBR0)
380 ldrd r6, r7, [r12]
381 add r12, vcpu, #CP15_OFFSET(c2_TTBR1)
382 ldrd r8, r9, [r12]
383 ldr r10, [vcpu, #CP15_OFFSET(c10_PRRR)]
384 ldr r11, [vcpu, #CP15_OFFSET(c10_NMRR)]
385 ldr r12, [vcpu, #CP15_OFFSET(c0_CSSELR)]
386 .endif
387
388 mcr p15, 0, r2, c1, c0, 0 @ SCTLR
389 mcr p15, 0, r3, c1, c0, 2 @ CPACR
390 mcr p15, 0, r4, c2, c0, 2 @ TTBCR
391 mcr p15, 0, r5, c3, c0, 0 @ DACR
392 mcrr p15, 0, r6, r7, c2 @ TTBR 0
393 mcrr p15, 1, r8, r9, c2 @ TTBR 1
394 mcr p15, 0, r10, c10, c2, 0 @ PRRR
395 mcr p15, 0, r11, c10, c2, 1 @ NMRR
396 mcr p15, 2, r12, c0, c0, 0 @ CSSELR
397.endm
398
399/*
400 * Save the VGIC CPU state into memory
401 *
402 * Assumes vcpu pointer in vcpu reg
403 */
404.macro save_vgic_state
405 /* Get VGIC VCTRL base into r2 */
406 ldr r2, [vcpu, #VCPU_KVM]
407 ldr r2, [r2, #KVM_VGIC_VCTRL]
408 cmp r2, #0
409 beq 2f
410
411 /* Compute the address of struct vgic_cpu */
412 add r11, vcpu, #VCPU_VGIC_CPU
413
414 /* Save all interesting registers */
415 ldr r4, [r2, #GICH_VMCR]
416 ldr r5, [r2, #GICH_MISR]
417 ldr r6, [r2, #GICH_EISR0]
418 ldr r7, [r2, #GICH_EISR1]
419 ldr r8, [r2, #GICH_ELRSR0]
420 ldr r9, [r2, #GICH_ELRSR1]
421 ldr r10, [r2, #GICH_APR]
422ARM_BE8(rev r4, r4 )
423ARM_BE8(rev r5, r5 )
424ARM_BE8(rev r6, r6 )
425ARM_BE8(rev r7, r7 )
426ARM_BE8(rev r8, r8 )
427ARM_BE8(rev r9, r9 )
428ARM_BE8(rev r10, r10 )
429
430 str r4, [r11, #VGIC_V2_CPU_VMCR]
431 str r5, [r11, #VGIC_V2_CPU_MISR]
432#ifdef CONFIG_CPU_ENDIAN_BE8
433 str r6, [r11, #(VGIC_V2_CPU_EISR + 4)]
434 str r7, [r11, #VGIC_V2_CPU_EISR]
435 str r8, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
436 str r9, [r11, #VGIC_V2_CPU_ELRSR]
437#else
438 str r6, [r11, #VGIC_V2_CPU_EISR]
439 str r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
440 str r8, [r11, #VGIC_V2_CPU_ELRSR]
441 str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
442#endif
443 str r10, [r11, #VGIC_V2_CPU_APR]
444
445 /* Clear GICH_HCR */
446 mov r5, #0
447 str r5, [r2, #GICH_HCR]
448
449 /* Save list registers */
450 add r2, r2, #GICH_LR0
451 add r3, r11, #VGIC_V2_CPU_LR
452 ldr r4, [r11, #VGIC_CPU_NR_LR]
4531: ldr r6, [r2], #4
454ARM_BE8(rev r6, r6 )
455 str r6, [r3], #4
456 subs r4, r4, #1
457 bne 1b
4582:
459.endm
460
461/*
462 * Restore the VGIC CPU state from memory
463 *
464 * Assumes vcpu pointer in vcpu reg
465 */
466.macro restore_vgic_state
467 /* Get VGIC VCTRL base into r2 */
468 ldr r2, [vcpu, #VCPU_KVM]
469 ldr r2, [r2, #KVM_VGIC_VCTRL]
470 cmp r2, #0
471 beq 2f
472
473 /* Compute the address of struct vgic_cpu */
474 add r11, vcpu, #VCPU_VGIC_CPU
475
476 /* We only restore a minimal set of registers */
477 ldr r3, [r11, #VGIC_V2_CPU_HCR]
478 ldr r4, [r11, #VGIC_V2_CPU_VMCR]
479 ldr r8, [r11, #VGIC_V2_CPU_APR]
480ARM_BE8(rev r3, r3 )
481ARM_BE8(rev r4, r4 )
482ARM_BE8(rev r8, r8 )
483
484 str r3, [r2, #GICH_HCR]
485 str r4, [r2, #GICH_VMCR]
486 str r8, [r2, #GICH_APR]
487
488 /* Restore list registers */
489 add r2, r2, #GICH_LR0
490 add r3, r11, #VGIC_V2_CPU_LR
491 ldr r4, [r11, #VGIC_CPU_NR_LR]
4921: ldr r6, [r3], #4
493ARM_BE8(rev r6, r6 )
494 str r6, [r2], #4
495 subs r4, r4, #1
496 bne 1b
4972:
498.endm
499
500#define CNTHCTL_PL1PCTEN (1 << 0)
501#define CNTHCTL_PL1PCEN (1 << 1)
502
503/*
504 * Save the timer state onto the VCPU and allow physical timer/counter access
505 * for the host.
506 *
507 * Assumes vcpu pointer in vcpu reg
508 * Clobbers r2-r5
509 */
510.macro save_timer_state
511 ldr r4, [vcpu, #VCPU_KVM]
512 ldr r2, [r4, #KVM_TIMER_ENABLED]
513 cmp r2, #0
514 beq 1f
515
516 mrc p15, 0, r2, c14, c3, 1 @ CNTV_CTL
517 str r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
518
519 isb
520
521 mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL
522 ldr r4, =VCPU_TIMER_CNTV_CVAL
523 add r5, vcpu, r4
524 strd r2, r3, [r5]
525
526 @ Ensure host CNTVCT == CNTPCT
527 mov r2, #0
528 mcrr p15, 4, r2, r2, c14 @ CNTVOFF
529
5301:
531 mov r2, #0 @ Clear ENABLE
532 mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL
533
534 @ Allow physical timer/counter access for the host
535 mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL
536 orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN)
537 mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL
538.endm
539
540/*
541 * Load the timer state from the VCPU and deny physical timer/counter access
542 * for the host.
543 *
544 * Assumes vcpu pointer in vcpu reg
545 * Clobbers r2-r5
546 */
547.macro restore_timer_state
548 @ Disallow physical timer access for the guest
549 @ Physical counter access is allowed
550 mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL
551 orr r2, r2, #CNTHCTL_PL1PCTEN
552 bic r2, r2, #CNTHCTL_PL1PCEN
553 mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL
554
555 ldr r4, [vcpu, #VCPU_KVM]
556 ldr r2, [r4, #KVM_TIMER_ENABLED]
557 cmp r2, #0
558 beq 1f
559
560 ldr r2, [r4, #KVM_TIMER_CNTVOFF]
561 ldr r3, [r4, #(KVM_TIMER_CNTVOFF + 4)]
562 mcrr p15, 4, rr_lo_hi(r2, r3), c14 @ CNTVOFF
563
564 ldr r4, =VCPU_TIMER_CNTV_CVAL
565 add r5, vcpu, r4
566 ldrd r2, r3, [r5]
567 mcrr p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL
568 isb
569
570 ldr r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
571 and r2, r2, #3
572 mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL
5731:
574.endm
575
576.equ vmentry, 0
577.equ vmexit, 1
578
579/* Configures the HSTR (Hyp System Trap Register) on entry/return
580 * (hardware reset value is 0) */
581.macro set_hstr operation
582 mrc p15, 4, r2, c1, c1, 3
583 ldr r3, =HSTR_T(15)
584 .if \operation == vmentry
585 orr r2, r2, r3 @ Trap CR{15}
586 .else
587 bic r2, r2, r3 @ Don't trap any CRx accesses
588 .endif
589 mcr p15, 4, r2, c1, c1, 3
590.endm
591
592/* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return
593 * (hardware reset value is 0). Keep previous value in r2.
 594 * An ISB is emitted on vmexit/vmtrap, but executed on vmexit only if
595 * VFP wasn't already enabled (always executed on vmtrap).
596 * If a label is specified with vmexit, it is branched to if VFP wasn't
597 * enabled.
598 */
599.macro set_hcptr operation, mask, label = none
600 mrc p15, 4, r2, c1, c1, 2
601 ldr r3, =\mask
602 .if \operation == vmentry
603 orr r3, r2, r3 @ Trap coproc-accesses defined in mask
604 .else
605 bic r3, r2, r3 @ Don't trap defined coproc-accesses
606 .endif
607 mcr p15, 4, r3, c1, c1, 2
608 .if \operation != vmentry
609 .if \operation == vmexit
610 tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11))
611 beq 1f
612 .endif
613 isb
614 .if \label != none
615 b \label
616 .endif
6171:
618 .endif
619.endm
620
621/* Configures the HDCR (Hyp Debug Configuration Register) on entry/return
622 * (hardware reset value is 0) */
623.macro set_hdcr operation
624 mrc p15, 4, r2, c1, c1, 1
625 ldr r3, =(HDCR_TPM|HDCR_TPMCR)
626 .if \operation == vmentry
627 orr r2, r2, r3 @ Trap some perfmon accesses
628 .else
629 bic r2, r2, r3 @ Don't trap any perfmon accesses
630 .endif
631 mcr p15, 4, r2, c1, c1, 1
632.endm
633
634/* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */
635.macro configure_hyp_role operation
636 .if \operation == vmentry
637 ldr r2, [vcpu, #VCPU_HCR]
638 ldr r3, [vcpu, #VCPU_IRQ_LINES]
639 orr r2, r2, r3
640 .else
641 mov r2, #0
642 .endif
643 mcr p15, 4, r2, c1, c1, 0 @ HCR
644.endm
645
646.macro load_vcpu
647 mrc p15, 4, vcpu, c13, c0, 2 @ HTPIDR
648.endm
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index aba61fd3697a..58dbd5c439df 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -28,6 +28,7 @@
28#include <asm/kvm_mmio.h> 28#include <asm/kvm_mmio.h>
29#include <asm/kvm_asm.h> 29#include <asm/kvm_asm.h>
30#include <asm/kvm_emulate.h> 30#include <asm/kvm_emulate.h>
31#include <asm/virt.h>
31 32
32#include "trace.h" 33#include "trace.h"
33 34
@@ -598,6 +599,9 @@ int create_hyp_mappings(void *from, void *to)
598 unsigned long start = KERN_TO_HYP((unsigned long)from); 599 unsigned long start = KERN_TO_HYP((unsigned long)from);
599 unsigned long end = KERN_TO_HYP((unsigned long)to); 600 unsigned long end = KERN_TO_HYP((unsigned long)to);
600 601
602 if (is_kernel_in_hyp_mode())
603 return 0;
604
601 start = start & PAGE_MASK; 605 start = start & PAGE_MASK;
602 end = PAGE_ALIGN(end); 606 end = PAGE_ALIGN(end);
603 607
@@ -630,6 +634,9 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
630 unsigned long start = KERN_TO_HYP((unsigned long)from); 634 unsigned long start = KERN_TO_HYP((unsigned long)from);
631 unsigned long end = KERN_TO_HYP((unsigned long)to); 635 unsigned long end = KERN_TO_HYP((unsigned long)to);
632 636
637 if (is_kernel_in_hyp_mode())
638 return 0;
639
633 /* Check for a valid kernel IO mapping */ 640 /* Check for a valid kernel IO mapping */
634 if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) 641 if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
635 return -EINVAL; 642 return -EINVAL;
@@ -1431,6 +1438,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
1431 } 1438 }
1432 1439
1433 /* 1440 /*
1441 * Check for a cache maintenance operation. Since we
 1442 * ended up here, we know it is outside of any memory
1443 * slot. But we can't find out if that is for a device,
1444 * or if the guest is just being stupid. The only thing
1445 * we know for sure is that this range cannot be cached.
1446 *
1447 * So let's assume that the guest is just being
1448 * cautious, and skip the instruction.
1449 */
1450 if (kvm_vcpu_dabt_is_cm(vcpu)) {
1451 kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
1452 ret = 1;
1453 goto out_unlock;
1454 }
1455
1456 /*
1434 * The IPA is reported as [MAX:12], so we need to 1457 * The IPA is reported as [MAX:12], so we need to
1435 * complement it with the bottom 12 bits from the 1458 * complement it with the bottom 12 bits from the
1436 * faulting VA. This is always 12 bits, irrespective 1459 * faulting VA. This is always 12 bits, irrespective
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index eeb85858d6bb..0048b5a62a50 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -71,7 +71,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
71 } 71 }
72 72
73 /* Reset core registers */ 73 /* Reset core registers */
74 memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs)); 74 memcpy(&vcpu->arch.ctxt.gp_regs, reset_regs, sizeof(vcpu->arch.ctxt.gp_regs));
75 75
76 /* Reset CP15 registers */ 76 /* Reset CP15 registers */
77 kvm_reset_coprocs(vcpu); 77 kvm_reset_coprocs(vcpu);
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8cc62289a63e..cf118d93290d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -750,6 +750,19 @@ config ARM64_LSE_ATOMICS
750 not support these instructions and requires the kernel to be 750 not support these instructions and requires the kernel to be
751 built with binutils >= 2.25. 751 built with binutils >= 2.25.
752 752
753config ARM64_VHE
754 bool "Enable support for Virtualization Host Extensions (VHE)"
755 default y
756 help
757 Virtualization Host Extensions (VHE) allow the kernel to run
758 directly at EL2 (instead of EL1) on processors that support
 759       it. This leads to better performance for KVM, as it reduces
760 the cost of the world switch.
761
762 Selecting this option allows the VHE feature to be detected
763 at runtime, and does not affect processors that do not
764 implement this feature.
765
753endmenu 766endmenu
754 767
755endmenu 768endmenu
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8f271b83f910..a5c769b1c65b 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -30,8 +30,12 @@
30#define ARM64_HAS_LSE_ATOMICS 5 30#define ARM64_HAS_LSE_ATOMICS 5
31#define ARM64_WORKAROUND_CAVIUM_23154 6 31#define ARM64_WORKAROUND_CAVIUM_23154 6
32#define ARM64_WORKAROUND_834220 7 32#define ARM64_WORKAROUND_834220 7
33/* #define ARM64_HAS_NO_HW_PREFETCH 8 */
34/* #define ARM64_HAS_UAO 9 */
35/* #define ARM64_ALT_PAN_NOT_UAO 10 */
36#define ARM64_HAS_VIRT_HOST_EXTN 11
33 37
34#define ARM64_NCAPS 8 38#define ARM64_NCAPS 12
35 39
36#ifndef __ASSEMBLY__ 40#ifndef __ASSEMBLY__
37 41
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 9732908bfc8a..115ea2a64520 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -18,6 +18,7 @@
18 18
19#include <asm/cputype.h> 19#include <asm/cputype.h>
20#include <asm/cpufeature.h> 20#include <asm/cpufeature.h>
21#include <asm/virt.h>
21 22
22#ifdef __KERNEL__ 23#ifdef __KERNEL__
23 24
@@ -35,10 +36,21 @@ struct arch_hw_breakpoint {
35 struct arch_hw_breakpoint_ctrl ctrl; 36 struct arch_hw_breakpoint_ctrl ctrl;
36}; 37};
37 38
39/* Privilege Levels */
40#define AARCH64_BREAKPOINT_EL1 1
41#define AARCH64_BREAKPOINT_EL0 2
42
43#define DBG_HMC_HYP (1 << 13)
44
38static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl) 45static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl)
39{ 46{
40 return (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) | 47 u32 val = (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) |
41 ctrl.enabled; 48 ctrl.enabled;
49
50 if (is_kernel_in_hyp_mode() && ctrl.privilege == AARCH64_BREAKPOINT_EL1)
51 val |= DBG_HMC_HYP;
52
53 return val;
42} 54}
43 55
44static inline void decode_ctrl_reg(u32 reg, 56static inline void decode_ctrl_reg(u32 reg,
@@ -61,10 +73,6 @@ static inline void decode_ctrl_reg(u32 reg,
61#define ARM_BREAKPOINT_STORE 2 73#define ARM_BREAKPOINT_STORE 2
62#define AARCH64_ESR_ACCESS_MASK (1 << 6) 74#define AARCH64_ESR_ACCESS_MASK (1 << 6)
63 75
64/* Privilege Levels */
65#define AARCH64_BREAKPOINT_EL1 1
66#define AARCH64_BREAKPOINT_EL0 2
67
68/* Lengths */ 76/* Lengths */
69#define ARM_BREAKPOINT_LEN_1 0x1 77#define ARM_BREAKPOINT_LEN_1 0x1
70#define ARM_BREAKPOINT_LEN_2 0x3 78#define ARM_BREAKPOINT_LEN_2 0x3
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index d201d4b396d1..b56a0a81e4cb 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -23,6 +23,7 @@
23#include <asm/types.h> 23#include <asm/types.h>
24 24
25/* Hyp Configuration Register (HCR) bits */ 25/* Hyp Configuration Register (HCR) bits */
26#define HCR_E2H (UL(1) << 34)
26#define HCR_ID (UL(1) << 33) 27#define HCR_ID (UL(1) << 33)
27#define HCR_CD (UL(1) << 32) 28#define HCR_CD (UL(1) << 32)
28#define HCR_RW_SHIFT 31 29#define HCR_RW_SHIFT 31
@@ -81,7 +82,7 @@
81 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) 82 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
82#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) 83#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
83#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) 84#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO)
84 85#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
85 86
86/* Hyp System Control Register (SCTLR_EL2) bits */ 87/* Hyp System Control Register (SCTLR_EL2) bits */
87#define SCTLR_EL2_EE (1 << 25) 88#define SCTLR_EL2_EE (1 << 25)
@@ -216,4 +217,7 @@
216 ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ 217 ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
217 ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) 218 ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
218 219
220#define CPACR_EL1_FPEN (3 << 20)
221#define CPACR_EL1_TTA (1 << 28)
222
219#endif /* __ARM64_KVM_ARM_H__ */ 223#endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 52b777b7d407..2d02ba67478c 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -35,9 +35,6 @@ extern char __kvm_hyp_init_end[];
35 35
36extern char __kvm_hyp_vector[]; 36extern char __kvm_hyp_vector[];
37 37
38#define __kvm_hyp_code_start __hyp_text_start
39#define __kvm_hyp_code_end __hyp_text_end
40
41extern void __kvm_flush_vm_context(void); 38extern void __kvm_flush_vm_context(void);
42extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); 39extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
43extern void __kvm_tlb_flush_vmid(struct kvm *kvm); 40extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
@@ -45,9 +42,12 @@ extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
45extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); 42extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
46 43
47extern u64 __vgic_v3_get_ich_vtr_el2(void); 44extern u64 __vgic_v3_get_ich_vtr_el2(void);
45extern void __vgic_v3_init_lrs(void);
48 46
49extern u32 __kvm_get_mdcr_el2(void); 47extern u32 __kvm_get_mdcr_el2(void);
50 48
49extern void __init_stage2_translation(void);
50
51#endif 51#endif
52 52
53#endif /* __ARM_KVM_ASM_H__ */ 53#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 779a5872a2c5..40bc1681b6d5 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -29,6 +29,7 @@
29#include <asm/kvm_mmio.h> 29#include <asm/kvm_mmio.h>
30#include <asm/ptrace.h> 30#include <asm/ptrace.h>
31#include <asm/cputype.h> 31#include <asm/cputype.h>
32#include <asm/virt.h>
32 33
33unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); 34unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
34unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); 35unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
@@ -43,6 +44,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
43static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) 44static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
44{ 45{
45 vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; 46 vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
47 if (is_kernel_in_hyp_mode())
48 vcpu->arch.hcr_el2 |= HCR_E2H;
46 if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) 49 if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
47 vcpu->arch.hcr_el2 &= ~HCR_RW; 50 vcpu->arch.hcr_el2 &= ~HCR_RW;
48} 51}
@@ -189,6 +192,11 @@ static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
189 return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); 192 return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
190} 193}
191 194
195static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu)
196{
197 return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM);
198}
199
192static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) 200static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
193{ 201{
194 return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); 202 return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 689d4c95e12f..71fa6fe9d54a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -25,7 +25,9 @@
25#include <linux/types.h> 25#include <linux/types.h>
26#include <linux/kvm_types.h> 26#include <linux/kvm_types.h>
27#include <asm/kvm.h> 27#include <asm/kvm.h>
28#include <asm/kvm_asm.h>
28#include <asm/kvm_mmio.h> 29#include <asm/kvm_mmio.h>
30#include <asm/kvm_perf_event.h>
29 31
30#define __KVM_HAVE_ARCH_INTC_INITIALIZED 32#define __KVM_HAVE_ARCH_INTC_INITIALIZED
31 33
@@ -36,10 +38,11 @@
36 38
37#include <kvm/arm_vgic.h> 39#include <kvm/arm_vgic.h>
38#include <kvm/arm_arch_timer.h> 40#include <kvm/arm_arch_timer.h>
41#include <kvm/arm_pmu.h>
39 42
40#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS 43#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
41 44
42#define KVM_VCPU_MAX_FEATURES 3 45#define KVM_VCPU_MAX_FEATURES 4
43 46
44int __attribute_const__ kvm_target_cpu(void); 47int __attribute_const__ kvm_target_cpu(void);
45int kvm_reset_vcpu(struct kvm_vcpu *vcpu); 48int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -114,6 +117,21 @@ enum vcpu_sysreg {
114 MDSCR_EL1, /* Monitor Debug System Control Register */ 117 MDSCR_EL1, /* Monitor Debug System Control Register */
115 MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */ 118 MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
116 119
120 /* Performance Monitors Registers */
121 PMCR_EL0, /* Control Register */
122 PMSELR_EL0, /* Event Counter Selection Register */
123 PMEVCNTR0_EL0, /* Event Counter Register (0-30) */
124 PMEVCNTR30_EL0 = PMEVCNTR0_EL0 + 30,
125 PMCCNTR_EL0, /* Cycle Counter Register */
126 PMEVTYPER0_EL0, /* Event Type Register (0-30) */
127 PMEVTYPER30_EL0 = PMEVTYPER0_EL0 + 30,
128 PMCCFILTR_EL0, /* Cycle Count Filter Register */
129 PMCNTENSET_EL0, /* Count Enable Set Register */
130 PMINTENSET_EL1, /* Interrupt Enable Set Register */
131 PMOVSSET_EL0, /* Overflow Flag Status Set Register */
132 PMSWINC_EL0, /* Software Increment Register */
133 PMUSERENR_EL0, /* User Enable Register */
134
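Because the 31 event counter and event type slots are laid out contiguously in this enum, PMU emulation can index them arithmetically. A hedged sketch (vcpu_sys_reg() is the usual accessor for the sys_regs array; vcpu and idx are assumed locals, and the idx handling is illustrative only):

	/* idx 0..30 selects PMEVCNTR<idx>_EL0; the cycle counter lives apart. */
	u64 counter;

	if (idx == 31)		/* cycle counter */
		counter = vcpu_sys_reg(vcpu, PMCCNTR_EL0);
	else
		counter = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + idx);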
117 /* 32bit specific registers. Keep them at the end of the range */ 135 /* 32bit specific registers. Keep them at the end of the range */
118 DACR32_EL2, /* Domain Access Control Register */ 136 DACR32_EL2, /* Domain Access Control Register */
119 IFSR32_EL2, /* Instruction Fault Status Register */ 137 IFSR32_EL2, /* Instruction Fault Status Register */
@@ -211,6 +229,7 @@ struct kvm_vcpu_arch {
211 /* VGIC state */ 229 /* VGIC state */
212 struct vgic_cpu vgic_cpu; 230 struct vgic_cpu vgic_cpu;
213 struct arch_timer_cpu timer_cpu; 231 struct arch_timer_cpu timer_cpu;
232 struct kvm_pmu pmu;
214 233
215 /* 234 /*
216 * Anything that is not used directly from assembly code goes 235 * Anything that is not used directly from assembly code goes
@@ -342,5 +361,18 @@ void kvm_arm_init_debug(void);
342void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); 361void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
343void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); 362void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
344void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); 363void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
364int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
365 struct kvm_device_attr *attr);
366int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
367 struct kvm_device_attr *attr);
368int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
369 struct kvm_device_attr *attr);
370
371/* #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) */
372
373static inline void __cpu_init_stage2(void)
374{
375 kvm_call_hyp(__init_stage2_translation);
376}
345 377
346#endif /* __ARM64_KVM_HOST_H__ */ 378#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
new file mode 100644
index 000000000000..a46b019ebcf5
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -0,0 +1,181 @@
1/*
2 * Copyright (C) 2015 - ARM Ltd
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __ARM64_KVM_HYP_H__
19#define __ARM64_KVM_HYP_H__
20
21#include <linux/compiler.h>
22#include <linux/kvm_host.h>
23#include <asm/kvm_mmu.h>
24#include <asm/kvm_perf_event.h>
25#include <asm/sysreg.h>
26
27#define __hyp_text __section(.hyp.text) notrace
28
29static inline unsigned long __kern_hyp_va(unsigned long v)
30{
31 asm volatile(ALTERNATIVE("and %0, %0, %1",
32 "nop",
33 ARM64_HAS_VIRT_HOST_EXTN)
34 : "+r" (v) : "i" (HYP_PAGE_OFFSET_MASK));
35 return v;
36}
37
38#define kern_hyp_va(v) (typeof(v))(__kern_hyp_va((unsigned long)(v)))
39
40static inline unsigned long __hyp_kern_va(unsigned long v)
41{
42 u64 offset = PAGE_OFFSET - HYP_PAGE_OFFSET;
43 asm volatile(ALTERNATIVE("add %0, %0, %1",
44 "nop",
45 ARM64_HAS_VIRT_HOST_EXTN)
46 : "+r" (v) : "r" (offset));
47 return v;
48}
49
50#define hyp_kern_va(v) (typeof(v))(__hyp_kern_va((unsigned long)(v)))
51
52#define read_sysreg_elx(r,nvh,vh) \
53 ({ \
54 u64 reg; \
55 asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\
56 "mrs_s %0, " __stringify(r##vh),\
57 ARM64_HAS_VIRT_HOST_EXTN) \
58 : "=r" (reg)); \
59 reg; \
60 })
61
62#define write_sysreg_elx(v,r,nvh,vh) \
63 do { \
64 u64 __val = (u64)(v); \
65 asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\
66 "msr_s " __stringify(r##vh) ", %x0",\
67 ARM64_HAS_VIRT_HOST_EXTN) \
68 : : "rZ" (__val)); \
69 } while (0)
70
71/*
72 * Unified accessors for registers that have a different encoding
73 * between VHE and non-VHE. They must be specified without their "ELx"
74 * encoding.
75 */
76#define read_sysreg_el2(r) \
77 ({ \
78 u64 reg; \
79 asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##_EL2),\
80 "mrs %0, " __stringify(r##_EL1),\
81 ARM64_HAS_VIRT_HOST_EXTN) \
82 : "=r" (reg)); \
83 reg; \
84 })
85
86#define write_sysreg_el2(v,r) \
87 do { \
88 u64 __val = (u64)(v); \
89 asm volatile(ALTERNATIVE("msr " __stringify(r##_EL2) ", %x0",\
90 "msr " __stringify(r##_EL1) ", %x0",\
91 ARM64_HAS_VIRT_HOST_EXTN) \
92 : : "rZ" (__val)); \
93 } while (0)
94
95#define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02)
96#define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02)
97#define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12)
98#define write_sysreg_el1(v,r) write_sysreg_elx(v, r, _EL1, _EL12)
99
100/* The VHE specific system registers and their encoding */
101#define sctlr_EL12 sys_reg(3, 5, 1, 0, 0)
102#define cpacr_EL12 sys_reg(3, 5, 1, 0, 2)
103#define ttbr0_EL12 sys_reg(3, 5, 2, 0, 0)
104#define ttbr1_EL12 sys_reg(3, 5, 2, 0, 1)
105#define tcr_EL12 sys_reg(3, 5, 2, 0, 2)
106#define afsr0_EL12 sys_reg(3, 5, 5, 1, 0)
107#define afsr1_EL12 sys_reg(3, 5, 5, 1, 1)
108#define esr_EL12 sys_reg(3, 5, 5, 2, 0)
109#define far_EL12 sys_reg(3, 5, 6, 0, 0)
110#define mair_EL12 sys_reg(3, 5, 10, 2, 0)
111#define amair_EL12 sys_reg(3, 5, 10, 3, 0)
112#define vbar_EL12 sys_reg(3, 5, 12, 0, 0)
113#define contextidr_EL12 sys_reg(3, 5, 13, 0, 1)
114#define cntkctl_EL12 sys_reg(3, 5, 14, 1, 0)
115#define cntp_tval_EL02 sys_reg(3, 5, 14, 2, 0)
116#define cntp_ctl_EL02 sys_reg(3, 5, 14, 2, 1)
117#define cntp_cval_EL02 sys_reg(3, 5, 14, 2, 2)
118#define cntv_tval_EL02 sys_reg(3, 5, 14, 3, 0)
119#define cntv_ctl_EL02 sys_reg(3, 5, 14, 3, 1)
120#define cntv_cval_EL02 sys_reg(3, 5, 14, 3, 2)
121#define spsr_EL12 sys_reg(3, 5, 4, 0, 0)
122#define elr_EL12 sys_reg(3, 5, 4, 0, 1)
123
124/**
125 * hyp_alternate_select - Generates patchable code sequences that are
126 * used to switch between two implementations of a function, depending
127 * on the availability of a feature.
128 *
129 * @fname: a symbol name that will be defined as a function returning a
130 * function pointer whose type will match @orig and @alt
131 * @orig: A pointer to the default function, as returned by @fname when
132 * @cond doesn't hold
133 * @alt: A pointer to the alternate function, as returned by @fname
134 * when @cond holds
135 * @cond: a CPU feature (as described in asm/cpufeature.h)
136 */
137#define hyp_alternate_select(fname, orig, alt, cond) \
138typeof(orig) * __hyp_text fname(void) \
139{ \
140 typeof(alt) *val = orig; \
141 asm volatile(ALTERNATIVE("nop \n", \
142 "mov %0, %1 \n", \
143 cond) \
144 : "+r" (val) : "r" (alt)); \
145 return val; \
146}
147
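A hypothetical usage sketch (the function names below are illustrative, not taken from this patch): hyp_alternate_select() defines a tiny selector whose returned pointer is patched at boot, so hyp code can pick the VHE or non-VHE variant without testing a feature flag at runtime.

	static void __hyp_text __do_switch_nvhe(void)
	{
		/* non-VHE-only work would go here */
	}

	static void __hyp_text __do_switch_vhe(void)
	{
		/* VHE-only work would go here */
	}

	/* Returns __do_switch_vhe when ARM64_HAS_VIRT_HOST_EXTN is detected,
	 * __do_switch_nvhe otherwise; the choice is patched in at boot. */
	static hyp_alternate_select(__do_switch, __do_switch_nvhe, __do_switch_vhe,
				    ARM64_HAS_VIRT_HOST_EXTN);

	static void __hyp_text do_switch(void)
	{
		__do_switch()();	/* the selector returns a function pointer */
	}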
148void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
149void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
150
151void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
152void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
153
154void __timer_save_state(struct kvm_vcpu *vcpu);
155void __timer_restore_state(struct kvm_vcpu *vcpu);
156
157void __sysreg_save_host_state(struct kvm_cpu_context *ctxt);
158void __sysreg_restore_host_state(struct kvm_cpu_context *ctxt);
159void __sysreg_save_guest_state(struct kvm_cpu_context *ctxt);
160void __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt);
161void __sysreg32_save_state(struct kvm_vcpu *vcpu);
162void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
163
164void __debug_save_state(struct kvm_vcpu *vcpu,
165 struct kvm_guest_debug_arch *dbg,
166 struct kvm_cpu_context *ctxt);
167void __debug_restore_state(struct kvm_vcpu *vcpu,
168 struct kvm_guest_debug_arch *dbg,
169 struct kvm_cpu_context *ctxt);
170void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
171void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
172
173void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
174void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
175bool __fpsimd_enabled(void);
176
177u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
178void __noreturn __hyp_do_panic(unsigned long, ...);
179
180#endif /* __ARM64_KVM_HYP_H__ */
181
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 736433912a1e..9a9318adefa6 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -23,13 +23,16 @@
23#include <asm/cpufeature.h> 23#include <asm/cpufeature.h>
24 24
25/* 25/*
26 * As we only have the TTBR0_EL2 register, we cannot express 26 * As ARMv8.0 only has the TTBR0_EL2 register, we cannot express
27 * "negative" addresses. This makes it impossible to directly share 27 * "negative" addresses. This makes it impossible to directly share
28 * mappings with the kernel. 28 * mappings with the kernel.
29 * 29 *
30 * Instead, give the HYP mode its own VA region at a fixed offset from 30 * Instead, give the HYP mode its own VA region at a fixed offset from
31 * the kernel by just masking the top bits (which are all ones for a 31 * the kernel by just masking the top bits (which are all ones for a
32 * kernel address). 32 * kernel address).
33 *
34 * ARMv8.1 (using VHE) does have a TTBR1_EL2, and doesn't use these
35 * macros (the entire kernel runs at EL2).
33 */ 36 */
34#define HYP_PAGE_OFFSET_SHIFT VA_BITS 37#define HYP_PAGE_OFFSET_SHIFT VA_BITS
35#define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1) 38#define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1)
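A worked example, assuming VA_BITS == 39 (so HYP_PAGE_OFFSET_MASK is (1UL << 39) - 1): masking keeps the low 39 bits and clears the all-ones top bits of a kernel address, so page offsets are preserved.

	kernel VA:  0xffffffc000080000
	HYP VA:     0x0000004000080000	(kernel VA & HYP_PAGE_OFFSET_MASK)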
@@ -56,12 +59,19 @@
56 59
57#ifdef __ASSEMBLY__ 60#ifdef __ASSEMBLY__
58 61
62#include <asm/alternative.h>
63#include <asm/cpufeature.h>
64
59/* 65/*
60 * Convert a kernel VA into a HYP VA. 66 * Convert a kernel VA into a HYP VA.
61 * reg: VA to be converted. 67 * reg: VA to be converted.
62 */ 68 */
63.macro kern_hyp_va reg 69.macro kern_hyp_va reg
70alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
64 and \reg, \reg, #HYP_PAGE_OFFSET_MASK 71 and \reg, \reg, #HYP_PAGE_OFFSET_MASK
72alternative_else
73 nop
74alternative_endif
65.endm 75.endm
66 76
67#else 77#else
diff --git a/arch/arm64/include/asm/kvm_perf_event.h b/arch/arm64/include/asm/kvm_perf_event.h
new file mode 100644
index 000000000000..c18fdebb8f66
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_perf_event.h
@@ -0,0 +1,68 @@
1/*
2 * Copyright (C) 2012 ARM Ltd.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __ASM_KVM_PERF_EVENT_H
18#define __ASM_KVM_PERF_EVENT_H
19
20#define ARMV8_PMU_MAX_COUNTERS 32
21#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1)
22
23/*
24 * Per-CPU PMCR: config reg
25 */
26#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */
27#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */
28#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */
29#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */
30#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */
31#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
32/* Determines which bit of PMCCNTR_EL0 generates an overflow */
33#define ARMV8_PMU_PMCR_LC (1 << 6)
34#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */
35#define ARMV8_PMU_PMCR_N_MASK 0x1f
36#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */
37
38/*
39 * PMOVSR: counters overflow flag status reg
40 */
41#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */
42#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK
43
44/*
45 * PMXEVTYPER: Event selection reg
46 */
47#define ARMV8_PMU_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */
48#define ARMV8_PMU_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */
49
50#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0 /* Software increment event */
51
52/*
53 * Event filters for PMUv3
54 */
55#define ARMV8_PMU_EXCLUDE_EL1 (1 << 31)
56#define ARMV8_PMU_EXCLUDE_EL0 (1 << 30)
57#define ARMV8_PMU_INCLUDE_EL2 (1 << 27)
58
59/*
60 * PMUSERENR: user enable reg
61 */
62#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */
63#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */
64#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */
65#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */
66#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */
67
68#endif
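
As a quick illustration of how these definitions are meant to be combined, a kernel-style sketch follows; the helper name is made up and it only uses the masks defined above:

/* Illustrative only: number of event counters advertised by PMCR_EL0. */
static inline unsigned int armv8_pmcr_num_counters(u64 pmcr)
{
	return (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
}

/* Likewise, an event type that counts EL0 and EL2 but excludes EL1 would be
 * (event & ARMV8_PMU_EVTYPE_EVENT) | ARMV8_PMU_EXCLUDE_EL1 |
 * ARMV8_PMU_INCLUDE_EL2, which still fits within ARMV8_PMU_EVTYPE_MASK. */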
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 7a5df5252dd7..9f22dd607958 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -23,6 +23,8 @@
23 23
24#ifndef __ASSEMBLY__ 24#ifndef __ASSEMBLY__
25 25
26#include <asm/ptrace.h>
27
26/* 28/*
27 * __boot_cpu_mode records what mode CPUs were booted in. 29 * __boot_cpu_mode records what mode CPUs were booted in.
28 * A correctly-implemented bootloader must start all CPUs in the same mode: 30 * A correctly-implemented bootloader must start all CPUs in the same mode:
@@ -50,6 +52,14 @@ static inline bool is_hyp_mode_mismatched(void)
50 return __boot_cpu_mode[0] != __boot_cpu_mode[1]; 52 return __boot_cpu_mode[0] != __boot_cpu_mode[1];
51} 53}
52 54
55static inline bool is_kernel_in_hyp_mode(void)
56{
57 u64 el;
58
59 asm("mrs %0, CurrentEL" : "=r" (el));
60 return el == CurrentEL_EL2;
61}
62
53/* The section containing the hypervisor text */ 63/* The section containing the hypervisor text */
54extern char __hyp_text_start[]; 64extern char __hyp_text_start[];
55extern char __hyp_text_end[]; 65extern char __hyp_text_end[];
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 2d4ca4bb0dd3..f209ea151dca 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -94,6 +94,7 @@ struct kvm_regs {
94#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ 94#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
95#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ 95#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
96#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */ 96#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */
97#define KVM_ARM_VCPU_PMU_V3 3 /* Support guest PMUv3 */
97 98
98struct kvm_vcpu_init { 99struct kvm_vcpu_init {
99 __u32 target; 100 __u32 target;
@@ -204,6 +205,11 @@ struct kvm_arch_memory_slot {
204#define KVM_DEV_ARM_VGIC_GRP_CTRL 4 205#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
205#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 206#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
206 207
208/* Device Control API on vcpu fd */
209#define KVM_ARM_VCPU_PMU_V3_CTRL 0
210#define KVM_ARM_VCPU_PMU_V3_IRQ 0
211#define KVM_ARM_VCPU_PMU_V3_INIT 1
212
207/* KVM_IRQ_LINE irq field index values */ 213/* KVM_IRQ_LINE irq field index values */
208#define KVM_ARM_IRQ_TYPE_SHIFT 24 214#define KVM_ARM_IRQ_TYPE_SHIFT 24
209#define KVM_ARM_IRQ_TYPE_MASK 0xff 215#define KVM_ARM_IRQ_TYPE_MASK 0xff
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index fffa4ac6c25a..b0ab4e93db0d 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -110,9 +110,6 @@ int main(void)
110 DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); 110 DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
111 DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); 111 DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
112 DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); 112 DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
113 DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2));
114 DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
115 DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
116 DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); 113 DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
117#endif 114#endif
118#ifdef CONFIG_CPU_PM 115#ifdef CONFIG_CPU_PM
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 5c90aa490a2b..ba745199297e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -26,6 +26,7 @@
26#include <asm/cpu_ops.h> 26#include <asm/cpu_ops.h>
27#include <asm/processor.h> 27#include <asm/processor.h>
28#include <asm/sysreg.h> 28#include <asm/sysreg.h>
29#include <asm/virt.h>
29 30
30unsigned long elf_hwcap __read_mostly; 31unsigned long elf_hwcap __read_mostly;
31EXPORT_SYMBOL_GPL(elf_hwcap); 32EXPORT_SYMBOL_GPL(elf_hwcap);
@@ -621,6 +622,11 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry)
621 return has_sre; 622 return has_sre;
622} 623}
623 624
625static bool runs_at_el2(const struct arm64_cpu_capabilities *entry)
626{
627 return is_kernel_in_hyp_mode();
628}
629
624static const struct arm64_cpu_capabilities arm64_features[] = { 630static const struct arm64_cpu_capabilities arm64_features[] = {
625 { 631 {
626 .desc = "GIC system register CPU interface", 632 .desc = "GIC system register CPU interface",
@@ -651,6 +657,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
651 .min_field_value = 2, 657 .min_field_value = 2,
652 }, 658 },
653#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ 659#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
660 {
661 .desc = "Virtualization Host Extensions",
662 .capability = ARM64_HAS_VIRT_HOST_EXTN,
663 .matches = runs_at_el2,
664 },
654 {}, 665 {},
655}; 666};
656 667
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 917d98108b3f..6f2f37743d3b 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -30,6 +30,7 @@
30#include <asm/cache.h> 30#include <asm/cache.h>
31#include <asm/cputype.h> 31#include <asm/cputype.h>
32#include <asm/kernel-pgtable.h> 32#include <asm/kernel-pgtable.h>
33#include <asm/kvm_arm.h>
33#include <asm/memory.h> 34#include <asm/memory.h>
34#include <asm/pgtable-hwdef.h> 35#include <asm/pgtable-hwdef.h>
35#include <asm/pgtable.h> 36#include <asm/pgtable.h>
@@ -464,9 +465,27 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1
464 isb 465 isb
465 ret 466 ret
466 467
4682:
469#ifdef CONFIG_ARM64_VHE
470 /*
471 * Check for VHE being present. For the rest of the EL2 setup,
472 * x2 being non-zero indicates that we do have VHE, and that the
473 * kernel is intended to run at EL2.
474 */
475 mrs x2, id_aa64mmfr1_el1
476 ubfx x2, x2, #8, #4
477#else
478 mov x2, xzr
479#endif
480
467 /* Hyp configuration. */ 481 /* Hyp configuration. */
4682: mov x0, #(1 << 31) // 64-bit EL1 482 mov x0, #HCR_RW // 64-bit EL1
483 cbz x2, set_hcr
484 orr x0, x0, #HCR_TGE // Enable Host Extensions
485 orr x0, x0, #HCR_E2H
486set_hcr:
469 msr hcr_el2, x0 487 msr hcr_el2, x0
488 isb
470 489
471 /* Generic timers. */ 490 /* Generic timers. */
472 mrs x0, cnthctl_el2 491 mrs x0, cnthctl_el2
@@ -526,6 +545,13 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
526 /* Stage-2 translation */ 545 /* Stage-2 translation */
527 msr vttbr_el2, xzr 546 msr vttbr_el2, xzr
528 547
548 cbz x2, install_el2_stub
549
550 mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
551 isb
552 ret
553
554install_el2_stub:
529 /* Hypervisor stub */ 555 /* Hypervisor stub */
530 adrp x0, __hyp_stub_vectors 556 adrp x0, __hyp_stub_vectors
531 add x0, x0, #:lo12:__hyp_stub_vectors 557 add x0, x0, #:lo12:__hyp_stub_vectors
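
The ubfx above extracts ID_AA64MMFR1_EL1 bits [11:8] (the VH field); a rough C equivalent of the same check, assuming the usual read_sysreg() accessor and a made-up function name, would be:

/* Sketch: a non-zero VH field means ARMv8.1 VHE is implemented. */
static bool cpu_has_vhe_sketch(void)
{
	u64 mmfr1 = read_sysreg(id_aa64mmfr1_el1);

	return ((mmfr1 >> 8) & 0xf) != 0;
}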
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index f7ab14c4d5df..1b52269ffa87 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -20,6 +20,7 @@
20 */ 20 */
21 21
22#include <asm/irq_regs.h> 22#include <asm/irq_regs.h>
23#include <asm/virt.h>
23 24
24#include <linux/of.h> 25#include <linux/of.h>
25#include <linux/perf/arm_pmu.h> 26#include <linux/perf/arm_pmu.h>
@@ -691,9 +692,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
691 692
692 if (attr->exclude_idle) 693 if (attr->exclude_idle)
693 return -EPERM; 694 return -EPERM;
695 if (is_kernel_in_hyp_mode() &&
696 attr->exclude_kernel != attr->exclude_hv)
697 return -EINVAL;
694 if (attr->exclude_user) 698 if (attr->exclude_user)
695 config_base |= ARMV8_EXCLUDE_EL0; 699 config_base |= ARMV8_EXCLUDE_EL0;
696 if (attr->exclude_kernel) 700 if (!is_kernel_in_hyp_mode() && attr->exclude_kernel)
697 config_base |= ARMV8_EXCLUDE_EL1; 701 config_base |= ARMV8_EXCLUDE_EL1;
698 if (!attr->exclude_hv) 702 if (!attr->exclude_hv)
699 config_base |= ARMV8_INCLUDE_EL2; 703 config_base |= ARMV8_INCLUDE_EL2;
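
In other words, on a VHE host the kernel itself runs at EL2, so a request that excludes EL1 ("kernel") but not EL2 ("hv"), or vice versa, is ambiguous and is rejected with -EINVAL by the check above. A hypothetical userspace sketch of an attribute setup that the filter accepts:

#include <linux/perf_event.h>
#include <string.h>

/* Count cycles in user space only; on a VHE host exclude_kernel and
 * exclude_hv must agree, so both are set here. */
static void init_cycle_counter_attr(struct perf_event_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->size = sizeof(*attr);
	attr->type = PERF_TYPE_HARDWARE;
	attr->config = PERF_COUNT_HW_CPU_CYCLES;
	attr->exclude_kernel = 1;
	attr->exclude_hv = 1;
}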
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a5272c07d1cb..de7450df7629 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -36,6 +36,7 @@ config KVM
36 select HAVE_KVM_EVENTFD 36 select HAVE_KVM_EVENTFD
37 select HAVE_KVM_IRQFD 37 select HAVE_KVM_IRQFD
38 select KVM_ARM_VGIC_V3 38 select KVM_ARM_VGIC_V3
39 select KVM_ARM_PMU if HW_PERF_EVENTS
39 ---help--- 40 ---help---
40 Support hosting virtualized guest machines. 41 Support hosting virtualized guest machines.
41 We don't support KVM with 16K page tables yet, due to the multiple 42 We don't support KVM with 16K page tables yet, due to the multiple
@@ -48,6 +49,12 @@ config KVM_ARM_HOST
48 ---help--- 49 ---help---
49 Provides host support for ARM processors. 50 Provides host support for ARM processors.
50 51
52config KVM_ARM_PMU
53 bool
54 ---help---
55 Adds support for a virtual Performance Monitoring Unit (PMU) in
56 virtual machines.
57
51source drivers/vhost/Kconfig 58source drivers/vhost/Kconfig
52 59
53endif # VIRTUALIZATION 60endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index caee9ee8e12a..122cff482ac4 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -26,3 +26,4 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
26kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o 26kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
27kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o 27kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
28kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o 28kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
29kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 9e54ad7c240a..32fad75bb9ff 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -380,3 +380,54 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
380 } 380 }
381 return 0; 381 return 0;
382} 382}
383
384int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
385 struct kvm_device_attr *attr)
386{
387 int ret;
388
389 switch (attr->group) {
390 case KVM_ARM_VCPU_PMU_V3_CTRL:
391 ret = kvm_arm_pmu_v3_set_attr(vcpu, attr);
392 break;
393 default:
394 ret = -ENXIO;
395 break;
396 }
397
398 return ret;
399}
400
401int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
402 struct kvm_device_attr *attr)
403{
404 int ret;
405
406 switch (attr->group) {
407 case KVM_ARM_VCPU_PMU_V3_CTRL:
408 ret = kvm_arm_pmu_v3_get_attr(vcpu, attr);
409 break;
410 default:
411 ret = -ENXIO;
412 break;
413 }
414
415 return ret;
416}
417
418int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
419 struct kvm_device_attr *attr)
420{
421 int ret;
422
423 switch (attr->group) {
424 case KVM_ARM_VCPU_PMU_V3_CTRL:
425 ret = kvm_arm_pmu_v3_has_attr(vcpu, attr);
426 break;
427 default:
428 ret = -ENXIO;
429 break;
430 }
431
432 return ret;
433}
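
These handlers back the new vcpu-level device control API (KVM_ARM_VCPU_PMU_V3_CTRL above). A rough userspace sketch of how they would be exercised, with error handling trimmed and an arbitrary example interrupt number:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Sketch: wire the guest PMUv3 overflow interrupt, then initialize the PMU,
 * through the vcpu file descriptor.  PPI 23 is only an example. */
static int enable_guest_pmu(int vcpu_fd)
{
	int irq = 23;
	struct kvm_device_attr attr = {
		.group = KVM_ARM_VCPU_PMU_V3_CTRL,
		.attr  = KVM_ARM_VCPU_PMU_V3_IRQ,
		.addr  = (__u64)(unsigned long)&irq,
	};

	if (ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr))
		return -1;

	attr.attr = KVM_ARM_VCPU_PMU_V3_INIT;
	attr.addr = 0;
	return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
}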
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index d073b5a216f7..7d8747c6427c 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -87,26 +87,13 @@ __do_hyp_init:
87#endif 87#endif
88 /* 88 /*
89 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in 89 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
90 * TCR_EL2 and VTCR_EL2. 90 * TCR_EL2.
91 */ 91 */
92 mrs x5, ID_AA64MMFR0_EL1 92 mrs x5, ID_AA64MMFR0_EL1
93 bfi x4, x5, #16, #3 93 bfi x4, x5, #16, #3
94 94
95 msr tcr_el2, x4 95 msr tcr_el2, x4
96 96
97 ldr x4, =VTCR_EL2_FLAGS
98 bfi x4, x5, #16, #3
99 /*
100 * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in
101 * VTCR_EL2.
102 */
103 mrs x5, ID_AA64MMFR1_EL1
104 ubfx x5, x5, #5, #1
105 lsl x5, x5, #VTCR_EL2_VS
106 orr x4, x4, x5
107
108 msr vtcr_el2, x4
109
110 mrs x4, mair_el1 97 mrs x4, mair_el1
111 msr mair_el2, x4 98 msr mair_el2, x4
112 isb 99 isb
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 0ccdcbbef3c2..0689a74e6ba0 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -17,7 +17,9 @@
17 17
18#include <linux/linkage.h> 18#include <linux/linkage.h>
19 19
20#include <asm/alternative.h>
20#include <asm/assembler.h> 21#include <asm/assembler.h>
22#include <asm/cpufeature.h>
21 23
22/* 24/*
23 * u64 kvm_call_hyp(void *hypfn, ...); 25 * u64 kvm_call_hyp(void *hypfn, ...);
@@ -38,6 +40,11 @@
38 * arch/arm64/kernel/hyp_stub.S. 40 * arch/arm64/kernel/hyp_stub.S.
39 */ 41 */
40ENTRY(kvm_call_hyp) 42ENTRY(kvm_call_hyp)
43alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
41 hvc #0 44 hvc #0
42 ret 45 ret
46alternative_else
47 b __vhe_hyp_call
48 nop
49alternative_endif
43ENDPROC(kvm_call_hyp) 50ENDPROC(kvm_call_hyp)
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 826032bc3945..b6a8fc5ad1af 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -2,9 +2,12 @@
2# Makefile for Kernel-based Virtual Machine module, HYP part 2# Makefile for Kernel-based Virtual Machine module, HYP part
3# 3#
4 4
5obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o 5KVM=../../../../virt/kvm
6
7obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
8obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
9
6obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o 10obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o
7obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o
8obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o 11obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
9obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o 12obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
10obj-$(CONFIG_KVM_ARM_HOST) += entry.o 13obj-$(CONFIG_KVM_ARM_HOST) += entry.o
@@ -12,3 +15,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += switch.o
12obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o 15obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
13obj-$(CONFIG_KVM_ARM_HOST) += tlb.o 16obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
14obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o 17obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
18obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index c9c1e97501a9..053cf8b057c1 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -19,9 +19,7 @@
19#include <linux/kvm_host.h> 19#include <linux/kvm_host.h>
20 20
21#include <asm/kvm_asm.h> 21#include <asm/kvm_asm.h>
22#include <asm/kvm_mmu.h> 22#include <asm/kvm_hyp.h>
23
24#include "hyp.h"
25 23
26#define read_debug(r,n) read_sysreg(r##n##_el1) 24#define read_debug(r,n) read_sysreg(r##n##_el1)
27#define write_debug(v,r,n) write_sysreg(v, r##n##_el1) 25#define write_debug(v,r,n) write_sysreg(v, r##n##_el1)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index fd0fbe9b7e6a..ce9e5e5f28cf 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -130,9 +130,15 @@ ENDPROC(__guest_exit)
130ENTRY(__fpsimd_guest_restore) 130ENTRY(__fpsimd_guest_restore)
131 stp x4, lr, [sp, #-16]! 131 stp x4, lr, [sp, #-16]!
132 132
133alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
133 mrs x2, cptr_el2 134 mrs x2, cptr_el2
134 bic x2, x2, #CPTR_EL2_TFP 135 bic x2, x2, #CPTR_EL2_TFP
135 msr cptr_el2, x2 136 msr cptr_el2, x2
137alternative_else
138 mrs x2, cpacr_el1
139 orr x2, x2, #CPACR_EL1_FPEN
140 msr cpacr_el1, x2
141alternative_endif
136 isb 142 isb
137 143
138 mrs x3, tpidr_el2 144 mrs x3, tpidr_el2
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 93e8d983c0bd..3488894397ff 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -19,7 +19,6 @@
19 19
20#include <asm/alternative.h> 20#include <asm/alternative.h>
21#include <asm/assembler.h> 21#include <asm/assembler.h>
22#include <asm/asm-offsets.h>
23#include <asm/cpufeature.h> 22#include <asm/cpufeature.h>
24#include <asm/kvm_arm.h> 23#include <asm/kvm_arm.h>
25#include <asm/kvm_asm.h> 24#include <asm/kvm_asm.h>
@@ -38,10 +37,42 @@
38 ldp x0, x1, [sp], #16 37 ldp x0, x1, [sp], #16
39.endm 38.endm
40 39
40.macro do_el2_call
41 /*
42 * Shuffle the parameters before calling the function
43 * pointed to in x0. Assumes parameters in x[1,2,3].
44 */
45 sub sp, sp, #16
46 str lr, [sp]
47 mov lr, x0
48 mov x0, x1
49 mov x1, x2
50 mov x2, x3
51 blr lr
52 ldr lr, [sp]
53 add sp, sp, #16
54.endm
55
56ENTRY(__vhe_hyp_call)
57 do_el2_call
58 /*
59 * We used to rely on having an exception return to get
60 * an implicit isb. In the E2H case, we don't have it anymore.
 61	 * Rather than changing all the leaf functions, just do it here
62 * before returning to the rest of the kernel.
63 */
64 isb
65 ret
66ENDPROC(__vhe_hyp_call)
67
41el1_sync: // Guest trapped into EL2 68el1_sync: // Guest trapped into EL2
42 save_x0_to_x3 69 save_x0_to_x3
43 70
71alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
44 mrs x1, esr_el2 72 mrs x1, esr_el2
73alternative_else
74 mrs x1, esr_el1
75alternative_endif
45 lsr x2, x1, #ESR_ELx_EC_SHIFT 76 lsr x2, x1, #ESR_ELx_EC_SHIFT
46 77
47 cmp x2, #ESR_ELx_EC_HVC64 78 cmp x2, #ESR_ELx_EC_HVC64
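
Roughly speaking, do_el2_call turns the hyp calling convention (function pointer in x0, arguments in x1-x3) into an ordinary indirect call; in C terms it behaves like the hypothetical, kernel-style helper below:

typedef u64 (*hyp_fn_t)(u64, u64, u64);

static u64 do_el2_call_c_view(hyp_fn_t fn, u64 a, u64 b, u64 c)
{
	return fn(a, b, c);	/* kvm_call_hyp(fn, a, b, c) ends up here */
}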
@@ -58,19 +89,13 @@ el1_sync: // Guest trapped into EL2
58 mrs x0, vbar_el2 89 mrs x0, vbar_el2
59 b 2f 90 b 2f
60 91
611: stp lr, xzr, [sp, #-16]! 921:
62
63 /* 93 /*
64 * Compute the function address in EL2, and shuffle the parameters. 94 * Perform the EL2 call
65 */ 95 */
66 kern_hyp_va x0 96 kern_hyp_va x0
67 mov lr, x0 97 do_el2_call
68 mov x0, x1
69 mov x1, x2
70 mov x2, x3
71 blr lr
72 98
73 ldp lr, xzr, [sp], #16
742: eret 992: eret
75 100
76el1_trap: 101el1_trap:
@@ -83,72 +108,10 @@ el1_trap:
83 cmp x2, #ESR_ELx_EC_FP_ASIMD 108 cmp x2, #ESR_ELx_EC_FP_ASIMD
84 b.eq __fpsimd_guest_restore 109 b.eq __fpsimd_guest_restore
85 110
86 cmp x2, #ESR_ELx_EC_DABT_LOW 111 mrs x0, tpidr_el2
87 mov x0, #ESR_ELx_EC_IABT_LOW
88 ccmp x2, x0, #4, ne
89 b.ne 1f // Not an abort we care about
90
91 /* This is an abort. Check for permission fault */
92alternative_if_not ARM64_WORKAROUND_834220
93 and x2, x1, #ESR_ELx_FSC_TYPE
94 cmp x2, #FSC_PERM
95 b.ne 1f // Not a permission fault
96alternative_else
97 nop // Use the permission fault path to
98 nop // check for a valid S1 translation,
99 nop // regardless of the ESR value.
100alternative_endif
101
102 /*
103 * Check for Stage-1 page table walk, which is guaranteed
104 * to give a valid HPFAR_EL2.
105 */
106 tbnz x1, #7, 1f // S1PTW is set
107
108 /* Preserve PAR_EL1 */
109 mrs x3, par_el1
110 stp x3, xzr, [sp, #-16]!
111
112 /*
113 * Permission fault, HPFAR_EL2 is invalid.
114 * Resolve the IPA the hard way using the guest VA.
115 * Stage-1 translation already validated the memory access rights.
116 * As such, we can use the EL1 translation regime, and don't have
117 * to distinguish between EL0 and EL1 access.
118 */
119 mrs x2, far_el2
120 at s1e1r, x2
121 isb
122
123 /* Read result */
124 mrs x3, par_el1
125 ldp x0, xzr, [sp], #16 // Restore PAR_EL1 from the stack
126 msr par_el1, x0
127 tbnz x3, #0, 3f // Bail out if we failed the translation
128 ubfx x3, x3, #12, #36 // Extract IPA
129 lsl x3, x3, #4 // and present it like HPFAR
130 b 2f
131
1321: mrs x3, hpfar_el2
133 mrs x2, far_el2
134
1352: mrs x0, tpidr_el2
136 str w1, [x0, #VCPU_ESR_EL2]
137 str x2, [x0, #VCPU_FAR_EL2]
138 str x3, [x0, #VCPU_HPFAR_EL2]
139
140 mov x1, #ARM_EXCEPTION_TRAP 112 mov x1, #ARM_EXCEPTION_TRAP
141 b __guest_exit 113 b __guest_exit
142 114
143 /*
144 * Translation failed. Just return to the guest and
145 * let it fault again. Another CPU is probably playing
146 * behind our back.
147 */
1483: restore_x0_to_x3
149
150 eret
151
152el1_irq: 115el1_irq:
153 save_x0_to_x3 116 save_x0_to_x3
154 mrs x0, tpidr_el2 117 mrs x0, tpidr_el2
diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h
deleted file mode 100644
index fb275178b6af..000000000000
--- a/arch/arm64/kvm/hyp/hyp.h
+++ /dev/null
@@ -1,90 +0,0 @@
1/*
2 * Copyright (C) 2015 - ARM Ltd
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __ARM64_KVM_HYP_H__
19#define __ARM64_KVM_HYP_H__
20
21#include <linux/compiler.h>
22#include <linux/kvm_host.h>
23#include <asm/kvm_mmu.h>
24#include <asm/sysreg.h>
25
26#define __hyp_text __section(.hyp.text) notrace
27
28#define kern_hyp_va(v) (typeof(v))((unsigned long)(v) & HYP_PAGE_OFFSET_MASK)
29#define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \
30 + PAGE_OFFSET)
31
32/**
33 * hyp_alternate_select - Generates patchable code sequences that are
34 * used to switch between two implementations of a function, depending
35 * on the availability of a feature.
36 *
37 * @fname: a symbol name that will be defined as a function returning a
38 * function pointer whose type will match @orig and @alt
39 * @orig: A pointer to the default function, as returned by @fname when
40 * @cond doesn't hold
41 * @alt: A pointer to the alternate function, as returned by @fname
42 * when @cond holds
43 * @cond: a CPU feature (as described in asm/cpufeature.h)
44 */
45#define hyp_alternate_select(fname, orig, alt, cond) \
46typeof(orig) * __hyp_text fname(void) \
47{ \
48 typeof(alt) *val = orig; \
49 asm volatile(ALTERNATIVE("nop \n", \
50 "mov %0, %1 \n", \
51 cond) \
52 : "+r" (val) : "r" (alt)); \
53 return val; \
54}
55
56void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
57void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
58
59void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
60void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
61
62void __timer_save_state(struct kvm_vcpu *vcpu);
63void __timer_restore_state(struct kvm_vcpu *vcpu);
64
65void __sysreg_save_state(struct kvm_cpu_context *ctxt);
66void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
67void __sysreg32_save_state(struct kvm_vcpu *vcpu);
68void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
69
70void __debug_save_state(struct kvm_vcpu *vcpu,
71 struct kvm_guest_debug_arch *dbg,
72 struct kvm_cpu_context *ctxt);
73void __debug_restore_state(struct kvm_vcpu *vcpu,
74 struct kvm_guest_debug_arch *dbg,
75 struct kvm_cpu_context *ctxt);
76void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
77void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
78
79void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
80void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
81static inline bool __fpsimd_enabled(void)
82{
83 return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
84}
85
86u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
87void __noreturn __hyp_do_panic(unsigned long, ...);
88
89#endif /* __ARM64_KVM_HYP_H__ */
90
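
The hyp_alternate_select() helper documented above moves into asm/kvm_hyp.h rather than disappearing; its usage pattern, as seen in switch.c further down, looks roughly like this (the names below are made up):

static bool __hyp_text __feature_off(void) { return false; }
static bool __hyp_text __feature_on(void)  { return true; }

static hyp_alternate_select(__check_feature,
			    __feature_off, __feature_on,
			    ARM64_HAS_VIRT_HOST_EXTN);

static bool __hyp_text have_feature(void)
{
	/* First call returns the patched-in implementation, second runs it. */
	return __check_feature()();
}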
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
new file mode 100644
index 000000000000..bfc54fd82797
--- /dev/null
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -0,0 +1,43 @@
1/*
2 * Copyright (C) 2016 - ARM Ltd
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <linux/types.h>
19#include <asm/kvm_arm.h>
20#include <asm/kvm_asm.h>
21#include <asm/kvm_hyp.h>
22
23void __hyp_text __init_stage2_translation(void)
24{
25 u64 val = VTCR_EL2_FLAGS;
26 u64 tmp;
27
28 /*
29 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS
30 * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while
31 * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
32 */
33 val |= (read_sysreg(id_aa64mmfr0_el1) & 7) << 16;
34
35 /*
36 * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
37 * bit in VTCR_EL2.
38 */
39 tmp = (read_sysreg(id_aa64mmfr1_el1) >> 4) & 0xf;
40 val |= (tmp == 2) ? VTCR_EL2_VS : 0;
41
42 write_sysreg(val, vtcr_el2);
43}
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index f0e7bdfae134..437cfad5e3d8 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -15,7 +15,53 @@
15 * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18#include "hyp.h" 18#include <linux/types.h>
19#include <asm/kvm_asm.h>
20#include <asm/kvm_hyp.h>
21
22static bool __hyp_text __fpsimd_enabled_nvhe(void)
23{
24 return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
25}
26
27static bool __hyp_text __fpsimd_enabled_vhe(void)
28{
29 return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN);
30}
31
32static hyp_alternate_select(__fpsimd_is_enabled,
33 __fpsimd_enabled_nvhe, __fpsimd_enabled_vhe,
34 ARM64_HAS_VIRT_HOST_EXTN);
35
36bool __hyp_text __fpsimd_enabled(void)
37{
38 return __fpsimd_is_enabled()();
39}
40
41static void __hyp_text __activate_traps_vhe(void)
42{
43 u64 val;
44
45 val = read_sysreg(cpacr_el1);
46 val |= CPACR_EL1_TTA;
47 val &= ~CPACR_EL1_FPEN;
48 write_sysreg(val, cpacr_el1);
49
50 write_sysreg(__kvm_hyp_vector, vbar_el1);
51}
52
53static void __hyp_text __activate_traps_nvhe(void)
54{
55 u64 val;
56
57 val = CPTR_EL2_DEFAULT;
58 val |= CPTR_EL2_TTA | CPTR_EL2_TFP;
59 write_sysreg(val, cptr_el2);
60}
61
62static hyp_alternate_select(__activate_traps_arch,
63 __activate_traps_nvhe, __activate_traps_vhe,
64 ARM64_HAS_VIRT_HOST_EXTN);
19 65
20static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) 66static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
21{ 67{
@@ -36,20 +82,37 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
36 write_sysreg(val, hcr_el2); 82 write_sysreg(val, hcr_el2);
37 /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ 83 /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
38 write_sysreg(1 << 15, hstr_el2); 84 write_sysreg(1 << 15, hstr_el2);
85 /* Make sure we trap PMU access from EL0 to EL2 */
86 write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
87 write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
88 __activate_traps_arch()();
89}
39 90
40 val = CPTR_EL2_DEFAULT; 91static void __hyp_text __deactivate_traps_vhe(void)
41 val |= CPTR_EL2_TTA | CPTR_EL2_TFP; 92{
42 write_sysreg(val, cptr_el2); 93 extern char vectors[]; /* kernel exception vectors */
43 94
44 write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); 95 write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
96 write_sysreg(CPACR_EL1_FPEN, cpacr_el1);
97 write_sysreg(vectors, vbar_el1);
45} 98}
46 99
47static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) 100static void __hyp_text __deactivate_traps_nvhe(void)
48{ 101{
49 write_sysreg(HCR_RW, hcr_el2); 102 write_sysreg(HCR_RW, hcr_el2);
103 write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
104}
105
106static hyp_alternate_select(__deactivate_traps_arch,
107 __deactivate_traps_nvhe, __deactivate_traps_vhe,
108 ARM64_HAS_VIRT_HOST_EXTN);
109
110static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
111{
112 __deactivate_traps_arch()();
50 write_sysreg(0, hstr_el2); 113 write_sysreg(0, hstr_el2);
51 write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2); 114 write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2);
52 write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); 115 write_sysreg(0, pmuserenr_el0);
53} 116}
54 117
55static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) 118static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
@@ -89,6 +152,86 @@ static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
89 __vgic_call_restore_state()(vcpu); 152 __vgic_call_restore_state()(vcpu);
90} 153}
91 154
155static bool __hyp_text __true_value(void)
156{
157 return true;
158}
159
160static bool __hyp_text __false_value(void)
161{
162 return false;
163}
164
165static hyp_alternate_select(__check_arm_834220,
166 __false_value, __true_value,
167 ARM64_WORKAROUND_834220);
168
169static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
170{
171 u64 par, tmp;
172
173 /*
174 * Resolve the IPA the hard way using the guest VA.
175 *
176 * Stage-1 translation already validated the memory access
177 * rights. As such, we can use the EL1 translation regime, and
178 * don't have to distinguish between EL0 and EL1 access.
179 *
180 * We do need to save/restore PAR_EL1 though, as we haven't
181 * saved the guest context yet, and we may return early...
182 */
183 par = read_sysreg(par_el1);
184 asm volatile("at s1e1r, %0" : : "r" (far));
185 isb();
186
187 tmp = read_sysreg(par_el1);
188 write_sysreg(par, par_el1);
189
190 if (unlikely(tmp & 1))
191 return false; /* Translation failed, back to guest */
192
193 /* Convert PAR to HPFAR format */
194 *hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4;
195 return true;
196}
197
198static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
199{
200 u64 esr = read_sysreg_el2(esr);
201 u8 ec = esr >> ESR_ELx_EC_SHIFT;
202 u64 hpfar, far;
203
204 vcpu->arch.fault.esr_el2 = esr;
205
206 if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
207 return true;
208
209 far = read_sysreg_el2(far);
210
211 /*
212 * The HPFAR can be invalid if the stage 2 fault did not
213 * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
 214	 * bit is clear) and one of the two following cases is true:
215 * 1. The fault was due to a permission fault
216 * 2. The processor carries errata 834220
217 *
218 * Therefore, for all non S1PTW faults where we either have a
219 * permission fault or the errata workaround is enabled, we
220 * resolve the IPA using the AT instruction.
221 */
222 if (!(esr & ESR_ELx_S1PTW) &&
223 (__check_arm_834220()() || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
224 if (!__translate_far_to_hpfar(far, &hpfar))
225 return false;
226 } else {
227 hpfar = read_sysreg(hpfar_el2);
228 }
229
230 vcpu->arch.fault.far_el2 = far;
231 vcpu->arch.fault.hpfar_el2 = hpfar;
232 return true;
233}
234
92static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) 235static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
93{ 236{
94 struct kvm_cpu_context *host_ctxt; 237 struct kvm_cpu_context *host_ctxt;
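
The PAR-to-HPFAR conversion in __translate_far_to_hpfar() above keeps the output page number (bits [47:12]) and re-bases it at bit 4, which is where HPFAR_EL2 carries IPA[47:12]. A small sketch with a made-up value:

static u64 par_to_hpfar(u64 par)
{
	/* Keep bits [47:12] and present them like HPFAR_EL2[39:4]. */
	return ((par >> 12) & ((1UL << 36) - 1)) << 4;
}
/* e.g. a successful walk reporting page 0x81234000 gives
 * par_to_hpfar(0x81234000) == 0x812340. */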
@@ -102,7 +245,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
102 host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); 245 host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
103 guest_ctxt = &vcpu->arch.ctxt; 246 guest_ctxt = &vcpu->arch.ctxt;
104 247
105 __sysreg_save_state(host_ctxt); 248 __sysreg_save_host_state(host_ctxt);
106 __debug_cond_save_host_state(vcpu); 249 __debug_cond_save_host_state(vcpu);
107 250
108 __activate_traps(vcpu); 251 __activate_traps(vcpu);
@@ -116,16 +259,20 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
116 * to Cortex-A57 erratum #852523. 259 * to Cortex-A57 erratum #852523.
117 */ 260 */
118 __sysreg32_restore_state(vcpu); 261 __sysreg32_restore_state(vcpu);
119 __sysreg_restore_state(guest_ctxt); 262 __sysreg_restore_guest_state(guest_ctxt);
120 __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); 263 __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
121 264
122 /* Jump in the fire! */ 265 /* Jump in the fire! */
266again:
123 exit_code = __guest_enter(vcpu, host_ctxt); 267 exit_code = __guest_enter(vcpu, host_ctxt);
124 /* And we're baaack! */ 268 /* And we're baaack! */
125 269
270 if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
271 goto again;
272
126 fp_enabled = __fpsimd_enabled(); 273 fp_enabled = __fpsimd_enabled();
127 274
128 __sysreg_save_state(guest_ctxt); 275 __sysreg_save_guest_state(guest_ctxt);
129 __sysreg32_save_state(vcpu); 276 __sysreg32_save_state(vcpu);
130 __timer_save_state(vcpu); 277 __timer_save_state(vcpu);
131 __vgic_save_state(vcpu); 278 __vgic_save_state(vcpu);
@@ -133,7 +280,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
133 __deactivate_traps(vcpu); 280 __deactivate_traps(vcpu);
134 __deactivate_vm(vcpu); 281 __deactivate_vm(vcpu);
135 282
136 __sysreg_restore_state(host_ctxt); 283 __sysreg_restore_host_state(host_ctxt);
137 284
138 if (fp_enabled) { 285 if (fp_enabled) {
139 __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); 286 __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
@@ -150,11 +297,34 @@ __alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
150 297
151static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; 298static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
152 299
153void __hyp_text __noreturn __hyp_panic(void) 300static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
154{ 301{
155 unsigned long str_va = (unsigned long)__hyp_panic_string; 302 unsigned long str_va = (unsigned long)__hyp_panic_string;
156 u64 spsr = read_sysreg(spsr_el2); 303
157 u64 elr = read_sysreg(elr_el2); 304 __hyp_do_panic(hyp_kern_va(str_va),
305 spsr, elr,
306 read_sysreg(esr_el2), read_sysreg_el2(far),
307 read_sysreg(hpfar_el2), par,
308 (void *)read_sysreg(tpidr_el2));
309}
310
311static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par)
312{
313 panic(__hyp_panic_string,
314 spsr, elr,
315 read_sysreg_el2(esr), read_sysreg_el2(far),
316 read_sysreg(hpfar_el2), par,
317 (void *)read_sysreg(tpidr_el2));
318}
319
320static hyp_alternate_select(__hyp_call_panic,
321 __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
322 ARM64_HAS_VIRT_HOST_EXTN);
323
324void __hyp_text __noreturn __hyp_panic(void)
325{
326 u64 spsr = read_sysreg_el2(spsr);
327 u64 elr = read_sysreg_el2(elr);
158 u64 par = read_sysreg(par_el1); 328 u64 par = read_sysreg(par_el1);
159 329
160 if (read_sysreg(vttbr_el2)) { 330 if (read_sysreg(vttbr_el2)) {
@@ -165,15 +335,11 @@ void __hyp_text __noreturn __hyp_panic(void)
165 host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); 335 host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
166 __deactivate_traps(vcpu); 336 __deactivate_traps(vcpu);
167 __deactivate_vm(vcpu); 337 __deactivate_vm(vcpu);
168 __sysreg_restore_state(host_ctxt); 338 __sysreg_restore_host_state(host_ctxt);
169 } 339 }
170 340
171 /* Call panic for real */ 341 /* Call panic for real */
172 __hyp_do_panic(hyp_kern_va(str_va), 342 __hyp_call_panic()(spsr, elr, par);
173 spsr, elr,
174 read_sysreg(esr_el2), read_sysreg(far_el2),
175 read_sysreg(hpfar_el2), par,
176 (void *)read_sysreg(tpidr_el2));
177 343
178 unreachable(); 344 unreachable();
179} 345}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 425630980229..0f7c40eb3f53 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -19,75 +19,122 @@
19#include <linux/kvm_host.h> 19#include <linux/kvm_host.h>
20 20
21#include <asm/kvm_asm.h> 21#include <asm/kvm_asm.h>
22#include <asm/kvm_mmu.h> 22#include <asm/kvm_hyp.h>
23 23
24#include "hyp.h" 24/* Yes, this does nothing, on purpose */
25static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
25 26
26/* ctxt is already in the HYP VA space */ 27/*
27void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) 28 * Non-VHE: Both host and guest must save everything.
29 *
30 * VHE: Host must save tpidr*_el[01], actlr_el1, sp0, pc, pstate, and
31 * guest must save everything.
32 */
33
34static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
28{ 35{
29 ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2);
30 ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
31 ctxt->sys_regs[SCTLR_EL1] = read_sysreg(sctlr_el1);
32 ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); 36 ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
33 ctxt->sys_regs[CPACR_EL1] = read_sysreg(cpacr_el1);
34 ctxt->sys_regs[TTBR0_EL1] = read_sysreg(ttbr0_el1);
35 ctxt->sys_regs[TTBR1_EL1] = read_sysreg(ttbr1_el1);
36 ctxt->sys_regs[TCR_EL1] = read_sysreg(tcr_el1);
37 ctxt->sys_regs[ESR_EL1] = read_sysreg(esr_el1);
38 ctxt->sys_regs[AFSR0_EL1] = read_sysreg(afsr0_el1);
39 ctxt->sys_regs[AFSR1_EL1] = read_sysreg(afsr1_el1);
40 ctxt->sys_regs[FAR_EL1] = read_sysreg(far_el1);
41 ctxt->sys_regs[MAIR_EL1] = read_sysreg(mair_el1);
42 ctxt->sys_regs[VBAR_EL1] = read_sysreg(vbar_el1);
43 ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg(contextidr_el1);
44 ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); 37 ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
45 ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); 38 ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
46 ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); 39 ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
47 ctxt->sys_regs[AMAIR_EL1] = read_sysreg(amair_el1); 40 ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
48 ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg(cntkctl_el1); 41 ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
42 ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
43}
44
45static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
46{
47 ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2);
48 ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
49 ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr);
50 ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr);
51 ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0);
52 ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1);
53 ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(tcr);
54 ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(esr);
55 ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(afsr0);
56 ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(afsr1);
57 ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(far);
58 ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(mair);
59 ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(vbar);
60 ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(contextidr);
61 ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair);
62 ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl);
49 ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); 63 ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
50 ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); 64 ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
51 65
52 ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
53 ctxt->gp_regs.regs.pc = read_sysreg(elr_el2);
54 ctxt->gp_regs.regs.pstate = read_sysreg(spsr_el2);
55 ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); 66 ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
56 ctxt->gp_regs.elr_el1 = read_sysreg(elr_el1); 67 ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
57 ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg(spsr_el1); 68 ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
69}
70
71static hyp_alternate_select(__sysreg_call_save_host_state,
72 __sysreg_save_state, __sysreg_do_nothing,
73 ARM64_HAS_VIRT_HOST_EXTN);
74
75void __hyp_text __sysreg_save_host_state(struct kvm_cpu_context *ctxt)
76{
77 __sysreg_call_save_host_state()(ctxt);
78 __sysreg_save_common_state(ctxt);
79}
80
81void __hyp_text __sysreg_save_guest_state(struct kvm_cpu_context *ctxt)
82{
83 __sysreg_save_state(ctxt);
84 __sysreg_save_common_state(ctxt);
58} 85}
59 86
60void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) 87static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
61{ 88{
62 write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
63 write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
64 write_sysreg(ctxt->sys_regs[SCTLR_EL1], sctlr_el1);
65 write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); 89 write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
66 write_sysreg(ctxt->sys_regs[CPACR_EL1], cpacr_el1);
67 write_sysreg(ctxt->sys_regs[TTBR0_EL1], ttbr0_el1);
68 write_sysreg(ctxt->sys_regs[TTBR1_EL1], ttbr1_el1);
69 write_sysreg(ctxt->sys_regs[TCR_EL1], tcr_el1);
70 write_sysreg(ctxt->sys_regs[ESR_EL1], esr_el1);
71 write_sysreg(ctxt->sys_regs[AFSR0_EL1], afsr0_el1);
72 write_sysreg(ctxt->sys_regs[AFSR1_EL1], afsr1_el1);
73 write_sysreg(ctxt->sys_regs[FAR_EL1], far_el1);
74 write_sysreg(ctxt->sys_regs[MAIR_EL1], mair_el1);
75 write_sysreg(ctxt->sys_regs[VBAR_EL1], vbar_el1);
76 write_sysreg(ctxt->sys_regs[CONTEXTIDR_EL1], contextidr_el1);
77 write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); 90 write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
78 write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); 91 write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
79 write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); 92 write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
80 write_sysreg(ctxt->sys_regs[AMAIR_EL1], amair_el1); 93 write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
81 write_sysreg(ctxt->sys_regs[CNTKCTL_EL1], cntkctl_el1); 94 write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
82 write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); 95 write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
83 write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); 96}
84 97
85 write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); 98static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
86 write_sysreg(ctxt->gp_regs.regs.pc, elr_el2); 99{
87 write_sysreg(ctxt->gp_regs.regs.pstate, spsr_el2); 100 write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
88 write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); 101 write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
89 write_sysreg(ctxt->gp_regs.elr_el1, elr_el1); 102 write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr);
90 write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1); 103 write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr);
104 write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0);
105 write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1);
106 write_sysreg_el1(ctxt->sys_regs[TCR_EL1], tcr);
107 write_sysreg_el1(ctxt->sys_regs[ESR_EL1], esr);
108 write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], afsr0);
109 write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], afsr1);
110 write_sysreg_el1(ctxt->sys_regs[FAR_EL1], far);
111 write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], mair);
112 write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], vbar);
113 write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],contextidr);
114 write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair);
115 write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl);
116 write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
117 write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
118
119 write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
120 write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
121 write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
122}
123
124static hyp_alternate_select(__sysreg_call_restore_host_state,
125 __sysreg_restore_state, __sysreg_do_nothing,
126 ARM64_HAS_VIRT_HOST_EXTN);
127
128void __hyp_text __sysreg_restore_host_state(struct kvm_cpu_context *ctxt)
129{
130 __sysreg_call_restore_host_state()(ctxt);
131 __sysreg_restore_common_state(ctxt);
132}
133
134void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt)
135{
136 __sysreg_restore_state(ctxt);
137 __sysreg_restore_common_state(ctxt);
91} 138}
92 139
93void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) 140void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 2a7e0d838698..be8177cdd3bf 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -15,7 +15,7 @@
15 * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18#include "hyp.h" 18#include <asm/kvm_hyp.h>
19 19
20static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) 20static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
21{ 21{
diff --git a/arch/arm64/kvm/hyp/vgic-v2-sr.c b/arch/arm64/kvm/hyp/vgic-v2-sr.c
deleted file mode 100644
index e71761238cfc..000000000000
--- a/arch/arm64/kvm/hyp/vgic-v2-sr.c
+++ /dev/null
@@ -1,84 +0,0 @@
1/*
2 * Copyright (C) 2012-2015 - ARM Ltd
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <linux/compiler.h>
19#include <linux/irqchip/arm-gic.h>
20#include <linux/kvm_host.h>
21
22#include <asm/kvm_mmu.h>
23
24#include "hyp.h"
25
26/* vcpu is already in the HYP VA space */
27void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
28{
29 struct kvm *kvm = kern_hyp_va(vcpu->kvm);
30 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
31 struct vgic_dist *vgic = &kvm->arch.vgic;
32 void __iomem *base = kern_hyp_va(vgic->vctrl_base);
33 u32 eisr0, eisr1, elrsr0, elrsr1;
34 int i, nr_lr;
35
36 if (!base)
37 return;
38
39 nr_lr = vcpu->arch.vgic_cpu.nr_lr;
40 cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR);
41 cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR);
42 eisr0 = readl_relaxed(base + GICH_EISR0);
43 elrsr0 = readl_relaxed(base + GICH_ELRSR0);
44 if (unlikely(nr_lr > 32)) {
45 eisr1 = readl_relaxed(base + GICH_EISR1);
46 elrsr1 = readl_relaxed(base + GICH_ELRSR1);
47 } else {
48 eisr1 = elrsr1 = 0;
49 }
50#ifdef CONFIG_CPU_BIG_ENDIAN
51 cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1;
52 cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1;
53#else
54 cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0;
55 cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
56#endif
57 cpu_if->vgic_apr = readl_relaxed(base + GICH_APR);
58
59 writel_relaxed(0, base + GICH_HCR);
60
61 for (i = 0; i < nr_lr; i++)
62 cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
63}
64
65/* vcpu is already in the HYP VA space */
66void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
67{
68 struct kvm *kvm = kern_hyp_va(vcpu->kvm);
69 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
70 struct vgic_dist *vgic = &kvm->arch.vgic;
71 void __iomem *base = kern_hyp_va(vgic->vctrl_base);
72 int i, nr_lr;
73
74 if (!base)
75 return;
76
77 writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
78 writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR);
79 writel_relaxed(cpu_if->vgic_apr, base + GICH_APR);
80
81 nr_lr = vcpu->arch.vgic_cpu.nr_lr;
82 for (i = 0; i < nr_lr; i++)
83 writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4));
84}
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 5dd2a26444ec..fff7cd42b3a3 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -19,9 +19,7 @@
19#include <linux/irqchip/arm-gic-v3.h> 19#include <linux/irqchip/arm-gic-v3.h>
20#include <linux/kvm_host.h> 20#include <linux/kvm_host.h>
21 21
22#include <asm/kvm_mmu.h> 22#include <asm/kvm_hyp.h>
23
24#include "hyp.h"
25 23
26#define vtr_to_max_lr_idx(v) ((v) & 0xf) 24#define vtr_to_max_lr_idx(v) ((v) & 0xf)
27#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1) 25#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1)
@@ -39,12 +37,133 @@
39 asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\ 37 asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\
40 } while (0) 38 } while (0)
41 39
42/* vcpu is already in the HYP VA space */ 40static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
41{
42 switch (lr & 0xf) {
43 case 0:
44 return read_gicreg(ICH_LR0_EL2);
45 case 1:
46 return read_gicreg(ICH_LR1_EL2);
47 case 2:
48 return read_gicreg(ICH_LR2_EL2);
49 case 3:
50 return read_gicreg(ICH_LR3_EL2);
51 case 4:
52 return read_gicreg(ICH_LR4_EL2);
53 case 5:
54 return read_gicreg(ICH_LR5_EL2);
55 case 6:
56 return read_gicreg(ICH_LR6_EL2);
57 case 7:
58 return read_gicreg(ICH_LR7_EL2);
59 case 8:
60 return read_gicreg(ICH_LR8_EL2);
61 case 9:
62 return read_gicreg(ICH_LR9_EL2);
63 case 10:
64 return read_gicreg(ICH_LR10_EL2);
65 case 11:
66 return read_gicreg(ICH_LR11_EL2);
67 case 12:
68 return read_gicreg(ICH_LR12_EL2);
69 case 13:
70 return read_gicreg(ICH_LR13_EL2);
71 case 14:
72 return read_gicreg(ICH_LR14_EL2);
73 case 15:
74 return read_gicreg(ICH_LR15_EL2);
75 }
76
77 unreachable();
78}
79
80static void __hyp_text __gic_v3_set_lr(u64 val, int lr)
81{
82 switch (lr & 0xf) {
83 case 0:
84 write_gicreg(val, ICH_LR0_EL2);
85 break;
86 case 1:
87 write_gicreg(val, ICH_LR1_EL2);
88 break;
89 case 2:
90 write_gicreg(val, ICH_LR2_EL2);
91 break;
92 case 3:
93 write_gicreg(val, ICH_LR3_EL2);
94 break;
95 case 4:
96 write_gicreg(val, ICH_LR4_EL2);
97 break;
98 case 5:
99 write_gicreg(val, ICH_LR5_EL2);
100 break;
101 case 6:
102 write_gicreg(val, ICH_LR6_EL2);
103 break;
104 case 7:
105 write_gicreg(val, ICH_LR7_EL2);
106 break;
107 case 8:
108 write_gicreg(val, ICH_LR8_EL2);
109 break;
110 case 9:
111 write_gicreg(val, ICH_LR9_EL2);
112 break;
113 case 10:
114 write_gicreg(val, ICH_LR10_EL2);
115 break;
116 case 11:
117 write_gicreg(val, ICH_LR11_EL2);
118 break;
119 case 12:
120 write_gicreg(val, ICH_LR12_EL2);
121 break;
122 case 13:
123 write_gicreg(val, ICH_LR13_EL2);
124 break;
125 case 14:
126 write_gicreg(val, ICH_LR14_EL2);
127 break;
128 case 15:
129 write_gicreg(val, ICH_LR15_EL2);
130 break;
131 }
132}
133
134static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, int nr_lr)
135{
136 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
137 int i;
138 bool expect_mi;
139
140 expect_mi = !!(cpu_if->vgic_hcr & ICH_HCR_UIE);
141
142 for (i = 0; i < nr_lr; i++) {
143 if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
144 continue;
145
146 expect_mi |= (!(cpu_if->vgic_lr[i] & ICH_LR_HW) &&
147 (cpu_if->vgic_lr[i] & ICH_LR_EOI));
148 }
149
150 if (expect_mi) {
151 cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2);
152
153 if (cpu_if->vgic_misr & ICH_MISR_EOI)
154 cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2);
155 else
156 cpu_if->vgic_eisr = 0;
157 } else {
158 cpu_if->vgic_misr = 0;
159 cpu_if->vgic_eisr = 0;
160 }
161}
162
43void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) 163void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
44{ 164{
45 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; 165 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
46 u64 val; 166 u64 val;
47 u32 max_lr_idx, nr_pri_bits;
48 167
49 /* 168 /*
50 * Make sure stores to the GIC via the memory mapped interface 169 * Make sure stores to the GIC via the memory mapped interface
@@ -53,68 +172,66 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
53 dsb(st); 172 dsb(st);
54 173
55 cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); 174 cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
56 cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2);
57 cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2);
58 cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
59 175
60 write_gicreg(0, ICH_HCR_EL2); 176 if (vcpu->arch.vgic_cpu.live_lrs) {
61 val = read_gicreg(ICH_VTR_EL2); 177 int i;
62 max_lr_idx = vtr_to_max_lr_idx(val); 178 u32 max_lr_idx, nr_pri_bits;
63 nr_pri_bits = vtr_to_nr_pri_bits(val);
64 179
65 switch (max_lr_idx) { 180 cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
66 case 15:
67 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2);
68 case 14:
69 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2);
70 case 13:
71 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2);
72 case 12:
73 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2);
74 case 11:
75 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2);
76 case 10:
77 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2);
78 case 9:
79 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2);
80 case 8:
81 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2);
82 case 7:
83 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2);
84 case 6:
85 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2);
86 case 5:
87 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2);
88 case 4:
89 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2);
90 case 3:
91 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2);
92 case 2:
93 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2);
94 case 1:
95 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2);
96 case 0:
97 cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2);
98 }
99 181
100 switch (nr_pri_bits) { 182 write_gicreg(0, ICH_HCR_EL2);
101 case 7: 183 val = read_gicreg(ICH_VTR_EL2);
102 cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); 184 max_lr_idx = vtr_to_max_lr_idx(val);
103 cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); 185 nr_pri_bits = vtr_to_nr_pri_bits(val);
104 case 6:
105 cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
106 default:
107 cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
108 }
109 186
110 switch (nr_pri_bits) { 187 save_maint_int_state(vcpu, max_lr_idx + 1);
111 case 7: 188
112 cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); 189 for (i = 0; i <= max_lr_idx; i++) {
113 cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); 190 if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
114 case 6: 191 continue;
115 cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2); 192
116 default: 193 if (cpu_if->vgic_elrsr & (1 << i)) {
117 cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); 194 cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
195 continue;
196 }
197
198 cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
199 __gic_v3_set_lr(0, i);
200 }
201
202 switch (nr_pri_bits) {
203 case 7:
204 cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
205 cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
206 case 6:
207 cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
208 default:
209 cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
210 }
211
212 switch (nr_pri_bits) {
213 case 7:
214 cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
215 cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
216 case 6:
217 cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
218 default:
219 cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
220 }
221
222 vcpu->arch.vgic_cpu.live_lrs = 0;
223 } else {
224 cpu_if->vgic_misr = 0;
225 cpu_if->vgic_eisr = 0;
226 cpu_if->vgic_elrsr = 0xffff;
227 cpu_if->vgic_ap0r[0] = 0;
228 cpu_if->vgic_ap0r[1] = 0;
229 cpu_if->vgic_ap0r[2] = 0;
230 cpu_if->vgic_ap0r[3] = 0;
231 cpu_if->vgic_ap1r[0] = 0;
232 cpu_if->vgic_ap1r[1] = 0;
233 cpu_if->vgic_ap1r[2] = 0;
234 cpu_if->vgic_ap1r[3] = 0;
118 } 235 }
119 236
120 val = read_gicreg(ICC_SRE_EL2); 237 val = read_gicreg(ICC_SRE_EL2);
@@ -128,6 +245,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
128 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; 245 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
129 u64 val; 246 u64 val;
130 u32 max_lr_idx, nr_pri_bits; 247 u32 max_lr_idx, nr_pri_bits;
248 u16 live_lrs = 0;
249 int i;
131 250
132 /* 251 /*
133 * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a 252 * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
@@ -140,66 +259,46 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
140 write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1); 259 write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1);
141 isb(); 260 isb();
142 261
143 write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
144 write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
145
146 val = read_gicreg(ICH_VTR_EL2); 262 val = read_gicreg(ICH_VTR_EL2);
147 max_lr_idx = vtr_to_max_lr_idx(val); 263 max_lr_idx = vtr_to_max_lr_idx(val);
148 nr_pri_bits = vtr_to_nr_pri_bits(val); 264 nr_pri_bits = vtr_to_nr_pri_bits(val);
149 265
150 switch (nr_pri_bits) { 266 for (i = 0; i <= max_lr_idx; i++) {
151 case 7: 267 if (cpu_if->vgic_lr[i] & ICH_LR_STATE)
152 write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); 268 live_lrs |= (1 << i);
153 write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
154 case 6:
155 write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
156 default:
157 write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
158 } 269 }
159 270
160 switch (nr_pri_bits) { 271 write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
161 case 7:
162 write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
163 write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
164 case 6:
165 write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
166 default:
167 write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
168 }
169 272
170 switch (max_lr_idx) { 273 if (live_lrs) {
171 case 15: 274 write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
172 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2); 275
173 case 14: 276 switch (nr_pri_bits) {
174 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2); 277 case 7:
175 case 13: 278 write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
176 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2); 279 write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
177 case 12: 280 case 6:
178 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2); 281 write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
179 case 11: 282 default:
180 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2); 283 write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
181 case 10: 284 }
182 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2); 285
183 case 9: 286 switch (nr_pri_bits) {
184 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2); 287 case 7:
185 case 8: 288 write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
186 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2); 289 write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
187 case 7: 290 case 6:
188 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2); 291 write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
189 case 6: 292 default:
190 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2); 293 write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
191 case 5: 294 }
192 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2); 295
193 case 4: 296 for (i = 0; i <= max_lr_idx; i++) {
194 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2); 297 if (!(live_lrs & (1 << i)))
195 case 3: 298 continue;
196 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2); 299
197 case 2: 300 __gic_v3_set_lr(cpu_if->vgic_lr[i], i);
198 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2); 301 }
199 case 1:
200 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2);
201 case 0:
202 write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2);
203 } 302 }
204 303
205 /* 304 /*
@@ -209,6 +308,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
209 */ 308 */
210 isb(); 309 isb();
211 dsb(sy); 310 dsb(sy);
311 vcpu->arch.vgic_cpu.live_lrs = live_lrs;
212 312
213 /* 313 /*
214 * Prevent the guest from touching the GIC system registers if 314 * Prevent the guest from touching the GIC system registers if
@@ -220,6 +320,15 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
220 } 320 }
221} 321}
222 322
323void __hyp_text __vgic_v3_init_lrs(void)
324{
325 int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2));
326 int i;
327
328 for (i = 0; i <= max_lr_idx; i++)
329 __gic_v3_set_lr(0, i);
330}
331
223static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) 332static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void)
224{ 333{
225 return read_gicreg(ICH_VTR_EL2); 334 return read_gicreg(ICH_VTR_EL2);
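
The save/restore rewrite above hinges on the live_lrs bitmap: restore only loads list registers whose saved copy has ICH_LR_STATE bits set and records them in live_lrs, and the next save only reads back (and wipes) the registers recorded there, additionally skipping any LR the hardware already flagged empty in ICH_ELRSR. A minimal standalone sketch of that bookkeeping, with placeholder register accessors and an assumed position for the state bits (not the kernel's helpers):

#include <stdint.h>
#include <stdio.h>

#define MAX_LRS   16
#define LR_STATE  (3ULL << 62)   /* assumption: pending/active state in bits 63:62 */

struct vgic_ctx {
	uint64_t lr[MAX_LRS];
	uint16_t live_lrs;
};

static uint64_t hw_lr[MAX_LRS];  /* stand-in for the ICH_LRn_EL2 registers */

static void save_lrs(struct vgic_ctx *c, int max_lr_idx)
{
	for (int i = 0; i <= max_lr_idx; i++) {
		if (!(c->live_lrs & (1U << i)))
			continue;          /* never loaded since the last restore */
		c->lr[i] = hw_lr[i];       /* read ICH_LRn_EL2 */
		hw_lr[i] = 0;              /* wipe it so the next guest sees it empty */
	}
	c->live_lrs = 0;
}

static void restore_lrs(struct vgic_ctx *c, int max_lr_idx)
{
	uint16_t live = 0;

	for (int i = 0; i <= max_lr_idx; i++)
		if (c->lr[i] & LR_STATE)
			live |= 1U << i;   /* only LRs with state are worth loading */

	for (int i = 0; i <= max_lr_idx; i++)
		if (live & (1U << i))
			hw_lr[i] = c->lr[i];   /* write ICH_LRn_EL2 */

	c->live_lrs = live;
}

int main(void)
{
	struct vgic_ctx ctx = { .lr = { [2] = LR_STATE | 27 } };

	restore_lrs(&ctx, 3);                 /* loads only LR2, marks it live */
	printf("live_lrs after restore: %#x\n", ctx.live_lrs);

	save_lrs(&ctx, 3);                    /* saves and wipes only LR2 */
	printf("live_lrs after save:    %#x\n", ctx.live_lrs);
	return 0;
}
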
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index f34745cb3d23..9677bf069bcc 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -77,7 +77,11 @@ int kvm_arch_dev_ioctl_check_extension(long ext)
77 case KVM_CAP_GUEST_DEBUG_HW_WPS: 77 case KVM_CAP_GUEST_DEBUG_HW_WPS:
78 r = get_num_wrps(); 78 r = get_num_wrps();
79 break; 79 break;
80 case KVM_CAP_ARM_PMU_V3:
81 r = kvm_arm_support_pmu_v3();
82 break;
80 case KVM_CAP_SET_GUEST_DEBUG: 83 case KVM_CAP_SET_GUEST_DEBUG:
84 case KVM_CAP_VCPU_ATTRIBUTES:
81 r = 1; 85 r = 1;
82 break; 86 break;
83 default: 87 default:
@@ -120,6 +124,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
120 /* Reset system registers */ 124 /* Reset system registers */
121 kvm_reset_sys_regs(vcpu); 125 kvm_reset_sys_regs(vcpu);
122 126
127 /* Reset PMU */
128 kvm_pmu_vcpu_reset(vcpu);
129
123 /* Reset timer */ 130 /* Reset timer */
124 return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); 131 return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
125} 132}
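
With the KVM_CAP_ARM_PMU_V3 and KVM_CAP_VCPU_ATTRIBUTES entries added above, userspace can probe for guest PMU support before configuring it. A hedged userspace sketch using the standard KVM_CHECK_EXTENSION ioctl (assumes kernel headers that already define these capabilities; error handling trimmed):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* Returns 1 when the host PMU can be exposed to guests (see reset.c above). */
	int has_pmu  = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_ARM_PMU_V3);
	int has_attr = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_VCPU_ATTRIBUTES);

	printf("KVM_CAP_ARM_PMU_V3: %d, KVM_CAP_VCPU_ATTRIBUTES: %d\n",
	       has_pmu, has_attr);
	return 0;
}
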
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 2e90371cfb37..61ba59104845 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -20,6 +20,7 @@
20 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 */ 21 */
22 22
23#include <linux/bsearch.h>
23#include <linux/kvm_host.h> 24#include <linux/kvm_host.h>
24#include <linux/mm.h> 25#include <linux/mm.h>
25#include <linux/uaccess.h> 26#include <linux/uaccess.h>
@@ -34,6 +35,7 @@
34#include <asm/kvm_emulate.h> 35#include <asm/kvm_emulate.h>
35#include <asm/kvm_host.h> 36#include <asm/kvm_host.h>
36#include <asm/kvm_mmu.h> 37#include <asm/kvm_mmu.h>
38#include <asm/perf_event.h>
37 39
38#include <trace/events/kvm.h> 40#include <trace/events/kvm.h>
39 41
@@ -439,6 +441,344 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
439 vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr; 441 vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr;
440} 442}
441 443
444static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
445{
446 u64 pmcr, val;
447
448 asm volatile("mrs %0, pmcr_el0\n" : "=r" (pmcr));
449 /* Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) is reset to UNKNOWN
450 * except PMCR.E resetting to zero.
451 */
452 val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
453 | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
454 vcpu_sys_reg(vcpu, PMCR_EL0) = val;
455}
456
457static bool pmu_access_el0_disabled(struct kvm_vcpu *vcpu)
458{
459 u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
460
461 return !((reg & ARMV8_PMU_USERENR_EN) || vcpu_mode_priv(vcpu));
462}
463
464static bool pmu_write_swinc_el0_disabled(struct kvm_vcpu *vcpu)
465{
466 u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
467
468 return !((reg & (ARMV8_PMU_USERENR_SW | ARMV8_PMU_USERENR_EN))
469 || vcpu_mode_priv(vcpu));
470}
471
472static bool pmu_access_cycle_counter_el0_disabled(struct kvm_vcpu *vcpu)
473{
474 u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
475
476 return !((reg & (ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_EN))
477 || vcpu_mode_priv(vcpu));
478}
479
480static bool pmu_access_event_counter_el0_disabled(struct kvm_vcpu *vcpu)
481{
482 u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
483
484 return !((reg & (ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_EN))
485 || vcpu_mode_priv(vcpu));
486}
487
488static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
489 const struct sys_reg_desc *r)
490{
491 u64 val;
492
493 if (!kvm_arm_pmu_v3_ready(vcpu))
494 return trap_raz_wi(vcpu, p, r);
495
496 if (pmu_access_el0_disabled(vcpu))
497 return false;
498
499 if (p->is_write) {
500 /* Only update writeable bits of PMCR */
501 val = vcpu_sys_reg(vcpu, PMCR_EL0);
502 val &= ~ARMV8_PMU_PMCR_MASK;
503 val |= p->regval & ARMV8_PMU_PMCR_MASK;
504 vcpu_sys_reg(vcpu, PMCR_EL0) = val;
505 kvm_pmu_handle_pmcr(vcpu, val);
506 } else {
507 /* PMCR.P & PMCR.C are RAZ */
508 val = vcpu_sys_reg(vcpu, PMCR_EL0)
509 & ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C);
510 p->regval = val;
511 }
512
513 return true;
514}
515
516static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
517 const struct sys_reg_desc *r)
518{
519 if (!kvm_arm_pmu_v3_ready(vcpu))
520 return trap_raz_wi(vcpu, p, r);
521
522 if (pmu_access_event_counter_el0_disabled(vcpu))
523 return false;
524
525 if (p->is_write)
526 vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval;
527 else
528 /* return PMSELR.SEL field */
529 p->regval = vcpu_sys_reg(vcpu, PMSELR_EL0)
530 & ARMV8_PMU_COUNTER_MASK;
531
532 return true;
533}
534
535static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
536 const struct sys_reg_desc *r)
537{
538 u64 pmceid;
539
540 if (!kvm_arm_pmu_v3_ready(vcpu))
541 return trap_raz_wi(vcpu, p, r);
542
543 BUG_ON(p->is_write);
544
545 if (pmu_access_el0_disabled(vcpu))
546 return false;
547
548 if (!(p->Op2 & 1))
549 asm volatile("mrs %0, pmceid0_el0\n" : "=r" (pmceid));
550 else
551 asm volatile("mrs %0, pmceid1_el0\n" : "=r" (pmceid));
552
553 p->regval = pmceid;
554
555 return true;
556}
557
558static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
559{
560 u64 pmcr, val;
561
562 pmcr = vcpu_sys_reg(vcpu, PMCR_EL0);
563 val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
564 if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX)
565 return false;
566
567 return true;
568}
569
570static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
571 struct sys_reg_params *p,
572 const struct sys_reg_desc *r)
573{
574 u64 idx;
575
576 if (!kvm_arm_pmu_v3_ready(vcpu))
577 return trap_raz_wi(vcpu, p, r);
578
579 if (r->CRn == 9 && r->CRm == 13) {
580 if (r->Op2 == 2) {
581 /* PMXEVCNTR_EL0 */
582 if (pmu_access_event_counter_el0_disabled(vcpu))
583 return false;
584
585 idx = vcpu_sys_reg(vcpu, PMSELR_EL0)
586 & ARMV8_PMU_COUNTER_MASK;
587 } else if (r->Op2 == 0) {
588 /* PMCCNTR_EL0 */
589 if (pmu_access_cycle_counter_el0_disabled(vcpu))
590 return false;
591
592 idx = ARMV8_PMU_CYCLE_IDX;
593 } else {
594 BUG();
595 }
596 } else if (r->CRn == 14 && (r->CRm & 12) == 8) {
597 /* PMEVCNTRn_EL0 */
598 if (pmu_access_event_counter_el0_disabled(vcpu))
599 return false;
600
601 idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
602 } else {
603 BUG();
604 }
605
606 if (!pmu_counter_idx_valid(vcpu, idx))
607 return false;
608
609 if (p->is_write) {
610 if (pmu_access_el0_disabled(vcpu))
611 return false;
612
613 kvm_pmu_set_counter_value(vcpu, idx, p->regval);
614 } else {
615 p->regval = kvm_pmu_get_counter_value(vcpu, idx);
616 }
617
618 return true;
619}
620
621static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
622 const struct sys_reg_desc *r)
623{
624 u64 idx, reg;
625
626 if (!kvm_arm_pmu_v3_ready(vcpu))
627 return trap_raz_wi(vcpu, p, r);
628
629 if (pmu_access_el0_disabled(vcpu))
630 return false;
631
632 if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) {
633 /* PMXEVTYPER_EL0 */
634 idx = vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK;
635 reg = PMEVTYPER0_EL0 + idx;
636 } else if (r->CRn == 14 && (r->CRm & 12) == 12) {
637 idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
638 if (idx == ARMV8_PMU_CYCLE_IDX)
639 reg = PMCCFILTR_EL0;
640 else
641 /* PMEVTYPERn_EL0 */
642 reg = PMEVTYPER0_EL0 + idx;
643 } else {
644 BUG();
645 }
646
647 if (!pmu_counter_idx_valid(vcpu, idx))
648 return false;
649
650 if (p->is_write) {
651 kvm_pmu_set_counter_event_type(vcpu, p->regval, idx);
652 vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
653 } else {
654 p->regval = vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
655 }
656
657 return true;
658}
659
660static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
661 const struct sys_reg_desc *r)
662{
663 u64 val, mask;
664
665 if (!kvm_arm_pmu_v3_ready(vcpu))
666 return trap_raz_wi(vcpu, p, r);
667
668 if (pmu_access_el0_disabled(vcpu))
669 return false;
670
671 mask = kvm_pmu_valid_counter_mask(vcpu);
672 if (p->is_write) {
673 val = p->regval & mask;
674 if (r->Op2 & 0x1) {
675 /* accessing PMCNTENSET_EL0 */
676 vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
677 kvm_pmu_enable_counter(vcpu, val);
678 } else {
679 /* accessing PMCNTENCLR_EL0 */
680 vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
681 kvm_pmu_disable_counter(vcpu, val);
682 }
683 } else {
684 p->regval = vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
685 }
686
687 return true;
688}
689
690static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
691 const struct sys_reg_desc *r)
692{
693 u64 mask = kvm_pmu_valid_counter_mask(vcpu);
694
695 if (!kvm_arm_pmu_v3_ready(vcpu))
696 return trap_raz_wi(vcpu, p, r);
697
698 if (!vcpu_mode_priv(vcpu))
699 return false;
700
701 if (p->is_write) {
702 u64 val = p->regval & mask;
703
704 if (r->Op2 & 0x1)
705 /* accessing PMINTENSET_EL1 */
706 vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val;
707 else
708 /* accessing PMINTENCLR_EL1 */
709 vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
710 } else {
711 p->regval = vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
712 }
713
714 return true;
715}
716
717static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
718 const struct sys_reg_desc *r)
719{
720 u64 mask = kvm_pmu_valid_counter_mask(vcpu);
721
722 if (!kvm_arm_pmu_v3_ready(vcpu))
723 return trap_raz_wi(vcpu, p, r);
724
725 if (pmu_access_el0_disabled(vcpu))
726 return false;
727
728 if (p->is_write) {
729 if (r->CRm & 0x2)
730 /* accessing PMOVSSET_EL0 */
731 kvm_pmu_overflow_set(vcpu, p->regval & mask);
732 else
733 /* accessing PMOVSCLR_EL0 */
734 vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
735 } else {
736 p->regval = vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
737 }
738
739 return true;
740}
741
742static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
743 const struct sys_reg_desc *r)
744{
745 u64 mask;
746
747 if (!kvm_arm_pmu_v3_ready(vcpu))
748 return trap_raz_wi(vcpu, p, r);
749
750 if (pmu_write_swinc_el0_disabled(vcpu))
751 return false;
752
753 if (p->is_write) {
754 mask = kvm_pmu_valid_counter_mask(vcpu);
755 kvm_pmu_software_increment(vcpu, p->regval & mask);
756 return true;
757 }
758
759 return false;
760}
761
762static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
763 const struct sys_reg_desc *r)
764{
765 if (!kvm_arm_pmu_v3_ready(vcpu))
766 return trap_raz_wi(vcpu, p, r);
767
768 if (p->is_write) {
769 if (!vcpu_mode_priv(vcpu))
770 return false;
771
772 vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval
773 & ARMV8_PMU_USERENR_MASK;
774 } else {
775 p->regval = vcpu_sys_reg(vcpu, PMUSERENR_EL0)
776 & ARMV8_PMU_USERENR_MASK;
777 }
778
779 return true;
780}
781
442/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ 782/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
443#define DBG_BCR_BVR_WCR_WVR_EL1(n) \ 783#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
444 /* DBGBVRn_EL1 */ \ 784 /* DBGBVRn_EL1 */ \
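
The access_pm* handlers added in this hunk gate guest EL0 accesses on PMUSERENR_EL0: a register is reachable from EL0 only when the global EN bit or the function-specific bit (SW, CR or ER) is set, otherwise the handler refuses the access; accesses from EL1 and above are always allowed. A reduced user-space model of that check (bit positions spelled out here rather than taken from the kernel headers):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* PMUSERENR_EL0 bits as used by the pmu_*_el0_disabled() helpers above. */
#define USERENR_EN  (1u << 0)   /* EL0 access to the whole PMU */
#define USERENR_SW  (1u << 1)   /* EL0 writes to PMSWINC */
#define USERENR_CR  (1u << 2)   /* EL0 reads of the cycle counter */
#define USERENR_ER  (1u << 3)   /* EL0 reads of event counters */

struct vcpu {
	uint32_t pmuserenr;
	bool     priv;           /* true when the guest is at EL1 or above */
};

/*
 * Generalises the helpers above: access is allowed at EL1+, or at EL0
 * when EN or the function-specific bit is set in PMUSERENR_EL0.
 */
static bool el0_access_allowed(const struct vcpu *v, uint32_t extra_bit)
{
	return v->priv || (v->pmuserenr & (extra_bit | USERENR_EN));
}

int main(void)
{
	struct vcpu v = { .pmuserenr = USERENR_ER, .priv = false };

	/* EL0 may read event counters (ER set) but not write PMSWINC. */
	printf("evcntr read: %d, swinc write: %d\n",
	       el0_access_allowed(&v, USERENR_ER),
	       el0_access_allowed(&v, USERENR_SW));
	return 0;
}
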
@@ -454,6 +794,20 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
454 { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ 794 { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \
455 trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr } 795 trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr }
456 796
797/* Macro to expand the PMEVCNTRn_EL0 register */
798#define PMU_PMEVCNTR_EL0(n) \
799 /* PMEVCNTRn_EL0 */ \
800 { Op0(0b11), Op1(0b011), CRn(0b1110), \
801 CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
802 access_pmu_evcntr, reset_unknown, (PMEVCNTR0_EL0 + n), }
803
804/* Macro to expand the PMEVTYPERn_EL0 register */
805#define PMU_PMEVTYPER_EL0(n) \
806 /* PMEVTYPERn_EL0 */ \
807 { Op0(0b11), Op1(0b011), CRn(0b1110), \
808 CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
809 access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), }
810
457/* 811/*
458 * Architected system registers. 812 * Architected system registers.
459 * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 813 * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
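
The PMU_PMEVCNTR_EL0()/PMU_PMEVTYPER_EL0() macros above encode the per-counter event registers under CRn=14, deriving CRm from the top two bits of n (0b10xx for counters, 0b11xx for type registers) and Op2 from its low three bits. A quick worked example of that packing, using the same arithmetic as the macros:

#include <stdio.h>

/* Same arithmetic as PMU_PMEVCNTR_EL0(n)/PMU_PMEVTYPER_EL0(n) above. */
static void encode(int n)
{
	int crm_cntr = 0x8 | ((n >> 3) & 0x3);   /* 0b1000 | n[4:3] */
	int crm_type = 0xc | ((n >> 3) & 0x3);   /* 0b1100 | n[4:3] */
	int op2      = n & 0x7;                  /* n[2:0] */

	printf("PMEVCNTR%d_EL0:  CRn=14 CRm=%d Op2=%d\n", n, crm_cntr, op2);
	printf("PMEVTYPER%d_EL0: CRn=14 CRm=%d Op2=%d\n", n, crm_type, op2);
}

int main(void)
{
	encode(0);    /* CRm=8/12,  Op2=0 */
	encode(13);   /* CRm=9/13,  Op2=5 */
	encode(30);   /* CRm=11/15, Op2=6 */
	return 0;
}
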
@@ -583,10 +937,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
583 937
584 /* PMINTENSET_EL1 */ 938 /* PMINTENSET_EL1 */
585 { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), 939 { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
586 trap_raz_wi }, 940 access_pminten, reset_unknown, PMINTENSET_EL1 },
587 /* PMINTENCLR_EL1 */ 941 /* PMINTENCLR_EL1 */
588 { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010), 942 { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
589 trap_raz_wi }, 943 access_pminten, NULL, PMINTENSET_EL1 },
590 944
591 /* MAIR_EL1 */ 945 /* MAIR_EL1 */
592 { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), 946 { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
@@ -623,43 +977,46 @@ static const struct sys_reg_desc sys_reg_descs[] = {
623 977
624 /* PMCR_EL0 */ 978 /* PMCR_EL0 */
625 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000), 979 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
626 trap_raz_wi }, 980 access_pmcr, reset_pmcr, },
627 /* PMCNTENSET_EL0 */ 981 /* PMCNTENSET_EL0 */
628 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001), 982 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
629 trap_raz_wi }, 983 access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
630 /* PMCNTENCLR_EL0 */ 984 /* PMCNTENCLR_EL0 */
631 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010), 985 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
632 trap_raz_wi }, 986 access_pmcnten, NULL, PMCNTENSET_EL0 },
633 /* PMOVSCLR_EL0 */ 987 /* PMOVSCLR_EL0 */
634 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011), 988 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
635 trap_raz_wi }, 989 access_pmovs, NULL, PMOVSSET_EL0 },
636 /* PMSWINC_EL0 */ 990 /* PMSWINC_EL0 */
637 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100), 991 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
638 trap_raz_wi }, 992 access_pmswinc, reset_unknown, PMSWINC_EL0 },
639 /* PMSELR_EL0 */ 993 /* PMSELR_EL0 */
640 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101), 994 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
641 trap_raz_wi }, 995 access_pmselr, reset_unknown, PMSELR_EL0 },
642 /* PMCEID0_EL0 */ 996 /* PMCEID0_EL0 */
643 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110), 997 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
644 trap_raz_wi }, 998 access_pmceid },
645 /* PMCEID1_EL0 */ 999 /* PMCEID1_EL0 */
646 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111), 1000 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
647 trap_raz_wi }, 1001 access_pmceid },
648 /* PMCCNTR_EL0 */ 1002 /* PMCCNTR_EL0 */
649 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000), 1003 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
650 trap_raz_wi }, 1004 access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 },
651 /* PMXEVTYPER_EL0 */ 1005 /* PMXEVTYPER_EL0 */
652 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001), 1006 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
653 trap_raz_wi }, 1007 access_pmu_evtyper },
654 /* PMXEVCNTR_EL0 */ 1008 /* PMXEVCNTR_EL0 */
655 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010), 1009 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
656 trap_raz_wi }, 1010 access_pmu_evcntr },
657 /* PMUSERENR_EL0 */ 1011 /* PMUSERENR_EL0
1012 * This register resets as unknown in 64bit mode while it resets as zero
1013 * in 32bit mode. Here we choose to reset it as zero for consistency.
1014 */
658 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000), 1015 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
659 trap_raz_wi }, 1016 access_pmuserenr, reset_val, PMUSERENR_EL0, 0 },
660 /* PMOVSSET_EL0 */ 1017 /* PMOVSSET_EL0 */
661 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011), 1018 { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
662 trap_raz_wi }, 1019 access_pmovs, reset_unknown, PMOVSSET_EL0 },
663 1020
664 /* TPIDR_EL0 */ 1021 /* TPIDR_EL0 */
665 { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010), 1022 { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
@@ -668,6 +1025,77 @@ static const struct sys_reg_desc sys_reg_descs[] = {
668 { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011), 1025 { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
669 NULL, reset_unknown, TPIDRRO_EL0 }, 1026 NULL, reset_unknown, TPIDRRO_EL0 },
670 1027
1028 /* PMEVCNTRn_EL0 */
1029 PMU_PMEVCNTR_EL0(0),
1030 PMU_PMEVCNTR_EL0(1),
1031 PMU_PMEVCNTR_EL0(2),
1032 PMU_PMEVCNTR_EL0(3),
1033 PMU_PMEVCNTR_EL0(4),
1034 PMU_PMEVCNTR_EL0(5),
1035 PMU_PMEVCNTR_EL0(6),
1036 PMU_PMEVCNTR_EL0(7),
1037 PMU_PMEVCNTR_EL0(8),
1038 PMU_PMEVCNTR_EL0(9),
1039 PMU_PMEVCNTR_EL0(10),
1040 PMU_PMEVCNTR_EL0(11),
1041 PMU_PMEVCNTR_EL0(12),
1042 PMU_PMEVCNTR_EL0(13),
1043 PMU_PMEVCNTR_EL0(14),
1044 PMU_PMEVCNTR_EL0(15),
1045 PMU_PMEVCNTR_EL0(16),
1046 PMU_PMEVCNTR_EL0(17),
1047 PMU_PMEVCNTR_EL0(18),
1048 PMU_PMEVCNTR_EL0(19),
1049 PMU_PMEVCNTR_EL0(20),
1050 PMU_PMEVCNTR_EL0(21),
1051 PMU_PMEVCNTR_EL0(22),
1052 PMU_PMEVCNTR_EL0(23),
1053 PMU_PMEVCNTR_EL0(24),
1054 PMU_PMEVCNTR_EL0(25),
1055 PMU_PMEVCNTR_EL0(26),
1056 PMU_PMEVCNTR_EL0(27),
1057 PMU_PMEVCNTR_EL0(28),
1058 PMU_PMEVCNTR_EL0(29),
1059 PMU_PMEVCNTR_EL0(30),
1060 /* PMEVTYPERn_EL0 */
1061 PMU_PMEVTYPER_EL0(0),
1062 PMU_PMEVTYPER_EL0(1),
1063 PMU_PMEVTYPER_EL0(2),
1064 PMU_PMEVTYPER_EL0(3),
1065 PMU_PMEVTYPER_EL0(4),
1066 PMU_PMEVTYPER_EL0(5),
1067 PMU_PMEVTYPER_EL0(6),
1068 PMU_PMEVTYPER_EL0(7),
1069 PMU_PMEVTYPER_EL0(8),
1070 PMU_PMEVTYPER_EL0(9),
1071 PMU_PMEVTYPER_EL0(10),
1072 PMU_PMEVTYPER_EL0(11),
1073 PMU_PMEVTYPER_EL0(12),
1074 PMU_PMEVTYPER_EL0(13),
1075 PMU_PMEVTYPER_EL0(14),
1076 PMU_PMEVTYPER_EL0(15),
1077 PMU_PMEVTYPER_EL0(16),
1078 PMU_PMEVTYPER_EL0(17),
1079 PMU_PMEVTYPER_EL0(18),
1080 PMU_PMEVTYPER_EL0(19),
1081 PMU_PMEVTYPER_EL0(20),
1082 PMU_PMEVTYPER_EL0(21),
1083 PMU_PMEVTYPER_EL0(22),
1084 PMU_PMEVTYPER_EL0(23),
1085 PMU_PMEVTYPER_EL0(24),
1086 PMU_PMEVTYPER_EL0(25),
1087 PMU_PMEVTYPER_EL0(26),
1088 PMU_PMEVTYPER_EL0(27),
1089 PMU_PMEVTYPER_EL0(28),
1090 PMU_PMEVTYPER_EL0(29),
1091 PMU_PMEVTYPER_EL0(30),
1092 /* PMCCFILTR_EL0
1093 * This register resets as unknown in 64bit mode while it resets as zero
1094 * in 32bit mode. Here we choose to reset it as zero for consistency.
1095 */
1096 { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b1111), Op2(0b111),
1097 access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 },
1098
671 /* DACR32_EL2 */ 1099 /* DACR32_EL2 */
672 { Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000), 1100 { Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000),
673 NULL, reset_unknown, DACR32_EL2 }, 1101 NULL, reset_unknown, DACR32_EL2 },
@@ -857,6 +1285,20 @@ static const struct sys_reg_desc cp14_64_regs[] = {
857 { Op1( 0), CRm( 2), .access = trap_raz_wi }, 1285 { Op1( 0), CRm( 2), .access = trap_raz_wi },
858}; 1286};
859 1287
1288/* Macro to expand the PMEVCNTRn register */
1289#define PMU_PMEVCNTR(n) \
1290 /* PMEVCNTRn */ \
1291 { Op1(0), CRn(0b1110), \
1292 CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
1293 access_pmu_evcntr }
1294
1295/* Macro to expand the PMEVTYPERn register */
1296#define PMU_PMEVTYPER(n) \
1297 /* PMEVTYPERn */ \
1298 { Op1(0), CRn(0b1110), \
1299 CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
1300 access_pmu_evtyper }
1301
860/* 1302/*
861 * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding, 1303 * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
862 * depending on the way they are accessed (as a 32bit or a 64bit 1304 * depending on the way they are accessed (as a 32bit or a 64bit
@@ -885,19 +1327,21 @@ static const struct sys_reg_desc cp15_regs[] = {
885 { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw }, 1327 { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
886 1328
887 /* PMU */ 1329 /* PMU */
888 { Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi }, 1330 { Op1( 0), CRn( 9), CRm(12), Op2( 0), access_pmcr },
889 { Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi }, 1331 { Op1( 0), CRn( 9), CRm(12), Op2( 1), access_pmcnten },
890 { Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi }, 1332 { Op1( 0), CRn( 9), CRm(12), Op2( 2), access_pmcnten },
891 { Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi }, 1333 { Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmovs },
892 { Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi }, 1334 { Op1( 0), CRn( 9), CRm(12), Op2( 4), access_pmswinc },
893 { Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi }, 1335 { Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmselr },
894 { Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi }, 1336 { Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid },
895 { Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi }, 1337 { Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid },
896 { Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi }, 1338 { Op1( 0), CRn( 9), CRm(13), Op2( 0), access_pmu_evcntr },
897 { Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi }, 1339 { Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_evtyper },
898 { Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi }, 1340 { Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_evcntr },
899 { Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi }, 1341 { Op1( 0), CRn( 9), CRm(14), Op2( 0), access_pmuserenr },
900 { Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi }, 1342 { Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pminten },
1343 { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten },
1344 { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs },
901 1345
902 { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR }, 1346 { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
903 { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR }, 1347 { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
@@ -908,10 +1352,78 @@ static const struct sys_reg_desc cp15_regs[] = {
908 { Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi }, 1352 { Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi },
909 1353
910 { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, 1354 { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
1355
1356 /* PMEVCNTRn */
1357 PMU_PMEVCNTR(0),
1358 PMU_PMEVCNTR(1),
1359 PMU_PMEVCNTR(2),
1360 PMU_PMEVCNTR(3),
1361 PMU_PMEVCNTR(4),
1362 PMU_PMEVCNTR(5),
1363 PMU_PMEVCNTR(6),
1364 PMU_PMEVCNTR(7),
1365 PMU_PMEVCNTR(8),
1366 PMU_PMEVCNTR(9),
1367 PMU_PMEVCNTR(10),
1368 PMU_PMEVCNTR(11),
1369 PMU_PMEVCNTR(12),
1370 PMU_PMEVCNTR(13),
1371 PMU_PMEVCNTR(14),
1372 PMU_PMEVCNTR(15),
1373 PMU_PMEVCNTR(16),
1374 PMU_PMEVCNTR(17),
1375 PMU_PMEVCNTR(18),
1376 PMU_PMEVCNTR(19),
1377 PMU_PMEVCNTR(20),
1378 PMU_PMEVCNTR(21),
1379 PMU_PMEVCNTR(22),
1380 PMU_PMEVCNTR(23),
1381 PMU_PMEVCNTR(24),
1382 PMU_PMEVCNTR(25),
1383 PMU_PMEVCNTR(26),
1384 PMU_PMEVCNTR(27),
1385 PMU_PMEVCNTR(28),
1386 PMU_PMEVCNTR(29),
1387 PMU_PMEVCNTR(30),
1388 /* PMEVTYPERn */
1389 PMU_PMEVTYPER(0),
1390 PMU_PMEVTYPER(1),
1391 PMU_PMEVTYPER(2),
1392 PMU_PMEVTYPER(3),
1393 PMU_PMEVTYPER(4),
1394 PMU_PMEVTYPER(5),
1395 PMU_PMEVTYPER(6),
1396 PMU_PMEVTYPER(7),
1397 PMU_PMEVTYPER(8),
1398 PMU_PMEVTYPER(9),
1399 PMU_PMEVTYPER(10),
1400 PMU_PMEVTYPER(11),
1401 PMU_PMEVTYPER(12),
1402 PMU_PMEVTYPER(13),
1403 PMU_PMEVTYPER(14),
1404 PMU_PMEVTYPER(15),
1405 PMU_PMEVTYPER(16),
1406 PMU_PMEVTYPER(17),
1407 PMU_PMEVTYPER(18),
1408 PMU_PMEVTYPER(19),
1409 PMU_PMEVTYPER(20),
1410 PMU_PMEVTYPER(21),
1411 PMU_PMEVTYPER(22),
1412 PMU_PMEVTYPER(23),
1413 PMU_PMEVTYPER(24),
1414 PMU_PMEVTYPER(25),
1415 PMU_PMEVTYPER(26),
1416 PMU_PMEVTYPER(27),
1417 PMU_PMEVTYPER(28),
1418 PMU_PMEVTYPER(29),
1419 PMU_PMEVTYPER(30),
1420 /* PMCCFILTR */
1421 { Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper },
911}; 1422};
912 1423
913static const struct sys_reg_desc cp15_64_regs[] = { 1424static const struct sys_reg_desc cp15_64_regs[] = {
914 { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, 1425 { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
1426 { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
915 { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, 1427 { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
916 { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, 1428 { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
917}; 1429};
@@ -942,29 +1454,32 @@ static const struct sys_reg_desc *get_target_table(unsigned target,
942 } 1454 }
943} 1455}
944 1456
1457#define reg_to_match_value(x) \
1458 ({ \
1459 unsigned long val; \
1460 val = (x)->Op0 << 14; \
1461 val |= (x)->Op1 << 11; \
1462 val |= (x)->CRn << 7; \
1463 val |= (x)->CRm << 3; \
1464 val |= (x)->Op2; \
1465 val; \
1466 })
1467
1468static int match_sys_reg(const void *key, const void *elt)
1469{
1470 const unsigned long pval = (unsigned long)key;
1471 const struct sys_reg_desc *r = elt;
1472
1473 return pval - reg_to_match_value(r);
1474}
1475
945static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, 1476static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
946 const struct sys_reg_desc table[], 1477 const struct sys_reg_desc table[],
947 unsigned int num) 1478 unsigned int num)
948{ 1479{
949 unsigned int i; 1480 unsigned long pval = reg_to_match_value(params);
950
951 for (i = 0; i < num; i++) {
952 const struct sys_reg_desc *r = &table[i];
953
954 if (params->Op0 != r->Op0)
955 continue;
956 if (params->Op1 != r->Op1)
957 continue;
958 if (params->CRn != r->CRn)
959 continue;
960 if (params->CRm != r->CRm)
961 continue;
962 if (params->Op2 != r->Op2)
963 continue;
964 1481
965 return r; 1482 return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
966 }
967 return NULL;
968} 1483}
969 1484
970int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) 1485int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
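
find_reg() above drops the five-field linear scan in favour of bsearch(), which only works because reg_to_match_value() packs Op0/Op1/CRn/CRm/Op2 into a single value in the same order the descriptor tables are sorted by (see the "Must be sorted ascending" comment earlier in the file). A small standalone illustration of that packing and lookup (field widths copied from the macro; the comparison here is written overflow-safe instead of returning a plain difference):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct reg_desc {
	unsigned op0, op1, crn, crm, op2;
	const char *name;
};

/* Same packing as reg_to_match_value(): Op0 | Op1 | CRn | CRm | Op2. */
static unsigned long match_value(const struct reg_desc *r)
{
	return ((unsigned long)r->op0 << 14) | (r->op1 << 11) |
	       (r->crn << 7) | (r->crm << 3) | r->op2;
}

static int cmp(const void *key, const void *elt)
{
	unsigned long pval = (unsigned long)(uintptr_t)key;
	unsigned long rval = match_value(elt);

	return (pval > rval) - (pval < rval);
}

int main(void)
{
	/* Like sys_reg_descs[]: sorted ascending by (Op0, Op1, CRn, CRm, Op2). */
	struct reg_desc table[] = {
		{ 3, 0, 9, 14, 1, "PMINTENSET_EL1" },
		{ 3, 3, 9, 12, 0, "PMCR_EL0" },
		{ 3, 3, 9, 13, 0, "PMCCNTR_EL0" },
	};
	struct reg_desc key = { 3, 3, 9, 12, 0, NULL };   /* a trapped MRS of PMCR_EL0 */
	struct reg_desc *hit;

	hit = bsearch((void *)(uintptr_t)match_value(&key), table,
		      sizeof(table) / sizeof(table[0]), sizeof(table[0]), cmp);
	printf("found: %s\n", hit ? hit->name : "(none)");
	return 0;
}
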
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 2aa79c864e91..7529aab068f5 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -33,8 +33,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
33} 33}
34#endif 34#endif
35 35
36#define SPAPR_TCE_SHIFT 12
37
38#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 36#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
39#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ 37#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
40#endif 38#endif
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index c98afa538b3a..d7b343170453 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -182,7 +182,10 @@ struct kvmppc_spapr_tce_table {
182 struct list_head list; 182 struct list_head list;
183 struct kvm *kvm; 183 struct kvm *kvm;
184 u64 liobn; 184 u64 liobn;
185 u32 window_size; 185 struct rcu_head rcu;
186 u32 page_shift;
187 u64 offset; /* in pages */
188 u64 size; /* window size in pages */
186 struct page *pages[0]; 189 struct page *pages[0];
187}; 190};
188 191
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2241d5357129..2544edabe7f3 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -165,9 +165,25 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
165extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); 165extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
166 166
167extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, 167extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
168 struct kvm_create_spapr_tce *args); 168 struct kvm_create_spapr_tce_64 *args);
169extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
170 struct kvm_vcpu *vcpu, unsigned long liobn);
171extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
172 unsigned long ioba, unsigned long npages);
173extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt,
174 unsigned long tce);
175extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
176 unsigned long *ua, unsigned long **prmap);
177extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
178 unsigned long idx, unsigned long tce);
169extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 179extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
170 unsigned long ioba, unsigned long tce); 180 unsigned long ioba, unsigned long tce);
181extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
182 unsigned long liobn, unsigned long ioba,
183 unsigned long tce_list, unsigned long npages);
184extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
185 unsigned long liobn, unsigned long ioba,
186 unsigned long tce_value, unsigned long npages);
171extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 187extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
172 unsigned long ioba); 188 unsigned long ioba);
173extern struct page *kvm_alloc_hpt(unsigned long nr_pages); 189extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
@@ -437,6 +453,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
437{ 453{
438 return vcpu->arch.irq_type == KVMPPC_IRQ_XICS; 454 return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
439} 455}
456extern void kvmppc_alloc_host_rm_ops(void);
457extern void kvmppc_free_host_rm_ops(void);
440extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); 458extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
441extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); 459extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
442extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); 460extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
@@ -445,7 +463,11 @@ extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
445extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); 463extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
446extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, 464extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
447 struct kvm_vcpu *vcpu, u32 cpu); 465 struct kvm_vcpu *vcpu, u32 cpu);
466extern void kvmppc_xics_ipi_action(void);
467extern int h_ipi_redirect;
448#else 468#else
469static inline void kvmppc_alloc_host_rm_ops(void) {};
470static inline void kvmppc_free_host_rm_ops(void) {};
449static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) 471static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
450 { return 0; } 472 { return 0; }
451static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } 473static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
@@ -459,6 +481,33 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
459 { return 0; } 481 { return 0; }
460#endif 482#endif
461 483
484/*
485 * Host-side operations we want to set up while running in real
486 * mode in the guest operating on the xics.
487 * Currently only VCPU wakeup is supported.
488 */
489
490union kvmppc_rm_state {
491 unsigned long raw;
492 struct {
493 u32 in_host;
494 u32 rm_action;
495 };
496};
497
498struct kvmppc_host_rm_core {
499 union kvmppc_rm_state rm_state;
500 void *rm_data;
501 char pad[112];
502};
503
504struct kvmppc_host_rm_ops {
505 struct kvmppc_host_rm_core *rm_core;
506 void (*vcpu_kick)(struct kvm_vcpu *vcpu);
507};
508
509extern struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
510
462static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu) 511static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu)
463{ 512{
464#ifdef CONFIG_KVM_BOOKE_HV 513#ifdef CONFIG_KVM_BOOKE_HV
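
The kvmppc_host_rm_ops/kvmppc_host_rm_core structures declared above give real-mode KVM code a per-core mailbox: the real-mode side records the requested action (currently only VCPU wakeup) and its data, and the host, once woken by the new PPC_MSG_RM_HOST_ACTION IPI, consumes it through the registered vcpu_kick hook. A rough conceptual model of that handshake, not the kernel implementation (the action values, helper names and 128-byte cache-line padding are illustrative assumptions; real code also needs memory barriers and the actual IPI):

#include <stdio.h>

/* Mirrors union kvmppc_rm_state / struct kvmppc_host_rm_core above. */
enum { RM_ACTION_NONE = 0, RM_ACTION_VCPU_KICK = 1 };   /* illustrative values */

struct rm_core {
	unsigned in_host;      /* set while the host owns this core */
	unsigned rm_action;    /* what the real-mode side wants done */
	void *rm_data;         /* e.g. the vcpu to kick */
	char pad[112];         /* assumed: pads each entry to a 128-byte cache line */
};

static struct rm_core cores[4];

/* Real-mode side: cannot call normal host services, so it just leaves a note. */
static void rm_request_kick(int core, void *vcpu)
{
	cores[core].rm_data = vcpu;
	cores[core].rm_action = RM_ACTION_VCPU_KICK;
	/* ...then raise PPC_MSG_RM_HOST_ACTION via the ICP, as in smp.c below. */
}

/* Host side: runs from the IPI handler (kvmppc_xics_ipi_action in this series). */
static void host_handle_rm_action(int core, void (*vcpu_kick)(void *vcpu))
{
	if (cores[core].rm_action == RM_ACTION_VCPU_KICK) {
		cores[core].rm_action = RM_ACTION_NONE;
		vcpu_kick(cores[core].rm_data);
	}
}

static void kick(void *vcpu) { printf("kicked vcpu %p\n", vcpu); }

int main(void)
{
	int dummy_vcpu;

	rm_request_kick(1, &dummy_vcpu);
	host_handle_rm_action(1, kick);
	return 0;
}
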
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index ac9fb114e25d..47897a30982d 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -78,6 +78,9 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
78 } 78 }
79 return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift); 79 return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift);
80} 80}
81
82unsigned long vmalloc_to_phys(void *vmalloc_addr);
83
81#endif /* __ASSEMBLY__ */ 84#endif /* __ASSEMBLY__ */
82 85
83#endif /* _ASM_POWERPC_PGTABLE_H */ 86#endif /* _ASM_POWERPC_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 825663c30945..78083ed20792 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -114,6 +114,9 @@ extern int cpu_to_core_id(int cpu);
114#define PPC_MSG_TICK_BROADCAST 2 114#define PPC_MSG_TICK_BROADCAST 2
115#define PPC_MSG_DEBUGGER_BREAK 3 115#define PPC_MSG_DEBUGGER_BREAK 3
116 116
117/* This is only used by the powernv kernel */
118#define PPC_MSG_RM_HOST_ACTION 4
119
117/* for irq controllers that have dedicated ipis per message (4) */ 120/* for irq controllers that have dedicated ipis per message (4) */
118extern int smp_request_message_ipi(int virq, int message); 121extern int smp_request_message_ipi(int virq, int message);
119extern const char *smp_ipi_name[]; 122extern const char *smp_ipi_name[];
@@ -121,6 +124,7 @@ extern const char *smp_ipi_name[];
121/* for irq controllers with only a single ipi */ 124/* for irq controllers with only a single ipi */
122extern void smp_muxed_ipi_set_data(int cpu, unsigned long data); 125extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
123extern void smp_muxed_ipi_message_pass(int cpu, int msg); 126extern void smp_muxed_ipi_message_pass(int cpu, int msg);
127extern void smp_muxed_ipi_set_message(int cpu, int msg);
124extern irqreturn_t smp_ipi_demux(void); 128extern irqreturn_t smp_ipi_demux(void);
125 129
126void smp_init_pSeries(void); 130void smp_init_pSeries(void);
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 0e25bdb190bb..254604856e69 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -30,6 +30,7 @@
30#ifdef CONFIG_PPC_ICP_NATIVE 30#ifdef CONFIG_PPC_ICP_NATIVE
31extern int icp_native_init(void); 31extern int icp_native_init(void);
32extern void icp_native_flush_interrupt(void); 32extern void icp_native_flush_interrupt(void);
33extern void icp_native_cause_ipi_rm(int cpu);
33#else 34#else
34static inline int icp_native_init(void) { return -ENODEV; } 35static inline int icp_native_init(void) { return -ENODEV; }
35#endif 36#endif
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index ab4d4732c492..c93cf35ce379 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -333,6 +333,15 @@ struct kvm_create_spapr_tce {
333 __u32 window_size; 333 __u32 window_size;
334}; 334};
335 335
336/* for KVM_CAP_SPAPR_TCE_64 */
337struct kvm_create_spapr_tce_64 {
338 __u64 liobn;
339 __u32 page_shift;
340 __u32 flags;
341 __u64 offset; /* in pages */
342 __u64 size; /* in pages */
343};
344
336/* for KVM_ALLOCATE_RMA */ 345/* for KVM_ALLOCATE_RMA */
337struct kvm_allocate_rma { 346struct kvm_allocate_rma {
338 __u64 rma_size; 347 __u64 rma_size;
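
struct kvm_create_spapr_tce_64 above describes a TCE window by page shift plus offset and size in pages instead of a 32-bit byte count, which is what allows dynamic DMA windows larger than 4GB. A hedged sketch of how a VMM might fill it in for a 16GB window of 64K IOMMU pages (assumes headers and a kernel exposing KVM_CAP_SPAPR_TCE_64 and the matching KVM_CREATE_SPAPR_TCE_64 ioctl; the LIOBN is illustrative and error handling is omitted):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm  = ioctl(kvm, KVM_CREATE_VM, 0);

	struct kvm_create_spapr_tce_64 args = {
		.liobn      = 0x80000001,          /* illustrative LIOBN */
		.page_shift = 16,                  /* 64K IOMMU pages */
		.flags      = 0,
		.offset     = 0,                   /* window starts at DMA address 0 */
		.size       = (16ULL << 30) >> 16, /* 16GB window = 262144 pages */
	};

	/* Returns an fd whose pages back the in-kernel TCE table (see the
	 * mmap/fault handlers in book3s_64_vio.c below). */
	int tablefd = ioctl(vm, KVM_CREATE_SPAPR_TCE_64, &args);

	printf("TCE table fd: %d\n", tablefd);
	return 0;
}
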
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index cc13d4c83291..b7dea05f0725 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -206,7 +206,7 @@ int smp_request_message_ipi(int virq, int msg)
206 206
207#ifdef CONFIG_PPC_SMP_MUXED_IPI 207#ifdef CONFIG_PPC_SMP_MUXED_IPI
208struct cpu_messages { 208struct cpu_messages {
209 int messages; /* current messages */ 209 long messages; /* current messages */
210 unsigned long data; /* data for cause ipi */ 210 unsigned long data; /* data for cause ipi */
211}; 211};
212static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message); 212static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
@@ -218,7 +218,7 @@ void smp_muxed_ipi_set_data(int cpu, unsigned long data)
218 info->data = data; 218 info->data = data;
219} 219}
220 220
221void smp_muxed_ipi_message_pass(int cpu, int msg) 221void smp_muxed_ipi_set_message(int cpu, int msg)
222{ 222{
223 struct cpu_messages *info = &per_cpu(ipi_message, cpu); 223 struct cpu_messages *info = &per_cpu(ipi_message, cpu);
224 char *message = (char *)&info->messages; 224 char *message = (char *)&info->messages;
@@ -228,6 +228,13 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
228 */ 228 */
229 smp_mb(); 229 smp_mb();
230 message[msg] = 1; 230 message[msg] = 1;
231}
232
233void smp_muxed_ipi_message_pass(int cpu, int msg)
234{
235 struct cpu_messages *info = &per_cpu(ipi_message, cpu);
236
237 smp_muxed_ipi_set_message(cpu, msg);
231 /* 238 /*
232 * cause_ipi functions are required to include a full barrier 239 * cause_ipi functions are required to include a full barrier
233 * before doing whatever causes the IPI. 240 * before doing whatever causes the IPI.
@@ -236,20 +243,31 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
236} 243}
237 244
238#ifdef __BIG_ENDIAN__ 245#ifdef __BIG_ENDIAN__
239#define IPI_MESSAGE(A) (1 << (24 - 8 * (A))) 246#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
240#else 247#else
241#define IPI_MESSAGE(A) (1 << (8 * (A))) 248#define IPI_MESSAGE(A) (1uL << (8 * (A)))
242#endif 249#endif
243 250
244irqreturn_t smp_ipi_demux(void) 251irqreturn_t smp_ipi_demux(void)
245{ 252{
246 struct cpu_messages *info = this_cpu_ptr(&ipi_message); 253 struct cpu_messages *info = this_cpu_ptr(&ipi_message);
247 unsigned int all; 254 unsigned long all;
248 255
249 mb(); /* order any irq clear */ 256 mb(); /* order any irq clear */
250 257
251 do { 258 do {
252 all = xchg(&info->messages, 0); 259 all = xchg(&info->messages, 0);
260#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
261 /*
262 * Must check for PPC_MSG_RM_HOST_ACTION messages
263 * before PPC_MSG_CALL_FUNCTION messages because when
264 * a VM is destroyed, we call kick_all_cpus_sync()
265 * to ensure that any pending PPC_MSG_RM_HOST_ACTION
266 * messages have completed before we free any VCPUs.
267 */
268 if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
269 kvmppc_xics_ipi_action();
270#endif
253 if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION)) 271 if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
254 generic_smp_call_function_interrupt(); 272 generic_smp_call_function_interrupt();
255 if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE)) 273 if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
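
The smp.c hunk widens the per-CPU message word from int to long so that a fifth message byte (PPC_MSG_RM_HOST_ACTION) fits, and splits smp_muxed_ipi_set_message() out so real-mode code can post a message without going through the normal cause_ipi path. Each message occupies one byte of the word, and IPI_MESSAGE() converts a message number into that byte's bit, with the byte order reversed on big-endian. A small worked example of the macro arithmetic:

#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

/* Same expressions as the IPI_MESSAGE() definitions in the hunk above. */
#define IPI_MESSAGE_BE(A) (1UL << ((BITS_PER_LONG - 8) - 8 * (A)))
#define IPI_MESSAGE_LE(A) (1UL << (8 * (A)))

enum {
	PPC_MSG_CALL_FUNCTION  = 0,
	PPC_MSG_RESCHEDULE     = 1,
	PPC_MSG_TICK_BROADCAST = 2,
	PPC_MSG_DEBUGGER_BREAK = 3,
	PPC_MSG_RM_HOST_ACTION = 4,   /* the new powernv-only message */
};

int main(void)
{
	/* message[msg] = 1 stores into byte 4 of the word, which is why
	 * "messages" had to grow from a 4-byte int to a long. */
	printf("BE: RM_HOST_ACTION mask = 0x%016lx\n",
	       IPI_MESSAGE_BE(PPC_MSG_RM_HOST_ACTION));
	printf("LE: RM_HOST_ACTION mask = 0x%016lx\n",
	       IPI_MESSAGE_LE(PPC_MSG_RM_HOST_ACTION));
	return 0;
}
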
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 0570eef83fba..7f7b6d86ac73 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -8,7 +8,7 @@ ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
8KVM := ../../../virt/kvm 8KVM := ../../../virt/kvm
9 9
10common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ 10common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
11 $(KVM)/eventfd.o 11 $(KVM)/eventfd.o $(KVM)/vfio.o
12 12
13CFLAGS_e500_mmu.o := -I. 13CFLAGS_e500_mmu.o := -I.
14CFLAGS_e500_mmu_host.o := -I. 14CFLAGS_e500_mmu_host.o := -I.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 638c6d9be9e0..b34220d2aa42 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -807,7 +807,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
807{ 807{
808 808
809#ifdef CONFIG_PPC64 809#ifdef CONFIG_PPC64
810 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); 810 INIT_LIST_HEAD_RCU(&kvm->arch.spapr_tce_tables);
811 INIT_LIST_HEAD(&kvm->arch.rtas_tokens); 811 INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
812#endif 812#endif
813 813
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 54cf9bc94dad..2c2d1030843a 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -14,6 +14,7 @@
14 * 14 *
15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
16 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> 16 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
17 * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
17 */ 18 */
18 19
19#include <linux/types.h> 20#include <linux/types.h>
@@ -36,28 +37,69 @@
36#include <asm/ppc-opcode.h> 37#include <asm/ppc-opcode.h>
37#include <asm/kvm_host.h> 38#include <asm/kvm_host.h>
38#include <asm/udbg.h> 39#include <asm/udbg.h>
40#include <asm/iommu.h>
41#include <asm/tce.h>
39 42
40#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) 43static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
44{
45 return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
46}
41 47
42static long kvmppc_stt_npages(unsigned long window_size) 48static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
43{ 49{
44 return ALIGN((window_size >> SPAPR_TCE_SHIFT) 50 unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) +
45 * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; 51 (tce_pages * sizeof(struct page *));
52
53 return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
46} 54}
47 55
48static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt) 56static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
49{ 57{
50 struct kvm *kvm = stt->kvm; 58 long ret = 0;
51 int i;
52 59
53 mutex_lock(&kvm->lock); 60 if (!current || !current->mm)
54 list_del(&stt->list); 61 return ret; /* process exited */
55 for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++) 62
63 down_write(&current->mm->mmap_sem);
64
65 if (inc) {
66 unsigned long locked, lock_limit;
67
68 locked = current->mm->locked_vm + stt_pages;
69 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
70 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
71 ret = -ENOMEM;
72 else
73 current->mm->locked_vm += stt_pages;
74 } else {
75 if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
76 stt_pages = current->mm->locked_vm;
77
78 current->mm->locked_vm -= stt_pages;
79 }
80
81 pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
82 inc ? '+' : '-',
83 stt_pages << PAGE_SHIFT,
84 current->mm->locked_vm << PAGE_SHIFT,
85 rlimit(RLIMIT_MEMLOCK),
86 ret ? " - exceeded" : "");
87
88 up_write(&current->mm->mmap_sem);
89
90 return ret;
91}
92
93static void release_spapr_tce_table(struct rcu_head *head)
94{
95 struct kvmppc_spapr_tce_table *stt = container_of(head,
96 struct kvmppc_spapr_tce_table, rcu);
97 unsigned long i, npages = kvmppc_tce_pages(stt->size);
98
99 for (i = 0; i < npages; i++)
56 __free_page(stt->pages[i]); 100 __free_page(stt->pages[i]);
57 kfree(stt);
58 mutex_unlock(&kvm->lock);
59 101
60 kvm_put_kvm(kvm); 102 kfree(stt);
61} 103}
62 104
63static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 105static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -65,7 +107,7 @@ static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
65 struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data; 107 struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
66 struct page *page; 108 struct page *page;
67 109
68 if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size)) 110 if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
69 return VM_FAULT_SIGBUS; 111 return VM_FAULT_SIGBUS;
70 112
71 page = stt->pages[vmf->pgoff]; 113 page = stt->pages[vmf->pgoff];
@@ -88,7 +130,14 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
88{ 130{
89 struct kvmppc_spapr_tce_table *stt = filp->private_data; 131 struct kvmppc_spapr_tce_table *stt = filp->private_data;
90 132
91 release_spapr_tce_table(stt); 133 list_del_rcu(&stt->list);
134
135 kvm_put_kvm(stt->kvm);
136
137 kvmppc_account_memlimit(
138 kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
139 call_rcu(&stt->rcu, release_spapr_tce_table);
140
92 return 0; 141 return 0;
93} 142}
94 143
@@ -98,20 +147,29 @@ static const struct file_operations kvm_spapr_tce_fops = {
98}; 147};
99 148
100long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, 149long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
101 struct kvm_create_spapr_tce *args) 150 struct kvm_create_spapr_tce_64 *args)
102{ 151{
103 struct kvmppc_spapr_tce_table *stt = NULL; 152 struct kvmppc_spapr_tce_table *stt = NULL;
104 long npages; 153 unsigned long npages, size;
105 int ret = -ENOMEM; 154 int ret = -ENOMEM;
106 int i; 155 int i;
107 156
157 if (!args->size)
158 return -EINVAL;
159
108 /* Check this LIOBN hasn't been previously allocated */ 160 /* Check this LIOBN hasn't been previously allocated */
109 list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { 161 list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
110 if (stt->liobn == args->liobn) 162 if (stt->liobn == args->liobn)
111 return -EBUSY; 163 return -EBUSY;
112 } 164 }
113 165
114 npages = kvmppc_stt_npages(args->window_size); 166 size = args->size;
167 npages = kvmppc_tce_pages(size);
168 ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
169 if (ret) {
170 stt = NULL;
171 goto fail;
172 }
115 173
116 stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), 174 stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
117 GFP_KERNEL); 175 GFP_KERNEL);
@@ -119,7 +177,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
119 goto fail; 177 goto fail;
120 178
121 stt->liobn = args->liobn; 179 stt->liobn = args->liobn;
122 stt->window_size = args->window_size; 180 stt->page_shift = args->page_shift;
181 stt->offset = args->offset;
182 stt->size = size;
123 stt->kvm = kvm; 183 stt->kvm = kvm;
124 184
125 for (i = 0; i < npages; i++) { 185 for (i = 0; i < npages; i++) {
@@ -131,7 +191,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
131 kvm_get_kvm(kvm); 191 kvm_get_kvm(kvm);
132 192
133 mutex_lock(&kvm->lock); 193 mutex_lock(&kvm->lock);
134 list_add(&stt->list, &kvm->arch.spapr_tce_tables); 194 list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
135 195
136 mutex_unlock(&kvm->lock); 196 mutex_unlock(&kvm->lock);
137 197
@@ -148,3 +208,59 @@ fail:
148 } 208 }
149 return ret; 209 return ret;
150} 210}
211
212long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
213 unsigned long liobn, unsigned long ioba,
214 unsigned long tce_list, unsigned long npages)
215{
216 struct kvmppc_spapr_tce_table *stt;
217 long i, ret = H_SUCCESS, idx;
218 unsigned long entry, ua = 0;
219 u64 __user *tces, tce;
220
221 stt = kvmppc_find_table(vcpu, liobn);
222 if (!stt)
223 return H_TOO_HARD;
224
225 entry = ioba >> stt->page_shift;
226 /*
 227	 * SPAPR spec says that the maximum size of the list is 512 TCEs,
 228	 * so the whole table fits in a 4K page
229 */
230 if (npages > 512)
231 return H_PARAMETER;
232
233 if (tce_list & (SZ_4K - 1))
234 return H_PARAMETER;
235
236 ret = kvmppc_ioba_validate(stt, ioba, npages);
237 if (ret != H_SUCCESS)
238 return ret;
239
240 idx = srcu_read_lock(&vcpu->kvm->srcu);
241 if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
242 ret = H_TOO_HARD;
243 goto unlock_exit;
244 }
245 tces = (u64 __user *) ua;
246
247 for (i = 0; i < npages; ++i) {
248 if (get_user(tce, tces + i)) {
249 ret = H_TOO_HARD;
250 goto unlock_exit;
251 }
252 tce = be64_to_cpu(tce);
253
254 ret = kvmppc_tce_validate(stt, tce);
255 if (ret != H_SUCCESS)
256 goto unlock_exit;
257
258 kvmppc_tce_put(stt, entry + i, tce);
259 }
260
261unlock_exit:
262 srcu_read_unlock(&vcpu->kvm->srcu, idx);
263
264 return ret;
265}
266EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);
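
kvmppc_h_put_tce_indirect() above turns the I/O bus address into a table index with entry = ioba >> page_shift, after kvmppc_ioba_validate() (added to book3s_64_vio_hv.c below) has checked alignment and range against the window's page_shift, offset and size. A worked example of those checks for a hypothetical window, mirroring the validation arithmetic:

#include <stdio.h>

/* Window geometry as stored in kvmppc_spapr_tce_table: all in IOMMU pages. */
struct tce_window {
	unsigned page_shift;   /* e.g. 16 for 64K pages */
	unsigned long offset;  /* first valid entry index */
	unsigned long size;    /* number of entries */
};

/* Same checks as kvmppc_ioba_validate(): alignment, range, and overflow. */
static int ioba_valid(const struct tce_window *w,
		      unsigned long ioba, unsigned long npages)
{
	unsigned long mask = (1UL << w->page_shift) - 1;
	unsigned long idx = ioba >> w->page_shift;

	if ((ioba & mask) || idx < w->offset ||
	    idx - w->offset + npages > w->size || idx + npages < idx)
		return 0;
	return 1;
}

int main(void)
{
	/* Hypothetical 64K-page window: entries 0..16383 (1GB of DMA space). */
	struct tce_window w = { .page_shift = 16, .offset = 0, .size = 16384 };

	printf("%d\n", ioba_valid(&w, 0x20000, 1));       /* entry 2: ok */
	printf("%d\n", ioba_valid(&w, 0x21000, 1));       /* misaligned: rejected */
	printf("%d\n", ioba_valid(&w, 16384UL << 16, 1)); /* past the window: rejected */
	return 0;
}
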
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 89e96b3e0039..44be73e6aa26 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -14,6 +14,7 @@
14 * 14 *
15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
16 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> 16 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
17 * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
17 */ 18 */
18 19
19#include <linux/types.h> 20#include <linux/types.h>
@@ -30,76 +31,321 @@
30#include <asm/kvm_ppc.h> 31#include <asm/kvm_ppc.h>
31#include <asm/kvm_book3s.h> 32#include <asm/kvm_book3s.h>
32#include <asm/mmu-hash64.h> 33#include <asm/mmu-hash64.h>
34#include <asm/mmu_context.h>
33#include <asm/hvcall.h> 35#include <asm/hvcall.h>
34#include <asm/synch.h> 36#include <asm/synch.h>
35#include <asm/ppc-opcode.h> 37#include <asm/ppc-opcode.h>
36#include <asm/kvm_host.h> 38#include <asm/kvm_host.h>
37#include <asm/udbg.h> 39#include <asm/udbg.h>
40#include <asm/iommu.h>
41#include <asm/tce.h>
42#include <asm/iommu.h>
38 43
39#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) 44#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
40 45
41/* WARNING: This will be called in real-mode on HV KVM and virtual 46/*
47 * Finds a TCE table descriptor by LIOBN.
48 *
49 * WARNING: This will be called in real or virtual mode on HV KVM and virtual
42 * mode on PR KVM 50 * mode on PR KVM
43 */ 51 */
44long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 52struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
45 unsigned long ioba, unsigned long tce) 53 unsigned long liobn)
46{ 54{
47 struct kvm *kvm = vcpu->kvm; 55 struct kvm *kvm = vcpu->kvm;
48 struct kvmppc_spapr_tce_table *stt; 56 struct kvmppc_spapr_tce_table *stt;
49 57
58 list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list)
59 if (stt->liobn == liobn)
60 return stt;
61
62 return NULL;
63}
64EXPORT_SYMBOL_GPL(kvmppc_find_table);
65
66/*
67 * Validates IO address.
68 *
69 * WARNING: This will be called in real-mode on HV KVM and virtual
70 * mode on PR KVM
71 */
72long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
73 unsigned long ioba, unsigned long npages)
74{
75 unsigned long mask = (1ULL << stt->page_shift) - 1;
76 unsigned long idx = ioba >> stt->page_shift;
77
78 if ((ioba & mask) || (idx < stt->offset) ||
79 (idx - stt->offset + npages > stt->size) ||
80 (idx + npages < idx))
81 return H_PARAMETER;
82
83 return H_SUCCESS;
84}
85EXPORT_SYMBOL_GPL(kvmppc_ioba_validate);
86
87/*
88 * Validates TCE address.
89 * At the moment flags and page mask are validated.
90 * As the host kernel does not access those addresses (just puts them
 91 * As the host kernel does not access those addresses (it just puts them
 92 * into the table and user space is supposed to process them), we can skip
 93 * checking other things (such as whether a TCE is a guest RAM address or the page
94 *
95 * WARNING: This will be called in real-mode on HV KVM and virtual
96 * mode on PR KVM
97 */
98long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
99{
100 unsigned long page_mask = ~((1ULL << stt->page_shift) - 1);
101 unsigned long mask = ~(page_mask | TCE_PCI_WRITE | TCE_PCI_READ);
102
103 if (tce & mask)
104 return H_PARAMETER;
105
106 return H_SUCCESS;
107}
108EXPORT_SYMBOL_GPL(kvmppc_tce_validate);
109
110/* Note on the use of page_address() in real mode,
111 *
112 * It is safe to use page_address() in real mode on ppc64 because
113 * page_address() is always defined as lowmem_page_address()
 114 * which returns __va(PFN_PHYS(page_to_pfn(page))), which is an arithmetic
 115 * operation and does not access the page struct.
116 *
 117 * Theoretically page_address() could be defined differently,
118 * but either WANT_PAGE_VIRTUAL or HASHED_PAGE_VIRTUAL
119 * would have to be enabled.
120 * WANT_PAGE_VIRTUAL is never enabled on ppc32/ppc64,
121 * HASHED_PAGE_VIRTUAL could be enabled for ppc32 only and only
122 * if CONFIG_HIGHMEM is defined. As CONFIG_SPARSEMEM_VMEMMAP
123 * is not expected to be enabled on ppc32, page_address()
124 * is safe for ppc32 as well.
125 *
126 * WARNING: This will be called in real-mode on HV KVM and virtual
127 * mode on PR KVM
128 */
129static u64 *kvmppc_page_address(struct page *page)
130{
131#if defined(HASHED_PAGE_VIRTUAL) || defined(WANT_PAGE_VIRTUAL)
132#error TODO: fix to avoid page_address() here
133#endif
134 return (u64 *) page_address(page);
135}
136
137/*
138 * Handles TCE requests for emulated devices.
 139 * Puts guest TCE values into the table and expects user space to convert them.
 140 * Called in both real and virtual modes.
 141 * It cannot fail, so kvmppc_tce_validate must be called before it.
142 *
143 * WARNING: This will be called in real-mode on HV KVM and virtual
144 * mode on PR KVM
145 */
146void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
147 unsigned long idx, unsigned long tce)
148{
149 struct page *page;
150 u64 *tbl;
151
152 idx -= stt->offset;
153 page = stt->pages[idx / TCES_PER_PAGE];
154 tbl = kvmppc_page_address(page);
155
156 tbl[idx % TCES_PER_PAGE] = tce;
157}
158EXPORT_SYMBOL_GPL(kvmppc_tce_put);
159
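kvmppc_tce_put() resolves a window-relative entry number with two divisions: idx / TCES_PER_PAGE selects the backing page and idx % TCES_PER_PAGE the slot within it. A tiny standalone illustration of that two-level indexing (sizes assume 4K pages and 64-bit TCEs, as in the patch):

#include <assert.h>

#define PAGE_SIZE	4096UL
#define TCES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long long))	/* 512 */

static void tce_index(unsigned long idx, unsigned long *page, unsigned long *slot)
{
	*page = idx / TCES_PER_PAGE;
	*slot = idx % TCES_PER_PAGE;
}

int main(void)
{
	unsigned long page, slot;

	tce_index(513, &page, &slot);
	assert(page == 1 && slot == 1);	/* entry 513 lives in page 1, slot 1 */
	return 0;
}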
160long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
161 unsigned long *ua, unsigned long **prmap)
162{
163 unsigned long gfn = gpa >> PAGE_SHIFT;
164 struct kvm_memory_slot *memslot;
165
166 memslot = search_memslots(kvm_memslots(kvm), gfn);
167 if (!memslot)
168 return -EINVAL;
169
170 *ua = __gfn_to_hva_memslot(memslot, gfn) |
171 (gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
172
173#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
174 if (prmap)
175 *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
176#endif
177
178 return 0;
179}
180EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
181
182#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
183long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
184 unsigned long ioba, unsigned long tce)
185{
186 struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
187 long ret;
188
50 /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ 189 /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
51 /* liobn, ioba, tce); */ 190 /* liobn, ioba, tce); */
52 191
53 list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { 192 if (!stt)
54 if (stt->liobn == liobn) { 193 return H_TOO_HARD;
55 unsigned long idx = ioba >> SPAPR_TCE_SHIFT; 194
56 struct page *page; 195 ret = kvmppc_ioba_validate(stt, ioba, 1);
57 u64 *tbl; 196 if (ret != H_SUCCESS)
58 197 return ret;
59 /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */
60 /* liobn, stt, stt->window_size); */
61 if (ioba >= stt->window_size)
62 return H_PARAMETER;
63
64 page = stt->pages[idx / TCES_PER_PAGE];
65 tbl = (u64 *)page_address(page);
66
67 /* FIXME: Need to validate the TCE itself */
68 /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */
69 tbl[idx % TCES_PER_PAGE] = tce;
70 return H_SUCCESS;
71 }
72 }
73 198
74 /* Didn't find the liobn, punt it to userspace */ 199 ret = kvmppc_tce_validate(stt, tce);
75 return H_TOO_HARD; 200 if (ret != H_SUCCESS)
201 return ret;
202
203 kvmppc_tce_put(stt, ioba >> stt->page_shift, tce);
204
205 return H_SUCCESS;
76} 206}
77EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); 207EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
78 208
79long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 209static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
80 unsigned long ioba) 210 unsigned long ua, unsigned long *phpa)
211{
212 pte_t *ptep, pte;
213 unsigned shift = 0;
214
215 ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift);
216 if (!ptep || !pte_present(*ptep))
217 return -ENXIO;
218 pte = *ptep;
219
220 if (!shift)
221 shift = PAGE_SHIFT;
222
223 /* Avoid handling anything potentially complicated in realmode */
224 if (shift > PAGE_SHIFT)
225 return -EAGAIN;
226
227 if (!pte_young(pte))
228 return -EAGAIN;
229
230 *phpa = (pte_pfn(pte) << PAGE_SHIFT) | (ua & ((1ULL << shift) - 1)) |
231 (ua & ~PAGE_MASK);
232
233 return 0;
234}
235
236long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
237 unsigned long liobn, unsigned long ioba,
238 unsigned long tce_list, unsigned long npages)
81{ 239{
82 struct kvm *kvm = vcpu->kvm;
83 struct kvmppc_spapr_tce_table *stt; 240 struct kvmppc_spapr_tce_table *stt;
241 long i, ret = H_SUCCESS;
242 unsigned long tces, entry, ua = 0;
243 unsigned long *rmap = NULL;
244
245 stt = kvmppc_find_table(vcpu, liobn);
246 if (!stt)
247 return H_TOO_HARD;
248
249 entry = ioba >> stt->page_shift;
250 /*
 251 * The spec says that the maximum size of the list is 512 TCEs,
 252 * so the whole table fits within a single 4K page.
253 */
254 if (npages > 512)
255 return H_PARAMETER;
256
257 if (tce_list & (SZ_4K - 1))
258 return H_PARAMETER;
259
260 ret = kvmppc_ioba_validate(stt, ioba, npages);
261 if (ret != H_SUCCESS)
262 return ret;
84 263
85 list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { 264 if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
86 if (stt->liobn == liobn) { 265 return H_TOO_HARD;
87 unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
88 struct page *page;
89 u64 *tbl;
90 266
91 if (ioba >= stt->window_size) 267 rmap = (void *) vmalloc_to_phys(rmap);
92 return H_PARAMETER;
93 268
94 page = stt->pages[idx / TCES_PER_PAGE]; 269 /*
95 tbl = (u64 *)page_address(page); 270 * Synchronize with the MMU notifier callbacks in
271 * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
272 * While we have the rmap lock, code running on other CPUs
273 * cannot finish unmapping the host real page that backs
274 * this guest real page, so we are OK to access the host
275 * real page.
276 */
277 lock_rmap(rmap);
278 if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
279 ret = H_TOO_HARD;
280 goto unlock_exit;
281 }
282
283 for (i = 0; i < npages; ++i) {
284 unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
285
286 ret = kvmppc_tce_validate(stt, tce);
287 if (ret != H_SUCCESS)
288 goto unlock_exit;
96 289
97 vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE]; 290 kvmppc_tce_put(stt, entry + i, tce);
98 return H_SUCCESS;
99 }
100 } 291 }
101 292
102 /* Didn't find the liobn, punt it to userspace */ 293unlock_exit:
103 return H_TOO_HARD; 294 unlock_rmap(rmap);
295
296 return ret;
297}
298
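Two of the guards in kvmppc_rm_h_put_tce_indirect() come straight from the architecture: the list may carry at most 512 TCEs, and since 512 * 8 bytes is exactly 4 KiB the list must be 4K aligned so it can never straddle a page. A trivial standalone check mirroring those two tests (names are illustrative):

#include <stdbool.h>

#define SZ_4K 4096UL

/* The TCE list is an array of 64-bit entries; 512 of them fill one 4K page. */
static bool tce_list_fits_one_page(unsigned long tce_list, unsigned long npages)
{
	if (npages > 512)		/* more entries than one page can hold */
		return false;
	if (tce_list & (SZ_4K - 1))	/* unaligned list could cross a page   */
		return false;
	return true;
}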
299long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
300 unsigned long liobn, unsigned long ioba,
301 unsigned long tce_value, unsigned long npages)
302{
303 struct kvmppc_spapr_tce_table *stt;
304 long i, ret;
305
306 stt = kvmppc_find_table(vcpu, liobn);
307 if (!stt)
308 return H_TOO_HARD;
309
310 ret = kvmppc_ioba_validate(stt, ioba, npages);
311 if (ret != H_SUCCESS)
312 return ret;
313
 314 /* Check permission bits only, to allow userspace to poison TCEs for debugging */
315 if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
316 return H_PARAMETER;
317
318 for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
319 kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
320
321 return H_SUCCESS;
322}
323EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
324
325long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
326 unsigned long ioba)
327{
328 struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
329 long ret;
330 unsigned long idx;
331 struct page *page;
332 u64 *tbl;
333
334 if (!stt)
335 return H_TOO_HARD;
336
337 ret = kvmppc_ioba_validate(stt, ioba, 1);
338 if (ret != H_SUCCESS)
339 return ret;
340
341 idx = (ioba >> stt->page_shift) - stt->offset;
342 page = stt->pages[idx / TCES_PER_PAGE];
343 tbl = (u64 *)page_address(page);
344
345 vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
346
347 return H_SUCCESS;
104} 348}
105EXPORT_SYMBOL_GPL(kvmppc_h_get_tce); 349EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);
350
351#endif /* KVM_BOOK3S_HV_POSSIBLE */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f1187bb6dd4d..84fb4fcfaa41 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -81,6 +81,17 @@ static int target_smt_mode;
81module_param(target_smt_mode, int, S_IRUGO | S_IWUSR); 81module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
82MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)"); 82MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
83 83
84#ifdef CONFIG_KVM_XICS
85static struct kernel_param_ops module_param_ops = {
86 .set = param_set_int,
87 .get = param_get_int,
88};
89
90module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
91 S_IRUGO | S_IWUSR);
92MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
93#endif
94
84static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 95static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
85static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 96static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
86 97
@@ -768,7 +779,31 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
768 if (kvmppc_xics_enabled(vcpu)) { 779 if (kvmppc_xics_enabled(vcpu)) {
769 ret = kvmppc_xics_hcall(vcpu, req); 780 ret = kvmppc_xics_hcall(vcpu, req);
770 break; 781 break;
771 } /* fallthrough */ 782 }
783 return RESUME_HOST;
784 case H_PUT_TCE:
785 ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
786 kvmppc_get_gpr(vcpu, 5),
787 kvmppc_get_gpr(vcpu, 6));
788 if (ret == H_TOO_HARD)
789 return RESUME_HOST;
790 break;
791 case H_PUT_TCE_INDIRECT:
792 ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
793 kvmppc_get_gpr(vcpu, 5),
794 kvmppc_get_gpr(vcpu, 6),
795 kvmppc_get_gpr(vcpu, 7));
796 if (ret == H_TOO_HARD)
797 return RESUME_HOST;
798 break;
799 case H_STUFF_TCE:
800 ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
801 kvmppc_get_gpr(vcpu, 5),
802 kvmppc_get_gpr(vcpu, 6),
803 kvmppc_get_gpr(vcpu, 7));
804 if (ret == H_TOO_HARD)
805 return RESUME_HOST;
806 break;
772 default: 807 default:
773 return RESUME_HOST; 808 return RESUME_HOST;
774 } 809 }
@@ -2279,6 +2314,46 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
2279} 2314}
2280 2315
2281/* 2316/*
 2317 * Clear this core from the list of active host cores as we are about to
 2318 * enter the guest. Only do this if it is the primary thread of the
 2319 * core (not a subcore) that is entering the guest.
2320 */
2321static inline void kvmppc_clear_host_core(int cpu)
2322{
2323 int core;
2324
2325 if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
2326 return;
2327 /*
 2328 * The memory barrier can be omitted here as we will do an smp_wmb()
 2329 * later in kvmppc_start_thread, and we need to ensure that this state is
 2330 * visible to other CPUs only after we enter the guest.
2331 */
2332 core = cpu >> threads_shift;
2333 kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
2334}
2335
2336/*
 2337 * Advertise this core as an active host core since we have exited the guest.
 2338 * This only needs to be done if it is the primary thread of the core that is
 2339 * exiting.
2340 */
2341static inline void kvmppc_set_host_core(int cpu)
2342{
2343 int core;
2344
2345 if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
2346 return;
2347
2348 /*
 2349 * The memory barrier can be omitted here because the spin_unlock
 2350 * immediately after this provides the required barrier.
2351 */
2352 core = cpu >> threads_shift;
2353 kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
2354}
2355
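kvmppc_clear_host_core() and kvmppc_set_host_core() maintain a per-core in_host flag that the real-mode XICS code later consults when it looks for a core able to handle an H_IPI redirect; only the primary thread of a core flips the flag, on guest entry and exit respectively. A stripped-down model of that bookkeeping, with a plain array standing in for the kvmppc_host_rm_ops structure (names are placeholders):

#define MAX_CORES 64

/* 1 = core is running host code, 0 = core has entered a guest. */
static unsigned char core_in_host[MAX_CORES];

static void model_clear_host_core(int cpu, int threads_shift)
{
	core_in_host[cpu >> threads_shift] = 0;	/* entering the guest */
}

static void model_set_host_core(int cpu, int threads_shift)
{
	core_in_host[cpu >> threads_shift] = 1;	/* back in the host */
}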
2356/*
2282 * Run a set of guest threads on a physical core. 2357 * Run a set of guest threads on a physical core.
2283 * Called with vc->lock held. 2358 * Called with vc->lock held.
2284 */ 2359 */
@@ -2390,6 +2465,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2390 } 2465 }
2391 } 2466 }
2392 2467
2468 kvmppc_clear_host_core(pcpu);
2469
2393 /* Start all the threads */ 2470 /* Start all the threads */
2394 active = 0; 2471 active = 0;
2395 for (sub = 0; sub < core_info.n_subcores; ++sub) { 2472 for (sub = 0; sub < core_info.n_subcores; ++sub) {
@@ -2486,6 +2563,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2486 kvmppc_ipi_thread(pcpu + i); 2563 kvmppc_ipi_thread(pcpu + i);
2487 } 2564 }
2488 2565
2566 kvmppc_set_host_core(pcpu);
2567
2489 spin_unlock(&vc->lock); 2568 spin_unlock(&vc->lock);
2490 2569
2491 /* make sure updates to secondary vcpu structs are visible now */ 2570 /* make sure updates to secondary vcpu structs are visible now */
@@ -2983,6 +3062,114 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
2983 goto out_srcu; 3062 goto out_srcu;
2984} 3063}
2985 3064
3065#ifdef CONFIG_KVM_XICS
3066static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action,
3067 void *hcpu)
3068{
3069 unsigned long cpu = (long)hcpu;
3070
3071 switch (action) {
3072 case CPU_UP_PREPARE:
3073 case CPU_UP_PREPARE_FROZEN:
3074 kvmppc_set_host_core(cpu);
3075 break;
3076
3077#ifdef CONFIG_HOTPLUG_CPU
3078 case CPU_DEAD:
3079 case CPU_DEAD_FROZEN:
3080 case CPU_UP_CANCELED:
3081 case CPU_UP_CANCELED_FROZEN:
3082 kvmppc_clear_host_core(cpu);
3083 break;
3084#endif
3085 default:
3086 break;
3087 }
3088
3089 return NOTIFY_OK;
3090}
3091
3092static struct notifier_block kvmppc_cpu_notifier = {
3093 .notifier_call = kvmppc_cpu_notify,
3094};
3095
3096/*
3097 * Allocate a per-core structure for managing state about which cores are
3098 * running in the host versus the guest and for exchanging data between
 3099 * real mode KVM and CPUs running in the host.
3100 * This is only done for the first VM.
3101 * The allocated structure stays even if all VMs have stopped.
3102 * It is only freed when the kvm-hv module is unloaded.
 3103 * It is OK for this routine to fail; we just don't support host
 3104 * core operations such as redirecting H_IPI wakeups.
3105 */
3106void kvmppc_alloc_host_rm_ops(void)
3107{
3108 struct kvmppc_host_rm_ops *ops;
3109 unsigned long l_ops;
3110 int cpu, core;
3111 int size;
3112
 3113 /* Not the first time here? */
3114 if (kvmppc_host_rm_ops_hv != NULL)
3115 return;
3116
3117 ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL);
3118 if (!ops)
3119 return;
3120
3121 size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core);
3122 ops->rm_core = kzalloc(size, GFP_KERNEL);
3123
3124 if (!ops->rm_core) {
3125 kfree(ops);
3126 return;
3127 }
3128
3129 get_online_cpus();
3130
3131 for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
3132 if (!cpu_online(cpu))
3133 continue;
3134
3135 core = cpu >> threads_shift;
3136 ops->rm_core[core].rm_state.in_host = 1;
3137 }
3138
3139 ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;
3140
3141 /*
3142 * Make the contents of the kvmppc_host_rm_ops structure visible
3143 * to other CPUs before we assign it to the global variable.
3144 * Do an atomic assignment (no locks used here), but if someone
3145 * beats us to it, just free our copy and return.
3146 */
3147 smp_wmb();
3148 l_ops = (unsigned long) ops;
3149
3150 if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
3151 put_online_cpus();
3152 kfree(ops->rm_core);
3153 kfree(ops);
3154 return;
3155 }
3156
3157 register_cpu_notifier(&kvmppc_cpu_notifier);
3158
3159 put_online_cpus();
3160}
3161
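kvmppc_alloc_host_rm_ops() publishes the structure with an smp_wmb() followed by a cmpxchg on the global pointer, so whichever VM creation races in first wins and the loser frees its copy. A user-space sketch of the same publish-once pattern, using C11 atomics instead of the kernel's cmpxchg64() (illustrative, not the kernel primitive):

#include <stdatomic.h>
#include <stdlib.h>

struct host_rm_ops { int dummy; };

static _Atomic(struct host_rm_ops *) global_ops;

/* Build a candidate and install it only if nobody else has done so yet. */
static struct host_rm_ops *alloc_once(void)
{
	struct host_rm_ops *ops, *expected = NULL;

	ops = calloc(1, sizeof(*ops));
	if (!ops)
		return NULL;

	/* Release ordering plays the role of the smp_wmb() in the patch. */
	if (!atomic_compare_exchange_strong_explicit(&global_ops, &expected, ops,
						     memory_order_release,
						     memory_order_acquire)) {
		free(ops);		/* someone beat us to it; use theirs */
		return expected;
	}
	return ops;
}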
3162void kvmppc_free_host_rm_ops(void)
3163{
3164 if (kvmppc_host_rm_ops_hv) {
3165 unregister_cpu_notifier(&kvmppc_cpu_notifier);
3166 kfree(kvmppc_host_rm_ops_hv->rm_core);
3167 kfree(kvmppc_host_rm_ops_hv);
3168 kvmppc_host_rm_ops_hv = NULL;
3169 }
3170}
3171#endif
3172
2986static int kvmppc_core_init_vm_hv(struct kvm *kvm) 3173static int kvmppc_core_init_vm_hv(struct kvm *kvm)
2987{ 3174{
2988 unsigned long lpcr, lpid; 3175 unsigned long lpcr, lpid;
@@ -2995,6 +3182,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
2995 return -ENOMEM; 3182 return -ENOMEM;
2996 kvm->arch.lpid = lpid; 3183 kvm->arch.lpid = lpid;
2997 3184
3185 kvmppc_alloc_host_rm_ops();
3186
2998 /* 3187 /*
2999 * Since we don't flush the TLB when tearing down a VM, 3188 * Since we don't flush the TLB when tearing down a VM,
3000 * and this lpid might have previously been used, 3189 * and this lpid might have previously been used,
@@ -3228,6 +3417,7 @@ static int kvmppc_book3s_init_hv(void)
3228 3417
3229static void kvmppc_book3s_exit_hv(void) 3418static void kvmppc_book3s_exit_hv(void)
3230{ 3419{
3420 kvmppc_free_host_rm_ops();
3231 kvmppc_hv_ops = NULL; 3421 kvmppc_hv_ops = NULL;
3232} 3422}
3233 3423
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index fd7006bf6b1a..5f0380db3eab 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -283,3 +283,6 @@ void kvmhv_commence_exit(int trap)
283 kvmhv_interrupt_vcore(vc, ee); 283 kvmhv_interrupt_vcore(vc, ee);
284 } 284 }
285} 285}
286
287struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
288EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 24f58076d49e..980d8a6f7284 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -17,12 +17,16 @@
17#include <asm/xics.h> 17#include <asm/xics.h>
18#include <asm/debug.h> 18#include <asm/debug.h>
19#include <asm/synch.h> 19#include <asm/synch.h>
20#include <asm/cputhreads.h>
20#include <asm/ppc-opcode.h> 21#include <asm/ppc-opcode.h>
21 22
22#include "book3s_xics.h" 23#include "book3s_xics.h"
23 24
24#define DEBUG_PASSUP 25#define DEBUG_PASSUP
25 26
27int h_ipi_redirect = 1;
28EXPORT_SYMBOL(h_ipi_redirect);
29
26static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, 30static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
27 u32 new_irq); 31 u32 new_irq);
28 32
@@ -50,11 +54,84 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics,
50 54
51/* -- ICP routines -- */ 55/* -- ICP routines -- */
52 56
57#ifdef CONFIG_SMP
58static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
59{
60 int hcpu;
61
62 hcpu = hcore << threads_shift;
63 kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
64 smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
65 icp_native_cause_ipi_rm(hcpu);
66}
67#else
68static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
69#endif
70
71/*
 72 * We start the search from our current core Id in the core map
 73 * and go around in a circle until we get back to it, looking for a
 74 * core that is running in host context and that hasn't already
 75 * been targeted for another rm_host_ops action.
 76 *
 77 * In the future, we could consider using a fairer algorithm (one
 78 * that distributes the IPIs better).
 79 *
 80 * Returns -1 if no core could be found in the host.
 81 * Otherwise, returns a core Id which has been reserved for use.
82 */
83static inline int grab_next_hostcore(int start,
84 struct kvmppc_host_rm_core *rm_core, int max, int action)
85{
86 bool success;
87 int core;
88 union kvmppc_rm_state old, new;
89
90 for (core = start + 1; core < max; core++) {
91 old = new = READ_ONCE(rm_core[core].rm_state);
92
93 if (!old.in_host || old.rm_action)
94 continue;
95
96 /* Try to grab this host core if not taken already. */
97 new.rm_action = action;
98
99 success = cmpxchg64(&rm_core[core].rm_state.raw,
100 old.raw, new.raw) == old.raw;
101 if (success) {
102 /*
103 * Make sure that the store to the rm_action is made
104 * visible before we return to caller (and the
105 * subsequent store to rm_data) to synchronize with
106 * the IPI handler.
107 */
108 smp_wmb();
109 return core;
110 }
111 }
112
113 return -1;
114}
115
116static inline int find_available_hostcore(int action)
117{
118 int core;
119 int my_core = smp_processor_id() >> threads_shift;
120 struct kvmppc_host_rm_core *rm_core = kvmppc_host_rm_ops_hv->rm_core;
121
122 core = grab_next_hostcore(my_core, rm_core, cpu_nr_cores(), action);
123 if (core == -1)
124 core = grab_next_hostcore(core, rm_core, my_core, action);
125
126 return core;
127}
128
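find_available_hostcore() realises the wrap-around described in the comment by calling grab_next_hostcore() twice: once from the current core to the end of the map, then from the start of the map back up to the current core. A small model of that two-pass circular scan over a plain array (the cmpxchg claim step is reduced to a simple flag test for clarity):

/* in_host[i] != 0 means core i is free to take a real-mode action. */
static int scan(const int *in_host, int start, int end)
{
	int core;

	for (core = start + 1; core < end; core++)
		if (in_host[core])
			return core;
	return -1;
}

static int find_host_core(const int *in_host, int my_core, int ncores)
{
	int core = scan(in_host, my_core, ncores);	/* my_core+1 .. ncores-1 */

	if (core == -1)
		core = scan(in_host, -1, my_core);	/* 0 .. my_core-1 */
	return core;
}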
53static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, 129static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
54 struct kvm_vcpu *this_vcpu) 130 struct kvm_vcpu *this_vcpu)
55{ 131{
56 struct kvmppc_icp *this_icp = this_vcpu->arch.icp; 132 struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
57 int cpu; 133 int cpu;
134 int hcore;
58 135
59 /* Mark the target VCPU as having an interrupt pending */ 136 /* Mark the target VCPU as having an interrupt pending */
60 vcpu->stat.queue_intr++; 137 vcpu->stat.queue_intr++;
@@ -66,11 +143,22 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
66 return; 143 return;
67 } 144 }
68 145
69 /* Check if the core is loaded, if not, too hard */ 146 /*
 147 * Check if the core is loaded;
 148 * if not, find an available host core to post to and wake the VCPU;
 149 * if we can't find one, set up state to eventually return H_TOO_HARD.
150 */
70 cpu = vcpu->arch.thread_cpu; 151 cpu = vcpu->arch.thread_cpu;
71 if (cpu < 0 || cpu >= nr_cpu_ids) { 152 if (cpu < 0 || cpu >= nr_cpu_ids) {
72 this_icp->rm_action |= XICS_RM_KICK_VCPU; 153 hcore = -1;
73 this_icp->rm_kick_target = vcpu; 154 if (kvmppc_host_rm_ops_hv && h_ipi_redirect)
155 hcore = find_available_hostcore(XICS_RM_KICK_VCPU);
156 if (hcore != -1) {
157 icp_send_hcore_msg(hcore, vcpu);
158 } else {
159 this_icp->rm_action |= XICS_RM_KICK_VCPU;
160 this_icp->rm_kick_target = vcpu;
161 }
74 return; 162 return;
75 } 163 }
76 164
@@ -623,3 +711,40 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
623 bail: 711 bail:
624 return check_too_hard(xics, icp); 712 return check_too_hard(xics, icp);
625} 713}
714
715/* --- Non-real mode XICS-related built-in routines --- */
716
717/**
718 * Host Operations poked by RM KVM
719 */
720static void rm_host_ipi_action(int action, void *data)
721{
722 switch (action) {
723 case XICS_RM_KICK_VCPU:
724 kvmppc_host_rm_ops_hv->vcpu_kick(data);
725 break;
726 default:
727 WARN(1, "Unexpected rm_action=%d data=%p\n", action, data);
728 break;
729 }
730
731}
732
733void kvmppc_xics_ipi_action(void)
734{
735 int core;
736 unsigned int cpu = smp_processor_id();
737 struct kvmppc_host_rm_core *rm_corep;
738
739 core = cpu >> threads_shift;
740 rm_corep = &kvmppc_host_rm_ops_hv->rm_core[core];
741
742 if (rm_corep->rm_data) {
743 rm_host_ipi_action(rm_corep->rm_state.rm_action,
744 rm_corep->rm_data);
745 /* Order these stores against the real mode KVM */
746 rm_corep->rm_data = NULL;
747 smp_wmb();
748 rm_corep->rm_state.rm_action = 0;
749 }
750}
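kvmppc_xics_ipi_action() is the host half of a small handshake: real mode claims a per-core slot by setting the action word, then publishes the payload and sends an IPI; the host keys on the payload, runs the action, and clears the payload before the action word so the slot only becomes claimable again after the data has been consumed. A reduced producer/consumer sketch of that ordering, with C11 atomics standing in for the kernel barriers (a model, not the kernel code):

#include <stdatomic.h>
#include <stdbool.h>

struct rm_slot {
	_Atomic(void *)	data;	/* payload written by the real-mode side */
	_Atomic int	action;	/* which action to run; 0 = slot is free */
};

/* Real-mode side (producer): claim the slot via the action word, then publish data. */
static bool post_action(struct rm_slot *s, void *payload, int action)
{
	int expected = 0;

	if (!atomic_compare_exchange_strong_explicit(&s->action, &expected, action,
						     memory_order_acq_rel,
						     memory_order_relaxed))
		return false;			/* slot already taken */
	atomic_store_explicit(&s->data, payload, memory_order_release);
	return true;
}

/* Host side (consumer): a visible payload means the action word is valid too. */
static void run_action(struct rm_slot *s, void (*handler)(int, void *))
{
	void *payload = atomic_load_explicit(&s->data, memory_order_acquire);

	if (!payload)
		return;
	handler(atomic_load_explicit(&s->action, memory_order_relaxed), payload);
	atomic_store_explicit(&s->data, NULL, memory_order_relaxed);
	atomic_store_explicit(&s->action, 0, memory_order_release);	/* slot reusable */
}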
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 25ae2c9913c3..85b32f16fa74 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2020,8 +2020,8 @@ hcall_real_table:
2020 .long 0 /* 0x12c */ 2020 .long 0 /* 0x12c */
2021 .long 0 /* 0x130 */ 2021 .long 0 /* 0x130 */
2022 .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table 2022 .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
2023 .long 0 /* 0x138 */ 2023 .long DOTSYM(kvmppc_h_stuff_tce) - hcall_real_table
2024 .long 0 /* 0x13c */ 2024 .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table
2025 .long 0 /* 0x140 */ 2025 .long 0 /* 0x140 */
2026 .long 0 /* 0x144 */ 2026 .long 0 /* 0x144 */
2027 .long 0 /* 0x148 */ 2027 .long 0 /* 0x148 */
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index f2c75a1e0536..02176fd52f84 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -280,6 +280,37 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
280 return EMULATE_DONE; 280 return EMULATE_DONE;
281} 281}
282 282
283static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu)
284{
285 unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
286 unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
287 unsigned long tce = kvmppc_get_gpr(vcpu, 6);
288 unsigned long npages = kvmppc_get_gpr(vcpu, 7);
289 long rc;
290
291 rc = kvmppc_h_put_tce_indirect(vcpu, liobn, ioba,
292 tce, npages);
293 if (rc == H_TOO_HARD)
294 return EMULATE_FAIL;
295 kvmppc_set_gpr(vcpu, 3, rc);
296 return EMULATE_DONE;
297}
298
299static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
300{
301 unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
302 unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
303 unsigned long tce_value = kvmppc_get_gpr(vcpu, 6);
304 unsigned long npages = kvmppc_get_gpr(vcpu, 7);
305 long rc;
306
307 rc = kvmppc_h_stuff_tce(vcpu, liobn, ioba, tce_value, npages);
308 if (rc == H_TOO_HARD)
309 return EMULATE_FAIL;
310 kvmppc_set_gpr(vcpu, 3, rc);
311 return EMULATE_DONE;
312}
313
283static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) 314static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
284{ 315{
285 long rc = kvmppc_xics_hcall(vcpu, cmd); 316 long rc = kvmppc_xics_hcall(vcpu, cmd);
@@ -306,6 +337,10 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
306 return kvmppc_h_pr_bulk_remove(vcpu); 337 return kvmppc_h_pr_bulk_remove(vcpu);
307 case H_PUT_TCE: 338 case H_PUT_TCE:
308 return kvmppc_h_pr_put_tce(vcpu); 339 return kvmppc_h_pr_put_tce(vcpu);
340 case H_PUT_TCE_INDIRECT:
341 return kvmppc_h_pr_put_tce_indirect(vcpu);
342 case H_STUFF_TCE:
343 return kvmppc_h_pr_stuff_tce(vcpu);
309 case H_CEDE: 344 case H_CEDE:
310 kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE); 345 kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
311 kvm_vcpu_block(vcpu); 346 kvm_vcpu_block(vcpu);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a3b182dcb823..19aa59b0850c 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -33,6 +33,7 @@
33#include <asm/tlbflush.h> 33#include <asm/tlbflush.h>
34#include <asm/cputhreads.h> 34#include <asm/cputhreads.h>
35#include <asm/irqflags.h> 35#include <asm/irqflags.h>
36#include <asm/iommu.h>
36#include "timing.h" 37#include "timing.h"
37#include "irq.h" 38#include "irq.h"
38#include "../mm/mmu_decl.h" 39#include "../mm/mmu_decl.h"
@@ -437,6 +438,16 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
437 unsigned int i; 438 unsigned int i;
438 struct kvm_vcpu *vcpu; 439 struct kvm_vcpu *vcpu;
439 440
441#ifdef CONFIG_KVM_XICS
442 /*
443 * We call kick_all_cpus_sync() to ensure that all
444 * CPUs have executed any pending IPIs before we
445 * continue and free VCPUs structures below.
446 */
447 if (is_kvmppc_hv_enabled(kvm))
448 kick_all_cpus_sync();
449#endif
450
440 kvm_for_each_vcpu(i, vcpu, kvm) 451 kvm_for_each_vcpu(i, vcpu, kvm)
441 kvm_arch_vcpu_free(vcpu); 452 kvm_arch_vcpu_free(vcpu);
442 453
@@ -509,6 +520,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
509 520
510#ifdef CONFIG_PPC_BOOK3S_64 521#ifdef CONFIG_PPC_BOOK3S_64
511 case KVM_CAP_SPAPR_TCE: 522 case KVM_CAP_SPAPR_TCE:
523 case KVM_CAP_SPAPR_TCE_64:
512 case KVM_CAP_PPC_ALLOC_HTAB: 524 case KVM_CAP_PPC_ALLOC_HTAB:
513 case KVM_CAP_PPC_RTAS: 525 case KVM_CAP_PPC_RTAS:
514 case KVM_CAP_PPC_FIXUP_HCALL: 526 case KVM_CAP_PPC_FIXUP_HCALL:
@@ -569,6 +581,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
569 case KVM_CAP_PPC_GET_SMMU_INFO: 581 case KVM_CAP_PPC_GET_SMMU_INFO:
570 r = 1; 582 r = 1;
571 break; 583 break;
584 case KVM_CAP_SPAPR_MULTITCE:
585 r = 1;
586 break;
572#endif 587#endif
573 default: 588 default:
574 r = 0; 589 r = 0;
@@ -1331,13 +1346,34 @@ long kvm_arch_vm_ioctl(struct file *filp,
1331 break; 1346 break;
1332 } 1347 }
1333#ifdef CONFIG_PPC_BOOK3S_64 1348#ifdef CONFIG_PPC_BOOK3S_64
1349 case KVM_CREATE_SPAPR_TCE_64: {
1350 struct kvm_create_spapr_tce_64 create_tce_64;
1351
1352 r = -EFAULT;
1353 if (copy_from_user(&create_tce_64, argp, sizeof(create_tce_64)))
1354 goto out;
1355 if (create_tce_64.flags) {
1356 r = -EINVAL;
1357 goto out;
1358 }
1359 r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
1360 goto out;
1361 }
1334 case KVM_CREATE_SPAPR_TCE: { 1362 case KVM_CREATE_SPAPR_TCE: {
1335 struct kvm_create_spapr_tce create_tce; 1363 struct kvm_create_spapr_tce create_tce;
1364 struct kvm_create_spapr_tce_64 create_tce_64;
1336 1365
1337 r = -EFAULT; 1366 r = -EFAULT;
1338 if (copy_from_user(&create_tce, argp, sizeof(create_tce))) 1367 if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
1339 goto out; 1368 goto out;
1340 r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); 1369
1370 create_tce_64.liobn = create_tce.liobn;
1371 create_tce_64.page_shift = IOMMU_PAGE_SHIFT_4K;
1372 create_tce_64.offset = 0;
1373 create_tce_64.size = create_tce.window_size >>
1374 IOMMU_PAGE_SHIFT_4K;
1375 create_tce_64.flags = 0;
1376 r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
1341 goto out; 1377 goto out;
1342 } 1378 }
1343 case KVM_PPC_GET_SMMU_INFO: { 1379 case KVM_PPC_GET_SMMU_INFO: {
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 83dfd7925c72..de37ff445362 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -243,3 +243,11 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
243} 243}
244#endif /* CONFIG_DEBUG_VM */ 244#endif /* CONFIG_DEBUG_VM */
245 245
246unsigned long vmalloc_to_phys(void *va)
247{
248 unsigned long pfn = vmalloc_to_pfn(va);
249
250 BUG_ON(!pfn);
251 return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va);
252}
253EXPORT_SYMBOL_GPL(vmalloc_to_phys);
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 9f9dfda9ed2c..3b09ecfd0aee 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -493,14 +493,6 @@ static size_t event_to_attr_ct(struct hv_24x7_event_data *event)
493 } 493 }
494} 494}
495 495
496static unsigned long vmalloc_to_phys(void *v)
497{
498 struct page *p = vmalloc_to_page(v);
499
500 BUG_ON(!p);
501 return page_to_phys(p) + offset_in_page(v);
502}
503
504/* */ 496/* */
505struct event_uniq { 497struct event_uniq {
506 struct rb_node node; 498 struct rb_node node;
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index eae32654bdf2..afdf62f2a695 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -159,6 +159,27 @@ static void icp_native_cause_ipi(int cpu, unsigned long data)
159 icp_native_set_qirr(cpu, IPI_PRIORITY); 159 icp_native_set_qirr(cpu, IPI_PRIORITY);
160} 160}
161 161
162#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
163void icp_native_cause_ipi_rm(int cpu)
164{
165 /*
 166 * Currently not used to send IPIs to another CPU
 167 * on the same core. The only caller is KVM real mode.
 168 * This needs the physical address of the XICS to have been
 169 * saved previously in kvm_hstate in the paca.
170 */
171 unsigned long xics_phys;
172
173 /*
174 * Just like the cause_ipi functions, it is required to
175 * include a full barrier (out8 includes a sync) before
176 * causing the IPI.
177 */
178 xics_phys = paca[cpu].kvm_hstate.xics_phys;
179 out_rm8((u8 *)(xics_phys + XICS_MFRR), IPI_PRIORITY);
180}
181#endif
182
162/* 183/*
163 * Called when an interrupt is received on an off-line CPU to 184 * Called when an interrupt is received on an off-line CPU to
164 * clear the interrupt, so that the CPU can go back to nap mode. 185 * clear the interrupt, so that the CPU can go back to nap mode.
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index b0c8ad0799c7..6da41fab70fb 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -20,6 +20,7 @@
20#include <linux/kvm_types.h> 20#include <linux/kvm_types.h>
21#include <linux/kvm_host.h> 21#include <linux/kvm_host.h>
22#include <linux/kvm.h> 22#include <linux/kvm.h>
23#include <linux/seqlock.h>
23#include <asm/debug.h> 24#include <asm/debug.h>
24#include <asm/cpu.h> 25#include <asm/cpu.h>
25#include <asm/fpu/api.h> 26#include <asm/fpu/api.h>
@@ -229,17 +230,11 @@ struct kvm_s390_itdb {
229 __u8 data[256]; 230 __u8 data[256];
230} __packed; 231} __packed;
231 232
232struct kvm_s390_vregs {
233 __vector128 vrs[32];
234 __u8 reserved200[512]; /* for future vector expansion */
235} __packed;
236
237struct sie_page { 233struct sie_page {
238 struct kvm_s390_sie_block sie_block; 234 struct kvm_s390_sie_block sie_block;
239 __u8 reserved200[1024]; /* 0x0200 */ 235 __u8 reserved200[1024]; /* 0x0200 */
240 struct kvm_s390_itdb itdb; /* 0x0600 */ 236 struct kvm_s390_itdb itdb; /* 0x0600 */
241 __u8 reserved700[1280]; /* 0x0700 */ 237 __u8 reserved700[2304]; /* 0x0700 */
242 struct kvm_s390_vregs vregs; /* 0x0c00 */
243} __packed; 238} __packed;
244 239
245struct kvm_vcpu_stat { 240struct kvm_vcpu_stat {
@@ -558,6 +553,15 @@ struct kvm_vcpu_arch {
558 unsigned long pfault_token; 553 unsigned long pfault_token;
559 unsigned long pfault_select; 554 unsigned long pfault_select;
560 unsigned long pfault_compare; 555 unsigned long pfault_compare;
556 bool cputm_enabled;
557 /*
558 * The seqcount protects updates to cputm_start and sie_block.cputm,
559 * this way we can have non-blocking reads with consistent values.
560 * Only the owning VCPU thread (vcpu->cpu) is allowed to change these
561 * values and to start/stop/enable/disable cpu timer accounting.
562 */
563 seqcount_t cputm_seqcount;
564 __u64 cputm_start;
561}; 565};
562 566
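The cputm_seqcount introduced above follows the usual seqcount discipline: the owning VCPU thread bumps the sequence around every update of cputm_start and sie_block->cputm, and readers retry until they observe an even, unchanged sequence, which gives them a consistent snapshot without blocking the writer. A minimal user-space model of such a lockless read (the fields and the adjustment formula are illustrative, not the s390 implementation):

#include <stdatomic.h>

/* Minimal seqcount model: the writer bumps seq to odd, updates, bumps to even. */
struct timer_state {
	_Atomic unsigned int	seq;
	unsigned long long	cputm;		/* value in the control block    */
	unsigned long long	cputm_start;	/* clock when accounting started */
};

/* Lockless reader: retry while a writer is active or raced with us. */
static unsigned long long read_cpu_timer(struct timer_state *t,
					 unsigned long long now)
{
	unsigned int s1, s2;
	unsigned long long v;

	do {
		s1 = atomic_load_explicit(&t->seq, memory_order_acquire);
		v = t->cputm;
		if (t->cputm_start)
			v -= now - t->cputm_start;	/* timer keeps running */
		s2 = atomic_load_explicit(&t->seq, memory_order_acquire);
	} while ((s1 & 1) || s1 != s2);

	return v;
}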
563struct kvm_vm_stat { 567struct kvm_vm_stat {
@@ -596,15 +600,11 @@ struct s390_io_adapter {
596#define S390_ARCH_FAC_MASK_SIZE_U64 \ 600#define S390_ARCH_FAC_MASK_SIZE_U64 \
597 (S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64)) 601 (S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64))
598 602
599struct kvm_s390_fac {
600 /* facility list requested by guest */
601 __u64 list[S390_ARCH_FAC_LIST_SIZE_U64];
602 /* facility mask supported by kvm & hosting machine */
603 __u64 mask[S390_ARCH_FAC_LIST_SIZE_U64];
604};
605
606struct kvm_s390_cpu_model { 603struct kvm_s390_cpu_model {
607 struct kvm_s390_fac *fac; 604 /* facility mask supported by kvm & hosting machine */
605 __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
606 /* facility list requested by guest (in dma page) */
607 __u64 *fac_list;
608 struct cpuid cpu_id; 608 struct cpuid cpu_id;
609 unsigned short ibc; 609 unsigned short ibc;
610}; 610};
@@ -623,6 +623,16 @@ struct kvm_s390_crypto_cb {
623 __u8 reserved80[128]; /* 0x0080 */ 623 __u8 reserved80[128]; /* 0x0080 */
624}; 624};
625 625
626/*
627 * sie_page2 has to be allocated as DMA because fac_list and crycb need
628 * 31bit addresses in the sie control block.
629 */
630struct sie_page2 {
631 __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */
632 struct kvm_s390_crypto_cb crycb; /* 0x0800 */
633 u8 reserved900[0x1000 - 0x900]; /* 0x0900 */
634} __packed;
635
626struct kvm_arch{ 636struct kvm_arch{
627 void *sca; 637 void *sca;
628 int use_esca; 638 int use_esca;
@@ -643,6 +653,7 @@ struct kvm_arch{
643 int ipte_lock_count; 653 int ipte_lock_count;
644 struct mutex ipte_mutex; 654 struct mutex ipte_mutex;
645 spinlock_t start_stop_lock; 655 spinlock_t start_stop_lock;
656 struct sie_page2 *sie_page2;
646 struct kvm_s390_cpu_model model; 657 struct kvm_s390_cpu_model model;
647 struct kvm_s390_crypto crypto; 658 struct kvm_s390_crypto crypto;
648 u64 epoch; 659 u64 epoch;
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index fe84bd5fe7ce..347fe5afa419 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -154,6 +154,7 @@ struct kvm_guest_debug_arch {
154#define KVM_SYNC_PFAULT (1UL << 5) 154#define KVM_SYNC_PFAULT (1UL << 5)
155#define KVM_SYNC_VRS (1UL << 6) 155#define KVM_SYNC_VRS (1UL << 6)
156#define KVM_SYNC_RICCB (1UL << 7) 156#define KVM_SYNC_RICCB (1UL << 7)
157#define KVM_SYNC_FPRS (1UL << 8)
157/* definition of registers in kvm_run */ 158/* definition of registers in kvm_run */
158struct kvm_sync_regs { 159struct kvm_sync_regs {
159 __u64 prefix; /* prefix register */ 160 __u64 prefix; /* prefix register */
@@ -168,9 +169,12 @@ struct kvm_sync_regs {
168 __u64 pft; /* pfault token [PFAULT] */ 169 __u64 pft; /* pfault token [PFAULT] */
169 __u64 pfs; /* pfault select [PFAULT] */ 170 __u64 pfs; /* pfault select [PFAULT] */
170 __u64 pfc; /* pfault compare [PFAULT] */ 171 __u64 pfc; /* pfault compare [PFAULT] */
171 __u64 vrs[32][2]; /* vector registers */ 172 union {
173 __u64 vrs[32][2]; /* vector registers (KVM_SYNC_VRS) */
174 __u64 fprs[16]; /* fp registers (KVM_SYNC_FPRS) */
175 };
172 __u8 reserved[512]; /* for future vector expansion */ 176 __u8 reserved[512]; /* for future vector expansion */
173 __u32 fpc; /* only valid with vector registers */ 177 __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
174 __u8 padding[52]; /* riccb needs to be 64byte aligned */ 178 __u8 padding[52]; /* riccb needs to be 64byte aligned */
175 __u8 riccb[64]; /* runtime instrumentation controls block */ 179 __u8 riccb[64]; /* runtime instrumentation controls block */
176}; 180};
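With this union the same bytes of the kvm_run sync-regs area carry either the full vector registers (flagged by KVM_SYNC_VRS) or just the 16 floating-point registers (KVM_SYNC_FPRS). A hedged sketch of how userspace might pick the right view after KVM_RUN, keyed on kvm_valid_regs (the helper is hypothetical; the structure paths follow the uapi header):

#include <string.h>
#include <linux/kvm.h>

/* Copy the guest FP state out of the sync-regs area after KVM_RUN. */
static void fetch_fp_state(struct kvm_run *run, __u64 fprs[16])
{
	if (run->kvm_valid_regs & KVM_SYNC_VRS) {
		int i;

		/* FP register i overlaps the leftmost 64 bits of vector register i. */
		for (i = 0; i < 16; i++)
			fprs[i] = run->s.regs.vrs[i][0];
	} else if (run->kvm_valid_regs & KVM_SYNC_FPRS) {
		memcpy(fprs, run->s.regs.fprs, sizeof(run->s.regs.fprs));
	}
}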
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index ee69c0854c88..5dbaa72baa64 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -7,6 +7,7 @@
7 { 0x9c, "DIAG (0x9c) time slice end directed" }, \ 7 { 0x9c, "DIAG (0x9c) time slice end directed" }, \
8 { 0x204, "DIAG (0x204) logical-cpu utilization" }, \ 8 { 0x204, "DIAG (0x204) logical-cpu utilization" }, \
9 { 0x258, "DIAG (0x258) page-reference services" }, \ 9 { 0x258, "DIAG (0x258) page-reference services" }, \
10 { 0x288, "DIAG (0x288) watchdog functions" }, \
10 { 0x308, "DIAG (0x308) ipl functions" }, \ 11 { 0x308, "DIAG (0x308) ipl functions" }, \
11 { 0x500, "DIAG (0x500) KVM virtio functions" }, \ 12 { 0x500, "DIAG (0x500) KVM virtio functions" }, \
12 { 0x501, "DIAG (0x501) KVM breakpoint" } 13 { 0x501, "DIAG (0x501) KVM breakpoint" }
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index d30db40437dc..66938d283b77 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -373,7 +373,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu)
373} 373}
374 374
375static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, 375static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
376 int write) 376 enum gacc_mode mode)
377{ 377{
378 union alet alet; 378 union alet alet;
379 struct ale ale; 379 struct ale ale;
@@ -454,7 +454,7 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
454 } 454 }
455 } 455 }
456 456
457 if (ale.fo == 1 && write) 457 if (ale.fo == 1 && mode == GACC_STORE)
458 return PGM_PROTECTION; 458 return PGM_PROTECTION;
459 459
460 asce->val = aste.asce; 460 asce->val = aste.asce;
@@ -477,25 +477,28 @@ enum {
477}; 477};
478 478
479static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, 479static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
480 ar_t ar, int write) 480 ar_t ar, enum gacc_mode mode)
481{ 481{
482 int rc; 482 int rc;
483 psw_t *psw = &vcpu->arch.sie_block->gpsw; 483 struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
484 struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; 484 struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
485 struct trans_exc_code_bits *tec_bits; 485 struct trans_exc_code_bits *tec_bits;
486 486
487 memset(pgm, 0, sizeof(*pgm)); 487 memset(pgm, 0, sizeof(*pgm));
488 tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; 488 tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
489 tec_bits->fsi = write ? FSI_STORE : FSI_FETCH; 489 tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
490 tec_bits->as = psw_bits(*psw).as; 490 tec_bits->as = psw.as;
491 491
492 if (!psw_bits(*psw).t) { 492 if (!psw.t) {
493 asce->val = 0; 493 asce->val = 0;
494 asce->r = 1; 494 asce->r = 1;
495 return 0; 495 return 0;
496 } 496 }
497 497
498 switch (psw_bits(vcpu->arch.sie_block->gpsw).as) { 498 if (mode == GACC_IFETCH)
499 psw.as = psw.as == PSW_AS_HOME ? PSW_AS_HOME : PSW_AS_PRIMARY;
500
501 switch (psw.as) {
499 case PSW_AS_PRIMARY: 502 case PSW_AS_PRIMARY:
500 asce->val = vcpu->arch.sie_block->gcr[1]; 503 asce->val = vcpu->arch.sie_block->gcr[1];
501 return 0; 504 return 0;
@@ -506,7 +509,7 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
506 asce->val = vcpu->arch.sie_block->gcr[13]; 509 asce->val = vcpu->arch.sie_block->gcr[13];
507 return 0; 510 return 0;
508 case PSW_AS_ACCREG: 511 case PSW_AS_ACCREG:
509 rc = ar_translation(vcpu, asce, ar, write); 512 rc = ar_translation(vcpu, asce, ar, mode);
510 switch (rc) { 513 switch (rc) {
511 case PGM_ALEN_TRANSLATION: 514 case PGM_ALEN_TRANSLATION:
512 case PGM_ALE_SEQUENCE: 515 case PGM_ALE_SEQUENCE:
@@ -538,7 +541,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
538 * @gva: guest virtual address 541 * @gva: guest virtual address
539 * @gpa: points to where guest physical (absolute) address should be stored 542 * @gpa: points to where guest physical (absolute) address should be stored
540 * @asce: effective asce 543 * @asce: effective asce
541 * @write: indicates if access is a write access 544 * @mode: indicates the access mode to be used
542 * 545 *
543 * Translate a guest virtual address into a guest absolute address by means 546 * Translate a guest virtual address into a guest absolute address by means
544 * of dynamic address translation as specified by the architecture. 547 * of dynamic address translation as specified by the architecture.
@@ -554,7 +557,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
554 */ 557 */
555static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, 558static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
556 unsigned long *gpa, const union asce asce, 559 unsigned long *gpa, const union asce asce,
557 int write) 560 enum gacc_mode mode)
558{ 561{
559 union vaddress vaddr = {.addr = gva}; 562 union vaddress vaddr = {.addr = gva};
560 union raddress raddr = {.addr = gva}; 563 union raddress raddr = {.addr = gva};
@@ -699,7 +702,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
699real_address: 702real_address:
700 raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr); 703 raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
701absolute_address: 704absolute_address:
702 if (write && dat_protection) 705 if (mode == GACC_STORE && dat_protection)
703 return PGM_PROTECTION; 706 return PGM_PROTECTION;
704 if (kvm_is_error_gpa(vcpu->kvm, raddr.addr)) 707 if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
705 return PGM_ADDRESSING; 708 return PGM_ADDRESSING;
@@ -728,7 +731,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
728 731
729static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, 732static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
730 unsigned long *pages, unsigned long nr_pages, 733 unsigned long *pages, unsigned long nr_pages,
731 const union asce asce, int write) 734 const union asce asce, enum gacc_mode mode)
732{ 735{
733 struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; 736 struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
734 psw_t *psw = &vcpu->arch.sie_block->gpsw; 737 psw_t *psw = &vcpu->arch.sie_block->gpsw;
@@ -740,13 +743,13 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
740 while (nr_pages) { 743 while (nr_pages) {
741 ga = kvm_s390_logical_to_effective(vcpu, ga); 744 ga = kvm_s390_logical_to_effective(vcpu, ga);
742 tec_bits->addr = ga >> PAGE_SHIFT; 745 tec_bits->addr = ga >> PAGE_SHIFT;
743 if (write && lap_enabled && is_low_address(ga)) { 746 if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) {
744 pgm->code = PGM_PROTECTION; 747 pgm->code = PGM_PROTECTION;
745 return pgm->code; 748 return pgm->code;
746 } 749 }
747 ga &= PAGE_MASK; 750 ga &= PAGE_MASK;
748 if (psw_bits(*psw).t) { 751 if (psw_bits(*psw).t) {
749 rc = guest_translate(vcpu, ga, pages, asce, write); 752 rc = guest_translate(vcpu, ga, pages, asce, mode);
750 if (rc < 0) 753 if (rc < 0)
751 return rc; 754 return rc;
752 if (rc == PGM_PROTECTION) 755 if (rc == PGM_PROTECTION)
@@ -768,7 +771,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
768} 771}
769 772
770int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, 773int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
771 unsigned long len, int write) 774 unsigned long len, enum gacc_mode mode)
772{ 775{
773 psw_t *psw = &vcpu->arch.sie_block->gpsw; 776 psw_t *psw = &vcpu->arch.sie_block->gpsw;
774 unsigned long _len, nr_pages, gpa, idx; 777 unsigned long _len, nr_pages, gpa, idx;
@@ -780,7 +783,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
780 783
781 if (!len) 784 if (!len)
782 return 0; 785 return 0;
783 rc = get_vcpu_asce(vcpu, &asce, ar, write); 786 rc = get_vcpu_asce(vcpu, &asce, ar, mode);
784 if (rc) 787 if (rc)
785 return rc; 788 return rc;
786 nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; 789 nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
@@ -792,11 +795,11 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
792 need_ipte_lock = psw_bits(*psw).t && !asce.r; 795 need_ipte_lock = psw_bits(*psw).t && !asce.r;
793 if (need_ipte_lock) 796 if (need_ipte_lock)
794 ipte_lock(vcpu); 797 ipte_lock(vcpu);
795 rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write); 798 rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode);
796 for (idx = 0; idx < nr_pages && !rc; idx++) { 799 for (idx = 0; idx < nr_pages && !rc; idx++) {
797 gpa = *(pages + idx) + (ga & ~PAGE_MASK); 800 gpa = *(pages + idx) + (ga & ~PAGE_MASK);
798 _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); 801 _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
799 if (write) 802 if (mode == GACC_STORE)
800 rc = kvm_write_guest(vcpu->kvm, gpa, data, _len); 803 rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
801 else 804 else
802 rc = kvm_read_guest(vcpu->kvm, gpa, data, _len); 805 rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
@@ -812,7 +815,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
812} 815}
813 816
814int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, 817int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
815 void *data, unsigned long len, int write) 818 void *data, unsigned long len, enum gacc_mode mode)
816{ 819{
817 unsigned long _len, gpa; 820 unsigned long _len, gpa;
818 int rc = 0; 821 int rc = 0;
@@ -820,7 +823,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
820 while (len && !rc) { 823 while (len && !rc) {
821 gpa = kvm_s390_real_to_abs(vcpu, gra); 824 gpa = kvm_s390_real_to_abs(vcpu, gra);
822 _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); 825 _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
823 if (write) 826 if (mode)
824 rc = write_guest_abs(vcpu, gpa, data, _len); 827 rc = write_guest_abs(vcpu, gpa, data, _len);
825 else 828 else
826 rc = read_guest_abs(vcpu, gpa, data, _len); 829 rc = read_guest_abs(vcpu, gpa, data, _len);
@@ -841,7 +844,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
841 * has to take care of this. 844 * has to take care of this.
842 */ 845 */
843int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, 846int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
844 unsigned long *gpa, int write) 847 unsigned long *gpa, enum gacc_mode mode)
845{ 848{
846 struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; 849 struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
847 psw_t *psw = &vcpu->arch.sie_block->gpsw; 850 psw_t *psw = &vcpu->arch.sie_block->gpsw;
@@ -851,19 +854,19 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
851 854
852 gva = kvm_s390_logical_to_effective(vcpu, gva); 855 gva = kvm_s390_logical_to_effective(vcpu, gva);
853 tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; 856 tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
854 rc = get_vcpu_asce(vcpu, &asce, ar, write); 857 rc = get_vcpu_asce(vcpu, &asce, ar, mode);
855 tec->addr = gva >> PAGE_SHIFT; 858 tec->addr = gva >> PAGE_SHIFT;
856 if (rc) 859 if (rc)
857 return rc; 860 return rc;
858 if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { 861 if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
859 if (write) { 862 if (mode == GACC_STORE) {
860 rc = pgm->code = PGM_PROTECTION; 863 rc = pgm->code = PGM_PROTECTION;
861 return rc; 864 return rc;
862 } 865 }
863 } 866 }
864 867
865 if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ 868 if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
866 rc = guest_translate(vcpu, gva, gpa, asce, write); 869 rc = guest_translate(vcpu, gva, gpa, asce, mode);
867 if (rc > 0) { 870 if (rc > 0) {
868 if (rc == PGM_PROTECTION) 871 if (rc == PGM_PROTECTION)
869 tec->b61 = 1; 872 tec->b61 = 1;
@@ -883,7 +886,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
883 * check_gva_range - test a range of guest virtual addresses for accessibility 886 * check_gva_range - test a range of guest virtual addresses for accessibility
884 */ 887 */
885int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, 888int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
886 unsigned long length, int is_write) 889 unsigned long length, enum gacc_mode mode)
887{ 890{
888 unsigned long gpa; 891 unsigned long gpa;
889 unsigned long currlen; 892 unsigned long currlen;
@@ -892,7 +895,7 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
892 ipte_lock(vcpu); 895 ipte_lock(vcpu);
893 while (length > 0 && !rc) { 896 while (length > 0 && !rc) {
894 currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE)); 897 currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
895 rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write); 898 rc = guest_translate_address(vcpu, gva, ar, &gpa, mode);
896 gva += currlen; 899 gva += currlen;
897 length -= currlen; 900 length -= currlen;
898 } 901 }
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index ef03726cc661..df0a79dd8159 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -155,16 +155,22 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
155 return kvm_read_guest(vcpu->kvm, gpa, data, len); 155 return kvm_read_guest(vcpu->kvm, gpa, data, len);
156} 156}
157 157
158enum gacc_mode {
159 GACC_FETCH,
160 GACC_STORE,
161 GACC_IFETCH,
162};
163
158int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, 164int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
159 ar_t ar, unsigned long *gpa, int write); 165 ar_t ar, unsigned long *gpa, enum gacc_mode mode);
160int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, 166int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
161 unsigned long length, int is_write); 167 unsigned long length, enum gacc_mode mode);
162 168
163int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, 169int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
164 unsigned long len, int write); 170 unsigned long len, enum gacc_mode mode);
165 171
166int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, 172int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
167 void *data, unsigned long len, int write); 173 void *data, unsigned long len, enum gacc_mode mode);
168 174
169/** 175/**
170 * write_guest - copy data from kernel space to guest space 176 * write_guest - copy data from kernel space to guest space
@@ -215,7 +221,7 @@ static inline __must_check
215int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, 221int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
216 unsigned long len) 222 unsigned long len)
217{ 223{
218 return access_guest(vcpu, ga, ar, data, len, 1); 224 return access_guest(vcpu, ga, ar, data, len, GACC_STORE);
219} 225}
220 226
221/** 227/**
@@ -235,7 +241,27 @@ static inline __must_check
235int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, 241int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
236 unsigned long len) 242 unsigned long len)
237{ 243{
238 return access_guest(vcpu, ga, ar, data, len, 0); 244 return access_guest(vcpu, ga, ar, data, len, GACC_FETCH);
245}
246
247/**
248 * read_guest_instr - copy instruction data from guest space to kernel space
249 * @vcpu: virtual cpu
250 * @data: destination address in kernel space
251 * @len: number of bytes to copy
252 *
253 * Copy @len bytes from the current psw address (guest space) to @data (kernel
254 * space).
255 *
256 * The behaviour of read_guest_instr is identical to read_guest, except that
257 * instruction data will be read from primary space when in home-space or
258 * address-space mode.
259 */
260static inline __must_check
261int read_guest_instr(struct kvm_vcpu *vcpu, void *data, unsigned long len)
262{
263 return access_guest(vcpu, vcpu->arch.sie_block->gpsw.addr, 0, data, len,
264 GACC_IFETCH);
239} 265}
240 266
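read_guest_instr() fetches bytes from the current PSW address with the new GACC_IFETCH mode, so callers that want to look at the intercepted instruction do not need to replicate the address-space handling themselves. A hedged usage sketch (the wrapper is hypothetical; read_guest_instr() and kvm_s390_get_ilen() are the helpers introduced by this series):

/* Hypothetical helper: copy the bytes of the just-intercepted instruction. */
static int fetch_current_insn(struct kvm_vcpu *vcpu, u8 *buf)
{
	u8 ilen = kvm_s390_get_ilen(vcpu);	/* 2, 4 or 6 bytes */

	/* Reads from the PSW address, honouring the GACC_IFETCH address-space rules. */
	return read_guest_instr(vcpu, buf, ilen);
}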
241/** 267/**
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index d53c10753c46..2e6b54e4d3f9 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -38,17 +38,32 @@ static const intercept_handler_t instruction_handlers[256] = {
38 [0xeb] = kvm_s390_handle_eb, 38 [0xeb] = kvm_s390_handle_eb,
39}; 39};
40 40
41void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc) 41u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
42{ 42{
43 struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; 43 struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
44 u8 ilen = 0;
44 45
45 /* Use the length of the EXECUTE instruction if necessary */ 46 switch (vcpu->arch.sie_block->icptcode) {
46 if (sie_block->icptstatus & 1) { 47 case ICPT_INST:
47 ilc = (sie_block->icptstatus >> 4) & 0x6; 48 case ICPT_INSTPROGI:
48 if (!ilc) 49 case ICPT_OPEREXC:
49 ilc = 4; 50 case ICPT_PARTEXEC:
51 case ICPT_IOINST:
52 /* instruction only stored for these icptcodes */
53 ilen = insn_length(vcpu->arch.sie_block->ipa >> 8);
54 /* Use the length of the EXECUTE instruction if necessary */
55 if (sie_block->icptstatus & 1) {
56 ilen = (sie_block->icptstatus >> 4) & 0x6;
57 if (!ilen)
58 ilen = 4;
59 }
60 break;
61 case ICPT_PROGI:
62 /* bit 1+2 of pgmilc are the ilc, so we directly get ilen */
63 ilen = vcpu->arch.sie_block->pgmilc & 0x6;
64 break;
50 } 65 }
51 sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc); 66 return ilen;
52} 67}
53 68
54static int handle_noop(struct kvm_vcpu *vcpu) 69static int handle_noop(struct kvm_vcpu *vcpu)
@@ -121,11 +136,13 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
121 return -EOPNOTSUPP; 136 return -EOPNOTSUPP;
122} 137}
123 138
124static void __extract_prog_irq(struct kvm_vcpu *vcpu, 139static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu)
125 struct kvm_s390_pgm_info *pgm_info)
126{ 140{
127 memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info)); 141 struct kvm_s390_pgm_info pgm_info = {
128 pgm_info->code = vcpu->arch.sie_block->iprcc; 142 .code = vcpu->arch.sie_block->iprcc,
143 /* the PSW has already been rewound */
144 .flags = KVM_S390_PGM_FLAGS_NO_REWIND,
145 };
129 146
130 switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) { 147 switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
131 case PGM_AFX_TRANSLATION: 148 case PGM_AFX_TRANSLATION:
@@ -138,7 +155,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
138 case PGM_PRIMARY_AUTHORITY: 155 case PGM_PRIMARY_AUTHORITY:
139 case PGM_SECONDARY_AUTHORITY: 156 case PGM_SECONDARY_AUTHORITY:
140 case PGM_SPACE_SWITCH: 157 case PGM_SPACE_SWITCH:
141 pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; 158 pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
142 break; 159 break;
143 case PGM_ALEN_TRANSLATION: 160 case PGM_ALEN_TRANSLATION:
144 case PGM_ALE_SEQUENCE: 161 case PGM_ALE_SEQUENCE:
@@ -146,7 +163,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
146 case PGM_ASTE_SEQUENCE: 163 case PGM_ASTE_SEQUENCE:
147 case PGM_ASTE_VALIDITY: 164 case PGM_ASTE_VALIDITY:
148 case PGM_EXTENDED_AUTHORITY: 165 case PGM_EXTENDED_AUTHORITY:
149 pgm_info->exc_access_id = vcpu->arch.sie_block->eai; 166 pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
150 break; 167 break;
151 case PGM_ASCE_TYPE: 168 case PGM_ASCE_TYPE:
152 case PGM_PAGE_TRANSLATION: 169 case PGM_PAGE_TRANSLATION:
@@ -154,32 +171,33 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
154 case PGM_REGION_SECOND_TRANS: 171 case PGM_REGION_SECOND_TRANS:
155 case PGM_REGION_THIRD_TRANS: 172 case PGM_REGION_THIRD_TRANS:
156 case PGM_SEGMENT_TRANSLATION: 173 case PGM_SEGMENT_TRANSLATION:
157 pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; 174 pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
158 pgm_info->exc_access_id = vcpu->arch.sie_block->eai; 175 pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
159 pgm_info->op_access_id = vcpu->arch.sie_block->oai; 176 pgm_info.op_access_id = vcpu->arch.sie_block->oai;
160 break; 177 break;
161 case PGM_MONITOR: 178 case PGM_MONITOR:
162 pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn; 179 pgm_info.mon_class_nr = vcpu->arch.sie_block->mcn;
163 pgm_info->mon_code = vcpu->arch.sie_block->tecmc; 180 pgm_info.mon_code = vcpu->arch.sie_block->tecmc;
164 break; 181 break;
165 case PGM_VECTOR_PROCESSING: 182 case PGM_VECTOR_PROCESSING:
166 case PGM_DATA: 183 case PGM_DATA:
167 pgm_info->data_exc_code = vcpu->arch.sie_block->dxc; 184 pgm_info.data_exc_code = vcpu->arch.sie_block->dxc;
168 break; 185 break;
169 case PGM_PROTECTION: 186 case PGM_PROTECTION:
170 pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; 187 pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
171 pgm_info->exc_access_id = vcpu->arch.sie_block->eai; 188 pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
172 break; 189 break;
173 default: 190 default:
174 break; 191 break;
175 } 192 }
176 193
177 if (vcpu->arch.sie_block->iprcc & PGM_PER) { 194 if (vcpu->arch.sie_block->iprcc & PGM_PER) {
178 pgm_info->per_code = vcpu->arch.sie_block->perc; 195 pgm_info.per_code = vcpu->arch.sie_block->perc;
179 pgm_info->per_atmid = vcpu->arch.sie_block->peratmid; 196 pgm_info.per_atmid = vcpu->arch.sie_block->peratmid;
180 pgm_info->per_address = vcpu->arch.sie_block->peraddr; 197 pgm_info.per_address = vcpu->arch.sie_block->peraddr;
181 pgm_info->per_access_id = vcpu->arch.sie_block->peraid; 198 pgm_info.per_access_id = vcpu->arch.sie_block->peraid;
182 } 199 }
200 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
183} 201}
184 202
185/* 203/*
@@ -208,7 +226,6 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
208 226
209static int handle_prog(struct kvm_vcpu *vcpu) 227static int handle_prog(struct kvm_vcpu *vcpu)
210{ 228{
211 struct kvm_s390_pgm_info pgm_info;
212 psw_t psw; 229 psw_t psw;
213 int rc; 230 int rc;
214 231
@@ -234,8 +251,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)
234 if (rc) 251 if (rc)
235 return rc; 252 return rc;
236 253
237 __extract_prog_irq(vcpu, &pgm_info); 254 return inject_prog_on_prog_intercept(vcpu);
238 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
239} 255}
240 256
241/** 257/**
@@ -302,7 +318,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
302 318
303 /* Make sure that the source is paged-in */ 319 /* Make sure that the source is paged-in */
304 rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2], 320 rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
305 reg2, &srcaddr, 0); 321 reg2, &srcaddr, GACC_FETCH);
306 if (rc) 322 if (rc)
307 return kvm_s390_inject_prog_cond(vcpu, rc); 323 return kvm_s390_inject_prog_cond(vcpu, rc);
308 rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); 324 rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
@@ -311,14 +327,14 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
311 327
312 /* Make sure that the destination is paged-in */ 328 /* Make sure that the destination is paged-in */
313 rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1], 329 rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
314 reg1, &dstaddr, 1); 330 reg1, &dstaddr, GACC_STORE);
315 if (rc) 331 if (rc)
316 return kvm_s390_inject_prog_cond(vcpu, rc); 332 return kvm_s390_inject_prog_cond(vcpu, rc);
317 rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); 333 rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
318 if (rc != 0) 334 if (rc != 0)
319 return rc; 335 return rc;
320 336
321 kvm_s390_rewind_psw(vcpu, 4); 337 kvm_s390_retry_instr(vcpu);
322 338
323 return 0; 339 return 0;
324} 340}
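
The intercept.c hunks above fold the old ad-hoc ILC handling into kvm_s390_get_ilen() plus a retry helper, both built on the first-opcode-byte length rule. A minimal userspace sketch of that rule follows; ilen_from_opcode() and the sample opcodes are illustrative, only insn_length() in the kernel's dis.h is the real counterpart.

	/* Sketch of the s390 instruction-length rule used by kvm_s390_get_ilen():
	 * the two most significant bits of the first opcode byte select a
	 * 2-, 4- or 6-byte instruction. Illustrative only. */
	#include <stdio.h>

	static int ilen_from_opcode(unsigned char opcode)
	{
		return ((((int) opcode + 64) >> 7) + 1) << 1;	/* 00 -> 2, 01/10 -> 4, 11 -> 6 */
	}

	int main(void)
	{
		printf("0x07 -> %d bytes\n", ilen_from_opcode(0x07));	/* BCR, 2 bytes */
		printf("0x58 -> %d bytes\n", ilen_from_opcode(0x58));	/* L, 4 bytes */
		printf("0xc0 -> %d bytes\n", ilen_from_opcode(0xc0));	/* LARL, 6 bytes */
		return 0;
	}
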
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 9ffc73221792..704809d91ddd 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -182,8 +182,9 @@ static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu)
182 182
183static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu) 183static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
184{ 184{
185 return (vcpu->arch.sie_block->cputm >> 63) && 185 if (!cpu_timer_interrupts_enabled(vcpu))
186 cpu_timer_interrupts_enabled(vcpu); 186 return 0;
187 return kvm_s390_get_cpu_timer(vcpu) >> 63;
187} 188}
188 189
189static inline int is_ioirq(unsigned long irq_type) 190static inline int is_ioirq(unsigned long irq_type)
@@ -335,23 +336,6 @@ static void set_intercept_indicators(struct kvm_vcpu *vcpu)
335 set_intercept_indicators_stop(vcpu); 336 set_intercept_indicators_stop(vcpu);
336} 337}
337 338
338static u16 get_ilc(struct kvm_vcpu *vcpu)
339{
340 switch (vcpu->arch.sie_block->icptcode) {
341 case ICPT_INST:
342 case ICPT_INSTPROGI:
343 case ICPT_OPEREXC:
344 case ICPT_PARTEXEC:
345 case ICPT_IOINST:
346 /* last instruction only stored for these icptcodes */
347 return insn_length(vcpu->arch.sie_block->ipa >> 8);
348 case ICPT_PROGI:
349 return vcpu->arch.sie_block->pgmilc;
350 default:
351 return 0;
352 }
353}
354
355static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu) 339static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
356{ 340{
357 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 341 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -588,7 +572,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
588 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 572 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
589 struct kvm_s390_pgm_info pgm_info; 573 struct kvm_s390_pgm_info pgm_info;
590 int rc = 0, nullifying = false; 574 int rc = 0, nullifying = false;
591 u16 ilc = get_ilc(vcpu); 575 u16 ilen;
592 576
593 spin_lock(&li->lock); 577 spin_lock(&li->lock);
594 pgm_info = li->irq.pgm; 578 pgm_info = li->irq.pgm;
@@ -596,8 +580,9 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
596 memset(&li->irq.pgm, 0, sizeof(pgm_info)); 580 memset(&li->irq.pgm, 0, sizeof(pgm_info));
597 spin_unlock(&li->lock); 581 spin_unlock(&li->lock);
598 582
599 VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d", 583 ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK;
600 pgm_info.code, ilc); 584 VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d",
585 pgm_info.code, ilen);
601 vcpu->stat.deliver_program_int++; 586 vcpu->stat.deliver_program_int++;
602 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, 587 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
603 pgm_info.code, 0); 588 pgm_info.code, 0);
@@ -681,10 +666,11 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
681 (u8 *) __LC_PER_ACCESS_ID); 666 (u8 *) __LC_PER_ACCESS_ID);
682 } 667 }
683 668
684 if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST) 669 if (nullifying && !(pgm_info.flags & KVM_S390_PGM_FLAGS_NO_REWIND))
685 kvm_s390_rewind_psw(vcpu, ilc); 670 kvm_s390_rewind_psw(vcpu, ilen);
686 671
687 rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); 672 /* bit 1+2 of the target are the ilc, so we can directly use ilen */
673 rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC);
688 rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea, 674 rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
689 (u64 *) __LC_LAST_BREAK); 675 (u64 *) __LC_LAST_BREAK);
690 rc |= put_guest_lc(vcpu, pgm_info.code, 676 rc |= put_guest_lc(vcpu, pgm_info.code,
@@ -923,9 +909,35 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
923 return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu); 909 return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu);
924} 910}
925 911
912static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
913{
914 u64 now, cputm, sltime = 0;
915
916 if (ckc_interrupts_enabled(vcpu)) {
917 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
918 sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
919 /* already expired or overflow? */
920 if (!sltime || vcpu->arch.sie_block->ckc <= now)
921 return 0;
922 if (cpu_timer_interrupts_enabled(vcpu)) {
923 cputm = kvm_s390_get_cpu_timer(vcpu);
924 /* already expired? */
925 if (cputm >> 63)
926 return 0;
927 return min(sltime, tod_to_ns(cputm));
928 }
929 } else if (cpu_timer_interrupts_enabled(vcpu)) {
930 sltime = kvm_s390_get_cpu_timer(vcpu);
931 /* already expired? */
932 if (sltime >> 63)
933 return 0;
934 }
935 return sltime;
936}
937
926int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) 938int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
927{ 939{
928 u64 now, sltime; 940 u64 sltime;
929 941
930 vcpu->stat.exit_wait_state++; 942 vcpu->stat.exit_wait_state++;
931 943
@@ -938,22 +950,20 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
938 return -EOPNOTSUPP; /* disabled wait */ 950 return -EOPNOTSUPP; /* disabled wait */
939 } 951 }
940 952
941 if (!ckc_interrupts_enabled(vcpu)) { 953 if (!ckc_interrupts_enabled(vcpu) &&
954 !cpu_timer_interrupts_enabled(vcpu)) {
942 VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); 955 VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
943 __set_cpu_idle(vcpu); 956 __set_cpu_idle(vcpu);
944 goto no_timer; 957 goto no_timer;
945 } 958 }
946 959
947 now = kvm_s390_get_tod_clock_fast(vcpu->kvm); 960 sltime = __calculate_sltime(vcpu);
948 sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); 961 if (!sltime)
949
950 /* underflow */
951 if (vcpu->arch.sie_block->ckc < now)
952 return 0; 962 return 0;
953 963
954 __set_cpu_idle(vcpu); 964 __set_cpu_idle(vcpu);
955 hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); 965 hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
956 VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime); 966 VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
957no_timer: 967no_timer:
958 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 968 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
959 kvm_vcpu_block(vcpu); 969 kvm_vcpu_block(vcpu);
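
The new __calculate_sltime() above folds the clock comparator and the CPU timer into one wakeup deadline for the enabled-wait path. A rough userspace analogue of that decision, with the TOD-to-nanosecond conversion replaced by plain subtraction and calc_sleep_ns() being an invented name:

	#include <stdint.h>

	static uint64_t calc_sleep_ns(uint64_t now, uint64_t ckc, uint64_t cputm,
				      int ckc_enabled, int cputm_enabled)
	{
		uint64_t sltime = 0;

		if (ckc_enabled) {
			if (ckc <= now)
				return 0;		/* clock comparator already due */
			sltime = ckc - now;		/* stands in for tod_to_ns() */
			if (cputm_enabled) {
				if (cputm >> 63)
					return 0;	/* CPU timer already expired */
				sltime = cputm < sltime ? cputm : sltime;
			}
		} else if (cputm_enabled) {
			if (cputm >> 63)
				return 0;
			sltime = cputm;
		}
		return sltime;
	}
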
@@ -980,18 +990,16 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
980enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) 990enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
981{ 991{
982 struct kvm_vcpu *vcpu; 992 struct kvm_vcpu *vcpu;
983 u64 now, sltime; 993 u64 sltime;
984 994
985 vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); 995 vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
986 now = kvm_s390_get_tod_clock_fast(vcpu->kvm); 996 sltime = __calculate_sltime(vcpu);
987 sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
988 997
989 /* 998 /*
990 * If the monotonic clock runs faster than the tod clock we might be 999 * If the monotonic clock runs faster than the tod clock we might be
991 * woken up too early and have to go back to sleep to avoid deadlocks. 1000 * woken up too early and have to go back to sleep to avoid deadlocks.
992 */ 1001 */
993 if (vcpu->arch.sie_block->ckc > now && 1002 if (sltime && hrtimer_forward_now(timer, ns_to_ktime(sltime)))
994 hrtimer_forward_now(timer, ns_to_ktime(sltime)))
995 return HRTIMER_RESTART; 1003 return HRTIMER_RESTART;
996 kvm_s390_vcpu_wakeup(vcpu); 1004 kvm_s390_vcpu_wakeup(vcpu);
997 return HRTIMER_NORESTART; 1005 return HRTIMER_NORESTART;
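
The wakeup handler above keeps the "woken too early" behaviour by re-arming itself with the freshly computed sleep time. A condensed sketch of that hrtimer pattern, with compute_remaining_ns() and do_wakeup() as hypothetical stand-ins for the KVM-specific pieces:

	#include <linux/hrtimer.h>
	#include <linux/ktime.h>

	extern u64 compute_remaining_ns(void);	/* hypothetical helper */
	extern void do_wakeup(void);		/* hypothetical helper */

	static enum hrtimer_restart wakeup_cb(struct hrtimer *timer)
	{
		u64 remaining_ns = compute_remaining_ns();

		/* woken too early: push the expiry forward and keep sleeping */
		if (remaining_ns && hrtimer_forward_now(timer, ns_to_ktime(remaining_ns)))
			return HRTIMER_RESTART;
		do_wakeup();
		return HRTIMER_NORESTART;
	}
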
@@ -1059,8 +1067,16 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
1059 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, 1067 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
1060 irq->u.pgm.code, 0); 1068 irq->u.pgm.code, 0);
1061 1069
1070 if (!(irq->u.pgm.flags & KVM_S390_PGM_FLAGS_ILC_VALID)) {
1071 /* auto detection if no valid ILC was given */
1072 irq->u.pgm.flags &= ~KVM_S390_PGM_FLAGS_ILC_MASK;
1073 irq->u.pgm.flags |= kvm_s390_get_ilen(vcpu);
1074 irq->u.pgm.flags |= KVM_S390_PGM_FLAGS_ILC_VALID;
1075 }
1076
1062 if (irq->u.pgm.code == PGM_PER) { 1077 if (irq->u.pgm.code == PGM_PER) {
1063 li->irq.pgm.code |= PGM_PER; 1078 li->irq.pgm.code |= PGM_PER;
1079 li->irq.pgm.flags = irq->u.pgm.flags;
1064 /* only modify PER related information */ 1080 /* only modify PER related information */
1065 li->irq.pgm.per_address = irq->u.pgm.per_address; 1081 li->irq.pgm.per_address = irq->u.pgm.per_address;
1066 li->irq.pgm.per_code = irq->u.pgm.per_code; 1082 li->irq.pgm.per_code = irq->u.pgm.per_code;
@@ -1069,6 +1085,7 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
1069 } else if (!(irq->u.pgm.code & PGM_PER)) { 1085 } else if (!(irq->u.pgm.code & PGM_PER)) {
1070 li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) | 1086 li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) |
1071 irq->u.pgm.code; 1087 irq->u.pgm.code;
1088 li->irq.pgm.flags = irq->u.pgm.flags;
1072 /* only modify non-PER information */ 1089 /* only modify non-PER information */
1073 li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code; 1090 li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code;
1074 li->irq.pgm.mon_code = irq->u.pgm.mon_code; 1091 li->irq.pgm.mon_code = irq->u.pgm.mon_code;
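
__inject_prog() now auto-detects the instruction length unless userspace marks the ILC as valid in pgm.flags. A hedged userspace sketch of supplying an explicit length through KVM_S390_IRQ; it assumes the KVM_S390_PGM_FLAGS_* definitions introduced by this series are available via <linux/kvm.h>, and 0x0006 is the specification-exception program code.

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int inject_spec_exception(int vcpu_fd)
	{
		struct kvm_s390_irq irq;

		memset(&irq, 0, sizeof(irq));
		irq.type = KVM_S390_PROGRAM_INT;
		irq.u.pgm.code = 0x0006;				/* PGM_SPECIFICATION */
		/* the ilen (2, 4 or 6) is encoded directly in the flag bits */
		irq.u.pgm.flags = KVM_S390_PGM_FLAGS_ILC_VALID | 4;
		return ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
	}
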
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 03dfe9c667f4..e196582fe87d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -158,6 +158,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
158 kvm->arch.epoch -= *delta; 158 kvm->arch.epoch -= *delta;
159 kvm_for_each_vcpu(i, vcpu, kvm) { 159 kvm_for_each_vcpu(i, vcpu, kvm) {
160 vcpu->arch.sie_block->epoch -= *delta; 160 vcpu->arch.sie_block->epoch -= *delta;
161 if (vcpu->arch.cputm_enabled)
162 vcpu->arch.cputm_start += *delta;
161 } 163 }
162 } 164 }
163 return NOTIFY_OK; 165 return NOTIFY_OK;
@@ -274,7 +276,6 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm,
274 unsigned long address; 276 unsigned long address;
275 struct gmap *gmap = kvm->arch.gmap; 277 struct gmap *gmap = kvm->arch.gmap;
276 278
277 down_read(&gmap->mm->mmap_sem);
278 /* Loop over all guest pages */ 279 /* Loop over all guest pages */
279 last_gfn = memslot->base_gfn + memslot->npages; 280 last_gfn = memslot->base_gfn + memslot->npages;
280 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { 281 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
@@ -282,8 +283,10 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm,
282 283
283 if (gmap_test_and_clear_dirty(address, gmap)) 284 if (gmap_test_and_clear_dirty(address, gmap))
284 mark_page_dirty(kvm, cur_gfn); 285 mark_page_dirty(kvm, cur_gfn);
286 if (fatal_signal_pending(current))
287 return;
288 cond_resched();
285 } 289 }
286 up_read(&gmap->mm->mmap_sem);
287} 290}
288 291
289/* Section: vm related */ 292/* Section: vm related */
@@ -352,8 +355,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
352 if (atomic_read(&kvm->online_vcpus)) { 355 if (atomic_read(&kvm->online_vcpus)) {
353 r = -EBUSY; 356 r = -EBUSY;
354 } else if (MACHINE_HAS_VX) { 357 } else if (MACHINE_HAS_VX) {
355 set_kvm_facility(kvm->arch.model.fac->mask, 129); 358 set_kvm_facility(kvm->arch.model.fac_mask, 129);
356 set_kvm_facility(kvm->arch.model.fac->list, 129); 359 set_kvm_facility(kvm->arch.model.fac_list, 129);
357 r = 0; 360 r = 0;
358 } else 361 } else
359 r = -EINVAL; 362 r = -EINVAL;
@@ -367,8 +370,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
367 if (atomic_read(&kvm->online_vcpus)) { 370 if (atomic_read(&kvm->online_vcpus)) {
368 r = -EBUSY; 371 r = -EBUSY;
369 } else if (test_facility(64)) { 372 } else if (test_facility(64)) {
370 set_kvm_facility(kvm->arch.model.fac->mask, 64); 373 set_kvm_facility(kvm->arch.model.fac_mask, 64);
371 set_kvm_facility(kvm->arch.model.fac->list, 64); 374 set_kvm_facility(kvm->arch.model.fac_list, 64);
372 r = 0; 375 r = 0;
373 } 376 }
374 mutex_unlock(&kvm->lock); 377 mutex_unlock(&kvm->lock);
@@ -651,7 +654,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
651 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid, 654 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
652 sizeof(struct cpuid)); 655 sizeof(struct cpuid));
653 kvm->arch.model.ibc = proc->ibc; 656 kvm->arch.model.ibc = proc->ibc;
654 memcpy(kvm->arch.model.fac->list, proc->fac_list, 657 memcpy(kvm->arch.model.fac_list, proc->fac_list,
655 S390_ARCH_FAC_LIST_SIZE_BYTE); 658 S390_ARCH_FAC_LIST_SIZE_BYTE);
656 } else 659 } else
657 ret = -EFAULT; 660 ret = -EFAULT;
@@ -685,7 +688,8 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
685 } 688 }
686 memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid)); 689 memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
687 proc->ibc = kvm->arch.model.ibc; 690 proc->ibc = kvm->arch.model.ibc;
688 memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE); 691 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
692 S390_ARCH_FAC_LIST_SIZE_BYTE);
689 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) 693 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
690 ret = -EFAULT; 694 ret = -EFAULT;
691 kfree(proc); 695 kfree(proc);
@@ -705,7 +709,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
705 } 709 }
706 get_cpu_id((struct cpuid *) &mach->cpuid); 710 get_cpu_id((struct cpuid *) &mach->cpuid);
707 mach->ibc = sclp.ibc; 711 mach->ibc = sclp.ibc;
708 memcpy(&mach->fac_mask, kvm->arch.model.fac->mask, 712 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
709 S390_ARCH_FAC_LIST_SIZE_BYTE); 713 S390_ARCH_FAC_LIST_SIZE_BYTE);
710 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, 714 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
711 S390_ARCH_FAC_LIST_SIZE_BYTE); 715 S390_ARCH_FAC_LIST_SIZE_BYTE);
@@ -1082,16 +1086,12 @@ static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1082 cpu_id->version = 0xff; 1086 cpu_id->version = 0xff;
1083} 1087}
1084 1088
1085static int kvm_s390_crypto_init(struct kvm *kvm) 1089static void kvm_s390_crypto_init(struct kvm *kvm)
1086{ 1090{
1087 if (!test_kvm_facility(kvm, 76)) 1091 if (!test_kvm_facility(kvm, 76))
1088 return 0; 1092 return;
1089
1090 kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1091 GFP_KERNEL | GFP_DMA);
1092 if (!kvm->arch.crypto.crycb)
1093 return -ENOMEM;
1094 1093
1094 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1095 kvm_s390_set_crycb_format(kvm); 1095 kvm_s390_set_crycb_format(kvm);
1096 1096
1097 /* Enable AES/DEA protected key functions by default */ 1097 /* Enable AES/DEA protected key functions by default */
@@ -1101,8 +1101,6 @@ static int kvm_s390_crypto_init(struct kvm *kvm)
1101 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 1101 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1102 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 1102 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1103 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 1103 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1104
1105 return 0;
1106} 1104}
1107 1105
1108static void sca_dispose(struct kvm *kvm) 1106static void sca_dispose(struct kvm *kvm)
@@ -1156,37 +1154,30 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1156 if (!kvm->arch.dbf) 1154 if (!kvm->arch.dbf)
1157 goto out_err; 1155 goto out_err;
1158 1156
1159 /* 1157 kvm->arch.sie_page2 =
1160 * The architectural maximum amount of facilities is 16 kbit. To store 1158 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1161 * this amount, 2 kbyte of memory is required. Thus we need a full 1159 if (!kvm->arch.sie_page2)
1162 * page to hold the guest facility list (arch.model.fac->list) and the
1163 * facility mask (arch.model.fac->mask). Its address size has to be
1164 * 31 bits and word aligned.
1165 */
1166 kvm->arch.model.fac =
1167 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1168 if (!kvm->arch.model.fac)
1169 goto out_err; 1160 goto out_err;
1170 1161
1171 /* Populate the facility mask initially. */ 1162 /* Populate the facility mask initially. */
1172 memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list, 1163 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1173 S390_ARCH_FAC_LIST_SIZE_BYTE); 1164 S390_ARCH_FAC_LIST_SIZE_BYTE);
1174 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { 1165 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1175 if (i < kvm_s390_fac_list_mask_size()) 1166 if (i < kvm_s390_fac_list_mask_size())
1176 kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i]; 1167 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1177 else 1168 else
1178 kvm->arch.model.fac->mask[i] = 0UL; 1169 kvm->arch.model.fac_mask[i] = 0UL;
1179 } 1170 }
1180 1171
1181 /* Populate the facility list initially. */ 1172 /* Populate the facility list initially. */
1182 memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask, 1173 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1174 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1183 S390_ARCH_FAC_LIST_SIZE_BYTE); 1175 S390_ARCH_FAC_LIST_SIZE_BYTE);
1184 1176
1185 kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id); 1177 kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1186 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 1178 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1187 1179
1188 if (kvm_s390_crypto_init(kvm) < 0) 1180 kvm_s390_crypto_init(kvm);
1189 goto out_err;
1190 1181
1191 spin_lock_init(&kvm->arch.float_int.lock); 1182 spin_lock_init(&kvm->arch.float_int.lock);
1192 for (i = 0; i < FIRQ_LIST_COUNT; i++) 1183 for (i = 0; i < FIRQ_LIST_COUNT; i++)
@@ -1222,8 +1213,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1222 1213
1223 return 0; 1214 return 0;
1224out_err: 1215out_err:
1225 kfree(kvm->arch.crypto.crycb); 1216 free_page((unsigned long)kvm->arch.sie_page2);
1226 free_page((unsigned long)kvm->arch.model.fac);
1227 debug_unregister(kvm->arch.dbf); 1217 debug_unregister(kvm->arch.dbf);
1228 sca_dispose(kvm); 1218 sca_dispose(kvm);
1229 KVM_EVENT(3, "creation of vm failed: %d", rc); 1219 KVM_EVENT(3, "creation of vm failed: %d", rc);
@@ -1269,10 +1259,9 @@ static void kvm_free_vcpus(struct kvm *kvm)
1269void kvm_arch_destroy_vm(struct kvm *kvm) 1259void kvm_arch_destroy_vm(struct kvm *kvm)
1270{ 1260{
1271 kvm_free_vcpus(kvm); 1261 kvm_free_vcpus(kvm);
1272 free_page((unsigned long)kvm->arch.model.fac);
1273 sca_dispose(kvm); 1262 sca_dispose(kvm);
1274 debug_unregister(kvm->arch.dbf); 1263 debug_unregister(kvm->arch.dbf);
1275 kfree(kvm->arch.crypto.crycb); 1264 free_page((unsigned long)kvm->arch.sie_page2);
1276 if (!kvm_is_ucontrol(kvm)) 1265 if (!kvm_is_ucontrol(kvm))
1277 gmap_free(kvm->arch.gmap); 1266 gmap_free(kvm->arch.gmap);
1278 kvm_s390_destroy_adapters(kvm); 1267 kvm_s390_destroy_adapters(kvm);
@@ -1414,8 +1403,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1414 KVM_SYNC_PFAULT; 1403 KVM_SYNC_PFAULT;
1415 if (test_kvm_facility(vcpu->kvm, 64)) 1404 if (test_kvm_facility(vcpu->kvm, 64))
1416 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 1405 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1417 if (test_kvm_facility(vcpu->kvm, 129)) 1406 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1407 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1408 */
1409 if (MACHINE_HAS_VX)
1418 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 1410 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1411 else
1412 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1419 1413
1420 if (kvm_is_ucontrol(vcpu->kvm)) 1414 if (kvm_is_ucontrol(vcpu->kvm))
1421 return __kvm_ucontrol_vcpu_init(vcpu); 1415 return __kvm_ucontrol_vcpu_init(vcpu);
@@ -1423,6 +1417,93 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1423 return 0; 1417 return 0;
1424} 1418}
1425 1419
1420/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1421static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1422{
1423 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1424 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1425 vcpu->arch.cputm_start = get_tod_clock_fast();
1426 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1427}
1428
1429/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1430static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1431{
1432 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1433 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1434 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1435 vcpu->arch.cputm_start = 0;
1436 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1437}
1438
1439/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1440static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1441{
1442 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1443 vcpu->arch.cputm_enabled = true;
1444 __start_cpu_timer_accounting(vcpu);
1445}
1446
1447/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1448static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1449{
1450 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1451 __stop_cpu_timer_accounting(vcpu);
1452 vcpu->arch.cputm_enabled = false;
1453}
1454
1455static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1456{
1457 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1458 __enable_cpu_timer_accounting(vcpu);
1459 preempt_enable();
1460}
1461
1462static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1463{
1464 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1465 __disable_cpu_timer_accounting(vcpu);
1466 preempt_enable();
1467}
1468
1469/* set the cpu timer - may only be called from the VCPU thread itself */
1470void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1471{
1472 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1473 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1474 if (vcpu->arch.cputm_enabled)
1475 vcpu->arch.cputm_start = get_tod_clock_fast();
1476 vcpu->arch.sie_block->cputm = cputm;
1477 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1478 preempt_enable();
1479}
1480
1481/* update and get the cpu timer - can also be called from other VCPU threads */
1482__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1483{
1484 unsigned int seq;
1485 __u64 value;
1486
1487 if (unlikely(!vcpu->arch.cputm_enabled))
1488 return vcpu->arch.sie_block->cputm;
1489
1490 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1491 do {
1492 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1493 /*
1494 * If the writer would ever execute a read in the critical
1495 * section, e.g. in irq context, we have a deadlock.
1496 */
1497 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1498 value = vcpu->arch.sie_block->cputm;
1499 /* if cputm_start is 0, accounting is being started/stopped */
1500 if (likely(vcpu->arch.cputm_start))
1501 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1502 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1503 preempt_enable();
1504 return value;
1505}
1506
1426void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1507void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1427{ 1508{
1428 /* Save host register state */ 1509 /* Save host register state */
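
The CPU-timer accounting added above has a single writer (the VCPU thread, with preemption disabled) and potentially many readers, which is why kvm_s390_get_cpu_timer() loops on a seqcount. A stripped-down sketch of that pattern with a hypothetical container type; it mirrors the reader/writer split, not the exact KVM code.

	#include <linux/seqlock.h>
	#include <linux/types.h>

	struct sample_timer {		/* hypothetical container */
		seqcount_t seq;
		u64 base;		/* timer value when accounting last started */
		u64 start;		/* TOD timestamp, 0 while accounting is stopped */
	};

	/* writer: only the VCPU thread itself, preemption disabled */
	static void sample_set(struct sample_timer *t, u64 value, u64 now)
	{
		raw_write_seqcount_begin(&t->seq);
		if (t->start)
			t->start = now;
		t->base = value;
		raw_write_seqcount_end(&t->seq);
	}

	/* reader: any thread; retries until it sees a consistent snapshot */
	static u64 sample_get(struct sample_timer *t, u64 now)
	{
		unsigned int seq;
		u64 value;

		do {
			seq = raw_read_seqcount(&t->seq);	/* may be odd mid-write */
			value = t->base;
			if (t->start)				/* accounting running */
				value -= now - t->start;
		} while (read_seqcount_retry(&t->seq, seq & ~1));
		return value;
	}
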
@@ -1430,10 +1511,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1430 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 1511 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1431 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 1512 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1432 1513
1433 /* Depending on MACHINE_HAS_VX, data stored to vrs either 1514 if (MACHINE_HAS_VX)
1434 * has vector register or floating point register format. 1515 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1435 */ 1516 else
1436 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 1517 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1437 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 1518 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1438 if (test_fp_ctl(current->thread.fpu.fpc)) 1519 if (test_fp_ctl(current->thread.fpu.fpc))
1439 /* User space provided an invalid FPC, let's clear it */ 1520 /* User space provided an invalid FPC, let's clear it */
@@ -1443,10 +1524,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1443 restore_access_regs(vcpu->run->s.regs.acrs); 1524 restore_access_regs(vcpu->run->s.regs.acrs);
1444 gmap_enable(vcpu->arch.gmap); 1525 gmap_enable(vcpu->arch.gmap);
1445 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); 1526 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1527 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1528 __start_cpu_timer_accounting(vcpu);
1529 vcpu->cpu = cpu;
1446} 1530}
1447 1531
1448void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1532void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1449{ 1533{
1534 vcpu->cpu = -1;
1535 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1536 __stop_cpu_timer_accounting(vcpu);
1450 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); 1537 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1451 gmap_disable(vcpu->arch.gmap); 1538 gmap_disable(vcpu->arch.gmap);
1452 1539
@@ -1468,7 +1555,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1468 vcpu->arch.sie_block->gpsw.mask = 0UL; 1555 vcpu->arch.sie_block->gpsw.mask = 0UL;
1469 vcpu->arch.sie_block->gpsw.addr = 0UL; 1556 vcpu->arch.sie_block->gpsw.addr = 0UL;
1470 kvm_s390_set_prefix(vcpu, 0); 1557 kvm_s390_set_prefix(vcpu, 0);
1471 vcpu->arch.sie_block->cputm = 0UL; 1558 kvm_s390_set_cpu_timer(vcpu, 0);
1472 vcpu->arch.sie_block->ckc = 0UL; 1559 vcpu->arch.sie_block->ckc = 0UL;
1473 vcpu->arch.sie_block->todpr = 0; 1560 vcpu->arch.sie_block->todpr = 0;
1474 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); 1561 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
@@ -1538,7 +1625,8 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1538 1625
1539 vcpu->arch.cpu_id = model->cpu_id; 1626 vcpu->arch.cpu_id = model->cpu_id;
1540 vcpu->arch.sie_block->ibc = model->ibc; 1627 vcpu->arch.sie_block->ibc = model->ibc;
1541 vcpu->arch.sie_block->fac = (int) (long) model->fac->list; 1628 if (test_kvm_facility(vcpu->kvm, 7))
1629 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1542} 1630}
1543 1631
1544int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 1632int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
@@ -1616,6 +1704,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1616 vcpu->arch.local_int.float_int = &kvm->arch.float_int; 1704 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1617 vcpu->arch.local_int.wq = &vcpu->wq; 1705 vcpu->arch.local_int.wq = &vcpu->wq;
1618 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; 1706 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1707 seqcount_init(&vcpu->arch.cputm_seqcount);
1619 1708
1620 rc = kvm_vcpu_init(vcpu, kvm, id); 1709 rc = kvm_vcpu_init(vcpu, kvm, id);
1621 if (rc) 1710 if (rc)
@@ -1715,7 +1804,7 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1715 (u64 __user *)reg->addr); 1804 (u64 __user *)reg->addr);
1716 break; 1805 break;
1717 case KVM_REG_S390_CPU_TIMER: 1806 case KVM_REG_S390_CPU_TIMER:
1718 r = put_user(vcpu->arch.sie_block->cputm, 1807 r = put_user(kvm_s390_get_cpu_timer(vcpu),
1719 (u64 __user *)reg->addr); 1808 (u64 __user *)reg->addr);
1720 break; 1809 break;
1721 case KVM_REG_S390_CLOCK_COMP: 1810 case KVM_REG_S390_CLOCK_COMP:
@@ -1753,6 +1842,7 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1753 struct kvm_one_reg *reg) 1842 struct kvm_one_reg *reg)
1754{ 1843{
1755 int r = -EINVAL; 1844 int r = -EINVAL;
1845 __u64 val;
1756 1846
1757 switch (reg->id) { 1847 switch (reg->id) {
1758 case KVM_REG_S390_TODPR: 1848 case KVM_REG_S390_TODPR:
@@ -1764,8 +1854,9 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1764 (u64 __user *)reg->addr); 1854 (u64 __user *)reg->addr);
1765 break; 1855 break;
1766 case KVM_REG_S390_CPU_TIMER: 1856 case KVM_REG_S390_CPU_TIMER:
1767 r = get_user(vcpu->arch.sie_block->cputm, 1857 r = get_user(val, (u64 __user *)reg->addr);
1768 (u64 __user *)reg->addr); 1858 if (!r)
1859 kvm_s390_set_cpu_timer(vcpu, val);
1769 break; 1860 break;
1770 case KVM_REG_S390_CLOCK_COMP: 1861 case KVM_REG_S390_CLOCK_COMP:
1771 r = get_user(vcpu->arch.sie_block->ckc, 1862 r = get_user(vcpu->arch.sie_block->ckc,
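
With the one_reg handlers above routed through kvm_s390_get/set_cpu_timer(), userspace still accesses the timer through the generic ONE_REG interface. A small sketch of both directions, assuming KVM_REG_S390_CPU_TIMER from <linux/kvm.h>:

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int get_cpu_timer(int vcpu_fd, uint64_t *value)
	{
		struct kvm_one_reg reg = {
			.id   = KVM_REG_S390_CPU_TIMER,
			.addr = (uint64_t)(uintptr_t)value,
		};

		return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
	}

	static int set_cpu_timer(int vcpu_fd, uint64_t value)
	{
		struct kvm_one_reg reg = {
			.id   = KVM_REG_S390_CPU_TIMER,
			.addr = (uint64_t)(uintptr_t)&value,
		};

		return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
	}
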
@@ -2158,8 +2249,10 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2158 2249
2159static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 2250static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2160{ 2251{
2161 psw_t *psw = &vcpu->arch.sie_block->gpsw; 2252 struct kvm_s390_pgm_info pgm_info = {
2162 u8 opcode; 2253 .code = PGM_ADDRESSING,
2254 };
2255 u8 opcode, ilen;
2163 int rc; 2256 int rc;
2164 2257
2165 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 2258 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
@@ -2173,12 +2266,21 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2173 * to look up the current opcode to get the length of the instruction 2266 * to look up the current opcode to get the length of the instruction
2174 * to be able to forward the PSW. 2267 * to be able to forward the PSW.
2175 */ 2268 */
2176 rc = read_guest(vcpu, psw->addr, 0, &opcode, 1); 2269 rc = read_guest_instr(vcpu, &opcode, 1);
2177 if (rc) 2270 ilen = insn_length(opcode);
2178 return kvm_s390_inject_prog_cond(vcpu, rc); 2271 if (rc < 0) {
2179 psw->addr = __rewind_psw(*psw, -insn_length(opcode)); 2272 return rc;
2180 2273 } else if (rc) {
2181 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 2274 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2275 * Forward by arbitrary ilc, injection will take care of
2276 * nullification if necessary.
2277 */
2278 pgm_info = vcpu->arch.pgm;
2279 ilen = 4;
2280 }
2281 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2282 kvm_s390_forward_psw(vcpu, ilen);
2283 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2182} 2284}
2183 2285
2184static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 2286static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
@@ -2244,10 +2346,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
2244 */ 2346 */
2245 local_irq_disable(); 2347 local_irq_disable();
2246 __kvm_guest_enter(); 2348 __kvm_guest_enter();
2349 __disable_cpu_timer_accounting(vcpu);
2247 local_irq_enable(); 2350 local_irq_enable();
2248 exit_reason = sie64a(vcpu->arch.sie_block, 2351 exit_reason = sie64a(vcpu->arch.sie_block,
2249 vcpu->run->s.regs.gprs); 2352 vcpu->run->s.regs.gprs);
2250 local_irq_disable(); 2353 local_irq_disable();
2354 __enable_cpu_timer_accounting(vcpu);
2251 __kvm_guest_exit(); 2355 __kvm_guest_exit();
2252 local_irq_enable(); 2356 local_irq_enable();
2253 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 2357 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -2271,7 +2375,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2271 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 2375 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2272 } 2376 }
2273 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 2377 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2274 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm; 2378 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2275 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 2379 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2276 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 2380 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2277 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 2381 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
@@ -2293,7 +2397,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2293 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 2397 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2294 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 2398 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2295 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 2399 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2296 kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm; 2400 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2297 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 2401 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2298 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 2402 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2299 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 2403 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
@@ -2325,6 +2429,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2325 } 2429 }
2326 2430
2327 sync_regs(vcpu, kvm_run); 2431 sync_regs(vcpu, kvm_run);
2432 enable_cpu_timer_accounting(vcpu);
2328 2433
2329 might_fault(); 2434 might_fault();
2330 rc = __vcpu_run(vcpu); 2435 rc = __vcpu_run(vcpu);
@@ -2344,6 +2449,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2344 rc = 0; 2449 rc = 0;
2345 } 2450 }
2346 2451
2452 disable_cpu_timer_accounting(vcpu);
2347 store_regs(vcpu, kvm_run); 2453 store_regs(vcpu, kvm_run);
2348 2454
2349 if (vcpu->sigset_active) 2455 if (vcpu->sigset_active)
@@ -2364,7 +2470,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2364 unsigned char archmode = 1; 2470 unsigned char archmode = 1;
2365 freg_t fprs[NUM_FPRS]; 2471 freg_t fprs[NUM_FPRS];
2366 unsigned int px; 2472 unsigned int px;
2367 u64 clkcomp; 2473 u64 clkcomp, cputm;
2368 int rc; 2474 int rc;
2369 2475
2370 px = kvm_s390_get_prefix(vcpu); 2476 px = kvm_s390_get_prefix(vcpu);
@@ -2386,7 +2492,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2386 fprs, 128); 2492 fprs, 128);
2387 } else { 2493 } else {
2388 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 2494 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2389 vcpu->run->s.regs.vrs, 128); 2495 vcpu->run->s.regs.fprs, 128);
2390 } 2496 }
2391 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, 2497 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2392 vcpu->run->s.regs.gprs, 128); 2498 vcpu->run->s.regs.gprs, 128);
@@ -2398,8 +2504,9 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2398 &vcpu->run->s.regs.fpc, 4); 2504 &vcpu->run->s.regs.fpc, 4);
2399 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, 2505 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2400 &vcpu->arch.sie_block->todpr, 4); 2506 &vcpu->arch.sie_block->todpr, 4);
2507 cputm = kvm_s390_get_cpu_timer(vcpu);
2401 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, 2508 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2402 &vcpu->arch.sie_block->cputm, 8); 2509 &cputm, 8);
2403 clkcomp = vcpu->arch.sie_block->ckc >> 8; 2510 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2404 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, 2511 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2405 &clkcomp, 8); 2512 &clkcomp, 8);
@@ -2605,7 +2712,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2605 switch (mop->op) { 2712 switch (mop->op) {
2606 case KVM_S390_MEMOP_LOGICAL_READ: 2713 case KVM_S390_MEMOP_LOGICAL_READ:
2607 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2714 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2608 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false); 2715 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2716 mop->size, GACC_FETCH);
2609 break; 2717 break;
2610 } 2718 }
2611 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 2719 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
@@ -2616,7 +2724,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2616 break; 2724 break;
2617 case KVM_S390_MEMOP_LOGICAL_WRITE: 2725 case KVM_S390_MEMOP_LOGICAL_WRITE:
2618 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2726 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2619 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true); 2727 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2728 mop->size, GACC_STORE);
2620 break; 2729 break;
2621 } 2730 }
2622 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 2731 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
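
The mem_op hunks above only swap the boolean write flag for the GACC_FETCH/GACC_STORE access modes; the userspace-facing KVM_S390_MEM_OP ioctl is unchanged. A hedged sketch of the check-only variant that exercises this path, assuming the struct layout and constants in <linux/kvm.h>:

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* returns 0 if the guest logical range is writable, a PGM code or
	 * negative errno otherwise */
	static int check_guest_writable(int vcpu_fd, uint64_t gaddr, uint32_t size)
	{
		struct kvm_s390_mem_op op = {
			.gaddr = gaddr,
			.size  = size,
			.op    = KVM_S390_MEMOP_LOGICAL_WRITE,
			.flags = KVM_S390_MEMOP_F_CHECK_ONLY,	/* no data copy */
			.ar    = 0,				/* access register 0 */
		};

		return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
	}
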
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index df1abada1f36..8621ab00ec8e 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -19,6 +19,7 @@
19#include <linux/kvm.h> 19#include <linux/kvm.h>
20#include <linux/kvm_host.h> 20#include <linux/kvm_host.h>
21#include <asm/facility.h> 21#include <asm/facility.h>
22#include <asm/processor.h>
22 23
23typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); 24typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
24 25
@@ -53,6 +54,11 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
53 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; 54 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
54} 55}
55 56
57static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
58{
59 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_WAIT;
60}
61
56static inline int kvm_is_ucontrol(struct kvm *kvm) 62static inline int kvm_is_ucontrol(struct kvm *kvm)
57{ 63{
58#ifdef CONFIG_KVM_S390_UCONTROL 64#ifdef CONFIG_KVM_S390_UCONTROL
@@ -154,8 +160,8 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
154/* test availability of facility in a kvm instance */ 160/* test availability of facility in a kvm instance */
155static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr) 161static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
156{ 162{
157 return __test_facility(nr, kvm->arch.model.fac->mask) && 163 return __test_facility(nr, kvm->arch.model.fac_mask) &&
158 __test_facility(nr, kvm->arch.model.fac->list); 164 __test_facility(nr, kvm->arch.model.fac_list);
159} 165}
160 166
161static inline int set_kvm_facility(u64 *fac_list, unsigned long nr) 167static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
@@ -212,8 +218,22 @@ int kvm_s390_reinject_io_int(struct kvm *kvm,
212int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); 218int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
213 219
214/* implemented in intercept.c */ 220/* implemented in intercept.c */
215void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc); 221u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu);
216int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); 222int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
223static inline void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilen)
224{
225 struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
226
227 sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilen);
228}
229static inline void kvm_s390_forward_psw(struct kvm_vcpu *vcpu, int ilen)
230{
231 kvm_s390_rewind_psw(vcpu, -ilen);
232}
233static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu)
234{
235 kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu));
236}
217 237
218/* implemented in priv.c */ 238/* implemented in priv.c */
219int is_valid_psw(psw_t *psw); 239int is_valid_psw(psw_t *psw);
@@ -248,6 +268,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
248void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); 268void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
249unsigned long kvm_s390_fac_list_mask_size(void); 269unsigned long kvm_s390_fac_list_mask_size(void);
250extern unsigned long kvm_s390_fac_list_mask[]; 270extern unsigned long kvm_s390_fac_list_mask[];
271void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
272__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
251 273
252/* implemented in diag.c */ 274/* implemented in diag.c */
253int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); 275int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
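
The new inline helpers above express retry and forward in terms of a signed rewind, and the rewind itself has to wrap the instruction address according to the PSW addressing mode. An illustrative standalone sketch of that wrap behaviour (not the kernel's __rewind_psw(), which works on the PSW mask bits directly):

	#include <stdint.h>

	static uint64_t rewind_addr(uint64_t addr, int ilen, int addr_mode_bits)
	{
		uint64_t mask;

		switch (addr_mode_bits) {	/* 24-, 31- or 64-bit addressing */
		case 24: mask = (1ULL << 24) - 1; break;
		case 31: mask = (1ULL << 31) - 1; break;
		default: mask = ~0ULL;       break;
		}
		/* a negative ilen forwards the address, as kvm_s390_forward_psw() does */
		return (addr - (uint64_t)(int64_t)ilen) & mask;
	}
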
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index ed74e86d9b9e..f218ccf016c8 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -173,7 +173,7 @@ static int handle_skey(struct kvm_vcpu *vcpu)
173 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 173 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
174 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 174 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
175 175
176 kvm_s390_rewind_psw(vcpu, 4); 176 kvm_s390_retry_instr(vcpu);
177 VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); 177 VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
178 return 0; 178 return 0;
179} 179}
@@ -184,7 +184,7 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
184 if (psw_bits(vcpu->arch.sie_block->gpsw).p) 184 if (psw_bits(vcpu->arch.sie_block->gpsw).p)
185 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 185 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
186 wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu)); 186 wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
187 kvm_s390_rewind_psw(vcpu, 4); 187 kvm_s390_retry_instr(vcpu);
188 VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation"); 188 VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
189 return 0; 189 return 0;
190} 190}
@@ -354,7 +354,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
354 * We need to shift the lower 32 facility bits (bit 0-31) from a u64 354 * We need to shift the lower 32 facility bits (bit 0-31) from a u64
355 * into a u32 memory representation. They will remain bits 0-31. 355 * into a u32 memory representation. They will remain bits 0-31.
356 */ 356 */
357 fac = *vcpu->kvm->arch.model.fac->list >> 32; 357 fac = *vcpu->kvm->arch.model.fac_list >> 32;
358 rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list), 358 rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list),
359 &fac, sizeof(fac)); 359 &fac, sizeof(fac));
360 if (rc) 360 if (rc)
@@ -759,8 +759,8 @@ static int handle_essa(struct kvm_vcpu *vcpu)
759 if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6) 759 if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
760 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 760 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
761 761
762 /* Rewind PSW to repeat the ESSA instruction */ 762 /* Retry the ESSA instruction */
763 kvm_s390_rewind_psw(vcpu, 4); 763 kvm_s390_retry_instr(vcpu);
764 vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ 764 vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
765 cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); 765 cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
766 down_read(&gmap->mm->mmap_sem); 766 down_read(&gmap->mm->mmap_sem);
@@ -981,11 +981,12 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
981 return -EOPNOTSUPP; 981 return -EOPNOTSUPP;
982 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) 982 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
983 ipte_lock(vcpu); 983 ipte_lock(vcpu);
984 ret = guest_translate_address(vcpu, address1, ar, &gpa, 1); 984 ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE);
985 if (ret == PGM_PROTECTION) { 985 if (ret == PGM_PROTECTION) {
986 /* Write protected? Try again with read-only... */ 986 /* Write protected? Try again with read-only... */
987 cc = 1; 987 cc = 1;
988 ret = guest_translate_address(vcpu, address1, ar, &gpa, 0); 988 ret = guest_translate_address(vcpu, address1, ar, &gpa,
989 GACC_FETCH);
989 } 990 }
990 if (ret) { 991 if (ret) {
991 if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) { 992 if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 44adbb819041..01c8b501cb6d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -32,6 +32,7 @@
32#include <asm/mtrr.h> 32#include <asm/mtrr.h>
33#include <asm/msr-index.h> 33#include <asm/msr-index.h>
34#include <asm/asm.h> 34#include <asm/asm.h>
35#include <asm/kvm_page_track.h>
35 36
36#define KVM_MAX_VCPUS 255 37#define KVM_MAX_VCPUS 255
37#define KVM_SOFT_MAX_VCPUS 160 38#define KVM_SOFT_MAX_VCPUS 160
@@ -214,6 +215,14 @@ struct kvm_mmu_memory_cache {
214 void *objects[KVM_NR_MEM_OBJS]; 215 void *objects[KVM_NR_MEM_OBJS];
215}; 216};
216 217
218/*
219 * the pages used as guest page table on soft mmu are tracked by
220 * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
221 * by indirect shadow page can not be more than 15 bits.
222 *
223 * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access,
224 * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
225 */
217union kvm_mmu_page_role { 226union kvm_mmu_page_role {
218 unsigned word; 227 unsigned word;
219 struct { 228 struct {
@@ -276,7 +285,7 @@ struct kvm_mmu_page {
276#endif 285#endif
277 286
278 /* Number of writes since the last time traversal visited this page. */ 287 /* Number of writes since the last time traversal visited this page. */
279 int write_flooding_count; 288 atomic_t write_flooding_count;
280}; 289};
281 290
282struct kvm_pio_request { 291struct kvm_pio_request {
@@ -338,12 +347,8 @@ struct kvm_mmu {
338 347
339 struct rsvd_bits_validate guest_rsvd_check; 348 struct rsvd_bits_validate guest_rsvd_check;
340 349
341 /* 350 /* Can have large pages at levels 2..last_nonleaf_level-1. */
342 * Bitmap: bit set = last pte in walk 351 u8 last_nonleaf_level;
343 * index[0:1]: level (zero-based)
344 * index[2]: pte.ps
345 */
346 u8 last_pte_bitmap;
347 352
348 bool nx; 353 bool nx;
349 354
@@ -498,7 +503,6 @@ struct kvm_vcpu_arch {
498 struct kvm_mmu_memory_cache mmu_page_header_cache; 503 struct kvm_mmu_memory_cache mmu_page_header_cache;
499 504
500 struct fpu guest_fpu; 505 struct fpu guest_fpu;
501 bool eager_fpu;
502 u64 xcr0; 506 u64 xcr0;
503 u64 guest_supported_xcr0; 507 u64 guest_supported_xcr0;
504 u32 guest_xstate_size; 508 u32 guest_xstate_size;
@@ -644,12 +648,13 @@ struct kvm_vcpu_arch {
644}; 648};
645 649
646struct kvm_lpage_info { 650struct kvm_lpage_info {
647 int write_count; 651 int disallow_lpage;
648}; 652};
649 653
650struct kvm_arch_memory_slot { 654struct kvm_arch_memory_slot {
651 struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; 655 struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
652 struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; 656 struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
657 unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
653}; 658};
654 659
655/* 660/*
@@ -694,6 +699,8 @@ struct kvm_arch {
694 */ 699 */
695 struct list_head active_mmu_pages; 700 struct list_head active_mmu_pages;
696 struct list_head zapped_obsolete_pages; 701 struct list_head zapped_obsolete_pages;
702 struct kvm_page_track_notifier_node mmu_sp_tracker;
703 struct kvm_page_track_notifier_head track_notifier_head;
697 704
698 struct list_head assigned_dev_head; 705 struct list_head assigned_dev_head;
699 struct iommu_domain *iommu_domain; 706 struct iommu_domain *iommu_domain;
@@ -754,6 +761,8 @@ struct kvm_arch {
754 761
755 bool irqchip_split; 762 bool irqchip_split;
756 u8 nr_reserved_ioapic_pins; 763 u8 nr_reserved_ioapic_pins;
764
765 bool disabled_lapic_found;
757}; 766};
758 767
759struct kvm_vm_stat { 768struct kvm_vm_stat {
@@ -988,6 +997,8 @@ void kvm_mmu_module_exit(void);
988void kvm_mmu_destroy(struct kvm_vcpu *vcpu); 997void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
989int kvm_mmu_create(struct kvm_vcpu *vcpu); 998int kvm_mmu_create(struct kvm_vcpu *vcpu);
990void kvm_mmu_setup(struct kvm_vcpu *vcpu); 999void kvm_mmu_setup(struct kvm_vcpu *vcpu);
1000void kvm_mmu_init_vm(struct kvm *kvm);
1001void kvm_mmu_uninit_vm(struct kvm *kvm);
991void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 1002void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
992 u64 dirty_mask, u64 nx_mask, u64 x_mask); 1003 u64 dirty_mask, u64 nx_mask, u64 x_mask);
993 1004
@@ -1127,8 +1138,6 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
1127 1138
1128void kvm_inject_nmi(struct kvm_vcpu *vcpu); 1139void kvm_inject_nmi(struct kvm_vcpu *vcpu);
1129 1140
1130void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1131 const u8 *new, int bytes);
1132int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); 1141int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
1133int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); 1142int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
1134void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); 1143void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
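
The kvm_host.h changes above replace the per-slot write_count with a 16-bit gfn_track array per tracking mode, which is why the page-role comment caps the role bits at 15. A rough sketch of the counting idea behind it; this is a guess at the concept, not the mmu's exact helper, and adjust_gfn_track() is an invented name.

	#include <linux/kernel.h>
	#include <linux/kvm_host.h>

	static void adjust_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
				     enum kvm_page_track_mode mode, int count)
	{
		int index = gfn - slot->base_gfn;	/* 4K-page granularity */
		int val = slot->arch.gfn_track[mode][index] + count;

		/* the count is a reference count: the gfn stays write-protected
		 * for as long as it is non-zero */
		if (WARN_ON(val < 0 || val > USHRT_MAX))
			return;
		slot->arch.gfn_track[mode][index] = val;
	}
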
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
new file mode 100644
index 000000000000..c2b8d24a235c
--- /dev/null
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -0,0 +1,61 @@
1#ifndef _ASM_X86_KVM_PAGE_TRACK_H
2#define _ASM_X86_KVM_PAGE_TRACK_H
3
4enum kvm_page_track_mode {
5 KVM_PAGE_TRACK_WRITE,
6 KVM_PAGE_TRACK_MAX,
7};
8
9/*
10 * The notifier represented by @kvm_page_track_notifier_node is linked into
11 * the head which will be notified when guest is triggering the track event.
12 *
13 * Write access on the head is protected by kvm->mmu_lock, read access
14 * is protected by track_srcu.
15 */
16struct kvm_page_track_notifier_head {
17 struct srcu_struct track_srcu;
18 struct hlist_head track_notifier_list;
19};
20
21struct kvm_page_track_notifier_node {
22 struct hlist_node node;
23
24 /*
25 * It is called when guest is writing the write-tracked page
26 * and write emulation is finished at that time.
27 *
28 * @vcpu: the vcpu where the write access happened.
29 * @gpa: the physical address written by guest.
30 * @new: the data was written to the address.
31 * @bytes: the written length.
32 */
33 void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
34 int bytes);
35};
36
37void kvm_page_track_init(struct kvm *kvm);
38
39void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
40 struct kvm_memory_slot *dont);
41int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
42 unsigned long npages);
43
44void kvm_slot_page_track_add_page(struct kvm *kvm,
45 struct kvm_memory_slot *slot, gfn_t gfn,
46 enum kvm_page_track_mode mode);
47void kvm_slot_page_track_remove_page(struct kvm *kvm,
48 struct kvm_memory_slot *slot, gfn_t gfn,
49 enum kvm_page_track_mode mode);
50bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
51 enum kvm_page_track_mode mode);
52
53void
54kvm_page_track_register_notifier(struct kvm *kvm,
55 struct kvm_page_track_notifier_node *n);
56void
57kvm_page_track_unregister_notifier(struct kvm *kvm,
58 struct kvm_page_track_notifier_node *n);
59void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
60 int bytes);
61#endif
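
A consumer of the new header (the commit log mentions virtual GPUs as the intended future user) would register a notifier node and receive completed write emulations to tracked pages. A hedged sketch using only the declarations above; my_track_write() and my_attach() are illustrative names.

	#include <linux/kvm_host.h>
	#include <asm/kvm_page_track.h>

	static void my_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
				   const u8 *new, int bytes)
	{
		/* called after write emulation to a write-tracked gfn finished */
		pr_debug("tracked write: gpa=0x%llx bytes=%d\n", gpa, bytes);
	}

	static struct kvm_page_track_notifier_node my_node = {
		.track_write = my_track_write,
	};

	static void my_attach(struct kvm *kvm)
	{
		kvm_page_track_register_notifier(kvm, &my_node);
	}
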
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 7956412d09bd..9b1a91834ac8 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -226,7 +226,9 @@
226 (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) 226 (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
227 227
228/* Declare the various hypercall operations. */ 228/* Declare the various hypercall operations. */
229#define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT 0x0008 229#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
230#define HVCALL_POST_MESSAGE 0x005c
231#define HVCALL_SIGNAL_EVENT 0x005d
230 232
231#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001 233#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001
232#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12 234#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index a1ff508bb423..464fa477afbf 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -13,9 +13,10 @@ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
13 13
14kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ 14kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
15 i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ 15 i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
16 hyperv.o 16 hyperv.o page_track.o
17 17
18kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o 18kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o
19
19kvm-intel-y += vmx.o pmu_intel.o 20kvm-intel-y += vmx.o pmu_intel.o
20kvm-amd-y += svm.o pmu_amd.o 21kvm-amd-y += svm.o pmu_amd.o
21 22
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
index 9dc091acd5fb..308b8597c691 100644
--- a/arch/x86/kvm/assigned-dev.c
+++ b/arch/x86/kvm/assigned-dev.c
@@ -51,11 +51,9 @@ struct kvm_assigned_dev_kernel {
51static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, 51static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
52 int assigned_dev_id) 52 int assigned_dev_id)
53{ 53{
54 struct list_head *ptr;
55 struct kvm_assigned_dev_kernel *match; 54 struct kvm_assigned_dev_kernel *match;
56 55
57 list_for_each(ptr, head) { 56 list_for_each_entry(match, head, list) {
58 match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
59 if (match->assigned_dev_id == assigned_dev_id) 57 if (match->assigned_dev_id == assigned_dev_id)
60 return match; 58 return match;
61 } 59 }
@@ -373,14 +371,10 @@ static void kvm_free_assigned_device(struct kvm *kvm,
373 371
374void kvm_free_all_assigned_devices(struct kvm *kvm) 372void kvm_free_all_assigned_devices(struct kvm *kvm)
375{ 373{
376 struct list_head *ptr, *ptr2; 374 struct kvm_assigned_dev_kernel *assigned_dev, *tmp;
377 struct kvm_assigned_dev_kernel *assigned_dev;
378
379 list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
380 assigned_dev = list_entry(ptr,
381 struct kvm_assigned_dev_kernel,
382 list);
383 375
376 list_for_each_entry_safe(assigned_dev, tmp,
377 &kvm->arch.assigned_dev_head, list) {
384 kvm_free_assigned_device(kvm, assigned_dev); 378 kvm_free_assigned_device(kvm, assigned_dev);
385 } 379 }
386} 380}
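
For readers less used to the kernel's intrusive lists, here is a standalone sketch of what the list_for_each_entry() form adopted above boils down to: the container_of() step that list_for_each()/list_entry() spelled out by hand is folded into the iterator. The struct, list_add_tail() and the simplified macro (which takes the type explicitly instead of inferring it with typeof, as the real kernel macro does) are illustrative, not kernel code.

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

#define list_for_each_entry(pos, head, type, member)              \
        for (pos = container_of((head)->next, type, member);      \
             &pos->member != (head);                               \
             pos = container_of(pos->member.next, type, member))

struct assigned_dev {
        int assigned_dev_id;
        struct list_head list;          /* embedded list node */
};

static void list_add_tail(struct list_head *new, struct list_head *head)
{
        new->prev = head->prev;
        new->next = head;
        head->prev->next = new;
        head->prev = new;
}

int main(void)
{
        struct list_head head = { &head, &head };
        struct assigned_dev a = { .assigned_dev_id = 1 };
        struct assigned_dev b = { .assigned_dev_id = 2 };
        struct assigned_dev *dev;

        list_add_tail(&a.list, &head);
        list_add_tail(&b.list, &head);

        /* Iterate over the containing structures, not the raw nodes. */
        list_for_each_entry(dev, &head, struct assigned_dev, list)
                printf("assigned_dev_id=%d\n", dev->assigned_dev_id);
        return 0;
}

list_for_each_entry_safe(), used in kvm_free_all_assigned_devices() above, additionally caches the next pointer so the current entry can be freed during the walk.
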
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 6525e926f566..0029644bf09c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -46,11 +46,18 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
46 return ret; 46 return ret;
47} 47}
48 48
49bool kvm_mpx_supported(void)
50{
51 return ((host_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR))
52 && kvm_x86_ops->mpx_supported());
53}
54EXPORT_SYMBOL_GPL(kvm_mpx_supported);
55
49u64 kvm_supported_xcr0(void) 56u64 kvm_supported_xcr0(void)
50{ 57{
51 u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0; 58 u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
52 59
53 if (!kvm_x86_ops->mpx_supported()) 60 if (!kvm_mpx_supported())
54 xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); 61 xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
55 62
56 return xcr0; 63 return xcr0;
@@ -97,8 +104,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
97 if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) 104 if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
98 best->ebx = xstate_required_size(vcpu->arch.xcr0, true); 105 best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
99 106
100 vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu); 107 if (use_eager_fpu())
101 if (vcpu->arch.eager_fpu)
102 kvm_x86_ops->fpu_activate(vcpu); 108 kvm_x86_ops->fpu_activate(vcpu);
103 109
104 /* 110 /*
@@ -295,7 +301,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
295#endif 301#endif
296 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; 302 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
297 unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; 303 unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
298 unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0; 304 unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
299 unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; 305 unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
300 306
301 /* cpuid 1.edx */ 307 /* cpuid 1.edx */
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index c8eda1498121..66a6581724ad 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -5,6 +5,7 @@
5#include <asm/cpu.h> 5#include <asm/cpu.h>
6 6
7int kvm_update_cpuid(struct kvm_vcpu *vcpu); 7int kvm_update_cpuid(struct kvm_vcpu *vcpu);
8bool kvm_mpx_supported(void);
8struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, 9struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
9 u32 function, u32 index); 10 u32 function, u32 index);
10int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, 11int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
@@ -135,14 +136,6 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
135 return best && (best->ebx & bit(X86_FEATURE_RTM)); 136 return best && (best->ebx & bit(X86_FEATURE_RTM));
136} 137}
137 138
138static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
139{
140 struct kvm_cpuid_entry2 *best;
141
142 best = kvm_find_cpuid_entry(vcpu, 7, 0);
143 return best && (best->ebx & bit(X86_FEATURE_MPX));
144}
145
146static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) 139static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu)
147{ 140{
148 struct kvm_cpuid_entry2 *best; 141 struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index c58ba67175ac..5ff3485acb60 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1043,6 +1043,27 @@ bool kvm_hv_hypercall_enabled(struct kvm *kvm)
1043 return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; 1043 return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
1044} 1044}
1045 1045
1046static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
1047{
1048 bool longmode;
1049
1050 longmode = is_64_bit_mode(vcpu);
1051 if (longmode)
1052 kvm_register_write(vcpu, VCPU_REGS_RAX, result);
1053 else {
1054 kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32);
1055 kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff);
1056 }
1057}
1058
1059static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
1060{
1061 struct kvm_run *run = vcpu->run;
1062
1063 kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result);
1064 return 1;
1065}
1066
1046int kvm_hv_hypercall(struct kvm_vcpu *vcpu) 1067int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
1047{ 1068{
1048 u64 param, ingpa, outgpa, ret; 1069 u64 param, ingpa, outgpa, ret;
@@ -1055,7 +1076,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
1055 */ 1076 */
1056 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { 1077 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
1057 kvm_queue_exception(vcpu, UD_VECTOR); 1078 kvm_queue_exception(vcpu, UD_VECTOR);
1058 return 0; 1079 return 1;
1059 } 1080 }
1060 1081
1061 longmode = is_64_bit_mode(vcpu); 1082 longmode = is_64_bit_mode(vcpu);
@@ -1083,22 +1104,33 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
1083 1104
1084 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); 1105 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
1085 1106
1107 /* Hypercall continuation is not supported yet */
1108 if (rep_cnt || rep_idx) {
1109 res = HV_STATUS_INVALID_HYPERCALL_CODE;
1110 goto set_result;
1111 }
1112
1086 switch (code) { 1113 switch (code) {
1087 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: 1114 case HVCALL_NOTIFY_LONG_SPIN_WAIT:
1088 kvm_vcpu_on_spin(vcpu); 1115 kvm_vcpu_on_spin(vcpu);
1089 break; 1116 break;
1117 case HVCALL_POST_MESSAGE:
1118 case HVCALL_SIGNAL_EVENT:
1119 vcpu->run->exit_reason = KVM_EXIT_HYPERV;
1120 vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
1121 vcpu->run->hyperv.u.hcall.input = param;
1122 vcpu->run->hyperv.u.hcall.params[0] = ingpa;
1123 vcpu->run->hyperv.u.hcall.params[1] = outgpa;
1124 vcpu->arch.complete_userspace_io =
1125 kvm_hv_hypercall_complete_userspace;
1126 return 0;
1090 default: 1127 default:
1091 res = HV_STATUS_INVALID_HYPERCALL_CODE; 1128 res = HV_STATUS_INVALID_HYPERCALL_CODE;
1092 break; 1129 break;
1093 } 1130 }
1094 1131
1132set_result:
1095 ret = res | (((u64)rep_done & 0xfff) << 32); 1133 ret = res | (((u64)rep_done & 0xfff) << 32);
1096 if (longmode) { 1134 kvm_hv_hypercall_set_result(vcpu, ret);
1097 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
1098 } else {
1099 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
1100 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
1101 }
1102
1103 return 1; 1135 return 1;
1104} 1136}
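
A hedged sketch of the userspace half of this exit, assuming a VMM with the usual KVM_RUN loop and the exported uapi headers: when kvm_run.exit_reason is KVM_EXIT_HYPERV and the type is KVM_EXIT_HYPERV_HCALL, the VMM services the call and stores a Hyper-V status in hcall.result before the next KVM_RUN. vmm_post_message() and vmm_signal_event() are hypothetical stubs; the kvm_run field names are the ones the hunk above fills in.

#include <linux/kvm.h>
#include <asm/hyperv.h>          /* HVCALL_*, HV_STATUS_* */
#include <stdint.h>

/* Hypothetical VMM back ends; a real VMM would parse the message/event
 * pages referenced by the hypercall parameters. */
static uint64_t vmm_post_message(uint64_t ingpa) { return HV_STATUS_SUCCESS; }
static uint64_t vmm_signal_event(uint64_t ingpa) { return HV_STATUS_SUCCESS; }

static void handle_hyperv_hcall(struct kvm_run *run)
{
        struct kvm_hyperv_exit *hv = &run->hyperv;
        uint16_t code;

        if (hv->type != KVM_EXIT_HYPERV_HCALL)
                return;

        code = hv->u.hcall.input & 0xffff;   /* low bits carry the call code */

        switch (code) {
        case HVCALL_POST_MESSAGE:
                /* params[0] is the input GPA, params[1] the output GPA. */
                hv->u.hcall.result = vmm_post_message(hv->u.hcall.params[0]);
                break;
        case HVCALL_SIGNAL_EVENT:
                hv->u.hcall.result = vmm_signal_event(hv->u.hcall.params[0]);
                break;
        default:
                hv->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE;
                break;
        }
        /* On the following KVM_RUN, kvm_hv_hypercall_complete_userspace()
         * above copies hcall.result into the guest's RAX (or EDX:EAX in
         * 32-bit mode). */
}

Since the kernel copies hcall.result back unconditionally on re-entry, the VMM should always fill it in before resuming the vCPU.
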
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index b0ea42b78ccd..a4bf5b45d65a 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -51,32 +51,9 @@
51#define RW_STATE_WORD0 3 51#define RW_STATE_WORD0 3
52#define RW_STATE_WORD1 4 52#define RW_STATE_WORD1 4
53 53
54/* Compute with 96 bit intermediate result: (a*b)/c */ 54static void pit_set_gate(struct kvm_pit *pit, int channel, u32 val)
55static u64 muldiv64(u64 a, u32 b, u32 c)
56{ 55{
57 union { 56 struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel];
58 u64 ll;
59 struct {
60 u32 low, high;
61 } l;
62 } u, res;
63 u64 rl, rh;
64
65 u.ll = a;
66 rl = (u64)u.l.low * (u64)b;
67 rh = (u64)u.l.high * (u64)b;
68 rh += (rl >> 32);
69 res.l.high = div64_u64(rh, c);
70 res.l.low = div64_u64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
71 return res.ll;
72}
73
74static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
75{
76 struct kvm_kpit_channel_state *c =
77 &kvm->arch.vpit->pit_state.channels[channel];
78
79 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
80 57
81 switch (c->mode) { 58 switch (c->mode) {
82 default: 59 default:
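
The open-coded muldiv64() removed above is replaced throughout this file by the generic mul_u64_u32_div() helper, which keeps the full product before dividing. A standalone sketch of why the widening matters for the nanosecond-to-PIT-tick conversion, reimplementing the helper with unsigned __int128 (the in-kernel implementation differs per architecture):

#include <stdint.h>
#include <stdio.h>

#define KVM_PIT_FREQ  1193182u          /* i8254 input clock in Hz (i8254.h) */
#define NSEC_PER_SEC  1000000000u

static uint64_t mul_u64_u32_div(uint64_t a, uint32_t mul, uint32_t div)
{
        /* 64x32 -> 96-bit product, then the division. */
        return (uint64_t)(((unsigned __int128)a * mul) / div);
}

int main(void)
{
        uint64_t elapsed_ns = 5ull * 3600 * NSEC_PER_SEC;   /* 5 hours */

        /* Naive u64 arithmetic wraps the intermediate product... */
        uint64_t wrong = elapsed_ns * KVM_PIT_FREQ / NSEC_PER_SEC;
        /* ...the widened product does not. */
        uint64_t right = mul_u64_u32_div(elapsed_ns, KVM_PIT_FREQ,
                                         NSEC_PER_SEC);

        printf("naive: %llu ticks, widened: %llu ticks\n",
               (unsigned long long)wrong, (unsigned long long)right);
        return 0;
}

With KVM_PIT_FREQ at roughly 1.19 MHz, the naive 64-bit product overflows once the elapsed time passes a little over four hours, a window a long-running guest can easily reach.
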
@@ -97,18 +74,16 @@ static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
97 c->gate = val; 74 c->gate = val;
98} 75}
99 76
100static int pit_get_gate(struct kvm *kvm, int channel) 77static int pit_get_gate(struct kvm_pit *pit, int channel)
101{ 78{
102 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); 79 return pit->pit_state.channels[channel].gate;
103
104 return kvm->arch.vpit->pit_state.channels[channel].gate;
105} 80}
106 81
107static s64 __kpit_elapsed(struct kvm *kvm) 82static s64 __kpit_elapsed(struct kvm_pit *pit)
108{ 83{
109 s64 elapsed; 84 s64 elapsed;
110 ktime_t remaining; 85 ktime_t remaining;
111 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; 86 struct kvm_kpit_state *ps = &pit->pit_state;
112 87
113 if (!ps->period) 88 if (!ps->period)
114 return 0; 89 return 0;
@@ -128,26 +103,23 @@ static s64 __kpit_elapsed(struct kvm *kvm)
128 return elapsed; 103 return elapsed;
129} 104}
130 105
131static s64 kpit_elapsed(struct kvm *kvm, struct kvm_kpit_channel_state *c, 106static s64 kpit_elapsed(struct kvm_pit *pit, struct kvm_kpit_channel_state *c,
132 int channel) 107 int channel)
133{ 108{
134 if (channel == 0) 109 if (channel == 0)
135 return __kpit_elapsed(kvm); 110 return __kpit_elapsed(pit);
136 111
137 return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); 112 return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
138} 113}
139 114
140static int pit_get_count(struct kvm *kvm, int channel) 115static int pit_get_count(struct kvm_pit *pit, int channel)
141{ 116{
142 struct kvm_kpit_channel_state *c = 117 struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel];
143 &kvm->arch.vpit->pit_state.channels[channel];
144 s64 d, t; 118 s64 d, t;
145 int counter; 119 int counter;
146 120
147 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); 121 t = kpit_elapsed(pit, c, channel);
148 122 d = mul_u64_u32_div(t, KVM_PIT_FREQ, NSEC_PER_SEC);
149 t = kpit_elapsed(kvm, c, channel);
150 d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
151 123
152 switch (c->mode) { 124 switch (c->mode) {
153 case 0: 125 case 0:
@@ -167,17 +139,14 @@ static int pit_get_count(struct kvm *kvm, int channel)
167 return counter; 139 return counter;
168} 140}
169 141
170static int pit_get_out(struct kvm *kvm, int channel) 142static int pit_get_out(struct kvm_pit *pit, int channel)
171{ 143{
172 struct kvm_kpit_channel_state *c = 144 struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel];
173 &kvm->arch.vpit->pit_state.channels[channel];
174 s64 d, t; 145 s64 d, t;
175 int out; 146 int out;
176 147
177 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); 148 t = kpit_elapsed(pit, c, channel);
178 149 d = mul_u64_u32_div(t, KVM_PIT_FREQ, NSEC_PER_SEC);
179 t = kpit_elapsed(kvm, c, channel);
180 d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
181 150
182 switch (c->mode) { 151 switch (c->mode) {
183 default: 152 default:
@@ -202,29 +171,23 @@ static int pit_get_out(struct kvm *kvm, int channel)
202 return out; 171 return out;
203} 172}
204 173
205static void pit_latch_count(struct kvm *kvm, int channel) 174static void pit_latch_count(struct kvm_pit *pit, int channel)
206{ 175{
207 struct kvm_kpit_channel_state *c = 176 struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel];
208 &kvm->arch.vpit->pit_state.channels[channel];
209
210 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
211 177
212 if (!c->count_latched) { 178 if (!c->count_latched) {
213 c->latched_count = pit_get_count(kvm, channel); 179 c->latched_count = pit_get_count(pit, channel);
214 c->count_latched = c->rw_mode; 180 c->count_latched = c->rw_mode;
215 } 181 }
216} 182}
217 183
218static void pit_latch_status(struct kvm *kvm, int channel) 184static void pit_latch_status(struct kvm_pit *pit, int channel)
219{ 185{
220 struct kvm_kpit_channel_state *c = 186 struct kvm_kpit_channel_state *c = &pit->pit_state.channels[channel];
221 &kvm->arch.vpit->pit_state.channels[channel];
222
223 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
224 187
225 if (!c->status_latched) { 188 if (!c->status_latched) {
226 /* TODO: Return NULL COUNT (bit 6). */ 189 /* TODO: Return NULL COUNT (bit 6). */
227 c->status = ((pit_get_out(kvm, channel) << 7) | 190 c->status = ((pit_get_out(pit, channel) << 7) |
228 (c->rw_mode << 4) | 191 (c->rw_mode << 4) |
229 (c->mode << 1) | 192 (c->mode << 1) |
230 c->bcd); 193 c->bcd);
@@ -232,26 +195,24 @@ static void pit_latch_status(struct kvm *kvm, int channel)
232 } 195 }
233} 196}
234 197
198static inline struct kvm_pit *pit_state_to_pit(struct kvm_kpit_state *ps)
199{
200 return container_of(ps, struct kvm_pit, pit_state);
201}
202
235static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) 203static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
236{ 204{
237 struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, 205 struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
238 irq_ack_notifier); 206 irq_ack_notifier);
239 int value; 207 struct kvm_pit *pit = pit_state_to_pit(ps);
240 208
241 spin_lock(&ps->inject_lock); 209 atomic_set(&ps->irq_ack, 1);
242 value = atomic_dec_return(&ps->pending); 210 /* irq_ack should be set before pending is read. Order accesses with
243 if (value < 0) 211 * inc(pending) in pit_timer_fn and xchg(irq_ack, 0) in pit_do_work.
244 /* spurious acks can be generated if, for example, the 212 */
245 * PIC is being reset. Handle it gracefully here 213 smp_mb();
246 */ 214 if (atomic_dec_if_positive(&ps->pending) > 0)
247 atomic_inc(&ps->pending); 215 queue_kthread_work(&pit->worker, &pit->expired);
248 else if (value > 0)
249 /* in this case, we had multiple outstanding pit interrupts
250 * that we needed to inject. Reinject
251 */
252 queue_kthread_work(&ps->pit->worker, &ps->pit->expired);
253 ps->irq_ack = 1;
254 spin_unlock(&ps->inject_lock);
255} 216}
256 217
257void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) 218void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -282,45 +243,36 @@ static void pit_do_work(struct kthread_work *work)
282 struct kvm_vcpu *vcpu; 243 struct kvm_vcpu *vcpu;
283 int i; 244 int i;
284 struct kvm_kpit_state *ps = &pit->pit_state; 245 struct kvm_kpit_state *ps = &pit->pit_state;
285 int inject = 0;
286 246
287 /* Try to inject pending interrupts when 247 if (atomic_read(&ps->reinject) && !atomic_xchg(&ps->irq_ack, 0))
288 * last one has been acked. 248 return;
249
250 kvm_set_irq(kvm, pit->irq_source_id, 0, 1, false);
251 kvm_set_irq(kvm, pit->irq_source_id, 0, 0, false);
252
253 /*
254 * Provides NMI watchdog support via Virtual Wire mode.
255 * The route is: PIT -> LVT0 in NMI mode.
256 *
257 * Note: Our Virtual Wire implementation does not follow
258 * the MP specification. We propagate a PIT interrupt to all
 259 * VCPUs, but only when LVT0 is in NMI mode. The interrupt can
260 * also be simultaneously delivered through PIC and IOAPIC.
289 */ 261 */
290 spin_lock(&ps->inject_lock); 262 if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
291 if (ps->irq_ack) { 263 kvm_for_each_vcpu(i, vcpu, kvm)
292 ps->irq_ack = 0; 264 kvm_apic_nmi_wd_deliver(vcpu);
293 inject = 1;
294 }
295 spin_unlock(&ps->inject_lock);
296 if (inject) {
297 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
298 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
299
300 /*
301 * Provides NMI watchdog support via Virtual Wire mode.
302 * The route is: PIT -> PIC -> LVT0 in NMI mode.
303 *
304 * Note: Our Virtual Wire implementation is simplified, only
305 * propagating PIT interrupts to all VCPUs when they have set
306 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
307 * VCPU0, and only if its LVT0 is in EXTINT mode.
308 */
309 if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
310 kvm_for_each_vcpu(i, vcpu, kvm)
311 kvm_apic_nmi_wd_deliver(vcpu);
312 }
313} 265}
314 266
315static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) 267static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
316{ 268{
317 struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer); 269 struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer);
318 struct kvm_pit *pt = ps->kvm->arch.vpit; 270 struct kvm_pit *pt = pit_state_to_pit(ps);
319 271
320 if (ps->reinject || !atomic_read(&ps->pending)) { 272 if (atomic_read(&ps->reinject))
321 atomic_inc(&ps->pending); 273 atomic_inc(&ps->pending);
322 queue_kthread_work(&pt->worker, &pt->expired); 274
323 } 275 queue_kthread_work(&pt->worker, &pt->expired);
324 276
325 if (ps->is_periodic) { 277 if (ps->is_periodic) {
326 hrtimer_add_expires_ns(&ps->timer, ps->period); 278 hrtimer_add_expires_ns(&ps->timer, ps->period);
@@ -329,30 +281,54 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
329 return HRTIMER_NORESTART; 281 return HRTIMER_NORESTART;
330} 282}
331 283
332static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) 284static inline void kvm_pit_reset_reinject(struct kvm_pit *pit)
333{ 285{
334 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; 286 atomic_set(&pit->pit_state.pending, 0);
287 atomic_set(&pit->pit_state.irq_ack, 1);
288}
289
290void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject)
291{
292 struct kvm_kpit_state *ps = &pit->pit_state;
293 struct kvm *kvm = pit->kvm;
294
295 if (atomic_read(&ps->reinject) == reinject)
296 return;
297
298 if (reinject) {
299 /* The initial state is preserved while ps->reinject == 0. */
300 kvm_pit_reset_reinject(pit);
301 kvm_register_irq_ack_notifier(kvm, &ps->irq_ack_notifier);
302 kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
303 } else {
304 kvm_unregister_irq_ack_notifier(kvm, &ps->irq_ack_notifier);
305 kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
306 }
307
308 atomic_set(&ps->reinject, reinject);
309}
310
311static void create_pit_timer(struct kvm_pit *pit, u32 val, int is_period)
312{
313 struct kvm_kpit_state *ps = &pit->pit_state;
314 struct kvm *kvm = pit->kvm;
335 s64 interval; 315 s64 interval;
336 316
337 if (!ioapic_in_kernel(kvm) || 317 if (!ioapic_in_kernel(kvm) ||
338 ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) 318 ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
339 return; 319 return;
340 320
341 interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); 321 interval = mul_u64_u32_div(val, NSEC_PER_SEC, KVM_PIT_FREQ);
342 322
343 pr_debug("create pit timer, interval is %llu nsec\n", interval); 323 pr_debug("create pit timer, interval is %llu nsec\n", interval);
344 324
345 /* TODO The new value only affected after the retriggered */ 325 /* TODO The new value only affected after the retriggered */
346 hrtimer_cancel(&ps->timer); 326 hrtimer_cancel(&ps->timer);
347 flush_kthread_work(&ps->pit->expired); 327 flush_kthread_work(&pit->expired);
348 ps->period = interval; 328 ps->period = interval;
349 ps->is_periodic = is_period; 329 ps->is_periodic = is_period;
350 330
351 ps->timer.function = pit_timer_fn; 331 kvm_pit_reset_reinject(pit);
352 ps->kvm = ps->pit->kvm;
353
354 atomic_set(&ps->pending, 0);
355 ps->irq_ack = 1;
356 332
357 /* 333 /*
358 * Do not allow the guest to program periodic timers with small 334 * Do not allow the guest to program periodic timers with small
@@ -375,11 +351,9 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
375 HRTIMER_MODE_ABS); 351 HRTIMER_MODE_ABS);
376} 352}
377 353
378static void pit_load_count(struct kvm *kvm, int channel, u32 val) 354static void pit_load_count(struct kvm_pit *pit, int channel, u32 val)
379{ 355{
380 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; 356 struct kvm_kpit_state *ps = &pit->pit_state;
381
382 WARN_ON(!mutex_is_locked(&ps->lock));
383 357
384 pr_debug("load_count val is %d, channel is %d\n", val, channel); 358 pr_debug("load_count val is %d, channel is %d\n", val, channel);
385 359
@@ -404,29 +378,33 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
404 case 1: 378 case 1:
405 /* FIXME: enhance mode 4 precision */ 379 /* FIXME: enhance mode 4 precision */
406 case 4: 380 case 4:
407 create_pit_timer(kvm, val, 0); 381 create_pit_timer(pit, val, 0);
408 break; 382 break;
409 case 2: 383 case 2:
410 case 3: 384 case 3:
411 create_pit_timer(kvm, val, 1); 385 create_pit_timer(pit, val, 1);
412 break; 386 break;
413 default: 387 default:
414 destroy_pit_timer(kvm->arch.vpit); 388 destroy_pit_timer(pit);
415 } 389 }
416} 390}
417 391
418void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start) 392void kvm_pit_load_count(struct kvm_pit *pit, int channel, u32 val,
393 int hpet_legacy_start)
419{ 394{
420 u8 saved_mode; 395 u8 saved_mode;
396
397 WARN_ON_ONCE(!mutex_is_locked(&pit->pit_state.lock));
398
421 if (hpet_legacy_start) { 399 if (hpet_legacy_start) {
422 /* save existing mode for later reenablement */ 400 /* save existing mode for later reenablement */
423 WARN_ON(channel != 0); 401 WARN_ON(channel != 0);
424 saved_mode = kvm->arch.vpit->pit_state.channels[0].mode; 402 saved_mode = pit->pit_state.channels[0].mode;
425 kvm->arch.vpit->pit_state.channels[0].mode = 0xff; /* disable timer */ 403 pit->pit_state.channels[0].mode = 0xff; /* disable timer */
426 pit_load_count(kvm, channel, val); 404 pit_load_count(pit, channel, val);
427 kvm->arch.vpit->pit_state.channels[0].mode = saved_mode; 405 pit->pit_state.channels[0].mode = saved_mode;
428 } else { 406 } else {
429 pit_load_count(kvm, channel, val); 407 pit_load_count(pit, channel, val);
430 } 408 }
431} 409}
432 410
@@ -452,7 +430,6 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu,
452{ 430{
453 struct kvm_pit *pit = dev_to_pit(this); 431 struct kvm_pit *pit = dev_to_pit(this);
454 struct kvm_kpit_state *pit_state = &pit->pit_state; 432 struct kvm_kpit_state *pit_state = &pit->pit_state;
455 struct kvm *kvm = pit->kvm;
456 int channel, access; 433 int channel, access;
457 struct kvm_kpit_channel_state *s; 434 struct kvm_kpit_channel_state *s;
458 u32 val = *(u32 *) data; 435 u32 val = *(u32 *) data;
@@ -476,9 +453,9 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu,
476 s = &pit_state->channels[channel]; 453 s = &pit_state->channels[channel];
477 if (val & (2 << channel)) { 454 if (val & (2 << channel)) {
478 if (!(val & 0x20)) 455 if (!(val & 0x20))
479 pit_latch_count(kvm, channel); 456 pit_latch_count(pit, channel);
480 if (!(val & 0x10)) 457 if (!(val & 0x10))
481 pit_latch_status(kvm, channel); 458 pit_latch_status(pit, channel);
482 } 459 }
483 } 460 }
484 } else { 461 } else {
@@ -486,7 +463,7 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu,
486 s = &pit_state->channels[channel]; 463 s = &pit_state->channels[channel];
487 access = (val >> 4) & KVM_PIT_CHANNEL_MASK; 464 access = (val >> 4) & KVM_PIT_CHANNEL_MASK;
488 if (access == 0) { 465 if (access == 0) {
489 pit_latch_count(kvm, channel); 466 pit_latch_count(pit, channel);
490 } else { 467 } else {
491 s->rw_mode = access; 468 s->rw_mode = access;
492 s->read_state = access; 469 s->read_state = access;
@@ -503,17 +480,17 @@ static int pit_ioport_write(struct kvm_vcpu *vcpu,
503 switch (s->write_state) { 480 switch (s->write_state) {
504 default: 481 default:
505 case RW_STATE_LSB: 482 case RW_STATE_LSB:
506 pit_load_count(kvm, addr, val); 483 pit_load_count(pit, addr, val);
507 break; 484 break;
508 case RW_STATE_MSB: 485 case RW_STATE_MSB:
509 pit_load_count(kvm, addr, val << 8); 486 pit_load_count(pit, addr, val << 8);
510 break; 487 break;
511 case RW_STATE_WORD0: 488 case RW_STATE_WORD0:
512 s->write_latch = val; 489 s->write_latch = val;
513 s->write_state = RW_STATE_WORD1; 490 s->write_state = RW_STATE_WORD1;
514 break; 491 break;
515 case RW_STATE_WORD1: 492 case RW_STATE_WORD1:
516 pit_load_count(kvm, addr, s->write_latch | (val << 8)); 493 pit_load_count(pit, addr, s->write_latch | (val << 8));
517 s->write_state = RW_STATE_WORD0; 494 s->write_state = RW_STATE_WORD0;
518 break; 495 break;
519 } 496 }
@@ -529,7 +506,6 @@ static int pit_ioport_read(struct kvm_vcpu *vcpu,
529{ 506{
530 struct kvm_pit *pit = dev_to_pit(this); 507 struct kvm_pit *pit = dev_to_pit(this);
531 struct kvm_kpit_state *pit_state = &pit->pit_state; 508 struct kvm_kpit_state *pit_state = &pit->pit_state;
532 struct kvm *kvm = pit->kvm;
533 int ret, count; 509 int ret, count;
534 struct kvm_kpit_channel_state *s; 510 struct kvm_kpit_channel_state *s;
535 if (!pit_in_range(addr)) 511 if (!pit_in_range(addr))
@@ -566,20 +542,20 @@ static int pit_ioport_read(struct kvm_vcpu *vcpu,
566 switch (s->read_state) { 542 switch (s->read_state) {
567 default: 543 default:
568 case RW_STATE_LSB: 544 case RW_STATE_LSB:
569 count = pit_get_count(kvm, addr); 545 count = pit_get_count(pit, addr);
570 ret = count & 0xff; 546 ret = count & 0xff;
571 break; 547 break;
572 case RW_STATE_MSB: 548 case RW_STATE_MSB:
573 count = pit_get_count(kvm, addr); 549 count = pit_get_count(pit, addr);
574 ret = (count >> 8) & 0xff; 550 ret = (count >> 8) & 0xff;
575 break; 551 break;
576 case RW_STATE_WORD0: 552 case RW_STATE_WORD0:
577 count = pit_get_count(kvm, addr); 553 count = pit_get_count(pit, addr);
578 ret = count & 0xff; 554 ret = count & 0xff;
579 s->read_state = RW_STATE_WORD1; 555 s->read_state = RW_STATE_WORD1;
580 break; 556 break;
581 case RW_STATE_WORD1: 557 case RW_STATE_WORD1:
582 count = pit_get_count(kvm, addr); 558 count = pit_get_count(pit, addr);
583 ret = (count >> 8) & 0xff; 559 ret = (count >> 8) & 0xff;
584 s->read_state = RW_STATE_WORD0; 560 s->read_state = RW_STATE_WORD0;
585 break; 561 break;
@@ -600,14 +576,13 @@ static int speaker_ioport_write(struct kvm_vcpu *vcpu,
600{ 576{
601 struct kvm_pit *pit = speaker_to_pit(this); 577 struct kvm_pit *pit = speaker_to_pit(this);
602 struct kvm_kpit_state *pit_state = &pit->pit_state; 578 struct kvm_kpit_state *pit_state = &pit->pit_state;
603 struct kvm *kvm = pit->kvm;
604 u32 val = *(u32 *) data; 579 u32 val = *(u32 *) data;
605 if (addr != KVM_SPEAKER_BASE_ADDRESS) 580 if (addr != KVM_SPEAKER_BASE_ADDRESS)
606 return -EOPNOTSUPP; 581 return -EOPNOTSUPP;
607 582
608 mutex_lock(&pit_state->lock); 583 mutex_lock(&pit_state->lock);
609 pit_state->speaker_data_on = (val >> 1) & 1; 584 pit_state->speaker_data_on = (val >> 1) & 1;
610 pit_set_gate(kvm, 2, val & 1); 585 pit_set_gate(pit, 2, val & 1);
611 mutex_unlock(&pit_state->lock); 586 mutex_unlock(&pit_state->lock);
612 return 0; 587 return 0;
613} 588}
@@ -618,7 +593,6 @@ static int speaker_ioport_read(struct kvm_vcpu *vcpu,
618{ 593{
619 struct kvm_pit *pit = speaker_to_pit(this); 594 struct kvm_pit *pit = speaker_to_pit(this);
620 struct kvm_kpit_state *pit_state = &pit->pit_state; 595 struct kvm_kpit_state *pit_state = &pit->pit_state;
621 struct kvm *kvm = pit->kvm;
622 unsigned int refresh_clock; 596 unsigned int refresh_clock;
623 int ret; 597 int ret;
624 if (addr != KVM_SPEAKER_BASE_ADDRESS) 598 if (addr != KVM_SPEAKER_BASE_ADDRESS)
@@ -628,8 +602,8 @@ static int speaker_ioport_read(struct kvm_vcpu *vcpu,
628 refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1; 602 refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
629 603
630 mutex_lock(&pit_state->lock); 604 mutex_lock(&pit_state->lock);
631 ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) | 605 ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(pit, 2) |
632 (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4)); 606 (pit_get_out(pit, 2) << 5) | (refresh_clock << 4));
633 if (len > sizeof(ret)) 607 if (len > sizeof(ret))
634 len = sizeof(ret); 608 len = sizeof(ret);
635 memcpy(data, (char *)&ret, len); 609 memcpy(data, (char *)&ret, len);
@@ -637,33 +611,28 @@ static int speaker_ioport_read(struct kvm_vcpu *vcpu,
637 return 0; 611 return 0;
638} 612}
639 613
640void kvm_pit_reset(struct kvm_pit *pit) 614static void kvm_pit_reset(struct kvm_pit *pit)
641{ 615{
642 int i; 616 int i;
643 struct kvm_kpit_channel_state *c; 617 struct kvm_kpit_channel_state *c;
644 618
645 mutex_lock(&pit->pit_state.lock);
646 pit->pit_state.flags = 0; 619 pit->pit_state.flags = 0;
647 for (i = 0; i < 3; i++) { 620 for (i = 0; i < 3; i++) {
648 c = &pit->pit_state.channels[i]; 621 c = &pit->pit_state.channels[i];
649 c->mode = 0xff; 622 c->mode = 0xff;
650 c->gate = (i != 2); 623 c->gate = (i != 2);
651 pit_load_count(pit->kvm, i, 0); 624 pit_load_count(pit, i, 0);
652 } 625 }
653 mutex_unlock(&pit->pit_state.lock);
654 626
655 atomic_set(&pit->pit_state.pending, 0); 627 kvm_pit_reset_reinject(pit);
656 pit->pit_state.irq_ack = 1;
657} 628}
658 629
659static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) 630static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
660{ 631{
661 struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier); 632 struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
662 633
663 if (!mask) { 634 if (!mask)
664 atomic_set(&pit->pit_state.pending, 0); 635 kvm_pit_reset_reinject(pit);
665 pit->pit_state.irq_ack = 1;
666 }
667} 636}
668 637
669static const struct kvm_io_device_ops pit_dev_ops = { 638static const struct kvm_io_device_ops pit_dev_ops = {
@@ -690,14 +659,10 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
690 return NULL; 659 return NULL;
691 660
692 pit->irq_source_id = kvm_request_irq_source_id(kvm); 661 pit->irq_source_id = kvm_request_irq_source_id(kvm);
693 if (pit->irq_source_id < 0) { 662 if (pit->irq_source_id < 0)
694 kfree(pit); 663 goto fail_request;
695 return NULL;
696 }
697 664
698 mutex_init(&pit->pit_state.lock); 665 mutex_init(&pit->pit_state.lock);
699 mutex_lock(&pit->pit_state.lock);
700 spin_lock_init(&pit->pit_state.inject_lock);
701 666
702 pid = get_pid(task_tgid(current)); 667 pid = get_pid(task_tgid(current));
703 pid_nr = pid_vnr(pid); 668 pid_nr = pid_vnr(pid);
@@ -706,36 +671,30 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
706 init_kthread_worker(&pit->worker); 671 init_kthread_worker(&pit->worker);
707 pit->worker_task = kthread_run(kthread_worker_fn, &pit->worker, 672 pit->worker_task = kthread_run(kthread_worker_fn, &pit->worker,
708 "kvm-pit/%d", pid_nr); 673 "kvm-pit/%d", pid_nr);
709 if (IS_ERR(pit->worker_task)) { 674 if (IS_ERR(pit->worker_task))
710 mutex_unlock(&pit->pit_state.lock); 675 goto fail_kthread;
711 kvm_free_irq_source_id(kvm, pit->irq_source_id); 676
712 kfree(pit);
713 return NULL;
714 }
715 init_kthread_work(&pit->expired, pit_do_work); 677 init_kthread_work(&pit->expired, pit_do_work);
716 678
717 kvm->arch.vpit = pit;
718 pit->kvm = kvm; 679 pit->kvm = kvm;
719 680
720 pit_state = &pit->pit_state; 681 pit_state = &pit->pit_state;
721 pit_state->pit = pit;
722 hrtimer_init(&pit_state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 682 hrtimer_init(&pit_state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
683 pit_state->timer.function = pit_timer_fn;
684
723 pit_state->irq_ack_notifier.gsi = 0; 685 pit_state->irq_ack_notifier.gsi = 0;
724 pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; 686 pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
725 kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); 687 pit->mask_notifier.func = pit_mask_notifer;
726 pit_state->reinject = true;
727 mutex_unlock(&pit->pit_state.lock);
728 688
729 kvm_pit_reset(pit); 689 kvm_pit_reset(pit);
730 690
731 pit->mask_notifier.func = pit_mask_notifer; 691 kvm_pit_set_reinject(pit, true);
732 kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
733 692
734 kvm_iodevice_init(&pit->dev, &pit_dev_ops); 693 kvm_iodevice_init(&pit->dev, &pit_dev_ops);
735 ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, 694 ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS,
736 KVM_PIT_MEM_LENGTH, &pit->dev); 695 KVM_PIT_MEM_LENGTH, &pit->dev);
737 if (ret < 0) 696 if (ret < 0)
738 goto fail; 697 goto fail_register_pit;
739 698
740 if (flags & KVM_PIT_SPEAKER_DUMMY) { 699 if (flags & KVM_PIT_SPEAKER_DUMMY) {
741 kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); 700 kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
@@ -743,42 +702,35 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
743 KVM_SPEAKER_BASE_ADDRESS, 4, 702 KVM_SPEAKER_BASE_ADDRESS, 4,
744 &pit->speaker_dev); 703 &pit->speaker_dev);
745 if (ret < 0) 704 if (ret < 0)
746 goto fail_unregister; 705 goto fail_register_speaker;
747 } 706 }
748 707
749 return pit; 708 return pit;
750 709
751fail_unregister: 710fail_register_speaker:
752 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev); 711 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
753 712fail_register_pit:
754fail: 713 kvm_pit_set_reinject(pit, false);
755 kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
756 kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
757 kvm_free_irq_source_id(kvm, pit->irq_source_id);
758 kthread_stop(pit->worker_task); 714 kthread_stop(pit->worker_task);
715fail_kthread:
716 kvm_free_irq_source_id(kvm, pit->irq_source_id);
717fail_request:
759 kfree(pit); 718 kfree(pit);
760 return NULL; 719 return NULL;
761} 720}
762 721
763void kvm_free_pit(struct kvm *kvm) 722void kvm_free_pit(struct kvm *kvm)
764{ 723{
765 struct hrtimer *timer; 724 struct kvm_pit *pit = kvm->arch.vpit;
766 725
767 if (kvm->arch.vpit) { 726 if (pit) {
768 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &kvm->arch.vpit->dev); 727 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
769 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, 728 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev);
770 &kvm->arch.vpit->speaker_dev); 729 kvm_pit_set_reinject(pit, false);
771 kvm_unregister_irq_mask_notifier(kvm, 0, 730 hrtimer_cancel(&pit->pit_state.timer);
772 &kvm->arch.vpit->mask_notifier); 731 flush_kthread_work(&pit->expired);
773 kvm_unregister_irq_ack_notifier(kvm, 732 kthread_stop(pit->worker_task);
774 &kvm->arch.vpit->pit_state.irq_ack_notifier); 733 kvm_free_irq_source_id(kvm, pit->irq_source_id);
775 mutex_lock(&kvm->arch.vpit->pit_state.lock); 734 kfree(pit);
776 timer = &kvm->arch.vpit->pit_state.timer;
777 hrtimer_cancel(timer);
778 flush_kthread_work(&kvm->arch.vpit->expired);
779 kthread_stop(kvm->arch.vpit->worker_task);
780 kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id);
781 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
782 kfree(kvm->arch.vpit);
783 } 735 }
784} 736}
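
A compact userspace model of the lock-free handshake that replaces inject_lock in this file, using C11 atomics in place of the kernel's atomic_t API. queue_work() and inject_interrupt() are stand-ins for queue_kthread_work() and the kvm_set_irq() pair, so treat this as a sketch of the protocol rather than of the driver itself.

#include <stdatomic.h>

static atomic_int pending;                        /* accumulated timer ticks */
static atomic_int irq_ack  = ATOMIC_VAR_INIT(1);  /* last tick was acked     */
static atomic_int reinject = ATOMIC_VAR_INIT(1);  /* reinjection enabled     */

static void do_work(void);

static void queue_work(void)            /* queue_kthread_work() stand-in */
{
        do_work();
}

static void inject_interrupt(void)      /* kvm_set_irq(1) + kvm_set_irq(0) */
{
}

/* atomic_dec_if_positive(): decrement unless the result would go
 * negative; return the (possibly not stored) decremented value. */
static int dec_if_positive(atomic_int *v)
{
        int c = atomic_load(v);

        while (c > 0 && !atomic_compare_exchange_weak(v, &c, c - 1))
                ;
        return c - 1;
}

static void timer_fn(void)              /* pit_timer_fn() */
{
        if (atomic_load(&reinject))
                atomic_fetch_add(&pending, 1);
        queue_work();
}

static void do_work(void)               /* pit_do_work() */
{
        /* At most one tick in flight: skip the injection if the guest
         * has not acked the previous one yet. */
        if (atomic_load(&reinject) && !atomic_exchange(&irq_ack, 0))
                return;
        inject_interrupt();
}

static void ack_irq(void)               /* kvm_pit_ack_irq() */
{
        atomic_store(&irq_ack, 1);
        /* smp_mb(): publish irq_ack before reading pending, pairing with
         * the inc(pending) in timer_fn() and the xchg() in do_work(). */
        atomic_thread_fence(memory_order_seq_cst);
        if (dec_if_positive(&pending) > 0)
                queue_work();           /* more ticks piled up: reinject */
}

int main(void)
{
        timer_fn();     /* a tick arrives            */
        ack_irq();      /* the guest EOIs the vector */
        return 0;
}

kvm_pit_set_reinject() above registers or unregisters the ack and mask notifiers, so the xchg()/dec_if_positive() pair only comes into play while reinjection is actually enabled.
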
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index c84990b42b5b..2f5af0798326 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -22,19 +22,18 @@ struct kvm_kpit_channel_state {
22}; 22};
23 23
24struct kvm_kpit_state { 24struct kvm_kpit_state {
25 /* All members before "struct mutex lock" are protected by the lock. */
25 struct kvm_kpit_channel_state channels[3]; 26 struct kvm_kpit_channel_state channels[3];
26 u32 flags; 27 u32 flags;
27 bool is_periodic; 28 bool is_periodic;
28 s64 period; /* unit: ns */ 29 s64 period; /* unit: ns */
29 struct hrtimer timer; 30 struct hrtimer timer;
30 atomic_t pending; /* accumulated triggered timers */
31 bool reinject;
32 struct kvm *kvm;
33 u32 speaker_data_on; 31 u32 speaker_data_on;
32
34 struct mutex lock; 33 struct mutex lock;
35 struct kvm_pit *pit; 34 atomic_t reinject;
36 spinlock_t inject_lock; 35 atomic_t pending; /* accumulated triggered timers */
37 unsigned long irq_ack; 36 atomic_t irq_ack;
38 struct kvm_irq_ack_notifier irq_ack_notifier; 37 struct kvm_irq_ack_notifier irq_ack_notifier;
39}; 38};
40 39
@@ -57,9 +56,11 @@ struct kvm_pit {
57#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 56#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
58#define KVM_PIT_CHANNEL_MASK 0x3 57#define KVM_PIT_CHANNEL_MASK 0x3
59 58
60void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start);
61struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); 59struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
62void kvm_free_pit(struct kvm *kvm); 60void kvm_free_pit(struct kvm *kvm);
63void kvm_pit_reset(struct kvm_pit *pit); 61
62void kvm_pit_load_count(struct kvm_pit *pit, int channel, u32 val,
63 int hpet_legacy_start);
64void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject);
64 65
65#endif 66#endif
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 1facfd60b04a..9db47090ead0 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -94,7 +94,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
94static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) 94static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
95{ 95{
96 ioapic->rtc_status.pending_eoi = 0; 96 ioapic->rtc_status.pending_eoi = 0;
97 bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS); 97 bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPUS);
98} 98}
99 99
100static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); 100static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
@@ -117,16 +117,16 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
117 return; 117 return;
118 118
119 new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); 119 new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
120 old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); 120 old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map);
121 121
122 if (new_val == old_val) 122 if (new_val == old_val)
123 return; 123 return;
124 124
125 if (new_val) { 125 if (new_val) {
126 __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); 126 __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map);
127 ioapic->rtc_status.pending_eoi++; 127 ioapic->rtc_status.pending_eoi++;
128 } else { 128 } else {
129 __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); 129 __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map);
130 ioapic->rtc_status.pending_eoi--; 130 ioapic->rtc_status.pending_eoi--;
131 rtc_status_pending_eoi_check_valid(ioapic); 131 rtc_status_pending_eoi_check_valid(ioapic);
132 } 132 }
@@ -156,7 +156,8 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
156 156
157static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu) 157static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
158{ 158{
159 if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) { 159 if (test_and_clear_bit(vcpu->vcpu_id,
160 ioapic->rtc_status.dest_map.map)) {
160 --ioapic->rtc_status.pending_eoi; 161 --ioapic->rtc_status.pending_eoi;
161 rtc_status_pending_eoi_check_valid(ioapic); 162 rtc_status_pending_eoi_check_valid(ioapic);
162 } 163 }
@@ -236,10 +237,17 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
236void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) 237void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
237{ 238{
238 struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; 239 struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
240 struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
239 union kvm_ioapic_redirect_entry *e; 241 union kvm_ioapic_redirect_entry *e;
240 int index; 242 int index;
241 243
242 spin_lock(&ioapic->lock); 244 spin_lock(&ioapic->lock);
245
246 /* Make sure we see any missing RTC EOI */
247 if (test_bit(vcpu->vcpu_id, dest_map->map))
248 __set_bit(dest_map->vectors[vcpu->vcpu_id],
249 ioapic_handled_vectors);
250
243 for (index = 0; index < IOAPIC_NUM_PINS; index++) { 251 for (index = 0; index < IOAPIC_NUM_PINS; index++) {
244 e = &ioapic->redirtbl[index]; 252 e = &ioapic->redirtbl[index];
245 if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || 253 if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
@@ -346,7 +354,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
346 */ 354 */
347 BUG_ON(ioapic->rtc_status.pending_eoi != 0); 355 BUG_ON(ioapic->rtc_status.pending_eoi != 0);
348 ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, 356 ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
349 ioapic->rtc_status.dest_map); 357 &ioapic->rtc_status.dest_map);
350 ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret); 358 ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
351 } else 359 } else
352 ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); 360 ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
@@ -407,8 +415,14 @@ static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
407static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, 415static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
408 struct kvm_ioapic *ioapic, int vector, int trigger_mode) 416 struct kvm_ioapic *ioapic, int vector, int trigger_mode)
409{ 417{
410 int i; 418 struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
411 struct kvm_lapic *apic = vcpu->arch.apic; 419 struct kvm_lapic *apic = vcpu->arch.apic;
420 int i;
421
422 /* RTC special handling */
423 if (test_bit(vcpu->vcpu_id, dest_map->map) &&
424 vector == dest_map->vectors[vcpu->vcpu_id])
425 rtc_irq_eoi(ioapic, vcpu);
412 426
413 for (i = 0; i < IOAPIC_NUM_PINS; i++) { 427 for (i = 0; i < IOAPIC_NUM_PINS; i++) {
414 union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; 428 union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
@@ -416,8 +430,6 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
416 if (ent->fields.vector != vector) 430 if (ent->fields.vector != vector)
417 continue; 431 continue;
418 432
419 if (i == RTC_GSI)
420 rtc_irq_eoi(ioapic, vcpu);
421 /* 433 /*
422 * We are dropping lock while calling ack notifiers because ack 434 * We are dropping lock while calling ack notifiers because ack
423 * notifier callbacks for assigned devices call into IOAPIC 435 * notifier callbacks for assigned devices call into IOAPIC
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 2d16dc251d81..7d2692a49657 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -40,9 +40,21 @@ struct kvm_vcpu;
40#define RTC_GSI -1U 40#define RTC_GSI -1U
41#endif 41#endif
42 42
43struct dest_map {
44 /* vcpu bitmap where IRQ has been sent */
45 DECLARE_BITMAP(map, KVM_MAX_VCPUS);
46
47 /*
48 * Vector sent to a given vcpu, only valid when
49 * the vcpu's bit in map is set
50 */
51 u8 vectors[KVM_MAX_VCPUS];
52};
53
54
43struct rtc_status { 55struct rtc_status {
44 int pending_eoi; 56 int pending_eoi;
45 DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS); 57 struct dest_map dest_map;
46}; 58};
47 59
48union kvm_ioapic_redirect_entry { 60union kvm_ioapic_redirect_entry {
@@ -118,7 +130,8 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
118 int level, bool line_status); 130 int level, bool line_status);
119void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); 131void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
120int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, 132int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
121 struct kvm_lapic_irq *irq, unsigned long *dest_map); 133 struct kvm_lapic_irq *irq,
134 struct dest_map *dest_map);
122int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); 135int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
123int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); 136int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
124void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, 137void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 3982b479bb5f..95fcc7b13866 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -33,7 +33,10 @@
33 */ 33 */
34int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 34int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
35{ 35{
36 return apic_has_pending_timer(vcpu); 36 if (lapic_in_kernel(vcpu))
37 return apic_has_pending_timer(vcpu);
38
39 return 0;
37} 40}
38EXPORT_SYMBOL(kvm_cpu_has_pending_timer); 41EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
39 42
@@ -137,8 +140,8 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
137 140
138void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) 141void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
139{ 142{
140 kvm_inject_apic_timer_irqs(vcpu); 143 if (lapic_in_kernel(vcpu))
141 /* TODO: PIT, RTC etc. */ 144 kvm_inject_apic_timer_irqs(vcpu);
142} 145}
143EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); 146EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
144 147
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ae5c78f2337d..61ebdc13a29a 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -109,14 +109,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
109 return ret; 109 return ret;
110} 110}
111 111
112static inline int lapic_in_kernel(struct kvm_vcpu *vcpu)
113{
114 /* Same as irqchip_in_kernel(vcpu->kvm), but with less
115 * pointer chasing and no unnecessary memory barriers.
116 */
117 return vcpu->arch.apic != NULL;
118}
119
120void kvm_pic_reset(struct kvm_kpic_state *s); 112void kvm_pic_reset(struct kvm_kpic_state *s);
121 113
122void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); 114void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 8fc89efb5250..54ead79e444b 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -34,6 +34,7 @@
34#include "lapic.h" 34#include "lapic.h"
35 35
36#include "hyperv.h" 36#include "hyperv.h"
37#include "x86.h"
37 38
38static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, 39static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
39 struct kvm *kvm, int irq_source_id, int level, 40 struct kvm *kvm, int irq_source_id, int level,
@@ -53,10 +54,12 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
53} 54}
54 55
55int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, 56int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
56 struct kvm_lapic_irq *irq, unsigned long *dest_map) 57 struct kvm_lapic_irq *irq, struct dest_map *dest_map)
57{ 58{
58 int i, r = -1; 59 int i, r = -1;
59 struct kvm_vcpu *vcpu, *lowest = NULL; 60 struct kvm_vcpu *vcpu, *lowest = NULL;
61 unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
62 unsigned int dest_vcpus = 0;
60 63
61 if (irq->dest_mode == 0 && irq->dest_id == 0xff && 64 if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
62 kvm_lowest_prio_delivery(irq)) { 65 kvm_lowest_prio_delivery(irq)) {
@@ -67,6 +70,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
67 if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) 70 if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
68 return r; 71 return r;
69 72
73 memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
74
70 kvm_for_each_vcpu(i, vcpu, kvm) { 75 kvm_for_each_vcpu(i, vcpu, kvm) {
71 if (!kvm_apic_present(vcpu)) 76 if (!kvm_apic_present(vcpu))
72 continue; 77 continue;
@@ -80,13 +85,25 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
80 r = 0; 85 r = 0;
81 r += kvm_apic_set_irq(vcpu, irq, dest_map); 86 r += kvm_apic_set_irq(vcpu, irq, dest_map);
82 } else if (kvm_lapic_enabled(vcpu)) { 87 } else if (kvm_lapic_enabled(vcpu)) {
83 if (!lowest) 88 if (!kvm_vector_hashing_enabled()) {
84 lowest = vcpu; 89 if (!lowest)
85 else if (kvm_apic_compare_prio(vcpu, lowest) < 0) 90 lowest = vcpu;
86 lowest = vcpu; 91 else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
92 lowest = vcpu;
93 } else {
94 __set_bit(i, dest_vcpu_bitmap);
95 dest_vcpus++;
96 }
87 } 97 }
88 } 98 }
89 99
100 if (dest_vcpus != 0) {
101 int idx = kvm_vector_to_index(irq->vector, dest_vcpus,
102 dest_vcpu_bitmap, KVM_MAX_VCPUS);
103
104 lowest = kvm_get_vcpu(kvm, idx);
105 }
106
90 if (lowest) 107 if (lowest)
91 r = kvm_apic_set_irq(lowest, irq, dest_map); 108 r = kvm_apic_set_irq(lowest, irq, dest_map);
92 109
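
A standalone sketch (plain C, not kernel code) of the vector-hashing pick the lowest-priority path above switches to when kvm_vector_hashing_enabled(): hash the guest vector over the number of possible destinations and walk the destination bitmap to that set bit, which is what kvm_vector_to_index() in the lapic.c hunk below does with find_next_bit(). The bitmap value in main() is made up for illustration.

#include <stdint.h>
#include <stdio.h>

/* Return the index of the (mod+1)-th set bit, mod = vector % popcount.
 * Assumes dest_bitmap is non-zero; __builtin_popcountll is a GCC/Clang
 * builtin standing in for hweight. */
static int vector_to_index(uint32_t vector, uint64_t dest_bitmap)
{
        unsigned int dest_vcpus = __builtin_popcountll(dest_bitmap);
        unsigned int mod = vector % dest_vcpus;
        int idx = -1;

        for (unsigned int i = 0; i <= mod; i++) {
                do {
                        idx++;                  /* find_next_bit() */
                } while (!(dest_bitmap & (1ull << idx)));
        }
        return idx;
}

int main(void)
{
        uint64_t dests = 0x2c;   /* vCPUs 2, 3 and 5 may receive the IRQ */

        /* Different vectors spread across the destinations instead of
         * always hitting the lowest-priority vCPU. */
        for (uint32_t vec = 32; vec < 36; vec++)
                printf("vector %u -> vCPU %d\n", vec,
                       vector_to_index(vec, dests));
        return 0;
}

The pick is deterministic for a given vector and destination set, so repeated interrupts with the same vector always land on the same vCPU.
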
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3a045f39ed81..443d2a57ad3d 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -281,7 +281,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
281 struct kvm_cpuid_entry2 *feat; 281 struct kvm_cpuid_entry2 *feat;
282 u32 v = APIC_VERSION; 282 u32 v = APIC_VERSION;
283 283
284 if (!kvm_vcpu_has_lapic(vcpu)) 284 if (!lapic_in_kernel(vcpu))
285 return; 285 return;
286 286
287 feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); 287 feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
@@ -475,26 +475,20 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
475 475
476int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) 476int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
477{ 477{
478 int highest_irr;
479
480 /* This may race with setting of irr in __apic_accept_irq() and 478 /* This may race with setting of irr in __apic_accept_irq() and
481 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq 479 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
482 * will cause vmexit immediately and the value will be recalculated 480 * will cause vmexit immediately and the value will be recalculated
483 * on the next vmentry. 481 * on the next vmentry.
484 */ 482 */
485 if (!kvm_vcpu_has_lapic(vcpu)) 483 return apic_find_highest_irr(vcpu->arch.apic);
486 return 0;
487 highest_irr = apic_find_highest_irr(vcpu->arch.apic);
488
489 return highest_irr;
490} 484}
491 485
492static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 486static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
493 int vector, int level, int trig_mode, 487 int vector, int level, int trig_mode,
494 unsigned long *dest_map); 488 struct dest_map *dest_map);
495 489
496int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, 490int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
497 unsigned long *dest_map) 491 struct dest_map *dest_map)
498{ 492{
499 struct kvm_lapic *apic = vcpu->arch.apic; 493 struct kvm_lapic *apic = vcpu->arch.apic;
500 494
@@ -675,8 +669,33 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
675 } 669 }
676} 670}
677 671
672int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
673 const unsigned long *bitmap, u32 bitmap_size)
674{
675 u32 mod;
676 int i, idx = -1;
677
678 mod = vector % dest_vcpus;
679
680 for (i = 0; i <= mod; i++) {
681 idx = find_next_bit(bitmap, bitmap_size, idx + 1);
682 BUG_ON(idx == bitmap_size);
683 }
684
685 return idx;
686}
687
688static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
689{
690 if (!kvm->arch.disabled_lapic_found) {
691 kvm->arch.disabled_lapic_found = true;
692 printk(KERN_INFO
693 "Disabled LAPIC found during irq injection\n");
694 }
695}
696
678bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, 697bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
679 struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map) 698 struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
680{ 699{
681 struct kvm_apic_map *map; 700 struct kvm_apic_map *map;
682 unsigned long bitmap = 1; 701 unsigned long bitmap = 1;
@@ -727,21 +746,42 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
727 746
728 dst = map->logical_map[cid]; 747 dst = map->logical_map[cid];
729 748
730 if (kvm_lowest_prio_delivery(irq)) { 749 if (!kvm_lowest_prio_delivery(irq))
750 goto set_irq;
751
752 if (!kvm_vector_hashing_enabled()) {
731 int l = -1; 753 int l = -1;
732 for_each_set_bit(i, &bitmap, 16) { 754 for_each_set_bit(i, &bitmap, 16) {
733 if (!dst[i]) 755 if (!dst[i])
734 continue; 756 continue;
735 if (l < 0) 757 if (l < 0)
736 l = i; 758 l = i;
737 else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0) 759 else if (kvm_apic_compare_prio(dst[i]->vcpu,
760 dst[l]->vcpu) < 0)
738 l = i; 761 l = i;
739 } 762 }
740
741 bitmap = (l >= 0) ? 1 << l : 0; 763 bitmap = (l >= 0) ? 1 << l : 0;
764 } else {
765 int idx;
766 unsigned int dest_vcpus;
767
768 dest_vcpus = hweight16(bitmap);
769 if (dest_vcpus == 0)
770 goto out;
771
772 idx = kvm_vector_to_index(irq->vector,
773 dest_vcpus, &bitmap, 16);
774
775 if (!dst[idx]) {
776 kvm_apic_disabled_lapic_found(kvm);
777 goto out;
778 }
779
780 bitmap = (idx >= 0) ? 1 << idx : 0;
742 } 781 }
743 } 782 }
744 783
784set_irq:
745 for_each_set_bit(i, &bitmap, 16) { 785 for_each_set_bit(i, &bitmap, 16) {
746 if (!dst[i]) 786 if (!dst[i])
747 continue; 787 continue;
@@ -754,6 +794,20 @@ out:
754 return ret; 794 return ret;
755} 795}
756 796
797/*
 798 * This routine tries to handle interrupts in posted mode; here is how
799 * it deals with different cases:
800 * - For single-destination interrupts, handle it in posted mode
801 * - Else if vector hashing is enabled and it is a lowest-priority
802 * interrupt, handle it in posted mode and use the following mechanism
 803 * to find the destination vCPU.
804 * 1. For lowest-priority interrupts, store all the possible
805 * destination vCPUs in an array.
806 * 2. Use "guest vector % max number of destination vCPUs" to find
807 * the right destination vCPU in the array for the lowest-priority
808 * interrupt.
809 * - Otherwise, use remapped mode to inject the interrupt.
810 */
757bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, 811bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
758 struct kvm_vcpu **dest_vcpu) 812 struct kvm_vcpu **dest_vcpu)
759{ 813{
@@ -795,16 +849,37 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
795 if (cid >= ARRAY_SIZE(map->logical_map)) 849 if (cid >= ARRAY_SIZE(map->logical_map))
796 goto out; 850 goto out;
797 851
798 for_each_set_bit(i, &bitmap, 16) { 852 if (kvm_vector_hashing_enabled() &&
799 dst = map->logical_map[cid][i]; 853 kvm_lowest_prio_delivery(irq)) {
800 if (++r == 2) 854 int idx;
855 unsigned int dest_vcpus;
856
857 dest_vcpus = hweight16(bitmap);
858 if (dest_vcpus == 0)
801 goto out; 859 goto out;
802 }
803 860
804 if (dst && kvm_apic_present(dst->vcpu)) 861 idx = kvm_vector_to_index(irq->vector, dest_vcpus,
862 &bitmap, 16);
863
864 dst = map->logical_map[cid][idx];
865 if (!dst) {
866 kvm_apic_disabled_lapic_found(kvm);
867 goto out;
868 }
869
805 *dest_vcpu = dst->vcpu; 870 *dest_vcpu = dst->vcpu;
806 else 871 } else {
807 goto out; 872 for_each_set_bit(i, &bitmap, 16) {
873 dst = map->logical_map[cid][i];
874 if (++r == 2)
875 goto out;
876 }
877
878 if (dst && kvm_apic_present(dst->vcpu))
879 *dest_vcpu = dst->vcpu;
880 else
881 goto out;
882 }
808 } 883 }
809 884
810 ret = true; 885 ret = true;
@@ -819,7 +894,7 @@ out:
819 */ 894 */
820static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 895static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
821 int vector, int level, int trig_mode, 896 int vector, int level, int trig_mode,
822 unsigned long *dest_map) 897 struct dest_map *dest_map)
823{ 898{
824 int result = 0; 899 int result = 0;
825 struct kvm_vcpu *vcpu = apic->vcpu; 900 struct kvm_vcpu *vcpu = apic->vcpu;
@@ -839,8 +914,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
839 914
840 result = 1; 915 result = 1;
841 916
842 if (dest_map) 917 if (dest_map) {
843 __set_bit(vcpu->vcpu_id, dest_map); 918 __set_bit(vcpu->vcpu_id, dest_map->map);
919 dest_map->vectors[vcpu->vcpu_id] = vector;
920 }
844 921
845 if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { 922 if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
846 if (trig_mode) 923 if (trig_mode)
@@ -1239,7 +1316,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
1239 struct kvm_lapic *apic = vcpu->arch.apic; 1316 struct kvm_lapic *apic = vcpu->arch.apic;
1240 u64 guest_tsc, tsc_deadline; 1317 u64 guest_tsc, tsc_deadline;
1241 1318
1242 if (!kvm_vcpu_has_lapic(vcpu)) 1319 if (!lapic_in_kernel(vcpu))
1243 return; 1320 return;
1244 1321
1245 if (apic->lapic_timer.expired_tscdeadline == 0) 1322 if (apic->lapic_timer.expired_tscdeadline == 0)
@@ -1515,8 +1592,7 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1515 1592
1516void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) 1593void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
1517{ 1594{
1518 if (kvm_vcpu_has_lapic(vcpu)) 1595 apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
1519 apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
1520} 1596}
1521EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); 1597EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
1522 1598
@@ -1566,7 +1642,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
1566{ 1642{
1567 struct kvm_lapic *apic = vcpu->arch.apic; 1643 struct kvm_lapic *apic = vcpu->arch.apic;
1568 1644
1569 if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || 1645 if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
1570 apic_lvtt_period(apic)) 1646 apic_lvtt_period(apic))
1571 return 0; 1647 return 0;
1572 1648
@@ -1577,7 +1653,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
1577{ 1653{
1578 struct kvm_lapic *apic = vcpu->arch.apic; 1654 struct kvm_lapic *apic = vcpu->arch.apic;
1579 1655
1580 if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || 1656 if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
1581 apic_lvtt_period(apic)) 1657 apic_lvtt_period(apic))
1582 return; 1658 return;
1583 1659
@@ -1590,9 +1666,6 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
1590{ 1666{
1591 struct kvm_lapic *apic = vcpu->arch.apic; 1667 struct kvm_lapic *apic = vcpu->arch.apic;
1592 1668
1593 if (!kvm_vcpu_has_lapic(vcpu))
1594 return;
1595
1596 apic_set_tpr(apic, ((cr8 & 0x0f) << 4) 1669 apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
1597 | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4)); 1670 | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4));
1598} 1671}
@@ -1601,9 +1674,6 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
1601{ 1674{
1602 u64 tpr; 1675 u64 tpr;
1603 1676
1604 if (!kvm_vcpu_has_lapic(vcpu))
1605 return 0;
1606
1607 tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI); 1677 tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
1608 1678
1609 return (tpr & 0xf0) >> 4; 1679 return (tpr & 0xf0) >> 4;
@@ -1728,8 +1798,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
1728{ 1798{
1729 struct kvm_lapic *apic = vcpu->arch.apic; 1799 struct kvm_lapic *apic = vcpu->arch.apic;
1730 1800
1731 if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) && 1801 if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
1732 apic_lvt_enabled(apic, APIC_LVTT))
1733 return atomic_read(&apic->lapic_timer.pending); 1802 return atomic_read(&apic->lapic_timer.pending);
1734 1803
1735 return 0; 1804 return 0;
@@ -1826,7 +1895,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
1826 struct kvm_lapic *apic = vcpu->arch.apic; 1895 struct kvm_lapic *apic = vcpu->arch.apic;
1827 int highest_irr; 1896 int highest_irr;
1828 1897
1829 if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic)) 1898 if (!apic_enabled(apic))
1830 return -1; 1899 return -1;
1831 1900
1832 apic_update_ppr(apic); 1901 apic_update_ppr(apic);
@@ -1854,9 +1923,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
1854{ 1923{
1855 struct kvm_lapic *apic = vcpu->arch.apic; 1924 struct kvm_lapic *apic = vcpu->arch.apic;
1856 1925
1857 if (!kvm_vcpu_has_lapic(vcpu))
1858 return;
1859
1860 if (atomic_read(&apic->lapic_timer.pending) > 0) { 1926 if (atomic_read(&apic->lapic_timer.pending) > 0) {
1861 kvm_apic_local_deliver(apic, APIC_LVTT); 1927 kvm_apic_local_deliver(apic, APIC_LVTT);
1862 if (apic_lvtt_tscdeadline(apic)) 1928 if (apic_lvtt_tscdeadline(apic))
@@ -1932,7 +1998,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
1932{ 1998{
1933 struct hrtimer *timer; 1999 struct hrtimer *timer;
1934 2000
1935 if (!kvm_vcpu_has_lapic(vcpu)) 2001 if (!lapic_in_kernel(vcpu))
1936 return; 2002 return;
1937 2003
1938 timer = &vcpu->arch.apic->lapic_timer.timer; 2004 timer = &vcpu->arch.apic->lapic_timer.timer;
@@ -2105,7 +2171,7 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2105{ 2171{
2106 struct kvm_lapic *apic = vcpu->arch.apic; 2172 struct kvm_lapic *apic = vcpu->arch.apic;
2107 2173
2108 if (!kvm_vcpu_has_lapic(vcpu)) 2174 if (!lapic_in_kernel(vcpu))
2109 return 1; 2175 return 1;
2110 2176
2111 /* if this is ICR write vector before command */ 2177 /* if this is ICR write vector before command */
@@ -2119,7 +2185,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2119 struct kvm_lapic *apic = vcpu->arch.apic; 2185 struct kvm_lapic *apic = vcpu->arch.apic;
2120 u32 low, high = 0; 2186 u32 low, high = 0;
2121 2187
2122 if (!kvm_vcpu_has_lapic(vcpu)) 2188 if (!lapic_in_kernel(vcpu))
2123 return 1; 2189 return 1;
2124 2190
2125 if (apic_reg_read(apic, reg, 4, &low)) 2191 if (apic_reg_read(apic, reg, 4, &low))
@@ -2151,7 +2217,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
2151 u8 sipi_vector; 2217 u8 sipi_vector;
2152 unsigned long pe; 2218 unsigned long pe;
2153 2219
2154 if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events) 2220 if (!lapic_in_kernel(vcpu) || !apic->pending_events)
2155 return; 2221 return;
2156 2222
2157 /* 2223 /*
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 41bdb35b4b67..f71183e502ee 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -42,6 +42,9 @@ struct kvm_lapic {
42 unsigned long pending_events; 42 unsigned long pending_events;
43 unsigned int sipi_vector; 43 unsigned int sipi_vector;
44}; 44};
45
46struct dest_map;
47
45int kvm_create_lapic(struct kvm_vcpu *vcpu); 48int kvm_create_lapic(struct kvm_vcpu *vcpu);
46void kvm_free_lapic(struct kvm_vcpu *vcpu); 49void kvm_free_lapic(struct kvm_vcpu *vcpu);
47 50
@@ -60,11 +63,11 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
60void __kvm_apic_update_irr(u32 *pir, void *regs); 63void __kvm_apic_update_irr(u32 *pir, void *regs);
61void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); 64void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
62int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, 65int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
63 unsigned long *dest_map); 66 struct dest_map *dest_map);
64int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); 67int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
65 68
66bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, 69bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
67 struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); 70 struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map);
68 71
69u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); 72u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
70int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); 73int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
@@ -103,7 +106,7 @@ static inline u32 kvm_apic_get_reg(struct kvm_lapic *apic, int reg_off)
103 106
104extern struct static_key kvm_no_apic_vcpu; 107extern struct static_key kvm_no_apic_vcpu;
105 108
106static inline bool kvm_vcpu_has_lapic(struct kvm_vcpu *vcpu) 109static inline bool lapic_in_kernel(struct kvm_vcpu *vcpu)
107{ 110{
108 if (static_key_false(&kvm_no_apic_vcpu)) 111 if (static_key_false(&kvm_no_apic_vcpu))
109 return vcpu->arch.apic; 112 return vcpu->arch.apic;
@@ -130,7 +133,7 @@ static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic)
130 133
131static inline bool kvm_apic_present(struct kvm_vcpu *vcpu) 134static inline bool kvm_apic_present(struct kvm_vcpu *vcpu)
132{ 135{
133 return kvm_vcpu_has_lapic(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic); 136 return lapic_in_kernel(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic);
134} 137}
135 138
136static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu) 139static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
@@ -150,7 +153,7 @@ static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu)
150 153
151static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) 154static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
152{ 155{
153 return kvm_vcpu_has_lapic(vcpu) && vcpu->arch.apic->pending_events; 156 return lapic_in_kernel(vcpu) && vcpu->arch.apic->pending_events;
154} 157}
155 158
156static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) 159static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
@@ -161,7 +164,7 @@ static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
161 164
162static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) 165static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
163{ 166{
164 return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); 167 return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
165} 168}
166 169
167static inline int kvm_apic_id(struct kvm_lapic *apic) 170static inline int kvm_apic_id(struct kvm_lapic *apic)
@@ -175,4 +178,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu);
175 178
176bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, 179bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
177 struct kvm_vcpu **dest_vcpu); 180 struct kvm_vcpu **dest_vcpu);
181int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
182 const unsigned long *bitmap, u32 bitmap_size);
178#endif 183#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1e7a49bfc94f..c512f095cdac 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -41,6 +41,7 @@
41#include <asm/cmpxchg.h> 41#include <asm/cmpxchg.h>
42#include <asm/io.h> 42#include <asm/io.h>
43#include <asm/vmx.h> 43#include <asm/vmx.h>
44#include <asm/kvm_page_track.h>
44 45
45/* 46/*
46 * When setting this variable to true it enables Two-Dimensional-Paging 47 * When setting this variable to true it enables Two-Dimensional-Paging
@@ -776,62 +777,85 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
776 return &slot->arch.lpage_info[level - 2][idx]; 777 return &slot->arch.lpage_info[level - 2][idx];
777} 778}
778 779
780static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot,
781 gfn_t gfn, int count)
782{
783 struct kvm_lpage_info *linfo;
784 int i;
785
786 for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
787 linfo = lpage_info_slot(gfn, slot, i);
788 linfo->disallow_lpage += count;
789 WARN_ON(linfo->disallow_lpage < 0);
790 }
791}
792
793void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn)
794{
795 update_gfn_disallow_lpage_count(slot, gfn, 1);
796}
797
798void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn)
799{
800 update_gfn_disallow_lpage_count(slot, gfn, -1);
801}
802
779static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) 803static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
780{ 804{
781 struct kvm_memslots *slots; 805 struct kvm_memslots *slots;
782 struct kvm_memory_slot *slot; 806 struct kvm_memory_slot *slot;
783 struct kvm_lpage_info *linfo;
784 gfn_t gfn; 807 gfn_t gfn;
785 int i;
786 808
809 kvm->arch.indirect_shadow_pages++;
787 gfn = sp->gfn; 810 gfn = sp->gfn;
788 slots = kvm_memslots_for_spte_role(kvm, sp->role); 811 slots = kvm_memslots_for_spte_role(kvm, sp->role);
789 slot = __gfn_to_memslot(slots, gfn); 812 slot = __gfn_to_memslot(slots, gfn);
790 for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { 813
791 linfo = lpage_info_slot(gfn, slot, i); 814 /* non-leaf shadow pages are kept read-only. */
792 linfo->write_count += 1; 815 if (sp->role.level > PT_PAGE_TABLE_LEVEL)
793 } 816 return kvm_slot_page_track_add_page(kvm, slot, gfn,
794 kvm->arch.indirect_shadow_pages++; 817 KVM_PAGE_TRACK_WRITE);
818
819 kvm_mmu_gfn_disallow_lpage(slot, gfn);
795} 820}
796 821
797static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) 822static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
798{ 823{
799 struct kvm_memslots *slots; 824 struct kvm_memslots *slots;
800 struct kvm_memory_slot *slot; 825 struct kvm_memory_slot *slot;
801 struct kvm_lpage_info *linfo;
802 gfn_t gfn; 826 gfn_t gfn;
803 int i;
804 827
828 kvm->arch.indirect_shadow_pages--;
805 gfn = sp->gfn; 829 gfn = sp->gfn;
806 slots = kvm_memslots_for_spte_role(kvm, sp->role); 830 slots = kvm_memslots_for_spte_role(kvm, sp->role);
807 slot = __gfn_to_memslot(slots, gfn); 831 slot = __gfn_to_memslot(slots, gfn);
808 for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { 832 if (sp->role.level > PT_PAGE_TABLE_LEVEL)
809 linfo = lpage_info_slot(gfn, slot, i); 833 return kvm_slot_page_track_remove_page(kvm, slot, gfn,
810 linfo->write_count -= 1; 834 KVM_PAGE_TRACK_WRITE);
811 WARN_ON(linfo->write_count < 0); 835
812 } 836 kvm_mmu_gfn_allow_lpage(slot, gfn);
813 kvm->arch.indirect_shadow_pages--;
814} 837}
815 838
816static int __has_wrprotected_page(gfn_t gfn, int level, 839static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
817 struct kvm_memory_slot *slot) 840 struct kvm_memory_slot *slot)
818{ 841{
819 struct kvm_lpage_info *linfo; 842 struct kvm_lpage_info *linfo;
820 843
821 if (slot) { 844 if (slot) {
822 linfo = lpage_info_slot(gfn, slot, level); 845 linfo = lpage_info_slot(gfn, slot, level);
823 return linfo->write_count; 846 return !!linfo->disallow_lpage;
824 } 847 }
825 848
826 return 1; 849 return true;
827} 850}
828 851
829static int has_wrprotected_page(struct kvm_vcpu *vcpu, gfn_t gfn, int level) 852static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn,
853 int level)
830{ 854{
831 struct kvm_memory_slot *slot; 855 struct kvm_memory_slot *slot;
832 856
833 slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); 857 slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
834 return __has_wrprotected_page(gfn, level, slot); 858 return __mmu_gfn_lpage_is_disallowed(gfn, level, slot);
835} 859}
836 860
837static int host_mapping_level(struct kvm *kvm, gfn_t gfn) 861static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
@@ -897,7 +921,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn,
897 max_level = min(kvm_x86_ops->get_lpage_level(), host_level); 921 max_level = min(kvm_x86_ops->get_lpage_level(), host_level);
898 922
899 for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) 923 for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
900 if (__has_wrprotected_page(large_gfn, level, slot)) 924 if (__mmu_gfn_lpage_is_disallowed(large_gfn, level, slot))
901 break; 925 break;
902 926
903 return level - 1; 927 return level - 1;
@@ -1323,23 +1347,29 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
1323 kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); 1347 kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
1324} 1348}
1325 1349
1326static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) 1350bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
1351 struct kvm_memory_slot *slot, u64 gfn)
1327{ 1352{
1328 struct kvm_memory_slot *slot;
1329 struct kvm_rmap_head *rmap_head; 1353 struct kvm_rmap_head *rmap_head;
1330 int i; 1354 int i;
1331 bool write_protected = false; 1355 bool write_protected = false;
1332 1356
1333 slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
1334
1335 for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { 1357 for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
1336 rmap_head = __gfn_to_rmap(gfn, i, slot); 1358 rmap_head = __gfn_to_rmap(gfn, i, slot);
1337 write_protected |= __rmap_write_protect(vcpu->kvm, rmap_head, true); 1359 write_protected |= __rmap_write_protect(kvm, rmap_head, true);
1338 } 1360 }
1339 1361
1340 return write_protected; 1362 return write_protected;
1341} 1363}
1342 1364
1365static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
1366{
1367 struct kvm_memory_slot *slot;
1368
1369 slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
1370 return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn);
1371}
1372
1343static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head) 1373static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
1344{ 1374{
1345 u64 *sptep; 1375 u64 *sptep;
@@ -1754,7 +1784,7 @@ static void mark_unsync(u64 *spte)
1754static int nonpaging_sync_page(struct kvm_vcpu *vcpu, 1784static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
1755 struct kvm_mmu_page *sp) 1785 struct kvm_mmu_page *sp)
1756{ 1786{
1757 return 1; 1787 return 0;
1758} 1788}
1759 1789
1760static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) 1790static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
@@ -1840,13 +1870,16 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
1840 return nr_unsync_leaf; 1870 return nr_unsync_leaf;
1841} 1871}
1842 1872
1873#define INVALID_INDEX (-1)
1874
1843static int mmu_unsync_walk(struct kvm_mmu_page *sp, 1875static int mmu_unsync_walk(struct kvm_mmu_page *sp,
1844 struct kvm_mmu_pages *pvec) 1876 struct kvm_mmu_pages *pvec)
1845{ 1877{
1878 pvec->nr = 0;
1846 if (!sp->unsync_children) 1879 if (!sp->unsync_children)
1847 return 0; 1880 return 0;
1848 1881
1849 mmu_pages_add(pvec, sp, 0); 1882 mmu_pages_add(pvec, sp, INVALID_INDEX);
1850 return __mmu_unsync_walk(sp, pvec); 1883 return __mmu_unsync_walk(sp, pvec);
1851} 1884}
1852 1885
@@ -1883,37 +1916,35 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
1883 if ((_sp)->role.direct || (_sp)->role.invalid) {} else 1916 if ((_sp)->role.direct || (_sp)->role.invalid) {} else
1884 1917
1885/* @sp->gfn should be write-protected at the call site */ 1918/* @sp->gfn should be write-protected at the call site */
1886static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, 1919static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
1887 struct list_head *invalid_list, bool clear_unsync) 1920 struct list_head *invalid_list)
1888{ 1921{
1889 if (sp->role.cr4_pae != !!is_pae(vcpu)) { 1922 if (sp->role.cr4_pae != !!is_pae(vcpu)) {
1890 kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); 1923 kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
1891 return 1; 1924 return false;
1892 } 1925 }
1893 1926
1894 if (clear_unsync) 1927 if (vcpu->arch.mmu.sync_page(vcpu, sp) == 0) {
1895 kvm_unlink_unsync_page(vcpu->kvm, sp);
1896
1897 if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
1898 kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); 1928 kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
1899 return 1; 1929 return false;
1900 } 1930 }
1901 1931
1902 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 1932 return true;
1903 return 0;
1904} 1933}
1905 1934
1906static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, 1935static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu,
1907 struct kvm_mmu_page *sp) 1936 struct list_head *invalid_list,
1937 bool remote_flush, bool local_flush)
1908{ 1938{
1909 LIST_HEAD(invalid_list); 1939 if (!list_empty(invalid_list)) {
1910 int ret; 1940 kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list);
1911 1941 return;
1912 ret = __kvm_sync_page(vcpu, sp, &invalid_list, false); 1942 }
1913 if (ret)
1914 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
1915 1943
1916 return ret; 1944 if (remote_flush)
1945 kvm_flush_remote_tlbs(vcpu->kvm);
1946 else if (local_flush)
1947 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1917} 1948}
1918 1949
1919#ifdef CONFIG_KVM_MMU_AUDIT 1950#ifdef CONFIG_KVM_MMU_AUDIT
@@ -1923,46 +1954,38 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
1923static void mmu_audit_disable(void) { } 1954static void mmu_audit_disable(void) { }
1924#endif 1955#endif
1925 1956
1926static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, 1957static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
1927 struct list_head *invalid_list) 1958 struct list_head *invalid_list)
1928{ 1959{
1929 return __kvm_sync_page(vcpu, sp, invalid_list, true); 1960 kvm_unlink_unsync_page(vcpu->kvm, sp);
1961 return __kvm_sync_page(vcpu, sp, invalid_list);
1930} 1962}
1931 1963
1932/* @gfn should be write-protected at the call site */ 1964/* @gfn should be write-protected at the call site */
1933static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) 1965static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn,
1966 struct list_head *invalid_list)
1934{ 1967{
1935 struct kvm_mmu_page *s; 1968 struct kvm_mmu_page *s;
1936 LIST_HEAD(invalid_list); 1969 bool ret = false;
1937 bool flush = false;
1938 1970
1939 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { 1971 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
1940 if (!s->unsync) 1972 if (!s->unsync)
1941 continue; 1973 continue;
1942 1974
1943 WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); 1975 WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
1944 kvm_unlink_unsync_page(vcpu->kvm, s); 1976 ret |= kvm_sync_page(vcpu, s, invalid_list);
1945 if ((s->role.cr4_pae != !!is_pae(vcpu)) ||
1946 (vcpu->arch.mmu.sync_page(vcpu, s))) {
1947 kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list);
1948 continue;
1949 }
1950 flush = true;
1951 } 1977 }
1952 1978
1953 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); 1979 return ret;
1954 if (flush)
1955 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1956} 1980}
1957 1981
1958struct mmu_page_path { 1982struct mmu_page_path {
1959 struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; 1983 struct kvm_mmu_page *parent[PT64_ROOT_LEVEL];
1960 unsigned int idx[PT64_ROOT_LEVEL-1]; 1984 unsigned int idx[PT64_ROOT_LEVEL];
1961}; 1985};
1962 1986
1963#define for_each_sp(pvec, sp, parents, i) \ 1987#define for_each_sp(pvec, sp, parents, i) \
1964 for (i = mmu_pages_next(&pvec, &parents, -1), \ 1988 for (i = mmu_pages_first(&pvec, &parents); \
1965 sp = pvec.page[i].sp; \
1966 i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ 1989 i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
1967 i = mmu_pages_next(&pvec, &parents, i)) 1990 i = mmu_pages_next(&pvec, &parents, i))
1968 1991
@@ -1974,19 +1997,43 @@ static int mmu_pages_next(struct kvm_mmu_pages *pvec,
1974 1997
1975 for (n = i+1; n < pvec->nr; n++) { 1998 for (n = i+1; n < pvec->nr; n++) {
1976 struct kvm_mmu_page *sp = pvec->page[n].sp; 1999 struct kvm_mmu_page *sp = pvec->page[n].sp;
2000 unsigned idx = pvec->page[n].idx;
2001 int level = sp->role.level;
1977 2002
1978 if (sp->role.level == PT_PAGE_TABLE_LEVEL) { 2003 parents->idx[level-1] = idx;
1979 parents->idx[0] = pvec->page[n].idx; 2004 if (level == PT_PAGE_TABLE_LEVEL)
1980 return n; 2005 break;
1981 }
1982 2006
1983 parents->parent[sp->role.level-2] = sp; 2007 parents->parent[level-2] = sp;
1984 parents->idx[sp->role.level-1] = pvec->page[n].idx;
1985 } 2008 }
1986 2009
1987 return n; 2010 return n;
1988} 2011}
1989 2012
2013static int mmu_pages_first(struct kvm_mmu_pages *pvec,
2014 struct mmu_page_path *parents)
2015{
2016 struct kvm_mmu_page *sp;
2017 int level;
2018
2019 if (pvec->nr == 0)
2020 return 0;
2021
2022 WARN_ON(pvec->page[0].idx != INVALID_INDEX);
2023
2024 sp = pvec->page[0].sp;
2025 level = sp->role.level;
2026 WARN_ON(level == PT_PAGE_TABLE_LEVEL);
2027
2028 parents->parent[level-2] = sp;
2029
2030 /* Also set up a sentinel. Further entries in pvec are all
2031 * children of sp, so this element is never overwritten.
2032 */
2033 parents->parent[level-1] = NULL;
2034 return mmu_pages_next(pvec, parents, 0);
2035}
2036
1990static void mmu_pages_clear_parents(struct mmu_page_path *parents) 2037static void mmu_pages_clear_parents(struct mmu_page_path *parents)
1991{ 2038{
1992 struct kvm_mmu_page *sp; 2039 struct kvm_mmu_page *sp;
@@ -1994,22 +2041,14 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
1994 2041
1995 do { 2042 do {
1996 unsigned int idx = parents->idx[level]; 2043 unsigned int idx = parents->idx[level];
1997
1998 sp = parents->parent[level]; 2044 sp = parents->parent[level];
1999 if (!sp) 2045 if (!sp)
2000 return; 2046 return;
2001 2047
2048 WARN_ON(idx == INVALID_INDEX);
2002 clear_unsync_child_bit(sp, idx); 2049 clear_unsync_child_bit(sp, idx);
2003 level++; 2050 level++;
2004 } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); 2051 } while (!sp->unsync_children);
2005}
2006
2007static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
2008 struct mmu_page_path *parents,
2009 struct kvm_mmu_pages *pvec)
2010{
2011 parents->parent[parent->role.level-1] = NULL;
2012 pvec->nr = 0;
2013} 2052}
2014 2053
2015static void mmu_sync_children(struct kvm_vcpu *vcpu, 2054static void mmu_sync_children(struct kvm_vcpu *vcpu,
@@ -2020,30 +2059,36 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
2020 struct mmu_page_path parents; 2059 struct mmu_page_path parents;
2021 struct kvm_mmu_pages pages; 2060 struct kvm_mmu_pages pages;
2022 LIST_HEAD(invalid_list); 2061 LIST_HEAD(invalid_list);
2062 bool flush = false;
2023 2063
2024 kvm_mmu_pages_init(parent, &parents, &pages);
2025 while (mmu_unsync_walk(parent, &pages)) { 2064 while (mmu_unsync_walk(parent, &pages)) {
2026 bool protected = false; 2065 bool protected = false;
2027 2066
2028 for_each_sp(pages, sp, parents, i) 2067 for_each_sp(pages, sp, parents, i)
2029 protected |= rmap_write_protect(vcpu, sp->gfn); 2068 protected |= rmap_write_protect(vcpu, sp->gfn);
2030 2069
2031 if (protected) 2070 if (protected) {
2032 kvm_flush_remote_tlbs(vcpu->kvm); 2071 kvm_flush_remote_tlbs(vcpu->kvm);
2072 flush = false;
2073 }
2033 2074
2034 for_each_sp(pages, sp, parents, i) { 2075 for_each_sp(pages, sp, parents, i) {
2035 kvm_sync_page(vcpu, sp, &invalid_list); 2076 flush |= kvm_sync_page(vcpu, sp, &invalid_list);
2036 mmu_pages_clear_parents(&parents); 2077 mmu_pages_clear_parents(&parents);
2037 } 2078 }
2038 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); 2079 if (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)) {
2039 cond_resched_lock(&vcpu->kvm->mmu_lock); 2080 kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
2040 kvm_mmu_pages_init(parent, &parents, &pages); 2081 cond_resched_lock(&vcpu->kvm->mmu_lock);
2082 flush = false;
2083 }
2041 } 2084 }
2085
2086 kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
2042} 2087}
2043 2088
2044static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) 2089static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
2045{ 2090{
2046 sp->write_flooding_count = 0; 2091 atomic_set(&sp->write_flooding_count, 0);
2047} 2092}
2048 2093
2049static void clear_sp_write_flooding_count(u64 *spte) 2094static void clear_sp_write_flooding_count(u64 *spte)
@@ -2069,6 +2114,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
2069 unsigned quadrant; 2114 unsigned quadrant;
2070 struct kvm_mmu_page *sp; 2115 struct kvm_mmu_page *sp;
2071 bool need_sync = false; 2116 bool need_sync = false;
2117 bool flush = false;
2118 LIST_HEAD(invalid_list);
2072 2119
2073 role = vcpu->arch.mmu.base_role; 2120 role = vcpu->arch.mmu.base_role;
2074 role.level = level; 2121 role.level = level;
@@ -2092,8 +2139,16 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
2092 if (sp->role.word != role.word) 2139 if (sp->role.word != role.word)
2093 continue; 2140 continue;
2094 2141
2095 if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) 2142 if (sp->unsync) {
2096 break; 2143 /* The page is good, but __kvm_sync_page might still end
2144 * up zapping it. If so, break in order to rebuild it.
2145 */
2146 if (!__kvm_sync_page(vcpu, sp, &invalid_list))
2147 break;
2148
2149 WARN_ON(!list_empty(&invalid_list));
2150 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2151 }
2097 2152
2098 if (sp->unsync_children) 2153 if (sp->unsync_children)
2099 kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); 2154 kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
@@ -2112,16 +2167,24 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
2112 hlist_add_head(&sp->hash_link, 2167 hlist_add_head(&sp->hash_link,
2113 &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); 2168 &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
2114 if (!direct) { 2169 if (!direct) {
2115 if (rmap_write_protect(vcpu, gfn)) 2170 /*
2171 * We should do write protection before syncing pages,
2172 * otherwise the content of the synced shadow page may
2173 * be inconsistent with the guest page table.
2174 */
2175 account_shadowed(vcpu->kvm, sp);
2176 if (level == PT_PAGE_TABLE_LEVEL &&
2177 rmap_write_protect(vcpu, gfn))
2116 kvm_flush_remote_tlbs(vcpu->kvm); 2178 kvm_flush_remote_tlbs(vcpu->kvm);
2117 if (level > PT_PAGE_TABLE_LEVEL && need_sync)
2118 kvm_sync_pages(vcpu, gfn);
2119 2179
2120 account_shadowed(vcpu->kvm, sp); 2180 if (level > PT_PAGE_TABLE_LEVEL && need_sync)
2181 flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
2121 } 2182 }
2122 sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; 2183 sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
2123 clear_page(sp->spt); 2184 clear_page(sp->spt);
2124 trace_kvm_mmu_get_page(sp, true); 2185 trace_kvm_mmu_get_page(sp, true);
2186
2187 kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
2125 return sp; 2188 return sp;
2126} 2189}
2127 2190
@@ -2269,7 +2332,6 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
2269 if (parent->role.level == PT_PAGE_TABLE_LEVEL) 2332 if (parent->role.level == PT_PAGE_TABLE_LEVEL)
2270 return 0; 2333 return 0;
2271 2334
2272 kvm_mmu_pages_init(parent, &parents, &pages);
2273 while (mmu_unsync_walk(parent, &pages)) { 2335 while (mmu_unsync_walk(parent, &pages)) {
2274 struct kvm_mmu_page *sp; 2336 struct kvm_mmu_page *sp;
2275 2337
@@ -2278,7 +2340,6 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
2278 mmu_pages_clear_parents(&parents); 2340 mmu_pages_clear_parents(&parents);
2279 zapped++; 2341 zapped++;
2280 } 2342 }
2281 kvm_mmu_pages_init(parent, &parents, &pages);
2282 } 2343 }
2283 2344
2284 return zapped; 2345 return zapped;
@@ -2354,8 +2415,8 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
2354 if (list_empty(&kvm->arch.active_mmu_pages)) 2415 if (list_empty(&kvm->arch.active_mmu_pages))
2355 return false; 2416 return false;
2356 2417
2357 sp = list_entry(kvm->arch.active_mmu_pages.prev, 2418 sp = list_last_entry(&kvm->arch.active_mmu_pages,
2358 struct kvm_mmu_page, link); 2419 struct kvm_mmu_page, link);
2359 kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); 2420 kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
2360 2421
2361 return true; 2422 return true;
@@ -2408,7 +2469,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
2408} 2469}
2409EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); 2470EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
2410 2471
2411static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 2472static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
2412{ 2473{
2413 trace_kvm_mmu_unsync_page(sp); 2474 trace_kvm_mmu_unsync_page(sp);
2414 ++vcpu->kvm->stat.mmu_unsync; 2475 ++vcpu->kvm->stat.mmu_unsync;
@@ -2417,37 +2478,26 @@ static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
2417 kvm_mmu_mark_parents_unsync(sp); 2478 kvm_mmu_mark_parents_unsync(sp);
2418} 2479}
2419 2480
2420static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) 2481static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
2482 bool can_unsync)
2421{ 2483{
2422 struct kvm_mmu_page *s; 2484 struct kvm_mmu_page *sp;
2423
2424 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
2425 if (s->unsync)
2426 continue;
2427 WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
2428 __kvm_unsync_page(vcpu, s);
2429 }
2430}
2431 2485
2432static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, 2486 if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))
2433 bool can_unsync) 2487 return true;
2434{
2435 struct kvm_mmu_page *s;
2436 bool need_unsync = false;
2437 2488
2438 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { 2489 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
2439 if (!can_unsync) 2490 if (!can_unsync)
2440 return 1; 2491 return true;
2441 2492
2442 if (s->role.level != PT_PAGE_TABLE_LEVEL) 2493 if (sp->unsync)
2443 return 1; 2494 continue;
2444 2495
2445 if (!s->unsync) 2496 WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL);
2446 need_unsync = true; 2497 kvm_unsync_page(vcpu, sp);
2447 } 2498 }
2448 if (need_unsync) 2499
2449 kvm_unsync_pages(vcpu, gfn); 2500 return false;
2450 return 0;
2451} 2501}
2452 2502
2453static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) 2503static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
@@ -2503,7 +2553,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2503 * be fixed if guest refault. 2553 * be fixed if guest refault.
2504 */ 2554 */
2505 if (level > PT_PAGE_TABLE_LEVEL && 2555 if (level > PT_PAGE_TABLE_LEVEL &&
2506 has_wrprotected_page(vcpu, gfn, level)) 2556 mmu_gfn_lpage_is_disallowed(vcpu, gfn, level))
2507 goto done; 2557 goto done;
2508 2558
2509 spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; 2559 spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
@@ -2768,7 +2818,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
2768 if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && 2818 if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
2769 level == PT_PAGE_TABLE_LEVEL && 2819 level == PT_PAGE_TABLE_LEVEL &&
2770 PageTransCompound(pfn_to_page(pfn)) && 2820 PageTransCompound(pfn_to_page(pfn)) &&
2771 !has_wrprotected_page(vcpu, gfn, PT_DIRECTORY_LEVEL)) { 2821 !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
2772 unsigned long mask; 2822 unsigned long mask;
2773 /* 2823 /*
2774 * mmu_notifier_retry was successful and we hold the 2824 * mmu_notifier_retry was successful and we hold the
@@ -2796,20 +2846,16 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
2796static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, 2846static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
2797 kvm_pfn_t pfn, unsigned access, int *ret_val) 2847 kvm_pfn_t pfn, unsigned access, int *ret_val)
2798{ 2848{
2799 bool ret = true;
2800
2801 /* The pfn is invalid, report the error! */ 2849 /* The pfn is invalid, report the error! */
2802 if (unlikely(is_error_pfn(pfn))) { 2850 if (unlikely(is_error_pfn(pfn))) {
2803 *ret_val = kvm_handle_bad_page(vcpu, gfn, pfn); 2851 *ret_val = kvm_handle_bad_page(vcpu, gfn, pfn);
2804 goto exit; 2852 return true;
2805 } 2853 }
2806 2854
2807 if (unlikely(is_noslot_pfn(pfn))) 2855 if (unlikely(is_noslot_pfn(pfn)))
2808 vcpu_cache_mmio_info(vcpu, gva, gfn, access); 2856 vcpu_cache_mmio_info(vcpu, gva, gfn, access);
2809 2857
2810 ret = false; 2858 return false;
2811exit:
2812 return ret;
2813} 2859}
2814 2860
2815static bool page_fault_can_be_fast(u32 error_code) 2861static bool page_fault_can_be_fast(u32 error_code)
@@ -3273,7 +3319,7 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
3273 return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level); 3319 return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level);
3274} 3320}
3275 3321
3276static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) 3322static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
3277{ 3323{
3278 if (direct) 3324 if (direct)
3279 return vcpu_match_mmio_gpa(vcpu, addr); 3325 return vcpu_match_mmio_gpa(vcpu, addr);
@@ -3332,7 +3378,7 @@ int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
3332 u64 spte; 3378 u64 spte;
3333 bool reserved; 3379 bool reserved;
3334 3380
3335 if (quickly_check_mmio_pf(vcpu, addr, direct)) 3381 if (mmio_info_in_cache(vcpu, addr, direct))
3336 return RET_MMIO_PF_EMULATE; 3382 return RET_MMIO_PF_EMULATE;
3337 3383
3338 reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte); 3384 reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
@@ -3362,20 +3408,53 @@ int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
3362} 3408}
3363EXPORT_SYMBOL_GPL(handle_mmio_page_fault); 3409EXPORT_SYMBOL_GPL(handle_mmio_page_fault);
3364 3410
3411static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
3412 u32 error_code, gfn_t gfn)
3413{
3414 if (unlikely(error_code & PFERR_RSVD_MASK))
3415 return false;
3416
3417 if (!(error_code & PFERR_PRESENT_MASK) ||
3418 !(error_code & PFERR_WRITE_MASK))
3419 return false;
3420
3421 /*
3422 * The guest is writing a page that is write-tracked, which cannot
3423 * be fixed by the page fault handler.
3424 */
3425 if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))
3426 return true;
3427
3428 return false;
3429}
3430
3431static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
3432{
3433 struct kvm_shadow_walk_iterator iterator;
3434 u64 spte;
3435
3436 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
3437 return;
3438
3439 walk_shadow_page_lockless_begin(vcpu);
3440 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
3441 clear_sp_write_flooding_count(iterator.sptep);
3442 if (!is_shadow_present_pte(spte))
3443 break;
3444 }
3445 walk_shadow_page_lockless_end(vcpu);
3446}
3447
3365static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, 3448static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
3366 u32 error_code, bool prefault) 3449 u32 error_code, bool prefault)
3367{ 3450{
3368 gfn_t gfn; 3451 gfn_t gfn = gva >> PAGE_SHIFT;
3369 int r; 3452 int r;
3370 3453
3371 pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); 3454 pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
3372 3455
3373 if (unlikely(error_code & PFERR_RSVD_MASK)) { 3456 if (page_fault_handle_page_track(vcpu, error_code, gfn))
3374 r = handle_mmio_page_fault(vcpu, gva, true); 3457 return 1;
3375
3376 if (likely(r != RET_MMIO_PF_INVALID))
3377 return r;
3378 }
3379 3458
3380 r = mmu_topup_memory_caches(vcpu); 3459 r = mmu_topup_memory_caches(vcpu);
3381 if (r) 3460 if (r)
@@ -3383,7 +3462,6 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
3383 3462
3384 MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); 3463 MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
3385 3464
3386 gfn = gva >> PAGE_SHIFT;
3387 3465
3388 return nonpaging_map(vcpu, gva & PAGE_MASK, 3466 return nonpaging_map(vcpu, gva & PAGE_MASK,
3389 error_code, gfn, prefault); 3467 error_code, gfn, prefault);
@@ -3460,12 +3538,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
3460 3538
3461 MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); 3539 MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
3462 3540
3463 if (unlikely(error_code & PFERR_RSVD_MASK)) { 3541 if (page_fault_handle_page_track(vcpu, error_code, gfn))
3464 r = handle_mmio_page_fault(vcpu, gpa, true); 3542 return 1;
3465
3466 if (likely(r != RET_MMIO_PF_INVALID))
3467 return r;
3468 }
3469 3543
3470 r = mmu_topup_memory_caches(vcpu); 3544 r = mmu_topup_memory_caches(vcpu);
3471 if (r) 3545 if (r)
@@ -3558,13 +3632,24 @@ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
3558 return false; 3632 return false;
3559} 3633}
3560 3634
3561static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) 3635static inline bool is_last_gpte(struct kvm_mmu *mmu,
3636 unsigned level, unsigned gpte)
3562{ 3637{
3563 unsigned index; 3638 /*
3639 * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set
3640 * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means
3641 * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then.
3642 */
3643 gpte |= level - PT_PAGE_TABLE_LEVEL - 1;
3564 3644
3565 index = level - 1; 3645 /*
3566 index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2); 3646 * The RHS has bit 7 set iff level < mmu->last_nonleaf_level.
3567 return mmu->last_pte_bitmap & (1 << index); 3647 * If it is clear, there are no large pages at this level, so clear
3648 * PT_PAGE_SIZE_MASK in gpte if that is the case.
3649 */
3650 gpte &= level - mmu->last_nonleaf_level;
3651
3652 return gpte & PT_PAGE_SIZE_MASK;
3568} 3653}
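A worked restatement of the bit trick in the two comments above, assuming PT_PAGE_TABLE_LEVEL == 1 and PT_PAGE_SIZE_MASK == (1 << 7) as in the existing paging code. Demo only, not part of the patch.

static inline bool is_last_gpte_demo(unsigned last_nonleaf_level,
				     unsigned level, unsigned gpte)
{
	/*
	 * Step 1: level == 1 makes the OR term wrap to all ones, forcing
	 * bit 7 on (a 4K leaf always terminates); level >= 2 ORs in 0..2
	 * and leaves bit 7 alone.
	 */
	gpte |= level - 1 - 1;

	/*
	 * Step 2: level < last_nonleaf_level makes the AND mask wrap to
	 * all ones, so bit 7 survives (large pages are possible here);
	 * otherwise the mask is a small value with bit 7 clear and the
	 * PS bit is stripped.
	 */
	gpte &= level - last_nonleaf_level;

	return gpte & (1u << 7);	/* PT_PAGE_SIZE_MASK */
}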
3569 3654
3570#define PTTYPE_EPT 18 /* arbitrary */ 3655#define PTTYPE_EPT 18 /* arbitrary */
@@ -3838,22 +3923,13 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
3838 } 3923 }
3839} 3924}
3840 3925
3841static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) 3926static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
3842{ 3927{
3843 u8 map; 3928 unsigned root_level = mmu->root_level;
3844 unsigned level, root_level = mmu->root_level; 3929
3845 const unsigned ps_set_index = 1 << 2; /* bit 2 of index: ps */ 3930 mmu->last_nonleaf_level = root_level;
3846 3931 if (root_level == PT32_ROOT_LEVEL && is_pse(vcpu))
3847 if (root_level == PT32E_ROOT_LEVEL) 3932 mmu->last_nonleaf_level++;
3848 --root_level;
3849 /* PT_PAGE_TABLE_LEVEL always terminates */
3850 map = 1 | (1 << ps_set_index);
3851 for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) {
3852 if (level <= PT_PDPE_LEVEL
3853 && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu)))
3854 map |= 1 << (ps_set_index | (level - 1));
3855 }
3856 mmu->last_pte_bitmap = map;
3857} 3933}
3858 3934
3859static void paging64_init_context_common(struct kvm_vcpu *vcpu, 3935static void paging64_init_context_common(struct kvm_vcpu *vcpu,
@@ -3865,7 +3941,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
3865 3941
3866 reset_rsvds_bits_mask(vcpu, context); 3942 reset_rsvds_bits_mask(vcpu, context);
3867 update_permission_bitmask(vcpu, context, false); 3943 update_permission_bitmask(vcpu, context, false);
3868 update_last_pte_bitmap(vcpu, context); 3944 update_last_nonleaf_level(vcpu, context);
3869 3945
3870 MMU_WARN_ON(!is_pae(vcpu)); 3946 MMU_WARN_ON(!is_pae(vcpu));
3871 context->page_fault = paging64_page_fault; 3947 context->page_fault = paging64_page_fault;
@@ -3892,7 +3968,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
3892 3968
3893 reset_rsvds_bits_mask(vcpu, context); 3969 reset_rsvds_bits_mask(vcpu, context);
3894 update_permission_bitmask(vcpu, context, false); 3970 update_permission_bitmask(vcpu, context, false);
3895 update_last_pte_bitmap(vcpu, context); 3971 update_last_nonleaf_level(vcpu, context);
3896 3972
3897 context->page_fault = paging32_page_fault; 3973 context->page_fault = paging32_page_fault;
3898 context->gva_to_gpa = paging32_gva_to_gpa; 3974 context->gva_to_gpa = paging32_gva_to_gpa;
@@ -3950,7 +4026,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
3950 } 4026 }
3951 4027
3952 update_permission_bitmask(vcpu, context, false); 4028 update_permission_bitmask(vcpu, context, false);
3953 update_last_pte_bitmap(vcpu, context); 4029 update_last_nonleaf_level(vcpu, context);
3954 reset_tdp_shadow_zero_bits_mask(vcpu, context); 4030 reset_tdp_shadow_zero_bits_mask(vcpu, context);
3955} 4031}
3956 4032
@@ -4056,7 +4132,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
4056 } 4132 }
4057 4133
4058 update_permission_bitmask(vcpu, g_context, false); 4134 update_permission_bitmask(vcpu, g_context, false);
4059 update_last_pte_bitmap(vcpu, g_context); 4135 update_last_nonleaf_level(vcpu, g_context);
4060} 4136}
4061 4137
4062static void init_kvm_mmu(struct kvm_vcpu *vcpu) 4138static void init_kvm_mmu(struct kvm_vcpu *vcpu)
@@ -4127,18 +4203,6 @@ static bool need_remote_flush(u64 old, u64 new)
4127 return (old & ~new & PT64_PERM_MASK) != 0; 4203 return (old & ~new & PT64_PERM_MASK) != 0;
4128} 4204}
4129 4205
4130static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page,
4131 bool remote_flush, bool local_flush)
4132{
4133 if (zap_page)
4134 return;
4135
4136 if (remote_flush)
4137 kvm_flush_remote_tlbs(vcpu->kvm);
4138 else if (local_flush)
4139 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4140}
4141
4142static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, 4206static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
4143 const u8 *new, int *bytes) 4207 const u8 *new, int *bytes)
4144{ 4208{
@@ -4188,7 +4252,8 @@ static bool detect_write_flooding(struct kvm_mmu_page *sp)
4188 if (sp->role.level == PT_PAGE_TABLE_LEVEL) 4252 if (sp->role.level == PT_PAGE_TABLE_LEVEL)
4189 return false; 4253 return false;
4190 4254
4191 return ++sp->write_flooding_count >= 3; 4255 atomic_inc(&sp->write_flooding_count);
4256 return atomic_read(&sp->write_flooding_count) >= 3;
4192} 4257}
4193 4258
4194/* 4259/*
@@ -4250,15 +4315,15 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
4250 return spte; 4315 return spte;
4251} 4316}
4252 4317
4253void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 4318static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
4254 const u8 *new, int bytes) 4319 const u8 *new, int bytes)
4255{ 4320{
4256 gfn_t gfn = gpa >> PAGE_SHIFT; 4321 gfn_t gfn = gpa >> PAGE_SHIFT;
4257 struct kvm_mmu_page *sp; 4322 struct kvm_mmu_page *sp;
4258 LIST_HEAD(invalid_list); 4323 LIST_HEAD(invalid_list);
4259 u64 entry, gentry, *spte; 4324 u64 entry, gentry, *spte;
4260 int npte; 4325 int npte;
4261 bool remote_flush, local_flush, zap_page; 4326 bool remote_flush, local_flush;
4262 union kvm_mmu_page_role mask = { }; 4327 union kvm_mmu_page_role mask = { };
4263 4328
4264 mask.cr0_wp = 1; 4329 mask.cr0_wp = 1;
@@ -4275,7 +4340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
4275 if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) 4340 if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
4276 return; 4341 return;
4277 4342
4278 zap_page = remote_flush = local_flush = false; 4343 remote_flush = local_flush = false;
4279 4344
4280 pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); 4345 pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
4281 4346
@@ -4295,8 +4360,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
4295 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { 4360 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
4296 if (detect_write_misaligned(sp, gpa, bytes) || 4361 if (detect_write_misaligned(sp, gpa, bytes) ||
4297 detect_write_flooding(sp)) { 4362 detect_write_flooding(sp)) {
4298 zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, 4363 kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
4299 &invalid_list);
4300 ++vcpu->kvm->stat.mmu_flooded; 4364 ++vcpu->kvm->stat.mmu_flooded;
4301 continue; 4365 continue;
4302 } 4366 }
@@ -4318,8 +4382,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
4318 ++spte; 4382 ++spte;
4319 } 4383 }
4320 } 4384 }
4321 mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); 4385 kvm_mmu_flush_or_zap(vcpu, &invalid_list, remote_flush, local_flush);
4322 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
4323 kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); 4386 kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE);
4324 spin_unlock(&vcpu->kvm->mmu_lock); 4387 spin_unlock(&vcpu->kvm->mmu_lock);
4325} 4388}
@@ -4356,32 +4419,34 @@ static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
4356 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); 4419 kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
4357} 4420}
4358 4421
4359static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr)
4360{
4361 if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu))
4362 return vcpu_match_mmio_gpa(vcpu, addr);
4363
4364 return vcpu_match_mmio_gva(vcpu, addr);
4365}
4366
4367int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, 4422int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
4368 void *insn, int insn_len) 4423 void *insn, int insn_len)
4369{ 4424{
4370 int r, emulation_type = EMULTYPE_RETRY; 4425 int r, emulation_type = EMULTYPE_RETRY;
4371 enum emulation_result er; 4426 enum emulation_result er;
4427 bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu);
4428
4429 if (unlikely(error_code & PFERR_RSVD_MASK)) {
4430 r = handle_mmio_page_fault(vcpu, cr2, direct);
4431 if (r == RET_MMIO_PF_EMULATE) {
4432 emulation_type = 0;
4433 goto emulate;
4434 }
4435 if (r == RET_MMIO_PF_RETRY)
4436 return 1;
4437 if (r < 0)
4438 return r;
4439 }
4372 4440
4373 r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false); 4441 r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
4374 if (r < 0) 4442 if (r < 0)
4375 goto out; 4443 return r;
4376 4444 if (!r)
4377 if (!r) { 4445 return 1;
4378 r = 1;
4379 goto out;
4380 }
4381 4446
4382 if (is_mmio_page_fault(vcpu, cr2)) 4447 if (mmio_info_in_cache(vcpu, cr2, direct))
4383 emulation_type = 0; 4448 emulation_type = 0;
4384 4449emulate:
4385 er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); 4450 er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
4386 4451
4387 switch (er) { 4452 switch (er) {
@@ -4395,8 +4460,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
4395 default: 4460 default:
4396 BUG(); 4461 BUG();
4397 } 4462 }
4398out:
4399 return r;
4400} 4463}
4401EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); 4464EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
4402 4465
@@ -4465,6 +4528,21 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu)
4465 init_kvm_mmu(vcpu); 4528 init_kvm_mmu(vcpu);
4466} 4529}
4467 4530
4531void kvm_mmu_init_vm(struct kvm *kvm)
4532{
4533 struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
4534
4535 node->track_write = kvm_mmu_pte_write;
4536 kvm_page_track_register_notifier(kvm, node);
4537}
4538
4539void kvm_mmu_uninit_vm(struct kvm *kvm)
4540{
4541 struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
4542
4543 kvm_page_track_unregister_notifier(kvm, node);
4544}
4545
4468/* The return value indicates if tlb flush on all vcpus is needed. */ 4546/* The return value indicates if tlb flush on all vcpus is needed. */
4469typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); 4547typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
4470 4548
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 55ffb7b0f95e..58fe98a0a526 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -174,4 +174,9 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
174 174
175void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); 175void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
176void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); 176void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
177
178void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
179void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
180bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
181 struct kvm_memory_slot *slot, u64 gfn);
177#endif 182#endif
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
new file mode 100644
index 000000000000..11f76436f74f
--- /dev/null
+++ b/arch/x86/kvm/page_track.c
@@ -0,0 +1,222 @@
1/*
2 * Support KVM guest page tracking
3 *
4 * This feature allows us to track page accesses in the guest. Currently, only
5 * write access is tracked.
6 *
7 * Copyright(C) 2015 Intel Corporation.
8 *
9 * Author:
10 * Xiao Guangrong <guangrong.xiao@linux.intel.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2. See
13 * the COPYING file in the top-level directory.
14 */
15
16#include <linux/kvm_host.h>
17#include <asm/kvm_host.h>
18#include <asm/kvm_page_track.h>
19
20#include "mmu.h"
21
22void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
23 struct kvm_memory_slot *dont)
24{
25 int i;
26
27 for (i = 0; i < KVM_PAGE_TRACK_MAX; i++)
28 if (!dont || free->arch.gfn_track[i] !=
29 dont->arch.gfn_track[i]) {
30 kvfree(free->arch.gfn_track[i]);
31 free->arch.gfn_track[i] = NULL;
32 }
33}
34
35int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
36 unsigned long npages)
37{
38 int i;
39
40 for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
41 slot->arch.gfn_track[i] = kvm_kvzalloc(npages *
42 sizeof(*slot->arch.gfn_track[i]));
43 if (!slot->arch.gfn_track[i])
44 goto track_free;
45 }
46
47 return 0;
48
49track_free:
50 kvm_page_track_free_memslot(slot, NULL);
51 return -ENOMEM;
52}
53
54static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
55{
56 if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
57 return false;
58
59 return true;
60}
61
62static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
63 enum kvm_page_track_mode mode, short count)
64{
65 int index, val;
66
67 index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL);
68
69 val = slot->arch.gfn_track[mode][index];
70
71 if (WARN_ON(val + count < 0 || val + count > USHRT_MAX))
72 return;
73
74 slot->arch.gfn_track[mode][index] += count;
75}
76
77/*
78 * Add a guest page to the tracking pool so that corresponding accesses to
79 * that page are intercepted.
80 *
81 * It should be called under the protection of both mmu-lock and kvm->srcu
82 * or kvm->slots_lock.
83 *
84 * @kvm: the guest instance we are interested in.
85 * @slot: the memslot that @gfn belongs to.
86 * @gfn: the guest page.
87 * @mode: tracking mode, currently only write track is supported.
88 */
89void kvm_slot_page_track_add_page(struct kvm *kvm,
90 struct kvm_memory_slot *slot, gfn_t gfn,
91 enum kvm_page_track_mode mode)
92{
93
94 if (WARN_ON(!page_track_mode_is_valid(mode)))
95 return;
96
97 update_gfn_track(slot, gfn, mode, 1);
98
99 /*
100 * A newly tracked page can no longer be mapped
101 * with a large page.
102 */
103 kvm_mmu_gfn_disallow_lpage(slot, gfn);
104
105 if (mode == KVM_PAGE_TRACK_WRITE)
106 if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
107 kvm_flush_remote_tlbs(kvm);
108}
109
110/*
111 * Remove the guest page from the tracking pool, which stops the interception
112 * of corresponding accesses to that page. It is the opposite operation of
113 * kvm_slot_page_track_add_page().
114 *
115 * It should be called under the protection of both mmu-lock and kvm->srcu
116 * or kvm->slots_lock.
117 *
118 * @kvm: the guest instance we are interested in.
119 * @slot: the memslot that @gfn belongs to.
120 * @gfn: the guest page.
121 * @mode: tracking mode, currently only write track is supported.
122 */
123void kvm_slot_page_track_remove_page(struct kvm *kvm,
124 struct kvm_memory_slot *slot, gfn_t gfn,
125 enum kvm_page_track_mode mode)
126{
127 if (WARN_ON(!page_track_mode_is_valid(mode)))
128 return;
129
130 update_gfn_track(slot, gfn, mode, -1);
131
132 /*
133 * allow large page mapping for the tracked page
134 * after the tracker is gone.
135 */
136 kvm_mmu_gfn_allow_lpage(slot, gfn);
137}
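A hypothetical caller of the two helpers above might look like the sketch below (the demo_* name is made up). Tracking is counted per gfn, so adds and removes must be balanced, and the locking rules documented above apply.

static void demo_set_write_tracking(struct kvm *kvm, gfn_t gfn, bool enable)
{
	struct kvm_memory_slot *slot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (slot) {
		spin_lock(&kvm->mmu_lock);
		if (enable)
			kvm_slot_page_track_add_page(kvm, slot, gfn,
						     KVM_PAGE_TRACK_WRITE);
		else
			kvm_slot_page_track_remove_page(kvm, slot, gfn,
							KVM_PAGE_TRACK_WRITE);
		spin_unlock(&kvm->mmu_lock);
	}
	srcu_read_unlock(&kvm->srcu, idx);
}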
138
139/*
140 * Check whether the corresponding access to the specified guest page is tracked.
141 */
142bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
143 enum kvm_page_track_mode mode)
144{
145 struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
146 int index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL);
147
148 if (WARN_ON(!page_track_mode_is_valid(mode)))
149 return false;
150
151 return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]);
152}
153
154void kvm_page_track_init(struct kvm *kvm)
155{
156 struct kvm_page_track_notifier_head *head;
157
158 head = &kvm->arch.track_notifier_head;
159 init_srcu_struct(&head->track_srcu);
160 INIT_HLIST_HEAD(&head->track_notifier_list);
161}
162
163/*
164 * Register the notifier so that events for accesses to the tracked guest
165 * pages can be received.
166 */
167void
168kvm_page_track_register_notifier(struct kvm *kvm,
169 struct kvm_page_track_notifier_node *n)
170{
171 struct kvm_page_track_notifier_head *head;
172
173 head = &kvm->arch.track_notifier_head;
174
175 spin_lock(&kvm->mmu_lock);
176 hlist_add_head_rcu(&n->node, &head->track_notifier_list);
177 spin_unlock(&kvm->mmu_lock);
178}
179
180/*
181 * Stop receiving events for tracked pages. It is the opposite operation of
182 * kvm_page_track_register_notifier().
183 */
184void
185kvm_page_track_unregister_notifier(struct kvm *kvm,
186 struct kvm_page_track_notifier_node *n)
187{
188 struct kvm_page_track_notifier_head *head;
189
190 head = &kvm->arch.track_notifier_head;
191
192 spin_lock(&kvm->mmu_lock);
193 hlist_del_rcu(&n->node);
194 spin_unlock(&kvm->mmu_lock);
195 synchronize_srcu(&head->track_srcu);
196}
197
198/*
199 * Notify the nodes that write access was intercepted and write emulation
200 * has finished at this time.
201 *
202 * Each node should figure out by itself whether the written page is one it
203 * is interested in.
204 */
205void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
206 int bytes)
207{
208 struct kvm_page_track_notifier_head *head;
209 struct kvm_page_track_notifier_node *n;
210 int idx;
211
212 head = &vcpu->kvm->arch.track_notifier_head;
213
214 if (hlist_empty(&head->track_notifier_list))
215 return;
216
217 idx = srcu_read_lock(&head->track_srcu);
218 hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
219 if (n->track_write)
220 n->track_write(vcpu, gpa, new, bytes);
221 srcu_read_unlock(&head->track_srcu, idx);
222}
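A consumer of this notifier API follows the same pattern kvm_mmu_init_vm() uses earlier in this series: embed a kvm_page_track_notifier_node, point ->track_write at a callback with the signature used by kvm_page_track_write() above, and register it. Sketch below; the demo_* names are illustrative only.

static void demo_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
			     const u8 *new, int bytes)
{
	/* Runs after write emulation finishes; filter on gpa as needed. */
}

static struct kvm_page_track_notifier_node demo_node = {
	.track_write	= demo_track_write,
};

static void demo_start_tracking(struct kvm *kvm)
{
	kvm_page_track_register_notifier(kvm, &demo_node);
}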
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 2ce4f05e81d3..e159a8185ad9 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -189,8 +189,11 @@ static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
189 ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) | 189 ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
190 ACC_USER_MASK; 190 ACC_USER_MASK;
191#else 191#else
192 access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; 192 BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
193 access &= ~(gpte >> PT64_NX_SHIFT); 193 BUILD_BUG_ON(ACC_EXEC_MASK != 1);
194 access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK);
195 /* Combine NX with P (which is set here) to get ACC_EXEC_MASK. */
196 access ^= (gpte >> PT64_NX_SHIFT);
194#endif 197#endif
195 198
196 return access; 199 return access;
@@ -702,24 +705,17 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
702 705
703 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); 706 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
704 707
705 if (unlikely(error_code & PFERR_RSVD_MASK)) {
706 r = handle_mmio_page_fault(vcpu, addr, mmu_is_nested(vcpu));
707 if (likely(r != RET_MMIO_PF_INVALID))
708 return r;
709
710 /*
711 * page fault with PFEC.RSVD = 1 is caused by shadow
712 * page fault, should not be used to walk guest page
713 * table.
714 */
715 error_code &= ~PFERR_RSVD_MASK;
716 };
717
718 r = mmu_topup_memory_caches(vcpu); 708 r = mmu_topup_memory_caches(vcpu);
719 if (r) 709 if (r)
720 return r; 710 return r;
721 711
722 /* 712 /*
713 * If PFEC.RSVD is set, this is a shadow page fault.
714 * The bit needs to be cleared before walking guest page tables.
715 */
716 error_code &= ~PFERR_RSVD_MASK;
717
718 /*
723 * Look up the guest pte for the faulting address. 719 * Look up the guest pte for the faulting address.
724 */ 720 */
725 r = FNAME(walk_addr)(&walker, vcpu, addr, error_code); 721 r = FNAME(walk_addr)(&walker, vcpu, addr, error_code);
@@ -735,6 +731,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
735 return 0; 731 return 0;
736 } 732 }
737 733
734 if (page_fault_handle_page_track(vcpu, error_code, walker.gfn)) {
735 shadow_page_table_clear_flood(vcpu, addr);
736 return 1;
737 }
738
738 vcpu->arch.write_fault_to_shadow_pgtable = false; 739 vcpu->arch.write_fault_to_shadow_pgtable = false;
739 740
740 is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, 741 is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
@@ -945,7 +946,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
945 946
946 if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte, 947 if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
947 sizeof(pt_element_t))) 948 sizeof(pt_element_t)))
948 return -EINVAL; 949 return 0;
949 950
950 if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { 951 if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
951 vcpu->kvm->tlbs_dirty++; 952 vcpu->kvm->tlbs_dirty++;
@@ -977,7 +978,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
977 host_writable); 978 host_writable);
978 } 979 }
979 980
980 return !nr_present; 981 return nr_present;
981} 982}
982 983
983#undef pt_element_t 984#undef pt_element_t
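
As a side note on the FNAME(gpte_access) rewrite above: since the BUILD_BUG_ONs pin ACC_EXEC_MASK to PT_PRESENT_MASK (bit 0), XOR-ing the guest PTE's NX bit (bit 63, shifted down) into the accumulated access mask clears the exec permission exactly when NX is set, replacing the old AND/NOT sequence. Below is a standalone illustration of that bit trick, with the x86 page-table constants spelled out locally; it is a sketch, not the KVM definitions.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PT_PRESENT_MASK   (1ULL << 0)
#define PT_WRITABLE_MASK  (1ULL << 1)
#define PT_USER_MASK      (1ULL << 2)
#define PT64_NX_SHIFT     63
#define ACC_EXEC_MASK     PT_PRESENT_MASK  /* the trick relies on this */

/* Mirrors the non-EPT branch of FNAME(gpte_access); assumes a present PTE,
 * which is what the guest page-table walker guarantees. */
static unsigned int gpte_access(uint64_t gpte)
{
        unsigned int access;

        access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK);
        /* Combine NX with P: NX=1 clears bit 0, NX=0 leaves it set. */
        access ^= (gpte >> PT64_NX_SHIFT);
        return access;
}

int main(void)
{
        uint64_t nx_pte   = PT_PRESENT_MASK | PT_WRITABLE_MASK | (1ULL << PT64_NX_SHIFT);
        uint64_t exec_pte = PT_PRESENT_MASK | PT_USER_MASK;

        assert(!(gpte_access(nx_pte) & ACC_EXEC_MASK));  /* NX set: not executable */
        assert(gpte_access(exec_pte) & ACC_EXEC_MASK);   /* NX clear: executable */
        printf("NX folding behaves as expected\n");
        return 0;
}
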
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 31aa2c85dc97..06ce377dcbc9 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -257,7 +257,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
257 257
258void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) 258void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
259{ 259{
260 if (vcpu->arch.apic) 260 if (lapic_in_kernel(vcpu))
261 kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC); 261 kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
262} 262}
263 263
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c13a64b7d789..95070386d599 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1858,8 +1858,7 @@ static int halt_interception(struct vcpu_svm *svm)
1858static int vmmcall_interception(struct vcpu_svm *svm) 1858static int vmmcall_interception(struct vcpu_svm *svm)
1859{ 1859{
1860 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 1860 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1861 kvm_emulate_hypercall(&svm->vcpu); 1861 return kvm_emulate_hypercall(&svm->vcpu);
1862 return 1;
1863} 1862}
1864 1863
1865static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) 1864static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index ad9f6a23f139..2f1ea2f61e1f 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -996,11 +996,13 @@ TRACE_EVENT(kvm_enter_smm,
996 * Tracepoint for VT-d posted-interrupts. 996 * Tracepoint for VT-d posted-interrupts.
997 */ 997 */
998TRACE_EVENT(kvm_pi_irte_update, 998TRACE_EVENT(kvm_pi_irte_update,
999 TP_PROTO(unsigned int vcpu_id, unsigned int gsi, 999 TP_PROTO(unsigned int host_irq, unsigned int vcpu_id,
1000 unsigned int gvec, u64 pi_desc_addr, bool set), 1000 unsigned int gsi, unsigned int gvec,
1001 TP_ARGS(vcpu_id, gsi, gvec, pi_desc_addr, set), 1001 u64 pi_desc_addr, bool set),
1002 TP_ARGS(host_irq, vcpu_id, gsi, gvec, pi_desc_addr, set),
1002 1003
1003 TP_STRUCT__entry( 1004 TP_STRUCT__entry(
1005 __field( unsigned int, host_irq )
1004 __field( unsigned int, vcpu_id ) 1006 __field( unsigned int, vcpu_id )
1005 __field( unsigned int, gsi ) 1007 __field( unsigned int, gsi )
1006 __field( unsigned int, gvec ) 1008 __field( unsigned int, gvec )
@@ -1009,6 +1011,7 @@ TRACE_EVENT(kvm_pi_irte_update,
1009 ), 1011 ),
1010 1012
1011 TP_fast_assign( 1013 TP_fast_assign(
1014 __entry->host_irq = host_irq;
1012 __entry->vcpu_id = vcpu_id; 1015 __entry->vcpu_id = vcpu_id;
1013 __entry->gsi = gsi; 1016 __entry->gsi = gsi;
1014 __entry->gvec = gvec; 1017 __entry->gvec = gvec;
@@ -1016,9 +1019,10 @@ TRACE_EVENT(kvm_pi_irte_update,
1016 __entry->set = set; 1019 __entry->set = set;
1017 ), 1020 ),
1018 1021
1019 TP_printk("VT-d PI is %s for this irq, vcpu %u, gsi: 0x%x, " 1022 TP_printk("VT-d PI is %s for irq %u, vcpu %u, gsi: 0x%x, "
1020 "gvec: 0x%x, pi_desc_addr: 0x%llx", 1023 "gvec: 0x%x, pi_desc_addr: 0x%llx",
1021 __entry->set ? "enabled and being updated" : "disabled", 1024 __entry->set ? "enabled and being updated" : "disabled",
1025 __entry->host_irq,
1022 __entry->vcpu_id, 1026 __entry->vcpu_id,
1023 __entry->gsi, 1027 __entry->gsi,
1024 __entry->gvec, 1028 __entry->gvec,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9bd8f44baded..5e45c2731a5d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -863,7 +863,6 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
863static u64 construct_eptp(unsigned long root_hpa); 863static u64 construct_eptp(unsigned long root_hpa);
864static void kvm_cpu_vmxon(u64 addr); 864static void kvm_cpu_vmxon(u64 addr);
865static void kvm_cpu_vmxoff(void); 865static void kvm_cpu_vmxoff(void);
866static bool vmx_mpx_supported(void);
867static bool vmx_xsaves_supported(void); 866static bool vmx_xsaves_supported(void);
868static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); 867static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
869static void vmx_set_segment(struct kvm_vcpu *vcpu, 868static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@ -963,25 +962,36 @@ static const u32 vmx_msr_index[] = {
963 MSR_EFER, MSR_TSC_AUX, MSR_STAR, 962 MSR_EFER, MSR_TSC_AUX, MSR_STAR,
964}; 963};
965 964
966static inline bool is_page_fault(u32 intr_info) 965static inline bool is_exception_n(u32 intr_info, u8 vector)
967{ 966{
968 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 967 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
969 INTR_INFO_VALID_MASK)) == 968 INTR_INFO_VALID_MASK)) ==
970 (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); 969 (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK);
970}
971
972static inline bool is_debug(u32 intr_info)
973{
974 return is_exception_n(intr_info, DB_VECTOR);
975}
976
977static inline bool is_breakpoint(u32 intr_info)
978{
979 return is_exception_n(intr_info, BP_VECTOR);
980}
981
982static inline bool is_page_fault(u32 intr_info)
983{
984 return is_exception_n(intr_info, PF_VECTOR);
971} 985}
972 986
973static inline bool is_no_device(u32 intr_info) 987static inline bool is_no_device(u32 intr_info)
974{ 988{
975 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 989 return is_exception_n(intr_info, NM_VECTOR);
976 INTR_INFO_VALID_MASK)) ==
977 (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
978} 990}
979 991
980static inline bool is_invalid_opcode(u32 intr_info) 992static inline bool is_invalid_opcode(u32 intr_info)
981{ 993{
982 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 994 return is_exception_n(intr_info, UD_VECTOR);
983 INTR_INFO_VALID_MASK)) ==
984 (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
985} 995}
986 996
987static inline bool is_external_interrupt(u32 intr_info) 997static inline bool is_external_interrupt(u32 intr_info)
@@ -2605,7 +2615,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
2605 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | 2615 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
2606 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; 2616 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
2607 2617
2608 if (vmx_mpx_supported()) 2618 if (kvm_mpx_supported())
2609 vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; 2619 vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
2610 2620
2611 /* We support free control of debug control saving. */ 2621 /* We support free control of debug control saving. */
@@ -2626,7 +2636,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
2626 VM_ENTRY_LOAD_IA32_PAT; 2636 VM_ENTRY_LOAD_IA32_PAT;
2627 vmx->nested.nested_vmx_entry_ctls_high |= 2637 vmx->nested.nested_vmx_entry_ctls_high |=
2628 (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); 2638 (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
2629 if (vmx_mpx_supported()) 2639 if (kvm_mpx_supported())
2630 vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; 2640 vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
2631 2641
2632 /* We support free control of debug control loading. */ 2642 /* We support free control of debug control loading. */
@@ -2870,7 +2880,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2870 msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); 2880 msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
2871 break; 2881 break;
2872 case MSR_IA32_BNDCFGS: 2882 case MSR_IA32_BNDCFGS:
2873 if (!vmx_mpx_supported()) 2883 if (!kvm_mpx_supported())
2874 return 1; 2884 return 1;
2875 msr_info->data = vmcs_read64(GUEST_BNDCFGS); 2885 msr_info->data = vmcs_read64(GUEST_BNDCFGS);
2876 break; 2886 break;
@@ -2947,7 +2957,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2947 vmcs_writel(GUEST_SYSENTER_ESP, data); 2957 vmcs_writel(GUEST_SYSENTER_ESP, data);
2948 break; 2958 break;
2949 case MSR_IA32_BNDCFGS: 2959 case MSR_IA32_BNDCFGS:
2950 if (!vmx_mpx_supported()) 2960 if (!kvm_mpx_supported())
2951 return 1; 2961 return 1;
2952 vmcs_write64(GUEST_BNDCFGS, data); 2962 vmcs_write64(GUEST_BNDCFGS, data);
2953 break; 2963 break;
@@ -3420,7 +3430,7 @@ static void init_vmcs_shadow_fields(void)
3420 for (i = j = 0; i < max_shadow_read_write_fields; i++) { 3430 for (i = j = 0; i < max_shadow_read_write_fields; i++) {
3421 switch (shadow_read_write_fields[i]) { 3431 switch (shadow_read_write_fields[i]) {
3422 case GUEST_BNDCFGS: 3432 case GUEST_BNDCFGS:
3423 if (!vmx_mpx_supported()) 3433 if (!kvm_mpx_supported())
3424 continue; 3434 continue;
3425 break; 3435 break;
3426 default: 3436 default:
@@ -5629,11 +5639,8 @@ static int handle_dr(struct kvm_vcpu *vcpu)
5629 } 5639 }
5630 5640
5631 if (vcpu->guest_debug == 0) { 5641 if (vcpu->guest_debug == 0) {
5632 u32 cpu_based_vm_exec_control; 5642 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
5633 5643 CPU_BASED_MOV_DR_EXITING);
5634 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
5635 cpu_based_vm_exec_control &= ~CPU_BASED_MOV_DR_EXITING;
5636 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
5637 5644
5638 /* 5645 /*
5639 * No more DR vmexits; force a reload of the debug registers 5646 * No more DR vmexits; force a reload of the debug registers
@@ -5670,8 +5677,6 @@ static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
5670 5677
5671static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) 5678static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
5672{ 5679{
5673 u32 cpu_based_vm_exec_control;
5674
5675 get_debugreg(vcpu->arch.db[0], 0); 5680 get_debugreg(vcpu->arch.db[0], 0);
5676 get_debugreg(vcpu->arch.db[1], 1); 5681 get_debugreg(vcpu->arch.db[1], 1);
5677 get_debugreg(vcpu->arch.db[2], 2); 5682 get_debugreg(vcpu->arch.db[2], 2);
@@ -5680,10 +5685,7 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
5680 vcpu->arch.dr7 = vmcs_readl(GUEST_DR7); 5685 vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
5681 5686
5682 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; 5687 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
5683 5688 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING);
5684 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
5685 cpu_based_vm_exec_control |= CPU_BASED_MOV_DR_EXITING;
5686 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
5687} 5689}
5688 5690
5689static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) 5691static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
@@ -5768,8 +5770,7 @@ static int handle_halt(struct kvm_vcpu *vcpu)
5768 5770
5769static int handle_vmcall(struct kvm_vcpu *vcpu) 5771static int handle_vmcall(struct kvm_vcpu *vcpu)
5770{ 5772{
5771 kvm_emulate_hypercall(vcpu); 5773 return kvm_emulate_hypercall(vcpu);
5772 return 1;
5773} 5774}
5774 5775
5775static int handle_invd(struct kvm_vcpu *vcpu) 5776static int handle_invd(struct kvm_vcpu *vcpu)
@@ -6456,8 +6457,8 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
6456 6457
6457 if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { 6458 if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
6458 /* Recycle the least recently used VMCS. */ 6459 /* Recycle the least recently used VMCS. */
6459 item = list_entry(vmx->nested.vmcs02_pool.prev, 6460 item = list_last_entry(&vmx->nested.vmcs02_pool,
6460 struct vmcs02_list, list); 6461 struct vmcs02_list, list);
6461 item->vmptr = vmx->nested.current_vmptr; 6462 item->vmptr = vmx->nested.current_vmptr;
6462 list_move(&item->list, &vmx->nested.vmcs02_pool); 6463 list_move(&item->list, &vmx->nested.vmcs02_pool);
6463 return &item->vmcs02; 6464 return &item->vmcs02;
@@ -7773,6 +7774,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
7773 else if (is_no_device(intr_info) && 7774 else if (is_no_device(intr_info) &&
7774 !(vmcs12->guest_cr0 & X86_CR0_TS)) 7775 !(vmcs12->guest_cr0 & X86_CR0_TS))
7775 return false; 7776 return false;
7777 else if (is_debug(intr_info) &&
7778 vcpu->guest_debug &
7779 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
7780 return false;
7781 else if (is_breakpoint(intr_info) &&
7782 vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
7783 return false;
7776 return vmcs12->exception_bitmap & 7784 return vmcs12->exception_bitmap &
7777 (1u << (intr_info & INTR_INFO_VECTOR_MASK)); 7785 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
7778 case EXIT_REASON_EXTERNAL_INTERRUPT: 7786 case EXIT_REASON_EXTERNAL_INTERRUPT:
@@ -10277,7 +10285,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10277 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); 10285 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
10278 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); 10286 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
10279 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); 10287 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
10280 if (vmx_mpx_supported()) 10288 if (kvm_mpx_supported())
10281 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); 10289 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
10282 if (nested_cpu_has_xsaves(vmcs12)) 10290 if (nested_cpu_has_xsaves(vmcs12))
10283 vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); 10291 vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
@@ -10785,13 +10793,26 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
10785 */ 10793 */
10786 10794
10787 kvm_set_msi_irq(e, &irq); 10795 kvm_set_msi_irq(e, &irq);
10788 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) 10796 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
10797 /*
10798 * Make sure the IRTE is in remapped mode if
10799 * we don't handle it in posted mode.
10800 */
10801 ret = irq_set_vcpu_affinity(host_irq, NULL);
10802 if (ret < 0) {
10803 printk(KERN_INFO
10804 "failed to back to remapped mode, irq: %u\n",
10805 host_irq);
10806 goto out;
10807 }
10808
10789 continue; 10809 continue;
10810 }
10790 10811
10791 vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); 10812 vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
10792 vcpu_info.vector = irq.vector; 10813 vcpu_info.vector = irq.vector;
10793 10814
10794 trace_kvm_pi_irte_update(vcpu->vcpu_id, e->gsi, 10815 trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi,
10795 vcpu_info.vector, vcpu_info.pi_desc_addr, set); 10816 vcpu_info.vector, vcpu_info.pi_desc_addr, set);
10796 10817
10797 if (set) 10818 if (set)
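
For readers less familiar with the VMX exit fields: the is_exception_n() helper added above decodes the VM-exit interruption-information word, which carries the vector in bits 7:0, the event type in bits 10:8 and a valid flag in bit 31. The standalone sketch below replays that decoding with the mask values written out locally; they mirror the kernel's VMX definitions as I recall them, so treat them as assumptions rather than the header.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define INTR_INFO_VECTOR_MASK     0xffu        /* bits 7:0  */
#define INTR_INFO_INTR_TYPE_MASK  0x700u       /* bits 10:8 */
#define INTR_INFO_VALID_MASK      0x80000000u  /* bit 31    */
#define INTR_TYPE_HARD_EXCEPTION  (3u << 8)

#define DB_VECTOR  1
#define BP_VECTOR  3
#define PF_VECTOR  14

/* Same shape as the new is_exception_n() in vmx.c. */
static bool is_exception_n(uint32_t intr_info, uint8_t vector)
{
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
                             INTR_INFO_VALID_MASK)) ==
               (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK);
}

int main(void)
{
        /* A valid hardware-exception exit with vector 14 (#PF). */
        uint32_t info = INTR_INFO_VALID_MASK | INTR_TYPE_HARD_EXCEPTION | PF_VECTOR;

        printf("#PF %d, #DB %d, #BP %d\n",
               is_exception_n(info, PF_VECTOR),
               is_exception_n(info, DB_VECTOR),
               is_exception_n(info, BP_VECTOR));
        return 0;
}
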
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eaf6ee8c28b8..7236bd3a4c3d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -123,6 +123,9 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
123unsigned int __read_mostly lapic_timer_advance_ns = 0; 123unsigned int __read_mostly lapic_timer_advance_ns = 0;
124module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); 124module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
125 125
126static bool __read_mostly vector_hashing = true;
127module_param(vector_hashing, bool, S_IRUGO);
128
126static bool __read_mostly backwards_tsc_observed = false; 129static bool __read_mostly backwards_tsc_observed = false;
127 130
128#define KVM_NR_SHARED_MSRS 16 131#define KVM_NR_SHARED_MSRS 16
@@ -1196,17 +1199,11 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
1196 1199
1197static uint32_t div_frac(uint32_t dividend, uint32_t divisor) 1200static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
1198{ 1201{
1199 uint32_t quotient, remainder; 1202 do_shl32_div32(dividend, divisor);
1200 1203 return dividend;
1201 /* Don't try to replace with do_div(), this one calculates
1202 * "(dividend << 32) / divisor" */
1203 __asm__ ( "divl %4"
1204 : "=a" (quotient), "=d" (remainder)
1205 : "0" (0), "1" (dividend), "r" (divisor) );
1206 return quotient;
1207} 1204}
1208 1205
1209static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, 1206static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
1210 s8 *pshift, u32 *pmultiplier) 1207 s8 *pshift, u32 *pmultiplier)
1211{ 1208{
1212 uint64_t scaled64; 1209 uint64_t scaled64;
@@ -1214,8 +1211,8 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
1214 uint64_t tps64; 1211 uint64_t tps64;
1215 uint32_t tps32; 1212 uint32_t tps32;
1216 1213
1217 tps64 = base_khz * 1000LL; 1214 tps64 = base_hz;
1218 scaled64 = scaled_khz * 1000LL; 1215 scaled64 = scaled_hz;
1219 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) { 1216 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
1220 tps64 >>= 1; 1217 tps64 >>= 1;
1221 shift--; 1218 shift--;
@@ -1233,8 +1230,8 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
1233 *pshift = shift; 1230 *pshift = shift;
1234 *pmultiplier = div_frac(scaled64, tps32); 1231 *pmultiplier = div_frac(scaled64, tps32);
1235 1232
1236 pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n", 1233 pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n",
1237 __func__, base_khz, scaled_khz, shift, *pmultiplier); 1234 __func__, base_hz, scaled_hz, shift, *pmultiplier);
1238} 1235}
1239 1236
1240#ifdef CONFIG_X86_64 1237#ifdef CONFIG_X86_64
@@ -1293,23 +1290,23 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
1293 return 0; 1290 return 0;
1294} 1291}
1295 1292
1296static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) 1293static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
1297{ 1294{
1298 u32 thresh_lo, thresh_hi; 1295 u32 thresh_lo, thresh_hi;
1299 int use_scaling = 0; 1296 int use_scaling = 0;
1300 1297
1301 /* tsc_khz can be zero if TSC calibration fails */ 1298 /* tsc_khz can be zero if TSC calibration fails */
1302 if (this_tsc_khz == 0) { 1299 if (user_tsc_khz == 0) {
1303 /* set tsc_scaling_ratio to a safe value */ 1300 /* set tsc_scaling_ratio to a safe value */
1304 vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; 1301 vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
1305 return -1; 1302 return -1;
1306 } 1303 }
1307 1304
1308 /* Compute a scale to convert nanoseconds in TSC cycles */ 1305 /* Compute a scale to convert nanoseconds in TSC cycles */
1309 kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, 1306 kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
1310 &vcpu->arch.virtual_tsc_shift, 1307 &vcpu->arch.virtual_tsc_shift,
1311 &vcpu->arch.virtual_tsc_mult); 1308 &vcpu->arch.virtual_tsc_mult);
1312 vcpu->arch.virtual_tsc_khz = this_tsc_khz; 1309 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
1313 1310
1314 /* 1311 /*
1315 * Compute the variation in TSC rate which is acceptable 1312 * Compute the variation in TSC rate which is acceptable
@@ -1319,11 +1316,11 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
1319 */ 1316 */
1320 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm); 1317 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
1321 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm); 1318 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
1322 if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) { 1319 if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
1323 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi); 1320 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
1324 use_scaling = 1; 1321 use_scaling = 1;
1325 } 1322 }
1326 return set_tsc_khz(vcpu, this_tsc_khz, use_scaling); 1323 return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
1327} 1324}
1328 1325
1329static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) 1326static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
@@ -1716,7 +1713,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
1716 1713
1717static int kvm_guest_time_update(struct kvm_vcpu *v) 1714static int kvm_guest_time_update(struct kvm_vcpu *v)
1718{ 1715{
1719 unsigned long flags, this_tsc_khz, tgt_tsc_khz; 1716 unsigned long flags, tgt_tsc_khz;
1720 struct kvm_vcpu_arch *vcpu = &v->arch; 1717 struct kvm_vcpu_arch *vcpu = &v->arch;
1721 struct kvm_arch *ka = &v->kvm->arch; 1718 struct kvm_arch *ka = &v->kvm->arch;
1722 s64 kernel_ns; 1719 s64 kernel_ns;
@@ -1742,8 +1739,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1742 1739
1743 /* Keep irq disabled to prevent changes to the clock */ 1740 /* Keep irq disabled to prevent changes to the clock */
1744 local_irq_save(flags); 1741 local_irq_save(flags);
1745 this_tsc_khz = __this_cpu_read(cpu_tsc_khz); 1742 tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
1746 if (unlikely(this_tsc_khz == 0)) { 1743 if (unlikely(tgt_tsc_khz == 0)) {
1747 local_irq_restore(flags); 1744 local_irq_restore(flags);
1748 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); 1745 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
1749 return 1; 1746 return 1;
@@ -1778,13 +1775,14 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1778 if (!vcpu->pv_time_enabled) 1775 if (!vcpu->pv_time_enabled)
1779 return 0; 1776 return 0;
1780 1777
1781 if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { 1778 if (kvm_has_tsc_control)
1782 tgt_tsc_khz = kvm_has_tsc_control ? 1779 tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
1783 vcpu->virtual_tsc_khz : this_tsc_khz; 1780
1784 kvm_get_time_scale(NSEC_PER_SEC / 1000, tgt_tsc_khz, 1781 if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
1782 kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
1785 &vcpu->hv_clock.tsc_shift, 1783 &vcpu->hv_clock.tsc_shift,
1786 &vcpu->hv_clock.tsc_to_system_mul); 1784 &vcpu->hv_clock.tsc_to_system_mul);
1787 vcpu->hw_tsc_khz = this_tsc_khz; 1785 vcpu->hw_tsc_khz = tgt_tsc_khz;
1788 } 1786 }
1789 1787
1790 /* With all the info we got, fill in the values */ 1788 /* With all the info we got, fill in the values */
@@ -2987,7 +2985,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2987 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); 2985 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
2988 2986
2989 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR && 2987 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
2990 kvm_vcpu_has_lapic(vcpu)) 2988 lapic_in_kernel(vcpu))
2991 vcpu->arch.apic->sipi_vector = events->sipi_vector; 2989 vcpu->arch.apic->sipi_vector = events->sipi_vector;
2992 2990
2993 if (events->flags & KVM_VCPUEVENT_VALID_SMM) { 2991 if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
@@ -3000,7 +2998,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
3000 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; 2998 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
3001 else 2999 else
3002 vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; 3000 vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
3003 if (kvm_vcpu_has_lapic(vcpu)) { 3001 if (lapic_in_kernel(vcpu)) {
3004 if (events->smi.latched_init) 3002 if (events->smi.latched_init)
3005 set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); 3003 set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
3006 else 3004 else
@@ -3240,7 +3238,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
3240 switch (ioctl) { 3238 switch (ioctl) {
3241 case KVM_GET_LAPIC: { 3239 case KVM_GET_LAPIC: {
3242 r = -EINVAL; 3240 r = -EINVAL;
3243 if (!vcpu->arch.apic) 3241 if (!lapic_in_kernel(vcpu))
3244 goto out; 3242 goto out;
3245 u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); 3243 u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
3246 3244
@@ -3258,7 +3256,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
3258 } 3256 }
3259 case KVM_SET_LAPIC: { 3257 case KVM_SET_LAPIC: {
3260 r = -EINVAL; 3258 r = -EINVAL;
3261 if (!vcpu->arch.apic) 3259 if (!lapic_in_kernel(vcpu))
3262 goto out; 3260 goto out;
3263 u.lapic = memdup_user(argp, sizeof(*u.lapic)); 3261 u.lapic = memdup_user(argp, sizeof(*u.lapic));
3264 if (IS_ERR(u.lapic)) 3262 if (IS_ERR(u.lapic))
@@ -3605,20 +3603,26 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3605 3603
3606static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) 3604static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3607{ 3605{
3608 mutex_lock(&kvm->arch.vpit->pit_state.lock); 3606 struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
3609 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state)); 3607
3610 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 3608 BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
3609
3610 mutex_lock(&kps->lock);
3611 memcpy(ps, &kps->channels, sizeof(*ps));
3612 mutex_unlock(&kps->lock);
3611 return 0; 3613 return 0;
3612} 3614}
3613 3615
3614static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) 3616static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3615{ 3617{
3616 int i; 3618 int i;
3617 mutex_lock(&kvm->arch.vpit->pit_state.lock); 3619 struct kvm_pit *pit = kvm->arch.vpit;
3618 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); 3620
3621 mutex_lock(&pit->pit_state.lock);
3622 memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
3619 for (i = 0; i < 3; i++) 3623 for (i = 0; i < 3; i++)
3620 kvm_pit_load_count(kvm, i, ps->channels[i].count, 0); 3624 kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
3621 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 3625 mutex_unlock(&pit->pit_state.lock);
3622 return 0; 3626 return 0;
3623} 3627}
3624 3628
@@ -3638,29 +3642,39 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3638 int start = 0; 3642 int start = 0;
3639 int i; 3643 int i;
3640 u32 prev_legacy, cur_legacy; 3644 u32 prev_legacy, cur_legacy;
3641 mutex_lock(&kvm->arch.vpit->pit_state.lock); 3645 struct kvm_pit *pit = kvm->arch.vpit;
3642 prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; 3646
3647 mutex_lock(&pit->pit_state.lock);
3648 prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
3643 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY; 3649 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
3644 if (!prev_legacy && cur_legacy) 3650 if (!prev_legacy && cur_legacy)
3645 start = 1; 3651 start = 1;
3646 memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels, 3652 memcpy(&pit->pit_state.channels, &ps->channels,
3647 sizeof(kvm->arch.vpit->pit_state.channels)); 3653 sizeof(pit->pit_state.channels));
3648 kvm->arch.vpit->pit_state.flags = ps->flags; 3654 pit->pit_state.flags = ps->flags;
3649 for (i = 0; i < 3; i++) 3655 for (i = 0; i < 3; i++)
3650 kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count, 3656 kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
3651 start && i == 0); 3657 start && i == 0);
3652 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 3658 mutex_unlock(&pit->pit_state.lock);
3653 return 0; 3659 return 0;
3654} 3660}
3655 3661
3656static int kvm_vm_ioctl_reinject(struct kvm *kvm, 3662static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3657 struct kvm_reinject_control *control) 3663 struct kvm_reinject_control *control)
3658{ 3664{
3659 if (!kvm->arch.vpit) 3665 struct kvm_pit *pit = kvm->arch.vpit;
3666
3667 if (!pit)
3660 return -ENXIO; 3668 return -ENXIO;
3661 mutex_lock(&kvm->arch.vpit->pit_state.lock); 3669
3662 kvm->arch.vpit->pit_state.reinject = control->pit_reinject; 3670 /* pit->pit_state.lock was overloaded to prevent userspace from getting
3663 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 3671 * an inconsistent state after running multiple KVM_REINJECT_CONTROL
3672 * ioctls in parallel. Use a separate lock if that ioctl isn't rare.
3673 */
3674 mutex_lock(&pit->pit_state.lock);
3675 kvm_pit_set_reinject(pit, control->pit_reinject);
3676 mutex_unlock(&pit->pit_state.lock);
3677
3664 return 0; 3678 return 0;
3665} 3679}
3666 3680
@@ -4093,7 +4107,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
4093 4107
4094 do { 4108 do {
4095 n = min(len, 8); 4109 n = min(len, 8);
4096 if (!(vcpu->arch.apic && 4110 if (!(lapic_in_kernel(vcpu) &&
4097 !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v)) 4111 !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
4098 && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v)) 4112 && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
4099 break; 4113 break;
@@ -4113,7 +4127,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
4113 4127
4114 do { 4128 do {
4115 n = min(len, 8); 4129 n = min(len, 8);
4116 if (!(vcpu->arch.apic && 4130 if (!(lapic_in_kernel(vcpu) &&
4117 !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev, 4131 !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
4118 addr, n, v)) 4132 addr, n, v))
4119 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) 4133 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
@@ -4346,7 +4360,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
4346 ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes); 4360 ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
4347 if (ret < 0) 4361 if (ret < 0)
4348 return 0; 4362 return 0;
4349 kvm_mmu_pte_write(vcpu, gpa, val, bytes); 4363 kvm_page_track_write(vcpu, gpa, val, bytes);
4350 return 1; 4364 return 1;
4351} 4365}
4352 4366
@@ -4604,7 +4618,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
4604 return X86EMUL_CMPXCHG_FAILED; 4618 return X86EMUL_CMPXCHG_FAILED;
4605 4619
4606 kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); 4620 kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
4607 kvm_mmu_pte_write(vcpu, gpa, new, bytes); 4621 kvm_page_track_write(vcpu, gpa, new, bytes);
4608 4622
4609 return X86EMUL_CONTINUE; 4623 return X86EMUL_CONTINUE;
4610 4624
@@ -6010,7 +6024,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
6010 if (!kvm_x86_ops->update_cr8_intercept) 6024 if (!kvm_x86_ops->update_cr8_intercept)
6011 return; 6025 return;
6012 6026
6013 if (!vcpu->arch.apic) 6027 if (!lapic_in_kernel(vcpu))
6014 return; 6028 return;
6015 6029
6016 if (vcpu->arch.apicv_active) 6030 if (vcpu->arch.apicv_active)
@@ -7038,7 +7052,7 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
7038int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 7052int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
7039 struct kvm_mp_state *mp_state) 7053 struct kvm_mp_state *mp_state)
7040{ 7054{
7041 if (!kvm_vcpu_has_lapic(vcpu) && 7055 if (!lapic_in_kernel(vcpu) &&
7042 mp_state->mp_state != KVM_MP_STATE_RUNNABLE) 7056 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
7043 return -EINVAL; 7057 return -EINVAL;
7044 7058
@@ -7314,7 +7328,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
7314 * Every 255 times fpu_counter rolls over to 0; a guest that uses 7328 * Every 255 times fpu_counter rolls over to 0; a guest that uses
7315 * the FPU in bursts will revert to loading it on demand. 7329 * the FPU in bursts will revert to loading it on demand.
7316 */ 7330 */
7317 if (!vcpu->arch.eager_fpu) { 7331 if (!use_eager_fpu()) {
7318 if (++vcpu->fpu_counter < 5) 7332 if (++vcpu->fpu_counter < 5)
7319 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); 7333 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
7320 } 7334 }
@@ -7593,6 +7607,7 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
7593} 7607}
7594 7608
7595struct static_key kvm_no_apic_vcpu __read_mostly; 7609struct static_key kvm_no_apic_vcpu __read_mostly;
7610EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
7596 7611
7597int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 7612int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
7598{ 7613{
@@ -7724,6 +7739,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
7724 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); 7739 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
7725 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); 7740 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
7726 7741
7742 kvm_page_track_init(kvm);
7743 kvm_mmu_init_vm(kvm);
7744
7727 return 0; 7745 return 0;
7728} 7746}
7729 7747
@@ -7850,6 +7868,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
7850 kfree(kvm->arch.vioapic); 7868 kfree(kvm->arch.vioapic);
7851 kvm_free_vcpus(kvm); 7869 kvm_free_vcpus(kvm);
7852 kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); 7870 kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
7871 kvm_mmu_uninit_vm(kvm);
7853} 7872}
7854 7873
7855void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 7874void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
@@ -7871,6 +7890,8 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
7871 free->arch.lpage_info[i - 1] = NULL; 7890 free->arch.lpage_info[i - 1] = NULL;
7872 } 7891 }
7873 } 7892 }
7893
7894 kvm_page_track_free_memslot(free, dont);
7874} 7895}
7875 7896
7876int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 7897int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
@@ -7879,6 +7900,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
7879 int i; 7900 int i;
7880 7901
7881 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { 7902 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
7903 struct kvm_lpage_info *linfo;
7882 unsigned long ugfn; 7904 unsigned long ugfn;
7883 int lpages; 7905 int lpages;
7884 int level = i + 1; 7906 int level = i + 1;
@@ -7893,15 +7915,16 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
7893 if (i == 0) 7915 if (i == 0)
7894 continue; 7916 continue;
7895 7917
7896 slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages * 7918 linfo = kvm_kvzalloc(lpages * sizeof(*linfo));
7897 sizeof(*slot->arch.lpage_info[i - 1])); 7919 if (!linfo)
7898 if (!slot->arch.lpage_info[i - 1])
7899 goto out_free; 7920 goto out_free;
7900 7921
7922 slot->arch.lpage_info[i - 1] = linfo;
7923
7901 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) 7924 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
7902 slot->arch.lpage_info[i - 1][0].write_count = 1; 7925 linfo[0].disallow_lpage = 1;
7903 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) 7926 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
7904 slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1; 7927 linfo[lpages - 1].disallow_lpage = 1;
7905 ugfn = slot->userspace_addr >> PAGE_SHIFT; 7928 ugfn = slot->userspace_addr >> PAGE_SHIFT;
7906 /* 7929 /*
7907 * If the gfn and userspace address are not aligned wrt each 7930 * If the gfn and userspace address are not aligned wrt each
@@ -7913,10 +7936,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
7913 unsigned long j; 7936 unsigned long j;
7914 7937
7915 for (j = 0; j < lpages; ++j) 7938 for (j = 0; j < lpages; ++j)
7916 slot->arch.lpage_info[i - 1][j].write_count = 1; 7939 linfo[j].disallow_lpage = 1;
7917 } 7940 }
7918 } 7941 }
7919 7942
7943 if (kvm_page_track_create_memslot(slot, npages))
7944 goto out_free;
7945
7920 return 0; 7946 return 0;
7921 7947
7922out_free: 7948out_free:
@@ -8370,6 +8396,12 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
8370 return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set); 8396 return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
8371} 8397}
8372 8398
8399bool kvm_vector_hashing_enabled(void)
8400{
8401 return vector_hashing;
8402}
8403EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
8404
8373EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); 8405EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
8374EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); 8406EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
8375EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); 8407EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
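
On the kvm_get_time_scale() change above: moving the interface from kHz to Hz avoids truncation when the callers scale the rates, but the (tsc_shift, tsc_to_system_mul) pair it emits is still consumed in the usual pvclock fashion, roughly ns = ((tsc_delta << shift) * mul) >> 32, with a negative shift meaning a right shift. The formula and the hand-picked 2 GHz pair below are my own working assumptions for illustration, not taken from this diff.

#include <stdint.h>
#include <stdio.h>

/* pvclock-style scaling: ns = ((delta << shift) * mul) >> 32, shift may be < 0. */
static uint64_t scale_delta(uint64_t delta, uint32_t mul, int8_t shift)
{
        if (shift >= 0)
                delta <<= shift;
        else
                delta >>= -shift;
        return (uint64_t)(((unsigned __int128)delta * mul) >> 32);
}

int main(void)
{
        /*
         * For a 2 GHz TSC, one nanosecond is half a cycle, so a suitable
         * pair is shift = 0 and mul = 2^31 (0.5 in 32.32 fixed point).
         */
        uint32_t mul = 1u << 31;
        int8_t shift = 0;
        uint64_t one_second_of_cycles = 2000000000ull;

        printf("%llu ns\n",
               (unsigned long long)scale_delta(one_second_of_cycles, mul, shift));
        return 0;
}
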
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index f2afa5fe48a6..007940faa5c6 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -179,6 +179,7 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
179int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); 179int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
180bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, 180bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
181 int page_num); 181 int page_num);
182bool kvm_vector_hashing_enabled(void);
182 183
183#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ 184#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
184 | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \ 185 | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
@@ -192,4 +193,19 @@ extern unsigned int min_timer_period_us;
192extern unsigned int lapic_timer_advance_ns; 193extern unsigned int lapic_timer_advance_ns;
193 194
194extern struct static_key kvm_no_apic_vcpu; 195extern struct static_key kvm_no_apic_vcpu;
196
197/* Same "calling convention" as do_div:
198 * - divide (n << 32) by base
199 * - put result in n
200 * - return remainder
201 */
202#define do_shl32_div32(n, base) \
203 ({ \
204 u32 __quot, __rem; \
205 asm("divl %2" : "=a" (__quot), "=d" (__rem) \
206 : "rm" (base), "0" (0), "1" ((u32) n)); \
207 n = __quot; \
208 __rem; \
209 })
210
195#endif 211#endif
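
The do_shl32_div32() helper added above follows the contract in its comment: divide (n << 32) by base, leave the quotient in n and return the remainder, all with a single 32-bit divl. A portable sketch of the same contract using plain 64-bit arithmetic; note that, like do_div(), the real macro assumes the quotient fits in 32 bits, otherwise divl faults.

#include <stdint.h>
#include <stdio.h>

/* Portable stand-in for do_shl32_div32(): (n << 32) / base, quotient back
 * into n, remainder returned.  Assumes the quotient fits in 32 bits. */
static uint32_t shl32_div32(uint32_t *n, uint32_t base)
{
        uint64_t dividend = (uint64_t)*n << 32;
        uint32_t rem = (uint32_t)(dividend % base);

        *n = (uint32_t)(dividend / base);
        return rem;
}

int main(void)
{
        uint32_t n = 3;                        /* compute (3 << 32) / 7 */
        uint32_t rem = shl32_div32(&n, 7);

        printf("quot=%u rem=%u\n", n, rem);    /* quot=1840700269 rem=5 */
        return 0;
}

This mirrors how the rewritten div_frac() in x86.c above now simply invokes do_shl32_div32(dividend, divisor) and returns the dividend.
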
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index f0dd9d42bc7b..5152b3898155 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -75,7 +75,7 @@ static int arch_timer_ppi[MAX_TIMER_PPI];
75 75
76static struct clock_event_device __percpu *arch_timer_evt; 76static struct clock_event_device __percpu *arch_timer_evt;
77 77
78static bool arch_timer_use_virtual = true; 78static enum ppi_nr arch_timer_uses_ppi = VIRT_PPI;
79static bool arch_timer_c3stop; 79static bool arch_timer_c3stop;
80static bool arch_timer_mem_use_virtual; 80static bool arch_timer_mem_use_virtual;
81 81
@@ -271,16 +271,22 @@ static void __arch_timer_setup(unsigned type,
271 clk->name = "arch_sys_timer"; 271 clk->name = "arch_sys_timer";
272 clk->rating = 450; 272 clk->rating = 450;
273 clk->cpumask = cpumask_of(smp_processor_id()); 273 clk->cpumask = cpumask_of(smp_processor_id());
274 if (arch_timer_use_virtual) { 274 clk->irq = arch_timer_ppi[arch_timer_uses_ppi];
275 clk->irq = arch_timer_ppi[VIRT_PPI]; 275 switch (arch_timer_uses_ppi) {
276 case VIRT_PPI:
276 clk->set_state_shutdown = arch_timer_shutdown_virt; 277 clk->set_state_shutdown = arch_timer_shutdown_virt;
277 clk->set_state_oneshot_stopped = arch_timer_shutdown_virt; 278 clk->set_state_oneshot_stopped = arch_timer_shutdown_virt;
278 clk->set_next_event = arch_timer_set_next_event_virt; 279 clk->set_next_event = arch_timer_set_next_event_virt;
279 } else { 280 break;
280 clk->irq = arch_timer_ppi[PHYS_SECURE_PPI]; 281 case PHYS_SECURE_PPI:
282 case PHYS_NONSECURE_PPI:
283 case HYP_PPI:
281 clk->set_state_shutdown = arch_timer_shutdown_phys; 284 clk->set_state_shutdown = arch_timer_shutdown_phys;
282 clk->set_state_oneshot_stopped = arch_timer_shutdown_phys; 285 clk->set_state_oneshot_stopped = arch_timer_shutdown_phys;
283 clk->set_next_event = arch_timer_set_next_event_phys; 286 clk->set_next_event = arch_timer_set_next_event_phys;
287 break;
288 default:
289 BUG();
284 } 290 }
285 } else { 291 } else {
286 clk->features |= CLOCK_EVT_FEAT_DYNIRQ; 292 clk->features |= CLOCK_EVT_FEAT_DYNIRQ;
@@ -350,17 +356,20 @@ static void arch_counter_set_user_access(void)
350 arch_timer_set_cntkctl(cntkctl); 356 arch_timer_set_cntkctl(cntkctl);
351} 357}
352 358
359static bool arch_timer_has_nonsecure_ppi(void)
360{
361 return (arch_timer_uses_ppi == PHYS_SECURE_PPI &&
362 arch_timer_ppi[PHYS_NONSECURE_PPI]);
363}
364
353static int arch_timer_setup(struct clock_event_device *clk) 365static int arch_timer_setup(struct clock_event_device *clk)
354{ 366{
355 __arch_timer_setup(ARCH_CP15_TIMER, clk); 367 __arch_timer_setup(ARCH_CP15_TIMER, clk);
356 368
357 if (arch_timer_use_virtual) 369 enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], 0);
358 enable_percpu_irq(arch_timer_ppi[VIRT_PPI], 0); 370
359 else { 371 if (arch_timer_has_nonsecure_ppi())
360 enable_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], 0); 372 enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0);
361 if (arch_timer_ppi[PHYS_NONSECURE_PPI])
362 enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0);
363 }
364 373
365 arch_counter_set_user_access(); 374 arch_counter_set_user_access();
366 if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM)) 375 if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM))
@@ -402,7 +411,7 @@ static void arch_timer_banner(unsigned type)
402 (unsigned long)arch_timer_rate / 1000000, 411 (unsigned long)arch_timer_rate / 1000000,
403 (unsigned long)(arch_timer_rate / 10000) % 100, 412 (unsigned long)(arch_timer_rate / 10000) % 100,
404 type & ARCH_CP15_TIMER ? 413 type & ARCH_CP15_TIMER ?
405 arch_timer_use_virtual ? "virt" : "phys" : 414 (arch_timer_uses_ppi == VIRT_PPI) ? "virt" : "phys" :
406 "", 415 "",
407 type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ? "/" : "", 416 type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ? "/" : "",
408 type & ARCH_MEM_TIMER ? 417 type & ARCH_MEM_TIMER ?
@@ -472,7 +481,7 @@ static void __init arch_counter_register(unsigned type)
472 481
473 /* Register the CP15 based counter if we have one */ 482 /* Register the CP15 based counter if we have one */
474 if (type & ARCH_CP15_TIMER) { 483 if (type & ARCH_CP15_TIMER) {
475 if (IS_ENABLED(CONFIG_ARM64) || arch_timer_use_virtual) 484 if (IS_ENABLED(CONFIG_ARM64) || arch_timer_uses_ppi == VIRT_PPI)
476 arch_timer_read_counter = arch_counter_get_cntvct; 485 arch_timer_read_counter = arch_counter_get_cntvct;
477 else 486 else
478 arch_timer_read_counter = arch_counter_get_cntpct; 487 arch_timer_read_counter = arch_counter_get_cntpct;
@@ -502,13 +511,9 @@ static void arch_timer_stop(struct clock_event_device *clk)
502 pr_debug("arch_timer_teardown disable IRQ%d cpu #%d\n", 511 pr_debug("arch_timer_teardown disable IRQ%d cpu #%d\n",
503 clk->irq, smp_processor_id()); 512 clk->irq, smp_processor_id());
504 513
505 if (arch_timer_use_virtual) 514 disable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi]);
506 disable_percpu_irq(arch_timer_ppi[VIRT_PPI]); 515 if (arch_timer_has_nonsecure_ppi())
507 else { 516 disable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI]);
508 disable_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI]);
509 if (arch_timer_ppi[PHYS_NONSECURE_PPI])
510 disable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI]);
511 }
512 517
513 clk->set_state_shutdown(clk); 518 clk->set_state_shutdown(clk);
514} 519}
@@ -574,12 +579,14 @@ static int __init arch_timer_register(void)
574 goto out; 579 goto out;
575 } 580 }
576 581
577 if (arch_timer_use_virtual) { 582 ppi = arch_timer_ppi[arch_timer_uses_ppi];
578 ppi = arch_timer_ppi[VIRT_PPI]; 583 switch (arch_timer_uses_ppi) {
584 case VIRT_PPI:
579 err = request_percpu_irq(ppi, arch_timer_handler_virt, 585 err = request_percpu_irq(ppi, arch_timer_handler_virt,
580 "arch_timer", arch_timer_evt); 586 "arch_timer", arch_timer_evt);
581 } else { 587 break;
582 ppi = arch_timer_ppi[PHYS_SECURE_PPI]; 588 case PHYS_SECURE_PPI:
589 case PHYS_NONSECURE_PPI:
583 err = request_percpu_irq(ppi, arch_timer_handler_phys, 590 err = request_percpu_irq(ppi, arch_timer_handler_phys,
584 "arch_timer", arch_timer_evt); 591 "arch_timer", arch_timer_evt);
585 if (!err && arch_timer_ppi[PHYS_NONSECURE_PPI]) { 592 if (!err && arch_timer_ppi[PHYS_NONSECURE_PPI]) {
@@ -590,6 +597,13 @@ static int __init arch_timer_register(void)
590 free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], 597 free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI],
591 arch_timer_evt); 598 arch_timer_evt);
592 } 599 }
600 break;
601 case HYP_PPI:
602 err = request_percpu_irq(ppi, arch_timer_handler_phys,
603 "arch_timer", arch_timer_evt);
604 break;
605 default:
606 BUG();
593 } 607 }
594 608
595 if (err) { 609 if (err) {
@@ -614,15 +628,10 @@ static int __init arch_timer_register(void)
614out_unreg_notify: 628out_unreg_notify:
615 unregister_cpu_notifier(&arch_timer_cpu_nb); 629 unregister_cpu_notifier(&arch_timer_cpu_nb);
616out_free_irq: 630out_free_irq:
617 if (arch_timer_use_virtual) 631 free_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], arch_timer_evt);
618 free_percpu_irq(arch_timer_ppi[VIRT_PPI], arch_timer_evt); 632 if (arch_timer_has_nonsecure_ppi())
619 else { 633 free_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI],
620 free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI],
621 arch_timer_evt); 634 arch_timer_evt);
622 if (arch_timer_ppi[PHYS_NONSECURE_PPI])
623 free_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI],
624 arch_timer_evt);
625 }
626 635
627out_free: 636out_free:
628 free_percpu(arch_timer_evt); 637 free_percpu(arch_timer_evt);
@@ -709,12 +718,25 @@ static void __init arch_timer_init(void)
709 * 718 *
710 * If no interrupt provided for virtual timer, we'll have to 719 * If no interrupt provided for virtual timer, we'll have to
711 * stick to the physical timer. It'd better be accessible... 720 * stick to the physical timer. It'd better be accessible...
721 *
722 * On ARMv8.1 with VH extensions, the kernel runs in HYP. With VHE,
723 * accesses to CNTP_*_EL1 registers are silently redirected to
724 * their CNTHP_*_EL2 counterparts, and use a different PPI
725 * number.
712 */ 726 */
713 if (is_hyp_mode_available() || !arch_timer_ppi[VIRT_PPI]) { 727 if (is_hyp_mode_available() || !arch_timer_ppi[VIRT_PPI]) {
714 arch_timer_use_virtual = false; 728 bool has_ppi;
729
730 if (is_kernel_in_hyp_mode()) {
731 arch_timer_uses_ppi = HYP_PPI;
732 has_ppi = !!arch_timer_ppi[HYP_PPI];
733 } else {
734 arch_timer_uses_ppi = PHYS_SECURE_PPI;
735 has_ppi = (!!arch_timer_ppi[PHYS_SECURE_PPI] ||
736 !!arch_timer_ppi[PHYS_NONSECURE_PPI]);
737 }
715 738
716 if (!arch_timer_ppi[PHYS_SECURE_PPI] || 739 if (!has_ppi) {
717 !arch_timer_ppi[PHYS_NONSECURE_PPI]) {
718 pr_warn("arch_timer: No interrupt available, giving up\n"); 740 pr_warn("arch_timer: No interrupt available, giving up\n");
719 return; 741 return;
720 } 742 }
@@ -747,7 +769,7 @@ static void __init arch_timer_of_init(struct device_node *np)
747 */ 769 */
748 if (IS_ENABLED(CONFIG_ARM) && 770 if (IS_ENABLED(CONFIG_ARM) &&
749 of_property_read_bool(np, "arm,cpu-registers-not-fw-configured")) 771 of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
750 arch_timer_use_virtual = false; 772 arch_timer_uses_ppi = PHYS_SECURE_PPI;
751 773
752 arch_timer_init(); 774 arch_timer_init();
753} 775}
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 4ebc796b4f33..2f8c0f40930b 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -256,12 +256,6 @@ struct hv_monitor_page {
256 u8 rsvdz4[1984]; 256 u8 rsvdz4[1984];
257}; 257};
258 258
259/* Declare the various hypercall operations. */
260enum hv_call_code {
261 HVCALL_POST_MESSAGE = 0x005c,
262 HVCALL_SIGNAL_EVENT = 0x005d,
263};
264
265/* Definition of the hv_post_message hypercall input structure. */ 259/* Definition of the hv_post_message hypercall input structure. */
266struct hv_input_post_message { 260struct hv_input_post_message {
267 union hv_connection_id connectionid; 261 union hv_connection_id connectionid;
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 1800227af9d6..b651aed9dc6b 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -55,6 +55,9 @@ struct arch_timer_cpu {
55 55
56 /* VGIC mapping */ 56 /* VGIC mapping */
57 struct irq_phys_map *map; 57 struct irq_phys_map *map;
58
59 /* Active IRQ state caching */
60 bool active_cleared_last;
58}; 61};
59 62
60int kvm_timer_hyp_init(void); 63int kvm_timer_hyp_init(void);
@@ -74,4 +77,6 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu);
74void kvm_timer_schedule(struct kvm_vcpu *vcpu); 77void kvm_timer_schedule(struct kvm_vcpu *vcpu);
75void kvm_timer_unschedule(struct kvm_vcpu *vcpu); 78void kvm_timer_unschedule(struct kvm_vcpu *vcpu);
76 79
80void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu);
81
77#endif 82#endif
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
new file mode 100644
index 000000000000..fe389ac31489
--- /dev/null
+++ b/include/kvm/arm_pmu.h
@@ -0,0 +1,110 @@
1/*
2 * Copyright (C) 2015 Linaro Ltd.
3 * Author: Shannon Zhao <shannon.zhao@linaro.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __ASM_ARM_KVM_PMU_H
19#define __ASM_ARM_KVM_PMU_H
20
21#ifdef CONFIG_KVM_ARM_PMU
22
23#include <linux/perf_event.h>
24#include <asm/perf_event.h>
25
26#define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1)
27
28struct kvm_pmc {
29 u8 idx; /* index into the pmu->pmc array */
30 struct perf_event *perf_event;
31 u64 bitmask;
32};
33
34struct kvm_pmu {
35 int irq_num;
36 struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS];
37 bool ready;
38 bool irq_level;
39};
40
41#define kvm_arm_pmu_v3_ready(v) ((v)->arch.pmu.ready)
42#define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS)
43u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
44void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
45u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu);
46void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu);
47void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu);
48void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val);
49void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val);
50void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val);
51void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu);
52void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu);
53void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val);
54void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val);
55void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
56 u64 select_idx);
57bool kvm_arm_support_pmu_v3(void);
58int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu,
59 struct kvm_device_attr *attr);
60int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu,
61 struct kvm_device_attr *attr);
62int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu,
63 struct kvm_device_attr *attr);
64#else
65struct kvm_pmu {
66};
67
68#define kvm_arm_pmu_v3_ready(v) (false)
69#define kvm_arm_pmu_irq_initialized(v) (false)
70static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
71 u64 select_idx)
72{
73 return 0;
74}
75static inline void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu,
76 u64 select_idx, u64 val) {}
77static inline u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
78{
79 return 0;
80}
81static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {}
82static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {}
83static inline void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val) {}
84static inline void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) {}
85static inline void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) {}
86static inline void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) {}
87static inline void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {}
88static inline void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) {}
89static inline void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) {}
90static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu,
91 u64 data, u64 select_idx) {}
92static inline bool kvm_arm_support_pmu_v3(void) { return false; }
93static inline int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu,
94 struct kvm_device_attr *attr)
95{
96 return -ENXIO;
97}
98static inline int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu,
99 struct kvm_device_attr *attr)
100{
101 return -ENXIO;
102}
103static inline int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu,
104 struct kvm_device_attr *attr)
105{
106 return -ENXIO;
107}
108#endif
109
110#endif
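
The !CONFIG_KVM_ARM_PMU half of this new header exists so that common KVM/ARM code can call the PMU hooks unconditionally and have them compile away when PMU support is not built in. A hypothetical call site, purely for illustration; the function name here is made up, and the real callers sit in the KVM/ARM guest entry/exit path.

#include <linux/kvm_host.h>
#include <kvm/arm_pmu.h>

static void example_vcpu_entry(struct kvm_vcpu *vcpu)
{
        /*
         * With CONFIG_KVM_ARM_PMU=n this is the empty inline stub above and
         * costs nothing; with the PMU enabled it does the real work before
         * the guest is entered.
         */
        kvm_pmu_flush_hwstate(vcpu);
}
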
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 13a3d537811b..281caf847fad 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -279,12 +279,6 @@ struct vgic_v2_cpu_if {
279 u32 vgic_lr[VGIC_V2_MAX_LRS]; 279 u32 vgic_lr[VGIC_V2_MAX_LRS];
280}; 280};
281 281
282/*
283 * LRs are stored in reverse order in memory. make sure we index them
284 * correctly.
285 */
286#define VGIC_V3_LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr)
287
288struct vgic_v3_cpu_if { 282struct vgic_v3_cpu_if {
289#ifdef CONFIG_KVM_ARM_VGIC_V3 283#ifdef CONFIG_KVM_ARM_VGIC_V3
290 u32 vgic_hcr; 284 u32 vgic_hcr;
@@ -321,6 +315,8 @@ struct vgic_cpu {
321 315
322 /* Protected by the distributor's irq_phys_map_lock */ 316 /* Protected by the distributor's irq_phys_map_lock */
323 struct list_head irq_phys_map_list; 317 struct list_head irq_phys_map_list;
318
319 u64 live_lrs;
324}; 320};
325 321
326#define LR_EMPTY 0xff 322#define LR_EMPTY 0xff
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index d6f83222a6a1..aa69253ecc7d 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -359,14 +359,15 @@ TRACE_EVENT(
359#endif 359#endif
360 360
361TRACE_EVENT(kvm_halt_poll_ns, 361TRACE_EVENT(kvm_halt_poll_ns,
362 TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), 362 TP_PROTO(bool grow, unsigned int vcpu_id, unsigned int new,
363 unsigned int old),
363 TP_ARGS(grow, vcpu_id, new, old), 364 TP_ARGS(grow, vcpu_id, new, old),
364 365
365 TP_STRUCT__entry( 366 TP_STRUCT__entry(
366 __field(bool, grow) 367 __field(bool, grow)
367 __field(unsigned int, vcpu_id) 368 __field(unsigned int, vcpu_id)
368 __field(int, new) 369 __field(unsigned int, new)
369 __field(int, old) 370 __field(unsigned int, old)
370 ), 371 ),
371 372
372 TP_fast_assign( 373 TP_fast_assign(
@@ -376,7 +377,7 @@ TRACE_EVENT(kvm_halt_poll_ns,
376 __entry->old = old; 377 __entry->old = old;
377 ), 378 ),
378 379
379 TP_printk("vcpu %u: halt_poll_ns %d (%s %d)", 380 TP_printk("vcpu %u: halt_poll_ns %u (%s %u)",
380 __entry->vcpu_id, 381 __entry->vcpu_id,
381 __entry->new, 382 __entry->new,
382 __entry->grow ? "grow" : "shrink", 383 __entry->grow ? "grow" : "shrink",
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 9da905157cee..a7f1f8032ec1 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -157,6 +157,7 @@ struct kvm_s390_skeys {
157 157
158struct kvm_hyperv_exit { 158struct kvm_hyperv_exit {
159#define KVM_EXIT_HYPERV_SYNIC 1 159#define KVM_EXIT_HYPERV_SYNIC 1
160#define KVM_EXIT_HYPERV_HCALL 2
160 __u32 type; 161 __u32 type;
161 union { 162 union {
162 struct { 163 struct {
@@ -165,6 +166,11 @@ struct kvm_hyperv_exit {
165 __u64 evt_page; 166 __u64 evt_page;
166 __u64 msg_page; 167 __u64 msg_page;
167 } synic; 168 } synic;
169 struct {
170 __u64 input;
171 __u64 result;
172 __u64 params[2];
173 } hcall;
168 } u; 174 } u;
169}; 175};
170 176
@@ -541,7 +547,13 @@ struct kvm_s390_pgm_info {
541 __u8 exc_access_id; 547 __u8 exc_access_id;
542 __u8 per_access_id; 548 __u8 per_access_id;
543 __u8 op_access_id; 549 __u8 op_access_id;
544 __u8 pad[3]; 550#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01
551#define KVM_S390_PGM_FLAGS_ILC_0 0x02
552#define KVM_S390_PGM_FLAGS_ILC_1 0x04
553#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06
554#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08
555 __u8 flags;
556 __u8 pad[2];
545}; 557};
546 558
547struct kvm_s390_prefix_info { 559struct kvm_s390_prefix_info {
@@ -850,6 +862,9 @@ struct kvm_ppc_smmu_info {
850#define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 862#define KVM_CAP_IOEVENTFD_ANY_LENGTH 122
851#define KVM_CAP_HYPERV_SYNIC 123 863#define KVM_CAP_HYPERV_SYNIC 123
852#define KVM_CAP_S390_RI 124 864#define KVM_CAP_S390_RI 124
865#define KVM_CAP_SPAPR_TCE_64 125
866#define KVM_CAP_ARM_PMU_V3 126
867#define KVM_CAP_VCPU_ATTRIBUTES 127
853 868
854#ifdef KVM_CAP_IRQ_ROUTING 869#ifdef KVM_CAP_IRQ_ROUTING
855 870
@@ -1142,6 +1157,8 @@ struct kvm_s390_ucas_mapping {
1142/* Available with KVM_CAP_PPC_ALLOC_HTAB */ 1157/* Available with KVM_CAP_PPC_ALLOC_HTAB */
1143#define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32) 1158#define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32)
1144#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) 1159#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce)
1160#define KVM_CREATE_SPAPR_TCE_64 _IOW(KVMIO, 0xa8, \
1161 struct kvm_create_spapr_tce_64)
1145/* Available with KVM_CAP_RMA */ 1162/* Available with KVM_CAP_RMA */
1146#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) 1163#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma)
1147/* Available with KVM_CAP_PPC_HTAB_FD */ 1164/* Available with KVM_CAP_PPC_HTAB_FD */
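
The new kvm_s390_pgm_info flag byte above packs an instruction-length indication into bits 1-2 (hence ILC_MASK = 0x06) next to a validity bit and a no-rewind bit. A minimal user-space sketch of decoding such a flags byte; treating the masked bits as a two-bit ILC value is an assumption read off the mask layout, not something stated in this diff:

#include <stdint.h>
#include <stdio.h>

#define KVM_S390_PGM_FLAGS_ILC_VALID	0x01
#define KVM_S390_PGM_FLAGS_ILC_0	0x02
#define KVM_S390_PGM_FLAGS_ILC_1	0x04
#define KVM_S390_PGM_FLAGS_ILC_MASK	0x06
#define KVM_S390_PGM_FLAGS_NO_REWIND	0x08

int main(void)
{
	uint8_t flags = KVM_S390_PGM_FLAGS_ILC_VALID | KVM_S390_PGM_FLAGS_ILC_1;

	if (flags & KVM_S390_PGM_FLAGS_ILC_VALID)
		printf("ilc = %d\n", (flags & KVM_S390_PGM_FLAGS_ILC_MASK) >> 1);
	if (flags & KVM_S390_PGM_FLAGS_NO_REWIND)
		printf("PSW was not rewound\n");
	return 0;
}
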
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index ea6064696fe4..a9ad4fe3f68f 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -34,6 +34,11 @@ static struct timecounter *timecounter;
34static struct workqueue_struct *wqueue; 34static struct workqueue_struct *wqueue;
35static unsigned int host_vtimer_irq; 35static unsigned int host_vtimer_irq;
36 36
37void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
38{
39 vcpu->arch.timer_cpu.active_cleared_last = false;
40}
41
37static cycle_t kvm_phys_timer_read(void) 42static cycle_t kvm_phys_timer_read(void)
38{ 43{
39 return timecounter->cc->read(timecounter->cc); 44 return timecounter->cc->read(timecounter->cc);
@@ -130,6 +135,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
130 135
131 BUG_ON(!vgic_initialized(vcpu->kvm)); 136 BUG_ON(!vgic_initialized(vcpu->kvm));
132 137
138 timer->active_cleared_last = false;
133 timer->irq.level = new_level; 139 timer->irq.level = new_level;
134 trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq, 140 trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq,
135 timer->irq.level); 141 timer->irq.level);
@@ -245,10 +251,35 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
245 else 251 else
246 phys_active = false; 252 phys_active = false;
247 253
254 /*
255 * We want to avoid hitting the (re)distributor as much as
256 * possible, as this is a potentially expensive MMIO access
257 * (not to mention locks in the irq layer), and a solution for
258 * this is to cache the "active" state in memory.
259 *
260 * Things to consider: we cannot cache an "active set" state,
261 * because the HW can change this behind our back (it becomes
262 * "clear" in the HW). We must then restrict the caching to
263 * the "clear" state.
264 *
265 * The cache is invalidated on:
266 * - vcpu put, indicating that the HW cannot be trusted to be
267 * in a sane state on the next vcpu load,
268 * - any change in the interrupt state
269 *
270 * Usage conditions:
271 * - cached value is "active clear"
272 * - value to be programmed is "active clear"
273 */
274 if (timer->active_cleared_last && !phys_active)
275 return;
276
248 ret = irq_set_irqchip_state(timer->map->irq, 277 ret = irq_set_irqchip_state(timer->map->irq,
249 IRQCHIP_STATE_ACTIVE, 278 IRQCHIP_STATE_ACTIVE,
250 phys_active); 279 phys_active);
251 WARN_ON(ret); 280 WARN_ON(ret);
281
282 timer->active_cleared_last = !phys_active;
252} 283}
253 284
254/** 285/**
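
The long comment above spells out when the irqchip write can be skipped: only when both the cached state and the state about to be programmed are "active clear". A stand-alone sketch of that rule, with invented types and a counter in place of irq_set_irqchip_state():

#include <stdbool.h>
#include <stdio.h>

struct timer_state {
	bool active_cleared_last;	/* cache: last programmed state was "clear" */
};

static int mmio_writes;			/* stands in for (re)distributor accesses */

static void set_irq_active(bool active)
{
	(void)active;
	mmio_writes++;			/* placeholder for irq_set_irqchip_state() */
}

static void flush_hwstate(struct timer_state *t, bool phys_active)
{
	/* Skip the expensive write only if cached and requested are both "clear" */
	if (t->active_cleared_last && !phys_active)
		return;

	set_irq_active(phys_active);
	t->active_cleared_last = !phys_active;
}

int main(void)
{
	struct timer_state t = { .active_cleared_last = false };

	flush_hwstate(&t, false);	/* writes "clear", primes the cache */
	flush_hwstate(&t, false);	/* cache hit: no write */
	flush_hwstate(&t, true);	/* "set" is never cached, always written */
	printf("irqchip writes: %d\n", mmio_writes);	/* prints 2 */
	return 0;
}
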
diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
index 1051e5d7320f..ea00d69e7078 100644
--- a/arch/arm64/kvm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -19,9 +19,7 @@
19#include <linux/compiler.h> 19#include <linux/compiler.h>
20#include <linux/kvm_host.h> 20#include <linux/kvm_host.h>
21 21
22#include <asm/kvm_mmu.h> 22#include <asm/kvm_hyp.h>
23
24#include "hyp.h"
25 23
26/* vcpu is already in the HYP VA space */ 24/* vcpu is already in the HYP VA space */
27void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) 25void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
@@ -31,12 +29,12 @@ void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
31 u64 val; 29 u64 val;
32 30
33 if (kvm->arch.timer.enabled) { 31 if (kvm->arch.timer.enabled) {
34 timer->cntv_ctl = read_sysreg(cntv_ctl_el0); 32 timer->cntv_ctl = read_sysreg_el0(cntv_ctl);
35 timer->cntv_cval = read_sysreg(cntv_cval_el0); 33 timer->cntv_cval = read_sysreg_el0(cntv_cval);
36 } 34 }
37 35
38 /* Disable the virtual timer */ 36 /* Disable the virtual timer */
39 write_sysreg(0, cntv_ctl_el0); 37 write_sysreg_el0(0, cntv_ctl);
40 38
41 /* Allow physical timer/counter access for the host */ 39 /* Allow physical timer/counter access for the host */
42 val = read_sysreg(cnthctl_el2); 40 val = read_sysreg(cnthctl_el2);
@@ -64,8 +62,8 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
64 62
65 if (kvm->arch.timer.enabled) { 63 if (kvm->arch.timer.enabled) {
66 write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); 64 write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2);
67 write_sysreg(timer->cntv_cval, cntv_cval_el0); 65 write_sysreg_el0(timer->cntv_cval, cntv_cval);
68 isb(); 66 isb();
69 write_sysreg(timer->cntv_ctl, cntv_ctl_el0); 67 write_sysreg_el0(timer->cntv_ctl, cntv_ctl);
70 } 68 }
71} 69}
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
new file mode 100644
index 000000000000..674bdf8ecf4f
--- /dev/null
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -0,0 +1,170 @@
1/*
2 * Copyright (C) 2012-2015 - ARM Ltd
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <linux/compiler.h>
19#include <linux/irqchip/arm-gic.h>
20#include <linux/kvm_host.h>
21
22#include <asm/kvm_hyp.h>
23
24static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu,
25 void __iomem *base)
26{
27 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
28 int nr_lr = vcpu->arch.vgic_cpu.nr_lr;
29 u32 eisr0, eisr1;
30 int i;
31 bool expect_mi;
32
33 expect_mi = !!(cpu_if->vgic_hcr & GICH_HCR_UIE);
34
35 for (i = 0; i < nr_lr; i++) {
36 if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
37 continue;
38
39 expect_mi |= (!(cpu_if->vgic_lr[i] & GICH_LR_HW) &&
40 (cpu_if->vgic_lr[i] & GICH_LR_EOI));
41 }
42
43 if (expect_mi) {
44 cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR);
45
46 if (cpu_if->vgic_misr & GICH_MISR_EOI) {
47 eisr0 = readl_relaxed(base + GICH_EISR0);
48 if (unlikely(nr_lr > 32))
49 eisr1 = readl_relaxed(base + GICH_EISR1);
50 else
51 eisr1 = 0;
52 } else {
53 eisr0 = eisr1 = 0;
54 }
55 } else {
56 cpu_if->vgic_misr = 0;
57 eisr0 = eisr1 = 0;
58 }
59
60#ifdef CONFIG_CPU_BIG_ENDIAN
61 cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1;
62#else
63 cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0;
64#endif
65}
66
67static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
68{
69 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
70 int nr_lr = vcpu->arch.vgic_cpu.nr_lr;
71 u32 elrsr0, elrsr1;
72
73 elrsr0 = readl_relaxed(base + GICH_ELRSR0);
74 if (unlikely(nr_lr > 32))
75 elrsr1 = readl_relaxed(base + GICH_ELRSR1);
76 else
77 elrsr1 = 0;
78
79#ifdef CONFIG_CPU_BIG_ENDIAN
80 cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1;
81#else
82 cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
83#endif
84}
85
86static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
87{
88 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
89 int nr_lr = vcpu->arch.vgic_cpu.nr_lr;
90 int i;
91
92 for (i = 0; i < nr_lr; i++) {
93 if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
94 continue;
95
96 if (cpu_if->vgic_elrsr & (1UL << i)) {
97 cpu_if->vgic_lr[i] &= ~GICH_LR_STATE;
98 continue;
99 }
100
101 cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
102 writel_relaxed(0, base + GICH_LR0 + (i * 4));
103 }
104}
105
106/* vcpu is already in the HYP VA space */
107void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
108{
109 struct kvm *kvm = kern_hyp_va(vcpu->kvm);
110 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
111 struct vgic_dist *vgic = &kvm->arch.vgic;
112 void __iomem *base = kern_hyp_va(vgic->vctrl_base);
113
114 if (!base)
115 return;
116
117 cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR);
118
119 if (vcpu->arch.vgic_cpu.live_lrs) {
120 cpu_if->vgic_apr = readl_relaxed(base + GICH_APR);
121
122 save_maint_int_state(vcpu, base);
123 save_elrsr(vcpu, base);
124 save_lrs(vcpu, base);
125
126 writel_relaxed(0, base + GICH_HCR);
127
128 vcpu->arch.vgic_cpu.live_lrs = 0;
129 } else {
130 cpu_if->vgic_eisr = 0;
131 cpu_if->vgic_elrsr = ~0UL;
132 cpu_if->vgic_misr = 0;
133 cpu_if->vgic_apr = 0;
134 }
135}
136
137/* vcpu is already in the HYP VA space */
138void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
139{
140 struct kvm *kvm = kern_hyp_va(vcpu->kvm);
141 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
142 struct vgic_dist *vgic = &kvm->arch.vgic;
143 void __iomem *base = kern_hyp_va(vgic->vctrl_base);
144 int i, nr_lr;
145 u64 live_lrs = 0;
146
147 if (!base)
148 return;
149
150 nr_lr = vcpu->arch.vgic_cpu.nr_lr;
151
152 for (i = 0; i < nr_lr; i++)
153 if (cpu_if->vgic_lr[i] & GICH_LR_STATE)
154 live_lrs |= 1UL << i;
155
156 if (live_lrs) {
157 writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
158 writel_relaxed(cpu_if->vgic_apr, base + GICH_APR);
159 for (i = 0; i < nr_lr; i++) {
160 if (!(live_lrs & (1UL << i)))
161 continue;
162
163 writel_relaxed(cpu_if->vgic_lr[i],
164 base + GICH_LR0 + (i * 4));
165 }
166 }
167
168 writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR);
169 vcpu->arch.vgic_cpu.live_lrs = live_lrs;
170}
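
Two pieces of bookkeeping drive the save path above: a live_lrs bitmap built from the LRs that still hold state, and the folding of paired 32-bit status registers (ELRSR0/1, EISR0/1) into one 64-bit value whose half order depends on the CPU byte order. A stand-alone sketch of both, with an invented LR_STATE_MASK standing in for GICH_LR_STATE:

#include <stdint.h>
#include <stdio.h>

#define LR_STATE_MASK	(3U << 28)	/* illustrative stand-in for GICH_LR_STATE */

static uint64_t combine_halves(uint32_t reg0, uint32_t reg1, int big_endian)
{
	/* Mirrors the CONFIG_CPU_BIG_ENDIAN switch in save_elrsr(): which
	 * 32-bit register ends up in the upper half depends on byte order. */
	return big_endian ? ((uint64_t)reg0 << 32) | reg1
			  : ((uint64_t)reg1 << 32) | reg0;
}

int main(void)
{
	uint32_t lr[4] = { 0, 1U << 28, 0, 2U << 28 };	/* LRs 1 and 3 hold state */
	uint64_t live_lrs = 0;
	int i;

	for (i = 0; i < 4; i++)
		if (lr[i] & LR_STATE_MASK)
			live_lrs |= 1ULL << i;

	printf("live_lrs = %#llx\n", (unsigned long long)live_lrs);	/* 0xa */
	printf("elrsr    = %#llx\n",
	       (unsigned long long)combine_halves(0xf, 0x0, 0));	/* 0xf */
	return 0;
}
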
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
new file mode 100644
index 000000000000..b5754c6c5508
--- /dev/null
+++ b/virt/kvm/arm/pmu.c
@@ -0,0 +1,529 @@
1/*
2 * Copyright (C) 2015 Linaro Ltd.
3 * Author: Shannon Zhao <shannon.zhao@linaro.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <linux/cpu.h>
19#include <linux/kvm.h>
20#include <linux/kvm_host.h>
21#include <linux/perf_event.h>
22#include <linux/uaccess.h>
23#include <asm/kvm_emulate.h>
24#include <kvm/arm_pmu.h>
25#include <kvm/arm_vgic.h>
26
27/**
28 * kvm_pmu_get_counter_value - get PMU counter value
29 * @vcpu: The vcpu pointer
30 * @select_idx: The counter index
31 */
32u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
33{
34 u64 counter, reg, enabled, running;
35 struct kvm_pmu *pmu = &vcpu->arch.pmu;
36 struct kvm_pmc *pmc = &pmu->pmc[select_idx];
37
38 reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
39 ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
40 counter = vcpu_sys_reg(vcpu, reg);
41
42	/* The real counter value is the value of the counter register plus
43	 * whatever the backing perf event has counted since.
44	 */
45 if (pmc->perf_event)
46 counter += perf_event_read_value(pmc->perf_event, &enabled,
47 &running);
48
49 return counter & pmc->bitmask;
50}
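
The read path above is a single piece of arithmetic: the architectural counter equals the value saved in the sys-reg plus whatever the backing perf event has counted since, truncated to the counter width. A toy version with plain integers (no vcpu or perf plumbing):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t reg_value  = 0xfffffff0;	/* value last stored in the sys-reg */
	uint64_t perf_delta = 0x20;		/* counted by perf since then */
	uint64_t bitmask    = 0xffffffffULL;	/* 32-bit counter (PMCR_EL0.LC clear) */

	/* (0xfffffff0 + 0x20) wraps to 0x10 once masked to 32 bits */
	printf("counter = %#llx\n",
	       (unsigned long long)((reg_value + perf_delta) & bitmask));
	return 0;
}
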
51
52/**
53 * kvm_pmu_set_counter_value - set PMU counter value
54 * @vcpu: The vcpu pointer
55 * @select_idx: The counter index
56 * @val: The counter value
57 */
58void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
59{
60 u64 reg;
61
62 reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
63 ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
64 vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
65}
66
67/**
68 * kvm_pmu_stop_counter - stop PMU counter
69 * @pmc: The PMU counter pointer
70 *
71 * If this counter has been configured to monitor some event, release it here.
72 */
73static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
74{
75 u64 counter, reg;
76
77 if (pmc->perf_event) {
78 counter = kvm_pmu_get_counter_value(vcpu, pmc->idx);
79 reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
80 ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
81 vcpu_sys_reg(vcpu, reg) = counter;
82 perf_event_disable(pmc->perf_event);
83 perf_event_release_kernel(pmc->perf_event);
84 pmc->perf_event = NULL;
85 }
86}
87
88/**
89 * kvm_pmu_vcpu_reset - reset pmu state for cpu
90 * @vcpu: The vcpu pointer
91 *
92 */
93void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
94{
95 int i;
96 struct kvm_pmu *pmu = &vcpu->arch.pmu;
97
98 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
99 kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
100 pmu->pmc[i].idx = i;
101 pmu->pmc[i].bitmask = 0xffffffffUL;
102 }
103}
104
105/**
106 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
107 * @vcpu: The vcpu pointer
108 *
109 */
110void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
111{
112 int i;
113 struct kvm_pmu *pmu = &vcpu->arch.pmu;
114
115 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
116 struct kvm_pmc *pmc = &pmu->pmc[i];
117
118 if (pmc->perf_event) {
119 perf_event_disable(pmc->perf_event);
120 perf_event_release_kernel(pmc->perf_event);
121 pmc->perf_event = NULL;
122 }
123 }
124}
125
126u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
127{
128 u64 val = vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
129
130 val &= ARMV8_PMU_PMCR_N_MASK;
131 if (val == 0)
132 return BIT(ARMV8_PMU_CYCLE_IDX);
133 else
134 return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
135}
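
The mask above comes straight from PMCR_EL0.N: N event counters occupy the low N bits, and the cycle counter always contributes its own fixed bit. A sketch of the same computation with local macro stand-ins, assuming the cycle counter index is 31 as in the ARMv8 PMU layout:

#include <stdint.h>
#include <stdio.h>

#define CYCLE_IDX	31
#define BIT64(n)	(1ULL << (n))
#define GENMASK64(h, l)	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

static uint64_t valid_counter_mask(unsigned int n)
{
	if (n == 0)
		return BIT64(CYCLE_IDX);		/* only the cycle counter */
	return GENMASK64(n - 1, 0) | BIT64(CYCLE_IDX);	/* event counters 0..n-1 too */
}

int main(void)
{
	/* 6 event counters -> bits 0-5 plus bit 31: 0x8000003f */
	printf("mask = %#llx\n", (unsigned long long)valid_counter_mask(6));
	return 0;
}
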
136
137/**
138 * kvm_pmu_enable_counter - enable selected PMU counter
139 * @vcpu: The vcpu pointer
140 * @val: the value guest writes to PMCNTENSET register
141 *
142 * Call perf_event_enable to start counting the perf event
143 */
144void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val)
145{
146 int i;
147 struct kvm_pmu *pmu = &vcpu->arch.pmu;
148 struct kvm_pmc *pmc;
149
150 if (!(vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
151 return;
152
153 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
154 if (!(val & BIT(i)))
155 continue;
156
157 pmc = &pmu->pmc[i];
158 if (pmc->perf_event) {
159 perf_event_enable(pmc->perf_event);
160 if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
161 kvm_debug("fail to enable perf event\n");
162 }
163 }
164}
165
166/**
167 * kvm_pmu_disable_counter - disable selected PMU counter
168 * @vcpu: The vcpu pointer
169 * @val: the value guest writes to PMCNTENCLR register
170 *
171 * Call perf_event_disable to stop counting the perf event
172 */
173void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val)
174{
175 int i;
176 struct kvm_pmu *pmu = &vcpu->arch.pmu;
177 struct kvm_pmc *pmc;
178
179 if (!val)
180 return;
181
182 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
183 if (!(val & BIT(i)))
184 continue;
185
186 pmc = &pmu->pmc[i];
187 if (pmc->perf_event)
188 perf_event_disable(pmc->perf_event);
189 }
190}
191
192static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
193{
194 u64 reg = 0;
195
196 if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
197 reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0);
198 reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
199 reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1);
200 reg &= kvm_pmu_valid_counter_mask(vcpu);
201
202 return reg;
203}
204
205/**
206 * kvm_pmu_overflow_set - set PMU overflow interrupt
207 * @vcpu: The vcpu pointer
208 * @val: the value guest writes to PMOVSSET register
209 */
210void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val)
211{
212 u64 reg;
213
214 if (val == 0)
215 return;
216
217 vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val;
218 reg = kvm_pmu_overflow_status(vcpu);
219 if (reg != 0)
220 kvm_vcpu_kick(vcpu);
221}
222
223static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
224{
225 struct kvm_pmu *pmu = &vcpu->arch.pmu;
226 bool overflow;
227
228 if (!kvm_arm_pmu_v3_ready(vcpu))
229 return;
230
231 overflow = !!kvm_pmu_overflow_status(vcpu);
232 if (pmu->irq_level != overflow) {
233 pmu->irq_level = overflow;
234 kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
235 pmu->irq_num, overflow);
236 }
237}
238
239/**
240 * kvm_pmu_flush_hwstate - flush pmu state to cpu
241 * @vcpu: The vcpu pointer
242 *
243 * Check if the PMU has overflowed while we were running in the host, and inject
244 * an interrupt if that was the case.
245 */
246void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
247{
248 kvm_pmu_update_state(vcpu);
249}
250
251/**
252 * kvm_pmu_sync_hwstate - sync pmu state from cpu
253 * @vcpu: The vcpu pointer
254 *
255 * Check if the PMU has overflowed while we were running in the guest, and
256 * inject an interrupt if that was the case.
257 */
258void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
259{
260 kvm_pmu_update_state(vcpu);
261}
262
263static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
264{
265 struct kvm_pmu *pmu;
266 struct kvm_vcpu_arch *vcpu_arch;
267
268 pmc -= pmc->idx;
269 pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
270 vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
271 return container_of(vcpu_arch, struct kvm_vcpu, arch);
272}
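
kvm_pmc_to_vcpu() above recovers the enclosing vcpu from a bare counter pointer: stepping back by pmc->idx lands on pmc[0], and container_of() then peels off the surrounding structures. A cut-down user-space version of the same pointer walk, with invented stand-in structs and a minimal container_of():

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct pmc  { int idx; };
struct pmu  { struct pmc pmc[4]; };
struct vcpu { int id; struct pmu pmu; };

static struct vcpu *pmc_to_vcpu(struct pmc *pmc)
{
	struct pmu *pmu;

	pmc -= pmc->idx;				/* back to &pmu->pmc[0] */
	pmu = container_of(pmc, struct pmu, pmc[0]);
	return container_of(pmu, struct vcpu, pmu);
}

int main(void)
{
	struct vcpu v = { .id = 7 };
	int i;

	for (i = 0; i < 4; i++)
		v.pmu.pmc[i].idx = i;

	printf("vcpu id = %d\n", pmc_to_vcpu(&v.pmu.pmc[3])->id);	/* prints 7 */
	return 0;
}
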
273
274/**
275 * When perf event overflows, call kvm_pmu_overflow_set to set overflow status.
276 */
277static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
278 struct perf_sample_data *data,
279 struct pt_regs *regs)
280{
281 struct kvm_pmc *pmc = perf_event->overflow_handler_context;
282 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
283 int idx = pmc->idx;
284
285 kvm_pmu_overflow_set(vcpu, BIT(idx));
286}
287
288/**
289 * kvm_pmu_software_increment - do software increment
290 * @vcpu: The vcpu pointer
291 * @val: the value guest writes to PMSWINC register
292 */
293void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
294{
295 int i;
296 u64 type, enable, reg;
297
298 if (val == 0)
299 return;
300
301 enable = vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
302 for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
303 if (!(val & BIT(i)))
304 continue;
305 type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
306 & ARMV8_PMU_EVTYPE_EVENT;
307 if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
308 && (enable & BIT(i))) {
309 reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
310 reg = lower_32_bits(reg);
311 vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
312 if (!reg)
313 kvm_pmu_overflow_set(vcpu, BIT(i));
314 }
315 }
316}
317
318/**
319 * kvm_pmu_handle_pmcr - handle PMCR register
320 * @vcpu: The vcpu pointer
321 * @val: the value guest writes to PMCR register
322 */
323void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
324{
325 struct kvm_pmu *pmu = &vcpu->arch.pmu;
326 struct kvm_pmc *pmc;
327 u64 mask;
328 int i;
329
330 mask = kvm_pmu_valid_counter_mask(vcpu);
331 if (val & ARMV8_PMU_PMCR_E) {
332 kvm_pmu_enable_counter(vcpu,
333 vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
334 } else {
335 kvm_pmu_disable_counter(vcpu, mask);
336 }
337
338 if (val & ARMV8_PMU_PMCR_C)
339 kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
340
341 if (val & ARMV8_PMU_PMCR_P) {
342 for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++)
343 kvm_pmu_set_counter_value(vcpu, i, 0);
344 }
345
346 if (val & ARMV8_PMU_PMCR_LC) {
347 pmc = &pmu->pmc[ARMV8_PMU_CYCLE_IDX];
348 pmc->bitmask = 0xffffffffffffffffUL;
349 }
350}
351
352static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
353{
354 return (vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
355 (vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
356}
357
358/**
359 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
360 * @vcpu: The vcpu pointer
361 * @data: The data guest writes to PMXEVTYPER_EL0
362 * @select_idx: The counter index
363 *
364 * When the guest accesses PMXEVTYPER_EL0, it wants a PMC to count an
365 * event with the given hardware event number. Here we call the perf_event
366 * API to emulate this action and create a kernel perf event for it.
367 */
368void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
369 u64 select_idx)
370{
371 struct kvm_pmu *pmu = &vcpu->arch.pmu;
372 struct kvm_pmc *pmc = &pmu->pmc[select_idx];
373 struct perf_event *event;
374 struct perf_event_attr attr;
375 u64 eventsel, counter;
376
377 kvm_pmu_stop_counter(vcpu, pmc);
378 eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
379
380	/* Software increment event doesn't need to be backed by a perf event */
381 if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
382 return;
383
384 memset(&attr, 0, sizeof(struct perf_event_attr));
385 attr.type = PERF_TYPE_RAW;
386 attr.size = sizeof(attr);
387 attr.pinned = 1;
388 attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, select_idx);
389 attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
390 attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
391 attr.exclude_hv = 1; /* Don't count EL2 events */
392 attr.exclude_host = 1; /* Don't count host events */
393 attr.config = eventsel;
394
395 counter = kvm_pmu_get_counter_value(vcpu, select_idx);
396 /* The initial sample period (overflow count) of an event. */
397 attr.sample_period = (-counter) & pmc->bitmask;
398
399 event = perf_event_create_kernel_counter(&attr, -1, current,
400 kvm_pmu_perf_overflow, pmc);
401 if (IS_ERR(event)) {
402 pr_err_once("kvm: pmu event creation failed %ld\n",
403 PTR_ERR(event));
404 return;
405 }
406
407 pmc->perf_event = event;
408}
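
The sample_period set above, (-counter) & bitmask, is just the number of increments left before the emulated counter wraps, so the backing perf event overflows exactly when the guest counter would. Two worked values, assuming a 32-bit counter:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t bitmask = 0xffffffffULL;		/* 32-bit counter */
	uint64_t counters[] = { 1, 0xfffffff0ULL };
	unsigned int i;

	for (i = 0; i < 2; i++)
		printf("counter=%#llx -> sample_period=%#llx\n",
		       (unsigned long long)counters[i],
		       (unsigned long long)((-counters[i]) & bitmask));
	/* counter=0x1        -> 0xffffffff increments until wrap
	 * counter=0xfffffff0 -> 0x10 increments until wrap */
	return 0;
}
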
409
410bool kvm_arm_support_pmu_v3(void)
411{
412 /*
413	 * Check if HW_PERF_EVENTS is supported by checking the number of
414	 * hardware performance counters. A non-zero count ensures a physical
415	 * PMU is present and that CONFIG_PERF_EVENTS is selected.
416 */
417 return (perf_num_counters() > 0);
418}
419
420static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
421{
422 if (!kvm_arm_support_pmu_v3())
423 return -ENODEV;
424
425 if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features) ||
426 !kvm_arm_pmu_irq_initialized(vcpu))
427 return -ENXIO;
428
429 if (kvm_arm_pmu_v3_ready(vcpu))
430 return -EBUSY;
431
432 kvm_pmu_vcpu_reset(vcpu);
433 vcpu->arch.pmu.ready = true;
434
435 return 0;
436}
437
438static bool irq_is_valid(struct kvm *kvm, int irq, bool is_ppi)
439{
440 int i;
441 struct kvm_vcpu *vcpu;
442
443 kvm_for_each_vcpu(i, vcpu, kvm) {
444 if (!kvm_arm_pmu_irq_initialized(vcpu))
445 continue;
446
447 if (is_ppi) {
448 if (vcpu->arch.pmu.irq_num != irq)
449 return false;
450 } else {
451 if (vcpu->arch.pmu.irq_num == irq)
452 return false;
453 }
454 }
455
456 return true;
457}
458
459
460int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
461{
462 switch (attr->attr) {
463 case KVM_ARM_VCPU_PMU_V3_IRQ: {
464 int __user *uaddr = (int __user *)(long)attr->addr;
465 int irq;
466
467 if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
468 return -ENODEV;
469
470 if (get_user(irq, uaddr))
471 return -EFAULT;
472
473 /*
474 * The PMU overflow interrupt could be a PPI or SPI, but for one
475		 * VM the interrupt type must be the same for each vcpu. As a PPI,
476 * the interrupt number is the same for all vcpus, while as an
477 * SPI it must be a separate number per vcpu.
478 */
479 if (irq < VGIC_NR_SGIS || irq >= vcpu->kvm->arch.vgic.nr_irqs ||
480 !irq_is_valid(vcpu->kvm, irq, irq < VGIC_NR_PRIVATE_IRQS))
481 return -EINVAL;
482
483 if (kvm_arm_pmu_irq_initialized(vcpu))
484 return -EBUSY;
485
486 kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
487 vcpu->arch.pmu.irq_num = irq;
488 return 0;
489 }
490 case KVM_ARM_VCPU_PMU_V3_INIT:
491 return kvm_arm_pmu_v3_init(vcpu);
492 }
493
494 return -ENXIO;
495}
496
497int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
498{
499 switch (attr->attr) {
500 case KVM_ARM_VCPU_PMU_V3_IRQ: {
501 int __user *uaddr = (int __user *)(long)attr->addr;
502 int irq;
503
504 if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
505 return -ENODEV;
506
507 if (!kvm_arm_pmu_irq_initialized(vcpu))
508 return -ENXIO;
509
510 irq = vcpu->arch.pmu.irq_num;
511 return put_user(irq, uaddr);
512 }
513 }
514
515 return -ENXIO;
516}
517
518int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
519{
520 switch (attr->attr) {
521 case KVM_ARM_VCPU_PMU_V3_IRQ:
522 case KVM_ARM_VCPU_PMU_V3_INIT:
523 if (kvm_arm_support_pmu_v3() &&
524 test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
525 return 0;
526 }
527
528 return -ENXIO;
529}
diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
index 13907970d11c..1b0bee095427 100644
--- a/virt/kvm/arm/vgic-v2-emul.c
+++ b/virt/kvm/arm/vgic-v2-emul.c
@@ -321,6 +321,11 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
321 321
322static const struct vgic_io_range vgic_dist_ranges[] = { 322static const struct vgic_io_range vgic_dist_ranges[] = {
323 { 323 {
324 .base = GIC_DIST_SOFTINT,
325 .len = 4,
326 .handle_mmio = handle_mmio_sgi_reg,
327 },
328 {
324 .base = GIC_DIST_CTRL, 329 .base = GIC_DIST_CTRL,
325 .len = 12, 330 .len = 12,
326 .bits_per_irq = 0, 331 .bits_per_irq = 0,
@@ -387,11 +392,6 @@ static const struct vgic_io_range vgic_dist_ranges[] = {
387 .handle_mmio = handle_mmio_cfg_reg, 392 .handle_mmio = handle_mmio_cfg_reg,
388 }, 393 },
389 { 394 {
390 .base = GIC_DIST_SOFTINT,
391 .len = 4,
392 .handle_mmio = handle_mmio_sgi_reg,
393 },
394 {
395 .base = GIC_DIST_SGI_PENDING_CLEAR, 395 .base = GIC_DIST_SGI_PENDING_CLEAR,
396 .len = VGIC_NR_SGIS, 396 .len = VGIC_NR_SGIS,
397 .handle_mmio = handle_mmio_sgi_clear, 397 .handle_mmio = handle_mmio_sgi_clear,
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index ff02f08df74d..67ec334ce1d0 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -176,6 +176,15 @@ static const struct vgic_ops vgic_v2_ops = {
176 176
177static struct vgic_params vgic_v2_params; 177static struct vgic_params vgic_v2_params;
178 178
179static void vgic_cpu_init_lrs(void *params)
180{
181 struct vgic_params *vgic = params;
182 int i;
183
184 for (i = 0; i < vgic->nr_lr; i++)
185 writel_relaxed(0, vgic->vctrl_base + GICH_LR0 + (i * 4));
186}
187
179/** 188/**
180 * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT 189 * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
181 * @node: pointer to the DT node 190 * @node: pointer to the DT node
@@ -257,6 +266,9 @@ int vgic_v2_probe(struct device_node *vgic_node,
257 266
258 vgic->type = VGIC_V2; 267 vgic->type = VGIC_V2;
259 vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS; 268 vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS;
269
270 on_each_cpu(vgic_cpu_init_lrs, vgic, 1);
271
260 *ops = &vgic_v2_ops; 272 *ops = &vgic_v2_ops;
261 *params = vgic; 273 *params = vgic;
262 goto out; 274 goto out;
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 453eafd4dd6e..999bdc6d9d9f 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -42,7 +42,7 @@ static u32 ich_vtr_el2;
42static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) 42static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
43{ 43{
44 struct vgic_lr lr_desc; 44 struct vgic_lr lr_desc;
45 u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)]; 45 u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr];
46 46
47 if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) 47 if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
48 lr_desc.irq = val & ICH_LR_VIRTUALID_MASK; 48 lr_desc.irq = val & ICH_LR_VIRTUALID_MASK;
@@ -106,7 +106,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
106 lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT; 106 lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT;
107 } 107 }
108 108
109 vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)] = lr_val; 109 vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = lr_val;
110 110
111 if (!(lr_desc.state & LR_STATE_MASK)) 111 if (!(lr_desc.state & LR_STATE_MASK))
112 vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); 112 vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
@@ -216,6 +216,11 @@ static const struct vgic_ops vgic_v3_ops = {
216 216
217static struct vgic_params vgic_v3_params; 217static struct vgic_params vgic_v3_params;
218 218
219static void vgic_cpu_init_lrs(void *params)
220{
221 kvm_call_hyp(__vgic_v3_init_lrs);
222}
223
219/** 224/**
220 * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT 225 * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
221 * @node: pointer to the DT node 226 * @node: pointer to the DT node
@@ -284,6 +289,8 @@ int vgic_v3_probe(struct device_node *vgic_node,
284 kvm_info("%s@%llx IRQ%d\n", vgic_node->name, 289 kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
285 vcpu_res.start, vgic->maint_irq); 290 vcpu_res.start, vgic->maint_irq);
286 291
292 on_each_cpu(vgic_cpu_init_lrs, vgic, 1);
293
287 *ops = &vgic_v3_ops; 294 *ops = &vgic_v3_ops;
288 *params = vgic; 295 *params = vgic;
289 296
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 65da997b430a..f0d061f92674 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -109,8 +109,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
109 /* cancel outstanding work queue item */ 109 /* cancel outstanding work queue item */
110 while (!list_empty(&vcpu->async_pf.queue)) { 110 while (!list_empty(&vcpu->async_pf.queue)) {
111 struct kvm_async_pf *work = 111 struct kvm_async_pf *work =
112 list_entry(vcpu->async_pf.queue.next, 112 list_first_entry(&vcpu->async_pf.queue,
113 typeof(*work), queue); 113 typeof(*work), queue);
114 list_del(&work->queue); 114 list_del(&work->queue);
115 115
116#ifdef CONFIG_KVM_ASYNC_PF_SYNC 116#ifdef CONFIG_KVM_ASYNC_PF_SYNC
@@ -127,8 +127,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
127 spin_lock(&vcpu->async_pf.lock); 127 spin_lock(&vcpu->async_pf.lock);
128 while (!list_empty(&vcpu->async_pf.done)) { 128 while (!list_empty(&vcpu->async_pf.done)) {
129 struct kvm_async_pf *work = 129 struct kvm_async_pf *work =
130 list_entry(vcpu->async_pf.done.next, 130 list_first_entry(&vcpu->async_pf.done,
131 typeof(*work), link); 131 typeof(*work), link);
132 list_del(&work->link); 132 list_del(&work->link);
133 kmem_cache_free(async_pf_cache, work); 133 kmem_cache_free(async_pf_cache, work);
134 } 134 }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5af50c3ddd53..7ba1d10ffed2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -72,11 +72,11 @@ module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
72 72
73/* Default doubles per-vcpu halt_poll_ns. */ 73/* Default doubles per-vcpu halt_poll_ns. */
74static unsigned int halt_poll_ns_grow = 2; 74static unsigned int halt_poll_ns_grow = 2;
75module_param(halt_poll_ns_grow, int, S_IRUGO); 75module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
76 76
77/* Default resets per-vcpu halt_poll_ns . */ 77/* Default resets per-vcpu halt_poll_ns . */
78static unsigned int halt_poll_ns_shrink; 78static unsigned int halt_poll_ns_shrink;
79module_param(halt_poll_ns_shrink, int, S_IRUGO); 79module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
80 80
81/* 81/*
82 * Ordering of locks: 82 * Ordering of locks:
@@ -619,13 +619,10 @@ void *kvm_kvzalloc(unsigned long size)
619 619
620static void kvm_destroy_devices(struct kvm *kvm) 620static void kvm_destroy_devices(struct kvm *kvm)
621{ 621{
622 struct list_head *node, *tmp; 622 struct kvm_device *dev, *tmp;
623 623
624 list_for_each_safe(node, tmp, &kvm->devices) { 624 list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) {
625 struct kvm_device *dev = 625 list_del(&dev->vm_node);
626 list_entry(node, struct kvm_device, vm_node);
627
628 list_del(node);
629 dev->ops->destroy(dev); 626 dev->ops->destroy(dev);
630 } 627 }
631} 628}
@@ -1436,11 +1433,17 @@ kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
1436{ 1433{
1437 unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); 1434 unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
1438 1435
1439 if (addr == KVM_HVA_ERR_RO_BAD) 1436 if (addr == KVM_HVA_ERR_RO_BAD) {
1437 if (writable)
1438 *writable = false;
1440 return KVM_PFN_ERR_RO_FAULT; 1439 return KVM_PFN_ERR_RO_FAULT;
1440 }
1441 1441
1442 if (kvm_is_error_hva(addr)) 1442 if (kvm_is_error_hva(addr)) {
1443 if (writable)
1444 *writable = false;
1443 return KVM_PFN_NOSLOT; 1445 return KVM_PFN_NOSLOT;
1446 }
1444 1447
1445 /* Do not map writable pfn in the readonly memslot. */ 1448 /* Do not map writable pfn in the readonly memslot. */
1446 if (writable && memslot_is_readonly(slot)) { 1449 if (writable && memslot_is_readonly(slot)) {
@@ -1942,14 +1945,15 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
1942 1945
1943static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) 1946static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
1944{ 1947{
1945 int old, val; 1948 unsigned int old, val, grow;
1946 1949
1947 old = val = vcpu->halt_poll_ns; 1950 old = val = vcpu->halt_poll_ns;
1951 grow = READ_ONCE(halt_poll_ns_grow);
1948 /* 10us base */ 1952 /* 10us base */
1949 if (val == 0 && halt_poll_ns_grow) 1953 if (val == 0 && grow)
1950 val = 10000; 1954 val = 10000;
1951 else 1955 else
1952 val *= halt_poll_ns_grow; 1956 val *= grow;
1953 1957
1954 if (val > halt_poll_ns) 1958 if (val > halt_poll_ns)
1955 val = halt_poll_ns; 1959 val = halt_poll_ns;
@@ -1960,13 +1964,14 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
1960 1964
1961static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) 1965static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
1962{ 1966{
1963 int old, val; 1967 unsigned int old, val, shrink;
1964 1968
1965 old = val = vcpu->halt_poll_ns; 1969 old = val = vcpu->halt_poll_ns;
1966 if (halt_poll_ns_shrink == 0) 1970 shrink = READ_ONCE(halt_poll_ns_shrink);
1971 if (shrink == 0)
1967 val = 0; 1972 val = 0;
1968 else 1973 else
1969 val /= halt_poll_ns_shrink; 1974 val /= shrink;
1970 1975
1971 vcpu->halt_poll_ns = val; 1976 vcpu->halt_poll_ns = val;
1972 trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); 1977 trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);