aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-12-18 19:05:28 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-12-18 19:05:28 -0500
commit 66dcff86ba40eebb5133cccf450878f2bba102ef (patch)
tree e7eb49ad9316989a529b00303d2dd2cffa61a7f5
parent 91ed9e8a32d9a76adc59c83f8b40024076cf8a02 (diff)
parent 2c4aa55a6af070262cca425745e8e54310e96b8d (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM update from Paolo Bonzini: "3.19 changes for KVM: - spring cleaning: removed support for IA64, and for hardware-assisted virtualization on the PPC970 - ARM, PPC, s390 all had only small fixes For x86: - small performance improvements (though only on weird guests) - usual round of hardware-compliancy fixes from Nadav - APICv fixes - XSAVES support for hosts and guests. XSAVES hosts were broken because the (non-KVM) XSAVES patches inadvertently changed the KVM userspace ABI whenever XSAVES was enabled; hence, this part is going to stable. Guest support is just a matter of exposing the feature and CPUID leaves support" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (179 commits) KVM: move APIC types to arch/x86/ KVM: PPC: Book3S: Enable in-kernel XICS emulation by default KVM: PPC: Book3S HV: Improve H_CONFER implementation KVM: PPC: Book3S HV: Fix endianness of instruction obtained from HEIR register KVM: PPC: Book3S HV: Remove code for PPC970 processors KVM: PPC: Book3S HV: Tracepoints for KVM HV guest interactions KVM: PPC: Book3S HV: Simplify locking around stolen time calculations arch: powerpc: kvm: book3s_paired_singles.c: Remove unused function arch: powerpc: kvm: book3s_pr.c: Remove unused function arch: powerpc: kvm: book3s.c: Remove some unused functions arch: powerpc: kvm: book3s_32_mmu.c: Remove unused function KVM: PPC: Book3S HV: Check wait conditions before sleeping in kvmppc_vcore_blocked KVM: PPC: Book3S HV: ptes are big endian KVM: PPC: Book3S HV: Fix inaccuracies in ICP emulation for H_IPI KVM: PPC: Book3S HV: Fix KSM memory corruption KVM: PPC: Book3S HV: Fix an issue where guest is paused on receiving HMI KVM: PPC: Book3S HV: Fix computation of tlbie operand KVM: PPC: Book3S HV: Add missing HPTE unlock KVM: PPC: BookE: Improve irq inject tracepoint arm/arm64: KVM: Require in-kernel vgic for the arch timers ...
-rw-r--r--Documentation/ia64/kvm.txt83
-rw-r--r--Documentation/virtual/kvm/api.txt102
-rw-r--r--Documentation/virtual/kvm/devices/vm.txt10
-rw-r--r--Documentation/virtual/kvm/msr.txt2
-rw-r--r--MAINTAINERS9
-rw-r--r--arch/arm/include/asm/kvm_emulate.h5
-rw-r--r--arch/arm/include/asm/kvm_host.h2
-rw-r--r--arch/arm/include/asm/kvm_mmu.h6
-rw-r--r--arch/arm/kvm/arm.c78
-rw-r--r--arch/arm/kvm/guest.c26
-rw-r--r--arch/arm/kvm/mmio.c15
-rw-r--r--arch/arm/kvm/mmu.c92
-rw-r--r--arch/arm/kvm/psci.c18
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h5
-rw-r--r--arch/arm64/include/asm/kvm_host.h3
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h6
-rw-r--r--arch/arm64/kvm/guest.c26
-rw-r--r--arch/ia64/Kconfig3
-rw-r--r--arch/ia64/Makefile1
-rw-r--r--arch/ia64/include/asm/kvm_host.h609
-rw-r--r--arch/ia64/include/asm/pvclock-abi.h48
-rw-r--r--arch/ia64/include/uapi/asm/kvm.h268
-rw-r--r--arch/ia64/kvm/Kconfig66
-rw-r--r--arch/ia64/kvm/Makefile67
-rw-r--r--arch/ia64/kvm/asm-offsets.c241
-rw-r--r--arch/ia64/kvm/irq.h33
-rw-r--r--arch/ia64/kvm/kvm-ia64.c1942
-rw-r--r--arch/ia64/kvm/kvm_fw.c674
-rw-r--r--arch/ia64/kvm/kvm_lib.c21
-rw-r--r--arch/ia64/kvm/kvm_minstate.h266
-rw-r--r--arch/ia64/kvm/lapic.h30
-rw-r--r--arch/ia64/kvm/memcpy.S1
-rw-r--r--arch/ia64/kvm/memset.S1
-rw-r--r--arch/ia64/kvm/misc.h94
-rw-r--r--arch/ia64/kvm/mmio.c336
-rw-r--r--arch/ia64/kvm/optvfault.S1090
-rw-r--r--arch/ia64/kvm/process.c1024
-rw-r--r--arch/ia64/kvm/trampoline.S1038
-rw-r--r--arch/ia64/kvm/vcpu.c2209
-rw-r--r--arch/ia64/kvm/vcpu.h752
-rw-r--r--arch/ia64/kvm/vmm.c99
-rw-r--r--arch/ia64/kvm/vmm_ivt.S1392
-rw-r--r--arch/ia64/kvm/vti.h290
-rw-r--r--arch/ia64/kvm/vtlb.c640
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h2
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h3
-rw-r--r--arch/powerpc/include/asm/kvm_host.h18
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c2
-rw-r--r--arch/powerpc/kvm/Kconfig1
-rw-r--r--arch/powerpc/kvm/book3s.c8
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu.c5
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c224
-rw-r--r--arch/powerpc/kvm/book3s_hv.c438
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c136
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S39
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c5
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c150
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c36
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S251
-rw-r--r--arch/powerpc/kvm/book3s_paired_singles.c8
-rw-r--r--arch/powerpc/kvm/book3s_pr.c5
-rw-r--r--arch/powerpc/kvm/book3s_xics.c30
-rw-r--r--arch/powerpc/kvm/book3s_xics.h1
-rw-r--r--arch/powerpc/kvm/e500.c8
-rw-r--r--arch/powerpc/kvm/powerpc.c10
-rw-r--r--arch/powerpc/kvm/trace_book3s.h32
-rw-r--r--arch/powerpc/kvm/trace_booke.h47
-rw-r--r--arch/powerpc/kvm/trace_hv.h477
-rw-r--r--arch/powerpc/kvm/trace_pr.h25
-rw-r--r--arch/s390/include/asm/kvm_host.h99
-rw-r--r--arch/s390/include/asm/pgalloc.h1
-rw-r--r--arch/s390/include/asm/sigp.h1
-rw-r--r--arch/s390/kvm/gaccess.c40
-rw-r--r--arch/s390/kvm/intercept.c20
-rw-r--r--arch/s390/kvm/interrupt.c1044
-rw-r--r--arch/s390/kvm/kvm-s390.c22
-rw-r--r--arch/s390/kvm/kvm-s390.h11
-rw-r--r--arch/s390/kvm/priv.c95
-rw-r--r--arch/s390/kvm/sigp.c305
-rw-r--r--arch/s390/mm/pgtable.c41
-rw-r--r--arch/x86/include/asm/kvm_host.h37
-rw-r--r--arch/x86/include/asm/vmx.h3
-rw-r--r--arch/x86/include/asm/xsave.h1
-rw-r--r--arch/x86/include/uapi/asm/vmx.h6
-rw-r--r--arch/x86/kernel/kvm.c9
-rw-r--r--arch/x86/kernel/kvmclock.c20
-rw-r--r--arch/x86/kernel/xsave.c1
-rw-r--r--arch/x86/kvm/Makefile7
-rw-r--r--arch/x86/kvm/assigned-dev.c (renamed from virt/kvm/assigned-dev.c)30
-rw-r--r--arch/x86/kvm/assigned-dev.h32
-rw-r--r--arch/x86/kvm/cpuid.c57
-rw-r--r--arch/x86/kvm/emulate.c408
-rw-r--r--arch/x86/kvm/ioapic.c (renamed from virt/kvm/ioapic.c)12
-rw-r--r--arch/x86/kvm/ioapic.h (renamed from virt/kvm/ioapic.h)21
-rw-r--r--arch/x86/kvm/iommu.c (renamed from virt/kvm/iommu.c)11
-rw-r--r--arch/x86/kvm/irq_comm.c (renamed from virt/kvm/irq_comm.c)45
-rw-r--r--arch/x86/kvm/lapic.c210
-rw-r--r--arch/x86/kvm/lapic.h14
-rw-r--r--arch/x86/kvm/mmu.c7
-rw-r--r--arch/x86/kvm/svm.c24
-rw-r--r--arch/x86/kvm/trace.h37
-rw-r--r--arch/x86/kvm/vmx.c608
-rw-r--r--arch/x86/kvm/x86.c226
-rw-r--r--arch/x86/kvm/x86.h3
-rw-r--r--include/kvm/arm_arch_timer.h10
-rw-r--r--include/kvm/arm_vgic.h12
-rw-r--r--include/linux/kvm_host.h106
-rw-r--r--include/linux/kvm_types.h27
-rw-r--r--include/uapi/linux/kvm.h11
-rw-r--r--virt/kvm/arm/arch_timer.c30
-rw-r--r--virt/kvm/arm/vgic.c116
-rw-r--r--virt/kvm/eventfd.c7
-rw-r--r--virt/kvm/kvm_main.c133
114 files changed, 3551 insertions, 16095 deletions
diff --git a/Documentation/ia64/kvm.txt b/Documentation/ia64/kvm.txt
deleted file mode 100644
index ffb5c80bec3e..000000000000
--- a/Documentation/ia64/kvm.txt
+++ /dev/null
@@ -1,83 +0,0 @@
1Currently, kvm module is in EXPERIMENTAL stage on IA64. This means that
2interfaces are not stable enough to use. So, please don't run critical
3applications in virtual machine.
4We will try our best to improve it in future versions!
5
6 Guide: How to boot up guests on kvm/ia64
7
8This guide is to describe how to enable kvm support for IA-64 systems.
9
101. Get the kvm source from git.kernel.org.
11 Userspace source:
12 git clone git://git.kernel.org/pub/scm/virt/kvm/kvm-userspace.git
13 Kernel Source:
14 git clone git://git.kernel.org/pub/scm/linux/kernel/git/xiantao/kvm-ia64.git
15
162. Compile the source code.
17 2.1 Compile userspace code:
18 (1)cd ./kvm-userspace
19 (2)./configure
20 (3)cd kernel
21 (4)make sync LINUX= $kernel_dir (kernel_dir is the directory of kernel source.)
22 (5)cd ..
23 (6)make qemu
24 (7)cd qemu; make install
25
26 2.2 Compile kernel source code:
27 (1) cd ./$kernel_dir
28 (2) make menuconfig
29 (3) Enter into virtualization option, and choose kvm.
30 (4) make
31 (5) Once (4) done, make modules_install
32 (6) Make initrd, and use new kernel to reboot up host machine.
33 (7) Once (6) done, cd $kernel_dir/arch/ia64/kvm
34 (8) insmod kvm.ko; insmod kvm-intel.ko
35
36Note: For step 2, please make sure that the host page size == TARGET_PAGE_SIZE of qemu; otherwise, it may fail.
37
383. Get Guest Firmware named as Flash.fd, and put it under right place:
39 (1) If you have the guest firmware (binary) released by Intel Corp for Xen, use it directly.
40
41 (2) If you have no firmware at hand, Please download its source from
42 hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg
43 you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries.
44
45 (3) Rename the firmware you owned to Flash.fd, and copy it to /usr/local/share/qemu
46
474. Boot up Linux or Windows guests:
48 4.1 Create or install an image for guest boot. If you have xen experience, it should be easy.
49
50 4.2 Boot up guests using the following command.
51 /usr/local/bin/qemu-system-ia64 -smp xx -m 512 -hda $your_image
52 (xx is the number of virtual processors for the guest, now the maximum value is 4)
53
545. Known possible issue on some platforms with old Firmware.
55
56In the event of strange host crash issues, try to solve it through either of the following ways:
57
58(1): Upgrade your Firmware to the latest one.
59
60(2): Applying the below patch to kernel source.
61diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
62index 0b53344..f02b0f7 100644
63--- a/arch/ia64/kernel/pal.S
64+++ b/arch/ia64/kernel/pal.S
65@@ -84,7 +84,8 @@ GLOBAL_ENTRY(ia64_pal_call_static)
66 mov ar.pfs = loc1
67 mov rp = loc0
68 ;;
69- srlz.d // serialize restoration of psr.l
70+ srlz.i // serialize restoration of psr.l
71+ ;;
72 br.ret.sptk.many b0
73 END(ia64_pal_call_static)
74
756. Bug report:
76 If you find any issues when using kvm/ia64, please post the bug info to the kvm-ia64-devel mailing list.
77 https://lists.sourceforge.net/lists/listinfo/kvm-ia64-devel/
78
79Thanks for your interest! Let's work together, and make kvm/ia64 stronger and stronger!
80
81
82 Xiantao Zhang <xiantao.zhang@intel.com>
83 2008.3.10
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 7610eaa4d491..0007fef4ed81 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -68,9 +68,12 @@ description:
68 68
69 Capability: which KVM extension provides this ioctl. Can be 'basic', 69 Capability: which KVM extension provides this ioctl. Can be 'basic',
70 which means that it will be provided by any kernel that supports 70 which means that it will be provided by any kernel that supports
71 API version 12 (see section 4.1), or a KVM_CAP_xyz constant, which 71 API version 12 (see section 4.1), a KVM_CAP_xyz constant, which
72 means availability needs to be checked with KVM_CHECK_EXTENSION 72 means availability needs to be checked with KVM_CHECK_EXTENSION
73 (see section 4.4). 73 (see section 4.4), or 'none' which means that while not all kernels
74 support this ioctl, there's no capability bit to check its
75 availability: for kernels that don't support the ioctl,
76 the ioctl returns -ENOTTY.
74 77
75 Architectures: which instruction set architectures provide this ioctl. 78 Architectures: which instruction set architectures provide this ioctl.
76 x86 includes both i386 and x86_64. 79 x86 includes both i386 and x86_64.
@@ -604,7 +607,7 @@ struct kvm_fpu {
6044.24 KVM_CREATE_IRQCHIP 6074.24 KVM_CREATE_IRQCHIP
605 608
606Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390) 609Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390)
607Architectures: x86, ia64, ARM, arm64, s390 610Architectures: x86, ARM, arm64, s390
608Type: vm ioctl 611Type: vm ioctl
609Parameters: none 612Parameters: none
610Returns: 0 on success, -1 on error 613Returns: 0 on success, -1 on error
@@ -612,7 +615,7 @@ Returns: 0 on success, -1 on error
612Creates an interrupt controller model in the kernel. On x86, creates a virtual 615Creates an interrupt controller model in the kernel. On x86, creates a virtual
613ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a 616ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
614local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 617local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
615only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is 618only go to the IOAPIC. On ARM/arm64, a GIC is
616created. On s390, a dummy irq routing table is created. 619created. On s390, a dummy irq routing table is created.
617 620
618Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled 621Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled
@@ -622,7 +625,7 @@ before KVM_CREATE_IRQCHIP can be used.
6224.25 KVM_IRQ_LINE 6254.25 KVM_IRQ_LINE
623 626
624Capability: KVM_CAP_IRQCHIP 627Capability: KVM_CAP_IRQCHIP
625Architectures: x86, ia64, arm, arm64 628Architectures: x86, arm, arm64
626Type: vm ioctl 629Type: vm ioctl
627Parameters: struct kvm_irq_level 630Parameters: struct kvm_irq_level
628Returns: 0 on success, -1 on error 631Returns: 0 on success, -1 on error
@@ -676,7 +679,7 @@ struct kvm_irq_level {
6764.26 KVM_GET_IRQCHIP 6794.26 KVM_GET_IRQCHIP
677 680
678Capability: KVM_CAP_IRQCHIP 681Capability: KVM_CAP_IRQCHIP
679Architectures: x86, ia64 682Architectures: x86
680Type: vm ioctl 683Type: vm ioctl
681Parameters: struct kvm_irqchip (in/out) 684Parameters: struct kvm_irqchip (in/out)
682Returns: 0 on success, -1 on error 685Returns: 0 on success, -1 on error
@@ -698,7 +701,7 @@ struct kvm_irqchip {
6984.27 KVM_SET_IRQCHIP 7014.27 KVM_SET_IRQCHIP
699 702
700Capability: KVM_CAP_IRQCHIP 703Capability: KVM_CAP_IRQCHIP
701Architectures: x86, ia64 704Architectures: x86
702Type: vm ioctl 705Type: vm ioctl
703Parameters: struct kvm_irqchip (in) 706Parameters: struct kvm_irqchip (in)
704Returns: 0 on success, -1 on error 707Returns: 0 on success, -1 on error
@@ -991,7 +994,7 @@ for vm-wide capabilities.
9914.38 KVM_GET_MP_STATE 9944.38 KVM_GET_MP_STATE
992 995
993Capability: KVM_CAP_MP_STATE 996Capability: KVM_CAP_MP_STATE
994Architectures: x86, ia64, s390 997Architectures: x86, s390
995Type: vcpu ioctl 998Type: vcpu ioctl
996Parameters: struct kvm_mp_state (out) 999Parameters: struct kvm_mp_state (out)
997Returns: 0 on success; -1 on error 1000Returns: 0 on success; -1 on error
@@ -1005,16 +1008,15 @@ uniprocessor guests).
1005 1008
1006Possible values are: 1009Possible values are:
1007 1010
1008 - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86, ia64] 1011 - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86]
1009 - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP) 1012 - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP)
1010 which has not yet received an INIT signal [x86, 1013 which has not yet received an INIT signal [x86]
1011 ia64]
1012 - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is 1014 - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is
1013 now ready for a SIPI [x86, ia64] 1015 now ready for a SIPI [x86]
1014 - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and 1016 - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and
1015 is waiting for an interrupt [x86, ia64] 1017 is waiting for an interrupt [x86]
1016 - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector 1018 - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector
1017 accessible via KVM_GET_VCPU_EVENTS) [x86, ia64] 1019 accessible via KVM_GET_VCPU_EVENTS) [x86]
1018 - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390] 1020 - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390]
1019 - KVM_MP_STATE_CHECK_STOP: the vcpu is in a special error state [s390] 1021 - KVM_MP_STATE_CHECK_STOP: the vcpu is in a special error state [s390]
1020 - KVM_MP_STATE_OPERATING: the vcpu is operating (running or halted) 1022 - KVM_MP_STATE_OPERATING: the vcpu is operating (running or halted)
@@ -1022,7 +1024,7 @@ Possible values are:
1022 - KVM_MP_STATE_LOAD: the vcpu is in a special load/startup state 1024 - KVM_MP_STATE_LOAD: the vcpu is in a special load/startup state
1023 [s390] 1025 [s390]
1024 1026
1025On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an 1027On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
1026in-kernel irqchip, the multiprocessing state must be maintained by userspace on 1028in-kernel irqchip, the multiprocessing state must be maintained by userspace on
1027these architectures. 1029these architectures.
1028 1030
@@ -1030,7 +1032,7 @@ these architectures.
10304.39 KVM_SET_MP_STATE 10324.39 KVM_SET_MP_STATE
1031 1033
1032Capability: KVM_CAP_MP_STATE 1034Capability: KVM_CAP_MP_STATE
1033Architectures: x86, ia64, s390 1035Architectures: x86, s390
1034Type: vcpu ioctl 1036Type: vcpu ioctl
1035Parameters: struct kvm_mp_state (in) 1037Parameters: struct kvm_mp_state (in)
1036Returns: 0 on success; -1 on error 1038Returns: 0 on success; -1 on error
@@ -1038,7 +1040,7 @@ Returns: 0 on success; -1 on error
1038Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for 1040Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
1039arguments. 1041arguments.
1040 1042
1041On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an 1043On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
1042in-kernel irqchip, the multiprocessing state must be maintained by userspace on 1044in-kernel irqchip, the multiprocessing state must be maintained by userspace on
1043these architectures. 1045these architectures.
1044 1046
@@ -1065,7 +1067,7 @@ documentation when it pops into existence).
10654.41 KVM_SET_BOOT_CPU_ID 10674.41 KVM_SET_BOOT_CPU_ID
1066 1068
1067Capability: KVM_CAP_SET_BOOT_CPU_ID 1069Capability: KVM_CAP_SET_BOOT_CPU_ID
1068Architectures: x86, ia64 1070Architectures: x86
1069Type: vm ioctl 1071Type: vm ioctl
1070Parameters: unsigned long vcpu_id 1072Parameters: unsigned long vcpu_id
1071Returns: 0 on success, -1 on error 1073Returns: 0 on success, -1 on error
@@ -1257,8 +1259,8 @@ The flags bitmap is defined as:
1257 1259
12584.48 KVM_ASSIGN_PCI_DEVICE 12604.48 KVM_ASSIGN_PCI_DEVICE
1259 1261
1260Capability: KVM_CAP_DEVICE_ASSIGNMENT 1262Capability: none
1261Architectures: x86 ia64 1263Architectures: x86
1262Type: vm ioctl 1264Type: vm ioctl
1263Parameters: struct kvm_assigned_pci_dev (in) 1265Parameters: struct kvm_assigned_pci_dev (in)
1264Returns: 0 on success, -1 on error 1266Returns: 0 on success, -1 on error
@@ -1298,25 +1300,36 @@ Only PCI header type 0 devices with PCI BAR resources are supported by
1298device assignment. The user requesting this ioctl must have read/write 1300device assignment. The user requesting this ioctl must have read/write
1299access to the PCI sysfs resource files associated with the device. 1301access to the PCI sysfs resource files associated with the device.
1300 1302
1303Errors:
1304 ENOTTY: kernel does not support this ioctl
1305
1306 Other error conditions may be defined by individual device types or
1307 have their standard meanings.
1308
1301 1309
13024.49 KVM_DEASSIGN_PCI_DEVICE 13104.49 KVM_DEASSIGN_PCI_DEVICE
1303 1311
1304Capability: KVM_CAP_DEVICE_DEASSIGNMENT 1312Capability: none
1305Architectures: x86 ia64 1313Architectures: x86
1306Type: vm ioctl 1314Type: vm ioctl
1307Parameters: struct kvm_assigned_pci_dev (in) 1315Parameters: struct kvm_assigned_pci_dev (in)
1308Returns: 0 on success, -1 on error 1316Returns: 0 on success, -1 on error
1309 1317
1310Ends PCI device assignment, releasing all associated resources. 1318Ends PCI device assignment, releasing all associated resources.
1311 1319
1312See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is 1320See KVM_ASSIGN_PCI_DEVICE for the data structure. Only assigned_dev_id is
1313used in kvm_assigned_pci_dev to identify the device. 1321used in kvm_assigned_pci_dev to identify the device.
1314 1322
1323Errors:
1324 ENOTTY: kernel does not support this ioctl
1325
1326 Other error conditions may be defined by individual device types or
1327 have their standard meanings.
1315 1328
13164.50 KVM_ASSIGN_DEV_IRQ 13294.50 KVM_ASSIGN_DEV_IRQ
1317 1330
1318Capability: KVM_CAP_ASSIGN_DEV_IRQ 1331Capability: KVM_CAP_ASSIGN_DEV_IRQ
1319Architectures: x86 ia64 1332Architectures: x86
1320Type: vm ioctl 1333Type: vm ioctl
1321Parameters: struct kvm_assigned_irq (in) 1334Parameters: struct kvm_assigned_irq (in)
1322Returns: 0 on success, -1 on error 1335Returns: 0 on success, -1 on error
@@ -1346,11 +1359,17 @@ The following flags are defined:
1346It is not valid to specify multiple types per host or guest IRQ. However, the 1359It is not valid to specify multiple types per host or guest IRQ. However, the
1347IRQ type of host and guest can differ or can even be null. 1360IRQ type of host and guest can differ or can even be null.
1348 1361
1362Errors:
1363 ENOTTY: kernel does not support this ioctl
1364
1365 Other error conditions may be defined by individual device types or
1366 have their standard meanings.
1367
1349 1368
13504.51 KVM_DEASSIGN_DEV_IRQ 13694.51 KVM_DEASSIGN_DEV_IRQ
1351 1370
1352Capability: KVM_CAP_ASSIGN_DEV_IRQ 1371Capability: KVM_CAP_ASSIGN_DEV_IRQ
1353Architectures: x86 ia64 1372Architectures: x86
1354Type: vm ioctl 1373Type: vm ioctl
1355Parameters: struct kvm_assigned_irq (in) 1374Parameters: struct kvm_assigned_irq (in)
1356Returns: 0 on success, -1 on error 1375Returns: 0 on success, -1 on error
@@ -1365,7 +1384,7 @@ KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed.
13654.52 KVM_SET_GSI_ROUTING 13844.52 KVM_SET_GSI_ROUTING
1366 1385
1367Capability: KVM_CAP_IRQ_ROUTING 1386Capability: KVM_CAP_IRQ_ROUTING
1368Architectures: x86 ia64 s390 1387Architectures: x86 s390
1369Type: vm ioctl 1388Type: vm ioctl
1370Parameters: struct kvm_irq_routing (in) 1389Parameters: struct kvm_irq_routing (in)
1371Returns: 0 on success, -1 on error 1390Returns: 0 on success, -1 on error
@@ -1423,8 +1442,8 @@ struct kvm_irq_routing_s390_adapter {
1423 1442
14244.53 KVM_ASSIGN_SET_MSIX_NR 14434.53 KVM_ASSIGN_SET_MSIX_NR
1425 1444
1426Capability: KVM_CAP_DEVICE_MSIX 1445Capability: none
1427Architectures: x86 ia64 1446Architectures: x86
1428Type: vm ioctl 1447Type: vm ioctl
1429Parameters: struct kvm_assigned_msix_nr (in) 1448Parameters: struct kvm_assigned_msix_nr (in)
1430Returns: 0 on success, -1 on error 1449Returns: 0 on success, -1 on error
@@ -1445,8 +1464,8 @@ struct kvm_assigned_msix_nr {
1445 1464
14464.54 KVM_ASSIGN_SET_MSIX_ENTRY 14654.54 KVM_ASSIGN_SET_MSIX_ENTRY
1447 1466
1448Capability: KVM_CAP_DEVICE_MSIX 1467Capability: none
1449Architectures: x86 ia64 1468Architectures: x86
1450Type: vm ioctl 1469Type: vm ioctl
1451Parameters: struct kvm_assigned_msix_entry (in) 1470Parameters: struct kvm_assigned_msix_entry (in)
1452Returns: 0 on success, -1 on error 1471Returns: 0 on success, -1 on error
@@ -1461,6 +1480,12 @@ struct kvm_assigned_msix_entry {
1461 __u16 padding[3]; 1480 __u16 padding[3];
1462}; 1481};
1463 1482
1483Errors:
1484 ENOTTY: kernel does not support this ioctl
1485
1486 Other error conditions may be defined by individual device types or
1487 have their standard meanings.
1488
1464 1489
14654.55 KVM_SET_TSC_KHZ 14904.55 KVM_SET_TSC_KHZ
1466 1491
@@ -2453,9 +2478,15 @@ return ENOEXEC for that vcpu.
2453Note that because some registers reflect machine topology, all vcpus 2478Note that because some registers reflect machine topology, all vcpus
2454should be created before this ioctl is invoked. 2479should be created before this ioctl is invoked.
2455 2480
2481Userspace can call this function multiple times for a given vcpu, including
2482after the vcpu has been run. This will reset the vcpu to its initial
2483state. All calls to this function after the initial call must use the same
2484target and same set of feature flags, otherwise EINVAL will be returned.
2485
2456Possible features: 2486Possible features:
2457 - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state. 2487 - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
2458 Depends on KVM_CAP_ARM_PSCI. 2488 Depends on KVM_CAP_ARM_PSCI. If not set, the CPU will be powered on
2489 and execute guest code when KVM_RUN is called.
2459 - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode. 2490 - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
2460 Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). 2491 Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
2461 - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU. 2492 - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU.
@@ -2951,6 +2982,15 @@ HVC instruction based PSCI call from the vcpu. The 'type' field describes
2951the system-level event type. The 'flags' field describes architecture 2982the system-level event type. The 'flags' field describes architecture
2952specific flags for the system-level event. 2983specific flags for the system-level event.
2953 2984
2985Valid values for 'type' are:
2986 KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the
2987 VM. Userspace is not obliged to honour this, and if it does honour
2988 this does not need to destroy the VM synchronously (ie it may call
2989 KVM_RUN again before shutdown finally occurs).
2990 KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM.
2991 As with SHUTDOWN, userspace can choose to ignore the request, or
2992 to schedule the reset to occur in the future and may call KVM_RUN again.
2993
2954 /* Fix the size of the union. */ 2994 /* Fix the size of the union. */
2955 char padding[256]; 2995 char padding[256];
2956 }; 2996 };
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
index 0d16f96c0eac..d426fc87fe93 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -12,14 +12,14 @@ specific.
121. GROUP: KVM_S390_VM_MEM_CTRL 121. GROUP: KVM_S390_VM_MEM_CTRL
13Architectures: s390 13Architectures: s390
14 14
151.1. ATTRIBUTE: KVM_S390_VM_MEM_CTRL 151.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
16Parameters: none 16Parameters: none
17Returns: -EBUSY if already a vcpus is defined, otherwise 0 17Returns: -EBUSY if a vcpu is already defined, otherwise 0
18 18
19Enables CMMA for the virtual machine 19Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
20 20
211.2. ATTRIBUTE: KVM_S390_VM_CLR_CMMA 211.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
22Parameteres: none 22Parameters: none
23Returns: 0 23Returns: 0
24 24
25Clear the CMMA status for all guest pages, so any pages the guest marked 25Clear the CMMA status for all guest pages, so any pages the guest marked
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 6d470ae7b073..2a71c8f29f68 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -168,7 +168,7 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
168 64 byte memory area which must be in guest RAM and must be 168 64 byte memory area which must be in guest RAM and must be
169 zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1 169 zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
170 when asynchronous page faults are enabled on the vcpu 0 when 170 when asynchronous page faults are enabled on the vcpu 0 when
171 disabled. Bit 2 is 1 if asynchronous page faults can be injected 171 disabled. Bit 1 is 1 if asynchronous page faults can be injected
172 when vcpu is in cpl == 0. 172 when vcpu is in cpl == 0.
173 173
174 First 4 byte of 64 byte memory location will be written to by 174 First 4 byte of 64 byte memory location will be written to by
diff --git a/MAINTAINERS b/MAINTAINERS
index 4507a7e87c00..7605833aabc6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5495,15 +5495,6 @@ S: Supported
5495F: arch/powerpc/include/asm/kvm* 5495F: arch/powerpc/include/asm/kvm*
5496F: arch/powerpc/kvm/ 5496F: arch/powerpc/kvm/
5497 5497
5498KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64)
5499M: Xiantao Zhang <xiantao.zhang@intel.com>
5500L: kvm-ia64@vger.kernel.org
5501W: http://kvm.qumranet.com
5502S: Supported
5503F: Documentation/ia64/kvm.txt
5504F: arch/ia64/include/asm/kvm*
5505F: arch/ia64/kvm/
5506
5507KERNEL VIRTUAL MACHINE for s390 (KVM/s390) 5498KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
5508M: Christian Borntraeger <borntraeger@de.ibm.com> 5499M: Christian Borntraeger <borntraeger@de.ibm.com>
5509M: Cornelia Huck <cornelia.huck@de.ibm.com> 5500M: Cornelia Huck <cornelia.huck@de.ibm.com>
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index b9db269c6e61..66ce17655bb9 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -33,6 +33,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
33void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); 33void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
34void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); 34void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
35 35
36static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
37{
38 vcpu->arch.hcr = HCR_GUEST_MASK;
39}
40
36static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) 41static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
37{ 42{
38 return 1; 43 return 1;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 53036e21756b..254e0650e48b 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -150,8 +150,6 @@ struct kvm_vcpu_stat {
150 u32 halt_wakeup; 150 u32 halt_wakeup;
151}; 151};
152 152
153int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
154 const struct kvm_vcpu_init *init);
155int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); 153int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
156unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); 154unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
157int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); 155int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index acb0d5712716..63e0ecc04901 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -52,6 +52,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
52void free_boot_hyp_pgd(void); 52void free_boot_hyp_pgd(void);
53void free_hyp_pgds(void); 53void free_hyp_pgds(void);
54 54
55void stage2_unmap_vm(struct kvm *kvm);
55int kvm_alloc_stage2_pgd(struct kvm *kvm); 56int kvm_alloc_stage2_pgd(struct kvm *kvm);
56void kvm_free_stage2_pgd(struct kvm *kvm); 57void kvm_free_stage2_pgd(struct kvm *kvm);
57int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, 58int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -161,9 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
161} 162}
162 163
163static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, 164static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
164 unsigned long size) 165 unsigned long size,
166 bool ipa_uncached)
165{ 167{
166 if (!vcpu_has_cache_enabled(vcpu)) 168 if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
167 kvm_flush_dcache_to_poc((void *)hva, size); 169 kvm_flush_dcache_to_poc((void *)hva, size);
168 170
169 /* 171 /*
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9e193c8a959e..2d6d91001062 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -213,6 +213,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
213 int err; 213 int err;
214 struct kvm_vcpu *vcpu; 214 struct kvm_vcpu *vcpu;
215 215
216 if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
217 err = -EBUSY;
218 goto out;
219 }
220
216 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 221 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
217 if (!vcpu) { 222 if (!vcpu) {
218 err = -ENOMEM; 223 err = -ENOMEM;
@@ -263,6 +268,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
263{ 268{
264 /* Force users to call KVM_ARM_VCPU_INIT */ 269 /* Force users to call KVM_ARM_VCPU_INIT */
265 vcpu->arch.target = -1; 270 vcpu->arch.target = -1;
271 bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
266 272
267 /* Set up the timer */ 273 /* Set up the timer */
268 kvm_timer_vcpu_init(vcpu); 274 kvm_timer_vcpu_init(vcpu);
@@ -419,6 +425,7 @@ static void update_vttbr(struct kvm *kvm)
419 425
420static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) 426static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
421{ 427{
428 struct kvm *kvm = vcpu->kvm;
422 int ret; 429 int ret;
423 430
424 if (likely(vcpu->arch.has_run_once)) 431 if (likely(vcpu->arch.has_run_once))
@@ -427,15 +434,23 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
427 vcpu->arch.has_run_once = true; 434 vcpu->arch.has_run_once = true;
428 435
429 /* 436 /*
430 * Initialize the VGIC before running a vcpu the first time on 437 * Map the VGIC hardware resources before running a vcpu the first
431 * this VM. 438 * time on this VM.
432 */ 439 */
433 if (unlikely(!vgic_initialized(vcpu->kvm))) { 440 if (unlikely(!vgic_ready(kvm))) {
434 ret = kvm_vgic_init(vcpu->kvm); 441 ret = kvm_vgic_map_resources(kvm);
435 if (ret) 442 if (ret)
436 return ret; 443 return ret;
437 } 444 }
438 445
446 /*
447 * Enable the arch timers only if we have an in-kernel VGIC
448 * and it has been properly initialized, since we cannot handle
449 * interrupts from the virtual timer with a userspace gic.
450 */
451 if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
452 kvm_timer_enable(kvm);
453
439 return 0; 454 return 0;
440} 455}
441 456
@@ -649,6 +664,48 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
649 return -EINVAL; 664 return -EINVAL;
650} 665}
651 666
667static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
668 const struct kvm_vcpu_init *init)
669{
670 unsigned int i;
671 int phys_target = kvm_target_cpu();
672
673 if (init->target != phys_target)
674 return -EINVAL;
675
676 /*
677 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
678 * use the same target.
679 */
680 if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
681 return -EINVAL;
682
683 /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
684 for (i = 0; i < sizeof(init->features) * 8; i++) {
685 bool set = (init->features[i / 32] & (1 << (i % 32)));
686
687 if (set && i >= KVM_VCPU_MAX_FEATURES)
688 return -ENOENT;
689
690 /*
691 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
692 * use the same feature set.
693 */
694 if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
695 test_bit(i, vcpu->arch.features) != set)
696 return -EINVAL;
697
698 if (set)
699 set_bit(i, vcpu->arch.features);
700 }
701
702 vcpu->arch.target = phys_target;
703
704 /* Now we know what it is, we can reset it. */
705 return kvm_reset_vcpu(vcpu);
706}
707
708
652static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, 709static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
653 struct kvm_vcpu_init *init) 710 struct kvm_vcpu_init *init)
654{ 711{
@@ -659,10 +716,21 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
659 return ret; 716 return ret;
660 717
661 /* 718 /*
719 * Ensure a rebooted VM will fault in RAM pages and detect if the
720 * guest MMU is turned off and flush the caches as needed.
721 */
722 if (vcpu->arch.has_run_once)
723 stage2_unmap_vm(vcpu->kvm);
724
725 vcpu_reset_hcr(vcpu);
726
727 /*
662 * Handle the "start in power-off" case by marking the VCPU as paused. 728 * Handle the "start in power-off" case by marking the VCPU as paused.
663 */ 729 */
664 if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) 730 if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
665 vcpu->arch.pause = true; 731 vcpu->arch.pause = true;
732 else
733 vcpu->arch.pause = false;
666 734
667 return 0; 735 return 0;
668} 736}
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index cc0b78769bd8..384bab67c462 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
38 38
39int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 39int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
40{ 40{
41 vcpu->arch.hcr = HCR_GUEST_MASK;
42 return 0; 41 return 0;
43} 42}
44 43
@@ -274,31 +273,6 @@ int __attribute_const__ kvm_target_cpu(void)
274 } 273 }
275} 274}
276 275
277int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
278 const struct kvm_vcpu_init *init)
279{
280 unsigned int i;
281
282 /* We can only cope with guest==host and only on A15/A7 (for now). */
283 if (init->target != kvm_target_cpu())
284 return -EINVAL;
285
286 vcpu->arch.target = init->target;
287 bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
288
289 /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
290 for (i = 0; i < sizeof(init->features) * 8; i++) {
291 if (test_bit(i, (void *)init->features)) {
292 if (i >= KVM_VCPU_MAX_FEATURES)
293 return -ENOENT;
294 set_bit(i, vcpu->arch.features);
295 }
296 }
297
298 /* Now we know what it is, we can reset it. */
299 return kvm_reset_vcpu(vcpu);
300}
301
302int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) 276int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
303{ 277{
304 int target = kvm_target_cpu(); 278 int target = kvm_target_cpu();
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 4cb5a93182e9..5d3bfc0eb3f0 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -187,15 +187,18 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
187 } 187 }
188 188
189 rt = vcpu->arch.mmio_decode.rt; 189 rt = vcpu->arch.mmio_decode.rt;
190 data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);
191 190
192 trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE : 191 if (mmio.is_write) {
193 KVM_TRACE_MMIO_READ_UNSATISFIED, 192 data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt),
194 mmio.len, fault_ipa, 193 mmio.len);
195 (mmio.is_write) ? data : 0);
196 194
197 if (mmio.is_write) 195 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len,
196 fault_ipa, data);
198 mmio_write_buf(mmio.data, mmio.len, data); 197 mmio_write_buf(mmio.data, mmio.len, data);
198 } else {
199 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len,
200 fault_ipa, 0);
201 }
199 202
200 if (vgic_handle_mmio(vcpu, run, &mmio)) 203 if (vgic_handle_mmio(vcpu, run, &mmio))
201 return 1; 204 return 1;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 8664ff17cbbe..1dc9778a00af 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -612,6 +612,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
612 unmap_range(kvm, kvm->arch.pgd, start, size); 612 unmap_range(kvm, kvm->arch.pgd, start, size);
613} 613}
614 614
615static void stage2_unmap_memslot(struct kvm *kvm,
616 struct kvm_memory_slot *memslot)
617{
618 hva_t hva = memslot->userspace_addr;
619 phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
620 phys_addr_t size = PAGE_SIZE * memslot->npages;
621 hva_t reg_end = hva + size;
622
623 /*
624 * A memory region could potentially cover multiple VMAs, and any holes
625 * between them, so iterate over all of them to find out if we should
626 * unmap any of them.
627 *
628 * +--------------------------------------------+
629 * +---------------+----------------+ +----------------+
630 * | : VMA 1 | VMA 2 | | VMA 3 : |
631 * +---------------+----------------+ +----------------+
632 * | memory region |
633 * +--------------------------------------------+
634 */
635 do {
636 struct vm_area_struct *vma = find_vma(current->mm, hva);
637 hva_t vm_start, vm_end;
638
639 if (!vma || vma->vm_start >= reg_end)
640 break;
641
642 /*
643 * Take the intersection of this VMA with the memory region
644 */
645 vm_start = max(hva, vma->vm_start);
646 vm_end = min(reg_end, vma->vm_end);
647
648 if (!(vma->vm_flags & VM_PFNMAP)) {
649 gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
650 unmap_stage2_range(kvm, gpa, vm_end - vm_start);
651 }
652 hva = vm_end;
653 } while (hva < reg_end);
654}
655
656/**
657 * stage2_unmap_vm - Unmap Stage-2 RAM mappings
658 * @kvm: The struct kvm pointer
659 *
660 * Go through the memregions and unmap any reguler RAM
661 * backing memory already mapped to the VM.
662 */
663void stage2_unmap_vm(struct kvm *kvm)
664{
665 struct kvm_memslots *slots;
666 struct kvm_memory_slot *memslot;
667 int idx;
668
669 idx = srcu_read_lock(&kvm->srcu);
670 spin_lock(&kvm->mmu_lock);
671
672 slots = kvm_memslots(kvm);
673 kvm_for_each_memslot(memslot, slots)
674 stage2_unmap_memslot(kvm, memslot);
675
676 spin_unlock(&kvm->mmu_lock);
677 srcu_read_unlock(&kvm->srcu, idx);
678}
679
615/** 680/**
616 * kvm_free_stage2_pgd - free all stage-2 tables 681 * kvm_free_stage2_pgd - free all stage-2 tables
617 * @kvm: The KVM struct pointer for the VM. 682 * @kvm: The KVM struct pointer for the VM.
@@ -853,6 +918,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
853 struct vm_area_struct *vma; 918 struct vm_area_struct *vma;
854 pfn_t pfn; 919 pfn_t pfn;
855 pgprot_t mem_type = PAGE_S2; 920 pgprot_t mem_type = PAGE_S2;
921 bool fault_ipa_uncached;
856 922
857 write_fault = kvm_is_write_fault(vcpu); 923 write_fault = kvm_is_write_fault(vcpu);
858 if (fault_status == FSC_PERM && !write_fault) { 924 if (fault_status == FSC_PERM && !write_fault) {
@@ -919,6 +985,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
919 if (!hugetlb && !force_pte) 985 if (!hugetlb && !force_pte)
920 hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); 986 hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
921 987
988 fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT;
989
922 if (hugetlb) { 990 if (hugetlb) {
923 pmd_t new_pmd = pfn_pmd(pfn, mem_type); 991 pmd_t new_pmd = pfn_pmd(pfn, mem_type);
924 new_pmd = pmd_mkhuge(new_pmd); 992 new_pmd = pmd_mkhuge(new_pmd);
@@ -926,7 +994,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
926 kvm_set_s2pmd_writable(&new_pmd); 994 kvm_set_s2pmd_writable(&new_pmd);
927 kvm_set_pfn_dirty(pfn); 995 kvm_set_pfn_dirty(pfn);
928 } 996 }
929 coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE); 997 coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
998 fault_ipa_uncached);
930 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); 999 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
931 } else { 1000 } else {
932 pte_t new_pte = pfn_pte(pfn, mem_type); 1001 pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -934,7 +1003,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
934 kvm_set_s2pte_writable(&new_pte); 1003 kvm_set_s2pte_writable(&new_pte);
935 kvm_set_pfn_dirty(pfn); 1004 kvm_set_pfn_dirty(pfn);
936 } 1005 }
937 coherent_cache_guest_page(vcpu, hva, PAGE_SIZE); 1006 coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
1007 fault_ipa_uncached);
938 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, 1008 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
939 pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); 1009 pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
940 } 1010 }
@@ -1294,11 +1364,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
1294 hva = vm_end; 1364 hva = vm_end;
1295 } while (hva < reg_end); 1365 } while (hva < reg_end);
1296 1366
1297 if (ret) { 1367 spin_lock(&kvm->mmu_lock);
1298 spin_lock(&kvm->mmu_lock); 1368 if (ret)
1299 unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); 1369 unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
1300 spin_unlock(&kvm->mmu_lock); 1370 else
1301 } 1371 stage2_flush_memslot(kvm, memslot);
1372 spin_unlock(&kvm->mmu_lock);
1302 return ret; 1373 return ret;
1303} 1374}
1304 1375
@@ -1310,6 +1381,15 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
1310int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 1381int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
1311 unsigned long npages) 1382 unsigned long npages)
1312{ 1383{
1384 /*
1385 * Readonly memslots are not incoherent with the caches by definition,
1386 * but in practice, they are used mostly to emulate ROMs or NOR flashes
1387 * that the guest may consider devices and hence map as uncached.
1388 * To prevent incoherency issues in these cases, tag all readonly
1389 * regions as incoherent.
1390 */
1391 if (slot->flags & KVM_MEM_READONLY)
1392 slot->flags |= KVM_MEMSLOT_INCOHERENT;
1313 return 0; 1393 return 0;
1314} 1394}
1315 1395
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 09cf37737ee2..58cb3248d277 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -15,6 +15,7 @@
15 * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18#include <linux/preempt.h>
18#include <linux/kvm_host.h> 19#include <linux/kvm_host.h>
19#include <linux/wait.h> 20#include <linux/wait.h>
20 21
@@ -166,6 +167,23 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
166 167
167static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) 168static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
168{ 169{
170 int i;
171 struct kvm_vcpu *tmp;
172
173 /*
174 * The KVM ABI specifies that a system event exit may call KVM_RUN
175 * again and may perform shutdown/reboot at a later time than when the
176 * actual request is made. Since we are implementing PSCI and a
177 * caller of PSCI reboot and shutdown expects that the system shuts
178 * down or reboots immediately, let's make sure that VCPUs are not run
179 * after this call is handled and before the VCPUs have been
180 * re-initialized.
181 */
182 kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
183 tmp->arch.pause = true;
184 kvm_vcpu_kick(tmp);
185 }
186
169 memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); 187 memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
170 vcpu->run->system_event.type = type; 188 vcpu->run->system_event.type = type;
171 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; 189 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 5674a55b5518..8127e45e2637 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -38,6 +38,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
38void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); 38void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
39void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); 39void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
40 40
41static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
42{
43 vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
44}
45
41static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) 46static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
42{ 47{
43 return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; 48 return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2012c4ba8d67..0b7dfdb931df 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -165,8 +165,6 @@ struct kvm_vcpu_stat {
165 u32 halt_wakeup; 165 u32 halt_wakeup;
166}; 166};
167 167
168int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
169 const struct kvm_vcpu_init *init);
170int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); 168int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
171unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); 169unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
172int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); 170int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
@@ -200,6 +198,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
200struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); 198struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
201 199
202u64 kvm_call_hyp(void *hypfn, ...); 200u64 kvm_call_hyp(void *hypfn, ...);
201void force_vm_exit(const cpumask_t *mask);
203 202
204int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, 203int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
205 int exception_index); 204 int exception_index);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 0caf7a59f6a1..14a74f136272 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -83,6 +83,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
83void free_boot_hyp_pgd(void); 83void free_boot_hyp_pgd(void);
84void free_hyp_pgds(void); 84void free_hyp_pgds(void);
85 85
86void stage2_unmap_vm(struct kvm *kvm);
86int kvm_alloc_stage2_pgd(struct kvm *kvm); 87int kvm_alloc_stage2_pgd(struct kvm *kvm);
87void kvm_free_stage2_pgd(struct kvm *kvm); 88void kvm_free_stage2_pgd(struct kvm *kvm);
88int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, 89int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -243,9 +244,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
243} 244}
244 245
245static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, 246static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
246 unsigned long size) 247 unsigned long size,
248 bool ipa_uncached)
247{ 249{
248 if (!vcpu_has_cache_enabled(vcpu)) 250 if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
249 kvm_flush_dcache_to_poc((void *)hva, size); 251 kvm_flush_dcache_to_poc((void *)hva, size);
250 252
251 if (!icache_is_aliasing()) { /* PIPT */ 253 if (!icache_is_aliasing()) { /* PIPT */
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 76794692c20b..9535bd555d1d 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
38 38
39int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 39int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
40{ 40{
41 vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
42 return 0; 41 return 0;
43} 42}
44 43
@@ -297,31 +296,6 @@ int __attribute_const__ kvm_target_cpu(void)
297 return -EINVAL; 296 return -EINVAL;
298} 297}
299 298
300int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
301 const struct kvm_vcpu_init *init)
302{
303 unsigned int i;
304 int phys_target = kvm_target_cpu();
305
306 if (init->target != phys_target)
307 return -EINVAL;
308
309 vcpu->arch.target = phys_target;
310 bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
311
312 /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
313 for (i = 0; i < sizeof(init->features) * 8; i++) {
314 if (init->features[i / 32] & (1 << (i % 32))) {
315 if (i >= KVM_VCPU_MAX_FEATURES)
316 return -ENOENT;
317 set_bit(i, vcpu->arch.features);
318 }
319 }
320
321 /* Now we know what it is, we can reset it. */
322 return kvm_reset_vcpu(vcpu);
323}
324
325int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) 299int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
326{ 300{
327 int target = kvm_target_cpu(); 301 int target = kvm_target_cpu();
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 536d13b0bea6..371b55bc5a6e 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -20,7 +20,6 @@ config IA64
20 select HAVE_DYNAMIC_FTRACE if (!ITANIUM) 20 select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
21 select HAVE_FUNCTION_TRACER 21 select HAVE_FUNCTION_TRACER
22 select HAVE_DMA_ATTRS 22 select HAVE_DMA_ATTRS
23 select HAVE_KVM
24 select TTY 23 select TTY
25 select HAVE_ARCH_TRACEHOOK 24 select HAVE_ARCH_TRACEHOOK
26 select HAVE_DMA_API_DEBUG 25 select HAVE_DMA_API_DEBUG
@@ -640,8 +639,6 @@ source "security/Kconfig"
640 639
641source "crypto/Kconfig" 640source "crypto/Kconfig"
642 641
643source "arch/ia64/kvm/Kconfig"
644
645source "lib/Kconfig" 642source "lib/Kconfig"
646 643
647config IOMMU_HELPER 644config IOMMU_HELPER
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 5441b14994fc..970d0bd99621 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -53,7 +53,6 @@ core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/
53core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ 53core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
54core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ 54core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/
55core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/ 55core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/
56core-$(CONFIG_KVM) += arch/ia64/kvm/
57 56
58drivers-$(CONFIG_PCI) += arch/ia64/pci/ 57drivers-$(CONFIG_PCI) += arch/ia64/pci/
59drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ 58drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
deleted file mode 100644
index 4729752b7256..000000000000
--- a/arch/ia64/include/asm/kvm_host.h
+++ /dev/null
@@ -1,609 +0,0 @@
1/*
2 * kvm_host.h: used for kvm module, and hold ia64-specific sections.
3 *
4 * Copyright (C) 2007, Intel Corporation.
5 *
6 * Xiantao Zhang <xiantao.zhang@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 *
21 */
22
23#ifndef __ASM_KVM_HOST_H
24#define __ASM_KVM_HOST_H
25
26#define KVM_USER_MEM_SLOTS 32
27
28#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
29#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
30
31/* define exit reasons from vmm to kvm*/
32#define EXIT_REASON_VM_PANIC 0
33#define EXIT_REASON_MMIO_INSTRUCTION 1
34#define EXIT_REASON_PAL_CALL 2
35#define EXIT_REASON_SAL_CALL 3
36#define EXIT_REASON_SWITCH_RR6 4
37#define EXIT_REASON_VM_DESTROY 5
38#define EXIT_REASON_EXTERNAL_INTERRUPT 6
39#define EXIT_REASON_IPI 7
40#define EXIT_REASON_PTC_G 8
41#define EXIT_REASON_DEBUG 20
42
43/*Define vmm address space and vm data space.*/
44#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
45#define KVM_VMM_SHIFT 24
46#define KVM_VMM_BASE 0xD000000000000000
47#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
48
49/*
50 * Define vm_buffer, used by PAL Services, base address.
51 * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
52 */
53#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
54#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
55
56/*
57 * kvm guest's data area looks as follow:
58 *
59 * +----------------------+ ------- KVM_VM_DATA_SIZE
60 * | vcpu[n]'s data | | ___________________KVM_STK_OFFSET
61 * | | | / |
62 * | .......... | | /vcpu's struct&stack |
63 * | .......... | | /---------------------|---- 0
64 * | vcpu[5]'s data | | / vpd |
65 * | vcpu[4]'s data | |/-----------------------|
66 * | vcpu[3]'s data | / vtlb |
67 * | vcpu[2]'s data | /|------------------------|
68 * | vcpu[1]'s data |/ | vhpt |
69 * | vcpu[0]'s data |____________________________|
70 * +----------------------+ |
71 * | memory dirty log | |
72 * +----------------------+ |
73 * | vm's data struct | |
74 * +----------------------+ |
75 * | | |
76 * | | |
77 * | | |
78 * | | |
79 * | | |
80 * | | |
81 * | | |
82 * | vm's p2m table | |
83 * | | |
84 * | | |
85 * | | | |
86 * vm's data->| | | |
87 * +----------------------+ ------- 0
88 * To support large memory, needs to increase the size of p2m.
89 * To support more vcpus, needs to ensure it has enough space to
90 * hold vcpus' data.
91 */
92
93#define KVM_VM_DATA_SHIFT 26
94#define KVM_VM_DATA_SIZE (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
95#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VM_DATA_SIZE)
96
97#define KVM_P2M_BASE KVM_VM_DATA_BASE
98#define KVM_P2M_SIZE (__IA64_UL_CONST(24) << 20)
99
100#define VHPT_SHIFT 16
101#define VHPT_SIZE (__IA64_UL_CONST(1) << VHPT_SHIFT)
102#define VHPT_NUM_ENTRIES (__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
103
104#define VTLB_SHIFT 16
105#define VTLB_SIZE (__IA64_UL_CONST(1) << VTLB_SHIFT)
106#define VTLB_NUM_ENTRIES (1UL << (VHPT_SHIFT-5))
107
108#define VPD_SHIFT 16
109#define VPD_SIZE (__IA64_UL_CONST(1) << VPD_SHIFT)
110
111#define VCPU_STRUCT_SHIFT 16
112#define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
113
114/*
115 * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h
116 */
117#define KVM_STK_SHIFT 16
118#define KVM_STK_OFFSET (__IA64_UL_CONST(1)<< KVM_STK_SHIFT)
119
120#define KVM_VM_STRUCT_SHIFT 19
121#define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
122
123#define KVM_MEM_DIRY_LOG_SHIFT 19
124#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
125
126#ifndef __ASSEMBLY__
127
128/*Define the max vcpus and memory for Guests.*/
129#define KVM_MAX_VCPUS (KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
130 KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
131#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
132
133#define VMM_LOG_LEN 256
134
135#include <linux/types.h>
136#include <linux/mm.h>
137#include <linux/kvm.h>
138#include <linux/kvm_para.h>
139#include <linux/kvm_types.h>
140
141#include <asm/pal.h>
142#include <asm/sal.h>
143#include <asm/page.h>
144
145struct kvm_vcpu_data {
146 char vcpu_vhpt[VHPT_SIZE];
147 char vcpu_vtlb[VTLB_SIZE];
148 char vcpu_vpd[VPD_SIZE];
149 char vcpu_struct[VCPU_STRUCT_SIZE];
150};
151
152struct kvm_vm_data {
153 char kvm_p2m[KVM_P2M_SIZE];
154 char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
155 char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
156 struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
157};
158
159#define VCPU_BASE(n) (KVM_VM_DATA_BASE + \
160 offsetof(struct kvm_vm_data, vcpu_data[n]))
161#define KVM_VM_BASE (KVM_VM_DATA_BASE + \
162 offsetof(struct kvm_vm_data, kvm_vm_struct))
163#define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \
164 offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
165
166#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
167#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
168#define VPD_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
169#define VCPU_STRUCT_BASE(n) (VCPU_BASE(n) + \
170 offsetof(struct kvm_vcpu_data, vcpu_struct))
171
172/*IO section definitions*/
173#define IOREQ_READ 1
174#define IOREQ_WRITE 0
175
176#define STATE_IOREQ_NONE 0
177#define STATE_IOREQ_READY 1
178#define STATE_IOREQ_INPROCESS 2
179#define STATE_IORESP_READY 3
180
181/*Guest Physical address layout.*/
182#define GPFN_MEM (0UL << 60) /* Guest pfn is normal mem */
183#define GPFN_FRAME_BUFFER (1UL << 60) /* VGA framebuffer */
184#define GPFN_LOW_MMIO (2UL << 60) /* Low MMIO range */
185#define GPFN_PIB (3UL << 60) /* PIB base */
186#define GPFN_IOSAPIC (4UL << 60) /* IOSAPIC base */
187#define GPFN_LEGACY_IO (5UL << 60) /* Legacy I/O base */
188#define GPFN_GFW (6UL << 60) /* Guest Firmware */
189#define GPFN_PHYS_MMIO (7UL << 60) /* Directed MMIO Range */
190
191#define GPFN_IO_MASK (7UL << 60) /* Guest pfn is I/O type */
192#define GPFN_INV_MASK (1UL << 63) /* Guest pfn is invalid */
193#define INVALID_MFN (~0UL)
194#define MEM_G (1UL << 30)
195#define MEM_M (1UL << 20)
196#define MMIO_START (3 * MEM_G)
197#define MMIO_SIZE (512 * MEM_M)
198#define VGA_IO_START 0xA0000UL
199#define VGA_IO_SIZE 0x20000
200#define LEGACY_IO_START (MMIO_START + MMIO_SIZE)
201#define LEGACY_IO_SIZE (64 * MEM_M)
202#define IO_SAPIC_START 0xfec00000UL
203#define IO_SAPIC_SIZE 0x100000
204#define PIB_START 0xfee00000UL
205#define PIB_SIZE 0x200000
206#define GFW_START (4 * MEM_G - 16 * MEM_M)
207#define GFW_SIZE (16 * MEM_M)
208
209/*Deliver mode, defined for ioapic.c*/
210#define dest_Fixed IOSAPIC_FIXED
211#define dest_LowestPrio IOSAPIC_LOWEST_PRIORITY
212
213#define NMI_VECTOR 2
214#define ExtINT_VECTOR 0
215#define NULL_VECTOR (-1)
216#define IA64_SPURIOUS_INT_VECTOR 0x0f
217
218#define VCPU_LID(v) (((u64)(v)->vcpu_id) << 24)
219
220/*
221 *Delivery mode
222 */
223#define SAPIC_DELIV_SHIFT 8
224#define SAPIC_FIXED 0x0
225#define SAPIC_LOWEST_PRIORITY 0x1
226#define SAPIC_PMI 0x2
227#define SAPIC_NMI 0x4
228#define SAPIC_INIT 0x5
229#define SAPIC_EXTINT 0x7
230
231/*
232 * vcpu->requests bit members for arch
233 */
234#define KVM_REQ_PTC_G 32
235#define KVM_REQ_RESUME 33
236
237struct kvm_mmio_req {
238 uint64_t addr; /* physical address */
239 uint64_t size; /* size in bytes */
240 uint64_t data; /* data (or paddr of data) */
241 uint8_t state:4;
242 uint8_t dir:1; /* 1=read, 0=write */
243};
244
245/*Pal data struct */
246struct kvm_pal_call{
247 /*In area*/
248 uint64_t gr28;
249 uint64_t gr29;
250 uint64_t gr30;
251 uint64_t gr31;
252 /*Out area*/
253 struct ia64_pal_retval ret;
254};
255
256/* Sal data structure */
257struct kvm_sal_call{
258 /*In area*/
259 uint64_t in0;
260 uint64_t in1;
261 uint64_t in2;
262 uint64_t in3;
263 uint64_t in4;
264 uint64_t in5;
265 uint64_t in6;
266 uint64_t in7;
267 struct sal_ret_values ret;
268};
269
270/*Guest change rr6*/
271struct kvm_switch_rr6 {
272 uint64_t old_rr;
273 uint64_t new_rr;
274};
275
276union ia64_ipi_a{
277 unsigned long val;
278 struct {
279 unsigned long rv : 3;
280 unsigned long ir : 1;
281 unsigned long eid : 8;
282 unsigned long id : 8;
283 unsigned long ib_base : 44;
284 };
285};
286
287union ia64_ipi_d {
288 unsigned long val;
289 struct {
290 unsigned long vector : 8;
291 unsigned long dm : 3;
292 unsigned long ig : 53;
293 };
294};
295
296/*ipi check exit data*/
297struct kvm_ipi_data{
298 union ia64_ipi_a addr;
299 union ia64_ipi_d data;
300};
301
302/*global purge data*/
303struct kvm_ptc_g {
304 unsigned long vaddr;
305 unsigned long rr;
306 unsigned long ps;
307 struct kvm_vcpu *vcpu;
308};
309
310/*Exit control data */
311struct exit_ctl_data{
312 uint32_t exit_reason;
313 uint32_t vm_status;
314 union {
315 struct kvm_mmio_req ioreq;
316 struct kvm_pal_call pal_data;
317 struct kvm_sal_call sal_data;
318 struct kvm_switch_rr6 rr_data;
319 struct kvm_ipi_data ipi_data;
320 struct kvm_ptc_g ptc_g_data;
321 } u;
322};
323
324union pte_flags {
325 unsigned long val;
326 struct {
327 unsigned long p : 1; /*0 */
328 unsigned long : 1; /* 1 */
329 unsigned long ma : 3; /* 2-4 */
330 unsigned long a : 1; /* 5 */
331 unsigned long d : 1; /* 6 */
332 unsigned long pl : 2; /* 7-8 */
333 unsigned long ar : 3; /* 9-11 */
334 unsigned long ppn : 38; /* 12-49 */
335 unsigned long : 2; /* 50-51 */
336 unsigned long ed : 1; /* 52 */
337 };
338};
339
340union ia64_pta {
341 unsigned long val;
342 struct {
343 unsigned long ve : 1;
344 unsigned long reserved0 : 1;
345 unsigned long size : 6;
346 unsigned long vf : 1;
347 unsigned long reserved1 : 6;
348 unsigned long base : 49;
349 };
350};
351
352struct thash_cb {
353 /* THASH base information */
354 struct thash_data *hash; /* hash table pointer */
355 union ia64_pta pta;
356 int num;
357};
358
359struct kvm_vcpu_stat {
360 u32 halt_wakeup;
361};
362
363struct kvm_vcpu_arch {
364 int launched;
365 int last_exit;
366 int last_run_cpu;
367 int vmm_tr_slot;
368 int vm_tr_slot;
369 int sn_rtc_tr_slot;
370
371#define KVM_MP_STATE_RUNNABLE 0
372#define KVM_MP_STATE_UNINITIALIZED 1
373#define KVM_MP_STATE_INIT_RECEIVED 2
374#define KVM_MP_STATE_HALTED 3
375 int mp_state;
376
377#define MAX_PTC_G_NUM 3
378 int ptc_g_count;
379 struct kvm_ptc_g ptc_g_data[MAX_PTC_G_NUM];
380
381 /*halt timer to wake up sleepy vcpus*/
382 struct hrtimer hlt_timer;
383 long ht_active;
384
385 struct kvm_lapic *apic; /* kernel irqchip context */
386 struct vpd *vpd;
387
388 /* Exit data for vmm_transition*/
389 struct exit_ctl_data exit_data;
390
391 cpumask_t cache_coherent_map;
392
393 unsigned long vmm_rr;
394 unsigned long host_rr6;
395 unsigned long psbits[8];
396 unsigned long cr_iipa;
397 unsigned long cr_isr;
398 unsigned long vsa_base;
399 unsigned long dirty_log_lock_pa;
400 unsigned long __gp;
401 /* TR and TC. */
402 struct thash_data itrs[NITRS];
403 struct thash_data dtrs[NDTRS];
404 /* Bit is set if there is a tr/tc for the region. */
405 unsigned char itr_regions;
406 unsigned char dtr_regions;
407 unsigned char tc_regions;
408 /* purge all */
409 unsigned long ptce_base;
410 unsigned long ptce_count[2];
411 unsigned long ptce_stride[2];
412 /* itc/itm */
413 unsigned long last_itc;
414 long itc_offset;
415 unsigned long itc_check;
416 unsigned long timer_check;
417 unsigned int timer_pending;
418 unsigned int timer_fired;
419
420 unsigned long vrr[8];
421 unsigned long ibr[8];
422 unsigned long dbr[8];
423 unsigned long insvc[4]; /* Interrupt in service. */
424 unsigned long xtp;
425
426 unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
427 unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */
428 unsigned long metaphysical_saved_rr0; /* from kvm_arch */
429 unsigned long metaphysical_saved_rr4; /* from kvm_arch */
430 unsigned long fp_psr; /*used for lazy float register */
431 unsigned long saved_gp;
432 /* for physical emulation */
433 int mode_flags;
434 struct thash_cb vtlb;
435 struct thash_cb vhpt;
436 char irq_check;
437 char irq_new_pending;
438
439 unsigned long opcode;
440 unsigned long cause;
441 char log_buf[VMM_LOG_LEN];
442 union context host;
443 union context guest;
444
445 char mmio_data[8];
446};
447
448struct kvm_vm_stat {
449 u64 remote_tlb_flush;
450};
451
452struct kvm_sal_data {
453 unsigned long boot_ip;
454 unsigned long boot_gp;
455};
456
457struct kvm_arch_memory_slot {
458};
459
460struct kvm_arch {
461 spinlock_t dirty_log_lock;
462
463 unsigned long vm_base;
464 unsigned long metaphysical_rr0;
465 unsigned long metaphysical_rr4;
466 unsigned long vmm_init_rr;
467
468 int is_sn2;
469
470 struct kvm_ioapic *vioapic;
471 struct kvm_vm_stat stat;
472 struct kvm_sal_data rdv_sal_data;
473
474 struct list_head assigned_dev_head;
475 struct iommu_domain *iommu_domain;
476 bool iommu_noncoherent;
477
478 unsigned long irq_sources_bitmap;
479 unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
480};
481
/*
 * 64-bit value overlaid with version-style bit-fields (number, revision,
 * model, family, archrev); the top 24 bits (rv) carry no named field.
 * NOTE(review): the bit ranges below assume bit-fields are allocated
 * starting from the least-significant bit, which is implementation-defined
 * -- confirm for the target ABI.
 */
482union cpuid3_t {
483 u64 value;
484 struct {
485 u64 number : 8; /* 0-7 */
486 u64 revision : 8; /* 8-15 */
487 u64 model : 8; /* 16-23 */
488 u64 family : 8; /* 24-31 */
489 u64 archrev : 8; /* 32-39 */
490 u64 rv : 24; /* 40-63 */
491 };
492};
493
494struct kvm_pt_regs {
495 /* The following registers are saved by SAVE_MIN: */
496 unsigned long b6; /* scratch */
497 unsigned long b7; /* scratch */
498
499 unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */
500 unsigned long ar_ssd; /* reserved for future use (scratch) */
501
502 unsigned long r8; /* scratch (return value register 0) */
503 unsigned long r9; /* scratch (return value register 1) */
504 unsigned long r10; /* scratch (return value register 2) */
505 unsigned long r11; /* scratch (return value register 3) */
506
507 unsigned long cr_ipsr; /* interrupted task's psr */
508 unsigned long cr_iip; /* interrupted task's instruction pointer */
509 unsigned long cr_ifs; /* interrupted task's function state */
510
511 unsigned long ar_unat; /* interrupted task's NaT register (preserved) */
512 unsigned long ar_pfs; /* prev function state */
513 unsigned long ar_rsc; /* RSE configuration */
514 /* The following two are valid only if cr_ipsr.cpl > 0: */
515 unsigned long ar_rnat; /* RSE NaT */
516 unsigned long ar_bspstore; /* RSE bspstore */
517
518 unsigned long pr; /* 64 predicate registers (1 bit each) */
519 unsigned long b0; /* return pointer (bp) */
520 unsigned long loadrs; /* size of dirty partition << 16 */
521
522 unsigned long r1; /* the gp pointer */
523 unsigned long r12; /* interrupted task's memory stack pointer */
524 unsigned long r13; /* thread pointer */
525
526 unsigned long ar_fpsr; /* floating point status (preserved) */
527 unsigned long r15; /* scratch */
528
529 /* The remaining registers are NOT saved for system calls. */
530 unsigned long r14; /* scratch */
531 unsigned long r2; /* scratch */
532 unsigned long r3; /* scratch */
533 unsigned long r16; /* scratch */
534 unsigned long r17; /* scratch */
535 unsigned long r18; /* scratch */
536 unsigned long r19; /* scratch */
537 unsigned long r20; /* scratch */
538 unsigned long r21; /* scratch */
539 unsigned long r22; /* scratch */
540 unsigned long r23; /* scratch */
541 unsigned long r24; /* scratch */
542 unsigned long r25; /* scratch */
543 unsigned long r26; /* scratch */
544 unsigned long r27; /* scratch */
545 unsigned long r28; /* scratch */
546 unsigned long r29; /* scratch */
547 unsigned long r30; /* scratch */
548 unsigned long r31; /* scratch */
549 unsigned long ar_ccv; /* compare/exchange value (scratch) */
550
551 /*
552 * Floating point registers that the kernel considers scratch:
553 */
554 struct ia64_fpreg f6; /* scratch */
555 struct ia64_fpreg f7; /* scratch */
556 struct ia64_fpreg f8; /* scratch */
557 struct ia64_fpreg f9; /* scratch */
558 struct ia64_fpreg f10; /* scratch */
559 struct ia64_fpreg f11; /* scratch */
560
561 unsigned long r4; /* preserved */
562 unsigned long r5; /* preserved */
563 unsigned long r6; /* preserved */
564 unsigned long r7; /* preserved */
565 unsigned long eml_unat; /* used for emulating instruction */
566 unsigned long pad0; /* alignment pad */
567};
568
569static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
570{
571 return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
572}
573
574typedef int kvm_vmm_entry(void);
575typedef void kvm_tramp_entry(union context *host, union context *guest);
576
577struct kvm_vmm_info{
578 struct module *module;
579 kvm_vmm_entry *vmm_entry;
580 kvm_tramp_entry *tramp_entry;
581 unsigned long vmm_ivt;
582 unsigned long patch_mov_ar;
583 unsigned long patch_mov_ar_sn2;
584};
585
586int kvm_highest_pending_irq(struct kvm_vcpu *vcpu);
587int kvm_emulate_halt(struct kvm_vcpu *vcpu);
588int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
589void kvm_sal_emul(struct kvm_vcpu *vcpu);
590
591#define __KVM_HAVE_ARCH_VM_ALLOC 1
592struct kvm *kvm_arch_alloc_vm(void);
593void kvm_arch_free_vm(struct kvm *kvm);
594
595static inline void kvm_arch_sync_events(struct kvm *kvm) {}
596static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {}
597static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {}
598static inline void kvm_arch_free_memslot(struct kvm *kvm,
599 struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
600static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
601static inline void kvm_arch_commit_memory_region(struct kvm *kvm,
602 struct kvm_userspace_memory_region *mem,
603 const struct kvm_memory_slot *old,
604 enum kvm_mr_change change) {}
605static inline void kvm_arch_hardware_unsetup(void) {}
606
607#endif /* __ASSEMBLY__*/
608
609#endif
diff --git a/arch/ia64/include/asm/pvclock-abi.h b/arch/ia64/include/asm/pvclock-abi.h
deleted file mode 100644
index 42b233bedeb5..000000000000
--- a/arch/ia64/include/asm/pvclock-abi.h
+++ /dev/null
@@ -1,48 +0,0 @@
1/*
2 * Same structures as x86's.
3 * Hopefully asm-x86/pvclock-abi.h will be moved somewhere more generic.
4 * For now, duplicate the definitions here.
5 */
6
7#ifndef _ASM_IA64__PVCLOCK_ABI_H
8#define _ASM_IA64__PVCLOCK_ABI_H
9#ifndef __ASSEMBLY__
10
11/*
12 * These structs MUST NOT be changed.
13 * They are the ABI between hypervisor and guest OS.
14 * KVM is using this.
15 *
16 * pvclock_vcpu_time_info holds the system time and the tsc timestamp
17 * of the last update. So the guest can use the tsc delta to get a
18 * more precise system time. There is one per virtual cpu.
19 *
20 * pvclock_wall_clock references the point in time when the system
21 * time was zero (usually boot time), thus the guest calculates the
22 * current wall clock by adding the system time.
23 *
24 * Protocol for the "version" fields is: hypervisor raises it (making
25 * it uneven) before it starts updating the fields and raises it again
26 * (making it even) when it is done. Thus the guest can make sure the
27 * time values it got are consistent by checking the version before
28 * and after reading them.
29 */
30
31struct pvclock_vcpu_time_info {
32 u32 version;
33 u32 pad0;
34 u64 tsc_timestamp;
35 u64 system_time;
36 u32 tsc_to_system_mul;
37 s8 tsc_shift;
38 u8 pad[3];
39} __attribute__((__packed__)); /* 32 bytes */
40
41struct pvclock_wall_clock {
42 u32 version;
43 u32 sec;
44 u32 nsec;
45} __attribute__((__packed__));
46
47#endif /* __ASSEMBLY__ */
48#endif /* _ASM_IA64__PVCLOCK_ABI_H */
diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h
deleted file mode 100644
index 99503c284400..000000000000
--- a/arch/ia64/include/uapi/asm/kvm.h
+++ /dev/null
@@ -1,268 +0,0 @@
1#ifndef __ASM_IA64_KVM_H
2#define __ASM_IA64_KVM_H
3
4/*
5 * kvm structure definitions for ia64
6 *
7 * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms and conditions of the GNU General Public License,
11 * version 2, as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
20 * Place - Suite 330, Boston, MA 02111-1307 USA.
21 *
22 */
23
24#include <linux/types.h>
25#include <linux/ioctl.h>
26
27/* Select x86 specific features in <linux/kvm.h> */
28#define __KVM_HAVE_IOAPIC
29#define __KVM_HAVE_IRQ_LINE
30
31/* Architectural interrupt line count. */
32#define KVM_NR_INTERRUPTS 256
33
34#define KVM_IOAPIC_NUM_PINS 48
35
36struct kvm_ioapic_state {
37 __u64 base_address;
38 __u32 ioregsel;
39 __u32 id;
40 __u32 irr;
41 __u32 pad;
42 union {
43 __u64 bits;
44 struct {
45 __u8 vector;
46 __u8 delivery_mode:3;
47 __u8 dest_mode:1;
48 __u8 delivery_status:1;
49 __u8 polarity:1;
50 __u8 remote_irr:1;
51 __u8 trig_mode:1;
52 __u8 mask:1;
53 __u8 reserve:7;
54 __u8 reserved[4];
55 __u8 dest_id;
56 } fields;
57 } redirtbl[KVM_IOAPIC_NUM_PINS];
58};
59
60#define KVM_IRQCHIP_PIC_MASTER 0
61#define KVM_IRQCHIP_PIC_SLAVE 1
62#define KVM_IRQCHIP_IOAPIC 2
63#define KVM_NR_IRQCHIPS 3
64
/*
 * Size in bytes (8 KiB) of the dummy[] member that pads union context.
 * The replacement list is parenthesized so the macro expands correctly
 * inside larger expressions, e.g. x / KVM_CONTEXT_SIZE.
 */
#define KVM_CONTEXT_SIZE (8*1024)
66
67struct kvm_fpreg {
68 union {
69 unsigned long bits[2];
70 long double __dummy; /* force 16-byte alignment */
71 } u;
72};
73
74union context {
75 /* 8K size */
76 char dummy[KVM_CONTEXT_SIZE];
77 struct {
78 unsigned long psr;
79 unsigned long pr;
80 unsigned long caller_unat;
81 unsigned long pad;
82 unsigned long gr[32];
83 unsigned long ar[128];
84 unsigned long br[8];
85 unsigned long cr[128];
86 unsigned long rr[8];
87 unsigned long ibr[8];
88 unsigned long dbr[8];
89 unsigned long pkr[8];
90 struct kvm_fpreg fr[128];
91 };
92};
93
94struct thash_data {
95 union {
96 struct {
97 unsigned long p : 1; /* 0 */
98 unsigned long rv1 : 1; /* 1 */
99 unsigned long ma : 3; /* 2-4 */
100 unsigned long a : 1; /* 5 */
101 unsigned long d : 1; /* 6 */
102 unsigned long pl : 2; /* 7-8 */
103 unsigned long ar : 3; /* 9-11 */
104 unsigned long ppn : 38; /* 12-49 */
105 unsigned long rv2 : 2; /* 50-51 */
106 unsigned long ed : 1; /* 52 */
107 unsigned long ig1 : 11; /* 53-63 */
108 };
109 struct {
110 unsigned long __rv1 : 53; /* 0-52 */
111 unsigned long contiguous : 1; /*53 */
112 unsigned long tc : 1; /* 54 TR or TC */
113 unsigned long cl : 1;
114 /* 55 I side or D side cache line */
115 unsigned long len : 4; /* 56-59 */
116 unsigned long io : 1; /* 60 entry is for io or not */
117 unsigned long nomap : 1;
118 /* 61 entry can't be inserted into machine TLB. */
119 unsigned long checked : 1;
120 /* 62 for VTLB/VHPT sanity check */
121 unsigned long invalid : 1;
122 /* 63 invalid entry */
123 };
124 unsigned long page_flags;
125 }; /* same for VHPT and TLB */
126
127 union {
128 struct {
129 unsigned long rv3 : 2;
130 unsigned long ps : 6;
131 unsigned long key : 24;
132 unsigned long rv4 : 32;
133 };
134 unsigned long itir;
135 };
136 union {
137 struct {
138 unsigned long ig2 : 12;
139 unsigned long vpn : 49;
140 unsigned long vrn : 3;
141 };
142 unsigned long ifa;
143 unsigned long vadr;
144 struct {
145 unsigned long tag : 63;
146 unsigned long ti : 1;
147 };
148 unsigned long etag;
149 };
150 union {
151 struct thash_data *next;
152 unsigned long rid;
153 unsigned long gpaddr;
154 };
155};
156
157#define NITRS 8
158#define NDTRS 8
159
160struct saved_vpd {
161 unsigned long vhpi;
162 unsigned long vgr[16];
163 unsigned long vbgr[16];
164 unsigned long vnat;
165 unsigned long vbnat;
166 unsigned long vcpuid[5];
167 unsigned long vpsr;
168 unsigned long vpr;
169 union {
170 unsigned long vcr[128];
171 struct {
172 unsigned long dcr;
173 unsigned long itm;
174 unsigned long iva;
175 unsigned long rsv1[5];
176 unsigned long pta;
177 unsigned long rsv2[7];
178 unsigned long ipsr;
179 unsigned long isr;
180 unsigned long rsv3;
181 unsigned long iip;
182 unsigned long ifa;
183 unsigned long itir;
184 unsigned long iipa;
185 unsigned long ifs;
186 unsigned long iim;
187 unsigned long iha;
188 unsigned long rsv4[38];
189 unsigned long lid;
190 unsigned long ivr;
191 unsigned long tpr;
192 unsigned long eoi;
193 unsigned long irr[4];
194 unsigned long itv;
195 unsigned long pmv;
196 unsigned long cmcv;
197 unsigned long rsv5[5];
198 unsigned long lrr0;
199 unsigned long lrr1;
200 unsigned long rsv6[46];
201 };
202 };
203};
204
205struct kvm_regs {
206 struct saved_vpd vpd;
207 /*Arch-regs*/
208 int mp_state;
209 unsigned long vmm_rr;
210 /* TR and TC. */
211 struct thash_data itrs[NITRS];
212 struct thash_data dtrs[NDTRS];
213 /* Bit is set if there is a tr/tc for the region. */
214 unsigned char itr_regions;
215 unsigned char dtr_regions;
216 unsigned char tc_regions;
217
218 char irq_check;
219 unsigned long saved_itc;
220 unsigned long itc_check;
221 unsigned long timer_check;
222 unsigned long timer_pending;
223 unsigned long last_itc;
224
225 unsigned long vrr[8];
226 unsigned long ibr[8];
227 unsigned long dbr[8];
228 unsigned long insvc[4]; /* Interrupt in service. */
229 unsigned long xtp;
230
231 unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
232 unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */
233 unsigned long metaphysical_saved_rr0; /* from kvm_arch */
234 unsigned long metaphysical_saved_rr4; /* from kvm_arch */
235 unsigned long fp_psr; /*used for lazy float register */
236 unsigned long saved_gp;
237 /* for physical emulation */
238
239 union context saved_guest;
240
241 unsigned long reserved[64]; /* for future use */
242};
243
244struct kvm_sregs {
245};
246
247struct kvm_fpu {
248};
249
250#define KVM_IA64_VCPU_STACK_SHIFT 16
251#define KVM_IA64_VCPU_STACK_SIZE (1UL << KVM_IA64_VCPU_STACK_SHIFT)
252
253struct kvm_ia64_vcpu_stack {
254 unsigned char stack[KVM_IA64_VCPU_STACK_SIZE];
255};
256
257struct kvm_debug_exit_arch {
258};
259
260/* for KVM_SET_GUEST_DEBUG */
261struct kvm_guest_debug_arch {
262};
263
264/* definition of registers in kvm_run */
265struct kvm_sync_regs {
266};
267
268#endif
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
deleted file mode 100644
index 3d50ea955c4c..000000000000
--- a/arch/ia64/kvm/Kconfig
+++ /dev/null
@@ -1,66 +0,0 @@
1#
2# KVM configuration
3#
4
5source "virt/kvm/Kconfig"
6
7menuconfig VIRTUALIZATION
8 bool "Virtualization"
9 depends on HAVE_KVM || IA64
10 default y
11 ---help---
12 Say Y here to get to see options for using your Linux host to run other
13 operating systems inside virtual machines (guests).
14 This option alone does not add any kernel code.
15
16 If you say N, all options in this submenu will be skipped and disabled.
17
18if VIRTUALIZATION
19
config KVM
	tristate "Kernel-based Virtual Machine (KVM) support"
	# BROKEN is listed once; the second identical dependency was redundant.
	depends on BROKEN
	depends on HAVE_KVM && MODULES
	select PREEMPT_NOTIFIERS
	select ANON_INODES
	select HAVE_KVM_IRQCHIP
	select HAVE_KVM_IRQFD
	select HAVE_KVM_IRQ_ROUTING
	select KVM_APIC_ARCHITECTURE
	select KVM_MMIO
	---help---
	  Support hosting fully virtualized guest machines using hardware
	  virtualization extensions. You will need a fairly recent
	  processor equipped with virtualization extensions. You will also
	  need to select one or more of the processor modules below.

	  This module provides access to the hardware capabilities through
	  a character device node named /dev/kvm.

	  To compile this as a module, choose M here: the module
	  will be called kvm.

	  If unsure, say N.
45
46config KVM_INTEL
47 tristate "KVM for Intel Itanium 2 processors support"
48 depends on KVM && m
49 ---help---
50 Provides support for KVM on Itanium 2 processors equipped with the VT
51 extensions.
52
53config KVM_DEVICE_ASSIGNMENT
54 bool "KVM legacy PCI device assignment support"
55 depends on KVM && PCI && IOMMU_API
56 default y
57 ---help---
58 Provide support for legacy PCI device assignment through KVM. The
59 kernel now also supports a full featured userspace device driver
60 framework through VFIO, which supersedes much of this support.
61
62 If unsure, say Y.
63
64source drivers/vhost/Kconfig
65
66endif # VIRTUALIZATION
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
deleted file mode 100644
index 18e45ec49bbf..000000000000
--- a/arch/ia64/kvm/Makefile
+++ /dev/null
@@ -1,67 +0,0 @@
1# This Makefile generates asm-offsets.h and builds the sources.
2#
3
4#Generate asm-offsets.h for vmm module build
5offsets-file := asm-offsets.h
6
7always := $(offsets-file)
8targets := $(offsets-file)
9targets += arch/ia64/kvm/asm-offsets.s
10
11# Default sed regexp - multiline due to syntax constraints
12define sed-y
13 "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
14endef
15
16quiet_cmd_offsets = GEN $@
17define cmd_offsets
18 (set -e; \
19 echo "#ifndef __ASM_KVM_OFFSETS_H__"; \
20 echo "#define __ASM_KVM_OFFSETS_H__"; \
21 echo "/*"; \
22 echo " * DO NOT MODIFY."; \
23 echo " *"; \
24 echo " * This file was generated by Makefile"; \
25 echo " *"; \
26 echo " */"; \
27 echo ""; \
28 sed -ne $(sed-y) $<; \
29 echo ""; \
30 echo "#endif" ) > $@
31endef
32
33# We use internal rules to avoid the "is up to date" message from make
34arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c \
35 $(wildcard $(srctree)/arch/ia64/include/asm/*.h)\
36 $(wildcard $(srctree)/include/linux/*.h)
37 $(call if_changed_dep,cc_s_c)
38
39$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s
40 $(call cmd,offsets)
41
42FORCE : $(obj)/$(offsets-file)
43
44#
45# Makefile for Kernel-based Virtual Machine module
46#
47
48ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
49asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
50KVM := ../../../virt/kvm
51
52common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \
53 $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o
54
55ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
56common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o
57endif
58
59kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
60obj-$(CONFIG_KVM) += kvm.o
61
62CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
63kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
64 vtlb.o process.o kvm_lib.o
65#Add link memcpy and memset to avoid possible structure assignment error
66kvm-intel-objs += memcpy.o memset.o
67obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
deleted file mode 100644
index 9324c875caf5..000000000000
--- a/arch/ia64/kvm/asm-offsets.c
+++ /dev/null
@@ -1,241 +0,0 @@
1/*
2 * asm-offsets.c Generate definitions needed by assembly language modules.
3 * This code generates raw asm output which is post-processed
4 * to extract and format the required data.
5 *
6 * Anthony Xu <anthony.xu@intel.com>
7 * Xiantao Zhang <xiantao.zhang@intel.com>
8 * Copyright (c) 2007 Intel Corporation KVM support.
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms and conditions of the GNU General Public License,
12 * version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
21 * Place - Suite 330, Boston, MA 02111-1307 USA.
22 *
23 */
24
25#include <linux/kvm_host.h>
26#include <linux/kbuild.h>
27
28#include "vcpu.h"
29
30void foo(void)
31{
32 DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
33 DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs));
34
35 BLANK();
36
37 DEFINE(VMM_VCPU_META_RR0_OFFSET,
38 offsetof(struct kvm_vcpu, arch.metaphysical_rr0));
39 DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
40 offsetof(struct kvm_vcpu,
41 arch.metaphysical_saved_rr0));
42 DEFINE(VMM_VCPU_VRR0_OFFSET,
43 offsetof(struct kvm_vcpu, arch.vrr[0]));
44 DEFINE(VMM_VPD_IRR0_OFFSET,
45 offsetof(struct vpd, irr[0]));
46 DEFINE(VMM_VCPU_ITC_CHECK_OFFSET,
47 offsetof(struct kvm_vcpu, arch.itc_check));
48 DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET,
49 offsetof(struct kvm_vcpu, arch.irq_check));
50 DEFINE(VMM_VPD_VHPI_OFFSET,
51 offsetof(struct vpd, vhpi));
52 DEFINE(VMM_VCPU_VSA_BASE_OFFSET,
53 offsetof(struct kvm_vcpu, arch.vsa_base));
54 DEFINE(VMM_VCPU_VPD_OFFSET,
55 offsetof(struct kvm_vcpu, arch.vpd));
56 DEFINE(VMM_VCPU_IRQ_CHECK,
57 offsetof(struct kvm_vcpu, arch.irq_check));
58 DEFINE(VMM_VCPU_TIMER_PENDING,
59 offsetof(struct kvm_vcpu, arch.timer_pending));
60 DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
61 offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0));
62 DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
63 offsetof(struct kvm_vcpu, arch.mode_flags));
64 DEFINE(VMM_VCPU_ITC_OFS_OFFSET,
65 offsetof(struct kvm_vcpu, arch.itc_offset));
66 DEFINE(VMM_VCPU_LAST_ITC_OFFSET,
67 offsetof(struct kvm_vcpu, arch.last_itc));
68 DEFINE(VMM_VCPU_SAVED_GP_OFFSET,
69 offsetof(struct kvm_vcpu, arch.saved_gp));
70
71 BLANK();
72
73 DEFINE(VMM_PT_REGS_B6_OFFSET,
74 offsetof(struct kvm_pt_regs, b6));
75 DEFINE(VMM_PT_REGS_B7_OFFSET,
76 offsetof(struct kvm_pt_regs, b7));
77 DEFINE(VMM_PT_REGS_AR_CSD_OFFSET,
78 offsetof(struct kvm_pt_regs, ar_csd));
79 DEFINE(VMM_PT_REGS_AR_SSD_OFFSET,
80 offsetof(struct kvm_pt_regs, ar_ssd));
81 DEFINE(VMM_PT_REGS_R8_OFFSET,
82 offsetof(struct kvm_pt_regs, r8));
83 DEFINE(VMM_PT_REGS_R9_OFFSET,
84 offsetof(struct kvm_pt_regs, r9));
85 DEFINE(VMM_PT_REGS_R10_OFFSET,
86 offsetof(struct kvm_pt_regs, r10));
87 DEFINE(VMM_PT_REGS_R11_OFFSET,
88 offsetof(struct kvm_pt_regs, r11));
89 DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET,
90 offsetof(struct kvm_pt_regs, cr_ipsr));
91 DEFINE(VMM_PT_REGS_CR_IIP_OFFSET,
92 offsetof(struct kvm_pt_regs, cr_iip));
93 DEFINE(VMM_PT_REGS_CR_IFS_OFFSET,
94 offsetof(struct kvm_pt_regs, cr_ifs));
95 DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET,
96 offsetof(struct kvm_pt_regs, ar_unat));
97 DEFINE(VMM_PT_REGS_AR_PFS_OFFSET,
98 offsetof(struct kvm_pt_regs, ar_pfs));
99 DEFINE(VMM_PT_REGS_AR_RSC_OFFSET,
100 offsetof(struct kvm_pt_regs, ar_rsc));
101 DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET,
102 offsetof(struct kvm_pt_regs, ar_rnat));
103
104 DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET,
105 offsetof(struct kvm_pt_regs, ar_bspstore));
106 DEFINE(VMM_PT_REGS_PR_OFFSET,
107 offsetof(struct kvm_pt_regs, pr));
108 DEFINE(VMM_PT_REGS_B0_OFFSET,
109 offsetof(struct kvm_pt_regs, b0));
110 DEFINE(VMM_PT_REGS_LOADRS_OFFSET,
111 offsetof(struct kvm_pt_regs, loadrs));
112 DEFINE(VMM_PT_REGS_R1_OFFSET,
113 offsetof(struct kvm_pt_regs, r1));
114 DEFINE(VMM_PT_REGS_R12_OFFSET,
115 offsetof(struct kvm_pt_regs, r12));
116 DEFINE(VMM_PT_REGS_R13_OFFSET,
117 offsetof(struct kvm_pt_regs, r13));
118 DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET,
119 offsetof(struct kvm_pt_regs, ar_fpsr));
120 DEFINE(VMM_PT_REGS_R15_OFFSET,
121 offsetof(struct kvm_pt_regs, r15));
122 DEFINE(VMM_PT_REGS_R14_OFFSET,
123 offsetof(struct kvm_pt_regs, r14));
124 DEFINE(VMM_PT_REGS_R2_OFFSET,
125 offsetof(struct kvm_pt_regs, r2));
126 DEFINE(VMM_PT_REGS_R3_OFFSET,
127 offsetof(struct kvm_pt_regs, r3));
128 DEFINE(VMM_PT_REGS_R16_OFFSET,
129 offsetof(struct kvm_pt_regs, r16));
130 DEFINE(VMM_PT_REGS_R17_OFFSET,
131 offsetof(struct kvm_pt_regs, r17));
132 DEFINE(VMM_PT_REGS_R18_OFFSET,
133 offsetof(struct kvm_pt_regs, r18));
134 DEFINE(VMM_PT_REGS_R19_OFFSET,
135 offsetof(struct kvm_pt_regs, r19));
136 DEFINE(VMM_PT_REGS_R20_OFFSET,
137 offsetof(struct kvm_pt_regs, r20));
138 DEFINE(VMM_PT_REGS_R21_OFFSET,
139 offsetof(struct kvm_pt_regs, r21));
140 DEFINE(VMM_PT_REGS_R22_OFFSET,
141 offsetof(struct kvm_pt_regs, r22));
142 DEFINE(VMM_PT_REGS_R23_OFFSET,
143 offsetof(struct kvm_pt_regs, r23));
144 DEFINE(VMM_PT_REGS_R24_OFFSET,
145 offsetof(struct kvm_pt_regs, r24));
146 DEFINE(VMM_PT_REGS_R25_OFFSET,
147 offsetof(struct kvm_pt_regs, r25));
148 DEFINE(VMM_PT_REGS_R26_OFFSET,
149 offsetof(struct kvm_pt_regs, r26));
150 DEFINE(VMM_PT_REGS_R27_OFFSET,
151 offsetof(struct kvm_pt_regs, r27));
152 DEFINE(VMM_PT_REGS_R28_OFFSET,
153 offsetof(struct kvm_pt_regs, r28));
154 DEFINE(VMM_PT_REGS_R29_OFFSET,
155 offsetof(struct kvm_pt_regs, r29));
156 DEFINE(VMM_PT_REGS_R30_OFFSET,
157 offsetof(struct kvm_pt_regs, r30));
158 DEFINE(VMM_PT_REGS_R31_OFFSET,
159 offsetof(struct kvm_pt_regs, r31));
160 DEFINE(VMM_PT_REGS_AR_CCV_OFFSET,
161 offsetof(struct kvm_pt_regs, ar_ccv));
162 DEFINE(VMM_PT_REGS_F6_OFFSET,
163 offsetof(struct kvm_pt_regs, f6));
164 DEFINE(VMM_PT_REGS_F7_OFFSET,
165 offsetof(struct kvm_pt_regs, f7));
166 DEFINE(VMM_PT_REGS_F8_OFFSET,
167 offsetof(struct kvm_pt_regs, f8));
168 DEFINE(VMM_PT_REGS_F9_OFFSET,
169 offsetof(struct kvm_pt_regs, f9));
170 DEFINE(VMM_PT_REGS_F10_OFFSET,
171 offsetof(struct kvm_pt_regs, f10));
172 DEFINE(VMM_PT_REGS_F11_OFFSET,
173 offsetof(struct kvm_pt_regs, f11));
174 DEFINE(VMM_PT_REGS_R4_OFFSET,
175 offsetof(struct kvm_pt_regs, r4));
176 DEFINE(VMM_PT_REGS_R5_OFFSET,
177 offsetof(struct kvm_pt_regs, r5));
178 DEFINE(VMM_PT_REGS_R6_OFFSET,
179 offsetof(struct kvm_pt_regs, r6));
180 DEFINE(VMM_PT_REGS_R7_OFFSET,
181 offsetof(struct kvm_pt_regs, r7));
182 DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET,
183 offsetof(struct kvm_pt_regs, eml_unat));
184 DEFINE(VMM_VCPU_IIPA_OFFSET,
185 offsetof(struct kvm_vcpu, arch.cr_iipa));
186 DEFINE(VMM_VCPU_OPCODE_OFFSET,
187 offsetof(struct kvm_vcpu, arch.opcode));
188 DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause));
189 DEFINE(VMM_VCPU_ISR_OFFSET,
190 offsetof(struct kvm_vcpu, arch.cr_isr));
191 DEFINE(VMM_PT_REGS_R16_SLOT,
192 (((offsetof(struct kvm_pt_regs, r16)
193 - sizeof(struct kvm_pt_regs)) >> 3) & 0x3f));
194 DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
195 offsetof(struct kvm_vcpu, arch.mode_flags));
196 DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp));
197 BLANK();
198
199 DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd));
200 DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs));
201 DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET,
202 offsetof(struct kvm_vcpu, arch.insvc[0]));
203 DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta));
204 DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr));
205
206 DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4]));
207 DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5]));
208 DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12]));
209 DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13]));
210 DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0]));
211 DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1]));
212 DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0]));
213 DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1]));
214 DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2]));
215 DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0]));
216 DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16]));
217 DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18]));
218 DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19]));
219 DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21]));
220 DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24]));
221 DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27]));
222 DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28]));
223 DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29]));
224 DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30]));
225 DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36]));
226 DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40]));
227 DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64]));
228 DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65]));
229 DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0]));
230 DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2]));
231 DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8]));
232 DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0]));
233 DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0]));
234 DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2]));
235 DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3]));
236 DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32]));
237 DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33]));
238 DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0]));
239 DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr));
240 BLANK();
241}
diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
deleted file mode 100644
index c0785a728271..000000000000
--- a/arch/ia64/kvm/irq.h
+++ /dev/null
@@ -1,33 +0,0 @@
1/*
2 * irq.h: In-kernel interrupt controller related definitions
3 * Copyright (c) 2008, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 * Authors:
19 * Xiantao Zhang <xiantao.zhang@intel.com>
20 *
21 */
22
23#ifndef __IRQ_H
24#define __IRQ_H
25
26#include "lapic.h"
27
/*
 * Report whether the interrupt controller is handled in the kernel.
 * This implementation unconditionally answers yes (1); @kvm is not
 * examined.
 */
static inline int irqchip_in_kernel(struct kvm *kvm)
{
	const int in_kernel = 1;

	return in_kernel;
}
32
33#endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
deleted file mode 100644
index dbe46f43884d..000000000000
--- a/arch/ia64/kvm/kvm-ia64.c
+++ /dev/null
@@ -1,1942 +0,0 @@
1/*
2 * kvm_ia64.c: Basic KVM support On Itanium series processors
3 *
4 *
5 * Copyright (C) 2007, Intel Corporation.
6 * Xiantao Zhang (xiantao.zhang@intel.com)
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 *
21 */
22
23#include <linux/module.h>
24#include <linux/errno.h>
25#include <linux/percpu.h>
26#include <linux/fs.h>
27#include <linux/slab.h>
28#include <linux/smp.h>
29#include <linux/kvm_host.h>
30#include <linux/kvm.h>
31#include <linux/bitops.h>
32#include <linux/hrtimer.h>
33#include <linux/uaccess.h>
34#include <linux/iommu.h>
35#include <linux/intel-iommu.h>
36#include <linux/pci.h>
37
38#include <asm/pgtable.h>
39#include <asm/gcc_intrin.h>
40#include <asm/pal.h>
41#include <asm/cacheflush.h>
42#include <asm/div64.h>
43#include <asm/tlb.h>
44#include <asm/elf.h>
45#include <asm/sn/addrs.h>
46#include <asm/sn/clksupport.h>
47#include <asm/sn/shub_mmr.h>
48
49#include "misc.h"
50#include "vti.h"
51#include "iodev.h"
52#include "ioapic.h"
53#include "lapic.h"
54#include "irq.h"
55
56static unsigned long kvm_vmm_base;
57static unsigned long kvm_vsa_base;
58static unsigned long kvm_vm_buffer;
59static unsigned long kvm_vm_buffer_size;
60unsigned long kvm_vmm_gp;
61
62static long vp_env_info;
63
64static struct kvm_vmm_info *kvm_vmm_info;
65
66static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
67
68struct kvm_stats_debugfs_item debugfs_entries[] = {
69 { NULL }
70};
71
72static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
73{
74#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
75 if (vcpu->kvm->arch.is_sn2)
76 return rtc_time();
77 else
78#endif
79 return ia64_getreg(_IA64_REG_AR_ITC);
80}
81
/*
 * Issue ia64_fc() on [start, start + len + 32) in 32-byte steps, then
 * synchronize and serialize the instruction stream.
 *
 * The offset is an unsigned long so that it has the same type as @len;
 * the previous int counter was compared against an unsigned long bound
 * and would overflow for lengths above INT_MAX.
 */
static void kvm_flush_icache(unsigned long start, unsigned long len)
{
	unsigned long off;

	for (off = 0; off < len + 32; off += 32)
		ia64_fc((void *)(start + off));

	ia64_sync_i();
	ia64_srlz_i();
}
92
93static void kvm_flush_tlb_all(void)
94{
95 unsigned long i, j, count0, count1, stride0, stride1, addr;
96 long flags;
97
98 addr = local_cpu_data->ptce_base;
99 count0 = local_cpu_data->ptce_count[0];
100 count1 = local_cpu_data->ptce_count[1];
101 stride0 = local_cpu_data->ptce_stride[0];
102 stride1 = local_cpu_data->ptce_stride[1];
103
104 local_irq_save(flags);
105 for (i = 0; i < count0; ++i) {
106 for (j = 0; j < count1; ++j) {
107 ia64_ptce(addr);
108 addr += stride1;
109 }
110 addr += stride0;
111 }
112 local_irq_restore(flags);
113 ia64_srlz_i(); /* srlz.i implies srlz.d */
114}
115
116long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
117{
118 struct ia64_pal_retval iprv;
119
120 PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
121 (u64)opt_handler);
122
123 return iprv.status;
124}
125
126static DEFINE_SPINLOCK(vp_lock);
127
128int kvm_arch_hardware_enable(void)
129{
130 long status;
131 long tmp_base;
132 unsigned long pte;
133 unsigned long saved_psr;
134 int slot;
135
136 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
137 local_irq_save(saved_psr);
138 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
139 local_irq_restore(saved_psr);
140 if (slot < 0)
141 return -EINVAL;
142
143 spin_lock(&vp_lock);
144 status = ia64_pal_vp_init_env(kvm_vsa_base ?
145 VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
146 __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
147 if (status != 0) {
148 spin_unlock(&vp_lock);
149 printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
150 return -EINVAL;
151 }
152
153 if (!kvm_vsa_base) {
154 kvm_vsa_base = tmp_base;
155 printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base);
156 }
157 spin_unlock(&vp_lock);
158 ia64_ptr_entry(0x3, slot);
159
160 return 0;
161}
162
163void kvm_arch_hardware_disable(void)
164{
165
166 long status;
167 int slot;
168 unsigned long pte;
169 unsigned long saved_psr;
170 unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA);
171
172 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
173 PAGE_KERNEL));
174
175 local_irq_save(saved_psr);
176 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
177 local_irq_restore(saved_psr);
178 if (slot < 0)
179 return;
180
181 status = ia64_pal_vp_exit_env(host_iva);
182 if (status)
183 printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n",
184 status);
185 ia64_ptr_entry(0x3, slot);
186}
187
/* Store 0 through @rtn, which is treated as a pointer to int. */
void kvm_arch_check_processor_compat(void *rtn)
{
	int *result = rtn;

	*result = 0;
}
192
193int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
194{
195
196 int r;
197
198 switch (ext) {
199 case KVM_CAP_IRQCHIP:
200 case KVM_CAP_MP_STATE:
201 case KVM_CAP_IRQ_INJECT_STATUS:
202 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
203 r = 1;
204 break;
205 case KVM_CAP_COALESCED_MMIO:
206 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
207 break;
208#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
209 case KVM_CAP_IOMMU:
210 r = iommu_present(&pci_bus_type);
211 break;
212#endif
213 default:
214 r = 0;
215 }
216 return r;
217
218}
219
220static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
221{
222 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
223 kvm_run->hw.hardware_exit_reason = 1;
224 return 0;
225}
226
227static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
228{
229 struct kvm_mmio_req *p;
230 struct kvm_io_device *mmio_dev;
231 int r;
232
233 p = kvm_get_vcpu_ioreq(vcpu);
234
235 if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
236 goto mmio;
237 vcpu->mmio_needed = 1;
238 vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr;
239 vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size;
240 vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
241
242 if (vcpu->mmio_is_write)
243 memcpy(vcpu->arch.mmio_data, &p->data, p->size);
244 memcpy(kvm_run->mmio.data, &p->data, p->size);
245 kvm_run->exit_reason = KVM_EXIT_MMIO;
246 return 0;
247mmio:
248 if (p->dir)
249 r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
250 p->size, &p->data);
251 else
252 r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
253 p->size, &p->data);
254 if (r)
255 printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
256 p->state = STATE_IORESP_READY;
257
258 return 1;
259}
260
261static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
262{
263 struct exit_ctl_data *p;
264
265 p = kvm_get_exit_data(vcpu);
266
267 if (p->exit_reason == EXIT_REASON_PAL_CALL)
268 return kvm_pal_emul(vcpu, kvm_run);
269 else {
270 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
271 kvm_run->hw.hardware_exit_reason = 2;
272 return 0;
273 }
274}
275
276static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
277{
278 struct exit_ctl_data *p;
279
280 p = kvm_get_exit_data(vcpu);
281
282 if (p->exit_reason == EXIT_REASON_SAL_CALL) {
283 kvm_sal_emul(vcpu);
284 return 1;
285 } else {
286 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
287 kvm_run->hw.hardware_exit_reason = 3;
288 return 0;
289 }
290
291}
292
293static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
294{
295 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
296
297 if (!test_and_set_bit(vector, &vpd->irr[0])) {
298 vcpu->arch.irq_new_pending = 1;
299 kvm_vcpu_kick(vcpu);
300 return 1;
301 }
302 return 0;
303}
304
305/*
306 * offset: address offset to IPI space.
307 * value: deliver value.
308 */
309static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
310 uint64_t vector)
311{
312 switch (dm) {
313 case SAPIC_FIXED:
314 break;
315 case SAPIC_NMI:
316 vector = 2;
317 break;
318 case SAPIC_EXTINT:
319 vector = 0;
320 break;
321 case SAPIC_INIT:
322 case SAPIC_PMI:
323 default:
324 printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
325 return;
326 }
327 __apic_accept_irq(vcpu, vector);
328}
329
330static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
331 unsigned long eid)
332{
333 union ia64_lid lid;
334 int i;
335 struct kvm_vcpu *vcpu;
336
337 kvm_for_each_vcpu(i, vcpu, kvm) {
338 lid.val = VCPU_LID(vcpu);
339 if (lid.id == id && lid.eid == eid)
340 return vcpu;
341 }
342
343 return NULL;
344}
345
346static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
347{
348 struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
349 struct kvm_vcpu *target_vcpu;
350 struct kvm_pt_regs *regs;
351 union ia64_ipi_a addr = p->u.ipi_data.addr;
352 union ia64_ipi_d data = p->u.ipi_data.data;
353
354 target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid);
355 if (!target_vcpu)
356 return handle_vm_error(vcpu, kvm_run);
357
358 if (!target_vcpu->arch.launched) {
359 regs = vcpu_regs(target_vcpu);
360
361 regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip;
362 regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp;
363
364 target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
365 if (waitqueue_active(&target_vcpu->wq))
366 wake_up_interruptible(&target_vcpu->wq);
367 } else {
368 vcpu_deliver_ipi(target_vcpu, data.dm, data.vector);
369 if (target_vcpu != vcpu)
370 kvm_vcpu_kick(target_vcpu);
371 }
372
373 return 1;
374}
375
376struct call_data {
377 struct kvm_ptc_g ptc_g_data;
378 struct kvm_vcpu *vcpu;
379};
380
381static void vcpu_global_purge(void *info)
382{
383 struct call_data *p = (struct call_data *)info;
384 struct kvm_vcpu *vcpu = p->vcpu;
385
386 if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
387 return;
388
389 set_bit(KVM_REQ_PTC_G, &vcpu->requests);
390 if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) {
391 vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] =
392 p->ptc_g_data;
393 } else {
394 clear_bit(KVM_REQ_PTC_G, &vcpu->requests);
395 vcpu->arch.ptc_g_count = 0;
396 set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
397 }
398}
399
400static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
401{
402 struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
403 struct kvm *kvm = vcpu->kvm;
404 struct call_data call_data;
405 int i;
406 struct kvm_vcpu *vcpui;
407
408 call_data.ptc_g_data = p->u.ptc_g_data;
409
410 kvm_for_each_vcpu(i, vcpui, kvm) {
411 if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED ||
412 vcpu == vcpui)
413 continue;
414
415 if (waitqueue_active(&vcpui->wq))
416 wake_up_interruptible(&vcpui->wq);
417
418 if (vcpui->cpu != -1) {
419 call_data.vcpu = vcpui;
420 smp_call_function_single(vcpui->cpu,
421 vcpu_global_purge, &call_data, 1);
422 } else
423 printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
424
425 }
426 return 1;
427}
428
/* Exit handler for EXIT_REASON_SWITCH_RR6: no host work, resume guest. */
static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	return 1;
}
433
434static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
435{
436 unsigned long pte, rtc_phys_addr, map_addr;
437 int slot;
438
439 map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
440 rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
441 pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
442 slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
443 vcpu->arch.sn_rtc_tr_slot = slot;
444 if (slot < 0) {
445 printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
446 slot = 0;
447 }
448 return slot;
449}
450
451int kvm_emulate_halt(struct kvm_vcpu *vcpu)
452{
453
454 ktime_t kt;
455 long itc_diff;
456 unsigned long vcpu_now_itc;
457 unsigned long expires;
458 struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
459 unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
460 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
461
462 if (irqchip_in_kernel(vcpu->kvm)) {
463
464 vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;
465
466 if (time_after(vcpu_now_itc, vpd->itm)) {
467 vcpu->arch.timer_check = 1;
468 return 1;
469 }
470 itc_diff = vpd->itm - vcpu_now_itc;
471 if (itc_diff < 0)
472 itc_diff = -itc_diff;
473
474 expires = div64_u64(itc_diff, cyc_per_usec);
475 kt = ktime_set(0, 1000 * expires);
476
477 vcpu->arch.ht_active = 1;
478 hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
479
480 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
481 kvm_vcpu_block(vcpu);
482 hrtimer_cancel(p_ht);
483 vcpu->arch.ht_active = 0;
484
485 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
486 kvm_cpu_has_pending_timer(vcpu))
487 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
488 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
489
490 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
491 return -EINTR;
492 return 1;
493 } else {
494 printk(KERN_ERR"kvm: Unsupported userspace halt!");
495 return 0;
496 }
497}
498
499static int handle_vm_shutdown(struct kvm_vcpu *vcpu,
500 struct kvm_run *kvm_run)
501{
502 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
503 return 0;
504}
505
/* Exit handler for external interrupts: nothing to do, resume guest. */
static int handle_external_interrupt(struct kvm_vcpu *vcpu,
		struct kvm_run *kvm_run)
{
	return 1;
}
511
512static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
513 struct kvm_run *kvm_run)
514{
515 printk("VMM: %s", vcpu->arch.log_buf);
516 return 1;
517}
518
519static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
520 struct kvm_run *kvm_run) = {
521 [EXIT_REASON_VM_PANIC] = handle_vm_error,
522 [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio,
523 [EXIT_REASON_PAL_CALL] = handle_pal_call,
524 [EXIT_REASON_SAL_CALL] = handle_sal_call,
525 [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6,
526 [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown,
527 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
528 [EXIT_REASON_IPI] = handle_ipi,
529 [EXIT_REASON_PTC_G] = handle_global_purge,
530 [EXIT_REASON_DEBUG] = handle_vcpu_debug,
531
532};
533
534static const int kvm_vti_max_exit_handlers =
535 sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
536
537static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
538{
539 struct exit_ctl_data *p_exit_data;
540
541 p_exit_data = kvm_get_exit_data(vcpu);
542 return p_exit_data->exit_reason;
543}
544
545/*
546 * The guest has exited. See if we can fix it or if we need userspace
547 * assistance.
548 */
549static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
550{
551 u32 exit_reason = kvm_get_exit_reason(vcpu);
552 vcpu->arch.last_exit = exit_reason;
553
554 if (exit_reason < kvm_vti_max_exit_handlers
555 && kvm_vti_exit_handlers[exit_reason])
556 return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run);
557 else {
558 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
559 kvm_run->hw.hardware_exit_reason = exit_reason;
560 }
561 return 0;
562}
563
564static inline void vti_set_rr6(unsigned long rr6)
565{
566 ia64_set_rr(RR6, rr6);
567 ia64_srlz_i();
568}
569
570static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
571{
572 unsigned long pte;
573 struct kvm *kvm = vcpu->kvm;
574 int r;
575
576 /*Insert a pair of tr to map vmm*/
577 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
578 r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
579 if (r < 0)
580 goto out;
581 vcpu->arch.vmm_tr_slot = r;
582 /*Insert a pairt of tr to map data of vm*/
583 pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL));
584 r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE,
585 pte, KVM_VM_DATA_SHIFT);
586 if (r < 0)
587 goto out;
588 vcpu->arch.vm_tr_slot = r;
589
590#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
591 if (kvm->arch.is_sn2) {
592 r = kvm_sn2_setup_mappings(vcpu);
593 if (r < 0)
594 goto out;
595 }
596#endif
597
598 r = 0;
599out:
600 return r;
601}
602
603static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
604{
605 struct kvm *kvm = vcpu->kvm;
606 ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
607 ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
608#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
609 if (kvm->arch.is_sn2)
610 ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
611#endif
612}
613
614static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
615{
616 unsigned long psr;
617 int r;
618 int cpu = smp_processor_id();
619
620 if (vcpu->arch.last_run_cpu != cpu ||
621 per_cpu(last_vcpu, cpu) != vcpu) {
622 per_cpu(last_vcpu, cpu) = vcpu;
623 vcpu->arch.last_run_cpu = cpu;
624 kvm_flush_tlb_all();
625 }
626
627 vcpu->arch.host_rr6 = ia64_get_rr(RR6);
628 vti_set_rr6(vcpu->arch.vmm_rr);
629 local_irq_save(psr);
630 r = kvm_insert_vmm_mapping(vcpu);
631 local_irq_restore(psr);
632 return r;
633}
634
635static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
636{
637 kvm_purge_vmm_mapping(vcpu);
638 vti_set_rr6(vcpu->arch.host_rr6);
639}
640
641static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
642{
643 union context *host_ctx, *guest_ctx;
644 int r, idx;
645
646 idx = srcu_read_lock(&vcpu->kvm->srcu);
647
648again:
649 if (signal_pending(current)) {
650 r = -EINTR;
651 kvm_run->exit_reason = KVM_EXIT_INTR;
652 goto out;
653 }
654
655 preempt_disable();
656 local_irq_disable();
657
658 /*Get host and guest context with guest address space.*/
659 host_ctx = kvm_get_host_context(vcpu);
660 guest_ctx = kvm_get_guest_context(vcpu);
661
662 clear_bit(KVM_REQ_KICK, &vcpu->requests);
663
664 r = kvm_vcpu_pre_transition(vcpu);
665 if (r < 0)
666 goto vcpu_run_fail;
667
668 srcu_read_unlock(&vcpu->kvm->srcu, idx);
669 vcpu->mode = IN_GUEST_MODE;
670 kvm_guest_enter();
671
672 /*
673 * Transition to the guest
674 */
675 kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
676
677 kvm_vcpu_post_transition(vcpu);
678
679 vcpu->arch.launched = 1;
680 set_bit(KVM_REQ_KICK, &vcpu->requests);
681 local_irq_enable();
682
683 /*
684 * We must have an instruction between local_irq_enable() and
685 * kvm_guest_exit(), so the timer interrupt isn't delayed by
686 * the interrupt shadow. The stat.exits increment will do nicely.
687 * But we need to prevent reordering, hence this barrier():
688 */
689 barrier();
690 kvm_guest_exit();
691 vcpu->mode = OUTSIDE_GUEST_MODE;
692 preempt_enable();
693
694 idx = srcu_read_lock(&vcpu->kvm->srcu);
695
696 r = kvm_handle_exit(kvm_run, vcpu);
697
698 if (r > 0) {
699 if (!need_resched())
700 goto again;
701 }
702
703out:
704 srcu_read_unlock(&vcpu->kvm->srcu, idx);
705 if (r > 0) {
706 cond_resched();
707 idx = srcu_read_lock(&vcpu->kvm->srcu);
708 goto again;
709 }
710
711 return r;
712
713vcpu_run_fail:
714 local_irq_enable();
715 preempt_enable();
716 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
717 goto out;
718}
719
720static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
721{
722 struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
723
724 if (!vcpu->mmio_is_write)
725 memcpy(&p->data, vcpu->arch.mmio_data, 8);
726 p->state = STATE_IORESP_READY;
727}
728
729int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
730{
731 int r;
732 sigset_t sigsaved;
733
734 if (vcpu->sigset_active)
735 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
736
737 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
738 kvm_vcpu_block(vcpu);
739 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
740 r = -EAGAIN;
741 goto out;
742 }
743
744 if (vcpu->mmio_needed) {
745 memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8);
746 kvm_set_mmio_data(vcpu);
747 vcpu->mmio_read_completed = 1;
748 vcpu->mmio_needed = 0;
749 }
750 r = __vcpu_run(vcpu, kvm_run);
751out:
752 if (vcpu->sigset_active)
753 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
754
755 return r;
756}
757
758struct kvm *kvm_arch_alloc_vm(void)
759{
760
761 struct kvm *kvm;
762 uint64_t vm_base;
763
764 BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
765
766 vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
767
768 if (!vm_base)
769 return NULL;
770
771 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
772 kvm = (struct kvm *)(vm_base +
773 offsetof(struct kvm_vm_data, kvm_vm_struct));
774 kvm->arch.vm_base = vm_base;
775 printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
776
777 return kvm;
778}
779
780struct kvm_ia64_io_range {
781 unsigned long start;
782 unsigned long size;
783 unsigned long type;
784};
785
786static const struct kvm_ia64_io_range io_ranges[] = {
787 {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
788 {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
789 {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
790 {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
791 {PIB_START, PIB_SIZE, GPFN_PIB},
792};
793
794static void kvm_build_io_pmt(struct kvm *kvm)
795{
796 unsigned long i, j;
797
798 /* Mark I/O ranges */
799 for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range));
800 i++) {
801 for (j = io_ranges[i].start;
802 j < io_ranges[i].start + io_ranges[i].size;
803 j += PAGE_SIZE)
804 kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT,
805 io_ranges[i].type, 0);
806 }
807
808}
809
810/*Use unused rids to virtualize guest rid.*/
811#define GUEST_PHYSICAL_RR0 0x1739
812#define GUEST_PHYSICAL_RR4 0x2739
813#define VMM_INIT_RR 0x1660
814
815int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
816{
817 BUG_ON(!kvm);
818
819 if (type)
820 return -EINVAL;
821
822 kvm->arch.is_sn2 = ia64_platform_is("sn2");
823
824 kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
825 kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
826 kvm->arch.vmm_init_rr = VMM_INIT_RR;
827
828 /*
829 *Fill P2M entries for MMIO/IO ranges
830 */
831 kvm_build_io_pmt(kvm);
832
833 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
834
835 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
836 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
837
838 return 0;
839}
840
841static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
842 struct kvm_irqchip *chip)
843{
844 int r;
845
846 r = 0;
847 switch (chip->chip_id) {
848 case KVM_IRQCHIP_IOAPIC:
849 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
850 break;
851 default:
852 r = -EINVAL;
853 break;
854 }
855 return r;
856}
857
858static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
859{
860 int r;
861
862 r = 0;
863 switch (chip->chip_id) {
864 case KVM_IRQCHIP_IOAPIC:
865 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
866 break;
867 default:
868 r = -EINVAL;
869 break;
870 }
871 return r;
872}
873
874#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x
875
876int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
877{
878 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
879 int i;
880
881 for (i = 0; i < 16; i++) {
882 vpd->vgr[i] = regs->vpd.vgr[i];
883 vpd->vbgr[i] = regs->vpd.vbgr[i];
884 }
885 for (i = 0; i < 128; i++)
886 vpd->vcr[i] = regs->vpd.vcr[i];
887 vpd->vhpi = regs->vpd.vhpi;
888 vpd->vnat = regs->vpd.vnat;
889 vpd->vbnat = regs->vpd.vbnat;
890 vpd->vpsr = regs->vpd.vpsr;
891
892 vpd->vpr = regs->vpd.vpr;
893
894 memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
895
896 RESTORE_REGS(mp_state);
897 RESTORE_REGS(vmm_rr);
898 memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS);
899 memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS);
900 RESTORE_REGS(itr_regions);
901 RESTORE_REGS(dtr_regions);
902 RESTORE_REGS(tc_regions);
903 RESTORE_REGS(irq_check);
904 RESTORE_REGS(itc_check);
905 RESTORE_REGS(timer_check);
906 RESTORE_REGS(timer_pending);
907 RESTORE_REGS(last_itc);
908 for (i = 0; i < 8; i++) {
909 vcpu->arch.vrr[i] = regs->vrr[i];
910 vcpu->arch.ibr[i] = regs->ibr[i];
911 vcpu->arch.dbr[i] = regs->dbr[i];
912 }
913 for (i = 0; i < 4; i++)
914 vcpu->arch.insvc[i] = regs->insvc[i];
915 RESTORE_REGS(xtp);
916 RESTORE_REGS(metaphysical_rr0);
917 RESTORE_REGS(metaphysical_rr4);
918 RESTORE_REGS(metaphysical_saved_rr0);
919 RESTORE_REGS(metaphysical_saved_rr4);
920 RESTORE_REGS(fp_psr);
921 RESTORE_REGS(saved_gp);
922
923 vcpu->arch.irq_new_pending = 1;
924 vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
925 set_bit(KVM_REQ_RESUME, &vcpu->requests);
926
927 return 0;
928}
929
930int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
931 bool line_status)
932{
933 if (!irqchip_in_kernel(kvm))
934 return -ENXIO;
935
936 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
937 irq_event->irq, irq_event->level,
938 line_status);
939 return 0;
940}
941
942long kvm_arch_vm_ioctl(struct file *filp,
943 unsigned int ioctl, unsigned long arg)
944{
945 struct kvm *kvm = filp->private_data;
946 void __user *argp = (void __user *)arg;
947 int r = -ENOTTY;
948
949 switch (ioctl) {
950 case KVM_CREATE_IRQCHIP:
951 r = -EFAULT;
952 r = kvm_ioapic_init(kvm);
953 if (r)
954 goto out;
955 r = kvm_setup_default_irq_routing(kvm);
956 if (r) {
957 mutex_lock(&kvm->slots_lock);
958 kvm_ioapic_destroy(kvm);
959 mutex_unlock(&kvm->slots_lock);
960 goto out;
961 }
962 break;
963 case KVM_GET_IRQCHIP: {
964 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
965 struct kvm_irqchip chip;
966
967 r = -EFAULT;
968 if (copy_from_user(&chip, argp, sizeof chip))
969 goto out;
970 r = -ENXIO;
971 if (!irqchip_in_kernel(kvm))
972 goto out;
973 r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
974 if (r)
975 goto out;
976 r = -EFAULT;
977 if (copy_to_user(argp, &chip, sizeof chip))
978 goto out;
979 r = 0;
980 break;
981 }
982 case KVM_SET_IRQCHIP: {
983 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
984 struct kvm_irqchip chip;
985
986 r = -EFAULT;
987 if (copy_from_user(&chip, argp, sizeof chip))
988 goto out;
989 r = -ENXIO;
990 if (!irqchip_in_kernel(kvm))
991 goto out;
992 r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
993 if (r)
994 goto out;
995 r = 0;
996 break;
997 }
998 default:
999 ;
1000 }
1001out:
1002 return r;
1003}
1004
1005int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1006 struct kvm_sregs *sregs)
1007{
1008 return -EINVAL;
1009}
1010
1011int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1012 struct kvm_sregs *sregs)
1013{
1014 return -EINVAL;
1015
1016}
1017int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1018 struct kvm_translation *tr)
1019{
1020
1021 return -EINVAL;
1022}
1023
1024static int kvm_alloc_vmm_area(void)
1025{
1026 if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) {
1027 kvm_vmm_base = __get_free_pages(GFP_KERNEL,
1028 get_order(KVM_VMM_SIZE));
1029 if (!kvm_vmm_base)
1030 return -ENOMEM;
1031
1032 memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
1033 kvm_vm_buffer = kvm_vmm_base + VMM_SIZE;
1034
1035 printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n",
1036 kvm_vmm_base, kvm_vm_buffer);
1037 }
1038
1039 return 0;
1040}
1041
1042static void kvm_free_vmm_area(void)
1043{
1044 if (kvm_vmm_base) {
1045 /*Zero this area before free to avoid bits leak!!*/
1046 memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
1047 free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE));
1048 kvm_vmm_base = 0;
1049 kvm_vm_buffer = 0;
1050 kvm_vsa_base = 0;
1051 }
1052}
1053
1054static int vti_init_vpd(struct kvm_vcpu *vcpu)
1055{
1056 int i;
1057 union cpuid3_t cpuid3;
1058 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1059
1060 if (IS_ERR(vpd))
1061 return PTR_ERR(vpd);
1062
1063 /* CPUID init */
1064 for (i = 0; i < 5; i++)
1065 vpd->vcpuid[i] = ia64_get_cpuid(i);
1066
1067 /* Limit the CPUID number to 5 */
1068 cpuid3.value = vpd->vcpuid[3];
1069 cpuid3.number = 4; /* 5 - 1 */
1070 vpd->vcpuid[3] = cpuid3.value;
1071
1072 /*Set vac and vdc fields*/
1073 vpd->vac.a_from_int_cr = 1;
1074 vpd->vac.a_to_int_cr = 1;
1075 vpd->vac.a_from_psr = 1;
1076 vpd->vac.a_from_cpuid = 1;
1077 vpd->vac.a_cover = 1;
1078 vpd->vac.a_bsw = 1;
1079 vpd->vac.a_int = 1;
1080 vpd->vdc.d_vmsw = 1;
1081
1082 /*Set virtual buffer*/
1083 vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE;
1084
1085 return 0;
1086}
1087
1088static int vti_create_vp(struct kvm_vcpu *vcpu)
1089{
1090 long ret;
1091 struct vpd *vpd = vcpu->arch.vpd;
1092 unsigned long vmm_ivt;
1093
1094 vmm_ivt = kvm_vmm_info->vmm_ivt;
1095
1096 printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt);
1097
1098 ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0);
1099
1100 if (ret) {
1101 printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n");
1102 return -EINVAL;
1103 }
1104 return 0;
1105}
1106
1107static void init_ptce_info(struct kvm_vcpu *vcpu)
1108{
1109 ia64_ptce_info_t ptce = {0};
1110
1111 ia64_get_ptce(&ptce);
1112 vcpu->arch.ptce_base = ptce.base;
1113 vcpu->arch.ptce_count[0] = ptce.count[0];
1114 vcpu->arch.ptce_count[1] = ptce.count[1];
1115 vcpu->arch.ptce_stride[0] = ptce.stride[0];
1116 vcpu->arch.ptce_stride[1] = ptce.stride[1];
1117}
1118
1119static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
1120{
1121 struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
1122
1123 if (hrtimer_cancel(p_ht))
1124 hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS);
1125}
1126
1127static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
1128{
1129 struct kvm_vcpu *vcpu;
1130 wait_queue_head_t *q;
1131
1132 vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer);
1133 q = &vcpu->wq;
1134
1135 if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
1136 goto out;
1137
1138 if (waitqueue_active(q))
1139 wake_up_interruptible(q);
1140
1141out:
1142 vcpu->arch.timer_fired = 1;
1143 vcpu->arch.timer_check = 1;
1144 return HRTIMER_NORESTART;
1145}
1146
1147#define PALE_RESET_ENTRY 0x80000000ffffffb0UL
1148
1149bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
1150{
1151 return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
1152}
1153
1154int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1155{
1156 struct kvm_vcpu *v;
1157 int r;
1158 int i;
1159 long itc_offset;
1160 struct kvm *kvm = vcpu->kvm;
1161 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1162
1163 union context *p_ctx = &vcpu->arch.guest;
1164 struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
1165
1166 /*Init vcpu context for first run.*/
1167 if (IS_ERR(vmm_vcpu))
1168 return PTR_ERR(vmm_vcpu);
1169
1170 if (kvm_vcpu_is_bsp(vcpu)) {
1171 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
1172
1173 /*Set entry address for first run.*/
1174 regs->cr_iip = PALE_RESET_ENTRY;
1175
1176 /*Initialize itc offset for vcpus*/
1177 itc_offset = 0UL - kvm_get_itc(vcpu);
1178 for (i = 0; i < KVM_MAX_VCPUS; i++) {
1179 v = (struct kvm_vcpu *)((char *)vcpu +
1180 sizeof(struct kvm_vcpu_data) * i);
1181 v->arch.itc_offset = itc_offset;
1182 v->arch.last_itc = 0;
1183 }
1184 } else
1185 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
1186
1187 r = -ENOMEM;
1188 vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
1189 if (!vcpu->arch.apic)
1190 goto out;
1191 vcpu->arch.apic->vcpu = vcpu;
1192
1193 p_ctx->gr[1] = 0;
1194 p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
1195 p_ctx->gr[13] = (unsigned long)vmm_vcpu;
1196 p_ctx->psr = 0x1008522000UL;
1197 p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
1198 p_ctx->caller_unat = 0;
1199 p_ctx->pr = 0x0;
1200 p_ctx->ar[36] = 0x0; /*unat*/
1201 p_ctx->ar[19] = 0x0; /*rnat*/
1202 p_ctx->ar[18] = (unsigned long)vmm_vcpu +
1203 ((sizeof(struct kvm_vcpu)+15) & ~15);
1204 p_ctx->ar[64] = 0x0; /*pfs*/
1205 p_ctx->cr[0] = 0x7e04UL;
1206 p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
1207 p_ctx->cr[8] = 0x3c;
1208
1209 /*Initialize region register*/
1210 p_ctx->rr[0] = 0x30;
1211 p_ctx->rr[1] = 0x30;
1212 p_ctx->rr[2] = 0x30;
1213 p_ctx->rr[3] = 0x30;
1214 p_ctx->rr[4] = 0x30;
1215 p_ctx->rr[5] = 0x30;
1216 p_ctx->rr[7] = 0x30;
1217
1218 /*Initialize branch register 0*/
1219 p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
1220
1221 vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
1222 vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0;
1223 vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4;
1224
1225 hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1226 vcpu->arch.hlt_timer.function = hlt_timer_fn;
1227
1228 vcpu->arch.last_run_cpu = -1;
1229 vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
1230 vcpu->arch.vsa_base = kvm_vsa_base;
1231 vcpu->arch.__gp = kvm_vmm_gp;
1232 vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
1233 vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
1234 vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
1235 init_ptce_info(vcpu);
1236
1237 r = 0;
1238out:
1239 return r;
1240}
1241
1242static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
1243{
1244 unsigned long psr;
1245 int r;
1246
1247 local_irq_save(psr);
1248 r = kvm_insert_vmm_mapping(vcpu);
1249 local_irq_restore(psr);
1250 if (r)
1251 goto fail;
1252 r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
1253 if (r)
1254 goto fail;
1255
1256 r = vti_init_vpd(vcpu);
1257 if (r) {
1258 printk(KERN_DEBUG"kvm: vpd init error!!\n");
1259 goto uninit;
1260 }
1261
1262 r = vti_create_vp(vcpu);
1263 if (r)
1264 goto uninit;
1265
1266 kvm_purge_vmm_mapping(vcpu);
1267
1268 return 0;
1269uninit:
1270 kvm_vcpu_uninit(vcpu);
1271fail:
1272 return r;
1273}
1274
1275struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1276 unsigned int id)
1277{
1278 struct kvm_vcpu *vcpu;
1279 unsigned long vm_base = kvm->arch.vm_base;
1280 int r;
1281 int cpu;
1282
1283 BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
1284
1285 r = -EINVAL;
1286 if (id >= KVM_MAX_VCPUS) {
1287 printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
1288 KVM_MAX_VCPUS);
1289 goto fail;
1290 }
1291
1292 r = -ENOMEM;
1293 if (!vm_base) {
1294 printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
1295 goto fail;
1296 }
1297 vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
1298 vcpu_data[id].vcpu_struct));
1299 vcpu->kvm = kvm;
1300
1301 cpu = get_cpu();
1302 r = vti_vcpu_setup(vcpu, id);
1303 put_cpu();
1304
1305 if (r) {
1306 printk(KERN_DEBUG"kvm: vcpu_setup error!!\n");
1307 goto fail;
1308 }
1309
1310 return vcpu;
1311fail:
1312 return ERR_PTR(r);
1313}
1314
/* Arch hook for vcpu setup: nothing to do here, always returns 0. */
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	return 0;
}
1319
/* Arch hook run after vcpu creation: no work is done, always returns 0. */
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	return 0;
}
1324
/* Reading FPU state is not implemented here: always fails with -EINVAL. */
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}
1329
/* Writing FPU state is not implemented here: always fails with -EINVAL. */
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}
1334
/* Guest debugging is not implemented here: always fails with -EINVAL. */
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	return -EINVAL;
}
1340
1341void kvm_arch_free_vm(struct kvm *kvm)
1342{
1343 unsigned long vm_base = kvm->arch.vm_base;
1344
1345 if (vm_base) {
1346 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
1347 free_pages(vm_base, get_order(KVM_VM_DATA_SIZE));
1348 }
1349
1350}
1351
1352static void kvm_release_vm_pages(struct kvm *kvm)
1353{
1354 struct kvm_memslots *slots;
1355 struct kvm_memory_slot *memslot;
1356 int j;
1357
1358 slots = kvm_memslots(kvm);
1359 kvm_for_each_memslot(memslot, slots) {
1360 for (j = 0; j < memslot->npages; j++) {
1361 if (memslot->rmap[j])
1362 put_page((struct page *)memslot->rmap[j]);
1363 }
1364 }
1365}
1366
1367void kvm_arch_destroy_vm(struct kvm *kvm)
1368{
1369 kvm_iommu_unmap_guest(kvm);
1370 kvm_free_all_assigned_devices(kvm);
1371 kfree(kvm->arch.vioapic);
1372 kvm_release_vm_pages(kvm);
1373}
1374
1375void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1376{
1377 if (cpu != vcpu->cpu) {
1378 vcpu->cpu = cpu;
1379 if (vcpu->arch.ht_active)
1380 kvm_migrate_hlt_timer(vcpu);
1381 }
1382}
1383
/*
 * Copy vcpu->arch.<_x> into the same-named member of *regs.
 * Expands to a plain assignment and relies on `regs` and `vcpu` being
 * in scope at the point of use.
 */
1384#define SAVE_REGS(_x) regs->_x = vcpu->arch._x
1385
/*
 * Export the state of @vcpu into @regs.
 *
 * Copies the VPD registers (vgr/vbgr/vcr, vhpi, vnat, vbnat, vpsr, vpr),
 * the saved guest context, the translation registers and the remaining
 * per-vcpu arch fields listed below.  The copy is bracketed by
 * vcpu_load()/vcpu_put().  Always returns 0.
 */
1386int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1387{
	/* vcpu->arch.vpd is passed through to_host() before it is dereferenced. */
1388 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1389 int i;
1390
1391 vcpu_load(vcpu);
1392
	/* VPD register arrays: 16 entries each of vgr/vbgr, 128 of vcr. */
1393 for (i = 0; i < 16; i++) {
1394 regs->vpd.vgr[i] = vpd->vgr[i];
1395 regs->vpd.vbgr[i] = vpd->vbgr[i];
1396 }
1397 for (i = 0; i < 128; i++)
1398 regs->vpd.vcr[i] = vpd->vcr[i];
1399 regs->vpd.vhpi = vpd->vhpi;
1400 regs->vpd.vnat = vpd->vnat;
1401 regs->vpd.vbnat = vpd->vbnat;
1402 regs->vpd.vpsr = vpd->vpsr;
1403 regs->vpd.vpr = vpd->vpr;
1404
1405 memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
1406
1407 SAVE_REGS(mp_state);
1408 SAVE_REGS(vmm_rr);
1409 memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
1410 memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS);
1411 SAVE_REGS(itr_regions);
1412 SAVE_REGS(dtr_regions);
1413 SAVE_REGS(tc_regions);
1414 SAVE_REGS(irq_check);
1415 SAVE_REGS(itc_check);
1416 SAVE_REGS(timer_check);
1417 SAVE_REGS(timer_pending);
1418 SAVE_REGS(last_itc);
1419 for (i = 0; i < 8; i++) {
1420 regs->vrr[i] = vcpu->arch.vrr[i];
1421 regs->ibr[i] = vcpu->arch.ibr[i];
1422 regs->dbr[i] = vcpu->arch.dbr[i];
1423 }
1424 for (i = 0; i < 4; i++)
1425 regs->insvc[i] = vcpu->arch.insvc[i];
	/* saved_itc is computed, not copied: itc_offset plus kvm_get_itc(). */
1426 regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
1427 SAVE_REGS(xtp);
1428 SAVE_REGS(metaphysical_rr0);
1429 SAVE_REGS(metaphysical_rr4);
1430 SAVE_REGS(metaphysical_saved_rr0);
1431 SAVE_REGS(metaphysical_saved_rr4);
1432 SAVE_REGS(fp_psr);
1433 SAVE_REGS(saved_gp);
1434
1435 vcpu_put(vcpu);
1436 return 0;
1437}
1438
1439int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
1440 struct kvm_ia64_vcpu_stack *stack)
1441{
1442 memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
1443 return 0;
1444}
1445
1446int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
1447 struct kvm_ia64_vcpu_stack *stack)
1448{
1449 memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
1450 sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
1451
1452 vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
1453 return 0;
1454}
1455
1456void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
1457{
1458
1459 hrtimer_cancel(&vcpu->arch.hlt_timer);
1460 kfree(vcpu->arch.apic);
1461}
1462
/*
 * Arch-specific vcpu ioctls.
 *
 * Handles KVM_IA64_VCPU_GET_STACK and KVM_IA64_VCPU_SET_STACK.  For both,
 * @arg points at a user-space location holding a *pointer* to the user's
 * struct kvm_ia64_vcpu_stack buffer, so two user accesses are needed: one
 * to fetch the pointer and one to move the data.
 *
 * Returns 0 on success, -EFAULT on a bad user address or failed copy,
 * -ENOMEM when the bounce buffer cannot be allocated, -EINVAL for any
 * other ioctl number.  The bounce buffer is freed on every path.
 */
1463long kvm_arch_vcpu_ioctl(struct file *filp,
1464 unsigned int ioctl, unsigned long arg)
1465{
1466 struct kvm_vcpu *vcpu = filp->private_data;
1467 void __user *argp = (void __user *)arg;
	/* Stays NULL unless allocated, so the kfree() at "out" is always safe. */
1468 struct kvm_ia64_vcpu_stack *stack = NULL;
1469 long r;
1470
1471 switch (ioctl) {
1472 case KVM_IA64_VCPU_GET_STACK: {
1473 struct kvm_ia64_vcpu_stack __user *user_stack;
1474 void __user *first_p = argp;
1475
	/* r stays -EFAULT for the pointer fetch and the access_ok() check. */
1476 r = -EFAULT;
1477 if (copy_from_user(&user_stack, first_p, sizeof(void *)))
1478 goto out;
1479
1480 if (!access_ok(VERIFY_WRITE, user_stack,
1481 sizeof(struct kvm_ia64_vcpu_stack))) {
1482 printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
1483 "Illegal user destination address for stack\n");
1484 goto out;
1485 }
1486 stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
1487 if (!stack) {
1488 r = -ENOMEM;
1489 goto out;
1490 }
1491
1492 r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
1493 if (r)
1494 goto out;
1495
1496 if (copy_to_user(user_stack, stack,
1497 sizeof(struct kvm_ia64_vcpu_stack))) {
1498 r = -EFAULT;
1499 goto out;
1500 }
1501
1502 break;
1503 }
1504 case KVM_IA64_VCPU_SET_STACK: {
1505 struct kvm_ia64_vcpu_stack __user *user_stack;
1506 void __user *first_p = argp;
1507
	/*
	 * r stays -EFAULT until kvm_arch_vcpu_ioctl_set_stack() runs, so the
	 * failed copy_from_user() below reports -EFAULT without setting r.
	 */
1508 r = -EFAULT;
1509 if (copy_from_user(&user_stack, first_p, sizeof(void *)))
1510 goto out;
1511
1512 if (!access_ok(VERIFY_READ, user_stack,
1513 sizeof(struct kvm_ia64_vcpu_stack))) {
1514 printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
1515 "Illegal user address for stack\n");
1516 goto out;
1517 }
1518 stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
1519 if (!stack) {
1520 r = -ENOMEM;
1521 goto out;
1522 }
1523 if (copy_from_user(stack, user_stack,
1524 sizeof(struct kvm_ia64_vcpu_stack)))
1525 goto out;
1526
1527 r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
1528 break;
1529 }
1530
1531 default:
1532 r = -EINVAL;
1533 }
1534
1535out:
1536 kfree(stack);
1537 return r;
1538}
1539
1540int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
1541{
1542 return VM_FAULT_SIGBUS;
1543}
1544
/* Arch hook for memslot creation: nothing to allocate, always returns 0. */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
1550
1551int kvm_arch_prepare_memory_region(struct kvm *kvm,
1552 struct kvm_memory_slot *memslot,
1553 struct kvm_userspace_memory_region *mem,
1554 enum kvm_mr_change change)
1555{
1556 unsigned long i;
1557 unsigned long pfn;
1558 int npages = memslot->npages;
1559 unsigned long base_gfn = memslot->base_gfn;
1560
1561 if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
1562 return -ENOMEM;
1563
1564 for (i = 0; i < npages; i++) {
1565 pfn = gfn_to_pfn(kvm, base_gfn + i);
1566 if (!kvm_is_reserved_pfn(pfn)) {
1567 kvm_set_pmt_entry(kvm, base_gfn + i,
1568 pfn << PAGE_SHIFT,
1569 _PAGE_AR_RWX | _PAGE_MA_WB);
1570 memslot->rmap[i] = (unsigned long)pfn_to_page(pfn);
1571 } else {
1572 kvm_set_pmt_entry(kvm, base_gfn + i,
1573 GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT),
1574 _PAGE_MA_UC);
1575 memslot->rmap[i] = 0;
1576 }
1577 }
1578
1579 return 0;
1580}
1581
/* Flushing all shadow state amounts to a remote TLB flush for @kvm. */
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	kvm_flush_remote_tlbs(kvm);
}
1586
/*
 * Flush shadow state for a single memslot.
 *
 * There is no per-slot flush here, so @slot is ignored and everything
 * belonging to @kvm is flushed.  kvm_arch_flush_shadow_all() takes the VM
 * as its argument; the call previously omitted it.
 */
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	kvm_arch_flush_shadow_all(kvm);
}
1592
/* No arch-specific device ioctls are handled: always returns -EINVAL. */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	return -EINVAL;
}
1598
/* Destroying a vcpu only calls kvm_vcpu_uninit() on it. */
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_vcpu_uninit(vcpu);
}
1603
1604static int vti_cpu_has_kvm_support(void)
1605{
1606 long avail = 1, status = 1, control = 1;
1607 long ret;
1608
1609 ret = ia64_pal_proc_get_features(&avail, &status, &control, 0);
1610 if (ret)
1611 goto out;
1612
1613 if (!(avail & PAL_PROC_VM_BIT))
1614 goto out;
1615
1616 printk(KERN_DEBUG"kvm: Hardware Supports VT\n");
1617
1618 ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info);
1619 if (ret)
1620 goto out;
1621 printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size);
1622
1623 if (!(vp_env_info & VP_OPCODE)) {
1624 printk(KERN_WARNING"kvm: No opcode ability on hardware, "
1625 "vm_env_info:0x%lx\n", vp_env_info);
1626 }
1627
1628 return 1;
1629out:
1630 return 0;
1631}
1632
1633
1634/*
1635 * On SN2, the ITC isn't stable, so copy in fast path code to use the
1636 * SN2 RTC, replacing the ITC based default verion.
1637 */
1638static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
1639 struct module *module)
1640{
1641 unsigned long new_ar, new_ar_sn2;
1642 unsigned long module_base;
1643
1644 if (!ia64_platform_is("sn2"))
1645 return;
1646
1647 module_base = (unsigned long)module->module_core;
1648
1649 new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
1650 new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
1651
1652 printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
1653 "as source\n");
1654
1655 /*
1656 * Copy the SN2 version of mov_ar into place. They are both
1657 * the same size, so 6 bundles is sufficient (6 * 0x10).
1658 */
1659 memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
1660}
1661
/*
 * Copy the VMM module's core into the area at kvm_vmm_base and fix up the
 * entry points so they refer to addresses relative to KVM_VMM_BASE.
 *
 * @vmm_info: entry-point description supplied by the VMM module; its
 *            addresses still refer to the module's original location.
 * @module:   the VMM module (must not be NULL).
 *
 * On success the global kvm_vmm_info holds the relocated vmm_ivt,
 * vmm_entry and tramp_entry, the two function descriptors inside the copy
 * have been rewritten, and kvm_vmm_gp holds the relocated gp taken from
 * the trampoline descriptor.
 *
 * Returns 0 on success, -EFAULT when the VMM area has not been set up or
 * the module core is larger than KVM_VMM_SIZE.
 */
1662static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
1663 struct module *module)
1664{
1665 unsigned long module_base;
1666 unsigned long vmm_size;
1667
1668 unsigned long vmm_offset, func_offset, fdesc_offset;
1669 struct fdesc *p_fdesc;
1670
1671 BUG_ON(!module);
1672
1673 if (!kvm_vmm_base) {
1674 printk("kvm: kvm area hasn't been initialized yet!!\n");
1675 return -EFAULT;
1676 }
1677
1678 /*Calculate new position of relocated vmm module.*/
1679 module_base = (unsigned long)module->module_core;
1680 vmm_size = module->core_size;
1681 if (unlikely(vmm_size > KVM_VMM_SIZE))
1682 return -EFAULT;
1683
	/* Copy first, then patch the copy, then flush the icache over it. */
1684 memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
1685 kvm_patch_vmm(vmm_info, module);
1686 kvm_flush_icache(kvm_vmm_base, vmm_size);
1687
1688 /*Recalculate kvm_vmm_info based on new VMM*/
1689 vmm_offset = vmm_info->vmm_ivt - module_base;
1690 kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset;
1691 printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n",
1692 kvm_vmm_info->vmm_ivt);
1693
	/*
	 * vmm_entry: the published pointer uses KVM_VMM_BASE, while the
	 * descriptor is edited through the copy at kvm_vmm_base.  The first
	 * word of the original descriptor is read as the function address
	 * (NOTE(review): assumes ip is the first member of struct fdesc).
	 */
1694 fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base;
1695 kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE +
1696 fdesc_offset);
1697 func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base;
1698 p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
1699 p_fdesc->ip = KVM_VMM_BASE + func_offset;
1700 p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base);
1701
1702 printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n",
1703 KVM_VMM_BASE+func_offset);
1704
	/* tramp_entry: same fix-up as for vmm_entry above. */
1705 fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base;
1706 kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE +
1707 fdesc_offset);
1708 func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base;
1709 p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
1710 p_fdesc->ip = KVM_VMM_BASE + func_offset;
1711 p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base);
1712
	/* The relocated gp of the trampoline descriptor is kept globally. */
1713 kvm_vmm_gp = p_fdesc->gp;
1714
1715 printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n",
1716 kvm_vmm_info->vmm_entry);
1717 printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n",
1718 KVM_VMM_BASE + func_offset);
1719
1720 return 0;
1721}
1722
1723int kvm_arch_init(void *opaque)
1724{
1725 int r;
1726 struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque;
1727
1728 if (!vti_cpu_has_kvm_support()) {
1729 printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n");
1730 r = -EOPNOTSUPP;
1731 goto out;
1732 }
1733
1734 if (kvm_vmm_info) {
1735 printk(KERN_ERR "kvm: Already loaded VMM module!\n");
1736 r = -EEXIST;
1737 goto out;
1738 }
1739
1740 r = -ENOMEM;
1741 kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL);
1742 if (!kvm_vmm_info)
1743 goto out;
1744
1745 if (kvm_alloc_vmm_area())
1746 goto out_free0;
1747
1748 r = kvm_relocate_vmm(vmm_info, vmm_info->module);
1749 if (r)
1750 goto out_free1;
1751
1752 return 0;
1753
1754out_free1:
1755 kvm_free_vmm_area();
1756out_free0:
1757 kfree(kvm_vmm_info);
1758out:
1759 return r;
1760}
1761
1762void kvm_arch_exit(void)
1763{
1764 kvm_free_vmm_area();
1765 kfree(kvm_vmm_info);
1766 kvm_vmm_info = NULL;
1767}
1768
1769static void kvm_ia64_sync_dirty_log(struct kvm *kvm,
1770 struct kvm_memory_slot *memslot)
1771{
1772 int i;
1773 long base;
1774 unsigned long n;
1775 unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
1776 offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
1777
1778 n = kvm_dirty_bitmap_bytes(memslot);
1779 base = memslot->base_gfn / BITS_PER_LONG;
1780
1781 spin_lock(&kvm->arch.dirty_log_lock);
1782 for (i = 0; i < n/sizeof(long); ++i) {
1783 memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
1784 dirty_bitmap[base + i] = 0;
1785 }
1786 spin_unlock(&kvm->arch.dirty_log_lock);
1787}
1788
1789int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1790 struct kvm_dirty_log *log)
1791{
1792 int r;
1793 unsigned long n;
1794 struct kvm_memory_slot *memslot;
1795 int is_dirty = 0;
1796
1797 mutex_lock(&kvm->slots_lock);
1798
1799 r = -EINVAL;
1800 if (log->slot >= KVM_USER_MEM_SLOTS)
1801 goto out;
1802
1803 memslot = id_to_memslot(kvm->memslots, log->slot);
1804 r = -ENOENT;
1805 if (!memslot->dirty_bitmap)
1806 goto out;
1807
1808 kvm_ia64_sync_dirty_log(kvm, memslot);
1809 r = kvm_get_dirty_log(kvm, log, &is_dirty);
1810 if (r)
1811 goto out;
1812
1813 /* If nothing is dirty, don't bother messing with page tables. */
1814 if (is_dirty) {
1815 kvm_flush_remote_tlbs(kvm);
1816 n = kvm_dirty_bitmap_bytes(memslot);
1817 memset(memslot->dirty_bitmap, 0, n);
1818 }
1819 r = 0;
1820out:
1821 mutex_unlock(&kvm->slots_lock);
1822 return r;
1823}
1824
/* No arch-specific hardware setup is performed: always returns 0. */
int kvm_arch_hardware_setup(void)
{
	return 0;
}
1829
1830int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
1831{
1832 return __apic_accept_irq(vcpu, irq->vector);
1833}
1834
1835int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
1836{
1837 return apic->vcpu->vcpu_id == dest;
1838}
1839
1840int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
1841{
1842 return 0;
1843}
1844
1845int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1846{
1847 return vcpu1->arch.xtp - vcpu2->arch.xtp;
1848}
1849
1850int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
1851 int short_hand, int dest, int dest_mode)
1852{
1853 struct kvm_lapic *target = vcpu->arch.apic;
1854 return (dest_mode == 0) ?
1855 kvm_apic_match_physical_addr(target, dest) :
1856 kvm_apic_match_logical_addr(target, dest);
1857}
1858
1859static int find_highest_bits(int *dat)
1860{
1861 u32 bits, bitnum;
1862 int i;
1863
1864 /* loop for all 256 bits */
1865 for (i = 7; i >= 0 ; i--) {
1866 bits = dat[i];
1867 if (bits) {
1868 bitnum = fls(bits);
1869 return i * 32 + bitnum - 1;
1870 }
1871 }
1872
1873 return -1;
1874}
1875
1876int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
1877{
1878 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1879
1880 if (vpd->irr[0] & (1UL << NMI_VECTOR))
1881 return NMI_VECTOR;
1882 if (vpd->irr[0] & (1UL << ExtINT_VECTOR))
1883 return ExtINT_VECTOR;
1884
1885 return find_highest_bits((int *)&vpd->irr[0]);
1886}
1887
1888int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
1889{
1890 return vcpu->arch.timer_fired;
1891}
1892
1893int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1894{
1895 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
1896 (kvm_highest_pending_irq(vcpu) != -1);
1897}
1898
1899int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1900{
1901 return (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests));
1902}
1903
1904int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1905 struct kvm_mp_state *mp_state)
1906{
1907 mp_state->mp_state = vcpu->arch.mp_state;
1908 return 0;
1909}
1910
1911static int vcpu_reset(struct kvm_vcpu *vcpu)
1912{
1913 int r;
1914 long psr;
1915 local_irq_save(psr);
1916 r = kvm_insert_vmm_mapping(vcpu);
1917 local_irq_restore(psr);
1918 if (r)
1919 goto fail;
1920
1921 vcpu->arch.launched = 0;
1922 kvm_arch_vcpu_uninit(vcpu);
1923 r = kvm_arch_vcpu_init(vcpu);
1924 if (r)
1925 goto fail;
1926
1927 kvm_purge_vmm_mapping(vcpu);
1928 r = 0;
1929fail:
1930 return r;
1931}
1932
1933int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1934 struct kvm_mp_state *mp_state)
1935{
1936 int r = 0;
1937
1938 vcpu->arch.mp_state = mp_state->mp_state;
1939 if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
1940 r = vcpu_reset(vcpu);
1941 return r;
1942}
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
deleted file mode 100644
index cb548ee9fcae..000000000000
--- a/arch/ia64/kvm/kvm_fw.c
+++ /dev/null
@@ -1,674 +0,0 @@
1/*
2 * PAL/SAL call delegation
3 *
4 * Copyright (c) 2004 Li Susie <susie.li@intel.com>
5 * Copyright (c) 2005 Yu Ke <ke.yu@intel.com>
6 * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 */
21
22#include <linux/kvm_host.h>
23#include <linux/smp.h>
24#include <asm/sn/addrs.h>
25#include <asm/sn/clksupport.h>
26#include <asm/sn/shub_mmr.h>
27
28#include "vti.h"
29#include "misc.h"
30
31#include <asm/pal.h>
32#include <asm/sal.h>
33#include <asm/tlb.h>
34
35/*
36 * Handy macros to make sure that the PAL return values start out
37 * as something meaningful.
38 */
/*
 * Set x.status to PAL_STATUS_UNIMPLEMENTED and clear v0..v2.
 * Wrapped in do { } while (0) so it behaves as a single statement
 * (safe in an unbraced if/else); the argument is parenthesized.
 */
#define INIT_PAL_STATUS_UNIMPLEMENTED(x)		\
	do {						\
		(x).status = PAL_STATUS_UNIMPLEMENTED;	\
		(x).v0 = 0;				\
		(x).v1 = 0;				\
		(x).v2 = 0;				\
	} while (0)

/* Set x.status to PAL_STATUS_SUCCESS and clear v0..v2. */
#define INIT_PAL_STATUS_SUCCESS(x)			\
	do {						\
		(x).status = PAL_STATUS_SUCCESS;	\
		(x).v0 = 0;				\
		(x).v1 = 0;				\
		(x).v2 = 0;				\
	} while (0)
54
55static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu,
56 u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) {
57 struct exit_ctl_data *p;
58
59 if (vcpu) {
60 p = &vcpu->arch.exit_data;
61 if (p->exit_reason == EXIT_REASON_PAL_CALL) {
62 *gr28 = p->u.pal_data.gr28;
63 *gr29 = p->u.pal_data.gr29;
64 *gr30 = p->u.pal_data.gr30;
65 *gr31 = p->u.pal_data.gr31;
66 return ;
67 }
68 }
69 printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n");
70}
71
72static void set_pal_result(struct kvm_vcpu *vcpu,
73 struct ia64_pal_retval result) {
74
75 struct exit_ctl_data *p;
76
77 p = kvm_get_exit_data(vcpu);
78 if (p->exit_reason == EXIT_REASON_PAL_CALL) {
79 p->u.pal_data.ret = result;
80 return ;
81 }
82 INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret);
83}
84
85static void set_sal_result(struct kvm_vcpu *vcpu,
86 struct sal_ret_values result) {
87 struct exit_ctl_data *p;
88
89 p = kvm_get_exit_data(vcpu);
90 if (p->exit_reason == EXIT_REASON_SAL_CALL) {
91 p->u.sal_data.ret = result;
92 return ;
93 }
94 printk(KERN_WARNING"Failed to set sal result!!\n");
95}
96
/*
 * Argument block handed to remote_pal_cache_flush() through
 * smp_call_function().
 */
97struct cache_flush_args {
	/* First two arguments passed to ia64_pal_cache_flush(). */
98 u64 cache_type;
99 u64 operation;
	/* Initial value of the progress argument; each callee uses a copy. */
100 u64 progress;
	/* Written by a callee only when its flush returned non-zero. */
101 long status;
102};
103
/* NOTE(review): in the code visible here this is only touched inside #if 0. */
104cpumask_t cpu_cache_coherent_map;
105
106static void remote_pal_cache_flush(void *data)
107{
108 struct cache_flush_args *args = data;
109 long status;
110 u64 progress = args->progress;
111
112 status = ia64_pal_cache_flush(args->cache_type, args->operation,
113 &progress, NULL);
114 if (status != 0)
115 args->status = status;
116}
117
118static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu)
119{
120 u64 gr28, gr29, gr30, gr31;
121 struct ia64_pal_retval result = {0, 0, 0, 0};
122 struct cache_flush_args args = {0, 0, 0, 0};
123 long psr;
124
125 gr28 = gr29 = gr30 = gr31 = 0;
126 kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31);
127
128 if (gr31 != 0)
129 printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu);
130
131 /* Always call Host Pal in int=1 */
132 gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS;
133 args.cache_type = gr29;
134 args.operation = gr30;
135 smp_call_function(remote_pal_cache_flush,
136 (void *)&args, 1);
137 if (args.status != 0)
138 printk(KERN_ERR"pal_cache_flush error!,"
139 "status:0x%lx\n", args.status);
140 /*
141 * Call Host PAL cache flush
142 * Clear psr.ic when call PAL_CACHE_FLUSH
143 */
144 local_irq_save(psr);
145 result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1,
146 &result.v0);
147 local_irq_restore(psr);
148 if (result.status != 0)
149 printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld"
150 "in1:%lx,in2:%lx\n",
151 vcpu, result.status, gr29, gr30);
152
153#if 0
154 if (gr29 == PAL_CACHE_TYPE_COHERENT) {
155 cpus_setall(vcpu->arch.cache_coherent_map);
156 cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map);
157 cpus_setall(cpu_cache_coherent_map);
158 cpu_clear(vcpu->cpu, cpu_cache_coherent_map);
159 }
160#endif
161 return result;
162}
163
164struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu)
165{
166
167 struct ia64_pal_retval result;
168
169 PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0);
170 return result;
171}
172
173static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
174{
175
176 struct ia64_pal_retval result;
177
178 PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0);
179
180 /*
181 * PAL_FREQ_BASE may not be implemented in some platforms,
182 * call SAL instead.
183 */
184 if (result.v0 == 0) {
185 result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
186 &result.v0,
187 &result.v1);
188 result.v2 = 0;
189 }
190
191 return result;
192}
193
194/*
195 * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2
196 * RTC is used instead. This function patches the ratios from SAL
197 * to match the RTC before providing them to the guest.
198 */
199static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result)
200{
201 struct pal_freq_ratio *ratio;
202 unsigned long sal_freq, sal_drift, factor;
203
204 result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
205 &sal_freq, &sal_drift);
206 ratio = (struct pal_freq_ratio *)&result->v2;
207 factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) /
208 sn_rtc_cycles_per_second;
209
210 ratio->num = 3;
211 ratio->den = factor;
212}
213
214static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
215{
216 struct ia64_pal_retval result;
217
218 PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
219
220 if (vcpu->kvm->arch.is_sn2)
221 sn2_patch_itc_freq_ratios(&result);
222
223 return result;
224}
225
226static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu)
227{
228 struct ia64_pal_retval result;
229
230 INIT_PAL_STATUS_UNIMPLEMENTED(result);
231 return result;
232}
233
234static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu)
235{
236
237 struct ia64_pal_retval result;
238
239 INIT_PAL_STATUS_SUCCESS(result);
240 return result;
241}
242
243static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
244{
245
246 struct ia64_pal_retval result = {0, 0, 0, 0};
247 long in0, in1, in2, in3;
248
249 kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
250 result.status = ia64_pal_proc_get_features(&result.v0, &result.v1,
251 &result.v2, in2);
252
253 return result;
254}
255
256static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu)
257{
258
259 struct ia64_pal_retval result = {0, 0, 0, 0};
260 long in0, in1, in2, in3;
261
262 kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
263 result.status = ia64_pal_register_info(in1, &result.v1, &result.v2);
264
265 return result;
266}
267
268static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
269{
270
271 pal_cache_config_info_t ci;
272 long status;
273 unsigned long in0, in1, in2, in3, r9, r10;
274
275 kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
276 status = ia64_pal_cache_config_info(in1, in2, &ci);
277 r9 = ci.pcci_info_1.pcci1_data;
278 r10 = ci.pcci_info_2.pcci2_data;
279 return ((struct ia64_pal_retval){status, r9, r10, 0});
280}
281
282#define GUEST_IMPL_VA_MSB 59
283#define GUEST_RID_BITS 18
284
285static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
286{
287
288 pal_vm_info_1_u_t vminfo1;
289 pal_vm_info_2_u_t vminfo2;
290 struct ia64_pal_retval result;
291
292 PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0);
293 if (!result.status) {
294 vminfo1.pvi1_val = result.v0;
295 vminfo1.pal_vm_info_1_s.max_itr_entry = 8;
296 vminfo1.pal_vm_info_1_s.max_dtr_entry = 8;
297 result.v0 = vminfo1.pvi1_val;
298 vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB;
299 vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS;
300 result.v1 = vminfo2.pvi2_val;
301 }
302
303 return result;
304}
305
306static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
307{
308 struct ia64_pal_retval result;
309 unsigned long in0, in1, in2, in3;
310
311 kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
312
313 result.status = ia64_pal_vm_info(in1, in2,
314 (pal_tc_info_u_t *)&result.v1, &result.v2);
315
316 return result;
317}
318
319static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
320{
321 u64 index = 0;
322 struct exit_ctl_data *p;
323
324 p = kvm_get_exit_data(vcpu);
325 if (p->exit_reason == EXIT_REASON_PAL_CALL)
326 index = p->u.pal_data.gr28;
327
328 return index;
329}
330
331static void prepare_for_halt(struct kvm_vcpu *vcpu)
332{
333 vcpu->arch.timer_pending = 1;
334 vcpu->arch.timer_fired = 0;
335}
336
337static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu)
338{
339 long status;
340 unsigned long in0, in1, in2, in3, r9;
341 unsigned long pm_buffer[16];
342
343 kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
344 status = ia64_pal_perf_mon_info(pm_buffer,
345 (pal_perf_mon_info_u_t *) &r9);
346 if (status != 0) {
347 printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status);
348 } else {
349 if (in1)
350 memcpy((void *)in1, pm_buffer, sizeof(pm_buffer));
351 else {
352 status = PAL_STATUS_EINVAL;
353 printk(KERN_WARNING"Invalid parameters "
354 "for PAL call:0x%lx!\n", in0);
355 }
356 }
357 return (struct ia64_pal_retval){status, r9, 0, 0};
358}
359
360static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu)
361{
362 unsigned long in0, in1, in2, in3;
363 long status;
364 unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32)
365 | (1UL << 61) | (1UL << 60);
366
367 kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
368 if (in1) {
369 memcpy((void *)in1, &res, sizeof(res));
370 status = 0;
371 } else{
372 status = PAL_STATUS_EINVAL;
373 printk(KERN_WARNING"Invalid parameters "
374 "for PAL call:0x%lx!\n", in0);
375 }
376
377 return (struct ia64_pal_retval){status, 0, 0, 0};
378}
379
380static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu)
381{
382 unsigned long r9;
383 long status;
384
385 status = ia64_pal_mem_attrib(&r9);
386
387 return (struct ia64_pal_retval){status, r9, 0, 0};
388}
389
390static void remote_pal_prefetch_visibility(void *v)
391{
392 s64 trans_type = (s64)v;
393 ia64_pal_prefetch_visibility(trans_type);
394}
395
396static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu)
397{
398 struct ia64_pal_retval result = {0, 0, 0, 0};
399 unsigned long in0, in1, in2, in3;
400 kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
401 result.status = ia64_pal_prefetch_visibility(in1);
402 if (result.status == 0) {
403 /* Must be performed on all remote processors
404 in the coherence domain. */
405 smp_call_function(remote_pal_prefetch_visibility,
406 (void *)in1, 1);
407 /* Unnecessary on remote processor for other vcpus!*/
408 result.status = 1;
409 }
410 return result;
411}
412
/* smp_call_function() callback: run PAL_MC_DRAIN on this cpu; @v unused. */
static void remote_pal_mc_drain(void *v)
{
	ia64_pal_mc_drain();
}
417
418static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu)
419{
420 struct ia64_pal_retval result = {0, 0, 0, 0};
421 unsigned long in0, in1, in2, in3;
422
423 kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
424
425 if (in1 == 0 && in2) {
426 char brand_info[128];
427 result.status = ia64_pal_get_brand_info(brand_info);
428 if (result.status == PAL_STATUS_SUCCESS)
429 memcpy((void *)in2, brand_info, 128);
430 } else {
431 result.status = PAL_STATUS_REQUIRES_MEMORY;
432 printk(KERN_WARNING"Invalid parameters for "
433 "PAL call:0x%lx!\n", in0);
434 }
435
436 return result;
437}
438
439int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
440{
441
442 u64 gr28;
443 struct ia64_pal_retval result;
444 int ret = 1;
445
446 gr28 = kvm_get_pal_call_index(vcpu);
447 switch (gr28) {
448 case PAL_CACHE_FLUSH:
449 result = pal_cache_flush(vcpu);
450 break;
451 case PAL_MEM_ATTRIB:
452 result = pal_mem_attrib(vcpu);
453 break;
454 case PAL_CACHE_SUMMARY:
455 result = pal_cache_summary(vcpu);
456 break;
457 case PAL_PERF_MON_INFO:
458 result = pal_perf_mon_info(vcpu);
459 break;
460 case PAL_HALT_INFO:
461 result = pal_halt_info(vcpu);
462 break;
463 case PAL_HALT_LIGHT:
464 {
465 INIT_PAL_STATUS_SUCCESS(result);
466 prepare_for_halt(vcpu);
467 if (kvm_highest_pending_irq(vcpu) == -1)
468 ret = kvm_emulate_halt(vcpu);
469 }
470 break;
471
472 case PAL_PREFETCH_VISIBILITY:
473 result = pal_prefetch_visibility(vcpu);
474 break;
475 case PAL_MC_DRAIN:
476 result.status = ia64_pal_mc_drain();
477 /* FIXME: All vcpus likely call PAL_MC_DRAIN.
478 That causes the congestion. */
479 smp_call_function(remote_pal_mc_drain, NULL, 1);
480 break;
481
482 case PAL_FREQ_RATIOS:
483 result = pal_freq_ratios(vcpu);
484 break;
485
486 case PAL_FREQ_BASE:
487 result = pal_freq_base(vcpu);
488 break;
489
490 case PAL_LOGICAL_TO_PHYSICAL :
491 result = pal_logical_to_physica(vcpu);
492 break;
493
494 case PAL_VM_SUMMARY :
495 result = pal_vm_summary(vcpu);
496 break;
497
498 case PAL_VM_INFO :
499 result = pal_vm_info(vcpu);
500 break;
501 case PAL_PLATFORM_ADDR :
502 result = pal_platform_addr(vcpu);
503 break;
504 case PAL_CACHE_INFO:
505 result = pal_cache_info(vcpu);
506 break;
507 case PAL_PTCE_INFO:
508 INIT_PAL_STATUS_SUCCESS(result);
509 result.v1 = (1L << 32) | 1L;
510 break;
511 case PAL_REGISTER_INFO:
512 result = pal_register_info(vcpu);
513 break;
514 case PAL_VM_PAGE_SIZE:
515 result.status = ia64_pal_vm_page_size(&result.v0,
516 &result.v1);
517 break;
518 case PAL_RSE_INFO:
519 result.status = ia64_pal_rse_info(&result.v0,
520 (pal_hints_u_t *)&result.v1);
521 break;
522 case PAL_PROC_GET_FEATURES:
523 result = pal_proc_get_features(vcpu);
524 break;
525 case PAL_DEBUG_INFO:
526 result.status = ia64_pal_debug_info(&result.v0,
527 &result.v1);
528 break;
529 case PAL_VERSION:
530 result.status = ia64_pal_version(
531 (pal_version_u_t *)&result.v0,
532 (pal_version_u_t *)&result.v1);
533 break;
534 case PAL_FIXED_ADDR:
535 result.status = PAL_STATUS_SUCCESS;
536 result.v0 = vcpu->vcpu_id;
537 break;
538 case PAL_BRAND_INFO:
539 result = pal_get_brand_info(vcpu);
540 break;
541 case PAL_GET_PSTATE:
542 case PAL_CACHE_SHARED_INFO:
543 INIT_PAL_STATUS_UNIMPLEMENTED(result);
544 break;
545 default:
546 INIT_PAL_STATUS_UNIMPLEMENTED(result);
547 printk(KERN_WARNING"kvm: Unsupported pal call,"
548 " index:0x%lx\n", gr28);
549 }
550 set_pal_result(vcpu, result);
551 return ret;
552}
553
554static struct sal_ret_values sal_emulator(struct kvm *kvm,
555 long index, unsigned long in1,
556 unsigned long in2, unsigned long in3,
557 unsigned long in4, unsigned long in5,
558 unsigned long in6, unsigned long in7)
559{
560 unsigned long r9 = 0;
561 unsigned long r10 = 0;
562 long r11 = 0;
563 long status;
564
565 status = 0;
566 switch (index) {
567 case SAL_FREQ_BASE:
568 status = ia64_sal_freq_base(in1, &r9, &r10);
569 break;
570 case SAL_PCI_CONFIG_READ:
571 printk(KERN_WARNING"kvm: Not allowed to call here!"
572 " SAL_PCI_CONFIG_READ\n");
573 break;
574 case SAL_PCI_CONFIG_WRITE:
575 printk(KERN_WARNING"kvm: Not allowed to call here!"
576 " SAL_PCI_CONFIG_WRITE\n");
577 break;
578 case SAL_SET_VECTORS:
579 if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) {
580 if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) {
581 status = -2;
582 } else {
583 kvm->arch.rdv_sal_data.boot_ip = in2;
584 kvm->arch.rdv_sal_data.boot_gp = in3;
585 }
586 printk("Rendvous called! iip:%lx\n\n", in2);
587 } else
588 printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu."
589 "ignored...\n", in1);
590 break;
591 case SAL_GET_STATE_INFO:
592 /* No more info. */
593 status = -5;
594 r9 = 0;
595 break;
596 case SAL_GET_STATE_INFO_SIZE:
597 /* Return a dummy size. */
598 status = 0;
599 r9 = 128;
600 break;
601 case SAL_CLEAR_STATE_INFO:
602 /* Noop. */
603 break;
604 case SAL_MC_RENDEZ:
605 printk(KERN_WARNING
606 "kvm: called SAL_MC_RENDEZ. ignored...\n");
607 break;
608 case SAL_MC_SET_PARAMS:
609 printk(KERN_WARNING
610 "kvm: called SAL_MC_SET_PARAMS.ignored!\n");
611 break;
612 case SAL_CACHE_FLUSH:
613 if (1) {
614 /*Flush using SAL.
615 This method is faster but has a side
616 effect on other vcpu running on
617 this cpu. */
618 status = ia64_sal_cache_flush(in1);
619 } else {
620 /*Maybe need to implement the method
621 without side effect!*/
622 status = 0;
623 }
624 break;
625 case SAL_CACHE_INIT:
626 printk(KERN_WARNING
627 "kvm: called SAL_CACHE_INIT. ignored...\n");
628 break;
629 case SAL_UPDATE_PAL:
630 printk(KERN_WARNING
631 "kvm: CALLED SAL_UPDATE_PAL. ignored...\n");
632 break;
633 default:
634 printk(KERN_WARNING"kvm: called SAL_CALL with unknown index."
635 " index:%ld\n", index);
636 status = -1;
637 break;
638 }
639 return ((struct sal_ret_values) {status, r9, r10, r11});
640}
641
642static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
643 u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){
644
645 struct exit_ctl_data *p;
646
647 p = kvm_get_exit_data(vcpu);
648
649 if (p->exit_reason == EXIT_REASON_SAL_CALL) {
650 *in0 = p->u.sal_data.in0;
651 *in1 = p->u.sal_data.in1;
652 *in2 = p->u.sal_data.in2;
653 *in3 = p->u.sal_data.in3;
654 *in4 = p->u.sal_data.in4;
655 *in5 = p->u.sal_data.in5;
656 *in6 = p->u.sal_data.in6;
657 *in7 = p->u.sal_data.in7;
658 return ;
659 }
660 *in0 = 0;
661}
662
663void kvm_sal_emul(struct kvm_vcpu *vcpu)
664{
665
666 struct sal_ret_values result;
667 u64 index, in1, in2, in3, in4, in5, in6, in7;
668
669 kvm_get_sal_call_data(vcpu, &index, &in1, &in2,
670 &in3, &in4, &in5, &in6, &in7);
671 result = sal_emulator(vcpu->kvm, index, in1, in2, in3,
672 in4, in5, in6, in7);
673 set_sal_result(vcpu, result);
674}
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c
deleted file mode 100644
index f1268b8e6f9e..000000000000
--- a/arch/ia64/kvm/kvm_lib.c
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * kvm_lib.c: Compile some libraries for kvm-intel module.
3 *
4 * Just include kernel's library, and disable symbols export.
5 * Copyright (C) 2008, Intel Corporation.
6 * Xiantao Zhang (xiantao.zhang@intel.com)
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 */
13#undef CONFIG_MODULES
14#include <linux/module.h>
15#undef CONFIG_KALLSYMS
16#undef EXPORT_SYMBOL
17#undef EXPORT_SYMBOL_GPL
18#define EXPORT_SYMBOL(sym)
19#define EXPORT_SYMBOL_GPL(sym)
20#include "../../../lib/vsprintf.c"
21#include "../../../lib/ctype.c"
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
deleted file mode 100644
index b2bcaa2787aa..000000000000
--- a/arch/ia64/kvm/kvm_minstate.h
+++ /dev/null
@@ -1,266 +0,0 @@
1/*
2 * kvm_minstate.h: min save macros
3 * Copyright (c) 2007, Intel Corporation.
4 *
5 * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
6 * Xiantao Zhang (xiantao.zhang@intel.com)
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 *
21 */
22
23
24#include <asm/asmmacro.h>
25#include <asm/types.h>
26#include <asm/kregs.h>
27#include <asm/kvm_host.h>
28
29#include "asm-offsets.h"
30
/*
 * KVM_MINSTATE_START_SAVE_MIN: switch the register backing store.
 *
 * On exit:
 *   r28 = old ar.rnat
 *   r22 = r1 + VMM_RBS_OFFSET (new RBS base, installed in ar.bspstore)
 *   r23 = old ar.bspstore
 *   r18 = ar.bsp read back after the switch
 *   r1  = r1 + KVM_STK_OFFSET - VMM_PT_REGS_SIZE
 *   ar.rsc = 0x3
 */
31#define KVM_MINSTATE_START_SAVE_MIN \
32 mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\
33 ;; \
34 mov.m r28 = ar.rnat; \
35 addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \
36 ;; \
37 lfetch.fault.excl.nt1 [r22]; \
38 addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1; \
39 mov r23 = ar.bspstore; /* save ar.bspstore */ \
40 ;; \
41 mov ar.bspstore = r22; /* switch to kernel RBS */\
42 ;; \
43 mov r18 = ar.bsp; \
44 mov ar.rsc = 0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */
45
46
47
/* KVM_MINSTATE_END_SAVE_MIN: final step of KVM_DO_SAVE_MIN, a lone bsw.1. */
48#define KVM_MINSTATE_END_SAVE_MIN \
49 bsw.1; /* switch back to bank 1 (must be last in insn group) */\
50 ;;
51
52
/*
 * PAL_VSA_SYNC_READ: branch to kvm_vps_sync_read and continue after it.
 *
 * r25 is loaded from [r21 + VMM_VPD_BASE_OFFSET] (the vpd base) and r24 is
 * set to ip + 0x20, i.e. two 16-byte bundles past the bundle that reads ip,
 * which is the code following this macro.
 * NOTE(review): this relies on the callee returning through r24 - confirm
 * against kvm_vps_sync_read / the PAL service.
 */
53#define PAL_VSA_SYNC_READ \
54 /* begin to call pal vps sync_read */ \
55{.mii; \
56 add r25 = VMM_VPD_BASE_OFFSET, r21; \
57 nop 0x0; \
58 mov r24=ip; \
59 ;; \
60} \
61{.mmb \
62 add r24=0x20, r24; \
63 ld8 r25 = [r25]; /* read vpd base */ \
64 br.cond.sptk kvm_vps_sync_read; /*call the service*/ \
65 ;; \
66}; \
68
/* Copy the "current" pointer, which this code keeps in r21, into @reg. */
69#define KVM_MINSTATE_GET_CURRENT(reg) mov reg=r21
70
71/*
72 * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
73 * the minimum state necessary that allows us to turn psr.ic back
74 * on.
75 *
   * Macro arguments:
   * COVER:    instruction(s) emitted right after the initial register reads
   *           (either "cover" or nothing, see the KVM_SAVE_MIN* variants)
   * SAVE_IFS: instruction that leaves the value to store as cr.ifs in r30
   * EXTRA:    extra instruction(s) emitted after cr.iipa/cr.isr were stored
   *
76 * Assumed state upon entry:
77 * psr.ic: off
78 * r31: contains saved predicates (pr)
   * r21: the "current" pointer (see KVM_MINSTATE_GET_CURRENT)
79 *
80 * Upon exit, the state is as follows:
81 * psr.ic: off
   * r1 = loaded from [r13 + VMM_VCPU_GP_OFFSET] (kernel global pointer)
82 * r2 = points to &pt_regs.r16
83 * r8 = contents of ar.ccv
84 * r9 = contents of ar.csd
85 * r10 = contents of ar.ssd
86 * r11 = FPSR_DEFAULT
87 * r12 = kernel sp (kernel virtual address)
88 * r13 = copy of r21; used below as the base for the VMM_VCPU_* offsets
89 * p15 = TRUE if psr.i is set in cr.ipsr
90 * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
91 * preserved
92 *
93 * Note that psr.ic is NOT turned on by this macro. This is so that
94 * we can pass interruption state as arguments to a handler.
95 */
96
97
/* Offset of field @f inside the saved pt_regs area. */
98#define PT(f) (VMM_PT_REGS_##f##_OFFSET)
99
100#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
101 KVM_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \
102 mov r27 = ar.rsc; /* M */ \
103 mov r20 = r1; /* A */ \
104 mov r25 = ar.unat; /* M */ \
105 mov r29 = cr.ipsr; /* M */ \
106 mov r26 = ar.pfs; /* I */ \
107 mov r18 = cr.isr; \
108 COVER; /* B;; (or nothing) */ \
109 ;; \
110 tbit.z p0,p15 = r29,IA64_PSR_I_BIT; \
111 mov r1 = r16; \
112/* mov r21=r16; */ \
113 /* switch from user to kernel RBS: */ \
114 ;; \
115 invala; /* M */ \
116 SAVE_IFS; \
117 ;; \
118 KVM_MINSTATE_START_SAVE_MIN \
119 adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */ \
120 adds r16 = PT(CR_IPSR),r1; \
121 ;; \
122 lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
123 st8 [r16] = r29; /* save cr.ipsr */ \
124 ;; \
125 lfetch.fault.excl.nt1 [r17]; \
126 tbit.nz p15,p0 = r29,IA64_PSR_I_BIT; \
127 mov r29 = b0 \
128 ;; \
129 adds r16 = PT(R8),r1; /* initialize first base pointer */\
130 adds r17 = PT(R9),r1; /* initialize second base pointer */\
131 ;; \
132.mem.offset 0,0; st8.spill [r16] = r8,16; \
133.mem.offset 8,0; st8.spill [r17] = r9,16; \
134 ;; \
135.mem.offset 0,0; st8.spill [r16] = r10,24; \
136.mem.offset 8,0; st8.spill [r17] = r11,24; \
137 ;; \
138 mov r9 = cr.iip; /* M */ \
139 mov r10 = ar.fpsr; /* M */ \
140 ;; \
141 st8 [r16] = r9,16; /* save cr.iip */ \
142 st8 [r17] = r30,16; /* save cr.ifs */ \
143 sub r18 = r18,r22; /* r18=RSE.ndirty*8 */ \
144 ;; \
145 st8 [r16] = r25,16; /* save ar.unat */ \
146 st8 [r17] = r26,16; /* save ar.pfs */ \
147 shl r18 = r18,16; /* calculate ar.rsc used for "loadrs" */\
148 ;; \
149 st8 [r16] = r27,16; /* save ar.rsc */ \
150 st8 [r17] = r28,16; /* save ar.rnat */ \
151 ;; /* avoid RAW on r16 & r17 */ \
152 st8 [r16] = r23,16; /* save ar.bspstore */ \
153 st8 [r17] = r31,16; /* save predicates */ \
154 ;; \
155 st8 [r16] = r29,16; /* save b0 */ \
156 st8 [r17] = r18,16; /* save ar.rsc value for "loadrs" */\
157 ;; \
158.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */ \
159.mem.offset 8,0; st8.spill [r17] = r12,16; \
160 adds r12 = -16,r1; /* switch to kernel memory stack */ \
161 ;; \
162.mem.offset 0,0; st8.spill [r16] = r13,16; \
163.mem.offset 8,0; st8.spill [r17] = r10,16; /* save ar.fpsr */\
164 mov r13 = r21; /* establish `current' */ \
165 ;; \
166.mem.offset 0,0; st8.spill [r16] = r15,16; \
167.mem.offset 8,0; st8.spill [r17] = r14,16; \
168 ;; \
169.mem.offset 0,0; st8.spill [r16] = r2,16; \
170.mem.offset 8,0; st8.spill [r17] = r3,16; \
171 adds r2 = VMM_PT_REGS_R16_OFFSET,r1; \
172 ;; \
173 adds r16 = VMM_VCPU_IIPA_OFFSET,r13; \
174 adds r17 = VMM_VCPU_ISR_OFFSET,r13; \
175 mov r26 = cr.iipa; \
176 mov r27 = cr.isr; \
177 ;; \
178 st8 [r16] = r26; \
179 st8 [r17] = r27; \
180 ;; \
181 EXTRA; \
182 mov r8 = ar.ccv; \
183 mov r9 = ar.csd; \
184 mov r10 = ar.ssd; \
185 movl r11 = FPSR_DEFAULT; /* L-unit */ \
186 adds r17 = VMM_VCPU_GP_OFFSET,r13; \
187 ;; \
188 ld8 r1 = [r17];/* establish kernel global pointer */ \
189 ;; \
190 PAL_VSA_SYNC_READ \
191 KVM_MINSTATE_END_SAVE_MIN
192
193/*
194 * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
195 *
196 * Assumed state upon entry:
197 * psr.ic: on
198 * r2: points to &pt_regs.f6
199 * r3: points to &pt_regs.f7
200 * r8: contents of ar.ccv
201 * r9: contents of ar.csd
202 * r10: contents of ar.ssd
203 * r11: FPSR_DEFAULT
204 *
205 * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
    *
    * NOTE(review): the first stores below spill r16 through r2 and r17
    * through r3, and KVM_DO_SAVE_MIN leaves r2 = &pt_regs.r16.  The f6/f7
    * description above looks like the pointer state after the r16..r31 and
    * ar.ccv stores rather than the entry state - confirm against the
    * kvm_pt_regs layout.
    *
    * Body in order: spill r16..r31, install r11 in ar.fpsr, store r8
    * (ar.ccv), b6, b7, r9 (ar.csd), r10 (ar.ssd), then store ar.unat at
    * PT(EML_UNAT).  r2/r3 are left at PT(R4)/PT(R5).
206 */
207#define KVM_SAVE_REST \
208.mem.offset 0,0; st8.spill [r2] = r16,16; \
209.mem.offset 8,0; st8.spill [r3] = r17,16; \
210 ;; \
211.mem.offset 0,0; st8.spill [r2] = r18,16; \
212.mem.offset 8,0; st8.spill [r3] = r19,16; \
213 ;; \
214.mem.offset 0,0; st8.spill [r2] = r20,16; \
215.mem.offset 8,0; st8.spill [r3] = r21,16; \
216 mov r18=b6; \
217 ;; \
218.mem.offset 0,0; st8.spill [r2] = r22,16; \
219.mem.offset 8,0; st8.spill [r3] = r23,16; \
220 mov r19 = b7; \
221 ;; \
222.mem.offset 0,0; st8.spill [r2] = r24,16; \
223.mem.offset 8,0; st8.spill [r3] = r25,16; \
224 ;; \
225.mem.offset 0,0; st8.spill [r2] = r26,16; \
226.mem.offset 8,0; st8.spill [r3] = r27,16; \
227 ;; \
228.mem.offset 0,0; st8.spill [r2] = r28,16; \
229.mem.offset 8,0; st8.spill [r3] = r29,16; \
230 ;; \
231.mem.offset 0,0; st8.spill [r2] = r30,16; \
232.mem.offset 8,0; st8.spill [r3] = r31,32; \
233 ;; \
234 mov ar.fpsr = r11; \
235 st8 [r2] = r8,8; \
236 adds r24 = PT(B6)-PT(F7),r3; \
237 adds r25 = PT(B7)-PT(F7),r3; \
238 ;; \
239 st8 [r24] = r18,16; /* b6 */ \
240 st8 [r25] = r19,16; /* b7 */ \
241 adds r2 = PT(R4)-PT(F6),r2; \
242 adds r3 = PT(R5)-PT(F7),r3; \
243 ;; \
244 st8 [r24] = r9; /* ar.csd */ \
245 st8 [r25] = r10; /* ar.ssd */ \
246 ;; \
247 mov r18 = ar.unat; \
248 adds r19 = PT(EML_UNAT)-PT(R4),r2; \
249 ;; \
250 st8 [r19] = r18; /* eml_unat */ \
251
252
/*
 * KVM_SAVE_EXTRA: spill r4..r7 through r2/r3 (r4 and r6 via r2, r5 and r7
 * via r3), then store ar.unat at the slot r2 points to afterwards.
 * Clobbers r26; r2 and r3 are advanced by the post-increments.
 */
253#define KVM_SAVE_EXTRA \
254.mem.offset 0,0; st8.spill [r2] = r4,16; \
255.mem.offset 8,0; st8.spill [r3] = r5,16; \
256 ;; \
257.mem.offset 0,0; st8.spill [r2] = r6,16; \
258.mem.offset 8,0; st8.spill [r3] = r7; \
259 ;; \
260 mov r26 = ar.unat; \
261 ;; \
262 st8 [r2] = r26;/* eml_unat */ \
263
/*
 * Ready-made KVM_DO_SAVE_MIN variants:
 *   KVM_SAVE_MIN_WITH_COVER     - issue "cover" and save cr.ifs (via r30)
 *   KVM_SAVE_MIN_WITH_COVER_R19 - same, and additionally copy r19 into r15
 *   KVM_SAVE_MIN                - no cover; r0 is stored as the cr.ifs value
 */
264#define KVM_SAVE_MIN_WITH_COVER KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,)
265#define KVM_SAVE_MIN_WITH_COVER_R19 KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19)
266#define KVM_SAVE_MIN KVM_DO_SAVE_MIN( , mov r30 = r0, )
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
deleted file mode 100644
index c5f92a926a9a..000000000000
--- a/arch/ia64/kvm/lapic.h
+++ /dev/null
@@ -1,30 +0,0 @@
1#ifndef __KVM_IA64_LAPIC_H
2#define __KVM_IA64_LAPIC_H
3
4#include <linux/kvm_host.h>
5
6/*
7 * vlsapic
  *
  * Per-vcpu state of the virtual local SAPIC.
  * NOTE(review): the field notes below are inferred from the names only -
  * confirm against the users of this structure.
8 */
9struct kvm_lapic{
10 struct kvm_vcpu *vcpu; /* presumably the vcpu owning this state */
11 uint64_t insvc[4]; /* 4 x 64 bits; presumably one in-service bit per vector */
12 uint64_t vhpi;
13 uint8_t xtp;
14 uint8_t pal_init_pending;
15 uint8_t pad[2];
16};
17
18int kvm_create_lapic(struct kvm_vcpu *vcpu);
19void kvm_free_lapic(struct kvm_vcpu *vcpu);
20
21int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
22int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
23int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
24 int short_hand, int dest, int dest_mode);
25int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
26int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
27#define kvm_apic_present(x) (true)
28#define kvm_lapic_enabled(x) (true)
29
30#endif
diff --git a/arch/ia64/kvm/memcpy.S b/arch/ia64/kvm/memcpy.S
deleted file mode 100644
index c04cdbe9f80f..000000000000
--- a/arch/ia64/kvm/memcpy.S
+++ /dev/null
@@ -1 +0,0 @@
1#include "../lib/memcpy.S"
diff --git a/arch/ia64/kvm/memset.S b/arch/ia64/kvm/memset.S
deleted file mode 100644
index 83c3066d844a..000000000000
--- a/arch/ia64/kvm/memset.S
+++ /dev/null
@@ -1 +0,0 @@
1#include "../lib/memset.S"
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
deleted file mode 100644
index dd979e00b574..000000000000
--- a/arch/ia64/kvm/misc.h
+++ /dev/null
@@ -1,94 +0,0 @@
1#ifndef __KVM_IA64_MISC_H
2#define __KVM_IA64_MISC_H
3
4#include <linux/kvm_host.h>
5/*
6 * misc.h
7 * Copyright (C) 2007, Intel Corporation.
8 * Xiantao Zhang (xiantao.zhang@intel.com)
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms and conditions of the GNU General Public License,
12 * version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
21 * Place - Suite 330, Boston, MA 02111-1307 USA.
22 *
23 */
24
25/*
26 *Return p2m base address at host side!
27 */
28static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
29{
30 return (uint64_t *)(kvm->arch.vm_base +
31 offsetof(struct kvm_vm_data, kvm_p2m));
32}
33
34static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
35 u64 paddr, u64 mem_flags)
36{
37 uint64_t *pmt_base = kvm_host_get_pmt(kvm);
38 unsigned long pte;
39
40 pte = PAGE_ALIGN(paddr) | mem_flags;
41 pmt_base[gfn] = pte;
42}
43
44/*Function for translating host address to guest address*/
45
46static inline void *to_guest(struct kvm *kvm, void *addr)
47{
48 return (void *)((unsigned long)(addr) - kvm->arch.vm_base +
49 KVM_VM_DATA_BASE);
50}
51
52/*Function for translating guest address to host address*/
53
54static inline void *to_host(struct kvm *kvm, void *addr)
55{
56 return (void *)((unsigned long)addr - KVM_VM_DATA_BASE
57 + kvm->arch.vm_base);
58}
59
60/* Get host context of the vcpu */
61static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu)
62{
63 union context *ctx = &vcpu->arch.host;
64 return to_guest(vcpu->kvm, ctx);
65}
66
67/* Get guest context of the vcpu */
68static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu)
69{
70 union context *ctx = &vcpu->arch.guest;
71 return to_guest(vcpu->kvm, ctx);
72}
73
74/* kvm get exit data from gvmm! */
75static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu)
76{
77 return &vcpu->arch.exit_data;
78}
79
80/*kvm get vcpu ioreq for kvm module!*/
81static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu)
82{
83 struct exit_ctl_data *p_ctl_data;
84
85 if (vcpu) {
86 p_ctl_data = kvm_get_exit_data(vcpu);
87 if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION)
88 return &p_ctl_data->u.ioreq;
89 }
90
91 return NULL;
92}
93
94#endif
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
deleted file mode 100644
index f1e17d3d6cd9..000000000000
--- a/arch/ia64/kvm/mmio.c
+++ /dev/null
@@ -1,336 +0,0 @@
1/*
2 * mmio.c: MMIO emulation components.
3 * Copyright (c) 2004, Intel Corporation.
4 * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
5 * Kun Tian (Kevin Tian) (Kevin.tian@intel.com)
6 *
7 * Copyright (c) 2007 Intel Corporation KVM support.
8 * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
9 * Xiantao Zhang (xiantao.zhang@intel.com)
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms and conditions of the GNU General Public License,
13 * version 2, as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * more details.
19 *
20 * You should have received a copy of the GNU General Public License along with
21 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
22 * Place - Suite 330, Boston, MA 02111-1307 USA.
23 *
24 */
25
26#include <linux/kvm_host.h>
27
28#include "vcpu.h"
29
/* Store @val as the XTP value of vcpu @v through the VLSAPIC_XTP() accessor. */
30static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val)
31{
32 VLSAPIC_XTP(v) = val;
33}
34
/*
 * LSAPIC offsets inside the processor interrupt block (PIB).
 *
 * PIB_LOW_HALF(ofst) is true when bit 20 of @ofst is clear.  The argument
 * and the whole expansion are parenthesised so the macro stays correct when
 * it is handed an expression or used inside a larger one.
 */
#define PIB_LOW_HALF(ofst) (!((ofst) & (1 << 20)))
#define PIB_OFST_INTA 0x1E0000
#define PIB_OFST_XTP 0x1E0008
41
42/*
43 * execute write IPI op.
44 */
45static void vlsapic_write_ipi(struct kvm_vcpu *vcpu,
46 uint64_t addr, uint64_t data)
47{
48 struct exit_ctl_data *p = &current_vcpu->arch.exit_data;
49 unsigned long psr;
50
51 local_irq_save(psr);
52
53 p->exit_reason = EXIT_REASON_IPI;
54 p->u.ipi_data.addr.val = addr;
55 p->u.ipi_data.data.val = data;
56 vmm_transition(current_vcpu);
57
58 local_irq_restore(psr);
59
60}
61
62void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
63 unsigned long length, unsigned long val)
64{
65 addr &= (PIB_SIZE - 1);
66
67 switch (addr) {
68 case PIB_OFST_INTA:
69 panic_vm(v, "Undefined write on PIB INTA\n");
70 break;
71 case PIB_OFST_XTP:
72 if (length == 1) {
73 vlsapic_write_xtp(v, val);
74 } else {
75 panic_vm(v, "Undefined write on PIB XTP\n");
76 }
77 break;
78 default:
79 if (PIB_LOW_HALF(addr)) {
80 /*Lower half */
81 if (length != 8)
82 panic_vm(v, "Can't LHF write with size %ld!\n",
83 length);
84 else
85 vlsapic_write_ipi(v, addr, val);
86 } else { /*Upper half */
87 panic_vm(v, "IPI-UHF write %lx\n", addr);
88 }
89 break;
90 }
91}
92
93unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
94 unsigned long length)
95{
96 uint64_t result = 0;
97
98 addr &= (PIB_SIZE - 1);
99
100 switch (addr) {
101 case PIB_OFST_INTA:
102 if (length == 1) /* 1 byte load */
103 ; /* There is no i8259, there is no INTA access*/
104 else
105 panic_vm(v, "Undefined read on PIB INTA\n");
106
107 break;
108 case PIB_OFST_XTP:
109 if (length == 1) {
110 result = VLSAPIC_XTP(v);
111 } else {
112 panic_vm(v, "Undefined read on PIB XTP\n");
113 }
114 break;
115 default:
116 panic_vm(v, "Undefined addr access for lsapic!\n");
117 break;
118 }
119 return result;
120}
121
122static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
123 u16 s, int ma, int dir)
124{
125 unsigned long iot;
126 struct exit_ctl_data *p = &vcpu->arch.exit_data;
127 unsigned long psr;
128
129 iot = __gpfn_is_io(src_pa >> PAGE_SHIFT);
130
131 local_irq_save(psr);
132
133 /*Intercept the access for PIB range*/
134 if (iot == GPFN_PIB) {
135 if (!dir)
136 lsapic_write(vcpu, src_pa, s, *dest);
137 else
138 *dest = lsapic_read(vcpu, src_pa, s);
139 goto out;
140 }
141 p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION;
142 p->u.ioreq.addr = src_pa;
143 p->u.ioreq.size = s;
144 p->u.ioreq.dir = dir;
145 if (dir == IOREQ_WRITE)
146 p->u.ioreq.data = *dest;
147 p->u.ioreq.state = STATE_IOREQ_READY;
148 vmm_transition(vcpu);
149
150 if (p->u.ioreq.state == STATE_IORESP_READY) {
151 if (dir == IOREQ_READ)
152 /* it's necessary to ensure zero extending */
153 *dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
154 } else
155 panic_vm(vcpu, "Unhandled mmio access returned!\n");
156out:
157 local_irq_restore(psr);
158 return ;
159}
160
161/*
162 dir 1: read 0:write
163 inst_type 0:integer 1:floating point
164 */
165#define SL_INTEGER 0 /* store/load integer */
166#define SL_FLOATING 1 /* store/load floating */
167
168void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
169{
170 struct kvm_pt_regs *regs;
171 IA64_BUNDLE bundle;
172 int slot, dir = 0;
173 int inst_type = -1;
174 u16 size = 0;
175 u64 data, slot1a, slot1b, temp, update_reg;
176 s32 imm;
177 INST64 inst;
178
179 regs = vcpu_regs(vcpu);
180
181 if (fetch_code(vcpu, regs->cr_iip, &bundle)) {
182 /* if fetch code fail, return and try again */
183 return;
184 }
185 slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
186 if (!slot)
187 inst.inst = bundle.slot0;
188 else if (slot == 1) {
189 slot1a = bundle.slot1a;
190 slot1b = bundle.slot1b;
191 inst.inst = slot1a + (slot1b << 18);
192 } else if (slot == 2)
193 inst.inst = bundle.slot2;
194
195 /* Integer Load/Store */
196 if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) {
197 inst_type = SL_INTEGER;
198 size = (inst.M1.x6 & 0x3);
199 if ((inst.M1.x6 >> 2) > 0xb) {
200 /*write*/
201 dir = IOREQ_WRITE;
202 data = vcpu_get_gr(vcpu, inst.M4.r2);
203 } else if ((inst.M1.x6 >> 2) < 0xb) {
204 /*read*/
205 dir = IOREQ_READ;
206 }
207 } else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) {
208 /* Integer Load + Reg update */
209 inst_type = SL_INTEGER;
210 dir = IOREQ_READ;
211 size = (inst.M2.x6 & 0x3);
212 temp = vcpu_get_gr(vcpu, inst.M2.r3);
213 update_reg = vcpu_get_gr(vcpu, inst.M2.r2);
214 temp += update_reg;
215 vcpu_set_gr(vcpu, inst.M2.r3, temp, 0);
216 } else if (inst.M3.major == 5) {
217 /*Integer Load/Store + Imm update*/
218 inst_type = SL_INTEGER;
219 size = (inst.M3.x6&0x3);
220 if ((inst.M5.x6 >> 2) > 0xb) {
221 /*write*/
222 dir = IOREQ_WRITE;
223 data = vcpu_get_gr(vcpu, inst.M5.r2);
224 temp = vcpu_get_gr(vcpu, inst.M5.r3);
225 imm = (inst.M5.s << 31) | (inst.M5.i << 30) |
226 (inst.M5.imm7 << 23);
227 temp += imm >> 23;
228 vcpu_set_gr(vcpu, inst.M5.r3, temp, 0);
229
230 } else if ((inst.M3.x6 >> 2) < 0xb) {
231 /*read*/
232 dir = IOREQ_READ;
233 temp = vcpu_get_gr(vcpu, inst.M3.r3);
234 imm = (inst.M3.s << 31) | (inst.M3.i << 30) |
235 (inst.M3.imm7 << 23);
236 temp += imm >> 23;
237 vcpu_set_gr(vcpu, inst.M3.r3, temp, 0);
238
239 }
240 } else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B
241 && inst.M9.m == 0 && inst.M9.x == 0) {
242 /* Floating-point spill*/
243 struct ia64_fpreg v;
244
245 inst_type = SL_FLOATING;
246 dir = IOREQ_WRITE;
247 vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
248 /* Write high word. FIXME: this is a kludge! */
249 v.u.bits[1] &= 0x3ffff;
250 mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8,
251 ma, IOREQ_WRITE);
252 data = v.u.bits[0];
253 size = 3;
254 } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
255 /* Floating-point spill + Imm update */
256 struct ia64_fpreg v;
257
258 inst_type = SL_FLOATING;
259 dir = IOREQ_WRITE;
260 vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
261 temp = vcpu_get_gr(vcpu, inst.M10.r3);
262 imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
263 (inst.M10.imm7 << 23);
264 temp += imm >> 23;
265 vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
266
267 /* Write high word.FIXME: this is a kludge! */
268 v.u.bits[1] &= 0x3ffff;
269 mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1],
270 8, ma, IOREQ_WRITE);
271 data = v.u.bits[0];
272 size = 3;
273 } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
274 /* Floating-point stf8 + Imm update */
275 struct ia64_fpreg v;
276 inst_type = SL_FLOATING;
277 dir = IOREQ_WRITE;
278 size = 3;
279 vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
280 data = v.u.bits[0]; /* Significand. */
281 temp = vcpu_get_gr(vcpu, inst.M10.r3);
282 imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
283 (inst.M10.imm7 << 23);
284 temp += imm >> 23;
285 vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
286 } else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c
287 && inst.M15.x6 <= 0x2f) {
288 temp = vcpu_get_gr(vcpu, inst.M15.r3);
289 imm = (inst.M15.s << 31) | (inst.M15.i << 30) |
290 (inst.M15.imm7 << 23);
291 temp += imm >> 23;
292 vcpu_set_gr(vcpu, inst.M15.r3, temp, 0);
293
294 vcpu_increment_iip(vcpu);
295 return;
296 } else if (inst.M12.major == 6 && inst.M12.m == 1
297 && inst.M12.x == 1 && inst.M12.x6 == 1) {
298 /* Floating-point Load Pair + Imm ldfp8 M12*/
299 struct ia64_fpreg v;
300
301 inst_type = SL_FLOATING;
302 dir = IOREQ_READ;
303 size = 8; /*ldfd*/
304 mmio_access(vcpu, padr, &data, size, ma, dir);
305 v.u.bits[0] = data;
306 v.u.bits[1] = 0x1003E;
307 vcpu_set_fpreg(vcpu, inst.M12.f1, &v);
308 padr += 8;
309 mmio_access(vcpu, padr, &data, size, ma, dir);
310 v.u.bits[0] = data;
311 v.u.bits[1] = 0x1003E;
312 vcpu_set_fpreg(vcpu, inst.M12.f2, &v);
313 padr += 8;
314 vcpu_set_gr(vcpu, inst.M12.r3, padr, 0);
315 vcpu_increment_iip(vcpu);
316 return;
317 } else {
318 inst_type = -1;
319 panic_vm(vcpu, "Unsupported MMIO access instruction! "
320 "Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
321 bundle.i64[0], bundle.i64[1]);
322 }
323
324 size = 1 << size;
325 if (dir == IOREQ_WRITE) {
326 mmio_access(vcpu, padr, &data, size, ma, dir);
327 } else {
328 mmio_access(vcpu, padr, &data, size, ma, dir);
329 if (inst_type == SL_INTEGER)
330 vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
331 else
332 panic_vm(vcpu, "Unsupported instruction type!\n");
333
334 }
335 vcpu_increment_iip(vcpu);
336}
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
deleted file mode 100644
index f793be3effff..000000000000
--- a/arch/ia64/kvm/optvfault.S
+++ /dev/null
@@ -1,1090 +0,0 @@
1/*
2 * arch/ia64/kvm/optvfault.S
3 * optimize virtualization fault handler
4 *
5 * Copyright (C) 2006 Intel Co
6 * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
7 * Copyright (C) 2008 Intel Co
8 * Add the support for Tukwila processors.
9 * Xiantao Zhang <xiantao.zhang@intel.com>
10 */
11
12#include <asm/asmmacro.h>
13#include <asm/processor.h>
14#include <asm/kvm_host.h>
15
16#include "vti.h"
17#include "asm-offsets.h"
18
/*
 * Build-time switches for the accelerated handlers in this file.  A handler
 * whose ACCE_* symbol is not defined starts with a branch to
 * kvm_virtualization_fault_back instead of running its fast path (see the
 * #ifndef at the top of kvm_asm_mov_from_ar, kvm_asm_mov_from_rr,
 * kvm_asm_mov_to_rr and kvm_asm_rsm).
 * NOTE(review): ACCE_SSM, ACCE_MOV_TO_PSR and ACCE_THASH presumably guard
 * their handlers the same way - confirm further down in the file.
 */
19#define ACCE_MOV_FROM_AR
20#define ACCE_MOV_FROM_RR
21#define ACCE_MOV_TO_RR
22#define ACCE_RSM
23#define ACCE_SSM
24#define ACCE_MOV_TO_PSR
25#define ACCE_THASH
26
/*
 * VMX_VPS_SYNC_READ: branch to kvm_vps_sync_read while keeping b0, r24, r25
 * and r31 intact.
 *
 * The four registers are parked in r17..r20, the vpd base is loaded from
 * [r21 + VMM_VPD_BASE_OFFSET] into r25, r24 is set to ip + 0x20 (two
 * 16-byte bundles past the bundle that reads ip, i.e. the restore code
 * below), and after the service the parked values are copied back.
 * Clobbers r16..r20.
 * NOTE(review): this relies on the service preserving r17..r20 - confirm.
 */
27#define VMX_VPS_SYNC_READ \
28 add r16=VMM_VPD_BASE_OFFSET,r21; \
29 mov r17 = b0; \
30 mov r18 = r24; \
31 mov r19 = r25; \
32 mov r20 = r31; \
33 ;; \
34{.mii; \
35 ld8 r16 = [r16]; \
36 nop 0x0; \
37 mov r24 = ip; \
38 ;; \
39}; \
40{.mmb; \
41 add r24=0x20, r24; \
42 mov r25 =r16; \
43 br.sptk.many kvm_vps_sync_read; \
44}; \
45 mov b0 = r17; \
46 mov r24 = r18; \
47 mov r25 = r19; \
48 mov r31 = r20
49
/*
 * Common tail of the kvm_vps_* stubs below.
 *
 * Inputs:
 *  r21 : base for VMM_VCPU_VSA_BASE_OFFSET
 *  r30 : offset of the requested service, added to the loaded VSA base
 *
 * Loads the VSA base from [r21 + VMM_VCPU_VSA_BASE_OFFSET], adds r30 and
 * branches to the result.  Clobbers r29 and b0.
 */
50ENTRY(kvm_vps_entry)
51 adds r29 = VMM_VCPU_VSA_BASE_OFFSET,r21
52 ;;
53 ld8 r29 = [r29]
54 ;;
55 add r29 = r29, r30
56 ;;
57 mov b0 = r29
58 br.sptk.many b0
59END(kvm_vps_entry)
60
61/*
   * Select PAL_VPS_SYNC_READ and continue in kvm_vps_entry.
   *
62 * Inputs:
63 * r24 : return address
64 * r25 : vpd
65 * r29 : scratch
   * r30 : overwritten with PAL_VPS_SYNC_READ
66 *
67 */
68GLOBAL_ENTRY(kvm_vps_sync_read)
69 movl r30 = PAL_VPS_SYNC_READ
70 ;;
71 br.sptk.many kvm_vps_entry
72END(kvm_vps_sync_read)
73
74/*
   * Select PAL_VPS_SYNC_WRITE and continue in kvm_vps_entry.
   *
75 * Inputs:
76 * r24 : return address
77 * r25 : vpd
78 * r29 : scratch
   * r30 : overwritten with PAL_VPS_SYNC_WRITE
79 *
80 */
81GLOBAL_ENTRY(kvm_vps_sync_write)
82 movl r30 = PAL_VPS_SYNC_WRITE
83 ;;
84 br.sptk.many kvm_vps_entry
85END(kvm_vps_sync_write)
86
87/*
   * Select PAL_VPS_RESUME_NORMAL, restore the predicates from r23 and
   * continue in kvm_vps_entry.  r30 is overwritten.
   *
88 * Inputs:
89 * r23 : pr
90 * r24 : guest b0
91 * r25 : vpd
92 *
93 */
94GLOBAL_ENTRY(kvm_vps_resume_normal)
95 movl r30 = PAL_VPS_RESUME_NORMAL
96 ;;
97 mov pr=r23,-2
98 br.sptk.many kvm_vps_entry
99END(kvm_vps_resume_normal)
100
101/*
    * Select PAL_VPS_RESUME_HANDLER, restore the predicates from r23 and
    * continue in kvm_vps_entry.  Before the branch, r26 is loaded with the
    * first 8 bytes at [r25] and its bit 63 is replaced by the IR bit of the
    * isr passed in r17.  r17, r26 and r30 are overwritten.
    *
102 * Inputs:
103 * r23 : pr
104 * r24 : guest b0
105 * r25 : vpd
106 * r17 : isr
107 */
108GLOBAL_ENTRY(kvm_vps_resume_handler)
109 movl r30 = PAL_VPS_RESUME_HANDLER
110 ;;
111 ld8 r26=[r25]
112 shr r17=r17,IA64_ISR_IR_BIT
113 ;;
114 dep r26=r17,r26,63,1 // bit 63 of r26 indicate whether enable CFLE
115 mov pr=r23,-2
116 br.sptk.many kvm_vps_entry
117END(kvm_vps_resume_handler)
118
119//mov r1=ar3
/*
 * Fast path for "mov r1=ar3".  Reads ar.itc, adds the offset stored at
 * [r21 + VMM_VCPU_ITC_OFS_OFFSET], records the sum at
 * [r21 + VMM_VCPU_LAST_ITC_OFFSET] and leaves it in r19.  It then jumps into
 * the asm_mov_to_reg table at entry (bits 6..12 of r25) * 16, with
 * r30 = kvm_resume_to_guest and r24 = the incoming b0.
 * NOTE(review): r25 presumably holds the faulting instruction and the table
 * entry presumably writes r19 to the target register and continues at r30 -
 * neither is visible in this part of the file.
 */
120GLOBAL_ENTRY(kvm_asm_mov_from_ar)
121#ifndef ACCE_MOV_FROM_AR
122 br.many kvm_virtualization_fault_back
123#endif
124 add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
125 add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
126 extr.u r17=r25,6,7
127 ;;
128 ld8 r18=[r18]
129 mov r19=ar.itc
130 mov r24=b0
131 ;;
132 add r19=r19,r18
133 addl r20=@gprel(asm_mov_to_reg),gp
134 ;;
135 st8 [r16] = r19
136 adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
137 shladd r17=r17,4,r20
138 ;;
139 mov b0=r17
140 br.sptk.few b0
141 ;;
142END(kvm_asm_mov_from_ar)
143
144/*
145 * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC
146 * clock as its source for emulating the ITC. This version will be
147 * copied on top of the original version if the host is determined to
148 * be an SN2.
    *
    * Same register protocol as kvm_asm_mov_from_ar, except that the counter
    * is loaded from the address KVM_VMM_BASE + (1 << KVM_VMM_SHIFT) instead
    * of being read from ar.itc.
149 */
150GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2)
151 add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
152 movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT))
153
154 add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
155 extr.u r17=r25,6,7
156 mov r24=b0
157 ;;
158 ld8 r18=[r18]
159 ld8 r19=[r19]
160 addl r20=@gprel(asm_mov_to_reg),gp
161 ;;
162 add r19=r19,r18
163 shladd r17=r17,4,r20
164 ;;
165 adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
166 st8 [r16] = r19
167 mov b0=r17
168 br.sptk.few b0
169 ;;
170END(kvm_asm_mov_from_ar_sn2)
171
172
173
174// mov r1=rr[r3]
/*
 * Fast path for "mov r1=rr[r3]".
 *
 * Step 1: jump into the asm_mov_from_reg table at entry
 *         (bits 20..26 of r25) * 16, with r30 pointing at
 *         kvm_asm_mov_from_rr_back_1.
 * Step 2: at back_1, take the top three bits of r19 (r19 >> 61) as the
 *         index and load the 8-byte entry at
 *         r21 + VMM_VCPU_VRR0_OFFSET + index * 8 into r19.
 * Step 3: jump into the asm_mov_to_reg table at entry
 *         (bits 6..12 of r25) * 16, with r30 = kvm_resume_to_guest.
 *
 * r24 carries the incoming b0.  Clobbers r16, r17, r19, r20, r22, r26, r27,
 * r30 and b0.
 * NOTE(review): r25 presumably holds the faulting instruction, and the
 * from-reg table entry presumably returns the register value in r19 - the
 * tables are not visible in this part of the file.
 */
175GLOBAL_ENTRY(kvm_asm_mov_from_rr)
176#ifndef ACCE_MOV_FROM_RR
177 br.many kvm_virtualization_fault_back
178#endif
179 extr.u r16=r25,20,7
180 extr.u r17=r25,6,7
181 addl r20=@gprel(asm_mov_from_reg),gp
182 ;;
183 adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20
184 shladd r16=r16,4,r20
185 mov r24=b0
186 ;;
187 add r27=VMM_VCPU_VRR0_OFFSET,r21
188 mov b0=r16
189 br.many b0
190 ;;
191kvm_asm_mov_from_rr_back_1:
192 adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
193 adds r22=asm_mov_to_reg-asm_mov_from_reg,r20
194 shr.u r26=r19,61
195 ;;
196 shladd r17=r17,4,r22
197 shladd r27=r26,3,r27
198 ;;
199 ld8 r19=[r27]
200 mov b0=r17
201 br.many b0
202END(kvm_asm_mov_from_rr)
203
204
205// mov rr[r3]=r2
/*
 * kvm_asm_mov_to_rr: fast path for a guest write of a region register.
 *
 * r25 holds the faulting opcode: r3 (index register) is at bits 20..26 and
 * r2 (source) at bits 13..19.  Steps:
 *   1. read the guest's r3 via asm_mov_from_reg (-> r19); region number is
 *      r19 >> 61.  Region 6 is not handled here: b0 is restored and control
 *      goes to kvm_virtualization_fault_back.
 *   2. read the guest's r2 via asm_mov_from_reg (-> r19) and store it in the
 *      8-byte array at VMM_VCPU_VRR0_OFFSET, indexed by region number.
 *   3. build the value written to the hardware register: the field taken
 *      from bits 8..33 is shifted left by 4 and has 0xe added, the low 8
 *      bits are kept, bit 0 is forced to 1, and the 6-bit field at bits
 *      2..7 is clamped to at most 14.
 *   4. for regions 0 and 4 the built value is also saved at
 *      VMM_VCPU_META_SAVED_RR0_OFFSET (+8 for region 4); the hardware
 *      register is then written only if bit 0 of the word at
 *      VMM_VCPU_MODE_FLAGS_OFFSET is clear.  Other regions are always
 *      written.
 * The original b0 is kept in r22 and handed on in r24; the routine leaves
 * through kvm_resume_to_guest.
 */
206GLOBAL_ENTRY(kvm_asm_mov_to_rr)
207#ifndef ACCE_MOV_TO_RR
208 br.many kvm_virtualization_fault_back
209#endif
210 extr.u r16=r25,20,7 // r16 = r3 register number
211 extr.u r17=r25,13,7 // r17 = r2 register number
212 addl r20=@gprel(asm_mov_from_reg),gp
213 ;;
214 adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20
215 shladd r16=r16,4,r20
216 mov r22=b0 // original b0 lives in r22 for the whole routine
217 ;;
218 add r27=VMM_VCPU_VRR0_OFFSET,r21
219 mov b0=r16
220 br.many b0
221 ;;
222kvm_asm_mov_to_rr_back_1:
223 adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20
224 shr.u r23=r19,61 // r23 = region number
225 shladd r17=r17,4,r20 // slot that reads the guest's r2
226 ;;
227 //if rr6, go back
228 cmp.eq p6,p0=6,r23
229 mov b0=r22 // restore b0 in case we bail out
230 (p6) br.cond.dpnt.many kvm_virtualization_fault_back
231 ;;
232 mov r28=r19 // r28 = guest's r3 value, later the rr[] index
233 mov b0=r17
234 br.many b0
235kvm_asm_mov_to_rr_back_2:
236 adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
237 shladd r27=r23,3,r27 // address of the stored entry for this region
238 ;; // vrr.rid<<4 |0xe
239 st8 [r27]=r19 // record the value the guest wrote
240 mov b0=r30 // final exit target: kvm_resume_to_guest
241 ;;
242 extr.u r16=r19,8,26
243 extr.u r18 =r19,2,6
244 mov r17 =0xe
245 ;;
246 shladd r16 = r16, 4, r17 // (field << 4) + 0xe
247 extr.u r19 =r19,0,8 // keep the low 8 bits of the guest value
248 ;;
249 shl r16 = r16,8
250 ;;
251 add r19 = r19, r16
252 ;; //set ve 1
253 dep r19=-1,r19,0,1
254 cmp.lt p6,p0=14,r18 // p6 = field at bits 2..7 is larger than 14
255 ;;
256 (p6) mov r18=14
257 ;;
258 (p6) dep r19=r18,r19,2,6 // clamp that field to 14
259 ;;
260 cmp.eq p6,p0=0,r23
261 ;;
262 cmp.eq.or p6,p0=4,r23 // p6 = region is 0 or 4
263 ;;
264 adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21
265 (p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
266 ;;
267 ld4 r16=[r16]
268 cmp.eq p7,p0=r0,r0 // p7 = 1: write the hardware register by default
269 (p6) shladd r17=r23,1,r17 // region 4 -> saved slot at +8
270 ;;
271 (p6) st8 [r17]=r19
272 (p6) tbit.nz p6,p7=r16,0 // for regions 0/4: p7 only if mode-flags bit 0 is clear
273 ;;
274 (p7) mov rr[r28]=r19
275 mov r24=r22
276 br.many b0
277END(kvm_asm_mov_to_rr)
278
279
280//rsm
/*
 * kvm_asm_rsm: fast path for a guest "rsm imm24".
 *
 * The 24-bit mask is reassembled from the opcode in r25 (bits 6..26,
 * bits 31..32 and bit 36).  The routine
 *   - clears the masked bits in the 8-byte word at VPD_VPSR_START_OFFSET
 *     relative to r16 (r16 is not set in this routine; presumably
 *     VMX_VPS_SYNC_READ leaves the VPD base there -- confirm),
 *   - clears the same bits in cr.ipsr, except that IC, I, DT and SI are
 *     never cleared there,
 *   - if the mask contains DT and bit 0 of the mode-flags word is still
 *     clear, sets that bit and loads rr0 and rr4 from the two words at
 *     VMM_VCPU_META_RR0_OFFSET.
 * Leaves through kvm_resume_to_guest_with_sync with the original b0 in r24.
 */
281GLOBAL_ENTRY(kvm_asm_rsm)
282#ifndef ACCE_RSM
283 br.many kvm_virtualization_fault_back
284#endif
285 VMX_VPS_SYNC_READ
286 ;;
287 extr.u r26=r25,6,21
288 extr.u r27=r25,31,2
289 ;;
290 extr.u r28=r25,36,1
291 dep r26=r27,r26,21,2
292 ;;
293 add r17=VPD_VPSR_START_OFFSET,r16
294 add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
295 //r26 is imm24
296 dep r26=r28,r26,23,1
297 ;;
298 ld8 r18=[r17]
299 movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI
300 ld4 r23=[r22]
301 sub r27=-1,r26 // r27 = ~imm24
302 mov r24=b0
303 ;;
304 mov r20=cr.ipsr
305 or r28=r27,r28 // bits allowed to stay set in cr.ipsr
306 and r19=r18,r27 // new vpsr = old vpsr & ~imm24
307 ;;
308 st8 [r17]=r19
309 and r20=r20,r28
310 /* Comment it out due to short of fp lazy algorithm support
311 adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
312 ;;
313 ld8 r27=[r27]
314 ;;
315 tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT
316 ;;
317 (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
318 */
319 ;;
320 mov cr.ipsr=r20
321 tbit.nz p6,p0=r23,0 // mode-flags bit 0 already set?
322 ;;
323 tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT // ... or DT not part of the mask
324 (p6) br.dptk kvm_resume_to_guest_with_sync
325 ;;
326 add r26=VMM_VCPU_META_RR0_OFFSET,r21
327 add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
328 dep r23=-1,r23,0,1 // set mode-flags bit 0
329 ;;
330 ld8 r26=[r26]
331 ld8 r27=[r27]
332 st4 [r22]=r23
333 dep.z r28=4,61,3 // r28 = 4 << 61, index for rr4
334 ;;
335 mov rr[r0]=r26
336 ;;
337 mov rr[r28]=r27
338 ;;
339 srlz.d
340 br.many kvm_resume_to_guest_with_sync
341END(kvm_asm_rsm)
342
343
344//ssm
/*
 * kvm_asm_ssm: fast path for a guest "ssm imm24".
 *
 * The 24-bit mask is reassembled from the opcode in r25 exactly as in
 * kvm_asm_rsm.  The routine
 *   - ORs the mask into the word at VPD_VPSR_START_OFFSET (relative to r16;
 *     r16 is not set here, presumably by VMX_VPS_SYNC_READ -- confirm) and
 *     into cr.ipsr; the previous vpsr is kept in r29,
 *   - if mode-flags bit 0 is set and the new vpsr has DT, RT and IT all set,
 *     clears that flag bit and reloads rr0/rr4 from the two words at
 *     VMM_VCPU_META_SAVED_RR0_OFFSET,
 *   - at kvm_asm_ssm_1, decides whether to go to kvm_asm_dispatch_vexirq:
 *     intended for the case where the I bit goes from clear to set and the
 *     value at VPD_VHPI_START_OFFSET exceeds the threshold built from bits
 *     4..7 and bit 16 of the value at VPD_VTPR_START_OFFSET.
 * Otherwise leaves through kvm_resume_to_guest_with_sync, b0 saved in r24.
 */
345GLOBAL_ENTRY(kvm_asm_ssm)
346#ifndef ACCE_SSM
347 br.many kvm_virtualization_fault_back
348#endif
349 VMX_VPS_SYNC_READ
350 ;;
351 extr.u r26=r25,6,21
352 extr.u r27=r25,31,2
353 ;;
354 extr.u r28=r25,36,1
355 dep r26=r27,r26,21,2
356 ;; //r26 is imm24
357 add r27=VPD_VPSR_START_OFFSET,r16
358 dep r26=r28,r26,23,1
359 ;; //r19 vpsr
360 ld8 r29=[r27] // r29 = vpsr before the update
361 mov r24=b0
362 ;;
363 add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
364 mov r20=cr.ipsr
365 or r19=r29,r26 // r19 = new vpsr
366 ;;
367 ld4 r23=[r22]
368 st8 [r27]=r19
369 or r20=r20,r26
370 ;;
371 mov cr.ipsr=r20
372 movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
373 ;;
374 and r19=r28,r19 // r19 now holds only the DT/RT/IT bits of the new vpsr
375 tbit.z p6,p0=r23,0 // mode-flags bit 0 clear: nothing to switch
376 ;;
377 cmp.ne.or p6,p0=r28,r19 // ... or not all of DT/RT/IT are set
378 (p6) br.dptk kvm_asm_ssm_1
379 ;;
380 add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
381 add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
382 dep r23=0,r23,0,1 // clear mode-flags bit 0
383 ;;
384 ld8 r26=[r26]
385 ld8 r27=[r27]
386 st4 [r22]=r23
387 dep.z r28=4,61,3 // r28 = 4 << 61, index for rr4
388 ;;
389 mov rr[r0]=r26
390 ;;
391 mov rr[r28]=r27
392 ;;
393 srlz.d
394 ;;
395kvm_asm_ssm_1:
396 tbit.nz p6,p0=r29,IA64_PSR_I_BIT // I was already set before: no new interrupt window
397 ;;
// NOTE(review): r19 was reduced to (vpsr & (DT|RT|IT)) above, so unless
// IA64_PSR_I_BIT is one of those bits this test is always true and the
// vexirq check below is never reached. kvm_asm_mov_to_psr tests the
// unmasked new vpsr (r30) at the equivalent point -- confirm intent.
398 tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
399 (p6) br.dptk kvm_resume_to_guest_with_sync
400 ;;
401 add r29=VPD_VTPR_START_OFFSET,r16
402 add r30=VPD_VHPI_START_OFFSET,r16
403 ;;
404 ld8 r29=[r29]
405 ld8 r30=[r30]
406 ;;
407 extr.u r17=r29,4,4 // vtpr bits 4..7
408 extr.u r18=r29,16,1 // vtpr bit 16
409 ;;
410 dep r17=r18,r17,4,1 // threshold = (bit16 << 4) | bits 4..7
411 ;;
412 cmp.gt p6,p0=r30,r17 // vhpi above the threshold?
413 (p6) br.dpnt.few kvm_asm_dispatch_vexirq
414 br.many kvm_resume_to_guest_with_sync
415END(kvm_asm_ssm)
416
417
418//mov psr.l=r2
/*
 * kvm_asm_mov_to_psr: fast path for a guest "mov psr.l=r2".
 *
 * r2's register number is taken from bits 13..19 of the opcode in r25 and
 * its value is fetched through the asm_mov_from_reg table (-> r19).  Then:
 *   - the low 32 bits of the word at VPD_VPSR_START_OFFSET (relative to
 *     r16; r16 is not set here, presumably by VMX_VPS_SYNC_READ -- confirm)
 *     are replaced by the low 32 bits of r19; old value in r17, new in r30,
 *   - if the DT/RT/IT bits changed, rr0 and rr4 are reloaded: from
 *     VMM_VCPU_META_SAVED_RR0_OFFSET when all three are now set (mode-flags
 *     bit 0 cleared), otherwise from VMM_VCPU_META_RR0_OFFSET (bit 0 set),
 *   - the low 32 bits of cr.ipsr are replaced by the new value with
 *     IC, I, DT, SI and RT forced on,
 *   - if the I bit went from clear to set and the value at
 *     VPD_VHPI_START_OFFSET exceeds the threshold built from bits 4..7 and
 *     bit 16 of the value at VPD_VTPR_START_OFFSET, control goes to
 *     kvm_asm_dispatch_vexirq.
 * Otherwise leaves through kvm_resume_to_guest_with_sync, b0 saved in r24.
 */
419GLOBAL_ENTRY(kvm_asm_mov_to_psr)
420#ifndef ACCE_MOV_TO_PSR
421 br.many kvm_virtualization_fault_back
422#endif
423 VMX_VPS_SYNC_READ
424 ;;
425 extr.u r26=r25,13,7 //r2
426 addl r20=@gprel(asm_mov_from_reg),gp
427 ;;
428 adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20
429 shladd r26=r26,4,r20
430 mov r24=b0
431 ;;
432 add r27=VPD_VPSR_START_OFFSET,r16
433 mov b0=r26
434 br.many b0
435 ;;
436kvm_asm_mov_to_psr_back:
437 ld8 r17=[r27] // r17 = vpsr before the update
438 add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
439 dep r19=0,r19,32,32 // keep only the low 32 bits of the guest value
440 ;;
441 ld4 r23=[r22]
442 dep r18=0,r17,0,32 // old vpsr with its low 32 bits cleared
443 ;;
444 add r30=r18,r19 // r30 = new vpsr
445 movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
446 ;;
447 st8 [r27]=r30
448 and r27=r28,r30 // DT/RT/IT of the new vpsr
449 and r29=r28,r17 // DT/RT/IT of the old vpsr
450 ;;
451 cmp.eq p5,p0=r29,r27 // p5: translation bits unchanged
452 cmp.eq p6,p7=r28,r27 // p6: all three set now, p7: at least one clear
453 (p5) br.many kvm_asm_mov_to_psr_1
454 ;;
455 //virtual to physical
456 (p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21
457 (p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
458 (p7) dep r23=-1,r23,0,1
459 ;;
460 //physical to virtual
461 (p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
462 (p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
463 (p6) dep r23=0,r23,0,1
464 ;;
465 ld8 r26=[r26]
466 ld8 r27=[r27]
467 st4 [r22]=r23
468 dep.z r28=4,61,3 // r28 = 4 << 61, index for rr4
469 ;;
470 mov rr[r0]=r26
471 ;;
472 mov rr[r28]=r27
473 ;;
474 srlz.d
475 ;;
476kvm_asm_mov_to_psr_1:
477 mov r20=cr.ipsr
478 movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT
479 ;;
480 or r19=r19,r28 // bits forced on in the machine ipsr
481 dep r20=0,r20,0,32 // drop the old low half of cr.ipsr
482 ;;
483 add r20=r19,r20
484 mov b0=r24
485 ;;
486 /* Comment it out due to short of fp lazy algorithm support
487 adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
488 ;;
489 ld8 r27=[r27]
490 ;;
491 tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
492 ;;
493 (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
494 ;;
495 */
496 mov cr.ipsr=r20
497 cmp.ne p6,p0=r0,r0 // p6 = 0 before the two .or tests
498 ;;
499 tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT // I was already set before
500 tbit.z.or p6,p0=r30,IA64_PSR_I_BIT // or I is still clear now
501 (p6) br.dpnt.few kvm_resume_to_guest_with_sync
502 ;;
503 add r29=VPD_VTPR_START_OFFSET,r16
504 add r30=VPD_VHPI_START_OFFSET,r16
505 ;;
506 ld8 r29=[r29]
507 ld8 r30=[r30]
508 ;;
509 extr.u r17=r29,4,4 // vtpr bits 4..7
510 extr.u r18=r29,16,1 // vtpr bit 16
511 ;;
512 dep r17=r18,r17,4,1 // threshold = (bit16 << 4) | bits 4..7
513 ;;
514 cmp.gt p6,p0=r30,r17 // vhpi above the threshold?
515 (p6) br.dpnt.few kvm_asm_dispatch_vexirq
516 br.many kvm_resume_to_guest_with_sync
517END(kvm_asm_mov_to_psr)
518
519
/*
 * kvm_asm_dispatch_vexirq: reached from the ssm / mov-to-psr fast paths when
 * the pending-interrupt comparison succeeds.
 *
 * 1. Calls kvm_vps_sync_write with r25 = the word loaded from
 *    VMM_VPD_BASE_OFFSET (relative to r21) and r24 = address of the bundle
 *    following the branch (ip of the first bundle + 0x20), presumably the
 *    return address that routine uses -- confirm.  b0 and r31 are parked in
 *    r17/r18 across the call.
 * 2. Steps the guest past the emulated instruction: the 2-bit slot field at
 *    IA64_PSR_RI_BIT in cr.ipsr is incremented, or, when its upper bit is
 *    set, reset to 0 with cr.iip advanced by 0x10.
 * 3. Branches to kvm_dispatch_vexirq with r19 = 37 and r30 = 1 (meaning of
 *    these two constants is defined by that routine, not visible here).
 */
520ENTRY(kvm_asm_dispatch_vexirq)
521//increment iip
522 mov r17 = b0
523 mov r18 = r31
524{.mii
525 add r25=VMM_VPD_BASE_OFFSET,r21
526 nop 0x0
527 mov r24 = ip
528 ;;
529}
530{.mmb
531 add r24 = 0x20, r24 // skip this bundle pair: continue after the branch
532 ld8 r25 = [r25]
533 br.sptk.many kvm_vps_sync_write
534}
535 mov b0 =r17
536 mov r16=cr.ipsr
537 mov r31 = r18
538 mov r19 = 37
539 ;;
540 extr.u r17=r16,IA64_PSR_RI_BIT,2 // current slot number
541 tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 // p6: upper slot bit set, p7: not set
542 ;;
543 (p6) mov r18=cr.iip
544 (p6) mov r17=r0 // wrap to slot 0
545 (p7) add r17=1,r17 // otherwise next slot in the same bundle
546 ;;
547 (p6) add r18=0x10,r18 // next bundle
548 dep r16=r17,r16,IA64_PSR_RI_BIT,2
549 ;;
550 (p6) mov cr.iip=r18
551 mov cr.ipsr=r16
552 mov r30 =1
553 br.many kvm_dispatch_vexirq
554END(kvm_asm_dispatch_vexirq)
555
556// thash
557// TODO: add support when pta.vf = 1
/*
 * kvm_asm_thash: fast path for a guest "thash r1=r3".
 *
 * Reads the guest's r3 through the asm_mov_from_reg table, computes the
 * hash address from the guest PTA (loaded from VMM_VPD_VPTA_OFFSET in the
 * VPD) and the page size stored for the address's region in the array at
 * VMM_VCPU_VRR0_OFFSET, and writes the result to the guest's r1 through the
 * asm_mov_to_reg table before resuming the guest.
 *
 * When bit 8 of the PTA (pta.vf) is set, the routine restores b0, puts
 * EVENT_THASH in r24 and falls back to kvm_virtualization_fault_back.
 * On the fast path the original b0 is restored before the final table jump
 * reuses b0.
 */
558GLOBAL_ENTRY(kvm_asm_thash)
559#ifndef ACCE_THASH
560 br.many kvm_virtualization_fault_back
561#endif
562 extr.u r17=r25,20,7 // get r3 from opcode in r25
563 extr.u r18=r25,6,7 // get r1 from opcode in r25
564 addl r20=@gprel(asm_mov_from_reg),gp
565 ;;
566 adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20
567 shladd r17=r17,4,r20 // get addr of MOVE_FROM_REG(r17)
568 adds r16=VMM_VPD_BASE_OFFSET,r21 // get vcpu.arch.priveregs
569 ;;
570 mov r24=b0
571 ;;
572 ld8 r16=[r16] // get VPD addr
573 mov b0=r17
574 br.many b0 // r19 return value
575 ;;
576kvm_asm_thash_back1:
577 shr.u r23=r19,61 // get RR number
578 adds r28=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr
579 adds r16=VMM_VPD_VPTA_OFFSET,r16 // get vpta
580 ;;
581 shladd r27=r23,3,r28 // get vcpu->arch.vrr[r23]'s addr
582 ld8 r17=[r16] // get PTA
583 mov r26=1
584 ;;
585 extr.u r29=r17,2,6 // get pta.size
586 ld8 r28=[r27] // get vcpu->arch.vrr[r23]'s value
587 ;;
588 mov b0=r24
589 //Fallback to C if pta.vf is set
590 tbit.nz p6,p0=r17, 8
591 ;;
592 (p6) mov r24=EVENT_THASH
593 (p6) br.cond.dpnt.many kvm_virtualization_fault_back
594 extr.u r28=r28,2,6 // get rr.ps
595 shl r22=r26,r29 // 1UL << pta.size
596 ;;
597 shr.u r23=r19,r28 // vaddr >> rr.ps
598 adds r26=3,r29 // pta.size + 3
599 shl r27=r17,3 // pta << 3
600 ;;
601 shl r23=r23,3 // (vaddr >> rr.ps) << 3
602 shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3)
603 movl r16=7<<61
604 ;;
605 adds r22=-1,r22 // (1UL << pta.size) - 1
606 shl r27=r27,r29 // ((pta<<3)>>(pta.size+3))<<pta.size
607 and r19=r19,r16 // vaddr & VRN_MASK
608 ;;
609 and r22=r22,r23 // vhpt_offset
610 or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size)
611 adds r26=asm_mov_to_reg-asm_mov_from_reg,r20
612 ;;
613 or r19=r19,r22 // calc pval
614 shladd r17=r18,4,r26 // slot = asm_mov_to_reg + 16 * r1
615 adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 // continuation after the table slot
616 ;;
617 mov b0=r17
618 br.many b0
619END(kvm_asm_thash)
620
/*
 * Slot 0 of the asm_mov_to_reg table: one bundle of three nops, keeping the
 * table at one bundle per register number.
 * NOTE(review): unlike MOV_TO_REG(n) this slot does not branch to r30, so if
 * it were ever entered execution would continue into the following slot.
 * Presumably it is never selected because r0 cannot be a destination --
 * confirm.
 */
621#define MOV_TO_REG0 \
622{; \
623 nop.b 0x0; \
624 nop.b 0x0; \
625 nop.b 0x0; \
626 ;; \
627};
628
629
/*
 * One-bundle table slot that writes r19 into general register r<n> and then
 * branches to the continuation address the caller left in r30.
 */
630#define MOV_TO_REG(n) \
631{; \
632 mov r##n##=r19; \
633 mov b0=r30; \
634 br.sptk.many b0; \
635 ;; \
636};
637
638
/*
 * One-bundle table slot that copies general register r<n> into r19 and then
 * branches to the continuation address the caller left in r30.
 */
639#define MOV_FROM_REG(n) \
640{; \
641 mov r19=r##n##; \
642 mov b0=r30; \
643 br.sptk.many b0; \
644 ;; \
645};
646
647
/*
 * Defines asm_mov_to_bank0_reg<n>: out-of-line helper that stores r19 into
 * r<n> of the other register bank (used for n = 16..31 below).
 *
 * r2 carries the value across the bank switch: r2 is first saved in r26 and
 * loaded from r19, "bsw.1" selects bank 1, r<n> is written from r2, "bsw.0"
 * switches back, and r2 is restored from r26.  Finishes by branching to the
 * continuation address in r30.  Clobbers r26 and b0.
 */
648#define MOV_TO_BANK0_REG(n) \
649ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##); \
650{; \
651 mov r26=r2; \
652 mov r2=r19; \
653 bsw.1; \
654 ;; \
655}; \
656{; \
657 mov r##n##=r2; \
658 nop.b 0x0; \
659 bsw.0; \
660 ;; \
661}; \
662{; \
663 mov r2=r26; \
664 mov b0=r30; \
665 br.sptk.many b0; \
666 ;; \
667}; \
668END(asm_mov_to_bank0_reg##n##)
669
670
/*
 * Defines asm_mov_from_bank0_reg<n>: out-of-line helper that reads r<n> of
 * the other register bank into r19 (used for n = 16..31 below).
 *
 * r2 carries the value across the bank switch: r2 is saved in r26, "bsw.1"
 * selects bank 1, r<n> is copied to r2, "bsw.0" switches back, r19 receives
 * the value and r2 is restored from r26.  Finishes by branching to the
 * continuation address in r30.  Clobbers r26 and b0.
 */
671#define MOV_FROM_BANK0_REG(n) \
672ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##); \
673{; \
674 mov r26=r2; \
675 nop.b 0x0; \
676 bsw.1; \
677 ;; \
678}; \
679{; \
680 mov r2=r##n##; \
681 nop.b 0x0; \
682 bsw.0; \
683 ;; \
684}; \
685{; \
686 mov r19=r2; \
687 mov r2=r26; \
688 mov b0=r30; \
689}; \
690{; \
691 nop.b 0x0; \
692 nop.b 0x0; \
693 br.sptk.many b0; \
694 ;; \
695}; \
696END(asm_mov_from_bank0_reg##n##)
697
698
/*
 * One-bundle table slot for a banked register: the multi-bundle helper does
 * not fit in a slot, so the slot only branches to asm_mov_to_bank0_reg<n>.
 */
699#define JMP_TO_MOV_TO_BANK0_REG(n) \
700{; \
701 nop.b 0x0; \
702 nop.b 0x0; \
703 br.sptk.many asm_mov_to_bank0_reg##n##; \
704 ;; \
705}
706
707
/*
 * One-bundle table slot for a banked register: the multi-bundle helper does
 * not fit in a slot, so the slot only branches to asm_mov_from_bank0_reg<n>.
 */
708#define JMP_TO_MOV_FROM_BANK0_REG(n) \
709{; \
710 nop.b 0x0; \
711 nop.b 0x0; \
712 br.sptk.many asm_mov_from_bank0_reg##n##; \
713 ;; \
714}
715
716
/*
 * Emit the out-of-line read helpers asm_mov_from_bank0_reg16 ..
 * asm_mov_from_bank0_reg31; slots 16..31 of asm_mov_from_reg branch to them.
 */
717MOV_FROM_BANK0_REG(16)
718MOV_FROM_BANK0_REG(17)
719MOV_FROM_BANK0_REG(18)
720MOV_FROM_BANK0_REG(19)
721MOV_FROM_BANK0_REG(20)
722MOV_FROM_BANK0_REG(21)
723MOV_FROM_BANK0_REG(22)
724MOV_FROM_BANK0_REG(23)
725MOV_FROM_BANK0_REG(24)
726MOV_FROM_BANK0_REG(25)
727MOV_FROM_BANK0_REG(26)
728MOV_FROM_BANK0_REG(27)
729MOV_FROM_BANK0_REG(28)
730MOV_FROM_BANK0_REG(29)
731MOV_FROM_BANK0_REG(30)
732MOV_FROM_BANK0_REG(31)
733
734
735// mov from reg table
/*
 * Jump table with one 16-byte bundle per general register r0..r127.
 * Callers compute "asm_mov_from_reg + 16 * regnum", put a continuation
 * address in r30 and branch to the slot; the slot leaves the register's
 * value in r19 and branches to r30.  Slots 16..31 redirect to the
 * asm_mov_from_bank0_reg<n> helpers, which perform the read after switching
 * register banks.  The slot order must match the register numbering.
 */
736ENTRY(asm_mov_from_reg)
737 MOV_FROM_REG(0)
738 MOV_FROM_REG(1)
739 MOV_FROM_REG(2)
740 MOV_FROM_REG(3)
741 MOV_FROM_REG(4)
742 MOV_FROM_REG(5)
743 MOV_FROM_REG(6)
744 MOV_FROM_REG(7)
745 MOV_FROM_REG(8)
746 MOV_FROM_REG(9)
747 MOV_FROM_REG(10)
748 MOV_FROM_REG(11)
749 MOV_FROM_REG(12)
750 MOV_FROM_REG(13)
751 MOV_FROM_REG(14)
752 MOV_FROM_REG(15)
753 JMP_TO_MOV_FROM_BANK0_REG(16)
754 JMP_TO_MOV_FROM_BANK0_REG(17)
755 JMP_TO_MOV_FROM_BANK0_REG(18)
756 JMP_TO_MOV_FROM_BANK0_REG(19)
757 JMP_TO_MOV_FROM_BANK0_REG(20)
758 JMP_TO_MOV_FROM_BANK0_REG(21)
759 JMP_TO_MOV_FROM_BANK0_REG(22)
760 JMP_TO_MOV_FROM_BANK0_REG(23)
761 JMP_TO_MOV_FROM_BANK0_REG(24)
762 JMP_TO_MOV_FROM_BANK0_REG(25)
763 JMP_TO_MOV_FROM_BANK0_REG(26)
764 JMP_TO_MOV_FROM_BANK0_REG(27)
765 JMP_TO_MOV_FROM_BANK0_REG(28)
766 JMP_TO_MOV_FROM_BANK0_REG(29)
767 JMP_TO_MOV_FROM_BANK0_REG(30)
768 JMP_TO_MOV_FROM_BANK0_REG(31)
769 MOV_FROM_REG(32)
770 MOV_FROM_REG(33)
771 MOV_FROM_REG(34)
772 MOV_FROM_REG(35)
773 MOV_FROM_REG(36)
774 MOV_FROM_REG(37)
775 MOV_FROM_REG(38)
776 MOV_FROM_REG(39)
777 MOV_FROM_REG(40)
778 MOV_FROM_REG(41)
779 MOV_FROM_REG(42)
780 MOV_FROM_REG(43)
781 MOV_FROM_REG(44)
782 MOV_FROM_REG(45)
783 MOV_FROM_REG(46)
784 MOV_FROM_REG(47)
785 MOV_FROM_REG(48)
786 MOV_FROM_REG(49)
787 MOV_FROM_REG(50)
788 MOV_FROM_REG(51)
789 MOV_FROM_REG(52)
790 MOV_FROM_REG(53)
791 MOV_FROM_REG(54)
792 MOV_FROM_REG(55)
793 MOV_FROM_REG(56)
794 MOV_FROM_REG(57)
795 MOV_FROM_REG(58)
796 MOV_FROM_REG(59)
797 MOV_FROM_REG(60)
798 MOV_FROM_REG(61)
799 MOV_FROM_REG(62)
800 MOV_FROM_REG(63)
801 MOV_FROM_REG(64)
802 MOV_FROM_REG(65)
803 MOV_FROM_REG(66)
804 MOV_FROM_REG(67)
805 MOV_FROM_REG(68)
806 MOV_FROM_REG(69)
807 MOV_FROM_REG(70)
808 MOV_FROM_REG(71)
809 MOV_FROM_REG(72)
810 MOV_FROM_REG(73)
811 MOV_FROM_REG(74)
812 MOV_FROM_REG(75)
813 MOV_FROM_REG(76)
814 MOV_FROM_REG(77)
815 MOV_FROM_REG(78)
816 MOV_FROM_REG(79)
817 MOV_FROM_REG(80)
818 MOV_FROM_REG(81)
819 MOV_FROM_REG(82)
820 MOV_FROM_REG(83)
821 MOV_FROM_REG(84)
822 MOV_FROM_REG(85)
823 MOV_FROM_REG(86)
824 MOV_FROM_REG(87)
825 MOV_FROM_REG(88)
826 MOV_FROM_REG(89)
827 MOV_FROM_REG(90)
828 MOV_FROM_REG(91)
829 MOV_FROM_REG(92)
830 MOV_FROM_REG(93)
831 MOV_FROM_REG(94)
832 MOV_FROM_REG(95)
833 MOV_FROM_REG(96)
834 MOV_FROM_REG(97)
835 MOV_FROM_REG(98)
836 MOV_FROM_REG(99)
837 MOV_FROM_REG(100)
838 MOV_FROM_REG(101)
839 MOV_FROM_REG(102)
840 MOV_FROM_REG(103)
841 MOV_FROM_REG(104)
842 MOV_FROM_REG(105)
843 MOV_FROM_REG(106)
844 MOV_FROM_REG(107)
845 MOV_FROM_REG(108)
846 MOV_FROM_REG(109)
847 MOV_FROM_REG(110)
848 MOV_FROM_REG(111)
849 MOV_FROM_REG(112)
850 MOV_FROM_REG(113)
851 MOV_FROM_REG(114)
852 MOV_FROM_REG(115)
853 MOV_FROM_REG(116)
854 MOV_FROM_REG(117)
855 MOV_FROM_REG(118)
856 MOV_FROM_REG(119)
857 MOV_FROM_REG(120)
858 MOV_FROM_REG(121)
859 MOV_FROM_REG(122)
860 MOV_FROM_REG(123)
861 MOV_FROM_REG(124)
862 MOV_FROM_REG(125)
863 MOV_FROM_REG(126)
864 MOV_FROM_REG(127)
865END(asm_mov_from_reg)
866
867
868/* must be in bank 0
869 * parameter:
870 * r31: pr
871 * r24: b0
872 */
/*
 * Calls kvm_vps_sync_write before resuming the guest.  r31 and r24 are
 * parked in r16/r17 across the call because r24 is reused for the address
 * of the bundle after the branch (ip of the first bundle + 0x20, presumably
 * the return address kvm_vps_sync_write uses -- confirm) and r25 is loaded
 * with the word at VMM_VPD_BASE_OFFSET relative to r21.  Afterwards both
 * are restored and control continues in kvm_resume_to_guest.
 */
873ENTRY(kvm_resume_to_guest_with_sync)
874 adds r19=VMM_VPD_BASE_OFFSET,r21
875 mov r16 = r31
876 mov r17 = r24
877 ;;
878{.mii
879 ld8 r25 =[r19]
880 nop 0x0
881 mov r24 = ip
882 ;;
883}
884{.mmb
885 add r24 =0x20, r24 // continue at the bundle after the branch
886 nop 0x0
887 br.sptk.many kvm_vps_sync_write
888}
889
890 mov r31 = r16
891 mov r24 =r17
892 ;;
893 br.sptk.many kvm_resume_to_guest
894END(kvm_resume_to_guest_with_sync)
895
/*
 * kvm_resume_to_guest: common exit of the fast paths above.
 *
 * 1. Reloads r1 from the slot at VMM_VCPU_SAVED_GP_OFFSET (relative to r21).
 * 2. Steps the guest past the emulated instruction: the 2-bit slot field at
 *    IA64_PSR_RI_BIT in cr.ipsr is incremented, or, when its upper bit is
 *    set, reset to 0 with cr.iip advanced by 0x10.
 * 3. Picks the PAL service relative to the base loaded from
 *    VMM_VCPU_VSA_BASE_OFFSET: PAL_VPS_RESUME_NORMAL when the IC bit of the
 *    vpsr word in the VPD is set, PAL_VPS_RESUME_HANDLER otherwise.  For the
 *    handler case r26 is loaded with the first 8 bytes of the VPD and its
 *    bit 63 is replaced by cr.isr shifted right by IA64_ISR_IR_BIT.
 * 4. Restores the predicates from r31 and branches to the chosen service
 *    with r25 = VPD address.
 */
896ENTRY(kvm_resume_to_guest)
897 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
898 ;;
899 ld8 r1 =[r16]
900 adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21
901 ;;
902 mov r16=cr.ipsr
903 ;;
904 ld8 r20 = [r20] // r20 = base the PAL_VPS_* offsets are added to
905 adds r19=VMM_VPD_BASE_OFFSET,r21
906 ;;
907 ld8 r25=[r19] // r25 = VPD address
908 extr.u r17=r16,IA64_PSR_RI_BIT,2 // current slot number
909 tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 // p6: upper slot bit set, p7: not set
910 ;;
911 (p6) mov r18=cr.iip
912 (p6) mov r17=r0 // wrap to slot 0
913 ;;
914 (p6) add r18=0x10,r18 // next bundle
915 (p7) add r17=1,r17 // otherwise next slot in the same bundle
916 ;;
917 (p6) mov cr.iip=r18
918 dep r16=r17,r16,IA64_PSR_RI_BIT,2
919 ;;
920 mov cr.ipsr=r16
921 adds r19= VPD_VPSR_START_OFFSET,r25
922 add r28=PAL_VPS_RESUME_NORMAL,r20
923 add r29=PAL_VPS_RESUME_HANDLER,r20
924 ;;
925 ld8 r19=[r19]
926 mov b0=r29 // default target: resume-handler service
927 mov r27=cr.isr
928 ;;
929 tbit.z p6,p7 = r19,IA64_PSR_IC_BIT // p7=vpsr.ic
930 shr r27=r27,IA64_ISR_IR_BIT
931 ;;
932 (p6) ld8 r26=[r25]
933 (p7) mov b0=r28 // vpsr.ic set: use the resume-normal service instead
934 ;;
935 (p6) dep r26=r27,r26,63,1
936 mov pr=r31,-2
937 br.sptk.many b0 // call pal service
938 ;;
939END(kvm_resume_to_guest)
940
941
/*
 * Emit the out-of-line write helpers asm_mov_to_bank0_reg16 ..
 * asm_mov_to_bank0_reg31; slots 16..31 of asm_mov_to_reg branch to them.
 */
942MOV_TO_BANK0_REG(16)
943MOV_TO_BANK0_REG(17)
944MOV_TO_BANK0_REG(18)
945MOV_TO_BANK0_REG(19)
946MOV_TO_BANK0_REG(20)
947MOV_TO_BANK0_REG(21)
948MOV_TO_BANK0_REG(22)
949MOV_TO_BANK0_REG(23)
950MOV_TO_BANK0_REG(24)
951MOV_TO_BANK0_REG(25)
952MOV_TO_BANK0_REG(26)
953MOV_TO_BANK0_REG(27)
954MOV_TO_BANK0_REG(28)
955MOV_TO_BANK0_REG(29)
956MOV_TO_BANK0_REG(30)
957MOV_TO_BANK0_REG(31)
958
959
960// mov to reg table
/*
 * Jump table with one 16-byte bundle per general register r0..r127.
 * Callers compute "asm_mov_to_reg + 16 * regnum", put the value in r19 and
 * a continuation address in r30, and branch to the slot; the slot writes
 * r19 to the register and branches to r30.  Slot 0 is a nop bundle and
 * slots 16..31 redirect to the asm_mov_to_bank0_reg<n> helpers, which
 * perform the write after switching register banks.  The slot order must
 * match the register numbering.
 */
961ENTRY(asm_mov_to_reg)
962 MOV_TO_REG0
963 MOV_TO_REG(1)
964 MOV_TO_REG(2)
965 MOV_TO_REG(3)
966 MOV_TO_REG(4)
967 MOV_TO_REG(5)
968 MOV_TO_REG(6)
969 MOV_TO_REG(7)
970 MOV_TO_REG(8)
971 MOV_TO_REG(9)
972 MOV_TO_REG(10)
973 MOV_TO_REG(11)
974 MOV_TO_REG(12)
975 MOV_TO_REG(13)
976 MOV_TO_REG(14)
977 MOV_TO_REG(15)
978 JMP_TO_MOV_TO_BANK0_REG(16)
979 JMP_TO_MOV_TO_BANK0_REG(17)
980 JMP_TO_MOV_TO_BANK0_REG(18)
981 JMP_TO_MOV_TO_BANK0_REG(19)
982 JMP_TO_MOV_TO_BANK0_REG(20)
983 JMP_TO_MOV_TO_BANK0_REG(21)
984 JMP_TO_MOV_TO_BANK0_REG(22)
985 JMP_TO_MOV_TO_BANK0_REG(23)
986 JMP_TO_MOV_TO_BANK0_REG(24)
987 JMP_TO_MOV_TO_BANK0_REG(25)
988 JMP_TO_MOV_TO_BANK0_REG(26)
989 JMP_TO_MOV_TO_BANK0_REG(27)
990 JMP_TO_MOV_TO_BANK0_REG(28)
991 JMP_TO_MOV_TO_BANK0_REG(29)
992 JMP_TO_MOV_TO_BANK0_REG(30)
993 JMP_TO_MOV_TO_BANK0_REG(31)
994 MOV_TO_REG(32)
995 MOV_TO_REG(33)
996 MOV_TO_REG(34)
997 MOV_TO_REG(35)
998 MOV_TO_REG(36)
999 MOV_TO_REG(37)
1000 MOV_TO_REG(38)
1001 MOV_TO_REG(39)
1002 MOV_TO_REG(40)
1003 MOV_TO_REG(41)
1004 MOV_TO_REG(42)
1005 MOV_TO_REG(43)
1006 MOV_TO_REG(44)
1007 MOV_TO_REG(45)
1008 MOV_TO_REG(46)
1009 MOV_TO_REG(47)
1010 MOV_TO_REG(48)
1011 MOV_TO_REG(49)
1012 MOV_TO_REG(50)
1013 MOV_TO_REG(51)
1014 MOV_TO_REG(52)
1015 MOV_TO_REG(53)
1016 MOV_TO_REG(54)
1017 MOV_TO_REG(55)
1018 MOV_TO_REG(56)
1019 MOV_TO_REG(57)
1020 MOV_TO_REG(58)
1021 MOV_TO_REG(59)
1022 MOV_TO_REG(60)
1023 MOV_TO_REG(61)
1024 MOV_TO_REG(62)
1025 MOV_TO_REG(63)
1026 MOV_TO_REG(64)
1027 MOV_TO_REG(65)
1028 MOV_TO_REG(66)
1029 MOV_TO_REG(67)
1030 MOV_TO_REG(68)
1031 MOV_TO_REG(69)
1032 MOV_TO_REG(70)
1033 MOV_TO_REG(71)
1034 MOV_TO_REG(72)
1035 MOV_TO_REG(73)
1036 MOV_TO_REG(74)
1037 MOV_TO_REG(75)
1038 MOV_TO_REG(76)
1039 MOV_TO_REG(77)
1040 MOV_TO_REG(78)
1041 MOV_TO_REG(79)
1042 MOV_TO_REG(80)
1043 MOV_TO_REG(81)
1044 MOV_TO_REG(82)
1045 MOV_TO_REG(83)
1046 MOV_TO_REG(84)
1047 MOV_TO_REG(85)
1048 MOV_TO_REG(86)
1049 MOV_TO_REG(87)
1050 MOV_TO_REG(88)
1051 MOV_TO_REG(89)
1052 MOV_TO_REG(90)
1053 MOV_TO_REG(91)
1054 MOV_TO_REG(92)
1055 MOV_TO_REG(93)
1056 MOV_TO_REG(94)
1057 MOV_TO_REG(95)
1058 MOV_TO_REG(96)
1059 MOV_TO_REG(97)
1060 MOV_TO_REG(98)
1061 MOV_TO_REG(99)
1062 MOV_TO_REG(100)
1063 MOV_TO_REG(101)
1064 MOV_TO_REG(102)
1065 MOV_TO_REG(103)
1066 MOV_TO_REG(104)
1067 MOV_TO_REG(105)
1068 MOV_TO_REG(106)
1069 MOV_TO_REG(107)
1070 MOV_TO_REG(108)
1071 MOV_TO_REG(109)
1072 MOV_TO_REG(110)
1073 MOV_TO_REG(111)
1074 MOV_TO_REG(112)
1075 MOV_TO_REG(113)
1076 MOV_TO_REG(114)
1077 MOV_TO_REG(115)
1078 MOV_TO_REG(116)
1079 MOV_TO_REG(117)
1080 MOV_TO_REG(118)
1081 MOV_TO_REG(119)
1082 MOV_TO_REG(120)
1083 MOV_TO_REG(121)
1084 MOV_TO_REG(122)
1085 MOV_TO_REG(123)
1086 MOV_TO_REG(124)
1087 MOV_TO_REG(125)
1088 MOV_TO_REG(126)
1089 MOV_TO_REG(127)
1090END(asm_mov_to_reg)
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
deleted file mode 100644
index b0398740b48d..000000000000
--- a/arch/ia64/kvm/process.c
+++ /dev/null
@@ -1,1024 +0,0 @@
1/*
2 * process.c: handle interruption inject for guests.
3 * Copyright (c) 2005, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 * Shaofan Li (Susue Li) <susie.li@intel.com>
19 * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com>
20 * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
21 * Xiantao Zhang (xiantao.zhang@intel.com)
22 */
23#include "vcpu.h"
24
25#include <asm/pal.h>
26#include <asm/sal.h>
27#include <asm/fpswa.h>
28#include <asm/kregs.h>
29#include <asm/tlb.h>
30
/*
 * FPSWA entry points used by vmm_fp_emulate(); while this is NULL that
 * function reports failure instead of calling into firmware.
 */
31fpswa_interface_t *vmm_fpswa_interface;
32
/*
 * Byte offsets of the interruption handlers relative to the guest IVA;
 * inject_guest_interruption() adds one of these to vcpu_get_iva() to form
 * the new guest IIP.
 */
33#define IA64_VHPT_TRANS_VECTOR 0x0000
34#define IA64_INST_TLB_VECTOR 0x0400
35#define IA64_DATA_TLB_VECTOR 0x0800
36#define IA64_ALT_INST_TLB_VECTOR 0x0c00
37#define IA64_ALT_DATA_TLB_VECTOR 0x1000
38#define IA64_DATA_NESTED_TLB_VECTOR 0x1400
39#define IA64_INST_KEY_MISS_VECTOR 0x1800
40#define IA64_DATA_KEY_MISS_VECTOR 0x1c00
41#define IA64_DIRTY_BIT_VECTOR 0x2000
42#define IA64_INST_ACCESS_BIT_VECTOR 0x2400
43#define IA64_DATA_ACCESS_BIT_VECTOR 0x2800
44#define IA64_BREAK_VECTOR 0x2c00
45#define IA64_EXTINT_VECTOR 0x3000
46#define IA64_PAGE_NOT_PRESENT_VECTOR 0x5000
47#define IA64_KEY_PERMISSION_VECTOR 0x5100
48#define IA64_INST_ACCESS_RIGHTS_VECTOR 0x5200
49#define IA64_DATA_ACCESS_RIGHTS_VECTOR 0x5300
50#define IA64_GENEX_VECTOR 0x5400
51#define IA64_DISABLED_FPREG_VECTOR 0x5500
52#define IA64_NAT_CONSUMPTION_VECTOR 0x5600
53#define IA64_SPECULATION_VECTOR 0x5700 /* UNUSED */
54#define IA64_DEBUG_VECTOR 0x5900
55#define IA64_UNALIGNED_REF_VECTOR 0x5a00
56#define IA64_UNSUPPORTED_DATA_REF_VECTOR 0x5b00
57#define IA64_FP_FAULT_VECTOR 0x5c00
58#define IA64_FP_TRAP_VECTOR 0x5d00
59#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 0x5e00
60#define IA64_TAKEN_BRANCH_TRAP_VECTOR 0x5f00
61#define IA64_SINGLE_STEP_TRAP_VECTOR 0x6000
62
63/* SDM vol2 5.5 - IVA based interruption handling */
/*
 * Mask applied to the guest PSR in collect_interruption(): only these bits
 * survive delivery of an interruption; every other bit is cleared before
 * BE/PP are recomputed from the guest DCR.
 */
64#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\
65 IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT | \
66 IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT)
67
/* Request codes; their consumers are not in this part of the file. */
68#define DOMN_PAL_REQUEST 0x110000
69#define DOMN_SAL_REQUEST 0x110001
70
71static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800,
72 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00,
73 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400,
74 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
75 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600,
76 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
77 0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800,
78 0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00
79};
80
81static void collect_interruption(struct kvm_vcpu *vcpu)
82{
83 u64 ipsr;
84 u64 vdcr;
85 u64 vifs;
86 unsigned long vpsr;
87 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
88
89 vpsr = vcpu_get_psr(vcpu);
90 vcpu_bsw0(vcpu);
91 if (vpsr & IA64_PSR_IC) {
92
93 /* Sync mpsr id/da/dd/ss/ed bits to vipsr
94 * since after guest do rfi, we still want these bits on in
95 * mpsr
96 */
97
98 ipsr = regs->cr_ipsr;
99 vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
100 | IA64_PSR_DD | IA64_PSR_SS
101 | IA64_PSR_ED));
102 vcpu_set_ipsr(vcpu, vpsr);
103
104 /* Currently, for trap, we do not advance IIP to next
105 * instruction. That's because we assume caller already
106 * set up IIP correctly
107 */
108
109 vcpu_set_iip(vcpu , regs->cr_iip);
110
111 /* set vifs.v to zero */
112 vifs = VCPU(vcpu, ifs);
113 vifs &= ~IA64_IFS_V;
114 vcpu_set_ifs(vcpu, vifs);
115
116 vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa));
117 }
118
119 vdcr = VCPU(vcpu, dcr);
120
121 /* Set guest psr
122 * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged
123 * be: set to the value of dcr.be
124 * pp: set to the value of dcr.pp
125 */
126 vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
127 vpsr |= (vdcr & IA64_DCR_BE);
128
129 /* VDCR pp bit position is different from VPSR pp bit */
130 if (vdcr & IA64_DCR_PP) {
131 vpsr |= IA64_PSR_PP;
132 } else {
133 vpsr &= ~IA64_PSR_PP;
134 }
135
136 vcpu_set_psr(vcpu, vpsr);
137
138}
139
140void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec)
141{
142 u64 viva;
143 struct kvm_pt_regs *regs;
144 union ia64_isr pt_isr;
145
146 regs = vcpu_regs(vcpu);
147
148 /* clear cr.isr.ir (incomplete register frame)*/
149 pt_isr.val = VMX(vcpu, cr_isr);
150 pt_isr.ir = 0;
151 VMX(vcpu, cr_isr) = pt_isr.val;
152
153 collect_interruption(vcpu);
154
155 viva = vcpu_get_iva(vcpu);
156 regs->cr_iip = viva + vec;
157}
158
159static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
160{
161 union ia64_rr rr, rr1;
162
163 rr.val = vcpu_get_rr(vcpu, ifa);
164 rr1.val = 0;
165 rr1.ps = rr.ps;
166 rr1.rid = rr.rid;
167 return (rr1.val);
168}
169
170/*
171 * Set vIFA & vITIR & vIHA, when vPSR.ic =1
172 * Parameter:
173 * set_ifa: if true, set vIFA
174 * set_itir: if true, set vITIR
175 * set_iha: if true, set vIHA
176 */
177void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr,
178 int set_ifa, int set_itir, int set_iha)
179{
180 long vpsr;
181 u64 value;
182
183 vpsr = VCPU(vcpu, vpsr);
184 /* Vol2, Table 8-1 */
185 if (vpsr & IA64_PSR_IC) {
186 if (set_ifa)
187 vcpu_set_ifa(vcpu, vadr);
188 if (set_itir) {
189 value = vcpu_get_itir_on_fault(vcpu, vadr);
190 vcpu_set_itir(vcpu, value);
191 }
192
193 if (set_iha) {
194 value = vcpu_thash(vcpu, vadr);
195 vcpu_set_iha(vcpu, value);
196 }
197 }
198}
199
200/*
201 * Data TLB Fault
202 * @ Data TLB vector
203 * Refer to SDM Vol2 Table 5-6 & 8-1
204 */
205void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
206{
207 /* If vPSR.ic, IFA, ITIR, IHA */
208 set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
209 inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR);
210}
211
212/*
213 * Instruction TLB Fault
214 * @ Instruction TLB vector
215 * Refer to SDM Vol2 Table 5-6 & 8-1
216 */
217void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
218{
219 /* If vPSR.ic, IFA, ITIR, IHA */
220 set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
221 inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
222}
223
224/*
225 * Data Nested TLB Fault
226 * @ Data Nested TLB Vector
227 * Refer to SDM Vol2 Table 5-6 & 8-1
228 */
229void nested_dtlb(struct kvm_vcpu *vcpu)
230{
231 inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR);
232}
233
234/*
235 * Alternate Data TLB Fault
236 * @ Alternate Data TLB vector
237 * Refer to SDM Vol2 Table 5-6 & 8-1
238 */
239void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
240{
241 set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
242 inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
243}
244
245/*
246 * Data TLB Fault
247 * @ Data TLB vector
248 * Refer to SDM Vol2 Table 5-6 & 8-1
249 */
250void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr)
251{
252 set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
253 inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR);
254}
255
256/* Deal with:
257 * VHPT Translation Vector
258 */
259static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
260{
261 /* If vPSR.ic, IFA, ITIR, IHA*/
262 set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
263 inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
264}
265
266/*
267 * VHPT Instruction Fault
268 * @ VHPT Translation vector
269 * Refer to SDM Vol2 Table 5-6 & 8-1
270 */
271void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
272{
273 _vhpt_fault(vcpu, vadr);
274}
275
276/*
277 * VHPT Data Fault
278 * @ VHPT Translation vector
279 * Refer to SDM Vol2 Table 5-6 & 8-1
280 */
281void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
282{
283 _vhpt_fault(vcpu, vadr);
284}
285
286/*
287 * Deal with:
288 * General Exception vector
289 */
290void _general_exception(struct kvm_vcpu *vcpu)
291{
292 inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
293}
294
/*
 * Illegal Operation fault, delivered at the General Exception vector.
 * See SDM Vol2 Table 5-6 & 8-1.
 */
void illegal_op(struct kvm_vcpu *vcpu)
{
	_general_exception(vcpu);
}
304
/*
 * Illegal Dependency fault, delivered at the General Exception vector.
 * See SDM Vol2 Table 5-6 & 8-1.
 */
void illegal_dep(struct kvm_vcpu *vcpu)
{
	_general_exception(vcpu);
}
314
/*
 * Reserved Register/Field fault, delivered at the General Exception vector.
 * See SDM Vol2 Table 5-6 & 8-1.
 */
void rsv_reg_field(struct kvm_vcpu *vcpu)
{
	_general_exception(vcpu);
}

/*
 * Privileged Operation fault, delivered at the General Exception vector.
 * See SDM Vol2 Table 5-6 & 8-1.
 */
void privilege_op(struct kvm_vcpu *vcpu)
{
	_general_exception(vcpu);
}
334
/*
 * Unimplemented Data Address fault, delivered at the General Exception
 * vector.
 * See SDM Vol2 Table 5-6 & 8-1.
 */
void unimpl_daddr(struct kvm_vcpu *vcpu)
{
	_general_exception(vcpu);
}
344
/*
 * Privileged Register fault, delivered at the General Exception vector.
 * See SDM Vol2 Table 5-6 & 8-1.
 */
void privilege_reg(struct kvm_vcpu *vcpu)
{
	_general_exception(vcpu);
}
354
355/* Deal with
356 * Nat consumption vector
357 * Parameter:
358 * vaddr: Optional, if t == REGISTER
359 */
360static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr,
361 enum tlb_miss_type t)
362{
363 /* If vPSR.ic && t == DATA/INST, IFA */
364 if (t == DATA || t == INSTRUCTION) {
365 /* IFA */
366 set_ifa_itir_iha(vcpu, vadr, 1, 0, 0);
367 }
368
369 inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR);
370}
371
372/*
373 * Instruction Nat Page Consumption Fault
374 * @ Nat Consumption Vector
375 * Refer to SDM Vol2 Table 5-6 & 8-1
376 */
377void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
378{
379 _nat_consumption_fault(vcpu, vadr, INSTRUCTION);
380}
381
382/*
383 * Register Nat Consumption Fault
384 * @ Nat Consumption Vector
385 * Refer to SDM Vol2 Table 5-6 & 8-1
386 */
387void rnat_consumption(struct kvm_vcpu *vcpu)
388{
389 _nat_consumption_fault(vcpu, 0, REGISTER);
390}
391
392/*
393 * Data Nat Page Consumption Fault
394 * @ Nat Consumption Vector
395 * Refer to SDM Vol2 Table 5-6 & 8-1
396 */
397void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
398{
399 _nat_consumption_fault(vcpu, vadr, DATA);
400}
401
402/* Deal with
403 * Page not present vector
404 */
405static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
406{
407 /* If vPSR.ic, IFA, ITIR */
408 set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
409 inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
410}
411
412void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
413{
414 __page_not_present(vcpu, vadr);
415}
416
417void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
418{
419 __page_not_present(vcpu, vadr);
420}
421
422/* Deal with
423 * Data access rights vector
424 */
425void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr)
426{
427 /* If vPSR.ic, IFA, ITIR */
428 set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
429 inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR);
430}
431
432fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
433 unsigned long *fpsr, unsigned long *isr, unsigned long *pr,
434 unsigned long *ifs, struct kvm_pt_regs *regs)
435{
436 fp_state_t fp_state;
437 fpswa_ret_t ret;
438 struct kvm_vcpu *vcpu = current_vcpu;
439
440 uint64_t old_rr7 = ia64_get_rr(7UL<<61);
441
442 if (!vmm_fpswa_interface)
443 return (fpswa_ret_t) {-1, 0, 0, 0};
444
445 memset(&fp_state, 0, sizeof(fp_state_t));
446
447 /*
448 * compute fp_state. only FP registers f6 - f11 are used by the
449 * vmm, so set those bits in the mask and set the low volatile
450 * pointer to point to these registers.
451 */
452 fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */
453
454 fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
455
456 /*
457 * unsigned long (*EFI_FPSWA) (
458 * unsigned long trap_type,
459 * void *Bundle,
460 * unsigned long *pipsr,
461 * unsigned long *pfsr,
462 * unsigned long *pisr,
463 * unsigned long *ppreds,
464 * unsigned long *pifs,
465 * void *fp_state);
466 */
467 /*Call host fpswa interface directly to virtualize
468 *guest fpswa request!
469 */
470 ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]);
471 ia64_srlz_d();
472
473 ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle,
474 ipsr, fpsr, isr, pr, ifs, &fp_state);
475 ia64_set_rr(7UL << 61, old_rr7);
476 ia64_srlz_d();
477 return ret;
478}
479
480/*
481 * Handle floating-point assist faults and traps for domain.
482 */
483unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs,
484 unsigned long isr)
485{
486 struct kvm_vcpu *v = current_vcpu;
487 IA64_BUNDLE bundle;
488 unsigned long fault_ip;
489 fpswa_ret_t ret;
490
491 fault_ip = regs->cr_iip;
492 /*
493 * When the FP trap occurs, the trapping instruction is completed.
494 * If ipsr.ri == 0, there is the trapping instruction in previous
495 * bundle.
496 */
497 if (!fp_fault && (ia64_psr(regs)->ri == 0))
498 fault_ip -= 16;
499
500 if (fetch_code(v, fault_ip, &bundle))
501 return -EAGAIN;
502
503 if (!bundle.i64[0] && !bundle.i64[1])
504 return -EACCES;
505
506 ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr,
507 &isr, &regs->pr, &regs->cr_ifs, regs);
508 return ret.status;
509}
510
511void reflect_interruption(u64 ifa, u64 isr, u64 iim,
512 u64 vec, struct kvm_pt_regs *regs)
513{
514 u64 vector;
515 int status ;
516 struct kvm_vcpu *vcpu = current_vcpu;
517 u64 vpsr = VCPU(vcpu, vpsr);
518
519 vector = vec2off[vec];
520
521 if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
522 panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
523 "with psr.ic = 0\n", vector);
524 return;
525 }
526
527 switch (vec) {
528 case 32: /*IA64_FP_FAULT_VECTOR*/
529 status = vmm_handle_fpu_swa(1, regs, isr);
530 if (!status) {
531 vcpu_increment_iip(vcpu);
532 return;
533 } else if (-EAGAIN == status)
534 return;
535 break;
536 case 33: /*IA64_FP_TRAP_VECTOR*/
537 status = vmm_handle_fpu_swa(0, regs, isr);
538 if (!status)
539 return ;
540 break;
541 }
542
543 VCPU(vcpu, isr) = isr;
544 VCPU(vcpu, iipa) = regs->cr_iip;
545 if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
546 VCPU(vcpu, iim) = iim;
547 else
548 set_ifa_itir_iha(vcpu, ifa, 1, 1, 1);
549
550 inject_guest_interruption(vcpu, vector);
551}
552
553static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu,
554 unsigned long arg)
555{
556 struct thash_data *data;
557 unsigned long gpa, poff;
558
559 if (!is_physical_mode(vcpu)) {
560 /* Depends on caller to provide the DTR or DTC mapping.*/
561 data = vtlb_lookup(vcpu, arg, D_TLB);
562 if (data)
563 gpa = data->page_flags & _PAGE_PPN_MASK;
564 else {
565 data = vhpt_lookup(arg);
566 if (!data)
567 return 0;
568 gpa = data->gpaddr & _PAGE_PPN_MASK;
569 }
570
571 poff = arg & (PSIZE(data->ps) - 1);
572 arg = PAGEALIGN(gpa, data->ps) | poff;
573 }
574 arg = kvm_gpa_to_mpa(arg << 1 >> 1);
575
576 return (unsigned long)__va(arg);
577}
578
579static void set_pal_call_data(struct kvm_vcpu *vcpu)
580{
581 struct exit_ctl_data *p = &vcpu->arch.exit_data;
582 unsigned long gr28 = vcpu_get_gr(vcpu, 28);
583 unsigned long gr29 = vcpu_get_gr(vcpu, 29);
584 unsigned long gr30 = vcpu_get_gr(vcpu, 30);
585
586 /*FIXME:For static and stacked convention, firmware
587 * has put the parameters in gr28-gr31 before
588 * break to vmm !!*/
589
590 switch (gr28) {
591 case PAL_PERF_MON_INFO:
592 case PAL_HALT_INFO:
593 p->u.pal_data.gr29 = kvm_trans_pal_call_args(vcpu, gr29);
594 p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
595 break;
596 case PAL_BRAND_INFO:
597 p->u.pal_data.gr29 = gr29;
598 p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30);
599 break;
600 default:
601 p->u.pal_data.gr29 = gr29;
602 p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
603 }
604 p->u.pal_data.gr28 = gr28;
605 p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
606
607 p->exit_reason = EXIT_REASON_PAL_CALL;
608}
609
610static void get_pal_call_result(struct kvm_vcpu *vcpu)
611{
612 struct exit_ctl_data *p = &vcpu->arch.exit_data;
613
614 if (p->exit_reason == EXIT_REASON_PAL_CALL) {
615 vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0);
616 vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0);
617 vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
618 vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
619 } else
620 panic_vm(vcpu, "Mis-set for exit reason!\n");
621}
622
623static void set_sal_call_data(struct kvm_vcpu *vcpu)
624{
625 struct exit_ctl_data *p = &vcpu->arch.exit_data;
626
627 p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32);
628 p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33);
629 p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34);
630 p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35);
631 p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36);
632 p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37);
633 p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38);
634 p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39);
635 p->exit_reason = EXIT_REASON_SAL_CALL;
636}
637
638static void get_sal_call_result(struct kvm_vcpu *vcpu)
639{
640 struct exit_ctl_data *p = &vcpu->arch.exit_data;
641
642 if (p->exit_reason == EXIT_REASON_SAL_CALL) {
643 vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0);
644 vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0);
645 vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
646 vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
647 } else
648 panic_vm(vcpu, "Mis-set for exit reason!\n");
649}
650
651void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
652 unsigned long isr, unsigned long iim)
653{
654 struct kvm_vcpu *v = current_vcpu;
655 long psr;
656
657 if (ia64_psr(regs)->cpl == 0) {
658 /* Allow hypercalls only when cpl = 0. */
659 if (iim == DOMN_PAL_REQUEST) {
660 local_irq_save(psr);
661 set_pal_call_data(v);
662 vmm_transition(v);
663 get_pal_call_result(v);
664 vcpu_increment_iip(v);
665 local_irq_restore(psr);
666 return;
667 } else if (iim == DOMN_SAL_REQUEST) {
668 local_irq_save(psr);
669 set_sal_call_data(v);
670 vmm_transition(v);
671 get_sal_call_result(v);
672 vcpu_increment_iip(v);
673 local_irq_restore(psr);
674 return;
675 }
676 }
677 reflect_interruption(ifa, isr, iim, 11, regs);
678}
679
680void check_pending_irq(struct kvm_vcpu *vcpu)
681{
682 int mask, h_pending, h_inservice;
683 u64 isr;
684 unsigned long vpsr;
685 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
686
687 h_pending = highest_pending_irq(vcpu);
688 if (h_pending == NULL_VECTOR) {
689 update_vhpi(vcpu, NULL_VECTOR);
690 return;
691 }
692 h_inservice = highest_inservice_irq(vcpu);
693
694 vpsr = VCPU(vcpu, vpsr);
695 mask = irq_masked(vcpu, h_pending, h_inservice);
696 if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) {
697 isr = vpsr & IA64_PSR_RI;
698 update_vhpi(vcpu, h_pending);
699 reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
700 } else if (mask == IRQ_MASKED_BY_INSVC) {
701 if (VCPU(vcpu, vhpi))
702 update_vhpi(vcpu, NULL_VECTOR);
703 } else {
704 /* masked by vpsr.i or vtpr.*/
705 update_vhpi(vcpu, h_pending);
706 }
707}
708
709static void generate_exirq(struct kvm_vcpu *vcpu)
710{
711 unsigned vpsr;
712 uint64_t isr;
713
714 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
715
716 vpsr = VCPU(vcpu, vpsr);
717 isr = vpsr & IA64_PSR_RI;
718 if (!(vpsr & IA64_PSR_IC))
719 panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
720 reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
721}
722
723void vhpi_detection(struct kvm_vcpu *vcpu)
724{
725 uint64_t threshold, vhpi;
726 union ia64_tpr vtpr;
727 struct ia64_psr vpsr;
728
729 vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
730 vtpr.val = VCPU(vcpu, tpr);
731
732 threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
733 vhpi = VCPU(vcpu, vhpi);
734 if (vhpi > threshold) {
735 /* interrupt actived*/
736 generate_exirq(vcpu);
737 }
738}
739
740void leave_hypervisor_tail(void)
741{
742 struct kvm_vcpu *v = current_vcpu;
743
744 if (VMX(v, timer_check)) {
745 VMX(v, timer_check) = 0;
746 if (VMX(v, itc_check)) {
747 if (vcpu_get_itc(v) > VCPU(v, itm)) {
748 if (!(VCPU(v, itv) & (1 << 16))) {
749 vcpu_pend_interrupt(v, VCPU(v, itv)
750 & 0xff);
751 VMX(v, itc_check) = 0;
752 } else {
753 v->arch.timer_pending = 1;
754 }
755 VMX(v, last_itc) = VCPU(v, itm) + 1;
756 }
757 }
758 }
759
760 rmb();
761 if (v->arch.irq_new_pending) {
762 v->arch.irq_new_pending = 0;
763 VMX(v, irq_check) = 0;
764 check_pending_irq(v);
765 return;
766 }
767 if (VMX(v, irq_check)) {
768 VMX(v, irq_check) = 0;
769 vhpi_detection(v);
770 }
771}
772
773static inline void handle_lds(struct kvm_pt_regs *regs)
774{
775 regs->cr_ipsr |= IA64_PSR_ED;
776}
777
778void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type)
779{
780 unsigned long pte;
781 union ia64_rr rr;
782
783 rr.val = ia64_get_rr(vadr);
784 pte = vadr & _PAGE_PPN_MASK;
785 pte = pte | PHY_PAGE_WB;
786 thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type);
787 return;
788}
789
790void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs)
791{
792 unsigned long vpsr;
793 int type;
794
795 u64 vhpt_adr, gppa, pteval, rr, itir;
796 union ia64_isr misr;
797 union ia64_pta vpta;
798 struct thash_data *data;
799 struct kvm_vcpu *v = current_vcpu;
800
801 vpsr = VCPU(v, vpsr);
802 misr.val = VMX(v, cr_isr);
803
804 type = vec;
805
806 if (is_physical_mode(v) && (!(vadr << 1 >> 62))) {
807 if (vec == 2) {
808 if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) {
809 emulate_io_inst(v, ((vadr << 1) >> 1), 4);
810 return;
811 }
812 }
813 physical_tlb_miss(v, vadr, type);
814 return;
815 }
816 data = vtlb_lookup(v, vadr, type);
817 if (data != 0) {
818 if (type == D_TLB) {
819 gppa = (vadr & ((1UL << data->ps) - 1))
820 + (data->ppn >> (data->ps - 12) << data->ps);
821 if (__gpfn_is_io(gppa >> PAGE_SHIFT)) {
822 if (data->pl >= ((regs->cr_ipsr >>
823 IA64_PSR_CPL0_BIT) & 3))
824 emulate_io_inst(v, gppa, data->ma);
825 else {
826 vcpu_set_isr(v, misr.val);
827 data_access_rights(v, vadr);
828 }
829 return ;
830 }
831 }
832 thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type);
833
834 } else if (type == D_TLB) {
835 if (misr.sp) {
836 handle_lds(regs);
837 return;
838 }
839
840 rr = vcpu_get_rr(v, vadr);
841 itir = rr & (RR_RID_MASK | RR_PS_MASK);
842
843 if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) {
844 if (vpsr & IA64_PSR_IC) {
845 vcpu_set_isr(v, misr.val);
846 alt_dtlb(v, vadr);
847 } else {
848 nested_dtlb(v);
849 }
850 return ;
851 }
852
853 vpta.val = vcpu_get_pta(v);
854 /* avoid recursively walking (short format) VHPT */
855
856 vhpt_adr = vcpu_thash(v, vadr);
857 if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
858 /* VHPT successfully read. */
859 if (!(pteval & _PAGE_P)) {
860 if (vpsr & IA64_PSR_IC) {
861 vcpu_set_isr(v, misr.val);
862 dtlb_fault(v, vadr);
863 } else {
864 nested_dtlb(v);
865 }
866 } else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) {
867 thash_purge_and_insert(v, pteval, itir,
868 vadr, D_TLB);
869 } else if (vpsr & IA64_PSR_IC) {
870 vcpu_set_isr(v, misr.val);
871 dtlb_fault(v, vadr);
872 } else {
873 nested_dtlb(v);
874 }
875 } else {
876 /* Can't read VHPT. */
877 if (vpsr & IA64_PSR_IC) {
878 vcpu_set_isr(v, misr.val);
879 dvhpt_fault(v, vadr);
880 } else {
881 nested_dtlb(v);
882 }
883 }
884 } else if (type == I_TLB) {
885 if (!(vpsr & IA64_PSR_IC))
886 misr.ni = 1;
887 if (!vhpt_enabled(v, vadr, INST_REF)) {
888 vcpu_set_isr(v, misr.val);
889 alt_itlb(v, vadr);
890 return;
891 }
892
893 vpta.val = vcpu_get_pta(v);
894
895 vhpt_adr = vcpu_thash(v, vadr);
896 if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
897 /* VHPT successfully read. */
898 if (pteval & _PAGE_P) {
899 if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) {
900 vcpu_set_isr(v, misr.val);
901 itlb_fault(v, vadr);
902 return ;
903 }
904 rr = vcpu_get_rr(v, vadr);
905 itir = rr & (RR_RID_MASK | RR_PS_MASK);
906 thash_purge_and_insert(v, pteval, itir,
907 vadr, I_TLB);
908 } else {
909 vcpu_set_isr(v, misr.val);
910 inst_page_not_present(v, vadr);
911 }
912 } else {
913 vcpu_set_isr(v, misr.val);
914 ivhpt_fault(v, vadr);
915 }
916 }
917}
918
919void kvm_vexirq(struct kvm_vcpu *vcpu)
920{
921 u64 vpsr, isr;
922 struct kvm_pt_regs *regs;
923
924 regs = vcpu_regs(vcpu);
925 vpsr = VCPU(vcpu, vpsr);
926 isr = vpsr & IA64_PSR_RI;
927 reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/
928}
929
930void kvm_ia64_handle_irq(struct kvm_vcpu *v)
931{
932 struct exit_ctl_data *p = &v->arch.exit_data;
933 long psr;
934
935 local_irq_save(psr);
936 p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
937 vmm_transition(v);
938 local_irq_restore(psr);
939
940 VMX(v, timer_check) = 1;
941
942}
943
944static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos)
945{
946 u64 oldrid, moldrid, oldpsbits, vaddr;
947 struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos];
948 vaddr = p->vaddr;
949
950 oldrid = VMX(v, vrr[0]);
951 VMX(v, vrr[0]) = p->rr;
952 oldpsbits = VMX(v, psbits[0]);
953 VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]);
954 moldrid = ia64_get_rr(0x0);
955 ia64_set_rr(0x0, vrrtomrr(p->rr));
956 ia64_srlz_d();
957
958 vaddr = PAGEALIGN(vaddr, p->ps);
959 thash_purge_entries_remote(v, vaddr, p->ps);
960
961 VMX(v, vrr[0]) = oldrid;
962 VMX(v, psbits[0]) = oldpsbits;
963 ia64_set_rr(0x0, moldrid);
964 ia64_dv_serialize_data();
965}
966
967static void vcpu_do_resume(struct kvm_vcpu *vcpu)
968{
969 /*Re-init VHPT and VTLB once from resume*/
970 vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES;
971 thash_init(&vcpu->arch.vhpt, VHPT_SHIFT);
972 vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES;
973 thash_init(&vcpu->arch.vtlb, VTLB_SHIFT);
974
975 ia64_set_pta(vcpu->arch.vhpt.pta.val);
976}
977
978static void vmm_sanity_check(struct kvm_vcpu *vcpu)
979{
980 struct exit_ctl_data *p = &vcpu->arch.exit_data;
981
982 if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
983 panic_vm(vcpu, "Failed to do vmm sanity check,"
984 "it maybe caused by crashed vmm!!\n\n");
985 }
986}
987
988static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
989{
990 vmm_sanity_check(vcpu); /*Guarantee vcpu running on healthy vmm!*/
991
992 if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
993 vcpu_do_resume(vcpu);
994 return;
995 }
996
997 if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) {
998 thash_purge_all(vcpu);
999 return;
1000 }
1001
1002 if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) {
1003 while (vcpu->arch.ptc_g_count > 0)
1004 ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count);
1005 }
1006}
1007
1008void vmm_transition(struct kvm_vcpu *vcpu)
1009{
1010 ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd,
1011 1, 0, 0, 0, 0, 0);
1012 vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host);
1013 ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd,
1014 1, 0, 0, 0, 0, 0);
1015 kvm_do_resume_op(vcpu);
1016}
1017
1018void vmm_panic_handler(u64 vec)
1019{
1020 struct kvm_vcpu *vcpu = current_vcpu;
1021 vmm_sanity = 0;
1022 panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
1023 vec2off[vec]);
1024}
diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S
deleted file mode 100644
index 30897d44d61e..000000000000
--- a/arch/ia64/kvm/trampoline.S
+++ /dev/null
@@ -1,1038 +0,0 @@
1/* Save all processor states
2 *
3 * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
4 * Copyright (c) 2007 Anthony Xu <anthony.xu@intel.com>
5 */
6
7#include <asm/asmmacro.h>
8#include "asm-offsets.h"
9
10
11#define CTX(name) VMM_CTX_##name##_OFFSET /* CTX(X) pastes to VMM_CTX_X_OFFSET; presumably a context_t field offset from asm-offsets.h */
12
13 /*
14 * r32: context_t base address. Stores b0-b5: r2 walks from CTX(B0) and r3 from CTX(B1) in 16-byte steps; r2, r3, r16, r17 are overwritten.
15 */
16#define SAVE_BRANCH_REGS \
17 add r2 = CTX(B0),r32; \
18 add r3 = CTX(B1),r32; \
19 mov r16 = b0; \
20 mov r17 = b1; \
21 ;; \
22 st8 [r2]=r16,16; \
23 st8 [r3]=r17,16; \
24 ;; \
25 mov r16 = b2; \
26 mov r17 = b3; \
27 ;; \
28 st8 [r2]=r16,16; \
29 st8 [r3]=r17,16; \
30 ;; \
31 mov r16 = b4; \
32 mov r17 = b5; \
33 ;; \
34 st8 [r2]=r16; \
35 st8 [r3]=r17; \
36 ;;
37
38 /*
39 * r33: context_t base address. Reloads b0-b5 from the slots starting at CTX(B0)/CTX(B1) (16-byte steps); r2, r3, r16, r17 are overwritten.
40 */
41#define RESTORE_BRANCH_REGS \
42 add r2 = CTX(B0),r33; \
43 add r3 = CTX(B1),r33; \
44 ;; \
45 ld8 r16=[r2],16; \
46 ld8 r17=[r3],16; \
47 ;; \
48 mov b0 = r16; \
49 mov b1 = r17; \
50 ;; \
51 ld8 r16=[r2],16; \
52 ld8 r17=[r3],16; \
53 ;; \
54 mov b2 = r16; \
55 mov b3 = r17; \
56 ;; \
57 ld8 r16=[r2]; \
58 ld8 r17=[r3]; \
59 ;; \
60 mov b4=r16; \
61 mov b5=r17; \
62 ;;
63
64
65 /*
66 * r32: context_t base address
67 * bsw == 1
68 * Spill r4 ~ r7 with st8.spill, then r12 and r13 into the slots 48 bytes past those of r6/r7; r2 and r3 are overwritten.
69 */
70#define SAVE_GENERAL_REGS \
71 add r2=CTX(R4),r32; \
72 add r3=CTX(R5),r32; \
73 ;; \
74.mem.offset 0,0; \
75 st8.spill [r2]=r4,16; \
76.mem.offset 8,0; \
77 st8.spill [r3]=r5,16; \
78 ;; \
79.mem.offset 0,0; \
80 st8.spill [r2]=r6,48; \
81.mem.offset 8,0; \
82 st8.spill [r3]=r7,48; \
83 ;; \
84.mem.offset 0,0; \
85 st8.spill [r2]=r12; \
86.mem.offset 8,0; \
87 st8.spill [r3]=r13; \
88 ;;
89
90 /*
91 * r33: context_t base address
92 * bsw == 1. Fills r4 ~ r7, then r12 and r13, back with ld8.fill from the same slot layout the save macro uses; r2 and r3 are overwritten.
93 */
94#define RESTORE_GENERAL_REGS \
95 add r2=CTX(R4),r33; \
96 add r3=CTX(R5),r33; \
97 ;; \
98 ld8.fill r4=[r2],16; \
99 ld8.fill r5=[r3],16; \
100 ;; \
101 ld8.fill r6=[r2],48; \
102 ld8.fill r7=[r3],48; \
103 ;; \
104 ld8.fill r12=[r2]; \
105 ld8.fill r13 =[r3]; \
106 ;;
107
108
109
110
111 /*
112 * r32: context_t base address. Stores ar.k0-ar.k7: r2 walks from CTX(KR0) and r3 from CTX(KR1) in 16-byte steps; r2, r3, r16, r17 are overwritten.
113 */
114#define SAVE_KERNEL_REGS \
115 add r2 = CTX(KR0),r32; \
116 add r3 = CTX(KR1),r32; \
117 mov r16 = ar.k0; \
118 mov r17 = ar.k1; \
119 ;; \
120 st8 [r2] = r16,16; \
121 st8 [r3] = r17,16; \
122 ;; \
123 mov r16 = ar.k2; \
124 mov r17 = ar.k3; \
125 ;; \
126 st8 [r2] = r16,16; \
127 st8 [r3] = r17,16; \
128 ;; \
129 mov r16 = ar.k4; \
130 mov r17 = ar.k5; \
131 ;; \
132 st8 [r2] = r16,16; \
133 st8 [r3] = r17,16; \
134 ;; \
135 mov r16 = ar.k6; \
136 mov r17 = ar.k7; \
137 ;; \
138 st8 [r2] = r16; \
139 st8 [r3] = r17; \
140 ;;
141
142
143
144 /*
145 * r33: context_t base address. Reloads ar.k0-ar.k7 from the slots starting at CTX(KR0)/CTX(KR1) (16-byte steps); r2, r3, r16, r17 are overwritten.
146 */
147#define RESTORE_KERNEL_REGS \
148 add r2 = CTX(KR0),r33; \
149 add r3 = CTX(KR1),r33; \
150 ;; \
151 ld8 r16=[r2],16; \
152 ld8 r17=[r3],16; \
153 ;; \
154 mov ar.k0=r16; \
155 mov ar.k1=r17; \
156 ;; \
157 ld8 r16=[r2],16; \
158 ld8 r17=[r3],16; \
159 ;; \
160 mov ar.k2=r16; \
161 mov ar.k3=r17; \
162 ;; \
163 ld8 r16=[r2],16; \
164 ld8 r17=[r3],16; \
165 ;; \
166 mov ar.k4=r16; \
167 mov ar.k5=r17; \
168 ;; \
169 ld8 r16=[r2],16; \
170 ld8 r17=[r3],16; \
171 ;; \
172 mov ar.k6=r16; \
173 mov ar.k7=r17; \
174 ;;
175
176
177
178 /*
179 * r32: context_t base address. Stores ar.bspstore, rnat, fcr, eflag, cflg, fsr, fir, fdr, unat, fpsr, pfs and lc, moving r2 between fields by CTX() differences; r2 and r16 are overwritten.
180 */
181#define SAVE_APP_REGS \
182 add r2 = CTX(BSPSTORE),r32; \
183 mov r16 = ar.bspstore; \
184 ;; \
185 st8 [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\
186 mov r16 = ar.rnat; \
187 ;; \
188 st8 [r2] = r16,CTX(FCR)-CTX(RNAT); \
189 mov r16 = ar.fcr; \
190 ;; \
191 st8 [r2] = r16,CTX(EFLAG)-CTX(FCR); \
192 mov r16 = ar.eflag; \
193 ;; \
194 st8 [r2] = r16,CTX(CFLG)-CTX(EFLAG); \
195 mov r16 = ar.cflg; \
196 ;; \
197 st8 [r2] = r16,CTX(FSR)-CTX(CFLG); \
198 mov r16 = ar.fsr; \
199 ;; \
200 st8 [r2] = r16,CTX(FIR)-CTX(FSR); \
201 mov r16 = ar.fir; \
202 ;; \
203 st8 [r2] = r16,CTX(FDR)-CTX(FIR); \
204 mov r16 = ar.fdr; \
205 ;; \
206 st8 [r2] = r16,CTX(UNAT)-CTX(FDR); \
207 mov r16 = ar.unat; \
208 ;; \
209 st8 [r2] = r16,CTX(FPSR)-CTX(UNAT); \
210 mov r16 = ar.fpsr; \
211 ;; \
212 st8 [r2] = r16,CTX(PFS)-CTX(FPSR); \
213 mov r16 = ar.pfs; \
214 ;; \
215 st8 [r2] = r16,CTX(LC)-CTX(PFS); \
216 mov r16 = ar.lc; \
217 ;; \
218 st8 [r2] = r16; \
219 ;;
220
221 /*
222 * r33: context_t base address. Reloads ar.bspstore, rnat, fcr, eflag, cflg, fsr, fir, fdr, unat, fpsr, pfs and lc in that order; r2 and r16 are overwritten.
223 */
224#define RESTORE_APP_REGS \
225 add r2=CTX(BSPSTORE),r33; \
226 ;; \
227 ld8 r16=[r2],CTX(RNAT)-CTX(BSPSTORE); \
228 ;; \
229 mov ar.bspstore=r16; \
230 ld8 r16=[r2],CTX(FCR)-CTX(RNAT); \
231 ;; \
232 mov ar.rnat=r16; \
233 ld8 r16=[r2],CTX(EFLAG)-CTX(FCR); \
234 ;; \
235 mov ar.fcr=r16; \
236 ld8 r16=[r2],CTX(CFLG)-CTX(EFLAG); \
237 ;; \
238 mov ar.eflag=r16; \
239 ld8 r16=[r2],CTX(FSR)-CTX(CFLG); \
240 ;; \
241 mov ar.cflg=r16; \
242 ld8 r16=[r2],CTX(FIR)-CTX(FSR); \
243 ;; \
244 mov ar.fsr=r16; \
245 ld8 r16=[r2],CTX(FDR)-CTX(FIR); \
246 ;; \
247 mov ar.fir=r16; \
248 ld8 r16=[r2],CTX(UNAT)-CTX(FDR); \
249 ;; \
250 mov ar.fdr=r16; \
251 ld8 r16=[r2],CTX(FPSR)-CTX(UNAT); \
252 ;; \
253 mov ar.unat=r16; \
254 ld8 r16=[r2],CTX(PFS)-CTX(FPSR); \
255 ;; \
256 mov ar.fpsr=r16; \
257 ld8 r16=[r2],CTX(LC)-CTX(PFS); \
258 ;; \
259 mov ar.pfs=r16; \
260 ld8 r16=[r2]; \
261 ;; \
262 mov ar.lc=r16; \
263 ;;
264
265 /*
266 * r32: context_t base address. Stores cr.dcr, cr.iva and cr.pta at CTX(DCR), CTX(IVA) and CTX(PTA); r2 and r16 are overwritten.
267 */
268#define SAVE_CTL_REGS \
269 add r2 = CTX(DCR),r32; \
270 mov r16 = cr.dcr; \
271 ;; \
272 st8 [r2] = r16,CTX(IVA)-CTX(DCR); \
273 ;; \
274 mov r16 = cr.iva; \
275 ;; \
276 st8 [r2] = r16,CTX(PTA)-CTX(IVA); \
277 ;; \
278 mov r16 = cr.pta; \
279 ;; \
280 st8 [r2] = r16 ; \
281 ;;
282
283 /*
284 * r33: context_t base address. Reloads cr.dcr, cr.iva and cr.pta, issuing dv_serialize_data after each write; r2 and r16 are overwritten.
285 */
286#define RESTORE_CTL_REGS \
287 add r2 = CTX(DCR),r33; \
288 ;; \
289 ld8 r16 = [r2],CTX(IVA)-CTX(DCR); \
290 ;; \
291 mov cr.dcr = r16; \
292 dv_serialize_data; \
293 ;; \
294 ld8 r16 = [r2],CTX(PTA)-CTX(IVA); \
295 ;; \
296 mov cr.iva = r16; \
297 dv_serialize_data; \
298 ;; \
299 ld8 r16 = [r2]; \
300 ;; \
301 mov cr.pta = r16; \
302 dv_serialize_data; \
303 ;;
304
305
306 /*
307 * r32: context_t base address. Stores rr0-rr5 and rr7 in 8-byte slots from CTX(RR0); rr6 is not read and its slot is skipped (the rr5 store advances r2 by 16). r2, r16, r17, r18 are overwritten.
308 */
309#define SAVE_REGION_REGS \
310 add r2=CTX(RR0),r32; \
311 mov r16=rr[r0]; \
312 dep.z r18=1,61,3; \
313 ;; \
314 st8 [r2]=r16,8; \
315 mov r17=rr[r18]; \
316 dep.z r18=2,61,3; \
317 ;; \
318 st8 [r2]=r17,8; \
319 mov r16=rr[r18]; \
320 dep.z r18=3,61,3; \
321 ;; \
322 st8 [r2]=r16,8; \
323 mov r17=rr[r18]; \
324 dep.z r18=4,61,3; \
325 ;; \
326 st8 [r2]=r17,8; \
327 mov r16=rr[r18]; \
328 dep.z r18=5,61,3; \
329 ;; \
330 st8 [r2]=r16,8; \
331 mov r17=rr[r18]; \
332 dep.z r18=7,61,3; \
333 ;; \
334 st8 [r2]=r17,16; \
335 mov r16=rr[r18]; \
336 ;; \
337 st8 [r2]=r16,8; \
338 ;;
339
340 /*
341 * r33:context_t base address. Loads the saved rr0-rr5 and rr7 values into r20-r25 and r27 first, then writes them to the region registers (rr6 is left alone) and ends with srlz.i; r2 and r18 are also overwritten.
342 */
343#define RESTORE_REGION_REGS \
344 add r2=CTX(RR0),r33;\
345 mov r18=r0; \
346 ;; \
347 ld8 r20=[r2],8; \
348 ;; /* rr0 */ \
349 ld8 r21=[r2],8; \
350 ;; /* rr1 */ \
351 ld8 r22=[r2],8; \
352 ;; /* rr2 */ \
353 ld8 r23=[r2],8; \
354 ;; /* rr3 */ \
355 ld8 r24=[r2],8; \
356 ;; /* rr4 */ \
357 ld8 r25=[r2],16; \
358 ;; /* rr5 */ \
359 ld8 r27=[r2]; \
360 ;; /* rr7 */ \
361 mov rr[r18]=r20; \
362 dep.z r18=1,61,3; \
363 ;; /* rr1 */ \
364 mov rr[r18]=r21; \
365 dep.z r18=2,61,3; \
366 ;; /* rr2 */ \
367 mov rr[r18]=r22; \
368 dep.z r18=3,61,3; \
369 ;; /* rr3 */ \
370 mov rr[r18]=r23; \
371 dep.z r18=4,61,3; \
372 ;; /* rr4 */ \
373 mov rr[r18]=r24; \
374 dep.z r18=5,61,3; \
375 ;; /* rr5 */ \
376 mov rr[r18]=r25; \
377 dep.z r18=7,61,3; \
378 ;; /* rr7 */ \
379 mov rr[r18]=r27; \
380 ;; \
381 srlz.i; \
382 ;;
383
384
385
386 /*
387 * r32: context_t base address
388 * r36~r39:scratch registers
389 */
390#define SAVE_DEBUG_REGS \
391 add r2=CTX(IBR0),r32; \
392 add r3=CTX(DBR0),r32; \
393 mov r16=ibr[r0]; \
394 mov r17=dbr[r0]; \
395 ;; \
396 st8 [r2]=r16,8; \
397 st8 [r3]=r17,8; \
398 add r18=1,r0; \
399 ;; \
400 mov r16=ibr[r18]; \
401 mov r17=dbr[r18]; \
402 ;; \
403 st8 [r2]=r16,8; \
404 st8 [r3]=r17,8; \
405 add r18=2,r0; \
406 ;; \
407 mov r16=ibr[r18]; \
408 mov r17=dbr[r18]; \
409 ;; \
410 st8 [r2]=r16,8; \
411 st8 [r3]=r17,8; \
412 add r18=2,r0; \
413 ;; \
414 mov r16=ibr[r18]; \
415 mov r17=dbr[r18]; \
416 ;; \
417 st8 [r2]=r16,8; \
418 st8 [r3]=r17,8; \
419 add r18=3,r0; \
420 ;; \
421 mov r16=ibr[r18]; \
422 mov r17=dbr[r18]; \
423 ;; \
424 st8 [r2]=r16,8; \
425 st8 [r3]=r17,8; \
426 add r18=4,r0; \
427 ;; \
428 mov r16=ibr[r18]; \
429 mov r17=dbr[r18]; \
430 ;; \
431 st8 [r2]=r16,8; \
432 st8 [r3]=r17,8; \
433 add r18=5,r0; \
434 ;; \
435 mov r16=ibr[r18]; \
436 mov r17=dbr[r18]; \
437 ;; \
438 st8 [r2]=r16,8; \
439 st8 [r3]=r17,8; \
440 add r18=6,r0; \
441 ;; \
442 mov r16=ibr[r18]; \
443 mov r17=dbr[r18]; \
444 ;; \
445 st8 [r2]=r16,8; \
446 st8 [r3]=r17,8; \
447 add r18=7,r0; \
448 ;; \
449 mov r16=ibr[r18]; \
450 mov r17=dbr[r18]; \
451 ;; \
452 st8 [r2]=r16,8; \
453 st8 [r3]=r17,8; \
454 ;;
455
456
457/*
458 * r33: point to context_t structure. Reloads ibr[i]/dbr[i] from consecutive 8-byte slots at CTX(IBR0)/CTX(DBR0), with srlz.i after each pair.
459 * ar.lc is clobbered (loaded with 7 as the br.cloop counter), as are r2, r3 and r16-r19.
460 */
461#define RESTORE_DEBUG_REGS \
462 add r2=CTX(IBR0),r33; \
463 add r3=CTX(DBR0),r33; \
464 mov r16=7; \
465 mov r17=r0; \
466 ;; \
467 mov ar.lc = r16; \
468 ;; \
4691: \
470 ld8 r18=[r2],8; \
471 ld8 r19=[r3],8; \
472 ;; \
473 mov ibr[r17]=r18; \
474 mov dbr[r17]=r19; \
475 ;; \
476 srlz.i; \
477 ;; \
478 add r17=1,r17; \
479 br.cloop.sptk 1b; \
480 ;;
481
482
483 /*
484 * r32: context_t base address. Spills f2-f31 with stf.spill.nta: r2 walks from CTX(F2) (even registers) and r3 from CTX(F3) (odd registers) in 32-byte steps; r2 and r3 are overwritten.
485 */
486#define SAVE_FPU_LOW \
487 add r2=CTX(F2),r32; \
488 add r3=CTX(F3),r32; \
489 ;; \
490 stf.spill.nta [r2]=f2,32; \
491 stf.spill.nta [r3]=f3,32; \
492 ;; \
493 stf.spill.nta [r2]=f4,32; \
494 stf.spill.nta [r3]=f5,32; \
495 ;; \
496 stf.spill.nta [r2]=f6,32; \
497 stf.spill.nta [r3]=f7,32; \
498 ;; \
499 stf.spill.nta [r2]=f8,32; \
500 stf.spill.nta [r3]=f9,32; \
501 ;; \
502 stf.spill.nta [r2]=f10,32; \
503 stf.spill.nta [r3]=f11,32; \
504 ;; \
505 stf.spill.nta [r2]=f12,32; \
506 stf.spill.nta [r3]=f13,32; \
507 ;; \
508 stf.spill.nta [r2]=f14,32; \
509 stf.spill.nta [r3]=f15,32; \
510 ;; \
511 stf.spill.nta [r2]=f16,32; \
512 stf.spill.nta [r3]=f17,32; \
513 ;; \
514 stf.spill.nta [r2]=f18,32; \
515 stf.spill.nta [r3]=f19,32; \
516 ;; \
517 stf.spill.nta [r2]=f20,32; \
518 stf.spill.nta [r3]=f21,32; \
519 ;; \
520 stf.spill.nta [r2]=f22,32; \
521 stf.spill.nta [r3]=f23,32; \
522 ;; \
523 stf.spill.nta [r2]=f24,32; \
524 stf.spill.nta [r3]=f25,32; \
525 ;; \
526 stf.spill.nta [r2]=f26,32; \
527 stf.spill.nta [r3]=f27,32; \
528 ;; \
529 stf.spill.nta [r2]=f28,32; \
530 stf.spill.nta [r3]=f29,32; \
531 ;; \
532 stf.spill.nta [r2]=f30; \
533 stf.spill.nta [r3]=f31; \
534 ;;
535
536 /*
537 * r32: context_t base address. Spills f32-f127 with stf.spill.nta: r2 walks from CTX(F32) (even registers) and r3 from CTX(F33) (odd registers) in 32-byte steps; r2 and r3 are overwritten.
538 */
539#define SAVE_FPU_HIGH \
540 add r2=CTX(F32),r32; \
541 add r3=CTX(F33),r32; \
542 ;; \
543 stf.spill.nta [r2]=f32,32; \
544 stf.spill.nta [r3]=f33,32; \
545 ;; \
546 stf.spill.nta [r2]=f34,32; \
547 stf.spill.nta [r3]=f35,32; \
548 ;; \
549 stf.spill.nta [r2]=f36,32; \
550 stf.spill.nta [r3]=f37,32; \
551 ;; \
552 stf.spill.nta [r2]=f38,32; \
553 stf.spill.nta [r3]=f39,32; \
554 ;; \
555 stf.spill.nta [r2]=f40,32; \
556 stf.spill.nta [r3]=f41,32; \
557 ;; \
558 stf.spill.nta [r2]=f42,32; \
559 stf.spill.nta [r3]=f43,32; \
560 ;; \
561 stf.spill.nta [r2]=f44,32; \
562 stf.spill.nta [r3]=f45,32; \
563 ;; \
564 stf.spill.nta [r2]=f46,32; \
565 stf.spill.nta [r3]=f47,32; \
566 ;; \
567 stf.spill.nta [r2]=f48,32; \
568 stf.spill.nta [r3]=f49,32; \
569 ;; \
570 stf.spill.nta [r2]=f50,32; \
571 stf.spill.nta [r3]=f51,32; \
572 ;; \
573 stf.spill.nta [r2]=f52,32; \
574 stf.spill.nta [r3]=f53,32; \
575 ;; \
576 stf.spill.nta [r2]=f54,32; \
577 stf.spill.nta [r3]=f55,32; \
578 ;; \
579 stf.spill.nta [r2]=f56,32; \
580 stf.spill.nta [r3]=f57,32; \
581 ;; \
582 stf.spill.nta [r2]=f58,32; \
583 stf.spill.nta [r3]=f59,32; \
584 ;; \
585 stf.spill.nta [r2]=f60,32; \
586 stf.spill.nta [r3]=f61,32; \
587 ;; \
588 stf.spill.nta [r2]=f62,32; \
589 stf.spill.nta [r3]=f63,32; \
590 ;; \
591 stf.spill.nta [r2]=f64,32; \
592 stf.spill.nta [r3]=f65,32; \
593 ;; \
594 stf.spill.nta [r2]=f66,32; \
595 stf.spill.nta [r3]=f67,32; \
596 ;; \
597 stf.spill.nta [r2]=f68,32; \
598 stf.spill.nta [r3]=f69,32; \
599 ;; \
600 stf.spill.nta [r2]=f70,32; \
601 stf.spill.nta [r3]=f71,32; \
602 ;; \
603 stf.spill.nta [r2]=f72,32; \
604 stf.spill.nta [r3]=f73,32; \
605 ;; \
606 stf.spill.nta [r2]=f74,32; \
607 stf.spill.nta [r3]=f75,32; \
608 ;; \
609 stf.spill.nta [r2]=f76,32; \
610 stf.spill.nta [r3]=f77,32; \
611 ;; \
612 stf.spill.nta [r2]=f78,32; \
613 stf.spill.nta [r3]=f79,32; \
614 ;; \
615 stf.spill.nta [r2]=f80,32; \
616 stf.spill.nta [r3]=f81,32; \
617 ;; \
618 stf.spill.nta [r2]=f82,32; \
619 stf.spill.nta [r3]=f83,32; \
620 ;; \
621 stf.spill.nta [r2]=f84,32; \
622 stf.spill.nta [r3]=f85,32; \
623 ;; \
624 stf.spill.nta [r2]=f86,32; \
625 stf.spill.nta [r3]=f87,32; \
626 ;; \
627 stf.spill.nta [r2]=f88,32; \
628 stf.spill.nta [r3]=f89,32; \
629 ;; \
630 stf.spill.nta [r2]=f90,32; \
631 stf.spill.nta [r3]=f91,32; \
632 ;; \
633 stf.spill.nta [r2]=f92,32; \
634 stf.spill.nta [r3]=f93,32; \
635 ;; \
636 stf.spill.nta [r2]=f94,32; \
637 stf.spill.nta [r3]=f95,32; \
638 ;; \
639 stf.spill.nta [r2]=f96,32; \
640 stf.spill.nta [r3]=f97,32; \
641 ;; \
642 stf.spill.nta [r2]=f98,32; \
643 stf.spill.nta [r3]=f99,32; \
644 ;; \
645 stf.spill.nta [r2]=f100,32; \
646 stf.spill.nta [r3]=f101,32; \
647 ;; \
648 stf.spill.nta [r2]=f102,32; \
649 stf.spill.nta [r3]=f103,32; \
650 ;; \
651 stf.spill.nta [r2]=f104,32; \
652 stf.spill.nta [r3]=f105,32; \
653 ;; \
654 stf.spill.nta [r2]=f106,32; \
655 stf.spill.nta [r3]=f107,32; \
656 ;; \
657 stf.spill.nta [r2]=f108,32; \
658 stf.spill.nta [r3]=f109,32; \
659 ;; \
660 stf.spill.nta [r2]=f110,32; \
661 stf.spill.nta [r3]=f111,32; \
662 ;; \
663 stf.spill.nta [r2]=f112,32; \
664 stf.spill.nta [r3]=f113,32; \
665 ;; \
666 stf.spill.nta [r2]=f114,32; \
667 stf.spill.nta [r3]=f115,32; \
668 ;; \
669 stf.spill.nta [r2]=f116,32; \
670 stf.spill.nta [r3]=f117,32; \
671 ;; \
672 stf.spill.nta [r2]=f118,32; \
673 stf.spill.nta [r3]=f119,32; \
674 ;; \
675 stf.spill.nta [r2]=f120,32; \
676 stf.spill.nta [r3]=f121,32; \
677 ;; \
678 stf.spill.nta [r2]=f122,32; \
679 stf.spill.nta [r3]=f123,32; \
680 ;; \
681 stf.spill.nta [r2]=f124,32; \
682 stf.spill.nta [r3]=f125,32; \
683 ;; \
684 stf.spill.nta [r2]=f126; \
685 stf.spill.nta [r3]=f127; \
686 ;;
687
688 /*
689 * r33: point to context_t structure. Fills f2-f31 with ldf.fill.nta: r2 walks from CTX(F2) (even registers) and r3 from CTX(F3) (odd registers) in 32-byte steps; r2 and r3 are overwritten.
690 */
691#define RESTORE_FPU_LOW \
692 add r2 = CTX(F2), r33; \
693 add r3 = CTX(F3), r33; \
694 ;; \
695 ldf.fill.nta f2 = [r2], 32; \
696 ldf.fill.nta f3 = [r3], 32; \
697 ;; \
698 ldf.fill.nta f4 = [r2], 32; \
699 ldf.fill.nta f5 = [r3], 32; \
700 ;; \
701 ldf.fill.nta f6 = [r2], 32; \
702 ldf.fill.nta f7 = [r3], 32; \
703 ;; \
704 ldf.fill.nta f8 = [r2], 32; \
705 ldf.fill.nta f9 = [r3], 32; \
706 ;; \
707 ldf.fill.nta f10 = [r2], 32; \
708 ldf.fill.nta f11 = [r3], 32; \
709 ;; \
710 ldf.fill.nta f12 = [r2], 32; \
711 ldf.fill.nta f13 = [r3], 32; \
712 ;; \
713 ldf.fill.nta f14 = [r2], 32; \
714 ldf.fill.nta f15 = [r3], 32; \
715 ;; \
716 ldf.fill.nta f16 = [r2], 32; \
717 ldf.fill.nta f17 = [r3], 32; \
718 ;; \
719 ldf.fill.nta f18 = [r2], 32; \
720 ldf.fill.nta f19 = [r3], 32; \
721 ;; \
722 ldf.fill.nta f20 = [r2], 32; \
723 ldf.fill.nta f21 = [r3], 32; \
724 ;; \
725 ldf.fill.nta f22 = [r2], 32; \
726 ldf.fill.nta f23 = [r3], 32; \
727 ;; \
728 ldf.fill.nta f24 = [r2], 32; \
729 ldf.fill.nta f25 = [r3], 32; \
730 ;; \
731 ldf.fill.nta f26 = [r2], 32; \
732 ldf.fill.nta f27 = [r3], 32; \
733 ;; \
734 ldf.fill.nta f28 = [r2], 32; \
735 ldf.fill.nta f29 = [r3], 32; \
736 ;; \
737 ldf.fill.nta f30 = [r2], 32; \
738 ldf.fill.nta f31 = [r3], 32; \
739 ;;
740
741
742
743 /*
 * RESTORE_FPU_HIGH: reload floating-point registers f32-f127 with
 * ldf.fill from the context_t structure addressed by r33.
 *
 * r2 starts at CTX(F32) and r3 at CTX(F33); each load post-increments its
 * pointer by 32, so r2 is used for the even-numbered registers and r3 for
 * the odd-numbered ones.
 *
 * r33: point to context_t structure
 * r2 and r3 are overwritten.
 */
746#define RESTORE_FPU_HIGH \
747 add r2 = CTX(F32), r33; \
748 add r3 = CTX(F33), r33; \
749 ;; \
750 ldf.fill.nta f32 = [r2], 32; \
751 ldf.fill.nta f33 = [r3], 32; \
752 ;; \
753 ldf.fill.nta f34 = [r2], 32; \
754 ldf.fill.nta f35 = [r3], 32; \
755 ;; \
756 ldf.fill.nta f36 = [r2], 32; \
757 ldf.fill.nta f37 = [r3], 32; \
758 ;; \
759 ldf.fill.nta f38 = [r2], 32; \
760 ldf.fill.nta f39 = [r3], 32; \
761 ;; \
762 ldf.fill.nta f40 = [r2], 32; \
763 ldf.fill.nta f41 = [r3], 32; \
764 ;; \
765 ldf.fill.nta f42 = [r2], 32; \
766 ldf.fill.nta f43 = [r3], 32; \
767 ;; \
768 ldf.fill.nta f44 = [r2], 32; \
769 ldf.fill.nta f45 = [r3], 32; \
770 ;; \
771 ldf.fill.nta f46 = [r2], 32; \
772 ldf.fill.nta f47 = [r3], 32; \
773 ;; \
774 ldf.fill.nta f48 = [r2], 32; \
775 ldf.fill.nta f49 = [r3], 32; \
776 ;; \
777 ldf.fill.nta f50 = [r2], 32; \
778 ldf.fill.nta f51 = [r3], 32; \
779 ;; \
780 ldf.fill.nta f52 = [r2], 32; \
781 ldf.fill.nta f53 = [r3], 32; \
782 ;; \
783 ldf.fill.nta f54 = [r2], 32; \
784 ldf.fill.nta f55 = [r3], 32; \
785 ;; \
786 ldf.fill.nta f56 = [r2], 32; \
787 ldf.fill.nta f57 = [r3], 32; \
788 ;; \
789 ldf.fill.nta f58 = [r2], 32; \
790 ldf.fill.nta f59 = [r3], 32; \
791 ;; \
792 ldf.fill.nta f60 = [r2], 32; \
793 ldf.fill.nta f61 = [r3], 32; \
794 ;; \
795 ldf.fill.nta f62 = [r2], 32; \
796 ldf.fill.nta f63 = [r3], 32; \
797 ;; \
798 ldf.fill.nta f64 = [r2], 32; \
799 ldf.fill.nta f65 = [r3], 32; \
800 ;; \
801 ldf.fill.nta f66 = [r2], 32; \
802 ldf.fill.nta f67 = [r3], 32; \
803 ;; \
804 ldf.fill.nta f68 = [r2], 32; \
805 ldf.fill.nta f69 = [r3], 32; \
806 ;; \
807 ldf.fill.nta f70 = [r2], 32; \
808 ldf.fill.nta f71 = [r3], 32; \
809 ;; \
810 ldf.fill.nta f72 = [r2], 32; \
811 ldf.fill.nta f73 = [r3], 32; \
812 ;; \
813 ldf.fill.nta f74 = [r2], 32; \
814 ldf.fill.nta f75 = [r3], 32; \
815 ;; \
816 ldf.fill.nta f76 = [r2], 32; \
817 ldf.fill.nta f77 = [r3], 32; \
818 ;; \
819 ldf.fill.nta f78 = [r2], 32; \
820 ldf.fill.nta f79 = [r3], 32; \
821 ;; \
822 ldf.fill.nta f80 = [r2], 32; \
823 ldf.fill.nta f81 = [r3], 32; \
824 ;; \
825 ldf.fill.nta f82 = [r2], 32; \
826 ldf.fill.nta f83 = [r3], 32; \
827 ;; \
828 ldf.fill.nta f84 = [r2], 32; \
829 ldf.fill.nta f85 = [r3], 32; \
830 ;; \
831 ldf.fill.nta f86 = [r2], 32; \
832 ldf.fill.nta f87 = [r3], 32; \
833 ;; \
834 ldf.fill.nta f88 = [r2], 32; \
835 ldf.fill.nta f89 = [r3], 32; \
836 ;; \
837 ldf.fill.nta f90 = [r2], 32; \
838 ldf.fill.nta f91 = [r3], 32; \
839 ;; \
840 ldf.fill.nta f92 = [r2], 32; \
841 ldf.fill.nta f93 = [r3], 32; \
842 ;; \
843 ldf.fill.nta f94 = [r2], 32; \
844 ldf.fill.nta f95 = [r3], 32; \
845 ;; \
846 ldf.fill.nta f96 = [r2], 32; \
847 ldf.fill.nta f97 = [r3], 32; \
848 ;; \
849 ldf.fill.nta f98 = [r2], 32; \
850 ldf.fill.nta f99 = [r3], 32; \
851 ;; \
852 ldf.fill.nta f100 = [r2], 32; \
853 ldf.fill.nta f101 = [r3], 32; \
854 ;; \
855 ldf.fill.nta f102 = [r2], 32; \
856 ldf.fill.nta f103 = [r3], 32; \
857 ;; \
858 ldf.fill.nta f104 = [r2], 32; \
859 ldf.fill.nta f105 = [r3], 32; \
860 ;; \
861 ldf.fill.nta f106 = [r2], 32; \
862 ldf.fill.nta f107 = [r3], 32; \
863 ;; \
864 ldf.fill.nta f108 = [r2], 32; \
865 ldf.fill.nta f109 = [r3], 32; \
866 ;; \
867 ldf.fill.nta f110 = [r2], 32; \
868 ldf.fill.nta f111 = [r3], 32; \
869 ;; \
870 ldf.fill.nta f112 = [r2], 32; \
871 ldf.fill.nta f113 = [r3], 32; \
872 ;; \
873 ldf.fill.nta f114 = [r2], 32; \
874 ldf.fill.nta f115 = [r3], 32; \
875 ;; \
876 ldf.fill.nta f116 = [r2], 32; \
877 ldf.fill.nta f117 = [r3], 32; \
878 ;; \
879 ldf.fill.nta f118 = [r2], 32; \
880 ldf.fill.nta f119 = [r3], 32; \
881 ;; \
882 ldf.fill.nta f120 = [r2], 32; \
883 ldf.fill.nta f121 = [r3], 32; \
884 ;; \
885 ldf.fill.nta f122 = [r2], 32; \
886 ldf.fill.nta f123 = [r3], 32; \
887 ;; \
888 ldf.fill.nta f124 = [r2], 32; \
889 ldf.fill.nta f125 = [r3], 32; \
890 ;; \
891 ldf.fill.nta f126 = [r2], 32; \
892 ldf.fill.nta f127 = [r3], 32; \
893 ;;
894
895 /*
 * SAVE_PTK_REGS: copy protection key registers pkr[0]..pkr[7] into
 * consecutive 8-byte slots starting at CTX(PKR0) of the context_t
 * addressed by r32.
 *
 * ar.lc is loaded with 7 for the br.cloop counted loop (eight passes);
 * r17 is the pkr index and r2 the post-incremented store pointer.
 *
 * r32: context_t base address
 * r2, r16, r17, r18 and ar.lc are overwritten.
 */
898#define SAVE_PTK_REGS \
899 add r2=CTX(PKR0), r32; \
900 mov r16=7; \
901 ;; \
902 mov ar.lc=r16; \
903 mov r17=r0; \
904 ;; \
9051: \
906 mov r18=pkr[r17]; \
907 ;; \
908 srlz.i; \
909 ;; \
910 st8 [r2]=r18, 8; \
911 ;; \
912 add r17 =1,r17; \
913 ;; \
914 br.cloop.sptk 1b; \
915 ;;
916
917/*
 * RESTORE_PTK_REGS: reload protection key registers pkr[0]..pkr[7] from
 * consecutive 8-byte slots starting at CTX(PKR0) of the context_t
 * addressed by r33, serializing (srlz.i) after each write.
 *
 * ar.lc is loaded with 7 for the br.cloop counted loop (eight passes);
 * r17 is the pkr index and r2 the post-incremented load pointer.
 *
 * r33: point to context_t structure
 * ar.lc are corrupted (as are r2, r16, r17 and r18).
 */
921#define RESTORE_PTK_REGS \
922 add r2=CTX(PKR0), r33; \
923 mov r16=7; \
924 ;; \
925 mov ar.lc=r16; \
926 mov r17=r0; \
927 ;; \
9281: \
929 ld8 r18=[r2], 8; \
930 ;; \
931 mov pkr[r17]=r18; \
932 ;; \
933 srlz.i; \
934 ;; \
935 add r17 =1,r17; \
936 ;; \
937 br.cloop.sptk 1b; \
938 ;;
939
940
941/*
 * void vmm_trampoline( context_t * from,
 *                      context_t * to)
 *
 * Context switch helper: stores the current register state into *from,
 * then loads the state held in *to and returns with br.ret through b0
 * (presumably the b0 reloaded by RESTORE_BRANCH_REGS; that macro is
 * defined outside this part of the file).
 *
 * from: r32
 * to:   r33
 * note: interrupt disabled before call this function.
 */
949GLOBAL_ENTRY(vmm_trampoline)
 // Save psr, pr and ar.unat into three consecutive 8-byte slots
 // starting at CTX(PSR) of "from".
950 mov r16 = psr
951 adds r2 = CTX(PSR), r32
952 ;;
953 st8 [r2] = r16, 8 // psr
954 mov r17 = pr
955 ;;
956 st8 [r2] = r17, 8 // pr
957 mov r18 = ar.unat
958 ;;
959 st8 [r2] = r18
 // Save ar.rsc at CTX(RSC), then zero ar.rsc and flush the register
 // stack before the bulk register saves.
960 mov r17 = ar.rsc
961 ;;
962 adds r2 = CTX(RSC),r32
963 ;;
964 st8 [r2]= r17
965 mov ar.rsc =0
966 flushrs
967 ;;
968 SAVE_GENERAL_REGS
969 ;;
970 SAVE_KERNEL_REGS
971 ;;
972 SAVE_APP_REGS
973 ;;
974 SAVE_BRANCH_REGS
975 ;;
976 SAVE_CTL_REGS
977 ;;
978 SAVE_REGION_REGS
979 ;;
980 //SAVE_DEBUG_REGS
981 ;;
 // Clear psr.dfl (and below psr.dfh) before touching the low/high
 // floating-point register sets.
982 rsm psr.dfl
983 ;;
984 srlz.d
985 ;;
986 SAVE_FPU_LOW
987 ;;
988 rsm psr.dfh
989 ;;
990 srlz.d
991 ;;
992 SAVE_FPU_HIGH
993 ;;
994 SAVE_PTK_REGS
995 ;;
 // "from" is now fully saved; load the state of "to" (r33), in the
 // reverse order of the saves above.
996 RESTORE_PTK_REGS
997 ;;
998 RESTORE_FPU_HIGH
999 ;;
1000 RESTORE_FPU_LOW
1001 ;;
1002 //RESTORE_DEBUG_REGS
1003 ;;
1004 RESTORE_REGION_REGS
1005 ;;
1006 RESTORE_CTL_REGS
1007 ;;
1008 RESTORE_BRANCH_REGS
1009 ;;
1010 RESTORE_APP_REGS
1011 ;;
1012 RESTORE_KERNEL_REGS
1013 ;;
1014 RESTORE_GENERAL_REGS
1015 ;;
 // Reload psr.l, pr and ar.unat from the three slots at CTX(PSR) of
 // "to", then ar.rsc from CTX(RSC).
1016 adds r2=CTX(PSR), r33
1017 ;;
1018 ld8 r16=[r2], 8 // psr
1019 ;;
1020 mov psr.l=r16
1021 ;;
1022 srlz.d
1023 ;;
1024 ld8 r16=[r2], 8 // pr
1025 ;;
1026 mov pr =r16,-1
1027 ld8 r16=[r2] // unat
1028 ;;
1029 mov ar.unat=r16
1030 ;;
1031 adds r2=CTX(RSC),r33
1032 ;;
1033 ld8 r16 =[r2]
1034 ;;
1035 mov ar.rsc = r16
1036 ;;
1037 br.ret.sptk.few b0
1038END(vmm_trampoline)
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
deleted file mode 100644
index 958815c9787d..000000000000
--- a/arch/ia64/kvm/vcpu.c
+++ /dev/null
@@ -1,2209 +0,0 @@
1/*
2 * kvm_vcpu.c: handling all virtual cpu related thing.
3 * Copyright (c) 2005, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 * Shaofan Li (Susue Li) <susie.li@intel.com>
19 * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
20 * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
21 * Xiantao Zhang <xiantao.zhang@intel.com>
22 */
23
#include <linux/kvm_host.h>
#include <linux/stddef.h>
#include <linux/types.h>

#include <asm/processor.h>
#include <asm/ia64regs.h>
#include <asm/gcc_intrin.h>
#include <asm/kregs.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>

#include "asm-offsets.h"
#include "vcpu.h"
36
37/*
38 * Special notes:
39 * - Index by it/dt/rt sequence
40 * - Only existing mode transitions are allowed in this table
41 * - RSE is placed at lazy mode when emulating guest partial mode
42 * - If gva happens to be rr0 and rr4, only allowed case is identity
43 * mapping (gva=gpa), or panic! (How?)
44 */
45int mm_switch_table[8][8] = {
46 /* 2004/09/12(Kevin): Allow switch to self */
47 /*
48 * (it,dt,rt): (0,0,0) -> (1,1,1)
49 * This kind of transition usually occurs in the very early
50 * stage of Linux boot up procedure. Another case is in efi
51 * and pal calls. (see "arch/ia64/kernel/head.S")
52 *
53 * (it,dt,rt): (0,0,0) -> (0,1,1)
54 * This kind of transition is found when OSYa exits efi boot
55 * service. Due to gva = gpa in this case (Same region),
56 * data access can be satisfied though itlb entry for physical
57 * emulation is hit.
58 */
59 {SW_SELF, 0, 0, SW_NOP, 0, 0, 0, SW_P2V},
60 {0, 0, 0, 0, 0, 0, 0, 0},
61 {0, 0, 0, 0, 0, 0, 0, 0},
62 /*
63 * (it,dt,rt): (0,1,1) -> (1,1,1)
64 * This kind of transition is found in OSYa.
65 *
66 * (it,dt,rt): (0,1,1) -> (0,0,0)
67 * This kind of transition is found in OSYa
68 */
69 {SW_NOP, 0, 0, SW_SELF, 0, 0, 0, SW_P2V},
70 /* (1,0,0)->(1,1,1) */
71 {0, 0, 0, 0, 0, 0, 0, SW_P2V},
72 /*
73 * (it,dt,rt): (1,0,1) -> (1,1,1)
74 * This kind of transition usually occurs when Linux returns
75 * from the low level TLB miss handlers.
76 * (see "arch/ia64/kernel/ivt.S")
77 */
78 {0, 0, 0, 0, 0, SW_SELF, 0, SW_P2V},
79 {0, 0, 0, 0, 0, 0, 0, 0},
80 /*
81 * (it,dt,rt): (1,1,1) -> (1,0,1)
82 * This kind of transition usually occurs in Linux low level
83 * TLB miss handler. (see "arch/ia64/kernel/ivt.S")
84 *
85 * (it,dt,rt): (1,1,1) -> (0,0,0)
86 * This kind of transition usually occurs in pal and efi calls,
87 * which requires running in physical mode.
88 * (see "arch/ia64/kernel/head.S")
89 * (1,1,1)->(1,0,0)
90 */
91
92 {SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF},
93};
94
95void physical_mode_init(struct kvm_vcpu *vcpu)
96{
97 vcpu->arch.mode_flags = GUEST_IN_PHY;
98}
99
100void switch_to_physical_rid(struct kvm_vcpu *vcpu)
101{
102 unsigned long psr;
103
104 /* Save original virtual mode rr[0] and rr[4] */
105 psr = ia64_clear_ic();
106 ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0);
107 ia64_srlz_d();
108 ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4);
109 ia64_srlz_d();
110
111 ia64_set_psr(psr);
112 return;
113}
114
115void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
116{
117 unsigned long psr;
118
119 psr = ia64_clear_ic();
120 ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0);
121 ia64_srlz_d();
122 ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4);
123 ia64_srlz_d();
124 ia64_set_psr(psr);
125 return;
126}
127
128static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr)
129{
130 return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
131}
132
133void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
134 struct ia64_psr new_psr)
135{
136 int act;
137 act = mm_switch_action(old_psr, new_psr);
138 switch (act) {
139 case SW_V2P:
140 /*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n",
141 old_psr.val, new_psr.val);*/
142 switch_to_physical_rid(vcpu);
143 /*
144 * Set rse to enforced lazy, to prevent active rse
145 *save/restor when guest physical mode.
146 */
147 vcpu->arch.mode_flags |= GUEST_IN_PHY;
148 break;
149 case SW_P2V:
150 switch_to_virtual_rid(vcpu);
151 /*
152 * recover old mode which is saved when entering
153 * guest physical mode
154 */
155 vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
156 break;
157 case SW_SELF:
158 break;
159 case SW_NOP:
160 break;
161 default:
162 /* Sanity check */
163 break;
164 }
165 return;
166}
167
168/*
169 * In physical mode, insert tc/tr for region 0 and 4 uses
170 * RID[0] and RID[4] which is for physical mode emulation.
171 * However what those inserted tc/tr wants is rid for
172 * virtual mode. So original virtual rid needs to be restored
173 * before insert.
174 *
175 * Operations which required such switch include:
176 * - insertions (itc.*, itr.*)
177 * - purges (ptc.* and ptr.*)
178 * - tpa
179 * - tak
180 * - thash?, ttag?
181 * All above needs actual virtual rid for destination entry.
182 */
183
184void check_mm_mode_switch(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
185 struct ia64_psr new_psr)
186{
187
188 if ((old_psr.dt != new_psr.dt)
189 || (old_psr.it != new_psr.it)
190 || (old_psr.rt != new_psr.rt))
191 switch_mm_mode(vcpu, old_psr, new_psr);
192
193 return;
194}
195
196
197/*
198 * In physical mode, insert tc/tr for region 0 and 4 uses
199 * RID[0] and RID[4] which is for physical mode emulation.
200 * However what those inserted tc/tr wants is rid for
201 * virtual mode. So original virtual rid needs to be restored
202 * before insert.
203 *
204 * Operations which required such switch include:
205 * - insertions (itc.*, itr.*)
206 * - purges (ptc.* and ptr.*)
207 * - tpa
208 * - tak
209 * - thash?, ttag?
210 * All above needs actual virtual rid for destination entry.
211 */
212
213void prepare_if_physical_mode(struct kvm_vcpu *vcpu)
214{
215 if (is_physical_mode(vcpu)) {
216 vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
217 switch_to_virtual_rid(vcpu);
218 }
219 return;
220}
221
222/* Recover always follows prepare */
223void recover_if_physical_mode(struct kvm_vcpu *vcpu)
224{
225 if (is_physical_mode(vcpu))
226 switch_to_physical_rid(vcpu);
227 vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
228 return;
229}
230
231#define RPT(x) ((u16) &((struct kvm_pt_regs *)0)->x)
232
233static u16 gr_info[32] = {
234 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
235 RPT(r1), RPT(r2), RPT(r3),
236 RPT(r4), RPT(r5), RPT(r6), RPT(r7),
237 RPT(r8), RPT(r9), RPT(r10), RPT(r11),
238 RPT(r12), RPT(r13), RPT(r14), RPT(r15),
239 RPT(r16), RPT(r17), RPT(r18), RPT(r19),
240 RPT(r20), RPT(r21), RPT(r22), RPT(r23),
241 RPT(r24), RPT(r25), RPT(r26), RPT(r27),
242 RPT(r28), RPT(r29), RPT(r30), RPT(r31)
243};
244
245#define IA64_FIRST_STACKED_GR 32
246#define IA64_FIRST_ROTATING_FR 32
247
/*
 * Apply a rotating-register base to a register index.
 *
 * @sor: size of the rotating region
 * @rrb: rotating register base to add
 * @reg: index within the rotating region
 *
 * Returns reg + rrb, with a single subtraction of @sor when the sum
 * reaches or exceeds @sor.
 */
static inline unsigned long
rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg)
{
	unsigned long rotated = reg + rrb;

	return (rotated >= sor) ? rotated - sor : rotated;
}
256
257/*
258 * Return the (rotated) index for floating point register
259 * be in the REGNUM (REGNUM must range from 32-127,
260 * result is in the range from 0-95.
261 */
262static inline unsigned long fph_index(struct kvm_pt_regs *regs,
263 long regnum)
264{
265 unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
266 return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
267}
268
/*
 * Given a backing-store address @addr, return the address that lies
 * @num_regs registers away from it (@num_regs may be negative).
 *
 * On top of @num_regs slots, one extra slot is stepped over for every
 * full group of 0x3f slots crossed, counted from the slot number of
 * @addr.  The groups are counted by repeated addition/subtraction
 * instead of a division.
 * NOTE(review): presumably because integer division helpers are not
 * available to this code - confirm before replacing with '/'.
 */
static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr,
							long num_regs)
{
	long slots = ia64_rse_slot_num(addr) + num_regs;
	int extra = 0;

	if (num_regs < 0)
		slots -= 0x3e;

	/* At most one of the two loops below iterates. */
	for (; slots <= -0x3f; slots += 0x3f)
		extra--;
	for (; slots >= 0x3f; slots -= 0x3f)
		extra++;

	return addr + num_regs + extra;
}
295
296static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
297 unsigned long *val, int *nat)
298{
299 unsigned long *bsp, *addr, *rnat_addr, *bspstore;
300 unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
301 unsigned long nat_mask;
302 unsigned long old_rsc, new_rsc;
303 long sof = (regs->cr_ifs) & 0x7f;
304 long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
305 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
306 long ridx = r1 - 32;
307
308 if (ridx < sor)
309 ridx = rotate_reg(sor, rrb_gr, ridx);
310
311 old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
312 new_rsc = old_rsc&(~(0x3));
313 ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
314
315 bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
316 bsp = kbs + (regs->loadrs >> 19);
317
318 addr = kvm_rse_skip_regs(bsp, -sof + ridx);
319 nat_mask = 1UL << ia64_rse_slot_num(addr);
320 rnat_addr = ia64_rse_rnat_addr(addr);
321
322 if (addr >= bspstore) {
323 ia64_flushrs();
324 ia64_mf();
325 bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
326 }
327 *val = *addr;
328 if (nat) {
329 if (bspstore < rnat_addr)
330 *nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT)
331 & nat_mask);
332 else
333 *nat = (int)!!((*rnat_addr) & nat_mask);
334 ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
335 }
336}
337
/*
 * Write @val into stacked general register @r1 (r32 and up) of the guest
 * frame described by @regs, by storing into its slot in the VMM register
 * backing store.
 *
 * @regs: guest register frame; cr_ifs supplies sof/sor/rrb.gr and
 *        loadrs the position of bsp relative to the VMM backing store
 * @r1:   architectural register number, >= 32
 * @val:  value to store
 * @nat:  not read by this function: the NaT bit belonging to the
 *        target slot is cleared in both branches below
 *
 * ar.rsc is changed for the duration of the access and restored at the
 * end; the store itself runs between local_irq_save()/local_irq_restore().
 */
338void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
339 unsigned long val, unsigned long nat)
340{
341 unsigned long *bsp, *bspstore, *addr, *rnat_addr;
342 unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
343 unsigned long nat_mask;
344 unsigned long old_rsc, new_rsc, psr;
345 unsigned long rnat;
346 long sof = (regs->cr_ifs) & 0x7f;
347 long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
348 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
349 long ridx = r1 - 32;
350
 /* Registers inside the rotating region are renamed by rrb.gr. */
351 if (ridx < sor)
352 ridx = rotate_reg(sor, rrb_gr, ridx);
353
354 old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
355 /* put RSC to lazy mode, and set loadrs 0 */
356 new_rsc = old_rsc & (~0x3fff0003);
357 ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
358 bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */
359
 /* Locate the slot, its bit in the NaT collection and the collection. */
360 addr = kvm_rse_skip_regs(bsp, -sof + ridx);
361 nat_mask = 1UL << ia64_rse_slot_num(addr);
362 rnat_addr = ia64_rse_rnat_addr(addr);
363
364 local_irq_save(psr);
365 bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
366 if (addr >= bspstore) {
367
 /*
  * Slot is at or above bspstore: flush the register stack to
  * memory, patch the slot there, then loadrs (with the loadrs
  * field zeroed above) before writing ar.rnat back.
  */
368 ia64_flushrs();
369 ia64_mf();
370 *addr = val;
371 bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
372 rnat = ia64_getreg(_IA64_REG_AR_RNAT);
373 if (bspstore < rnat_addr)
374 rnat = rnat & (~nat_mask);
375 else
376 *rnat_addr = (*rnat_addr)&(~nat_mask);
377
378 ia64_mf();
379 ia64_loadrs();
380 ia64_setreg(_IA64_REG_AR_RNAT, rnat);
381 } else {
 /*
  * Slot is below bspstore: patch it in place, then rewrite
  * ar.bspstore with its current value followed by ar.rnat.
  */
382 rnat = ia64_getreg(_IA64_REG_AR_RNAT);
383 *addr = val;
384 if (bspstore < rnat_addr)
385 rnat = rnat&(~nat_mask);
386 else
387 *rnat_addr = (*rnat_addr) & (~nat_mask);
388
389 ia64_setreg(_IA64_REG_AR_BSPSTORE, (unsigned long)bspstore);
390 ia64_setreg(_IA64_REG_AR_RNAT, rnat);
391 }
392 local_irq_restore(psr);
393 ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
394}
395
396void getreg(unsigned long regnum, unsigned long *val,
397 int *nat, struct kvm_pt_regs *regs)
398{
399 unsigned long addr, *unat;
400 if (regnum >= IA64_FIRST_STACKED_GR) {
401 get_rse_reg(regs, regnum, val, nat);
402 return;
403 }
404
405 /*
406 * Now look at registers in [0-31] range and init correct UNAT
407 */
408 addr = (unsigned long)regs;
409 unat = &regs->eml_unat;
410
411 addr += gr_info[regnum];
412
413 *val = *(unsigned long *)addr;
414 /*
415 * do it only when requested
416 */
417 if (nat)
418 *nat = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL;
419}
420
421void setreg(unsigned long regnum, unsigned long val,
422 int nat, struct kvm_pt_regs *regs)
423{
424 unsigned long addr;
425 unsigned long bitmask;
426 unsigned long *unat;
427
428 /*
429 * First takes care of stacked registers
430 */
431 if (regnum >= IA64_FIRST_STACKED_GR) {
432 set_rse_reg(regs, regnum, val, nat);
433 return;
434 }
435
436 /*
437 * Now look at registers in [0-31] range and init correct UNAT
438 */
439 addr = (unsigned long)regs;
440 unat = &regs->eml_unat;
441 /*
442 * add offset from base of struct
443 * and do it !
444 */
445 addr += gr_info[regnum];
446
447 *(unsigned long *)addr = val;
448
449 /*
450 * We need to clear the corresponding UNAT bit to fully emulate the load
451 * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
452 */
453 bitmask = 1UL << ((addr >> 3) & 0x3f);
454 if (nat)
455 *unat |= bitmask;
456 else
457 *unat &= ~bitmask;
458
459}
460
461u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
462{
463 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
464 unsigned long val;
465
466 if (!reg)
467 return 0;
468 getreg(reg, &val, 0, regs);
469 return val;
470}
471
472void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat)
473{
474 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
475 long sof = (regs->cr_ifs) & 0x7f;
476
477 if (!reg)
478 return;
479 if (reg >= sof + 32)
480 return;
481 setreg(reg, value, nat, regs); /* FIXME: handle NATs later*/
482}
483
484void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
485 struct kvm_pt_regs *regs)
486{
487 /* Take floating register rotation into consideration*/
488 if (regnum >= IA64_FIRST_ROTATING_FR)
489 regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
490#define CASE_FIXED_FP(reg) \
491 case (reg) : \
492 ia64_stf_spill(fpval, reg); \
493 break
494
495 switch (regnum) {
496 CASE_FIXED_FP(0);
497 CASE_FIXED_FP(1);
498 CASE_FIXED_FP(2);
499 CASE_FIXED_FP(3);
500 CASE_FIXED_FP(4);
501 CASE_FIXED_FP(5);
502
503 CASE_FIXED_FP(6);
504 CASE_FIXED_FP(7);
505 CASE_FIXED_FP(8);
506 CASE_FIXED_FP(9);
507 CASE_FIXED_FP(10);
508 CASE_FIXED_FP(11);
509
510 CASE_FIXED_FP(12);
511 CASE_FIXED_FP(13);
512 CASE_FIXED_FP(14);
513 CASE_FIXED_FP(15);
514 CASE_FIXED_FP(16);
515 CASE_FIXED_FP(17);
516 CASE_FIXED_FP(18);
517 CASE_FIXED_FP(19);
518 CASE_FIXED_FP(20);
519 CASE_FIXED_FP(21);
520 CASE_FIXED_FP(22);
521 CASE_FIXED_FP(23);
522 CASE_FIXED_FP(24);
523 CASE_FIXED_FP(25);
524 CASE_FIXED_FP(26);
525 CASE_FIXED_FP(27);
526 CASE_FIXED_FP(28);
527 CASE_FIXED_FP(29);
528 CASE_FIXED_FP(30);
529 CASE_FIXED_FP(31);
530 CASE_FIXED_FP(32);
531 CASE_FIXED_FP(33);
532 CASE_FIXED_FP(34);
533 CASE_FIXED_FP(35);
534 CASE_FIXED_FP(36);
535 CASE_FIXED_FP(37);
536 CASE_FIXED_FP(38);
537 CASE_FIXED_FP(39);
538 CASE_FIXED_FP(40);
539 CASE_FIXED_FP(41);
540 CASE_FIXED_FP(42);
541 CASE_FIXED_FP(43);
542 CASE_FIXED_FP(44);
543 CASE_FIXED_FP(45);
544 CASE_FIXED_FP(46);
545 CASE_FIXED_FP(47);
546 CASE_FIXED_FP(48);
547 CASE_FIXED_FP(49);
548 CASE_FIXED_FP(50);
549 CASE_FIXED_FP(51);
550 CASE_FIXED_FP(52);
551 CASE_FIXED_FP(53);
552 CASE_FIXED_FP(54);
553 CASE_FIXED_FP(55);
554 CASE_FIXED_FP(56);
555 CASE_FIXED_FP(57);
556 CASE_FIXED_FP(58);
557 CASE_FIXED_FP(59);
558 CASE_FIXED_FP(60);
559 CASE_FIXED_FP(61);
560 CASE_FIXED_FP(62);
561 CASE_FIXED_FP(63);
562 CASE_FIXED_FP(64);
563 CASE_FIXED_FP(65);
564 CASE_FIXED_FP(66);
565 CASE_FIXED_FP(67);
566 CASE_FIXED_FP(68);
567 CASE_FIXED_FP(69);
568 CASE_FIXED_FP(70);
569 CASE_FIXED_FP(71);
570 CASE_FIXED_FP(72);
571 CASE_FIXED_FP(73);
572 CASE_FIXED_FP(74);
573 CASE_FIXED_FP(75);
574 CASE_FIXED_FP(76);
575 CASE_FIXED_FP(77);
576 CASE_FIXED_FP(78);
577 CASE_FIXED_FP(79);
578 CASE_FIXED_FP(80);
579 CASE_FIXED_FP(81);
580 CASE_FIXED_FP(82);
581 CASE_FIXED_FP(83);
582 CASE_FIXED_FP(84);
583 CASE_FIXED_FP(85);
584 CASE_FIXED_FP(86);
585 CASE_FIXED_FP(87);
586 CASE_FIXED_FP(88);
587 CASE_FIXED_FP(89);
588 CASE_FIXED_FP(90);
589 CASE_FIXED_FP(91);
590 CASE_FIXED_FP(92);
591 CASE_FIXED_FP(93);
592 CASE_FIXED_FP(94);
593 CASE_FIXED_FP(95);
594 CASE_FIXED_FP(96);
595 CASE_FIXED_FP(97);
596 CASE_FIXED_FP(98);
597 CASE_FIXED_FP(99);
598 CASE_FIXED_FP(100);
599 CASE_FIXED_FP(101);
600 CASE_FIXED_FP(102);
601 CASE_FIXED_FP(103);
602 CASE_FIXED_FP(104);
603 CASE_FIXED_FP(105);
604 CASE_FIXED_FP(106);
605 CASE_FIXED_FP(107);
606 CASE_FIXED_FP(108);
607 CASE_FIXED_FP(109);
608 CASE_FIXED_FP(110);
609 CASE_FIXED_FP(111);
610 CASE_FIXED_FP(112);
611 CASE_FIXED_FP(113);
612 CASE_FIXED_FP(114);
613 CASE_FIXED_FP(115);
614 CASE_FIXED_FP(116);
615 CASE_FIXED_FP(117);
616 CASE_FIXED_FP(118);
617 CASE_FIXED_FP(119);
618 CASE_FIXED_FP(120);
619 CASE_FIXED_FP(121);
620 CASE_FIXED_FP(122);
621 CASE_FIXED_FP(123);
622 CASE_FIXED_FP(124);
623 CASE_FIXED_FP(125);
624 CASE_FIXED_FP(126);
625 CASE_FIXED_FP(127);
626 }
627#undef CASE_FIXED_FP
628}
629
630void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
631 struct kvm_pt_regs *regs)
632{
633 /* Take floating register rotation into consideration*/
634 if (regnum >= IA64_FIRST_ROTATING_FR)
635 regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
636
637#define CASE_FIXED_FP(reg) \
638 case (reg) : \
639 ia64_ldf_fill(reg, fpval); \
640 break
641
642 switch (regnum) {
643 CASE_FIXED_FP(2);
644 CASE_FIXED_FP(3);
645 CASE_FIXED_FP(4);
646 CASE_FIXED_FP(5);
647
648 CASE_FIXED_FP(6);
649 CASE_FIXED_FP(7);
650 CASE_FIXED_FP(8);
651 CASE_FIXED_FP(9);
652 CASE_FIXED_FP(10);
653 CASE_FIXED_FP(11);
654
655 CASE_FIXED_FP(12);
656 CASE_FIXED_FP(13);
657 CASE_FIXED_FP(14);
658 CASE_FIXED_FP(15);
659 CASE_FIXED_FP(16);
660 CASE_FIXED_FP(17);
661 CASE_FIXED_FP(18);
662 CASE_FIXED_FP(19);
663 CASE_FIXED_FP(20);
664 CASE_FIXED_FP(21);
665 CASE_FIXED_FP(22);
666 CASE_FIXED_FP(23);
667 CASE_FIXED_FP(24);
668 CASE_FIXED_FP(25);
669 CASE_FIXED_FP(26);
670 CASE_FIXED_FP(27);
671 CASE_FIXED_FP(28);
672 CASE_FIXED_FP(29);
673 CASE_FIXED_FP(30);
674 CASE_FIXED_FP(31);
675 CASE_FIXED_FP(32);
676 CASE_FIXED_FP(33);
677 CASE_FIXED_FP(34);
678 CASE_FIXED_FP(35);
679 CASE_FIXED_FP(36);
680 CASE_FIXED_FP(37);
681 CASE_FIXED_FP(38);
682 CASE_FIXED_FP(39);
683 CASE_FIXED_FP(40);
684 CASE_FIXED_FP(41);
685 CASE_FIXED_FP(42);
686 CASE_FIXED_FP(43);
687 CASE_FIXED_FP(44);
688 CASE_FIXED_FP(45);
689 CASE_FIXED_FP(46);
690 CASE_FIXED_FP(47);
691 CASE_FIXED_FP(48);
692 CASE_FIXED_FP(49);
693 CASE_FIXED_FP(50);
694 CASE_FIXED_FP(51);
695 CASE_FIXED_FP(52);
696 CASE_FIXED_FP(53);
697 CASE_FIXED_FP(54);
698 CASE_FIXED_FP(55);
699 CASE_FIXED_FP(56);
700 CASE_FIXED_FP(57);
701 CASE_FIXED_FP(58);
702 CASE_FIXED_FP(59);
703 CASE_FIXED_FP(60);
704 CASE_FIXED_FP(61);
705 CASE_FIXED_FP(62);
706 CASE_FIXED_FP(63);
707 CASE_FIXED_FP(64);
708 CASE_FIXED_FP(65);
709 CASE_FIXED_FP(66);
710 CASE_FIXED_FP(67);
711 CASE_FIXED_FP(68);
712 CASE_FIXED_FP(69);
713 CASE_FIXED_FP(70);
714 CASE_FIXED_FP(71);
715 CASE_FIXED_FP(72);
716 CASE_FIXED_FP(73);
717 CASE_FIXED_FP(74);
718 CASE_FIXED_FP(75);
719 CASE_FIXED_FP(76);
720 CASE_FIXED_FP(77);
721 CASE_FIXED_FP(78);
722 CASE_FIXED_FP(79);
723 CASE_FIXED_FP(80);
724 CASE_FIXED_FP(81);
725 CASE_FIXED_FP(82);
726 CASE_FIXED_FP(83);
727 CASE_FIXED_FP(84);
728 CASE_FIXED_FP(85);
729 CASE_FIXED_FP(86);
730 CASE_FIXED_FP(87);
731 CASE_FIXED_FP(88);
732 CASE_FIXED_FP(89);
733 CASE_FIXED_FP(90);
734 CASE_FIXED_FP(91);
735 CASE_FIXED_FP(92);
736 CASE_FIXED_FP(93);
737 CASE_FIXED_FP(94);
738 CASE_FIXED_FP(95);
739 CASE_FIXED_FP(96);
740 CASE_FIXED_FP(97);
741 CASE_FIXED_FP(98);
742 CASE_FIXED_FP(99);
743 CASE_FIXED_FP(100);
744 CASE_FIXED_FP(101);
745 CASE_FIXED_FP(102);
746 CASE_FIXED_FP(103);
747 CASE_FIXED_FP(104);
748 CASE_FIXED_FP(105);
749 CASE_FIXED_FP(106);
750 CASE_FIXED_FP(107);
751 CASE_FIXED_FP(108);
752 CASE_FIXED_FP(109);
753 CASE_FIXED_FP(110);
754 CASE_FIXED_FP(111);
755 CASE_FIXED_FP(112);
756 CASE_FIXED_FP(113);
757 CASE_FIXED_FP(114);
758 CASE_FIXED_FP(115);
759 CASE_FIXED_FP(116);
760 CASE_FIXED_FP(117);
761 CASE_FIXED_FP(118);
762 CASE_FIXED_FP(119);
763 CASE_FIXED_FP(120);
764 CASE_FIXED_FP(121);
765 CASE_FIXED_FP(122);
766 CASE_FIXED_FP(123);
767 CASE_FIXED_FP(124);
768 CASE_FIXED_FP(125);
769 CASE_FIXED_FP(126);
770 CASE_FIXED_FP(127);
771 }
772}
773
/*
 * Read guest floating point register @reg into *@val, using the vcpu's
 * register frame for the rotation state.
 */
void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
						struct ia64_fpreg *val)
{
	/* FIXME: handle NATs later */
	getfpreg(reg, val, vcpu_regs(vcpu));
}
781
/*
 * Write *@val into guest floating point register @reg.  Requests for
 * registers 0 and 1 are ignored.
 */
void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
						struct ia64_fpreg *val)
{
	struct kvm_pt_regs *regs = vcpu_regs(vcpu);

	if (reg <= 1)
		return;

	/* FIXME: handle NATs later */
	setfpreg(reg, val, regs);
}
790
791/*
792 * The Altix RTC is mapped specially here for the vmm module
793 */
794#define SN_RTC_BASE (u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT))
795static long kvm_get_itc(struct kvm_vcpu *vcpu)
796{
797#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
798 struct kvm *kvm = (struct kvm *)KVM_VM_BASE;
799
800 if (kvm->arch.is_sn2)
801 return (*SN_RTC_BASE);
802 else
803#endif
804 return ia64_getreg(_IA64_REG_AR_ITC);
805}
806
807/************************************************************************
808 * lsapic timer
809 ***********************************************************************/
810u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
811{
812 unsigned long guest_itc;
813 guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu);
814
815 if (guest_itc >= VMX(vcpu, last_itc)) {
816 VMX(vcpu, last_itc) = guest_itc;
817 return guest_itc;
818 } else
819 return VMX(vcpu, last_itc);
820}
821
822static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
823static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
824{
825 struct kvm_vcpu *v;
826 struct kvm *kvm;
827 int i;
828 long itc_offset = val - kvm_get_itc(vcpu);
829 unsigned long vitv = VCPU(vcpu, itv);
830
831 kvm = (struct kvm *)KVM_VM_BASE;
832
833 if (kvm_vcpu_is_bsp(vcpu)) {
834 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) {
835 v = (struct kvm_vcpu *)((char *)vcpu +
836 sizeof(struct kvm_vcpu_data) * i);
837 VMX(v, itc_offset) = itc_offset;
838 VMX(v, last_itc) = 0;
839 }
840 }
841 VMX(vcpu, last_itc) = 0;
842 if (VCPU(vcpu, itm) <= val) {
843 VMX(vcpu, itc_check) = 0;
844 vcpu_unpend_interrupt(vcpu, vitv);
845 } else {
846 VMX(vcpu, itc_check) = 1;
847 vcpu_set_itm(vcpu, VCPU(vcpu, itm));
848 }
849
850}
851
852static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu)
853{
854 return ((u64)VCPU(vcpu, itm));
855}
856
857static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val)
858{
859 unsigned long vitv = VCPU(vcpu, itv);
860 VCPU(vcpu, itm) = val;
861
862 if (val > vcpu_get_itc(vcpu)) {
863 VMX(vcpu, itc_check) = 1;
864 vcpu_unpend_interrupt(vcpu, vitv);
865 VMX(vcpu, timer_pending) = 0;
866 } else
867 VMX(vcpu, itc_check) = 0;
868}
869
/*
 * Field extractors for an itv value: the vector is the low 8 bits, the
 * mask flag is bit 16.  The argument is parenthesized so that compound
 * expressions (e.g. "a | b") are masked as a whole.
 */
#define ITV_VECTOR(itv)		((itv) & 0xff)
#define ITV_IRQ_MASK(itv)	((itv) & (1 << 16))
872
873static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val)
874{
875 VCPU(vcpu, itv) = val;
876 if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) {
877 vcpu_pend_interrupt(vcpu, ITV_VECTOR(val));
878 vcpu->arch.timer_pending = 0;
879 }
880}
881
882static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val)
883{
884 int vec;
885
886 vec = highest_inservice_irq(vcpu);
887 if (vec == NULL_VECTOR)
888 return;
889 VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63));
890 VCPU(vcpu, eoi) = 0;
891 vcpu->arch.irq_new_pending = 1;
892
893}
894
895/* See Table 5-8 in SDM vol2 for the definition */
896int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice)
897{
898 union ia64_tpr vtpr;
899
900 vtpr.val = VCPU(vcpu, tpr);
901
902 if (h_inservice == NMI_VECTOR)
903 return IRQ_MASKED_BY_INSVC;
904
905 if (h_pending == NMI_VECTOR) {
906 /* Non Maskable Interrupt */
907 return IRQ_NO_MASKED;
908 }
909
910 if (h_inservice == ExtINT_VECTOR)
911 return IRQ_MASKED_BY_INSVC;
912
913 if (h_pending == ExtINT_VECTOR) {
914 if (vtpr.mmi) {
915 /* mask all external IRQ */
916 return IRQ_MASKED_BY_VTPR;
917 } else
918 return IRQ_NO_MASKED;
919 }
920
921 if (is_higher_irq(h_pending, h_inservice)) {
922 if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4)))
923 return IRQ_NO_MASKED;
924 else
925 return IRQ_MASKED_BY_VTPR;
926 } else {
927 return IRQ_MASKED_BY_INSVC;
928 }
929}
930
931void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
932{
933 long spsr;
934 int ret;
935
936 local_irq_save(spsr);
937 ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0]));
938 local_irq_restore(spsr);
939
940 vcpu->arch.irq_new_pending = 1;
941}
942
943void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
944{
945 long spsr;
946 int ret;
947
948 local_irq_save(spsr);
949 ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0]));
950 local_irq_restore(spsr);
951 if (ret) {
952 vcpu->arch.irq_new_pending = 1;
953 wmb();
954 }
955}
956
957void update_vhpi(struct kvm_vcpu *vcpu, int vec)
958{
959 u64 vhpi;
960
961 if (vec == NULL_VECTOR)
962 vhpi = 0;
963 else if (vec == NMI_VECTOR)
964 vhpi = 32;
965 else if (vec == ExtINT_VECTOR)
966 vhpi = 16;
967 else
968 vhpi = vec >> 4;
969
970 VCPU(vcpu, vhpi) = vhpi;
971 if (VCPU(vcpu, vac).a_int)
972 ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT,
973 (u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0);
974}
975
976u64 vcpu_get_ivr(struct kvm_vcpu *vcpu)
977{
978 int vec, h_inservice, mask;
979
980 vec = highest_pending_irq(vcpu);
981 h_inservice = highest_inservice_irq(vcpu);
982 mask = irq_masked(vcpu, vec, h_inservice);
983 if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) {
984 if (VCPU(vcpu, vhpi))
985 update_vhpi(vcpu, NULL_VECTOR);
986 return IA64_SPURIOUS_INT_VECTOR;
987 }
988 if (mask == IRQ_MASKED_BY_VTPR) {
989 update_vhpi(vcpu, vec);
990 return IA64_SPURIOUS_INT_VECTOR;
991 }
992 VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63));
993 vcpu_unpend_interrupt(vcpu, vec);
994 return (u64)vec;
995}
996
997/**************************************************************************
998 Privileged operation emulation routines
999 **************************************************************************/
1000u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr)
1001{
1002 union ia64_pta vpta;
1003 union ia64_rr vrr;
1004 u64 pval;
1005 u64 vhpt_offset;
1006
1007 vpta.val = vcpu_get_pta(vcpu);
1008 vrr.val = vcpu_get_rr(vcpu, vadr);
1009 vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1);
1010 if (vpta.vf) {
1011 pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val,
1012 vpta.val, 0, 0, 0, 0);
1013 } else {
1014 pval = (vadr & VRN_MASK) | vhpt_offset |
1015 (vpta.val << 3 >> (vpta.size + 3) << (vpta.size));
1016 }
1017 return pval;
1018}
1019
1020u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr)
1021{
1022 union ia64_rr vrr;
1023 union ia64_pta vpta;
1024 u64 pval;
1025
1026 vpta.val = vcpu_get_pta(vcpu);
1027 vrr.val = vcpu_get_rr(vcpu, vadr);
1028 if (vpta.vf) {
1029 pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val,
1030 0, 0, 0, 0, 0);
1031 } else
1032 pval = 1;
1033
1034 return pval;
1035}
1036
1037u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
1038{
1039 struct thash_data *data;
1040 union ia64_pta vpta;
1041 u64 key;
1042
1043 vpta.val = vcpu_get_pta(vcpu);
1044 if (vpta.vf == 0) {
1045 key = 1;
1046 return key;
1047 }
1048 data = vtlb_lookup(vcpu, vadr, D_TLB);
1049 if (!data || !data->p)
1050 key = 1;
1051 else
1052 key = data->key;
1053
1054 return key;
1055}
1056
1057void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
1058{
1059 unsigned long thash, vadr;
1060
1061 vadr = vcpu_get_gr(vcpu, inst.M46.r3);
1062 thash = vcpu_thash(vcpu, vadr);
1063 vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
1064}
1065
1066void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
1067{
1068 unsigned long tag, vadr;
1069
1070 vadr = vcpu_get_gr(vcpu, inst.M46.r3);
1071 tag = vcpu_ttag(vcpu, vadr);
1072 vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
1073}
1074
1075int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr)
1076{
1077 struct thash_data *data;
1078 union ia64_isr visr, pt_isr;
1079 struct kvm_pt_regs *regs;
1080 struct ia64_psr vpsr;
1081
1082 regs = vcpu_regs(vcpu);
1083 pt_isr.val = VMX(vcpu, cr_isr);
1084 visr.val = 0;
1085 visr.ei = pt_isr.ei;
1086 visr.ir = pt_isr.ir;
1087 vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
1088 visr.na = 1;
1089
1090 data = vhpt_lookup(vadr);
1091 if (data) {
1092 if (data->p == 0) {
1093 vcpu_set_isr(vcpu, visr.val);
1094 data_page_not_present(vcpu, vadr);
1095 return IA64_FAULT;
1096 } else if (data->ma == VA_MATTR_NATPAGE) {
1097 vcpu_set_isr(vcpu, visr.val);
1098 dnat_page_consumption(vcpu, vadr);
1099 return IA64_FAULT;
1100 } else {
1101 *padr = (data->gpaddr >> data->ps << data->ps) |
1102 (vadr & (PSIZE(data->ps) - 1));
1103 return IA64_NO_FAULT;
1104 }
1105 }
1106
1107 data = vtlb_lookup(vcpu, vadr, D_TLB);
1108 if (data) {
1109 if (data->p == 0) {
1110 vcpu_set_isr(vcpu, visr.val);
1111 data_page_not_present(vcpu, vadr);
1112 return IA64_FAULT;
1113 } else if (data->ma == VA_MATTR_NATPAGE) {
1114 vcpu_set_isr(vcpu, visr.val);
1115 dnat_page_consumption(vcpu, vadr);
1116 return IA64_FAULT;
1117 } else{
1118 *padr = ((data->ppn >> (data->ps - 12)) << data->ps)
1119 | (vadr & (PSIZE(data->ps) - 1));
1120 return IA64_NO_FAULT;
1121 }
1122 }
1123 if (!vhpt_enabled(vcpu, vadr, NA_REF)) {
1124 if (vpsr.ic) {
1125 vcpu_set_isr(vcpu, visr.val);
1126 alt_dtlb(vcpu, vadr);
1127 return IA64_FAULT;
1128 } else {
1129 nested_dtlb(vcpu);
1130 return IA64_FAULT;
1131 }
1132 } else {
1133 if (vpsr.ic) {
1134 vcpu_set_isr(vcpu, visr.val);
1135 dvhpt_fault(vcpu, vadr);
1136 return IA64_FAULT;
1137 } else{
1138 nested_dtlb(vcpu);
1139 return IA64_FAULT;
1140 }
1141 }
1142
1143 return IA64_NO_FAULT;
1144}
1145
1146int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
1147{
1148 unsigned long r1, r3;
1149
1150 r3 = vcpu_get_gr(vcpu, inst.M46.r3);
1151
1152 if (vcpu_tpa(vcpu, r3, &r1))
1153 return IA64_FAULT;
1154
1155 vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
1156 return(IA64_NO_FAULT);
1157}
1158
1159void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
1160{
1161 unsigned long r1, r3;
1162
1163 r3 = vcpu_get_gr(vcpu, inst.M46.r3);
1164 r1 = vcpu_tak(vcpu, r3);
1165 vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
1166}
1167
1168/************************************
1169 * Insert/Purge translation register/cache
1170 ************************************/
1171void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
1172{
1173 thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB);
1174}
1175
1176void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
1177{
1178 thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB);
1179}
1180
1181void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
1182{
1183 u64 ps, va, rid;
1184 struct thash_data *p_itr;
1185
1186 ps = itir_ps(itir);
1187 va = PAGEALIGN(ifa, ps);
1188 pte &= ~PAGE_FLAGS_RV_MASK;
1189 rid = vcpu_get_rr(vcpu, ifa);
1190 rid = rid & RR_RID_MASK;
1191 p_itr = (struct thash_data *)&vcpu->arch.itrs[slot];
1192 vcpu_set_tr(p_itr, pte, itir, va, rid);
1193 vcpu_quick_region_set(VMX(vcpu, itr_regions), va);
1194}
1195
1196
1197void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
1198{
1199 u64 gpfn;
1200 u64 ps, va, rid;
1201 struct thash_data *p_dtr;
1202
1203 ps = itir_ps(itir);
1204 va = PAGEALIGN(ifa, ps);
1205 pte &= ~PAGE_FLAGS_RV_MASK;
1206
1207 if (ps != _PAGE_SIZE_16M)
1208 thash_purge_entries(vcpu, va, ps);
1209 gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
1210 if (__gpfn_is_io(gpfn))
1211 pte |= VTLB_PTE_IO;
1212 rid = vcpu_get_rr(vcpu, va);
1213 rid = rid & RR_RID_MASK;
1214 p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot];
1215 vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot],
1216 pte, itir, va, rid);
1217 vcpu_quick_region_set(VMX(vcpu, dtr_regions), va);
1218}
1219
1220void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
1221{
1222 int index;
1223 u64 va;
1224
1225 va = PAGEALIGN(ifa, ps);
1226 while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0)
1227 vcpu->arch.dtrs[index].page_flags = 0;
1228
1229 thash_purge_entries(vcpu, va, ps);
1230}
1231
1232void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
1233{
1234 int index;
1235 u64 va;
1236
1237 va = PAGEALIGN(ifa, ps);
1238 while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0)
1239 vcpu->arch.itrs[index].page_flags = 0;
1240
1241 thash_purge_entries(vcpu, va, ps);
1242}
1243
1244void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps)
1245{
1246 va = PAGEALIGN(va, ps);
1247 thash_purge_entries(vcpu, va, ps);
1248}
1249
1250void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va)
1251{
1252 thash_purge_all(vcpu);
1253}
1254
1255void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps)
1256{
1257 struct exit_ctl_data *p = &vcpu->arch.exit_data;
1258 long psr;
1259 local_irq_save(psr);
1260 p->exit_reason = EXIT_REASON_PTC_G;
1261
1262 p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va);
1263 p->u.ptc_g_data.vaddr = va;
1264 p->u.ptc_g_data.ps = ps;
1265 vmm_transition(vcpu);
1266 /* Do Local Purge Here*/
1267 vcpu_ptc_l(vcpu, va, ps);
1268 local_irq_restore(psr);
1269}
1270
1271
1272void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps)
1273{
1274 vcpu_ptc_ga(vcpu, va, ps);
1275}
1276
1277void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst)
1278{
1279 unsigned long ifa;
1280
1281 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1282 vcpu_ptc_e(vcpu, ifa);
1283}
1284
1285void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst)
1286{
1287 unsigned long ifa, itir;
1288
1289 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1290 itir = vcpu_get_gr(vcpu, inst.M45.r2);
1291 vcpu_ptc_g(vcpu, ifa, itir_ps(itir));
1292}
1293
1294void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst)
1295{
1296 unsigned long ifa, itir;
1297
1298 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1299 itir = vcpu_get_gr(vcpu, inst.M45.r2);
1300 vcpu_ptc_ga(vcpu, ifa, itir_ps(itir));
1301}
1302
1303void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst)
1304{
1305 unsigned long ifa, itir;
1306
1307 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1308 itir = vcpu_get_gr(vcpu, inst.M45.r2);
1309 vcpu_ptc_l(vcpu, ifa, itir_ps(itir));
1310}
1311
1312void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst)
1313{
1314 unsigned long ifa, itir;
1315
1316 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1317 itir = vcpu_get_gr(vcpu, inst.M45.r2);
1318 vcpu_ptr_d(vcpu, ifa, itir_ps(itir));
1319}
1320
1321void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst)
1322{
1323 unsigned long ifa, itir;
1324
1325 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1326 itir = vcpu_get_gr(vcpu, inst.M45.r2);
1327 vcpu_ptr_i(vcpu, ifa, itir_ps(itir));
1328}
1329
1330void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst)
1331{
1332 unsigned long itir, ifa, pte, slot;
1333
1334 slot = vcpu_get_gr(vcpu, inst.M45.r3);
1335 pte = vcpu_get_gr(vcpu, inst.M45.r2);
1336 itir = vcpu_get_itir(vcpu);
1337 ifa = vcpu_get_ifa(vcpu);
1338 vcpu_itr_d(vcpu, slot, pte, itir, ifa);
1339}
1340
1341
1342
1343void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst)
1344{
1345 unsigned long itir, ifa, pte, slot;
1346
1347 slot = vcpu_get_gr(vcpu, inst.M45.r3);
1348 pte = vcpu_get_gr(vcpu, inst.M45.r2);
1349 itir = vcpu_get_itir(vcpu);
1350 ifa = vcpu_get_ifa(vcpu);
1351 vcpu_itr_i(vcpu, slot, pte, itir, ifa);
1352}
1353
1354void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst)
1355{
1356 unsigned long itir, ifa, pte;
1357
1358 itir = vcpu_get_itir(vcpu);
1359 ifa = vcpu_get_ifa(vcpu);
1360 pte = vcpu_get_gr(vcpu, inst.M45.r2);
1361 vcpu_itc_d(vcpu, pte, itir, ifa);
1362}
1363
1364void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst)
1365{
1366 unsigned long itir, ifa, pte;
1367
1368 itir = vcpu_get_itir(vcpu);
1369 ifa = vcpu_get_ifa(vcpu);
1370 pte = vcpu_get_gr(vcpu, inst.M45.r2);
1371 vcpu_itc_i(vcpu, pte, itir, ifa);
1372}
1373
1374/*************************************
1375 * Moves to semi-privileged registers
1376 *************************************/
1377
1378void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst)
1379{
1380 unsigned long imm;
1381
1382 if (inst.M30.s)
1383 imm = -inst.M30.imm;
1384 else
1385 imm = inst.M30.imm;
1386
1387 vcpu_set_itc(vcpu, imm);
1388}
1389
1390void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
1391{
1392 unsigned long r2;
1393
1394 r2 = vcpu_get_gr(vcpu, inst.M29.r2);
1395 vcpu_set_itc(vcpu, r2);
1396}
1397
1398void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
1399{
1400 unsigned long r1;
1401
1402 r1 = vcpu_get_itc(vcpu);
1403 vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
1404}
1405
1406/**************************************************************************
1407 struct kvm_vcpu protection key register access routines
1408 **************************************************************************/
1409
/* Read protection key register @reg via ia64_get_pkr(); @vcpu is unused. */
unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
{
	unsigned long pkr = (unsigned long)ia64_get_pkr(reg);

	return pkr;
}
1414
/* Write @val to protection key register @reg; @vcpu is unused. */
void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
{
	ia64_set_pkr(reg, val);
}
1419
1420/********************************
1421 * Moves to privileged registers
1422 ********************************/
1423unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
1424 unsigned long val)
1425{
1426 union ia64_rr oldrr, newrr;
1427 unsigned long rrval;
1428 struct exit_ctl_data *p = &vcpu->arch.exit_data;
1429 unsigned long psr;
1430
1431 oldrr.val = vcpu_get_rr(vcpu, reg);
1432 newrr.val = val;
1433 vcpu->arch.vrr[reg >> VRN_SHIFT] = val;
1434
1435 switch ((unsigned long)(reg >> VRN_SHIFT)) {
1436 case VRN6:
1437 vcpu->arch.vmm_rr = vrrtomrr(val);
1438 local_irq_save(psr);
1439 p->exit_reason = EXIT_REASON_SWITCH_RR6;
1440 vmm_transition(vcpu);
1441 local_irq_restore(psr);
1442 break;
1443 case VRN4:
1444 rrval = vrrtomrr(val);
1445 vcpu->arch.metaphysical_saved_rr4 = rrval;
1446 if (!is_physical_mode(vcpu))
1447 ia64_set_rr(reg, rrval);
1448 break;
1449 case VRN0:
1450 rrval = vrrtomrr(val);
1451 vcpu->arch.metaphysical_saved_rr0 = rrval;
1452 if (!is_physical_mode(vcpu))
1453 ia64_set_rr(reg, rrval);
1454 break;
1455 default:
1456 ia64_set_rr(reg, vrrtomrr(val));
1457 break;
1458 }
1459
1460 return (IA64_NO_FAULT);
1461}
1462
1463void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
1464{
1465 unsigned long r3, r2;
1466
1467 r3 = vcpu_get_gr(vcpu, inst.M42.r3);
1468 r2 = vcpu_get_gr(vcpu, inst.M42.r2);
1469 vcpu_set_rr(vcpu, r3, r2);
1470}
1471
1472void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst)
1473{
1474}
1475
1476void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst)
1477{
1478}
1479
1480void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst)
1481{
1482 unsigned long r3, r2;
1483
1484 r3 = vcpu_get_gr(vcpu, inst.M42.r3);
1485 r2 = vcpu_get_gr(vcpu, inst.M42.r2);
1486 vcpu_set_pmc(vcpu, r3, r2);
1487}
1488
1489void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst)
1490{
1491 unsigned long r3, r2;
1492
1493 r3 = vcpu_get_gr(vcpu, inst.M42.r3);
1494 r2 = vcpu_get_gr(vcpu, inst.M42.r2);
1495 vcpu_set_pmd(vcpu, r3, r2);
1496}
1497
1498void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
1499{
1500 u64 r3, r2;
1501
1502 r3 = vcpu_get_gr(vcpu, inst.M42.r3);
1503 r2 = vcpu_get_gr(vcpu, inst.M42.r2);
1504 vcpu_set_pkr(vcpu, r3, r2);
1505}
1506
1507void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
1508{
1509 unsigned long r3, r1;
1510
1511 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1512 r1 = vcpu_get_rr(vcpu, r3);
1513 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1514}
1515
1516void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst)
1517{
1518 unsigned long r3, r1;
1519
1520 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1521 r1 = vcpu_get_pkr(vcpu, r3);
1522 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1523}
1524
1525void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst)
1526{
1527 unsigned long r3, r1;
1528
1529 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1530 r1 = vcpu_get_dbr(vcpu, r3);
1531 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1532}
1533
1534void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst)
1535{
1536 unsigned long r3, r1;
1537
1538 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1539 r1 = vcpu_get_ibr(vcpu, r3);
1540 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1541}
1542
1543void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
1544{
1545 unsigned long r3, r1;
1546
1547 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1548 r1 = vcpu_get_pmc(vcpu, r3);
1549 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1550}
1551
/*
 * Read CPUID register @reg.
 *
 * The low byte of CPUID[3] is used as the highest valid index; reads
 * beyond it return 0.  @vcpu is unused.
 */
unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
{
	/* FIXME: This could get called as a result of a rsvd-reg fault */
	unsigned long max_index = ia64_get_cpuid(3) & 0xff;

	if (reg <= max_index)
		return ia64_get_cpuid(reg);

	return 0;
}
1560
1561void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst)
1562{
1563 unsigned long r3, r1;
1564
1565 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1566 r1 = vcpu_get_cpuid(vcpu, r3);
1567 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1568}
1569
1570void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val)
1571{
1572 VCPU(vcpu, tpr) = val;
1573 vcpu->arch.irq_check = 1;
1574}
1575
1576unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
1577{
1578 unsigned long r2;
1579
1580 r2 = vcpu_get_gr(vcpu, inst.M32.r2);
1581 VCPU(vcpu, vcr[inst.M32.cr3]) = r2;
1582
1583 switch (inst.M32.cr3) {
1584 case 0:
1585 vcpu_set_dcr(vcpu, r2);
1586 break;
1587 case 1:
1588 vcpu_set_itm(vcpu, r2);
1589 break;
1590 case 66:
1591 vcpu_set_tpr(vcpu, r2);
1592 break;
1593 case 67:
1594 vcpu_set_eoi(vcpu, r2);
1595 break;
1596 default:
1597 break;
1598 }
1599
1600 return 0;
1601}
1602
1603unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
1604{
1605 unsigned long tgt = inst.M33.r1;
1606 unsigned long val;
1607
1608 switch (inst.M33.cr3) {
1609 case 65:
1610 val = vcpu_get_ivr(vcpu);
1611 vcpu_set_gr(vcpu, tgt, val, 0);
1612 break;
1613
1614 case 67:
1615 vcpu_set_gr(vcpu, tgt, 0L, 0);
1616 break;
1617 default:
1618 val = VCPU(vcpu, vcr[inst.M33.cr3]);
1619 vcpu_set_gr(vcpu, tgt, val, 0);
1620 break;
1621 }
1622
1623 return 0;
1624}
1625
1626void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
1627{
1628
1629 unsigned long mask;
1630 struct kvm_pt_regs *regs;
1631 struct ia64_psr old_psr, new_psr;
1632
1633 old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
1634
1635 regs = vcpu_regs(vcpu);
1636 /* We only support guest as:
1637 * vpsr.pk = 0
1638 * vpsr.is = 0
1639 * Otherwise panic
1640 */
1641 if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
1642 panic_vm(vcpu, "Only support guests with vpsr.pk =0 "
1643 "& vpsr.is=0\n");
1644
1645 /*
1646 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
1647 * Since these bits will become 0, after success execution of each
1648 * instruction, we will change set them to mIA64_PSR
1649 */
1650 VCPU(vcpu, vpsr) = val
1651 & (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD |
1652 IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA));
1653
1654 if (!old_psr.i && (val & IA64_PSR_I)) {
1655 /* vpsr.i 0->1 */
1656 vcpu->arch.irq_check = 1;
1657 }
1658 new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
1659
1660 /*
1661 * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr)
1662 * , except for the following bits:
1663 * ic/i/dt/si/rt/mc/it/bn/vm
1664 */
1665 mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
1666 IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
1667 IA64_PSR_VM;
1668
1669 regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask));
1670
1671 check_mm_mode_switch(vcpu, old_psr, new_psr);
1672
1673 return ;
1674}
1675
1676unsigned long vcpu_cover(struct kvm_vcpu *vcpu)
1677{
1678 struct ia64_psr vpsr;
1679
1680 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1681 vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
1682
1683 if (!vpsr.ic)
1684 VCPU(vcpu, ifs) = regs->cr_ifs;
1685 regs->cr_ifs = IA64_IFS_V;
1686 return (IA64_NO_FAULT);
1687}
1688
1689
1690
1691/**************************************************************************
1692 VCPU banked general register access routines
1693 **************************************************************************/
/*
 * NaT-bit half of a switch to register bank 0.
 *
 * Following the asm text: 16 bits are extracted from *runat starting at
 * bit VMM_PT_REGS_R16_SLOT and deposited into bits 0..15 of *b1unat,
 * which is stored back; then bits 16..31 of *b0unat are extracted and
 * deposited into *runat at bit VMM_PT_REGS_R16_SLOT, which is stored
 * back.
 *
 * NOTE(review): every operand is declared as an input although %0, %1
 * and %3 are written by the asm; "i" only serves as a scratch register
 * here -- confirm this is intended.
 */
1694#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \
1695 do { \
1696 __asm__ __volatile__ ( \
1697 ";;extr.u %0 = %3,%6,16;;\n" \
1698 "dep %1 = %0, %1, 0, 16;;\n" \
1699 "st8 [%4] = %1\n" \
1700 "extr.u %0 = %2, 16, 16;;\n" \
1701 "dep %3 = %0, %3, %6, 16;;\n" \
1702 "st8 [%5] = %3\n" \
1703 ::"r"(i), "r"(*b1unat), "r"(*b0unat), \
1704 "r"(*runat), "r"(b1unat), "r"(runat), \
1705 "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \
1706 } while (0)
1707
1708void vcpu_bsw0(struct kvm_vcpu *vcpu)
1709{
1710 unsigned long i;
1711
1712 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1713 unsigned long *r = &regs->r16;
1714 unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
1715 unsigned long *b1 = &VCPU(vcpu, vgr[0]);
1716 unsigned long *runat = &regs->eml_unat;
1717 unsigned long *b0unat = &VCPU(vcpu, vbnat);
1718 unsigned long *b1unat = &VCPU(vcpu, vnat);
1719
1720
1721 if (VCPU(vcpu, vpsr) & IA64_PSR_BN) {
1722 for (i = 0; i < 16; i++) {
1723 *b1++ = *r;
1724 *r++ = *b0++;
1725 }
1726 vcpu_bsw0_unat(i, b0unat, b1unat, runat,
1727 VMM_PT_REGS_R16_SLOT);
1728 VCPU(vcpu, vpsr) &= ~IA64_PSR_BN;
1729 }
1730}
1731
/*
 * NaT-bit half of a switch to register bank 1.
 *
 * Following the asm text: 16 bits are extracted from *runat starting at
 * bit VMM_PT_REGS_R16_SLOT and deposited into bits 16..31 of *b0unat,
 * which is stored back; then bits 0..15 of *b1unat are extracted and
 * deposited into *runat at bit VMM_PT_REGS_R16_SLOT, which is stored
 * back.
 *
 * NOTE(review): every operand is declared as an input although %0, %1
 * and %3 are written by the asm; "i" only serves as a scratch register
 * here -- confirm this is intended.
 */
1732#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \
1733 do { \
1734 __asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n" \
1735 "dep %1 = %0, %1, 16, 16;;\n" \
1736 "st8 [%4] = %1\n" \
1737 "extr.u %0 = %2, 0, 16;;\n" \
1738 "dep %3 = %0, %3, %6, 16;;\n" \
1739 "st8 [%5] = %3\n" \
1740 ::"r"(i), "r"(*b0unat), "r"(*b1unat), \
1741 "r"(*runat), "r"(b0unat), "r"(runat), \
1742 "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \
1743 } while (0)
1744
1745void vcpu_bsw1(struct kvm_vcpu *vcpu)
1746{
1747 unsigned long i;
1748 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1749 unsigned long *r = &regs->r16;
1750 unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
1751 unsigned long *b1 = &VCPU(vcpu, vgr[0]);
1752 unsigned long *runat = &regs->eml_unat;
1753 unsigned long *b0unat = &VCPU(vcpu, vbnat);
1754 unsigned long *b1unat = &VCPU(vcpu, vnat);
1755
1756 if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) {
1757 for (i = 0; i < 16; i++) {
1758 *b0++ = *r;
1759 *r++ = *b1++;
1760 }
1761 vcpu_bsw1_unat(i, b0unat, b1unat, runat,
1762 VMM_PT_REGS_R16_SLOT);
1763 VCPU(vcpu, vpsr) |= IA64_PSR_BN;
1764 }
1765}
1766
1767void vcpu_rfi(struct kvm_vcpu *vcpu)
1768{
1769 unsigned long ifs, psr;
1770 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1771
1772 psr = VCPU(vcpu, ipsr);
1773 if (psr & IA64_PSR_BN)
1774 vcpu_bsw1(vcpu);
1775 else
1776 vcpu_bsw0(vcpu);
1777 vcpu_set_psr(vcpu, psr);
1778 ifs = VCPU(vcpu, ifs);
1779 if (ifs >> 63)
1780 regs->cr_ifs = ifs;
1781 regs->cr_iip = VCPU(vcpu, iip);
1782}
1783
1784/*
1785 VPSR can't keep track of below bits of guest PSR
1786 This function gets guest PSR
1787 */
1788
1789unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu)
1790{
1791 unsigned long mask;
1792 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1793
1794 mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
1795 IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI;
1796 return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask);
1797}
1798
1799void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst)
1800{
1801 unsigned long vpsr;
1802 unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21)
1803 | inst.M44.imm;
1804
1805 vpsr = vcpu_get_psr(vcpu);
1806 vpsr &= (~imm24);
1807 vcpu_set_psr(vcpu, vpsr);
1808}
1809
1810void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst)
1811{
1812 unsigned long vpsr;
1813 unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21)
1814 | inst.M44.imm;
1815
1816 vpsr = vcpu_get_psr(vcpu);
1817 vpsr |= imm24;
1818 vcpu_set_psr(vcpu, vpsr);
1819}
1820
/*
 * Generate a bit mask.
 *
 * Parameters:
 *   bit -- position of the lowest bit of the mask
 *   len -- number of consecutive bits in the mask
 *
 * Evaluates to a __u64 produced by a single "dep" instruction that
 * deposits all-ones into r0 at the given position and width.
 *
 * NOTE(review): both arguments are passed through "M" constraints, so
 * they presumably have to be compile-time constants -- confirm.
 */
1826#define MASK(bit,len) \
1827({ \
1828 __u64 ret; \
1829 \
1830 __asm __volatile("dep %0=-1, r0, %1, %2"\
1831 : "=r" (ret): \
1832 "M" (bit), \
1833 "M" (len)); \
1834 ret; \
1835})
1836
/*
 * Replace the low 32 bits of the guest PSR with those of @val while
 * keeping the current upper 32 bits.
 */
void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val)
{
	unsigned long low = val & MASK(0, 32);
	unsigned long high = vcpu_get_psr(vcpu) & MASK(32, 32);

	vcpu_set_psr(vcpu, low | high);
}
1842
1843void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst)
1844{
1845 unsigned long val;
1846
1847 val = vcpu_get_gr(vcpu, inst.M35.r2);
1848 vcpu_set_psr_l(vcpu, val);
1849}
1850
1851void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst)
1852{
1853 unsigned long val;
1854
1855 val = vcpu_get_psr(vcpu);
1856 val = (val & MASK(0, 32)) | (val & MASK(35, 2));
1857 vcpu_set_gr(vcpu, inst.M33.r1, val, 0);
1858}
1859
1860void vcpu_increment_iip(struct kvm_vcpu *vcpu)
1861{
1862 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1863 struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
1864 if (ipsr->ri == 2) {
1865 ipsr->ri = 0;
1866 regs->cr_iip += 16;
1867 } else
1868 ipsr->ri++;
1869}
1870
1871void vcpu_decrement_iip(struct kvm_vcpu *vcpu)
1872{
1873 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1874 struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
1875
1876 if (ipsr->ri == 0) {
1877 ipsr->ri = 2;
1878 regs->cr_iip -= 16;
1879 } else
1880 ipsr->ri--;
1881}
1882
1883/** Emulate a privileged operation.
1884 *
1885 *
1886 * @param vcpu virtual cpu
1887 * @cause the reason cause virtualization fault
1888 * @opcode the instruction code which cause virtualization fault
1889 */
1890
1891void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs)
1892{
1893 unsigned long status, cause, opcode ;
1894 INST64 inst;
1895
1896 status = IA64_NO_FAULT;
1897 cause = VMX(vcpu, cause);
1898 opcode = VMX(vcpu, opcode);
1899 inst.inst = opcode;
1900 /*
1901 * Switch to actual virtual rid in rr0 and rr4,
1902 * which is required by some tlb related instructions.
1903 */
1904 prepare_if_physical_mode(vcpu);
1905
1906 switch (cause) {
1907 case EVENT_RSM:
1908 kvm_rsm(vcpu, inst);
1909 break;
1910 case EVENT_SSM:
1911 kvm_ssm(vcpu, inst);
1912 break;
1913 case EVENT_MOV_TO_PSR:
1914 kvm_mov_to_psr(vcpu, inst);
1915 break;
1916 case EVENT_MOV_FROM_PSR:
1917 kvm_mov_from_psr(vcpu, inst);
1918 break;
1919 case EVENT_MOV_FROM_CR:
1920 kvm_mov_from_cr(vcpu, inst);
1921 break;
1922 case EVENT_MOV_TO_CR:
1923 kvm_mov_to_cr(vcpu, inst);
1924 break;
1925 case EVENT_BSW_0:
1926 vcpu_bsw0(vcpu);
1927 break;
1928 case EVENT_BSW_1:
1929 vcpu_bsw1(vcpu);
1930 break;
1931 case EVENT_COVER:
1932 vcpu_cover(vcpu);
1933 break;
1934 case EVENT_RFI:
1935 vcpu_rfi(vcpu);
1936 break;
1937 case EVENT_ITR_D:
1938 kvm_itr_d(vcpu, inst);
1939 break;
1940 case EVENT_ITR_I:
1941 kvm_itr_i(vcpu, inst);
1942 break;
1943 case EVENT_PTR_D:
1944 kvm_ptr_d(vcpu, inst);
1945 break;
1946 case EVENT_PTR_I:
1947 kvm_ptr_i(vcpu, inst);
1948 break;
1949 case EVENT_ITC_D:
1950 kvm_itc_d(vcpu, inst);
1951 break;
1952 case EVENT_ITC_I:
1953 kvm_itc_i(vcpu, inst);
1954 break;
1955 case EVENT_PTC_L:
1956 kvm_ptc_l(vcpu, inst);
1957 break;
1958 case EVENT_PTC_G:
1959 kvm_ptc_g(vcpu, inst);
1960 break;
1961 case EVENT_PTC_GA:
1962 kvm_ptc_ga(vcpu, inst);
1963 break;
1964 case EVENT_PTC_E:
1965 kvm_ptc_e(vcpu, inst);
1966 break;
1967 case EVENT_MOV_TO_RR:
1968 kvm_mov_to_rr(vcpu, inst);
1969 break;
1970 case EVENT_MOV_FROM_RR:
1971 kvm_mov_from_rr(vcpu, inst);
1972 break;
1973 case EVENT_THASH:
1974 kvm_thash(vcpu, inst);
1975 break;
1976 case EVENT_TTAG:
1977 kvm_ttag(vcpu, inst);
1978 break;
1979 case EVENT_TPA:
1980 status = kvm_tpa(vcpu, inst);
1981 break;
1982 case EVENT_TAK:
1983 kvm_tak(vcpu, inst);
1984 break;
1985 case EVENT_MOV_TO_AR_IMM:
1986 kvm_mov_to_ar_imm(vcpu, inst);
1987 break;
1988 case EVENT_MOV_TO_AR:
1989 kvm_mov_to_ar_reg(vcpu, inst);
1990 break;
1991 case EVENT_MOV_FROM_AR:
1992 kvm_mov_from_ar_reg(vcpu, inst);
1993 break;
1994 case EVENT_MOV_TO_DBR:
1995 kvm_mov_to_dbr(vcpu, inst);
1996 break;
1997 case EVENT_MOV_TO_IBR:
1998 kvm_mov_to_ibr(vcpu, inst);
1999 break;
2000 case EVENT_MOV_TO_PMC:
2001 kvm_mov_to_pmc(vcpu, inst);
2002 break;
2003 case EVENT_MOV_TO_PMD:
2004 kvm_mov_to_pmd(vcpu, inst);
2005 break;
2006 case EVENT_MOV_TO_PKR:
2007 kvm_mov_to_pkr(vcpu, inst);
2008 break;
2009 case EVENT_MOV_FROM_DBR:
2010 kvm_mov_from_dbr(vcpu, inst);
2011 break;
2012 case EVENT_MOV_FROM_IBR:
2013 kvm_mov_from_ibr(vcpu, inst);
2014 break;
2015 case EVENT_MOV_FROM_PMC:
2016 kvm_mov_from_pmc(vcpu, inst);
2017 break;
2018 case EVENT_MOV_FROM_PKR:
2019 kvm_mov_from_pkr(vcpu, inst);
2020 break;
2021 case EVENT_MOV_FROM_CPUID:
2022 kvm_mov_from_cpuid(vcpu, inst);
2023 break;
2024 case EVENT_VMSW:
2025 status = IA64_FAULT;
2026 break;
2027 default:
2028 break;
2029 };
2030 /*Assume all status is NO_FAULT ?*/
2031 if (status == IA64_NO_FAULT && cause != EVENT_RFI)
2032 vcpu_increment_iip(vcpu);
2033
2034 recover_if_physical_mode(vcpu);
2035}
2036
2037void init_vcpu(struct kvm_vcpu *vcpu)
2038{
2039 int i;
2040
2041 vcpu->arch.mode_flags = GUEST_IN_PHY;
2042 VMX(vcpu, vrr[0]) = 0x38;
2043 VMX(vcpu, vrr[1]) = 0x38;
2044 VMX(vcpu, vrr[2]) = 0x38;
2045 VMX(vcpu, vrr[3]) = 0x38;
2046 VMX(vcpu, vrr[4]) = 0x38;
2047 VMX(vcpu, vrr[5]) = 0x38;
2048 VMX(vcpu, vrr[6]) = 0x38;
2049 VMX(vcpu, vrr[7]) = 0x38;
2050 VCPU(vcpu, vpsr) = IA64_PSR_BN;
2051 VCPU(vcpu, dcr) = 0;
2052 /* pta.size must not be 0. The minimum is 15 (32k) */
2053 VCPU(vcpu, pta) = 15 << 2;
2054 VCPU(vcpu, itv) = 0x10000;
2055 VCPU(vcpu, itm) = 0;
2056 VMX(vcpu, last_itc) = 0;
2057
2058 VCPU(vcpu, lid) = VCPU_LID(vcpu);
2059 VCPU(vcpu, ivr) = 0;
2060 VCPU(vcpu, tpr) = 0x10000;
2061 VCPU(vcpu, eoi) = 0;
2062 VCPU(vcpu, irr[0]) = 0;
2063 VCPU(vcpu, irr[1]) = 0;
2064 VCPU(vcpu, irr[2]) = 0;
2065 VCPU(vcpu, irr[3]) = 0;
2066 VCPU(vcpu, pmv) = 0x10000;
2067 VCPU(vcpu, cmcv) = 0x10000;
2068 VCPU(vcpu, lrr0) = 0x10000; /* default reset value? */
2069 VCPU(vcpu, lrr1) = 0x10000; /* default reset value? */
2070 update_vhpi(vcpu, NULL_VECTOR);
2071 VLSAPIC_XTP(vcpu) = 0x80; /* disabled */
2072
2073 for (i = 0; i < 4; i++)
2074 VLSAPIC_INSVC(vcpu, i) = 0;
2075}
2076
2077void kvm_init_all_rr(struct kvm_vcpu *vcpu)
2078{
2079 unsigned long psr;
2080
2081 local_irq_save(psr);
2082
2083 /* WARNING: not allow co-exist of both virtual mode and physical
2084 * mode in same region
2085 */
2086
2087 vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0]));
2088 vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4]));
2089
2090 if (is_physical_mode(vcpu)) {
2091 if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
2092 panic_vm(vcpu, "Machine Status conflicts!\n");
2093
2094 ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
2095 ia64_dv_serialize_data();
2096 ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4);
2097 ia64_dv_serialize_data();
2098 } else {
2099 ia64_set_rr((VRN0 << VRN_SHIFT),
2100 vcpu->arch.metaphysical_saved_rr0);
2101 ia64_dv_serialize_data();
2102 ia64_set_rr((VRN4 << VRN_SHIFT),
2103 vcpu->arch.metaphysical_saved_rr4);
2104 ia64_dv_serialize_data();
2105 }
2106 ia64_set_rr((VRN1 << VRN_SHIFT),
2107 vrrtomrr(VMX(vcpu, vrr[VRN1])));
2108 ia64_dv_serialize_data();
2109 ia64_set_rr((VRN2 << VRN_SHIFT),
2110 vrrtomrr(VMX(vcpu, vrr[VRN2])));
2111 ia64_dv_serialize_data();
2112 ia64_set_rr((VRN3 << VRN_SHIFT),
2113 vrrtomrr(VMX(vcpu, vrr[VRN3])));
2114 ia64_dv_serialize_data();
2115 ia64_set_rr((VRN5 << VRN_SHIFT),
2116 vrrtomrr(VMX(vcpu, vrr[VRN5])));
2117 ia64_dv_serialize_data();
2118 ia64_set_rr((VRN7 << VRN_SHIFT),
2119 vrrtomrr(VMX(vcpu, vrr[VRN7])));
2120 ia64_dv_serialize_data();
2121 ia64_srlz_d();
2122 ia64_set_psr(psr);
2123}
2124
2125int vmm_entry(void)
2126{
2127 struct kvm_vcpu *v;
2128 v = current_vcpu;
2129
2130 ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd,
2131 0, 0, 0, 0, 0, 0);
2132 kvm_init_vtlb(v);
2133 kvm_init_vhpt(v);
2134 init_vcpu(v);
2135 kvm_init_all_rr(v);
2136 vmm_reset_entry();
2137
2138 return 0;
2139}
2140
2141static void kvm_show_registers(struct kvm_pt_regs *regs)
2142{
2143 unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
2144
2145 struct kvm_vcpu *vcpu = current_vcpu;
2146 if (vcpu != NULL)
2147 printk("vcpu 0x%p vcpu %d\n",
2148 vcpu, vcpu->vcpu_id);
2149
2150 printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n",
2151 regs->cr_ipsr, regs->cr_ifs, ip);
2152
2153 printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
2154 regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
2155 printk("rnat: %016lx bspstore: %016lx pr : %016lx\n",
2156 regs->ar_rnat, regs->ar_bspstore, regs->pr);
2157 printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
2158 regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
2159 printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
2160 printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0,
2161 regs->b6, regs->b7);
2162 printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
2163 regs->f6.u.bits[1], regs->f6.u.bits[0],
2164 regs->f7.u.bits[1], regs->f7.u.bits[0]);
2165 printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
2166 regs->f8.u.bits[1], regs->f8.u.bits[0],
2167 regs->f9.u.bits[1], regs->f9.u.bits[0]);
2168 printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
2169 regs->f10.u.bits[1], regs->f10.u.bits[0],
2170 regs->f11.u.bits[1], regs->f11.u.bits[0]);
2171
2172 printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1,
2173 regs->r2, regs->r3);
2174 printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8,
2175 regs->r9, regs->r10);
2176 printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
2177 regs->r12, regs->r13);
2178 printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
2179 regs->r15, regs->r16);
2180 printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
2181 regs->r18, regs->r19);
2182 printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
2183 regs->r21, regs->r22);
2184 printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
2185 regs->r24, regs->r25);
2186 printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
2187 regs->r27, regs->r28);
2188 printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
2189 regs->r30, regs->r31);
2190
2191}
2192
2193void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
2194{
2195 va_list args;
2196 char buf[256];
2197
2198 struct kvm_pt_regs *regs = vcpu_regs(v);
2199 struct exit_ctl_data *p = &v->arch.exit_data;
2200 va_start(args, fmt);
2201 vsnprintf(buf, sizeof(buf), fmt, args);
2202 va_end(args);
2203 printk(buf);
2204 kvm_show_registers(regs);
2205 p->exit_reason = EXIT_REASON_VM_PANIC;
2206 vmm_transition(v);
2207 /*Never to return*/
2208 while (1);
2209}
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
deleted file mode 100644
index 988911b4cc7a..000000000000
--- a/arch/ia64/kvm/vcpu.h
+++ /dev/null
@@ -1,752 +0,0 @@
1/*
2 * vcpu.h: vcpu routines
3 * Copyright (c) 2005, Intel Corporation.
4 * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
5 * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
6 *
7 * Copyright (c) 2007, Intel Corporation.
8 * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
9 * Xiantao Zhang (xiantao.zhang@intel.com)
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms and conditions of the GNU General Public License,
13 * version 2, as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * more details.
19 *
20 * You should have received a copy of the GNU General Public License along with
21 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
22 * Place - Suite 330, Boston, MA 02111-1307 USA.
23 *
24 */
25
26
27#ifndef __KVM_VCPU_H__
28#define __KVM_VCPU_H__
29
30#include <asm/types.h>
31#include <asm/fpu.h>
32#include <asm/processor.h>
33
34#ifndef __ASSEMBLY__
35#include "vti.h"
36
37#include <linux/kvm_host.h>
38#include <linux/spinlock.h>
39
40typedef unsigned long IA64_INST;
41
42typedef union U_IA64_BUNDLE {
43 unsigned long i64[2];
44 struct { unsigned long template:5, slot0:41, slot1a:18,
45 slot1b:23, slot2:41; };
46 /* NOTE: following doesn't work because bitfields can't cross natural
47 size boundaries
48 struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */
49} IA64_BUNDLE;
50
51typedef union U_INST64_A5 {
52 IA64_INST inst;
53 struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5,
54 imm9d:9, s:1, major:4; };
55} INST64_A5;
56
57typedef union U_INST64_B4 {
58 IA64_INST inst;
59 struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6,
60 wh:2, d:1, un1:1, major:4; };
61} INST64_B4;
62
63typedef union U_INST64_B8 {
64 IA64_INST inst;
65 struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; };
66} INST64_B8;
67
68typedef union U_INST64_B9 {
69 IA64_INST inst;
70 struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
71} INST64_B9;
72
73typedef union U_INST64_I19 {
74 IA64_INST inst;
75 struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
76} INST64_I19;
77
78typedef union U_INST64_I26 {
79 IA64_INST inst;
80 struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
81} INST64_I26;
82
83typedef union U_INST64_I27 {
84 IA64_INST inst;
85 struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; };
86} INST64_I27;
87
88typedef union U_INST64_I28 { /* not privileged (mov from AR) */
89 IA64_INST inst;
90 struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
91} INST64_I28;
92
93typedef union U_INST64_M28 {
94 IA64_INST inst;
95 struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; };
96} INST64_M28;
97
98typedef union U_INST64_M29 {
99 IA64_INST inst;
100 struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
101} INST64_M29;
102
103typedef union U_INST64_M30 {
104 IA64_INST inst;
105 struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2,
106 x3:3, s:1, major:4; };
107} INST64_M30;
108
109typedef union U_INST64_M31 {
110 IA64_INST inst;
111 struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
112} INST64_M31;
113
114typedef union U_INST64_M32 {
115 IA64_INST inst;
116 struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; };
117} INST64_M32;
118
119typedef union U_INST64_M33 {
120 IA64_INST inst;
121 struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; };
122} INST64_M33;
123
124typedef union U_INST64_M35 {
125 IA64_INST inst;
126 struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
127
128} INST64_M35;
129
130typedef union U_INST64_M36 {
131 IA64_INST inst;
132 struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; };
133} INST64_M36;
134
135typedef union U_INST64_M37 {
136 IA64_INST inst;
137 struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3,
138 i:1, major:4; };
139} INST64_M37;
140
141typedef union U_INST64_M41 {
142 IA64_INST inst;
143 struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
144} INST64_M41;
145
146typedef union U_INST64_M42 {
147 IA64_INST inst;
148 struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
149} INST64_M42;
150
151typedef union U_INST64_M43 {
152 IA64_INST inst;
153 struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; };
154} INST64_M43;
155
156typedef union U_INST64_M44 {
157 IA64_INST inst;
158 struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; };
159} INST64_M44;
160
161typedef union U_INST64_M45 {
162 IA64_INST inst;
163 struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
164} INST64_M45;
165
166typedef union U_INST64_M46 {
167 IA64_INST inst;
168 struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6,
169 x3:3, un1:1, major:4; };
170} INST64_M46;
171
172typedef union U_INST64_M47 {
173 IA64_INST inst;
174 struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
175} INST64_M47;
176
177typedef union U_INST64_M1{
178 IA64_INST inst;
179 struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2,
180 x6:6, m:1, major:4; };
181} INST64_M1;
182
183typedef union U_INST64_M2{
184 IA64_INST inst;
185 struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2,
186 x6:6, m:1, major:4; };
187} INST64_M2;
188
189typedef union U_INST64_M3{
190 IA64_INST inst;
191 struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2,
192 x6:6, s:1, major:4; };
193} INST64_M3;
194
195typedef union U_INST64_M4 {
196 IA64_INST inst;
197 struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2,
198 x6:6, m:1, major:4; };
199} INST64_M4;
200
201typedef union U_INST64_M5 {
202 IA64_INST inst;
203 struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2,
204 x6:6, s:1, major:4; };
205} INST64_M5;
206
207typedef union U_INST64_M6 {
208 IA64_INST inst;
209 struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2,
210 x6:6, m:1, major:4; };
211} INST64_M6;
212
213typedef union U_INST64_M9 {
214 IA64_INST inst;
215 struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2,
216 x6:6, m:1, major:4; };
217} INST64_M9;
218
219typedef union U_INST64_M10 {
220 IA64_INST inst;
221 struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2,
222 x6:6, s:1, major:4; };
223} INST64_M10;
224
225typedef union U_INST64_M12 {
226 IA64_INST inst;
227 struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2,
228 x6:6, m:1, major:4; };
229} INST64_M12;
230
231typedef union U_INST64_M15 {
232 IA64_INST inst;
233 struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2,
234 x6:6, s:1, major:4; };
235} INST64_M15;
236
237typedef union U_INST64 {
238 IA64_INST inst;
239 struct { unsigned long :37, major:4; } generic;
240 INST64_A5 A5; /* used in build_hypercall_bundle only */
241 INST64_B4 B4; /* used in build_hypercall_bundle only */
242 INST64_B8 B8; /* rfi, bsw.[01] */
243 INST64_B9 B9; /* break.b */
244 INST64_I19 I19; /* used in build_hypercall_bundle only */
245 INST64_I26 I26; /* mov register to ar (I unit) */
246 INST64_I27 I27; /* mov immediate to ar (I unit) */
247 INST64_I28 I28; /* mov from ar (I unit) */
248 INST64_M1 M1; /* ld integer */
249 INST64_M2 M2;
250 INST64_M3 M3;
251 INST64_M4 M4; /* st integer */
252 INST64_M5 M5;
253 INST64_M6 M6; /* ldfd floating pointer */
254 INST64_M9 M9; /* stfd floating pointer */
255 INST64_M10 M10; /* stfd floating pointer */
256 INST64_M12 M12; /* ldfd pair floating pointer */
257 INST64_M15 M15; /* lfetch + imm update */
258 INST64_M28 M28; /* purge translation cache entry */
259 INST64_M29 M29; /* mov register to ar (M unit) */
260 INST64_M30 M30; /* mov immediate to ar (M unit) */
261 INST64_M31 M31; /* mov from ar (M unit) */
262 INST64_M32 M32; /* mov reg to cr */
263 INST64_M33 M33; /* mov from cr */
264 INST64_M35 M35; /* mov to psr */
265 INST64_M36 M36; /* mov from psr */
266 INST64_M37 M37; /* break.m */
267 INST64_M41 M41; /* translation cache insert */
268 INST64_M42 M42; /* mov to indirect reg/translation reg insert*/
269 INST64_M43 M43; /* mov from indirect reg */
270 INST64_M44 M44; /* set/reset system mask */
271 INST64_M45 M45; /* translation purge */
272 INST64_M46 M46; /* translation access (tpa,tak) */
273 INST64_M47 M47; /* purge translation entry */
274} INST64;
275
276#define MASK_41 ((unsigned long)0x1ffffffffff)
277
278/* Virtual address memory attributes encoding */
279#define VA_MATTR_WB 0x0
280#define VA_MATTR_UC 0x4
281#define VA_MATTR_UCE 0x5
282#define VA_MATTR_WC 0x6
283#define VA_MATTR_NATPAGE 0x7
284
285#define PMASK(size) (~((size) - 1))
286#define PSIZE(size) (1UL<<(size))
287#define CLEARLSB(ppn, nbits) (((ppn) >> (nbits)) << (nbits))
288#define PAGEALIGN(va, ps) CLEARLSB(va, ps)
289#define PAGE_FLAGS_RV_MASK (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53))
290#define _PAGE_MA_ST (0x1 << 2) /* is reserved for software use */
291
292#define ARCH_PAGE_SHIFT 12
293
294#define INVALID_TI_TAG (1UL << 63)
295
296#define VTLB_PTE_P_BIT 0
297#define VTLB_PTE_IO_BIT 60
298#define VTLB_PTE_IO (1UL<<VTLB_PTE_IO_BIT)
299#define VTLB_PTE_P (1UL<<VTLB_PTE_P_BIT)
300
/*
 * Region bitmap helpers.  The top three bits of _ifa (bits 61..63) select
 * one of eight bits in _tr_regions.  Both arguments are fully
 * parenthesized so that expression arguments are evaluated as a unit.
 */

/* Non-zero when the bit selected by _ifa is set in _tr_regions. */
#define vcpu_quick_region_check(_tr_regions, _ifa)		\
	((_tr_regions) & (1 << ((unsigned long)(_ifa) >> 61)))

/* Set the bit selected by _ifa in _tr_regions (must be an lvalue). */
#define vcpu_quick_region_set(_tr_regions, _ifa)		\
	do { (_tr_regions) |= (1 << ((unsigned long)(_ifa) >> 61)); } while (0)
306
307static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir,
308 u64 va, u64 rid)
309{
310 trp->page_flags = pte;
311 trp->itir = itir;
312 trp->vadr = va;
313 trp->rid = rid;
314}
315
316extern u64 kvm_get_mpt_entry(u64 gpfn);
317
318/* Return I/ */
319static inline u64 __gpfn_is_io(u64 gpfn)
320{
321 u64 pte;
322 pte = kvm_get_mpt_entry(gpfn);
323 if (!(pte & GPFN_INV_MASK)) {
324 pte = pte & GPFN_IO_MASK;
325 if (pte != GPFN_PHYS_MMIO)
326 return pte;
327 }
328 return 0;
329}
330#endif
331#define IA64_NO_FAULT 0
332#define IA64_FAULT 1
333
334#define VMM_RBS_OFFSET ((VMM_TASK_SIZE + 15) & ~15)
335
336#define SW_BAD 0 /* Bad mode transition */
337#define SW_V2P 1 /* Physical emulation is activated */
338#define SW_P2V 2 /* Exit physical mode emulation */
339#define SW_SELF 3 /* No mode transition */
340#define SW_NOP 4 /* Mode transition, but without action required */
341
342#define GUEST_IN_PHY 0x1
343#define GUEST_PHY_EMUL 0x2
344
345#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP))
346
347#define VRN_SHIFT 61
348#define VRN_MASK 0xe000000000000000
349#define VRN0 0x0UL
350#define VRN1 0x1UL
351#define VRN2 0x2UL
352#define VRN3 0x3UL
353#define VRN4 0x4UL
354#define VRN5 0x5UL
355#define VRN6 0x6UL
356#define VRN7 0x7UL
357
358#define IRQ_NO_MASKED 0
359#define IRQ_MASKED_BY_VTPR 1
360#define IRQ_MASKED_BY_INSVC 2 /* masked by inservice IRQ */
361
362#define PTA_BASE_SHIFT 15
363
364#define IA64_PSR_VM_BIT 46
365#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT)
366
367/* Interruption Function State */
368#define IA64_IFS_V_BIT 63
369#define IA64_IFS_V (__IA64_UL(1) << IA64_IFS_V_BIT)
370
371#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX)
372#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX)
373
374#ifndef __ASSEMBLY__
375
376#include <asm/gcc_intrin.h>
377
/*
 * Non-zero when GUEST_IN_PHY is set in the vcpu's arch.mode_flags.
 * The argument is parenthesized so pointer expressions such as
 * "base + i" can be passed safely.
 */
#define is_physical_mode(v) \
	(((v)->arch.mode_flags) & GUEST_IN_PHY)

/* Logical negation of is_physical_mode(). */
#define is_virtual_mode(v) \
	(!is_physical_mode(v))
383
384#define MODE_IND(psr) \
385 (((psr).it << 2) + ((psr).dt << 1) + (psr).rt)
386
/*
 * Lock word used by the VMM's private spinlock helpers.  It is defined
 * unconditionally so that declarations taking a vmm_spinlock_t * (such as
 * vmm_spin_lock()/vmm_spin_unlock()) also compile when CONFIG_SMP is unset.
 */
typedef struct {
	volatile unsigned int lock;
} vmm_spinlock_t;

#ifndef CONFIG_SMP
/* Without CONFIG_SMP both operations expand to nothing. */
#define _vmm_raw_spin_lock(x) do {}while(0)
#define _vmm_raw_spin_unlock(x) do {}while(0)
#else
/*
 * Acquire: issue ia64_cmpxchg4_acq() on the lock word; while it returns a
 * non-zero value, spin reading the word until it reads as zero and retry.
 */
#define _vmm_raw_spin_lock(x)						\
	do {								\
		__u32 *ia64_spinlock_ptr = (__u32 *) (x);		\
		__u64 ia64_spinlock_val;				\
		ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
		if (unlikely(ia64_spinlock_val)) {			\
			do {						\
				while (*ia64_spinlock_ptr)		\
					ia64_barrier();			\
				ia64_spinlock_val =			\
				ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
			} while (ia64_spinlock_val);			\
		}							\
	} while (0)

/* Release: barrier(), then store zero to the lock word. */
#define _vmm_raw_spin_unlock(x)						\
	do { barrier();							\
		((vmm_spinlock_t *)(x))->lock = 0; }			\
	while (0)
#endif
414
415void vmm_spin_lock(vmm_spinlock_t *lock);
416void vmm_spin_unlock(vmm_spinlock_t *lock);
417enum {
418 I_TLB = 1,
419 D_TLB = 2
420};
421
422union kvm_va {
423 struct {
424 unsigned long off : 60; /* intra-region offset */
425 unsigned long reg : 4; /* region number */
426 } f;
427 unsigned long l;
428 void *p;
429};
430
431#define __kvm_pa(x) ({union kvm_va _v; _v.l = (long) (x); \
432 _v.f.reg = 0; _v.l; })
433#define __kvm_va(x) ({union kvm_va _v; _v.l = (long) (x); \
434 _v.f.reg = -1; _v.p; })
435
436#define _REGION_ID(x) ({union ia64_rr _v; _v.val = (long)(x); \
437 _v.rid; })
438#define _REGION_PAGE_SIZE(x) ({union ia64_rr _v; _v.val = (long)(x); \
439 _v.ps; })
440#define _REGION_HW_WALKER(x) ({union ia64_rr _v; _v.val = (long)(x); \
441 _v.ve; })
442
443enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF };
444enum tlb_miss_type { INSTRUCTION, DATA, REGISTER };
445
446#define VCPU(_v, _x) ((_v)->arch.vpd->_x)
447#define VMX(_v, _x) ((_v)->arch._x)
448
449#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i])
450#define VLSAPIC_XTP(_v) VMX(_v, xtp)
451
/*
 * Extract the 6-bit field held in bits 2..7 of an itir value
 * (presumably the page-size field, going by the name).
 */
static inline unsigned long itir_ps(unsigned long itir)
{
	const unsigned long field_mask = 0x3f;

	return (itir >> 2) & field_mask;
}
456
457
458/**************************************************************************
459 VCPU control register access routines
460 **************************************************************************/
461
462static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu)
463{
464 return ((u64)VCPU(vcpu, itir));
465}
466
467static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val)
468{
469 VCPU(vcpu, itir) = val;
470}
471
472static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu)
473{
474 return ((u64)VCPU(vcpu, ifa));
475}
476
477static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val)
478{
479 VCPU(vcpu, ifa) = val;
480}
481
482static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu)
483{
484 return ((u64)VCPU(vcpu, iva));
485}
486
487static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu)
488{
489 return ((u64)VCPU(vcpu, pta));
490}
491
492static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu)
493{
494 return ((u64)VCPU(vcpu, lid));
495}
496
497static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu)
498{
499 return ((u64)VCPU(vcpu, tpr));
500}
501
502static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu)
503{
504 return (0UL); /*reads of eoi always return 0 */
505}
506
507static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu)
508{
509 return ((u64)VCPU(vcpu, irr[0]));
510}
511
512static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu)
513{
514 return ((u64)VCPU(vcpu, irr[1]));
515}
516
517static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu)
518{
519 return ((u64)VCPU(vcpu, irr[2]));
520}
521
522static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu)
523{
524 return ((u64)VCPU(vcpu, irr[3]));
525}
526
527static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val)
528{
529 ia64_setreg(_IA64_REG_CR_DCR, val);
530}
531
532static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val)
533{
534 VCPU(vcpu, isr) = val;
535}
536
537static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val)
538{
539 VCPU(vcpu, lid) = val;
540}
541
542static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val)
543{
544 VCPU(vcpu, ipsr) = val;
545}
546
547static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val)
548{
549 VCPU(vcpu, iip) = val;
550}
551
552static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val)
553{
554 VCPU(vcpu, ifs) = val;
555}
556
557static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val)
558{
559 VCPU(vcpu, iipa) = val;
560}
561
562static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val)
563{
564 VCPU(vcpu, iha) = val;
565}
566
567
568static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg)
569{
570 return vcpu->arch.vrr[reg>>61];
571}
572
573/**************************************************************************
574 VCPU debug breakpoint register access routines
575 **************************************************************************/
576
577static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
578{
579 __ia64_set_dbr(reg, val);
580}
581
582static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
583{
584 ia64_set_ibr(reg, val);
585}
586
587static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg)
588{
589 return ((u64)__ia64_get_dbr(reg));
590}
591
592static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg)
593{
594 return ((u64)ia64_get_ibr(reg));
595}
596
597/**************************************************************************
598 VCPU performance monitor register access routines
599 **************************************************************************/
600static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val)
601{
602 /* NOTE: Writes to unimplemented PMC registers are discarded */
603 ia64_set_pmc(reg, val);
604}
605
606static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val)
607{
608 /* NOTE: Writes to unimplemented PMD registers are discarded */
609 ia64_set_pmd(reg, val);
610}
611
612static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg)
613{
614 /* NOTE: Reads from unimplemented PMC registers return zero */
615 return ((u64)ia64_get_pmc(reg));
616}
617
618static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg)
619{
620 /* NOTE: Reads from unimplemented PMD registers return zero */
621 return ((u64)ia64_get_pmd(reg));
622}
623
/*
 * Build a modified region-register value from val and return it:
 *  - rid is shifted left by four and its low nibble is set to 0xe,
 *  - ps is capped at PAGE_SHIFT,
 *  - ve is set to 1.
 * NOTE(review): the name suggests a virtual-to-machine rr conversion;
 * confirm against the callers.
 */
624static inline unsigned long vrrtomrr(unsigned long val)
625{
626 union ia64_rr rr;
627 rr.val = val;
/* reads the old rid, so this must come after rr.val is loaded */
628 rr.rid = (rr.rid << 4) | 0xe;
629 if (rr.ps > PAGE_SHIFT)
630 rr.ps = PAGE_SHIFT;
631 rr.ve = 1;
632 return rr.val;
633}
634
635
636static inline int highest_bits(int *dat)
637{
638 u32 bits, bitnum;
639 int i;
640
641 /* loop for all 256 bits */
642 for (i = 7; i >= 0 ; i--) {
643 bits = dat[i];
644 if (bits) {
645 bitnum = fls(bits);
646 return i * 32 + bitnum - 1;
647 }
648 }
649 return NULL_VECTOR;
650}
651
652/*
653 * The pending irq is higher than the inservice one.
654 *
655 */
656static inline int is_higher_irq(int pending, int inservice)
657{
658 return ((pending > inservice)
659 || ((pending != NULL_VECTOR)
660 && (inservice == NULL_VECTOR)));
661}
662
/*
 * Returns 1 when pending, shifted right by four, is greater than mic;
 * 0 otherwise.
 */
static inline int is_higher_class(int pending, int mic)
{
	int pending_class = pending >> 4;

	return pending_class > mic;
}
667
668/*
669 * Return 0-255 for pending irq.
670 * NULL_VECTOR: when no pending.
671 */
672static inline int highest_pending_irq(struct kvm_vcpu *vcpu)
673{
674 if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR))
675 return NMI_VECTOR;
676 if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR))
677 return ExtINT_VECTOR;
678
679 return highest_bits((int *)&VCPU(vcpu, irr[0]));
680}
681
682static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
683{
684 if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR))
685 return NMI_VECTOR;
686 if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR))
687 return ExtINT_VECTOR;
688
689 return highest_bits((int *)&(VMX(vcpu, insvc[0])));
690}
691
692extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
693 struct ia64_fpreg *val);
694extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
695 struct ia64_fpreg *val);
696extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg);
697extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg,
698 u64 val, int nat);
699extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu);
700extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val);
701extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
702extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
703extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
704 u64 itir, u64 va, int type);
705extern struct thash_data *vhpt_lookup(u64 va);
706extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
707extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
708extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
709extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
710extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
711 u64 itir, u64 ifa, int type);
712extern void thash_purge_all(struct kvm_vcpu *v);
713extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
714 u64 va, int is_data);
715extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va,
716 u64 ps, int is_data);
717
718extern void vcpu_increment_iip(struct kvm_vcpu *v);
719extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu);
720extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
721extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
722extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr);
723extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr);
724extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr);
725extern void nested_dtlb(struct kvm_vcpu *vcpu);
726extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr);
727extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref);
728
729extern void update_vhpi(struct kvm_vcpu *vcpu, int vec);
730extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice);
731
732extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle);
733extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma);
734extern void vmm_transition(struct kvm_vcpu *vcpu);
735extern void vmm_trampoline(union context *from, union context *to);
736extern int vmm_entry(void);
737extern u64 vcpu_get_itc(struct kvm_vcpu *vcpu);
738
739extern void vmm_reset_entry(void);
740void kvm_init_vtlb(struct kvm_vcpu *v);
741void kvm_init_vhpt(struct kvm_vcpu *v);
742void thash_init(struct thash_cb *hcb, u64 sz);
743
744void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
745u64 kvm_gpa_to_mpa(u64 gpa);
746extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
747 u64 arg4, u64 arg5, u64 arg6, u64 arg7);
748
749extern long vmm_sanity;
750
751#endif
752#endif /* __KVM_VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
deleted file mode 100644
index 176a12cd56de..000000000000
--- a/arch/ia64/kvm/vmm.c
+++ /dev/null
@@ -1,99 +0,0 @@
1/*
2 * vmm.c: vmm module interface with kvm module
3 *
4 * Copyright (c) 2007, Intel Corporation.
5 *
6 * Xiantao Zhang (xiantao.zhang@intel.com)
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 */
21
22
23#include <linux/kernel.h>
24#include <linux/module.h>
25#include <asm/fpswa.h>
26
27#include "vcpu.h"
28
29MODULE_AUTHOR("Intel");
30MODULE_LICENSE("GPL");
31
32extern char kvm_ia64_ivt;
33extern char kvm_asm_mov_from_ar;
34extern char kvm_asm_mov_from_ar_sn2;
35extern fpswa_interface_t *vmm_fpswa_interface;
36
37long vmm_sanity = 1;
38
39struct kvm_vmm_info vmm_info = {
40 .module = THIS_MODULE,
41 .vmm_entry = vmm_entry,
42 .tramp_entry = vmm_trampoline,
43 .vmm_ivt = (unsigned long)&kvm_ia64_ivt,
44 .patch_mov_ar = (unsigned long)&kvm_asm_mov_from_ar,
45 .patch_mov_ar_sn2 = (unsigned long)&kvm_asm_mov_from_ar_sn2,
46};
47
/*
 * Module init hook: copies fpswa_interface into vmm_fpswa_interface, then
 * passes vmm_info to kvm_init() and returns its result.
 */
48static int __init kvm_vmm_init(void)
49{
50
51 vmm_fpswa_interface = fpswa_interface;
52
53 /*Register vmm data to kvm side*/
/* NOTE(review): the meaning of the 1024 and 0 arguments is not visible here */
54 return kvm_init(&vmm_info, 1024, 0, THIS_MODULE);
55}
56
57static void __exit kvm_vmm_exit(void)
58{
59 kvm_exit();
60 return ;
61}
62
63void vmm_spin_lock(vmm_spinlock_t *lock)
64{
65 _vmm_raw_spin_lock(lock);
66}
67
68void vmm_spin_unlock(vmm_spinlock_t *lock)
69{
70 _vmm_raw_spin_unlock(lock);
71}
72
73static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
74{
75 struct exit_ctl_data *p = &vcpu->arch.exit_data;
76 long psr;
77
78 local_irq_save(psr);
79 p->exit_reason = EXIT_REASON_DEBUG;
80 vmm_transition(vcpu);
81 local_irq_restore(psr);
82}
83
/*
 * VMM-local printk(): zeroes the current vcpu's arch.log_buf, formats the
 * message into it (at most VMM_LOG_LEN bytes, per vsnprintf) and then
 * calls vcpu_debug_exit() -- presumably so the buffer can be emitted
 * outside the VMM; confirm against the EXIT_REASON_DEBUG handler.
 *
 * Returns the value returned by vsnprintf().
 */
84asmlinkage int printk(const char *fmt, ...)
85{
86 struct kvm_vcpu *vcpu = current_vcpu;
87 va_list args;
88 int r;
89
90 memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
91 va_start(args, fmt);
92 r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
93 va_end(args);
94 vcpu_debug_exit(vcpu);
95 return r;
96}
97
98module_init(kvm_vmm_init)
99module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
deleted file mode 100644
index 397e34a63e18..000000000000
--- a/arch/ia64/kvm/vmm_ivt.S
+++ /dev/null
@@ -1,1392 +0,0 @@
1/*
2 * arch/ia64/kvm/vmm_ivt.S
3 *
4 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com>
6 * David Mosberger <davidm@hpl.hp.com>
7 * Copyright (C) 2000, 2002-2003 Intel Co
8 * Asit Mallick <asit.k.mallick@intel.com>
9 * Suresh Siddha <suresh.b.siddha@intel.com>
10 * Kenneth Chen <kenneth.w.chen@intel.com>
11 * Fenghua Yu <fenghua.yu@intel.com>
12 *
13 *
14 * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling
15 * for SMP
16 * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB
17 * handler now uses virtual PT.
18 *
19 * 07/6/20 Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
20 * Supporting Intel virtualization architecture
21 *
22 */
23
24/*
25 * This file defines the interruption vector table used by the CPU.
26 * It does not include one entry per possible cause of interruption.
27 *
28 * The first 20 entries of the table contain 64 bundles each while the
29 * remaining 48 entries contain only 16 bundles each.
30 *
31 * The 64 bundles are used to allow inlining the whole handler for
32 * critical
33 * interruptions like TLB misses.
34 *
35 * For each entry, the comment is as follows:
36 *
37 * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss
38 * (12,51)
39 * entry offset ----/ / / /
40 * /
41 * entry number ---------/ / /
42 * /
43 * size of the entry -------------/ /
44 * /
45 * vector name -------------------------------------/
46 * /
47 * interruptions triggering this vector
48 * ----------------------/
49 *
50 * The table is 32KB in size and must be aligned on 32KB
51 * boundary.
52 * (The CPU ignores the 15 lower bits of the address)
53 *
54 * Table is based upon EAS2.6 (Oct 1999)
55 */
56
57
58#include <asm/asmmacro.h>
59#include <asm/cache.h>
60#include <asm/pgtable.h>
61
62#include "asm-offsets.h"
63#include "vcpu.h"
64#include "kvm_minstate.h"
65#include "vti.h"
66
67#if 0
68# define PSR_DEFAULT_BITS psr.ac
69#else
70# define PSR_DEFAULT_BITS 0
71#endif
72
73#define KVM_FAULT(n) \
74 kvm_fault_##n:; \
75 mov r19=n;; \
76 br.sptk.many kvm_vmm_panic; \
77 ;; \
78
79#define KVM_REFLECT(n) \
80 mov r31=pr; \
81 mov r19=n; /* prepare to save predicates */ \
82 mov r29=cr.ipsr; \
83 ;; \
84 tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \
85(p7) br.sptk.many kvm_dispatch_reflection; \
86 br.sptk.many kvm_vmm_panic; \
87
// Panic path: saves state via KVM_SAVE_MIN_WITH_COVER_R19 / KVM_SAVE_REST,
// turns psr.ic back on, sets rp to ia64_leave_hypervisor and calls
// vmm_panic_handler with out0 = r15.
88GLOBAL_ENTRY(kvm_vmm_panic)
89 KVM_SAVE_MIN_WITH_COVER_R19
90 alloc r14=ar.pfs,0,0,1,0
91 mov out0=r15
92 adds r3=8,r2 // set up second base pointer
93 ;;
94 ssm psr.ic
95 ;;
96 srlz.i // guarantee that interruption collection is on
97 ;;
98 (p15) ssm psr.i // restore psr.i
99 addl r14=@gprel(ia64_leave_hypervisor),gp
100 ;;
101 KVM_SAVE_REST
102 mov rp=r14
103 ;;
104 br.call.sptk.many b6=vmm_panic_handler;
105END(kvm_vmm_panic)
106
107 .section .text..ivt,"ax"
108
109 .align 32768 // align on 32KB boundary
110 .global kvm_ia64_ivt
111kvm_ia64_ivt:
112///////////////////////////////////////////////////////////////
113// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
114ENTRY(kvm_vhpt_miss)
115 KVM_FAULT(0)
116END(kvm_vhpt_miss)
117
118 .org kvm_ia64_ivt+0x400
119////////////////////////////////////////////////////////////////
120// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
121ENTRY(kvm_itlb_miss)
122 mov r31 = pr
123 mov r29=cr.ipsr;
124 ;;
125 tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
126(p6) br.sptk kvm_alt_itlb_miss
127 mov r19 = 1
128 br.sptk kvm_itlb_miss_dispatch
129 KVM_FAULT(1);
130END(kvm_itlb_miss)
131
132 .org kvm_ia64_ivt+0x0800
133//////////////////////////////////////////////////////////////////
134// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
135ENTRY(kvm_dtlb_miss)
136 mov r31 = pr
137 mov r29=cr.ipsr;
138 ;;
139 tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
140(p6) br.sptk kvm_alt_dtlb_miss
141 br.sptk kvm_dtlb_miss_dispatch
142END(kvm_dtlb_miss)
143
144 .org kvm_ia64_ivt+0x0c00
145////////////////////////////////////////////////////////////////////
146// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
147ENTRY(kvm_alt_itlb_miss)
148 mov r16=cr.ifa // get address that caused the TLB miss
149 ;;
150 movl r17=PAGE_KERNEL
151 mov r24=cr.ipsr
152 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
153 ;;
154 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
155 ;;
156 or r19=r17,r19 // insert PTE control bits into r19
157 ;;
158 movl r20=IA64_GRANULE_SHIFT<<2
159 ;;
160 mov cr.itir=r20
161 ;;
162 itc.i r19 // insert the TLB entry
163 mov pr=r31,-1
164 rfi
165END(kvm_alt_itlb_miss)
166
167 .org kvm_ia64_ivt+0x1000
168/////////////////////////////////////////////////////////////////////
169// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
170ENTRY(kvm_alt_dtlb_miss)
171 mov r16=cr.ifa // get address that caused the TLB miss
172 ;;
173 movl r17=PAGE_KERNEL
174 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
175 mov r24=cr.ipsr
176 ;;
177 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
178 ;;
179 or r19=r19,r17 // insert PTE control bits into r19
180 ;;
181 movl r20=IA64_GRANULE_SHIFT<<2
182 ;;
183 mov cr.itir=r20
184 ;;
185 itc.d r19 // insert the TLB entry
186 mov pr=r31,-1
187 rfi
188END(kvm_alt_dtlb_miss)
189
190 .org kvm_ia64_ivt+0x1400
191//////////////////////////////////////////////////////////////////////
192// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
193ENTRY(kvm_nested_dtlb_miss)
194 KVM_FAULT(5)
195END(kvm_nested_dtlb_miss)
196
197 .org kvm_ia64_ivt+0x1800
198/////////////////////////////////////////////////////////////////////
199// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
200ENTRY(kvm_ikey_miss)
201 KVM_REFLECT(6)
202END(kvm_ikey_miss)
203
204 .org kvm_ia64_ivt+0x1c00
205/////////////////////////////////////////////////////////////////////
206// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
207ENTRY(kvm_dkey_miss)
208 KVM_REFLECT(7)
209END(kvm_dkey_miss)
210
211 .org kvm_ia64_ivt+0x2000
212////////////////////////////////////////////////////////////////////
213// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
214ENTRY(kvm_dirty_bit)
215 KVM_REFLECT(8)
216END(kvm_dirty_bit)
217
218 .org kvm_ia64_ivt+0x2400
219////////////////////////////////////////////////////////////////////
220// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
221ENTRY(kvm_iaccess_bit)
222 KVM_REFLECT(9)
223END(kvm_iaccess_bit)
224
225 .org kvm_ia64_ivt+0x2800
226///////////////////////////////////////////////////////////////////
227// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
228ENTRY(kvm_daccess_bit)
229 KVM_REFLECT(10)
230END(kvm_daccess_bit)
231
232 .org kvm_ia64_ivt+0x2c00
233/////////////////////////////////////////////////////////////////
234// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
/*
 * Break instruction vector (entry 11).
 *
 * Saves the interrupted state with KVM_SAVE_MIN_WITH_COVER_R19 and
 * KVM_SAVE_REST (macros defined outside this excerpt), turns interruption
 * collection back on, and calls kvm_ia64_handle_break with
 *   out0 = cr.ifa, out1 = sp + 16, out2 = cr.isr, out3 = cr.iim.
 * rp is pointed at ia64_leave_hypervisor before the call, so the handler
 * returns into that exit path rather than to this vector.
 */
235ENTRY(kvm_break_fault)
236 mov r31=pr
237 mov r19=11
238 mov r29=cr.ipsr
239 ;;
240 KVM_SAVE_MIN_WITH_COVER_R19
241 ;;
242 alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!)
243 mov out0=cr.ifa
244 mov out2=cr.isr // FIXME: pity to make this slow access twice
245 mov out3=cr.iim // FIXME: pity to make this slow access twice
246 adds r3=8,r2 // set up second base pointer
247 ;;
248 ssm psr.ic
249 ;;
250 srlz.i // guarantee that interruption collection is on
251 ;;
252 (p15)ssm psr.i // restore psr.i
// r14 = address of ia64_leave_hypervisor (gp-relative); becomes rp below
253 addl r14=@gprel(ia64_leave_hypervisor),gp
254 ;;
255 KVM_SAVE_REST
256 mov rp=r14
257 ;;
258 adds out1=16,sp
259 br.call.sptk.many b6=kvm_ia64_handle_break
260 ;;
261END(kvm_break_fault)
262
263 .org kvm_ia64_ivt+0x3000
264/////////////////////////////////////////////////////////////////
265// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
/*
 * External interrupt vector (entry 12).
 *
 * p7 is set when IA64_PSR_VM_BIT is set in cr.ipsr; in that case control
 * goes to kvm_dispatch_interrupt. Otherwise a register frame of
 * VMM_PT_REGS_SIZE bytes is built by hand below the current sp, the VMM gp
 * is loaded from VMM_VCPU_GP_OFFSET(r13), rp is pointed at ia64_leave_nested
 * and kvm_ia64_handle_irq is called with out0 = r13.
 * p15 records whether IA64_PSR_I_BIT was set in cr.ipsr.
 */
266ENTRY(kvm_interrupt)
267 mov r31=pr // prepare to save predicates
268 mov r19=12
269 mov r29=cr.ipsr
270 ;;
271 tbit.z p6,p7=r29,IA64_PSR_VM_BIT
272 tbit.z p0,p15=r29,IA64_PSR_I_BIT
273 ;;
274(p7) br.sptk kvm_dispatch_interrupt
275 ;;
// Fall-through path (ipsr.vm clear): capture state into scratch registers.
276 mov r27=ar.rsc /* M */
277 mov r20=r1 /* A */
278 mov r25=ar.unat /* M */
279 mov r26=ar.pfs /* I */
280 mov r28=cr.iip /* M */
281 cover /* B (or nothing) */
282 ;;
283 mov r1=sp
284 ;;
285 invala /* M */
286 mov r30=cr.ifs
287 ;;
// r1 = base of the new frame, VMM_PT_REGS_SIZE bytes below the old sp
288 addl r1=-VMM_PT_REGS_SIZE,r1
289 ;;
290 adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */
291 adds r16=PT(CR_IPSR),r1
292 ;;
293 lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
294 st8 [r16]=r29 /* save cr.ipsr */
295 ;;
296 lfetch.fault.excl.nt1 [r17]
297 mov r29=b0
298 ;;
299 adds r16=PT(R8),r1 /* initialize first base pointer */
300 adds r17=PT(R9),r1 /* initialize second base pointer */
301 mov r18=r0 /* make sure r18 isn't NaT */
302 ;;
303.mem.offset 0,0; st8.spill [r16]=r8,16
304.mem.offset 8,0; st8.spill [r17]=r9,16
305 ;;
306.mem.offset 0,0; st8.spill [r16]=r10,24
307.mem.offset 8,0; st8.spill [r17]=r11,24
308 ;;
309 st8 [r16]=r28,16 /* save cr.iip */
310 st8 [r17]=r30,16 /* save cr.ifs */
311 mov r8=ar.fpsr /* M */
312 mov r9=ar.csd
313 mov r10=ar.ssd
314 movl r11=FPSR_DEFAULT /* L-unit */
315 ;;
316 st8 [r16]=r25,16 /* save ar.unat */
317 st8 [r17]=r26,16 /* save ar.pfs */
318 shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */
319 ;;
320 st8 [r16]=r27,16 /* save ar.rsc */
321 adds r17=16,r17 /* skip over ar_rnat field */
322 ;;
323 st8 [r17]=r31,16 /* save predicates */
324 adds r16=16,r16 /* skip over ar_bspstore field */
325 ;;
326 st8 [r16]=r29,16 /* save b0 */
327 st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */
328 ;;
329.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */
330.mem.offset 8,0; st8.spill [r17]=r12,16
331 adds r12=-16,r1
332 /* switch to kernel memory stack (with 16 bytes of scratch) */
333 ;;
334.mem.offset 0,0; st8.spill [r16]=r13,16
335.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
336 ;;
337.mem.offset 0,0; st8.spill [r16]=r15,16
338.mem.offset 8,0; st8.spill [r17]=r14,16
339 dep r14=-1,r0,60,4
340 ;;
341.mem.offset 0,0; st8.spill [r16]=r2,16
342.mem.offset 8,0; st8.spill [r17]=r3,16
343 adds r2=VMM_PT_REGS_R16_OFFSET,r1
// r14 is overwritten here, so the value from the dep above is not used
344 adds r14 = VMM_VCPU_GP_OFFSET,r13
345 ;;
346 mov r8=ar.ccv
347 ld8 r14 = [r14]
348 ;;
349 mov r1=r14 /* establish kernel global pointer */
350 ;; \
351 bsw.1
352 ;;
353 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
354 mov out0=r13
355 ;;
356 ssm psr.ic
357 ;;
358 srlz.i
359 ;;
// psr.i is not re-enabled on this path: the ssm below is commented out.
360 //(p15) ssm psr.i
361 adds r3=8,r2 // set up second base pointer for SAVE_REST
362 srlz.i // ensure everybody knows psr.ic is back on
363 ;;
// Spill r16-r31 through the r2/r3 base pointers after the bsw.1 above.
364.mem.offset 0,0; st8.spill [r2]=r16,16
365.mem.offset 8,0; st8.spill [r3]=r17,16
366 ;;
367.mem.offset 0,0; st8.spill [r2]=r18,16
368.mem.offset 8,0; st8.spill [r3]=r19,16
369 ;;
370.mem.offset 0,0; st8.spill [r2]=r20,16
371.mem.offset 8,0; st8.spill [r3]=r21,16
372 mov r18=b6
373 ;;
374.mem.offset 0,0; st8.spill [r2]=r22,16
375.mem.offset 8,0; st8.spill [r3]=r23,16
376 mov r19=b7
377 ;;
378.mem.offset 0,0; st8.spill [r2]=r24,16
379.mem.offset 8,0; st8.spill [r3]=r25,16
380 ;;
381.mem.offset 0,0; st8.spill [r2]=r26,16
382.mem.offset 8,0; st8.spill [r3]=r27,16
383 ;;
384.mem.offset 0,0; st8.spill [r2]=r28,16
385.mem.offset 8,0; st8.spill [r3]=r29,16
386 ;;
387.mem.offset 0,0; st8.spill [r2]=r30,16
388.mem.offset 8,0; st8.spill [r3]=r31,32
389 ;;
390 mov ar.fpsr=r11 /* M-unit */
391 st8 [r2]=r8,8 /* ar.ccv */
392 adds r24=PT(B6)-PT(F7),r3
393 ;;
394 stf.spill [r2]=f6,32
395 stf.spill [r3]=f7,32
396 ;;
397 stf.spill [r2]=f8,32
398 stf.spill [r3]=f9,32
399 ;;
400 stf.spill [r2]=f10
401 stf.spill [r3]=f11
402 adds r25=PT(B7)-PT(F11),r3
403 ;;
404 st8 [r24]=r18,16 /* b6 */
405 st8 [r25]=r19,16 /* b7 */
406 ;;
407 st8 [r24]=r9 /* ar.csd */
408 st8 [r25]=r10 /* ar.ssd */
409 ;;
410 srlz.d // make sure we see the effect of cr.ivr
// The handler returns to ia64_leave_nested (installed in rp), not to here.
411 addl r14=@gprel(ia64_leave_nested),gp
412 ;;
413 mov rp=r14
414 br.call.sptk.many b6=kvm_ia64_handle_irq
415 ;;
416END(kvm_interrupt)
417
418 .global kvm_dispatch_vexirq
419 .org kvm_ia64_ivt+0x3400
420//////////////////////////////////////////////////////////////////////
421// 0x3400 Entry 13 (size 64 bundles) Reserved
/*
 * Vector 13 (architecturally reserved), used here for virtual external
 * interrupts.
 *
 * Entered at the vector itself, r30 is cleared. kvm_dispatch_vexirq is a
 * global label and can also be entered from elsewhere: when r30 == 1 at
 * that label, r1 is reloaded from VMM_VCPU_SAVED_GP_OFFSET(r21) before the
 * state is saved. The handler kvm_vexirq is called with out0 = r13 and
 * returns to ia64_leave_hypervisor via rp.
 * NOTE(review): which callers enter at kvm_dispatch_vexirq with r30 == 1 is
 * not visible in this excerpt.
 */
422ENTRY(kvm_virtual_exirq)
423 mov r31=pr
424 mov r19=13
425 mov r30 =r0
426 ;;
427kvm_dispatch_vexirq:
428 cmp.eq p6,p0 = 1,r30
429 ;;
430(p6) add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
431 ;;
432(p6) ld8 r1 = [r29]
433 ;;
434 KVM_SAVE_MIN_WITH_COVER_R19
435 alloc r14=ar.pfs,0,0,1,0
436 mov out0=r13
437
438 ssm psr.ic
439 ;;
440 srlz.i // guarantee that interruption collection is on
441 ;;
442 (p15) ssm psr.i // restore psr.i
443 adds r3=8,r2 // set up second base pointer
444 ;;
445 KVM_SAVE_REST
446 addl r14=@gprel(ia64_leave_hypervisor),gp
447 ;;
448 mov rp=r14
449 br.call.sptk.many b6=kvm_vexirq
450END(kvm_virtual_exirq)
451
// Reserved vectors 14-19: one 0x400-byte slot each, every slot holding a
// single KVM_FAULT(n) with n equal to the entry number. KVM_FAULT is
// defined outside this excerpt.
452 .org kvm_ia64_ivt+0x3800
453/////////////////////////////////////////////////////////////////////
454// 0x3800 Entry 14 (size 64 bundles) Reserved
455 KVM_FAULT(14)
456 // this code segment is from 2.6.16.13
457
458 .org kvm_ia64_ivt+0x3c00
459///////////////////////////////////////////////////////////////////////
460// 0x3c00 Entry 15 (size 64 bundles) Reserved
461 KVM_FAULT(15)
462
463 .org kvm_ia64_ivt+0x4000
464///////////////////////////////////////////////////////////////////////
465// 0x4000 Entry 16 (size 64 bundles) Reserved
466 KVM_FAULT(16)
467
468 .org kvm_ia64_ivt+0x4400
469//////////////////////////////////////////////////////////////////////
470// 0x4400 Entry 17 (size 64 bundles) Reserved
471 KVM_FAULT(17)
472
473 .org kvm_ia64_ivt+0x4800
474//////////////////////////////////////////////////////////////////////
475// 0x4800 Entry 18 (size 64 bundles) Reserved
476 KVM_FAULT(18)
477
478 .org kvm_ia64_ivt+0x4c00
479//////////////////////////////////////////////////////////////////////
480// 0x4c00 Entry 19 (size 64 bundles) Reserved
481 KVM_FAULT(19)
482
483 .org kvm_ia64_ivt+0x5000
484//////////////////////////////////////////////////////////////////////
485// 0x5000 Entry 20 (size 16 bundles) Page Not Present
// Vectors 20-36: from here on each slot is 0x100 bytes (16 bundles). Every
// handler in this run is a single KVM_REFLECT(n) or KVM_FAULT(n) whose
// argument equals the entry number; both macros are defined outside this
// excerpt.
486ENTRY(kvm_page_not_present)
487 KVM_REFLECT(20)
488END(kvm_page_not_present)
489
490 .org kvm_ia64_ivt+0x5100
491///////////////////////////////////////////////////////////////////////
492// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
493ENTRY(kvm_key_permission)
494 KVM_REFLECT(21)
495END(kvm_key_permission)
496
497 .org kvm_ia64_ivt+0x5200
498//////////////////////////////////////////////////////////////////////
499// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
500ENTRY(kvm_iaccess_rights)
501 KVM_REFLECT(22)
502END(kvm_iaccess_rights)
503
504 .org kvm_ia64_ivt+0x5300
505//////////////////////////////////////////////////////////////////////
506// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
507ENTRY(kvm_daccess_rights)
508 KVM_REFLECT(23)
509END(kvm_daccess_rights)
510
511 .org kvm_ia64_ivt+0x5400
512/////////////////////////////////////////////////////////////////////
513// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
514ENTRY(kvm_general_exception)
515 KVM_REFLECT(24)
// NOTE(review): this is the only vector that invokes both macros. If
// KVM_REFLECT ends in an unconditional branch, the KVM_FAULT below is never
// reached -- confirm against the macro definitions.
516 KVM_FAULT(24)
517END(kvm_general_exception)
518
519 .org kvm_ia64_ivt+0x5500
520//////////////////////////////////////////////////////////////////////
521// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
522ENTRY(kvm_disabled_fp_reg)
523 KVM_REFLECT(25)
524END(kvm_disabled_fp_reg)
525
526 .org kvm_ia64_ivt+0x5600
527////////////////////////////////////////////////////////////////////
528// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
529ENTRY(kvm_nat_consumption)
530 KVM_REFLECT(26)
531END(kvm_nat_consumption)
532
533 .org kvm_ia64_ivt+0x5700
534/////////////////////////////////////////////////////////////////////
535// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
536ENTRY(kvm_speculation_vector)
537 KVM_REFLECT(27)
538END(kvm_speculation_vector)
539
540 .org kvm_ia64_ivt+0x5800
541/////////////////////////////////////////////////////////////////////
542// 0x5800 Entry 28 (size 16 bundles) Reserved
543 KVM_FAULT(28)
544
545 .org kvm_ia64_ivt+0x5900
546///////////////////////////////////////////////////////////////////
547// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
548ENTRY(kvm_debug_vector)
549 KVM_FAULT(29)
550END(kvm_debug_vector)
551
552 .org kvm_ia64_ivt+0x5a00
553///////////////////////////////////////////////////////////////
554// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
555ENTRY(kvm_unaligned_access)
556 KVM_REFLECT(30)
557END(kvm_unaligned_access)
558
559 .org kvm_ia64_ivt+0x5b00
560//////////////////////////////////////////////////////////////////////
561// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
562ENTRY(kvm_unsupported_data_reference)
563 KVM_REFLECT(31)
564END(kvm_unsupported_data_reference)
565
566 .org kvm_ia64_ivt+0x5c00
567////////////////////////////////////////////////////////////////////
568// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
569ENTRY(kvm_floating_point_fault)
570 KVM_REFLECT(32)
571END(kvm_floating_point_fault)
572
573 .org kvm_ia64_ivt+0x5d00
574/////////////////////////////////////////////////////////////////////
575// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
576ENTRY(kvm_floating_point_trap)
577 KVM_REFLECT(33)
578END(kvm_floating_point_trap)
579
580 .org kvm_ia64_ivt+0x5e00
581//////////////////////////////////////////////////////////////////////
582// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
583ENTRY(kvm_lower_privilege_trap)
584 KVM_REFLECT(34)
585END(kvm_lower_privilege_trap)
586
587 .org kvm_ia64_ivt+0x5f00
588//////////////////////////////////////////////////////////////////////
589// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
590ENTRY(kvm_taken_branch_trap)
591 KVM_REFLECT(35)
592END(kvm_taken_branch_trap)
593
594 .org kvm_ia64_ivt+0x6000
595////////////////////////////////////////////////////////////////////
596// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
597ENTRY(kvm_single_step_trap)
598 KVM_REFLECT(36)
599END(kvm_single_step_trap)
600 .global kvm_virtualization_fault_back
601 .org kvm_ia64_ivt+0x6100
602/////////////////////////////////////////////////////////////////////
603// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
/*
 * Virtualization fault vector (entry 37).
 *
 * On entry r24 is compared against the EVENT_* cause codes and r25 is later
 * stored as the opcode; r21 is the base for the VMM_VCPU_* offsets.
 *
 * 1. The incoming r1 is stored at VMM_VCPU_SAVED_GP_OFFSET(r21) and r1 is
 *    loaded from VMM_VCPU_GP_OFFSET(r21).
 * 2. Seven causes (mov from ar/rr, mov to rr, rsm, ssm, mov to psr, thash)
 *    branch to dedicated kvm_asm_* routines defined outside this excerpt.
 * 3. Everything else reaches kvm_virtualization_fault_back, a global label
 *    so that code elsewhere can re-enter here. It restores the saved r1,
 *    records cause and opcode in the vcpu, and branches to
 *    kvm_dispatch_virtualization_fault.
 * 4. For EVENT_RFI only: if bit 63 of the value at VMM_VPD_VIFS_OFFSET in
 *    the VPD is set, the current register frame is discarded with an empty
 *    alloc before dispatching.
 */
604ENTRY(kvm_virtualization_fault)
605 mov r31=pr
606 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
607 ;;
608 st8 [r16] = r1
609 adds r17 = VMM_VCPU_GP_OFFSET, r21
610 ;;
611 ld8 r1 = [r17]
612 cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
613 cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
614 cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
615 cmp.eq p9,p0=EVENT_RSM,r24
616 cmp.eq p10,p0=EVENT_SSM,r24
617 cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
618 cmp.eq p12,p0=EVENT_THASH,r24
619(p6) br.dptk.many kvm_asm_mov_from_ar
620(p7) br.dptk.many kvm_asm_mov_from_rr
621(p8) br.dptk.many kvm_asm_mov_to_rr
622(p9) br.dptk.many kvm_asm_rsm
623(p10) br.dptk.many kvm_asm_ssm
624(p11) br.dptk.many kvm_asm_mov_to_psr
625(p12) br.dptk.many kvm_asm_thash
626 ;;
627kvm_virtualization_fault_back:
628 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
629 ;;
630 ld8 r1 = [r16]
631 ;;
632 mov r19=37
633 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
634 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
635 ;;
// kvm_dispatch_virtualization_fault performs the same two stores again.
636 st8 [r16] = r24
637 st8 [r17] = r25
638 ;;
639 cmp.ne p6,p0=EVENT_RFI, r24
640(p6) br.sptk kvm_dispatch_virtualization_fault
641 ;;
// EVENT_RFI only from here: r18 = value at VPD base + VMM_VPD_VIFS_OFFSET
642 adds r18=VMM_VPD_BASE_OFFSET,r21
643 ;;
644 ld8 r18=[r18]
645 ;;
646 adds r18=VMM_VPD_VIFS_OFFSET,r18
647 ;;
648 ld8 r18=[r18]
649 ;;
650 tbit.z p6,p0=r18,63
651(p6) br.sptk kvm_dispatch_virtualization_fault
652 ;;
653//if vifs.v=1 desert current register frame
654 alloc r18=ar.pfs,0,0,0,0
655 br.sptk kvm_dispatch_virtualization_fault
656END(kvm_virtualization_fault)
657
// Reserved vectors 38-44: one 0x100-byte slot each, every slot holding a
// single KVM_FAULT(n) with n equal to the entry number. KVM_FAULT is
// defined outside this excerpt.
658 .org kvm_ia64_ivt+0x6200
659//////////////////////////////////////////////////////////////
660// 0x6200 Entry 38 (size 16 bundles) Reserved
661 KVM_FAULT(38)
662
663 .org kvm_ia64_ivt+0x6300
664/////////////////////////////////////////////////////////////////
665// 0x6300 Entry 39 (size 16 bundles) Reserved
666 KVM_FAULT(39)
667
668 .org kvm_ia64_ivt+0x6400
669/////////////////////////////////////////////////////////////////
670// 0x6400 Entry 40 (size 16 bundles) Reserved
671 KVM_FAULT(40)
672
673 .org kvm_ia64_ivt+0x6500
674//////////////////////////////////////////////////////////////////
675// 0x6500 Entry 41 (size 16 bundles) Reserved
676 KVM_FAULT(41)
677
678 .org kvm_ia64_ivt+0x6600
679//////////////////////////////////////////////////////////////////
680// 0x6600 Entry 42 (size 16 bundles) Reserved
681 KVM_FAULT(42)
682
683 .org kvm_ia64_ivt+0x6700
684//////////////////////////////////////////////////////////////////
685// 0x6700 Entry 43 (size 16 bundles) Reserved
686 KVM_FAULT(43)
687
688 .org kvm_ia64_ivt+0x6800
689//////////////////////////////////////////////////////////////////
690// 0x6800 Entry 44 (size 16 bundles) Reserved
691 KVM_FAULT(44)
692
693 .org kvm_ia64_ivt+0x6900
694///////////////////////////////////////////////////////////////////
695// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception
696//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
// Vector 45 is not reflected: it goes through KVM_FAULT like the reserved
// vectors (macro defined outside this excerpt).
697ENTRY(kvm_ia32_exception)
698 KVM_FAULT(45)
699END(kvm_ia32_exception)
700
701 .org kvm_ia64_ivt+0x6a00
702////////////////////////////////////////////////////////////////////
703// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
// Vector 46 (IA-32 Intercept) is not reflected; it goes through KVM_FAULT.
// The macro argument is the vector number: every other slot in this table
// passes its own entry number, so this one passes 46 (it previously passed
// 47, which mislabels the vector).
// NOTE(review): the table has no ".org kvm_ia64_ivt+0x6b00" slot for
// Entry 47; the next origin after this handler is +0x6c00 (Entry 48).
// Confirm whether a handler for vector 47 was dropped by accident.
704ENTRY(kvm_ia32_intercept)
705 KVM_FAULT(46)
706END(kvm_ia32_intercept)
707
// Reserved vectors 48-50: one 0x100-byte slot each, every slot holding a
// single KVM_FAULT(n) with n equal to the entry number. KVM_FAULT is
// defined outside this excerpt.
708 .org kvm_ia64_ivt+0x6c00
709/////////////////////////////////////////////////////////////////////
710// 0x6c00 Entry 48 (size 16 bundles) Reserved
711 KVM_FAULT(48)
712
713 .org kvm_ia64_ivt+0x6d00
714//////////////////////////////////////////////////////////////////////
715// 0x6d00 Entry 49 (size 16 bundles) Reserved
716 KVM_FAULT(49)
717
718 .org kvm_ia64_ivt+0x6e00
719//////////////////////////////////////////////////////////////////////
720// 0x6e00 Entry 50 (size 16 bundles) Reserved
721 KVM_FAULT(50)
722
// Reserved vector 51. The KVM_FAULT argument is the vector number: every
// other slot in this table passes its own entry number, so this one passes
// 51 (it previously passed 52, which mislabels the vector).
// NOTE(review): the table has no ".org kvm_ia64_ivt+0x7000" slot for
// Entry 52; the next origin after this one is +0x7100 (Entry 53). Confirm
// whether a slot for vector 52 was dropped by accident.
723 .org kvm_ia64_ivt+0x6f00
724/////////////////////////////////////////////////////////////////////
725// 0x6f00 Entry 51 (size 16 bundles) Reserved
726 KVM_FAULT(51)
727
// Reserved vectors 53-67: one 0x100-byte slot each, every slot holding a
// single KVM_FAULT(n) with n equal to the entry number. KVM_FAULT is
// defined outside this excerpt.
728 .org kvm_ia64_ivt+0x7100
729////////////////////////////////////////////////////////////////////
730// 0x7100 Entry 53 (size 16 bundles) Reserved
731 KVM_FAULT(53)
732
733 .org kvm_ia64_ivt+0x7200
734/////////////////////////////////////////////////////////////////////
735// 0x7200 Entry 54 (size 16 bundles) Reserved
736 KVM_FAULT(54)
737
738 .org kvm_ia64_ivt+0x7300
739////////////////////////////////////////////////////////////////////
740// 0x7300 Entry 55 (size 16 bundles) Reserved
741 KVM_FAULT(55)
742
743 .org kvm_ia64_ivt+0x7400
744////////////////////////////////////////////////////////////////////
745// 0x7400 Entry 56 (size 16 bundles) Reserved
746 KVM_FAULT(56)
747
748 .org kvm_ia64_ivt+0x7500
749/////////////////////////////////////////////////////////////////////
750// 0x7500 Entry 57 (size 16 bundles) Reserved
751 KVM_FAULT(57)
752
753 .org kvm_ia64_ivt+0x7600
754/////////////////////////////////////////////////////////////////////
755// 0x7600 Entry 58 (size 16 bundles) Reserved
756 KVM_FAULT(58)
757
758 .org kvm_ia64_ivt+0x7700
759////////////////////////////////////////////////////////////////////
760// 0x7700 Entry 59 (size 16 bundles) Reserved
761 KVM_FAULT(59)
762
763 .org kvm_ia64_ivt+0x7800
764////////////////////////////////////////////////////////////////////
765// 0x7800 Entry 60 (size 16 bundles) Reserved
766 KVM_FAULT(60)
767
768 .org kvm_ia64_ivt+0x7900
769/////////////////////////////////////////////////////////////////////
770// 0x7900 Entry 61 (size 16 bundles) Reserved
771 KVM_FAULT(61)
772
773 .org kvm_ia64_ivt+0x7a00
774/////////////////////////////////////////////////////////////////////
775// 0x7a00 Entry 62 (size 16 bundles) Reserved
776 KVM_FAULT(62)
777
778 .org kvm_ia64_ivt+0x7b00
779/////////////////////////////////////////////////////////////////////
780// 0x7b00 Entry 63 (size 16 bundles) Reserved
781 KVM_FAULT(63)
782
783 .org kvm_ia64_ivt+0x7c00
784////////////////////////////////////////////////////////////////////
785// 0x7c00 Entry 64 (size 16 bundles) Reserved
786 KVM_FAULT(64)
787
788 .org kvm_ia64_ivt+0x7d00
789/////////////////////////////////////////////////////////////////////
790// 0x7d00 Entry 65 (size 16 bundles) Reserved
791 KVM_FAULT(65)
792
793 .org kvm_ia64_ivt+0x7e00
794/////////////////////////////////////////////////////////////////////
795// 0x7e00 Entry 66 (size 16 bundles) Reserved
796 KVM_FAULT(66)
797
798 .org kvm_ia64_ivt+0x7f00
799////////////////////////////////////////////////////////////////////
800// 0x7f00 Entry 67 (size 16 bundles) Reserved
801 KVM_FAULT(67)
802
803 .org kvm_ia64_ivt+0x8000
804// There is no particular reason for this code to be here, other than that
805// there happens to be space here that would go unused otherwise. If this
806// fault ever gets "unreserved", simply move the following code to a more
807// suitable spot...
808
809
/*
 * Out-of-line dispatcher for data TLB misses.
 *
 * Sets r19 = 2, saves state with KVM_SAVE_MIN_WITH_COVER_R19, KVM_SAVE_REST
 * and KVM_SAVE_EXTRA (macros defined outside this excerpt), and calls
 * kvm_page_fault with out0 = cr.ifa, out1 = r15, out2 = r12 + 16.
 * rp is pointed at ia64_leave_hypervisor_prepare, so the handler returns
 * into that exit path.
 * NOTE(review): the meaning of r15 is set up by the branching vector, which
 * is outside this excerpt.
 */
810ENTRY(kvm_dtlb_miss_dispatch)
811 mov r19 = 2
812 KVM_SAVE_MIN_WITH_COVER_R19
813 alloc r14=ar.pfs,0,0,3,0
814 mov out0=cr.ifa
815 mov out1=r15
816 adds r3=8,r2 // set up second base pointer
817 ;;
818 ssm psr.ic
819 ;;
820 srlz.i // guarantee that interruption collection is on
821 ;;
822 (p15) ssm psr.i // restore psr.i
823 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
824 ;;
825 KVM_SAVE_REST
826 KVM_SAVE_EXTRA
827 mov rp=r14
828 ;;
829 adds out2=16,r12
830 br.call.sptk.many b6=kvm_page_fault
831END(kvm_dtlb_miss_dispatch)
832
/*
 * Out-of-line dispatcher for instruction TLB misses.
 *
 * Same call as kvm_dtlb_miss_dispatch (kvm_page_fault with out0 = cr.ifa,
 * out1 = r15, out2 = r12 + 16), with three visible differences: r19 is not
 * set here, KVM_SAVE_EXTRA is not invoked, and rp is pointed at
 * ia64_leave_hypervisor rather than ia64_leave_hypervisor_prepare.
 * NOTE(review): r19 is presumably set by the branching vector before it
 * gets here -- confirm against the caller, which is outside this excerpt.
 */
833ENTRY(kvm_itlb_miss_dispatch)
834
835 KVM_SAVE_MIN_WITH_COVER_R19
836 alloc r14=ar.pfs,0,0,3,0
837 mov out0=cr.ifa
838 mov out1=r15
839 adds r3=8,r2 // set up second base pointer
840 ;;
841 ssm psr.ic
842 ;;
843 srlz.i // guarantee that interruption collection is on
844 ;;
845 (p15) ssm psr.i // restore psr.i
846 addl r14=@gprel(ia64_leave_hypervisor),gp
847 ;;
848 KVM_SAVE_REST
849 mov rp=r14
850 ;;
851 adds out2=16,r12
852 br.call.sptk.many b6=kvm_page_fault
853END(kvm_itlb_miss_dispatch)
854
// Common dispatcher that calls reflect_interruption with
//   out0 = cr.ifa, out1 = cr.isr, out2 = cr.iim, out3 = r15,
//   out4 = r12 + 16.
// rp is pointed at ia64_leave_hypervisor, so the handler returns into that
// exit path.
855ENTRY(kvm_dispatch_reflection)
856/*
857 * Input:
858 * psr.ic: off
859 * r19: intr type (offset into ivt, see ia64_int.h)
860 * r31: contains saved predicates (pr)
861 */
862 KVM_SAVE_MIN_WITH_COVER_R19
863 alloc r14=ar.pfs,0,0,5,0
864 mov out0=cr.ifa
865 mov out1=cr.isr
866 mov out2=cr.iim
867 mov out3=r15
868 adds r3=8,r2 // set up second base pointer
869 ;;
870 ssm psr.ic
871 ;;
872 srlz.i // guarantee that interruption collection is on
873 ;;
874 (p15) ssm psr.i // restore psr.i
875 addl r14=@gprel(ia64_leave_hypervisor),gp
876 ;;
877 KVM_SAVE_REST
878 mov rp=r14
879 ;;
880 adds out4=16,r12
881 br.call.sptk.many b6=reflect_interruption
882END(kvm_dispatch_reflection)
883
/*
 * Slow path for virtualization faults.
 *
 * Stores r24 at VMM_VCPU_CAUSE_OFFSET(r21) and r25 at
 * VMM_VCPU_OPCODE_OFFSET(r21), saves state (including KVM_SAVE_EXTRA), and
 * calls kvm_emulate with out0 = r13 and out1 = sp + 16.
 * rp is pointed at ia64_leave_hypervisor_prepare, so the handler returns
 * into that exit path.
 */
884ENTRY(kvm_dispatch_virtualization_fault)
885 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
886 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
887 ;;
888 st8 [r16] = r24
889 st8 [r17] = r25
890 ;;
891 KVM_SAVE_MIN_WITH_COVER_R19
892 ;;
893 alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!)
894 mov out0=r13 //vcpu
895 adds r3=8,r2 // set up second base pointer
896 ;;
897 ssm psr.ic
898 ;;
899 srlz.i // guarantee that interruption collection is on
900 ;;
901 (p15) ssm psr.i // restore psr.i
902 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
903 ;;
904 KVM_SAVE_REST
905 KVM_SAVE_EXTRA
906 mov rp=r14
907 ;;
908 adds out1=16,sp //regs
909 br.call.sptk.many b6=kvm_emulate
910END(kvm_dispatch_virtualization_fault)
911
912
/*
 * Reached from kvm_interrupt when IA64_PSR_VM_BIT is set in cr.ipsr.
 *
 * Saves state with the shared macros and calls kvm_ia64_handle_irq with
 * out0 = r13, the only output register allocated here. rp is pointed at
 * ia64_leave_hypervisor, so the handler returns into that exit path.
 */
913ENTRY(kvm_dispatch_interrupt)
914 KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3
915 ;;
916 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
917 adds r3=8,r2 // set up second base pointer for SAVE_REST
918 ;;
919 ssm psr.ic
920 ;;
921 srlz.i
922 ;;
923 (p15) ssm psr.i
924 addl r14=@gprel(ia64_leave_hypervisor),gp
925 ;;
926 KVM_SAVE_REST
927 mov rp=r14
928 ;;
929 mov out0=r13 // first (and only) outgoing argument: r13, as in kvm_interrupt
930 br.call.sptk.many b6=kvm_ia64_handle_irq
931END(kvm_dispatch_interrupt)
932
/*
 * Exit path installed as rp by kvm_interrupt on its hand-built-frame path.
 *
 * Restores the register frame found at r12 + 16 (all offsets are
 * PT(...) + 16 from r12), turns off psr.i and psr.ic, switches back to
 * register bank 0, reloads the control and application registers, and
 * returns with rfi.
 *
 * NOTE(review): several values are loaded but not consumed before the rfi:
 * r19 (ar.rsc for "loadrs"), r23/r24 (ar.bspstore/ar.rnat, marked "may be
 * garbage"), r16 (ar.bsp) and predicate p9. No loadrs is executed and
 * ar.bspstore/ar.rnat are not written here. This looks like a leftover from
 * the code this was derived from -- confirm before relying on it.
 */
933GLOBAL_ENTRY(ia64_leave_nested)
934 rsm psr.i
935 ;;
936 adds r21=PT(PR)+16,r12
937 ;;
938 lfetch [r21],PT(CR_IPSR)-PT(PR)
939 adds r2=PT(B6)+16,r12
940 adds r3=PT(R16)+16,r12
941 ;;
942 lfetch [r21]
943 ld8 r28=[r2],8 // load b6
944 adds r29=PT(R24)+16,r12
945
946 ld8.fill r16=[r3]
947 adds r3=PT(AR_CSD)-PT(R16),r3
948 adds r30=PT(AR_CCV)+16,r12
949 ;;
950 ld8.fill r24=[r29]
951 ld8 r15=[r30] // load ar.ccv
952 ;;
953 ld8 r29=[r2],16 // load b7
954 ld8 r30=[r3],16 // load ar.csd
955 ;;
956 ld8 r31=[r2],16 // load ar.ssd
957 ld8.fill r8=[r3],16
958 ;;
959 ld8.fill r9=[r2],16
960 ld8.fill r10=[r3],PT(R17)-PT(R10)
961 ;;
962 ld8.fill r11=[r2],PT(R18)-PT(R11)
963 ld8.fill r17=[r3],16
964 ;;
965 ld8.fill r18=[r2],16
966 ld8.fill r19=[r3],16
967 ;;
968 ld8.fill r20=[r2],16
969 ld8.fill r21=[r3],16
970 mov ar.csd=r30
971 mov ar.ssd=r31
972 ;;
973 rsm psr.i | psr.ic
974 // initiate turning off of interrupt and interruption collection
975 invala // invalidate ALAT
976 ;;
977 srlz.i
978 ;;
979 ld8.fill r22=[r2],24
980 ld8.fill r23=[r3],24
981 mov b6=r28
982 ;;
983 ld8.fill r25=[r2],16
984 ld8.fill r26=[r3],16
985 mov b7=r29
986 ;;
987 ld8.fill r27=[r2],16
988 ld8.fill r28=[r3],16
989 ;;
990 ld8.fill r29=[r2],16
991 ld8.fill r30=[r3],24
992 ;;
993 ld8.fill r31=[r2],PT(F9)-PT(R31)
994 adds r3=PT(F10)-PT(F6),r3
995 ;;
996 ldf.fill f9=[r2],PT(F6)-PT(F9)
997 ldf.fill f10=[r3],PT(F8)-PT(F10)
998 ;;
999 ldf.fill f6=[r2],PT(F7)-PT(F6)
1000 ;;
1001 ldf.fill f7=[r2],PT(F11)-PT(F7)
1002 ldf.fill f8=[r3],32
1003 ;;
1004 srlz.i // ensure interruption collection is off
1005 mov ar.ccv=r15
1006 ;;
1007 bsw.0 // switch back to bank 0 (no stop bit required beforehand...)
1008 ;;
// From here on r16-r31 name the bank 0 registers and serve as scratch.
1009 ldf.fill f11=[r2]
1010// mov r18=r13
1011// mov r21=r13
1012 adds r16=PT(CR_IPSR)+16,r12
1013 adds r17=PT(CR_IIP)+16,r12
1014 ;;
1015 ld8 r29=[r16],16 // load cr.ipsr
1016 ld8 r28=[r17],16 // load cr.iip
1017 ;;
1018 ld8 r30=[r16],16 // load cr.ifs
1019 ld8 r25=[r17],16 // load ar.unat
1020 ;;
1021 ld8 r26=[r16],16 // load ar.pfs
1022 ld8 r27=[r17],16 // load ar.rsc
1023 cmp.eq p9,p0=r0,r0
1024 // set p9 to indicate that we should restore cr.ifs
1025 ;;
1026 ld8 r24=[r16],16 // load ar.rnat (may be garbage)
1027 ld8 r23=[r17],16// load ar.bspstore (may be garbage)
1028 ;;
1029 ld8 r31=[r16],16 // load predicates
1030 ld8 r22=[r17],16 // load b0
1031 ;;
1032 ld8 r19=[r16],16 // load ar.rsc value for "loadrs"
1033 ld8.fill r1=[r17],16 // load r1
1034 ;;
1035 ld8.fill r12=[r16],16
1036 ld8.fill r13=[r17],16
1037 ;;
1038 ld8 r20=[r16],16 // ar.fpsr
1039 ld8.fill r15=[r17],16
1040 ;;
1041 ld8.fill r14=[r16],16
1042 ld8.fill r2=[r17]
1043 ;;
1044 ld8.fill r3=[r16]
1045 ;;
1046 mov r16=ar.bsp // get existing backing store pointer
1047 ;;
1048 mov b0=r22
1049 mov ar.pfs=r26
1050 mov cr.ifs=r30
1051 mov cr.ipsr=r29
1052 mov ar.fpsr=r20
1053 mov cr.iip=r28
1054 ;;
1055 mov ar.rsc=r27
1056 mov ar.unat=r25
1057 mov pr=r31,-1
1058 rfi
1059END(ia64_leave_nested)
1060
1061GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
1062/*
1063 * Prologue used by the exit paths whose entry side ran KVM_SAVE_EXTRA:
1064 * loads ar.unat from the EML_UNAT slot of the frame at r12 + 16, refills
1065 * r4-r7 from that frame, then falls through into ia64_leave_hypervisor.
1066 * No interrupt masking happens here; that is done by the routine below.
1066 */
1067 adds r2 = PT(R4)+16,r12
1068 adds r3 = PT(R5)+16,r12
1069 adds r8 = PT(EML_UNAT)+16,r12
1070 ;;
1071 ld8 r8 = [r8]
1072 ;;
// ar.unat must hold the matching NaT bits before the ld8.fill sequence below
1073 mov ar.unat=r8
1074 ;;
1075 ld8.fill r4=[r2],16 //load r4
1076 ld8.fill r5=[r3],16 //load r5
1077 ;;
1078 ld8.fill r6=[r2] //load r6
1079 ld8.fill r7=[r3] //load r7
1080 ;;
1081END(ia64_leave_hypervisor_prepare)
1082//fall through
/*
 * Main exit path from the VMM; most dispatchers in this file install it as
 * rp before calling their handler.
 *
 * Steps, in order:
 *  1. Mask interrupts and call leave_hypervisor_tail (defined elsewhere).
 *  2. Reload ar.unat from EML_UNAT and restore b6/b7, ar.csd/ar.ssd and
 *     r8-r11, r14-r31 from the frame at r12 + 16.
 *  3. Turn off psr.i and psr.ic and switch to register bank 0.
 *  4. Load the saved control/application register values into bank 0
 *     scratch registers; r21 = r13.
 *  5. kvm_rse_clear_invalid: recursively zero the stacked registers outside
 *     the dirty partition, Nregs at a time.
 *  6. loadrs, then restore ar.bspstore, ar.unat, ar.rnat, ar.rsc, cr.ipsr,
 *     cr.iip, cr.ifs and ar.pfs.
 *  7. Load r18 = vpd, r17 = value at VMM_VCPU_ISR_OFFSET, r19 = vpsr,
 *     r25 = vpd, r24 = address of ia64_vmm_entry, and branch to
 *     kvm_vps_sync_write (defined elsewhere).
 * NOTE(review): kvm_vps_sync_write presumably continues at the address in
 * r24 -- confirm against its definition.
 */
1083GLOBAL_ENTRY(ia64_leave_hypervisor)
1084 rsm psr.i
1085 ;;
1086 br.call.sptk.many b0=leave_hypervisor_tail
1087 ;;
1088 adds r20=PT(PR)+16,r12
1089 adds r8=PT(EML_UNAT)+16,r12
1090 ;;
1091 ld8 r8=[r8]
1092 ;;
1093 mov ar.unat=r8
1094 ;;
1095 lfetch [r20],PT(CR_IPSR)-PT(PR)
1096 adds r2 = PT(B6)+16,r12
1097 adds r3 = PT(B7)+16,r12
1098 ;;
1099 lfetch [r20]
1100 ;;
1101 ld8 r24=[r2],16 /* B6 */
1102 ld8 r25=[r3],16 /* B7 */
1103 ;;
1104 ld8 r26=[r2],16 /* ar_csd */
1105 ld8 r27=[r3],16 /* ar_ssd */
1106 mov b6 = r24
1107 ;;
1108 ld8.fill r8=[r2],16
1109 ld8.fill r9=[r3],16
1110 mov b7 = r25
1111 ;;
1112 mov ar.csd = r26
1113 mov ar.ssd = r27
1114 ;;
1115 ld8.fill r10=[r2],PT(R15)-PT(R10)
1116 ld8.fill r11=[r3],PT(R14)-PT(R11)
1117 ;;
1118 ld8.fill r15=[r2],PT(R16)-PT(R15)
1119 ld8.fill r14=[r3],PT(R17)-PT(R14)
1120 ;;
1121 ld8.fill r16=[r2],16
1122 ld8.fill r17=[r3],16
1123 ;;
1124 ld8.fill r18=[r2],16
1125 ld8.fill r19=[r3],16
1126 ;;
1127 ld8.fill r20=[r2],16
1128 ld8.fill r21=[r3],16
1129 ;;
1130 ld8.fill r22=[r2],16
1131 ld8.fill r23=[r3],16
1132 ;;
1133 ld8.fill r24=[r2],16
1134 ld8.fill r25=[r3],16
1135 ;;
1136 ld8.fill r26=[r2],16
1137 ld8.fill r27=[r3],16
1138 ;;
1139 ld8.fill r28=[r2],16
1140 ld8.fill r29=[r3],16
1141 ;;
1142 ld8.fill r30=[r2],PT(F6)-PT(R30)
1143 ld8.fill r31=[r3],PT(F7)-PT(R31)
1144 ;;
1145 rsm psr.i | psr.ic
1146 // initiate turning off of interrupt and interruption collection
1147 invala // invalidate ALAT
1148 ;;
1149 srlz.i // ensure interruption collection is off
1150 ;;
1151 bsw.0
1152 ;;
// From here on r16-r31 name the bank 0 registers and serve as scratch.
1153 adds r16 = PT(CR_IPSR)+16,r12
1154 adds r17 = PT(CR_IIP)+16,r12
1155 mov r21=r13 // get current
1156 ;;
1157 ld8 r31=[r16],16 // load cr.ipsr
1158 ld8 r30=[r17],16 // load cr.iip
1159 ;;
1160 ld8 r29=[r16],16 // load cr.ifs
1161 ld8 r28=[r17],16 // load ar.unat
1162 ;;
1163 ld8 r27=[r16],16 // load ar.pfs
1164 ld8 r26=[r17],16 // load ar.rsc
1165 ;;
1166 ld8 r25=[r16],16 // load ar.rnat
1167 ld8 r24=[r17],16 // load ar.bspstore
1168 ;;
1169 ld8 r23=[r16],16 // load predicates
1170 ld8 r22=[r17],16 // load b0
1171 ;;
1172 ld8 r20=[r16],16 // load ar.rsc value for "loadrs"
1173 ld8.fill r1=[r17],16 //load r1
1174 ;;
1175 ld8.fill r12=[r16],16 //load r12
1176 ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13
1177 ;;
1178 ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr
1179 ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2
1180 ;;
1181 ld8.fill r3=[r16] //load r3
1182 ld8 r18=[r17] //load ar_ccv
1183 ;;
1184 mov ar.fpsr=r19
1185 mov ar.ccv=r18
// r18 = (saved loadrs value) >> 16, used below as the dirty size in bytes
1186 shr.u r18=r20,16
1187 ;;
1188kvm_rbs_switch:
1189 mov r19=96
1190
1191kvm_dont_preserve_current_frame:
1192/*
1193 * To prevent leaking bits between the hypervisor and guest domain,
1194 * we must clear the stacked registers in the "invalid" partition here.
1195 * 5 registers/cycle on McKinley).
1196 */
1197# define pRecurse p6
1198# define pReturn p7
1199# define Nregs 14
1200
1201 alloc loc0=ar.pfs,2,Nregs-2,2,0
1202 shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
1203 sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize
1204 ;;
1205 mov ar.rsc=r20 // load ar.rsc to be used for "loadrs"
1206 shladd in0=loc1,3,r19
1207 mov in1=0
1208 ;;
1209 TEXT_ALIGN(32)
// Recursive clear: in0 = bytes still to clear, in1 = recursion depth.
// Each level allocates a fresh frame of Nregs registers and zeroes its
// locals; the outermost level (in1 == 0) falls through instead of returning.
1210kvm_rse_clear_invalid:
1211 alloc loc0=ar.pfs,2,Nregs-2,2,0
1212 cmp.lt pRecurse,p0=Nregs*8,in0
1213 // if more than Nregs regs left to clear, (re)curse
1214 add out0=-Nregs*8,in0
1215 add out1=1,in1 // increment recursion count
1216 mov loc1=0
1217 mov loc2=0
1218 ;;
1219 mov loc3=0
1220 mov loc4=0
1221 mov loc5=0
1222 mov loc6=0
1223 mov loc7=0
1224(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
1225 ;;
1226 mov loc8=0
1227 mov loc9=0
1228 cmp.ne pReturn,p0=r0,in1
1229 // if recursion count != 0, we need to do a br.ret
1230 mov loc10=0
1231 mov loc11=0
1232(pReturn) br.ret.dptk.many b0
1233
1234# undef pRecurse
1235# undef pReturn
1236
1237// loadrs has already been shifted
1238 alloc r16=ar.pfs,0,0,0,0 // drop current register frame
1239 ;;
1240 loadrs
1241 ;;
1242 mov ar.bspstore=r24
1243 ;;
1244 mov ar.unat=r28
1245 mov ar.rnat=r25
1246 mov ar.rsc=r26
1247 ;;
1248 mov cr.ipsr=r31
1249 mov cr.iip=r30
1250 mov cr.ifs=r29
1251 mov ar.pfs=r27
1252 adds r18=VMM_VPD_BASE_OFFSET,r21
1253 ;;
1254 ld8 r18=[r18] //vpd
1255 adds r17=VMM_VCPU_ISR_OFFSET,r21
1256 ;;
1257 ld8 r17=[r17]
1258 adds r19=VMM_VPD_VPSR_OFFSET,r18
1259 ;;
1260 ld8 r19=[r19] //vpsr
1261 mov r25=r18
1262 adds r16= VMM_VCPU_GP_OFFSET,r21
1263 ;;
1264 ld8 r16= [r16] // load the VMM gp into r16
1265 movl r24=@gprel(ia64_vmm_entry) // calculate return address
1266 ;;
// r24 = gp + gp-relative offset = absolute address of ia64_vmm_entry
1267 add r24=r24,r16
1268 ;;
1269 br.sptk.many kvm_vps_sync_write // call the service
1270 ;;
1271END(ia64_leave_hypervisor)
1272// fall through
// Final step before resuming the guest: copies r22 into r24 and r18 into
// r25, then tests IA64_PSR_IC_BIT of r19. If the bit is set (p1) it branches
// to kvm_vps_resume_normal, otherwise (p2) to kvm_vps_resume_handler. Both
// targets are defined outside this excerpt.
1273GLOBAL_ENTRY(ia64_vmm_entry)
1274/*
1275 * must be at bank 0
1276 * parameter:
1277 * r17:cr.isr
1278 * r18:vpd
1279 * r19:vpsr
1280 * r22:b0
1281 * r23:predicate
1282 */
1283 mov r24=r22
1284 mov r25=r18
1285 tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
1286(p1) br.cond.sptk.few kvm_vps_resume_normal
1287(p2) br.cond.sptk.many kvm_vps_resume_handler
1288 ;;
1289END(ia64_vmm_entry)
1290
1291/*
1292 * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
1293 * u64 arg3, u64 arg4, u64 arg5,
1294 * u64 arg6, u64 arg7);
1295 *
1296 * XXX: The currently defined services use only 4 args at the max. The
1297 * rest are not consumed.
1298 */
/*
 * Calls a service at address (*(r13 + VMM_VCPU_VSA_BASE_OFFSET)) + in0.
 *
 * Register protocol, as set up below:
 *   r24 (hostret) = address of local label 2, where execution resumes
 *   r25, r26, r27 = in1, in2, in3 (in4-in7 are not forwarded)
 *   result        = r31 on return, copied to r8
 * psr.i and psr.ic are cleared around the call and restored afterwards only
 * if they were set on entry (p6 = psr bit 14, p7 = psr bit 13).
 */
1299GLOBAL_ENTRY(ia64_call_vsa)
1300 .regstk 4,4,0,0
1301
1302rpsave = loc0
1303pfssave = loc1
1304psrsave = loc2
1305entry = loc3
1306hostret = r24
1307
1308 alloc pfssave=ar.pfs,4,4,0,0
1309 mov rpsave=rp
1310 adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
1311 ;;
1312 ld8 entry=[entry]
13131: mov hostret=ip
1314 mov r25=in1 // copy arguments
1315 mov r26=in2
1316 mov r27=in3
1317 mov psrsave=psr
1318 ;;
1319 tbit.nz p6,p0=psrsave,14 // IA64_PSR_I
1320 tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC
1321 ;;
// hostret was the ip at label 1; adding (2f - 1b) makes it label 2
1322 add hostret=2f-1b,hostret // calculate return address
1323 add entry=entry,in0
1324 ;;
1325 rsm psr.i | psr.ic
1326 ;;
1327 srlz.i
1328 mov b6=entry
1329 br.cond.sptk b6 // call the service
13302:
1331// Architectural sequence for enabling interrupts if necessary
1332(p7) ssm psr.ic
1333 ;;
1334(p7) srlz.i
1335 ;;
1336(p6) ssm psr.i
1337 ;;
1338 mov rp=rpsave
1339 mov ar.pfs=pfssave
1340 mov r8=r31
1341 ;;
1342 srlz.d
1343 br.ret.sptk rp
1344
1345END(ia64_call_vsa)
1346
// NOTE(review): INIT_BSPSTORE is not referenced anywhere in the visible
// code; vmm_reset_entry below writes 0 to ar.bspstore instead.
1347#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100)
1348
/*
 * First entry into a guest.
 *
 * Switches to bank 0, flushes the register stack and zeroes ar.bspstore,
 * then programs cr.ipsr with the constant in r6, cr.ifs with bit 63 only,
 * and cr.iip with the CR_IIP slot of the frame at r12 - VMM_PT_REGS_SIZE.
 * r1 is loaded from the R1 slot of the same frame. Finally it sets up the
 * ia64_vmm_entry parameters (r17 = 0, r18 = vpd, r19 = vpsr, r22 = 0,
 * r23 = 0) and branches there.
 */
1349GLOBAL_ENTRY(vmm_reset_entry)
1350 //set up ipsr, iip, vpd.vpsr, dcr
1351 // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
1352 // For DCR: all bits 0
1353 bsw.0
1354 ;;
1355 mov r21 =r13
1356 adds r14=-VMM_PT_REGS_SIZE, r12
1357 ;;
1358 movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
1359 movl r10=0x8000000000000000
1360 adds r16=PT(CR_IIP), r14
1361 adds r20=PT(R1), r14
1362 ;;
1363 rsm psr.ic | psr.i
1364 ;;
1365 srlz.i
1366 ;;
1367 mov ar.rsc = 0
1368 ;;
1369 flushrs
1370 ;;
1371 mov ar.bspstore = 0
1372 // clear BSPSTORE
1373 ;;
1374 mov cr.ipsr=r6
1375 mov cr.ifs=r10
1376 ld8 r4 = [r16] // Set init iip for first run.
1377 ld8 r1 = [r20]
1378 ;;
1379 mov cr.iip=r4
1380 adds r16=VMM_VPD_BASE_OFFSET,r13
1381 ;;
1382 ld8 r18=[r16]
1383 ;;
1384 adds r19=VMM_VPD_VPSR_OFFSET,r18
1385 ;;
1386 ld8 r19=[r19]
1387 mov r17=r0
1388 mov r22=r0
1389 mov r23=r0
1390 br.cond.sptk ia64_vmm_entry
// NOTE(review): the branch above carries no qualifying predicate, so this
// return appears to be unreachable.
1391 br.ret.sptk b0
1392END(vmm_reset_entry)
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
deleted file mode 100644
index b214b5b0432d..000000000000
--- a/arch/ia64/kvm/vti.h
+++ /dev/null
@@ -1,290 +0,0 @@
1/*
2 * vti.h: prototypes for the general VT-related interface
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
6 * Fred Yang (fred.yang@intel.com)
7 * Kun Tian (Kevin Tian) (kevin.tian@intel.com)
8 *
9 * Copyright (c) 2007, Intel Corporation.
10 * Zhang xiantao <xiantao.zhang@intel.com>
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms and conditions of the GNU General Public License,
14 * version 2, as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 * more details.
20 *
21 * You should have received a copy of the GNU General Public License along with
22 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
23 * Place - Suite 330, Boston, MA 02111-1307 USA.
24 */
25#ifndef _KVM_VT_I_H
26#define _KVM_VT_I_H
27
28#ifndef __ASSEMBLY__
29#include <asm/page.h>
30
31#include <linux/kvm_host.h>
32
33/* define itr.i and itr.d in ia64_itr function */
34#define ITR 0x01
35#define DTR 0x02
36#define IaDTR 0x03
37
38#define IA64_TR_VMM 6 /*itr6, dtr6 : maps vmm code, vmbuffer*/
39#define IA64_TR_VM_DATA 7 /*dtr7 : maps current vm data*/
40
41#define RR6 (6UL<<61)
42#define RR7 (7UL<<61)
43
44
/*
 * config_options bits for pal_vp_init_env.
 *
 * Every value is fully parenthesized so that it expands correctly next to
 * operators that bind tighter than "<<" or "|", for example
 * "VP_FR_PMC + 1", "~VP_OPCODE" or "flags & VP_INIT_ENV".
 */
#define VP_INITIALIZE	(1UL)
#define VP_FR_PMC	(1UL << 1)
#define VP_OPCODE	(1UL << 8)
#define VP_CAUSE	(1UL << 9)
#define VP_FW_ACC	(1UL << 63)

/* init vp env with initializing vm_buffer */
#define VP_INIT_ENV_INITALIZE	(VP_INITIALIZE | VP_FR_PMC | \
				 VP_OPCODE | VP_CAUSE | VP_FW_ACC)
/* init vp env without initializing vm_buffer */
#define VP_INIT_ENV	(VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC)
57
58#define PAL_VP_CREATE 265
59/* Stacked Virt. Initializes a new VPD for the operation of
60 * a new virtual processor in the virtual environment.
61 */
62#define PAL_VP_ENV_INFO 266
63/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/
64#define PAL_VP_EXIT_ENV 267
65/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/
66#define PAL_VP_INIT_ENV 268
67/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/
68#define PAL_VP_REGISTER 269
69/*Stacked Virt. Register a different host IVT for the virtual processor.*/
70#define PAL_VP_RESUME 270
71/* Renamed from PAL_VP_RESUME */
72#define PAL_VP_RESTORE 270
73/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/
74#define PAL_VP_SUSPEND 271
75/* Renamed from PAL_VP_SUSPEND */
76#define PAL_VP_SAVE 271
77/* Stacked Virt. Suspends operation for the specified virtual processor on
78 * the logical processor.
79 */
80#define PAL_VP_TERMINATE 272
81/* Stacked Virt. Terminates operation for the specified virtual processor.*/
82
/*
 * The "vac" word of the VPD: either the raw unsigned long in @value, or
 * seven named one-bit flags followed by 57 reserved bits.
 */
union vac {
	unsigned long value;		/* whole word at once */
	struct {
		unsigned int a_int:1;
		unsigned int a_from_int_cr:1;
		unsigned int a_to_int_cr:1;
		unsigned int a_from_psr:1;
		unsigned int a_from_cpuid:1;
		unsigned int a_cover:1;
		unsigned int a_bsw:1;
		long reserved:57;	/* 7 + 57 = 64 bits in total */
	};
};
96
/*
 * The "vdc" word of the VPD: either the raw unsigned long in @value, or
 * six named one-bit flags followed by 58 reserved bits.
 */
union vdc {
	unsigned long value;		/* whole word at once */
	struct {
		unsigned int d_vmsw:1;
		unsigned int d_extint:1;
		unsigned int d_ibr_dbr:1;
		unsigned int d_pmc:1;
		unsigned int d_to_pmd:1;
		unsigned int d_itm:1;
		long reserved:58;	/* 6 + 58 = 64 bits in total */
	};
};
109
110struct vpd {
111 union vac vac;
112 union vdc vdc;
113 unsigned long virt_env_vaddr;
114 unsigned long reserved1[29];
115 unsigned long vhpi;
116 unsigned long reserved2[95];
117 unsigned long vgr[16];
118 unsigned long vbgr[16];
119 unsigned long vnat;
120 unsigned long vbnat;
121 unsigned long vcpuid[5];
122 unsigned long reserved3[11];
123 unsigned long vpsr;
124 unsigned long vpr;
125 unsigned long reserved4[76];
126 union {
127 unsigned long vcr[128];
128 struct {
129 unsigned long dcr;
130 unsigned long itm;
131 unsigned long iva;
132 unsigned long rsv1[5];
133 unsigned long pta;
134 unsigned long rsv2[7];
135 unsigned long ipsr;
136 unsigned long isr;
137 unsigned long rsv3;
138 unsigned long iip;
139 unsigned long ifa;
140 unsigned long itir;
141 unsigned long iipa;
142 unsigned long ifs;
143 unsigned long iim;
144 unsigned long iha;
145 unsigned long rsv4[38];
146 unsigned long lid;
147 unsigned long ivr;
148 unsigned long tpr;
149 unsigned long eoi;
150 unsigned long irr[4];
151 unsigned long itv;
152 unsigned long pmv;
153 unsigned long cmcv;
154 unsigned long rsv5[5];
155 unsigned long lrr0;
156 unsigned long lrr1;
157 unsigned long rsv6[46];
158 };
159 };
160 unsigned long reserved5[128];
161 unsigned long reserved6[3456];
162 unsigned long vmm_avail[128];
163 unsigned long reserved7[4096];
164};
165
166#define PAL_PROC_VM_BIT (1UL << 40)
167#define PAL_PROC_VMSW_BIT (1UL << 54)
168
169static inline s64 ia64_pal_vp_env_info(u64 *buffer_size,
170 u64 *vp_env_info)
171{
172 struct ia64_pal_retval iprv;
173 PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0);
174 *buffer_size = iprv.v0;
175 *vp_env_info = iprv.v1;
176 return iprv.status;
177}
178
179static inline s64 ia64_pal_vp_exit_env(u64 iva)
180{
181 struct ia64_pal_retval iprv;
182
183 PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0);
184 return iprv.status;
185}
186
187static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr,
188 u64 vbase_addr, u64 *vsa_base)
189{
190 struct ia64_pal_retval iprv;
191
192 PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr,
193 vbase_addr);
194 *vsa_base = iprv.v0;
195
196 return iprv.status;
197}
198
199static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector)
200{
201 struct ia64_pal_retval iprv;
202
203 PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0);
204
205 return iprv.status;
206}
207
208static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector)
209{
210 struct ia64_pal_retval iprv;
211
212 PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0);
213
214 return iprv.status;
215}
216
217#endif
218
219/*VPD field offset*/
/*
 * Byte offsets into struct vpd.  They sit outside the "#ifndef __ASSEMBLY__"
 * section of this header, so they are also visible to assembly sources.
 * The trailing notes name the struct vpd member found at each offset,
 * assuming 8-byte unsigned long.
 */
220#define VPD_VAC_START_OFFSET 0 /* vac */
221#define VPD_VDC_START_OFFSET 8 /* vdc */
222#define VPD_VHPI_START_OFFSET 256 /* vhpi */
223#define VPD_VGR_START_OFFSET 1024 /* vgr[0] */
224#define VPD_VBGR_START_OFFSET 1152 /* vbgr[0] */
225#define VPD_VNAT_START_OFFSET 1280 /* vnat */
226#define VPD_VBNAT_START_OFFSET 1288 /* vbnat */
227#define VPD_VCPUID_START_OFFSET 1296 /* vcpuid[0] */
228#define VPD_VPSR_START_OFFSET 1424 /* vpsr */
229#define VPD_VPR_START_OFFSET 1432 /* vpr */
230#define VPD_VRSE_CFLE_START_OFFSET 1440 /* first word of reserved4[] */
231#define VPD_VCR_START_OFFSET 2048 /* vcr[0] */
232#define VPD_VTPR_START_OFFSET 2576 /* vcr[66], i.e. tpr */
233#define VPD_VRR_START_OFFSET 3072 /* first word of reserved5[] */
234#define VPD_VMM_VAIL_START_OFFSET 31744 /* vmm_avail[0] */
235
236/*Virtualization faults*/
237
238#define EVENT_MOV_TO_AR 1
239#define EVENT_MOV_TO_AR_IMM 2
240#define EVENT_MOV_FROM_AR 3
241#define EVENT_MOV_TO_CR 4
242#define EVENT_MOV_FROM_CR 5
243#define EVENT_MOV_TO_PSR 6
244#define EVENT_MOV_FROM_PSR 7
245#define EVENT_ITC_D 8
246#define EVENT_ITC_I 9
247#define EVENT_MOV_TO_RR 10
248#define EVENT_MOV_TO_DBR 11
249#define EVENT_MOV_TO_IBR 12
250#define EVENT_MOV_TO_PKR 13
251#define EVENT_MOV_TO_PMC 14
252#define EVENT_MOV_TO_PMD 15
253#define EVENT_ITR_D 16
254#define EVENT_ITR_I 17
255#define EVENT_MOV_FROM_RR 18
256#define EVENT_MOV_FROM_DBR 19
257#define EVENT_MOV_FROM_IBR 20
258#define EVENT_MOV_FROM_PKR 21
259#define EVENT_MOV_FROM_PMC 22
260#define EVENT_MOV_FROM_CPUID 23
261#define EVENT_SSM 24
262#define EVENT_RSM 25
263#define EVENT_PTC_L 26
264#define EVENT_PTC_G 27
265#define EVENT_PTC_GA 28
266#define EVENT_PTR_D 29
267#define EVENT_PTR_I 30
268#define EVENT_THASH 31
269#define EVENT_TTAG 32
270#define EVENT_TPA 33
271#define EVENT_TAK 34
272#define EVENT_PTC_E 35
273#define EVENT_COVER 36
274#define EVENT_RFI 37
275#define EVENT_BSW_0 38
276#define EVENT_BSW_1 39
277#define EVENT_VMSW 40
278
279/**PAL virtual services offsets */
280#define PAL_VPS_RESUME_NORMAL 0x0000
281#define PAL_VPS_RESUME_HANDLER 0x0400
282#define PAL_VPS_SYNC_READ 0x0800
283#define PAL_VPS_SYNC_WRITE 0x0c00
284#define PAL_VPS_SET_PENDING_INTERRUPT 0x1000
285#define PAL_VPS_THASH 0x1400
286#define PAL_VPS_TTAG 0x1800
287#define PAL_VPS_RESTORE 0x1c00
288#define PAL_VPS_SAVE 0x2000
289
290#endif /* _KVM_VT_I_H */
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
deleted file mode 100644
index a7869f8f49a6..000000000000
--- a/arch/ia64/kvm/vtlb.c
+++ /dev/null
@@ -1,640 +0,0 @@
1/*
2 * vtlb.c: guest virtual tlb handling module.
3 * Copyright (c) 2004, Intel Corporation.
4 * Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com>
5 * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
6 *
7 * Copyright (c) 2007, Intel Corporation.
8 * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
9 * Xiantao Zhang <xiantao.zhang@intel.com>
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms and conditions of the GNU General Public License,
13 * version 2, as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * more details.
19 *
20 * You should have received a copy of the GNU General Public License along with
21 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
22 * Place - Suite 330, Boston, MA 02111-1307 USA.
23 *
24 */
25
26#include "vcpu.h"
27
28#include <linux/rwsem.h>
29
30#include <asm/tlb.h>
31
32/*
33 * Check to see if the address rid:va is translated by the TLB
34 */
35
36static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va)
37{
38 return ((trp->p) && (trp->rid == rid)
39 && ((va-trp->vadr) < PSIZE(trp->ps)));
40}
41
42/*
43 * Only for GUEST TR format.
44 */
45static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva)
46{
47 u64 sa1, ea1;
48
49 if (!trp->p || trp->rid != rid)
50 return 0;
51
52 sa1 = trp->vadr;
53 ea1 = sa1 + PSIZE(trp->ps) - 1;
54 eva -= 1;
55 if ((sva > ea1) || (sa1 > eva))
56 return 0;
57 else
58 return 1;
59
60}
61
62void machine_tlb_purge(u64 va, u64 ps)
63{
64 ia64_ptcl(va, ps << 2);
65}
66
67void local_flush_tlb_all(void)
68{
69 int i, j;
70 unsigned long flags, count0, count1;
71 unsigned long stride0, stride1, addr;
72
73 addr = current_vcpu->arch.ptce_base;
74 count0 = current_vcpu->arch.ptce_count[0];
75 count1 = current_vcpu->arch.ptce_count[1];
76 stride0 = current_vcpu->arch.ptce_stride[0];
77 stride1 = current_vcpu->arch.ptce_stride[1];
78
79 local_irq_save(flags);
80 for (i = 0; i < count0; ++i) {
81 for (j = 0; j < count1; ++j) {
82 ia64_ptce(addr);
83 addr += stride1;
84 }
85 addr += stride0;
86 }
87 local_irq_restore(flags);
88 ia64_srlz_i(); /* srlz.i implies srlz.d */
89}
90
91int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref)
92{
93 union ia64_rr vrr;
94 union ia64_pta vpta;
95 struct ia64_psr vpsr;
96
97 vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
98 vrr.val = vcpu_get_rr(vcpu, vadr);
99 vpta.val = vcpu_get_pta(vcpu);
100
101 if (vrr.ve & vpta.ve) {
102 switch (ref) {
103 case DATA_REF:
104 case NA_REF:
105 return vpsr.dt;
106 case INST_REF:
107 return vpsr.dt && vpsr.it && vpsr.ic;
108 case RSE_REF:
109 return vpsr.dt && vpsr.rt;
110
111 }
112 }
113 return 0;
114}
115
116struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag)
117{
118 u64 index, pfn, rid, pfn_bits;
119
120 pfn_bits = vpta.size - 5 - 8;
121 pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr);
122 rid = _REGION_ID(vrr);
123 index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1));
124 *tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16);
125
126 return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) +
127 (index << 5));
128}
129
130struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type)
131{
132
133 struct thash_data *trp;
134 int i;
135 u64 rid;
136
137 rid = vcpu_get_rr(vcpu, va);
138 rid = rid & RR_RID_MASK;
139 if (type == D_TLB) {
140 if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
141 for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
142 i < NDTRS; i++, trp++) {
143 if (__is_tr_translated(trp, rid, va))
144 return trp;
145 }
146 }
147 } else {
148 if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
149 for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
150 i < NITRS; i++, trp++) {
151 if (__is_tr_translated(trp, rid, va))
152 return trp;
153 }
154 }
155 }
156
157 return NULL;
158}
159
160static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
161{
162 union ia64_rr rr;
163 struct thash_data *head;
164 unsigned long ps, gpaddr;
165
166 ps = itir_ps(itir);
167 rr.val = ia64_get_rr(ifa);
168
169 gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
170 (ifa & ((1UL << ps) - 1));
171
172 head = (struct thash_data *)ia64_thash(ifa);
173 head->etag = INVALID_TI_TAG;
174 ia64_mf();
175 head->page_flags = pte & ~PAGE_FLAGS_RV_MASK;
176 head->itir = rr.ps << 2;
177 head->etag = ia64_ttag(ifa);
178 head->gpaddr = gpaddr;
179}
180
181void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
182{
183 u64 i, dirty_pages = 1;
184 u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
185 vmm_spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
186 void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
187
188 dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
189
190 vmm_spin_lock(lock);
191 for (i = 0; i < dirty_pages; i++) {
192 /* avoid RMW */
193 if (!test_bit(base_gfn + i, dirty_bitmap))
194 set_bit(base_gfn + i , dirty_bitmap);
195 }
196 vmm_spin_unlock(lock);
197}
198
199void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type)
200{
201 u64 phy_pte, psr;
202 union ia64_rr mrr;
203
204 mrr.val = ia64_get_rr(va);
205 phy_pte = translate_phy_pte(&pte, itir, va);
206
207 if (itir_ps(itir) >= mrr.ps) {
208 vhpt_insert(phy_pte, itir, va, pte);
209 } else {
210 phy_pte &= ~PAGE_FLAGS_RV_MASK;
211 psr = ia64_clear_ic();
212 ia64_itc(type, va, phy_pte, itir_ps(itir));
213 paravirt_dv_serialize_data();
214 ia64_set_psr(psr);
215 }
216
217 if (!(pte&VTLB_PTE_IO))
218 mark_pages_dirty(v, pte, itir_ps(itir));
219}
220
221/*
222 * vhpt lookup
223 */
224struct thash_data *vhpt_lookup(u64 va)
225{
226 struct thash_data *head;
227 u64 tag;
228
229 head = (struct thash_data *)ia64_thash(va);
230 tag = ia64_ttag(va);
231 if (head->etag == tag)
232 return head;
233 return NULL;
234}
235
236u64 guest_vhpt_lookup(u64 iha, u64 *pte)
237{
238 u64 ret;
239 struct thash_data *data;
240
241 data = __vtr_lookup(current_vcpu, iha, D_TLB);
242 if (data != NULL)
243 thash_vhpt_insert(current_vcpu, data->page_flags,
244 data->itir, iha, D_TLB);
245
246 asm volatile ("rsm psr.ic|psr.i;;"
247 "srlz.d;;"
248 "ld8.s r9=[%1];;"
249 "tnat.nz p6,p7=r9;;"
250 "(p6) mov %0=1;"
251 "(p6) mov r9=r0;"
252 "(p7) extr.u r9=r9,0,53;;"
253 "(p7) mov %0=r0;"
254 "(p7) st8 [%2]=r9;;"
255 "ssm psr.ic;;"
256 "srlz.d;;"
257 "ssm psr.i;;"
258 "srlz.d;;"
259 : "=&r"(ret) : "r"(iha), "r"(pte) : "memory");
260
261 return ret;
262}
263
264/*
265 * purge software guest tlb
266 */
267
268static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps)
269{
270 struct thash_data *cur;
271 u64 start, curadr, size, psbits, tag, rr_ps, num;
272 union ia64_rr vrr;
273 struct thash_cb *hcb = &v->arch.vtlb;
274
275 vrr.val = vcpu_get_rr(v, va);
276 psbits = VMX(v, psbits[(va >> 61)]);
277 start = va & ~((1UL << ps) - 1);
278 while (psbits) {
279 curadr = start;
280 rr_ps = __ffs(psbits);
281 psbits &= ~(1UL << rr_ps);
282 num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps));
283 size = PSIZE(rr_ps);
284 vrr.ps = rr_ps;
285 while (num) {
286 cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag);
287 if (cur->etag == tag && cur->ps == rr_ps)
288 cur->etag = INVALID_TI_TAG;
289 curadr += size;
290 num--;
291 }
292 }
293}
294
295
296/*
297 * purge VHPT and machine TLB
298 */
299static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps)
300{
301 struct thash_data *cur;
302 u64 start, size, tag, num;
303 union ia64_rr rr;
304
305 start = va & ~((1UL << ps) - 1);
306 rr.val = ia64_get_rr(va);
307 size = PSIZE(rr.ps);
308 num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps));
309 while (num) {
310 cur = (struct thash_data *)ia64_thash(start);
311 tag = ia64_ttag(start);
312 if (cur->etag == tag)
313 cur->etag = INVALID_TI_TAG;
314 start += size;
315 num--;
316 }
317 machine_tlb_purge(va, ps);
318}
319
320/*
321 * Insert an entry into hash TLB or VHPT.
322 * NOTES:
323 * 1: When inserting VHPT to thash, "va" is a must covered
324 * address by the inserted machine VHPT entry.
325 * 2: The format of entry is always in TLB.
326 * 3: The caller need to make sure the new entry will not overlap
327 * with any existed entry.
328 */
329void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va)
330{
331 struct thash_data *head;
332 union ia64_rr vrr;
333 u64 tag;
334 struct thash_cb *hcb = &v->arch.vtlb;
335
336 vrr.val = vcpu_get_rr(v, va);
337 vrr.ps = itir_ps(itir);
338 VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
339 head = vsa_thash(hcb->pta, va, vrr.val, &tag);
340 head->page_flags = pte;
341 head->itir = itir;
342 head->etag = tag;
343}
344
345int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type)
346{
347 struct thash_data *trp;
348 int i;
349 u64 end, rid;
350
351 rid = vcpu_get_rr(vcpu, va);
352 rid = rid & RR_RID_MASK;
353 end = va + PSIZE(ps);
354 if (type == D_TLB) {
355 if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
356 for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
357 i < NDTRS; i++, trp++) {
358 if (__is_tr_overlap(trp, rid, va, end))
359 return i;
360 }
361 }
362 } else {
363 if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
364 for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
365 i < NITRS; i++, trp++) {
366 if (__is_tr_overlap(trp, rid, va, end))
367 return i;
368 }
369 }
370 }
371 return -1;
372}
373
374/*
375 * Purge entries in VTLB and VHPT
376 */
377void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps)
378{
379 if (vcpu_quick_region_check(v->arch.tc_regions, va))
380 vtlb_purge(v, va, ps);
381 vhpt_purge(v, va, ps);
382}
383
384void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps)
385{
386 u64 old_va = va;
387 va = REGION_OFFSET(va);
388 if (vcpu_quick_region_check(v->arch.tc_regions, old_va))
389 vtlb_purge(v, va, ps);
390 vhpt_purge(v, va, ps);
391}
392
393u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
394{
395 u64 ps, ps_mask, paddr, maddr, io_mask;
396 union pte_flags phy_pte;
397
398 ps = itir_ps(itir);
399 ps_mask = ~((1UL << ps) - 1);
400 phy_pte.val = *pte;
401 paddr = *pte;
402 paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask);
403 maddr = kvm_get_mpt_entry(paddr >> PAGE_SHIFT);
404 io_mask = maddr & GPFN_IO_MASK;
405 if (io_mask && (io_mask != GPFN_PHYS_MMIO)) {
406 *pte |= VTLB_PTE_IO;
407 return -1;
408 }
409 maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) |
410 (paddr & ~PAGE_MASK);
411 phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT;
412 return phy_pte.val;
413}
414
415/*
416 * Purge overlap TCs and then insert the new entry to emulate itc ops.
417 * Notes: Only TC entry can purge and insert.
418 */
419void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
420 u64 ifa, int type)
421{
422 u64 ps;
423 u64 phy_pte, io_mask, index;
424 union ia64_rr vrr, mrr;
425
426 ps = itir_ps(itir);
427 vrr.val = vcpu_get_rr(v, ifa);
428 mrr.val = ia64_get_rr(ifa);
429
430 index = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
431 io_mask = kvm_get_mpt_entry(index) & GPFN_IO_MASK;
432 phy_pte = translate_phy_pte(&pte, itir, ifa);
433
434 /* Ensure WB attribute if pte is related to a normal mem page,
435 * which is required by vga acceleration since qemu maps shared
436 * vram buffer with WB.
437 */
438 if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT) &&
439 io_mask != GPFN_PHYS_MMIO) {
440 pte &= ~_PAGE_MA_MASK;
441 phy_pte &= ~_PAGE_MA_MASK;
442 }
443
444 vtlb_purge(v, ifa, ps);
445 vhpt_purge(v, ifa, ps);
446
447 if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) {
448 vtlb_insert(v, pte, itir, ifa);
449 vcpu_quick_region_set(VMX(v, tc_regions), ifa);
450 }
451 if (pte & VTLB_PTE_IO)
452 return;
453
454 if (ps >= mrr.ps)
455 vhpt_insert(phy_pte, itir, ifa, pte);
456 else {
457 u64 psr;
458 phy_pte &= ~PAGE_FLAGS_RV_MASK;
459 psr = ia64_clear_ic();
460 ia64_itc(type, ifa, phy_pte, ps);
461 paravirt_dv_serialize_data();
462 ia64_set_psr(psr);
463 }
464 if (!(pte&VTLB_PTE_IO))
465 mark_pages_dirty(v, pte, ps);
466
467}
468
469/*
470 * Purge all TCs or VHPT entries including those in Hash table.
471 *
472 */
473
474void thash_purge_all(struct kvm_vcpu *v)
475{
476 int i;
477 struct thash_data *head;
478 struct thash_cb *vtlb, *vhpt;
479 vtlb = &v->arch.vtlb;
480 vhpt = &v->arch.vhpt;
481
482 for (i = 0; i < 8; i++)
483 VMX(v, psbits[i]) = 0;
484
485 head = vtlb->hash;
486 for (i = 0; i < vtlb->num; i++) {
487 head->page_flags = 0;
488 head->etag = INVALID_TI_TAG;
489 head->itir = 0;
490 head->next = 0;
491 head++;
492 };
493
494 head = vhpt->hash;
495 for (i = 0; i < vhpt->num; i++) {
496 head->page_flags = 0;
497 head->etag = INVALID_TI_TAG;
498 head->itir = 0;
499 head->next = 0;
500 head++;
501 };
502
503 local_flush_tlb_all();
504}
505
506/*
507 * Lookup the hash table and its collision chain to find an entry
508 * covering this address rid:va or the entry.
509 *
510 * INPUT:
511 * in: TLB format for both VHPT & TLB.
512 */
513struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
514{
515 struct thash_data *cch;
516 u64 psbits, ps, tag;
517 union ia64_rr vrr;
518
519 struct thash_cb *hcb = &v->arch.vtlb;
520
521 cch = __vtr_lookup(v, va, is_data);
522 if (cch)
523 return cch;
524
525 if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0)
526 return NULL;
527
528 psbits = VMX(v, psbits[(va >> 61)]);
529 vrr.val = vcpu_get_rr(v, va);
530 while (psbits) {
531 ps = __ffs(psbits);
532 psbits &= ~(1UL << ps);
533 vrr.ps = ps;
534 cch = vsa_thash(hcb->pta, va, vrr.val, &tag);
535 if (cch->etag == tag && cch->ps == ps)
536 return cch;
537 }
538
539 return NULL;
540}
541
542/*
543 * Initialize internal control data before service.
544 */
545void thash_init(struct thash_cb *hcb, u64 sz)
546{
547 int i;
548 struct thash_data *head;
549
550 hcb->pta.val = (unsigned long)hcb->hash;
551 hcb->pta.vf = 1;
552 hcb->pta.ve = 1;
553 hcb->pta.size = sz;
554 head = hcb->hash;
555 for (i = 0; i < hcb->num; i++) {
556 head->page_flags = 0;
557 head->itir = 0;
558 head->etag = INVALID_TI_TAG;
559 head->next = 0;
560 head++;
561 }
562}
563
564u64 kvm_get_mpt_entry(u64 gpfn)
565{
566 u64 *base = (u64 *) KVM_P2M_BASE;
567
568 if (gpfn >= (KVM_P2M_SIZE >> 3))
569 panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn);
570
571 return *(base + gpfn);
572}
573
574u64 kvm_lookup_mpa(u64 gpfn)
575{
576 u64 maddr;
577 maddr = kvm_get_mpt_entry(gpfn);
578 return maddr&_PAGE_PPN_MASK;
579}
580
581u64 kvm_gpa_to_mpa(u64 gpa)
582{
583 u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT);
584 return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
585}
586
587/*
588 * Fetch guest bundle code.
589 * INPUT:
590 * gip: guest ip
591 * pbundle: used to return fetched bundle.
592 */
593int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
594{
595 u64 gpip = 0; /* guest physical IP*/
596 u64 *vpa;
597 struct thash_data *tlb;
598 u64 maddr;
599
600 if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) {
601 /* I-side physical mode */
602 gpip = gip;
603 } else {
604 tlb = vtlb_lookup(vcpu, gip, I_TLB);
605 if (tlb)
606 gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) |
607 (gip & (PSIZE(tlb->ps) - 1));
608 }
609 if (gpip) {
610 maddr = kvm_gpa_to_mpa(gpip);
611 } else {
612 tlb = vhpt_lookup(gip);
613 if (tlb == NULL) {
614 ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2);
615 return IA64_FAULT;
616 }
617 maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps)
618 | (gip & (PSIZE(tlb->ps) - 1));
619 }
620 vpa = (u64 *)__kvm_va(maddr);
621
622 pbundle->i64[0] = *vpa++;
623 pbundle->i64[1] = *vpa;
624
625 return IA64_NO_FAULT;
626}
627
628void kvm_init_vhpt(struct kvm_vcpu *v)
629{
630 v->arch.vhpt.num = VHPT_NUM_ENTRIES;
631 thash_init(&v->arch.vhpt, VHPT_SHIFT);
632 ia64_set_pta(v->arch.vhpt.pta.val);
633 /*Enable VHPT here?*/
634}
635
636void kvm_init_vtlb(struct kvm_vcpu *v)
637{
638 v->arch.vtlb.num = VTLB_NUM_ENTRIES;
639 thash_init(&v->arch.vtlb, VTLB_SHIFT);
640}
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 6acf0c2a0f99..942c7b1678e3 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -170,8 +170,6 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
170 unsigned long *nb_ret); 170 unsigned long *nb_ret);
171extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr, 171extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
172 unsigned long gpa, bool dirty); 172 unsigned long gpa, bool dirty);
173extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
174 long pte_index, unsigned long pteh, unsigned long ptel);
175extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, 173extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
176 long pte_index, unsigned long pteh, unsigned long ptel, 174 long pte_index, unsigned long pteh, unsigned long ptel,
177 pgd_t *pgdir, bool realmode, unsigned long *idx_ret); 175 pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 0aa817933e6a..2d81e202bdcc 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -37,7 +37,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
37 37
38#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 38#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
39#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ 39#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
40extern unsigned long kvm_rma_pages;
41#endif 40#endif
42 41
43#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ 42#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
@@ -148,7 +147,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
148 /* This covers 14..54 bits of va*/ 147 /* This covers 14..54 bits of va*/
149 rb = (v & ~0x7fUL) << 16; /* AVA field */ 148 rb = (v & ~0x7fUL) << 16; /* AVA field */
150 149
151 rb |= v >> (62 - 8); /* B field */ 150 rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */
152 /* 151 /*
153 * AVA in v had cleared lower 23 bits. We need to derive 152 * AVA in v had cleared lower 23 bits. We need to derive
154 * that from pteg index 153 * that from pteg index
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 047855619cc4..7efd666a3fa7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -180,11 +180,6 @@ struct kvmppc_spapr_tce_table {
180 struct page *pages[0]; 180 struct page *pages[0];
181}; 181};
182 182
183struct kvm_rma_info {
184 atomic_t use_count;
185 unsigned long base_pfn;
186};
187
188/* XICS components, defined in book3s_xics.c */ 183/* XICS components, defined in book3s_xics.c */
189struct kvmppc_xics; 184struct kvmppc_xics;
190struct kvmppc_icp; 185struct kvmppc_icp;
@@ -214,16 +209,9 @@ struct revmap_entry {
214#define KVMPPC_RMAP_PRESENT 0x100000000ul 209#define KVMPPC_RMAP_PRESENT 0x100000000ul
215#define KVMPPC_RMAP_INDEX 0xfffffffful 210#define KVMPPC_RMAP_INDEX 0xfffffffful
216 211
217/* Low-order bits in memslot->arch.slot_phys[] */
218#define KVMPPC_PAGE_ORDER_MASK 0x1f
219#define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */
220#define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */
221#define KVMPPC_GOT_PAGE 0x80
222
223struct kvm_arch_memory_slot { 212struct kvm_arch_memory_slot {
224#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 213#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
225 unsigned long *rmap; 214 unsigned long *rmap;
226 unsigned long *slot_phys;
227#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 215#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
228}; 216};
229 217
@@ -242,14 +230,12 @@ struct kvm_arch {
242 struct kvm_rma_info *rma; 230 struct kvm_rma_info *rma;
243 unsigned long vrma_slb_v; 231 unsigned long vrma_slb_v;
244 int rma_setup_done; 232 int rma_setup_done;
245 int using_mmu_notifiers;
246 u32 hpt_order; 233 u32 hpt_order;
247 atomic_t vcpus_running; 234 atomic_t vcpus_running;
248 u32 online_vcores; 235 u32 online_vcores;
249 unsigned long hpt_npte; 236 unsigned long hpt_npte;
250 unsigned long hpt_mask; 237 unsigned long hpt_mask;
251 atomic_t hpte_mod_interest; 238 atomic_t hpte_mod_interest;
252 spinlock_t slot_phys_lock;
253 cpumask_t need_tlb_flush; 239 cpumask_t need_tlb_flush;
254 int hpt_cma_alloc; 240 int hpt_cma_alloc;
255#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 241#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
@@ -297,6 +283,7 @@ struct kvmppc_vcore {
297 struct list_head runnable_threads; 283 struct list_head runnable_threads;
298 spinlock_t lock; 284 spinlock_t lock;
299 wait_queue_head_t wq; 285 wait_queue_head_t wq;
286 spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
300 u64 stolen_tb; 287 u64 stolen_tb;
301 u64 preempt_tb; 288 u64 preempt_tb;
302 struct kvm_vcpu *runner; 289 struct kvm_vcpu *runner;
@@ -308,6 +295,7 @@ struct kvmppc_vcore {
308 ulong dpdes; /* doorbell state (POWER8) */ 295 ulong dpdes; /* doorbell state (POWER8) */
309 void *mpp_buffer; /* Micro Partition Prefetch buffer */ 296 void *mpp_buffer; /* Micro Partition Prefetch buffer */
310 bool mpp_buffer_is_valid; 297 bool mpp_buffer_is_valid;
298 ulong conferring_threads;
311}; 299};
312 300
313#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) 301#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff)
@@ -664,6 +652,8 @@ struct kvm_vcpu_arch {
664 spinlock_t tbacct_lock; 652 spinlock_t tbacct_lock;
665 u64 busy_stolen; 653 u64 busy_stolen;
666 u64 busy_preempt; 654 u64 busy_preempt;
655
656 u32 emul_inst;
667#endif 657#endif
668}; 658};
669 659
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index a6dcdb6d13c1..46bf652c9169 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -170,8 +170,6 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
170 unsigned long ioba, unsigned long tce); 170 unsigned long ioba, unsigned long tce);
171extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 171extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
172 unsigned long ioba); 172 unsigned long ioba);
173extern struct kvm_rma_info *kvm_alloc_rma(void);
174extern void kvm_release_rma(struct kvm_rma_info *ri);
175extern struct page *kvm_alloc_hpt(unsigned long nr_pages); 173extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
176extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); 174extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
177extern int kvmppc_core_init_vm(struct kvm *kvm); 175extern int kvmppc_core_init_vm(struct kvm *kvm);
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c161ef3f28a1..24d78e1871c9 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -489,7 +489,6 @@ int main(void)
489 DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); 489 DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
490 DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); 490 DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
491 DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); 491 DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
492 DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
493 DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); 492 DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
494 DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); 493 DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
495 DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); 494 DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
@@ -499,6 +498,7 @@ int main(void)
499 DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); 498 DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
500 DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); 499 DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
501 DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); 500 DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
501 DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst));
502#endif 502#endif
503#ifdef CONFIG_PPC_BOOK3S 503#ifdef CONFIG_PPC_BOOK3S
504 DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); 504 DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 602eb51d20bc..f5769f19ae25 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -172,6 +172,7 @@ config KVM_XICS
172 depends on KVM_BOOK3S_64 && !KVM_MPIC 172 depends on KVM_BOOK3S_64 && !KVM_MPIC
173 select HAVE_KVM_IRQCHIP 173 select HAVE_KVM_IRQCHIP
174 select HAVE_KVM_IRQFD 174 select HAVE_KVM_IRQFD
175 default y
175 ---help--- 176 ---help---
176 Include support for the XICS (eXternal Interrupt Controller 177 Include support for the XICS (eXternal Interrupt Controller
177 Specification) interrupt controller architecture used on 178 Specification) interrupt controller architecture used on
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index b32db4b95361..888bf466d8c6 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -64,14 +64,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
64 { NULL } 64 { NULL }
65}; 65};
66 66
67void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
68{
69}
70
71void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
72{
73}
74
75void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) 67void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
76{ 68{
77 if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { 69 if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index cd0b0730e29e..a2eb6d354a57 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -78,11 +78,6 @@ static inline bool sr_kp(u32 sr_raw)
78 return (sr_raw & 0x20000000) ? true: false; 78 return (sr_raw & 0x20000000) ? true: false;
79} 79}
80 80
81static inline bool sr_nx(u32 sr_raw)
82{
83 return (sr_raw & 0x10000000) ? true: false;
84}
85
86static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, 81static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
87 struct kvmppc_pte *pte, bool data, 82 struct kvmppc_pte *pte, bool data,
88 bool iswrite); 83 bool iswrite);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d40770248b6a..534acb3c6c3d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,8 +37,7 @@
37#include <asm/ppc-opcode.h> 37#include <asm/ppc-opcode.h>
38#include <asm/cputable.h> 38#include <asm/cputable.h>
39 39
40/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ 40#include "trace_hv.h"
41#define MAX_LPID_970 63
42 41
43/* Power architecture requires HPT is at least 256kB */ 42/* Power architecture requires HPT is at least 256kB */
44#define PPC_MIN_HPT_ORDER 18 43#define PPC_MIN_HPT_ORDER 18
@@ -229,14 +228,9 @@ int kvmppc_mmu_hv_init(void)
229 if (!cpu_has_feature(CPU_FTR_HVMODE)) 228 if (!cpu_has_feature(CPU_FTR_HVMODE))
230 return -EINVAL; 229 return -EINVAL;
231 230
232 /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */ 231 /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
233 if (cpu_has_feature(CPU_FTR_ARCH_206)) { 232 host_lpid = mfspr(SPRN_LPID);
234 host_lpid = mfspr(SPRN_LPID); /* POWER7 */ 233 rsvd_lpid = LPID_RSVD;
235 rsvd_lpid = LPID_RSVD;
236 } else {
237 host_lpid = 0; /* PPC970 */
238 rsvd_lpid = MAX_LPID_970;
239 }
240 234
241 kvmppc_init_lpid(rsvd_lpid + 1); 235 kvmppc_init_lpid(rsvd_lpid + 1);
242 236
@@ -259,130 +253,12 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
259 kvmppc_set_msr(vcpu, msr); 253 kvmppc_set_msr(vcpu, msr);
260} 254}
261 255
262/*
263 * This is called to get a reference to a guest page if there isn't
264 * one already in the memslot->arch.slot_phys[] array.
265 */
266static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
267 struct kvm_memory_slot *memslot,
268 unsigned long psize)
269{
270 unsigned long start;
271 long np, err;
272 struct page *page, *hpage, *pages[1];
273 unsigned long s, pgsize;
274 unsigned long *physp;
275 unsigned int is_io, got, pgorder;
276 struct vm_area_struct *vma;
277 unsigned long pfn, i, npages;
278
279 physp = memslot->arch.slot_phys;
280 if (!physp)
281 return -EINVAL;
282 if (physp[gfn - memslot->base_gfn])
283 return 0;
284
285 is_io = 0;
286 got = 0;
287 page = NULL;
288 pgsize = psize;
289 err = -EINVAL;
290 start = gfn_to_hva_memslot(memslot, gfn);
291
292 /* Instantiate and get the page we want access to */
293 np = get_user_pages_fast(start, 1, 1, pages);
294 if (np != 1) {
295 /* Look up the vma for the page */
296 down_read(&current->mm->mmap_sem);
297 vma = find_vma(current->mm, start);
298 if (!vma || vma->vm_start > start ||
299 start + psize > vma->vm_end ||
300 !(vma->vm_flags & VM_PFNMAP))
301 goto up_err;
302 is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
303 pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
304 /* check alignment of pfn vs. requested page size */
305 if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
306 goto up_err;
307 up_read(&current->mm->mmap_sem);
308
309 } else {
310 page = pages[0];
311 got = KVMPPC_GOT_PAGE;
312
313 /* See if this is a large page */
314 s = PAGE_SIZE;
315 if (PageHuge(page)) {
316 hpage = compound_head(page);
317 s <<= compound_order(hpage);
318 /* Get the whole large page if slot alignment is ok */
319 if (s > psize && slot_is_aligned(memslot, s) &&
320 !(memslot->userspace_addr & (s - 1))) {
321 start &= ~(s - 1);
322 pgsize = s;
323 get_page(hpage);
324 put_page(page);
325 page = hpage;
326 }
327 }
328 if (s < psize)
329 goto out;
330 pfn = page_to_pfn(page);
331 }
332
333 npages = pgsize >> PAGE_SHIFT;
334 pgorder = __ilog2(npages);
335 physp += (gfn - memslot->base_gfn) & ~(npages - 1);
336 spin_lock(&kvm->arch.slot_phys_lock);
337 for (i = 0; i < npages; ++i) {
338 if (!physp[i]) {
339 physp[i] = ((pfn + i) << PAGE_SHIFT) +
340 got + is_io + pgorder;
341 got = 0;
342 }
343 }
344 spin_unlock(&kvm->arch.slot_phys_lock);
345 err = 0;
346
347 out:
348 if (got)
349 put_page(page);
350 return err;
351
352 up_err:
353 up_read(&current->mm->mmap_sem);
354 return err;
355}
356
357long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, 256long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
358 long pte_index, unsigned long pteh, 257 long pte_index, unsigned long pteh,
359 unsigned long ptel, unsigned long *pte_idx_ret) 258 unsigned long ptel, unsigned long *pte_idx_ret)
360{ 259{
361 unsigned long psize, gpa, gfn;
362 struct kvm_memory_slot *memslot;
363 long ret; 260 long ret;
364 261
365 if (kvm->arch.using_mmu_notifiers)
366 goto do_insert;
367
368 psize = hpte_page_size(pteh, ptel);
369 if (!psize)
370 return H_PARAMETER;
371
372 pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
373
374 /* Find the memslot (if any) for this address */
375 gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
376 gfn = gpa >> PAGE_SHIFT;
377 memslot = gfn_to_memslot(kvm, gfn);
378 if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
379 if (!slot_is_aligned(memslot, psize))
380 return H_PARAMETER;
381 if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
382 return H_PARAMETER;
383 }
384
385 do_insert:
386 /* Protect linux PTE lookup from page table destruction */ 262 /* Protect linux PTE lookup from page table destruction */
387 rcu_read_lock_sched(); /* this disables preemption too */ 263 rcu_read_lock_sched(); /* this disables preemption too */
388 ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel, 264 ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
@@ -397,19 +273,6 @@ long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
397 273
398} 274}
399 275
400/*
401 * We come here on a H_ENTER call from the guest when we are not
402 * using mmu notifiers and we don't have the requested page pinned
403 * already.
404 */
405long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
406 long pte_index, unsigned long pteh,
407 unsigned long ptel)
408{
409 return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
410 pteh, ptel, &vcpu->arch.gpr[4]);
411}
412
413static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu, 276static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
414 gva_t eaddr) 277 gva_t eaddr)
415{ 278{
@@ -494,7 +357,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
494 gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G)); 357 gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));
495 358
496 /* Storage key permission check for POWER7 */ 359 /* Storage key permission check for POWER7 */
497 if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) { 360 if (data && virtmode) {
498 int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr); 361 int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
499 if (amrfield & 1) 362 if (amrfield & 1)
500 gpte->may_read = 0; 363 gpte->may_read = 0;
@@ -622,14 +485,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
622 gfn = gpa >> PAGE_SHIFT; 485 gfn = gpa >> PAGE_SHIFT;
623 memslot = gfn_to_memslot(kvm, gfn); 486 memslot = gfn_to_memslot(kvm, gfn);
624 487
488 trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr);
489
625 /* No memslot means it's an emulated MMIO region */ 490 /* No memslot means it's an emulated MMIO region */
626 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 491 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
627 return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, 492 return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
628 dsisr & DSISR_ISSTORE); 493 dsisr & DSISR_ISSTORE);
629 494
630 if (!kvm->arch.using_mmu_notifiers)
631 return -EFAULT; /* should never get here */
632
633 /* 495 /*
634 * This should never happen, because of the slot_is_aligned() 496 * This should never happen, because of the slot_is_aligned()
635 * check in kvmppc_do_h_enter(). 497 * check in kvmppc_do_h_enter().
@@ -641,6 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
641 mmu_seq = kvm->mmu_notifier_seq; 503 mmu_seq = kvm->mmu_notifier_seq;
642 smp_rmb(); 504 smp_rmb();
643 505
506 ret = -EFAULT;
644 is_io = 0; 507 is_io = 0;
645 pfn = 0; 508 pfn = 0;
646 page = NULL; 509 page = NULL;
@@ -664,7 +527,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
664 } 527 }
665 up_read(&current->mm->mmap_sem); 528 up_read(&current->mm->mmap_sem);
666 if (!pfn) 529 if (!pfn)
667 return -EFAULT; 530 goto out_put;
668 } else { 531 } else {
669 page = pages[0]; 532 page = pages[0];
670 pfn = page_to_pfn(page); 533 pfn = page_to_pfn(page);
@@ -694,14 +557,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
694 } 557 }
695 } 558 }
696 559
697 ret = -EFAULT;
698 if (psize > pte_size) 560 if (psize > pte_size)
699 goto out_put; 561 goto out_put;
700 562
701 /* Check WIMG vs. the actual page we're accessing */ 563 /* Check WIMG vs. the actual page we're accessing */
702 if (!hpte_cache_flags_ok(r, is_io)) { 564 if (!hpte_cache_flags_ok(r, is_io)) {
703 if (is_io) 565 if (is_io)
704 return -EFAULT; 566 goto out_put;
567
705 /* 568 /*
706 * Allow guest to map emulated device memory as 569 * Allow guest to map emulated device memory as
707 * uncacheable, but actually make it cacheable. 570 * uncacheable, but actually make it cacheable.
@@ -765,6 +628,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
765 SetPageDirty(page); 628 SetPageDirty(page);
766 629
767 out_put: 630 out_put:
631 trace_kvm_page_fault_exit(vcpu, hpte, ret);
632
768 if (page) { 633 if (page) {
769 /* 634 /*
770 * We drop pages[0] here, not page because page might 635 * We drop pages[0] here, not page because page might
@@ -895,8 +760,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
895 psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); 760 psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
896 if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && 761 if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
897 hpte_rpn(ptel, psize) == gfn) { 762 hpte_rpn(ptel, psize) == gfn) {
898 if (kvm->arch.using_mmu_notifiers) 763 hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
899 hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
900 kvmppc_invalidate_hpte(kvm, hptep, i); 764 kvmppc_invalidate_hpte(kvm, hptep, i);
901 /* Harvest R and C */ 765 /* Harvest R and C */
902 rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); 766 rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
@@ -914,15 +778,13 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
914 778
915int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva) 779int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
916{ 780{
917 if (kvm->arch.using_mmu_notifiers) 781 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
918 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
919 return 0; 782 return 0;
920} 783}
921 784
922int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) 785int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
923{ 786{
924 if (kvm->arch.using_mmu_notifiers) 787 kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
925 kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
926 return 0; 788 return 0;
927} 789}
928 790
@@ -1004,8 +866,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1004 866
1005int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) 867int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
1006{ 868{
1007 if (!kvm->arch.using_mmu_notifiers)
1008 return 0;
1009 return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); 869 return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
1010} 870}
1011 871
@@ -1042,15 +902,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
1042 902
1043int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva) 903int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
1044{ 904{
1045 if (!kvm->arch.using_mmu_notifiers)
1046 return 0;
1047 return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); 905 return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
1048} 906}
1049 907
1050void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) 908void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
1051{ 909{
1052 if (!kvm->arch.using_mmu_notifiers)
1053 return;
1054 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 910 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
1055} 911}
1056 912
@@ -1117,8 +973,11 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
1117 } 973 }
1118 974
1119 /* Now check and modify the HPTE */ 975 /* Now check and modify the HPTE */
1120 if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) 976 if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
977 /* unlock and continue */
978 hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
1121 continue; 979 continue;
980 }
1122 981
1123 /* need to make it temporarily absent so C is stable */ 982 /* need to make it temporarily absent so C is stable */
1124 hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); 983 hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
@@ -1206,35 +1065,17 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
1206 struct page *page, *pages[1]; 1065 struct page *page, *pages[1];
1207 int npages; 1066 int npages;
1208 unsigned long hva, offset; 1067 unsigned long hva, offset;
1209 unsigned long pa;
1210 unsigned long *physp;
1211 int srcu_idx; 1068 int srcu_idx;
1212 1069
1213 srcu_idx = srcu_read_lock(&kvm->srcu); 1070 srcu_idx = srcu_read_lock(&kvm->srcu);
1214 memslot = gfn_to_memslot(kvm, gfn); 1071 memslot = gfn_to_memslot(kvm, gfn);
1215 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 1072 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
1216 goto err; 1073 goto err;
1217 if (!kvm->arch.using_mmu_notifiers) { 1074 hva = gfn_to_hva_memslot(memslot, gfn);
1218 physp = memslot->arch.slot_phys; 1075 npages = get_user_pages_fast(hva, 1, 1, pages);
1219 if (!physp) 1076 if (npages < 1)
1220 goto err; 1077 goto err;
1221 physp += gfn - memslot->base_gfn; 1078 page = pages[0];
1222 pa = *physp;
1223 if (!pa) {
1224 if (kvmppc_get_guest_page(kvm, gfn, memslot,
1225 PAGE_SIZE) < 0)
1226 goto err;
1227 pa = *physp;
1228 }
1229 page = pfn_to_page(pa >> PAGE_SHIFT);
1230 get_page(page);
1231 } else {
1232 hva = gfn_to_hva_memslot(memslot, gfn);
1233 npages = get_user_pages_fast(hva, 1, 1, pages);
1234 if (npages < 1)
1235 goto err;
1236 page = pages[0];
1237 }
1238 srcu_read_unlock(&kvm->srcu, srcu_idx); 1079 srcu_read_unlock(&kvm->srcu, srcu_idx);
1239 1080
1240 offset = gpa & (PAGE_SIZE - 1); 1081 offset = gpa & (PAGE_SIZE - 1);
@@ -1258,7 +1099,7 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
1258 1099
1259 put_page(page); 1100 put_page(page);
1260 1101
1261 if (!dirty || !kvm->arch.using_mmu_notifiers) 1102 if (!dirty)
1262 return; 1103 return;
1263 1104
1264 /* We need to mark this page dirty in the rmap chain */ 1105 /* We need to mark this page dirty in the rmap chain */
@@ -1539,9 +1380,15 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
1539 hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); 1380 hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
1540 lbuf = (unsigned long __user *)buf; 1381 lbuf = (unsigned long __user *)buf;
1541 for (j = 0; j < hdr.n_valid; ++j) { 1382 for (j = 0; j < hdr.n_valid; ++j) {
1383 __be64 hpte_v;
1384 __be64 hpte_r;
1385
1542 err = -EFAULT; 1386 err = -EFAULT;
1543 if (__get_user(v, lbuf) || __get_user(r, lbuf + 1)) 1387 if (__get_user(hpte_v, lbuf) ||
1388 __get_user(hpte_r, lbuf + 1))
1544 goto out; 1389 goto out;
1390 v = be64_to_cpu(hpte_v);
1391 r = be64_to_cpu(hpte_r);
1545 err = -EINVAL; 1392 err = -EINVAL;
1546 if (!(v & HPTE_V_VALID)) 1393 if (!(v & HPTE_V_VALID))
1547 goto out; 1394 goto out;
@@ -1652,10 +1499,7 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
1652{ 1499{
1653 struct kvmppc_mmu *mmu = &vcpu->arch.mmu; 1500 struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
1654 1501
1655 if (cpu_has_feature(CPU_FTR_ARCH_206)) 1502 vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */
1656 vcpu->arch.slb_nr = 32; /* POWER7 */
1657 else
1658 vcpu->arch.slb_nr = 64;
1659 1503
1660 mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; 1504 mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
1661 mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; 1505 mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e63587d30b70..de4018a1bc4b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -58,6 +58,9 @@
58 58
59#include "book3s.h" 59#include "book3s.h"
60 60
61#define CREATE_TRACE_POINTS
62#include "trace_hv.h"
63
61/* #define EXIT_DEBUG */ 64/* #define EXIT_DEBUG */
62/* #define EXIT_DEBUG_SIMPLE */ 65/* #define EXIT_DEBUG_SIMPLE */
63/* #define EXIT_DEBUG_INT */ 66/* #define EXIT_DEBUG_INT */
@@ -135,11 +138,10 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
135 * stolen. 138 * stolen.
136 * 139 *
137 * Updates to busy_stolen are protected by arch.tbacct_lock; 140 * Updates to busy_stolen are protected by arch.tbacct_lock;
138 * updates to vc->stolen_tb are protected by the arch.tbacct_lock 141 * updates to vc->stolen_tb are protected by the vcore->stoltb_lock
139 * of the vcpu that has taken responsibility for running the vcore 142 * lock. The stolen times are measured in units of timebase ticks.
140 * (i.e. vc->runner). The stolen times are measured in units of 143 * (Note that the != TB_NIL checks below are purely defensive;
141 * timebase ticks. (Note that the != TB_NIL checks below are 144 * they should never fail.)
142 * purely defensive; they should never fail.)
143 */ 145 */
144 146
145static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) 147static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
@@ -147,12 +149,21 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
147 struct kvmppc_vcore *vc = vcpu->arch.vcore; 149 struct kvmppc_vcore *vc = vcpu->arch.vcore;
148 unsigned long flags; 150 unsigned long flags;
149 151
150 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); 152 /*
151 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE && 153 * We can test vc->runner without taking the vcore lock,
152 vc->preempt_tb != TB_NIL) { 154 * because only this task ever sets vc->runner to this
153 vc->stolen_tb += mftb() - vc->preempt_tb; 155 * vcpu, and once it is set to this vcpu, only this task
154 vc->preempt_tb = TB_NIL; 156 * ever sets it to NULL.
157 */
158 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
159 spin_lock_irqsave(&vc->stoltb_lock, flags);
160 if (vc->preempt_tb != TB_NIL) {
161 vc->stolen_tb += mftb() - vc->preempt_tb;
162 vc->preempt_tb = TB_NIL;
163 }
164 spin_unlock_irqrestore(&vc->stoltb_lock, flags);
155 } 165 }
166 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
156 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST && 167 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
157 vcpu->arch.busy_preempt != TB_NIL) { 168 vcpu->arch.busy_preempt != TB_NIL) {
158 vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt; 169 vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
@@ -166,9 +177,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
166 struct kvmppc_vcore *vc = vcpu->arch.vcore; 177 struct kvmppc_vcore *vc = vcpu->arch.vcore;
167 unsigned long flags; 178 unsigned long flags;
168 179
169 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); 180 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
170 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) 181 spin_lock_irqsave(&vc->stoltb_lock, flags);
171 vc->preempt_tb = mftb(); 182 vc->preempt_tb = mftb();
183 spin_unlock_irqrestore(&vc->stoltb_lock, flags);
184 }
185 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
172 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) 186 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
173 vcpu->arch.busy_preempt = mftb(); 187 vcpu->arch.busy_preempt = mftb();
174 spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); 188 spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
@@ -191,9 +205,6 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
191 struct kvmppc_vcore *vc = vcpu->arch.vcore; 205 struct kvmppc_vcore *vc = vcpu->arch.vcore;
192 206
193 if (arch_compat) { 207 if (arch_compat) {
194 if (!cpu_has_feature(CPU_FTR_ARCH_206))
195 return -EINVAL; /* 970 has no compat mode support */
196
197 switch (arch_compat) { 208 switch (arch_compat) {
198 case PVR_ARCH_205: 209 case PVR_ARCH_205:
199 /* 210 /*
@@ -505,25 +516,14 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
505static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) 516static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
506{ 517{
507 u64 p; 518 u64 p;
519 unsigned long flags;
508 520
509 /* 521 spin_lock_irqsave(&vc->stoltb_lock, flags);
510 * If we are the task running the vcore, then since we hold 522 p = vc->stolen_tb;
511 * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
512 * can't be updated, so we don't need the tbacct_lock.
513 * If the vcore is inactive, it can't become active (since we
514 * hold the vcore lock), so the vcpu load/put functions won't
515 * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
516 */
517 if (vc->vcore_state != VCORE_INACTIVE && 523 if (vc->vcore_state != VCORE_INACTIVE &&
518 vc->runner->arch.run_task != current) { 524 vc->preempt_tb != TB_NIL)
519 spin_lock_irq(&vc->runner->arch.tbacct_lock); 525 p += now - vc->preempt_tb;
520 p = vc->stolen_tb; 526 spin_unlock_irqrestore(&vc->stoltb_lock, flags);
521 if (vc->preempt_tb != TB_NIL)
522 p += now - vc->preempt_tb;
523 spin_unlock_irq(&vc->runner->arch.tbacct_lock);
524 } else {
525 p = vc->stolen_tb;
526 }
527 return p; 527 return p;
528} 528}
529 529
@@ -607,10 +607,45 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
607 } 607 }
608} 608}
609 609
610static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
611{
612 struct kvmppc_vcore *vcore = target->arch.vcore;
613
614 /*
615 * We expect to have been called by the real mode handler
616 * (kvmppc_rm_h_confer()) which would have directly returned
617 * H_SUCCESS if the source vcore wasn't idle (e.g. if it may
618 * have useful work to do and should not confer) so we don't
619 * recheck that here.
620 */
621
622 spin_lock(&vcore->lock);
623 if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
624 vcore->vcore_state != VCORE_INACTIVE)
625 target = vcore->runner;
626 spin_unlock(&vcore->lock);
627
628 return kvm_vcpu_yield_to(target);
629}
630
631static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
632{
633 int yield_count = 0;
634 struct lppaca *lppaca;
635
636 spin_lock(&vcpu->arch.vpa_update_lock);
637 lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
638 if (lppaca)
639 yield_count = lppaca->yield_count;
640 spin_unlock(&vcpu->arch.vpa_update_lock);
641 return yield_count;
642}
643
610int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) 644int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
611{ 645{
612 unsigned long req = kvmppc_get_gpr(vcpu, 3); 646 unsigned long req = kvmppc_get_gpr(vcpu, 3);
613 unsigned long target, ret = H_SUCCESS; 647 unsigned long target, ret = H_SUCCESS;
648 int yield_count;
614 struct kvm_vcpu *tvcpu; 649 struct kvm_vcpu *tvcpu;
615 int idx, rc; 650 int idx, rc;
616 651
@@ -619,14 +654,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
619 return RESUME_HOST; 654 return RESUME_HOST;
620 655
621 switch (req) { 656 switch (req) {
622 case H_ENTER:
623 idx = srcu_read_lock(&vcpu->kvm->srcu);
624 ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
625 kvmppc_get_gpr(vcpu, 5),
626 kvmppc_get_gpr(vcpu, 6),
627 kvmppc_get_gpr(vcpu, 7));
628 srcu_read_unlock(&vcpu->kvm->srcu, idx);
629 break;
630 case H_CEDE: 657 case H_CEDE:
631 break; 658 break;
632 case H_PROD: 659 case H_PROD:
@@ -654,7 +681,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
654 ret = H_PARAMETER; 681 ret = H_PARAMETER;
655 break; 682 break;
656 } 683 }
657 kvm_vcpu_yield_to(tvcpu); 684 yield_count = kvmppc_get_gpr(vcpu, 5);
685 if (kvmppc_get_yield_count(tvcpu) != yield_count)
686 break;
687 kvm_arch_vcpu_yield_to(tvcpu);
658 break; 688 break;
659 case H_REGISTER_VPA: 689 case H_REGISTER_VPA:
660 ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4), 690 ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
@@ -769,6 +799,8 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
769 vcpu->stat.ext_intr_exits++; 799 vcpu->stat.ext_intr_exits++;
770 r = RESUME_GUEST; 800 r = RESUME_GUEST;
771 break; 801 break;
802 /* HMI is hypervisor interrupt and host has handled it. Resume guest.*/
803 case BOOK3S_INTERRUPT_HMI:
772 case BOOK3S_INTERRUPT_PERFMON: 804 case BOOK3S_INTERRUPT_PERFMON:
773 r = RESUME_GUEST; 805 r = RESUME_GUEST;
774 break; 806 break;
@@ -837,6 +869,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
837 * Accordingly return to Guest or Host. 869 * Accordingly return to Guest or Host.
838 */ 870 */
839 case BOOK3S_INTERRUPT_H_EMUL_ASSIST: 871 case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
872 if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
873 vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
874 swab32(vcpu->arch.emul_inst) :
875 vcpu->arch.emul_inst;
840 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { 876 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
841 r = kvmppc_emulate_debug_inst(run, vcpu); 877 r = kvmppc_emulate_debug_inst(run, vcpu);
842 } else { 878 } else {
@@ -1357,6 +1393,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
1357 1393
1358 INIT_LIST_HEAD(&vcore->runnable_threads); 1394 INIT_LIST_HEAD(&vcore->runnable_threads);
1359 spin_lock_init(&vcore->lock); 1395 spin_lock_init(&vcore->lock);
1396 spin_lock_init(&vcore->stoltb_lock);
1360 init_waitqueue_head(&vcore->wq); 1397 init_waitqueue_head(&vcore->wq);
1361 vcore->preempt_tb = TB_NIL; 1398 vcore->preempt_tb = TB_NIL;
1362 vcore->lpcr = kvm->arch.lpcr; 1399 vcore->lpcr = kvm->arch.lpcr;
@@ -1694,9 +1731,11 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
1694 vc->n_woken = 0; 1731 vc->n_woken = 0;
1695 vc->nap_count = 0; 1732 vc->nap_count = 0;
1696 vc->entry_exit_count = 0; 1733 vc->entry_exit_count = 0;
1734 vc->preempt_tb = TB_NIL;
1697 vc->vcore_state = VCORE_STARTING; 1735 vc->vcore_state = VCORE_STARTING;
1698 vc->in_guest = 0; 1736 vc->in_guest = 0;
1699 vc->napping_threads = 0; 1737 vc->napping_threads = 0;
1738 vc->conferring_threads = 0;
1700 1739
1701 /* 1740 /*
1702 * Updating any of the vpas requires calling kvmppc_pin_guest_page, 1741 * Updating any of the vpas requires calling kvmppc_pin_guest_page,
@@ -1726,6 +1765,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
1726 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { 1765 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
1727 kvmppc_start_thread(vcpu); 1766 kvmppc_start_thread(vcpu);
1728 kvmppc_create_dtl_entry(vcpu, vc); 1767 kvmppc_create_dtl_entry(vcpu, vc);
1768 trace_kvm_guest_enter(vcpu);
1729 } 1769 }
1730 1770
1731 /* Set this explicitly in case thread 0 doesn't have a vcpu */ 1771 /* Set this explicitly in case thread 0 doesn't have a vcpu */
@@ -1734,6 +1774,9 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
1734 1774
1735 vc->vcore_state = VCORE_RUNNING; 1775 vc->vcore_state = VCORE_RUNNING;
1736 preempt_disable(); 1776 preempt_disable();
1777
1778 trace_kvmppc_run_core(vc, 0);
1779
1737 spin_unlock(&vc->lock); 1780 spin_unlock(&vc->lock);
1738 1781
1739 kvm_guest_enter(); 1782 kvm_guest_enter();
@@ -1779,6 +1822,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
1779 kvmppc_core_pending_dec(vcpu)) 1822 kvmppc_core_pending_dec(vcpu))
1780 kvmppc_core_dequeue_dec(vcpu); 1823 kvmppc_core_dequeue_dec(vcpu);
1781 1824
1825 trace_kvm_guest_exit(vcpu);
1826
1782 ret = RESUME_GUEST; 1827 ret = RESUME_GUEST;
1783 if (vcpu->arch.trap) 1828 if (vcpu->arch.trap)
1784 ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu, 1829 ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
@@ -1804,6 +1849,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
1804 wake_up(&vcpu->arch.cpu_run); 1849 wake_up(&vcpu->arch.cpu_run);
1805 } 1850 }
1806 } 1851 }
1852
1853 trace_kvmppc_run_core(vc, 1);
1807} 1854}
1808 1855
1809/* 1856/*
@@ -1826,15 +1873,37 @@ static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
1826 */ 1873 */
1827static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) 1874static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
1828{ 1875{
1876 struct kvm_vcpu *vcpu;
1877 int do_sleep = 1;
1878
1829 DEFINE_WAIT(wait); 1879 DEFINE_WAIT(wait);
1830 1880
1831 prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); 1881 prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
1882
1883 /*
1884 * Check one last time for pending exceptions and ceded state after
1885 * we put ourselves on the wait queue
1886 */
1887 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
1888 if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
1889 do_sleep = 0;
1890 break;
1891 }
1892 }
1893
1894 if (!do_sleep) {
1895 finish_wait(&vc->wq, &wait);
1896 return;
1897 }
1898
1832 vc->vcore_state = VCORE_SLEEPING; 1899 vc->vcore_state = VCORE_SLEEPING;
1900 trace_kvmppc_vcore_blocked(vc, 0);
1833 spin_unlock(&vc->lock); 1901 spin_unlock(&vc->lock);
1834 schedule(); 1902 schedule();
1835 finish_wait(&vc->wq, &wait); 1903 finish_wait(&vc->wq, &wait);
1836 spin_lock(&vc->lock); 1904 spin_lock(&vc->lock);
1837 vc->vcore_state = VCORE_INACTIVE; 1905 vc->vcore_state = VCORE_INACTIVE;
1906 trace_kvmppc_vcore_blocked(vc, 1);
1838} 1907}
1839 1908
1840static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1909static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
@@ -1843,6 +1912,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1843 struct kvmppc_vcore *vc; 1912 struct kvmppc_vcore *vc;
1844 struct kvm_vcpu *v, *vn; 1913 struct kvm_vcpu *v, *vn;
1845 1914
1915 trace_kvmppc_run_vcpu_enter(vcpu);
1916
1846 kvm_run->exit_reason = 0; 1917 kvm_run->exit_reason = 0;
1847 vcpu->arch.ret = RESUME_GUEST; 1918 vcpu->arch.ret = RESUME_GUEST;
1848 vcpu->arch.trap = 0; 1919 vcpu->arch.trap = 0;
@@ -1872,6 +1943,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1872 VCORE_EXIT_COUNT(vc) == 0) { 1943 VCORE_EXIT_COUNT(vc) == 0) {
1873 kvmppc_create_dtl_entry(vcpu, vc); 1944 kvmppc_create_dtl_entry(vcpu, vc);
1874 kvmppc_start_thread(vcpu); 1945 kvmppc_start_thread(vcpu);
1946 trace_kvm_guest_enter(vcpu);
1875 } else if (vc->vcore_state == VCORE_SLEEPING) { 1947 } else if (vc->vcore_state == VCORE_SLEEPING) {
1876 wake_up(&vc->wq); 1948 wake_up(&vc->wq);
1877 } 1949 }
@@ -1936,6 +2008,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1936 wake_up(&v->arch.cpu_run); 2008 wake_up(&v->arch.cpu_run);
1937 } 2009 }
1938 2010
2011 trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
1939 spin_unlock(&vc->lock); 2012 spin_unlock(&vc->lock);
1940 return vcpu->arch.ret; 2013 return vcpu->arch.ret;
1941} 2014}
@@ -1962,7 +2035,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
1962 /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */ 2035 /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
1963 smp_mb(); 2036 smp_mb();
1964 2037
1965 /* On the first time here, set up HTAB and VRMA or RMA */ 2038 /* On the first time here, set up HTAB and VRMA */
1966 if (!vcpu->kvm->arch.rma_setup_done) { 2039 if (!vcpu->kvm->arch.rma_setup_done) {
1967 r = kvmppc_hv_setup_htab_rma(vcpu); 2040 r = kvmppc_hv_setup_htab_rma(vcpu);
1968 if (r) 2041 if (r)
@@ -1981,7 +2054,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
1981 2054
1982 if (run->exit_reason == KVM_EXIT_PAPR_HCALL && 2055 if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
1983 !(vcpu->arch.shregs.msr & MSR_PR)) { 2056 !(vcpu->arch.shregs.msr & MSR_PR)) {
2057 trace_kvm_hcall_enter(vcpu);
1984 r = kvmppc_pseries_do_hcall(vcpu); 2058 r = kvmppc_pseries_do_hcall(vcpu);
2059 trace_kvm_hcall_exit(vcpu, r);
1985 kvmppc_core_prepare_to_enter(vcpu); 2060 kvmppc_core_prepare_to_enter(vcpu);
1986 } else if (r == RESUME_PAGE_FAULT) { 2061 } else if (r == RESUME_PAGE_FAULT) {
1987 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 2062 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -1997,98 +2072,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
1997 return r; 2072 return r;
1998} 2073}
1999 2074
2000
2001/* Work out RMLS (real mode limit selector) field value for a given RMA size.
2002 Assumes POWER7 or PPC970. */
2003static inline int lpcr_rmls(unsigned long rma_size)
2004{
2005 switch (rma_size) {
2006 case 32ul << 20: /* 32 MB */
2007 if (cpu_has_feature(CPU_FTR_ARCH_206))
2008 return 8; /* only supported on POWER7 */
2009 return -1;
2010 case 64ul << 20: /* 64 MB */
2011 return 3;
2012 case 128ul << 20: /* 128 MB */
2013 return 7;
2014 case 256ul << 20: /* 256 MB */
2015 return 4;
2016 case 1ul << 30: /* 1 GB */
2017 return 2;
2018 case 16ul << 30: /* 16 GB */
2019 return 1;
2020 case 256ul << 30: /* 256 GB */
2021 return 0;
2022 default:
2023 return -1;
2024 }
2025}
2026
2027static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2028{
2029 struct page *page;
2030 struct kvm_rma_info *ri = vma->vm_file->private_data;
2031
2032 if (vmf->pgoff >= kvm_rma_pages)
2033 return VM_FAULT_SIGBUS;
2034
2035 page = pfn_to_page(ri->base_pfn + vmf->pgoff);
2036 get_page(page);
2037 vmf->page = page;
2038 return 0;
2039}
2040
2041static const struct vm_operations_struct kvm_rma_vm_ops = {
2042 .fault = kvm_rma_fault,
2043};
2044
2045static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
2046{
2047 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
2048 vma->vm_ops = &kvm_rma_vm_ops;
2049 return 0;
2050}
2051
2052static int kvm_rma_release(struct inode *inode, struct file *filp)
2053{
2054 struct kvm_rma_info *ri = filp->private_data;
2055
2056 kvm_release_rma(ri);
2057 return 0;
2058}
2059
2060static const struct file_operations kvm_rma_fops = {
2061 .mmap = kvm_rma_mmap,
2062 .release = kvm_rma_release,
2063};
2064
2065static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
2066 struct kvm_allocate_rma *ret)
2067{
2068 long fd;
2069 struct kvm_rma_info *ri;
2070 /*
2071 * Only do this on PPC970 in HV mode
2072 */
2073 if (!cpu_has_feature(CPU_FTR_HVMODE) ||
2074 !cpu_has_feature(CPU_FTR_ARCH_201))
2075 return -EINVAL;
2076
2077 if (!kvm_rma_pages)
2078 return -EINVAL;
2079
2080 ri = kvm_alloc_rma();
2081 if (!ri)
2082 return -ENOMEM;
2083
2084 fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC);
2085 if (fd < 0)
2086 kvm_release_rma(ri);
2087
2088 ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
2089 return fd;
2090}
2091
2092static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, 2075static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
2093 int linux_psize) 2076 int linux_psize)
2094{ 2077{
@@ -2167,26 +2150,6 @@ out:
2167 return r; 2150 return r;
2168} 2151}
2169 2152
2170static void unpin_slot(struct kvm_memory_slot *memslot)
2171{
2172 unsigned long *physp;
2173 unsigned long j, npages, pfn;
2174 struct page *page;
2175
2176 physp = memslot->arch.slot_phys;
2177 npages = memslot->npages;
2178 if (!physp)
2179 return;
2180 for (j = 0; j < npages; j++) {
2181 if (!(physp[j] & KVMPPC_GOT_PAGE))
2182 continue;
2183 pfn = physp[j] >> PAGE_SHIFT;
2184 page = pfn_to_page(pfn);
2185 SetPageDirty(page);
2186 put_page(page);
2187 }
2188}
2189
2190static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, 2153static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
2191 struct kvm_memory_slot *dont) 2154 struct kvm_memory_slot *dont)
2192{ 2155{
@@ -2194,11 +2157,6 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
2194 vfree(free->arch.rmap); 2157 vfree(free->arch.rmap);
2195 free->arch.rmap = NULL; 2158 free->arch.rmap = NULL;
2196 } 2159 }
2197 if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
2198 unpin_slot(free);
2199 vfree(free->arch.slot_phys);
2200 free->arch.slot_phys = NULL;
2201 }
2202} 2160}
2203 2161
2204static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, 2162static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
@@ -2207,7 +2165,6 @@ static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
2207 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); 2165 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
2208 if (!slot->arch.rmap) 2166 if (!slot->arch.rmap)
2209 return -ENOMEM; 2167 return -ENOMEM;
2210 slot->arch.slot_phys = NULL;
2211 2168
2212 return 0; 2169 return 0;
2213} 2170}
@@ -2216,17 +2173,6 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
2216 struct kvm_memory_slot *memslot, 2173 struct kvm_memory_slot *memslot,
2217 struct kvm_userspace_memory_region *mem) 2174 struct kvm_userspace_memory_region *mem)
2218{ 2175{
2219 unsigned long *phys;
2220
2221 /* Allocate a slot_phys array if needed */
2222 phys = memslot->arch.slot_phys;
2223 if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
2224 phys = vzalloc(memslot->npages * sizeof(unsigned long));
2225 if (!phys)
2226 return -ENOMEM;
2227 memslot->arch.slot_phys = phys;
2228 }
2229
2230 return 0; 2176 return 0;
2231} 2177}
2232 2178
@@ -2284,17 +2230,11 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
2284{ 2230{
2285 int err = 0; 2231 int err = 0;
2286 struct kvm *kvm = vcpu->kvm; 2232 struct kvm *kvm = vcpu->kvm;
2287 struct kvm_rma_info *ri = NULL;
2288 unsigned long hva; 2233 unsigned long hva;
2289 struct kvm_memory_slot *memslot; 2234 struct kvm_memory_slot *memslot;
2290 struct vm_area_struct *vma; 2235 struct vm_area_struct *vma;
2291 unsigned long lpcr = 0, senc; 2236 unsigned long lpcr = 0, senc;
2292 unsigned long lpcr_mask = 0;
2293 unsigned long psize, porder; 2237 unsigned long psize, porder;
2294 unsigned long rma_size;
2295 unsigned long rmls;
2296 unsigned long *physp;
2297 unsigned long i, npages;
2298 int srcu_idx; 2238 int srcu_idx;
2299 2239
2300 mutex_lock(&kvm->lock); 2240 mutex_lock(&kvm->lock);
@@ -2329,88 +2269,25 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
2329 psize = vma_kernel_pagesize(vma); 2269 psize = vma_kernel_pagesize(vma);
2330 porder = __ilog2(psize); 2270 porder = __ilog2(psize);
2331 2271
2332 /* Is this one of our preallocated RMAs? */
2333 if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
2334 hva == vma->vm_start)
2335 ri = vma->vm_file->private_data;
2336
2337 up_read(&current->mm->mmap_sem); 2272 up_read(&current->mm->mmap_sem);
2338 2273
2339 if (!ri) { 2274 /* We can handle 4k, 64k or 16M pages in the VRMA */
2340 /* On POWER7, use VRMA; on PPC970, give up */ 2275 err = -EINVAL;
2341 err = -EPERM; 2276 if (!(psize == 0x1000 || psize == 0x10000 ||
2342 if (cpu_has_feature(CPU_FTR_ARCH_201)) { 2277 psize == 0x1000000))
2343 pr_err("KVM: CPU requires an RMO\n"); 2278 goto out_srcu;
2344 goto out_srcu;
2345 }
2346 2279
2347 /* We can handle 4k, 64k or 16M pages in the VRMA */ 2280 /* Update VRMASD field in the LPCR */
2348 err = -EINVAL; 2281 senc = slb_pgsize_encoding(psize);
2349 if (!(psize == 0x1000 || psize == 0x10000 || 2282 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
2350 psize == 0x1000000)) 2283 (VRMA_VSID << SLB_VSID_SHIFT_1T);
2351 goto out_srcu; 2284 /* the -4 is to account for senc values starting at 0x10 */
2285 lpcr = senc << (LPCR_VRMASD_SH - 4);
2352 2286
2353 /* Update VRMASD field in the LPCR */ 2287 /* Create HPTEs in the hash page table for the VRMA */
2354 senc = slb_pgsize_encoding(psize); 2288 kvmppc_map_vrma(vcpu, memslot, porder);
2355 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
2356 (VRMA_VSID << SLB_VSID_SHIFT_1T);
2357 lpcr_mask = LPCR_VRMASD;
2358 /* the -4 is to account for senc values starting at 0x10 */
2359 lpcr = senc << (LPCR_VRMASD_SH - 4);
2360 2289
2361 /* Create HPTEs in the hash page table for the VRMA */ 2290 kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
2362 kvmppc_map_vrma(vcpu, memslot, porder);
2363
2364 } else {
2365 /* Set up to use an RMO region */
2366 rma_size = kvm_rma_pages;
2367 if (rma_size > memslot->npages)
2368 rma_size = memslot->npages;
2369 rma_size <<= PAGE_SHIFT;
2370 rmls = lpcr_rmls(rma_size);
2371 err = -EINVAL;
2372 if ((long)rmls < 0) {
2373 pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
2374 goto out_srcu;
2375 }
2376 atomic_inc(&ri->use_count);
2377 kvm->arch.rma = ri;
2378
2379 /* Update LPCR and RMOR */
2380 if (cpu_has_feature(CPU_FTR_ARCH_201)) {
2381 /* PPC970; insert RMLS value (split field) in HID4 */
2382 lpcr_mask = (1ul << HID4_RMLS0_SH) |
2383 (3ul << HID4_RMLS2_SH) | HID4_RMOR;
2384 lpcr = ((rmls >> 2) << HID4_RMLS0_SH) |
2385 ((rmls & 3) << HID4_RMLS2_SH);
2386 /* RMOR is also in HID4 */
2387 lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
2388 << HID4_RMOR_SH;
2389 } else {
2390 /* POWER7 */
2391 lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS;
2392 lpcr = rmls << LPCR_RMLS_SH;
2393 kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
2394 }
2395 pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
2396 ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
2397
2398 /* Initialize phys addrs of pages in RMO */
2399 npages = kvm_rma_pages;
2400 porder = __ilog2(npages);
2401 physp = memslot->arch.slot_phys;
2402 if (physp) {
2403 if (npages > memslot->npages)
2404 npages = memslot->npages;
2405 spin_lock(&kvm->arch.slot_phys_lock);
2406 for (i = 0; i < npages; ++i)
2407 physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
2408 porder;
2409 spin_unlock(&kvm->arch.slot_phys_lock);
2410 }
2411 }
2412
2413 kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
2414 2291
2415 /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ 2292 /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
2416 smp_wmb(); 2293 smp_wmb();
@@ -2449,35 +2326,21 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
2449 memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, 2326 memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
2450 sizeof(kvm->arch.enabled_hcalls)); 2327 sizeof(kvm->arch.enabled_hcalls));
2451 2328
2452 kvm->arch.rma = NULL;
2453
2454 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); 2329 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
2455 2330
2456 if (cpu_has_feature(CPU_FTR_ARCH_201)) { 2331 /* Init LPCR for virtual RMA mode */
2457 /* PPC970; HID4 is effectively the LPCR */ 2332 kvm->arch.host_lpid = mfspr(SPRN_LPID);
2458 kvm->arch.host_lpid = 0; 2333 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
2459 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4); 2334 lpcr &= LPCR_PECE | LPCR_LPES;
2460 lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH)); 2335 lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
2461 lpcr |= ((lpid >> 4) << HID4_LPID1_SH) | 2336 LPCR_VPM0 | LPCR_VPM1;
2462 ((lpid & 0xf) << HID4_LPID5_SH); 2337 kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
2463 } else { 2338 (VRMA_VSID << SLB_VSID_SHIFT_1T);
2464 /* POWER7; init LPCR for virtual RMA mode */ 2339 /* On POWER8 turn on online bit to enable PURR/SPURR */
2465 kvm->arch.host_lpid = mfspr(SPRN_LPID); 2340 if (cpu_has_feature(CPU_FTR_ARCH_207S))
2466 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); 2341 lpcr |= LPCR_ONL;
2467 lpcr &= LPCR_PECE | LPCR_LPES;
2468 lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
2469 LPCR_VPM0 | LPCR_VPM1;
2470 kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
2471 (VRMA_VSID << SLB_VSID_SHIFT_1T);
2472 /* On POWER8 turn on online bit to enable PURR/SPURR */
2473 if (cpu_has_feature(CPU_FTR_ARCH_207S))
2474 lpcr |= LPCR_ONL;
2475 }
2476 kvm->arch.lpcr = lpcr; 2342 kvm->arch.lpcr = lpcr;
2477 2343
2478 kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
2479 spin_lock_init(&kvm->arch.slot_phys_lock);
2480
2481 /* 2344 /*
2482 * Track that we now have a HV mode VM active. This blocks secondary 2345 * Track that we now have a HV mode VM active. This blocks secondary
2483 * CPU threads from coming online. 2346 * CPU threads from coming online.
@@ -2507,10 +2370,6 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
2507 kvm_hv_vm_deactivated(); 2370 kvm_hv_vm_deactivated();
2508 2371
2509 kvmppc_free_vcores(kvm); 2372 kvmppc_free_vcores(kvm);
2510 if (kvm->arch.rma) {
2511 kvm_release_rma(kvm->arch.rma);
2512 kvm->arch.rma = NULL;
2513 }
2514 2373
2515 kvmppc_free_hpt(kvm); 2374 kvmppc_free_hpt(kvm);
2516} 2375}
@@ -2536,7 +2395,8 @@ static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
2536 2395
2537static int kvmppc_core_check_processor_compat_hv(void) 2396static int kvmppc_core_check_processor_compat_hv(void)
2538{ 2397{
2539 if (!cpu_has_feature(CPU_FTR_HVMODE)) 2398 if (!cpu_has_feature(CPU_FTR_HVMODE) ||
2399 !cpu_has_feature(CPU_FTR_ARCH_206))
2540 return -EIO; 2400 return -EIO;
2541 return 0; 2401 return 0;
2542} 2402}
@@ -2550,16 +2410,6 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
2550 2410
2551 switch (ioctl) { 2411 switch (ioctl) {
2552 2412
2553 case KVM_ALLOCATE_RMA: {
2554 struct kvm_allocate_rma rma;
2555 struct kvm *kvm = filp->private_data;
2556
2557 r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
2558 if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
2559 r = -EFAULT;
2560 break;
2561 }
2562
2563 case KVM_PPC_ALLOCATE_HTAB: { 2413 case KVM_PPC_ALLOCATE_HTAB: {
2564 u32 htab_order; 2414 u32 htab_order;
2565 2415
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 3f1bb5a36c27..1f083ff8a61a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -16,6 +16,7 @@
16#include <linux/memblock.h> 16#include <linux/memblock.h>
17#include <linux/sizes.h> 17#include <linux/sizes.h>
18#include <linux/cma.h> 18#include <linux/cma.h>
19#include <linux/bitops.h>
19 20
20#include <asm/cputable.h> 21#include <asm/cputable.h>
21#include <asm/kvm_ppc.h> 22#include <asm/kvm_ppc.h>
@@ -32,95 +33,9 @@
32 * By default we reserve 5% of memory for hash pagetable allocation. 33 * By default we reserve 5% of memory for hash pagetable allocation.
33 */ 34 */
34static unsigned long kvm_cma_resv_ratio = 5; 35static unsigned long kvm_cma_resv_ratio = 5;
35/*
36 * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
37 * Each RMA has to be physically contiguous and of a size that the
38 * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB,
39 * and other larger sizes. Since we are unlikely to be allocate that
40 * much physically contiguous memory after the system is up and running,
41 * we preallocate a set of RMAs in early boot using CMA.
42 * should be power of 2.
43 */
44unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
45EXPORT_SYMBOL_GPL(kvm_rma_pages);
46 36
47static struct cma *kvm_cma; 37static struct cma *kvm_cma;
48 38
49/* Work out RMLS (real mode limit selector) field value for a given RMA size.
50 Assumes POWER7 or PPC970. */
51static inline int lpcr_rmls(unsigned long rma_size)
52{
53 switch (rma_size) {
54 case 32ul << 20: /* 32 MB */
55 if (cpu_has_feature(CPU_FTR_ARCH_206))
56 return 8; /* only supported on POWER7 */
57 return -1;
58 case 64ul << 20: /* 64 MB */
59 return 3;
60 case 128ul << 20: /* 128 MB */
61 return 7;
62 case 256ul << 20: /* 256 MB */
63 return 4;
64 case 1ul << 30: /* 1 GB */
65 return 2;
66 case 16ul << 30: /* 16 GB */
67 return 1;
68 case 256ul << 30: /* 256 GB */
69 return 0;
70 default:
71 return -1;
72 }
73}
74
75static int __init early_parse_rma_size(char *p)
76{
77 unsigned long kvm_rma_size;
78
79 pr_debug("%s(%s)\n", __func__, p);
80 if (!p)
81 return -EINVAL;
82 kvm_rma_size = memparse(p, &p);
83 /*
84 * Check that the requested size is one supported in hardware
85 */
86 if (lpcr_rmls(kvm_rma_size) < 0) {
87 pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
88 return -EINVAL;
89 }
90 kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
91 return 0;
92}
93early_param("kvm_rma_size", early_parse_rma_size);
94
95struct kvm_rma_info *kvm_alloc_rma()
96{
97 struct page *page;
98 struct kvm_rma_info *ri;
99
100 ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
101 if (!ri)
102 return NULL;
103 page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages));
104 if (!page)
105 goto err_out;
106 atomic_set(&ri->use_count, 1);
107 ri->base_pfn = page_to_pfn(page);
108 return ri;
109err_out:
110 kfree(ri);
111 return NULL;
112}
113EXPORT_SYMBOL_GPL(kvm_alloc_rma);
114
115void kvm_release_rma(struct kvm_rma_info *ri)
116{
117 if (atomic_dec_and_test(&ri->use_count)) {
118 cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
119 kfree(ri);
120 }
121}
122EXPORT_SYMBOL_GPL(kvm_release_rma);
123
124static int __init early_parse_kvm_cma_resv(char *p) 39static int __init early_parse_kvm_cma_resv(char *p)
125{ 40{
126 pr_debug("%s(%s)\n", __func__, p); 41 pr_debug("%s(%s)\n", __func__, p);
@@ -132,14 +47,9 @@ early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
132 47
133struct page *kvm_alloc_hpt(unsigned long nr_pages) 48struct page *kvm_alloc_hpt(unsigned long nr_pages)
134{ 49{
135 unsigned long align_pages = HPT_ALIGN_PAGES;
136
137 VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); 50 VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
138 51
139 /* Old CPUs require HPT aligned on a multiple of its size */ 52 return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES));
140 if (!cpu_has_feature(CPU_FTR_ARCH_206))
141 align_pages = nr_pages;
142 return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages));
143} 53}
144EXPORT_SYMBOL_GPL(kvm_alloc_hpt); 54EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
145 55
@@ -180,22 +90,44 @@ void __init kvm_cma_reserve(void)
180 if (selected_size) { 90 if (selected_size) {
181 pr_debug("%s: reserving %ld MiB for global area\n", __func__, 91 pr_debug("%s: reserving %ld MiB for global area\n", __func__,
182 (unsigned long)selected_size / SZ_1M); 92 (unsigned long)selected_size / SZ_1M);
183 /* 93 align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
184 * Old CPUs require HPT aligned on a multiple of its size. So for them
185 * make the alignment as max size we could request.
186 */
187 if (!cpu_has_feature(CPU_FTR_ARCH_206))
188 align_size = __rounddown_pow_of_two(selected_size);
189 else
190 align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
191
192 align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
193 cma_declare_contiguous(0, selected_size, 0, align_size, 94 cma_declare_contiguous(0, selected_size, 0, align_size,
194 KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma); 95 KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
195 } 96 }
196} 97}
197 98
198/* 99/*
100 * Real-mode H_CONFER implementation.
101 * We check if we are the only vcpu out of this virtual core
102 * still running in the guest and not ceded. If so, we pop up
103 * to the virtual-mode implementation; if not, just return to
104 * the guest.
105 */
106long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
107 unsigned int yield_count)
108{
109 struct kvmppc_vcore *vc = vcpu->arch.vcore;
110 int threads_running;
111 int threads_ceded;
112 int threads_conferring;
113 u64 stop = get_tb() + 10 * tb_ticks_per_usec;
114 int rv = H_SUCCESS; /* => don't yield */
115
116 set_bit(vcpu->arch.ptid, &vc->conferring_threads);
117 while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) {
118 threads_running = VCORE_ENTRY_COUNT(vc);
119 threads_ceded = hweight32(vc->napping_threads);
120 threads_conferring = hweight32(vc->conferring_threads);
121 if (threads_ceded + threads_conferring >= threads_running) {
122 rv = H_TOO_HARD; /* => do yield */
123 break;
124 }
125 }
126 clear_bit(vcpu->arch.ptid, &vc->conferring_threads);
127 return rv;
128}
129
130/*
199 * When running HV mode KVM we need to block certain operations while KVM VMs 131 * When running HV mode KVM we need to block certain operations while KVM VMs
200 * exist in the system. We use a counter of VMs to track this. 132 * exist in the system. We use a counter of VMs to track this.
201 * 133 *
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 731be7478b27..36540a99d178 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -52,10 +52,8 @@ _GLOBAL(__kvmppc_vcore_entry)
52 std r3, _CCR(r1) 52 std r3, _CCR(r1)
53 53
54 /* Save host DSCR */ 54 /* Save host DSCR */
55BEGIN_FTR_SECTION
56 mfspr r3, SPRN_DSCR 55 mfspr r3, SPRN_DSCR
57 std r3, HSTATE_DSCR(r13) 56 std r3, HSTATE_DSCR(r13)
58END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
59 57
60BEGIN_FTR_SECTION 58BEGIN_FTR_SECTION
61 /* Save host DABR */ 59 /* Save host DABR */
@@ -84,11 +82,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
84 mfspr r7, SPRN_MMCR0 /* save MMCR0 */ 82 mfspr r7, SPRN_MMCR0 /* save MMCR0 */
85 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */ 83 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
86 mfspr r6, SPRN_MMCRA 84 mfspr r6, SPRN_MMCRA
87BEGIN_FTR_SECTION 85 /* Clear MMCRA in order to disable SDAR updates */
88 /* On P7, clear MMCRA in order to disable SDAR updates */
89 li r5, 0 86 li r5, 0
90 mtspr SPRN_MMCRA, r5 87 mtspr SPRN_MMCRA, r5
91END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
92 isync 88 isync
93 ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ 89 ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
94 lbz r5, LPPACA_PMCINUSE(r3) 90 lbz r5, LPPACA_PMCINUSE(r3)
@@ -113,20 +109,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
113 mfspr r7, SPRN_PMC4 109 mfspr r7, SPRN_PMC4
114 mfspr r8, SPRN_PMC5 110 mfspr r8, SPRN_PMC5
115 mfspr r9, SPRN_PMC6 111 mfspr r9, SPRN_PMC6
116BEGIN_FTR_SECTION
117 mfspr r10, SPRN_PMC7
118 mfspr r11, SPRN_PMC8
119END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
120 stw r3, HSTATE_PMC(r13) 112 stw r3, HSTATE_PMC(r13)
121 stw r5, HSTATE_PMC + 4(r13) 113 stw r5, HSTATE_PMC + 4(r13)
122 stw r6, HSTATE_PMC + 8(r13) 114 stw r6, HSTATE_PMC + 8(r13)
123 stw r7, HSTATE_PMC + 12(r13) 115 stw r7, HSTATE_PMC + 12(r13)
124 stw r8, HSTATE_PMC + 16(r13) 116 stw r8, HSTATE_PMC + 16(r13)
125 stw r9, HSTATE_PMC + 20(r13) 117 stw r9, HSTATE_PMC + 20(r13)
126BEGIN_FTR_SECTION
127 stw r10, HSTATE_PMC + 24(r13)
128 stw r11, HSTATE_PMC + 28(r13)
129END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
13031: 11831:
131 119
132 /* 120 /*
@@ -140,31 +128,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
140 add r8,r8,r7 128 add r8,r8,r7
141 std r8,HSTATE_DECEXP(r13) 129 std r8,HSTATE_DECEXP(r13)
142 130
143#ifdef CONFIG_SMP
144 /*
145 * On PPC970, if the guest vcpu has an external interrupt pending,
146 * send ourselves an IPI so as to interrupt the guest once it
147 * enables interrupts. (It must have interrupts disabled,
148 * otherwise we would already have delivered the interrupt.)
149 *
150 * XXX If this is a UP build, smp_send_reschedule is not available,
151 * so the interrupt will be delayed until the next time the vcpu
152 * enters the guest with interrupts enabled.
153 */
154BEGIN_FTR_SECTION
155 ld r4, HSTATE_KVM_VCPU(r13)
156 ld r0, VCPU_PENDING_EXC(r4)
157 li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
158 oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
159 and. r0, r0, r7
160 beq 32f
161 lhz r3, PACAPACAINDEX(r13)
162 bl smp_send_reschedule
163 nop
16432:
165END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
166#endif /* CONFIG_SMP */
167
168 /* Jump to partition switch code */ 131 /* Jump to partition switch code */
169 bl kvmppc_hv_entry_trampoline 132 bl kvmppc_hv_entry_trampoline
170 nop 133 nop
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index d562c8e2bc30..60081bd75847 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -138,8 +138,5 @@ out:
138 138
139long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) 139long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
140{ 140{
141 if (cpu_has_feature(CPU_FTR_ARCH_206)) 141 return kvmppc_realmode_mc_power7(vcpu);
142 return kvmppc_realmode_mc_power7(vcpu);
143
144 return 0;
145} 142}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 084ad54c73cd..510bdfbc4073 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -45,16 +45,12 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags)
45 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set, 45 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
46 * we can use tlbiel as long as we mark all other physical 46 * we can use tlbiel as long as we mark all other physical
47 * cores as potentially having stale TLB entries for this lpid. 47 * cores as potentially having stale TLB entries for this lpid.
48 * If we're not using MMU notifiers, we never take pages away
49 * from the guest, so we can use tlbiel if requested.
50 * Otherwise, don't use tlbiel. 48 * Otherwise, don't use tlbiel.
51 */ 49 */
52 if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu) 50 if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
53 global = 0; 51 global = 0;
54 else if (kvm->arch.using_mmu_notifiers)
55 global = 1;
56 else 52 else
57 global = !(flags & H_LOCAL); 53 global = 1;
58 54
59 if (!global) { 55 if (!global) {
60 /* any other core might now have stale TLB entries... */ 56 /* any other core might now have stale TLB entries... */
@@ -170,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
170 struct revmap_entry *rev; 166 struct revmap_entry *rev;
171 unsigned long g_ptel; 167 unsigned long g_ptel;
172 struct kvm_memory_slot *memslot; 168 struct kvm_memory_slot *memslot;
173 unsigned long *physp, pte_size; 169 unsigned long pte_size;
174 unsigned long is_io; 170 unsigned long is_io;
175 unsigned long *rmap; 171 unsigned long *rmap;
176 pte_t pte; 172 pte_t pte;
@@ -198,9 +194,6 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
198 is_io = ~0ul; 194 is_io = ~0ul;
199 rmap = NULL; 195 rmap = NULL;
200 if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) { 196 if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
201 /* PPC970 can't do emulated MMIO */
202 if (!cpu_has_feature(CPU_FTR_ARCH_206))
203 return H_PARAMETER;
204 /* Emulated MMIO - mark this with key=31 */ 197 /* Emulated MMIO - mark this with key=31 */
205 pteh |= HPTE_V_ABSENT; 198 pteh |= HPTE_V_ABSENT;
206 ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO; 199 ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
@@ -213,37 +206,20 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
213 slot_fn = gfn - memslot->base_gfn; 206 slot_fn = gfn - memslot->base_gfn;
214 rmap = &memslot->arch.rmap[slot_fn]; 207 rmap = &memslot->arch.rmap[slot_fn];
215 208
216 if (!kvm->arch.using_mmu_notifiers) { 209 /* Translate to host virtual address */
217 physp = memslot->arch.slot_phys; 210 hva = __gfn_to_hva_memslot(memslot, gfn);
218 if (!physp) 211
219 return H_PARAMETER; 212 /* Look up the Linux PTE for the backing page */
220 physp += slot_fn; 213 pte_size = psize;
221 if (realmode) 214 pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
222 physp = real_vmalloc_addr(physp); 215 if (pte_present(pte) && !pte_numa(pte)) {
223 pa = *physp; 216 if (writing && !pte_write(pte))
224 if (!pa) 217 /* make the actual HPTE be read-only */
225 return H_TOO_HARD; 218 ptel = hpte_make_readonly(ptel);
226 is_io = pa & (HPTE_R_I | HPTE_R_W); 219 is_io = hpte_cache_bits(pte_val(pte));
227 pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK); 220 pa = pte_pfn(pte) << PAGE_SHIFT;
228 pa &= PAGE_MASK; 221 pa |= hva & (pte_size - 1);
229 pa |= gpa & ~PAGE_MASK; 222 pa |= gpa & ~PAGE_MASK;
230 } else {
231 /* Translate to host virtual address */
232 hva = __gfn_to_hva_memslot(memslot, gfn);
233
234 /* Look up the Linux PTE for the backing page */
235 pte_size = psize;
236 pte = lookup_linux_pte_and_update(pgdir, hva, writing,
237 &pte_size);
238 if (pte_present(pte) && !pte_numa(pte)) {
239 if (writing && !pte_write(pte))
240 /* make the actual HPTE be read-only */
241 ptel = hpte_make_readonly(ptel);
242 is_io = hpte_cache_bits(pte_val(pte));
243 pa = pte_pfn(pte) << PAGE_SHIFT;
244 pa |= hva & (pte_size - 1);
245 pa |= gpa & ~PAGE_MASK;
246 }
247 } 223 }
248 224
249 if (pte_size < psize) 225 if (pte_size < psize)
@@ -337,8 +313,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
337 rmap = real_vmalloc_addr(rmap); 313 rmap = real_vmalloc_addr(rmap);
338 lock_rmap(rmap); 314 lock_rmap(rmap);
339 /* Check for pending invalidations under the rmap chain lock */ 315 /* Check for pending invalidations under the rmap chain lock */
340 if (kvm->arch.using_mmu_notifiers && 316 if (mmu_notifier_retry(kvm, mmu_seq)) {
341 mmu_notifier_retry(kvm, mmu_seq)) {
342 /* inval in progress, write a non-present HPTE */ 317 /* inval in progress, write a non-present HPTE */
343 pteh |= HPTE_V_ABSENT; 318 pteh |= HPTE_V_ABSENT;
344 pteh &= ~HPTE_V_VALID; 319 pteh &= ~HPTE_V_VALID;
@@ -395,61 +370,11 @@ static inline int try_lock_tlbie(unsigned int *lock)
395 return old == 0; 370 return old == 0;
396} 371}
397 372
398/*
399 * tlbie/tlbiel is a bit different on the PPC970 compared to later
400 * processors such as POWER7; the large page bit is in the instruction
401 * not RB, and the top 16 bits and the bottom 12 bits of the VA
402 * in RB must be 0.
403 */
404static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues,
405 long npages, int global, bool need_sync)
406{
407 long i;
408
409 if (global) {
410 while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
411 cpu_relax();
412 if (need_sync)
413 asm volatile("ptesync" : : : "memory");
414 for (i = 0; i < npages; ++i) {
415 unsigned long rb = rbvalues[i];
416
417 if (rb & 1) /* large page */
418 asm volatile("tlbie %0,1" : :
419 "r" (rb & 0x0000fffffffff000ul));
420 else
421 asm volatile("tlbie %0,0" : :
422 "r" (rb & 0x0000fffffffff000ul));
423 }
424 asm volatile("eieio; tlbsync; ptesync" : : : "memory");
425 kvm->arch.tlbie_lock = 0;
426 } else {
427 if (need_sync)
428 asm volatile("ptesync" : : : "memory");
429 for (i = 0; i < npages; ++i) {
430 unsigned long rb = rbvalues[i];
431
432 if (rb & 1) /* large page */
433 asm volatile("tlbiel %0,1" : :
434 "r" (rb & 0x0000fffffffff000ul));
435 else
436 asm volatile("tlbiel %0,0" : :
437 "r" (rb & 0x0000fffffffff000ul));
438 }
439 asm volatile("ptesync" : : : "memory");
440 }
441}
442
443static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, 373static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
444 long npages, int global, bool need_sync) 374 long npages, int global, bool need_sync)
445{ 375{
446 long i; 376 long i;
447 377
448 if (cpu_has_feature(CPU_FTR_ARCH_201)) {
449 /* PPC970 tlbie instruction is a bit different */
450 do_tlbies_970(kvm, rbvalues, npages, global, need_sync);
451 return;
452 }
453 if (global) { 378 if (global) {
454 while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) 379 while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
455 cpu_relax(); 380 cpu_relax();
@@ -667,40 +592,29 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
667 rev->guest_rpte = r; 592 rev->guest_rpte = r;
668 note_hpte_modification(kvm, rev); 593 note_hpte_modification(kvm, rev);
669 } 594 }
670 r = (be64_to_cpu(hpte[1]) & ~mask) | bits;
671 595
672 /* Update HPTE */ 596 /* Update HPTE */
673 if (v & HPTE_V_VALID) { 597 if (v & HPTE_V_VALID) {
674 rb = compute_tlbie_rb(v, r, pte_index);
675 hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID);
676 do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
677 /* 598 /*
678 * If the host has this page as readonly but the guest 599 * If the page is valid, don't let it transition from
679 * wants to make it read/write, reduce the permissions. 600 * readonly to writable. If it should be writable, we'll
680 * Checking the host permissions involves finding the 601 * take a trap and let the page fault code sort it out.
681 * memslot and then the Linux PTE for the page.
682 */ 602 */
683 if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) { 603 pte = be64_to_cpu(hpte[1]);
684 unsigned long psize, gfn, hva; 604 r = (pte & ~mask) | bits;
685 struct kvm_memory_slot *memslot; 605 if (hpte_is_writable(r) && !hpte_is_writable(pte))
686 pgd_t *pgdir = vcpu->arch.pgdir; 606 r = hpte_make_readonly(r);
687 pte_t pte; 607 /* If the PTE is changing, invalidate it first */
688 608 if (r != pte) {
689 psize = hpte_page_size(v, r); 609 rb = compute_tlbie_rb(v, r, pte_index);
690 gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; 610 hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) |
691 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 611 HPTE_V_ABSENT);
692 if (memslot) { 612 do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
693 hva = __gfn_to_hva_memslot(memslot, gfn); 613 true);
694 pte = lookup_linux_pte_and_update(pgdir, hva, 614 hpte[1] = cpu_to_be64(r);
695 1, &psize);
696 if (pte_present(pte) && !pte_write(pte))
697 r = hpte_make_readonly(r);
698 }
699 } 615 }
700 } 616 }
701 hpte[1] = cpu_to_be64(r); 617 unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
702 eieio();
703 hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK);
704 asm volatile("ptesync" : : : "memory"); 618 asm volatile("ptesync" : : : "memory");
705 return H_SUCCESS; 619 return H_SUCCESS;
706} 620}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 3ee38e6e884f..7b066f6b02ad 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -183,8 +183,10 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
183 * state update in HW (ie bus transactions) so we can handle them 183 * state update in HW (ie bus transactions) so we can handle them
184 * separately here as well. 184 * separately here as well.
185 */ 185 */
186 if (resend) 186 if (resend) {
187 icp->rm_action |= XICS_RM_CHECK_RESEND; 187 icp->rm_action |= XICS_RM_CHECK_RESEND;
188 icp->rm_resend_icp = icp;
189 }
188} 190}
189 191
190 192
@@ -254,10 +256,25 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
254 * nothing needs to be done as there can be no XISR to 256 * nothing needs to be done as there can be no XISR to
255 * reject. 257 * reject.
256 * 258 *
259 * ICP state: Check_IPI
260 *
257 * If the CPPR is less favored, then we might be replacing 261 * If the CPPR is less favored, then we might be replacing
258 * an interrupt, and thus need to possibly reject it as in 262 * an interrupt, and thus need to possibly reject it.
259 * 263 *
260 * ICP state: Check_IPI 264 * ICP State: IPI
265 *
266 * Besides rejecting any pending interrupts, we also
267 * update XISR and pending_pri to mark IPI as pending.
268 *
269 * PAPR does not describe this state, but if the MFRR is being
270 * made less favored than its earlier value, there might be
271 * a previously-rejected interrupt needing to be resent.
272 * Ideally, we would want to resend only if
273 * prio(pending_interrupt) < mfrr &&
274 * prio(pending_interrupt) < cppr
275 * where pending interrupt is the one that was rejected. But
276 * we don't have that state, so we simply trigger a resend
277 * whenever the MFRR is made less favored.
261 */ 278 */
262 do { 279 do {
263 old_state = new_state = ACCESS_ONCE(icp->state); 280 old_state = new_state = ACCESS_ONCE(icp->state);
@@ -270,13 +287,14 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
270 resend = false; 287 resend = false;
271 if (mfrr < new_state.cppr) { 288 if (mfrr < new_state.cppr) {
272 /* Reject a pending interrupt if not an IPI */ 289 /* Reject a pending interrupt if not an IPI */
273 if (mfrr <= new_state.pending_pri) 290 if (mfrr <= new_state.pending_pri) {
274 reject = new_state.xisr; 291 reject = new_state.xisr;
275 new_state.pending_pri = mfrr; 292 new_state.pending_pri = mfrr;
276 new_state.xisr = XICS_IPI; 293 new_state.xisr = XICS_IPI;
294 }
277 } 295 }
278 296
279 if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { 297 if (mfrr > old_state.mfrr) {
280 resend = new_state.need_resend; 298 resend = new_state.need_resend;
281 new_state.need_resend = 0; 299 new_state.need_resend = 0;
282 } 300 }
@@ -289,8 +307,10 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
289 } 307 }
290 308
291 /* Pass resends to virtual mode */ 309 /* Pass resends to virtual mode */
292 if (resend) 310 if (resend) {
293 this_icp->rm_action |= XICS_RM_CHECK_RESEND; 311 this_icp->rm_action |= XICS_RM_CHECK_RESEND;
312 this_icp->rm_resend_icp = icp;
313 }
294 314
295 return check_too_hard(xics, this_icp); 315 return check_too_hard(xics, this_icp);
296} 316}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 65c105b17a25..10554df13852 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -94,20 +94,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
94 lwz r6, HSTATE_PMC + 12(r13) 94 lwz r6, HSTATE_PMC + 12(r13)
95 lwz r8, HSTATE_PMC + 16(r13) 95 lwz r8, HSTATE_PMC + 16(r13)
96 lwz r9, HSTATE_PMC + 20(r13) 96 lwz r9, HSTATE_PMC + 20(r13)
97BEGIN_FTR_SECTION
98 lwz r10, HSTATE_PMC + 24(r13)
99 lwz r11, HSTATE_PMC + 28(r13)
100END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
101 mtspr SPRN_PMC1, r3 97 mtspr SPRN_PMC1, r3
102 mtspr SPRN_PMC2, r4 98 mtspr SPRN_PMC2, r4
103 mtspr SPRN_PMC3, r5 99 mtspr SPRN_PMC3, r5
104 mtspr SPRN_PMC4, r6 100 mtspr SPRN_PMC4, r6
105 mtspr SPRN_PMC5, r8 101 mtspr SPRN_PMC5, r8
106 mtspr SPRN_PMC6, r9 102 mtspr SPRN_PMC6, r9
107BEGIN_FTR_SECTION
108 mtspr SPRN_PMC7, r10
109 mtspr SPRN_PMC8, r11
110END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
111 ld r3, HSTATE_MMCR(r13) 103 ld r3, HSTATE_MMCR(r13)
112 ld r4, HSTATE_MMCR + 8(r13) 104 ld r4, HSTATE_MMCR + 8(r13)
113 ld r5, HSTATE_MMCR + 16(r13) 105 ld r5, HSTATE_MMCR + 16(r13)
@@ -153,11 +145,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
153 145
154 cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK 146 cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
155 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL 147 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
156BEGIN_FTR_SECTION
157 beq 11f 148 beq 11f
158 cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI 149 cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI
159 beq cr2, 14f /* HMI check */ 150 beq cr2, 14f /* HMI check */
160END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
161 151
162 /* RFI into the highmem handler, or branch to interrupt handler */ 152 /* RFI into the highmem handler, or branch to interrupt handler */
163 mfmsr r6 153 mfmsr r6
@@ -166,7 +156,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
166 mtmsrd r6, 1 /* Clear RI in MSR */ 156 mtmsrd r6, 1 /* Clear RI in MSR */
167 mtsrr0 r8 157 mtsrr0 r8
168 mtsrr1 r7 158 mtsrr1 r7
169 beqa 0x500 /* external interrupt (PPC970) */
170 beq cr1, 13f /* machine check */ 159 beq cr1, 13f /* machine check */
171 RFI 160 RFI
172 161
@@ -393,11 +382,8 @@ kvmppc_hv_entry:
393 slbia 382 slbia
394 ptesync 383 ptesync
395 384
396BEGIN_FTR_SECTION
397 b 30f
398END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
399 /* 385 /*
400 * POWER7 host -> guest partition switch code. 386 * POWER7/POWER8 host -> guest partition switch code.
401 * We don't have to lock against concurrent tlbies, 387 * We don't have to lock against concurrent tlbies,
402 * but we do have to coordinate across hardware threads. 388 * but we do have to coordinate across hardware threads.
403 */ 389 */
@@ -505,97 +491,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
505 cmpwi r3,512 /* 1 microsecond */ 491 cmpwi r3,512 /* 1 microsecond */
506 li r12,BOOK3S_INTERRUPT_HV_DECREMENTER 492 li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
507 blt hdec_soon 493 blt hdec_soon
508 b 31f
509
510 /*
511 * PPC970 host -> guest partition switch code.
512 * We have to lock against concurrent tlbies,
513 * using native_tlbie_lock to lock against host tlbies
514 * and kvm->arch.tlbie_lock to lock against guest tlbies.
515 * We also have to invalidate the TLB since its
516 * entries aren't tagged with the LPID.
517 */
51830: ld r5,HSTATE_KVM_VCORE(r13)
519 ld r9,VCORE_KVM(r5) /* pointer to struct kvm */
520
521 /* first take native_tlbie_lock */
522 .section ".toc","aw"
523toc_tlbie_lock:
524 .tc native_tlbie_lock[TC],native_tlbie_lock
525 .previous
526 ld r3,toc_tlbie_lock@toc(r2)
527#ifdef __BIG_ENDIAN__
528 lwz r8,PACA_LOCK_TOKEN(r13)
529#else
530 lwz r8,PACAPACAINDEX(r13)
531#endif
53224: lwarx r0,0,r3
533 cmpwi r0,0
534 bne 24b
535 stwcx. r8,0,r3
536 bne 24b
537 isync
538
539 ld r5,HSTATE_KVM_VCORE(r13)
540 ld r7,VCORE_LPCR(r5) /* use vcore->lpcr to store HID4 */
541 li r0,0x18f
542 rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
543 or r0,r7,r0
544 ptesync
545 sync
546 mtspr SPRN_HID4,r0 /* switch to reserved LPID */
547 isync
548 li r0,0
549 stw r0,0(r3) /* drop native_tlbie_lock */
550
551 /* invalidate the whole TLB */
552 li r0,256
553 mtctr r0
554 li r6,0
55525: tlbiel r6
556 addi r6,r6,0x1000
557 bdnz 25b
558 ptesync
559 494
560 /* Take the guest's tlbie_lock */
561 addi r3,r9,KVM_TLBIE_LOCK
56224: lwarx r0,0,r3
563 cmpwi r0,0
564 bne 24b
565 stwcx. r8,0,r3
566 bne 24b
567 isync
568 ld r6,KVM_SDR1(r9)
569 mtspr SPRN_SDR1,r6 /* switch to partition page table */
570
571 /* Set up HID4 with the guest's LPID etc. */
572 sync
573 mtspr SPRN_HID4,r7
574 isync
575
576 /* drop the guest's tlbie_lock */
577 li r0,0
578 stw r0,0(r3)
579
580 /* Check if HDEC expires soon */
581 mfspr r3,SPRN_HDEC
582 cmpwi r3,10
583 li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
584 blt hdec_soon
585
586 /* Enable HDEC interrupts */
587 mfspr r0,SPRN_HID0
588 li r3,1
589 rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
590 sync
591 mtspr SPRN_HID0,r0
592 mfspr r0,SPRN_HID0
593 mfspr r0,SPRN_HID0
594 mfspr r0,SPRN_HID0
595 mfspr r0,SPRN_HID0
596 mfspr r0,SPRN_HID0
597 mfspr r0,SPRN_HID0
59831:
599 /* Do we have a guest vcpu to run? */ 495 /* Do we have a guest vcpu to run? */
600 cmpdi r4, 0 496 cmpdi r4, 0
601 beq kvmppc_primary_no_guest 497 beq kvmppc_primary_no_guest
@@ -625,7 +521,6 @@ kvmppc_got_guest:
625 stb r6, VCPU_VPA_DIRTY(r4) 521 stb r6, VCPU_VPA_DIRTY(r4)
62625: 52225:
627 523
628BEGIN_FTR_SECTION
629 /* Save purr/spurr */ 524 /* Save purr/spurr */
630 mfspr r5,SPRN_PURR 525 mfspr r5,SPRN_PURR
631 mfspr r6,SPRN_SPURR 526 mfspr r6,SPRN_SPURR
@@ -635,7 +530,6 @@ BEGIN_FTR_SECTION
635 ld r8,VCPU_SPURR(r4) 530 ld r8,VCPU_SPURR(r4)
636 mtspr SPRN_PURR,r7 531 mtspr SPRN_PURR,r7
637 mtspr SPRN_SPURR,r8 532 mtspr SPRN_SPURR,r8
638END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
639 533
640BEGIN_FTR_SECTION 534BEGIN_FTR_SECTION
641 /* Set partition DABR */ 535 /* Set partition DABR */
@@ -644,9 +538,7 @@ BEGIN_FTR_SECTION
644 ld r6,VCPU_DABR(r4) 538 ld r6,VCPU_DABR(r4)
645 mtspr SPRN_DABRX,r5 539 mtspr SPRN_DABRX,r5
646 mtspr SPRN_DABR,r6 540 mtspr SPRN_DABR,r6
647 BEGIN_FTR_SECTION_NESTED(89)
648 isync 541 isync
649 END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
650END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) 542END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
651 543
652#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 544#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -777,20 +669,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
777 lwz r7, VCPU_PMC + 12(r4) 669 lwz r7, VCPU_PMC + 12(r4)
778 lwz r8, VCPU_PMC + 16(r4) 670 lwz r8, VCPU_PMC + 16(r4)
779 lwz r9, VCPU_PMC + 20(r4) 671 lwz r9, VCPU_PMC + 20(r4)
780BEGIN_FTR_SECTION
781 lwz r10, VCPU_PMC + 24(r4)
782 lwz r11, VCPU_PMC + 28(r4)
783END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
784 mtspr SPRN_PMC1, r3 672 mtspr SPRN_PMC1, r3
785 mtspr SPRN_PMC2, r5 673 mtspr SPRN_PMC2, r5
786 mtspr SPRN_PMC3, r6 674 mtspr SPRN_PMC3, r6
787 mtspr SPRN_PMC4, r7 675 mtspr SPRN_PMC4, r7
788 mtspr SPRN_PMC5, r8 676 mtspr SPRN_PMC5, r8
789 mtspr SPRN_PMC6, r9 677 mtspr SPRN_PMC6, r9
790BEGIN_FTR_SECTION
791 mtspr SPRN_PMC7, r10
792 mtspr SPRN_PMC8, r11
793END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
794 ld r3, VCPU_MMCR(r4) 678 ld r3, VCPU_MMCR(r4)
795 ld r5, VCPU_MMCR + 8(r4) 679 ld r5, VCPU_MMCR + 8(r4)
796 ld r6, VCPU_MMCR + 16(r4) 680 ld r6, VCPU_MMCR + 16(r4)
@@ -837,14 +721,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
837 ld r30, VCPU_GPR(R30)(r4) 721 ld r30, VCPU_GPR(R30)(r4)
838 ld r31, VCPU_GPR(R31)(r4) 722 ld r31, VCPU_GPR(R31)(r4)
839 723
840BEGIN_FTR_SECTION
841 /* Switch DSCR to guest value */ 724 /* Switch DSCR to guest value */
842 ld r5, VCPU_DSCR(r4) 725 ld r5, VCPU_DSCR(r4)
843 mtspr SPRN_DSCR, r5 726 mtspr SPRN_DSCR, r5
844END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
845 727
846BEGIN_FTR_SECTION 728BEGIN_FTR_SECTION
847 /* Skip next section on POWER7 or PPC970 */ 729 /* Skip next section on POWER7 */
848 b 8f 730 b 8f
849END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) 731END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
850 /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */ 732 /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
@@ -920,7 +802,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
920 mtspr SPRN_DAR, r5 802 mtspr SPRN_DAR, r5
921 mtspr SPRN_DSISR, r6 803 mtspr SPRN_DSISR, r6
922 804
923BEGIN_FTR_SECTION
924 /* Restore AMR and UAMOR, set AMOR to all 1s */ 805 /* Restore AMR and UAMOR, set AMOR to all 1s */
925 ld r5,VCPU_AMR(r4) 806 ld r5,VCPU_AMR(r4)
926 ld r6,VCPU_UAMOR(r4) 807 ld r6,VCPU_UAMOR(r4)
@@ -928,7 +809,6 @@ BEGIN_FTR_SECTION
928 mtspr SPRN_AMR,r5 809 mtspr SPRN_AMR,r5
929 mtspr SPRN_UAMOR,r6 810 mtspr SPRN_UAMOR,r6
930 mtspr SPRN_AMOR,r7 811 mtspr SPRN_AMOR,r7
931END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
932 812
933 /* Restore state of CTRL run bit; assume 1 on entry */ 813 /* Restore state of CTRL run bit; assume 1 on entry */
934 lwz r5,VCPU_CTRL(r4) 814 lwz r5,VCPU_CTRL(r4)
@@ -963,13 +843,11 @@ deliver_guest_interrupt:
963 rldicl r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63 843 rldicl r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
964 cmpdi cr1, r0, 0 844 cmpdi cr1, r0, 0
965 andi. r8, r11, MSR_EE 845 andi. r8, r11, MSR_EE
966BEGIN_FTR_SECTION
967 mfspr r8, SPRN_LPCR 846 mfspr r8, SPRN_LPCR
968 /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */ 847 /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
969 rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH 848 rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
970 mtspr SPRN_LPCR, r8 849 mtspr SPRN_LPCR, r8
971 isync 850 isync
972END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
973 beq 5f 851 beq 5f
974 li r0, BOOK3S_INTERRUPT_EXTERNAL 852 li r0, BOOK3S_INTERRUPT_EXTERNAL
975 bne cr1, 12f 853 bne cr1, 12f
@@ -1124,15 +1002,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1124 1002
1125 stw r12,VCPU_TRAP(r9) 1003 stw r12,VCPU_TRAP(r9)
1126 1004
1127 /* Save HEIR (HV emulation assist reg) in last_inst 1005 /* Save HEIR (HV emulation assist reg) in emul_inst
1128 if this is an HEI (HV emulation interrupt, e40) */ 1006 if this is an HEI (HV emulation interrupt, e40) */
1129 li r3,KVM_INST_FETCH_FAILED 1007 li r3,KVM_INST_FETCH_FAILED
1130BEGIN_FTR_SECTION
1131 cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST 1008 cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
1132 bne 11f 1009 bne 11f
1133 mfspr r3,SPRN_HEIR 1010 mfspr r3,SPRN_HEIR
1134END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 101111: stw r3,VCPU_HEIR(r9)
113511: stw r3,VCPU_LAST_INST(r9)
1136 1012
1137 /* these are volatile across C function calls */ 1013 /* these are volatile across C function calls */
1138 mfctr r3 1014 mfctr r3
@@ -1140,13 +1016,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1140 std r3, VCPU_CTR(r9) 1016 std r3, VCPU_CTR(r9)
1141 stw r4, VCPU_XER(r9) 1017 stw r4, VCPU_XER(r9)
1142 1018
1143BEGIN_FTR_SECTION
1144 /* If this is a page table miss then see if it's theirs or ours */ 1019 /* If this is a page table miss then see if it's theirs or ours */
1145 cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE 1020 cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
1146 beq kvmppc_hdsi 1021 beq kvmppc_hdsi
1147 cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE 1022 cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE
1148 beq kvmppc_hisi 1023 beq kvmppc_hisi
1149END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1150 1024
1151 /* See if this is a leftover HDEC interrupt */ 1025 /* See if this is a leftover HDEC interrupt */
1152 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER 1026 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
@@ -1159,11 +1033,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1159 cmpwi r12,BOOK3S_INTERRUPT_SYSCALL 1033 cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
1160 beq hcall_try_real_mode 1034 beq hcall_try_real_mode
1161 1035
1162 /* Only handle external interrupts here on arch 206 and later */
1163BEGIN_FTR_SECTION
1164 b ext_interrupt_to_host
1165END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
1166
1167 /* External interrupt ? */ 1036 /* External interrupt ? */
1168 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL 1037 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
1169 bne+ ext_interrupt_to_host 1038 bne+ ext_interrupt_to_host
@@ -1193,11 +1062,9 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
1193 mfdsisr r7 1062 mfdsisr r7
1194 std r6, VCPU_DAR(r9) 1063 std r6, VCPU_DAR(r9)
1195 stw r7, VCPU_DSISR(r9) 1064 stw r7, VCPU_DSISR(r9)
1196BEGIN_FTR_SECTION
1197 /* don't overwrite fault_dar/fault_dsisr if HDSI */ 1065 /* don't overwrite fault_dar/fault_dsisr if HDSI */
1198 cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE 1066 cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
1199 beq 6f 1067 beq 6f
1200END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1201 std r6, VCPU_FAULT_DAR(r9) 1068 std r6, VCPU_FAULT_DAR(r9)
1202 stw r7, VCPU_FAULT_DSISR(r9) 1069 stw r7, VCPU_FAULT_DSISR(r9)
1203 1070
@@ -1236,7 +1103,6 @@ mc_cont:
1236 /* 1103 /*
1237 * Save the guest PURR/SPURR 1104 * Save the guest PURR/SPURR
1238 */ 1105 */
1239BEGIN_FTR_SECTION
1240 mfspr r5,SPRN_PURR 1106 mfspr r5,SPRN_PURR
1241 mfspr r6,SPRN_SPURR 1107 mfspr r6,SPRN_SPURR
1242 ld r7,VCPU_PURR(r9) 1108 ld r7,VCPU_PURR(r9)
@@ -1256,7 +1122,6 @@ BEGIN_FTR_SECTION
1256 add r4,r4,r6 1122 add r4,r4,r6
1257 mtspr SPRN_PURR,r3 1123 mtspr SPRN_PURR,r3
1258 mtspr SPRN_SPURR,r4 1124 mtspr SPRN_SPURR,r4
1259END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
1260 1125
1261 /* Save DEC */ 1126 /* Save DEC */
1262 mfspr r5,SPRN_DEC 1127 mfspr r5,SPRN_DEC
@@ -1306,22 +1171,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
13068: 11718:
1307 1172
1308 /* Save and reset AMR and UAMOR before turning on the MMU */ 1173 /* Save and reset AMR and UAMOR before turning on the MMU */
1309BEGIN_FTR_SECTION
1310 mfspr r5,SPRN_AMR 1174 mfspr r5,SPRN_AMR
1311 mfspr r6,SPRN_UAMOR 1175 mfspr r6,SPRN_UAMOR
1312 std r5,VCPU_AMR(r9) 1176 std r5,VCPU_AMR(r9)
1313 std r6,VCPU_UAMOR(r9) 1177 std r6,VCPU_UAMOR(r9)
1314 li r6,0 1178 li r6,0
1315 mtspr SPRN_AMR,r6 1179 mtspr SPRN_AMR,r6
1316END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1317 1180
1318 /* Switch DSCR back to host value */ 1181 /* Switch DSCR back to host value */
1319BEGIN_FTR_SECTION
1320 mfspr r8, SPRN_DSCR 1182 mfspr r8, SPRN_DSCR
1321 ld r7, HSTATE_DSCR(r13) 1183 ld r7, HSTATE_DSCR(r13)
1322 std r8, VCPU_DSCR(r9) 1184 std r8, VCPU_DSCR(r9)
1323 mtspr SPRN_DSCR, r7 1185 mtspr SPRN_DSCR, r7
1324END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1325 1186
1326 /* Save non-volatile GPRs */ 1187 /* Save non-volatile GPRs */
1327 std r14, VCPU_GPR(R14)(r9) 1188 std r14, VCPU_GPR(R14)(r9)
@@ -1503,11 +1364,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1503 mfspr r4, SPRN_MMCR0 /* save MMCR0 */ 1364 mfspr r4, SPRN_MMCR0 /* save MMCR0 */
1504 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ 1365 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
1505 mfspr r6, SPRN_MMCRA 1366 mfspr r6, SPRN_MMCRA
1506BEGIN_FTR_SECTION 1367 /* Clear MMCRA in order to disable SDAR updates */
1507 /* On P7, clear MMCRA in order to disable SDAR updates */
1508 li r7, 0 1368 li r7, 0
1509 mtspr SPRN_MMCRA, r7 1369 mtspr SPRN_MMCRA, r7
1510END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
1511 isync 1370 isync
1512 beq 21f /* if no VPA, save PMU stuff anyway */ 1371 beq 21f /* if no VPA, save PMU stuff anyway */
1513 lbz r7, LPPACA_PMCINUSE(r8) 1372 lbz r7, LPPACA_PMCINUSE(r8)
@@ -1532,10 +1391,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1532 mfspr r6, SPRN_PMC4 1391 mfspr r6, SPRN_PMC4
1533 mfspr r7, SPRN_PMC5 1392 mfspr r7, SPRN_PMC5
1534 mfspr r8, SPRN_PMC6 1393 mfspr r8, SPRN_PMC6
1535BEGIN_FTR_SECTION
1536 mfspr r10, SPRN_PMC7
1537 mfspr r11, SPRN_PMC8
1538END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1539 stw r3, VCPU_PMC(r9) 1394 stw r3, VCPU_PMC(r9)
1540 stw r4, VCPU_PMC + 4(r9) 1395 stw r4, VCPU_PMC + 4(r9)
1541 stw r5, VCPU_PMC + 8(r9) 1396 stw r5, VCPU_PMC + 8(r9)
@@ -1543,10 +1398,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1543 stw r7, VCPU_PMC + 16(r9) 1398 stw r7, VCPU_PMC + 16(r9)
1544 stw r8, VCPU_PMC + 20(r9) 1399 stw r8, VCPU_PMC + 20(r9)
1545BEGIN_FTR_SECTION 1400BEGIN_FTR_SECTION
1546 stw r10, VCPU_PMC + 24(r9)
1547 stw r11, VCPU_PMC + 28(r9)
1548END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1549BEGIN_FTR_SECTION
1550 mfspr r5, SPRN_SIER 1401 mfspr r5, SPRN_SIER
1551 mfspr r6, SPRN_SPMC1 1402 mfspr r6, SPRN_SPMC1
1552 mfspr r7, SPRN_SPMC2 1403 mfspr r7, SPRN_SPMC2
@@ -1566,11 +1417,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1566 ptesync 1417 ptesync
1567 1418
1568hdec_soon: /* r12 = trap, r13 = paca */ 1419hdec_soon: /* r12 = trap, r13 = paca */
1569BEGIN_FTR_SECTION
1570 b 32f
1571END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
1572 /* 1420 /*
1573 * POWER7 guest -> host partition switch code. 1421 * POWER7/POWER8 guest -> host partition switch code.
1574 * We don't have to lock against tlbies but we do 1422 * We don't have to lock against tlbies but we do
1575 * have to coordinate the hardware threads. 1423 * have to coordinate the hardware threads.
1576 */ 1424 */
@@ -1698,87 +1546,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
169816: ld r8,KVM_HOST_LPCR(r4) 154616: ld r8,KVM_HOST_LPCR(r4)
1699 mtspr SPRN_LPCR,r8 1547 mtspr SPRN_LPCR,r8
1700 isync 1548 isync
1701 b 33f
1702
1703 /*
1704 * PPC970 guest -> host partition switch code.
1705 * We have to lock against concurrent tlbies, and
1706 * we have to flush the whole TLB.
1707 */
170832: ld r5,HSTATE_KVM_VCORE(r13)
1709 ld r4,VCORE_KVM(r5) /* pointer to struct kvm */
1710
1711 /* Take the guest's tlbie_lock */
1712#ifdef __BIG_ENDIAN__
1713 lwz r8,PACA_LOCK_TOKEN(r13)
1714#else
1715 lwz r8,PACAPACAINDEX(r13)
1716#endif
1717 addi r3,r4,KVM_TLBIE_LOCK
171824: lwarx r0,0,r3
1719 cmpwi r0,0
1720 bne 24b
1721 stwcx. r8,0,r3
1722 bne 24b
1723 isync
1724
1725 ld r7,KVM_HOST_LPCR(r4) /* use kvm->arch.host_lpcr for HID4 */
1726 li r0,0x18f
1727 rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
1728 or r0,r7,r0
1729 ptesync
1730 sync
1731 mtspr SPRN_HID4,r0 /* switch to reserved LPID */
1732 isync
1733 li r0,0
1734 stw r0,0(r3) /* drop guest tlbie_lock */
1735
1736 /* invalidate the whole TLB */
1737 li r0,256
1738 mtctr r0
1739 li r6,0
174025: tlbiel r6
1741 addi r6,r6,0x1000
1742 bdnz 25b
1743 ptesync
1744
1745 /* take native_tlbie_lock */
1746 ld r3,toc_tlbie_lock@toc(2)
174724: lwarx r0,0,r3
1748 cmpwi r0,0
1749 bne 24b
1750 stwcx. r8,0,r3
1751 bne 24b
1752 isync
1753
1754 ld r6,KVM_HOST_SDR1(r4)
1755 mtspr SPRN_SDR1,r6 /* switch to host page table */
1756
1757 /* Set up host HID4 value */
1758 sync
1759 mtspr SPRN_HID4,r7
1760 isync
1761 li r0,0
1762 stw r0,0(r3) /* drop native_tlbie_lock */
1763
1764 lis r8,0x7fff /* MAX_INT@h */
1765 mtspr SPRN_HDEC,r8
1766
1767 /* Disable HDEC interrupts */
1768 mfspr r0,SPRN_HID0
1769 li r3,0
1770 rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
1771 sync
1772 mtspr SPRN_HID0,r0
1773 mfspr r0,SPRN_HID0
1774 mfspr r0,SPRN_HID0
1775 mfspr r0,SPRN_HID0
1776 mfspr r0,SPRN_HID0
1777 mfspr r0,SPRN_HID0
1778 mfspr r0,SPRN_HID0
1779 1549
1780 /* load host SLB entries */ 1550 /* load host SLB entries */
178133: ld r8,PACA_SLBSHADOWPTR(r13) 1551 ld r8,PACA_SLBSHADOWPTR(r13)
1782 1552
1783 .rept SLB_NUM_BOLTED 1553 .rept SLB_NUM_BOLTED
1784 li r3, SLBSHADOW_SAVEAREA 1554 li r3, SLBSHADOW_SAVEAREA
@@ -2047,7 +1817,7 @@ hcall_real_table:
2047 .long 0 /* 0xd8 */ 1817 .long 0 /* 0xd8 */
2048 .long 0 /* 0xdc */ 1818 .long 0 /* 0xdc */
2049 .long DOTSYM(kvmppc_h_cede) - hcall_real_table 1819 .long DOTSYM(kvmppc_h_cede) - hcall_real_table
2050 .long 0 /* 0xe4 */ 1820 .long DOTSYM(kvmppc_rm_h_confer) - hcall_real_table
2051 .long 0 /* 0xe8 */ 1821 .long 0 /* 0xe8 */
2052 .long 0 /* 0xec */ 1822 .long 0 /* 0xec */
2053 .long 0 /* 0xf0 */ 1823 .long 0 /* 0xf0 */
@@ -2126,9 +1896,6 @@ _GLOBAL(kvmppc_h_cede)
2126 stw r0,VCPU_TRAP(r3) 1896 stw r0,VCPU_TRAP(r3)
2127 li r0,H_SUCCESS 1897 li r0,H_SUCCESS
2128 std r0,VCPU_GPR(R3)(r3) 1898 std r0,VCPU_GPR(R3)(r3)
2129BEGIN_FTR_SECTION
2130 b kvm_cede_exit /* just send it up to host on 970 */
2131END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
2132 1899
2133 /* 1900 /*
2134 * Set our bit in the bitmask of napping threads unless all the 1901 * Set our bit in the bitmask of napping threads unless all the
@@ -2455,7 +2222,6 @@ BEGIN_FTR_SECTION
2455END_FTR_SECTION_IFSET(CPU_FTR_VSX) 2222END_FTR_SECTION_IFSET(CPU_FTR_VSX)
2456#endif 2223#endif
2457 mtmsrd r8 2224 mtmsrd r8
2458 isync
2459 addi r3,r3,VCPU_FPRS 2225 addi r3,r3,VCPU_FPRS
2460 bl store_fp_state 2226 bl store_fp_state
2461#ifdef CONFIG_ALTIVEC 2227#ifdef CONFIG_ALTIVEC
@@ -2491,7 +2257,6 @@ BEGIN_FTR_SECTION
2491END_FTR_SECTION_IFSET(CPU_FTR_VSX) 2257END_FTR_SECTION_IFSET(CPU_FTR_VSX)
2492#endif 2258#endif
2493 mtmsrd r8 2259 mtmsrd r8
2494 isync
2495 addi r3,r4,VCPU_FPRS 2260 addi r3,r4,VCPU_FPRS
2496 bl load_fp_state 2261 bl load_fp_state
2497#ifdef CONFIG_ALTIVEC 2262#ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index bfb8035314e3..bd6ab1672ae6 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -352,14 +352,6 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb)
352 return kvmppc_get_field(inst, msb + 32, lsb + 32); 352 return kvmppc_get_field(inst, msb + 32, lsb + 32);
353} 353}
354 354
355/*
356 * Replaces inst bits with ordering according to spec.
357 */
358static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value)
359{
360 return kvmppc_set_field(inst, msb + 32, lsb + 32, value);
361}
362
363bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) 355bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
364{ 356{
365 if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) 357 if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index cf2eb16846d1..f57383941d03 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -644,11 +644,6 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
644 return r; 644 return r;
645} 645}
646 646
647static inline int get_fpr_index(int i)
648{
649 return i * TS_FPRWIDTH;
650}
651
652/* Give up external provider (FPU, Altivec, VSX) */ 647/* Give up external provider (FPU, Altivec, VSX) */
653void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) 648void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
654{ 649{
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index eaeb78047fb8..807351f76f84 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -613,10 +613,25 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
613 * there might be a previously-rejected interrupt needing 613 * there might be a previously-rejected interrupt needing
614 * to be resent. 614 * to be resent.
615 * 615 *
616 * ICP state: Check_IPI
617 *
616 * If the CPPR is less favored, then we might be replacing 618 * If the CPPR is less favored, then we might be replacing
617 * an interrupt, and thus need to possibly reject it as in 619 * an interrupt, and thus need to possibly reject it.
618 * 620 *
619 * ICP state: Check_IPI 621 * ICP State: IPI
622 *
623 * Besides rejecting any pending interrupts, we also
624 * update XISR and pending_pri to mark IPI as pending.
625 *
626 * PAPR does not describe this state, but if the MFRR is being
627 * made less favored than its earlier value, there might be
628 * a previously-rejected interrupt needing to be resent.
629 * Ideally, we would want to resend only if
630 * prio(pending_interrupt) < mfrr &&
631 * prio(pending_interrupt) < cppr
632 * where pending interrupt is the one that was rejected. But
633 * we don't have that state, so we simply trigger a resend
634 * whenever the MFRR is made less favored.
620 */ 635 */
621 do { 636 do {
622 old_state = new_state = ACCESS_ONCE(icp->state); 637 old_state = new_state = ACCESS_ONCE(icp->state);
@@ -629,13 +644,14 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
629 resend = false; 644 resend = false;
630 if (mfrr < new_state.cppr) { 645 if (mfrr < new_state.cppr) {
631 /* Reject a pending interrupt if not an IPI */ 646 /* Reject a pending interrupt if not an IPI */
632 if (mfrr <= new_state.pending_pri) 647 if (mfrr <= new_state.pending_pri) {
633 reject = new_state.xisr; 648 reject = new_state.xisr;
634 new_state.pending_pri = mfrr; 649 new_state.pending_pri = mfrr;
635 new_state.xisr = XICS_IPI; 650 new_state.xisr = XICS_IPI;
651 }
636 } 652 }
637 653
638 if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { 654 if (mfrr > old_state.mfrr) {
639 resend = new_state.need_resend; 655 resend = new_state.need_resend;
640 new_state.need_resend = 0; 656 new_state.need_resend = 0;
641 } 657 }
@@ -789,7 +805,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
789 if (icp->rm_action & XICS_RM_KICK_VCPU) 805 if (icp->rm_action & XICS_RM_KICK_VCPU)
790 kvmppc_fast_vcpu_kick(icp->rm_kick_target); 806 kvmppc_fast_vcpu_kick(icp->rm_kick_target);
791 if (icp->rm_action & XICS_RM_CHECK_RESEND) 807 if (icp->rm_action & XICS_RM_CHECK_RESEND)
792 icp_check_resend(xics, icp); 808 icp_check_resend(xics, icp->rm_resend_icp);
793 if (icp->rm_action & XICS_RM_REJECT) 809 if (icp->rm_action & XICS_RM_REJECT)
794 icp_deliver_irq(xics, icp, icp->rm_reject); 810 icp_deliver_irq(xics, icp, icp->rm_reject);
795 if (icp->rm_action & XICS_RM_NOTIFY_EOI) 811 if (icp->rm_action & XICS_RM_NOTIFY_EOI)
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index e8aaa7a3f209..73f0f2723c07 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -74,6 +74,7 @@ struct kvmppc_icp {
74#define XICS_RM_NOTIFY_EOI 0x8 74#define XICS_RM_NOTIFY_EOI 0x8
75 u32 rm_action; 75 u32 rm_action;
76 struct kvm_vcpu *rm_kick_target; 76 struct kvm_vcpu *rm_kick_target;
77 struct kvmppc_icp *rm_resend_icp;
77 u32 rm_reject; 78 u32 rm_reject;
78 u32 rm_eoied_irq; 79 u32 rm_eoied_irq;
79 80
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index e1cb5881bd56..b29ce752c7d6 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -299,14 +299,6 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
299 kvmppc_e500_recalc_shadow_pid(to_e500(vcpu)); 299 kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
300} 300}
301 301
302void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
303{
304}
305
306void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
307{
308}
309
310static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu) 302static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)
311{ 303{
312 kvmppc_booke_vcpu_load(vcpu, cpu); 304 kvmppc_booke_vcpu_load(vcpu, cpu);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index c1f8f53cd312..c45eaab752b0 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -527,18 +527,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
527 r = 0; 527 r = 0;
528 break; 528 break;
529 case KVM_CAP_PPC_RMA: 529 case KVM_CAP_PPC_RMA:
530 r = hv_enabled; 530 r = 0;
531 /* PPC970 requires an RMA */
532 if (r && cpu_has_feature(CPU_FTR_ARCH_201))
533 r = 2;
534 break; 531 break;
535#endif 532#endif
536 case KVM_CAP_SYNC_MMU: 533 case KVM_CAP_SYNC_MMU:
537#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 534#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
538 if (hv_enabled) 535 r = hv_enabled;
539 r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
540 else
541 r = 0;
542#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER) 536#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
543 r = 1; 537 r = 1;
544#else 538#else
diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
new file mode 100644
index 000000000000..f647ce0f428b
--- /dev/null
+++ b/arch/powerpc/kvm/trace_book3s.h
@@ -0,0 +1,32 @@
1#if !defined(_TRACE_KVM_BOOK3S_H)
2#define _TRACE_KVM_BOOK3S_H
3
4/*
5 * Common defines used by the trace macros in trace_pr.h and trace_hv.h
6 */
7
8#define kvm_trace_symbol_exit \
9 {0x100, "SYSTEM_RESET"}, \
10 {0x200, "MACHINE_CHECK"}, \
11 {0x300, "DATA_STORAGE"}, \
12 {0x380, "DATA_SEGMENT"}, \
13 {0x400, "INST_STORAGE"}, \
14 {0x480, "INST_SEGMENT"}, \
15 {0x500, "EXTERNAL"}, \
16 {0x501, "EXTERNAL_LEVEL"}, \
17 {0x502, "EXTERNAL_HV"}, \
18 {0x600, "ALIGNMENT"}, \
19 {0x700, "PROGRAM"}, \
20 {0x800, "FP_UNAVAIL"}, \
21 {0x900, "DECREMENTER"}, \
22 {0x980, "HV_DECREMENTER"}, \
23 {0xc00, "SYSCALL"}, \
24 {0xd00, "TRACE"}, \
25 {0xe00, "H_DATA_STORAGE"}, \
26 {0xe20, "H_INST_STORAGE"}, \
27 {0xe40, "H_EMUL_ASSIST"}, \
28 {0xf00, "PERFMON"}, \
29 {0xf20, "ALTIVEC"}, \
30 {0xf40, "VSX"}
31
32#endif
diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h
index f7537cf26ce7..7ec534d1db9f 100644
--- a/arch/powerpc/kvm/trace_booke.h
+++ b/arch/powerpc/kvm/trace_booke.h
@@ -151,6 +151,47 @@ TRACE_EVENT(kvm_booke206_ref_release,
151 __entry->pfn, __entry->flags) 151 __entry->pfn, __entry->flags)
152); 152);
153 153
154#ifdef CONFIG_SPE_POSSIBLE
155#define kvm_trace_symbol_irqprio_spe \
156 {BOOKE_IRQPRIO_SPE_UNAVAIL, "SPE_UNAVAIL"}, \
157 {BOOKE_IRQPRIO_SPE_FP_DATA, "SPE_FP_DATA"}, \
158 {BOOKE_IRQPRIO_SPE_FP_ROUND, "SPE_FP_ROUND"},
159#else
160#define kvm_trace_symbol_irqprio_spe
161#endif
162
163#ifdef CONFIG_PPC_E500MC
164#define kvm_trace_symbol_irqprio_e500mc \
165 {BOOKE_IRQPRIO_ALTIVEC_UNAVAIL, "ALTIVEC_UNAVAIL"}, \
166 {BOOKE_IRQPRIO_ALTIVEC_ASSIST, "ALTIVEC_ASSIST"},
167#else
168#define kvm_trace_symbol_irqprio_e500mc
169#endif
170
171#define kvm_trace_symbol_irqprio \
172 kvm_trace_symbol_irqprio_spe \
173 kvm_trace_symbol_irqprio_e500mc \
174 {BOOKE_IRQPRIO_DATA_STORAGE, "DATA_STORAGE"}, \
175 {BOOKE_IRQPRIO_INST_STORAGE, "INST_STORAGE"}, \
176 {BOOKE_IRQPRIO_ALIGNMENT, "ALIGNMENT"}, \
177 {BOOKE_IRQPRIO_PROGRAM, "PROGRAM"}, \
178 {BOOKE_IRQPRIO_FP_UNAVAIL, "FP_UNAVAIL"}, \
179 {BOOKE_IRQPRIO_SYSCALL, "SYSCALL"}, \
180 {BOOKE_IRQPRIO_AP_UNAVAIL, "AP_UNAVAIL"}, \
181 {BOOKE_IRQPRIO_DTLB_MISS, "DTLB_MISS"}, \
182 {BOOKE_IRQPRIO_ITLB_MISS, "ITLB_MISS"}, \
183 {BOOKE_IRQPRIO_MACHINE_CHECK, "MACHINE_CHECK"}, \
184 {BOOKE_IRQPRIO_DEBUG, "DEBUG"}, \
185 {BOOKE_IRQPRIO_CRITICAL, "CRITICAL"}, \
186 {BOOKE_IRQPRIO_WATCHDOG, "WATCHDOG"}, \
187 {BOOKE_IRQPRIO_EXTERNAL, "EXTERNAL"}, \
188 {BOOKE_IRQPRIO_FIT, "FIT"}, \
189 {BOOKE_IRQPRIO_DECREMENTER, "DECREMENTER"}, \
190 {BOOKE_IRQPRIO_PERFORMANCE_MONITOR, "PERFORMANCE_MONITOR"}, \
191 {BOOKE_IRQPRIO_EXTERNAL_LEVEL, "EXTERNAL_LEVEL"}, \
192 {BOOKE_IRQPRIO_DBELL, "DBELL"}, \
193 {BOOKE_IRQPRIO_DBELL_CRIT, "DBELL_CRIT"} \
194
154TRACE_EVENT(kvm_booke_queue_irqprio, 195TRACE_EVENT(kvm_booke_queue_irqprio,
155 TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), 196 TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
156 TP_ARGS(vcpu, priority), 197 TP_ARGS(vcpu, priority),
@@ -167,8 +208,10 @@ TRACE_EVENT(kvm_booke_queue_irqprio,
167 __entry->pending = vcpu->arch.pending_exceptions; 208 __entry->pending = vcpu->arch.pending_exceptions;
168 ), 209 ),
169 210
170 TP_printk("vcpu=%x prio=%x pending=%lx", 211 TP_printk("vcpu=%x prio=%s pending=%lx",
171 __entry->cpu_nr, __entry->priority, __entry->pending) 212 __entry->cpu_nr,
213 __print_symbolic(__entry->priority, kvm_trace_symbol_irqprio),
214 __entry->pending)
172); 215);
173 216
174#endif 217#endif
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
new file mode 100644
index 000000000000..33d9daff5783
--- /dev/null
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -0,0 +1,477 @@
1#if !defined(_TRACE_KVM_HV_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_KVM_HV_H
3
4#include <linux/tracepoint.h>
5#include "trace_book3s.h"
6#include <asm/hvcall.h>
7#include <asm/kvm_asm.h>
8
9#undef TRACE_SYSTEM
10#define TRACE_SYSTEM kvm_hv
11#define TRACE_INCLUDE_PATH .
12#define TRACE_INCLUDE_FILE trace_hv
13
14#define kvm_trace_symbol_hcall \
15 {H_REMOVE, "H_REMOVE"}, \
16 {H_ENTER, "H_ENTER"}, \
17 {H_READ, "H_READ"}, \
18 {H_CLEAR_MOD, "H_CLEAR_MOD"}, \
19 {H_CLEAR_REF, "H_CLEAR_REF"}, \
20 {H_PROTECT, "H_PROTECT"}, \
21 {H_GET_TCE, "H_GET_TCE"}, \
22 {H_PUT_TCE, "H_PUT_TCE"}, \
23 {H_SET_SPRG0, "H_SET_SPRG0"}, \
24 {H_SET_DABR, "H_SET_DABR"}, \
25 {H_PAGE_INIT, "H_PAGE_INIT"}, \
26 {H_SET_ASR, "H_SET_ASR"}, \
27 {H_ASR_ON, "H_ASR_ON"}, \
28 {H_ASR_OFF, "H_ASR_OFF"}, \
29 {H_LOGICAL_CI_LOAD, "H_LOGICAL_CI_LOAD"}, \
30 {H_LOGICAL_CI_STORE, "H_LOGICAL_CI_STORE"}, \
31 {H_LOGICAL_CACHE_LOAD, "H_LOGICAL_CACHE_LOAD"}, \
32 {H_LOGICAL_CACHE_STORE, "H_LOGICAL_CACHE_STORE"}, \
33 {H_LOGICAL_ICBI, "H_LOGICAL_ICBI"}, \
34 {H_LOGICAL_DCBF, "H_LOGICAL_DCBF"}, \
35 {H_GET_TERM_CHAR, "H_GET_TERM_CHAR"}, \
36 {H_PUT_TERM_CHAR, "H_PUT_TERM_CHAR"}, \
37 {H_REAL_TO_LOGICAL, "H_REAL_TO_LOGICAL"}, \
38 {H_HYPERVISOR_DATA, "H_HYPERVISOR_DATA"}, \
39 {H_EOI, "H_EOI"}, \
40 {H_CPPR, "H_CPPR"}, \
41 {H_IPI, "H_IPI"}, \
42 {H_IPOLL, "H_IPOLL"}, \
43 {H_XIRR, "H_XIRR"}, \
44 {H_PERFMON, "H_PERFMON"}, \
45 {H_MIGRATE_DMA, "H_MIGRATE_DMA"}, \
46 {H_REGISTER_VPA, "H_REGISTER_VPA"}, \
47 {H_CEDE, "H_CEDE"}, \
48 {H_CONFER, "H_CONFER"}, \
49 {H_PROD, "H_PROD"}, \
50 {H_GET_PPP, "H_GET_PPP"}, \
51 {H_SET_PPP, "H_SET_PPP"}, \
52 {H_PURR, "H_PURR"}, \
53 {H_PIC, "H_PIC"}, \
54 {H_REG_CRQ, "H_REG_CRQ"}, \
55 {H_FREE_CRQ, "H_FREE_CRQ"}, \
56 {H_VIO_SIGNAL, "H_VIO_SIGNAL"}, \
57 {H_SEND_CRQ, "H_SEND_CRQ"}, \
58 {H_COPY_RDMA, "H_COPY_RDMA"}, \
59 {H_REGISTER_LOGICAL_LAN, "H_REGISTER_LOGICAL_LAN"}, \
60 {H_FREE_LOGICAL_LAN, "H_FREE_LOGICAL_LAN"}, \
61 {H_ADD_LOGICAL_LAN_BUFFER, "H_ADD_LOGICAL_LAN_BUFFER"}, \
62 {H_SEND_LOGICAL_LAN, "H_SEND_LOGICAL_LAN"}, \
63 {H_BULK_REMOVE, "H_BULK_REMOVE"}, \
64 {H_MULTICAST_CTRL, "H_MULTICAST_CTRL"}, \
65 {H_SET_XDABR, "H_SET_XDABR"}, \
66 {H_STUFF_TCE, "H_STUFF_TCE"}, \
67 {H_PUT_TCE_INDIRECT, "H_PUT_TCE_INDIRECT"}, \
68 {H_CHANGE_LOGICAL_LAN_MAC, "H_CHANGE_LOGICAL_LAN_MAC"}, \
69 {H_VTERM_PARTNER_INFO, "H_VTERM_PARTNER_INFO"}, \
70 {H_REGISTER_VTERM, "H_REGISTER_VTERM"}, \
71 {H_FREE_VTERM, "H_FREE_VTERM"}, \
72 {H_RESET_EVENTS, "H_RESET_EVENTS"}, \
73 {H_ALLOC_RESOURCE, "H_ALLOC_RESOURCE"}, \
74 {H_FREE_RESOURCE, "H_FREE_RESOURCE"}, \
75 {H_MODIFY_QP, "H_MODIFY_QP"}, \
76 {H_QUERY_QP, "H_QUERY_QP"}, \
77 {H_REREGISTER_PMR, "H_REREGISTER_PMR"}, \
78 {H_REGISTER_SMR, "H_REGISTER_SMR"}, \
79 {H_QUERY_MR, "H_QUERY_MR"}, \
80 {H_QUERY_MW, "H_QUERY_MW"}, \
81 {H_QUERY_HCA, "H_QUERY_HCA"}, \
82 {H_QUERY_PORT, "H_QUERY_PORT"}, \
83 {H_MODIFY_PORT, "H_MODIFY_PORT"}, \
84 {H_DEFINE_AQP1, "H_DEFINE_AQP1"}, \
85 {H_GET_TRACE_BUFFER, "H_GET_TRACE_BUFFER"}, \
86 {H_DEFINE_AQP0, "H_DEFINE_AQP0"}, \
87 {H_RESIZE_MR, "H_RESIZE_MR"}, \
88 {H_ATTACH_MCQP, "H_ATTACH_MCQP"}, \
89 {H_DETACH_MCQP, "H_DETACH_MCQP"}, \
90 {H_CREATE_RPT, "H_CREATE_RPT"}, \
91 {H_REMOVE_RPT, "H_REMOVE_RPT"}, \
92 {H_REGISTER_RPAGES, "H_REGISTER_RPAGES"}, \
93 {H_DISABLE_AND_GETC, "H_DISABLE_AND_GETC"}, \
94 {H_ERROR_DATA, "H_ERROR_DATA"}, \
95 {H_GET_HCA_INFO, "H_GET_HCA_INFO"}, \
96 {H_GET_PERF_COUNT, "H_GET_PERF_COUNT"}, \
97 {H_MANAGE_TRACE, "H_MANAGE_TRACE"}, \
98 {H_FREE_LOGICAL_LAN_BUFFER, "H_FREE_LOGICAL_LAN_BUFFER"}, \
99 {H_QUERY_INT_STATE, "H_QUERY_INT_STATE"}, \
100 {H_POLL_PENDING, "H_POLL_PENDING"}, \
101 {H_ILLAN_ATTRIBUTES, "H_ILLAN_ATTRIBUTES"}, \
102 {H_MODIFY_HEA_QP, "H_MODIFY_HEA_QP"}, \
103 {H_QUERY_HEA_QP, "H_QUERY_HEA_QP"}, \
104 {H_QUERY_HEA, "H_QUERY_HEA"}, \
105 {H_QUERY_HEA_PORT, "H_QUERY_HEA_PORT"}, \
106 {H_MODIFY_HEA_PORT, "H_MODIFY_HEA_PORT"}, \
107 {H_REG_BCMC, "H_REG_BCMC"}, \
108 {H_DEREG_BCMC, "H_DEREG_BCMC"}, \
109 {H_REGISTER_HEA_RPAGES, "H_REGISTER_HEA_RPAGES"}, \
110 {H_DISABLE_AND_GET_HEA, "H_DISABLE_AND_GET_HEA"}, \
111 {H_GET_HEA_INFO, "H_GET_HEA_INFO"}, \
112 {H_ALLOC_HEA_RESOURCE, "H_ALLOC_HEA_RESOURCE"}, \
113 {H_ADD_CONN, "H_ADD_CONN"}, \
114 {H_DEL_CONN, "H_DEL_CONN"}, \
115 {H_JOIN, "H_JOIN"}, \
116 {H_VASI_STATE, "H_VASI_STATE"}, \
117 {H_ENABLE_CRQ, "H_ENABLE_CRQ"}, \
118 {H_GET_EM_PARMS, "H_GET_EM_PARMS"}, \
119 {H_SET_MPP, "H_SET_MPP"}, \
120 {H_GET_MPP, "H_GET_MPP"}, \
121 {H_HOME_NODE_ASSOCIATIVITY, "H_HOME_NODE_ASSOCIATIVITY"}, \
122 {H_BEST_ENERGY, "H_BEST_ENERGY"}, \
123 {H_XIRR_X, "H_XIRR_X"}, \
124 {H_RANDOM, "H_RANDOM"}, \
125 {H_COP, "H_COP"}, \
126 {H_GET_MPP_X, "H_GET_MPP_X"}, \
127 {H_SET_MODE, "H_SET_MODE"}, \
128 {H_RTAS, "H_RTAS"}
129
130#define kvm_trace_symbol_kvmret \
131 {RESUME_GUEST, "RESUME_GUEST"}, \
132 {RESUME_GUEST_NV, "RESUME_GUEST_NV"}, \
133 {RESUME_HOST, "RESUME_HOST"}, \
134 {RESUME_HOST_NV, "RESUME_HOST_NV"}
135
136#define kvm_trace_symbol_hcall_rc \
137 {H_SUCCESS, "H_SUCCESS"}, \
138 {H_BUSY, "H_BUSY"}, \
139 {H_CLOSED, "H_CLOSED"}, \
140 {H_NOT_AVAILABLE, "H_NOT_AVAILABLE"}, \
141 {H_CONSTRAINED, "H_CONSTRAINED"}, \
142 {H_PARTIAL, "H_PARTIAL"}, \
143 {H_IN_PROGRESS, "H_IN_PROGRESS"}, \
144 {H_PAGE_REGISTERED, "H_PAGE_REGISTERED"}, \
145 {H_PARTIAL_STORE, "H_PARTIAL_STORE"}, \
146 {H_PENDING, "H_PENDING"}, \
147 {H_CONTINUE, "H_CONTINUE"}, \
148 {H_LONG_BUSY_START_RANGE, "H_LONG_BUSY_START_RANGE"}, \
149 {H_LONG_BUSY_ORDER_1_MSEC, "H_LONG_BUSY_ORDER_1_MSEC"}, \
150 {H_LONG_BUSY_ORDER_10_MSEC, "H_LONG_BUSY_ORDER_10_MSEC"}, \
151 {H_LONG_BUSY_ORDER_100_MSEC, "H_LONG_BUSY_ORDER_100_MSEC"}, \
152 {H_LONG_BUSY_ORDER_1_SEC, "H_LONG_BUSY_ORDER_1_SEC"}, \
153 {H_LONG_BUSY_ORDER_10_SEC, "H_LONG_BUSY_ORDER_10_SEC"}, \
154 {H_LONG_BUSY_ORDER_100_SEC, "H_LONG_BUSY_ORDER_100_SEC"}, \
155 {H_LONG_BUSY_END_RANGE, "H_LONG_BUSY_END_RANGE"}, \
156 {H_TOO_HARD, "H_TOO_HARD"}, \
157 {H_HARDWARE, "H_HARDWARE"}, \
158 {H_FUNCTION, "H_FUNCTION"}, \
159 {H_PRIVILEGE, "H_PRIVILEGE"}, \
160 {H_PARAMETER, "H_PARAMETER"}, \
161 {H_BAD_MODE, "H_BAD_MODE"}, \
162 {H_PTEG_FULL, "H_PTEG_FULL"}, \
163 {H_NOT_FOUND, "H_NOT_FOUND"}, \
164 {H_RESERVED_DABR, "H_RESERVED_DABR"}, \
165 {H_NO_MEM, "H_NO_MEM"}, \
166 {H_AUTHORITY, "H_AUTHORITY"}, \
167 {H_PERMISSION, "H_PERMISSION"}, \
168 {H_DROPPED, "H_DROPPED"}, \
169 {H_SOURCE_PARM, "H_SOURCE_PARM"}, \
170 {H_DEST_PARM, "H_DEST_PARM"}, \
171 {H_REMOTE_PARM, "H_REMOTE_PARM"}, \
172 {H_RESOURCE, "H_RESOURCE"}, \
173 {H_ADAPTER_PARM, "H_ADAPTER_PARM"}, \
174 {H_RH_PARM, "H_RH_PARM"}, \
175 {H_RCQ_PARM, "H_RCQ_PARM"}, \
176 {H_SCQ_PARM, "H_SCQ_PARM"}, \
177 {H_EQ_PARM, "H_EQ_PARM"}, \
178 {H_RT_PARM, "H_RT_PARM"}, \
179 {H_ST_PARM, "H_ST_PARM"}, \
180 {H_SIGT_PARM, "H_SIGT_PARM"}, \
181 {H_TOKEN_PARM, "H_TOKEN_PARM"}, \
182 {H_MLENGTH_PARM, "H_MLENGTH_PARM"}, \
183 {H_MEM_PARM, "H_MEM_PARM"}, \
184 {H_MEM_ACCESS_PARM, "H_MEM_ACCESS_PARM"}, \
185 {H_ATTR_PARM, "H_ATTR_PARM"}, \
186 {H_PORT_PARM, "H_PORT_PARM"}, \
187 {H_MCG_PARM, "H_MCG_PARM"}, \
188 {H_VL_PARM, "H_VL_PARM"}, \
189 {H_TSIZE_PARM, "H_TSIZE_PARM"}, \
190 {H_TRACE_PARM, "H_TRACE_PARM"}, \
191 {H_MASK_PARM, "H_MASK_PARM"}, \
192 {H_MCG_FULL, "H_MCG_FULL"}, \
193 {H_ALIAS_EXIST, "H_ALIAS_EXIST"}, \
194 {H_P_COUNTER, "H_P_COUNTER"}, \
195 {H_TABLE_FULL, "H_TABLE_FULL"}, \
196 {H_ALT_TABLE, "H_ALT_TABLE"}, \
197 {H_MR_CONDITION, "H_MR_CONDITION"}, \
198 {H_NOT_ENOUGH_RESOURCES, "H_NOT_ENOUGH_RESOURCES"}, \
199 {H_R_STATE, "H_R_STATE"}, \
200 {H_RESCINDED, "H_RESCINDED"}, \
201 {H_P2, "H_P2"}, \
202 {H_P3, "H_P3"}, \
203 {H_P4, "H_P4"}, \
204 {H_P5, "H_P5"}, \
205 {H_P6, "H_P6"}, \
206 {H_P7, "H_P7"}, \
207 {H_P8, "H_P8"}, \
208 {H_P9, "H_P9"}, \
209 {H_TOO_BIG, "H_TOO_BIG"}, \
210 {H_OVERLAP, "H_OVERLAP"}, \
211 {H_INTERRUPT, "H_INTERRUPT"}, \
212 {H_BAD_DATA, "H_BAD_DATA"}, \
213 {H_NOT_ACTIVE, "H_NOT_ACTIVE"}, \
214 {H_SG_LIST, "H_SG_LIST"}, \
215 {H_OP_MODE, "H_OP_MODE"}, \
216 {H_COP_HW, "H_COP_HW"}, \
217 {H_UNSUPPORTED_FLAG_START, "H_UNSUPPORTED_FLAG_START"}, \
218 {H_UNSUPPORTED_FLAG_END, "H_UNSUPPORTED_FLAG_END"}, \
219 {H_MULTI_THREADS_ACTIVE, "H_MULTI_THREADS_ACTIVE"}, \
220 {H_OUTSTANDING_COP_OPS, "H_OUTSTANDING_COP_OPS"}
221
222TRACE_EVENT(kvm_guest_enter,
223 TP_PROTO(struct kvm_vcpu *vcpu),
224 TP_ARGS(vcpu),
225
226 TP_STRUCT__entry(
227 __field(int, vcpu_id)
228 __field(unsigned long, pc)
229 __field(unsigned long, pending_exceptions)
230 __field(u8, ceded)
231 ),
232
233 TP_fast_assign(
234 __entry->vcpu_id = vcpu->vcpu_id;
235 __entry->pc = kvmppc_get_pc(vcpu);
236 __entry->ceded = vcpu->arch.ceded;
237 __entry->pending_exceptions = vcpu->arch.pending_exceptions;
238 ),
239
240 TP_printk("VCPU %d: pc=0x%lx pexcp=0x%lx ceded=%d",
241 __entry->vcpu_id,
242 __entry->pc,
243 __entry->pending_exceptions, __entry->ceded)
244);
245
246TRACE_EVENT(kvm_guest_exit,
247 TP_PROTO(struct kvm_vcpu *vcpu),
248 TP_ARGS(vcpu),
249
250 TP_STRUCT__entry(
251 __field(int, vcpu_id)
252 __field(int, trap)
253 __field(unsigned long, pc)
254 __field(unsigned long, msr)
255 __field(u8, ceded)
256 ),
257
258 TP_fast_assign(
259 __entry->vcpu_id = vcpu->vcpu_id;
260 __entry->trap = vcpu->arch.trap;
261 __entry->ceded = vcpu->arch.ceded;
262 __entry->pc = kvmppc_get_pc(vcpu);
263 __entry->msr = vcpu->arch.shregs.msr;
264 ),
265
266 TP_printk("VCPU %d: trap=%s pc=0x%lx msr=0x%lx, ceded=%d",
267 __entry->vcpu_id,
268 __print_symbolic(__entry->trap, kvm_trace_symbol_exit),
269 __entry->pc, __entry->msr, __entry->ceded
270 )
271);
272
273TRACE_EVENT(kvm_page_fault_enter,
274 TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep,
275 struct kvm_memory_slot *memslot, unsigned long ea,
276 unsigned long dsisr),
277
278 TP_ARGS(vcpu, hptep, memslot, ea, dsisr),
279
280 TP_STRUCT__entry(
281 __field(int, vcpu_id)
282 __field(unsigned long, hpte_v)
283 __field(unsigned long, hpte_r)
284 __field(unsigned long, gpte_r)
285 __field(unsigned long, ea)
286 __field(u64, base_gfn)
287 __field(u32, slot_flags)
288 __field(u32, dsisr)
289 ),
290
291 TP_fast_assign(
292 __entry->vcpu_id = vcpu->vcpu_id;
293 __entry->hpte_v = hptep[0];
294 __entry->hpte_r = hptep[1];
295 __entry->gpte_r = hptep[2];
296 __entry->ea = ea;
297 __entry->dsisr = dsisr;
298 __entry->base_gfn = memslot ? memslot->base_gfn : -1UL;
299 __entry->slot_flags = memslot ? memslot->flags : 0;
300 ),
301
302 TP_printk("VCPU %d: hpte=0x%lx:0x%lx guest=0x%lx ea=0x%lx,%x slot=0x%llx,0x%x",
303 __entry->vcpu_id,
304 __entry->hpte_v, __entry->hpte_r, __entry->gpte_r,
305 __entry->ea, __entry->dsisr,
306 __entry->base_gfn, __entry->slot_flags)
307);
308
309TRACE_EVENT(kvm_page_fault_exit,
310 TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, long ret),
311
312 TP_ARGS(vcpu, hptep, ret),
313
314 TP_STRUCT__entry(
315 __field(int, vcpu_id)
316 __field(unsigned long, hpte_v)
317 __field(unsigned long, hpte_r)
318 __field(long, ret)
319 ),
320
321 TP_fast_assign(
322 __entry->vcpu_id = vcpu->vcpu_id;
323 __entry->hpte_v = hptep[0];
324 __entry->hpte_r = hptep[1];
325 __entry->ret = ret;
326 ),
327
328 TP_printk("VCPU %d: hpte=0x%lx:0x%lx ret=0x%lx",
329 __entry->vcpu_id,
330 __entry->hpte_v, __entry->hpte_r, __entry->ret)
331);
332
333TRACE_EVENT(kvm_hcall_enter,
334 TP_PROTO(struct kvm_vcpu *vcpu),
335
336 TP_ARGS(vcpu),
337
338 TP_STRUCT__entry(
339 __field(int, vcpu_id)
340 __field(unsigned long, req)
341 __field(unsigned long, gpr4)
342 __field(unsigned long, gpr5)
343 __field(unsigned long, gpr6)
344 __field(unsigned long, gpr7)
345 ),
346
347 TP_fast_assign(
348 __entry->vcpu_id = vcpu->vcpu_id;
349 __entry->req = kvmppc_get_gpr(vcpu, 3);
350 __entry->gpr4 = kvmppc_get_gpr(vcpu, 4);
351 __entry->gpr5 = kvmppc_get_gpr(vcpu, 5);
352 __entry->gpr6 = kvmppc_get_gpr(vcpu, 6);
353 __entry->gpr7 = kvmppc_get_gpr(vcpu, 7);
354 ),
355
356 TP_printk("VCPU %d: hcall=%s GPR4-7=0x%lx,0x%lx,0x%lx,0x%lx",
357 __entry->vcpu_id,
358 __print_symbolic(__entry->req, kvm_trace_symbol_hcall),
359 __entry->gpr4, __entry->gpr5, __entry->gpr6, __entry->gpr7)
360);
361
362TRACE_EVENT(kvm_hcall_exit,
363 TP_PROTO(struct kvm_vcpu *vcpu, int ret),
364
365 TP_ARGS(vcpu, ret),
366
367 TP_STRUCT__entry(
368 __field(int, vcpu_id)
369 __field(unsigned long, ret)
370 __field(unsigned long, hcall_rc)
371 ),
372
373 TP_fast_assign(
374 __entry->vcpu_id = vcpu->vcpu_id;
375 __entry->ret = ret;
376 __entry->hcall_rc = kvmppc_get_gpr(vcpu, 3);
377 ),
378
379 TP_printk("VCPU %d: ret=%s hcall_rc=%s",
380 __entry->vcpu_id,
381 __print_symbolic(__entry->ret, kvm_trace_symbol_kvmret),
382 __print_symbolic(__entry->ret & RESUME_FLAG_HOST ?
383 H_TOO_HARD : __entry->hcall_rc,
384 kvm_trace_symbol_hcall_rc))
385);
386
387TRACE_EVENT(kvmppc_run_core,
388 TP_PROTO(struct kvmppc_vcore *vc, int where),
389
390 TP_ARGS(vc, where),
391
392 TP_STRUCT__entry(
393 __field(int, n_runnable)
394 __field(int, runner_vcpu)
395 __field(int, where)
396 __field(pid_t, tgid)
397 ),
398
399 TP_fast_assign(
400 __entry->runner_vcpu = vc->runner->vcpu_id;
401 __entry->n_runnable = vc->n_runnable;
402 __entry->where = where;
403 __entry->tgid = current->tgid;
404 ),
405
406 TP_printk("%s runner_vcpu==%d runnable=%d tgid=%d",
407 __entry->where ? "Exit" : "Enter",
408 __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
409);
410
411TRACE_EVENT(kvmppc_vcore_blocked,
412 TP_PROTO(struct kvmppc_vcore *vc, int where),
413
414 TP_ARGS(vc, where),
415
416 TP_STRUCT__entry(
417 __field(int, n_runnable)
418 __field(int, runner_vcpu)
419 __field(int, where)
420 __field(pid_t, tgid)
421 ),
422
423 TP_fast_assign(
424 __entry->runner_vcpu = vc->runner->vcpu_id;
425 __entry->n_runnable = vc->n_runnable;
426 __entry->where = where;
427 __entry->tgid = current->tgid;
428 ),
429
430 TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d",
431 __entry->where ? "Exit" : "Enter",
432 __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
433);
434
435TRACE_EVENT(kvmppc_run_vcpu_enter,
436 TP_PROTO(struct kvm_vcpu *vcpu),
437
438 TP_ARGS(vcpu),
439
440 TP_STRUCT__entry(
441 __field(int, vcpu_id)
442 __field(pid_t, tgid)
443 ),
444
445 TP_fast_assign(
446 __entry->vcpu_id = vcpu->vcpu_id;
447 __entry->tgid = current->tgid;
448 ),
449
450 TP_printk("VCPU %d: tgid=%d", __entry->vcpu_id, __entry->tgid)
451);
452
453TRACE_EVENT(kvmppc_run_vcpu_exit,
454 TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_run *run),
455
456 TP_ARGS(vcpu, run),
457
458 TP_STRUCT__entry(
459 __field(int, vcpu_id)
460 __field(int, exit)
461 __field(int, ret)
462 ),
463
464 TP_fast_assign(
465 __entry->vcpu_id = vcpu->vcpu_id;
466 __entry->exit = run->exit_reason;
467 __entry->ret = vcpu->arch.ret;
468 ),
469
470 TP_printk("VCPU %d: exit=%d, ret=%d",
471 __entry->vcpu_id, __entry->exit, __entry->ret)
472);
473
474#endif /* _TRACE_KVM_HV_H */
475
476/* This part must be outside protection */
477#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index e1357cd8dc1f..810507cb688a 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -3,36 +3,13 @@
3#define _TRACE_KVM_PR_H 3#define _TRACE_KVM_PR_H
4 4
5#include <linux/tracepoint.h> 5#include <linux/tracepoint.h>
6#include "trace_book3s.h"
6 7
7#undef TRACE_SYSTEM 8#undef TRACE_SYSTEM
8#define TRACE_SYSTEM kvm_pr 9#define TRACE_SYSTEM kvm_pr
9#define TRACE_INCLUDE_PATH . 10#define TRACE_INCLUDE_PATH .
10#define TRACE_INCLUDE_FILE trace_pr 11#define TRACE_INCLUDE_FILE trace_pr
11 12
12#define kvm_trace_symbol_exit \
13 {0x100, "SYSTEM_RESET"}, \
14 {0x200, "MACHINE_CHECK"}, \
15 {0x300, "DATA_STORAGE"}, \
16 {0x380, "DATA_SEGMENT"}, \
17 {0x400, "INST_STORAGE"}, \
18 {0x480, "INST_SEGMENT"}, \
19 {0x500, "EXTERNAL"}, \
20 {0x501, "EXTERNAL_LEVEL"}, \
21 {0x502, "EXTERNAL_HV"}, \
22 {0x600, "ALIGNMENT"}, \
23 {0x700, "PROGRAM"}, \
24 {0x800, "FP_UNAVAIL"}, \
25 {0x900, "DECREMENTER"}, \
26 {0x980, "HV_DECREMENTER"}, \
27 {0xc00, "SYSCALL"}, \
28 {0xd00, "TRACE"}, \
29 {0xe00, "H_DATA_STORAGE"}, \
30 {0xe20, "H_INST_STORAGE"}, \
31 {0xe40, "H_EMUL_ASSIST"}, \
32 {0xf00, "PERFMON"}, \
33 {0xf20, "ALTIVEC"}, \
34 {0xf40, "VSX"}
35
36TRACE_EVENT(kvm_book3s_reenter, 13TRACE_EVENT(kvm_book3s_reenter,
37 TP_PROTO(int r, struct kvm_vcpu *vcpu), 14 TP_PROTO(int r, struct kvm_vcpu *vcpu),
38 TP_ARGS(r, vcpu), 15 TP_ARGS(r, vcpu),
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 2175f911a73a..9cba74d5d853 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -123,7 +123,7 @@ struct kvm_s390_sie_block {
123#define ICPT_PARTEXEC 0x38 123#define ICPT_PARTEXEC 0x38
124#define ICPT_IOINST 0x40 124#define ICPT_IOINST 0x40
125 __u8 icptcode; /* 0x0050 */ 125 __u8 icptcode; /* 0x0050 */
126 __u8 reserved51; /* 0x0051 */ 126 __u8 icptstatus; /* 0x0051 */
127 __u16 ihcpu; /* 0x0052 */ 127 __u16 ihcpu; /* 0x0052 */
128 __u8 reserved54[2]; /* 0x0054 */ 128 __u8 reserved54[2]; /* 0x0054 */
129 __u16 ipa; /* 0x0056 */ 129 __u16 ipa; /* 0x0056 */
@@ -226,10 +226,17 @@ struct kvm_vcpu_stat {
226 u32 instruction_sigp_sense_running; 226 u32 instruction_sigp_sense_running;
227 u32 instruction_sigp_external_call; 227 u32 instruction_sigp_external_call;
228 u32 instruction_sigp_emergency; 228 u32 instruction_sigp_emergency;
229 u32 instruction_sigp_cond_emergency;
230 u32 instruction_sigp_start;
229 u32 instruction_sigp_stop; 231 u32 instruction_sigp_stop;
232 u32 instruction_sigp_stop_store_status;
233 u32 instruction_sigp_store_status;
230 u32 instruction_sigp_arch; 234 u32 instruction_sigp_arch;
231 u32 instruction_sigp_prefix; 235 u32 instruction_sigp_prefix;
232 u32 instruction_sigp_restart; 236 u32 instruction_sigp_restart;
237 u32 instruction_sigp_init_cpu_reset;
238 u32 instruction_sigp_cpu_reset;
239 u32 instruction_sigp_unknown;
233 u32 diagnose_10; 240 u32 diagnose_10;
234 u32 diagnose_44; 241 u32 diagnose_44;
235 u32 diagnose_9c; 242 u32 diagnose_9c;
@@ -288,6 +295,79 @@ struct kvm_vcpu_stat {
288#define PGM_PER 0x80 295#define PGM_PER 0x80
289#define PGM_CRYPTO_OPERATION 0x119 296#define PGM_CRYPTO_OPERATION 0x119
290 297
298/* irq types in order of priority */
299enum irq_types {
300 IRQ_PEND_MCHK_EX = 0,
301 IRQ_PEND_SVC,
302 IRQ_PEND_PROG,
303 IRQ_PEND_MCHK_REP,
304 IRQ_PEND_EXT_IRQ_KEY,
305 IRQ_PEND_EXT_MALFUNC,
306 IRQ_PEND_EXT_EMERGENCY,
307 IRQ_PEND_EXT_EXTERNAL,
308 IRQ_PEND_EXT_CLOCK_COMP,
309 IRQ_PEND_EXT_CPU_TIMER,
310 IRQ_PEND_EXT_TIMING,
311 IRQ_PEND_EXT_SERVICE,
312 IRQ_PEND_EXT_HOST,
313 IRQ_PEND_PFAULT_INIT,
314 IRQ_PEND_PFAULT_DONE,
315 IRQ_PEND_VIRTIO,
316 IRQ_PEND_IO_ISC_0,
317 IRQ_PEND_IO_ISC_1,
318 IRQ_PEND_IO_ISC_2,
319 IRQ_PEND_IO_ISC_3,
320 IRQ_PEND_IO_ISC_4,
321 IRQ_PEND_IO_ISC_5,
322 IRQ_PEND_IO_ISC_6,
323 IRQ_PEND_IO_ISC_7,
324 IRQ_PEND_SIGP_STOP,
325 IRQ_PEND_RESTART,
326 IRQ_PEND_SET_PREFIX,
327 IRQ_PEND_COUNT
328};
329
330/*
331 * Repressible (non-floating) machine check interrupts
332 * subclass bits in MCIC
333 */
334#define MCHK_EXTD_BIT 58
335#define MCHK_DEGR_BIT 56
336#define MCHK_WARN_BIT 55
337#define MCHK_REP_MASK ((1UL << MCHK_DEGR_BIT) | \
338 (1UL << MCHK_EXTD_BIT) | \
339 (1UL << MCHK_WARN_BIT))
340
341/* Exigent machine check interrupts subclass bits in MCIC */
342#define MCHK_SD_BIT 63
343#define MCHK_PD_BIT 62
344#define MCHK_EX_MASK ((1UL << MCHK_SD_BIT) | (1UL << MCHK_PD_BIT))
345
346#define IRQ_PEND_EXT_MASK ((1UL << IRQ_PEND_EXT_IRQ_KEY) | \
347 (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
348 (1UL << IRQ_PEND_EXT_CPU_TIMER) | \
349 (1UL << IRQ_PEND_EXT_MALFUNC) | \
350 (1UL << IRQ_PEND_EXT_EMERGENCY) | \
351 (1UL << IRQ_PEND_EXT_EXTERNAL) | \
352 (1UL << IRQ_PEND_EXT_TIMING) | \
353 (1UL << IRQ_PEND_EXT_HOST) | \
354 (1UL << IRQ_PEND_EXT_SERVICE) | \
355 (1UL << IRQ_PEND_VIRTIO) | \
356 (1UL << IRQ_PEND_PFAULT_INIT) | \
357 (1UL << IRQ_PEND_PFAULT_DONE))
358
359#define IRQ_PEND_IO_MASK ((1UL << IRQ_PEND_IO_ISC_0) | \
360 (1UL << IRQ_PEND_IO_ISC_1) | \
361 (1UL << IRQ_PEND_IO_ISC_2) | \
362 (1UL << IRQ_PEND_IO_ISC_3) | \
363 (1UL << IRQ_PEND_IO_ISC_4) | \
364 (1UL << IRQ_PEND_IO_ISC_5) | \
365 (1UL << IRQ_PEND_IO_ISC_6) | \
366 (1UL << IRQ_PEND_IO_ISC_7))
367
368#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \
369 (1UL << IRQ_PEND_MCHK_EX))
370
291struct kvm_s390_interrupt_info { 371struct kvm_s390_interrupt_info {
292 struct list_head list; 372 struct list_head list;
293 u64 type; 373 u64 type;
@@ -306,14 +386,25 @@ struct kvm_s390_interrupt_info {
306#define ACTION_STORE_ON_STOP (1<<0) 386#define ACTION_STORE_ON_STOP (1<<0)
307#define ACTION_STOP_ON_STOP (1<<1) 387#define ACTION_STOP_ON_STOP (1<<1)
308 388
389struct kvm_s390_irq_payload {
390 struct kvm_s390_io_info io;
391 struct kvm_s390_ext_info ext;
392 struct kvm_s390_pgm_info pgm;
393 struct kvm_s390_emerg_info emerg;
394 struct kvm_s390_extcall_info extcall;
395 struct kvm_s390_prefix_info prefix;
396 struct kvm_s390_mchk_info mchk;
397};
398
309struct kvm_s390_local_interrupt { 399struct kvm_s390_local_interrupt {
310 spinlock_t lock; 400 spinlock_t lock;
311 struct list_head list;
312 atomic_t active;
313 struct kvm_s390_float_interrupt *float_int; 401 struct kvm_s390_float_interrupt *float_int;
314 wait_queue_head_t *wq; 402 wait_queue_head_t *wq;
315 atomic_t *cpuflags; 403 atomic_t *cpuflags;
316 unsigned int action_bits; 404 unsigned int action_bits;
405 DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
406 struct kvm_s390_irq_payload irq;
407 unsigned long pending_irqs;
317}; 408};
318 409
319struct kvm_s390_float_interrupt { 410struct kvm_s390_float_interrupt {
@@ -434,6 +525,8 @@ struct kvm_arch{
434 int user_cpu_state_ctrl; 525 int user_cpu_state_ctrl;
435 struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; 526 struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
436 wait_queue_head_t ipte_wq; 527 wait_queue_head_t ipte_wq;
528 int ipte_lock_count;
529 struct mutex ipte_mutex;
437 spinlock_t start_stop_lock; 530 spinlock_t start_stop_lock;
438 struct kvm_s390_crypto crypto; 531 struct kvm_s390_crypto crypto;
439}; 532};
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index e510b9460efa..3009c2ba46d2 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -24,6 +24,7 @@ void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
24 24
25int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, 25int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
26 unsigned long key, bool nq); 26 unsigned long key, bool nq);
27unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
27 28
28static inline void clear_table(unsigned long *s, unsigned long val, size_t n) 29static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
29{ 30{
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 49576115dbb7..fad4ae23ece0 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -10,6 +10,7 @@
10#define SIGP_RESTART 6 10#define SIGP_RESTART 6
11#define SIGP_STOP_AND_STORE_STATUS 9 11#define SIGP_STOP_AND_STORE_STATUS 9
12#define SIGP_INITIAL_CPU_RESET 11 12#define SIGP_INITIAL_CPU_RESET 11
13#define SIGP_CPU_RESET 12
13#define SIGP_SET_PREFIX 13 14#define SIGP_SET_PREFIX 13
14#define SIGP_STORE_STATUS_AT_ADDRESS 14 15#define SIGP_STORE_STATUS_AT_ADDRESS 14
15#define SIGP_SET_ARCHITECTURE 18 16#define SIGP_SET_ARCHITECTURE 18
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 0f961a1c64b3..8b9ccf02a2c5 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -207,8 +207,6 @@ union raddress {
207 unsigned long pfra : 52; /* Page-Frame Real Address */ 207 unsigned long pfra : 52; /* Page-Frame Real Address */
208}; 208};
209 209
210static int ipte_lock_count;
211static DEFINE_MUTEX(ipte_mutex);
212 210
213int ipte_lock_held(struct kvm_vcpu *vcpu) 211int ipte_lock_held(struct kvm_vcpu *vcpu)
214{ 212{
@@ -216,47 +214,51 @@ int ipte_lock_held(struct kvm_vcpu *vcpu)
216 214
217 if (vcpu->arch.sie_block->eca & 1) 215 if (vcpu->arch.sie_block->eca & 1)
218 return ic->kh != 0; 216 return ic->kh != 0;
219 return ipte_lock_count != 0; 217 return vcpu->kvm->arch.ipte_lock_count != 0;
220} 218}
221 219
222static void ipte_lock_simple(struct kvm_vcpu *vcpu) 220static void ipte_lock_simple(struct kvm_vcpu *vcpu)
223{ 221{
224 union ipte_control old, new, *ic; 222 union ipte_control old, new, *ic;
225 223
226 mutex_lock(&ipte_mutex); 224 mutex_lock(&vcpu->kvm->arch.ipte_mutex);
227 ipte_lock_count++; 225 vcpu->kvm->arch.ipte_lock_count++;
228 if (ipte_lock_count > 1) 226 if (vcpu->kvm->arch.ipte_lock_count > 1)
229 goto out; 227 goto out;
230 ic = &vcpu->kvm->arch.sca->ipte_control; 228 ic = &vcpu->kvm->arch.sca->ipte_control;
231 do { 229 do {
232 old = ACCESS_ONCE(*ic); 230 old = *ic;
231 barrier();
233 while (old.k) { 232 while (old.k) {
234 cond_resched(); 233 cond_resched();
235 old = ACCESS_ONCE(*ic); 234 old = *ic;
235 barrier();
236 } 236 }
237 new = old; 237 new = old;
238 new.k = 1; 238 new.k = 1;
239 } while (cmpxchg(&ic->val, old.val, new.val) != old.val); 239 } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
240out: 240out:
241 mutex_unlock(&ipte_mutex); 241 mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
242} 242}
243 243
244static void ipte_unlock_simple(struct kvm_vcpu *vcpu) 244static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
245{ 245{
246 union ipte_control old, new, *ic; 246 union ipte_control old, new, *ic;
247 247
248 mutex_lock(&ipte_mutex); 248 mutex_lock(&vcpu->kvm->arch.ipte_mutex);
249 ipte_lock_count--; 249 vcpu->kvm->arch.ipte_lock_count--;
250 if (ipte_lock_count) 250 if (vcpu->kvm->arch.ipte_lock_count)
251 goto out; 251 goto out;
252 ic = &vcpu->kvm->arch.sca->ipte_control; 252 ic = &vcpu->kvm->arch.sca->ipte_control;
253 do { 253 do {
254 new = old = ACCESS_ONCE(*ic); 254 old = *ic;
255 barrier();
256 new = old;
255 new.k = 0; 257 new.k = 0;
256 } while (cmpxchg(&ic->val, old.val, new.val) != old.val); 258 } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
257 wake_up(&vcpu->kvm->arch.ipte_wq); 259 wake_up(&vcpu->kvm->arch.ipte_wq);
258out: 260out:
259 mutex_unlock(&ipte_mutex); 261 mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
260} 262}
261 263
262static void ipte_lock_siif(struct kvm_vcpu *vcpu) 264static void ipte_lock_siif(struct kvm_vcpu *vcpu)
@@ -265,10 +267,12 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu)
265 267
266 ic = &vcpu->kvm->arch.sca->ipte_control; 268 ic = &vcpu->kvm->arch.sca->ipte_control;
267 do { 269 do {
268 old = ACCESS_ONCE(*ic); 270 old = *ic;
271 barrier();
269 while (old.kg) { 272 while (old.kg) {
270 cond_resched(); 273 cond_resched();
271 old = ACCESS_ONCE(*ic); 274 old = *ic;
275 barrier();
272 } 276 }
273 new = old; 277 new = old;
274 new.k = 1; 278 new.k = 1;
@@ -282,7 +286,9 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
282 286
283 ic = &vcpu->kvm->arch.sca->ipte_control; 287 ic = &vcpu->kvm->arch.sca->ipte_control;
284 do { 288 do {
285 new = old = ACCESS_ONCE(*ic); 289 old = *ic;
290 barrier();
291 new = old;
286 new.kh--; 292 new.kh--;
287 if (!new.kh) 293 if (!new.kh)
288 new.k = 0; 294 new.k = 0;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index eaf46291d361..81c77ab8102e 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -38,6 +38,19 @@ static const intercept_handler_t instruction_handlers[256] = {
38 [0xeb] = kvm_s390_handle_eb, 38 [0xeb] = kvm_s390_handle_eb,
39}; 39};
40 40
41void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc)
42{
43 struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
44
45 /* Use the length of the EXECUTE instruction if necessary */
46 if (sie_block->icptstatus & 1) {
47 ilc = (sie_block->icptstatus >> 4) & 0x6;
48 if (!ilc)
49 ilc = 4;
50 }
51 sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc);
52}
53
41static int handle_noop(struct kvm_vcpu *vcpu) 54static int handle_noop(struct kvm_vcpu *vcpu)
42{ 55{
43 switch (vcpu->arch.sie_block->icptcode) { 56 switch (vcpu->arch.sie_block->icptcode) {
@@ -244,7 +257,7 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
244static int handle_external_interrupt(struct kvm_vcpu *vcpu) 257static int handle_external_interrupt(struct kvm_vcpu *vcpu)
245{ 258{
246 u16 eic = vcpu->arch.sie_block->eic; 259 u16 eic = vcpu->arch.sie_block->eic;
247 struct kvm_s390_interrupt irq; 260 struct kvm_s390_irq irq;
248 psw_t newpsw; 261 psw_t newpsw;
249 int rc; 262 int rc;
250 263
@@ -269,7 +282,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
269 if (kvm_s390_si_ext_call_pending(vcpu)) 282 if (kvm_s390_si_ext_call_pending(vcpu))
270 return 0; 283 return 0;
271 irq.type = KVM_S390_INT_EXTERNAL_CALL; 284 irq.type = KVM_S390_INT_EXTERNAL_CALL;
272 irq.parm = vcpu->arch.sie_block->extcpuaddr; 285 irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr;
273 break; 286 break;
274 default: 287 default:
275 return -EOPNOTSUPP; 288 return -EOPNOTSUPP;
@@ -288,7 +301,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
288 */ 301 */
289static int handle_mvpg_pei(struct kvm_vcpu *vcpu) 302static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
290{ 303{
291 psw_t *psw = &vcpu->arch.sie_block->gpsw;
292 unsigned long srcaddr, dstaddr; 304 unsigned long srcaddr, dstaddr;
293 int reg1, reg2, rc; 305 int reg1, reg2, rc;
294 306
@@ -310,7 +322,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
310 if (rc != 0) 322 if (rc != 0)
311 return rc; 323 return rc;
312 324
313 psw->addr = __rewind_psw(*psw, 4); 325 kvm_s390_rewind_psw(vcpu, 4);
314 326
315 return 0; 327 return 0;
316} 328}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a39838457f01..f00f31e66cd8 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -16,6 +16,7 @@
16#include <linux/mmu_context.h> 16#include <linux/mmu_context.h>
17#include <linux/signal.h> 17#include <linux/signal.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/bitmap.h>
19#include <asm/asm-offsets.h> 20#include <asm/asm-offsets.h>
20#include <asm/uaccess.h> 21#include <asm/uaccess.h>
21#include "kvm-s390.h" 22#include "kvm-s390.h"
@@ -27,8 +28,8 @@
27#define IOINT_CSSID_MASK 0x03fc0000 28#define IOINT_CSSID_MASK 0x03fc0000
28#define IOINT_AI_MASK 0x04000000 29#define IOINT_AI_MASK 0x04000000
29#define PFAULT_INIT 0x0600 30#define PFAULT_INIT 0x0600
30 31#define PFAULT_DONE 0x0680
31static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu); 32#define VIRTIO_PARAM 0x0d00
32 33
33static int is_ioint(u64 type) 34static int is_ioint(u64 type)
34{ 35{
@@ -136,6 +137,31 @@ static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
136 return 0; 137 return 0;
137} 138}
138 139
140static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
141{
142 return vcpu->arch.local_int.pending_irqs;
143}
144
145static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
146{
147 unsigned long active_mask = pending_local_irqs(vcpu);
148
149 if (psw_extint_disabled(vcpu))
150 active_mask &= ~IRQ_PEND_EXT_MASK;
151 if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul))
152 __clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask);
153 if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul))
154 __clear_bit(IRQ_PEND_EXT_EMERGENCY, &active_mask);
155 if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
156 __clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
157 if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul))
158 __clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
159 if (psw_mchk_disabled(vcpu))
160 active_mask &= ~IRQ_PEND_MCHK_MASK;
161
162 return active_mask;
163}
164
139static void __set_cpu_idle(struct kvm_vcpu *vcpu) 165static void __set_cpu_idle(struct kvm_vcpu *vcpu)
140{ 166{
141 atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); 167 atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
@@ -170,26 +196,45 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
170 atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags); 196 atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
171} 197}
172 198
199static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
200{
201 if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
202 return;
203 if (psw_extint_disabled(vcpu))
204 __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
205 else
206 vcpu->arch.sie_block->lctl |= LCTL_CR0;
207}
208
209static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
210{
211 if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
212 return;
213 if (psw_mchk_disabled(vcpu))
214 vcpu->arch.sie_block->ictl |= ICTL_LPSW;
215 else
216 vcpu->arch.sie_block->lctl |= LCTL_CR14;
217}
218
219/* Set interception request for non-deliverable local interrupts */
220static void set_intercept_indicators_local(struct kvm_vcpu *vcpu)
221{
222 set_intercept_indicators_ext(vcpu);
223 set_intercept_indicators_mchk(vcpu);
224}
225
173static void __set_intercept_indicator(struct kvm_vcpu *vcpu, 226static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
174 struct kvm_s390_interrupt_info *inti) 227 struct kvm_s390_interrupt_info *inti)
175{ 228{
176 switch (inti->type) { 229 switch (inti->type) {
177 case KVM_S390_INT_EXTERNAL_CALL:
178 case KVM_S390_INT_EMERGENCY:
179 case KVM_S390_INT_SERVICE: 230 case KVM_S390_INT_SERVICE:
180 case KVM_S390_INT_PFAULT_INIT:
181 case KVM_S390_INT_PFAULT_DONE: 231 case KVM_S390_INT_PFAULT_DONE:
182 case KVM_S390_INT_VIRTIO: 232 case KVM_S390_INT_VIRTIO:
183 case KVM_S390_INT_CLOCK_COMP:
184 case KVM_S390_INT_CPU_TIMER:
185 if (psw_extint_disabled(vcpu)) 233 if (psw_extint_disabled(vcpu))
186 __set_cpuflag(vcpu, CPUSTAT_EXT_INT); 234 __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
187 else 235 else
188 vcpu->arch.sie_block->lctl |= LCTL_CR0; 236 vcpu->arch.sie_block->lctl |= LCTL_CR0;
189 break; 237 break;
190 case KVM_S390_SIGP_STOP:
191 __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
192 break;
193 case KVM_S390_MCHK: 238 case KVM_S390_MCHK:
194 if (psw_mchk_disabled(vcpu)) 239 if (psw_mchk_disabled(vcpu))
195 vcpu->arch.sie_block->ictl |= ICTL_LPSW; 240 vcpu->arch.sie_block->ictl |= ICTL_LPSW;
@@ -226,13 +271,236 @@ static u16 get_ilc(struct kvm_vcpu *vcpu)
226 } 271 }
227} 272}
228 273
229static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, 274static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
230 struct kvm_s390_pgm_info *pgm_info) 275{
276 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
277 int rc;
278
279 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
280 0, 0);
281
282 rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
283 (u16 *)__LC_EXT_INT_CODE);
284 rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
285 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
286 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
287 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
288 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
289 clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
290 return rc ? -EFAULT : 0;
291}
292
293static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
294{
295 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
296 int rc;
297
298 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
299 0, 0);
300
301 rc = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
302 (u16 __user *)__LC_EXT_INT_CODE);
303 rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
304 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
305 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
306 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
307 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
308 clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
309 return rc ? -EFAULT : 0;
310}
311
312static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
313{
314 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
315 struct kvm_s390_ext_info ext;
316 int rc;
317
318 spin_lock(&li->lock);
319 ext = li->irq.ext;
320 clear_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
321 li->irq.ext.ext_params2 = 0;
322 spin_unlock(&li->lock);
323
324 VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx",
325 0, ext.ext_params2);
326 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
327 KVM_S390_INT_PFAULT_INIT,
328 0, ext.ext_params2);
329
330 rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *) __LC_EXT_INT_CODE);
331 rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
332 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
333 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
334 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
335 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
336 rc |= put_guest_lc(vcpu, ext.ext_params2, (u64 *) __LC_EXT_PARAMS2);
337 return rc ? -EFAULT : 0;
338}
339
340static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
341{
342 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
343 struct kvm_s390_mchk_info mchk;
344 int rc;
345
346 spin_lock(&li->lock);
347 mchk = li->irq.mchk;
348 /*
349 * If there was an exigent machine check pending, then any repressible
350 * machine checks that might have been pending are indicated along
351 * with it, so always clear both bits
352 */
353 clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
354 clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
355 memset(&li->irq.mchk, 0, sizeof(mchk));
356 spin_unlock(&li->lock);
357
358 VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
359 mchk.mcic);
360 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
361 mchk.cr14, mchk.mcic);
362
363 rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
364 rc |= put_guest_lc(vcpu, mchk.mcic,
365 (u64 __user *) __LC_MCCK_CODE);
366 rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
367 (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
368 rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
369 &mchk.fixed_logout, sizeof(mchk.fixed_logout));
370 rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
371 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
372 rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
373 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
374 return rc ? -EFAULT : 0;
375}
376
377static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
378{
379 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
380 int rc;
381
382 VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
383 vcpu->stat.deliver_restart_signal++;
384 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
385
386 rc = write_guest_lc(vcpu,
387 offsetof(struct _lowcore, restart_old_psw),
388 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
389 rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
390 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
391 clear_bit(IRQ_PEND_RESTART, &li->pending_irqs);
392 return rc ? -EFAULT : 0;
393}
394
395static int __must_check __deliver_stop(struct kvm_vcpu *vcpu)
396{
397 VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
398 vcpu->stat.deliver_stop_signal++;
399 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_STOP,
400 0, 0);
401
402 __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
403 clear_bit(IRQ_PEND_SIGP_STOP, &vcpu->arch.local_int.pending_irqs);
404 return 0;
405}
406
407static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
408{
409 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
410 struct kvm_s390_prefix_info prefix;
411
412 spin_lock(&li->lock);
413 prefix = li->irq.prefix;
414 li->irq.prefix.address = 0;
415 clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
416 spin_unlock(&li->lock);
417
418 VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address);
419 vcpu->stat.deliver_prefix_signal++;
420 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
421 KVM_S390_SIGP_SET_PREFIX,
422 prefix.address, 0);
423
424 kvm_s390_set_prefix(vcpu, prefix.address);
425 return 0;
426}
427
428static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
429{
430 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
431 int rc;
432 int cpu_addr;
433
434 spin_lock(&li->lock);
435 cpu_addr = find_first_bit(li->sigp_emerg_pending, KVM_MAX_VCPUS);
436 clear_bit(cpu_addr, li->sigp_emerg_pending);
437 if (bitmap_empty(li->sigp_emerg_pending, KVM_MAX_VCPUS))
438 clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
439 spin_unlock(&li->lock);
440
441 VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
442 vcpu->stat.deliver_emergency_signal++;
443 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
444 cpu_addr, 0);
445
446 rc = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG,
447 (u16 *)__LC_EXT_INT_CODE);
448 rc |= put_guest_lc(vcpu, cpu_addr, (u16 *)__LC_EXT_CPU_ADDR);
449 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
450 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
451 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
452 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
453 return rc ? -EFAULT : 0;
454}
455
456static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
457{
458 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
459 struct kvm_s390_extcall_info extcall;
460 int rc;
461
462 spin_lock(&li->lock);
463 extcall = li->irq.extcall;
464 li->irq.extcall.code = 0;
465 clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
466 spin_unlock(&li->lock);
467
468 VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
469 vcpu->stat.deliver_external_call++;
470 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
471 KVM_S390_INT_EXTERNAL_CALL,
472 extcall.code, 0);
473
474 rc = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL,
475 (u16 *)__LC_EXT_INT_CODE);
476 rc |= put_guest_lc(vcpu, extcall.code, (u16 *)__LC_EXT_CPU_ADDR);
477 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
478 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
479 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw,
480 sizeof(psw_t));
481 return rc ? -EFAULT : 0;
482}
483
484static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
231{ 485{
486 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
487 struct kvm_s390_pgm_info pgm_info;
232 int rc = 0; 488 int rc = 0;
233 u16 ilc = get_ilc(vcpu); 489 u16 ilc = get_ilc(vcpu);
234 490
235 switch (pgm_info->code & ~PGM_PER) { 491 spin_lock(&li->lock);
492 pgm_info = li->irq.pgm;
493 clear_bit(IRQ_PEND_PROG, &li->pending_irqs);
494 memset(&li->irq.pgm, 0, sizeof(pgm_info));
495 spin_unlock(&li->lock);
496
497 VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
498 pgm_info.code, ilc);
499 vcpu->stat.deliver_program_int++;
500 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
501 pgm_info.code, 0);
502
503 switch (pgm_info.code & ~PGM_PER) {
236 case PGM_AFX_TRANSLATION: 504 case PGM_AFX_TRANSLATION:
237 case PGM_ASX_TRANSLATION: 505 case PGM_ASX_TRANSLATION:
238 case PGM_EX_TRANSLATION: 506 case PGM_EX_TRANSLATION:
@@ -243,7 +511,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
243 case PGM_PRIMARY_AUTHORITY: 511 case PGM_PRIMARY_AUTHORITY:
244 case PGM_SECONDARY_AUTHORITY: 512 case PGM_SECONDARY_AUTHORITY:
245 case PGM_SPACE_SWITCH: 513 case PGM_SPACE_SWITCH:
246 rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, 514 rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
247 (u64 *)__LC_TRANS_EXC_CODE); 515 (u64 *)__LC_TRANS_EXC_CODE);
248 break; 516 break;
249 case PGM_ALEN_TRANSLATION: 517 case PGM_ALEN_TRANSLATION:
@@ -252,7 +520,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
252 case PGM_ASTE_SEQUENCE: 520 case PGM_ASTE_SEQUENCE:
253 case PGM_ASTE_VALIDITY: 521 case PGM_ASTE_VALIDITY:
254 case PGM_EXTENDED_AUTHORITY: 522 case PGM_EXTENDED_AUTHORITY:
255 rc = put_guest_lc(vcpu, pgm_info->exc_access_id, 523 rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
256 (u8 *)__LC_EXC_ACCESS_ID); 524 (u8 *)__LC_EXC_ACCESS_ID);
257 break; 525 break;
258 case PGM_ASCE_TYPE: 526 case PGM_ASCE_TYPE:
@@ -261,247 +529,208 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
261 case PGM_REGION_SECOND_TRANS: 529 case PGM_REGION_SECOND_TRANS:
262 case PGM_REGION_THIRD_TRANS: 530 case PGM_REGION_THIRD_TRANS:
263 case PGM_SEGMENT_TRANSLATION: 531 case PGM_SEGMENT_TRANSLATION:
264 rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, 532 rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
265 (u64 *)__LC_TRANS_EXC_CODE); 533 (u64 *)__LC_TRANS_EXC_CODE);
266 rc |= put_guest_lc(vcpu, pgm_info->exc_access_id, 534 rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
267 (u8 *)__LC_EXC_ACCESS_ID); 535 (u8 *)__LC_EXC_ACCESS_ID);
268 rc |= put_guest_lc(vcpu, pgm_info->op_access_id, 536 rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
269 (u8 *)__LC_OP_ACCESS_ID); 537 (u8 *)__LC_OP_ACCESS_ID);
270 break; 538 break;
271 case PGM_MONITOR: 539 case PGM_MONITOR:
272 rc = put_guest_lc(vcpu, pgm_info->mon_class_nr, 540 rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
273 (u64 *)__LC_MON_CLASS_NR); 541 (u16 *)__LC_MON_CLASS_NR);
274 rc |= put_guest_lc(vcpu, pgm_info->mon_code, 542 rc |= put_guest_lc(vcpu, pgm_info.mon_code,
275 (u64 *)__LC_MON_CODE); 543 (u64 *)__LC_MON_CODE);
276 break; 544 break;
277 case PGM_DATA: 545 case PGM_DATA:
278 rc = put_guest_lc(vcpu, pgm_info->data_exc_code, 546 rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
279 (u32 *)__LC_DATA_EXC_CODE); 547 (u32 *)__LC_DATA_EXC_CODE);
280 break; 548 break;
281 case PGM_PROTECTION: 549 case PGM_PROTECTION:
282 rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, 550 rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
283 (u64 *)__LC_TRANS_EXC_CODE); 551 (u64 *)__LC_TRANS_EXC_CODE);
284 rc |= put_guest_lc(vcpu, pgm_info->exc_access_id, 552 rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
285 (u8 *)__LC_EXC_ACCESS_ID); 553 (u8 *)__LC_EXC_ACCESS_ID);
286 break; 554 break;
287 } 555 }
288 556
289 if (pgm_info->code & PGM_PER) { 557 if (pgm_info.code & PGM_PER) {
290 rc |= put_guest_lc(vcpu, pgm_info->per_code, 558 rc |= put_guest_lc(vcpu, pgm_info.per_code,
291 (u8 *) __LC_PER_CODE); 559 (u8 *) __LC_PER_CODE);
292 rc |= put_guest_lc(vcpu, pgm_info->per_atmid, 560 rc |= put_guest_lc(vcpu, pgm_info.per_atmid,
293 (u8 *)__LC_PER_ATMID); 561 (u8 *)__LC_PER_ATMID);
294 rc |= put_guest_lc(vcpu, pgm_info->per_address, 562 rc |= put_guest_lc(vcpu, pgm_info.per_address,
295 (u64 *) __LC_PER_ADDRESS); 563 (u64 *) __LC_PER_ADDRESS);
296 rc |= put_guest_lc(vcpu, pgm_info->per_access_id, 564 rc |= put_guest_lc(vcpu, pgm_info.per_access_id,
297 (u8 *) __LC_PER_ACCESS_ID); 565 (u8 *) __LC_PER_ACCESS_ID);
298 } 566 }
299 567
300 rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); 568 rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
301 rc |= put_guest_lc(vcpu, pgm_info->code, 569 rc |= put_guest_lc(vcpu, pgm_info.code,
302 (u16 *)__LC_PGM_INT_CODE); 570 (u16 *)__LC_PGM_INT_CODE);
303 rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, 571 rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
304 &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 572 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
305 rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW, 573 rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
306 &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); 574 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
575 return rc ? -EFAULT : 0;
576}
307 577
308 return rc; 578static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
579 struct kvm_s390_interrupt_info *inti)
580{
581 int rc;
582
583 VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
584 inti->ext.ext_params);
585 vcpu->stat.deliver_service_signal++;
586 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
587 inti->ext.ext_params, 0);
588
589 rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
590 rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
591 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
592 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
593 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
594 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
595 rc |= put_guest_lc(vcpu, inti->ext.ext_params,
596 (u32 *)__LC_EXT_PARAMS);
597 return rc ? -EFAULT : 0;
309} 598}
310 599
311static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu, 600static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu,
312 struct kvm_s390_interrupt_info *inti) 601 struct kvm_s390_interrupt_info *inti)
313{ 602{
314 const unsigned short table[] = { 2, 4, 4, 6 }; 603 int rc;
315 int rc = 0; 604
605 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
606 KVM_S390_INT_PFAULT_DONE, 0,
607 inti->ext.ext_params2);
608
609 rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
610 rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR);
611 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
612 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
613 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
614 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
615 rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
616 (u64 *)__LC_EXT_PARAMS2);
617 return rc ? -EFAULT : 0;
618}
619
620static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu,
621 struct kvm_s390_interrupt_info *inti)
622{
623 int rc;
624
625 VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
626 inti->ext.ext_params, inti->ext.ext_params2);
627 vcpu->stat.deliver_virtio_interrupt++;
628 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
629 inti->ext.ext_params,
630 inti->ext.ext_params2);
631
632 rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
633 rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR);
634 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
635 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
636 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
637 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
638 rc |= put_guest_lc(vcpu, inti->ext.ext_params,
639 (u32 *)__LC_EXT_PARAMS);
640 rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
641 (u64 *)__LC_EXT_PARAMS2);
642 return rc ? -EFAULT : 0;
643}
644
645static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
646 struct kvm_s390_interrupt_info *inti)
647{
648 int rc;
649
650 VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
651 vcpu->stat.deliver_io_int++;
652 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
653 ((__u32)inti->io.subchannel_id << 16) |
654 inti->io.subchannel_nr,
655 ((__u64)inti->io.io_int_parm << 32) |
656 inti->io.io_int_word);
657
658 rc = put_guest_lc(vcpu, inti->io.subchannel_id,
659 (u16 *)__LC_SUBCHANNEL_ID);
660 rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
661 (u16 *)__LC_SUBCHANNEL_NR);
662 rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
663 (u32 *)__LC_IO_INT_PARM);
664 rc |= put_guest_lc(vcpu, inti->io.io_int_word,
665 (u32 *)__LC_IO_INT_WORD);
666 rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
667 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
668 rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
669 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
670 return rc ? -EFAULT : 0;
671}
672
673static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu,
674 struct kvm_s390_interrupt_info *inti)
675{
676 struct kvm_s390_mchk_info *mchk = &inti->mchk;
677 int rc;
678
679 VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
680 mchk->mcic);
681 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
682 mchk->cr14, mchk->mcic);
683
684 rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
685 rc |= put_guest_lc(vcpu, mchk->mcic,
686 (u64 __user *) __LC_MCCK_CODE);
687 rc |= put_guest_lc(vcpu, mchk->failing_storage_address,
688 (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
689 rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
690 &mchk->fixed_logout, sizeof(mchk->fixed_logout));
691 rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
692 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
693 rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
694 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
695 return rc ? -EFAULT : 0;
696}
697
698typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
699
700static const deliver_irq_t deliver_irq_funcs[] = {
701 [IRQ_PEND_MCHK_EX] = __deliver_machine_check,
702 [IRQ_PEND_PROG] = __deliver_prog,
703 [IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal,
704 [IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call,
705 [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc,
706 [IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer,
707 [IRQ_PEND_RESTART] = __deliver_restart,
708 [IRQ_PEND_SIGP_STOP] = __deliver_stop,
709 [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix,
710 [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init,
711};
712
713static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu,
714 struct kvm_s390_interrupt_info *inti)
715{
716 int rc;
316 717
317 switch (inti->type) { 718 switch (inti->type) {
318 case KVM_S390_INT_EMERGENCY:
319 VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
320 vcpu->stat.deliver_emergency_signal++;
321 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
322 inti->emerg.code, 0);
323 rc = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
324 rc |= put_guest_lc(vcpu, inti->emerg.code,
325 (u16 *)__LC_EXT_CPU_ADDR);
326 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
327 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
328 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
329 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
330 break;
331 case KVM_S390_INT_EXTERNAL_CALL:
332 VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
333 vcpu->stat.deliver_external_call++;
334 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
335 inti->extcall.code, 0);
336 rc = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE);
337 rc |= put_guest_lc(vcpu, inti->extcall.code,
338 (u16 *)__LC_EXT_CPU_ADDR);
339 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
340 &vcpu->arch.sie_block->gpsw,
341 sizeof(psw_t));
342 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
343 &vcpu->arch.sie_block->gpsw,
344 sizeof(psw_t));
345 break;
346 case KVM_S390_INT_CLOCK_COMP:
347 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
348 inti->ext.ext_params, 0);
349 rc = deliver_ckc_interrupt(vcpu);
350 break;
351 case KVM_S390_INT_CPU_TIMER:
352 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
353 inti->ext.ext_params, 0);
354 rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
355 (u16 *)__LC_EXT_INT_CODE);
356 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
357 &vcpu->arch.sie_block->gpsw,
358 sizeof(psw_t));
359 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
360 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
361 rc |= put_guest_lc(vcpu, inti->ext.ext_params,
362 (u32 *)__LC_EXT_PARAMS);
363 break;
364 case KVM_S390_INT_SERVICE: 719 case KVM_S390_INT_SERVICE:
365 VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", 720 rc = __deliver_service(vcpu, inti);
366 inti->ext.ext_params);
367 vcpu->stat.deliver_service_signal++;
368 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
369 inti->ext.ext_params, 0);
370 rc = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE);
371 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
372 &vcpu->arch.sie_block->gpsw,
373 sizeof(psw_t));
374 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
375 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
376 rc |= put_guest_lc(vcpu, inti->ext.ext_params,
377 (u32 *)__LC_EXT_PARAMS);
378 break;
379 case KVM_S390_INT_PFAULT_INIT:
380 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
381 inti->ext.ext_params2);
382 rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
383 (u16 *) __LC_EXT_INT_CODE);
384 rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
385 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
386 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
387 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
388 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
389 rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
390 (u64 *) __LC_EXT_PARAMS2);
391 break; 721 break;
392 case KVM_S390_INT_PFAULT_DONE: 722 case KVM_S390_INT_PFAULT_DONE:
393 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, 723 rc = __deliver_pfault_done(vcpu, inti);
394 inti->ext.ext_params2);
395 rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
396 rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR);
397 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
398 &vcpu->arch.sie_block->gpsw,
399 sizeof(psw_t));
400 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
401 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
402 rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
403 (u64 *)__LC_EXT_PARAMS2);
404 break; 724 break;
405 case KVM_S390_INT_VIRTIO: 725 case KVM_S390_INT_VIRTIO:
406 VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", 726 rc = __deliver_virtio(vcpu, inti);
407 inti->ext.ext_params, inti->ext.ext_params2);
408 vcpu->stat.deliver_virtio_interrupt++;
409 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
410 inti->ext.ext_params,
411 inti->ext.ext_params2);
412 rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
413 rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR);
414 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
415 &vcpu->arch.sie_block->gpsw,
416 sizeof(psw_t));
417 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
418 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
419 rc |= put_guest_lc(vcpu, inti->ext.ext_params,
420 (u32 *)__LC_EXT_PARAMS);
421 rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
422 (u64 *)__LC_EXT_PARAMS2);
423 break;
424 case KVM_S390_SIGP_STOP:
425 VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
426 vcpu->stat.deliver_stop_signal++;
427 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
428 0, 0);
429 __set_intercept_indicator(vcpu, inti);
430 break;
431
432 case KVM_S390_SIGP_SET_PREFIX:
433 VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
434 inti->prefix.address);
435 vcpu->stat.deliver_prefix_signal++;
436 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
437 inti->prefix.address, 0);
438 kvm_s390_set_prefix(vcpu, inti->prefix.address);
439 break;
440
441 case KVM_S390_RESTART:
442 VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
443 vcpu->stat.deliver_restart_signal++;
444 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
445 0, 0);
446 rc = write_guest_lc(vcpu,
447 offsetof(struct _lowcore, restart_old_psw),
448 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
449 rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
450 &vcpu->arch.sie_block->gpsw,
451 sizeof(psw_t));
452 break; 727 break;
453 case KVM_S390_PROGRAM_INT:
454 VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
455 inti->pgm.code,
456 table[vcpu->arch.sie_block->ipa >> 14]);
457 vcpu->stat.deliver_program_int++;
458 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
459 inti->pgm.code, 0);
460 rc = __deliver_prog_irq(vcpu, &inti->pgm);
461 break;
462
463 case KVM_S390_MCHK: 728 case KVM_S390_MCHK:
464 VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", 729 rc = __deliver_mchk_floating(vcpu, inti);
465 inti->mchk.mcic);
466 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
467 inti->mchk.cr14,
468 inti->mchk.mcic);
469 rc = kvm_s390_vcpu_store_status(vcpu,
470 KVM_S390_STORE_STATUS_PREFIXED);
471 rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE);
472 rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
473 &vcpu->arch.sie_block->gpsw,
474 sizeof(psw_t));
475 rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
476 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
477 break; 730 break;
478
479 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: 731 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
480 { 732 rc = __deliver_io(vcpu, inti);
481 __u32 param0 = ((__u32)inti->io.subchannel_id << 16) |
482 inti->io.subchannel_nr;
483 __u64 param1 = ((__u64)inti->io.io_int_parm << 32) |
484 inti->io.io_int_word;
485 VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
486 vcpu->stat.deliver_io_int++;
487 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
488 param0, param1);
489 rc = put_guest_lc(vcpu, inti->io.subchannel_id,
490 (u16 *)__LC_SUBCHANNEL_ID);
491 rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
492 (u16 *)__LC_SUBCHANNEL_NR);
493 rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
494 (u32 *)__LC_IO_INT_PARM);
495 rc |= put_guest_lc(vcpu, inti->io.io_int_word,
496 (u32 *)__LC_IO_INT_WORD);
497 rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
498 &vcpu->arch.sie_block->gpsw,
499 sizeof(psw_t));
500 rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
501 &vcpu->arch.sie_block->gpsw,
502 sizeof(psw_t));
503 break; 733 break;
504 }
505 default: 734 default:
506 BUG(); 735 BUG();
507 } 736 }
@@ -509,19 +738,6 @@ static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu,
509 return rc; 738 return rc;
510} 739}
511 740
512static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
513{
514 int rc;
515
516 rc = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
517 rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
518 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
519 rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
520 &vcpu->arch.sie_block->gpsw,
521 sizeof(psw_t));
522 return rc;
523}
524
525/* Check whether SIGP interpretation facility has an external call pending */ 741/* Check whether SIGP interpretation facility has an external call pending */
526int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu) 742int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
527{ 743{
@@ -538,20 +754,11 @@ int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
538 754
539int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) 755int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
540{ 756{
541 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
542 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; 757 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
543 struct kvm_s390_interrupt_info *inti; 758 struct kvm_s390_interrupt_info *inti;
544 int rc = 0; 759 int rc;
545 760
546 if (atomic_read(&li->active)) { 761 rc = !!deliverable_local_irqs(vcpu);
547 spin_lock(&li->lock);
548 list_for_each_entry(inti, &li->list, list)
549 if (__interrupt_is_deliverable(vcpu, inti)) {
550 rc = 1;
551 break;
552 }
553 spin_unlock(&li->lock);
554 }
555 762
556 if ((!rc) && atomic_read(&fi->active)) { 763 if ((!rc) && atomic_read(&fi->active)) {
557 spin_lock(&fi->lock); 764 spin_lock(&fi->lock);
@@ -643,18 +850,15 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
643void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) 850void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
644{ 851{
645 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 852 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
646 struct kvm_s390_interrupt_info *n, *inti = NULL;
647 853
648 spin_lock(&li->lock); 854 spin_lock(&li->lock);
649 list_for_each_entry_safe(inti, n, &li->list, list) { 855 li->pending_irqs = 0;
650 list_del(&inti->list); 856 bitmap_zero(li->sigp_emerg_pending, KVM_MAX_VCPUS);
651 kfree(inti); 857 memset(&li->irq, 0, sizeof(li->irq));
652 }
653 atomic_set(&li->active, 0);
654 spin_unlock(&li->lock); 858 spin_unlock(&li->lock);
655 859
656 /* clear pending external calls set by sigp interpretation facility */ 860 /* clear pending external calls set by sigp interpretation facility */
657 atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); 861 atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags);
658 atomic_clear_mask(SIGP_CTRL_C, 862 atomic_clear_mask(SIGP_CTRL_C,
659 &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); 863 &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
660} 864}
@@ -664,34 +868,35 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
664 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 868 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
665 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; 869 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
666 struct kvm_s390_interrupt_info *n, *inti = NULL; 870 struct kvm_s390_interrupt_info *n, *inti = NULL;
871 deliver_irq_t func;
667 int deliver; 872 int deliver;
668 int rc = 0; 873 int rc = 0;
874 unsigned long irq_type;
875 unsigned long deliverable_irqs;
669 876
670 __reset_intercept_indicators(vcpu); 877 __reset_intercept_indicators(vcpu);
671 if (atomic_read(&li->active)) {
672 do {
673 deliver = 0;
674 spin_lock(&li->lock);
675 list_for_each_entry_safe(inti, n, &li->list, list) {
676 if (__interrupt_is_deliverable(vcpu, inti)) {
677 list_del(&inti->list);
678 deliver = 1;
679 break;
680 }
681 __set_intercept_indicator(vcpu, inti);
682 }
683 if (list_empty(&li->list))
684 atomic_set(&li->active, 0);
685 spin_unlock(&li->lock);
686 if (deliver) {
687 rc = __do_deliver_interrupt(vcpu, inti);
688 kfree(inti);
689 }
690 } while (!rc && deliver);
691 }
692 878
693 if (!rc && kvm_cpu_has_pending_timer(vcpu)) 879 /* pending ckc conditions might have been invalidated */
694 rc = deliver_ckc_interrupt(vcpu); 880 clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
881 if (kvm_cpu_has_pending_timer(vcpu))
882 set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
883
884 do {
885 deliverable_irqs = deliverable_local_irqs(vcpu);
886 /* bits are in the order of interrupt priority */
887 irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT);
888 if (irq_type == IRQ_PEND_COUNT)
889 break;
890 func = deliver_irq_funcs[irq_type];
891 if (!func) {
892 WARN_ON_ONCE(func == NULL);
893 clear_bit(irq_type, &li->pending_irqs);
894 continue;
895 }
896 rc = func(vcpu);
897 } while (!rc && irq_type != IRQ_PEND_COUNT);
898
899 set_intercept_indicators_local(vcpu);
695 900
696 if (!rc && atomic_read(&fi->active)) { 901 if (!rc && atomic_read(&fi->active)) {
697 do { 902 do {
@@ -710,7 +915,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
710 atomic_set(&fi->active, 0); 915 atomic_set(&fi->active, 0);
711 spin_unlock(&fi->lock); 916 spin_unlock(&fi->lock);
712 if (deliver) { 917 if (deliver) {
713 rc = __do_deliver_interrupt(vcpu, inti); 918 rc = __deliver_floating_interrupt(vcpu, inti);
714 kfree(inti); 919 kfree(inti);
715 } 920 }
716 } while (!rc && deliver); 921 } while (!rc && deliver);
@@ -719,23 +924,26 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
719 return rc; 924 return rc;
720} 925}
721 926
722int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) 927static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
723{ 928{
724 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 929 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
725 struct kvm_s390_interrupt_info *inti;
726 930
727 inti = kzalloc(sizeof(*inti), GFP_KERNEL); 931 li->irq.pgm = irq->u.pgm;
728 if (!inti) 932 set_bit(IRQ_PEND_PROG, &li->pending_irqs);
729 return -ENOMEM; 933 return 0;
934}
730 935
731 inti->type = KVM_S390_PROGRAM_INT; 936int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
732 inti->pgm.code = code; 937{
938 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
939 struct kvm_s390_irq irq;
733 940
734 VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); 941 VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
735 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1); 942 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code,
943 0, 1);
736 spin_lock(&li->lock); 944 spin_lock(&li->lock);
737 list_add(&inti->list, &li->list); 945 irq.u.pgm.code = code;
738 atomic_set(&li->active, 1); 946 __inject_prog(vcpu, &irq);
739 BUG_ON(waitqueue_active(li->wq)); 947 BUG_ON(waitqueue_active(li->wq));
740 spin_unlock(&li->lock); 948 spin_unlock(&li->lock);
741 return 0; 949 return 0;
@@ -745,27 +953,166 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
745 struct kvm_s390_pgm_info *pgm_info) 953 struct kvm_s390_pgm_info *pgm_info)
746{ 954{
747 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 955 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
748 struct kvm_s390_interrupt_info *inti; 956 struct kvm_s390_irq irq;
749 957 int rc;
750 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
751 if (!inti)
752 return -ENOMEM;
753 958
754 VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)", 959 VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
755 pgm_info->code); 960 pgm_info->code);
756 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, 961 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
757 pgm_info->code, 0, 1); 962 pgm_info->code, 0, 1);
758
759 inti->type = KVM_S390_PROGRAM_INT;
760 memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm));
761 spin_lock(&li->lock); 963 spin_lock(&li->lock);
762 list_add(&inti->list, &li->list); 964 irq.u.pgm = *pgm_info;
763 atomic_set(&li->active, 1); 965 rc = __inject_prog(vcpu, &irq);
764 BUG_ON(waitqueue_active(li->wq)); 966 BUG_ON(waitqueue_active(li->wq));
765 spin_unlock(&li->lock); 967 spin_unlock(&li->lock);
968 return rc;
969}
970
971static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
972{
973 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
974
975 VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx",
976 irq->u.ext.ext_params, irq->u.ext.ext_params2);
977 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
978 irq->u.ext.ext_params,
979 irq->u.ext.ext_params2, 2);
980
981 li->irq.ext = irq->u.ext;
982 set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
983 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
766 return 0; 984 return 0;
767} 985}
768 986
987int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
988{
989 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
990 struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
991
992 VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
993 irq->u.extcall.code);
994 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
995 irq->u.extcall.code, 0, 2);
996
997 *extcall = irq->u.extcall;
998 set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
999 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
1000 return 0;
1001}
1002
1003static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
1004{
1005 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1006 struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
1007
1008 VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
1009 prefix->address);
1010 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
1011 prefix->address, 0, 2);
1012
1013 *prefix = irq->u.prefix;
1014 set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
1015 return 0;
1016}
1017
1018static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
1019{
1020 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1021
1022 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2);
1023
1024 li->action_bits |= ACTION_STOP_ON_STOP;
1025 set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
1026 return 0;
1027}
1028
1029static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
1030 struct kvm_s390_irq *irq)
1031{
1032 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1033
1034 VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type);
1035 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2);
1036
1037 set_bit(IRQ_PEND_RESTART, &li->pending_irqs);
1038 return 0;
1039}
1040
1041static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
1042 struct kvm_s390_irq *irq)
1043{
1044 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1045 struct kvm_s390_emerg_info *emerg = &li->irq.emerg;
1046
1047 VCPU_EVENT(vcpu, 3, "inject: emergency %u\n",
1048 irq->u.emerg.code);
1049 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
1050 emerg->code, 0, 2);
1051
1052 set_bit(emerg->code, li->sigp_emerg_pending);
1053 set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
1054 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
1055 return 0;
1056}
1057
1058static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
1059{
1060 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1061 struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
1062
1063 VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
1064 mchk->mcic);
1065 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
1066 mchk->mcic, 2);
1067
1068 /*
1069 * Because repressible machine checks can be indicated along with
1070 * exigent machine checks (PoP, Chapter 11, Interruption action)
1071 * we need to combine cr14, mcic and external damage code.
1072 * Failing storage address and the logout area should not be or'ed
1073 * together, we just indicate the last occurrence of the corresponding
1074 * machine check
1075 */
1076 mchk->cr14 |= irq->u.mchk.cr14;
1077 mchk->mcic |= irq->u.mchk.mcic;
1078 mchk->ext_damage_code |= irq->u.mchk.ext_damage_code;
1079 mchk->failing_storage_address = irq->u.mchk.failing_storage_address;
1080 memcpy(&mchk->fixed_logout, &irq->u.mchk.fixed_logout,
1081 sizeof(mchk->fixed_logout));
1082 if (mchk->mcic & MCHK_EX_MASK)
1083 set_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
1084 else if (mchk->mcic & MCHK_REP_MASK)
1085 set_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
1086 return 0;
1087}
1088
1089static int __inject_ckc(struct kvm_vcpu *vcpu)
1090{
1091 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1092
1093 VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP);
1094 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
1095 0, 0, 2);
1096
1097 set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
1098 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
1099 return 0;
1100}
1101
1102static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
1103{
1104 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1105
1106 VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER);
1107 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
1108 0, 0, 2);
1109
1110 set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
1111 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
1112 return 0;
1113}
1114
1115
769struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, 1116struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
770 u64 cr6, u64 schid) 1117 u64 cr6, u64 schid)
771{ 1118{
@@ -851,7 +1198,17 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
851 dst_vcpu = kvm_get_vcpu(kvm, sigcpu); 1198 dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
852 li = &dst_vcpu->arch.local_int; 1199 li = &dst_vcpu->arch.local_int;
853 spin_lock(&li->lock); 1200 spin_lock(&li->lock);
854 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); 1201 switch (inti->type) {
1202 case KVM_S390_MCHK:
1203 atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
1204 break;
1205 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
1206 atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
1207 break;
1208 default:
1209 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
1210 break;
1211 }
855 spin_unlock(&li->lock); 1212 spin_unlock(&li->lock);
856 kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); 1213 kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
857unlock_fi: 1214unlock_fi:
@@ -920,92 +1277,85 @@ void kvm_s390_reinject_io_int(struct kvm *kvm,
920 __inject_vm(kvm, inti); 1277 __inject_vm(kvm, inti);
921} 1278}
922 1279
923int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, 1280int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
924 struct kvm_s390_interrupt *s390int) 1281 struct kvm_s390_irq *irq)
925{ 1282{
926 struct kvm_s390_local_interrupt *li; 1283 irq->type = s390int->type;
927 struct kvm_s390_interrupt_info *inti; 1284 switch (irq->type) {
1285 case KVM_S390_PROGRAM_INT:
1286 if (s390int->parm & 0xffff0000)
1287 return -EINVAL;
1288 irq->u.pgm.code = s390int->parm;
1289 break;
1290 case KVM_S390_SIGP_SET_PREFIX:
1291 irq->u.prefix.address = s390int->parm;
1292 break;
1293 case KVM_S390_INT_EXTERNAL_CALL:
1294 if (irq->u.extcall.code & 0xffff0000)
1295 return -EINVAL;
1296 irq->u.extcall.code = s390int->parm;
1297 break;
1298 case KVM_S390_INT_EMERGENCY:
1299 if (irq->u.emerg.code & 0xffff0000)
1300 return -EINVAL;
1301 irq->u.emerg.code = s390int->parm;
1302 break;
1303 case KVM_S390_MCHK:
1304 irq->u.mchk.mcic = s390int->parm64;
1305 break;
1306 }
1307 return 0;
1308}
928 1309
929 inti = kzalloc(sizeof(*inti), GFP_KERNEL); 1310int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
930 if (!inti) 1311{
931 return -ENOMEM; 1312 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
1313 int rc;
932 1314
933 switch (s390int->type) { 1315 spin_lock(&li->lock);
1316 switch (irq->type) {
934 case KVM_S390_PROGRAM_INT: 1317 case KVM_S390_PROGRAM_INT:
935 if (s390int->parm & 0xffff0000) {
936 kfree(inti);
937 return -EINVAL;
938 }
939 inti->type = s390int->type;
940 inti->pgm.code = s390int->parm;
941 VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", 1318 VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
942 s390int->parm); 1319 irq->u.pgm.code);
1320 rc = __inject_prog(vcpu, irq);
943 break; 1321 break;
944 case KVM_S390_SIGP_SET_PREFIX: 1322 case KVM_S390_SIGP_SET_PREFIX:
945 inti->prefix.address = s390int->parm; 1323 rc = __inject_set_prefix(vcpu, irq);
946 inti->type = s390int->type;
947 VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
948 s390int->parm);
949 break; 1324 break;
950 case KVM_S390_SIGP_STOP: 1325 case KVM_S390_SIGP_STOP:
1326 rc = __inject_sigp_stop(vcpu, irq);
1327 break;
951 case KVM_S390_RESTART: 1328 case KVM_S390_RESTART:
1329 rc = __inject_sigp_restart(vcpu, irq);
1330 break;
952 case KVM_S390_INT_CLOCK_COMP: 1331 case KVM_S390_INT_CLOCK_COMP:
1332 rc = __inject_ckc(vcpu);
1333 break;
953 case KVM_S390_INT_CPU_TIMER: 1334 case KVM_S390_INT_CPU_TIMER:
954 VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); 1335 rc = __inject_cpu_timer(vcpu);
955 inti->type = s390int->type;
956 break; 1336 break;
957 case KVM_S390_INT_EXTERNAL_CALL: 1337 case KVM_S390_INT_EXTERNAL_CALL:
958 if (s390int->parm & 0xffff0000) { 1338 rc = __inject_extcall(vcpu, irq);
959 kfree(inti);
960 return -EINVAL;
961 }
962 VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
963 s390int->parm);
964 inti->type = s390int->type;
965 inti->extcall.code = s390int->parm;
966 break; 1339 break;
967 case KVM_S390_INT_EMERGENCY: 1340 case KVM_S390_INT_EMERGENCY:
968 if (s390int->parm & 0xffff0000) { 1341 rc = __inject_sigp_emergency(vcpu, irq);
969 kfree(inti);
970 return -EINVAL;
971 }
972 VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", s390int->parm);
973 inti->type = s390int->type;
974 inti->emerg.code = s390int->parm;
975 break; 1342 break;
976 case KVM_S390_MCHK: 1343 case KVM_S390_MCHK:
977 VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", 1344 rc = __inject_mchk(vcpu, irq);
978 s390int->parm64);
979 inti->type = s390int->type;
980 inti->mchk.mcic = s390int->parm64;
981 break; 1345 break;
982 case KVM_S390_INT_PFAULT_INIT: 1346 case KVM_S390_INT_PFAULT_INIT:
983 inti->type = s390int->type; 1347 rc = __inject_pfault_init(vcpu, irq);
984 inti->ext.ext_params2 = s390int->parm64;
985 break; 1348 break;
986 case KVM_S390_INT_VIRTIO: 1349 case KVM_S390_INT_VIRTIO:
987 case KVM_S390_INT_SERVICE: 1350 case KVM_S390_INT_SERVICE:
988 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: 1351 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
989 default: 1352 default:
990 kfree(inti); 1353 rc = -EINVAL;
991 return -EINVAL;
992 } 1354 }
993 trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm,
994 s390int->parm64, 2);
995
996 li = &vcpu->arch.local_int;
997 spin_lock(&li->lock);
998 if (inti->type == KVM_S390_PROGRAM_INT)
999 list_add(&inti->list, &li->list);
1000 else
1001 list_add_tail(&inti->list, &li->list);
1002 atomic_set(&li->active, 1);
1003 if (inti->type == KVM_S390_SIGP_STOP)
1004 li->action_bits |= ACTION_STOP_ON_STOP;
1005 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
1006 spin_unlock(&li->lock); 1355 spin_unlock(&li->lock);
1007 kvm_s390_vcpu_wakeup(vcpu); 1356 if (!rc)
1008 return 0; 1357 kvm_s390_vcpu_wakeup(vcpu);
1358 return rc;
1009} 1359}
1010 1360
1011void kvm_s390_clear_float_irqs(struct kvm *kvm) 1361void kvm_s390_clear_float_irqs(struct kvm *kvm)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6b049ee75a56..3e09801e3104 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -81,10 +81,17 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
81 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, 81 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
82 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, 82 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
83 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, 83 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
84 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
85 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
84 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, 86 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
87 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
88 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
85 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, 89 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
86 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, 90 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
87 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, 91 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
92 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
93 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
94 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
88 { "diagnose_10", VCPU_STAT(diagnose_10) }, 95 { "diagnose_10", VCPU_STAT(diagnose_10) },
89 { "diagnose_44", VCPU_STAT(diagnose_44) }, 96 { "diagnose_44", VCPU_STAT(diagnose_44) },
90 { "diagnose_9c", VCPU_STAT(diagnose_9c) }, 97 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
@@ -453,6 +460,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
453 spin_lock_init(&kvm->arch.float_int.lock); 460 spin_lock_init(&kvm->arch.float_int.lock);
454 INIT_LIST_HEAD(&kvm->arch.float_int.list); 461 INIT_LIST_HEAD(&kvm->arch.float_int.list);
455 init_waitqueue_head(&kvm->arch.ipte_wq); 462 init_waitqueue_head(&kvm->arch.ipte_wq);
463 mutex_init(&kvm->arch.ipte_mutex);
456 464
457 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 465 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
458 VM_EVENT(kvm, 3, "%s", "vm created"); 466 VM_EVENT(kvm, 3, "%s", "vm created");
@@ -711,7 +719,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
711 } 719 }
712 720
713 spin_lock_init(&vcpu->arch.local_int.lock); 721 spin_lock_init(&vcpu->arch.local_int.lock);
714 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
715 vcpu->arch.local_int.float_int = &kvm->arch.float_int; 722 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
716 vcpu->arch.local_int.wq = &vcpu->wq; 723 vcpu->arch.local_int.wq = &vcpu->wq;
717 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; 724 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
@@ -1114,13 +1121,15 @@ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1114 unsigned long token) 1121 unsigned long token)
1115{ 1122{
1116 struct kvm_s390_interrupt inti; 1123 struct kvm_s390_interrupt inti;
1117 inti.parm64 = token; 1124 struct kvm_s390_irq irq;
1118 1125
1119 if (start_token) { 1126 if (start_token) {
1120 inti.type = KVM_S390_INT_PFAULT_INIT; 1127 irq.u.ext.ext_params2 = token;
1121 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti)); 1128 irq.type = KVM_S390_INT_PFAULT_INIT;
1129 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1122 } else { 1130 } else {
1123 inti.type = KVM_S390_INT_PFAULT_DONE; 1131 inti.type = KVM_S390_INT_PFAULT_DONE;
1132 inti.parm64 = token;
1124 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 1133 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1125 } 1134 }
1126} 1135}
@@ -1614,11 +1623,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1614 switch (ioctl) { 1623 switch (ioctl) {
1615 case KVM_S390_INTERRUPT: { 1624 case KVM_S390_INTERRUPT: {
1616 struct kvm_s390_interrupt s390int; 1625 struct kvm_s390_interrupt s390int;
1626 struct kvm_s390_irq s390irq;
1617 1627
1618 r = -EFAULT; 1628 r = -EFAULT;
1619 if (copy_from_user(&s390int, argp, sizeof(s390int))) 1629 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1620 break; 1630 break;
1621 r = kvm_s390_inject_vcpu(vcpu, &s390int); 1631 if (s390int_to_s390irq(&s390int, &s390irq))
1632 return -EINVAL;
1633 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
1622 break; 1634 break;
1623 } 1635 }
1624 case KVM_S390_STORE_STATUS: 1636 case KVM_S390_STORE_STATUS:
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 244d02303182..a8f3d9b71c11 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -24,8 +24,6 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
24/* declare vfacilities extern */ 24/* declare vfacilities extern */
25extern unsigned long *vfacilities; 25extern unsigned long *vfacilities;
26 26
27int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
28
29/* Transactional Memory Execution related macros */ 27/* Transactional Memory Execution related macros */
30#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10)) 28#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10))
31#define TDB_FORMAT1 1 29#define TDB_FORMAT1 1
@@ -144,7 +142,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm);
144int __must_check kvm_s390_inject_vm(struct kvm *kvm, 142int __must_check kvm_s390_inject_vm(struct kvm *kvm,
145 struct kvm_s390_interrupt *s390int); 143 struct kvm_s390_interrupt *s390int);
146int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, 144int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
147 struct kvm_s390_interrupt *s390int); 145 struct kvm_s390_irq *irq);
148int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); 146int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
149struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, 147struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
150 u64 cr6, u64 schid); 148 u64 cr6, u64 schid);
@@ -152,6 +150,10 @@ void kvm_s390_reinject_io_int(struct kvm *kvm,
152 struct kvm_s390_interrupt_info *inti); 150 struct kvm_s390_interrupt_info *inti);
153int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); 151int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
154 152
153/* implemented in intercept.c */
154void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc);
155int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
156
155/* implemented in priv.c */ 157/* implemented in priv.c */
156int is_valid_psw(psw_t *psw); 158int is_valid_psw(psw_t *psw);
157int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); 159int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
@@ -222,6 +224,9 @@ static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
222 return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 224 return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
223} 225}
224 226
227int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
228 struct kvm_s390_irq *s390irq);
229
225/* implemented in interrupt.c */ 230/* implemented in interrupt.c */
226int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); 231int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
227int psw_extint_disabled(struct kvm_vcpu *vcpu); 232int psw_extint_disabled(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index f47cb0c6d906..1be578d64dfc 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -180,21 +180,18 @@ static int handle_skey(struct kvm_vcpu *vcpu)
180 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 180 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
181 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 181 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
182 182
183 vcpu->arch.sie_block->gpsw.addr = 183 kvm_s390_rewind_psw(vcpu, 4);
184 __rewind_psw(vcpu->arch.sie_block->gpsw, 4);
185 VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); 184 VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
186 return 0; 185 return 0;
187} 186}
188 187
189static int handle_ipte_interlock(struct kvm_vcpu *vcpu) 188static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
190{ 189{
191 psw_t *psw = &vcpu->arch.sie_block->gpsw;
192
193 vcpu->stat.instruction_ipte_interlock++; 190 vcpu->stat.instruction_ipte_interlock++;
194 if (psw_bits(*psw).p) 191 if (psw_bits(vcpu->arch.sie_block->gpsw).p)
195 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 192 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
196 wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu)); 193 wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
197 psw->addr = __rewind_psw(*psw, 4); 194 kvm_s390_rewind_psw(vcpu, 4);
198 VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation"); 195 VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
199 return 0; 196 return 0;
200} 197}
@@ -650,10 +647,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
650 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 647 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
651 648
652 start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; 649 start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
653 if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { 650 start = kvm_s390_logical_to_effective(vcpu, start);
654 if (kvm_s390_check_low_addr_protection(vcpu, start))
655 return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
656 }
657 651
658 switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { 652 switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
659 case 0x00000000: 653 case 0x00000000:
@@ -669,6 +663,12 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
669 default: 663 default:
670 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 664 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
671 } 665 }
666
667 if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
668 if (kvm_s390_check_low_addr_protection(vcpu, start))
669 return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
670 }
671
672 while (start < end) { 672 while (start < end) {
673 unsigned long useraddr, abs_addr; 673 unsigned long useraddr, abs_addr;
674 674
@@ -725,8 +725,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
725 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 725 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
726 726
727 /* Rewind PSW to repeat the ESSA instruction */ 727 /* Rewind PSW to repeat the ESSA instruction */
728 vcpu->arch.sie_block->gpsw.addr = 728 kvm_s390_rewind_psw(vcpu, 4);
729 __rewind_psw(vcpu->arch.sie_block->gpsw, 4);
730 vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ 729 vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
731 cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); 730 cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
732 down_read(&gmap->mm->mmap_sem); 731 down_read(&gmap->mm->mmap_sem);
@@ -769,8 +768,8 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
769{ 768{
770 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; 769 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
771 int reg3 = vcpu->arch.sie_block->ipa & 0x000f; 770 int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
772 u32 val = 0; 771 int reg, rc, nr_regs;
773 int reg, rc; 772 u32 ctl_array[16];
774 u64 ga; 773 u64 ga;
775 774
776 vcpu->stat.instruction_lctl++; 775 vcpu->stat.instruction_lctl++;
@@ -786,19 +785,20 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
786 VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); 785 VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
787 trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga); 786 trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
788 787
788 nr_regs = ((reg3 - reg1) & 0xf) + 1;
789 rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
790 if (rc)
791 return kvm_s390_inject_prog_cond(vcpu, rc);
789 reg = reg1; 792 reg = reg1;
793 nr_regs = 0;
790 do { 794 do {
791 rc = read_guest(vcpu, ga, &val, sizeof(val));
792 if (rc)
793 return kvm_s390_inject_prog_cond(vcpu, rc);
794 vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; 795 vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
795 vcpu->arch.sie_block->gcr[reg] |= val; 796 vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
796 ga += 4;
797 if (reg == reg3) 797 if (reg == reg3)
798 break; 798 break;
799 reg = (reg + 1) % 16; 799 reg = (reg + 1) % 16;
800 } while (1); 800 } while (1);
801 801 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
802 return 0; 802 return 0;
803} 803}
804 804
@@ -806,9 +806,9 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
806{ 806{
807 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; 807 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
808 int reg3 = vcpu->arch.sie_block->ipa & 0x000f; 808 int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
809 int reg, rc, nr_regs;
810 u32 ctl_array[16];
809 u64 ga; 811 u64 ga;
810 u32 val;
811 int reg, rc;
812 812
813 vcpu->stat.instruction_stctl++; 813 vcpu->stat.instruction_stctl++;
814 814
@@ -824,26 +824,24 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
824 trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga); 824 trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
825 825
826 reg = reg1; 826 reg = reg1;
827 nr_regs = 0;
827 do { 828 do {
828 val = vcpu->arch.sie_block->gcr[reg] & 0x00000000fffffffful; 829 ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
829 rc = write_guest(vcpu, ga, &val, sizeof(val));
830 if (rc)
831 return kvm_s390_inject_prog_cond(vcpu, rc);
832 ga += 4;
833 if (reg == reg3) 830 if (reg == reg3)
834 break; 831 break;
835 reg = (reg + 1) % 16; 832 reg = (reg + 1) % 16;
836 } while (1); 833 } while (1);
837 834 rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
838 return 0; 835 return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
839} 836}
840 837
841static int handle_lctlg(struct kvm_vcpu *vcpu) 838static int handle_lctlg(struct kvm_vcpu *vcpu)
842{ 839{
843 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; 840 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
844 int reg3 = vcpu->arch.sie_block->ipa & 0x000f; 841 int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
845 u64 ga, val; 842 int reg, rc, nr_regs;
846 int reg, rc; 843 u64 ctl_array[16];
844 u64 ga;
847 845
848 vcpu->stat.instruction_lctlg++; 846 vcpu->stat.instruction_lctlg++;
849 847
@@ -855,22 +853,22 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
855 if (ga & 7) 853 if (ga & 7)
856 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 854 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
857 855
858 reg = reg1;
859
860 VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); 856 VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
861 trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga); 857 trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
862 858
859 nr_regs = ((reg3 - reg1) & 0xf) + 1;
860 rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
861 if (rc)
862 return kvm_s390_inject_prog_cond(vcpu, rc);
863 reg = reg1;
864 nr_regs = 0;
863 do { 865 do {
864 rc = read_guest(vcpu, ga, &val, sizeof(val)); 866 vcpu->arch.sie_block->gcr[reg] = ctl_array[nr_regs++];
865 if (rc)
866 return kvm_s390_inject_prog_cond(vcpu, rc);
867 vcpu->arch.sie_block->gcr[reg] = val;
868 ga += 8;
869 if (reg == reg3) 867 if (reg == reg3)
870 break; 868 break;
871 reg = (reg + 1) % 16; 869 reg = (reg + 1) % 16;
872 } while (1); 870 } while (1);
873 871 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
874 return 0; 872 return 0;
875} 873}
876 874
@@ -878,8 +876,9 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
878{ 876{
879 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; 877 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
880 int reg3 = vcpu->arch.sie_block->ipa & 0x000f; 878 int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
881 u64 ga, val; 879 int reg, rc, nr_regs;
882 int reg, rc; 880 u64 ctl_array[16];
881 u64 ga;
883 882
884 vcpu->stat.instruction_stctg++; 883 vcpu->stat.instruction_stctg++;
885 884
@@ -891,23 +890,19 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
891 if (ga & 7) 890 if (ga & 7)
892 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 891 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
893 892
894 reg = reg1;
895
896 VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); 893 VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
897 trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga); 894 trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
898 895
896 reg = reg1;
897 nr_regs = 0;
899 do { 898 do {
900 val = vcpu->arch.sie_block->gcr[reg]; 899 ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
901 rc = write_guest(vcpu, ga, &val, sizeof(val));
902 if (rc)
903 return kvm_s390_inject_prog_cond(vcpu, rc);
904 ga += 8;
905 if (reg == reg3) 900 if (reg == reg3)
906 break; 901 break;
907 reg = (reg + 1) % 16; 902 reg = (reg + 1) % 16;
908 } while (1); 903 } while (1);
909 904 rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
910 return 0; 905 return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
911} 906}
912 907
913static const intercept_handler_t eb_handlers[256] = { 908static const intercept_handler_t eb_handlers[256] = {
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index cf243ba3d50f..6651f9f73973 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -20,20 +20,13 @@
20#include "kvm-s390.h" 20#include "kvm-s390.h"
21#include "trace.h" 21#include "trace.h"
22 22
23static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, 23static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
24 u64 *reg) 24 u64 *reg)
25{ 25{
26 struct kvm_s390_local_interrupt *li; 26 struct kvm_s390_local_interrupt *li;
27 struct kvm_vcpu *dst_vcpu = NULL;
28 int cpuflags; 27 int cpuflags;
29 int rc; 28 int rc;
30 29
31 if (cpu_addr >= KVM_MAX_VCPUS)
32 return SIGP_CC_NOT_OPERATIONAL;
33
34 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
35 if (!dst_vcpu)
36 return SIGP_CC_NOT_OPERATIONAL;
37 li = &dst_vcpu->arch.local_int; 30 li = &dst_vcpu->arch.local_int;
38 31
39 cpuflags = atomic_read(li->cpuflags); 32 cpuflags = atomic_read(li->cpuflags);
@@ -48,55 +41,53 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
48 rc = SIGP_CC_STATUS_STORED; 41 rc = SIGP_CC_STATUS_STORED;
49 } 42 }
50 43
51 VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); 44 VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", dst_vcpu->vcpu_id,
45 rc);
52 return rc; 46 return rc;
53} 47}
54 48
55static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) 49static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
50 struct kvm_vcpu *dst_vcpu)
56{ 51{
57 struct kvm_s390_interrupt s390int = { 52 struct kvm_s390_irq irq = {
58 .type = KVM_S390_INT_EMERGENCY, 53 .type = KVM_S390_INT_EMERGENCY,
59 .parm = vcpu->vcpu_id, 54 .u.emerg.code = vcpu->vcpu_id,
60 }; 55 };
61 struct kvm_vcpu *dst_vcpu = NULL;
62 int rc = 0; 56 int rc = 0;
63 57
64 if (cpu_addr < KVM_MAX_VCPUS) 58 rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
65 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
66 if (!dst_vcpu)
67 return SIGP_CC_NOT_OPERATIONAL;
68
69 rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
70 if (!rc) 59 if (!rc)
71 VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); 60 VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x",
61 dst_vcpu->vcpu_id);
72 62
73 return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; 63 return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
74} 64}
75 65
76static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, 66static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
67{
68 return __inject_sigp_emergency(vcpu, dst_vcpu);
69}
70
71static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu,
72 struct kvm_vcpu *dst_vcpu,
77 u16 asn, u64 *reg) 73 u16 asn, u64 *reg)
78{ 74{
79 struct kvm_vcpu *dst_vcpu = NULL;
80 const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT; 75 const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
81 u16 p_asn, s_asn; 76 u16 p_asn, s_asn;
82 psw_t *psw; 77 psw_t *psw;
83 u32 flags; 78 u32 flags;
84 79
85 if (cpu_addr < KVM_MAX_VCPUS)
86 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
87 if (!dst_vcpu)
88 return SIGP_CC_NOT_OPERATIONAL;
89 flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags); 80 flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
90 psw = &dst_vcpu->arch.sie_block->gpsw; 81 psw = &dst_vcpu->arch.sie_block->gpsw;
91 p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */ 82 p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */
92 s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */ 83 s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */
93 84
94 /* Deliver the emergency signal? */ 85 /* Inject the emergency signal? */
95 if (!(flags & CPUSTAT_STOPPED) 86 if (!(flags & CPUSTAT_STOPPED)
96 || (psw->mask & psw_int_mask) != psw_int_mask 87 || (psw->mask & psw_int_mask) != psw_int_mask
97 || ((flags & CPUSTAT_WAIT) && psw->addr != 0) 88 || ((flags & CPUSTAT_WAIT) && psw->addr != 0)
98 || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) { 89 || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
99 return __sigp_emergency(vcpu, cpu_addr); 90 return __inject_sigp_emergency(vcpu, dst_vcpu);
100 } else { 91 } else {
101 *reg &= 0xffffffff00000000UL; 92 *reg &= 0xffffffff00000000UL;
102 *reg |= SIGP_STATUS_INCORRECT_STATE; 93 *reg |= SIGP_STATUS_INCORRECT_STATE;
@@ -104,23 +95,19 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
104 } 95 }
105} 96}
106 97
107static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) 98static int __sigp_external_call(struct kvm_vcpu *vcpu,
99 struct kvm_vcpu *dst_vcpu)
108{ 100{
109 struct kvm_s390_interrupt s390int = { 101 struct kvm_s390_irq irq = {
110 .type = KVM_S390_INT_EXTERNAL_CALL, 102 .type = KVM_S390_INT_EXTERNAL_CALL,
111 .parm = vcpu->vcpu_id, 103 .u.extcall.code = vcpu->vcpu_id,
112 }; 104 };
113 struct kvm_vcpu *dst_vcpu = NULL;
114 int rc; 105 int rc;
115 106
116 if (cpu_addr < KVM_MAX_VCPUS) 107 rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
117 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
118 if (!dst_vcpu)
119 return SIGP_CC_NOT_OPERATIONAL;
120
121 rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
122 if (!rc) 108 if (!rc)
123 VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); 109 VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x",
110 dst_vcpu->vcpu_id);
124 111
125 return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; 112 return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
126} 113}
@@ -128,29 +115,20 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
128static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action) 115static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action)
129{ 116{
130 struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int; 117 struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
131 struct kvm_s390_interrupt_info *inti;
132 int rc = SIGP_CC_ORDER_CODE_ACCEPTED; 118 int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
133 119
134 inti = kzalloc(sizeof(*inti), GFP_ATOMIC);
135 if (!inti)
136 return -ENOMEM;
137 inti->type = KVM_S390_SIGP_STOP;
138
139 spin_lock(&li->lock); 120 spin_lock(&li->lock);
140 if (li->action_bits & ACTION_STOP_ON_STOP) { 121 if (li->action_bits & ACTION_STOP_ON_STOP) {
141 /* another SIGP STOP is pending */ 122 /* another SIGP STOP is pending */
142 kfree(inti);
143 rc = SIGP_CC_BUSY; 123 rc = SIGP_CC_BUSY;
144 goto out; 124 goto out;
145 } 125 }
146 if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { 126 if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
147 kfree(inti);
148 if ((action & ACTION_STORE_ON_STOP) != 0) 127 if ((action & ACTION_STORE_ON_STOP) != 0)
149 rc = -ESHUTDOWN; 128 rc = -ESHUTDOWN;
150 goto out; 129 goto out;
151 } 130 }
152 list_add_tail(&inti->list, &li->list); 131 set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
153 atomic_set(&li->active, 1);
154 li->action_bits |= action; 132 li->action_bits |= action;
155 atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); 133 atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
156 kvm_s390_vcpu_wakeup(dst_vcpu); 134 kvm_s390_vcpu_wakeup(dst_vcpu);
@@ -160,23 +138,27 @@ out:
160 return rc; 138 return rc;
161} 139}
162 140
163static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) 141static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
164{ 142{
165 struct kvm_vcpu *dst_vcpu = NULL;
166 int rc; 143 int rc;
167 144
168 if (cpu_addr >= KVM_MAX_VCPUS) 145 rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP);
169 return SIGP_CC_NOT_OPERATIONAL; 146 VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", dst_vcpu->vcpu_id);
170 147
171 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); 148 return rc;
172 if (!dst_vcpu) 149}
173 return SIGP_CC_NOT_OPERATIONAL;
174 150
175 rc = __inject_sigp_stop(dst_vcpu, action); 151static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
152 struct kvm_vcpu *dst_vcpu, u64 *reg)
153{
154 int rc;
176 155
177 VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); 156 rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP |
157 ACTION_STORE_ON_STOP);
158 VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x",
159 dst_vcpu->vcpu_id);
178 160
179 if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) { 161 if (rc == -ESHUTDOWN) {
180 /* If the CPU has already been stopped, we still have 162 /* If the CPU has already been stopped, we still have
181 * to save the status when doing stop-and-store. This 163 * to save the status when doing stop-and-store. This
182 * has to be done after unlocking all spinlocks. */ 164 * has to be done after unlocking all spinlocks. */
@@ -212,18 +194,12 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
212 return rc; 194 return rc;
213} 195}
214 196
215static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, 197static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
216 u64 *reg) 198 u32 address, u64 *reg)
217{ 199{
218 struct kvm_s390_local_interrupt *li; 200 struct kvm_s390_local_interrupt *li;
219 struct kvm_vcpu *dst_vcpu = NULL;
220 struct kvm_s390_interrupt_info *inti;
221 int rc; 201 int rc;
222 202
223 if (cpu_addr < KVM_MAX_VCPUS)
224 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
225 if (!dst_vcpu)
226 return SIGP_CC_NOT_OPERATIONAL;
227 li = &dst_vcpu->arch.local_int; 203 li = &dst_vcpu->arch.local_int;
228 204
229 /* 205 /*
@@ -238,46 +214,34 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
238 return SIGP_CC_STATUS_STORED; 214 return SIGP_CC_STATUS_STORED;
239 } 215 }
240 216
241 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
242 if (!inti)
243 return SIGP_CC_BUSY;
244
245 spin_lock(&li->lock); 217 spin_lock(&li->lock);
246 /* cpu must be in stopped state */ 218 /* cpu must be in stopped state */
247 if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { 219 if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
248 *reg &= 0xffffffff00000000UL; 220 *reg &= 0xffffffff00000000UL;
249 *reg |= SIGP_STATUS_INCORRECT_STATE; 221 *reg |= SIGP_STATUS_INCORRECT_STATE;
250 rc = SIGP_CC_STATUS_STORED; 222 rc = SIGP_CC_STATUS_STORED;
251 kfree(inti);
252 goto out_li; 223 goto out_li;
253 } 224 }
254 225
255 inti->type = KVM_S390_SIGP_SET_PREFIX; 226 li->irq.prefix.address = address;
256 inti->prefix.address = address; 227 set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
257
258 list_add_tail(&inti->list, &li->list);
259 atomic_set(&li->active, 1);
260 kvm_s390_vcpu_wakeup(dst_vcpu); 228 kvm_s390_vcpu_wakeup(dst_vcpu);
261 rc = SIGP_CC_ORDER_CODE_ACCEPTED; 229 rc = SIGP_CC_ORDER_CODE_ACCEPTED;
262 230
263 VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); 231 VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", dst_vcpu->vcpu_id,
232 address);
264out_li: 233out_li:
265 spin_unlock(&li->lock); 234 spin_unlock(&li->lock);
266 return rc; 235 return rc;
267} 236}
268 237
269static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, 238static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
270 u32 addr, u64 *reg) 239 struct kvm_vcpu *dst_vcpu,
240 u32 addr, u64 *reg)
271{ 241{
272 struct kvm_vcpu *dst_vcpu = NULL;
273 int flags; 242 int flags;
274 int rc; 243 int rc;
275 244
276 if (cpu_id < KVM_MAX_VCPUS)
277 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id);
278 if (!dst_vcpu)
279 return SIGP_CC_NOT_OPERATIONAL;
280
281 spin_lock(&dst_vcpu->arch.local_int.lock); 245 spin_lock(&dst_vcpu->arch.local_int.lock);
282 flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); 246 flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
283 spin_unlock(&dst_vcpu->arch.local_int.lock); 247 spin_unlock(&dst_vcpu->arch.local_int.lock);
@@ -297,19 +261,12 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
297 return rc; 261 return rc;
298} 262}
299 263
300static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, 264static int __sigp_sense_running(struct kvm_vcpu *vcpu,
301 u64 *reg) 265 struct kvm_vcpu *dst_vcpu, u64 *reg)
302{ 266{
303 struct kvm_s390_local_interrupt *li; 267 struct kvm_s390_local_interrupt *li;
304 struct kvm_vcpu *dst_vcpu = NULL;
305 int rc; 268 int rc;
306 269
307 if (cpu_addr >= KVM_MAX_VCPUS)
308 return SIGP_CC_NOT_OPERATIONAL;
309
310 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
311 if (!dst_vcpu)
312 return SIGP_CC_NOT_OPERATIONAL;
313 li = &dst_vcpu->arch.local_int; 270 li = &dst_vcpu->arch.local_int;
314 if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { 271 if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
315 /* running */ 272 /* running */
@@ -321,26 +278,19 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
321 rc = SIGP_CC_STATUS_STORED; 278 rc = SIGP_CC_STATUS_STORED;
322 } 279 }
323 280
324 VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr, 281 VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x",
325 rc); 282 dst_vcpu->vcpu_id, rc);
326 283
327 return rc; 284 return rc;
328} 285}
329 286
330/* Test whether the destination CPU is available and not busy */ 287static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu,
331static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) 288 struct kvm_vcpu *dst_vcpu, u8 order_code)
332{ 289{
333 struct kvm_s390_local_interrupt *li; 290 struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
334 int rc = SIGP_CC_ORDER_CODE_ACCEPTED; 291 /* handle (RE)START in user space */
335 struct kvm_vcpu *dst_vcpu = NULL; 292 int rc = -EOPNOTSUPP;
336
337 if (cpu_addr >= KVM_MAX_VCPUS)
338 return SIGP_CC_NOT_OPERATIONAL;
339 293
340 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
341 if (!dst_vcpu)
342 return SIGP_CC_NOT_OPERATIONAL;
343 li = &dst_vcpu->arch.local_int;
344 spin_lock(&li->lock); 294 spin_lock(&li->lock);
345 if (li->action_bits & ACTION_STOP_ON_STOP) 295 if (li->action_bits & ACTION_STOP_ON_STOP)
346 rc = SIGP_CC_BUSY; 296 rc = SIGP_CC_BUSY;
@@ -349,90 +299,131 @@ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
349 return rc; 299 return rc;
350} 300}
351 301
352int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) 302static int __prepare_sigp_cpu_reset(struct kvm_vcpu *vcpu,
303 struct kvm_vcpu *dst_vcpu, u8 order_code)
353{ 304{
354 int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; 305 /* handle (INITIAL) CPU RESET in user space */
355 int r3 = vcpu->arch.sie_block->ipa & 0x000f; 306 return -EOPNOTSUPP;
356 u32 parameter; 307}
357 u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
358 u8 order_code;
359 int rc;
360 308
361 /* sigp in userspace can exit */ 309static int __prepare_sigp_unknown(struct kvm_vcpu *vcpu,
362 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 310 struct kvm_vcpu *dst_vcpu)
363 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 311{
312 /* handle unknown orders in user space */
313 return -EOPNOTSUPP;
314}
364 315
365 order_code = kvm_s390_get_base_disp_rs(vcpu); 316static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
317 u16 cpu_addr, u32 parameter, u64 *status_reg)
318{
319 int rc;
320 struct kvm_vcpu *dst_vcpu;
366 321
367 if (r1 % 2) 322 if (cpu_addr >= KVM_MAX_VCPUS)
368 parameter = vcpu->run->s.regs.gprs[r1]; 323 return SIGP_CC_NOT_OPERATIONAL;
369 else 324
370 parameter = vcpu->run->s.regs.gprs[r1 + 1]; 325 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
326 if (!dst_vcpu)
327 return SIGP_CC_NOT_OPERATIONAL;
371 328
372 trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
373 switch (order_code) { 329 switch (order_code) {
374 case SIGP_SENSE: 330 case SIGP_SENSE:
375 vcpu->stat.instruction_sigp_sense++; 331 vcpu->stat.instruction_sigp_sense++;
376 rc = __sigp_sense(vcpu, cpu_addr, 332 rc = __sigp_sense(vcpu, dst_vcpu, status_reg);
377 &vcpu->run->s.regs.gprs[r1]);
378 break; 333 break;
379 case SIGP_EXTERNAL_CALL: 334 case SIGP_EXTERNAL_CALL:
380 vcpu->stat.instruction_sigp_external_call++; 335 vcpu->stat.instruction_sigp_external_call++;
381 rc = __sigp_external_call(vcpu, cpu_addr); 336 rc = __sigp_external_call(vcpu, dst_vcpu);
382 break; 337 break;
383 case SIGP_EMERGENCY_SIGNAL: 338 case SIGP_EMERGENCY_SIGNAL:
384 vcpu->stat.instruction_sigp_emergency++; 339 vcpu->stat.instruction_sigp_emergency++;
385 rc = __sigp_emergency(vcpu, cpu_addr); 340 rc = __sigp_emergency(vcpu, dst_vcpu);
386 break; 341 break;
387 case SIGP_STOP: 342 case SIGP_STOP:
388 vcpu->stat.instruction_sigp_stop++; 343 vcpu->stat.instruction_sigp_stop++;
389 rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP); 344 rc = __sigp_stop(vcpu, dst_vcpu);
390 break; 345 break;
391 case SIGP_STOP_AND_STORE_STATUS: 346 case SIGP_STOP_AND_STORE_STATUS:
392 vcpu->stat.instruction_sigp_stop++; 347 vcpu->stat.instruction_sigp_stop_store_status++;
393 rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP | 348 rc = __sigp_stop_and_store_status(vcpu, dst_vcpu, status_reg);
394 ACTION_STOP_ON_STOP);
395 break; 349 break;
396 case SIGP_STORE_STATUS_AT_ADDRESS: 350 case SIGP_STORE_STATUS_AT_ADDRESS:
397 rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter, 351 vcpu->stat.instruction_sigp_store_status++;
398 &vcpu->run->s.regs.gprs[r1]); 352 rc = __sigp_store_status_at_addr(vcpu, dst_vcpu, parameter,
399 break; 353 status_reg);
400 case SIGP_SET_ARCHITECTURE:
401 vcpu->stat.instruction_sigp_arch++;
402 rc = __sigp_set_arch(vcpu, parameter);
403 break; 354 break;
404 case SIGP_SET_PREFIX: 355 case SIGP_SET_PREFIX:
405 vcpu->stat.instruction_sigp_prefix++; 356 vcpu->stat.instruction_sigp_prefix++;
406 rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, 357 rc = __sigp_set_prefix(vcpu, dst_vcpu, parameter, status_reg);
407 &vcpu->run->s.regs.gprs[r1]);
408 break; 358 break;
409 case SIGP_COND_EMERGENCY_SIGNAL: 359 case SIGP_COND_EMERGENCY_SIGNAL:
410 rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter, 360 vcpu->stat.instruction_sigp_cond_emergency++;
411 &vcpu->run->s.regs.gprs[r1]); 361 rc = __sigp_conditional_emergency(vcpu, dst_vcpu, parameter,
362 status_reg);
412 break; 363 break;
413 case SIGP_SENSE_RUNNING: 364 case SIGP_SENSE_RUNNING:
414 vcpu->stat.instruction_sigp_sense_running++; 365 vcpu->stat.instruction_sigp_sense_running++;
415 rc = __sigp_sense_running(vcpu, cpu_addr, 366 rc = __sigp_sense_running(vcpu, dst_vcpu, status_reg);
416 &vcpu->run->s.regs.gprs[r1]);
417 break; 367 break;
418 case SIGP_START: 368 case SIGP_START:
419 rc = sigp_check_callable(vcpu, cpu_addr); 369 vcpu->stat.instruction_sigp_start++;
420 if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) 370 rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
421 rc = -EOPNOTSUPP; /* Handle START in user space */
422 break; 371 break;
423 case SIGP_RESTART: 372 case SIGP_RESTART:
424 vcpu->stat.instruction_sigp_restart++; 373 vcpu->stat.instruction_sigp_restart++;
425 rc = sigp_check_callable(vcpu, cpu_addr); 374 rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
426 if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) { 375 break;
427 VCPU_EVENT(vcpu, 4, 376 case SIGP_INITIAL_CPU_RESET:
428 "sigp restart %x to handle userspace", 377 vcpu->stat.instruction_sigp_init_cpu_reset++;
429 cpu_addr); 378 rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
430 /* user space must know about restart */ 379 break;
431 rc = -EOPNOTSUPP; 380 case SIGP_CPU_RESET:
432 } 381 vcpu->stat.instruction_sigp_cpu_reset++;
382 rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
383 break;
384 default:
385 vcpu->stat.instruction_sigp_unknown++;
386 rc = __prepare_sigp_unknown(vcpu, dst_vcpu);
387 }
388
389 if (rc == -EOPNOTSUPP)
390 VCPU_EVENT(vcpu, 4,
391 "sigp order %u -> cpu %x: handled in user space",
392 order_code, dst_vcpu->vcpu_id);
393
394 return rc;
395}
396
397int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
398{
399 int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
400 int r3 = vcpu->arch.sie_block->ipa & 0x000f;
401 u32 parameter;
402 u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
403 u8 order_code;
404 int rc;
405
406 /* sigp in userspace can exit */
407 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
408 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
409
410 order_code = kvm_s390_get_base_disp_rs(vcpu);
411
412 if (r1 % 2)
413 parameter = vcpu->run->s.regs.gprs[r1];
414 else
415 parameter = vcpu->run->s.regs.gprs[r1 + 1];
416
417 trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
418 switch (order_code) {
419 case SIGP_SET_ARCHITECTURE:
420 vcpu->stat.instruction_sigp_arch++;
421 rc = __sigp_set_arch(vcpu, parameter);
433 break; 422 break;
434 default: 423 default:
435 return -EOPNOTSUPP; 424 rc = handle_sigp_dst(vcpu, order_code, cpu_addr,
425 parameter,
426 &vcpu->run->s.regs.gprs[r1]);
436 } 427 }
437 428
438 if (rc < 0) 429 if (rc < 0)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 71c7eff2c89f..be99357d238c 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -844,7 +844,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
844 844
845 down_read(&mm->mmap_sem); 845 down_read(&mm->mmap_sem);
846retry: 846retry:
847 ptep = get_locked_pte(current->mm, addr, &ptl); 847 ptep = get_locked_pte(mm, addr, &ptl);
848 if (unlikely(!ptep)) { 848 if (unlikely(!ptep)) {
849 up_read(&mm->mmap_sem); 849 up_read(&mm->mmap_sem);
850 return -EFAULT; 850 return -EFAULT;
@@ -888,6 +888,45 @@ retry:
888} 888}
889EXPORT_SYMBOL(set_guest_storage_key); 889EXPORT_SYMBOL(set_guest_storage_key);
890 890
891unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
892{
893 spinlock_t *ptl;
894 pgste_t pgste;
895 pte_t *ptep;
896 uint64_t physaddr;
897 unsigned long key = 0;
898
899 down_read(&mm->mmap_sem);
900 ptep = get_locked_pte(mm, addr, &ptl);
901 if (unlikely(!ptep)) {
902 up_read(&mm->mmap_sem);
903 return -EFAULT;
904 }
905 pgste = pgste_get_lock(ptep);
906
907 if (pte_val(*ptep) & _PAGE_INVALID) {
908 key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
909 key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
910 key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
911 key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
912 } else {
913 physaddr = pte_val(*ptep) & PAGE_MASK;
914 key = page_get_storage_key(physaddr);
915
916 /* Reflect guest's logical view, not physical */
917 if (pgste_val(pgste) & PGSTE_GR_BIT)
918 key |= _PAGE_REFERENCED;
919 if (pgste_val(pgste) & PGSTE_GC_BIT)
920 key |= _PAGE_CHANGED;
921 }
922
923 pgste_set_unlock(ptep, pgste);
924 pte_unmap_unlock(ptep, ptl);
925 up_read(&mm->mmap_sem);
926 return key;
927}
928EXPORT_SYMBOL(get_guest_storage_key);
929
891#else /* CONFIG_PGSTE */ 930#else /* CONFIG_PGSTE */
892 931
893static inline int page_table_with_pgste(struct page *page) 932static inline int page_table_with_pgste(struct page *page)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ed0c30d6a0c..d89c6b828c96 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -33,7 +33,7 @@
33 33
34#define KVM_MAX_VCPUS 255 34#define KVM_MAX_VCPUS 255
35#define KVM_SOFT_MAX_VCPUS 160 35#define KVM_SOFT_MAX_VCPUS 160
36#define KVM_USER_MEM_SLOTS 125 36#define KVM_USER_MEM_SLOTS 509
37/* memory slots that are not exposed to userspace */ 37/* memory slots that are not exposed to userspace */
38#define KVM_PRIVATE_MEM_SLOTS 3 38#define KVM_PRIVATE_MEM_SLOTS 3
39#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) 39#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
@@ -51,6 +51,7 @@
51 | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) 51 | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
52 52
53#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL 53#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
54#define CR3_PCID_INVD (1UL << 63)
54#define CR4_RESERVED_BITS \ 55#define CR4_RESERVED_BITS \
55 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ 56 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
56 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ 57 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
@@ -361,6 +362,7 @@ struct kvm_vcpu_arch {
361 int mp_state; 362 int mp_state;
362 u64 ia32_misc_enable_msr; 363 u64 ia32_misc_enable_msr;
363 bool tpr_access_reporting; 364 bool tpr_access_reporting;
365 u64 ia32_xss;
364 366
365 /* 367 /*
366 * Paging state of the vcpu 368 * Paging state of the vcpu
@@ -542,7 +544,7 @@ struct kvm_apic_map {
542 struct rcu_head rcu; 544 struct rcu_head rcu;
543 u8 ldr_bits; 545 u8 ldr_bits;
544 /* fields bellow are used to decode ldr values in different modes */ 546 /* fields bellow are used to decode ldr values in different modes */
545 u32 cid_shift, cid_mask, lid_mask; 547 u32 cid_shift, cid_mask, lid_mask, broadcast;
546 struct kvm_lapic *phys_map[256]; 548 struct kvm_lapic *phys_map[256];
547 /* first index is cluster id second is cpu id in a cluster */ 549 /* first index is cluster id second is cpu id in a cluster */
548 struct kvm_lapic *logical_map[16][16]; 550 struct kvm_lapic *logical_map[16][16];
@@ -602,6 +604,9 @@ struct kvm_arch {
602 604
603 struct kvm_xen_hvm_config xen_hvm_config; 605 struct kvm_xen_hvm_config xen_hvm_config;
604 606
607 /* reads protected by irq_srcu, writes by irq_lock */
608 struct hlist_head mask_notifier_list;
609
605 /* fields used by HYPER-V emulation */ 610 /* fields used by HYPER-V emulation */
606 u64 hv_guest_os_id; 611 u64 hv_guest_os_id;
607 u64 hv_hypercall; 612 u64 hv_hypercall;
@@ -659,6 +664,16 @@ struct msr_data {
659 u64 data; 664 u64 data;
660}; 665};
661 666
667struct kvm_lapic_irq {
668 u32 vector;
669 u32 delivery_mode;
670 u32 dest_mode;
671 u32 level;
672 u32 trig_mode;
673 u32 shorthand;
674 u32 dest_id;
675};
676
662struct kvm_x86_ops { 677struct kvm_x86_ops {
663 int (*cpu_has_kvm_support)(void); /* __init */ 678 int (*cpu_has_kvm_support)(void); /* __init */
664 int (*disabled_by_bios)(void); /* __init */ 679 int (*disabled_by_bios)(void); /* __init */
@@ -767,6 +782,7 @@ struct kvm_x86_ops {
767 enum x86_intercept_stage stage); 782 enum x86_intercept_stage stage);
768 void (*handle_external_intr)(struct kvm_vcpu *vcpu); 783 void (*handle_external_intr)(struct kvm_vcpu *vcpu);
769 bool (*mpx_supported)(void); 784 bool (*mpx_supported)(void);
785 bool (*xsaves_supported)(void);
770 786
771 int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); 787 int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
772 788
@@ -818,6 +834,19 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
818 const void *val, int bytes); 834 const void *val, int bytes);
819u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); 835u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
820 836
837struct kvm_irq_mask_notifier {
838 void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
839 int irq;
840 struct hlist_node link;
841};
842
843void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
844 struct kvm_irq_mask_notifier *kimn);
845void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
846 struct kvm_irq_mask_notifier *kimn);
847void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
848 bool mask);
849
821extern bool tdp_enabled; 850extern bool tdp_enabled;
822 851
823u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); 852u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
@@ -863,7 +892,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
863 892
864void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); 893void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
865int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); 894int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
866void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector); 895void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
867 896
868int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, 897int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
869 int reason, bool has_error_code, u32 error_code); 898 int reason, bool has_error_code, u32 error_code);
@@ -895,6 +924,7 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
895 gfn_t gfn, void *data, int offset, int len, 924 gfn_t gfn, void *data, int offset, int len,
896 u32 access); 925 u32 access);
897bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); 926bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
927bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
898 928
899static inline int __kvm_irq_line_state(unsigned long *irq_state, 929static inline int __kvm_irq_line_state(unsigned long *irq_state,
900 int irq_source_id, int level) 930 int irq_source_id, int level)
@@ -1066,6 +1096,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
1066void kvm_define_shared_msr(unsigned index, u32 msr); 1096void kvm_define_shared_msr(unsigned index, u32 msr);
1067int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); 1097int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
1068 1098
1099unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
1069bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); 1100bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
1070 1101
1071void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 1102void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index bcbfade26d8d..45afaee9555c 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -69,6 +69,7 @@
69#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 69#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
70#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 70#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
71#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 71#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
72#define SECONDARY_EXEC_XSAVES 0x00100000
72 73
73 74
74#define PIN_BASED_EXT_INTR_MASK 0x00000001 75#define PIN_BASED_EXT_INTR_MASK 0x00000001
@@ -159,6 +160,8 @@ enum vmcs_field {
159 EOI_EXIT_BITMAP3_HIGH = 0x00002023, 160 EOI_EXIT_BITMAP3_HIGH = 0x00002023,
160 VMREAD_BITMAP = 0x00002026, 161 VMREAD_BITMAP = 0x00002026,
161 VMWRITE_BITMAP = 0x00002028, 162 VMWRITE_BITMAP = 0x00002028,
163 XSS_EXIT_BITMAP = 0x0000202C,
164 XSS_EXIT_BITMAP_HIGH = 0x0000202D,
162 GUEST_PHYSICAL_ADDRESS = 0x00002400, 165 GUEST_PHYSICAL_ADDRESS = 0x00002400,
163 GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, 166 GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
164 VMCS_LINK_POINTER = 0x00002800, 167 VMCS_LINK_POINTER = 0x00002800,
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 7e7a79ada658..5fa9770035dc 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -16,6 +16,7 @@
16#define XSTATE_Hi16_ZMM 0x80 16#define XSTATE_Hi16_ZMM 0x80
17 17
18#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) 18#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
19#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
19/* Bit 63 of XCR0 is reserved for future expansion */ 20/* Bit 63 of XCR0 is reserved for future expansion */
20#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) 21#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
21 22
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 990a2fe1588d..b813bf9da1e2 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -72,6 +72,8 @@
72#define EXIT_REASON_XSETBV 55 72#define EXIT_REASON_XSETBV 55
73#define EXIT_REASON_APIC_WRITE 56 73#define EXIT_REASON_APIC_WRITE 56
74#define EXIT_REASON_INVPCID 58 74#define EXIT_REASON_INVPCID 58
75#define EXIT_REASON_XSAVES 63
76#define EXIT_REASON_XRSTORS 64
75 77
76#define VMX_EXIT_REASONS \ 78#define VMX_EXIT_REASONS \
77 { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ 79 { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -116,6 +118,8 @@
116 { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ 118 { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
117 { EXIT_REASON_INVD, "INVD" }, \ 119 { EXIT_REASON_INVD, "INVD" }, \
118 { EXIT_REASON_INVVPID, "INVVPID" }, \ 120 { EXIT_REASON_INVVPID, "INVVPID" }, \
119 { EXIT_REASON_INVPCID, "INVPCID" } 121 { EXIT_REASON_INVPCID, "INVPCID" }, \
122 { EXIT_REASON_XSAVES, "XSAVES" }, \
123 { EXIT_REASON_XRSTORS, "XRSTORS" }
120 124
121#endif /* _UAPIVMX_H */ 125#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index f6945bef2cd1..94f643484300 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -283,7 +283,14 @@ NOKPROBE_SYMBOL(do_async_page_fault);
283static void __init paravirt_ops_setup(void) 283static void __init paravirt_ops_setup(void)
284{ 284{
285 pv_info.name = "KVM"; 285 pv_info.name = "KVM";
286 pv_info.paravirt_enabled = 1; 286
287 /*
288 * KVM isn't paravirt in the sense of paravirt_enabled. A KVM
289 * guest kernel works like a bare metal kernel with additional
290 * features, and paravirt_enabled is about features that are
291 * missing.
292 */
293 pv_info.paravirt_enabled = 0;
287 294
288 if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) 295 if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
289 pv_cpu_ops.io_delay = kvm_io_delay; 296 pv_cpu_ops.io_delay = kvm_io_delay;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d9156ceecdff..42caaef897c8 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -59,13 +59,12 @@ static void kvm_get_wallclock(struct timespec *now)
59 59
60 native_write_msr(msr_kvm_wall_clock, low, high); 60 native_write_msr(msr_kvm_wall_clock, low, high);
61 61
62 preempt_disable(); 62 cpu = get_cpu();
63 cpu = smp_processor_id();
64 63
65 vcpu_time = &hv_clock[cpu].pvti; 64 vcpu_time = &hv_clock[cpu].pvti;
66 pvclock_read_wallclock(&wall_clock, vcpu_time, now); 65 pvclock_read_wallclock(&wall_clock, vcpu_time, now);
67 66
68 preempt_enable(); 67 put_cpu();
69} 68}
70 69
71static int kvm_set_wallclock(const struct timespec *now) 70static int kvm_set_wallclock(const struct timespec *now)
@@ -107,11 +106,10 @@ static unsigned long kvm_get_tsc_khz(void)
107 int cpu; 106 int cpu;
108 unsigned long tsc_khz; 107 unsigned long tsc_khz;
109 108
110 preempt_disable(); 109 cpu = get_cpu();
111 cpu = smp_processor_id();
112 src = &hv_clock[cpu].pvti; 110 src = &hv_clock[cpu].pvti;
113 tsc_khz = pvclock_tsc_khz(src); 111 tsc_khz = pvclock_tsc_khz(src);
114 preempt_enable(); 112 put_cpu();
115 return tsc_khz; 113 return tsc_khz;
116} 114}
117 115
@@ -263,7 +261,6 @@ void __init kvmclock_init(void)
263#endif 261#endif
264 kvm_get_preset_lpj(); 262 kvm_get_preset_lpj();
265 clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); 263 clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
266 pv_info.paravirt_enabled = 1;
267 pv_info.name = "KVM"; 264 pv_info.name = "KVM";
268 265
269 if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) 266 if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
@@ -284,23 +281,22 @@ int __init kvm_setup_vsyscall_timeinfo(void)
284 281
285 size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); 282 size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
286 283
287 preempt_disable(); 284 cpu = get_cpu();
288 cpu = smp_processor_id();
289 285
290 vcpu_time = &hv_clock[cpu].pvti; 286 vcpu_time = &hv_clock[cpu].pvti;
291 flags = pvclock_read_flags(vcpu_time); 287 flags = pvclock_read_flags(vcpu_time);
292 288
293 if (!(flags & PVCLOCK_TSC_STABLE_BIT)) { 289 if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
294 preempt_enable(); 290 put_cpu();
295 return 1; 291 return 1;
296 } 292 }
297 293
298 if ((ret = pvclock_init_vsyscall(hv_clock, size))) { 294 if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
299 preempt_enable(); 295 put_cpu();
300 return ret; 296 return ret;
301 } 297 }
302 298
303 preempt_enable(); 299 put_cpu();
304 300
305 kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; 301 kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
306#endif 302#endif
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 4c540c4719d8..0de1fae2bdf0 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -738,3 +738,4 @@ void *get_xsave_addr(struct xsave_struct *xsave, int xstate)
738 738
739 return (void *)xsave + xstate_comp_offsets[feature]; 739 return (void *)xsave + xstate_comp_offsets[feature];
740} 740}
741EXPORT_SYMBOL_GPL(get_xsave_addr);
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 25d22b2d6509..08f790dfadc9 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,14 +7,13 @@ CFLAGS_vmx.o := -I.
7 7
8KVM := ../../../virt/kvm 8KVM := ../../../virt/kvm
9 9
10kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ 10kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
11 $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
12 $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o 11 $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
13kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o
14kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o 12kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
15 13
16kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ 14kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
17 i8254.o cpuid.o pmu.o 15 i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
16kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o
18kvm-intel-y += vmx.o 17kvm-intel-y += vmx.o
19kvm-amd-y += svm.o 18kvm-amd-y += svm.o
20 19
diff --git a/virt/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
index e05000e200d2..6eb5c20ee373 100644
--- a/virt/kvm/assigned-dev.c
+++ b/arch/x86/kvm/assigned-dev.c
@@ -20,6 +20,32 @@
20#include <linux/namei.h> 20#include <linux/namei.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include "irq.h" 22#include "irq.h"
23#include "assigned-dev.h"
24
25struct kvm_assigned_dev_kernel {
26 struct kvm_irq_ack_notifier ack_notifier;
27 struct list_head list;
28 int assigned_dev_id;
29 int host_segnr;
30 int host_busnr;
31 int host_devfn;
32 unsigned int entries_nr;
33 int host_irq;
34 bool host_irq_disabled;
35 bool pci_2_3;
36 struct msix_entry *host_msix_entries;
37 int guest_irq;
38 struct msix_entry *guest_msix_entries;
39 unsigned long irq_requested_type;
40 int irq_source_id;
41 int flags;
42 struct pci_dev *dev;
43 struct kvm *kvm;
44 spinlock_t intx_lock;
45 spinlock_t intx_mask_lock;
46 char irq_name[32];
47 struct pci_saved_state *pci_saved_state;
48};
23 49
24static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, 50static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
25 int assigned_dev_id) 51 int assigned_dev_id)
@@ -748,7 +774,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
748 if (r) 774 if (r)
749 goto out_list_del; 775 goto out_list_del;
750 } 776 }
751 r = kvm_assign_device(kvm, match); 777 r = kvm_assign_device(kvm, match->dev);
752 if (r) 778 if (r)
753 goto out_list_del; 779 goto out_list_del;
754 780
@@ -790,7 +816,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
790 goto out; 816 goto out;
791 } 817 }
792 818
793 kvm_deassign_device(kvm, match); 819 kvm_deassign_device(kvm, match->dev);
794 820
795 kvm_free_assigned_device(kvm, match); 821 kvm_free_assigned_device(kvm, match);
796 822
diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h
new file mode 100644
index 000000000000..a428c1a211b2
--- /dev/null
+++ b/arch/x86/kvm/assigned-dev.h
@@ -0,0 +1,32 @@
1#ifndef ARCH_X86_KVM_ASSIGNED_DEV_H
2#define ARCH_X86_KVM_ASSIGNED_DEV_H
3
4#include <linux/kvm_host.h>
5
6#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
7int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev);
8int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev);
9
10int kvm_iommu_map_guest(struct kvm *kvm);
11int kvm_iommu_unmap_guest(struct kvm *kvm);
12
13long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
14 unsigned long arg);
15
16void kvm_free_all_assigned_devices(struct kvm *kvm);
17#else
18static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
19{
20 return 0;
21}
22
23static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
24 unsigned long arg)
25{
26 return -ENOTTY;
27}
28
29static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
30#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
31
32#endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 976e3a57f9ea..8a80737ee6e6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -23,7 +23,7 @@
23#include "mmu.h" 23#include "mmu.h"
24#include "trace.h" 24#include "trace.h"
25 25
26static u32 xstate_required_size(u64 xstate_bv) 26static u32 xstate_required_size(u64 xstate_bv, bool compacted)
27{ 27{
28 int feature_bit = 0; 28 int feature_bit = 0;
29 u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; 29 u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
@@ -31,9 +31,10 @@ static u32 xstate_required_size(u64 xstate_bv)
31 xstate_bv &= XSTATE_EXTEND_MASK; 31 xstate_bv &= XSTATE_EXTEND_MASK;
32 while (xstate_bv) { 32 while (xstate_bv) {
33 if (xstate_bv & 0x1) { 33 if (xstate_bv & 0x1) {
34 u32 eax, ebx, ecx, edx; 34 u32 eax, ebx, ecx, edx, offset;
35 cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx); 35 cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
36 ret = max(ret, eax + ebx); 36 offset = compacted ? ret : ebx;
37 ret = max(ret, offset + eax);
37 } 38 }
38 39
39 xstate_bv >>= 1; 40 xstate_bv >>= 1;
@@ -53,6 +54,8 @@ u64 kvm_supported_xcr0(void)
53 return xcr0; 54 return xcr0;
54} 55}
55 56
57#define F(x) bit(X86_FEATURE_##x)
58
56int kvm_update_cpuid(struct kvm_vcpu *vcpu) 59int kvm_update_cpuid(struct kvm_vcpu *vcpu)
57{ 60{
58 struct kvm_cpuid_entry2 *best; 61 struct kvm_cpuid_entry2 *best;
@@ -64,13 +67,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
64 67
65 /* Update OSXSAVE bit */ 68 /* Update OSXSAVE bit */
66 if (cpu_has_xsave && best->function == 0x1) { 69 if (cpu_has_xsave && best->function == 0x1) {
67 best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); 70 best->ecx &= ~F(OSXSAVE);
68 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) 71 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
69 best->ecx |= bit(X86_FEATURE_OSXSAVE); 72 best->ecx |= F(OSXSAVE);
70 } 73 }
71 74
72 if (apic) { 75 if (apic) {
73 if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER)) 76 if (best->ecx & F(TSC_DEADLINE_TIMER))
74 apic->lapic_timer.timer_mode_mask = 3 << 17; 77 apic->lapic_timer.timer_mode_mask = 3 << 17;
75 else 78 else
76 apic->lapic_timer.timer_mode_mask = 1 << 17; 79 apic->lapic_timer.timer_mode_mask = 1 << 17;
@@ -85,9 +88,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
85 (best->eax | ((u64)best->edx << 32)) & 88 (best->eax | ((u64)best->edx << 32)) &
86 kvm_supported_xcr0(); 89 kvm_supported_xcr0();
87 vcpu->arch.guest_xstate_size = best->ebx = 90 vcpu->arch.guest_xstate_size = best->ebx =
88 xstate_required_size(vcpu->arch.xcr0); 91 xstate_required_size(vcpu->arch.xcr0, false);
89 } 92 }
90 93
94 best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
95 if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
96 best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
97
91 /* 98 /*
92 * The existing code assumes virtual address is 48-bit in the canonical 99 * The existing code assumes virtual address is 48-bit in the canonical
93 * address checks; exit if it is ever changed. 100 * address checks; exit if it is ever changed.
@@ -122,8 +129,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
122 break; 129 break;
123 } 130 }
124 } 131 }
125 if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) { 132 if (entry && (entry->edx & F(NX)) && !is_efer_nx()) {
126 entry->edx &= ~bit(X86_FEATURE_NX); 133 entry->edx &= ~F(NX);
127 printk(KERN_INFO "kvm: guest NX capability removed\n"); 134 printk(KERN_INFO "kvm: guest NX capability removed\n");
128 } 135 }
129} 136}
@@ -227,8 +234,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
227 entry->flags = 0; 234 entry->flags = 0;
228} 235}
229 236
230#define F(x) bit(X86_FEATURE_##x)
231
232static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, 237static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
233 u32 func, u32 index, int *nent, int maxnent) 238 u32 func, u32 index, int *nent, int maxnent)
234{ 239{
@@ -267,6 +272,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
267 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; 272 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
268 unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; 273 unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
269 unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0; 274 unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
275 unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
270 276
271 /* cpuid 1.edx */ 277 /* cpuid 1.edx */
272 const u32 kvm_supported_word0_x86_features = 278 const u32 kvm_supported_word0_x86_features =
@@ -317,7 +323,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
317 const u32 kvm_supported_word9_x86_features = 323 const u32 kvm_supported_word9_x86_features =
318 F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | 324 F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
319 F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | 325 F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
320 F(ADX) | F(SMAP); 326 F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
327 F(AVX512CD);
328
329 /* cpuid 0xD.1.eax */
330 const u32 kvm_supported_word10_x86_features =
331 F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
321 332
322 /* all calls to cpuid_count() should be made on the same cpu */ 333 /* all calls to cpuid_count() should be made on the same cpu */
323 get_cpu(); 334 get_cpu();
@@ -453,16 +464,34 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
453 u64 supported = kvm_supported_xcr0(); 464 u64 supported = kvm_supported_xcr0();
454 465
455 entry->eax &= supported; 466 entry->eax &= supported;
467 entry->ebx = xstate_required_size(supported, false);
468 entry->ecx = entry->ebx;
456 entry->edx &= supported >> 32; 469 entry->edx &= supported >> 32;
457 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 470 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
471 if (!supported)
472 break;
473
458 for (idx = 1, i = 1; idx < 64; ++idx) { 474 for (idx = 1, i = 1; idx < 64; ++idx) {
459 u64 mask = ((u64)1 << idx); 475 u64 mask = ((u64)1 << idx);
460 if (*nent >= maxnent) 476 if (*nent >= maxnent)
461 goto out; 477 goto out;
462 478
463 do_cpuid_1_ent(&entry[i], function, idx); 479 do_cpuid_1_ent(&entry[i], function, idx);
464 if (entry[i].eax == 0 || !(supported & mask)) 480 if (idx == 1) {
465 continue; 481 entry[i].eax &= kvm_supported_word10_x86_features;
482 entry[i].ebx = 0;
483 if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
484 entry[i].ebx =
485 xstate_required_size(supported,
486 true);
487 } else {
488 if (entry[i].eax == 0 || !(supported & mask))
489 continue;
490 if (WARN_ON_ONCE(entry[i].ecx & 1))
491 continue;
492 }
493 entry[i].ecx = 0;
494 entry[i].edx = 0;
466 entry[i].flags |= 495 entry[i].flags |=
467 KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 496 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
468 ++*nent; 497 ++*nent;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 9f8a2faf5040..169b09d76ddd 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -123,6 +123,7 @@
123#define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ 123#define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
124#define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ 124#define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
125#define Escape (5<<15) /* Escape to coprocessor instruction */ 125#define Escape (5<<15) /* Escape to coprocessor instruction */
126#define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
126#define Sse (1<<18) /* SSE Vector instruction */ 127#define Sse (1<<18) /* SSE Vector instruction */
127/* Generic ModRM decode. */ 128/* Generic ModRM decode. */
128#define ModRM (1<<19) 129#define ModRM (1<<19)
@@ -166,6 +167,8 @@
166#define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */ 167#define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
167#define NoBigReal ((u64)1 << 50) /* No big real mode */ 168#define NoBigReal ((u64)1 << 50) /* No big real mode */
168#define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */ 169#define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
170#define NearBranch ((u64)1 << 52) /* Near branches */
171#define No16 ((u64)1 << 53) /* No 16 bit operand */
169 172
170#define DstXacc (DstAccLo | SrcAccHi | SrcWrite) 173#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
171 174
@@ -209,6 +212,7 @@ struct opcode {
209 const struct group_dual *gdual; 212 const struct group_dual *gdual;
210 const struct gprefix *gprefix; 213 const struct gprefix *gprefix;
211 const struct escape *esc; 214 const struct escape *esc;
215 const struct instr_dual *idual;
212 void (*fastop)(struct fastop *fake); 216 void (*fastop)(struct fastop *fake);
213 } u; 217 } u;
214 int (*check_perm)(struct x86_emulate_ctxt *ctxt); 218 int (*check_perm)(struct x86_emulate_ctxt *ctxt);
@@ -231,6 +235,11 @@ struct escape {
231 struct opcode high[64]; 235 struct opcode high[64];
232}; 236};
233 237
238struct instr_dual {
239 struct opcode mod012;
240 struct opcode mod3;
241};
242
234/* EFLAGS bit definitions. */ 243/* EFLAGS bit definitions. */
235#define EFLG_ID (1<<21) 244#define EFLG_ID (1<<21)
236#define EFLG_VIP (1<<20) 245#define EFLG_VIP (1<<20)
@@ -379,6 +388,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
379 ON64(FOP2E(op##q, rax, cl)) \ 388 ON64(FOP2E(op##q, rax, cl)) \
380 FOP_END 389 FOP_END
381 390
391/* 2 operand, src and dest are reversed */
392#define FASTOP2R(op, name) \
393 FOP_START(name) \
394 FOP2E(op##b, dl, al) \
395 FOP2E(op##w, dx, ax) \
396 FOP2E(op##l, edx, eax) \
397 ON64(FOP2E(op##q, rdx, rax)) \
398 FOP_END
399
382#define FOP3E(op, dst, src, src2) \ 400#define FOP3E(op, dst, src, src2) \
383 FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET 401 FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
384 402
@@ -477,9 +495,9 @@ address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
477} 495}
478 496
479static inline unsigned long 497static inline unsigned long
480register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg) 498register_address(struct x86_emulate_ctxt *ctxt, int reg)
481{ 499{
482 return address_mask(ctxt, reg); 500 return address_mask(ctxt, reg_read(ctxt, reg));
483} 501}
484 502
485static void masked_increment(ulong *reg, ulong mask, int inc) 503static void masked_increment(ulong *reg, ulong mask, int inc)
@@ -488,7 +506,7 @@ static void masked_increment(ulong *reg, ulong mask, int inc)
488} 506}
489 507
490static inline void 508static inline void
491register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc) 509register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
492{ 510{
493 ulong mask; 511 ulong mask;
494 512
@@ -496,7 +514,7 @@ register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, in
496 mask = ~0UL; 514 mask = ~0UL;
497 else 515 else
498 mask = ad_mask(ctxt); 516 mask = ad_mask(ctxt);
499 masked_increment(reg, mask, inc); 517 masked_increment(reg_rmw(ctxt, reg), mask, inc);
500} 518}
501 519
502static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) 520static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
@@ -564,40 +582,6 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
564 return emulate_exception(ctxt, NM_VECTOR, 0, false); 582 return emulate_exception(ctxt, NM_VECTOR, 0, false);
565} 583}
566 584
567static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
568 int cs_l)
569{
570 switch (ctxt->op_bytes) {
571 case 2:
572 ctxt->_eip = (u16)dst;
573 break;
574 case 4:
575 ctxt->_eip = (u32)dst;
576 break;
577#ifdef CONFIG_X86_64
578 case 8:
579 if ((cs_l && is_noncanonical_address(dst)) ||
580 (!cs_l && (dst >> 32) != 0))
581 return emulate_gp(ctxt, 0);
582 ctxt->_eip = dst;
583 break;
584#endif
585 default:
586 WARN(1, "unsupported eip assignment size\n");
587 }
588 return X86EMUL_CONTINUE;
589}
590
591static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
592{
593 return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
594}
595
596static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
597{
598 return assign_eip_near(ctxt, ctxt->_eip + rel);
599}
600
601static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) 585static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
602{ 586{
603 u16 selector; 587 u16 selector;
@@ -641,25 +625,24 @@ static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
641 return true; 625 return true;
642} 626}
643 627
644static int __linearize(struct x86_emulate_ctxt *ctxt, 628static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
645 struct segmented_address addr, 629 struct segmented_address addr,
646 unsigned *max_size, unsigned size, 630 unsigned *max_size, unsigned size,
647 bool write, bool fetch, 631 bool write, bool fetch,
648 ulong *linear) 632 enum x86emul_mode mode, ulong *linear)
649{ 633{
650 struct desc_struct desc; 634 struct desc_struct desc;
651 bool usable; 635 bool usable;
652 ulong la; 636 ulong la;
653 u32 lim; 637 u32 lim;
654 u16 sel; 638 u16 sel;
655 unsigned cpl;
656 639
657 la = seg_base(ctxt, addr.seg) + addr.ea; 640 la = seg_base(ctxt, addr.seg) + addr.ea;
658 *max_size = 0; 641 *max_size = 0;
659 switch (ctxt->mode) { 642 switch (mode) {
660 case X86EMUL_MODE_PROT64: 643 case X86EMUL_MODE_PROT64:
661 if (((signed long)la << 16) >> 16 != la) 644 if (is_noncanonical_address(la))
662 return emulate_gp(ctxt, 0); 645 goto bad;
663 646
664 *max_size = min_t(u64, ~0u, (1ull << 48) - la); 647 *max_size = min_t(u64, ~0u, (1ull << 48) - la);
665 if (size > *max_size) 648 if (size > *max_size)
@@ -678,46 +661,20 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
678 if (!fetch && (desc.type & 8) && !(desc.type & 2)) 661 if (!fetch && (desc.type & 8) && !(desc.type & 2))
679 goto bad; 662 goto bad;
680 lim = desc_limit_scaled(&desc); 663 lim = desc_limit_scaled(&desc);
681 if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch && 664 if (!(desc.type & 8) && (desc.type & 4)) {
682 (ctxt->d & NoBigReal)) {
683 /* la is between zero and 0xffff */
684 if (la > 0xffff)
685 goto bad;
686 *max_size = 0x10000 - la;
687 } else if ((desc.type & 8) || !(desc.type & 4)) {
688 /* expand-up segment */
689 if (addr.ea > lim)
690 goto bad;
691 *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
692 } else {
693 /* expand-down segment */ 665 /* expand-down segment */
694 if (addr.ea <= lim) 666 if (addr.ea <= lim)
695 goto bad; 667 goto bad;
696 lim = desc.d ? 0xffffffff : 0xffff; 668 lim = desc.d ? 0xffffffff : 0xffff;
697 if (addr.ea > lim)
698 goto bad;
699 *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
700 } 669 }
670 if (addr.ea > lim)
671 goto bad;
672 *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
701 if (size > *max_size) 673 if (size > *max_size)
702 goto bad; 674 goto bad;
703 cpl = ctxt->ops->cpl(ctxt); 675 la &= (u32)-1;
704 if (!(desc.type & 8)) {
705 /* data segment */
706 if (cpl > desc.dpl)
707 goto bad;
708 } else if ((desc.type & 8) && !(desc.type & 4)) {
709 /* nonconforming code segment */
710 if (cpl != desc.dpl)
711 goto bad;
712 } else if ((desc.type & 8) && (desc.type & 4)) {
713 /* conforming code segment */
714 if (cpl < desc.dpl)
715 goto bad;
716 }
717 break; 676 break;
718 } 677 }
719 if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8)
720 la &= (u32)-1;
721 if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0)) 678 if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
722 return emulate_gp(ctxt, 0); 679 return emulate_gp(ctxt, 0);
723 *linear = la; 680 *linear = la;
@@ -735,9 +692,55 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
735 ulong *linear) 692 ulong *linear)
736{ 693{
737 unsigned max_size; 694 unsigned max_size;
738 return __linearize(ctxt, addr, &max_size, size, write, false, linear); 695 return __linearize(ctxt, addr, &max_size, size, write, false,
696 ctxt->mode, linear);
697}
698
699static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
700 enum x86emul_mode mode)
701{
702 ulong linear;
703 int rc;
704 unsigned max_size;
705 struct segmented_address addr = { .seg = VCPU_SREG_CS,
706 .ea = dst };
707
708 if (ctxt->op_bytes != sizeof(unsigned long))
709 addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
710 rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
711 if (rc == X86EMUL_CONTINUE)
712 ctxt->_eip = addr.ea;
713 return rc;
714}
715
716static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
717{
718 return assign_eip(ctxt, dst, ctxt->mode);
739} 719}
740 720
721static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
722 const struct desc_struct *cs_desc)
723{
724 enum x86emul_mode mode = ctxt->mode;
725
726#ifdef CONFIG_X86_64
727 if (ctxt->mode >= X86EMUL_MODE_PROT32 && cs_desc->l) {
728 u64 efer = 0;
729
730 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
731 if (efer & EFER_LMA)
732 mode = X86EMUL_MODE_PROT64;
733 }
734#endif
735 if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
736 mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
737 return assign_eip(ctxt, dst, mode);
738}
739
740static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
741{
742 return assign_eip_near(ctxt, ctxt->_eip + rel);
743}
741 744
742static int segmented_read_std(struct x86_emulate_ctxt *ctxt, 745static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
743 struct segmented_address addr, 746 struct segmented_address addr,
@@ -776,7 +779,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
776 * boundary check itself. Instead, we use max_size to check 779 * boundary check itself. Instead, we use max_size to check
777 * against op_size. 780 * against op_size.
778 */ 781 */
779 rc = __linearize(ctxt, addr, &max_size, 0, false, true, &linear); 782 rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
783 &linear);
780 if (unlikely(rc != X86EMUL_CONTINUE)) 784 if (unlikely(rc != X86EMUL_CONTINUE))
781 return rc; 785 return rc;
782 786
@@ -911,6 +915,8 @@ FASTOP2W(btc);
911 915
912FASTOP2(xadd); 916FASTOP2(xadd);
913 917
918FASTOP2R(cmp, cmp_r);
919
914static u8 test_cc(unsigned int condition, unsigned long flags) 920static u8 test_cc(unsigned int condition, unsigned long flags)
915{ 921{
916 u8 rc; 922 u8 rc;
@@ -1221,6 +1227,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1221 if (index_reg != 4) 1227 if (index_reg != 4)
1222 modrm_ea += reg_read(ctxt, index_reg) << scale; 1228 modrm_ea += reg_read(ctxt, index_reg) << scale;
1223 } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) { 1229 } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
1230 modrm_ea += insn_fetch(s32, ctxt);
1224 if (ctxt->mode == X86EMUL_MODE_PROT64) 1231 if (ctxt->mode == X86EMUL_MODE_PROT64)
1225 ctxt->rip_relative = 1; 1232 ctxt->rip_relative = 1;
1226 } else { 1233 } else {
@@ -1229,10 +1236,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1229 adjust_modrm_seg(ctxt, base_reg); 1236 adjust_modrm_seg(ctxt, base_reg);
1230 } 1237 }
1231 switch (ctxt->modrm_mod) { 1238 switch (ctxt->modrm_mod) {
1232 case 0:
1233 if (ctxt->modrm_rm == 5)
1234 modrm_ea += insn_fetch(s32, ctxt);
1235 break;
1236 case 1: 1239 case 1:
1237 modrm_ea += insn_fetch(s8, ctxt); 1240 modrm_ea += insn_fetch(s8, ctxt);
1238 break; 1241 break;
@@ -1284,7 +1287,8 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1284 else 1287 else
1285 sv = (s64)ctxt->src.val & (s64)mask; 1288 sv = (s64)ctxt->src.val & (s64)mask;
1286 1289
1287 ctxt->dst.addr.mem.ea += (sv >> 3); 1290 ctxt->dst.addr.mem.ea = address_mask(ctxt,
1291 ctxt->dst.addr.mem.ea + (sv >> 3));
1288 } 1292 }
1289 1293
1290 /* only subword offset */ 1294 /* only subword offset */
@@ -1610,6 +1614,9 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1610 sizeof(base3), &ctxt->exception); 1614 sizeof(base3), &ctxt->exception);
1611 if (ret != X86EMUL_CONTINUE) 1615 if (ret != X86EMUL_CONTINUE)
1612 return ret; 1616 return ret;
1617 if (is_noncanonical_address(get_desc_base(&seg_desc) |
1618 ((u64)base3 << 32)))
1619 return emulate_gp(ctxt, 0);
1613 } 1620 }
1614load: 1621load:
1615 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); 1622 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
@@ -1807,6 +1814,10 @@ static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
1807 int seg = ctxt->src2.val; 1814 int seg = ctxt->src2.val;
1808 1815
1809 ctxt->src.val = get_segment_selector(ctxt, seg); 1816 ctxt->src.val = get_segment_selector(ctxt, seg);
1817 if (ctxt->op_bytes == 4) {
1818 rsp_increment(ctxt, -2);
1819 ctxt->op_bytes = 2;
1820 }
1810 1821
1811 return em_push(ctxt); 1822 return em_push(ctxt);
1812} 1823}
@@ -1850,7 +1861,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt)
1850 1861
1851static int em_pushf(struct x86_emulate_ctxt *ctxt) 1862static int em_pushf(struct x86_emulate_ctxt *ctxt)
1852{ 1863{
1853 ctxt->src.val = (unsigned long)ctxt->eflags; 1864 ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM;
1854 return em_push(ctxt); 1865 return em_push(ctxt);
1855} 1866}
1856 1867
@@ -2035,7 +2046,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2035 if (rc != X86EMUL_CONTINUE) 2046 if (rc != X86EMUL_CONTINUE)
2036 return rc; 2047 return rc;
2037 2048
2038 rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l); 2049 rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
2039 if (rc != X86EMUL_CONTINUE) { 2050 if (rc != X86EMUL_CONTINUE) {
2040 WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); 2051 WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
2041 /* assigning eip failed; restore the old cs */ 2052 /* assigning eip failed; restore the old cs */
@@ -2045,31 +2056,22 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2045 return rc; 2056 return rc;
2046} 2057}
2047 2058
2048static int em_grp45(struct x86_emulate_ctxt *ctxt) 2059static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2049{ 2060{
2050 int rc = X86EMUL_CONTINUE; 2061 return assign_eip_near(ctxt, ctxt->src.val);
2062}
2051 2063
2052 switch (ctxt->modrm_reg) { 2064static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
2053 case 2: /* call near abs */ { 2065{
2054 long int old_eip; 2066 int rc;
2055 old_eip = ctxt->_eip; 2067 long int old_eip;
2056 rc = assign_eip_near(ctxt, ctxt->src.val); 2068
2057 if (rc != X86EMUL_CONTINUE) 2069 old_eip = ctxt->_eip;
2058 break; 2070 rc = assign_eip_near(ctxt, ctxt->src.val);
2059 ctxt->src.val = old_eip; 2071 if (rc != X86EMUL_CONTINUE)
2060 rc = em_push(ctxt); 2072 return rc;
2061 break; 2073 ctxt->src.val = old_eip;
2062 } 2074 rc = em_push(ctxt);
2063 case 4: /* jmp abs */
2064 rc = assign_eip_near(ctxt, ctxt->src.val);
2065 break;
2066 case 5: /* jmp far */
2067 rc = em_jmp_far(ctxt);
2068 break;
2069 case 6: /* push */
2070 rc = em_push(ctxt);
2071 break;
2072 }
2073 return rc; 2075 return rc;
2074} 2076}
2075 2077
@@ -2128,11 +2130,11 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2128 /* Outer-privilege level return is not implemented */ 2130 /* Outer-privilege level return is not implemented */
2129 if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) 2131 if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
2130 return X86EMUL_UNHANDLEABLE; 2132 return X86EMUL_UNHANDLEABLE;
2131 rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false, 2133 rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, false,
2132 &new_desc); 2134 &new_desc);
2133 if (rc != X86EMUL_CONTINUE) 2135 if (rc != X86EMUL_CONTINUE)
2134 return rc; 2136 return rc;
2135 rc = assign_eip_far(ctxt, eip, new_desc.l); 2137 rc = assign_eip_far(ctxt, eip, &new_desc);
2136 if (rc != X86EMUL_CONTINUE) { 2138 if (rc != X86EMUL_CONTINUE) {
2137 WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); 2139 WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
2138 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); 2140 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
@@ -2316,6 +2318,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
2316 2318
2317 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); 2319 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2318 ctxt->eflags &= ~msr_data; 2320 ctxt->eflags &= ~msr_data;
2321 ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
2319#endif 2322#endif
2320 } else { 2323 } else {
2321 /* legacy mode */ 2324 /* legacy mode */
@@ -2349,11 +2352,9 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2349 && !vendor_intel(ctxt)) 2352 && !vendor_intel(ctxt))
2350 return emulate_ud(ctxt); 2353 return emulate_ud(ctxt);
2351 2354
2352 /* XXX sysenter/sysexit have not been tested in 64bit mode. 2355 /* sysenter/sysexit have not been tested in 64bit mode. */
2353 * Therefore, we inject an #UD.
2354 */
2355 if (ctxt->mode == X86EMUL_MODE_PROT64) 2356 if (ctxt->mode == X86EMUL_MODE_PROT64)
2356 return emulate_ud(ctxt); 2357 return X86EMUL_UNHANDLEABLE;
2357 2358
2358 setup_syscalls_segments(ctxt, &cs, &ss); 2359 setup_syscalls_segments(ctxt, &cs, &ss);
2359 2360
@@ -2425,6 +2426,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2425 if ((msr_data & 0xfffc) == 0x0) 2426 if ((msr_data & 0xfffc) == 0x0)
2426 return emulate_gp(ctxt, 0); 2427 return emulate_gp(ctxt, 0);
2427 ss_sel = (u16)(msr_data + 24); 2428 ss_sel = (u16)(msr_data + 24);
2429 rcx = (u32)rcx;
2430 rdx = (u32)rdx;
2428 break; 2431 break;
2429 case X86EMUL_MODE_PROT64: 2432 case X86EMUL_MODE_PROT64:
2430 cs_sel = (u16)(msr_data + 32); 2433 cs_sel = (u16)(msr_data + 32);
@@ -2599,7 +2602,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
2599 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, 2602 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2600 &ctxt->exception); 2603 &ctxt->exception);
2601 if (ret != X86EMUL_CONTINUE) 2604 if (ret != X86EMUL_CONTINUE)
2602 /* FIXME: need to provide precise fault address */
2603 return ret; 2605 return ret;
2604 2606
2605 save_state_to_tss16(ctxt, &tss_seg); 2607 save_state_to_tss16(ctxt, &tss_seg);
@@ -2607,13 +2609,11 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
2607 ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, 2609 ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2608 &ctxt->exception); 2610 &ctxt->exception);
2609 if (ret != X86EMUL_CONTINUE) 2611 if (ret != X86EMUL_CONTINUE)
2610 /* FIXME: need to provide precise fault address */
2611 return ret; 2612 return ret;
2612 2613
2613 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, 2614 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
2614 &ctxt->exception); 2615 &ctxt->exception);
2615 if (ret != X86EMUL_CONTINUE) 2616 if (ret != X86EMUL_CONTINUE)
2616 /* FIXME: need to provide precise fault address */
2617 return ret; 2617 return ret;
2618 2618
2619 if (old_tss_sel != 0xffff) { 2619 if (old_tss_sel != 0xffff) {
@@ -2624,7 +2624,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
2624 sizeof tss_seg.prev_task_link, 2624 sizeof tss_seg.prev_task_link,
2625 &ctxt->exception); 2625 &ctxt->exception);
2626 if (ret != X86EMUL_CONTINUE) 2626 if (ret != X86EMUL_CONTINUE)
2627 /* FIXME: need to provide precise fault address */
2628 return ret; 2627 return ret;
2629 } 2628 }
2630 2629
@@ -2813,7 +2812,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2813 * 2812 *
2814 * 1. jmp/call/int to task gate: Check against DPL of the task gate 2813 * 1. jmp/call/int to task gate: Check against DPL of the task gate
2815 * 2. Exception/IRQ/iret: No check is performed 2814 * 2. Exception/IRQ/iret: No check is performed
2816 * 3. jmp/call to TSS: Check against DPL of the TSS 2815 * 3. jmp/call to TSS/task-gate: No check is performed since the
2816 * hardware checks it before exiting.
2817 */ 2817 */
2818 if (reason == TASK_SWITCH_GATE) { 2818 if (reason == TASK_SWITCH_GATE) {
2819 if (idt_index != -1) { 2819 if (idt_index != -1) {
@@ -2830,13 +2830,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2830 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl) 2830 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
2831 return emulate_gp(ctxt, (idt_index << 3) | 0x2); 2831 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
2832 } 2832 }
2833 } else if (reason != TASK_SWITCH_IRET) {
2834 int dpl = next_tss_desc.dpl;
2835 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
2836 return emulate_gp(ctxt, tss_selector);
2837 } 2833 }
2838 2834
2839
2840 desc_limit = desc_limit_scaled(&next_tss_desc); 2835 desc_limit = desc_limit_scaled(&next_tss_desc);
2841 if (!next_tss_desc.p || 2836 if (!next_tss_desc.p ||
2842 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || 2837 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
@@ -2913,8 +2908,8 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
2913{ 2908{
2914 int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count; 2909 int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
2915 2910
2916 register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes); 2911 register_address_increment(ctxt, reg, df * op->bytes);
2917 op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg)); 2912 op->addr.mem.ea = register_address(ctxt, reg);
2918} 2913}
2919 2914
2920static int em_das(struct x86_emulate_ctxt *ctxt) 2915static int em_das(struct x86_emulate_ctxt *ctxt)
@@ -3025,7 +3020,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
3025 if (rc != X86EMUL_CONTINUE) 3020 if (rc != X86EMUL_CONTINUE)
3026 return X86EMUL_CONTINUE; 3021 return X86EMUL_CONTINUE;
3027 3022
3028 rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l); 3023 rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
3029 if (rc != X86EMUL_CONTINUE) 3024 if (rc != X86EMUL_CONTINUE)
3030 goto fail; 3025 goto fail;
3031 3026
@@ -3215,6 +3210,8 @@ static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3215 return emulate_ud(ctxt); 3210 return emulate_ud(ctxt);
3216 3211
3217 ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg); 3212 ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
3213 if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3214 ctxt->dst.bytes = 2;
3218 return X86EMUL_CONTINUE; 3215 return X86EMUL_CONTINUE;
3219} 3216}
3220 3217
@@ -3317,7 +3314,7 @@ static int em_sidt(struct x86_emulate_ctxt *ctxt)
3317 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt); 3314 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3318} 3315}
3319 3316
3320static int em_lgdt(struct x86_emulate_ctxt *ctxt) 3317static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3321{ 3318{
3322 struct desc_ptr desc_ptr; 3319 struct desc_ptr desc_ptr;
3323 int rc; 3320 int rc;
@@ -3329,12 +3326,23 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3329 ctxt->op_bytes); 3326 ctxt->op_bytes);
3330 if (rc != X86EMUL_CONTINUE) 3327 if (rc != X86EMUL_CONTINUE)
3331 return rc; 3328 return rc;
3332 ctxt->ops->set_gdt(ctxt, &desc_ptr); 3329 if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3330 is_noncanonical_address(desc_ptr.address))
3331 return emulate_gp(ctxt, 0);
3332 if (lgdt)
3333 ctxt->ops->set_gdt(ctxt, &desc_ptr);
3334 else
3335 ctxt->ops->set_idt(ctxt, &desc_ptr);
3333 /* Disable writeback. */ 3336 /* Disable writeback. */
3334 ctxt->dst.type = OP_NONE; 3337 ctxt->dst.type = OP_NONE;
3335 return X86EMUL_CONTINUE; 3338 return X86EMUL_CONTINUE;
3336} 3339}
3337 3340
3341static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3342{
3343 return em_lgdt_lidt(ctxt, true);
3344}
3345
3338static int em_vmmcall(struct x86_emulate_ctxt *ctxt) 3346static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
3339{ 3347{
3340 int rc; 3348 int rc;
@@ -3348,20 +3356,7 @@ static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
3348 3356
3349static int em_lidt(struct x86_emulate_ctxt *ctxt) 3357static int em_lidt(struct x86_emulate_ctxt *ctxt)
3350{ 3358{
3351 struct desc_ptr desc_ptr; 3359 return em_lgdt_lidt(ctxt, false);
3352 int rc;
3353
3354 if (ctxt->mode == X86EMUL_MODE_PROT64)
3355 ctxt->op_bytes = 8;
3356 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3357 &desc_ptr.size, &desc_ptr.address,
3358 ctxt->op_bytes);
3359 if (rc != X86EMUL_CONTINUE)
3360 return rc;
3361 ctxt->ops->set_idt(ctxt, &desc_ptr);
3362 /* Disable writeback. */
3363 ctxt->dst.type = OP_NONE;
3364 return X86EMUL_CONTINUE;
3365} 3360}
3366 3361
3367static int em_smsw(struct x86_emulate_ctxt *ctxt) 3362static int em_smsw(struct x86_emulate_ctxt *ctxt)
@@ -3384,7 +3379,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt)
3384{ 3379{
3385 int rc = X86EMUL_CONTINUE; 3380 int rc = X86EMUL_CONTINUE;
3386 3381
3387 register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); 3382 register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3388 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) && 3383 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3389 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags))) 3384 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3390 rc = jmp_rel(ctxt, ctxt->src.val); 3385 rc = jmp_rel(ctxt, ctxt->src.val);
@@ -3554,7 +3549,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
3554 3549
3555 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); 3550 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3556 if (efer & EFER_LMA) 3551 if (efer & EFER_LMA)
3557 rsvd = CR3_L_MODE_RESERVED_BITS; 3552 rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
3558 3553
3559 if (new_val & rsvd) 3554 if (new_val & rsvd)
3560 return emulate_gp(ctxt, 0); 3555 return emulate_gp(ctxt, 0);
@@ -3596,8 +3591,15 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt)
3596 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) 3591 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
3597 return emulate_ud(ctxt); 3592 return emulate_ud(ctxt);
3598 3593
3599 if (check_dr7_gd(ctxt)) 3594 if (check_dr7_gd(ctxt)) {
3595 ulong dr6;
3596
3597 ctxt->ops->get_dr(ctxt, 6, &dr6);
3598 dr6 &= ~15;
3599 dr6 |= DR6_BD | DR6_RTM;
3600 ctxt->ops->set_dr(ctxt, 6, dr6);
3600 return emulate_db(ctxt); 3601 return emulate_db(ctxt);
3602 }
3601 3603
3602 return X86EMUL_CONTINUE; 3604 return X86EMUL_CONTINUE;
3603} 3605}
@@ -3684,6 +3686,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3684#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } 3686#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
3685#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } 3687#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
3686#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } 3688#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
3689#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
3687#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } 3690#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
3688#define I(_f, _e) { .flags = (_f), .u.execute = (_e) } 3691#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
3689#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } 3692#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
@@ -3780,11 +3783,11 @@ static const struct opcode group4[] = {
3780static const struct opcode group5[] = { 3783static const struct opcode group5[] = {
3781 F(DstMem | SrcNone | Lock, em_inc), 3784 F(DstMem | SrcNone | Lock, em_inc),
3782 F(DstMem | SrcNone | Lock, em_dec), 3785 F(DstMem | SrcNone | Lock, em_dec),
3783 I(SrcMem | Stack, em_grp45), 3786 I(SrcMem | NearBranch, em_call_near_abs),
3784 I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), 3787 I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
3785 I(SrcMem | Stack, em_grp45), 3788 I(SrcMem | NearBranch, em_jmp_abs),
3786 I(SrcMemFAddr | ImplicitOps, em_grp45), 3789 I(SrcMemFAddr | ImplicitOps, em_jmp_far),
3787 I(SrcMem | Stack, em_grp45), D(Undefined), 3790 I(SrcMem | Stack, em_push), D(Undefined),
3788}; 3791};
3789 3792
3790static const struct opcode group6[] = { 3793static const struct opcode group6[] = {
@@ -3845,8 +3848,12 @@ static const struct gprefix pfx_0f_6f_0f_7f = {
3845 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), 3848 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
3846}; 3849};
3847 3850
3851static const struct instr_dual instr_dual_0f_2b = {
3852 I(0, em_mov), N
3853};
3854
3848static const struct gprefix pfx_0f_2b = { 3855static const struct gprefix pfx_0f_2b = {
3849 I(0, em_mov), I(0, em_mov), N, N, 3856 ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
3850}; 3857};
3851 3858
3852static const struct gprefix pfx_0f_28_0f_29 = { 3859static const struct gprefix pfx_0f_28_0f_29 = {
@@ -3920,6 +3927,10 @@ static const struct escape escape_dd = { {
3920 N, N, N, N, N, N, N, N, 3927 N, N, N, N, N, N, N, N,
3921} }; 3928} };
3922 3929
3930static const struct instr_dual instr_dual_0f_c3 = {
3931 I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
3932};
3933
3923static const struct opcode opcode_table[256] = { 3934static const struct opcode opcode_table[256] = {
3924 /* 0x00 - 0x07 */ 3935 /* 0x00 - 0x07 */
3925 F6ALU(Lock, em_add), 3936 F6ALU(Lock, em_add),
@@ -3964,7 +3975,7 @@ static const struct opcode opcode_table[256] = {
3964 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */ 3975 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
3965 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */ 3976 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
3966 /* 0x70 - 0x7F */ 3977 /* 0x70 - 0x7F */
3967 X16(D(SrcImmByte)), 3978 X16(D(SrcImmByte | NearBranch)),
3968 /* 0x80 - 0x87 */ 3979 /* 0x80 - 0x87 */
3969 G(ByteOp | DstMem | SrcImm, group1), 3980 G(ByteOp | DstMem | SrcImm, group1),
3970 G(DstMem | SrcImm, group1), 3981 G(DstMem | SrcImm, group1),
@@ -3991,20 +4002,20 @@ static const struct opcode opcode_table[256] = {
3991 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), 4002 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
3992 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), 4003 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
3993 I2bv(SrcSI | DstDI | Mov | String, em_mov), 4004 I2bv(SrcSI | DstDI | Mov | String, em_mov),
3994 F2bv(SrcSI | DstDI | String | NoWrite, em_cmp), 4005 F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
3995 /* 0xA8 - 0xAF */ 4006 /* 0xA8 - 0xAF */
3996 F2bv(DstAcc | SrcImm | NoWrite, em_test), 4007 F2bv(DstAcc | SrcImm | NoWrite, em_test),
3997 I2bv(SrcAcc | DstDI | Mov | String, em_mov), 4008 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
3998 I2bv(SrcSI | DstAcc | Mov | String, em_mov), 4009 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
3999 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp), 4010 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4000 /* 0xB0 - 0xB7 */ 4011 /* 0xB0 - 0xB7 */
4001 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), 4012 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4002 /* 0xB8 - 0xBF */ 4013 /* 0xB8 - 0xBF */
4003 X8(I(DstReg | SrcImm64 | Mov, em_mov)), 4014 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4004 /* 0xC0 - 0xC7 */ 4015 /* 0xC0 - 0xC7 */
4005 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2), 4016 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4006 I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), 4017 I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
4007 I(ImplicitOps | Stack, em_ret), 4018 I(ImplicitOps | NearBranch, em_ret),
4008 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), 4019 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4009 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), 4020 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4010 G(ByteOp, group11), G(0, group11), 4021 G(ByteOp, group11), G(0, group11),
@@ -4024,13 +4035,14 @@ static const struct opcode opcode_table[256] = {
4024 /* 0xD8 - 0xDF */ 4035 /* 0xD8 - 0xDF */
4025 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N, 4036 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4026 /* 0xE0 - 0xE7 */ 4037 /* 0xE0 - 0xE7 */
4027 X3(I(SrcImmByte, em_loop)), 4038 X3(I(SrcImmByte | NearBranch, em_loop)),
4028 I(SrcImmByte, em_jcxz), 4039 I(SrcImmByte | NearBranch, em_jcxz),
4029 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in), 4040 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4030 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out), 4041 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4031 /* 0xE8 - 0xEF */ 4042 /* 0xE8 - 0xEF */
4032 I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps), 4043 I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
4033 I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps), 4044 I(SrcImmFAddr | No64, em_jmp_far),
4045 D(SrcImmByte | ImplicitOps | NearBranch),
4034 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in), 4046 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4035 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out), 4047 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4036 /* 0xF0 - 0xF7 */ 4048 /* 0xF0 - 0xF7 */
@@ -4090,7 +4102,7 @@ static const struct opcode twobyte_table[256] = {
4090 N, N, N, N, 4102 N, N, N, N,
4091 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f), 4103 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4092 /* 0x80 - 0x8F */ 4104 /* 0x80 - 0x8F */
4093 X16(D(SrcImm)), 4105 X16(D(SrcImm | NearBranch)),
4094 /* 0x90 - 0x9F */ 4106 /* 0x90 - 0x9F */
4095 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), 4107 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
4096 /* 0xA0 - 0xA7 */ 4108 /* 0xA0 - 0xA7 */
@@ -4121,7 +4133,7 @@ static const struct opcode twobyte_table[256] = {
4121 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), 4133 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4122 /* 0xC0 - 0xC7 */ 4134 /* 0xC0 - 0xC7 */
4123 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), 4135 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4124 N, D(DstMem | SrcReg | ModRM | Mov), 4136 N, ID(0, &instr_dual_0f_c3),
4125 N, N, N, GD(0, &group9), 4137 N, N, N, GD(0, &group9),
4126 /* 0xC8 - 0xCF */ 4138 /* 0xC8 - 0xCF */
4127 X8(I(DstReg, em_bswap)), 4139 X8(I(DstReg, em_bswap)),
@@ -4134,12 +4146,20 @@ static const struct opcode twobyte_table[256] = {
4134 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N 4146 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4135}; 4147};
4136 4148
4149static const struct instr_dual instr_dual_0f_38_f0 = {
4150 I(DstReg | SrcMem | Mov, em_movbe), N
4151};
4152
4153static const struct instr_dual instr_dual_0f_38_f1 = {
4154 I(DstMem | SrcReg | Mov, em_movbe), N
4155};
4156
4137static const struct gprefix three_byte_0f_38_f0 = { 4157static const struct gprefix three_byte_0f_38_f0 = {
4138 I(DstReg | SrcMem | Mov, em_movbe), N, N, N 4158 ID(0, &instr_dual_0f_38_f0), N, N, N
4139}; 4159};
4140 4160
4141static const struct gprefix three_byte_0f_38_f1 = { 4161static const struct gprefix three_byte_0f_38_f1 = {
4142 I(DstMem | SrcReg | Mov, em_movbe), N, N, N 4162 ID(0, &instr_dual_0f_38_f1), N, N, N
4143}; 4163};
4144 4164
4145/* 4165/*
@@ -4152,8 +4172,8 @@ static const struct opcode opcode_map_0f_38[256] = {
4152 /* 0x80 - 0xef */ 4172 /* 0x80 - 0xef */
4153 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), 4173 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4154 /* 0xf0 - 0xf1 */ 4174 /* 0xf0 - 0xf1 */
4155 GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0), 4175 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4156 GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1), 4176 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
4157 /* 0xf2 - 0xff */ 4177 /* 0xf2 - 0xff */
4158 N, N, X4(N), X8(N) 4178 N, N, X4(N), X8(N)
4159}; 4179};
@@ -4275,7 +4295,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4275 op->type = OP_MEM; 4295 op->type = OP_MEM;
4276 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; 4296 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4277 op->addr.mem.ea = 4297 op->addr.mem.ea =
4278 register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI)); 4298 register_address(ctxt, VCPU_REGS_RDI);
4279 op->addr.mem.seg = VCPU_SREG_ES; 4299 op->addr.mem.seg = VCPU_SREG_ES;
4280 op->val = 0; 4300 op->val = 0;
4281 op->count = 1; 4301 op->count = 1;
@@ -4329,7 +4349,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4329 op->type = OP_MEM; 4349 op->type = OP_MEM;
4330 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; 4350 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4331 op->addr.mem.ea = 4351 op->addr.mem.ea =
4332 register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI)); 4352 register_address(ctxt, VCPU_REGS_RSI);
4333 op->addr.mem.seg = ctxt->seg_override; 4353 op->addr.mem.seg = ctxt->seg_override;
4334 op->val = 0; 4354 op->val = 0;
4335 op->count = 1; 4355 op->count = 1;
@@ -4338,7 +4358,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4338 op->type = OP_MEM; 4358 op->type = OP_MEM;
4339 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; 4359 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4340 op->addr.mem.ea = 4360 op->addr.mem.ea =
4341 register_address(ctxt, 4361 address_mask(ctxt,
4342 reg_read(ctxt, VCPU_REGS_RBX) + 4362 reg_read(ctxt, VCPU_REGS_RBX) +
4343 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff)); 4363 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4344 op->addr.mem.seg = ctxt->seg_override; 4364 op->addr.mem.seg = ctxt->seg_override;
@@ -4510,8 +4530,7 @@ done_prefixes:
4510 4530
4511 /* vex-prefix instructions are not implemented */ 4531 /* vex-prefix instructions are not implemented */
4512 if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) && 4532 if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
4513 (mode == X86EMUL_MODE_PROT64 || 4533 (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
4514 (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) {
4515 ctxt->d = NotImpl; 4534 ctxt->d = NotImpl;
4516 } 4535 }
4517 4536
@@ -4549,6 +4568,12 @@ done_prefixes:
4549 else 4568 else
4550 opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; 4569 opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
4551 break; 4570 break;
4571 case InstrDual:
4572 if ((ctxt->modrm >> 6) == 3)
4573 opcode = opcode.u.idual->mod3;
4574 else
4575 opcode = opcode.u.idual->mod012;
4576 break;
4552 default: 4577 default:
4553 return EMULATION_FAILED; 4578 return EMULATION_FAILED;
4554 } 4579 }
@@ -4567,7 +4592,8 @@ done_prefixes:
4567 return EMULATION_FAILED; 4592 return EMULATION_FAILED;
4568 4593
4569 if (unlikely(ctxt->d & 4594 if (unlikely(ctxt->d &
4570 (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) { 4595 (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
4596 No16))) {
4571 /* 4597 /*
4572 * These are copied unconditionally here, and checked unconditionally 4598 * These are copied unconditionally here, and checked unconditionally
4573 * in x86_emulate_insn. 4599 * in x86_emulate_insn.
@@ -4578,8 +4604,12 @@ done_prefixes:
4578 if (ctxt->d & NotImpl) 4604 if (ctxt->d & NotImpl)
4579 return EMULATION_FAILED; 4605 return EMULATION_FAILED;
4580 4606
4581 if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) 4607 if (mode == X86EMUL_MODE_PROT64) {
4582 ctxt->op_bytes = 8; 4608 if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
4609 ctxt->op_bytes = 8;
4610 else if (ctxt->d & NearBranch)
4611 ctxt->op_bytes = 8;
4612 }
4583 4613
4584 if (ctxt->d & Op3264) { 4614 if (ctxt->d & Op3264) {
4585 if (mode == X86EMUL_MODE_PROT64) 4615 if (mode == X86EMUL_MODE_PROT64)
@@ -4588,6 +4618,9 @@ done_prefixes:
4588 ctxt->op_bytes = 4; 4618 ctxt->op_bytes = 4;
4589 } 4619 }
4590 4620
4621 if ((ctxt->d & No16) && ctxt->op_bytes == 2)
4622 ctxt->op_bytes = 4;
4623
4591 if (ctxt->d & Sse) 4624 if (ctxt->d & Sse)
4592 ctxt->op_bytes = 16; 4625 ctxt->op_bytes = 16;
4593 else if (ctxt->d & Mmx) 4626 else if (ctxt->d & Mmx)
@@ -4631,7 +4664,8 @@ done_prefixes:
4631 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); 4664 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
4632 4665
4633 if (ctxt->rip_relative) 4666 if (ctxt->rip_relative)
4634 ctxt->memopp->addr.mem.ea += ctxt->_eip; 4667 ctxt->memopp->addr.mem.ea = address_mask(ctxt,
4668 ctxt->memopp->addr.mem.ea + ctxt->_eip);
4635 4669
4636done: 4670done:
4637 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; 4671 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
@@ -4775,6 +4809,12 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
4775 goto done; 4809 goto done;
4776 } 4810 }
4777 4811
4812 /* Instruction can only be executed in protected mode */
4813 if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
4814 rc = emulate_ud(ctxt);
4815 goto done;
4816 }
4817
4778 /* Privileged instruction can be executed only in CPL=0 */ 4818 /* Privileged instruction can be executed only in CPL=0 */
4779 if ((ctxt->d & Priv) && ops->cpl(ctxt)) { 4819 if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
4780 if (ctxt->d & PrivUD) 4820 if (ctxt->d & PrivUD)
@@ -4784,12 +4824,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
4784 goto done; 4824 goto done;
4785 } 4825 }
4786 4826
4787 /* Instruction can only be executed in protected mode */
4788 if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
4789 rc = emulate_ud(ctxt);
4790 goto done;
4791 }
4792
4793 /* Do instruction specific permission checks */ 4827 /* Do instruction specific permission checks */
4794 if (ctxt->d & CheckPerm) { 4828 if (ctxt->d & CheckPerm) {
4795 rc = ctxt->check_perm(ctxt); 4829 rc = ctxt->check_perm(ctxt);
@@ -4974,8 +5008,7 @@ writeback:
4974 count = ctxt->src.count; 5008 count = ctxt->src.count;
4975 else 5009 else
4976 count = ctxt->dst.count; 5010 count = ctxt->dst.count;
4977 register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), 5011 register_address_increment(ctxt, VCPU_REGS_RCX, -count);
4978 -count);
4979 5012
4980 if (!string_insn_completed(ctxt)) { 5013 if (!string_insn_completed(ctxt)) {
4981 /* 5014 /*
@@ -5053,11 +5086,6 @@ twobyte_insn:
5053 ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val : 5086 ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
5054 (s16) ctxt->src.val; 5087 (s16) ctxt->src.val;
5055 break; 5088 break;
5056 case 0xc3: /* movnti */
5057 ctxt->dst.bytes = ctxt->op_bytes;
5058 ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val :
5059 (u32) ctxt->src.val;
5060 break;
5061 default: 5089 default:
5062 goto cannot_emulate; 5090 goto cannot_emulate;
5063 } 5091 }
diff --git a/virt/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 0ba4057d271b..b1947e0f3e10 100644
--- a/virt/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -270,7 +270,6 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
270 spin_unlock(&ioapic->lock); 270 spin_unlock(&ioapic->lock);
271} 271}
272 272
273#ifdef CONFIG_X86
274void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) 273void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
275{ 274{
276 struct kvm_ioapic *ioapic = kvm->arch.vioapic; 275 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
@@ -279,12 +278,6 @@ void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
279 return; 278 return;
280 kvm_make_scan_ioapic_request(kvm); 279 kvm_make_scan_ioapic_request(kvm);
281} 280}
282#else
283void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
284{
285 return;
286}
287#endif
288 281
289static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) 282static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
290{ 283{
@@ -586,11 +579,6 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
586 case IOAPIC_REG_WINDOW: 579 case IOAPIC_REG_WINDOW:
587 ioapic_write_indirect(ioapic, data); 580 ioapic_write_indirect(ioapic, data);
588 break; 581 break;
589#ifdef CONFIG_IA64
590 case IOAPIC_REG_EOI:
591 __kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG);
592 break;
593#endif
594 582
595 default: 583 default:
596 break; 584 break;
diff --git a/virt/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index e23b70634f1e..3c9195535ffc 100644
--- a/virt/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -19,7 +19,6 @@ struct kvm_vcpu;
19/* Direct registers. */ 19/* Direct registers. */
20#define IOAPIC_REG_SELECT 0x00 20#define IOAPIC_REG_SELECT 0x00
21#define IOAPIC_REG_WINDOW 0x10 21#define IOAPIC_REG_WINDOW 0x10
22#define IOAPIC_REG_EOI 0x40 /* IA64 IOSAPIC only */
23 22
24/* Indirect registers. */ 23/* Indirect registers. */
25#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */ 24#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */
@@ -45,6 +44,23 @@ struct rtc_status {
45 DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS); 44 DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
46}; 45};
47 46
47union kvm_ioapic_redirect_entry {
48 u64 bits;
49 struct {
50 u8 vector;
51 u8 delivery_mode:3;
52 u8 dest_mode:1;
53 u8 delivery_status:1;
54 u8 polarity:1;
55 u8 remote_irr:1;
56 u8 trig_mode:1;
57 u8 mask:1;
58 u8 reserve:7;
59 u8 reserved[4];
60 u8 dest_id;
61 } fields;
62};
63
48struct kvm_ioapic { 64struct kvm_ioapic {
49 u64 base_address; 65 u64 base_address;
50 u32 ioregsel; 66 u32 ioregsel;
@@ -83,7 +99,7 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
83 99
84void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); 100void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
85int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, 101int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
86 int short_hand, int dest, int dest_mode); 102 int short_hand, unsigned int dest, int dest_mode);
87int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); 103int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
88void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, 104void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
89 int trigger_mode); 105 int trigger_mode);
@@ -97,7 +113,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
97 struct kvm_lapic_irq *irq, unsigned long *dest_map); 113 struct kvm_lapic_irq *irq, unsigned long *dest_map);
98int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); 114int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
99int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); 115int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
100void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
101void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, 116void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
102 u32 *tmr); 117 u32 *tmr);
103 118
diff --git a/virt/kvm/iommu.c b/arch/x86/kvm/iommu.c
index c1e6ae989a43..17b73eeac8a4 100644
--- a/virt/kvm/iommu.c
+++ b/arch/x86/kvm/iommu.c
@@ -31,6 +31,7 @@
31#include <linux/dmar.h> 31#include <linux/dmar.h>
32#include <linux/iommu.h> 32#include <linux/iommu.h>
33#include <linux/intel-iommu.h> 33#include <linux/intel-iommu.h>
34#include "assigned-dev.h"
34 35
35static bool allow_unsafe_assigned_interrupts; 36static bool allow_unsafe_assigned_interrupts;
36module_param_named(allow_unsafe_assigned_interrupts, 37module_param_named(allow_unsafe_assigned_interrupts,
@@ -169,10 +170,8 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
169 return r; 170 return r;
170} 171}
171 172
172int kvm_assign_device(struct kvm *kvm, 173int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
173 struct kvm_assigned_dev_kernel *assigned_dev)
174{ 174{
175 struct pci_dev *pdev = NULL;
176 struct iommu_domain *domain = kvm->arch.iommu_domain; 175 struct iommu_domain *domain = kvm->arch.iommu_domain;
177 int r; 176 int r;
178 bool noncoherent; 177 bool noncoherent;
@@ -181,7 +180,6 @@ int kvm_assign_device(struct kvm *kvm,
181 if (!domain) 180 if (!domain)
182 return 0; 181 return 0;
183 182
184 pdev = assigned_dev->dev;
185 if (pdev == NULL) 183 if (pdev == NULL)
186 return -ENODEV; 184 return -ENODEV;
187 185
@@ -212,17 +210,14 @@ out_unmap:
212 return r; 210 return r;
213} 211}
214 212
215int kvm_deassign_device(struct kvm *kvm, 213int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
216 struct kvm_assigned_dev_kernel *assigned_dev)
217{ 214{
218 struct iommu_domain *domain = kvm->arch.iommu_domain; 215 struct iommu_domain *domain = kvm->arch.iommu_domain;
219 struct pci_dev *pdev = NULL;
220 216
221 /* check if iommu exists and in use */ 217 /* check if iommu exists and in use */
222 if (!domain) 218 if (!domain)
223 return 0; 219 return 0;
224 220
225 pdev = assigned_dev->dev;
226 if (pdev == NULL) 221 if (pdev == NULL)
227 return -ENODEV; 222 return -ENODEV;
228 223
diff --git a/virt/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 963b8995a9e8..72298b3ac025 100644
--- a/virt/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -26,9 +26,6 @@
26#include <trace/events/kvm.h> 26#include <trace/events/kvm.h>
27 27
28#include <asm/msidef.h> 28#include <asm/msidef.h>
29#ifdef CONFIG_IA64
30#include <asm/iosapic.h>
31#endif
32 29
33#include "irq.h" 30#include "irq.h"
34 31
@@ -38,12 +35,8 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
38 struct kvm *kvm, int irq_source_id, int level, 35 struct kvm *kvm, int irq_source_id, int level,
39 bool line_status) 36 bool line_status)
40{ 37{
41#ifdef CONFIG_X86
42 struct kvm_pic *pic = pic_irqchip(kvm); 38 struct kvm_pic *pic = pic_irqchip(kvm);
43 return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); 39 return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
44#else
45 return -1;
46#endif
47} 40}
48 41
49static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, 42static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
@@ -57,12 +50,7 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
57 50
58inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) 51inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
59{ 52{
60#ifdef CONFIG_IA64
61 return irq->delivery_mode ==
62 (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
63#else
64 return irq->delivery_mode == APIC_DM_LOWEST; 53 return irq->delivery_mode == APIC_DM_LOWEST;
65#endif
66} 54}
67 55
68int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, 56int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
@@ -202,9 +190,7 @@ int kvm_request_irq_source_id(struct kvm *kvm)
202 } 190 }
203 191
204 ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); 192 ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
205#ifdef CONFIG_X86
206 ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); 193 ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
207#endif
208 set_bit(irq_source_id, bitmap); 194 set_bit(irq_source_id, bitmap);
209unlock: 195unlock:
210 mutex_unlock(&kvm->irq_lock); 196 mutex_unlock(&kvm->irq_lock);
@@ -215,9 +201,7 @@ unlock:
215void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) 201void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
216{ 202{
217 ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); 203 ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
218#ifdef CONFIG_X86
219 ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); 204 ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
220#endif
221 205
222 mutex_lock(&kvm->irq_lock); 206 mutex_lock(&kvm->irq_lock);
223 if (irq_source_id < 0 || 207 if (irq_source_id < 0 ||
@@ -230,9 +214,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
230 goto unlock; 214 goto unlock;
231 215
232 kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); 216 kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
233#ifdef CONFIG_X86
234 kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id); 217 kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id);
235#endif
236unlock: 218unlock:
237 mutex_unlock(&kvm->irq_lock); 219 mutex_unlock(&kvm->irq_lock);
238} 220}
@@ -242,7 +224,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
242{ 224{
243 mutex_lock(&kvm->irq_lock); 225 mutex_lock(&kvm->irq_lock);
244 kimn->irq = irq; 226 kimn->irq = irq;
245 hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list); 227 hlist_add_head_rcu(&kimn->link, &kvm->arch.mask_notifier_list);
246 mutex_unlock(&kvm->irq_lock); 228 mutex_unlock(&kvm->irq_lock);
247} 229}
248 230
@@ -264,7 +246,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
264 idx = srcu_read_lock(&kvm->irq_srcu); 246 idx = srcu_read_lock(&kvm->irq_srcu);
265 gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); 247 gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
266 if (gsi != -1) 248 if (gsi != -1)
267 hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) 249 hlist_for_each_entry_rcu(kimn, &kvm->arch.mask_notifier_list, link)
268 if (kimn->irq == gsi) 250 if (kimn->irq == gsi)
269 kimn->func(kimn, mask); 251 kimn->func(kimn, mask);
270 srcu_read_unlock(&kvm->irq_srcu, idx); 252 srcu_read_unlock(&kvm->irq_srcu, idx);
@@ -322,16 +304,11 @@ out:
322 .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } 304 .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
323#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) 305#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
324 306
325#ifdef CONFIG_X86 307#define PIC_ROUTING_ENTRY(irq) \
326# define PIC_ROUTING_ENTRY(irq) \
327 { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ 308 { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
328 .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } } 309 .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
329# define ROUTING_ENTRY2(irq) \ 310#define ROUTING_ENTRY2(irq) \
330 IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq) 311 IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
331#else
332# define ROUTING_ENTRY2(irq) \
333 IOAPIC_ROUTING_ENTRY(irq)
334#endif
335 312
336static const struct kvm_irq_routing_entry default_routing[] = { 313static const struct kvm_irq_routing_entry default_routing[] = {
337 ROUTING_ENTRY2(0), ROUTING_ENTRY2(1), 314 ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
@@ -346,20 +323,6 @@ static const struct kvm_irq_routing_entry default_routing[] = {
346 ROUTING_ENTRY1(18), ROUTING_ENTRY1(19), 323 ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
347 ROUTING_ENTRY1(20), ROUTING_ENTRY1(21), 324 ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
348 ROUTING_ENTRY1(22), ROUTING_ENTRY1(23), 325 ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
349#ifdef CONFIG_IA64
350 ROUTING_ENTRY1(24), ROUTING_ENTRY1(25),
351 ROUTING_ENTRY1(26), ROUTING_ENTRY1(27),
352 ROUTING_ENTRY1(28), ROUTING_ENTRY1(29),
353 ROUTING_ENTRY1(30), ROUTING_ENTRY1(31),
354 ROUTING_ENTRY1(32), ROUTING_ENTRY1(33),
355 ROUTING_ENTRY1(34), ROUTING_ENTRY1(35),
356 ROUTING_ENTRY1(36), ROUTING_ENTRY1(37),
357 ROUTING_ENTRY1(38), ROUTING_ENTRY1(39),
358 ROUTING_ENTRY1(40), ROUTING_ENTRY1(41),
359 ROUTING_ENTRY1(42), ROUTING_ENTRY1(43),
360 ROUTING_ENTRY1(44), ROUTING_ENTRY1(45),
361 ROUTING_ENTRY1(46), ROUTING_ENTRY1(47),
362#endif
363}; 326};
364 327
365int kvm_setup_default_irq_routing(struct kvm *kvm) 328int kvm_setup_default_irq_routing(struct kvm *kvm)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b8345dd41b25..4f0c0b954686 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -68,6 +68,9 @@
68#define MAX_APIC_VECTOR 256 68#define MAX_APIC_VECTOR 256
69#define APIC_VECTORS_PER_REG 32 69#define APIC_VECTORS_PER_REG 32
70 70
71#define APIC_BROADCAST 0xFF
72#define X2APIC_BROADCAST 0xFFFFFFFFul
73
71#define VEC_POS(v) ((v) & (32 - 1)) 74#define VEC_POS(v) ((v) & (32 - 1))
72#define REG_POS(v) (((v) >> 5) << 4) 75#define REG_POS(v) (((v) >> 5) << 4)
73 76
@@ -129,8 +132,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
129 return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; 132 return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
130} 133}
131 134
132#define KVM_X2APIC_CID_BITS 0
133
134static void recalculate_apic_map(struct kvm *kvm) 135static void recalculate_apic_map(struct kvm *kvm)
135{ 136{
136 struct kvm_apic_map *new, *old = NULL; 137 struct kvm_apic_map *new, *old = NULL;
@@ -149,42 +150,56 @@ static void recalculate_apic_map(struct kvm *kvm)
149 new->cid_shift = 8; 150 new->cid_shift = 8;
150 new->cid_mask = 0; 151 new->cid_mask = 0;
151 new->lid_mask = 0xff; 152 new->lid_mask = 0xff;
153 new->broadcast = APIC_BROADCAST;
152 154
153 kvm_for_each_vcpu(i, vcpu, kvm) { 155 kvm_for_each_vcpu(i, vcpu, kvm) {
154 struct kvm_lapic *apic = vcpu->arch.apic; 156 struct kvm_lapic *apic = vcpu->arch.apic;
155 u16 cid, lid;
156 u32 ldr;
157 157
158 if (!kvm_apic_present(vcpu)) 158 if (!kvm_apic_present(vcpu))
159 continue; 159 continue;
160 160
161 if (apic_x2apic_mode(apic)) {
162 new->ldr_bits = 32;
163 new->cid_shift = 16;
164 new->cid_mask = new->lid_mask = 0xffff;
165 new->broadcast = X2APIC_BROADCAST;
166 } else if (kvm_apic_get_reg(apic, APIC_LDR)) {
167 if (kvm_apic_get_reg(apic, APIC_DFR) ==
168 APIC_DFR_CLUSTER) {
169 new->cid_shift = 4;
170 new->cid_mask = 0xf;
171 new->lid_mask = 0xf;
172 } else {
173 new->cid_shift = 8;
174 new->cid_mask = 0;
175 new->lid_mask = 0xff;
176 }
177 }
178
161 /* 179 /*
162 * All APICs have to be configured in the same mode by an OS. 180 * All APICs have to be configured in the same mode by an OS.
163 * We take advatage of this while building logical id loockup 181 * We take advatage of this while building logical id loockup
164 * table. After reset APICs are in xapic/flat mode, so if we 182 * table. After reset APICs are in software disabled mode, so if
165 * find apic with different setting we assume this is the mode 183 * we find apic with different setting we assume this is the mode
166 * OS wants all apics to be in; build lookup table accordingly. 184 * OS wants all apics to be in; build lookup table accordingly.
167 */ 185 */
168 if (apic_x2apic_mode(apic)) { 186 if (kvm_apic_sw_enabled(apic))
169 new->ldr_bits = 32; 187 break;
170 new->cid_shift = 16; 188 }
171 new->cid_mask = (1 << KVM_X2APIC_CID_BITS) - 1;
172 new->lid_mask = 0xffff;
173 } else if (kvm_apic_sw_enabled(apic) &&
174 !new->cid_mask /* flat mode */ &&
175 kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
176 new->cid_shift = 4;
177 new->cid_mask = 0xf;
178 new->lid_mask = 0xf;
179 }
180 189
181 new->phys_map[kvm_apic_id(apic)] = apic; 190 kvm_for_each_vcpu(i, vcpu, kvm) {
191 struct kvm_lapic *apic = vcpu->arch.apic;
192 u16 cid, lid;
193 u32 ldr, aid;
182 194
195 aid = kvm_apic_id(apic);
183 ldr = kvm_apic_get_reg(apic, APIC_LDR); 196 ldr = kvm_apic_get_reg(apic, APIC_LDR);
184 cid = apic_cluster_id(new, ldr); 197 cid = apic_cluster_id(new, ldr);
185 lid = apic_logical_id(new, ldr); 198 lid = apic_logical_id(new, ldr);
186 199
187 if (lid) 200 if (aid < ARRAY_SIZE(new->phys_map))
201 new->phys_map[aid] = apic;
202 if (lid && cid < ARRAY_SIZE(new->logical_map))
188 new->logical_map[cid][ffs(lid) - 1] = apic; 203 new->logical_map[cid][ffs(lid) - 1] = apic;
189 } 204 }
190out: 205out:
@@ -201,11 +216,13 @@ out:
201 216
202static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) 217static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
203{ 218{
204 u32 prev = kvm_apic_get_reg(apic, APIC_SPIV); 219 bool enabled = val & APIC_SPIV_APIC_ENABLED;
205 220
206 apic_set_reg(apic, APIC_SPIV, val); 221 apic_set_reg(apic, APIC_SPIV, val);
207 if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) { 222
208 if (val & APIC_SPIV_APIC_ENABLED) { 223 if (enabled != apic->sw_enabled) {
224 apic->sw_enabled = enabled;
225 if (enabled) {
209 static_key_slow_dec_deferred(&apic_sw_disabled); 226 static_key_slow_dec_deferred(&apic_sw_disabled);
210 recalculate_apic_map(apic->vcpu->kvm); 227 recalculate_apic_map(apic->vcpu->kvm);
211 } else 228 } else
@@ -237,21 +254,17 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
237 254
238static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) 255static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
239{ 256{
240 return ((kvm_apic_get_reg(apic, APIC_LVTT) & 257 return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
241 apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
242} 258}
243 259
244static inline int apic_lvtt_period(struct kvm_lapic *apic) 260static inline int apic_lvtt_period(struct kvm_lapic *apic)
245{ 261{
246 return ((kvm_apic_get_reg(apic, APIC_LVTT) & 262 return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
247 apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
248} 263}
249 264
250static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) 265static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
251{ 266{
252 return ((kvm_apic_get_reg(apic, APIC_LVTT) & 267 return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
253 apic->lapic_timer.timer_mode_mask) ==
254 APIC_LVT_TIMER_TSCDEADLINE);
255} 268}
256 269
257static inline int apic_lvt_nmi_mode(u32 lvt_val) 270static inline int apic_lvt_nmi_mode(u32 lvt_val)
@@ -326,8 +339,12 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
326 339
327static inline void apic_set_irr(int vec, struct kvm_lapic *apic) 340static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
328{ 341{
329 apic->irr_pending = true;
330 apic_set_vector(vec, apic->regs + APIC_IRR); 342 apic_set_vector(vec, apic->regs + APIC_IRR);
343 /*
344 * irr_pending must be true if any interrupt is pending; set it after
345 * APIC_IRR to avoid race with apic_clear_irr
346 */
347 apic->irr_pending = true;
331} 348}
332 349
333static inline int apic_search_irr(struct kvm_lapic *apic) 350static inline int apic_search_irr(struct kvm_lapic *apic)
@@ -359,13 +376,15 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
359 376
360 vcpu = apic->vcpu; 377 vcpu = apic->vcpu;
361 378
362 apic_clear_vector(vec, apic->regs + APIC_IRR); 379 if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) {
363 if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
364 /* try to update RVI */ 380 /* try to update RVI */
381 apic_clear_vector(vec, apic->regs + APIC_IRR);
365 kvm_make_request(KVM_REQ_EVENT, vcpu); 382 kvm_make_request(KVM_REQ_EVENT, vcpu);
366 else { 383 } else {
367 vec = apic_search_irr(apic); 384 apic->irr_pending = false;
368 apic->irr_pending = (vec != -1); 385 apic_clear_vector(vec, apic->regs + APIC_IRR);
386 if (apic_search_irr(apic) != -1)
387 apic->irr_pending = true;
369 } 388 }
370} 389}
371 390
@@ -558,16 +577,25 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
558 apic_update_ppr(apic); 577 apic_update_ppr(apic);
559} 578}
560 579
561int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) 580static int kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
581{
582 return dest == (apic_x2apic_mode(apic) ?
583 X2APIC_BROADCAST : APIC_BROADCAST);
584}
585
586int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
562{ 587{
563 return dest == 0xff || kvm_apic_id(apic) == dest; 588 return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
564} 589}
565 590
566int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) 591int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
567{ 592{
568 int result = 0; 593 int result = 0;
569 u32 logical_id; 594 u32 logical_id;
570 595
596 if (kvm_apic_broadcast(apic, mda))
597 return 1;
598
571 if (apic_x2apic_mode(apic)) { 599 if (apic_x2apic_mode(apic)) {
572 logical_id = kvm_apic_get_reg(apic, APIC_LDR); 600 logical_id = kvm_apic_get_reg(apic, APIC_LDR);
573 return logical_id & mda; 601 return logical_id & mda;
@@ -595,7 +623,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
595} 623}
596 624
597int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, 625int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
598 int short_hand, int dest, int dest_mode) 626 int short_hand, unsigned int dest, int dest_mode)
599{ 627{
600 int result = 0; 628 int result = 0;
601 struct kvm_lapic *target = vcpu->arch.apic; 629 struct kvm_lapic *target = vcpu->arch.apic;
@@ -657,15 +685,24 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
657 if (!map) 685 if (!map)
658 goto out; 686 goto out;
659 687
688 if (irq->dest_id == map->broadcast)
689 goto out;
690
691 ret = true;
692
660 if (irq->dest_mode == 0) { /* physical mode */ 693 if (irq->dest_mode == 0) { /* physical mode */
661 if (irq->delivery_mode == APIC_DM_LOWEST || 694 if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
662 irq->dest_id == 0xff)
663 goto out; 695 goto out;
664 dst = &map->phys_map[irq->dest_id & 0xff]; 696
697 dst = &map->phys_map[irq->dest_id];
665 } else { 698 } else {
666 u32 mda = irq->dest_id << (32 - map->ldr_bits); 699 u32 mda = irq->dest_id << (32 - map->ldr_bits);
700 u16 cid = apic_cluster_id(map, mda);
701
702 if (cid >= ARRAY_SIZE(map->logical_map))
703 goto out;
667 704
668 dst = map->logical_map[apic_cluster_id(map, mda)]; 705 dst = map->logical_map[cid];
669 706
670 bitmap = apic_logical_id(map, mda); 707 bitmap = apic_logical_id(map, mda);
671 708
@@ -691,8 +728,6 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
691 *r = 0; 728 *r = 0;
692 *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); 729 *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
693 } 730 }
694
695 ret = true;
696out: 731out:
697 rcu_read_unlock(); 732 rcu_read_unlock();
698 return ret; 733 return ret;
@@ -1034,6 +1069,26 @@ static void update_divide_count(struct kvm_lapic *apic)
1034 apic->divide_count); 1069 apic->divide_count);
1035} 1070}
1036 1071
1072static void apic_timer_expired(struct kvm_lapic *apic)
1073{
1074 struct kvm_vcpu *vcpu = apic->vcpu;
1075 wait_queue_head_t *q = &vcpu->wq;
1076
1077 /*
1078 * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
1079 * vcpu_enter_guest.
1080 */
1081 if (atomic_read(&apic->lapic_timer.pending))
1082 return;
1083
1084 atomic_inc(&apic->lapic_timer.pending);
1085 /* FIXME: this code should not know anything about vcpus */
1086 kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
1087
1088 if (waitqueue_active(q))
1089 wake_up_interruptible(q);
1090}
1091
1037static void start_apic_timer(struct kvm_lapic *apic) 1092static void start_apic_timer(struct kvm_lapic *apic)
1038{ 1093{
1039 ktime_t now; 1094 ktime_t now;
@@ -1096,9 +1151,10 @@ static void start_apic_timer(struct kvm_lapic *apic)
1096 if (likely(tscdeadline > guest_tsc)) { 1151 if (likely(tscdeadline > guest_tsc)) {
1097 ns = (tscdeadline - guest_tsc) * 1000000ULL; 1152 ns = (tscdeadline - guest_tsc) * 1000000ULL;
1098 do_div(ns, this_tsc_khz); 1153 do_div(ns, this_tsc_khz);
1099 } 1154 hrtimer_start(&apic->lapic_timer.timer,
1100 hrtimer_start(&apic->lapic_timer.timer, 1155 ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
1101 ktime_add_ns(now, ns), HRTIMER_MODE_ABS); 1156 } else
1157 apic_timer_expired(apic);
1102 1158
1103 local_irq_restore(flags); 1159 local_irq_restore(flags);
1104 } 1160 }
@@ -1203,17 +1259,20 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1203 1259
1204 break; 1260 break;
1205 1261
1206 case APIC_LVTT: 1262 case APIC_LVTT: {
1207 if ((kvm_apic_get_reg(apic, APIC_LVTT) & 1263 u32 timer_mode = val & apic->lapic_timer.timer_mode_mask;
1208 apic->lapic_timer.timer_mode_mask) != 1264
1209 (val & apic->lapic_timer.timer_mode_mask)) 1265 if (apic->lapic_timer.timer_mode != timer_mode) {
1266 apic->lapic_timer.timer_mode = timer_mode;
1210 hrtimer_cancel(&apic->lapic_timer.timer); 1267 hrtimer_cancel(&apic->lapic_timer.timer);
1268 }
1211 1269
1212 if (!kvm_apic_sw_enabled(apic)) 1270 if (!kvm_apic_sw_enabled(apic))
1213 val |= APIC_LVT_MASKED; 1271 val |= APIC_LVT_MASKED;
1214 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); 1272 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
1215 apic_set_reg(apic, APIC_LVTT, val); 1273 apic_set_reg(apic, APIC_LVTT, val);
1216 break; 1274 break;
1275 }
1217 1276
1218 case APIC_TMICT: 1277 case APIC_TMICT:
1219 if (apic_lvtt_tscdeadline(apic)) 1278 if (apic_lvtt_tscdeadline(apic))
@@ -1320,7 +1379,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
1320 if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) 1379 if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
1321 static_key_slow_dec_deferred(&apic_hw_disabled); 1380 static_key_slow_dec_deferred(&apic_hw_disabled);
1322 1381
1323 if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED)) 1382 if (!apic->sw_enabled)
1324 static_key_slow_dec_deferred(&apic_sw_disabled); 1383 static_key_slow_dec_deferred(&apic_sw_disabled);
1325 1384
1326 if (apic->regs) 1385 if (apic->regs)
@@ -1355,9 +1414,6 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
1355 return; 1414 return;
1356 1415
1357 hrtimer_cancel(&apic->lapic_timer.timer); 1416 hrtimer_cancel(&apic->lapic_timer.timer);
1358 /* Inject here so clearing tscdeadline won't override new value */
1359 if (apic_has_pending_timer(vcpu))
1360 kvm_inject_apic_timer_irqs(vcpu);
1361 apic->lapic_timer.tscdeadline = data; 1417 apic->lapic_timer.tscdeadline = data;
1362 start_apic_timer(apic); 1418 start_apic_timer(apic);
1363} 1419}
@@ -1422,6 +1478,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
1422 apic->base_address = apic->vcpu->arch.apic_base & 1478 apic->base_address = apic->vcpu->arch.apic_base &
1423 MSR_IA32_APICBASE_BASE; 1479 MSR_IA32_APICBASE_BASE;
1424 1480
1481 if ((value & MSR_IA32_APICBASE_ENABLE) &&
1482 apic->base_address != APIC_DEFAULT_PHYS_BASE)
1483 pr_warn_once("APIC base relocation is unsupported by KVM");
1484
1425 /* with FSB delivery interrupt, we can restart APIC functionality */ 1485 /* with FSB delivery interrupt, we can restart APIC functionality */
1426 apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is " 1486 apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
1427 "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address); 1487 "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
@@ -1447,6 +1507,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
1447 1507
1448 for (i = 0; i < APIC_LVT_NUM; i++) 1508 for (i = 0; i < APIC_LVT_NUM; i++)
1449 apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); 1509 apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
1510 apic->lapic_timer.timer_mode = 0;
1450 apic_set_reg(apic, APIC_LVT0, 1511 apic_set_reg(apic, APIC_LVT0,
1451 SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); 1512 SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
1452 1513
@@ -1538,23 +1599,8 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
1538{ 1599{
1539 struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); 1600 struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
1540 struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer); 1601 struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
1541 struct kvm_vcpu *vcpu = apic->vcpu;
1542 wait_queue_head_t *q = &vcpu->wq;
1543
1544 /*
1545 * There is a race window between reading and incrementing, but we do
1546 * not care about potentially losing timer events in the !reinject
1547 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
1548 * in vcpu_enter_guest.
1549 */
1550 if (!atomic_read(&ktimer->pending)) {
1551 atomic_inc(&ktimer->pending);
1552 /* FIXME: this code should not know anything about vcpus */
1553 kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
1554 }
1555 1602
1556 if (waitqueue_active(q)) 1603 apic_timer_expired(apic);
1557 wake_up_interruptible(q);
1558 1604
1559 if (lapic_is_periodic(apic)) { 1605 if (lapic_is_periodic(apic)) {
1560 hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); 1606 hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
@@ -1693,6 +1739,9 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
1693 apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ? 1739 apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
1694 1 : count_vectors(apic->regs + APIC_ISR); 1740 1 : count_vectors(apic->regs + APIC_ISR);
1695 apic->highest_isr_cache = -1; 1741 apic->highest_isr_cache = -1;
1742 if (kvm_x86_ops->hwapic_irr_update)
1743 kvm_x86_ops->hwapic_irr_update(vcpu,
1744 apic_find_highest_irr(apic));
1696 kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); 1745 kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
1697 kvm_make_request(KVM_REQ_EVENT, vcpu); 1746 kvm_make_request(KVM_REQ_EVENT, vcpu);
1698 kvm_rtc_eoi_tracking_restore_one(vcpu); 1747 kvm_rtc_eoi_tracking_restore_one(vcpu);
@@ -1837,8 +1886,11 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1837 if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) 1886 if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
1838 return 1; 1887 return 1;
1839 1888
1889 if (reg == APIC_ICR2)
1890 return 1;
1891
1840 /* if this is ICR write vector before command */ 1892 /* if this is ICR write vector before command */
1841 if (msr == 0x830) 1893 if (reg == APIC_ICR)
1842 apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); 1894 apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
1843 return apic_reg_write(apic, reg, (u32)data); 1895 return apic_reg_write(apic, reg, (u32)data);
1844} 1896}
@@ -1851,9 +1903,15 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
1851 if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) 1903 if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
1852 return 1; 1904 return 1;
1853 1905
1906 if (reg == APIC_DFR || reg == APIC_ICR2) {
1907 apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
1908 reg);
1909 return 1;
1910 }
1911
1854 if (apic_reg_read(apic, reg, 4, &low)) 1912 if (apic_reg_read(apic, reg, 4, &low))
1855 return 1; 1913 return 1;
1856 if (msr == 0x830) 1914 if (reg == APIC_ICR)
1857 apic_reg_read(apic, APIC_ICR2, 4, &high); 1915 apic_reg_read(apic, APIC_ICR2, 4, &high);
1858 1916
1859 *data = (((u64)high) << 32) | low; 1917 *data = (((u64)high) << 32) | low;
@@ -1908,7 +1966,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
1908void kvm_apic_accept_events(struct kvm_vcpu *vcpu) 1966void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
1909{ 1967{
1910 struct kvm_lapic *apic = vcpu->arch.apic; 1968 struct kvm_lapic *apic = vcpu->arch.apic;
1911 unsigned int sipi_vector; 1969 u8 sipi_vector;
1912 unsigned long pe; 1970 unsigned long pe;
1913 1971
1914 if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events) 1972 if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 6a11845fd8b9..c674fce53cf9 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -11,6 +11,7 @@
11struct kvm_timer { 11struct kvm_timer {
12 struct hrtimer timer; 12 struct hrtimer timer;
13 s64 period; /* unit: ns */ 13 s64 period; /* unit: ns */
14 u32 timer_mode;
14 u32 timer_mode_mask; 15 u32 timer_mode_mask;
15 u64 tscdeadline; 16 u64 tscdeadline;
16 atomic_t pending; /* accumulated triggered timers */ 17 atomic_t pending; /* accumulated triggered timers */
@@ -22,6 +23,7 @@ struct kvm_lapic {
22 struct kvm_timer lapic_timer; 23 struct kvm_timer lapic_timer;
23 u32 divide_count; 24 u32 divide_count;
24 struct kvm_vcpu *vcpu; 25 struct kvm_vcpu *vcpu;
26 bool sw_enabled;
25 bool irr_pending; 27 bool irr_pending;
26 /* Number of bits set in ISR. */ 28 /* Number of bits set in ISR. */
27 s16 isr_count; 29 s16 isr_count;
@@ -55,8 +57,8 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
55 57
56void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); 58void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
57void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); 59void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
58int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); 60int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest);
59int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); 61int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda);
60int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, 62int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
61 unsigned long *dest_map); 63 unsigned long *dest_map);
62int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); 64int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
@@ -119,11 +121,11 @@ static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic)
119 121
120extern struct static_key_deferred apic_sw_disabled; 122extern struct static_key_deferred apic_sw_disabled;
121 123
122static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic) 124static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic)
123{ 125{
124 if (static_key_false(&apic_sw_disabled.key)) 126 if (static_key_false(&apic_sw_disabled.key))
125 return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; 127 return apic->sw_enabled;
126 return APIC_SPIV_APIC_ENABLED; 128 return true;
127} 129}
128 130
129static inline bool kvm_apic_present(struct kvm_vcpu *vcpu) 131static inline bool kvm_apic_present(struct kvm_vcpu *vcpu)
@@ -152,8 +154,6 @@ static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
152 ldr >>= 32 - map->ldr_bits; 154 ldr >>= 32 - map->ldr_bits;
153 cid = (ldr >> map->cid_shift) & map->cid_mask; 155 cid = (ldr >> map->cid_shift) & map->cid_mask;
154 156
155 BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
156
157 return cid; 157 return cid;
158} 158}
159 159
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 978f402006ee..10fbed126b11 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -214,13 +214,12 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
214#define MMIO_GEN_LOW_SHIFT 10 214#define MMIO_GEN_LOW_SHIFT 10
215#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2) 215#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2)
216#define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) 216#define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1)
217#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1)
218 217
219static u64 generation_mmio_spte_mask(unsigned int gen) 218static u64 generation_mmio_spte_mask(unsigned int gen)
220{ 219{
221 u64 mask; 220 u64 mask;
222 221
223 WARN_ON(gen > MMIO_MAX_GEN); 222 WARN_ON(gen & ~MMIO_GEN_MASK);
224 223
225 mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT; 224 mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
226 mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT; 225 mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
@@ -263,13 +262,13 @@ static bool is_mmio_spte(u64 spte)
263 262
264static gfn_t get_mmio_spte_gfn(u64 spte) 263static gfn_t get_mmio_spte_gfn(u64 spte)
265{ 264{
266 u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask; 265 u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
267 return (spte & ~mask) >> PAGE_SHIFT; 266 return (spte & ~mask) >> PAGE_SHIFT;
268} 267}
269 268
270static unsigned get_mmio_spte_access(u64 spte) 269static unsigned get_mmio_spte_access(u64 spte)
271{ 270{
272 u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask; 271 u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
273 return (spte & ~mask) & ~PAGE_MASK; 272 return (spte & ~mask) & ~PAGE_MASK;
274} 273}
275 274
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7527cefc5a43..41dd0387cccb 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1056,9 +1056,11 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho
1056{ 1056{
1057 struct vcpu_svm *svm = to_svm(vcpu); 1057 struct vcpu_svm *svm = to_svm(vcpu);
1058 1058
1059 WARN_ON(adjustment < 0); 1059 if (host) {
1060 if (host) 1060 if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
1061 adjustment = svm_scale_tsc(vcpu, adjustment); 1061 WARN_ON(adjustment < 0);
1062 adjustment = svm_scale_tsc(vcpu, (u64)adjustment);
1063 }
1062 1064
1063 svm->vmcb->control.tsc_offset += adjustment; 1065 svm->vmcb->control.tsc_offset += adjustment;
1064 if (is_guest_mode(vcpu)) 1066 if (is_guest_mode(vcpu))
@@ -2999,7 +3001,6 @@ static int dr_interception(struct vcpu_svm *svm)
2999{ 3001{
3000 int reg, dr; 3002 int reg, dr;
3001 unsigned long val; 3003 unsigned long val;
3002 int err;
3003 3004
3004 if (svm->vcpu.guest_debug == 0) { 3005 if (svm->vcpu.guest_debug == 0) {
3005 /* 3006 /*
@@ -3019,12 +3020,15 @@ static int dr_interception(struct vcpu_svm *svm)
3019 dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0; 3020 dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
3020 3021
3021 if (dr >= 16) { /* mov to DRn */ 3022 if (dr >= 16) { /* mov to DRn */
3023 if (!kvm_require_dr(&svm->vcpu, dr - 16))
3024 return 1;
3022 val = kvm_register_read(&svm->vcpu, reg); 3025 val = kvm_register_read(&svm->vcpu, reg);
3023 kvm_set_dr(&svm->vcpu, dr - 16, val); 3026 kvm_set_dr(&svm->vcpu, dr - 16, val);
3024 } else { 3027 } else {
3025 err = kvm_get_dr(&svm->vcpu, dr, &val); 3028 if (!kvm_require_dr(&svm->vcpu, dr))
3026 if (!err) 3029 return 1;
3027 kvm_register_write(&svm->vcpu, reg, val); 3030 kvm_get_dr(&svm->vcpu, dr, &val);
3031 kvm_register_write(&svm->vcpu, reg, val);
3028 } 3032 }
3029 3033
3030 skip_emulated_instruction(&svm->vcpu); 3034 skip_emulated_instruction(&svm->vcpu);
@@ -4123,6 +4127,11 @@ static bool svm_mpx_supported(void)
4123 return false; 4127 return false;
4124} 4128}
4125 4129
4130static bool svm_xsaves_supported(void)
4131{
4132 return false;
4133}
4134
4126static bool svm_has_wbinvd_exit(void) 4135static bool svm_has_wbinvd_exit(void)
4127{ 4136{
4128 return true; 4137 return true;
@@ -4410,6 +4419,7 @@ static struct kvm_x86_ops svm_x86_ops = {
4410 .rdtscp_supported = svm_rdtscp_supported, 4419 .rdtscp_supported = svm_rdtscp_supported,
4411 .invpcid_supported = svm_invpcid_supported, 4420 .invpcid_supported = svm_invpcid_supported,
4412 .mpx_supported = svm_mpx_supported, 4421 .mpx_supported = svm_mpx_supported,
4422 .xsaves_supported = svm_xsaves_supported,
4413 4423
4414 .set_supported_cpuid = svm_set_supported_cpuid, 4424 .set_supported_cpuid = svm_set_supported_cpuid,
4415 4425
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 6b06ab8748dd..c2a34bb5ad93 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -5,6 +5,7 @@
5#include <asm/vmx.h> 5#include <asm/vmx.h>
6#include <asm/svm.h> 6#include <asm/svm.h>
7#include <asm/clocksource.h> 7#include <asm/clocksource.h>
8#include <asm/pvclock-abi.h>
8 9
9#undef TRACE_SYSTEM 10#undef TRACE_SYSTEM
10#define TRACE_SYSTEM kvm 11#define TRACE_SYSTEM kvm
@@ -877,6 +878,42 @@ TRACE_EVENT(kvm_ple_window,
877#define trace_kvm_ple_window_shrink(vcpu_id, new, old) \ 878#define trace_kvm_ple_window_shrink(vcpu_id, new, old) \
878 trace_kvm_ple_window(false, vcpu_id, new, old) 879 trace_kvm_ple_window(false, vcpu_id, new, old)
879 880
881TRACE_EVENT(kvm_pvclock_update,
882 TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock),
883 TP_ARGS(vcpu_id, pvclock),
884
885 TP_STRUCT__entry(
886 __field( unsigned int, vcpu_id )
887 __field( __u32, version )
888 __field( __u64, tsc_timestamp )
889 __field( __u64, system_time )
890 __field( __u32, tsc_to_system_mul )
891 __field( __s8, tsc_shift )
892 __field( __u8, flags )
893 ),
894
895 TP_fast_assign(
896 __entry->vcpu_id = vcpu_id;
897 __entry->version = pvclock->version;
898 __entry->tsc_timestamp = pvclock->tsc_timestamp;
899 __entry->system_time = pvclock->system_time;
900 __entry->tsc_to_system_mul = pvclock->tsc_to_system_mul;
901 __entry->tsc_shift = pvclock->tsc_shift;
902 __entry->flags = pvclock->flags;
903 ),
904
905 TP_printk("vcpu_id %u, pvclock { version %u, tsc_timestamp 0x%llx, "
906 "system_time 0x%llx, tsc_to_system_mul 0x%x, tsc_shift %d, "
907 "flags 0x%x }",
908 __entry->vcpu_id,
909 __entry->version,
910 __entry->tsc_timestamp,
911 __entry->system_time,
912 __entry->tsc_to_system_mul,
913 __entry->tsc_shift,
914 __entry->flags)
915);
916
880#endif /* _TRACE_KVM_H */ 917#endif /* _TRACE_KVM_H */
881 918
882#undef TRACE_INCLUDE_PATH 919#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3e556c68351b..feb852b04598 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -99,13 +99,15 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
99static bool __read_mostly nested = 0; 99static bool __read_mostly nested = 0;
100module_param(nested, bool, S_IRUGO); 100module_param(nested, bool, S_IRUGO);
101 101
102static u64 __read_mostly host_xss;
103
102#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) 104#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
103#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) 105#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
104#define KVM_VM_CR0_ALWAYS_ON \ 106#define KVM_VM_CR0_ALWAYS_ON \
105 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) 107 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
106#define KVM_CR4_GUEST_OWNED_BITS \ 108#define KVM_CR4_GUEST_OWNED_BITS \
107 (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ 109 (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
108 | X86_CR4_OSXMMEXCPT) 110 | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
109 111
110#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) 112#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
111#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) 113#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
@@ -214,6 +216,7 @@ struct __packed vmcs12 {
214 u64 virtual_apic_page_addr; 216 u64 virtual_apic_page_addr;
215 u64 apic_access_addr; 217 u64 apic_access_addr;
216 u64 ept_pointer; 218 u64 ept_pointer;
219 u64 xss_exit_bitmap;
217 u64 guest_physical_address; 220 u64 guest_physical_address;
218 u64 vmcs_link_pointer; 221 u64 vmcs_link_pointer;
219 u64 guest_ia32_debugctl; 222 u64 guest_ia32_debugctl;
@@ -616,6 +619,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
616 FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), 619 FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
617 FIELD64(APIC_ACCESS_ADDR, apic_access_addr), 620 FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
618 FIELD64(EPT_POINTER, ept_pointer), 621 FIELD64(EPT_POINTER, ept_pointer),
622 FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
619 FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), 623 FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
620 FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), 624 FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
621 FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl), 625 FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
@@ -720,12 +724,15 @@ static const unsigned short vmcs_field_to_offset_table[] = {
720 FIELD(HOST_RSP, host_rsp), 724 FIELD(HOST_RSP, host_rsp),
721 FIELD(HOST_RIP, host_rip), 725 FIELD(HOST_RIP, host_rip),
722}; 726};
723static const int max_vmcs_field = ARRAY_SIZE(vmcs_field_to_offset_table);
724 727
725static inline short vmcs_field_to_offset(unsigned long field) 728static inline short vmcs_field_to_offset(unsigned long field)
726{ 729{
727 if (field >= max_vmcs_field || vmcs_field_to_offset_table[field] == 0) 730 BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
728 return -1; 731
732 if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
733 vmcs_field_to_offset_table[field] == 0)
734 return -ENOENT;
735
729 return vmcs_field_to_offset_table[field]; 736 return vmcs_field_to_offset_table[field];
730} 737}
731 738
@@ -758,6 +765,7 @@ static u64 construct_eptp(unsigned long root_hpa);
758static void kvm_cpu_vmxon(u64 addr); 765static void kvm_cpu_vmxon(u64 addr);
759static void kvm_cpu_vmxoff(void); 766static void kvm_cpu_vmxoff(void);
760static bool vmx_mpx_supported(void); 767static bool vmx_mpx_supported(void);
768static bool vmx_xsaves_supported(void);
761static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); 769static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
762static void vmx_set_segment(struct kvm_vcpu *vcpu, 770static void vmx_set_segment(struct kvm_vcpu *vcpu,
763 struct kvm_segment *var, int seg); 771 struct kvm_segment *var, int seg);
@@ -1098,6 +1106,12 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
1098 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); 1106 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
1099} 1107}
1100 1108
1109static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
1110{
1111 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) &&
1112 vmx_xsaves_supported();
1113}
1114
1101static inline bool is_exception(u32 intr_info) 1115static inline bool is_exception(u32 intr_info)
1102{ 1116{
1103 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) 1117 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -1659,12 +1673,20 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
1659 vmx->guest_msrs[efer_offset].mask = ~ignore_bits; 1673 vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
1660 1674
1661 clear_atomic_switch_msr(vmx, MSR_EFER); 1675 clear_atomic_switch_msr(vmx, MSR_EFER);
1662 /* On ept, can't emulate nx, and must switch nx atomically */ 1676
1663 if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) { 1677 /*
1678 * On EPT, we can't emulate NX, so we must switch EFER atomically.
1679 * On CPUs that support "load IA32_EFER", always switch EFER
1680 * atomically, since it's faster than switching it manually.
1681 */
1682 if (cpu_has_load_ia32_efer ||
1683 (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
1664 guest_efer = vmx->vcpu.arch.efer; 1684 guest_efer = vmx->vcpu.arch.efer;
1665 if (!(guest_efer & EFER_LMA)) 1685 if (!(guest_efer & EFER_LMA))
1666 guest_efer &= ~EFER_LME; 1686 guest_efer &= ~EFER_LME;
1667 add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer); 1687 if (guest_efer != host_efer)
1688 add_atomic_switch_msr(vmx, MSR_EFER,
1689 guest_efer, host_efer);
1668 return false; 1690 return false;
1669 } 1691 }
1670 1692
@@ -2377,12 +2399,13 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2377 nested_vmx_secondary_ctls_low = 0; 2399 nested_vmx_secondary_ctls_low = 0;
2378 nested_vmx_secondary_ctls_high &= 2400 nested_vmx_secondary_ctls_high &=
2379 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 2401 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2380 SECONDARY_EXEC_UNRESTRICTED_GUEST | 2402 SECONDARY_EXEC_WBINVD_EXITING |
2381 SECONDARY_EXEC_WBINVD_EXITING; 2403 SECONDARY_EXEC_XSAVES;
2382 2404
2383 if (enable_ept) { 2405 if (enable_ept) {
2384 /* nested EPT: emulate EPT also to L1 */ 2406 /* nested EPT: emulate EPT also to L1 */
2385 nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; 2407 nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT |
2408 SECONDARY_EXEC_UNRESTRICTED_GUEST;
2386 nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | 2409 nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
2387 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | 2410 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
2388 VMX_EPT_INVEPT_BIT; 2411 VMX_EPT_INVEPT_BIT;
@@ -2558,6 +2581,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2558 if (!nested_vmx_allowed(vcpu)) 2581 if (!nested_vmx_allowed(vcpu))
2559 return 1; 2582 return 1;
2560 return vmx_get_vmx_msr(vcpu, msr_index, pdata); 2583 return vmx_get_vmx_msr(vcpu, msr_index, pdata);
2584 case MSR_IA32_XSS:
2585 if (!vmx_xsaves_supported())
2586 return 1;
2587 data = vcpu->arch.ia32_xss;
2588 break;
2561 case MSR_TSC_AUX: 2589 case MSR_TSC_AUX:
2562 if (!to_vmx(vcpu)->rdtscp_enabled) 2590 if (!to_vmx(vcpu)->rdtscp_enabled)
2563 return 1; 2591 return 1;
@@ -2649,6 +2677,22 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2649 break; 2677 break;
2650 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: 2678 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2651 return 1; /* they are read-only */ 2679 return 1; /* they are read-only */
2680 case MSR_IA32_XSS:
2681 if (!vmx_xsaves_supported())
2682 return 1;
2683 /*
2684 * The only supported bit as of Skylake is bit 8, but
2685 * it is not supported on KVM.
2686 */
2687 if (data != 0)
2688 return 1;
2689 vcpu->arch.ia32_xss = data;
2690 if (vcpu->arch.ia32_xss != host_xss)
2691 add_atomic_switch_msr(vmx, MSR_IA32_XSS,
2692 vcpu->arch.ia32_xss, host_xss);
2693 else
2694 clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
2695 break;
2652 case MSR_TSC_AUX: 2696 case MSR_TSC_AUX:
2653 if (!vmx->rdtscp_enabled) 2697 if (!vmx->rdtscp_enabled)
2654 return 1; 2698 return 1;
@@ -2884,7 +2928,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2884 SECONDARY_EXEC_ENABLE_INVPCID | 2928 SECONDARY_EXEC_ENABLE_INVPCID |
2885 SECONDARY_EXEC_APIC_REGISTER_VIRT | 2929 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2886 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 2930 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
2887 SECONDARY_EXEC_SHADOW_VMCS; 2931 SECONDARY_EXEC_SHADOW_VMCS |
2932 SECONDARY_EXEC_XSAVES;
2888 if (adjust_vmx_controls(min2, opt2, 2933 if (adjust_vmx_controls(min2, opt2,
2889 MSR_IA32_VMX_PROCBASED_CTLS2, 2934 MSR_IA32_VMX_PROCBASED_CTLS2,
2890 &_cpu_based_2nd_exec_control) < 0) 2935 &_cpu_based_2nd_exec_control) < 0)
@@ -3007,6 +3052,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
3007 } 3052 }
3008 } 3053 }
3009 3054
3055 if (cpu_has_xsaves)
3056 rdmsrl(MSR_IA32_XSS, host_xss);
3057
3010 return 0; 3058 return 0;
3011} 3059}
3012 3060
@@ -3110,76 +3158,6 @@ static __init int alloc_kvm_area(void)
3110 return 0; 3158 return 0;
3111} 3159}
3112 3160
3113static __init int hardware_setup(void)
3114{
3115 if (setup_vmcs_config(&vmcs_config) < 0)
3116 return -EIO;
3117
3118 if (boot_cpu_has(X86_FEATURE_NX))
3119 kvm_enable_efer_bits(EFER_NX);
3120
3121 if (!cpu_has_vmx_vpid())
3122 enable_vpid = 0;
3123 if (!cpu_has_vmx_shadow_vmcs())
3124 enable_shadow_vmcs = 0;
3125 if (enable_shadow_vmcs)
3126 init_vmcs_shadow_fields();
3127
3128 if (!cpu_has_vmx_ept() ||
3129 !cpu_has_vmx_ept_4levels()) {
3130 enable_ept = 0;
3131 enable_unrestricted_guest = 0;
3132 enable_ept_ad_bits = 0;
3133 }
3134
3135 if (!cpu_has_vmx_ept_ad_bits())
3136 enable_ept_ad_bits = 0;
3137
3138 if (!cpu_has_vmx_unrestricted_guest())
3139 enable_unrestricted_guest = 0;
3140
3141 if (!cpu_has_vmx_flexpriority()) {
3142 flexpriority_enabled = 0;
3143
3144 /*
3145 * set_apic_access_page_addr() is used to reload apic access
3146 * page upon invalidation. No need to do anything if the
3147 * processor does not have the APIC_ACCESS_ADDR VMCS field.
3148 */
3149 kvm_x86_ops->set_apic_access_page_addr = NULL;
3150 }
3151
3152 if (!cpu_has_vmx_tpr_shadow())
3153 kvm_x86_ops->update_cr8_intercept = NULL;
3154
3155 if (enable_ept && !cpu_has_vmx_ept_2m_page())
3156 kvm_disable_largepages();
3157
3158 if (!cpu_has_vmx_ple())
3159 ple_gap = 0;
3160
3161 if (!cpu_has_vmx_apicv())
3162 enable_apicv = 0;
3163
3164 if (enable_apicv)
3165 kvm_x86_ops->update_cr8_intercept = NULL;
3166 else {
3167 kvm_x86_ops->hwapic_irr_update = NULL;
3168 kvm_x86_ops->deliver_posted_interrupt = NULL;
3169 kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
3170 }
3171
3172 if (nested)
3173 nested_vmx_setup_ctls_msrs();
3174
3175 return alloc_kvm_area();
3176}
3177
3178static __exit void hardware_unsetup(void)
3179{
3180 free_kvm_area();
3181}
3182
3183static bool emulation_required(struct kvm_vcpu *vcpu) 3161static bool emulation_required(struct kvm_vcpu *vcpu)
3184{ 3162{
3185 return emulate_invalid_guest_state && !guest_state_valid(vcpu); 3163 return emulate_invalid_guest_state && !guest_state_valid(vcpu);
@@ -4396,6 +4374,7 @@ static void ept_set_mmio_spte_mask(void)
4396 kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull); 4374 kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
4397} 4375}
4398 4376
4377#define VMX_XSS_EXIT_BITMAP 0
4399/* 4378/*
4400 * Sets up the vmcs for emulated real mode. 4379 * Sets up the vmcs for emulated real mode.
4401 */ 4380 */
@@ -4505,6 +4484,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4505 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); 4484 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
4506 set_cr4_guest_host_mask(vmx); 4485 set_cr4_guest_host_mask(vmx);
4507 4486
4487 if (vmx_xsaves_supported())
4488 vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
4489
4508 return 0; 4490 return 0;
4509} 4491}
4510 4492
@@ -5163,13 +5145,20 @@ static int handle_cr(struct kvm_vcpu *vcpu)
5163static int handle_dr(struct kvm_vcpu *vcpu) 5145static int handle_dr(struct kvm_vcpu *vcpu)
5164{ 5146{
5165 unsigned long exit_qualification; 5147 unsigned long exit_qualification;
5166 int dr, reg; 5148 int dr, dr7, reg;
5149
5150 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5151 dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
5152
5153 /* First, if DR does not exist, trigger UD */
5154 if (!kvm_require_dr(vcpu, dr))
5155 return 1;
5167 5156
5168 /* Do not handle if the CPL > 0, will trigger GP on re-entry */ 5157 /* Do not handle if the CPL > 0, will trigger GP on re-entry */
5169 if (!kvm_require_cpl(vcpu, 0)) 5158 if (!kvm_require_cpl(vcpu, 0))
5170 return 1; 5159 return 1;
5171 dr = vmcs_readl(GUEST_DR7); 5160 dr7 = vmcs_readl(GUEST_DR7);
5172 if (dr & DR7_GD) { 5161 if (dr7 & DR7_GD) {
5173 /* 5162 /*
5174 * As the vm-exit takes precedence over the debug trap, we 5163 * As the vm-exit takes precedence over the debug trap, we
5175 * need to emulate the latter, either for the host or the 5164 * need to emulate the latter, either for the host or the
@@ -5177,17 +5166,14 @@ static int handle_dr(struct kvm_vcpu *vcpu)
5177 */ 5166 */
5178 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { 5167 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
5179 vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; 5168 vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
5180 vcpu->run->debug.arch.dr7 = dr; 5169 vcpu->run->debug.arch.dr7 = dr7;
5181 vcpu->run->debug.arch.pc = 5170 vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
5182 vmcs_readl(GUEST_CS_BASE) +
5183 vmcs_readl(GUEST_RIP);
5184 vcpu->run->debug.arch.exception = DB_VECTOR; 5171 vcpu->run->debug.arch.exception = DB_VECTOR;
5185 vcpu->run->exit_reason = KVM_EXIT_DEBUG; 5172 vcpu->run->exit_reason = KVM_EXIT_DEBUG;
5186 return 0; 5173 return 0;
5187 } else { 5174 } else {
5188 vcpu->arch.dr7 &= ~DR7_GD; 5175 vcpu->arch.dr6 &= ~15;
5189 vcpu->arch.dr6 |= DR6_BD | DR6_RTM; 5176 vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
5190 vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
5191 kvm_queue_exception(vcpu, DB_VECTOR); 5177 kvm_queue_exception(vcpu, DB_VECTOR);
5192 return 1; 5178 return 1;
5193 } 5179 }
@@ -5209,8 +5195,6 @@ static int handle_dr(struct kvm_vcpu *vcpu)
5209 return 1; 5195 return 1;
5210 } 5196 }
5211 5197
5212 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5213 dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
5214 reg = DEBUG_REG_ACCESS_REG(exit_qualification); 5198 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
5215 if (exit_qualification & TYPE_MOV_FROM_DR) { 5199 if (exit_qualification & TYPE_MOV_FROM_DR) {
5216 unsigned long val; 5200 unsigned long val;
@@ -5391,6 +5375,20 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
5391 return 1; 5375 return 1;
5392} 5376}
5393 5377
5378static int handle_xsaves(struct kvm_vcpu *vcpu)
5379{
5380 skip_emulated_instruction(vcpu);
5381 WARN(1, "this should never happen\n");
5382 return 1;
5383}
5384
5385static int handle_xrstors(struct kvm_vcpu *vcpu)
5386{
5387 skip_emulated_instruction(vcpu);
5388 WARN(1, "this should never happen\n");
5389 return 1;
5390}
5391
5394static int handle_apic_access(struct kvm_vcpu *vcpu) 5392static int handle_apic_access(struct kvm_vcpu *vcpu)
5395{ 5393{
5396 if (likely(fasteoi)) { 5394 if (likely(fasteoi)) {
@@ -5492,7 +5490,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
5492 } 5490 }
5493 5491
5494 /* clear all local breakpoint enable flags */ 5492 /* clear all local breakpoint enable flags */
5495 vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55); 5493 vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155);
5496 5494
5497 /* 5495 /*
5498 * TODO: What about debug traps on tss switch? 5496 * TODO: What about debug traps on tss switch?
@@ -5539,11 +5537,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
5539 trace_kvm_page_fault(gpa, exit_qualification); 5537 trace_kvm_page_fault(gpa, exit_qualification);
5540 5538
5541 /* It is a write fault? */ 5539 /* It is a write fault? */
5542 error_code = exit_qualification & (1U << 1); 5540 error_code = exit_qualification & PFERR_WRITE_MASK;
5543 /* It is a fetch fault? */ 5541 /* It is a fetch fault? */
5544 error_code |= (exit_qualification & (1U << 2)) << 2; 5542 error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
5545 /* ept page table is present? */ 5543 /* ept page table is present? */
5546 error_code |= (exit_qualification >> 3) & 0x1; 5544 error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;
5547 5545
5548 vcpu->arch.exit_qualification = exit_qualification; 5546 vcpu->arch.exit_qualification = exit_qualification;
5549 5547
@@ -5785,6 +5783,204 @@ static void update_ple_window_actual_max(void)
5785 ple_window_grow, INT_MIN); 5783 ple_window_grow, INT_MIN);
5786} 5784}
5787 5785
5786static __init int hardware_setup(void)
5787{
5788 int r = -ENOMEM, i, msr;
5789
5790 rdmsrl_safe(MSR_EFER, &host_efer);
5791
5792 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
5793 kvm_define_shared_msr(i, vmx_msr_index[i]);
5794
5795 vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
5796 if (!vmx_io_bitmap_a)
5797 return r;
5798
5799 vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
5800 if (!vmx_io_bitmap_b)
5801 goto out;
5802
5803 vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
5804 if (!vmx_msr_bitmap_legacy)
5805 goto out1;
5806
5807 vmx_msr_bitmap_legacy_x2apic =
5808 (unsigned long *)__get_free_page(GFP_KERNEL);
5809 if (!vmx_msr_bitmap_legacy_x2apic)
5810 goto out2;
5811
5812 vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
5813 if (!vmx_msr_bitmap_longmode)
5814 goto out3;
5815
5816 vmx_msr_bitmap_longmode_x2apic =
5817 (unsigned long *)__get_free_page(GFP_KERNEL);
5818 if (!vmx_msr_bitmap_longmode_x2apic)
5819 goto out4;
5820 vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
5821 if (!vmx_vmread_bitmap)
5822 goto out5;
5823
5824 vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
5825 if (!vmx_vmwrite_bitmap)
5826 goto out6;
5827
5828 memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
5829 memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
5830
5831 /*
5832 * Allow direct access to the PC debug port (it is often used for I/O
5833 * delays, but the vmexits simply slow things down).
5834 */
5835 memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
5836 clear_bit(0x80, vmx_io_bitmap_a);
5837
5838 memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
5839
5840 memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
5841 memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
5842
5843 vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
5844 vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
5845 vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
5846 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
5847 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
5848 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
5849 vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
5850
5851 memcpy(vmx_msr_bitmap_legacy_x2apic,
5852 vmx_msr_bitmap_legacy, PAGE_SIZE);
5853 memcpy(vmx_msr_bitmap_longmode_x2apic,
5854 vmx_msr_bitmap_longmode, PAGE_SIZE);
5855
5856 if (enable_apicv) {
5857 for (msr = 0x800; msr <= 0x8ff; msr++)
5858 vmx_disable_intercept_msr_read_x2apic(msr);
5859
5860 /* According SDM, in x2apic mode, the whole id reg is used.
5861 * But in KVM, it only use the highest eight bits. Need to
5862 * intercept it */
5863 vmx_enable_intercept_msr_read_x2apic(0x802);
5864 /* TMCCT */
5865 vmx_enable_intercept_msr_read_x2apic(0x839);
5866 /* TPR */
5867 vmx_disable_intercept_msr_write_x2apic(0x808);
5868 /* EOI */
5869 vmx_disable_intercept_msr_write_x2apic(0x80b);
5870 /* SELF-IPI */
5871 vmx_disable_intercept_msr_write_x2apic(0x83f);
5872 }
5873
5874 if (enable_ept) {
5875 kvm_mmu_set_mask_ptes(0ull,
5876 (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
5877 (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
5878 0ull, VMX_EPT_EXECUTABLE_MASK);
5879 ept_set_mmio_spte_mask();
5880 kvm_enable_tdp();
5881 } else
5882 kvm_disable_tdp();
5883
5884 update_ple_window_actual_max();
5885
5886 if (setup_vmcs_config(&vmcs_config) < 0) {
5887 r = -EIO;
5888 goto out7;
5889 }
5890
5891 if (boot_cpu_has(X86_FEATURE_NX))
5892 kvm_enable_efer_bits(EFER_NX);
5893
5894 if (!cpu_has_vmx_vpid())
5895 enable_vpid = 0;
5896 if (!cpu_has_vmx_shadow_vmcs())
5897 enable_shadow_vmcs = 0;
5898 if (enable_shadow_vmcs)
5899 init_vmcs_shadow_fields();
5900
5901 if (!cpu_has_vmx_ept() ||
5902 !cpu_has_vmx_ept_4levels()) {
5903 enable_ept = 0;
5904 enable_unrestricted_guest = 0;
5905 enable_ept_ad_bits = 0;
5906 }
5907
5908 if (!cpu_has_vmx_ept_ad_bits())
5909 enable_ept_ad_bits = 0;
5910
5911 if (!cpu_has_vmx_unrestricted_guest())
5912 enable_unrestricted_guest = 0;
5913
5914 if (!cpu_has_vmx_flexpriority()) {
5915 flexpriority_enabled = 0;
5916
5917 /*
5918 * set_apic_access_page_addr() is used to reload apic access
5919 * page upon invalidation. No need to do anything if the
5920 * processor does not have the APIC_ACCESS_ADDR VMCS field.
5921 */
5922 kvm_x86_ops->set_apic_access_page_addr = NULL;
5923 }
5924
5925 if (!cpu_has_vmx_tpr_shadow())
5926 kvm_x86_ops->update_cr8_intercept = NULL;
5927
5928 if (enable_ept && !cpu_has_vmx_ept_2m_page())
5929 kvm_disable_largepages();
5930
5931 if (!cpu_has_vmx_ple())
5932 ple_gap = 0;
5933
5934 if (!cpu_has_vmx_apicv())
5935 enable_apicv = 0;
5936
5937 if (enable_apicv)
5938 kvm_x86_ops->update_cr8_intercept = NULL;
5939 else {
5940 kvm_x86_ops->hwapic_irr_update = NULL;
5941 kvm_x86_ops->deliver_posted_interrupt = NULL;
5942 kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
5943 }
5944
5945 if (nested)
5946 nested_vmx_setup_ctls_msrs();
5947
5948 return alloc_kvm_area();
5949
5950out7:
5951 free_page((unsigned long)vmx_vmwrite_bitmap);
5952out6:
5953 free_page((unsigned long)vmx_vmread_bitmap);
5954out5:
5955 free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
5956out4:
5957 free_page((unsigned long)vmx_msr_bitmap_longmode);
5958out3:
5959 free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
5960out2:
5961 free_page((unsigned long)vmx_msr_bitmap_legacy);
5962out1:
5963 free_page((unsigned long)vmx_io_bitmap_b);
5964out:
5965 free_page((unsigned long)vmx_io_bitmap_a);
5966
5967 return r;
5968}
5969
5970static __exit void hardware_unsetup(void)
5971{
5972 free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
5973 free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
5974 free_page((unsigned long)vmx_msr_bitmap_legacy);
5975 free_page((unsigned long)vmx_msr_bitmap_longmode);
5976 free_page((unsigned long)vmx_io_bitmap_b);
5977 free_page((unsigned long)vmx_io_bitmap_a);
5978 free_page((unsigned long)vmx_vmwrite_bitmap);
5979 free_page((unsigned long)vmx_vmread_bitmap);
5980
5981 free_kvm_area();
5982}
5983
5788/* 5984/*
5789 * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE 5985 * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
5790 * exiting, so only get here on cpu with PAUSE-Loop-Exiting. 5986 * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
@@ -6361,58 +6557,60 @@ static inline int vmcs_field_readonly(unsigned long field)
6361 * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of 6557 * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
6362 * 64-bit fields are to be returned). 6558 * 64-bit fields are to be returned).
6363 */ 6559 */
6364static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu, 6560static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
6365 unsigned long field, u64 *ret) 6561 unsigned long field, u64 *ret)
6366{ 6562{
6367 short offset = vmcs_field_to_offset(field); 6563 short offset = vmcs_field_to_offset(field);
6368 char *p; 6564 char *p;
6369 6565
6370 if (offset < 0) 6566 if (offset < 0)
6371 return 0; 6567 return offset;
6372 6568
6373 p = ((char *)(get_vmcs12(vcpu))) + offset; 6569 p = ((char *)(get_vmcs12(vcpu))) + offset;
6374 6570
6375 switch (vmcs_field_type(field)) { 6571 switch (vmcs_field_type(field)) {
6376 case VMCS_FIELD_TYPE_NATURAL_WIDTH: 6572 case VMCS_FIELD_TYPE_NATURAL_WIDTH:
6377 *ret = *((natural_width *)p); 6573 *ret = *((natural_width *)p);
6378 return 1; 6574 return 0;
6379 case VMCS_FIELD_TYPE_U16: 6575 case VMCS_FIELD_TYPE_U16:
6380 *ret = *((u16 *)p); 6576 *ret = *((u16 *)p);
6381 return 1; 6577 return 0;
6382 case VMCS_FIELD_TYPE_U32: 6578 case VMCS_FIELD_TYPE_U32:
6383 *ret = *((u32 *)p); 6579 *ret = *((u32 *)p);
6384 return 1; 6580 return 0;
6385 case VMCS_FIELD_TYPE_U64: 6581 case VMCS_FIELD_TYPE_U64:
6386 *ret = *((u64 *)p); 6582 *ret = *((u64 *)p);
6387 return 1; 6583 return 0;
6388 default: 6584 default:
6389 return 0; /* can never happen. */ 6585 WARN_ON(1);
6586 return -ENOENT;
6390 } 6587 }
6391} 6588}
6392 6589
6393 6590
6394static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu, 6591static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
6395 unsigned long field, u64 field_value){ 6592 unsigned long field, u64 field_value){
6396 short offset = vmcs_field_to_offset(field); 6593 short offset = vmcs_field_to_offset(field);
6397 char *p = ((char *) get_vmcs12(vcpu)) + offset; 6594 char *p = ((char *) get_vmcs12(vcpu)) + offset;
6398 if (offset < 0) 6595 if (offset < 0)
6399 return false; 6596 return offset;
6400 6597
6401 switch (vmcs_field_type(field)) { 6598 switch (vmcs_field_type(field)) {
6402 case VMCS_FIELD_TYPE_U16: 6599 case VMCS_FIELD_TYPE_U16:
6403 *(u16 *)p = field_value; 6600 *(u16 *)p = field_value;
6404 return true; 6601 return 0;
6405 case VMCS_FIELD_TYPE_U32: 6602 case VMCS_FIELD_TYPE_U32:
6406 *(u32 *)p = field_value; 6603 *(u32 *)p = field_value;
6407 return true; 6604 return 0;
6408 case VMCS_FIELD_TYPE_U64: 6605 case VMCS_FIELD_TYPE_U64:
6409 *(u64 *)p = field_value; 6606 *(u64 *)p = field_value;
6410 return true; 6607 return 0;
6411 case VMCS_FIELD_TYPE_NATURAL_WIDTH: 6608 case VMCS_FIELD_TYPE_NATURAL_WIDTH:
6412 *(natural_width *)p = field_value; 6609 *(natural_width *)p = field_value;
6413 return true; 6610 return 0;
6414 default: 6611 default:
6415 return false; /* can never happen. */ 6612 WARN_ON(1);
6613 return -ENOENT;
6416 } 6614 }
6417 6615
6418} 6616}
@@ -6445,6 +6643,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
6445 case VMCS_FIELD_TYPE_NATURAL_WIDTH: 6643 case VMCS_FIELD_TYPE_NATURAL_WIDTH:
6446 field_value = vmcs_readl(field); 6644 field_value = vmcs_readl(field);
6447 break; 6645 break;
6646 default:
6647 WARN_ON(1);
6648 continue;
6448 } 6649 }
6449 vmcs12_write_any(&vmx->vcpu, field, field_value); 6650 vmcs12_write_any(&vmx->vcpu, field, field_value);
6450 } 6651 }
@@ -6490,6 +6691,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
6490 case VMCS_FIELD_TYPE_NATURAL_WIDTH: 6691 case VMCS_FIELD_TYPE_NATURAL_WIDTH:
6491 vmcs_writel(field, (long)field_value); 6692 vmcs_writel(field, (long)field_value);
6492 break; 6693 break;
6694 default:
6695 WARN_ON(1);
6696 break;
6493 } 6697 }
6494 } 6698 }
6495 } 6699 }
@@ -6528,7 +6732,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
6528 /* Decode instruction info and find the field to read */ 6732 /* Decode instruction info and find the field to read */
6529 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); 6733 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
6530 /* Read the field, zero-extended to a u64 field_value */ 6734 /* Read the field, zero-extended to a u64 field_value */
6531 if (!vmcs12_read_any(vcpu, field, &field_value)) { 6735 if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
6532 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); 6736 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
6533 skip_emulated_instruction(vcpu); 6737 skip_emulated_instruction(vcpu);
6534 return 1; 6738 return 1;
@@ -6598,7 +6802,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
6598 return 1; 6802 return 1;
6599 } 6803 }
6600 6804
6601 if (!vmcs12_write_any(vcpu, field, field_value)) { 6805 if (vmcs12_write_any(vcpu, field, field_value) < 0) {
6602 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); 6806 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
6603 skip_emulated_instruction(vcpu); 6807 skip_emulated_instruction(vcpu);
6604 return 1; 6808 return 1;
@@ -6802,6 +7006,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
6802 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, 7006 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
6803 [EXIT_REASON_INVEPT] = handle_invept, 7007 [EXIT_REASON_INVEPT] = handle_invept,
6804 [EXIT_REASON_INVVPID] = handle_invvpid, 7008 [EXIT_REASON_INVVPID] = handle_invvpid,
7009 [EXIT_REASON_XSAVES] = handle_xsaves,
7010 [EXIT_REASON_XRSTORS] = handle_xrstors,
6805}; 7011};
6806 7012
6807static const int kvm_vmx_max_exit_handlers = 7013static const int kvm_vmx_max_exit_handlers =
@@ -7089,6 +7295,14 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
7089 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); 7295 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
7090 case EXIT_REASON_XSETBV: 7296 case EXIT_REASON_XSETBV:
7091 return 1; 7297 return 1;
7298 case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
7299 /*
7300 * This should never happen, since it is not possible to
7301 * set XSS to a non-zero value---neither in L1 nor in L2.
7302 * If it were, XSS would have to be checked against
7303 * the XSS exit bitmap in vmcs12.
7304 */
7305 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
7092 default: 7306 default:
7093 return 1; 7307 return 1;
7094 } 7308 }
@@ -7277,6 +7491,9 @@ static void vmx_set_rvi(int vector)
7277 u16 status; 7491 u16 status;
7278 u8 old; 7492 u8 old;
7279 7493
7494 if (vector == -1)
7495 vector = 0;
7496
7280 status = vmcs_read16(GUEST_INTR_STATUS); 7497 status = vmcs_read16(GUEST_INTR_STATUS);
7281 old = (u8)status & 0xff; 7498 old = (u8)status & 0xff;
7282 if ((u8)vector != old) { 7499 if ((u8)vector != old) {
@@ -7288,22 +7505,23 @@ static void vmx_set_rvi(int vector)
7288 7505
7289static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) 7506static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
7290{ 7507{
7508 if (!is_guest_mode(vcpu)) {
7509 vmx_set_rvi(max_irr);
7510 return;
7511 }
7512
7291 if (max_irr == -1) 7513 if (max_irr == -1)
7292 return; 7514 return;
7293 7515
7294 /* 7516 /*
7295 * If a vmexit is needed, vmx_check_nested_events handles it. 7517 * In guest mode. If a vmexit is needed, vmx_check_nested_events
7518 * handles it.
7296 */ 7519 */
7297 if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) 7520 if (nested_exit_on_intr(vcpu))
7298 return; 7521 return;
7299 7522
7300 if (!is_guest_mode(vcpu)) {
7301 vmx_set_rvi(max_irr);
7302 return;
7303 }
7304
7305 /* 7523 /*
7306 * Fall back to pre-APICv interrupt injection since L2 7524 * Else, fall back to pre-APICv interrupt injection since L2
7307 * is run without virtual interrupt delivery. 7525 * is run without virtual interrupt delivery.
7308 */ 7526 */
7309 if (!kvm_event_needs_reinjection(vcpu) && 7527 if (!kvm_event_needs_reinjection(vcpu) &&
@@ -7400,6 +7618,12 @@ static bool vmx_mpx_supported(void)
7400 (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS); 7618 (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
7401} 7619}
7402 7620
7621static bool vmx_xsaves_supported(void)
7622{
7623 return vmcs_config.cpu_based_2nd_exec_ctrl &
7624 SECONDARY_EXEC_XSAVES;
7625}
7626
7403static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) 7627static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
7404{ 7628{
7405 u32 exit_intr_info; 7629 u32 exit_intr_info;
@@ -8135,6 +8359,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8135 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); 8359 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
8136 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip); 8360 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
8137 8361
8362 if (nested_cpu_has_xsaves(vmcs12))
8363 vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
8138 vmcs_write64(VMCS_LINK_POINTER, -1ull); 8364 vmcs_write64(VMCS_LINK_POINTER, -1ull);
8139 8365
8140 exec_control = vmcs12->pin_based_vm_exec_control; 8366 exec_control = vmcs12->pin_based_vm_exec_control;
@@ -8775,6 +9001,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
8775 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); 9001 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
8776 if (vmx_mpx_supported()) 9002 if (vmx_mpx_supported())
8777 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); 9003 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
9004 if (nested_cpu_has_xsaves(vmcs12))
9005 vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
8778 9006
8779 /* update exit information fields: */ 9007 /* update exit information fields: */
8780 9008
@@ -9176,6 +9404,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
9176 .check_intercept = vmx_check_intercept, 9404 .check_intercept = vmx_check_intercept,
9177 .handle_external_intr = vmx_handle_external_intr, 9405 .handle_external_intr = vmx_handle_external_intr,
9178 .mpx_supported = vmx_mpx_supported, 9406 .mpx_supported = vmx_mpx_supported,
9407 .xsaves_supported = vmx_xsaves_supported,
9179 9408
9180 .check_nested_events = vmx_check_nested_events, 9409 .check_nested_events = vmx_check_nested_events,
9181 9410
@@ -9184,150 +9413,21 @@ static struct kvm_x86_ops vmx_x86_ops = {
9184 9413
9185static int __init vmx_init(void) 9414static int __init vmx_init(void)
9186{ 9415{
9187 int r, i, msr; 9416 int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
9188 9417 __alignof__(struct vcpu_vmx), THIS_MODULE);
9189 rdmsrl_safe(MSR_EFER, &host_efer);
9190
9191 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
9192 kvm_define_shared_msr(i, vmx_msr_index[i]);
9193
9194 vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
9195 if (!vmx_io_bitmap_a)
9196 return -ENOMEM;
9197
9198 r = -ENOMEM;
9199
9200 vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
9201 if (!vmx_io_bitmap_b)
9202 goto out;
9203
9204 vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
9205 if (!vmx_msr_bitmap_legacy)
9206 goto out1;
9207
9208 vmx_msr_bitmap_legacy_x2apic =
9209 (unsigned long *)__get_free_page(GFP_KERNEL);
9210 if (!vmx_msr_bitmap_legacy_x2apic)
9211 goto out2;
9212
9213 vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
9214 if (!vmx_msr_bitmap_longmode)
9215 goto out3;
9216
9217 vmx_msr_bitmap_longmode_x2apic =
9218 (unsigned long *)__get_free_page(GFP_KERNEL);
9219 if (!vmx_msr_bitmap_longmode_x2apic)
9220 goto out4;
9221 vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
9222 if (!vmx_vmread_bitmap)
9223 goto out5;
9224
9225 vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
9226 if (!vmx_vmwrite_bitmap)
9227 goto out6;
9228
9229 memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
9230 memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
9231
9232 /*
9233 * Allow direct access to the PC debug port (it is often used for I/O
9234 * delays, but the vmexits simply slow things down).
9235 */
9236 memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
9237 clear_bit(0x80, vmx_io_bitmap_a);
9238
9239 memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
9240
9241 memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
9242 memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
9243
9244 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
9245
9246 r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
9247 __alignof__(struct vcpu_vmx), THIS_MODULE);
9248 if (r) 9418 if (r)
9249 goto out7; 9419 return r;
9250 9420
9251#ifdef CONFIG_KEXEC 9421#ifdef CONFIG_KEXEC
9252 rcu_assign_pointer(crash_vmclear_loaded_vmcss, 9422 rcu_assign_pointer(crash_vmclear_loaded_vmcss,
9253 crash_vmclear_local_loaded_vmcss); 9423 crash_vmclear_local_loaded_vmcss);
9254#endif 9424#endif
9255 9425
9256 vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
9257 vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
9258 vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
9259 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
9260 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
9261 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
9262 vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
9263
9264 memcpy(vmx_msr_bitmap_legacy_x2apic,
9265 vmx_msr_bitmap_legacy, PAGE_SIZE);
9266 memcpy(vmx_msr_bitmap_longmode_x2apic,
9267 vmx_msr_bitmap_longmode, PAGE_SIZE);
9268
9269 if (enable_apicv) {
9270 for (msr = 0x800; msr <= 0x8ff; msr++)
9271 vmx_disable_intercept_msr_read_x2apic(msr);
9272
9273 /* According SDM, in x2apic mode, the whole id reg is used.
9274 * But in KVM, it only use the highest eight bits. Need to
9275 * intercept it */
9276 vmx_enable_intercept_msr_read_x2apic(0x802);
9277 /* TMCCT */
9278 vmx_enable_intercept_msr_read_x2apic(0x839);
9279 /* TPR */
9280 vmx_disable_intercept_msr_write_x2apic(0x808);
9281 /* EOI */
9282 vmx_disable_intercept_msr_write_x2apic(0x80b);
9283 /* SELF-IPI */
9284 vmx_disable_intercept_msr_write_x2apic(0x83f);
9285 }
9286
9287 if (enable_ept) {
9288 kvm_mmu_set_mask_ptes(0ull,
9289 (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
9290 (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
9291 0ull, VMX_EPT_EXECUTABLE_MASK);
9292 ept_set_mmio_spte_mask();
9293 kvm_enable_tdp();
9294 } else
9295 kvm_disable_tdp();
9296
9297 update_ple_window_actual_max();
9298
9299 return 0; 9426 return 0;
9300
9301out7:
9302 free_page((unsigned long)vmx_vmwrite_bitmap);
9303out6:
9304 free_page((unsigned long)vmx_vmread_bitmap);
9305out5:
9306 free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
9307out4:
9308 free_page((unsigned long)vmx_msr_bitmap_longmode);
9309out3:
9310 free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
9311out2:
9312 free_page((unsigned long)vmx_msr_bitmap_legacy);
9313out1:
9314 free_page((unsigned long)vmx_io_bitmap_b);
9315out:
9316 free_page((unsigned long)vmx_io_bitmap_a);
9317 return r;
9318} 9427}
9319 9428
9320static void __exit vmx_exit(void) 9429static void __exit vmx_exit(void)
9321{ 9430{
9322 free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
9323 free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
9324 free_page((unsigned long)vmx_msr_bitmap_legacy);
9325 free_page((unsigned long)vmx_msr_bitmap_longmode);
9326 free_page((unsigned long)vmx_io_bitmap_b);
9327 free_page((unsigned long)vmx_io_bitmap_a);
9328 free_page((unsigned long)vmx_vmwrite_bitmap);
9329 free_page((unsigned long)vmx_vmread_bitmap);
9330
9331#ifdef CONFIG_KEXEC 9431#ifdef CONFIG_KEXEC
9332 RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); 9432 RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
9333 synchronize_rcu(); 9433 synchronize_rcu();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0033df32a745..c259814200bd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -27,6 +27,7 @@
27#include "kvm_cache_regs.h" 27#include "kvm_cache_regs.h"
28#include "x86.h" 28#include "x86.h"
29#include "cpuid.h" 29#include "cpuid.h"
30#include "assigned-dev.h"
30 31
31#include <linux/clocksource.h> 32#include <linux/clocksource.h>
32#include <linux/interrupt.h> 33#include <linux/interrupt.h>
@@ -353,6 +354,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
353 354
354 if (!vcpu->arch.exception.pending) { 355 if (!vcpu->arch.exception.pending) {
355 queue: 356 queue:
357 if (has_error && !is_protmode(vcpu))
358 has_error = false;
356 vcpu->arch.exception.pending = true; 359 vcpu->arch.exception.pending = true;
357 vcpu->arch.exception.has_error_code = has_error; 360 vcpu->arch.exception.has_error_code = has_error;
358 vcpu->arch.exception.nr = nr; 361 vcpu->arch.exception.nr = nr;
@@ -455,6 +458,16 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
455} 458}
456EXPORT_SYMBOL_GPL(kvm_require_cpl); 459EXPORT_SYMBOL_GPL(kvm_require_cpl);
457 460
461bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
462{
463 if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
464 return true;
465
466 kvm_queue_exception(vcpu, UD_VECTOR);
467 return false;
468}
469EXPORT_SYMBOL_GPL(kvm_require_dr);
470
458/* 471/*
459 * This function will be used to read from the physical memory of the currently 472 * This function will be used to read from the physical memory of the currently
460 * running guest. The difference to kvm_read_guest_page is that this function 473 * running guest. The difference to kvm_read_guest_page is that this function
@@ -656,6 +669,12 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
656 if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR))) 669 if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
657 return 1; 670 return 1;
658 671
672 if (xcr0 & XSTATE_AVX512) {
673 if (!(xcr0 & XSTATE_YMM))
674 return 1;
675 if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512)
676 return 1;
677 }
659 kvm_put_guest_xcr0(vcpu); 678 kvm_put_guest_xcr0(vcpu);
660 vcpu->arch.xcr0 = xcr0; 679 vcpu->arch.xcr0 = xcr0;
661 680
@@ -732,6 +751,10 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
732 751
733int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 752int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
734{ 753{
754#ifdef CONFIG_X86_64
755 cr3 &= ~CR3_PCID_INVD;
756#endif
757
735 if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { 758 if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
736 kvm_mmu_sync_roots(vcpu); 759 kvm_mmu_sync_roots(vcpu);
737 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 760 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -811,8 +834,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
811 vcpu->arch.eff_db[dr] = val; 834 vcpu->arch.eff_db[dr] = val;
812 break; 835 break;
813 case 4: 836 case 4:
814 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
815 return 1; /* #UD */
816 /* fall through */ 837 /* fall through */
817 case 6: 838 case 6:
818 if (val & 0xffffffff00000000ULL) 839 if (val & 0xffffffff00000000ULL)
@@ -821,8 +842,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
821 kvm_update_dr6(vcpu); 842 kvm_update_dr6(vcpu);
822 break; 843 break;
823 case 5: 844 case 5:
824 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
825 return 1; /* #UD */
826 /* fall through */ 845 /* fall through */
827 default: /* 7 */ 846 default: /* 7 */
828 if (val & 0xffffffff00000000ULL) 847 if (val & 0xffffffff00000000ULL)
@@ -837,27 +856,21 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
837 856
838int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) 857int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
839{ 858{
840 int res; 859 if (__kvm_set_dr(vcpu, dr, val)) {
841
842 res = __kvm_set_dr(vcpu, dr, val);
843 if (res > 0)
844 kvm_queue_exception(vcpu, UD_VECTOR);
845 else if (res < 0)
846 kvm_inject_gp(vcpu, 0); 860 kvm_inject_gp(vcpu, 0);
847 861 return 1;
848 return res; 862 }
863 return 0;
849} 864}
850EXPORT_SYMBOL_GPL(kvm_set_dr); 865EXPORT_SYMBOL_GPL(kvm_set_dr);
851 866
852static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) 867int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
853{ 868{
854 switch (dr) { 869 switch (dr) {
855 case 0 ... 3: 870 case 0 ... 3:
856 *val = vcpu->arch.db[dr]; 871 *val = vcpu->arch.db[dr];
857 break; 872 break;
858 case 4: 873 case 4:
859 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
860 return 1;
861 /* fall through */ 874 /* fall through */
862 case 6: 875 case 6:
863 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) 876 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
@@ -866,23 +879,11 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
866 *val = kvm_x86_ops->get_dr6(vcpu); 879 *val = kvm_x86_ops->get_dr6(vcpu);
867 break; 880 break;
868 case 5: 881 case 5:
869 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
870 return 1;
871 /* fall through */ 882 /* fall through */
872 default: /* 7 */ 883 default: /* 7 */
873 *val = vcpu->arch.dr7; 884 *val = vcpu->arch.dr7;
874 break; 885 break;
875 } 886 }
876
877 return 0;
878}
879
880int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
881{
882 if (_kvm_get_dr(vcpu, dr, val)) {
883 kvm_queue_exception(vcpu, UD_VECTOR);
884 return 1;
885 }
886 return 0; 887 return 0;
887} 888}
888EXPORT_SYMBOL_GPL(kvm_get_dr); 889EXPORT_SYMBOL_GPL(kvm_get_dr);
@@ -1237,21 +1238,22 @@ void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
1237{ 1238{
1238#ifdef CONFIG_X86_64 1239#ifdef CONFIG_X86_64
1239 bool vcpus_matched; 1240 bool vcpus_matched;
1240 bool do_request = false;
1241 struct kvm_arch *ka = &vcpu->kvm->arch; 1241 struct kvm_arch *ka = &vcpu->kvm->arch;
1242 struct pvclock_gtod_data *gtod = &pvclock_gtod_data; 1242 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1243 1243
1244 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 == 1244 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
1245 atomic_read(&vcpu->kvm->online_vcpus)); 1245 atomic_read(&vcpu->kvm->online_vcpus));
1246 1246
1247 if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC) 1247 /*
1248 if (!ka->use_master_clock) 1248 * Once the masterclock is enabled, always perform request in
1249 do_request = 1; 1249 * order to update it.
1250 1250 *
1251 if (!vcpus_matched && ka->use_master_clock) 1251 * In order to enable masterclock, the host clocksource must be TSC
1252 do_request = 1; 1252 * and the vcpus need to have matched TSCs. When that happens,
1253 1253 * perform request to enable masterclock.
1254 if (do_request) 1254 */
1255 if (ka->use_master_clock ||
1256 (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
1255 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); 1257 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
1256 1258
1257 trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc, 1259 trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
@@ -1637,16 +1639,16 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1637 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; 1639 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
1638 vcpu->last_guest_tsc = tsc_timestamp; 1640 vcpu->last_guest_tsc = tsc_timestamp;
1639 1641
1642 if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
1643 &guest_hv_clock, sizeof(guest_hv_clock))))
1644 return 0;
1645
1640 /* 1646 /*
1641 * The interface expects us to write an even number signaling that the 1647 * The interface expects us to write an even number signaling that the
1642 * update is finished. Since the guest won't see the intermediate 1648 * update is finished. Since the guest won't see the intermediate
1643 * state, we just increase by 2 at the end. 1649 * state, we just increase by 2 at the end.
1644 */ 1650 */
1645 vcpu->hv_clock.version += 2; 1651 vcpu->hv_clock.version = guest_hv_clock.version + 2;
1646
1647 if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
1648 &guest_hv_clock, sizeof(guest_hv_clock))))
1649 return 0;
1650 1652
1651 /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ 1653 /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
1652 pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); 1654 pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
@@ -1662,6 +1664,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1662 1664
1663 vcpu->hv_clock.flags = pvclock_flags; 1665 vcpu->hv_clock.flags = pvclock_flags;
1664 1666
1667 trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
1668
1665 kvm_write_guest_cached(v->kvm, &vcpu->pv_time, 1669 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
1666 &vcpu->hv_clock, 1670 &vcpu->hv_clock,
1667 sizeof(vcpu->hv_clock)); 1671 sizeof(vcpu->hv_clock));
@@ -2140,7 +2144,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2140 case MSR_IA32_TSC_ADJUST: 2144 case MSR_IA32_TSC_ADJUST:
2141 if (guest_cpuid_has_tsc_adjust(vcpu)) { 2145 if (guest_cpuid_has_tsc_adjust(vcpu)) {
2142 if (!msr_info->host_initiated) { 2146 if (!msr_info->host_initiated) {
2143 u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; 2147 s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
2144 kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true); 2148 kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
2145 } 2149 }
2146 vcpu->arch.ia32_tsc_adjust_msr = data; 2150 vcpu->arch.ia32_tsc_adjust_msr = data;
@@ -3106,7 +3110,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
3106 unsigned long val; 3110 unsigned long val;
3107 3111
3108 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); 3112 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
3109 _kvm_get_dr(vcpu, 6, &val); 3113 kvm_get_dr(vcpu, 6, &val);
3110 dbgregs->dr6 = val; 3114 dbgregs->dr6 = val;
3111 dbgregs->dr7 = vcpu->arch.dr7; 3115 dbgregs->dr7 = vcpu->arch.dr7;
3112 dbgregs->flags = 0; 3116 dbgregs->flags = 0;
@@ -3128,15 +3132,89 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
3128 return 0; 3132 return 0;
3129} 3133}
3130 3134
3135#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
3136
3137static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
3138{
3139 struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
3140 u64 xstate_bv = xsave->xsave_hdr.xstate_bv;
3141 u64 valid;
3142
3143 /*
3144 * Copy legacy XSAVE area, to avoid complications with CPUID
3145 * leaves 0 and 1 in the loop below.
3146 */
3147 memcpy(dest, xsave, XSAVE_HDR_OFFSET);
3148
3149 /* Set XSTATE_BV */
3150 *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
3151
3152 /*
3153 * Copy each region from the possibly compacted offset to the
3154 * non-compacted offset.
3155 */
3156 valid = xstate_bv & ~XSTATE_FPSSE;
3157 while (valid) {
3158 u64 feature = valid & -valid;
3159 int index = fls64(feature) - 1;
3160 void *src = get_xsave_addr(xsave, feature);
3161
3162 if (src) {
3163 u32 size, offset, ecx, edx;
3164 cpuid_count(XSTATE_CPUID, index,
3165 &size, &offset, &ecx, &edx);
3166 memcpy(dest + offset, src, size);
3167 }
3168
3169 valid -= feature;
3170 }
3171}
3172
3173static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
3174{
3175 struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
3176 u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
3177 u64 valid;
3178
3179 /*
3180 * Copy legacy XSAVE area, to avoid complications with CPUID
3181 * leaves 0 and 1 in the loop below.
3182 */
3183 memcpy(xsave, src, XSAVE_HDR_OFFSET);
3184
3185 /* Set XSTATE_BV and possibly XCOMP_BV. */
3186 xsave->xsave_hdr.xstate_bv = xstate_bv;
3187 if (cpu_has_xsaves)
3188 xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
3189
3190 /*
3191 * Copy each region from the non-compacted offset to the
3192 * possibly compacted offset.
3193 */
3194 valid = xstate_bv & ~XSTATE_FPSSE;
3195 while (valid) {
3196 u64 feature = valid & -valid;
3197 int index = fls64(feature) - 1;
3198 void *dest = get_xsave_addr(xsave, feature);
3199
3200 if (dest) {
3201 u32 size, offset, ecx, edx;
3202 cpuid_count(XSTATE_CPUID, index,
3203 &size, &offset, &ecx, &edx);
3204 memcpy(dest, src + offset, size);
3205 } else
3206 WARN_ON_ONCE(1);
3207
3208 valid -= feature;
3209 }
3210}
3211
3131static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, 3212static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
3132 struct kvm_xsave *guest_xsave) 3213 struct kvm_xsave *guest_xsave)
3133{ 3214{
3134 if (cpu_has_xsave) { 3215 if (cpu_has_xsave) {
3135 memcpy(guest_xsave->region, 3216 memset(guest_xsave, 0, sizeof(struct kvm_xsave));
3136 &vcpu->arch.guest_fpu.state->xsave, 3217 fill_xsave((u8 *) guest_xsave->region, vcpu);
3137 vcpu->arch.guest_xstate_size);
3138 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
3139 vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
3140 } else { 3218 } else {
3141 memcpy(guest_xsave->region, 3219 memcpy(guest_xsave->region,
3142 &vcpu->arch.guest_fpu.state->fxsave, 3220 &vcpu->arch.guest_fpu.state->fxsave,
@@ -3160,8 +3238,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
3160 */ 3238 */
3161 if (xstate_bv & ~kvm_supported_xcr0()) 3239 if (xstate_bv & ~kvm_supported_xcr0())
3162 return -EINVAL; 3240 return -EINVAL;
3163 memcpy(&vcpu->arch.guest_fpu.state->xsave, 3241 load_xsave(vcpu, (u8 *)guest_xsave->region);
3164 guest_xsave->region, vcpu->arch.guest_xstate_size);
3165 } else { 3242 } else {
3166 if (xstate_bv & ~XSTATE_FPSSE) 3243 if (xstate_bv & ~XSTATE_FPSSE)
3167 return -EINVAL; 3244 return -EINVAL;
@@ -4004,7 +4081,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
4004 } 4081 }
4005 4082
4006 default: 4083 default:
4007 ; 4084 r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
4008 } 4085 }
4009out: 4086out:
4010 return r; 4087 return r;
@@ -4667,7 +4744,7 @@ static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
4667 4744
4668int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) 4745int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
4669{ 4746{
4670 return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); 4747 return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
4671} 4748}
4672 4749
4673int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) 4750int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
@@ -5211,21 +5288,17 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag
5211 5288
5212static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) 5289static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
5213{ 5290{
5214 struct kvm_run *kvm_run = vcpu->run;
5215 unsigned long eip = vcpu->arch.emulate_ctxt.eip;
5216 u32 dr6 = 0;
5217
5218 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && 5291 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
5219 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { 5292 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
5220 dr6 = kvm_vcpu_check_hw_bp(eip, 0, 5293 struct kvm_run *kvm_run = vcpu->run;
5294 unsigned long eip = kvm_get_linear_rip(vcpu);
5295 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
5221 vcpu->arch.guest_debug_dr7, 5296 vcpu->arch.guest_debug_dr7,
5222 vcpu->arch.eff_db); 5297 vcpu->arch.eff_db);
5223 5298
5224 if (dr6 != 0) { 5299 if (dr6 != 0) {
5225 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; 5300 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
5226 kvm_run->debug.arch.pc = kvm_rip_read(vcpu) + 5301 kvm_run->debug.arch.pc = eip;
5227 get_segment_base(vcpu, VCPU_SREG_CS);
5228
5229 kvm_run->debug.arch.exception = DB_VECTOR; 5302 kvm_run->debug.arch.exception = DB_VECTOR;
5230 kvm_run->exit_reason = KVM_EXIT_DEBUG; 5303 kvm_run->exit_reason = KVM_EXIT_DEBUG;
5231 *r = EMULATE_USER_EXIT; 5304 *r = EMULATE_USER_EXIT;
@@ -5235,7 +5308,8 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
5235 5308
5236 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) && 5309 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
5237 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) { 5310 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
5238 dr6 = kvm_vcpu_check_hw_bp(eip, 0, 5311 unsigned long eip = kvm_get_linear_rip(vcpu);
5312 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
5239 vcpu->arch.dr7, 5313 vcpu->arch.dr7,
5240 vcpu->arch.db); 5314 vcpu->arch.db);
5241 5315
@@ -5365,7 +5439,9 @@ restart:
5365 kvm_rip_write(vcpu, ctxt->eip); 5439 kvm_rip_write(vcpu, ctxt->eip);
5366 if (r == EMULATE_DONE) 5440 if (r == EMULATE_DONE)
5367 kvm_vcpu_check_singlestep(vcpu, rflags, &r); 5441 kvm_vcpu_check_singlestep(vcpu, rflags, &r);
5368 __kvm_set_rflags(vcpu, ctxt->eflags); 5442 if (!ctxt->have_exception ||
5443 exception_type(ctxt->exception.vector) == EXCPT_TRAP)
5444 __kvm_set_rflags(vcpu, ctxt->eflags);
5369 5445
5370 /* 5446 /*
5371 * For STI, interrupts are shadowed; so KVM_REQ_EVENT will 5447 * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
@@ -5965,6 +6041,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
5965 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | 6041 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
5966 X86_EFLAGS_RF); 6042 X86_EFLAGS_RF);
5967 6043
6044 if (vcpu->arch.exception.nr == DB_VECTOR &&
6045 (vcpu->arch.dr7 & DR7_GD)) {
6046 vcpu->arch.dr7 &= ~DR7_GD;
6047 kvm_update_dr7(vcpu);
6048 }
6049
5968 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, 6050 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
5969 vcpu->arch.exception.has_error_code, 6051 vcpu->arch.exception.has_error_code,
5970 vcpu->arch.exception.error_code, 6052 vcpu->arch.exception.error_code,
@@ -6873,6 +6955,9 @@ int fx_init(struct kvm_vcpu *vcpu)
6873 return err; 6955 return err;
6874 6956
6875 fpu_finit(&vcpu->arch.guest_fpu); 6957 fpu_finit(&vcpu->arch.guest_fpu);
6958 if (cpu_has_xsaves)
6959 vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv =
6960 host_xcr0 | XSTATE_COMPACTION_ENABLED;
6876 6961
6877 /* 6962 /*
6878 * Ensure guest xcr0 is valid for loading 6963 * Ensure guest xcr0 is valid for loading
@@ -7024,7 +7109,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
7024 kvm_x86_ops->vcpu_reset(vcpu); 7109 kvm_x86_ops->vcpu_reset(vcpu);
7025} 7110}
7026 7111
7027void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) 7112void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
7028{ 7113{
7029 struct kvm_segment cs; 7114 struct kvm_segment cs;
7030 7115
@@ -7256,6 +7341,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
7256 if (type) 7341 if (type)
7257 return -EINVAL; 7342 return -EINVAL;
7258 7343
7344 INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
7259 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 7345 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
7260 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); 7346 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
7261 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 7347 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
@@ -7536,12 +7622,18 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
7536 return kvm_x86_ops->interrupt_allowed(vcpu); 7622 return kvm_x86_ops->interrupt_allowed(vcpu);
7537} 7623}
7538 7624
7539bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) 7625unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
7540{ 7626{
7541 unsigned long current_rip = kvm_rip_read(vcpu) + 7627 if (is_64_bit_mode(vcpu))
7542 get_segment_base(vcpu, VCPU_SREG_CS); 7628 return kvm_rip_read(vcpu);
7629 return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
7630 kvm_rip_read(vcpu));
7631}
7632EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
7543 7633
7544 return current_rip == linear_rip; 7634bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
7635{
7636 return kvm_get_linear_rip(vcpu) == linear_rip;
7545} 7637}
7546EXPORT_SYMBOL_GPL(kvm_is_linear_rip); 7638EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
7547 7639
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 7cb9c45a5fe0..cc1d61af6140 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -162,7 +162,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
162bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data); 162bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
163 163
164#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ 164#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
165 | XSTATE_BNDREGS | XSTATE_BNDCSR) 165 | XSTATE_BNDREGS | XSTATE_BNDCSR \
166 | XSTATE_AVX512)
166extern u64 host_xcr0; 167extern u64 host_xcr0;
167 168
168extern u64 kvm_supported_xcr0(void); 169extern u64 kvm_supported_xcr0(void);
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index ad9db6045b2f..b3f45a578344 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -60,7 +60,8 @@ struct arch_timer_cpu {
60 60
61#ifdef CONFIG_KVM_ARM_TIMER 61#ifdef CONFIG_KVM_ARM_TIMER
62int kvm_timer_hyp_init(void); 62int kvm_timer_hyp_init(void);
63int kvm_timer_init(struct kvm *kvm); 63void kvm_timer_enable(struct kvm *kvm);
64void kvm_timer_init(struct kvm *kvm);
64void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, 65void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
65 const struct kvm_irq_level *irq); 66 const struct kvm_irq_level *irq);
66void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); 67void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
@@ -77,11 +78,8 @@ static inline int kvm_timer_hyp_init(void)
77 return 0; 78 return 0;
78}; 79};
79 80
80static inline int kvm_timer_init(struct kvm *kvm) 81static inline void kvm_timer_enable(struct kvm *kvm) {}
81{ 82static inline void kvm_timer_init(struct kvm *kvm) {}
82 return 0;
83}
84
85static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, 83static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
86 const struct kvm_irq_level *irq) {} 84 const struct kvm_irq_level *irq) {}
87static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} 85static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 206dcc3b3f7a..ac4888dc86bc 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -274,7 +274,7 @@ struct kvm_exit_mmio;
274#ifdef CONFIG_KVM_ARM_VGIC 274#ifdef CONFIG_KVM_ARM_VGIC
275int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); 275int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
276int kvm_vgic_hyp_init(void); 276int kvm_vgic_hyp_init(void);
277int kvm_vgic_init(struct kvm *kvm); 277int kvm_vgic_map_resources(struct kvm *kvm);
278int kvm_vgic_create(struct kvm *kvm); 278int kvm_vgic_create(struct kvm *kvm);
279void kvm_vgic_destroy(struct kvm *kvm); 279void kvm_vgic_destroy(struct kvm *kvm);
280void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); 280void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
@@ -287,7 +287,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
287 struct kvm_exit_mmio *mmio); 287 struct kvm_exit_mmio *mmio);
288 288
289#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) 289#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
290#define vgic_initialized(k) ((k)->arch.vgic.ready) 290#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
291#define vgic_ready(k) ((k)->arch.vgic.ready)
291 292
292int vgic_v2_probe(struct device_node *vgic_node, 293int vgic_v2_probe(struct device_node *vgic_node,
293 const struct vgic_ops **ops, 294 const struct vgic_ops **ops,
@@ -321,7 +322,7 @@ static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr,
321 return -ENXIO; 322 return -ENXIO;
322} 323}
323 324
324static inline int kvm_vgic_init(struct kvm *kvm) 325static inline int kvm_vgic_map_resources(struct kvm *kvm)
325{ 326{
326 return 0; 327 return 0;
327} 328}
@@ -373,6 +374,11 @@ static inline bool vgic_initialized(struct kvm *kvm)
373{ 374{
374 return true; 375 return true;
375} 376}
377
378static inline bool vgic_ready(struct kvm *kvm)
379{
380 return true;
381}
376#endif 382#endif
377 383
378#endif 384#endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a6059bdf7b03..26f106022c88 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -43,6 +43,7 @@
43 * include/linux/kvm_h. 43 * include/linux/kvm_h.
44 */ 44 */
45#define KVM_MEMSLOT_INVALID (1UL << 16) 45#define KVM_MEMSLOT_INVALID (1UL << 16)
46#define KVM_MEMSLOT_INCOHERENT (1UL << 17)
46 47
47/* Two fragments for cross MMIO pages. */ 48/* Two fragments for cross MMIO pages. */
48#define KVM_MAX_MMIO_FRAGMENTS 2 49#define KVM_MAX_MMIO_FRAGMENTS 2
@@ -353,6 +354,8 @@ struct kvm_memslots {
353 struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM]; 354 struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];
354 /* The mapping table from slot id to the index in memslots[]. */ 355 /* The mapping table from slot id to the index in memslots[]. */
355 short id_to_index[KVM_MEM_SLOTS_NUM]; 356 short id_to_index[KVM_MEM_SLOTS_NUM];
357 atomic_t lru_slot;
358 int used_slots;
356}; 359};
357 360
358struct kvm { 361struct kvm {
@@ -395,7 +398,6 @@ struct kvm {
395 * Update side is protected by irq_lock. 398 * Update side is protected by irq_lock.
396 */ 399 */
397 struct kvm_irq_routing_table __rcu *irq_routing; 400 struct kvm_irq_routing_table __rcu *irq_routing;
398 struct hlist_head mask_notifier_list;
399#endif 401#endif
400#ifdef CONFIG_HAVE_KVM_IRQFD 402#ifdef CONFIG_HAVE_KVM_IRQFD
401 struct hlist_head irq_ack_notifier_list; 403 struct hlist_head irq_ack_notifier_list;
@@ -447,6 +449,14 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
447int __must_check vcpu_load(struct kvm_vcpu *vcpu); 449int __must_check vcpu_load(struct kvm_vcpu *vcpu);
448void vcpu_put(struct kvm_vcpu *vcpu); 450void vcpu_put(struct kvm_vcpu *vcpu);
449 451
452#ifdef __KVM_HAVE_IOAPIC
453void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
454#else
455static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
456{
457}
458#endif
459
450#ifdef CONFIG_HAVE_KVM_IRQFD 460#ifdef CONFIG_HAVE_KVM_IRQFD
451int kvm_irqfd_init(void); 461int kvm_irqfd_init(void);
452void kvm_irqfd_exit(void); 462void kvm_irqfd_exit(void);
@@ -711,44 +721,6 @@ struct kvm_irq_ack_notifier {
711 void (*irq_acked)(struct kvm_irq_ack_notifier *kian); 721 void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
712}; 722};
713 723
714struct kvm_assigned_dev_kernel {
715 struct kvm_irq_ack_notifier ack_notifier;
716 struct list_head list;
717 int assigned_dev_id;
718 int host_segnr;
719 int host_busnr;
720 int host_devfn;
721 unsigned int entries_nr;
722 int host_irq;
723 bool host_irq_disabled;
724 bool pci_2_3;
725 struct msix_entry *host_msix_entries;
726 int guest_irq;
727 struct msix_entry *guest_msix_entries;
728 unsigned long irq_requested_type;
729 int irq_source_id;
730 int flags;
731 struct pci_dev *dev;
732 struct kvm *kvm;
733 spinlock_t intx_lock;
734 spinlock_t intx_mask_lock;
735 char irq_name[32];
736 struct pci_saved_state *pci_saved_state;
737};
738
739struct kvm_irq_mask_notifier {
740 void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
741 int irq;
742 struct hlist_node link;
743};
744
745void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
746 struct kvm_irq_mask_notifier *kimn);
747void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
748 struct kvm_irq_mask_notifier *kimn);
749void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
750 bool mask);
751
752int kvm_irq_map_gsi(struct kvm *kvm, 724int kvm_irq_map_gsi(struct kvm *kvm,
753 struct kvm_kernel_irq_routing_entry *entries, int gsi); 725 struct kvm_kernel_irq_routing_entry *entries, int gsi);
754int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin); 726int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);
@@ -770,12 +742,6 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
770#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT 742#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
771int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); 743int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
772void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); 744void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
773int kvm_iommu_map_guest(struct kvm *kvm);
774int kvm_iommu_unmap_guest(struct kvm *kvm);
775int kvm_assign_device(struct kvm *kvm,
776 struct kvm_assigned_dev_kernel *assigned_dev);
777int kvm_deassign_device(struct kvm *kvm,
778 struct kvm_assigned_dev_kernel *assigned_dev);
779#else 745#else
780static inline int kvm_iommu_map_pages(struct kvm *kvm, 746static inline int kvm_iommu_map_pages(struct kvm *kvm,
781 struct kvm_memory_slot *slot) 747 struct kvm_memory_slot *slot)
@@ -787,11 +753,6 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm,
787 struct kvm_memory_slot *slot) 753 struct kvm_memory_slot *slot)
788{ 754{
789} 755}
790
791static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
792{
793 return 0;
794}
795#endif 756#endif
796 757
797static inline void kvm_guest_enter(void) 758static inline void kvm_guest_enter(void)
@@ -832,12 +793,28 @@ static inline void kvm_guest_exit(void)
832static inline struct kvm_memory_slot * 793static inline struct kvm_memory_slot *
833search_memslots(struct kvm_memslots *slots, gfn_t gfn) 794search_memslots(struct kvm_memslots *slots, gfn_t gfn)
834{ 795{
835 struct kvm_memory_slot *memslot; 796 int start = 0, end = slots->used_slots;
797 int slot = atomic_read(&slots->lru_slot);
798 struct kvm_memory_slot *memslots = slots->memslots;
799
800 if (gfn >= memslots[slot].base_gfn &&
801 gfn < memslots[slot].base_gfn + memslots[slot].npages)
802 return &memslots[slot];
836 803
837 kvm_for_each_memslot(memslot, slots) 804 while (start < end) {
838 if (gfn >= memslot->base_gfn && 805 slot = start + (end - start) / 2;
839 gfn < memslot->base_gfn + memslot->npages) 806
840 return memslot; 807 if (gfn >= memslots[slot].base_gfn)
808 end = slot;
809 else
810 start = slot + 1;
811 }
812
813 if (gfn >= memslots[start].base_gfn &&
814 gfn < memslots[start].base_gfn + memslots[start].npages) {
815 atomic_set(&slots->lru_slot, start);
816 return &memslots[start];
817 }
841 818
842 return NULL; 819 return NULL;
843} 820}
@@ -1011,25 +988,6 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }
1011 988
1012#endif 989#endif
1013 990
1014#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
1015
1016long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
1017 unsigned long arg);
1018
1019void kvm_free_all_assigned_devices(struct kvm *kvm);
1020
1021#else
1022
1023static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
1024 unsigned long arg)
1025{
1026 return -ENOTTY;
1027}
1028
1029static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
1030
1031#endif
1032
1033static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) 991static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
1034{ 992{
1035 set_bit(req, &vcpu->requests); 993 set_bit(req, &vcpu->requests);
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index b606bb689a3e..931da7e917cf 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -54,33 +54,6 @@ typedef u64 hfn_t;
54 54
55typedef hfn_t pfn_t; 55typedef hfn_t pfn_t;
56 56
57union kvm_ioapic_redirect_entry {
58 u64 bits;
59 struct {
60 u8 vector;
61 u8 delivery_mode:3;
62 u8 dest_mode:1;
63 u8 delivery_status:1;
64 u8 polarity:1;
65 u8 remote_irr:1;
66 u8 trig_mode:1;
67 u8 mask:1;
68 u8 reserve:7;
69 u8 reserved[4];
70 u8 dest_id;
71 } fields;
72};
73
74struct kvm_lapic_irq {
75 u32 vector;
76 u32 delivery_mode;
77 u32 dest_mode;
78 u32 level;
79 u32 trig_mode;
80 u32 shorthand;
81 u32 dest_id;
82};
83
84struct gfn_to_hva_cache { 57struct gfn_to_hva_cache {
85 u64 generation; 58 u64 generation;
86 gpa_t gpa; 59 gpa_t gpa;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 60768822b140..a37fd1224f36 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -647,11 +647,7 @@ struct kvm_ppc_smmu_info {
647#define KVM_CAP_MP_STATE 14 647#define KVM_CAP_MP_STATE 14
648#define KVM_CAP_COALESCED_MMIO 15 648#define KVM_CAP_COALESCED_MMIO 15
649#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ 649#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */
650#define KVM_CAP_DEVICE_ASSIGNMENT 17
651#define KVM_CAP_IOMMU 18 650#define KVM_CAP_IOMMU 18
652#ifdef __KVM_HAVE_MSI
653#define KVM_CAP_DEVICE_MSI 20
654#endif
655/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ 651/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
656#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 652#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
657#define KVM_CAP_USER_NMI 22 653#define KVM_CAP_USER_NMI 22
@@ -663,10 +659,6 @@ struct kvm_ppc_smmu_info {
663#endif 659#endif
664#define KVM_CAP_IRQ_ROUTING 25 660#define KVM_CAP_IRQ_ROUTING 25
665#define KVM_CAP_IRQ_INJECT_STATUS 26 661#define KVM_CAP_IRQ_INJECT_STATUS 26
666#define KVM_CAP_DEVICE_DEASSIGNMENT 27
667#ifdef __KVM_HAVE_MSIX
668#define KVM_CAP_DEVICE_MSIX 28
669#endif
670#define KVM_CAP_ASSIGN_DEV_IRQ 29 662#define KVM_CAP_ASSIGN_DEV_IRQ 29
671/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ 663/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
672#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 664#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
@@ -1107,9 +1099,6 @@ struct kvm_s390_ucas_mapping {
1107#define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64) 1099#define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64)
1108#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64) 1100#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64)
1109#define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce) 1101#define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce)
1110/* IA64 stack access */
1111#define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *)
1112#define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *)
1113/* Available with KVM_CAP_VCPU_EVENTS */ 1102/* Available with KVM_CAP_VCPU_EVENTS */
1114#define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) 1103#define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events)
1115#define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) 1104#define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events)
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 22fa819a9b6a..1c0772b340d8 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -61,12 +61,14 @@ static void timer_disarm(struct arch_timer_cpu *timer)
61 61
62static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) 62static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
63{ 63{
64 int ret;
64 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 65 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
65 66
66 timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; 67 timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
67 kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, 68 ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
68 timer->irq->irq, 69 timer->irq->irq,
69 timer->irq->level); 70 timer->irq->level);
71 WARN_ON(ret);
70} 72}
71 73
72static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) 74static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
@@ -307,12 +309,24 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
307 timer_disarm(timer); 309 timer_disarm(timer);
308} 310}
309 311
310int kvm_timer_init(struct kvm *kvm) 312void kvm_timer_enable(struct kvm *kvm)
311{ 313{
312 if (timecounter && wqueue) { 314 if (kvm->arch.timer.enabled)
313 kvm->arch.timer.cntvoff = kvm_phys_timer_read(); 315 return;
316
317 /*
318 * There is a potential race here between VCPUs starting for the first
319 * time, which may be enabling the timer multiple times. That doesn't
320 * hurt though, because we're just setting a variable to the same
321 * variable that it already was. The important thing is that all
322 * VCPUs have the enabled variable set, before entering the guest, if
323 * the arch timers are enabled.
324 */
325 if (timecounter && wqueue)
314 kvm->arch.timer.enabled = 1; 326 kvm->arch.timer.enabled = 1;
315 } 327}
316 328
317 return 0; 329void kvm_timer_init(struct kvm *kvm)
330{
331 kvm->arch.timer.cntvoff = kvm_phys_timer_read();
318} 332}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index aacdb59f30de..03affc7bf453 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -91,6 +91,7 @@
91#define ACCESS_WRITE_VALUE (3 << 1) 91#define ACCESS_WRITE_VALUE (3 << 1)
92#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) 92#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1))
93 93
94static int vgic_init(struct kvm *kvm);
94static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); 95static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
95static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); 96static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
96static void vgic_update_state(struct kvm *kvm); 97static void vgic_update_state(struct kvm *kvm);
@@ -1607,7 +1608,7 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
1607 } 1608 }
1608} 1609}
1609 1610
1610static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, 1611static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1611 unsigned int irq_num, bool level) 1612 unsigned int irq_num, bool level)
1612{ 1613{
1613 struct vgic_dist *dist = &kvm->arch.vgic; 1614 struct vgic_dist *dist = &kvm->arch.vgic;
@@ -1643,9 +1644,10 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1643 vgic_dist_irq_clear_level(vcpu, irq_num); 1644 vgic_dist_irq_clear_level(vcpu, irq_num);
1644 if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) 1645 if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
1645 vgic_dist_irq_clear_pending(vcpu, irq_num); 1646 vgic_dist_irq_clear_pending(vcpu, irq_num);
1646 } else {
1647 vgic_dist_irq_clear_pending(vcpu, irq_num);
1648 } 1647 }
1648
1649 ret = false;
1650 goto out;
1649 } 1651 }
1650 1652
1651 enabled = vgic_irq_is_enabled(vcpu, irq_num); 1653 enabled = vgic_irq_is_enabled(vcpu, irq_num);
@@ -1672,7 +1674,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1672out: 1674out:
1673 spin_unlock(&dist->lock); 1675 spin_unlock(&dist->lock);
1674 1676
1675 return ret; 1677 return ret ? cpuid : -EINVAL;
1676} 1678}
1677 1679
1678/** 1680/**
@@ -1692,11 +1694,26 @@ out:
1692int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, 1694int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
1693 bool level) 1695 bool level)
1694{ 1696{
1695 if (likely(vgic_initialized(kvm)) && 1697 int ret = 0;
1696 vgic_update_irq_pending(kvm, cpuid, irq_num, level)) 1698 int vcpu_id;
1697 vgic_kick_vcpus(kvm);
1698 1699
1699 return 0; 1700 if (unlikely(!vgic_initialized(kvm))) {
1701 mutex_lock(&kvm->lock);
1702 ret = vgic_init(kvm);
1703 mutex_unlock(&kvm->lock);
1704
1705 if (ret)
1706 goto out;
1707 }
1708
1709 vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
1710 if (vcpu_id >= 0) {
1711 /* kick the specified vcpu */
1712 kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
1713 }
1714
1715out:
1716 return ret;
1700} 1717}
1701 1718
1702static irqreturn_t vgic_maintenance_handler(int irq, void *data) 1719static irqreturn_t vgic_maintenance_handler(int irq, void *data)
@@ -1726,39 +1743,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
1726 1743
1727 int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; 1744 int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
1728 vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); 1745 vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
1729 vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL); 1746 vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
1730 1747
1731 if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) { 1748 if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
1732 kvm_vgic_vcpu_destroy(vcpu); 1749 kvm_vgic_vcpu_destroy(vcpu);
1733 return -ENOMEM; 1750 return -ENOMEM;
1734 } 1751 }
1735 1752
1736 return 0; 1753 memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
1737}
1738
1739/**
1740 * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
1741 * @vcpu: pointer to the vcpu struct
1742 *
1743 * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
1744 * this vcpu and enable the VGIC for this VCPU
1745 */
1746static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
1747{
1748 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1749 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1750 int i;
1751
1752 for (i = 0; i < dist->nr_irqs; i++) {
1753 if (i < VGIC_NR_PPIS)
1754 vgic_bitmap_set_irq_val(&dist->irq_enabled,
1755 vcpu->vcpu_id, i, 1);
1756 if (i < VGIC_NR_PRIVATE_IRQS)
1757 vgic_bitmap_set_irq_val(&dist->irq_cfg,
1758 vcpu->vcpu_id, i, VGIC_CFG_EDGE);
1759
1760 vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
1761 }
1762 1754
1763 /* 1755 /*
1764 * Store the number of LRs per vcpu, so we don't have to go 1756 * Store the number of LRs per vcpu, so we don't have to go
@@ -1767,7 +1759,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
1767 */ 1759 */
1768 vgic_cpu->nr_lr = vgic->nr_lr; 1760 vgic_cpu->nr_lr = vgic->nr_lr;
1769 1761
1770 vgic_enable(vcpu); 1762 return 0;
1771} 1763}
1772 1764
1773void kvm_vgic_destroy(struct kvm *kvm) 1765void kvm_vgic_destroy(struct kvm *kvm)
@@ -1798,20 +1790,21 @@ void kvm_vgic_destroy(struct kvm *kvm)
1798 dist->irq_spi_cpu = NULL; 1790 dist->irq_spi_cpu = NULL;
1799 dist->irq_spi_target = NULL; 1791 dist->irq_spi_target = NULL;
1800 dist->irq_pending_on_cpu = NULL; 1792 dist->irq_pending_on_cpu = NULL;
1793 dist->nr_cpus = 0;
1801} 1794}
1802 1795
1803/* 1796/*
1804 * Allocate and initialize the various data structures. Must be called 1797 * Allocate and initialize the various data structures. Must be called
1805 * with kvm->lock held! 1798 * with kvm->lock held!
1806 */ 1799 */
1807static int vgic_init_maps(struct kvm *kvm) 1800static int vgic_init(struct kvm *kvm)
1808{ 1801{
1809 struct vgic_dist *dist = &kvm->arch.vgic; 1802 struct vgic_dist *dist = &kvm->arch.vgic;
1810 struct kvm_vcpu *vcpu; 1803 struct kvm_vcpu *vcpu;
1811 int nr_cpus, nr_irqs; 1804 int nr_cpus, nr_irqs;
1812 int ret, i; 1805 int ret, i, vcpu_id;
1813 1806
1814 if (dist->nr_cpus) /* Already allocated */ 1807 if (vgic_initialized(kvm))
1815 return 0; 1808 return 0;
1816 1809
1817 nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); 1810 nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
@@ -1859,16 +1852,28 @@ static int vgic_init_maps(struct kvm *kvm)
1859 if (ret) 1852 if (ret)
1860 goto out; 1853 goto out;
1861 1854
1862 kvm_for_each_vcpu(i, vcpu, kvm) { 1855 for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
1856 vgic_set_target_reg(kvm, 0, i);
1857
1858 kvm_for_each_vcpu(vcpu_id, vcpu, kvm) {
1863 ret = vgic_vcpu_init_maps(vcpu, nr_irqs); 1859 ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
1864 if (ret) { 1860 if (ret) {
1865 kvm_err("VGIC: Failed to allocate vcpu memory\n"); 1861 kvm_err("VGIC: Failed to allocate vcpu memory\n");
1866 break; 1862 break;
1867 } 1863 }
1868 }
1869 1864
1870 for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) 1865 for (i = 0; i < dist->nr_irqs; i++) {
1871 vgic_set_target_reg(kvm, 0, i); 1866 if (i < VGIC_NR_PPIS)
1867 vgic_bitmap_set_irq_val(&dist->irq_enabled,
1868 vcpu->vcpu_id, i, 1);
1869 if (i < VGIC_NR_PRIVATE_IRQS)
1870 vgic_bitmap_set_irq_val(&dist->irq_cfg,
1871 vcpu->vcpu_id, i,
1872 VGIC_CFG_EDGE);
1873 }
1874
1875 vgic_enable(vcpu);
1876 }
1872 1877
1873out: 1878out:
1874 if (ret) 1879 if (ret)
@@ -1878,25 +1883,23 @@ out:
1878} 1883}
1879 1884
1880/** 1885/**
1881 * kvm_vgic_init - Initialize global VGIC state before running any VCPUs 1886 * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs
1882 * @kvm: pointer to the kvm struct 1887 * @kvm: pointer to the kvm struct
1883 * 1888 *
1884 * Map the virtual CPU interface into the VM before running any VCPUs. We 1889 * Map the virtual CPU interface into the VM before running any VCPUs. We
1885 * can't do this at creation time, because user space must first set the 1890 * can't do this at creation time, because user space must first set the
1886 * virtual CPU interface address in the guest physical address space. Also 1891 * virtual CPU interface address in the guest physical address space.
1887 * initialize the ITARGETSRn regs to 0 on the emulated distributor.
1888 */ 1892 */
1889int kvm_vgic_init(struct kvm *kvm) 1893int kvm_vgic_map_resources(struct kvm *kvm)
1890{ 1894{
1891 struct kvm_vcpu *vcpu; 1895 int ret = 0;
1892 int ret = 0, i;
1893 1896
1894 if (!irqchip_in_kernel(kvm)) 1897 if (!irqchip_in_kernel(kvm))
1895 return 0; 1898 return 0;
1896 1899
1897 mutex_lock(&kvm->lock); 1900 mutex_lock(&kvm->lock);
1898 1901
1899 if (vgic_initialized(kvm)) 1902 if (vgic_ready(kvm))
1900 goto out; 1903 goto out;
1901 1904
1902 if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || 1905 if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
@@ -1906,7 +1909,11 @@ int kvm_vgic_init(struct kvm *kvm)
1906 goto out; 1909 goto out;
1907 } 1910 }
1908 1911
1909 ret = vgic_init_maps(kvm); 1912 /*
1913 * Initialize the vgic if this hasn't already been done on demand by
1914 * accessing the vgic state from userspace.
1915 */
1916 ret = vgic_init(kvm);
1910 if (ret) { 1917 if (ret) {
1911 kvm_err("Unable to allocate maps\n"); 1918 kvm_err("Unable to allocate maps\n");
1912 goto out; 1919 goto out;
@@ -1920,9 +1927,6 @@ int kvm_vgic_init(struct kvm *kvm)
1920 goto out; 1927 goto out;
1921 } 1928 }
1922 1929
1923 kvm_for_each_vcpu(i, vcpu, kvm)
1924 kvm_vgic_vcpu_init(vcpu);
1925
1926 kvm->arch.vgic.ready = true; 1930 kvm->arch.vgic.ready = true;
1927out: 1931out:
1928 if (ret) 1932 if (ret)
@@ -2167,7 +2171,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
2167 2171
2168 mutex_lock(&dev->kvm->lock); 2172 mutex_lock(&dev->kvm->lock);
2169 2173
2170 ret = vgic_init_maps(dev->kvm); 2174 ret = vgic_init(dev->kvm);
2171 if (ret) 2175 if (ret)
2172 goto out; 2176 goto out;
2173 2177
@@ -2289,7 +2293,7 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2289 2293
2290 mutex_lock(&dev->kvm->lock); 2294 mutex_lock(&dev->kvm->lock);
2291 2295
2292 if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs) 2296 if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
2293 ret = -EBUSY; 2297 ret = -EBUSY;
2294 else 2298 else
2295 dev->kvm->arch.vgic.nr_irqs = val; 2299 dev->kvm->arch.vgic.nr_irqs = val;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index b0fb390943c6..148b2392c762 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,9 +36,6 @@
36#include <linux/seqlock.h> 36#include <linux/seqlock.h>
37#include <trace/events/kvm.h> 37#include <trace/events/kvm.h>
38 38
39#ifdef __KVM_HAVE_IOAPIC
40#include "ioapic.h"
41#endif
42#include "iodev.h" 39#include "iodev.h"
43 40
44#ifdef CONFIG_HAVE_KVM_IRQFD 41#ifdef CONFIG_HAVE_KVM_IRQFD
@@ -492,9 +489,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
492 mutex_lock(&kvm->irq_lock); 489 mutex_lock(&kvm->irq_lock);
493 hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); 490 hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
494 mutex_unlock(&kvm->irq_lock); 491 mutex_unlock(&kvm->irq_lock);
495#ifdef __KVM_HAVE_IOAPIC
496 kvm_vcpu_request_scan_ioapic(kvm); 492 kvm_vcpu_request_scan_ioapic(kvm);
497#endif
498} 493}
499 494
500void kvm_unregister_irq_ack_notifier(struct kvm *kvm, 495void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
@@ -504,9 +499,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
504 hlist_del_init_rcu(&kian->link); 499 hlist_del_init_rcu(&kian->link);
505 mutex_unlock(&kvm->irq_lock); 500 mutex_unlock(&kvm->irq_lock);
506 synchronize_srcu(&kvm->irq_srcu); 501 synchronize_srcu(&kvm->irq_srcu);
507#ifdef __KVM_HAVE_IOAPIC
508 kvm_vcpu_request_scan_ioapic(kvm); 502 kvm_vcpu_request_scan_ioapic(kvm);
509#endif
510} 503}
511#endif 504#endif
512 505
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3cee7b167052..f5283438ee05 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -124,15 +124,6 @@ int vcpu_load(struct kvm_vcpu *vcpu)
124 124
125 if (mutex_lock_killable(&vcpu->mutex)) 125 if (mutex_lock_killable(&vcpu->mutex))
126 return -EINTR; 126 return -EINTR;
127 if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
128 /* The thread running this VCPU changed. */
129 struct pid *oldpid = vcpu->pid;
130 struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
131 rcu_assign_pointer(vcpu->pid, newpid);
132 if (oldpid)
133 synchronize_rcu();
134 put_pid(oldpid);
135 }
136 cpu = get_cpu(); 127 cpu = get_cpu();
137 preempt_notifier_register(&vcpu->preempt_notifier); 128 preempt_notifier_register(&vcpu->preempt_notifier);
138 kvm_arch_vcpu_load(vcpu, cpu); 129 kvm_arch_vcpu_load(vcpu, cpu);
@@ -468,9 +459,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
468 if (r) 459 if (r)
469 goto out_err_no_disable; 460 goto out_err_no_disable;
470 461
471#ifdef CONFIG_HAVE_KVM_IRQCHIP
472 INIT_HLIST_HEAD(&kvm->mask_notifier_list);
473#endif
474#ifdef CONFIG_HAVE_KVM_IRQFD 462#ifdef CONFIG_HAVE_KVM_IRQFD
475 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); 463 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
476#endif 464#endif
@@ -668,48 +656,46 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
668 return 0; 656 return 0;
669} 657}
670 658
671static int cmp_memslot(const void *slot1, const void *slot2)
672{
673 struct kvm_memory_slot *s1, *s2;
674
675 s1 = (struct kvm_memory_slot *)slot1;
676 s2 = (struct kvm_memory_slot *)slot2;
677
678 if (s1->npages < s2->npages)
679 return 1;
680 if (s1->npages > s2->npages)
681 return -1;
682
683 return 0;
684}
685
686/* 659/*
687 * Sort the memslots base on its size, so the larger slots 660 * Insert memslot and re-sort memslots based on their GFN,
688 * will get better fit. 661 * so binary search could be used to lookup GFN.
662 * Sorting algorithm takes advantage of having initially
663 * sorted array and known changed memslot position.
689 */ 664 */
690static void sort_memslots(struct kvm_memslots *slots)
691{
692 int i;
693
694 sort(slots->memslots, KVM_MEM_SLOTS_NUM,
695 sizeof(struct kvm_memory_slot), cmp_memslot, NULL);
696
697 for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
698 slots->id_to_index[slots->memslots[i].id] = i;
699}
700
701static void update_memslots(struct kvm_memslots *slots, 665static void update_memslots(struct kvm_memslots *slots,
702 struct kvm_memory_slot *new) 666 struct kvm_memory_slot *new)
703{ 667{
704 if (new) { 668 int id = new->id;
705 int id = new->id; 669 int i = slots->id_to_index[id];
706 struct kvm_memory_slot *old = id_to_memslot(slots, id); 670 struct kvm_memory_slot *mslots = slots->memslots;
707 unsigned long npages = old->npages;
708 671
709 *old = *new; 672 WARN_ON(mslots[i].id != id);
710 if (new->npages != npages) 673 if (!new->npages) {
711 sort_memslots(slots); 674 new->base_gfn = 0;
675 if (mslots[i].npages)
676 slots->used_slots--;
677 } else {
678 if (!mslots[i].npages)
679 slots->used_slots++;
712 } 680 }
681
682 while (i < KVM_MEM_SLOTS_NUM - 1 &&
683 new->base_gfn <= mslots[i + 1].base_gfn) {
684 if (!mslots[i + 1].npages)
685 break;
686 mslots[i] = mslots[i + 1];
687 slots->id_to_index[mslots[i].id] = i;
688 i++;
689 }
690 while (i > 0 &&
691 new->base_gfn > mslots[i - 1].base_gfn) {
692 mslots[i] = mslots[i - 1];
693 slots->id_to_index[mslots[i].id] = i;
694 i--;
695 }
696
697 mslots[i] = *new;
698 slots->id_to_index[mslots[i].id] = i;
713} 699}
714 700
715static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) 701static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
@@ -727,7 +713,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
727} 713}
728 714
729static struct kvm_memslots *install_new_memslots(struct kvm *kvm, 715static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
730 struct kvm_memslots *slots, struct kvm_memory_slot *new) 716 struct kvm_memslots *slots)
731{ 717{
732 struct kvm_memslots *old_memslots = kvm->memslots; 718 struct kvm_memslots *old_memslots = kvm->memslots;
733 719
@@ -738,7 +724,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
738 WARN_ON(old_memslots->generation & 1); 724 WARN_ON(old_memslots->generation & 1);
739 slots->generation = old_memslots->generation + 1; 725 slots->generation = old_memslots->generation + 1;
740 726
741 update_memslots(slots, new);
742 rcu_assign_pointer(kvm->memslots, slots); 727 rcu_assign_pointer(kvm->memslots, slots);
743 synchronize_srcu_expedited(&kvm->srcu); 728 synchronize_srcu_expedited(&kvm->srcu);
744 729
@@ -760,7 +745,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
760 * 745 *
761 * Discontiguous memory is allowed, mostly for framebuffers. 746 * Discontiguous memory is allowed, mostly for framebuffers.
762 * 747 *
763 * Must be called holding mmap_sem for write. 748 * Must be called holding kvm->slots_lock for write.
764 */ 749 */
765int __kvm_set_memory_region(struct kvm *kvm, 750int __kvm_set_memory_region(struct kvm *kvm,
766 struct kvm_userspace_memory_region *mem) 751 struct kvm_userspace_memory_region *mem)
@@ -866,15 +851,16 @@ int __kvm_set_memory_region(struct kvm *kvm,
866 goto out_free; 851 goto out_free;
867 } 852 }
868 853
854 slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
855 GFP_KERNEL);
856 if (!slots)
857 goto out_free;
858
869 if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { 859 if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
870 slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
871 GFP_KERNEL);
872 if (!slots)
873 goto out_free;
874 slot = id_to_memslot(slots, mem->slot); 860 slot = id_to_memslot(slots, mem->slot);
875 slot->flags |= KVM_MEMSLOT_INVALID; 861 slot->flags |= KVM_MEMSLOT_INVALID;
876 862
877 old_memslots = install_new_memslots(kvm, slots, NULL); 863 old_memslots = install_new_memslots(kvm, slots);
878 864
879 /* slot was deleted or moved, clear iommu mapping */ 865 /* slot was deleted or moved, clear iommu mapping */
880 kvm_iommu_unmap_pages(kvm, &old); 866 kvm_iommu_unmap_pages(kvm, &old);
@@ -886,6 +872,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
886 * - kvm_is_visible_gfn (mmu_check_roots) 872 * - kvm_is_visible_gfn (mmu_check_roots)
887 */ 873 */
888 kvm_arch_flush_shadow_memslot(kvm, slot); 874 kvm_arch_flush_shadow_memslot(kvm, slot);
875
876 /*
877 * We can re-use the old_memslots from above, the only difference
878 * from the currently installed memslots is the invalid flag. This
879 * will get overwritten by update_memslots anyway.
880 */
889 slots = old_memslots; 881 slots = old_memslots;
890 } 882 }
891 883
@@ -893,26 +885,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
893 if (r) 885 if (r)
894 goto out_slots; 886 goto out_slots;
895 887
896 r = -ENOMEM;
897 /*
898 * We can re-use the old_memslots from above, the only difference
899 * from the currently installed memslots is the invalid flag. This
900 * will get overwritten by update_memslots anyway.
901 */
902 if (!slots) {
903 slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
904 GFP_KERNEL);
905 if (!slots)
906 goto out_free;
907 }
908
909 /* actual memory is freed via old in kvm_free_physmem_slot below */ 888 /* actual memory is freed via old in kvm_free_physmem_slot below */
910 if (change == KVM_MR_DELETE) { 889 if (change == KVM_MR_DELETE) {
911 new.dirty_bitmap = NULL; 890 new.dirty_bitmap = NULL;
912 memset(&new.arch, 0, sizeof(new.arch)); 891 memset(&new.arch, 0, sizeof(new.arch));
913 } 892 }
914 893
915 old_memslots = install_new_memslots(kvm, slots, &new); 894 update_memslots(slots, &new);
895 old_memslots = install_new_memslots(kvm, slots);
916 896
917 kvm_arch_commit_memory_region(kvm, mem, &old, change); 897 kvm_arch_commit_memory_region(kvm, mem, &old, change);
918 898
@@ -1799,10 +1779,6 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target)
1799 rcu_read_unlock(); 1779 rcu_read_unlock();
1800 if (!task) 1780 if (!task)
1801 return ret; 1781 return ret;
1802 if (task->flags & PF_VCPU) {
1803 put_task_struct(task);
1804 return ret;
1805 }
1806 ret = yield_to(task, 1); 1782 ret = yield_to(task, 1);
1807 put_task_struct(task); 1783 put_task_struct(task);
1808 1784
@@ -2065,6 +2041,15 @@ static long kvm_vcpu_ioctl(struct file *filp,
2065 r = -EINVAL; 2041 r = -EINVAL;
2066 if (arg) 2042 if (arg)
2067 goto out; 2043 goto out;
2044 if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
2045 /* The thread running this VCPU changed. */
2046 struct pid *oldpid = vcpu->pid;
2047 struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
2048 rcu_assign_pointer(vcpu->pid, newpid);
2049 if (oldpid)
2050 synchronize_rcu();
2051 put_pid(oldpid);
2052 }
2068 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); 2053 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
2069 trace_kvm_userspace_exit(vcpu->run->exit_reason, r); 2054 trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
2070 break; 2055 break;
@@ -2599,8 +2584,6 @@ static long kvm_vm_ioctl(struct file *filp,
2599 break; 2584 break;
2600 default: 2585 default:
2601 r = kvm_arch_vm_ioctl(filp, ioctl, arg); 2586 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
2602 if (r == -ENOTTY)
2603 r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
2604 } 2587 }
2605out: 2588out:
2606 return r; 2589 return r;