diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-27 13:13:52 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-27 13:13:52 -0400 |
commit | 42cadc86008aae0fd9ff31642dc01ed50723cf32 (patch) | |
tree | b05d4c8f0561bad5a0183a89fb23ce4c8ee1653c | |
parent | fba5c1af5c4fd6645fe62ea84ccde0981282cf66 (diff) | |
parent | 66c0b394f08fd89236515c1c84485ea712a157be (diff) |
Merge branch 'kvm-updates-2.6.26' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'kvm-updates-2.6.26' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (147 commits)
KVM: kill file->f_count abuse in kvm
KVM: MMU: kvm_pv_mmu_op should not take mmap_sem
KVM: SVM: remove selective CR0 comment
KVM: SVM: remove now obsolete FIXME comment
KVM: SVM: disable CR8 intercept when tpr is not masking interrupts
KVM: SVM: sync V_TPR with LAPIC.TPR if CR8 write intercept is disabled
KVM: export kvm_lapic_set_tpr() to modules
KVM: SVM: sync TPR value to V_TPR field in the VMCB
KVM: ppc: PowerPC 440 KVM implementation
KVM: Add MAINTAINERS entry for PowerPC KVM
KVM: ppc: Add DCR access information to struct kvm_run
ppc: Export tlb_44x_hwater for KVM
KVM: Rename debugfs_dir to kvm_debugfs_dir
KVM: x86 emulator: fix lea to really get the effective address
KVM: x86 emulator: fix smsw and lmsw with a memory operand
KVM: x86 emulator: initialize src.val and dst.val for register operands
KVM: SVM: force a new asid when initializing the vmcb
KVM: fix kvm_vcpu_kick vs __vcpu_run race
KVM: add ioctls to save/store mpstate
KVM: Rename VCPU_MP_STATE_* to KVM_MP_STATE_*
...
119 files changed, 23723 insertions, 638 deletions
diff --git a/Documentation/ia64/kvm.txt b/Documentation/ia64/kvm.txt new file mode 100644 index 000000000000..bec9d815da33 --- /dev/null +++ b/Documentation/ia64/kvm.txt | |||
@@ -0,0 +1,82 @@ | |||
1 | Currently, kvm module in EXPERIMENTAL stage on IA64. This means that | ||
2 | interfaces are not stable enough to use. So, plase had better don't run | ||
3 | critical applications in virtual machine. We will try our best to make it | ||
4 | strong in future versions! | ||
5 | Guide: How to boot up guests on kvm/ia64 | ||
6 | |||
7 | This guide is to describe how to enable kvm support for IA-64 systems. | ||
8 | |||
9 | 1. Get the kvm source from git.kernel.org. | ||
10 | Userspace source: | ||
11 | git clone git://git.kernel.org/pub/scm/virt/kvm/kvm-userspace.git | ||
12 | Kernel Source: | ||
13 | git clone git://git.kernel.org/pub/scm/linux/kernel/git/xiantao/kvm-ia64.git | ||
14 | |||
15 | 2. Compile the source code. | ||
16 | 2.1 Compile userspace code: | ||
17 | (1)cd ./kvm-userspace | ||
18 | (2)./configure | ||
19 | (3)cd kernel | ||
20 | (4)make sync LINUX= $kernel_dir (kernel_dir is the directory of kernel source.) | ||
21 | (5)cd .. | ||
22 | (6)make qemu | ||
23 | (7)cd qemu; make install | ||
24 | |||
25 | 2.2 Compile kernel source code: | ||
26 | (1) cd ./$kernel_dir | ||
27 | (2) Make menuconfig | ||
28 | (3) Enter into virtualization option, and choose kvm. | ||
29 | (4) make | ||
30 | (5) Once (4) done, make modules_install | ||
31 | (6) Make initrd, and use new kernel to reboot up host machine. | ||
32 | (7) Once (6) done, cd $kernel_dir/arch/ia64/kvm | ||
33 | (8) insmod kvm.ko; insmod kvm-intel.ko | ||
34 | |||
35 | Note: For step 2, please make sure that host page size == TARGET_PAGE_SIZE of qemu, otherwise, may fail. | ||
36 | |||
37 | 3. Get Guest Firmware named as Flash.fd, and put it under right place: | ||
38 | (1) If you have the guest firmware (binary) released by Intel Corp for Xen, use it directly. | ||
39 | |||
40 | (2) If you have no firmware at hand, Please download its source from | ||
41 | hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg | ||
42 | you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries. | ||
43 | |||
44 | (3) Rename the firware you owned to Flash.fd, and copy it to /usr/local/share/qemu | ||
45 | |||
46 | 4. Boot up Linux or Windows guests: | ||
47 | 4.1 Create or install a image for guest boot. If you have xen experience, it should be easy. | ||
48 | |||
49 | 4.2 Boot up guests use the following command. | ||
50 | /usr/local/bin/qemu-system-ia64 -smp xx -m 512 -hda $your_image | ||
51 | (xx is the number of virtual processors for the guest, now the maximum value is 4) | ||
52 | |||
53 | 5. Known possibile issue on some platforms with old Firmware. | ||
54 | |||
55 | If meet strange host crashe issues, try to solve it through either of the following ways: | ||
56 | |||
57 | (1): Upgrade your Firmware to the latest one. | ||
58 | |||
59 | (2): Applying the below patch to kernel source. | ||
60 | diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S | ||
61 | index 0b53344..f02b0f7 100644 | ||
62 | --- a/arch/ia64/kernel/pal.S | ||
63 | +++ b/arch/ia64/kernel/pal.S | ||
64 | @@ -84,7 +84,8 @@ GLOBAL_ENTRY(ia64_pal_call_static) | ||
65 | mov ar.pfs = loc1 | ||
66 | mov rp = loc0 | ||
67 | ;; | ||
68 | - srlz.d // seralize restoration of psr.l | ||
69 | + srlz.i // seralize restoration of psr.l | ||
70 | + ;; | ||
71 | br.ret.sptk.many b0 | ||
72 | END(ia64_pal_call_static) | ||
73 | |||
74 | 6. Bug report: | ||
75 | If you found any issues when use kvm/ia64, Please post the bug info to kvm-ia64-devel mailing list. | ||
76 | https://lists.sourceforge.net/lists/listinfo/kvm-ia64-devel/ | ||
77 | |||
78 | Thanks for your interest! Let's work together, and make kvm/ia64 stronger and stronger! | ||
79 | |||
80 | |||
81 | Xiantao Zhang <xiantao.zhang@intel.com> | ||
82 | 2008.3.10 | ||
diff --git a/Documentation/ioctl-number.txt b/Documentation/ioctl-number.txt index c18363bd8d11..240ce7a56c40 100644 --- a/Documentation/ioctl-number.txt +++ b/Documentation/ioctl-number.txt | |||
@@ -183,6 +183,8 @@ Code Seq# Include File Comments | |||
183 | 0xAC 00-1F linux/raw.h | 183 | 0xAC 00-1F linux/raw.h |
184 | 0xAD 00 Netfilter device in development: | 184 | 0xAD 00 Netfilter device in development: |
185 | <mailto:rusty@rustcorp.com.au> | 185 | <mailto:rusty@rustcorp.com.au> |
186 | 0xAE all linux/kvm.h Kernel-based Virtual Machine | ||
187 | <mailto:kvm-devel@lists.sourceforge.net> | ||
186 | 0xB0 all RATIO devices in development: | 188 | 0xB0 all RATIO devices in development: |
187 | <mailto:vgo@ratio.de> | 189 | <mailto:vgo@ratio.de> |
188 | 0xB1 00-1F PPPoX <mailto:mostrows@styx.uwaterloo.ca> | 190 | 0xB1 00-1F PPPoX <mailto:mostrows@styx.uwaterloo.ca> |
diff --git a/Documentation/powerpc/kvm_440.txt b/Documentation/powerpc/kvm_440.txt new file mode 100644 index 000000000000..c02a003fa03a --- /dev/null +++ b/Documentation/powerpc/kvm_440.txt | |||
@@ -0,0 +1,41 @@ | |||
1 | Hollis Blanchard <hollisb@us.ibm.com> | ||
2 | 15 Apr 2008 | ||
3 | |||
4 | Various notes on the implementation of KVM for PowerPC 440: | ||
5 | |||
6 | To enforce isolation, host userspace, guest kernel, and guest userspace all | ||
7 | run at user privilege level. Only the host kernel runs in supervisor mode. | ||
8 | Executing privileged instructions in the guest traps into KVM (in the host | ||
9 | kernel), where we decode and emulate them. Through this technique, unmodified | ||
10 | 440 Linux kernels can be run (slowly) as guests. Future performance work will | ||
11 | focus on reducing the overhead and frequency of these traps. | ||
12 | |||
13 | The usual code flow is started from userspace invoking an "run" ioctl, which | ||
14 | causes KVM to switch into guest context. We use IVPR to hijack the host | ||
15 | interrupt vectors while running the guest, which allows us to direct all | ||
16 | interrupts to kvmppc_handle_interrupt(). At this point, we could either | ||
17 | - handle the interrupt completely (e.g. emulate "mtspr SPRG0"), or | ||
18 | - let the host interrupt handler run (e.g. when the decrementer fires), or | ||
19 | - return to host userspace (e.g. when the guest performs device MMIO) | ||
20 | |||
21 | Address spaces: We take advantage of the fact that Linux doesn't use the AS=1 | ||
22 | address space (in host or guest), which gives us virtual address space to use | ||
23 | for guest mappings. While the guest is running, the host kernel remains mapped | ||
24 | in AS=0, but the guest can only use AS=1 mappings. | ||
25 | |||
26 | TLB entries: The TLB entries covering the host linear mapping remain | ||
27 | present while running the guest. This reduces the overhead of lightweight | ||
28 | exits, which are handled by KVM running in the host kernel. We keep three | ||
29 | copies of the TLB: | ||
30 | - guest TLB: contents of the TLB as the guest sees it | ||
31 | - shadow TLB: the TLB that is actually in hardware while guest is running | ||
32 | - host TLB: to restore TLB state when context switching guest -> host | ||
33 | When a TLB miss occurs because a mapping was not present in the shadow TLB, | ||
34 | but was present in the guest TLB, KVM handles the fault without invoking the | ||
35 | guest. Large guest pages are backed by multiple 4KB shadow pages through this | ||
36 | mechanism. | ||
37 | |||
38 | IO: MMIO and DCR accesses are emulated by userspace. We use virtio for network | ||
39 | and block IO, so those drivers must be enabled in the guest. It's possible | ||
40 | that some qemu device emulation (e.g. e1000 or rtl8139) may also work with | ||
41 | little effort. | ||
diff --git a/Documentation/s390/kvm.txt b/Documentation/s390/kvm.txt new file mode 100644 index 000000000000..6f5ceb0f09fc --- /dev/null +++ b/Documentation/s390/kvm.txt | |||
@@ -0,0 +1,125 @@ | |||
1 | *** BIG FAT WARNING *** | ||
2 | The kvm module is currently in EXPERIMENTAL state for s390. This means that | ||
3 | the interface to the module is not yet considered to remain stable. Thus, be | ||
4 | prepared that we keep breaking your userspace application and guest | ||
5 | compatibility over and over again until we feel happy with the result. Make sure | ||
6 | your guest kernel, your host kernel, and your userspace launcher are in a | ||
7 | consistent state. | ||
8 | |||
9 | This Documentation describes the unique ioctl calls to /dev/kvm, the resulting | ||
10 | kvm-vm file descriptors, and the kvm-vcpu file descriptors that differ from x86. | ||
11 | |||
12 | 1. ioctl calls to /dev/kvm | ||
13 | KVM does support the following ioctls on s390 that are common with other | ||
14 | architectures and do behave the same: | ||
15 | KVM_GET_API_VERSION | ||
16 | KVM_CREATE_VM (*) see note | ||
17 | KVM_CHECK_EXTENSION | ||
18 | KVM_GET_VCPU_MMAP_SIZE | ||
19 | |||
20 | Notes: | ||
21 | * KVM_CREATE_VM may fail on s390, if the calling process has multiple | ||
22 | threads and has not called KVM_S390_ENABLE_SIE before. | ||
23 | |||
24 | In addition, on s390 the following architecture specific ioctls are supported: | ||
25 | ioctl: KVM_S390_ENABLE_SIE | ||
26 | args: none | ||
27 | see also: include/linux/kvm.h | ||
28 | This call causes the kernel to switch on PGSTE in the user page table. This | ||
29 | operation is needed in order to run a virtual machine, and it requires the | ||
30 | calling process to be single-threaded. Note that the first call to KVM_CREATE_VM | ||
31 | will implicitly try to switch on PGSTE if the user process has not called | ||
32 | KVM_S390_ENABLE_SIE before. User processes that want to launch multiple threads | ||
33 | before creating a virtual machine have to call KVM_S390_ENABLE_SIE, or will | ||
34 | observe an error calling KVM_CREATE_VM. Switching on PGSTE is a one-time | ||
35 | operation, is not reversible, and will persist over the entire lifetime of | ||
36 | the calling process. It does not have any user-visible effect other than a small | ||
37 | performance penalty. | ||
38 | |||
39 | 2. ioctl calls to the kvm-vm file descriptor | ||
40 | KVM does support the following ioctls on s390 that are common with other | ||
41 | architectures and do behave the same: | ||
42 | KVM_CREATE_VCPU | ||
43 | KVM_SET_USER_MEMORY_REGION (*) see note | ||
44 | KVM_GET_DIRTY_LOG (**) see note | ||
45 | |||
46 | Notes: | ||
47 | * kvm does only allow exactly one memory slot on s390, which has to start | ||
48 | at guest absolute address zero and at a user address that is aligned on any | ||
49 | page boundary. This hardware "limitation" allows us to have a few unique | ||
50 | optimizations. The memory slot doesn't have to be filled | ||
51 | with memory actually, it may contain sparse holes. That said, with different | ||
52 | user memory layout this does still allow a large flexibility when | ||
53 | doing the guest memory setup. | ||
54 | ** KVM_GET_DIRTY_LOG doesn't work properly yet. The user will receive an empty | ||
55 | log. This ioctl call is only needed for guest migration, and we intend to | ||
56 | implement this one in the future. | ||
57 | |||
58 | In addition, on s390 the following architecture specific ioctls for the kvm-vm | ||
59 | file descriptor are supported: | ||
60 | ioctl: KVM_S390_INTERRUPT | ||
61 | args: struct kvm_s390_interrupt * | ||
62 | see also: include/linux/kvm.h | ||
63 | This ioctl is used to submit a floating interrupt for a virtual machine. | ||
64 | Floating interrupts may be delivered to any virtual cpu in the configuration. | ||
65 | Only some interrupt types defined in include/linux/kvm.h make sense when | ||
66 | submitted as floating interrupts. The following interrupts are not considered | ||
67 | to be useful as floating interrupts, and a call to inject them will result in | ||
68 | -EINVAL error code: program interrupts and interprocessor signals. Valid | ||
69 | floating interrupts are: | ||
70 | KVM_S390_INT_VIRTIO | ||
71 | KVM_S390_INT_SERVICE | ||
72 | |||
73 | 3. ioctl calls to the kvm-vcpu file descriptor | ||
74 | KVM does support the following ioctls on s390 that are common with other | ||
75 | architectures and do behave the same: | ||
76 | KVM_RUN | ||
77 | KVM_GET_REGS | ||
78 | KVM_SET_REGS | ||
79 | KVM_GET_SREGS | ||
80 | KVM_SET_SREGS | ||
81 | KVM_GET_FPU | ||
82 | KVM_SET_FPU | ||
83 | |||
84 | In addition, on s390 the following architecture specific ioctls for the | ||
85 | kvm-vcpu file descriptor are supported: | ||
86 | ioctl: KVM_S390_INTERRUPT | ||
87 | args: struct kvm_s390_interrupt * | ||
88 | see also: include/linux/kvm.h | ||
89 | This ioctl is used to submit an interrupt for a specific virtual cpu. | ||
90 | Only some interrupt types defined in include/linux/kvm.h make sense when | ||
91 | submitted for a specific cpu. The following interrupts are not considered | ||
92 | to be useful, and a call to inject them will result in -EINVAL error code: | ||
93 | service processor calls and virtio interrupts. Valid interrupt types are: | ||
94 | KVM_S390_PROGRAM_INT | ||
95 | KVM_S390_SIGP_STOP | ||
96 | KVM_S390_RESTART | ||
97 | KVM_S390_SIGP_SET_PREFIX | ||
98 | KVM_S390_INT_EMERGENCY | ||
99 | |||
100 | ioctl: KVM_S390_STORE_STATUS | ||
101 | args: unsigned long | ||
102 | see also: include/linux/kvm.h | ||
103 | This ioctl stores the state of the cpu at the guest real address given as | ||
104 | argument, unless one of the following values defined in include/linux/kvm.h | ||
105 | is given as arguement: | ||
106 | KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in | ||
107 | absolute lowcore as defined by the principles of operation | ||
108 | KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in | ||
109 | its prefix page just like the dump tool that comes with zipl. This is useful | ||
110 | to create a system dump for use with lkcdutils or crash. | ||
111 | |||
112 | ioctl: KVM_S390_SET_INITIAL_PSW | ||
113 | args: struct kvm_s390_psw * | ||
114 | see also: include/linux/kvm.h | ||
115 | This ioctl can be used to set the processor status word (psw) of a stopped cpu | ||
116 | prior to running it with KVM_RUN. Note that this call is not required to modify | ||
117 | the psw during sie intercepts that fall back to userspace because struct kvm_run | ||
118 | does contain the psw, and this value is evaluated during reentry of KVM_RUN | ||
119 | after the intercept exit was recognized. | ||
120 | |||
121 | ioctl: KVM_S390_INITIAL_RESET | ||
122 | args: none | ||
123 | see also: include/linux/kvm.h | ||
124 | This ioctl can be used to perform an initial cpu reset as defined by the | ||
125 | principles of operation. The target cpu has to be in stopped state. | ||
diff --git a/MAINTAINERS b/MAINTAINERS index a942f3852499..c1dd1ae7b133 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -2329,6 +2329,13 @@ L: kvm-devel@lists.sourceforge.net | |||
2329 | W: kvm.sourceforge.net | 2329 | W: kvm.sourceforge.net |
2330 | S: Supported | 2330 | S: Supported |
2331 | 2331 | ||
2332 | KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC | ||
2333 | P: Hollis Blanchard | ||
2334 | M: hollisb@us.ibm.com | ||
2335 | L: kvm-ppc-devel@lists.sourceforge.net | ||
2336 | W: kvm.sourceforge.net | ||
2337 | S: Supported | ||
2338 | |||
2332 | KERNEL VIRTUAL MACHINE For Itanium(KVM/IA64) | 2339 | KERNEL VIRTUAL MACHINE For Itanium(KVM/IA64) |
2333 | P: Anthony Xu | 2340 | P: Anthony Xu |
2334 | M: anthony.xu@intel.com | 2341 | M: anthony.xu@intel.com |
@@ -2338,6 +2345,16 @@ L: kvm-ia64-devel@lists.sourceforge.net | |||
2338 | W: kvm.sourceforge.net | 2345 | W: kvm.sourceforge.net |
2339 | S: Supported | 2346 | S: Supported |
2340 | 2347 | ||
2348 | KERNEL VIRTUAL MACHINE for s390 (KVM/s390) | ||
2349 | P: Carsten Otte | ||
2350 | M: cotte@de.ibm.com | ||
2351 | P: Christian Borntraeger | ||
2352 | M: borntraeger@de.ibm.com | ||
2353 | M: linux390@de.ibm.com | ||
2354 | L: linux-s390@vger.kernel.org | ||
2355 | W: http://www.ibm.com/developerworks/linux/linux390/ | ||
2356 | S: Supported | ||
2357 | |||
2341 | KEXEC | 2358 | KEXEC |
2342 | P: Eric Biederman | 2359 | P: Eric Biederman |
2343 | M: ebiederm@xmission.com | 2360 | M: ebiederm@xmission.com |
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index cd13e138bd03..3aa6c821449a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig | |||
@@ -19,6 +19,7 @@ config IA64 | |||
19 | select HAVE_OPROFILE | 19 | select HAVE_OPROFILE |
20 | select HAVE_KPROBES | 20 | select HAVE_KPROBES |
21 | select HAVE_KRETPROBES | 21 | select HAVE_KRETPROBES |
22 | select HAVE_KVM | ||
22 | default y | 23 | default y |
23 | help | 24 | help |
24 | The Itanium Processor Family is Intel's 64-bit successor to | 25 | The Itanium Processor Family is Intel's 64-bit successor to |
@@ -589,6 +590,8 @@ config MSPEC | |||
589 | 590 | ||
590 | source "fs/Kconfig" | 591 | source "fs/Kconfig" |
591 | 592 | ||
593 | source "arch/ia64/kvm/Kconfig" | ||
594 | |||
592 | source "lib/Kconfig" | 595 | source "lib/Kconfig" |
593 | 596 | ||
594 | # | 597 | # |
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index f1645c4f7039..ec4cca477f49 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile | |||
@@ -57,6 +57,7 @@ core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/ | |||
57 | core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ | 57 | core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ |
58 | core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ | 58 | core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ |
59 | core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ | 59 | core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ |
60 | core-$(CONFIG_KVM) += arch/ia64/kvm/ | ||
60 | 61 | ||
61 | drivers-$(CONFIG_PCI) += arch/ia64/pci/ | 62 | drivers-$(CONFIG_PCI) += arch/ia64/pci/ |
62 | drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ | 63 | drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ |
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig new file mode 100644 index 000000000000..7914e4828504 --- /dev/null +++ b/arch/ia64/kvm/Kconfig | |||
@@ -0,0 +1,49 @@ | |||
1 | # | ||
2 | # KVM configuration | ||
3 | # | ||
4 | config HAVE_KVM | ||
5 | bool | ||
6 | |||
7 | menuconfig VIRTUALIZATION | ||
8 | bool "Virtualization" | ||
9 | depends on HAVE_KVM || IA64 | ||
10 | default y | ||
11 | ---help--- | ||
12 | Say Y here to get to see options for using your Linux host to run other | ||
13 | operating systems inside virtual machines (guests). | ||
14 | This option alone does not add any kernel code. | ||
15 | |||
16 | If you say N, all options in this submenu will be skipped and disabled. | ||
17 | |||
18 | if VIRTUALIZATION | ||
19 | |||
20 | config KVM | ||
21 | tristate "Kernel-based Virtual Machine (KVM) support" | ||
22 | depends on HAVE_KVM && EXPERIMENTAL | ||
23 | select PREEMPT_NOTIFIERS | ||
24 | select ANON_INODES | ||
25 | ---help--- | ||
26 | Support hosting fully virtualized guest machines using hardware | ||
27 | virtualization extensions. You will need a fairly recent | ||
28 | processor equipped with virtualization extensions. You will also | ||
29 | need to select one or more of the processor modules below. | ||
30 | |||
31 | This module provides access to the hardware capabilities through | ||
32 | a character device node named /dev/kvm. | ||
33 | |||
34 | To compile this as a module, choose M here: the module | ||
35 | will be called kvm. | ||
36 | |||
37 | If unsure, say N. | ||
38 | |||
39 | config KVM_INTEL | ||
40 | tristate "KVM for Intel Itanium 2 processors support" | ||
41 | depends on KVM && m | ||
42 | ---help--- | ||
43 | Provides support for KVM on Itanium 2 processors equipped with the VT | ||
44 | extensions. | ||
45 | |||
46 | config KVM_TRACE | ||
47 | bool | ||
48 | |||
49 | endif # VIRTUALIZATION | ||
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile new file mode 100644 index 000000000000..41b034ffa73b --- /dev/null +++ b/arch/ia64/kvm/Makefile | |||
@@ -0,0 +1,61 @@ | |||
1 | #This Make file is to generate asm-offsets.h and build source. | ||
2 | # | ||
3 | |||
4 | #Generate asm-offsets.h for vmm module build | ||
5 | offsets-file := asm-offsets.h | ||
6 | |||
7 | always := $(offsets-file) | ||
8 | targets := $(offsets-file) | ||
9 | targets += arch/ia64/kvm/asm-offsets.s | ||
10 | clean-files := $(addprefix $(objtree)/,$(targets) $(obj)/memcpy.S $(obj)/memset.S) | ||
11 | |||
12 | # Default sed regexp - multiline due to syntax constraints | ||
13 | define sed-y | ||
14 | "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}" | ||
15 | endef | ||
16 | |||
17 | quiet_cmd_offsets = GEN $@ | ||
18 | define cmd_offsets | ||
19 | (set -e; \ | ||
20 | echo "#ifndef __ASM_KVM_OFFSETS_H__"; \ | ||
21 | echo "#define __ASM_KVM_OFFSETS_H__"; \ | ||
22 | echo "/*"; \ | ||
23 | echo " * DO NOT MODIFY."; \ | ||
24 | echo " *"; \ | ||
25 | echo " * This file was generated by Makefile"; \ | ||
26 | echo " *"; \ | ||
27 | echo " */"; \ | ||
28 | echo ""; \ | ||
29 | sed -ne $(sed-y) $<; \ | ||
30 | echo ""; \ | ||
31 | echo "#endif" ) > $@ | ||
32 | endef | ||
33 | # We use internal rules to avoid the "is up to date" message from make | ||
34 | arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c | ||
35 | $(call if_changed_dep,cc_s_c) | ||
36 | |||
37 | $(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s | ||
38 | $(call cmd,offsets) | ||
39 | |||
40 | # | ||
41 | # Makefile for Kernel-based Virtual Machine module | ||
42 | # | ||
43 | |||
44 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ | ||
45 | |||
46 | $(addprefix $(objtree)/,$(obj)/memcpy.S $(obj)/memset.S): | ||
47 | $(shell ln -snf ../lib/memcpy.S $(src)/memcpy.S) | ||
48 | $(shell ln -snf ../lib/memset.S $(src)/memset.S) | ||
49 | |||
50 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) | ||
51 | |||
52 | kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o | ||
53 | obj-$(CONFIG_KVM) += kvm.o | ||
54 | |||
55 | FORCE : $(obj)/$(offsets-file) | ||
56 | EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 | ||
57 | kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ | ||
58 | vtlb.o process.o | ||
59 | #Add link memcpy and memset to avoid possible structure assignment error | ||
60 | kvm-intel-objs += memset.o memcpy.o | ||
61 | obj-$(CONFIG_KVM_INTEL) += kvm-intel.o | ||
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c new file mode 100644 index 000000000000..4e3dc13a619c --- /dev/null +++ b/arch/ia64/kvm/asm-offsets.c | |||
@@ -0,0 +1,251 @@ | |||
1 | /* | ||
2 | * asm-offsets.c Generate definitions needed by assembly language modules. | ||
3 | * This code generates raw asm output which is post-processed | ||
4 | * to extract and format the required data. | ||
5 | * | ||
6 | * Anthony Xu <anthony.xu@intel.com> | ||
7 | * Xiantao Zhang <xiantao.zhang@intel.com> | ||
8 | * Copyright (c) 2007 Intel Corporation KVM support. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify it | ||
11 | * under the terms and conditions of the GNU General Public License, | ||
12 | * version 2, as published by the Free Software Foundation. | ||
13 | * | ||
14 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
15 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
17 | * more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along with | ||
20 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
21 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/autoconf.h> | ||
26 | #include <linux/kvm_host.h> | ||
27 | |||
28 | #include "vcpu.h" | ||
29 | |||
30 | #define task_struct kvm_vcpu | ||
31 | |||
32 | #define DEFINE(sym, val) \ | ||
33 | asm volatile("\n->" #sym " (%0) " #val : : "i" (val)) | ||
34 | |||
35 | #define BLANK() asm volatile("\n->" : :) | ||
36 | |||
37 | #define OFFSET(_sym, _str, _mem) \ | ||
38 | DEFINE(_sym, offsetof(_str, _mem)); | ||
39 | |||
40 | void foo(void) | ||
41 | { | ||
42 | DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu)); | ||
43 | DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs)); | ||
44 | |||
45 | BLANK(); | ||
46 | |||
47 | DEFINE(VMM_VCPU_META_RR0_OFFSET, | ||
48 | offsetof(struct kvm_vcpu, arch.metaphysical_rr0)); | ||
49 | DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, | ||
50 | offsetof(struct kvm_vcpu, | ||
51 | arch.metaphysical_saved_rr0)); | ||
52 | DEFINE(VMM_VCPU_VRR0_OFFSET, | ||
53 | offsetof(struct kvm_vcpu, arch.vrr[0])); | ||
54 | DEFINE(VMM_VPD_IRR0_OFFSET, | ||
55 | offsetof(struct vpd, irr[0])); | ||
56 | DEFINE(VMM_VCPU_ITC_CHECK_OFFSET, | ||
57 | offsetof(struct kvm_vcpu, arch.itc_check)); | ||
58 | DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET, | ||
59 | offsetof(struct kvm_vcpu, arch.irq_check)); | ||
60 | DEFINE(VMM_VPD_VHPI_OFFSET, | ||
61 | offsetof(struct vpd, vhpi)); | ||
62 | DEFINE(VMM_VCPU_VSA_BASE_OFFSET, | ||
63 | offsetof(struct kvm_vcpu, arch.vsa_base)); | ||
64 | DEFINE(VMM_VCPU_VPD_OFFSET, | ||
65 | offsetof(struct kvm_vcpu, arch.vpd)); | ||
66 | DEFINE(VMM_VCPU_IRQ_CHECK, | ||
67 | offsetof(struct kvm_vcpu, arch.irq_check)); | ||
68 | DEFINE(VMM_VCPU_TIMER_PENDING, | ||
69 | offsetof(struct kvm_vcpu, arch.timer_pending)); | ||
70 | DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, | ||
71 | offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0)); | ||
72 | DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, | ||
73 | offsetof(struct kvm_vcpu, arch.mode_flags)); | ||
74 | DEFINE(VMM_VCPU_ITC_OFS_OFFSET, | ||
75 | offsetof(struct kvm_vcpu, arch.itc_offset)); | ||
76 | DEFINE(VMM_VCPU_LAST_ITC_OFFSET, | ||
77 | offsetof(struct kvm_vcpu, arch.last_itc)); | ||
78 | DEFINE(VMM_VCPU_SAVED_GP_OFFSET, | ||
79 | offsetof(struct kvm_vcpu, arch.saved_gp)); | ||
80 | |||
81 | BLANK(); | ||
82 | |||
83 | DEFINE(VMM_PT_REGS_B6_OFFSET, | ||
84 | offsetof(struct kvm_pt_regs, b6)); | ||
85 | DEFINE(VMM_PT_REGS_B7_OFFSET, | ||
86 | offsetof(struct kvm_pt_regs, b7)); | ||
87 | DEFINE(VMM_PT_REGS_AR_CSD_OFFSET, | ||
88 | offsetof(struct kvm_pt_regs, ar_csd)); | ||
89 | DEFINE(VMM_PT_REGS_AR_SSD_OFFSET, | ||
90 | offsetof(struct kvm_pt_regs, ar_ssd)); | ||
91 | DEFINE(VMM_PT_REGS_R8_OFFSET, | ||
92 | offsetof(struct kvm_pt_regs, r8)); | ||
93 | DEFINE(VMM_PT_REGS_R9_OFFSET, | ||
94 | offsetof(struct kvm_pt_regs, r9)); | ||
95 | DEFINE(VMM_PT_REGS_R10_OFFSET, | ||
96 | offsetof(struct kvm_pt_regs, r10)); | ||
97 | DEFINE(VMM_PT_REGS_R11_OFFSET, | ||
98 | offsetof(struct kvm_pt_regs, r11)); | ||
99 | DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET, | ||
100 | offsetof(struct kvm_pt_regs, cr_ipsr)); | ||
101 | DEFINE(VMM_PT_REGS_CR_IIP_OFFSET, | ||
102 | offsetof(struct kvm_pt_regs, cr_iip)); | ||
103 | DEFINE(VMM_PT_REGS_CR_IFS_OFFSET, | ||
104 | offsetof(struct kvm_pt_regs, cr_ifs)); | ||
105 | DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET, | ||
106 | offsetof(struct kvm_pt_regs, ar_unat)); | ||
107 | DEFINE(VMM_PT_REGS_AR_PFS_OFFSET, | ||
108 | offsetof(struct kvm_pt_regs, ar_pfs)); | ||
109 | DEFINE(VMM_PT_REGS_AR_RSC_OFFSET, | ||
110 | offsetof(struct kvm_pt_regs, ar_rsc)); | ||
111 | DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET, | ||
112 | offsetof(struct kvm_pt_regs, ar_rnat)); | ||
113 | |||
114 | DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET, | ||
115 | offsetof(struct kvm_pt_regs, ar_bspstore)); | ||
116 | DEFINE(VMM_PT_REGS_PR_OFFSET, | ||
117 | offsetof(struct kvm_pt_regs, pr)); | ||
118 | DEFINE(VMM_PT_REGS_B0_OFFSET, | ||
119 | offsetof(struct kvm_pt_regs, b0)); | ||
120 | DEFINE(VMM_PT_REGS_LOADRS_OFFSET, | ||
121 | offsetof(struct kvm_pt_regs, loadrs)); | ||
122 | DEFINE(VMM_PT_REGS_R1_OFFSET, | ||
123 | offsetof(struct kvm_pt_regs, r1)); | ||
124 | DEFINE(VMM_PT_REGS_R12_OFFSET, | ||
125 | offsetof(struct kvm_pt_regs, r12)); | ||
126 | DEFINE(VMM_PT_REGS_R13_OFFSET, | ||
127 | offsetof(struct kvm_pt_regs, r13)); | ||
128 | DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET, | ||
129 | offsetof(struct kvm_pt_regs, ar_fpsr)); | ||
130 | DEFINE(VMM_PT_REGS_R15_OFFSET, | ||
131 | offsetof(struct kvm_pt_regs, r15)); | ||
132 | DEFINE(VMM_PT_REGS_R14_OFFSET, | ||
133 | offsetof(struct kvm_pt_regs, r14)); | ||
134 | DEFINE(VMM_PT_REGS_R2_OFFSET, | ||
135 | offsetof(struct kvm_pt_regs, r2)); | ||
136 | DEFINE(VMM_PT_REGS_R3_OFFSET, | ||
137 | offsetof(struct kvm_pt_regs, r3)); | ||
138 | DEFINE(VMM_PT_REGS_R16_OFFSET, | ||
139 | offsetof(struct kvm_pt_regs, r16)); | ||
140 | DEFINE(VMM_PT_REGS_R17_OFFSET, | ||
141 | offsetof(struct kvm_pt_regs, r17)); | ||
142 | DEFINE(VMM_PT_REGS_R18_OFFSET, | ||
143 | offsetof(struct kvm_pt_regs, r18)); | ||
144 | DEFINE(VMM_PT_REGS_R19_OFFSET, | ||
145 | offsetof(struct kvm_pt_regs, r19)); | ||
146 | DEFINE(VMM_PT_REGS_R20_OFFSET, | ||
147 | offsetof(struct kvm_pt_regs, r20)); | ||
148 | DEFINE(VMM_PT_REGS_R21_OFFSET, | ||
149 | offsetof(struct kvm_pt_regs, r21)); | ||
150 | DEFINE(VMM_PT_REGS_R22_OFFSET, | ||
151 | offsetof(struct kvm_pt_regs, r22)); | ||
152 | DEFINE(VMM_PT_REGS_R23_OFFSET, | ||
153 | offsetof(struct kvm_pt_regs, r23)); | ||
154 | DEFINE(VMM_PT_REGS_R24_OFFSET, | ||
155 | offsetof(struct kvm_pt_regs, r24)); | ||
156 | DEFINE(VMM_PT_REGS_R25_OFFSET, | ||
157 | offsetof(struct kvm_pt_regs, r25)); | ||
158 | DEFINE(VMM_PT_REGS_R26_OFFSET, | ||
159 | offsetof(struct kvm_pt_regs, r26)); | ||
160 | DEFINE(VMM_PT_REGS_R27_OFFSET, | ||
161 | offsetof(struct kvm_pt_regs, r27)); | ||
162 | DEFINE(VMM_PT_REGS_R28_OFFSET, | ||
163 | offsetof(struct kvm_pt_regs, r28)); | ||
164 | DEFINE(VMM_PT_REGS_R29_OFFSET, | ||
165 | offsetof(struct kvm_pt_regs, r29)); | ||
166 | DEFINE(VMM_PT_REGS_R30_OFFSET, | ||
167 | offsetof(struct kvm_pt_regs, r30)); | ||
168 | DEFINE(VMM_PT_REGS_R31_OFFSET, | ||
169 | offsetof(struct kvm_pt_regs, r31)); | ||
170 | DEFINE(VMM_PT_REGS_AR_CCV_OFFSET, | ||
171 | offsetof(struct kvm_pt_regs, ar_ccv)); | ||
172 | DEFINE(VMM_PT_REGS_F6_OFFSET, | ||
173 | offsetof(struct kvm_pt_regs, f6)); | ||
174 | DEFINE(VMM_PT_REGS_F7_OFFSET, | ||
175 | offsetof(struct kvm_pt_regs, f7)); | ||
176 | DEFINE(VMM_PT_REGS_F8_OFFSET, | ||
177 | offsetof(struct kvm_pt_regs, f8)); | ||
178 | DEFINE(VMM_PT_REGS_F9_OFFSET, | ||
179 | offsetof(struct kvm_pt_regs, f9)); | ||
180 | DEFINE(VMM_PT_REGS_F10_OFFSET, | ||
181 | offsetof(struct kvm_pt_regs, f10)); | ||
182 | DEFINE(VMM_PT_REGS_F11_OFFSET, | ||
183 | offsetof(struct kvm_pt_regs, f11)); | ||
184 | DEFINE(VMM_PT_REGS_R4_OFFSET, | ||
185 | offsetof(struct kvm_pt_regs, r4)); | ||
186 | DEFINE(VMM_PT_REGS_R5_OFFSET, | ||
187 | offsetof(struct kvm_pt_regs, r5)); | ||
188 | DEFINE(VMM_PT_REGS_R6_OFFSET, | ||
189 | offsetof(struct kvm_pt_regs, r6)); | ||
190 | DEFINE(VMM_PT_REGS_R7_OFFSET, | ||
191 | offsetof(struct kvm_pt_regs, r7)); | ||
192 | DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET, | ||
193 | offsetof(struct kvm_pt_regs, eml_unat)); | ||
194 | DEFINE(VMM_VCPU_IIPA_OFFSET, | ||
195 | offsetof(struct kvm_vcpu, arch.cr_iipa)); | ||
196 | DEFINE(VMM_VCPU_OPCODE_OFFSET, | ||
197 | offsetof(struct kvm_vcpu, arch.opcode)); | ||
198 | DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause)); | ||
199 | DEFINE(VMM_VCPU_ISR_OFFSET, | ||
200 | offsetof(struct kvm_vcpu, arch.cr_isr)); | ||
201 | DEFINE(VMM_PT_REGS_R16_SLOT, | ||
202 | (((offsetof(struct kvm_pt_regs, r16) | ||
203 | - sizeof(struct kvm_pt_regs)) >> 3) & 0x3f)); | ||
204 | DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, | ||
205 | offsetof(struct kvm_vcpu, arch.mode_flags)); | ||
206 | DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp)); | ||
207 | BLANK(); | ||
208 | |||
209 | DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd)); | ||
210 | DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs)); | ||
211 | DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET, | ||
212 | offsetof(struct kvm_vcpu, arch.insvc[0])); | ||
213 | DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta)); | ||
214 | DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr)); | ||
215 | |||
216 | DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4])); | ||
217 | DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5])); | ||
218 | DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12])); | ||
219 | DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13])); | ||
220 | DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0])); | ||
221 | DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1])); | ||
222 | DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0])); | ||
223 | DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1])); | ||
224 | DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2])); | ||
225 | DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0])); | ||
226 | DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16])); | ||
227 | DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18])); | ||
228 | DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19])); | ||
229 | DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21])); | ||
230 | DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24])); | ||
231 | DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27])); | ||
232 | DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28])); | ||
233 | DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29])); | ||
234 | DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30])); | ||
235 | DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36])); | ||
236 | DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40])); | ||
237 | DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64])); | ||
238 | DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65])); | ||
239 | DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0])); | ||
240 | DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2])); | ||
241 | DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8])); | ||
242 | DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0])); | ||
243 | DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0])); | ||
244 | DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2])); | ||
245 | DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3])); | ||
246 | DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32])); | ||
247 | DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33])); | ||
248 | DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0])); | ||
249 | DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr)); | ||
250 | BLANK(); | ||
251 | } | ||
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c new file mode 100644 index 000000000000..6df073240135 --- /dev/null +++ b/arch/ia64/kvm/kvm-ia64.c | |||
@@ -0,0 +1,1806 @@ | |||
1 | |||
2 | /* | ||
3 | * kvm_ia64.c: Basic KVM suppport On Itanium series processors | ||
4 | * | ||
5 | * | ||
6 | * Copyright (C) 2007, Intel Corporation. | ||
7 | * Xiantao Zhang (xiantao.zhang@intel.com) | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms and conditions of the GNU General Public License, | ||
11 | * version 2, as published by the Free Software Foundation. | ||
12 | * | ||
13 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
14 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
15 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
16 | * more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License along with | ||
19 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
20 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/module.h> | ||
25 | #include <linux/errno.h> | ||
26 | #include <linux/percpu.h> | ||
27 | #include <linux/gfp.h> | ||
28 | #include <linux/fs.h> | ||
29 | #include <linux/smp.h> | ||
30 | #include <linux/kvm_host.h> | ||
31 | #include <linux/kvm.h> | ||
32 | #include <linux/bitops.h> | ||
33 | #include <linux/hrtimer.h> | ||
34 | #include <linux/uaccess.h> | ||
35 | |||
36 | #include <asm/pgtable.h> | ||
37 | #include <asm/gcc_intrin.h> | ||
38 | #include <asm/pal.h> | ||
39 | #include <asm/cacheflush.h> | ||
40 | #include <asm/div64.h> | ||
41 | #include <asm/tlb.h> | ||
42 | |||
43 | #include "misc.h" | ||
44 | #include "vti.h" | ||
45 | #include "iodev.h" | ||
46 | #include "ioapic.h" | ||
47 | #include "lapic.h" | ||
48 | |||
49 | static unsigned long kvm_vmm_base; | ||
50 | static unsigned long kvm_vsa_base; | ||
51 | static unsigned long kvm_vm_buffer; | ||
52 | static unsigned long kvm_vm_buffer_size; | ||
53 | unsigned long kvm_vmm_gp; | ||
54 | |||
55 | static long vp_env_info; | ||
56 | |||
57 | static struct kvm_vmm_info *kvm_vmm_info; | ||
58 | |||
59 | static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu); | ||
60 | |||
61 | struct kvm_stats_debugfs_item debugfs_entries[] = { | ||
62 | { NULL } | ||
63 | }; | ||
64 | |||
65 | |||
66 | struct fdesc{ | ||
67 | unsigned long ip; | ||
68 | unsigned long gp; | ||
69 | }; | ||
70 | |||
71 | static void kvm_flush_icache(unsigned long start, unsigned long len) | ||
72 | { | ||
73 | int l; | ||
74 | |||
75 | for (l = 0; l < (len + 32); l += 32) | ||
76 | ia64_fc(start + l); | ||
77 | |||
78 | ia64_sync_i(); | ||
79 | ia64_srlz_i(); | ||
80 | } | ||
81 | |||
82 | static void kvm_flush_tlb_all(void) | ||
83 | { | ||
84 | unsigned long i, j, count0, count1, stride0, stride1, addr; | ||
85 | long flags; | ||
86 | |||
87 | addr = local_cpu_data->ptce_base; | ||
88 | count0 = local_cpu_data->ptce_count[0]; | ||
89 | count1 = local_cpu_data->ptce_count[1]; | ||
90 | stride0 = local_cpu_data->ptce_stride[0]; | ||
91 | stride1 = local_cpu_data->ptce_stride[1]; | ||
92 | |||
93 | local_irq_save(flags); | ||
94 | for (i = 0; i < count0; ++i) { | ||
95 | for (j = 0; j < count1; ++j) { | ||
96 | ia64_ptce(addr); | ||
97 | addr += stride1; | ||
98 | } | ||
99 | addr += stride0; | ||
100 | } | ||
101 | local_irq_restore(flags); | ||
102 | ia64_srlz_i(); /* srlz.i implies srlz.d */ | ||
103 | } | ||
104 | |||
105 | long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) | ||
106 | { | ||
107 | struct ia64_pal_retval iprv; | ||
108 | |||
109 | PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva, | ||
110 | (u64)opt_handler); | ||
111 | |||
112 | return iprv.status; | ||
113 | } | ||
114 | |||
115 | static DEFINE_SPINLOCK(vp_lock); | ||
116 | |||
117 | void kvm_arch_hardware_enable(void *garbage) | ||
118 | { | ||
119 | long status; | ||
120 | long tmp_base; | ||
121 | unsigned long pte; | ||
122 | unsigned long saved_psr; | ||
123 | int slot; | ||
124 | |||
125 | pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), | ||
126 | PAGE_KERNEL)); | ||
127 | local_irq_save(saved_psr); | ||
128 | slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); | ||
129 | if (slot < 0) | ||
130 | return; | ||
131 | local_irq_restore(saved_psr); | ||
132 | |||
133 | spin_lock(&vp_lock); | ||
134 | status = ia64_pal_vp_init_env(kvm_vsa_base ? | ||
135 | VP_INIT_ENV : VP_INIT_ENV_INITALIZE, | ||
136 | __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); | ||
137 | if (status != 0) { | ||
138 | printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); | ||
139 | return ; | ||
140 | } | ||
141 | |||
142 | if (!kvm_vsa_base) { | ||
143 | kvm_vsa_base = tmp_base; | ||
144 | printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base); | ||
145 | } | ||
146 | spin_unlock(&vp_lock); | ||
147 | ia64_ptr_entry(0x3, slot); | ||
148 | } | ||
149 | |||
150 | void kvm_arch_hardware_disable(void *garbage) | ||
151 | { | ||
152 | |||
153 | long status; | ||
154 | int slot; | ||
155 | unsigned long pte; | ||
156 | unsigned long saved_psr; | ||
157 | unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA); | ||
158 | |||
159 | pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), | ||
160 | PAGE_KERNEL)); | ||
161 | |||
162 | local_irq_save(saved_psr); | ||
163 | slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); | ||
164 | if (slot < 0) | ||
165 | return; | ||
166 | local_irq_restore(saved_psr); | ||
167 | |||
168 | status = ia64_pal_vp_exit_env(host_iva); | ||
169 | if (status) | ||
170 | printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n", | ||
171 | status); | ||
172 | ia64_ptr_entry(0x3, slot); | ||
173 | } | ||
174 | |||
175 | void kvm_arch_check_processor_compat(void *rtn) | ||
176 | { | ||
177 | *(int *)rtn = 0; | ||
178 | } | ||
179 | |||
180 | int kvm_dev_ioctl_check_extension(long ext) | ||
181 | { | ||
182 | |||
183 | int r; | ||
184 | |||
185 | switch (ext) { | ||
186 | case KVM_CAP_IRQCHIP: | ||
187 | case KVM_CAP_USER_MEMORY: | ||
188 | |||
189 | r = 1; | ||
190 | break; | ||
191 | default: | ||
192 | r = 0; | ||
193 | } | ||
194 | return r; | ||
195 | |||
196 | } | ||
197 | |||
198 | static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, | ||
199 | gpa_t addr) | ||
200 | { | ||
201 | struct kvm_io_device *dev; | ||
202 | |||
203 | dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); | ||
204 | |||
205 | return dev; | ||
206 | } | ||
207 | |||
208 | static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
209 | { | ||
210 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | ||
211 | kvm_run->hw.hardware_exit_reason = 1; | ||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
216 | { | ||
217 | struct kvm_mmio_req *p; | ||
218 | struct kvm_io_device *mmio_dev; | ||
219 | |||
220 | p = kvm_get_vcpu_ioreq(vcpu); | ||
221 | |||
222 | if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS) | ||
223 | goto mmio; | ||
224 | vcpu->mmio_needed = 1; | ||
225 | vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr; | ||
226 | vcpu->mmio_size = kvm_run->mmio.len = p->size; | ||
227 | vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir; | ||
228 | |||
229 | if (vcpu->mmio_is_write) | ||
230 | memcpy(vcpu->mmio_data, &p->data, p->size); | ||
231 | memcpy(kvm_run->mmio.data, &p->data, p->size); | ||
232 | kvm_run->exit_reason = KVM_EXIT_MMIO; | ||
233 | return 0; | ||
234 | mmio: | ||
235 | mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr); | ||
236 | if (mmio_dev) { | ||
237 | if (!p->dir) | ||
238 | kvm_iodevice_write(mmio_dev, p->addr, p->size, | ||
239 | &p->data); | ||
240 | else | ||
241 | kvm_iodevice_read(mmio_dev, p->addr, p->size, | ||
242 | &p->data); | ||
243 | |||
244 | } else | ||
245 | printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr); | ||
246 | p->state = STATE_IORESP_READY; | ||
247 | |||
248 | return 1; | ||
249 | } | ||
250 | |||
251 | static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
252 | { | ||
253 | struct exit_ctl_data *p; | ||
254 | |||
255 | p = kvm_get_exit_data(vcpu); | ||
256 | |||
257 | if (p->exit_reason == EXIT_REASON_PAL_CALL) | ||
258 | return kvm_pal_emul(vcpu, kvm_run); | ||
259 | else { | ||
260 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | ||
261 | kvm_run->hw.hardware_exit_reason = 2; | ||
262 | return 0; | ||
263 | } | ||
264 | } | ||
265 | |||
266 | static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
267 | { | ||
268 | struct exit_ctl_data *p; | ||
269 | |||
270 | p = kvm_get_exit_data(vcpu); | ||
271 | |||
272 | if (p->exit_reason == EXIT_REASON_SAL_CALL) { | ||
273 | kvm_sal_emul(vcpu); | ||
274 | return 1; | ||
275 | } else { | ||
276 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | ||
277 | kvm_run->hw.hardware_exit_reason = 3; | ||
278 | return 0; | ||
279 | } | ||
280 | |||
281 | } | ||
282 | |||
283 | /* | ||
284 | * offset: address offset to IPI space. | ||
285 | * value: deliver value. | ||
286 | */ | ||
287 | static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm, | ||
288 | uint64_t vector) | ||
289 | { | ||
290 | switch (dm) { | ||
291 | case SAPIC_FIXED: | ||
292 | kvm_apic_set_irq(vcpu, vector, 0); | ||
293 | break; | ||
294 | case SAPIC_NMI: | ||
295 | kvm_apic_set_irq(vcpu, 2, 0); | ||
296 | break; | ||
297 | case SAPIC_EXTINT: | ||
298 | kvm_apic_set_irq(vcpu, 0, 0); | ||
299 | break; | ||
300 | case SAPIC_INIT: | ||
301 | case SAPIC_PMI: | ||
302 | default: | ||
303 | printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); | ||
304 | break; | ||
305 | } | ||
306 | } | ||
307 | |||
308 | static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, | ||
309 | unsigned long eid) | ||
310 | { | ||
311 | union ia64_lid lid; | ||
312 | int i; | ||
313 | |||
314 | for (i = 0; i < KVM_MAX_VCPUS; i++) { | ||
315 | if (kvm->vcpus[i]) { | ||
316 | lid.val = VCPU_LID(kvm->vcpus[i]); | ||
317 | if (lid.id == id && lid.eid == eid) | ||
318 | return kvm->vcpus[i]; | ||
319 | } | ||
320 | } | ||
321 | |||
322 | return NULL; | ||
323 | } | ||
324 | |||
325 | static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
326 | { | ||
327 | struct exit_ctl_data *p = kvm_get_exit_data(vcpu); | ||
328 | struct kvm_vcpu *target_vcpu; | ||
329 | struct kvm_pt_regs *regs; | ||
330 | union ia64_ipi_a addr = p->u.ipi_data.addr; | ||
331 | union ia64_ipi_d data = p->u.ipi_data.data; | ||
332 | |||
333 | target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid); | ||
334 | if (!target_vcpu) | ||
335 | return handle_vm_error(vcpu, kvm_run); | ||
336 | |||
337 | if (!target_vcpu->arch.launched) { | ||
338 | regs = vcpu_regs(target_vcpu); | ||
339 | |||
340 | regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip; | ||
341 | regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp; | ||
342 | |||
343 | target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
344 | if (waitqueue_active(&target_vcpu->wq)) | ||
345 | wake_up_interruptible(&target_vcpu->wq); | ||
346 | } else { | ||
347 | vcpu_deliver_ipi(target_vcpu, data.dm, data.vector); | ||
348 | if (target_vcpu != vcpu) | ||
349 | kvm_vcpu_kick(target_vcpu); | ||
350 | } | ||
351 | |||
352 | return 1; | ||
353 | } | ||
354 | |||
355 | struct call_data { | ||
356 | struct kvm_ptc_g ptc_g_data; | ||
357 | struct kvm_vcpu *vcpu; | ||
358 | }; | ||
359 | |||
360 | static void vcpu_global_purge(void *info) | ||
361 | { | ||
362 | struct call_data *p = (struct call_data *)info; | ||
363 | struct kvm_vcpu *vcpu = p->vcpu; | ||
364 | |||
365 | if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) | ||
366 | return; | ||
367 | |||
368 | set_bit(KVM_REQ_PTC_G, &vcpu->requests); | ||
369 | if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) { | ||
370 | vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] = | ||
371 | p->ptc_g_data; | ||
372 | } else { | ||
373 | clear_bit(KVM_REQ_PTC_G, &vcpu->requests); | ||
374 | vcpu->arch.ptc_g_count = 0; | ||
375 | set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); | ||
376 | } | ||
377 | } | ||
378 | |||
379 | static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
380 | { | ||
381 | struct exit_ctl_data *p = kvm_get_exit_data(vcpu); | ||
382 | struct kvm *kvm = vcpu->kvm; | ||
383 | struct call_data call_data; | ||
384 | int i; | ||
385 | call_data.ptc_g_data = p->u.ptc_g_data; | ||
386 | |||
387 | for (i = 0; i < KVM_MAX_VCPUS; i++) { | ||
388 | if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state == | ||
389 | KVM_MP_STATE_UNINITIALIZED || | ||
390 | vcpu == kvm->vcpus[i]) | ||
391 | continue; | ||
392 | |||
393 | if (waitqueue_active(&kvm->vcpus[i]->wq)) | ||
394 | wake_up_interruptible(&kvm->vcpus[i]->wq); | ||
395 | |||
396 | if (kvm->vcpus[i]->cpu != -1) { | ||
397 | call_data.vcpu = kvm->vcpus[i]; | ||
398 | smp_call_function_single(kvm->vcpus[i]->cpu, | ||
399 | vcpu_global_purge, &call_data, 0, 1); | ||
400 | } else | ||
401 | printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n"); | ||
402 | |||
403 | } | ||
404 | return 1; | ||
405 | } | ||
406 | |||
407 | static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
408 | { | ||
409 | return 1; | ||
410 | } | ||
411 | |||
412 | int kvm_emulate_halt(struct kvm_vcpu *vcpu) | ||
413 | { | ||
414 | |||
415 | ktime_t kt; | ||
416 | long itc_diff; | ||
417 | unsigned long vcpu_now_itc; | ||
418 | |||
419 | unsigned long expires; | ||
420 | struct hrtimer *p_ht = &vcpu->arch.hlt_timer; | ||
421 | unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec; | ||
422 | struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); | ||
423 | |||
424 | vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset; | ||
425 | |||
426 | if (time_after(vcpu_now_itc, vpd->itm)) { | ||
427 | vcpu->arch.timer_check = 1; | ||
428 | return 1; | ||
429 | } | ||
430 | itc_diff = vpd->itm - vcpu_now_itc; | ||
431 | if (itc_diff < 0) | ||
432 | itc_diff = -itc_diff; | ||
433 | |||
434 | expires = div64_64(itc_diff, cyc_per_usec); | ||
435 | kt = ktime_set(0, 1000 * expires); | ||
436 | vcpu->arch.ht_active = 1; | ||
437 | hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); | ||
438 | |||
439 | if (irqchip_in_kernel(vcpu->kvm)) { | ||
440 | vcpu->arch.mp_state = KVM_MP_STATE_HALTED; | ||
441 | kvm_vcpu_block(vcpu); | ||
442 | hrtimer_cancel(p_ht); | ||
443 | vcpu->arch.ht_active = 0; | ||
444 | |||
445 | if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) | ||
446 | return -EINTR; | ||
447 | return 1; | ||
448 | } else { | ||
449 | printk(KERN_ERR"kvm: Unsupported userspace halt!"); | ||
450 | return 0; | ||
451 | } | ||
452 | } | ||
453 | |||
454 | static int handle_vm_shutdown(struct kvm_vcpu *vcpu, | ||
455 | struct kvm_run *kvm_run) | ||
456 | { | ||
457 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | ||
458 | return 0; | ||
459 | } | ||
460 | |||
461 | static int handle_external_interrupt(struct kvm_vcpu *vcpu, | ||
462 | struct kvm_run *kvm_run) | ||
463 | { | ||
464 | return 1; | ||
465 | } | ||
466 | |||
467 | static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, | ||
468 | struct kvm_run *kvm_run) = { | ||
469 | [EXIT_REASON_VM_PANIC] = handle_vm_error, | ||
470 | [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio, | ||
471 | [EXIT_REASON_PAL_CALL] = handle_pal_call, | ||
472 | [EXIT_REASON_SAL_CALL] = handle_sal_call, | ||
473 | [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6, | ||
474 | [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown, | ||
475 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, | ||
476 | [EXIT_REASON_IPI] = handle_ipi, | ||
477 | [EXIT_REASON_PTC_G] = handle_global_purge, | ||
478 | |||
479 | }; | ||
480 | |||
481 | static const int kvm_vti_max_exit_handlers = | ||
482 | sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers); | ||
483 | |||
484 | static void kvm_prepare_guest_switch(struct kvm_vcpu *vcpu) | ||
485 | { | ||
486 | } | ||
487 | |||
488 | static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu) | ||
489 | { | ||
490 | struct exit_ctl_data *p_exit_data; | ||
491 | |||
492 | p_exit_data = kvm_get_exit_data(vcpu); | ||
493 | return p_exit_data->exit_reason; | ||
494 | } | ||
495 | |||
496 | /* | ||
497 | * The guest has exited. See if we can fix it or if we need userspace | ||
498 | * assistance. | ||
499 | */ | ||
500 | static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | ||
501 | { | ||
502 | u32 exit_reason = kvm_get_exit_reason(vcpu); | ||
503 | vcpu->arch.last_exit = exit_reason; | ||
504 | |||
505 | if (exit_reason < kvm_vti_max_exit_handlers | ||
506 | && kvm_vti_exit_handlers[exit_reason]) | ||
507 | return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run); | ||
508 | else { | ||
509 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | ||
510 | kvm_run->hw.hardware_exit_reason = exit_reason; | ||
511 | } | ||
512 | return 0; | ||
513 | } | ||
514 | |||
515 | static inline void vti_set_rr6(unsigned long rr6) | ||
516 | { | ||
517 | ia64_set_rr(RR6, rr6); | ||
518 | ia64_srlz_i(); | ||
519 | } | ||
520 | |||
521 | static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu) | ||
522 | { | ||
523 | unsigned long pte; | ||
524 | struct kvm *kvm = vcpu->kvm; | ||
525 | int r; | ||
526 | |||
527 | /*Insert a pair of tr to map vmm*/ | ||
528 | pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); | ||
529 | r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); | ||
530 | if (r < 0) | ||
531 | goto out; | ||
532 | vcpu->arch.vmm_tr_slot = r; | ||
533 | /*Insert a pairt of tr to map data of vm*/ | ||
534 | pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL)); | ||
535 | r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE, | ||
536 | pte, KVM_VM_DATA_SHIFT); | ||
537 | if (r < 0) | ||
538 | goto out; | ||
539 | vcpu->arch.vm_tr_slot = r; | ||
540 | r = 0; | ||
541 | out: | ||
542 | return r; | ||
543 | |||
544 | } | ||
545 | |||
546 | static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu) | ||
547 | { | ||
548 | |||
549 | ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot); | ||
550 | ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot); | ||
551 | |||
552 | } | ||
553 | |||
554 | static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) | ||
555 | { | ||
556 | int cpu = smp_processor_id(); | ||
557 | |||
558 | if (vcpu->arch.last_run_cpu != cpu || | ||
559 | per_cpu(last_vcpu, cpu) != vcpu) { | ||
560 | per_cpu(last_vcpu, cpu) = vcpu; | ||
561 | vcpu->arch.last_run_cpu = cpu; | ||
562 | kvm_flush_tlb_all(); | ||
563 | } | ||
564 | |||
565 | vcpu->arch.host_rr6 = ia64_get_rr(RR6); | ||
566 | vti_set_rr6(vcpu->arch.vmm_rr); | ||
567 | return kvm_insert_vmm_mapping(vcpu); | ||
568 | } | ||
569 | static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) | ||
570 | { | ||
571 | kvm_purge_vmm_mapping(vcpu); | ||
572 | vti_set_rr6(vcpu->arch.host_rr6); | ||
573 | } | ||
574 | |||
575 | static int vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
576 | { | ||
577 | union context *host_ctx, *guest_ctx; | ||
578 | int r; | ||
579 | |||
580 | /*Get host and guest context with guest address space.*/ | ||
581 | host_ctx = kvm_get_host_context(vcpu); | ||
582 | guest_ctx = kvm_get_guest_context(vcpu); | ||
583 | |||
584 | r = kvm_vcpu_pre_transition(vcpu); | ||
585 | if (r < 0) | ||
586 | goto out; | ||
587 | kvm_vmm_info->tramp_entry(host_ctx, guest_ctx); | ||
588 | kvm_vcpu_post_transition(vcpu); | ||
589 | r = 0; | ||
590 | out: | ||
591 | return r; | ||
592 | } | ||
593 | |||
594 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
595 | { | ||
596 | int r; | ||
597 | |||
598 | again: | ||
599 | preempt_disable(); | ||
600 | |||
601 | kvm_prepare_guest_switch(vcpu); | ||
602 | local_irq_disable(); | ||
603 | |||
604 | if (signal_pending(current)) { | ||
605 | local_irq_enable(); | ||
606 | preempt_enable(); | ||
607 | r = -EINTR; | ||
608 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
609 | goto out; | ||
610 | } | ||
611 | |||
612 | vcpu->guest_mode = 1; | ||
613 | kvm_guest_enter(); | ||
614 | |||
615 | r = vti_vcpu_run(vcpu, kvm_run); | ||
616 | if (r < 0) { | ||
617 | local_irq_enable(); | ||
618 | preempt_enable(); | ||
619 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | ||
620 | goto out; | ||
621 | } | ||
622 | |||
623 | vcpu->arch.launched = 1; | ||
624 | vcpu->guest_mode = 0; | ||
625 | local_irq_enable(); | ||
626 | |||
627 | /* | ||
628 | * We must have an instruction between local_irq_enable() and | ||
629 | * kvm_guest_exit(), so the timer interrupt isn't delayed by | ||
630 | * the interrupt shadow. The stat.exits increment will do nicely. | ||
631 | * But we need to prevent reordering, hence this barrier(): | ||
632 | */ | ||
633 | barrier(); | ||
634 | |||
635 | kvm_guest_exit(); | ||
636 | |||
637 | preempt_enable(); | ||
638 | |||
639 | r = kvm_handle_exit(kvm_run, vcpu); | ||
640 | |||
641 | if (r > 0) { | ||
642 | if (!need_resched()) | ||
643 | goto again; | ||
644 | } | ||
645 | |||
646 | out: | ||
647 | if (r > 0) { | ||
648 | kvm_resched(vcpu); | ||
649 | goto again; | ||
650 | } | ||
651 | |||
652 | return r; | ||
653 | } | ||
654 | |||
655 | static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) | ||
656 | { | ||
657 | struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu); | ||
658 | |||
659 | if (!vcpu->mmio_is_write) | ||
660 | memcpy(&p->data, vcpu->mmio_data, 8); | ||
661 | p->state = STATE_IORESP_READY; | ||
662 | } | ||
663 | |||
664 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
665 | { | ||
666 | int r; | ||
667 | sigset_t sigsaved; | ||
668 | |||
669 | vcpu_load(vcpu); | ||
670 | |||
671 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { | ||
672 | kvm_vcpu_block(vcpu); | ||
673 | vcpu_put(vcpu); | ||
674 | return -EAGAIN; | ||
675 | } | ||
676 | |||
677 | if (vcpu->sigset_active) | ||
678 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | ||
679 | |||
680 | if (vcpu->mmio_needed) { | ||
681 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | ||
682 | kvm_set_mmio_data(vcpu); | ||
683 | vcpu->mmio_read_completed = 1; | ||
684 | vcpu->mmio_needed = 0; | ||
685 | } | ||
686 | r = __vcpu_run(vcpu, kvm_run); | ||
687 | |||
688 | if (vcpu->sigset_active) | ||
689 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | ||
690 | |||
691 | vcpu_put(vcpu); | ||
692 | return r; | ||
693 | } | ||
694 | |||
695 | /* | ||
696 | * Allocate 16M memory for every vm to hold its specific data. | ||
697 | * Its memory map is defined in kvm_host.h. | ||
698 | */ | ||
699 | static struct kvm *kvm_alloc_kvm(void) | ||
700 | { | ||
701 | |||
702 | struct kvm *kvm; | ||
703 | uint64_t vm_base; | ||
704 | |||
705 | vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); | ||
706 | |||
707 | if (!vm_base) | ||
708 | return ERR_PTR(-ENOMEM); | ||
709 | printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base); | ||
710 | |||
711 | /* Zero all pages before use! */ | ||
712 | memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); | ||
713 | |||
714 | kvm = (struct kvm *)(vm_base + KVM_VM_OFS); | ||
715 | kvm->arch.vm_base = vm_base; | ||
716 | |||
717 | return kvm; | ||
718 | } | ||
719 | |||
720 | struct kvm_io_range { | ||
721 | unsigned long start; | ||
722 | unsigned long size; | ||
723 | unsigned long type; | ||
724 | }; | ||
725 | |||
726 | static const struct kvm_io_range io_ranges[] = { | ||
727 | {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER}, | ||
728 | {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO}, | ||
729 | {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO}, | ||
730 | {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC}, | ||
731 | {PIB_START, PIB_SIZE, GPFN_PIB}, | ||
732 | }; | ||
733 | |||
734 | static void kvm_build_io_pmt(struct kvm *kvm) | ||
735 | { | ||
736 | unsigned long i, j; | ||
737 | |||
738 | /* Mark I/O ranges */ | ||
739 | for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range)); | ||
740 | i++) { | ||
741 | for (j = io_ranges[i].start; | ||
742 | j < io_ranges[i].start + io_ranges[i].size; | ||
743 | j += PAGE_SIZE) | ||
744 | kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT, | ||
745 | io_ranges[i].type, 0); | ||
746 | } | ||
747 | |||
748 | } | ||
749 | |||
750 | /*Use unused rids to virtualize guest rid.*/ | ||
751 | #define GUEST_PHYSICAL_RR0 0x1739 | ||
752 | #define GUEST_PHYSICAL_RR4 0x2739 | ||
753 | #define VMM_INIT_RR 0x1660 | ||
754 | |||
755 | static void kvm_init_vm(struct kvm *kvm) | ||
756 | { | ||
757 | long vm_base; | ||
758 | |||
759 | BUG_ON(!kvm); | ||
760 | |||
761 | kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; | ||
762 | kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; | ||
763 | kvm->arch.vmm_init_rr = VMM_INIT_RR; | ||
764 | |||
765 | vm_base = kvm->arch.vm_base; | ||
766 | if (vm_base) { | ||
767 | kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS; | ||
768 | kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS; | ||
769 | kvm->arch.vpd_base = vm_base + KVM_VPD_OFS; | ||
770 | } | ||
771 | |||
772 | /* | ||
773 | *Fill P2M entries for MMIO/IO ranges | ||
774 | */ | ||
775 | kvm_build_io_pmt(kvm); | ||
776 | |||
777 | } | ||
778 | |||
779 | struct kvm *kvm_arch_create_vm(void) | ||
780 | { | ||
781 | struct kvm *kvm = kvm_alloc_kvm(); | ||
782 | |||
783 | if (IS_ERR(kvm)) | ||
784 | return ERR_PTR(-ENOMEM); | ||
785 | kvm_init_vm(kvm); | ||
786 | |||
787 | return kvm; | ||
788 | |||
789 | } | ||
790 | |||
791 | static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, | ||
792 | struct kvm_irqchip *chip) | ||
793 | { | ||
794 | int r; | ||
795 | |||
796 | r = 0; | ||
797 | switch (chip->chip_id) { | ||
798 | case KVM_IRQCHIP_IOAPIC: | ||
799 | memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm), | ||
800 | sizeof(struct kvm_ioapic_state)); | ||
801 | break; | ||
802 | default: | ||
803 | r = -EINVAL; | ||
804 | break; | ||
805 | } | ||
806 | return r; | ||
807 | } | ||
808 | |||
809 | static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | ||
810 | { | ||
811 | int r; | ||
812 | |||
813 | r = 0; | ||
814 | switch (chip->chip_id) { | ||
815 | case KVM_IRQCHIP_IOAPIC: | ||
816 | memcpy(ioapic_irqchip(kvm), | ||
817 | &chip->chip.ioapic, | ||
818 | sizeof(struct kvm_ioapic_state)); | ||
819 | break; | ||
820 | default: | ||
821 | r = -EINVAL; | ||
822 | break; | ||
823 | } | ||
824 | return r; | ||
825 | } | ||
826 | |||
827 | #define RESTORE_REGS(_x) vcpu->arch._x = regs->_x | ||
828 | |||
829 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | ||
830 | { | ||
831 | int i; | ||
832 | struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); | ||
833 | int r; | ||
834 | |||
835 | vcpu_load(vcpu); | ||
836 | |||
837 | for (i = 0; i < 16; i++) { | ||
838 | vpd->vgr[i] = regs->vpd.vgr[i]; | ||
839 | vpd->vbgr[i] = regs->vpd.vbgr[i]; | ||
840 | } | ||
841 | for (i = 0; i < 128; i++) | ||
842 | vpd->vcr[i] = regs->vpd.vcr[i]; | ||
843 | vpd->vhpi = regs->vpd.vhpi; | ||
844 | vpd->vnat = regs->vpd.vnat; | ||
845 | vpd->vbnat = regs->vpd.vbnat; | ||
846 | vpd->vpsr = regs->vpd.vpsr; | ||
847 | |||
848 | vpd->vpr = regs->vpd.vpr; | ||
849 | |||
850 | r = -EFAULT; | ||
851 | r = copy_from_user(&vcpu->arch.guest, regs->saved_guest, | ||
852 | sizeof(union context)); | ||
853 | if (r) | ||
854 | goto out; | ||
855 | r = copy_from_user(vcpu + 1, regs->saved_stack + | ||
856 | sizeof(struct kvm_vcpu), | ||
857 | IA64_STK_OFFSET - sizeof(struct kvm_vcpu)); | ||
858 | if (r) | ||
859 | goto out; | ||
860 | vcpu->arch.exit_data = | ||
861 | ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data; | ||
862 | |||
863 | RESTORE_REGS(mp_state); | ||
864 | RESTORE_REGS(vmm_rr); | ||
865 | memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS); | ||
866 | memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS); | ||
867 | RESTORE_REGS(itr_regions); | ||
868 | RESTORE_REGS(dtr_regions); | ||
869 | RESTORE_REGS(tc_regions); | ||
870 | RESTORE_REGS(irq_check); | ||
871 | RESTORE_REGS(itc_check); | ||
872 | RESTORE_REGS(timer_check); | ||
873 | RESTORE_REGS(timer_pending); | ||
874 | RESTORE_REGS(last_itc); | ||
875 | for (i = 0; i < 8; i++) { | ||
876 | vcpu->arch.vrr[i] = regs->vrr[i]; | ||
877 | vcpu->arch.ibr[i] = regs->ibr[i]; | ||
878 | vcpu->arch.dbr[i] = regs->dbr[i]; | ||
879 | } | ||
880 | for (i = 0; i < 4; i++) | ||
881 | vcpu->arch.insvc[i] = regs->insvc[i]; | ||
882 | RESTORE_REGS(xtp); | ||
883 | RESTORE_REGS(metaphysical_rr0); | ||
884 | RESTORE_REGS(metaphysical_rr4); | ||
885 | RESTORE_REGS(metaphysical_saved_rr0); | ||
886 | RESTORE_REGS(metaphysical_saved_rr4); | ||
887 | RESTORE_REGS(fp_psr); | ||
888 | RESTORE_REGS(saved_gp); | ||
889 | |||
890 | vcpu->arch.irq_new_pending = 1; | ||
891 | vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC); | ||
892 | set_bit(KVM_REQ_RESUME, &vcpu->requests); | ||
893 | |||
894 | vcpu_put(vcpu); | ||
895 | r = 0; | ||
896 | out: | ||
897 | return r; | ||
898 | } | ||
899 | |||
900 | long kvm_arch_vm_ioctl(struct file *filp, | ||
901 | unsigned int ioctl, unsigned long arg) | ||
902 | { | ||
903 | struct kvm *kvm = filp->private_data; | ||
904 | void __user *argp = (void __user *)arg; | ||
905 | int r = -EINVAL; | ||
906 | |||
907 | switch (ioctl) { | ||
908 | case KVM_SET_MEMORY_REGION: { | ||
909 | struct kvm_memory_region kvm_mem; | ||
910 | struct kvm_userspace_memory_region kvm_userspace_mem; | ||
911 | |||
912 | r = -EFAULT; | ||
913 | if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) | ||
914 | goto out; | ||
915 | kvm_userspace_mem.slot = kvm_mem.slot; | ||
916 | kvm_userspace_mem.flags = kvm_mem.flags; | ||
917 | kvm_userspace_mem.guest_phys_addr = | ||
918 | kvm_mem.guest_phys_addr; | ||
919 | kvm_userspace_mem.memory_size = kvm_mem.memory_size; | ||
920 | r = kvm_vm_ioctl_set_memory_region(kvm, | ||
921 | &kvm_userspace_mem, 0); | ||
922 | if (r) | ||
923 | goto out; | ||
924 | break; | ||
925 | } | ||
926 | case KVM_CREATE_IRQCHIP: | ||
927 | r = -EFAULT; | ||
928 | r = kvm_ioapic_init(kvm); | ||
929 | if (r) | ||
930 | goto out; | ||
931 | break; | ||
932 | case KVM_IRQ_LINE: { | ||
933 | struct kvm_irq_level irq_event; | ||
934 | |||
935 | r = -EFAULT; | ||
936 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | ||
937 | goto out; | ||
938 | if (irqchip_in_kernel(kvm)) { | ||
939 | mutex_lock(&kvm->lock); | ||
940 | kvm_ioapic_set_irq(kvm->arch.vioapic, | ||
941 | irq_event.irq, | ||
942 | irq_event.level); | ||
943 | mutex_unlock(&kvm->lock); | ||
944 | r = 0; | ||
945 | } | ||
946 | break; | ||
947 | } | ||
948 | case KVM_GET_IRQCHIP: { | ||
949 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | ||
950 | struct kvm_irqchip chip; | ||
951 | |||
952 | r = -EFAULT; | ||
953 | if (copy_from_user(&chip, argp, sizeof chip)) | ||
954 | goto out; | ||
955 | r = -ENXIO; | ||
956 | if (!irqchip_in_kernel(kvm)) | ||
957 | goto out; | ||
958 | r = kvm_vm_ioctl_get_irqchip(kvm, &chip); | ||
959 | if (r) | ||
960 | goto out; | ||
961 | r = -EFAULT; | ||
962 | if (copy_to_user(argp, &chip, sizeof chip)) | ||
963 | goto out; | ||
964 | r = 0; | ||
965 | break; | ||
966 | } | ||
967 | case KVM_SET_IRQCHIP: { | ||
968 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | ||
969 | struct kvm_irqchip chip; | ||
970 | |||
971 | r = -EFAULT; | ||
972 | if (copy_from_user(&chip, argp, sizeof chip)) | ||
973 | goto out; | ||
974 | r = -ENXIO; | ||
975 | if (!irqchip_in_kernel(kvm)) | ||
976 | goto out; | ||
977 | r = kvm_vm_ioctl_set_irqchip(kvm, &chip); | ||
978 | if (r) | ||
979 | goto out; | ||
980 | r = 0; | ||
981 | break; | ||
982 | } | ||
983 | default: | ||
984 | ; | ||
985 | } | ||
986 | out: | ||
987 | return r; | ||
988 | } | ||
989 | |||
990 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | ||
991 | struct kvm_sregs *sregs) | ||
992 | { | ||
993 | return -EINVAL; | ||
994 | } | ||
995 | |||
996 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | ||
997 | struct kvm_sregs *sregs) | ||
998 | { | ||
999 | return -EINVAL; | ||
1000 | |||
1001 | } | ||
1002 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | ||
1003 | struct kvm_translation *tr) | ||
1004 | { | ||
1005 | |||
1006 | return -EINVAL; | ||
1007 | } | ||
1008 | |||
1009 | static int kvm_alloc_vmm_area(void) | ||
1010 | { | ||
1011 | if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) { | ||
1012 | kvm_vmm_base = __get_free_pages(GFP_KERNEL, | ||
1013 | get_order(KVM_VMM_SIZE)); | ||
1014 | if (!kvm_vmm_base) | ||
1015 | return -ENOMEM; | ||
1016 | |||
1017 | memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); | ||
1018 | kvm_vm_buffer = kvm_vmm_base + VMM_SIZE; | ||
1019 | |||
1020 | printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n", | ||
1021 | kvm_vmm_base, kvm_vm_buffer); | ||
1022 | } | ||
1023 | |||
1024 | return 0; | ||
1025 | } | ||
1026 | |||
1027 | static void kvm_free_vmm_area(void) | ||
1028 | { | ||
1029 | if (kvm_vmm_base) { | ||
1030 | /*Zero this area before free to avoid bits leak!!*/ | ||
1031 | memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); | ||
1032 | free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE)); | ||
1033 | kvm_vmm_base = 0; | ||
1034 | kvm_vm_buffer = 0; | ||
1035 | kvm_vsa_base = 0; | ||
1036 | } | ||
1037 | } | ||
1038 | |||
1039 | /* | ||
1040 | * Make sure that a cpu that is being hot-unplugged does not have any vcpus | ||
1041 | * cached on it. Leave it as blank for IA64. | ||
1042 | */ | ||
1043 | void decache_vcpus_on_cpu(int cpu) | ||
1044 | { | ||
1045 | } | ||
1046 | |||
1047 | static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
1048 | { | ||
1049 | } | ||
1050 | |||
1051 | static int vti_init_vpd(struct kvm_vcpu *vcpu) | ||
1052 | { | ||
1053 | int i; | ||
1054 | union cpuid3_t cpuid3; | ||
1055 | struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); | ||
1056 | |||
1057 | if (IS_ERR(vpd)) | ||
1058 | return PTR_ERR(vpd); | ||
1059 | |||
1060 | /* CPUID init */ | ||
1061 | for (i = 0; i < 5; i++) | ||
1062 | vpd->vcpuid[i] = ia64_get_cpuid(i); | ||
1063 | |||
1064 | /* Limit the CPUID number to 5 */ | ||
1065 | cpuid3.value = vpd->vcpuid[3]; | ||
1066 | cpuid3.number = 4; /* 5 - 1 */ | ||
1067 | vpd->vcpuid[3] = cpuid3.value; | ||
1068 | |||
1069 | /*Set vac and vdc fields*/ | ||
1070 | vpd->vac.a_from_int_cr = 1; | ||
1071 | vpd->vac.a_to_int_cr = 1; | ||
1072 | vpd->vac.a_from_psr = 1; | ||
1073 | vpd->vac.a_from_cpuid = 1; | ||
1074 | vpd->vac.a_cover = 1; | ||
1075 | vpd->vac.a_bsw = 1; | ||
1076 | vpd->vac.a_int = 1; | ||
1077 | vpd->vdc.d_vmsw = 1; | ||
1078 | |||
1079 | /*Set virtual buffer*/ | ||
1080 | vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE; | ||
1081 | |||
1082 | return 0; | ||
1083 | } | ||
1084 | |||
1085 | static int vti_create_vp(struct kvm_vcpu *vcpu) | ||
1086 | { | ||
1087 | long ret; | ||
1088 | struct vpd *vpd = vcpu->arch.vpd; | ||
1089 | unsigned long vmm_ivt; | ||
1090 | |||
1091 | vmm_ivt = kvm_vmm_info->vmm_ivt; | ||
1092 | |||
1093 | printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt); | ||
1094 | |||
1095 | ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0); | ||
1096 | |||
1097 | if (ret) { | ||
1098 | printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n"); | ||
1099 | return -EINVAL; | ||
1100 | } | ||
1101 | return 0; | ||
1102 | } | ||
1103 | |||
1104 | static void init_ptce_info(struct kvm_vcpu *vcpu) | ||
1105 | { | ||
1106 | ia64_ptce_info_t ptce = {0}; | ||
1107 | |||
1108 | ia64_get_ptce(&ptce); | ||
1109 | vcpu->arch.ptce_base = ptce.base; | ||
1110 | vcpu->arch.ptce_count[0] = ptce.count[0]; | ||
1111 | vcpu->arch.ptce_count[1] = ptce.count[1]; | ||
1112 | vcpu->arch.ptce_stride[0] = ptce.stride[0]; | ||
1113 | vcpu->arch.ptce_stride[1] = ptce.stride[1]; | ||
1114 | } | ||
1115 | |||
1116 | static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu) | ||
1117 | { | ||
1118 | struct hrtimer *p_ht = &vcpu->arch.hlt_timer; | ||
1119 | |||
1120 | if (hrtimer_cancel(p_ht)) | ||
1121 | hrtimer_start(p_ht, p_ht->expires, HRTIMER_MODE_ABS); | ||
1122 | } | ||
1123 | |||
1124 | static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data) | ||
1125 | { | ||
1126 | struct kvm_vcpu *vcpu; | ||
1127 | wait_queue_head_t *q; | ||
1128 | |||
1129 | vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer); | ||
1130 | if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED) | ||
1131 | goto out; | ||
1132 | |||
1133 | q = &vcpu->wq; | ||
1134 | if (waitqueue_active(q)) { | ||
1135 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
1136 | wake_up_interruptible(q); | ||
1137 | } | ||
1138 | out: | ||
1139 | vcpu->arch.timer_check = 1; | ||
1140 | return HRTIMER_NORESTART; | ||
1141 | } | ||
1142 | |||
1143 | #define PALE_RESET_ENTRY 0x80000000ffffffb0UL | ||
1144 | |||
1145 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | ||
1146 | { | ||
1147 | struct kvm_vcpu *v; | ||
1148 | int r; | ||
1149 | int i; | ||
1150 | long itc_offset; | ||
1151 | struct kvm *kvm = vcpu->kvm; | ||
1152 | struct kvm_pt_regs *regs = vcpu_regs(vcpu); | ||
1153 | |||
1154 | union context *p_ctx = &vcpu->arch.guest; | ||
1155 | struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu); | ||
1156 | |||
1157 | /*Init vcpu context for first run.*/ | ||
1158 | if (IS_ERR(vmm_vcpu)) | ||
1159 | return PTR_ERR(vmm_vcpu); | ||
1160 | |||
1161 | if (vcpu->vcpu_id == 0) { | ||
1162 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
1163 | |||
1164 | /*Set entry address for first run.*/ | ||
1165 | regs->cr_iip = PALE_RESET_ENTRY; | ||
1166 | |||
1167 | /*Initilize itc offset for vcpus*/ | ||
1168 | itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); | ||
1169 | for (i = 0; i < MAX_VCPU_NUM; i++) { | ||
1170 | v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); | ||
1171 | v->arch.itc_offset = itc_offset; | ||
1172 | v->arch.last_itc = 0; | ||
1173 | } | ||
1174 | } else | ||
1175 | vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; | ||
1176 | |||
1177 | r = -ENOMEM; | ||
1178 | vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL); | ||
1179 | if (!vcpu->arch.apic) | ||
1180 | goto out; | ||
1181 | vcpu->arch.apic->vcpu = vcpu; | ||
1182 | |||
1183 | p_ctx->gr[1] = 0; | ||
1184 | p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET); | ||
1185 | p_ctx->gr[13] = (unsigned long)vmm_vcpu; | ||
1186 | p_ctx->psr = 0x1008522000UL; | ||
1187 | p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ | ||
1188 | p_ctx->caller_unat = 0; | ||
1189 | p_ctx->pr = 0x0; | ||
1190 | p_ctx->ar[36] = 0x0; /*unat*/ | ||
1191 | p_ctx->ar[19] = 0x0; /*rnat*/ | ||
1192 | p_ctx->ar[18] = (unsigned long)vmm_vcpu + | ||
1193 | ((sizeof(struct kvm_vcpu)+15) & ~15); | ||
1194 | p_ctx->ar[64] = 0x0; /*pfs*/ | ||
1195 | p_ctx->cr[0] = 0x7e04UL; | ||
1196 | p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt; | ||
1197 | p_ctx->cr[8] = 0x3c; | ||
1198 | |||
1199 | /*Initilize region register*/ | ||
1200 | p_ctx->rr[0] = 0x30; | ||
1201 | p_ctx->rr[1] = 0x30; | ||
1202 | p_ctx->rr[2] = 0x30; | ||
1203 | p_ctx->rr[3] = 0x30; | ||
1204 | p_ctx->rr[4] = 0x30; | ||
1205 | p_ctx->rr[5] = 0x30; | ||
1206 | p_ctx->rr[7] = 0x30; | ||
1207 | |||
1208 | /*Initilize branch register 0*/ | ||
1209 | p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry; | ||
1210 | |||
1211 | vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr; | ||
1212 | vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0; | ||
1213 | vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4; | ||
1214 | |||
1215 | hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
1216 | vcpu->arch.hlt_timer.function = hlt_timer_fn; | ||
1217 | |||
1218 | vcpu->arch.last_run_cpu = -1; | ||
1219 | vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id); | ||
1220 | vcpu->arch.vsa_base = kvm_vsa_base; | ||
1221 | vcpu->arch.__gp = kvm_vmm_gp; | ||
1222 | vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); | ||
1223 | vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id); | ||
1224 | vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id); | ||
1225 | init_ptce_info(vcpu); | ||
1226 | |||
1227 | r = 0; | ||
1228 | out: | ||
1229 | return r; | ||
1230 | } | ||
1231 | |||
1232 | static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) | ||
1233 | { | ||
1234 | unsigned long psr; | ||
1235 | int r; | ||
1236 | |||
1237 | local_irq_save(psr); | ||
1238 | r = kvm_insert_vmm_mapping(vcpu); | ||
1239 | if (r) | ||
1240 | goto fail; | ||
1241 | r = kvm_vcpu_init(vcpu, vcpu->kvm, id); | ||
1242 | if (r) | ||
1243 | goto fail; | ||
1244 | |||
1245 | r = vti_init_vpd(vcpu); | ||
1246 | if (r) { | ||
1247 | printk(KERN_DEBUG"kvm: vpd init error!!\n"); | ||
1248 | goto uninit; | ||
1249 | } | ||
1250 | |||
1251 | r = vti_create_vp(vcpu); | ||
1252 | if (r) | ||
1253 | goto uninit; | ||
1254 | |||
1255 | kvm_purge_vmm_mapping(vcpu); | ||
1256 | local_irq_restore(psr); | ||
1257 | |||
1258 | return 0; | ||
1259 | uninit: | ||
1260 | kvm_vcpu_uninit(vcpu); | ||
1261 | fail: | ||
1262 | return r; | ||
1263 | } | ||
1264 | |||
1265 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | ||
1266 | unsigned int id) | ||
1267 | { | ||
1268 | struct kvm_vcpu *vcpu; | ||
1269 | unsigned long vm_base = kvm->arch.vm_base; | ||
1270 | int r; | ||
1271 | int cpu; | ||
1272 | |||
1273 | r = -ENOMEM; | ||
1274 | if (!vm_base) { | ||
1275 | printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); | ||
1276 | goto fail; | ||
1277 | } | ||
1278 | vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id); | ||
1279 | vcpu->kvm = kvm; | ||
1280 | |||
1281 | cpu = get_cpu(); | ||
1282 | vti_vcpu_load(vcpu, cpu); | ||
1283 | r = vti_vcpu_setup(vcpu, id); | ||
1284 | put_cpu(); | ||
1285 | |||
1286 | if (r) { | ||
1287 | printk(KERN_DEBUG"kvm: vcpu_setup error!!\n"); | ||
1288 | goto fail; | ||
1289 | } | ||
1290 | |||
1291 | return vcpu; | ||
1292 | fail: | ||
1293 | return ERR_PTR(r); | ||
1294 | } | ||
1295 | |||
1296 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | ||
1297 | { | ||
1298 | return 0; | ||
1299 | } | ||
1300 | |||
1301 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | ||
1302 | { | ||
1303 | return -EINVAL; | ||
1304 | } | ||
1305 | |||
1306 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | ||
1307 | { | ||
1308 | return -EINVAL; | ||
1309 | } | ||
1310 | |||
1311 | int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, | ||
1312 | struct kvm_debug_guest *dbg) | ||
1313 | { | ||
1314 | return -EINVAL; | ||
1315 | } | ||
1316 | |||
1317 | static void free_kvm(struct kvm *kvm) | ||
1318 | { | ||
1319 | unsigned long vm_base = kvm->arch.vm_base; | ||
1320 | |||
1321 | if (vm_base) { | ||
1322 | memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); | ||
1323 | free_pages(vm_base, get_order(KVM_VM_DATA_SIZE)); | ||
1324 | } | ||
1325 | |||
1326 | } | ||
1327 | |||
1328 | static void kvm_release_vm_pages(struct kvm *kvm) | ||
1329 | { | ||
1330 | struct kvm_memory_slot *memslot; | ||
1331 | int i, j; | ||
1332 | unsigned long base_gfn; | ||
1333 | |||
1334 | for (i = 0; i < kvm->nmemslots; i++) { | ||
1335 | memslot = &kvm->memslots[i]; | ||
1336 | base_gfn = memslot->base_gfn; | ||
1337 | |||
1338 | for (j = 0; j < memslot->npages; j++) { | ||
1339 | if (memslot->rmap[j]) | ||
1340 | put_page((struct page *)memslot->rmap[j]); | ||
1341 | } | ||
1342 | } | ||
1343 | } | ||
1344 | |||
1345 | void kvm_arch_destroy_vm(struct kvm *kvm) | ||
1346 | { | ||
1347 | kfree(kvm->arch.vioapic); | ||
1348 | kvm_release_vm_pages(kvm); | ||
1349 | kvm_free_physmem(kvm); | ||
1350 | free_kvm(kvm); | ||
1351 | } | ||
1352 | |||
1353 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | ||
1354 | { | ||
1355 | } | ||
1356 | |||
1357 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
1358 | { | ||
1359 | if (cpu != vcpu->cpu) { | ||
1360 | vcpu->cpu = cpu; | ||
1361 | if (vcpu->arch.ht_active) | ||
1362 | kvm_migrate_hlt_timer(vcpu); | ||
1363 | } | ||
1364 | } | ||
1365 | |||
1366 | #define SAVE_REGS(_x) regs->_x = vcpu->arch._x | ||
1367 | |||
1368 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | ||
1369 | { | ||
1370 | int i; | ||
1371 | int r; | ||
1372 | struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); | ||
1373 | vcpu_load(vcpu); | ||
1374 | |||
1375 | for (i = 0; i < 16; i++) { | ||
1376 | regs->vpd.vgr[i] = vpd->vgr[i]; | ||
1377 | regs->vpd.vbgr[i] = vpd->vbgr[i]; | ||
1378 | } | ||
1379 | for (i = 0; i < 128; i++) | ||
1380 | regs->vpd.vcr[i] = vpd->vcr[i]; | ||
1381 | regs->vpd.vhpi = vpd->vhpi; | ||
1382 | regs->vpd.vnat = vpd->vnat; | ||
1383 | regs->vpd.vbnat = vpd->vbnat; | ||
1384 | regs->vpd.vpsr = vpd->vpsr; | ||
1385 | regs->vpd.vpr = vpd->vpr; | ||
1386 | |||
1387 | r = -EFAULT; | ||
1388 | r = copy_to_user(regs->saved_guest, &vcpu->arch.guest, | ||
1389 | sizeof(union context)); | ||
1390 | if (r) | ||
1391 | goto out; | ||
1392 | r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET); | ||
1393 | if (r) | ||
1394 | goto out; | ||
1395 | SAVE_REGS(mp_state); | ||
1396 | SAVE_REGS(vmm_rr); | ||
1397 | memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); | ||
1398 | memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS); | ||
1399 | SAVE_REGS(itr_regions); | ||
1400 | SAVE_REGS(dtr_regions); | ||
1401 | SAVE_REGS(tc_regions); | ||
1402 | SAVE_REGS(irq_check); | ||
1403 | SAVE_REGS(itc_check); | ||
1404 | SAVE_REGS(timer_check); | ||
1405 | SAVE_REGS(timer_pending); | ||
1406 | SAVE_REGS(last_itc); | ||
1407 | for (i = 0; i < 8; i++) { | ||
1408 | regs->vrr[i] = vcpu->arch.vrr[i]; | ||
1409 | regs->ibr[i] = vcpu->arch.ibr[i]; | ||
1410 | regs->dbr[i] = vcpu->arch.dbr[i]; | ||
1411 | } | ||
1412 | for (i = 0; i < 4; i++) | ||
1413 | regs->insvc[i] = vcpu->arch.insvc[i]; | ||
1414 | regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC); | ||
1415 | SAVE_REGS(xtp); | ||
1416 | SAVE_REGS(metaphysical_rr0); | ||
1417 | SAVE_REGS(metaphysical_rr4); | ||
1418 | SAVE_REGS(metaphysical_saved_rr0); | ||
1419 | SAVE_REGS(metaphysical_saved_rr4); | ||
1420 | SAVE_REGS(fp_psr); | ||
1421 | SAVE_REGS(saved_gp); | ||
1422 | vcpu_put(vcpu); | ||
1423 | r = 0; | ||
1424 | out: | ||
1425 | return r; | ||
1426 | } | ||
1427 | |||
1428 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | ||
1429 | { | ||
1430 | |||
1431 | hrtimer_cancel(&vcpu->arch.hlt_timer); | ||
1432 | kfree(vcpu->arch.apic); | ||
1433 | } | ||
1434 | |||
1435 | |||
1436 | long kvm_arch_vcpu_ioctl(struct file *filp, | ||
1437 | unsigned int ioctl, unsigned long arg) | ||
1438 | { | ||
1439 | return -EINVAL; | ||
1440 | } | ||
1441 | |||
1442 | int kvm_arch_set_memory_region(struct kvm *kvm, | ||
1443 | struct kvm_userspace_memory_region *mem, | ||
1444 | struct kvm_memory_slot old, | ||
1445 | int user_alloc) | ||
1446 | { | ||
1447 | unsigned long i; | ||
1448 | struct page *page; | ||
1449 | int npages = mem->memory_size >> PAGE_SHIFT; | ||
1450 | struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; | ||
1451 | unsigned long base_gfn = memslot->base_gfn; | ||
1452 | |||
1453 | for (i = 0; i < npages; i++) { | ||
1454 | page = gfn_to_page(kvm, base_gfn + i); | ||
1455 | kvm_set_pmt_entry(kvm, base_gfn + i, | ||
1456 | page_to_pfn(page) << PAGE_SHIFT, | ||
1457 | _PAGE_AR_RWX|_PAGE_MA_WB); | ||
1458 | memslot->rmap[i] = (unsigned long)page; | ||
1459 | } | ||
1460 | |||
1461 | return 0; | ||
1462 | } | ||
1463 | |||
1464 | |||
1465 | long kvm_arch_dev_ioctl(struct file *filp, | ||
1466 | unsigned int ioctl, unsigned long arg) | ||
1467 | { | ||
1468 | return -EINVAL; | ||
1469 | } | ||
1470 | |||
1471 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | ||
1472 | { | ||
1473 | kvm_vcpu_uninit(vcpu); | ||
1474 | } | ||
1475 | |||
1476 | static int vti_cpu_has_kvm_support(void) | ||
1477 | { | ||
1478 | long avail = 1, status = 1, control = 1; | ||
1479 | long ret; | ||
1480 | |||
1481 | ret = ia64_pal_proc_get_features(&avail, &status, &control, 0); | ||
1482 | if (ret) | ||
1483 | goto out; | ||
1484 | |||
1485 | if (!(avail & PAL_PROC_VM_BIT)) | ||
1486 | goto out; | ||
1487 | |||
1488 | printk(KERN_DEBUG"kvm: Hardware Supports VT\n"); | ||
1489 | |||
1490 | ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info); | ||
1491 | if (ret) | ||
1492 | goto out; | ||
1493 | printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size); | ||
1494 | |||
1495 | if (!(vp_env_info & VP_OPCODE)) { | ||
1496 | printk(KERN_WARNING"kvm: No opcode ability on hardware, " | ||
1497 | "vm_env_info:0x%lx\n", vp_env_info); | ||
1498 | } | ||
1499 | |||
1500 | return 1; | ||
1501 | out: | ||
1502 | return 0; | ||
1503 | } | ||
1504 | |||
1505 | static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, | ||
1506 | struct module *module) | ||
1507 | { | ||
1508 | unsigned long module_base; | ||
1509 | unsigned long vmm_size; | ||
1510 | |||
1511 | unsigned long vmm_offset, func_offset, fdesc_offset; | ||
1512 | struct fdesc *p_fdesc; | ||
1513 | |||
1514 | BUG_ON(!module); | ||
1515 | |||
1516 | if (!kvm_vmm_base) { | ||
1517 | printk("kvm: kvm area hasn't been initilized yet!!\n"); | ||
1518 | return -EFAULT; | ||
1519 | } | ||
1520 | |||
1521 | /*Calculate new position of relocated vmm module.*/ | ||
1522 | module_base = (unsigned long)module->module_core; | ||
1523 | vmm_size = module->core_size; | ||
1524 | if (unlikely(vmm_size > KVM_VMM_SIZE)) | ||
1525 | return -EFAULT; | ||
1526 | |||
1527 | memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size); | ||
1528 | kvm_flush_icache(kvm_vmm_base, vmm_size); | ||
1529 | |||
1530 | /*Recalculate kvm_vmm_info based on new VMM*/ | ||
1531 | vmm_offset = vmm_info->vmm_ivt - module_base; | ||
1532 | kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset; | ||
1533 | printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n", | ||
1534 | kvm_vmm_info->vmm_ivt); | ||
1535 | |||
1536 | fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base; | ||
1537 | kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE + | ||
1538 | fdesc_offset); | ||
1539 | func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base; | ||
1540 | p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); | ||
1541 | p_fdesc->ip = KVM_VMM_BASE + func_offset; | ||
1542 | p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base); | ||
1543 | |||
1544 | printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n", | ||
1545 | KVM_VMM_BASE+func_offset); | ||
1546 | |||
1547 | fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base; | ||
1548 | kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE + | ||
1549 | fdesc_offset); | ||
1550 | func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base; | ||
1551 | p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); | ||
1552 | p_fdesc->ip = KVM_VMM_BASE + func_offset; | ||
1553 | p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base); | ||
1554 | |||
1555 | kvm_vmm_gp = p_fdesc->gp; | ||
1556 | |||
1557 | printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n", | ||
1558 | kvm_vmm_info->vmm_entry); | ||
1559 | printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n", | ||
1560 | KVM_VMM_BASE + func_offset); | ||
1561 | |||
1562 | return 0; | ||
1563 | } | ||
1564 | |||
1565 | int kvm_arch_init(void *opaque) | ||
1566 | { | ||
1567 | int r; | ||
1568 | struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque; | ||
1569 | |||
1570 | if (!vti_cpu_has_kvm_support()) { | ||
1571 | printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n"); | ||
1572 | r = -EOPNOTSUPP; | ||
1573 | goto out; | ||
1574 | } | ||
1575 | |||
1576 | if (kvm_vmm_info) { | ||
1577 | printk(KERN_ERR "kvm: Already loaded VMM module!\n"); | ||
1578 | r = -EEXIST; | ||
1579 | goto out; | ||
1580 | } | ||
1581 | |||
1582 | r = -ENOMEM; | ||
1583 | kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL); | ||
1584 | if (!kvm_vmm_info) | ||
1585 | goto out; | ||
1586 | |||
1587 | if (kvm_alloc_vmm_area()) | ||
1588 | goto out_free0; | ||
1589 | |||
1590 | r = kvm_relocate_vmm(vmm_info, vmm_info->module); | ||
1591 | if (r) | ||
1592 | goto out_free1; | ||
1593 | |||
1594 | return 0; | ||
1595 | |||
1596 | out_free1: | ||
1597 | kvm_free_vmm_area(); | ||
1598 | out_free0: | ||
1599 | kfree(kvm_vmm_info); | ||
1600 | out: | ||
1601 | return r; | ||
1602 | } | ||
1603 | |||
1604 | void kvm_arch_exit(void) | ||
1605 | { | ||
1606 | kvm_free_vmm_area(); | ||
1607 | kfree(kvm_vmm_info); | ||
1608 | kvm_vmm_info = NULL; | ||
1609 | } | ||
1610 | |||
1611 | static int kvm_ia64_sync_dirty_log(struct kvm *kvm, | ||
1612 | struct kvm_dirty_log *log) | ||
1613 | { | ||
1614 | struct kvm_memory_slot *memslot; | ||
1615 | int r, i; | ||
1616 | long n, base; | ||
1617 | unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS | ||
1618 | + KVM_MEM_DIRTY_LOG_OFS); | ||
1619 | |||
1620 | r = -EINVAL; | ||
1621 | if (log->slot >= KVM_MEMORY_SLOTS) | ||
1622 | goto out; | ||
1623 | |||
1624 | memslot = &kvm->memslots[log->slot]; | ||
1625 | r = -ENOENT; | ||
1626 | if (!memslot->dirty_bitmap) | ||
1627 | goto out; | ||
1628 | |||
1629 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; | ||
1630 | base = memslot->base_gfn / BITS_PER_LONG; | ||
1631 | |||
1632 | for (i = 0; i < n/sizeof(long); ++i) { | ||
1633 | memslot->dirty_bitmap[i] = dirty_bitmap[base + i]; | ||
1634 | dirty_bitmap[base + i] = 0; | ||
1635 | } | ||
1636 | r = 0; | ||
1637 | out: | ||
1638 | return r; | ||
1639 | } | ||
1640 | |||
1641 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | ||
1642 | struct kvm_dirty_log *log) | ||
1643 | { | ||
1644 | int r; | ||
1645 | int n; | ||
1646 | struct kvm_memory_slot *memslot; | ||
1647 | int is_dirty = 0; | ||
1648 | |||
1649 | spin_lock(&kvm->arch.dirty_log_lock); | ||
1650 | |||
1651 | r = kvm_ia64_sync_dirty_log(kvm, log); | ||
1652 | if (r) | ||
1653 | goto out; | ||
1654 | |||
1655 | r = kvm_get_dirty_log(kvm, log, &is_dirty); | ||
1656 | if (r) | ||
1657 | goto out; | ||
1658 | |||
1659 | /* If nothing is dirty, don't bother messing with page tables. */ | ||
1660 | if (is_dirty) { | ||
1661 | kvm_flush_remote_tlbs(kvm); | ||
1662 | memslot = &kvm->memslots[log->slot]; | ||
1663 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; | ||
1664 | memset(memslot->dirty_bitmap, 0, n); | ||
1665 | } | ||
1666 | r = 0; | ||
1667 | out: | ||
1668 | spin_unlock(&kvm->arch.dirty_log_lock); | ||
1669 | return r; | ||
1670 | } | ||
1671 | |||
1672 | int kvm_arch_hardware_setup(void) | ||
1673 | { | ||
1674 | return 0; | ||
1675 | } | ||
1676 | |||
1677 | void kvm_arch_hardware_unsetup(void) | ||
1678 | { | ||
1679 | } | ||
1680 | |||
1681 | static void vcpu_kick_intr(void *info) | ||
1682 | { | ||
1683 | #ifdef DEBUG | ||
1684 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; | ||
1685 | printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu); | ||
1686 | #endif | ||
1687 | } | ||
1688 | |||
1689 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | ||
1690 | { | ||
1691 | int ipi_pcpu = vcpu->cpu; | ||
1692 | |||
1693 | if (waitqueue_active(&vcpu->wq)) | ||
1694 | wake_up_interruptible(&vcpu->wq); | ||
1695 | |||
1696 | if (vcpu->guest_mode) | ||
1697 | smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); | ||
1698 | } | ||
1699 | |||
1700 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) | ||
1701 | { | ||
1702 | |||
1703 | struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); | ||
1704 | |||
1705 | if (!test_and_set_bit(vec, &vpd->irr[0])) { | ||
1706 | vcpu->arch.irq_new_pending = 1; | ||
1707 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | ||
1708 | kvm_vcpu_kick(vcpu); | ||
1709 | else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) { | ||
1710 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
1711 | if (waitqueue_active(&vcpu->wq)) | ||
1712 | wake_up_interruptible(&vcpu->wq); | ||
1713 | } | ||
1714 | return 1; | ||
1715 | } | ||
1716 | return 0; | ||
1717 | } | ||
1718 | |||
1719 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) | ||
1720 | { | ||
1721 | return apic->vcpu->vcpu_id == dest; | ||
1722 | } | ||
1723 | |||
1724 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) | ||
1725 | { | ||
1726 | return 0; | ||
1727 | } | ||
1728 | |||
1729 | struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, | ||
1730 | unsigned long bitmap) | ||
1731 | { | ||
1732 | struct kvm_vcpu *lvcpu = kvm->vcpus[0]; | ||
1733 | int i; | ||
1734 | |||
1735 | for (i = 1; i < KVM_MAX_VCPUS; i++) { | ||
1736 | if (!kvm->vcpus[i]) | ||
1737 | continue; | ||
1738 | if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp) | ||
1739 | lvcpu = kvm->vcpus[i]; | ||
1740 | } | ||
1741 | |||
1742 | return lvcpu; | ||
1743 | } | ||
1744 | |||
1745 | static int find_highest_bits(int *dat) | ||
1746 | { | ||
1747 | u32 bits, bitnum; | ||
1748 | int i; | ||
1749 | |||
1750 | /* loop for all 256 bits */ | ||
1751 | for (i = 7; i >= 0 ; i--) { | ||
1752 | bits = dat[i]; | ||
1753 | if (bits) { | ||
1754 | bitnum = fls(bits); | ||
1755 | return i * 32 + bitnum - 1; | ||
1756 | } | ||
1757 | } | ||
1758 | |||
1759 | return -1; | ||
1760 | } | ||
1761 | |||
1762 | int kvm_highest_pending_irq(struct kvm_vcpu *vcpu) | ||
1763 | { | ||
1764 | struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); | ||
1765 | |||
1766 | if (vpd->irr[0] & (1UL << NMI_VECTOR)) | ||
1767 | return NMI_VECTOR; | ||
1768 | if (vpd->irr[0] & (1UL << ExtINT_VECTOR)) | ||
1769 | return ExtINT_VECTOR; | ||
1770 | |||
1771 | return find_highest_bits((int *)&vpd->irr[0]); | ||
1772 | } | ||
1773 | |||
1774 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) | ||
1775 | { | ||
1776 | if (kvm_highest_pending_irq(vcpu) != -1) | ||
1777 | return 1; | ||
1778 | return 0; | ||
1779 | } | ||
1780 | |||
1781 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | ||
1782 | { | ||
1783 | return 0; | ||
1784 | } | ||
1785 | |||
1786 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | ||
1787 | { | ||
1788 | return gfn; | ||
1789 | } | ||
1790 | |||
1791 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | ||
1792 | { | ||
1793 | return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE; | ||
1794 | } | ||
1795 | |||
1796 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | ||
1797 | struct kvm_mp_state *mp_state) | ||
1798 | { | ||
1799 | return -EINVAL; | ||
1800 | } | ||
1801 | |||
1802 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | ||
1803 | struct kvm_mp_state *mp_state) | ||
1804 | { | ||
1805 | return -EINVAL; | ||
1806 | } | ||
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c new file mode 100644 index 000000000000..091f936c4485 --- /dev/null +++ b/arch/ia64/kvm/kvm_fw.c | |||
@@ -0,0 +1,500 @@ | |||
1 | /* | ||
2 | * PAL/SAL call delegation | ||
3 | * | ||
4 | * Copyright (c) 2004 Li Susie <susie.li@intel.com> | ||
5 | * Copyright (c) 2005 Yu Ke <ke.yu@intel.com> | ||
6 | * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
19 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
20 | */ | ||
21 | |||
22 | #include <linux/kvm_host.h> | ||
23 | #include <linux/smp.h> | ||
24 | |||
25 | #include "vti.h" | ||
26 | #include "misc.h" | ||
27 | |||
28 | #include <asm/pal.h> | ||
29 | #include <asm/sal.h> | ||
30 | #include <asm/tlb.h> | ||
31 | |||
32 | /* | ||
33 | * Handy macros to make sure that the PAL return values start out | ||
34 | * as something meaningful. | ||
35 | */ | ||
36 | #define INIT_PAL_STATUS_UNIMPLEMENTED(x) \ | ||
37 | { \ | ||
38 | x.status = PAL_STATUS_UNIMPLEMENTED; \ | ||
39 | x.v0 = 0; \ | ||
40 | x.v1 = 0; \ | ||
41 | x.v2 = 0; \ | ||
42 | } | ||
43 | |||
44 | #define INIT_PAL_STATUS_SUCCESS(x) \ | ||
45 | { \ | ||
46 | x.status = PAL_STATUS_SUCCESS; \ | ||
47 | x.v0 = 0; \ | ||
48 | x.v1 = 0; \ | ||
49 | x.v2 = 0; \ | ||
50 | } | ||
51 | |||
52 | static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu, | ||
53 | u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) { | ||
54 | struct exit_ctl_data *p; | ||
55 | |||
56 | if (vcpu) { | ||
57 | p = &vcpu->arch.exit_data; | ||
58 | if (p->exit_reason == EXIT_REASON_PAL_CALL) { | ||
59 | *gr28 = p->u.pal_data.gr28; | ||
60 | *gr29 = p->u.pal_data.gr29; | ||
61 | *gr30 = p->u.pal_data.gr30; | ||
62 | *gr31 = p->u.pal_data.gr31; | ||
63 | return ; | ||
64 | } | ||
65 | } | ||
66 | printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n"); | ||
67 | } | ||
68 | |||
69 | static void set_pal_result(struct kvm_vcpu *vcpu, | ||
70 | struct ia64_pal_retval result) { | ||
71 | |||
72 | struct exit_ctl_data *p; | ||
73 | |||
74 | p = kvm_get_exit_data(vcpu); | ||
75 | if (p && p->exit_reason == EXIT_REASON_PAL_CALL) { | ||
76 | p->u.pal_data.ret = result; | ||
77 | return ; | ||
78 | } | ||
79 | INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret); | ||
80 | } | ||
81 | |||
82 | static void set_sal_result(struct kvm_vcpu *vcpu, | ||
83 | struct sal_ret_values result) { | ||
84 | struct exit_ctl_data *p; | ||
85 | |||
86 | p = kvm_get_exit_data(vcpu); | ||
87 | if (p && p->exit_reason == EXIT_REASON_SAL_CALL) { | ||
88 | p->u.sal_data.ret = result; | ||
89 | return ; | ||
90 | } | ||
91 | printk(KERN_WARNING"Failed to set sal result!!\n"); | ||
92 | } | ||
93 | |||
94 | struct cache_flush_args { | ||
95 | u64 cache_type; | ||
96 | u64 operation; | ||
97 | u64 progress; | ||
98 | long status; | ||
99 | }; | ||
100 | |||
101 | cpumask_t cpu_cache_coherent_map; | ||
102 | |||
103 | static void remote_pal_cache_flush(void *data) | ||
104 | { | ||
105 | struct cache_flush_args *args = data; | ||
106 | long status; | ||
107 | u64 progress = args->progress; | ||
108 | |||
109 | status = ia64_pal_cache_flush(args->cache_type, args->operation, | ||
110 | &progress, NULL); | ||
111 | if (status != 0) | ||
112 | args->status = status; | ||
113 | } | ||
114 | |||
115 | static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu) | ||
116 | { | ||
117 | u64 gr28, gr29, gr30, gr31; | ||
118 | struct ia64_pal_retval result = {0, 0, 0, 0}; | ||
119 | struct cache_flush_args args = {0, 0, 0, 0}; | ||
120 | long psr; | ||
121 | |||
122 | gr28 = gr29 = gr30 = gr31 = 0; | ||
123 | kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31); | ||
124 | |||
125 | if (gr31 != 0) | ||
126 | printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu); | ||
127 | |||
128 | /* Always call Host Pal in int=1 */ | ||
129 | gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS; | ||
130 | args.cache_type = gr29; | ||
131 | args.operation = gr30; | ||
132 | smp_call_function(remote_pal_cache_flush, | ||
133 | (void *)&args, 1, 1); | ||
134 | if (args.status != 0) | ||
135 | printk(KERN_ERR"pal_cache_flush error!," | ||
136 | "status:0x%lx\n", args.status); | ||
137 | /* | ||
138 | * Call Host PAL cache flush | ||
139 | * Clear psr.ic when call PAL_CACHE_FLUSH | ||
140 | */ | ||
141 | local_irq_save(psr); | ||
142 | result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1, | ||
143 | &result.v0); | ||
144 | local_irq_restore(psr); | ||
145 | if (result.status != 0) | ||
146 | printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld" | ||
147 | "in1:%lx,in2:%lx\n", | ||
148 | vcpu, result.status, gr29, gr30); | ||
149 | |||
150 | #if 0 | ||
151 | if (gr29 == PAL_CACHE_TYPE_COHERENT) { | ||
152 | cpus_setall(vcpu->arch.cache_coherent_map); | ||
153 | cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map); | ||
154 | cpus_setall(cpu_cache_coherent_map); | ||
155 | cpu_clear(vcpu->cpu, cpu_cache_coherent_map); | ||
156 | } | ||
157 | #endif | ||
158 | return result; | ||
159 | } | ||
160 | |||
161 | struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu) | ||
162 | { | ||
163 | |||
164 | struct ia64_pal_retval result; | ||
165 | |||
166 | PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0); | ||
167 | return result; | ||
168 | } | ||
169 | |||
170 | static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu) | ||
171 | { | ||
172 | |||
173 | struct ia64_pal_retval result; | ||
174 | |||
175 | PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0); | ||
176 | |||
177 | /* | ||
178 | * PAL_FREQ_BASE may not be implemented in some platforms, | ||
179 | * call SAL instead. | ||
180 | */ | ||
181 | if (result.v0 == 0) { | ||
182 | result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, | ||
183 | &result.v0, | ||
184 | &result.v1); | ||
185 | result.v2 = 0; | ||
186 | } | ||
187 | |||
188 | return result; | ||
189 | } | ||
190 | |||
191 | static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu) | ||
192 | { | ||
193 | |||
194 | struct ia64_pal_retval result; | ||
195 | |||
196 | PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0); | ||
197 | return result; | ||
198 | } | ||
199 | |||
200 | static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu) | ||
201 | { | ||
202 | struct ia64_pal_retval result; | ||
203 | |||
204 | INIT_PAL_STATUS_UNIMPLEMENTED(result); | ||
205 | return result; | ||
206 | } | ||
207 | |||
208 | static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu) | ||
209 | { | ||
210 | |||
211 | struct ia64_pal_retval result; | ||
212 | |||
213 | INIT_PAL_STATUS_SUCCESS(result); | ||
214 | return result; | ||
215 | } | ||
216 | |||
217 | static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu) | ||
218 | { | ||
219 | |||
220 | struct ia64_pal_retval result = {0, 0, 0, 0}; | ||
221 | long in0, in1, in2, in3; | ||
222 | |||
223 | kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); | ||
224 | result.status = ia64_pal_proc_get_features(&result.v0, &result.v1, | ||
225 | &result.v2, in2); | ||
226 | |||
227 | return result; | ||
228 | } | ||
229 | |||
230 | static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu) | ||
231 | { | ||
232 | |||
233 | pal_cache_config_info_t ci; | ||
234 | long status; | ||
235 | unsigned long in0, in1, in2, in3, r9, r10; | ||
236 | |||
237 | kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); | ||
238 | status = ia64_pal_cache_config_info(in1, in2, &ci); | ||
239 | r9 = ci.pcci_info_1.pcci1_data; | ||
240 | r10 = ci.pcci_info_2.pcci2_data; | ||
241 | return ((struct ia64_pal_retval){status, r9, r10, 0}); | ||
242 | } | ||
243 | |||
244 | #define GUEST_IMPL_VA_MSB 59 | ||
245 | #define GUEST_RID_BITS 18 | ||
246 | |||
247 | static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu) | ||
248 | { | ||
249 | |||
250 | pal_vm_info_1_u_t vminfo1; | ||
251 | pal_vm_info_2_u_t vminfo2; | ||
252 | struct ia64_pal_retval result; | ||
253 | |||
254 | PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0); | ||
255 | if (!result.status) { | ||
256 | vminfo1.pvi1_val = result.v0; | ||
257 | vminfo1.pal_vm_info_1_s.max_itr_entry = 8; | ||
258 | vminfo1.pal_vm_info_1_s.max_dtr_entry = 8; | ||
259 | result.v0 = vminfo1.pvi1_val; | ||
260 | vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB; | ||
261 | vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS; | ||
262 | result.v1 = vminfo2.pvi2_val; | ||
263 | } | ||
264 | |||
265 | return result; | ||
266 | } | ||
267 | |||
268 | static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu) | ||
269 | { | ||
270 | struct ia64_pal_retval result; | ||
271 | |||
272 | INIT_PAL_STATUS_UNIMPLEMENTED(result); | ||
273 | |||
274 | return result; | ||
275 | } | ||
276 | |||
277 | static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu) | ||
278 | { | ||
279 | u64 index = 0; | ||
280 | struct exit_ctl_data *p; | ||
281 | |||
282 | p = kvm_get_exit_data(vcpu); | ||
283 | if (p && (p->exit_reason == EXIT_REASON_PAL_CALL)) | ||
284 | index = p->u.pal_data.gr28; | ||
285 | |||
286 | return index; | ||
287 | } | ||
288 | |||
289 | int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run) | ||
290 | { | ||
291 | |||
292 | u64 gr28; | ||
293 | struct ia64_pal_retval result; | ||
294 | int ret = 1; | ||
295 | |||
296 | gr28 = kvm_get_pal_call_index(vcpu); | ||
297 | /*printk("pal_call index:%lx\n",gr28);*/ | ||
298 | switch (gr28) { | ||
299 | case PAL_CACHE_FLUSH: | ||
300 | result = pal_cache_flush(vcpu); | ||
301 | break; | ||
302 | case PAL_CACHE_SUMMARY: | ||
303 | result = pal_cache_summary(vcpu); | ||
304 | break; | ||
305 | case PAL_HALT_LIGHT: | ||
306 | { | ||
307 | vcpu->arch.timer_pending = 1; | ||
308 | INIT_PAL_STATUS_SUCCESS(result); | ||
309 | if (kvm_highest_pending_irq(vcpu) == -1) | ||
310 | ret = kvm_emulate_halt(vcpu); | ||
311 | |||
312 | } | ||
313 | break; | ||
314 | |||
315 | case PAL_FREQ_RATIOS: | ||
316 | result = pal_freq_ratios(vcpu); | ||
317 | break; | ||
318 | |||
319 | case PAL_FREQ_BASE: | ||
320 | result = pal_freq_base(vcpu); | ||
321 | break; | ||
322 | |||
323 | case PAL_LOGICAL_TO_PHYSICAL : | ||
324 | result = pal_logical_to_physica(vcpu); | ||
325 | break; | ||
326 | |||
327 | case PAL_VM_SUMMARY : | ||
328 | result = pal_vm_summary(vcpu); | ||
329 | break; | ||
330 | |||
331 | case PAL_VM_INFO : | ||
332 | result = pal_vm_info(vcpu); | ||
333 | break; | ||
334 | case PAL_PLATFORM_ADDR : | ||
335 | result = pal_platform_addr(vcpu); | ||
336 | break; | ||
337 | case PAL_CACHE_INFO: | ||
338 | result = pal_cache_info(vcpu); | ||
339 | break; | ||
340 | case PAL_PTCE_INFO: | ||
341 | INIT_PAL_STATUS_SUCCESS(result); | ||
342 | result.v1 = (1L << 32) | 1L; | ||
343 | break; | ||
344 | case PAL_VM_PAGE_SIZE: | ||
345 | result.status = ia64_pal_vm_page_size(&result.v0, | ||
346 | &result.v1); | ||
347 | break; | ||
348 | case PAL_RSE_INFO: | ||
349 | result.status = ia64_pal_rse_info(&result.v0, | ||
350 | (pal_hints_u_t *)&result.v1); | ||
351 | break; | ||
352 | case PAL_PROC_GET_FEATURES: | ||
353 | result = pal_proc_get_features(vcpu); | ||
354 | break; | ||
355 | case PAL_DEBUG_INFO: | ||
356 | result.status = ia64_pal_debug_info(&result.v0, | ||
357 | &result.v1); | ||
358 | break; | ||
359 | case PAL_VERSION: | ||
360 | result.status = ia64_pal_version( | ||
361 | (pal_version_u_t *)&result.v0, | ||
362 | (pal_version_u_t *)&result.v1); | ||
363 | |||
364 | break; | ||
365 | case PAL_FIXED_ADDR: | ||
366 | result.status = PAL_STATUS_SUCCESS; | ||
367 | result.v0 = vcpu->vcpu_id; | ||
368 | break; | ||
369 | default: | ||
370 | INIT_PAL_STATUS_UNIMPLEMENTED(result); | ||
371 | printk(KERN_WARNING"kvm: Unsupported pal call," | ||
372 | " index:0x%lx\n", gr28); | ||
373 | } | ||
374 | set_pal_result(vcpu, result); | ||
375 | return ret; | ||
376 | } | ||
377 | |||
378 | static struct sal_ret_values sal_emulator(struct kvm *kvm, | ||
379 | long index, unsigned long in1, | ||
380 | unsigned long in2, unsigned long in3, | ||
381 | unsigned long in4, unsigned long in5, | ||
382 | unsigned long in6, unsigned long in7) | ||
383 | { | ||
384 | unsigned long r9 = 0; | ||
385 | unsigned long r10 = 0; | ||
386 | long r11 = 0; | ||
387 | long status; | ||
388 | |||
389 | status = 0; | ||
390 | switch (index) { | ||
391 | case SAL_FREQ_BASE: | ||
392 | status = ia64_sal_freq_base(in1, &r9, &r10); | ||
393 | break; | ||
394 | case SAL_PCI_CONFIG_READ: | ||
395 | printk(KERN_WARNING"kvm: Not allowed to call here!" | ||
396 | " SAL_PCI_CONFIG_READ\n"); | ||
397 | break; | ||
398 | case SAL_PCI_CONFIG_WRITE: | ||
399 | printk(KERN_WARNING"kvm: Not allowed to call here!" | ||
400 | " SAL_PCI_CONFIG_WRITE\n"); | ||
401 | break; | ||
402 | case SAL_SET_VECTORS: | ||
403 | if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) { | ||
404 | if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) { | ||
405 | status = -2; | ||
406 | } else { | ||
407 | kvm->arch.rdv_sal_data.boot_ip = in2; | ||
408 | kvm->arch.rdv_sal_data.boot_gp = in3; | ||
409 | } | ||
410 | printk("Rendvous called! iip:%lx\n\n", in2); | ||
411 | } else | ||
412 | printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu." | ||
413 | "ignored...\n", in1); | ||
414 | break; | ||
415 | case SAL_GET_STATE_INFO: | ||
416 | /* No more info. */ | ||
417 | status = -5; | ||
418 | r9 = 0; | ||
419 | break; | ||
420 | case SAL_GET_STATE_INFO_SIZE: | ||
421 | /* Return a dummy size. */ | ||
422 | status = 0; | ||
423 | r9 = 128; | ||
424 | break; | ||
425 | case SAL_CLEAR_STATE_INFO: | ||
426 | /* Noop. */ | ||
427 | break; | ||
428 | case SAL_MC_RENDEZ: | ||
429 | printk(KERN_WARNING | ||
430 | "kvm: called SAL_MC_RENDEZ. ignored...\n"); | ||
431 | break; | ||
432 | case SAL_MC_SET_PARAMS: | ||
433 | printk(KERN_WARNING | ||
434 | "kvm: called SAL_MC_SET_PARAMS.ignored!\n"); | ||
435 | break; | ||
436 | case SAL_CACHE_FLUSH: | ||
437 | if (1) { | ||
438 | /*Flush using SAL. | ||
439 | This method is faster but has a side | ||
440 | effect on other vcpu running on | ||
441 | this cpu. */ | ||
442 | status = ia64_sal_cache_flush(in1); | ||
443 | } else { | ||
444 | /*Maybe need to implement the method | ||
445 | without side effect!*/ | ||
446 | status = 0; | ||
447 | } | ||
448 | break; | ||
449 | case SAL_CACHE_INIT: | ||
450 | printk(KERN_WARNING | ||
451 | "kvm: called SAL_CACHE_INIT. ignored...\n"); | ||
452 | break; | ||
453 | case SAL_UPDATE_PAL: | ||
454 | printk(KERN_WARNING | ||
455 | "kvm: CALLED SAL_UPDATE_PAL. ignored...\n"); | ||
456 | break; | ||
457 | default: | ||
458 | printk(KERN_WARNING"kvm: called SAL_CALL with unknown index." | ||
459 | " index:%ld\n", index); | ||
460 | status = -1; | ||
461 | break; | ||
462 | } | ||
463 | return ((struct sal_ret_values) {status, r9, r10, r11}); | ||
464 | } | ||
465 | |||
466 | static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1, | ||
467 | u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){ | ||
468 | |||
469 | struct exit_ctl_data *p; | ||
470 | |||
471 | p = kvm_get_exit_data(vcpu); | ||
472 | |||
473 | if (p) { | ||
474 | if (p->exit_reason == EXIT_REASON_SAL_CALL) { | ||
475 | *in0 = p->u.sal_data.in0; | ||
476 | *in1 = p->u.sal_data.in1; | ||
477 | *in2 = p->u.sal_data.in2; | ||
478 | *in3 = p->u.sal_data.in3; | ||
479 | *in4 = p->u.sal_data.in4; | ||
480 | *in5 = p->u.sal_data.in5; | ||
481 | *in6 = p->u.sal_data.in6; | ||
482 | *in7 = p->u.sal_data.in7; | ||
483 | return ; | ||
484 | } | ||
485 | } | ||
486 | *in0 = 0; | ||
487 | } | ||
488 | |||
489 | void kvm_sal_emul(struct kvm_vcpu *vcpu) | ||
490 | { | ||
491 | |||
492 | struct sal_ret_values result; | ||
493 | u64 index, in1, in2, in3, in4, in5, in6, in7; | ||
494 | |||
495 | kvm_get_sal_call_data(vcpu, &index, &in1, &in2, | ||
496 | &in3, &in4, &in5, &in6, &in7); | ||
497 | result = sal_emulator(vcpu->kvm, index, in1, in2, in3, | ||
498 | in4, in5, in6, in7); | ||
499 | set_sal_result(vcpu, result); | ||
500 | } | ||
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h new file mode 100644 index 000000000000..13980d9b8bcf --- /dev/null +++ b/arch/ia64/kvm/kvm_minstate.h | |||
@@ -0,0 +1,273 @@ | |||
1 | /* | ||
2 | * kvm_minstate.h: min save macros | ||
3 | * Copyright (c) 2007, Intel Corporation. | ||
4 | * | ||
5 | * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) | ||
6 | * Xiantao Zhang (xiantao.zhang@intel.com) | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
19 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | |||
24 | #include <asm/asmmacro.h> | ||
25 | #include <asm/types.h> | ||
26 | #include <asm/kregs.h> | ||
27 | #include "asm-offsets.h" | ||
28 | |||
29 | #define KVM_MINSTATE_START_SAVE_MIN \ | ||
30 | mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\ | ||
31 | ;; \ | ||
32 | mov.m r28 = ar.rnat; \ | ||
33 | addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \ | ||
34 | ;; \ | ||
35 | lfetch.fault.excl.nt1 [r22]; \ | ||
36 | addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ | ||
37 | mov r23 = ar.bspstore; /* save ar.bspstore */ \ | ||
38 | ;; \ | ||
39 | mov ar.bspstore = r22; /* switch to kernel RBS */\ | ||
40 | ;; \ | ||
41 | mov r18 = ar.bsp; \ | ||
42 | mov ar.rsc = 0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ | ||
43 | |||
44 | |||
45 | |||
46 | #define KVM_MINSTATE_END_SAVE_MIN \ | ||
47 | bsw.1; /* switch back to bank 1 (must be last in insn group) */\ | ||
48 | ;; | ||
49 | |||
50 | |||
51 | #define PAL_VSA_SYNC_READ \ | ||
52 | /* begin to call pal vps sync_read */ \ | ||
53 | add r25 = VMM_VPD_BASE_OFFSET, r21; \ | ||
54 | adds r20 = VMM_VCPU_VSA_BASE_OFFSET, r21; /* entry point */ \ | ||
55 | ;; \ | ||
56 | ld8 r25 = [r25]; /* read vpd base */ \ | ||
57 | ld8 r20 = [r20]; \ | ||
58 | ;; \ | ||
59 | add r20 = PAL_VPS_SYNC_READ,r20; \ | ||
60 | ;; \ | ||
61 | { .mii; \ | ||
62 | nop 0x0; \ | ||
63 | mov r24 = ip; \ | ||
64 | mov b0 = r20; \ | ||
65 | ;; \ | ||
66 | }; \ | ||
67 | { .mmb; \ | ||
68 | add r24 = 0x20, r24; \ | ||
69 | nop 0x0; \ | ||
70 | br.cond.sptk b0; /* call the service */ \ | ||
71 | ;; \ | ||
72 | }; | ||
73 | |||
74 | |||
75 | |||
76 | #define KVM_MINSTATE_GET_CURRENT(reg) mov reg=r21 | ||
77 | |||
78 | /* | ||
79 | * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves | ||
80 | * the minimum state necessary that allows us to turn psr.ic back | ||
81 | * on. | ||
82 | * | ||
83 | * Assumed state upon entry: | ||
84 | * psr.ic: off | ||
85 | * r31: contains saved predicates (pr) | ||
86 | * | ||
87 | * Upon exit, the state is as follows: | ||
88 | * psr.ic: off | ||
89 | * r2 = points to &pt_regs.r16 | ||
90 | * r8 = contents of ar.ccv | ||
91 | * r9 = contents of ar.csd | ||
92 | * r10 = contents of ar.ssd | ||
93 | * r11 = FPSR_DEFAULT | ||
94 | * r12 = kernel sp (kernel virtual address) | ||
95 | * r13 = points to current task_struct (kernel virtual address) | ||
96 | * p15 = TRUE if psr.i is set in cr.ipsr | ||
97 | * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: | ||
98 | * preserved | ||
99 | * | ||
100 | * Note that psr.ic is NOT turned on by this macro. This is so that | ||
101 | * we can pass interruption state as arguments to a handler. | ||
102 | */ | ||
103 | |||
104 | |||
105 | #define PT(f) (VMM_PT_REGS_##f##_OFFSET) | ||
106 | |||
107 | #define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ | ||
108 | KVM_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \ | ||
109 | mov r27 = ar.rsc; /* M */ \ | ||
110 | mov r20 = r1; /* A */ \ | ||
111 | mov r25 = ar.unat; /* M */ \ | ||
112 | mov r29 = cr.ipsr; /* M */ \ | ||
113 | mov r26 = ar.pfs; /* I */ \ | ||
114 | mov r18 = cr.isr; \ | ||
115 | COVER; /* B;; (or nothing) */ \ | ||
116 | ;; \ | ||
117 | tbit.z p0,p15 = r29,IA64_PSR_I_BIT; \ | ||
118 | mov r1 = r16; \ | ||
119 | /* mov r21=r16; */ \ | ||
120 | /* switch from user to kernel RBS: */ \ | ||
121 | ;; \ | ||
122 | invala; /* M */ \ | ||
123 | SAVE_IFS; \ | ||
124 | ;; \ | ||
125 | KVM_MINSTATE_START_SAVE_MIN \ | ||
126 | adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */ \ | ||
127 | adds r16 = PT(CR_IPSR),r1; \ | ||
128 | ;; \ | ||
129 | lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ | ||
130 | st8 [r16] = r29; /* save cr.ipsr */ \ | ||
131 | ;; \ | ||
132 | lfetch.fault.excl.nt1 [r17]; \ | ||
133 | tbit.nz p15,p0 = r29,IA64_PSR_I_BIT; \ | ||
134 | mov r29 = b0 \ | ||
135 | ;; \ | ||
136 | adds r16 = PT(R8),r1; /* initialize first base pointer */\ | ||
137 | adds r17 = PT(R9),r1; /* initialize second base pointer */\ | ||
138 | ;; \ | ||
139 | .mem.offset 0,0; st8.spill [r16] = r8,16; \ | ||
140 | .mem.offset 8,0; st8.spill [r17] = r9,16; \ | ||
141 | ;; \ | ||
142 | .mem.offset 0,0; st8.spill [r16] = r10,24; \ | ||
143 | .mem.offset 8,0; st8.spill [r17] = r11,24; \ | ||
144 | ;; \ | ||
145 | mov r9 = cr.iip; /* M */ \ | ||
146 | mov r10 = ar.fpsr; /* M */ \ | ||
147 | ;; \ | ||
148 | st8 [r16] = r9,16; /* save cr.iip */ \ | ||
149 | st8 [r17] = r30,16; /* save cr.ifs */ \ | ||
150 | sub r18 = r18,r22; /* r18=RSE.ndirty*8 */ \ | ||
151 | ;; \ | ||
152 | st8 [r16] = r25,16; /* save ar.unat */ \ | ||
153 | st8 [r17] = r26,16; /* save ar.pfs */ \ | ||
154 | shl r18 = r18,16; /* calu ar.rsc used for "loadrs" */\ | ||
155 | ;; \ | ||
156 | st8 [r16] = r27,16; /* save ar.rsc */ \ | ||
157 | st8 [r17] = r28,16; /* save ar.rnat */ \ | ||
158 | ;; /* avoid RAW on r16 & r17 */ \ | ||
159 | st8 [r16] = r23,16; /* save ar.bspstore */ \ | ||
160 | st8 [r17] = r31,16; /* save predicates */ \ | ||
161 | ;; \ | ||
162 | st8 [r16] = r29,16; /* save b0 */ \ | ||
163 | st8 [r17] = r18,16; /* save ar.rsc value for "loadrs" */\ | ||
164 | ;; \ | ||
165 | .mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */ \ | ||
166 | .mem.offset 8,0; st8.spill [r17] = r12,16; \ | ||
167 | adds r12 = -16,r1; /* switch to kernel memory stack */ \ | ||
168 | ;; \ | ||
169 | .mem.offset 0,0; st8.spill [r16] = r13,16; \ | ||
170 | .mem.offset 8,0; st8.spill [r17] = r10,16; /* save ar.fpsr */\ | ||
171 | mov r13 = r21; /* establish `current' */ \ | ||
172 | ;; \ | ||
173 | .mem.offset 0,0; st8.spill [r16] = r15,16; \ | ||
174 | .mem.offset 8,0; st8.spill [r17] = r14,16; \ | ||
175 | ;; \ | ||
176 | .mem.offset 0,0; st8.spill [r16] = r2,16; \ | ||
177 | .mem.offset 8,0; st8.spill [r17] = r3,16; \ | ||
178 | adds r2 = VMM_PT_REGS_R16_OFFSET,r1; \ | ||
179 | ;; \ | ||
180 | adds r16 = VMM_VCPU_IIPA_OFFSET,r13; \ | ||
181 | adds r17 = VMM_VCPU_ISR_OFFSET,r13; \ | ||
182 | mov r26 = cr.iipa; \ | ||
183 | mov r27 = cr.isr; \ | ||
184 | ;; \ | ||
185 | st8 [r16] = r26; \ | ||
186 | st8 [r17] = r27; \ | ||
187 | ;; \ | ||
188 | EXTRA; \ | ||
189 | mov r8 = ar.ccv; \ | ||
190 | mov r9 = ar.csd; \ | ||
191 | mov r10 = ar.ssd; \ | ||
192 | movl r11 = FPSR_DEFAULT; /* L-unit */ \ | ||
193 | adds r17 = VMM_VCPU_GP_OFFSET,r13; \ | ||
194 | ;; \ | ||
195 | ld8 r1 = [r17];/* establish kernel global pointer */ \ | ||
196 | ;; \ | ||
197 | PAL_VSA_SYNC_READ \ | ||
198 | KVM_MINSTATE_END_SAVE_MIN | ||
199 | |||
200 | /* | ||
201 | * SAVE_REST saves the remainder of pt_regs (with psr.ic on). | ||
202 | * | ||
203 | * Assumed state upon entry: | ||
204 | * psr.ic: on | ||
205 | * r2: points to &pt_regs.f6 | ||
206 | * r3: points to &pt_regs.f7 | ||
207 | * r8: contents of ar.ccv | ||
208 | * r9: contents of ar.csd | ||
209 | * r10: contents of ar.ssd | ||
210 | * r11: FPSR_DEFAULT | ||
211 | * | ||
212 | * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. | ||
213 | */ | ||
214 | #define KVM_SAVE_REST \ | ||
215 | .mem.offset 0,0; st8.spill [r2] = r16,16; \ | ||
216 | .mem.offset 8,0; st8.spill [r3] = r17,16; \ | ||
217 | ;; \ | ||
218 | .mem.offset 0,0; st8.spill [r2] = r18,16; \ | ||
219 | .mem.offset 8,0; st8.spill [r3] = r19,16; \ | ||
220 | ;; \ | ||
221 | .mem.offset 0,0; st8.spill [r2] = r20,16; \ | ||
222 | .mem.offset 8,0; st8.spill [r3] = r21,16; \ | ||
223 | mov r18=b6; \ | ||
224 | ;; \ | ||
225 | .mem.offset 0,0; st8.spill [r2] = r22,16; \ | ||
226 | .mem.offset 8,0; st8.spill [r3] = r23,16; \ | ||
227 | mov r19 = b7; \ | ||
228 | ;; \ | ||
229 | .mem.offset 0,0; st8.spill [r2] = r24,16; \ | ||
230 | .mem.offset 8,0; st8.spill [r3] = r25,16; \ | ||
231 | ;; \ | ||
232 | .mem.offset 0,0; st8.spill [r2] = r26,16; \ | ||
233 | .mem.offset 8,0; st8.spill [r3] = r27,16; \ | ||
234 | ;; \ | ||
235 | .mem.offset 0,0; st8.spill [r2] = r28,16; \ | ||
236 | .mem.offset 8,0; st8.spill [r3] = r29,16; \ | ||
237 | ;; \ | ||
238 | .mem.offset 0,0; st8.spill [r2] = r30,16; \ | ||
239 | .mem.offset 8,0; st8.spill [r3] = r31,32; \ | ||
240 | ;; \ | ||
241 | mov ar.fpsr = r11; \ | ||
242 | st8 [r2] = r8,8; \ | ||
243 | adds r24 = PT(B6)-PT(F7),r3; \ | ||
244 | adds r25 = PT(B7)-PT(F7),r3; \ | ||
245 | ;; \ | ||
246 | st8 [r24] = r18,16; /* b6 */ \ | ||
247 | st8 [r25] = r19,16; /* b7 */ \ | ||
248 | adds r2 = PT(R4)-PT(F6),r2; \ | ||
249 | adds r3 = PT(R5)-PT(F7),r3; \ | ||
250 | ;; \ | ||
251 | st8 [r24] = r9; /* ar.csd */ \ | ||
252 | st8 [r25] = r10; /* ar.ssd */ \ | ||
253 | ;; \ | ||
254 | mov r18 = ar.unat; \ | ||
255 | adds r19 = PT(EML_UNAT)-PT(R4),r2; \ | ||
256 | ;; \ | ||
257 | st8 [r19] = r18; /* eml_unat */ \ | ||
258 | |||
259 | |||
260 | #define KVM_SAVE_EXTRA \ | ||
261 | .mem.offset 0,0; st8.spill [r2] = r4,16; \ | ||
262 | .mem.offset 8,0; st8.spill [r3] = r5,16; \ | ||
263 | ;; \ | ||
264 | .mem.offset 0,0; st8.spill [r2] = r6,16; \ | ||
265 | .mem.offset 8,0; st8.spill [r3] = r7; \ | ||
266 | ;; \ | ||
267 | mov r26 = ar.unat; \ | ||
268 | ;; \ | ||
269 | st8 [r2] = r26;/* eml_unat */ \ | ||
270 | |||
271 | #define KVM_SAVE_MIN_WITH_COVER KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,) | ||
272 | #define KVM_SAVE_MIN_WITH_COVER_R19 KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19) | ||
273 | #define KVM_SAVE_MIN KVM_DO_SAVE_MIN( , mov r30 = r0, ) | ||
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h new file mode 100644 index 000000000000..6d6cbcb14893 --- /dev/null +++ b/arch/ia64/kvm/lapic.h | |||
@@ -0,0 +1,25 @@ | |||
1 | #ifndef __KVM_IA64_LAPIC_H | ||
2 | #define __KVM_IA64_LAPIC_H | ||
3 | |||
4 | #include <linux/kvm_host.h> | ||
5 | |||
6 | /* | ||
7 | * vlsapic | ||
8 | */ | ||
9 | struct kvm_lapic{ | ||
10 | struct kvm_vcpu *vcpu; | ||
11 | uint64_t insvc[4]; | ||
12 | uint64_t vhpi; | ||
13 | uint8_t xtp; | ||
14 | uint8_t pal_init_pending; | ||
15 | uint8_t pad[2]; | ||
16 | }; | ||
17 | |||
18 | int kvm_create_lapic(struct kvm_vcpu *vcpu); | ||
19 | void kvm_free_lapic(struct kvm_vcpu *vcpu); | ||
20 | |||
21 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); | ||
22 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); | ||
23 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig); | ||
24 | |||
25 | #endif | ||
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h new file mode 100644 index 000000000000..e585c4607344 --- /dev/null +++ b/arch/ia64/kvm/misc.h | |||
@@ -0,0 +1,93 @@ | |||
1 | #ifndef __KVM_IA64_MISC_H | ||
2 | #define __KVM_IA64_MISC_H | ||
3 | |||
4 | #include <linux/kvm_host.h> | ||
5 | /* | ||
6 | * misc.h | ||
7 | * Copyright (C) 2007, Intel Corporation. | ||
8 | * Xiantao Zhang (xiantao.zhang@intel.com) | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify it | ||
11 | * under the terms and conditions of the GNU General Public License, | ||
12 | * version 2, as published by the Free Software Foundation. | ||
13 | * | ||
14 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
15 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
17 | * more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along with | ||
20 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
21 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | *Return p2m base address at host side! | ||
27 | */ | ||
28 | static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm) | ||
29 | { | ||
30 | return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS); | ||
31 | } | ||
32 | |||
33 | static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn, | ||
34 | u64 paddr, u64 mem_flags) | ||
35 | { | ||
36 | uint64_t *pmt_base = kvm_host_get_pmt(kvm); | ||
37 | unsigned long pte; | ||
38 | |||
39 | pte = PAGE_ALIGN(paddr) | mem_flags; | ||
40 | pmt_base[gfn] = pte; | ||
41 | } | ||
42 | |||
43 | /*Function for translating host address to guest address*/ | ||
44 | |||
45 | static inline void *to_guest(struct kvm *kvm, void *addr) | ||
46 | { | ||
47 | return (void *)((unsigned long)(addr) - kvm->arch.vm_base + | ||
48 | KVM_VM_DATA_BASE); | ||
49 | } | ||
50 | |||
51 | /*Function for translating guest address to host address*/ | ||
52 | |||
53 | static inline void *to_host(struct kvm *kvm, void *addr) | ||
54 | { | ||
55 | return (void *)((unsigned long)addr - KVM_VM_DATA_BASE | ||
56 | + kvm->arch.vm_base); | ||
57 | } | ||
58 | |||
59 | /* Get host context of the vcpu */ | ||
60 | static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu) | ||
61 | { | ||
62 | union context *ctx = &vcpu->arch.host; | ||
63 | return to_guest(vcpu->kvm, ctx); | ||
64 | } | ||
65 | |||
66 | /* Get guest context of the vcpu */ | ||
67 | static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu) | ||
68 | { | ||
69 | union context *ctx = &vcpu->arch.guest; | ||
70 | return to_guest(vcpu->kvm, ctx); | ||
71 | } | ||
72 | |||
73 | /* kvm get exit data from gvmm! */ | ||
74 | static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu) | ||
75 | { | ||
76 | return &vcpu->arch.exit_data; | ||
77 | } | ||
78 | |||
79 | /*kvm get vcpu ioreq for kvm module!*/ | ||
80 | static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu) | ||
81 | { | ||
82 | struct exit_ctl_data *p_ctl_data; | ||
83 | |||
84 | if (vcpu) { | ||
85 | p_ctl_data = kvm_get_exit_data(vcpu); | ||
86 | if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION) | ||
87 | return &p_ctl_data->u.ioreq; | ||
88 | } | ||
89 | |||
90 | return NULL; | ||
91 | } | ||
92 | |||
93 | #endif | ||
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c new file mode 100644 index 000000000000..351bf70da463 --- /dev/null +++ b/arch/ia64/kvm/mmio.c | |||
@@ -0,0 +1,341 @@ | |||
1 | /* | ||
2 | * mmio.c: MMIO emulation components. | ||
3 | * Copyright (c) 2004, Intel Corporation. | ||
4 | * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) | ||
5 | * Kun Tian (Kevin Tian) (Kevin.tian@intel.com) | ||
6 | * | ||
7 | * Copyright (c) 2007 Intel Corporation KVM support. | ||
8 | * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) | ||
9 | * Xiantao Zhang (xiantao.zhang@intel.com) | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms and conditions of the GNU General Public License, | ||
13 | * version 2, as published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
18 | * more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License along with | ||
21 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
22 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include <linux/kvm_host.h> | ||
27 | |||
28 | #include "vcpu.h" | ||
29 | |||
30 | static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val) | ||
31 | { | ||
32 | VLSAPIC_XTP(v) = val; | ||
33 | } | ||
34 | |||
35 | /* | ||
36 | * LSAPIC OFFSET | ||
37 | */ | ||
38 | #define PIB_LOW_HALF(ofst) !(ofst & (1 << 20)) | ||
39 | #define PIB_OFST_INTA 0x1E0000 | ||
40 | #define PIB_OFST_XTP 0x1E0008 | ||
41 | |||
42 | /* | ||
43 | * execute write IPI op. | ||
44 | */ | ||
45 | static void vlsapic_write_ipi(struct kvm_vcpu *vcpu, | ||
46 | uint64_t addr, uint64_t data) | ||
47 | { | ||
48 | struct exit_ctl_data *p = ¤t_vcpu->arch.exit_data; | ||
49 | unsigned long psr; | ||
50 | |||
51 | local_irq_save(psr); | ||
52 | |||
53 | p->exit_reason = EXIT_REASON_IPI; | ||
54 | p->u.ipi_data.addr.val = addr; | ||
55 | p->u.ipi_data.data.val = data; | ||
56 | vmm_transition(current_vcpu); | ||
57 | |||
58 | local_irq_restore(psr); | ||
59 | |||
60 | } | ||
61 | |||
62 | void lsapic_write(struct kvm_vcpu *v, unsigned long addr, | ||
63 | unsigned long length, unsigned long val) | ||
64 | { | ||
65 | addr &= (PIB_SIZE - 1); | ||
66 | |||
67 | switch (addr) { | ||
68 | case PIB_OFST_INTA: | ||
69 | /*panic_domain(NULL, "Undefined write on PIB INTA\n");*/ | ||
70 | panic_vm(v); | ||
71 | break; | ||
72 | case PIB_OFST_XTP: | ||
73 | if (length == 1) { | ||
74 | vlsapic_write_xtp(v, val); | ||
75 | } else { | ||
76 | /*panic_domain(NULL, | ||
77 | "Undefined write on PIB XTP\n");*/ | ||
78 | panic_vm(v); | ||
79 | } | ||
80 | break; | ||
81 | default: | ||
82 | if (PIB_LOW_HALF(addr)) { | ||
83 | /*lower half */ | ||
84 | if (length != 8) | ||
85 | /*panic_domain(NULL, | ||
86 | "Can't LHF write with size %ld!\n", | ||
87 | length);*/ | ||
88 | panic_vm(v); | ||
89 | else | ||
90 | vlsapic_write_ipi(v, addr, val); | ||
91 | } else { /* upper half | ||
92 | printk("IPI-UHF write %lx\n",addr);*/ | ||
93 | panic_vm(v); | ||
94 | } | ||
95 | break; | ||
96 | } | ||
97 | } | ||
98 | |||
99 | unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr, | ||
100 | unsigned long length) | ||
101 | { | ||
102 | uint64_t result = 0; | ||
103 | |||
104 | addr &= (PIB_SIZE - 1); | ||
105 | |||
106 | switch (addr) { | ||
107 | case PIB_OFST_INTA: | ||
108 | if (length == 1) /* 1 byte load */ | ||
109 | ; /* There is no i8259, there is no INTA access*/ | ||
110 | else | ||
111 | /*panic_domain(NULL,"Undefined read on PIB INTA\n"); */ | ||
112 | panic_vm(v); | ||
113 | |||
114 | break; | ||
115 | case PIB_OFST_XTP: | ||
116 | if (length == 1) { | ||
117 | result = VLSAPIC_XTP(v); | ||
118 | /* printk("read xtp %lx\n", result); */ | ||
119 | } else { | ||
120 | /*panic_domain(NULL, | ||
121 | "Undefined read on PIB XTP\n");*/ | ||
122 | panic_vm(v); | ||
123 | } | ||
124 | break; | ||
125 | default: | ||
126 | panic_vm(v); | ||
127 | break; | ||
128 | } | ||
129 | return result; | ||
130 | } | ||
131 | |||
132 | static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest, | ||
133 | u16 s, int ma, int dir) | ||
134 | { | ||
135 | unsigned long iot; | ||
136 | struct exit_ctl_data *p = &vcpu->arch.exit_data; | ||
137 | unsigned long psr; | ||
138 | |||
139 | iot = __gpfn_is_io(src_pa >> PAGE_SHIFT); | ||
140 | |||
141 | local_irq_save(psr); | ||
142 | |||
143 | /*Intercept the acces for PIB range*/ | ||
144 | if (iot == GPFN_PIB) { | ||
145 | if (!dir) | ||
146 | lsapic_write(vcpu, src_pa, s, *dest); | ||
147 | else | ||
148 | *dest = lsapic_read(vcpu, src_pa, s); | ||
149 | goto out; | ||
150 | } | ||
151 | p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION; | ||
152 | p->u.ioreq.addr = src_pa; | ||
153 | p->u.ioreq.size = s; | ||
154 | p->u.ioreq.dir = dir; | ||
155 | if (dir == IOREQ_WRITE) | ||
156 | p->u.ioreq.data = *dest; | ||
157 | p->u.ioreq.state = STATE_IOREQ_READY; | ||
158 | vmm_transition(vcpu); | ||
159 | |||
160 | if (p->u.ioreq.state == STATE_IORESP_READY) { | ||
161 | if (dir == IOREQ_READ) | ||
162 | *dest = p->u.ioreq.data; | ||
163 | } else | ||
164 | panic_vm(vcpu); | ||
165 | out: | ||
166 | local_irq_restore(psr); | ||
167 | return ; | ||
168 | } | ||
169 | |||
170 | /* | ||
171 | dir 1: read 0:write | ||
172 | inst_type 0:integer 1:floating point | ||
173 | */ | ||
174 | #define SL_INTEGER 0 /* store/load interger*/ | ||
175 | #define SL_FLOATING 1 /* store/load floating*/ | ||
176 | |||
177 | void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma) | ||
178 | { | ||
179 | struct kvm_pt_regs *regs; | ||
180 | IA64_BUNDLE bundle; | ||
181 | int slot, dir = 0; | ||
182 | int inst_type = -1; | ||
183 | u16 size = 0; | ||
184 | u64 data, slot1a, slot1b, temp, update_reg; | ||
185 | s32 imm; | ||
186 | INST64 inst; | ||
187 | |||
188 | regs = vcpu_regs(vcpu); | ||
189 | |||
190 | if (fetch_code(vcpu, regs->cr_iip, &bundle)) { | ||
191 | /* if fetch code fail, return and try again */ | ||
192 | return; | ||
193 | } | ||
194 | slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri; | ||
195 | if (!slot) | ||
196 | inst.inst = bundle.slot0; | ||
197 | else if (slot == 1) { | ||
198 | slot1a = bundle.slot1a; | ||
199 | slot1b = bundle.slot1b; | ||
200 | inst.inst = slot1a + (slot1b << 18); | ||
201 | } else if (slot == 2) | ||
202 | inst.inst = bundle.slot2; | ||
203 | |||
204 | /* Integer Load/Store */ | ||
205 | if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) { | ||
206 | inst_type = SL_INTEGER; | ||
207 | size = (inst.M1.x6 & 0x3); | ||
208 | if ((inst.M1.x6 >> 2) > 0xb) { | ||
209 | /*write*/ | ||
210 | dir = IOREQ_WRITE; | ||
211 | data = vcpu_get_gr(vcpu, inst.M4.r2); | ||
212 | } else if ((inst.M1.x6 >> 2) < 0xb) { | ||
213 | /*read*/ | ||
214 | dir = IOREQ_READ; | ||
215 | } | ||
216 | } else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) { | ||
217 | /* Integer Load + Reg update */ | ||
218 | inst_type = SL_INTEGER; | ||
219 | dir = IOREQ_READ; | ||
220 | size = (inst.M2.x6 & 0x3); | ||
221 | temp = vcpu_get_gr(vcpu, inst.M2.r3); | ||
222 | update_reg = vcpu_get_gr(vcpu, inst.M2.r2); | ||
223 | temp += update_reg; | ||
224 | vcpu_set_gr(vcpu, inst.M2.r3, temp, 0); | ||
225 | } else if (inst.M3.major == 5) { | ||
226 | /*Integer Load/Store + Imm update*/ | ||
227 | inst_type = SL_INTEGER; | ||
228 | size = (inst.M3.x6&0x3); | ||
229 | if ((inst.M5.x6 >> 2) > 0xb) { | ||
230 | /*write*/ | ||
231 | dir = IOREQ_WRITE; | ||
232 | data = vcpu_get_gr(vcpu, inst.M5.r2); | ||
233 | temp = vcpu_get_gr(vcpu, inst.M5.r3); | ||
234 | imm = (inst.M5.s << 31) | (inst.M5.i << 30) | | ||
235 | (inst.M5.imm7 << 23); | ||
236 | temp += imm >> 23; | ||
237 | vcpu_set_gr(vcpu, inst.M5.r3, temp, 0); | ||
238 | |||
239 | } else if ((inst.M3.x6 >> 2) < 0xb) { | ||
240 | /*read*/ | ||
241 | dir = IOREQ_READ; | ||
242 | temp = vcpu_get_gr(vcpu, inst.M3.r3); | ||
243 | imm = (inst.M3.s << 31) | (inst.M3.i << 30) | | ||
244 | (inst.M3.imm7 << 23); | ||
245 | temp += imm >> 23; | ||
246 | vcpu_set_gr(vcpu, inst.M3.r3, temp, 0); | ||
247 | |||
248 | } | ||
249 | } else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B | ||
250 | && inst.M9.m == 0 && inst.M9.x == 0) { | ||
251 | /* Floating-point spill*/ | ||
252 | struct ia64_fpreg v; | ||
253 | |||
254 | inst_type = SL_FLOATING; | ||
255 | dir = IOREQ_WRITE; | ||
256 | vcpu_get_fpreg(vcpu, inst.M9.f2, &v); | ||
257 | /* Write high word. FIXME: this is a kludge! */ | ||
258 | v.u.bits[1] &= 0x3ffff; | ||
259 | mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); | ||
260 | data = v.u.bits[0]; | ||
261 | size = 3; | ||
262 | } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) { | ||
263 | /* Floating-point spill + Imm update */ | ||
264 | struct ia64_fpreg v; | ||
265 | |||
266 | inst_type = SL_FLOATING; | ||
267 | dir = IOREQ_WRITE; | ||
268 | vcpu_get_fpreg(vcpu, inst.M10.f2, &v); | ||
269 | temp = vcpu_get_gr(vcpu, inst.M10.r3); | ||
270 | imm = (inst.M10.s << 31) | (inst.M10.i << 30) | | ||
271 | (inst.M10.imm7 << 23); | ||
272 | temp += imm >> 23; | ||
273 | vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); | ||
274 | |||
275 | /* Write high word.FIXME: this is a kludge! */ | ||
276 | v.u.bits[1] &= 0x3ffff; | ||
277 | mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); | ||
278 | data = v.u.bits[0]; | ||
279 | size = 3; | ||
280 | } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) { | ||
281 | /* Floating-point stf8 + Imm update */ | ||
282 | struct ia64_fpreg v; | ||
283 | inst_type = SL_FLOATING; | ||
284 | dir = IOREQ_WRITE; | ||
285 | size = 3; | ||
286 | vcpu_get_fpreg(vcpu, inst.M10.f2, &v); | ||
287 | data = v.u.bits[0]; /* Significand. */ | ||
288 | temp = vcpu_get_gr(vcpu, inst.M10.r3); | ||
289 | imm = (inst.M10.s << 31) | (inst.M10.i << 30) | | ||
290 | (inst.M10.imm7 << 23); | ||
291 | temp += imm >> 23; | ||
292 | vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); | ||
293 | } else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c | ||
294 | && inst.M15.x6 <= 0x2f) { | ||
295 | temp = vcpu_get_gr(vcpu, inst.M15.r3); | ||
296 | imm = (inst.M15.s << 31) | (inst.M15.i << 30) | | ||
297 | (inst.M15.imm7 << 23); | ||
298 | temp += imm >> 23; | ||
299 | vcpu_set_gr(vcpu, inst.M15.r3, temp, 0); | ||
300 | |||
301 | vcpu_increment_iip(vcpu); | ||
302 | return; | ||
303 | } else if (inst.M12.major == 6 && inst.M12.m == 1 | ||
304 | && inst.M12.x == 1 && inst.M12.x6 == 1) { | ||
305 | /* Floating-point Load Pair + Imm ldfp8 M12*/ | ||
306 | struct ia64_fpreg v; | ||
307 | |||
308 | inst_type = SL_FLOATING; | ||
309 | dir = IOREQ_READ; | ||
310 | size = 8; /*ldfd*/ | ||
311 | mmio_access(vcpu, padr, &data, size, ma, dir); | ||
312 | v.u.bits[0] = data; | ||
313 | v.u.bits[1] = 0x1003E; | ||
314 | vcpu_set_fpreg(vcpu, inst.M12.f1, &v); | ||
315 | padr += 8; | ||
316 | mmio_access(vcpu, padr, &data, size, ma, dir); | ||
317 | v.u.bits[0] = data; | ||
318 | v.u.bits[1] = 0x1003E; | ||
319 | vcpu_set_fpreg(vcpu, inst.M12.f2, &v); | ||
320 | padr += 8; | ||
321 | vcpu_set_gr(vcpu, inst.M12.r3, padr, 0); | ||
322 | vcpu_increment_iip(vcpu); | ||
323 | return; | ||
324 | } else { | ||
325 | inst_type = -1; | ||
326 | panic_vm(vcpu); | ||
327 | } | ||
328 | |||
329 | size = 1 << size; | ||
330 | if (dir == IOREQ_WRITE) { | ||
331 | mmio_access(vcpu, padr, &data, size, ma, dir); | ||
332 | } else { | ||
333 | mmio_access(vcpu, padr, &data, size, ma, dir); | ||
334 | if (inst_type == SL_INTEGER) | ||
335 | vcpu_set_gr(vcpu, inst.M1.r1, data, 0); | ||
336 | else | ||
337 | panic_vm(vcpu); | ||
338 | |||
339 | } | ||
340 | vcpu_increment_iip(vcpu); | ||
341 | } | ||
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S new file mode 100644 index 000000000000..e4f15d641b22 --- /dev/null +++ b/arch/ia64/kvm/optvfault.S | |||
@@ -0,0 +1,918 @@ | |||
1 | /* | ||
2 | * arch/ia64/vmx/optvfault.S | ||
3 | * optimize virtualization fault handler | ||
4 | * | ||
5 | * Copyright (C) 2006 Intel Co | ||
6 | * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> | ||
7 | */ | ||
8 | |||
9 | #include <asm/asmmacro.h> | ||
10 | #include <asm/processor.h> | ||
11 | |||
12 | #include "vti.h" | ||
13 | #include "asm-offsets.h" | ||
14 | |||
15 | #define ACCE_MOV_FROM_AR | ||
16 | #define ACCE_MOV_FROM_RR | ||
17 | #define ACCE_MOV_TO_RR | ||
18 | #define ACCE_RSM | ||
19 | #define ACCE_SSM | ||
20 | #define ACCE_MOV_TO_PSR | ||
21 | #define ACCE_THASH | ||
22 | |||
23 | //mov r1=ar3 | ||
24 | GLOBAL_ENTRY(kvm_asm_mov_from_ar) | ||
25 | #ifndef ACCE_MOV_FROM_AR | ||
26 | br.many kvm_virtualization_fault_back | ||
27 | #endif | ||
28 | add r18=VMM_VCPU_ITC_OFS_OFFSET, r21 | ||
29 | add r16=VMM_VCPU_LAST_ITC_OFFSET,r21 | ||
30 | extr.u r17=r25,6,7 | ||
31 | ;; | ||
32 | ld8 r18=[r18] | ||
33 | mov r19=ar.itc | ||
34 | mov r24=b0 | ||
35 | ;; | ||
36 | add r19=r19,r18 | ||
37 | addl r20=@gprel(asm_mov_to_reg),gp | ||
38 | ;; | ||
39 | st8 [r16] = r19 | ||
40 | adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20 | ||
41 | shladd r17=r17,4,r20 | ||
42 | ;; | ||
43 | mov b0=r17 | ||
44 | br.sptk.few b0 | ||
45 | ;; | ||
46 | END(kvm_asm_mov_from_ar) | ||
47 | |||
48 | |||
49 | // mov r1=rr[r3] | ||
50 | GLOBAL_ENTRY(kvm_asm_mov_from_rr) | ||
51 | #ifndef ACCE_MOV_FROM_RR | ||
52 | br.many kvm_virtualization_fault_back | ||
53 | #endif | ||
54 | extr.u r16=r25,20,7 | ||
55 | extr.u r17=r25,6,7 | ||
56 | addl r20=@gprel(asm_mov_from_reg),gp | ||
57 | ;; | ||
58 | adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20 | ||
59 | shladd r16=r16,4,r20 | ||
60 | mov r24=b0 | ||
61 | ;; | ||
62 | add r27=VMM_VCPU_VRR0_OFFSET,r21 | ||
63 | mov b0=r16 | ||
64 | br.many b0 | ||
65 | ;; | ||
66 | kvm_asm_mov_from_rr_back_1: | ||
67 | adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 | ||
68 | adds r22=asm_mov_to_reg-asm_mov_from_reg,r20 | ||
69 | shr.u r26=r19,61 | ||
70 | ;; | ||
71 | shladd r17=r17,4,r22 | ||
72 | shladd r27=r26,3,r27 | ||
73 | ;; | ||
74 | ld8 r19=[r27] | ||
75 | mov b0=r17 | ||
76 | br.many b0 | ||
77 | END(kvm_asm_mov_from_rr) | ||
78 | |||
79 | |||
80 | // mov rr[r3]=r2 | ||
81 | GLOBAL_ENTRY(kvm_asm_mov_to_rr) | ||
82 | #ifndef ACCE_MOV_TO_RR | ||
83 | br.many kvm_virtualization_fault_back | ||
84 | #endif | ||
85 | extr.u r16=r25,20,7 | ||
86 | extr.u r17=r25,13,7 | ||
87 | addl r20=@gprel(asm_mov_from_reg),gp | ||
88 | ;; | ||
89 | adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20 | ||
90 | shladd r16=r16,4,r20 | ||
91 | mov r22=b0 | ||
92 | ;; | ||
93 | add r27=VMM_VCPU_VRR0_OFFSET,r21 | ||
94 | mov b0=r16 | ||
95 | br.many b0 | ||
96 | ;; | ||
97 | kvm_asm_mov_to_rr_back_1: | ||
98 | adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20 | ||
99 | shr.u r23=r19,61 | ||
100 | shladd r17=r17,4,r20 | ||
101 | ;; | ||
102 | //if rr6, go back | ||
103 | cmp.eq p6,p0=6,r23 | ||
104 | mov b0=r22 | ||
105 | (p6) br.cond.dpnt.many kvm_virtualization_fault_back | ||
106 | ;; | ||
107 | mov r28=r19 | ||
108 | mov b0=r17 | ||
109 | br.many b0 | ||
110 | kvm_asm_mov_to_rr_back_2: | ||
111 | adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 | ||
112 | shladd r27=r23,3,r27 | ||
113 | ;; // vrr.rid<<4 |0xe | ||
114 | st8 [r27]=r19 | ||
115 | mov b0=r30 | ||
116 | ;; | ||
117 | extr.u r16=r19,8,26 | ||
118 | extr.u r18 =r19,2,6 | ||
119 | mov r17 =0xe | ||
120 | ;; | ||
121 | shladd r16 = r16, 4, r17 | ||
122 | extr.u r19 =r19,0,8 | ||
123 | ;; | ||
124 | shl r16 = r16,8 | ||
125 | ;; | ||
126 | add r19 = r19, r16 | ||
127 | ;; //set ve 1 | ||
128 | dep r19=-1,r19,0,1 | ||
129 | cmp.lt p6,p0=14,r18 | ||
130 | ;; | ||
131 | (p6) mov r18=14 | ||
132 | ;; | ||
133 | (p6) dep r19=r18,r19,2,6 | ||
134 | ;; | ||
135 | cmp.eq p6,p0=0,r23 | ||
136 | ;; | ||
137 | cmp.eq.or p6,p0=4,r23 | ||
138 | ;; | ||
139 | adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21 | ||
140 | (p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 | ||
141 | ;; | ||
142 | ld4 r16=[r16] | ||
143 | cmp.eq p7,p0=r0,r0 | ||
144 | (p6) shladd r17=r23,1,r17 | ||
145 | ;; | ||
146 | (p6) st8 [r17]=r19 | ||
147 | (p6) tbit.nz p6,p7=r16,0 | ||
148 | ;; | ||
149 | (p7) mov rr[r28]=r19 | ||
150 | mov r24=r22 | ||
151 | br.many b0 | ||
152 | END(kvm_asm_mov_to_rr) | ||
153 | |||
154 | |||
155 | //rsm | ||
156 | GLOBAL_ENTRY(kvm_asm_rsm) | ||
157 | #ifndef ACCE_RSM | ||
158 | br.many kvm_virtualization_fault_back | ||
159 | #endif | ||
160 | add r16=VMM_VPD_BASE_OFFSET,r21 | ||
161 | extr.u r26=r25,6,21 | ||
162 | extr.u r27=r25,31,2 | ||
163 | ;; | ||
164 | ld8 r16=[r16] | ||
165 | extr.u r28=r25,36,1 | ||
166 | dep r26=r27,r26,21,2 | ||
167 | ;; | ||
168 | add r17=VPD_VPSR_START_OFFSET,r16 | ||
169 | add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 | ||
170 | //r26 is imm24 | ||
171 | dep r26=r28,r26,23,1 | ||
172 | ;; | ||
173 | ld8 r18=[r17] | ||
174 | movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI | ||
175 | ld4 r23=[r22] | ||
176 | sub r27=-1,r26 | ||
177 | mov r24=b0 | ||
178 | ;; | ||
179 | mov r20=cr.ipsr | ||
180 | or r28=r27,r28 | ||
181 | and r19=r18,r27 | ||
182 | ;; | ||
183 | st8 [r17]=r19 | ||
184 | and r20=r20,r28 | ||
185 | /* Comment it out due to short of fp lazy alorgithm support | ||
186 | adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 | ||
187 | ;; | ||
188 | ld8 r27=[r27] | ||
189 | ;; | ||
190 | tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT | ||
191 | ;; | ||
192 | (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 | ||
193 | */ | ||
194 | ;; | ||
195 | mov cr.ipsr=r20 | ||
196 | tbit.nz p6,p0=r23,0 | ||
197 | ;; | ||
198 | tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT | ||
199 | (p6) br.dptk kvm_resume_to_guest | ||
200 | ;; | ||
201 | add r26=VMM_VCPU_META_RR0_OFFSET,r21 | ||
202 | add r27=VMM_VCPU_META_RR0_OFFSET+8,r21 | ||
203 | dep r23=-1,r23,0,1 | ||
204 | ;; | ||
205 | ld8 r26=[r26] | ||
206 | ld8 r27=[r27] | ||
207 | st4 [r22]=r23 | ||
208 | dep.z r28=4,61,3 | ||
209 | ;; | ||
210 | mov rr[r0]=r26 | ||
211 | ;; | ||
212 | mov rr[r28]=r27 | ||
213 | ;; | ||
214 | srlz.d | ||
215 | br.many kvm_resume_to_guest | ||
216 | END(kvm_asm_rsm) | ||
217 | |||
218 | |||
219 | //ssm | ||
220 | GLOBAL_ENTRY(kvm_asm_ssm) | ||
221 | #ifndef ACCE_SSM | ||
222 | br.many kvm_virtualization_fault_back | ||
223 | #endif | ||
224 | add r16=VMM_VPD_BASE_OFFSET,r21 | ||
225 | extr.u r26=r25,6,21 | ||
226 | extr.u r27=r25,31,2 | ||
227 | ;; | ||
228 | ld8 r16=[r16] | ||
229 | extr.u r28=r25,36,1 | ||
230 | dep r26=r27,r26,21,2 | ||
231 | ;; //r26 is imm24 | ||
232 | add r27=VPD_VPSR_START_OFFSET,r16 | ||
233 | dep r26=r28,r26,23,1 | ||
234 | ;; //r19 vpsr | ||
235 | ld8 r29=[r27] | ||
236 | mov r24=b0 | ||
237 | ;; | ||
238 | add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 | ||
239 | mov r20=cr.ipsr | ||
240 | or r19=r29,r26 | ||
241 | ;; | ||
242 | ld4 r23=[r22] | ||
243 | st8 [r27]=r19 | ||
244 | or r20=r20,r26 | ||
245 | ;; | ||
246 | mov cr.ipsr=r20 | ||
247 | movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT | ||
248 | ;; | ||
249 | and r19=r28,r19 | ||
250 | tbit.z p6,p0=r23,0 | ||
251 | ;; | ||
252 | cmp.ne.or p6,p0=r28,r19 | ||
253 | (p6) br.dptk kvm_asm_ssm_1 | ||
254 | ;; | ||
255 | add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 | ||
256 | add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 | ||
257 | dep r23=0,r23,0,1 | ||
258 | ;; | ||
259 | ld8 r26=[r26] | ||
260 | ld8 r27=[r27] | ||
261 | st4 [r22]=r23 | ||
262 | dep.z r28=4,61,3 | ||
263 | ;; | ||
264 | mov rr[r0]=r26 | ||
265 | ;; | ||
266 | mov rr[r28]=r27 | ||
267 | ;; | ||
268 | srlz.d | ||
269 | ;; | ||
270 | kvm_asm_ssm_1: | ||
271 | tbit.nz p6,p0=r29,IA64_PSR_I_BIT | ||
272 | ;; | ||
273 | tbit.z.or p6,p0=r19,IA64_PSR_I_BIT | ||
274 | (p6) br.dptk kvm_resume_to_guest | ||
275 | ;; | ||
276 | add r29=VPD_VTPR_START_OFFSET,r16 | ||
277 | add r30=VPD_VHPI_START_OFFSET,r16 | ||
278 | ;; | ||
279 | ld8 r29=[r29] | ||
280 | ld8 r30=[r30] | ||
281 | ;; | ||
282 | extr.u r17=r29,4,4 | ||
283 | extr.u r18=r29,16,1 | ||
284 | ;; | ||
285 | dep r17=r18,r17,4,1 | ||
286 | ;; | ||
287 | cmp.gt p6,p0=r30,r17 | ||
288 | (p6) br.dpnt.few kvm_asm_dispatch_vexirq | ||
289 | br.many kvm_resume_to_guest | ||
290 | END(kvm_asm_ssm) | ||
291 | |||
292 | |||
293 | //mov psr.l=r2 | ||
294 | GLOBAL_ENTRY(kvm_asm_mov_to_psr) | ||
295 | #ifndef ACCE_MOV_TO_PSR | ||
296 | br.many kvm_virtualization_fault_back | ||
297 | #endif | ||
298 | add r16=VMM_VPD_BASE_OFFSET,r21 | ||
299 | extr.u r26=r25,13,7 //r2 | ||
300 | ;; | ||
301 | ld8 r16=[r16] | ||
302 | addl r20=@gprel(asm_mov_from_reg),gp | ||
303 | ;; | ||
304 | adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20 | ||
305 | shladd r26=r26,4,r20 | ||
306 | mov r24=b0 | ||
307 | ;; | ||
308 | add r27=VPD_VPSR_START_OFFSET,r16 | ||
309 | mov b0=r26 | ||
310 | br.many b0 | ||
311 | ;; | ||
312 | kvm_asm_mov_to_psr_back: | ||
313 | ld8 r17=[r27] | ||
314 | add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 | ||
315 | dep r19=0,r19,32,32 | ||
316 | ;; | ||
317 | ld4 r23=[r22] | ||
318 | dep r18=0,r17,0,32 | ||
319 | ;; | ||
320 | add r30=r18,r19 | ||
321 | movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT | ||
322 | ;; | ||
323 | st8 [r27]=r30 | ||
324 | and r27=r28,r30 | ||
325 | and r29=r28,r17 | ||
326 | ;; | ||
327 | cmp.eq p5,p0=r29,r27 | ||
328 | cmp.eq p6,p7=r28,r27 | ||
329 | (p5) br.many kvm_asm_mov_to_psr_1 | ||
330 | ;; | ||
331 | //virtual to physical | ||
332 | (p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21 | ||
333 | (p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21 | ||
334 | (p7) dep r23=-1,r23,0,1 | ||
335 | ;; | ||
336 | //physical to virtual | ||
337 | (p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 | ||
338 | (p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 | ||
339 | (p6) dep r23=0,r23,0,1 | ||
340 | ;; | ||
341 | ld8 r26=[r26] | ||
342 | ld8 r27=[r27] | ||
343 | st4 [r22]=r23 | ||
344 | dep.z r28=4,61,3 | ||
345 | ;; | ||
346 | mov rr[r0]=r26 | ||
347 | ;; | ||
348 | mov rr[r28]=r27 | ||
349 | ;; | ||
350 | srlz.d | ||
351 | ;; | ||
352 | kvm_asm_mov_to_psr_1: | ||
353 | mov r20=cr.ipsr | ||
354 | movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT | ||
355 | ;; | ||
356 | or r19=r19,r28 | ||
357 | dep r20=0,r20,0,32 | ||
358 | ;; | ||
359 | add r20=r19,r20 | ||
360 | mov b0=r24 | ||
361 | ;; | ||
362 | /* Comment it out due to short of fp lazy algorithm support | ||
363 | adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 | ||
364 | ;; | ||
365 | ld8 r27=[r27] | ||
366 | ;; | ||
367 | tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT | ||
368 | ;; | ||
369 | (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 | ||
370 | ;; | ||
371 | */ | ||
372 | mov cr.ipsr=r20 | ||
373 | cmp.ne p6,p0=r0,r0 | ||
374 | ;; | ||
375 | tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT | ||
376 | tbit.z.or p6,p0=r30,IA64_PSR_I_BIT | ||
377 | (p6) br.dpnt.few kvm_resume_to_guest | ||
378 | ;; | ||
379 | add r29=VPD_VTPR_START_OFFSET,r16 | ||
380 | add r30=VPD_VHPI_START_OFFSET,r16 | ||
381 | ;; | ||
382 | ld8 r29=[r29] | ||
383 | ld8 r30=[r30] | ||
384 | ;; | ||
385 | extr.u r17=r29,4,4 | ||
386 | extr.u r18=r29,16,1 | ||
387 | ;; | ||
388 | dep r17=r18,r17,4,1 | ||
389 | ;; | ||
390 | cmp.gt p6,p0=r30,r17 | ||
391 | (p6) br.dpnt.few kvm_asm_dispatch_vexirq | ||
392 | br.many kvm_resume_to_guest | ||
393 | END(kvm_asm_mov_to_psr) | ||
394 | |||
395 | |||
396 | ENTRY(kvm_asm_dispatch_vexirq) | ||
397 | //increment iip | ||
398 | mov r16=cr.ipsr | ||
399 | ;; | ||
400 | extr.u r17=r16,IA64_PSR_RI_BIT,2 | ||
401 | tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 | ||
402 | ;; | ||
403 | (p6) mov r18=cr.iip | ||
404 | (p6) mov r17=r0 | ||
405 | (p7) add r17=1,r17 | ||
406 | ;; | ||
407 | (p6) add r18=0x10,r18 | ||
408 | dep r16=r17,r16,IA64_PSR_RI_BIT,2 | ||
409 | ;; | ||
410 | (p6) mov cr.iip=r18 | ||
411 | mov cr.ipsr=r16 | ||
412 | mov r30 =1 | ||
413 | br.many kvm_dispatch_vexirq | ||
414 | END(kvm_asm_dispatch_vexirq) | ||
415 | |||
416 | // thash | ||
417 | // TODO: add support when pta.vf = 1 | ||
418 | GLOBAL_ENTRY(kvm_asm_thash) | ||
419 | #ifndef ACCE_THASH | ||
420 | br.many kvm_virtualization_fault_back | ||
421 | #endif | ||
422 | extr.u r17=r25,20,7 // get r3 from opcode in r25 | ||
423 | extr.u r18=r25,6,7 // get r1 from opcode in r25 | ||
424 | addl r20=@gprel(asm_mov_from_reg),gp | ||
425 | ;; | ||
426 | adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20 | ||
427 | shladd r17=r17,4,r20 // get addr of MOVE_FROM_REG(r17) | ||
428 | adds r16=VMM_VPD_BASE_OFFSET,r21 // get vcpu.arch.priveregs | ||
429 | ;; | ||
430 | mov r24=b0 | ||
431 | ;; | ||
432 | ld8 r16=[r16] // get VPD addr | ||
433 | mov b0=r17 | ||
434 | br.many b0 // r19 return value | ||
435 | ;; | ||
436 | kvm_asm_thash_back1: | ||
437 | shr.u r23=r19,61 // get RR number | ||
438 | adds r25=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr | ||
439 | adds r16=VMM_VPD_VPTA_OFFSET,r16 // get vpta | ||
440 | ;; | ||
441 | shladd r27=r23,3,r25 // get vcpu->arch.vrr[r23]'s addr | ||
442 | ld8 r17=[r16] // get PTA | ||
443 | mov r26=1 | ||
444 | ;; | ||
445 | extr.u r29=r17,2,6 // get pta.size | ||
446 | ld8 r25=[r27] // get vcpu->arch.vrr[r23]'s value | ||
447 | ;; | ||
448 | extr.u r25=r25,2,6 // get rr.ps | ||
449 | shl r22=r26,r29 // 1UL << pta.size | ||
450 | ;; | ||
451 | shr.u r23=r19,r25 // vaddr >> rr.ps | ||
452 | adds r26=3,r29 // pta.size + 3 | ||
453 | shl r27=r17,3 // pta << 3 | ||
454 | ;; | ||
455 | shl r23=r23,3 // (vaddr >> rr.ps) << 3 | ||
456 | shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3) | ||
457 | movl r16=7<<61 | ||
458 | ;; | ||
459 | adds r22=-1,r22 // (1UL << pta.size) - 1 | ||
460 | shl r27=r27,r29 // ((pta<<3)>>(pta.size+3))<<pta.size | ||
461 | and r19=r19,r16 // vaddr & VRN_MASK | ||
462 | ;; | ||
463 | and r22=r22,r23 // vhpt_offset | ||
464 | or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size) | ||
465 | adds r26=asm_mov_to_reg-asm_mov_from_reg,r20 | ||
466 | ;; | ||
467 | or r19=r19,r22 // calc pval | ||
468 | shladd r17=r18,4,r26 | ||
469 | adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 | ||
470 | ;; | ||
471 | mov b0=r17 | ||
472 | br.many b0 | ||
473 | END(kvm_asm_thash) | ||
474 | |||
475 | #define MOV_TO_REG0 \ | ||
476 | {; \ | ||
477 | nop.b 0x0; \ | ||
478 | nop.b 0x0; \ | ||
479 | nop.b 0x0; \ | ||
480 | ;; \ | ||
481 | }; | ||
482 | |||
483 | |||
484 | #define MOV_TO_REG(n) \ | ||
485 | {; \ | ||
486 | mov r##n##=r19; \ | ||
487 | mov b0=r30; \ | ||
488 | br.sptk.many b0; \ | ||
489 | ;; \ | ||
490 | }; | ||
491 | |||
492 | |||
493 | #define MOV_FROM_REG(n) \ | ||
494 | {; \ | ||
495 | mov r19=r##n##; \ | ||
496 | mov b0=r30; \ | ||
497 | br.sptk.many b0; \ | ||
498 | ;; \ | ||
499 | }; | ||
500 | |||
501 | |||
502 | #define MOV_TO_BANK0_REG(n) \ | ||
503 | ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##); \ | ||
504 | {; \ | ||
505 | mov r26=r2; \ | ||
506 | mov r2=r19; \ | ||
507 | bsw.1; \ | ||
508 | ;; \ | ||
509 | }; \ | ||
510 | {; \ | ||
511 | mov r##n##=r2; \ | ||
512 | nop.b 0x0; \ | ||
513 | bsw.0; \ | ||
514 | ;; \ | ||
515 | }; \ | ||
516 | {; \ | ||
517 | mov r2=r26; \ | ||
518 | mov b0=r30; \ | ||
519 | br.sptk.many b0; \ | ||
520 | ;; \ | ||
521 | }; \ | ||
522 | END(asm_mov_to_bank0_reg##n##) | ||
523 | |||
524 | |||
525 | #define MOV_FROM_BANK0_REG(n) \ | ||
526 | ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##); \ | ||
527 | {; \ | ||
528 | mov r26=r2; \ | ||
529 | nop.b 0x0; \ | ||
530 | bsw.1; \ | ||
531 | ;; \ | ||
532 | }; \ | ||
533 | {; \ | ||
534 | mov r2=r##n##; \ | ||
535 | nop.b 0x0; \ | ||
536 | bsw.0; \ | ||
537 | ;; \ | ||
538 | }; \ | ||
539 | {; \ | ||
540 | mov r19=r2; \ | ||
541 | mov r2=r26; \ | ||
542 | mov b0=r30; \ | ||
543 | }; \ | ||
544 | {; \ | ||
545 | nop.b 0x0; \ | ||
546 | nop.b 0x0; \ | ||
547 | br.sptk.many b0; \ | ||
548 | ;; \ | ||
549 | }; \ | ||
550 | END(asm_mov_from_bank0_reg##n##) | ||
551 | |||
552 | |||
553 | #define JMP_TO_MOV_TO_BANK0_REG(n) \ | ||
554 | {; \ | ||
555 | nop.b 0x0; \ | ||
556 | nop.b 0x0; \ | ||
557 | br.sptk.many asm_mov_to_bank0_reg##n##; \ | ||
558 | ;; \ | ||
559 | } | ||
560 | |||
561 | |||
562 | #define JMP_TO_MOV_FROM_BANK0_REG(n) \ | ||
563 | {; \ | ||
564 | nop.b 0x0; \ | ||
565 | nop.b 0x0; \ | ||
566 | br.sptk.many asm_mov_from_bank0_reg##n##; \ | ||
567 | ;; \ | ||
568 | } | ||
569 | |||
570 | |||
571 | MOV_FROM_BANK0_REG(16) | ||
572 | MOV_FROM_BANK0_REG(17) | ||
573 | MOV_FROM_BANK0_REG(18) | ||
574 | MOV_FROM_BANK0_REG(19) | ||
575 | MOV_FROM_BANK0_REG(20) | ||
576 | MOV_FROM_BANK0_REG(21) | ||
577 | MOV_FROM_BANK0_REG(22) | ||
578 | MOV_FROM_BANK0_REG(23) | ||
579 | MOV_FROM_BANK0_REG(24) | ||
580 | MOV_FROM_BANK0_REG(25) | ||
581 | MOV_FROM_BANK0_REG(26) | ||
582 | MOV_FROM_BANK0_REG(27) | ||
583 | MOV_FROM_BANK0_REG(28) | ||
584 | MOV_FROM_BANK0_REG(29) | ||
585 | MOV_FROM_BANK0_REG(30) | ||
586 | MOV_FROM_BANK0_REG(31) | ||
587 | |||
588 | |||
589 | // mov from reg table | ||
590 | ENTRY(asm_mov_from_reg) | ||
591 | MOV_FROM_REG(0) | ||
592 | MOV_FROM_REG(1) | ||
593 | MOV_FROM_REG(2) | ||
594 | MOV_FROM_REG(3) | ||
595 | MOV_FROM_REG(4) | ||
596 | MOV_FROM_REG(5) | ||
597 | MOV_FROM_REG(6) | ||
598 | MOV_FROM_REG(7) | ||
599 | MOV_FROM_REG(8) | ||
600 | MOV_FROM_REG(9) | ||
601 | MOV_FROM_REG(10) | ||
602 | MOV_FROM_REG(11) | ||
603 | MOV_FROM_REG(12) | ||
604 | MOV_FROM_REG(13) | ||
605 | MOV_FROM_REG(14) | ||
606 | MOV_FROM_REG(15) | ||
607 | JMP_TO_MOV_FROM_BANK0_REG(16) | ||
608 | JMP_TO_MOV_FROM_BANK0_REG(17) | ||
609 | JMP_TO_MOV_FROM_BANK0_REG(18) | ||
610 | JMP_TO_MOV_FROM_BANK0_REG(19) | ||
611 | JMP_TO_MOV_FROM_BANK0_REG(20) | ||
612 | JMP_TO_MOV_FROM_BANK0_REG(21) | ||
613 | JMP_TO_MOV_FROM_BANK0_REG(22) | ||
614 | JMP_TO_MOV_FROM_BANK0_REG(23) | ||
615 | JMP_TO_MOV_FROM_BANK0_REG(24) | ||
616 | JMP_TO_MOV_FROM_BANK0_REG(25) | ||
617 | JMP_TO_MOV_FROM_BANK0_REG(26) | ||
618 | JMP_TO_MOV_FROM_BANK0_REG(27) | ||
619 | JMP_TO_MOV_FROM_BANK0_REG(28) | ||
620 | JMP_TO_MOV_FROM_BANK0_REG(29) | ||
621 | JMP_TO_MOV_FROM_BANK0_REG(30) | ||
622 | JMP_TO_MOV_FROM_BANK0_REG(31) | ||
623 | MOV_FROM_REG(32) | ||
624 | MOV_FROM_REG(33) | ||
625 | MOV_FROM_REG(34) | ||
626 | MOV_FROM_REG(35) | ||
627 | MOV_FROM_REG(36) | ||
628 | MOV_FROM_REG(37) | ||
629 | MOV_FROM_REG(38) | ||
630 | MOV_FROM_REG(39) | ||
631 | MOV_FROM_REG(40) | ||
632 | MOV_FROM_REG(41) | ||
633 | MOV_FROM_REG(42) | ||
634 | MOV_FROM_REG(43) | ||
635 | MOV_FROM_REG(44) | ||
636 | MOV_FROM_REG(45) | ||
637 | MOV_FROM_REG(46) | ||
638 | MOV_FROM_REG(47) | ||
639 | MOV_FROM_REG(48) | ||
640 | MOV_FROM_REG(49) | ||
641 | MOV_FROM_REG(50) | ||
642 | MOV_FROM_REG(51) | ||
643 | MOV_FROM_REG(52) | ||
644 | MOV_FROM_REG(53) | ||
645 | MOV_FROM_REG(54) | ||
646 | MOV_FROM_REG(55) | ||
647 | MOV_FROM_REG(56) | ||
648 | MOV_FROM_REG(57) | ||
649 | MOV_FROM_REG(58) | ||
650 | MOV_FROM_REG(59) | ||
651 | MOV_FROM_REG(60) | ||
652 | MOV_FROM_REG(61) | ||
653 | MOV_FROM_REG(62) | ||
654 | MOV_FROM_REG(63) | ||
655 | MOV_FROM_REG(64) | ||
656 | MOV_FROM_REG(65) | ||
657 | MOV_FROM_REG(66) | ||
658 | MOV_FROM_REG(67) | ||
659 | MOV_FROM_REG(68) | ||
660 | MOV_FROM_REG(69) | ||
661 | MOV_FROM_REG(70) | ||
662 | MOV_FROM_REG(71) | ||
663 | MOV_FROM_REG(72) | ||
664 | MOV_FROM_REG(73) | ||
665 | MOV_FROM_REG(74) | ||
666 | MOV_FROM_REG(75) | ||
667 | MOV_FROM_REG(76) | ||
668 | MOV_FROM_REG(77) | ||
669 | MOV_FROM_REG(78) | ||
670 | MOV_FROM_REG(79) | ||
671 | MOV_FROM_REG(80) | ||
672 | MOV_FROM_REG(81) | ||
673 | MOV_FROM_REG(82) | ||
674 | MOV_FROM_REG(83) | ||
675 | MOV_FROM_REG(84) | ||
676 | MOV_FROM_REG(85) | ||
677 | MOV_FROM_REG(86) | ||
678 | MOV_FROM_REG(87) | ||
679 | MOV_FROM_REG(88) | ||
680 | MOV_FROM_REG(89) | ||
681 | MOV_FROM_REG(90) | ||
682 | MOV_FROM_REG(91) | ||
683 | MOV_FROM_REG(92) | ||
684 | MOV_FROM_REG(93) | ||
685 | MOV_FROM_REG(94) | ||
686 | MOV_FROM_REG(95) | ||
687 | MOV_FROM_REG(96) | ||
688 | MOV_FROM_REG(97) | ||
689 | MOV_FROM_REG(98) | ||
690 | MOV_FROM_REG(99) | ||
691 | MOV_FROM_REG(100) | ||
692 | MOV_FROM_REG(101) | ||
693 | MOV_FROM_REG(102) | ||
694 | MOV_FROM_REG(103) | ||
695 | MOV_FROM_REG(104) | ||
696 | MOV_FROM_REG(105) | ||
697 | MOV_FROM_REG(106) | ||
698 | MOV_FROM_REG(107) | ||
699 | MOV_FROM_REG(108) | ||
700 | MOV_FROM_REG(109) | ||
701 | MOV_FROM_REG(110) | ||
702 | MOV_FROM_REG(111) | ||
703 | MOV_FROM_REG(112) | ||
704 | MOV_FROM_REG(113) | ||
705 | MOV_FROM_REG(114) | ||
706 | MOV_FROM_REG(115) | ||
707 | MOV_FROM_REG(116) | ||
708 | MOV_FROM_REG(117) | ||
709 | MOV_FROM_REG(118) | ||
710 | MOV_FROM_REG(119) | ||
711 | MOV_FROM_REG(120) | ||
712 | MOV_FROM_REG(121) | ||
713 | MOV_FROM_REG(122) | ||
714 | MOV_FROM_REG(123) | ||
715 | MOV_FROM_REG(124) | ||
716 | MOV_FROM_REG(125) | ||
717 | MOV_FROM_REG(126) | ||
718 | MOV_FROM_REG(127) | ||
719 | END(asm_mov_from_reg) | ||
720 | |||
721 | |||
722 | /* must be in bank 0 | ||
723 | * parameter: | ||
724 | * r31: pr | ||
725 | * r24: b0 | ||
726 | */ | ||
727 | ENTRY(kvm_resume_to_guest) | ||
728 | adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 | ||
729 | ;; | ||
730 | ld8 r1 =[r16] | ||
731 | adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21 | ||
732 | ;; | ||
733 | mov r16=cr.ipsr | ||
734 | ;; | ||
735 | ld8 r20 = [r20] | ||
736 | adds r19=VMM_VPD_BASE_OFFSET,r21 | ||
737 | ;; | ||
738 | ld8 r25=[r19] | ||
739 | extr.u r17=r16,IA64_PSR_RI_BIT,2 | ||
740 | tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 | ||
741 | ;; | ||
742 | (p6) mov r18=cr.iip | ||
743 | (p6) mov r17=r0 | ||
744 | ;; | ||
745 | (p6) add r18=0x10,r18 | ||
746 | (p7) add r17=1,r17 | ||
747 | ;; | ||
748 | (p6) mov cr.iip=r18 | ||
749 | dep r16=r17,r16,IA64_PSR_RI_BIT,2 | ||
750 | ;; | ||
751 | mov cr.ipsr=r16 | ||
752 | adds r19= VPD_VPSR_START_OFFSET,r25 | ||
753 | add r28=PAL_VPS_RESUME_NORMAL,r20 | ||
754 | add r29=PAL_VPS_RESUME_HANDLER,r20 | ||
755 | ;; | ||
756 | ld8 r19=[r19] | ||
757 | mov b0=r29 | ||
758 | cmp.ne p6,p7 = r0,r0 | ||
759 | ;; | ||
760 | tbit.z p6,p7 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic | ||
761 | ;; | ||
762 | (p6) ld8 r26=[r25] | ||
763 | (p7) mov b0=r28 | ||
764 | mov pr=r31,-2 | ||
765 | br.sptk.many b0 // call pal service | ||
766 | ;; | ||
767 | END(kvm_resume_to_guest) | ||
768 | |||
769 | |||
770 | MOV_TO_BANK0_REG(16) | ||
771 | MOV_TO_BANK0_REG(17) | ||
772 | MOV_TO_BANK0_REG(18) | ||
773 | MOV_TO_BANK0_REG(19) | ||
774 | MOV_TO_BANK0_REG(20) | ||
775 | MOV_TO_BANK0_REG(21) | ||
776 | MOV_TO_BANK0_REG(22) | ||
777 | MOV_TO_BANK0_REG(23) | ||
778 | MOV_TO_BANK0_REG(24) | ||
779 | MOV_TO_BANK0_REG(25) | ||
780 | MOV_TO_BANK0_REG(26) | ||
781 | MOV_TO_BANK0_REG(27) | ||
782 | MOV_TO_BANK0_REG(28) | ||
783 | MOV_TO_BANK0_REG(29) | ||
784 | MOV_TO_BANK0_REG(30) | ||
785 | MOV_TO_BANK0_REG(31) | ||
786 | |||
787 | |||
788 | // mov to reg table | ||
789 | ENTRY(asm_mov_to_reg) | ||
790 | MOV_TO_REG0 | ||
791 | MOV_TO_REG(1) | ||
792 | MOV_TO_REG(2) | ||
793 | MOV_TO_REG(3) | ||
794 | MOV_TO_REG(4) | ||
795 | MOV_TO_REG(5) | ||
796 | MOV_TO_REG(6) | ||
797 | MOV_TO_REG(7) | ||
798 | MOV_TO_REG(8) | ||
799 | MOV_TO_REG(9) | ||
800 | MOV_TO_REG(10) | ||
801 | MOV_TO_REG(11) | ||
802 | MOV_TO_REG(12) | ||
803 | MOV_TO_REG(13) | ||
804 | MOV_TO_REG(14) | ||
805 | MOV_TO_REG(15) | ||
806 | JMP_TO_MOV_TO_BANK0_REG(16) | ||
807 | JMP_TO_MOV_TO_BANK0_REG(17) | ||
808 | JMP_TO_MOV_TO_BANK0_REG(18) | ||
809 | JMP_TO_MOV_TO_BANK0_REG(19) | ||
810 | JMP_TO_MOV_TO_BANK0_REG(20) | ||
811 | JMP_TO_MOV_TO_BANK0_REG(21) | ||
812 | JMP_TO_MOV_TO_BANK0_REG(22) | ||
813 | JMP_TO_MOV_TO_BANK0_REG(23) | ||
814 | JMP_TO_MOV_TO_BANK0_REG(24) | ||
815 | JMP_TO_MOV_TO_BANK0_REG(25) | ||
816 | JMP_TO_MOV_TO_BANK0_REG(26) | ||
817 | JMP_TO_MOV_TO_BANK0_REG(27) | ||
818 | JMP_TO_MOV_TO_BANK0_REG(28) | ||
819 | JMP_TO_MOV_TO_BANK0_REG(29) | ||
820 | JMP_TO_MOV_TO_BANK0_REG(30) | ||
821 | JMP_TO_MOV_TO_BANK0_REG(31) | ||
822 | MOV_TO_REG(32) | ||
823 | MOV_TO_REG(33) | ||
824 | MOV_TO_REG(34) | ||
825 | MOV_TO_REG(35) | ||
826 | MOV_TO_REG(36) | ||
827 | MOV_TO_REG(37) | ||
828 | MOV_TO_REG(38) | ||
829 | MOV_TO_REG(39) | ||
830 | MOV_TO_REG(40) | ||
831 | MOV_TO_REG(41) | ||
832 | MOV_TO_REG(42) | ||
833 | MOV_TO_REG(43) | ||
834 | MOV_TO_REG(44) | ||
835 | MOV_TO_REG(45) | ||
836 | MOV_TO_REG(46) | ||
837 | MOV_TO_REG(47) | ||
838 | MOV_TO_REG(48) | ||
839 | MOV_TO_REG(49) | ||
840 | MOV_TO_REG(50) | ||
841 | MOV_TO_REG(51) | ||
842 | MOV_TO_REG(52) | ||
843 | MOV_TO_REG(53) | ||
844 | MOV_TO_REG(54) | ||
845 | MOV_TO_REG(55) | ||
846 | MOV_TO_REG(56) | ||
847 | MOV_TO_REG(57) | ||
848 | MOV_TO_REG(58) | ||
849 | MOV_TO_REG(59) | ||
850 | MOV_TO_REG(60) | ||
851 | MOV_TO_REG(61) | ||
852 | MOV_TO_REG(62) | ||
853 | MOV_TO_REG(63) | ||
854 | MOV_TO_REG(64) | ||
855 | MOV_TO_REG(65) | ||
856 | MOV_TO_REG(66) | ||
857 | MOV_TO_REG(67) | ||
858 | MOV_TO_REG(68) | ||
859 | MOV_TO_REG(69) | ||
860 | MOV_TO_REG(70) | ||
861 | MOV_TO_REG(71) | ||
862 | MOV_TO_REG(72) | ||
863 | MOV_TO_REG(73) | ||
864 | MOV_TO_REG(74) | ||
865 | MOV_TO_REG(75) | ||
866 | MOV_TO_REG(76) | ||
867 | MOV_TO_REG(77) | ||
868 | MOV_TO_REG(78) | ||
869 | MOV_TO_REG(79) | ||
870 | MOV_TO_REG(80) | ||
871 | MOV_TO_REG(81) | ||
872 | MOV_TO_REG(82) | ||
873 | MOV_TO_REG(83) | ||
874 | MOV_TO_REG(84) | ||
875 | MOV_TO_REG(85) | ||
876 | MOV_TO_REG(86) | ||
877 | MOV_TO_REG(87) | ||
878 | MOV_TO_REG(88) | ||
879 | MOV_TO_REG(89) | ||
880 | MOV_TO_REG(90) | ||
881 | MOV_TO_REG(91) | ||
882 | MOV_TO_REG(92) | ||
883 | MOV_TO_REG(93) | ||
884 | MOV_TO_REG(94) | ||
885 | MOV_TO_REG(95) | ||
886 | MOV_TO_REG(96) | ||
887 | MOV_TO_REG(97) | ||
888 | MOV_TO_REG(98) | ||
889 | MOV_TO_REG(99) | ||
890 | MOV_TO_REG(100) | ||
891 | MOV_TO_REG(101) | ||
892 | MOV_TO_REG(102) | ||
893 | MOV_TO_REG(103) | ||
894 | MOV_TO_REG(104) | ||
895 | MOV_TO_REG(105) | ||
896 | MOV_TO_REG(106) | ||
897 | MOV_TO_REG(107) | ||
898 | MOV_TO_REG(108) | ||
899 | MOV_TO_REG(109) | ||
900 | MOV_TO_REG(110) | ||
901 | MOV_TO_REG(111) | ||
902 | MOV_TO_REG(112) | ||
903 | MOV_TO_REG(113) | ||
904 | MOV_TO_REG(114) | ||
905 | MOV_TO_REG(115) | ||
906 | MOV_TO_REG(116) | ||
907 | MOV_TO_REG(117) | ||
908 | MOV_TO_REG(118) | ||
909 | MOV_TO_REG(119) | ||
910 | MOV_TO_REG(120) | ||
911 | MOV_TO_REG(121) | ||
912 | MOV_TO_REG(122) | ||
913 | MOV_TO_REG(123) | ||
914 | MOV_TO_REG(124) | ||
915 | MOV_TO_REG(125) | ||
916 | MOV_TO_REG(126) | ||
917 | MOV_TO_REG(127) | ||
918 | END(asm_mov_to_reg) | ||
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c new file mode 100644 index 000000000000..5a33f7ed29a0 --- /dev/null +++ b/arch/ia64/kvm/process.c | |||
@@ -0,0 +1,970 @@ | |||
1 | /* | ||
2 | * process.c: handle interruption inject for guests. | ||
3 | * Copyright (c) 2005, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
16 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
17 | * | ||
18 | * Shaofan Li (Susue Li) <susie.li@intel.com> | ||
19 | * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com> | ||
20 | * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) | ||
21 | * Xiantao Zhang (xiantao.zhang@intel.com) | ||
22 | */ | ||
23 | #include "vcpu.h" | ||
24 | |||
25 | #include <asm/pal.h> | ||
26 | #include <asm/sal.h> | ||
27 | #include <asm/fpswa.h> | ||
28 | #include <asm/kregs.h> | ||
29 | #include <asm/tlb.h> | ||
30 | |||
31 | fpswa_interface_t *vmm_fpswa_interface; | ||
32 | |||
33 | #define IA64_VHPT_TRANS_VECTOR 0x0000 | ||
34 | #define IA64_INST_TLB_VECTOR 0x0400 | ||
35 | #define IA64_DATA_TLB_VECTOR 0x0800 | ||
36 | #define IA64_ALT_INST_TLB_VECTOR 0x0c00 | ||
37 | #define IA64_ALT_DATA_TLB_VECTOR 0x1000 | ||
38 | #define IA64_DATA_NESTED_TLB_VECTOR 0x1400 | ||
39 | #define IA64_INST_KEY_MISS_VECTOR 0x1800 | ||
40 | #define IA64_DATA_KEY_MISS_VECTOR 0x1c00 | ||
41 | #define IA64_DIRTY_BIT_VECTOR 0x2000 | ||
42 | #define IA64_INST_ACCESS_BIT_VECTOR 0x2400 | ||
43 | #define IA64_DATA_ACCESS_BIT_VECTOR 0x2800 | ||
44 | #define IA64_BREAK_VECTOR 0x2c00 | ||
45 | #define IA64_EXTINT_VECTOR 0x3000 | ||
46 | #define IA64_PAGE_NOT_PRESENT_VECTOR 0x5000 | ||
47 | #define IA64_KEY_PERMISSION_VECTOR 0x5100 | ||
48 | #define IA64_INST_ACCESS_RIGHTS_VECTOR 0x5200 | ||
49 | #define IA64_DATA_ACCESS_RIGHTS_VECTOR 0x5300 | ||
50 | #define IA64_GENEX_VECTOR 0x5400 | ||
51 | #define IA64_DISABLED_FPREG_VECTOR 0x5500 | ||
52 | #define IA64_NAT_CONSUMPTION_VECTOR 0x5600 | ||
53 | #define IA64_SPECULATION_VECTOR 0x5700 /* UNUSED */ | ||
54 | #define IA64_DEBUG_VECTOR 0x5900 | ||
55 | #define IA64_UNALIGNED_REF_VECTOR 0x5a00 | ||
56 | #define IA64_UNSUPPORTED_DATA_REF_VECTOR 0x5b00 | ||
57 | #define IA64_FP_FAULT_VECTOR 0x5c00 | ||
58 | #define IA64_FP_TRAP_VECTOR 0x5d00 | ||
59 | #define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 0x5e00 | ||
60 | #define IA64_TAKEN_BRANCH_TRAP_VECTOR 0x5f00 | ||
61 | #define IA64_SINGLE_STEP_TRAP_VECTOR 0x6000 | ||
62 | |||
63 | /* SDM vol2 5.5 - IVA based interruption handling */ | ||
64 | #define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\ | ||
65 | IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT | \ | ||
66 | IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT) | ||
67 | |||
68 | #define DOMN_PAL_REQUEST 0x110000 | ||
69 | #define DOMN_SAL_REQUEST 0x110001 | ||
70 | |||
71 | static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800, | ||
72 | 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, | ||
73 | 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400, | ||
74 | 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00, | ||
75 | 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600, | ||
76 | 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00, | ||
77 | 0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, | ||
78 | 0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00 | ||
79 | }; | ||
80 | |||
81 | static void collect_interruption(struct kvm_vcpu *vcpu) | ||
82 | { | ||
83 | u64 ipsr; | ||
84 | u64 vdcr; | ||
85 | u64 vifs; | ||
86 | unsigned long vpsr; | ||
87 | struct kvm_pt_regs *regs = vcpu_regs(vcpu); | ||
88 | |||
89 | vpsr = vcpu_get_psr(vcpu); | ||
90 | vcpu_bsw0(vcpu); | ||
91 | if (vpsr & IA64_PSR_IC) { | ||
92 | |||
93 | /* Sync mpsr id/da/dd/ss/ed bits to vipsr | ||
94 | * since after guest do rfi, we still want these bits on in | ||
95 | * mpsr | ||
96 | */ | ||
97 | |||
98 | ipsr = regs->cr_ipsr; | ||
99 | vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA | ||
100 | | IA64_PSR_DD | IA64_PSR_SS | ||
101 | | IA64_PSR_ED)); | ||
102 | vcpu_set_ipsr(vcpu, vpsr); | ||
103 | |||
104 | /* Currently, for trap, we do not advance IIP to next | ||
105 | * instruction. That's because we assume caller already | ||
106 | * set up IIP correctly | ||
107 | */ | ||
108 | |||
109 | vcpu_set_iip(vcpu , regs->cr_iip); | ||
110 | |||
111 | /* set vifs.v to zero */ | ||
112 | vifs = VCPU(vcpu, ifs); | ||
113 | vifs &= ~IA64_IFS_V; | ||
114 | vcpu_set_ifs(vcpu, vifs); | ||
115 | |||
116 | vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa)); | ||
117 | } | ||
118 | |||
119 | vdcr = VCPU(vcpu, dcr); | ||
120 | |||
121 | /* Set guest psr | ||
122 | * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged | ||
123 | * be: set to the value of dcr.be | ||
124 | * pp: set to the value of dcr.pp | ||
125 | */ | ||
126 | vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION; | ||
127 | vpsr |= (vdcr & IA64_DCR_BE); | ||
128 | |||
129 | /* VDCR pp bit position is different from VPSR pp bit */ | ||
130 | if (vdcr & IA64_DCR_PP) { | ||
131 | vpsr |= IA64_PSR_PP; | ||
132 | } else { | ||
133 | vpsr &= ~IA64_PSR_PP;; | ||
134 | } | ||
135 | |||
136 | vcpu_set_psr(vcpu, vpsr); | ||
137 | |||
138 | } | ||
139 | |||
140 | void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec) | ||
141 | { | ||
142 | u64 viva; | ||
143 | struct kvm_pt_regs *regs; | ||
144 | union ia64_isr pt_isr; | ||
145 | |||
146 | regs = vcpu_regs(vcpu); | ||
147 | |||
148 | /* clear cr.isr.ir (incomplete register frame)*/ | ||
149 | pt_isr.val = VMX(vcpu, cr_isr); | ||
150 | pt_isr.ir = 0; | ||
151 | VMX(vcpu, cr_isr) = pt_isr.val; | ||
152 | |||
153 | collect_interruption(vcpu); | ||
154 | |||
155 | viva = vcpu_get_iva(vcpu); | ||
156 | regs->cr_iip = viva + vec; | ||
157 | } | ||
158 | |||
159 | static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa) | ||
160 | { | ||
161 | union ia64_rr rr, rr1; | ||
162 | |||
163 | rr.val = vcpu_get_rr(vcpu, ifa); | ||
164 | rr1.val = 0; | ||
165 | rr1.ps = rr.ps; | ||
166 | rr1.rid = rr.rid; | ||
167 | return (rr1.val); | ||
168 | } | ||
169 | |||
170 | |||
171 | /* | ||
172 | * Set vIFA & vITIR & vIHA, when vPSR.ic =1 | ||
173 | * Parameter: | ||
174 | * set_ifa: if true, set vIFA | ||
175 | * set_itir: if true, set vITIR | ||
176 | * set_iha: if true, set vIHA | ||
177 | */ | ||
178 | void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr, | ||
179 | int set_ifa, int set_itir, int set_iha) | ||
180 | { | ||
181 | long vpsr; | ||
182 | u64 value; | ||
183 | |||
184 | vpsr = VCPU(vcpu, vpsr); | ||
185 | /* Vol2, Table 8-1 */ | ||
186 | if (vpsr & IA64_PSR_IC) { | ||
187 | if (set_ifa) | ||
188 | vcpu_set_ifa(vcpu, vadr); | ||
189 | if (set_itir) { | ||
190 | value = vcpu_get_itir_on_fault(vcpu, vadr); | ||
191 | vcpu_set_itir(vcpu, value); | ||
192 | } | ||
193 | |||
194 | if (set_iha) { | ||
195 | value = vcpu_thash(vcpu, vadr); | ||
196 | vcpu_set_iha(vcpu, value); | ||
197 | } | ||
198 | } | ||
199 | } | ||
200 | |||
201 | /* | ||
202 | * Data TLB Fault | ||
203 | * @ Data TLB vector | ||
204 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
205 | */ | ||
206 | void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr) | ||
207 | { | ||
208 | /* If vPSR.ic, IFA, ITIR, IHA */ | ||
209 | set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); | ||
210 | inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR); | ||
211 | } | ||
212 | |||
213 | /* | ||
214 | * Instruction TLB Fault | ||
215 | * @ Instruction TLB vector | ||
216 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
217 | */ | ||
218 | void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr) | ||
219 | { | ||
220 | /* If vPSR.ic, IFA, ITIR, IHA */ | ||
221 | set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); | ||
222 | inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR); | ||
223 | } | ||
224 | |||
225 | |||
226 | |||
227 | /* | ||
228 | * Data Nested TLB Fault | ||
229 | * @ Data Nested TLB Vector | ||
230 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
231 | */ | ||
232 | void nested_dtlb(struct kvm_vcpu *vcpu) | ||
233 | { | ||
234 | inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR); | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * Alternate Data TLB Fault | ||
239 | * @ Alternate Data TLB vector | ||
240 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
241 | */ | ||
242 | void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr) | ||
243 | { | ||
244 | set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); | ||
245 | inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR); | ||
246 | } | ||
247 | |||
248 | |||
249 | /* | ||
250 | * Data TLB Fault | ||
251 | * @ Data TLB vector | ||
252 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
253 | */ | ||
254 | void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr) | ||
255 | { | ||
256 | set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); | ||
257 | inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR); | ||
258 | } | ||
259 | |||
260 | /* Deal with: | ||
261 | * VHPT Translation Vector | ||
262 | */ | ||
263 | static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) | ||
264 | { | ||
265 | /* If vPSR.ic, IFA, ITIR, IHA*/ | ||
266 | set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); | ||
267 | inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR); | ||
268 | |||
269 | |||
270 | } | ||
271 | |||
272 | /* | ||
273 | * VHPT Instruction Fault | ||
274 | * @ VHPT Translation vector | ||
275 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
276 | */ | ||
277 | void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) | ||
278 | { | ||
279 | _vhpt_fault(vcpu, vadr); | ||
280 | } | ||
281 | |||
282 | |||
283 | /* | ||
284 | * VHPT Data Fault | ||
285 | * @ VHPT Translation vector | ||
286 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
287 | */ | ||
288 | void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) | ||
289 | { | ||
290 | _vhpt_fault(vcpu, vadr); | ||
291 | } | ||
292 | |||
293 | |||
294 | |||
295 | /* | ||
296 | * Deal with: | ||
297 | * General Exception vector | ||
298 | */ | ||
299 | void _general_exception(struct kvm_vcpu *vcpu) | ||
300 | { | ||
301 | inject_guest_interruption(vcpu, IA64_GENEX_VECTOR); | ||
302 | } | ||
303 | |||
304 | |||
305 | /* | ||
306 | * Illegal Operation Fault | ||
307 | * @ General Exception Vector | ||
308 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
309 | */ | ||
310 | void illegal_op(struct kvm_vcpu *vcpu) | ||
311 | { | ||
312 | _general_exception(vcpu); | ||
313 | } | ||
314 | |||
315 | /* | ||
316 | * Illegal Dependency Fault | ||
317 | * @ General Exception Vector | ||
318 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
319 | */ | ||
320 | void illegal_dep(struct kvm_vcpu *vcpu) | ||
321 | { | ||
322 | _general_exception(vcpu); | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * Reserved Register/Field Fault | ||
327 | * @ General Exception Vector | ||
328 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
329 | */ | ||
330 | void rsv_reg_field(struct kvm_vcpu *vcpu) | ||
331 | { | ||
332 | _general_exception(vcpu); | ||
333 | } | ||
334 | /* | ||
335 | * Privileged Operation Fault | ||
336 | * @ General Exception Vector | ||
337 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
338 | */ | ||
339 | |||
340 | void privilege_op(struct kvm_vcpu *vcpu) | ||
341 | { | ||
342 | _general_exception(vcpu); | ||
343 | } | ||
344 | |||
345 | /* | ||
346 | * Unimplement Data Address Fault | ||
347 | * @ General Exception Vector | ||
348 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
349 | */ | ||
350 | void unimpl_daddr(struct kvm_vcpu *vcpu) | ||
351 | { | ||
352 | _general_exception(vcpu); | ||
353 | } | ||
354 | |||
355 | /* | ||
356 | * Privileged Register Fault | ||
357 | * @ General Exception Vector | ||
358 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
359 | */ | ||
360 | void privilege_reg(struct kvm_vcpu *vcpu) | ||
361 | { | ||
362 | _general_exception(vcpu); | ||
363 | } | ||
364 | |||
365 | /* Deal with | ||
366 | * Nat consumption vector | ||
367 | * Parameter: | ||
368 | * vaddr: Optional, if t == REGISTER | ||
369 | */ | ||
370 | static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr, | ||
371 | enum tlb_miss_type t) | ||
372 | { | ||
373 | /* If vPSR.ic && t == DATA/INST, IFA */ | ||
374 | if (t == DATA || t == INSTRUCTION) { | ||
375 | /* IFA */ | ||
376 | set_ifa_itir_iha(vcpu, vadr, 1, 0, 0); | ||
377 | } | ||
378 | |||
379 | inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR); | ||
380 | } | ||
381 | |||
382 | /* | ||
383 | * Instruction Nat Page Consumption Fault | ||
384 | * @ Nat Consumption Vector | ||
385 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
386 | */ | ||
387 | void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) | ||
388 | { | ||
389 | _nat_consumption_fault(vcpu, vadr, INSTRUCTION); | ||
390 | } | ||
391 | |||
392 | /* | ||
393 | * Register Nat Consumption Fault | ||
394 | * @ Nat Consumption Vector | ||
395 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
396 | */ | ||
397 | void rnat_consumption(struct kvm_vcpu *vcpu) | ||
398 | { | ||
399 | _nat_consumption_fault(vcpu, 0, REGISTER); | ||
400 | } | ||
401 | |||
402 | /* | ||
403 | * Data Nat Page Consumption Fault | ||
404 | * @ Nat Consumption Vector | ||
405 | * Refer to SDM Vol2 Table 5-6 & 8-1 | ||
406 | */ | ||
407 | void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) | ||
408 | { | ||
409 | _nat_consumption_fault(vcpu, vadr, DATA); | ||
410 | } | ||
411 | |||
412 | /* Deal with | ||
413 | * Page not present vector | ||
414 | */ | ||
415 | static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr) | ||
416 | { | ||
417 | /* If vPSR.ic, IFA, ITIR */ | ||
418 | set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); | ||
419 | inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR); | ||
420 | } | ||
421 | |||
422 | |||
423 | void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) | ||
424 | { | ||
425 | __page_not_present(vcpu, vadr); | ||
426 | } | ||
427 | |||
428 | |||
429 | void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) | ||
430 | { | ||
431 | __page_not_present(vcpu, vadr); | ||
432 | } | ||
433 | |||
434 | |||
435 | /* Deal with | ||
436 | * Data access rights vector | ||
437 | */ | ||
438 | void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr) | ||
439 | { | ||
440 | /* If vPSR.ic, IFA, ITIR */ | ||
441 | set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); | ||
442 | inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR); | ||
443 | } | ||
444 | |||
445 | fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr, | ||
446 | unsigned long *fpsr, unsigned long *isr, unsigned long *pr, | ||
447 | unsigned long *ifs, struct kvm_pt_regs *regs) | ||
448 | { | ||
449 | fp_state_t fp_state; | ||
450 | fpswa_ret_t ret; | ||
451 | struct kvm_vcpu *vcpu = current_vcpu; | ||
452 | |||
453 | uint64_t old_rr7 = ia64_get_rr(7UL<<61); | ||
454 | |||
455 | if (!vmm_fpswa_interface) | ||
456 | return (fpswa_ret_t) {-1, 0, 0, 0}; | ||
457 | |||
458 | /* | ||
459 | * Just let fpswa driver to use hardware fp registers. | ||
460 | * No fp register is valid in memory. | ||
461 | */ | ||
462 | memset(&fp_state, 0, sizeof(fp_state_t)); | ||
463 | |||
464 | /* | ||
465 | * unsigned long (*EFI_FPSWA) ( | ||
466 | * unsigned long trap_type, | ||
467 | * void *Bundle, | ||
468 | * unsigned long *pipsr, | ||
469 | * unsigned long *pfsr, | ||
470 | * unsigned long *pisr, | ||
471 | * unsigned long *ppreds, | ||
472 | * unsigned long *pifs, | ||
473 | * void *fp_state); | ||
474 | */ | ||
475 | /*Call host fpswa interface directly to virtualize | ||
476 | *guest fpswa request! | ||
477 | */ | ||
478 | ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]); | ||
479 | ia64_srlz_d(); | ||
480 | |||
481 | ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle, | ||
482 | ipsr, fpsr, isr, pr, ifs, &fp_state); | ||
483 | ia64_set_rr(7UL << 61, old_rr7); | ||
484 | ia64_srlz_d(); | ||
485 | return ret; | ||
486 | } | ||
487 | |||
488 | /* | ||
489 | * Handle floating-point assist faults and traps for domain. | ||
490 | */ | ||
491 | unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs, | ||
492 | unsigned long isr) | ||
493 | { | ||
494 | struct kvm_vcpu *v = current_vcpu; | ||
495 | IA64_BUNDLE bundle; | ||
496 | unsigned long fault_ip; | ||
497 | fpswa_ret_t ret; | ||
498 | |||
499 | fault_ip = regs->cr_iip; | ||
500 | /* | ||
501 | * When the FP trap occurs, the trapping instruction is completed. | ||
502 | * If ipsr.ri == 0, there is the trapping instruction in previous | ||
503 | * bundle. | ||
504 | */ | ||
505 | if (!fp_fault && (ia64_psr(regs)->ri == 0)) | ||
506 | fault_ip -= 16; | ||
507 | |||
508 | if (fetch_code(v, fault_ip, &bundle)) | ||
509 | return -EAGAIN; | ||
510 | |||
511 | if (!bundle.i64[0] && !bundle.i64[1]) | ||
512 | return -EACCES; | ||
513 | |||
514 | ret = vmm_fp_emulate(fp_fault, &bundle, ®s->cr_ipsr, ®s->ar_fpsr, | ||
515 | &isr, ®s->pr, ®s->cr_ifs, regs); | ||
516 | return ret.status; | ||
517 | } | ||
518 | |||
519 | void reflect_interruption(u64 ifa, u64 isr, u64 iim, | ||
520 | u64 vec, struct kvm_pt_regs *regs) | ||
521 | { | ||
522 | u64 vector; | ||
523 | int status ; | ||
524 | struct kvm_vcpu *vcpu = current_vcpu; | ||
525 | u64 vpsr = VCPU(vcpu, vpsr); | ||
526 | |||
527 | vector = vec2off[vec]; | ||
528 | |||
529 | if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) { | ||
530 | panic_vm(vcpu); | ||
531 | return; | ||
532 | } | ||
533 | |||
534 | switch (vec) { | ||
535 | case 32: /*IA64_FP_FAULT_VECTOR*/ | ||
536 | status = vmm_handle_fpu_swa(1, regs, isr); | ||
537 | if (!status) { | ||
538 | vcpu_increment_iip(vcpu); | ||
539 | return; | ||
540 | } else if (-EAGAIN == status) | ||
541 | return; | ||
542 | break; | ||
543 | case 33: /*IA64_FP_TRAP_VECTOR*/ | ||
544 | status = vmm_handle_fpu_swa(0, regs, isr); | ||
545 | if (!status) | ||
546 | return ; | ||
547 | else if (-EAGAIN == status) { | ||
548 | vcpu_decrement_iip(vcpu); | ||
549 | return ; | ||
550 | } | ||
551 | break; | ||
552 | } | ||
553 | |||
554 | VCPU(vcpu, isr) = isr; | ||
555 | VCPU(vcpu, iipa) = regs->cr_iip; | ||
556 | if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR) | ||
557 | VCPU(vcpu, iim) = iim; | ||
558 | else | ||
559 | set_ifa_itir_iha(vcpu, ifa, 1, 1, 1); | ||
560 | |||
561 | inject_guest_interruption(vcpu, vector); | ||
562 | } | ||
563 | |||
564 | static void set_pal_call_data(struct kvm_vcpu *vcpu) | ||
565 | { | ||
566 | struct exit_ctl_data *p = &vcpu->arch.exit_data; | ||
567 | |||
568 | /*FIXME:For static and stacked convention, firmware | ||
569 | * has put the parameters in gr28-gr31 before | ||
570 | * break to vmm !!*/ | ||
571 | |||
572 | p->u.pal_data.gr28 = vcpu_get_gr(vcpu, 28); | ||
573 | p->u.pal_data.gr29 = vcpu_get_gr(vcpu, 29); | ||
574 | p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); | ||
575 | p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31); | ||
576 | p->exit_reason = EXIT_REASON_PAL_CALL; | ||
577 | } | ||
578 | |||
579 | static void set_pal_call_result(struct kvm_vcpu *vcpu) | ||
580 | { | ||
581 | struct exit_ctl_data *p = &vcpu->arch.exit_data; | ||
582 | |||
583 | if (p->exit_reason == EXIT_REASON_PAL_CALL) { | ||
584 | vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0); | ||
585 | vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0); | ||
586 | vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0); | ||
587 | vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0); | ||
588 | } else | ||
589 | panic_vm(vcpu); | ||
590 | } | ||
591 | |||
592 | static void set_sal_call_data(struct kvm_vcpu *vcpu) | ||
593 | { | ||
594 | struct exit_ctl_data *p = &vcpu->arch.exit_data; | ||
595 | |||
596 | p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32); | ||
597 | p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33); | ||
598 | p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34); | ||
599 | p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35); | ||
600 | p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36); | ||
601 | p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37); | ||
602 | p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38); | ||
603 | p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39); | ||
604 | p->exit_reason = EXIT_REASON_SAL_CALL; | ||
605 | } | ||
606 | |||
607 | static void set_sal_call_result(struct kvm_vcpu *vcpu) | ||
608 | { | ||
609 | struct exit_ctl_data *p = &vcpu->arch.exit_data; | ||
610 | |||
611 | if (p->exit_reason == EXIT_REASON_SAL_CALL) { | ||
612 | vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0); | ||
613 | vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0); | ||
614 | vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0); | ||
615 | vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0); | ||
616 | } else | ||
617 | panic_vm(vcpu); | ||
618 | } | ||
619 | |||
620 | void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, | ||
621 | unsigned long isr, unsigned long iim) | ||
622 | { | ||
623 | struct kvm_vcpu *v = current_vcpu; | ||
624 | |||
625 | if (ia64_psr(regs)->cpl == 0) { | ||
626 | /* Allow hypercalls only when cpl = 0. */ | ||
627 | if (iim == DOMN_PAL_REQUEST) { | ||
628 | set_pal_call_data(v); | ||
629 | vmm_transition(v); | ||
630 | set_pal_call_result(v); | ||
631 | vcpu_increment_iip(v); | ||
632 | return; | ||
633 | } else if (iim == DOMN_SAL_REQUEST) { | ||
634 | set_sal_call_data(v); | ||
635 | vmm_transition(v); | ||
636 | set_sal_call_result(v); | ||
637 | vcpu_increment_iip(v); | ||
638 | return; | ||
639 | } | ||
640 | } | ||
641 | reflect_interruption(ifa, isr, iim, 11, regs); | ||
642 | } | ||
643 | |||
644 | void check_pending_irq(struct kvm_vcpu *vcpu) | ||
645 | { | ||
646 | int mask, h_pending, h_inservice; | ||
647 | u64 isr; | ||
648 | unsigned long vpsr; | ||
649 | struct kvm_pt_regs *regs = vcpu_regs(vcpu); | ||
650 | |||
651 | h_pending = highest_pending_irq(vcpu); | ||
652 | if (h_pending == NULL_VECTOR) { | ||
653 | update_vhpi(vcpu, NULL_VECTOR); | ||
654 | return; | ||
655 | } | ||
656 | h_inservice = highest_inservice_irq(vcpu); | ||
657 | |||
658 | vpsr = VCPU(vcpu, vpsr); | ||
659 | mask = irq_masked(vcpu, h_pending, h_inservice); | ||
660 | if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) { | ||
661 | isr = vpsr & IA64_PSR_RI; | ||
662 | update_vhpi(vcpu, h_pending); | ||
663 | reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ | ||
664 | } else if (mask == IRQ_MASKED_BY_INSVC) { | ||
665 | if (VCPU(vcpu, vhpi)) | ||
666 | update_vhpi(vcpu, NULL_VECTOR); | ||
667 | } else { | ||
668 | /* masked by vpsr.i or vtpr.*/ | ||
669 | update_vhpi(vcpu, h_pending); | ||
670 | } | ||
671 | } | ||
672 | |||
673 | static void generate_exirq(struct kvm_vcpu *vcpu) | ||
674 | { | ||
675 | unsigned vpsr; | ||
676 | uint64_t isr; | ||
677 | |||
678 | struct kvm_pt_regs *regs = vcpu_regs(vcpu); | ||
679 | |||
680 | vpsr = VCPU(vcpu, vpsr); | ||
681 | isr = vpsr & IA64_PSR_RI; | ||
682 | if (!(vpsr & IA64_PSR_IC)) | ||
683 | panic_vm(vcpu); | ||
684 | reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ | ||
685 | } | ||
686 | |||
687 | void vhpi_detection(struct kvm_vcpu *vcpu) | ||
688 | { | ||
689 | uint64_t threshold, vhpi; | ||
690 | union ia64_tpr vtpr; | ||
691 | struct ia64_psr vpsr; | ||
692 | |||
693 | vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); | ||
694 | vtpr.val = VCPU(vcpu, tpr); | ||
695 | |||
696 | threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic; | ||
697 | vhpi = VCPU(vcpu, vhpi); | ||
698 | if (vhpi > threshold) { | ||
699 | /* interrupt actived*/ | ||
700 | generate_exirq(vcpu); | ||
701 | } | ||
702 | } | ||
703 | |||
704 | |||
705 | void leave_hypervisor_tail(void) | ||
706 | { | ||
707 | struct kvm_vcpu *v = current_vcpu; | ||
708 | |||
709 | if (VMX(v, timer_check)) { | ||
710 | VMX(v, timer_check) = 0; | ||
711 | if (VMX(v, itc_check)) { | ||
712 | if (vcpu_get_itc(v) > VCPU(v, itm)) { | ||
713 | if (!(VCPU(v, itv) & (1 << 16))) { | ||
714 | vcpu_pend_interrupt(v, VCPU(v, itv) | ||
715 | & 0xff); | ||
716 | VMX(v, itc_check) = 0; | ||
717 | } else { | ||
718 | v->arch.timer_pending = 1; | ||
719 | } | ||
720 | VMX(v, last_itc) = VCPU(v, itm) + 1; | ||
721 | } | ||
722 | } | ||
723 | } | ||
724 | |||
725 | rmb(); | ||
726 | if (v->arch.irq_new_pending) { | ||
727 | v->arch.irq_new_pending = 0; | ||
728 | VMX(v, irq_check) = 0; | ||
729 | check_pending_irq(v); | ||
730 | return; | ||
731 | } | ||
732 | if (VMX(v, irq_check)) { | ||
733 | VMX(v, irq_check) = 0; | ||
734 | vhpi_detection(v); | ||
735 | } | ||
736 | } | ||
737 | |||
738 | |||
739 | static inline void handle_lds(struct kvm_pt_regs *regs) | ||
740 | { | ||
741 | regs->cr_ipsr |= IA64_PSR_ED; | ||
742 | } | ||
743 | |||
744 | void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type) | ||
745 | { | ||
746 | unsigned long pte; | ||
747 | union ia64_rr rr; | ||
748 | |||
749 | rr.val = ia64_get_rr(vadr); | ||
750 | pte = vadr & _PAGE_PPN_MASK; | ||
751 | pte = pte | PHY_PAGE_WB; | ||
752 | thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type); | ||
753 | return; | ||
754 | } | ||
755 | |||
756 | void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs) | ||
757 | { | ||
758 | unsigned long vpsr; | ||
759 | int type; | ||
760 | |||
761 | u64 vhpt_adr, gppa, pteval, rr, itir; | ||
762 | union ia64_isr misr; | ||
763 | union ia64_pta vpta; | ||
764 | struct thash_data *data; | ||
765 | struct kvm_vcpu *v = current_vcpu; | ||
766 | |||
767 | vpsr = VCPU(v, vpsr); | ||
768 | misr.val = VMX(v, cr_isr); | ||
769 | |||
770 | type = vec; | ||
771 | |||
772 | if (is_physical_mode(v) && (!(vadr << 1 >> 62))) { | ||
773 | if (vec == 2) { | ||
774 | if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) { | ||
775 | emulate_io_inst(v, ((vadr << 1) >> 1), 4); | ||
776 | return; | ||
777 | } | ||
778 | } | ||
779 | physical_tlb_miss(v, vadr, type); | ||
780 | return; | ||
781 | } | ||
782 | data = vtlb_lookup(v, vadr, type); | ||
783 | if (data != 0) { | ||
784 | if (type == D_TLB) { | ||
785 | gppa = (vadr & ((1UL << data->ps) - 1)) | ||
786 | + (data->ppn >> (data->ps - 12) << data->ps); | ||
787 | if (__gpfn_is_io(gppa >> PAGE_SHIFT)) { | ||
788 | if (data->pl >= ((regs->cr_ipsr >> | ||
789 | IA64_PSR_CPL0_BIT) & 3)) | ||
790 | emulate_io_inst(v, gppa, data->ma); | ||
791 | else { | ||
792 | vcpu_set_isr(v, misr.val); | ||
793 | data_access_rights(v, vadr); | ||
794 | } | ||
795 | return ; | ||
796 | } | ||
797 | } | ||
798 | thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type); | ||
799 | |||
800 | } else if (type == D_TLB) { | ||
801 | if (misr.sp) { | ||
802 | handle_lds(regs); | ||
803 | return; | ||
804 | } | ||
805 | |||
806 | rr = vcpu_get_rr(v, vadr); | ||
807 | itir = rr & (RR_RID_MASK | RR_PS_MASK); | ||
808 | |||
809 | if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) { | ||
810 | if (vpsr & IA64_PSR_IC) { | ||
811 | vcpu_set_isr(v, misr.val); | ||
812 | alt_dtlb(v, vadr); | ||
813 | } else { | ||
814 | nested_dtlb(v); | ||
815 | } | ||
816 | return ; | ||
817 | } | ||
818 | |||
819 | vpta.val = vcpu_get_pta(v); | ||
820 | /* avoid recursively walking (short format) VHPT */ | ||
821 | |||
822 | vhpt_adr = vcpu_thash(v, vadr); | ||
823 | if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { | ||
824 | /* VHPT successfully read. */ | ||
825 | if (!(pteval & _PAGE_P)) { | ||
826 | if (vpsr & IA64_PSR_IC) { | ||
827 | vcpu_set_isr(v, misr.val); | ||
828 | dtlb_fault(v, vadr); | ||
829 | } else { | ||
830 | nested_dtlb(v); | ||
831 | } | ||
832 | } else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) { | ||
833 | thash_purge_and_insert(v, pteval, itir, | ||
834 | vadr, D_TLB); | ||
835 | } else if (vpsr & IA64_PSR_IC) { | ||
836 | vcpu_set_isr(v, misr.val); | ||
837 | dtlb_fault(v, vadr); | ||
838 | } else { | ||
839 | nested_dtlb(v); | ||
840 | } | ||
841 | } else { | ||
842 | /* Can't read VHPT. */ | ||
843 | if (vpsr & IA64_PSR_IC) { | ||
844 | vcpu_set_isr(v, misr.val); | ||
845 | dvhpt_fault(v, vadr); | ||
846 | } else { | ||
847 | nested_dtlb(v); | ||
848 | } | ||
849 | } | ||
850 | } else if (type == I_TLB) { | ||
851 | if (!(vpsr & IA64_PSR_IC)) | ||
852 | misr.ni = 1; | ||
853 | if (!vhpt_enabled(v, vadr, INST_REF)) { | ||
854 | vcpu_set_isr(v, misr.val); | ||
855 | alt_itlb(v, vadr); | ||
856 | return; | ||
857 | } | ||
858 | |||
859 | vpta.val = vcpu_get_pta(v); | ||
860 | |||
861 | vhpt_adr = vcpu_thash(v, vadr); | ||
862 | if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { | ||
863 | /* VHPT successfully read. */ | ||
864 | if (pteval & _PAGE_P) { | ||
865 | if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) { | ||
866 | vcpu_set_isr(v, misr.val); | ||
867 | itlb_fault(v, vadr); | ||
868 | return ; | ||
869 | } | ||
870 | rr = vcpu_get_rr(v, vadr); | ||
871 | itir = rr & (RR_RID_MASK | RR_PS_MASK); | ||
872 | thash_purge_and_insert(v, pteval, itir, | ||
873 | vadr, I_TLB); | ||
874 | } else { | ||
875 | vcpu_set_isr(v, misr.val); | ||
876 | inst_page_not_present(v, vadr); | ||
877 | } | ||
878 | } else { | ||
879 | vcpu_set_isr(v, misr.val); | ||
880 | ivhpt_fault(v, vadr); | ||
881 | } | ||
882 | } | ||
883 | } | ||
884 | |||
885 | void kvm_vexirq(struct kvm_vcpu *vcpu) | ||
886 | { | ||
887 | u64 vpsr, isr; | ||
888 | struct kvm_pt_regs *regs; | ||
889 | |||
890 | regs = vcpu_regs(vcpu); | ||
891 | vpsr = VCPU(vcpu, vpsr); | ||
892 | isr = vpsr & IA64_PSR_RI; | ||
893 | reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/ | ||
894 | } | ||
895 | |||
896 | void kvm_ia64_handle_irq(struct kvm_vcpu *v) | ||
897 | { | ||
898 | struct exit_ctl_data *p = &v->arch.exit_data; | ||
899 | long psr; | ||
900 | |||
901 | local_irq_save(psr); | ||
902 | p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; | ||
903 | vmm_transition(v); | ||
904 | local_irq_restore(psr); | ||
905 | |||
906 | VMX(v, timer_check) = 1; | ||
907 | |||
908 | } | ||
909 | |||
910 | static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos) | ||
911 | { | ||
912 | u64 oldrid, moldrid, oldpsbits, vaddr; | ||
913 | struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos]; | ||
914 | vaddr = p->vaddr; | ||
915 | |||
916 | oldrid = VMX(v, vrr[0]); | ||
917 | VMX(v, vrr[0]) = p->rr; | ||
918 | oldpsbits = VMX(v, psbits[0]); | ||
919 | VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]); | ||
920 | moldrid = ia64_get_rr(0x0); | ||
921 | ia64_set_rr(0x0, vrrtomrr(p->rr)); | ||
922 | ia64_srlz_d(); | ||
923 | |||
924 | vaddr = PAGEALIGN(vaddr, p->ps); | ||
925 | thash_purge_entries_remote(v, vaddr, p->ps); | ||
926 | |||
927 | VMX(v, vrr[0]) = oldrid; | ||
928 | VMX(v, psbits[0]) = oldpsbits; | ||
929 | ia64_set_rr(0x0, moldrid); | ||
930 | ia64_dv_serialize_data(); | ||
931 | } | ||
932 | |||
933 | static void vcpu_do_resume(struct kvm_vcpu *vcpu) | ||
934 | { | ||
935 | /*Re-init VHPT and VTLB once from resume*/ | ||
936 | vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES; | ||
937 | thash_init(&vcpu->arch.vhpt, VHPT_SHIFT); | ||
938 | vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES; | ||
939 | thash_init(&vcpu->arch.vtlb, VTLB_SHIFT); | ||
940 | |||
941 | ia64_set_pta(vcpu->arch.vhpt.pta.val); | ||
942 | } | ||
943 | |||
944 | static void kvm_do_resume_op(struct kvm_vcpu *vcpu) | ||
945 | { | ||
946 | if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) { | ||
947 | vcpu_do_resume(vcpu); | ||
948 | return; | ||
949 | } | ||
950 | |||
951 | if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) { | ||
952 | thash_purge_all(vcpu); | ||
953 | return; | ||
954 | } | ||
955 | |||
956 | if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) { | ||
957 | while (vcpu->arch.ptc_g_count > 0) | ||
958 | ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count); | ||
959 | } | ||
960 | } | ||
961 | |||
962 | void vmm_transition(struct kvm_vcpu *vcpu) | ||
963 | { | ||
964 | ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd, | ||
965 | 0, 0, 0, 0, 0, 0); | ||
966 | vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host); | ||
967 | ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd, | ||
968 | 0, 0, 0, 0, 0, 0); | ||
969 | kvm_do_resume_op(vcpu); | ||
970 | } | ||
diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S new file mode 100644 index 000000000000..30897d44d61e --- /dev/null +++ b/arch/ia64/kvm/trampoline.S | |||
@@ -0,0 +1,1038 @@ | |||
1 | /* Save all processor states | ||
2 | * | ||
3 | * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com> | ||
4 | * Copyright (c) 2007 Anthony Xu <anthony.xu@intel.com> | ||
5 | */ | ||
6 | |||
7 | #include <asm/asmmacro.h> | ||
8 | #include "asm-offsets.h" | ||
9 | |||
10 | |||
11 | #define CTX(name) VMM_CTX_##name##_OFFSET | ||
12 | |||
13 | /* | ||
14 | * r32: context_t base address | ||
15 | */ | ||
16 | #define SAVE_BRANCH_REGS \ | ||
17 | add r2 = CTX(B0),r32; \ | ||
18 | add r3 = CTX(B1),r32; \ | ||
19 | mov r16 = b0; \ | ||
20 | mov r17 = b1; \ | ||
21 | ;; \ | ||
22 | st8 [r2]=r16,16; \ | ||
23 | st8 [r3]=r17,16; \ | ||
24 | ;; \ | ||
25 | mov r16 = b2; \ | ||
26 | mov r17 = b3; \ | ||
27 | ;; \ | ||
28 | st8 [r2]=r16,16; \ | ||
29 | st8 [r3]=r17,16; \ | ||
30 | ;; \ | ||
31 | mov r16 = b4; \ | ||
32 | mov r17 = b5; \ | ||
33 | ;; \ | ||
34 | st8 [r2]=r16; \ | ||
35 | st8 [r3]=r17; \ | ||
36 | ;; | ||
37 | |||
38 | /* | ||
39 | * r33: context_t base address | ||
40 | */ | ||
41 | #define RESTORE_BRANCH_REGS \ | ||
42 | add r2 = CTX(B0),r33; \ | ||
43 | add r3 = CTX(B1),r33; \ | ||
44 | ;; \ | ||
45 | ld8 r16=[r2],16; \ | ||
46 | ld8 r17=[r3],16; \ | ||
47 | ;; \ | ||
48 | mov b0 = r16; \ | ||
49 | mov b1 = r17; \ | ||
50 | ;; \ | ||
51 | ld8 r16=[r2],16; \ | ||
52 | ld8 r17=[r3],16; \ | ||
53 | ;; \ | ||
54 | mov b2 = r16; \ | ||
55 | mov b3 = r17; \ | ||
56 | ;; \ | ||
57 | ld8 r16=[r2]; \ | ||
58 | ld8 r17=[r3]; \ | ||
59 | ;; \ | ||
60 | mov b4=r16; \ | ||
61 | mov b5=r17; \ | ||
62 | ;; | ||
63 | |||
64 | |||
65 | /* | ||
66 | * r32: context_t base address | ||
67 | * bsw == 1 | ||
68 | * Save all bank1 general registers, r4 ~ r7 | ||
69 | */ | ||
70 | #define SAVE_GENERAL_REGS \ | ||
71 | add r2=CTX(R4),r32; \ | ||
72 | add r3=CTX(R5),r32; \ | ||
73 | ;; \ | ||
74 | .mem.offset 0,0; \ | ||
75 | st8.spill [r2]=r4,16; \ | ||
76 | .mem.offset 8,0; \ | ||
77 | st8.spill [r3]=r5,16; \ | ||
78 | ;; \ | ||
79 | .mem.offset 0,0; \ | ||
80 | st8.spill [r2]=r6,48; \ | ||
81 | .mem.offset 8,0; \ | ||
82 | st8.spill [r3]=r7,48; \ | ||
83 | ;; \ | ||
84 | .mem.offset 0,0; \ | ||
85 | st8.spill [r2]=r12; \ | ||
86 | .mem.offset 8,0; \ | ||
87 | st8.spill [r3]=r13; \ | ||
88 | ;; | ||
89 | |||
90 | /* | ||
91 | * r33: context_t base address | ||
92 | * bsw == 1 | ||
93 | */ | ||
94 | #define RESTORE_GENERAL_REGS \ | ||
95 | add r2=CTX(R4),r33; \ | ||
96 | add r3=CTX(R5),r33; \ | ||
97 | ;; \ | ||
98 | ld8.fill r4=[r2],16; \ | ||
99 | ld8.fill r5=[r3],16; \ | ||
100 | ;; \ | ||
101 | ld8.fill r6=[r2],48; \ | ||
102 | ld8.fill r7=[r3],48; \ | ||
103 | ;; \ | ||
104 | ld8.fill r12=[r2]; \ | ||
105 | ld8.fill r13 =[r3]; \ | ||
106 | ;; | ||
107 | |||
108 | |||
109 | |||
110 | |||
111 | /* | ||
112 | * r32: context_t base address | ||
113 | */ | ||
114 | #define SAVE_KERNEL_REGS \ | ||
115 | add r2 = CTX(KR0),r32; \ | ||
116 | add r3 = CTX(KR1),r32; \ | ||
117 | mov r16 = ar.k0; \ | ||
118 | mov r17 = ar.k1; \ | ||
119 | ;; \ | ||
120 | st8 [r2] = r16,16; \ | ||
121 | st8 [r3] = r17,16; \ | ||
122 | ;; \ | ||
123 | mov r16 = ar.k2; \ | ||
124 | mov r17 = ar.k3; \ | ||
125 | ;; \ | ||
126 | st8 [r2] = r16,16; \ | ||
127 | st8 [r3] = r17,16; \ | ||
128 | ;; \ | ||
129 | mov r16 = ar.k4; \ | ||
130 | mov r17 = ar.k5; \ | ||
131 | ;; \ | ||
132 | st8 [r2] = r16,16; \ | ||
133 | st8 [r3] = r17,16; \ | ||
134 | ;; \ | ||
135 | mov r16 = ar.k6; \ | ||
136 | mov r17 = ar.k7; \ | ||
137 | ;; \ | ||
138 | st8 [r2] = r16; \ | ||
139 | st8 [r3] = r17; \ | ||
140 | ;; | ||
141 | |||
142 | |||
143 | |||
144 | /* | ||
145 | * r33: context_t base address | ||
146 | */ | ||
147 | #define RESTORE_KERNEL_REGS \ | ||
148 | add r2 = CTX(KR0),r33; \ | ||
149 | add r3 = CTX(KR1),r33; \ | ||
150 | ;; \ | ||
151 | ld8 r16=[r2],16; \ | ||
152 | ld8 r17=[r3],16; \ | ||
153 | ;; \ | ||
154 | mov ar.k0=r16; \ | ||
155 | mov ar.k1=r17; \ | ||
156 | ;; \ | ||
157 | ld8 r16=[r2],16; \ | ||
158 | ld8 r17=[r3],16; \ | ||
159 | ;; \ | ||
160 | mov ar.k2=r16; \ | ||
161 | mov ar.k3=r17; \ | ||
162 | ;; \ | ||
163 | ld8 r16=[r2],16; \ | ||
164 | ld8 r17=[r3],16; \ | ||
165 | ;; \ | ||
166 | mov ar.k4=r16; \ | ||
167 | mov ar.k5=r17; \ | ||
168 | ;; \ | ||
169 | ld8 r16=[r2],16; \ | ||
170 | ld8 r17=[r3],16; \ | ||
171 | ;; \ | ||
172 | mov ar.k6=r16; \ | ||
173 | mov ar.k7=r17; \ | ||
174 | ;; | ||
175 | |||
176 | |||
177 | |||
178 | /* | ||
179 | * r32: context_t base address | ||
180 | */ | ||
181 | #define SAVE_APP_REGS \ | ||
182 | add r2 = CTX(BSPSTORE),r32; \ | ||
183 | mov r16 = ar.bspstore; \ | ||
184 | ;; \ | ||
185 | st8 [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\ | ||
186 | mov r16 = ar.rnat; \ | ||
187 | ;; \ | ||
188 | st8 [r2] = r16,CTX(FCR)-CTX(RNAT); \ | ||
189 | mov r16 = ar.fcr; \ | ||
190 | ;; \ | ||
191 | st8 [r2] = r16,CTX(EFLAG)-CTX(FCR); \ | ||
192 | mov r16 = ar.eflag; \ | ||
193 | ;; \ | ||
194 | st8 [r2] = r16,CTX(CFLG)-CTX(EFLAG); \ | ||
195 | mov r16 = ar.cflg; \ | ||
196 | ;; \ | ||
197 | st8 [r2] = r16,CTX(FSR)-CTX(CFLG); \ | ||
198 | mov r16 = ar.fsr; \ | ||
199 | ;; \ | ||
200 | st8 [r2] = r16,CTX(FIR)-CTX(FSR); \ | ||
201 | mov r16 = ar.fir; \ | ||
202 | ;; \ | ||
203 | st8 [r2] = r16,CTX(FDR)-CTX(FIR); \ | ||
204 | mov r16 = ar.fdr; \ | ||
205 | ;; \ | ||
206 | st8 [r2] = r16,CTX(UNAT)-CTX(FDR); \ | ||
207 | mov r16 = ar.unat; \ | ||
208 | ;; \ | ||
209 | st8 [r2] = r16,CTX(FPSR)-CTX(UNAT); \ | ||
210 | mov r16 = ar.fpsr; \ | ||
211 | ;; \ | ||
212 | st8 [r2] = r16,CTX(PFS)-CTX(FPSR); \ | ||
213 | mov r16 = ar.pfs; \ | ||
214 | ;; \ | ||
215 | st8 [r2] = r16,CTX(LC)-CTX(PFS); \ | ||
216 | mov r16 = ar.lc; \ | ||
217 | ;; \ | ||
218 | st8 [r2] = r16; \ | ||
219 | ;; | ||
220 | |||
221 | /* | ||
222 | * r33: context_t base address | ||
223 | */ | ||
224 | #define RESTORE_APP_REGS \ | ||
225 | add r2=CTX(BSPSTORE),r33; \ | ||
226 | ;; \ | ||
227 | ld8 r16=[r2],CTX(RNAT)-CTX(BSPSTORE); \ | ||
228 | ;; \ | ||
229 | mov ar.bspstore=r16; \ | ||
230 | ld8 r16=[r2],CTX(FCR)-CTX(RNAT); \ | ||
231 | ;; \ | ||
232 | mov ar.rnat=r16; \ | ||
233 | ld8 r16=[r2],CTX(EFLAG)-CTX(FCR); \ | ||
234 | ;; \ | ||
235 | mov ar.fcr=r16; \ | ||
236 | ld8 r16=[r2],CTX(CFLG)-CTX(EFLAG); \ | ||
237 | ;; \ | ||
238 | mov ar.eflag=r16; \ | ||
239 | ld8 r16=[r2],CTX(FSR)-CTX(CFLG); \ | ||
240 | ;; \ | ||
241 | mov ar.cflg=r16; \ | ||
242 | ld8 r16=[r2],CTX(FIR)-CTX(FSR); \ | ||
243 | ;; \ | ||
244 | mov ar.fsr=r16; \ | ||
245 | ld8 r16=[r2],CTX(FDR)-CTX(FIR); \ | ||
246 | ;; \ | ||
247 | mov ar.fir=r16; \ | ||
248 | ld8 r16=[r2],CTX(UNAT)-CTX(FDR); \ | ||
249 | ;; \ | ||
250 | mov ar.fdr=r16; \ | ||
251 | ld8 r16=[r2],CTX(FPSR)-CTX(UNAT); \ | ||
252 | ;; \ | ||
253 | mov ar.unat=r16; \ | ||
254 | ld8 r16=[r2],CTX(PFS)-CTX(FPSR); \ | ||
255 | ;; \ | ||
256 | mov ar.fpsr=r16; \ | ||
257 | ld8 r16=[r2],CTX(LC)-CTX(PFS); \ | ||
258 | ;; \ | ||
259 | mov ar.pfs=r16; \ | ||
260 | ld8 r16=[r2]; \ | ||
261 | ;; \ | ||
262 | mov ar.lc=r16; \ | ||
263 | ;; | ||
264 | |||
265 | /* | ||
266 | * r32: context_t base address | ||
267 | */ | ||
268 | #define SAVE_CTL_REGS \ | ||
269 | add r2 = CTX(DCR),r32; \ | ||
270 | mov r16 = cr.dcr; \ | ||
271 | ;; \ | ||
272 | st8 [r2] = r16,CTX(IVA)-CTX(DCR); \ | ||
273 | ;; \ | ||
274 | mov r16 = cr.iva; \ | ||
275 | ;; \ | ||
276 | st8 [r2] = r16,CTX(PTA)-CTX(IVA); \ | ||
277 | ;; \ | ||
278 | mov r16 = cr.pta; \ | ||
279 | ;; \ | ||
280 | st8 [r2] = r16 ; \ | ||
281 | ;; | ||
282 | |||
283 | /* | ||
284 | * r33: context_t base address | ||
285 | */ | ||
286 | #define RESTORE_CTL_REGS \ | ||
287 | add r2 = CTX(DCR),r33; \ | ||
288 | ;; \ | ||
289 | ld8 r16 = [r2],CTX(IVA)-CTX(DCR); \ | ||
290 | ;; \ | ||
291 | mov cr.dcr = r16; \ | ||
292 | dv_serialize_data; \ | ||
293 | ;; \ | ||
294 | ld8 r16 = [r2],CTX(PTA)-CTX(IVA); \ | ||
295 | ;; \ | ||
296 | mov cr.iva = r16; \ | ||
297 | dv_serialize_data; \ | ||
298 | ;; \ | ||
299 | ld8 r16 = [r2]; \ | ||
300 | ;; \ | ||
301 | mov cr.pta = r16; \ | ||
302 | dv_serialize_data; \ | ||
303 | ;; | ||
304 | |||
305 | |||
306 | /* | ||
307 | * r32: context_t base address | ||
308 | */ | ||
309 | #define SAVE_REGION_REGS \ | ||
310 | add r2=CTX(RR0),r32; \ | ||
311 | mov r16=rr[r0]; \ | ||
312 | dep.z r18=1,61,3; \ | ||
313 | ;; \ | ||
314 | st8 [r2]=r16,8; \ | ||
315 | mov r17=rr[r18]; \ | ||
316 | dep.z r18=2,61,3; \ | ||
317 | ;; \ | ||
318 | st8 [r2]=r17,8; \ | ||
319 | mov r16=rr[r18]; \ | ||
320 | dep.z r18=3,61,3; \ | ||
321 | ;; \ | ||
322 | st8 [r2]=r16,8; \ | ||
323 | mov r17=rr[r18]; \ | ||
324 | dep.z r18=4,61,3; \ | ||
325 | ;; \ | ||
326 | st8 [r2]=r17,8; \ | ||
327 | mov r16=rr[r18]; \ | ||
328 | dep.z r18=5,61,3; \ | ||
329 | ;; \ | ||
330 | st8 [r2]=r16,8; \ | ||
331 | mov r17=rr[r18]; \ | ||
332 | dep.z r18=7,61,3; \ | ||
333 | ;; \ | ||
334 | st8 [r2]=r17,16; \ | ||
335 | mov r16=rr[r18]; \ | ||
336 | ;; \ | ||
337 | st8 [r2]=r16,8; \ | ||
338 | ;; | ||
339 | |||
340 | /* | ||
341 | * r33:context_t base address | ||
342 | */ | ||
343 | #define RESTORE_REGION_REGS \ | ||
344 | add r2=CTX(RR0),r33;\ | ||
345 | mov r18=r0; \ | ||
346 | ;; \ | ||
347 | ld8 r20=[r2],8; \ | ||
348 | ;; /* rr0 */ \ | ||
349 | ld8 r21=[r2],8; \ | ||
350 | ;; /* rr1 */ \ | ||
351 | ld8 r22=[r2],8; \ | ||
352 | ;; /* rr2 */ \ | ||
353 | ld8 r23=[r2],8; \ | ||
354 | ;; /* rr3 */ \ | ||
355 | ld8 r24=[r2],8; \ | ||
356 | ;; /* rr4 */ \ | ||
357 | ld8 r25=[r2],16; \ | ||
358 | ;; /* rr5 */ \ | ||
359 | ld8 r27=[r2]; \ | ||
360 | ;; /* rr7 */ \ | ||
361 | mov rr[r18]=r20; \ | ||
362 | dep.z r18=1,61,3; \ | ||
363 | ;; /* rr1 */ \ | ||
364 | mov rr[r18]=r21; \ | ||
365 | dep.z r18=2,61,3; \ | ||
366 | ;; /* rr2 */ \ | ||
367 | mov rr[r18]=r22; \ | ||
368 | dep.z r18=3,61,3; \ | ||
369 | ;; /* rr3 */ \ | ||
370 | mov rr[r18]=r23; \ | ||
371 | dep.z r18=4,61,3; \ | ||
372 | ;; /* rr4 */ \ | ||
373 | mov rr[r18]=r24; \ | ||
374 | dep.z r18=5,61,3; \ | ||
375 | ;; /* rr5 */ \ | ||
376 | mov rr[r18]=r25; \ | ||
377 | dep.z r18=7,61,3; \ | ||
378 | ;; /* rr7 */ \ | ||
379 | mov rr[r18]=r27; \ | ||
380 | ;; \ | ||
381 | srlz.i; \ | ||
382 | ;; | ||
383 | |||
384 | |||
385 | |||
386 | /* | ||
387 | * r32: context_t base address | ||
388 | * r36~r39:scratch registers | ||
389 | */ | ||
390 | #define SAVE_DEBUG_REGS \ | ||
391 | add r2=CTX(IBR0),r32; \ | ||
392 | add r3=CTX(DBR0),r32; \ | ||
393 | mov r16=ibr[r0]; \ | ||
394 | mov r17=dbr[r0]; \ | ||
395 | ;; \ | ||
396 | st8 [r2]=r16,8; \ | ||
397 | st8 [r3]=r17,8; \ | ||
398 | add r18=1,r0; \ | ||
399 | ;; \ | ||
400 | mov r16=ibr[r18]; \ | ||
401 | mov r17=dbr[r18]; \ | ||
402 | ;; \ | ||
403 | st8 [r2]=r16,8; \ | ||
404 | st8 [r3]=r17,8; \ | ||
405 | add r18=2,r0; \ | ||
406 | ;; \ | ||
407 | mov r16=ibr[r18]; \ | ||
408 | mov r17=dbr[r18]; \ | ||
409 | ;; \ | ||
410 | st8 [r2]=r16,8; \ | ||
411 | st8 [r3]=r17,8; \ | ||
412 | add r18=2,r0; \ | ||
413 | ;; \ | ||
414 | mov r16=ibr[r18]; \ | ||
415 | mov r17=dbr[r18]; \ | ||
416 | ;; \ | ||
417 | st8 [r2]=r16,8; \ | ||
418 | st8 [r3]=r17,8; \ | ||
419 | add r18=3,r0; \ | ||
420 | ;; \ | ||
421 | mov r16=ibr[r18]; \ | ||
422 | mov r17=dbr[r18]; \ | ||
423 | ;; \ | ||
424 | st8 [r2]=r16,8; \ | ||
425 | st8 [r3]=r17,8; \ | ||
426 | add r18=4,r0; \ | ||
427 | ;; \ | ||
428 | mov r16=ibr[r18]; \ | ||
429 | mov r17=dbr[r18]; \ | ||
430 | ;; \ | ||
431 | st8 [r2]=r16,8; \ | ||
432 | st8 [r3]=r17,8; \ | ||
433 | add r18=5,r0; \ | ||
434 | ;; \ | ||
435 | mov r16=ibr[r18]; \ | ||
436 | mov r17=dbr[r18]; \ | ||
437 | ;; \ | ||
438 | st8 [r2]=r16,8; \ | ||
439 | st8 [r3]=r17,8; \ | ||
440 | add r18=6,r0; \ | ||
441 | ;; \ | ||
442 | mov r16=ibr[r18]; \ | ||
443 | mov r17=dbr[r18]; \ | ||
444 | ;; \ | ||
445 | st8 [r2]=r16,8; \ | ||
446 | st8 [r3]=r17,8; \ | ||
447 | add r18=7,r0; \ | ||
448 | ;; \ | ||
449 | mov r16=ibr[r18]; \ | ||
450 | mov r17=dbr[r18]; \ | ||
451 | ;; \ | ||
452 | st8 [r2]=r16,8; \ | ||
453 | st8 [r3]=r17,8; \ | ||
454 | ;; | ||
455 | |||
456 | |||
457 | /* | ||
458 | * r33: point to context_t structure | ||
459 | * ar.lc are corrupted. | ||
460 | */ | ||
461 | #define RESTORE_DEBUG_REGS \ | ||
462 | add r2=CTX(IBR0),r33; \ | ||
463 | add r3=CTX(DBR0),r33; \ | ||
464 | mov r16=7; \ | ||
465 | mov r17=r0; \ | ||
466 | ;; \ | ||
467 | mov ar.lc = r16; \ | ||
468 | ;; \ | ||
469 | 1: \ | ||
470 | ld8 r18=[r2],8; \ | ||
471 | ld8 r19=[r3],8; \ | ||
472 | ;; \ | ||
473 | mov ibr[r17]=r18; \ | ||
474 | mov dbr[r17]=r19; \ | ||
475 | ;; \ | ||
476 | srlz.i; \ | ||
477 | ;; \ | ||
478 | add r17=1,r17; \ | ||
479 | br.cloop.sptk 1b; \ | ||
480 | ;; | ||
481 | |||
482 | |||
483 | /* | ||
484 | * r32: context_t base address | ||
485 | */ | ||
486 | #define SAVE_FPU_LOW \ | ||
487 | add r2=CTX(F2),r32; \ | ||
488 | add r3=CTX(F3),r32; \ | ||
489 | ;; \ | ||
490 | stf.spill.nta [r2]=f2,32; \ | ||
491 | stf.spill.nta [r3]=f3,32; \ | ||
492 | ;; \ | ||
493 | stf.spill.nta [r2]=f4,32; \ | ||
494 | stf.spill.nta [r3]=f5,32; \ | ||
495 | ;; \ | ||
496 | stf.spill.nta [r2]=f6,32; \ | ||
497 | stf.spill.nta [r3]=f7,32; \ | ||
498 | ;; \ | ||
499 | stf.spill.nta [r2]=f8,32; \ | ||
500 | stf.spill.nta [r3]=f9,32; \ | ||
501 | ;; \ | ||
502 | stf.spill.nta [r2]=f10,32; \ | ||
503 | stf.spill.nta [r3]=f11,32; \ | ||
504 | ;; \ | ||
505 | stf.spill.nta [r2]=f12,32; \ | ||
506 | stf.spill.nta [r3]=f13,32; \ | ||
507 | ;; \ | ||
508 | stf.spill.nta [r2]=f14,32; \ | ||
509 | stf.spill.nta [r3]=f15,32; \ | ||
510 | ;; \ | ||
511 | stf.spill.nta [r2]=f16,32; \ | ||
512 | stf.spill.nta [r3]=f17,32; \ | ||
513 | ;; \ | ||
514 | stf.spill.nta [r2]=f18,32; \ | ||
515 | stf.spill.nta [r3]=f19,32; \ | ||
516 | ;; \ | ||
517 | stf.spill.nta [r2]=f20,32; \ | ||
518 | stf.spill.nta [r3]=f21,32; \ | ||
519 | ;; \ | ||
520 | stf.spill.nta [r2]=f22,32; \ | ||
521 | stf.spill.nta [r3]=f23,32; \ | ||
522 | ;; \ | ||
523 | stf.spill.nta [r2]=f24,32; \ | ||
524 | stf.spill.nta [r3]=f25,32; \ | ||
525 | ;; \ | ||
526 | stf.spill.nta [r2]=f26,32; \ | ||
527 | stf.spill.nta [r3]=f27,32; \ | ||
528 | ;; \ | ||
529 | stf.spill.nta [r2]=f28,32; \ | ||
530 | stf.spill.nta [r3]=f29,32; \ | ||
531 | ;; \ | ||
532 | stf.spill.nta [r2]=f30; \ | ||
533 | stf.spill.nta [r3]=f31; \ | ||
534 | ;; | ||
535 | |||
536 | /* | ||
537 | * r32: context_t base address | ||
538 | */ | ||
539 | #define SAVE_FPU_HIGH \ | ||
540 | add r2=CTX(F32),r32; \ | ||
541 | add r3=CTX(F33),r32; \ | ||
542 | ;; \ | ||
543 | stf.spill.nta [r2]=f32,32; \ | ||
544 | stf.spill.nta [r3]=f33,32; \ | ||
545 | ;; \ | ||
546 | stf.spill.nta [r2]=f34,32; \ | ||
547 | stf.spill.nta [r3]=f35,32; \ | ||
548 | ;; \ | ||
549 | stf.spill.nta [r2]=f36,32; \ | ||
550 | stf.spill.nta [r3]=f37,32; \ | ||
551 | ;; \ | ||
552 | stf.spill.nta [r2]=f38,32; \ | ||
553 | stf.spill.nta [r3]=f39,32; \ | ||
554 | ;; \ | ||
555 | stf.spill.nta [r2]=f40,32; \ | ||
556 | stf.spill.nta [r3]=f41,32; \ | ||
557 | ;; \ | ||
558 | stf.spill.nta [r2]=f42,32; \ | ||
559 | stf.spill.nta [r3]=f43,32; \ | ||
560 | ;; \ | ||
561 | stf.spill.nta [r2]=f44,32; \ | ||
562 | stf.spill.nta [r3]=f45,32; \ | ||
563 | ;; \ | ||
564 | stf.spill.nta [r2]=f46,32; \ | ||
565 | stf.spill.nta [r3]=f47,32; \ | ||
566 | ;; \ | ||
567 | stf.spill.nta [r2]=f48,32; \ | ||
568 | stf.spill.nta [r3]=f49,32; \ | ||
569 | ;; \ | ||
570 | stf.spill.nta [r2]=f50,32; \ | ||
571 | stf.spill.nta [r3]=f51,32; \ | ||
572 | ;; \ | ||
573 | stf.spill.nta [r2]=f52,32; \ | ||
574 | stf.spill.nta [r3]=f53,32; \ | ||
575 | ;; \ | ||
576 | stf.spill.nta [r2]=f54,32; \ | ||
577 | stf.spill.nta [r3]=f55,32; \ | ||
578 | ;; \ | ||
579 | stf.spill.nta [r2]=f56,32; \ | ||
580 | stf.spill.nta [r3]=f57,32; \ | ||
581 | ;; \ | ||
582 | stf.spill.nta [r2]=f58,32; \ | ||
583 | stf.spill.nta [r3]=f59,32; \ | ||
584 | ;; \ | ||
585 | stf.spill.nta [r2]=f60,32; \ | ||
586 | stf.spill.nta [r3]=f61,32; \ | ||
587 | ;; \ | ||
588 | stf.spill.nta [r2]=f62,32; \ | ||
589 | stf.spill.nta [r3]=f63,32; \ | ||
590 | ;; \ | ||
591 | stf.spill.nta [r2]=f64,32; \ | ||
592 | stf.spill.nta [r3]=f65,32; \ | ||
593 | ;; \ | ||
594 | stf.spill.nta [r2]=f66,32; \ | ||
595 | stf.spill.nta [r3]=f67,32; \ | ||
596 | ;; \ | ||
597 | stf.spill.nta [r2]=f68,32; \ | ||
598 | stf.spill.nta [r3]=f69,32; \ | ||
599 | ;; \ | ||
600 | stf.spill.nta [r2]=f70,32; \ | ||
601 | stf.spill.nta [r3]=f71,32; \ | ||
602 | ;; \ | ||
603 | stf.spill.nta [r2]=f72,32; \ | ||
604 | stf.spill.nta [r3]=f73,32; \ | ||
605 | ;; \ | ||
606 | stf.spill.nta [r2]=f74,32; \ | ||
607 | stf.spill.nta [r3]=f75,32; \ | ||
608 | ;; \ | ||
609 | stf.spill.nta [r2]=f76,32; \ | ||
610 | stf.spill.nta [r3]=f77,32; \ | ||
611 | ;; \ | ||
612 | stf.spill.nta [r2]=f78,32; \ | ||
613 | stf.spill.nta [r3]=f79,32; \ | ||
614 | ;; \ | ||
615 | stf.spill.nta [r2]=f80,32; \ | ||
616 | stf.spill.nta [r3]=f81,32; \ | ||
617 | ;; \ | ||
618 | stf.spill.nta [r2]=f82,32; \ | ||
619 | stf.spill.nta [r3]=f83,32; \ | ||
620 | ;; \ | ||
621 | stf.spill.nta [r2]=f84,32; \ | ||
622 | stf.spill.nta [r3]=f85,32; \ | ||
623 | ;; \ | ||
624 | stf.spill.nta [r2]=f86,32; \ | ||
625 | stf.spill.nta [r3]=f87,32; \ | ||
626 | ;; \ | ||
627 | stf.spill.nta [r2]=f88,32; \ | ||
628 | stf.spill.nta [r3]=f89,32; \ | ||
629 | ;; \ | ||
630 | stf.spill.nta [r2]=f90,32; \ | ||
631 | stf.spill.nta [r3]=f91,32; \ | ||
632 | ;; \ | ||
633 | stf.spill.nta [r2]=f92,32; \ | ||
634 | stf.spill.nta [r3]=f93,32; \ | ||
635 | ;; \ | ||
636 | stf.spill.nta [r2]=f94,32; \ | ||
637 | stf.spill.nta [r3]=f95,32; \ | ||
638 | ;; \ | ||
639 | stf.spill.nta [r2]=f96,32; \ | ||
640 | stf.spill.nta [r3]=f97,32; \ | ||
641 | ;; \ | ||
642 | stf.spill.nta [r2]=f98,32; \ | ||
643 | stf.spill.nta [r3]=f99,32; \ | ||
644 | ;; \ | ||
645 | stf.spill.nta [r2]=f100,32; \ | ||
646 | stf.spill.nta [r3]=f101,32; \ | ||
647 | ;; \ | ||
648 | stf.spill.nta [r2]=f102,32; \ | ||
649 | stf.spill.nta [r3]=f103,32; \ | ||
650 | ;; \ | ||
651 | stf.spill.nta [r2]=f104,32; \ | ||
652 | stf.spill.nta [r3]=f105,32; \ | ||
653 | ;; \ | ||
654 | stf.spill.nta [r2]=f106,32; \ | ||
655 | stf.spill.nta [r3]=f107,32; \ | ||
656 | ;; \ | ||
657 | stf.spill.nta [r2]=f108,32; \ | ||
658 | stf.spill.nta [r3]=f109,32; \ | ||
659 | ;; \ | ||
660 | stf.spill.nta [r2]=f110,32; \ | ||
661 | stf.spill.nta [r3]=f111,32; \ | ||
662 | ;; \ | ||
663 | stf.spill.nta [r2]=f112,32; \ | ||
664 | stf.spill.nta [r3]=f113,32; \ | ||
665 | ;; \ | ||
666 | stf.spill.nta [r2]=f114,32; \ | ||
667 | stf.spill.nta [r3]=f115,32; \ | ||
668 | ;; \ | ||
669 | stf.spill.nta [r2]=f116,32; \ | ||
670 | stf.spill.nta [r3]=f117,32; \ | ||
671 | ;; \ | ||
672 | stf.spill.nta [r2]=f118,32; \ | ||
673 | stf.spill.nta [r3]=f119,32; \ | ||
674 | ;; \ | ||
675 | stf.spill.nta [r2]=f120,32; \ | ||
676 | stf.spill.nta [r3]=f121,32; \ | ||
677 | ;; \ | ||
678 | stf.spill.nta [r2]=f122,32; \ | ||
679 | stf.spill.nta [r3]=f123,32; \ | ||
680 | ;; \ | ||
681 | stf.spill.nta [r2]=f124,32; \ | ||
682 | stf.spill.nta [r3]=f125,32; \ | ||
683 | ;; \ | ||
684 | stf.spill.nta [r2]=f126; \ | ||
685 | stf.spill.nta [r3]=f127; \ | ||
686 | ;; | ||
687 | |||
688 | /* | ||
689 | * r33: point to context_t structure | ||
690 | */ | ||
691 | #define RESTORE_FPU_LOW \ | ||
692 | add r2 = CTX(F2), r33; \ | ||
693 | add r3 = CTX(F3), r33; \ | ||
694 | ;; \ | ||
695 | ldf.fill.nta f2 = [r2], 32; \ | ||
696 | ldf.fill.nta f3 = [r3], 32; \ | ||
697 | ;; \ | ||
698 | ldf.fill.nta f4 = [r2], 32; \ | ||
699 | ldf.fill.nta f5 = [r3], 32; \ | ||
700 | ;; \ | ||
701 | ldf.fill.nta f6 = [r2], 32; \ | ||
702 | ldf.fill.nta f7 = [r3], 32; \ | ||
703 | ;; \ | ||
704 | ldf.fill.nta f8 = [r2], 32; \ | ||
705 | ldf.fill.nta f9 = [r3], 32; \ | ||
706 | ;; \ | ||
707 | ldf.fill.nta f10 = [r2], 32; \ | ||
708 | ldf.fill.nta f11 = [r3], 32; \ | ||
709 | ;; \ | ||
710 | ldf.fill.nta f12 = [r2], 32; \ | ||
711 | ldf.fill.nta f13 = [r3], 32; \ | ||
712 | ;; \ | ||
713 | ldf.fill.nta f14 = [r2], 32; \ | ||
714 | ldf.fill.nta f15 = [r3], 32; \ | ||
715 | ;; \ | ||
716 | ldf.fill.nta f16 = [r2], 32; \ | ||
717 | ldf.fill.nta f17 = [r3], 32; \ | ||
718 | ;; \ | ||
719 | ldf.fill.nta f18 = [r2], 32; \ | ||
720 | ldf.fill.nta f19 = [r3], 32; \ | ||
721 | ;; \ | ||
722 | ldf.fill.nta f20 = [r2], 32; \ | ||
723 | ldf.fill.nta f21 = [r3], 32; \ | ||
724 | ;; \ | ||
725 | ldf.fill.nta f22 = [r2], 32; \ | ||
726 | ldf.fill.nta f23 = [r3], 32; \ | ||
727 | ;; \ | ||
728 | ldf.fill.nta f24 = [r2], 32; \ | ||
729 | ldf.fill.nta f25 = [r3], 32; \ | ||
730 | ;; \ | ||
731 | ldf.fill.nta f26 = [r2], 32; \ | ||
732 | ldf.fill.nta f27 = [r3], 32; \ | ||
733 | ;; \ | ||
734 | ldf.fill.nta f28 = [r2], 32; \ | ||
735 | ldf.fill.nta f29 = [r3], 32; \ | ||
736 | ;; \ | ||
737 | ldf.fill.nta f30 = [r2], 32; \ | ||
738 | ldf.fill.nta f31 = [r3], 32; \ | ||
739 | ;; | ||
740 | |||
741 | |||
742 | |||
743 | /* | ||
744 | * r33: point to context_t structure | ||
745 | */ | ||
746 | #define RESTORE_FPU_HIGH \ | ||
747 | add r2 = CTX(F32), r33; \ | ||
748 | add r3 = CTX(F33), r33; \ | ||
749 | ;; \ | ||
750 | ldf.fill.nta f32 = [r2], 32; \ | ||
751 | ldf.fill.nta f33 = [r3], 32; \ | ||
752 | ;; \ | ||
753 | ldf.fill.nta f34 = [r2], 32; \ | ||
754 | ldf.fill.nta f35 = [r3], 32; \ | ||
755 | ;; \ | ||
756 | ldf.fill.nta f36 = [r2], 32; \ | ||
757 | ldf.fill.nta f37 = [r3], 32; \ | ||
758 | ;; \ | ||
759 | ldf.fill.nta f38 = [r2], 32; \ | ||
760 | ldf.fill.nta f39 = [r3], 32; \ | ||
761 | ;; \ | ||
762 | ldf.fill.nta f40 = [r2], 32; \ | ||
763 | ldf.fill.nta f41 = [r3], 32; \ | ||
764 | ;; \ | ||
765 | ldf.fill.nta f42 = [r2], 32; \ | ||
766 | ldf.fill.nta f43 = [r3], 32; \ | ||
767 | ;; \ | ||
768 | ldf.fill.nta f44 = [r2], 32; \ | ||
769 | ldf.fill.nta f45 = [r3], 32; \ | ||
770 | ;; \ | ||
771 | ldf.fill.nta f46 = [r2], 32; \ | ||
772 | ldf.fill.nta f47 = [r3], 32; \ | ||
773 | ;; \ | ||
774 | ldf.fill.nta f48 = [r2], 32; \ | ||
775 | ldf.fill.nta f49 = [r3], 32; \ | ||
776 | ;; \ | ||
777 | ldf.fill.nta f50 = [r2], 32; \ | ||
778 | ldf.fill.nta f51 = [r3], 32; \ | ||
779 | ;; \ | ||
780 | ldf.fill.nta f52 = [r2], 32; \ | ||
781 | ldf.fill.nta f53 = [r3], 32; \ | ||
782 | ;; \ | ||
783 | ldf.fill.nta f54 = [r2], 32; \ | ||
784 | ldf.fill.nta f55 = [r3], 32; \ | ||
785 | ;; \ | ||
786 | ldf.fill.nta f56 = [r2], 32; \ | ||
787 | ldf.fill.nta f57 = [r3], 32; \ | ||
788 | ;; \ | ||
789 | ldf.fill.nta f58 = [r2], 32; \ | ||
790 | ldf.fill.nta f59 = [r3], 32; \ | ||
791 | ;; \ | ||
792 | ldf.fill.nta f60 = [r2], 32; \ | ||
793 | ldf.fill.nta f61 = [r3], 32; \ | ||
794 | ;; \ | ||
795 | ldf.fill.nta f62 = [r2], 32; \ | ||
796 | ldf.fill.nta f63 = [r3], 32; \ | ||
797 | ;; \ | ||
798 | ldf.fill.nta f64 = [r2], 32; \ | ||
799 | ldf.fill.nta f65 = [r3], 32; \ | ||
800 | ;; \ | ||
801 | ldf.fill.nta f66 = [r2], 32; \ | ||
802 | ldf.fill.nta f67 = [r3], 32; \ | ||
803 | ;; \ | ||
804 | ldf.fill.nta f68 = [r2], 32; \ | ||
805 | ldf.fill.nta f69 = [r3], 32; \ | ||
806 | ;; \ | ||
807 | ldf.fill.nta f70 = [r2], 32; \ | ||
808 | ldf.fill.nta f71 = [r3], 32; \ | ||
809 | ;; \ | ||
810 | ldf.fill.nta f72 = [r2], 32; \ | ||
811 | ldf.fill.nta f73 = [r3], 32; \ | ||
812 | ;; \ | ||
813 | ldf.fill.nta f74 = [r2], 32; \ | ||
814 | ldf.fill.nta f75 = [r3], 32; \ | ||
815 | ;; \ | ||
816 | ldf.fill.nta f76 = [r2], 32; \ | ||
817 | ldf.fill.nta f77 = [r3], 32; \ | ||
818 | ;; \ | ||
819 | ldf.fill.nta f78 = [r2], 32; \ | ||
820 | ldf.fill.nta f79 = [r3], 32; \ | ||
821 | ;; \ | ||
822 | ldf.fill.nta f80 = [r2], 32; \ | ||
823 | ldf.fill.nta f81 = [r3], 32; \ | ||
824 | ;; \ | ||
825 | ldf.fill.nta f82 = [r2], 32; \ | ||
826 | ldf.fill.nta f83 = [r3], 32; \ | ||
827 | ;; \ | ||
828 | ldf.fill.nta f84 = [r2], 32; \ | ||
829 | ldf.fill.nta f85 = [r3], 32; \ | ||
830 | ;; \ | ||
831 | ldf.fill.nta f86 = [r2], 32; \ | ||
832 | ldf.fill.nta f87 = [r3], 32; \ | ||
833 | ;; \ | ||
834 | ldf.fill.nta f88 = [r2], 32; \ | ||
835 | ldf.fill.nta f89 = [r3], 32; \ | ||
836 | ;; \ | ||
837 | ldf.fill.nta f90 = [r2], 32; \ | ||
838 | ldf.fill.nta f91 = [r3], 32; \ | ||
839 | ;; \ | ||
840 | ldf.fill.nta f92 = [r2], 32; \ | ||
841 | ldf.fill.nta f93 = [r3], 32; \ | ||
842 | ;; \ | ||
843 | ldf.fill.nta f94 = [r2], 32; \ | ||
844 | ldf.fill.nta f95 = [r3], 32; \ | ||
845 | ;; \ | ||
846 | ldf.fill.nta f96 = [r2], 32; \ | ||
847 | ldf.fill.nta f97 = [r3], 32; \ | ||
848 | ;; \ | ||
849 | ldf.fill.nta f98 = [r2], 32; \ | ||
850 | ldf.fill.nta f99 = [r3], 32; \ | ||
851 | ;; \ | ||
852 | ldf.fill.nta f100 = [r2], 32; \ | ||
853 | ldf.fill.nta f101 = [r3], 32; \ | ||
854 | ;; \ | ||
855 | ldf.fill.nta f102 = [r2], 32; \ | ||
856 | ldf.fill.nta f103 = [r3], 32; \ | ||
857 | ;; \ | ||
858 | ldf.fill.nta f104 = [r2], 32; \ | ||
859 | ldf.fill.nta f105 = [r3], 32; \ | ||
860 | ;; \ | ||
861 | ldf.fill.nta f106 = [r2], 32; \ | ||
862 | ldf.fill.nta f107 = [r3], 32; \ | ||
863 | ;; \ | ||
864 | ldf.fill.nta f108 = [r2], 32; \ | ||
865 | ldf.fill.nta f109 = [r3], 32; \ | ||
866 | ;; \ | ||
867 | ldf.fill.nta f110 = [r2], 32; \ | ||
868 | ldf.fill.nta f111 = [r3], 32; \ | ||
869 | ;; \ | ||
870 | ldf.fill.nta f112 = [r2], 32; \ | ||
871 | ldf.fill.nta f113 = [r3], 32; \ | ||
872 | ;; \ | ||
873 | ldf.fill.nta f114 = [r2], 32; \ | ||
874 | ldf.fill.nta f115 = [r3], 32; \ | ||
875 | ;; \ | ||
876 | ldf.fill.nta f116 = [r2], 32; \ | ||
877 | ldf.fill.nta f117 = [r3], 32; \ | ||
878 | ;; \ | ||
879 | ldf.fill.nta f118 = [r2], 32; \ | ||
880 | ldf.fill.nta f119 = [r3], 32; \ | ||
881 | ;; \ | ||
882 | ldf.fill.nta f120 = [r2], 32; \ | ||
883 | ldf.fill.nta f121 = [r3], 32; \ | ||
884 | ;; \ | ||
885 | ldf.fill.nta f122 = [r2], 32; \ | ||
886 | ldf.fill.nta f123 = [r3], 32; \ | ||
887 | ;; \ | ||
888 | ldf.fill.nta f124 = [r2], 32; \ | ||
889 | ldf.fill.nta f125 = [r3], 32; \ | ||
890 | ;; \ | ||
891 | ldf.fill.nta f126 = [r2], 32; \ | ||
892 | ldf.fill.nta f127 = [r3], 32; \ | ||
893 | ;; | ||
894 | |||
895 | /* | ||
896 | * r32: context_t base address | ||
897 | */ | ||
898 | #define SAVE_PTK_REGS \ | ||
899 | add r2=CTX(PKR0), r32; \ | ||
900 | mov r16=7; \ | ||
901 | ;; \ | ||
902 | mov ar.lc=r16; \ | ||
903 | mov r17=r0; \ | ||
904 | ;; \ | ||
905 | 1: \ | ||
906 | mov r18=pkr[r17]; \ | ||
907 | ;; \ | ||
908 | srlz.i; \ | ||
909 | ;; \ | ||
910 | st8 [r2]=r18, 8; \ | ||
911 | ;; \ | ||
912 | add r17 =1,r17; \ | ||
913 | ;; \ | ||
914 | br.cloop.sptk 1b; \ | ||
915 | ;; | ||
916 | |||
917 | /* | ||
918 | * r33: point to context_t structure | ||
919 | * ar.lc are corrupted. | ||
920 | */ | ||
921 | #define RESTORE_PTK_REGS \ | ||
922 | add r2=CTX(PKR0), r33; \ | ||
923 | mov r16=7; \ | ||
924 | ;; \ | ||
925 | mov ar.lc=r16; \ | ||
926 | mov r17=r0; \ | ||
927 | ;; \ | ||
928 | 1: \ | ||
929 | ld8 r18=[r2], 8; \ | ||
930 | ;; \ | ||
931 | mov pkr[r17]=r18; \ | ||
932 | ;; \ | ||
933 | srlz.i; \ | ||
934 | ;; \ | ||
935 | add r17 =1,r17; \ | ||
936 | ;; \ | ||
937 | br.cloop.sptk 1b; \ | ||
938 | ;; | ||
939 | |||
940 | |||
941 | /* | ||
942 | * void vmm_trampoline( context_t * from, | ||
943 | * context_t * to) | ||
944 | * | ||
945 | * from: r32 | ||
946 | * to: r33 | ||
947 | * note: interrupt disabled before call this function. | ||
948 | */ | ||
949 | GLOBAL_ENTRY(vmm_trampoline) | ||
950 | mov r16 = psr | ||
951 | adds r2 = CTX(PSR), r32 | ||
952 | ;; | ||
953 | st8 [r2] = r16, 8 // psr | ||
954 | mov r17 = pr | ||
955 | ;; | ||
956 | st8 [r2] = r17, 8 // pr | ||
957 | mov r18 = ar.unat | ||
958 | ;; | ||
959 | st8 [r2] = r18 | ||
960 | mov r17 = ar.rsc | ||
961 | ;; | ||
962 | adds r2 = CTX(RSC),r32 | ||
963 | ;; | ||
964 | st8 [r2]= r17 | ||
965 | mov ar.rsc =0 | ||
966 | flushrs | ||
967 | ;; | ||
968 | SAVE_GENERAL_REGS | ||
969 | ;; | ||
970 | SAVE_KERNEL_REGS | ||
971 | ;; | ||
972 | SAVE_APP_REGS | ||
973 | ;; | ||
974 | SAVE_BRANCH_REGS | ||
975 | ;; | ||
976 | SAVE_CTL_REGS | ||
977 | ;; | ||
978 | SAVE_REGION_REGS | ||
979 | ;; | ||
980 | //SAVE_DEBUG_REGS | ||
981 | ;; | ||
982 | rsm psr.dfl | ||
983 | ;; | ||
984 | srlz.d | ||
985 | ;; | ||
986 | SAVE_FPU_LOW | ||
987 | ;; | ||
988 | rsm psr.dfh | ||
989 | ;; | ||
990 | srlz.d | ||
991 | ;; | ||
992 | SAVE_FPU_HIGH | ||
993 | ;; | ||
994 | SAVE_PTK_REGS | ||
995 | ;; | ||
996 | RESTORE_PTK_REGS | ||
997 | ;; | ||
998 | RESTORE_FPU_HIGH | ||
999 | ;; | ||
1000 | RESTORE_FPU_LOW | ||
1001 | ;; | ||
1002 | //RESTORE_DEBUG_REGS | ||
1003 | ;; | ||
1004 | RESTORE_REGION_REGS | ||
1005 | ;; | ||
1006 | RESTORE_CTL_REGS | ||
1007 | ;; | ||
1008 | RESTORE_BRANCH_REGS | ||
1009 | ;; | ||
1010 | RESTORE_APP_REGS | ||
1011 | ;; | ||
1012 | RESTORE_KERNEL_REGS | ||
1013 | ;; | ||
1014 | RESTORE_GENERAL_REGS | ||
1015 | ;; | ||
1016 | adds r2=CTX(PSR), r33 | ||
1017 | ;; | ||
1018 | ld8 r16=[r2], 8 // psr | ||
1019 | ;; | ||
1020 | mov psr.l=r16 | ||
1021 | ;; | ||
1022 | srlz.d | ||
1023 | ;; | ||
1024 | ld8 r16=[r2], 8 // pr | ||
1025 | ;; | ||
1026 | mov pr =r16,-1 | ||
1027 | ld8 r16=[r2] // unat | ||
1028 | ;; | ||
1029 | mov ar.unat=r16 | ||
1030 | ;; | ||
1031 | adds r2=CTX(RSC),r33 | ||
1032 | ;; | ||
1033 | ld8 r16 =[r2] | ||
1034 | ;; | ||
1035 | mov ar.rsc = r16 | ||
1036 | ;; | ||
1037 | br.ret.sptk.few b0 | ||
1038 | END(vmm_trampoline) | ||
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c new file mode 100644 index 000000000000..e44027ce5667 --- /dev/null +++ b/arch/ia64/kvm/vcpu.c | |||
@@ -0,0 +1,2163 @@ | |||
1 | /* | ||
2 | * kvm_vcpu.c: handling all virtual cpu related thing. | ||
3 | * Copyright (c) 2005, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
16 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
17 | * | ||
18 | * Shaofan Li (Susue Li) <susie.li@intel.com> | ||
19 | * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) | ||
20 | * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) | ||
21 | * Xiantao Zhang <xiantao.zhang@intel.com> | ||
22 | */ | ||
23 | |||
24 | #include <linux/kvm_host.h> | ||
25 | #include <linux/types.h> | ||
26 | |||
27 | #include <asm/processor.h> | ||
28 | #include <asm/ia64regs.h> | ||
29 | #include <asm/gcc_intrin.h> | ||
30 | #include <asm/kregs.h> | ||
31 | #include <asm/pgtable.h> | ||
32 | #include <asm/tlb.h> | ||
33 | |||
34 | #include "asm-offsets.h" | ||
35 | #include "vcpu.h" | ||
36 | |||
37 | /* | ||
38 | * Special notes: | ||
39 | * - Index by it/dt/rt sequence | ||
40 | * - Only existing mode transitions are allowed in this table | ||
41 | * - RSE is placed at lazy mode when emulating guest partial mode | ||
42 | * - If gva happens to be rr0 and rr4, only allowed case is identity | ||
43 | * mapping (gva=gpa), or panic! (How?) | ||
44 | */ | ||
45 | int mm_switch_table[8][8] = { | ||
46 | /* 2004/09/12(Kevin): Allow switch to self */ | ||
47 | /* | ||
48 | * (it,dt,rt): (0,0,0) -> (1,1,1) | ||
49 | * This kind of transition usually occurs in the very early | ||
50 | * stage of Linux boot up procedure. Another case is in efi | ||
51 | * and pal calls. (see "arch/ia64/kernel/head.S") | ||
52 | * | ||
53 | * (it,dt,rt): (0,0,0) -> (0,1,1) | ||
54 | * This kind of transition is found when OSYa exits efi boot | ||
55 | * service. Due to gva = gpa in this case (Same region), | ||
56 | * data access can be satisfied though itlb entry for physical | ||
57 | * emulation is hit. | ||
58 | */ | ||
59 | {SW_SELF, 0, 0, SW_NOP, 0, 0, 0, SW_P2V}, | ||
60 | {0, 0, 0, 0, 0, 0, 0, 0}, | ||
61 | {0, 0, 0, 0, 0, 0, 0, 0}, | ||
62 | /* | ||
63 | * (it,dt,rt): (0,1,1) -> (1,1,1) | ||
64 | * This kind of transition is found in OSYa. | ||
65 | * | ||
66 | * (it,dt,rt): (0,1,1) -> (0,0,0) | ||
67 | * This kind of transition is found in OSYa | ||
68 | */ | ||
69 | {SW_NOP, 0, 0, SW_SELF, 0, 0, 0, SW_P2V}, | ||
70 | /* (1,0,0)->(1,1,1) */ | ||
71 | {0, 0, 0, 0, 0, 0, 0, SW_P2V}, | ||
72 | /* | ||
73 | * (it,dt,rt): (1,0,1) -> (1,1,1) | ||
74 | * This kind of transition usually occurs when Linux returns | ||
75 | * from the low level TLB miss handlers. | ||
76 | * (see "arch/ia64/kernel/ivt.S") | ||
77 | */ | ||
78 | {0, 0, 0, 0, 0, SW_SELF, 0, SW_P2V}, | ||
79 | {0, 0, 0, 0, 0, 0, 0, 0}, | ||
80 | /* | ||
81 | * (it,dt,rt): (1,1,1) -> (1,0,1) | ||
82 | * This kind of transition usually occurs in Linux low level | ||
83 | * TLB miss handler. (see "arch/ia64/kernel/ivt.S") | ||
84 | * | ||
85 | * (it,dt,rt): (1,1,1) -> (0,0,0) | ||
86 | * This kind of transition usually occurs in pal and efi calls, | ||
87 | * which requires running in physical mode. | ||
88 | * (see "arch/ia64/kernel/head.S") | ||
89 | * (1,1,1)->(1,0,0) | ||
90 | */ | ||
91 | |||
92 | {SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF}, | ||
93 | }; | ||
94 | |||
95 | void physical_mode_init(struct kvm_vcpu *vcpu) | ||
96 | { | ||
97 | vcpu->arch.mode_flags = GUEST_IN_PHY; | ||
98 | } | ||
99 | |||
100 | void switch_to_physical_rid(struct kvm_vcpu *vcpu) | ||
101 | { | ||
102 | unsigned long psr; | ||
103 | |||
104 | /* Save original virtual mode rr[0] and rr[4] */ | ||
105 | psr = ia64_clear_ic(); | ||
106 | ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0); | ||
107 | ia64_srlz_d(); | ||
108 | ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4); | ||
109 | ia64_srlz_d(); | ||
110 | |||
111 | ia64_set_psr(psr); | ||
112 | return; | ||
113 | } | ||
114 | |||
115 | |||
116 | void switch_to_virtual_rid(struct kvm_vcpu *vcpu) | ||
117 | { | ||
118 | unsigned long psr; | ||
119 | |||
120 | psr = ia64_clear_ic(); | ||
121 | ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0); | ||
122 | ia64_srlz_d(); | ||
123 | ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4); | ||
124 | ia64_srlz_d(); | ||
125 | ia64_set_psr(psr); | ||
126 | return; | ||
127 | } | ||
128 | |||
129 | static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr) | ||
130 | { | ||
131 | return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)]; | ||
132 | } | ||
133 | |||
134 | void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, | ||
135 | struct ia64_psr new_psr) | ||
136 | { | ||
137 | int act; | ||
138 | act = mm_switch_action(old_psr, new_psr); | ||
139 | switch (act) { | ||
140 | case SW_V2P: | ||
141 | /*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n", | ||
142 | old_psr.val, new_psr.val);*/ | ||
143 | switch_to_physical_rid(vcpu); | ||
144 | /* | ||
145 | * Set rse to enforced lazy, to prevent active rse | ||
146 | *save/restor when guest physical mode. | ||
147 | */ | ||
148 | vcpu->arch.mode_flags |= GUEST_IN_PHY; | ||
149 | break; | ||
150 | case SW_P2V: | ||
151 | switch_to_virtual_rid(vcpu); | ||
152 | /* | ||
153 | * recover old mode which is saved when entering | ||
154 | * guest physical mode | ||
155 | */ | ||
156 | vcpu->arch.mode_flags &= ~GUEST_IN_PHY; | ||
157 | break; | ||
158 | case SW_SELF: | ||
159 | break; | ||
160 | case SW_NOP: | ||
161 | break; | ||
162 | default: | ||
163 | /* Sanity check */ | ||
164 | break; | ||
165 | } | ||
166 | return; | ||
167 | } | ||
168 | |||
169 | |||
170 | |||
171 | /* | ||
172 | * In physical mode, insert tc/tr for region 0 and 4 uses | ||
173 | * RID[0] and RID[4] which is for physical mode emulation. | ||
174 | * However what those inserted tc/tr wants is rid for | ||
175 | * virtual mode. So original virtual rid needs to be restored | ||
176 | * before insert. | ||
177 | * | ||
178 | * Operations which required such switch include: | ||
179 | * - insertions (itc.*, itr.*) | ||
180 | * - purges (ptc.* and ptr.*) | ||
181 | * - tpa | ||
182 | * - tak | ||
183 | * - thash?, ttag? | ||
184 | * All above needs actual virtual rid for destination entry. | ||
185 | */ | ||
186 | |||
187 | void check_mm_mode_switch(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, | ||
188 | struct ia64_psr new_psr) | ||
189 | { | ||
190 | |||
191 | if ((old_psr.dt != new_psr.dt) | ||
192 | || (old_psr.it != new_psr.it) | ||
193 | || (old_psr.rt != new_psr.rt)) | ||
194 | switch_mm_mode(vcpu, old_psr, new_psr); | ||
195 | |||
196 | return; | ||
197 | } | ||
198 | |||
199 | |||
200 | /* | ||
201 | * In physical mode, insert tc/tr for region 0 and 4 uses | ||
202 | * RID[0] and RID[4] which is for physical mode emulation. | ||
203 | * However what those inserted tc/tr wants is rid for | ||
204 | * virtual mode. So original virtual rid needs to be restored | ||
205 | * before insert. | ||
206 | * | ||
207 | * Operations which required such switch include: | ||
208 | * - insertions (itc.*, itr.*) | ||
209 | * - purges (ptc.* and ptr.*) | ||
210 | * - tpa | ||
211 | * - tak | ||
212 | * - thash?, ttag? | ||
213 | * All above needs actual virtual rid for destination entry. | ||
214 | */ | ||
215 | |||
216 | void prepare_if_physical_mode(struct kvm_vcpu *vcpu) | ||
217 | { | ||
218 | if (is_physical_mode(vcpu)) { | ||
219 | vcpu->arch.mode_flags |= GUEST_PHY_EMUL; | ||
220 | switch_to_virtual_rid(vcpu); | ||
221 | } | ||
222 | return; | ||
223 | } | ||
224 | |||
225 | /* Recover always follows prepare */ | ||
226 | void recover_if_physical_mode(struct kvm_vcpu *vcpu) | ||
227 | { | ||
228 | if (is_physical_mode(vcpu)) | ||
229 | switch_to_physical_rid(vcpu); | ||
230 | vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL; | ||
231 | return; | ||
232 | } | ||
233 | |||
234 | #define RPT(x) ((u16) &((struct kvm_pt_regs *)0)->x) | ||
235 | |||
236 | static u16 gr_info[32] = { | ||
237 | 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ | ||
238 | RPT(r1), RPT(r2), RPT(r3), | ||
239 | RPT(r4), RPT(r5), RPT(r6), RPT(r7), | ||
240 | RPT(r8), RPT(r9), RPT(r10), RPT(r11), | ||
241 | RPT(r12), RPT(r13), RPT(r14), RPT(r15), | ||
242 | RPT(r16), RPT(r17), RPT(r18), RPT(r19), | ||
243 | RPT(r20), RPT(r21), RPT(r22), RPT(r23), | ||
244 | RPT(r24), RPT(r25), RPT(r26), RPT(r27), | ||
245 | RPT(r28), RPT(r29), RPT(r30), RPT(r31) | ||
246 | }; | ||
247 | |||
248 | #define IA64_FIRST_STACKED_GR 32 | ||
249 | #define IA64_FIRST_ROTATING_FR 32 | ||
250 | |||
251 | static inline unsigned long | ||
252 | rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg) | ||
253 | { | ||
254 | reg += rrb; | ||
255 | if (reg >= sor) | ||
256 | reg -= sor; | ||
257 | return reg; | ||
258 | } | ||
259 | |||
260 | /* | ||
261 | * Return the (rotated) index for floating point register | ||
262 | * be in the REGNUM (REGNUM must range from 32-127, | ||
263 | * result is in the range from 0-95. | ||
264 | */ | ||
265 | static inline unsigned long fph_index(struct kvm_pt_regs *regs, | ||
266 | long regnum) | ||
267 | { | ||
268 | unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f; | ||
269 | return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR)); | ||
270 | } | ||
271 | |||
272 | |||
273 | /* | ||
274 | * The inverse of the above: given bspstore and the number of | ||
275 | * registers, calculate ar.bsp. | ||
276 | */ | ||
277 | static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr, | ||
278 | long num_regs) | ||
279 | { | ||
280 | long delta = ia64_rse_slot_num(addr) + num_regs; | ||
281 | int i = 0; | ||
282 | |||
283 | if (num_regs < 0) | ||
284 | delta -= 0x3e; | ||
285 | if (delta < 0) { | ||
286 | while (delta <= -0x3f) { | ||
287 | i--; | ||
288 | delta += 0x3f; | ||
289 | } | ||
290 | } else { | ||
291 | while (delta >= 0x3f) { | ||
292 | i++; | ||
293 | delta -= 0x3f; | ||
294 | } | ||
295 | } | ||
296 | |||
297 | return addr + num_regs + i; | ||
298 | } | ||
299 | |||
300 | static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, | ||
301 | unsigned long *val, int *nat) | ||
302 | { | ||
303 | unsigned long *bsp, *addr, *rnat_addr, *bspstore; | ||
304 | unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; | ||
305 | unsigned long nat_mask; | ||
306 | unsigned long old_rsc, new_rsc; | ||
307 | long sof = (regs->cr_ifs) & 0x7f; | ||
308 | long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); | ||
309 | long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; | ||
310 | long ridx = r1 - 32; | ||
311 | |||
312 | if (ridx < sor) | ||
313 | ridx = rotate_reg(sor, rrb_gr, ridx); | ||
314 | |||
315 | old_rsc = ia64_getreg(_IA64_REG_AR_RSC); | ||
316 | new_rsc = old_rsc&(~(0x3)); | ||
317 | ia64_setreg(_IA64_REG_AR_RSC, new_rsc); | ||
318 | |||
319 | bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); | ||
320 | bsp = kbs + (regs->loadrs >> 19); | ||
321 | |||
322 | addr = kvm_rse_skip_regs(bsp, -sof + ridx); | ||
323 | nat_mask = 1UL << ia64_rse_slot_num(addr); | ||
324 | rnat_addr = ia64_rse_rnat_addr(addr); | ||
325 | |||
326 | if (addr >= bspstore) { | ||
327 | ia64_flushrs(); | ||
328 | ia64_mf(); | ||
329 | bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); | ||
330 | } | ||
331 | *val = *addr; | ||
332 | if (nat) { | ||
333 | if (bspstore < rnat_addr) | ||
334 | *nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT) | ||
335 | & nat_mask); | ||
336 | else | ||
337 | *nat = (int)!!((*rnat_addr) & nat_mask); | ||
338 | ia64_setreg(_IA64_REG_AR_RSC, old_rsc); | ||
339 | } | ||
340 | } | ||
341 | |||
342 | void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, | ||
343 | unsigned long val, unsigned long nat) | ||
344 | { | ||
345 | unsigned long *bsp, *bspstore, *addr, *rnat_addr; | ||
346 | unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; | ||
347 | unsigned long nat_mask; | ||
348 | unsigned long old_rsc, new_rsc, psr; | ||
349 | unsigned long rnat; | ||
350 | long sof = (regs->cr_ifs) & 0x7f; | ||
351 | long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); | ||
352 | long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; | ||
353 | long ridx = r1 - 32; | ||
354 | |||
355 | if (ridx < sor) | ||
356 | ridx = rotate_reg(sor, rrb_gr, ridx); | ||
357 | |||
358 | old_rsc = ia64_getreg(_IA64_REG_AR_RSC); | ||
359 | /* put RSC to lazy mode, and set loadrs 0 */ | ||
360 | new_rsc = old_rsc & (~0x3fff0003); | ||
361 | ia64_setreg(_IA64_REG_AR_RSC, new_rsc); | ||
362 | bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */ | ||
363 | |||
364 | addr = kvm_rse_skip_regs(bsp, -sof + ridx); | ||
365 | nat_mask = 1UL << ia64_rse_slot_num(addr); | ||
366 | rnat_addr = ia64_rse_rnat_addr(addr); | ||
367 | |||
368 | local_irq_save(psr); | ||
369 | bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); | ||
370 | if (addr >= bspstore) { | ||
371 | |||
372 | ia64_flushrs(); | ||
373 | ia64_mf(); | ||
374 | *addr = val; | ||
375 | bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); | ||
376 | rnat = ia64_getreg(_IA64_REG_AR_RNAT); | ||
377 | if (bspstore < rnat_addr) | ||
378 | rnat = rnat & (~nat_mask); | ||
379 | else | ||
380 | *rnat_addr = (*rnat_addr)&(~nat_mask); | ||
381 | |||
382 | ia64_mf(); | ||
383 | ia64_loadrs(); | ||
384 | ia64_setreg(_IA64_REG_AR_RNAT, rnat); | ||
385 | } else { | ||
386 | rnat = ia64_getreg(_IA64_REG_AR_RNAT); | ||
387 | *addr = val; | ||
388 | if (bspstore < rnat_addr) | ||
389 | rnat = rnat&(~nat_mask); | ||
390 | else | ||
391 | *rnat_addr = (*rnat_addr) & (~nat_mask); | ||
392 | |||
393 | ia64_setreg(_IA64_REG_AR_BSPSTORE, bspstore); | ||
394 | ia64_setreg(_IA64_REG_AR_RNAT, rnat); | ||
395 | } | ||
396 | local_irq_restore(psr); | ||
397 | ia64_setreg(_IA64_REG_AR_RSC, old_rsc); | ||
398 | } | ||
399 | |||
400 | void getreg(unsigned long regnum, unsigned long *val, | ||
401 | int *nat, struct kvm_pt_regs *regs) | ||
402 | { | ||
403 | unsigned long addr, *unat; | ||
404 | if (regnum >= IA64_FIRST_STACKED_GR) { | ||
405 | get_rse_reg(regs, regnum, val, nat); | ||
406 | return; | ||
407 | } | ||
408 | |||
409 | /* | ||
410 | * Now look at registers in [0-31] range and init correct UNAT | ||
411 | */ | ||
412 | addr = (unsigned long)regs; | ||
413 | unat = ®s->eml_unat;; | ||
414 | |||
415 | addr += gr_info[regnum]; | ||
416 | |||
417 | *val = *(unsigned long *)addr; | ||
418 | /* | ||
419 | * do it only when requested | ||
420 | */ | ||
421 | if (nat) | ||
422 | *nat = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL; | ||
423 | } | ||
424 | |||
425 | void setreg(unsigned long regnum, unsigned long val, | ||
426 | int nat, struct kvm_pt_regs *regs) | ||
427 | { | ||
428 | unsigned long addr; | ||
429 | unsigned long bitmask; | ||
430 | unsigned long *unat; | ||
431 | |||
432 | /* | ||
433 | * First takes care of stacked registers | ||
434 | */ | ||
435 | if (regnum >= IA64_FIRST_STACKED_GR) { | ||
436 | set_rse_reg(regs, regnum, val, nat); | ||
437 | return; | ||
438 | } | ||
439 | |||
440 | /* | ||
441 | * Now look at registers in [0-31] range and init correct UNAT | ||
442 | */ | ||
443 | addr = (unsigned long)regs; | ||
444 | unat = ®s->eml_unat; | ||
445 | /* | ||
446 | * add offset from base of struct | ||
447 | * and do it ! | ||
448 | */ | ||
449 | addr += gr_info[regnum]; | ||
450 | |||
451 | *(unsigned long *)addr = val; | ||
452 | |||
453 | /* | ||
454 | * We need to clear the corresponding UNAT bit to fully emulate the load | ||
455 | * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4 | ||
456 | */ | ||
457 | bitmask = 1UL << ((addr >> 3) & 0x3f); | ||
458 | if (nat) | ||
459 | *unat |= bitmask; | ||
460 | else | ||
461 | *unat &= ~bitmask; | ||
462 | |||
463 | } | ||
464 | |||
465 | u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg) | ||
466 | { | ||
467 | struct kvm_pt_regs *regs = vcpu_regs(vcpu); | ||
468 | u64 val; | ||
469 | |||
470 | if (!reg) | ||
471 | return 0; | ||
472 | getreg(reg, &val, 0, regs); | ||
473 | return val; | ||
474 | } | ||
475 | |||
476 | void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat) | ||
477 | { | ||
478 | struct kvm_pt_regs *regs = vcpu_regs(vcpu); | ||
479 | long sof = (regs->cr_ifs) & 0x7f; | ||
480 | |||
481 | if (!reg) | ||
482 | return; | ||
483 | if (reg >= sof + 32) | ||
484 | return; | ||
485 | setreg(reg, value, nat, regs); /* FIXME: handle NATs later*/ | ||
486 | } | ||
487 | |||
488 | void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval, | ||
489 | struct kvm_pt_regs *regs) | ||
490 | { | ||
491 | /* Take floating register rotation into consideration*/ | ||
492 | if (regnum >= IA64_FIRST_ROTATING_FR) | ||
493 | regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); | ||
494 | #define CASE_FIXED_FP(reg) \ | ||
495 | case (reg) : \ | ||
496 | ia64_stf_spill(fpval, reg); \ | ||
497 | break | ||
498 | |||
499 | switch (regnum) { | ||
500 | CASE_FIXED_FP(0); | ||
501 | CASE_FIXED_FP(1); | ||
502 | CASE_FIXED_FP(2); | ||
503 | CASE_FIXED_FP(3); | ||
504 | CASE_FIXED_FP(4); | ||
505 | CASE_FIXED_FP(5); | ||
506 | |||
507 | CASE_FIXED_FP(6); | ||
508 | CASE_FIXED_FP(7); | ||
509 | CASE_FIXED_FP(8); | ||
510 | CASE_FIXED_FP(9); | ||
511 | CASE_FIXED_FP(10); | ||
512 | CASE_FIXED_FP(11); | ||
513 | |||
514 | CASE_FIXED_FP(12); | ||
515 | CASE_FIXED_FP(13); | ||
516 | CASE_FIXED_FP(14); | ||
517 | CASE_FIXED_FP(15); | ||
518 | CASE_FIXED_FP(16); | ||
519 | CASE_FIXED_FP(17); | ||
520 | CASE_FIXED_FP(18); | ||
521 | CASE_FIXED_FP(19); | ||
522 | CASE_FIXED_FP(20); | ||
523 | CASE_FIXED_FP(21); | ||
524 | CASE_FIXED_FP(22); | ||
525 | CASE_FIXED_FP(23); | ||
526 | CASE_FIXED_FP(24); | ||
527 | CASE_FIXED_FP(25); | ||
528 | CASE_FIXED_FP(26); | ||
529 | CASE_FIXED_FP(27); | ||
530 | CASE_FIXED_FP(28); | ||
531 | CASE_FIXED_FP(29); | ||
532 | CASE_FIXED_FP(30); | ||
533 | CASE_FIXED_FP(31); | ||
534 | CASE_FIXED_FP(32); | ||
535 | CASE_FIXED_FP(33); | ||
536 | CASE_FIXED_FP(34); | ||
537 | CASE_FIXED_FP(35); | ||
538 | CASE_FIXED_FP(36); | ||
539 | CASE_FIXED_FP(37); | ||
540 | CASE_FIXED_FP(38); | ||
541 | CASE_FIXED_FP(39); | ||
542 | CASE_FIXED_FP(40); | ||
543 | CASE_FIXED_FP(41); | ||
544 | CASE_FIXED_FP(42); | ||
545 | CASE_FIXED_FP(43); | ||
546 | CASE_FIXED_FP(44); | ||
547 | CASE_FIXED_FP(45); | ||
548 | CASE_FIXED_FP(46); | ||
549 | CASE_FIXED_FP(47); | ||
550 | CASE_FIXED_FP(48); | ||
551 | CASE_FIXED_FP(49); | ||
552 | CASE_FIXED_FP(50); | ||
553 | CASE_FIXED_FP(51); | ||
554 | CASE_FIXED_FP(52); | ||
555 | CASE_FIXED_FP(53); | ||
556 | CASE_FIXED_FP(54); | ||
557 | CASE_FIXED_FP(55); | ||
558 | CASE_FIXED_FP(56); | ||
559 | CASE_FIXED_FP(57); | ||
560 | CASE_FIXED_FP(58); | ||
561 | CASE_FIXED_FP(59); | ||
562 | CASE_FIXED_FP(60); | ||
563 | CASE_FIXED_FP(61); | ||
564 | CASE_FIXED_FP(62); | ||
565 | CASE_FIXED_FP(63); | ||
566 | CASE_FIXED_FP(64); | ||
567 | CASE_FIXED_FP(65); | ||
568 | CASE_FIXED_FP(66); | ||
569 | CASE_FIXED_FP(67); | ||
570 | CASE_FIXED_FP(68); | ||
571 | CASE_FIXED_FP(69); | ||
572 | CASE_FIXED_FP(70); | ||
573 | CASE_FIXED_FP(71); | ||
574 | CASE_FIXED_FP(72); | ||
575 | CASE_FIXED_FP(73); | ||
576 | CASE_FIXED_FP(74); | ||
577 | CASE_FIXED_FP(75); | ||
578 | CASE_FIXED_FP(76); | ||
579 | CASE_FIXED_FP(77); | ||
580 | CASE_FIXED_FP(78); | ||
581 | CASE_FIXED_FP(79); | ||
582 | CASE_FIXED_FP(80); | ||
583 | CASE_FIXED_FP(81); | ||
584 | CASE_FIXED_FP(82); | ||
585 | CASE_FIXED_FP(83); | ||
586 | CASE_FIXED_FP(84); | ||
587 | CASE_FIXED_FP(85); | ||
588 | CASE_FIXED_FP(86); | ||
589 | CASE_FIXED_FP(87); | ||
590 | CASE_FIXED_FP(88); | ||
591 | CASE_FIXED_FP(89); | ||
592 | CASE_FIXED_FP(90); | ||
593 | CASE_FIXED_FP(91); | ||
594 | CASE_FIXED_FP(92); | ||
595 | CASE_FIXED_FP(93); | ||
596 | CASE_FIXED_FP(94); | ||
597 | CASE_FIXED_FP(95); | ||
598 | CASE_FIXED_FP(96); | ||
599 | CASE_FIXED_FP(97); | ||
600 | CASE_FIXED_FP(98); | ||
601 | CASE_FIXED_FP(99); | ||
602 | CASE_FIXED_FP(100); | ||
603 | CASE_FIXED_FP(101); | ||
604 | CASE_FIXED_FP(102); | ||
605 | CASE_FIXED_FP(103); | ||
606 | CASE_FIXED_FP(104); | ||
607 | CASE_FIXED_FP(105); | ||
608 | CASE_FIXED_FP(106); | ||
609 | CASE_FIXED_FP(107); | ||
610 | CASE_FIXED_FP(108); | ||
611 | CASE_FIXED_FP(109); | ||
612 | CASE_FIXED_FP(110); | ||
613 | CASE_FIXED_FP(111); | ||
614 | CASE_FIXED_FP(112); | ||
615 | CASE_FIXED_FP(113); | ||
616 | CASE_FIXED_FP(114); | ||
617 | CASE_FIXED_FP(115); | ||
618 | CASE_FIXED_FP(116); | ||
619 | CASE_FIXED_FP(117); | ||
620 | CASE_FIXED_FP(118); | ||
621 | CASE_FIXED_FP(119); | ||
622 | CASE_FIXED_FP(120); | ||
623 | CASE_FIXED_FP(121); | ||
624 | CASE_FIXED_FP(122); | ||
625 | CASE_FIXED_FP(123); | ||
626 | CASE_FIXED_FP(124); | ||
627 | CASE_FIXED_FP(125); | ||
628 | CASE_FIXED_FP(126); | ||
629 | CASE_FIXED_FP(127); | ||
630 | } | ||
631 | #undef CASE_FIXED_FP | ||
632 | } | ||
633 | |||
634 | void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval, | ||
635 | struct kvm_pt_regs *regs) | ||
636 | { | ||
637 | /* Take floating register rotation into consideration*/ | ||
638 | if (regnum >= IA64_FIRST_ROTATING_FR) | ||
639 | regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); | ||
640 | |||
641 | #define CASE_FIXED_FP(reg) \ | ||
642 | case (reg) : \ | ||
643 | ia64_ldf_fill(reg, fpval); \ | ||
644 | break | ||
645 | |||
646 | switch (regnum) { | ||
647 | CASE_FIXED_FP(2); | ||
648 | CASE_FIXED_FP(3); | ||
649 | CASE_FIXED_FP(4); | ||
650 | CASE_FIXED_FP(5); | ||
651 | |||
652 | CASE_FIXED_FP(6); | ||
653 | CASE_FIXED_FP(7); | ||
654 | CASE_FIXED_FP(8); | ||
655 | CASE_FIXED_FP(9); | ||
656 | CASE_FIXED_FP(10); | ||
657 | CASE_FIXED_FP(11); | ||
658 | |||
659 | CASE_FIXED_FP(12); | ||
660 | CASE_FIXED_FP(13); | ||
661 | CASE_FIXED_FP(14); | ||
662 | CASE_FIXED_FP(15); | ||
663 | CASE_FIXED_FP(16); | ||
664 | CASE_FIXED_FP(17); | ||
665 | CASE_FIXED_FP(18); | ||
666 | CASE_FIXED_FP(19); | ||
667 | CASE_FIXED_FP(20); | ||
668 | CASE_FIXED_FP(21); | ||
669 | CASE_FIXED_FP(22); | ||
670 | CASE_FIXED_FP(23); | ||
671 | CASE_FIXED_FP(24); | ||
672 | CASE_FIXED_FP(25); | ||
673 | CASE_FIXED_FP(26); | ||
674 | CASE_FIXED_FP(27); | ||
675 | CASE_FIXED_FP(28); | ||
676 | CASE_FIXED_FP(29); | ||
677 | CASE_FIXED_FP(30); | ||
678 | CASE_FIXED_FP(31); | ||
679 | CASE_FIXED_FP(32); | ||
680 | CASE_FIXED_FP(33); | ||
681 | CASE_FIXED_FP(34); | ||
682 | CASE_FIXED_FP(35); | ||
683 | CASE_FIXED_FP(36); | ||
684 | CASE_FIXED_FP(37); | ||
685 | CASE_FIXED_FP(38); | ||
686 | CASE_FIXED_FP(39); | ||
687 | CASE_FIXED_FP(40); | ||
688 | CASE_FIXED_FP(41); | ||
689 | CASE_FIXED_FP(42); | ||
690 | CASE_FIXED_FP(43); | ||
691 | CASE_FIXED_FP(44); | ||
692 | CASE_FIXED_FP(45); | ||
693 | CASE_FIXED_FP(46); | ||
694 | CASE_FIXED_FP(47); | ||
695 | CASE_FIXED_FP(48); | ||
696 | CASE_FIXED_FP(49); | ||
697 | CASE_FIXED_FP(50); | ||
698 | CASE_FIXED_FP(51); | ||
699 | CASE_FIXED_FP(52); | ||
700 | CASE_FIXED_FP(53); | ||
701 | CASE_FIXED_FP(54); | ||
702 | CASE_FIXED_FP(55); | ||
703 | CASE_FIXED_FP(56); | ||
704 | CASE_FIXED_FP(57); | ||
705 | CASE_FIXED_FP(58); | ||
706 | CASE_FIXED_FP(59); | ||
707 | CASE_FIXED_FP(60); | ||
708 | CASE_FIXED_FP(61); | ||
709 | CASE_FIXED_FP(62); | ||
710 | CASE_FIXED_FP(63); | ||
711 | CASE_FIXED_FP(64); | ||
712 | CASE_FIXED_FP(65); | ||
713 | CASE_FIXED_FP(66); | ||
714 | CASE_FIXED_FP(67); | ||
715 | CASE_FIXED_FP(68); | ||
716 | CASE_FIXED_FP(69); | ||
717 | CASE_FIXED_FP(70); | ||
718 | CASE_FIXED_FP(71); | ||
719 | CASE_FIXED_FP(72); | ||
720 | CASE_FIXED_FP(73); | ||
721 | CASE_FIXED_FP(74); | ||
722 | CASE_FIXED_FP(75); | ||
723 | CASE_FIXED_FP(76); | ||
724 | CASE_FIXED_FP(77); | ||
725 | CASE_FIXED_FP(78); | ||
726 | CASE_FIXED_FP(79); | ||
727 | CASE_FIXED_FP(80); | ||
728 | CASE_FIXED_FP(81); | ||
729 | CASE_FIXED_FP(82); | ||
730 | CASE_FIXED_FP(83); | ||
731 | CASE_FIXED_FP(84); | ||
732 | CASE_FIXED_FP(85); | ||
733 | CASE_FIXED_FP(86); | ||
734 | CASE_FIXED_FP(87); | ||
735 | CASE_FIXED_FP(88); | ||
736 | CASE_FIXED_FP(89); | ||
737 | CASE_FIXED_FP(90); | ||
738 | CASE_FIXED_FP(91); | ||
739 | CASE_FIXED_FP(92); | ||
740 | CASE_FIXED_FP(93); | ||
741 | CASE_FIXED_FP(94); | ||
742 | CASE_FIXED_FP(95); | ||
743 | CASE_FIXED_FP(96); | ||
744 | CASE_FIXED_FP(97); | ||
745 | CASE_FIXED_FP(98); | ||
746 | CASE_FIXED_FP(99); | ||
747 | CASE_FIXED_FP(100); | ||
748 | CASE_FIXED_FP(101); | ||
749 | CASE_FIXED_FP(102); | ||
750 | CASE_FIXED_FP(103); | ||
751 | CASE_FIXED_FP(104); | ||
752 | CASE_FIXED_FP(105); | ||
753 | CASE_FIXED_FP(106); | ||
754 | CASE_FIXED_FP(107); | ||
755 | CASE_FIXED_FP(108); | ||
756 | CASE_FIXED_FP(109); | ||
757 | CASE_FIXED_FP(110); | ||
758 | CASE_FIXED_FP(111); | ||
759 | CASE_FIXED_FP(112); | ||
760 | CASE_FIXED_FP(113); | ||
761 | CASE_FIXED_FP(114); | ||
762 | CASE_FIXED_FP(115); | ||
763 | CASE_FIXED_FP(116); | ||
764 | CASE_FIXED_FP(117); | ||
765 | CASE_FIXED_FP(118); | ||
766 | CASE_FIXED_FP(119); | ||
767 | CASE_FIXED_FP(120); | ||
768 | CASE_FIXED_FP(121); | ||
769 | CASE_FIXED_FP(122); | ||
770 | CASE_FIXED_FP(123); | ||
771 | CASE_FIXED_FP(124); | ||
772 | CASE_FIXED_FP(125); | ||
773 | CASE_FIXED_FP(126); | ||
774 | CASE_FIXED_FP(127); | ||
775 | } | ||
776 | } | ||
777 | |||
778 | void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, | ||
779 | struct ia64_fpreg *val) | ||
780 | { | ||
781 | struct kvm_pt_regs *regs = vcpu_regs(vcpu); | ||
782 | |||
783 | getfpreg(reg, val, regs); /* FIXME: handle NATs later*/ | ||
784 | } | ||
785 | |||
786 | void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, | ||
787 | struct ia64_fpreg *val) | ||
788 | { | ||
789 | struct kvm_pt_regs *regs = vcpu_regs(vcpu); | ||
790 | |||
791 | if (reg > 1) | ||
792 | setfpreg(reg, val, regs); /* FIXME: handle NATs later*/ | ||
793 | } | ||
794 | |||
795 | /************************************************************************ | ||
796 | * lsapic timer | ||
797 | ***********************************************************************/ | ||
798 | u64 vcpu_get_itc(struct kvm_vcpu *vcpu) | ||
799 | { | ||
800 | unsigned long guest_itc; | ||
801 | guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC); | ||
802 | |||
803 | if (guest_itc >= VMX(vcpu, last_itc)) { | ||
804 | VMX(vcpu, last_itc) = guest_itc; | ||
805 | return guest_itc; | ||
806 | } else | ||
807 | return VMX(vcpu, last_itc); | ||
808 | } | ||
809 | |||
810 | static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val); | ||
811 | static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val) | ||
812 | { | ||
813 | struct kvm_vcpu *v; | ||
814 | int i; | ||
815 | long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC); | ||
816 | unsigned long vitv = VCPU(vcpu, itv); | ||
817 | |||
818 | if (vcpu->vcpu_id == 0) { | ||
819 | for (i = 0; i < MAX_VCPU_NUM; i++) { | ||
820 | v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); | ||
821 | VMX(v, itc_offset) = itc_offset; | ||
822 | VMX(v, last_itc) = 0; | ||
823 | } | ||
824 | } | ||
825 | VMX(vcpu, last_itc) = 0; | ||
826 | if (VCPU(vcpu, itm) <= val) { | ||
827 | VMX(vcpu, itc_check) = 0; | ||
828 | vcpu_unpend_interrupt(vcpu, vitv); | ||
829 | } else { | ||
830 | VMX(vcpu, itc_check) = 1; | ||
831 | vcpu_set_itm(vcpu, VCPU(vcpu, itm)); | ||
832 | } | ||
833 | |||
834 | } | ||
835 | |||
836 | static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu) | ||
837 | { | ||
838 | return ((u64)VCPU(vcpu, itm)); | ||
839 | } | ||
840 | |||
841 | static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val) | ||
842 | { | ||
843 | unsigned long vitv = VCPU(vcpu, itv); | ||
844 | VCPU(vcpu, itm) = val; | ||
845 | |||
846 | if (val > vcpu_get_itc(vcpu)) { | ||
847 | VMX(vcpu, itc_check) = 1; | ||
848 | vcpu_unpend_interrupt(vcpu, vitv); | ||
849 | VMX(vcpu, timer_pending) = 0; | ||
850 | } else | ||
851 | VMX(vcpu, itc_check) = 0; | ||
852 | } | ||
853 | |||
854 | #define ITV_VECTOR(itv) (itv&0xff) | ||
855 | #define ITV_IRQ_MASK(itv) (itv&(1<<16)) | ||
856 | |||
857 | static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val) | ||
858 | { | ||
859 | VCPU(vcpu, itv) = val; | ||
860 | if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) { | ||
861 | vcpu_pend_interrupt(vcpu, ITV_VECTOR(val)); | ||
862 | vcpu->arch.timer_pending = 0; | ||
863 | } | ||
864 | } | ||
865 | |||
866 | static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val) | ||
867 | { | ||
868 | int vec; | ||
869 | |||
870 | vec = highest_inservice_irq(vcpu); | ||
871 | if (vec == NULL_VECTOR) | ||
872 | return; | ||
873 | VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63)); | ||
874 | VCPU(vcpu, eoi) = 0; | ||
875 | vcpu->arch.irq_new_pending = 1; | ||
876 | |||
877 | } | ||
878 | |||
879 | /* See Table 5-8 in SDM vol2 for the definition */ | ||
880 | int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice) | ||
881 | { | ||
882 | union ia64_tpr vtpr; | ||
883 | |||
884 | vtpr.val = VCPU(vcpu, tpr); | ||
885 | |||
886 | if (h_inservice == NMI_VECTOR) | ||
887 | return IRQ_MASKED_BY_INSVC; | ||
888 | |||
889 | if (h_pending == NMI_VECTOR) { | ||
890 | /* Non Maskable Interrupt */ | ||
891 | return IRQ_NO_MASKED; | ||
892 | } | ||
893 | |||
894 | if (h_inservice == ExtINT_VECTOR) | ||
895 | return IRQ_MASKED_BY_INSVC; | ||
896 | |||
897 | if (h_pending == ExtINT_VECTOR) { | ||
898 | if (vtpr.mmi) { | ||
899 | /* mask all external IRQ */ | ||
900 | return IRQ_MASKED_BY_VTPR; | ||
901 | } else | ||
902 | return IRQ_NO_MASKED; | ||
903 | } | ||
904 | |||
905 | if (is_higher_irq(h_pending, h_inservice)) { | ||
906 | if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4))) | ||
907 | return IRQ_NO_MASKED; | ||
908 | else | ||
909 | return IRQ_MASKED_BY_VTPR; | ||
910 | } else { | ||
911 | return IRQ_MASKED_BY_INSVC; | ||
912 | } | ||
913 | } | ||
914 | |||
915 | void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec) | ||
916 | { | ||
917 | long spsr; | ||
918 | int ret; | ||
919 | |||
920 | local_irq_save(spsr); | ||
921 | ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0])); | ||
922 | local_irq_restore(spsr); | ||
923 | |||
924 | vcpu->arch.irq_new_pending = 1; | ||
925 | } | ||
926 | |||
927 | void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec) | ||
928 | { | ||
929 | long spsr; | ||
930 | int ret; | ||
931 | |||
932 | local_irq_save(spsr); | ||
933 | ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0])); | ||
934 | local_irq_restore(spsr); | ||
935 | if (ret) { | ||
936 | vcpu->arch.irq_new_pending = 1; | ||
937 | wmb(); | ||
938 | } | ||
939 | } | ||
940 | |||
941 | void update_vhpi(struct kvm_vcpu *vcpu, int vec) | ||
942 | { | ||
943 | u64 vhpi; | ||
944 | |||
945 | if (vec == NULL_VECTOR) | ||
946 | vhpi = 0; | ||
947 | else if (vec == NMI_VECTOR) | ||
948 | vhpi = 32; | ||
949 | else if (vec == ExtINT_VECTOR) | ||
950 | vhpi = 16; | ||
951 | else | ||
952 | vhpi = vec >> 4; | ||
953 | |||
954 | VCPU(vcpu, vhpi) = vhpi; | ||
955 | if (VCPU(vcpu, vac).a_int) | ||
956 | ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT, | ||
957 | (u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0); | ||
958 | } | ||
959 | |||
960 | u64 vcpu_get_ivr(struct kvm_vcpu *vcpu) | ||
961 | { | ||
962 | int vec, h_inservice, mask; | ||
963 | |||
964 | vec = highest_pending_irq(vcpu); | ||
965 | h_inservice = highest_inservice_irq(vcpu); | ||
966 | mask = irq_masked(vcpu, vec, h_inservice); | ||
967 | if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) { | ||
968 | if (VCPU(vcpu, vhpi)) | ||
969 | update_vhpi(vcpu, NULL_VECTOR); | ||
970 | return IA64_SPURIOUS_INT_VECTOR; | ||
971 | } | ||
972 | if (mask == IRQ_MASKED_BY_VTPR) { | ||
973 | update_vhpi(vcpu, vec); | ||
974 | return IA64_SPURIOUS_INT_VECTOR; | ||
975 | } | ||
976 | VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63)); | ||
977 | vcpu_unpend_interrupt(vcpu, vec); | ||
978 | return (u64)vec; | ||
979 | } | ||
980 | |||
981 | /************************************************************************** | ||
982 | Privileged operation emulation routines | ||
983 | **************************************************************************/ | ||
984 | u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr) | ||
985 | { | ||
986 | union ia64_pta vpta; | ||
987 | union ia64_rr vrr; | ||
988 | u64 pval; | ||
989 | u64 vhpt_offset; | ||
990 | |||
991 | vpta.val = vcpu_get_pta(vcpu); | ||
992 | vrr.val = vcpu_get_rr(vcpu, vadr); | ||
993 | vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1); | ||
994 | if (vpta.vf) { | ||
995 | pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val, | ||
996 | vpta.val, 0, 0, 0, 0); | ||
997 | } else { | ||
998 | pval = (vadr & VRN_MASK) | vhpt_offset | | ||
999 | (vpta.val << 3 >> (vpta.size + 3) << (vpta.size)); | ||
1000 | } | ||
1001 | return pval; | ||
1002 | } | ||
1003 | |||
1004 | u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr) | ||
1005 | { | ||
1006 | union ia64_rr vrr; | ||
1007 | union ia64_pta vpta; | ||
1008 | u64 pval; | ||
1009 | |||
1010 | vpta.val = vcpu_get_pta(vcpu); | ||
1011 | vrr.val = vcpu_get_rr(vcpu, vadr); | ||
1012 | if (vpta.vf) { | ||
1013 | pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val, | ||
1014 | 0, 0, 0, 0, 0); | ||
1015 | } else | ||
1016 | pval = 1; | ||
1017 | |||
1018 | return pval; | ||
1019 | } | ||
1020 | |||
1021 | u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr) | ||
1022 | { | ||
1023 | struct thash_data *data; | ||
1024 | union ia64_pta vpta; | ||
1025 | u64 key; | ||
1026 | |||
1027 | vpta.val = vcpu_get_pta(vcpu); | ||
1028 | if (vpta.vf == 0) { | ||
1029 | key = 1; | ||
1030 | return key; | ||
1031 | } | ||
1032 | data = vtlb_lookup(vcpu, vadr, D_TLB); | ||
1033 | if (!data || !data->p) | ||
1034 | key = 1; | ||
1035 | else | ||
1036 | key = data->key; | ||
1037 | |||
1038 | return key; | ||
1039 | } | ||
1040 | |||
1041 | |||
1042 | |||
1043 | void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst) | ||
1044 | { | ||
1045 | unsigned long thash, vadr; | ||
1046 | |||
1047 | vadr = vcpu_get_gr(vcpu, inst.M46.r3); | ||
1048 | thash = vcpu_thash(vcpu, vadr); | ||
1049 | vcpu_set_gr(vcpu, inst.M46.r1, thash, 0); | ||
1050 | } | ||
1051 | |||
1052 | |||
1053 | void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst) | ||
1054 | { | ||
1055 | unsigned long tag, vadr; | ||
1056 | |||
1057 | vadr = vcpu_get_gr(vcpu, inst.M46.r3); | ||
1058 | tag = vcpu_ttag(vcpu, vadr); | ||
1059 | vcpu_set_gr(vcpu, inst.M46.r1, tag, 0); | ||
1060 | } | ||
1061 | |||
1062 | int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr) | ||
1063 | { | ||
1064 | struct thash_data *data; | ||
1065 | union ia64_isr visr, pt_isr; | ||
1066 | struct kvm_pt_regs *regs; | ||
1067 | struct ia64_psr vpsr; | ||
1068 | |||
1069 | regs = vcpu_regs(vcpu); | ||
1070 | pt_isr.val = VMX(vcpu, cr_isr); | ||
1071 | visr.val = 0; | ||
1072 | visr.ei = pt_isr.ei; | ||
1073 | visr.ir = pt_isr.ir; | ||
1074 | vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); | ||
1075 | visr.na = 1; | ||
1076 | |||
1077 | data = vhpt_lookup(vadr); | ||
1078 | if (data) { | ||
1079 | if (data->p == 0) { | ||
1080 | vcpu_set_isr(vcpu, visr.val); | ||
1081 | data_page_not_present(vcpu, vadr); | ||
1082 | return IA64_FAULT; | ||
1083 | } else if (data->ma == VA_MATTR_NATPAGE) { | ||
1084 | vcpu_set_isr(vcpu, visr.val); | ||
1085 | dnat_page_consumption(vcpu, vadr); | ||
1086 | return IA64_FAULT; | ||
1087 | } else { | ||
1088 | *padr = (data->gpaddr >> data->ps << data->ps) | | ||
1089 | (vadr & (PSIZE(data->ps) - 1)); | ||
1090 | return IA64_NO_FAULT; | ||
1091 | } | ||
1092 | } | ||
1093 | |||
1094 | data = vtlb_lookup(vcpu, vadr, D_TLB); | ||
1095 | if (data) { | ||
1096 | if (data->p == 0) { | ||
1097 | vcpu_set_isr(vcpu, visr.val); | ||
1098 | data_page_not_present(vcpu, vadr); | ||
1099 | return IA64_FAULT; | ||
1100 | } else if (data->ma == VA_MATTR_NATPAGE) { | ||
1101 | vcpu_set_isr(vcpu, visr.val); | ||
1102 | dnat_page_consumption(vcpu, vadr); | ||
1103 | return IA64_FAULT; | ||
1104 | } else{ | ||
1105 | *padr = ((data->ppn >> (data->ps - 12)) << data->ps) | ||
1106 | | (vadr & (PSIZE(data->ps) - 1)); | ||
1107 | return IA64_NO_FAULT; | ||
1108 | } | ||
1109 | } | ||
1110 | if (!vhpt_enabled(vcpu, vadr, NA_REF)) { | ||
1111 | if (vpsr.ic) { | ||
1112 | vcpu_set_isr(vcpu, visr.val); | ||
1113 | alt_dtlb(vcpu, vadr); | ||
1114 | return IA64_FAULT; | ||
1115 | } else { | ||
1116 | nested_dtlb(vcpu); | ||
1117 | return IA64_FAULT; | ||
1118 | } | ||
1119 | } else { | ||
1120 | if (vpsr.ic) { | ||
1121 | vcpu_set_isr(vcpu, visr.val); | ||
1122 | dvhpt_fault(vcpu, vadr); | ||
1123 | return IA64_FAULT; | ||
1124 | } else{ | ||
1125 | nested_dtlb(vcpu); | ||
1126 | return IA64_FAULT; | ||
1127 | } | ||
1128 | } | ||
1129 | |||
1130 | return IA64_NO_FAULT; | ||
1131 | } | ||
1132 | |||
1133 | |||
1134 | int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst) | ||
1135 | { | ||
1136 | unsigned long r1, r3; | ||
1137 | |||
1138 | r3 = vcpu_get_gr(vcpu, inst.M46.r3); | ||
1139 | |||
1140 | if (vcpu_tpa(vcpu, r3, &r1)) | ||
1141 | return IA64_FAULT; | ||
1142 | |||
1143 | vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); | ||
1144 | return(IA64_NO_FAULT); | ||
1145 | } | ||
1146 | |||
1147 | void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst) | ||
1148 | { | ||
1149 | unsigned long r1, r3; | ||
1150 | |||
1151 | r3 = vcpu_get_gr(vcpu, inst.M46.r3); | ||
1152 | r1 = vcpu_tak(vcpu, r3); | ||
1153 | vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); | ||
1154 | } | ||
1155 | |||
1156 | |||
1157 | /************************************ | ||
1158 | * Insert/Purge translation register/cache | ||
1159 | ************************************/ | ||
1160 | void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) | ||
1161 | { | ||
1162 | thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB); | ||
1163 | } | ||
1164 | |||
1165 | void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) | ||
1166 | { | ||
1167 | thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB); | ||
1168 | } | ||
1169 | |||
1170 | void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) | ||
1171 | { | ||
1172 | u64 ps, va, rid; | ||
1173 | struct thash_data *p_itr; | ||
1174 | |||
1175 | ps = itir_ps(itir); | ||
1176 | va = PAGEALIGN(ifa, ps); | ||
1177 | pte &= ~PAGE_FLAGS_RV_MASK; | ||
1178 | rid = vcpu_get_rr(vcpu, ifa); | ||
1179 | rid = rid & RR_RID_MASK; | ||
1180 | p_itr = (struct thash_data *)&vcpu->arch.itrs[slot]; | ||
1181 | vcpu_set_tr(p_itr, pte, itir, va, rid); | ||
1182 | vcpu_quick_region_set(VMX(vcpu, itr_regions), va); | ||
1183 | } | ||
1184 | |||
1185 | |||
1186 | void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) | ||
1187 | { | ||
1188 | u64 gpfn; | ||
1189 | u64 ps, va, rid; | ||
1190 | struct thash_data *p_dtr; | ||
1191 | |||
1192 | ps = itir_ps(itir); | ||
1193 | va = PAGEALIGN(ifa, ps); | ||
1194 | pte &= ~PAGE_FLAGS_RV_MASK; | ||
1195 | |||
1196 | if (ps != _PAGE_SIZE_16M) | ||
1197 | thash_purge_entries(vcpu, va, ps); | ||
1198 | gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT; | ||
1199 | if (__gpfn_is_io(gpfn)) | ||
1200 | pte |= VTLB_PTE_IO; | ||
1201 | rid = vcpu_get_rr(vcpu, va); | ||
1202 | rid = rid & RR_RID_MASK; | ||
1203 | p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot]; | ||
1204 | vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot], | ||
1205 | pte, itir, va, rid); | ||
1206 | vcpu_quick_region_set(VMX(vcpu, dtr_regions), va); | ||
1207 | } | ||
1208 | |||
1209 | void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) | ||
1210 | { | ||
1211 | int index; | ||
1212 | u64 va; | ||
1213 | |||
1214 | va = PAGEALIGN(ifa, ps); | ||
1215 | while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0) | ||
1216 | vcpu->arch.dtrs[index].page_flags = 0; | ||
1217 | |||
1218 | thash_purge_entries(vcpu, va, ps); | ||
1219 | } | ||
1220 | |||
1221 | void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) | ||
1222 | { | ||
1223 | int index; | ||
1224 | u64 va; | ||
1225 | |||
1226 | va = PAGEALIGN(ifa, ps); | ||
1227 | while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0) | ||
1228 | vcpu->arch.itrs[index].page_flags = 0; | ||
1229 | |||
1230 | thash_purge_entries(vcpu, va, ps); | ||
1231 | } | ||
1232 | |||
1233 | void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps) | ||
1234 | { | ||
1235 | va = PAGEALIGN(va, ps); | ||
1236 | thash_purge_entries(vcpu, va, ps); | ||
1237 | } | ||
1238 | |||
1239 | void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va) | ||
1240 | { | ||
1241 | thash_purge_all(vcpu); | ||
1242 | } | ||
1243 | |||
1244 | void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps) | ||
1245 | { | ||
1246 | struct exit_ctl_data *p = &vcpu->arch.exit_data; | ||
1247 | long psr; | ||
1248 | local_irq_save(psr); | ||
1249 | p->exit_reason = EXIT_REASON_PTC_G; | ||
1250 | |||
1251 | p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va); | ||
1252 | p->u.ptc_g_data.vaddr = va; | ||
1253 | p->u.ptc_g_data.ps = ps; | ||
1254 | vmm_transition(vcpu); | ||
1255 | /* Do Local Purge Here*/ | ||
1256 | vcpu_ptc_l(vcpu, va, ps); | ||
1257 | local_irq_restore(psr); | ||
1258 | } | ||
1259 | |||
1260 | |||
1261 | void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps) | ||
1262 | { | ||
1263 | vcpu_ptc_ga(vcpu, va, ps); | ||
1264 | } | ||
1265 | |||
1266 | void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst) | ||
1267 | { | ||
1268 | unsigned long ifa; | ||
1269 | |||
1270 | ifa = vcpu_get_gr(vcpu, inst.M45.r3); | ||
1271 | vcpu_ptc_e(vcpu, ifa); | ||
1272 | } | ||
1273 | |||
1274 | void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst) | ||
1275 | { | ||
1276 | unsigned long ifa, itir; | ||
1277 | |||
1278 | ifa = vcpu_get_gr(vcpu, inst.M45.r3); | ||
1279 | itir = vcpu_get_gr(vcpu, inst.M45.r2); | ||
1280 | vcpu_ptc_g(vcpu, ifa, itir_ps(itir)); | ||
1281 | } | ||
1282 | |||
1283 | void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst) | ||
1284 | { | ||
1285 | unsigned long ifa, itir; | ||
1286 | |||
1287 | ifa = vcpu_get_gr(vcpu, inst.M45.r3); | ||
1288 | itir = vcpu_get_gr(vcpu, inst.M45.r2); | ||
1289 | vcpu_ptc_ga(vcpu, ifa, itir_ps(itir)); | ||
1290 | } | ||
1291 | |||
1292 | void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst) | ||
1293 | { | ||
1294 | unsigned long ifa, itir; | ||
1295 | |||
1296 | ifa = vcpu_get_gr(vcpu, inst.M45.r3); | ||
1297 | itir = vcpu_get_gr(vcpu, inst.M45.r2); | ||
1298 | vcpu_ptc_l(vcpu, ifa, itir_ps(itir)); | ||
1299 | } | ||
1300 | |||
1301 | void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst) | ||
1302 | { | ||
1303 | unsigned long ifa, itir; | ||
1304 | |||
1305 | ifa = vcpu_get_gr(vcpu, inst.M45.r3); | ||
1306 | itir = vcpu_get_gr(vcpu, inst.M45.r2); | ||
1307 | vcpu_ptr_d(vcpu, ifa, itir_ps(itir)); | ||
1308 | } | ||
1309 | |||
1310 | void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst) | ||
1311 | { | ||
1312 | unsigned long ifa, itir; | ||
1313 | |||
1314 | ifa = vcpu_get_gr(vcpu, inst.M45.r3); | ||
1315 | itir = vcpu_get_gr(vcpu, inst.M45.r2); | ||
1316 | vcpu_ptr_i(vcpu, ifa, itir_ps(itir)); | ||
1317 | } | ||
1318 | |||
1319 | void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst) | ||
1320 | { | ||
1321 | unsigned long itir, ifa, pte, slot; | ||
1322 | |||
1323 | slot = vcpu_get_gr(vcpu, inst.M45.r3); | ||
1324 | pte = vcpu_get_gr(vcpu, inst.M45.r2); | ||
1325 | itir = vcpu_get_itir(vcpu); | ||
1326 | ifa = vcpu_get_ifa(vcpu); | ||
1327 | vcpu_itr_d(vcpu, slot, pte, itir, ifa); | ||
1328 | } | ||
1329 | |||
1330 | |||
1331 | |||
1332 | void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst) | ||
1333 | { | ||
1334 | unsigned long itir, ifa, pte, slot; | ||
1335 | |||
1336 | slot = vcpu_get_gr(vcpu, inst.M45.r3); | ||
1337 | pte = vcpu_get_gr(vcpu, inst.M45.r2); | ||
1338 | itir = vcpu_get_itir(vcpu); | ||
1339 | ifa = vcpu_get_ifa(vcpu); | ||
1340 | vcpu_itr_i(vcpu, slot, pte, itir, ifa); | ||
1341 | } | ||
1342 | |||
1343 | void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst) | ||
1344 | { | ||
1345 | unsigned long itir, ifa, pte; | ||
1346 | |||
1347 | itir = vcpu_get_itir(vcpu); | ||
1348 | ifa = vcpu_get_ifa(vcpu); | ||
1349 | pte = vcpu_get_gr(vcpu, inst.M45.r2); | ||
1350 | vcpu_itc_d(vcpu, pte, itir, ifa); | ||
1351 | } | ||
1352 | |||
1353 | void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst) | ||
1354 | { | ||
1355 | unsigned long itir, ifa, pte; | ||
1356 | |||
1357 | itir = vcpu_get_itir(vcpu); | ||
1358 | ifa = vcpu_get_ifa(vcpu); | ||
1359 | pte = vcpu_get_gr(vcpu, inst.M45.r2); | ||
1360 | vcpu_itc_i(vcpu, pte, itir, ifa); | ||
1361 | } | ||
1362 | |||
1363 | /************************************* | ||
1364 | * Moves to semi-privileged registers | ||
1365 | *************************************/ | ||
1366 | |||
1367 | void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst) | ||
1368 | { | ||
1369 | unsigned long imm; | ||
1370 | |||
1371 | if (inst.M30.s) | ||
1372 | imm = -inst.M30.imm; | ||
1373 | else | ||
1374 | imm = inst.M30.imm; | ||
1375 | |||
1376 | vcpu_set_itc(vcpu, imm); | ||
1377 | } | ||
1378 | |||
1379 | void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) | ||
1380 | { | ||
1381 | unsigned long r2; | ||
1382 | |||
1383 | r2 = vcpu_get_gr(vcpu, inst.M29.r2); | ||
1384 | vcpu_set_itc(vcpu, r2); | ||
1385 | } | ||
1386 | |||
1387 | |||
1388 | void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) | ||
1389 | { | ||
1390 | unsigned long r1; | ||
1391 | |||
1392 | r1 = vcpu_get_itc(vcpu); | ||
1393 | vcpu_set_gr(vcpu, inst.M31.r1, r1, 0); | ||
1394 | } | ||
1395 | /************************************************************************** | ||
1396 | struct kvm_vcpu*protection key register access routines | ||
1397 | **************************************************************************/ | ||
1398 | |||
1399 | unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg) | ||
1400 | { | ||
1401 | return ((unsigned long)ia64_get_pkr(reg)); | ||
1402 | } | ||
1403 | |||
1404 | void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val) | ||
1405 | { | ||
1406 | ia64_set_pkr(reg, val); | ||
1407 | } | ||
1408 | |||
1409 | |||
1410 | unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa) | ||
1411 | { | ||
1412 | union ia64_rr rr, rr1; | ||
1413 | |||
1414 | rr.val = vcpu_get_rr(vcpu, ifa); | ||
1415 | rr1.val = 0; | ||
1416 | rr1.ps = rr.ps; | ||
1417 | rr1.rid = rr.rid; | ||
1418 | return (rr1.val); | ||
1419 | } | ||
1420 | |||
1421 | |||
1422 | |||
1423 | /******************************** | ||
1424 | * Moves to privileged registers | ||
1425 | ********************************/ | ||
1426 | unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg, | ||
1427 | unsigned long val) | ||
1428 | { | ||
1429 | union ia64_rr oldrr, newrr; | ||
1430 | unsigned long rrval; | ||
1431 | struct exit_ctl_data *p = &vcpu->arch.exit_data; | ||
1432 | unsigned long psr; | ||
1433 | |||
1434 | oldrr.val = vcpu_get_rr(vcpu, reg); | ||
1435 | newrr.val = val; | ||
1436 | vcpu->arch.vrr[reg >> VRN_SHIFT] = val; | ||
1437 | |||
1438 | switch ((unsigned long)(reg >> VRN_SHIFT)) { | ||
1439 | case VRN6: | ||
1440 | vcpu->arch.vmm_rr = vrrtomrr(val); | ||
1441 | local_irq_save(psr); | ||
1442 | p->exit_reason = EXIT_REASON_SWITCH_RR6; | ||
1443 | vmm_transition(vcpu); | ||
1444 | local_irq_restore(psr); | ||
1445 | break; | ||
1446 | case VRN4: | ||
1447 | rrval = vrrtomrr(val); | ||
1448 | vcpu->arch.metaphysical_saved_rr4 = rrval; | ||
1449 | if (!is_physical_mode(vcpu)) | ||
1450 | ia64_set_rr(reg, rrval); | ||
1451 | break; | ||
1452 | case VRN0: | ||
1453 | rrval = vrrtomrr(val); | ||
1454 | vcpu->arch.metaphysical_saved_rr0 = rrval; | ||
1455 | if (!is_physical_mode(vcpu)) | ||
1456 | ia64_set_rr(reg, rrval); | ||
1457 | break; | ||
1458 | default: | ||
1459 | ia64_set_rr(reg, vrrtomrr(val)); | ||
1460 | break; | ||
1461 | } | ||
1462 | |||
1463 | return (IA64_NO_FAULT); | ||
1464 | } | ||
1465 | |||
1466 | |||
1467 | |||
1468 | void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst) | ||
1469 | { | ||
1470 | unsigned long r3, r2; | ||
1471 | |||
1472 | r3 = vcpu_get_gr(vcpu, inst.M42.r3); | ||
1473 | r2 = vcpu_get_gr(vcpu, inst.M42.r2); | ||
1474 | vcpu_set_rr(vcpu, r3, r2); | ||
1475 | } | ||
1476 | |||
1477 | void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst) | ||
1478 | { | ||
1479 | } | ||
1480 | |||
1481 | void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst) | ||
1482 | { | ||
1483 | } | ||
1484 | |||
1485 | void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst) | ||
1486 | { | ||
1487 | unsigned long r3, r2; | ||
1488 | |||
1489 | r3 = vcpu_get_gr(vcpu, inst.M42.r3); | ||
1490 | r2 = vcpu_get_gr(vcpu, inst.M42.r2); | ||
1491 | vcpu_set_pmc(vcpu, r3, r2); | ||
1492 | } | ||
1493 | |||
1494 | void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst) | ||
1495 | { | ||
1496 | unsigned long r3, r2; | ||
1497 | |||
1498 | r3 = vcpu_get_gr(vcpu, inst.M42.r3); | ||
1499 | r2 = vcpu_get_gr(vcpu, inst.M42.r2); | ||
1500 | vcpu_set_pmd(vcpu, r3, r2); | ||
1501 | } | ||
1502 | |||
1503 | void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst) | ||
1504 | { | ||
1505 | u64 r3, r2; | ||
1506 | |||
1507 | r3 = vcpu_get_gr(vcpu, inst.M42.r3); | ||
1508 | r2 = vcpu_get_gr(vcpu, inst.M42.r2); | ||
1509 | vcpu_set_pkr(vcpu, r3, r2); | ||
1510 | } | ||
1511 | |||
1512 | |||
1513 | |||
1514 | void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst) | ||
1515 | { | ||
1516 | unsigned long r3, r1; | ||
1517 | |||
1518 | r3 = vcpu_get_gr(vcpu, inst.M43.r3); | ||
1519 | r1 = vcpu_get_rr(vcpu, r3); | ||
1520 | vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); | ||
1521 | } | ||
1522 | |||
1523 | void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst) | ||
1524 | { | ||
1525 | unsigned long r3, r1; | ||
1526 | |||
1527 | r3 = vcpu_get_gr(vcpu, inst.M43.r3); | ||
1528 | r1 = vcpu_get_pkr(vcpu, r3); | ||
1529 | vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); | ||
1530 | } | ||
1531 | |||
1532 | void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst) | ||
1533 | { | ||
1534 | unsigned long r3, r1; | ||
1535 | |||
1536 | r3 = vcpu_get_gr(vcpu, inst.M43.r3); | ||
1537 | r1 = vcpu_get_dbr(vcpu, r3); | ||
1538 | vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); | ||
1539 | } | ||
1540 | |||
1541 | void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst) | ||
1542 | { | ||
1543 | unsigned long r3, r1; | ||
1544 | |||
1545 | r3 = vcpu_get_gr(vcpu, inst.M43.r3); | ||
1546 | r1 = vcpu_get_ibr(vcpu, r3); | ||
1547 | vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); | ||
1548 | } | ||
1549 | |||
1550 | void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst) | ||
1551 | { | ||
1552 | unsigned long r3, r1; | ||
1553 | |||
1554 | r3 = vcpu_get_gr(vcpu, inst.M43.r3); | ||
1555 | r1 = vcpu_get_pmc(vcpu, r3); | ||
1556 | vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); | ||
1557 | } | ||
1558 | |||
1559 | |||
1560 | unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg) | ||
1561 | { | ||
1562 | /* FIXME: This could get called as a result of a rsvd-reg fault */ | ||
1563 | if (reg > (ia64_get_cpuid(3) & 0xff)) | ||
1564 | return 0; | ||
1565 | else | ||
1566 | return ia64_get_cpuid(reg); | ||
1567 | } | ||
1568 | |||
1569 | void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst) | ||
1570 | { | ||
1571 | unsigned long r3, r1; | ||
1572 | |||
1573 | r3 = vcpu_get_gr(vcpu, inst.M43.r3); | ||
1574 | r1 = vcpu_get_cpuid(vcpu, r3); | ||
1575 | vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); | ||
1576 | } | ||
1577 | |||
1578 | void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val) | ||
1579 | { | ||
1580 | VCPU(vcpu, tpr) = val; | ||
1581 | vcpu->arch.irq_check = 1; | ||
1582 | } | ||
1583 | |||
1584 | unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst) | ||
1585 | { | ||
1586 | unsigned long r2; | ||
1587 | |||
1588 | r2 = vcpu_get_gr(vcpu, inst.M32.r2); | ||
1589 | VCPU(vcpu, vcr[inst.M32.cr3]) = r2; | ||
1590 | |||
1591 | switch (inst.M32.cr3) { | ||
1592 | case 0: | ||
1593 | vcpu_set_dcr(vcpu, r2); | ||
1594 | break; | ||
1595 | case 1: | ||
1596 | vcpu_set_itm(vcpu, r2); | ||
1597 | break; | ||
1598 | case 66: | ||
1599 | vcpu_set_tpr(vcpu, r2); | ||
1600 | break; | ||
1601 | case 67: | ||
1602 | vcpu_set_eoi(vcpu, r2); | ||
1603 | break; | ||
1604 | default: | ||
1605 | break; | ||
1606 | } | ||
1607 | |||
1608 | return 0; | ||
1609 | } | ||
1610 | |||
1611 | |||
1612 | unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst) | ||
1613 | { | ||
1614 | unsigned long tgt = inst.M33.r1; | ||
1615 | unsigned long val; | ||
1616 | |||
1617 | switch (inst.M33.cr3) { | ||
1618 | case 65: | ||
1619 | val = vcpu_get_ivr(vcpu); | ||
1620 | vcpu_set_gr(vcpu, tgt, val, 0); | ||
1621 | break; | ||
1622 | |||
1623 | case 67: | ||
1624 | vcpu_set_gr(vcpu, tgt, 0L, 0); | ||
1625 | break; | ||
1626 | default: | ||
1627 | val = VCPU(vcpu, vcr[inst.M33.cr3]); | ||
1628 | vcpu_set_gr(vcpu, tgt, val, 0); | ||
1629 | break; | ||
1630 | } | ||
1631 | |||
1632 | return 0; | ||
1633 | } | ||
1634 | |||
1635 | |||
1636 | |||
1637 | void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val) | ||
1638 | { | ||
1639 | |||
1640 | unsigned long mask; | ||
1641 | struct kvm_pt_regs *regs; | ||
1642 | struct ia64_psr old_psr, new_psr; | ||
1643 | |||
1644 | old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); | ||
1645 | |||
1646 | regs = vcpu_regs(vcpu); | ||
1647 | /* We only support guest as: | ||
1648 | * vpsr.pk = 0 | ||
1649 | * vpsr.is = 0 | ||
1650 | * Otherwise panic | ||
1651 | */ | ||
1652 | if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) | ||
1653 | panic_vm(vcpu); | ||
1654 | |||
1655 | /* | ||
1656 | * For those IA64_PSR bits: id/da/dd/ss/ed/ia | ||
1657 | * Since these bits will become 0, after success execution of each | ||
1658 | * instruction, we will change set them to mIA64_PSR | ||
1659 | */ | ||
1660 | VCPU(vcpu, vpsr) = val | ||
1661 | & (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | | ||
1662 | IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA)); | ||
1663 | |||
1664 | if (!old_psr.i && (val & IA64_PSR_I)) { | ||
1665 | /* vpsr.i 0->1 */ | ||
1666 | vcpu->arch.irq_check = 1; | ||
1667 | } | ||
1668 | new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); | ||
1669 | |||
1670 | /* | ||
1671 | * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr) | ||
1672 | * , except for the following bits: | ||
1673 | * ic/i/dt/si/rt/mc/it/bn/vm | ||
1674 | */ | ||
1675 | mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI + | ||
1676 | IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN + | ||
1677 | IA64_PSR_VM; | ||
1678 | |||
1679 | regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask)); | ||
1680 | |||
1681 | check_mm_mode_switch(vcpu, old_psr, new_psr); | ||
1682 | |||
1683 | return ; | ||
1684 | } | ||
1685 | |||
1686 | unsigned long vcpu_cover(struct kvm_vcpu *vcpu) | ||
1687 | { | ||
1688 | struct ia64_psr vpsr; | ||
1689 | |||
1690 | struct kvm_pt_regs *regs = vcpu_regs(vcpu); | ||
1691 | vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); | ||
1692 | |||
1693 | if (!vpsr.ic) | ||
1694 | VCPU(vcpu, ifs) = regs->cr_ifs; | ||
1695 | regs->cr_ifs = IA64_IFS_V; | ||
1696 | return (IA64_NO_FAULT); | ||
1697 | } | ||
1698 | |||
1699 | |||
1700 | |||
1701 | /************************************************************************** | ||
1702 | VCPU banked general register access routines | ||
1703 | **************************************************************************/ | ||
1704 | #define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ | ||
1705 | do { \ | ||
1706 | __asm__ __volatile__ ( \ | ||
1707 | ";;extr.u %0 = %3,%6,16;;\n" \ | ||
1708 | "dep %1 = %0, %1, 0, 16;;\n" \ | ||
1709 | "st8 [%4] = %1\n" \ | ||
1710 | "extr.u %0 = %2, 16, 16;;\n" \ | ||
1711 | "dep %3 = %0, %3, %6, 16;;\n" \ | ||
1712 | "st8 [%5] = %3\n" \ | ||
1713 | ::"r"(i), "r"(*b1unat), "r"(*b0unat), \ | ||
1714 | "r"(*runat), "r"(b1unat), "r"(runat), \ | ||
1715 | "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ | ||
1716 | } while (0) | ||
1717 | |||
1718 | void vcpu_bsw0(struct kvm_vcpu *vcpu) | ||
1719 | { | ||
1720 | unsigned long i; | ||
1721 | |||
1722 | struct kvm_pt_regs *regs = vcpu_regs(vcpu); | ||
1723 | unsigned long *r = ®s->r16; | ||
1724 | unsigned long *b0 = &VCPU(vcpu, vbgr[0]); | ||
1725 | unsigned long *b1 = &VCPU(vcpu, vgr[0]); | ||
1726 | unsigned long *runat = ®s->eml_unat; | ||
1727 | unsigned long *b0unat = &VCPU(vcpu, vbnat); | ||
1728 | unsigned long *b1unat = &VCPU(vcpu, vnat); | ||
1729 | |||
1730 | |||
1731 | if (VCPU(vcpu, vpsr) & IA64_PSR_BN) { | ||
1732 | for (i = 0; i < 16; i++) { | ||
1733 | *b1++ = *r; | ||
1734 | *r++ = *b0++; | ||
1735 | } | ||
1736 | vcpu_bsw0_unat(i, b0unat, b1unat, runat, | ||
1737 | VMM_PT_REGS_R16_SLOT); | ||
1738 | VCPU(vcpu, vpsr) &= ~IA64_PSR_BN; | ||
1739 | } | ||
1740 | } | ||
1741 | |||
1742 | #define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ | ||
1743 | do { \ | ||
1744 | __asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n" \ | ||
1745 | "dep %1 = %0, %1, 16, 16;;\n" \ | ||
1746 | "st8 [%4] = %1\n" \ | ||
1747 | "extr.u %0 = %2, 0, 16;;\n" \ | ||
1748 | "dep %3 = %0, %3, %6, 16;;\n" \ | ||
1749 | "st8 [%5] = %3\n" \ | ||
1750 | ::"r"(i), "r"(*b0unat), "r"(*b1unat), \ | ||
1751 | "r"(*runat), "r"(b0unat), "r"(runat), \ | ||
1752 | "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ | ||
1753 | } while (0) | ||
1754 | |||
1755 | void vcpu_bsw1(struct kvm_vcpu *vcpu) | ||
1756 | { | ||
1757 | unsigned long i; | ||
1758 | struct kvm_pt_regs *regs = vcpu_regs(vcpu); | ||
1759 | unsigned long *r = ®s->r16; | ||
1760 | unsigned long *b0 = &VCPU(vcpu, vbgr[0]); | ||
1761 | unsigned long *b1 = &VCPU(vcpu, vgr[0]); | ||
1762 | unsigned long *runat = ®s->eml_unat; | ||
1763 | unsigned long *b0unat = &VCPU(vcpu, vbnat); | ||
1764 | unsigned long *b1unat = &VCPU(vcpu, vnat); | ||
1765 | |||
1766 | if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) { | ||
1767 | for (i = 0; i < 16; i++) { | ||
1768 | *b0++ = *r; | ||
1769 | *r++ = *b1++; | ||
1770 | } | ||
1771 | vcpu_bsw1_unat(i, b0unat, b1unat, runat, | ||
1772 | VMM_PT_REGS_R16_SLOT); | ||
1773 | VCPU(vcpu, vpsr) |= IA64_PSR_BN; | ||
1774 | } | ||
1775 | } | ||
1776 | |||
1777 | |||
1778 | |||
1779 | |||
1780 | void vcpu_rfi(struct kvm_vcpu *vcpu) | ||
1781 | { | ||
1782 | unsigned long ifs, psr; | ||
1783 | struct kvm_pt_regs *regs = vcpu_regs(vcpu); | ||
1784 | |||
1785 | psr = VCPU(vcpu, ipsr); | ||
1786 | if (psr & IA64_PSR_BN) | ||
1787 | vcpu_bsw1(vcpu); | ||
1788 | else | ||
1789 | vcpu_bsw0(vcpu); | ||
1790 | vcpu_set_psr(vcpu, psr); | ||
1791 | ifs = VCPU(vcpu, ifs); | ||
1792 | if (ifs >> 63) | ||
1793 | regs->cr_ifs = ifs; | ||
1794 | regs->cr_iip = VCPU(vcpu, iip); | ||
1795 | } | ||
1796 | |||
1797 | |||
1798 | /* | ||
1799 | VPSR can't keep track of below bits of guest PSR | ||
1800 | This function gets guest PSR | ||
1801 | */ | ||
1802 | |||
1803 | unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu) | ||
1804 | { | ||
1805 | unsigned long mask; | ||
1806 | struct kvm_pt_regs *regs = vcpu_regs(vcpu); | ||
1807 | |||
1808 | mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | | ||
1809 | IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI; | ||
1810 | return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask); | ||
1811 | } | ||
1812 | |||
1813 | void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst) | ||
1814 | { | ||
1815 | unsigned long vpsr; | ||
1816 | unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21) | ||
1817 | | inst.M44.imm; | ||
1818 | |||
1819 | vpsr = vcpu_get_psr(vcpu); | ||
1820 | vpsr &= (~imm24); | ||
1821 | vcpu_set_psr(vcpu, vpsr); | ||
1822 | } | ||
1823 | |||
1824 | void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst) | ||
1825 | { | ||
1826 | unsigned long vpsr; | ||
1827 | unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21) | ||
1828 | | inst.M44.imm; | ||
1829 | |||
1830 | vpsr = vcpu_get_psr(vcpu); | ||
1831 | vpsr |= imm24; | ||
1832 | vcpu_set_psr(vcpu, vpsr); | ||
1833 | } | ||
1834 | |||
1835 | /* Generate Mask | ||
1836 | * Parameter: | ||
1837 | * bit -- starting bit | ||
1838 | * len -- how many bits | ||
1839 | */ | ||
1840 | #define MASK(bit,len) \ | ||
1841 | ({ \ | ||
1842 | __u64 ret; \ | ||
1843 | \ | ||
1844 | __asm __volatile("dep %0=-1, r0, %1, %2"\ | ||
1845 | : "=r" (ret): \ | ||
1846 | "M" (bit), \ | ||
1847 | "M" (len)); \ | ||
1848 | ret; \ | ||
1849 | }) | ||
1850 | |||
1851 | void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val) | ||
1852 | { | ||
1853 | val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32)); | ||
1854 | vcpu_set_psr(vcpu, val); | ||
1855 | } | ||
1856 | |||
1857 | void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst) | ||
1858 | { | ||
1859 | unsigned long val; | ||
1860 | |||
1861 | val = vcpu_get_gr(vcpu, inst.M35.r2); | ||
1862 | vcpu_set_psr_l(vcpu, val); | ||
1863 | } | ||
1864 | |||
1865 | void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst) | ||
1866 | { | ||
1867 | unsigned long val; | ||
1868 | |||
1869 | val = vcpu_get_psr(vcpu); | ||
1870 | val = (val & MASK(0, 32)) | (val & MASK(35, 2)); | ||
1871 | vcpu_set_gr(vcpu, inst.M33.r1, val, 0); | ||
1872 | } | ||
1873 | |||
1874 | void vcpu_increment_iip(struct kvm_vcpu *vcpu) | ||
1875 | { | ||
1876 | struct kvm_pt_regs *regs = vcpu_regs(vcpu); | ||
1877 | struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; | ||
1878 | if (ipsr->ri == 2) { | ||
1879 | ipsr->ri = 0; | ||
1880 | regs->cr_iip += 16; | ||
1881 | } else | ||
1882 | ipsr->ri++; | ||
1883 | } | ||
1884 | |||
1885 | void vcpu_decrement_iip(struct kvm_vcpu *vcpu) | ||
1886 | { | ||
1887 | struct kvm_pt_regs *regs = vcpu_regs(vcpu); | ||
1888 | struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; | ||
1889 | |||
1890 | if (ipsr->ri == 0) { | ||
1891 | ipsr->ri = 2; | ||
1892 | regs->cr_iip -= 16; | ||
1893 | } else | ||
1894 | ipsr->ri--; | ||
1895 | } | ||
1896 | |||
1897 | /** Emulate a privileged operation. | ||
1898 | * | ||
1899 | * | ||
1900 | * @param vcpu virtual cpu | ||
1901 | * @cause the reason cause virtualization fault | ||
1902 | * @opcode the instruction code which cause virtualization fault | ||
1903 | */ | ||
1904 | |||
1905 | void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs) | ||
1906 | { | ||
1907 | unsigned long status, cause, opcode ; | ||
1908 | INST64 inst; | ||
1909 | |||
1910 | status = IA64_NO_FAULT; | ||
1911 | cause = VMX(vcpu, cause); | ||
1912 | opcode = VMX(vcpu, opcode); | ||
1913 | inst.inst = opcode; | ||
1914 | /* | ||
1915 | * Switch to actual virtual rid in rr0 and rr4, | ||
1916 | * which is required by some tlb related instructions. | ||
1917 | */ | ||
1918 | prepare_if_physical_mode(vcpu); | ||
1919 | |||
1920 | switch (cause) { | ||
1921 | case EVENT_RSM: | ||
1922 | kvm_rsm(vcpu, inst); | ||
1923 | break; | ||
1924 | case EVENT_SSM: | ||
1925 | kvm_ssm(vcpu, inst); | ||
1926 | break; | ||
1927 | case EVENT_MOV_TO_PSR: | ||
1928 | kvm_mov_to_psr(vcpu, inst); | ||
1929 | break; | ||
1930 | case EVENT_MOV_FROM_PSR: | ||
1931 | kvm_mov_from_psr(vcpu, inst); | ||
1932 | break; | ||
1933 | case EVENT_MOV_FROM_CR: | ||
1934 | kvm_mov_from_cr(vcpu, inst); | ||
1935 | break; | ||
1936 | case EVENT_MOV_TO_CR: | ||
1937 | kvm_mov_to_cr(vcpu, inst); | ||
1938 | break; | ||
1939 | case EVENT_BSW_0: | ||
1940 | vcpu_bsw0(vcpu); | ||
1941 | break; | ||
1942 | case EVENT_BSW_1: | ||
1943 | vcpu_bsw1(vcpu); | ||
1944 | break; | ||
1945 | case EVENT_COVER: | ||
1946 | vcpu_cover(vcpu); | ||
1947 | break; | ||
1948 | case EVENT_RFI: | ||
1949 | vcpu_rfi(vcpu); | ||
1950 | break; | ||
1951 | case EVENT_ITR_D: | ||
1952 | kvm_itr_d(vcpu, inst); | ||
1953 | break; | ||
1954 | case EVENT_ITR_I: | ||
1955 | kvm_itr_i(vcpu, inst); | ||
1956 | break; | ||
1957 | case EVENT_PTR_D: | ||
1958 | kvm_ptr_d(vcpu, inst); | ||
1959 | break; | ||
1960 | case EVENT_PTR_I: | ||
1961 | kvm_ptr_i(vcpu, inst); | ||
1962 | break; | ||
1963 | case EVENT_ITC_D: | ||
1964 | kvm_itc_d(vcpu, inst); | ||
1965 | break; | ||
1966 | case EVENT_ITC_I: | ||
1967 | kvm_itc_i(vcpu, inst); | ||
1968 | break; | ||
1969 | case EVENT_PTC_L: | ||
1970 | kvm_ptc_l(vcpu, inst); | ||
1971 | break; | ||
1972 | case EVENT_PTC_G: | ||
1973 | kvm_ptc_g(vcpu, inst); | ||
1974 | break; | ||
1975 | case EVENT_PTC_GA: | ||
1976 | kvm_ptc_ga(vcpu, inst); | ||
1977 | break; | ||
1978 | case EVENT_PTC_E: | ||
1979 | kvm_ptc_e(vcpu, inst); | ||
1980 | break; | ||
1981 | case EVENT_MOV_TO_RR: | ||
1982 | kvm_mov_to_rr(vcpu, inst); | ||
1983 | break; | ||
1984 | case EVENT_MOV_FROM_RR: | ||
1985 | kvm_mov_from_rr(vcpu, inst); | ||
1986 | break; | ||
1987 | case EVENT_THASH: | ||
1988 | kvm_thash(vcpu, inst); | ||
1989 | break; | ||
1990 | case EVENT_TTAG: | ||
1991 | kvm_ttag(vcpu, inst); | ||
1992 | break; | ||
1993 | case EVENT_TPA: | ||
1994 | status = kvm_tpa(vcpu, inst); | ||
1995 | break; | ||
1996 | case EVENT_TAK: | ||
1997 | kvm_tak(vcpu, inst); | ||
1998 | break; | ||
1999 | case EVENT_MOV_TO_AR_IMM: | ||
2000 | kvm_mov_to_ar_imm(vcpu, inst); | ||
2001 | break; | ||
2002 | case EVENT_MOV_TO_AR: | ||
2003 | kvm_mov_to_ar_reg(vcpu, inst); | ||
2004 | break; | ||
2005 | case EVENT_MOV_FROM_AR: | ||
2006 | kvm_mov_from_ar_reg(vcpu, inst); | ||
2007 | break; | ||
2008 | case EVENT_MOV_TO_DBR: | ||
2009 | kvm_mov_to_dbr(vcpu, inst); | ||
2010 | break; | ||
2011 | case EVENT_MOV_TO_IBR: | ||
2012 | kvm_mov_to_ibr(vcpu, inst); | ||
2013 | break; | ||
2014 | case EVENT_MOV_TO_PMC: | ||
2015 | kvm_mov_to_pmc(vcpu, inst); | ||
2016 | break; | ||
2017 | case EVENT_MOV_TO_PMD: | ||
2018 | kvm_mov_to_pmd(vcpu, inst); | ||
2019 | break; | ||
2020 | case EVENT_MOV_TO_PKR: | ||
2021 | kvm_mov_to_pkr(vcpu, inst); | ||
2022 | break; | ||
2023 | case EVENT_MOV_FROM_DBR: | ||
2024 | kvm_mov_from_dbr(vcpu, inst); | ||
2025 | break; | ||
2026 | case EVENT_MOV_FROM_IBR: | ||
2027 | kvm_mov_from_ibr(vcpu, inst); | ||
2028 | break; | ||
2029 | case EVENT_MOV_FROM_PMC: | ||
2030 | kvm_mov_from_pmc(vcpu, inst); | ||
2031 | break; | ||
2032 | case EVENT_MOV_FROM_PKR: | ||
2033 | kvm_mov_from_pkr(vcpu, inst); | ||
2034 | break; | ||
2035 | case EVENT_MOV_FROM_CPUID: | ||
2036 | kvm_mov_from_cpuid(vcpu, inst); | ||
2037 | break; | ||
2038 | case EVENT_VMSW: | ||
2039 | status = IA64_FAULT; | ||
2040 | break; | ||
2041 | default: | ||
2042 | break; | ||
2043 | }; | ||
2044 | /*Assume all status is NO_FAULT ?*/ | ||
2045 | if (status == IA64_NO_FAULT && cause != EVENT_RFI) | ||
2046 | vcpu_increment_iip(vcpu); | ||
2047 | |||
2048 | recover_if_physical_mode(vcpu); | ||
2049 | } | ||
2050 | |||
2051 | void init_vcpu(struct kvm_vcpu *vcpu) | ||
2052 | { | ||
2053 | int i; | ||
2054 | |||
2055 | vcpu->arch.mode_flags = GUEST_IN_PHY; | ||
2056 | VMX(vcpu, vrr[0]) = 0x38; | ||
2057 | VMX(vcpu, vrr[1]) = 0x38; | ||
2058 | VMX(vcpu, vrr[2]) = 0x38; | ||
2059 | VMX(vcpu, vrr[3]) = 0x38; | ||
2060 | VMX(vcpu, vrr[4]) = 0x38; | ||
2061 | VMX(vcpu, vrr[5]) = 0x38; | ||
2062 | VMX(vcpu, vrr[6]) = 0x38; | ||
2063 | VMX(vcpu, vrr[7]) = 0x38; | ||
2064 | VCPU(vcpu, vpsr) = IA64_PSR_BN; | ||
2065 | VCPU(vcpu, dcr) = 0; | ||
2066 | /* pta.size must not be 0. The minimum is 15 (32k) */ | ||
2067 | VCPU(vcpu, pta) = 15 << 2; | ||
2068 | VCPU(vcpu, itv) = 0x10000; | ||
2069 | VCPU(vcpu, itm) = 0; | ||
2070 | VMX(vcpu, last_itc) = 0; | ||
2071 | |||
2072 | VCPU(vcpu, lid) = VCPU_LID(vcpu); | ||
2073 | VCPU(vcpu, ivr) = 0; | ||
2074 | VCPU(vcpu, tpr) = 0x10000; | ||
2075 | VCPU(vcpu, eoi) = 0; | ||
2076 | VCPU(vcpu, irr[0]) = 0; | ||
2077 | VCPU(vcpu, irr[1]) = 0; | ||
2078 | VCPU(vcpu, irr[2]) = 0; | ||
2079 | VCPU(vcpu, irr[3]) = 0; | ||
2080 | VCPU(vcpu, pmv) = 0x10000; | ||
2081 | VCPU(vcpu, cmcv) = 0x10000; | ||
2082 | VCPU(vcpu, lrr0) = 0x10000; /* default reset value? */ | ||
2083 | VCPU(vcpu, lrr1) = 0x10000; /* default reset value? */ | ||
2084 | update_vhpi(vcpu, NULL_VECTOR); | ||
2085 | VLSAPIC_XTP(vcpu) = 0x80; /* disabled */ | ||
2086 | |||
2087 | for (i = 0; i < 4; i++) | ||
2088 | VLSAPIC_INSVC(vcpu, i) = 0; | ||
2089 | } | ||
2090 | |||
2091 | void kvm_init_all_rr(struct kvm_vcpu *vcpu) | ||
2092 | { | ||
2093 | unsigned long psr; | ||
2094 | |||
2095 | local_irq_save(psr); | ||
2096 | |||
2097 | /* WARNING: not allow co-exist of both virtual mode and physical | ||
2098 | * mode in same region | ||
2099 | */ | ||
2100 | |||
2101 | vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0])); | ||
2102 | vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4])); | ||
2103 | |||
2104 | if (is_physical_mode(vcpu)) { | ||
2105 | if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) | ||
2106 | panic_vm(vcpu); | ||
2107 | |||
2108 | ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); | ||
2109 | ia64_dv_serialize_data(); | ||
2110 | ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4); | ||
2111 | ia64_dv_serialize_data(); | ||
2112 | } else { | ||
2113 | ia64_set_rr((VRN0 << VRN_SHIFT), | ||
2114 | vcpu->arch.metaphysical_saved_rr0); | ||
2115 | ia64_dv_serialize_data(); | ||
2116 | ia64_set_rr((VRN4 << VRN_SHIFT), | ||
2117 | vcpu->arch.metaphysical_saved_rr4); | ||
2118 | ia64_dv_serialize_data(); | ||
2119 | } | ||
2120 | ia64_set_rr((VRN1 << VRN_SHIFT), | ||
2121 | vrrtomrr(VMX(vcpu, vrr[VRN1]))); | ||
2122 | ia64_dv_serialize_data(); | ||
2123 | ia64_set_rr((VRN2 << VRN_SHIFT), | ||
2124 | vrrtomrr(VMX(vcpu, vrr[VRN2]))); | ||
2125 | ia64_dv_serialize_data(); | ||
2126 | ia64_set_rr((VRN3 << VRN_SHIFT), | ||
2127 | vrrtomrr(VMX(vcpu, vrr[VRN3]))); | ||
2128 | ia64_dv_serialize_data(); | ||
2129 | ia64_set_rr((VRN5 << VRN_SHIFT), | ||
2130 | vrrtomrr(VMX(vcpu, vrr[VRN5]))); | ||
2131 | ia64_dv_serialize_data(); | ||
2132 | ia64_set_rr((VRN7 << VRN_SHIFT), | ||
2133 | vrrtomrr(VMX(vcpu, vrr[VRN7]))); | ||
2134 | ia64_dv_serialize_data(); | ||
2135 | ia64_srlz_d(); | ||
2136 | ia64_set_psr(psr); | ||
2137 | } | ||
2138 | |||
2139 | int vmm_entry(void) | ||
2140 | { | ||
2141 | struct kvm_vcpu *v; | ||
2142 | v = current_vcpu; | ||
2143 | |||
2144 | ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd, | ||
2145 | 0, 0, 0, 0, 0, 0); | ||
2146 | kvm_init_vtlb(v); | ||
2147 | kvm_init_vhpt(v); | ||
2148 | init_vcpu(v); | ||
2149 | kvm_init_all_rr(v); | ||
2150 | vmm_reset_entry(); | ||
2151 | |||
2152 | return 0; | ||
2153 | } | ||
2154 | |||
2155 | void panic_vm(struct kvm_vcpu *v) | ||
2156 | { | ||
2157 | struct exit_ctl_data *p = &v->arch.exit_data; | ||
2158 | |||
2159 | p->exit_reason = EXIT_REASON_VM_PANIC; | ||
2160 | vmm_transition(v); | ||
2161 | /*Never to return*/ | ||
2162 | while (1); | ||
2163 | } | ||
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h new file mode 100644 index 000000000000..b0fcfb62c49e --- /dev/null +++ b/arch/ia64/kvm/vcpu.h | |||
@@ -0,0 +1,740 @@ | |||
1 | /* | ||
2 | * vcpu.h: vcpu routines | ||
3 | * Copyright (c) 2005, Intel Corporation. | ||
4 | * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) | ||
5 | * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) | ||
6 | * | ||
7 | * Copyright (c) 2007, Intel Corporation. | ||
8 | * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) | ||
9 | * Xiantao Zhang (xiantao.zhang@intel.com) | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms and conditions of the GNU General Public License, | ||
13 | * version 2, as published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
18 | * more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License along with | ||
21 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
22 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | |||
27 | #ifndef __KVM_VCPU_H__ | ||
28 | #define __KVM_VCPU_H__ | ||
29 | |||
30 | #include <asm/types.h> | ||
31 | #include <asm/fpu.h> | ||
32 | #include <asm/processor.h> | ||
33 | |||
34 | #ifndef __ASSEMBLY__ | ||
35 | #include "vti.h" | ||
36 | |||
37 | #include <linux/kvm_host.h> | ||
38 | #include <linux/spinlock.h> | ||
39 | |||
40 | typedef unsigned long IA64_INST; | ||
41 | |||
42 | typedef union U_IA64_BUNDLE { | ||
43 | unsigned long i64[2]; | ||
44 | struct { unsigned long template:5, slot0:41, slot1a:18, | ||
45 | slot1b:23, slot2:41; }; | ||
46 | /* NOTE: following doesn't work because bitfields can't cross natural | ||
47 | size boundaries | ||
48 | struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */ | ||
49 | } IA64_BUNDLE; | ||
50 | |||
51 | typedef union U_INST64_A5 { | ||
52 | IA64_INST inst; | ||
53 | struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5, | ||
54 | imm9d:9, s:1, major:4; }; | ||
55 | } INST64_A5; | ||
56 | |||
57 | typedef union U_INST64_B4 { | ||
58 | IA64_INST inst; | ||
59 | struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6, | ||
60 | wh:2, d:1, un1:1, major:4; }; | ||
61 | } INST64_B4; | ||
62 | |||
63 | typedef union U_INST64_B8 { | ||
64 | IA64_INST inst; | ||
65 | struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; }; | ||
66 | } INST64_B8; | ||
67 | |||
68 | typedef union U_INST64_B9 { | ||
69 | IA64_INST inst; | ||
70 | struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; }; | ||
71 | } INST64_B9; | ||
72 | |||
73 | typedef union U_INST64_I19 { | ||
74 | IA64_INST inst; | ||
75 | struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; }; | ||
76 | } INST64_I19; | ||
77 | |||
78 | typedef union U_INST64_I26 { | ||
79 | IA64_INST inst; | ||
80 | struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; | ||
81 | } INST64_I26; | ||
82 | |||
83 | typedef union U_INST64_I27 { | ||
84 | IA64_INST inst; | ||
85 | struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; }; | ||
86 | } INST64_I27; | ||
87 | |||
88 | typedef union U_INST64_I28 { /* not privileged (mov from AR) */ | ||
89 | IA64_INST inst; | ||
90 | struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; | ||
91 | } INST64_I28; | ||
92 | |||
93 | typedef union U_INST64_M28 { | ||
94 | IA64_INST inst; | ||
95 | struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; }; | ||
96 | } INST64_M28; | ||
97 | |||
98 | typedef union U_INST64_M29 { | ||
99 | IA64_INST inst; | ||
100 | struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; | ||
101 | } INST64_M29; | ||
102 | |||
103 | typedef union U_INST64_M30 { | ||
104 | IA64_INST inst; | ||
105 | struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2, | ||
106 | x3:3, s:1, major:4; }; | ||
107 | } INST64_M30; | ||
108 | |||
109 | typedef union U_INST64_M31 { | ||
110 | IA64_INST inst; | ||
111 | struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; | ||
112 | } INST64_M31; | ||
113 | |||
114 | typedef union U_INST64_M32 { | ||
115 | IA64_INST inst; | ||
116 | struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; }; | ||
117 | } INST64_M32; | ||
118 | |||
119 | typedef union U_INST64_M33 { | ||
120 | IA64_INST inst; | ||
121 | struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; }; | ||
122 | } INST64_M33; | ||
123 | |||
124 | typedef union U_INST64_M35 { | ||
125 | IA64_INST inst; | ||
126 | struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; | ||
127 | |||
128 | } INST64_M35; | ||
129 | |||
130 | typedef union U_INST64_M36 { | ||
131 | IA64_INST inst; | ||
132 | struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; }; | ||
133 | } INST64_M36; | ||
134 | |||
135 | typedef union U_INST64_M37 { | ||
136 | IA64_INST inst; | ||
137 | struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3, | ||
138 | i:1, major:4; }; | ||
139 | } INST64_M37; | ||
140 | |||
141 | typedef union U_INST64_M41 { | ||
142 | IA64_INST inst; | ||
143 | struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; | ||
144 | } INST64_M41; | ||
145 | |||
146 | typedef union U_INST64_M42 { | ||
147 | IA64_INST inst; | ||
148 | struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; | ||
149 | } INST64_M42; | ||
150 | |||
151 | typedef union U_INST64_M43 { | ||
152 | IA64_INST inst; | ||
153 | struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; }; | ||
154 | } INST64_M43; | ||
155 | |||
156 | typedef union U_INST64_M44 { | ||
157 | IA64_INST inst; | ||
158 | struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; }; | ||
159 | } INST64_M44; | ||
160 | |||
161 | typedef union U_INST64_M45 { | ||
162 | IA64_INST inst; | ||
163 | struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; | ||
164 | } INST64_M45; | ||
165 | |||
166 | typedef union U_INST64_M46 { | ||
167 | IA64_INST inst; | ||
168 | struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6, | ||
169 | x3:3, un1:1, major:4; }; | ||
170 | } INST64_M46; | ||
171 | |||
172 | typedef union U_INST64_M47 { | ||
173 | IA64_INST inst; | ||
174 | struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; }; | ||
175 | } INST64_M47; | ||
176 | |||
177 | typedef union U_INST64_M1{ | ||
178 | IA64_INST inst; | ||
179 | struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2, | ||
180 | x6:6, m:1, major:4; }; | ||
181 | } INST64_M1; | ||
182 | |||
183 | typedef union U_INST64_M2{ | ||
184 | IA64_INST inst; | ||
185 | struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2, | ||
186 | x6:6, m:1, major:4; }; | ||
187 | } INST64_M2; | ||
188 | |||
189 | typedef union U_INST64_M3{ | ||
190 | IA64_INST inst; | ||
191 | struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2, | ||
192 | x6:6, s:1, major:4; }; | ||
193 | } INST64_M3; | ||
194 | |||
195 | typedef union U_INST64_M4 { | ||
196 | IA64_INST inst; | ||
197 | struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2, | ||
198 | x6:6, m:1, major:4; }; | ||
199 | } INST64_M4; | ||
200 | |||
201 | typedef union U_INST64_M5 { | ||
202 | IA64_INST inst; | ||
203 | struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2, | ||
204 | x6:6, s:1, major:4; }; | ||
205 | } INST64_M5; | ||
206 | |||
207 | typedef union U_INST64_M6 { | ||
208 | IA64_INST inst; | ||
209 | struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2, | ||
210 | x6:6, m:1, major:4; }; | ||
211 | } INST64_M6; | ||
212 | |||
213 | typedef union U_INST64_M9 { | ||
214 | IA64_INST inst; | ||
215 | struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2, | ||
216 | x6:6, m:1, major:4; }; | ||
217 | } INST64_M9; | ||
218 | |||
219 | typedef union U_INST64_M10 { | ||
220 | IA64_INST inst; | ||
221 | struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2, | ||
222 | x6:6, s:1, major:4; }; | ||
223 | } INST64_M10; | ||
224 | |||
225 | typedef union U_INST64_M12 { | ||
226 | IA64_INST inst; | ||
227 | struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2, | ||
228 | x6:6, m:1, major:4; }; | ||
229 | } INST64_M12; | ||
230 | |||
231 | typedef union U_INST64_M15 { | ||
232 | IA64_INST inst; | ||
233 | struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2, | ||
234 | x6:6, s:1, major:4; }; | ||
235 | } INST64_M15; | ||
236 | |||
237 | typedef union U_INST64 { | ||
238 | IA64_INST inst; | ||
239 | struct { unsigned long :37, major:4; } generic; | ||
240 | INST64_A5 A5; /* used in build_hypercall_bundle only */ | ||
241 | INST64_B4 B4; /* used in build_hypercall_bundle only */ | ||
242 | INST64_B8 B8; /* rfi, bsw.[01] */ | ||
243 | INST64_B9 B9; /* break.b */ | ||
244 | INST64_I19 I19; /* used in build_hypercall_bundle only */ | ||
245 | INST64_I26 I26; /* mov register to ar (I unit) */ | ||
246 | INST64_I27 I27; /* mov immediate to ar (I unit) */ | ||
247 | INST64_I28 I28; /* mov from ar (I unit) */ | ||
248 | INST64_M1 M1; /* ld integer */ | ||
249 | INST64_M2 M2; | ||
250 | INST64_M3 M3; | ||
251 | INST64_M4 M4; /* st integer */ | ||
252 | INST64_M5 M5; | ||
253 | INST64_M6 M6; /* ldfd floating pointer */ | ||
254 | INST64_M9 M9; /* stfd floating pointer */ | ||
255 | INST64_M10 M10; /* stfd floating pointer */ | ||
256 | INST64_M12 M12; /* ldfd pair floating pointer */ | ||
257 | INST64_M15 M15; /* lfetch + imm update */ | ||
258 | INST64_M28 M28; /* purge translation cache entry */ | ||
259 | INST64_M29 M29; /* mov register to ar (M unit) */ | ||
260 | INST64_M30 M30; /* mov immediate to ar (M unit) */ | ||
261 | INST64_M31 M31; /* mov from ar (M unit) */ | ||
262 | INST64_M32 M32; /* mov reg to cr */ | ||
263 | INST64_M33 M33; /* mov from cr */ | ||
264 | INST64_M35 M35; /* mov to psr */ | ||
265 | INST64_M36 M36; /* mov from psr */ | ||
266 | INST64_M37 M37; /* break.m */ | ||
267 | INST64_M41 M41; /* translation cache insert */ | ||
268 | INST64_M42 M42; /* mov to indirect reg/translation reg insert*/ | ||
269 | INST64_M43 M43; /* mov from indirect reg */ | ||
270 | INST64_M44 M44; /* set/reset system mask */ | ||
271 | INST64_M45 M45; /* translation purge */ | ||
272 | INST64_M46 M46; /* translation access (tpa,tak) */ | ||
273 | INST64_M47 M47; /* purge translation entry */ | ||
274 | } INST64; | ||
275 | |||
276 | #define MASK_41 ((unsigned long)0x1ffffffffff) | ||
277 | |||
278 | /* Virtual address memory attributes encoding */ | ||
279 | #define VA_MATTR_WB 0x0 | ||
280 | #define VA_MATTR_UC 0x4 | ||
281 | #define VA_MATTR_UCE 0x5 | ||
282 | #define VA_MATTR_WC 0x6 | ||
283 | #define VA_MATTR_NATPAGE 0x7 | ||
284 | |||
285 | #define PMASK(size) (~((size) - 1)) | ||
286 | #define PSIZE(size) (1UL<<(size)) | ||
287 | #define CLEARLSB(ppn, nbits) (((ppn) >> (nbits)) << (nbits)) | ||
288 | #define PAGEALIGN(va, ps) CLEARLSB(va, ps) | ||
289 | #define PAGE_FLAGS_RV_MASK (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53)) | ||
290 | #define _PAGE_MA_ST (0x1 << 2) /* is reserved for software use */ | ||
291 | |||
292 | #define ARCH_PAGE_SHIFT 12 | ||
293 | |||
294 | #define INVALID_TI_TAG (1UL << 63) | ||
295 | |||
296 | #define VTLB_PTE_P_BIT 0 | ||
297 | #define VTLB_PTE_IO_BIT 60 | ||
298 | #define VTLB_PTE_IO (1UL<<VTLB_PTE_IO_BIT) | ||
299 | #define VTLB_PTE_P (1UL<<VTLB_PTE_P_BIT) | ||
300 | |||
301 | #define vcpu_quick_region_check(_tr_regions,_ifa) \ | ||
302 | (_tr_regions & (1 << ((unsigned long)_ifa >> 61))) | ||
303 | |||
304 | #define vcpu_quick_region_set(_tr_regions,_ifa) \ | ||
305 | do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0) | ||
306 | |||
307 | static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir, | ||
308 | u64 va, u64 rid) | ||
309 | { | ||
310 | trp->page_flags = pte; | ||
311 | trp->itir = itir; | ||
312 | trp->vadr = va; | ||
313 | trp->rid = rid; | ||
314 | } | ||
315 | |||
316 | extern u64 kvm_lookup_mpa(u64 gpfn); | ||
317 | extern u64 kvm_gpa_to_mpa(u64 gpa); | ||
318 | |||
319 | /* Return I/O type if trye */ | ||
320 | #define __gpfn_is_io(gpfn) \ | ||
321 | ({ \ | ||
322 | u64 pte, ret = 0; \ | ||
323 | pte = kvm_lookup_mpa(gpfn); \ | ||
324 | if (!(pte & GPFN_INV_MASK)) \ | ||
325 | ret = pte & GPFN_IO_MASK; \ | ||
326 | ret; \ | ||
327 | }) | ||
328 | |||
329 | #endif | ||
330 | |||
331 | #define IA64_NO_FAULT 0 | ||
332 | #define IA64_FAULT 1 | ||
333 | |||
334 | #define VMM_RBS_OFFSET ((VMM_TASK_SIZE + 15) & ~15) | ||
335 | |||
336 | #define SW_BAD 0 /* Bad mode transitition */ | ||
337 | #define SW_V2P 1 /* Physical emulatino is activated */ | ||
338 | #define SW_P2V 2 /* Exit physical mode emulation */ | ||
339 | #define SW_SELF 3 /* No mode transition */ | ||
340 | #define SW_NOP 4 /* Mode transition, but without action required */ | ||
341 | |||
342 | #define GUEST_IN_PHY 0x1 | ||
343 | #define GUEST_PHY_EMUL 0x2 | ||
344 | |||
345 | #define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP)) | ||
346 | |||
347 | #define VRN_SHIFT 61 | ||
348 | #define VRN_MASK 0xe000000000000000 | ||
349 | #define VRN0 0x0UL | ||
350 | #define VRN1 0x1UL | ||
351 | #define VRN2 0x2UL | ||
352 | #define VRN3 0x3UL | ||
353 | #define VRN4 0x4UL | ||
354 | #define VRN5 0x5UL | ||
355 | #define VRN6 0x6UL | ||
356 | #define VRN7 0x7UL | ||
357 | |||
358 | #define IRQ_NO_MASKED 0 | ||
359 | #define IRQ_MASKED_BY_VTPR 1 | ||
360 | #define IRQ_MASKED_BY_INSVC 2 /* masked by inservice IRQ */ | ||
361 | |||
362 | #define PTA_BASE_SHIFT 15 | ||
363 | |||
364 | #define IA64_PSR_VM_BIT 46 | ||
365 | #define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT) | ||
366 | |||
367 | /* Interruption Function State */ | ||
368 | #define IA64_IFS_V_BIT 63 | ||
369 | #define IA64_IFS_V (__IA64_UL(1) << IA64_IFS_V_BIT) | ||
370 | |||
371 | #define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX) | ||
372 | #define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX) | ||
373 | |||
374 | #ifndef __ASSEMBLY__ | ||
375 | |||
376 | #include <asm/gcc_intrin.h> | ||
377 | |||
378 | #define is_physical_mode(v) \ | ||
379 | ((v->arch.mode_flags) & GUEST_IN_PHY) | ||
380 | |||
381 | #define is_virtual_mode(v) \ | ||
382 | (!is_physical_mode(v)) | ||
383 | |||
384 | #define MODE_IND(psr) \ | ||
385 | (((psr).it << 2) + ((psr).dt << 1) + (psr).rt) | ||
386 | |||
387 | #define _vmm_raw_spin_lock(x) \ | ||
388 | do { \ | ||
389 | __u32 *ia64_spinlock_ptr = (__u32 *) (x); \ | ||
390 | __u64 ia64_spinlock_val; \ | ||
391 | ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ | ||
392 | if (unlikely(ia64_spinlock_val)) { \ | ||
393 | do { \ | ||
394 | while (*ia64_spinlock_ptr) \ | ||
395 | ia64_barrier(); \ | ||
396 | ia64_spinlock_val = \ | ||
397 | ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ | ||
398 | } while (ia64_spinlock_val); \ | ||
399 | } \ | ||
400 | } while (0) | ||
401 | |||
402 | #define _vmm_raw_spin_unlock(x) \ | ||
403 | do { barrier(); \ | ||
404 | ((spinlock_t *)x)->raw_lock.lock = 0; } \ | ||
405 | while (0) | ||
406 | |||
407 | void vmm_spin_lock(spinlock_t *lock); | ||
408 | void vmm_spin_unlock(spinlock_t *lock); | ||
409 | enum { | ||
410 | I_TLB = 1, | ||
411 | D_TLB = 2 | ||
412 | }; | ||
413 | |||
414 | union kvm_va { | ||
415 | struct { | ||
416 | unsigned long off : 60; /* intra-region offset */ | ||
417 | unsigned long reg : 4; /* region number */ | ||
418 | } f; | ||
419 | unsigned long l; | ||
420 | void *p; | ||
421 | }; | ||
422 | |||
423 | #define __kvm_pa(x) ({union kvm_va _v; _v.l = (long) (x); \ | ||
424 | _v.f.reg = 0; _v.l; }) | ||
425 | #define __kvm_va(x) ({union kvm_va _v; _v.l = (long) (x); \ | ||
426 | _v.f.reg = -1; _v.p; }) | ||
427 | |||
428 | #define _REGION_ID(x) ({union ia64_rr _v; _v.val = (long)(x); \ | ||
429 | _v.rid; }) | ||
430 | #define _REGION_PAGE_SIZE(x) ({union ia64_rr _v; _v.val = (long)(x); \ | ||
431 | _v.ps; }) | ||
432 | #define _REGION_HW_WALKER(x) ({union ia64_rr _v; _v.val = (long)(x); \ | ||
433 | _v.ve; }) | ||
434 | |||
435 | enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF }; | ||
436 | enum tlb_miss_type { INSTRUCTION, DATA, REGISTER }; | ||
437 | |||
438 | #define VCPU(_v, _x) ((_v)->arch.vpd->_x) | ||
439 | #define VMX(_v, _x) ((_v)->arch._x) | ||
440 | |||
441 | #define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i]) | ||
442 | #define VLSAPIC_XTP(_v) VMX(_v, xtp) | ||
443 | |||
444 | static inline unsigned long itir_ps(unsigned long itir) | ||
445 | { | ||
446 | return ((itir >> 2) & 0x3f); | ||
447 | } | ||
448 | |||
449 | |||
450 | /************************************************************************** | ||
451 | VCPU control register access routines | ||
452 | **************************************************************************/ | ||
453 | |||
454 | static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu) | ||
455 | { | ||
456 | return ((u64)VCPU(vcpu, itir)); | ||
457 | } | ||
458 | |||
459 | static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val) | ||
460 | { | ||
461 | VCPU(vcpu, itir) = val; | ||
462 | } | ||
463 | |||
464 | static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu) | ||
465 | { | ||
466 | return ((u64)VCPU(vcpu, ifa)); | ||
467 | } | ||
468 | |||
469 | static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val) | ||
470 | { | ||
471 | VCPU(vcpu, ifa) = val; | ||
472 | } | ||
473 | |||
474 | static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu) | ||
475 | { | ||
476 | return ((u64)VCPU(vcpu, iva)); | ||
477 | } | ||
478 | |||
479 | static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu) | ||
480 | { | ||
481 | return ((u64)VCPU(vcpu, pta)); | ||
482 | } | ||
483 | |||
484 | static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu) | ||
485 | { | ||
486 | return ((u64)VCPU(vcpu, lid)); | ||
487 | } | ||
488 | |||
489 | static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu) | ||
490 | { | ||
491 | return ((u64)VCPU(vcpu, tpr)); | ||
492 | } | ||
493 | |||
494 | static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu) | ||
495 | { | ||
496 | return (0UL); /*reads of eoi always return 0 */ | ||
497 | } | ||
498 | |||
499 | static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu) | ||
500 | { | ||
501 | return ((u64)VCPU(vcpu, irr[0])); | ||
502 | } | ||
503 | |||
504 | static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu) | ||
505 | { | ||
506 | return ((u64)VCPU(vcpu, irr[1])); | ||
507 | } | ||
508 | |||
509 | static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu) | ||
510 | { | ||
511 | return ((u64)VCPU(vcpu, irr[2])); | ||
512 | } | ||
513 | |||
514 | static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu) | ||
515 | { | ||
516 | return ((u64)VCPU(vcpu, irr[3])); | ||
517 | } | ||
518 | |||
519 | static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val) | ||
520 | { | ||
521 | ia64_setreg(_IA64_REG_CR_DCR, val); | ||
522 | } | ||
523 | |||
524 | static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val) | ||
525 | { | ||
526 | VCPU(vcpu, isr) = val; | ||
527 | } | ||
528 | |||
529 | static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val) | ||
530 | { | ||
531 | VCPU(vcpu, lid) = val; | ||
532 | } | ||
533 | |||
534 | static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val) | ||
535 | { | ||
536 | VCPU(vcpu, ipsr) = val; | ||
537 | } | ||
538 | |||
539 | static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val) | ||
540 | { | ||
541 | VCPU(vcpu, iip) = val; | ||
542 | } | ||
543 | |||
544 | static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val) | ||
545 | { | ||
546 | VCPU(vcpu, ifs) = val; | ||
547 | } | ||
548 | |||
549 | static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val) | ||
550 | { | ||
551 | VCPU(vcpu, iipa) = val; | ||
552 | } | ||
553 | |||
554 | static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val) | ||
555 | { | ||
556 | VCPU(vcpu, iha) = val; | ||
557 | } | ||
558 | |||
559 | |||
560 | static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg) | ||
561 | { | ||
562 | return vcpu->arch.vrr[reg>>61]; | ||
563 | } | ||
564 | |||
565 | /************************************************************************** | ||
566 | VCPU debug breakpoint register access routines | ||
567 | **************************************************************************/ | ||
568 | |||
569 | static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val) | ||
570 | { | ||
571 | __ia64_set_dbr(reg, val); | ||
572 | } | ||
573 | |||
574 | static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val) | ||
575 | { | ||
576 | ia64_set_ibr(reg, val); | ||
577 | } | ||
578 | |||
579 | static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg) | ||
580 | { | ||
581 | return ((u64)__ia64_get_dbr(reg)); | ||
582 | } | ||
583 | |||
584 | static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg) | ||
585 | { | ||
586 | return ((u64)ia64_get_ibr(reg)); | ||
587 | } | ||
588 | |||
589 | /************************************************************************** | ||
590 | VCPU performance monitor register access routines | ||
591 | **************************************************************************/ | ||
592 | static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val) | ||
593 | { | ||
594 | /* NOTE: Writes to unimplemented PMC registers are discarded */ | ||
595 | ia64_set_pmc(reg, val); | ||
596 | } | ||
597 | |||
598 | static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val) | ||
599 | { | ||
600 | /* NOTE: Writes to unimplemented PMD registers are discarded */ | ||
601 | ia64_set_pmd(reg, val); | ||
602 | } | ||
603 | |||
604 | static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg) | ||
605 | { | ||
606 | /* NOTE: Reads from unimplemented PMC registers return zero */ | ||
607 | return ((u64)ia64_get_pmc(reg)); | ||
608 | } | ||
609 | |||
610 | static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg) | ||
611 | { | ||
612 | /* NOTE: Reads from unimplemented PMD registers return zero */ | ||
613 | return ((u64)ia64_get_pmd(reg)); | ||
614 | } | ||
615 | |||
616 | static inline unsigned long vrrtomrr(unsigned long val) | ||
617 | { | ||
618 | union ia64_rr rr; | ||
619 | rr.val = val; | ||
620 | rr.rid = (rr.rid << 4) | 0xe; | ||
621 | if (rr.ps > PAGE_SHIFT) | ||
622 | rr.ps = PAGE_SHIFT; | ||
623 | rr.ve = 1; | ||
624 | return rr.val; | ||
625 | } | ||
626 | |||
627 | |||
628 | static inline int highest_bits(int *dat) | ||
629 | { | ||
630 | u32 bits, bitnum; | ||
631 | int i; | ||
632 | |||
633 | /* loop for all 256 bits */ | ||
634 | for (i = 7; i >= 0 ; i--) { | ||
635 | bits = dat[i]; | ||
636 | if (bits) { | ||
637 | bitnum = fls(bits); | ||
638 | return i * 32 + bitnum - 1; | ||
639 | } | ||
640 | } | ||
641 | return NULL_VECTOR; | ||
642 | } | ||
643 | |||
644 | /* | ||
645 | * The pending irq is higher than the inservice one. | ||
646 | * | ||
647 | */ | ||
648 | static inline int is_higher_irq(int pending, int inservice) | ||
649 | { | ||
650 | return ((pending > inservice) | ||
651 | || ((pending != NULL_VECTOR) | ||
652 | && (inservice == NULL_VECTOR))); | ||
653 | } | ||
654 | |||
655 | static inline int is_higher_class(int pending, int mic) | ||
656 | { | ||
657 | return ((pending >> 4) > mic); | ||
658 | } | ||
659 | |||
660 | /* | ||
661 | * Return 0-255 for pending irq. | ||
662 | * NULL_VECTOR: when no pending. | ||
663 | */ | ||
664 | static inline int highest_pending_irq(struct kvm_vcpu *vcpu) | ||
665 | { | ||
666 | if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR)) | ||
667 | return NMI_VECTOR; | ||
668 | if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR)) | ||
669 | return ExtINT_VECTOR; | ||
670 | |||
671 | return highest_bits((int *)&VCPU(vcpu, irr[0])); | ||
672 | } | ||
673 | |||
674 | static inline int highest_inservice_irq(struct kvm_vcpu *vcpu) | ||
675 | { | ||
676 | if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR)) | ||
677 | return NMI_VECTOR; | ||
678 | if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR)) | ||
679 | return ExtINT_VECTOR; | ||
680 | |||
681 | return highest_bits((int *)&(VMX(vcpu, insvc[0]))); | ||
682 | } | ||
683 | |||
684 | extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, u64 reg, | ||
685 | struct ia64_fpreg *val); | ||
686 | extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, u64 reg, | ||
687 | struct ia64_fpreg *val); | ||
688 | extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, u64 reg); | ||
689 | extern void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 val, int nat); | ||
690 | extern u64 vcpu_get_psr(struct kvm_vcpu *vcpu); | ||
691 | extern void vcpu_set_psr(struct kvm_vcpu *vcpu, u64 val); | ||
692 | extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr); | ||
693 | extern void vcpu_bsw0(struct kvm_vcpu *vcpu); | ||
694 | extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, | ||
695 | u64 itir, u64 va, int type); | ||
696 | extern struct thash_data *vhpt_lookup(u64 va); | ||
697 | extern u64 guest_vhpt_lookup(u64 iha, u64 *pte); | ||
698 | extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps); | ||
699 | extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps); | ||
700 | extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va); | ||
701 | extern int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, | ||
702 | u64 itir, u64 ifa, int type); | ||
703 | extern void thash_purge_all(struct kvm_vcpu *v); | ||
704 | extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v, | ||
705 | u64 va, int is_data); | ||
706 | extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, | ||
707 | u64 ps, int is_data); | ||
708 | |||
709 | extern void vcpu_increment_iip(struct kvm_vcpu *v); | ||
710 | extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu); | ||
711 | extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec); | ||
712 | extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec); | ||
713 | extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr); | ||
714 | extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr); | ||
715 | extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr); | ||
716 | extern void nested_dtlb(struct kvm_vcpu *vcpu); | ||
717 | extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr); | ||
718 | extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref); | ||
719 | |||
720 | extern void update_vhpi(struct kvm_vcpu *vcpu, int vec); | ||
721 | extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice); | ||
722 | |||
723 | extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle); | ||
724 | extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma); | ||
725 | extern void vmm_transition(struct kvm_vcpu *vcpu); | ||
726 | extern void vmm_trampoline(union context *from, union context *to); | ||
727 | extern int vmm_entry(void); | ||
728 | extern u64 vcpu_get_itc(struct kvm_vcpu *vcpu); | ||
729 | |||
730 | extern void vmm_reset_entry(void); | ||
731 | void kvm_init_vtlb(struct kvm_vcpu *v); | ||
732 | void kvm_init_vhpt(struct kvm_vcpu *v); | ||
733 | void thash_init(struct thash_cb *hcb, u64 sz); | ||
734 | |||
735 | void panic_vm(struct kvm_vcpu *v); | ||
736 | |||
737 | extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3, | ||
738 | u64 arg4, u64 arg5, u64 arg6, u64 arg7); | ||
739 | #endif | ||
740 | #endif /* __VCPU_H__ */ | ||
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c new file mode 100644 index 000000000000..2275bf4e681a --- /dev/null +++ b/arch/ia64/kvm/vmm.c | |||
@@ -0,0 +1,66 @@ | |||
1 | /* | ||
2 | * vmm.c: vmm module interface with kvm module | ||
3 | * | ||
4 | * Copyright (c) 2007, Intel Corporation. | ||
5 | * | ||
6 | * Xiantao Zhang (xiantao.zhang@intel.com) | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
19 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
20 | */ | ||
21 | |||
22 | |||
23 | #include<linux/module.h> | ||
24 | #include<asm/fpswa.h> | ||
25 | |||
26 | #include "vcpu.h" | ||
27 | |||
28 | MODULE_AUTHOR("Intel"); | ||
29 | MODULE_LICENSE("GPL"); | ||
30 | |||
31 | extern char kvm_ia64_ivt; | ||
32 | extern fpswa_interface_t *vmm_fpswa_interface; | ||
33 | |||
34 | struct kvm_vmm_info vmm_info = { | ||
35 | .module = THIS_MODULE, | ||
36 | .vmm_entry = vmm_entry, | ||
37 | .tramp_entry = vmm_trampoline, | ||
38 | .vmm_ivt = (unsigned long)&kvm_ia64_ivt, | ||
39 | }; | ||
40 | |||
41 | static int __init kvm_vmm_init(void) | ||
42 | { | ||
43 | |||
44 | vmm_fpswa_interface = fpswa_interface; | ||
45 | |||
46 | /*Register vmm data to kvm side*/ | ||
47 | return kvm_init(&vmm_info, 1024, THIS_MODULE); | ||
48 | } | ||
49 | |||
50 | static void __exit kvm_vmm_exit(void) | ||
51 | { | ||
52 | kvm_exit(); | ||
53 | return ; | ||
54 | } | ||
55 | |||
56 | void vmm_spin_lock(spinlock_t *lock) | ||
57 | { | ||
58 | _vmm_raw_spin_lock(lock); | ||
59 | } | ||
60 | |||
61 | void vmm_spin_unlock(spinlock_t *lock) | ||
62 | { | ||
63 | _vmm_raw_spin_unlock(lock); | ||
64 | } | ||
65 | module_init(kvm_vmm_init) | ||
66 | module_exit(kvm_vmm_exit) | ||
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S new file mode 100644 index 000000000000..3ee5f481c06d --- /dev/null +++ b/arch/ia64/kvm/vmm_ivt.S | |||
@@ -0,0 +1,1424 @@ | |||
1 | /* | ||
2 | * /ia64/kvm_ivt.S | ||
3 | * | ||
4 | * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co | ||
5 | * Stephane Eranian <eranian@hpl.hp.com> | ||
6 | * David Mosberger <davidm@hpl.hp.com> | ||
7 | * Copyright (C) 2000, 2002-2003 Intel Co | ||
8 | * Asit Mallick <asit.k.mallick@intel.com> | ||
9 | * Suresh Siddha <suresh.b.siddha@intel.com> | ||
10 | * Kenneth Chen <kenneth.w.chen@intel.com> | ||
11 | * Fenghua Yu <fenghua.yu@intel.com> | ||
12 | * | ||
13 | * | ||
14 | * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling | ||
15 | * for SMP | ||
16 | * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB | ||
17 | * handler now uses virtual PT. | ||
18 | * | ||
19 | * 07/6/20 Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) | ||
20 | * Supporting Intel virtualization architecture | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | /* | ||
25 | * This file defines the interruption vector table used by the CPU. | ||
26 | * It does not include one entry per possible cause of interruption. | ||
27 | * | ||
28 | * The first 20 entries of the table contain 64 bundles each while the | ||
29 | * remaining 48 entries contain only 16 bundles each. | ||
30 | * | ||
31 | * The 64 bundles are used to allow inlining the whole handler for | ||
32 | * critical | ||
33 | * interruptions like TLB misses. | ||
34 | * | ||
35 | * For each entry, the comment is as follows: | ||
36 | * | ||
37 | * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss | ||
38 | * (12,51) | ||
39 | * entry offset ----/ / / / | ||
40 | * / | ||
41 | * entry number ---------/ / / | ||
42 | * / | ||
43 | * size of the entry -------------/ / | ||
44 | * / | ||
45 | * vector name -------------------------------------/ | ||
46 | * / | ||
47 | * interruptions triggering this vector | ||
48 | * ----------------------/ | ||
49 | * | ||
50 | * The table is 32KB in size and must be aligned on 32KB | ||
51 | * boundary. | ||
52 | * (The CPU ignores the 15 lower bits of the address) | ||
53 | * | ||
54 | * Table is based upon EAS2.6 (Oct 1999) | ||
55 | */ | ||
56 | |||
57 | |||
58 | #include <asm/asmmacro.h> | ||
59 | #include <asm/cache.h> | ||
60 | #include <asm/pgtable.h> | ||
61 | |||
62 | #include "asm-offsets.h" | ||
63 | #include "vcpu.h" | ||
64 | #include "kvm_minstate.h" | ||
65 | #include "vti.h" | ||
66 | |||
67 | #if 1 | ||
68 | # define PSR_DEFAULT_BITS psr.ac | ||
69 | #else | ||
70 | # define PSR_DEFAULT_BITS 0 | ||
71 | #endif | ||
72 | |||
73 | |||
74 | #define KVM_FAULT(n) \ | ||
75 | kvm_fault_##n:; \ | ||
76 | mov r19=n;; \ | ||
77 | br.sptk.many kvm_fault_##n; \ | ||
78 | ;; \ | ||
79 | |||
80 | |||
81 | #define KVM_REFLECT(n) \ | ||
82 | mov r31=pr; \ | ||
83 | mov r19=n; /* prepare to save predicates */ \ | ||
84 | mov r29=cr.ipsr; \ | ||
85 | ;; \ | ||
86 | tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ | ||
87 | (p7)br.sptk.many kvm_dispatch_reflection; \ | ||
88 | br.sptk.many kvm_panic; \ | ||
89 | |||
90 | |||
91 | GLOBAL_ENTRY(kvm_panic) | ||
92 | br.sptk.many kvm_panic | ||
93 | ;; | ||
94 | END(kvm_panic) | ||
95 | |||
96 | |||
97 | |||
98 | |||
99 | |||
100 | .section .text.ivt,"ax" | ||
101 | |||
102 | .align 32768 // align on 32KB boundary | ||
103 | .global kvm_ia64_ivt | ||
104 | kvm_ia64_ivt: | ||
105 | /////////////////////////////////////////////////////////////// | ||
106 | // 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) | ||
107 | ENTRY(kvm_vhpt_miss) | ||
108 | KVM_FAULT(0) | ||
109 | END(kvm_vhpt_miss) | ||
110 | |||
111 | |||
112 | .org kvm_ia64_ivt+0x400 | ||
113 | //////////////////////////////////////////////////////////////// | ||
114 | // 0x0400 Entry 1 (size 64 bundles) ITLB (21) | ||
115 | ENTRY(kvm_itlb_miss) | ||
116 | mov r31 = pr | ||
117 | mov r29=cr.ipsr; | ||
118 | ;; | ||
119 | tbit.z p6,p7=r29,IA64_PSR_VM_BIT; | ||
120 | (p6) br.sptk kvm_alt_itlb_miss | ||
121 | mov r19 = 1 | ||
122 | br.sptk kvm_itlb_miss_dispatch | ||
123 | KVM_FAULT(1); | ||
124 | END(kvm_itlb_miss) | ||
125 | |||
126 | .org kvm_ia64_ivt+0x0800 | ||
127 | ////////////////////////////////////////////////////////////////// | ||
128 | // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) | ||
129 | ENTRY(kvm_dtlb_miss) | ||
130 | mov r31 = pr | ||
131 | mov r29=cr.ipsr; | ||
132 | ;; | ||
133 | tbit.z p6,p7=r29,IA64_PSR_VM_BIT; | ||
134 | (p6)br.sptk kvm_alt_dtlb_miss | ||
135 | br.sptk kvm_dtlb_miss_dispatch | ||
136 | END(kvm_dtlb_miss) | ||
137 | |||
138 | .org kvm_ia64_ivt+0x0c00 | ||
139 | //////////////////////////////////////////////////////////////////// | ||
140 | // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) | ||
141 | ENTRY(kvm_alt_itlb_miss) | ||
142 | mov r16=cr.ifa // get address that caused the TLB miss | ||
143 | ;; | ||
144 | movl r17=PAGE_KERNEL | ||
145 | mov r24=cr.ipsr | ||
146 | movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) | ||
147 | ;; | ||
148 | and r19=r19,r16 // clear ed, reserved bits, and PTE control bits | ||
149 | ;; | ||
150 | or r19=r17,r19 // insert PTE control bits into r19 | ||
151 | ;; | ||
152 | movl r20=IA64_GRANULE_SHIFT<<2 | ||
153 | ;; | ||
154 | mov cr.itir=r20 | ||
155 | ;; | ||
156 | itc.i r19 // insert the TLB entry | ||
157 | mov pr=r31,-1 | ||
158 | rfi | ||
159 | END(kvm_alt_itlb_miss) | ||
160 | |||
161 | .org kvm_ia64_ivt+0x1000 | ||
162 | ///////////////////////////////////////////////////////////////////// | ||
163 | // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) | ||
164 | ENTRY(kvm_alt_dtlb_miss) | ||
165 | mov r16=cr.ifa // get address that caused the TLB miss | ||
166 | ;; | ||
167 | movl r17=PAGE_KERNEL | ||
168 | movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) | ||
169 | mov r24=cr.ipsr | ||
170 | ;; | ||
171 | and r19=r19,r16 // clear ed, reserved bits, and PTE control bits | ||
172 | ;; | ||
173 | or r19=r19,r17 // insert PTE control bits into r19 | ||
174 | ;; | ||
175 | movl r20=IA64_GRANULE_SHIFT<<2 | ||
176 | ;; | ||
177 | mov cr.itir=r20 | ||
178 | ;; | ||
179 | itc.d r19 // insert the TLB entry | ||
180 | mov pr=r31,-1 | ||
181 | rfi | ||
182 | END(kvm_alt_dtlb_miss) | ||
183 | |||
184 | .org kvm_ia64_ivt+0x1400 | ||
185 | ////////////////////////////////////////////////////////////////////// | ||
186 | // 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) | ||
187 | ENTRY(kvm_nested_dtlb_miss) | ||
188 | KVM_FAULT(5) | ||
189 | END(kvm_nested_dtlb_miss) | ||
190 | |||
191 | .org kvm_ia64_ivt+0x1800 | ||
192 | ///////////////////////////////////////////////////////////////////// | ||
193 | // 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) | ||
194 | ENTRY(kvm_ikey_miss) | ||
195 | KVM_REFLECT(6) | ||
196 | END(kvm_ikey_miss) | ||
197 | |||
198 | .org kvm_ia64_ivt+0x1c00 | ||
199 | ///////////////////////////////////////////////////////////////////// | ||
200 | // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) | ||
201 | ENTRY(kvm_dkey_miss) | ||
202 | KVM_REFLECT(7) | ||
203 | END(kvm_dkey_miss) | ||
204 | |||
205 | .org kvm_ia64_ivt+0x2000 | ||
206 | //////////////////////////////////////////////////////////////////// | ||
207 | // 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) | ||
208 | ENTRY(kvm_dirty_bit) | ||
209 | KVM_REFLECT(8) | ||
210 | END(kvm_dirty_bit) | ||
211 | |||
212 | .org kvm_ia64_ivt+0x2400 | ||
213 | //////////////////////////////////////////////////////////////////// | ||
214 | // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) | ||
215 | ENTRY(kvm_iaccess_bit) | ||
216 | KVM_REFLECT(9) | ||
217 | END(kvm_iaccess_bit) | ||
218 | |||
219 | .org kvm_ia64_ivt+0x2800 | ||
220 | /////////////////////////////////////////////////////////////////// | ||
221 | // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) | ||
222 | ENTRY(kvm_daccess_bit) | ||
223 | KVM_REFLECT(10) | ||
224 | END(kvm_daccess_bit) | ||
225 | |||
226 | .org kvm_ia64_ivt+0x2c00 | ||
227 | ///////////////////////////////////////////////////////////////// | ||
228 | // 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) | ||
229 | ENTRY(kvm_break_fault) | ||
230 | mov r31=pr | ||
231 | mov r19=11 | ||
232 | mov r29=cr.ipsr | ||
233 | ;; | ||
234 | KVM_SAVE_MIN_WITH_COVER_R19 | ||
235 | ;; | ||
236 | alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) | ||
237 | mov out0=cr.ifa | ||
238 | mov out2=cr.isr // FIXME: pity to make this slow access twice | ||
239 | mov out3=cr.iim // FIXME: pity to make this slow access twice | ||
240 | adds r3=8,r2 // set up second base pointer | ||
241 | ;; | ||
242 | ssm psr.ic | ||
243 | ;; | ||
244 | srlz.i // guarantee that interruption collection is on | ||
245 | ;; | ||
246 | //(p15)ssm psr.i // restore psr.i | ||
247 | addl r14=@gprel(ia64_leave_hypervisor),gp | ||
248 | ;; | ||
249 | KVM_SAVE_REST | ||
250 | mov rp=r14 | ||
251 | ;; | ||
252 | adds out1=16,sp | ||
253 | br.call.sptk.many b6=kvm_ia64_handle_break | ||
254 | ;; | ||
255 | END(kvm_break_fault) | ||
256 | |||
257 | .org kvm_ia64_ivt+0x3000 | ||
258 | ///////////////////////////////////////////////////////////////// | ||
259 | // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) | ||
260 | ENTRY(kvm_interrupt) | ||
261 | mov r31=pr // prepare to save predicates | ||
262 | mov r19=12 | ||
263 | mov r29=cr.ipsr | ||
264 | ;; | ||
265 | tbit.z p6,p7=r29,IA64_PSR_VM_BIT | ||
266 | tbit.z p0,p15=r29,IA64_PSR_I_BIT | ||
267 | ;; | ||
268 | (p7) br.sptk kvm_dispatch_interrupt | ||
269 | ;; | ||
270 | mov r27=ar.rsc /* M */ | ||
271 | mov r20=r1 /* A */ | ||
272 | mov r25=ar.unat /* M */ | ||
273 | mov r26=ar.pfs /* I */ | ||
274 | mov r28=cr.iip /* M */ | ||
275 | cover /* B (or nothing) */ | ||
276 | ;; | ||
277 | mov r1=sp | ||
278 | ;; | ||
279 | invala /* M */ | ||
280 | mov r30=cr.ifs | ||
281 | ;; | ||
282 | addl r1=-VMM_PT_REGS_SIZE,r1 | ||
283 | ;; | ||
284 | adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ | ||
285 | adds r16=PT(CR_IPSR),r1 | ||
286 | ;; | ||
287 | lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES | ||
288 | st8 [r16]=r29 /* save cr.ipsr */ | ||
289 | ;; | ||
290 | lfetch.fault.excl.nt1 [r17] | ||
291 | mov r29=b0 | ||
292 | ;; | ||
293 | adds r16=PT(R8),r1 /* initialize first base pointer */ | ||
294 | adds r17=PT(R9),r1 /* initialize second base pointer */ | ||
295 | mov r18=r0 /* make sure r18 isn't NaT */ | ||
296 | ;; | ||
297 | .mem.offset 0,0; st8.spill [r16]=r8,16 | ||
298 | .mem.offset 8,0; st8.spill [r17]=r9,16 | ||
299 | ;; | ||
300 | .mem.offset 0,0; st8.spill [r16]=r10,24 | ||
301 | .mem.offset 8,0; st8.spill [r17]=r11,24 | ||
302 | ;; | ||
303 | st8 [r16]=r28,16 /* save cr.iip */ | ||
304 | st8 [r17]=r30,16 /* save cr.ifs */ | ||
305 | mov r8=ar.fpsr /* M */ | ||
306 | mov r9=ar.csd | ||
307 | mov r10=ar.ssd | ||
308 | movl r11=FPSR_DEFAULT /* L-unit */ | ||
309 | ;; | ||
310 | st8 [r16]=r25,16 /* save ar.unat */ | ||
311 | st8 [r17]=r26,16 /* save ar.pfs */ | ||
312 | shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ | ||
313 | ;; | ||
314 | st8 [r16]=r27,16 /* save ar.rsc */ | ||
315 | adds r17=16,r17 /* skip over ar_rnat field */ | ||
316 | ;; | ||
317 | st8 [r17]=r31,16 /* save predicates */ | ||
318 | adds r16=16,r16 /* skip over ar_bspstore field */ | ||
319 | ;; | ||
320 | st8 [r16]=r29,16 /* save b0 */ | ||
321 | st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ | ||
322 | ;; | ||
323 | .mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ | ||
324 | .mem.offset 8,0; st8.spill [r17]=r12,16 | ||
325 | adds r12=-16,r1 | ||
326 | /* switch to kernel memory stack (with 16 bytes of scratch) */ | ||
327 | ;; | ||
328 | .mem.offset 0,0; st8.spill [r16]=r13,16 | ||
329 | .mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ | ||
330 | ;; | ||
331 | .mem.offset 0,0; st8.spill [r16]=r15,16 | ||
332 | .mem.offset 8,0; st8.spill [r17]=r14,16 | ||
333 | dep r14=-1,r0,60,4 | ||
334 | ;; | ||
335 | .mem.offset 0,0; st8.spill [r16]=r2,16 | ||
336 | .mem.offset 8,0; st8.spill [r17]=r3,16 | ||
337 | adds r2=VMM_PT_REGS_R16_OFFSET,r1 | ||
338 | adds r14 = VMM_VCPU_GP_OFFSET,r13 | ||
339 | ;; | ||
340 | mov r8=ar.ccv | ||
341 | ld8 r14 = [r14] | ||
342 | ;; | ||
343 | mov r1=r14 /* establish kernel global pointer */ | ||
344 | ;; \ | ||
345 | bsw.1 | ||
346 | ;; | ||
347 | alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group | ||
348 | mov out0=r13 | ||
349 | ;; | ||
350 | ssm psr.ic | ||
351 | ;; | ||
352 | srlz.i | ||
353 | ;; | ||
354 | //(p15) ssm psr.i | ||
355 | adds r3=8,r2 // set up second base pointer for SAVE_REST | ||
356 | srlz.i // ensure everybody knows psr.ic is back on | ||
357 | ;; | ||
358 | .mem.offset 0,0; st8.spill [r2]=r16,16 | ||
359 | .mem.offset 8,0; st8.spill [r3]=r17,16 | ||
360 | ;; | ||
361 | .mem.offset 0,0; st8.spill [r2]=r18,16 | ||
362 | .mem.offset 8,0; st8.spill [r3]=r19,16 | ||
363 | ;; | ||
364 | .mem.offset 0,0; st8.spill [r2]=r20,16 | ||
365 | .mem.offset 8,0; st8.spill [r3]=r21,16 | ||
366 | mov r18=b6 | ||
367 | ;; | ||
368 | .mem.offset 0,0; st8.spill [r2]=r22,16 | ||
369 | .mem.offset 8,0; st8.spill [r3]=r23,16 | ||
370 | mov r19=b7 | ||
371 | ;; | ||
372 | .mem.offset 0,0; st8.spill [r2]=r24,16 | ||
373 | .mem.offset 8,0; st8.spill [r3]=r25,16 | ||
374 | ;; | ||
375 | .mem.offset 0,0; st8.spill [r2]=r26,16 | ||
376 | .mem.offset 8,0; st8.spill [r3]=r27,16 | ||
377 | ;; | ||
378 | .mem.offset 0,0; st8.spill [r2]=r28,16 | ||
379 | .mem.offset 8,0; st8.spill [r3]=r29,16 | ||
380 | ;; | ||
381 | .mem.offset 0,0; st8.spill [r2]=r30,16 | ||
382 | .mem.offset 8,0; st8.spill [r3]=r31,32 | ||
383 | ;; | ||
384 | mov ar.fpsr=r11 /* M-unit */ | ||
385 | st8 [r2]=r8,8 /* ar.ccv */ | ||
386 | adds r24=PT(B6)-PT(F7),r3 | ||
387 | ;; | ||
388 | stf.spill [r2]=f6,32 | ||
389 | stf.spill [r3]=f7,32 | ||
390 | ;; | ||
391 | stf.spill [r2]=f8,32 | ||
392 | stf.spill [r3]=f9,32 | ||
393 | ;; | ||
394 | stf.spill [r2]=f10 | ||
395 | stf.spill [r3]=f11 | ||
396 | adds r25=PT(B7)-PT(F11),r3 | ||
397 | ;; | ||
398 | st8 [r24]=r18,16 /* b6 */ | ||
399 | st8 [r25]=r19,16 /* b7 */ | ||
400 | ;; | ||
401 | st8 [r24]=r9 /* ar.csd */ | ||
402 | st8 [r25]=r10 /* ar.ssd */ | ||
403 | ;; | ||
404 | srlz.d // make sure we see the effect of cr.ivr | ||
405 | addl r14=@gprel(ia64_leave_nested),gp | ||
406 | ;; | ||
407 | mov rp=r14 | ||
408 | br.call.sptk.many b6=kvm_ia64_handle_irq | ||
409 | ;; | ||
410 | END(kvm_interrupt) | ||
411 | |||
412 | .global kvm_dispatch_vexirq | ||
413 | .org kvm_ia64_ivt+0x3400 | ||
414 | ////////////////////////////////////////////////////////////////////// | ||
415 | // 0x3400 Entry 13 (size 64 bundles) Reserved | ||
416 | ENTRY(kvm_virtual_exirq) | ||
417 | mov r31=pr | ||
418 | mov r19=13 | ||
419 | mov r30 =r0 | ||
420 | ;; | ||
421 | kvm_dispatch_vexirq: | ||
422 | cmp.eq p6,p0 = 1,r30 | ||
423 | ;; | ||
424 | (p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 | ||
425 | ;; | ||
426 | (p6)ld8 r1 = [r29] | ||
427 | ;; | ||
428 | KVM_SAVE_MIN_WITH_COVER_R19 | ||
429 | alloc r14=ar.pfs,0,0,1,0 | ||
430 | mov out0=r13 | ||
431 | |||
432 | ssm psr.ic | ||
433 | ;; | ||
434 | srlz.i // guarantee that interruption collection is on | ||
435 | ;; | ||
436 | //(p15) ssm psr.i // restore psr.i | ||
437 | adds r3=8,r2 // set up second base pointer | ||
438 | ;; | ||
439 | KVM_SAVE_REST | ||
440 | addl r14=@gprel(ia64_leave_hypervisor),gp | ||
441 | ;; | ||
442 | mov rp=r14 | ||
443 | br.call.sptk.many b6=kvm_vexirq | ||
444 | END(kvm_virtual_exirq) | ||
445 | |||
446 | .org kvm_ia64_ivt+0x3800 | ||
447 | ///////////////////////////////////////////////////////////////////// | ||
448 | // 0x3800 Entry 14 (size 64 bundles) Reserved | ||
449 | KVM_FAULT(14) | ||
450 | // this code segment is from 2.6.16.13 | ||
451 | |||
452 | |||
453 | .org kvm_ia64_ivt+0x3c00 | ||
454 | /////////////////////////////////////////////////////////////////////// | ||
455 | // 0x3c00 Entry 15 (size 64 bundles) Reserved | ||
456 | KVM_FAULT(15) | ||
457 | |||
458 | |||
459 | .org kvm_ia64_ivt+0x4000 | ||
460 | /////////////////////////////////////////////////////////////////////// | ||
461 | // 0x4000 Entry 16 (size 64 bundles) Reserved | ||
462 | KVM_FAULT(16) | ||
463 | |||
464 | .org kvm_ia64_ivt+0x4400 | ||
465 | ////////////////////////////////////////////////////////////////////// | ||
466 | // 0x4400 Entry 17 (size 64 bundles) Reserved | ||
467 | KVM_FAULT(17) | ||
468 | |||
469 | .org kvm_ia64_ivt+0x4800 | ||
470 | ////////////////////////////////////////////////////////////////////// | ||
471 | // 0x4800 Entry 18 (size 64 bundles) Reserved | ||
472 | KVM_FAULT(18) | ||
473 | |||
474 | .org kvm_ia64_ivt+0x4c00 | ||
475 | ////////////////////////////////////////////////////////////////////// | ||
476 | // 0x4c00 Entry 19 (size 64 bundles) Reserved | ||
477 | KVM_FAULT(19) | ||
478 | |||
479 | .org kvm_ia64_ivt+0x5000 | ||
480 | ////////////////////////////////////////////////////////////////////// | ||
481 | // 0x5000 Entry 20 (size 16 bundles) Page Not Present | ||
482 | ENTRY(kvm_page_not_present) | ||
483 | KVM_REFLECT(20) | ||
484 | END(kvm_page_not_present) | ||
485 | |||
486 | .org kvm_ia64_ivt+0x5100 | ||
487 | /////////////////////////////////////////////////////////////////////// | ||
488 | // 0x5100 Entry 21 (size 16 bundles) Key Permission vector | ||
489 | ENTRY(kvm_key_permission) | ||
490 | KVM_REFLECT(21) | ||
491 | END(kvm_key_permission) | ||
492 | |||
493 | .org kvm_ia64_ivt+0x5200 | ||
494 | ////////////////////////////////////////////////////////////////////// | ||
495 | // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) | ||
496 | ENTRY(kvm_iaccess_rights) | ||
497 | KVM_REFLECT(22) | ||
498 | END(kvm_iaccess_rights) | ||
499 | |||
500 | .org kvm_ia64_ivt+0x5300 | ||
501 | ////////////////////////////////////////////////////////////////////// | ||
502 | // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) | ||
503 | ENTRY(kvm_daccess_rights) | ||
504 | KVM_REFLECT(23) | ||
505 | END(kvm_daccess_rights) | ||
506 | |||
507 | .org kvm_ia64_ivt+0x5400 | ||
508 | ///////////////////////////////////////////////////////////////////// | ||
509 | // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) | ||
510 | ENTRY(kvm_general_exception) | ||
511 | KVM_REFLECT(24) | ||
512 | KVM_FAULT(24) | ||
513 | END(kvm_general_exception) | ||
514 | |||
515 | .org kvm_ia64_ivt+0x5500 | ||
516 | ////////////////////////////////////////////////////////////////////// | ||
517 | // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) | ||
518 | ENTRY(kvm_disabled_fp_reg) | ||
519 | KVM_REFLECT(25) | ||
520 | END(kvm_disabled_fp_reg) | ||
521 | |||
522 | .org kvm_ia64_ivt+0x5600 | ||
523 | //////////////////////////////////////////////////////////////////// | ||
524 | // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) | ||
525 | ENTRY(kvm_nat_consumption) | ||
526 | KVM_REFLECT(26) | ||
527 | END(kvm_nat_consumption) | ||
528 | |||
529 | .org kvm_ia64_ivt+0x5700 | ||
530 | ///////////////////////////////////////////////////////////////////// | ||
531 | // 0x5700 Entry 27 (size 16 bundles) Speculation (40) | ||
532 | ENTRY(kvm_speculation_vector) | ||
533 | KVM_REFLECT(27) | ||
534 | END(kvm_speculation_vector) | ||
535 | |||
536 | .org kvm_ia64_ivt+0x5800 | ||
537 | ///////////////////////////////////////////////////////////////////// | ||
538 | // 0x5800 Entry 28 (size 16 bundles) Reserved | ||
539 | KVM_FAULT(28) | ||
540 | |||
541 | .org kvm_ia64_ivt+0x5900 | ||
542 | /////////////////////////////////////////////////////////////////// | ||
543 | // 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) | ||
544 | ENTRY(kvm_debug_vector) | ||
545 | KVM_FAULT(29) | ||
546 | END(kvm_debug_vector) | ||
547 | |||
548 | .org kvm_ia64_ivt+0x5a00 | ||
549 | /////////////////////////////////////////////////////////////// | ||
550 | // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) | ||
551 | ENTRY(kvm_unaligned_access) | ||
552 | KVM_REFLECT(30) | ||
553 | END(kvm_unaligned_access) | ||
554 | |||
555 | .org kvm_ia64_ivt+0x5b00 | ||
556 | ////////////////////////////////////////////////////////////////////// | ||
557 | // 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) | ||
558 | ENTRY(kvm_unsupported_data_reference) | ||
559 | KVM_REFLECT(31) | ||
560 | END(kvm_unsupported_data_reference) | ||
561 | |||
562 | .org kvm_ia64_ivt+0x5c00 | ||
563 | //////////////////////////////////////////////////////////////////// | ||
564 | // 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) | ||
565 | ENTRY(kvm_floating_point_fault) | ||
566 | KVM_REFLECT(32) | ||
567 | END(kvm_floating_point_fault) | ||
568 | |||
569 | .org kvm_ia64_ivt+0x5d00 | ||
570 | ///////////////////////////////////////////////////////////////////// | ||
571 | // 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) | ||
572 | ENTRY(kvm_floating_point_trap) | ||
573 | KVM_REFLECT(33) | ||
574 | END(kvm_floating_point_trap) | ||
575 | |||
576 | .org kvm_ia64_ivt+0x5e00 | ||
577 | ////////////////////////////////////////////////////////////////////// | ||
578 | // 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) | ||
579 | ENTRY(kvm_lower_privilege_trap) | ||
580 | KVM_REFLECT(34) | ||
581 | END(kvm_lower_privilege_trap) | ||
582 | |||
583 | .org kvm_ia64_ivt+0x5f00 | ||
584 | ////////////////////////////////////////////////////////////////////// | ||
585 | // 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) | ||
586 | ENTRY(kvm_taken_branch_trap) | ||
587 | KVM_REFLECT(35) | ||
588 | END(kvm_taken_branch_trap) | ||
589 | |||
590 | .org kvm_ia64_ivt+0x6000 | ||
591 | //////////////////////////////////////////////////////////////////// | ||
592 | // 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) | ||
593 | ENTRY(kvm_single_step_trap) | ||
594 | KVM_REFLECT(36) | ||
595 | END(kvm_single_step_trap) | ||
596 | .global kvm_virtualization_fault_back | ||
597 | .org kvm_ia64_ivt+0x6100 | ||
598 | ///////////////////////////////////////////////////////////////////// | ||
599 | // 0x6100 Entry 37 (size 16 bundles) Virtualization Fault | ||
600 | ENTRY(kvm_virtualization_fault) | ||
601 | mov r31=pr | ||
602 | adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 | ||
603 | ;; | ||
604 | st8 [r16] = r1 | ||
605 | adds r17 = VMM_VCPU_GP_OFFSET, r21 | ||
606 | ;; | ||
607 | ld8 r1 = [r17] | ||
608 | cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 | ||
609 | cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 | ||
610 | cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 | ||
611 | cmp.eq p9,p0=EVENT_RSM,r24 | ||
612 | cmp.eq p10,p0=EVENT_SSM,r24 | ||
613 | cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 | ||
614 | cmp.eq p12,p0=EVENT_THASH,r24 | ||
615 | (p6) br.dptk.many kvm_asm_mov_from_ar | ||
616 | (p7) br.dptk.many kvm_asm_mov_from_rr | ||
617 | (p8) br.dptk.many kvm_asm_mov_to_rr | ||
618 | (p9) br.dptk.many kvm_asm_rsm | ||
619 | (p10) br.dptk.many kvm_asm_ssm | ||
620 | (p11) br.dptk.many kvm_asm_mov_to_psr | ||
621 | (p12) br.dptk.many kvm_asm_thash | ||
622 | ;; | ||
623 | kvm_virtualization_fault_back: | ||
624 | adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 | ||
625 | ;; | ||
626 | ld8 r1 = [r16] | ||
627 | ;; | ||
628 | mov r19=37 | ||
629 | adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 | ||
630 | adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 | ||
631 | ;; | ||
632 | st8 [r16] = r24 | ||
633 | st8 [r17] = r25 | ||
634 | ;; | ||
635 | cmp.ne p6,p0=EVENT_RFI, r24 | ||
636 | (p6) br.sptk kvm_dispatch_virtualization_fault | ||
637 | ;; | ||
638 | adds r18=VMM_VPD_BASE_OFFSET,r21 | ||
639 | ;; | ||
640 | ld8 r18=[r18] | ||
641 | ;; | ||
642 | adds r18=VMM_VPD_VIFS_OFFSET,r18 | ||
643 | ;; | ||
644 | ld8 r18=[r18] | ||
645 | ;; | ||
646 | tbit.z p6,p0=r18,63 | ||
647 | (p6) br.sptk kvm_dispatch_virtualization_fault | ||
648 | ;; | ||
649 | //if vifs.v=1 desert current register frame | ||
650 | alloc r18=ar.pfs,0,0,0,0 | ||
651 | br.sptk kvm_dispatch_virtualization_fault | ||
652 | END(kvm_virtualization_fault) | ||
653 | |||
654 | .org kvm_ia64_ivt+0x6200 | ||
655 | ////////////////////////////////////////////////////////////// | ||
656 | // 0x6200 Entry 38 (size 16 bundles) Reserved | ||
657 | KVM_FAULT(38) | ||
658 | |||
659 | .org kvm_ia64_ivt+0x6300 | ||
660 | ///////////////////////////////////////////////////////////////// | ||
661 | // 0x6300 Entry 39 (size 16 bundles) Reserved | ||
662 | KVM_FAULT(39) | ||
663 | |||
664 | .org kvm_ia64_ivt+0x6400 | ||
665 | ///////////////////////////////////////////////////////////////// | ||
666 | // 0x6400 Entry 40 (size 16 bundles) Reserved | ||
667 | KVM_FAULT(40) | ||
668 | |||
669 | .org kvm_ia64_ivt+0x6500 | ||
670 | ////////////////////////////////////////////////////////////////// | ||
671 | // 0x6500 Entry 41 (size 16 bundles) Reserved | ||
672 | KVM_FAULT(41) | ||
673 | |||
674 | .org kvm_ia64_ivt+0x6600 | ||
675 | ////////////////////////////////////////////////////////////////// | ||
676 | // 0x6600 Entry 42 (size 16 bundles) Reserved | ||
677 | KVM_FAULT(42) | ||
678 | |||
679 | .org kvm_ia64_ivt+0x6700 | ||
680 | ////////////////////////////////////////////////////////////////// | ||
681 | // 0x6700 Entry 43 (size 16 bundles) Reserved | ||
682 | KVM_FAULT(43) | ||
683 | |||
684 | .org kvm_ia64_ivt+0x6800 | ||
685 | ////////////////////////////////////////////////////////////////// | ||
686 | // 0x6800 Entry 44 (size 16 bundles) Reserved | ||
687 | KVM_FAULT(44) | ||
688 | |||
689 | .org kvm_ia64_ivt+0x6900 | ||
690 | /////////////////////////////////////////////////////////////////// | ||
691 | // 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception | ||
692 | //(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) | ||
693 | ENTRY(kvm_ia32_exception) | ||
694 | KVM_FAULT(45) | ||
695 | END(kvm_ia32_exception) | ||
696 | |||
697 | .org kvm_ia64_ivt+0x6a00 | ||
698 | //////////////////////////////////////////////////////////////////// | ||
699 | // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) | ||
700 | ENTRY(kvm_ia32_intercept) | ||
701 | KVM_FAULT(47) | ||
702 | END(kvm_ia32_intercept) | ||
703 | |||
704 | .org kvm_ia64_ivt+0x6c00 | ||
705 | ///////////////////////////////////////////////////////////////////// | ||
706 | // 0x6c00 Entry 48 (size 16 bundles) Reserved | ||
707 | KVM_FAULT(48) | ||
708 | |||
709 | .org kvm_ia64_ivt+0x6d00 | ||
710 | ////////////////////////////////////////////////////////////////////// | ||
711 | // 0x6d00 Entry 49 (size 16 bundles) Reserved | ||
712 | KVM_FAULT(49) | ||
713 | |||
714 | .org kvm_ia64_ivt+0x6e00 | ||
715 | ////////////////////////////////////////////////////////////////////// | ||
716 | // 0x6e00 Entry 50 (size 16 bundles) Reserved | ||
717 | KVM_FAULT(50) | ||
718 | |||
719 | .org kvm_ia64_ivt+0x6f00 | ||
720 | ///////////////////////////////////////////////////////////////////// | ||
721 | // 0x6f00 Entry 51 (size 16 bundles) Reserved | ||
722 | KVM_FAULT(52) | ||
723 | |||
724 | .org kvm_ia64_ivt+0x7100 | ||
725 | //////////////////////////////////////////////////////////////////// | ||
726 | // 0x7100 Entry 53 (size 16 bundles) Reserved | ||
727 | KVM_FAULT(53) | ||
728 | |||
729 | .org kvm_ia64_ivt+0x7200 | ||
730 | ///////////////////////////////////////////////////////////////////// | ||
731 | // 0x7200 Entry 54 (size 16 bundles) Reserved | ||
732 | KVM_FAULT(54) | ||
733 | |||
734 | .org kvm_ia64_ivt+0x7300 | ||
735 | //////////////////////////////////////////////////////////////////// | ||
736 | // 0x7300 Entry 55 (size 16 bundles) Reserved | ||
737 | KVM_FAULT(55) | ||
738 | |||
739 | .org kvm_ia64_ivt+0x7400 | ||
740 | //////////////////////////////////////////////////////////////////// | ||
741 | // 0x7400 Entry 56 (size 16 bundles) Reserved | ||
742 | KVM_FAULT(56) | ||
743 | |||
744 | .org kvm_ia64_ivt+0x7500 | ||
745 | ///////////////////////////////////////////////////////////////////// | ||
746 | // 0x7500 Entry 57 (size 16 bundles) Reserved | ||
747 | KVM_FAULT(57) | ||
748 | |||
749 | .org kvm_ia64_ivt+0x7600 | ||
750 | ///////////////////////////////////////////////////////////////////// | ||
751 | // 0x7600 Entry 58 (size 16 bundles) Reserved | ||
752 | KVM_FAULT(58) | ||
753 | |||
754 | .org kvm_ia64_ivt+0x7700 | ||
755 | //////////////////////////////////////////////////////////////////// | ||
756 | // 0x7700 Entry 59 (size 16 bundles) Reserved | ||
757 | KVM_FAULT(59) | ||
758 | |||
759 | .org kvm_ia64_ivt+0x7800 | ||
760 | //////////////////////////////////////////////////////////////////// | ||
761 | // 0x7800 Entry 60 (size 16 bundles) Reserved | ||
762 | KVM_FAULT(60) | ||
763 | |||
764 | .org kvm_ia64_ivt+0x7900 | ||
765 | ///////////////////////////////////////////////////////////////////// | ||
766 | // 0x7900 Entry 61 (size 16 bundles) Reserved | ||
767 | KVM_FAULT(61) | ||
768 | |||
769 | .org kvm_ia64_ivt+0x7a00 | ||
770 | ///////////////////////////////////////////////////////////////////// | ||
771 | // 0x7a00 Entry 62 (size 16 bundles) Reserved | ||
772 | KVM_FAULT(62) | ||
773 | |||
774 | .org kvm_ia64_ivt+0x7b00 | ||
775 | ///////////////////////////////////////////////////////////////////// | ||
776 | // 0x7b00 Entry 63 (size 16 bundles) Reserved | ||
777 | KVM_FAULT(63) | ||
778 | |||
779 | .org kvm_ia64_ivt+0x7c00 | ||
780 | //////////////////////////////////////////////////////////////////// | ||
781 | // 0x7c00 Entry 64 (size 16 bundles) Reserved | ||
782 | KVM_FAULT(64) | ||
783 | |||
784 | .org kvm_ia64_ivt+0x7d00 | ||
785 | ///////////////////////////////////////////////////////////////////// | ||
786 | // 0x7d00 Entry 65 (size 16 bundles) Reserved | ||
787 | KVM_FAULT(65) | ||
788 | |||
789 | .org kvm_ia64_ivt+0x7e00 | ||
790 | ///////////////////////////////////////////////////////////////////// | ||
791 | // 0x7e00 Entry 66 (size 16 bundles) Reserved | ||
792 | KVM_FAULT(66) | ||
793 | |||
794 | .org kvm_ia64_ivt+0x7f00 | ||
795 | //////////////////////////////////////////////////////////////////// | ||
796 | // 0x7f00 Entry 67 (size 16 bundles) Reserved | ||
797 | KVM_FAULT(67) | ||
798 | |||
799 | .org kvm_ia64_ivt+0x8000 | ||
800 | // There is no particular reason for this code to be here, other than that | ||
801 | // there happens to be space here that would go unused otherwise. If this | ||
802 | // fault ever gets "unreserved", simply moved the following code to a more | ||
803 | // suitable spot... | ||
804 | |||
805 | |||
806 | ENTRY(kvm_dtlb_miss_dispatch) | ||
807 | mov r19 = 2 | ||
808 | KVM_SAVE_MIN_WITH_COVER_R19 | ||
809 | alloc r14=ar.pfs,0,0,3,0 | ||
810 | mov out0=cr.ifa | ||
811 | mov out1=r15 | ||
812 | adds r3=8,r2 // set up second base pointer | ||
813 | ;; | ||
814 | ssm psr.ic | ||
815 | ;; | ||
816 | srlz.i // guarantee that interruption collection is on | ||
817 | ;; | ||
818 | //(p15) ssm psr.i // restore psr.i | ||
819 | addl r14=@gprel(ia64_leave_hypervisor_prepare),gp | ||
820 | ;; | ||
821 | KVM_SAVE_REST | ||
822 | KVM_SAVE_EXTRA | ||
823 | mov rp=r14 | ||
824 | ;; | ||
825 | adds out2=16,r12 | ||
826 | br.call.sptk.many b6=kvm_page_fault | ||
827 | END(kvm_dtlb_miss_dispatch) | ||
828 | |||
829 | ENTRY(kvm_itlb_miss_dispatch) | ||
830 | |||
831 | KVM_SAVE_MIN_WITH_COVER_R19 | ||
832 | alloc r14=ar.pfs,0,0,3,0 | ||
833 | mov out0=cr.ifa | ||
834 | mov out1=r15 | ||
835 | adds r3=8,r2 // set up second base pointer | ||
836 | ;; | ||
837 | ssm psr.ic | ||
838 | ;; | ||
839 | srlz.i // guarantee that interruption collection is on | ||
840 | ;; | ||
841 | //(p15) ssm psr.i // restore psr.i | ||
842 | addl r14=@gprel(ia64_leave_hypervisor),gp | ||
843 | ;; | ||
844 | KVM_SAVE_REST | ||
845 | mov rp=r14 | ||
846 | ;; | ||
847 | adds out2=16,r12 | ||
848 | br.call.sptk.many b6=kvm_page_fault | ||
849 | END(kvm_itlb_miss_dispatch) | ||
850 | |||
851 | ENTRY(kvm_dispatch_reflection) | ||
852 | /* | ||
853 | * Input: | ||
854 | * psr.ic: off | ||
855 | * r19: intr type (offset into ivt, see ia64_int.h) | ||
856 | * r31: contains saved predicates (pr) | ||
857 | */ | ||
858 | KVM_SAVE_MIN_WITH_COVER_R19 | ||
859 | alloc r14=ar.pfs,0,0,5,0 | ||
860 | mov out0=cr.ifa | ||
861 | mov out1=cr.isr | ||
862 | mov out2=cr.iim | ||
863 | mov out3=r15 | ||
864 | adds r3=8,r2 // set up second base pointer | ||
865 | ;; | ||
866 | ssm psr.ic | ||
867 | ;; | ||
868 | srlz.i // guarantee that interruption collection is on | ||
869 | ;; | ||
870 | //(p15) ssm psr.i // restore psr.i | ||
871 | addl r14=@gprel(ia64_leave_hypervisor),gp | ||
872 | ;; | ||
873 | KVM_SAVE_REST | ||
874 | mov rp=r14 | ||
875 | ;; | ||
876 | adds out4=16,r12 | ||
877 | br.call.sptk.many b6=reflect_interruption | ||
878 | END(kvm_dispatch_reflection) | ||
879 | |||
880 | ENTRY(kvm_dispatch_virtualization_fault) | ||
881 | adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 | ||
882 | adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 | ||
883 | ;; | ||
884 | st8 [r16] = r24 | ||
885 | st8 [r17] = r25 | ||
886 | ;; | ||
887 | KVM_SAVE_MIN_WITH_COVER_R19 | ||
888 | ;; | ||
889 | alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) | ||
890 | mov out0=r13 //vcpu | ||
891 | adds r3=8,r2 // set up second base pointer | ||
892 | ;; | ||
893 | ssm psr.ic | ||
894 | ;; | ||
895 | srlz.i // guarantee that interruption collection is on | ||
896 | ;; | ||
897 | //(p15) ssm psr.i // restore psr.i | ||
898 | addl r14=@gprel(ia64_leave_hypervisor_prepare),gp | ||
899 | ;; | ||
900 | KVM_SAVE_REST | ||
901 | KVM_SAVE_EXTRA | ||
902 | mov rp=r14 | ||
903 | ;; | ||
904 | adds out1=16,sp //regs | ||
905 | br.call.sptk.many b6=kvm_emulate | ||
906 | END(kvm_dispatch_virtualization_fault) | ||
907 | |||
908 | |||
909 | ENTRY(kvm_dispatch_interrupt) | ||
910 | KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 | ||
911 | ;; | ||
912 | alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group | ||
913 | //mov out0=cr.ivr // pass cr.ivr as first arg | ||
914 | adds r3=8,r2 // set up second base pointer for SAVE_REST | ||
915 | ;; | ||
916 | ssm psr.ic | ||
917 | ;; | ||
918 | srlz.i | ||
919 | ;; | ||
920 | //(p15) ssm psr.i | ||
921 | addl r14=@gprel(ia64_leave_hypervisor),gp | ||
922 | ;; | ||
923 | KVM_SAVE_REST | ||
924 | mov rp=r14 | ||
925 | ;; | ||
926 | mov out0=r13 // pass pointer to pt_regs as second arg | ||
927 | br.call.sptk.many b6=kvm_ia64_handle_irq | ||
928 | END(kvm_dispatch_interrupt) | ||
929 | |||
930 | |||
931 | |||
932 | |||
933 | GLOBAL_ENTRY(ia64_leave_nested) | ||
934 | rsm psr.i | ||
935 | ;; | ||
936 | adds r21=PT(PR)+16,r12 | ||
937 | ;; | ||
938 | lfetch [r21],PT(CR_IPSR)-PT(PR) | ||
939 | adds r2=PT(B6)+16,r12 | ||
940 | adds r3=PT(R16)+16,r12 | ||
941 | ;; | ||
942 | lfetch [r21] | ||
943 | ld8 r28=[r2],8 // load b6 | ||
944 | adds r29=PT(R24)+16,r12 | ||
945 | |||
946 | ld8.fill r16=[r3] | ||
947 | adds r3=PT(AR_CSD)-PT(R16),r3 | ||
948 | adds r30=PT(AR_CCV)+16,r12 | ||
949 | ;; | ||
950 | ld8.fill r24=[r29] | ||
951 | ld8 r15=[r30] // load ar.ccv | ||
952 | ;; | ||
953 | ld8 r29=[r2],16 // load b7 | ||
954 | ld8 r30=[r3],16 // load ar.csd | ||
955 | ;; | ||
956 | ld8 r31=[r2],16 // load ar.ssd | ||
957 | ld8.fill r8=[r3],16 | ||
958 | ;; | ||
959 | ld8.fill r9=[r2],16 | ||
960 | ld8.fill r10=[r3],PT(R17)-PT(R10) | ||
961 | ;; | ||
962 | ld8.fill r11=[r2],PT(R18)-PT(R11) | ||
963 | ld8.fill r17=[r3],16 | ||
964 | ;; | ||
965 | ld8.fill r18=[r2],16 | ||
966 | ld8.fill r19=[r3],16 | ||
967 | ;; | ||
968 | ld8.fill r20=[r2],16 | ||
969 | ld8.fill r21=[r3],16 | ||
970 | mov ar.csd=r30 | ||
971 | mov ar.ssd=r31 | ||
972 | ;; | ||
973 | rsm psr.i | psr.ic | ||
974 | // initiate turning off of interrupt and interruption collection | ||
975 | invala // invalidate ALAT | ||
976 | ;; | ||
977 | srlz.i | ||
978 | ;; | ||
979 | ld8.fill r22=[r2],24 | ||
980 | ld8.fill r23=[r3],24 | ||
981 | mov b6=r28 | ||
982 | ;; | ||
983 | ld8.fill r25=[r2],16 | ||
984 | ld8.fill r26=[r3],16 | ||
985 | mov b7=r29 | ||
986 | ;; | ||
987 | ld8.fill r27=[r2],16 | ||
988 | ld8.fill r28=[r3],16 | ||
989 | ;; | ||
990 | ld8.fill r29=[r2],16 | ||
991 | ld8.fill r30=[r3],24 | ||
992 | ;; | ||
993 | ld8.fill r31=[r2],PT(F9)-PT(R31) | ||
994 | adds r3=PT(F10)-PT(F6),r3 | ||
995 | ;; | ||
996 | ldf.fill f9=[r2],PT(F6)-PT(F9) | ||
997 | ldf.fill f10=[r3],PT(F8)-PT(F10) | ||
998 | ;; | ||
999 | ldf.fill f6=[r2],PT(F7)-PT(F6) | ||
1000 | ;; | ||
1001 | ldf.fill f7=[r2],PT(F11)-PT(F7) | ||
1002 | ldf.fill f8=[r3],32 | ||
1003 | ;; | ||
1004 | srlz.i // ensure interruption collection is off | ||
1005 | mov ar.ccv=r15 | ||
1006 | ;; | ||
1007 | bsw.0 // switch back to bank 0 (no stop bit required beforehand...) | ||
1008 | ;; | ||
1009 | ldf.fill f11=[r2] | ||
1010 | // mov r18=r13 | ||
1011 | // mov r21=r13 | ||
1012 | adds r16=PT(CR_IPSR)+16,r12 | ||
1013 | adds r17=PT(CR_IIP)+16,r12 | ||
1014 | ;; | ||
1015 | ld8 r29=[r16],16 // load cr.ipsr | ||
1016 | ld8 r28=[r17],16 // load cr.iip | ||
1017 | ;; | ||
1018 | ld8 r30=[r16],16 // load cr.ifs | ||
1019 | ld8 r25=[r17],16 // load ar.unat | ||
1020 | ;; | ||
1021 | ld8 r26=[r16],16 // load ar.pfs | ||
1022 | ld8 r27=[r17],16 // load ar.rsc | ||
1023 | cmp.eq p9,p0=r0,r0 | ||
1024 | // set p9 to indicate that we should restore cr.ifs | ||
1025 | ;; | ||
1026 | ld8 r24=[r16],16 // load ar.rnat (may be garbage) | ||
1027 | ld8 r23=[r17],16// load ar.bspstore (may be garbage) | ||
1028 | ;; | ||
1029 | ld8 r31=[r16],16 // load predicates | ||
1030 | ld8 r22=[r17],16 // load b0 | ||
1031 | ;; | ||
1032 | ld8 r19=[r16],16 // load ar.rsc value for "loadrs" | ||
1033 | ld8.fill r1=[r17],16 // load r1 | ||
1034 | ;; | ||
1035 | ld8.fill r12=[r16],16 | ||
1036 | ld8.fill r13=[r17],16 | ||
1037 | ;; | ||
1038 | ld8 r20=[r16],16 // ar.fpsr | ||
1039 | ld8.fill r15=[r17],16 | ||
1040 | ;; | ||
1041 | ld8.fill r14=[r16],16 | ||
1042 | ld8.fill r2=[r17] | ||
1043 | ;; | ||
1044 | ld8.fill r3=[r16] | ||
1045 | ;; | ||
1046 | mov r16=ar.bsp // get existing backing store pointer | ||
1047 | ;; | ||
1048 | mov b0=r22 | ||
1049 | mov ar.pfs=r26 | ||
1050 | mov cr.ifs=r30 | ||
1051 | mov cr.ipsr=r29 | ||
1052 | mov ar.fpsr=r20 | ||
1053 | mov cr.iip=r28 | ||
1054 | ;; | ||
1055 | mov ar.rsc=r27 | ||
1056 | mov ar.unat=r25 | ||
1057 | mov pr=r31,-1 | ||
1058 | rfi | ||
1059 | END(ia64_leave_nested) | ||
1060 | |||
1061 | |||
1062 | |||
1063 | GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) | ||
1064 | /* | ||
1065 | * work.need_resched etc. mustn't get changed | ||
1066 | *by this CPU before it returns to | ||
1067 | ;; | ||
1068 | * user- or fsys-mode, hence we disable interrupts early on: | ||
1069 | */ | ||
1070 | adds r2 = PT(R4)+16,r12 | ||
1071 | adds r3 = PT(R5)+16,r12 | ||
1072 | adds r8 = PT(EML_UNAT)+16,r12 | ||
1073 | ;; | ||
1074 | ld8 r8 = [r8] | ||
1075 | ;; | ||
1076 | mov ar.unat=r8 | ||
1077 | ;; | ||
1078 | ld8.fill r4=[r2],16 //load r4 | ||
1079 | ld8.fill r5=[r3],16 //load r5 | ||
1080 | ;; | ||
1081 | ld8.fill r6=[r2] //load r6 | ||
1082 | ld8.fill r7=[r3] //load r7 | ||
1083 | ;; | ||
1084 | END(ia64_leave_hypervisor_prepare) | ||
1085 | //fall through | ||
1086 | GLOBAL_ENTRY(ia64_leave_hypervisor) | ||
1087 | rsm psr.i | ||
1088 | ;; | ||
1089 | br.call.sptk.many b0=leave_hypervisor_tail | ||
1090 | ;; | ||
1091 | adds r20=PT(PR)+16,r12 | ||
1092 | adds r8=PT(EML_UNAT)+16,r12 | ||
1093 | ;; | ||
1094 | ld8 r8=[r8] | ||
1095 | ;; | ||
1096 | mov ar.unat=r8 | ||
1097 | ;; | ||
1098 | lfetch [r20],PT(CR_IPSR)-PT(PR) | ||
1099 | adds r2 = PT(B6)+16,r12 | ||
1100 | adds r3 = PT(B7)+16,r12 | ||
1101 | ;; | ||
1102 | lfetch [r20] | ||
1103 | ;; | ||
1104 | ld8 r24=[r2],16 /* B6 */ | ||
1105 | ld8 r25=[r3],16 /* B7 */ | ||
1106 | ;; | ||
1107 | ld8 r26=[r2],16 /* ar_csd */ | ||
1108 | ld8 r27=[r3],16 /* ar_ssd */ | ||
1109 | mov b6 = r24 | ||
1110 | ;; | ||
1111 | ld8.fill r8=[r2],16 | ||
1112 | ld8.fill r9=[r3],16 | ||
1113 | mov b7 = r25 | ||
1114 | ;; | ||
1115 | mov ar.csd = r26 | ||
1116 | mov ar.ssd = r27 | ||
1117 | ;; | ||
1118 | ld8.fill r10=[r2],PT(R15)-PT(R10) | ||
1119 | ld8.fill r11=[r3],PT(R14)-PT(R11) | ||
1120 | ;; | ||
1121 | ld8.fill r15=[r2],PT(R16)-PT(R15) | ||
1122 | ld8.fill r14=[r3],PT(R17)-PT(R14) | ||
1123 | ;; | ||
1124 | ld8.fill r16=[r2],16 | ||
1125 | ld8.fill r17=[r3],16 | ||
1126 | ;; | ||
1127 | ld8.fill r18=[r2],16 | ||
1128 | ld8.fill r19=[r3],16 | ||
1129 | ;; | ||
1130 | ld8.fill r20=[r2],16 | ||
1131 | ld8.fill r21=[r3],16 | ||
1132 | ;; | ||
1133 | ld8.fill r22=[r2],16 | ||
1134 | ld8.fill r23=[r3],16 | ||
1135 | ;; | ||
1136 | ld8.fill r24=[r2],16 | ||
1137 | ld8.fill r25=[r3],16 | ||
1138 | ;; | ||
1139 | ld8.fill r26=[r2],16 | ||
1140 | ld8.fill r27=[r3],16 | ||
1141 | ;; | ||
1142 | ld8.fill r28=[r2],16 | ||
1143 | ld8.fill r29=[r3],16 | ||
1144 | ;; | ||
1145 | ld8.fill r30=[r2],PT(F6)-PT(R30) | ||
1146 | ld8.fill r31=[r3],PT(F7)-PT(R31) | ||
1147 | ;; | ||
1148 | rsm psr.i | psr.ic | ||
1149 | // initiate turning off of interrupt and interruption collection | ||
1150 | invala // invalidate ALAT | ||
1151 | ;; | ||
1152 | srlz.i // ensure interruption collection is off | ||
1153 | ;; | ||
1154 | bsw.0 | ||
1155 | ;; | ||
1156 | adds r16 = PT(CR_IPSR)+16,r12 | ||
1157 | adds r17 = PT(CR_IIP)+16,r12 | ||
1158 | mov r21=r13 // get current | ||
1159 | ;; | ||
1160 | ld8 r31=[r16],16 // load cr.ipsr | ||
1161 | ld8 r30=[r17],16 // load cr.iip | ||
1162 | ;; | ||
1163 | ld8 r29=[r16],16 // load cr.ifs | ||
1164 | ld8 r28=[r17],16 // load ar.unat | ||
1165 | ;; | ||
1166 | ld8 r27=[r16],16 // load ar.pfs | ||
1167 | ld8 r26=[r17],16 // load ar.rsc | ||
1168 | ;; | ||
1169 | ld8 r25=[r16],16 // load ar.rnat | ||
1170 | ld8 r24=[r17],16 // load ar.bspstore | ||
1171 | ;; | ||
1172 | ld8 r23=[r16],16 // load predicates | ||
1173 | ld8 r22=[r17],16 // load b0 | ||
1174 | ;; | ||
1175 | ld8 r20=[r16],16 // load ar.rsc value for "loadrs" | ||
1176 | ld8.fill r1=[r17],16 //load r1 | ||
1177 | ;; | ||
1178 | ld8.fill r12=[r16],16 //load r12 | ||
1179 | ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 | ||
1180 | ;; | ||
1181 | ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr | ||
1182 | ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 | ||
1183 | ;; | ||
1184 | ld8.fill r3=[r16] //load r3 | ||
1185 | ld8 r18=[r17] //load ar_ccv | ||
1186 | ;; | ||
1187 | mov ar.fpsr=r19 | ||
1188 | mov ar.ccv=r18 | ||
1189 | shr.u r18=r20,16 | ||
1190 | ;; | ||
1191 | kvm_rbs_switch: | ||
1192 | mov r19=96 | ||
1193 | |||
1194 | kvm_dont_preserve_current_frame: | ||
1195 | /* | ||
1196 | * To prevent leaking bits between the hypervisor and guest domain, | ||
1197 | * we must clear the stacked registers in the "invalid" partition here. | ||
1198 | * 5 registers/cycle on McKinley). | ||
1199 | */ | ||
1200 | # define pRecurse p6 | ||
1201 | # define pReturn p7 | ||
1202 | # define Nregs 14 | ||
1203 | |||
1204 | alloc loc0=ar.pfs,2,Nregs-2,2,0 | ||
1205 | shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) | ||
1206 | sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize | ||
1207 | ;; | ||
1208 | mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" | ||
1209 | shladd in0=loc1,3,r19 | ||
1210 | mov in1=0 | ||
1211 | ;; | ||
1212 | TEXT_ALIGN(32) | ||
1213 | kvm_rse_clear_invalid: | ||
1214 | alloc loc0=ar.pfs,2,Nregs-2,2,0 | ||
1215 | cmp.lt pRecurse,p0=Nregs*8,in0 | ||
1216 | // if more than Nregs regs left to clear, (re)curse | ||
1217 | add out0=-Nregs*8,in0 | ||
1218 | add out1=1,in1 // increment recursion count | ||
1219 | mov loc1=0 | ||
1220 | mov loc2=0 | ||
1221 | ;; | ||
1222 | mov loc3=0 | ||
1223 | mov loc4=0 | ||
1224 | mov loc5=0 | ||
1225 | mov loc6=0 | ||
1226 | mov loc7=0 | ||
1227 | (pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid | ||
1228 | ;; | ||
1229 | mov loc8=0 | ||
1230 | mov loc9=0 | ||
1231 | cmp.ne pReturn,p0=r0,in1 | ||
1232 | // if recursion count != 0, we need to do a br.ret | ||
1233 | mov loc10=0 | ||
1234 | mov loc11=0 | ||
1235 | (pReturn) br.ret.dptk.many b0 | ||
1236 | |||
1237 | # undef pRecurse | ||
1238 | # undef pReturn | ||
1239 | |||
1240 | // loadrs has already been shifted | ||
1241 | alloc r16=ar.pfs,0,0,0,0 // drop current register frame | ||
1242 | ;; | ||
1243 | loadrs | ||
1244 | ;; | ||
1245 | mov ar.bspstore=r24 | ||
1246 | ;; | ||
1247 | mov ar.unat=r28 | ||
1248 | mov ar.rnat=r25 | ||
1249 | mov ar.rsc=r26 | ||
1250 | ;; | ||
1251 | mov cr.ipsr=r31 | ||
1252 | mov cr.iip=r30 | ||
1253 | mov cr.ifs=r29 | ||
1254 | mov ar.pfs=r27 | ||
1255 | adds r18=VMM_VPD_BASE_OFFSET,r21 | ||
1256 | ;; | ||
1257 | ld8 r18=[r18] //vpd | ||
1258 | adds r17=VMM_VCPU_ISR_OFFSET,r21 | ||
1259 | ;; | ||
1260 | ld8 r17=[r17] | ||
1261 | adds r19=VMM_VPD_VPSR_OFFSET,r18 | ||
1262 | ;; | ||
1263 | ld8 r19=[r19] //vpsr | ||
1264 | adds r20=VMM_VCPU_VSA_BASE_OFFSET,r21 | ||
1265 | ;; | ||
1266 | ld8 r20=[r20] | ||
1267 | ;; | ||
1268 | //vsa_sync_write_start | ||
1269 | mov r25=r18 | ||
1270 | adds r16= VMM_VCPU_GP_OFFSET,r21 | ||
1271 | ;; | ||
1272 | ld8 r16= [r16] // Put gp in r24 | ||
1273 | movl r24=@gprel(ia64_vmm_entry) // calculate return address | ||
1274 | ;; | ||
1275 | add r24=r24,r16 | ||
1276 | ;; | ||
1277 | add r16=PAL_VPS_SYNC_WRITE,r20 | ||
1278 | ;; | ||
1279 | mov b0=r16 | ||
1280 | br.cond.sptk b0 // call the service | ||
1281 | ;; | ||
1282 | END(ia64_leave_hypervisor) | ||
1283 | // fall through | ||
1284 | GLOBAL_ENTRY(ia64_vmm_entry) | ||
1285 | /* | ||
1286 | * must be at bank 0 | ||
1287 | * parameter: | ||
1288 | * r17:cr.isr | ||
1289 | * r18:vpd | ||
1290 | * r19:vpsr | ||
1291 | * r20:__vsa_base | ||
1292 | * r22:b0 | ||
1293 | * r23:predicate | ||
1294 | */ | ||
1295 | mov r24=r22 | ||
1296 | mov r25=r18 | ||
1297 | tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic | ||
1298 | ;; | ||
1299 | (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 | ||
1300 | (p1) br.sptk.many ia64_vmm_entry_out | ||
1301 | ;; | ||
1302 | tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT //p1=cr.isr.ir | ||
1303 | ;; | ||
1304 | (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 | ||
1305 | (p2) add r29=PAL_VPS_RESUME_HANDLER,r20 | ||
1306 | (p2) ld8 r26=[r25] | ||
1307 | ;; | ||
1308 | ia64_vmm_entry_out: | ||
1309 | mov pr=r23,-2 | ||
1310 | mov b0=r29 | ||
1311 | ;; | ||
1312 | br.cond.sptk b0 // call pal service | ||
1313 | END(ia64_vmm_entry) | ||
1314 | |||
1315 | |||
1316 | |||
1317 | /* | ||
1318 | * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, | ||
1319 | * u64 arg3, u64 arg4, u64 arg5, | ||
1320 | * u64 arg6, u64 arg7); | ||
1321 | * | ||
1322 | * XXX: The currently defined services use only 4 args at the max. The | ||
1323 | * rest are not consumed. | ||
1324 | */ | ||
1325 | GLOBAL_ENTRY(ia64_call_vsa) | ||
1326 | .regstk 4,4,0,0 | ||
1327 | |||
1328 | rpsave = loc0 | ||
1329 | pfssave = loc1 | ||
1330 | psrsave = loc2 | ||
1331 | entry = loc3 | ||
1332 | hostret = r24 | ||
1333 | |||
1334 | alloc pfssave=ar.pfs,4,4,0,0 | ||
1335 | mov rpsave=rp | ||
1336 | adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 | ||
1337 | ;; | ||
1338 | ld8 entry=[entry] | ||
1339 | 1: mov hostret=ip | ||
1340 | mov r25=in1 // copy arguments | ||
1341 | mov r26=in2 | ||
1342 | mov r27=in3 | ||
1343 | mov psrsave=psr | ||
1344 | ;; | ||
1345 | tbit.nz p6,p0=psrsave,14 // IA64_PSR_I | ||
1346 | tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC | ||
1347 | ;; | ||
1348 | add hostret=2f-1b,hostret // calculate return address | ||
1349 | add entry=entry,in0 | ||
1350 | ;; | ||
1351 | rsm psr.i | psr.ic | ||
1352 | ;; | ||
1353 | srlz.i | ||
1354 | mov b6=entry | ||
1355 | br.cond.sptk b6 // call the service | ||
1356 | 2: | ||
1357 | // Architectural sequence for enabling interrupts if necessary | ||
1358 | (p7) ssm psr.ic | ||
1359 | ;; | ||
1360 | (p7) srlz.i | ||
1361 | ;; | ||
1362 | //(p6) ssm psr.i | ||
1363 | ;; | ||
1364 | mov rp=rpsave | ||
1365 | mov ar.pfs=pfssave | ||
1366 | mov r8=r31 | ||
1367 | ;; | ||
1368 | srlz.d | ||
1369 | br.ret.sptk rp | ||
1370 | |||
1371 | END(ia64_call_vsa) | ||
1372 | |||
1373 | #define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) | ||
1374 | |||
1375 | GLOBAL_ENTRY(vmm_reset_entry) | ||
1376 | //set up ipsr, iip, vpd.vpsr, dcr | ||
1377 | // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 | ||
1378 | // For DCR: all bits 0 | ||
1379 | adds r14=-VMM_PT_REGS_SIZE, r12 | ||
1380 | ;; | ||
1381 | movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 | ||
1382 | movl r10=0x8000000000000000 | ||
1383 | adds r16=PT(CR_IIP), r14 | ||
1384 | adds r20=PT(R1), r14 | ||
1385 | ;; | ||
1386 | rsm psr.ic | psr.i | ||
1387 | ;; | ||
1388 | srlz.i | ||
1389 | ;; | ||
1390 | bsw.0 | ||
1391 | ;; | ||
1392 | mov r21 =r13 | ||
1393 | ;; | ||
1394 | bsw.1 | ||
1395 | ;; | ||
1396 | mov ar.rsc = 0 | ||
1397 | ;; | ||
1398 | flushrs | ||
1399 | ;; | ||
1400 | mov ar.bspstore = 0 | ||
1401 | // clear BSPSTORE | ||
1402 | ;; | ||
1403 | mov cr.ipsr=r6 | ||
1404 | mov cr.ifs=r10 | ||
1405 | ld8 r4 = [r16] // Set init iip for first run. | ||
1406 | ld8 r1 = [r20] | ||
1407 | ;; | ||
1408 | mov cr.iip=r4 | ||
1409 | ;; | ||
1410 | adds r16=VMM_VPD_BASE_OFFSET,r13 | ||
1411 | adds r20=VMM_VCPU_VSA_BASE_OFFSET,r13 | ||
1412 | ;; | ||
1413 | ld8 r18=[r16] | ||
1414 | ld8 r20=[r20] | ||
1415 | ;; | ||
1416 | adds r19=VMM_VPD_VPSR_OFFSET,r18 | ||
1417 | ;; | ||
1418 | ld8 r19=[r19] | ||
1419 | mov r17=r0 | ||
1420 | mov r22=r0 | ||
1421 | mov r23=r0 | ||
1422 | br.cond.sptk ia64_vmm_entry | ||
1423 | br.ret.sptk b0 | ||
1424 | END(vmm_reset_entry) | ||
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h new file mode 100644 index 000000000000..f6c5617e16af --- /dev/null +++ b/arch/ia64/kvm/vti.h | |||
@@ -0,0 +1,290 @@ | |||
1 | /* | ||
2 | * vti.h: prototype for generial vt related interface | ||
3 | * Copyright (c) 2004, Intel Corporation. | ||
4 | * | ||
5 | * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) | ||
6 | * Fred Yang (fred.yang@intel.com) | ||
7 | * Kun Tian (Kevin Tian) (kevin.tian@intel.com) | ||
8 | * | ||
9 | * Copyright (c) 2007, Intel Corporation. | ||
10 | * Zhang xiantao <xiantao.zhang@intel.com> | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or modify it | ||
13 | * under the terms and conditions of the GNU General Public License, | ||
14 | * version 2, as published by the Free Software Foundation. | ||
15 | * | ||
16 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
19 | * more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License along with | ||
22 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
23 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
24 | */ | ||
25 | #ifndef _KVM_VT_I_H | ||
26 | #define _KVM_VT_I_H | ||
27 | |||
28 | #ifndef __ASSEMBLY__ | ||
29 | #include <asm/page.h> | ||
30 | |||
31 | #include <linux/kvm_host.h> | ||
32 | |||
33 | /* define itr.i and itr.d in ia64_itr function */ | ||
34 | #define ITR 0x01 | ||
35 | #define DTR 0x02 | ||
36 | #define IaDTR 0x03 | ||
37 | |||
38 | #define IA64_TR_VMM 6 /*itr6, dtr6 : maps vmm code, vmbuffer*/ | ||
39 | #define IA64_TR_VM_DATA 7 /*dtr7 : maps current vm data*/ | ||
40 | |||
41 | #define RR6 (6UL<<61) | ||
42 | #define RR7 (7UL<<61) | ||
43 | |||
44 | |||
45 | /* config_options in pal_vp_init_env */ | ||
46 | #define VP_INITIALIZE 1UL | ||
47 | #define VP_FR_PMC 1UL<<1 | ||
48 | #define VP_OPCODE 1UL<<8 | ||
49 | #define VP_CAUSE 1UL<<9 | ||
50 | #define VP_FW_ACC 1UL<<63 | ||
51 | |||
52 | /* init vp env with initializing vm_buffer */ | ||
53 | #define VP_INIT_ENV_INITALIZE (VP_INITIALIZE | VP_FR_PMC |\ | ||
54 | VP_OPCODE | VP_CAUSE | VP_FW_ACC) | ||
55 | /* init vp env without initializing vm_buffer */ | ||
56 | #define VP_INIT_ENV VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC | ||
57 | |||
58 | #define PAL_VP_CREATE 265 | ||
59 | /* Stacked Virt. Initializes a new VPD for the operation of | ||
60 | * a new virtual processor in the virtual environment. | ||
61 | */ | ||
62 | #define PAL_VP_ENV_INFO 266 | ||
63 | /*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/ | ||
64 | #define PAL_VP_EXIT_ENV 267 | ||
65 | /*Stacked Virt. Allows a logical processor to exit a virtual environment.*/ | ||
66 | #define PAL_VP_INIT_ENV 268 | ||
67 | /*Stacked Virt. Allows a logical processor to enter a virtual environment.*/ | ||
68 | #define PAL_VP_REGISTER 269 | ||
69 | /*Stacked Virt. Register a different host IVT for the virtual processor.*/ | ||
70 | #define PAL_VP_RESUME 270 | ||
71 | /* Renamed from PAL_VP_RESUME */ | ||
72 | #define PAL_VP_RESTORE 270 | ||
73 | /*Stacked Virt. Resumes virtual processor operation on the logical processor.*/ | ||
74 | #define PAL_VP_SUSPEND 271 | ||
75 | /* Renamed from PAL_VP_SUSPEND */ | ||
76 | #define PAL_VP_SAVE 271 | ||
77 | /* Stacked Virt. Suspends operation for the specified virtual processor on | ||
78 | * the logical processor. | ||
79 | */ | ||
80 | #define PAL_VP_TERMINATE 272 | ||
81 | /* Stacked Virt. Terminates operation for the specified virtual processor.*/ | ||
82 | |||
83 | union vac { | ||
84 | unsigned long value; | ||
85 | struct { | ||
86 | int a_int:1; | ||
87 | int a_from_int_cr:1; | ||
88 | int a_to_int_cr:1; | ||
89 | int a_from_psr:1; | ||
90 | int a_from_cpuid:1; | ||
91 | int a_cover:1; | ||
92 | int a_bsw:1; | ||
93 | long reserved:57; | ||
94 | }; | ||
95 | }; | ||
96 | |||
97 | union vdc { | ||
98 | unsigned long value; | ||
99 | struct { | ||
100 | int d_vmsw:1; | ||
101 | int d_extint:1; | ||
102 | int d_ibr_dbr:1; | ||
103 | int d_pmc:1; | ||
104 | int d_to_pmd:1; | ||
105 | int d_itm:1; | ||
106 | long reserved:58; | ||
107 | }; | ||
108 | }; | ||
109 | |||
110 | struct vpd { | ||
111 | union vac vac; | ||
112 | union vdc vdc; | ||
113 | unsigned long virt_env_vaddr; | ||
114 | unsigned long reserved1[29]; | ||
115 | unsigned long vhpi; | ||
116 | unsigned long reserved2[95]; | ||
117 | unsigned long vgr[16]; | ||
118 | unsigned long vbgr[16]; | ||
119 | unsigned long vnat; | ||
120 | unsigned long vbnat; | ||
121 | unsigned long vcpuid[5]; | ||
122 | unsigned long reserved3[11]; | ||
123 | unsigned long vpsr; | ||
124 | unsigned long vpr; | ||
125 | unsigned long reserved4[76]; | ||
126 | union { | ||
127 | unsigned long vcr[128]; | ||
128 | struct { | ||
129 | unsigned long dcr; | ||
130 | unsigned long itm; | ||
131 | unsigned long iva; | ||
132 | unsigned long rsv1[5]; | ||
133 | unsigned long pta; | ||
134 | unsigned long rsv2[7]; | ||
135 | unsigned long ipsr; | ||
136 | unsigned long isr; | ||
137 | unsigned long rsv3; | ||
138 | unsigned long iip; | ||
139 | unsigned long ifa; | ||
140 | unsigned long itir; | ||
141 | unsigned long iipa; | ||
142 | unsigned long ifs; | ||
143 | unsigned long iim; | ||
144 | unsigned long iha; | ||
145 | unsigned long rsv4[38]; | ||
146 | unsigned long lid; | ||
147 | unsigned long ivr; | ||
148 | unsigned long tpr; | ||
149 | unsigned long eoi; | ||
150 | unsigned long irr[4]; | ||
151 | unsigned long itv; | ||
152 | unsigned long pmv; | ||
153 | unsigned long cmcv; | ||
154 | unsigned long rsv5[5]; | ||
155 | unsigned long lrr0; | ||
156 | unsigned long lrr1; | ||
157 | unsigned long rsv6[46]; | ||
158 | }; | ||
159 | }; | ||
160 | unsigned long reserved5[128]; | ||
161 | unsigned long reserved6[3456]; | ||
162 | unsigned long vmm_avail[128]; | ||
163 | unsigned long reserved7[4096]; | ||
164 | }; | ||
165 | |||
166 | #define PAL_PROC_VM_BIT (1UL << 40) | ||
167 | #define PAL_PROC_VMSW_BIT (1UL << 54) | ||
168 | |||
169 | static inline s64 ia64_pal_vp_env_info(u64 *buffer_size, | ||
170 | u64 *vp_env_info) | ||
171 | { | ||
172 | struct ia64_pal_retval iprv; | ||
173 | PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0); | ||
174 | *buffer_size = iprv.v0; | ||
175 | *vp_env_info = iprv.v1; | ||
176 | return iprv.status; | ||
177 | } | ||
178 | |||
179 | static inline s64 ia64_pal_vp_exit_env(u64 iva) | ||
180 | { | ||
181 | struct ia64_pal_retval iprv; | ||
182 | |||
183 | PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0); | ||
184 | return iprv.status; | ||
185 | } | ||
186 | |||
187 | static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr, | ||
188 | u64 vbase_addr, u64 *vsa_base) | ||
189 | { | ||
190 | struct ia64_pal_retval iprv; | ||
191 | |||
192 | PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr, | ||
193 | vbase_addr); | ||
194 | *vsa_base = iprv.v0; | ||
195 | |||
196 | return iprv.status; | ||
197 | } | ||
198 | |||
199 | static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector) | ||
200 | { | ||
201 | struct ia64_pal_retval iprv; | ||
202 | |||
203 | PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0); | ||
204 | |||
205 | return iprv.status; | ||
206 | } | ||
207 | |||
208 | static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector) | ||
209 | { | ||
210 | struct ia64_pal_retval iprv; | ||
211 | |||
212 | PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0); | ||
213 | |||
214 | return iprv.status; | ||
215 | } | ||
216 | |||
217 | #endif | ||
218 | |||
219 | /*VPD field offset*/ | ||
220 | #define VPD_VAC_START_OFFSET 0 | ||
221 | #define VPD_VDC_START_OFFSET 8 | ||
222 | #define VPD_VHPI_START_OFFSET 256 | ||
223 | #define VPD_VGR_START_OFFSET 1024 | ||
224 | #define VPD_VBGR_START_OFFSET 1152 | ||
225 | #define VPD_VNAT_START_OFFSET 1280 | ||
226 | #define VPD_VBNAT_START_OFFSET 1288 | ||
227 | #define VPD_VCPUID_START_OFFSET 1296 | ||
228 | #define VPD_VPSR_START_OFFSET 1424 | ||
229 | #define VPD_VPR_START_OFFSET 1432 | ||
230 | #define VPD_VRSE_CFLE_START_OFFSET 1440 | ||
231 | #define VPD_VCR_START_OFFSET 2048 | ||
232 | #define VPD_VTPR_START_OFFSET 2576 | ||
233 | #define VPD_VRR_START_OFFSET 3072 | ||
234 | #define VPD_VMM_VAIL_START_OFFSET 31744 | ||
235 | |||
236 | /*Virtualization faults*/ | ||
237 | |||
238 | #define EVENT_MOV_TO_AR 1 | ||
239 | #define EVENT_MOV_TO_AR_IMM 2 | ||
240 | #define EVENT_MOV_FROM_AR 3 | ||
241 | #define EVENT_MOV_TO_CR 4 | ||
242 | #define EVENT_MOV_FROM_CR 5 | ||
243 | #define EVENT_MOV_TO_PSR 6 | ||
244 | #define EVENT_MOV_FROM_PSR 7 | ||
245 | #define EVENT_ITC_D 8 | ||
246 | #define EVENT_ITC_I 9 | ||
247 | #define EVENT_MOV_TO_RR 10 | ||
248 | #define EVENT_MOV_TO_DBR 11 | ||
249 | #define EVENT_MOV_TO_IBR 12 | ||
250 | #define EVENT_MOV_TO_PKR 13 | ||
251 | #define EVENT_MOV_TO_PMC 14 | ||
252 | #define EVENT_MOV_TO_PMD 15 | ||
253 | #define EVENT_ITR_D 16 | ||
254 | #define EVENT_ITR_I 17 | ||
255 | #define EVENT_MOV_FROM_RR 18 | ||
256 | #define EVENT_MOV_FROM_DBR 19 | ||
257 | #define EVENT_MOV_FROM_IBR 20 | ||
258 | #define EVENT_MOV_FROM_PKR 21 | ||
259 | #define EVENT_MOV_FROM_PMC 22 | ||
260 | #define EVENT_MOV_FROM_CPUID 23 | ||
261 | #define EVENT_SSM 24 | ||
262 | #define EVENT_RSM 25 | ||
263 | #define EVENT_PTC_L 26 | ||
264 | #define EVENT_PTC_G 27 | ||
265 | #define EVENT_PTC_GA 28 | ||
266 | #define EVENT_PTR_D 29 | ||
267 | #define EVENT_PTR_I 30 | ||
268 | #define EVENT_THASH 31 | ||
269 | #define EVENT_TTAG 32 | ||
270 | #define EVENT_TPA 33 | ||
271 | #define EVENT_TAK 34 | ||
272 | #define EVENT_PTC_E 35 | ||
273 | #define EVENT_COVER 36 | ||
274 | #define EVENT_RFI 37 | ||
275 | #define EVENT_BSW_0 38 | ||
276 | #define EVENT_BSW_1 39 | ||
277 | #define EVENT_VMSW 40 | ||
278 | |||
279 | /**PAL virtual services offsets */ | ||
280 | #define PAL_VPS_RESUME_NORMAL 0x0000 | ||
281 | #define PAL_VPS_RESUME_HANDLER 0x0400 | ||
282 | #define PAL_VPS_SYNC_READ 0x0800 | ||
283 | #define PAL_VPS_SYNC_WRITE 0x0c00 | ||
284 | #define PAL_VPS_SET_PENDING_INTERRUPT 0x1000 | ||
285 | #define PAL_VPS_THASH 0x1400 | ||
286 | #define PAL_VPS_TTAG 0x1800 | ||
287 | #define PAL_VPS_RESTORE 0x1c00 | ||
288 | #define PAL_VPS_SAVE 0x2000 | ||
289 | |||
290 | #endif/* _VT_I_H*/ | ||
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c new file mode 100644 index 000000000000..def4576d22b1 --- /dev/null +++ b/arch/ia64/kvm/vtlb.c | |||
@@ -0,0 +1,636 @@ | |||
1 | /* | ||
2 | * vtlb.c: guest virtual tlb handling module. | ||
3 | * Copyright (c) 2004, Intel Corporation. | ||
4 | * Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com> | ||
5 | * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> | ||
6 | * | ||
7 | * Copyright (c) 2007, Intel Corporation. | ||
8 | * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> | ||
9 | * Xiantao Zhang <xiantao.zhang@intel.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms and conditions of the GNU General Public License, | ||
13 | * version 2, as published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
18 | * more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License along with | ||
21 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
22 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include "vcpu.h" | ||
27 | |||
28 | #include <linux/rwsem.h> | ||
29 | |||
30 | #include <asm/tlb.h> | ||
31 | |||
32 | /* | ||
33 | * Check to see if the address rid:va is translated by the TLB | ||
34 | */ | ||
35 | |||
36 | static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va) | ||
37 | { | ||
38 | return ((trp->p) && (trp->rid == rid) | ||
39 | && ((va-trp->vadr) < PSIZE(trp->ps))); | ||
40 | } | ||
41 | |||
42 | /* | ||
43 | * Only for GUEST TR format. | ||
44 | */ | ||
45 | static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva) | ||
46 | { | ||
47 | u64 sa1, ea1; | ||
48 | |||
49 | if (!trp->p || trp->rid != rid) | ||
50 | return 0; | ||
51 | |||
52 | sa1 = trp->vadr; | ||
53 | ea1 = sa1 + PSIZE(trp->ps) - 1; | ||
54 | eva -= 1; | ||
55 | if ((sva > ea1) || (sa1 > eva)) | ||
56 | return 0; | ||
57 | else | ||
58 | return 1; | ||
59 | |||
60 | } | ||
61 | |||
62 | void machine_tlb_purge(u64 va, u64 ps) | ||
63 | { | ||
64 | ia64_ptcl(va, ps << 2); | ||
65 | } | ||
66 | |||
67 | void local_flush_tlb_all(void) | ||
68 | { | ||
69 | int i, j; | ||
70 | unsigned long flags, count0, count1; | ||
71 | unsigned long stride0, stride1, addr; | ||
72 | |||
73 | addr = current_vcpu->arch.ptce_base; | ||
74 | count0 = current_vcpu->arch.ptce_count[0]; | ||
75 | count1 = current_vcpu->arch.ptce_count[1]; | ||
76 | stride0 = current_vcpu->arch.ptce_stride[0]; | ||
77 | stride1 = current_vcpu->arch.ptce_stride[1]; | ||
78 | |||
79 | local_irq_save(flags); | ||
80 | for (i = 0; i < count0; ++i) { | ||
81 | for (j = 0; j < count1; ++j) { | ||
82 | ia64_ptce(addr); | ||
83 | addr += stride1; | ||
84 | } | ||
85 | addr += stride0; | ||
86 | } | ||
87 | local_irq_restore(flags); | ||
88 | ia64_srlz_i(); /* srlz.i implies srlz.d */ | ||
89 | } | ||
90 | |||
91 | int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref) | ||
92 | { | ||
93 | union ia64_rr vrr; | ||
94 | union ia64_pta vpta; | ||
95 | struct ia64_psr vpsr; | ||
96 | |||
97 | vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); | ||
98 | vrr.val = vcpu_get_rr(vcpu, vadr); | ||
99 | vpta.val = vcpu_get_pta(vcpu); | ||
100 | |||
101 | if (vrr.ve & vpta.ve) { | ||
102 | switch (ref) { | ||
103 | case DATA_REF: | ||
104 | case NA_REF: | ||
105 | return vpsr.dt; | ||
106 | case INST_REF: | ||
107 | return vpsr.dt && vpsr.it && vpsr.ic; | ||
108 | case RSE_REF: | ||
109 | return vpsr.dt && vpsr.rt; | ||
110 | |||
111 | } | ||
112 | } | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag) | ||
117 | { | ||
118 | u64 index, pfn, rid, pfn_bits; | ||
119 | |||
120 | pfn_bits = vpta.size - 5 - 8; | ||
121 | pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr); | ||
122 | rid = _REGION_ID(vrr); | ||
123 | index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1)); | ||
124 | *tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16); | ||
125 | |||
126 | return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) + | ||
127 | (index << 5)); | ||
128 | } | ||
129 | |||
130 | struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type) | ||
131 | { | ||
132 | |||
133 | struct thash_data *trp; | ||
134 | int i; | ||
135 | u64 rid; | ||
136 | |||
137 | rid = vcpu_get_rr(vcpu, va); | ||
138 | rid = rid & RR_RID_MASK;; | ||
139 | if (type == D_TLB) { | ||
140 | if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { | ||
141 | for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; | ||
142 | i < NDTRS; i++, trp++) { | ||
143 | if (__is_tr_translated(trp, rid, va)) | ||
144 | return trp; | ||
145 | } | ||
146 | } | ||
147 | } else { | ||
148 | if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { | ||
149 | for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; | ||
150 | i < NITRS; i++, trp++) { | ||
151 | if (__is_tr_translated(trp, rid, va)) | ||
152 | return trp; | ||
153 | } | ||
154 | } | ||
155 | } | ||
156 | |||
157 | return NULL; | ||
158 | } | ||
159 | |||
160 | static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte) | ||
161 | { | ||
162 | union ia64_rr rr; | ||
163 | struct thash_data *head; | ||
164 | unsigned long ps, gpaddr; | ||
165 | |||
166 | ps = itir_ps(itir); | ||
167 | |||
168 | gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) | | ||
169 | (ifa & ((1UL << ps) - 1)); | ||
170 | |||
171 | rr.val = ia64_get_rr(ifa); | ||
172 | head = (struct thash_data *)ia64_thash(ifa); | ||
173 | head->etag = INVALID_TI_TAG; | ||
174 | ia64_mf(); | ||
175 | head->page_flags = pte & ~PAGE_FLAGS_RV_MASK; | ||
176 | head->itir = rr.ps << 2; | ||
177 | head->etag = ia64_ttag(ifa); | ||
178 | head->gpaddr = gpaddr; | ||
179 | } | ||
180 | |||
181 | void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps) | ||
182 | { | ||
183 | u64 i, dirty_pages = 1; | ||
184 | u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; | ||
185 | spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); | ||
186 | void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE) | ||
187 | + KVM_MEM_DIRTY_LOG_OFS; | ||
188 | dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; | ||
189 | |||
190 | vmm_spin_lock(lock); | ||
191 | for (i = 0; i < dirty_pages; i++) { | ||
192 | /* avoid RMW */ | ||
193 | if (!test_bit(base_gfn + i, dirty_bitmap)) | ||
194 | set_bit(base_gfn + i , dirty_bitmap); | ||
195 | } | ||
196 | vmm_spin_unlock(lock); | ||
197 | } | ||
198 | |||
199 | void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type) | ||
200 | { | ||
201 | u64 phy_pte, psr; | ||
202 | union ia64_rr mrr; | ||
203 | |||
204 | mrr.val = ia64_get_rr(va); | ||
205 | phy_pte = translate_phy_pte(&pte, itir, va); | ||
206 | |||
207 | if (itir_ps(itir) >= mrr.ps) { | ||
208 | vhpt_insert(phy_pte, itir, va, pte); | ||
209 | } else { | ||
210 | phy_pte &= ~PAGE_FLAGS_RV_MASK; | ||
211 | psr = ia64_clear_ic(); | ||
212 | ia64_itc(type, va, phy_pte, itir_ps(itir)); | ||
213 | ia64_set_psr(psr); | ||
214 | } | ||
215 | |||
216 | if (!(pte&VTLB_PTE_IO)) | ||
217 | mark_pages_dirty(v, pte, itir_ps(itir)); | ||
218 | } | ||
219 | |||
220 | /* | ||
221 | * vhpt lookup | ||
222 | */ | ||
223 | struct thash_data *vhpt_lookup(u64 va) | ||
224 | { | ||
225 | struct thash_data *head; | ||
226 | u64 tag; | ||
227 | |||
228 | head = (struct thash_data *)ia64_thash(va); | ||
229 | tag = ia64_ttag(va); | ||
230 | if (head->etag == tag) | ||
231 | return head; | ||
232 | return NULL; | ||
233 | } | ||
234 | |||
235 | u64 guest_vhpt_lookup(u64 iha, u64 *pte) | ||
236 | { | ||
237 | u64 ret; | ||
238 | struct thash_data *data; | ||
239 | |||
240 | data = __vtr_lookup(current_vcpu, iha, D_TLB); | ||
241 | if (data != NULL) | ||
242 | thash_vhpt_insert(current_vcpu, data->page_flags, | ||
243 | data->itir, iha, D_TLB); | ||
244 | |||
245 | asm volatile ("rsm psr.ic|psr.i;;" | ||
246 | "srlz.d;;" | ||
247 | "ld8.s r9=[%1];;" | ||
248 | "tnat.nz p6,p7=r9;;" | ||
249 | "(p6) mov %0=1;" | ||
250 | "(p6) mov r9=r0;" | ||
251 | "(p7) extr.u r9=r9,0,53;;" | ||
252 | "(p7) mov %0=r0;" | ||
253 | "(p7) st8 [%2]=r9;;" | ||
254 | "ssm psr.ic;;" | ||
255 | "srlz.d;;" | ||
256 | /* "ssm psr.i;;" Once interrupts in vmm open, need fix*/ | ||
257 | : "=r"(ret) : "r"(iha), "r"(pte):"memory"); | ||
258 | |||
259 | return ret; | ||
260 | } | ||
261 | |||
262 | /* | ||
263 | * purge software guest tlb | ||
264 | */ | ||
265 | |||
266 | static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps) | ||
267 | { | ||
268 | struct thash_data *cur; | ||
269 | u64 start, curadr, size, psbits, tag, rr_ps, num; | ||
270 | union ia64_rr vrr; | ||
271 | struct thash_cb *hcb = &v->arch.vtlb; | ||
272 | |||
273 | vrr.val = vcpu_get_rr(v, va); | ||
274 | psbits = VMX(v, psbits[(va >> 61)]); | ||
275 | start = va & ~((1UL << ps) - 1); | ||
276 | while (psbits) { | ||
277 | curadr = start; | ||
278 | rr_ps = __ffs(psbits); | ||
279 | psbits &= ~(1UL << rr_ps); | ||
280 | num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps)); | ||
281 | size = PSIZE(rr_ps); | ||
282 | vrr.ps = rr_ps; | ||
283 | while (num) { | ||
284 | cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag); | ||
285 | if (cur->etag == tag && cur->ps == rr_ps) | ||
286 | cur->etag = INVALID_TI_TAG; | ||
287 | curadr += size; | ||
288 | num--; | ||
289 | } | ||
290 | } | ||
291 | } | ||
292 | |||
293 | |||
294 | /* | ||
295 | * purge VHPT and machine TLB | ||
296 | */ | ||
297 | static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps) | ||
298 | { | ||
299 | struct thash_data *cur; | ||
300 | u64 start, size, tag, num; | ||
301 | union ia64_rr rr; | ||
302 | |||
303 | start = va & ~((1UL << ps) - 1); | ||
304 | rr.val = ia64_get_rr(va); | ||
305 | size = PSIZE(rr.ps); | ||
306 | num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps)); | ||
307 | while (num) { | ||
308 | cur = (struct thash_data *)ia64_thash(start); | ||
309 | tag = ia64_ttag(start); | ||
310 | if (cur->etag == tag) | ||
311 | cur->etag = INVALID_TI_TAG; | ||
312 | start += size; | ||
313 | num--; | ||
314 | } | ||
315 | machine_tlb_purge(va, ps); | ||
316 | } | ||
317 | |||
318 | /* | ||
319 | * Insert an entry into hash TLB or VHPT. | ||
320 | * NOTES: | ||
321 | * 1: When inserting VHPT to thash, "va" is a must covered | ||
322 | * address by the inserted machine VHPT entry. | ||
323 | * 2: The format of entry is always in TLB. | ||
324 | * 3: The caller need to make sure the new entry will not overlap | ||
325 | * with any existed entry. | ||
326 | */ | ||
327 | void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va) | ||
328 | { | ||
329 | struct thash_data *head; | ||
330 | union ia64_rr vrr; | ||
331 | u64 tag; | ||
332 | struct thash_cb *hcb = &v->arch.vtlb; | ||
333 | |||
334 | vrr.val = vcpu_get_rr(v, va); | ||
335 | vrr.ps = itir_ps(itir); | ||
336 | VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps); | ||
337 | head = vsa_thash(hcb->pta, va, vrr.val, &tag); | ||
338 | head->page_flags = pte; | ||
339 | head->itir = itir; | ||
340 | head->etag = tag; | ||
341 | } | ||
342 | |||
343 | int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type) | ||
344 | { | ||
345 | struct thash_data *trp; | ||
346 | int i; | ||
347 | u64 end, rid; | ||
348 | |||
349 | rid = vcpu_get_rr(vcpu, va); | ||
350 | rid = rid & RR_RID_MASK; | ||
351 | end = va + PSIZE(ps); | ||
352 | if (type == D_TLB) { | ||
353 | if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { | ||
354 | for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; | ||
355 | i < NDTRS; i++, trp++) { | ||
356 | if (__is_tr_overlap(trp, rid, va, end)) | ||
357 | return i; | ||
358 | } | ||
359 | } | ||
360 | } else { | ||
361 | if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { | ||
362 | for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; | ||
363 | i < NITRS; i++, trp++) { | ||
364 | if (__is_tr_overlap(trp, rid, va, end)) | ||
365 | return i; | ||
366 | } | ||
367 | } | ||
368 | } | ||
369 | return -1; | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * Purge entries in VTLB and VHPT | ||
374 | */ | ||
375 | void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps) | ||
376 | { | ||
377 | if (vcpu_quick_region_check(v->arch.tc_regions, va)) | ||
378 | vtlb_purge(v, va, ps); | ||
379 | vhpt_purge(v, va, ps); | ||
380 | } | ||
381 | |||
382 | void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps) | ||
383 | { | ||
384 | u64 old_va = va; | ||
385 | va = REGION_OFFSET(va); | ||
386 | if (vcpu_quick_region_check(v->arch.tc_regions, old_va)) | ||
387 | vtlb_purge(v, va, ps); | ||
388 | vhpt_purge(v, va, ps); | ||
389 | } | ||
390 | |||
391 | u64 translate_phy_pte(u64 *pte, u64 itir, u64 va) | ||
392 | { | ||
393 | u64 ps, ps_mask, paddr, maddr; | ||
394 | union pte_flags phy_pte; | ||
395 | |||
396 | ps = itir_ps(itir); | ||
397 | ps_mask = ~((1UL << ps) - 1); | ||
398 | phy_pte.val = *pte; | ||
399 | paddr = *pte; | ||
400 | paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask); | ||
401 | maddr = kvm_lookup_mpa(paddr >> PAGE_SHIFT); | ||
402 | if (maddr & GPFN_IO_MASK) { | ||
403 | *pte |= VTLB_PTE_IO; | ||
404 | return -1; | ||
405 | } | ||
406 | maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) | | ||
407 | (paddr & ~PAGE_MASK); | ||
408 | phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT; | ||
409 | return phy_pte.val; | ||
410 | } | ||
411 | |||
412 | /* | ||
413 | * Purge overlap TCs and then insert the new entry to emulate itc ops. | ||
414 | * Notes: Only TC entry can purge and insert. | ||
415 | * 1 indicates this is MMIO | ||
416 | */ | ||
417 | int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, | ||
418 | u64 ifa, int type) | ||
419 | { | ||
420 | u64 ps; | ||
421 | u64 phy_pte; | ||
422 | union ia64_rr vrr, mrr; | ||
423 | int ret = 0; | ||
424 | |||
425 | ps = itir_ps(itir); | ||
426 | vrr.val = vcpu_get_rr(v, ifa); | ||
427 | mrr.val = ia64_get_rr(ifa); | ||
428 | |||
429 | phy_pte = translate_phy_pte(&pte, itir, ifa); | ||
430 | |||
431 | /* Ensure WB attribute if pte is related to a normal mem page, | ||
432 | * which is required by vga acceleration since qemu maps shared | ||
433 | * vram buffer with WB. | ||
434 | */ | ||
435 | if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT)) { | ||
436 | pte &= ~_PAGE_MA_MASK; | ||
437 | phy_pte &= ~_PAGE_MA_MASK; | ||
438 | } | ||
439 | |||
440 | if (pte & VTLB_PTE_IO) | ||
441 | ret = 1; | ||
442 | |||
443 | vtlb_purge(v, ifa, ps); | ||
444 | vhpt_purge(v, ifa, ps); | ||
445 | |||
446 | if (ps == mrr.ps) { | ||
447 | if (!(pte&VTLB_PTE_IO)) { | ||
448 | vhpt_insert(phy_pte, itir, ifa, pte); | ||
449 | } else { | ||
450 | vtlb_insert(v, pte, itir, ifa); | ||
451 | vcpu_quick_region_set(VMX(v, tc_regions), ifa); | ||
452 | } | ||
453 | } else if (ps > mrr.ps) { | ||
454 | vtlb_insert(v, pte, itir, ifa); | ||
455 | vcpu_quick_region_set(VMX(v, tc_regions), ifa); | ||
456 | if (!(pte&VTLB_PTE_IO)) | ||
457 | vhpt_insert(phy_pte, itir, ifa, pte); | ||
458 | } else { | ||
459 | u64 psr; | ||
460 | phy_pte &= ~PAGE_FLAGS_RV_MASK; | ||
461 | psr = ia64_clear_ic(); | ||
462 | ia64_itc(type, ifa, phy_pte, ps); | ||
463 | ia64_set_psr(psr); | ||
464 | } | ||
465 | if (!(pte&VTLB_PTE_IO)) | ||
466 | mark_pages_dirty(v, pte, ps); | ||
467 | |||
468 | return ret; | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * Purge all TCs or VHPT entries including those in Hash table. | ||
473 | * | ||
474 | */ | ||
475 | |||
476 | void thash_purge_all(struct kvm_vcpu *v) | ||
477 | { | ||
478 | int i; | ||
479 | struct thash_data *head; | ||
480 | struct thash_cb *vtlb, *vhpt; | ||
481 | vtlb = &v->arch.vtlb; | ||
482 | vhpt = &v->arch.vhpt; | ||
483 | |||
484 | for (i = 0; i < 8; i++) | ||
485 | VMX(v, psbits[i]) = 0; | ||
486 | |||
487 | head = vtlb->hash; | ||
488 | for (i = 0; i < vtlb->num; i++) { | ||
489 | head->page_flags = 0; | ||
490 | head->etag = INVALID_TI_TAG; | ||
491 | head->itir = 0; | ||
492 | head->next = 0; | ||
493 | head++; | ||
494 | }; | ||
495 | |||
496 | head = vhpt->hash; | ||
497 | for (i = 0; i < vhpt->num; i++) { | ||
498 | head->page_flags = 0; | ||
499 | head->etag = INVALID_TI_TAG; | ||
500 | head->itir = 0; | ||
501 | head->next = 0; | ||
502 | head++; | ||
503 | }; | ||
504 | |||
505 | local_flush_tlb_all(); | ||
506 | } | ||
507 | |||
508 | |||
509 | /* | ||
510 | * Lookup the hash table and its collision chain to find an entry | ||
511 | * covering this address rid:va or the entry. | ||
512 | * | ||
513 | * INPUT: | ||
514 | * in: TLB format for both VHPT & TLB. | ||
515 | */ | ||
516 | |||
517 | struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data) | ||
518 | { | ||
519 | struct thash_data *cch; | ||
520 | u64 psbits, ps, tag; | ||
521 | union ia64_rr vrr; | ||
522 | |||
523 | struct thash_cb *hcb = &v->arch.vtlb; | ||
524 | |||
525 | cch = __vtr_lookup(v, va, is_data);; | ||
526 | if (cch) | ||
527 | return cch; | ||
528 | |||
529 | if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0) | ||
530 | return NULL; | ||
531 | |||
532 | psbits = VMX(v, psbits[(va >> 61)]); | ||
533 | vrr.val = vcpu_get_rr(v, va); | ||
534 | while (psbits) { | ||
535 | ps = __ffs(psbits); | ||
536 | psbits &= ~(1UL << ps); | ||
537 | vrr.ps = ps; | ||
538 | cch = vsa_thash(hcb->pta, va, vrr.val, &tag); | ||
539 | if (cch->etag == tag && cch->ps == ps) | ||
540 | return cch; | ||
541 | } | ||
542 | |||
543 | return NULL; | ||
544 | } | ||
545 | |||
546 | |||
547 | /* | ||
548 | * Initialize internal control data before service. | ||
549 | */ | ||
550 | void thash_init(struct thash_cb *hcb, u64 sz) | ||
551 | { | ||
552 | int i; | ||
553 | struct thash_data *head; | ||
554 | |||
555 | hcb->pta.val = (unsigned long)hcb->hash; | ||
556 | hcb->pta.vf = 1; | ||
557 | hcb->pta.ve = 1; | ||
558 | hcb->pta.size = sz; | ||
559 | head = hcb->hash; | ||
560 | for (i = 0; i < hcb->num; i++) { | ||
561 | head->page_flags = 0; | ||
562 | head->itir = 0; | ||
563 | head->etag = INVALID_TI_TAG; | ||
564 | head->next = 0; | ||
565 | head++; | ||
566 | } | ||
567 | } | ||
568 | |||
569 | u64 kvm_lookup_mpa(u64 gpfn) | ||
570 | { | ||
571 | u64 *base = (u64 *) KVM_P2M_BASE; | ||
572 | return *(base + gpfn); | ||
573 | } | ||
574 | |||
575 | u64 kvm_gpa_to_mpa(u64 gpa) | ||
576 | { | ||
577 | u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT); | ||
578 | return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK); | ||
579 | } | ||
580 | |||
581 | |||
582 | /* | ||
583 | * Fetch guest bundle code. | ||
584 | * INPUT: | ||
585 | * gip: guest ip | ||
586 | * pbundle: used to return fetched bundle. | ||
587 | */ | ||
588 | int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle) | ||
589 | { | ||
590 | u64 gpip = 0; /* guest physical IP*/ | ||
591 | u64 *vpa; | ||
592 | struct thash_data *tlb; | ||
593 | u64 maddr; | ||
594 | |||
595 | if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) { | ||
596 | /* I-side physical mode */ | ||
597 | gpip = gip; | ||
598 | } else { | ||
599 | tlb = vtlb_lookup(vcpu, gip, I_TLB); | ||
600 | if (tlb) | ||
601 | gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) | | ||
602 | (gip & (PSIZE(tlb->ps) - 1)); | ||
603 | } | ||
604 | if (gpip) { | ||
605 | maddr = kvm_gpa_to_mpa(gpip); | ||
606 | } else { | ||
607 | tlb = vhpt_lookup(gip); | ||
608 | if (tlb == NULL) { | ||
609 | ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2); | ||
610 | return IA64_FAULT; | ||
611 | } | ||
612 | maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) | ||
613 | | (gip & (PSIZE(tlb->ps) - 1)); | ||
614 | } | ||
615 | vpa = (u64 *)__kvm_va(maddr); | ||
616 | |||
617 | pbundle->i64[0] = *vpa++; | ||
618 | pbundle->i64[1] = *vpa; | ||
619 | |||
620 | return IA64_NO_FAULT; | ||
621 | } | ||
622 | |||
623 | |||
624 | void kvm_init_vhpt(struct kvm_vcpu *v) | ||
625 | { | ||
626 | v->arch.vhpt.num = VHPT_NUM_ENTRIES; | ||
627 | thash_init(&v->arch.vhpt, VHPT_SHIFT); | ||
628 | ia64_set_pta(v->arch.vhpt.pta.val); | ||
629 | /*Enable VHPT here?*/ | ||
630 | } | ||
631 | |||
632 | void kvm_init_vtlb(struct kvm_vcpu *v) | ||
633 | { | ||
634 | v->arch.vtlb.num = VTLB_NUM_ENTRIES; | ||
635 | thash_init(&v->arch.vtlb, VTLB_SHIFT); | ||
636 | } | ||
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 20f45a8b87e3..4e40c122bf26 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig | |||
@@ -803,3 +803,4 @@ config PPC_CLOCK | |||
803 | config PPC_LIB_RHEAP | 803 | config PPC_LIB_RHEAP |
804 | bool | 804 | bool |
805 | 805 | ||
806 | source "arch/powerpc/kvm/Kconfig" | ||
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index a86d8d853214..807a2dce6263 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug | |||
@@ -151,6 +151,9 @@ config BOOTX_TEXT | |||
151 | 151 | ||
152 | config PPC_EARLY_DEBUG | 152 | config PPC_EARLY_DEBUG |
153 | bool "Early debugging (dangerous)" | 153 | bool "Early debugging (dangerous)" |
154 | # PPC_EARLY_DEBUG on 440 leaves AS=1 mappings above the TLB high water | ||
155 | # mark, which doesn't work with current 440 KVM. | ||
156 | depends on !KVM | ||
154 | help | 157 | help |
155 | Say Y to enable some early debugging facilities that may be available | 158 | Say Y to enable some early debugging facilities that may be available |
156 | for your processor/board combination. Those facilities are hacks | 159 | for your processor/board combination. Those facilities are hacks |
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index e2ec4a91ccef..9dcdc036cdf7 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile | |||
@@ -145,6 +145,7 @@ core-y += arch/powerpc/kernel/ \ | |||
145 | arch/powerpc/platforms/ | 145 | arch/powerpc/platforms/ |
146 | core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/ | 146 | core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/ |
147 | core-$(CONFIG_XMON) += arch/powerpc/xmon/ | 147 | core-$(CONFIG_XMON) += arch/powerpc/xmon/ |
148 | core-$(CONFIG_KVM) += arch/powerpc/kvm/ | ||
148 | 149 | ||
149 | drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ | 150 | drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ |
150 | 151 | ||
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index adf1d09d726f..62134845af08 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -23,6 +23,9 @@ | |||
23 | #include <linux/mm.h> | 23 | #include <linux/mm.h> |
24 | #include <linux/suspend.h> | 24 | #include <linux/suspend.h> |
25 | #include <linux/hrtimer.h> | 25 | #include <linux/hrtimer.h> |
26 | #ifdef CONFIG_KVM | ||
27 | #include <linux/kvm_host.h> | ||
28 | #endif | ||
26 | #ifdef CONFIG_PPC64 | 29 | #ifdef CONFIG_PPC64 |
27 | #include <linux/time.h> | 30 | #include <linux/time.h> |
28 | #include <linux/hardirq.h> | 31 | #include <linux/hardirq.h> |
@@ -324,5 +327,30 @@ int main(void) | |||
324 | 327 | ||
325 | DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); | 328 | DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); |
326 | 329 | ||
330 | #ifdef CONFIG_KVM | ||
331 | DEFINE(TLBE_BYTES, sizeof(struct tlbe)); | ||
332 | |||
333 | DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); | ||
334 | DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); | ||
335 | DEFINE(VCPU_HOST_TLB, offsetof(struct kvm_vcpu, arch.host_tlb)); | ||
336 | DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb)); | ||
337 | DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); | ||
338 | DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); | ||
339 | DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); | ||
340 | DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); | ||
341 | DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); | ||
342 | DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); | ||
343 | DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); | ||
344 | DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); | ||
345 | DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); | ||
346 | DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); | ||
347 | DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); | ||
348 | DEFINE(VCPU_PID, offsetof(struct kvm_vcpu, arch.pid)); | ||
349 | |||
350 | DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); | ||
351 | DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); | ||
352 | DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); | ||
353 | #endif | ||
354 | |||
327 | return 0; | 355 | return 0; |
328 | } | 356 | } |
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c new file mode 100644 index 000000000000..f5d7a5eab96e --- /dev/null +++ b/arch/powerpc/kvm/44x_tlb.c | |||
@@ -0,0 +1,224 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright IBM Corp. 2007 | ||
16 | * | ||
17 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | ||
18 | */ | ||
19 | |||
20 | #include <linux/types.h> | ||
21 | #include <linux/string.h> | ||
22 | #include <linux/kvm_host.h> | ||
23 | #include <linux/highmem.h> | ||
24 | #include <asm/mmu-44x.h> | ||
25 | #include <asm/kvm_ppc.h> | ||
26 | |||
27 | #include "44x_tlb.h" | ||
28 | |||
29 | #define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) | ||
30 | #define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) | ||
31 | |||
32 | static unsigned int kvmppc_tlb_44x_pos; | ||
33 | |||
34 | static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) | ||
35 | { | ||
36 | /* Mask off reserved bits. */ | ||
37 | attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK; | ||
38 | |||
39 | if (!usermode) { | ||
40 | /* Guest is in supervisor mode, so we need to translate guest | ||
41 | * supervisor permissions into user permissions. */ | ||
42 | attrib &= ~PPC44x_TLB_USER_PERM_MASK; | ||
43 | attrib |= (attrib & PPC44x_TLB_SUPER_PERM_MASK) << 3; | ||
44 | } | ||
45 | |||
46 | /* Make sure host can always access this memory. */ | ||
47 | attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; | ||
48 | |||
49 | return attrib; | ||
50 | } | ||
51 | |||
52 | /* Search the guest TLB for a matching entry. */ | ||
53 | int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, | ||
54 | unsigned int as) | ||
55 | { | ||
56 | int i; | ||
57 | |||
58 | /* XXX Replace loop with fancy data structures. */ | ||
59 | for (i = 0; i < PPC44x_TLB_SIZE; i++) { | ||
60 | struct tlbe *tlbe = &vcpu->arch.guest_tlb[i]; | ||
61 | unsigned int tid; | ||
62 | |||
63 | if (eaddr < get_tlb_eaddr(tlbe)) | ||
64 | continue; | ||
65 | |||
66 | if (eaddr > get_tlb_end(tlbe)) | ||
67 | continue; | ||
68 | |||
69 | tid = get_tlb_tid(tlbe); | ||
70 | if (tid && (tid != pid)) | ||
71 | continue; | ||
72 | |||
73 | if (!get_tlb_v(tlbe)) | ||
74 | continue; | ||
75 | |||
76 | if (get_tlb_ts(tlbe) != as) | ||
77 | continue; | ||
78 | |||
79 | return i; | ||
80 | } | ||
81 | |||
82 | return -1; | ||
83 | } | ||
84 | |||
85 | struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) | ||
86 | { | ||
87 | unsigned int as = !!(vcpu->arch.msr & MSR_IS); | ||
88 | unsigned int index; | ||
89 | |||
90 | index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); | ||
91 | if (index == -1) | ||
92 | return NULL; | ||
93 | return &vcpu->arch.guest_tlb[index]; | ||
94 | } | ||
95 | |||
96 | struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) | ||
97 | { | ||
98 | unsigned int as = !!(vcpu->arch.msr & MSR_DS); | ||
99 | unsigned int index; | ||
100 | |||
101 | index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); | ||
102 | if (index == -1) | ||
103 | return NULL; | ||
104 | return &vcpu->arch.guest_tlb[index]; | ||
105 | } | ||
106 | |||
107 | static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe) | ||
108 | { | ||
109 | return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW); | ||
110 | } | ||
111 | |||
112 | /* Must be called with mmap_sem locked for writing. */ | ||
113 | static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, | ||
114 | unsigned int index) | ||
115 | { | ||
116 | struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index]; | ||
117 | struct page *page = vcpu->arch.shadow_pages[index]; | ||
118 | |||
119 | kunmap(vcpu->arch.shadow_pages[index]); | ||
120 | |||
121 | if (get_tlb_v(stlbe)) { | ||
122 | if (kvmppc_44x_tlbe_is_writable(stlbe)) | ||
123 | kvm_release_page_dirty(page); | ||
124 | else | ||
125 | kvm_release_page_clean(page); | ||
126 | } | ||
127 | } | ||
128 | |||
129 | /* Caller must ensure that the specified guest TLB entry is safe to insert into | ||
130 | * the shadow TLB. */ | ||
131 | void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, | ||
132 | u32 flags) | ||
133 | { | ||
134 | struct page *new_page; | ||
135 | struct tlbe *stlbe; | ||
136 | hpa_t hpaddr; | ||
137 | unsigned int victim; | ||
138 | |||
139 | /* Future optimization: don't overwrite the TLB entry containing the | ||
140 | * current PC (or stack?). */ | ||
141 | victim = kvmppc_tlb_44x_pos++; | ||
142 | if (kvmppc_tlb_44x_pos > tlb_44x_hwater) | ||
143 | kvmppc_tlb_44x_pos = 0; | ||
144 | stlbe = &vcpu->arch.shadow_tlb[victim]; | ||
145 | |||
146 | /* Get reference to new page. */ | ||
147 | down_write(¤t->mm->mmap_sem); | ||
148 | new_page = gfn_to_page(vcpu->kvm, gfn); | ||
149 | if (is_error_page(new_page)) { | ||
150 | printk(KERN_ERR "Couldn't get guest page!\n"); | ||
151 | kvm_release_page_clean(new_page); | ||
152 | return; | ||
153 | } | ||
154 | hpaddr = page_to_phys(new_page); | ||
155 | |||
156 | /* Drop reference to old page. */ | ||
157 | kvmppc_44x_shadow_release(vcpu, victim); | ||
158 | up_write(¤t->mm->mmap_sem); | ||
159 | |||
160 | vcpu->arch.shadow_pages[victim] = new_page; | ||
161 | |||
162 | /* XXX Make sure (va, size) doesn't overlap any other | ||
163 | * entries. 440x6 user manual says the result would be | ||
164 | * "undefined." */ | ||
165 | |||
166 | /* XXX what about AS? */ | ||
167 | |||
168 | stlbe->tid = asid & 0xff; | ||
169 | |||
170 | /* Force TS=1 for all guest mappings. */ | ||
171 | /* For now we hardcode 4KB mappings, but it will be important to | ||
172 | * use host large pages in the future. */ | ||
173 | stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS | ||
174 | | PPC44x_TLB_4K; | ||
175 | |||
176 | stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); | ||
177 | stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, | ||
178 | vcpu->arch.msr & MSR_PR); | ||
179 | } | ||
180 | |||
181 | void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, u64 eaddr, u64 asid) | ||
182 | { | ||
183 | unsigned int pid = asid & 0xff; | ||
184 | int i; | ||
185 | |||
186 | /* XXX Replace loop with fancy data structures. */ | ||
187 | down_write(¤t->mm->mmap_sem); | ||
188 | for (i = 0; i <= tlb_44x_hwater; i++) { | ||
189 | struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; | ||
190 | unsigned int tid; | ||
191 | |||
192 | if (!get_tlb_v(stlbe)) | ||
193 | continue; | ||
194 | |||
195 | if (eaddr < get_tlb_eaddr(stlbe)) | ||
196 | continue; | ||
197 | |||
198 | if (eaddr > get_tlb_end(stlbe)) | ||
199 | continue; | ||
200 | |||
201 | tid = get_tlb_tid(stlbe); | ||
202 | if (tid && (tid != pid)) | ||
203 | continue; | ||
204 | |||
205 | kvmppc_44x_shadow_release(vcpu, i); | ||
206 | stlbe->word0 = 0; | ||
207 | } | ||
208 | up_write(¤t->mm->mmap_sem); | ||
209 | } | ||
210 | |||
211 | /* Invalidate all mappings, so that when they fault back in they will get the | ||
212 | * proper permission bits. */ | ||
213 | void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) | ||
214 | { | ||
215 | int i; | ||
216 | |||
217 | /* XXX Replace loop with fancy data structures. */ | ||
218 | down_write(¤t->mm->mmap_sem); | ||
219 | for (i = 0; i <= tlb_44x_hwater; i++) { | ||
220 | kvmppc_44x_shadow_release(vcpu, i); | ||
221 | vcpu->arch.shadow_tlb[i].word0 = 0; | ||
222 | } | ||
223 | up_write(¤t->mm->mmap_sem); | ||
224 | } | ||
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h new file mode 100644 index 000000000000..2ccd46b6f6b7 --- /dev/null +++ b/arch/powerpc/kvm/44x_tlb.h | |||
@@ -0,0 +1,91 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright IBM Corp. 2007 | ||
16 | * | ||
17 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | ||
18 | */ | ||
19 | |||
20 | #ifndef __KVM_POWERPC_TLB_H__ | ||
21 | #define __KVM_POWERPC_TLB_H__ | ||
22 | |||
23 | #include <linux/kvm_host.h> | ||
24 | #include <asm/mmu-44x.h> | ||
25 | |||
26 | extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, | ||
27 | unsigned int pid, unsigned int as); | ||
28 | extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); | ||
29 | extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); | ||
30 | |||
31 | /* TLB helper functions */ | ||
32 | static inline unsigned int get_tlb_size(const struct tlbe *tlbe) | ||
33 | { | ||
34 | return (tlbe->word0 >> 4) & 0xf; | ||
35 | } | ||
36 | |||
37 | static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) | ||
38 | { | ||
39 | return tlbe->word0 & 0xfffffc00; | ||
40 | } | ||
41 | |||
42 | static inline gva_t get_tlb_bytes(const struct tlbe *tlbe) | ||
43 | { | ||
44 | unsigned int pgsize = get_tlb_size(tlbe); | ||
45 | return 1 << 10 << (pgsize << 1); | ||
46 | } | ||
47 | |||
48 | static inline gva_t get_tlb_end(const struct tlbe *tlbe) | ||
49 | { | ||
50 | return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; | ||
51 | } | ||
52 | |||
53 | static inline u64 get_tlb_raddr(const struct tlbe *tlbe) | ||
54 | { | ||
55 | u64 word1 = tlbe->word1; | ||
56 | return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); | ||
57 | } | ||
58 | |||
59 | static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) | ||
60 | { | ||
61 | return tlbe->tid & 0xff; | ||
62 | } | ||
63 | |||
64 | static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) | ||
65 | { | ||
66 | return (tlbe->word0 >> 8) & 0x1; | ||
67 | } | ||
68 | |||
69 | static inline unsigned int get_tlb_v(const struct tlbe *tlbe) | ||
70 | { | ||
71 | return (tlbe->word0 >> 9) & 0x1; | ||
72 | } | ||
73 | |||
74 | static inline unsigned int get_mmucr_stid(const struct kvm_vcpu *vcpu) | ||
75 | { | ||
76 | return vcpu->arch.mmucr & 0xff; | ||
77 | } | ||
78 | |||
79 | static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) | ||
80 | { | ||
81 | return (vcpu->arch.mmucr >> 16) & 0x1; | ||
82 | } | ||
83 | |||
84 | static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr) | ||
85 | { | ||
86 | unsigned int pgmask = get_tlb_bytes(tlbe) - 1; | ||
87 | |||
88 | return get_tlb_raddr(tlbe) | (eaddr & pgmask); | ||
89 | } | ||
90 | |||
91 | #endif /* __KVM_POWERPC_TLB_H__ */ | ||
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig new file mode 100644 index 000000000000..6b076010213b --- /dev/null +++ b/arch/powerpc/kvm/Kconfig | |||
@@ -0,0 +1,42 @@ | |||
1 | # | ||
2 | # KVM configuration | ||
3 | # | ||
4 | |||
5 | menuconfig VIRTUALIZATION | ||
6 | bool "Virtualization" | ||
7 | ---help--- | ||
8 | Say Y here to get to see options for using your Linux host to run | ||
9 | other operating systems inside virtual machines (guests). | ||
10 | This option alone does not add any kernel code. | ||
11 | |||
12 | If you say N, all options in this submenu will be skipped and | ||
13 | disabled. | ||
14 | |||
15 | if VIRTUALIZATION | ||
16 | |||
17 | config KVM | ||
18 | bool "Kernel-based Virtual Machine (KVM) support" | ||
19 | depends on 44x && EXPERIMENTAL | ||
20 | select PREEMPT_NOTIFIERS | ||
21 | select ANON_INODES | ||
22 | # We can only run on Book E hosts so far | ||
23 | select KVM_BOOKE_HOST | ||
24 | ---help--- | ||
25 | Support hosting virtualized guest machines. You will also | ||
26 | need to select one or more of the processor modules below. | ||
27 | |||
28 | This module provides access to the hardware capabilities through | ||
29 | a character device node named /dev/kvm. | ||
30 | |||
31 | If unsure, say N. | ||
32 | |||
33 | config KVM_BOOKE_HOST | ||
34 | bool "KVM host support for Book E PowerPC processors" | ||
35 | depends on KVM && 44x | ||
36 | ---help--- | ||
37 | Provides host support for KVM on Book E PowerPC processors. Currently | ||
38 | this works on 440 processors only. | ||
39 | |||
40 | source drivers/virtio/Kconfig | ||
41 | |||
42 | endif # VIRTUALIZATION | ||
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile new file mode 100644 index 000000000000..d0d358d367ec --- /dev/null +++ b/arch/powerpc/kvm/Makefile | |||
@@ -0,0 +1,15 @@ | |||
1 | # | ||
2 | # Makefile for Kernel-based Virtual Machine module | ||
3 | # | ||
4 | |||
5 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm | ||
6 | |||
7 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) | ||
8 | |||
9 | kvm-objs := $(common-objs) powerpc.o emulate.o booke_guest.o | ||
10 | obj-$(CONFIG_KVM) += kvm.o | ||
11 | |||
12 | AFLAGS_booke_interrupts.o := -I$(obj) | ||
13 | |||
14 | kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o | ||
15 | obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o | ||
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c new file mode 100644 index 000000000000..6d9884a6884a --- /dev/null +++ b/arch/powerpc/kvm/booke_guest.c | |||
@@ -0,0 +1,615 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright IBM Corp. 2007 | ||
16 | * | ||
17 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | ||
18 | * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> | ||
19 | */ | ||
20 | |||
21 | #include <linux/errno.h> | ||
22 | #include <linux/err.h> | ||
23 | #include <linux/kvm_host.h> | ||
24 | #include <linux/module.h> | ||
25 | #include <linux/vmalloc.h> | ||
26 | #include <linux/fs.h> | ||
27 | #include <asm/cputable.h> | ||
28 | #include <asm/uaccess.h> | ||
29 | #include <asm/kvm_ppc.h> | ||
30 | |||
31 | #include "44x_tlb.h" | ||
32 | |||
33 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM | ||
34 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU | ||
35 | |||
36 | struct kvm_stats_debugfs_item debugfs_entries[] = { | ||
37 | { "exits", VCPU_STAT(sum_exits) }, | ||
38 | { "mmio", VCPU_STAT(mmio_exits) }, | ||
39 | { "dcr", VCPU_STAT(dcr_exits) }, | ||
40 | { "sig", VCPU_STAT(signal_exits) }, | ||
41 | { "light", VCPU_STAT(light_exits) }, | ||
42 | { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, | ||
43 | { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, | ||
44 | { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, | ||
45 | { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) }, | ||
46 | { "sysc", VCPU_STAT(syscall_exits) }, | ||
47 | { "isi", VCPU_STAT(isi_exits) }, | ||
48 | { "dsi", VCPU_STAT(dsi_exits) }, | ||
49 | { "inst_emu", VCPU_STAT(emulated_inst_exits) }, | ||
50 | { "dec", VCPU_STAT(dec_exits) }, | ||
51 | { "ext_intr", VCPU_STAT(ext_intr_exits) }, | ||
52 | { NULL } | ||
53 | }; | ||
54 | |||
55 | static const u32 interrupt_msr_mask[16] = { | ||
56 | [BOOKE_INTERRUPT_CRITICAL] = MSR_ME, | ||
57 | [BOOKE_INTERRUPT_MACHINE_CHECK] = 0, | ||
58 | [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE, | ||
59 | [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE, | ||
60 | [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE, | ||
61 | [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE, | ||
62 | [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE, | ||
63 | [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, | ||
64 | [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE, | ||
65 | [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, | ||
66 | [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE, | ||
67 | [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE, | ||
68 | [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME, | ||
69 | [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE, | ||
70 | [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE, | ||
71 | [BOOKE_INTERRUPT_DEBUG] = MSR_ME, | ||
72 | }; | ||
73 | |||
74 | const unsigned char exception_priority[] = { | ||
75 | [BOOKE_INTERRUPT_DATA_STORAGE] = 0, | ||
76 | [BOOKE_INTERRUPT_INST_STORAGE] = 1, | ||
77 | [BOOKE_INTERRUPT_ALIGNMENT] = 2, | ||
78 | [BOOKE_INTERRUPT_PROGRAM] = 3, | ||
79 | [BOOKE_INTERRUPT_FP_UNAVAIL] = 4, | ||
80 | [BOOKE_INTERRUPT_SYSCALL] = 5, | ||
81 | [BOOKE_INTERRUPT_AP_UNAVAIL] = 6, | ||
82 | [BOOKE_INTERRUPT_DTLB_MISS] = 7, | ||
83 | [BOOKE_INTERRUPT_ITLB_MISS] = 8, | ||
84 | [BOOKE_INTERRUPT_MACHINE_CHECK] = 9, | ||
85 | [BOOKE_INTERRUPT_DEBUG] = 10, | ||
86 | [BOOKE_INTERRUPT_CRITICAL] = 11, | ||
87 | [BOOKE_INTERRUPT_WATCHDOG] = 12, | ||
88 | [BOOKE_INTERRUPT_EXTERNAL] = 13, | ||
89 | [BOOKE_INTERRUPT_FIT] = 14, | ||
90 | [BOOKE_INTERRUPT_DECREMENTER] = 15, | ||
91 | }; | ||
92 | |||
93 | const unsigned char priority_exception[] = { | ||
94 | BOOKE_INTERRUPT_DATA_STORAGE, | ||
95 | BOOKE_INTERRUPT_INST_STORAGE, | ||
96 | BOOKE_INTERRUPT_ALIGNMENT, | ||
97 | BOOKE_INTERRUPT_PROGRAM, | ||
98 | BOOKE_INTERRUPT_FP_UNAVAIL, | ||
99 | BOOKE_INTERRUPT_SYSCALL, | ||
100 | BOOKE_INTERRUPT_AP_UNAVAIL, | ||
101 | BOOKE_INTERRUPT_DTLB_MISS, | ||
102 | BOOKE_INTERRUPT_ITLB_MISS, | ||
103 | BOOKE_INTERRUPT_MACHINE_CHECK, | ||
104 | BOOKE_INTERRUPT_DEBUG, | ||
105 | BOOKE_INTERRUPT_CRITICAL, | ||
106 | BOOKE_INTERRUPT_WATCHDOG, | ||
107 | BOOKE_INTERRUPT_EXTERNAL, | ||
108 | BOOKE_INTERRUPT_FIT, | ||
109 | BOOKE_INTERRUPT_DECREMENTER, | ||
110 | }; | ||
111 | |||
112 | |||
113 | void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) | ||
114 | { | ||
115 | struct tlbe *tlbe; | ||
116 | int i; | ||
117 | |||
118 | printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); | ||
119 | printk("| %2s | %3s | %8s | %8s | %8s |\n", | ||
120 | "nr", "tid", "word0", "word1", "word2"); | ||
121 | |||
122 | for (i = 0; i < PPC44x_TLB_SIZE; i++) { | ||
123 | tlbe = &vcpu->arch.guest_tlb[i]; | ||
124 | if (tlbe->word0 & PPC44x_TLB_VALID) | ||
125 | printk(" G%2d | %02X | %08X | %08X | %08X |\n", | ||
126 | i, tlbe->tid, tlbe->word0, tlbe->word1, | ||
127 | tlbe->word2); | ||
128 | } | ||
129 | |||
130 | for (i = 0; i < PPC44x_TLB_SIZE; i++) { | ||
131 | tlbe = &vcpu->arch.shadow_tlb[i]; | ||
132 | if (tlbe->word0 & PPC44x_TLB_VALID) | ||
133 | printk(" S%2d | %02X | %08X | %08X | %08X |\n", | ||
134 | i, tlbe->tid, tlbe->word0, tlbe->word1, | ||
135 | tlbe->word2); | ||
136 | } | ||
137 | } | ||
138 | |||
139 | /* TODO: use vcpu_printf() */ | ||
140 | void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) | ||
141 | { | ||
142 | int i; | ||
143 | |||
144 | printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr); | ||
145 | printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr); | ||
146 | printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1); | ||
147 | |||
148 | printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); | ||
149 | |||
150 | for (i = 0; i < 32; i += 4) { | ||
151 | printk("gpr%02d: %08x %08x %08x %08x\n", i, | ||
152 | vcpu->arch.gpr[i], | ||
153 | vcpu->arch.gpr[i+1], | ||
154 | vcpu->arch.gpr[i+2], | ||
155 | vcpu->arch.gpr[i+3]); | ||
156 | } | ||
157 | } | ||
158 | |||
159 | /* Check if we are ready to deliver the interrupt */ | ||
160 | static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) | ||
161 | { | ||
162 | int r; | ||
163 | |||
164 | switch (interrupt) { | ||
165 | case BOOKE_INTERRUPT_CRITICAL: | ||
166 | r = vcpu->arch.msr & MSR_CE; | ||
167 | break; | ||
168 | case BOOKE_INTERRUPT_MACHINE_CHECK: | ||
169 | r = vcpu->arch.msr & MSR_ME; | ||
170 | break; | ||
171 | case BOOKE_INTERRUPT_EXTERNAL: | ||
172 | r = vcpu->arch.msr & MSR_EE; | ||
173 | break; | ||
174 | case BOOKE_INTERRUPT_DECREMENTER: | ||
175 | r = vcpu->arch.msr & MSR_EE; | ||
176 | break; | ||
177 | case BOOKE_INTERRUPT_FIT: | ||
178 | r = vcpu->arch.msr & MSR_EE; | ||
179 | break; | ||
180 | case BOOKE_INTERRUPT_WATCHDOG: | ||
181 | r = vcpu->arch.msr & MSR_CE; | ||
182 | break; | ||
183 | case BOOKE_INTERRUPT_DEBUG: | ||
184 | r = vcpu->arch.msr & MSR_DE; | ||
185 | break; | ||
186 | default: | ||
187 | r = 1; | ||
188 | } | ||
189 | |||
190 | return r; | ||
191 | } | ||
192 | |||
193 | static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) | ||
194 | { | ||
195 | switch (interrupt) { | ||
196 | case BOOKE_INTERRUPT_DECREMENTER: | ||
197 | vcpu->arch.tsr |= TSR_DIS; | ||
198 | break; | ||
199 | } | ||
200 | |||
201 | vcpu->arch.srr0 = vcpu->arch.pc; | ||
202 | vcpu->arch.srr1 = vcpu->arch.msr; | ||
203 | vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt]; | ||
204 | kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]); | ||
205 | } | ||
206 | |||
207 | /* Check pending exceptions and deliver one, if possible. */ | ||
208 | void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu) | ||
209 | { | ||
210 | unsigned long *pending = &vcpu->arch.pending_exceptions; | ||
211 | unsigned int exception; | ||
212 | unsigned int priority; | ||
213 | |||
214 | priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending)); | ||
215 | while (priority <= BOOKE_MAX_INTERRUPT) { | ||
216 | exception = priority_exception[priority]; | ||
217 | if (kvmppc_can_deliver_interrupt(vcpu, exception)) { | ||
218 | kvmppc_clear_exception(vcpu, exception); | ||
219 | kvmppc_deliver_interrupt(vcpu, exception); | ||
220 | break; | ||
221 | } | ||
222 | |||
223 | priority = find_next_bit(pending, | ||
224 | BITS_PER_BYTE * sizeof(*pending), | ||
225 | priority + 1); | ||
226 | } | ||
227 | } | ||
228 | |||
229 | static int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) | ||
230 | { | ||
231 | enum emulation_result er; | ||
232 | int r; | ||
233 | |||
234 | er = kvmppc_emulate_instruction(run, vcpu); | ||
235 | switch (er) { | ||
236 | case EMULATE_DONE: | ||
237 | /* Future optimization: only reload non-volatiles if they were | ||
238 | * actually modified. */ | ||
239 | r = RESUME_GUEST_NV; | ||
240 | break; | ||
241 | case EMULATE_DO_MMIO: | ||
242 | run->exit_reason = KVM_EXIT_MMIO; | ||
243 | /* We must reload nonvolatiles because "update" load/store | ||
244 | * instructions modify register state. */ | ||
245 | /* Future optimization: only reload non-volatiles if they were | ||
246 | * actually modified. */ | ||
247 | r = RESUME_HOST_NV; | ||
248 | break; | ||
249 | case EMULATE_FAIL: | ||
250 | /* XXX Deliver Program interrupt to guest. */ | ||
251 | printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, | ||
252 | vcpu->arch.last_inst); | ||
253 | r = RESUME_HOST; | ||
254 | break; | ||
255 | default: | ||
256 | BUG(); | ||
257 | } | ||
258 | |||
259 | return r; | ||
260 | } | ||
261 | |||
262 | /** | ||
263 | * kvmppc_handle_exit | ||
264 | * | ||
265 | * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) | ||
266 | */ | ||
267 | int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
268 | unsigned int exit_nr) | ||
269 | { | ||
270 | enum emulation_result er; | ||
271 | int r = RESUME_HOST; | ||
272 | |||
273 | local_irq_enable(); | ||
274 | |||
275 | run->exit_reason = KVM_EXIT_UNKNOWN; | ||
276 | run->ready_for_interrupt_injection = 1; | ||
277 | |||
278 | switch (exit_nr) { | ||
279 | case BOOKE_INTERRUPT_MACHINE_CHECK: | ||
280 | printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); | ||
281 | kvmppc_dump_vcpu(vcpu); | ||
282 | r = RESUME_HOST; | ||
283 | break; | ||
284 | |||
285 | case BOOKE_INTERRUPT_EXTERNAL: | ||
286 | case BOOKE_INTERRUPT_DECREMENTER: | ||
287 | /* Since we switched IVPR back to the host's value, the host | ||
288 | * handled this interrupt the moment we enabled interrupts. | ||
289 | * Now we just offer it a chance to reschedule the guest. */ | ||
290 | |||
291 | /* XXX At this point the TLB still holds our shadow TLB, so if | ||
292 | * we do reschedule the host will fault over it. Perhaps we | ||
293 | * should politely restore the host's entries to minimize | ||
294 | * misses before ceding control. */ | ||
295 | if (need_resched()) | ||
296 | cond_resched(); | ||
297 | if (exit_nr == BOOKE_INTERRUPT_DECREMENTER) | ||
298 | vcpu->stat.dec_exits++; | ||
299 | else | ||
300 | vcpu->stat.ext_intr_exits++; | ||
301 | r = RESUME_GUEST; | ||
302 | break; | ||
303 | |||
304 | case BOOKE_INTERRUPT_PROGRAM: | ||
305 | if (vcpu->arch.msr & MSR_PR) { | ||
306 | /* Program traps generated by user-level software must be handled | ||
307 | * by the guest kernel. */ | ||
308 | vcpu->arch.esr = vcpu->arch.fault_esr; | ||
309 | kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); | ||
310 | r = RESUME_GUEST; | ||
311 | break; | ||
312 | } | ||
313 | |||
314 | er = kvmppc_emulate_instruction(run, vcpu); | ||
315 | switch (er) { | ||
316 | case EMULATE_DONE: | ||
317 | /* Future optimization: only reload non-volatiles if | ||
318 | * they were actually modified by emulation. */ | ||
319 | vcpu->stat.emulated_inst_exits++; | ||
320 | r = RESUME_GUEST_NV; | ||
321 | break; | ||
322 | case EMULATE_DO_DCR: | ||
323 | run->exit_reason = KVM_EXIT_DCR; | ||
324 | r = RESUME_HOST; | ||
325 | break; | ||
326 | case EMULATE_FAIL: | ||
327 | /* XXX Deliver Program interrupt to guest. */ | ||
328 | printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n", | ||
329 | __func__, vcpu->arch.pc, vcpu->arch.last_inst); | ||
330 | /* For debugging, encode the failing instruction and | ||
331 | * report it to userspace. */ | ||
332 | run->hw.hardware_exit_reason = ~0ULL << 32; | ||
333 | run->hw.hardware_exit_reason |= vcpu->arch.last_inst; | ||
334 | r = RESUME_HOST; | ||
335 | break; | ||
336 | default: | ||
337 | BUG(); | ||
338 | } | ||
339 | break; | ||
340 | |||
341 | case BOOKE_INTERRUPT_DATA_STORAGE: | ||
342 | vcpu->arch.dear = vcpu->arch.fault_dear; | ||
343 | vcpu->arch.esr = vcpu->arch.fault_esr; | ||
344 | kvmppc_queue_exception(vcpu, exit_nr); | ||
345 | vcpu->stat.dsi_exits++; | ||
346 | r = RESUME_GUEST; | ||
347 | break; | ||
348 | |||
349 | case BOOKE_INTERRUPT_INST_STORAGE: | ||
350 | vcpu->arch.esr = vcpu->arch.fault_esr; | ||
351 | kvmppc_queue_exception(vcpu, exit_nr); | ||
352 | vcpu->stat.isi_exits++; | ||
353 | r = RESUME_GUEST; | ||
354 | break; | ||
355 | |||
356 | case BOOKE_INTERRUPT_SYSCALL: | ||
357 | kvmppc_queue_exception(vcpu, exit_nr); | ||
358 | vcpu->stat.syscall_exits++; | ||
359 | r = RESUME_GUEST; | ||
360 | break; | ||
361 | |||
362 | case BOOKE_INTERRUPT_DTLB_MISS: { | ||
363 | struct tlbe *gtlbe; | ||
364 | unsigned long eaddr = vcpu->arch.fault_dear; | ||
365 | gfn_t gfn; | ||
366 | |||
367 | /* Check the guest TLB. */ | ||
368 | gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr); | ||
369 | if (!gtlbe) { | ||
370 | /* The guest didn't have a mapping for it. */ | ||
371 | kvmppc_queue_exception(vcpu, exit_nr); | ||
372 | vcpu->arch.dear = vcpu->arch.fault_dear; | ||
373 | vcpu->arch.esr = vcpu->arch.fault_esr; | ||
374 | vcpu->stat.dtlb_real_miss_exits++; | ||
375 | r = RESUME_GUEST; | ||
376 | break; | ||
377 | } | ||
378 | |||
379 | vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); | ||
380 | gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT; | ||
381 | |||
382 | if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { | ||
383 | /* The guest TLB had a mapping, but the shadow TLB | ||
384 | * didn't, and it is RAM. This could be because: | ||
385 | * a) the entry is mapping the host kernel, or | ||
386 | * b) the guest used a large mapping which we're faking | ||
387 | * Either way, we need to satisfy the fault without | ||
388 | * invoking the guest. */ | ||
389 | kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, | ||
390 | gtlbe->word2); | ||
391 | vcpu->stat.dtlb_virt_miss_exits++; | ||
392 | r = RESUME_GUEST; | ||
393 | } else { | ||
394 | /* Guest has mapped and accessed a page which is not | ||
395 | * actually RAM. */ | ||
396 | r = kvmppc_emulate_mmio(run, vcpu); | ||
397 | } | ||
398 | |||
399 | break; | ||
400 | } | ||
401 | |||
402 | case BOOKE_INTERRUPT_ITLB_MISS: { | ||
403 | struct tlbe *gtlbe; | ||
404 | unsigned long eaddr = vcpu->arch.pc; | ||
405 | gfn_t gfn; | ||
406 | |||
407 | r = RESUME_GUEST; | ||
408 | |||
409 | /* Check the guest TLB. */ | ||
410 | gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr); | ||
411 | if (!gtlbe) { | ||
412 | /* The guest didn't have a mapping for it. */ | ||
413 | kvmppc_queue_exception(vcpu, exit_nr); | ||
414 | vcpu->stat.itlb_real_miss_exits++; | ||
415 | break; | ||
416 | } | ||
417 | |||
418 | vcpu->stat.itlb_virt_miss_exits++; | ||
419 | |||
420 | gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT; | ||
421 | |||
422 | if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { | ||
423 | /* The guest TLB had a mapping, but the shadow TLB | ||
424 | * didn't. This could be because: | ||
425 | * a) the entry is mapping the host kernel, or | ||
426 | * b) the guest used a large mapping which we're faking | ||
427 | * Either way, we need to satisfy the fault without | ||
428 | * invoking the guest. */ | ||
429 | kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, | ||
430 | gtlbe->word2); | ||
431 | } else { | ||
432 | /* Guest mapped and leaped at non-RAM! */ | ||
433 | kvmppc_queue_exception(vcpu, | ||
434 | BOOKE_INTERRUPT_MACHINE_CHECK); | ||
435 | } | ||
436 | |||
437 | break; | ||
438 | } | ||
439 | |||
440 | default: | ||
441 | printk(KERN_EMERG "exit_nr %d\n", exit_nr); | ||
442 | BUG(); | ||
443 | } | ||
444 | |||
445 | local_irq_disable(); | ||
446 | |||
447 | kvmppc_check_and_deliver_interrupts(vcpu); | ||
448 | |||
449 | /* Do some exit accounting. */ | ||
450 | vcpu->stat.sum_exits++; | ||
451 | if (!(r & RESUME_HOST)) { | ||
452 | /* To avoid clobbering exit_reason, only check for signals if | ||
453 | * we aren't already exiting to userspace for some other | ||
454 | * reason. */ | ||
455 | if (signal_pending(current)) { | ||
456 | run->exit_reason = KVM_EXIT_INTR; | ||
457 | r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); | ||
458 | |||
459 | vcpu->stat.signal_exits++; | ||
460 | } else { | ||
461 | vcpu->stat.light_exits++; | ||
462 | } | ||
463 | } else { | ||
464 | switch (run->exit_reason) { | ||
465 | case KVM_EXIT_MMIO: | ||
466 | vcpu->stat.mmio_exits++; | ||
467 | break; | ||
468 | case KVM_EXIT_DCR: | ||
469 | vcpu->stat.dcr_exits++; | ||
470 | break; | ||
471 | case KVM_EXIT_INTR: | ||
472 | vcpu->stat.signal_exits++; | ||
473 | break; | ||
474 | } | ||
475 | } | ||
476 | |||
477 | return r; | ||
478 | } | ||
479 | |||
480 | /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ | ||
481 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | ||
482 | { | ||
483 | struct tlbe *tlbe = &vcpu->arch.guest_tlb[0]; | ||
484 | |||
485 | tlbe->tid = 0; | ||
486 | tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; | ||
487 | tlbe->word1 = 0; | ||
488 | tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; | ||
489 | |||
490 | tlbe++; | ||
491 | tlbe->tid = 0; | ||
492 | tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; | ||
493 | tlbe->word1 = 0xef600000; | ||
494 | tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR | ||
495 | | PPC44x_TLB_I | PPC44x_TLB_G; | ||
496 | |||
497 | vcpu->arch.pc = 0; | ||
498 | vcpu->arch.msr = 0; | ||
499 | vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ | ||
500 | |||
501 | /* Eye-catching number so we know if the guest takes an interrupt | ||
502 | * before it's programmed its own IVPR. */ | ||
503 | vcpu->arch.ivpr = 0x55550000; | ||
504 | |||
505 | /* Since the guest can directly access the timebase, it must know the | ||
506 | * real timebase frequency. Accordingly, it must see the state of | ||
507 | * CCR1[TCS]. */ | ||
508 | vcpu->arch.ccr1 = mfspr(SPRN_CCR1); | ||
509 | |||
510 | return 0; | ||
511 | } | ||
512 | |||
513 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | ||
514 | { | ||
515 | int i; | ||
516 | |||
517 | regs->pc = vcpu->arch.pc; | ||
518 | regs->cr = vcpu->arch.cr; | ||
519 | regs->ctr = vcpu->arch.ctr; | ||
520 | regs->lr = vcpu->arch.lr; | ||
521 | regs->xer = vcpu->arch.xer; | ||
522 | regs->msr = vcpu->arch.msr; | ||
523 | regs->srr0 = vcpu->arch.srr0; | ||
524 | regs->srr1 = vcpu->arch.srr1; | ||
525 | regs->pid = vcpu->arch.pid; | ||
526 | regs->sprg0 = vcpu->arch.sprg0; | ||
527 | regs->sprg1 = vcpu->arch.sprg1; | ||
528 | regs->sprg2 = vcpu->arch.sprg2; | ||
529 | regs->sprg3 = vcpu->arch.sprg3; | ||
530 | regs->sprg5 = vcpu->arch.sprg4; | ||
531 | regs->sprg6 = vcpu->arch.sprg5; | ||
532 | regs->sprg7 = vcpu->arch.sprg6; | ||
533 | |||
534 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | ||
535 | regs->gpr[i] = vcpu->arch.gpr[i]; | ||
536 | |||
537 | return 0; | ||
538 | } | ||
539 | |||
540 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | ||
541 | { | ||
542 | int i; | ||
543 | |||
544 | vcpu->arch.pc = regs->pc; | ||
545 | vcpu->arch.cr = regs->cr; | ||
546 | vcpu->arch.ctr = regs->ctr; | ||
547 | vcpu->arch.lr = regs->lr; | ||
548 | vcpu->arch.xer = regs->xer; | ||
549 | vcpu->arch.msr = regs->msr; | ||
550 | vcpu->arch.srr0 = regs->srr0; | ||
551 | vcpu->arch.srr1 = regs->srr1; | ||
552 | vcpu->arch.sprg0 = regs->sprg0; | ||
553 | vcpu->arch.sprg1 = regs->sprg1; | ||
554 | vcpu->arch.sprg2 = regs->sprg2; | ||
555 | vcpu->arch.sprg3 = regs->sprg3; | ||
556 | vcpu->arch.sprg5 = regs->sprg4; | ||
557 | vcpu->arch.sprg6 = regs->sprg5; | ||
558 | vcpu->arch.sprg7 = regs->sprg6; | ||
559 | |||
560 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++) | ||
561 | vcpu->arch.gpr[i] = regs->gpr[i]; | ||
562 | |||
563 | return 0; | ||
564 | } | ||
565 | |||
566 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | ||
567 | struct kvm_sregs *sregs) | ||
568 | { | ||
569 | return -ENOTSUPP; | ||
570 | } | ||
571 | |||
572 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | ||
573 | struct kvm_sregs *sregs) | ||
574 | { | ||
575 | return -ENOTSUPP; | ||
576 | } | ||
577 | |||
578 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | ||
579 | { | ||
580 | return -ENOTSUPP; | ||
581 | } | ||
582 | |||
583 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | ||
584 | { | ||
585 | return -ENOTSUPP; | ||
586 | } | ||
587 | |||
588 | /* 'linear_address' is actually an encoding of AS|PID|EADDR . */ | ||
589 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | ||
590 | struct kvm_translation *tr) | ||
591 | { | ||
592 | struct tlbe *gtlbe; | ||
593 | int index; | ||
594 | gva_t eaddr; | ||
595 | u8 pid; | ||
596 | u8 as; | ||
597 | |||
598 | eaddr = tr->linear_address; | ||
599 | pid = (tr->linear_address >> 32) & 0xff; | ||
600 | as = (tr->linear_address >> 40) & 0x1; | ||
601 | |||
602 | index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); | ||
603 | if (index == -1) { | ||
604 | tr->valid = 0; | ||
605 | return 0; | ||
606 | } | ||
607 | |||
608 | gtlbe = &vcpu->arch.guest_tlb[index]; | ||
609 | |||
610 | tr->physical_address = tlb_xlate(gtlbe, eaddr); | ||
611 | /* XXX what does "writeable" and "usermode" even mean? */ | ||
612 | tr->valid = 1; | ||
613 | |||
614 | return 0; | ||
615 | } | ||
diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c new file mode 100644 index 000000000000..b480341bc31e --- /dev/null +++ b/arch/powerpc/kvm/booke_host.c | |||
@@ -0,0 +1,83 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright IBM Corp. 2008 | ||
16 | * | ||
17 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | ||
18 | */ | ||
19 | |||
20 | #include <linux/errno.h> | ||
21 | #include <linux/kvm_host.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <asm/cacheflush.h> | ||
24 | #include <asm/kvm_ppc.h> | ||
25 | |||
26 | unsigned long kvmppc_booke_handlers; | ||
27 | |||
28 | static int kvmppc_booke_init(void) | ||
29 | { | ||
30 | unsigned long ivor[16]; | ||
31 | unsigned long max_ivor = 0; | ||
32 | int i; | ||
33 | |||
34 | /* We install our own exception handlers by hijacking IVPR. IVPR must | ||
35 | * be 16-bit aligned, so we need a 64KB allocation. */ | ||
36 | kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
37 | VCPU_SIZE_ORDER); | ||
38 | if (!kvmppc_booke_handlers) | ||
39 | return -ENOMEM; | ||
40 | |||
41 | /* XXX make sure our handlers are smaller than Linux's */ | ||
42 | |||
43 | /* Copy our interrupt handlers to match host IVORs. That way we don't | ||
44 | * have to swap the IVORs on every guest/host transition. */ | ||
45 | ivor[0] = mfspr(SPRN_IVOR0); | ||
46 | ivor[1] = mfspr(SPRN_IVOR1); | ||
47 | ivor[2] = mfspr(SPRN_IVOR2); | ||
48 | ivor[3] = mfspr(SPRN_IVOR3); | ||
49 | ivor[4] = mfspr(SPRN_IVOR4); | ||
50 | ivor[5] = mfspr(SPRN_IVOR5); | ||
51 | ivor[6] = mfspr(SPRN_IVOR6); | ||
52 | ivor[7] = mfspr(SPRN_IVOR7); | ||
53 | ivor[8] = mfspr(SPRN_IVOR8); | ||
54 | ivor[9] = mfspr(SPRN_IVOR9); | ||
55 | ivor[10] = mfspr(SPRN_IVOR10); | ||
56 | ivor[11] = mfspr(SPRN_IVOR11); | ||
57 | ivor[12] = mfspr(SPRN_IVOR12); | ||
58 | ivor[13] = mfspr(SPRN_IVOR13); | ||
59 | ivor[14] = mfspr(SPRN_IVOR14); | ||
60 | ivor[15] = mfspr(SPRN_IVOR15); | ||
61 | |||
62 | for (i = 0; i < 16; i++) { | ||
63 | if (ivor[i] > max_ivor) | ||
64 | max_ivor = ivor[i]; | ||
65 | |||
66 | memcpy((void *)kvmppc_booke_handlers + ivor[i], | ||
67 | kvmppc_handlers_start + i * kvmppc_handler_len, | ||
68 | kvmppc_handler_len); | ||
69 | } | ||
70 | flush_icache_range(kvmppc_booke_handlers, | ||
71 | kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); | ||
72 | |||
73 | return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); | ||
74 | } | ||
75 | |||
76 | static void __exit kvmppc_booke_exit(void) | ||
77 | { | ||
78 | free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); | ||
79 | kvm_exit(); | ||
80 | } | ||
81 | |||
82 | module_init(kvmppc_booke_init) | ||
83 | module_exit(kvmppc_booke_exit) | ||
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S new file mode 100644 index 000000000000..3b653b5309b8 --- /dev/null +++ b/arch/powerpc/kvm/booke_interrupts.S | |||
@@ -0,0 +1,436 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright IBM Corp. 2007 | ||
16 | * | ||
17 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | ||
18 | */ | ||
19 | |||
20 | #include <asm/ppc_asm.h> | ||
21 | #include <asm/kvm_asm.h> | ||
22 | #include <asm/reg.h> | ||
23 | #include <asm/mmu-44x.h> | ||
24 | #include <asm/page.h> | ||
25 | #include <asm/asm-offsets.h> | ||
26 | |||
27 | #define KVMPPC_MSR_MASK (MSR_CE|MSR_EE|MSR_PR|MSR_DE|MSR_ME|MSR_IS|MSR_DS) | ||
28 | |||
29 | #define VCPU_GPR(n) (VCPU_GPRS + (n * 4)) | ||
30 | |||
31 | /* The host stack layout: */ | ||
32 | #define HOST_R1 0 /* Implied by stwu. */ | ||
33 | #define HOST_CALLEE_LR 4 | ||
34 | #define HOST_RUN 8 | ||
35 | /* r2 is special: it holds 'current', and it made nonvolatile in the | ||
36 | * kernel with the -ffixed-r2 gcc option. */ | ||
37 | #define HOST_R2 12 | ||
38 | #define HOST_NV_GPRS 16 | ||
39 | #define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * 4)) | ||
40 | #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + 4) | ||
41 | #define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */ | ||
42 | #define HOST_STACK_LR (HOST_STACK_SIZE + 4) /* In caller stack frame. */ | ||
43 | |||
44 | #define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \ | ||
45 | (1<<BOOKE_INTERRUPT_DTLB_MISS)) | ||
46 | |||
47 | #define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \ | ||
48 | (1<<BOOKE_INTERRUPT_DTLB_MISS)) | ||
49 | |||
50 | #define NEED_ESR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \ | ||
51 | (1<<BOOKE_INTERRUPT_INST_STORAGE) | \ | ||
52 | (1<<BOOKE_INTERRUPT_PROGRAM) | \ | ||
53 | (1<<BOOKE_INTERRUPT_DTLB_MISS)) | ||
54 | |||
55 | .macro KVM_HANDLER ivor_nr | ||
56 | _GLOBAL(kvmppc_handler_\ivor_nr) | ||
57 | /* Get pointer to vcpu and record exit number. */ | ||
58 | mtspr SPRN_SPRG0, r4 | ||
59 | mfspr r4, SPRN_SPRG1 | ||
60 | stw r5, VCPU_GPR(r5)(r4) | ||
61 | stw r6, VCPU_GPR(r6)(r4) | ||
62 | mfctr r5 | ||
63 | lis r6, kvmppc_resume_host@h | ||
64 | stw r5, VCPU_CTR(r4) | ||
65 | li r5, \ivor_nr | ||
66 | ori r6, r6, kvmppc_resume_host@l | ||
67 | mtctr r6 | ||
68 | bctr | ||
69 | .endm | ||
70 | |||
71 | _GLOBAL(kvmppc_handlers_start) | ||
72 | KVM_HANDLER BOOKE_INTERRUPT_CRITICAL | ||
73 | KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK | ||
74 | KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE | ||
75 | KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE | ||
76 | KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL | ||
77 | KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT | ||
78 | KVM_HANDLER BOOKE_INTERRUPT_PROGRAM | ||
79 | KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL | ||
80 | KVM_HANDLER BOOKE_INTERRUPT_SYSCALL | ||
81 | KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL | ||
82 | KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER | ||
83 | KVM_HANDLER BOOKE_INTERRUPT_FIT | ||
84 | KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG | ||
85 | KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS | ||
86 | KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS | ||
87 | KVM_HANDLER BOOKE_INTERRUPT_DEBUG | ||
88 | |||
89 | _GLOBAL(kvmppc_handler_len) | ||
90 | .long kvmppc_handler_1 - kvmppc_handler_0 | ||
91 | |||
92 | |||
93 | /* Registers: | ||
94 | * SPRG0: guest r4 | ||
95 | * r4: vcpu pointer | ||
96 | * r5: KVM exit number | ||
97 | */ | ||
98 | _GLOBAL(kvmppc_resume_host) | ||
99 | stw r3, VCPU_GPR(r3)(r4) | ||
100 | mfcr r3 | ||
101 | stw r3, VCPU_CR(r4) | ||
102 | stw r7, VCPU_GPR(r7)(r4) | ||
103 | stw r8, VCPU_GPR(r8)(r4) | ||
104 | stw r9, VCPU_GPR(r9)(r4) | ||
105 | |||
106 | li r6, 1 | ||
107 | slw r6, r6, r5 | ||
108 | |||
109 | /* Save the faulting instruction and all GPRs for emulation. */ | ||
110 | andi. r7, r6, NEED_INST_MASK | ||
111 | beq ..skip_inst_copy | ||
112 | mfspr r9, SPRN_SRR0 | ||
113 | mfmsr r8 | ||
114 | ori r7, r8, MSR_DS | ||
115 | mtmsr r7 | ||
116 | isync | ||
117 | lwz r9, 0(r9) | ||
118 | mtmsr r8 | ||
119 | isync | ||
120 | stw r9, VCPU_LAST_INST(r4) | ||
121 | |||
122 | stw r15, VCPU_GPR(r15)(r4) | ||
123 | stw r16, VCPU_GPR(r16)(r4) | ||
124 | stw r17, VCPU_GPR(r17)(r4) | ||
125 | stw r18, VCPU_GPR(r18)(r4) | ||
126 | stw r19, VCPU_GPR(r19)(r4) | ||
127 | stw r20, VCPU_GPR(r20)(r4) | ||
128 | stw r21, VCPU_GPR(r21)(r4) | ||
129 | stw r22, VCPU_GPR(r22)(r4) | ||
130 | stw r23, VCPU_GPR(r23)(r4) | ||
131 | stw r24, VCPU_GPR(r24)(r4) | ||
132 | stw r25, VCPU_GPR(r25)(r4) | ||
133 | stw r26, VCPU_GPR(r26)(r4) | ||
134 | stw r27, VCPU_GPR(r27)(r4) | ||
135 | stw r28, VCPU_GPR(r28)(r4) | ||
136 | stw r29, VCPU_GPR(r29)(r4) | ||
137 | stw r30, VCPU_GPR(r30)(r4) | ||
138 | stw r31, VCPU_GPR(r31)(r4) | ||
139 | ..skip_inst_copy: | ||
140 | |||
141 | /* Also grab DEAR and ESR before the host can clobber them. */ | ||
142 | |||
143 | andi. r7, r6, NEED_DEAR_MASK | ||
144 | beq ..skip_dear | ||
145 | mfspr r9, SPRN_DEAR | ||
146 | stw r9, VCPU_FAULT_DEAR(r4) | ||
147 | ..skip_dear: | ||
148 | |||
149 | andi. r7, r6, NEED_ESR_MASK | ||
150 | beq ..skip_esr | ||
151 | mfspr r9, SPRN_ESR | ||
152 | stw r9, VCPU_FAULT_ESR(r4) | ||
153 | ..skip_esr: | ||
154 | |||
155 | /* Save remaining volatile guest register state to vcpu. */ | ||
156 | stw r0, VCPU_GPR(r0)(r4) | ||
157 | stw r1, VCPU_GPR(r1)(r4) | ||
158 | stw r2, VCPU_GPR(r2)(r4) | ||
159 | stw r10, VCPU_GPR(r10)(r4) | ||
160 | stw r11, VCPU_GPR(r11)(r4) | ||
161 | stw r12, VCPU_GPR(r12)(r4) | ||
162 | stw r13, VCPU_GPR(r13)(r4) | ||
163 | stw r14, VCPU_GPR(r14)(r4) /* We need a NV GPR below. */ | ||
164 | mflr r3 | ||
165 | stw r3, VCPU_LR(r4) | ||
166 | mfxer r3 | ||
167 | stw r3, VCPU_XER(r4) | ||
168 | mfspr r3, SPRN_SPRG0 | ||
169 | stw r3, VCPU_GPR(r4)(r4) | ||
170 | mfspr r3, SPRN_SRR0 | ||
171 | stw r3, VCPU_PC(r4) | ||
172 | |||
173 | /* Restore host stack pointer and PID before IVPR, since the host | ||
174 | * exception handlers use them. */ | ||
175 | lwz r1, VCPU_HOST_STACK(r4) | ||
176 | lwz r3, VCPU_HOST_PID(r4) | ||
177 | mtspr SPRN_PID, r3 | ||
178 | |||
179 | /* Restore host IVPR before re-enabling interrupts. We cheat and know | ||
180 | * that Linux IVPR is always 0xc0000000. */ | ||
181 | lis r3, 0xc000 | ||
182 | mtspr SPRN_IVPR, r3 | ||
183 | |||
184 | /* Switch to kernel stack and jump to handler. */ | ||
185 | LOAD_REG_ADDR(r3, kvmppc_handle_exit) | ||
186 | mtctr r3 | ||
187 | lwz r3, HOST_RUN(r1) | ||
188 | lwz r2, HOST_R2(r1) | ||
189 | mr r14, r4 /* Save vcpu pointer. */ | ||
190 | |||
191 | bctrl /* kvmppc_handle_exit() */ | ||
192 | |||
193 | /* Restore vcpu pointer and the nonvolatiles we used. */ | ||
194 | mr r4, r14 | ||
195 | lwz r14, VCPU_GPR(r14)(r4) | ||
196 | |||
197 | /* Sometimes instruction emulation must restore complete GPR state. */ | ||
198 | andi. r5, r3, RESUME_FLAG_NV | ||
199 | beq ..skip_nv_load | ||
200 | lwz r15, VCPU_GPR(r15)(r4) | ||
201 | lwz r16, VCPU_GPR(r16)(r4) | ||
202 | lwz r17, VCPU_GPR(r17)(r4) | ||
203 | lwz r18, VCPU_GPR(r18)(r4) | ||
204 | lwz r19, VCPU_GPR(r19)(r4) | ||
205 | lwz r20, VCPU_GPR(r20)(r4) | ||
206 | lwz r21, VCPU_GPR(r21)(r4) | ||
207 | lwz r22, VCPU_GPR(r22)(r4) | ||
208 | lwz r23, VCPU_GPR(r23)(r4) | ||
209 | lwz r24, VCPU_GPR(r24)(r4) | ||
210 | lwz r25, VCPU_GPR(r25)(r4) | ||
211 | lwz r26, VCPU_GPR(r26)(r4) | ||
212 | lwz r27, VCPU_GPR(r27)(r4) | ||
213 | lwz r28, VCPU_GPR(r28)(r4) | ||
214 | lwz r29, VCPU_GPR(r29)(r4) | ||
215 | lwz r30, VCPU_GPR(r30)(r4) | ||
216 | lwz r31, VCPU_GPR(r31)(r4) | ||
217 | ..skip_nv_load: | ||
218 | |||
219 | /* Should we return to the guest? */ | ||
220 | andi. r5, r3, RESUME_FLAG_HOST | ||
221 | beq lightweight_exit | ||
222 | |||
223 | srawi r3, r3, 2 /* Shift -ERR back down. */ | ||
224 | |||
225 | heavyweight_exit: | ||
226 | /* Not returning to guest. */ | ||
227 | |||
228 | /* We already saved guest volatile register state; now save the | ||
229 | * non-volatiles. */ | ||
230 | stw r15, VCPU_GPR(r15)(r4) | ||
231 | stw r16, VCPU_GPR(r16)(r4) | ||
232 | stw r17, VCPU_GPR(r17)(r4) | ||
233 | stw r18, VCPU_GPR(r18)(r4) | ||
234 | stw r19, VCPU_GPR(r19)(r4) | ||
235 | stw r20, VCPU_GPR(r20)(r4) | ||
236 | stw r21, VCPU_GPR(r21)(r4) | ||
237 | stw r22, VCPU_GPR(r22)(r4) | ||
238 | stw r23, VCPU_GPR(r23)(r4) | ||
239 | stw r24, VCPU_GPR(r24)(r4) | ||
240 | stw r25, VCPU_GPR(r25)(r4) | ||
241 | stw r26, VCPU_GPR(r26)(r4) | ||
242 | stw r27, VCPU_GPR(r27)(r4) | ||
243 | stw r28, VCPU_GPR(r28)(r4) | ||
244 | stw r29, VCPU_GPR(r29)(r4) | ||
245 | stw r30, VCPU_GPR(r30)(r4) | ||
246 | stw r31, VCPU_GPR(r31)(r4) | ||
247 | |||
248 | /* Load host non-volatile register state from host stack. */ | ||
249 | lwz r14, HOST_NV_GPR(r14)(r1) | ||
250 | lwz r15, HOST_NV_GPR(r15)(r1) | ||
251 | lwz r16, HOST_NV_GPR(r16)(r1) | ||
252 | lwz r17, HOST_NV_GPR(r17)(r1) | ||
253 | lwz r18, HOST_NV_GPR(r18)(r1) | ||
254 | lwz r19, HOST_NV_GPR(r19)(r1) | ||
255 | lwz r20, HOST_NV_GPR(r20)(r1) | ||
256 | lwz r21, HOST_NV_GPR(r21)(r1) | ||
257 | lwz r22, HOST_NV_GPR(r22)(r1) | ||
258 | lwz r23, HOST_NV_GPR(r23)(r1) | ||
259 | lwz r24, HOST_NV_GPR(r24)(r1) | ||
260 | lwz r25, HOST_NV_GPR(r25)(r1) | ||
261 | lwz r26, HOST_NV_GPR(r26)(r1) | ||
262 | lwz r27, HOST_NV_GPR(r27)(r1) | ||
263 | lwz r28, HOST_NV_GPR(r28)(r1) | ||
264 | lwz r29, HOST_NV_GPR(r29)(r1) | ||
265 | lwz r30, HOST_NV_GPR(r30)(r1) | ||
266 | lwz r31, HOST_NV_GPR(r31)(r1) | ||
267 | |||
268 | /* Return to kvm_vcpu_run(). */ | ||
269 | lwz r4, HOST_STACK_LR(r1) | ||
270 | addi r1, r1, HOST_STACK_SIZE | ||
271 | mtlr r4 | ||
272 | /* r3 still contains the return code from kvmppc_handle_exit(). */ | ||
273 | blr | ||
274 | |||
275 | |||
276 | /* Registers: | ||
277 | * r3: kvm_run pointer | ||
278 | * r4: vcpu pointer | ||
279 | */ | ||
280 | _GLOBAL(__kvmppc_vcpu_run) | ||
281 | stwu r1, -HOST_STACK_SIZE(r1) | ||
282 | stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ | ||
283 | |||
284 | /* Save host state to stack. */ | ||
285 | stw r3, HOST_RUN(r1) | ||
286 | mflr r3 | ||
287 | stw r3, HOST_STACK_LR(r1) | ||
288 | |||
289 | /* Save host non-volatile register state to stack. */ | ||
290 | stw r14, HOST_NV_GPR(r14)(r1) | ||
291 | stw r15, HOST_NV_GPR(r15)(r1) | ||
292 | stw r16, HOST_NV_GPR(r16)(r1) | ||
293 | stw r17, HOST_NV_GPR(r17)(r1) | ||
294 | stw r18, HOST_NV_GPR(r18)(r1) | ||
295 | stw r19, HOST_NV_GPR(r19)(r1) | ||
296 | stw r20, HOST_NV_GPR(r20)(r1) | ||
297 | stw r21, HOST_NV_GPR(r21)(r1) | ||
298 | stw r22, HOST_NV_GPR(r22)(r1) | ||
299 | stw r23, HOST_NV_GPR(r23)(r1) | ||
300 | stw r24, HOST_NV_GPR(r24)(r1) | ||
301 | stw r25, HOST_NV_GPR(r25)(r1) | ||
302 | stw r26, HOST_NV_GPR(r26)(r1) | ||
303 | stw r27, HOST_NV_GPR(r27)(r1) | ||
304 | stw r28, HOST_NV_GPR(r28)(r1) | ||
305 | stw r29, HOST_NV_GPR(r29)(r1) | ||
306 | stw r30, HOST_NV_GPR(r30)(r1) | ||
307 | stw r31, HOST_NV_GPR(r31)(r1) | ||
308 | |||
309 | /* Load guest non-volatiles. */ | ||
310 | lwz r14, VCPU_GPR(r14)(r4) | ||
311 | lwz r15, VCPU_GPR(r15)(r4) | ||
312 | lwz r16, VCPU_GPR(r16)(r4) | ||
313 | lwz r17, VCPU_GPR(r17)(r4) | ||
314 | lwz r18, VCPU_GPR(r18)(r4) | ||
315 | lwz r19, VCPU_GPR(r19)(r4) | ||
316 | lwz r20, VCPU_GPR(r20)(r4) | ||
317 | lwz r21, VCPU_GPR(r21)(r4) | ||
318 | lwz r22, VCPU_GPR(r22)(r4) | ||
319 | lwz r23, VCPU_GPR(r23)(r4) | ||
320 | lwz r24, VCPU_GPR(r24)(r4) | ||
321 | lwz r25, VCPU_GPR(r25)(r4) | ||
322 | lwz r26, VCPU_GPR(r26)(r4) | ||
323 | lwz r27, VCPU_GPR(r27)(r4) | ||
324 | lwz r28, VCPU_GPR(r28)(r4) | ||
325 | lwz r29, VCPU_GPR(r29)(r4) | ||
326 | lwz r30, VCPU_GPR(r30)(r4) | ||
327 | lwz r31, VCPU_GPR(r31)(r4) | ||
328 | |||
329 | lightweight_exit: | ||
330 | stw r2, HOST_R2(r1) | ||
331 | |||
332 | mfspr r3, SPRN_PID | ||
333 | stw r3, VCPU_HOST_PID(r4) | ||
334 | lwz r3, VCPU_PID(r4) | ||
335 | mtspr SPRN_PID, r3 | ||
336 | |||
337 | /* Prevent all TLB updates. */ | ||
338 | mfmsr r5 | ||
339 | lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h | ||
340 | ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l | ||
341 | andc r6, r5, r6 | ||
342 | mtmsr r6 | ||
343 | |||
344 | /* Save the host's non-pinned TLB mappings, and load the guest mappings | ||
345 | * over them. Leave the host's "pinned" kernel mappings in place. */ | ||
346 | /* XXX optimization: use generation count to avoid swapping unmodified | ||
347 | * entries. */ | ||
348 | mfspr r10, SPRN_MMUCR /* Save host MMUCR. */ | ||
349 | lis r8, tlb_44x_hwater@ha | ||
350 | lwz r8, tlb_44x_hwater@l(r8) | ||
351 | addi r3, r4, VCPU_HOST_TLB - 4 | ||
352 | addi r9, r4, VCPU_SHADOW_TLB - 4 | ||
353 | li r6, 0 | ||
354 | 1: | ||
355 | /* Save host entry. */ | ||
356 | tlbre r7, r6, PPC44x_TLB_PAGEID | ||
357 | mfspr r5, SPRN_MMUCR | ||
358 | stwu r5, 4(r3) | ||
359 | stwu r7, 4(r3) | ||
360 | tlbre r7, r6, PPC44x_TLB_XLAT | ||
361 | stwu r7, 4(r3) | ||
362 | tlbre r7, r6, PPC44x_TLB_ATTRIB | ||
363 | stwu r7, 4(r3) | ||
364 | /* Load guest entry. */ | ||
365 | lwzu r7, 4(r9) | ||
366 | mtspr SPRN_MMUCR, r7 | ||
367 | lwzu r7, 4(r9) | ||
368 | tlbwe r7, r6, PPC44x_TLB_PAGEID | ||
369 | lwzu r7, 4(r9) | ||
370 | tlbwe r7, r6, PPC44x_TLB_XLAT | ||
371 | lwzu r7, 4(r9) | ||
372 | tlbwe r7, r6, PPC44x_TLB_ATTRIB | ||
373 | /* Increment index. */ | ||
374 | addi r6, r6, 1 | ||
375 | cmpw r6, r8 | ||
376 | blt 1b | ||
377 | mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */ | ||
378 | |||
379 | iccci 0, 0 /* XXX hack */ | ||
380 | |||
381 | /* Load some guest volatiles. */ | ||
382 | lwz r0, VCPU_GPR(r0)(r4) | ||
383 | lwz r2, VCPU_GPR(r2)(r4) | ||
384 | lwz r9, VCPU_GPR(r9)(r4) | ||
385 | lwz r10, VCPU_GPR(r10)(r4) | ||
386 | lwz r11, VCPU_GPR(r11)(r4) | ||
387 | lwz r12, VCPU_GPR(r12)(r4) | ||
388 | lwz r13, VCPU_GPR(r13)(r4) | ||
389 | lwz r3, VCPU_LR(r4) | ||
390 | mtlr r3 | ||
391 | lwz r3, VCPU_XER(r4) | ||
392 | mtxer r3 | ||
393 | |||
394 | /* Switch the IVPR. XXX If we take a TLB miss after this we're screwed, | ||
395 | * so how do we make sure vcpu won't fault? */ | ||
396 | lis r8, kvmppc_booke_handlers@ha | ||
397 | lwz r8, kvmppc_booke_handlers@l(r8) | ||
398 | mtspr SPRN_IVPR, r8 | ||
399 | |||
400 | /* Save vcpu pointer for the exception handlers. */ | ||
401 | mtspr SPRN_SPRG1, r4 | ||
402 | |||
403 | /* Can't switch the stack pointer until after IVPR is switched, | ||
404 | * because host interrupt handlers would get confused. */ | ||
405 | lwz r1, VCPU_GPR(r1)(r4) | ||
406 | |||
407 | /* XXX handle USPRG0 */ | ||
408 | /* Host interrupt handlers may have clobbered these guest-readable | ||
409 | * SPRGs, so we need to reload them here with the guest's values. */ | ||
410 | lwz r3, VCPU_SPRG4(r4) | ||
411 | mtspr SPRN_SPRG4, r3 | ||
412 | lwz r3, VCPU_SPRG5(r4) | ||
413 | mtspr SPRN_SPRG5, r3 | ||
414 | lwz r3, VCPU_SPRG6(r4) | ||
415 | mtspr SPRN_SPRG6, r3 | ||
416 | lwz r3, VCPU_SPRG7(r4) | ||
417 | mtspr SPRN_SPRG7, r3 | ||
418 | |||
419 | /* Finish loading guest volatiles and jump to guest. */ | ||
420 | lwz r3, VCPU_CTR(r4) | ||
421 | mtctr r3 | ||
422 | lwz r3, VCPU_CR(r4) | ||
423 | mtcr r3 | ||
424 | lwz r5, VCPU_GPR(r5)(r4) | ||
425 | lwz r6, VCPU_GPR(r6)(r4) | ||
426 | lwz r7, VCPU_GPR(r7)(r4) | ||
427 | lwz r8, VCPU_GPR(r8)(r4) | ||
428 | lwz r3, VCPU_PC(r4) | ||
429 | mtsrr0 r3 | ||
430 | lwz r3, VCPU_MSR(r4) | ||
431 | oris r3, r3, KVMPPC_MSR_MASK@h | ||
432 | ori r3, r3, KVMPPC_MSR_MASK@l | ||
433 | mtsrr1 r3 | ||
434 | lwz r3, VCPU_GPR(r3)(r4) | ||
435 | lwz r4, VCPU_GPR(r4)(r4) | ||
436 | rfi | ||
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c new file mode 100644 index 000000000000..a03fe0c80698 --- /dev/null +++ b/arch/powerpc/kvm/emulate.c | |||
@@ -0,0 +1,760 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright IBM Corp. 2007 | ||
16 | * | ||
17 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | ||
18 | */ | ||
19 | |||
20 | #include <linux/jiffies.h> | ||
21 | #include <linux/timer.h> | ||
22 | #include <linux/types.h> | ||
23 | #include <linux/string.h> | ||
24 | #include <linux/kvm_host.h> | ||
25 | |||
26 | #include <asm/dcr.h> | ||
27 | #include <asm/dcr-regs.h> | ||
28 | #include <asm/time.h> | ||
29 | #include <asm/byteorder.h> | ||
30 | #include <asm/kvm_ppc.h> | ||
31 | |||
32 | #include "44x_tlb.h" | ||
33 | |||
34 | /* Instruction decoding */ | ||
35 | static inline unsigned int get_op(u32 inst) | ||
36 | { | ||
37 | return inst >> 26; | ||
38 | } | ||
39 | |||
40 | static inline unsigned int get_xop(u32 inst) | ||
41 | { | ||
42 | return (inst >> 1) & 0x3ff; | ||
43 | } | ||
44 | |||
45 | static inline unsigned int get_sprn(u32 inst) | ||
46 | { | ||
47 | return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); | ||
48 | } | ||
49 | |||
50 | static inline unsigned int get_dcrn(u32 inst) | ||
51 | { | ||
52 | return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); | ||
53 | } | ||
54 | |||
55 | static inline unsigned int get_rt(u32 inst) | ||
56 | { | ||
57 | return (inst >> 21) & 0x1f; | ||
58 | } | ||
59 | |||
60 | static inline unsigned int get_rs(u32 inst) | ||
61 | { | ||
62 | return (inst >> 21) & 0x1f; | ||
63 | } | ||
64 | |||
65 | static inline unsigned int get_ra(u32 inst) | ||
66 | { | ||
67 | return (inst >> 16) & 0x1f; | ||
68 | } | ||
69 | |||
70 | static inline unsigned int get_rb(u32 inst) | ||
71 | { | ||
72 | return (inst >> 11) & 0x1f; | ||
73 | } | ||
74 | |||
75 | static inline unsigned int get_rc(u32 inst) | ||
76 | { | ||
77 | return inst & 0x1; | ||
78 | } | ||
79 | |||
80 | static inline unsigned int get_ws(u32 inst) | ||
81 | { | ||
82 | return (inst >> 11) & 0x1f; | ||
83 | } | ||
84 | |||
85 | static inline unsigned int get_d(u32 inst) | ||
86 | { | ||
87 | return inst & 0xffff; | ||
88 | } | ||
89 | |||
90 | static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, | ||
91 | const struct tlbe *tlbe) | ||
92 | { | ||
93 | gpa_t gpa; | ||
94 | |||
95 | if (!get_tlb_v(tlbe)) | ||
96 | return 0; | ||
97 | |||
98 | /* Does it match current guest AS? */ | ||
99 | /* XXX what about IS != DS? */ | ||
100 | if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) | ||
101 | return 0; | ||
102 | |||
103 | gpa = get_tlb_raddr(tlbe); | ||
104 | if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) | ||
105 | /* Mapping is not for RAM. */ | ||
106 | return 0; | ||
107 | |||
108 | return 1; | ||
109 | } | ||
110 | |||
111 | static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst) | ||
112 | { | ||
113 | u64 eaddr; | ||
114 | u64 raddr; | ||
115 | u64 asid; | ||
116 | u32 flags; | ||
117 | struct tlbe *tlbe; | ||
118 | unsigned int ra; | ||
119 | unsigned int rs; | ||
120 | unsigned int ws; | ||
121 | unsigned int index; | ||
122 | |||
123 | ra = get_ra(inst); | ||
124 | rs = get_rs(inst); | ||
125 | ws = get_ws(inst); | ||
126 | |||
127 | index = vcpu->arch.gpr[ra]; | ||
128 | if (index > PPC44x_TLB_SIZE) { | ||
129 | printk("%s: index %d\n", __func__, index); | ||
130 | kvmppc_dump_vcpu(vcpu); | ||
131 | return EMULATE_FAIL; | ||
132 | } | ||
133 | |||
134 | tlbe = &vcpu->arch.guest_tlb[index]; | ||
135 | |||
136 | /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ | ||
137 | if (tlbe->word0 & PPC44x_TLB_VALID) { | ||
138 | eaddr = get_tlb_eaddr(tlbe); | ||
139 | asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; | ||
140 | kvmppc_mmu_invalidate(vcpu, eaddr, asid); | ||
141 | } | ||
142 | |||
143 | switch (ws) { | ||
144 | case PPC44x_TLB_PAGEID: | ||
145 | tlbe->tid = vcpu->arch.mmucr & 0xff; | ||
146 | tlbe->word0 = vcpu->arch.gpr[rs]; | ||
147 | break; | ||
148 | |||
149 | case PPC44x_TLB_XLAT: | ||
150 | tlbe->word1 = vcpu->arch.gpr[rs]; | ||
151 | break; | ||
152 | |||
153 | case PPC44x_TLB_ATTRIB: | ||
154 | tlbe->word2 = vcpu->arch.gpr[rs]; | ||
155 | break; | ||
156 | |||
157 | default: | ||
158 | return EMULATE_FAIL; | ||
159 | } | ||
160 | |||
161 | if (tlbe_is_host_safe(vcpu, tlbe)) { | ||
162 | eaddr = get_tlb_eaddr(tlbe); | ||
163 | raddr = get_tlb_raddr(tlbe); | ||
164 | asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; | ||
165 | flags = tlbe->word2 & 0xffff; | ||
166 | |||
167 | /* Create a 4KB mapping on the host. If the guest wanted a | ||
168 | * large page, only the first 4KB is mapped here and the rest | ||
169 | * are mapped on the fly. */ | ||
170 | kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags); | ||
171 | } | ||
172 | |||
173 | return EMULATE_DONE; | ||
174 | } | ||
175 | |||
176 | static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) | ||
177 | { | ||
178 | if (vcpu->arch.tcr & TCR_DIE) { | ||
179 | /* The decrementer ticks at the same rate as the timebase, so | ||
180 | * that's how we convert the guest DEC value to the number of | ||
181 | * host ticks. */ | ||
182 | unsigned long nr_jiffies; | ||
183 | |||
184 | nr_jiffies = vcpu->arch.dec / tb_ticks_per_jiffy; | ||
185 | mod_timer(&vcpu->arch.dec_timer, | ||
186 | get_jiffies_64() + nr_jiffies); | ||
187 | } else { | ||
188 | del_timer(&vcpu->arch.dec_timer); | ||
189 | } | ||
190 | } | ||
191 | |||
192 | static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) | ||
193 | { | ||
194 | vcpu->arch.pc = vcpu->arch.srr0; | ||
195 | kvmppc_set_msr(vcpu, vcpu->arch.srr1); | ||
196 | } | ||
197 | |||
198 | /* XXX to do: | ||
199 | * lhax | ||
200 | * lhaux | ||
201 | * lswx | ||
202 | * lswi | ||
203 | * stswx | ||
204 | * stswi | ||
205 | * lha | ||
206 | * lhau | ||
207 | * lmw | ||
208 | * stmw | ||
209 | * | ||
210 | * XXX is_bigendian should depend on MMU mapping or MSR[LE] | ||
211 | */ | ||
212 | int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | ||
213 | { | ||
214 | u32 inst = vcpu->arch.last_inst; | ||
215 | u32 ea; | ||
216 | int ra; | ||
217 | int rb; | ||
218 | int rc; | ||
219 | int rs; | ||
220 | int rt; | ||
221 | int sprn; | ||
222 | int dcrn; | ||
223 | enum emulation_result emulated = EMULATE_DONE; | ||
224 | int advance = 1; | ||
225 | |||
226 | switch (get_op(inst)) { | ||
227 | case 3: /* trap */ | ||
228 | printk("trap!\n"); | ||
229 | kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); | ||
230 | advance = 0; | ||
231 | break; | ||
232 | |||
233 | case 19: | ||
234 | switch (get_xop(inst)) { | ||
235 | case 50: /* rfi */ | ||
236 | kvmppc_emul_rfi(vcpu); | ||
237 | advance = 0; | ||
238 | break; | ||
239 | |||
240 | default: | ||
241 | emulated = EMULATE_FAIL; | ||
242 | break; | ||
243 | } | ||
244 | break; | ||
245 | |||
246 | case 31: | ||
247 | switch (get_xop(inst)) { | ||
248 | |||
249 | case 83: /* mfmsr */ | ||
250 | rt = get_rt(inst); | ||
251 | vcpu->arch.gpr[rt] = vcpu->arch.msr; | ||
252 | break; | ||
253 | |||
254 | case 87: /* lbzx */ | ||
255 | rt = get_rt(inst); | ||
256 | emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); | ||
257 | break; | ||
258 | |||
259 | case 131: /* wrtee */ | ||
260 | rs = get_rs(inst); | ||
261 | vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) | ||
262 | | (vcpu->arch.gpr[rs] & MSR_EE); | ||
263 | break; | ||
264 | |||
265 | case 146: /* mtmsr */ | ||
266 | rs = get_rs(inst); | ||
267 | kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); | ||
268 | break; | ||
269 | |||
270 | case 163: /* wrteei */ | ||
271 | vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) | ||
272 | | (inst & MSR_EE); | ||
273 | break; | ||
274 | |||
275 | case 215: /* stbx */ | ||
276 | rs = get_rs(inst); | ||
277 | emulated = kvmppc_handle_store(run, vcpu, | ||
278 | vcpu->arch.gpr[rs], | ||
279 | 1, 1); | ||
280 | break; | ||
281 | |||
282 | case 247: /* stbux */ | ||
283 | rs = get_rs(inst); | ||
284 | ra = get_ra(inst); | ||
285 | rb = get_rb(inst); | ||
286 | |||
287 | ea = vcpu->arch.gpr[rb]; | ||
288 | if (ra) | ||
289 | ea += vcpu->arch.gpr[ra]; | ||
290 | |||
291 | emulated = kvmppc_handle_store(run, vcpu, | ||
292 | vcpu->arch.gpr[rs], | ||
293 | 1, 1); | ||
294 | vcpu->arch.gpr[rs] = ea; | ||
295 | break; | ||
296 | |||
297 | case 279: /* lhzx */ | ||
298 | rt = get_rt(inst); | ||
299 | emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); | ||
300 | break; | ||
301 | |||
302 | case 311: /* lhzux */ | ||
303 | rt = get_rt(inst); | ||
304 | ra = get_ra(inst); | ||
305 | rb = get_rb(inst); | ||
306 | |||
307 | ea = vcpu->arch.gpr[rb]; | ||
308 | if (ra) | ||
309 | ea += vcpu->arch.gpr[ra]; | ||
310 | |||
311 | emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); | ||
312 | vcpu->arch.gpr[ra] = ea; | ||
313 | break; | ||
314 | |||
315 | case 323: /* mfdcr */ | ||
316 | dcrn = get_dcrn(inst); | ||
317 | rt = get_rt(inst); | ||
318 | |||
319 | /* The guest may access CPR0 registers to determine the timebase | ||
320 | * frequency, and it must know the real host frequency because it | ||
321 | * can directly access the timebase registers. | ||
322 | * | ||
323 | * It would be possible to emulate those accesses in userspace, | ||
324 | * but userspace can really only figure out the end frequency. | ||
325 | * We could decompose that into the factors that compute it, but | ||
326 | * that's tricky math, and it's easier to just report the real | ||
327 | * CPR0 values. | ||
328 | */ | ||
329 | switch (dcrn) { | ||
330 | case DCRN_CPR0_CONFIG_ADDR: | ||
331 | vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; | ||
332 | break; | ||
333 | case DCRN_CPR0_CONFIG_DATA: | ||
334 | local_irq_disable(); | ||
335 | mtdcr(DCRN_CPR0_CONFIG_ADDR, | ||
336 | vcpu->arch.cpr0_cfgaddr); | ||
337 | vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); | ||
338 | local_irq_enable(); | ||
339 | break; | ||
340 | default: | ||
341 | run->dcr.dcrn = dcrn; | ||
342 | run->dcr.data = 0; | ||
343 | run->dcr.is_write = 0; | ||
344 | vcpu->arch.io_gpr = rt; | ||
345 | vcpu->arch.dcr_needed = 1; | ||
346 | emulated = EMULATE_DO_DCR; | ||
347 | } | ||
348 | |||
349 | break; | ||
350 | |||
351 | case 339: /* mfspr */ | ||
352 | sprn = get_sprn(inst); | ||
353 | rt = get_rt(inst); | ||
354 | |||
355 | switch (sprn) { | ||
356 | case SPRN_SRR0: | ||
357 | vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; | ||
358 | case SPRN_SRR1: | ||
359 | vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; | ||
360 | case SPRN_MMUCR: | ||
361 | vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; | ||
362 | case SPRN_PID: | ||
363 | vcpu->arch.gpr[rt] = vcpu->arch.pid; break; | ||
364 | case SPRN_IVPR: | ||
365 | vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; | ||
366 | case SPRN_CCR0: | ||
367 | vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; | ||
368 | case SPRN_CCR1: | ||
369 | vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; | ||
370 | case SPRN_PVR: | ||
371 | vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; | ||
372 | case SPRN_DEAR: | ||
373 | vcpu->arch.gpr[rt] = vcpu->arch.dear; break; | ||
374 | case SPRN_ESR: | ||
375 | vcpu->arch.gpr[rt] = vcpu->arch.esr; break; | ||
376 | case SPRN_DBCR0: | ||
377 | vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; | ||
378 | case SPRN_DBCR1: | ||
379 | vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; | ||
380 | |||
381 | /* Note: mftb and TBRL/TBWL are user-accessible, so | ||
382 | * the guest can always access the real TB anyways. | ||
383 | * In fact, we probably will never see these traps. */ | ||
384 | case SPRN_TBWL: | ||
385 | vcpu->arch.gpr[rt] = mftbl(); break; | ||
386 | case SPRN_TBWU: | ||
387 | vcpu->arch.gpr[rt] = mftbu(); break; | ||
388 | |||
389 | case SPRN_SPRG0: | ||
390 | vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break; | ||
391 | case SPRN_SPRG1: | ||
392 | vcpu->arch.gpr[rt] = vcpu->arch.sprg1; break; | ||
393 | case SPRN_SPRG2: | ||
394 | vcpu->arch.gpr[rt] = vcpu->arch.sprg2; break; | ||
395 | case SPRN_SPRG3: | ||
396 | vcpu->arch.gpr[rt] = vcpu->arch.sprg3; break; | ||
397 | /* Note: SPRG4-7 are user-readable, so we don't get | ||
398 | * a trap. */ | ||
399 | |||
400 | case SPRN_IVOR0: | ||
401 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break; | ||
402 | case SPRN_IVOR1: | ||
403 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break; | ||
404 | case SPRN_IVOR2: | ||
405 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break; | ||
406 | case SPRN_IVOR3: | ||
407 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break; | ||
408 | case SPRN_IVOR4: | ||
409 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break; | ||
410 | case SPRN_IVOR5: | ||
411 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break; | ||
412 | case SPRN_IVOR6: | ||
413 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break; | ||
414 | case SPRN_IVOR7: | ||
415 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break; | ||
416 | case SPRN_IVOR8: | ||
417 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break; | ||
418 | case SPRN_IVOR9: | ||
419 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break; | ||
420 | case SPRN_IVOR10: | ||
421 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break; | ||
422 | case SPRN_IVOR11: | ||
423 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break; | ||
424 | case SPRN_IVOR12: | ||
425 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break; | ||
426 | case SPRN_IVOR13: | ||
427 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break; | ||
428 | case SPRN_IVOR14: | ||
429 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break; | ||
430 | case SPRN_IVOR15: | ||
431 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break; | ||
432 | |||
433 | default: | ||
434 | printk("mfspr: unknown spr %x\n", sprn); | ||
435 | vcpu->arch.gpr[rt] = 0; | ||
436 | break; | ||
437 | } | ||
438 | break; | ||
439 | |||
440 | case 407: /* sthx */ | ||
441 | rs = get_rs(inst); | ||
442 | ra = get_ra(inst); | ||
443 | rb = get_rb(inst); | ||
444 | |||
445 | emulated = kvmppc_handle_store(run, vcpu, | ||
446 | vcpu->arch.gpr[rs], | ||
447 | 2, 1); | ||
448 | break; | ||
449 | |||
450 | case 439: /* sthux */ | ||
451 | rs = get_rs(inst); | ||
452 | ra = get_ra(inst); | ||
453 | rb = get_rb(inst); | ||
454 | |||
455 | ea = vcpu->arch.gpr[rb]; | ||
456 | if (ra) | ||
457 | ea += vcpu->arch.gpr[ra]; | ||
458 | |||
459 | emulated = kvmppc_handle_store(run, vcpu, | ||
460 | vcpu->arch.gpr[rs], | ||
461 | 2, 1); | ||
462 | vcpu->arch.gpr[ra] = ea; | ||
463 | break; | ||
464 | |||
465 | case 451: /* mtdcr */ | ||
466 | dcrn = get_dcrn(inst); | ||
467 | rs = get_rs(inst); | ||
468 | |||
469 | /* emulate some access in kernel */ | ||
470 | switch (dcrn) { | ||
471 | case DCRN_CPR0_CONFIG_ADDR: | ||
472 | vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; | ||
473 | break; | ||
474 | default: | ||
475 | run->dcr.dcrn = dcrn; | ||
476 | run->dcr.data = vcpu->arch.gpr[rs]; | ||
477 | run->dcr.is_write = 1; | ||
478 | vcpu->arch.dcr_needed = 1; | ||
479 | emulated = EMULATE_DO_DCR; | ||
480 | } | ||
481 | |||
482 | break; | ||
483 | |||
484 | case 467: /* mtspr */ | ||
485 | sprn = get_sprn(inst); | ||
486 | rs = get_rs(inst); | ||
487 | switch (sprn) { | ||
488 | case SPRN_SRR0: | ||
489 | vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; | ||
490 | case SPRN_SRR1: | ||
491 | vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; | ||
492 | case SPRN_MMUCR: | ||
493 | vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; | ||
494 | case SPRN_PID: | ||
495 | vcpu->arch.pid = vcpu->arch.gpr[rs]; break; | ||
496 | case SPRN_CCR0: | ||
497 | vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; | ||
498 | case SPRN_CCR1: | ||
499 | vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; | ||
500 | case SPRN_DEAR: | ||
501 | vcpu->arch.dear = vcpu->arch.gpr[rs]; break; | ||
502 | case SPRN_ESR: | ||
503 | vcpu->arch.esr = vcpu->arch.gpr[rs]; break; | ||
504 | case SPRN_DBCR0: | ||
505 | vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; | ||
506 | case SPRN_DBCR1: | ||
507 | vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; | ||
508 | |||
509 | /* XXX We need to context-switch the timebase for | ||
510 | * watchdog and FIT. */ | ||
511 | case SPRN_TBWL: break; | ||
512 | case SPRN_TBWU: break; | ||
513 | |||
514 | case SPRN_DEC: | ||
515 | vcpu->arch.dec = vcpu->arch.gpr[rs]; | ||
516 | kvmppc_emulate_dec(vcpu); | ||
517 | break; | ||
518 | |||
519 | case SPRN_TSR: | ||
520 | vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; | ||
521 | |||
522 | case SPRN_TCR: | ||
523 | vcpu->arch.tcr = vcpu->arch.gpr[rs]; | ||
524 | kvmppc_emulate_dec(vcpu); | ||
525 | break; | ||
526 | |||
527 | case SPRN_SPRG0: | ||
528 | vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break; | ||
529 | case SPRN_SPRG1: | ||
530 | vcpu->arch.sprg1 = vcpu->arch.gpr[rs]; break; | ||
531 | case SPRN_SPRG2: | ||
532 | vcpu->arch.sprg2 = vcpu->arch.gpr[rs]; break; | ||
533 | case SPRN_SPRG3: | ||
534 | vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; | ||
535 | |||
536 | /* Note: SPRG4-7 are user-readable. These values are | ||
537 | * loaded into the real SPRGs when resuming the | ||
538 | * guest. */ | ||
539 | case SPRN_SPRG4: | ||
540 | vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; | ||
541 | case SPRN_SPRG5: | ||
542 | vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; | ||
543 | case SPRN_SPRG6: | ||
544 | vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; | ||
545 | case SPRN_SPRG7: | ||
546 | vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; | ||
547 | |||
548 | case SPRN_IVPR: | ||
549 | vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break; | ||
550 | case SPRN_IVOR0: | ||
551 | vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break; | ||
552 | case SPRN_IVOR1: | ||
553 | vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break; | ||
554 | case SPRN_IVOR2: | ||
555 | vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break; | ||
556 | case SPRN_IVOR3: | ||
557 | vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break; | ||
558 | case SPRN_IVOR4: | ||
559 | vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break; | ||
560 | case SPRN_IVOR5: | ||
561 | vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break; | ||
562 | case SPRN_IVOR6: | ||
563 | vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break; | ||
564 | case SPRN_IVOR7: | ||
565 | vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break; | ||
566 | case SPRN_IVOR8: | ||
567 | vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break; | ||
568 | case SPRN_IVOR9: | ||
569 | vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break; | ||
570 | case SPRN_IVOR10: | ||
571 | vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break; | ||
572 | case SPRN_IVOR11: | ||
573 | vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break; | ||
574 | case SPRN_IVOR12: | ||
575 | vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break; | ||
576 | case SPRN_IVOR13: | ||
577 | vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break; | ||
578 | case SPRN_IVOR14: | ||
579 | vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break; | ||
580 | case SPRN_IVOR15: | ||
581 | vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break; | ||
582 | |||
583 | default: | ||
584 | printk("mtspr: unknown spr %x\n", sprn); | ||
585 | emulated = EMULATE_FAIL; | ||
586 | break; | ||
587 | } | ||
588 | break; | ||
589 | |||
590 | case 470: /* dcbi */ | ||
591 | /* Do nothing. The guest is performing dcbi because | ||
592 | * hardware DMA is not snooped by the dcache, but | ||
593 | * emulated DMA either goes through the dcache as | ||
594 | * normal writes, or the host kernel has handled dcache | ||
595 | * coherence. */ | ||
596 | break; | ||
597 | |||
598 | case 534: /* lwbrx */ | ||
599 | rt = get_rt(inst); | ||
600 | emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); | ||
601 | break; | ||
602 | |||
603 | case 566: /* tlbsync */ | ||
604 | break; | ||
605 | |||
606 | case 662: /* stwbrx */ | ||
607 | rs = get_rs(inst); | ||
608 | ra = get_ra(inst); | ||
609 | rb = get_rb(inst); | ||
610 | |||
611 | emulated = kvmppc_handle_store(run, vcpu, | ||
612 | vcpu->arch.gpr[rs], | ||
613 | 4, 0); | ||
614 | break; | ||
615 | |||
616 | case 978: /* tlbwe */ | ||
617 | emulated = kvmppc_emul_tlbwe(vcpu, inst); | ||
618 | break; | ||
619 | |||
620 | case 914: { /* tlbsx */ | ||
621 | int index; | ||
622 | unsigned int as = get_mmucr_sts(vcpu); | ||
623 | unsigned int pid = get_mmucr_stid(vcpu); | ||
624 | |||
625 | rt = get_rt(inst); | ||
626 | ra = get_ra(inst); | ||
627 | rb = get_rb(inst); | ||
628 | rc = get_rc(inst); | ||
629 | |||
630 | ea = vcpu->arch.gpr[rb]; | ||
631 | if (ra) | ||
632 | ea += vcpu->arch.gpr[ra]; | ||
633 | |||
634 | index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); | ||
635 | if (rc) { | ||
636 | if (index < 0) | ||
637 | vcpu->arch.cr &= ~0x20000000; | ||
638 | else | ||
639 | vcpu->arch.cr |= 0x20000000; | ||
640 | } | ||
641 | vcpu->arch.gpr[rt] = index; | ||
642 | |||
643 | } | ||
644 | break; | ||
645 | |||
646 | case 790: /* lhbrx */ | ||
647 | rt = get_rt(inst); | ||
648 | emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); | ||
649 | break; | ||
650 | |||
651 | case 918: /* sthbrx */ | ||
652 | rs = get_rs(inst); | ||
653 | ra = get_ra(inst); | ||
654 | rb = get_rb(inst); | ||
655 | |||
656 | emulated = kvmppc_handle_store(run, vcpu, | ||
657 | vcpu->arch.gpr[rs], | ||
658 | 2, 0); | ||
659 | break; | ||
660 | |||
661 | case 966: /* iccci */ | ||
662 | break; | ||
663 | |||
664 | default: | ||
665 | printk("unknown: op %d xop %d\n", get_op(inst), | ||
666 | get_xop(inst)); | ||
667 | emulated = EMULATE_FAIL; | ||
668 | break; | ||
669 | } | ||
670 | break; | ||
671 | |||
672 | case 32: /* lwz */ | ||
673 | rt = get_rt(inst); | ||
674 | emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); | ||
675 | break; | ||
676 | |||
677 | case 33: /* lwzu */ | ||
678 | ra = get_ra(inst); | ||
679 | rt = get_rt(inst); | ||
680 | emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); | ||
681 | vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; | ||
682 | break; | ||
683 | |||
684 | case 34: /* lbz */ | ||
685 | rt = get_rt(inst); | ||
686 | emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); | ||
687 | break; | ||
688 | |||
689 | case 35: /* lbzu */ | ||
690 | ra = get_ra(inst); | ||
691 | rt = get_rt(inst); | ||
692 | emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); | ||
693 | vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; | ||
694 | break; | ||
695 | |||
696 | case 36: /* stw */ | ||
697 | rs = get_rs(inst); | ||
698 | emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], | ||
699 | 4, 1); | ||
700 | break; | ||
701 | |||
702 | case 37: /* stwu */ | ||
703 | ra = get_ra(inst); | ||
704 | rs = get_rs(inst); | ||
705 | emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], | ||
706 | 4, 1); | ||
707 | vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; | ||
708 | break; | ||
709 | |||
710 | case 38: /* stb */ | ||
711 | rs = get_rs(inst); | ||
712 | emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], | ||
713 | 1, 1); | ||
714 | break; | ||
715 | |||
716 | case 39: /* stbu */ | ||
717 | ra = get_ra(inst); | ||
718 | rs = get_rs(inst); | ||
719 | emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], | ||
720 | 1, 1); | ||
721 | vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; | ||
722 | break; | ||
723 | |||
724 | case 40: /* lhz */ | ||
725 | rt = get_rt(inst); | ||
726 | emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); | ||
727 | break; | ||
728 | |||
729 | case 41: /* lhzu */ | ||
730 | ra = get_ra(inst); | ||
731 | rt = get_rt(inst); | ||
732 | emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); | ||
733 | vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; | ||
734 | break; | ||
735 | |||
736 | case 44: /* sth */ | ||
737 | rs = get_rs(inst); | ||
738 | emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], | ||
739 | 2, 1); | ||
740 | break; | ||
741 | |||
742 | case 45: /* sthu */ | ||
743 | ra = get_ra(inst); | ||
744 | rs = get_rs(inst); | ||
745 | emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], | ||
746 | 2, 1); | ||
747 | vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; | ||
748 | break; | ||
749 | |||
750 | default: | ||
751 | printk("unknown op %d\n", get_op(inst)); | ||
752 | emulated = EMULATE_FAIL; | ||
753 | break; | ||
754 | } | ||
755 | |||
756 | if (advance) | ||
757 | vcpu->arch.pc += 4; /* Advance past emulated instruction. */ | ||
758 | |||
759 | return emulated; | ||
760 | } | ||
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c new file mode 100644 index 000000000000..bad40bd2d3ac --- /dev/null +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -0,0 +1,436 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright IBM Corp. 2007 | ||
16 | * | ||
17 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | ||
18 | * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> | ||
19 | */ | ||
20 | |||
21 | #include <linux/errno.h> | ||
22 | #include <linux/err.h> | ||
23 | #include <linux/kvm_host.h> | ||
24 | #include <linux/module.h> | ||
25 | #include <linux/vmalloc.h> | ||
26 | #include <linux/fs.h> | ||
27 | #include <asm/cputable.h> | ||
28 | #include <asm/uaccess.h> | ||
29 | #include <asm/kvm_ppc.h> | ||
30 | |||
31 | |||
32 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | ||
33 | { | ||
34 | return gfn; | ||
35 | } | ||
36 | |||
37 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v) | ||
38 | { | ||
39 | /* XXX implement me */ | ||
40 | return 0; | ||
41 | } | ||
42 | |||
43 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) | ||
44 | { | ||
45 | return 1; | ||
46 | } | ||
47 | |||
48 | |||
49 | int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) | ||
50 | { | ||
51 | enum emulation_result er; | ||
52 | int r; | ||
53 | |||
54 | er = kvmppc_emulate_instruction(run, vcpu); | ||
55 | switch (er) { | ||
56 | case EMULATE_DONE: | ||
57 | /* Future optimization: only reload non-volatiles if they were | ||
58 | * actually modified. */ | ||
59 | r = RESUME_GUEST_NV; | ||
60 | break; | ||
61 | case EMULATE_DO_MMIO: | ||
62 | run->exit_reason = KVM_EXIT_MMIO; | ||
63 | /* We must reload nonvolatiles because "update" load/store | ||
64 | * instructions modify register state. */ | ||
65 | /* Future optimization: only reload non-volatiles if they were | ||
66 | * actually modified. */ | ||
67 | r = RESUME_HOST_NV; | ||
68 | break; | ||
69 | case EMULATE_FAIL: | ||
70 | /* XXX Deliver Program interrupt to guest. */ | ||
71 | printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, | ||
72 | vcpu->arch.last_inst); | ||
73 | r = RESUME_HOST; | ||
74 | break; | ||
75 | default: | ||
76 | BUG(); | ||
77 | } | ||
78 | |||
79 | return r; | ||
80 | } | ||
81 | |||
82 | void kvm_arch_hardware_enable(void *garbage) | ||
83 | { | ||
84 | } | ||
85 | |||
86 | void kvm_arch_hardware_disable(void *garbage) | ||
87 | { | ||
88 | } | ||
89 | |||
90 | int kvm_arch_hardware_setup(void) | ||
91 | { | ||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | void kvm_arch_hardware_unsetup(void) | ||
96 | { | ||
97 | } | ||
98 | |||
99 | void kvm_arch_check_processor_compat(void *rtn) | ||
100 | { | ||
101 | int r; | ||
102 | |||
103 | if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) | ||
104 | r = 0; | ||
105 | else | ||
106 | r = -ENOTSUPP; | ||
107 | |||
108 | *(int *)rtn = r; | ||
109 | } | ||
110 | |||
111 | struct kvm *kvm_arch_create_vm(void) | ||
112 | { | ||
113 | struct kvm *kvm; | ||
114 | |||
115 | kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); | ||
116 | if (!kvm) | ||
117 | return ERR_PTR(-ENOMEM); | ||
118 | |||
119 | return kvm; | ||
120 | } | ||
121 | |||
122 | static void kvmppc_free_vcpus(struct kvm *kvm) | ||
123 | { | ||
124 | unsigned int i; | ||
125 | |||
126 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||
127 | if (kvm->vcpus[i]) { | ||
128 | kvm_arch_vcpu_free(kvm->vcpus[i]); | ||
129 | kvm->vcpus[i] = NULL; | ||
130 | } | ||
131 | } | ||
132 | } | ||
133 | |||
134 | void kvm_arch_destroy_vm(struct kvm *kvm) | ||
135 | { | ||
136 | kvmppc_free_vcpus(kvm); | ||
137 | kvm_free_physmem(kvm); | ||
138 | kfree(kvm); | ||
139 | } | ||
140 | |||
141 | int kvm_dev_ioctl_check_extension(long ext) | ||
142 | { | ||
143 | int r; | ||
144 | |||
145 | switch (ext) { | ||
146 | case KVM_CAP_USER_MEMORY: | ||
147 | r = 1; | ||
148 | break; | ||
149 | default: | ||
150 | r = 0; | ||
151 | break; | ||
152 | } | ||
153 | return r; | ||
154 | |||
155 | } | ||
156 | |||
157 | long kvm_arch_dev_ioctl(struct file *filp, | ||
158 | unsigned int ioctl, unsigned long arg) | ||
159 | { | ||
160 | return -EINVAL; | ||
161 | } | ||
162 | |||
163 | int kvm_arch_set_memory_region(struct kvm *kvm, | ||
164 | struct kvm_userspace_memory_region *mem, | ||
165 | struct kvm_memory_slot old, | ||
166 | int user_alloc) | ||
167 | { | ||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) | ||
172 | { | ||
173 | struct kvm_vcpu *vcpu; | ||
174 | int err; | ||
175 | |||
176 | vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); | ||
177 | if (!vcpu) { | ||
178 | err = -ENOMEM; | ||
179 | goto out; | ||
180 | } | ||
181 | |||
182 | err = kvm_vcpu_init(vcpu, kvm, id); | ||
183 | if (err) | ||
184 | goto free_vcpu; | ||
185 | |||
186 | return vcpu; | ||
187 | |||
188 | free_vcpu: | ||
189 | kmem_cache_free(kvm_vcpu_cache, vcpu); | ||
190 | out: | ||
191 | return ERR_PTR(err); | ||
192 | } | ||
193 | |||
194 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | ||
195 | { | ||
196 | kvm_vcpu_uninit(vcpu); | ||
197 | kmem_cache_free(kvm_vcpu_cache, vcpu); | ||
198 | } | ||
199 | |||
200 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | ||
201 | { | ||
202 | kvm_arch_vcpu_free(vcpu); | ||
203 | } | ||
204 | |||
205 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | ||
206 | { | ||
207 | unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER]; | ||
208 | |||
209 | return test_bit(priority, &vcpu->arch.pending_exceptions); | ||
210 | } | ||
211 | |||
212 | static void kvmppc_decrementer_func(unsigned long data) | ||
213 | { | ||
214 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; | ||
215 | |||
216 | kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); | ||
217 | } | ||
218 | |||
219 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | ||
220 | { | ||
221 | setup_timer(&vcpu->arch.dec_timer, kvmppc_decrementer_func, | ||
222 | (unsigned long)vcpu); | ||
223 | |||
224 | return 0; | ||
225 | } | ||
226 | |||
227 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | ||
228 | { | ||
229 | } | ||
230 | |||
231 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
232 | { | ||
233 | } | ||
234 | |||
235 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | ||
236 | { | ||
237 | } | ||
238 | |||
239 | void decache_vcpus_on_cpu(int cpu) | ||
240 | { | ||
241 | } | ||
242 | |||
243 | int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, | ||
244 | struct kvm_debug_guest *dbg) | ||
245 | { | ||
246 | return -ENOTSUPP; | ||
247 | } | ||
248 | |||
249 | static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, | ||
250 | struct kvm_run *run) | ||
251 | { | ||
252 | u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; | ||
253 | *gpr = run->dcr.data; | ||
254 | } | ||
255 | |||
256 | static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | ||
257 | struct kvm_run *run) | ||
258 | { | ||
259 | u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; | ||
260 | |||
261 | if (run->mmio.len > sizeof(*gpr)) { | ||
262 | printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); | ||
263 | return; | ||
264 | } | ||
265 | |||
266 | if (vcpu->arch.mmio_is_bigendian) { | ||
267 | switch (run->mmio.len) { | ||
268 | case 4: *gpr = *(u32 *)run->mmio.data; break; | ||
269 | case 2: *gpr = *(u16 *)run->mmio.data; break; | ||
270 | case 1: *gpr = *(u8 *)run->mmio.data; break; | ||
271 | } | ||
272 | } else { | ||
273 | /* Convert BE data from userland back to LE. */ | ||
274 | switch (run->mmio.len) { | ||
275 | case 4: *gpr = ld_le32((u32 *)run->mmio.data); break; | ||
276 | case 2: *gpr = ld_le16((u16 *)run->mmio.data); break; | ||
277 | case 1: *gpr = *(u8 *)run->mmio.data; break; | ||
278 | } | ||
279 | } | ||
280 | } | ||
281 | |||
282 | int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
283 | unsigned int rt, unsigned int bytes, int is_bigendian) | ||
284 | { | ||
285 | if (bytes > sizeof(run->mmio.data)) { | ||
286 | printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, | ||
287 | run->mmio.len); | ||
288 | } | ||
289 | |||
290 | run->mmio.phys_addr = vcpu->arch.paddr_accessed; | ||
291 | run->mmio.len = bytes; | ||
292 | run->mmio.is_write = 0; | ||
293 | |||
294 | vcpu->arch.io_gpr = rt; | ||
295 | vcpu->arch.mmio_is_bigendian = is_bigendian; | ||
296 | vcpu->mmio_needed = 1; | ||
297 | vcpu->mmio_is_write = 0; | ||
298 | |||
299 | return EMULATE_DO_MMIO; | ||
300 | } | ||
301 | |||
302 | int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
303 | u32 val, unsigned int bytes, int is_bigendian) | ||
304 | { | ||
305 | void *data = run->mmio.data; | ||
306 | |||
307 | if (bytes > sizeof(run->mmio.data)) { | ||
308 | printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, | ||
309 | run->mmio.len); | ||
310 | } | ||
311 | |||
312 | run->mmio.phys_addr = vcpu->arch.paddr_accessed; | ||
313 | run->mmio.len = bytes; | ||
314 | run->mmio.is_write = 1; | ||
315 | vcpu->mmio_needed = 1; | ||
316 | vcpu->mmio_is_write = 1; | ||
317 | |||
318 | /* Store the value at the lowest bytes in 'data'. */ | ||
319 | if (is_bigendian) { | ||
320 | switch (bytes) { | ||
321 | case 4: *(u32 *)data = val; break; | ||
322 | case 2: *(u16 *)data = val; break; | ||
323 | case 1: *(u8 *)data = val; break; | ||
324 | } | ||
325 | } else { | ||
326 | /* Store LE value into 'data'. */ | ||
327 | switch (bytes) { | ||
328 | case 4: st_le32(data, val); break; | ||
329 | case 2: st_le16(data, val); break; | ||
330 | case 1: *(u8 *)data = val; break; | ||
331 | } | ||
332 | } | ||
333 | |||
334 | return EMULATE_DO_MMIO; | ||
335 | } | ||
336 | |||
337 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | ||
338 | { | ||
339 | int r; | ||
340 | sigset_t sigsaved; | ||
341 | |||
342 | if (vcpu->sigset_active) | ||
343 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | ||
344 | |||
345 | if (vcpu->mmio_needed) { | ||
346 | if (!vcpu->mmio_is_write) | ||
347 | kvmppc_complete_mmio_load(vcpu, run); | ||
348 | vcpu->mmio_needed = 0; | ||
349 | } else if (vcpu->arch.dcr_needed) { | ||
350 | if (!vcpu->arch.dcr_is_write) | ||
351 | kvmppc_complete_dcr_load(vcpu, run); | ||
352 | vcpu->arch.dcr_needed = 0; | ||
353 | } | ||
354 | |||
355 | kvmppc_check_and_deliver_interrupts(vcpu); | ||
356 | |||
357 | local_irq_disable(); | ||
358 | kvm_guest_enter(); | ||
359 | r = __kvmppc_vcpu_run(run, vcpu); | ||
360 | kvm_guest_exit(); | ||
361 | local_irq_enable(); | ||
362 | |||
363 | if (vcpu->sigset_active) | ||
364 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | ||
365 | |||
366 | return r; | ||
367 | } | ||
368 | |||
369 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) | ||
370 | { | ||
371 | kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); | ||
372 | return 0; | ||
373 | } | ||
374 | |||
375 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | ||
376 | struct kvm_mp_state *mp_state) | ||
377 | { | ||
378 | return -EINVAL; | ||
379 | } | ||
380 | |||
381 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | ||
382 | struct kvm_mp_state *mp_state) | ||
383 | { | ||
384 | return -EINVAL; | ||
385 | } | ||
386 | |||
387 | long kvm_arch_vcpu_ioctl(struct file *filp, | ||
388 | unsigned int ioctl, unsigned long arg) | ||
389 | { | ||
390 | struct kvm_vcpu *vcpu = filp->private_data; | ||
391 | void __user *argp = (void __user *)arg; | ||
392 | long r; | ||
393 | |||
394 | switch (ioctl) { | ||
395 | case KVM_INTERRUPT: { | ||
396 | struct kvm_interrupt irq; | ||
397 | r = -EFAULT; | ||
398 | if (copy_from_user(&irq, argp, sizeof(irq))) | ||
399 | goto out; | ||
400 | r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); | ||
401 | break; | ||
402 | } | ||
403 | default: | ||
404 | r = -EINVAL; | ||
405 | } | ||
406 | |||
407 | out: | ||
408 | return r; | ||
409 | } | ||
410 | |||
411 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) | ||
412 | { | ||
413 | return -ENOTSUPP; | ||
414 | } | ||
415 | |||
416 | long kvm_arch_vm_ioctl(struct file *filp, | ||
417 | unsigned int ioctl, unsigned long arg) | ||
418 | { | ||
419 | long r; | ||
420 | |||
421 | switch (ioctl) { | ||
422 | default: | ||
423 | r = -EINVAL; | ||
424 | } | ||
425 | |||
426 | return r; | ||
427 | } | ||
428 | |||
429 | int kvm_arch_init(void *opaque) | ||
430 | { | ||
431 | return 0; | ||
432 | } | ||
433 | |||
434 | void kvm_arch_exit(void) | ||
435 | { | ||
436 | } | ||
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f6a68e178fc5..8f5f02160ffc 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig | |||
@@ -62,6 +62,10 @@ config GENERIC_LOCKBREAK | |||
62 | default y | 62 | default y |
63 | depends on SMP && PREEMPT | 63 | depends on SMP && PREEMPT |
64 | 64 | ||
65 | config PGSTE | ||
66 | bool | ||
67 | default y if KVM | ||
68 | |||
65 | mainmenu "Linux Kernel Configuration" | 69 | mainmenu "Linux Kernel Configuration" |
66 | 70 | ||
67 | config S390 | 71 | config S390 |
@@ -69,6 +73,7 @@ config S390 | |||
69 | select HAVE_OPROFILE | 73 | select HAVE_OPROFILE |
70 | select HAVE_KPROBES | 74 | select HAVE_KPROBES |
71 | select HAVE_KRETPROBES | 75 | select HAVE_KRETPROBES |
76 | select HAVE_KVM if 64BIT | ||
72 | 77 | ||
73 | source "init/Kconfig" | 78 | source "init/Kconfig" |
74 | 79 | ||
@@ -515,6 +520,13 @@ config ZFCPDUMP | |||
515 | Select this option if you want to build an zfcpdump enabled kernel. | 520 | Select this option if you want to build an zfcpdump enabled kernel. |
516 | Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. | 521 | Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. |
517 | 522 | ||
523 | config S390_GUEST | ||
524 | bool "s390 guest support (EXPERIMENTAL)" | ||
525 | depends on 64BIT && EXPERIMENTAL | ||
526 | select VIRTIO | ||
527 | select VIRTIO_RING | ||
528 | help | ||
529 | Select this option if you want to run the kernel under s390 linux | ||
518 | endmenu | 530 | endmenu |
519 | 531 | ||
520 | source "net/Kconfig" | 532 | source "net/Kconfig" |
@@ -536,3 +548,5 @@ source "security/Kconfig" | |||
536 | source "crypto/Kconfig" | 548 | source "crypto/Kconfig" |
537 | 549 | ||
538 | source "lib/Kconfig" | 550 | source "lib/Kconfig" |
551 | |||
552 | source "arch/s390/kvm/Kconfig" | ||
diff --git a/arch/s390/Makefile b/arch/s390/Makefile index f708be367b03..792a4e7743ce 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile | |||
@@ -87,7 +87,7 @@ LDFLAGS_vmlinux := -e start | |||
87 | head-y := arch/s390/kernel/head.o arch/s390/kernel/init_task.o | 87 | head-y := arch/s390/kernel/head.o arch/s390/kernel/init_task.o |
88 | 88 | ||
89 | core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \ | 89 | core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \ |
90 | arch/s390/appldata/ arch/s390/hypfs/ | 90 | arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/ |
91 | libs-y += arch/s390/lib/ | 91 | libs-y += arch/s390/lib/ |
92 | drivers-y += drivers/s390/ | 92 | drivers-y += drivers/s390/ |
93 | drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/ | 93 | drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/ |
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 540a67f979b6..68ec4083bf73 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c | |||
@@ -144,6 +144,10 @@ static noinline __init void detect_machine_type(void) | |||
144 | /* Running on a P/390 ? */ | 144 | /* Running on a P/390 ? */ |
145 | if (cpuinfo->cpu_id.machine == 0x7490) | 145 | if (cpuinfo->cpu_id.machine == 0x7490) |
146 | machine_flags |= 4; | 146 | machine_flags |= 4; |
147 | |||
148 | /* Running under KVM ? */ | ||
149 | if (cpuinfo->cpu_id.version == 0xfe) | ||
150 | machine_flags |= 64; | ||
147 | } | 151 | } |
148 | 152 | ||
149 | #ifdef CONFIG_64BIT | 153 | #ifdef CONFIG_64BIT |
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7141147e6b63..a9d18aafa5f4 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c | |||
@@ -316,7 +316,11 @@ static int __init early_parse_ipldelay(char *p) | |||
316 | early_param("ipldelay", early_parse_ipldelay); | 316 | early_param("ipldelay", early_parse_ipldelay); |
317 | 317 | ||
318 | #ifdef CONFIG_S390_SWITCH_AMODE | 318 | #ifdef CONFIG_S390_SWITCH_AMODE |
319 | #ifdef CONFIG_PGSTE | ||
320 | unsigned int switch_amode = 1; | ||
321 | #else | ||
319 | unsigned int switch_amode = 0; | 322 | unsigned int switch_amode = 0; |
323 | #endif | ||
320 | EXPORT_SYMBOL_GPL(switch_amode); | 324 | EXPORT_SYMBOL_GPL(switch_amode); |
321 | 325 | ||
322 | static void set_amode_and_uaccess(unsigned long user_amode, | 326 | static void set_amode_and_uaccess(unsigned long user_amode, |
@@ -797,9 +801,13 @@ setup_arch(char **cmdline_p) | |||
797 | "This machine has an IEEE fpu\n" : | 801 | "This machine has an IEEE fpu\n" : |
798 | "This machine has no IEEE fpu\n"); | 802 | "This machine has no IEEE fpu\n"); |
799 | #else /* CONFIG_64BIT */ | 803 | #else /* CONFIG_64BIT */ |
800 | printk((MACHINE_IS_VM) ? | 804 | if (MACHINE_IS_VM) |
801 | "We are running under VM (64 bit mode)\n" : | 805 | printk("We are running under VM (64 bit mode)\n"); |
802 | "We are running native (64 bit mode)\n"); | 806 | else if (MACHINE_IS_KVM) { |
807 | printk("We are running under KVM (64 bit mode)\n"); | ||
808 | add_preferred_console("ttyS", 1, NULL); | ||
809 | } else | ||
810 | printk("We are running native (64 bit mode)\n"); | ||
803 | #endif /* CONFIG_64BIT */ | 811 | #endif /* CONFIG_64BIT */ |
804 | 812 | ||
805 | /* Save unparsed command line copy for /proc/cmdline */ | 813 | /* Save unparsed command line copy for /proc/cmdline */ |
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index c5f05b3fb2c3..ca90ee3f930e 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c | |||
@@ -110,6 +110,7 @@ void account_system_vtime(struct task_struct *tsk) | |||
110 | S390_lowcore.steal_clock -= cputime << 12; | 110 | S390_lowcore.steal_clock -= cputime << 12; |
111 | account_system_time(tsk, 0, cputime); | 111 | account_system_time(tsk, 0, cputime); |
112 | } | 112 | } |
113 | EXPORT_SYMBOL_GPL(account_system_vtime); | ||
113 | 114 | ||
114 | static inline void set_vtimer(__u64 expires) | 115 | static inline void set_vtimer(__u64 expires) |
115 | { | 116 | { |
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig new file mode 100644 index 000000000000..1761b74d639b --- /dev/null +++ b/arch/s390/kvm/Kconfig | |||
@@ -0,0 +1,46 @@ | |||
1 | # | ||
2 | # KVM configuration | ||
3 | # | ||
4 | config HAVE_KVM | ||
5 | bool | ||
6 | |||
7 | menuconfig VIRTUALIZATION | ||
8 | bool "Virtualization" | ||
9 | default y | ||
10 | ---help--- | ||
11 | Say Y here to get to see options for using your Linux host to run other | ||
12 | operating systems inside virtual machines (guests). | ||
13 | This option alone does not add any kernel code. | ||
14 | |||
15 | If you say N, all options in this submenu will be skipped and disabled. | ||
16 | |||
17 | if VIRTUALIZATION | ||
18 | |||
19 | config KVM | ||
20 | tristate "Kernel-based Virtual Machine (KVM) support" | ||
21 | depends on HAVE_KVM && EXPERIMENTAL | ||
22 | select PREEMPT_NOTIFIERS | ||
23 | select ANON_INODES | ||
24 | select S390_SWITCH_AMODE | ||
25 | select PREEMPT | ||
26 | ---help--- | ||
27 | Support hosting paravirtualized guest machines using the SIE | ||
28 | virtualization capability on the mainframe. This should work | ||
29 | on any 64bit machine. | ||
30 | |||
31 | This module provides access to the hardware capabilities through | ||
32 | a character device node named /dev/kvm. | ||
33 | |||
34 | To compile this as a module, choose M here: the module | ||
35 | will be called kvm. | ||
36 | |||
37 | If unsure, say N. | ||
38 | |||
39 | config KVM_TRACE | ||
40 | bool | ||
41 | |||
42 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under | ||
43 | # the virtualization menu. | ||
44 | source drivers/virtio/Kconfig | ||
45 | |||
46 | endif # VIRTUALIZATION | ||
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile new file mode 100644 index 000000000000..e5221ec0b8e3 --- /dev/null +++ b/arch/s390/kvm/Makefile | |||
@@ -0,0 +1,14 @@ | |||
1 | # Makefile for kernel virtual machines on s390 | ||
2 | # | ||
3 | # Copyright IBM Corp. 2008 | ||
4 | # | ||
5 | # This program is free software; you can redistribute it and/or modify | ||
6 | # it under the terms of the GNU General Public License (version 2 only) | ||
7 | # as published by the Free Software Foundation. | ||
8 | |||
9 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) | ||
10 | |||
11 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm | ||
12 | |||
13 | kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o diag.o | ||
14 | obj-$(CONFIG_KVM) += kvm.o | ||
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c new file mode 100644 index 000000000000..f639a152869f --- /dev/null +++ b/arch/s390/kvm/diag.c | |||
@@ -0,0 +1,67 @@ | |||
1 | /* | ||
2 | * diag.c - handling diagnose instructions | ||
3 | * | ||
4 | * Copyright IBM Corp. 2008 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2 only) | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * Author(s): Carsten Otte <cotte@de.ibm.com> | ||
11 | * Christian Borntraeger <borntraeger@de.ibm.com> | ||
12 | */ | ||
13 | |||
14 | #include <linux/kvm.h> | ||
15 | #include <linux/kvm_host.h> | ||
16 | #include "kvm-s390.h" | ||
17 | |||
18 | static int __diag_time_slice_end(struct kvm_vcpu *vcpu) | ||
19 | { | ||
20 | VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); | ||
21 | vcpu->stat.diagnose_44++; | ||
22 | vcpu_put(vcpu); | ||
23 | schedule(); | ||
24 | vcpu_load(vcpu); | ||
25 | return 0; | ||
26 | } | ||
27 | |||
28 | static int __diag_ipl_functions(struct kvm_vcpu *vcpu) | ||
29 | { | ||
30 | unsigned int reg = vcpu->arch.sie_block->ipa & 0xf; | ||
31 | unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff; | ||
32 | |||
33 | VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode); | ||
34 | switch (subcode) { | ||
35 | case 3: | ||
36 | vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; | ||
37 | break; | ||
38 | case 4: | ||
39 | vcpu->run->s390_reset_flags = 0; | ||
40 | break; | ||
41 | default: | ||
42 | return -ENOTSUPP; | ||
43 | } | ||
44 | |||
45 | atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); | ||
46 | vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; | ||
47 | vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; | ||
48 | vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; | ||
49 | vcpu->run->exit_reason = KVM_EXIT_S390_RESET; | ||
50 | VCPU_EVENT(vcpu, 3, "requesting userspace resets %lx", | ||
51 | vcpu->run->s390_reset_flags); | ||
52 | return -EREMOTE; | ||
53 | } | ||
54 | |||
55 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) | ||
56 | { | ||
57 | int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; | ||
58 | |||
59 | switch (code) { | ||
60 | case 0x44: | ||
61 | return __diag_time_slice_end(vcpu); | ||
62 | case 0x308: | ||
63 | return __diag_ipl_functions(vcpu); | ||
64 | default: | ||
65 | return -ENOTSUPP; | ||
66 | } | ||
67 | } | ||
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h new file mode 100644 index 000000000000..4e0633c413f3 --- /dev/null +++ b/arch/s390/kvm/gaccess.h | |||
@@ -0,0 +1,274 @@ | |||
1 | /* | ||
2 | * gaccess.h - access guest memory | ||
3 | * | ||
4 | * Copyright IBM Corp. 2008 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2 only) | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * Author(s): Carsten Otte <cotte@de.ibm.com> | ||
11 | */ | ||
12 | |||
13 | #ifndef __KVM_S390_GACCESS_H | ||
14 | #define __KVM_S390_GACCESS_H | ||
15 | |||
16 | #include <linux/compiler.h> | ||
17 | #include <linux/kvm_host.h> | ||
18 | #include <asm/uaccess.h> | ||
19 | |||
20 | static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu, | ||
21 | u64 guestaddr) | ||
22 | { | ||
23 | u64 prefix = vcpu->arch.sie_block->prefix; | ||
24 | u64 origin = vcpu->kvm->arch.guest_origin; | ||
25 | u64 memsize = vcpu->kvm->arch.guest_memsize; | ||
26 | |||
27 | if (guestaddr < 2 * PAGE_SIZE) | ||
28 | guestaddr += prefix; | ||
29 | else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE)) | ||
30 | guestaddr -= prefix; | ||
31 | |||
32 | if (guestaddr > memsize) | ||
33 | return (void __user __force *) ERR_PTR(-EFAULT); | ||
34 | |||
35 | guestaddr += origin; | ||
36 | |||
37 | return (void __user *) guestaddr; | ||
38 | } | ||
39 | |||
40 | static inline int get_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr, | ||
41 | u64 *result) | ||
42 | { | ||
43 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
44 | |||
45 | BUG_ON(guestaddr & 7); | ||
46 | |||
47 | if (IS_ERR((void __force *) uptr)) | ||
48 | return PTR_ERR((void __force *) uptr); | ||
49 | |||
50 | return get_user(*result, (u64 __user *) uptr); | ||
51 | } | ||
52 | |||
53 | static inline int get_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr, | ||
54 | u32 *result) | ||
55 | { | ||
56 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
57 | |||
58 | BUG_ON(guestaddr & 3); | ||
59 | |||
60 | if (IS_ERR((void __force *) uptr)) | ||
61 | return PTR_ERR((void __force *) uptr); | ||
62 | |||
63 | return get_user(*result, (u32 __user *) uptr); | ||
64 | } | ||
65 | |||
66 | static inline int get_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr, | ||
67 | u16 *result) | ||
68 | { | ||
69 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
70 | |||
71 | BUG_ON(guestaddr & 1); | ||
72 | |||
73 | if (IS_ERR(uptr)) | ||
74 | return PTR_ERR(uptr); | ||
75 | |||
76 | return get_user(*result, (u16 __user *) uptr); | ||
77 | } | ||
78 | |||
79 | static inline int get_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr, | ||
80 | u8 *result) | ||
81 | { | ||
82 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
83 | |||
84 | if (IS_ERR((void __force *) uptr)) | ||
85 | return PTR_ERR((void __force *) uptr); | ||
86 | |||
87 | return get_user(*result, (u8 __user *) uptr); | ||
88 | } | ||
89 | |||
90 | static inline int put_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr, | ||
91 | u64 value) | ||
92 | { | ||
93 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
94 | |||
95 | BUG_ON(guestaddr & 7); | ||
96 | |||
97 | if (IS_ERR((void __force *) uptr)) | ||
98 | return PTR_ERR((void __force *) uptr); | ||
99 | |||
100 | return put_user(value, (u64 __user *) uptr); | ||
101 | } | ||
102 | |||
103 | static inline int put_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr, | ||
104 | u32 value) | ||
105 | { | ||
106 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
107 | |||
108 | BUG_ON(guestaddr & 3); | ||
109 | |||
110 | if (IS_ERR((void __force *) uptr)) | ||
111 | return PTR_ERR((void __force *) uptr); | ||
112 | |||
113 | return put_user(value, (u32 __user *) uptr); | ||
114 | } | ||
115 | |||
116 | static inline int put_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr, | ||
117 | u16 value) | ||
118 | { | ||
119 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
120 | |||
121 | BUG_ON(guestaddr & 1); | ||
122 | |||
123 | if (IS_ERR((void __force *) uptr)) | ||
124 | return PTR_ERR((void __force *) uptr); | ||
125 | |||
126 | return put_user(value, (u16 __user *) uptr); | ||
127 | } | ||
128 | |||
129 | static inline int put_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr, | ||
130 | u8 value) | ||
131 | { | ||
132 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
133 | |||
134 | if (IS_ERR((void __force *) uptr)) | ||
135 | return PTR_ERR((void __force *) uptr); | ||
136 | |||
137 | return put_user(value, (u8 __user *) uptr); | ||
138 | } | ||
139 | |||
140 | |||
141 | static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, u64 guestdest, | ||
142 | const void *from, unsigned long n) | ||
143 | { | ||
144 | int rc; | ||
145 | unsigned long i; | ||
146 | const u8 *data = from; | ||
147 | |||
148 | for (i = 0; i < n; i++) { | ||
149 | rc = put_guest_u8(vcpu, guestdest++, *(data++)); | ||
150 | if (rc < 0) | ||
151 | return rc; | ||
152 | } | ||
153 | return 0; | ||
154 | } | ||
155 | |||
156 | static inline int copy_to_guest(struct kvm_vcpu *vcpu, u64 guestdest, | ||
157 | const void *from, unsigned long n) | ||
158 | { | ||
159 | u64 prefix = vcpu->arch.sie_block->prefix; | ||
160 | u64 origin = vcpu->kvm->arch.guest_origin; | ||
161 | u64 memsize = vcpu->kvm->arch.guest_memsize; | ||
162 | |||
163 | if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE)) | ||
164 | goto slowpath; | ||
165 | |||
166 | if ((guestdest < prefix) && (guestdest + n > prefix)) | ||
167 | goto slowpath; | ||
168 | |||
169 | if ((guestdest < prefix + 2 * PAGE_SIZE) | ||
170 | && (guestdest + n > prefix + 2 * PAGE_SIZE)) | ||
171 | goto slowpath; | ||
172 | |||
173 | if (guestdest < 2 * PAGE_SIZE) | ||
174 | guestdest += prefix; | ||
175 | else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE)) | ||
176 | guestdest -= prefix; | ||
177 | |||
178 | if (guestdest + n > memsize) | ||
179 | return -EFAULT; | ||
180 | |||
181 | if (guestdest + n < guestdest) | ||
182 | return -EFAULT; | ||
183 | |||
184 | guestdest += origin; | ||
185 | |||
186 | return copy_to_user((void __user *) guestdest, from, n); | ||
187 | slowpath: | ||
188 | return __copy_to_guest_slow(vcpu, guestdest, from, n); | ||
189 | } | ||
190 | |||
191 | static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to, | ||
192 | u64 guestsrc, unsigned long n) | ||
193 | { | ||
194 | int rc; | ||
195 | unsigned long i; | ||
196 | u8 *data = to; | ||
197 | |||
198 | for (i = 0; i < n; i++) { | ||
199 | rc = get_guest_u8(vcpu, guestsrc++, data++); | ||
200 | if (rc < 0) | ||
201 | return rc; | ||
202 | } | ||
203 | return 0; | ||
204 | } | ||
205 | |||
206 | static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to, | ||
207 | u64 guestsrc, unsigned long n) | ||
208 | { | ||
209 | u64 prefix = vcpu->arch.sie_block->prefix; | ||
210 | u64 origin = vcpu->kvm->arch.guest_origin; | ||
211 | u64 memsize = vcpu->kvm->arch.guest_memsize; | ||
212 | |||
213 | if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE)) | ||
214 | goto slowpath; | ||
215 | |||
216 | if ((guestsrc < prefix) && (guestsrc + n > prefix)) | ||
217 | goto slowpath; | ||
218 | |||
219 | if ((guestsrc < prefix + 2 * PAGE_SIZE) | ||
220 | && (guestsrc + n > prefix + 2 * PAGE_SIZE)) | ||
221 | goto slowpath; | ||
222 | |||
223 | if (guestsrc < 2 * PAGE_SIZE) | ||
224 | guestsrc += prefix; | ||
225 | else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE)) | ||
226 | guestsrc -= prefix; | ||
227 | |||
228 | if (guestsrc + n > memsize) | ||
229 | return -EFAULT; | ||
230 | |||
231 | if (guestsrc + n < guestsrc) | ||
232 | return -EFAULT; | ||
233 | |||
234 | guestsrc += origin; | ||
235 | |||
236 | return copy_from_user(to, (void __user *) guestsrc, n); | ||
237 | slowpath: | ||
238 | return __copy_from_guest_slow(vcpu, to, guestsrc, n); | ||
239 | } | ||
240 | |||
241 | static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, u64 guestdest, | ||
242 | const void *from, unsigned long n) | ||
243 | { | ||
244 | u64 origin = vcpu->kvm->arch.guest_origin; | ||
245 | u64 memsize = vcpu->kvm->arch.guest_memsize; | ||
246 | |||
247 | if (guestdest + n > memsize) | ||
248 | return -EFAULT; | ||
249 | |||
250 | if (guestdest + n < guestdest) | ||
251 | return -EFAULT; | ||
252 | |||
253 | guestdest += origin; | ||
254 | |||
255 | return copy_to_user((void __user *) guestdest, from, n); | ||
256 | } | ||
257 | |||
258 | static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to, | ||
259 | u64 guestsrc, unsigned long n) | ||
260 | { | ||
261 | u64 origin = vcpu->kvm->arch.guest_origin; | ||
262 | u64 memsize = vcpu->kvm->arch.guest_memsize; | ||
263 | |||
264 | if (guestsrc + n > memsize) | ||
265 | return -EFAULT; | ||
266 | |||
267 | if (guestsrc + n < guestsrc) | ||
268 | return -EFAULT; | ||
269 | |||
270 | guestsrc += origin; | ||
271 | |||
272 | return copy_from_user(to, (void __user *) guestsrc, n); | ||
273 | } | ||
274 | #endif | ||
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c new file mode 100644 index 000000000000..349581a26103 --- /dev/null +++ b/arch/s390/kvm/intercept.c | |||
@@ -0,0 +1,216 @@ | |||
1 | /* | ||
2 | * intercept.c - in-kernel handling for sie intercepts | ||
3 | * | ||
4 | * Copyright IBM Corp. 2008 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2 only) | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * Author(s): Carsten Otte <cotte@de.ibm.com> | ||
11 | * Christian Borntraeger <borntraeger@de.ibm.com> | ||
12 | */ | ||
13 | |||
14 | #include <linux/kvm_host.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <linux/pagemap.h> | ||
17 | |||
18 | #include <asm/kvm_host.h> | ||
19 | |||
20 | #include "kvm-s390.h" | ||
21 | #include "gaccess.h" | ||
22 | |||
23 | static int handle_lctg(struct kvm_vcpu *vcpu) | ||
24 | { | ||
25 | int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; | ||
26 | int reg3 = vcpu->arch.sie_block->ipa & 0x000f; | ||
27 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
28 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + | ||
29 | ((vcpu->arch.sie_block->ipb & 0xff00) << 4); | ||
30 | u64 useraddr; | ||
31 | int reg, rc; | ||
32 | |||
33 | vcpu->stat.instruction_lctg++; | ||
34 | if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f) | ||
35 | return -ENOTSUPP; | ||
36 | |||
37 | useraddr = disp2; | ||
38 | if (base2) | ||
39 | useraddr += vcpu->arch.guest_gprs[base2]; | ||
40 | |||
41 | reg = reg1; | ||
42 | |||
43 | VCPU_EVENT(vcpu, 5, "lctg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, | ||
44 | disp2); | ||
45 | |||
46 | do { | ||
47 | rc = get_guest_u64(vcpu, useraddr, | ||
48 | &vcpu->arch.sie_block->gcr[reg]); | ||
49 | if (rc == -EFAULT) { | ||
50 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
51 | break; | ||
52 | } | ||
53 | useraddr += 8; | ||
54 | if (reg == reg3) | ||
55 | break; | ||
56 | reg = (reg + 1) % 16; | ||
57 | } while (1); | ||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | static int handle_lctl(struct kvm_vcpu *vcpu) | ||
62 | { | ||
63 | int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; | ||
64 | int reg3 = vcpu->arch.sie_block->ipa & 0x000f; | ||
65 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
66 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | ||
67 | u64 useraddr; | ||
68 | u32 val = 0; | ||
69 | int reg, rc; | ||
70 | |||
71 | vcpu->stat.instruction_lctl++; | ||
72 | |||
73 | useraddr = disp2; | ||
74 | if (base2) | ||
75 | useraddr += vcpu->arch.guest_gprs[base2]; | ||
76 | |||
77 | VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, | ||
78 | disp2); | ||
79 | |||
80 | reg = reg1; | ||
81 | do { | ||
82 | rc = get_guest_u32(vcpu, useraddr, &val); | ||
83 | if (rc == -EFAULT) { | ||
84 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
85 | break; | ||
86 | } | ||
87 | vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; | ||
88 | vcpu->arch.sie_block->gcr[reg] |= val; | ||
89 | useraddr += 4; | ||
90 | if (reg == reg3) | ||
91 | break; | ||
92 | reg = (reg + 1) % 16; | ||
93 | } while (1); | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | static intercept_handler_t instruction_handlers[256] = { | ||
98 | [0x83] = kvm_s390_handle_diag, | ||
99 | [0xae] = kvm_s390_handle_sigp, | ||
100 | [0xb2] = kvm_s390_handle_priv, | ||
101 | [0xb7] = handle_lctl, | ||
102 | [0xeb] = handle_lctg, | ||
103 | }; | ||
104 | |||
105 | static int handle_noop(struct kvm_vcpu *vcpu) | ||
106 | { | ||
107 | switch (vcpu->arch.sie_block->icptcode) { | ||
108 | case 0x10: | ||
109 | vcpu->stat.exit_external_request++; | ||
110 | break; | ||
111 | case 0x14: | ||
112 | vcpu->stat.exit_external_interrupt++; | ||
113 | break; | ||
114 | default: | ||
115 | break; /* nothing */ | ||
116 | } | ||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | static int handle_stop(struct kvm_vcpu *vcpu) | ||
121 | { | ||
122 | int rc; | ||
123 | |||
124 | vcpu->stat.exit_stop_request++; | ||
125 | atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); | ||
126 | spin_lock_bh(&vcpu->arch.local_int.lock); | ||
127 | if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { | ||
128 | vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; | ||
129 | rc = __kvm_s390_vcpu_store_status(vcpu, | ||
130 | KVM_S390_STORE_STATUS_NOADDR); | ||
131 | if (rc >= 0) | ||
132 | rc = -ENOTSUPP; | ||
133 | } | ||
134 | |||
135 | if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { | ||
136 | vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP; | ||
137 | VCPU_EVENT(vcpu, 3, "%s", "cpu stopped"); | ||
138 | rc = -ENOTSUPP; | ||
139 | } else | ||
140 | rc = 0; | ||
141 | spin_unlock_bh(&vcpu->arch.local_int.lock); | ||
142 | return rc; | ||
143 | } | ||
144 | |||
145 | static int handle_validity(struct kvm_vcpu *vcpu) | ||
146 | { | ||
147 | int viwhy = vcpu->arch.sie_block->ipb >> 16; | ||
148 | vcpu->stat.exit_validity++; | ||
149 | if (viwhy == 0x37) { | ||
150 | fault_in_pages_writeable((char __user *) | ||
151 | vcpu->kvm->arch.guest_origin + | ||
152 | vcpu->arch.sie_block->prefix, | ||
153 | PAGE_SIZE); | ||
154 | return 0; | ||
155 | } | ||
156 | VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", | ||
157 | viwhy); | ||
158 | return -ENOTSUPP; | ||
159 | } | ||
160 | |||
161 | static int handle_instruction(struct kvm_vcpu *vcpu) | ||
162 | { | ||
163 | intercept_handler_t handler; | ||
164 | |||
165 | vcpu->stat.exit_instruction++; | ||
166 | handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8]; | ||
167 | if (handler) | ||
168 | return handler(vcpu); | ||
169 | return -ENOTSUPP; | ||
170 | } | ||
171 | |||
172 | static int handle_prog(struct kvm_vcpu *vcpu) | ||
173 | { | ||
174 | vcpu->stat.exit_program_interruption++; | ||
175 | return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc); | ||
176 | } | ||
177 | |||
178 | static int handle_instruction_and_prog(struct kvm_vcpu *vcpu) | ||
179 | { | ||
180 | int rc, rc2; | ||
181 | |||
182 | vcpu->stat.exit_instr_and_program++; | ||
183 | rc = handle_instruction(vcpu); | ||
184 | rc2 = handle_prog(vcpu); | ||
185 | |||
186 | if (rc == -ENOTSUPP) | ||
187 | vcpu->arch.sie_block->icptcode = 0x04; | ||
188 | if (rc) | ||
189 | return rc; | ||
190 | return rc2; | ||
191 | } | ||
192 | |||
193 | static const intercept_handler_t intercept_funcs[0x48 >> 2] = { | ||
194 | [0x00 >> 2] = handle_noop, | ||
195 | [0x04 >> 2] = handle_instruction, | ||
196 | [0x08 >> 2] = handle_prog, | ||
197 | [0x0C >> 2] = handle_instruction_and_prog, | ||
198 | [0x10 >> 2] = handle_noop, | ||
199 | [0x14 >> 2] = handle_noop, | ||
200 | [0x1C >> 2] = kvm_s390_handle_wait, | ||
201 | [0x20 >> 2] = handle_validity, | ||
202 | [0x28 >> 2] = handle_stop, | ||
203 | }; | ||
204 | |||
205 | int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) | ||
206 | { | ||
207 | intercept_handler_t func; | ||
208 | u8 code = vcpu->arch.sie_block->icptcode; | ||
209 | |||
210 | if (code & 3 || code > 0x48) | ||
211 | return -ENOTSUPP; | ||
212 | func = intercept_funcs[code >> 2]; | ||
213 | if (func) | ||
214 | return func(vcpu); | ||
215 | return -ENOTSUPP; | ||
216 | } | ||
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c new file mode 100644 index 000000000000..fcd1ed8015c1 --- /dev/null +++ b/arch/s390/kvm/interrupt.c | |||
@@ -0,0 +1,592 @@ | |||
1 | /* | ||
2 | * interrupt.c - handling kvm guest interrupts | ||
3 | * | ||
4 | * Copyright IBM Corp. 2008 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2 only) | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * Author(s): Carsten Otte <cotte@de.ibm.com> | ||
11 | */ | ||
12 | |||
13 | #include <asm/lowcore.h> | ||
14 | #include <asm/uaccess.h> | ||
15 | #include <linux/kvm_host.h> | ||
16 | #include "kvm-s390.h" | ||
17 | #include "gaccess.h" | ||
18 | |||
19 | static int psw_extint_disabled(struct kvm_vcpu *vcpu) | ||
20 | { | ||
21 | return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); | ||
22 | } | ||
23 | |||
24 | static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) | ||
25 | { | ||
26 | if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) || | ||
27 | (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) || | ||
28 | (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT)) | ||
29 | return 0; | ||
30 | return 1; | ||
31 | } | ||
32 | |||
33 | static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, | ||
34 | struct interrupt_info *inti) | ||
35 | { | ||
36 | switch (inti->type) { | ||
37 | case KVM_S390_INT_EMERGENCY: | ||
38 | if (psw_extint_disabled(vcpu)) | ||
39 | return 0; | ||
40 | if (vcpu->arch.sie_block->gcr[0] & 0x4000ul) | ||
41 | return 1; | ||
42 | return 0; | ||
43 | case KVM_S390_INT_SERVICE: | ||
44 | if (psw_extint_disabled(vcpu)) | ||
45 | return 0; | ||
46 | if (vcpu->arch.sie_block->gcr[0] & 0x200ul) | ||
47 | return 1; | ||
48 | return 0; | ||
49 | case KVM_S390_INT_VIRTIO: | ||
50 | if (psw_extint_disabled(vcpu)) | ||
51 | return 0; | ||
52 | if (vcpu->arch.sie_block->gcr[0] & 0x200ul) | ||
53 | return 1; | ||
54 | return 0; | ||
55 | case KVM_S390_PROGRAM_INT: | ||
56 | case KVM_S390_SIGP_STOP: | ||
57 | case KVM_S390_SIGP_SET_PREFIX: | ||
58 | case KVM_S390_RESTART: | ||
59 | return 1; | ||
60 | default: | ||
61 | BUG(); | ||
62 | } | ||
63 | return 0; | ||
64 | } | ||
65 | |||
66 | static void __set_cpu_idle(struct kvm_vcpu *vcpu) | ||
67 | { | ||
68 | BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); | ||
69 | atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); | ||
70 | set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); | ||
71 | } | ||
72 | |||
73 | static void __unset_cpu_idle(struct kvm_vcpu *vcpu) | ||
74 | { | ||
75 | BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); | ||
76 | atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); | ||
77 | clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); | ||
78 | } | ||
79 | |||
80 | static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) | ||
81 | { | ||
82 | atomic_clear_mask(CPUSTAT_ECALL_PEND | | ||
83 | CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT, | ||
84 | &vcpu->arch.sie_block->cpuflags); | ||
85 | vcpu->arch.sie_block->lctl = 0x0000; | ||
86 | } | ||
87 | |||
88 | static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) | ||
89 | { | ||
90 | atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags); | ||
91 | } | ||
92 | |||
93 | static void __set_intercept_indicator(struct kvm_vcpu *vcpu, | ||
94 | struct interrupt_info *inti) | ||
95 | { | ||
96 | switch (inti->type) { | ||
97 | case KVM_S390_INT_EMERGENCY: | ||
98 | case KVM_S390_INT_SERVICE: | ||
99 | case KVM_S390_INT_VIRTIO: | ||
100 | if (psw_extint_disabled(vcpu)) | ||
101 | __set_cpuflag(vcpu, CPUSTAT_EXT_INT); | ||
102 | else | ||
103 | vcpu->arch.sie_block->lctl |= LCTL_CR0; | ||
104 | break; | ||
105 | case KVM_S390_SIGP_STOP: | ||
106 | __set_cpuflag(vcpu, CPUSTAT_STOP_INT); | ||
107 | break; | ||
108 | default: | ||
109 | BUG(); | ||
110 | } | ||
111 | } | ||
112 | |||
113 | static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | ||
114 | struct interrupt_info *inti) | ||
115 | { | ||
116 | const unsigned short table[] = { 2, 4, 4, 6 }; | ||
117 | int rc, exception = 0; | ||
118 | |||
119 | switch (inti->type) { | ||
120 | case KVM_S390_INT_EMERGENCY: | ||
121 | VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); | ||
122 | vcpu->stat.deliver_emergency_signal++; | ||
123 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201); | ||
124 | if (rc == -EFAULT) | ||
125 | exception = 1; | ||
126 | |||
127 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | ||
128 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
129 | if (rc == -EFAULT) | ||
130 | exception = 1; | ||
131 | |||
132 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
133 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
134 | if (rc == -EFAULT) | ||
135 | exception = 1; | ||
136 | break; | ||
137 | |||
138 | case KVM_S390_INT_SERVICE: | ||
139 | VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", | ||
140 | inti->ext.ext_params); | ||
141 | vcpu->stat.deliver_service_signal++; | ||
142 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401); | ||
143 | if (rc == -EFAULT) | ||
144 | exception = 1; | ||
145 | |||
146 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | ||
147 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
148 | if (rc == -EFAULT) | ||
149 | exception = 1; | ||
150 | |||
151 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
152 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
153 | if (rc == -EFAULT) | ||
154 | exception = 1; | ||
155 | |||
156 | rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); | ||
157 | if (rc == -EFAULT) | ||
158 | exception = 1; | ||
159 | break; | ||
160 | |||
161 | case KVM_S390_INT_VIRTIO: | ||
162 | VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%lx", | ||
163 | inti->ext.ext_params, inti->ext.ext_params2); | ||
164 | vcpu->stat.deliver_virtio_interrupt++; | ||
165 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603); | ||
166 | if (rc == -EFAULT) | ||
167 | exception = 1; | ||
168 | |||
169 | rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0d00); | ||
170 | if (rc == -EFAULT) | ||
171 | exception = 1; | ||
172 | |||
173 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | ||
174 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
175 | if (rc == -EFAULT) | ||
176 | exception = 1; | ||
177 | |||
178 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
179 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
180 | if (rc == -EFAULT) | ||
181 | exception = 1; | ||
182 | |||
183 | rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); | ||
184 | if (rc == -EFAULT) | ||
185 | exception = 1; | ||
186 | |||
187 | rc = put_guest_u64(vcpu, __LC_PFAULT_INTPARM, | ||
188 | inti->ext.ext_params2); | ||
189 | if (rc == -EFAULT) | ||
190 | exception = 1; | ||
191 | break; | ||
192 | |||
193 | case KVM_S390_SIGP_STOP: | ||
194 | VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); | ||
195 | vcpu->stat.deliver_stop_signal++; | ||
196 | __set_intercept_indicator(vcpu, inti); | ||
197 | break; | ||
198 | |||
199 | case KVM_S390_SIGP_SET_PREFIX: | ||
200 | VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", | ||
201 | inti->prefix.address); | ||
202 | vcpu->stat.deliver_prefix_signal++; | ||
203 | vcpu->arch.sie_block->prefix = inti->prefix.address; | ||
204 | vcpu->arch.sie_block->ihcpu = 0xffff; | ||
205 | break; | ||
206 | |||
207 | case KVM_S390_RESTART: | ||
208 | VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); | ||
209 | vcpu->stat.deliver_restart_signal++; | ||
210 | rc = copy_to_guest(vcpu, offsetof(struct _lowcore, | ||
211 | restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
212 | if (rc == -EFAULT) | ||
213 | exception = 1; | ||
214 | |||
215 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
216 | offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); | ||
217 | if (rc == -EFAULT) | ||
218 | exception = 1; | ||
219 | break; | ||
220 | |||
221 | case KVM_S390_PROGRAM_INT: | ||
222 | VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", | ||
223 | inti->pgm.code, | ||
224 | table[vcpu->arch.sie_block->ipa >> 14]); | ||
225 | vcpu->stat.deliver_program_int++; | ||
226 | rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code); | ||
227 | if (rc == -EFAULT) | ||
228 | exception = 1; | ||
229 | |||
230 | rc = put_guest_u16(vcpu, __LC_PGM_ILC, | ||
231 | table[vcpu->arch.sie_block->ipa >> 14]); | ||
232 | if (rc == -EFAULT) | ||
233 | exception = 1; | ||
234 | |||
235 | rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW, | ||
236 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
237 | if (rc == -EFAULT) | ||
238 | exception = 1; | ||
239 | |||
240 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
241 | __LC_PGM_NEW_PSW, sizeof(psw_t)); | ||
242 | if (rc == -EFAULT) | ||
243 | exception = 1; | ||
244 | break; | ||
245 | |||
246 | default: | ||
247 | BUG(); | ||
248 | } | ||
249 | |||
250 | if (exception) { | ||
251 | VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" | ||
252 | " interrupt"); | ||
253 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
254 | if (inti->type == KVM_S390_PROGRAM_INT) { | ||
255 | printk(KERN_WARNING "kvm: recursive program check\n"); | ||
256 | BUG(); | ||
257 | } | ||
258 | } | ||
259 | } | ||
260 | |||
261 | static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) | ||
262 | { | ||
263 | int rc, exception = 0; | ||
264 | |||
265 | if (psw_extint_disabled(vcpu)) | ||
266 | return 0; | ||
267 | if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) | ||
268 | return 0; | ||
269 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004); | ||
270 | if (rc == -EFAULT) | ||
271 | exception = 1; | ||
272 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | ||
273 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
274 | if (rc == -EFAULT) | ||
275 | exception = 1; | ||
276 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
277 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
278 | if (rc == -EFAULT) | ||
279 | exception = 1; | ||
280 | |||
281 | if (exception) { | ||
282 | VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" \ | ||
283 | " ckc interrupt"); | ||
284 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
285 | return 0; | ||
286 | } | ||
287 | |||
288 | return 1; | ||
289 | } | ||
290 | |||
291 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) | ||
292 | { | ||
293 | struct local_interrupt *li = &vcpu->arch.local_int; | ||
294 | struct float_interrupt *fi = vcpu->arch.local_int.float_int; | ||
295 | struct interrupt_info *inti; | ||
296 | int rc = 0; | ||
297 | |||
298 | if (atomic_read(&li->active)) { | ||
299 | spin_lock_bh(&li->lock); | ||
300 | list_for_each_entry(inti, &li->list, list) | ||
301 | if (__interrupt_is_deliverable(vcpu, inti)) { | ||
302 | rc = 1; | ||
303 | break; | ||
304 | } | ||
305 | spin_unlock_bh(&li->lock); | ||
306 | } | ||
307 | |||
308 | if ((!rc) && atomic_read(&fi->active)) { | ||
309 | spin_lock_bh(&fi->lock); | ||
310 | list_for_each_entry(inti, &fi->list, list) | ||
311 | if (__interrupt_is_deliverable(vcpu, inti)) { | ||
312 | rc = 1; | ||
313 | break; | ||
314 | } | ||
315 | spin_unlock_bh(&fi->lock); | ||
316 | } | ||
317 | |||
318 | if ((!rc) && (vcpu->arch.sie_block->ckc < | ||
319 | get_clock() + vcpu->arch.sie_block->epoch)) { | ||
320 | if ((!psw_extint_disabled(vcpu)) && | ||
321 | (vcpu->arch.sie_block->gcr[0] & 0x800ul)) | ||
322 | rc = 1; | ||
323 | } | ||
324 | |||
325 | return rc; | ||
326 | } | ||
327 | |||
328 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | ||
329 | { | ||
330 | return 0; | ||
331 | } | ||
332 | |||
333 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) | ||
334 | { | ||
335 | u64 now, sltime; | ||
336 | DECLARE_WAITQUEUE(wait, current); | ||
337 | |||
338 | vcpu->stat.exit_wait_state++; | ||
339 | if (kvm_cpu_has_interrupt(vcpu)) | ||
340 | return 0; | ||
341 | |||
342 | if (psw_interrupts_disabled(vcpu)) { | ||
343 | VCPU_EVENT(vcpu, 3, "%s", "disabled wait"); | ||
344 | __unset_cpu_idle(vcpu); | ||
345 | return -ENOTSUPP; /* disabled wait */ | ||
346 | } | ||
347 | |||
348 | if (psw_extint_disabled(vcpu) || | ||
349 | (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) { | ||
350 | VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); | ||
351 | goto no_timer; | ||
352 | } | ||
353 | |||
354 | now = get_clock() + vcpu->arch.sie_block->epoch; | ||
355 | if (vcpu->arch.sie_block->ckc < now) { | ||
356 | __unset_cpu_idle(vcpu); | ||
357 | return 0; | ||
358 | } | ||
359 | |||
360 | sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1; | ||
361 | |||
362 | vcpu->arch.ckc_timer.expires = jiffies + sltime; | ||
363 | |||
364 | add_timer(&vcpu->arch.ckc_timer); | ||
365 | VCPU_EVENT(vcpu, 5, "enabled wait timer:%lx jiffies", sltime); | ||
366 | no_timer: | ||
367 | spin_lock_bh(&vcpu->arch.local_int.float_int->lock); | ||
368 | spin_lock_bh(&vcpu->arch.local_int.lock); | ||
369 | __set_cpu_idle(vcpu); | ||
370 | vcpu->arch.local_int.timer_due = 0; | ||
371 | add_wait_queue(&vcpu->arch.local_int.wq, &wait); | ||
372 | while (list_empty(&vcpu->arch.local_int.list) && | ||
373 | list_empty(&vcpu->arch.local_int.float_int->list) && | ||
374 | (!vcpu->arch.local_int.timer_due) && | ||
375 | !signal_pending(current)) { | ||
376 | set_current_state(TASK_INTERRUPTIBLE); | ||
377 | spin_unlock_bh(&vcpu->arch.local_int.lock); | ||
378 | spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); | ||
379 | vcpu_put(vcpu); | ||
380 | schedule(); | ||
381 | vcpu_load(vcpu); | ||
382 | spin_lock_bh(&vcpu->arch.local_int.float_int->lock); | ||
383 | spin_lock_bh(&vcpu->arch.local_int.lock); | ||
384 | } | ||
385 | __unset_cpu_idle(vcpu); | ||
386 | __set_current_state(TASK_RUNNING); | ||
387 | remove_wait_queue(&vcpu->wq, &wait); | ||
388 | spin_unlock_bh(&vcpu->arch.local_int.lock); | ||
389 | spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); | ||
390 | del_timer(&vcpu->arch.ckc_timer); | ||
391 | return 0; | ||
392 | } | ||
393 | |||
394 | void kvm_s390_idle_wakeup(unsigned long data) | ||
395 | { | ||
396 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; | ||
397 | |||
398 | spin_lock_bh(&vcpu->arch.local_int.lock); | ||
399 | vcpu->arch.local_int.timer_due = 1; | ||
400 | if (waitqueue_active(&vcpu->arch.local_int.wq)) | ||
401 | wake_up_interruptible(&vcpu->arch.local_int.wq); | ||
402 | spin_unlock_bh(&vcpu->arch.local_int.lock); | ||
403 | } | ||
404 | |||
405 | |||
406 | void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) | ||
407 | { | ||
408 | struct local_interrupt *li = &vcpu->arch.local_int; | ||
409 | struct float_interrupt *fi = vcpu->arch.local_int.float_int; | ||
410 | struct interrupt_info *n, *inti = NULL; | ||
411 | int deliver; | ||
412 | |||
413 | __reset_intercept_indicators(vcpu); | ||
414 | if (atomic_read(&li->active)) { | ||
415 | do { | ||
416 | deliver = 0; | ||
417 | spin_lock_bh(&li->lock); | ||
418 | list_for_each_entry_safe(inti, n, &li->list, list) { | ||
419 | if (__interrupt_is_deliverable(vcpu, inti)) { | ||
420 | list_del(&inti->list); | ||
421 | deliver = 1; | ||
422 | break; | ||
423 | } | ||
424 | __set_intercept_indicator(vcpu, inti); | ||
425 | } | ||
426 | if (list_empty(&li->list)) | ||
427 | atomic_set(&li->active, 0); | ||
428 | spin_unlock_bh(&li->lock); | ||
429 | if (deliver) { | ||
430 | __do_deliver_interrupt(vcpu, inti); | ||
431 | kfree(inti); | ||
432 | } | ||
433 | } while (deliver); | ||
434 | } | ||
435 | |||
436 | if ((vcpu->arch.sie_block->ckc < | ||
437 | get_clock() + vcpu->arch.sie_block->epoch)) | ||
438 | __try_deliver_ckc_interrupt(vcpu); | ||
439 | |||
440 | if (atomic_read(&fi->active)) { | ||
441 | do { | ||
442 | deliver = 0; | ||
443 | spin_lock_bh(&fi->lock); | ||
444 | list_for_each_entry_safe(inti, n, &fi->list, list) { | ||
445 | if (__interrupt_is_deliverable(vcpu, inti)) { | ||
446 | list_del(&inti->list); | ||
447 | deliver = 1; | ||
448 | break; | ||
449 | } | ||
450 | __set_intercept_indicator(vcpu, inti); | ||
451 | } | ||
452 | if (list_empty(&fi->list)) | ||
453 | atomic_set(&fi->active, 0); | ||
454 | spin_unlock_bh(&fi->lock); | ||
455 | if (deliver) { | ||
456 | __do_deliver_interrupt(vcpu, inti); | ||
457 | kfree(inti); | ||
458 | } | ||
459 | } while (deliver); | ||
460 | } | ||
461 | } | ||
462 | |||
463 | int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) | ||
464 | { | ||
465 | struct local_interrupt *li = &vcpu->arch.local_int; | ||
466 | struct interrupt_info *inti; | ||
467 | |||
468 | inti = kzalloc(sizeof(*inti), GFP_KERNEL); | ||
469 | if (!inti) | ||
470 | return -ENOMEM; | ||
471 | |||
472 | inti->type = KVM_S390_PROGRAM_INT;; | ||
473 | inti->pgm.code = code; | ||
474 | |||
475 | VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); | ||
476 | spin_lock_bh(&li->lock); | ||
477 | list_add(&inti->list, &li->list); | ||
478 | atomic_set(&li->active, 1); | ||
479 | BUG_ON(waitqueue_active(&li->wq)); | ||
480 | spin_unlock_bh(&li->lock); | ||
481 | return 0; | ||
482 | } | ||
483 | |||
484 | int kvm_s390_inject_vm(struct kvm *kvm, | ||
485 | struct kvm_s390_interrupt *s390int) | ||
486 | { | ||
487 | struct local_interrupt *li; | ||
488 | struct float_interrupt *fi; | ||
489 | struct interrupt_info *inti; | ||
490 | int sigcpu; | ||
491 | |||
492 | inti = kzalloc(sizeof(*inti), GFP_KERNEL); | ||
493 | if (!inti) | ||
494 | return -ENOMEM; | ||
495 | |||
496 | switch (s390int->type) { | ||
497 | case KVM_S390_INT_VIRTIO: | ||
498 | VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%lx", | ||
499 | s390int->parm, s390int->parm64); | ||
500 | inti->type = s390int->type; | ||
501 | inti->ext.ext_params = s390int->parm; | ||
502 | inti->ext.ext_params2 = s390int->parm64; | ||
503 | break; | ||
504 | case KVM_S390_INT_SERVICE: | ||
505 | VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); | ||
506 | inti->type = s390int->type; | ||
507 | inti->ext.ext_params = s390int->parm; | ||
508 | break; | ||
509 | case KVM_S390_PROGRAM_INT: | ||
510 | case KVM_S390_SIGP_STOP: | ||
511 | case KVM_S390_INT_EMERGENCY: | ||
512 | default: | ||
513 | kfree(inti); | ||
514 | return -EINVAL; | ||
515 | } | ||
516 | |||
517 | mutex_lock(&kvm->lock); | ||
518 | fi = &kvm->arch.float_int; | ||
519 | spin_lock_bh(&fi->lock); | ||
520 | list_add_tail(&inti->list, &fi->list); | ||
521 | atomic_set(&fi->active, 1); | ||
522 | sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); | ||
523 | if (sigcpu == KVM_MAX_VCPUS) { | ||
524 | do { | ||
525 | sigcpu = fi->next_rr_cpu++; | ||
526 | if (sigcpu == KVM_MAX_VCPUS) | ||
527 | sigcpu = fi->next_rr_cpu = 0; | ||
528 | } while (fi->local_int[sigcpu] == NULL); | ||
529 | } | ||
530 | li = fi->local_int[sigcpu]; | ||
531 | spin_lock_bh(&li->lock); | ||
532 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); | ||
533 | if (waitqueue_active(&li->wq)) | ||
534 | wake_up_interruptible(&li->wq); | ||
535 | spin_unlock_bh(&li->lock); | ||
536 | spin_unlock_bh(&fi->lock); | ||
537 | mutex_unlock(&kvm->lock); | ||
538 | return 0; | ||
539 | } | ||
540 | |||
541 | int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, | ||
542 | struct kvm_s390_interrupt *s390int) | ||
543 | { | ||
544 | struct local_interrupt *li; | ||
545 | struct interrupt_info *inti; | ||
546 | |||
547 | inti = kzalloc(sizeof(*inti), GFP_KERNEL); | ||
548 | if (!inti) | ||
549 | return -ENOMEM; | ||
550 | |||
551 | switch (s390int->type) { | ||
552 | case KVM_S390_PROGRAM_INT: | ||
553 | if (s390int->parm & 0xffff0000) { | ||
554 | kfree(inti); | ||
555 | return -EINVAL; | ||
556 | } | ||
557 | inti->type = s390int->type; | ||
558 | inti->pgm.code = s390int->parm; | ||
559 | VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", | ||
560 | s390int->parm); | ||
561 | break; | ||
562 | case KVM_S390_SIGP_STOP: | ||
563 | case KVM_S390_RESTART: | ||
564 | case KVM_S390_SIGP_SET_PREFIX: | ||
565 | case KVM_S390_INT_EMERGENCY: | ||
566 | VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); | ||
567 | inti->type = s390int->type; | ||
568 | break; | ||
569 | case KVM_S390_INT_VIRTIO: | ||
570 | case KVM_S390_INT_SERVICE: | ||
571 | default: | ||
572 | kfree(inti); | ||
573 | return -EINVAL; | ||
574 | } | ||
575 | |||
576 | mutex_lock(&vcpu->kvm->lock); | ||
577 | li = &vcpu->arch.local_int; | ||
578 | spin_lock_bh(&li->lock); | ||
579 | if (inti->type == KVM_S390_PROGRAM_INT) | ||
580 | list_add(&inti->list, &li->list); | ||
581 | else | ||
582 | list_add_tail(&inti->list, &li->list); | ||
583 | atomic_set(&li->active, 1); | ||
584 | if (inti->type == KVM_S390_SIGP_STOP) | ||
585 | li->action_bits |= ACTION_STOP_ON_STOP; | ||
586 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); | ||
587 | if (waitqueue_active(&li->wq)) | ||
588 | wake_up_interruptible(&vcpu->arch.local_int.wq); | ||
589 | spin_unlock_bh(&li->lock); | ||
590 | mutex_unlock(&vcpu->kvm->lock); | ||
591 | return 0; | ||
592 | } | ||
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c new file mode 100644 index 000000000000..98d1e73e01f1 --- /dev/null +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -0,0 +1,685 @@ | |||
1 | /* | ||
2 | * s390host.c -- hosting zSeries kernel virtual machines | ||
3 | * | ||
4 | * Copyright IBM Corp. 2008 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2 only) | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * Author(s): Carsten Otte <cotte@de.ibm.com> | ||
11 | * Christian Borntraeger <borntraeger@de.ibm.com> | ||
12 | * Heiko Carstens <heiko.carstens@de.ibm.com> | ||
13 | */ | ||
14 | |||
15 | #include <linux/compiler.h> | ||
16 | #include <linux/err.h> | ||
17 | #include <linux/fs.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/kvm.h> | ||
20 | #include <linux/kvm_host.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/slab.h> | ||
23 | #include <linux/timer.h> | ||
24 | #include <asm/lowcore.h> | ||
25 | #include <asm/pgtable.h> | ||
26 | |||
27 | #include "kvm-s390.h" | ||
28 | #include "gaccess.h" | ||
29 | |||
30 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU | ||
31 | |||
32 | struct kvm_stats_debugfs_item debugfs_entries[] = { | ||
33 | { "userspace_handled", VCPU_STAT(exit_userspace) }, | ||
34 | { "exit_validity", VCPU_STAT(exit_validity) }, | ||
35 | { "exit_stop_request", VCPU_STAT(exit_stop_request) }, | ||
36 | { "exit_external_request", VCPU_STAT(exit_external_request) }, | ||
37 | { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, | ||
38 | { "exit_instruction", VCPU_STAT(exit_instruction) }, | ||
39 | { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, | ||
40 | { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, | ||
41 | { "instruction_lctg", VCPU_STAT(instruction_lctg) }, | ||
42 | { "instruction_lctl", VCPU_STAT(instruction_lctl) }, | ||
43 | { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, | ||
44 | { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, | ||
45 | { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, | ||
46 | { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, | ||
47 | { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, | ||
48 | { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, | ||
49 | { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, | ||
50 | { "exit_wait_state", VCPU_STAT(exit_wait_state) }, | ||
51 | { "instruction_stidp", VCPU_STAT(instruction_stidp) }, | ||
52 | { "instruction_spx", VCPU_STAT(instruction_spx) }, | ||
53 | { "instruction_stpx", VCPU_STAT(instruction_stpx) }, | ||
54 | { "instruction_stap", VCPU_STAT(instruction_stap) }, | ||
55 | { "instruction_storage_key", VCPU_STAT(instruction_storage_key) }, | ||
56 | { "instruction_stsch", VCPU_STAT(instruction_stsch) }, | ||
57 | { "instruction_chsc", VCPU_STAT(instruction_chsc) }, | ||
58 | { "instruction_stsi", VCPU_STAT(instruction_stsi) }, | ||
59 | { "instruction_stfl", VCPU_STAT(instruction_stfl) }, | ||
60 | { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, | ||
61 | { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, | ||
62 | { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, | ||
63 | { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, | ||
64 | { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, | ||
65 | { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, | ||
66 | { "diagnose_44", VCPU_STAT(diagnose_44) }, | ||
67 | { NULL } | ||
68 | }; | ||
69 | |||
70 | |||
71 | /* Section: not file related */ | ||
72 | void kvm_arch_hardware_enable(void *garbage) | ||
73 | { | ||
74 | /* every s390 is virtualization enabled ;-) */ | ||
75 | } | ||
76 | |||
77 | void kvm_arch_hardware_disable(void *garbage) | ||
78 | { | ||
79 | } | ||
80 | |||
81 | void decache_vcpus_on_cpu(int cpu) | ||
82 | { | ||
83 | } | ||
84 | |||
85 | int kvm_arch_hardware_setup(void) | ||
86 | { | ||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | void kvm_arch_hardware_unsetup(void) | ||
91 | { | ||
92 | } | ||
93 | |||
94 | void kvm_arch_check_processor_compat(void *rtn) | ||
95 | { | ||
96 | } | ||
97 | |||
98 | int kvm_arch_init(void *opaque) | ||
99 | { | ||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | void kvm_arch_exit(void) | ||
104 | { | ||
105 | } | ||
106 | |||
107 | /* Section: device related */ | ||
108 | long kvm_arch_dev_ioctl(struct file *filp, | ||
109 | unsigned int ioctl, unsigned long arg) | ||
110 | { | ||
111 | if (ioctl == KVM_S390_ENABLE_SIE) | ||
112 | return s390_enable_sie(); | ||
113 | return -EINVAL; | ||
114 | } | ||
115 | |||
116 | int kvm_dev_ioctl_check_extension(long ext) | ||
117 | { | ||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | /* Section: vm related */ | ||
122 | /* | ||
123 | * Get (and clear) the dirty memory log for a memory slot. | ||
124 | */ | ||
125 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | ||
126 | struct kvm_dirty_log *log) | ||
127 | { | ||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | long kvm_arch_vm_ioctl(struct file *filp, | ||
132 | unsigned int ioctl, unsigned long arg) | ||
133 | { | ||
134 | struct kvm *kvm = filp->private_data; | ||
135 | void __user *argp = (void __user *)arg; | ||
136 | int r; | ||
137 | |||
138 | switch (ioctl) { | ||
139 | case KVM_S390_INTERRUPT: { | ||
140 | struct kvm_s390_interrupt s390int; | ||
141 | |||
142 | r = -EFAULT; | ||
143 | if (copy_from_user(&s390int, argp, sizeof(s390int))) | ||
144 | break; | ||
145 | r = kvm_s390_inject_vm(kvm, &s390int); | ||
146 | break; | ||
147 | } | ||
148 | default: | ||
149 | r = -EINVAL; | ||
150 | } | ||
151 | |||
152 | return r; | ||
153 | } | ||
154 | |||
155 | struct kvm *kvm_arch_create_vm(void) | ||
156 | { | ||
157 | struct kvm *kvm; | ||
158 | int rc; | ||
159 | char debug_name[16]; | ||
160 | |||
161 | rc = s390_enable_sie(); | ||
162 | if (rc) | ||
163 | goto out_nokvm; | ||
164 | |||
165 | rc = -ENOMEM; | ||
166 | kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); | ||
167 | if (!kvm) | ||
168 | goto out_nokvm; | ||
169 | |||
170 | kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); | ||
171 | if (!kvm->arch.sca) | ||
172 | goto out_nosca; | ||
173 | |||
174 | sprintf(debug_name, "kvm-%u", current->pid); | ||
175 | |||
176 | kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long)); | ||
177 | if (!kvm->arch.dbf) | ||
178 | goto out_nodbf; | ||
179 | |||
180 | spin_lock_init(&kvm->arch.float_int.lock); | ||
181 | INIT_LIST_HEAD(&kvm->arch.float_int.list); | ||
182 | |||
183 | debug_register_view(kvm->arch.dbf, &debug_sprintf_view); | ||
184 | VM_EVENT(kvm, 3, "%s", "vm created"); | ||
185 | |||
186 | try_module_get(THIS_MODULE); | ||
187 | |||
188 | return kvm; | ||
189 | out_nodbf: | ||
190 | free_page((unsigned long)(kvm->arch.sca)); | ||
191 | out_nosca: | ||
192 | kfree(kvm); | ||
193 | out_nokvm: | ||
194 | return ERR_PTR(rc); | ||
195 | } | ||
196 | |||
197 | void kvm_arch_destroy_vm(struct kvm *kvm) | ||
198 | { | ||
199 | debug_unregister(kvm->arch.dbf); | ||
200 | free_page((unsigned long)(kvm->arch.sca)); | ||
201 | kfree(kvm); | ||
202 | module_put(THIS_MODULE); | ||
203 | } | ||
204 | |||
205 | /* Section: vcpu related */ | ||
206 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | ||
207 | { | ||
208 | return 0; | ||
209 | } | ||
210 | |||
211 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | ||
212 | { | ||
213 | /* kvm common code refers to this, but does'nt call it */ | ||
214 | BUG(); | ||
215 | } | ||
216 | |||
217 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
218 | { | ||
219 | save_fp_regs(&vcpu->arch.host_fpregs); | ||
220 | save_access_regs(vcpu->arch.host_acrs); | ||
221 | vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK; | ||
222 | restore_fp_regs(&vcpu->arch.guest_fpregs); | ||
223 | restore_access_regs(vcpu->arch.guest_acrs); | ||
224 | |||
225 | if (signal_pending(current)) | ||
226 | atomic_set_mask(CPUSTAT_STOP_INT, | ||
227 | &vcpu->arch.sie_block->cpuflags); | ||
228 | } | ||
229 | |||
230 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | ||
231 | { | ||
232 | save_fp_regs(&vcpu->arch.guest_fpregs); | ||
233 | save_access_regs(vcpu->arch.guest_acrs); | ||
234 | restore_fp_regs(&vcpu->arch.host_fpregs); | ||
235 | restore_access_regs(vcpu->arch.host_acrs); | ||
236 | } | ||
237 | |||
238 | static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) | ||
239 | { | ||
240 | /* this equals initial cpu reset in pop, but we don't switch to ESA */ | ||
241 | vcpu->arch.sie_block->gpsw.mask = 0UL; | ||
242 | vcpu->arch.sie_block->gpsw.addr = 0UL; | ||
243 | vcpu->arch.sie_block->prefix = 0UL; | ||
244 | vcpu->arch.sie_block->ihcpu = 0xffff; | ||
245 | vcpu->arch.sie_block->cputm = 0UL; | ||
246 | vcpu->arch.sie_block->ckc = 0UL; | ||
247 | vcpu->arch.sie_block->todpr = 0; | ||
248 | memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); | ||
249 | vcpu->arch.sie_block->gcr[0] = 0xE0UL; | ||
250 | vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; | ||
251 | vcpu->arch.guest_fpregs.fpc = 0; | ||
252 | asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); | ||
253 | vcpu->arch.sie_block->gbea = 1; | ||
254 | } | ||
255 | |||
256 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | ||
257 | { | ||
258 | atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH); | ||
259 | vcpu->arch.sie_block->gmslm = 0xffffffffffUL; | ||
260 | vcpu->arch.sie_block->gmsor = 0x000000000000; | ||
261 | vcpu->arch.sie_block->ecb = 2; | ||
262 | vcpu->arch.sie_block->eca = 0xC1002001U; | ||
263 | setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, | ||
264 | (unsigned long) vcpu); | ||
265 | get_cpu_id(&vcpu->arch.cpu_id); | ||
266 | vcpu->arch.cpu_id.version = 0xfe; | ||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | ||
271 | unsigned int id) | ||
272 | { | ||
273 | struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); | ||
274 | int rc = -ENOMEM; | ||
275 | |||
276 | if (!vcpu) | ||
277 | goto out_nomem; | ||
278 | |||
279 | vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL); | ||
280 | |||
281 | if (!vcpu->arch.sie_block) | ||
282 | goto out_free_cpu; | ||
283 | |||
284 | vcpu->arch.sie_block->icpua = id; | ||
285 | BUG_ON(!kvm->arch.sca); | ||
286 | BUG_ON(kvm->arch.sca->cpu[id].sda); | ||
287 | kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; | ||
288 | vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); | ||
289 | vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; | ||
290 | |||
291 | spin_lock_init(&vcpu->arch.local_int.lock); | ||
292 | INIT_LIST_HEAD(&vcpu->arch.local_int.list); | ||
293 | vcpu->arch.local_int.float_int = &kvm->arch.float_int; | ||
294 | spin_lock_bh(&kvm->arch.float_int.lock); | ||
295 | kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; | ||
296 | init_waitqueue_head(&vcpu->arch.local_int.wq); | ||
297 | vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; | ||
298 | spin_unlock_bh(&kvm->arch.float_int.lock); | ||
299 | |||
300 | rc = kvm_vcpu_init(vcpu, kvm, id); | ||
301 | if (rc) | ||
302 | goto out_free_cpu; | ||
303 | VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, | ||
304 | vcpu->arch.sie_block); | ||
305 | |||
306 | try_module_get(THIS_MODULE); | ||
307 | |||
308 | return vcpu; | ||
309 | out_free_cpu: | ||
310 | kfree(vcpu); | ||
311 | out_nomem: | ||
312 | return ERR_PTR(rc); | ||
313 | } | ||
314 | |||
315 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | ||
316 | { | ||
317 | VCPU_EVENT(vcpu, 3, "%s", "destroy cpu"); | ||
318 | free_page((unsigned long)(vcpu->arch.sie_block)); | ||
319 | kfree(vcpu); | ||
320 | module_put(THIS_MODULE); | ||
321 | } | ||
322 | |||
323 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | ||
324 | { | ||
325 | /* kvm common code refers to this, but never calls it */ | ||
326 | BUG(); | ||
327 | return 0; | ||
328 | } | ||
329 | |||
330 | static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) | ||
331 | { | ||
332 | vcpu_load(vcpu); | ||
333 | kvm_s390_vcpu_initial_reset(vcpu); | ||
334 | vcpu_put(vcpu); | ||
335 | return 0; | ||
336 | } | ||
337 | |||
338 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | ||
339 | { | ||
340 | vcpu_load(vcpu); | ||
341 | memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs)); | ||
342 | vcpu_put(vcpu); | ||
343 | return 0; | ||
344 | } | ||
345 | |||
346 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | ||
347 | { | ||
348 | vcpu_load(vcpu); | ||
349 | memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs)); | ||
350 | vcpu_put(vcpu); | ||
351 | return 0; | ||
352 | } | ||
353 | |||
354 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | ||
355 | struct kvm_sregs *sregs) | ||
356 | { | ||
357 | vcpu_load(vcpu); | ||
358 | memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); | ||
359 | memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); | ||
360 | vcpu_put(vcpu); | ||
361 | return 0; | ||
362 | } | ||
363 | |||
364 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | ||
365 | struct kvm_sregs *sregs) | ||
366 | { | ||
367 | vcpu_load(vcpu); | ||
368 | memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs)); | ||
369 | memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); | ||
370 | vcpu_put(vcpu); | ||
371 | return 0; | ||
372 | } | ||
373 | |||
374 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | ||
375 | { | ||
376 | vcpu_load(vcpu); | ||
377 | memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); | ||
378 | vcpu->arch.guest_fpregs.fpc = fpu->fpc; | ||
379 | vcpu_put(vcpu); | ||
380 | return 0; | ||
381 | } | ||
382 | |||
383 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | ||
384 | { | ||
385 | vcpu_load(vcpu); | ||
386 | memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); | ||
387 | fpu->fpc = vcpu->arch.guest_fpregs.fpc; | ||
388 | vcpu_put(vcpu); | ||
389 | return 0; | ||
390 | } | ||
391 | |||
392 | static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) | ||
393 | { | ||
394 | int rc = 0; | ||
395 | |||
396 | vcpu_load(vcpu); | ||
397 | if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) | ||
398 | rc = -EBUSY; | ||
399 | else | ||
400 | vcpu->arch.sie_block->gpsw = psw; | ||
401 | vcpu_put(vcpu); | ||
402 | return rc; | ||
403 | } | ||
404 | |||
405 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | ||
406 | struct kvm_translation *tr) | ||
407 | { | ||
408 | return -EINVAL; /* not implemented yet */ | ||
409 | } | ||
410 | |||
411 | int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, | ||
412 | struct kvm_debug_guest *dbg) | ||
413 | { | ||
414 | return -EINVAL; /* not implemented yet */ | ||
415 | } | ||
416 | |||
417 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | ||
418 | struct kvm_mp_state *mp_state) | ||
419 | { | ||
420 | return -EINVAL; /* not implemented yet */ | ||
421 | } | ||
422 | |||
423 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | ||
424 | struct kvm_mp_state *mp_state) | ||
425 | { | ||
426 | return -EINVAL; /* not implemented yet */ | ||
427 | } | ||
428 | |||
429 | static void __vcpu_run(struct kvm_vcpu *vcpu) | ||
430 | { | ||
431 | memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16); | ||
432 | |||
433 | if (need_resched()) | ||
434 | schedule(); | ||
435 | |||
436 | vcpu->arch.sie_block->icptcode = 0; | ||
437 | local_irq_disable(); | ||
438 | kvm_guest_enter(); | ||
439 | local_irq_enable(); | ||
440 | VCPU_EVENT(vcpu, 6, "entering sie flags %x", | ||
441 | atomic_read(&vcpu->arch.sie_block->cpuflags)); | ||
442 | sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs); | ||
443 | VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", | ||
444 | vcpu->arch.sie_block->icptcode); | ||
445 | local_irq_disable(); | ||
446 | kvm_guest_exit(); | ||
447 | local_irq_enable(); | ||
448 | |||
449 | memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16); | ||
450 | } | ||
451 | |||
452 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
453 | { | ||
454 | int rc; | ||
455 | sigset_t sigsaved; | ||
456 | |||
457 | vcpu_load(vcpu); | ||
458 | |||
459 | if (vcpu->sigset_active) | ||
460 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | ||
461 | |||
462 | atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); | ||
463 | |||
464 | BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL); | ||
465 | |||
466 | switch (kvm_run->exit_reason) { | ||
467 | case KVM_EXIT_S390_SIEIC: | ||
468 | vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask; | ||
469 | vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr; | ||
470 | break; | ||
471 | case KVM_EXIT_UNKNOWN: | ||
472 | case KVM_EXIT_S390_RESET: | ||
473 | break; | ||
474 | default: | ||
475 | BUG(); | ||
476 | } | ||
477 | |||
478 | might_sleep(); | ||
479 | |||
480 | do { | ||
481 | kvm_s390_deliver_pending_interrupts(vcpu); | ||
482 | __vcpu_run(vcpu); | ||
483 | rc = kvm_handle_sie_intercept(vcpu); | ||
484 | } while (!signal_pending(current) && !rc); | ||
485 | |||
486 | if (signal_pending(current) && !rc) | ||
487 | rc = -EINTR; | ||
488 | |||
489 | if (rc == -ENOTSUPP) { | ||
490 | /* intercept cannot be handled in-kernel, prepare kvm-run */ | ||
491 | kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; | ||
492 | kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; | ||
493 | kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask; | ||
494 | kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr; | ||
495 | kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; | ||
496 | kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; | ||
497 | rc = 0; | ||
498 | } | ||
499 | |||
500 | if (rc == -EREMOTE) { | ||
501 | /* intercept was handled, but userspace support is needed | ||
502 | * kvm_run has been prepared by the handler */ | ||
503 | rc = 0; | ||
504 | } | ||
505 | |||
506 | if (vcpu->sigset_active) | ||
507 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | ||
508 | |||
509 | vcpu_put(vcpu); | ||
510 | |||
511 | vcpu->stat.exit_userspace++; | ||
512 | return rc; | ||
513 | } | ||
514 | |||
515 | static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from, | ||
516 | unsigned long n, int prefix) | ||
517 | { | ||
518 | if (prefix) | ||
519 | return copy_to_guest(vcpu, guestdest, from, n); | ||
520 | else | ||
521 | return copy_to_guest_absolute(vcpu, guestdest, from, n); | ||
522 | } | ||
523 | |||
524 | /* | ||
525 | * store status at address | ||
526 | * we use have two special cases: | ||
527 | * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit | ||
528 | * KVM_S390_STORE_STATUS_PREFIXED: -> prefix | ||
529 | */ | ||
530 | int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) | ||
531 | { | ||
532 | const unsigned char archmode = 1; | ||
533 | int prefix; | ||
534 | |||
535 | if (addr == KVM_S390_STORE_STATUS_NOADDR) { | ||
536 | if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1)) | ||
537 | return -EFAULT; | ||
538 | addr = SAVE_AREA_BASE; | ||
539 | prefix = 0; | ||
540 | } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) { | ||
541 | if (copy_to_guest(vcpu, 163ul, &archmode, 1)) | ||
542 | return -EFAULT; | ||
543 | addr = SAVE_AREA_BASE; | ||
544 | prefix = 1; | ||
545 | } else | ||
546 | prefix = 0; | ||
547 | |||
548 | if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs), | ||
549 | vcpu->arch.guest_fpregs.fprs, 128, prefix)) | ||
550 | return -EFAULT; | ||
551 | |||
552 | if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs), | ||
553 | vcpu->arch.guest_gprs, 128, prefix)) | ||
554 | return -EFAULT; | ||
555 | |||
556 | if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw), | ||
557 | &vcpu->arch.sie_block->gpsw, 16, prefix)) | ||
558 | return -EFAULT; | ||
559 | |||
560 | if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg), | ||
561 | &vcpu->arch.sie_block->prefix, 4, prefix)) | ||
562 | return -EFAULT; | ||
563 | |||
564 | if (__guestcopy(vcpu, | ||
565 | addr + offsetof(struct save_area_s390x, fp_ctrl_reg), | ||
566 | &vcpu->arch.guest_fpregs.fpc, 4, prefix)) | ||
567 | return -EFAULT; | ||
568 | |||
569 | if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg), | ||
570 | &vcpu->arch.sie_block->todpr, 4, prefix)) | ||
571 | return -EFAULT; | ||
572 | |||
573 | if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer), | ||
574 | &vcpu->arch.sie_block->cputm, 8, prefix)) | ||
575 | return -EFAULT; | ||
576 | |||
577 | if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp), | ||
578 | &vcpu->arch.sie_block->ckc, 8, prefix)) | ||
579 | return -EFAULT; | ||
580 | |||
581 | if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs), | ||
582 | &vcpu->arch.guest_acrs, 64, prefix)) | ||
583 | return -EFAULT; | ||
584 | |||
585 | if (__guestcopy(vcpu, | ||
586 | addr + offsetof(struct save_area_s390x, ctrl_regs), | ||
587 | &vcpu->arch.sie_block->gcr, 128, prefix)) | ||
588 | return -EFAULT; | ||
589 | return 0; | ||
590 | } | ||
591 | |||
592 | static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) | ||
593 | { | ||
594 | int rc; | ||
595 | |||
596 | vcpu_load(vcpu); | ||
597 | rc = __kvm_s390_vcpu_store_status(vcpu, addr); | ||
598 | vcpu_put(vcpu); | ||
599 | return rc; | ||
600 | } | ||
601 | |||
602 | long kvm_arch_vcpu_ioctl(struct file *filp, | ||
603 | unsigned int ioctl, unsigned long arg) | ||
604 | { | ||
605 | struct kvm_vcpu *vcpu = filp->private_data; | ||
606 | void __user *argp = (void __user *)arg; | ||
607 | |||
608 | switch (ioctl) { | ||
609 | case KVM_S390_INTERRUPT: { | ||
610 | struct kvm_s390_interrupt s390int; | ||
611 | |||
612 | if (copy_from_user(&s390int, argp, sizeof(s390int))) | ||
613 | return -EFAULT; | ||
614 | return kvm_s390_inject_vcpu(vcpu, &s390int); | ||
615 | } | ||
616 | case KVM_S390_STORE_STATUS: | ||
617 | return kvm_s390_vcpu_store_status(vcpu, arg); | ||
618 | case KVM_S390_SET_INITIAL_PSW: { | ||
619 | psw_t psw; | ||
620 | |||
621 | if (copy_from_user(&psw, argp, sizeof(psw))) | ||
622 | return -EFAULT; | ||
623 | return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); | ||
624 | } | ||
625 | case KVM_S390_INITIAL_RESET: | ||
626 | return kvm_arch_vcpu_ioctl_initial_reset(vcpu); | ||
627 | default: | ||
628 | ; | ||
629 | } | ||
630 | return -EINVAL; | ||
631 | } | ||
632 | |||
633 | /* Section: memory related */ | ||
634 | int kvm_arch_set_memory_region(struct kvm *kvm, | ||
635 | struct kvm_userspace_memory_region *mem, | ||
636 | struct kvm_memory_slot old, | ||
637 | int user_alloc) | ||
638 | { | ||
639 | /* A few sanity checks. We can have exactly one memory slot which has | ||
640 | to start at guest virtual zero and which has to be located at a | ||
641 | page boundary in userland and which has to end at a page boundary. | ||
642 | The memory in userland is ok to be fragmented into various different | ||
643 | vmas. It is okay to mmap() and munmap() stuff in this slot after | ||
644 | doing this call at any time */ | ||
645 | |||
646 | if (mem->slot) | ||
647 | return -EINVAL; | ||
648 | |||
649 | if (mem->guest_phys_addr) | ||
650 | return -EINVAL; | ||
651 | |||
652 | if (mem->userspace_addr & (PAGE_SIZE - 1)) | ||
653 | return -EINVAL; | ||
654 | |||
655 | if (mem->memory_size & (PAGE_SIZE - 1)) | ||
656 | return -EINVAL; | ||
657 | |||
658 | kvm->arch.guest_origin = mem->userspace_addr; | ||
659 | kvm->arch.guest_memsize = mem->memory_size; | ||
660 | |||
661 | /* FIXME: we do want to interrupt running CPUs and update their memory | ||
662 | configuration now to avoid race conditions. But hey, changing the | ||
663 | memory layout while virtual CPUs are running is usually bad | ||
664 | programming practice. */ | ||
665 | |||
666 | return 0; | ||
667 | } | ||
668 | |||
669 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | ||
670 | { | ||
671 | return gfn; | ||
672 | } | ||
673 | |||
674 | static int __init kvm_s390_init(void) | ||
675 | { | ||
676 | return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); | ||
677 | } | ||
678 | |||
679 | static void __exit kvm_s390_exit(void) | ||
680 | { | ||
681 | kvm_exit(); | ||
682 | } | ||
683 | |||
684 | module_init(kvm_s390_init); | ||
685 | module_exit(kvm_s390_exit); | ||
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h new file mode 100644 index 000000000000..3893cf12eacf --- /dev/null +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -0,0 +1,64 @@ | |||
1 | /* | ||
2 | * kvm_s390.h - definition for kvm on s390 | ||
3 | * | ||
4 | * Copyright IBM Corp. 2008 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2 only) | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * Author(s): Carsten Otte <cotte@de.ibm.com> | ||
11 | * Christian Borntraeger <borntraeger@de.ibm.com> | ||
12 | */ | ||
13 | |||
14 | #ifndef ARCH_S390_KVM_S390_H | ||
15 | #define ARCH_S390_KVM_S390_H | ||
16 | |||
17 | #include <linux/kvm.h> | ||
18 | #include <linux/kvm_host.h> | ||
19 | |||
20 | typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); | ||
21 | |||
22 | int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); | ||
23 | |||
24 | #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ | ||
25 | do { \ | ||
26 | debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \ | ||
27 | d_args); \ | ||
28 | } while (0) | ||
29 | |||
30 | #define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\ | ||
31 | do { \ | ||
32 | debug_sprintf_event(d_vcpu->kvm->arch.dbf, d_loglevel, \ | ||
33 | "%02d[%016lx-%016lx]: " d_string "\n", d_vcpu->vcpu_id, \ | ||
34 | d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\ | ||
35 | d_args); \ | ||
36 | } while (0) | ||
37 | |||
38 | static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu) | ||
39 | { | ||
40 | return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT; | ||
41 | } | ||
42 | |||
43 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); | ||
44 | void kvm_s390_idle_wakeup(unsigned long data); | ||
45 | void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); | ||
46 | int kvm_s390_inject_vm(struct kvm *kvm, | ||
47 | struct kvm_s390_interrupt *s390int); | ||
48 | int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, | ||
49 | struct kvm_s390_interrupt *s390int); | ||
50 | int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); | ||
51 | |||
52 | /* implemented in priv.c */ | ||
53 | int kvm_s390_handle_priv(struct kvm_vcpu *vcpu); | ||
54 | |||
55 | /* implemented in sigp.c */ | ||
56 | int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); | ||
57 | |||
58 | /* implemented in kvm-s390.c */ | ||
59 | int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, | ||
60 | unsigned long addr); | ||
61 | /* implemented in diag.c */ | ||
62 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); | ||
63 | |||
64 | #endif | ||
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c new file mode 100644 index 000000000000..1465946325c5 --- /dev/null +++ b/arch/s390/kvm/priv.c | |||
@@ -0,0 +1,323 @@ | |||
1 | /* | ||
2 | * priv.c - handling privileged instructions | ||
3 | * | ||
4 | * Copyright IBM Corp. 2008 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2 only) | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * Author(s): Carsten Otte <cotte@de.ibm.com> | ||
11 | * Christian Borntraeger <borntraeger@de.ibm.com> | ||
12 | */ | ||
13 | |||
14 | #include <linux/kvm.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <asm/current.h> | ||
17 | #include <asm/debug.h> | ||
18 | #include <asm/ebcdic.h> | ||
19 | #include <asm/sysinfo.h> | ||
20 | #include "gaccess.h" | ||
21 | #include "kvm-s390.h" | ||
22 | |||
23 | static int handle_set_prefix(struct kvm_vcpu *vcpu) | ||
24 | { | ||
25 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
26 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | ||
27 | u64 operand2; | ||
28 | u32 address = 0; | ||
29 | u8 tmp; | ||
30 | |||
31 | vcpu->stat.instruction_spx++; | ||
32 | |||
33 | operand2 = disp2; | ||
34 | if (base2) | ||
35 | operand2 += vcpu->arch.guest_gprs[base2]; | ||
36 | |||
37 | /* must be word boundary */ | ||
38 | if (operand2 & 3) { | ||
39 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
40 | goto out; | ||
41 | } | ||
42 | |||
43 | /* get the value */ | ||
44 | if (get_guest_u32(vcpu, operand2, &address)) { | ||
45 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
46 | goto out; | ||
47 | } | ||
48 | |||
49 | address = address & 0x7fffe000u; | ||
50 | |||
51 | /* make sure that the new value is valid memory */ | ||
52 | if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || | ||
53 | (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) { | ||
54 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
55 | goto out; | ||
56 | } | ||
57 | |||
58 | vcpu->arch.sie_block->prefix = address; | ||
59 | vcpu->arch.sie_block->ihcpu = 0xffff; | ||
60 | |||
61 | VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); | ||
62 | out: | ||
63 | return 0; | ||
64 | } | ||
65 | |||
66 | static int handle_store_prefix(struct kvm_vcpu *vcpu) | ||
67 | { | ||
68 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
69 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | ||
70 | u64 operand2; | ||
71 | u32 address; | ||
72 | |||
73 | vcpu->stat.instruction_stpx++; | ||
74 | operand2 = disp2; | ||
75 | if (base2) | ||
76 | operand2 += vcpu->arch.guest_gprs[base2]; | ||
77 | |||
78 | /* must be word boundary */ | ||
79 | if (operand2 & 3) { | ||
80 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
81 | goto out; | ||
82 | } | ||
83 | |||
84 | address = vcpu->arch.sie_block->prefix; | ||
85 | address = address & 0x7fffe000u; | ||
86 | |||
87 | /* get the value */ | ||
88 | if (put_guest_u32(vcpu, operand2, address)) { | ||
89 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
90 | goto out; | ||
91 | } | ||
92 | |||
93 | VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); | ||
94 | out: | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | static int handle_store_cpu_address(struct kvm_vcpu *vcpu) | ||
99 | { | ||
100 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
101 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | ||
102 | u64 useraddr; | ||
103 | int rc; | ||
104 | |||
105 | vcpu->stat.instruction_stap++; | ||
106 | useraddr = disp2; | ||
107 | if (base2) | ||
108 | useraddr += vcpu->arch.guest_gprs[base2]; | ||
109 | |||
110 | if (useraddr & 1) { | ||
111 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
112 | goto out; | ||
113 | } | ||
114 | |||
115 | rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id); | ||
116 | if (rc == -EFAULT) { | ||
117 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
118 | goto out; | ||
119 | } | ||
120 | |||
121 | VCPU_EVENT(vcpu, 5, "storing cpu address to %lx", useraddr); | ||
122 | out: | ||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | static int handle_skey(struct kvm_vcpu *vcpu) | ||
127 | { | ||
128 | vcpu->stat.instruction_storage_key++; | ||
129 | vcpu->arch.sie_block->gpsw.addr -= 4; | ||
130 | VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); | ||
131 | return 0; | ||
132 | } | ||
133 | |||
134 | static int handle_stsch(struct kvm_vcpu *vcpu) | ||
135 | { | ||
136 | vcpu->stat.instruction_stsch++; | ||
137 | VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3"); | ||
138 | /* condition code 3 */ | ||
139 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | ||
140 | vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; | ||
141 | return 0; | ||
142 | } | ||
143 | |||
144 | static int handle_chsc(struct kvm_vcpu *vcpu) | ||
145 | { | ||
146 | vcpu->stat.instruction_chsc++; | ||
147 | VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3"); | ||
148 | /* condition code 3 */ | ||
149 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | ||
150 | vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; | ||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | static unsigned int kvm_stfl(void) | ||
155 | { | ||
156 | asm volatile( | ||
157 | " .insn s,0xb2b10000,0(0)\n" /* stfl */ | ||
158 | "0:\n" | ||
159 | EX_TABLE(0b, 0b)); | ||
160 | return S390_lowcore.stfl_fac_list; | ||
161 | } | ||
162 | |||
163 | static int handle_stfl(struct kvm_vcpu *vcpu) | ||
164 | { | ||
165 | unsigned int facility_list = kvm_stfl(); | ||
166 | int rc; | ||
167 | |||
168 | vcpu->stat.instruction_stfl++; | ||
169 | facility_list &= ~(1UL<<24); /* no stfle */ | ||
170 | |||
171 | rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), | ||
172 | &facility_list, sizeof(facility_list)); | ||
173 | if (rc == -EFAULT) | ||
174 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
175 | else | ||
176 | VCPU_EVENT(vcpu, 5, "store facility list value %x", | ||
177 | facility_list); | ||
178 | return 0; | ||
179 | } | ||
180 | |||
181 | static int handle_stidp(struct kvm_vcpu *vcpu) | ||
182 | { | ||
183 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
184 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | ||
185 | u64 operand2; | ||
186 | int rc; | ||
187 | |||
188 | vcpu->stat.instruction_stidp++; | ||
189 | operand2 = disp2; | ||
190 | if (base2) | ||
191 | operand2 += vcpu->arch.guest_gprs[base2]; | ||
192 | |||
193 | if (operand2 & 7) { | ||
194 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
195 | goto out; | ||
196 | } | ||
197 | |||
198 | rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data); | ||
199 | if (rc == -EFAULT) { | ||
200 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
201 | goto out; | ||
202 | } | ||
203 | |||
204 | VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); | ||
205 | out: | ||
206 | return 0; | ||
207 | } | ||
208 | |||
209 | static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) | ||
210 | { | ||
211 | struct float_interrupt *fi = &vcpu->kvm->arch.float_int; | ||
212 | int cpus = 0; | ||
213 | int n; | ||
214 | |||
215 | spin_lock_bh(&fi->lock); | ||
216 | for (n = 0; n < KVM_MAX_VCPUS; n++) | ||
217 | if (fi->local_int[n]) | ||
218 | cpus++; | ||
219 | spin_unlock_bh(&fi->lock); | ||
220 | |||
221 | /* deal with other level 3 hypervisors */ | ||
222 | if (stsi(mem, 3, 2, 2) == -ENOSYS) | ||
223 | mem->count = 0; | ||
224 | if (mem->count < 8) | ||
225 | mem->count++; | ||
226 | for (n = mem->count - 1; n > 0 ; n--) | ||
227 | memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0])); | ||
228 | |||
229 | mem->vm[0].cpus_total = cpus; | ||
230 | mem->vm[0].cpus_configured = cpus; | ||
231 | mem->vm[0].cpus_standby = 0; | ||
232 | mem->vm[0].cpus_reserved = 0; | ||
233 | mem->vm[0].caf = 1000; | ||
234 | memcpy(mem->vm[0].name, "KVMguest", 8); | ||
235 | ASCEBC(mem->vm[0].name, 8); | ||
236 | memcpy(mem->vm[0].cpi, "KVM/Linux ", 16); | ||
237 | ASCEBC(mem->vm[0].cpi, 16); | ||
238 | } | ||
239 | |||
240 | static int handle_stsi(struct kvm_vcpu *vcpu) | ||
241 | { | ||
242 | int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28; | ||
243 | int sel1 = vcpu->arch.guest_gprs[0] & 0xff; | ||
244 | int sel2 = vcpu->arch.guest_gprs[1] & 0xffff; | ||
245 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
246 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | ||
247 | u64 operand2; | ||
248 | unsigned long mem; | ||
249 | |||
250 | vcpu->stat.instruction_stsi++; | ||
251 | VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); | ||
252 | |||
253 | operand2 = disp2; | ||
254 | if (base2) | ||
255 | operand2 += vcpu->arch.guest_gprs[base2]; | ||
256 | |||
257 | if (operand2 & 0xfff && fc > 0) | ||
258 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
259 | |||
260 | switch (fc) { | ||
261 | case 0: | ||
262 | vcpu->arch.guest_gprs[0] = 3 << 28; | ||
263 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | ||
264 | return 0; | ||
265 | case 1: /* same handling for 1 and 2 */ | ||
266 | case 2: | ||
267 | mem = get_zeroed_page(GFP_KERNEL); | ||
268 | if (!mem) | ||
269 | goto out_fail; | ||
270 | if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS) | ||
271 | goto out_mem; | ||
272 | break; | ||
273 | case 3: | ||
274 | if (sel1 != 2 || sel2 != 2) | ||
275 | goto out_fail; | ||
276 | mem = get_zeroed_page(GFP_KERNEL); | ||
277 | if (!mem) | ||
278 | goto out_fail; | ||
279 | handle_stsi_3_2_2(vcpu, (void *) mem); | ||
280 | break; | ||
281 | default: | ||
282 | goto out_fail; | ||
283 | } | ||
284 | |||
285 | if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { | ||
286 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
287 | goto out_mem; | ||
288 | } | ||
289 | free_page(mem); | ||
290 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | ||
291 | vcpu->arch.guest_gprs[0] = 0; | ||
292 | return 0; | ||
293 | out_mem: | ||
294 | free_page(mem); | ||
295 | out_fail: | ||
296 | /* condition code 3 */ | ||
297 | vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; | ||
298 | return 0; | ||
299 | } | ||
300 | |||
301 | static intercept_handler_t priv_handlers[256] = { | ||
302 | [0x02] = handle_stidp, | ||
303 | [0x10] = handle_set_prefix, | ||
304 | [0x11] = handle_store_prefix, | ||
305 | [0x12] = handle_store_cpu_address, | ||
306 | [0x29] = handle_skey, | ||
307 | [0x2a] = handle_skey, | ||
308 | [0x2b] = handle_skey, | ||
309 | [0x34] = handle_stsch, | ||
310 | [0x5f] = handle_chsc, | ||
311 | [0x7d] = handle_stsi, | ||
312 | [0xb1] = handle_stfl, | ||
313 | }; | ||
314 | |||
315 | int kvm_s390_handle_priv(struct kvm_vcpu *vcpu) | ||
316 | { | ||
317 | intercept_handler_t handler; | ||
318 | |||
319 | handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; | ||
320 | if (handler) | ||
321 | return handler(vcpu); | ||
322 | return -ENOTSUPP; | ||
323 | } | ||
diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S new file mode 100644 index 000000000000..934fd6a885f6 --- /dev/null +++ b/arch/s390/kvm/sie64a.S | |||
@@ -0,0 +1,47 @@ | |||
1 | /* | ||
2 | * sie64a.S - low level sie call | ||
3 | * | ||
4 | * Copyright IBM Corp. 2008 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2 only) | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> | ||
11 | */ | ||
12 | |||
13 | #include <linux/errno.h> | ||
14 | #include <asm/asm-offsets.h> | ||
15 | |||
16 | SP_R5 = 5 * 8 # offset into stackframe | ||
17 | SP_R6 = 6 * 8 | ||
18 | |||
19 | /* | ||
20 | * sie64a calling convention: | ||
21 | * %r2 pointer to sie control block | ||
22 | * %r3 guest register save area | ||
23 | */ | ||
24 | .globl sie64a | ||
25 | sie64a: | ||
26 | lgr %r5,%r3 | ||
27 | stmg %r5,%r14,SP_R5(%r15) # save register on entry | ||
28 | lgr %r14,%r2 # pointer to sie control block | ||
29 | lmg %r0,%r13,0(%r3) # load guest gprs 0-13 | ||
30 | sie_inst: | ||
31 | sie 0(%r14) | ||
32 | lg %r14,SP_R5(%r15) | ||
33 | stmg %r0,%r13,0(%r14) # save guest gprs 0-13 | ||
34 | lghi %r2,0 | ||
35 | lmg %r6,%r14,SP_R6(%r15) | ||
36 | br %r14 | ||
37 | |||
38 | sie_err: | ||
39 | lg %r14,SP_R5(%r15) | ||
40 | stmg %r0,%r13,0(%r14) # save guest gprs 0-13 | ||
41 | lghi %r2,-EFAULT | ||
42 | lmg %r6,%r14,SP_R6(%r15) | ||
43 | br %r14 | ||
44 | |||
45 | .section __ex_table,"a" | ||
46 | .quad sie_inst,sie_err | ||
47 | .previous | ||
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c new file mode 100644 index 000000000000..0a236acfb5f6 --- /dev/null +++ b/arch/s390/kvm/sigp.c | |||
@@ -0,0 +1,288 @@ | |||
1 | /* | ||
2 | * sigp.c - handlinge interprocessor communication | ||
3 | * | ||
4 | * Copyright IBM Corp. 2008 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2 only) | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * Author(s): Carsten Otte <cotte@de.ibm.com> | ||
11 | * Christian Borntraeger <borntraeger@de.ibm.com> | ||
12 | */ | ||
13 | |||
14 | #include <linux/kvm.h> | ||
15 | #include <linux/kvm_host.h> | ||
16 | #include "gaccess.h" | ||
17 | #include "kvm-s390.h" | ||
18 | |||
19 | /* sigp order codes */ | ||
20 | #define SIGP_SENSE 0x01 | ||
21 | #define SIGP_EXTERNAL_CALL 0x02 | ||
22 | #define SIGP_EMERGENCY 0x03 | ||
23 | #define SIGP_START 0x04 | ||
24 | #define SIGP_STOP 0x05 | ||
25 | #define SIGP_RESTART 0x06 | ||
26 | #define SIGP_STOP_STORE_STATUS 0x09 | ||
27 | #define SIGP_INITIAL_CPU_RESET 0x0b | ||
28 | #define SIGP_CPU_RESET 0x0c | ||
29 | #define SIGP_SET_PREFIX 0x0d | ||
30 | #define SIGP_STORE_STATUS_ADDR 0x0e | ||
31 | #define SIGP_SET_ARCH 0x12 | ||
32 | |||
33 | /* cpu status bits */ | ||
34 | #define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL | ||
35 | #define SIGP_STAT_INCORRECT_STATE 0x00000200UL | ||
36 | #define SIGP_STAT_INVALID_PARAMETER 0x00000100UL | ||
37 | #define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL | ||
38 | #define SIGP_STAT_STOPPED 0x00000040UL | ||
39 | #define SIGP_STAT_OPERATOR_INTERV 0x00000020UL | ||
40 | #define SIGP_STAT_CHECK_STOP 0x00000010UL | ||
41 | #define SIGP_STAT_INOPERATIVE 0x00000004UL | ||
42 | #define SIGP_STAT_INVALID_ORDER 0x00000002UL | ||
43 | #define SIGP_STAT_RECEIVER_CHECK 0x00000001UL | ||
44 | |||
45 | |||
46 | static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) | ||
47 | { | ||
48 | struct float_interrupt *fi = &vcpu->kvm->arch.float_int; | ||
49 | int rc; | ||
50 | |||
51 | if (cpu_addr >= KVM_MAX_VCPUS) | ||
52 | return 3; /* not operational */ | ||
53 | |||
54 | spin_lock_bh(&fi->lock); | ||
55 | if (fi->local_int[cpu_addr] == NULL) | ||
56 | rc = 3; /* not operational */ | ||
57 | else if (atomic_read(fi->local_int[cpu_addr]->cpuflags) | ||
58 | & CPUSTAT_RUNNING) { | ||
59 | *reg &= 0xffffffff00000000UL; | ||
60 | rc = 1; /* status stored */ | ||
61 | } else { | ||
62 | *reg &= 0xffffffff00000000UL; | ||
63 | *reg |= SIGP_STAT_STOPPED; | ||
64 | rc = 1; /* status stored */ | ||
65 | } | ||
66 | spin_unlock_bh(&fi->lock); | ||
67 | |||
68 | VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); | ||
69 | return rc; | ||
70 | } | ||
71 | |||
72 | static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) | ||
73 | { | ||
74 | struct float_interrupt *fi = &vcpu->kvm->arch.float_int; | ||
75 | struct local_interrupt *li; | ||
76 | struct interrupt_info *inti; | ||
77 | int rc; | ||
78 | |||
79 | if (cpu_addr >= KVM_MAX_VCPUS) | ||
80 | return 3; /* not operational */ | ||
81 | |||
82 | inti = kzalloc(sizeof(*inti), GFP_KERNEL); | ||
83 | if (!inti) | ||
84 | return -ENOMEM; | ||
85 | |||
86 | inti->type = KVM_S390_INT_EMERGENCY; | ||
87 | |||
88 | spin_lock_bh(&fi->lock); | ||
89 | li = fi->local_int[cpu_addr]; | ||
90 | if (li == NULL) { | ||
91 | rc = 3; /* not operational */ | ||
92 | kfree(inti); | ||
93 | goto unlock; | ||
94 | } | ||
95 | spin_lock_bh(&li->lock); | ||
96 | list_add_tail(&inti->list, &li->list); | ||
97 | atomic_set(&li->active, 1); | ||
98 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); | ||
99 | if (waitqueue_active(&li->wq)) | ||
100 | wake_up_interruptible(&li->wq); | ||
101 | spin_unlock_bh(&li->lock); | ||
102 | rc = 0; /* order accepted */ | ||
103 | unlock: | ||
104 | spin_unlock_bh(&fi->lock); | ||
105 | VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); | ||
106 | return rc; | ||
107 | } | ||
108 | |||
109 | static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store) | ||
110 | { | ||
111 | struct float_interrupt *fi = &vcpu->kvm->arch.float_int; | ||
112 | struct local_interrupt *li; | ||
113 | struct interrupt_info *inti; | ||
114 | int rc; | ||
115 | |||
116 | if (cpu_addr >= KVM_MAX_VCPUS) | ||
117 | return 3; /* not operational */ | ||
118 | |||
119 | inti = kzalloc(sizeof(*inti), GFP_KERNEL); | ||
120 | if (!inti) | ||
121 | return -ENOMEM; | ||
122 | |||
123 | inti->type = KVM_S390_SIGP_STOP; | ||
124 | |||
125 | spin_lock_bh(&fi->lock); | ||
126 | li = fi->local_int[cpu_addr]; | ||
127 | if (li == NULL) { | ||
128 | rc = 3; /* not operational */ | ||
129 | kfree(inti); | ||
130 | goto unlock; | ||
131 | } | ||
132 | spin_lock_bh(&li->lock); | ||
133 | list_add_tail(&inti->list, &li->list); | ||
134 | atomic_set(&li->active, 1); | ||
135 | atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); | ||
136 | if (store) | ||
137 | li->action_bits |= ACTION_STORE_ON_STOP; | ||
138 | li->action_bits |= ACTION_STOP_ON_STOP; | ||
139 | if (waitqueue_active(&li->wq)) | ||
140 | wake_up_interruptible(&li->wq); | ||
141 | spin_unlock_bh(&li->lock); | ||
142 | rc = 0; /* order accepted */ | ||
143 | unlock: | ||
144 | spin_unlock_bh(&fi->lock); | ||
145 | VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); | ||
146 | return rc; | ||
147 | } | ||
148 | |||
149 | static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) | ||
150 | { | ||
151 | int rc; | ||
152 | |||
153 | switch (parameter & 0xff) { | ||
154 | case 0: | ||
155 | printk(KERN_WARNING "kvm: request to switch to ESA/390 mode" | ||
156 | " not supported"); | ||
157 | rc = 3; /* not operational */ | ||
158 | break; | ||
159 | case 1: | ||
160 | case 2: | ||
161 | rc = 0; /* order accepted */ | ||
162 | break; | ||
163 | default: | ||
164 | rc = -ENOTSUPP; | ||
165 | } | ||
166 | return rc; | ||
167 | } | ||
168 | |||
169 | static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, | ||
170 | u64 *reg) | ||
171 | { | ||
172 | struct float_interrupt *fi = &vcpu->kvm->arch.float_int; | ||
173 | struct local_interrupt *li; | ||
174 | struct interrupt_info *inti; | ||
175 | int rc; | ||
176 | u8 tmp; | ||
177 | |||
178 | /* make sure that the new value is valid memory */ | ||
179 | address = address & 0x7fffe000u; | ||
180 | if ((copy_from_guest(vcpu, &tmp, | ||
181 | (u64) (address + vcpu->kvm->arch.guest_origin) , 1)) || | ||
182 | (copy_from_guest(vcpu, &tmp, (u64) (address + | ||
183 | vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) { | ||
184 | *reg |= SIGP_STAT_INVALID_PARAMETER; | ||
185 | return 1; /* invalid parameter */ | ||
186 | } | ||
187 | |||
188 | inti = kzalloc(sizeof(*inti), GFP_KERNEL); | ||
189 | if (!inti) | ||
190 | return 2; /* busy */ | ||
191 | |||
192 | spin_lock_bh(&fi->lock); | ||
193 | li = fi->local_int[cpu_addr]; | ||
194 | |||
195 | if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) { | ||
196 | rc = 1; /* incorrect state */ | ||
197 | *reg &= SIGP_STAT_INCORRECT_STATE; | ||
198 | kfree(inti); | ||
199 | goto out_fi; | ||
200 | } | ||
201 | |||
202 | spin_lock_bh(&li->lock); | ||
203 | /* cpu must be in stopped state */ | ||
204 | if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { | ||
205 | rc = 1; /* incorrect state */ | ||
206 | *reg &= SIGP_STAT_INCORRECT_STATE; | ||
207 | kfree(inti); | ||
208 | goto out_li; | ||
209 | } | ||
210 | |||
211 | inti->type = KVM_S390_SIGP_SET_PREFIX; | ||
212 | inti->prefix.address = address; | ||
213 | |||
214 | list_add_tail(&inti->list, &li->list); | ||
215 | atomic_set(&li->active, 1); | ||
216 | if (waitqueue_active(&li->wq)) | ||
217 | wake_up_interruptible(&li->wq); | ||
218 | rc = 0; /* order accepted */ | ||
219 | |||
220 | VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); | ||
221 | out_li: | ||
222 | spin_unlock_bh(&li->lock); | ||
223 | out_fi: | ||
224 | spin_unlock_bh(&fi->lock); | ||
225 | return rc; | ||
226 | } | ||
227 | |||
228 | int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) | ||
229 | { | ||
230 | int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; | ||
231 | int r3 = vcpu->arch.sie_block->ipa & 0x000f; | ||
232 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
233 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | ||
234 | u32 parameter; | ||
235 | u16 cpu_addr = vcpu->arch.guest_gprs[r3]; | ||
236 | u8 order_code; | ||
237 | int rc; | ||
238 | |||
239 | order_code = disp2; | ||
240 | if (base2) | ||
241 | order_code += vcpu->arch.guest_gprs[base2]; | ||
242 | |||
243 | if (r1 % 2) | ||
244 | parameter = vcpu->arch.guest_gprs[r1]; | ||
245 | else | ||
246 | parameter = vcpu->arch.guest_gprs[r1 + 1]; | ||
247 | |||
248 | switch (order_code) { | ||
249 | case SIGP_SENSE: | ||
250 | vcpu->stat.instruction_sigp_sense++; | ||
251 | rc = __sigp_sense(vcpu, cpu_addr, | ||
252 | &vcpu->arch.guest_gprs[r1]); | ||
253 | break; | ||
254 | case SIGP_EMERGENCY: | ||
255 | vcpu->stat.instruction_sigp_emergency++; | ||
256 | rc = __sigp_emergency(vcpu, cpu_addr); | ||
257 | break; | ||
258 | case SIGP_STOP: | ||
259 | vcpu->stat.instruction_sigp_stop++; | ||
260 | rc = __sigp_stop(vcpu, cpu_addr, 0); | ||
261 | break; | ||
262 | case SIGP_STOP_STORE_STATUS: | ||
263 | vcpu->stat.instruction_sigp_stop++; | ||
264 | rc = __sigp_stop(vcpu, cpu_addr, 1); | ||
265 | break; | ||
266 | case SIGP_SET_ARCH: | ||
267 | vcpu->stat.instruction_sigp_arch++; | ||
268 | rc = __sigp_set_arch(vcpu, parameter); | ||
269 | break; | ||
270 | case SIGP_SET_PREFIX: | ||
271 | vcpu->stat.instruction_sigp_prefix++; | ||
272 | rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, | ||
273 | &vcpu->arch.guest_gprs[r1]); | ||
274 | break; | ||
275 | case SIGP_RESTART: | ||
276 | vcpu->stat.instruction_sigp_restart++; | ||
277 | /* user space must know about restart */ | ||
278 | default: | ||
279 | return -ENOTSUPP; | ||
280 | } | ||
281 | |||
282 | if (rc < 0) | ||
283 | return rc; | ||
284 | |||
285 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | ||
286 | vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44; | ||
287 | return 0; | ||
288 | } | ||
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index fd072013f88c..5c1aea97cd12 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
@@ -30,11 +30,27 @@ | |||
30 | #define TABLES_PER_PAGE 4 | 30 | #define TABLES_PER_PAGE 4 |
31 | #define FRAG_MASK 15UL | 31 | #define FRAG_MASK 15UL |
32 | #define SECOND_HALVES 10UL | 32 | #define SECOND_HALVES 10UL |
33 | |||
34 | void clear_table_pgstes(unsigned long *table) | ||
35 | { | ||
36 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); | ||
37 | memset(table + 256, 0, PAGE_SIZE/4); | ||
38 | clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); | ||
39 | memset(table + 768, 0, PAGE_SIZE/4); | ||
40 | } | ||
41 | |||
33 | #else | 42 | #else |
34 | #define ALLOC_ORDER 2 | 43 | #define ALLOC_ORDER 2 |
35 | #define TABLES_PER_PAGE 2 | 44 | #define TABLES_PER_PAGE 2 |
36 | #define FRAG_MASK 3UL | 45 | #define FRAG_MASK 3UL |
37 | #define SECOND_HALVES 2UL | 46 | #define SECOND_HALVES 2UL |
47 | |||
48 | void clear_table_pgstes(unsigned long *table) | ||
49 | { | ||
50 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); | ||
51 | memset(table + 256, 0, PAGE_SIZE/2); | ||
52 | } | ||
53 | |||
38 | #endif | 54 | #endif |
39 | 55 | ||
40 | unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) | 56 | unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) |
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) | |||
153 | unsigned long *table; | 169 | unsigned long *table; |
154 | unsigned long bits; | 170 | unsigned long bits; |
155 | 171 | ||
156 | bits = mm->context.noexec ? 3UL : 1UL; | 172 | bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; |
157 | spin_lock(&mm->page_table_lock); | 173 | spin_lock(&mm->page_table_lock); |
158 | page = NULL; | 174 | page = NULL; |
159 | if (!list_empty(&mm->context.pgtable_list)) { | 175 | if (!list_empty(&mm->context.pgtable_list)) { |
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm) | |||
170 | pgtable_page_ctor(page); | 186 | pgtable_page_ctor(page); |
171 | page->flags &= ~FRAG_MASK; | 187 | page->flags &= ~FRAG_MASK; |
172 | table = (unsigned long *) page_to_phys(page); | 188 | table = (unsigned long *) page_to_phys(page); |
173 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); | 189 | if (mm->context.pgstes) |
190 | clear_table_pgstes(table); | ||
191 | else | ||
192 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); | ||
174 | spin_lock(&mm->page_table_lock); | 193 | spin_lock(&mm->page_table_lock); |
175 | list_add(&page->lru, &mm->context.pgtable_list); | 194 | list_add(&page->lru, &mm->context.pgtable_list); |
176 | } | 195 | } |
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) | |||
191 | struct page *page; | 210 | struct page *page; |
192 | unsigned long bits; | 211 | unsigned long bits; |
193 | 212 | ||
194 | bits = mm->context.noexec ? 3UL : 1UL; | 213 | bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; |
195 | bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); | 214 | bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); |
196 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 215 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
197 | spin_lock(&mm->page_table_lock); | 216 | spin_lock(&mm->page_table_lock); |
@@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) | |||
228 | mm->context.noexec = 0; | 247 | mm->context.noexec = 0; |
229 | update_mm(mm, tsk); | 248 | update_mm(mm, tsk); |
230 | } | 249 | } |
250 | |||
251 | /* | ||
252 | * switch on pgstes for its userspace process (for kvm) | ||
253 | */ | ||
254 | int s390_enable_sie(void) | ||
255 | { | ||
256 | struct task_struct *tsk = current; | ||
257 | struct mm_struct *mm; | ||
258 | int rc; | ||
259 | |||
260 | task_lock(tsk); | ||
261 | |||
262 | rc = 0; | ||
263 | if (tsk->mm->context.pgstes) | ||
264 | goto unlock; | ||
265 | |||
266 | rc = -EINVAL; | ||
267 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || | ||
268 | tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) | ||
269 | goto unlock; | ||
270 | |||
271 | tsk->mm->context.pgstes = 1; /* dirty little tricks .. */ | ||
272 | mm = dup_mm(tsk); | ||
273 | tsk->mm->context.pgstes = 0; | ||
274 | |||
275 | rc = -ENOMEM; | ||
276 | if (!mm) | ||
277 | goto unlock; | ||
278 | mmput(tsk->mm); | ||
279 | tsk->mm = tsk->active_mm = mm; | ||
280 | preempt_disable(); | ||
281 | update_mm(mm, tsk); | ||
282 | cpu_set(smp_processor_id(), mm->cpu_vm_mask); | ||
283 | preempt_enable(); | ||
284 | rc = 0; | ||
285 | unlock: | ||
286 | task_unlock(tsk); | ||
287 | return rc; | ||
288 | } | ||
289 | EXPORT_SYMBOL_GPL(s390_enable_sie); | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2fadf794483d..e5790fe9e330 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -373,6 +373,25 @@ config VMI | |||
373 | at the moment), by linking the kernel to a GPL-ed ROM module | 373 | at the moment), by linking the kernel to a GPL-ed ROM module |
374 | provided by the hypervisor. | 374 | provided by the hypervisor. |
375 | 375 | ||
376 | config KVM_CLOCK | ||
377 | bool "KVM paravirtualized clock" | ||
378 | select PARAVIRT | ||
379 | depends on !(X86_VISWS || X86_VOYAGER) | ||
380 | help | ||
381 | Turning on this option will allow you to run a paravirtualized clock | ||
382 | when running over the KVM hypervisor. Instead of relying on a PIT | ||
383 | (or probably other) emulation by the underlying device model, the host | ||
384 | provides the guest with timing infrastructure such as time of day, and | ||
385 | system time | ||
386 | |||
387 | config KVM_GUEST | ||
388 | bool "KVM Guest support" | ||
389 | select PARAVIRT | ||
390 | depends on !(X86_VISWS || X86_VOYAGER) | ||
391 | help | ||
392 | This option enables various optimizations for running under the KVM | ||
393 | hypervisor. | ||
394 | |||
376 | source "arch/x86/lguest/Kconfig" | 395 | source "arch/x86/lguest/Kconfig" |
377 | 396 | ||
378 | config PARAVIRT | 397 | config PARAVIRT |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 90e092d0af0c..fa19c3819540 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -80,6 +80,8 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o | |||
80 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o | 80 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o |
81 | 81 | ||
82 | obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o | 82 | obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o |
83 | obj-$(CONFIG_KVM_GUEST) += kvm.o | ||
84 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o | ||
83 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o | 85 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o |
84 | 86 | ||
85 | ifdef CONFIG_INPUT_PCSPKR | 87 | ifdef CONFIG_INPUT_PCSPKR |
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 2251d0ae9570..268553817909 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <asm/hpet.h> | 25 | #include <asm/hpet.h> |
26 | #include <linux/kdebug.h> | 26 | #include <linux/kdebug.h> |
27 | #include <asm/smp.h> | 27 | #include <asm/smp.h> |
28 | #include <asm/reboot.h> | ||
28 | 29 | ||
29 | #include <mach_ipi.h> | 30 | #include <mach_ipi.h> |
30 | 31 | ||
@@ -117,7 +118,7 @@ static void nmi_shootdown_cpus(void) | |||
117 | } | 118 | } |
118 | #endif | 119 | #endif |
119 | 120 | ||
120 | void machine_crash_shutdown(struct pt_regs *regs) | 121 | void native_machine_crash_shutdown(struct pt_regs *regs) |
121 | { | 122 | { |
122 | /* This function is only called after the system | 123 | /* This function is only called after the system |
123 | * has panicked or is otherwise in a critical state. | 124 | * has panicked or is otherwise in a critical state. |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c new file mode 100644 index 000000000000..8b7a3cf37d2b --- /dev/null +++ b/arch/x86/kernel/kvm.c | |||
@@ -0,0 +1,248 @@ | |||
1 | /* | ||
2 | * KVM paravirt_ops implementation | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
17 | * | ||
18 | * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
19 | * Copyright IBM Corporation, 2007 | ||
20 | * Authors: Anthony Liguori <aliguori@us.ibm.com> | ||
21 | */ | ||
22 | |||
23 | #include <linux/module.h> | ||
24 | #include <linux/kernel.h> | ||
25 | #include <linux/kvm_para.h> | ||
26 | #include <linux/cpu.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/highmem.h> | ||
29 | #include <linux/hardirq.h> | ||
30 | |||
31 | #define MMU_QUEUE_SIZE 1024 | ||
32 | |||
33 | struct kvm_para_state { | ||
34 | u8 mmu_queue[MMU_QUEUE_SIZE]; | ||
35 | int mmu_queue_len; | ||
36 | enum paravirt_lazy_mode mode; | ||
37 | }; | ||
38 | |||
39 | static DEFINE_PER_CPU(struct kvm_para_state, para_state); | ||
40 | |||
41 | static struct kvm_para_state *kvm_para_state(void) | ||
42 | { | ||
43 | return &per_cpu(para_state, raw_smp_processor_id()); | ||
44 | } | ||
45 | |||
46 | /* | ||
47 | * No need for any "IO delay" on KVM | ||
48 | */ | ||
49 | static void kvm_io_delay(void) | ||
50 | { | ||
51 | } | ||
52 | |||
53 | static void kvm_mmu_op(void *buffer, unsigned len) | ||
54 | { | ||
55 | int r; | ||
56 | unsigned long a1, a2; | ||
57 | |||
58 | do { | ||
59 | a1 = __pa(buffer); | ||
60 | a2 = 0; /* on i386 __pa() always returns <4G */ | ||
61 | r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2); | ||
62 | buffer += r; | ||
63 | len -= r; | ||
64 | } while (len); | ||
65 | } | ||
66 | |||
67 | static void mmu_queue_flush(struct kvm_para_state *state) | ||
68 | { | ||
69 | if (state->mmu_queue_len) { | ||
70 | kvm_mmu_op(state->mmu_queue, state->mmu_queue_len); | ||
71 | state->mmu_queue_len = 0; | ||
72 | } | ||
73 | } | ||
74 | |||
75 | static void kvm_deferred_mmu_op(void *buffer, int len) | ||
76 | { | ||
77 | struct kvm_para_state *state = kvm_para_state(); | ||
78 | |||
79 | if (state->mode != PARAVIRT_LAZY_MMU) { | ||
80 | kvm_mmu_op(buffer, len); | ||
81 | return; | ||
82 | } | ||
83 | if (state->mmu_queue_len + len > sizeof state->mmu_queue) | ||
84 | mmu_queue_flush(state); | ||
85 | memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len); | ||
86 | state->mmu_queue_len += len; | ||
87 | } | ||
88 | |||
89 | static void kvm_mmu_write(void *dest, u64 val) | ||
90 | { | ||
91 | __u64 pte_phys; | ||
92 | struct kvm_mmu_op_write_pte wpte; | ||
93 | |||
94 | #ifdef CONFIG_HIGHPTE | ||
95 | struct page *page; | ||
96 | unsigned long dst = (unsigned long) dest; | ||
97 | |||
98 | page = kmap_atomic_to_page(dest); | ||
99 | pte_phys = page_to_pfn(page); | ||
100 | pte_phys <<= PAGE_SHIFT; | ||
101 | pte_phys += (dst & ~(PAGE_MASK)); | ||
102 | #else | ||
103 | pte_phys = (unsigned long)__pa(dest); | ||
104 | #endif | ||
105 | wpte.header.op = KVM_MMU_OP_WRITE_PTE; | ||
106 | wpte.pte_val = val; | ||
107 | wpte.pte_phys = pte_phys; | ||
108 | |||
109 | kvm_deferred_mmu_op(&wpte, sizeof wpte); | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * We only need to hook operations that are MMU writes. We hook these so that | ||
114 | * we can use lazy MMU mode to batch these operations. We could probably | ||
115 | * improve the performance of the host code if we used some of the information | ||
116 | * here to simplify processing of batched writes. | ||
117 | */ | ||
118 | static void kvm_set_pte(pte_t *ptep, pte_t pte) | ||
119 | { | ||
120 | kvm_mmu_write(ptep, pte_val(pte)); | ||
121 | } | ||
122 | |||
123 | static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
124 | pte_t *ptep, pte_t pte) | ||
125 | { | ||
126 | kvm_mmu_write(ptep, pte_val(pte)); | ||
127 | } | ||
128 | |||
129 | static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd) | ||
130 | { | ||
131 | kvm_mmu_write(pmdp, pmd_val(pmd)); | ||
132 | } | ||
133 | |||
134 | #if PAGETABLE_LEVELS >= 3 | ||
135 | #ifdef CONFIG_X86_PAE | ||
136 | static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte) | ||
137 | { | ||
138 | kvm_mmu_write(ptep, pte_val(pte)); | ||
139 | } | ||
140 | |||
141 | static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr, | ||
142 | pte_t *ptep, pte_t pte) | ||
143 | { | ||
144 | kvm_mmu_write(ptep, pte_val(pte)); | ||
145 | } | ||
146 | |||
147 | static void kvm_pte_clear(struct mm_struct *mm, | ||
148 | unsigned long addr, pte_t *ptep) | ||
149 | { | ||
150 | kvm_mmu_write(ptep, 0); | ||
151 | } | ||
152 | |||
153 | static void kvm_pmd_clear(pmd_t *pmdp) | ||
154 | { | ||
155 | kvm_mmu_write(pmdp, 0); | ||
156 | } | ||
157 | #endif | ||
158 | |||
159 | static void kvm_set_pud(pud_t *pudp, pud_t pud) | ||
160 | { | ||
161 | kvm_mmu_write(pudp, pud_val(pud)); | ||
162 | } | ||
163 | |||
164 | #if PAGETABLE_LEVELS == 4 | ||
165 | static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd) | ||
166 | { | ||
167 | kvm_mmu_write(pgdp, pgd_val(pgd)); | ||
168 | } | ||
169 | #endif | ||
170 | #endif /* PAGETABLE_LEVELS >= 3 */ | ||
171 | |||
172 | static void kvm_flush_tlb(void) | ||
173 | { | ||
174 | struct kvm_mmu_op_flush_tlb ftlb = { | ||
175 | .header.op = KVM_MMU_OP_FLUSH_TLB, | ||
176 | }; | ||
177 | |||
178 | kvm_deferred_mmu_op(&ftlb, sizeof ftlb); | ||
179 | } | ||
180 | |||
181 | static void kvm_release_pt(u32 pfn) | ||
182 | { | ||
183 | struct kvm_mmu_op_release_pt rpt = { | ||
184 | .header.op = KVM_MMU_OP_RELEASE_PT, | ||
185 | .pt_phys = (u64)pfn << PAGE_SHIFT, | ||
186 | }; | ||
187 | |||
188 | kvm_mmu_op(&rpt, sizeof rpt); | ||
189 | } | ||
190 | |||
191 | static void kvm_enter_lazy_mmu(void) | ||
192 | { | ||
193 | struct kvm_para_state *state = kvm_para_state(); | ||
194 | |||
195 | paravirt_enter_lazy_mmu(); | ||
196 | state->mode = paravirt_get_lazy_mode(); | ||
197 | } | ||
198 | |||
199 | static void kvm_leave_lazy_mmu(void) | ||
200 | { | ||
201 | struct kvm_para_state *state = kvm_para_state(); | ||
202 | |||
203 | mmu_queue_flush(state); | ||
204 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | ||
205 | state->mode = paravirt_get_lazy_mode(); | ||
206 | } | ||
207 | |||
208 | static void paravirt_ops_setup(void) | ||
209 | { | ||
210 | pv_info.name = "KVM"; | ||
211 | pv_info.paravirt_enabled = 1; | ||
212 | |||
213 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) | ||
214 | pv_cpu_ops.io_delay = kvm_io_delay; | ||
215 | |||
216 | if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) { | ||
217 | pv_mmu_ops.set_pte = kvm_set_pte; | ||
218 | pv_mmu_ops.set_pte_at = kvm_set_pte_at; | ||
219 | pv_mmu_ops.set_pmd = kvm_set_pmd; | ||
220 | #if PAGETABLE_LEVELS >= 3 | ||
221 | #ifdef CONFIG_X86_PAE | ||
222 | pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic; | ||
223 | pv_mmu_ops.set_pte_present = kvm_set_pte_present; | ||
224 | pv_mmu_ops.pte_clear = kvm_pte_clear; | ||
225 | pv_mmu_ops.pmd_clear = kvm_pmd_clear; | ||
226 | #endif | ||
227 | pv_mmu_ops.set_pud = kvm_set_pud; | ||
228 | #if PAGETABLE_LEVELS == 4 | ||
229 | pv_mmu_ops.set_pgd = kvm_set_pgd; | ||
230 | #endif | ||
231 | #endif | ||
232 | pv_mmu_ops.flush_tlb_user = kvm_flush_tlb; | ||
233 | pv_mmu_ops.release_pte = kvm_release_pt; | ||
234 | pv_mmu_ops.release_pmd = kvm_release_pt; | ||
235 | pv_mmu_ops.release_pud = kvm_release_pt; | ||
236 | |||
237 | pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; | ||
238 | pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; | ||
239 | } | ||
240 | } | ||
241 | |||
242 | void __init kvm_guest_init(void) | ||
243 | { | ||
244 | if (!kvm_para_available()) | ||
245 | return; | ||
246 | |||
247 | paravirt_ops_setup(); | ||
248 | } | ||
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c new file mode 100644 index 000000000000..ddee04043aeb --- /dev/null +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -0,0 +1,187 @@ | |||
1 | /* KVM paravirtual clock driver. A clocksource implementation | ||
2 | Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc. | ||
3 | |||
4 | This program is free software; you can redistribute it and/or modify | ||
5 | it under the terms of the GNU General Public License as published by | ||
6 | the Free Software Foundation; either version 2 of the License, or | ||
7 | (at your option) any later version. | ||
8 | |||
9 | This program is distributed in the hope that it will be useful, | ||
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | GNU General Public License for more details. | ||
13 | |||
14 | You should have received a copy of the GNU General Public License | ||
15 | along with this program; if not, write to the Free Software | ||
16 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | |||
19 | #include <linux/clocksource.h> | ||
20 | #include <linux/kvm_para.h> | ||
21 | #include <asm/arch_hooks.h> | ||
22 | #include <asm/msr.h> | ||
23 | #include <asm/apic.h> | ||
24 | #include <linux/percpu.h> | ||
25 | #include <asm/reboot.h> | ||
26 | |||
27 | #define KVM_SCALE 22 | ||
28 | |||
29 | static int kvmclock = 1; | ||
30 | |||
31 | static int parse_no_kvmclock(char *arg) | ||
32 | { | ||
33 | kvmclock = 0; | ||
34 | return 0; | ||
35 | } | ||
36 | early_param("no-kvmclock", parse_no_kvmclock); | ||
37 | |||
38 | /* The hypervisor will put information about time periodically here */ | ||
39 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); | ||
40 | #define get_clock(cpu, field) per_cpu(hv_clock, cpu).field | ||
41 | |||
42 | static inline u64 kvm_get_delta(u64 last_tsc) | ||
43 | { | ||
44 | int cpu = smp_processor_id(); | ||
45 | u64 delta = native_read_tsc() - last_tsc; | ||
46 | return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE; | ||
47 | } | ||
48 | |||
49 | static struct kvm_wall_clock wall_clock; | ||
50 | static cycle_t kvm_clock_read(void); | ||
51 | /* | ||
52 | * The wallclock is the time of day when we booted. Since then, some time may | ||
53 | * have elapsed since the hypervisor wrote the data. So we try to account for | ||
54 | * that with system time | ||
55 | */ | ||
56 | unsigned long kvm_get_wallclock(void) | ||
57 | { | ||
58 | u32 wc_sec, wc_nsec; | ||
59 | u64 delta; | ||
60 | struct timespec ts; | ||
61 | int version, nsec; | ||
62 | int low, high; | ||
63 | |||
64 | low = (int)__pa(&wall_clock); | ||
65 | high = ((u64)__pa(&wall_clock) >> 32); | ||
66 | |||
67 | delta = kvm_clock_read(); | ||
68 | |||
69 | native_write_msr(MSR_KVM_WALL_CLOCK, low, high); | ||
70 | do { | ||
71 | version = wall_clock.wc_version; | ||
72 | rmb(); | ||
73 | wc_sec = wall_clock.wc_sec; | ||
74 | wc_nsec = wall_clock.wc_nsec; | ||
75 | rmb(); | ||
76 | } while ((wall_clock.wc_version != version) || (version & 1)); | ||
77 | |||
78 | delta = kvm_clock_read() - delta; | ||
79 | delta += wc_nsec; | ||
80 | nsec = do_div(delta, NSEC_PER_SEC); | ||
81 | set_normalized_timespec(&ts, wc_sec + delta, nsec); | ||
82 | /* | ||
83 | * Of all mechanisms of time adjustment I've tested, this one | ||
84 | * was the champion! | ||
85 | */ | ||
86 | return ts.tv_sec + 1; | ||
87 | } | ||
88 | |||
89 | int kvm_set_wallclock(unsigned long now) | ||
90 | { | ||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * This is our read_clock function. The host puts an tsc timestamp each time | ||
96 | * it updates a new time. Without the tsc adjustment, we can have a situation | ||
97 | * in which a vcpu starts to run earlier (smaller system_time), but probes | ||
98 | * time later (compared to another vcpu), leading to backwards time | ||
99 | */ | ||
100 | static cycle_t kvm_clock_read(void) | ||
101 | { | ||
102 | u64 last_tsc, now; | ||
103 | int cpu; | ||
104 | |||
105 | preempt_disable(); | ||
106 | cpu = smp_processor_id(); | ||
107 | |||
108 | last_tsc = get_clock(cpu, tsc_timestamp); | ||
109 | now = get_clock(cpu, system_time); | ||
110 | |||
111 | now += kvm_get_delta(last_tsc); | ||
112 | preempt_enable(); | ||
113 | |||
114 | return now; | ||
115 | } | ||
116 | static struct clocksource kvm_clock = { | ||
117 | .name = "kvm-clock", | ||
118 | .read = kvm_clock_read, | ||
119 | .rating = 400, | ||
120 | .mask = CLOCKSOURCE_MASK(64), | ||
121 | .mult = 1 << KVM_SCALE, | ||
122 | .shift = KVM_SCALE, | ||
123 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
124 | }; | ||
125 | |||
126 | static int kvm_register_clock(void) | ||
127 | { | ||
128 | int cpu = smp_processor_id(); | ||
129 | int low, high; | ||
130 | low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; | ||
131 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); | ||
132 | |||
133 | return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); | ||
134 | } | ||
135 | |||
136 | static void kvm_setup_secondary_clock(void) | ||
137 | { | ||
138 | /* | ||
139 | * Now that the first cpu already had this clocksource initialized, | ||
140 | * we shouldn't fail. | ||
141 | */ | ||
142 | WARN_ON(kvm_register_clock()); | ||
143 | /* ok, done with our trickery, call native */ | ||
144 | setup_secondary_APIC_clock(); | ||
145 | } | ||
146 | |||
147 | /* | ||
148 | * After the clock is registered, the host will keep writing to the | ||
149 | * registered memory location. If the guest happens to shutdown, this memory | ||
150 | * won't be valid. In cases like kexec, in which you install a new kernel, this | ||
151 | * means a random memory location will be kept being written. So before any | ||
152 | * kind of shutdown from our side, we unregister the clock by writting anything | ||
153 | * that does not have the 'enable' bit set in the msr | ||
154 | */ | ||
155 | #ifdef CONFIG_KEXEC | ||
156 | static void kvm_crash_shutdown(struct pt_regs *regs) | ||
157 | { | ||
158 | native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); | ||
159 | native_machine_crash_shutdown(regs); | ||
160 | } | ||
161 | #endif | ||
162 | |||
163 | static void kvm_shutdown(void) | ||
164 | { | ||
165 | native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); | ||
166 | native_machine_shutdown(); | ||
167 | } | ||
168 | |||
169 | void __init kvmclock_init(void) | ||
170 | { | ||
171 | if (!kvm_para_available()) | ||
172 | return; | ||
173 | |||
174 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { | ||
175 | if (kvm_register_clock()) | ||
176 | return; | ||
177 | pv_time_ops.get_wallclock = kvm_get_wallclock; | ||
178 | pv_time_ops.set_wallclock = kvm_set_wallclock; | ||
179 | pv_time_ops.sched_clock = kvm_clock_read; | ||
180 | pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; | ||
181 | machine_ops.shutdown = kvm_shutdown; | ||
182 | #ifdef CONFIG_KEXEC | ||
183 | machine_ops.crash_shutdown = kvm_crash_shutdown; | ||
184 | #endif | ||
185 | clocksource_register(&kvm_clock); | ||
186 | } | ||
187 | } | ||
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 1791a751a772..a4a838306b2c 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -399,7 +399,7 @@ static void native_machine_emergency_restart(void) | |||
399 | } | 399 | } |
400 | } | 400 | } |
401 | 401 | ||
402 | static void native_machine_shutdown(void) | 402 | void native_machine_shutdown(void) |
403 | { | 403 | { |
404 | /* Stop the cpus and apics */ | 404 | /* Stop the cpus and apics */ |
405 | #ifdef CONFIG_SMP | 405 | #ifdef CONFIG_SMP |
@@ -470,7 +470,10 @@ struct machine_ops machine_ops = { | |||
470 | .shutdown = native_machine_shutdown, | 470 | .shutdown = native_machine_shutdown, |
471 | .emergency_restart = native_machine_emergency_restart, | 471 | .emergency_restart = native_machine_emergency_restart, |
472 | .restart = native_machine_restart, | 472 | .restart = native_machine_restart, |
473 | .halt = native_machine_halt | 473 | .halt = native_machine_halt, |
474 | #ifdef CONFIG_KEXEC | ||
475 | .crash_shutdown = native_machine_crash_shutdown, | ||
476 | #endif | ||
474 | }; | 477 | }; |
475 | 478 | ||
476 | void machine_power_off(void) | 479 | void machine_power_off(void) |
@@ -498,3 +501,9 @@ void machine_halt(void) | |||
498 | machine_ops.halt(); | 501 | machine_ops.halt(); |
499 | } | 502 | } |
500 | 503 | ||
504 | #ifdef CONFIG_KEXEC | ||
505 | void machine_crash_shutdown(struct pt_regs *regs) | ||
506 | { | ||
507 | machine_ops.crash_shutdown(regs); | ||
508 | } | ||
509 | #endif | ||
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 44cc9b933932..2283422af794 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/pfn.h> | 47 | #include <linux/pfn.h> |
48 | #include <linux/pci.h> | 48 | #include <linux/pci.h> |
49 | #include <linux/init_ohci1394_dma.h> | 49 | #include <linux/init_ohci1394_dma.h> |
50 | #include <linux/kvm_para.h> | ||
50 | 51 | ||
51 | #include <video/edid.h> | 52 | #include <video/edid.h> |
52 | 53 | ||
@@ -820,6 +821,10 @@ void __init setup_arch(char **cmdline_p) | |||
820 | 821 | ||
821 | max_low_pfn = setup_memory(); | 822 | max_low_pfn = setup_memory(); |
822 | 823 | ||
824 | #ifdef CONFIG_KVM_CLOCK | ||
825 | kvmclock_init(); | ||
826 | #endif | ||
827 | |||
823 | #ifdef CONFIG_VMI | 828 | #ifdef CONFIG_VMI |
824 | /* | 829 | /* |
825 | * Must be after max_low_pfn is determined, and before kernel | 830 | * Must be after max_low_pfn is determined, and before kernel |
@@ -827,6 +832,7 @@ void __init setup_arch(char **cmdline_p) | |||
827 | */ | 832 | */ |
828 | vmi_init(); | 833 | vmi_init(); |
829 | #endif | 834 | #endif |
835 | kvm_guest_init(); | ||
830 | 836 | ||
831 | /* | 837 | /* |
832 | * NOTE: before this point _nobody_ is allowed to allocate | 838 | * NOTE: before this point _nobody_ is allowed to allocate |
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 60e64c8eee92..a94fb959a87a 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <linux/ctype.h> | 42 | #include <linux/ctype.h> |
43 | #include <linux/uaccess.h> | 43 | #include <linux/uaccess.h> |
44 | #include <linux/init_ohci1394_dma.h> | 44 | #include <linux/init_ohci1394_dma.h> |
45 | #include <linux/kvm_para.h> | ||
45 | 46 | ||
46 | #include <asm/mtrr.h> | 47 | #include <asm/mtrr.h> |
47 | #include <asm/uaccess.h> | 48 | #include <asm/uaccess.h> |
@@ -384,6 +385,10 @@ void __init setup_arch(char **cmdline_p) | |||
384 | 385 | ||
385 | io_delay_init(); | 386 | io_delay_init(); |
386 | 387 | ||
388 | #ifdef CONFIG_KVM_CLOCK | ||
389 | kvmclock_init(); | ||
390 | #endif | ||
391 | |||
387 | #ifdef CONFIG_SMP | 392 | #ifdef CONFIG_SMP |
388 | /* setup to use the early static init tables during kernel startup */ | 393 | /* setup to use the early static init tables during kernel startup */ |
389 | x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; | 394 | x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; |
@@ -488,6 +493,8 @@ void __init setup_arch(char **cmdline_p) | |||
488 | init_apic_mappings(); | 493 | init_apic_mappings(); |
489 | ioapic_init_mappings(); | 494 | ioapic_init_mappings(); |
490 | 495 | ||
496 | kvm_guest_init(); | ||
497 | |||
491 | /* | 498 | /* |
492 | * We trust e820 completely. No explicit ROM probing in memory. | 499 | * We trust e820 completely. No explicit ROM probing in memory. |
493 | */ | 500 | */ |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 41962e793c0f..8d45fabc5f3b 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -19,7 +19,7 @@ if VIRTUALIZATION | |||
19 | 19 | ||
20 | config KVM | 20 | config KVM |
21 | tristate "Kernel-based Virtual Machine (KVM) support" | 21 | tristate "Kernel-based Virtual Machine (KVM) support" |
22 | depends on HAVE_KVM && EXPERIMENTAL | 22 | depends on HAVE_KVM |
23 | select PREEMPT_NOTIFIERS | 23 | select PREEMPT_NOTIFIERS |
24 | select ANON_INODES | 24 | select ANON_INODES |
25 | ---help--- | 25 | ---help--- |
@@ -50,6 +50,17 @@ config KVM_AMD | |||
50 | Provides support for KVM on AMD processors equipped with the AMD-V | 50 | Provides support for KVM on AMD processors equipped with the AMD-V |
51 | (SVM) extensions. | 51 | (SVM) extensions. |
52 | 52 | ||
53 | config KVM_TRACE | ||
54 | bool "KVM trace support" | ||
55 | depends on KVM && MARKERS && SYSFS | ||
56 | select RELAY | ||
57 | select DEBUG_FS | ||
58 | default n | ||
59 | ---help--- | ||
60 | This option allows reading a trace of kvm-related events through | ||
61 | relayfs. Note the ABI is not considered stable and will be | ||
62 | modified in future updates. | ||
63 | |||
53 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under | 64 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under |
54 | # the virtualization menu. | 65 | # the virtualization menu. |
55 | source drivers/lguest/Kconfig | 66 | source drivers/lguest/Kconfig |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index ffdd0b310784..c97d35c218db 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -3,10 +3,14 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) | 5 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) |
6 | ifeq ($(CONFIG_KVM_TRACE),y) | ||
7 | common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) | ||
8 | endif | ||
6 | 9 | ||
7 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm | 10 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm |
8 | 11 | ||
9 | kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o | 12 | kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ |
13 | i8254.o | ||
10 | obj-$(CONFIG_KVM) += kvm.o | 14 | obj-$(CONFIG_KVM) += kvm.o |
11 | kvm-intel-objs = vmx.o | 15 | kvm-intel-objs = vmx.o |
12 | obj-$(CONFIG_KVM_INTEL) += kvm-intel.o | 16 | obj-$(CONFIG_KVM_INTEL) += kvm-intel.o |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c new file mode 100644 index 000000000000..361e31611276 --- /dev/null +++ b/arch/x86/kvm/i8254.c | |||
@@ -0,0 +1,611 @@ | |||
1 | /* | ||
2 | * 8253/8254 interval timer emulation | ||
3 | * | ||
4 | * Copyright (c) 2003-2004 Fabrice Bellard | ||
5 | * Copyright (c) 2006 Intel Corporation | ||
6 | * Copyright (c) 2007 Keir Fraser, XenSource Inc | ||
7 | * Copyright (c) 2008 Intel Corporation | ||
8 | * | ||
9 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
10 | * of this software and associated documentation files (the "Software"), to deal | ||
11 | * in the Software without restriction, including without limitation the rights | ||
12 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
13 | * copies of the Software, and to permit persons to whom the Software is | ||
14 | * furnished to do so, subject to the following conditions: | ||
15 | * | ||
16 | * The above copyright notice and this permission notice shall be included in | ||
17 | * all copies or substantial portions of the Software. | ||
18 | * | ||
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
22 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
23 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
24 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
25 | * THE SOFTWARE. | ||
26 | * | ||
27 | * Authors: | ||
28 | * Sheng Yang <sheng.yang@intel.com> | ||
29 | * Based on QEMU and Xen. | ||
30 | */ | ||
31 | |||
32 | #include <linux/kvm_host.h> | ||
33 | |||
34 | #include "irq.h" | ||
35 | #include "i8254.h" | ||
36 | |||
37 | #ifndef CONFIG_X86_64 | ||
38 | #define mod_64(x, y) ((x) - (y) * div64_64(x, y)) | ||
39 | #else | ||
40 | #define mod_64(x, y) ((x) % (y)) | ||
41 | #endif | ||
42 | |||
43 | #define RW_STATE_LSB 1 | ||
44 | #define RW_STATE_MSB 2 | ||
45 | #define RW_STATE_WORD0 3 | ||
46 | #define RW_STATE_WORD1 4 | ||
47 | |||
48 | /* Compute with 96 bit intermediate result: (a*b)/c */ | ||
49 | static u64 muldiv64(u64 a, u32 b, u32 c) | ||
50 | { | ||
51 | union { | ||
52 | u64 ll; | ||
53 | struct { | ||
54 | u32 low, high; | ||
55 | } l; | ||
56 | } u, res; | ||
57 | u64 rl, rh; | ||
58 | |||
59 | u.ll = a; | ||
60 | rl = (u64)u.l.low * (u64)b; | ||
61 | rh = (u64)u.l.high * (u64)b; | ||
62 | rh += (rl >> 32); | ||
63 | res.l.high = div64_64(rh, c); | ||
64 | res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c); | ||
65 | return res.ll; | ||
66 | } | ||
67 | |||
68 | static void pit_set_gate(struct kvm *kvm, int channel, u32 val) | ||
69 | { | ||
70 | struct kvm_kpit_channel_state *c = | ||
71 | &kvm->arch.vpit->pit_state.channels[channel]; | ||
72 | |||
73 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | ||
74 | |||
75 | switch (c->mode) { | ||
76 | default: | ||
77 | case 0: | ||
78 | case 4: | ||
79 | /* XXX: just disable/enable counting */ | ||
80 | break; | ||
81 | case 1: | ||
82 | case 2: | ||
83 | case 3: | ||
84 | case 5: | ||
85 | /* Restart counting on rising edge. */ | ||
86 | if (c->gate < val) | ||
87 | c->count_load_time = ktime_get(); | ||
88 | break; | ||
89 | } | ||
90 | |||
91 | c->gate = val; | ||
92 | } | ||
93 | |||
94 | int pit_get_gate(struct kvm *kvm, int channel) | ||
95 | { | ||
96 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | ||
97 | |||
98 | return kvm->arch.vpit->pit_state.channels[channel].gate; | ||
99 | } | ||
100 | |||
101 | static int pit_get_count(struct kvm *kvm, int channel) | ||
102 | { | ||
103 | struct kvm_kpit_channel_state *c = | ||
104 | &kvm->arch.vpit->pit_state.channels[channel]; | ||
105 | s64 d, t; | ||
106 | int counter; | ||
107 | |||
108 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | ||
109 | |||
110 | t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); | ||
111 | d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); | ||
112 | |||
113 | switch (c->mode) { | ||
114 | case 0: | ||
115 | case 1: | ||
116 | case 4: | ||
117 | case 5: | ||
118 | counter = (c->count - d) & 0xffff; | ||
119 | break; | ||
120 | case 3: | ||
121 | /* XXX: may be incorrect for odd counts */ | ||
122 | counter = c->count - (mod_64((2 * d), c->count)); | ||
123 | break; | ||
124 | default: | ||
125 | counter = c->count - mod_64(d, c->count); | ||
126 | break; | ||
127 | } | ||
128 | return counter; | ||
129 | } | ||
130 | |||
131 | static int pit_get_out(struct kvm *kvm, int channel) | ||
132 | { | ||
133 | struct kvm_kpit_channel_state *c = | ||
134 | &kvm->arch.vpit->pit_state.channels[channel]; | ||
135 | s64 d, t; | ||
136 | int out; | ||
137 | |||
138 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | ||
139 | |||
140 | t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); | ||
141 | d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); | ||
142 | |||
143 | switch (c->mode) { | ||
144 | default: | ||
145 | case 0: | ||
146 | out = (d >= c->count); | ||
147 | break; | ||
148 | case 1: | ||
149 | out = (d < c->count); | ||
150 | break; | ||
151 | case 2: | ||
152 | out = ((mod_64(d, c->count) == 0) && (d != 0)); | ||
153 | break; | ||
154 | case 3: | ||
155 | out = (mod_64(d, c->count) < ((c->count + 1) >> 1)); | ||
156 | break; | ||
157 | case 4: | ||
158 | case 5: | ||
159 | out = (d == c->count); | ||
160 | break; | ||
161 | } | ||
162 | |||
163 | return out; | ||
164 | } | ||
165 | |||
166 | static void pit_latch_count(struct kvm *kvm, int channel) | ||
167 | { | ||
168 | struct kvm_kpit_channel_state *c = | ||
169 | &kvm->arch.vpit->pit_state.channels[channel]; | ||
170 | |||
171 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | ||
172 | |||
173 | if (!c->count_latched) { | ||
174 | c->latched_count = pit_get_count(kvm, channel); | ||
175 | c->count_latched = c->rw_mode; | ||
176 | } | ||
177 | } | ||
178 | |||
179 | static void pit_latch_status(struct kvm *kvm, int channel) | ||
180 | { | ||
181 | struct kvm_kpit_channel_state *c = | ||
182 | &kvm->arch.vpit->pit_state.channels[channel]; | ||
183 | |||
184 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | ||
185 | |||
186 | if (!c->status_latched) { | ||
187 | /* TODO: Return NULL COUNT (bit 6). */ | ||
188 | c->status = ((pit_get_out(kvm, channel) << 7) | | ||
189 | (c->rw_mode << 4) | | ||
190 | (c->mode << 1) | | ||
191 | c->bcd); | ||
192 | c->status_latched = 1; | ||
193 | } | ||
194 | } | ||
195 | |||
196 | int __pit_timer_fn(struct kvm_kpit_state *ps) | ||
197 | { | ||
198 | struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0]; | ||
199 | struct kvm_kpit_timer *pt = &ps->pit_timer; | ||
200 | |||
201 | atomic_inc(&pt->pending); | ||
202 | smp_mb__after_atomic_inc(); | ||
203 | /* FIXME: handle case where the guest is in guest mode */ | ||
204 | if (vcpu0 && waitqueue_active(&vcpu0->wq)) { | ||
205 | vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
206 | wake_up_interruptible(&vcpu0->wq); | ||
207 | } | ||
208 | |||
209 | pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); | ||
210 | pt->scheduled = ktime_to_ns(pt->timer.expires); | ||
211 | |||
212 | return (pt->period == 0 ? 0 : 1); | ||
213 | } | ||
214 | |||
215 | int pit_has_pending_timer(struct kvm_vcpu *vcpu) | ||
216 | { | ||
217 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; | ||
218 | |||
219 | if (pit && vcpu->vcpu_id == 0) | ||
220 | return atomic_read(&pit->pit_state.pit_timer.pending); | ||
221 | |||
222 | return 0; | ||
223 | } | ||
224 | |||
225 | static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) | ||
226 | { | ||
227 | struct kvm_kpit_state *ps; | ||
228 | int restart_timer = 0; | ||
229 | |||
230 | ps = container_of(data, struct kvm_kpit_state, pit_timer.timer); | ||
231 | |||
232 | restart_timer = __pit_timer_fn(ps); | ||
233 | |||
234 | if (restart_timer) | ||
235 | return HRTIMER_RESTART; | ||
236 | else | ||
237 | return HRTIMER_NORESTART; | ||
238 | } | ||
239 | |||
240 | static void destroy_pit_timer(struct kvm_kpit_timer *pt) | ||
241 | { | ||
242 | pr_debug("pit: execute del timer!\n"); | ||
243 | hrtimer_cancel(&pt->timer); | ||
244 | } | ||
245 | |||
246 | static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period) | ||
247 | { | ||
248 | s64 interval; | ||
249 | |||
250 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); | ||
251 | |||
252 | pr_debug("pit: create pit timer, interval is %llu nsec\n", interval); | ||
253 | |||
254 | /* TODO The new value only affected after the retriggered */ | ||
255 | hrtimer_cancel(&pt->timer); | ||
256 | pt->period = (is_period == 0) ? 0 : interval; | ||
257 | pt->timer.function = pit_timer_fn; | ||
258 | atomic_set(&pt->pending, 0); | ||
259 | |||
260 | hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval), | ||
261 | HRTIMER_MODE_ABS); | ||
262 | } | ||
263 | |||
264 | static void pit_load_count(struct kvm *kvm, int channel, u32 val) | ||
265 | { | ||
266 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; | ||
267 | |||
268 | WARN_ON(!mutex_is_locked(&ps->lock)); | ||
269 | |||
270 | pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); | ||
271 | |||
272 | /* | ||
273 | * Though spec said the state of 8254 is undefined after power-up, | ||
274 | * seems some tricky OS like Windows XP depends on IRQ0 interrupt | ||
275 | * when booting up. | ||
276 | * So here setting initialize rate for it, and not a specific number | ||
277 | */ | ||
278 | if (val == 0) | ||
279 | val = 0x10000; | ||
280 | |||
281 | ps->channels[channel].count_load_time = ktime_get(); | ||
282 | ps->channels[channel].count = val; | ||
283 | |||
284 | if (channel != 0) | ||
285 | return; | ||
286 | |||
287 | /* Two types of timer | ||
288 | * mode 1 is one shot, mode 2 is period, otherwise del timer */ | ||
289 | switch (ps->channels[0].mode) { | ||
290 | case 1: | ||
291 | create_pit_timer(&ps->pit_timer, val, 0); | ||
292 | break; | ||
293 | case 2: | ||
294 | create_pit_timer(&ps->pit_timer, val, 1); | ||
295 | break; | ||
296 | default: | ||
297 | destroy_pit_timer(&ps->pit_timer); | ||
298 | } | ||
299 | } | ||
300 | |||
301 | void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val) | ||
302 | { | ||
303 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | ||
304 | pit_load_count(kvm, channel, val); | ||
305 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | ||
306 | } | ||
307 | |||
308 | static void pit_ioport_write(struct kvm_io_device *this, | ||
309 | gpa_t addr, int len, const void *data) | ||
310 | { | ||
311 | struct kvm_pit *pit = (struct kvm_pit *)this->private; | ||
312 | struct kvm_kpit_state *pit_state = &pit->pit_state; | ||
313 | struct kvm *kvm = pit->kvm; | ||
314 | int channel, access; | ||
315 | struct kvm_kpit_channel_state *s; | ||
316 | u32 val = *(u32 *) data; | ||
317 | |||
318 | val &= 0xff; | ||
319 | addr &= KVM_PIT_CHANNEL_MASK; | ||
320 | |||
321 | mutex_lock(&pit_state->lock); | ||
322 | |||
323 | if (val != 0) | ||
324 | pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n", | ||
325 | (unsigned int)addr, len, val); | ||
326 | |||
327 | if (addr == 3) { | ||
328 | channel = val >> 6; | ||
329 | if (channel == 3) { | ||
330 | /* Read-Back Command. */ | ||
331 | for (channel = 0; channel < 3; channel++) { | ||
332 | s = &pit_state->channels[channel]; | ||
333 | if (val & (2 << channel)) { | ||
334 | if (!(val & 0x20)) | ||
335 | pit_latch_count(kvm, channel); | ||
336 | if (!(val & 0x10)) | ||
337 | pit_latch_status(kvm, channel); | ||
338 | } | ||
339 | } | ||
340 | } else { | ||
341 | /* Select Counter <channel>. */ | ||
342 | s = &pit_state->channels[channel]; | ||
343 | access = (val >> 4) & KVM_PIT_CHANNEL_MASK; | ||
344 | if (access == 0) { | ||
345 | pit_latch_count(kvm, channel); | ||
346 | } else { | ||
347 | s->rw_mode = access; | ||
348 | s->read_state = access; | ||
349 | s->write_state = access; | ||
350 | s->mode = (val >> 1) & 7; | ||
351 | if (s->mode > 5) | ||
352 | s->mode -= 4; | ||
353 | s->bcd = val & 1; | ||
354 | } | ||
355 | } | ||
356 | } else { | ||
357 | /* Write Count. */ | ||
358 | s = &pit_state->channels[addr]; | ||
359 | switch (s->write_state) { | ||
360 | default: | ||
361 | case RW_STATE_LSB: | ||
362 | pit_load_count(kvm, addr, val); | ||
363 | break; | ||
364 | case RW_STATE_MSB: | ||
365 | pit_load_count(kvm, addr, val << 8); | ||
366 | break; | ||
367 | case RW_STATE_WORD0: | ||
368 | s->write_latch = val; | ||
369 | s->write_state = RW_STATE_WORD1; | ||
370 | break; | ||
371 | case RW_STATE_WORD1: | ||
372 | pit_load_count(kvm, addr, s->write_latch | (val << 8)); | ||
373 | s->write_state = RW_STATE_WORD0; | ||
374 | break; | ||
375 | } | ||
376 | } | ||
377 | |||
378 | mutex_unlock(&pit_state->lock); | ||
379 | } | ||
380 | |||
381 | static void pit_ioport_read(struct kvm_io_device *this, | ||
382 | gpa_t addr, int len, void *data) | ||
383 | { | ||
384 | struct kvm_pit *pit = (struct kvm_pit *)this->private; | ||
385 | struct kvm_kpit_state *pit_state = &pit->pit_state; | ||
386 | struct kvm *kvm = pit->kvm; | ||
387 | int ret, count; | ||
388 | struct kvm_kpit_channel_state *s; | ||
389 | |||
390 | addr &= KVM_PIT_CHANNEL_MASK; | ||
391 | s = &pit_state->channels[addr]; | ||
392 | |||
393 | mutex_lock(&pit_state->lock); | ||
394 | |||
395 | if (s->status_latched) { | ||
396 | s->status_latched = 0; | ||
397 | ret = s->status; | ||
398 | } else if (s->count_latched) { | ||
399 | switch (s->count_latched) { | ||
400 | default: | ||
401 | case RW_STATE_LSB: | ||
402 | ret = s->latched_count & 0xff; | ||
403 | s->count_latched = 0; | ||
404 | break; | ||
405 | case RW_STATE_MSB: | ||
406 | ret = s->latched_count >> 8; | ||
407 | s->count_latched = 0; | ||
408 | break; | ||
409 | case RW_STATE_WORD0: | ||
410 | ret = s->latched_count & 0xff; | ||
411 | s->count_latched = RW_STATE_MSB; | ||
412 | break; | ||
413 | } | ||
414 | } else { | ||
415 | switch (s->read_state) { | ||
416 | default: | ||
417 | case RW_STATE_LSB: | ||
418 | count = pit_get_count(kvm, addr); | ||
419 | ret = count & 0xff; | ||
420 | break; | ||
421 | case RW_STATE_MSB: | ||
422 | count = pit_get_count(kvm, addr); | ||
423 | ret = (count >> 8) & 0xff; | ||
424 | break; | ||
425 | case RW_STATE_WORD0: | ||
426 | count = pit_get_count(kvm, addr); | ||
427 | ret = count & 0xff; | ||
428 | s->read_state = RW_STATE_WORD1; | ||
429 | break; | ||
430 | case RW_STATE_WORD1: | ||
431 | count = pit_get_count(kvm, addr); | ||
432 | ret = (count >> 8) & 0xff; | ||
433 | s->read_state = RW_STATE_WORD0; | ||
434 | break; | ||
435 | } | ||
436 | } | ||
437 | |||
438 | if (len > sizeof(ret)) | ||
439 | len = sizeof(ret); | ||
440 | memcpy(data, (char *)&ret, len); | ||
441 | |||
442 | mutex_unlock(&pit_state->lock); | ||
443 | } | ||
444 | |||
445 | static int pit_in_range(struct kvm_io_device *this, gpa_t addr) | ||
446 | { | ||
447 | return ((addr >= KVM_PIT_BASE_ADDRESS) && | ||
448 | (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); | ||
449 | } | ||
450 | |||
451 | static void speaker_ioport_write(struct kvm_io_device *this, | ||
452 | gpa_t addr, int len, const void *data) | ||
453 | { | ||
454 | struct kvm_pit *pit = (struct kvm_pit *)this->private; | ||
455 | struct kvm_kpit_state *pit_state = &pit->pit_state; | ||
456 | struct kvm *kvm = pit->kvm; | ||
457 | u32 val = *(u32 *) data; | ||
458 | |||
459 | mutex_lock(&pit_state->lock); | ||
460 | pit_state->speaker_data_on = (val >> 1) & 1; | ||
461 | pit_set_gate(kvm, 2, val & 1); | ||
462 | mutex_unlock(&pit_state->lock); | ||
463 | } | ||
464 | |||
465 | static void speaker_ioport_read(struct kvm_io_device *this, | ||
466 | gpa_t addr, int len, void *data) | ||
467 | { | ||
468 | struct kvm_pit *pit = (struct kvm_pit *)this->private; | ||
469 | struct kvm_kpit_state *pit_state = &pit->pit_state; | ||
470 | struct kvm *kvm = pit->kvm; | ||
471 | unsigned int refresh_clock; | ||
472 | int ret; | ||
473 | |||
474 | /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */ | ||
475 | refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1; | ||
476 | |||
477 | mutex_lock(&pit_state->lock); | ||
478 | ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) | | ||
479 | (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4)); | ||
480 | if (len > sizeof(ret)) | ||
481 | len = sizeof(ret); | ||
482 | memcpy(data, (char *)&ret, len); | ||
483 | mutex_unlock(&pit_state->lock); | ||
484 | } | ||
485 | |||
486 | static int speaker_in_range(struct kvm_io_device *this, gpa_t addr) | ||
487 | { | ||
488 | return (addr == KVM_SPEAKER_BASE_ADDRESS); | ||
489 | } | ||
490 | |||
491 | void kvm_pit_reset(struct kvm_pit *pit) | ||
492 | { | ||
493 | int i; | ||
494 | struct kvm_kpit_channel_state *c; | ||
495 | |||
496 | mutex_lock(&pit->pit_state.lock); | ||
497 | for (i = 0; i < 3; i++) { | ||
498 | c = &pit->pit_state.channels[i]; | ||
499 | c->mode = 0xff; | ||
500 | c->gate = (i != 2); | ||
501 | pit_load_count(pit->kvm, i, 0); | ||
502 | } | ||
503 | mutex_unlock(&pit->pit_state.lock); | ||
504 | |||
505 | atomic_set(&pit->pit_state.pit_timer.pending, 0); | ||
506 | pit->pit_state.inject_pending = 1; | ||
507 | } | ||
508 | |||
509 | struct kvm_pit *kvm_create_pit(struct kvm *kvm) | ||
510 | { | ||
511 | struct kvm_pit *pit; | ||
512 | struct kvm_kpit_state *pit_state; | ||
513 | |||
514 | pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL); | ||
515 | if (!pit) | ||
516 | return NULL; | ||
517 | |||
518 | mutex_init(&pit->pit_state.lock); | ||
519 | mutex_lock(&pit->pit_state.lock); | ||
520 | |||
521 | /* Initialize PIO device */ | ||
522 | pit->dev.read = pit_ioport_read; | ||
523 | pit->dev.write = pit_ioport_write; | ||
524 | pit->dev.in_range = pit_in_range; | ||
525 | pit->dev.private = pit; | ||
526 | kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev); | ||
527 | |||
528 | pit->speaker_dev.read = speaker_ioport_read; | ||
529 | pit->speaker_dev.write = speaker_ioport_write; | ||
530 | pit->speaker_dev.in_range = speaker_in_range; | ||
531 | pit->speaker_dev.private = pit; | ||
532 | kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev); | ||
533 | |||
534 | kvm->arch.vpit = pit; | ||
535 | pit->kvm = kvm; | ||
536 | |||
537 | pit_state = &pit->pit_state; | ||
538 | pit_state->pit = pit; | ||
539 | hrtimer_init(&pit_state->pit_timer.timer, | ||
540 | CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
541 | mutex_unlock(&pit->pit_state.lock); | ||
542 | |||
543 | kvm_pit_reset(pit); | ||
544 | |||
545 | return pit; | ||
546 | } | ||
547 | |||
548 | void kvm_free_pit(struct kvm *kvm) | ||
549 | { | ||
550 | struct hrtimer *timer; | ||
551 | |||
552 | if (kvm->arch.vpit) { | ||
553 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | ||
554 | timer = &kvm->arch.vpit->pit_state.pit_timer.timer; | ||
555 | hrtimer_cancel(timer); | ||
556 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | ||
557 | kfree(kvm->arch.vpit); | ||
558 | } | ||
559 | } | ||
560 | |||
561 | void __inject_pit_timer_intr(struct kvm *kvm) | ||
562 | { | ||
563 | mutex_lock(&kvm->lock); | ||
564 | kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1); | ||
565 | kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0); | ||
566 | kvm_pic_set_irq(pic_irqchip(kvm), 0, 1); | ||
567 | kvm_pic_set_irq(pic_irqchip(kvm), 0, 0); | ||
568 | mutex_unlock(&kvm->lock); | ||
569 | } | ||
570 | |||
571 | void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) | ||
572 | { | ||
573 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; | ||
574 | struct kvm *kvm = vcpu->kvm; | ||
575 | struct kvm_kpit_state *ps; | ||
576 | |||
577 | if (vcpu && pit) { | ||
578 | ps = &pit->pit_state; | ||
579 | |||
580 | /* Try to inject pending interrupts when: | ||
581 | * 1. Pending exists | ||
582 | * 2. Last interrupt was accepted or waited for too long time*/ | ||
583 | if (atomic_read(&ps->pit_timer.pending) && | ||
584 | (ps->inject_pending || | ||
585 | (jiffies - ps->last_injected_time | ||
586 | >= KVM_MAX_PIT_INTR_INTERVAL))) { | ||
587 | ps->inject_pending = 0; | ||
588 | __inject_pit_timer_intr(kvm); | ||
589 | ps->last_injected_time = jiffies; | ||
590 | } | ||
591 | } | ||
592 | } | ||
593 | |||
594 | void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec) | ||
595 | { | ||
596 | struct kvm_arch *arch = &vcpu->kvm->arch; | ||
597 | struct kvm_kpit_state *ps; | ||
598 | |||
599 | if (vcpu && arch->vpit) { | ||
600 | ps = &arch->vpit->pit_state; | ||
601 | if (atomic_read(&ps->pit_timer.pending) && | ||
602 | (((arch->vpic->pics[0].imr & 1) == 0 && | ||
603 | arch->vpic->pics[0].irq_base == vec) || | ||
604 | (arch->vioapic->redirtbl[0].fields.vector == vec && | ||
605 | arch->vioapic->redirtbl[0].fields.mask != 1))) { | ||
606 | ps->inject_pending = 1; | ||
607 | atomic_dec(&ps->pit_timer.pending); | ||
608 | ps->channels[0].count_load_time = ktime_get(); | ||
609 | } | ||
610 | } | ||
611 | } | ||
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h new file mode 100644 index 000000000000..db25c2a6c8c4 --- /dev/null +++ b/arch/x86/kvm/i8254.h | |||
@@ -0,0 +1,63 @@ | |||
1 | #ifndef __I8254_H | ||
2 | #define __I8254_H | ||
3 | |||
4 | #include "iodev.h" | ||
5 | |||
6 | struct kvm_kpit_timer { | ||
7 | struct hrtimer timer; | ||
8 | int irq; | ||
9 | s64 period; /* unit: ns */ | ||
10 | s64 scheduled; | ||
11 | ktime_t last_update; | ||
12 | atomic_t pending; | ||
13 | }; | ||
14 | |||
15 | struct kvm_kpit_channel_state { | ||
16 | u32 count; /* can be 65536 */ | ||
17 | u16 latched_count; | ||
18 | u8 count_latched; | ||
19 | u8 status_latched; | ||
20 | u8 status; | ||
21 | u8 read_state; | ||
22 | u8 write_state; | ||
23 | u8 write_latch; | ||
24 | u8 rw_mode; | ||
25 | u8 mode; | ||
26 | u8 bcd; /* not supported */ | ||
27 | u8 gate; /* timer start */ | ||
28 | ktime_t count_load_time; | ||
29 | }; | ||
30 | |||
31 | struct kvm_kpit_state { | ||
32 | struct kvm_kpit_channel_state channels[3]; | ||
33 | struct kvm_kpit_timer pit_timer; | ||
34 | u32 speaker_data_on; | ||
35 | struct mutex lock; | ||
36 | struct kvm_pit *pit; | ||
37 | bool inject_pending; /* if inject pending interrupts */ | ||
38 | unsigned long last_injected_time; | ||
39 | }; | ||
40 | |||
41 | struct kvm_pit { | ||
42 | unsigned long base_addresss; | ||
43 | struct kvm_io_device dev; | ||
44 | struct kvm_io_device speaker_dev; | ||
45 | struct kvm *kvm; | ||
46 | struct kvm_kpit_state pit_state; | ||
47 | }; | ||
48 | |||
49 | #define KVM_PIT_BASE_ADDRESS 0x40 | ||
50 | #define KVM_SPEAKER_BASE_ADDRESS 0x61 | ||
51 | #define KVM_PIT_MEM_LENGTH 4 | ||
52 | #define KVM_PIT_FREQ 1193181 | ||
53 | #define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 | ||
54 | #define KVM_PIT_CHANNEL_MASK 0x3 | ||
55 | |||
56 | void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); | ||
57 | void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec); | ||
58 | void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val); | ||
59 | struct kvm_pit *kvm_create_pit(struct kvm *kvm); | ||
60 | void kvm_free_pit(struct kvm *kvm); | ||
61 | void kvm_pit_reset(struct kvm_pit *pit); | ||
62 | |||
63 | #endif | ||
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index e5714759e97f..ce1f583459b1 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
@@ -23,6 +23,22 @@ | |||
23 | #include <linux/kvm_host.h> | 23 | #include <linux/kvm_host.h> |
24 | 24 | ||
25 | #include "irq.h" | 25 | #include "irq.h" |
26 | #include "i8254.h" | ||
27 | |||
28 | /* | ||
29 | * check if there are pending timer events | ||
30 | * to be processed. | ||
31 | */ | ||
32 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | ||
33 | { | ||
34 | int ret; | ||
35 | |||
36 | ret = pit_has_pending_timer(vcpu); | ||
37 | ret |= apic_has_pending_timer(vcpu); | ||
38 | |||
39 | return ret; | ||
40 | } | ||
41 | EXPORT_SYMBOL(kvm_cpu_has_pending_timer); | ||
26 | 42 | ||
27 | /* | 43 | /* |
28 | * check if there is pending interrupt without | 44 | * check if there is pending interrupt without |
@@ -66,6 +82,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); | |||
66 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) | 82 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) |
67 | { | 83 | { |
68 | kvm_inject_apic_timer_irqs(vcpu); | 84 | kvm_inject_apic_timer_irqs(vcpu); |
85 | kvm_inject_pit_timer_irqs(vcpu); | ||
69 | /* TODO: PIT, RTC etc. */ | 86 | /* TODO: PIT, RTC etc. */ |
70 | } | 87 | } |
71 | EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); | 88 | EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); |
@@ -73,6 +90,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); | |||
73 | void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) | 90 | void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) |
74 | { | 91 | { |
75 | kvm_apic_timer_intr_post(vcpu, vec); | 92 | kvm_apic_timer_intr_post(vcpu, vec); |
93 | kvm_pit_timer_intr_post(vcpu, vec); | ||
76 | /* TODO: PIT, RTC etc. */ | 94 | /* TODO: PIT, RTC etc. */ |
77 | } | 95 | } |
78 | EXPORT_SYMBOL_GPL(kvm_timer_intr_post); | 96 | EXPORT_SYMBOL_GPL(kvm_timer_intr_post); |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index fa5ed5d59b5d..1802134b836f 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -85,4 +85,7 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); | |||
85 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); | 85 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); |
86 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); | 86 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); |
87 | 87 | ||
88 | int pit_has_pending_timer(struct kvm_vcpu *vcpu); | ||
89 | int apic_has_pending_timer(struct kvm_vcpu *vcpu); | ||
90 | |||
88 | #endif | 91 | #endif |
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h index ecdfe97e4635..65ef0fc2c036 100644 --- a/arch/x86/kvm/kvm_svm.h +++ b/arch/x86/kvm/kvm_svm.h | |||
@@ -39,6 +39,8 @@ struct vcpu_svm { | |||
39 | unsigned long host_db_regs[NUM_DB_REGS]; | 39 | unsigned long host_db_regs[NUM_DB_REGS]; |
40 | unsigned long host_dr6; | 40 | unsigned long host_dr6; |
41 | unsigned long host_dr7; | 41 | unsigned long host_dr7; |
42 | |||
43 | u32 *msrpm; | ||
42 | }; | 44 | }; |
43 | 45 | ||
44 | #endif | 46 | #endif |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 68a6b1511934..57ac4e4c556a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -338,10 +338,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
338 | } else | 338 | } else |
339 | apic_clear_vector(vector, apic->regs + APIC_TMR); | 339 | apic_clear_vector(vector, apic->regs + APIC_TMR); |
340 | 340 | ||
341 | if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE) | 341 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) |
342 | kvm_vcpu_kick(vcpu); | 342 | kvm_vcpu_kick(vcpu); |
343 | else if (vcpu->arch.mp_state == VCPU_MP_STATE_HALTED) { | 343 | else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) { |
344 | vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; | 344 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
345 | if (waitqueue_active(&vcpu->wq)) | 345 | if (waitqueue_active(&vcpu->wq)) |
346 | wake_up_interruptible(&vcpu->wq); | 346 | wake_up_interruptible(&vcpu->wq); |
347 | } | 347 | } |
@@ -362,11 +362,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
362 | 362 | ||
363 | case APIC_DM_INIT: | 363 | case APIC_DM_INIT: |
364 | if (level) { | 364 | if (level) { |
365 | if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE) | 365 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) |
366 | printk(KERN_DEBUG | 366 | printk(KERN_DEBUG |
367 | "INIT on a runnable vcpu %d\n", | 367 | "INIT on a runnable vcpu %d\n", |
368 | vcpu->vcpu_id); | 368 | vcpu->vcpu_id); |
369 | vcpu->arch.mp_state = VCPU_MP_STATE_INIT_RECEIVED; | 369 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; |
370 | kvm_vcpu_kick(vcpu); | 370 | kvm_vcpu_kick(vcpu); |
371 | } else { | 371 | } else { |
372 | printk(KERN_DEBUG | 372 | printk(KERN_DEBUG |
@@ -379,9 +379,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
379 | case APIC_DM_STARTUP: | 379 | case APIC_DM_STARTUP: |
380 | printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n", | 380 | printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n", |
381 | vcpu->vcpu_id, vector); | 381 | vcpu->vcpu_id, vector); |
382 | if (vcpu->arch.mp_state == VCPU_MP_STATE_INIT_RECEIVED) { | 382 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { |
383 | vcpu->arch.sipi_vector = vector; | 383 | vcpu->arch.sipi_vector = vector; |
384 | vcpu->arch.mp_state = VCPU_MP_STATE_SIPI_RECEIVED; | 384 | vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; |
385 | if (waitqueue_active(&vcpu->wq)) | 385 | if (waitqueue_active(&vcpu->wq)) |
386 | wake_up_interruptible(&vcpu->wq); | 386 | wake_up_interruptible(&vcpu->wq); |
387 | } | 387 | } |
@@ -658,7 +658,7 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
658 | apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" | 658 | apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" |
659 | PRIx64 ", " | 659 | PRIx64 ", " |
660 | "timer initial count 0x%x, period %lldns, " | 660 | "timer initial count 0x%x, period %lldns, " |
661 | "expire @ 0x%016" PRIx64 ".\n", __FUNCTION__, | 661 | "expire @ 0x%016" PRIx64 ".\n", __func__, |
662 | APIC_BUS_CYCLE_NS, ktime_to_ns(now), | 662 | APIC_BUS_CYCLE_NS, ktime_to_ns(now), |
663 | apic_get_reg(apic, APIC_TMICT), | 663 | apic_get_reg(apic, APIC_TMICT), |
664 | apic->timer.period, | 664 | apic->timer.period, |
@@ -691,7 +691,7 @@ static void apic_mmio_write(struct kvm_io_device *this, | |||
691 | /* too common printing */ | 691 | /* too common printing */ |
692 | if (offset != APIC_EOI) | 692 | if (offset != APIC_EOI) |
693 | apic_debug("%s: offset 0x%x with length 0x%x, and value is " | 693 | apic_debug("%s: offset 0x%x with length 0x%x, and value is " |
694 | "0x%x\n", __FUNCTION__, offset, len, val); | 694 | "0x%x\n", __func__, offset, len, val); |
695 | 695 | ||
696 | offset &= 0xff0; | 696 | offset &= 0xff0; |
697 | 697 | ||
@@ -822,6 +822,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) | |||
822 | apic_set_tpr(apic, ((cr8 & 0x0f) << 4) | 822 | apic_set_tpr(apic, ((cr8 & 0x0f) << 4) |
823 | | (apic_get_reg(apic, APIC_TASKPRI) & 4)); | 823 | | (apic_get_reg(apic, APIC_TASKPRI) & 4)); |
824 | } | 824 | } |
825 | EXPORT_SYMBOL_GPL(kvm_lapic_set_tpr); | ||
825 | 826 | ||
826 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) | 827 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) |
827 | { | 828 | { |
@@ -869,7 +870,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
869 | struct kvm_lapic *apic; | 870 | struct kvm_lapic *apic; |
870 | int i; | 871 | int i; |
871 | 872 | ||
872 | apic_debug("%s\n", __FUNCTION__); | 873 | apic_debug("%s\n", __func__); |
873 | 874 | ||
874 | ASSERT(vcpu); | 875 | ASSERT(vcpu); |
875 | apic = vcpu->arch.apic; | 876 | apic = vcpu->arch.apic; |
@@ -907,7 +908,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
907 | apic_update_ppr(apic); | 908 | apic_update_ppr(apic); |
908 | 909 | ||
909 | apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" | 910 | apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" |
910 | "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__, | 911 | "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, |
911 | vcpu, kvm_apic_id(apic), | 912 | vcpu, kvm_apic_id(apic), |
912 | vcpu->arch.apic_base, apic->base_address); | 913 | vcpu->arch.apic_base, apic->base_address); |
913 | } | 914 | } |
@@ -940,7 +941,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic) | |||
940 | 941 | ||
941 | atomic_inc(&apic->timer.pending); | 942 | atomic_inc(&apic->timer.pending); |
942 | if (waitqueue_active(q)) { | 943 | if (waitqueue_active(q)) { |
943 | apic->vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; | 944 | apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
944 | wake_up_interruptible(q); | 945 | wake_up_interruptible(q); |
945 | } | 946 | } |
946 | if (apic_lvtt_period(apic)) { | 947 | if (apic_lvtt_period(apic)) { |
@@ -952,6 +953,16 @@ static int __apic_timer_fn(struct kvm_lapic *apic) | |||
952 | return result; | 953 | return result; |
953 | } | 954 | } |
954 | 955 | ||
956 | int apic_has_pending_timer(struct kvm_vcpu *vcpu) | ||
957 | { | ||
958 | struct kvm_lapic *lapic = vcpu->arch.apic; | ||
959 | |||
960 | if (lapic) | ||
961 | return atomic_read(&lapic->timer.pending); | ||
962 | |||
963 | return 0; | ||
964 | } | ||
965 | |||
955 | static int __inject_apic_timer_irq(struct kvm_lapic *apic) | 966 | static int __inject_apic_timer_irq(struct kvm_lapic *apic) |
956 | { | 967 | { |
957 | int vector; | 968 | int vector; |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e55af12e11b7..2ad6f5481671 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -27,11 +27,22 @@ | |||
27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
28 | #include <linux/module.h> | 28 | #include <linux/module.h> |
29 | #include <linux/swap.h> | 29 | #include <linux/swap.h> |
30 | #include <linux/hugetlb.h> | ||
31 | #include <linux/compiler.h> | ||
30 | 32 | ||
31 | #include <asm/page.h> | 33 | #include <asm/page.h> |
32 | #include <asm/cmpxchg.h> | 34 | #include <asm/cmpxchg.h> |
33 | #include <asm/io.h> | 35 | #include <asm/io.h> |
34 | 36 | ||
37 | /* | ||
38 | * When setting this variable to true it enables Two-Dimensional-Paging | ||
39 | * where the hardware walks 2 page tables: | ||
40 | * 1. the guest-virtual to guest-physical | ||
41 | * 2. while doing 1. it walks guest-physical to host-physical | ||
42 | * If the hardware supports that we don't need to do shadow paging. | ||
43 | */ | ||
44 | bool tdp_enabled = false; | ||
45 | |||
35 | #undef MMU_DEBUG | 46 | #undef MMU_DEBUG |
36 | 47 | ||
37 | #undef AUDIT | 48 | #undef AUDIT |
@@ -101,8 +112,6 @@ static int dbg = 1; | |||
101 | #define PT_FIRST_AVAIL_BITS_SHIFT 9 | 112 | #define PT_FIRST_AVAIL_BITS_SHIFT 9 |
102 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 | 113 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 |
103 | 114 | ||
104 | #define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) | ||
105 | |||
106 | #define VALID_PAGE(x) ((x) != INVALID_PAGE) | 115 | #define VALID_PAGE(x) ((x) != INVALID_PAGE) |
107 | 116 | ||
108 | #define PT64_LEVEL_BITS 9 | 117 | #define PT64_LEVEL_BITS 9 |
@@ -159,6 +168,13 @@ static int dbg = 1; | |||
159 | #define ACC_USER_MASK PT_USER_MASK | 168 | #define ACC_USER_MASK PT_USER_MASK |
160 | #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) | 169 | #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) |
161 | 170 | ||
171 | struct kvm_pv_mmu_op_buffer { | ||
172 | void *ptr; | ||
173 | unsigned len; | ||
174 | unsigned processed; | ||
175 | char buf[512] __aligned(sizeof(long)); | ||
176 | }; | ||
177 | |||
162 | struct kvm_rmap_desc { | 178 | struct kvm_rmap_desc { |
163 | u64 *shadow_ptes[RMAP_EXT]; | 179 | u64 *shadow_ptes[RMAP_EXT]; |
164 | struct kvm_rmap_desc *more; | 180 | struct kvm_rmap_desc *more; |
@@ -200,11 +216,15 @@ static int is_present_pte(unsigned long pte) | |||
200 | 216 | ||
201 | static int is_shadow_present_pte(u64 pte) | 217 | static int is_shadow_present_pte(u64 pte) |
202 | { | 218 | { |
203 | pte &= ~PT_SHADOW_IO_MARK; | ||
204 | return pte != shadow_trap_nonpresent_pte | 219 | return pte != shadow_trap_nonpresent_pte |
205 | && pte != shadow_notrap_nonpresent_pte; | 220 | && pte != shadow_notrap_nonpresent_pte; |
206 | } | 221 | } |
207 | 222 | ||
223 | static int is_large_pte(u64 pte) | ||
224 | { | ||
225 | return pte & PT_PAGE_SIZE_MASK; | ||
226 | } | ||
227 | |||
208 | static int is_writeble_pte(unsigned long pte) | 228 | static int is_writeble_pte(unsigned long pte) |
209 | { | 229 | { |
210 | return pte & PT_WRITABLE_MASK; | 230 | return pte & PT_WRITABLE_MASK; |
@@ -215,14 +235,14 @@ static int is_dirty_pte(unsigned long pte) | |||
215 | return pte & PT_DIRTY_MASK; | 235 | return pte & PT_DIRTY_MASK; |
216 | } | 236 | } |
217 | 237 | ||
218 | static int is_io_pte(unsigned long pte) | 238 | static int is_rmap_pte(u64 pte) |
219 | { | 239 | { |
220 | return pte & PT_SHADOW_IO_MARK; | 240 | return is_shadow_present_pte(pte); |
221 | } | 241 | } |
222 | 242 | ||
223 | static int is_rmap_pte(u64 pte) | 243 | static pfn_t spte_to_pfn(u64 pte) |
224 | { | 244 | { |
225 | return is_shadow_present_pte(pte); | 245 | return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; |
226 | } | 246 | } |
227 | 247 | ||
228 | static gfn_t pse36_gfn_delta(u32 gpte) | 248 | static gfn_t pse36_gfn_delta(u32 gpte) |
@@ -349,16 +369,100 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) | |||
349 | } | 369 | } |
350 | 370 | ||
351 | /* | 371 | /* |
372 | * Return the pointer to the largepage write count for a given | ||
373 | * gfn, handling slots that are not large page aligned. | ||
374 | */ | ||
375 | static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot) | ||
376 | { | ||
377 | unsigned long idx; | ||
378 | |||
379 | idx = (gfn / KVM_PAGES_PER_HPAGE) - | ||
380 | (slot->base_gfn / KVM_PAGES_PER_HPAGE); | ||
381 | return &slot->lpage_info[idx].write_count; | ||
382 | } | ||
383 | |||
384 | static void account_shadowed(struct kvm *kvm, gfn_t gfn) | ||
385 | { | ||
386 | int *write_count; | ||
387 | |||
388 | write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); | ||
389 | *write_count += 1; | ||
390 | WARN_ON(*write_count > KVM_PAGES_PER_HPAGE); | ||
391 | } | ||
392 | |||
393 | static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | ||
394 | { | ||
395 | int *write_count; | ||
396 | |||
397 | write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); | ||
398 | *write_count -= 1; | ||
399 | WARN_ON(*write_count < 0); | ||
400 | } | ||
401 | |||
402 | static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) | ||
403 | { | ||
404 | struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); | ||
405 | int *largepage_idx; | ||
406 | |||
407 | if (slot) { | ||
408 | largepage_idx = slot_largepage_idx(gfn, slot); | ||
409 | return *largepage_idx; | ||
410 | } | ||
411 | |||
412 | return 1; | ||
413 | } | ||
414 | |||
415 | static int host_largepage_backed(struct kvm *kvm, gfn_t gfn) | ||
416 | { | ||
417 | struct vm_area_struct *vma; | ||
418 | unsigned long addr; | ||
419 | |||
420 | addr = gfn_to_hva(kvm, gfn); | ||
421 | if (kvm_is_error_hva(addr)) | ||
422 | return 0; | ||
423 | |||
424 | vma = find_vma(current->mm, addr); | ||
425 | if (vma && is_vm_hugetlb_page(vma)) | ||
426 | return 1; | ||
427 | |||
428 | return 0; | ||
429 | } | ||
430 | |||
431 | static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn) | ||
432 | { | ||
433 | struct kvm_memory_slot *slot; | ||
434 | |||
435 | if (has_wrprotected_page(vcpu->kvm, large_gfn)) | ||
436 | return 0; | ||
437 | |||
438 | if (!host_largepage_backed(vcpu->kvm, large_gfn)) | ||
439 | return 0; | ||
440 | |||
441 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); | ||
442 | if (slot && slot->dirty_bitmap) | ||
443 | return 0; | ||
444 | |||
445 | return 1; | ||
446 | } | ||
447 | |||
448 | /* | ||
352 | * Take gfn and return the reverse mapping to it. | 449 | * Take gfn and return the reverse mapping to it. |
353 | * Note: gfn must be unaliased before this function get called | 450 | * Note: gfn must be unaliased before this function get called |
354 | */ | 451 | */ |
355 | 452 | ||
356 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn) | 453 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage) |
357 | { | 454 | { |
358 | struct kvm_memory_slot *slot; | 455 | struct kvm_memory_slot *slot; |
456 | unsigned long idx; | ||
359 | 457 | ||
360 | slot = gfn_to_memslot(kvm, gfn); | 458 | slot = gfn_to_memslot(kvm, gfn); |
361 | return &slot->rmap[gfn - slot->base_gfn]; | 459 | if (!lpage) |
460 | return &slot->rmap[gfn - slot->base_gfn]; | ||
461 | |||
462 | idx = (gfn / KVM_PAGES_PER_HPAGE) - | ||
463 | (slot->base_gfn / KVM_PAGES_PER_HPAGE); | ||
464 | |||
465 | return &slot->lpage_info[idx].rmap_pde; | ||
362 | } | 466 | } |
363 | 467 | ||
364 | /* | 468 | /* |
@@ -370,7 +474,7 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn) | |||
370 | * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc | 474 | * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc |
371 | * containing more mappings. | 475 | * containing more mappings. |
372 | */ | 476 | */ |
373 | static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | 477 | static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) |
374 | { | 478 | { |
375 | struct kvm_mmu_page *sp; | 479 | struct kvm_mmu_page *sp; |
376 | struct kvm_rmap_desc *desc; | 480 | struct kvm_rmap_desc *desc; |
@@ -382,7 +486,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
382 | gfn = unalias_gfn(vcpu->kvm, gfn); | 486 | gfn = unalias_gfn(vcpu->kvm, gfn); |
383 | sp = page_header(__pa(spte)); | 487 | sp = page_header(__pa(spte)); |
384 | sp->gfns[spte - sp->spt] = gfn; | 488 | sp->gfns[spte - sp->spt] = gfn; |
385 | rmapp = gfn_to_rmap(vcpu->kvm, gfn); | 489 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage); |
386 | if (!*rmapp) { | 490 | if (!*rmapp) { |
387 | rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); | 491 | rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); |
388 | *rmapp = (unsigned long)spte; | 492 | *rmapp = (unsigned long)spte; |
@@ -435,20 +539,21 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
435 | struct kvm_rmap_desc *desc; | 539 | struct kvm_rmap_desc *desc; |
436 | struct kvm_rmap_desc *prev_desc; | 540 | struct kvm_rmap_desc *prev_desc; |
437 | struct kvm_mmu_page *sp; | 541 | struct kvm_mmu_page *sp; |
438 | struct page *page; | 542 | pfn_t pfn; |
439 | unsigned long *rmapp; | 543 | unsigned long *rmapp; |
440 | int i; | 544 | int i; |
441 | 545 | ||
442 | if (!is_rmap_pte(*spte)) | 546 | if (!is_rmap_pte(*spte)) |
443 | return; | 547 | return; |
444 | sp = page_header(__pa(spte)); | 548 | sp = page_header(__pa(spte)); |
445 | page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); | 549 | pfn = spte_to_pfn(*spte); |
446 | mark_page_accessed(page); | 550 | if (*spte & PT_ACCESSED_MASK) |
551 | kvm_set_pfn_accessed(pfn); | ||
447 | if (is_writeble_pte(*spte)) | 552 | if (is_writeble_pte(*spte)) |
448 | kvm_release_page_dirty(page); | 553 | kvm_release_pfn_dirty(pfn); |
449 | else | 554 | else |
450 | kvm_release_page_clean(page); | 555 | kvm_release_pfn_clean(pfn); |
451 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt]); | 556 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte)); |
452 | if (!*rmapp) { | 557 | if (!*rmapp) { |
453 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); | 558 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); |
454 | BUG(); | 559 | BUG(); |
@@ -514,7 +619,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
514 | int write_protected = 0; | 619 | int write_protected = 0; |
515 | 620 | ||
516 | gfn = unalias_gfn(kvm, gfn); | 621 | gfn = unalias_gfn(kvm, gfn); |
517 | rmapp = gfn_to_rmap(kvm, gfn); | 622 | rmapp = gfn_to_rmap(kvm, gfn, 0); |
518 | 623 | ||
519 | spte = rmap_next(kvm, rmapp, NULL); | 624 | spte = rmap_next(kvm, rmapp, NULL); |
520 | while (spte) { | 625 | while (spte) { |
@@ -527,8 +632,35 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
527 | } | 632 | } |
528 | spte = rmap_next(kvm, rmapp, spte); | 633 | spte = rmap_next(kvm, rmapp, spte); |
529 | } | 634 | } |
635 | if (write_protected) { | ||
636 | pfn_t pfn; | ||
637 | |||
638 | spte = rmap_next(kvm, rmapp, NULL); | ||
639 | pfn = spte_to_pfn(*spte); | ||
640 | kvm_set_pfn_dirty(pfn); | ||
641 | } | ||
642 | |||
643 | /* check for huge page mappings */ | ||
644 | rmapp = gfn_to_rmap(kvm, gfn, 1); | ||
645 | spte = rmap_next(kvm, rmapp, NULL); | ||
646 | while (spte) { | ||
647 | BUG_ON(!spte); | ||
648 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | ||
649 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); | ||
650 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); | ||
651 | if (is_writeble_pte(*spte)) { | ||
652 | rmap_remove(kvm, spte); | ||
653 | --kvm->stat.lpages; | ||
654 | set_shadow_pte(spte, shadow_trap_nonpresent_pte); | ||
655 | write_protected = 1; | ||
656 | } | ||
657 | spte = rmap_next(kvm, rmapp, spte); | ||
658 | } | ||
659 | |||
530 | if (write_protected) | 660 | if (write_protected) |
531 | kvm_flush_remote_tlbs(kvm); | 661 | kvm_flush_remote_tlbs(kvm); |
662 | |||
663 | account_shadowed(kvm, gfn); | ||
532 | } | 664 | } |
533 | 665 | ||
534 | #ifdef MMU_DEBUG | 666 | #ifdef MMU_DEBUG |
@@ -538,8 +670,8 @@ static int is_empty_shadow_page(u64 *spt) | |||
538 | u64 *end; | 670 | u64 *end; |
539 | 671 | ||
540 | for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) | 672 | for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) |
541 | if ((*pos & ~PT_SHADOW_IO_MARK) != shadow_trap_nonpresent_pte) { | 673 | if (*pos != shadow_trap_nonpresent_pte) { |
542 | printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__, | 674 | printk(KERN_ERR "%s: %p %llx\n", __func__, |
543 | pos, *pos); | 675 | pos, *pos); |
544 | return 0; | 676 | return 0; |
545 | } | 677 | } |
@@ -559,7 +691,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
559 | 691 | ||
560 | static unsigned kvm_page_table_hashfn(gfn_t gfn) | 692 | static unsigned kvm_page_table_hashfn(gfn_t gfn) |
561 | { | 693 | { |
562 | return gfn; | 694 | return gfn & ((1 << KVM_MMU_HASH_SHIFT) - 1); |
563 | } | 695 | } |
564 | 696 | ||
565 | static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | 697 | static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, |
@@ -662,13 +794,14 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | |||
662 | struct kvm_mmu_page *sp; | 794 | struct kvm_mmu_page *sp; |
663 | struct hlist_node *node; | 795 | struct hlist_node *node; |
664 | 796 | ||
665 | pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn); | 797 | pgprintk("%s: looking for gfn %lx\n", __func__, gfn); |
666 | index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; | 798 | index = kvm_page_table_hashfn(gfn); |
667 | bucket = &kvm->arch.mmu_page_hash[index]; | 799 | bucket = &kvm->arch.mmu_page_hash[index]; |
668 | hlist_for_each_entry(sp, node, bucket, hash_link) | 800 | hlist_for_each_entry(sp, node, bucket, hash_link) |
669 | if (sp->gfn == gfn && !sp->role.metaphysical) { | 801 | if (sp->gfn == gfn && !sp->role.metaphysical |
802 | && !sp->role.invalid) { | ||
670 | pgprintk("%s: found role %x\n", | 803 | pgprintk("%s: found role %x\n", |
671 | __FUNCTION__, sp->role.word); | 804 | __func__, sp->role.word); |
672 | return sp; | 805 | return sp; |
673 | } | 806 | } |
674 | return NULL; | 807 | return NULL; |
@@ -699,27 +832,27 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
699 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; | 832 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; |
700 | role.quadrant = quadrant; | 833 | role.quadrant = quadrant; |
701 | } | 834 | } |
702 | pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__, | 835 | pgprintk("%s: looking gfn %lx role %x\n", __func__, |
703 | gfn, role.word); | 836 | gfn, role.word); |
704 | index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; | 837 | index = kvm_page_table_hashfn(gfn); |
705 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 838 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
706 | hlist_for_each_entry(sp, node, bucket, hash_link) | 839 | hlist_for_each_entry(sp, node, bucket, hash_link) |
707 | if (sp->gfn == gfn && sp->role.word == role.word) { | 840 | if (sp->gfn == gfn && sp->role.word == role.word) { |
708 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 841 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
709 | pgprintk("%s: found\n", __FUNCTION__); | 842 | pgprintk("%s: found\n", __func__); |
710 | return sp; | 843 | return sp; |
711 | } | 844 | } |
712 | ++vcpu->kvm->stat.mmu_cache_miss; | 845 | ++vcpu->kvm->stat.mmu_cache_miss; |
713 | sp = kvm_mmu_alloc_page(vcpu, parent_pte); | 846 | sp = kvm_mmu_alloc_page(vcpu, parent_pte); |
714 | if (!sp) | 847 | if (!sp) |
715 | return sp; | 848 | return sp; |
716 | pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word); | 849 | pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word); |
717 | sp->gfn = gfn; | 850 | sp->gfn = gfn; |
718 | sp->role = role; | 851 | sp->role = role; |
719 | hlist_add_head(&sp->hash_link, bucket); | 852 | hlist_add_head(&sp->hash_link, bucket); |
720 | vcpu->arch.mmu.prefetch_page(vcpu, sp); | ||
721 | if (!metaphysical) | 853 | if (!metaphysical) |
722 | rmap_write_protect(vcpu->kvm, gfn); | 854 | rmap_write_protect(vcpu->kvm, gfn); |
855 | vcpu->arch.mmu.prefetch_page(vcpu, sp); | ||
723 | return sp; | 856 | return sp; |
724 | } | 857 | } |
725 | 858 | ||
@@ -745,11 +878,17 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, | |||
745 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | 878 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { |
746 | ent = pt[i]; | 879 | ent = pt[i]; |
747 | 880 | ||
881 | if (is_shadow_present_pte(ent)) { | ||
882 | if (!is_large_pte(ent)) { | ||
883 | ent &= PT64_BASE_ADDR_MASK; | ||
884 | mmu_page_remove_parent_pte(page_header(ent), | ||
885 | &pt[i]); | ||
886 | } else { | ||
887 | --kvm->stat.lpages; | ||
888 | rmap_remove(kvm, &pt[i]); | ||
889 | } | ||
890 | } | ||
748 | pt[i] = shadow_trap_nonpresent_pte; | 891 | pt[i] = shadow_trap_nonpresent_pte; |
749 | if (!is_shadow_present_pte(ent)) | ||
750 | continue; | ||
751 | ent &= PT64_BASE_ADDR_MASK; | ||
752 | mmu_page_remove_parent_pte(page_header(ent), &pt[i]); | ||
753 | } | 892 | } |
754 | kvm_flush_remote_tlbs(kvm); | 893 | kvm_flush_remote_tlbs(kvm); |
755 | } | 894 | } |
@@ -789,10 +928,15 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
789 | } | 928 | } |
790 | kvm_mmu_page_unlink_children(kvm, sp); | 929 | kvm_mmu_page_unlink_children(kvm, sp); |
791 | if (!sp->root_count) { | 930 | if (!sp->root_count) { |
931 | if (!sp->role.metaphysical) | ||
932 | unaccount_shadowed(kvm, sp->gfn); | ||
792 | hlist_del(&sp->hash_link); | 933 | hlist_del(&sp->hash_link); |
793 | kvm_mmu_free_page(kvm, sp); | 934 | kvm_mmu_free_page(kvm, sp); |
794 | } else | 935 | } else { |
795 | list_move(&sp->link, &kvm->arch.active_mmu_pages); | 936 | list_move(&sp->link, &kvm->arch.active_mmu_pages); |
937 | sp->role.invalid = 1; | ||
938 | kvm_reload_remote_mmus(kvm); | ||
939 | } | ||
796 | kvm_mmu_reset_last_pte_updated(kvm); | 940 | kvm_mmu_reset_last_pte_updated(kvm); |
797 | } | 941 | } |
798 | 942 | ||
@@ -838,13 +982,13 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
838 | struct hlist_node *node, *n; | 982 | struct hlist_node *node, *n; |
839 | int r; | 983 | int r; |
840 | 984 | ||
841 | pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn); | 985 | pgprintk("%s: looking for gfn %lx\n", __func__, gfn); |
842 | r = 0; | 986 | r = 0; |
843 | index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; | 987 | index = kvm_page_table_hashfn(gfn); |
844 | bucket = &kvm->arch.mmu_page_hash[index]; | 988 | bucket = &kvm->arch.mmu_page_hash[index]; |
845 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) | 989 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) |
846 | if (sp->gfn == gfn && !sp->role.metaphysical) { | 990 | if (sp->gfn == gfn && !sp->role.metaphysical) { |
847 | pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn, | 991 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, |
848 | sp->role.word); | 992 | sp->role.word); |
849 | kvm_mmu_zap_page(kvm, sp); | 993 | kvm_mmu_zap_page(kvm, sp); |
850 | r = 1; | 994 | r = 1; |
@@ -857,7 +1001,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | |||
857 | struct kvm_mmu_page *sp; | 1001 | struct kvm_mmu_page *sp; |
858 | 1002 | ||
859 | while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) { | 1003 | while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) { |
860 | pgprintk("%s: zap %lx %x\n", __FUNCTION__, gfn, sp->role.word); | 1004 | pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word); |
861 | kvm_mmu_zap_page(kvm, sp); | 1005 | kvm_mmu_zap_page(kvm, sp); |
862 | } | 1006 | } |
863 | } | 1007 | } |
@@ -889,26 +1033,39 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | |||
889 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | 1033 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, |
890 | unsigned pt_access, unsigned pte_access, | 1034 | unsigned pt_access, unsigned pte_access, |
891 | int user_fault, int write_fault, int dirty, | 1035 | int user_fault, int write_fault, int dirty, |
892 | int *ptwrite, gfn_t gfn, struct page *page) | 1036 | int *ptwrite, int largepage, gfn_t gfn, |
1037 | pfn_t pfn, bool speculative) | ||
893 | { | 1038 | { |
894 | u64 spte; | 1039 | u64 spte; |
895 | int was_rmapped = 0; | 1040 | int was_rmapped = 0; |
896 | int was_writeble = is_writeble_pte(*shadow_pte); | 1041 | int was_writeble = is_writeble_pte(*shadow_pte); |
897 | hfn_t host_pfn = (*shadow_pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | ||
898 | 1042 | ||
899 | pgprintk("%s: spte %llx access %x write_fault %d" | 1043 | pgprintk("%s: spte %llx access %x write_fault %d" |
900 | " user_fault %d gfn %lx\n", | 1044 | " user_fault %d gfn %lx\n", |
901 | __FUNCTION__, *shadow_pte, pt_access, | 1045 | __func__, *shadow_pte, pt_access, |
902 | write_fault, user_fault, gfn); | 1046 | write_fault, user_fault, gfn); |
903 | 1047 | ||
904 | if (is_rmap_pte(*shadow_pte)) { | 1048 | if (is_rmap_pte(*shadow_pte)) { |
905 | if (host_pfn != page_to_pfn(page)) { | 1049 | /* |
1050 | * If we overwrite a PTE page pointer with a 2MB PMD, unlink | ||
1051 | * the parent of the now unreachable PTE. | ||
1052 | */ | ||
1053 | if (largepage && !is_large_pte(*shadow_pte)) { | ||
1054 | struct kvm_mmu_page *child; | ||
1055 | u64 pte = *shadow_pte; | ||
1056 | |||
1057 | child = page_header(pte & PT64_BASE_ADDR_MASK); | ||
1058 | mmu_page_remove_parent_pte(child, shadow_pte); | ||
1059 | } else if (pfn != spte_to_pfn(*shadow_pte)) { | ||
906 | pgprintk("hfn old %lx new %lx\n", | 1060 | pgprintk("hfn old %lx new %lx\n", |
907 | host_pfn, page_to_pfn(page)); | 1061 | spte_to_pfn(*shadow_pte), pfn); |
908 | rmap_remove(vcpu->kvm, shadow_pte); | 1062 | rmap_remove(vcpu->kvm, shadow_pte); |
1063 | } else { | ||
1064 | if (largepage) | ||
1065 | was_rmapped = is_large_pte(*shadow_pte); | ||
1066 | else | ||
1067 | was_rmapped = 1; | ||
909 | } | 1068 | } |
910 | else | ||
911 | was_rmapped = 1; | ||
912 | } | 1069 | } |
913 | 1070 | ||
914 | /* | 1071 | /* |
@@ -917,6 +1074,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
917 | * demand paging). | 1074 | * demand paging). |
918 | */ | 1075 | */ |
919 | spte = PT_PRESENT_MASK | PT_DIRTY_MASK; | 1076 | spte = PT_PRESENT_MASK | PT_DIRTY_MASK; |
1077 | if (!speculative) | ||
1078 | pte_access |= PT_ACCESSED_MASK; | ||
920 | if (!dirty) | 1079 | if (!dirty) |
921 | pte_access &= ~ACC_WRITE_MASK; | 1080 | pte_access &= ~ACC_WRITE_MASK; |
922 | if (!(pte_access & ACC_EXEC_MASK)) | 1081 | if (!(pte_access & ACC_EXEC_MASK)) |
@@ -925,15 +1084,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
925 | spte |= PT_PRESENT_MASK; | 1084 | spte |= PT_PRESENT_MASK; |
926 | if (pte_access & ACC_USER_MASK) | 1085 | if (pte_access & ACC_USER_MASK) |
927 | spte |= PT_USER_MASK; | 1086 | spte |= PT_USER_MASK; |
1087 | if (largepage) | ||
1088 | spte |= PT_PAGE_SIZE_MASK; | ||
928 | 1089 | ||
929 | if (is_error_page(page)) { | 1090 | spte |= (u64)pfn << PAGE_SHIFT; |
930 | set_shadow_pte(shadow_pte, | ||
931 | shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK); | ||
932 | kvm_release_page_clean(page); | ||
933 | return; | ||
934 | } | ||
935 | |||
936 | spte |= page_to_phys(page); | ||
937 | 1091 | ||
938 | if ((pte_access & ACC_WRITE_MASK) | 1092 | if ((pte_access & ACC_WRITE_MASK) |
939 | || (write_fault && !is_write_protection(vcpu) && !user_fault)) { | 1093 | || (write_fault && !is_write_protection(vcpu) && !user_fault)) { |
@@ -946,9 +1100,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
946 | } | 1100 | } |
947 | 1101 | ||
948 | shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); | 1102 | shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); |
949 | if (shadow) { | 1103 | if (shadow || |
1104 | (largepage && has_wrprotected_page(vcpu->kvm, gfn))) { | ||
950 | pgprintk("%s: found shadow page for %lx, marking ro\n", | 1105 | pgprintk("%s: found shadow page for %lx, marking ro\n", |
951 | __FUNCTION__, gfn); | 1106 | __func__, gfn); |
952 | pte_access &= ~ACC_WRITE_MASK; | 1107 | pte_access &= ~ACC_WRITE_MASK; |
953 | if (is_writeble_pte(spte)) { | 1108 | if (is_writeble_pte(spte)) { |
954 | spte &= ~PT_WRITABLE_MASK; | 1109 | spte &= ~PT_WRITABLE_MASK; |
@@ -964,18 +1119,25 @@ unshadowed: | |||
964 | if (pte_access & ACC_WRITE_MASK) | 1119 | if (pte_access & ACC_WRITE_MASK) |
965 | mark_page_dirty(vcpu->kvm, gfn); | 1120 | mark_page_dirty(vcpu->kvm, gfn); |
966 | 1121 | ||
967 | pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte); | 1122 | pgprintk("%s: setting spte %llx\n", __func__, spte); |
1123 | pgprintk("instantiating %s PTE (%s) at %d (%llx) addr %llx\n", | ||
1124 | (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB", | ||
1125 | (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte); | ||
968 | set_shadow_pte(shadow_pte, spte); | 1126 | set_shadow_pte(shadow_pte, spte); |
1127 | if (!was_rmapped && (spte & PT_PAGE_SIZE_MASK) | ||
1128 | && (spte & PT_PRESENT_MASK)) | ||
1129 | ++vcpu->kvm->stat.lpages; | ||
1130 | |||
969 | page_header_update_slot(vcpu->kvm, shadow_pte, gfn); | 1131 | page_header_update_slot(vcpu->kvm, shadow_pte, gfn); |
970 | if (!was_rmapped) { | 1132 | if (!was_rmapped) { |
971 | rmap_add(vcpu, shadow_pte, gfn); | 1133 | rmap_add(vcpu, shadow_pte, gfn, largepage); |
972 | if (!is_rmap_pte(*shadow_pte)) | 1134 | if (!is_rmap_pte(*shadow_pte)) |
973 | kvm_release_page_clean(page); | 1135 | kvm_release_pfn_clean(pfn); |
974 | } else { | 1136 | } else { |
975 | if (was_writeble) | 1137 | if (was_writeble) |
976 | kvm_release_page_dirty(page); | 1138 | kvm_release_pfn_dirty(pfn); |
977 | else | 1139 | else |
978 | kvm_release_page_clean(page); | 1140 | kvm_release_pfn_clean(pfn); |
979 | } | 1141 | } |
980 | if (!ptwrite || !*ptwrite) | 1142 | if (!ptwrite || !*ptwrite) |
981 | vcpu->arch.last_pte_updated = shadow_pte; | 1143 | vcpu->arch.last_pte_updated = shadow_pte; |
@@ -985,10 +1147,10 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | |||
985 | { | 1147 | { |
986 | } | 1148 | } |
987 | 1149 | ||
988 | static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, | 1150 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, |
989 | gfn_t gfn, struct page *page) | 1151 | int largepage, gfn_t gfn, pfn_t pfn, |
1152 | int level) | ||
990 | { | 1153 | { |
991 | int level = PT32E_ROOT_LEVEL; | ||
992 | hpa_t table_addr = vcpu->arch.mmu.root_hpa; | 1154 | hpa_t table_addr = vcpu->arch.mmu.root_hpa; |
993 | int pt_write = 0; | 1155 | int pt_write = 0; |
994 | 1156 | ||
@@ -1001,8 +1163,14 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, | |||
1001 | 1163 | ||
1002 | if (level == 1) { | 1164 | if (level == 1) { |
1003 | mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, | 1165 | mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, |
1004 | 0, write, 1, &pt_write, gfn, page); | 1166 | 0, write, 1, &pt_write, 0, gfn, pfn, false); |
1005 | return pt_write || is_io_pte(table[index]); | 1167 | return pt_write; |
1168 | } | ||
1169 | |||
1170 | if (largepage && level == 2) { | ||
1171 | mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, | ||
1172 | 0, write, 1, &pt_write, 1, gfn, pfn, false); | ||
1173 | return pt_write; | ||
1006 | } | 1174 | } |
1007 | 1175 | ||
1008 | if (table[index] == shadow_trap_nonpresent_pte) { | 1176 | if (table[index] == shadow_trap_nonpresent_pte) { |
@@ -1016,7 +1184,7 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, | |||
1016 | 1, ACC_ALL, &table[index]); | 1184 | 1, ACC_ALL, &table[index]); |
1017 | if (!new_table) { | 1185 | if (!new_table) { |
1018 | pgprintk("nonpaging_map: ENOMEM\n"); | 1186 | pgprintk("nonpaging_map: ENOMEM\n"); |
1019 | kvm_release_page_clean(page); | 1187 | kvm_release_pfn_clean(pfn); |
1020 | return -ENOMEM; | 1188 | return -ENOMEM; |
1021 | } | 1189 | } |
1022 | 1190 | ||
@@ -1030,21 +1198,30 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, | |||
1030 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | 1198 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) |
1031 | { | 1199 | { |
1032 | int r; | 1200 | int r; |
1033 | 1201 | int largepage = 0; | |
1034 | struct page *page; | 1202 | pfn_t pfn; |
1035 | |||
1036 | down_read(&vcpu->kvm->slots_lock); | ||
1037 | 1203 | ||
1038 | down_read(¤t->mm->mmap_sem); | 1204 | down_read(¤t->mm->mmap_sem); |
1039 | page = gfn_to_page(vcpu->kvm, gfn); | 1205 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { |
1206 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | ||
1207 | largepage = 1; | ||
1208 | } | ||
1209 | |||
1210 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | ||
1040 | up_read(¤t->mm->mmap_sem); | 1211 | up_read(¤t->mm->mmap_sem); |
1041 | 1212 | ||
1213 | /* mmio */ | ||
1214 | if (is_error_pfn(pfn)) { | ||
1215 | kvm_release_pfn_clean(pfn); | ||
1216 | return 1; | ||
1217 | } | ||
1218 | |||
1042 | spin_lock(&vcpu->kvm->mmu_lock); | 1219 | spin_lock(&vcpu->kvm->mmu_lock); |
1043 | kvm_mmu_free_some_pages(vcpu); | 1220 | kvm_mmu_free_some_pages(vcpu); |
1044 | r = __nonpaging_map(vcpu, v, write, gfn, page); | 1221 | r = __direct_map(vcpu, v, write, largepage, gfn, pfn, |
1222 | PT32E_ROOT_LEVEL); | ||
1045 | spin_unlock(&vcpu->kvm->mmu_lock); | 1223 | spin_unlock(&vcpu->kvm->mmu_lock); |
1046 | 1224 | ||
1047 | up_read(&vcpu->kvm->slots_lock); | ||
1048 | 1225 | ||
1049 | return r; | 1226 | return r; |
1050 | } | 1227 | } |
@@ -1073,6 +1250,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
1073 | 1250 | ||
1074 | sp = page_header(root); | 1251 | sp = page_header(root); |
1075 | --sp->root_count; | 1252 | --sp->root_count; |
1253 | if (!sp->root_count && sp->role.invalid) | ||
1254 | kvm_mmu_zap_page(vcpu->kvm, sp); | ||
1076 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 1255 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
1077 | spin_unlock(&vcpu->kvm->mmu_lock); | 1256 | spin_unlock(&vcpu->kvm->mmu_lock); |
1078 | return; | 1257 | return; |
@@ -1085,6 +1264,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
1085 | root &= PT64_BASE_ADDR_MASK; | 1264 | root &= PT64_BASE_ADDR_MASK; |
1086 | sp = page_header(root); | 1265 | sp = page_header(root); |
1087 | --sp->root_count; | 1266 | --sp->root_count; |
1267 | if (!sp->root_count && sp->role.invalid) | ||
1268 | kvm_mmu_zap_page(vcpu->kvm, sp); | ||
1088 | } | 1269 | } |
1089 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; | 1270 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; |
1090 | } | 1271 | } |
@@ -1097,6 +1278,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
1097 | int i; | 1278 | int i; |
1098 | gfn_t root_gfn; | 1279 | gfn_t root_gfn; |
1099 | struct kvm_mmu_page *sp; | 1280 | struct kvm_mmu_page *sp; |
1281 | int metaphysical = 0; | ||
1100 | 1282 | ||
1101 | root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; | 1283 | root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; |
1102 | 1284 | ||
@@ -1105,14 +1287,20 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
1105 | hpa_t root = vcpu->arch.mmu.root_hpa; | 1287 | hpa_t root = vcpu->arch.mmu.root_hpa; |
1106 | 1288 | ||
1107 | ASSERT(!VALID_PAGE(root)); | 1289 | ASSERT(!VALID_PAGE(root)); |
1290 | if (tdp_enabled) | ||
1291 | metaphysical = 1; | ||
1108 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | 1292 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, |
1109 | PT64_ROOT_LEVEL, 0, ACC_ALL, NULL); | 1293 | PT64_ROOT_LEVEL, metaphysical, |
1294 | ACC_ALL, NULL); | ||
1110 | root = __pa(sp->spt); | 1295 | root = __pa(sp->spt); |
1111 | ++sp->root_count; | 1296 | ++sp->root_count; |
1112 | vcpu->arch.mmu.root_hpa = root; | 1297 | vcpu->arch.mmu.root_hpa = root; |
1113 | return; | 1298 | return; |
1114 | } | 1299 | } |
1115 | #endif | 1300 | #endif |
1301 | metaphysical = !is_paging(vcpu); | ||
1302 | if (tdp_enabled) | ||
1303 | metaphysical = 1; | ||
1116 | for (i = 0; i < 4; ++i) { | 1304 | for (i = 0; i < 4; ++i) { |
1117 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 1305 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
1118 | 1306 | ||
@@ -1126,7 +1314,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
1126 | } else if (vcpu->arch.mmu.root_level == 0) | 1314 | } else if (vcpu->arch.mmu.root_level == 0) |
1127 | root_gfn = 0; | 1315 | root_gfn = 0; |
1128 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 1316 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
1129 | PT32_ROOT_LEVEL, !is_paging(vcpu), | 1317 | PT32_ROOT_LEVEL, metaphysical, |
1130 | ACC_ALL, NULL); | 1318 | ACC_ALL, NULL); |
1131 | root = __pa(sp->spt); | 1319 | root = __pa(sp->spt); |
1132 | ++sp->root_count; | 1320 | ++sp->root_count; |
@@ -1146,7 +1334,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | |||
1146 | gfn_t gfn; | 1334 | gfn_t gfn; |
1147 | int r; | 1335 | int r; |
1148 | 1336 | ||
1149 | pgprintk("%s: gva %lx error %x\n", __FUNCTION__, gva, error_code); | 1337 | pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); |
1150 | r = mmu_topup_memory_caches(vcpu); | 1338 | r = mmu_topup_memory_caches(vcpu); |
1151 | if (r) | 1339 | if (r) |
1152 | return r; | 1340 | return r; |
@@ -1160,6 +1348,41 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | |||
1160 | error_code & PFERR_WRITE_MASK, gfn); | 1348 | error_code & PFERR_WRITE_MASK, gfn); |
1161 | } | 1349 | } |
1162 | 1350 | ||
1351 | static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | ||
1352 | u32 error_code) | ||
1353 | { | ||
1354 | pfn_t pfn; | ||
1355 | int r; | ||
1356 | int largepage = 0; | ||
1357 | gfn_t gfn = gpa >> PAGE_SHIFT; | ||
1358 | |||
1359 | ASSERT(vcpu); | ||
1360 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); | ||
1361 | |||
1362 | r = mmu_topup_memory_caches(vcpu); | ||
1363 | if (r) | ||
1364 | return r; | ||
1365 | |||
1366 | down_read(¤t->mm->mmap_sem); | ||
1367 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { | ||
1368 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | ||
1369 | largepage = 1; | ||
1370 | } | ||
1371 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | ||
1372 | up_read(¤t->mm->mmap_sem); | ||
1373 | if (is_error_pfn(pfn)) { | ||
1374 | kvm_release_pfn_clean(pfn); | ||
1375 | return 1; | ||
1376 | } | ||
1377 | spin_lock(&vcpu->kvm->mmu_lock); | ||
1378 | kvm_mmu_free_some_pages(vcpu); | ||
1379 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, | ||
1380 | largepage, gfn, pfn, TDP_ROOT_LEVEL); | ||
1381 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
1382 | |||
1383 | return r; | ||
1384 | } | ||
1385 | |||
1163 | static void nonpaging_free(struct kvm_vcpu *vcpu) | 1386 | static void nonpaging_free(struct kvm_vcpu *vcpu) |
1164 | { | 1387 | { |
1165 | mmu_free_roots(vcpu); | 1388 | mmu_free_roots(vcpu); |
@@ -1188,7 +1411,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) | |||
1188 | 1411 | ||
1189 | static void paging_new_cr3(struct kvm_vcpu *vcpu) | 1412 | static void paging_new_cr3(struct kvm_vcpu *vcpu) |
1190 | { | 1413 | { |
1191 | pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->arch.cr3); | 1414 | pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3); |
1192 | mmu_free_roots(vcpu); | 1415 | mmu_free_roots(vcpu); |
1193 | } | 1416 | } |
1194 | 1417 | ||
@@ -1253,7 +1476,35 @@ static int paging32E_init_context(struct kvm_vcpu *vcpu) | |||
1253 | return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); | 1476 | return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); |
1254 | } | 1477 | } |
1255 | 1478 | ||
1256 | static int init_kvm_mmu(struct kvm_vcpu *vcpu) | 1479 | static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) |
1480 | { | ||
1481 | struct kvm_mmu *context = &vcpu->arch.mmu; | ||
1482 | |||
1483 | context->new_cr3 = nonpaging_new_cr3; | ||
1484 | context->page_fault = tdp_page_fault; | ||
1485 | context->free = nonpaging_free; | ||
1486 | context->prefetch_page = nonpaging_prefetch_page; | ||
1487 | context->shadow_root_level = TDP_ROOT_LEVEL; | ||
1488 | context->root_hpa = INVALID_PAGE; | ||
1489 | |||
1490 | if (!is_paging(vcpu)) { | ||
1491 | context->gva_to_gpa = nonpaging_gva_to_gpa; | ||
1492 | context->root_level = 0; | ||
1493 | } else if (is_long_mode(vcpu)) { | ||
1494 | context->gva_to_gpa = paging64_gva_to_gpa; | ||
1495 | context->root_level = PT64_ROOT_LEVEL; | ||
1496 | } else if (is_pae(vcpu)) { | ||
1497 | context->gva_to_gpa = paging64_gva_to_gpa; | ||
1498 | context->root_level = PT32E_ROOT_LEVEL; | ||
1499 | } else { | ||
1500 | context->gva_to_gpa = paging32_gva_to_gpa; | ||
1501 | context->root_level = PT32_ROOT_LEVEL; | ||
1502 | } | ||
1503 | |||
1504 | return 0; | ||
1505 | } | ||
1506 | |||
1507 | static int init_kvm_softmmu(struct kvm_vcpu *vcpu) | ||
1257 | { | 1508 | { |
1258 | ASSERT(vcpu); | 1509 | ASSERT(vcpu); |
1259 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 1510 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
@@ -1268,6 +1519,16 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu) | |||
1268 | return paging32_init_context(vcpu); | 1519 | return paging32_init_context(vcpu); |
1269 | } | 1520 | } |
1270 | 1521 | ||
1522 | static int init_kvm_mmu(struct kvm_vcpu *vcpu) | ||
1523 | { | ||
1524 | vcpu->arch.update_pte.pfn = bad_pfn; | ||
1525 | |||
1526 | if (tdp_enabled) | ||
1527 | return init_kvm_tdp_mmu(vcpu); | ||
1528 | else | ||
1529 | return init_kvm_softmmu(vcpu); | ||
1530 | } | ||
1531 | |||
1271 | static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) | 1532 | static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) |
1272 | { | 1533 | { |
1273 | ASSERT(vcpu); | 1534 | ASSERT(vcpu); |
@@ -1316,7 +1577,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | |||
1316 | 1577 | ||
1317 | pte = *spte; | 1578 | pte = *spte; |
1318 | if (is_shadow_present_pte(pte)) { | 1579 | if (is_shadow_present_pte(pte)) { |
1319 | if (sp->role.level == PT_PAGE_TABLE_LEVEL) | 1580 | if (sp->role.level == PT_PAGE_TABLE_LEVEL || |
1581 | is_large_pte(pte)) | ||
1320 | rmap_remove(vcpu->kvm, spte); | 1582 | rmap_remove(vcpu->kvm, spte); |
1321 | else { | 1583 | else { |
1322 | child = page_header(pte & PT64_BASE_ADDR_MASK); | 1584 | child = page_header(pte & PT64_BASE_ADDR_MASK); |
@@ -1324,24 +1586,26 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | |||
1324 | } | 1586 | } |
1325 | } | 1587 | } |
1326 | set_shadow_pte(spte, shadow_trap_nonpresent_pte); | 1588 | set_shadow_pte(spte, shadow_trap_nonpresent_pte); |
1589 | if (is_large_pte(pte)) | ||
1590 | --vcpu->kvm->stat.lpages; | ||
1327 | } | 1591 | } |
1328 | 1592 | ||
1329 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | 1593 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, |
1330 | struct kvm_mmu_page *sp, | 1594 | struct kvm_mmu_page *sp, |
1331 | u64 *spte, | 1595 | u64 *spte, |
1332 | const void *new, int bytes, | 1596 | const void *new) |
1333 | int offset_in_pte) | ||
1334 | { | 1597 | { |
1335 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) { | 1598 | if ((sp->role.level != PT_PAGE_TABLE_LEVEL) |
1599 | && !vcpu->arch.update_pte.largepage) { | ||
1336 | ++vcpu->kvm->stat.mmu_pde_zapped; | 1600 | ++vcpu->kvm->stat.mmu_pde_zapped; |
1337 | return; | 1601 | return; |
1338 | } | 1602 | } |
1339 | 1603 | ||
1340 | ++vcpu->kvm->stat.mmu_pte_updated; | 1604 | ++vcpu->kvm->stat.mmu_pte_updated; |
1341 | if (sp->role.glevels == PT32_ROOT_LEVEL) | 1605 | if (sp->role.glevels == PT32_ROOT_LEVEL) |
1342 | paging32_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte); | 1606 | paging32_update_pte(vcpu, sp, spte, new); |
1343 | else | 1607 | else |
1344 | paging64_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte); | 1608 | paging64_update_pte(vcpu, sp, spte, new); |
1345 | } | 1609 | } |
1346 | 1610 | ||
1347 | static bool need_remote_flush(u64 old, u64 new) | 1611 | static bool need_remote_flush(u64 old, u64 new) |
@@ -1378,7 +1642,9 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1378 | gfn_t gfn; | 1642 | gfn_t gfn; |
1379 | int r; | 1643 | int r; |
1380 | u64 gpte = 0; | 1644 | u64 gpte = 0; |
1381 | struct page *page; | 1645 | pfn_t pfn; |
1646 | |||
1647 | vcpu->arch.update_pte.largepage = 0; | ||
1382 | 1648 | ||
1383 | if (bytes != 4 && bytes != 8) | 1649 | if (bytes != 4 && bytes != 8) |
1384 | return; | 1650 | return; |
@@ -1408,11 +1674,19 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1408 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | 1674 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; |
1409 | 1675 | ||
1410 | down_read(¤t->mm->mmap_sem); | 1676 | down_read(¤t->mm->mmap_sem); |
1411 | page = gfn_to_page(vcpu->kvm, gfn); | 1677 | if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) { |
1678 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | ||
1679 | vcpu->arch.update_pte.largepage = 1; | ||
1680 | } | ||
1681 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | ||
1412 | up_read(¤t->mm->mmap_sem); | 1682 | up_read(¤t->mm->mmap_sem); |
1413 | 1683 | ||
1684 | if (is_error_pfn(pfn)) { | ||
1685 | kvm_release_pfn_clean(pfn); | ||
1686 | return; | ||
1687 | } | ||
1414 | vcpu->arch.update_pte.gfn = gfn; | 1688 | vcpu->arch.update_pte.gfn = gfn; |
1415 | vcpu->arch.update_pte.page = page; | 1689 | vcpu->arch.update_pte.pfn = pfn; |
1416 | } | 1690 | } |
1417 | 1691 | ||
1418 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 1692 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
@@ -1423,7 +1697,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1423 | struct hlist_node *node, *n; | 1697 | struct hlist_node *node, *n; |
1424 | struct hlist_head *bucket; | 1698 | struct hlist_head *bucket; |
1425 | unsigned index; | 1699 | unsigned index; |
1426 | u64 entry; | 1700 | u64 entry, gentry; |
1427 | u64 *spte; | 1701 | u64 *spte; |
1428 | unsigned offset = offset_in_page(gpa); | 1702 | unsigned offset = offset_in_page(gpa); |
1429 | unsigned pte_size; | 1703 | unsigned pte_size; |
@@ -1433,8 +1707,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1433 | int level; | 1707 | int level; |
1434 | int flooded = 0; | 1708 | int flooded = 0; |
1435 | int npte; | 1709 | int npte; |
1710 | int r; | ||
1436 | 1711 | ||
1437 | pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); | 1712 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
1438 | mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); | 1713 | mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); |
1439 | spin_lock(&vcpu->kvm->mmu_lock); | 1714 | spin_lock(&vcpu->kvm->mmu_lock); |
1440 | kvm_mmu_free_some_pages(vcpu); | 1715 | kvm_mmu_free_some_pages(vcpu); |
@@ -1450,7 +1725,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1450 | vcpu->arch.last_pt_write_count = 1; | 1725 | vcpu->arch.last_pt_write_count = 1; |
1451 | vcpu->arch.last_pte_updated = NULL; | 1726 | vcpu->arch.last_pte_updated = NULL; |
1452 | } | 1727 | } |
1453 | index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; | 1728 | index = kvm_page_table_hashfn(gfn); |
1454 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 1729 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
1455 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { | 1730 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { |
1456 | if (sp->gfn != gfn || sp->role.metaphysical) | 1731 | if (sp->gfn != gfn || sp->role.metaphysical) |
@@ -1496,20 +1771,29 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1496 | continue; | 1771 | continue; |
1497 | } | 1772 | } |
1498 | spte = &sp->spt[page_offset / sizeof(*spte)]; | 1773 | spte = &sp->spt[page_offset / sizeof(*spte)]; |
1774 | if ((gpa & (pte_size - 1)) || (bytes < pte_size)) { | ||
1775 | gentry = 0; | ||
1776 | r = kvm_read_guest_atomic(vcpu->kvm, | ||
1777 | gpa & ~(u64)(pte_size - 1), | ||
1778 | &gentry, pte_size); | ||
1779 | new = (const void *)&gentry; | ||
1780 | if (r < 0) | ||
1781 | new = NULL; | ||
1782 | } | ||
1499 | while (npte--) { | 1783 | while (npte--) { |
1500 | entry = *spte; | 1784 | entry = *spte; |
1501 | mmu_pte_write_zap_pte(vcpu, sp, spte); | 1785 | mmu_pte_write_zap_pte(vcpu, sp, spte); |
1502 | mmu_pte_write_new_pte(vcpu, sp, spte, new, bytes, | 1786 | if (new) |
1503 | page_offset & (pte_size - 1)); | 1787 | mmu_pte_write_new_pte(vcpu, sp, spte, new); |
1504 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); | 1788 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); |
1505 | ++spte; | 1789 | ++spte; |
1506 | } | 1790 | } |
1507 | } | 1791 | } |
1508 | kvm_mmu_audit(vcpu, "post pte write"); | 1792 | kvm_mmu_audit(vcpu, "post pte write"); |
1509 | spin_unlock(&vcpu->kvm->mmu_lock); | 1793 | spin_unlock(&vcpu->kvm->mmu_lock); |
1510 | if (vcpu->arch.update_pte.page) { | 1794 | if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { |
1511 | kvm_release_page_clean(vcpu->arch.update_pte.page); | 1795 | kvm_release_pfn_clean(vcpu->arch.update_pte.pfn); |
1512 | vcpu->arch.update_pte.page = NULL; | 1796 | vcpu->arch.update_pte.pfn = bad_pfn; |
1513 | } | 1797 | } |
1514 | } | 1798 | } |
1515 | 1799 | ||
@@ -1518,9 +1802,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
1518 | gpa_t gpa; | 1802 | gpa_t gpa; |
1519 | int r; | 1803 | int r; |
1520 | 1804 | ||
1521 | down_read(&vcpu->kvm->slots_lock); | ||
1522 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); | 1805 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); |
1523 | up_read(&vcpu->kvm->slots_lock); | ||
1524 | 1806 | ||
1525 | spin_lock(&vcpu->kvm->mmu_lock); | 1807 | spin_lock(&vcpu->kvm->mmu_lock); |
1526 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 1808 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
@@ -1577,6 +1859,12 @@ out: | |||
1577 | } | 1859 | } |
1578 | EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); | 1860 | EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); |
1579 | 1861 | ||
1862 | void kvm_enable_tdp(void) | ||
1863 | { | ||
1864 | tdp_enabled = true; | ||
1865 | } | ||
1866 | EXPORT_SYMBOL_GPL(kvm_enable_tdp); | ||
1867 | |||
1580 | static void free_mmu_pages(struct kvm_vcpu *vcpu) | 1868 | static void free_mmu_pages(struct kvm_vcpu *vcpu) |
1581 | { | 1869 | { |
1582 | struct kvm_mmu_page *sp; | 1870 | struct kvm_mmu_page *sp; |
@@ -1677,7 +1965,53 @@ void kvm_mmu_zap_all(struct kvm *kvm) | |||
1677 | kvm_flush_remote_tlbs(kvm); | 1965 | kvm_flush_remote_tlbs(kvm); |
1678 | } | 1966 | } |
1679 | 1967 | ||
1680 | void kvm_mmu_module_exit(void) | 1968 | void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) |
1969 | { | ||
1970 | struct kvm_mmu_page *page; | ||
1971 | |||
1972 | page = container_of(kvm->arch.active_mmu_pages.prev, | ||
1973 | struct kvm_mmu_page, link); | ||
1974 | kvm_mmu_zap_page(kvm, page); | ||
1975 | } | ||
1976 | |||
1977 | static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | ||
1978 | { | ||
1979 | struct kvm *kvm; | ||
1980 | struct kvm *kvm_freed = NULL; | ||
1981 | int cache_count = 0; | ||
1982 | |||
1983 | spin_lock(&kvm_lock); | ||
1984 | |||
1985 | list_for_each_entry(kvm, &vm_list, vm_list) { | ||
1986 | int npages; | ||
1987 | |||
1988 | spin_lock(&kvm->mmu_lock); | ||
1989 | npages = kvm->arch.n_alloc_mmu_pages - | ||
1990 | kvm->arch.n_free_mmu_pages; | ||
1991 | cache_count += npages; | ||
1992 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { | ||
1993 | kvm_mmu_remove_one_alloc_mmu_page(kvm); | ||
1994 | cache_count--; | ||
1995 | kvm_freed = kvm; | ||
1996 | } | ||
1997 | nr_to_scan--; | ||
1998 | |||
1999 | spin_unlock(&kvm->mmu_lock); | ||
2000 | } | ||
2001 | if (kvm_freed) | ||
2002 | list_move_tail(&kvm_freed->vm_list, &vm_list); | ||
2003 | |||
2004 | spin_unlock(&kvm_lock); | ||
2005 | |||
2006 | return cache_count; | ||
2007 | } | ||
2008 | |||
2009 | static struct shrinker mmu_shrinker = { | ||
2010 | .shrink = mmu_shrink, | ||
2011 | .seeks = DEFAULT_SEEKS * 10, | ||
2012 | }; | ||
2013 | |||
2014 | void mmu_destroy_caches(void) | ||
1681 | { | 2015 | { |
1682 | if (pte_chain_cache) | 2016 | if (pte_chain_cache) |
1683 | kmem_cache_destroy(pte_chain_cache); | 2017 | kmem_cache_destroy(pte_chain_cache); |
@@ -1687,6 +2021,12 @@ void kvm_mmu_module_exit(void) | |||
1687 | kmem_cache_destroy(mmu_page_header_cache); | 2021 | kmem_cache_destroy(mmu_page_header_cache); |
1688 | } | 2022 | } |
1689 | 2023 | ||
2024 | void kvm_mmu_module_exit(void) | ||
2025 | { | ||
2026 | mmu_destroy_caches(); | ||
2027 | unregister_shrinker(&mmu_shrinker); | ||
2028 | } | ||
2029 | |||
1690 | int kvm_mmu_module_init(void) | 2030 | int kvm_mmu_module_init(void) |
1691 | { | 2031 | { |
1692 | pte_chain_cache = kmem_cache_create("kvm_pte_chain", | 2032 | pte_chain_cache = kmem_cache_create("kvm_pte_chain", |
@@ -1706,10 +2046,12 @@ int kvm_mmu_module_init(void) | |||
1706 | if (!mmu_page_header_cache) | 2046 | if (!mmu_page_header_cache) |
1707 | goto nomem; | 2047 | goto nomem; |
1708 | 2048 | ||
2049 | register_shrinker(&mmu_shrinker); | ||
2050 | |||
1709 | return 0; | 2051 | return 0; |
1710 | 2052 | ||
1711 | nomem: | 2053 | nomem: |
1712 | kvm_mmu_module_exit(); | 2054 | mmu_destroy_caches(); |
1713 | return -ENOMEM; | 2055 | return -ENOMEM; |
1714 | } | 2056 | } |
1715 | 2057 | ||
@@ -1732,6 +2074,127 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | |||
1732 | return nr_mmu_pages; | 2074 | return nr_mmu_pages; |
1733 | } | 2075 | } |
1734 | 2076 | ||
2077 | static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer, | ||
2078 | unsigned len) | ||
2079 | { | ||
2080 | if (len > buffer->len) | ||
2081 | return NULL; | ||
2082 | return buffer->ptr; | ||
2083 | } | ||
2084 | |||
2085 | static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer, | ||
2086 | unsigned len) | ||
2087 | { | ||
2088 | void *ret; | ||
2089 | |||
2090 | ret = pv_mmu_peek_buffer(buffer, len); | ||
2091 | if (!ret) | ||
2092 | return ret; | ||
2093 | buffer->ptr += len; | ||
2094 | buffer->len -= len; | ||
2095 | buffer->processed += len; | ||
2096 | return ret; | ||
2097 | } | ||
2098 | |||
2099 | static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, | ||
2100 | gpa_t addr, gpa_t value) | ||
2101 | { | ||
2102 | int bytes = 8; | ||
2103 | int r; | ||
2104 | |||
2105 | if (!is_long_mode(vcpu) && !is_pae(vcpu)) | ||
2106 | bytes = 4; | ||
2107 | |||
2108 | r = mmu_topup_memory_caches(vcpu); | ||
2109 | if (r) | ||
2110 | return r; | ||
2111 | |||
2112 | if (!emulator_write_phys(vcpu, addr, &value, bytes)) | ||
2113 | return -EFAULT; | ||
2114 | |||
2115 | return 1; | ||
2116 | } | ||
2117 | |||
2118 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) | ||
2119 | { | ||
2120 | kvm_x86_ops->tlb_flush(vcpu); | ||
2121 | return 1; | ||
2122 | } | ||
2123 | |||
2124 | static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr) | ||
2125 | { | ||
2126 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2127 | mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT); | ||
2128 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2129 | return 1; | ||
2130 | } | ||
2131 | |||
2132 | static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu, | ||
2133 | struct kvm_pv_mmu_op_buffer *buffer) | ||
2134 | { | ||
2135 | struct kvm_mmu_op_header *header; | ||
2136 | |||
2137 | header = pv_mmu_peek_buffer(buffer, sizeof *header); | ||
2138 | if (!header) | ||
2139 | return 0; | ||
2140 | switch (header->op) { | ||
2141 | case KVM_MMU_OP_WRITE_PTE: { | ||
2142 | struct kvm_mmu_op_write_pte *wpte; | ||
2143 | |||
2144 | wpte = pv_mmu_read_buffer(buffer, sizeof *wpte); | ||
2145 | if (!wpte) | ||
2146 | return 0; | ||
2147 | return kvm_pv_mmu_write(vcpu, wpte->pte_phys, | ||
2148 | wpte->pte_val); | ||
2149 | } | ||
2150 | case KVM_MMU_OP_FLUSH_TLB: { | ||
2151 | struct kvm_mmu_op_flush_tlb *ftlb; | ||
2152 | |||
2153 | ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb); | ||
2154 | if (!ftlb) | ||
2155 | return 0; | ||
2156 | return kvm_pv_mmu_flush_tlb(vcpu); | ||
2157 | } | ||
2158 | case KVM_MMU_OP_RELEASE_PT: { | ||
2159 | struct kvm_mmu_op_release_pt *rpt; | ||
2160 | |||
2161 | rpt = pv_mmu_read_buffer(buffer, sizeof *rpt); | ||
2162 | if (!rpt) | ||
2163 | return 0; | ||
2164 | return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys); | ||
2165 | } | ||
2166 | default: return 0; | ||
2167 | } | ||
2168 | } | ||
2169 | |||
2170 | int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, | ||
2171 | gpa_t addr, unsigned long *ret) | ||
2172 | { | ||
2173 | int r; | ||
2174 | struct kvm_pv_mmu_op_buffer buffer; | ||
2175 | |||
2176 | buffer.ptr = buffer.buf; | ||
2177 | buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf); | ||
2178 | buffer.processed = 0; | ||
2179 | |||
2180 | r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len); | ||
2181 | if (r) | ||
2182 | goto out; | ||
2183 | |||
2184 | while (buffer.len) { | ||
2185 | r = kvm_pv_mmu_op_one(vcpu, &buffer); | ||
2186 | if (r < 0) | ||
2187 | goto out; | ||
2188 | if (r == 0) | ||
2189 | break; | ||
2190 | } | ||
2191 | |||
2192 | r = 1; | ||
2193 | out: | ||
2194 | *ret = buffer.processed; | ||
2195 | return r; | ||
2196 | } | ||
2197 | |||
1735 | #ifdef AUDIT | 2198 | #ifdef AUDIT |
1736 | 2199 | ||
1737 | static const char *audit_msg; | 2200 | static const char *audit_msg; |
@@ -1768,8 +2231,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, | |||
1768 | audit_mappings_page(vcpu, ent, va, level - 1); | 2231 | audit_mappings_page(vcpu, ent, va, level - 1); |
1769 | } else { | 2232 | } else { |
1770 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); | 2233 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); |
1771 | struct page *page = gpa_to_page(vcpu, gpa); | 2234 | hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT; |
1772 | hpa_t hpa = page_to_phys(page); | ||
1773 | 2235 | ||
1774 | if (is_shadow_present_pte(ent) | 2236 | if (is_shadow_present_pte(ent) |
1775 | && (ent & PT64_BASE_ADDR_MASK) != hpa) | 2237 | && (ent & PT64_BASE_ADDR_MASK) != hpa) |
@@ -1782,7 +2244,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, | |||
1782 | && !is_error_hpa(hpa)) | 2244 | && !is_error_hpa(hpa)) |
1783 | printk(KERN_ERR "audit: (%s) notrap shadow," | 2245 | printk(KERN_ERR "audit: (%s) notrap shadow," |
1784 | " valid guest gva %lx\n", audit_msg, va); | 2246 | " valid guest gva %lx\n", audit_msg, va); |
1785 | kvm_release_page_clean(page); | 2247 | kvm_release_pfn_clean(pfn); |
1786 | 2248 | ||
1787 | } | 2249 | } |
1788 | } | 2250 | } |
@@ -1867,7 +2329,7 @@ static void audit_rmap(struct kvm_vcpu *vcpu) | |||
1867 | 2329 | ||
1868 | if (n_rmap != n_actual) | 2330 | if (n_rmap != n_actual) |
1869 | printk(KERN_ERR "%s: (%s) rmap %d actual %d\n", | 2331 | printk(KERN_ERR "%s: (%s) rmap %d actual %d\n", |
1870 | __FUNCTION__, audit_msg, n_rmap, n_actual); | 2332 | __func__, audit_msg, n_rmap, n_actual); |
1871 | } | 2333 | } |
1872 | 2334 | ||
1873 | static void audit_write_protection(struct kvm_vcpu *vcpu) | 2335 | static void audit_write_protection(struct kvm_vcpu *vcpu) |
@@ -1887,7 +2349,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) | |||
1887 | if (*rmapp) | 2349 | if (*rmapp) |
1888 | printk(KERN_ERR "%s: (%s) shadow page has writable" | 2350 | printk(KERN_ERR "%s: (%s) shadow page has writable" |
1889 | " mappings: gfn %lx role %x\n", | 2351 | " mappings: gfn %lx role %x\n", |
1890 | __FUNCTION__, audit_msg, sp->gfn, | 2352 | __func__, audit_msg, sp->gfn, |
1891 | sp->role.word); | 2353 | sp->role.word); |
1892 | } | 2354 | } |
1893 | } | 2355 | } |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 1fce19ec7a23..e64e9f56a65e 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -3,6 +3,12 @@ | |||
3 | 3 | ||
4 | #include <linux/kvm_host.h> | 4 | #include <linux/kvm_host.h> |
5 | 5 | ||
6 | #ifdef CONFIG_X86_64 | ||
7 | #define TDP_ROOT_LEVEL PT64_ROOT_LEVEL | ||
8 | #else | ||
9 | #define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL | ||
10 | #endif | ||
11 | |||
6 | static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 12 | static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
7 | { | 13 | { |
8 | if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)) | 14 | if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)) |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index ecc0856268c4..156fe10288ae 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -130,7 +130,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
130 | unsigned index, pt_access, pte_access; | 130 | unsigned index, pt_access, pte_access; |
131 | gpa_t pte_gpa; | 131 | gpa_t pte_gpa; |
132 | 132 | ||
133 | pgprintk("%s: addr %lx\n", __FUNCTION__, addr); | 133 | pgprintk("%s: addr %lx\n", __func__, addr); |
134 | walk: | 134 | walk: |
135 | walker->level = vcpu->arch.mmu.root_level; | 135 | walker->level = vcpu->arch.mmu.root_level; |
136 | pte = vcpu->arch.cr3; | 136 | pte = vcpu->arch.cr3; |
@@ -155,7 +155,7 @@ walk: | |||
155 | pte_gpa += index * sizeof(pt_element_t); | 155 | pte_gpa += index * sizeof(pt_element_t); |
156 | walker->table_gfn[walker->level - 1] = table_gfn; | 156 | walker->table_gfn[walker->level - 1] = table_gfn; |
157 | walker->pte_gpa[walker->level - 1] = pte_gpa; | 157 | walker->pte_gpa[walker->level - 1] = pte_gpa; |
158 | pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, | 158 | pgprintk("%s: table_gfn[%d] %lx\n", __func__, |
159 | walker->level - 1, table_gfn); | 159 | walker->level - 1, table_gfn); |
160 | 160 | ||
161 | kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); | 161 | kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); |
@@ -222,7 +222,7 @@ walk: | |||
222 | walker->pt_access = pt_access; | 222 | walker->pt_access = pt_access; |
223 | walker->pte_access = pte_access; | 223 | walker->pte_access = pte_access; |
224 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", | 224 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", |
225 | __FUNCTION__, (u64)pte, pt_access, pte_access); | 225 | __func__, (u64)pte, pt_access, pte_access); |
226 | return 1; | 226 | return 1; |
227 | 227 | ||
228 | not_present: | 228 | not_present: |
@@ -243,31 +243,30 @@ err: | |||
243 | } | 243 | } |
244 | 244 | ||
245 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | 245 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, |
246 | u64 *spte, const void *pte, int bytes, | 246 | u64 *spte, const void *pte) |
247 | int offset_in_pte) | ||
248 | { | 247 | { |
249 | pt_element_t gpte; | 248 | pt_element_t gpte; |
250 | unsigned pte_access; | 249 | unsigned pte_access; |
251 | struct page *npage; | 250 | pfn_t pfn; |
251 | int largepage = vcpu->arch.update_pte.largepage; | ||
252 | 252 | ||
253 | gpte = *(const pt_element_t *)pte; | 253 | gpte = *(const pt_element_t *)pte; |
254 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { | 254 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { |
255 | if (!offset_in_pte && !is_present_pte(gpte)) | 255 | if (!is_present_pte(gpte)) |
256 | set_shadow_pte(spte, shadow_notrap_nonpresent_pte); | 256 | set_shadow_pte(spte, shadow_notrap_nonpresent_pte); |
257 | return; | 257 | return; |
258 | } | 258 | } |
259 | if (bytes < sizeof(pt_element_t)) | 259 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
260 | return; | ||
261 | pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); | ||
262 | pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte); | 260 | pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte); |
263 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) | 261 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) |
264 | return; | 262 | return; |
265 | npage = vcpu->arch.update_pte.page; | 263 | pfn = vcpu->arch.update_pte.pfn; |
266 | if (!npage) | 264 | if (is_error_pfn(pfn)) |
267 | return; | 265 | return; |
268 | get_page(npage); | 266 | kvm_get_pfn(pfn); |
269 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 267 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, |
270 | gpte & PT_DIRTY_MASK, NULL, gpte_to_gfn(gpte), npage); | 268 | gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), |
269 | pfn, true); | ||
271 | } | 270 | } |
272 | 271 | ||
273 | /* | 272 | /* |
@@ -275,8 +274,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
275 | */ | 274 | */ |
276 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | 275 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, |
277 | struct guest_walker *walker, | 276 | struct guest_walker *walker, |
278 | int user_fault, int write_fault, int *ptwrite, | 277 | int user_fault, int write_fault, int largepage, |
279 | struct page *page) | 278 | int *ptwrite, pfn_t pfn) |
280 | { | 279 | { |
281 | hpa_t shadow_addr; | 280 | hpa_t shadow_addr; |
282 | int level; | 281 | int level; |
@@ -304,11 +303,19 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
304 | shadow_ent = ((u64 *)__va(shadow_addr)) + index; | 303 | shadow_ent = ((u64 *)__va(shadow_addr)) + index; |
305 | if (level == PT_PAGE_TABLE_LEVEL) | 304 | if (level == PT_PAGE_TABLE_LEVEL) |
306 | break; | 305 | break; |
307 | if (is_shadow_present_pte(*shadow_ent)) { | 306 | |
307 | if (largepage && level == PT_DIRECTORY_LEVEL) | ||
308 | break; | ||
309 | |||
310 | if (is_shadow_present_pte(*shadow_ent) | ||
311 | && !is_large_pte(*shadow_ent)) { | ||
308 | shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; | 312 | shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; |
309 | continue; | 313 | continue; |
310 | } | 314 | } |
311 | 315 | ||
316 | if (is_large_pte(*shadow_ent)) | ||
317 | rmap_remove(vcpu->kvm, shadow_ent); | ||
318 | |||
312 | if (level - 1 == PT_PAGE_TABLE_LEVEL | 319 | if (level - 1 == PT_PAGE_TABLE_LEVEL |
313 | && walker->level == PT_DIRECTORY_LEVEL) { | 320 | && walker->level == PT_DIRECTORY_LEVEL) { |
314 | metaphysical = 1; | 321 | metaphysical = 1; |
@@ -329,7 +336,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
329 | walker->pte_gpa[level - 2], | 336 | walker->pte_gpa[level - 2], |
330 | &curr_pte, sizeof(curr_pte)); | 337 | &curr_pte, sizeof(curr_pte)); |
331 | if (r || curr_pte != walker->ptes[level - 2]) { | 338 | if (r || curr_pte != walker->ptes[level - 2]) { |
332 | kvm_release_page_clean(page); | 339 | kvm_release_pfn_clean(pfn); |
333 | return NULL; | 340 | return NULL; |
334 | } | 341 | } |
335 | } | 342 | } |
@@ -342,7 +349,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
342 | mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, | 349 | mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, |
343 | user_fault, write_fault, | 350 | user_fault, write_fault, |
344 | walker->ptes[walker->level-1] & PT_DIRTY_MASK, | 351 | walker->ptes[walker->level-1] & PT_DIRTY_MASK, |
345 | ptwrite, walker->gfn, page); | 352 | ptwrite, largepage, walker->gfn, pfn, false); |
346 | 353 | ||
347 | return shadow_ent; | 354 | return shadow_ent; |
348 | } | 355 | } |
@@ -371,16 +378,16 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
371 | u64 *shadow_pte; | 378 | u64 *shadow_pte; |
372 | int write_pt = 0; | 379 | int write_pt = 0; |
373 | int r; | 380 | int r; |
374 | struct page *page; | 381 | pfn_t pfn; |
382 | int largepage = 0; | ||
375 | 383 | ||
376 | pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code); | 384 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
377 | kvm_mmu_audit(vcpu, "pre page fault"); | 385 | kvm_mmu_audit(vcpu, "pre page fault"); |
378 | 386 | ||
379 | r = mmu_topup_memory_caches(vcpu); | 387 | r = mmu_topup_memory_caches(vcpu); |
380 | if (r) | 388 | if (r) |
381 | return r; | 389 | return r; |
382 | 390 | ||
383 | down_read(&vcpu->kvm->slots_lock); | ||
384 | /* | 391 | /* |
385 | * Look up the shadow pte for the faulting address. | 392 | * Look up the shadow pte for the faulting address. |
386 | */ | 393 | */ |
@@ -391,40 +398,45 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
391 | * The page is not mapped by the guest. Let the guest handle it. | 398 | * The page is not mapped by the guest. Let the guest handle it. |
392 | */ | 399 | */ |
393 | if (!r) { | 400 | if (!r) { |
394 | pgprintk("%s: guest page fault\n", __FUNCTION__); | 401 | pgprintk("%s: guest page fault\n", __func__); |
395 | inject_page_fault(vcpu, addr, walker.error_code); | 402 | inject_page_fault(vcpu, addr, walker.error_code); |
396 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ | 403 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ |
397 | up_read(&vcpu->kvm->slots_lock); | ||
398 | return 0; | 404 | return 0; |
399 | } | 405 | } |
400 | 406 | ||
401 | down_read(¤t->mm->mmap_sem); | 407 | down_read(¤t->mm->mmap_sem); |
402 | page = gfn_to_page(vcpu->kvm, walker.gfn); | 408 | if (walker.level == PT_DIRECTORY_LEVEL) { |
409 | gfn_t large_gfn; | ||
410 | large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1); | ||
411 | if (is_largepage_backed(vcpu, large_gfn)) { | ||
412 | walker.gfn = large_gfn; | ||
413 | largepage = 1; | ||
414 | } | ||
415 | } | ||
416 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); | ||
403 | up_read(¤t->mm->mmap_sem); | 417 | up_read(¤t->mm->mmap_sem); |
404 | 418 | ||
419 | /* mmio */ | ||
420 | if (is_error_pfn(pfn)) { | ||
421 | pgprintk("gfn %x is mmio\n", walker.gfn); | ||
422 | kvm_release_pfn_clean(pfn); | ||
423 | return 1; | ||
424 | } | ||
425 | |||
405 | spin_lock(&vcpu->kvm->mmu_lock); | 426 | spin_lock(&vcpu->kvm->mmu_lock); |
406 | kvm_mmu_free_some_pages(vcpu); | 427 | kvm_mmu_free_some_pages(vcpu); |
407 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 428 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
408 | &write_pt, page); | 429 | largepage, &write_pt, pfn); |
409 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, | 430 | |
431 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, | ||
410 | shadow_pte, *shadow_pte, write_pt); | 432 | shadow_pte, *shadow_pte, write_pt); |
411 | 433 | ||
412 | if (!write_pt) | 434 | if (!write_pt) |
413 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ | 435 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ |
414 | 436 | ||
415 | /* | ||
416 | * mmio: emulate if accessible, otherwise its a guest fault. | ||
417 | */ | ||
418 | if (shadow_pte && is_io_pte(*shadow_pte)) { | ||
419 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
420 | up_read(&vcpu->kvm->slots_lock); | ||
421 | return 1; | ||
422 | } | ||
423 | |||
424 | ++vcpu->stat.pf_fixed; | 437 | ++vcpu->stat.pf_fixed; |
425 | kvm_mmu_audit(vcpu, "post page fault (fixed)"); | 438 | kvm_mmu_audit(vcpu, "post page fault (fixed)"); |
426 | spin_unlock(&vcpu->kvm->mmu_lock); | 439 | spin_unlock(&vcpu->kvm->mmu_lock); |
427 | up_read(&vcpu->kvm->slots_lock); | ||
428 | 440 | ||
429 | return write_pt; | 441 | return write_pt; |
430 | } | 442 | } |
diff --git a/arch/x86/kvm/segment_descriptor.h b/arch/x86/kvm/segment_descriptor.h deleted file mode 100644 index 56fc4c873389..000000000000 --- a/arch/x86/kvm/segment_descriptor.h +++ /dev/null | |||
@@ -1,29 +0,0 @@ | |||
1 | #ifndef __SEGMENT_DESCRIPTOR_H | ||
2 | #define __SEGMENT_DESCRIPTOR_H | ||
3 | |||
4 | struct segment_descriptor { | ||
5 | u16 limit_low; | ||
6 | u16 base_low; | ||
7 | u8 base_mid; | ||
8 | u8 type : 4; | ||
9 | u8 system : 1; | ||
10 | u8 dpl : 2; | ||
11 | u8 present : 1; | ||
12 | u8 limit_high : 4; | ||
13 | u8 avl : 1; | ||
14 | u8 long_mode : 1; | ||
15 | u8 default_op : 1; | ||
16 | u8 granularity : 1; | ||
17 | u8 base_high; | ||
18 | } __attribute__((packed)); | ||
19 | |||
20 | #ifdef CONFIG_X86_64 | ||
21 | /* LDT or TSS descriptor in the GDT. 16 bytes. */ | ||
22 | struct segment_descriptor_64 { | ||
23 | struct segment_descriptor s; | ||
24 | u32 base_higher; | ||
25 | u32 pad_zero; | ||
26 | }; | ||
27 | |||
28 | #endif | ||
29 | #endif | ||
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1a582f1090e8..89e0be2c10d0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -47,6 +47,18 @@ MODULE_LICENSE("GPL"); | |||
47 | #define SVM_FEATURE_LBRV (1 << 1) | 47 | #define SVM_FEATURE_LBRV (1 << 1) |
48 | #define SVM_DEATURE_SVML (1 << 2) | 48 | #define SVM_DEATURE_SVML (1 << 2) |
49 | 49 | ||
50 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) | ||
51 | |||
52 | /* enable NPT for AMD64 and X86 with PAE */ | ||
53 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
54 | static bool npt_enabled = true; | ||
55 | #else | ||
56 | static bool npt_enabled = false; | ||
57 | #endif | ||
58 | static int npt = 1; | ||
59 | |||
60 | module_param(npt, int, S_IRUGO); | ||
61 | |||
50 | static void kvm_reput_irq(struct vcpu_svm *svm); | 62 | static void kvm_reput_irq(struct vcpu_svm *svm); |
51 | 63 | ||
52 | static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) | 64 | static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) |
@@ -54,8 +66,7 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) | |||
54 | return container_of(vcpu, struct vcpu_svm, vcpu); | 66 | return container_of(vcpu, struct vcpu_svm, vcpu); |
55 | } | 67 | } |
56 | 68 | ||
57 | unsigned long iopm_base; | 69 | static unsigned long iopm_base; |
58 | unsigned long msrpm_base; | ||
59 | 70 | ||
60 | struct kvm_ldttss_desc { | 71 | struct kvm_ldttss_desc { |
61 | u16 limit0; | 72 | u16 limit0; |
@@ -182,7 +193,7 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) | |||
182 | 193 | ||
183 | static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 194 | static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
184 | { | 195 | { |
185 | if (!(efer & EFER_LMA)) | 196 | if (!npt_enabled && !(efer & EFER_LMA)) |
186 | efer &= ~EFER_LME; | 197 | efer &= ~EFER_LME; |
187 | 198 | ||
188 | to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; | 199 | to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; |
@@ -219,12 +230,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
219 | struct vcpu_svm *svm = to_svm(vcpu); | 230 | struct vcpu_svm *svm = to_svm(vcpu); |
220 | 231 | ||
221 | if (!svm->next_rip) { | 232 | if (!svm->next_rip) { |
222 | printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__); | 233 | printk(KERN_DEBUG "%s: NOP\n", __func__); |
223 | return; | 234 | return; |
224 | } | 235 | } |
225 | if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE) | 236 | if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE) |
226 | printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", | 237 | printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", |
227 | __FUNCTION__, | 238 | __func__, |
228 | svm->vmcb->save.rip, | 239 | svm->vmcb->save.rip, |
229 | svm->next_rip); | 240 | svm->next_rip); |
230 | 241 | ||
@@ -279,11 +290,7 @@ static void svm_hardware_enable(void *garbage) | |||
279 | 290 | ||
280 | struct svm_cpu_data *svm_data; | 291 | struct svm_cpu_data *svm_data; |
281 | uint64_t efer; | 292 | uint64_t efer; |
282 | #ifdef CONFIG_X86_64 | ||
283 | struct desc_ptr gdt_descr; | ||
284 | #else | ||
285 | struct desc_ptr gdt_descr; | 293 | struct desc_ptr gdt_descr; |
286 | #endif | ||
287 | struct desc_struct *gdt; | 294 | struct desc_struct *gdt; |
288 | int me = raw_smp_processor_id(); | 295 | int me = raw_smp_processor_id(); |
289 | 296 | ||
@@ -302,7 +309,6 @@ static void svm_hardware_enable(void *garbage) | |||
302 | svm_data->asid_generation = 1; | 309 | svm_data->asid_generation = 1; |
303 | svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; | 310 | svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; |
304 | svm_data->next_asid = svm_data->max_asid + 1; | 311 | svm_data->next_asid = svm_data->max_asid + 1; |
305 | svm_features = cpuid_edx(SVM_CPUID_FUNC); | ||
306 | 312 | ||
307 | asm volatile ("sgdt %0" : "=m"(gdt_descr)); | 313 | asm volatile ("sgdt %0" : "=m"(gdt_descr)); |
308 | gdt = (struct desc_struct *)gdt_descr.address; | 314 | gdt = (struct desc_struct *)gdt_descr.address; |
@@ -361,12 +367,51 @@ static void set_msr_interception(u32 *msrpm, unsigned msr, | |||
361 | BUG(); | 367 | BUG(); |
362 | } | 368 | } |
363 | 369 | ||
370 | static void svm_vcpu_init_msrpm(u32 *msrpm) | ||
371 | { | ||
372 | memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); | ||
373 | |||
374 | #ifdef CONFIG_X86_64 | ||
375 | set_msr_interception(msrpm, MSR_GS_BASE, 1, 1); | ||
376 | set_msr_interception(msrpm, MSR_FS_BASE, 1, 1); | ||
377 | set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1); | ||
378 | set_msr_interception(msrpm, MSR_LSTAR, 1, 1); | ||
379 | set_msr_interception(msrpm, MSR_CSTAR, 1, 1); | ||
380 | set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1); | ||
381 | #endif | ||
382 | set_msr_interception(msrpm, MSR_K6_STAR, 1, 1); | ||
383 | set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1); | ||
384 | set_msr_interception(msrpm, MSR_IA32_SYSENTER_ESP, 1, 1); | ||
385 | set_msr_interception(msrpm, MSR_IA32_SYSENTER_EIP, 1, 1); | ||
386 | } | ||
387 | |||
388 | static void svm_enable_lbrv(struct vcpu_svm *svm) | ||
389 | { | ||
390 | u32 *msrpm = svm->msrpm; | ||
391 | |||
392 | svm->vmcb->control.lbr_ctl = 1; | ||
393 | set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1); | ||
394 | set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); | ||
395 | set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); | ||
396 | set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1); | ||
397 | } | ||
398 | |||
399 | static void svm_disable_lbrv(struct vcpu_svm *svm) | ||
400 | { | ||
401 | u32 *msrpm = svm->msrpm; | ||
402 | |||
403 | svm->vmcb->control.lbr_ctl = 0; | ||
404 | set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0); | ||
405 | set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0); | ||
406 | set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0); | ||
407 | set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); | ||
408 | } | ||
409 | |||
364 | static __init int svm_hardware_setup(void) | 410 | static __init int svm_hardware_setup(void) |
365 | { | 411 | { |
366 | int cpu; | 412 | int cpu; |
367 | struct page *iopm_pages; | 413 | struct page *iopm_pages; |
368 | struct page *msrpm_pages; | 414 | void *iopm_va; |
369 | void *iopm_va, *msrpm_va; | ||
370 | int r; | 415 | int r; |
371 | 416 | ||
372 | iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER); | 417 | iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER); |
@@ -379,41 +424,33 @@ static __init int svm_hardware_setup(void) | |||
379 | clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */ | 424 | clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */ |
380 | iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; | 425 | iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; |
381 | 426 | ||
427 | if (boot_cpu_has(X86_FEATURE_NX)) | ||
428 | kvm_enable_efer_bits(EFER_NX); | ||
382 | 429 | ||
383 | msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); | 430 | for_each_online_cpu(cpu) { |
431 | r = svm_cpu_init(cpu); | ||
432 | if (r) | ||
433 | goto err; | ||
434 | } | ||
384 | 435 | ||
385 | r = -ENOMEM; | 436 | svm_features = cpuid_edx(SVM_CPUID_FUNC); |
386 | if (!msrpm_pages) | ||
387 | goto err_1; | ||
388 | 437 | ||
389 | msrpm_va = page_address(msrpm_pages); | 438 | if (!svm_has(SVM_FEATURE_NPT)) |
390 | memset(msrpm_va, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); | 439 | npt_enabled = false; |
391 | msrpm_base = page_to_pfn(msrpm_pages) << PAGE_SHIFT; | ||
392 | 440 | ||
393 | #ifdef CONFIG_X86_64 | 441 | if (npt_enabled && !npt) { |
394 | set_msr_interception(msrpm_va, MSR_GS_BASE, 1, 1); | 442 | printk(KERN_INFO "kvm: Nested Paging disabled\n"); |
395 | set_msr_interception(msrpm_va, MSR_FS_BASE, 1, 1); | 443 | npt_enabled = false; |
396 | set_msr_interception(msrpm_va, MSR_KERNEL_GS_BASE, 1, 1); | 444 | } |
397 | set_msr_interception(msrpm_va, MSR_LSTAR, 1, 1); | ||
398 | set_msr_interception(msrpm_va, MSR_CSTAR, 1, 1); | ||
399 | set_msr_interception(msrpm_va, MSR_SYSCALL_MASK, 1, 1); | ||
400 | #endif | ||
401 | set_msr_interception(msrpm_va, MSR_K6_STAR, 1, 1); | ||
402 | set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_CS, 1, 1); | ||
403 | set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_ESP, 1, 1); | ||
404 | set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_EIP, 1, 1); | ||
405 | 445 | ||
406 | for_each_online_cpu(cpu) { | 446 | if (npt_enabled) { |
407 | r = svm_cpu_init(cpu); | 447 | printk(KERN_INFO "kvm: Nested Paging enabled\n"); |
408 | if (r) | 448 | kvm_enable_tdp(); |
409 | goto err_2; | ||
410 | } | 449 | } |
450 | |||
411 | return 0; | 451 | return 0; |
412 | 452 | ||
413 | err_2: | 453 | err: |
414 | __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); | ||
415 | msrpm_base = 0; | ||
416 | err_1: | ||
417 | __free_pages(iopm_pages, IOPM_ALLOC_ORDER); | 454 | __free_pages(iopm_pages, IOPM_ALLOC_ORDER); |
418 | iopm_base = 0; | 455 | iopm_base = 0; |
419 | return r; | 456 | return r; |
@@ -421,9 +458,8 @@ err_1: | |||
421 | 458 | ||
422 | static __exit void svm_hardware_unsetup(void) | 459 | static __exit void svm_hardware_unsetup(void) |
423 | { | 460 | { |
424 | __free_pages(pfn_to_page(msrpm_base >> PAGE_SHIFT), MSRPM_ALLOC_ORDER); | ||
425 | __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); | 461 | __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); |
426 | iopm_base = msrpm_base = 0; | 462 | iopm_base = 0; |
427 | } | 463 | } |
428 | 464 | ||
429 | static void init_seg(struct vmcb_seg *seg) | 465 | static void init_seg(struct vmcb_seg *seg) |
@@ -443,15 +479,14 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) | |||
443 | seg->base = 0; | 479 | seg->base = 0; |
444 | } | 480 | } |
445 | 481 | ||
446 | static void init_vmcb(struct vmcb *vmcb) | 482 | static void init_vmcb(struct vcpu_svm *svm) |
447 | { | 483 | { |
448 | struct vmcb_control_area *control = &vmcb->control; | 484 | struct vmcb_control_area *control = &svm->vmcb->control; |
449 | struct vmcb_save_area *save = &vmcb->save; | 485 | struct vmcb_save_area *save = &svm->vmcb->save; |
450 | 486 | ||
451 | control->intercept_cr_read = INTERCEPT_CR0_MASK | | 487 | control->intercept_cr_read = INTERCEPT_CR0_MASK | |
452 | INTERCEPT_CR3_MASK | | 488 | INTERCEPT_CR3_MASK | |
453 | INTERCEPT_CR4_MASK | | 489 | INTERCEPT_CR4_MASK; |
454 | INTERCEPT_CR8_MASK; | ||
455 | 490 | ||
456 | control->intercept_cr_write = INTERCEPT_CR0_MASK | | 491 | control->intercept_cr_write = INTERCEPT_CR0_MASK | |
457 | INTERCEPT_CR3_MASK | | 492 | INTERCEPT_CR3_MASK | |
@@ -471,23 +506,13 @@ static void init_vmcb(struct vmcb *vmcb) | |||
471 | INTERCEPT_DR7_MASK; | 506 | INTERCEPT_DR7_MASK; |
472 | 507 | ||
473 | control->intercept_exceptions = (1 << PF_VECTOR) | | 508 | control->intercept_exceptions = (1 << PF_VECTOR) | |
474 | (1 << UD_VECTOR); | 509 | (1 << UD_VECTOR) | |
510 | (1 << MC_VECTOR); | ||
475 | 511 | ||
476 | 512 | ||
477 | control->intercept = (1ULL << INTERCEPT_INTR) | | 513 | control->intercept = (1ULL << INTERCEPT_INTR) | |
478 | (1ULL << INTERCEPT_NMI) | | 514 | (1ULL << INTERCEPT_NMI) | |
479 | (1ULL << INTERCEPT_SMI) | | 515 | (1ULL << INTERCEPT_SMI) | |
480 | /* | ||
481 | * selective cr0 intercept bug? | ||
482 | * 0: 0f 22 d8 mov %eax,%cr3 | ||
483 | * 3: 0f 20 c0 mov %cr0,%eax | ||
484 | * 6: 0d 00 00 00 80 or $0x80000000,%eax | ||
485 | * b: 0f 22 c0 mov %eax,%cr0 | ||
486 | * set cr3 ->interception | ||
487 | * get cr0 ->interception | ||
488 | * set cr0 -> no interception | ||
489 | */ | ||
490 | /* (1ULL << INTERCEPT_SELECTIVE_CR0) | */ | ||
491 | (1ULL << INTERCEPT_CPUID) | | 516 | (1ULL << INTERCEPT_CPUID) | |
492 | (1ULL << INTERCEPT_INVD) | | 517 | (1ULL << INTERCEPT_INVD) | |
493 | (1ULL << INTERCEPT_HLT) | | 518 | (1ULL << INTERCEPT_HLT) | |
@@ -508,7 +533,7 @@ static void init_vmcb(struct vmcb *vmcb) | |||
508 | (1ULL << INTERCEPT_MWAIT); | 533 | (1ULL << INTERCEPT_MWAIT); |
509 | 534 | ||
510 | control->iopm_base_pa = iopm_base; | 535 | control->iopm_base_pa = iopm_base; |
511 | control->msrpm_base_pa = msrpm_base; | 536 | control->msrpm_base_pa = __pa(svm->msrpm); |
512 | control->tsc_offset = 0; | 537 | control->tsc_offset = 0; |
513 | control->int_ctl = V_INTR_MASKING_MASK; | 538 | control->int_ctl = V_INTR_MASKING_MASK; |
514 | 539 | ||
@@ -550,13 +575,30 @@ static void init_vmcb(struct vmcb *vmcb) | |||
550 | save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; | 575 | save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; |
551 | save->cr4 = X86_CR4_PAE; | 576 | save->cr4 = X86_CR4_PAE; |
552 | /* rdx = ?? */ | 577 | /* rdx = ?? */ |
578 | |||
579 | if (npt_enabled) { | ||
580 | /* Setup VMCB for Nested Paging */ | ||
581 | control->nested_ctl = 1; | ||
582 | control->intercept &= ~(1ULL << INTERCEPT_TASK_SWITCH); | ||
583 | control->intercept_exceptions &= ~(1 << PF_VECTOR); | ||
584 | control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK| | ||
585 | INTERCEPT_CR3_MASK); | ||
586 | control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| | ||
587 | INTERCEPT_CR3_MASK); | ||
588 | save->g_pat = 0x0007040600070406ULL; | ||
589 | /* enable caching because the QEMU Bios doesn't enable it */ | ||
590 | save->cr0 = X86_CR0_ET; | ||
591 | save->cr3 = 0; | ||
592 | save->cr4 = 0; | ||
593 | } | ||
594 | force_new_asid(&svm->vcpu); | ||
553 | } | 595 | } |
554 | 596 | ||
555 | static int svm_vcpu_reset(struct kvm_vcpu *vcpu) | 597 | static int svm_vcpu_reset(struct kvm_vcpu *vcpu) |
556 | { | 598 | { |
557 | struct vcpu_svm *svm = to_svm(vcpu); | 599 | struct vcpu_svm *svm = to_svm(vcpu); |
558 | 600 | ||
559 | init_vmcb(svm->vmcb); | 601 | init_vmcb(svm); |
560 | 602 | ||
561 | if (vcpu->vcpu_id != 0) { | 603 | if (vcpu->vcpu_id != 0) { |
562 | svm->vmcb->save.rip = 0; | 604 | svm->vmcb->save.rip = 0; |
@@ -571,6 +613,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
571 | { | 613 | { |
572 | struct vcpu_svm *svm; | 614 | struct vcpu_svm *svm; |
573 | struct page *page; | 615 | struct page *page; |
616 | struct page *msrpm_pages; | ||
574 | int err; | 617 | int err; |
575 | 618 | ||
576 | svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); | 619 | svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); |
@@ -589,12 +632,19 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
589 | goto uninit; | 632 | goto uninit; |
590 | } | 633 | } |
591 | 634 | ||
635 | err = -ENOMEM; | ||
636 | msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); | ||
637 | if (!msrpm_pages) | ||
638 | goto uninit; | ||
639 | svm->msrpm = page_address(msrpm_pages); | ||
640 | svm_vcpu_init_msrpm(svm->msrpm); | ||
641 | |||
592 | svm->vmcb = page_address(page); | 642 | svm->vmcb = page_address(page); |
593 | clear_page(svm->vmcb); | 643 | clear_page(svm->vmcb); |
594 | svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; | 644 | svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; |
595 | svm->asid_generation = 0; | 645 | svm->asid_generation = 0; |
596 | memset(svm->db_regs, 0, sizeof(svm->db_regs)); | 646 | memset(svm->db_regs, 0, sizeof(svm->db_regs)); |
597 | init_vmcb(svm->vmcb); | 647 | init_vmcb(svm); |
598 | 648 | ||
599 | fx_init(&svm->vcpu); | 649 | fx_init(&svm->vcpu); |
600 | svm->vcpu.fpu_active = 1; | 650 | svm->vcpu.fpu_active = 1; |
@@ -617,6 +667,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) | |||
617 | struct vcpu_svm *svm = to_svm(vcpu); | 667 | struct vcpu_svm *svm = to_svm(vcpu); |
618 | 668 | ||
619 | __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); | 669 | __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); |
670 | __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); | ||
620 | kvm_vcpu_uninit(vcpu); | 671 | kvm_vcpu_uninit(vcpu); |
621 | kmem_cache_free(kvm_vcpu_cache, svm); | 672 | kmem_cache_free(kvm_vcpu_cache, svm); |
622 | } | 673 | } |
@@ -731,6 +782,13 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
731 | var->unusable = !var->present; | 782 | var->unusable = !var->present; |
732 | } | 783 | } |
733 | 784 | ||
785 | static int svm_get_cpl(struct kvm_vcpu *vcpu) | ||
786 | { | ||
787 | struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; | ||
788 | |||
789 | return save->cpl; | ||
790 | } | ||
791 | |||
734 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 792 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) |
735 | { | 793 | { |
736 | struct vcpu_svm *svm = to_svm(vcpu); | 794 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -784,6 +842,9 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
784 | } | 842 | } |
785 | } | 843 | } |
786 | #endif | 844 | #endif |
845 | if (npt_enabled) | ||
846 | goto set; | ||
847 | |||
787 | if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { | 848 | if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { |
788 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 849 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); |
789 | vcpu->fpu_active = 1; | 850 | vcpu->fpu_active = 1; |
@@ -791,18 +852,29 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
791 | 852 | ||
792 | vcpu->arch.cr0 = cr0; | 853 | vcpu->arch.cr0 = cr0; |
793 | cr0 |= X86_CR0_PG | X86_CR0_WP; | 854 | cr0 |= X86_CR0_PG | X86_CR0_WP; |
794 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); | ||
795 | if (!vcpu->fpu_active) { | 855 | if (!vcpu->fpu_active) { |
796 | svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); | 856 | svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); |
797 | cr0 |= X86_CR0_TS; | 857 | cr0 |= X86_CR0_TS; |
798 | } | 858 | } |
859 | set: | ||
860 | /* | ||
861 | * re-enable caching here because the QEMU bios | ||
862 | * does not do it - this results in some delay at | ||
863 | * reboot | ||
864 | */ | ||
865 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); | ||
799 | svm->vmcb->save.cr0 = cr0; | 866 | svm->vmcb->save.cr0 = cr0; |
800 | } | 867 | } |
801 | 868 | ||
802 | static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 869 | static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
803 | { | 870 | { |
804 | vcpu->arch.cr4 = cr4; | 871 | unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE; |
805 | to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE; | 872 | |
873 | vcpu->arch.cr4 = cr4; | ||
874 | if (!npt_enabled) | ||
875 | cr4 |= X86_CR4_PAE; | ||
876 | cr4 |= host_cr4_mce; | ||
877 | to_svm(vcpu)->vmcb->save.cr4 = cr4; | ||
806 | } | 878 | } |
807 | 879 | ||
808 | static void svm_set_segment(struct kvm_vcpu *vcpu, | 880 | static void svm_set_segment(struct kvm_vcpu *vcpu, |
@@ -833,13 +905,6 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, | |||
833 | 905 | ||
834 | } | 906 | } |
835 | 907 | ||
836 | /* FIXME: | ||
837 | |||
838 | svm(vcpu)->vmcb->control.int_ctl &= ~V_TPR_MASK; | ||
839 | svm(vcpu)->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK); | ||
840 | |||
841 | */ | ||
842 | |||
843 | static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) | 908 | static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) |
844 | { | 909 | { |
845 | return -EOPNOTSUPP; | 910 | return -EOPNOTSUPP; |
@@ -920,7 +985,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, | |||
920 | } | 985 | } |
921 | default: | 986 | default: |
922 | printk(KERN_DEBUG "%s: unexpected dr %u\n", | 987 | printk(KERN_DEBUG "%s: unexpected dr %u\n", |
923 | __FUNCTION__, dr); | 988 | __func__, dr); |
924 | *exception = UD_VECTOR; | 989 | *exception = UD_VECTOR; |
925 | return; | 990 | return; |
926 | } | 991 | } |
@@ -962,6 +1027,19 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
962 | return 1; | 1027 | return 1; |
963 | } | 1028 | } |
964 | 1029 | ||
1030 | static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | ||
1031 | { | ||
1032 | /* | ||
1033 | * On an #MC intercept the MCE handler is not called automatically in | ||
1034 | * the host. So do it by hand here. | ||
1035 | */ | ||
1036 | asm volatile ( | ||
1037 | "int $0x12\n"); | ||
1038 | /* not sure if we ever come back to this point */ | ||
1039 | |||
1040 | return 1; | ||
1041 | } | ||
1042 | |||
965 | static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1043 | static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
966 | { | 1044 | { |
967 | /* | 1045 | /* |
@@ -969,7 +1047,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
969 | * so reinitialize it. | 1047 | * so reinitialize it. |
970 | */ | 1048 | */ |
971 | clear_page(svm->vmcb); | 1049 | clear_page(svm->vmcb); |
972 | init_vmcb(svm->vmcb); | 1050 | init_vmcb(svm); |
973 | 1051 | ||
974 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | 1052 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; |
975 | return 0; | 1053 | return 0; |
@@ -1033,9 +1111,18 @@ static int invalid_op_interception(struct vcpu_svm *svm, | |||
1033 | static int task_switch_interception(struct vcpu_svm *svm, | 1111 | static int task_switch_interception(struct vcpu_svm *svm, |
1034 | struct kvm_run *kvm_run) | 1112 | struct kvm_run *kvm_run) |
1035 | { | 1113 | { |
1036 | pr_unimpl(&svm->vcpu, "%s: task switch is unsupported\n", __FUNCTION__); | 1114 | u16 tss_selector; |
1037 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 1115 | |
1038 | return 0; | 1116 | tss_selector = (u16)svm->vmcb->control.exit_info_1; |
1117 | if (svm->vmcb->control.exit_info_2 & | ||
1118 | (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) | ||
1119 | return kvm_task_switch(&svm->vcpu, tss_selector, | ||
1120 | TASK_SWITCH_IRET); | ||
1121 | if (svm->vmcb->control.exit_info_2 & | ||
1122 | (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) | ||
1123 | return kvm_task_switch(&svm->vcpu, tss_selector, | ||
1124 | TASK_SWITCH_JMP); | ||
1125 | return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL); | ||
1039 | } | 1126 | } |
1040 | 1127 | ||
1041 | static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1128 | static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
@@ -1049,7 +1136,7 @@ static int emulate_on_interception(struct vcpu_svm *svm, | |||
1049 | struct kvm_run *kvm_run) | 1136 | struct kvm_run *kvm_run) |
1050 | { | 1137 | { |
1051 | if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE) | 1138 | if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE) |
1052 | pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__); | 1139 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); |
1053 | return 1; | 1140 | return 1; |
1054 | } | 1141 | } |
1055 | 1142 | ||
@@ -1179,8 +1266,19 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
1179 | svm->vmcb->save.sysenter_esp = data; | 1266 | svm->vmcb->save.sysenter_esp = data; |
1180 | break; | 1267 | break; |
1181 | case MSR_IA32_DEBUGCTLMSR: | 1268 | case MSR_IA32_DEBUGCTLMSR: |
1182 | pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", | 1269 | if (!svm_has(SVM_FEATURE_LBRV)) { |
1183 | __FUNCTION__, data); | 1270 | pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", |
1271 | __func__, data); | ||
1272 | break; | ||
1273 | } | ||
1274 | if (data & DEBUGCTL_RESERVED_BITS) | ||
1275 | return 1; | ||
1276 | |||
1277 | svm->vmcb->save.dbgctl = data; | ||
1278 | if (data & (1ULL<<0)) | ||
1279 | svm_enable_lbrv(svm); | ||
1280 | else | ||
1281 | svm_disable_lbrv(svm); | ||
1184 | break; | 1282 | break; |
1185 | case MSR_K7_EVNTSEL0: | 1283 | case MSR_K7_EVNTSEL0: |
1186 | case MSR_K7_EVNTSEL1: | 1284 | case MSR_K7_EVNTSEL1: |
@@ -1265,6 +1363,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
1265 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, | 1363 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, |
1266 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, | 1364 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, |
1267 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, | 1365 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, |
1366 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, | ||
1268 | [SVM_EXIT_INTR] = nop_on_interception, | 1367 | [SVM_EXIT_INTR] = nop_on_interception, |
1269 | [SVM_EXIT_NMI] = nop_on_interception, | 1368 | [SVM_EXIT_NMI] = nop_on_interception, |
1270 | [SVM_EXIT_SMI] = nop_on_interception, | 1369 | [SVM_EXIT_SMI] = nop_on_interception, |
@@ -1290,14 +1389,34 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
1290 | [SVM_EXIT_WBINVD] = emulate_on_interception, | 1389 | [SVM_EXIT_WBINVD] = emulate_on_interception, |
1291 | [SVM_EXIT_MONITOR] = invalid_op_interception, | 1390 | [SVM_EXIT_MONITOR] = invalid_op_interception, |
1292 | [SVM_EXIT_MWAIT] = invalid_op_interception, | 1391 | [SVM_EXIT_MWAIT] = invalid_op_interception, |
1392 | [SVM_EXIT_NPF] = pf_interception, | ||
1293 | }; | 1393 | }; |
1294 | 1394 | ||
1295 | |||
1296 | static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 1395 | static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) |
1297 | { | 1396 | { |
1298 | struct vcpu_svm *svm = to_svm(vcpu); | 1397 | struct vcpu_svm *svm = to_svm(vcpu); |
1299 | u32 exit_code = svm->vmcb->control.exit_code; | 1398 | u32 exit_code = svm->vmcb->control.exit_code; |
1300 | 1399 | ||
1400 | if (npt_enabled) { | ||
1401 | int mmu_reload = 0; | ||
1402 | if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { | ||
1403 | svm_set_cr0(vcpu, svm->vmcb->save.cr0); | ||
1404 | mmu_reload = 1; | ||
1405 | } | ||
1406 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | ||
1407 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | ||
1408 | if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { | ||
1409 | if (!load_pdptrs(vcpu, vcpu->arch.cr3)) { | ||
1410 | kvm_inject_gp(vcpu, 0); | ||
1411 | return 1; | ||
1412 | } | ||
1413 | } | ||
1414 | if (mmu_reload) { | ||
1415 | kvm_mmu_reset_context(vcpu); | ||
1416 | kvm_mmu_load(vcpu); | ||
1417 | } | ||
1418 | } | ||
1419 | |||
1301 | kvm_reput_irq(svm); | 1420 | kvm_reput_irq(svm); |
1302 | 1421 | ||
1303 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { | 1422 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { |
@@ -1308,10 +1427,11 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
1308 | } | 1427 | } |
1309 | 1428 | ||
1310 | if (is_external_interrupt(svm->vmcb->control.exit_int_info) && | 1429 | if (is_external_interrupt(svm->vmcb->control.exit_int_info) && |
1311 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) | 1430 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && |
1431 | exit_code != SVM_EXIT_NPF) | ||
1312 | printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " | 1432 | printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " |
1313 | "exit_code 0x%x\n", | 1433 | "exit_code 0x%x\n", |
1314 | __FUNCTION__, svm->vmcb->control.exit_int_info, | 1434 | __func__, svm->vmcb->control.exit_int_info, |
1315 | exit_code); | 1435 | exit_code); |
1316 | 1436 | ||
1317 | if (exit_code >= ARRAY_SIZE(svm_exit_handlers) | 1437 | if (exit_code >= ARRAY_SIZE(svm_exit_handlers) |
@@ -1364,6 +1484,27 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq) | |||
1364 | svm_inject_irq(svm, irq); | 1484 | svm_inject_irq(svm, irq); |
1365 | } | 1485 | } |
1366 | 1486 | ||
1487 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) | ||
1488 | { | ||
1489 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1490 | struct vmcb *vmcb = svm->vmcb; | ||
1491 | int max_irr, tpr; | ||
1492 | |||
1493 | if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr) | ||
1494 | return; | ||
1495 | |||
1496 | vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | ||
1497 | |||
1498 | max_irr = kvm_lapic_find_highest_irr(vcpu); | ||
1499 | if (max_irr == -1) | ||
1500 | return; | ||
1501 | |||
1502 | tpr = kvm_lapic_get_cr8(vcpu) << 4; | ||
1503 | |||
1504 | if (tpr >= (max_irr & 0xf0)) | ||
1505 | vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK; | ||
1506 | } | ||
1507 | |||
1367 | static void svm_intr_assist(struct kvm_vcpu *vcpu) | 1508 | static void svm_intr_assist(struct kvm_vcpu *vcpu) |
1368 | { | 1509 | { |
1369 | struct vcpu_svm *svm = to_svm(vcpu); | 1510 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -1376,14 +1517,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) | |||
1376 | SVM_EVTINJ_VEC_MASK; | 1517 | SVM_EVTINJ_VEC_MASK; |
1377 | vmcb->control.exit_int_info = 0; | 1518 | vmcb->control.exit_int_info = 0; |
1378 | svm_inject_irq(svm, intr_vector); | 1519 | svm_inject_irq(svm, intr_vector); |
1379 | return; | 1520 | goto out; |
1380 | } | 1521 | } |
1381 | 1522 | ||
1382 | if (vmcb->control.int_ctl & V_IRQ_MASK) | 1523 | if (vmcb->control.int_ctl & V_IRQ_MASK) |
1383 | return; | 1524 | goto out; |
1384 | 1525 | ||
1385 | if (!kvm_cpu_has_interrupt(vcpu)) | 1526 | if (!kvm_cpu_has_interrupt(vcpu)) |
1386 | return; | 1527 | goto out; |
1387 | 1528 | ||
1388 | if (!(vmcb->save.rflags & X86_EFLAGS_IF) || | 1529 | if (!(vmcb->save.rflags & X86_EFLAGS_IF) || |
1389 | (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || | 1530 | (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || |
@@ -1391,12 +1532,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) | |||
1391 | /* unable to deliver irq, set pending irq */ | 1532 | /* unable to deliver irq, set pending irq */ |
1392 | vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR); | 1533 | vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR); |
1393 | svm_inject_irq(svm, 0x0); | 1534 | svm_inject_irq(svm, 0x0); |
1394 | return; | 1535 | goto out; |
1395 | } | 1536 | } |
1396 | /* Okay, we can deliver the interrupt: grab it and update PIC state. */ | 1537 | /* Okay, we can deliver the interrupt: grab it and update PIC state. */ |
1397 | intr_vector = kvm_cpu_get_interrupt(vcpu); | 1538 | intr_vector = kvm_cpu_get_interrupt(vcpu); |
1398 | svm_inject_irq(svm, intr_vector); | 1539 | svm_inject_irq(svm, intr_vector); |
1399 | kvm_timer_intr_post(vcpu, intr_vector); | 1540 | kvm_timer_intr_post(vcpu, intr_vector); |
1541 | out: | ||
1542 | update_cr8_intercept(vcpu); | ||
1400 | } | 1543 | } |
1401 | 1544 | ||
1402 | static void kvm_reput_irq(struct vcpu_svm *svm) | 1545 | static void kvm_reput_irq(struct vcpu_svm *svm) |
@@ -1482,6 +1625,29 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) | |||
1482 | { | 1625 | { |
1483 | } | 1626 | } |
1484 | 1627 | ||
1628 | static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) | ||
1629 | { | ||
1630 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1631 | |||
1632 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { | ||
1633 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; | ||
1634 | kvm_lapic_set_tpr(vcpu, cr8); | ||
1635 | } | ||
1636 | } | ||
1637 | |||
1638 | static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) | ||
1639 | { | ||
1640 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1641 | u64 cr8; | ||
1642 | |||
1643 | if (!irqchip_in_kernel(vcpu->kvm)) | ||
1644 | return; | ||
1645 | |||
1646 | cr8 = kvm_get_cr8(vcpu); | ||
1647 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; | ||
1648 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; | ||
1649 | } | ||
1650 | |||
1485 | static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1651 | static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
1486 | { | 1652 | { |
1487 | struct vcpu_svm *svm = to_svm(vcpu); | 1653 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -1491,6 +1657,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1491 | 1657 | ||
1492 | pre_svm_run(svm); | 1658 | pre_svm_run(svm); |
1493 | 1659 | ||
1660 | sync_lapic_to_cr8(vcpu); | ||
1661 | |||
1494 | save_host_msrs(vcpu); | 1662 | save_host_msrs(vcpu); |
1495 | fs_selector = read_fs(); | 1663 | fs_selector = read_fs(); |
1496 | gs_selector = read_gs(); | 1664 | gs_selector = read_gs(); |
@@ -1499,6 +1667,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1499 | svm->host_dr6 = read_dr6(); | 1667 | svm->host_dr6 = read_dr6(); |
1500 | svm->host_dr7 = read_dr7(); | 1668 | svm->host_dr7 = read_dr7(); |
1501 | svm->vmcb->save.cr2 = vcpu->arch.cr2; | 1669 | svm->vmcb->save.cr2 = vcpu->arch.cr2; |
1670 | /* required for live migration with NPT */ | ||
1671 | if (npt_enabled) | ||
1672 | svm->vmcb->save.cr3 = vcpu->arch.cr3; | ||
1502 | 1673 | ||
1503 | if (svm->vmcb->save.dr7 & 0xff) { | 1674 | if (svm->vmcb->save.dr7 & 0xff) { |
1504 | write_dr7(0); | 1675 | write_dr7(0); |
@@ -1635,6 +1806,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1635 | 1806 | ||
1636 | stgi(); | 1807 | stgi(); |
1637 | 1808 | ||
1809 | sync_cr8_to_lapic(vcpu); | ||
1810 | |||
1638 | svm->next_rip = 0; | 1811 | svm->next_rip = 0; |
1639 | } | 1812 | } |
1640 | 1813 | ||
@@ -1642,6 +1815,12 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) | |||
1642 | { | 1815 | { |
1643 | struct vcpu_svm *svm = to_svm(vcpu); | 1816 | struct vcpu_svm *svm = to_svm(vcpu); |
1644 | 1817 | ||
1818 | if (npt_enabled) { | ||
1819 | svm->vmcb->control.nested_cr3 = root; | ||
1820 | force_new_asid(vcpu); | ||
1821 | return; | ||
1822 | } | ||
1823 | |||
1645 | svm->vmcb->save.cr3 = root; | 1824 | svm->vmcb->save.cr3 = root; |
1646 | force_new_asid(vcpu); | 1825 | force_new_asid(vcpu); |
1647 | 1826 | ||
@@ -1709,6 +1888,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
1709 | .get_segment_base = svm_get_segment_base, | 1888 | .get_segment_base = svm_get_segment_base, |
1710 | .get_segment = svm_get_segment, | 1889 | .get_segment = svm_get_segment, |
1711 | .set_segment = svm_set_segment, | 1890 | .set_segment = svm_set_segment, |
1891 | .get_cpl = svm_get_cpl, | ||
1712 | .get_cs_db_l_bits = kvm_get_cs_db_l_bits, | 1892 | .get_cs_db_l_bits = kvm_get_cs_db_l_bits, |
1713 | .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, | 1893 | .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, |
1714 | .set_cr0 = svm_set_cr0, | 1894 | .set_cr0 = svm_set_cr0, |
diff --git a/arch/x86/kvm/svm.h b/arch/x86/kvm/svm.h index 5fd50491b555..1b8afa78e869 100644 --- a/arch/x86/kvm/svm.h +++ b/arch/x86/kvm/svm.h | |||
@@ -238,6 +238,9 @@ struct __attribute__ ((__packed__)) vmcb { | |||
238 | #define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID | 238 | #define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID |
239 | #define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR | 239 | #define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR |
240 | 240 | ||
241 | #define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 | ||
242 | #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 | ||
243 | |||
241 | #define SVM_EXIT_READ_CR0 0x000 | 244 | #define SVM_EXIT_READ_CR0 0x000 |
242 | #define SVM_EXIT_READ_CR3 0x003 | 245 | #define SVM_EXIT_READ_CR3 0x003 |
243 | #define SVM_EXIT_READ_CR4 0x004 | 246 | #define SVM_EXIT_READ_CR4 0x004 |
diff --git a/arch/x86/kvm/tss.h b/arch/x86/kvm/tss.h new file mode 100644 index 000000000000..622aa10f692f --- /dev/null +++ b/arch/x86/kvm/tss.h | |||
@@ -0,0 +1,59 @@ | |||
1 | #ifndef __TSS_SEGMENT_H | ||
2 | #define __TSS_SEGMENT_H | ||
3 | |||
4 | struct tss_segment_32 { | ||
5 | u32 prev_task_link; | ||
6 | u32 esp0; | ||
7 | u32 ss0; | ||
8 | u32 esp1; | ||
9 | u32 ss1; | ||
10 | u32 esp2; | ||
11 | u32 ss2; | ||
12 | u32 cr3; | ||
13 | u32 eip; | ||
14 | u32 eflags; | ||
15 | u32 eax; | ||
16 | u32 ecx; | ||
17 | u32 edx; | ||
18 | u32 ebx; | ||
19 | u32 esp; | ||
20 | u32 ebp; | ||
21 | u32 esi; | ||
22 | u32 edi; | ||
23 | u32 es; | ||
24 | u32 cs; | ||
25 | u32 ss; | ||
26 | u32 ds; | ||
27 | u32 fs; | ||
28 | u32 gs; | ||
29 | u32 ldt_selector; | ||
30 | u16 t; | ||
31 | u16 io_map; | ||
32 | }; | ||
33 | |||
34 | struct tss_segment_16 { | ||
35 | u16 prev_task_link; | ||
36 | u16 sp0; | ||
37 | u16 ss0; | ||
38 | u16 sp1; | ||
39 | u16 ss1; | ||
40 | u16 sp2; | ||
41 | u16 ss2; | ||
42 | u16 ip; | ||
43 | u16 flag; | ||
44 | u16 ax; | ||
45 | u16 cx; | ||
46 | u16 dx; | ||
47 | u16 bx; | ||
48 | u16 sp; | ||
49 | u16 bp; | ||
50 | u16 si; | ||
51 | u16 di; | ||
52 | u16 es; | ||
53 | u16 cs; | ||
54 | u16 ss; | ||
55 | u16 ds; | ||
56 | u16 ldt; | ||
57 | }; | ||
58 | |||
59 | #endif | ||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8e1462880d1f..8e5d6645b90d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -17,7 +17,6 @@ | |||
17 | 17 | ||
18 | #include "irq.h" | 18 | #include "irq.h" |
19 | #include "vmx.h" | 19 | #include "vmx.h" |
20 | #include "segment_descriptor.h" | ||
21 | #include "mmu.h" | 20 | #include "mmu.h" |
22 | 21 | ||
23 | #include <linux/kvm_host.h> | 22 | #include <linux/kvm_host.h> |
@@ -37,6 +36,12 @@ MODULE_LICENSE("GPL"); | |||
37 | static int bypass_guest_pf = 1; | 36 | static int bypass_guest_pf = 1; |
38 | module_param(bypass_guest_pf, bool, 0); | 37 | module_param(bypass_guest_pf, bool, 0); |
39 | 38 | ||
39 | static int enable_vpid = 1; | ||
40 | module_param(enable_vpid, bool, 0); | ||
41 | |||
42 | static int flexpriority_enabled = 1; | ||
43 | module_param(flexpriority_enabled, bool, 0); | ||
44 | |||
40 | struct vmcs { | 45 | struct vmcs { |
41 | u32 revision_id; | 46 | u32 revision_id; |
42 | u32 abort; | 47 | u32 abort; |
@@ -71,6 +76,7 @@ struct vcpu_vmx { | |||
71 | unsigned rip; | 76 | unsigned rip; |
72 | } irq; | 77 | } irq; |
73 | } rmode; | 78 | } rmode; |
79 | int vpid; | ||
74 | }; | 80 | }; |
75 | 81 | ||
76 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | 82 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) |
@@ -85,6 +91,10 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | |||
85 | 91 | ||
86 | static struct page *vmx_io_bitmap_a; | 92 | static struct page *vmx_io_bitmap_a; |
87 | static struct page *vmx_io_bitmap_b; | 93 | static struct page *vmx_io_bitmap_b; |
94 | static struct page *vmx_msr_bitmap; | ||
95 | |||
96 | static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); | ||
97 | static DEFINE_SPINLOCK(vmx_vpid_lock); | ||
88 | 98 | ||
89 | static struct vmcs_config { | 99 | static struct vmcs_config { |
90 | int size; | 100 | int size; |
@@ -176,6 +186,11 @@ static inline int is_external_interrupt(u32 intr_info) | |||
176 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | 186 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); |
177 | } | 187 | } |
178 | 188 | ||
189 | static inline int cpu_has_vmx_msr_bitmap(void) | ||
190 | { | ||
191 | return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS); | ||
192 | } | ||
193 | |||
179 | static inline int cpu_has_vmx_tpr_shadow(void) | 194 | static inline int cpu_has_vmx_tpr_shadow(void) |
180 | { | 195 | { |
181 | return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW); | 196 | return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW); |
@@ -194,8 +209,9 @@ static inline int cpu_has_secondary_exec_ctrls(void) | |||
194 | 209 | ||
195 | static inline bool cpu_has_vmx_virtualize_apic_accesses(void) | 210 | static inline bool cpu_has_vmx_virtualize_apic_accesses(void) |
196 | { | 211 | { |
197 | return (vmcs_config.cpu_based_2nd_exec_ctrl & | 212 | return flexpriority_enabled |
198 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | 213 | && (vmcs_config.cpu_based_2nd_exec_ctrl & |
214 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | ||
199 | } | 215 | } |
200 | 216 | ||
201 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 217 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) |
@@ -204,6 +220,12 @@ static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | |||
204 | (irqchip_in_kernel(kvm))); | 220 | (irqchip_in_kernel(kvm))); |
205 | } | 221 | } |
206 | 222 | ||
223 | static inline int cpu_has_vmx_vpid(void) | ||
224 | { | ||
225 | return (vmcs_config.cpu_based_2nd_exec_ctrl & | ||
226 | SECONDARY_EXEC_ENABLE_VPID); | ||
227 | } | ||
228 | |||
207 | static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) | 229 | static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) |
208 | { | 230 | { |
209 | int i; | 231 | int i; |
@@ -214,6 +236,20 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) | |||
214 | return -1; | 236 | return -1; |
215 | } | 237 | } |
216 | 238 | ||
239 | static inline void __invvpid(int ext, u16 vpid, gva_t gva) | ||
240 | { | ||
241 | struct { | ||
242 | u64 vpid : 16; | ||
243 | u64 rsvd : 48; | ||
244 | u64 gva; | ||
245 | } operand = { vpid, 0, gva }; | ||
246 | |||
247 | asm volatile (ASM_VMX_INVVPID | ||
248 | /* CF==1 or ZF==1 --> rc = -1 */ | ||
249 | "; ja 1f ; ud2 ; 1:" | ||
250 | : : "a"(&operand), "c"(ext) : "cc", "memory"); | ||
251 | } | ||
252 | |||
217 | static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) | 253 | static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) |
218 | { | 254 | { |
219 | int i; | 255 | int i; |
@@ -257,6 +293,14 @@ static void vcpu_clear(struct vcpu_vmx *vmx) | |||
257 | vmx->launched = 0; | 293 | vmx->launched = 0; |
258 | } | 294 | } |
259 | 295 | ||
296 | static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) | ||
297 | { | ||
298 | if (vmx->vpid == 0) | ||
299 | return; | ||
300 | |||
301 | __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); | ||
302 | } | ||
303 | |||
260 | static unsigned long vmcs_readl(unsigned long field) | 304 | static unsigned long vmcs_readl(unsigned long field) |
261 | { | 305 | { |
262 | unsigned long value; | 306 | unsigned long value; |
@@ -353,7 +397,7 @@ static void reload_tss(void) | |||
353 | * VT restores TR but not its size. Useless. | 397 | * VT restores TR but not its size. Useless. |
354 | */ | 398 | */ |
355 | struct descriptor_table gdt; | 399 | struct descriptor_table gdt; |
356 | struct segment_descriptor *descs; | 400 | struct desc_struct *descs; |
357 | 401 | ||
358 | get_gdt(&gdt); | 402 | get_gdt(&gdt); |
359 | descs = (void *)gdt.base; | 403 | descs = (void *)gdt.base; |
@@ -485,11 +529,12 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
485 | { | 529 | { |
486 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 530 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
487 | u64 phys_addr = __pa(vmx->vmcs); | 531 | u64 phys_addr = __pa(vmx->vmcs); |
488 | u64 tsc_this, delta; | 532 | u64 tsc_this, delta, new_offset; |
489 | 533 | ||
490 | if (vcpu->cpu != cpu) { | 534 | if (vcpu->cpu != cpu) { |
491 | vcpu_clear(vmx); | 535 | vcpu_clear(vmx); |
492 | kvm_migrate_apic_timer(vcpu); | 536 | kvm_migrate_apic_timer(vcpu); |
537 | vpid_sync_vcpu_all(vmx); | ||
493 | } | 538 | } |
494 | 539 | ||
495 | if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { | 540 | if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { |
@@ -524,8 +569,11 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
524 | * Make sure the time stamp counter is monotonous. | 569 | * Make sure the time stamp counter is monotonous. |
525 | */ | 570 | */ |
526 | rdtscll(tsc_this); | 571 | rdtscll(tsc_this); |
527 | delta = vcpu->arch.host_tsc - tsc_this; | 572 | if (tsc_this < vcpu->arch.host_tsc) { |
528 | vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta); | 573 | delta = vcpu->arch.host_tsc - tsc_this; |
574 | new_offset = vmcs_read64(TSC_OFFSET) + delta; | ||
575 | vmcs_write64(TSC_OFFSET, new_offset); | ||
576 | } | ||
529 | } | 577 | } |
530 | } | 578 | } |
531 | 579 | ||
@@ -596,7 +644,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
596 | { | 644 | { |
597 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 645 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
598 | nr | INTR_TYPE_EXCEPTION | 646 | nr | INTR_TYPE_EXCEPTION |
599 | | (has_error_code ? INTR_INFO_DELIEVER_CODE_MASK : 0) | 647 | | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) |
600 | | INTR_INFO_VALID_MASK); | 648 | | INTR_INFO_VALID_MASK); |
601 | if (has_error_code) | 649 | if (has_error_code) |
602 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); | 650 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); |
@@ -959,6 +1007,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
959 | CPU_BASED_MOV_DR_EXITING | | 1007 | CPU_BASED_MOV_DR_EXITING | |
960 | CPU_BASED_USE_TSC_OFFSETING; | 1008 | CPU_BASED_USE_TSC_OFFSETING; |
961 | opt = CPU_BASED_TPR_SHADOW | | 1009 | opt = CPU_BASED_TPR_SHADOW | |
1010 | CPU_BASED_USE_MSR_BITMAPS | | ||
962 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 1011 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
963 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, | 1012 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, |
964 | &_cpu_based_exec_control) < 0) | 1013 | &_cpu_based_exec_control) < 0) |
@@ -971,7 +1020,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
971 | if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { | 1020 | if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { |
972 | min = 0; | 1021 | min = 0; |
973 | opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 1022 | opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
974 | SECONDARY_EXEC_WBINVD_EXITING; | 1023 | SECONDARY_EXEC_WBINVD_EXITING | |
1024 | SECONDARY_EXEC_ENABLE_VPID; | ||
975 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2, | 1025 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2, |
976 | &_cpu_based_2nd_exec_control) < 0) | 1026 | &_cpu_based_2nd_exec_control) < 0) |
977 | return -EIO; | 1027 | return -EIO; |
@@ -1080,6 +1130,10 @@ static __init int hardware_setup(void) | |||
1080 | { | 1130 | { |
1081 | if (setup_vmcs_config(&vmcs_config) < 0) | 1131 | if (setup_vmcs_config(&vmcs_config) < 0) |
1082 | return -EIO; | 1132 | return -EIO; |
1133 | |||
1134 | if (boot_cpu_has(X86_FEATURE_NX)) | ||
1135 | kvm_enable_efer_bits(EFER_NX); | ||
1136 | |||
1083 | return alloc_kvm_area(); | 1137 | return alloc_kvm_area(); |
1084 | } | 1138 | } |
1085 | 1139 | ||
@@ -1214,7 +1268,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu) | |||
1214 | guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); | 1268 | guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); |
1215 | if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { | 1269 | if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { |
1216 | printk(KERN_DEBUG "%s: tss fixup for long mode. \n", | 1270 | printk(KERN_DEBUG "%s: tss fixup for long mode. \n", |
1217 | __FUNCTION__); | 1271 | __func__); |
1218 | vmcs_write32(GUEST_TR_AR_BYTES, | 1272 | vmcs_write32(GUEST_TR_AR_BYTES, |
1219 | (guest_tr_ar & ~AR_TYPE_MASK) | 1273 | (guest_tr_ar & ~AR_TYPE_MASK) |
1220 | | AR_TYPE_BUSY_64_TSS); | 1274 | | AR_TYPE_BUSY_64_TSS); |
@@ -1239,6 +1293,11 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
1239 | 1293 | ||
1240 | #endif | 1294 | #endif |
1241 | 1295 | ||
1296 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | ||
1297 | { | ||
1298 | vpid_sync_vcpu_all(to_vmx(vcpu)); | ||
1299 | } | ||
1300 | |||
1242 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | 1301 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
1243 | { | 1302 | { |
1244 | vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK; | 1303 | vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK; |
@@ -1275,6 +1334,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1275 | 1334 | ||
1276 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 1335 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
1277 | { | 1336 | { |
1337 | vmx_flush_tlb(vcpu); | ||
1278 | vmcs_writel(GUEST_CR3, cr3); | 1338 | vmcs_writel(GUEST_CR3, cr3); |
1279 | if (vcpu->arch.cr0 & X86_CR0_PE) | 1339 | if (vcpu->arch.cr0 & X86_CR0_PE) |
1280 | vmx_fpu_deactivate(vcpu); | 1340 | vmx_fpu_deactivate(vcpu); |
@@ -1288,14 +1348,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
1288 | vcpu->arch.cr4 = cr4; | 1348 | vcpu->arch.cr4 = cr4; |
1289 | } | 1349 | } |
1290 | 1350 | ||
1291 | #ifdef CONFIG_X86_64 | ||
1292 | |||
1293 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 1351 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
1294 | { | 1352 | { |
1295 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1353 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1296 | struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); | 1354 | struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); |
1297 | 1355 | ||
1298 | vcpu->arch.shadow_efer = efer; | 1356 | vcpu->arch.shadow_efer = efer; |
1357 | if (!msr) | ||
1358 | return; | ||
1299 | if (efer & EFER_LMA) { | 1359 | if (efer & EFER_LMA) { |
1300 | vmcs_write32(VM_ENTRY_CONTROLS, | 1360 | vmcs_write32(VM_ENTRY_CONTROLS, |
1301 | vmcs_read32(VM_ENTRY_CONTROLS) | | 1361 | vmcs_read32(VM_ENTRY_CONTROLS) | |
@@ -1312,8 +1372,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
1312 | setup_msrs(vmx); | 1372 | setup_msrs(vmx); |
1313 | } | 1373 | } |
1314 | 1374 | ||
1315 | #endif | ||
1316 | |||
1317 | static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) | 1375 | static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) |
1318 | { | 1376 | { |
1319 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 1377 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
@@ -1344,6 +1402,20 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
1344 | var->unusable = (ar >> 16) & 1; | 1402 | var->unusable = (ar >> 16) & 1; |
1345 | } | 1403 | } |
1346 | 1404 | ||
1405 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | ||
1406 | { | ||
1407 | struct kvm_segment kvm_seg; | ||
1408 | |||
1409 | if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */ | ||
1410 | return 0; | ||
1411 | |||
1412 | if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ | ||
1413 | return 3; | ||
1414 | |||
1415 | vmx_get_segment(vcpu, &kvm_seg, VCPU_SREG_CS); | ||
1416 | return kvm_seg.selector & 3; | ||
1417 | } | ||
1418 | |||
1347 | static u32 vmx_segment_access_rights(struct kvm_segment *var) | 1419 | static u32 vmx_segment_access_rights(struct kvm_segment *var) |
1348 | { | 1420 | { |
1349 | u32 ar; | 1421 | u32 ar; |
@@ -1433,7 +1505,6 @@ static int init_rmode_tss(struct kvm *kvm) | |||
1433 | int ret = 0; | 1505 | int ret = 0; |
1434 | int r; | 1506 | int r; |
1435 | 1507 | ||
1436 | down_read(&kvm->slots_lock); | ||
1437 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); | 1508 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); |
1438 | if (r < 0) | 1509 | if (r < 0) |
1439 | goto out; | 1510 | goto out; |
@@ -1456,7 +1527,6 @@ static int init_rmode_tss(struct kvm *kvm) | |||
1456 | 1527 | ||
1457 | ret = 1; | 1528 | ret = 1; |
1458 | out: | 1529 | out: |
1459 | up_read(&kvm->slots_lock); | ||
1460 | return ret; | 1530 | return ret; |
1461 | } | 1531 | } |
1462 | 1532 | ||
@@ -1494,6 +1564,46 @@ out: | |||
1494 | return r; | 1564 | return r; |
1495 | } | 1565 | } |
1496 | 1566 | ||
1567 | static void allocate_vpid(struct vcpu_vmx *vmx) | ||
1568 | { | ||
1569 | int vpid; | ||
1570 | |||
1571 | vmx->vpid = 0; | ||
1572 | if (!enable_vpid || !cpu_has_vmx_vpid()) | ||
1573 | return; | ||
1574 | spin_lock(&vmx_vpid_lock); | ||
1575 | vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); | ||
1576 | if (vpid < VMX_NR_VPIDS) { | ||
1577 | vmx->vpid = vpid; | ||
1578 | __set_bit(vpid, vmx_vpid_bitmap); | ||
1579 | } | ||
1580 | spin_unlock(&vmx_vpid_lock); | ||
1581 | } | ||
1582 | |||
1583 | void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) | ||
1584 | { | ||
1585 | void *va; | ||
1586 | |||
1587 | if (!cpu_has_vmx_msr_bitmap()) | ||
1588 | return; | ||
1589 | |||
1590 | /* | ||
1591 | * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals | ||
1592 | * have the write-low and read-high bitmap offsets the wrong way round. | ||
1593 | * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. | ||
1594 | */ | ||
1595 | va = kmap(msr_bitmap); | ||
1596 | if (msr <= 0x1fff) { | ||
1597 | __clear_bit(msr, va + 0x000); /* read-low */ | ||
1598 | __clear_bit(msr, va + 0x800); /* write-low */ | ||
1599 | } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { | ||
1600 | msr &= 0x1fff; | ||
1601 | __clear_bit(msr, va + 0x400); /* read-high */ | ||
1602 | __clear_bit(msr, va + 0xc00); /* write-high */ | ||
1603 | } | ||
1604 | kunmap(msr_bitmap); | ||
1605 | } | ||
1606 | |||
1497 | /* | 1607 | /* |
1498 | * Sets up the vmcs for emulated real mode. | 1608 | * Sets up the vmcs for emulated real mode. |
1499 | */ | 1609 | */ |
@@ -1511,6 +1621,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
1511 | vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a)); | 1621 | vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a)); |
1512 | vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b)); | 1622 | vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b)); |
1513 | 1623 | ||
1624 | if (cpu_has_vmx_msr_bitmap()) | ||
1625 | vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap)); | ||
1626 | |||
1514 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ | 1627 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ |
1515 | 1628 | ||
1516 | /* Control */ | 1629 | /* Control */ |
@@ -1532,6 +1645,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
1532 | if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) | 1645 | if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) |
1533 | exec_control &= | 1646 | exec_control &= |
1534 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 1647 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
1648 | if (vmx->vpid == 0) | ||
1649 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | ||
1535 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 1650 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
1536 | } | 1651 | } |
1537 | 1652 | ||
@@ -1613,6 +1728,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
1613 | u64 msr; | 1728 | u64 msr; |
1614 | int ret; | 1729 | int ret; |
1615 | 1730 | ||
1731 | down_read(&vcpu->kvm->slots_lock); | ||
1616 | if (!init_rmode_tss(vmx->vcpu.kvm)) { | 1732 | if (!init_rmode_tss(vmx->vcpu.kvm)) { |
1617 | ret = -ENOMEM; | 1733 | ret = -ENOMEM; |
1618 | goto out; | 1734 | goto out; |
@@ -1621,7 +1737,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
1621 | vmx->vcpu.arch.rmode.active = 0; | 1737 | vmx->vcpu.arch.rmode.active = 0; |
1622 | 1738 | ||
1623 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); | 1739 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); |
1624 | set_cr8(&vmx->vcpu, 0); | 1740 | kvm_set_cr8(&vmx->vcpu, 0); |
1625 | msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | 1741 | msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; |
1626 | if (vmx->vcpu.vcpu_id == 0) | 1742 | if (vmx->vcpu.vcpu_id == 0) |
1627 | msr |= MSR_IA32_APICBASE_BSP; | 1743 | msr |= MSR_IA32_APICBASE_BSP; |
@@ -1704,18 +1820,22 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
1704 | vmcs_write64(APIC_ACCESS_ADDR, | 1820 | vmcs_write64(APIC_ACCESS_ADDR, |
1705 | page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); | 1821 | page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); |
1706 | 1822 | ||
1823 | if (vmx->vpid != 0) | ||
1824 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | ||
1825 | |||
1707 | vmx->vcpu.arch.cr0 = 0x60000010; | 1826 | vmx->vcpu.arch.cr0 = 0x60000010; |
1708 | vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ | 1827 | vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ |
1709 | vmx_set_cr4(&vmx->vcpu, 0); | 1828 | vmx_set_cr4(&vmx->vcpu, 0); |
1710 | #ifdef CONFIG_X86_64 | ||
1711 | vmx_set_efer(&vmx->vcpu, 0); | 1829 | vmx_set_efer(&vmx->vcpu, 0); |
1712 | #endif | ||
1713 | vmx_fpu_activate(&vmx->vcpu); | 1830 | vmx_fpu_activate(&vmx->vcpu); |
1714 | update_exception_bitmap(&vmx->vcpu); | 1831 | update_exception_bitmap(&vmx->vcpu); |
1715 | 1832 | ||
1716 | return 0; | 1833 | vpid_sync_vcpu_all(vmx); |
1834 | |||
1835 | ret = 0; | ||
1717 | 1836 | ||
1718 | out: | 1837 | out: |
1838 | up_read(&vcpu->kvm->slots_lock); | ||
1719 | return ret; | 1839 | return ret; |
1720 | } | 1840 | } |
1721 | 1841 | ||
@@ -1723,6 +1843,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) | |||
1723 | { | 1843 | { |
1724 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1844 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1725 | 1845 | ||
1846 | KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); | ||
1847 | |||
1726 | if (vcpu->arch.rmode.active) { | 1848 | if (vcpu->arch.rmode.active) { |
1727 | vmx->rmode.irq.pending = true; | 1849 | vmx->rmode.irq.pending = true; |
1728 | vmx->rmode.irq.vector = irq; | 1850 | vmx->rmode.irq.vector = irq; |
@@ -1844,7 +1966,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1844 | if ((vect_info & VECTORING_INFO_VALID_MASK) && | 1966 | if ((vect_info & VECTORING_INFO_VALID_MASK) && |
1845 | !is_page_fault(intr_info)) | 1967 | !is_page_fault(intr_info)) |
1846 | printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " | 1968 | printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " |
1847 | "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info); | 1969 | "intr info 0x%x\n", __func__, vect_info, intr_info); |
1848 | 1970 | ||
1849 | if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) { | 1971 | if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) { |
1850 | int irq = vect_info & VECTORING_INFO_VECTOR_MASK; | 1972 | int irq = vect_info & VECTORING_INFO_VECTOR_MASK; |
@@ -1869,10 +1991,12 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1869 | 1991 | ||
1870 | error_code = 0; | 1992 | error_code = 0; |
1871 | rip = vmcs_readl(GUEST_RIP); | 1993 | rip = vmcs_readl(GUEST_RIP); |
1872 | if (intr_info & INTR_INFO_DELIEVER_CODE_MASK) | 1994 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) |
1873 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 1995 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
1874 | if (is_page_fault(intr_info)) { | 1996 | if (is_page_fault(intr_info)) { |
1875 | cr2 = vmcs_readl(EXIT_QUALIFICATION); | 1997 | cr2 = vmcs_readl(EXIT_QUALIFICATION); |
1998 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, | ||
1999 | (u32)((u64)cr2 >> 32), handler); | ||
1876 | return kvm_mmu_page_fault(vcpu, cr2, error_code); | 2000 | return kvm_mmu_page_fault(vcpu, cr2, error_code); |
1877 | } | 2001 | } |
1878 | 2002 | ||
@@ -1901,6 +2025,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu, | |||
1901 | struct kvm_run *kvm_run) | 2025 | struct kvm_run *kvm_run) |
1902 | { | 2026 | { |
1903 | ++vcpu->stat.irq_exits; | 2027 | ++vcpu->stat.irq_exits; |
2028 | KVMTRACE_1D(INTR, vcpu, vmcs_read32(VM_EXIT_INTR_INFO), handler); | ||
1904 | return 1; | 2029 | return 1; |
1905 | } | 2030 | } |
1906 | 2031 | ||
@@ -1958,25 +2083,27 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1958 | reg = (exit_qualification >> 8) & 15; | 2083 | reg = (exit_qualification >> 8) & 15; |
1959 | switch ((exit_qualification >> 4) & 3) { | 2084 | switch ((exit_qualification >> 4) & 3) { |
1960 | case 0: /* mov to cr */ | 2085 | case 0: /* mov to cr */ |
2086 | KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)vcpu->arch.regs[reg], | ||
2087 | (u32)((u64)vcpu->arch.regs[reg] >> 32), handler); | ||
1961 | switch (cr) { | 2088 | switch (cr) { |
1962 | case 0: | 2089 | case 0: |
1963 | vcpu_load_rsp_rip(vcpu); | 2090 | vcpu_load_rsp_rip(vcpu); |
1964 | set_cr0(vcpu, vcpu->arch.regs[reg]); | 2091 | kvm_set_cr0(vcpu, vcpu->arch.regs[reg]); |
1965 | skip_emulated_instruction(vcpu); | 2092 | skip_emulated_instruction(vcpu); |
1966 | return 1; | 2093 | return 1; |
1967 | case 3: | 2094 | case 3: |
1968 | vcpu_load_rsp_rip(vcpu); | 2095 | vcpu_load_rsp_rip(vcpu); |
1969 | set_cr3(vcpu, vcpu->arch.regs[reg]); | 2096 | kvm_set_cr3(vcpu, vcpu->arch.regs[reg]); |
1970 | skip_emulated_instruction(vcpu); | 2097 | skip_emulated_instruction(vcpu); |
1971 | return 1; | 2098 | return 1; |
1972 | case 4: | 2099 | case 4: |
1973 | vcpu_load_rsp_rip(vcpu); | 2100 | vcpu_load_rsp_rip(vcpu); |
1974 | set_cr4(vcpu, vcpu->arch.regs[reg]); | 2101 | kvm_set_cr4(vcpu, vcpu->arch.regs[reg]); |
1975 | skip_emulated_instruction(vcpu); | 2102 | skip_emulated_instruction(vcpu); |
1976 | return 1; | 2103 | return 1; |
1977 | case 8: | 2104 | case 8: |
1978 | vcpu_load_rsp_rip(vcpu); | 2105 | vcpu_load_rsp_rip(vcpu); |
1979 | set_cr8(vcpu, vcpu->arch.regs[reg]); | 2106 | kvm_set_cr8(vcpu, vcpu->arch.regs[reg]); |
1980 | skip_emulated_instruction(vcpu); | 2107 | skip_emulated_instruction(vcpu); |
1981 | if (irqchip_in_kernel(vcpu->kvm)) | 2108 | if (irqchip_in_kernel(vcpu->kvm)) |
1982 | return 1; | 2109 | return 1; |
@@ -1990,6 +2117,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1990 | vcpu->arch.cr0 &= ~X86_CR0_TS; | 2117 | vcpu->arch.cr0 &= ~X86_CR0_TS; |
1991 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | 2118 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); |
1992 | vmx_fpu_activate(vcpu); | 2119 | vmx_fpu_activate(vcpu); |
2120 | KVMTRACE_0D(CLTS, vcpu, handler); | ||
1993 | skip_emulated_instruction(vcpu); | 2121 | skip_emulated_instruction(vcpu); |
1994 | return 1; | 2122 | return 1; |
1995 | case 1: /*mov from cr*/ | 2123 | case 1: /*mov from cr*/ |
@@ -1998,18 +2126,24 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1998 | vcpu_load_rsp_rip(vcpu); | 2126 | vcpu_load_rsp_rip(vcpu); |
1999 | vcpu->arch.regs[reg] = vcpu->arch.cr3; | 2127 | vcpu->arch.regs[reg] = vcpu->arch.cr3; |
2000 | vcpu_put_rsp_rip(vcpu); | 2128 | vcpu_put_rsp_rip(vcpu); |
2129 | KVMTRACE_3D(CR_READ, vcpu, (u32)cr, | ||
2130 | (u32)vcpu->arch.regs[reg], | ||
2131 | (u32)((u64)vcpu->arch.regs[reg] >> 32), | ||
2132 | handler); | ||
2001 | skip_emulated_instruction(vcpu); | 2133 | skip_emulated_instruction(vcpu); |
2002 | return 1; | 2134 | return 1; |
2003 | case 8: | 2135 | case 8: |
2004 | vcpu_load_rsp_rip(vcpu); | 2136 | vcpu_load_rsp_rip(vcpu); |
2005 | vcpu->arch.regs[reg] = get_cr8(vcpu); | 2137 | vcpu->arch.regs[reg] = kvm_get_cr8(vcpu); |
2006 | vcpu_put_rsp_rip(vcpu); | 2138 | vcpu_put_rsp_rip(vcpu); |
2139 | KVMTRACE_2D(CR_READ, vcpu, (u32)cr, | ||
2140 | (u32)vcpu->arch.regs[reg], handler); | ||
2007 | skip_emulated_instruction(vcpu); | 2141 | skip_emulated_instruction(vcpu); |
2008 | return 1; | 2142 | return 1; |
2009 | } | 2143 | } |
2010 | break; | 2144 | break; |
2011 | case 3: /* lmsw */ | 2145 | case 3: /* lmsw */ |
2012 | lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); | 2146 | kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); |
2013 | 2147 | ||
2014 | skip_emulated_instruction(vcpu); | 2148 | skip_emulated_instruction(vcpu); |
2015 | return 1; | 2149 | return 1; |
@@ -2049,6 +2183,7 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2049 | val = 0; | 2183 | val = 0; |
2050 | } | 2184 | } |
2051 | vcpu->arch.regs[reg] = val; | 2185 | vcpu->arch.regs[reg] = val; |
2186 | KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); | ||
2052 | } else { | 2187 | } else { |
2053 | /* mov to dr */ | 2188 | /* mov to dr */ |
2054 | } | 2189 | } |
@@ -2073,6 +2208,9 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2073 | return 1; | 2208 | return 1; |
2074 | } | 2209 | } |
2075 | 2210 | ||
2211 | KVMTRACE_3D(MSR_READ, vcpu, ecx, (u32)data, (u32)(data >> 32), | ||
2212 | handler); | ||
2213 | |||
2076 | /* FIXME: handling of bits 32:63 of rax, rdx */ | 2214 | /* FIXME: handling of bits 32:63 of rax, rdx */ |
2077 | vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u; | 2215 | vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u; |
2078 | vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u; | 2216 | vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u; |
@@ -2086,6 +2224,9 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2086 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) | 2224 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) |
2087 | | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); | 2225 | | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); |
2088 | 2226 | ||
2227 | KVMTRACE_3D(MSR_WRITE, vcpu, ecx, (u32)data, (u32)(data >> 32), | ||
2228 | handler); | ||
2229 | |||
2089 | if (vmx_set_msr(vcpu, ecx, data) != 0) { | 2230 | if (vmx_set_msr(vcpu, ecx, data) != 0) { |
2090 | kvm_inject_gp(vcpu, 0); | 2231 | kvm_inject_gp(vcpu, 0); |
2091 | return 1; | 2232 | return 1; |
@@ -2110,6 +2251,9 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, | |||
2110 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 2251 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
2111 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; | 2252 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; |
2112 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 2253 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
2254 | |||
2255 | KVMTRACE_0D(PEND_INTR, vcpu, handler); | ||
2256 | |||
2113 | /* | 2257 | /* |
2114 | * If the user space waits to inject interrupts, exit as soon as | 2258 | * If the user space waits to inject interrupts, exit as soon as |
2115 | * possible | 2259 | * possible |
@@ -2152,6 +2296,8 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2152 | exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | 2296 | exit_qualification = vmcs_read64(EXIT_QUALIFICATION); |
2153 | offset = exit_qualification & 0xffful; | 2297 | offset = exit_qualification & 0xffful; |
2154 | 2298 | ||
2299 | KVMTRACE_1D(APIC_ACCESS, vcpu, (u32)offset, handler); | ||
2300 | |||
2155 | er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 2301 | er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); |
2156 | 2302 | ||
2157 | if (er != EMULATE_DONE) { | 2303 | if (er != EMULATE_DONE) { |
@@ -2163,6 +2309,20 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2163 | return 1; | 2309 | return 1; |
2164 | } | 2310 | } |
2165 | 2311 | ||
2312 | static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
2313 | { | ||
2314 | unsigned long exit_qualification; | ||
2315 | u16 tss_selector; | ||
2316 | int reason; | ||
2317 | |||
2318 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
2319 | |||
2320 | reason = (u32)exit_qualification >> 30; | ||
2321 | tss_selector = exit_qualification; | ||
2322 | |||
2323 | return kvm_task_switch(vcpu, tss_selector, reason); | ||
2324 | } | ||
2325 | |||
2166 | /* | 2326 | /* |
2167 | * The exit handlers return 1 if the exit was handled fully and guest execution | 2327 | * The exit handlers return 1 if the exit was handled fully and guest execution |
2168 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 2328 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
@@ -2185,6 +2345,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
2185 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, | 2345 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, |
2186 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, | 2346 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, |
2187 | [EXIT_REASON_WBINVD] = handle_wbinvd, | 2347 | [EXIT_REASON_WBINVD] = handle_wbinvd, |
2348 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, | ||
2188 | }; | 2349 | }; |
2189 | 2350 | ||
2190 | static const int kvm_vmx_max_exit_handlers = | 2351 | static const int kvm_vmx_max_exit_handlers = |
@@ -2200,6 +2361,9 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2200 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2361 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2201 | u32 vectoring_info = vmx->idt_vectoring_info; | 2362 | u32 vectoring_info = vmx->idt_vectoring_info; |
2202 | 2363 | ||
2364 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), | ||
2365 | (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); | ||
2366 | |||
2203 | if (unlikely(vmx->fail)) { | 2367 | if (unlikely(vmx->fail)) { |
2204 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2368 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
2205 | kvm_run->fail_entry.hardware_entry_failure_reason | 2369 | kvm_run->fail_entry.hardware_entry_failure_reason |
@@ -2210,7 +2374,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2210 | if ((vectoring_info & VECTORING_INFO_VALID_MASK) && | 2374 | if ((vectoring_info & VECTORING_INFO_VALID_MASK) && |
2211 | exit_reason != EXIT_REASON_EXCEPTION_NMI) | 2375 | exit_reason != EXIT_REASON_EXCEPTION_NMI) |
2212 | printk(KERN_WARNING "%s: unexpected, valid vectoring info and " | 2376 | printk(KERN_WARNING "%s: unexpected, valid vectoring info and " |
2213 | "exit reason is 0x%x\n", __FUNCTION__, exit_reason); | 2377 | "exit reason is 0x%x\n", __func__, exit_reason); |
2214 | if (exit_reason < kvm_vmx_max_exit_handlers | 2378 | if (exit_reason < kvm_vmx_max_exit_handlers |
2215 | && kvm_vmx_exit_handlers[exit_reason]) | 2379 | && kvm_vmx_exit_handlers[exit_reason]) |
2216 | return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); | 2380 | return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); |
@@ -2221,10 +2385,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2221 | return 0; | 2385 | return 0; |
2222 | } | 2386 | } |
2223 | 2387 | ||
2224 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | ||
2225 | { | ||
2226 | } | ||
2227 | |||
2228 | static void update_tpr_threshold(struct kvm_vcpu *vcpu) | 2388 | static void update_tpr_threshold(struct kvm_vcpu *vcpu) |
2229 | { | 2389 | { |
2230 | int max_irr, tpr; | 2390 | int max_irr, tpr; |
@@ -2285,11 +2445,13 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) | |||
2285 | return; | 2445 | return; |
2286 | } | 2446 | } |
2287 | 2447 | ||
2448 | KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler); | ||
2449 | |||
2288 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); | 2450 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); |
2289 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, | 2451 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, |
2290 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); | 2452 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); |
2291 | 2453 | ||
2292 | if (unlikely(idtv_info_field & INTR_INFO_DELIEVER_CODE_MASK)) | 2454 | if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK)) |
2293 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, | 2455 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, |
2294 | vmcs_read32(IDT_VECTORING_ERROR_CODE)); | 2456 | vmcs_read32(IDT_VECTORING_ERROR_CODE)); |
2295 | if (unlikely(has_ext_irq)) | 2457 | if (unlikely(has_ext_irq)) |
@@ -2470,8 +2632,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2470 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 2632 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
2471 | 2633 | ||
2472 | /* We need to handle NMIs before interrupts are enabled */ | 2634 | /* We need to handle NMIs before interrupts are enabled */ |
2473 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ | 2635 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ |
2636 | KVMTRACE_0D(NMI, vcpu, handler); | ||
2474 | asm("int $2"); | 2637 | asm("int $2"); |
2638 | } | ||
2475 | } | 2639 | } |
2476 | 2640 | ||
2477 | static void vmx_free_vmcs(struct kvm_vcpu *vcpu) | 2641 | static void vmx_free_vmcs(struct kvm_vcpu *vcpu) |
@@ -2489,6 +2653,10 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
2489 | { | 2653 | { |
2490 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2654 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2491 | 2655 | ||
2656 | spin_lock(&vmx_vpid_lock); | ||
2657 | if (vmx->vpid != 0) | ||
2658 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | ||
2659 | spin_unlock(&vmx_vpid_lock); | ||
2492 | vmx_free_vmcs(vcpu); | 2660 | vmx_free_vmcs(vcpu); |
2493 | kfree(vmx->host_msrs); | 2661 | kfree(vmx->host_msrs); |
2494 | kfree(vmx->guest_msrs); | 2662 | kfree(vmx->guest_msrs); |
@@ -2505,6 +2673,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
2505 | if (!vmx) | 2673 | if (!vmx) |
2506 | return ERR_PTR(-ENOMEM); | 2674 | return ERR_PTR(-ENOMEM); |
2507 | 2675 | ||
2676 | allocate_vpid(vmx); | ||
2677 | |||
2508 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); | 2678 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); |
2509 | if (err) | 2679 | if (err) |
2510 | goto free_vcpu; | 2680 | goto free_vcpu; |
@@ -2591,14 +2761,13 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
2591 | .get_segment_base = vmx_get_segment_base, | 2761 | .get_segment_base = vmx_get_segment_base, |
2592 | .get_segment = vmx_get_segment, | 2762 | .get_segment = vmx_get_segment, |
2593 | .set_segment = vmx_set_segment, | 2763 | .set_segment = vmx_set_segment, |
2764 | .get_cpl = vmx_get_cpl, | ||
2594 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, | 2765 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, |
2595 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, | 2766 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, |
2596 | .set_cr0 = vmx_set_cr0, | 2767 | .set_cr0 = vmx_set_cr0, |
2597 | .set_cr3 = vmx_set_cr3, | 2768 | .set_cr3 = vmx_set_cr3, |
2598 | .set_cr4 = vmx_set_cr4, | 2769 | .set_cr4 = vmx_set_cr4, |
2599 | #ifdef CONFIG_X86_64 | ||
2600 | .set_efer = vmx_set_efer, | 2770 | .set_efer = vmx_set_efer, |
2601 | #endif | ||
2602 | .get_idt = vmx_get_idt, | 2771 | .get_idt = vmx_get_idt, |
2603 | .set_idt = vmx_set_idt, | 2772 | .set_idt = vmx_set_idt, |
2604 | .get_gdt = vmx_get_gdt, | 2773 | .get_gdt = vmx_get_gdt, |
@@ -2626,7 +2795,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
2626 | 2795 | ||
2627 | static int __init vmx_init(void) | 2796 | static int __init vmx_init(void) |
2628 | { | 2797 | { |
2629 | void *iova; | 2798 | void *va; |
2630 | int r; | 2799 | int r; |
2631 | 2800 | ||
2632 | vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); | 2801 | vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); |
@@ -2639,28 +2808,48 @@ static int __init vmx_init(void) | |||
2639 | goto out; | 2808 | goto out; |
2640 | } | 2809 | } |
2641 | 2810 | ||
2811 | vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); | ||
2812 | if (!vmx_msr_bitmap) { | ||
2813 | r = -ENOMEM; | ||
2814 | goto out1; | ||
2815 | } | ||
2816 | |||
2642 | /* | 2817 | /* |
2643 | * Allow direct access to the PC debug port (it is often used for I/O | 2818 | * Allow direct access to the PC debug port (it is often used for I/O |
2644 | * delays, but the vmexits simply slow things down). | 2819 | * delays, but the vmexits simply slow things down). |
2645 | */ | 2820 | */ |
2646 | iova = kmap(vmx_io_bitmap_a); | 2821 | va = kmap(vmx_io_bitmap_a); |
2647 | memset(iova, 0xff, PAGE_SIZE); | 2822 | memset(va, 0xff, PAGE_SIZE); |
2648 | clear_bit(0x80, iova); | 2823 | clear_bit(0x80, va); |
2649 | kunmap(vmx_io_bitmap_a); | 2824 | kunmap(vmx_io_bitmap_a); |
2650 | 2825 | ||
2651 | iova = kmap(vmx_io_bitmap_b); | 2826 | va = kmap(vmx_io_bitmap_b); |
2652 | memset(iova, 0xff, PAGE_SIZE); | 2827 | memset(va, 0xff, PAGE_SIZE); |
2653 | kunmap(vmx_io_bitmap_b); | 2828 | kunmap(vmx_io_bitmap_b); |
2654 | 2829 | ||
2830 | va = kmap(vmx_msr_bitmap); | ||
2831 | memset(va, 0xff, PAGE_SIZE); | ||
2832 | kunmap(vmx_msr_bitmap); | ||
2833 | |||
2834 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ | ||
2835 | |||
2655 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); | 2836 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); |
2656 | if (r) | 2837 | if (r) |
2657 | goto out1; | 2838 | goto out2; |
2839 | |||
2840 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_FS_BASE); | ||
2841 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_GS_BASE); | ||
2842 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_CS); | ||
2843 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); | ||
2844 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); | ||
2658 | 2845 | ||
2659 | if (bypass_guest_pf) | 2846 | if (bypass_guest_pf) |
2660 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); | 2847 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); |
2661 | 2848 | ||
2662 | return 0; | 2849 | return 0; |
2663 | 2850 | ||
2851 | out2: | ||
2852 | __free_page(vmx_msr_bitmap); | ||
2664 | out1: | 2853 | out1: |
2665 | __free_page(vmx_io_bitmap_b); | 2854 | __free_page(vmx_io_bitmap_b); |
2666 | out: | 2855 | out: |
@@ -2670,6 +2859,7 @@ out: | |||
2670 | 2859 | ||
2671 | static void __exit vmx_exit(void) | 2860 | static void __exit vmx_exit(void) |
2672 | { | 2861 | { |
2862 | __free_page(vmx_msr_bitmap); | ||
2673 | __free_page(vmx_io_bitmap_b); | 2863 | __free_page(vmx_io_bitmap_b); |
2674 | __free_page(vmx_io_bitmap_a); | 2864 | __free_page(vmx_io_bitmap_a); |
2675 | 2865 | ||
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index d52ae8d7303d..5dff4606b988 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h | |||
@@ -49,6 +49,7 @@ | |||
49 | * Definitions of Secondary Processor-Based VM-Execution Controls. | 49 | * Definitions of Secondary Processor-Based VM-Execution Controls. |
50 | */ | 50 | */ |
51 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 | 51 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 |
52 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 | ||
52 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 | 53 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 |
53 | 54 | ||
54 | 55 | ||
@@ -65,6 +66,7 @@ | |||
65 | 66 | ||
66 | /* VMCS Encodings */ | 67 | /* VMCS Encodings */ |
67 | enum vmcs_field { | 68 | enum vmcs_field { |
69 | VIRTUAL_PROCESSOR_ID = 0x00000000, | ||
68 | GUEST_ES_SELECTOR = 0x00000800, | 70 | GUEST_ES_SELECTOR = 0x00000800, |
69 | GUEST_CS_SELECTOR = 0x00000802, | 71 | GUEST_CS_SELECTOR = 0x00000802, |
70 | GUEST_SS_SELECTOR = 0x00000804, | 72 | GUEST_SS_SELECTOR = 0x00000804, |
@@ -231,12 +233,12 @@ enum vmcs_field { | |||
231 | */ | 233 | */ |
232 | #define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ | 234 | #define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ |
233 | #define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ | 235 | #define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ |
234 | #define INTR_INFO_DELIEVER_CODE_MASK 0x800 /* 11 */ | 236 | #define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */ |
235 | #define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ | 237 | #define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ |
236 | 238 | ||
237 | #define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK | 239 | #define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK |
238 | #define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK | 240 | #define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK |
239 | #define VECTORING_INFO_DELIEVER_CODE_MASK INTR_INFO_DELIEVER_CODE_MASK | 241 | #define VECTORING_INFO_DELIVER_CODE_MASK INTR_INFO_DELIVER_CODE_MASK |
240 | #define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK | 242 | #define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK |
241 | 243 | ||
242 | #define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ | 244 | #define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ |
@@ -321,4 +323,8 @@ enum vmcs_field { | |||
321 | 323 | ||
322 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 | 324 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 |
323 | 325 | ||
326 | #define VMX_NR_VPIDS (1 << 16) | ||
327 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 | ||
328 | #define VMX_VPID_EXTENT_ALL_CONTEXT 2 | ||
329 | |||
324 | #endif | 330 | #endif |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6b01552bd1f1..0ce556372a4d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -15,10 +15,12 @@ | |||
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <linux/kvm_host.h> | 17 | #include <linux/kvm_host.h> |
18 | #include "segment_descriptor.h" | ||
19 | #include "irq.h" | 18 | #include "irq.h" |
20 | #include "mmu.h" | 19 | #include "mmu.h" |
20 | #include "i8254.h" | ||
21 | #include "tss.h" | ||
21 | 22 | ||
23 | #include <linux/clocksource.h> | ||
22 | #include <linux/kvm.h> | 24 | #include <linux/kvm.h> |
23 | #include <linux/fs.h> | 25 | #include <linux/fs.h> |
24 | #include <linux/vmalloc.h> | 26 | #include <linux/vmalloc.h> |
@@ -28,6 +30,7 @@ | |||
28 | 30 | ||
29 | #include <asm/uaccess.h> | 31 | #include <asm/uaccess.h> |
30 | #include <asm/msr.h> | 32 | #include <asm/msr.h> |
33 | #include <asm/desc.h> | ||
31 | 34 | ||
32 | #define MAX_IO_MSRS 256 | 35 | #define MAX_IO_MSRS 256 |
33 | #define CR0_RESERVED_BITS \ | 36 | #define CR0_RESERVED_BITS \ |
@@ -41,7 +44,15 @@ | |||
41 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) | 44 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) |
42 | 45 | ||
43 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | 46 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) |
44 | #define EFER_RESERVED_BITS 0xfffffffffffff2fe | 47 | /* EFER defaults: |
48 | * - enable syscall per default because its emulated by KVM | ||
49 | * - enable LME and LMA per default on 64 bit KVM | ||
50 | */ | ||
51 | #ifdef CONFIG_X86_64 | ||
52 | static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL; | ||
53 | #else | ||
54 | static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL; | ||
55 | #endif | ||
45 | 56 | ||
46 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM | 57 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM |
47 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU | 58 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU |
@@ -63,6 +74,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
63 | { "irq_window", VCPU_STAT(irq_window_exits) }, | 74 | { "irq_window", VCPU_STAT(irq_window_exits) }, |
64 | { "halt_exits", VCPU_STAT(halt_exits) }, | 75 | { "halt_exits", VCPU_STAT(halt_exits) }, |
65 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, | 76 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, |
77 | { "hypercalls", VCPU_STAT(hypercalls) }, | ||
66 | { "request_irq", VCPU_STAT(request_irq_exits) }, | 78 | { "request_irq", VCPU_STAT(request_irq_exits) }, |
67 | { "irq_exits", VCPU_STAT(irq_exits) }, | 79 | { "irq_exits", VCPU_STAT(irq_exits) }, |
68 | { "host_state_reload", VCPU_STAT(host_state_reload) }, | 80 | { "host_state_reload", VCPU_STAT(host_state_reload) }, |
@@ -78,6 +90,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
78 | { "mmu_recycled", VM_STAT(mmu_recycled) }, | 90 | { "mmu_recycled", VM_STAT(mmu_recycled) }, |
79 | { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, | 91 | { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, |
80 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, | 92 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, |
93 | { "largepages", VM_STAT(lpages) }, | ||
81 | { NULL } | 94 | { NULL } |
82 | }; | 95 | }; |
83 | 96 | ||
@@ -85,7 +98,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
85 | unsigned long segment_base(u16 selector) | 98 | unsigned long segment_base(u16 selector) |
86 | { | 99 | { |
87 | struct descriptor_table gdt; | 100 | struct descriptor_table gdt; |
88 | struct segment_descriptor *d; | 101 | struct desc_struct *d; |
89 | unsigned long table_base; | 102 | unsigned long table_base; |
90 | unsigned long v; | 103 | unsigned long v; |
91 | 104 | ||
@@ -101,13 +114,12 @@ unsigned long segment_base(u16 selector) | |||
101 | asm("sldt %0" : "=g"(ldt_selector)); | 114 | asm("sldt %0" : "=g"(ldt_selector)); |
102 | table_base = segment_base(ldt_selector); | 115 | table_base = segment_base(ldt_selector); |
103 | } | 116 | } |
104 | d = (struct segment_descriptor *)(table_base + (selector & ~7)); | 117 | d = (struct desc_struct *)(table_base + (selector & ~7)); |
105 | v = d->base_low | ((unsigned long)d->base_mid << 16) | | 118 | v = d->base0 | ((unsigned long)d->base1 << 16) | |
106 | ((unsigned long)d->base_high << 24); | 119 | ((unsigned long)d->base2 << 24); |
107 | #ifdef CONFIG_X86_64 | 120 | #ifdef CONFIG_X86_64 |
108 | if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) | 121 | if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) |
109 | v |= ((unsigned long) \ | 122 | v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; |
110 | ((struct segment_descriptor_64 *)d)->base_higher) << 32; | ||
111 | #endif | 123 | #endif |
112 | return v; | 124 | return v; |
113 | } | 125 | } |
@@ -145,11 +157,16 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, | |||
145 | u32 error_code) | 157 | u32 error_code) |
146 | { | 158 | { |
147 | ++vcpu->stat.pf_guest; | 159 | ++vcpu->stat.pf_guest; |
148 | if (vcpu->arch.exception.pending && vcpu->arch.exception.nr == PF_VECTOR) { | 160 | if (vcpu->arch.exception.pending) { |
149 | printk(KERN_DEBUG "kvm: inject_page_fault:" | 161 | if (vcpu->arch.exception.nr == PF_VECTOR) { |
150 | " double fault 0x%lx\n", addr); | 162 | printk(KERN_DEBUG "kvm: inject_page_fault:" |
151 | vcpu->arch.exception.nr = DF_VECTOR; | 163 | " double fault 0x%lx\n", addr); |
152 | vcpu->arch.exception.error_code = 0; | 164 | vcpu->arch.exception.nr = DF_VECTOR; |
165 | vcpu->arch.exception.error_code = 0; | ||
166 | } else if (vcpu->arch.exception.nr == DF_VECTOR) { | ||
167 | /* triple fault -> shutdown */ | ||
168 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | ||
169 | } | ||
153 | return; | 170 | return; |
154 | } | 171 | } |
155 | vcpu->arch.cr2 = addr; | 172 | vcpu->arch.cr2 = addr; |
@@ -184,7 +201,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
184 | int ret; | 201 | int ret; |
185 | u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; | 202 | u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; |
186 | 203 | ||
187 | down_read(&vcpu->kvm->slots_lock); | ||
188 | ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, | 204 | ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, |
189 | offset * sizeof(u64), sizeof(pdpte)); | 205 | offset * sizeof(u64), sizeof(pdpte)); |
190 | if (ret < 0) { | 206 | if (ret < 0) { |
@@ -201,10 +217,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
201 | 217 | ||
202 | memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); | 218 | memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); |
203 | out: | 219 | out: |
204 | up_read(&vcpu->kvm->slots_lock); | ||
205 | 220 | ||
206 | return ret; | 221 | return ret; |
207 | } | 222 | } |
223 | EXPORT_SYMBOL_GPL(load_pdptrs); | ||
208 | 224 | ||
209 | static bool pdptrs_changed(struct kvm_vcpu *vcpu) | 225 | static bool pdptrs_changed(struct kvm_vcpu *vcpu) |
210 | { | 226 | { |
@@ -215,18 +231,16 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) | |||
215 | if (is_long_mode(vcpu) || !is_pae(vcpu)) | 231 | if (is_long_mode(vcpu) || !is_pae(vcpu)) |
216 | return false; | 232 | return false; |
217 | 233 | ||
218 | down_read(&vcpu->kvm->slots_lock); | ||
219 | r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); | 234 | r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); |
220 | if (r < 0) | 235 | if (r < 0) |
221 | goto out; | 236 | goto out; |
222 | changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; | 237 | changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; |
223 | out: | 238 | out: |
224 | up_read(&vcpu->kvm->slots_lock); | ||
225 | 239 | ||
226 | return changed; | 240 | return changed; |
227 | } | 241 | } |
228 | 242 | ||
229 | void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 243 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
230 | { | 244 | { |
231 | if (cr0 & CR0_RESERVED_BITS) { | 245 | if (cr0 & CR0_RESERVED_BITS) { |
232 | printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", | 246 | printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", |
@@ -284,15 +298,18 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
284 | kvm_mmu_reset_context(vcpu); | 298 | kvm_mmu_reset_context(vcpu); |
285 | return; | 299 | return; |
286 | } | 300 | } |
287 | EXPORT_SYMBOL_GPL(set_cr0); | 301 | EXPORT_SYMBOL_GPL(kvm_set_cr0); |
288 | 302 | ||
289 | void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | 303 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) |
290 | { | 304 | { |
291 | set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); | 305 | kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); |
306 | KVMTRACE_1D(LMSW, vcpu, | ||
307 | (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)), | ||
308 | handler); | ||
292 | } | 309 | } |
293 | EXPORT_SYMBOL_GPL(lmsw); | 310 | EXPORT_SYMBOL_GPL(kvm_lmsw); |
294 | 311 | ||
295 | void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 312 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
296 | { | 313 | { |
297 | if (cr4 & CR4_RESERVED_BITS) { | 314 | if (cr4 & CR4_RESERVED_BITS) { |
298 | printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); | 315 | printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); |
@@ -323,9 +340,9 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
323 | vcpu->arch.cr4 = cr4; | 340 | vcpu->arch.cr4 = cr4; |
324 | kvm_mmu_reset_context(vcpu); | 341 | kvm_mmu_reset_context(vcpu); |
325 | } | 342 | } |
326 | EXPORT_SYMBOL_GPL(set_cr4); | 343 | EXPORT_SYMBOL_GPL(kvm_set_cr4); |
327 | 344 | ||
328 | void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 345 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
329 | { | 346 | { |
330 | if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { | 347 | if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { |
331 | kvm_mmu_flush_tlb(vcpu); | 348 | kvm_mmu_flush_tlb(vcpu); |
@@ -359,7 +376,6 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
359 | */ | 376 | */ |
360 | } | 377 | } |
361 | 378 | ||
362 | down_read(&vcpu->kvm->slots_lock); | ||
363 | /* | 379 | /* |
364 | * Does the new cr3 value map to physical memory? (Note, we | 380 | * Does the new cr3 value map to physical memory? (Note, we |
365 | * catch an invalid cr3 even in real-mode, because it would | 381 | * catch an invalid cr3 even in real-mode, because it would |
@@ -375,11 +391,10 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
375 | vcpu->arch.cr3 = cr3; | 391 | vcpu->arch.cr3 = cr3; |
376 | vcpu->arch.mmu.new_cr3(vcpu); | 392 | vcpu->arch.mmu.new_cr3(vcpu); |
377 | } | 393 | } |
378 | up_read(&vcpu->kvm->slots_lock); | ||
379 | } | 394 | } |
380 | EXPORT_SYMBOL_GPL(set_cr3); | 395 | EXPORT_SYMBOL_GPL(kvm_set_cr3); |
381 | 396 | ||
382 | void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | 397 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) |
383 | { | 398 | { |
384 | if (cr8 & CR8_RESERVED_BITS) { | 399 | if (cr8 & CR8_RESERVED_BITS) { |
385 | printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); | 400 | printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); |
@@ -391,16 +406,16 @@ void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | |||
391 | else | 406 | else |
392 | vcpu->arch.cr8 = cr8; | 407 | vcpu->arch.cr8 = cr8; |
393 | } | 408 | } |
394 | EXPORT_SYMBOL_GPL(set_cr8); | 409 | EXPORT_SYMBOL_GPL(kvm_set_cr8); |
395 | 410 | ||
396 | unsigned long get_cr8(struct kvm_vcpu *vcpu) | 411 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) |
397 | { | 412 | { |
398 | if (irqchip_in_kernel(vcpu->kvm)) | 413 | if (irqchip_in_kernel(vcpu->kvm)) |
399 | return kvm_lapic_get_cr8(vcpu); | 414 | return kvm_lapic_get_cr8(vcpu); |
400 | else | 415 | else |
401 | return vcpu->arch.cr8; | 416 | return vcpu->arch.cr8; |
402 | } | 417 | } |
403 | EXPORT_SYMBOL_GPL(get_cr8); | 418 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
404 | 419 | ||
405 | /* | 420 | /* |
406 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS | 421 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS |
@@ -415,7 +430,8 @@ static u32 msrs_to_save[] = { | |||
415 | #ifdef CONFIG_X86_64 | 430 | #ifdef CONFIG_X86_64 |
416 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, | 431 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, |
417 | #endif | 432 | #endif |
418 | MSR_IA32_TIME_STAMP_COUNTER, | 433 | MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
434 | MSR_IA32_PERF_STATUS, | ||
419 | }; | 435 | }; |
420 | 436 | ||
421 | static unsigned num_msrs_to_save; | 437 | static unsigned num_msrs_to_save; |
@@ -424,11 +440,9 @@ static u32 emulated_msrs[] = { | |||
424 | MSR_IA32_MISC_ENABLE, | 440 | MSR_IA32_MISC_ENABLE, |
425 | }; | 441 | }; |
426 | 442 | ||
427 | #ifdef CONFIG_X86_64 | ||
428 | |||
429 | static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | 443 | static void set_efer(struct kvm_vcpu *vcpu, u64 efer) |
430 | { | 444 | { |
431 | if (efer & EFER_RESERVED_BITS) { | 445 | if (efer & efer_reserved_bits) { |
432 | printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n", | 446 | printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n", |
433 | efer); | 447 | efer); |
434 | kvm_inject_gp(vcpu, 0); | 448 | kvm_inject_gp(vcpu, 0); |
@@ -450,7 +464,12 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
450 | vcpu->arch.shadow_efer = efer; | 464 | vcpu->arch.shadow_efer = efer; |
451 | } | 465 | } |
452 | 466 | ||
453 | #endif | 467 | void kvm_enable_efer_bits(u64 mask) |
468 | { | ||
469 | efer_reserved_bits &= ~mask; | ||
470 | } | ||
471 | EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); | ||
472 | |||
454 | 473 | ||
455 | /* | 474 | /* |
456 | * Writes msr value into into the appropriate "register". | 475 | * Writes msr value into into the appropriate "register". |
@@ -470,26 +489,86 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) | |||
470 | return kvm_set_msr(vcpu, index, *data); | 489 | return kvm_set_msr(vcpu, index, *data); |
471 | } | 490 | } |
472 | 491 | ||
492 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | ||
493 | { | ||
494 | static int version; | ||
495 | struct kvm_wall_clock wc; | ||
496 | struct timespec wc_ts; | ||
497 | |||
498 | if (!wall_clock) | ||
499 | return; | ||
500 | |||
501 | version++; | ||
502 | |||
503 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); | ||
504 | |||
505 | wc_ts = current_kernel_time(); | ||
506 | wc.wc_sec = wc_ts.tv_sec; | ||
507 | wc.wc_nsec = wc_ts.tv_nsec; | ||
508 | wc.wc_version = version; | ||
509 | |||
510 | kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); | ||
511 | |||
512 | version++; | ||
513 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); | ||
514 | } | ||
515 | |||
516 | static void kvm_write_guest_time(struct kvm_vcpu *v) | ||
517 | { | ||
518 | struct timespec ts; | ||
519 | unsigned long flags; | ||
520 | struct kvm_vcpu_arch *vcpu = &v->arch; | ||
521 | void *shared_kaddr; | ||
522 | |||
523 | if ((!vcpu->time_page)) | ||
524 | return; | ||
525 | |||
526 | /* Keep irq disabled to prevent changes to the clock */ | ||
527 | local_irq_save(flags); | ||
528 | kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, | ||
529 | &vcpu->hv_clock.tsc_timestamp); | ||
530 | ktime_get_ts(&ts); | ||
531 | local_irq_restore(flags); | ||
532 | |||
533 | /* With all the info we got, fill in the values */ | ||
534 | |||
535 | vcpu->hv_clock.system_time = ts.tv_nsec + | ||
536 | (NSEC_PER_SEC * (u64)ts.tv_sec); | ||
537 | /* | ||
538 | * The interface expects us to write an even number signaling that the | ||
539 | * update is finished. Since the guest won't see the intermediate | ||
540 | * state, we just write "2" at the end | ||
541 | */ | ||
542 | vcpu->hv_clock.version = 2; | ||
543 | |||
544 | shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); | ||
545 | |||
546 | memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, | ||
547 | sizeof(vcpu->hv_clock)); | ||
548 | |||
549 | kunmap_atomic(shared_kaddr, KM_USER0); | ||
550 | |||
551 | mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); | ||
552 | } | ||
553 | |||
473 | 554 | ||
474 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 555 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
475 | { | 556 | { |
476 | switch (msr) { | 557 | switch (msr) { |
477 | #ifdef CONFIG_X86_64 | ||
478 | case MSR_EFER: | 558 | case MSR_EFER: |
479 | set_efer(vcpu, data); | 559 | set_efer(vcpu, data); |
480 | break; | 560 | break; |
481 | #endif | ||
482 | case MSR_IA32_MC0_STATUS: | 561 | case MSR_IA32_MC0_STATUS: |
483 | pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", | 562 | pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", |
484 | __FUNCTION__, data); | 563 | __func__, data); |
485 | break; | 564 | break; |
486 | case MSR_IA32_MCG_STATUS: | 565 | case MSR_IA32_MCG_STATUS: |
487 | pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", | 566 | pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", |
488 | __FUNCTION__, data); | 567 | __func__, data); |
489 | break; | 568 | break; |
490 | case MSR_IA32_MCG_CTL: | 569 | case MSR_IA32_MCG_CTL: |
491 | pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", | 570 | pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", |
492 | __FUNCTION__, data); | 571 | __func__, data); |
493 | break; | 572 | break; |
494 | case MSR_IA32_UCODE_REV: | 573 | case MSR_IA32_UCODE_REV: |
495 | case MSR_IA32_UCODE_WRITE: | 574 | case MSR_IA32_UCODE_WRITE: |
@@ -501,6 +580,42 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
501 | case MSR_IA32_MISC_ENABLE: | 580 | case MSR_IA32_MISC_ENABLE: |
502 | vcpu->arch.ia32_misc_enable_msr = data; | 581 | vcpu->arch.ia32_misc_enable_msr = data; |
503 | break; | 582 | break; |
583 | case MSR_KVM_WALL_CLOCK: | ||
584 | vcpu->kvm->arch.wall_clock = data; | ||
585 | kvm_write_wall_clock(vcpu->kvm, data); | ||
586 | break; | ||
587 | case MSR_KVM_SYSTEM_TIME: { | ||
588 | if (vcpu->arch.time_page) { | ||
589 | kvm_release_page_dirty(vcpu->arch.time_page); | ||
590 | vcpu->arch.time_page = NULL; | ||
591 | } | ||
592 | |||
593 | vcpu->arch.time = data; | ||
594 | |||
595 | /* we verify if the enable bit is set... */ | ||
596 | if (!(data & 1)) | ||
597 | break; | ||
598 | |||
599 | /* ...but clean it before doing the actual write */ | ||
600 | vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); | ||
601 | |||
602 | vcpu->arch.hv_clock.tsc_to_system_mul = | ||
603 | clocksource_khz2mult(tsc_khz, 22); | ||
604 | vcpu->arch.hv_clock.tsc_shift = 22; | ||
605 | |||
606 | down_read(¤t->mm->mmap_sem); | ||
607 | vcpu->arch.time_page = | ||
608 | gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); | ||
609 | up_read(¤t->mm->mmap_sem); | ||
610 | |||
611 | if (is_error_page(vcpu->arch.time_page)) { | ||
612 | kvm_release_page_clean(vcpu->arch.time_page); | ||
613 | vcpu->arch.time_page = NULL; | ||
614 | } | ||
615 | |||
616 | kvm_write_guest_time(vcpu); | ||
617 | break; | ||
618 | } | ||
504 | default: | 619 | default: |
505 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); | 620 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); |
506 | return 1; | 621 | return 1; |
@@ -540,7 +655,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
540 | case MSR_IA32_MC0_MISC+12: | 655 | case MSR_IA32_MC0_MISC+12: |
541 | case MSR_IA32_MC0_MISC+16: | 656 | case MSR_IA32_MC0_MISC+16: |
542 | case MSR_IA32_UCODE_REV: | 657 | case MSR_IA32_UCODE_REV: |
543 | case MSR_IA32_PERF_STATUS: | ||
544 | case MSR_IA32_EBL_CR_POWERON: | 658 | case MSR_IA32_EBL_CR_POWERON: |
545 | /* MTRR registers */ | 659 | /* MTRR registers */ |
546 | case 0xfe: | 660 | case 0xfe: |
@@ -556,11 +670,21 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
556 | case MSR_IA32_MISC_ENABLE: | 670 | case MSR_IA32_MISC_ENABLE: |
557 | data = vcpu->arch.ia32_misc_enable_msr; | 671 | data = vcpu->arch.ia32_misc_enable_msr; |
558 | break; | 672 | break; |
559 | #ifdef CONFIG_X86_64 | 673 | case MSR_IA32_PERF_STATUS: |
674 | /* TSC increment by tick */ | ||
675 | data = 1000ULL; | ||
676 | /* CPU multiplier */ | ||
677 | data |= (((uint64_t)4ULL) << 40); | ||
678 | break; | ||
560 | case MSR_EFER: | 679 | case MSR_EFER: |
561 | data = vcpu->arch.shadow_efer; | 680 | data = vcpu->arch.shadow_efer; |
562 | break; | 681 | break; |
563 | #endif | 682 | case MSR_KVM_WALL_CLOCK: |
683 | data = vcpu->kvm->arch.wall_clock; | ||
684 | break; | ||
685 | case MSR_KVM_SYSTEM_TIME: | ||
686 | data = vcpu->arch.time; | ||
687 | break; | ||
564 | default: | 688 | default: |
565 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 689 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
566 | return 1; | 690 | return 1; |
@@ -584,9 +708,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, | |||
584 | 708 | ||
585 | vcpu_load(vcpu); | 709 | vcpu_load(vcpu); |
586 | 710 | ||
711 | down_read(&vcpu->kvm->slots_lock); | ||
587 | for (i = 0; i < msrs->nmsrs; ++i) | 712 | for (i = 0; i < msrs->nmsrs; ++i) |
588 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) | 713 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) |
589 | break; | 714 | break; |
715 | up_read(&vcpu->kvm->slots_lock); | ||
590 | 716 | ||
591 | vcpu_put(vcpu); | 717 | vcpu_put(vcpu); |
592 | 718 | ||
@@ -688,11 +814,24 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
688 | case KVM_CAP_USER_MEMORY: | 814 | case KVM_CAP_USER_MEMORY: |
689 | case KVM_CAP_SET_TSS_ADDR: | 815 | case KVM_CAP_SET_TSS_ADDR: |
690 | case KVM_CAP_EXT_CPUID: | 816 | case KVM_CAP_EXT_CPUID: |
817 | case KVM_CAP_CLOCKSOURCE: | ||
818 | case KVM_CAP_PIT: | ||
819 | case KVM_CAP_NOP_IO_DELAY: | ||
820 | case KVM_CAP_MP_STATE: | ||
691 | r = 1; | 821 | r = 1; |
692 | break; | 822 | break; |
693 | case KVM_CAP_VAPIC: | 823 | case KVM_CAP_VAPIC: |
694 | r = !kvm_x86_ops->cpu_has_accelerated_tpr(); | 824 | r = !kvm_x86_ops->cpu_has_accelerated_tpr(); |
695 | break; | 825 | break; |
826 | case KVM_CAP_NR_VCPUS: | ||
827 | r = KVM_MAX_VCPUS; | ||
828 | break; | ||
829 | case KVM_CAP_NR_MEMSLOTS: | ||
830 | r = KVM_MEMORY_SLOTS; | ||
831 | break; | ||
832 | case KVM_CAP_PV_MMU: | ||
833 | r = !tdp_enabled; | ||
834 | break; | ||
696 | default: | 835 | default: |
697 | r = 0; | 836 | r = 0; |
698 | break; | 837 | break; |
@@ -763,6 +902,7 @@ out: | |||
763 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 902 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
764 | { | 903 | { |
765 | kvm_x86_ops->vcpu_load(vcpu, cpu); | 904 | kvm_x86_ops->vcpu_load(vcpu, cpu); |
905 | kvm_write_guest_time(vcpu); | ||
766 | } | 906 | } |
767 | 907 | ||
768 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 908 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
@@ -958,32 +1098,32 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
958 | } | 1098 | } |
959 | /* function 4 and 0xb have additional index. */ | 1099 | /* function 4 and 0xb have additional index. */ |
960 | case 4: { | 1100 | case 4: { |
961 | int index, cache_type; | 1101 | int i, cache_type; |
962 | 1102 | ||
963 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 1103 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
964 | /* read more entries until cache_type is zero */ | 1104 | /* read more entries until cache_type is zero */ |
965 | for (index = 1; *nent < maxnent; ++index) { | 1105 | for (i = 1; *nent < maxnent; ++i) { |
966 | cache_type = entry[index - 1].eax & 0x1f; | 1106 | cache_type = entry[i - 1].eax & 0x1f; |
967 | if (!cache_type) | 1107 | if (!cache_type) |
968 | break; | 1108 | break; |
969 | do_cpuid_1_ent(&entry[index], function, index); | 1109 | do_cpuid_1_ent(&entry[i], function, i); |
970 | entry[index].flags |= | 1110 | entry[i].flags |= |
971 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 1111 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
972 | ++*nent; | 1112 | ++*nent; |
973 | } | 1113 | } |
974 | break; | 1114 | break; |
975 | } | 1115 | } |
976 | case 0xb: { | 1116 | case 0xb: { |
977 | int index, level_type; | 1117 | int i, level_type; |
978 | 1118 | ||
979 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 1119 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
980 | /* read more entries until level_type is zero */ | 1120 | /* read more entries until level_type is zero */ |
981 | for (index = 1; *nent < maxnent; ++index) { | 1121 | for (i = 1; *nent < maxnent; ++i) { |
982 | level_type = entry[index - 1].ecx & 0xff; | 1122 | level_type = entry[i - 1].ecx & 0xff; |
983 | if (!level_type) | 1123 | if (!level_type) |
984 | break; | 1124 | break; |
985 | do_cpuid_1_ent(&entry[index], function, index); | 1125 | do_cpuid_1_ent(&entry[i], function, i); |
986 | entry[index].flags |= | 1126 | entry[i].flags |= |
987 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 1127 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
988 | ++*nent; | 1128 | ++*nent; |
989 | } | 1129 | } |
@@ -1365,6 +1505,23 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
1365 | return r; | 1505 | return r; |
1366 | } | 1506 | } |
1367 | 1507 | ||
1508 | static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) | ||
1509 | { | ||
1510 | int r = 0; | ||
1511 | |||
1512 | memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state)); | ||
1513 | return r; | ||
1514 | } | ||
1515 | |||
1516 | static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) | ||
1517 | { | ||
1518 | int r = 0; | ||
1519 | |||
1520 | memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); | ||
1521 | kvm_pit_load_count(kvm, 0, ps->channels[0].count); | ||
1522 | return r; | ||
1523 | } | ||
1524 | |||
1368 | /* | 1525 | /* |
1369 | * Get (and clear) the dirty memory log for a memory slot. | 1526 | * Get (and clear) the dirty memory log for a memory slot. |
1370 | */ | 1527 | */ |
@@ -1457,6 +1614,12 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
1457 | } else | 1614 | } else |
1458 | goto out; | 1615 | goto out; |
1459 | break; | 1616 | break; |
1617 | case KVM_CREATE_PIT: | ||
1618 | r = -ENOMEM; | ||
1619 | kvm->arch.vpit = kvm_create_pit(kvm); | ||
1620 | if (kvm->arch.vpit) | ||
1621 | r = 0; | ||
1622 | break; | ||
1460 | case KVM_IRQ_LINE: { | 1623 | case KVM_IRQ_LINE: { |
1461 | struct kvm_irq_level irq_event; | 1624 | struct kvm_irq_level irq_event; |
1462 | 1625 | ||
@@ -1512,6 +1675,37 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
1512 | r = 0; | 1675 | r = 0; |
1513 | break; | 1676 | break; |
1514 | } | 1677 | } |
1678 | case KVM_GET_PIT: { | ||
1679 | struct kvm_pit_state ps; | ||
1680 | r = -EFAULT; | ||
1681 | if (copy_from_user(&ps, argp, sizeof ps)) | ||
1682 | goto out; | ||
1683 | r = -ENXIO; | ||
1684 | if (!kvm->arch.vpit) | ||
1685 | goto out; | ||
1686 | r = kvm_vm_ioctl_get_pit(kvm, &ps); | ||
1687 | if (r) | ||
1688 | goto out; | ||
1689 | r = -EFAULT; | ||
1690 | if (copy_to_user(argp, &ps, sizeof ps)) | ||
1691 | goto out; | ||
1692 | r = 0; | ||
1693 | break; | ||
1694 | } | ||
1695 | case KVM_SET_PIT: { | ||
1696 | struct kvm_pit_state ps; | ||
1697 | r = -EFAULT; | ||
1698 | if (copy_from_user(&ps, argp, sizeof ps)) | ||
1699 | goto out; | ||
1700 | r = -ENXIO; | ||
1701 | if (!kvm->arch.vpit) | ||
1702 | goto out; | ||
1703 | r = kvm_vm_ioctl_set_pit(kvm, &ps); | ||
1704 | if (r) | ||
1705 | goto out; | ||
1706 | r = 0; | ||
1707 | break; | ||
1708 | } | ||
1515 | default: | 1709 | default: |
1516 | ; | 1710 | ; |
1517 | } | 1711 | } |
@@ -1570,7 +1764,6 @@ int emulator_read_std(unsigned long addr, | |||
1570 | void *data = val; | 1764 | void *data = val; |
1571 | int r = X86EMUL_CONTINUE; | 1765 | int r = X86EMUL_CONTINUE; |
1572 | 1766 | ||
1573 | down_read(&vcpu->kvm->slots_lock); | ||
1574 | while (bytes) { | 1767 | while (bytes) { |
1575 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 1768 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); |
1576 | unsigned offset = addr & (PAGE_SIZE-1); | 1769 | unsigned offset = addr & (PAGE_SIZE-1); |
@@ -1592,7 +1785,6 @@ int emulator_read_std(unsigned long addr, | |||
1592 | addr += tocopy; | 1785 | addr += tocopy; |
1593 | } | 1786 | } |
1594 | out: | 1787 | out: |
1595 | up_read(&vcpu->kvm->slots_lock); | ||
1596 | return r; | 1788 | return r; |
1597 | } | 1789 | } |
1598 | EXPORT_SYMBOL_GPL(emulator_read_std); | 1790 | EXPORT_SYMBOL_GPL(emulator_read_std); |
@@ -1611,9 +1803,7 @@ static int emulator_read_emulated(unsigned long addr, | |||
1611 | return X86EMUL_CONTINUE; | 1803 | return X86EMUL_CONTINUE; |
1612 | } | 1804 | } |
1613 | 1805 | ||
1614 | down_read(&vcpu->kvm->slots_lock); | ||
1615 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 1806 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); |
1616 | up_read(&vcpu->kvm->slots_lock); | ||
1617 | 1807 | ||
1618 | /* For APIC access vmexit */ | 1808 | /* For APIC access vmexit */ |
1619 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 1809 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
@@ -1646,19 +1836,15 @@ mmio: | |||
1646 | return X86EMUL_UNHANDLEABLE; | 1836 | return X86EMUL_UNHANDLEABLE; |
1647 | } | 1837 | } |
1648 | 1838 | ||
1649 | static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 1839 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
1650 | const void *val, int bytes) | 1840 | const void *val, int bytes) |
1651 | { | 1841 | { |
1652 | int ret; | 1842 | int ret; |
1653 | 1843 | ||
1654 | down_read(&vcpu->kvm->slots_lock); | ||
1655 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); | 1844 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); |
1656 | if (ret < 0) { | 1845 | if (ret < 0) |
1657 | up_read(&vcpu->kvm->slots_lock); | ||
1658 | return 0; | 1846 | return 0; |
1659 | } | ||
1660 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); | 1847 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); |
1661 | up_read(&vcpu->kvm->slots_lock); | ||
1662 | return 1; | 1848 | return 1; |
1663 | } | 1849 | } |
1664 | 1850 | ||
@@ -1670,9 +1856,7 @@ static int emulator_write_emulated_onepage(unsigned long addr, | |||
1670 | struct kvm_io_device *mmio_dev; | 1856 | struct kvm_io_device *mmio_dev; |
1671 | gpa_t gpa; | 1857 | gpa_t gpa; |
1672 | 1858 | ||
1673 | down_read(&vcpu->kvm->slots_lock); | ||
1674 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 1859 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); |
1675 | up_read(&vcpu->kvm->slots_lock); | ||
1676 | 1860 | ||
1677 | if (gpa == UNMAPPED_GVA) { | 1861 | if (gpa == UNMAPPED_GVA) { |
1678 | kvm_inject_page_fault(vcpu, addr, 2); | 1862 | kvm_inject_page_fault(vcpu, addr, 2); |
@@ -1749,7 +1933,6 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
1749 | char *kaddr; | 1933 | char *kaddr; |
1750 | u64 val; | 1934 | u64 val; |
1751 | 1935 | ||
1752 | down_read(&vcpu->kvm->slots_lock); | ||
1753 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 1936 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); |
1754 | 1937 | ||
1755 | if (gpa == UNMAPPED_GVA || | 1938 | if (gpa == UNMAPPED_GVA || |
@@ -1769,9 +1952,8 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
1769 | set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); | 1952 | set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); |
1770 | kunmap_atomic(kaddr, KM_USER0); | 1953 | kunmap_atomic(kaddr, KM_USER0); |
1771 | kvm_release_page_dirty(page); | 1954 | kvm_release_page_dirty(page); |
1772 | emul_write: | ||
1773 | up_read(&vcpu->kvm->slots_lock); | ||
1774 | } | 1955 | } |
1956 | emul_write: | ||
1775 | #endif | 1957 | #endif |
1776 | 1958 | ||
1777 | return emulator_write_emulated(addr, new, bytes, vcpu); | 1959 | return emulator_write_emulated(addr, new, bytes, vcpu); |
@@ -1802,7 +1984,7 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) | |||
1802 | *dest = kvm_x86_ops->get_dr(vcpu, dr); | 1984 | *dest = kvm_x86_ops->get_dr(vcpu, dr); |
1803 | return X86EMUL_CONTINUE; | 1985 | return X86EMUL_CONTINUE; |
1804 | default: | 1986 | default: |
1805 | pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr); | 1987 | pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr); |
1806 | return X86EMUL_UNHANDLEABLE; | 1988 | return X86EMUL_UNHANDLEABLE; |
1807 | } | 1989 | } |
1808 | } | 1990 | } |
@@ -1840,7 +2022,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | |||
1840 | } | 2022 | } |
1841 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | 2023 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); |
1842 | 2024 | ||
1843 | struct x86_emulate_ops emulate_ops = { | 2025 | static struct x86_emulate_ops emulate_ops = { |
1844 | .read_std = emulator_read_std, | 2026 | .read_std = emulator_read_std, |
1845 | .read_emulated = emulator_read_emulated, | 2027 | .read_emulated = emulator_read_emulated, |
1846 | .write_emulated = emulator_write_emulated, | 2028 | .write_emulated = emulator_write_emulated, |
@@ -2091,6 +2273,13 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
2091 | vcpu->arch.pio.guest_page_offset = 0; | 2273 | vcpu->arch.pio.guest_page_offset = 0; |
2092 | vcpu->arch.pio.rep = 0; | 2274 | vcpu->arch.pio.rep = 0; |
2093 | 2275 | ||
2276 | if (vcpu->run->io.direction == KVM_EXIT_IO_IN) | ||
2277 | KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size, | ||
2278 | handler); | ||
2279 | else | ||
2280 | KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size, | ||
2281 | handler); | ||
2282 | |||
2094 | kvm_x86_ops->cache_regs(vcpu); | 2283 | kvm_x86_ops->cache_regs(vcpu); |
2095 | memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4); | 2284 | memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4); |
2096 | kvm_x86_ops->decache_regs(vcpu); | 2285 | kvm_x86_ops->decache_regs(vcpu); |
@@ -2129,6 +2318,13 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
2129 | vcpu->arch.pio.guest_page_offset = offset_in_page(address); | 2318 | vcpu->arch.pio.guest_page_offset = offset_in_page(address); |
2130 | vcpu->arch.pio.rep = rep; | 2319 | vcpu->arch.pio.rep = rep; |
2131 | 2320 | ||
2321 | if (vcpu->run->io.direction == KVM_EXIT_IO_IN) | ||
2322 | KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size, | ||
2323 | handler); | ||
2324 | else | ||
2325 | KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size, | ||
2326 | handler); | ||
2327 | |||
2132 | if (!count) { | 2328 | if (!count) { |
2133 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 2329 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
2134 | return 1; | 2330 | return 1; |
@@ -2163,10 +2359,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
2163 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 2359 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
2164 | 2360 | ||
2165 | for (i = 0; i < nr_pages; ++i) { | 2361 | for (i = 0; i < nr_pages; ++i) { |
2166 | down_read(&vcpu->kvm->slots_lock); | ||
2167 | page = gva_to_page(vcpu, address + i * PAGE_SIZE); | 2362 | page = gva_to_page(vcpu, address + i * PAGE_SIZE); |
2168 | vcpu->arch.pio.guest_pages[i] = page; | 2363 | vcpu->arch.pio.guest_pages[i] = page; |
2169 | up_read(&vcpu->kvm->slots_lock); | ||
2170 | if (!page) { | 2364 | if (!page) { |
2171 | kvm_inject_gp(vcpu, 0); | 2365 | kvm_inject_gp(vcpu, 0); |
2172 | free_pio_guest_pages(vcpu); | 2366 | free_pio_guest_pages(vcpu); |
@@ -2238,10 +2432,13 @@ void kvm_arch_exit(void) | |||
2238 | int kvm_emulate_halt(struct kvm_vcpu *vcpu) | 2432 | int kvm_emulate_halt(struct kvm_vcpu *vcpu) |
2239 | { | 2433 | { |
2240 | ++vcpu->stat.halt_exits; | 2434 | ++vcpu->stat.halt_exits; |
2435 | KVMTRACE_0D(HLT, vcpu, handler); | ||
2241 | if (irqchip_in_kernel(vcpu->kvm)) { | 2436 | if (irqchip_in_kernel(vcpu->kvm)) { |
2242 | vcpu->arch.mp_state = VCPU_MP_STATE_HALTED; | 2437 | vcpu->arch.mp_state = KVM_MP_STATE_HALTED; |
2438 | up_read(&vcpu->kvm->slots_lock); | ||
2243 | kvm_vcpu_block(vcpu); | 2439 | kvm_vcpu_block(vcpu); |
2244 | if (vcpu->arch.mp_state != VCPU_MP_STATE_RUNNABLE) | 2440 | down_read(&vcpu->kvm->slots_lock); |
2441 | if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) | ||
2245 | return -EINTR; | 2442 | return -EINTR; |
2246 | return 1; | 2443 | return 1; |
2247 | } else { | 2444 | } else { |
@@ -2251,9 +2448,19 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) | |||
2251 | } | 2448 | } |
2252 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); | 2449 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); |
2253 | 2450 | ||
2451 | static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0, | ||
2452 | unsigned long a1) | ||
2453 | { | ||
2454 | if (is_long_mode(vcpu)) | ||
2455 | return a0; | ||
2456 | else | ||
2457 | return a0 | ((gpa_t)a1 << 32); | ||
2458 | } | ||
2459 | |||
2254 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | 2460 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) |
2255 | { | 2461 | { |
2256 | unsigned long nr, a0, a1, a2, a3, ret; | 2462 | unsigned long nr, a0, a1, a2, a3, ret; |
2463 | int r = 1; | ||
2257 | 2464 | ||
2258 | kvm_x86_ops->cache_regs(vcpu); | 2465 | kvm_x86_ops->cache_regs(vcpu); |
2259 | 2466 | ||
@@ -2263,6 +2470,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
2263 | a2 = vcpu->arch.regs[VCPU_REGS_RDX]; | 2470 | a2 = vcpu->arch.regs[VCPU_REGS_RDX]; |
2264 | a3 = vcpu->arch.regs[VCPU_REGS_RSI]; | 2471 | a3 = vcpu->arch.regs[VCPU_REGS_RSI]; |
2265 | 2472 | ||
2473 | KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler); | ||
2474 | |||
2266 | if (!is_long_mode(vcpu)) { | 2475 | if (!is_long_mode(vcpu)) { |
2267 | nr &= 0xFFFFFFFF; | 2476 | nr &= 0xFFFFFFFF; |
2268 | a0 &= 0xFFFFFFFF; | 2477 | a0 &= 0xFFFFFFFF; |
@@ -2275,13 +2484,17 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
2275 | case KVM_HC_VAPIC_POLL_IRQ: | 2484 | case KVM_HC_VAPIC_POLL_IRQ: |
2276 | ret = 0; | 2485 | ret = 0; |
2277 | break; | 2486 | break; |
2487 | case KVM_HC_MMU_OP: | ||
2488 | r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret); | ||
2489 | break; | ||
2278 | default: | 2490 | default: |
2279 | ret = -KVM_ENOSYS; | 2491 | ret = -KVM_ENOSYS; |
2280 | break; | 2492 | break; |
2281 | } | 2493 | } |
2282 | vcpu->arch.regs[VCPU_REGS_RAX] = ret; | 2494 | vcpu->arch.regs[VCPU_REGS_RAX] = ret; |
2283 | kvm_x86_ops->decache_regs(vcpu); | 2495 | kvm_x86_ops->decache_regs(vcpu); |
2284 | return 0; | 2496 | ++vcpu->stat.hypercalls; |
2497 | return r; | ||
2285 | } | 2498 | } |
2286 | EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); | 2499 | EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); |
2287 | 2500 | ||
@@ -2329,7 +2542,7 @@ void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | |||
2329 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | 2542 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, |
2330 | unsigned long *rflags) | 2543 | unsigned long *rflags) |
2331 | { | 2544 | { |
2332 | lmsw(vcpu, msw); | 2545 | kvm_lmsw(vcpu, msw); |
2333 | *rflags = kvm_x86_ops->get_rflags(vcpu); | 2546 | *rflags = kvm_x86_ops->get_rflags(vcpu); |
2334 | } | 2547 | } |
2335 | 2548 | ||
@@ -2346,9 +2559,9 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | |||
2346 | case 4: | 2559 | case 4: |
2347 | return vcpu->arch.cr4; | 2560 | return vcpu->arch.cr4; |
2348 | case 8: | 2561 | case 8: |
2349 | return get_cr8(vcpu); | 2562 | return kvm_get_cr8(vcpu); |
2350 | default: | 2563 | default: |
2351 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr); | 2564 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); |
2352 | return 0; | 2565 | return 0; |
2353 | } | 2566 | } |
2354 | } | 2567 | } |
@@ -2358,23 +2571,23 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | |||
2358 | { | 2571 | { |
2359 | switch (cr) { | 2572 | switch (cr) { |
2360 | case 0: | 2573 | case 0: |
2361 | set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); | 2574 | kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); |
2362 | *rflags = kvm_x86_ops->get_rflags(vcpu); | 2575 | *rflags = kvm_x86_ops->get_rflags(vcpu); |
2363 | break; | 2576 | break; |
2364 | case 2: | 2577 | case 2: |
2365 | vcpu->arch.cr2 = val; | 2578 | vcpu->arch.cr2 = val; |
2366 | break; | 2579 | break; |
2367 | case 3: | 2580 | case 3: |
2368 | set_cr3(vcpu, val); | 2581 | kvm_set_cr3(vcpu, val); |
2369 | break; | 2582 | break; |
2370 | case 4: | 2583 | case 4: |
2371 | set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); | 2584 | kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); |
2372 | break; | 2585 | break; |
2373 | case 8: | 2586 | case 8: |
2374 | set_cr8(vcpu, val & 0xfUL); | 2587 | kvm_set_cr8(vcpu, val & 0xfUL); |
2375 | break; | 2588 | break; |
2376 | default: | 2589 | default: |
2377 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr); | 2590 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); |
2378 | } | 2591 | } |
2379 | } | 2592 | } |
2380 | 2593 | ||
@@ -2447,6 +2660,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | |||
2447 | } | 2660 | } |
2448 | kvm_x86_ops->decache_regs(vcpu); | 2661 | kvm_x86_ops->decache_regs(vcpu); |
2449 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 2662 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
2663 | KVMTRACE_5D(CPUID, vcpu, function, | ||
2664 | (u32)vcpu->arch.regs[VCPU_REGS_RAX], | ||
2665 | (u32)vcpu->arch.regs[VCPU_REGS_RBX], | ||
2666 | (u32)vcpu->arch.regs[VCPU_REGS_RCX], | ||
2667 | (u32)vcpu->arch.regs[VCPU_REGS_RDX], handler); | ||
2450 | } | 2668 | } |
2451 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | 2669 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); |
2452 | 2670 | ||
@@ -2469,7 +2687,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu, | |||
2469 | struct kvm_run *kvm_run) | 2687 | struct kvm_run *kvm_run) |
2470 | { | 2688 | { |
2471 | kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; | 2689 | kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; |
2472 | kvm_run->cr8 = get_cr8(vcpu); | 2690 | kvm_run->cr8 = kvm_get_cr8(vcpu); |
2473 | kvm_run->apic_base = kvm_get_apic_base(vcpu); | 2691 | kvm_run->apic_base = kvm_get_apic_base(vcpu); |
2474 | if (irqchip_in_kernel(vcpu->kvm)) | 2692 | if (irqchip_in_kernel(vcpu->kvm)) |
2475 | kvm_run->ready_for_interrupt_injection = 1; | 2693 | kvm_run->ready_for_interrupt_injection = 1; |
@@ -2509,16 +2727,17 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2509 | { | 2727 | { |
2510 | int r; | 2728 | int r; |
2511 | 2729 | ||
2512 | if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) { | 2730 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { |
2513 | pr_debug("vcpu %d received sipi with vector # %x\n", | 2731 | pr_debug("vcpu %d received sipi with vector # %x\n", |
2514 | vcpu->vcpu_id, vcpu->arch.sipi_vector); | 2732 | vcpu->vcpu_id, vcpu->arch.sipi_vector); |
2515 | kvm_lapic_reset(vcpu); | 2733 | kvm_lapic_reset(vcpu); |
2516 | r = kvm_x86_ops->vcpu_reset(vcpu); | 2734 | r = kvm_x86_ops->vcpu_reset(vcpu); |
2517 | if (r) | 2735 | if (r) |
2518 | return r; | 2736 | return r; |
2519 | vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; | 2737 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
2520 | } | 2738 | } |
2521 | 2739 | ||
2740 | down_read(&vcpu->kvm->slots_lock); | ||
2522 | vapic_enter(vcpu); | 2741 | vapic_enter(vcpu); |
2523 | 2742 | ||
2524 | preempted: | 2743 | preempted: |
@@ -2526,6 +2745,10 @@ preempted: | |||
2526 | kvm_x86_ops->guest_debug_pre(vcpu); | 2745 | kvm_x86_ops->guest_debug_pre(vcpu); |
2527 | 2746 | ||
2528 | again: | 2747 | again: |
2748 | if (vcpu->requests) | ||
2749 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | ||
2750 | kvm_mmu_unload(vcpu); | ||
2751 | |||
2529 | r = kvm_mmu_reload(vcpu); | 2752 | r = kvm_mmu_reload(vcpu); |
2530 | if (unlikely(r)) | 2753 | if (unlikely(r)) |
2531 | goto out; | 2754 | goto out; |
@@ -2539,6 +2762,11 @@ again: | |||
2539 | r = 0; | 2762 | r = 0; |
2540 | goto out; | 2763 | goto out; |
2541 | } | 2764 | } |
2765 | if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { | ||
2766 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | ||
2767 | r = 0; | ||
2768 | goto out; | ||
2769 | } | ||
2542 | } | 2770 | } |
2543 | 2771 | ||
2544 | kvm_inject_pending_timer_irqs(vcpu); | 2772 | kvm_inject_pending_timer_irqs(vcpu); |
@@ -2557,6 +2785,14 @@ again: | |||
2557 | goto out; | 2785 | goto out; |
2558 | } | 2786 | } |
2559 | 2787 | ||
2788 | if (vcpu->requests) | ||
2789 | if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) { | ||
2790 | local_irq_enable(); | ||
2791 | preempt_enable(); | ||
2792 | r = 1; | ||
2793 | goto out; | ||
2794 | } | ||
2795 | |||
2560 | if (signal_pending(current)) { | 2796 | if (signal_pending(current)) { |
2561 | local_irq_enable(); | 2797 | local_irq_enable(); |
2562 | preempt_enable(); | 2798 | preempt_enable(); |
@@ -2566,6 +2802,13 @@ again: | |||
2566 | goto out; | 2802 | goto out; |
2567 | } | 2803 | } |
2568 | 2804 | ||
2805 | vcpu->guest_mode = 1; | ||
2806 | /* | ||
2807 | * Make sure that guest_mode assignment won't happen after | ||
2808 | * testing the pending IRQ vector bitmap. | ||
2809 | */ | ||
2810 | smp_wmb(); | ||
2811 | |||
2569 | if (vcpu->arch.exception.pending) | 2812 | if (vcpu->arch.exception.pending) |
2570 | __queue_exception(vcpu); | 2813 | __queue_exception(vcpu); |
2571 | else if (irqchip_in_kernel(vcpu->kvm)) | 2814 | else if (irqchip_in_kernel(vcpu->kvm)) |
@@ -2575,13 +2818,15 @@ again: | |||
2575 | 2818 | ||
2576 | kvm_lapic_sync_to_vapic(vcpu); | 2819 | kvm_lapic_sync_to_vapic(vcpu); |
2577 | 2820 | ||
2578 | vcpu->guest_mode = 1; | 2821 | up_read(&vcpu->kvm->slots_lock); |
2822 | |||
2579 | kvm_guest_enter(); | 2823 | kvm_guest_enter(); |
2580 | 2824 | ||
2581 | if (vcpu->requests) | 2825 | if (vcpu->requests) |
2582 | if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) | 2826 | if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) |
2583 | kvm_x86_ops->tlb_flush(vcpu); | 2827 | kvm_x86_ops->tlb_flush(vcpu); |
2584 | 2828 | ||
2829 | KVMTRACE_0D(VMENTRY, vcpu, entryexit); | ||
2585 | kvm_x86_ops->run(vcpu, kvm_run); | 2830 | kvm_x86_ops->run(vcpu, kvm_run); |
2586 | 2831 | ||
2587 | vcpu->guest_mode = 0; | 2832 | vcpu->guest_mode = 0; |
@@ -2601,6 +2846,8 @@ again: | |||
2601 | 2846 | ||
2602 | preempt_enable(); | 2847 | preempt_enable(); |
2603 | 2848 | ||
2849 | down_read(&vcpu->kvm->slots_lock); | ||
2850 | |||
2604 | /* | 2851 | /* |
2605 | * Profile KVM exit RIPs: | 2852 | * Profile KVM exit RIPs: |
2606 | */ | 2853 | */ |
@@ -2628,14 +2875,18 @@ again: | |||
2628 | } | 2875 | } |
2629 | 2876 | ||
2630 | out: | 2877 | out: |
2878 | up_read(&vcpu->kvm->slots_lock); | ||
2631 | if (r > 0) { | 2879 | if (r > 0) { |
2632 | kvm_resched(vcpu); | 2880 | kvm_resched(vcpu); |
2881 | down_read(&vcpu->kvm->slots_lock); | ||
2633 | goto preempted; | 2882 | goto preempted; |
2634 | } | 2883 | } |
2635 | 2884 | ||
2636 | post_kvm_run_save(vcpu, kvm_run); | 2885 | post_kvm_run_save(vcpu, kvm_run); |
2637 | 2886 | ||
2887 | down_read(&vcpu->kvm->slots_lock); | ||
2638 | vapic_exit(vcpu); | 2888 | vapic_exit(vcpu); |
2889 | up_read(&vcpu->kvm->slots_lock); | ||
2639 | 2890 | ||
2640 | return r; | 2891 | return r; |
2641 | } | 2892 | } |
@@ -2647,7 +2898,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2647 | 2898 | ||
2648 | vcpu_load(vcpu); | 2899 | vcpu_load(vcpu); |
2649 | 2900 | ||
2650 | if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_UNINITIALIZED)) { | 2901 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { |
2651 | kvm_vcpu_block(vcpu); | 2902 | kvm_vcpu_block(vcpu); |
2652 | vcpu_put(vcpu); | 2903 | vcpu_put(vcpu); |
2653 | return -EAGAIN; | 2904 | return -EAGAIN; |
@@ -2658,7 +2909,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2658 | 2909 | ||
2659 | /* re-sync apic's tpr */ | 2910 | /* re-sync apic's tpr */ |
2660 | if (!irqchip_in_kernel(vcpu->kvm)) | 2911 | if (!irqchip_in_kernel(vcpu->kvm)) |
2661 | set_cr8(vcpu, kvm_run->cr8); | 2912 | kvm_set_cr8(vcpu, kvm_run->cr8); |
2662 | 2913 | ||
2663 | if (vcpu->arch.pio.cur_count) { | 2914 | if (vcpu->arch.pio.cur_count) { |
2664 | r = complete_pio(vcpu); | 2915 | r = complete_pio(vcpu); |
@@ -2670,9 +2921,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2670 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | 2921 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); |
2671 | vcpu->mmio_read_completed = 1; | 2922 | vcpu->mmio_read_completed = 1; |
2672 | vcpu->mmio_needed = 0; | 2923 | vcpu->mmio_needed = 0; |
2924 | |||
2925 | down_read(&vcpu->kvm->slots_lock); | ||
2673 | r = emulate_instruction(vcpu, kvm_run, | 2926 | r = emulate_instruction(vcpu, kvm_run, |
2674 | vcpu->arch.mmio_fault_cr2, 0, | 2927 | vcpu->arch.mmio_fault_cr2, 0, |
2675 | EMULTYPE_NO_DECODE); | 2928 | EMULTYPE_NO_DECODE); |
2929 | up_read(&vcpu->kvm->slots_lock); | ||
2676 | if (r == EMULATE_DO_MMIO) { | 2930 | if (r == EMULATE_DO_MMIO) { |
2677 | /* | 2931 | /* |
2678 | * Read-modify-write. Back to userspace. | 2932 | * Read-modify-write. Back to userspace. |
@@ -2773,7 +3027,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
2773 | static void get_segment(struct kvm_vcpu *vcpu, | 3027 | static void get_segment(struct kvm_vcpu *vcpu, |
2774 | struct kvm_segment *var, int seg) | 3028 | struct kvm_segment *var, int seg) |
2775 | { | 3029 | { |
2776 | return kvm_x86_ops->get_segment(vcpu, var, seg); | 3030 | kvm_x86_ops->get_segment(vcpu, var, seg); |
2777 | } | 3031 | } |
2778 | 3032 | ||
2779 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | 3033 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) |
@@ -2816,7 +3070,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
2816 | sregs->cr2 = vcpu->arch.cr2; | 3070 | sregs->cr2 = vcpu->arch.cr2; |
2817 | sregs->cr3 = vcpu->arch.cr3; | 3071 | sregs->cr3 = vcpu->arch.cr3; |
2818 | sregs->cr4 = vcpu->arch.cr4; | 3072 | sregs->cr4 = vcpu->arch.cr4; |
2819 | sregs->cr8 = get_cr8(vcpu); | 3073 | sregs->cr8 = kvm_get_cr8(vcpu); |
2820 | sregs->efer = vcpu->arch.shadow_efer; | 3074 | sregs->efer = vcpu->arch.shadow_efer; |
2821 | sregs->apic_base = kvm_get_apic_base(vcpu); | 3075 | sregs->apic_base = kvm_get_apic_base(vcpu); |
2822 | 3076 | ||
@@ -2836,12 +3090,438 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
2836 | return 0; | 3090 | return 0; |
2837 | } | 3091 | } |
2838 | 3092 | ||
3093 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | ||
3094 | struct kvm_mp_state *mp_state) | ||
3095 | { | ||
3096 | vcpu_load(vcpu); | ||
3097 | mp_state->mp_state = vcpu->arch.mp_state; | ||
3098 | vcpu_put(vcpu); | ||
3099 | return 0; | ||
3100 | } | ||
3101 | |||
3102 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | ||
3103 | struct kvm_mp_state *mp_state) | ||
3104 | { | ||
3105 | vcpu_load(vcpu); | ||
3106 | vcpu->arch.mp_state = mp_state->mp_state; | ||
3107 | vcpu_put(vcpu); | ||
3108 | return 0; | ||
3109 | } | ||
3110 | |||
2839 | static void set_segment(struct kvm_vcpu *vcpu, | 3111 | static void set_segment(struct kvm_vcpu *vcpu, |
2840 | struct kvm_segment *var, int seg) | 3112 | struct kvm_segment *var, int seg) |
2841 | { | 3113 | { |
2842 | return kvm_x86_ops->set_segment(vcpu, var, seg); | 3114 | kvm_x86_ops->set_segment(vcpu, var, seg); |
3115 | } | ||
3116 | |||
3117 | static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, | ||
3118 | struct kvm_segment *kvm_desct) | ||
3119 | { | ||
3120 | kvm_desct->base = seg_desc->base0; | ||
3121 | kvm_desct->base |= seg_desc->base1 << 16; | ||
3122 | kvm_desct->base |= seg_desc->base2 << 24; | ||
3123 | kvm_desct->limit = seg_desc->limit0; | ||
3124 | kvm_desct->limit |= seg_desc->limit << 16; | ||
3125 | kvm_desct->selector = selector; | ||
3126 | kvm_desct->type = seg_desc->type; | ||
3127 | kvm_desct->present = seg_desc->p; | ||
3128 | kvm_desct->dpl = seg_desc->dpl; | ||
3129 | kvm_desct->db = seg_desc->d; | ||
3130 | kvm_desct->s = seg_desc->s; | ||
3131 | kvm_desct->l = seg_desc->l; | ||
3132 | kvm_desct->g = seg_desc->g; | ||
3133 | kvm_desct->avl = seg_desc->avl; | ||
3134 | if (!selector) | ||
3135 | kvm_desct->unusable = 1; | ||
3136 | else | ||
3137 | kvm_desct->unusable = 0; | ||
3138 | kvm_desct->padding = 0; | ||
3139 | } | ||
3140 | |||
3141 | static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, | ||
3142 | u16 selector, | ||
3143 | struct descriptor_table *dtable) | ||
3144 | { | ||
3145 | if (selector & 1 << 2) { | ||
3146 | struct kvm_segment kvm_seg; | ||
3147 | |||
3148 | get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); | ||
3149 | |||
3150 | if (kvm_seg.unusable) | ||
3151 | dtable->limit = 0; | ||
3152 | else | ||
3153 | dtable->limit = kvm_seg.limit; | ||
3154 | dtable->base = kvm_seg.base; | ||
3155 | } | ||
3156 | else | ||
3157 | kvm_x86_ops->get_gdt(vcpu, dtable); | ||
3158 | } | ||
3159 | |||
3160 | /* allowed just for 8 bytes segments */ | ||
3161 | static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | ||
3162 | struct desc_struct *seg_desc) | ||
3163 | { | ||
3164 | struct descriptor_table dtable; | ||
3165 | u16 index = selector >> 3; | ||
3166 | |||
3167 | get_segment_descritptor_dtable(vcpu, selector, &dtable); | ||
3168 | |||
3169 | if (dtable.limit < index * 8 + 7) { | ||
3170 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | ||
3171 | return 1; | ||
3172 | } | ||
3173 | return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); | ||
3174 | } | ||
3175 | |||
3176 | /* allowed just for 8 bytes segments */ | ||
3177 | static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | ||
3178 | struct desc_struct *seg_desc) | ||
3179 | { | ||
3180 | struct descriptor_table dtable; | ||
3181 | u16 index = selector >> 3; | ||
3182 | |||
3183 | get_segment_descritptor_dtable(vcpu, selector, &dtable); | ||
3184 | |||
3185 | if (dtable.limit < index * 8 + 7) | ||
3186 | return 1; | ||
3187 | return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); | ||
3188 | } | ||
3189 | |||
3190 | static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, | ||
3191 | struct desc_struct *seg_desc) | ||
3192 | { | ||
3193 | u32 base_addr; | ||
3194 | |||
3195 | base_addr = seg_desc->base0; | ||
3196 | base_addr |= (seg_desc->base1 << 16); | ||
3197 | base_addr |= (seg_desc->base2 << 24); | ||
3198 | |||
3199 | return base_addr; | ||
3200 | } | ||
3201 | |||
3202 | static int load_tss_segment32(struct kvm_vcpu *vcpu, | ||
3203 | struct desc_struct *seg_desc, | ||
3204 | struct tss_segment_32 *tss) | ||
3205 | { | ||
3206 | u32 base_addr; | ||
3207 | |||
3208 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3209 | |||
3210 | return kvm_read_guest(vcpu->kvm, base_addr, tss, | ||
3211 | sizeof(struct tss_segment_32)); | ||
3212 | } | ||
3213 | |||
3214 | static int save_tss_segment32(struct kvm_vcpu *vcpu, | ||
3215 | struct desc_struct *seg_desc, | ||
3216 | struct tss_segment_32 *tss) | ||
3217 | { | ||
3218 | u32 base_addr; | ||
3219 | |||
3220 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3221 | |||
3222 | return kvm_write_guest(vcpu->kvm, base_addr, tss, | ||
3223 | sizeof(struct tss_segment_32)); | ||
3224 | } | ||
3225 | |||
3226 | static int load_tss_segment16(struct kvm_vcpu *vcpu, | ||
3227 | struct desc_struct *seg_desc, | ||
3228 | struct tss_segment_16 *tss) | ||
3229 | { | ||
3230 | u32 base_addr; | ||
3231 | |||
3232 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3233 | |||
3234 | return kvm_read_guest(vcpu->kvm, base_addr, tss, | ||
3235 | sizeof(struct tss_segment_16)); | ||
3236 | } | ||
3237 | |||
3238 | static int save_tss_segment16(struct kvm_vcpu *vcpu, | ||
3239 | struct desc_struct *seg_desc, | ||
3240 | struct tss_segment_16 *tss) | ||
3241 | { | ||
3242 | u32 base_addr; | ||
3243 | |||
3244 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3245 | |||
3246 | return kvm_write_guest(vcpu->kvm, base_addr, tss, | ||
3247 | sizeof(struct tss_segment_16)); | ||
3248 | } | ||
3249 | |||
3250 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | ||
3251 | { | ||
3252 | struct kvm_segment kvm_seg; | ||
3253 | |||
3254 | get_segment(vcpu, &kvm_seg, seg); | ||
3255 | return kvm_seg.selector; | ||
3256 | } | ||
3257 | |||
3258 | static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu, | ||
3259 | u16 selector, | ||
3260 | struct kvm_segment *kvm_seg) | ||
3261 | { | ||
3262 | struct desc_struct seg_desc; | ||
3263 | |||
3264 | if (load_guest_segment_descriptor(vcpu, selector, &seg_desc)) | ||
3265 | return 1; | ||
3266 | seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg); | ||
3267 | return 0; | ||
3268 | } | ||
3269 | |||
3270 | static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | ||
3271 | int type_bits, int seg) | ||
3272 | { | ||
3273 | struct kvm_segment kvm_seg; | ||
3274 | |||
3275 | if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg)) | ||
3276 | return 1; | ||
3277 | kvm_seg.type |= type_bits; | ||
3278 | |||
3279 | if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS && | ||
3280 | seg != VCPU_SREG_LDTR) | ||
3281 | if (!kvm_seg.s) | ||
3282 | kvm_seg.unusable = 1; | ||
3283 | |||
3284 | set_segment(vcpu, &kvm_seg, seg); | ||
3285 | return 0; | ||
3286 | } | ||
3287 | |||
3288 | static void save_state_to_tss32(struct kvm_vcpu *vcpu, | ||
3289 | struct tss_segment_32 *tss) | ||
3290 | { | ||
3291 | tss->cr3 = vcpu->arch.cr3; | ||
3292 | tss->eip = vcpu->arch.rip; | ||
3293 | tss->eflags = kvm_x86_ops->get_rflags(vcpu); | ||
3294 | tss->eax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
3295 | tss->ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | ||
3296 | tss->edx = vcpu->arch.regs[VCPU_REGS_RDX]; | ||
3297 | tss->ebx = vcpu->arch.regs[VCPU_REGS_RBX]; | ||
3298 | tss->esp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
3299 | tss->ebp = vcpu->arch.regs[VCPU_REGS_RBP]; | ||
3300 | tss->esi = vcpu->arch.regs[VCPU_REGS_RSI]; | ||
3301 | tss->edi = vcpu->arch.regs[VCPU_REGS_RDI]; | ||
3302 | |||
3303 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | ||
3304 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | ||
3305 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | ||
3306 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | ||
3307 | tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); | ||
3308 | tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); | ||
3309 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); | ||
3310 | tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
3311 | } | ||
3312 | |||
3313 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, | ||
3314 | struct tss_segment_32 *tss) | ||
3315 | { | ||
3316 | kvm_set_cr3(vcpu, tss->cr3); | ||
3317 | |||
3318 | vcpu->arch.rip = tss->eip; | ||
3319 | kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2); | ||
3320 | |||
3321 | vcpu->arch.regs[VCPU_REGS_RAX] = tss->eax; | ||
3322 | vcpu->arch.regs[VCPU_REGS_RCX] = tss->ecx; | ||
3323 | vcpu->arch.regs[VCPU_REGS_RDX] = tss->edx; | ||
3324 | vcpu->arch.regs[VCPU_REGS_RBX] = tss->ebx; | ||
3325 | vcpu->arch.regs[VCPU_REGS_RSP] = tss->esp; | ||
3326 | vcpu->arch.regs[VCPU_REGS_RBP] = tss->ebp; | ||
3327 | vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi; | ||
3328 | vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi; | ||
3329 | |||
3330 | if (load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) | ||
3331 | return 1; | ||
3332 | |||
3333 | if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) | ||
3334 | return 1; | ||
3335 | |||
3336 | if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) | ||
3337 | return 1; | ||
3338 | |||
3339 | if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) | ||
3340 | return 1; | ||
3341 | |||
3342 | if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) | ||
3343 | return 1; | ||
3344 | |||
3345 | if (load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) | ||
3346 | return 1; | ||
3347 | |||
3348 | if (load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) | ||
3349 | return 1; | ||
3350 | return 0; | ||
3351 | } | ||
3352 | |||
3353 | static void save_state_to_tss16(struct kvm_vcpu *vcpu, | ||
3354 | struct tss_segment_16 *tss) | ||
3355 | { | ||
3356 | tss->ip = vcpu->arch.rip; | ||
3357 | tss->flag = kvm_x86_ops->get_rflags(vcpu); | ||
3358 | tss->ax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
3359 | tss->cx = vcpu->arch.regs[VCPU_REGS_RCX]; | ||
3360 | tss->dx = vcpu->arch.regs[VCPU_REGS_RDX]; | ||
3361 | tss->bx = vcpu->arch.regs[VCPU_REGS_RBX]; | ||
3362 | tss->sp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
3363 | tss->bp = vcpu->arch.regs[VCPU_REGS_RBP]; | ||
3364 | tss->si = vcpu->arch.regs[VCPU_REGS_RSI]; | ||
3365 | tss->di = vcpu->arch.regs[VCPU_REGS_RDI]; | ||
3366 | |||
3367 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | ||
3368 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | ||
3369 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | ||
3370 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | ||
3371 | tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); | ||
3372 | tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
3373 | } | ||
3374 | |||
3375 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, | ||
3376 | struct tss_segment_16 *tss) | ||
3377 | { | ||
3378 | vcpu->arch.rip = tss->ip; | ||
3379 | kvm_x86_ops->set_rflags(vcpu, tss->flag | 2); | ||
3380 | vcpu->arch.regs[VCPU_REGS_RAX] = tss->ax; | ||
3381 | vcpu->arch.regs[VCPU_REGS_RCX] = tss->cx; | ||
3382 | vcpu->arch.regs[VCPU_REGS_RDX] = tss->dx; | ||
3383 | vcpu->arch.regs[VCPU_REGS_RBX] = tss->bx; | ||
3384 | vcpu->arch.regs[VCPU_REGS_RSP] = tss->sp; | ||
3385 | vcpu->arch.regs[VCPU_REGS_RBP] = tss->bp; | ||
3386 | vcpu->arch.regs[VCPU_REGS_RSI] = tss->si; | ||
3387 | vcpu->arch.regs[VCPU_REGS_RDI] = tss->di; | ||
3388 | |||
3389 | if (load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) | ||
3390 | return 1; | ||
3391 | |||
3392 | if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) | ||
3393 | return 1; | ||
3394 | |||
3395 | if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) | ||
3396 | return 1; | ||
3397 | |||
3398 | if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) | ||
3399 | return 1; | ||
3400 | |||
3401 | if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) | ||
3402 | return 1; | ||
3403 | return 0; | ||
3404 | } | ||
3405 | |||
3406 | int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | ||
3407 | struct desc_struct *cseg_desc, | ||
3408 | struct desc_struct *nseg_desc) | ||
3409 | { | ||
3410 | struct tss_segment_16 tss_segment_16; | ||
3411 | int ret = 0; | ||
3412 | |||
3413 | if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16)) | ||
3414 | goto out; | ||
3415 | |||
3416 | save_state_to_tss16(vcpu, &tss_segment_16); | ||
3417 | save_tss_segment16(vcpu, cseg_desc, &tss_segment_16); | ||
3418 | |||
3419 | if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16)) | ||
3420 | goto out; | ||
3421 | if (load_state_from_tss16(vcpu, &tss_segment_16)) | ||
3422 | goto out; | ||
3423 | |||
3424 | ret = 1; | ||
3425 | out: | ||
3426 | return ret; | ||
3427 | } | ||
3428 | |||
3429 | int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | ||
3430 | struct desc_struct *cseg_desc, | ||
3431 | struct desc_struct *nseg_desc) | ||
3432 | { | ||
3433 | struct tss_segment_32 tss_segment_32; | ||
3434 | int ret = 0; | ||
3435 | |||
3436 | if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32)) | ||
3437 | goto out; | ||
3438 | |||
3439 | save_state_to_tss32(vcpu, &tss_segment_32); | ||
3440 | save_tss_segment32(vcpu, cseg_desc, &tss_segment_32); | ||
3441 | |||
3442 | if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32)) | ||
3443 | goto out; | ||
3444 | if (load_state_from_tss32(vcpu, &tss_segment_32)) | ||
3445 | goto out; | ||
3446 | |||
3447 | ret = 1; | ||
3448 | out: | ||
3449 | return ret; | ||
2843 | } | 3450 | } |
2844 | 3451 | ||
3452 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | ||
3453 | { | ||
3454 | struct kvm_segment tr_seg; | ||
3455 | struct desc_struct cseg_desc; | ||
3456 | struct desc_struct nseg_desc; | ||
3457 | int ret = 0; | ||
3458 | |||
3459 | get_segment(vcpu, &tr_seg, VCPU_SREG_TR); | ||
3460 | |||
3461 | if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) | ||
3462 | goto out; | ||
3463 | |||
3464 | if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc)) | ||
3465 | goto out; | ||
3466 | |||
3467 | |||
3468 | if (reason != TASK_SWITCH_IRET) { | ||
3469 | int cpl; | ||
3470 | |||
3471 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
3472 | if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) { | ||
3473 | kvm_queue_exception_e(vcpu, GP_VECTOR, 0); | ||
3474 | return 1; | ||
3475 | } | ||
3476 | } | ||
3477 | |||
3478 | if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) { | ||
3479 | kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); | ||
3480 | return 1; | ||
3481 | } | ||
3482 | |||
3483 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | ||
3484 | cseg_desc.type &= ~(1 << 8); //clear the B flag | ||
3485 | save_guest_segment_descriptor(vcpu, tr_seg.selector, | ||
3486 | &cseg_desc); | ||
3487 | } | ||
3488 | |||
3489 | if (reason == TASK_SWITCH_IRET) { | ||
3490 | u32 eflags = kvm_x86_ops->get_rflags(vcpu); | ||
3491 | kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); | ||
3492 | } | ||
3493 | |||
3494 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
3495 | kvm_x86_ops->cache_regs(vcpu); | ||
3496 | |||
3497 | if (nseg_desc.type & 8) | ||
3498 | ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc, | ||
3499 | &nseg_desc); | ||
3500 | else | ||
3501 | ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc, | ||
3502 | &nseg_desc); | ||
3503 | |||
3504 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | ||
3505 | u32 eflags = kvm_x86_ops->get_rflags(vcpu); | ||
3506 | kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT); | ||
3507 | } | ||
3508 | |||
3509 | if (reason != TASK_SWITCH_IRET) { | ||
3510 | nseg_desc.type |= (1 << 8); | ||
3511 | save_guest_segment_descriptor(vcpu, tss_selector, | ||
3512 | &nseg_desc); | ||
3513 | } | ||
3514 | |||
3515 | kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); | ||
3516 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); | ||
3517 | tr_seg.type = 11; | ||
3518 | set_segment(vcpu, &tr_seg, VCPU_SREG_TR); | ||
3519 | out: | ||
3520 | kvm_x86_ops->decache_regs(vcpu); | ||
3521 | return ret; | ||
3522 | } | ||
3523 | EXPORT_SYMBOL_GPL(kvm_task_switch); | ||
3524 | |||
2845 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | 3525 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, |
2846 | struct kvm_sregs *sregs) | 3526 | struct kvm_sregs *sregs) |
2847 | { | 3527 | { |
@@ -2862,12 +3542,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
2862 | mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; | 3542 | mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; |
2863 | vcpu->arch.cr3 = sregs->cr3; | 3543 | vcpu->arch.cr3 = sregs->cr3; |
2864 | 3544 | ||
2865 | set_cr8(vcpu, sregs->cr8); | 3545 | kvm_set_cr8(vcpu, sregs->cr8); |
2866 | 3546 | ||
2867 | mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer; | 3547 | mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer; |
2868 | #ifdef CONFIG_X86_64 | ||
2869 | kvm_x86_ops->set_efer(vcpu, sregs->efer); | 3548 | kvm_x86_ops->set_efer(vcpu, sregs->efer); |
2870 | #endif | ||
2871 | kvm_set_apic_base(vcpu, sregs->apic_base); | 3549 | kvm_set_apic_base(vcpu, sregs->apic_base); |
2872 | 3550 | ||
2873 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | 3551 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); |
@@ -3141,9 +3819,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
3141 | 3819 | ||
3142 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 3820 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
3143 | if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0) | 3821 | if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0) |
3144 | vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; | 3822 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
3145 | else | 3823 | else |
3146 | vcpu->arch.mp_state = VCPU_MP_STATE_UNINITIALIZED; | 3824 | vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; |
3147 | 3825 | ||
3148 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 3826 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
3149 | if (!page) { | 3827 | if (!page) { |
@@ -3175,7 +3853,9 @@ fail: | |||
3175 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | 3853 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) |
3176 | { | 3854 | { |
3177 | kvm_free_lapic(vcpu); | 3855 | kvm_free_lapic(vcpu); |
3856 | down_read(&vcpu->kvm->slots_lock); | ||
3178 | kvm_mmu_destroy(vcpu); | 3857 | kvm_mmu_destroy(vcpu); |
3858 | up_read(&vcpu->kvm->slots_lock); | ||
3179 | free_page((unsigned long)vcpu->arch.pio_data); | 3859 | free_page((unsigned long)vcpu->arch.pio_data); |
3180 | } | 3860 | } |
3181 | 3861 | ||
@@ -3219,10 +3899,13 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
3219 | 3899 | ||
3220 | void kvm_arch_destroy_vm(struct kvm *kvm) | 3900 | void kvm_arch_destroy_vm(struct kvm *kvm) |
3221 | { | 3901 | { |
3902 | kvm_free_pit(kvm); | ||
3222 | kfree(kvm->arch.vpic); | 3903 | kfree(kvm->arch.vpic); |
3223 | kfree(kvm->arch.vioapic); | 3904 | kfree(kvm->arch.vioapic); |
3224 | kvm_free_vcpus(kvm); | 3905 | kvm_free_vcpus(kvm); |
3225 | kvm_free_physmem(kvm); | 3906 | kvm_free_physmem(kvm); |
3907 | if (kvm->arch.apic_access_page) | ||
3908 | put_page(kvm->arch.apic_access_page); | ||
3226 | kfree(kvm); | 3909 | kfree(kvm); |
3227 | } | 3910 | } |
3228 | 3911 | ||
@@ -3278,8 +3961,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
3278 | 3961 | ||
3279 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 3962 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
3280 | { | 3963 | { |
3281 | return vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE | 3964 | return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE |
3282 | || vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED; | 3965 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED; |
3283 | } | 3966 | } |
3284 | 3967 | ||
3285 | static void vcpu_kick_intr(void *info) | 3968 | static void vcpu_kick_intr(void *info) |
@@ -3293,11 +3976,17 @@ static void vcpu_kick_intr(void *info) | |||
3293 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | 3976 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu) |
3294 | { | 3977 | { |
3295 | int ipi_pcpu = vcpu->cpu; | 3978 | int ipi_pcpu = vcpu->cpu; |
3979 | int cpu = get_cpu(); | ||
3296 | 3980 | ||
3297 | if (waitqueue_active(&vcpu->wq)) { | 3981 | if (waitqueue_active(&vcpu->wq)) { |
3298 | wake_up_interruptible(&vcpu->wq); | 3982 | wake_up_interruptible(&vcpu->wq); |
3299 | ++vcpu->stat.halt_wakeup; | 3983 | ++vcpu->stat.halt_wakeup; |
3300 | } | 3984 | } |
3301 | if (vcpu->guest_mode) | 3985 | /* |
3986 | * We may be called synchronously with irqs disabled in guest mode, | ||
3987 | * So need not to call smp_call_function_single() in that case. | ||
3988 | */ | ||
3989 | if (vcpu->guest_mode && vcpu->cpu != cpu) | ||
3302 | smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); | 3990 | smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); |
3991 | put_cpu(); | ||
3303 | } | 3992 | } |
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 79586003397a..2ca08386f993 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c | |||
@@ -65,6 +65,14 @@ | |||
65 | #define MemAbs (1<<9) /* Memory operand is absolute displacement */ | 65 | #define MemAbs (1<<9) /* Memory operand is absolute displacement */ |
66 | #define String (1<<10) /* String instruction (rep capable) */ | 66 | #define String (1<<10) /* String instruction (rep capable) */ |
67 | #define Stack (1<<11) /* Stack instruction (push/pop) */ | 67 | #define Stack (1<<11) /* Stack instruction (push/pop) */ |
68 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | ||
69 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | ||
70 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ | ||
71 | |||
72 | enum { | ||
73 | Group1_80, Group1_81, Group1_82, Group1_83, | ||
74 | Group1A, Group3_Byte, Group3, Group4, Group5, Group7, | ||
75 | }; | ||
68 | 76 | ||
69 | static u16 opcode_table[256] = { | 77 | static u16 opcode_table[256] = { |
70 | /* 0x00 - 0x07 */ | 78 | /* 0x00 - 0x07 */ |
@@ -123,14 +131,14 @@ static u16 opcode_table[256] = { | |||
123 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 131 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, |
124 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 132 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, |
125 | /* 0x80 - 0x87 */ | 133 | /* 0x80 - 0x87 */ |
126 | ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | 134 | Group | Group1_80, Group | Group1_81, |
127 | ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, | 135 | Group | Group1_82, Group | Group1_83, |
128 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 136 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
129 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 137 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
130 | /* 0x88 - 0x8F */ | 138 | /* 0x88 - 0x8F */ |
131 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, | 139 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, |
132 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, | 140 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, |
133 | 0, ModRM | DstReg, 0, DstMem | SrcNone | ModRM | Mov | Stack, | 141 | 0, ModRM | DstReg, 0, Group | Group1A, |
134 | /* 0x90 - 0x9F */ | 142 | /* 0x90 - 0x9F */ |
135 | 0, 0, 0, 0, 0, 0, 0, 0, | 143 | 0, 0, 0, 0, 0, 0, 0, 0, |
136 | 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, | 144 | 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, |
@@ -164,16 +172,15 @@ static u16 opcode_table[256] = { | |||
164 | 0, 0, 0, 0, | 172 | 0, 0, 0, 0, |
165 | /* 0xF0 - 0xF7 */ | 173 | /* 0xF0 - 0xF7 */ |
166 | 0, 0, 0, 0, | 174 | 0, 0, 0, 0, |
167 | ImplicitOps, ImplicitOps, | 175 | ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3, |
168 | ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, | ||
169 | /* 0xF8 - 0xFF */ | 176 | /* 0xF8 - 0xFF */ |
170 | ImplicitOps, 0, ImplicitOps, ImplicitOps, | 177 | ImplicitOps, 0, ImplicitOps, ImplicitOps, |
171 | 0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM | 178 | 0, 0, Group | Group4, Group | Group5, |
172 | }; | 179 | }; |
173 | 180 | ||
174 | static u16 twobyte_table[256] = { | 181 | static u16 twobyte_table[256] = { |
175 | /* 0x00 - 0x0F */ | 182 | /* 0x00 - 0x0F */ |
176 | 0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0, | 183 | 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0, |
177 | ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, | 184 | ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, |
178 | /* 0x10 - 0x1F */ | 185 | /* 0x10 - 0x1F */ |
179 | 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, | 186 | 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, |
@@ -229,6 +236,56 @@ static u16 twobyte_table[256] = { | |||
229 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | 236 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
230 | }; | 237 | }; |
231 | 238 | ||
239 | static u16 group_table[] = { | ||
240 | [Group1_80*8] = | ||
241 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | ||
242 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | ||
243 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | ||
244 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | ||
245 | [Group1_81*8] = | ||
246 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | ||
247 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | ||
248 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | ||
249 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | ||
250 | [Group1_82*8] = | ||
251 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | ||
252 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | ||
253 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | ||
254 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | ||
255 | [Group1_83*8] = | ||
256 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | ||
257 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | ||
258 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | ||
259 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | ||
260 | [Group1A*8] = | ||
261 | DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, | ||
262 | [Group3_Byte*8] = | ||
263 | ByteOp | SrcImm | DstMem | ModRM, 0, | ||
264 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, | ||
265 | 0, 0, 0, 0, | ||
266 | [Group3*8] = | ||
267 | DstMem | SrcImm | ModRM | SrcImm, 0, | ||
268 | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, | ||
269 | 0, 0, 0, 0, | ||
270 | [Group4*8] = | ||
271 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, | ||
272 | 0, 0, 0, 0, 0, 0, | ||
273 | [Group5*8] = | ||
274 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 0, 0, | ||
275 | SrcMem | ModRM, 0, SrcMem | ModRM | Stack, 0, | ||
276 | [Group7*8] = | ||
277 | 0, 0, ModRM | SrcMem, ModRM | SrcMem, | ||
278 | SrcNone | ModRM | DstMem | Mov, 0, | ||
279 | SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, | ||
280 | }; | ||
281 | |||
282 | static u16 group2_table[] = { | ||
283 | [Group7*8] = | ||
284 | SrcNone | ModRM, 0, 0, 0, | ||
285 | SrcNone | ModRM | DstMem | Mov, 0, | ||
286 | SrcMem16 | ModRM | Mov, 0, | ||
287 | }; | ||
288 | |||
232 | /* EFLAGS bit definitions. */ | 289 | /* EFLAGS bit definitions. */ |
233 | #define EFLG_OF (1<<11) | 290 | #define EFLG_OF (1<<11) |
234 | #define EFLG_DF (1<<10) | 291 | #define EFLG_DF (1<<10) |
@@ -317,7 +374,7 @@ static u16 twobyte_table[256] = { | |||
317 | 374 | ||
318 | #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ | 375 | #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ |
319 | do { \ | 376 | do { \ |
320 | unsigned long _tmp; \ | 377 | unsigned long __tmp; \ |
321 | switch ((_dst).bytes) { \ | 378 | switch ((_dst).bytes) { \ |
322 | case 1: \ | 379 | case 1: \ |
323 | __asm__ __volatile__ ( \ | 380 | __asm__ __volatile__ ( \ |
@@ -325,7 +382,7 @@ static u16 twobyte_table[256] = { | |||
325 | _op"b %"_bx"3,%1; " \ | 382 | _op"b %"_bx"3,%1; " \ |
326 | _POST_EFLAGS("0", "4", "2") \ | 383 | _POST_EFLAGS("0", "4", "2") \ |
327 | : "=m" (_eflags), "=m" ((_dst).val), \ | 384 | : "=m" (_eflags), "=m" ((_dst).val), \ |
328 | "=&r" (_tmp) \ | 385 | "=&r" (__tmp) \ |
329 | : _by ((_src).val), "i" (EFLAGS_MASK)); \ | 386 | : _by ((_src).val), "i" (EFLAGS_MASK)); \ |
330 | break; \ | 387 | break; \ |
331 | default: \ | 388 | default: \ |
@@ -426,29 +483,40 @@ static u16 twobyte_table[256] = { | |||
426 | (_type)_x; \ | 483 | (_type)_x; \ |
427 | }) | 484 | }) |
428 | 485 | ||
486 | static inline unsigned long ad_mask(struct decode_cache *c) | ||
487 | { | ||
488 | return (1UL << (c->ad_bytes << 3)) - 1; | ||
489 | } | ||
490 | |||
429 | /* Access/update address held in a register, based on addressing mode. */ | 491 | /* Access/update address held in a register, based on addressing mode. */ |
430 | #define address_mask(reg) \ | 492 | static inline unsigned long |
431 | ((c->ad_bytes == sizeof(unsigned long)) ? \ | 493 | address_mask(struct decode_cache *c, unsigned long reg) |
432 | (reg) : ((reg) & ((1UL << (c->ad_bytes << 3)) - 1))) | 494 | { |
433 | #define register_address(base, reg) \ | 495 | if (c->ad_bytes == sizeof(unsigned long)) |
434 | ((base) + address_mask(reg)) | 496 | return reg; |
435 | #define register_address_increment(reg, inc) \ | 497 | else |
436 | do { \ | 498 | return reg & ad_mask(c); |
437 | /* signed type ensures sign extension to long */ \ | 499 | } |
438 | int _inc = (inc); \ | ||
439 | if (c->ad_bytes == sizeof(unsigned long)) \ | ||
440 | (reg) += _inc; \ | ||
441 | else \ | ||
442 | (reg) = ((reg) & \ | ||
443 | ~((1UL << (c->ad_bytes << 3)) - 1)) | \ | ||
444 | (((reg) + _inc) & \ | ||
445 | ((1UL << (c->ad_bytes << 3)) - 1)); \ | ||
446 | } while (0) | ||
447 | 500 | ||
448 | #define JMP_REL(rel) \ | 501 | static inline unsigned long |
449 | do { \ | 502 | register_address(struct decode_cache *c, unsigned long base, unsigned long reg) |
450 | register_address_increment(c->eip, rel); \ | 503 | { |
451 | } while (0) | 504 | return base + address_mask(c, reg); |
505 | } | ||
506 | |||
507 | static inline void | ||
508 | register_address_increment(struct decode_cache *c, unsigned long *reg, int inc) | ||
509 | { | ||
510 | if (c->ad_bytes == sizeof(unsigned long)) | ||
511 | *reg += inc; | ||
512 | else | ||
513 | *reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c)); | ||
514 | } | ||
515 | |||
516 | static inline void jmp_rel(struct decode_cache *c, int rel) | ||
517 | { | ||
518 | register_address_increment(c, &c->eip, rel); | ||
519 | } | ||
452 | 520 | ||
453 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | 521 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, |
454 | struct x86_emulate_ops *ops, | 522 | struct x86_emulate_ops *ops, |
@@ -763,7 +831,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
763 | struct decode_cache *c = &ctxt->decode; | 831 | struct decode_cache *c = &ctxt->decode; |
764 | int rc = 0; | 832 | int rc = 0; |
765 | int mode = ctxt->mode; | 833 | int mode = ctxt->mode; |
766 | int def_op_bytes, def_ad_bytes; | 834 | int def_op_bytes, def_ad_bytes, group; |
767 | 835 | ||
768 | /* Shadow copy of register state. Committed on successful emulation. */ | 836 | /* Shadow copy of register state. Committed on successful emulation. */ |
769 | 837 | ||
@@ -864,12 +932,24 @@ done_prefixes: | |||
864 | c->b = insn_fetch(u8, 1, c->eip); | 932 | c->b = insn_fetch(u8, 1, c->eip); |
865 | c->d = twobyte_table[c->b]; | 933 | c->d = twobyte_table[c->b]; |
866 | } | 934 | } |
935 | } | ||
867 | 936 | ||
868 | /* Unrecognised? */ | 937 | if (c->d & Group) { |
869 | if (c->d == 0) { | 938 | group = c->d & GroupMask; |
870 | DPRINTF("Cannot emulate %02x\n", c->b); | 939 | c->modrm = insn_fetch(u8, 1, c->eip); |
871 | return -1; | 940 | --c->eip; |
872 | } | 941 | |
942 | group = (group << 3) + ((c->modrm >> 3) & 7); | ||
943 | if ((c->d & GroupDual) && (c->modrm >> 6) == 3) | ||
944 | c->d = group2_table[group]; | ||
945 | else | ||
946 | c->d = group_table[group]; | ||
947 | } | ||
948 | |||
949 | /* Unrecognised? */ | ||
950 | if (c->d == 0) { | ||
951 | DPRINTF("Cannot emulate %02x\n", c->b); | ||
952 | return -1; | ||
873 | } | 953 | } |
874 | 954 | ||
875 | if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) | 955 | if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) |
@@ -924,6 +1004,7 @@ done_prefixes: | |||
924 | */ | 1004 | */ |
925 | if ((c->d & ModRM) && c->modrm_mod == 3) { | 1005 | if ((c->d & ModRM) && c->modrm_mod == 3) { |
926 | c->src.type = OP_REG; | 1006 | c->src.type = OP_REG; |
1007 | c->src.val = c->modrm_val; | ||
927 | break; | 1008 | break; |
928 | } | 1009 | } |
929 | c->src.type = OP_MEM; | 1010 | c->src.type = OP_MEM; |
@@ -967,6 +1048,7 @@ done_prefixes: | |||
967 | case DstMem: | 1048 | case DstMem: |
968 | if ((c->d & ModRM) && c->modrm_mod == 3) { | 1049 | if ((c->d & ModRM) && c->modrm_mod == 3) { |
969 | c->dst.type = OP_REG; | 1050 | c->dst.type = OP_REG; |
1051 | c->dst.val = c->dst.orig_val = c->modrm_val; | ||
970 | break; | 1052 | break; |
971 | } | 1053 | } |
972 | c->dst.type = OP_MEM; | 1054 | c->dst.type = OP_MEM; |
@@ -984,8 +1066,8 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | |||
984 | c->dst.type = OP_MEM; | 1066 | c->dst.type = OP_MEM; |
985 | c->dst.bytes = c->op_bytes; | 1067 | c->dst.bytes = c->op_bytes; |
986 | c->dst.val = c->src.val; | 1068 | c->dst.val = c->src.val; |
987 | register_address_increment(c->regs[VCPU_REGS_RSP], -c->op_bytes); | 1069 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); |
988 | c->dst.ptr = (void *) register_address(ctxt->ss_base, | 1070 | c->dst.ptr = (void *) register_address(c, ctxt->ss_base, |
989 | c->regs[VCPU_REGS_RSP]); | 1071 | c->regs[VCPU_REGS_RSP]); |
990 | } | 1072 | } |
991 | 1073 | ||
@@ -995,13 +1077,13 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | |||
995 | struct decode_cache *c = &ctxt->decode; | 1077 | struct decode_cache *c = &ctxt->decode; |
996 | int rc; | 1078 | int rc; |
997 | 1079 | ||
998 | rc = ops->read_std(register_address(ctxt->ss_base, | 1080 | rc = ops->read_std(register_address(c, ctxt->ss_base, |
999 | c->regs[VCPU_REGS_RSP]), | 1081 | c->regs[VCPU_REGS_RSP]), |
1000 | &c->dst.val, c->dst.bytes, ctxt->vcpu); | 1082 | &c->dst.val, c->dst.bytes, ctxt->vcpu); |
1001 | if (rc != 0) | 1083 | if (rc != 0) |
1002 | return rc; | 1084 | return rc; |
1003 | 1085 | ||
1004 | register_address_increment(c->regs[VCPU_REGS_RSP], c->dst.bytes); | 1086 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes); |
1005 | 1087 | ||
1006 | return 0; | 1088 | return 0; |
1007 | } | 1089 | } |
@@ -1043,26 +1125,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
1043 | 1125 | ||
1044 | switch (c->modrm_reg) { | 1126 | switch (c->modrm_reg) { |
1045 | case 0 ... 1: /* test */ | 1127 | case 0 ... 1: /* test */ |
1046 | /* | ||
1047 | * Special case in Grp3: test has an immediate | ||
1048 | * source operand. | ||
1049 | */ | ||
1050 | c->src.type = OP_IMM; | ||
1051 | c->src.ptr = (unsigned long *)c->eip; | ||
1052 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
1053 | if (c->src.bytes == 8) | ||
1054 | c->src.bytes = 4; | ||
1055 | switch (c->src.bytes) { | ||
1056 | case 1: | ||
1057 | c->src.val = insn_fetch(s8, 1, c->eip); | ||
1058 | break; | ||
1059 | case 2: | ||
1060 | c->src.val = insn_fetch(s16, 2, c->eip); | ||
1061 | break; | ||
1062 | case 4: | ||
1063 | c->src.val = insn_fetch(s32, 4, c->eip); | ||
1064 | break; | ||
1065 | } | ||
1066 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); | 1128 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); |
1067 | break; | 1129 | break; |
1068 | case 2: /* not */ | 1130 | case 2: /* not */ |
@@ -1076,7 +1138,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
1076 | rc = X86EMUL_UNHANDLEABLE; | 1138 | rc = X86EMUL_UNHANDLEABLE; |
1077 | break; | 1139 | break; |
1078 | } | 1140 | } |
1079 | done: | ||
1080 | return rc; | 1141 | return rc; |
1081 | } | 1142 | } |
1082 | 1143 | ||
@@ -1084,7 +1145,6 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | |||
1084 | struct x86_emulate_ops *ops) | 1145 | struct x86_emulate_ops *ops) |
1085 | { | 1146 | { |
1086 | struct decode_cache *c = &ctxt->decode; | 1147 | struct decode_cache *c = &ctxt->decode; |
1087 | int rc; | ||
1088 | 1148 | ||
1089 | switch (c->modrm_reg) { | 1149 | switch (c->modrm_reg) { |
1090 | case 0: /* inc */ | 1150 | case 0: /* inc */ |
@@ -1094,36 +1154,11 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | |||
1094 | emulate_1op("dec", c->dst, ctxt->eflags); | 1154 | emulate_1op("dec", c->dst, ctxt->eflags); |
1095 | break; | 1155 | break; |
1096 | case 4: /* jmp abs */ | 1156 | case 4: /* jmp abs */ |
1097 | if (c->b == 0xff) | 1157 | c->eip = c->src.val; |
1098 | c->eip = c->dst.val; | ||
1099 | else { | ||
1100 | DPRINTF("Cannot emulate %02x\n", c->b); | ||
1101 | return X86EMUL_UNHANDLEABLE; | ||
1102 | } | ||
1103 | break; | 1158 | break; |
1104 | case 6: /* push */ | 1159 | case 6: /* push */ |
1105 | 1160 | emulate_push(ctxt); | |
1106 | /* 64-bit mode: PUSH always pushes a 64-bit operand. */ | ||
1107 | |||
1108 | if (ctxt->mode == X86EMUL_MODE_PROT64) { | ||
1109 | c->dst.bytes = 8; | ||
1110 | rc = ops->read_std((unsigned long)c->dst.ptr, | ||
1111 | &c->dst.val, 8, ctxt->vcpu); | ||
1112 | if (rc != 0) | ||
1113 | return rc; | ||
1114 | } | ||
1115 | register_address_increment(c->regs[VCPU_REGS_RSP], | ||
1116 | -c->dst.bytes); | ||
1117 | rc = ops->write_emulated(register_address(ctxt->ss_base, | ||
1118 | c->regs[VCPU_REGS_RSP]), &c->dst.val, | ||
1119 | c->dst.bytes, ctxt->vcpu); | ||
1120 | if (rc != 0) | ||
1121 | return rc; | ||
1122 | c->dst.type = OP_NONE; | ||
1123 | break; | 1161 | break; |
1124 | default: | ||
1125 | DPRINTF("Cannot emulate %02x\n", c->b); | ||
1126 | return X86EMUL_UNHANDLEABLE; | ||
1127 | } | 1162 | } |
1128 | return 0; | 1163 | return 0; |
1129 | } | 1164 | } |
@@ -1361,19 +1396,19 @@ special_insn: | |||
1361 | c->dst.type = OP_MEM; | 1396 | c->dst.type = OP_MEM; |
1362 | c->dst.bytes = c->op_bytes; | 1397 | c->dst.bytes = c->op_bytes; |
1363 | c->dst.val = c->src.val; | 1398 | c->dst.val = c->src.val; |
1364 | register_address_increment(c->regs[VCPU_REGS_RSP], | 1399 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], |
1365 | -c->op_bytes); | 1400 | -c->op_bytes); |
1366 | c->dst.ptr = (void *) register_address( | 1401 | c->dst.ptr = (void *) register_address( |
1367 | ctxt->ss_base, c->regs[VCPU_REGS_RSP]); | 1402 | c, ctxt->ss_base, c->regs[VCPU_REGS_RSP]); |
1368 | break; | 1403 | break; |
1369 | case 0x58 ... 0x5f: /* pop reg */ | 1404 | case 0x58 ... 0x5f: /* pop reg */ |
1370 | pop_instruction: | 1405 | pop_instruction: |
1371 | if ((rc = ops->read_std(register_address(ctxt->ss_base, | 1406 | if ((rc = ops->read_std(register_address(c, ctxt->ss_base, |
1372 | c->regs[VCPU_REGS_RSP]), c->dst.ptr, | 1407 | c->regs[VCPU_REGS_RSP]), c->dst.ptr, |
1373 | c->op_bytes, ctxt->vcpu)) != 0) | 1408 | c->op_bytes, ctxt->vcpu)) != 0) |
1374 | goto done; | 1409 | goto done; |
1375 | 1410 | ||
1376 | register_address_increment(c->regs[VCPU_REGS_RSP], | 1411 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], |
1377 | c->op_bytes); | 1412 | c->op_bytes); |
1378 | c->dst.type = OP_NONE; /* Disable writeback. */ | 1413 | c->dst.type = OP_NONE; /* Disable writeback. */ |
1379 | break; | 1414 | break; |
@@ -1393,9 +1428,9 @@ special_insn: | |||
1393 | 1, | 1428 | 1, |
1394 | (c->d & ByteOp) ? 1 : c->op_bytes, | 1429 | (c->d & ByteOp) ? 1 : c->op_bytes, |
1395 | c->rep_prefix ? | 1430 | c->rep_prefix ? |
1396 | address_mask(c->regs[VCPU_REGS_RCX]) : 1, | 1431 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, |
1397 | (ctxt->eflags & EFLG_DF), | 1432 | (ctxt->eflags & EFLG_DF), |
1398 | register_address(ctxt->es_base, | 1433 | register_address(c, ctxt->es_base, |
1399 | c->regs[VCPU_REGS_RDI]), | 1434 | c->regs[VCPU_REGS_RDI]), |
1400 | c->rep_prefix, | 1435 | c->rep_prefix, |
1401 | c->regs[VCPU_REGS_RDX]) == 0) { | 1436 | c->regs[VCPU_REGS_RDX]) == 0) { |
@@ -1409,9 +1444,9 @@ special_insn: | |||
1409 | 0, | 1444 | 0, |
1410 | (c->d & ByteOp) ? 1 : c->op_bytes, | 1445 | (c->d & ByteOp) ? 1 : c->op_bytes, |
1411 | c->rep_prefix ? | 1446 | c->rep_prefix ? |
1412 | address_mask(c->regs[VCPU_REGS_RCX]) : 1, | 1447 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, |
1413 | (ctxt->eflags & EFLG_DF), | 1448 | (ctxt->eflags & EFLG_DF), |
1414 | register_address(c->override_base ? | 1449 | register_address(c, c->override_base ? |
1415 | *c->override_base : | 1450 | *c->override_base : |
1416 | ctxt->ds_base, | 1451 | ctxt->ds_base, |
1417 | c->regs[VCPU_REGS_RSI]), | 1452 | c->regs[VCPU_REGS_RSI]), |
@@ -1425,7 +1460,7 @@ special_insn: | |||
1425 | int rel = insn_fetch(s8, 1, c->eip); | 1460 | int rel = insn_fetch(s8, 1, c->eip); |
1426 | 1461 | ||
1427 | if (test_cc(c->b, ctxt->eflags)) | 1462 | if (test_cc(c->b, ctxt->eflags)) |
1428 | JMP_REL(rel); | 1463 | jmp_rel(c, rel); |
1429 | break; | 1464 | break; |
1430 | } | 1465 | } |
1431 | case 0x80 ... 0x83: /* Grp1 */ | 1466 | case 0x80 ... 0x83: /* Grp1 */ |
@@ -1477,7 +1512,7 @@ special_insn: | |||
1477 | case 0x88 ... 0x8b: /* mov */ | 1512 | case 0x88 ... 0x8b: /* mov */ |
1478 | goto mov; | 1513 | goto mov; |
1479 | case 0x8d: /* lea r16/r32, m */ | 1514 | case 0x8d: /* lea r16/r32, m */ |
1480 | c->dst.val = c->modrm_val; | 1515 | c->dst.val = c->modrm_ea; |
1481 | break; | 1516 | break; |
1482 | case 0x8f: /* pop (sole member of Grp1a) */ | 1517 | case 0x8f: /* pop (sole member of Grp1a) */ |
1483 | rc = emulate_grp1a(ctxt, ops); | 1518 | rc = emulate_grp1a(ctxt, ops); |
@@ -1501,27 +1536,27 @@ special_insn: | |||
1501 | case 0xa4 ... 0xa5: /* movs */ | 1536 | case 0xa4 ... 0xa5: /* movs */ |
1502 | c->dst.type = OP_MEM; | 1537 | c->dst.type = OP_MEM; |
1503 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1538 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1504 | c->dst.ptr = (unsigned long *)register_address( | 1539 | c->dst.ptr = (unsigned long *)register_address(c, |
1505 | ctxt->es_base, | 1540 | ctxt->es_base, |
1506 | c->regs[VCPU_REGS_RDI]); | 1541 | c->regs[VCPU_REGS_RDI]); |
1507 | if ((rc = ops->read_emulated(register_address( | 1542 | if ((rc = ops->read_emulated(register_address(c, |
1508 | c->override_base ? *c->override_base : | 1543 | c->override_base ? *c->override_base : |
1509 | ctxt->ds_base, | 1544 | ctxt->ds_base, |
1510 | c->regs[VCPU_REGS_RSI]), | 1545 | c->regs[VCPU_REGS_RSI]), |
1511 | &c->dst.val, | 1546 | &c->dst.val, |
1512 | c->dst.bytes, ctxt->vcpu)) != 0) | 1547 | c->dst.bytes, ctxt->vcpu)) != 0) |
1513 | goto done; | 1548 | goto done; |
1514 | register_address_increment(c->regs[VCPU_REGS_RSI], | 1549 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], |
1515 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | 1550 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes |
1516 | : c->dst.bytes); | 1551 | : c->dst.bytes); |
1517 | register_address_increment(c->regs[VCPU_REGS_RDI], | 1552 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], |
1518 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | 1553 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes |
1519 | : c->dst.bytes); | 1554 | : c->dst.bytes); |
1520 | break; | 1555 | break; |
1521 | case 0xa6 ... 0xa7: /* cmps */ | 1556 | case 0xa6 ... 0xa7: /* cmps */ |
1522 | c->src.type = OP_NONE; /* Disable writeback. */ | 1557 | c->src.type = OP_NONE; /* Disable writeback. */ |
1523 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1558 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1524 | c->src.ptr = (unsigned long *)register_address( | 1559 | c->src.ptr = (unsigned long *)register_address(c, |
1525 | c->override_base ? *c->override_base : | 1560 | c->override_base ? *c->override_base : |
1526 | ctxt->ds_base, | 1561 | ctxt->ds_base, |
1527 | c->regs[VCPU_REGS_RSI]); | 1562 | c->regs[VCPU_REGS_RSI]); |
@@ -1533,7 +1568,7 @@ special_insn: | |||
1533 | 1568 | ||
1534 | c->dst.type = OP_NONE; /* Disable writeback. */ | 1569 | c->dst.type = OP_NONE; /* Disable writeback. */ |
1535 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1570 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1536 | c->dst.ptr = (unsigned long *)register_address( | 1571 | c->dst.ptr = (unsigned long *)register_address(c, |
1537 | ctxt->es_base, | 1572 | ctxt->es_base, |
1538 | c->regs[VCPU_REGS_RDI]); | 1573 | c->regs[VCPU_REGS_RDI]); |
1539 | if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, | 1574 | if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, |
@@ -1546,10 +1581,10 @@ special_insn: | |||
1546 | 1581 | ||
1547 | emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); | 1582 | emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); |
1548 | 1583 | ||
1549 | register_address_increment(c->regs[VCPU_REGS_RSI], | 1584 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], |
1550 | (ctxt->eflags & EFLG_DF) ? -c->src.bytes | 1585 | (ctxt->eflags & EFLG_DF) ? -c->src.bytes |
1551 | : c->src.bytes); | 1586 | : c->src.bytes); |
1552 | register_address_increment(c->regs[VCPU_REGS_RDI], | 1587 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], |
1553 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | 1588 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes |
1554 | : c->dst.bytes); | 1589 | : c->dst.bytes); |
1555 | 1590 | ||
@@ -1557,11 +1592,11 @@ special_insn: | |||
1557 | case 0xaa ... 0xab: /* stos */ | 1592 | case 0xaa ... 0xab: /* stos */ |
1558 | c->dst.type = OP_MEM; | 1593 | c->dst.type = OP_MEM; |
1559 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1594 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1560 | c->dst.ptr = (unsigned long *)register_address( | 1595 | c->dst.ptr = (unsigned long *)register_address(c, |
1561 | ctxt->es_base, | 1596 | ctxt->es_base, |
1562 | c->regs[VCPU_REGS_RDI]); | 1597 | c->regs[VCPU_REGS_RDI]); |
1563 | c->dst.val = c->regs[VCPU_REGS_RAX]; | 1598 | c->dst.val = c->regs[VCPU_REGS_RAX]; |
1564 | register_address_increment(c->regs[VCPU_REGS_RDI], | 1599 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], |
1565 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | 1600 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes |
1566 | : c->dst.bytes); | 1601 | : c->dst.bytes); |
1567 | break; | 1602 | break; |
@@ -1569,7 +1604,7 @@ special_insn: | |||
1569 | c->dst.type = OP_REG; | 1604 | c->dst.type = OP_REG; |
1570 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1605 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1571 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | 1606 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; |
1572 | if ((rc = ops->read_emulated(register_address( | 1607 | if ((rc = ops->read_emulated(register_address(c, |
1573 | c->override_base ? *c->override_base : | 1608 | c->override_base ? *c->override_base : |
1574 | ctxt->ds_base, | 1609 | ctxt->ds_base, |
1575 | c->regs[VCPU_REGS_RSI]), | 1610 | c->regs[VCPU_REGS_RSI]), |
@@ -1577,7 +1612,7 @@ special_insn: | |||
1577 | c->dst.bytes, | 1612 | c->dst.bytes, |
1578 | ctxt->vcpu)) != 0) | 1613 | ctxt->vcpu)) != 0) |
1579 | goto done; | 1614 | goto done; |
1580 | register_address_increment(c->regs[VCPU_REGS_RSI], | 1615 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], |
1581 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | 1616 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes |
1582 | : c->dst.bytes); | 1617 | : c->dst.bytes); |
1583 | break; | 1618 | break; |
@@ -1616,14 +1651,14 @@ special_insn: | |||
1616 | goto cannot_emulate; | 1651 | goto cannot_emulate; |
1617 | } | 1652 | } |
1618 | c->src.val = (unsigned long) c->eip; | 1653 | c->src.val = (unsigned long) c->eip; |
1619 | JMP_REL(rel); | 1654 | jmp_rel(c, rel); |
1620 | c->op_bytes = c->ad_bytes; | 1655 | c->op_bytes = c->ad_bytes; |
1621 | emulate_push(ctxt); | 1656 | emulate_push(ctxt); |
1622 | break; | 1657 | break; |
1623 | } | 1658 | } |
1624 | case 0xe9: /* jmp rel */ | 1659 | case 0xe9: /* jmp rel */ |
1625 | case 0xeb: /* jmp rel short */ | 1660 | case 0xeb: /* jmp rel short */ |
1626 | JMP_REL(c->src.val); | 1661 | jmp_rel(c, c->src.val); |
1627 | c->dst.type = OP_NONE; /* Disable writeback. */ | 1662 | c->dst.type = OP_NONE; /* Disable writeback. */ |
1628 | break; | 1663 | break; |
1629 | case 0xf4: /* hlt */ | 1664 | case 0xf4: /* hlt */ |
@@ -1690,6 +1725,8 @@ twobyte_insn: | |||
1690 | goto done; | 1725 | goto done; |
1691 | 1726 | ||
1692 | kvm_emulate_hypercall(ctxt->vcpu); | 1727 | kvm_emulate_hypercall(ctxt->vcpu); |
1728 | /* Disable writeback. */ | ||
1729 | c->dst.type = OP_NONE; | ||
1693 | break; | 1730 | break; |
1694 | case 2: /* lgdt */ | 1731 | case 2: /* lgdt */ |
1695 | rc = read_descriptor(ctxt, ops, c->src.ptr, | 1732 | rc = read_descriptor(ctxt, ops, c->src.ptr, |
@@ -1697,6 +1734,8 @@ twobyte_insn: | |||
1697 | if (rc) | 1734 | if (rc) |
1698 | goto done; | 1735 | goto done; |
1699 | realmode_lgdt(ctxt->vcpu, size, address); | 1736 | realmode_lgdt(ctxt->vcpu, size, address); |
1737 | /* Disable writeback. */ | ||
1738 | c->dst.type = OP_NONE; | ||
1700 | break; | 1739 | break; |
1701 | case 3: /* lidt/vmmcall */ | 1740 | case 3: /* lidt/vmmcall */ |
1702 | if (c->modrm_mod == 3 && c->modrm_rm == 1) { | 1741 | if (c->modrm_mod == 3 && c->modrm_rm == 1) { |
@@ -1712,27 +1751,25 @@ twobyte_insn: | |||
1712 | goto done; | 1751 | goto done; |
1713 | realmode_lidt(ctxt->vcpu, size, address); | 1752 | realmode_lidt(ctxt->vcpu, size, address); |
1714 | } | 1753 | } |
1754 | /* Disable writeback. */ | ||
1755 | c->dst.type = OP_NONE; | ||
1715 | break; | 1756 | break; |
1716 | case 4: /* smsw */ | 1757 | case 4: /* smsw */ |
1717 | if (c->modrm_mod != 3) | 1758 | c->dst.bytes = 2; |
1718 | goto cannot_emulate; | 1759 | c->dst.val = realmode_get_cr(ctxt->vcpu, 0); |
1719 | *(u16 *)&c->regs[c->modrm_rm] | ||
1720 | = realmode_get_cr(ctxt->vcpu, 0); | ||
1721 | break; | 1760 | break; |
1722 | case 6: /* lmsw */ | 1761 | case 6: /* lmsw */ |
1723 | if (c->modrm_mod != 3) | 1762 | realmode_lmsw(ctxt->vcpu, (u16)c->src.val, |
1724 | goto cannot_emulate; | 1763 | &ctxt->eflags); |
1725 | realmode_lmsw(ctxt->vcpu, (u16)c->modrm_val, | ||
1726 | &ctxt->eflags); | ||
1727 | break; | 1764 | break; |
1728 | case 7: /* invlpg*/ | 1765 | case 7: /* invlpg*/ |
1729 | emulate_invlpg(ctxt->vcpu, memop); | 1766 | emulate_invlpg(ctxt->vcpu, memop); |
1767 | /* Disable writeback. */ | ||
1768 | c->dst.type = OP_NONE; | ||
1730 | break; | 1769 | break; |
1731 | default: | 1770 | default: |
1732 | goto cannot_emulate; | 1771 | goto cannot_emulate; |
1733 | } | 1772 | } |
1734 | /* Disable writeback. */ | ||
1735 | c->dst.type = OP_NONE; | ||
1736 | break; | 1773 | break; |
1737 | case 0x06: | 1774 | case 0x06: |
1738 | emulate_clts(ctxt->vcpu); | 1775 | emulate_clts(ctxt->vcpu); |
@@ -1823,7 +1860,7 @@ twobyte_insn: | |||
1823 | goto cannot_emulate; | 1860 | goto cannot_emulate; |
1824 | } | 1861 | } |
1825 | if (test_cc(c->b, ctxt->eflags)) | 1862 | if (test_cc(c->b, ctxt->eflags)) |
1826 | JMP_REL(rel); | 1863 | jmp_rel(c, rel); |
1827 | c->dst.type = OP_NONE; | 1864 | c->dst.type = OP_NONE; |
1828 | break; | 1865 | break; |
1829 | } | 1866 | } |
diff --git a/drivers/s390/Makefile b/drivers/s390/Makefile index 5a888704a8d0..4f4e7cf105d4 100644 --- a/drivers/s390/Makefile +++ b/drivers/s390/Makefile | |||
@@ -5,7 +5,7 @@ | |||
5 | CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w | 5 | CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w |
6 | 6 | ||
7 | obj-y += s390mach.o sysinfo.o s390_rdev.o | 7 | obj-y += s390mach.o sysinfo.o s390_rdev.o |
8 | obj-y += cio/ block/ char/ crypto/ net/ scsi/ | 8 | obj-y += cio/ block/ char/ crypto/ net/ scsi/ kvm/ |
9 | 9 | ||
10 | drivers-y += drivers/s390/built-in.o | 10 | drivers-y += drivers/s390/built-in.o |
11 | 11 | ||
diff --git a/drivers/s390/kvm/Makefile b/drivers/s390/kvm/Makefile new file mode 100644 index 000000000000..4a5ec39f9ca6 --- /dev/null +++ b/drivers/s390/kvm/Makefile | |||
@@ -0,0 +1,9 @@ | |||
1 | # Makefile for kvm guest drivers on s390 | ||
2 | # | ||
3 | # Copyright IBM Corp. 2008 | ||
4 | # | ||
5 | # This program is free software; you can redistribute it and/or modify | ||
6 | # it under the terms of the GNU General Public License (version 2 only) | ||
7 | # as published by the Free Software Foundation. | ||
8 | |||
9 | obj-$(CONFIG_VIRTIO) += kvm_virtio.o | ||
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c new file mode 100644 index 000000000000..bbef3764fbf8 --- /dev/null +++ b/drivers/s390/kvm/kvm_virtio.c | |||
@@ -0,0 +1,338 @@ | |||
1 | /* | ||
2 | * kvm_virtio.c - virtio for kvm on s390 | ||
3 | * | ||
4 | * Copyright IBM Corp. 2008 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2 only) | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> | ||
11 | */ | ||
12 | |||
13 | #include <linux/init.h> | ||
14 | #include <linux/bootmem.h> | ||
15 | #include <linux/err.h> | ||
16 | #include <linux/virtio.h> | ||
17 | #include <linux/virtio_config.h> | ||
18 | #include <linux/interrupt.h> | ||
19 | #include <linux/virtio_ring.h> | ||
20 | #include <asm/io.h> | ||
21 | #include <asm/kvm_para.h> | ||
22 | #include <asm/kvm_virtio.h> | ||
23 | #include <asm/setup.h> | ||
24 | #include <asm/s390_ext.h> | ||
25 | |||
26 | #define VIRTIO_SUBCODE_64 0x0D00 | ||
27 | |||
28 | /* | ||
29 | * The pointer to our (page) of device descriptions. | ||
30 | */ | ||
31 | static void *kvm_devices; | ||
32 | |||
33 | /* | ||
34 | * Unique numbering for kvm devices. | ||
35 | */ | ||
36 | static unsigned int dev_index; | ||
37 | |||
38 | struct kvm_device { | ||
39 | struct virtio_device vdev; | ||
40 | struct kvm_device_desc *desc; | ||
41 | }; | ||
42 | |||
43 | #define to_kvmdev(vd) container_of(vd, struct kvm_device, vdev) | ||
44 | |||
45 | /* | ||
46 | * memory layout: | ||
47 | * - kvm_device_descriptor | ||
48 | * struct kvm_device_desc | ||
49 | * - configuration | ||
50 | * struct kvm_vqconfig | ||
51 | * - feature bits | ||
52 | * - config space | ||
53 | */ | ||
54 | static struct kvm_vqconfig *kvm_vq_config(const struct kvm_device_desc *desc) | ||
55 | { | ||
56 | return (struct kvm_vqconfig *)(desc + 1); | ||
57 | } | ||
58 | |||
59 | static u8 *kvm_vq_features(const struct kvm_device_desc *desc) | ||
60 | { | ||
61 | return (u8 *)(kvm_vq_config(desc) + desc->num_vq); | ||
62 | } | ||
63 | |||
64 | static u8 *kvm_vq_configspace(const struct kvm_device_desc *desc) | ||
65 | { | ||
66 | return kvm_vq_features(desc) + desc->feature_len * 2; | ||
67 | } | ||
68 | |||
69 | /* | ||
70 | * The total size of the config page used by this device (incl. desc) | ||
71 | */ | ||
72 | static unsigned desc_size(const struct kvm_device_desc *desc) | ||
73 | { | ||
74 | return sizeof(*desc) | ||
75 | + desc->num_vq * sizeof(struct kvm_vqconfig) | ||
76 | + desc->feature_len * 2 | ||
77 | + desc->config_len; | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * This tests (and acknowleges) a feature bit. | ||
82 | */ | ||
83 | static bool kvm_feature(struct virtio_device *vdev, unsigned fbit) | ||
84 | { | ||
85 | struct kvm_device_desc *desc = to_kvmdev(vdev)->desc; | ||
86 | u8 *features; | ||
87 | |||
88 | if (fbit / 8 > desc->feature_len) | ||
89 | return false; | ||
90 | |||
91 | features = kvm_vq_features(desc); | ||
92 | if (!(features[fbit / 8] & (1 << (fbit % 8)))) | ||
93 | return false; | ||
94 | |||
95 | /* | ||
96 | * We set the matching bit in the other half of the bitmap to tell the | ||
97 | * Host we want to use this feature. | ||
98 | */ | ||
99 | features[desc->feature_len + fbit / 8] |= (1 << (fbit % 8)); | ||
100 | return true; | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * Reading and writing elements in config space | ||
105 | */ | ||
106 | static void kvm_get(struct virtio_device *vdev, unsigned int offset, | ||
107 | void *buf, unsigned len) | ||
108 | { | ||
109 | struct kvm_device_desc *desc = to_kvmdev(vdev)->desc; | ||
110 | |||
111 | BUG_ON(offset + len > desc->config_len); | ||
112 | memcpy(buf, kvm_vq_configspace(desc) + offset, len); | ||
113 | } | ||
114 | |||
115 | static void kvm_set(struct virtio_device *vdev, unsigned int offset, | ||
116 | const void *buf, unsigned len) | ||
117 | { | ||
118 | struct kvm_device_desc *desc = to_kvmdev(vdev)->desc; | ||
119 | |||
120 | BUG_ON(offset + len > desc->config_len); | ||
121 | memcpy(kvm_vq_configspace(desc) + offset, buf, len); | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * The operations to get and set the status word just access | ||
126 | * the status field of the device descriptor. set_status will also | ||
127 | * make a hypercall to the host, to tell about status changes | ||
128 | */ | ||
129 | static u8 kvm_get_status(struct virtio_device *vdev) | ||
130 | { | ||
131 | return to_kvmdev(vdev)->desc->status; | ||
132 | } | ||
133 | |||
134 | static void kvm_set_status(struct virtio_device *vdev, u8 status) | ||
135 | { | ||
136 | BUG_ON(!status); | ||
137 | to_kvmdev(vdev)->desc->status = status; | ||
138 | kvm_hypercall1(KVM_S390_VIRTIO_SET_STATUS, | ||
139 | (unsigned long) to_kvmdev(vdev)->desc); | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * To reset the device, we use the KVM_VIRTIO_RESET hypercall, using the | ||
144 | * descriptor address. The Host will zero the status and all the | ||
145 | * features. | ||
146 | */ | ||
147 | static void kvm_reset(struct virtio_device *vdev) | ||
148 | { | ||
149 | kvm_hypercall1(KVM_S390_VIRTIO_RESET, | ||
150 | (unsigned long) to_kvmdev(vdev)->desc); | ||
151 | } | ||
152 | |||
153 | /* | ||
154 | * When the virtio_ring code wants to notify the Host, it calls us here and we | ||
155 | * make a hypercall. We hand the address of the virtqueue so the Host | ||
156 | * knows which virtqueue we're talking about. | ||
157 | */ | ||
158 | static void kvm_notify(struct virtqueue *vq) | ||
159 | { | ||
160 | struct kvm_vqconfig *config = vq->priv; | ||
161 | |||
162 | kvm_hypercall1(KVM_S390_VIRTIO_NOTIFY, config->address); | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * This routine finds the first virtqueue described in the configuration of | ||
167 | * this device and sets it up. | ||
168 | */ | ||
169 | static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, | ||
170 | unsigned index, | ||
171 | void (*callback)(struct virtqueue *vq)) | ||
172 | { | ||
173 | struct kvm_device *kdev = to_kvmdev(vdev); | ||
174 | struct kvm_vqconfig *config; | ||
175 | struct virtqueue *vq; | ||
176 | int err; | ||
177 | |||
178 | if (index >= kdev->desc->num_vq) | ||
179 | return ERR_PTR(-ENOENT); | ||
180 | |||
181 | config = kvm_vq_config(kdev->desc)+index; | ||
182 | |||
183 | if (add_shared_memory(config->address, | ||
184 | vring_size(config->num, PAGE_SIZE))) { | ||
185 | err = -ENOMEM; | ||
186 | goto out; | ||
187 | } | ||
188 | |||
189 | vq = vring_new_virtqueue(config->num, vdev, (void *) config->address, | ||
190 | kvm_notify, callback); | ||
191 | if (!vq) { | ||
192 | err = -ENOMEM; | ||
193 | goto unmap; | ||
194 | } | ||
195 | |||
196 | /* | ||
197 | * register a callback token | ||
198 | * The host will sent this via the external interrupt parameter | ||
199 | */ | ||
200 | config->token = (u64) vq; | ||
201 | |||
202 | vq->priv = config; | ||
203 | return vq; | ||
204 | unmap: | ||
205 | remove_shared_memory(config->address, vring_size(config->num, | ||
206 | PAGE_SIZE)); | ||
207 | out: | ||
208 | return ERR_PTR(err); | ||
209 | } | ||
210 | |||
211 | static void kvm_del_vq(struct virtqueue *vq) | ||
212 | { | ||
213 | struct kvm_vqconfig *config = vq->priv; | ||
214 | |||
215 | vring_del_virtqueue(vq); | ||
216 | remove_shared_memory(config->address, | ||
217 | vring_size(config->num, PAGE_SIZE)); | ||
218 | } | ||
219 | |||
220 | /* | ||
221 | * The config ops structure as defined by virtio config | ||
222 | */ | ||
223 | static struct virtio_config_ops kvm_vq_configspace_ops = { | ||
224 | .feature = kvm_feature, | ||
225 | .get = kvm_get, | ||
226 | .set = kvm_set, | ||
227 | .get_status = kvm_get_status, | ||
228 | .set_status = kvm_set_status, | ||
229 | .reset = kvm_reset, | ||
230 | .find_vq = kvm_find_vq, | ||
231 | .del_vq = kvm_del_vq, | ||
232 | }; | ||
233 | |||
234 | /* | ||
235 | * The root device for the kvm virtio devices. | ||
236 | * This makes them appear as /sys/devices/kvm_s390/0,1,2 not /sys/devices/0,1,2. | ||
237 | */ | ||
238 | static struct device kvm_root = { | ||
239 | .parent = NULL, | ||
240 | .bus_id = "kvm_s390", | ||
241 | }; | ||
242 | |||
243 | /* | ||
244 | * adds a new device and register it with virtio | ||
245 | * appropriate drivers are loaded by the device model | ||
246 | */ | ||
247 | static void add_kvm_device(struct kvm_device_desc *d) | ||
248 | { | ||
249 | struct kvm_device *kdev; | ||
250 | |||
251 | kdev = kzalloc(sizeof(*kdev), GFP_KERNEL); | ||
252 | if (!kdev) { | ||
253 | printk(KERN_EMERG "Cannot allocate kvm dev %u\n", | ||
254 | dev_index++); | ||
255 | return; | ||
256 | } | ||
257 | |||
258 | kdev->vdev.dev.parent = &kvm_root; | ||
259 | kdev->vdev.index = dev_index++; | ||
260 | kdev->vdev.id.device = d->type; | ||
261 | kdev->vdev.config = &kvm_vq_configspace_ops; | ||
262 | kdev->desc = d; | ||
263 | |||
264 | if (register_virtio_device(&kdev->vdev) != 0) { | ||
265 | printk(KERN_ERR "Failed to register kvm device %u\n", | ||
266 | kdev->vdev.index); | ||
267 | kfree(kdev); | ||
268 | } | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * scan_devices() simply iterates through the device page. | ||
273 | * The type 0 is reserved to mean "end of devices". | ||
274 | */ | ||
275 | static void scan_devices(void) | ||
276 | { | ||
277 | unsigned int i; | ||
278 | struct kvm_device_desc *d; | ||
279 | |||
280 | for (i = 0; i < PAGE_SIZE; i += desc_size(d)) { | ||
281 | d = kvm_devices + i; | ||
282 | |||
283 | if (d->type == 0) | ||
284 | break; | ||
285 | |||
286 | add_kvm_device(d); | ||
287 | } | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * we emulate the request_irq behaviour on top of s390 extints | ||
292 | */ | ||
293 | static void kvm_extint_handler(u16 code) | ||
294 | { | ||
295 | void *data = (void *) *(long *) __LC_PFAULT_INTPARM; | ||
296 | u16 subcode = S390_lowcore.cpu_addr; | ||
297 | |||
298 | if ((subcode & 0xff00) != VIRTIO_SUBCODE_64) | ||
299 | return; | ||
300 | |||
301 | vring_interrupt(0, data); | ||
302 | } | ||
303 | |||
304 | /* | ||
305 | * Init function for virtio | ||
306 | * devices are in a single page above top of "normal" mem | ||
307 | */ | ||
308 | static int __init kvm_devices_init(void) | ||
309 | { | ||
310 | int rc; | ||
311 | |||
312 | if (!MACHINE_IS_KVM) | ||
313 | return -ENODEV; | ||
314 | |||
315 | rc = device_register(&kvm_root); | ||
316 | if (rc) { | ||
317 | printk(KERN_ERR "Could not register kvm_s390 root device"); | ||
318 | return rc; | ||
319 | } | ||
320 | |||
321 | if (add_shared_memory((max_pfn) << PAGE_SHIFT, PAGE_SIZE)) { | ||
322 | device_unregister(&kvm_root); | ||
323 | return -ENOMEM; | ||
324 | } | ||
325 | |||
326 | kvm_devices = (void *) (max_pfn << PAGE_SHIFT); | ||
327 | |||
328 | ctl_set_bit(0, 9); | ||
329 | register_external_interrupt(0x2603, kvm_extint_handler); | ||
330 | |||
331 | scan_devices(); | ||
332 | return 0; | ||
333 | } | ||
334 | |||
335 | /* | ||
336 | * We do this after core stuff, but before the drivers. | ||
337 | */ | ||
338 | postcore_initcall(kvm_devices_init); | ||
diff --git a/include/asm-ia64/gcc_intrin.h b/include/asm-ia64/gcc_intrin.h index de2ed2cbdd84..2fe292c275fe 100644 --- a/include/asm-ia64/gcc_intrin.h +++ b/include/asm-ia64/gcc_intrin.h | |||
@@ -21,6 +21,10 @@ | |||
21 | 21 | ||
22 | #define ia64_invala_fr(regnum) asm volatile ("invala.e f%0" :: "i"(regnum)) | 22 | #define ia64_invala_fr(regnum) asm volatile ("invala.e f%0" :: "i"(regnum)) |
23 | 23 | ||
24 | #define ia64_flushrs() asm volatile ("flushrs;;":::"memory") | ||
25 | |||
26 | #define ia64_loadrs() asm volatile ("loadrs;;":::"memory") | ||
27 | |||
24 | extern void ia64_bad_param_for_setreg (void); | 28 | extern void ia64_bad_param_for_setreg (void); |
25 | extern void ia64_bad_param_for_getreg (void); | 29 | extern void ia64_bad_param_for_getreg (void); |
26 | 30 | ||
@@ -517,6 +521,14 @@ do { \ | |||
517 | #define ia64_ptrd(addr, size) \ | 521 | #define ia64_ptrd(addr, size) \ |
518 | asm volatile ("ptr.d %0,%1" :: "r"(addr), "r"(size) : "memory") | 522 | asm volatile ("ptr.d %0,%1" :: "r"(addr), "r"(size) : "memory") |
519 | 523 | ||
524 | #define ia64_ttag(addr) \ | ||
525 | ({ \ | ||
526 | __u64 ia64_intri_res; \ | ||
527 | asm volatile ("ttag %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); \ | ||
528 | ia64_intri_res; \ | ||
529 | }) | ||
530 | |||
531 | |||
520 | /* Values for lfhint in ia64_lfetch and ia64_lfetch_fault */ | 532 | /* Values for lfhint in ia64_lfetch and ia64_lfetch_fault */ |
521 | 533 | ||
522 | #define ia64_lfhint_none 0 | 534 | #define ia64_lfhint_none 0 |
diff --git a/include/asm-ia64/kvm.h b/include/asm-ia64/kvm.h index 030d29b4b26b..eb2d3559d089 100644 --- a/include/asm-ia64/kvm.h +++ b/include/asm-ia64/kvm.h | |||
@@ -1,6 +1,205 @@ | |||
1 | #ifndef __LINUX_KVM_IA64_H | 1 | #ifndef __ASM_IA64_KVM_H |
2 | #define __LINUX_KVM_IA64_H | 2 | #define __ASM_IA64_KVM_H |
3 | 3 | ||
4 | /* ia64 does not support KVM */ | 4 | /* |
5 | * asm-ia64/kvm.h: kvm structure definitions for ia64 | ||
6 | * | ||
7 | * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms and conditions of the GNU General Public License, | ||
11 | * version 2, as published by the Free Software Foundation. | ||
12 | * | ||
13 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
14 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
15 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
16 | * more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License along with | ||
19 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
20 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <asm/types.h> | ||
25 | #include <asm/fpu.h> | ||
26 | |||
27 | #include <linux/ioctl.h> | ||
28 | |||
29 | /* Architectural interrupt line count. */ | ||
30 | #define KVM_NR_INTERRUPTS 256 | ||
31 | |||
32 | #define KVM_IOAPIC_NUM_PINS 24 | ||
33 | |||
34 | struct kvm_ioapic_state { | ||
35 | __u64 base_address; | ||
36 | __u32 ioregsel; | ||
37 | __u32 id; | ||
38 | __u32 irr; | ||
39 | __u32 pad; | ||
40 | union { | ||
41 | __u64 bits; | ||
42 | struct { | ||
43 | __u8 vector; | ||
44 | __u8 delivery_mode:3; | ||
45 | __u8 dest_mode:1; | ||
46 | __u8 delivery_status:1; | ||
47 | __u8 polarity:1; | ||
48 | __u8 remote_irr:1; | ||
49 | __u8 trig_mode:1; | ||
50 | __u8 mask:1; | ||
51 | __u8 reserve:7; | ||
52 | __u8 reserved[4]; | ||
53 | __u8 dest_id; | ||
54 | } fields; | ||
55 | } redirtbl[KVM_IOAPIC_NUM_PINS]; | ||
56 | }; | ||
57 | |||
58 | #define KVM_IRQCHIP_PIC_MASTER 0 | ||
59 | #define KVM_IRQCHIP_PIC_SLAVE 1 | ||
60 | #define KVM_IRQCHIP_IOAPIC 2 | ||
61 | |||
62 | #define KVM_CONTEXT_SIZE 8*1024 | ||
63 | |||
64 | union context { | ||
65 | /* 8K size */ | ||
66 | char dummy[KVM_CONTEXT_SIZE]; | ||
67 | struct { | ||
68 | unsigned long psr; | ||
69 | unsigned long pr; | ||
70 | unsigned long caller_unat; | ||
71 | unsigned long pad; | ||
72 | unsigned long gr[32]; | ||
73 | unsigned long ar[128]; | ||
74 | unsigned long br[8]; | ||
75 | unsigned long cr[128]; | ||
76 | unsigned long rr[8]; | ||
77 | unsigned long ibr[8]; | ||
78 | unsigned long dbr[8]; | ||
79 | unsigned long pkr[8]; | ||
80 | struct ia64_fpreg fr[128]; | ||
81 | }; | ||
82 | }; | ||
83 | |||
84 | struct thash_data { | ||
85 | union { | ||
86 | struct { | ||
87 | unsigned long p : 1; /* 0 */ | ||
88 | unsigned long rv1 : 1; /* 1 */ | ||
89 | unsigned long ma : 3; /* 2-4 */ | ||
90 | unsigned long a : 1; /* 5 */ | ||
91 | unsigned long d : 1; /* 6 */ | ||
92 | unsigned long pl : 2; /* 7-8 */ | ||
93 | unsigned long ar : 3; /* 9-11 */ | ||
94 | unsigned long ppn : 38; /* 12-49 */ | ||
95 | unsigned long rv2 : 2; /* 50-51 */ | ||
96 | unsigned long ed : 1; /* 52 */ | ||
97 | unsigned long ig1 : 11; /* 53-63 */ | ||
98 | }; | ||
99 | struct { | ||
100 | unsigned long __rv1 : 53; /* 0-52 */ | ||
101 | unsigned long contiguous : 1; /*53 */ | ||
102 | unsigned long tc : 1; /* 54 TR or TC */ | ||
103 | unsigned long cl : 1; | ||
104 | /* 55 I side or D side cache line */ | ||
105 | unsigned long len : 4; /* 56-59 */ | ||
106 | unsigned long io : 1; /* 60 entry is for io or not */ | ||
107 | unsigned long nomap : 1; | ||
108 | /* 61 entry cann't be inserted into machine TLB.*/ | ||
109 | unsigned long checked : 1; | ||
110 | /* 62 for VTLB/VHPT sanity check */ | ||
111 | unsigned long invalid : 1; | ||
112 | /* 63 invalid entry */ | ||
113 | }; | ||
114 | unsigned long page_flags; | ||
115 | }; /* same for VHPT and TLB */ | ||
116 | |||
117 | union { | ||
118 | struct { | ||
119 | unsigned long rv3 : 2; | ||
120 | unsigned long ps : 6; | ||
121 | unsigned long key : 24; | ||
122 | unsigned long rv4 : 32; | ||
123 | }; | ||
124 | unsigned long itir; | ||
125 | }; | ||
126 | union { | ||
127 | struct { | ||
128 | unsigned long ig2 : 12; | ||
129 | unsigned long vpn : 49; | ||
130 | unsigned long vrn : 3; | ||
131 | }; | ||
132 | unsigned long ifa; | ||
133 | unsigned long vadr; | ||
134 | struct { | ||
135 | unsigned long tag : 63; | ||
136 | unsigned long ti : 1; | ||
137 | }; | ||
138 | unsigned long etag; | ||
139 | }; | ||
140 | union { | ||
141 | struct thash_data *next; | ||
142 | unsigned long rid; | ||
143 | unsigned long gpaddr; | ||
144 | }; | ||
145 | }; | ||
146 | |||
147 | #define NITRS 8 | ||
148 | #define NDTRS 8 | ||
149 | |||
150 | struct saved_vpd { | ||
151 | unsigned long vhpi; | ||
152 | unsigned long vgr[16]; | ||
153 | unsigned long vbgr[16]; | ||
154 | unsigned long vnat; | ||
155 | unsigned long vbnat; | ||
156 | unsigned long vcpuid[5]; | ||
157 | unsigned long vpsr; | ||
158 | unsigned long vpr; | ||
159 | unsigned long vcr[128]; | ||
160 | }; | ||
161 | |||
162 | struct kvm_regs { | ||
163 | char *saved_guest; | ||
164 | char *saved_stack; | ||
165 | struct saved_vpd vpd; | ||
166 | /*Arch-regs*/ | ||
167 | int mp_state; | ||
168 | unsigned long vmm_rr; | ||
169 | /* TR and TC. */ | ||
170 | struct thash_data itrs[NITRS]; | ||
171 | struct thash_data dtrs[NDTRS]; | ||
172 | /* Bit is set if there is a tr/tc for the region. */ | ||
173 | unsigned char itr_regions; | ||
174 | unsigned char dtr_regions; | ||
175 | unsigned char tc_regions; | ||
176 | |||
177 | char irq_check; | ||
178 | unsigned long saved_itc; | ||
179 | unsigned long itc_check; | ||
180 | unsigned long timer_check; | ||
181 | unsigned long timer_pending; | ||
182 | unsigned long last_itc; | ||
183 | |||
184 | unsigned long vrr[8]; | ||
185 | unsigned long ibr[8]; | ||
186 | unsigned long dbr[8]; | ||
187 | unsigned long insvc[4]; /* Interrupt in service. */ | ||
188 | unsigned long xtp; | ||
189 | |||
190 | unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */ | ||
191 | unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */ | ||
192 | unsigned long metaphysical_saved_rr0; /* from kvm_arch */ | ||
193 | unsigned long metaphysical_saved_rr4; /* from kvm_arch */ | ||
194 | unsigned long fp_psr; /*used for lazy float register */ | ||
195 | unsigned long saved_gp; | ||
196 | /*for phycial emulation */ | ||
197 | }; | ||
198 | |||
199 | struct kvm_sregs { | ||
200 | }; | ||
201 | |||
202 | struct kvm_fpu { | ||
203 | }; | ||
5 | 204 | ||
6 | #endif | 205 | #endif |
diff --git a/include/asm-ia64/kvm_host.h b/include/asm-ia64/kvm_host.h new file mode 100644 index 000000000000..c082c208c1f3 --- /dev/null +++ b/include/asm-ia64/kvm_host.h | |||
@@ -0,0 +1,524 @@ | |||
1 | /* | ||
2 | * kvm_host.h: used for kvm module, and hold ia64-specific sections. | ||
3 | * | ||
4 | * Copyright (C) 2007, Intel Corporation. | ||
5 | * | ||
6 | * Xiantao Zhang <xiantao.zhang@intel.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
19 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #ifndef __ASM_KVM_HOST_H | ||
24 | #define __ASM_KVM_HOST_H | ||
25 | |||
26 | |||
27 | #include <linux/types.h> | ||
28 | #include <linux/mm.h> | ||
29 | #include <linux/kvm.h> | ||
30 | #include <linux/kvm_para.h> | ||
31 | #include <linux/kvm_types.h> | ||
32 | |||
33 | #include <asm/pal.h> | ||
34 | #include <asm/sal.h> | ||
35 | |||
36 | #define KVM_MAX_VCPUS 4 | ||
37 | #define KVM_MEMORY_SLOTS 32 | ||
38 | /* memory slots that does not exposed to userspace */ | ||
39 | #define KVM_PRIVATE_MEM_SLOTS 4 | ||
40 | |||
41 | |||
42 | /* define exit reasons from vmm to kvm*/ | ||
43 | #define EXIT_REASON_VM_PANIC 0 | ||
44 | #define EXIT_REASON_MMIO_INSTRUCTION 1 | ||
45 | #define EXIT_REASON_PAL_CALL 2 | ||
46 | #define EXIT_REASON_SAL_CALL 3 | ||
47 | #define EXIT_REASON_SWITCH_RR6 4 | ||
48 | #define EXIT_REASON_VM_DESTROY 5 | ||
49 | #define EXIT_REASON_EXTERNAL_INTERRUPT 6 | ||
50 | #define EXIT_REASON_IPI 7 | ||
51 | #define EXIT_REASON_PTC_G 8 | ||
52 | |||
53 | /*Define vmm address space and vm data space.*/ | ||
54 | #define KVM_VMM_SIZE (16UL<<20) | ||
55 | #define KVM_VMM_SHIFT 24 | ||
56 | #define KVM_VMM_BASE 0xD000000000000000UL | ||
57 | #define VMM_SIZE (8UL<<20) | ||
58 | |||
59 | /* | ||
60 | * Define vm_buffer, used by PAL Services, base address. | ||
61 | * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M | ||
62 | */ | ||
63 | #define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE) | ||
64 | #define KVM_VM_BUFFER_SIZE (8UL<<20) | ||
65 | |||
66 | /*Define Virtual machine data layout.*/ | ||
67 | #define KVM_VM_DATA_SHIFT 24 | ||
68 | #define KVM_VM_DATA_SIZE (1UL << KVM_VM_DATA_SHIFT) | ||
69 | #define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VMM_SIZE) | ||
70 | |||
71 | |||
72 | #define KVM_P2M_BASE KVM_VM_DATA_BASE | ||
73 | #define KVM_P2M_OFS 0 | ||
74 | #define KVM_P2M_SIZE (8UL << 20) | ||
75 | |||
76 | #define KVM_VHPT_BASE (KVM_P2M_BASE + KVM_P2M_SIZE) | ||
77 | #define KVM_VHPT_OFS KVM_P2M_SIZE | ||
78 | #define KVM_VHPT_BLOCK_SIZE (2UL << 20) | ||
79 | #define VHPT_SHIFT 18 | ||
80 | #define VHPT_SIZE (1UL << VHPT_SHIFT) | ||
81 | #define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5)) | ||
82 | |||
83 | #define KVM_VTLB_BASE (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE) | ||
84 | #define KVM_VTLB_OFS (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE) | ||
85 | #define KVM_VTLB_BLOCK_SIZE (1UL<<20) | ||
86 | #define VTLB_SHIFT 17 | ||
87 | #define VTLB_SIZE (1UL<<VTLB_SHIFT) | ||
88 | #define VTLB_NUM_ENTRIES (1<<(VTLB_SHIFT-5)) | ||
89 | |||
90 | #define KVM_VPD_BASE (KVM_VTLB_BASE+KVM_VTLB_BLOCK_SIZE) | ||
91 | #define KVM_VPD_OFS (KVM_VTLB_OFS+KVM_VTLB_BLOCK_SIZE) | ||
92 | #define KVM_VPD_BLOCK_SIZE (2UL<<20) | ||
93 | #define VPD_SHIFT 16 | ||
94 | #define VPD_SIZE (1UL<<VPD_SHIFT) | ||
95 | |||
96 | #define KVM_VCPU_BASE (KVM_VPD_BASE+KVM_VPD_BLOCK_SIZE) | ||
97 | #define KVM_VCPU_OFS (KVM_VPD_OFS+KVM_VPD_BLOCK_SIZE) | ||
98 | #define KVM_VCPU_BLOCK_SIZE (2UL<<20) | ||
99 | #define VCPU_SHIFT 18 | ||
100 | #define VCPU_SIZE (1UL<<VCPU_SHIFT) | ||
101 | #define MAX_VCPU_NUM KVM_VCPU_BLOCK_SIZE/VCPU_SIZE | ||
102 | |||
103 | #define KVM_VM_BASE (KVM_VCPU_BASE+KVM_VCPU_BLOCK_SIZE) | ||
104 | #define KVM_VM_OFS (KVM_VCPU_OFS+KVM_VCPU_BLOCK_SIZE) | ||
105 | #define KVM_VM_BLOCK_SIZE (1UL<<19) | ||
106 | |||
107 | #define KVM_MEM_DIRTY_LOG_BASE (KVM_VM_BASE+KVM_VM_BLOCK_SIZE) | ||
108 | #define KVM_MEM_DIRTY_LOG_OFS (KVM_VM_OFS+KVM_VM_BLOCK_SIZE) | ||
109 | #define KVM_MEM_DIRTY_LOG_SIZE (1UL<<19) | ||
110 | |||
111 | /* Get vpd, vhpt, tlb, vcpu, base*/ | ||
112 | #define VPD_ADDR(n) (KVM_VPD_BASE+n*VPD_SIZE) | ||
113 | #define VHPT_ADDR(n) (KVM_VHPT_BASE+n*VHPT_SIZE) | ||
114 | #define VTLB_ADDR(n) (KVM_VTLB_BASE+n*VTLB_SIZE) | ||
115 | #define VCPU_ADDR(n) (KVM_VCPU_BASE+n*VCPU_SIZE) | ||
116 | |||
117 | /*IO section definitions*/ | ||
118 | #define IOREQ_READ 1 | ||
119 | #define IOREQ_WRITE 0 | ||
120 | |||
121 | #define STATE_IOREQ_NONE 0 | ||
122 | #define STATE_IOREQ_READY 1 | ||
123 | #define STATE_IOREQ_INPROCESS 2 | ||
124 | #define STATE_IORESP_READY 3 | ||
125 | |||
126 | /*Guest Physical address layout.*/ | ||
127 | #define GPFN_MEM (0UL << 60) /* Guest pfn is normal mem */ | ||
128 | #define GPFN_FRAME_BUFFER (1UL << 60) /* VGA framebuffer */ | ||
129 | #define GPFN_LOW_MMIO (2UL << 60) /* Low MMIO range */ | ||
130 | #define GPFN_PIB (3UL << 60) /* PIB base */ | ||
131 | #define GPFN_IOSAPIC (4UL << 60) /* IOSAPIC base */ | ||
132 | #define GPFN_LEGACY_IO (5UL << 60) /* Legacy I/O base */ | ||
133 | #define GPFN_GFW (6UL << 60) /* Guest Firmware */ | ||
134 | #define GPFN_HIGH_MMIO (7UL << 60) /* High MMIO range */ | ||
135 | |||
136 | #define GPFN_IO_MASK (7UL << 60) /* Guest pfn is I/O type */ | ||
137 | #define GPFN_INV_MASK (1UL << 63) /* Guest pfn is invalid */ | ||
138 | #define INVALID_MFN (~0UL) | ||
139 | #define MEM_G (1UL << 30) | ||
140 | #define MEM_M (1UL << 20) | ||
141 | #define MMIO_START (3 * MEM_G) | ||
142 | #define MMIO_SIZE (512 * MEM_M) | ||
143 | #define VGA_IO_START 0xA0000UL | ||
144 | #define VGA_IO_SIZE 0x20000 | ||
145 | #define LEGACY_IO_START (MMIO_START + MMIO_SIZE) | ||
146 | #define LEGACY_IO_SIZE (64 * MEM_M) | ||
147 | #define IO_SAPIC_START 0xfec00000UL | ||
148 | #define IO_SAPIC_SIZE 0x100000 | ||
149 | #define PIB_START 0xfee00000UL | ||
150 | #define PIB_SIZE 0x200000 | ||
151 | #define GFW_START (4 * MEM_G - 16 * MEM_M) | ||
152 | #define GFW_SIZE (16 * MEM_M) | ||
153 | |||
154 | /*Deliver mode, defined for ioapic.c*/ | ||
155 | #define dest_Fixed IOSAPIC_FIXED | ||
156 | #define dest_LowestPrio IOSAPIC_LOWEST_PRIORITY | ||
157 | |||
158 | #define NMI_VECTOR 2 | ||
159 | #define ExtINT_VECTOR 0 | ||
160 | #define NULL_VECTOR (-1) | ||
161 | #define IA64_SPURIOUS_INT_VECTOR 0x0f | ||
162 | |||
163 | #define VCPU_LID(v) (((u64)(v)->vcpu_id) << 24) | ||
164 | |||
165 | /* | ||
166 | *Delivery mode | ||
167 | */ | ||
168 | #define SAPIC_DELIV_SHIFT 8 | ||
169 | #define SAPIC_FIXED 0x0 | ||
170 | #define SAPIC_LOWEST_PRIORITY 0x1 | ||
171 | #define SAPIC_PMI 0x2 | ||
172 | #define SAPIC_NMI 0x4 | ||
173 | #define SAPIC_INIT 0x5 | ||
174 | #define SAPIC_EXTINT 0x7 | ||
175 | |||
176 | /* | ||
177 | * vcpu->requests bit members for arch | ||
178 | */ | ||
179 | #define KVM_REQ_PTC_G 32 | ||
180 | #define KVM_REQ_RESUME 33 | ||
181 | |||
182 | #define KVM_PAGES_PER_HPAGE 1 | ||
183 | |||
184 | struct kvm; | ||
185 | struct kvm_vcpu; | ||
186 | struct kvm_guest_debug{ | ||
187 | }; | ||
188 | |||
189 | struct kvm_mmio_req { | ||
190 | uint64_t addr; /* physical address */ | ||
191 | uint64_t size; /* size in bytes */ | ||
192 | uint64_t data; /* data (or paddr of data) */ | ||
193 | uint8_t state:4; | ||
194 | uint8_t dir:1; /* 1=read, 0=write */ | ||
195 | }; | ||
196 | |||
197 | /*Pal data struct */ | ||
198 | struct kvm_pal_call{ | ||
199 | /*In area*/ | ||
200 | uint64_t gr28; | ||
201 | uint64_t gr29; | ||
202 | uint64_t gr30; | ||
203 | uint64_t gr31; | ||
204 | /*Out area*/ | ||
205 | struct ia64_pal_retval ret; | ||
206 | }; | ||
207 | |||
208 | /* Sal data structure */ | ||
209 | struct kvm_sal_call{ | ||
210 | /*In area*/ | ||
211 | uint64_t in0; | ||
212 | uint64_t in1; | ||
213 | uint64_t in2; | ||
214 | uint64_t in3; | ||
215 | uint64_t in4; | ||
216 | uint64_t in5; | ||
217 | uint64_t in6; | ||
218 | uint64_t in7; | ||
219 | struct sal_ret_values ret; | ||
220 | }; | ||
221 | |||
222 | /*Guest change rr6*/ | ||
223 | struct kvm_switch_rr6 { | ||
224 | uint64_t old_rr; | ||
225 | uint64_t new_rr; | ||
226 | }; | ||
227 | |||
228 | union ia64_ipi_a{ | ||
229 | unsigned long val; | ||
230 | struct { | ||
231 | unsigned long rv : 3; | ||
232 | unsigned long ir : 1; | ||
233 | unsigned long eid : 8; | ||
234 | unsigned long id : 8; | ||
235 | unsigned long ib_base : 44; | ||
236 | }; | ||
237 | }; | ||
238 | |||
239 | union ia64_ipi_d { | ||
240 | unsigned long val; | ||
241 | struct { | ||
242 | unsigned long vector : 8; | ||
243 | unsigned long dm : 3; | ||
244 | unsigned long ig : 53; | ||
245 | }; | ||
246 | }; | ||
247 | |||
248 | /*ipi check exit data*/ | ||
249 | struct kvm_ipi_data{ | ||
250 | union ia64_ipi_a addr; | ||
251 | union ia64_ipi_d data; | ||
252 | }; | ||
253 | |||
254 | /*global purge data*/ | ||
255 | struct kvm_ptc_g { | ||
256 | unsigned long vaddr; | ||
257 | unsigned long rr; | ||
258 | unsigned long ps; | ||
259 | struct kvm_vcpu *vcpu; | ||
260 | }; | ||
261 | |||
262 | /*Exit control data */ | ||
263 | struct exit_ctl_data{ | ||
264 | uint32_t exit_reason; | ||
265 | uint32_t vm_status; | ||
266 | union { | ||
267 | struct kvm_mmio_req ioreq; | ||
268 | struct kvm_pal_call pal_data; | ||
269 | struct kvm_sal_call sal_data; | ||
270 | struct kvm_switch_rr6 rr_data; | ||
271 | struct kvm_ipi_data ipi_data; | ||
272 | struct kvm_ptc_g ptc_g_data; | ||
273 | } u; | ||
274 | }; | ||
275 | |||
276 | union pte_flags { | ||
277 | unsigned long val; | ||
278 | struct { | ||
279 | unsigned long p : 1; /*0 */ | ||
280 | unsigned long : 1; /* 1 */ | ||
281 | unsigned long ma : 3; /* 2-4 */ | ||
282 | unsigned long a : 1; /* 5 */ | ||
283 | unsigned long d : 1; /* 6 */ | ||
284 | unsigned long pl : 2; /* 7-8 */ | ||
285 | unsigned long ar : 3; /* 9-11 */ | ||
286 | unsigned long ppn : 38; /* 12-49 */ | ||
287 | unsigned long : 2; /* 50-51 */ | ||
288 | unsigned long ed : 1; /* 52 */ | ||
289 | }; | ||
290 | }; | ||
291 | |||
292 | union ia64_pta { | ||
293 | unsigned long val; | ||
294 | struct { | ||
295 | unsigned long ve : 1; | ||
296 | unsigned long reserved0 : 1; | ||
297 | unsigned long size : 6; | ||
298 | unsigned long vf : 1; | ||
299 | unsigned long reserved1 : 6; | ||
300 | unsigned long base : 49; | ||
301 | }; | ||
302 | }; | ||
303 | |||
304 | struct thash_cb { | ||
305 | /* THASH base information */ | ||
306 | struct thash_data *hash; /* hash table pointer */ | ||
307 | union ia64_pta pta; | ||
308 | int num; | ||
309 | }; | ||
310 | |||
311 | struct kvm_vcpu_stat { | ||
312 | }; | ||
313 | |||
314 | struct kvm_vcpu_arch { | ||
315 | int launched; | ||
316 | int last_exit; | ||
317 | int last_run_cpu; | ||
318 | int vmm_tr_slot; | ||
319 | int vm_tr_slot; | ||
320 | |||
321 | #define KVM_MP_STATE_RUNNABLE 0 | ||
322 | #define KVM_MP_STATE_UNINITIALIZED 1 | ||
323 | #define KVM_MP_STATE_INIT_RECEIVED 2 | ||
324 | #define KVM_MP_STATE_HALTED 3 | ||
325 | int mp_state; | ||
326 | |||
327 | #define MAX_PTC_G_NUM 3 | ||
328 | int ptc_g_count; | ||
329 | struct kvm_ptc_g ptc_g_data[MAX_PTC_G_NUM]; | ||
330 | |||
331 | /*halt timer to wake up sleepy vcpus*/ | ||
332 | struct hrtimer hlt_timer; | ||
333 | long ht_active; | ||
334 | |||
335 | struct kvm_lapic *apic; /* kernel irqchip context */ | ||
336 | struct vpd *vpd; | ||
337 | |||
338 | /* Exit data for vmm_transition*/ | ||
339 | struct exit_ctl_data exit_data; | ||
340 | |||
341 | cpumask_t cache_coherent_map; | ||
342 | |||
343 | unsigned long vmm_rr; | ||
344 | unsigned long host_rr6; | ||
345 | unsigned long psbits[8]; | ||
346 | unsigned long cr_iipa; | ||
347 | unsigned long cr_isr; | ||
348 | unsigned long vsa_base; | ||
349 | unsigned long dirty_log_lock_pa; | ||
350 | unsigned long __gp; | ||
351 | /* TR and TC. */ | ||
352 | struct thash_data itrs[NITRS]; | ||
353 | struct thash_data dtrs[NDTRS]; | ||
354 | /* Bit is set if there is a tr/tc for the region. */ | ||
355 | unsigned char itr_regions; | ||
356 | unsigned char dtr_regions; | ||
357 | unsigned char tc_regions; | ||
358 | /* purge all */ | ||
359 | unsigned long ptce_base; | ||
360 | unsigned long ptce_count[2]; | ||
361 | unsigned long ptce_stride[2]; | ||
362 | /* itc/itm */ | ||
363 | unsigned long last_itc; | ||
364 | long itc_offset; | ||
365 | unsigned long itc_check; | ||
366 | unsigned long timer_check; | ||
367 | unsigned long timer_pending; | ||
368 | |||
369 | unsigned long vrr[8]; | ||
370 | unsigned long ibr[8]; | ||
371 | unsigned long dbr[8]; | ||
372 | unsigned long insvc[4]; /* Interrupt in service. */ | ||
373 | unsigned long xtp; | ||
374 | |||
375 | unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */ | ||
376 | unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */ | ||
377 | unsigned long metaphysical_saved_rr0; /* from kvm_arch */ | ||
378 | unsigned long metaphysical_saved_rr4; /* from kvm_arch */ | ||
379 | unsigned long fp_psr; /*used for lazy float register */ | ||
380 | unsigned long saved_gp; | ||
381 | /*for phycial emulation */ | ||
382 | int mode_flags; | ||
383 | struct thash_cb vtlb; | ||
384 | struct thash_cb vhpt; | ||
385 | char irq_check; | ||
386 | char irq_new_pending; | ||
387 | |||
388 | unsigned long opcode; | ||
389 | unsigned long cause; | ||
390 | union context host; | ||
391 | union context guest; | ||
392 | }; | ||
393 | |||
394 | struct kvm_vm_stat { | ||
395 | u64 remote_tlb_flush; | ||
396 | }; | ||
397 | |||
398 | struct kvm_sal_data { | ||
399 | unsigned long boot_ip; | ||
400 | unsigned long boot_gp; | ||
401 | }; | ||
402 | |||
403 | struct kvm_arch { | ||
404 | unsigned long vm_base; | ||
405 | unsigned long metaphysical_rr0; | ||
406 | unsigned long metaphysical_rr4; | ||
407 | unsigned long vmm_init_rr; | ||
408 | unsigned long vhpt_base; | ||
409 | unsigned long vtlb_base; | ||
410 | unsigned long vpd_base; | ||
411 | spinlock_t dirty_log_lock; | ||
412 | struct kvm_ioapic *vioapic; | ||
413 | struct kvm_vm_stat stat; | ||
414 | struct kvm_sal_data rdv_sal_data; | ||
415 | }; | ||
416 | |||
417 | union cpuid3_t { | ||
418 | u64 value; | ||
419 | struct { | ||
420 | u64 number : 8; | ||
421 | u64 revision : 8; | ||
422 | u64 model : 8; | ||
423 | u64 family : 8; | ||
424 | u64 archrev : 8; | ||
425 | u64 rv : 24; | ||
426 | }; | ||
427 | }; | ||
428 | |||
429 | struct kvm_pt_regs { | ||
430 | /* The following registers are saved by SAVE_MIN: */ | ||
431 | unsigned long b6; /* scratch */ | ||
432 | unsigned long b7; /* scratch */ | ||
433 | |||
434 | unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */ | ||
435 | unsigned long ar_ssd; /* reserved for future use (scratch) */ | ||
436 | |||
437 | unsigned long r8; /* scratch (return value register 0) */ | ||
438 | unsigned long r9; /* scratch (return value register 1) */ | ||
439 | unsigned long r10; /* scratch (return value register 2) */ | ||
440 | unsigned long r11; /* scratch (return value register 3) */ | ||
441 | |||
442 | unsigned long cr_ipsr; /* interrupted task's psr */ | ||
443 | unsigned long cr_iip; /* interrupted task's instruction pointer */ | ||
444 | unsigned long cr_ifs; /* interrupted task's function state */ | ||
445 | |||
446 | unsigned long ar_unat; /* interrupted task's NaT register (preserved) */ | ||
447 | unsigned long ar_pfs; /* prev function state */ | ||
448 | unsigned long ar_rsc; /* RSE configuration */ | ||
449 | /* The following two are valid only if cr_ipsr.cpl > 0: */ | ||
450 | unsigned long ar_rnat; /* RSE NaT */ | ||
451 | unsigned long ar_bspstore; /* RSE bspstore */ | ||
452 | |||
453 | unsigned long pr; /* 64 predicate registers (1 bit each) */ | ||
454 | unsigned long b0; /* return pointer (bp) */ | ||
455 | unsigned long loadrs; /* size of dirty partition << 16 */ | ||
456 | |||
457 | unsigned long r1; /* the gp pointer */ | ||
458 | unsigned long r12; /* interrupted task's memory stack pointer */ | ||
459 | unsigned long r13; /* thread pointer */ | ||
460 | |||
461 | unsigned long ar_fpsr; /* floating point status (preserved) */ | ||
462 | unsigned long r15; /* scratch */ | ||
463 | |||
464 | /* The remaining registers are NOT saved for system calls. */ | ||
465 | unsigned long r14; /* scratch */ | ||
466 | unsigned long r2; /* scratch */ | ||
467 | unsigned long r3; /* scratch */ | ||
468 | unsigned long r16; /* scratch */ | ||
469 | unsigned long r17; /* scratch */ | ||
470 | unsigned long r18; /* scratch */ | ||
471 | unsigned long r19; /* scratch */ | ||
472 | unsigned long r20; /* scratch */ | ||
473 | unsigned long r21; /* scratch */ | ||
474 | unsigned long r22; /* scratch */ | ||
475 | unsigned long r23; /* scratch */ | ||
476 | unsigned long r24; /* scratch */ | ||
477 | unsigned long r25; /* scratch */ | ||
478 | unsigned long r26; /* scratch */ | ||
479 | unsigned long r27; /* scratch */ | ||
480 | unsigned long r28; /* scratch */ | ||
481 | unsigned long r29; /* scratch */ | ||
482 | unsigned long r30; /* scratch */ | ||
483 | unsigned long r31; /* scratch */ | ||
484 | unsigned long ar_ccv; /* compare/exchange value (scratch) */ | ||
485 | |||
486 | /* | ||
487 | * Floating point registers that the kernel considers scratch: | ||
488 | */ | ||
489 | struct ia64_fpreg f6; /* scratch */ | ||
490 | struct ia64_fpreg f7; /* scratch */ | ||
491 | struct ia64_fpreg f8; /* scratch */ | ||
492 | struct ia64_fpreg f9; /* scratch */ | ||
493 | struct ia64_fpreg f10; /* scratch */ | ||
494 | struct ia64_fpreg f11; /* scratch */ | ||
495 | |||
496 | unsigned long r4; /* preserved */ | ||
497 | unsigned long r5; /* preserved */ | ||
498 | unsigned long r6; /* preserved */ | ||
499 | unsigned long r7; /* preserved */ | ||
500 | unsigned long eml_unat; /* used for emulating instruction */ | ||
501 | unsigned long pad0; /* alignment pad */ | ||
502 | }; | ||
503 | |||
504 | static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v) | ||
505 | { | ||
506 | return (struct kvm_pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; | ||
507 | } | ||
508 | |||
509 | typedef int kvm_vmm_entry(void); | ||
510 | typedef void kvm_tramp_entry(union context *host, union context *guest); | ||
511 | |||
512 | struct kvm_vmm_info{ | ||
513 | struct module *module; | ||
514 | kvm_vmm_entry *vmm_entry; | ||
515 | kvm_tramp_entry *tramp_entry; | ||
516 | unsigned long vmm_ivt; | ||
517 | }; | ||
518 | |||
519 | int kvm_highest_pending_irq(struct kvm_vcpu *vcpu); | ||
520 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); | ||
521 | int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); | ||
522 | void kvm_sal_emul(struct kvm_vcpu *vcpu); | ||
523 | |||
524 | #endif | ||
diff --git a/include/asm-ia64/kvm_para.h b/include/asm-ia64/kvm_para.h new file mode 100644 index 000000000000..9f9796bb3441 --- /dev/null +++ b/include/asm-ia64/kvm_para.h | |||
@@ -0,0 +1,29 @@ | |||
1 | #ifndef __IA64_KVM_PARA_H | ||
2 | #define __IA64_KVM_PARA_H | ||
3 | |||
4 | /* | ||
5 | * asm-ia64/kvm_para.h | ||
6 | * | ||
7 | * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms and conditions of the GNU General Public License, | ||
11 | * version 2, as published by the Free Software Foundation. | ||
12 | * | ||
13 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
14 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
15 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
16 | * more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License along with | ||
19 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
20 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | static inline unsigned int kvm_arch_para_features(void) | ||
25 | { | ||
26 | return 0; | ||
27 | } | ||
28 | |||
29 | #endif | ||
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index 741f7ecb986a..6aff126fc07e 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h | |||
@@ -119,6 +119,69 @@ struct ia64_psr { | |||
119 | __u64 reserved4 : 19; | 119 | __u64 reserved4 : 19; |
120 | }; | 120 | }; |
121 | 121 | ||
122 | union ia64_isr { | ||
123 | __u64 val; | ||
124 | struct { | ||
125 | __u64 code : 16; | ||
126 | __u64 vector : 8; | ||
127 | __u64 reserved1 : 8; | ||
128 | __u64 x : 1; | ||
129 | __u64 w : 1; | ||
130 | __u64 r : 1; | ||
131 | __u64 na : 1; | ||
132 | __u64 sp : 1; | ||
133 | __u64 rs : 1; | ||
134 | __u64 ir : 1; | ||
135 | __u64 ni : 1; | ||
136 | __u64 so : 1; | ||
137 | __u64 ei : 2; | ||
138 | __u64 ed : 1; | ||
139 | __u64 reserved2 : 20; | ||
140 | }; | ||
141 | }; | ||
142 | |||
143 | union ia64_lid { | ||
144 | __u64 val; | ||
145 | struct { | ||
146 | __u64 rv : 16; | ||
147 | __u64 eid : 8; | ||
148 | __u64 id : 8; | ||
149 | __u64 ig : 32; | ||
150 | }; | ||
151 | }; | ||
152 | |||
153 | union ia64_tpr { | ||
154 | __u64 val; | ||
155 | struct { | ||
156 | __u64 ig0 : 4; | ||
157 | __u64 mic : 4; | ||
158 | __u64 rsv : 8; | ||
159 | __u64 mmi : 1; | ||
160 | __u64 ig1 : 47; | ||
161 | }; | ||
162 | }; | ||
163 | |||
164 | union ia64_itir { | ||
165 | __u64 val; | ||
166 | struct { | ||
167 | __u64 rv3 : 2; /* 0-1 */ | ||
168 | __u64 ps : 6; /* 2-7 */ | ||
169 | __u64 key : 24; /* 8-31 */ | ||
170 | __u64 rv4 : 32; /* 32-63 */ | ||
171 | }; | ||
172 | }; | ||
173 | |||
174 | union ia64_rr { | ||
175 | __u64 val; | ||
176 | struct { | ||
177 | __u64 ve : 1; /* enable hw walker */ | ||
178 | __u64 reserved0: 1; /* reserved */ | ||
179 | __u64 ps : 6; /* log page size */ | ||
180 | __u64 rid : 24; /* region id */ | ||
181 | __u64 reserved1: 32; /* reserved */ | ||
182 | }; | ||
183 | }; | ||
184 | |||
122 | /* | 185 | /* |
123 | * CPU type, hardware bug flags, and per-CPU state. Frequently used | 186 | * CPU type, hardware bug flags, and per-CPU state. Frequently used |
124 | * state comes earlier: | 187 | * state comes earlier: |
diff --git a/include/asm-powerpc/kvm.h b/include/asm-powerpc/kvm.h index d1b530fbf8dd..f993e4198d5c 100644 --- a/include/asm-powerpc/kvm.h +++ b/include/asm-powerpc/kvm.h | |||
@@ -1,6 +1,55 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright IBM Corp. 2007 | ||
16 | * | ||
17 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | ||
18 | */ | ||
19 | |||
1 | #ifndef __LINUX_KVM_POWERPC_H | 20 | #ifndef __LINUX_KVM_POWERPC_H |
2 | #define __LINUX_KVM_POWERPC_H | 21 | #define __LINUX_KVM_POWERPC_H |
3 | 22 | ||
4 | /* powerpc does not support KVM */ | 23 | #include <asm/types.h> |
24 | |||
25 | struct kvm_regs { | ||
26 | __u64 pc; | ||
27 | __u64 cr; | ||
28 | __u64 ctr; | ||
29 | __u64 lr; | ||
30 | __u64 xer; | ||
31 | __u64 msr; | ||
32 | __u64 srr0; | ||
33 | __u64 srr1; | ||
34 | __u64 pid; | ||
35 | |||
36 | __u64 sprg0; | ||
37 | __u64 sprg1; | ||
38 | __u64 sprg2; | ||
39 | __u64 sprg3; | ||
40 | __u64 sprg4; | ||
41 | __u64 sprg5; | ||
42 | __u64 sprg6; | ||
43 | __u64 sprg7; | ||
44 | |||
45 | __u64 gpr[32]; | ||
46 | }; | ||
47 | |||
48 | struct kvm_sregs { | ||
49 | }; | ||
50 | |||
51 | struct kvm_fpu { | ||
52 | __u64 fpr[32]; | ||
53 | }; | ||
5 | 54 | ||
6 | #endif | 55 | #endif /* __LINUX_KVM_POWERPC_H */ |
diff --git a/include/asm-powerpc/kvm_asm.h b/include/asm-powerpc/kvm_asm.h new file mode 100644 index 000000000000..2197764796d9 --- /dev/null +++ b/include/asm-powerpc/kvm_asm.h | |||
@@ -0,0 +1,55 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright IBM Corp. 2008 | ||
16 | * | ||
17 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | ||
18 | */ | ||
19 | |||
20 | #ifndef __POWERPC_KVM_ASM_H__ | ||
21 | #define __POWERPC_KVM_ASM_H__ | ||
22 | |||
23 | /* IVPR must be 64KiB-aligned. */ | ||
24 | #define VCPU_SIZE_ORDER 4 | ||
25 | #define VCPU_SIZE_LOG (VCPU_SIZE_ORDER + 12) | ||
26 | #define VCPU_TLB_PGSZ PPC44x_TLB_64K | ||
27 | #define VCPU_SIZE_BYTES (1<<VCPU_SIZE_LOG) | ||
28 | |||
29 | #define BOOKE_INTERRUPT_CRITICAL 0 | ||
30 | #define BOOKE_INTERRUPT_MACHINE_CHECK 1 | ||
31 | #define BOOKE_INTERRUPT_DATA_STORAGE 2 | ||
32 | #define BOOKE_INTERRUPT_INST_STORAGE 3 | ||
33 | #define BOOKE_INTERRUPT_EXTERNAL 4 | ||
34 | #define BOOKE_INTERRUPT_ALIGNMENT 5 | ||
35 | #define BOOKE_INTERRUPT_PROGRAM 6 | ||
36 | #define BOOKE_INTERRUPT_FP_UNAVAIL 7 | ||
37 | #define BOOKE_INTERRUPT_SYSCALL 8 | ||
38 | #define BOOKE_INTERRUPT_AP_UNAVAIL 9 | ||
39 | #define BOOKE_INTERRUPT_DECREMENTER 10 | ||
40 | #define BOOKE_INTERRUPT_FIT 11 | ||
41 | #define BOOKE_INTERRUPT_WATCHDOG 12 | ||
42 | #define BOOKE_INTERRUPT_DTLB_MISS 13 | ||
43 | #define BOOKE_INTERRUPT_ITLB_MISS 14 | ||
44 | #define BOOKE_INTERRUPT_DEBUG 15 | ||
45 | #define BOOKE_MAX_INTERRUPT 15 | ||
46 | |||
47 | #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ | ||
48 | #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ | ||
49 | |||
50 | #define RESUME_GUEST 0 | ||
51 | #define RESUME_GUEST_NV RESUME_FLAG_NV | ||
52 | #define RESUME_HOST RESUME_FLAG_HOST | ||
53 | #define RESUME_HOST_NV (RESUME_FLAG_HOST|RESUME_FLAG_NV) | ||
54 | |||
55 | #endif /* __POWERPC_KVM_ASM_H__ */ | ||
diff --git a/include/asm-powerpc/kvm_host.h b/include/asm-powerpc/kvm_host.h new file mode 100644 index 000000000000..04ffbb8e0a35 --- /dev/null +++ b/include/asm-powerpc/kvm_host.h | |||
@@ -0,0 +1,152 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright IBM Corp. 2007 | ||
16 | * | ||
17 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | ||
18 | */ | ||
19 | |||
20 | #ifndef __POWERPC_KVM_HOST_H__ | ||
21 | #define __POWERPC_KVM_HOST_H__ | ||
22 | |||
23 | #include <linux/mutex.h> | ||
24 | #include <linux/timer.h> | ||
25 | #include <linux/types.h> | ||
26 | #include <linux/kvm_types.h> | ||
27 | #include <asm/kvm_asm.h> | ||
28 | |||
29 | #define KVM_MAX_VCPUS 1 | ||
30 | #define KVM_MEMORY_SLOTS 32 | ||
31 | /* memory slots that does not exposed to userspace */ | ||
32 | #define KVM_PRIVATE_MEM_SLOTS 4 | ||
33 | |||
34 | /* We don't currently support large pages. */ | ||
35 | #define KVM_PAGES_PER_HPAGE (1<<31) | ||
36 | |||
37 | struct kvm; | ||
38 | struct kvm_run; | ||
39 | struct kvm_vcpu; | ||
40 | |||
41 | struct kvm_vm_stat { | ||
42 | u32 remote_tlb_flush; | ||
43 | }; | ||
44 | |||
45 | struct kvm_vcpu_stat { | ||
46 | u32 sum_exits; | ||
47 | u32 mmio_exits; | ||
48 | u32 dcr_exits; | ||
49 | u32 signal_exits; | ||
50 | u32 light_exits; | ||
51 | /* Account for special types of light exits: */ | ||
52 | u32 itlb_real_miss_exits; | ||
53 | u32 itlb_virt_miss_exits; | ||
54 | u32 dtlb_real_miss_exits; | ||
55 | u32 dtlb_virt_miss_exits; | ||
56 | u32 syscall_exits; | ||
57 | u32 isi_exits; | ||
58 | u32 dsi_exits; | ||
59 | u32 emulated_inst_exits; | ||
60 | u32 dec_exits; | ||
61 | u32 ext_intr_exits; | ||
62 | }; | ||
63 | |||
64 | struct tlbe { | ||
65 | u32 tid; /* Only the low 8 bits are used. */ | ||
66 | u32 word0; | ||
67 | u32 word1; | ||
68 | u32 word2; | ||
69 | }; | ||
70 | |||
71 | struct kvm_arch { | ||
72 | }; | ||
73 | |||
74 | struct kvm_vcpu_arch { | ||
75 | /* Unmodified copy of the guest's TLB. */ | ||
76 | struct tlbe guest_tlb[PPC44x_TLB_SIZE]; | ||
77 | /* TLB that's actually used when the guest is running. */ | ||
78 | struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; | ||
79 | /* Pages which are referenced in the shadow TLB. */ | ||
80 | struct page *shadow_pages[PPC44x_TLB_SIZE]; | ||
81 | /* Copy of the host's TLB. */ | ||
82 | struct tlbe host_tlb[PPC44x_TLB_SIZE]; | ||
83 | |||
84 | u32 host_stack; | ||
85 | u32 host_pid; | ||
86 | |||
87 | u64 fpr[32]; | ||
88 | u32 gpr[32]; | ||
89 | |||
90 | u32 pc; | ||
91 | u32 cr; | ||
92 | u32 ctr; | ||
93 | u32 lr; | ||
94 | u32 xer; | ||
95 | |||
96 | u32 msr; | ||
97 | u32 mmucr; | ||
98 | u32 sprg0; | ||
99 | u32 sprg1; | ||
100 | u32 sprg2; | ||
101 | u32 sprg3; | ||
102 | u32 sprg4; | ||
103 | u32 sprg5; | ||
104 | u32 sprg6; | ||
105 | u32 sprg7; | ||
106 | u32 srr0; | ||
107 | u32 srr1; | ||
108 | u32 csrr0; | ||
109 | u32 csrr1; | ||
110 | u32 dsrr0; | ||
111 | u32 dsrr1; | ||
112 | u32 dear; | ||
113 | u32 esr; | ||
114 | u32 dec; | ||
115 | u32 decar; | ||
116 | u32 tbl; | ||
117 | u32 tbu; | ||
118 | u32 tcr; | ||
119 | u32 tsr; | ||
120 | u32 ivor[16]; | ||
121 | u32 ivpr; | ||
122 | u32 pir; | ||
123 | u32 pid; | ||
124 | u32 pvr; | ||
125 | u32 ccr0; | ||
126 | u32 ccr1; | ||
127 | u32 dbcr0; | ||
128 | u32 dbcr1; | ||
129 | |||
130 | u32 last_inst; | ||
131 | u32 fault_dear; | ||
132 | u32 fault_esr; | ||
133 | gpa_t paddr_accessed; | ||
134 | |||
135 | u8 io_gpr; /* GPR used as IO source/target */ | ||
136 | u8 mmio_is_bigendian; | ||
137 | u8 dcr_needed; | ||
138 | u8 dcr_is_write; | ||
139 | |||
140 | u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ | ||
141 | |||
142 | struct timer_list dec_timer; | ||
143 | unsigned long pending_exceptions; | ||
144 | }; | ||
145 | |||
146 | struct kvm_guest_debug { | ||
147 | int enabled; | ||
148 | unsigned long bp[4]; | ||
149 | int singlestep; | ||
150 | }; | ||
151 | |||
152 | #endif /* __POWERPC_KVM_HOST_H__ */ | ||
diff --git a/include/asm-powerpc/kvm_para.h b/include/asm-powerpc/kvm_para.h new file mode 100644 index 000000000000..2d48f6a63d0b --- /dev/null +++ b/include/asm-powerpc/kvm_para.h | |||
@@ -0,0 +1,37 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright IBM Corp. 2008 | ||
16 | * | ||
17 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | ||
18 | */ | ||
19 | |||
20 | #ifndef __POWERPC_KVM_PARA_H__ | ||
21 | #define __POWERPC_KVM_PARA_H__ | ||
22 | |||
23 | #ifdef __KERNEL__ | ||
24 | |||
25 | static inline int kvm_para_available(void) | ||
26 | { | ||
27 | return 0; | ||
28 | } | ||
29 | |||
30 | static inline unsigned int kvm_arch_para_features(void) | ||
31 | { | ||
32 | return 0; | ||
33 | } | ||
34 | |||
35 | #endif /* __KERNEL__ */ | ||
36 | |||
37 | #endif /* __POWERPC_KVM_PARA_H__ */ | ||
diff --git a/include/asm-powerpc/kvm_ppc.h b/include/asm-powerpc/kvm_ppc.h new file mode 100644 index 000000000000..7ac820308a7e --- /dev/null +++ b/include/asm-powerpc/kvm_ppc.h | |||
@@ -0,0 +1,88 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright IBM Corp. 2008 | ||
16 | * | ||
17 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | ||
18 | */ | ||
19 | |||
20 | #ifndef __POWERPC_KVM_PPC_H__ | ||
21 | #define __POWERPC_KVM_PPC_H__ | ||
22 | |||
23 | /* This file exists just so we can dereference kvm_vcpu, avoiding nested header | ||
24 | * dependencies. */ | ||
25 | |||
26 | #include <linux/mutex.h> | ||
27 | #include <linux/timer.h> | ||
28 | #include <linux/types.h> | ||
29 | #include <linux/kvm_types.h> | ||
30 | #include <linux/kvm_host.h> | ||
31 | |||
32 | struct kvm_tlb { | ||
33 | struct tlbe guest_tlb[PPC44x_TLB_SIZE]; | ||
34 | struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; | ||
35 | }; | ||
36 | |||
37 | enum emulation_result { | ||
38 | EMULATE_DONE, /* no further processing */ | ||
39 | EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ | ||
40 | EMULATE_DO_DCR, /* kvm_run filled with DCR request */ | ||
41 | EMULATE_FAIL, /* can't emulate this instruction */ | ||
42 | }; | ||
43 | |||
44 | extern const unsigned char exception_priority[]; | ||
45 | extern const unsigned char priority_exception[]; | ||
46 | |||
47 | extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | ||
48 | extern char kvmppc_handlers_start[]; | ||
49 | extern unsigned long kvmppc_handler_len; | ||
50 | |||
51 | extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); | ||
52 | extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
53 | unsigned int rt, unsigned int bytes, | ||
54 | int is_bigendian); | ||
55 | extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
56 | u32 val, unsigned int bytes, int is_bigendian); | ||
57 | |||
58 | extern int kvmppc_emulate_instruction(struct kvm_run *run, | ||
59 | struct kvm_vcpu *vcpu); | ||
60 | |||
61 | extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, | ||
62 | u64 asid, u32 flags); | ||
63 | extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, u64 eaddr, u64 asid); | ||
64 | extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); | ||
65 | |||
66 | extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu); | ||
67 | |||
68 | static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception) | ||
69 | { | ||
70 | unsigned int priority = exception_priority[exception]; | ||
71 | set_bit(priority, &vcpu->arch.pending_exceptions); | ||
72 | } | ||
73 | |||
74 | static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception) | ||
75 | { | ||
76 | unsigned int priority = exception_priority[exception]; | ||
77 | clear_bit(priority, &vcpu->arch.pending_exceptions); | ||
78 | } | ||
79 | |||
80 | static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) | ||
81 | { | ||
82 | if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) | ||
83 | kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); | ||
84 | |||
85 | vcpu->arch.msr = new_msr; | ||
86 | } | ||
87 | |||
88 | #endif /* __POWERPC_KVM_PPC_H__ */ | ||
diff --git a/include/asm-powerpc/mmu-44x.h b/include/asm-powerpc/mmu-44x.h index c8b02d97f753..a825524c981a 100644 --- a/include/asm-powerpc/mmu-44x.h +++ b/include/asm-powerpc/mmu-44x.h | |||
@@ -53,6 +53,8 @@ | |||
53 | 53 | ||
54 | #ifndef __ASSEMBLY__ | 54 | #ifndef __ASSEMBLY__ |
55 | 55 | ||
56 | extern unsigned int tlb_44x_hwater; | ||
57 | |||
56 | typedef struct { | 58 | typedef struct { |
57 | unsigned long id; | 59 | unsigned long id; |
58 | unsigned long vdso_base; | 60 | unsigned long vdso_base; |
diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild index e92b429d2be1..13c9805349f1 100644 --- a/include/asm-s390/Kbuild +++ b/include/asm-s390/Kbuild | |||
@@ -7,6 +7,7 @@ header-y += tape390.h | |||
7 | header-y += ucontext.h | 7 | header-y += ucontext.h |
8 | header-y += vtoc.h | 8 | header-y += vtoc.h |
9 | header-y += zcrypt.h | 9 | header-y += zcrypt.h |
10 | header-y += kvm.h | ||
10 | 11 | ||
11 | unifdef-y += cmb.h | 12 | unifdef-y += cmb.h |
12 | unifdef-y += debug.h | 13 | unifdef-y += debug.h |
diff --git a/include/asm-s390/kvm.h b/include/asm-s390/kvm.h index 573f2a351386..d74002f95794 100644 --- a/include/asm-s390/kvm.h +++ b/include/asm-s390/kvm.h | |||
@@ -1,6 +1,45 @@ | |||
1 | #ifndef __LINUX_KVM_S390_H | 1 | #ifndef __LINUX_KVM_S390_H |
2 | #define __LINUX_KVM_S390_H | 2 | #define __LINUX_KVM_S390_H |
3 | 3 | ||
4 | /* s390 does not support KVM */ | 4 | /* |
5 | * asm-s390/kvm.h - KVM s390 specific structures and definitions | ||
6 | * | ||
7 | * Copyright IBM Corp. 2008 | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License (version 2 only) | ||
11 | * as published by the Free Software Foundation. | ||
12 | * | ||
13 | * Author(s): Carsten Otte <cotte@de.ibm.com> | ||
14 | * Christian Borntraeger <borntraeger@de.ibm.com> | ||
15 | */ | ||
16 | #include <asm/types.h> | ||
17 | |||
18 | /* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ | ||
19 | struct kvm_pic_state { | ||
20 | /* no PIC for s390 */ | ||
21 | }; | ||
22 | |||
23 | struct kvm_ioapic_state { | ||
24 | /* no IOAPIC for s390 */ | ||
25 | }; | ||
26 | |||
27 | /* for KVM_GET_REGS and KVM_SET_REGS */ | ||
28 | struct kvm_regs { | ||
29 | /* general purpose regs for s390 */ | ||
30 | __u64 gprs[16]; | ||
31 | }; | ||
32 | |||
33 | /* for KVM_GET_SREGS and KVM_SET_SREGS */ | ||
34 | struct kvm_sregs { | ||
35 | __u32 acrs[16]; | ||
36 | __u64 crs[16]; | ||
37 | }; | ||
38 | |||
39 | /* for KVM_GET_FPU and KVM_SET_FPU */ | ||
40 | struct kvm_fpu { | ||
41 | __u32 fpc; | ||
42 | __u64 fprs[16]; | ||
43 | }; | ||
5 | 44 | ||
6 | #endif | 45 | #endif |
diff --git a/include/asm-s390/kvm_host.h b/include/asm-s390/kvm_host.h new file mode 100644 index 000000000000..f8204a4f2e02 --- /dev/null +++ b/include/asm-s390/kvm_host.h | |||
@@ -0,0 +1,234 @@ | |||
1 | /* | ||
2 | * asm-s390/kvm_host.h - definition for kernel virtual machines on s390 | ||
3 | * | ||
4 | * Copyright IBM Corp. 2008 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2 only) | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * Author(s): Carsten Otte <cotte@de.ibm.com> | ||
11 | */ | ||
12 | |||
13 | |||
14 | #ifndef ASM_KVM_HOST_H | ||
15 | #define ASM_KVM_HOST_H | ||
16 | #include <linux/kvm_host.h> | ||
17 | #include <asm/debug.h> | ||
18 | |||
19 | #define KVM_MAX_VCPUS 64 | ||
20 | #define KVM_MEMORY_SLOTS 32 | ||
21 | /* memory slots that does not exposed to userspace */ | ||
22 | #define KVM_PRIVATE_MEM_SLOTS 4 | ||
23 | |||
24 | struct kvm_guest_debug { | ||
25 | }; | ||
26 | |||
27 | struct sca_entry { | ||
28 | atomic_t scn; | ||
29 | __u64 reserved; | ||
30 | __u64 sda; | ||
31 | __u64 reserved2[2]; | ||
32 | } __attribute__((packed)); | ||
33 | |||
34 | |||
35 | struct sca_block { | ||
36 | __u64 ipte_control; | ||
37 | __u64 reserved[5]; | ||
38 | __u64 mcn; | ||
39 | __u64 reserved2; | ||
40 | struct sca_entry cpu[64]; | ||
41 | } __attribute__((packed)); | ||
42 | |||
43 | #define KVM_PAGES_PER_HPAGE 256 | ||
44 | |||
45 | #define CPUSTAT_HOST 0x80000000 | ||
46 | #define CPUSTAT_WAIT 0x10000000 | ||
47 | #define CPUSTAT_ECALL_PEND 0x08000000 | ||
48 | #define CPUSTAT_STOP_INT 0x04000000 | ||
49 | #define CPUSTAT_IO_INT 0x02000000 | ||
50 | #define CPUSTAT_EXT_INT 0x01000000 | ||
51 | #define CPUSTAT_RUNNING 0x00800000 | ||
52 | #define CPUSTAT_RETAINED 0x00400000 | ||
53 | #define CPUSTAT_TIMING_SUB 0x00020000 | ||
54 | #define CPUSTAT_SIE_SUB 0x00010000 | ||
55 | #define CPUSTAT_RRF 0x00008000 | ||
56 | #define CPUSTAT_SLSV 0x00004000 | ||
57 | #define CPUSTAT_SLSR 0x00002000 | ||
58 | #define CPUSTAT_ZARCH 0x00000800 | ||
59 | #define CPUSTAT_MCDS 0x00000100 | ||
60 | #define CPUSTAT_SM 0x00000080 | ||
61 | #define CPUSTAT_G 0x00000008 | ||
62 | #define CPUSTAT_J 0x00000002 | ||
63 | #define CPUSTAT_P 0x00000001 | ||
64 | |||
65 | struct sie_block { | ||
66 | atomic_t cpuflags; /* 0x0000 */ | ||
67 | __u32 prefix; /* 0x0004 */ | ||
68 | __u8 reserved8[32]; /* 0x0008 */ | ||
69 | __u64 cputm; /* 0x0028 */ | ||
70 | __u64 ckc; /* 0x0030 */ | ||
71 | __u64 epoch; /* 0x0038 */ | ||
72 | __u8 reserved40[4]; /* 0x0040 */ | ||
73 | #define LCTL_CR0 0x8000 | ||
74 | __u16 lctl; /* 0x0044 */ | ||
75 | __s16 icpua; /* 0x0046 */ | ||
76 | __u32 ictl; /* 0x0048 */ | ||
77 | __u32 eca; /* 0x004c */ | ||
78 | __u8 icptcode; /* 0x0050 */ | ||
79 | __u8 reserved51; /* 0x0051 */ | ||
80 | __u16 ihcpu; /* 0x0052 */ | ||
81 | __u8 reserved54[2]; /* 0x0054 */ | ||
82 | __u16 ipa; /* 0x0056 */ | ||
83 | __u32 ipb; /* 0x0058 */ | ||
84 | __u32 scaoh; /* 0x005c */ | ||
85 | __u8 reserved60; /* 0x0060 */ | ||
86 | __u8 ecb; /* 0x0061 */ | ||
87 | __u8 reserved62[2]; /* 0x0062 */ | ||
88 | __u32 scaol; /* 0x0064 */ | ||
89 | __u8 reserved68[4]; /* 0x0068 */ | ||
90 | __u32 todpr; /* 0x006c */ | ||
91 | __u8 reserved70[16]; /* 0x0070 */ | ||
92 | __u64 gmsor; /* 0x0080 */ | ||
93 | __u64 gmslm; /* 0x0088 */ | ||
94 | psw_t gpsw; /* 0x0090 */ | ||
95 | __u64 gg14; /* 0x00a0 */ | ||
96 | __u64 gg15; /* 0x00a8 */ | ||
97 | __u8 reservedb0[30]; /* 0x00b0 */ | ||
98 | __u16 iprcc; /* 0x00ce */ | ||
99 | __u8 reservedd0[48]; /* 0x00d0 */ | ||
100 | __u64 gcr[16]; /* 0x0100 */ | ||
101 | __u64 gbea; /* 0x0180 */ | ||
102 | __u8 reserved188[120]; /* 0x0188 */ | ||
103 | } __attribute__((packed)); | ||
104 | |||
105 | struct kvm_vcpu_stat { | ||
106 | u32 exit_userspace; | ||
107 | u32 exit_external_request; | ||
108 | u32 exit_external_interrupt; | ||
109 | u32 exit_stop_request; | ||
110 | u32 exit_validity; | ||
111 | u32 exit_instruction; | ||
112 | u32 instruction_lctl; | ||
113 | u32 instruction_lctg; | ||
114 | u32 exit_program_interruption; | ||
115 | u32 exit_instr_and_program; | ||
116 | u32 deliver_emergency_signal; | ||
117 | u32 deliver_service_signal; | ||
118 | u32 deliver_virtio_interrupt; | ||
119 | u32 deliver_stop_signal; | ||
120 | u32 deliver_prefix_signal; | ||
121 | u32 deliver_restart_signal; | ||
122 | u32 deliver_program_int; | ||
123 | u32 exit_wait_state; | ||
124 | u32 instruction_stidp; | ||
125 | u32 instruction_spx; | ||
126 | u32 instruction_stpx; | ||
127 | u32 instruction_stap; | ||
128 | u32 instruction_storage_key; | ||
129 | u32 instruction_stsch; | ||
130 | u32 instruction_chsc; | ||
131 | u32 instruction_stsi; | ||
132 | u32 instruction_stfl; | ||
133 | u32 instruction_sigp_sense; | ||
134 | u32 instruction_sigp_emergency; | ||
135 | u32 instruction_sigp_stop; | ||
136 | u32 instruction_sigp_arch; | ||
137 | u32 instruction_sigp_prefix; | ||
138 | u32 instruction_sigp_restart; | ||
139 | u32 diagnose_44; | ||
140 | }; | ||
141 | |||
142 | struct io_info { | ||
143 | __u16 subchannel_id; /* 0x0b8 */ | ||
144 | __u16 subchannel_nr; /* 0x0ba */ | ||
145 | __u32 io_int_parm; /* 0x0bc */ | ||
146 | __u32 io_int_word; /* 0x0c0 */ | ||
147 | }; | ||
148 | |||
149 | struct ext_info { | ||
150 | __u32 ext_params; | ||
151 | __u64 ext_params2; | ||
152 | }; | ||
153 | |||
154 | #define PGM_OPERATION 0x01 | ||
155 | #define PGM_PRIVILEGED_OPERATION 0x02 | ||
156 | #define PGM_EXECUTE 0x03 | ||
157 | #define PGM_PROTECTION 0x04 | ||
158 | #define PGM_ADDRESSING 0x05 | ||
159 | #define PGM_SPECIFICATION 0x06 | ||
160 | #define PGM_DATA 0x07 | ||
161 | |||
162 | struct pgm_info { | ||
163 | __u16 code; | ||
164 | }; | ||
165 | |||
166 | struct prefix_info { | ||
167 | __u32 address; | ||
168 | }; | ||
169 | |||
170 | struct interrupt_info { | ||
171 | struct list_head list; | ||
172 | u64 type; | ||
173 | union { | ||
174 | struct io_info io; | ||
175 | struct ext_info ext; | ||
176 | struct pgm_info pgm; | ||
177 | struct prefix_info prefix; | ||
178 | }; | ||
179 | }; | ||
180 | |||
181 | /* for local_interrupt.action_flags */ | ||
182 | #define ACTION_STORE_ON_STOP 1 | ||
183 | #define ACTION_STOP_ON_STOP 2 | ||
184 | |||
185 | struct local_interrupt { | ||
186 | spinlock_t lock; | ||
187 | struct list_head list; | ||
188 | atomic_t active; | ||
189 | struct float_interrupt *float_int; | ||
190 | int timer_due; /* event indicator for waitqueue below */ | ||
191 | wait_queue_head_t wq; | ||
192 | atomic_t *cpuflags; | ||
193 | unsigned int action_bits; | ||
194 | }; | ||
195 | |||
196 | struct float_interrupt { | ||
197 | spinlock_t lock; | ||
198 | struct list_head list; | ||
199 | atomic_t active; | ||
200 | int next_rr_cpu; | ||
201 | unsigned long idle_mask [(64 + sizeof(long) - 1) / sizeof(long)]; | ||
202 | struct local_interrupt *local_int[64]; | ||
203 | }; | ||
204 | |||
205 | |||
206 | struct kvm_vcpu_arch { | ||
207 | struct sie_block *sie_block; | ||
208 | unsigned long guest_gprs[16]; | ||
209 | s390_fp_regs host_fpregs; | ||
210 | unsigned int host_acrs[NUM_ACRS]; | ||
211 | s390_fp_regs guest_fpregs; | ||
212 | unsigned int guest_acrs[NUM_ACRS]; | ||
213 | struct local_interrupt local_int; | ||
214 | struct timer_list ckc_timer; | ||
215 | union { | ||
216 | cpuid_t cpu_id; | ||
217 | u64 stidp_data; | ||
218 | }; | ||
219 | }; | ||
220 | |||
221 | struct kvm_vm_stat { | ||
222 | u32 remote_tlb_flush; | ||
223 | }; | ||
224 | |||
225 | struct kvm_arch{ | ||
226 | unsigned long guest_origin; | ||
227 | unsigned long guest_memsize; | ||
228 | struct sca_block *sca; | ||
229 | debug_info_t *dbf; | ||
230 | struct float_interrupt float_int; | ||
231 | }; | ||
232 | |||
233 | extern int sie64a(struct sie_block *, __u64 *); | ||
234 | #endif | ||
diff --git a/include/asm-s390/kvm_para.h b/include/asm-s390/kvm_para.h new file mode 100644 index 000000000000..2c503796b619 --- /dev/null +++ b/include/asm-s390/kvm_para.h | |||
@@ -0,0 +1,150 @@ | |||
1 | /* | ||
2 | * asm-s390/kvm_para.h - definition for paravirtual devices on s390 | ||
3 | * | ||
4 | * Copyright IBM Corp. 2008 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2 only) | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> | ||
11 | */ | ||
12 | |||
13 | #ifndef __S390_KVM_PARA_H | ||
14 | #define __S390_KVM_PARA_H | ||
15 | |||
16 | /* | ||
17 | * Hypercalls for KVM on s390. The calling convention is similar to the | ||
18 | * s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1 | ||
19 | * as hypercall number and R7 as parameter 6. The return value is | ||
20 | * written to R2. We use the diagnose instruction as hypercall. To avoid | ||
21 | * conflicts with existing diagnoses for LPAR and z/VM, we do not use | ||
22 | * the instruction encoded number, but specify the number in R1 and | ||
23 | * use 0x500 as KVM hypercall | ||
24 | * | ||
25 | * Copyright IBM Corp. 2007,2008 | ||
26 | * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> | ||
27 | * | ||
28 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
29 | */ | ||
30 | |||
31 | static inline long kvm_hypercall0(unsigned long nr) | ||
32 | { | ||
33 | register unsigned long __nr asm("1") = nr; | ||
34 | register long __rc asm("2"); | ||
35 | |||
36 | asm volatile ("diag 2,4,0x500\n" | ||
37 | : "=d" (__rc) : "d" (__nr): "memory", "cc"); | ||
38 | return __rc; | ||
39 | } | ||
40 | |||
41 | static inline long kvm_hypercall1(unsigned long nr, unsigned long p1) | ||
42 | { | ||
43 | register unsigned long __nr asm("1") = nr; | ||
44 | register unsigned long __p1 asm("2") = p1; | ||
45 | register long __rc asm("2"); | ||
46 | |||
47 | asm volatile ("diag 2,4,0x500\n" | ||
48 | : "=d" (__rc) : "d" (__nr), "0" (__p1) : "memory", "cc"); | ||
49 | return __rc; | ||
50 | } | ||
51 | |||
52 | static inline long kvm_hypercall2(unsigned long nr, unsigned long p1, | ||
53 | unsigned long p2) | ||
54 | { | ||
55 | register unsigned long __nr asm("1") = nr; | ||
56 | register unsigned long __p1 asm("2") = p1; | ||
57 | register unsigned long __p2 asm("3") = p2; | ||
58 | register long __rc asm("2"); | ||
59 | |||
60 | asm volatile ("diag 2,4,0x500\n" | ||
61 | : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2) | ||
62 | : "memory", "cc"); | ||
63 | return __rc; | ||
64 | } | ||
65 | |||
66 | static inline long kvm_hypercall3(unsigned long nr, unsigned long p1, | ||
67 | unsigned long p2, unsigned long p3) | ||
68 | { | ||
69 | register unsigned long __nr asm("1") = nr; | ||
70 | register unsigned long __p1 asm("2") = p1; | ||
71 | register unsigned long __p2 asm("3") = p2; | ||
72 | register unsigned long __p3 asm("4") = p3; | ||
73 | register long __rc asm("2"); | ||
74 | |||
75 | asm volatile ("diag 2,4,0x500\n" | ||
76 | : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), | ||
77 | "d" (__p3) : "memory", "cc"); | ||
78 | return __rc; | ||
79 | } | ||
80 | |||
81 | |||
82 | static inline long kvm_hypercall4(unsigned long nr, unsigned long p1, | ||
83 | unsigned long p2, unsigned long p3, | ||
84 | unsigned long p4) | ||
85 | { | ||
86 | register unsigned long __nr asm("1") = nr; | ||
87 | register unsigned long __p1 asm("2") = p1; | ||
88 | register unsigned long __p2 asm("3") = p2; | ||
89 | register unsigned long __p3 asm("4") = p3; | ||
90 | register unsigned long __p4 asm("5") = p4; | ||
91 | register long __rc asm("2"); | ||
92 | |||
93 | asm volatile ("diag 2,4,0x500\n" | ||
94 | : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), | ||
95 | "d" (__p3), "d" (__p4) : "memory", "cc"); | ||
96 | return __rc; | ||
97 | } | ||
98 | |||
99 | static inline long kvm_hypercall5(unsigned long nr, unsigned long p1, | ||
100 | unsigned long p2, unsigned long p3, | ||
101 | unsigned long p4, unsigned long p5) | ||
102 | { | ||
103 | register unsigned long __nr asm("1") = nr; | ||
104 | register unsigned long __p1 asm("2") = p1; | ||
105 | register unsigned long __p2 asm("3") = p2; | ||
106 | register unsigned long __p3 asm("4") = p3; | ||
107 | register unsigned long __p4 asm("5") = p4; | ||
108 | register unsigned long __p5 asm("6") = p5; | ||
109 | register long __rc asm("2"); | ||
110 | |||
111 | asm volatile ("diag 2,4,0x500\n" | ||
112 | : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), | ||
113 | "d" (__p3), "d" (__p4), "d" (__p5) : "memory", "cc"); | ||
114 | return __rc; | ||
115 | } | ||
116 | |||
117 | static inline long kvm_hypercall6(unsigned long nr, unsigned long p1, | ||
118 | unsigned long p2, unsigned long p3, | ||
119 | unsigned long p4, unsigned long p5, | ||
120 | unsigned long p6) | ||
121 | { | ||
122 | register unsigned long __nr asm("1") = nr; | ||
123 | register unsigned long __p1 asm("2") = p1; | ||
124 | register unsigned long __p2 asm("3") = p2; | ||
125 | register unsigned long __p3 asm("4") = p3; | ||
126 | register unsigned long __p4 asm("5") = p4; | ||
127 | register unsigned long __p5 asm("6") = p5; | ||
128 | register unsigned long __p6 asm("7") = p6; | ||
129 | register long __rc asm("2"); | ||
130 | |||
131 | asm volatile ("diag 2,4,0x500\n" | ||
132 | : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), | ||
133 | "d" (__p3), "d" (__p4), "d" (__p5), "d" (__p6) | ||
134 | : "memory", "cc"); | ||
135 | return __rc; | ||
136 | } | ||
137 | |||
138 | /* kvm on s390 is always paravirtualization enabled */ | ||
139 | static inline int kvm_para_available(void) | ||
140 | { | ||
141 | return 1; | ||
142 | } | ||
143 | |||
144 | /* No feature bits are currently assigned for kvm on s390 */ | ||
145 | static inline unsigned int kvm_arch_para_features(void) | ||
146 | { | ||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | #endif /* __S390_KVM_PARA_H */ | ||
diff --git a/include/asm-s390/kvm_virtio.h b/include/asm-s390/kvm_virtio.h new file mode 100644 index 000000000000..5c871a990c29 --- /dev/null +++ b/include/asm-s390/kvm_virtio.h | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * kvm_virtio.h - definition for virtio for kvm on s390 | ||
3 | * | ||
4 | * Copyright IBM Corp. 2008 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2 only) | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> | ||
11 | */ | ||
12 | |||
13 | #ifndef __KVM_S390_VIRTIO_H | ||
14 | #define __KVM_S390_VIRTIO_H | ||
15 | |||
16 | #include <linux/types.h> | ||
17 | |||
18 | struct kvm_device_desc { | ||
19 | /* The device type: console, network, disk etc. Type 0 terminates. */ | ||
20 | __u8 type; | ||
21 | /* The number of virtqueues (first in config array) */ | ||
22 | __u8 num_vq; | ||
23 | /* | ||
24 | * The number of bytes of feature bits. Multiply by 2: one for host | ||
25 | * features and one for guest acknowledgements. | ||
26 | */ | ||
27 | __u8 feature_len; | ||
28 | /* The number of bytes of the config array after virtqueues. */ | ||
29 | __u8 config_len; | ||
30 | /* A status byte, written by the Guest. */ | ||
31 | __u8 status; | ||
32 | __u8 config[0]; | ||
33 | }; | ||
34 | |||
35 | /* | ||
36 | * This is how we expect the device configuration field for a virtqueue | ||
37 | * to be laid out in config space. | ||
38 | */ | ||
39 | struct kvm_vqconfig { | ||
40 | /* The token returned with an interrupt. Set by the guest */ | ||
41 | __u64 token; | ||
42 | /* The address of the virtio ring */ | ||
43 | __u64 address; | ||
44 | /* The number of entries in the virtio_ring */ | ||
45 | __u16 num; | ||
46 | |||
47 | }; | ||
48 | |||
49 | #define KVM_S390_VIRTIO_NOTIFY 0 | ||
50 | #define KVM_S390_VIRTIO_RESET 1 | ||
51 | #define KVM_S390_VIRTIO_SET_STATUS 2 | ||
52 | |||
53 | #endif | ||
diff --git a/include/asm-s390/lowcore.h b/include/asm-s390/lowcore.h index 5de3efb31445..0bc51d52a899 100644 --- a/include/asm-s390/lowcore.h +++ b/include/asm-s390/lowcore.h | |||
@@ -381,27 +381,32 @@ struct _lowcore | |||
381 | /* whether the kernel died with panic() or not */ | 381 | /* whether the kernel died with panic() or not */ |
382 | __u32 panic_magic; /* 0xe00 */ | 382 | __u32 panic_magic; /* 0xe00 */ |
383 | 383 | ||
384 | __u8 pad13[0x1200-0xe04]; /* 0xe04 */ | 384 | __u8 pad13[0x11b8-0xe04]; /* 0xe04 */ |
385 | |||
386 | /* 64 bit extparam used for pfault, diag 250 etc */ | ||
387 | __u64 ext_params2; /* 0x11B8 */ | ||
388 | |||
389 | __u8 pad14[0x1200-0x11C0]; /* 0x11C0 */ | ||
385 | 390 | ||
386 | /* System info area */ | 391 | /* System info area */ |
387 | 392 | ||
388 | __u64 floating_pt_save_area[16]; /* 0x1200 */ | 393 | __u64 floating_pt_save_area[16]; /* 0x1200 */ |
389 | __u64 gpregs_save_area[16]; /* 0x1280 */ | 394 | __u64 gpregs_save_area[16]; /* 0x1280 */ |
390 | __u32 st_status_fixed_logout[4]; /* 0x1300 */ | 395 | __u32 st_status_fixed_logout[4]; /* 0x1300 */ |
391 | __u8 pad14[0x1318-0x1310]; /* 0x1310 */ | 396 | __u8 pad15[0x1318-0x1310]; /* 0x1310 */ |
392 | __u32 prefixreg_save_area; /* 0x1318 */ | 397 | __u32 prefixreg_save_area; /* 0x1318 */ |
393 | __u32 fpt_creg_save_area; /* 0x131c */ | 398 | __u32 fpt_creg_save_area; /* 0x131c */ |
394 | __u8 pad15[0x1324-0x1320]; /* 0x1320 */ | 399 | __u8 pad16[0x1324-0x1320]; /* 0x1320 */ |
395 | __u32 tod_progreg_save_area; /* 0x1324 */ | 400 | __u32 tod_progreg_save_area; /* 0x1324 */ |
396 | __u32 cpu_timer_save_area[2]; /* 0x1328 */ | 401 | __u32 cpu_timer_save_area[2]; /* 0x1328 */ |
397 | __u32 clock_comp_save_area[2]; /* 0x1330 */ | 402 | __u32 clock_comp_save_area[2]; /* 0x1330 */ |
398 | __u8 pad16[0x1340-0x1338]; /* 0x1338 */ | 403 | __u8 pad17[0x1340-0x1338]; /* 0x1338 */ |
399 | __u32 access_regs_save_area[16]; /* 0x1340 */ | 404 | __u32 access_regs_save_area[16]; /* 0x1340 */ |
400 | __u64 cregs_save_area[16]; /* 0x1380 */ | 405 | __u64 cregs_save_area[16]; /* 0x1380 */ |
401 | 406 | ||
402 | /* align to the top of the prefix area */ | 407 | /* align to the top of the prefix area */ |
403 | 408 | ||
404 | __u8 pad17[0x2000-0x1400]; /* 0x1400 */ | 409 | __u8 pad18[0x2000-0x1400]; /* 0x1400 */ |
405 | #endif /* !__s390x__ */ | 410 | #endif /* !__s390x__ */ |
406 | } __attribute__((packed)); /* End structure*/ | 411 | } __attribute__((packed)); /* End structure*/ |
407 | 412 | ||
diff --git a/include/asm-s390/mmu.h b/include/asm-s390/mmu.h index 1698e29c5b20..5dd5e7b3476f 100644 --- a/include/asm-s390/mmu.h +++ b/include/asm-s390/mmu.h | |||
@@ -7,6 +7,7 @@ typedef struct { | |||
7 | unsigned long asce_bits; | 7 | unsigned long asce_bits; |
8 | unsigned long asce_limit; | 8 | unsigned long asce_limit; |
9 | int noexec; | 9 | int noexec; |
10 | int pgstes; | ||
10 | } mm_context_t; | 11 | } mm_context_t; |
11 | 12 | ||
12 | #endif | 13 | #endif |
diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h index b5a34c6f91a9..4c2fbf48c9c4 100644 --- a/include/asm-s390/mmu_context.h +++ b/include/asm-s390/mmu_context.h | |||
@@ -20,7 +20,13 @@ static inline int init_new_context(struct task_struct *tsk, | |||
20 | #ifdef CONFIG_64BIT | 20 | #ifdef CONFIG_64BIT |
21 | mm->context.asce_bits |= _ASCE_TYPE_REGION3; | 21 | mm->context.asce_bits |= _ASCE_TYPE_REGION3; |
22 | #endif | 22 | #endif |
23 | mm->context.noexec = s390_noexec; | 23 | if (current->mm->context.pgstes) { |
24 | mm->context.noexec = 0; | ||
25 | mm->context.pgstes = 1; | ||
26 | } else { | ||
27 | mm->context.noexec = s390_noexec; | ||
28 | mm->context.pgstes = 0; | ||
29 | } | ||
24 | mm->context.asce_limit = STACK_TOP_MAX; | 30 | mm->context.asce_limit = STACK_TOP_MAX; |
25 | crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); | 31 | crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); |
26 | return 0; | 32 | return 0; |
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 65154dc9a9e5..4c0698c0dda5 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h | |||
@@ -30,6 +30,7 @@ | |||
30 | */ | 30 | */ |
31 | #ifndef __ASSEMBLY__ | 31 | #ifndef __ASSEMBLY__ |
32 | #include <linux/mm_types.h> | 32 | #include <linux/mm_types.h> |
33 | #include <asm/bitops.h> | ||
33 | #include <asm/bug.h> | 34 | #include <asm/bug.h> |
34 | #include <asm/processor.h> | 35 | #include <asm/processor.h> |
35 | 36 | ||
@@ -258,6 +259,13 @@ extern char empty_zero_page[PAGE_SIZE]; | |||
258 | * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid. | 259 | * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid. |
259 | */ | 260 | */ |
260 | 261 | ||
262 | /* Page status table bits for virtualization */ | ||
263 | #define RCP_PCL_BIT 55 | ||
264 | #define RCP_HR_BIT 54 | ||
265 | #define RCP_HC_BIT 53 | ||
266 | #define RCP_GR_BIT 50 | ||
267 | #define RCP_GC_BIT 49 | ||
268 | |||
261 | #ifndef __s390x__ | 269 | #ifndef __s390x__ |
262 | 270 | ||
263 | /* Bits in the segment table address-space-control-element */ | 271 | /* Bits in the segment table address-space-control-element */ |
@@ -513,6 +521,48 @@ static inline int pte_file(pte_t pte) | |||
513 | #define __HAVE_ARCH_PTE_SAME | 521 | #define __HAVE_ARCH_PTE_SAME |
514 | #define pte_same(a,b) (pte_val(a) == pte_val(b)) | 522 | #define pte_same(a,b) (pte_val(a) == pte_val(b)) |
515 | 523 | ||
524 | static inline void rcp_lock(pte_t *ptep) | ||
525 | { | ||
526 | #ifdef CONFIG_PGSTE | ||
527 | unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); | ||
528 | preempt_disable(); | ||
529 | while (test_and_set_bit(RCP_PCL_BIT, pgste)) | ||
530 | ; | ||
531 | #endif | ||
532 | } | ||
533 | |||
534 | static inline void rcp_unlock(pte_t *ptep) | ||
535 | { | ||
536 | #ifdef CONFIG_PGSTE | ||
537 | unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); | ||
538 | clear_bit(RCP_PCL_BIT, pgste); | ||
539 | preempt_enable(); | ||
540 | #endif | ||
541 | } | ||
542 | |||
543 | /* forward declaration for SetPageUptodate in page-flags.h*/ | ||
544 | static inline void page_clear_dirty(struct page *page); | ||
545 | #include <linux/page-flags.h> | ||
546 | |||
547 | static inline void ptep_rcp_copy(pte_t *ptep) | ||
548 | { | ||
549 | #ifdef CONFIG_PGSTE | ||
550 | struct page *page = virt_to_page(pte_val(*ptep)); | ||
551 | unsigned int skey; | ||
552 | unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); | ||
553 | |||
554 | skey = page_get_storage_key(page_to_phys(page)); | ||
555 | if (skey & _PAGE_CHANGED) | ||
556 | set_bit_simple(RCP_GC_BIT, pgste); | ||
557 | if (skey & _PAGE_REFERENCED) | ||
558 | set_bit_simple(RCP_GR_BIT, pgste); | ||
559 | if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) | ||
560 | SetPageDirty(page); | ||
561 | if (test_and_clear_bit_simple(RCP_HR_BIT, pgste)) | ||
562 | SetPageReferenced(page); | ||
563 | #endif | ||
564 | } | ||
565 | |||
516 | /* | 566 | /* |
517 | * query functions pte_write/pte_dirty/pte_young only work if | 567 | * query functions pte_write/pte_dirty/pte_young only work if |
518 | * pte_present() is true. Undefined behaviour if not.. | 568 | * pte_present() is true. Undefined behaviour if not.. |
@@ -599,6 +649,8 @@ static inline void pmd_clear(pmd_t *pmd) | |||
599 | 649 | ||
600 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 650 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
601 | { | 651 | { |
652 | if (mm->context.pgstes) | ||
653 | ptep_rcp_copy(ptep); | ||
602 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | 654 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; |
603 | if (mm->context.noexec) | 655 | if (mm->context.noexec) |
604 | pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY; | 656 | pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY; |
@@ -667,6 +719,24 @@ static inline pte_t pte_mkyoung(pte_t pte) | |||
667 | static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, | 719 | static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, |
668 | unsigned long addr, pte_t *ptep) | 720 | unsigned long addr, pte_t *ptep) |
669 | { | 721 | { |
722 | #ifdef CONFIG_PGSTE | ||
723 | unsigned long physpage; | ||
724 | int young; | ||
725 | unsigned long *pgste; | ||
726 | |||
727 | if (!vma->vm_mm->context.pgstes) | ||
728 | return 0; | ||
729 | physpage = pte_val(*ptep) & PAGE_MASK; | ||
730 | pgste = (unsigned long *) (ptep + PTRS_PER_PTE); | ||
731 | |||
732 | young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0); | ||
733 | rcp_lock(ptep); | ||
734 | if (young) | ||
735 | set_bit_simple(RCP_GR_BIT, pgste); | ||
736 | young |= test_and_clear_bit_simple(RCP_HR_BIT, pgste); | ||
737 | rcp_unlock(ptep); | ||
738 | return young; | ||
739 | #endif | ||
670 | return 0; | 740 | return 0; |
671 | } | 741 | } |
672 | 742 | ||
@@ -674,7 +744,13 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, | |||
674 | static inline int ptep_clear_flush_young(struct vm_area_struct *vma, | 744 | static inline int ptep_clear_flush_young(struct vm_area_struct *vma, |
675 | unsigned long address, pte_t *ptep) | 745 | unsigned long address, pte_t *ptep) |
676 | { | 746 | { |
677 | /* No need to flush TLB; bits are in storage key */ | 747 | /* No need to flush TLB |
748 | * On s390 reference bits are in storage key and never in TLB | ||
749 | * With virtualization we handle the reference bit, without we | ||
750 | * we can simply return */ | ||
751 | #ifdef CONFIG_PGSTE | ||
752 | return ptep_test_and_clear_young(vma, address, ptep); | ||
753 | #endif | ||
678 | return 0; | 754 | return 0; |
679 | } | 755 | } |
680 | 756 | ||
@@ -693,15 +769,25 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep) | |||
693 | : "=m" (*ptep) : "m" (*ptep), | 769 | : "=m" (*ptep) : "m" (*ptep), |
694 | "a" (pto), "a" (address)); | 770 | "a" (pto), "a" (address)); |
695 | } | 771 | } |
696 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | ||
697 | } | 772 | } |
698 | 773 | ||
699 | static inline void ptep_invalidate(struct mm_struct *mm, | 774 | static inline void ptep_invalidate(struct mm_struct *mm, |
700 | unsigned long address, pte_t *ptep) | 775 | unsigned long address, pte_t *ptep) |
701 | { | 776 | { |
777 | if (mm->context.pgstes) { | ||
778 | rcp_lock(ptep); | ||
779 | __ptep_ipte(address, ptep); | ||
780 | ptep_rcp_copy(ptep); | ||
781 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | ||
782 | rcp_unlock(ptep); | ||
783 | return; | ||
784 | } | ||
702 | __ptep_ipte(address, ptep); | 785 | __ptep_ipte(address, ptep); |
703 | if (mm->context.noexec) | 786 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; |
787 | if (mm->context.noexec) { | ||
704 | __ptep_ipte(address, ptep + PTRS_PER_PTE); | 788 | __ptep_ipte(address, ptep + PTRS_PER_PTE); |
789 | pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY; | ||
790 | } | ||
705 | } | 791 | } |
706 | 792 | ||
707 | /* | 793 | /* |
@@ -966,6 +1052,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) | |||
966 | 1052 | ||
967 | extern int add_shared_memory(unsigned long start, unsigned long size); | 1053 | extern int add_shared_memory(unsigned long start, unsigned long size); |
968 | extern int remove_shared_memory(unsigned long start, unsigned long size); | 1054 | extern int remove_shared_memory(unsigned long start, unsigned long size); |
1055 | extern int s390_enable_sie(void); | ||
969 | 1056 | ||
970 | /* | 1057 | /* |
971 | * No page table caches to initialise | 1058 | * No page table caches to initialise |
diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h index a76a6b8fd887..aaf4b518b940 100644 --- a/include/asm-s390/setup.h +++ b/include/asm-s390/setup.h | |||
@@ -62,6 +62,7 @@ extern unsigned long machine_flags; | |||
62 | #define MACHINE_IS_VM (machine_flags & 1) | 62 | #define MACHINE_IS_VM (machine_flags & 1) |
63 | #define MACHINE_IS_P390 (machine_flags & 4) | 63 | #define MACHINE_IS_P390 (machine_flags & 4) |
64 | #define MACHINE_HAS_MVPG (machine_flags & 16) | 64 | #define MACHINE_HAS_MVPG (machine_flags & 16) |
65 | #define MACHINE_IS_KVM (machine_flags & 64) | ||
65 | #define MACHINE_HAS_IDTE (machine_flags & 128) | 66 | #define MACHINE_HAS_IDTE (machine_flags & 128) |
66 | #define MACHINE_HAS_DIAG9C (machine_flags & 256) | 67 | #define MACHINE_HAS_DIAG9C (machine_flags & 256) |
67 | 68 | ||
diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h index 7a71120426a3..80eefef2cc76 100644 --- a/include/asm-x86/kvm.h +++ b/include/asm-x86/kvm.h | |||
@@ -188,4 +188,45 @@ struct kvm_cpuid2 { | |||
188 | struct kvm_cpuid_entry2 entries[0]; | 188 | struct kvm_cpuid_entry2 entries[0]; |
189 | }; | 189 | }; |
190 | 190 | ||
191 | /* for KVM_GET_PIT and KVM_SET_PIT */ | ||
192 | struct kvm_pit_channel_state { | ||
193 | __u32 count; /* can be 65536 */ | ||
194 | __u16 latched_count; | ||
195 | __u8 count_latched; | ||
196 | __u8 status_latched; | ||
197 | __u8 status; | ||
198 | __u8 read_state; | ||
199 | __u8 write_state; | ||
200 | __u8 write_latch; | ||
201 | __u8 rw_mode; | ||
202 | __u8 mode; | ||
203 | __u8 bcd; | ||
204 | __u8 gate; | ||
205 | __s64 count_load_time; | ||
206 | }; | ||
207 | |||
208 | struct kvm_pit_state { | ||
209 | struct kvm_pit_channel_state channels[3]; | ||
210 | }; | ||
211 | |||
212 | #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) | ||
213 | #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) | ||
214 | #define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) | ||
215 | #define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) | ||
216 | #define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) | ||
217 | #define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) | ||
218 | #define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) | ||
219 | #define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) | ||
220 | #define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) | ||
221 | #define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) | ||
222 | #define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) | ||
223 | #define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) | ||
224 | #define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) | ||
225 | #define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) | ||
226 | #define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) | ||
227 | #define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) | ||
228 | #define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) | ||
229 | #define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) | ||
230 | #define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) | ||
231 | |||
191 | #endif | 232 | #endif |
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 68ee390b2844..9d963cd6533c 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h | |||
@@ -20,6 +20,13 @@ | |||
20 | 20 | ||
21 | #include <asm/desc.h> | 21 | #include <asm/desc.h> |
22 | 22 | ||
23 | #define KVM_MAX_VCPUS 16 | ||
24 | #define KVM_MEMORY_SLOTS 32 | ||
25 | /* memory slots that does not exposed to userspace */ | ||
26 | #define KVM_PRIVATE_MEM_SLOTS 4 | ||
27 | |||
28 | #define KVM_PIO_PAGE_OFFSET 1 | ||
29 | |||
23 | #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) | 30 | #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) |
24 | #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) | 31 | #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) |
25 | #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ | 32 | #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ |
@@ -39,6 +46,13 @@ | |||
39 | #define INVALID_PAGE (~(hpa_t)0) | 46 | #define INVALID_PAGE (~(hpa_t)0) |
40 | #define UNMAPPED_GVA (~(gpa_t)0) | 47 | #define UNMAPPED_GVA (~(gpa_t)0) |
41 | 48 | ||
49 | /* shadow tables are PAE even on non-PAE hosts */ | ||
50 | #define KVM_HPAGE_SHIFT 21 | ||
51 | #define KVM_HPAGE_SIZE (1UL << KVM_HPAGE_SHIFT) | ||
52 | #define KVM_HPAGE_MASK (~(KVM_HPAGE_SIZE - 1)) | ||
53 | |||
54 | #define KVM_PAGES_PER_HPAGE (KVM_HPAGE_SIZE / PAGE_SIZE) | ||
55 | |||
42 | #define DE_VECTOR 0 | 56 | #define DE_VECTOR 0 |
43 | #define UD_VECTOR 6 | 57 | #define UD_VECTOR 6 |
44 | #define NM_VECTOR 7 | 58 | #define NM_VECTOR 7 |
@@ -48,6 +62,7 @@ | |||
48 | #define SS_VECTOR 12 | 62 | #define SS_VECTOR 12 |
49 | #define GP_VECTOR 13 | 63 | #define GP_VECTOR 13 |
50 | #define PF_VECTOR 14 | 64 | #define PF_VECTOR 14 |
65 | #define MC_VECTOR 18 | ||
51 | 66 | ||
52 | #define SELECTOR_TI_MASK (1 << 2) | 67 | #define SELECTOR_TI_MASK (1 << 2) |
53 | #define SELECTOR_RPL_MASK 0x03 | 68 | #define SELECTOR_RPL_MASK 0x03 |
@@ -58,7 +73,8 @@ | |||
58 | 73 | ||
59 | #define KVM_PERMILLE_MMU_PAGES 20 | 74 | #define KVM_PERMILLE_MMU_PAGES 20 |
60 | #define KVM_MIN_ALLOC_MMU_PAGES 64 | 75 | #define KVM_MIN_ALLOC_MMU_PAGES 64 |
61 | #define KVM_NUM_MMU_PAGES 1024 | 76 | #define KVM_MMU_HASH_SHIFT 10 |
77 | #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) | ||
62 | #define KVM_MIN_FREE_MMU_PAGES 5 | 78 | #define KVM_MIN_FREE_MMU_PAGES 5 |
63 | #define KVM_REFILL_PAGES 25 | 79 | #define KVM_REFILL_PAGES 25 |
64 | #define KVM_MAX_CPUID_ENTRIES 40 | 80 | #define KVM_MAX_CPUID_ENTRIES 40 |
@@ -106,6 +122,12 @@ enum { | |||
106 | 122 | ||
107 | #define KVM_NR_MEM_OBJS 40 | 123 | #define KVM_NR_MEM_OBJS 40 |
108 | 124 | ||
125 | struct kvm_guest_debug { | ||
126 | int enabled; | ||
127 | unsigned long bp[4]; | ||
128 | int singlestep; | ||
129 | }; | ||
130 | |||
109 | /* | 131 | /* |
110 | * We don't want allocation failures within the mmu code, so we preallocate | 132 | * We don't want allocation failures within the mmu code, so we preallocate |
111 | * enough memory for a single page fault in a cache. | 133 | * enough memory for a single page fault in a cache. |
@@ -140,6 +162,7 @@ union kvm_mmu_page_role { | |||
140 | unsigned pad_for_nice_hex_output:6; | 162 | unsigned pad_for_nice_hex_output:6; |
141 | unsigned metaphysical:1; | 163 | unsigned metaphysical:1; |
142 | unsigned access:3; | 164 | unsigned access:3; |
165 | unsigned invalid:1; | ||
143 | }; | 166 | }; |
144 | }; | 167 | }; |
145 | 168 | ||
@@ -204,11 +227,6 @@ struct kvm_vcpu_arch { | |||
204 | u64 shadow_efer; | 227 | u64 shadow_efer; |
205 | u64 apic_base; | 228 | u64 apic_base; |
206 | struct kvm_lapic *apic; /* kernel irqchip context */ | 229 | struct kvm_lapic *apic; /* kernel irqchip context */ |
207 | #define VCPU_MP_STATE_RUNNABLE 0 | ||
208 | #define VCPU_MP_STATE_UNINITIALIZED 1 | ||
209 | #define VCPU_MP_STATE_INIT_RECEIVED 2 | ||
210 | #define VCPU_MP_STATE_SIPI_RECEIVED 3 | ||
211 | #define VCPU_MP_STATE_HALTED 4 | ||
212 | int mp_state; | 230 | int mp_state; |
213 | int sipi_vector; | 231 | int sipi_vector; |
214 | u64 ia32_misc_enable_msr; | 232 | u64 ia32_misc_enable_msr; |
@@ -226,8 +244,9 @@ struct kvm_vcpu_arch { | |||
226 | u64 *last_pte_updated; | 244 | u64 *last_pte_updated; |
227 | 245 | ||
228 | struct { | 246 | struct { |
229 | gfn_t gfn; /* presumed gfn during guest pte update */ | 247 | gfn_t gfn; /* presumed gfn during guest pte update */ |
230 | struct page *page; /* page corresponding to that gfn */ | 248 | pfn_t pfn; /* pfn corresponding to that gfn */ |
249 | int largepage; | ||
231 | } update_pte; | 250 | } update_pte; |
232 | 251 | ||
233 | struct i387_fxsave_struct host_fx_image; | 252 | struct i387_fxsave_struct host_fx_image; |
@@ -261,6 +280,11 @@ struct kvm_vcpu_arch { | |||
261 | /* emulate context */ | 280 | /* emulate context */ |
262 | 281 | ||
263 | struct x86_emulate_ctxt emulate_ctxt; | 282 | struct x86_emulate_ctxt emulate_ctxt; |
283 | |||
284 | gpa_t time; | ||
285 | struct kvm_vcpu_time_info hv_clock; | ||
286 | unsigned int time_offset; | ||
287 | struct page *time_page; | ||
264 | }; | 288 | }; |
265 | 289 | ||
266 | struct kvm_mem_alias { | 290 | struct kvm_mem_alias { |
@@ -283,10 +307,13 @@ struct kvm_arch{ | |||
283 | struct list_head active_mmu_pages; | 307 | struct list_head active_mmu_pages; |
284 | struct kvm_pic *vpic; | 308 | struct kvm_pic *vpic; |
285 | struct kvm_ioapic *vioapic; | 309 | struct kvm_ioapic *vioapic; |
310 | struct kvm_pit *vpit; | ||
286 | 311 | ||
287 | int round_robin_prev_vcpu; | 312 | int round_robin_prev_vcpu; |
288 | unsigned int tss_addr; | 313 | unsigned int tss_addr; |
289 | struct page *apic_access_page; | 314 | struct page *apic_access_page; |
315 | |||
316 | gpa_t wall_clock; | ||
290 | }; | 317 | }; |
291 | 318 | ||
292 | struct kvm_vm_stat { | 319 | struct kvm_vm_stat { |
@@ -298,6 +325,7 @@ struct kvm_vm_stat { | |||
298 | u32 mmu_recycled; | 325 | u32 mmu_recycled; |
299 | u32 mmu_cache_miss; | 326 | u32 mmu_cache_miss; |
300 | u32 remote_tlb_flush; | 327 | u32 remote_tlb_flush; |
328 | u32 lpages; | ||
301 | }; | 329 | }; |
302 | 330 | ||
303 | struct kvm_vcpu_stat { | 331 | struct kvm_vcpu_stat { |
@@ -320,6 +348,7 @@ struct kvm_vcpu_stat { | |||
320 | u32 fpu_reload; | 348 | u32 fpu_reload; |
321 | u32 insn_emulation; | 349 | u32 insn_emulation; |
322 | u32 insn_emulation_fail; | 350 | u32 insn_emulation_fail; |
351 | u32 hypercalls; | ||
323 | }; | 352 | }; |
324 | 353 | ||
325 | struct descriptor_table { | 354 | struct descriptor_table { |
@@ -355,6 +384,7 @@ struct kvm_x86_ops { | |||
355 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); | 384 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); |
356 | void (*get_segment)(struct kvm_vcpu *vcpu, | 385 | void (*get_segment)(struct kvm_vcpu *vcpu, |
357 | struct kvm_segment *var, int seg); | 386 | struct kvm_segment *var, int seg); |
387 | int (*get_cpl)(struct kvm_vcpu *vcpu); | ||
358 | void (*set_segment)(struct kvm_vcpu *vcpu, | 388 | void (*set_segment)(struct kvm_vcpu *vcpu, |
359 | struct kvm_segment *var, int seg); | 389 | struct kvm_segment *var, int seg); |
360 | void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); | 390 | void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); |
@@ -410,6 +440,15 @@ void kvm_mmu_zap_all(struct kvm *kvm); | |||
410 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); | 440 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); |
411 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); | 441 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); |
412 | 442 | ||
443 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); | ||
444 | |||
445 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | ||
446 | const void *val, int bytes); | ||
447 | int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, | ||
448 | gpa_t addr, unsigned long *ret); | ||
449 | |||
450 | extern bool tdp_enabled; | ||
451 | |||
413 | enum emulation_result { | 452 | enum emulation_result { |
414 | EMULATE_DONE, /* no further processing */ | 453 | EMULATE_DONE, /* no further processing */ |
415 | EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ | 454 | EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ |
@@ -429,6 +468,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | |||
429 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr); | 468 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr); |
430 | void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, | 469 | void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, |
431 | unsigned long *rflags); | 470 | unsigned long *rflags); |
471 | void kvm_enable_efer_bits(u64); | ||
432 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); | 472 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); |
433 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | 473 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); |
434 | 474 | ||
@@ -448,12 +488,14 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, | |||
448 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, | 488 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, |
449 | unsigned long value); | 489 | unsigned long value); |
450 | 490 | ||
451 | void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | 491 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); |
452 | void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0); | 492 | |
453 | void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0); | 493 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
454 | void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0); | 494 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
455 | unsigned long get_cr8(struct kvm_vcpu *vcpu); | 495 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); |
456 | void lmsw(struct kvm_vcpu *vcpu, unsigned long msw); | 496 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); |
497 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); | ||
498 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); | ||
457 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); | 499 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); |
458 | 500 | ||
459 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); | 501 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); |
@@ -491,6 +533,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu); | |||
491 | 533 | ||
492 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code); | 534 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code); |
493 | 535 | ||
536 | void kvm_enable_tdp(void); | ||
537 | |||
494 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); | 538 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); |
495 | int complete_pio(struct kvm_vcpu *vcpu); | 539 | int complete_pio(struct kvm_vcpu *vcpu); |
496 | 540 | ||
@@ -600,6 +644,7 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) | |||
600 | #define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" | 644 | #define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" |
601 | #define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" | 645 | #define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" |
602 | #define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" | 646 | #define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" |
647 | #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" | ||
603 | 648 | ||
604 | #define MSR_IA32_TIME_STAMP_COUNTER 0x010 | 649 | #define MSR_IA32_TIME_STAMP_COUNTER 0x010 |
605 | 650 | ||
@@ -610,4 +655,30 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) | |||
610 | #define RMODE_TSS_SIZE \ | 655 | #define RMODE_TSS_SIZE \ |
611 | (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) | 656 | (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) |
612 | 657 | ||
658 | enum { | ||
659 | TASK_SWITCH_CALL = 0, | ||
660 | TASK_SWITCH_IRET = 1, | ||
661 | TASK_SWITCH_JMP = 2, | ||
662 | TASK_SWITCH_GATE = 3, | ||
663 | }; | ||
664 | |||
665 | #define KVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5, name) \ | ||
666 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
667 | vcpu, 5, d1, d2, d3, d4, d5) | ||
668 | #define KVMTRACE_4D(evt, vcpu, d1, d2, d3, d4, name) \ | ||
669 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
670 | vcpu, 4, d1, d2, d3, d4, 0) | ||
671 | #define KVMTRACE_3D(evt, vcpu, d1, d2, d3, name) \ | ||
672 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
673 | vcpu, 3, d1, d2, d3, 0, 0) | ||
674 | #define KVMTRACE_2D(evt, vcpu, d1, d2, name) \ | ||
675 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
676 | vcpu, 2, d1, d2, 0, 0, 0) | ||
677 | #define KVMTRACE_1D(evt, vcpu, d1, name) \ | ||
678 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
679 | vcpu, 1, d1, 0, 0, 0, 0) | ||
680 | #define KVMTRACE_0D(evt, vcpu, name) \ | ||
681 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
682 | vcpu, 0, 0, 0, 0, 0, 0) | ||
683 | |||
613 | #endif | 684 | #endif |
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h index c6f3fd8d8c53..509845942070 100644 --- a/include/asm-x86/kvm_para.h +++ b/include/asm-x86/kvm_para.h | |||
@@ -10,10 +10,65 @@ | |||
10 | * paravirtualization, the appropriate feature bit should be checked. | 10 | * paravirtualization, the appropriate feature bit should be checked. |
11 | */ | 11 | */ |
12 | #define KVM_CPUID_FEATURES 0x40000001 | 12 | #define KVM_CPUID_FEATURES 0x40000001 |
13 | #define KVM_FEATURE_CLOCKSOURCE 0 | ||
14 | #define KVM_FEATURE_NOP_IO_DELAY 1 | ||
15 | #define KVM_FEATURE_MMU_OP 2 | ||
16 | |||
17 | #define MSR_KVM_WALL_CLOCK 0x11 | ||
18 | #define MSR_KVM_SYSTEM_TIME 0x12 | ||
19 | |||
20 | #define KVM_MAX_MMU_OP_BATCH 32 | ||
21 | |||
22 | /* Operations for KVM_HC_MMU_OP */ | ||
23 | #define KVM_MMU_OP_WRITE_PTE 1 | ||
24 | #define KVM_MMU_OP_FLUSH_TLB 2 | ||
25 | #define KVM_MMU_OP_RELEASE_PT 3 | ||
26 | |||
27 | /* Payload for KVM_HC_MMU_OP */ | ||
28 | struct kvm_mmu_op_header { | ||
29 | __u32 op; | ||
30 | __u32 pad; | ||
31 | }; | ||
32 | |||
33 | struct kvm_mmu_op_write_pte { | ||
34 | struct kvm_mmu_op_header header; | ||
35 | __u64 pte_phys; | ||
36 | __u64 pte_val; | ||
37 | }; | ||
38 | |||
39 | struct kvm_mmu_op_flush_tlb { | ||
40 | struct kvm_mmu_op_header header; | ||
41 | }; | ||
42 | |||
43 | struct kvm_mmu_op_release_pt { | ||
44 | struct kvm_mmu_op_header header; | ||
45 | __u64 pt_phys; | ||
46 | }; | ||
13 | 47 | ||
14 | #ifdef __KERNEL__ | 48 | #ifdef __KERNEL__ |
15 | #include <asm/processor.h> | 49 | #include <asm/processor.h> |
16 | 50 | ||
51 | /* xen binary-compatible interface. See xen headers for details */ | ||
52 | struct kvm_vcpu_time_info { | ||
53 | uint32_t version; | ||
54 | uint32_t pad0; | ||
55 | uint64_t tsc_timestamp; | ||
56 | uint64_t system_time; | ||
57 | uint32_t tsc_to_system_mul; | ||
58 | int8_t tsc_shift; | ||
59 | int8_t pad[3]; | ||
60 | } __attribute__((__packed__)); /* 32 bytes */ | ||
61 | |||
62 | struct kvm_wall_clock { | ||
63 | uint32_t wc_version; | ||
64 | uint32_t wc_sec; | ||
65 | uint32_t wc_nsec; | ||
66 | } __attribute__((__packed__)); | ||
67 | |||
68 | |||
69 | extern void kvmclock_init(void); | ||
70 | |||
71 | |||
17 | /* This instruction is vmcall. On non-VT architectures, it will generate a | 72 | /* This instruction is vmcall. On non-VT architectures, it will generate a |
18 | * trap that we will then rewrite to the appropriate instruction. | 73 | * trap that we will then rewrite to the appropriate instruction. |
19 | */ | 74 | */ |
diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h index 6b5233b4f84b..e63741f19392 100644 --- a/include/asm-x86/reboot.h +++ b/include/asm-x86/reboot.h | |||
@@ -15,5 +15,7 @@ struct machine_ops { | |||
15 | extern struct machine_ops machine_ops; | 15 | extern struct machine_ops machine_ops; |
16 | 16 | ||
17 | void machine_real_restart(unsigned char *code, int length); | 17 | void machine_real_restart(unsigned char *code, int length); |
18 | void native_machine_crash_shutdown(struct pt_regs *regs); | ||
19 | void native_machine_shutdown(void); | ||
18 | 20 | ||
19 | #endif /* _ASM_REBOOT_H */ | 21 | #endif /* _ASM_REBOOT_H */ |
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index c1ec04fd000d..a281afeddfbb 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
@@ -8,11 +8,18 @@ | |||
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <asm/types.h> | 10 | #include <asm/types.h> |
11 | #include <linux/compiler.h> | ||
11 | #include <linux/ioctl.h> | 12 | #include <linux/ioctl.h> |
12 | #include <asm/kvm.h> | 13 | #include <asm/kvm.h> |
13 | 14 | ||
14 | #define KVM_API_VERSION 12 | 15 | #define KVM_API_VERSION 12 |
15 | 16 | ||
17 | /* for KVM_TRACE_ENABLE */ | ||
18 | struct kvm_user_trace_setup { | ||
19 | __u32 buf_size; /* sub_buffer size of each per-cpu */ | ||
20 | __u32 buf_nr; /* the number of sub_buffers of each per-cpu */ | ||
21 | }; | ||
22 | |||
16 | /* for KVM_CREATE_MEMORY_REGION */ | 23 | /* for KVM_CREATE_MEMORY_REGION */ |
17 | struct kvm_memory_region { | 24 | struct kvm_memory_region { |
18 | __u32 slot; | 25 | __u32 slot; |
@@ -73,6 +80,9 @@ struct kvm_irqchip { | |||
73 | #define KVM_EXIT_INTR 10 | 80 | #define KVM_EXIT_INTR 10 |
74 | #define KVM_EXIT_SET_TPR 11 | 81 | #define KVM_EXIT_SET_TPR 11 |
75 | #define KVM_EXIT_TPR_ACCESS 12 | 82 | #define KVM_EXIT_TPR_ACCESS 12 |
83 | #define KVM_EXIT_S390_SIEIC 13 | ||
84 | #define KVM_EXIT_S390_RESET 14 | ||
85 | #define KVM_EXIT_DCR 15 | ||
76 | 86 | ||
77 | /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ | 87 | /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ |
78 | struct kvm_run { | 88 | struct kvm_run { |
@@ -137,6 +147,27 @@ struct kvm_run { | |||
137 | __u32 is_write; | 147 | __u32 is_write; |
138 | __u32 pad; | 148 | __u32 pad; |
139 | } tpr_access; | 149 | } tpr_access; |
150 | /* KVM_EXIT_S390_SIEIC */ | ||
151 | struct { | ||
152 | __u8 icptcode; | ||
153 | __u64 mask; /* psw upper half */ | ||
154 | __u64 addr; /* psw lower half */ | ||
155 | __u16 ipa; | ||
156 | __u32 ipb; | ||
157 | } s390_sieic; | ||
158 | /* KVM_EXIT_S390_RESET */ | ||
159 | #define KVM_S390_RESET_POR 1 | ||
160 | #define KVM_S390_RESET_CLEAR 2 | ||
161 | #define KVM_S390_RESET_SUBSYSTEM 4 | ||
162 | #define KVM_S390_RESET_CPU_INIT 8 | ||
163 | #define KVM_S390_RESET_IPL 16 | ||
164 | __u64 s390_reset_flags; | ||
165 | /* KVM_EXIT_DCR */ | ||
166 | struct { | ||
167 | __u32 dcrn; | ||
168 | __u32 data; | ||
169 | __u8 is_write; | ||
170 | } dcr; | ||
140 | /* Fix the size of the union. */ | 171 | /* Fix the size of the union. */ |
141 | char padding[256]; | 172 | char padding[256]; |
142 | }; | 173 | }; |
@@ -204,6 +235,74 @@ struct kvm_vapic_addr { | |||
204 | __u64 vapic_addr; | 235 | __u64 vapic_addr; |
205 | }; | 236 | }; |
206 | 237 | ||
238 | /* for KVM_SET_MPSTATE */ | ||
239 | |||
240 | #define KVM_MP_STATE_RUNNABLE 0 | ||
241 | #define KVM_MP_STATE_UNINITIALIZED 1 | ||
242 | #define KVM_MP_STATE_INIT_RECEIVED 2 | ||
243 | #define KVM_MP_STATE_HALTED 3 | ||
244 | #define KVM_MP_STATE_SIPI_RECEIVED 4 | ||
245 | |||
246 | struct kvm_mp_state { | ||
247 | __u32 mp_state; | ||
248 | }; | ||
249 | |||
250 | struct kvm_s390_psw { | ||
251 | __u64 mask; | ||
252 | __u64 addr; | ||
253 | }; | ||
254 | |||
255 | /* valid values for type in kvm_s390_interrupt */ | ||
256 | #define KVM_S390_SIGP_STOP 0xfffe0000u | ||
257 | #define KVM_S390_PROGRAM_INT 0xfffe0001u | ||
258 | #define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u | ||
259 | #define KVM_S390_RESTART 0xfffe0003u | ||
260 | #define KVM_S390_INT_VIRTIO 0xffff2603u | ||
261 | #define KVM_S390_INT_SERVICE 0xffff2401u | ||
262 | #define KVM_S390_INT_EMERGENCY 0xffff1201u | ||
263 | |||
264 | struct kvm_s390_interrupt { | ||
265 | __u32 type; | ||
266 | __u32 parm; | ||
267 | __u64 parm64; | ||
268 | }; | ||
269 | |||
270 | #define KVM_TRC_SHIFT 16 | ||
271 | /* | ||
272 | * kvm trace categories | ||
273 | */ | ||
274 | #define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) | ||
275 | #define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) /* only 12 bits */ | ||
276 | |||
277 | /* | ||
278 | * kvm trace action | ||
279 | */ | ||
280 | #define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) | ||
281 | #define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) | ||
282 | #define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) | ||
283 | |||
284 | #define KVM_TRC_HEAD_SIZE 12 | ||
285 | #define KVM_TRC_CYCLE_SIZE 8 | ||
286 | #define KVM_TRC_EXTRA_MAX 7 | ||
287 | |||
288 | /* This structure represents a single trace buffer record. */ | ||
289 | struct kvm_trace_rec { | ||
290 | __u32 event:28; | ||
291 | __u32 extra_u32:3; | ||
292 | __u32 cycle_in:1; | ||
293 | __u32 pid; | ||
294 | __u32 vcpu_id; | ||
295 | union { | ||
296 | struct { | ||
297 | __u32 cycle_lo, cycle_hi; | ||
298 | __u32 extra_u32[KVM_TRC_EXTRA_MAX]; | ||
299 | } cycle; | ||
300 | struct { | ||
301 | __u32 extra_u32[KVM_TRC_EXTRA_MAX]; | ||
302 | } nocycle; | ||
303 | } u; | ||
304 | }; | ||
305 | |||
207 | #define KVMIO 0xAE | 306 | #define KVMIO 0xAE |
208 | 307 | ||
209 | /* | 308 | /* |
@@ -212,6 +311,8 @@ struct kvm_vapic_addr { | |||
212 | #define KVM_GET_API_VERSION _IO(KVMIO, 0x00) | 311 | #define KVM_GET_API_VERSION _IO(KVMIO, 0x00) |
213 | #define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */ | 312 | #define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */ |
214 | #define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list) | 313 | #define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list) |
314 | |||
315 | #define KVM_S390_ENABLE_SIE _IO(KVMIO, 0x06) | ||
215 | /* | 316 | /* |
216 | * Check if a kvm extension is available. Argument is extension number, | 317 | * Check if a kvm extension is available. Argument is extension number, |
217 | * return is 1 (yes) or 0 (no, sorry). | 318 | * return is 1 (yes) or 0 (no, sorry). |
@@ -222,7 +323,12 @@ struct kvm_vapic_addr { | |||
222 | */ | 323 | */ |
223 | #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ | 324 | #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ |
224 | #define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) | 325 | #define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) |
225 | 326 | /* | |
327 | * ioctls for kvm trace | ||
328 | */ | ||
329 | #define KVM_TRACE_ENABLE _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) | ||
330 | #define KVM_TRACE_PAUSE _IO(KVMIO, 0x07) | ||
331 | #define KVM_TRACE_DISABLE _IO(KVMIO, 0x08) | ||
226 | /* | 332 | /* |
227 | * Extension capability list. | 333 | * Extension capability list. |
228 | */ | 334 | */ |
@@ -233,6 +339,13 @@ struct kvm_vapic_addr { | |||
233 | #define KVM_CAP_SET_TSS_ADDR 4 | 339 | #define KVM_CAP_SET_TSS_ADDR 4 |
234 | #define KVM_CAP_VAPIC 6 | 340 | #define KVM_CAP_VAPIC 6 |
235 | #define KVM_CAP_EXT_CPUID 7 | 341 | #define KVM_CAP_EXT_CPUID 7 |
342 | #define KVM_CAP_CLOCKSOURCE 8 | ||
343 | #define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */ | ||
344 | #define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */ | ||
345 | #define KVM_CAP_PIT 11 | ||
346 | #define KVM_CAP_NOP_IO_DELAY 12 | ||
347 | #define KVM_CAP_PV_MMU 13 | ||
348 | #define KVM_CAP_MP_STATE 14 | ||
236 | 349 | ||
237 | /* | 350 | /* |
238 | * ioctls for VM fds | 351 | * ioctls for VM fds |
@@ -255,6 +368,9 @@ struct kvm_vapic_addr { | |||
255 | #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) | 368 | #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) |
256 | #define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) | 369 | #define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) |
257 | #define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) | 370 | #define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) |
371 | #define KVM_CREATE_PIT _IO(KVMIO, 0x64) | ||
372 | #define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state) | ||
373 | #define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state) | ||
258 | 374 | ||
259 | /* | 375 | /* |
260 | * ioctls for vcpu fds | 376 | * ioctls for vcpu fds |
@@ -281,5 +397,17 @@ struct kvm_vapic_addr { | |||
281 | #define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl) | 397 | #define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl) |
282 | /* Available with KVM_CAP_VAPIC */ | 398 | /* Available with KVM_CAP_VAPIC */ |
283 | #define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr) | 399 | #define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr) |
400 | /* valid for virtual machine (for floating interrupt)_and_ vcpu */ | ||
401 | #define KVM_S390_INTERRUPT _IOW(KVMIO, 0x94, struct kvm_s390_interrupt) | ||
402 | /* store status for s390 */ | ||
403 | #define KVM_S390_STORE_STATUS_NOADDR (-1ul) | ||
404 | #define KVM_S390_STORE_STATUS_PREFIXED (-2ul) | ||
405 | #define KVM_S390_STORE_STATUS _IOW(KVMIO, 0x95, unsigned long) | ||
406 | /* initial ipl psw for s390 */ | ||
407 | #define KVM_S390_SET_INITIAL_PSW _IOW(KVMIO, 0x96, struct kvm_s390_psw) | ||
408 | /* initial reset for s390 */ | ||
409 | #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) | ||
410 | #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) | ||
411 | #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) | ||
284 | 412 | ||
285 | #endif | 413 | #endif |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 928b0d59e9ba..398978972b7a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/preempt.h> | 17 | #include <linux/preempt.h> |
18 | #include <linux/marker.h> | ||
18 | #include <asm/signal.h> | 19 | #include <asm/signal.h> |
19 | 20 | ||
20 | #include <linux/kvm.h> | 21 | #include <linux/kvm.h> |
@@ -24,29 +25,18 @@ | |||
24 | 25 | ||
25 | #include <asm/kvm_host.h> | 26 | #include <asm/kvm_host.h> |
26 | 27 | ||
27 | #define KVM_MAX_VCPUS 4 | ||
28 | #define KVM_MEMORY_SLOTS 8 | ||
29 | /* memory slots that does not exposed to userspace */ | ||
30 | #define KVM_PRIVATE_MEM_SLOTS 4 | ||
31 | |||
32 | #define KVM_PIO_PAGE_OFFSET 1 | ||
33 | |||
34 | /* | 28 | /* |
35 | * vcpu->requests bit members | 29 | * vcpu->requests bit members |
36 | */ | 30 | */ |
37 | #define KVM_REQ_TLB_FLUSH 0 | 31 | #define KVM_REQ_TLB_FLUSH 0 |
38 | #define KVM_REQ_MIGRATE_TIMER 1 | 32 | #define KVM_REQ_MIGRATE_TIMER 1 |
39 | #define KVM_REQ_REPORT_TPR_ACCESS 2 | 33 | #define KVM_REQ_REPORT_TPR_ACCESS 2 |
34 | #define KVM_REQ_MMU_RELOAD 3 | ||
35 | #define KVM_REQ_TRIPLE_FAULT 4 | ||
40 | 36 | ||
41 | struct kvm_vcpu; | 37 | struct kvm_vcpu; |
42 | extern struct kmem_cache *kvm_vcpu_cache; | 38 | extern struct kmem_cache *kvm_vcpu_cache; |
43 | 39 | ||
44 | struct kvm_guest_debug { | ||
45 | int enabled; | ||
46 | unsigned long bp[4]; | ||
47 | int singlestep; | ||
48 | }; | ||
49 | |||
50 | /* | 40 | /* |
51 | * It would be nice to use something smarter than a linear search, TBD... | 41 | * It would be nice to use something smarter than a linear search, TBD... |
52 | * Thankfully we dont expect many devices to register (famous last words :), | 42 | * Thankfully we dont expect many devices to register (famous last words :), |
@@ -67,7 +57,9 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus, | |||
67 | 57 | ||
68 | struct kvm_vcpu { | 58 | struct kvm_vcpu { |
69 | struct kvm *kvm; | 59 | struct kvm *kvm; |
60 | #ifdef CONFIG_PREEMPT_NOTIFIERS | ||
70 | struct preempt_notifier preempt_notifier; | 61 | struct preempt_notifier preempt_notifier; |
62 | #endif | ||
71 | int vcpu_id; | 63 | int vcpu_id; |
72 | struct mutex mutex; | 64 | struct mutex mutex; |
73 | int cpu; | 65 | int cpu; |
@@ -100,6 +92,10 @@ struct kvm_memory_slot { | |||
100 | unsigned long flags; | 92 | unsigned long flags; |
101 | unsigned long *rmap; | 93 | unsigned long *rmap; |
102 | unsigned long *dirty_bitmap; | 94 | unsigned long *dirty_bitmap; |
95 | struct { | ||
96 | unsigned long rmap_pde; | ||
97 | int write_count; | ||
98 | } *lpage_info; | ||
103 | unsigned long userspace_addr; | 99 | unsigned long userspace_addr; |
104 | int user_alloc; | 100 | int user_alloc; |
105 | }; | 101 | }; |
@@ -114,11 +110,11 @@ struct kvm { | |||
114 | KVM_PRIVATE_MEM_SLOTS]; | 110 | KVM_PRIVATE_MEM_SLOTS]; |
115 | struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; | 111 | struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; |
116 | struct list_head vm_list; | 112 | struct list_head vm_list; |
117 | struct file *filp; | ||
118 | struct kvm_io_bus mmio_bus; | 113 | struct kvm_io_bus mmio_bus; |
119 | struct kvm_io_bus pio_bus; | 114 | struct kvm_io_bus pio_bus; |
120 | struct kvm_vm_stat stat; | 115 | struct kvm_vm_stat stat; |
121 | struct kvm_arch arch; | 116 | struct kvm_arch arch; |
117 | atomic_t users_count; | ||
122 | }; | 118 | }; |
123 | 119 | ||
124 | /* The guest did something we don't support. */ | 120 | /* The guest did something we don't support. */ |
@@ -145,14 +141,19 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
145 | struct module *module); | 141 | struct module *module); |
146 | void kvm_exit(void); | 142 | void kvm_exit(void); |
147 | 143 | ||
144 | void kvm_get_kvm(struct kvm *kvm); | ||
145 | void kvm_put_kvm(struct kvm *kvm); | ||
146 | |||
148 | #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) | 147 | #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) |
149 | #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) | 148 | #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) |
150 | static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } | 149 | static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } |
151 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); | 150 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); |
152 | 151 | ||
153 | extern struct page *bad_page; | 152 | extern struct page *bad_page; |
153 | extern pfn_t bad_pfn; | ||
154 | 154 | ||
155 | int is_error_page(struct page *page); | 155 | int is_error_page(struct page *page); |
156 | int is_error_pfn(pfn_t pfn); | ||
156 | int kvm_is_error_hva(unsigned long addr); | 157 | int kvm_is_error_hva(unsigned long addr); |
157 | int kvm_set_memory_region(struct kvm *kvm, | 158 | int kvm_set_memory_region(struct kvm *kvm, |
158 | struct kvm_userspace_memory_region *mem, | 159 | struct kvm_userspace_memory_region *mem, |
@@ -166,8 +167,19 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
166 | int user_alloc); | 167 | int user_alloc); |
167 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); | 168 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); |
168 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); | 169 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); |
170 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); | ||
169 | void kvm_release_page_clean(struct page *page); | 171 | void kvm_release_page_clean(struct page *page); |
170 | void kvm_release_page_dirty(struct page *page); | 172 | void kvm_release_page_dirty(struct page *page); |
173 | void kvm_set_page_dirty(struct page *page); | ||
174 | void kvm_set_page_accessed(struct page *page); | ||
175 | |||
176 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); | ||
177 | void kvm_release_pfn_dirty(pfn_t); | ||
178 | void kvm_release_pfn_clean(pfn_t pfn); | ||
179 | void kvm_set_pfn_dirty(pfn_t pfn); | ||
180 | void kvm_set_pfn_accessed(pfn_t pfn); | ||
181 | void kvm_get_pfn(pfn_t pfn); | ||
182 | |||
171 | int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, | 183 | int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, |
172 | int len); | 184 | int len); |
173 | int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, | 185 | int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, |
@@ -188,6 +200,7 @@ void kvm_resched(struct kvm_vcpu *vcpu); | |||
188 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); | 200 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); |
189 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); | 201 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); |
190 | void kvm_flush_remote_tlbs(struct kvm *kvm); | 202 | void kvm_flush_remote_tlbs(struct kvm *kvm); |
203 | void kvm_reload_remote_mmus(struct kvm *kvm); | ||
191 | 204 | ||
192 | long kvm_arch_dev_ioctl(struct file *filp, | 205 | long kvm_arch_dev_ioctl(struct file *filp, |
193 | unsigned int ioctl, unsigned long arg); | 206 | unsigned int ioctl, unsigned long arg); |
@@ -223,6 +236,10 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
223 | struct kvm_sregs *sregs); | 236 | struct kvm_sregs *sregs); |
224 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | 237 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, |
225 | struct kvm_sregs *sregs); | 238 | struct kvm_sregs *sregs); |
239 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | ||
240 | struct kvm_mp_state *mp_state); | ||
241 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | ||
242 | struct kvm_mp_state *mp_state); | ||
226 | int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, | 243 | int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, |
227 | struct kvm_debug_guest *dbg); | 244 | struct kvm_debug_guest *dbg); |
228 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); | 245 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); |
@@ -255,6 +272,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm); | |||
255 | 272 | ||
256 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | 273 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); |
257 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v); | 274 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v); |
275 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); | ||
258 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); | 276 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); |
259 | 277 | ||
260 | static inline void kvm_guest_enter(void) | 278 | static inline void kvm_guest_enter(void) |
@@ -296,5 +314,18 @@ struct kvm_stats_debugfs_item { | |||
296 | struct dentry *dentry; | 314 | struct dentry *dentry; |
297 | }; | 315 | }; |
298 | extern struct kvm_stats_debugfs_item debugfs_entries[]; | 316 | extern struct kvm_stats_debugfs_item debugfs_entries[]; |
317 | extern struct dentry *kvm_debugfs_dir; | ||
318 | |||
319 | #ifdef CONFIG_KVM_TRACE | ||
320 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg); | ||
321 | void kvm_trace_cleanup(void); | ||
322 | #else | ||
323 | static inline | ||
324 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) | ||
325 | { | ||
326 | return -EINVAL; | ||
327 | } | ||
328 | #define kvm_trace_cleanup() ((void)0) | ||
329 | #endif | ||
299 | 330 | ||
300 | #endif | 331 | #endif |
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h index 5497aac0d2f8..3ddce03766ca 100644 --- a/include/linux/kvm_para.h +++ b/include/linux/kvm_para.h | |||
@@ -11,8 +11,11 @@ | |||
11 | 11 | ||
12 | /* Return values for hypercalls */ | 12 | /* Return values for hypercalls */ |
13 | #define KVM_ENOSYS 1000 | 13 | #define KVM_ENOSYS 1000 |
14 | #define KVM_EFAULT EFAULT | ||
15 | #define KVM_E2BIG E2BIG | ||
14 | 16 | ||
15 | #define KVM_HC_VAPIC_POLL_IRQ 1 | 17 | #define KVM_HC_VAPIC_POLL_IRQ 1 |
18 | #define KVM_HC_MMU_OP 2 | ||
16 | 19 | ||
17 | /* | 20 | /* |
18 | * hypercalls use architecture specific | 21 | * hypercalls use architecture specific |
@@ -20,6 +23,12 @@ | |||
20 | #include <asm/kvm_para.h> | 23 | #include <asm/kvm_para.h> |
21 | 24 | ||
22 | #ifdef __KERNEL__ | 25 | #ifdef __KERNEL__ |
26 | #ifdef CONFIG_KVM_GUEST | ||
27 | void __init kvm_guest_init(void); | ||
28 | #else | ||
29 | #define kvm_guest_init() do { } while (0) | ||
30 | #endif | ||
31 | |||
23 | static inline int kvm_para_has_feature(unsigned int feature) | 32 | static inline int kvm_para_has_feature(unsigned int feature) |
24 | { | 33 | { |
25 | if (kvm_arch_para_features() & (1UL << feature)) | 34 | if (kvm_arch_para_features() & (1UL << feature)) |
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 1c4e46decb22..9b6f395c9625 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h | |||
@@ -38,6 +38,8 @@ typedef unsigned long hva_t; | |||
38 | typedef u64 hpa_t; | 38 | typedef u64 hpa_t; |
39 | typedef unsigned long hfn_t; | 39 | typedef unsigned long hfn_t; |
40 | 40 | ||
41 | typedef hfn_t pfn_t; | ||
42 | |||
41 | struct kvm_pio_request { | 43 | struct kvm_pio_request { |
42 | unsigned long count; | 44 | unsigned long count; |
43 | int cur_count; | 45 | int cur_count; |
diff --git a/include/linux/sched.h b/include/linux/sched.h index d0bd97044abd..9a4f3e63e3bf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -1798,6 +1798,8 @@ extern void mmput(struct mm_struct *); | |||
1798 | extern struct mm_struct *get_task_mm(struct task_struct *task); | 1798 | extern struct mm_struct *get_task_mm(struct task_struct *task); |
1799 | /* Remove the current tasks stale references to the old mm_struct */ | 1799 | /* Remove the current tasks stale references to the old mm_struct */ |
1800 | extern void mm_release(struct task_struct *, struct mm_struct *); | 1800 | extern void mm_release(struct task_struct *, struct mm_struct *); |
1801 | /* Allocate a new mm structure and copy contents from tsk->mm */ | ||
1802 | extern struct mm_struct *dup_mm(struct task_struct *tsk); | ||
1801 | 1803 | ||
1802 | extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); | 1804 | extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); |
1803 | extern void flush_thread(void); | 1805 | extern void flush_thread(void); |
diff --git a/kernel/fork.c b/kernel/fork.c index cb46befdd3a0..c674aa8d3c31 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -521,7 +521,7 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) | |||
521 | * Allocate a new mm structure and copy contents from the | 521 | * Allocate a new mm structure and copy contents from the |
522 | * mm structure of the passed in task structure. | 522 | * mm structure of the passed in task structure. |
523 | */ | 523 | */ |
524 | static struct mm_struct *dup_mm(struct task_struct *tsk) | 524 | struct mm_struct *dup_mm(struct task_struct *tsk) |
525 | { | 525 | { |
526 | struct mm_struct *mm, *oldmm = current->mm; | 526 | struct mm_struct *mm, *oldmm = current->mm; |
527 | int err; | 527 | int err; |
@@ -413,9 +413,6 @@ int page_referenced(struct page *page, int is_locked, | |||
413 | { | 413 | { |
414 | int referenced = 0; | 414 | int referenced = 0; |
415 | 415 | ||
416 | if (page_test_and_clear_young(page)) | ||
417 | referenced++; | ||
418 | |||
419 | if (TestClearPageReferenced(page)) | 416 | if (TestClearPageReferenced(page)) |
420 | referenced++; | 417 | referenced++; |
421 | 418 | ||
@@ -433,6 +430,10 @@ int page_referenced(struct page *page, int is_locked, | |||
433 | unlock_page(page); | 430 | unlock_page(page); |
434 | } | 431 | } |
435 | } | 432 | } |
433 | |||
434 | if (page_test_and_clear_young(page)) | ||
435 | referenced++; | ||
436 | |||
436 | return referenced; | 437 | return referenced; |
437 | } | 438 | } |
438 | 439 | ||
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b2e12893e3f4..c82cf15730a1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/kvm_para.h> | 40 | #include <linux/kvm_para.h> |
41 | #include <linux/pagemap.h> | 41 | #include <linux/pagemap.h> |
42 | #include <linux/mman.h> | 42 | #include <linux/mman.h> |
43 | #include <linux/swap.h> | ||
43 | 44 | ||
44 | #include <asm/processor.h> | 45 | #include <asm/processor.h> |
45 | #include <asm/io.h> | 46 | #include <asm/io.h> |
@@ -59,7 +60,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_cache); | |||
59 | 60 | ||
60 | static __read_mostly struct preempt_ops kvm_preempt_ops; | 61 | static __read_mostly struct preempt_ops kvm_preempt_ops; |
61 | 62 | ||
62 | static struct dentry *debugfs_dir; | 63 | struct dentry *kvm_debugfs_dir; |
63 | 64 | ||
64 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | 65 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, |
65 | unsigned long arg); | 66 | unsigned long arg); |
@@ -119,6 +120,29 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) | |||
119 | smp_call_function_mask(cpus, ack_flush, NULL, 1); | 120 | smp_call_function_mask(cpus, ack_flush, NULL, 1); |
120 | } | 121 | } |
121 | 122 | ||
123 | void kvm_reload_remote_mmus(struct kvm *kvm) | ||
124 | { | ||
125 | int i, cpu; | ||
126 | cpumask_t cpus; | ||
127 | struct kvm_vcpu *vcpu; | ||
128 | |||
129 | cpus_clear(cpus); | ||
130 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||
131 | vcpu = kvm->vcpus[i]; | ||
132 | if (!vcpu) | ||
133 | continue; | ||
134 | if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | ||
135 | continue; | ||
136 | cpu = vcpu->cpu; | ||
137 | if (cpu != -1 && cpu != raw_smp_processor_id()) | ||
138 | cpu_set(cpu, cpus); | ||
139 | } | ||
140 | if (cpus_empty(cpus)) | ||
141 | return; | ||
142 | smp_call_function_mask(cpus, ack_flush, NULL, 1); | ||
143 | } | ||
144 | |||
145 | |||
122 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | 146 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) |
123 | { | 147 | { |
124 | struct page *page; | 148 | struct page *page; |
@@ -170,6 +194,7 @@ static struct kvm *kvm_create_vm(void) | |||
170 | mutex_init(&kvm->lock); | 194 | mutex_init(&kvm->lock); |
171 | kvm_io_bus_init(&kvm->mmio_bus); | 195 | kvm_io_bus_init(&kvm->mmio_bus); |
172 | init_rwsem(&kvm->slots_lock); | 196 | init_rwsem(&kvm->slots_lock); |
197 | atomic_set(&kvm->users_count, 1); | ||
173 | spin_lock(&kvm_lock); | 198 | spin_lock(&kvm_lock); |
174 | list_add(&kvm->vm_list, &vm_list); | 199 | list_add(&kvm->vm_list, &vm_list); |
175 | spin_unlock(&kvm_lock); | 200 | spin_unlock(&kvm_lock); |
@@ -189,9 +214,13 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free, | |||
189 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) | 214 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) |
190 | vfree(free->dirty_bitmap); | 215 | vfree(free->dirty_bitmap); |
191 | 216 | ||
217 | if (!dont || free->lpage_info != dont->lpage_info) | ||
218 | vfree(free->lpage_info); | ||
219 | |||
192 | free->npages = 0; | 220 | free->npages = 0; |
193 | free->dirty_bitmap = NULL; | 221 | free->dirty_bitmap = NULL; |
194 | free->rmap = NULL; | 222 | free->rmap = NULL; |
223 | free->lpage_info = NULL; | ||
195 | } | 224 | } |
196 | 225 | ||
197 | void kvm_free_physmem(struct kvm *kvm) | 226 | void kvm_free_physmem(struct kvm *kvm) |
@@ -215,11 +244,25 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
215 | mmdrop(mm); | 244 | mmdrop(mm); |
216 | } | 245 | } |
217 | 246 | ||
247 | void kvm_get_kvm(struct kvm *kvm) | ||
248 | { | ||
249 | atomic_inc(&kvm->users_count); | ||
250 | } | ||
251 | EXPORT_SYMBOL_GPL(kvm_get_kvm); | ||
252 | |||
253 | void kvm_put_kvm(struct kvm *kvm) | ||
254 | { | ||
255 | if (atomic_dec_and_test(&kvm->users_count)) | ||
256 | kvm_destroy_vm(kvm); | ||
257 | } | ||
258 | EXPORT_SYMBOL_GPL(kvm_put_kvm); | ||
259 | |||
260 | |||
218 | static int kvm_vm_release(struct inode *inode, struct file *filp) | 261 | static int kvm_vm_release(struct inode *inode, struct file *filp) |
219 | { | 262 | { |
220 | struct kvm *kvm = filp->private_data; | 263 | struct kvm *kvm = filp->private_data; |
221 | 264 | ||
222 | kvm_destroy_vm(kvm); | 265 | kvm_put_kvm(kvm); |
223 | return 0; | 266 | return 0; |
224 | } | 267 | } |
225 | 268 | ||
@@ -301,6 +344,25 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
301 | new.user_alloc = user_alloc; | 344 | new.user_alloc = user_alloc; |
302 | new.userspace_addr = mem->userspace_addr; | 345 | new.userspace_addr = mem->userspace_addr; |
303 | } | 346 | } |
347 | if (npages && !new.lpage_info) { | ||
348 | int largepages = npages / KVM_PAGES_PER_HPAGE; | ||
349 | if (npages % KVM_PAGES_PER_HPAGE) | ||
350 | largepages++; | ||
351 | if (base_gfn % KVM_PAGES_PER_HPAGE) | ||
352 | largepages++; | ||
353 | |||
354 | new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info)); | ||
355 | |||
356 | if (!new.lpage_info) | ||
357 | goto out_free; | ||
358 | |||
359 | memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info)); | ||
360 | |||
361 | if (base_gfn % KVM_PAGES_PER_HPAGE) | ||
362 | new.lpage_info[0].write_count = 1; | ||
363 | if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE) | ||
364 | new.lpage_info[largepages-1].write_count = 1; | ||
365 | } | ||
304 | 366 | ||
305 | /* Allocate page dirty bitmap if needed */ | 367 | /* Allocate page dirty bitmap if needed */ |
306 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { | 368 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { |
@@ -397,6 +459,12 @@ int is_error_page(struct page *page) | |||
397 | } | 459 | } |
398 | EXPORT_SYMBOL_GPL(is_error_page); | 460 | EXPORT_SYMBOL_GPL(is_error_page); |
399 | 461 | ||
462 | int is_error_pfn(pfn_t pfn) | ||
463 | { | ||
464 | return pfn == bad_pfn; | ||
465 | } | ||
466 | EXPORT_SYMBOL_GPL(is_error_pfn); | ||
467 | |||
400 | static inline unsigned long bad_hva(void) | 468 | static inline unsigned long bad_hva(void) |
401 | { | 469 | { |
402 | return PAGE_OFFSET; | 470 | return PAGE_OFFSET; |
@@ -444,7 +512,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) | |||
444 | } | 512 | } |
445 | EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); | 513 | EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); |
446 | 514 | ||
447 | static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | 515 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) |
448 | { | 516 | { |
449 | struct kvm_memory_slot *slot; | 517 | struct kvm_memory_slot *slot; |
450 | 518 | ||
@@ -458,7 +526,7 @@ static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | |||
458 | /* | 526 | /* |
459 | * Requires current->mm->mmap_sem to be held | 527 | * Requires current->mm->mmap_sem to be held |
460 | */ | 528 | */ |
461 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | 529 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) |
462 | { | 530 | { |
463 | struct page *page[1]; | 531 | struct page *page[1]; |
464 | unsigned long addr; | 532 | unsigned long addr; |
@@ -469,7 +537,7 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | |||
469 | addr = gfn_to_hva(kvm, gfn); | 537 | addr = gfn_to_hva(kvm, gfn); |
470 | if (kvm_is_error_hva(addr)) { | 538 | if (kvm_is_error_hva(addr)) { |
471 | get_page(bad_page); | 539 | get_page(bad_page); |
472 | return bad_page; | 540 | return page_to_pfn(bad_page); |
473 | } | 541 | } |
474 | 542 | ||
475 | npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, | 543 | npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, |
@@ -477,27 +545,71 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | |||
477 | 545 | ||
478 | if (npages != 1) { | 546 | if (npages != 1) { |
479 | get_page(bad_page); | 547 | get_page(bad_page); |
480 | return bad_page; | 548 | return page_to_pfn(bad_page); |
481 | } | 549 | } |
482 | 550 | ||
483 | return page[0]; | 551 | return page_to_pfn(page[0]); |
552 | } | ||
553 | |||
554 | EXPORT_SYMBOL_GPL(gfn_to_pfn); | ||
555 | |||
556 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | ||
557 | { | ||
558 | return pfn_to_page(gfn_to_pfn(kvm, gfn)); | ||
484 | } | 559 | } |
485 | 560 | ||
486 | EXPORT_SYMBOL_GPL(gfn_to_page); | 561 | EXPORT_SYMBOL_GPL(gfn_to_page); |
487 | 562 | ||
488 | void kvm_release_page_clean(struct page *page) | 563 | void kvm_release_page_clean(struct page *page) |
489 | { | 564 | { |
490 | put_page(page); | 565 | kvm_release_pfn_clean(page_to_pfn(page)); |
491 | } | 566 | } |
492 | EXPORT_SYMBOL_GPL(kvm_release_page_clean); | 567 | EXPORT_SYMBOL_GPL(kvm_release_page_clean); |
493 | 568 | ||
569 | void kvm_release_pfn_clean(pfn_t pfn) | ||
570 | { | ||
571 | put_page(pfn_to_page(pfn)); | ||
572 | } | ||
573 | EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); | ||
574 | |||
494 | void kvm_release_page_dirty(struct page *page) | 575 | void kvm_release_page_dirty(struct page *page) |
495 | { | 576 | { |
577 | kvm_release_pfn_dirty(page_to_pfn(page)); | ||
578 | } | ||
579 | EXPORT_SYMBOL_GPL(kvm_release_page_dirty); | ||
580 | |||
581 | void kvm_release_pfn_dirty(pfn_t pfn) | ||
582 | { | ||
583 | kvm_set_pfn_dirty(pfn); | ||
584 | kvm_release_pfn_clean(pfn); | ||
585 | } | ||
586 | EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); | ||
587 | |||
588 | void kvm_set_page_dirty(struct page *page) | ||
589 | { | ||
590 | kvm_set_pfn_dirty(page_to_pfn(page)); | ||
591 | } | ||
592 | EXPORT_SYMBOL_GPL(kvm_set_page_dirty); | ||
593 | |||
594 | void kvm_set_pfn_dirty(pfn_t pfn) | ||
595 | { | ||
596 | struct page *page = pfn_to_page(pfn); | ||
496 | if (!PageReserved(page)) | 597 | if (!PageReserved(page)) |
497 | SetPageDirty(page); | 598 | SetPageDirty(page); |
498 | put_page(page); | ||
499 | } | 599 | } |
500 | EXPORT_SYMBOL_GPL(kvm_release_page_dirty); | 600 | EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); |
601 | |||
602 | void kvm_set_pfn_accessed(pfn_t pfn) | ||
603 | { | ||
604 | mark_page_accessed(pfn_to_page(pfn)); | ||
605 | } | ||
606 | EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); | ||
607 | |||
608 | void kvm_get_pfn(pfn_t pfn) | ||
609 | { | ||
610 | get_page(pfn_to_page(pfn)); | ||
611 | } | ||
612 | EXPORT_SYMBOL_GPL(kvm_get_pfn); | ||
501 | 613 | ||
502 | static int next_segment(unsigned long len, int offset) | 614 | static int next_segment(unsigned long len, int offset) |
503 | { | 615 | { |
@@ -554,7 +666,9 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, | |||
554 | addr = gfn_to_hva(kvm, gfn); | 666 | addr = gfn_to_hva(kvm, gfn); |
555 | if (kvm_is_error_hva(addr)) | 667 | if (kvm_is_error_hva(addr)) |
556 | return -EFAULT; | 668 | return -EFAULT; |
669 | pagefault_disable(); | ||
557 | r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); | 670 | r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); |
671 | pagefault_enable(); | ||
558 | if (r) | 672 | if (r) |
559 | return -EFAULT; | 673 | return -EFAULT; |
560 | return 0; | 674 | return 0; |
@@ -651,6 +765,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
651 | * We will block until either an interrupt or a signal wakes us up | 765 | * We will block until either an interrupt or a signal wakes us up |
652 | */ | 766 | */ |
653 | while (!kvm_cpu_has_interrupt(vcpu) | 767 | while (!kvm_cpu_has_interrupt(vcpu) |
768 | && !kvm_cpu_has_pending_timer(vcpu) | ||
654 | && !signal_pending(current) | 769 | && !signal_pending(current) |
655 | && !kvm_arch_vcpu_runnable(vcpu)) { | 770 | && !kvm_arch_vcpu_runnable(vcpu)) { |
656 | set_current_state(TASK_INTERRUPTIBLE); | 771 | set_current_state(TASK_INTERRUPTIBLE); |
@@ -678,8 +793,10 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
678 | 793 | ||
679 | if (vmf->pgoff == 0) | 794 | if (vmf->pgoff == 0) |
680 | page = virt_to_page(vcpu->run); | 795 | page = virt_to_page(vcpu->run); |
796 | #ifdef CONFIG_X86 | ||
681 | else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) | 797 | else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) |
682 | page = virt_to_page(vcpu->arch.pio_data); | 798 | page = virt_to_page(vcpu->arch.pio_data); |
799 | #endif | ||
683 | else | 800 | else |
684 | return VM_FAULT_SIGBUS; | 801 | return VM_FAULT_SIGBUS; |
685 | get_page(page); | 802 | get_page(page); |
@@ -701,11 +818,11 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp) | |||
701 | { | 818 | { |
702 | struct kvm_vcpu *vcpu = filp->private_data; | 819 | struct kvm_vcpu *vcpu = filp->private_data; |
703 | 820 | ||
704 | fput(vcpu->kvm->filp); | 821 | kvm_put_kvm(vcpu->kvm); |
705 | return 0; | 822 | return 0; |
706 | } | 823 | } |
707 | 824 | ||
708 | static struct file_operations kvm_vcpu_fops = { | 825 | static const struct file_operations kvm_vcpu_fops = { |
709 | .release = kvm_vcpu_release, | 826 | .release = kvm_vcpu_release, |
710 | .unlocked_ioctl = kvm_vcpu_ioctl, | 827 | .unlocked_ioctl = kvm_vcpu_ioctl, |
711 | .compat_ioctl = kvm_vcpu_ioctl, | 828 | .compat_ioctl = kvm_vcpu_ioctl, |
@@ -723,9 +840,10 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) | |||
723 | 840 | ||
724 | r = anon_inode_getfd(&fd, &inode, &file, | 841 | r = anon_inode_getfd(&fd, &inode, &file, |
725 | "kvm-vcpu", &kvm_vcpu_fops, vcpu); | 842 | "kvm-vcpu", &kvm_vcpu_fops, vcpu); |
726 | if (r) | 843 | if (r) { |
844 | kvm_put_kvm(vcpu->kvm); | ||
727 | return r; | 845 | return r; |
728 | atomic_inc(&vcpu->kvm->filp->f_count); | 846 | } |
729 | return fd; | 847 | return fd; |
730 | } | 848 | } |
731 | 849 | ||
@@ -760,6 +878,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
760 | mutex_unlock(&kvm->lock); | 878 | mutex_unlock(&kvm->lock); |
761 | 879 | ||
762 | /* Now it's all set up, let userspace reach it */ | 880 | /* Now it's all set up, let userspace reach it */ |
881 | kvm_get_kvm(kvm); | ||
763 | r = create_vcpu_fd(vcpu); | 882 | r = create_vcpu_fd(vcpu); |
764 | if (r < 0) | 883 | if (r < 0) |
765 | goto unlink; | 884 | goto unlink; |
@@ -802,28 +921,39 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
802 | r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); | 921 | r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); |
803 | break; | 922 | break; |
804 | case KVM_GET_REGS: { | 923 | case KVM_GET_REGS: { |
805 | struct kvm_regs kvm_regs; | 924 | struct kvm_regs *kvm_regs; |
806 | 925 | ||
807 | memset(&kvm_regs, 0, sizeof kvm_regs); | 926 | r = -ENOMEM; |
808 | r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &kvm_regs); | 927 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); |
809 | if (r) | 928 | if (!kvm_regs) |
810 | goto out; | 929 | goto out; |
930 | r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); | ||
931 | if (r) | ||
932 | goto out_free1; | ||
811 | r = -EFAULT; | 933 | r = -EFAULT; |
812 | if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs)) | 934 | if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs))) |
813 | goto out; | 935 | goto out_free1; |
814 | r = 0; | 936 | r = 0; |
937 | out_free1: | ||
938 | kfree(kvm_regs); | ||
815 | break; | 939 | break; |
816 | } | 940 | } |
817 | case KVM_SET_REGS: { | 941 | case KVM_SET_REGS: { |
818 | struct kvm_regs kvm_regs; | 942 | struct kvm_regs *kvm_regs; |
819 | 943 | ||
820 | r = -EFAULT; | 944 | r = -ENOMEM; |
821 | if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs)) | 945 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); |
946 | if (!kvm_regs) | ||
822 | goto out; | 947 | goto out; |
823 | r = kvm_arch_vcpu_ioctl_set_regs(vcpu, &kvm_regs); | 948 | r = -EFAULT; |
949 | if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) | ||
950 | goto out_free2; | ||
951 | r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); | ||
824 | if (r) | 952 | if (r) |
825 | goto out; | 953 | goto out_free2; |
826 | r = 0; | 954 | r = 0; |
955 | out_free2: | ||
956 | kfree(kvm_regs); | ||
827 | break; | 957 | break; |
828 | } | 958 | } |
829 | case KVM_GET_SREGS: { | 959 | case KVM_GET_SREGS: { |
@@ -851,6 +981,30 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
851 | r = 0; | 981 | r = 0; |
852 | break; | 982 | break; |
853 | } | 983 | } |
984 | case KVM_GET_MP_STATE: { | ||
985 | struct kvm_mp_state mp_state; | ||
986 | |||
987 | r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state); | ||
988 | if (r) | ||
989 | goto out; | ||
990 | r = -EFAULT; | ||
991 | if (copy_to_user(argp, &mp_state, sizeof mp_state)) | ||
992 | goto out; | ||
993 | r = 0; | ||
994 | break; | ||
995 | } | ||
996 | case KVM_SET_MP_STATE: { | ||
997 | struct kvm_mp_state mp_state; | ||
998 | |||
999 | r = -EFAULT; | ||
1000 | if (copy_from_user(&mp_state, argp, sizeof mp_state)) | ||
1001 | goto out; | ||
1002 | r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); | ||
1003 | if (r) | ||
1004 | goto out; | ||
1005 | r = 0; | ||
1006 | break; | ||
1007 | } | ||
854 | case KVM_TRANSLATE: { | 1008 | case KVM_TRANSLATE: { |
855 | struct kvm_translation tr; | 1009 | struct kvm_translation tr; |
856 | 1010 | ||
@@ -1005,7 +1159,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) | |||
1005 | return 0; | 1159 | return 0; |
1006 | } | 1160 | } |
1007 | 1161 | ||
1008 | static struct file_operations kvm_vm_fops = { | 1162 | static const struct file_operations kvm_vm_fops = { |
1009 | .release = kvm_vm_release, | 1163 | .release = kvm_vm_release, |
1010 | .unlocked_ioctl = kvm_vm_ioctl, | 1164 | .unlocked_ioctl = kvm_vm_ioctl, |
1011 | .compat_ioctl = kvm_vm_ioctl, | 1165 | .compat_ioctl = kvm_vm_ioctl, |
@@ -1024,12 +1178,10 @@ static int kvm_dev_ioctl_create_vm(void) | |||
1024 | return PTR_ERR(kvm); | 1178 | return PTR_ERR(kvm); |
1025 | r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm); | 1179 | r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm); |
1026 | if (r) { | 1180 | if (r) { |
1027 | kvm_destroy_vm(kvm); | 1181 | kvm_put_kvm(kvm); |
1028 | return r; | 1182 | return r; |
1029 | } | 1183 | } |
1030 | 1184 | ||
1031 | kvm->filp = file; | ||
1032 | |||
1033 | return fd; | 1185 | return fd; |
1034 | } | 1186 | } |
1035 | 1187 | ||
@@ -1059,7 +1211,15 @@ static long kvm_dev_ioctl(struct file *filp, | |||
1059 | r = -EINVAL; | 1211 | r = -EINVAL; |
1060 | if (arg) | 1212 | if (arg) |
1061 | goto out; | 1213 | goto out; |
1062 | r = 2 * PAGE_SIZE; | 1214 | r = PAGE_SIZE; /* struct kvm_run */ |
1215 | #ifdef CONFIG_X86 | ||
1216 | r += PAGE_SIZE; /* pio data page */ | ||
1217 | #endif | ||
1218 | break; | ||
1219 | case KVM_TRACE_ENABLE: | ||
1220 | case KVM_TRACE_PAUSE: | ||
1221 | case KVM_TRACE_DISABLE: | ||
1222 | r = kvm_trace_ioctl(ioctl, arg); | ||
1063 | break; | 1223 | break; |
1064 | default: | 1224 | default: |
1065 | return kvm_arch_dev_ioctl(filp, ioctl, arg); | 1225 | return kvm_arch_dev_ioctl(filp, ioctl, arg); |
@@ -1232,9 +1392,9 @@ static void kvm_init_debug(void) | |||
1232 | { | 1392 | { |
1233 | struct kvm_stats_debugfs_item *p; | 1393 | struct kvm_stats_debugfs_item *p; |
1234 | 1394 | ||
1235 | debugfs_dir = debugfs_create_dir("kvm", NULL); | 1395 | kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); |
1236 | for (p = debugfs_entries; p->name; ++p) | 1396 | for (p = debugfs_entries; p->name; ++p) |
1237 | p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir, | 1397 | p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, |
1238 | (void *)(long)p->offset, | 1398 | (void *)(long)p->offset, |
1239 | stat_fops[p->kind]); | 1399 | stat_fops[p->kind]); |
1240 | } | 1400 | } |
@@ -1245,7 +1405,7 @@ static void kvm_exit_debug(void) | |||
1245 | 1405 | ||
1246 | for (p = debugfs_entries; p->name; ++p) | 1406 | for (p = debugfs_entries; p->name; ++p) |
1247 | debugfs_remove(p->dentry); | 1407 | debugfs_remove(p->dentry); |
1248 | debugfs_remove(debugfs_dir); | 1408 | debugfs_remove(kvm_debugfs_dir); |
1249 | } | 1409 | } |
1250 | 1410 | ||
1251 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) | 1411 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) |
@@ -1272,6 +1432,7 @@ static struct sys_device kvm_sysdev = { | |||
1272 | }; | 1432 | }; |
1273 | 1433 | ||
1274 | struct page *bad_page; | 1434 | struct page *bad_page; |
1435 | pfn_t bad_pfn; | ||
1275 | 1436 | ||
1276 | static inline | 1437 | static inline |
1277 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) | 1438 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) |
@@ -1313,6 +1474,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
1313 | goto out; | 1474 | goto out; |
1314 | } | 1475 | } |
1315 | 1476 | ||
1477 | bad_pfn = page_to_pfn(bad_page); | ||
1478 | |||
1316 | r = kvm_arch_hardware_setup(); | 1479 | r = kvm_arch_hardware_setup(); |
1317 | if (r < 0) | 1480 | if (r < 0) |
1318 | goto out_free_0; | 1481 | goto out_free_0; |
@@ -1386,6 +1549,7 @@ EXPORT_SYMBOL_GPL(kvm_init); | |||
1386 | 1549 | ||
1387 | void kvm_exit(void) | 1550 | void kvm_exit(void) |
1388 | { | 1551 | { |
1552 | kvm_trace_cleanup(); | ||
1389 | misc_deregister(&kvm_dev); | 1553 | misc_deregister(&kvm_dev); |
1390 | kmem_cache_destroy(kvm_vcpu_cache); | 1554 | kmem_cache_destroy(kvm_vcpu_cache); |
1391 | sysdev_unregister(&kvm_sysdev); | 1555 | sysdev_unregister(&kvm_sysdev); |
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c new file mode 100644 index 000000000000..0e495470788d --- /dev/null +++ b/virt/kvm/kvm_trace.c | |||
@@ -0,0 +1,276 @@ | |||
1 | /* | ||
2 | * kvm trace | ||
3 | * | ||
4 | * It is designed to allow debugging traces of kvm to be generated | ||
5 | * on UP / SMP machines. Each trace entry can be timestamped so that | ||
6 | * it's possible to reconstruct a chronological record of trace events. | ||
7 | * The implementation refers to blktrace kernel support. | ||
8 | * | ||
9 | * Copyright (c) 2008 Intel Corporation | ||
10 | * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> | ||
11 | * | ||
12 | * Authors: Feng(Eric) Liu, eric.e.liu@intel.com | ||
13 | * | ||
14 | * Date: Feb 2008 | ||
15 | */ | ||
16 | |||
17 | #include <linux/module.h> | ||
18 | #include <linux/relay.h> | ||
19 | #include <linux/debugfs.h> | ||
20 | |||
21 | #include <linux/kvm_host.h> | ||
22 | |||
23 | #define KVM_TRACE_STATE_RUNNING (1 << 0) | ||
24 | #define KVM_TRACE_STATE_PAUSE (1 << 1) | ||
25 | #define KVM_TRACE_STATE_CLEARUP (1 << 2) | ||
26 | |||
27 | struct kvm_trace { | ||
28 | int trace_state; | ||
29 | struct rchan *rchan; | ||
30 | struct dentry *lost_file; | ||
31 | atomic_t lost_records; | ||
32 | }; | ||
33 | static struct kvm_trace *kvm_trace; | ||
34 | |||
35 | struct kvm_trace_probe { | ||
36 | const char *name; | ||
37 | const char *format; | ||
38 | u32 cycle_in; | ||
39 | marker_probe_func *probe_func; | ||
40 | }; | ||
41 | |||
42 | static inline int calc_rec_size(int cycle, int extra) | ||
43 | { | ||
44 | int rec_size = KVM_TRC_HEAD_SIZE; | ||
45 | |||
46 | rec_size += extra; | ||
47 | return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; | ||
48 | } | ||
49 | |||
50 | static void kvm_add_trace(void *probe_private, void *call_data, | ||
51 | const char *format, va_list *args) | ||
52 | { | ||
53 | struct kvm_trace_probe *p = probe_private; | ||
54 | struct kvm_trace *kt = kvm_trace; | ||
55 | struct kvm_trace_rec rec; | ||
56 | struct kvm_vcpu *vcpu; | ||
57 | int i, extra, size; | ||
58 | |||
59 | if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) | ||
60 | return; | ||
61 | |||
62 | rec.event = va_arg(*args, u32); | ||
63 | vcpu = va_arg(*args, struct kvm_vcpu *); | ||
64 | rec.pid = current->tgid; | ||
65 | rec.vcpu_id = vcpu->vcpu_id; | ||
66 | |||
67 | extra = va_arg(*args, u32); | ||
68 | WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); | ||
69 | extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); | ||
70 | rec.extra_u32 = extra; | ||
71 | |||
72 | rec.cycle_in = p->cycle_in; | ||
73 | |||
74 | if (rec.cycle_in) { | ||
75 | u64 cycle = 0; | ||
76 | |||
77 | cycle = get_cycles(); | ||
78 | rec.u.cycle.cycle_lo = (u32)cycle; | ||
79 | rec.u.cycle.cycle_hi = (u32)(cycle >> 32); | ||
80 | |||
81 | for (i = 0; i < rec.extra_u32; i++) | ||
82 | rec.u.cycle.extra_u32[i] = va_arg(*args, u32); | ||
83 | } else { | ||
84 | for (i = 0; i < rec.extra_u32; i++) | ||
85 | rec.u.nocycle.extra_u32[i] = va_arg(*args, u32); | ||
86 | } | ||
87 | |||
88 | size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32)); | ||
89 | relay_write(kt->rchan, &rec, size); | ||
90 | } | ||
91 | |||
92 | static struct kvm_trace_probe kvm_trace_probes[] = { | ||
93 | { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace }, | ||
94 | { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace }, | ||
95 | }; | ||
96 | |||
97 | static int lost_records_get(void *data, u64 *val) | ||
98 | { | ||
99 | struct kvm_trace *kt = data; | ||
100 | |||
101 | *val = atomic_read(&kt->lost_records); | ||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n"); | ||
106 | |||
107 | /* | ||
108 | * The relay channel is used in "no-overwrite" mode, it keeps trace of how | ||
109 | * many times we encountered a full subbuffer, to tell user space app the | ||
110 | * lost records there were. | ||
111 | */ | ||
112 | static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, | ||
113 | void *prev_subbuf, size_t prev_padding) | ||
114 | { | ||
115 | struct kvm_trace *kt; | ||
116 | |||
117 | if (!relay_buf_full(buf)) | ||
118 | return 1; | ||
119 | |||
120 | kt = buf->chan->private_data; | ||
121 | atomic_inc(&kt->lost_records); | ||
122 | |||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | static struct dentry *kvm_create_buf_file_callack(const char *filename, | ||
127 | struct dentry *parent, | ||
128 | int mode, | ||
129 | struct rchan_buf *buf, | ||
130 | int *is_global) | ||
131 | { | ||
132 | return debugfs_create_file(filename, mode, parent, buf, | ||
133 | &relay_file_operations); | ||
134 | } | ||
135 | |||
136 | static int kvm_remove_buf_file_callback(struct dentry *dentry) | ||
137 | { | ||
138 | debugfs_remove(dentry); | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | static struct rchan_callbacks kvm_relay_callbacks = { | ||
143 | .subbuf_start = kvm_subbuf_start_callback, | ||
144 | .create_buf_file = kvm_create_buf_file_callack, | ||
145 | .remove_buf_file = kvm_remove_buf_file_callback, | ||
146 | }; | ||
147 | |||
148 | static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts) | ||
149 | { | ||
150 | struct kvm_trace *kt; | ||
151 | int i, r = -ENOMEM; | ||
152 | |||
153 | if (!kuts->buf_size || !kuts->buf_nr) | ||
154 | return -EINVAL; | ||
155 | |||
156 | kt = kzalloc(sizeof(*kt), GFP_KERNEL); | ||
157 | if (!kt) | ||
158 | goto err; | ||
159 | |||
160 | r = -EIO; | ||
161 | atomic_set(&kt->lost_records, 0); | ||
162 | kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir, | ||
163 | kt, &kvm_trace_lost_ops); | ||
164 | if (!kt->lost_file) | ||
165 | goto err; | ||
166 | |||
167 | kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size, | ||
168 | kuts->buf_nr, &kvm_relay_callbacks, kt); | ||
169 | if (!kt->rchan) | ||
170 | goto err; | ||
171 | |||
172 | kvm_trace = kt; | ||
173 | |||
174 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | ||
175 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; | ||
176 | |||
177 | r = marker_probe_register(p->name, p->format, p->probe_func, p); | ||
178 | if (r) | ||
179 | printk(KERN_INFO "Unable to register probe %s\n", | ||
180 | p->name); | ||
181 | } | ||
182 | |||
183 | kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING; | ||
184 | |||
185 | return 0; | ||
186 | err: | ||
187 | if (kt) { | ||
188 | if (kt->lost_file) | ||
189 | debugfs_remove(kt->lost_file); | ||
190 | if (kt->rchan) | ||
191 | relay_close(kt->rchan); | ||
192 | kfree(kt); | ||
193 | } | ||
194 | return r; | ||
195 | } | ||
196 | |||
197 | static int kvm_trace_enable(char __user *arg) | ||
198 | { | ||
199 | struct kvm_user_trace_setup kuts; | ||
200 | int ret; | ||
201 | |||
202 | ret = copy_from_user(&kuts, arg, sizeof(kuts)); | ||
203 | if (ret) | ||
204 | return -EFAULT; | ||
205 | |||
206 | ret = do_kvm_trace_enable(&kuts); | ||
207 | if (ret) | ||
208 | return ret; | ||
209 | |||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | static int kvm_trace_pause(void) | ||
214 | { | ||
215 | struct kvm_trace *kt = kvm_trace; | ||
216 | int r = -EINVAL; | ||
217 | |||
218 | if (kt == NULL) | ||
219 | return r; | ||
220 | |||
221 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING) { | ||
222 | kt->trace_state = KVM_TRACE_STATE_PAUSE; | ||
223 | relay_flush(kt->rchan); | ||
224 | r = 0; | ||
225 | } | ||
226 | |||
227 | return r; | ||
228 | } | ||
229 | |||
230 | void kvm_trace_cleanup(void) | ||
231 | { | ||
232 | struct kvm_trace *kt = kvm_trace; | ||
233 | int i; | ||
234 | |||
235 | if (kt == NULL) | ||
236 | return; | ||
237 | |||
238 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING || | ||
239 | kt->trace_state == KVM_TRACE_STATE_PAUSE) { | ||
240 | |||
241 | kt->trace_state = KVM_TRACE_STATE_CLEARUP; | ||
242 | |||
243 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | ||
244 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; | ||
245 | marker_probe_unregister(p->name, p->probe_func, p); | ||
246 | } | ||
247 | |||
248 | relay_close(kt->rchan); | ||
249 | debugfs_remove(kt->lost_file); | ||
250 | kfree(kt); | ||
251 | } | ||
252 | } | ||
253 | |||
254 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) | ||
255 | { | ||
256 | void __user *argp = (void __user *)arg; | ||
257 | long r = -EINVAL; | ||
258 | |||
259 | if (!capable(CAP_SYS_ADMIN)) | ||
260 | return -EPERM; | ||
261 | |||
262 | switch (ioctl) { | ||
263 | case KVM_TRACE_ENABLE: | ||
264 | r = kvm_trace_enable(argp); | ||
265 | break; | ||
266 | case KVM_TRACE_PAUSE: | ||
267 | r = kvm_trace_pause(); | ||
268 | break; | ||
269 | case KVM_TRACE_DISABLE: | ||
270 | r = 0; | ||
271 | kvm_trace_cleanup(); | ||
272 | break; | ||
273 | } | ||
274 | |||
275 | return r; | ||
276 | } | ||