author	Linus Torvalds <torvalds@linux-foundation.org>	2008-04-27 13:13:52 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-04-27 13:13:52 -0400
commit	42cadc86008aae0fd9ff31642dc01ed50723cf32 (patch)
tree	b05d4c8f0561bad5a0183a89fb23ce4c8ee1653c
parent	fba5c1af5c4fd6645fe62ea84ccde0981282cf66 (diff)
parent	66c0b394f08fd89236515c1c84485ea712a157be (diff)
Merge branch 'kvm-updates-2.6.26' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'kvm-updates-2.6.26' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (147 commits)
  KVM: kill file->f_count abuse in kvm
  KVM: MMU: kvm_pv_mmu_op should not take mmap_sem
  KVM: SVM: remove selective CR0 comment
  KVM: SVM: remove now obsolete FIXME comment
  KVM: SVM: disable CR8 intercept when tpr is not masking interrupts
  KVM: SVM: sync V_TPR with LAPIC.TPR if CR8 write intercept is disabled
  KVM: export kvm_lapic_set_tpr() to modules
  KVM: SVM: sync TPR value to V_TPR field in the VMCB
  KVM: ppc: PowerPC 440 KVM implementation
  KVM: Add MAINTAINERS entry for PowerPC KVM
  KVM: ppc: Add DCR access information to struct kvm_run
  ppc: Export tlb_44x_hwater for KVM
  KVM: Rename debugfs_dir to kvm_debugfs_dir
  KVM: x86 emulator: fix lea to really get the effective address
  KVM: x86 emulator: fix smsw and lmsw with a memory operand
  KVM: x86 emulator: initialize src.val and dst.val for register operands
  KVM: SVM: force a new asid when initializing the vmcb
  KVM: fix kvm_vcpu_kick vs __vcpu_run race
  KVM: add ioctls to save/store mpstate
  KVM: Rename VCPU_MP_STATE_* to KVM_MP_STATE_*
  ...
-rw-r--r--  Documentation/ia64/kvm.txt | 82
-rw-r--r--  Documentation/ioctl-number.txt | 2
-rw-r--r--  Documentation/powerpc/kvm_440.txt | 41
-rw-r--r--  Documentation/s390/kvm.txt | 125
-rw-r--r--  MAINTAINERS | 17
-rw-r--r--  arch/ia64/Kconfig | 3
-rw-r--r--  arch/ia64/Makefile | 1
-rw-r--r--  arch/ia64/kvm/Kconfig | 49
-rw-r--r--  arch/ia64/kvm/Makefile | 61
-rw-r--r--  arch/ia64/kvm/asm-offsets.c | 251
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c | 1806
-rw-r--r--  arch/ia64/kvm/kvm_fw.c | 500
-rw-r--r--  arch/ia64/kvm/kvm_minstate.h | 273
-rw-r--r--  arch/ia64/kvm/lapic.h | 25
-rw-r--r--  arch/ia64/kvm/misc.h | 93
-rw-r--r--  arch/ia64/kvm/mmio.c | 341
-rw-r--r--  arch/ia64/kvm/optvfault.S | 918
-rw-r--r--  arch/ia64/kvm/process.c | 970
-rw-r--r--  arch/ia64/kvm/trampoline.S | 1038
-rw-r--r--  arch/ia64/kvm/vcpu.c | 2163
-rw-r--r--  arch/ia64/kvm/vcpu.h | 740
-rw-r--r--  arch/ia64/kvm/vmm.c | 66
-rw-r--r--  arch/ia64/kvm/vmm_ivt.S | 1424
-rw-r--r--  arch/ia64/kvm/vti.h | 290
-rw-r--r--  arch/ia64/kvm/vtlb.c | 636
-rw-r--r--  arch/powerpc/Kconfig | 1
-rw-r--r--  arch/powerpc/Kconfig.debug | 3
-rw-r--r--  arch/powerpc/Makefile | 1
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 28
-rw-r--r--  arch/powerpc/kvm/44x_tlb.c | 224
-rw-r--r--  arch/powerpc/kvm/44x_tlb.h | 91
-rw-r--r--  arch/powerpc/kvm/Kconfig | 42
-rw-r--r--  arch/powerpc/kvm/Makefile | 15
-rw-r--r--  arch/powerpc/kvm/booke_guest.c | 615
-rw-r--r--  arch/powerpc/kvm/booke_host.c | 83
-rw-r--r--  arch/powerpc/kvm/booke_interrupts.S | 436
-rw-r--r--  arch/powerpc/kvm/emulate.c | 760
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 436
-rw-r--r--  arch/s390/Kconfig | 14
-rw-r--r--  arch/s390/Makefile | 2
-rw-r--r--  arch/s390/kernel/early.c | 4
-rw-r--r--  arch/s390/kernel/setup.c | 14
-rw-r--r--  arch/s390/kernel/vtime.c | 1
-rw-r--r--  arch/s390/kvm/Kconfig | 46
-rw-r--r--  arch/s390/kvm/Makefile | 14
-rw-r--r--  arch/s390/kvm/diag.c | 67
-rw-r--r--  arch/s390/kvm/gaccess.h | 274
-rw-r--r--  arch/s390/kvm/intercept.c | 216
-rw-r--r--  arch/s390/kvm/interrupt.c | 592
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 685
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 64
-rw-r--r--  arch/s390/kvm/priv.c | 323
-rw-r--r--  arch/s390/kvm/sie64a.S | 47
-rw-r--r--  arch/s390/kvm/sigp.c | 288
-rw-r--r--  arch/s390/mm/pgtable.c | 65
-rw-r--r--  arch/x86/Kconfig | 19
-rw-r--r--  arch/x86/kernel/Makefile | 2
-rw-r--r--  arch/x86/kernel/crash.c | 3
-rw-r--r--  arch/x86/kernel/kvm.c | 248
-rw-r--r--  arch/x86/kernel/kvmclock.c | 187
-rw-r--r--  arch/x86/kernel/reboot.c | 13
-rw-r--r--  arch/x86/kernel/setup_32.c | 6
-rw-r--r--  arch/x86/kernel/setup_64.c | 7
-rw-r--r--  arch/x86/kvm/Kconfig | 13
-rw-r--r--  arch/x86/kvm/Makefile | 6
-rw-r--r--  arch/x86/kvm/i8254.c | 611
-rw-r--r--  arch/x86/kvm/i8254.h | 63
-rw-r--r--  arch/x86/kvm/irq.c | 18
-rw-r--r--  arch/x86/kvm/irq.h | 3
-rw-r--r--  arch/x86/kvm/kvm_svm.h | 2
-rw-r--r--  arch/x86/kvm/lapic.c | 35
-rw-r--r--  arch/x86/kvm/mmu.c | 672
-rw-r--r--  arch/x86/kvm/mmu.h | 6
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 86
-rw-r--r--  arch/x86/kvm/segment_descriptor.h | 29
-rw-r--r--  arch/x86/kvm/svm.c | 352
-rw-r--r--  arch/x86/kvm/svm.h | 3
-rw-r--r--  arch/x86/kvm/tss.h | 59
-rw-r--r--  arch/x86/kvm/vmx.c | 278
-rw-r--r--  arch/x86/kvm/vmx.h | 10
-rw-r--r--  arch/x86/kvm/x86.c | 897
-rw-r--r--  arch/x86/kvm/x86_emulate.c | 285
-rw-r--r--  drivers/s390/Makefile | 2
-rw-r--r--  drivers/s390/kvm/Makefile | 9
-rw-r--r--  drivers/s390/kvm/kvm_virtio.c | 338
-rw-r--r--  include/asm-ia64/gcc_intrin.h | 12
-rw-r--r--  include/asm-ia64/kvm.h | 205
-rw-r--r--  include/asm-ia64/kvm_host.h | 524
-rw-r--r--  include/asm-ia64/kvm_para.h | 29
-rw-r--r--  include/asm-ia64/processor.h | 63
-rw-r--r--  include/asm-powerpc/kvm.h | 53
-rw-r--r--  include/asm-powerpc/kvm_asm.h | 55
-rw-r--r--  include/asm-powerpc/kvm_host.h | 152
-rw-r--r--  include/asm-powerpc/kvm_para.h | 37
-rw-r--r--  include/asm-powerpc/kvm_ppc.h | 88
-rw-r--r--  include/asm-powerpc/mmu-44x.h | 2
-rw-r--r--  include/asm-s390/Kbuild | 1
-rw-r--r--  include/asm-s390/kvm.h | 41
-rw-r--r--  include/asm-s390/kvm_host.h | 234
-rw-r--r--  include/asm-s390/kvm_para.h | 150
-rw-r--r--  include/asm-s390/kvm_virtio.h | 53
-rw-r--r--  include/asm-s390/lowcore.h | 15
-rw-r--r--  include/asm-s390/mmu.h | 1
-rw-r--r--  include/asm-s390/mmu_context.h | 8
-rw-r--r--  include/asm-s390/pgtable.h | 93
-rw-r--r--  include/asm-s390/setup.h | 1
-rw-r--r--  include/asm-x86/kvm.h | 41
-rw-r--r--  include/asm-x86/kvm_host.h | 99
-rw-r--r--  include/asm-x86/kvm_para.h | 55
-rw-r--r--  include/asm-x86/reboot.h | 2
-rw-r--r--  include/linux/kvm.h | 130
-rw-r--r--  include/linux/kvm_host.h | 59
-rw-r--r--  include/linux/kvm_para.h | 11
-rw-r--r--  include/linux/kvm_types.h | 2
-rw-r--r--  include/linux/sched.h | 2
-rw-r--r--  kernel/fork.c | 2
-rw-r--r--  mm/rmap.c | 7
-rw-r--r--  virt/kvm/kvm_main.c | 230
-rw-r--r--  virt/kvm/kvm_trace.c | 276
119 files changed, 23723 insertions, 638 deletions
diff --git a/Documentation/ia64/kvm.txt b/Documentation/ia64/kvm.txt
new file mode 100644
index 000000000000..bec9d815da33
--- /dev/null
+++ b/Documentation/ia64/kvm.txt
@@ -0,0 +1,82 @@
Currently, the kvm module is in EXPERIMENTAL stage on IA64. This means that
interfaces are not stable enough to use. So, please do not run critical
applications in a virtual machine. We will try our best to make it
stronger in future versions!

		Guide: How to boot up guests on kvm/ia64

This guide describes how to enable kvm support on IA-64 systems.

1. Get the kvm source from git.kernel.org.
	Userspace source:
		git clone git://git.kernel.org/pub/scm/virt/kvm/kvm-userspace.git
	Kernel source:
		git clone git://git.kernel.org/pub/scm/linux/kernel/git/xiantao/kvm-ia64.git

2. Compile the source code.
	2.1 Compile userspace code:
		(1) cd ./kvm-userspace
		(2) ./configure
		(3) cd kernel
		(4) make sync LINUX=$kernel_dir  ($kernel_dir is the directory of the kernel source.)
		(5) cd ..
		(6) make qemu
		(7) cd qemu; make install

	2.2 Compile kernel source code:
		(1) cd ./$kernel_dir
		(2) make menuconfig
		(3) Enter the Virtualization option, and choose kvm.
		(4) make
		(5) Once (4) is done, make modules_install
		(6) Make an initrd, and reboot the host machine with the new kernel.
		(7) Once (6) is done, cd $kernel_dir/arch/ia64/kvm
		(8) insmod kvm.ko; insmod kvm-intel.ko

Note: For step 2, please make sure that the host page size == TARGET_PAGE_SIZE of qemu; otherwise, the guest may fail to run.

3. Get the guest firmware named Flash.fd, and put it in the right place:
	(1) If you have the guest firmware (binary) released by Intel Corp for Xen, use it directly.

	(2) If you have no firmware at hand, please download its source from
		hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg
	    You can find the firmware's binary in the directory efi-vfirmware.hg/binaries.

	(3) Rename the firmware you obtained to Flash.fd, and copy it to /usr/local/share/qemu

4. Boot up Linux or Windows guests:
	4.1 Create or install an image for the guest to boot from. If you have Xen experience, it should be easy.

	4.2 Boot up guests using the following command:
		/usr/local/bin/qemu-system-ia64 -smp xx -m 512 -hda $your_image
	    (xx is the number of virtual processors for the guest; currently the maximum value is 4)

5. Known possible issue on some platforms with old firmware.

If you meet strange host crash issues, try to solve them in either of the following ways:

(1): Upgrade your firmware to the latest one.

(2): Apply the patch below to the kernel source.
diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
index 0b53344..f02b0f7 100644
--- a/arch/ia64/kernel/pal.S
+++ b/arch/ia64/kernel/pal.S
@@ -84,7 +84,8 @@ GLOBAL_ENTRY(ia64_pal_call_static)
	mov ar.pfs = loc1
	mov rp = loc0
	;;
-	srlz.d				// seralize restoration of psr.l
+	srlz.i			// seralize restoration of psr.l
+	;;
	br.ret.sptk.many b0
END(ia64_pal_call_static)

6. Bug report:
	If you find any issues when using kvm/ia64, please post the bug info to the kvm-ia64-devel mailing list.
	https://lists.sourceforge.net/lists/listinfo/kvm-ia64-devel/

Thanks for your interest! Let's work together, and make kvm/ia64 stronger and stronger!


					Xiantao Zhang <xiantao.zhang@intel.com>
					2008.3.10
diff --git a/Documentation/ioctl-number.txt b/Documentation/ioctl-number.txt
index c18363bd8d11..240ce7a56c40 100644
--- a/Documentation/ioctl-number.txt
+++ b/Documentation/ioctl-number.txt
@@ -183,6 +183,8 @@ Code Seq# Include File Comments
 0xAC	00-1F	linux/raw.h
 0xAD	00	Netfilter device	in development:
 					<mailto:rusty@rustcorp.com.au>
+0xAE	all	linux/kvm.h		Kernel-based Virtual Machine
+					<mailto:kvm-devel@lists.sourceforge.net>
 0xB0	all	RATIO devices		in development:
 					<mailto:vgo@ratio.de>
 0xB1	00-1F	PPPoX			<mailto:mostrows@styx.uwaterloo.ca>
diff --git a/Documentation/powerpc/kvm_440.txt b/Documentation/powerpc/kvm_440.txt
new file mode 100644
index 000000000000..c02a003fa03a
--- /dev/null
+++ b/Documentation/powerpc/kvm_440.txt
@@ -0,0 +1,41 @@
Hollis Blanchard <hollisb@us.ibm.com>
15 Apr 2008

Various notes on the implementation of KVM for PowerPC 440:

To enforce isolation, host userspace, guest kernel, and guest userspace all
run at user privilege level. Only the host kernel runs in supervisor mode.
Executing privileged instructions in the guest traps into KVM (in the host
kernel), where we decode and emulate them. Through this technique, unmodified
440 Linux kernels can be run (slowly) as guests. Future performance work will
focus on reducing the overhead and frequency of these traps.

The usual code flow starts from userspace invoking a "run" ioctl, which
causes KVM to switch into guest context. We use IVPR to hijack the host
interrupt vectors while running the guest, which allows us to direct all
interrupts to kvmppc_handle_interrupt(). At this point, we could either
- handle the interrupt completely (e.g. emulate "mtspr SPRG0"), or
- let the host interrupt handler run (e.g. when the decrementer fires), or
- return to host userspace (e.g. when the guest performs device MMIO),
  as sketched in the example below.

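For orientation, here is a hedged sketch of what the userspace side of that
flow can look like, using only the generic KVM ioctls. The vcpu file
descriptor and the mmap()ed struct kvm_run area are assumed to have been set
up already; run_vcpu() is an illustrative name and error handling is minimal.

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Illustrative only: drive one vcpu until an unhandled exit occurs. */
static int run_vcpu(int vcpu_fd, struct kvm_run *run)
{
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
			return -1;	/* e.g. interrupted by a signal */

		switch (run->exit_reason) {
		case KVM_EXIT_MMIO:
			/* Guest touched device memory: emulate the access
			 * (virtio, etc.) using run->mmio, then re-enter. */
			break;
		case KVM_EXIT_DCR:
			/* 440-specific: emulate the DCR access described
			 * by run->dcr, then re-enter. */
			break;
		default:
			fprintf(stderr, "unhandled exit %u\n", run->exit_reason);
			return -1;
		}
	}
}
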
Address spaces: We take advantage of the fact that Linux doesn't use the AS=1
address space (in host or guest), which gives us virtual address space to use
for guest mappings. While the guest is running, the host kernel remains mapped
in AS=0, but the guest can only use AS=1 mappings.

TLB entries: The TLB entries covering the host linear mapping remain
present while running the guest. This reduces the overhead of lightweight
exits, which are handled by KVM running in the host kernel. We keep three
copies of the TLB:
 - guest TLB: contents of the TLB as the guest sees it
 - shadow TLB: the TLB that is actually in hardware while the guest is running
 - host TLB: to restore TLB state when context switching guest -> host
When a TLB miss occurs because a mapping was not present in the shadow TLB,
but was present in the guest TLB, KVM handles the fault without invoking the
guest. Large guest pages are backed by multiple 4KB shadow pages through this
mechanism (see the sketch below).

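To make the three-copy scheme concrete, here is a rough, self-contained
sketch of the lightweight miss path. It is not the actual
arch/powerpc/kvm/44x_tlb.c code; the structures, names, and the 64-entry
size are illustrative only.

#include <stdbool.h>
#include <stddef.h>

struct tlbe { unsigned long eaddr, raddr, bytes; bool valid; };

struct vcpu_tlbs {
	struct tlbe guest_tlb[64];   /* the TLB as the guest sees it          */
	struct tlbe shadow_tlb[64];  /* what is really loaded in the hardware */
};

static struct tlbe *guest_tlb_lookup(struct vcpu_tlbs *t, unsigned long eaddr)
{
	for (size_t i = 0; i < 64; i++)
		if (t->guest_tlb[i].valid &&
		    eaddr - t->guest_tlb[i].eaddr < t->guest_tlb[i].bytes)
			return &t->guest_tlb[i];
	return NULL;
}

/* Returns true if KVM can service the miss itself (a lightweight exit);
 * false means the miss is real and must be reflected to the guest. */
static bool handle_shadow_tlb_miss(struct vcpu_tlbs *t, unsigned long eaddr,
				   size_t victim)
{
	struct tlbe *gtlbe = guest_tlb_lookup(t, eaddr);

	if (!gtlbe)
		return false;

	/* Back the (possibly large) guest page with one 4KB shadow entry
	 * covering the faulting address; repeated misses fill in the rest.
	 * In real code the shadow raddr comes from the host page tables,
	 * not directly from the guest entry as simplified here. */
	t->shadow_tlb[victim].eaddr = eaddr & ~0xfffUL;
	t->shadow_tlb[victim].raddr = gtlbe->raddr +
				      ((eaddr - gtlbe->eaddr) & ~0xfffUL);
	t->shadow_tlb[victim].bytes = 0x1000;
	t->shadow_tlb[victim].valid = true;
	return true;
}
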
IO: MMIO and DCR accesses are emulated by userspace. We use virtio for network
and block IO, so those drivers must be enabled in the guest. It's possible
that some qemu device emulation (e.g. e1000 or rtl8139) may also work with
little effort.
diff --git a/Documentation/s390/kvm.txt b/Documentation/s390/kvm.txt
new file mode 100644
index 000000000000..6f5ceb0f09fc
--- /dev/null
+++ b/Documentation/s390/kvm.txt
@@ -0,0 +1,125 @@
*** BIG FAT WARNING ***
The kvm module is currently in EXPERIMENTAL state for s390. This means that
the interface to the module is not yet considered to remain stable. Thus, be
prepared that we keep breaking your userspace application and guest
compatibility over and over again until we feel happy with the result. Make sure
your guest kernel, your host kernel, and your userspace launcher are in a
consistent state.

This document describes the unique ioctl calls to /dev/kvm, the resulting
kvm-vm file descriptors, and the kvm-vcpu file descriptors that differ from x86.

1. ioctl calls to /dev/kvm
KVM does support the following ioctls on s390 that are common with other
architectures and behave the same:
KVM_GET_API_VERSION
KVM_CREATE_VM (*) see note
KVM_CHECK_EXTENSION
KVM_GET_VCPU_MMAP_SIZE

Notes:
* KVM_CREATE_VM may fail on s390 if the calling process has multiple
threads and has not called KVM_S390_ENABLE_SIE before.

In addition, on s390 the following architecture specific ioctls are supported:
ioctl: KVM_S390_ENABLE_SIE
args: none
see also: include/linux/kvm.h
This call causes the kernel to switch on PGSTE in the user page table. This
operation is needed in order to run a virtual machine, and it requires the
calling process to be single-threaded. Note that the first call to KVM_CREATE_VM
will implicitly try to switch on PGSTE if the user process has not called
KVM_S390_ENABLE_SIE before. User processes that want to launch multiple threads
before creating a virtual machine have to call KVM_S390_ENABLE_SIE, or they will
observe an error calling KVM_CREATE_VM. Switching on PGSTE is a one-time
operation, is not reversible, and persists over the entire lifetime of
the calling process. It does not have any user-visible effect other than a small
performance penalty.
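
As a hedged, minimal illustration (not taken from a real launcher), the
expected call order for a process that wants to start threads later looks
roughly like this; err() is just used for terse error handling:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>
#include <err.h>

int main(void)
{
	int kvm_fd, vm_fd;

	kvm_fd = open("/dev/kvm", O_RDWR);
	if (kvm_fd < 0)
		err(1, "/dev/kvm");

	/* Must happen while the process is still single-threaded;
	 * KVM_CREATE_VM would otherwise try (and possibly fail) to do
	 * the PGSTE switch implicitly. */
	if (ioctl(kvm_fd, KVM_S390_ENABLE_SIE, 0) < 0)
		err(1, "KVM_S390_ENABLE_SIE");

	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
	if (vm_fd < 0)
		err(1, "KVM_CREATE_VM");

	/* ... now it is safe to create threads, vcpus, and so on ... */
	return 0;
}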

2. ioctl calls to the kvm-vm file descriptor
KVM does support the following ioctls on s390 that are common with other
architectures and behave the same:
KVM_CREATE_VCPU
KVM_SET_USER_MEMORY_REGION (*) see note
KVM_GET_DIRTY_LOG (**) see note

Notes:
* kvm does only allow exactly one memory slot on s390, which has to start
  at guest absolute address zero and at a user address that is aligned on any
  page boundary. This hardware "limitation" allows us to have a few unique
  optimizations. The memory slot doesn't actually have to be filled
  with memory; it may contain sparse holes. That said, with different
  user memory layouts this still allows a lot of flexibility when
  doing the guest memory setup (see the sketch after these notes).
** KVM_GET_DIRTY_LOG doesn't work properly yet. The user will receive an empty
log. This ioctl call is only needed for guest migration, and we intend to
implement it in the future.
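
For illustration, a minimal sketch of registering that single slot; the
anonymous mmap() backing and the size parameter are arbitrary choices for
the example, and vm_fd is the descriptor returned by KVM_CREATE_VM:

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

/* Register guest memory: exactly one slot, starting at guest address 0. */
static int setup_guest_memory(int vm_fd, size_t size)
{
	void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED)
		return -1;

	struct kvm_userspace_memory_region region = {
		.slot            = 0,
		.flags           = 0,
		.guest_phys_addr = 0,	/* must be guest absolute 0 on s390 */
		.memory_size     = size,
		.userspace_addr  = (unsigned long)mem,	/* page aligned via mmap */
	};
	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}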

In addition, on s390 the following architecture specific ioctls for the kvm-vm
file descriptor are supported:
ioctl: KVM_S390_INTERRUPT
args: struct kvm_s390_interrupt *
see also: include/linux/kvm.h
This ioctl is used to submit a floating interrupt for a virtual machine.
Floating interrupts may be delivered to any virtual cpu in the configuration.
Only some interrupt types defined in include/linux/kvm.h make sense when
submitted as floating interrupts. The following interrupts are not considered
to be useful as floating interrupts, and a call to inject them will result in
a -EINVAL error code: program interrupts and interprocessor signals. Valid
floating interrupts are:
KVM_S390_INT_VIRTIO
KVM_S390_INT_SERVICE
3. ioctl calls to the kvm-vcpu file descriptor
KVM does support the following ioctls on s390 that are common with other
architectures and behave the same:
KVM_RUN
KVM_GET_REGS
KVM_SET_REGS
KVM_GET_SREGS
KVM_SET_SREGS
KVM_GET_FPU
KVM_SET_FPU

In addition, on s390 the following architecture specific ioctls for the
kvm-vcpu file descriptor are supported:
ioctl: KVM_S390_INTERRUPT
args: struct kvm_s390_interrupt *
see also: include/linux/kvm.h
This ioctl is used to submit an interrupt for a specific virtual cpu.
Only some interrupt types defined in include/linux/kvm.h make sense when
submitted for a specific cpu. The following interrupts are not considered
to be useful, and a call to inject them will result in a -EINVAL error code:
service processor calls and virtio interrupts. Valid interrupt types are:
KVM_S390_PROGRAM_INT
KVM_S390_SIGP_STOP
KVM_S390_RESTART
KVM_S390_SIGP_SET_PREFIX
KVM_S390_INT_EMERGENCY

ioctl: KVM_S390_STORE_STATUS
args: unsigned long
see also: include/linux/kvm.h
This ioctl stores the state of the cpu at the guest real address given as
argument, unless one of the following values defined in include/linux/kvm.h
is given as argument:
KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in
absolute lowcore as defined by the principles of operation
KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in
its prefix page just like the dump tool that comes with zipl. This is useful
to create a system dump for use with lkcdutils or crash.

ioctl: KVM_S390_SET_INITIAL_PSW
args: struct kvm_s390_psw *
see also: include/linux/kvm.h
This ioctl can be used to set the processor status word (psw) of a stopped cpu
prior to running it with KVM_RUN. Note that this call is not required to modify
the psw during sie intercepts that fall back to userspace, because struct kvm_run
does contain the psw, and this value is evaluated during reentry of KVM_RUN
after the intercept exit was recognized.
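
A hedged sketch of bringing up a stopped vcpu with this ioctl; the PSW mask
and start address are caller-supplied and the values are purely illustrative,
not a recommended boot PSW:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Point a stopped vcpu at its first instruction and let it run. */
static int start_vcpu(int vcpu_fd, __u64 psw_mask, __u64 start_address)
{
	struct kvm_s390_psw psw = {
		.mask = psw_mask,	/* machine-state bits, illustrative   */
		.addr = start_address,	/* guest address of first instruction */
	};

	if (ioctl(vcpu_fd, KVM_S390_SET_INITIAL_PSW, &psw) < 0)
		return -1;
	return ioctl(vcpu_fd, KVM_RUN, 0);
}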

ioctl: KVM_S390_INITIAL_RESET
args: none
see also: include/linux/kvm.h
This ioctl can be used to perform an initial cpu reset as defined by the
principles of operation. The target cpu has to be in stopped state.
diff --git a/MAINTAINERS b/MAINTAINERS
index a942f3852499..c1dd1ae7b133 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2329,6 +2329,13 @@ L: kvm-devel@lists.sourceforge.net
 W: kvm.sourceforge.net
 S: Supported

+KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
+P: Hollis Blanchard
+M: hollisb@us.ibm.com
+L: kvm-ppc-devel@lists.sourceforge.net
+W: kvm.sourceforge.net
+S: Supported
+
 KERNEL VIRTUAL MACHINE For Itanium(KVM/IA64)
 P: Anthony Xu
 M: anthony.xu@intel.com
@@ -2338,6 +2345,16 @@ L: kvm-ia64-devel@lists.sourceforge.net
 W: kvm.sourceforge.net
 S: Supported

+KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
+P: Carsten Otte
+M: cotte@de.ibm.com
+P: Christian Borntraeger
+M: borntraeger@de.ibm.com
+M: linux390@de.ibm.com
+L: linux-s390@vger.kernel.org
+W: http://www.ibm.com/developerworks/linux/linux390/
+S: Supported
+
 KEXEC
 P: Eric Biederman
 M: ebiederm@xmission.com
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index cd13e138bd03..3aa6c821449a 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -19,6 +19,7 @@ config IA64
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_KVM
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
@@ -589,6 +590,8 @@ config MSPEC

 source "fs/Kconfig"

+source "arch/ia64/kvm/Kconfig"
+
 source "lib/Kconfig"

 #
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index f1645c4f7039..ec4cca477f49 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -57,6 +57,7 @@ core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/
 core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/
 core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
 core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/
+core-$(CONFIG_KVM) += arch/ia64/kvm/

 drivers-$(CONFIG_PCI) += arch/ia64/pci/
 drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
new file mode 100644
index 000000000000..7914e4828504
--- /dev/null
+++ b/arch/ia64/kvm/Kconfig
@@ -0,0 +1,49 @@
1#
2# KVM configuration
3#
4config HAVE_KVM
5 bool
6
7menuconfig VIRTUALIZATION
8 bool "Virtualization"
9 depends on HAVE_KVM || IA64
10 default y
11 ---help---
12 Say Y here to get to see options for using your Linux host to run other
13 operating systems inside virtual machines (guests).
14 This option alone does not add any kernel code.
15
16 If you say N, all options in this submenu will be skipped and disabled.
17
18if VIRTUALIZATION
19
20config KVM
21 tristate "Kernel-based Virtual Machine (KVM) support"
22 depends on HAVE_KVM && EXPERIMENTAL
23 select PREEMPT_NOTIFIERS
24 select ANON_INODES
25 ---help---
26 Support hosting fully virtualized guest machines using hardware
27 virtualization extensions. You will need a fairly recent
28 processor equipped with virtualization extensions. You will also
29 need to select one or more of the processor modules below.
30
31 This module provides access to the hardware capabilities through
32 a character device node named /dev/kvm.
33
34 To compile this as a module, choose M here: the module
35 will be called kvm.
36
37 If unsure, say N.
38
39config KVM_INTEL
40 tristate "KVM for Intel Itanium 2 processors support"
41 depends on KVM && m
42 ---help---
43 Provides support for KVM on Itanium 2 processors equipped with the VT
44 extensions.
45
46config KVM_TRACE
47 bool
48
49endif # VIRTUALIZATION
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
new file mode 100644
index 000000000000..41b034ffa73b
--- /dev/null
+++ b/arch/ia64/kvm/Makefile
@@ -0,0 +1,61 @@
1#This Make file is to generate asm-offsets.h and build source.
2#
3
4#Generate asm-offsets.h for vmm module build
5offsets-file := asm-offsets.h
6
7always := $(offsets-file)
8targets := $(offsets-file)
9targets += arch/ia64/kvm/asm-offsets.s
10clean-files := $(addprefix $(objtree)/,$(targets) $(obj)/memcpy.S $(obj)/memset.S)
11
12# Default sed regexp - multiline due to syntax constraints
13define sed-y
14 "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
15endef
16
17quiet_cmd_offsets = GEN $@
18define cmd_offsets
19 (set -e; \
20 echo "#ifndef __ASM_KVM_OFFSETS_H__"; \
21 echo "#define __ASM_KVM_OFFSETS_H__"; \
22 echo "/*"; \
23 echo " * DO NOT MODIFY."; \
24 echo " *"; \
25 echo " * This file was generated by Makefile"; \
26 echo " *"; \
27 echo " */"; \
28 echo ""; \
29 sed -ne $(sed-y) $<; \
30 echo ""; \
31 echo "#endif" ) > $@
32endef
33# We use internal rules to avoid the "is up to date" message from make
34arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c
35 $(call if_changed_dep,cc_s_c)
36
37$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s
38 $(call cmd,offsets)
39
40#
41# Makefile for Kernel-based Virtual Machine module
42#
43
44EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
45
46$(addprefix $(objtree)/,$(obj)/memcpy.S $(obj)/memset.S):
47 $(shell ln -snf ../lib/memcpy.S $(src)/memcpy.S)
48 $(shell ln -snf ../lib/memset.S $(src)/memset.S)
49
50common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
51
52kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
53obj-$(CONFIG_KVM) += kvm.o
54
55FORCE : $(obj)/$(offsets-file)
56EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
57kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
58 vtlb.o process.o
59#Add link memcpy and memset to avoid possible structure assignment error
60kvm-intel-objs += memset.o memcpy.o
61obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
new file mode 100644
index 000000000000..4e3dc13a619c
--- /dev/null
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -0,0 +1,251 @@
1/*
2 * asm-offsets.c Generate definitions needed by assembly language modules.
3 * This code generates raw asm output which is post-processed
4 * to extract and format the required data.
5 *
6 * Anthony Xu <anthony.xu@intel.com>
7 * Xiantao Zhang <xiantao.zhang@intel.com>
8 * Copyright (c) 2007 Intel Corporation KVM support.
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms and conditions of the GNU General Public License,
12 * version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
21 * Place - Suite 330, Boston, MA 02111-1307 USA.
22 *
23 */
24
25#include <linux/autoconf.h>
26#include <linux/kvm_host.h>
27
28#include "vcpu.h"
29
30#define task_struct kvm_vcpu
31
32#define DEFINE(sym, val) \
33 asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
34
35#define BLANK() asm volatile("\n->" : :)
36
37#define OFFSET(_sym, _str, _mem) \
38 DEFINE(_sym, offsetof(_str, _mem));
39
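/*
 * Note on the mechanism (illustrative, value made up): each DEFINE() below
 * expands to an asm statement whose only effect is to print a marker line
 * such as
 *
 *	->VMM_TASK_SIZE (21504) sizeof(struct kvm_vcpu)
 *
 * into the compiler's assembler output. The sed rule in
 * arch/ia64/kvm/Makefile then rewrites every such "->" line into a
 * "#define VMM_TASK_SIZE (21504)" (keeping the original expression as a
 * trailing comment), which is how asm-offsets.h supplies structure sizes
 * and field offsets to the assembly sources in this directory.
 */
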
40void foo(void)
41{
42 DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
43 DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs));
44
45 BLANK();
46
47 DEFINE(VMM_VCPU_META_RR0_OFFSET,
48 offsetof(struct kvm_vcpu, arch.metaphysical_rr0));
49 DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
50 offsetof(struct kvm_vcpu,
51 arch.metaphysical_saved_rr0));
52 DEFINE(VMM_VCPU_VRR0_OFFSET,
53 offsetof(struct kvm_vcpu, arch.vrr[0]));
54 DEFINE(VMM_VPD_IRR0_OFFSET,
55 offsetof(struct vpd, irr[0]));
56 DEFINE(VMM_VCPU_ITC_CHECK_OFFSET,
57 offsetof(struct kvm_vcpu, arch.itc_check));
58 DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET,
59 offsetof(struct kvm_vcpu, arch.irq_check));
60 DEFINE(VMM_VPD_VHPI_OFFSET,
61 offsetof(struct vpd, vhpi));
62 DEFINE(VMM_VCPU_VSA_BASE_OFFSET,
63 offsetof(struct kvm_vcpu, arch.vsa_base));
64 DEFINE(VMM_VCPU_VPD_OFFSET,
65 offsetof(struct kvm_vcpu, arch.vpd));
66 DEFINE(VMM_VCPU_IRQ_CHECK,
67 offsetof(struct kvm_vcpu, arch.irq_check));
68 DEFINE(VMM_VCPU_TIMER_PENDING,
69 offsetof(struct kvm_vcpu, arch.timer_pending));
70 DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
71 offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0));
72 DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
73 offsetof(struct kvm_vcpu, arch.mode_flags));
74 DEFINE(VMM_VCPU_ITC_OFS_OFFSET,
75 offsetof(struct kvm_vcpu, arch.itc_offset));
76 DEFINE(VMM_VCPU_LAST_ITC_OFFSET,
77 offsetof(struct kvm_vcpu, arch.last_itc));
78 DEFINE(VMM_VCPU_SAVED_GP_OFFSET,
79 offsetof(struct kvm_vcpu, arch.saved_gp));
80
81 BLANK();
82
83 DEFINE(VMM_PT_REGS_B6_OFFSET,
84 offsetof(struct kvm_pt_regs, b6));
85 DEFINE(VMM_PT_REGS_B7_OFFSET,
86 offsetof(struct kvm_pt_regs, b7));
87 DEFINE(VMM_PT_REGS_AR_CSD_OFFSET,
88 offsetof(struct kvm_pt_regs, ar_csd));
89 DEFINE(VMM_PT_REGS_AR_SSD_OFFSET,
90 offsetof(struct kvm_pt_regs, ar_ssd));
91 DEFINE(VMM_PT_REGS_R8_OFFSET,
92 offsetof(struct kvm_pt_regs, r8));
93 DEFINE(VMM_PT_REGS_R9_OFFSET,
94 offsetof(struct kvm_pt_regs, r9));
95 DEFINE(VMM_PT_REGS_R10_OFFSET,
96 offsetof(struct kvm_pt_regs, r10));
97 DEFINE(VMM_PT_REGS_R11_OFFSET,
98 offsetof(struct kvm_pt_regs, r11));
99 DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET,
100 offsetof(struct kvm_pt_regs, cr_ipsr));
101 DEFINE(VMM_PT_REGS_CR_IIP_OFFSET,
102 offsetof(struct kvm_pt_regs, cr_iip));
103 DEFINE(VMM_PT_REGS_CR_IFS_OFFSET,
104 offsetof(struct kvm_pt_regs, cr_ifs));
105 DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET,
106 offsetof(struct kvm_pt_regs, ar_unat));
107 DEFINE(VMM_PT_REGS_AR_PFS_OFFSET,
108 offsetof(struct kvm_pt_regs, ar_pfs));
109 DEFINE(VMM_PT_REGS_AR_RSC_OFFSET,
110 offsetof(struct kvm_pt_regs, ar_rsc));
111 DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET,
112 offsetof(struct kvm_pt_regs, ar_rnat));
113
114 DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET,
115 offsetof(struct kvm_pt_regs, ar_bspstore));
116 DEFINE(VMM_PT_REGS_PR_OFFSET,
117 offsetof(struct kvm_pt_regs, pr));
118 DEFINE(VMM_PT_REGS_B0_OFFSET,
119 offsetof(struct kvm_pt_regs, b0));
120 DEFINE(VMM_PT_REGS_LOADRS_OFFSET,
121 offsetof(struct kvm_pt_regs, loadrs));
122 DEFINE(VMM_PT_REGS_R1_OFFSET,
123 offsetof(struct kvm_pt_regs, r1));
124 DEFINE(VMM_PT_REGS_R12_OFFSET,
125 offsetof(struct kvm_pt_regs, r12));
126 DEFINE(VMM_PT_REGS_R13_OFFSET,
127 offsetof(struct kvm_pt_regs, r13));
128 DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET,
129 offsetof(struct kvm_pt_regs, ar_fpsr));
130 DEFINE(VMM_PT_REGS_R15_OFFSET,
131 offsetof(struct kvm_pt_regs, r15));
132 DEFINE(VMM_PT_REGS_R14_OFFSET,
133 offsetof(struct kvm_pt_regs, r14));
134 DEFINE(VMM_PT_REGS_R2_OFFSET,
135 offsetof(struct kvm_pt_regs, r2));
136 DEFINE(VMM_PT_REGS_R3_OFFSET,
137 offsetof(struct kvm_pt_regs, r3));
138 DEFINE(VMM_PT_REGS_R16_OFFSET,
139 offsetof(struct kvm_pt_regs, r16));
140 DEFINE(VMM_PT_REGS_R17_OFFSET,
141 offsetof(struct kvm_pt_regs, r17));
142 DEFINE(VMM_PT_REGS_R18_OFFSET,
143 offsetof(struct kvm_pt_regs, r18));
144 DEFINE(VMM_PT_REGS_R19_OFFSET,
145 offsetof(struct kvm_pt_regs, r19));
146 DEFINE(VMM_PT_REGS_R20_OFFSET,
147 offsetof(struct kvm_pt_regs, r20));
148 DEFINE(VMM_PT_REGS_R21_OFFSET,
149 offsetof(struct kvm_pt_regs, r21));
150 DEFINE(VMM_PT_REGS_R22_OFFSET,
151 offsetof(struct kvm_pt_regs, r22));
152 DEFINE(VMM_PT_REGS_R23_OFFSET,
153 offsetof(struct kvm_pt_regs, r23));
154 DEFINE(VMM_PT_REGS_R24_OFFSET,
155 offsetof(struct kvm_pt_regs, r24));
156 DEFINE(VMM_PT_REGS_R25_OFFSET,
157 offsetof(struct kvm_pt_regs, r25));
158 DEFINE(VMM_PT_REGS_R26_OFFSET,
159 offsetof(struct kvm_pt_regs, r26));
160 DEFINE(VMM_PT_REGS_R27_OFFSET,
161 offsetof(struct kvm_pt_regs, r27));
162 DEFINE(VMM_PT_REGS_R28_OFFSET,
163 offsetof(struct kvm_pt_regs, r28));
164 DEFINE(VMM_PT_REGS_R29_OFFSET,
165 offsetof(struct kvm_pt_regs, r29));
166 DEFINE(VMM_PT_REGS_R30_OFFSET,
167 offsetof(struct kvm_pt_regs, r30));
168 DEFINE(VMM_PT_REGS_R31_OFFSET,
169 offsetof(struct kvm_pt_regs, r31));
170 DEFINE(VMM_PT_REGS_AR_CCV_OFFSET,
171 offsetof(struct kvm_pt_regs, ar_ccv));
172 DEFINE(VMM_PT_REGS_F6_OFFSET,
173 offsetof(struct kvm_pt_regs, f6));
174 DEFINE(VMM_PT_REGS_F7_OFFSET,
175 offsetof(struct kvm_pt_regs, f7));
176 DEFINE(VMM_PT_REGS_F8_OFFSET,
177 offsetof(struct kvm_pt_regs, f8));
178 DEFINE(VMM_PT_REGS_F9_OFFSET,
179 offsetof(struct kvm_pt_regs, f9));
180 DEFINE(VMM_PT_REGS_F10_OFFSET,
181 offsetof(struct kvm_pt_regs, f10));
182 DEFINE(VMM_PT_REGS_F11_OFFSET,
183 offsetof(struct kvm_pt_regs, f11));
184 DEFINE(VMM_PT_REGS_R4_OFFSET,
185 offsetof(struct kvm_pt_regs, r4));
186 DEFINE(VMM_PT_REGS_R5_OFFSET,
187 offsetof(struct kvm_pt_regs, r5));
188 DEFINE(VMM_PT_REGS_R6_OFFSET,
189 offsetof(struct kvm_pt_regs, r6));
190 DEFINE(VMM_PT_REGS_R7_OFFSET,
191 offsetof(struct kvm_pt_regs, r7));
192 DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET,
193 offsetof(struct kvm_pt_regs, eml_unat));
194 DEFINE(VMM_VCPU_IIPA_OFFSET,
195 offsetof(struct kvm_vcpu, arch.cr_iipa));
196 DEFINE(VMM_VCPU_OPCODE_OFFSET,
197 offsetof(struct kvm_vcpu, arch.opcode));
198 DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause));
199 DEFINE(VMM_VCPU_ISR_OFFSET,
200 offsetof(struct kvm_vcpu, arch.cr_isr));
201 DEFINE(VMM_PT_REGS_R16_SLOT,
202 (((offsetof(struct kvm_pt_regs, r16)
203 - sizeof(struct kvm_pt_regs)) >> 3) & 0x3f));
204 DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
205 offsetof(struct kvm_vcpu, arch.mode_flags));
206 DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp));
207 BLANK();
208
209 DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd));
210 DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs));
211 DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET,
212 offsetof(struct kvm_vcpu, arch.insvc[0]));
213 DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta));
214 DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr));
215
216 DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4]));
217 DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5]));
218 DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12]));
219 DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13]));
220 DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0]));
221 DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1]));
222 DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0]));
223 DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1]));
224 DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2]));
225 DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0]));
226 DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16]));
227 DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18]));
228 DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19]));
229 DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21]));
230 DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24]));
231 DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27]));
232 DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28]));
233 DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29]));
234 DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30]));
235 DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36]));
236 DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40]));
237 DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64]));
238 DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65]));
239 DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0]));
240 DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2]));
241 DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8]));
242 DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0]));
243 DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0]));
244 DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2]));
245 DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3]));
246 DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32]));
247 DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33]));
248 DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0]));
249 DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr));
250 BLANK();
251}
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
new file mode 100644
index 000000000000..6df073240135
--- /dev/null
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -0,0 +1,1806 @@
1
2/*
3 * kvm_ia64.c: Basic KVM support on Itanium series processors
4 *
5 *
6 * Copyright (C) 2007, Intel Corporation.
7 * Xiantao Zhang (xiantao.zhang@intel.com)
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms and conditions of the GNU General Public License,
11 * version 2, as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
20 * Place - Suite 330, Boston, MA 02111-1307 USA.
21 *
22 */
23
24#include <linux/module.h>
25#include <linux/errno.h>
26#include <linux/percpu.h>
27#include <linux/gfp.h>
28#include <linux/fs.h>
29#include <linux/smp.h>
30#include <linux/kvm_host.h>
31#include <linux/kvm.h>
32#include <linux/bitops.h>
33#include <linux/hrtimer.h>
34#include <linux/uaccess.h>
35
36#include <asm/pgtable.h>
37#include <asm/gcc_intrin.h>
38#include <asm/pal.h>
39#include <asm/cacheflush.h>
40#include <asm/div64.h>
41#include <asm/tlb.h>
42
43#include "misc.h"
44#include "vti.h"
45#include "iodev.h"
46#include "ioapic.h"
47#include "lapic.h"
48
49static unsigned long kvm_vmm_base;
50static unsigned long kvm_vsa_base;
51static unsigned long kvm_vm_buffer;
52static unsigned long kvm_vm_buffer_size;
53unsigned long kvm_vmm_gp;
54
55static long vp_env_info;
56
57static struct kvm_vmm_info *kvm_vmm_info;
58
59static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
60
61struct kvm_stats_debugfs_item debugfs_entries[] = {
62 { NULL }
63};
64
65
66struct fdesc{
67 unsigned long ip;
68 unsigned long gp;
69};
70
71static void kvm_flush_icache(unsigned long start, unsigned long len)
72{
73 int l;
74
75 for (l = 0; l < (len + 32); l += 32)
76 ia64_fc(start + l);
77
78 ia64_sync_i();
79 ia64_srlz_i();
80}
81
82static void kvm_flush_tlb_all(void)
83{
84 unsigned long i, j, count0, count1, stride0, stride1, addr;
85 long flags;
86
87 addr = local_cpu_data->ptce_base;
88 count0 = local_cpu_data->ptce_count[0];
89 count1 = local_cpu_data->ptce_count[1];
90 stride0 = local_cpu_data->ptce_stride[0];
91 stride1 = local_cpu_data->ptce_stride[1];
92
93 local_irq_save(flags);
94 for (i = 0; i < count0; ++i) {
95 for (j = 0; j < count1; ++j) {
96 ia64_ptce(addr);
97 addr += stride1;
98 }
99 addr += stride0;
100 }
101 local_irq_restore(flags);
102 ia64_srlz_i(); /* srlz.i implies srlz.d */
103}
104
105long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
106{
107 struct ia64_pal_retval iprv;
108
109 PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
110 (u64)opt_handler);
111
112 return iprv.status;
113}
114
115static DEFINE_SPINLOCK(vp_lock);
116
117void kvm_arch_hardware_enable(void *garbage)
118{
119 long status;
120 long tmp_base;
121 unsigned long pte;
122 unsigned long saved_psr;
123 int slot;
124
125 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
126 PAGE_KERNEL));
127 local_irq_save(saved_psr);
128 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
129 if (slot < 0)
130 return;
131 local_irq_restore(saved_psr);
132
133 spin_lock(&vp_lock);
134 status = ia64_pal_vp_init_env(kvm_vsa_base ?
135 VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
136 __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
137 if (status != 0) {
138 printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
139 return ;
140 }
141
142 if (!kvm_vsa_base) {
143 kvm_vsa_base = tmp_base;
144 printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base);
145 }
146 spin_unlock(&vp_lock);
147 ia64_ptr_entry(0x3, slot);
148}
149
150void kvm_arch_hardware_disable(void *garbage)
151{
152
153 long status;
154 int slot;
155 unsigned long pte;
156 unsigned long saved_psr;
157 unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA);
158
159 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
160 PAGE_KERNEL));
161
162 local_irq_save(saved_psr);
163 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
164 if (slot < 0)
165 return;
166 local_irq_restore(saved_psr);
167
168 status = ia64_pal_vp_exit_env(host_iva);
169 if (status)
170 printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n",
171 status);
172 ia64_ptr_entry(0x3, slot);
173}
174
175void kvm_arch_check_processor_compat(void *rtn)
176{
177 *(int *)rtn = 0;
178}
179
180int kvm_dev_ioctl_check_extension(long ext)
181{
182
183 int r;
184
185 switch (ext) {
186 case KVM_CAP_IRQCHIP:
187 case KVM_CAP_USER_MEMORY:
188
189 r = 1;
190 break;
191 default:
192 r = 0;
193 }
194 return r;
195
196}
197
198static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
199 gpa_t addr)
200{
201 struct kvm_io_device *dev;
202
203 dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
204
205 return dev;
206}
207
208static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
209{
210 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
211 kvm_run->hw.hardware_exit_reason = 1;
212 return 0;
213}
214
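/*
 * Handle an MMIO intercept: accesses that hit the in-kernel IOAPIC page are
 * completed immediately via the mmio bus; every other access is copied into
 * kvm_run->mmio and the vcpu returns to userspace for device emulation.
 */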
215static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
216{
217 struct kvm_mmio_req *p;
218 struct kvm_io_device *mmio_dev;
219
220 p = kvm_get_vcpu_ioreq(vcpu);
221
222 if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
223 goto mmio;
224 vcpu->mmio_needed = 1;
225 vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr;
226 vcpu->mmio_size = kvm_run->mmio.len = p->size;
227 vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
228
229 if (vcpu->mmio_is_write)
230 memcpy(vcpu->mmio_data, &p->data, p->size);
231 memcpy(kvm_run->mmio.data, &p->data, p->size);
232 kvm_run->exit_reason = KVM_EXIT_MMIO;
233 return 0;
234mmio:
235 mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr);
236 if (mmio_dev) {
237 if (!p->dir)
238 kvm_iodevice_write(mmio_dev, p->addr, p->size,
239 &p->data);
240 else
241 kvm_iodevice_read(mmio_dev, p->addr, p->size,
242 &p->data);
243
244 } else
245 printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
246 p->state = STATE_IORESP_READY;
247
248 return 1;
249}
250
251static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
252{
253 struct exit_ctl_data *p;
254
255 p = kvm_get_exit_data(vcpu);
256
257 if (p->exit_reason == EXIT_REASON_PAL_CALL)
258 return kvm_pal_emul(vcpu, kvm_run);
259 else {
260 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
261 kvm_run->hw.hardware_exit_reason = 2;
262 return 0;
263 }
264}
265
266static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
267{
268 struct exit_ctl_data *p;
269
270 p = kvm_get_exit_data(vcpu);
271
272 if (p->exit_reason == EXIT_REASON_SAL_CALL) {
273 kvm_sal_emul(vcpu);
274 return 1;
275 } else {
276 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
277 kvm_run->hw.hardware_exit_reason = 3;
278 return 0;
279 }
280
281}
282
283/*
284 * offset: address offset to IPI space.
285 * value: deliver value.
286 */
287static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
288 uint64_t vector)
289{
290 switch (dm) {
291 case SAPIC_FIXED:
292 kvm_apic_set_irq(vcpu, vector, 0);
293 break;
294 case SAPIC_NMI:
295 kvm_apic_set_irq(vcpu, 2, 0);
296 break;
297 case SAPIC_EXTINT:
298 kvm_apic_set_irq(vcpu, 0, 0);
299 break;
300 case SAPIC_INIT:
301 case SAPIC_PMI:
302 default:
303 printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
304 break;
305 }
306}
307
308static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
309 unsigned long eid)
310{
311 union ia64_lid lid;
312 int i;
313
314 for (i = 0; i < KVM_MAX_VCPUS; i++) {
315 if (kvm->vcpus[i]) {
316 lid.val = VCPU_LID(kvm->vcpus[i]);
317 if (lid.id == id && lid.eid == eid)
318 return kvm->vcpus[i];
319 }
320 }
321
322 return NULL;
323}
324
325static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
326{
327 struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
328 struct kvm_vcpu *target_vcpu;
329 struct kvm_pt_regs *regs;
330 union ia64_ipi_a addr = p->u.ipi_data.addr;
331 union ia64_ipi_d data = p->u.ipi_data.data;
332
333 target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid);
334 if (!target_vcpu)
335 return handle_vm_error(vcpu, kvm_run);
336
337 if (!target_vcpu->arch.launched) {
338 regs = vcpu_regs(target_vcpu);
339
340 regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip;
341 regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp;
342
343 target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
344 if (waitqueue_active(&target_vcpu->wq))
345 wake_up_interruptible(&target_vcpu->wq);
346 } else {
347 vcpu_deliver_ipi(target_vcpu, data.dm, data.vector);
348 if (target_vcpu != vcpu)
349 kvm_vcpu_kick(target_vcpu);
350 }
351
352 return 1;
353}
354
355struct call_data {
356 struct kvm_ptc_g ptc_g_data;
357 struct kvm_vcpu *vcpu;
358};
359
360static void vcpu_global_purge(void *info)
361{
362 struct call_data *p = (struct call_data *)info;
363 struct kvm_vcpu *vcpu = p->vcpu;
364
365 if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
366 return;
367
368 set_bit(KVM_REQ_PTC_G, &vcpu->requests);
369 if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) {
370 vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] =
371 p->ptc_g_data;
372 } else {
373 clear_bit(KVM_REQ_PTC_G, &vcpu->requests);
374 vcpu->arch.ptc_g_count = 0;
375 set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
376 }
377}
378
379static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
380{
381 struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
382 struct kvm *kvm = vcpu->kvm;
383 struct call_data call_data;
384 int i;
385 call_data.ptc_g_data = p->u.ptc_g_data;
386
387 for (i = 0; i < KVM_MAX_VCPUS; i++) {
388 if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state ==
389 KVM_MP_STATE_UNINITIALIZED ||
390 vcpu == kvm->vcpus[i])
391 continue;
392
393 if (waitqueue_active(&kvm->vcpus[i]->wq))
394 wake_up_interruptible(&kvm->vcpus[i]->wq);
395
396 if (kvm->vcpus[i]->cpu != -1) {
397 call_data.vcpu = kvm->vcpus[i];
398 smp_call_function_single(kvm->vcpus[i]->cpu,
399 vcpu_global_purge, &call_data, 0, 1);
400 } else
401 printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
402
403 }
404 return 1;
405}
406
407static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
408{
409 return 1;
410}
411
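/*
 * Guest executed a halt: if the next guest timer tick (itm) is already due,
 * let the guest continue; otherwise arm an hrtimer for that point in time
 * and block the vcpu until an interrupt or the timer wakes it up.
 */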
412int kvm_emulate_halt(struct kvm_vcpu *vcpu)
413{
414
415 ktime_t kt;
416 long itc_diff;
417 unsigned long vcpu_now_itc;
418
419 unsigned long expires;
420 struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
421 unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
422 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
423
424 vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset;
425
426 if (time_after(vcpu_now_itc, vpd->itm)) {
427 vcpu->arch.timer_check = 1;
428 return 1;
429 }
430 itc_diff = vpd->itm - vcpu_now_itc;
431 if (itc_diff < 0)
432 itc_diff = -itc_diff;
433
434 expires = div64_64(itc_diff, cyc_per_usec);
435 kt = ktime_set(0, 1000 * expires);
436 vcpu->arch.ht_active = 1;
437 hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
438
439 if (irqchip_in_kernel(vcpu->kvm)) {
440 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
441 kvm_vcpu_block(vcpu);
442 hrtimer_cancel(p_ht);
443 vcpu->arch.ht_active = 0;
444
445 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
446 return -EINTR;
447 return 1;
448 } else {
449 printk(KERN_ERR"kvm: Unsupported userspace halt!");
450 return 0;
451 }
452}
453
454static int handle_vm_shutdown(struct kvm_vcpu *vcpu,
455 struct kvm_run *kvm_run)
456{
457 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
458 return 0;
459}
460
461static int handle_external_interrupt(struct kvm_vcpu *vcpu,
462 struct kvm_run *kvm_run)
463{
464 return 1;
465}
466
467static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
468 struct kvm_run *kvm_run) = {
469 [EXIT_REASON_VM_PANIC] = handle_vm_error,
470 [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio,
471 [EXIT_REASON_PAL_CALL] = handle_pal_call,
472 [EXIT_REASON_SAL_CALL] = handle_sal_call,
473 [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6,
474 [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown,
475 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
476 [EXIT_REASON_IPI] = handle_ipi,
477 [EXIT_REASON_PTC_G] = handle_global_purge,
478
479};
480
481static const int kvm_vti_max_exit_handlers =
482 sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
483
484static void kvm_prepare_guest_switch(struct kvm_vcpu *vcpu)
485{
486}
487
488static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
489{
490 struct exit_ctl_data *p_exit_data;
491
492 p_exit_data = kvm_get_exit_data(vcpu);
493 return p_exit_data->exit_reason;
494}
495
496/*
497 * The guest has exited. See if we can fix it or if we need userspace
498 * assistance.
499 */
500static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
501{
502 u32 exit_reason = kvm_get_exit_reason(vcpu);
503 vcpu->arch.last_exit = exit_reason;
504
505 if (exit_reason < kvm_vti_max_exit_handlers
506 && kvm_vti_exit_handlers[exit_reason])
507 return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run);
508 else {
509 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
510 kvm_run->hw.hardware_exit_reason = exit_reason;
511 }
512 return 0;
513}
514
515static inline void vti_set_rr6(unsigned long rr6)
516{
517 ia64_set_rr(RR6, rr6);
518 ia64_srlz_i();
519}
520
521static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
522{
523 unsigned long pte;
524 struct kvm *kvm = vcpu->kvm;
525 int r;
526
527 /*Insert a pair of tr to map vmm*/
528 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
529 r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
530 if (r < 0)
531 goto out;
532 vcpu->arch.vmm_tr_slot = r;
533 /*Insert a pair of tr to map data of vm*/
534 pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL));
535 r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE,
536 pte, KVM_VM_DATA_SHIFT);
537 if (r < 0)
538 goto out;
539 vcpu->arch.vm_tr_slot = r;
540 r = 0;
541out:
542 return r;
543
544}
545
546static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
547{
548
549 ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
550 ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
551
552}
553
554static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
555{
556 int cpu = smp_processor_id();
557
558 if (vcpu->arch.last_run_cpu != cpu ||
559 per_cpu(last_vcpu, cpu) != vcpu) {
560 per_cpu(last_vcpu, cpu) = vcpu;
561 vcpu->arch.last_run_cpu = cpu;
562 kvm_flush_tlb_all();
563 }
564
565 vcpu->arch.host_rr6 = ia64_get_rr(RR6);
566 vti_set_rr6(vcpu->arch.vmm_rr);
567 return kvm_insert_vmm_mapping(vcpu);
568}
569static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
570{
571 kvm_purge_vmm_mapping(vcpu);
572 vti_set_rr6(vcpu->arch.host_rr6);
573}
574
575static int vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
576{
577 union context *host_ctx, *guest_ctx;
578 int r;
579
580 /*Get host and guest context with guest address space.*/
581 host_ctx = kvm_get_host_context(vcpu);
582 guest_ctx = kvm_get_guest_context(vcpu);
583
584 r = kvm_vcpu_pre_transition(vcpu);
585 if (r < 0)
586 goto out;
587 kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
588 kvm_vcpu_post_transition(vcpu);
589 r = 0;
590out:
591 return r;
592}
593
594static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
595{
596 int r;
597
598again:
599 preempt_disable();
600
601 kvm_prepare_guest_switch(vcpu);
602 local_irq_disable();
603
604 if (signal_pending(current)) {
605 local_irq_enable();
606 preempt_enable();
607 r = -EINTR;
608 kvm_run->exit_reason = KVM_EXIT_INTR;
609 goto out;
610 }
611
612 vcpu->guest_mode = 1;
613 kvm_guest_enter();
614
615 r = vti_vcpu_run(vcpu, kvm_run);
616 if (r < 0) {
617 local_irq_enable();
618 preempt_enable();
619 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
620 goto out;
621 }
622
623 vcpu->arch.launched = 1;
624 vcpu->guest_mode = 0;
625 local_irq_enable();
626
627 /*
628 * We must have an instruction between local_irq_enable() and
629 * kvm_guest_exit(), so the timer interrupt isn't delayed by
630 * the interrupt shadow. The stat.exits increment will do nicely.
631 * But we need to prevent reordering, hence this barrier():
632 */
633 barrier();
634
635 kvm_guest_exit();
636
637 preempt_enable();
638
639 r = kvm_handle_exit(kvm_run, vcpu);
640
641 if (r > 0) {
642 if (!need_resched())
643 goto again;
644 }
645
646out:
647 if (r > 0) {
648 kvm_resched(vcpu);
649 goto again;
650 }
651
652 return r;
653}
654
655static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
656{
657 struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
658
659 if (!vcpu->mmio_is_write)
660 memcpy(&p->data, vcpu->mmio_data, 8);
661 p->state = STATE_IORESP_READY;
662}
663
664int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
665{
666 int r;
667 sigset_t sigsaved;
668
669 vcpu_load(vcpu);
670
671 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
672 kvm_vcpu_block(vcpu);
673 vcpu_put(vcpu);
674 return -EAGAIN;
675 }
676
677 if (vcpu->sigset_active)
678 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
679
680 if (vcpu->mmio_needed) {
681 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
682 kvm_set_mmio_data(vcpu);
683 vcpu->mmio_read_completed = 1;
684 vcpu->mmio_needed = 0;
685 }
686 r = __vcpu_run(vcpu, kvm_run);
687
688 if (vcpu->sigset_active)
689 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
690
691 vcpu_put(vcpu);
692 return r;
693}
694
695/*
696 * Allocate 16M memory for every vm to hold its specific data.
697 * Its memory map is defined in kvm_host.h.
698 */
699static struct kvm *kvm_alloc_kvm(void)
700{
701
702 struct kvm *kvm;
703 uint64_t vm_base;
704
705 vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
706
707 if (!vm_base)
708 return ERR_PTR(-ENOMEM);
709 printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
710
711 /* Zero all pages before use! */
712 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
713
714 kvm = (struct kvm *)(vm_base + KVM_VM_OFS);
715 kvm->arch.vm_base = vm_base;
716
717 return kvm;
718}
719
720struct kvm_io_range {
721 unsigned long start;
722 unsigned long size;
723 unsigned long type;
724};
725
726static const struct kvm_io_range io_ranges[] = {
727 {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
728 {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
729 {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
730 {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
731 {PIB_START, PIB_SIZE, GPFN_PIB},
732};
733
734static void kvm_build_io_pmt(struct kvm *kvm)
735{
736 unsigned long i, j;
737
738 /* Mark I/O ranges */
739 for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range));
740 i++) {
741 for (j = io_ranges[i].start;
742 j < io_ranges[i].start + io_ranges[i].size;
743 j += PAGE_SIZE)
744 kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT,
745 io_ranges[i].type, 0);
746 }
747
748}
749
750/*Use unused rids to virtualize guest rid.*/
751#define GUEST_PHYSICAL_RR0 0x1739
752#define GUEST_PHYSICAL_RR4 0x2739
753#define VMM_INIT_RR 0x1660
754
755static void kvm_init_vm(struct kvm *kvm)
756{
757 long vm_base;
758
759 BUG_ON(!kvm);
760
761 kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
762 kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
763 kvm->arch.vmm_init_rr = VMM_INIT_RR;
764
765 vm_base = kvm->arch.vm_base;
766 if (vm_base) {
767 kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
768 kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
769 kvm->arch.vpd_base = vm_base + KVM_VPD_OFS;
770 }
771
772 /*
773 *Fill P2M entries for MMIO/IO ranges
774 */
775 kvm_build_io_pmt(kvm);
776
777}
778
779struct kvm *kvm_arch_create_vm(void)
780{
781 struct kvm *kvm = kvm_alloc_kvm();
782
783 if (IS_ERR(kvm))
784 return ERR_PTR(-ENOMEM);
785 kvm_init_vm(kvm);
786
787 return kvm;
788
789}
790
791static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
792 struct kvm_irqchip *chip)
793{
794 int r;
795
796 r = 0;
797 switch (chip->chip_id) {
798 case KVM_IRQCHIP_IOAPIC:
799 memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
800 sizeof(struct kvm_ioapic_state));
801 break;
802 default:
803 r = -EINVAL;
804 break;
805 }
806 return r;
807}
808
809static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
810{
811 int r;
812
813 r = 0;
814 switch (chip->chip_id) {
815 case KVM_IRQCHIP_IOAPIC:
816 memcpy(ioapic_irqchip(kvm),
817 &chip->chip.ioapic,
818 sizeof(struct kvm_ioapic_state));
819 break;
820 default:
821 r = -EINVAL;
822 break;
823 }
824 return r;
825}
826
827#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x
828
829int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
830{
831 int i;
832 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
833 int r;
834
835 vcpu_load(vcpu);
836
837 for (i = 0; i < 16; i++) {
838 vpd->vgr[i] = regs->vpd.vgr[i];
839 vpd->vbgr[i] = regs->vpd.vbgr[i];
840 }
841 for (i = 0; i < 128; i++)
842 vpd->vcr[i] = regs->vpd.vcr[i];
843 vpd->vhpi = regs->vpd.vhpi;
844 vpd->vnat = regs->vpd.vnat;
845 vpd->vbnat = regs->vpd.vbnat;
846 vpd->vpsr = regs->vpd.vpsr;
847
848 vpd->vpr = regs->vpd.vpr;
849
850 r = -EFAULT;
851 r = copy_from_user(&vcpu->arch.guest, regs->saved_guest,
852 sizeof(union context));
853 if (r)
854 goto out;
855 r = copy_from_user(vcpu + 1, regs->saved_stack +
856 sizeof(struct kvm_vcpu),
857 IA64_STK_OFFSET - sizeof(struct kvm_vcpu));
858 if (r)
859 goto out;
860 vcpu->arch.exit_data =
861 ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
862
863 RESTORE_REGS(mp_state);
864 RESTORE_REGS(vmm_rr);
865 memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS);
866 memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS);
867 RESTORE_REGS(itr_regions);
868 RESTORE_REGS(dtr_regions);
869 RESTORE_REGS(tc_regions);
870 RESTORE_REGS(irq_check);
871 RESTORE_REGS(itc_check);
872 RESTORE_REGS(timer_check);
873 RESTORE_REGS(timer_pending);
874 RESTORE_REGS(last_itc);
875 for (i = 0; i < 8; i++) {
876 vcpu->arch.vrr[i] = regs->vrr[i];
877 vcpu->arch.ibr[i] = regs->ibr[i];
878 vcpu->arch.dbr[i] = regs->dbr[i];
879 }
880 for (i = 0; i < 4; i++)
881 vcpu->arch.insvc[i] = regs->insvc[i];
882 RESTORE_REGS(xtp);
883 RESTORE_REGS(metaphysical_rr0);
884 RESTORE_REGS(metaphysical_rr4);
885 RESTORE_REGS(metaphysical_saved_rr0);
886 RESTORE_REGS(metaphysical_saved_rr4);
887 RESTORE_REGS(fp_psr);
888 RESTORE_REGS(saved_gp);
889
890 vcpu->arch.irq_new_pending = 1;
891 vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC);
892 set_bit(KVM_REQ_RESUME, &vcpu->requests);
893
894 vcpu_put(vcpu);
895 r = 0;
896out:
897 return r;
898}
899
900long kvm_arch_vm_ioctl(struct file *filp,
901 unsigned int ioctl, unsigned long arg)
902{
903 struct kvm *kvm = filp->private_data;
904 void __user *argp = (void __user *)arg;
905 int r = -EINVAL;
906
907 switch (ioctl) {
908 case KVM_SET_MEMORY_REGION: {
909 struct kvm_memory_region kvm_mem;
910 struct kvm_userspace_memory_region kvm_userspace_mem;
911
912 r = -EFAULT;
913 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
914 goto out;
915 kvm_userspace_mem.slot = kvm_mem.slot;
916 kvm_userspace_mem.flags = kvm_mem.flags;
917 kvm_userspace_mem.guest_phys_addr =
918 kvm_mem.guest_phys_addr;
919 kvm_userspace_mem.memory_size = kvm_mem.memory_size;
920 r = kvm_vm_ioctl_set_memory_region(kvm,
921 &kvm_userspace_mem, 0);
922 if (r)
923 goto out;
924 break;
925 }
926 case KVM_CREATE_IRQCHIP:
927 r = -EFAULT;
928 r = kvm_ioapic_init(kvm);
929 if (r)
930 goto out;
931 break;
932 case KVM_IRQ_LINE: {
933 struct kvm_irq_level irq_event;
934
935 r = -EFAULT;
936 if (copy_from_user(&irq_event, argp, sizeof irq_event))
937 goto out;
938 if (irqchip_in_kernel(kvm)) {
939 mutex_lock(&kvm->lock);
940 kvm_ioapic_set_irq(kvm->arch.vioapic,
941 irq_event.irq,
942 irq_event.level);
943 mutex_unlock(&kvm->lock);
944 r = 0;
945 }
946 break;
947 }
948 case KVM_GET_IRQCHIP: {
949 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
950 struct kvm_irqchip chip;
951
952 r = -EFAULT;
953 if (copy_from_user(&chip, argp, sizeof chip))
954 goto out;
955 r = -ENXIO;
956 if (!irqchip_in_kernel(kvm))
957 goto out;
958 r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
959 if (r)
960 goto out;
961 r = -EFAULT;
962 if (copy_to_user(argp, &chip, sizeof chip))
963 goto out;
964 r = 0;
965 break;
966 }
967 case KVM_SET_IRQCHIP: {
968 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
969 struct kvm_irqchip chip;
970
971 r = -EFAULT;
972 if (copy_from_user(&chip, argp, sizeof chip))
973 goto out;
974 r = -ENXIO;
975 if (!irqchip_in_kernel(kvm))
976 goto out;
977 r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
978 if (r)
979 goto out;
980 r = 0;
981 break;
982 }
983 default:
984 ;
985 }
986out:
987 return r;
988}
989
990int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
991 struct kvm_sregs *sregs)
992{
993 return -EINVAL;
994}
995
996int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
997 struct kvm_sregs *sregs)
998{
999 return -EINVAL;
1000
1001}
1002int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1003 struct kvm_translation *tr)
1004{
1005
1006 return -EINVAL;
1007}
1008
1009static int kvm_alloc_vmm_area(void)
1010{
1011 if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) {
1012 kvm_vmm_base = __get_free_pages(GFP_KERNEL,
1013 get_order(KVM_VMM_SIZE));
1014 if (!kvm_vmm_base)
1015 return -ENOMEM;
1016
1017 memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
1018 kvm_vm_buffer = kvm_vmm_base + VMM_SIZE;
1019
1020 printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n",
1021 kvm_vmm_base, kvm_vm_buffer);
1022 }
1023
1024 return 0;
1025}
1026
1027static void kvm_free_vmm_area(void)
1028{
1029 if (kvm_vmm_base) {
1030		/*Zero this area before freeing to avoid leaking bits!*/
1031 memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
1032 free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE));
1033 kvm_vmm_base = 0;
1034 kvm_vm_buffer = 0;
1035 kvm_vsa_base = 0;
1036 }
1037}
1038
1039/*
1040 * Make sure that a cpu that is being hot-unplugged does not have any vcpus
1041 * cached on it. Leave it blank for IA64.
1042 */
1043void decache_vcpus_on_cpu(int cpu)
1044{
1045}
1046
1047static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1048{
1049}
1050
1051static int vti_init_vpd(struct kvm_vcpu *vcpu)
1052{
1053 int i;
1054 union cpuid3_t cpuid3;
1055 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1056
1057 if (IS_ERR(vpd))
1058 return PTR_ERR(vpd);
1059
1060 /* CPUID init */
1061 for (i = 0; i < 5; i++)
1062 vpd->vcpuid[i] = ia64_get_cpuid(i);
1063
1064 /* Limit the CPUID number to 5 */
1065 cpuid3.value = vpd->vcpuid[3];
1066 cpuid3.number = 4; /* 5 - 1 */
1067 vpd->vcpuid[3] = cpuid3.value;
1068
1069 /*Set vac and vdc fields*/
1070 vpd->vac.a_from_int_cr = 1;
1071 vpd->vac.a_to_int_cr = 1;
1072 vpd->vac.a_from_psr = 1;
1073 vpd->vac.a_from_cpuid = 1;
1074 vpd->vac.a_cover = 1;
1075 vpd->vac.a_bsw = 1;
1076 vpd->vac.a_int = 1;
1077 vpd->vdc.d_vmsw = 1;
1078
1079 /*Set virtual buffer*/
1080 vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE;
1081
1082 return 0;
1083}
1084
1085static int vti_create_vp(struct kvm_vcpu *vcpu)
1086{
1087 long ret;
1088 struct vpd *vpd = vcpu->arch.vpd;
1089 unsigned long vmm_ivt;
1090
1091 vmm_ivt = kvm_vmm_info->vmm_ivt;
1092
1093 printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt);
1094
1095 ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0);
1096
1097 if (ret) {
1098 printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n");
1099 return -EINVAL;
1100 }
1101 return 0;
1102}
1103
1104static void init_ptce_info(struct kvm_vcpu *vcpu)
1105{
1106 ia64_ptce_info_t ptce = {0};
1107
1108 ia64_get_ptce(&ptce);
1109 vcpu->arch.ptce_base = ptce.base;
1110 vcpu->arch.ptce_count[0] = ptce.count[0];
1111 vcpu->arch.ptce_count[1] = ptce.count[1];
1112 vcpu->arch.ptce_stride[0] = ptce.stride[0];
1113 vcpu->arch.ptce_stride[1] = ptce.stride[1];
1114}
1115
1116static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
1117{
1118 struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
1119
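	/* Re-arming a cancelled timer restarts it on the cpu the vcpu has just
	 * been migrated to, keeping the original expiry. */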
1120 if (hrtimer_cancel(p_ht))
1121 hrtimer_start(p_ht, p_ht->expires, HRTIMER_MODE_ABS);
1122}
1123
1124static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
1125{
1126 struct kvm_vcpu *vcpu;
1127 wait_queue_head_t *q;
1128
1129 vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer);
1130 if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
1131 goto out;
1132
1133 q = &vcpu->wq;
1134 if (waitqueue_active(q)) {
1135 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
1136 wake_up_interruptible(q);
1137 }
1138out:
1139 vcpu->arch.timer_check = 1;
1140 return HRTIMER_NORESTART;
1141}
1142
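/* Architected firmware reset entry point; used below as the boot vcpu's
 * initial cr.iip. */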
1143#define PALE_RESET_ENTRY 0x80000000ffffffb0UL
1144
1145int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1146{
1147 struct kvm_vcpu *v;
1148 int r;
1149 int i;
1150 long itc_offset;
1151 struct kvm *kvm = vcpu->kvm;
1152 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1153
1154 union context *p_ctx = &vcpu->arch.guest;
1155 struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
1156
1157 /*Init vcpu context for first run.*/
1158 if (IS_ERR(vmm_vcpu))
1159 return PTR_ERR(vmm_vcpu);
1160
1161 if (vcpu->vcpu_id == 0) {
1162 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
1163
1164 /*Set entry address for first run.*/
1165 regs->cr_iip = PALE_RESET_ENTRY;
1166
1167		/*Initialize itc offset for vcpus*/
1168 itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
1169 for (i = 0; i < MAX_VCPU_NUM; i++) {
1170 v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
1171 v->arch.itc_offset = itc_offset;
1172 v->arch.last_itc = 0;
1173 }
1174 } else
1175 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
1176
1177 r = -ENOMEM;
1178 vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
1179 if (!vcpu->arch.apic)
1180 goto out;
1181 vcpu->arch.apic->vcpu = vcpu;
1182
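	/* Build the register context the vcpu first switches to: memory stack
	 * in r12, "current" pointer in r13, RSE backing store in ar.bspstore,
	 * IVT address in cr.iva and the VMM entry point in b0. */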
1183 p_ctx->gr[1] = 0;
1184 p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET);
1185 p_ctx->gr[13] = (unsigned long)vmm_vcpu;
1186 p_ctx->psr = 0x1008522000UL;
1187 p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
1188 p_ctx->caller_unat = 0;
1189 p_ctx->pr = 0x0;
1190 p_ctx->ar[36] = 0x0; /*unat*/
1191 p_ctx->ar[19] = 0x0; /*rnat*/
1192 p_ctx->ar[18] = (unsigned long)vmm_vcpu +
1193 ((sizeof(struct kvm_vcpu)+15) & ~15);
1194 p_ctx->ar[64] = 0x0; /*pfs*/
1195 p_ctx->cr[0] = 0x7e04UL;
1196 p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
1197 p_ctx->cr[8] = 0x3c;
1198
1199	/*Initialize region registers*/
1200 p_ctx->rr[0] = 0x30;
1201 p_ctx->rr[1] = 0x30;
1202 p_ctx->rr[2] = 0x30;
1203 p_ctx->rr[3] = 0x30;
1204 p_ctx->rr[4] = 0x30;
1205 p_ctx->rr[5] = 0x30;
1206 p_ctx->rr[7] = 0x30;
1207
1208	/*Initialize branch register 0*/
1209 p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
1210
1211 vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
1212 vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0;
1213 vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4;
1214
1215 hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1216 vcpu->arch.hlt_timer.function = hlt_timer_fn;
1217
1218 vcpu->arch.last_run_cpu = -1;
1219 vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id);
1220 vcpu->arch.vsa_base = kvm_vsa_base;
1221 vcpu->arch.__gp = kvm_vmm_gp;
1222 vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
1223 vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id);
1224 vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id);
1225 init_ptce_info(vcpu);
1226
1227 r = 0;
1228out:
1229 return r;
1230}
1231
1232static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
1233{
1234 unsigned long psr;
1235 int r;
1236
1237 local_irq_save(psr);
1238 r = kvm_insert_vmm_mapping(vcpu);
1239 if (r)
1240 goto fail;
1241 r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
1242 if (r)
1243 goto fail;
1244
1245 r = vti_init_vpd(vcpu);
1246 if (r) {
1247 printk(KERN_DEBUG"kvm: vpd init error!!\n");
1248 goto uninit;
1249 }
1250
1251 r = vti_create_vp(vcpu);
1252 if (r)
1253 goto uninit;
1254
1255 kvm_purge_vmm_mapping(vcpu);
1256 local_irq_restore(psr);
1257
1258 return 0;
1259uninit:
1260 kvm_vcpu_uninit(vcpu);
1261fail:
1262 return r;
1263}
1264
1265struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1266 unsigned int id)
1267{
1268 struct kvm_vcpu *vcpu;
1269 unsigned long vm_base = kvm->arch.vm_base;
1270 int r;
1271 int cpu;
1272
1273 r = -ENOMEM;
1274 if (!vm_base) {
1275 printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
1276 goto fail;
1277 }
1278 vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id);
1279 vcpu->kvm = kvm;
1280
1281 cpu = get_cpu();
1282 vti_vcpu_load(vcpu, cpu);
1283 r = vti_vcpu_setup(vcpu, id);
1284 put_cpu();
1285
1286 if (r) {
1287 printk(KERN_DEBUG"kvm: vcpu_setup error!!\n");
1288 goto fail;
1289 }
1290
1291 return vcpu;
1292fail:
1293 return ERR_PTR(r);
1294}
1295
1296int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1297{
1298 return 0;
1299}
1300
1301int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1302{
1303 return -EINVAL;
1304}
1305
1306int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1307{
1308 return -EINVAL;
1309}
1310
1311int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
1312 struct kvm_debug_guest *dbg)
1313{
1314 return -EINVAL;
1315}
1316
1317static void free_kvm(struct kvm *kvm)
1318{
1319 unsigned long vm_base = kvm->arch.vm_base;
1320
1321 if (vm_base) {
1322 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
1323 free_pages(vm_base, get_order(KVM_VM_DATA_SIZE));
1324 }
1325
1326}
1327
1328static void kvm_release_vm_pages(struct kvm *kvm)
1329{
1330 struct kvm_memory_slot *memslot;
1331 int i, j;
1332 unsigned long base_gfn;
1333
1334 for (i = 0; i < kvm->nmemslots; i++) {
1335 memslot = &kvm->memslots[i];
1336 base_gfn = memslot->base_gfn;
1337
1338 for (j = 0; j < memslot->npages; j++) {
1339 if (memslot->rmap[j])
1340 put_page((struct page *)memslot->rmap[j]);
1341 }
1342 }
1343}
1344
1345void kvm_arch_destroy_vm(struct kvm *kvm)
1346{
1347 kfree(kvm->arch.vioapic);
1348 kvm_release_vm_pages(kvm);
1349 kvm_free_physmem(kvm);
1350 free_kvm(kvm);
1351}
1352
1353void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1354{
1355}
1356
1357void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1358{
1359 if (cpu != vcpu->cpu) {
1360 vcpu->cpu = cpu;
1361 if (vcpu->arch.ht_active)
1362 kvm_migrate_hlt_timer(vcpu);
1363 }
1364}
1365
1366#define SAVE_REGS(_x) regs->_x = vcpu->arch._x
1367
1368int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1369{
1370 int i;
1371 int r;
1372 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1373 vcpu_load(vcpu);
1374
1375 for (i = 0; i < 16; i++) {
1376 regs->vpd.vgr[i] = vpd->vgr[i];
1377 regs->vpd.vbgr[i] = vpd->vbgr[i];
1378 }
1379 for (i = 0; i < 128; i++)
1380 regs->vpd.vcr[i] = vpd->vcr[i];
1381 regs->vpd.vhpi = vpd->vhpi;
1382 regs->vpd.vnat = vpd->vnat;
1383 regs->vpd.vbnat = vpd->vbnat;
1384 regs->vpd.vpsr = vpd->vpsr;
1385 regs->vpd.vpr = vpd->vpr;
1386
1387 r = -EFAULT;
1388 r = copy_to_user(regs->saved_guest, &vcpu->arch.guest,
1389 sizeof(union context));
1390 if (r)
1391 goto out;
1392 r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET);
1393 if (r)
1394 goto out;
1395 SAVE_REGS(mp_state);
1396 SAVE_REGS(vmm_rr);
1397 memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
1398 memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS);
1399 SAVE_REGS(itr_regions);
1400 SAVE_REGS(dtr_regions);
1401 SAVE_REGS(tc_regions);
1402 SAVE_REGS(irq_check);
1403 SAVE_REGS(itc_check);
1404 SAVE_REGS(timer_check);
1405 SAVE_REGS(timer_pending);
1406 SAVE_REGS(last_itc);
1407 for (i = 0; i < 8; i++) {
1408 regs->vrr[i] = vcpu->arch.vrr[i];
1409 regs->ibr[i] = vcpu->arch.ibr[i];
1410 regs->dbr[i] = vcpu->arch.dbr[i];
1411 }
1412 for (i = 0; i < 4; i++)
1413 regs->insvc[i] = vcpu->arch.insvc[i];
1414 regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC);
1415 SAVE_REGS(xtp);
1416 SAVE_REGS(metaphysical_rr0);
1417 SAVE_REGS(metaphysical_rr4);
1418 SAVE_REGS(metaphysical_saved_rr0);
1419 SAVE_REGS(metaphysical_saved_rr4);
1420 SAVE_REGS(fp_psr);
1421 SAVE_REGS(saved_gp);
1422 vcpu_put(vcpu);
1423 r = 0;
1424out:
1425 return r;
1426}
1427
1428void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
1429{
1430
1431 hrtimer_cancel(&vcpu->arch.hlt_timer);
1432 kfree(vcpu->arch.apic);
1433}
1434
1435
1436long kvm_arch_vcpu_ioctl(struct file *filp,
1437 unsigned int ioctl, unsigned long arg)
1438{
1439 return -EINVAL;
1440}
1441
1442int kvm_arch_set_memory_region(struct kvm *kvm,
1443 struct kvm_userspace_memory_region *mem,
1444 struct kvm_memory_slot old,
1445 int user_alloc)
1446{
1447 unsigned long i;
1448 struct page *page;
1449 int npages = mem->memory_size >> PAGE_SHIFT;
1450 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
1451 unsigned long base_gfn = memslot->base_gfn;
1452
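	/* Pin each page of the slot and record its host physical address in
	 * the guest's P2M table; rmap[] keeps the page so it can be released
	 * when the VM is torn down. */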
1453 for (i = 0; i < npages; i++) {
1454 page = gfn_to_page(kvm, base_gfn + i);
1455 kvm_set_pmt_entry(kvm, base_gfn + i,
1456 page_to_pfn(page) << PAGE_SHIFT,
1457 _PAGE_AR_RWX|_PAGE_MA_WB);
1458 memslot->rmap[i] = (unsigned long)page;
1459 }
1460
1461 return 0;
1462}
1463
1464
1465long kvm_arch_dev_ioctl(struct file *filp,
1466 unsigned int ioctl, unsigned long arg)
1467{
1468 return -EINVAL;
1469}
1470
1471void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1472{
1473 kvm_vcpu_uninit(vcpu);
1474}
1475
1476static int vti_cpu_has_kvm_support(void)
1477{
1478 long avail = 1, status = 1, control = 1;
1479 long ret;
1480
1481 ret = ia64_pal_proc_get_features(&avail, &status, &control, 0);
1482 if (ret)
1483 goto out;
1484
1485 if (!(avail & PAL_PROC_VM_BIT))
1486 goto out;
1487
1488 printk(KERN_DEBUG"kvm: Hardware Supports VT\n");
1489
1490 ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info);
1491 if (ret)
1492 goto out;
1493 printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size);
1494
1495 if (!(vp_env_info & VP_OPCODE)) {
1496 printk(KERN_WARNING"kvm: No opcode ability on hardware, "
1497 "vm_env_info:0x%lx\n", vp_env_info);
1498 }
1499
1500 return 1;
1501out:
1502 return 0;
1503}
1504
1505static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
1506 struct module *module)
1507{
1508 unsigned long module_base;
1509 unsigned long vmm_size;
1510
1511 unsigned long vmm_offset, func_offset, fdesc_offset;
1512 struct fdesc *p_fdesc;
1513
1514 BUG_ON(!module);
1515
1516 if (!kvm_vmm_base) {
1517		printk("kvm: kvm area hasn't been initialized yet!!\n");
1518 return -EFAULT;
1519 }
1520
1521 /*Calculate new position of relocated vmm module.*/
1522 module_base = (unsigned long)module->module_core;
1523 vmm_size = module->core_size;
1524 if (unlikely(vmm_size > KVM_VMM_SIZE))
1525 return -EFAULT;
1526
1527 memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
1528 kvm_flush_icache(kvm_vmm_base, vmm_size);
1529
1530 /*Recalculate kvm_vmm_info based on new VMM*/
1531 vmm_offset = vmm_info->vmm_ivt - module_base;
1532 kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset;
1533 printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n",
1534 kvm_vmm_info->vmm_ivt);
1535
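	/* An ia64 function pointer is a descriptor holding an entry IP and a
	 * gp; rebase both fields so they point into the relocated VMM copy. */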
1536 fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base;
1537 kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE +
1538 fdesc_offset);
1539 func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base;
1540 p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
1541 p_fdesc->ip = KVM_VMM_BASE + func_offset;
1542 p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base);
1543
1544 printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n",
1545 KVM_VMM_BASE+func_offset);
1546
1547 fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base;
1548 kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE +
1549 fdesc_offset);
1550 func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base;
1551 p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
1552 p_fdesc->ip = KVM_VMM_BASE + func_offset;
1553 p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base);
1554
1555 kvm_vmm_gp = p_fdesc->gp;
1556
1557 printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n",
1558 kvm_vmm_info->vmm_entry);
1559 printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n",
1560 KVM_VMM_BASE + func_offset);
1561
1562 return 0;
1563}
1564
1565int kvm_arch_init(void *opaque)
1566{
1567 int r;
1568 struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque;
1569
1570 if (!vti_cpu_has_kvm_support()) {
1571 printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n");
1572 r = -EOPNOTSUPP;
1573 goto out;
1574 }
1575
1576 if (kvm_vmm_info) {
1577 printk(KERN_ERR "kvm: Already loaded VMM module!\n");
1578 r = -EEXIST;
1579 goto out;
1580 }
1581
1582 r = -ENOMEM;
1583 kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL);
1584 if (!kvm_vmm_info)
1585 goto out;
1586
1587 if (kvm_alloc_vmm_area())
1588 goto out_free0;
1589
1590 r = kvm_relocate_vmm(vmm_info, vmm_info->module);
1591 if (r)
1592 goto out_free1;
1593
1594 return 0;
1595
1596out_free1:
1597 kvm_free_vmm_area();
1598out_free0:
1599 kfree(kvm_vmm_info);
1600out:
1601 return r;
1602}
1603
1604void kvm_arch_exit(void)
1605{
1606 kvm_free_vmm_area();
1607 kfree(kvm_vmm_info);
1608 kvm_vmm_info = NULL;
1609}
1610
1611static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
1612 struct kvm_dirty_log *log)
1613{
1614 struct kvm_memory_slot *memslot;
1615 int r, i;
1616 long n, base;
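	/* The live dirty bitmap lives in the per-VM data area shared with the
	 * VMM; struct kvm sits KVM_VM_OFS bytes into that area, so step back
	 * to its base and index the dirty-log region from there. */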
1617 unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS
1618 + KVM_MEM_DIRTY_LOG_OFS);
1619
1620 r = -EINVAL;
1621 if (log->slot >= KVM_MEMORY_SLOTS)
1622 goto out;
1623
1624 memslot = &kvm->memslots[log->slot];
1625 r = -ENOENT;
1626 if (!memslot->dirty_bitmap)
1627 goto out;
1628
1629 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
1630 base = memslot->base_gfn / BITS_PER_LONG;
1631
1632 for (i = 0; i < n/sizeof(long); ++i) {
1633 memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
1634 dirty_bitmap[base + i] = 0;
1635 }
1636 r = 0;
1637out:
1638 return r;
1639}
1640
1641int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1642 struct kvm_dirty_log *log)
1643{
1644 int r;
1645 int n;
1646 struct kvm_memory_slot *memslot;
1647 int is_dirty = 0;
1648
1649 spin_lock(&kvm->arch.dirty_log_lock);
1650
1651 r = kvm_ia64_sync_dirty_log(kvm, log);
1652 if (r)
1653 goto out;
1654
1655 r = kvm_get_dirty_log(kvm, log, &is_dirty);
1656 if (r)
1657 goto out;
1658
1659 /* If nothing is dirty, don't bother messing with page tables. */
1660 if (is_dirty) {
1661 kvm_flush_remote_tlbs(kvm);
1662 memslot = &kvm->memslots[log->slot];
1663 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
1664 memset(memslot->dirty_bitmap, 0, n);
1665 }
1666 r = 0;
1667out:
1668 spin_unlock(&kvm->arch.dirty_log_lock);
1669 return r;
1670}
1671
1672int kvm_arch_hardware_setup(void)
1673{
1674 return 0;
1675}
1676
1677void kvm_arch_hardware_unsetup(void)
1678{
1679}
1680
1681static void vcpu_kick_intr(void *info)
1682{
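	/* The IPI itself is what forces the target cpu out of guest mode; the
	 * handler has nothing to do beyond the optional debug print. */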
1683#ifdef DEBUG
1684 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
1685 printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu);
1686#endif
1687}
1688
1689void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
1690{
1691 int ipi_pcpu = vcpu->cpu;
1692
1693 if (waitqueue_active(&vcpu->wq))
1694 wake_up_interruptible(&vcpu->wq);
1695
1696 if (vcpu->guest_mode)
1697 smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
1698}
1699
1700int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
1701{
1702
1703 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1704
1705 if (!test_and_set_bit(vec, &vpd->irr[0])) {
1706 vcpu->arch.irq_new_pending = 1;
1707 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
1708 kvm_vcpu_kick(vcpu);
1709 else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
1710 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
1711 if (waitqueue_active(&vcpu->wq))
1712 wake_up_interruptible(&vcpu->wq);
1713 }
1714 return 1;
1715 }
1716 return 0;
1717}
1718
1719int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
1720{
1721 return apic->vcpu->vcpu_id == dest;
1722}
1723
1724int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
1725{
1726 return 0;
1727}
1728
1729struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
1730 unsigned long bitmap)
1731{
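	/* Pick the vcpu advertising the lowest XTP value, i.e. the one most
	 * willing to accept external interrupts. */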
1732 struct kvm_vcpu *lvcpu = kvm->vcpus[0];
1733 int i;
1734
1735 for (i = 1; i < KVM_MAX_VCPUS; i++) {
1736 if (!kvm->vcpus[i])
1737 continue;
1738 if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp)
1739 lvcpu = kvm->vcpus[i];
1740 }
1741
1742 return lvcpu;
1743}
1744
1745static int find_highest_bits(int *dat)
1746{
1747 u32 bits, bitnum;
1748 int i;
1749
1750 /* loop for all 256 bits */
1751 for (i = 7; i >= 0 ; i--) {
1752 bits = dat[i];
1753 if (bits) {
1754 bitnum = fls(bits);
1755 return i * 32 + bitnum - 1;
1756 }
1757 }
1758
1759 return -1;
1760}
1761
1762int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
1763{
1764 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1765
1766 if (vpd->irr[0] & (1UL << NMI_VECTOR))
1767 return NMI_VECTOR;
1768 if (vpd->irr[0] & (1UL << ExtINT_VECTOR))
1769 return ExtINT_VECTOR;
1770
1771 return find_highest_bits((int *)&vpd->irr[0]);
1772}
1773
1774int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
1775{
1776 if (kvm_highest_pending_irq(vcpu) != -1)
1777 return 1;
1778 return 0;
1779}
1780
1781int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
1782{
1783 return 0;
1784}
1785
1786gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
1787{
1788 return gfn;
1789}
1790
1791int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1792{
1793 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE;
1794}
1795
1796int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1797 struct kvm_mp_state *mp_state)
1798{
1799 return -EINVAL;
1800}
1801
1802int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1803 struct kvm_mp_state *mp_state)
1804{
1805 return -EINVAL;
1806}
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
new file mode 100644
index 000000000000..091f936c4485
--- /dev/null
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -0,0 +1,500 @@
1/*
2 * PAL/SAL call delegation
3 *
4 * Copyright (c) 2004 Li Susie <susie.li@intel.com>
5 * Copyright (c) 2005 Yu Ke <ke.yu@intel.com>
6 * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 */
21
22#include <linux/kvm_host.h>
23#include <linux/smp.h>
24
25#include "vti.h"
26#include "misc.h"
27
28#include <asm/pal.h>
29#include <asm/sal.h>
30#include <asm/tlb.h>
31
32/*
33 * Handy macros to make sure that the PAL return values start out
34 * as something meaningful.
35 */
36#define INIT_PAL_STATUS_UNIMPLEMENTED(x) \
37 { \
38 x.status = PAL_STATUS_UNIMPLEMENTED; \
39 x.v0 = 0; \
40 x.v1 = 0; \
41 x.v2 = 0; \
42 }
43
44#define INIT_PAL_STATUS_SUCCESS(x) \
45 { \
46 x.status = PAL_STATUS_SUCCESS; \
47 x.v0 = 0; \
48 x.v1 = 0; \
49 x.v2 = 0; \
50 }
51
52static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu,
53 u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) {
54 struct exit_ctl_data *p;
55
56 if (vcpu) {
57 p = &vcpu->arch.exit_data;
58 if (p->exit_reason == EXIT_REASON_PAL_CALL) {
59 *gr28 = p->u.pal_data.gr28;
60 *gr29 = p->u.pal_data.gr29;
61 *gr30 = p->u.pal_data.gr30;
62 *gr31 = p->u.pal_data.gr31;
63 return ;
64 }
65 }
66 printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n");
67}
68
69static void set_pal_result(struct kvm_vcpu *vcpu,
70 struct ia64_pal_retval result) {
71
72 struct exit_ctl_data *p;
73
74 p = kvm_get_exit_data(vcpu);
75 if (p && p->exit_reason == EXIT_REASON_PAL_CALL) {
76 p->u.pal_data.ret = result;
77 return ;
78 }
79 INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret);
80}
81
82static void set_sal_result(struct kvm_vcpu *vcpu,
83 struct sal_ret_values result) {
84 struct exit_ctl_data *p;
85
86 p = kvm_get_exit_data(vcpu);
87 if (p && p->exit_reason == EXIT_REASON_SAL_CALL) {
88 p->u.sal_data.ret = result;
89 return ;
90 }
91 printk(KERN_WARNING"Failed to set sal result!!\n");
92}
93
94struct cache_flush_args {
95 u64 cache_type;
96 u64 operation;
97 u64 progress;
98 long status;
99};
100
101cpumask_t cpu_cache_coherent_map;
102
103static void remote_pal_cache_flush(void *data)
104{
105 struct cache_flush_args *args = data;
106 long status;
107 u64 progress = args->progress;
108
109 status = ia64_pal_cache_flush(args->cache_type, args->operation,
110 &progress, NULL);
111 if (status != 0)
112 args->status = status;
113}
114
115static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu)
116{
117 u64 gr28, gr29, gr30, gr31;
118 struct ia64_pal_retval result = {0, 0, 0, 0};
119 struct cache_flush_args args = {0, 0, 0, 0};
120 long psr;
121
122 gr28 = gr29 = gr30 = gr31 = 0;
123 kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31);
124
125 if (gr31 != 0)
126 printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu);
127
128 /* Always call Host Pal in int=1 */
129 gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS;
130 args.cache_type = gr29;
131 args.operation = gr30;
132 smp_call_function(remote_pal_cache_flush,
133 (void *)&args, 1, 1);
134 if (args.status != 0)
135 printk(KERN_ERR"pal_cache_flush error!,"
136 "status:0x%lx\n", args.status);
137 /*
138 * Call Host PAL cache flush
139	 * Clear psr.ic when calling PAL_CACHE_FLUSH
140 */
141 local_irq_save(psr);
142 result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1,
143 &result.v0);
144 local_irq_restore(psr);
145 if (result.status != 0)
146 printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld"
147 "in1:%lx,in2:%lx\n",
148 vcpu, result.status, gr29, gr30);
149
150#if 0
151 if (gr29 == PAL_CACHE_TYPE_COHERENT) {
152 cpus_setall(vcpu->arch.cache_coherent_map);
153 cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map);
154 cpus_setall(cpu_cache_coherent_map);
155 cpu_clear(vcpu->cpu, cpu_cache_coherent_map);
156 }
157#endif
158 return result;
159}
160
161struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu)
162{
163
164 struct ia64_pal_retval result;
165
166 PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0);
167 return result;
168}
169
170static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
171{
172
173 struct ia64_pal_retval result;
174
175 PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0);
176
177 /*
178 * PAL_FREQ_BASE may not be implemented in some platforms,
179 * call SAL instead.
180 */
181 if (result.v0 == 0) {
182 result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
183 &result.v0,
184 &result.v1);
185 result.v2 = 0;
186 }
187
188 return result;
189}
190
191static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
192{
193
194 struct ia64_pal_retval result;
195
196 PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
197 return result;
198}
199
200static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu)
201{
202 struct ia64_pal_retval result;
203
204 INIT_PAL_STATUS_UNIMPLEMENTED(result);
205 return result;
206}
207
208static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu)
209{
210
211 struct ia64_pal_retval result;
212
213 INIT_PAL_STATUS_SUCCESS(result);
214 return result;
215}
216
217static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
218{
219
220 struct ia64_pal_retval result = {0, 0, 0, 0};
221 long in0, in1, in2, in3;
222
223 kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
224 result.status = ia64_pal_proc_get_features(&result.v0, &result.v1,
225 &result.v2, in2);
226
227 return result;
228}
229
230static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
231{
232
233 pal_cache_config_info_t ci;
234 long status;
235 unsigned long in0, in1, in2, in3, r9, r10;
236
237 kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
238 status = ia64_pal_cache_config_info(in1, in2, &ci);
239 r9 = ci.pcci_info_1.pcci1_data;
240 r10 = ci.pcci_info_2.pcci2_data;
241 return ((struct ia64_pal_retval){status, r9, r10, 0});
242}
243
244#define GUEST_IMPL_VA_MSB 59
245#define GUEST_RID_BITS 18
246
247static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
248{
249
250 pal_vm_info_1_u_t vminfo1;
251 pal_vm_info_2_u_t vminfo2;
252 struct ia64_pal_retval result;
253
254 PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0);
255 if (!result.status) {
256 vminfo1.pvi1_val = result.v0;
257 vminfo1.pal_vm_info_1_s.max_itr_entry = 8;
258 vminfo1.pal_vm_info_1_s.max_dtr_entry = 8;
259 result.v0 = vminfo1.pvi1_val;
260 vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB;
261 vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS;
262 result.v1 = vminfo2.pvi2_val;
263 }
264
265 return result;
266}
267
268static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
269{
270 struct ia64_pal_retval result;
271
272 INIT_PAL_STATUS_UNIMPLEMENTED(result);
273
274 return result;
275}
276
277static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
278{
279 u64 index = 0;
280 struct exit_ctl_data *p;
281
282 p = kvm_get_exit_data(vcpu);
283 if (p && (p->exit_reason == EXIT_REASON_PAL_CALL))
284 index = p->u.pal_data.gr28;
285
286 return index;
287}
288
289int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
290{
291
292 u64 gr28;
293 struct ia64_pal_retval result;
294 int ret = 1;
295
296 gr28 = kvm_get_pal_call_index(vcpu);
297 /*printk("pal_call index:%lx\n",gr28);*/
298 switch (gr28) {
299 case PAL_CACHE_FLUSH:
300 result = pal_cache_flush(vcpu);
301 break;
302 case PAL_CACHE_SUMMARY:
303 result = pal_cache_summary(vcpu);
304 break;
305 case PAL_HALT_LIGHT:
306 {
307 vcpu->arch.timer_pending = 1;
308 INIT_PAL_STATUS_SUCCESS(result);
309 if (kvm_highest_pending_irq(vcpu) == -1)
310 ret = kvm_emulate_halt(vcpu);
311
312 }
313 break;
314
315 case PAL_FREQ_RATIOS:
316 result = pal_freq_ratios(vcpu);
317 break;
318
319 case PAL_FREQ_BASE:
320 result = pal_freq_base(vcpu);
321 break;
322
323 case PAL_LOGICAL_TO_PHYSICAL :
324 result = pal_logical_to_physica(vcpu);
325 break;
326
327 case PAL_VM_SUMMARY :
328 result = pal_vm_summary(vcpu);
329 break;
330
331 case PAL_VM_INFO :
332 result = pal_vm_info(vcpu);
333 break;
334 case PAL_PLATFORM_ADDR :
335 result = pal_platform_addr(vcpu);
336 break;
337 case PAL_CACHE_INFO:
338 result = pal_cache_info(vcpu);
339 break;
340 case PAL_PTCE_INFO:
341 INIT_PAL_STATUS_SUCCESS(result);
342 result.v1 = (1L << 32) | 1L;
343 break;
344 case PAL_VM_PAGE_SIZE:
345 result.status = ia64_pal_vm_page_size(&result.v0,
346 &result.v1);
347 break;
348 case PAL_RSE_INFO:
349 result.status = ia64_pal_rse_info(&result.v0,
350 (pal_hints_u_t *)&result.v1);
351 break;
352 case PAL_PROC_GET_FEATURES:
353 result = pal_proc_get_features(vcpu);
354 break;
355 case PAL_DEBUG_INFO:
356 result.status = ia64_pal_debug_info(&result.v0,
357 &result.v1);
358 break;
359 case PAL_VERSION:
360 result.status = ia64_pal_version(
361 (pal_version_u_t *)&result.v0,
362 (pal_version_u_t *)&result.v1);
363
364 break;
365 case PAL_FIXED_ADDR:
366 result.status = PAL_STATUS_SUCCESS;
367 result.v0 = vcpu->vcpu_id;
368 break;
369 default:
370 INIT_PAL_STATUS_UNIMPLEMENTED(result);
371 printk(KERN_WARNING"kvm: Unsupported pal call,"
372 " index:0x%lx\n", gr28);
373 }
374 set_pal_result(vcpu, result);
375 return ret;
376}
377
378static struct sal_ret_values sal_emulator(struct kvm *kvm,
379 long index, unsigned long in1,
380 unsigned long in2, unsigned long in3,
381 unsigned long in4, unsigned long in5,
382 unsigned long in6, unsigned long in7)
383{
384 unsigned long r9 = 0;
385 unsigned long r10 = 0;
386 long r11 = 0;
387 long status;
388
389 status = 0;
390 switch (index) {
391 case SAL_FREQ_BASE:
392 status = ia64_sal_freq_base(in1, &r9, &r10);
393 break;
394 case SAL_PCI_CONFIG_READ:
395 printk(KERN_WARNING"kvm: Not allowed to call here!"
396 " SAL_PCI_CONFIG_READ\n");
397 break;
398 case SAL_PCI_CONFIG_WRITE:
399 printk(KERN_WARNING"kvm: Not allowed to call here!"
400 " SAL_PCI_CONFIG_WRITE\n");
401 break;
402 case SAL_SET_VECTORS:
403 if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) {
404 if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) {
405 status = -2;
406 } else {
407 kvm->arch.rdv_sal_data.boot_ip = in2;
408 kvm->arch.rdv_sal_data.boot_gp = in3;
409 }
410			printk("Rendezvous called! iip:%lx\n\n", in2);
411 } else
412 printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu."
413 "ignored...\n", in1);
414 break;
415 case SAL_GET_STATE_INFO:
416 /* No more info. */
417 status = -5;
418 r9 = 0;
419 break;
420 case SAL_GET_STATE_INFO_SIZE:
421 /* Return a dummy size. */
422 status = 0;
423 r9 = 128;
424 break;
425 case SAL_CLEAR_STATE_INFO:
426 /* Noop. */
427 break;
428 case SAL_MC_RENDEZ:
429 printk(KERN_WARNING
430 "kvm: called SAL_MC_RENDEZ. ignored...\n");
431 break;
432 case SAL_MC_SET_PARAMS:
433 printk(KERN_WARNING
434 "kvm: called SAL_MC_SET_PARAMS.ignored!\n");
435 break;
436 case SAL_CACHE_FLUSH:
437 if (1) {
438 /*Flush using SAL.
439 This method is faster but has a side
440			effect on other vcpus running on
441 this cpu. */
442 status = ia64_sal_cache_flush(in1);
443 } else {
444			/*May need to implement a method
445			without this side effect.*/
446 status = 0;
447 }
448 break;
449 case SAL_CACHE_INIT:
450 printk(KERN_WARNING
451 "kvm: called SAL_CACHE_INIT. ignored...\n");
452 break;
453 case SAL_UPDATE_PAL:
454 printk(KERN_WARNING
455 "kvm: CALLED SAL_UPDATE_PAL. ignored...\n");
456 break;
457 default:
458 printk(KERN_WARNING"kvm: called SAL_CALL with unknown index."
459 " index:%ld\n", index);
460 status = -1;
461 break;
462 }
463 return ((struct sal_ret_values) {status, r9, r10, r11});
464}
465
466static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
467 u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){
468
469 struct exit_ctl_data *p;
470
471 p = kvm_get_exit_data(vcpu);
472
473 if (p) {
474 if (p->exit_reason == EXIT_REASON_SAL_CALL) {
475 *in0 = p->u.sal_data.in0;
476 *in1 = p->u.sal_data.in1;
477 *in2 = p->u.sal_data.in2;
478 *in3 = p->u.sal_data.in3;
479 *in4 = p->u.sal_data.in4;
480 *in5 = p->u.sal_data.in5;
481 *in6 = p->u.sal_data.in6;
482 *in7 = p->u.sal_data.in7;
483 return ;
484 }
485 }
486 *in0 = 0;
487}
488
489void kvm_sal_emul(struct kvm_vcpu *vcpu)
490{
491
492 struct sal_ret_values result;
493 u64 index, in1, in2, in3, in4, in5, in6, in7;
494
495 kvm_get_sal_call_data(vcpu, &index, &in1, &in2,
496 &in3, &in4, &in5, &in6, &in7);
497 result = sal_emulator(vcpu->kvm, index, in1, in2, in3,
498 in4, in5, in6, in7);
499 set_sal_result(vcpu, result);
500}
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
new file mode 100644
index 000000000000..13980d9b8bcf
--- /dev/null
+++ b/arch/ia64/kvm/kvm_minstate.h
@@ -0,0 +1,273 @@
1/*
2 * kvm_minstate.h: min save macros
3 * Copyright (c) 2007, Intel Corporation.
4 *
5 * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
6 * Xiantao Zhang (xiantao.zhang@intel.com)
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 *
21 */
22
23
24#include <asm/asmmacro.h>
25#include <asm/types.h>
26#include <asm/kregs.h>
27#include "asm-offsets.h"
28
29#define KVM_MINSTATE_START_SAVE_MIN \
30 mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\
31 ;; \
32 mov.m r28 = ar.rnat; \
33 addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \
34 ;; \
35 lfetch.fault.excl.nt1 [r22]; \
36 addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
37 mov r23 = ar.bspstore; /* save ar.bspstore */ \
38 ;; \
39 mov ar.bspstore = r22; /* switch to kernel RBS */\
40 ;; \
41 mov r18 = ar.bsp; \
42 mov ar.rsc = 0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */
43
44
45
46#define KVM_MINSTATE_END_SAVE_MIN \
47 bsw.1; /* switch back to bank 1 (must be last in insn group) */\
48 ;;
49
50
51#define PAL_VSA_SYNC_READ \
52 /* begin to call pal vps sync_read */ \
53 add r25 = VMM_VPD_BASE_OFFSET, r21; \
54 adds r20 = VMM_VCPU_VSA_BASE_OFFSET, r21; /* entry point */ \
55 ;; \
56 ld8 r25 = [r25]; /* read vpd base */ \
57 ld8 r20 = [r20]; \
58 ;; \
59 add r20 = PAL_VPS_SYNC_READ,r20; \
60 ;; \
61{ .mii; \
62 nop 0x0; \
63 mov r24 = ip; \
64 mov b0 = r20; \
65 ;; \
66}; \
67{ .mmb; \
68 add r24 = 0x20, r24; \
69 nop 0x0; \
70 br.cond.sptk b0; /* call the service */ \
71 ;; \
72};
73
74
75
76#define KVM_MINSTATE_GET_CURRENT(reg) mov reg=r21
77
78/*
79 * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
80 * the minimum state necessary that allows us to turn psr.ic back
81 * on.
82 *
83 * Assumed state upon entry:
84 * psr.ic: off
85 * r31: contains saved predicates (pr)
86 *
87 * Upon exit, the state is as follows:
88 * psr.ic: off
89 * r2 = points to &pt_regs.r16
90 * r8 = contents of ar.ccv
91 * r9 = contents of ar.csd
92 * r10 = contents of ar.ssd
93 * r11 = FPSR_DEFAULT
94 * r12 = kernel sp (kernel virtual address)
95 * r13 = points to current task_struct (kernel virtual address)
96 * p15 = TRUE if psr.i is set in cr.ipsr
97 * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
98 * preserved
99 *
100 * Note that psr.ic is NOT turned on by this macro. This is so that
101 * we can pass interruption state as arguments to a handler.
102 */
103
104
105#define PT(f) (VMM_PT_REGS_##f##_OFFSET)
106
107#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
108 KVM_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \
109 mov r27 = ar.rsc; /* M */ \
110 mov r20 = r1; /* A */ \
111 mov r25 = ar.unat; /* M */ \
112 mov r29 = cr.ipsr; /* M */ \
113 mov r26 = ar.pfs; /* I */ \
114 mov r18 = cr.isr; \
115 COVER; /* B;; (or nothing) */ \
116 ;; \
117 tbit.z p0,p15 = r29,IA64_PSR_I_BIT; \
118 mov r1 = r16; \
119/* mov r21=r16; */ \
120 /* switch from user to kernel RBS: */ \
121 ;; \
122 invala; /* M */ \
123 SAVE_IFS; \
124 ;; \
125 KVM_MINSTATE_START_SAVE_MIN \
126 adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */ \
127 adds r16 = PT(CR_IPSR),r1; \
128 ;; \
129 lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
130 st8 [r16] = r29; /* save cr.ipsr */ \
131 ;; \
132 lfetch.fault.excl.nt1 [r17]; \
133 tbit.nz p15,p0 = r29,IA64_PSR_I_BIT; \
134 mov r29 = b0 \
135 ;; \
136 adds r16 = PT(R8),r1; /* initialize first base pointer */\
137 adds r17 = PT(R9),r1; /* initialize second base pointer */\
138 ;; \
139.mem.offset 0,0; st8.spill [r16] = r8,16; \
140.mem.offset 8,0; st8.spill [r17] = r9,16; \
141 ;; \
142.mem.offset 0,0; st8.spill [r16] = r10,24; \
143.mem.offset 8,0; st8.spill [r17] = r11,24; \
144 ;; \
145 mov r9 = cr.iip; /* M */ \
146 mov r10 = ar.fpsr; /* M */ \
147 ;; \
148 st8 [r16] = r9,16; /* save cr.iip */ \
149 st8 [r17] = r30,16; /* save cr.ifs */ \
150 sub r18 = r18,r22; /* r18=RSE.ndirty*8 */ \
151 ;; \
152 st8 [r16] = r25,16; /* save ar.unat */ \
153 st8 [r17] = r26,16; /* save ar.pfs */ \
154	shl r18 = r18,16;	/* compute ar.rsc used for "loadrs" */\
155 ;; \
156 st8 [r16] = r27,16; /* save ar.rsc */ \
157 st8 [r17] = r28,16; /* save ar.rnat */ \
158 ;; /* avoid RAW on r16 & r17 */ \
159 st8 [r16] = r23,16; /* save ar.bspstore */ \
160 st8 [r17] = r31,16; /* save predicates */ \
161 ;; \
162 st8 [r16] = r29,16; /* save b0 */ \
163 st8 [r17] = r18,16; /* save ar.rsc value for "loadrs" */\
164 ;; \
165.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */ \
166.mem.offset 8,0; st8.spill [r17] = r12,16; \
167 adds r12 = -16,r1; /* switch to kernel memory stack */ \
168 ;; \
169.mem.offset 0,0; st8.spill [r16] = r13,16; \
170.mem.offset 8,0; st8.spill [r17] = r10,16; /* save ar.fpsr */\
171 mov r13 = r21; /* establish `current' */ \
172 ;; \
173.mem.offset 0,0; st8.spill [r16] = r15,16; \
174.mem.offset 8,0; st8.spill [r17] = r14,16; \
175 ;; \
176.mem.offset 0,0; st8.spill [r16] = r2,16; \
177.mem.offset 8,0; st8.spill [r17] = r3,16; \
178 adds r2 = VMM_PT_REGS_R16_OFFSET,r1; \
179 ;; \
180 adds r16 = VMM_VCPU_IIPA_OFFSET,r13; \
181 adds r17 = VMM_VCPU_ISR_OFFSET,r13; \
182 mov r26 = cr.iipa; \
183 mov r27 = cr.isr; \
184 ;; \
185 st8 [r16] = r26; \
186 st8 [r17] = r27; \
187 ;; \
188 EXTRA; \
189 mov r8 = ar.ccv; \
190 mov r9 = ar.csd; \
191 mov r10 = ar.ssd; \
192 movl r11 = FPSR_DEFAULT; /* L-unit */ \
193 adds r17 = VMM_VCPU_GP_OFFSET,r13; \
194 ;; \
195 ld8 r1 = [r17];/* establish kernel global pointer */ \
196 ;; \
197 PAL_VSA_SYNC_READ \
198 KVM_MINSTATE_END_SAVE_MIN
199
200/*
201 * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
202 *
203 * Assumed state upon entry:
204 * psr.ic: on
205 * r2: points to &pt_regs.f6
206 * r3: points to &pt_regs.f7
207 * r8: contents of ar.ccv
208 * r9: contents of ar.csd
209 * r10: contents of ar.ssd
210 * r11: FPSR_DEFAULT
211 *
212 * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
213 */
214#define KVM_SAVE_REST \
215.mem.offset 0,0; st8.spill [r2] = r16,16; \
216.mem.offset 8,0; st8.spill [r3] = r17,16; \
217 ;; \
218.mem.offset 0,0; st8.spill [r2] = r18,16; \
219.mem.offset 8,0; st8.spill [r3] = r19,16; \
220 ;; \
221.mem.offset 0,0; st8.spill [r2] = r20,16; \
222.mem.offset 8,0; st8.spill [r3] = r21,16; \
223 mov r18=b6; \
224 ;; \
225.mem.offset 0,0; st8.spill [r2] = r22,16; \
226.mem.offset 8,0; st8.spill [r3] = r23,16; \
227 mov r19 = b7; \
228 ;; \
229.mem.offset 0,0; st8.spill [r2] = r24,16; \
230.mem.offset 8,0; st8.spill [r3] = r25,16; \
231 ;; \
232.mem.offset 0,0; st8.spill [r2] = r26,16; \
233.mem.offset 8,0; st8.spill [r3] = r27,16; \
234 ;; \
235.mem.offset 0,0; st8.spill [r2] = r28,16; \
236.mem.offset 8,0; st8.spill [r3] = r29,16; \
237 ;; \
238.mem.offset 0,0; st8.spill [r2] = r30,16; \
239.mem.offset 8,0; st8.spill [r3] = r31,32; \
240 ;; \
241 mov ar.fpsr = r11; \
242 st8 [r2] = r8,8; \
243 adds r24 = PT(B6)-PT(F7),r3; \
244 adds r25 = PT(B7)-PT(F7),r3; \
245 ;; \
246 st8 [r24] = r18,16; /* b6 */ \
247 st8 [r25] = r19,16; /* b7 */ \
248 adds r2 = PT(R4)-PT(F6),r2; \
249 adds r3 = PT(R5)-PT(F7),r3; \
250 ;; \
251 st8 [r24] = r9; /* ar.csd */ \
252 st8 [r25] = r10; /* ar.ssd */ \
253 ;; \
254 mov r18 = ar.unat; \
255 adds r19 = PT(EML_UNAT)-PT(R4),r2; \
256 ;; \
257 st8 [r19] = r18; /* eml_unat */ \
258
259
260#define KVM_SAVE_EXTRA \
261.mem.offset 0,0; st8.spill [r2] = r4,16; \
262.mem.offset 8,0; st8.spill [r3] = r5,16; \
263 ;; \
264.mem.offset 0,0; st8.spill [r2] = r6,16; \
265.mem.offset 8,0; st8.spill [r3] = r7; \
266 ;; \
267 mov r26 = ar.unat; \
268 ;; \
269 st8 [r2] = r26;/* eml_unat */ \
270
271#define KVM_SAVE_MIN_WITH_COVER KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,)
272#define KVM_SAVE_MIN_WITH_COVER_R19 KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19)
273#define KVM_SAVE_MIN KVM_DO_SAVE_MIN( , mov r30 = r0, )
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
new file mode 100644
index 000000000000..6d6cbcb14893
--- /dev/null
+++ b/arch/ia64/kvm/lapic.h
@@ -0,0 +1,25 @@
1#ifndef __KVM_IA64_LAPIC_H
2#define __KVM_IA64_LAPIC_H
3
4#include <linux/kvm_host.h>
5
6/*
7 * vlsapic
8 */
9struct kvm_lapic{
10 struct kvm_vcpu *vcpu;
11 uint64_t insvc[4];
12 uint64_t vhpi;
13 uint8_t xtp;
14 uint8_t pal_init_pending;
15 uint8_t pad[2];
16};
17
18int kvm_create_lapic(struct kvm_vcpu *vcpu);
19void kvm_free_lapic(struct kvm_vcpu *vcpu);
20
21int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
22int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
23int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
24
25#endif
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
new file mode 100644
index 000000000000..e585c4607344
--- /dev/null
+++ b/arch/ia64/kvm/misc.h
@@ -0,0 +1,93 @@
1#ifndef __KVM_IA64_MISC_H
2#define __KVM_IA64_MISC_H
3
4#include <linux/kvm_host.h>
5/*
6 * misc.h
7 * Copyright (C) 2007, Intel Corporation.
8 * Xiantao Zhang (xiantao.zhang@intel.com)
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms and conditions of the GNU General Public License,
12 * version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
21 * Place - Suite 330, Boston, MA 02111-1307 USA.
22 *
23 */
24
25/*
26 *Return p2m base address on the host side.
27 */
28static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
29{
30 return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS);
31}
32
33static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
34 u64 paddr, u64 mem_flags)
35{
36 uint64_t *pmt_base = kvm_host_get_pmt(kvm);
37 unsigned long pte;
38
39 pte = PAGE_ALIGN(paddr) | mem_flags;
40 pmt_base[gfn] = pte;
41}
42
43/*Function for translating host address to guest address*/
44
45static inline void *to_guest(struct kvm *kvm, void *addr)
46{
47 return (void *)((unsigned long)(addr) - kvm->arch.vm_base +
48 KVM_VM_DATA_BASE);
49}
50
51/*Function for translating guest address to host address*/
52
53static inline void *to_host(struct kvm *kvm, void *addr)
54{
55 return (void *)((unsigned long)addr - KVM_VM_DATA_BASE
56 + kvm->arch.vm_base);
57}
58
59/* Get host context of the vcpu */
60static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu)
61{
62 union context *ctx = &vcpu->arch.host;
63 return to_guest(vcpu->kvm, ctx);
64}
65
66/* Get guest context of the vcpu */
67static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu)
68{
69 union context *ctx = &vcpu->arch.guest;
70 return to_guest(vcpu->kvm, ctx);
71}
72
73/* Get the exit data the GVMM recorded for this vcpu. */
74static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu)
75{
76 return &vcpu->arch.exit_data;
77}
78
79/* Get the vcpu's pending I/O request for the kvm module. */
80static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu)
81{
82 struct exit_ctl_data *p_ctl_data;
83
84 if (vcpu) {
85 p_ctl_data = kvm_get_exit_data(vcpu);
86 if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION)
87 return &p_ctl_data->u.ioreq;
88 }
89
90 return NULL;
91}
92
93#endif
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
new file mode 100644
index 000000000000..351bf70da463
--- /dev/null
+++ b/arch/ia64/kvm/mmio.c
@@ -0,0 +1,341 @@
1/*
2 * mmio.c: MMIO emulation components.
3 * Copyright (c) 2004, Intel Corporation.
4 * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
5 * Kun Tian (Kevin Tian) (Kevin.tian@intel.com)
6 *
7 * Copyright (c) 2007 Intel Corporation KVM support.
8 * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
9 * Xiantao Zhang (xiantao.zhang@intel.com)
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms and conditions of the GNU General Public License,
13 * version 2, as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * more details.
19 *
20 * You should have received a copy of the GNU General Public License along with
21 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
22 * Place - Suite 330, Boston, MA 02111-1307 USA.
23 *
24 */
25
26#include <linux/kvm_host.h>
27
28#include "vcpu.h"
29
30static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val)
31{
32 VLSAPIC_XTP(v) = val;
33}
34
35/*
36 * LSAPIC OFFSET
37 */
38#define PIB_LOW_HALF(ofst) !(ofst & (1 << 20))
39#define PIB_OFST_INTA 0x1E0000
40#define PIB_OFST_XTP 0x1E0008
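/* The low half of the PIB holds the per-processor interrupt (IPI) registers;
 * INTA and XTP live in the upper half. */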
41
42/*
43 * execute write IPI op.
44 */
45static void vlsapic_write_ipi(struct kvm_vcpu *vcpu,
46 uint64_t addr, uint64_t data)
47{
48 struct exit_ctl_data *p = &current_vcpu->arch.exit_data;
49 unsigned long psr;
50
51 local_irq_save(psr);
52
53 p->exit_reason = EXIT_REASON_IPI;
54 p->u.ipi_data.addr.val = addr;
55 p->u.ipi_data.data.val = data;
56 vmm_transition(current_vcpu);
57
58 local_irq_restore(psr);
59
60}
61
62void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
63 unsigned long length, unsigned long val)
64{
65 addr &= (PIB_SIZE - 1);
66
67 switch (addr) {
68 case PIB_OFST_INTA:
69 /*panic_domain(NULL, "Undefined write on PIB INTA\n");*/
70 panic_vm(v);
71 break;
72 case PIB_OFST_XTP:
73 if (length == 1) {
74 vlsapic_write_xtp(v, val);
75 } else {
76 /*panic_domain(NULL,
77 "Undefined write on PIB XTP\n");*/
78 panic_vm(v);
79 }
80 break;
81 default:
82 if (PIB_LOW_HALF(addr)) {
83 /*lower half */
84 if (length != 8)
85 /*panic_domain(NULL,
86 "Can't LHF write with size %ld!\n",
87 length);*/
88 panic_vm(v);
89 else
90 vlsapic_write_ipi(v, addr, val);
91 } else { /* upper half
92 printk("IPI-UHF write %lx\n",addr);*/
93 panic_vm(v);
94 }
95 break;
96 }
97}
98
99unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
100 unsigned long length)
101{
102 uint64_t result = 0;
103
104 addr &= (PIB_SIZE - 1);
105
106 switch (addr) {
107 case PIB_OFST_INTA:
108 if (length == 1) /* 1 byte load */
109 ; /* There is no i8259, there is no INTA access*/
110 else
111 /*panic_domain(NULL,"Undefined read on PIB INTA\n"); */
112 panic_vm(v);
113
114 break;
115 case PIB_OFST_XTP:
116 if (length == 1) {
117 result = VLSAPIC_XTP(v);
118 /* printk("read xtp %lx\n", result); */
119 } else {
120 /*panic_domain(NULL,
121 "Undefined read on PIB XTP\n");*/
122 panic_vm(v);
123 }
124 break;
125 default:
126 panic_vm(v);
127 break;
128 }
129 return result;
130}
131
132static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
133 u16 s, int ma, int dir)
134{
135 unsigned long iot;
136 struct exit_ctl_data *p = &vcpu->arch.exit_data;
137 unsigned long psr;
138
139 iot = __gpfn_is_io(src_pa >> PAGE_SHIFT);
140
141 local_irq_save(psr);
142
143	/*Intercept accesses to the PIB range*/
144 if (iot == GPFN_PIB) {
145 if (!dir)
146 lsapic_write(vcpu, src_pa, s, *dest);
147 else
148 *dest = lsapic_read(vcpu, src_pa, s);
149 goto out;
150 }
151 p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION;
152 p->u.ioreq.addr = src_pa;
153 p->u.ioreq.size = s;
154 p->u.ioreq.dir = dir;
155 if (dir == IOREQ_WRITE)
156 p->u.ioreq.data = *dest;
157 p->u.ioreq.state = STATE_IOREQ_READY;
158 vmm_transition(vcpu);
159
160 if (p->u.ioreq.state == STATE_IORESP_READY) {
161 if (dir == IOREQ_READ)
162 *dest = p->u.ioreq.data;
163 } else
164 panic_vm(vcpu);
165out:
166 local_irq_restore(psr);
167 return ;
168}
169
170/*
171 dir 1: read 0:write
172 inst_type 0:integer 1:floating point
173 */
174#define SL_INTEGER	0	/* store/load integer*/
175#define SL_FLOATING 1 /* store/load floating*/
176
177void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
178{
179 struct kvm_pt_regs *regs;
180 IA64_BUNDLE bundle;
181 int slot, dir = 0;
182 int inst_type = -1;
183 u16 size = 0;
184 u64 data, slot1a, slot1b, temp, update_reg;
185 s32 imm;
186 INST64 inst;
187
188 regs = vcpu_regs(vcpu);
189
190 if (fetch_code(vcpu, regs->cr_iip, &bundle)) {
191		/* if fetch code fails, return and try again */
192 return;
193 }
194 slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
195 if (!slot)
196 inst.inst = bundle.slot0;
197 else if (slot == 1) {
198 slot1a = bundle.slot1a;
199 slot1b = bundle.slot1b;
200 inst.inst = slot1a + (slot1b << 18);
201 } else if (slot == 2)
202 inst.inst = bundle.slot2;
203
204 /* Integer Load/Store */
205 if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) {
206 inst_type = SL_INTEGER;
207 size = (inst.M1.x6 & 0x3);
208 if ((inst.M1.x6 >> 2) > 0xb) {
209 /*write*/
210 dir = IOREQ_WRITE;
211 data = vcpu_get_gr(vcpu, inst.M4.r2);
212 } else if ((inst.M1.x6 >> 2) < 0xb) {
213 /*read*/
214 dir = IOREQ_READ;
215 }
216 } else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) {
217 /* Integer Load + Reg update */
218 inst_type = SL_INTEGER;
219 dir = IOREQ_READ;
220 size = (inst.M2.x6 & 0x3);
221 temp = vcpu_get_gr(vcpu, inst.M2.r3);
222 update_reg = vcpu_get_gr(vcpu, inst.M2.r2);
223 temp += update_reg;
224 vcpu_set_gr(vcpu, inst.M2.r3, temp, 0);
225 } else if (inst.M3.major == 5) {
226 /*Integer Load/Store + Imm update*/
227 inst_type = SL_INTEGER;
228 size = (inst.M3.x6&0x3);
229 if ((inst.M5.x6 >> 2) > 0xb) {
230 /*write*/
231 dir = IOREQ_WRITE;
232 data = vcpu_get_gr(vcpu, inst.M5.r2);
233 temp = vcpu_get_gr(vcpu, inst.M5.r3);
234 imm = (inst.M5.s << 31) | (inst.M5.i << 30) |
235 (inst.M5.imm7 << 23);
236 temp += imm >> 23;
237 vcpu_set_gr(vcpu, inst.M5.r3, temp, 0);
238
239 } else if ((inst.M3.x6 >> 2) < 0xb) {
240 /*read*/
241 dir = IOREQ_READ;
242 temp = vcpu_get_gr(vcpu, inst.M3.r3);
243 imm = (inst.M3.s << 31) | (inst.M3.i << 30) |
244 (inst.M3.imm7 << 23);
245 temp += imm >> 23;
246 vcpu_set_gr(vcpu, inst.M3.r3, temp, 0);
247
248 }
249 } else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B
250 && inst.M9.m == 0 && inst.M9.x == 0) {
251 /* Floating-point spill*/
252 struct ia64_fpreg v;
253
254 inst_type = SL_FLOATING;
255 dir = IOREQ_WRITE;
256 vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
257 /* Write high word. FIXME: this is a kludge! */
258 v.u.bits[1] &= 0x3ffff;
259 mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
260 data = v.u.bits[0];
261 size = 3;
262 } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
263 /* Floating-point spill + Imm update */
264 struct ia64_fpreg v;
265
266 inst_type = SL_FLOATING;
267 dir = IOREQ_WRITE;
268 vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
269 temp = vcpu_get_gr(vcpu, inst.M10.r3);
270 imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
271 (inst.M10.imm7 << 23);
272 temp += imm >> 23;
273 vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
274
275		/* Write high word. FIXME: this is a kludge! */
276 v.u.bits[1] &= 0x3ffff;
277 mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
278 data = v.u.bits[0];
279 size = 3;
280 } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
281 /* Floating-point stf8 + Imm update */
282 struct ia64_fpreg v;
283 inst_type = SL_FLOATING;
284 dir = IOREQ_WRITE;
285 size = 3;
286 vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
287 data = v.u.bits[0]; /* Significand. */
288 temp = vcpu_get_gr(vcpu, inst.M10.r3);
289 imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
290 (inst.M10.imm7 << 23);
291 temp += imm >> 23;
292 vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
293 } else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c
294 && inst.M15.x6 <= 0x2f) {
295 temp = vcpu_get_gr(vcpu, inst.M15.r3);
296 imm = (inst.M15.s << 31) | (inst.M15.i << 30) |
297 (inst.M15.imm7 << 23);
298 temp += imm >> 23;
299 vcpu_set_gr(vcpu, inst.M15.r3, temp, 0);
300
301 vcpu_increment_iip(vcpu);
302 return;
303 } else if (inst.M12.major == 6 && inst.M12.m == 1
304 && inst.M12.x == 1 && inst.M12.x6 == 1) {
305 /* Floating-point Load Pair + Imm ldfp8 M12*/
306 struct ia64_fpreg v;
307
308 inst_type = SL_FLOATING;
309 dir = IOREQ_READ;
310 size = 8; /*ldfd*/
311 mmio_access(vcpu, padr, &data, size, ma, dir);
312 v.u.bits[0] = data;
313 v.u.bits[1] = 0x1003E;
314 vcpu_set_fpreg(vcpu, inst.M12.f1, &v);
315 padr += 8;
316 mmio_access(vcpu, padr, &data, size, ma, dir);
317 v.u.bits[0] = data;
318 v.u.bits[1] = 0x1003E;
319 vcpu_set_fpreg(vcpu, inst.M12.f2, &v);
320 padr += 8;
321 vcpu_set_gr(vcpu, inst.M12.r3, padr, 0);
322 vcpu_increment_iip(vcpu);
323 return;
324 } else {
325 inst_type = -1;
326 panic_vm(vcpu);
327 }
328
329 size = 1 << size;
330 if (dir == IOREQ_WRITE) {
331 mmio_access(vcpu, padr, &data, size, ma, dir);
332 } else {
333 mmio_access(vcpu, padr, &data, size, ma, dir);
334 if (inst_type == SL_INTEGER)
335 vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
336 else
337 panic_vm(vcpu);
338
339 }
340 vcpu_increment_iip(vcpu);
341}
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
new file mode 100644
index 000000000000..e4f15d641b22
--- /dev/null
+++ b/arch/ia64/kvm/optvfault.S
@@ -0,0 +1,918 @@
1/*
2 * arch/ia64/kvm/optvfault.S
3 * optimized virtualization fault handlers
4 *
5 * Copyright (C) 2006 Intel Co
6 * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
7 */
8
9#include <asm/asmmacro.h>
10#include <asm/processor.h>
11
12#include "vti.h"
13#include "asm-offsets.h"
14
15#define ACCE_MOV_FROM_AR
16#define ACCE_MOV_FROM_RR
17#define ACCE_MOV_TO_RR
18#define ACCE_RSM
19#define ACCE_SSM
20#define ACCE_MOV_TO_PSR
21#define ACCE_THASH
22
23//mov r1=ar3
24GLOBAL_ENTRY(kvm_asm_mov_from_ar)
25#ifndef ACCE_MOV_FROM_AR
26 br.many kvm_virtualization_fault_back
27#endif
28 add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
29 add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
30 extr.u r17=r25,6,7
31 ;;
32 ld8 r18=[r18]
33 mov r19=ar.itc
34 mov r24=b0
35 ;;
36 add r19=r19,r18
37 addl r20=@gprel(asm_mov_to_reg),gp
38 ;;
39 st8 [r16] = r19
40 adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
41 shladd r17=r17,4,r20
42 ;;
43 mov b0=r17
44 br.sptk.few b0
45 ;;
46END(kvm_asm_mov_from_ar)
47
48
49// mov r1=rr[r3]
50GLOBAL_ENTRY(kvm_asm_mov_from_rr)
51#ifndef ACCE_MOV_FROM_RR
52 br.many kvm_virtualization_fault_back
53#endif
54 extr.u r16=r25,20,7
55 extr.u r17=r25,6,7
56 addl r20=@gprel(asm_mov_from_reg),gp
57 ;;
58 adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20
59 shladd r16=r16,4,r20
60 mov r24=b0
61 ;;
62 add r27=VMM_VCPU_VRR0_OFFSET,r21
63 mov b0=r16
64 br.many b0
65 ;;
66kvm_asm_mov_from_rr_back_1:
67 adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
68 adds r22=asm_mov_to_reg-asm_mov_from_reg,r20
69 shr.u r26=r19,61
70 ;;
71 shladd r17=r17,4,r22
72 shladd r27=r26,3,r27
73 ;;
74 ld8 r19=[r27]
75 mov b0=r17
76 br.many b0
77END(kvm_asm_mov_from_rr)
78
79
80// mov rr[r3]=r2
81GLOBAL_ENTRY(kvm_asm_mov_to_rr)
82#ifndef ACCE_MOV_TO_RR
83 br.many kvm_virtualization_fault_back
84#endif
85 extr.u r16=r25,20,7
86 extr.u r17=r25,13,7
87 addl r20=@gprel(asm_mov_from_reg),gp
88 ;;
89 adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20
90 shladd r16=r16,4,r20
91 mov r22=b0
92 ;;
93 add r27=VMM_VCPU_VRR0_OFFSET,r21
94 mov b0=r16
95 br.many b0
96 ;;
97kvm_asm_mov_to_rr_back_1:
98 adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20
99 shr.u r23=r19,61
100 shladd r17=r17,4,r20
101 ;;
102 //if rr6, go back
103 cmp.eq p6,p0=6,r23
104 mov b0=r22
105 (p6) br.cond.dpnt.many kvm_virtualization_fault_back
106 ;;
107 mov r28=r19
108 mov b0=r17
109 br.many b0
110kvm_asm_mov_to_rr_back_2:
111 adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
112 shladd r27=r23,3,r27
113 ;; // vrr.rid<<4 |0xe
114 st8 [r27]=r19
115 mov b0=r30
116 ;;
117 extr.u r16=r19,8,26
118 extr.u r18 =r19,2,6
119 mov r17 =0xe
120 ;;
121 shladd r16 = r16, 4, r17
122 extr.u r19 =r19,0,8
123 ;;
124 shl r16 = r16,8
125 ;;
126 add r19 = r19, r16
127 ;; //set ve 1
128 dep r19=-1,r19,0,1
129 cmp.lt p6,p0=14,r18
130 ;;
131 (p6) mov r18=14
132 ;;
133 (p6) dep r19=r18,r19,2,6
134 ;;
135 cmp.eq p6,p0=0,r23
136 ;;
137 cmp.eq.or p6,p0=4,r23
138 ;;
139 adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21
140 (p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
141 ;;
142 ld4 r16=[r16]
143 cmp.eq p7,p0=r0,r0
144 (p6) shladd r17=r23,1,r17
145 ;;
146 (p6) st8 [r17]=r19
147 (p6) tbit.nz p6,p7=r16,0
148 ;;
149 (p7) mov rr[r28]=r19
150 mov r24=r22
151 br.many b0
152END(kvm_asm_mov_to_rr)
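// Editor's note (added annotation, not in the original patch): in the
// routine above the guest rr value is rewritten before it can reach the
// hardware: the rid is mangled to ((rid << 4) | 0xe) << 8 (matching the
// "vrr.rid<<4 |0xe" hint), ve is forced to 1 and ps is capped at 14.  For
// regions 0 and 4 the mangled value is only stashed in the META_SAVED_RR0
// slots and is written to the machine rr when mode-flags bit 0 is clear.
// This is the editor's reading of the code, not an authoritative
// description from the patch author.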
153
154
155//rsm
156GLOBAL_ENTRY(kvm_asm_rsm)
157#ifndef ACCE_RSM
158 br.many kvm_virtualization_fault_back
159#endif
160 add r16=VMM_VPD_BASE_OFFSET,r21
161 extr.u r26=r25,6,21
162 extr.u r27=r25,31,2
163 ;;
164 ld8 r16=[r16]
165 extr.u r28=r25,36,1
166 dep r26=r27,r26,21,2
167 ;;
168 add r17=VPD_VPSR_START_OFFSET,r16
169 add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
170 //r26 is imm24
171 dep r26=r28,r26,23,1
172 ;;
173 ld8 r18=[r17]
174 movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI
175 ld4 r23=[r22]
176 sub r27=-1,r26
177 mov r24=b0
178 ;;
179 mov r20=cr.ipsr
180 or r28=r27,r28
181 and r19=r18,r27
182 ;;
183 st8 [r17]=r19
184 and r20=r20,r28
185	/* Commented out due to lack of fp lazy algorithm support
186 adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
187 ;;
188 ld8 r27=[r27]
189 ;;
190 tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT
191 ;;
192 (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
193 */
194 ;;
195 mov cr.ipsr=r20
196 tbit.nz p6,p0=r23,0
197 ;;
198 tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT
199 (p6) br.dptk kvm_resume_to_guest
200 ;;
201 add r26=VMM_VCPU_META_RR0_OFFSET,r21
202 add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
203 dep r23=-1,r23,0,1
204 ;;
205 ld8 r26=[r26]
206 ld8 r27=[r27]
207 st4 [r22]=r23
208 dep.z r28=4,61,3
209 ;;
210 mov rr[r0]=r26
211 ;;
212 mov rr[r28]=r27
213 ;;
214 srlz.d
215 br.many kvm_resume_to_guest
216END(kvm_asm_rsm)
217
218
219//ssm
220GLOBAL_ENTRY(kvm_asm_ssm)
221#ifndef ACCE_SSM
222 br.many kvm_virtualization_fault_back
223#endif
224 add r16=VMM_VPD_BASE_OFFSET,r21
225 extr.u r26=r25,6,21
226 extr.u r27=r25,31,2
227 ;;
228 ld8 r16=[r16]
229 extr.u r28=r25,36,1
230 dep r26=r27,r26,21,2
231 ;; //r26 is imm24
232 add r27=VPD_VPSR_START_OFFSET,r16
233 dep r26=r28,r26,23,1
234 ;; //r19 vpsr
235 ld8 r29=[r27]
236 mov r24=b0
237 ;;
238 add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
239 mov r20=cr.ipsr
240 or r19=r29,r26
241 ;;
242 ld4 r23=[r22]
243 st8 [r27]=r19
244 or r20=r20,r26
245 ;;
246 mov cr.ipsr=r20
247 movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
248 ;;
249 and r19=r28,r19
250 tbit.z p6,p0=r23,0
251 ;;
252 cmp.ne.or p6,p0=r28,r19
253 (p6) br.dptk kvm_asm_ssm_1
254 ;;
255 add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
256 add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
257 dep r23=0,r23,0,1
258 ;;
259 ld8 r26=[r26]
260 ld8 r27=[r27]
261 st4 [r22]=r23
262 dep.z r28=4,61,3
263 ;;
264 mov rr[r0]=r26
265 ;;
266 mov rr[r28]=r27
267 ;;
268 srlz.d
269 ;;
270kvm_asm_ssm_1:
271 tbit.nz p6,p0=r29,IA64_PSR_I_BIT
272 ;;
273 tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
274 (p6) br.dptk kvm_resume_to_guest
275 ;;
276 add r29=VPD_VTPR_START_OFFSET,r16
277 add r30=VPD_VHPI_START_OFFSET,r16
278 ;;
279 ld8 r29=[r29]
280 ld8 r30=[r30]
281 ;;
282 extr.u r17=r29,4,4
283 extr.u r18=r29,16,1
284 ;;
285 dep r17=r18,r17,4,1
286 ;;
287 cmp.gt p6,p0=r30,r17
288 (p6) br.dpnt.few kvm_asm_dispatch_vexirq
289 br.many kvm_resume_to_guest
290END(kvm_asm_ssm)
291
292
293//mov psr.l=r2
294GLOBAL_ENTRY(kvm_asm_mov_to_psr)
295#ifndef ACCE_MOV_TO_PSR
296 br.many kvm_virtualization_fault_back
297#endif
298 add r16=VMM_VPD_BASE_OFFSET,r21
299 extr.u r26=r25,13,7 //r2
300 ;;
301 ld8 r16=[r16]
302 addl r20=@gprel(asm_mov_from_reg),gp
303 ;;
304 adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20
305 shladd r26=r26,4,r20
306 mov r24=b0
307 ;;
308 add r27=VPD_VPSR_START_OFFSET,r16
309 mov b0=r26
310 br.many b0
311 ;;
312kvm_asm_mov_to_psr_back:
313 ld8 r17=[r27]
314 add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
315 dep r19=0,r19,32,32
316 ;;
317 ld4 r23=[r22]
318 dep r18=0,r17,0,32
319 ;;
320 add r30=r18,r19
321 movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
322 ;;
323 st8 [r27]=r30
324 and r27=r28,r30
325 and r29=r28,r17
326 ;;
327 cmp.eq p5,p0=r29,r27
328 cmp.eq p6,p7=r28,r27
329 (p5) br.many kvm_asm_mov_to_psr_1
330 ;;
331 //virtual to physical
332 (p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21
333 (p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
334 (p7) dep r23=-1,r23,0,1
335 ;;
336 //physical to virtual
337 (p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
338 (p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
339 (p6) dep r23=0,r23,0,1
340 ;;
341 ld8 r26=[r26]
342 ld8 r27=[r27]
343 st4 [r22]=r23
344 dep.z r28=4,61,3
345 ;;
346 mov rr[r0]=r26
347 ;;
348 mov rr[r28]=r27
349 ;;
350 srlz.d
351 ;;
352kvm_asm_mov_to_psr_1:
353 mov r20=cr.ipsr
354 movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT
355 ;;
356 or r19=r19,r28
357 dep r20=0,r20,0,32
358 ;;
359 add r20=r19,r20
360 mov b0=r24
361 ;;
362	/* Commented out due to lack of fp lazy algorithm support
363 adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
364 ;;
365 ld8 r27=[r27]
366 ;;
367 tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
368 ;;
369 (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
370 ;;
371 */
372 mov cr.ipsr=r20
373 cmp.ne p6,p0=r0,r0
374 ;;
375 tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT
376 tbit.z.or p6,p0=r30,IA64_PSR_I_BIT
377 (p6) br.dpnt.few kvm_resume_to_guest
378 ;;
379 add r29=VPD_VTPR_START_OFFSET,r16
380 add r30=VPD_VHPI_START_OFFSET,r16
381 ;;
382 ld8 r29=[r29]
383 ld8 r30=[r30]
384 ;;
385 extr.u r17=r29,4,4
386 extr.u r18=r29,16,1
387 ;;
388 dep r17=r18,r17,4,1
389 ;;
390 cmp.gt p6,p0=r30,r17
391 (p6) br.dpnt.few kvm_asm_dispatch_vexirq
392 br.many kvm_resume_to_guest
393END(kvm_asm_mov_to_psr)
394
395
396ENTRY(kvm_asm_dispatch_vexirq)
397//increment iip
398 mov r16=cr.ipsr
399 ;;
400 extr.u r17=r16,IA64_PSR_RI_BIT,2
401 tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
402 ;;
403 (p6) mov r18=cr.iip
404 (p6) mov r17=r0
405 (p7) add r17=1,r17
406 ;;
407 (p6) add r18=0x10,r18
408 dep r16=r17,r16,IA64_PSR_RI_BIT,2
409 ;;
410 (p6) mov cr.iip=r18
411 mov cr.ipsr=r16
412 mov r30 =1
413 br.many kvm_dispatch_vexirq
414END(kvm_asm_dispatch_vexirq)
415
416// thash
417// TODO: add support when pta.vf = 1
418GLOBAL_ENTRY(kvm_asm_thash)
419#ifndef ACCE_THASH
420 br.many kvm_virtualization_fault_back
421#endif
422 extr.u r17=r25,20,7 // get r3 from opcode in r25
423 extr.u r18=r25,6,7 // get r1 from opcode in r25
424 addl r20=@gprel(asm_mov_from_reg),gp
425 ;;
426 adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20
427 shladd r17=r17,4,r20 // get addr of MOVE_FROM_REG(r17)
428 adds r16=VMM_VPD_BASE_OFFSET,r21 // get vcpu.arch.priveregs
429 ;;
430 mov r24=b0
431 ;;
432 ld8 r16=[r16] // get VPD addr
433 mov b0=r17
434 br.many b0 // r19 return value
435 ;;
436kvm_asm_thash_back1:
437 shr.u r23=r19,61 // get RR number
438 adds r25=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr
439 adds r16=VMM_VPD_VPTA_OFFSET,r16 // get vpta
440 ;;
441 shladd r27=r23,3,r25 // get vcpu->arch.vrr[r23]'s addr
442 ld8 r17=[r16] // get PTA
443 mov r26=1
444 ;;
445 extr.u r29=r17,2,6 // get pta.size
446 ld8 r25=[r27] // get vcpu->arch.vrr[r23]'s value
447 ;;
448 extr.u r25=r25,2,6 // get rr.ps
449 shl r22=r26,r29 // 1UL << pta.size
450 ;;
451 shr.u r23=r19,r25 // vaddr >> rr.ps
452 adds r26=3,r29 // pta.size + 3
453 shl r27=r17,3 // pta << 3
454 ;;
455 shl r23=r23,3 // (vaddr >> rr.ps) << 3
456 shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3)
457 movl r16=7<<61
458 ;;
459 adds r22=-1,r22 // (1UL << pta.size) - 1
460 shl r27=r27,r29 // ((pta<<3)>>(pta.size+3))<<pta.size
461 and r19=r19,r16 // vaddr & VRN_MASK
462 ;;
463 and r22=r22,r23 // vhpt_offset
464 or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size)
465 adds r26=asm_mov_to_reg-asm_mov_from_reg,r20
466 ;;
467 or r19=r19,r22 // calc pval
468 shladd r17=r18,4,r26
469 adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
470 ;;
471 mov b0=r17
472 br.many b0
473END(kvm_asm_thash)
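// Editor's summary (annotation only, not in the original patch): the
// per-line comments in kvm_asm_thash above add up to the region-based
// short-format hash
//   thash(vadr) = (vadr & VRN_MASK)
//               | (((pta << 3) >> (pta.size + 3)) << pta.size)
//               | (((vadr >> rr.ps) << 3) & ((1 << pta.size) - 1))
// which is the value returned in r19 before resuming the guest.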
474
475#define MOV_TO_REG0 \
476{; \
477 nop.b 0x0; \
478 nop.b 0x0; \
479 nop.b 0x0; \
480 ;; \
481};
482
483
484#define MOV_TO_REG(n) \
485{; \
486 mov r##n##=r19; \
487 mov b0=r30; \
488 br.sptk.many b0; \
489 ;; \
490};
491
492
493#define MOV_FROM_REG(n) \
494{; \
495 mov r19=r##n##; \
496 mov b0=r30; \
497 br.sptk.many b0; \
498 ;; \
499};
500
501
502#define MOV_TO_BANK0_REG(n) \
503ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##); \
504{; \
505 mov r26=r2; \
506 mov r2=r19; \
507 bsw.1; \
508 ;; \
509}; \
510{; \
511 mov r##n##=r2; \
512 nop.b 0x0; \
513 bsw.0; \
514 ;; \
515}; \
516{; \
517 mov r2=r26; \
518 mov b0=r30; \
519 br.sptk.many b0; \
520 ;; \
521}; \
522END(asm_mov_to_bank0_reg##n##)
523
524
525#define MOV_FROM_BANK0_REG(n) \
526ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##); \
527{; \
528 mov r26=r2; \
529 nop.b 0x0; \
530 bsw.1; \
531 ;; \
532}; \
533{; \
534 mov r2=r##n##; \
535 nop.b 0x0; \
536 bsw.0; \
537 ;; \
538}; \
539{; \
540 mov r19=r2; \
541 mov r2=r26; \
542 mov b0=r30; \
543}; \
544{; \
545 nop.b 0x0; \
546 nop.b 0x0; \
547 br.sptk.many b0; \
548 ;; \
549}; \
550END(asm_mov_from_bank0_reg##n##)
551
552
553#define JMP_TO_MOV_TO_BANK0_REG(n) \
554{; \
555 nop.b 0x0; \
556 nop.b 0x0; \
557 br.sptk.many asm_mov_to_bank0_reg##n##; \
558 ;; \
559}
560
561
562#define JMP_TO_MOV_FROM_BANK0_REG(n) \
563{; \
564 nop.b 0x0; \
565 nop.b 0x0; \
566 br.sptk.many asm_mov_from_bank0_reg##n##; \
567 ;; \
568}
569
570
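// Editor's note (annotation, not in the original patch): every
// MOV_TO_REG(n)/MOV_FROM_REG(n) and JMP_TO_* entry above expands to exactly
// one 16-byte bundle, so the asm_mov_to_reg/asm_mov_from_reg tables below
// can be indexed as table_base + reg_num * 16 -- which is what the
// "shladd rX=rX,4,r20" sequences in the handlers earlier in this file do.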
571MOV_FROM_BANK0_REG(16)
572MOV_FROM_BANK0_REG(17)
573MOV_FROM_BANK0_REG(18)
574MOV_FROM_BANK0_REG(19)
575MOV_FROM_BANK0_REG(20)
576MOV_FROM_BANK0_REG(21)
577MOV_FROM_BANK0_REG(22)
578MOV_FROM_BANK0_REG(23)
579MOV_FROM_BANK0_REG(24)
580MOV_FROM_BANK0_REG(25)
581MOV_FROM_BANK0_REG(26)
582MOV_FROM_BANK0_REG(27)
583MOV_FROM_BANK0_REG(28)
584MOV_FROM_BANK0_REG(29)
585MOV_FROM_BANK0_REG(30)
586MOV_FROM_BANK0_REG(31)
587
588
589// mov from reg table
590ENTRY(asm_mov_from_reg)
591 MOV_FROM_REG(0)
592 MOV_FROM_REG(1)
593 MOV_FROM_REG(2)
594 MOV_FROM_REG(3)
595 MOV_FROM_REG(4)
596 MOV_FROM_REG(5)
597 MOV_FROM_REG(6)
598 MOV_FROM_REG(7)
599 MOV_FROM_REG(8)
600 MOV_FROM_REG(9)
601 MOV_FROM_REG(10)
602 MOV_FROM_REG(11)
603 MOV_FROM_REG(12)
604 MOV_FROM_REG(13)
605 MOV_FROM_REG(14)
606 MOV_FROM_REG(15)
607 JMP_TO_MOV_FROM_BANK0_REG(16)
608 JMP_TO_MOV_FROM_BANK0_REG(17)
609 JMP_TO_MOV_FROM_BANK0_REG(18)
610 JMP_TO_MOV_FROM_BANK0_REG(19)
611 JMP_TO_MOV_FROM_BANK0_REG(20)
612 JMP_TO_MOV_FROM_BANK0_REG(21)
613 JMP_TO_MOV_FROM_BANK0_REG(22)
614 JMP_TO_MOV_FROM_BANK0_REG(23)
615 JMP_TO_MOV_FROM_BANK0_REG(24)
616 JMP_TO_MOV_FROM_BANK0_REG(25)
617 JMP_TO_MOV_FROM_BANK0_REG(26)
618 JMP_TO_MOV_FROM_BANK0_REG(27)
619 JMP_TO_MOV_FROM_BANK0_REG(28)
620 JMP_TO_MOV_FROM_BANK0_REG(29)
621 JMP_TO_MOV_FROM_BANK0_REG(30)
622 JMP_TO_MOV_FROM_BANK0_REG(31)
623 MOV_FROM_REG(32)
624 MOV_FROM_REG(33)
625 MOV_FROM_REG(34)
626 MOV_FROM_REG(35)
627 MOV_FROM_REG(36)
628 MOV_FROM_REG(37)
629 MOV_FROM_REG(38)
630 MOV_FROM_REG(39)
631 MOV_FROM_REG(40)
632 MOV_FROM_REG(41)
633 MOV_FROM_REG(42)
634 MOV_FROM_REG(43)
635 MOV_FROM_REG(44)
636 MOV_FROM_REG(45)
637 MOV_FROM_REG(46)
638 MOV_FROM_REG(47)
639 MOV_FROM_REG(48)
640 MOV_FROM_REG(49)
641 MOV_FROM_REG(50)
642 MOV_FROM_REG(51)
643 MOV_FROM_REG(52)
644 MOV_FROM_REG(53)
645 MOV_FROM_REG(54)
646 MOV_FROM_REG(55)
647 MOV_FROM_REG(56)
648 MOV_FROM_REG(57)
649 MOV_FROM_REG(58)
650 MOV_FROM_REG(59)
651 MOV_FROM_REG(60)
652 MOV_FROM_REG(61)
653 MOV_FROM_REG(62)
654 MOV_FROM_REG(63)
655 MOV_FROM_REG(64)
656 MOV_FROM_REG(65)
657 MOV_FROM_REG(66)
658 MOV_FROM_REG(67)
659 MOV_FROM_REG(68)
660 MOV_FROM_REG(69)
661 MOV_FROM_REG(70)
662 MOV_FROM_REG(71)
663 MOV_FROM_REG(72)
664 MOV_FROM_REG(73)
665 MOV_FROM_REG(74)
666 MOV_FROM_REG(75)
667 MOV_FROM_REG(76)
668 MOV_FROM_REG(77)
669 MOV_FROM_REG(78)
670 MOV_FROM_REG(79)
671 MOV_FROM_REG(80)
672 MOV_FROM_REG(81)
673 MOV_FROM_REG(82)
674 MOV_FROM_REG(83)
675 MOV_FROM_REG(84)
676 MOV_FROM_REG(85)
677 MOV_FROM_REG(86)
678 MOV_FROM_REG(87)
679 MOV_FROM_REG(88)
680 MOV_FROM_REG(89)
681 MOV_FROM_REG(90)
682 MOV_FROM_REG(91)
683 MOV_FROM_REG(92)
684 MOV_FROM_REG(93)
685 MOV_FROM_REG(94)
686 MOV_FROM_REG(95)
687 MOV_FROM_REG(96)
688 MOV_FROM_REG(97)
689 MOV_FROM_REG(98)
690 MOV_FROM_REG(99)
691 MOV_FROM_REG(100)
692 MOV_FROM_REG(101)
693 MOV_FROM_REG(102)
694 MOV_FROM_REG(103)
695 MOV_FROM_REG(104)
696 MOV_FROM_REG(105)
697 MOV_FROM_REG(106)
698 MOV_FROM_REG(107)
699 MOV_FROM_REG(108)
700 MOV_FROM_REG(109)
701 MOV_FROM_REG(110)
702 MOV_FROM_REG(111)
703 MOV_FROM_REG(112)
704 MOV_FROM_REG(113)
705 MOV_FROM_REG(114)
706 MOV_FROM_REG(115)
707 MOV_FROM_REG(116)
708 MOV_FROM_REG(117)
709 MOV_FROM_REG(118)
710 MOV_FROM_REG(119)
711 MOV_FROM_REG(120)
712 MOV_FROM_REG(121)
713 MOV_FROM_REG(122)
714 MOV_FROM_REG(123)
715 MOV_FROM_REG(124)
716 MOV_FROM_REG(125)
717 MOV_FROM_REG(126)
718 MOV_FROM_REG(127)
719END(asm_mov_from_reg)
720
721
722/* must be in bank 0
723 * parameter:
724 * r31: pr
725 * r24: b0
726 */
727ENTRY(kvm_resume_to_guest)
728 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
729 ;;
730 ld8 r1 =[r16]
731 adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21
732 ;;
733 mov r16=cr.ipsr
734 ;;
735 ld8 r20 = [r20]
736 adds r19=VMM_VPD_BASE_OFFSET,r21
737 ;;
738 ld8 r25=[r19]
739 extr.u r17=r16,IA64_PSR_RI_BIT,2
740 tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
741 ;;
742 (p6) mov r18=cr.iip
743 (p6) mov r17=r0
744 ;;
745 (p6) add r18=0x10,r18
746 (p7) add r17=1,r17
747 ;;
748 (p6) mov cr.iip=r18
749 dep r16=r17,r16,IA64_PSR_RI_BIT,2
750 ;;
751 mov cr.ipsr=r16
752 adds r19= VPD_VPSR_START_OFFSET,r25
753 add r28=PAL_VPS_RESUME_NORMAL,r20
754 add r29=PAL_VPS_RESUME_HANDLER,r20
755 ;;
756 ld8 r19=[r19]
757 mov b0=r29
758 cmp.ne p6,p7 = r0,r0
759 ;;
760 tbit.z p6,p7 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
761 ;;
762 (p6) ld8 r26=[r25]
763 (p7) mov b0=r28
764 mov pr=r31,-2
765 br.sptk.many b0 // call pal service
766 ;;
767END(kvm_resume_to_guest)
768
769
770MOV_TO_BANK0_REG(16)
771MOV_TO_BANK0_REG(17)
772MOV_TO_BANK0_REG(18)
773MOV_TO_BANK0_REG(19)
774MOV_TO_BANK0_REG(20)
775MOV_TO_BANK0_REG(21)
776MOV_TO_BANK0_REG(22)
777MOV_TO_BANK0_REG(23)
778MOV_TO_BANK0_REG(24)
779MOV_TO_BANK0_REG(25)
780MOV_TO_BANK0_REG(26)
781MOV_TO_BANK0_REG(27)
782MOV_TO_BANK0_REG(28)
783MOV_TO_BANK0_REG(29)
784MOV_TO_BANK0_REG(30)
785MOV_TO_BANK0_REG(31)
786
787
788// mov to reg table
789ENTRY(asm_mov_to_reg)
790 MOV_TO_REG0
791 MOV_TO_REG(1)
792 MOV_TO_REG(2)
793 MOV_TO_REG(3)
794 MOV_TO_REG(4)
795 MOV_TO_REG(5)
796 MOV_TO_REG(6)
797 MOV_TO_REG(7)
798 MOV_TO_REG(8)
799 MOV_TO_REG(9)
800 MOV_TO_REG(10)
801 MOV_TO_REG(11)
802 MOV_TO_REG(12)
803 MOV_TO_REG(13)
804 MOV_TO_REG(14)
805 MOV_TO_REG(15)
806 JMP_TO_MOV_TO_BANK0_REG(16)
807 JMP_TO_MOV_TO_BANK0_REG(17)
808 JMP_TO_MOV_TO_BANK0_REG(18)
809 JMP_TO_MOV_TO_BANK0_REG(19)
810 JMP_TO_MOV_TO_BANK0_REG(20)
811 JMP_TO_MOV_TO_BANK0_REG(21)
812 JMP_TO_MOV_TO_BANK0_REG(22)
813 JMP_TO_MOV_TO_BANK0_REG(23)
814 JMP_TO_MOV_TO_BANK0_REG(24)
815 JMP_TO_MOV_TO_BANK0_REG(25)
816 JMP_TO_MOV_TO_BANK0_REG(26)
817 JMP_TO_MOV_TO_BANK0_REG(27)
818 JMP_TO_MOV_TO_BANK0_REG(28)
819 JMP_TO_MOV_TO_BANK0_REG(29)
820 JMP_TO_MOV_TO_BANK0_REG(30)
821 JMP_TO_MOV_TO_BANK0_REG(31)
822 MOV_TO_REG(32)
823 MOV_TO_REG(33)
824 MOV_TO_REG(34)
825 MOV_TO_REG(35)
826 MOV_TO_REG(36)
827 MOV_TO_REG(37)
828 MOV_TO_REG(38)
829 MOV_TO_REG(39)
830 MOV_TO_REG(40)
831 MOV_TO_REG(41)
832 MOV_TO_REG(42)
833 MOV_TO_REG(43)
834 MOV_TO_REG(44)
835 MOV_TO_REG(45)
836 MOV_TO_REG(46)
837 MOV_TO_REG(47)
838 MOV_TO_REG(48)
839 MOV_TO_REG(49)
840 MOV_TO_REG(50)
841 MOV_TO_REG(51)
842 MOV_TO_REG(52)
843 MOV_TO_REG(53)
844 MOV_TO_REG(54)
845 MOV_TO_REG(55)
846 MOV_TO_REG(56)
847 MOV_TO_REG(57)
848 MOV_TO_REG(58)
849 MOV_TO_REG(59)
850 MOV_TO_REG(60)
851 MOV_TO_REG(61)
852 MOV_TO_REG(62)
853 MOV_TO_REG(63)
854 MOV_TO_REG(64)
855 MOV_TO_REG(65)
856 MOV_TO_REG(66)
857 MOV_TO_REG(67)
858 MOV_TO_REG(68)
859 MOV_TO_REG(69)
860 MOV_TO_REG(70)
861 MOV_TO_REG(71)
862 MOV_TO_REG(72)
863 MOV_TO_REG(73)
864 MOV_TO_REG(74)
865 MOV_TO_REG(75)
866 MOV_TO_REG(76)
867 MOV_TO_REG(77)
868 MOV_TO_REG(78)
869 MOV_TO_REG(79)
870 MOV_TO_REG(80)
871 MOV_TO_REG(81)
872 MOV_TO_REG(82)
873 MOV_TO_REG(83)
874 MOV_TO_REG(84)
875 MOV_TO_REG(85)
876 MOV_TO_REG(86)
877 MOV_TO_REG(87)
878 MOV_TO_REG(88)
879 MOV_TO_REG(89)
880 MOV_TO_REG(90)
881 MOV_TO_REG(91)
882 MOV_TO_REG(92)
883 MOV_TO_REG(93)
884 MOV_TO_REG(94)
885 MOV_TO_REG(95)
886 MOV_TO_REG(96)
887 MOV_TO_REG(97)
888 MOV_TO_REG(98)
889 MOV_TO_REG(99)
890 MOV_TO_REG(100)
891 MOV_TO_REG(101)
892 MOV_TO_REG(102)
893 MOV_TO_REG(103)
894 MOV_TO_REG(104)
895 MOV_TO_REG(105)
896 MOV_TO_REG(106)
897 MOV_TO_REG(107)
898 MOV_TO_REG(108)
899 MOV_TO_REG(109)
900 MOV_TO_REG(110)
901 MOV_TO_REG(111)
902 MOV_TO_REG(112)
903 MOV_TO_REG(113)
904 MOV_TO_REG(114)
905 MOV_TO_REG(115)
906 MOV_TO_REG(116)
907 MOV_TO_REG(117)
908 MOV_TO_REG(118)
909 MOV_TO_REG(119)
910 MOV_TO_REG(120)
911 MOV_TO_REG(121)
912 MOV_TO_REG(122)
913 MOV_TO_REG(123)
914 MOV_TO_REG(124)
915 MOV_TO_REG(125)
916 MOV_TO_REG(126)
917 MOV_TO_REG(127)
918END(asm_mov_to_reg)
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
new file mode 100644
index 000000000000..5a33f7ed29a0
--- /dev/null
+++ b/arch/ia64/kvm/process.c
@@ -0,0 +1,970 @@
1/*
2 * process.c: handle interruption injection for guests.
3 * Copyright (c) 2005, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 * Shaofan Li (Susue Li) <susie.li@intel.com>
19 * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com>
20 * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
21 * Xiantao Zhang (xiantao.zhang@intel.com)
22 */
23#include "vcpu.h"
24
25#include <asm/pal.h>
26#include <asm/sal.h>
27#include <asm/fpswa.h>
28#include <asm/kregs.h>
29#include <asm/tlb.h>
30
31fpswa_interface_t *vmm_fpswa_interface;
32
33#define IA64_VHPT_TRANS_VECTOR 0x0000
34#define IA64_INST_TLB_VECTOR 0x0400
35#define IA64_DATA_TLB_VECTOR 0x0800
36#define IA64_ALT_INST_TLB_VECTOR 0x0c00
37#define IA64_ALT_DATA_TLB_VECTOR 0x1000
38#define IA64_DATA_NESTED_TLB_VECTOR 0x1400
39#define IA64_INST_KEY_MISS_VECTOR 0x1800
40#define IA64_DATA_KEY_MISS_VECTOR 0x1c00
41#define IA64_DIRTY_BIT_VECTOR 0x2000
42#define IA64_INST_ACCESS_BIT_VECTOR 0x2400
43#define IA64_DATA_ACCESS_BIT_VECTOR 0x2800
44#define IA64_BREAK_VECTOR 0x2c00
45#define IA64_EXTINT_VECTOR 0x3000
46#define IA64_PAGE_NOT_PRESENT_VECTOR 0x5000
47#define IA64_KEY_PERMISSION_VECTOR 0x5100
48#define IA64_INST_ACCESS_RIGHTS_VECTOR 0x5200
49#define IA64_DATA_ACCESS_RIGHTS_VECTOR 0x5300
50#define IA64_GENEX_VECTOR 0x5400
51#define IA64_DISABLED_FPREG_VECTOR 0x5500
52#define IA64_NAT_CONSUMPTION_VECTOR 0x5600
53#define IA64_SPECULATION_VECTOR 0x5700 /* UNUSED */
54#define IA64_DEBUG_VECTOR 0x5900
55#define IA64_UNALIGNED_REF_VECTOR 0x5a00
56#define IA64_UNSUPPORTED_DATA_REF_VECTOR 0x5b00
57#define IA64_FP_FAULT_VECTOR 0x5c00
58#define IA64_FP_TRAP_VECTOR 0x5d00
59#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 0x5e00
60#define IA64_TAKEN_BRANCH_TRAP_VECTOR 0x5f00
61#define IA64_SINGLE_STEP_TRAP_VECTOR 0x6000
62
63/* SDM vol2 5.5 - IVA based interruption handling */
64#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\
65 IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT | \
66 IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT)
67
68#define DOMN_PAL_REQUEST 0x110000
69#define DOMN_SAL_REQUEST 0x110001
70
71static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800,
72 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00,
73 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400,
74 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
75 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600,
76 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
77 0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800,
78 0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00
79};
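/*
 * Editor's note (annotation, not part of the original patch): vec2off[]
 * maps the small vector numbers passed to reflect_interruption() to IVA
 * offsets, e.g. vec 2 -> 0x0800 (data TLB), vec 11 -> 0x2c00 (break) and
 * vec 12 -> 0x3000 (external interrupt); this is why the break and
 * external-interrupt paths later in this file call
 * reflect_interruption(..., 11, ...) and reflect_interruption(..., 12, ...).
 */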
80
81static void collect_interruption(struct kvm_vcpu *vcpu)
82{
83 u64 ipsr;
84 u64 vdcr;
85 u64 vifs;
86 unsigned long vpsr;
87 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
88
89 vpsr = vcpu_get_psr(vcpu);
90 vcpu_bsw0(vcpu);
91 if (vpsr & IA64_PSR_IC) {
92
93		/* Sync mpsr id/da/dd/ss/ed bits to vipsr,
94		 * since after the guest does rfi we still want these
95		 * bits on in mpsr
96 */
97
98 ipsr = regs->cr_ipsr;
99 vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
100 | IA64_PSR_DD | IA64_PSR_SS
101 | IA64_PSR_ED));
102 vcpu_set_ipsr(vcpu, vpsr);
103
104		/* Currently, for traps, we do not advance IIP to the next
105		 * instruction, because we assume the caller has already
106		 * set up IIP correctly
107 */
108
109 vcpu_set_iip(vcpu , regs->cr_iip);
110
111 /* set vifs.v to zero */
112 vifs = VCPU(vcpu, ifs);
113 vifs &= ~IA64_IFS_V;
114 vcpu_set_ifs(vcpu, vifs);
115
116 vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa));
117 }
118
119 vdcr = VCPU(vcpu, dcr);
120
121 /* Set guest psr
122	 * up/mfl/mfh/pk/dt/rt/mc/it are kept unchanged
123 * be: set to the value of dcr.be
124 * pp: set to the value of dcr.pp
125 */
126 vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
127 vpsr |= (vdcr & IA64_DCR_BE);
128
129 /* VDCR pp bit position is different from VPSR pp bit */
130 if (vdcr & IA64_DCR_PP) {
131 vpsr |= IA64_PSR_PP;
132 } else {
133		vpsr &= ~IA64_PSR_PP;
134 }
135
136 vcpu_set_psr(vcpu, vpsr);
137
138}
139
140void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec)
141{
142 u64 viva;
143 struct kvm_pt_regs *regs;
144 union ia64_isr pt_isr;
145
146 regs = vcpu_regs(vcpu);
147
148 /* clear cr.isr.ir (incomplete register frame)*/
149 pt_isr.val = VMX(vcpu, cr_isr);
150 pt_isr.ir = 0;
151 VMX(vcpu, cr_isr) = pt_isr.val;
152
153 collect_interruption(vcpu);
154
155 viva = vcpu_get_iva(vcpu);
156 regs->cr_iip = viva + vec;
157}
158
159static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
160{
161 union ia64_rr rr, rr1;
162
163 rr.val = vcpu_get_rr(vcpu, ifa);
164 rr1.val = 0;
165 rr1.ps = rr.ps;
166 rr1.rid = rr.rid;
167 return (rr1.val);
168}
169
170
171/*
172 * Set vIFA & vITIR & vIHA, when vPSR.ic =1
173 * Parameter:
174 * set_ifa: if true, set vIFA
175 * set_itir: if true, set vITIR
176 * set_iha: if true, set vIHA
177 */
178void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr,
179 int set_ifa, int set_itir, int set_iha)
180{
181 long vpsr;
182 u64 value;
183
184 vpsr = VCPU(vcpu, vpsr);
185 /* Vol2, Table 8-1 */
186 if (vpsr & IA64_PSR_IC) {
187 if (set_ifa)
188 vcpu_set_ifa(vcpu, vadr);
189 if (set_itir) {
190 value = vcpu_get_itir_on_fault(vcpu, vadr);
191 vcpu_set_itir(vcpu, value);
192 }
193
194 if (set_iha) {
195 value = vcpu_thash(vcpu, vadr);
196 vcpu_set_iha(vcpu, value);
197 }
198 }
199}
200
201/*
202 * Data TLB Fault
203 * @ Data TLB vector
204 * Refer to SDM Vol2 Table 5-6 & 8-1
205 */
206void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
207{
208 /* If vPSR.ic, IFA, ITIR, IHA */
209 set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
210 inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR);
211}
212
213/*
214 * Instruction TLB Fault
215 * @ Instruction TLB vector
216 * Refer to SDM Vol2 Table 5-6 & 8-1
217 */
218void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
219{
220 /* If vPSR.ic, IFA, ITIR, IHA */
221 set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
222 inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
223}
224
225
226
227/*
228 * Data Nested TLB Fault
229 * @ Data Nested TLB Vector
230 * Refer to SDM Vol2 Table 5-6 & 8-1
231 */
232void nested_dtlb(struct kvm_vcpu *vcpu)
233{
234 inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR);
235}
236
237/*
238 * Alternate Data TLB Fault
239 * @ Alternate Data TLB vector
240 * Refer to SDM Vol2 Table 5-6 & 8-1
241 */
242void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
243{
244 set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
245 inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
246}
247
248
249/*
250 * Alternate Instruction TLB Fault
251 * @ Alternate Instruction TLB vector
252 * Refer to SDM Vol2 Table 5-6 & 8-1
253 */
254void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr)
255{
256 set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
257 inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR);
258}
259
260/* Deal with:
261 * VHPT Translation Vector
262 */
263static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
264{
265 /* If vPSR.ic, IFA, ITIR, IHA*/
266 set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
267 inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
268
269
270}
271
272/*
273 * VHPT Instruction Fault
274 * @ VHPT Translation vector
275 * Refer to SDM Vol2 Table 5-6 & 8-1
276 */
277void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
278{
279 _vhpt_fault(vcpu, vadr);
280}
281
282
283/*
284 * VHPT Data Fault
285 * @ VHPT Translation vector
286 * Refer to SDM Vol2 Table 5-6 & 8-1
287 */
288void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
289{
290 _vhpt_fault(vcpu, vadr);
291}
292
293
294
295/*
296 * Deal with:
297 * General Exception vector
298 */
299void _general_exception(struct kvm_vcpu *vcpu)
300{
301 inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
302}
303
304
305/*
306 * Illegal Operation Fault
307 * @ General Exception Vector
308 * Refer to SDM Vol2 Table 5-6 & 8-1
309 */
310void illegal_op(struct kvm_vcpu *vcpu)
311{
312 _general_exception(vcpu);
313}
314
315/*
316 * Illegal Dependency Fault
317 * @ General Exception Vector
318 * Refer to SDM Vol2 Table 5-6 & 8-1
319 */
320void illegal_dep(struct kvm_vcpu *vcpu)
321{
322 _general_exception(vcpu);
323}
324
325/*
326 * Reserved Register/Field Fault
327 * @ General Exception Vector
328 * Refer to SDM Vol2 Table 5-6 & 8-1
329 */
330void rsv_reg_field(struct kvm_vcpu *vcpu)
331{
332 _general_exception(vcpu);
333}
334/*
335 * Privileged Operation Fault
336 * @ General Exception Vector
337 * Refer to SDM Vol2 Table 5-6 & 8-1
338 */
339
340void privilege_op(struct kvm_vcpu *vcpu)
341{
342 _general_exception(vcpu);
343}
344
345/*
346 * Unimplemented Data Address Fault
347 * @ General Exception Vector
348 * Refer to SDM Vol2 Table 5-6 & 8-1
349 */
350void unimpl_daddr(struct kvm_vcpu *vcpu)
351{
352 _general_exception(vcpu);
353}
354
355/*
356 * Privileged Register Fault
357 * @ General Exception Vector
358 * Refer to SDM Vol2 Table 5-6 & 8-1
359 */
360void privilege_reg(struct kvm_vcpu *vcpu)
361{
362 _general_exception(vcpu);
363}
364
365/* Deal with
366 * Nat consumption vector
367 * Parameter:
368 * vadr: ignored when t == REGISTER
369 */
370static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr,
371 enum tlb_miss_type t)
372{
373 /* If vPSR.ic && t == DATA/INST, IFA */
374 if (t == DATA || t == INSTRUCTION) {
375 /* IFA */
376 set_ifa_itir_iha(vcpu, vadr, 1, 0, 0);
377 }
378
379 inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR);
380}
381
382/*
383 * Instruction Nat Page Consumption Fault
384 * @ Nat Consumption Vector
385 * Refer to SDM Vol2 Table 5-6 & 8-1
386 */
387void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
388{
389 _nat_consumption_fault(vcpu, vadr, INSTRUCTION);
390}
391
392/*
393 * Register Nat Consumption Fault
394 * @ Nat Consumption Vector
395 * Refer to SDM Vol2 Table 5-6 & 8-1
396 */
397void rnat_consumption(struct kvm_vcpu *vcpu)
398{
399 _nat_consumption_fault(vcpu, 0, REGISTER);
400}
401
402/*
403 * Data Nat Page Consumption Fault
404 * @ Nat Consumption Vector
405 * Refer to SDM Vol2 Table 5-6 & 8-1
406 */
407void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
408{
409 _nat_consumption_fault(vcpu, vadr, DATA);
410}
411
412/* Deal with
413 * Page not present vector
414 */
415static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
416{
417 /* If vPSR.ic, IFA, ITIR */
418 set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
419 inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
420}
421
422
423void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
424{
425 __page_not_present(vcpu, vadr);
426}
427
428
429void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
430{
431 __page_not_present(vcpu, vadr);
432}
433
434
435/* Deal with
436 * Data access rights vector
437 */
438void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr)
439{
440 /* If vPSR.ic, IFA, ITIR */
441 set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
442 inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR);
443}
444
445fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
446 unsigned long *fpsr, unsigned long *isr, unsigned long *pr,
447 unsigned long *ifs, struct kvm_pt_regs *regs)
448{
449 fp_state_t fp_state;
450 fpswa_ret_t ret;
451 struct kvm_vcpu *vcpu = current_vcpu;
452
453 uint64_t old_rr7 = ia64_get_rr(7UL<<61);
454
455 if (!vmm_fpswa_interface)
456 return (fpswa_ret_t) {-1, 0, 0, 0};
457
458 /*
459	 * Just let the fpswa driver use the hardware fp registers.
460 * No fp register is valid in memory.
461 */
462 memset(&fp_state, 0, sizeof(fp_state_t));
463
464 /*
465 * unsigned long (*EFI_FPSWA) (
466 * unsigned long trap_type,
467 * void *Bundle,
468 * unsigned long *pipsr,
469 * unsigned long *pfsr,
470 * unsigned long *pisr,
471 * unsigned long *ppreds,
472 * unsigned long *pifs,
473 * void *fp_state);
474 */
475	/* Call the host fpswa interface directly to virtualize
476	 * the guest fpswa request.
477 */
478 ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]);
479 ia64_srlz_d();
480
481 ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle,
482 ipsr, fpsr, isr, pr, ifs, &fp_state);
483 ia64_set_rr(7UL << 61, old_rr7);
484 ia64_srlz_d();
485 return ret;
486}
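/*
 * Editor's note (annotation, not from the patch author): vmm_fp_emulate()
 * above temporarily installs the host's region-7 translation
 * (vcpu->arch.host.rr[7]) around the FPSWA call and then restores old_rr7.
 * The editor's reading is that the FPSWA entry point is only reachable
 * through the host's region-7 mapping while running inside the vmm; this
 * is an inference from the code, not a documented statement.
 */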
487
488/*
489 * Handle floating-point assist faults and traps for the guest domain.
490 */
491unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs,
492 unsigned long isr)
493{
494 struct kvm_vcpu *v = current_vcpu;
495 IA64_BUNDLE bundle;
496 unsigned long fault_ip;
497 fpswa_ret_t ret;
498
499 fault_ip = regs->cr_iip;
500 /*
501	 * When an FP trap occurs, the trapping instruction has completed.
502	 * If ipsr.ri == 0, the trapping instruction is in the previous
503	 * bundle.
504 */
505 if (!fp_fault && (ia64_psr(regs)->ri == 0))
506 fault_ip -= 16;
507
508 if (fetch_code(v, fault_ip, &bundle))
509 return -EAGAIN;
510
511 if (!bundle.i64[0] && !bundle.i64[1])
512 return -EACCES;
513
514 ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr,
515 &isr, &regs->pr, &regs->cr_ifs, regs);
516 return ret.status;
517}
518
519void reflect_interruption(u64 ifa, u64 isr, u64 iim,
520 u64 vec, struct kvm_pt_regs *regs)
521{
522 u64 vector;
523 int status ;
524 struct kvm_vcpu *vcpu = current_vcpu;
525 u64 vpsr = VCPU(vcpu, vpsr);
526
527 vector = vec2off[vec];
528
529 if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
530 panic_vm(vcpu);
531 return;
532 }
533
534 switch (vec) {
535 case 32: /*IA64_FP_FAULT_VECTOR*/
536 status = vmm_handle_fpu_swa(1, regs, isr);
537 if (!status) {
538 vcpu_increment_iip(vcpu);
539 return;
540 } else if (-EAGAIN == status)
541 return;
542 break;
543 case 33: /*IA64_FP_TRAP_VECTOR*/
544 status = vmm_handle_fpu_swa(0, regs, isr);
545 if (!status)
546 return ;
547 else if (-EAGAIN == status) {
548 vcpu_decrement_iip(vcpu);
549 return ;
550 }
551 break;
552 }
553
554 VCPU(vcpu, isr) = isr;
555 VCPU(vcpu, iipa) = regs->cr_iip;
556 if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
557 VCPU(vcpu, iim) = iim;
558 else
559 set_ifa_itir_iha(vcpu, ifa, 1, 1, 1);
560
561 inject_guest_interruption(vcpu, vector);
562}
563
564static void set_pal_call_data(struct kvm_vcpu *vcpu)
565{
566 struct exit_ctl_data *p = &vcpu->arch.exit_data;
567
568	/* FIXME: for the static and stacked conventions, firmware
569	 * has put the parameters in gr28-gr31 before
570	 * breaking to the vmm. */
571
572 p->u.pal_data.gr28 = vcpu_get_gr(vcpu, 28);
573 p->u.pal_data.gr29 = vcpu_get_gr(vcpu, 29);
574 p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
575 p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
576 p->exit_reason = EXIT_REASON_PAL_CALL;
577}
578
579static void set_pal_call_result(struct kvm_vcpu *vcpu)
580{
581 struct exit_ctl_data *p = &vcpu->arch.exit_data;
582
583 if (p->exit_reason == EXIT_REASON_PAL_CALL) {
584 vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0);
585 vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0);
586 vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
587 vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
588 } else
589 panic_vm(vcpu);
590}
591
592static void set_sal_call_data(struct kvm_vcpu *vcpu)
593{
594 struct exit_ctl_data *p = &vcpu->arch.exit_data;
595
596 p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32);
597 p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33);
598 p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34);
599 p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35);
600 p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36);
601 p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37);
602 p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38);
603 p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39);
604 p->exit_reason = EXIT_REASON_SAL_CALL;
605}
606
607static void set_sal_call_result(struct kvm_vcpu *vcpu)
608{
609 struct exit_ctl_data *p = &vcpu->arch.exit_data;
610
611 if (p->exit_reason == EXIT_REASON_SAL_CALL) {
612 vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0);
613 vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0);
614 vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
615 vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
616 } else
617 panic_vm(vcpu);
618}
619
620void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
621 unsigned long isr, unsigned long iim)
622{
623 struct kvm_vcpu *v = current_vcpu;
624
625 if (ia64_psr(regs)->cpl == 0) {
626 /* Allow hypercalls only when cpl = 0. */
627 if (iim == DOMN_PAL_REQUEST) {
628 set_pal_call_data(v);
629 vmm_transition(v);
630 set_pal_call_result(v);
631 vcpu_increment_iip(v);
632 return;
633 } else if (iim == DOMN_SAL_REQUEST) {
634 set_sal_call_data(v);
635 vmm_transition(v);
636 set_sal_call_result(v);
637 vcpu_increment_iip(v);
638 return;
639 }
640 }
641 reflect_interruption(ifa, isr, iim, 11, regs);
642}
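/*
 * Editor's sketch (illustrative, not from this patch): a guest firmware
 * PAL hypercall handled above would look roughly like
 *
 *	mov r28 = <PAL index>		// r29-r31 carry further arguments
 *	break 0x110000			// DOMN_PAL_REQUEST, executed at cpl 0
 *	// on return: r8 = status, r9-r11 = PAL return values
 *
 * based on set_pal_call_data()/set_pal_call_result() above; the exact
 * firmware-side instruction sequence is an assumption.
 */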
643
644void check_pending_irq(struct kvm_vcpu *vcpu)
645{
646 int mask, h_pending, h_inservice;
647 u64 isr;
648 unsigned long vpsr;
649 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
650
651 h_pending = highest_pending_irq(vcpu);
652 if (h_pending == NULL_VECTOR) {
653 update_vhpi(vcpu, NULL_VECTOR);
654 return;
655 }
656 h_inservice = highest_inservice_irq(vcpu);
657
658 vpsr = VCPU(vcpu, vpsr);
659 mask = irq_masked(vcpu, h_pending, h_inservice);
660 if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) {
661 isr = vpsr & IA64_PSR_RI;
662 update_vhpi(vcpu, h_pending);
663 reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
664 } else if (mask == IRQ_MASKED_BY_INSVC) {
665 if (VCPU(vcpu, vhpi))
666 update_vhpi(vcpu, NULL_VECTOR);
667 } else {
668		/* masked by vpsr.i or vtpr. */
669 update_vhpi(vcpu, h_pending);
670 }
671}
672
673static void generate_exirq(struct kvm_vcpu *vcpu)
674{
675	unsigned long vpsr;
676 uint64_t isr;
677
678 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
679
680 vpsr = VCPU(vcpu, vpsr);
681 isr = vpsr & IA64_PSR_RI;
682 if (!(vpsr & IA64_PSR_IC))
683 panic_vm(vcpu);
684 reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
685}
686
687void vhpi_detection(struct kvm_vcpu *vcpu)
688{
689 uint64_t threshold, vhpi;
690 union ia64_tpr vtpr;
691 struct ia64_psr vpsr;
692
693 vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
694 vtpr.val = VCPU(vcpu, tpr);
695
696 threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
697 vhpi = VCPU(vcpu, vhpi);
698 if (vhpi > threshold) {
699		/* interrupt activated */
700 generate_exirq(vcpu);
701 }
702}
703
704
705void leave_hypervisor_tail(void)
706{
707 struct kvm_vcpu *v = current_vcpu;
708
709 if (VMX(v, timer_check)) {
710 VMX(v, timer_check) = 0;
711 if (VMX(v, itc_check)) {
712 if (vcpu_get_itc(v) > VCPU(v, itm)) {
713 if (!(VCPU(v, itv) & (1 << 16))) {
714 vcpu_pend_interrupt(v, VCPU(v, itv)
715 & 0xff);
716 VMX(v, itc_check) = 0;
717 } else {
718 v->arch.timer_pending = 1;
719 }
720 VMX(v, last_itc) = VCPU(v, itm) + 1;
721 }
722 }
723 }
724
725 rmb();
726 if (v->arch.irq_new_pending) {
727 v->arch.irq_new_pending = 0;
728 VMX(v, irq_check) = 0;
729 check_pending_irq(v);
730 return;
731 }
732 if (VMX(v, irq_check)) {
733 VMX(v, irq_check) = 0;
734 vhpi_detection(v);
735 }
736}
737
738
739static inline void handle_lds(struct kvm_pt_regs *regs)
740{
741 regs->cr_ipsr |= IA64_PSR_ED;
742}
743
744void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type)
745{
746 unsigned long pte;
747 union ia64_rr rr;
748
749 rr.val = ia64_get_rr(vadr);
750 pte = vadr & _PAGE_PPN_MASK;
751 pte = pte | PHY_PAGE_WB;
752 thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type);
753 return;
754}
755
756void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs)
757{
758 unsigned long vpsr;
759 int type;
760
761 u64 vhpt_adr, gppa, pteval, rr, itir;
762 union ia64_isr misr;
763 union ia64_pta vpta;
764 struct thash_data *data;
765 struct kvm_vcpu *v = current_vcpu;
766
767 vpsr = VCPU(v, vpsr);
768 misr.val = VMX(v, cr_isr);
769
770 type = vec;
771
772 if (is_physical_mode(v) && (!(vadr << 1 >> 62))) {
773 if (vec == 2) {
774 if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) {
775 emulate_io_inst(v, ((vadr << 1) >> 1), 4);
776 return;
777 }
778 }
779 physical_tlb_miss(v, vadr, type);
780 return;
781 }
782 data = vtlb_lookup(v, vadr, type);
783 if (data != 0) {
784 if (type == D_TLB) {
785 gppa = (vadr & ((1UL << data->ps) - 1))
786 + (data->ppn >> (data->ps - 12) << data->ps);
787 if (__gpfn_is_io(gppa >> PAGE_SHIFT)) {
788 if (data->pl >= ((regs->cr_ipsr >>
789 IA64_PSR_CPL0_BIT) & 3))
790 emulate_io_inst(v, gppa, data->ma);
791 else {
792 vcpu_set_isr(v, misr.val);
793 data_access_rights(v, vadr);
794 }
795 return ;
796 }
797 }
798 thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type);
799
800 } else if (type == D_TLB) {
801 if (misr.sp) {
802 handle_lds(regs);
803 return;
804 }
805
806 rr = vcpu_get_rr(v, vadr);
807 itir = rr & (RR_RID_MASK | RR_PS_MASK);
808
809 if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) {
810 if (vpsr & IA64_PSR_IC) {
811 vcpu_set_isr(v, misr.val);
812 alt_dtlb(v, vadr);
813 } else {
814 nested_dtlb(v);
815 }
816 return ;
817 }
818
819 vpta.val = vcpu_get_pta(v);
820 /* avoid recursively walking (short format) VHPT */
821
822 vhpt_adr = vcpu_thash(v, vadr);
823 if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
824 /* VHPT successfully read. */
825 if (!(pteval & _PAGE_P)) {
826 if (vpsr & IA64_PSR_IC) {
827 vcpu_set_isr(v, misr.val);
828 dtlb_fault(v, vadr);
829 } else {
830 nested_dtlb(v);
831 }
832 } else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) {
833 thash_purge_and_insert(v, pteval, itir,
834 vadr, D_TLB);
835 } else if (vpsr & IA64_PSR_IC) {
836 vcpu_set_isr(v, misr.val);
837 dtlb_fault(v, vadr);
838 } else {
839 nested_dtlb(v);
840 }
841 } else {
842 /* Can't read VHPT. */
843 if (vpsr & IA64_PSR_IC) {
844 vcpu_set_isr(v, misr.val);
845 dvhpt_fault(v, vadr);
846 } else {
847 nested_dtlb(v);
848 }
849 }
850 } else if (type == I_TLB) {
851 if (!(vpsr & IA64_PSR_IC))
852 misr.ni = 1;
853 if (!vhpt_enabled(v, vadr, INST_REF)) {
854 vcpu_set_isr(v, misr.val);
855 alt_itlb(v, vadr);
856 return;
857 }
858
859 vpta.val = vcpu_get_pta(v);
860
861 vhpt_adr = vcpu_thash(v, vadr);
862 if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
863 /* VHPT successfully read. */
864 if (pteval & _PAGE_P) {
865 if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) {
866 vcpu_set_isr(v, misr.val);
867 itlb_fault(v, vadr);
868 return ;
869 }
870 rr = vcpu_get_rr(v, vadr);
871 itir = rr & (RR_RID_MASK | RR_PS_MASK);
872 thash_purge_and_insert(v, pteval, itir,
873 vadr, I_TLB);
874 } else {
875 vcpu_set_isr(v, misr.val);
876 inst_page_not_present(v, vadr);
877 }
878 } else {
879 vcpu_set_isr(v, misr.val);
880 ivhpt_fault(v, vadr);
881 }
882 }
883}
884
885void kvm_vexirq(struct kvm_vcpu *vcpu)
886{
887 u64 vpsr, isr;
888 struct kvm_pt_regs *regs;
889
890 regs = vcpu_regs(vcpu);
891 vpsr = VCPU(vcpu, vpsr);
892 isr = vpsr & IA64_PSR_RI;
893 reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/
894}
895
896void kvm_ia64_handle_irq(struct kvm_vcpu *v)
897{
898 struct exit_ctl_data *p = &v->arch.exit_data;
899 long psr;
900
901 local_irq_save(psr);
902 p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
903 vmm_transition(v);
904 local_irq_restore(psr);
905
906 VMX(v, timer_check) = 1;
907
908}
909
910static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos)
911{
912 u64 oldrid, moldrid, oldpsbits, vaddr;
913 struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos];
914 vaddr = p->vaddr;
915
916 oldrid = VMX(v, vrr[0]);
917 VMX(v, vrr[0]) = p->rr;
918 oldpsbits = VMX(v, psbits[0]);
919 VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]);
920 moldrid = ia64_get_rr(0x0);
921 ia64_set_rr(0x0, vrrtomrr(p->rr));
922 ia64_srlz_d();
923
924 vaddr = PAGEALIGN(vaddr, p->ps);
925 thash_purge_entries_remote(v, vaddr, p->ps);
926
927 VMX(v, vrr[0]) = oldrid;
928 VMX(v, psbits[0]) = oldpsbits;
929 ia64_set_rr(0x0, moldrid);
930 ia64_dv_serialize_data();
931}
932
933static void vcpu_do_resume(struct kvm_vcpu *vcpu)
934{
935	/* Re-init VHPT and VTLB on resume */
936 vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES;
937 thash_init(&vcpu->arch.vhpt, VHPT_SHIFT);
938 vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES;
939 thash_init(&vcpu->arch.vtlb, VTLB_SHIFT);
940
941 ia64_set_pta(vcpu->arch.vhpt.pta.val);
942}
943
944static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
945{
946 if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
947 vcpu_do_resume(vcpu);
948 return;
949 }
950
951 if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) {
952 thash_purge_all(vcpu);
953 return;
954 }
955
956 if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) {
957 while (vcpu->arch.ptc_g_count > 0)
958 ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count);
959 }
960}
961
962void vmm_transition(struct kvm_vcpu *vcpu)
963{
964 ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd,
965 0, 0, 0, 0, 0, 0);
966 vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host);
967 ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd,
968 0, 0, 0, 0, 0, 0);
969 kvm_do_resume_op(vcpu);
970}
diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S
new file mode 100644
index 000000000000..30897d44d61e
--- /dev/null
+++ b/arch/ia64/kvm/trampoline.S
@@ -0,0 +1,1038 @@
1/* Save all processor states
2 *
3 * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
4 * Copyright (c) 2007 Anthony Xu <anthony.xu@intel.com>
5 */
6
7#include <asm/asmmacro.h>
8#include "asm-offsets.h"
9
10
11#define CTX(name) VMM_CTX_##name##_OFFSET
12
13 /*
14 * r32: context_t base address
15 */
16#define SAVE_BRANCH_REGS \
17 add r2 = CTX(B0),r32; \
18 add r3 = CTX(B1),r32; \
19 mov r16 = b0; \
20 mov r17 = b1; \
21 ;; \
22 st8 [r2]=r16,16; \
23 st8 [r3]=r17,16; \
24 ;; \
25 mov r16 = b2; \
26 mov r17 = b3; \
27 ;; \
28 st8 [r2]=r16,16; \
29 st8 [r3]=r17,16; \
30 ;; \
31 mov r16 = b4; \
32 mov r17 = b5; \
33 ;; \
34 st8 [r2]=r16; \
35 st8 [r3]=r17; \
36 ;;
37
38 /*
39 * r33: context_t base address
40 */
41#define RESTORE_BRANCH_REGS \
42 add r2 = CTX(B0),r33; \
43 add r3 = CTX(B1),r33; \
44 ;; \
45 ld8 r16=[r2],16; \
46 ld8 r17=[r3],16; \
47 ;; \
48 mov b0 = r16; \
49 mov b1 = r17; \
50 ;; \
51 ld8 r16=[r2],16; \
52 ld8 r17=[r3],16; \
53 ;; \
54 mov b2 = r16; \
55 mov b3 = r17; \
56 ;; \
57 ld8 r16=[r2]; \
58 ld8 r17=[r3]; \
59 ;; \
60 mov b4=r16; \
61 mov b5=r17; \
62 ;;
63
64
65 /*
66 * r32: context_t base address
67 * bsw == 1
68 * Save all bank1 general registers, r4 ~ r7
69 */
70#define SAVE_GENERAL_REGS \
71 add r2=CTX(R4),r32; \
72 add r3=CTX(R5),r32; \
73 ;; \
74.mem.offset 0,0; \
75 st8.spill [r2]=r4,16; \
76.mem.offset 8,0; \
77 st8.spill [r3]=r5,16; \
78 ;; \
79.mem.offset 0,0; \
80 st8.spill [r2]=r6,48; \
81.mem.offset 8,0; \
82 st8.spill [r3]=r7,48; \
83 ;; \
84.mem.offset 0,0; \
85 st8.spill [r2]=r12; \
86.mem.offset 8,0; \
87 st8.spill [r3]=r13; \
88 ;;
89
90 /*
91 * r33: context_t base address
92 * bsw == 1
93 */
94#define RESTORE_GENERAL_REGS \
95 add r2=CTX(R4),r33; \
96 add r3=CTX(R5),r33; \
97 ;; \
98 ld8.fill r4=[r2],16; \
99 ld8.fill r5=[r3],16; \
100 ;; \
101 ld8.fill r6=[r2],48; \
102 ld8.fill r7=[r3],48; \
103 ;; \
104 ld8.fill r12=[r2]; \
105 ld8.fill r13 =[r3]; \
106 ;;
107
108
109
110
111 /*
112 * r32: context_t base address
113 */
114#define SAVE_KERNEL_REGS \
115 add r2 = CTX(KR0),r32; \
116 add r3 = CTX(KR1),r32; \
117 mov r16 = ar.k0; \
118 mov r17 = ar.k1; \
119 ;; \
120 st8 [r2] = r16,16; \
121 st8 [r3] = r17,16; \
122 ;; \
123 mov r16 = ar.k2; \
124 mov r17 = ar.k3; \
125 ;; \
126 st8 [r2] = r16,16; \
127 st8 [r3] = r17,16; \
128 ;; \
129 mov r16 = ar.k4; \
130 mov r17 = ar.k5; \
131 ;; \
132 st8 [r2] = r16,16; \
133 st8 [r3] = r17,16; \
134 ;; \
135 mov r16 = ar.k6; \
136 mov r17 = ar.k7; \
137 ;; \
138 st8 [r2] = r16; \
139 st8 [r3] = r17; \
140 ;;
141
142
143
144 /*
145 * r33: context_t base address
146 */
147#define RESTORE_KERNEL_REGS \
148 add r2 = CTX(KR0),r33; \
149 add r3 = CTX(KR1),r33; \
150 ;; \
151 ld8 r16=[r2],16; \
152 ld8 r17=[r3],16; \
153 ;; \
154 mov ar.k0=r16; \
155 mov ar.k1=r17; \
156 ;; \
157 ld8 r16=[r2],16; \
158 ld8 r17=[r3],16; \
159 ;; \
160 mov ar.k2=r16; \
161 mov ar.k3=r17; \
162 ;; \
163 ld8 r16=[r2],16; \
164 ld8 r17=[r3],16; \
165 ;; \
166 mov ar.k4=r16; \
167 mov ar.k5=r17; \
168 ;; \
169 ld8 r16=[r2],16; \
170 ld8 r17=[r3],16; \
171 ;; \
172 mov ar.k6=r16; \
173 mov ar.k7=r17; \
174 ;;
175
176
177
178 /*
179 * r32: context_t base address
180 */
181#define SAVE_APP_REGS \
182 add r2 = CTX(BSPSTORE),r32; \
183 mov r16 = ar.bspstore; \
184 ;; \
185 st8 [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\
186 mov r16 = ar.rnat; \
187 ;; \
188 st8 [r2] = r16,CTX(FCR)-CTX(RNAT); \
189 mov r16 = ar.fcr; \
190 ;; \
191 st8 [r2] = r16,CTX(EFLAG)-CTX(FCR); \
192 mov r16 = ar.eflag; \
193 ;; \
194 st8 [r2] = r16,CTX(CFLG)-CTX(EFLAG); \
195 mov r16 = ar.cflg; \
196 ;; \
197 st8 [r2] = r16,CTX(FSR)-CTX(CFLG); \
198 mov r16 = ar.fsr; \
199 ;; \
200 st8 [r2] = r16,CTX(FIR)-CTX(FSR); \
201 mov r16 = ar.fir; \
202 ;; \
203 st8 [r2] = r16,CTX(FDR)-CTX(FIR); \
204 mov r16 = ar.fdr; \
205 ;; \
206 st8 [r2] = r16,CTX(UNAT)-CTX(FDR); \
207 mov r16 = ar.unat; \
208 ;; \
209 st8 [r2] = r16,CTX(FPSR)-CTX(UNAT); \
210 mov r16 = ar.fpsr; \
211 ;; \
212 st8 [r2] = r16,CTX(PFS)-CTX(FPSR); \
213 mov r16 = ar.pfs; \
214 ;; \
215 st8 [r2] = r16,CTX(LC)-CTX(PFS); \
216 mov r16 = ar.lc; \
217 ;; \
218 st8 [r2] = r16; \
219 ;;
220
221 /*
222 * r33: context_t base address
223 */
224#define RESTORE_APP_REGS \
225 add r2=CTX(BSPSTORE),r33; \
226 ;; \
227 ld8 r16=[r2],CTX(RNAT)-CTX(BSPSTORE); \
228 ;; \
229 mov ar.bspstore=r16; \
230 ld8 r16=[r2],CTX(FCR)-CTX(RNAT); \
231 ;; \
232 mov ar.rnat=r16; \
233 ld8 r16=[r2],CTX(EFLAG)-CTX(FCR); \
234 ;; \
235 mov ar.fcr=r16; \
236 ld8 r16=[r2],CTX(CFLG)-CTX(EFLAG); \
237 ;; \
238 mov ar.eflag=r16; \
239 ld8 r16=[r2],CTX(FSR)-CTX(CFLG); \
240 ;; \
241 mov ar.cflg=r16; \
242 ld8 r16=[r2],CTX(FIR)-CTX(FSR); \
243 ;; \
244 mov ar.fsr=r16; \
245 ld8 r16=[r2],CTX(FDR)-CTX(FIR); \
246 ;; \
247 mov ar.fir=r16; \
248 ld8 r16=[r2],CTX(UNAT)-CTX(FDR); \
249 ;; \
250 mov ar.fdr=r16; \
251 ld8 r16=[r2],CTX(FPSR)-CTX(UNAT); \
252 ;; \
253 mov ar.unat=r16; \
254 ld8 r16=[r2],CTX(PFS)-CTX(FPSR); \
255 ;; \
256 mov ar.fpsr=r16; \
257 ld8 r16=[r2],CTX(LC)-CTX(PFS); \
258 ;; \
259 mov ar.pfs=r16; \
260 ld8 r16=[r2]; \
261 ;; \
262 mov ar.lc=r16; \
263 ;;
264
265 /*
266 * r32: context_t base address
267 */
268#define SAVE_CTL_REGS \
269 add r2 = CTX(DCR),r32; \
270 mov r16 = cr.dcr; \
271 ;; \
272 st8 [r2] = r16,CTX(IVA)-CTX(DCR); \
273 ;; \
274 mov r16 = cr.iva; \
275 ;; \
276 st8 [r2] = r16,CTX(PTA)-CTX(IVA); \
277 ;; \
278 mov r16 = cr.pta; \
279 ;; \
280 st8 [r2] = r16 ; \
281 ;;
282
283 /*
284 * r33: context_t base address
285 */
286#define RESTORE_CTL_REGS \
287 add r2 = CTX(DCR),r33; \
288 ;; \
289 ld8 r16 = [r2],CTX(IVA)-CTX(DCR); \
290 ;; \
291 mov cr.dcr = r16; \
292 dv_serialize_data; \
293 ;; \
294 ld8 r16 = [r2],CTX(PTA)-CTX(IVA); \
295 ;; \
296 mov cr.iva = r16; \
297 dv_serialize_data; \
298 ;; \
299 ld8 r16 = [r2]; \
300 ;; \
301 mov cr.pta = r16; \
302 dv_serialize_data; \
303 ;;
304
305
306 /*
307 * r32: context_t base address
308 */
309#define SAVE_REGION_REGS \
310 add r2=CTX(RR0),r32; \
311 mov r16=rr[r0]; \
312 dep.z r18=1,61,3; \
313 ;; \
314 st8 [r2]=r16,8; \
315 mov r17=rr[r18]; \
316 dep.z r18=2,61,3; \
317 ;; \
318 st8 [r2]=r17,8; \
319 mov r16=rr[r18]; \
320 dep.z r18=3,61,3; \
321 ;; \
322 st8 [r2]=r16,8; \
323 mov r17=rr[r18]; \
324 dep.z r18=4,61,3; \
325 ;; \
326 st8 [r2]=r17,8; \
327 mov r16=rr[r18]; \
328 dep.z r18=5,61,3; \
329 ;; \
330 st8 [r2]=r16,8; \
331 mov r17=rr[r18]; \
332 dep.z r18=7,61,3; \
333 ;; \
334 st8 [r2]=r17,16; \
335 mov r16=rr[r18]; \
336 ;; \
337 st8 [r2]=r16,8; \
338 ;;
339
340 /*
341 * r33:context_t base address
342 */
343#define RESTORE_REGION_REGS \
344 add r2=CTX(RR0),r33;\
345 mov r18=r0; \
346 ;; \
347 ld8 r20=[r2],8; \
348 ;; /* rr0 */ \
349 ld8 r21=[r2],8; \
350 ;; /* rr1 */ \
351 ld8 r22=[r2],8; \
352 ;; /* rr2 */ \
353 ld8 r23=[r2],8; \
354 ;; /* rr3 */ \
355 ld8 r24=[r2],8; \
356 ;; /* rr4 */ \
357 ld8 r25=[r2],16; \
358 ;; /* rr5 */ \
359 ld8 r27=[r2]; \
360 ;; /* rr7 */ \
361 mov rr[r18]=r20; \
362 dep.z r18=1,61,3; \
363 ;; /* rr1 */ \
364 mov rr[r18]=r21; \
365 dep.z r18=2,61,3; \
366 ;; /* rr2 */ \
367 mov rr[r18]=r22; \
368 dep.z r18=3,61,3; \
369 ;; /* rr3 */ \
370 mov rr[r18]=r23; \
371 dep.z r18=4,61,3; \
372 ;; /* rr4 */ \
373 mov rr[r18]=r24; \
374 dep.z r18=5,61,3; \
375 ;; /* rr5 */ \
376 mov rr[r18]=r25; \
377 dep.z r18=7,61,3; \
378 ;; /* rr7 */ \
379 mov rr[r18]=r27; \
380 ;; \
381 srlz.i; \
382 ;;
383
384
385
386 /*
387 * r32: context_t base address
388 * r36~r39:scratch registers
389 */
390#define SAVE_DEBUG_REGS \
391 add r2=CTX(IBR0),r32; \
392 add r3=CTX(DBR0),r32; \
393 mov r16=ibr[r0]; \
394 mov r17=dbr[r0]; \
395 ;; \
396 st8 [r2]=r16,8; \
397 st8 [r3]=r17,8; \
398 add r18=1,r0; \
399 ;; \
400 mov r16=ibr[r18]; \
401 mov r17=dbr[r18]; \
402 ;; \
403 st8 [r2]=r16,8; \
404 st8 [r3]=r17,8; \
405 add r18=2,r0; \
406 ;; \
407 mov r16=ibr[r18]; \
408 mov r17=dbr[r18]; \
409 ;; \
410 st8 [r2]=r16,8; \
411 st8 [r3]=r17,8; \
419 add r18=3,r0; \
420 ;; \
421 mov r16=ibr[r18]; \
422 mov r17=dbr[r18]; \
423 ;; \
424 st8 [r2]=r16,8; \
425 st8 [r3]=r17,8; \
426 add r18=4,r0; \
427 ;; \
428 mov r16=ibr[r18]; \
429 mov r17=dbr[r18]; \
430 ;; \
431 st8 [r2]=r16,8; \
432 st8 [r3]=r17,8; \
433 add r18=5,r0; \
434 ;; \
435 mov r16=ibr[r18]; \
436 mov r17=dbr[r18]; \
437 ;; \
438 st8 [r2]=r16,8; \
439 st8 [r3]=r17,8; \
440 add r18=6,r0; \
441 ;; \
442 mov r16=ibr[r18]; \
443 mov r17=dbr[r18]; \
444 ;; \
445 st8 [r2]=r16,8; \
446 st8 [r3]=r17,8; \
447 add r18=7,r0; \
448 ;; \
449 mov r16=ibr[r18]; \
450 mov r17=dbr[r18]; \
451 ;; \
452 st8 [r2]=r16,8; \
453 st8 [r3]=r17,8; \
454 ;;
455
456
457/*
458 * r33: points to the context_t structure
459 * ar.lc is corrupted.
460 */
461#define RESTORE_DEBUG_REGS \
462 add r2=CTX(IBR0),r33; \
463 add r3=CTX(DBR0),r33; \
464 mov r16=7; \
465 mov r17=r0; \
466 ;; \
467 mov ar.lc = r16; \
468 ;; \
4691: \
470 ld8 r18=[r2],8; \
471 ld8 r19=[r3],8; \
472 ;; \
473 mov ibr[r17]=r18; \
474 mov dbr[r17]=r19; \
475 ;; \
476 srlz.i; \
477 ;; \
478 add r17=1,r17; \
479 br.cloop.sptk 1b; \
480 ;;
481
482
483 /*
484 * r32: context_t base address
485 */
486#define SAVE_FPU_LOW \
487 add r2=CTX(F2),r32; \
488 add r3=CTX(F3),r32; \
489 ;; \
490 stf.spill.nta [r2]=f2,32; \
491 stf.spill.nta [r3]=f3,32; \
492 ;; \
493 stf.spill.nta [r2]=f4,32; \
494 stf.spill.nta [r3]=f5,32; \
495 ;; \
496 stf.spill.nta [r2]=f6,32; \
497 stf.spill.nta [r3]=f7,32; \
498 ;; \
499 stf.spill.nta [r2]=f8,32; \
500 stf.spill.nta [r3]=f9,32; \
501 ;; \
502 stf.spill.nta [r2]=f10,32; \
503 stf.spill.nta [r3]=f11,32; \
504 ;; \
505 stf.spill.nta [r2]=f12,32; \
506 stf.spill.nta [r3]=f13,32; \
507 ;; \
508 stf.spill.nta [r2]=f14,32; \
509 stf.spill.nta [r3]=f15,32; \
510 ;; \
511 stf.spill.nta [r2]=f16,32; \
512 stf.spill.nta [r3]=f17,32; \
513 ;; \
514 stf.spill.nta [r2]=f18,32; \
515 stf.spill.nta [r3]=f19,32; \
516 ;; \
517 stf.spill.nta [r2]=f20,32; \
518 stf.spill.nta [r3]=f21,32; \
519 ;; \
520 stf.spill.nta [r2]=f22,32; \
521 stf.spill.nta [r3]=f23,32; \
522 ;; \
523 stf.spill.nta [r2]=f24,32; \
524 stf.spill.nta [r3]=f25,32; \
525 ;; \
526 stf.spill.nta [r2]=f26,32; \
527 stf.spill.nta [r3]=f27,32; \
528 ;; \
529 stf.spill.nta [r2]=f28,32; \
530 stf.spill.nta [r3]=f29,32; \
531 ;; \
532 stf.spill.nta [r2]=f30; \
533 stf.spill.nta [r3]=f31; \
534 ;;
535
536 /*
537 * r32: context_t base address
538 */
539#define SAVE_FPU_HIGH \
540 add r2=CTX(F32),r32; \
541 add r3=CTX(F33),r32; \
542 ;; \
543 stf.spill.nta [r2]=f32,32; \
544 stf.spill.nta [r3]=f33,32; \
545 ;; \
546 stf.spill.nta [r2]=f34,32; \
547 stf.spill.nta [r3]=f35,32; \
548 ;; \
549 stf.spill.nta [r2]=f36,32; \
550 stf.spill.nta [r3]=f37,32; \
551 ;; \
552 stf.spill.nta [r2]=f38,32; \
553 stf.spill.nta [r3]=f39,32; \
554 ;; \
555 stf.spill.nta [r2]=f40,32; \
556 stf.spill.nta [r3]=f41,32; \
557 ;; \
558 stf.spill.nta [r2]=f42,32; \
559 stf.spill.nta [r3]=f43,32; \
560 ;; \
561 stf.spill.nta [r2]=f44,32; \
562 stf.spill.nta [r3]=f45,32; \
563 ;; \
564 stf.spill.nta [r2]=f46,32; \
565 stf.spill.nta [r3]=f47,32; \
566 ;; \
567 stf.spill.nta [r2]=f48,32; \
568 stf.spill.nta [r3]=f49,32; \
569 ;; \
570 stf.spill.nta [r2]=f50,32; \
571 stf.spill.nta [r3]=f51,32; \
572 ;; \
573 stf.spill.nta [r2]=f52,32; \
574 stf.spill.nta [r3]=f53,32; \
575 ;; \
576 stf.spill.nta [r2]=f54,32; \
577 stf.spill.nta [r3]=f55,32; \
578 ;; \
579 stf.spill.nta [r2]=f56,32; \
580 stf.spill.nta [r3]=f57,32; \
581 ;; \
582 stf.spill.nta [r2]=f58,32; \
583 stf.spill.nta [r3]=f59,32; \
584 ;; \
585 stf.spill.nta [r2]=f60,32; \
586 stf.spill.nta [r3]=f61,32; \
587 ;; \
588 stf.spill.nta [r2]=f62,32; \
589 stf.spill.nta [r3]=f63,32; \
590 ;; \
591 stf.spill.nta [r2]=f64,32; \
592 stf.spill.nta [r3]=f65,32; \
593 ;; \
594 stf.spill.nta [r2]=f66,32; \
595 stf.spill.nta [r3]=f67,32; \
596 ;; \
597 stf.spill.nta [r2]=f68,32; \
598 stf.spill.nta [r3]=f69,32; \
599 ;; \
600 stf.spill.nta [r2]=f70,32; \
601 stf.spill.nta [r3]=f71,32; \
602 ;; \
603 stf.spill.nta [r2]=f72,32; \
604 stf.spill.nta [r3]=f73,32; \
605 ;; \
606 stf.spill.nta [r2]=f74,32; \
607 stf.spill.nta [r3]=f75,32; \
608 ;; \
609 stf.spill.nta [r2]=f76,32; \
610 stf.spill.nta [r3]=f77,32; \
611 ;; \
612 stf.spill.nta [r2]=f78,32; \
613 stf.spill.nta [r3]=f79,32; \
614 ;; \
615 stf.spill.nta [r2]=f80,32; \
616 stf.spill.nta [r3]=f81,32; \
617 ;; \
618 stf.spill.nta [r2]=f82,32; \
619 stf.spill.nta [r3]=f83,32; \
620 ;; \
621 stf.spill.nta [r2]=f84,32; \
622 stf.spill.nta [r3]=f85,32; \
623 ;; \
624 stf.spill.nta [r2]=f86,32; \
625 stf.spill.nta [r3]=f87,32; \
626 ;; \
627 stf.spill.nta [r2]=f88,32; \
628 stf.spill.nta [r3]=f89,32; \
629 ;; \
630 stf.spill.nta [r2]=f90,32; \
631 stf.spill.nta [r3]=f91,32; \
632 ;; \
633 stf.spill.nta [r2]=f92,32; \
634 stf.spill.nta [r3]=f93,32; \
635 ;; \
636 stf.spill.nta [r2]=f94,32; \
637 stf.spill.nta [r3]=f95,32; \
638 ;; \
639 stf.spill.nta [r2]=f96,32; \
640 stf.spill.nta [r3]=f97,32; \
641 ;; \
642 stf.spill.nta [r2]=f98,32; \
643 stf.spill.nta [r3]=f99,32; \
644 ;; \
645 stf.spill.nta [r2]=f100,32; \
646 stf.spill.nta [r3]=f101,32; \
647 ;; \
648 stf.spill.nta [r2]=f102,32; \
649 stf.spill.nta [r3]=f103,32; \
650 ;; \
651 stf.spill.nta [r2]=f104,32; \
652 stf.spill.nta [r3]=f105,32; \
653 ;; \
654 stf.spill.nta [r2]=f106,32; \
655 stf.spill.nta [r3]=f107,32; \
656 ;; \
657 stf.spill.nta [r2]=f108,32; \
658 stf.spill.nta [r3]=f109,32; \
659 ;; \
660 stf.spill.nta [r2]=f110,32; \
661 stf.spill.nta [r3]=f111,32; \
662 ;; \
663 stf.spill.nta [r2]=f112,32; \
664 stf.spill.nta [r3]=f113,32; \
665 ;; \
666 stf.spill.nta [r2]=f114,32; \
667 stf.spill.nta [r3]=f115,32; \
668 ;; \
669 stf.spill.nta [r2]=f116,32; \
670 stf.spill.nta [r3]=f117,32; \
671 ;; \
672 stf.spill.nta [r2]=f118,32; \
673 stf.spill.nta [r3]=f119,32; \
674 ;; \
675 stf.spill.nta [r2]=f120,32; \
676 stf.spill.nta [r3]=f121,32; \
677 ;; \
678 stf.spill.nta [r2]=f122,32; \
679 stf.spill.nta [r3]=f123,32; \
680 ;; \
681 stf.spill.nta [r2]=f124,32; \
682 stf.spill.nta [r3]=f125,32; \
683 ;; \
684 stf.spill.nta [r2]=f126; \
685 stf.spill.nta [r3]=f127; \
686 ;;
687
688 /*
689 * r33: point to context_t structure
690 */
691#define RESTORE_FPU_LOW \
692 add r2 = CTX(F2), r33; \
693 add r3 = CTX(F3), r33; \
694 ;; \
695 ldf.fill.nta f2 = [r2], 32; \
696 ldf.fill.nta f3 = [r3], 32; \
697 ;; \
698 ldf.fill.nta f4 = [r2], 32; \
699 ldf.fill.nta f5 = [r3], 32; \
700 ;; \
701 ldf.fill.nta f6 = [r2], 32; \
702 ldf.fill.nta f7 = [r3], 32; \
703 ;; \
704 ldf.fill.nta f8 = [r2], 32; \
705 ldf.fill.nta f9 = [r3], 32; \
706 ;; \
707 ldf.fill.nta f10 = [r2], 32; \
708 ldf.fill.nta f11 = [r3], 32; \
709 ;; \
710 ldf.fill.nta f12 = [r2], 32; \
711 ldf.fill.nta f13 = [r3], 32; \
712 ;; \
713 ldf.fill.nta f14 = [r2], 32; \
714 ldf.fill.nta f15 = [r3], 32; \
715 ;; \
716 ldf.fill.nta f16 = [r2], 32; \
717 ldf.fill.nta f17 = [r3], 32; \
718 ;; \
719 ldf.fill.nta f18 = [r2], 32; \
720 ldf.fill.nta f19 = [r3], 32; \
721 ;; \
722 ldf.fill.nta f20 = [r2], 32; \
723 ldf.fill.nta f21 = [r3], 32; \
724 ;; \
725 ldf.fill.nta f22 = [r2], 32; \
726 ldf.fill.nta f23 = [r3], 32; \
727 ;; \
728 ldf.fill.nta f24 = [r2], 32; \
729 ldf.fill.nta f25 = [r3], 32; \
730 ;; \
731 ldf.fill.nta f26 = [r2], 32; \
732 ldf.fill.nta f27 = [r3], 32; \
733 ;; \
734 ldf.fill.nta f28 = [r2], 32; \
735 ldf.fill.nta f29 = [r3], 32; \
736 ;; \
737 ldf.fill.nta f30 = [r2], 32; \
738 ldf.fill.nta f31 = [r3], 32; \
739 ;;
740
741
742
743 /*
744 * r33: point to context_t structure
745 */
746#define RESTORE_FPU_HIGH \
747 add r2 = CTX(F32), r33; \
748 add r3 = CTX(F33), r33; \
749 ;; \
750 ldf.fill.nta f32 = [r2], 32; \
751 ldf.fill.nta f33 = [r3], 32; \
752 ;; \
753 ldf.fill.nta f34 = [r2], 32; \
754 ldf.fill.nta f35 = [r3], 32; \
755 ;; \
756 ldf.fill.nta f36 = [r2], 32; \
757 ldf.fill.nta f37 = [r3], 32; \
758 ;; \
759 ldf.fill.nta f38 = [r2], 32; \
760 ldf.fill.nta f39 = [r3], 32; \
761 ;; \
762 ldf.fill.nta f40 = [r2], 32; \
763 ldf.fill.nta f41 = [r3], 32; \
764 ;; \
765 ldf.fill.nta f42 = [r2], 32; \
766 ldf.fill.nta f43 = [r3], 32; \
767 ;; \
768 ldf.fill.nta f44 = [r2], 32; \
769 ldf.fill.nta f45 = [r3], 32; \
770 ;; \
771 ldf.fill.nta f46 = [r2], 32; \
772 ldf.fill.nta f47 = [r3], 32; \
773 ;; \
774 ldf.fill.nta f48 = [r2], 32; \
775 ldf.fill.nta f49 = [r3], 32; \
776 ;; \
777 ldf.fill.nta f50 = [r2], 32; \
778 ldf.fill.nta f51 = [r3], 32; \
779 ;; \
780 ldf.fill.nta f52 = [r2], 32; \
781 ldf.fill.nta f53 = [r3], 32; \
782 ;; \
783 ldf.fill.nta f54 = [r2], 32; \
784 ldf.fill.nta f55 = [r3], 32; \
785 ;; \
786 ldf.fill.nta f56 = [r2], 32; \
787 ldf.fill.nta f57 = [r3], 32; \
788 ;; \
789 ldf.fill.nta f58 = [r2], 32; \
790 ldf.fill.nta f59 = [r3], 32; \
791 ;; \
792 ldf.fill.nta f60 = [r2], 32; \
793 ldf.fill.nta f61 = [r3], 32; \
794 ;; \
795 ldf.fill.nta f62 = [r2], 32; \
796 ldf.fill.nta f63 = [r3], 32; \
797 ;; \
798 ldf.fill.nta f64 = [r2], 32; \
799 ldf.fill.nta f65 = [r3], 32; \
800 ;; \
801 ldf.fill.nta f66 = [r2], 32; \
802 ldf.fill.nta f67 = [r3], 32; \
803 ;; \
804 ldf.fill.nta f68 = [r2], 32; \
805 ldf.fill.nta f69 = [r3], 32; \
806 ;; \
807 ldf.fill.nta f70 = [r2], 32; \
808 ldf.fill.nta f71 = [r3], 32; \
809 ;; \
810 ldf.fill.nta f72 = [r2], 32; \
811 ldf.fill.nta f73 = [r3], 32; \
812 ;; \
813 ldf.fill.nta f74 = [r2], 32; \
814 ldf.fill.nta f75 = [r3], 32; \
815 ;; \
816 ldf.fill.nta f76 = [r2], 32; \
817 ldf.fill.nta f77 = [r3], 32; \
818 ;; \
819 ldf.fill.nta f78 = [r2], 32; \
820 ldf.fill.nta f79 = [r3], 32; \
821 ;; \
822 ldf.fill.nta f80 = [r2], 32; \
823 ldf.fill.nta f81 = [r3], 32; \
824 ;; \
825 ldf.fill.nta f82 = [r2], 32; \
826 ldf.fill.nta f83 = [r3], 32; \
827 ;; \
828 ldf.fill.nta f84 = [r2], 32; \
829 ldf.fill.nta f85 = [r3], 32; \
830 ;; \
831 ldf.fill.nta f86 = [r2], 32; \
832 ldf.fill.nta f87 = [r3], 32; \
833 ;; \
834 ldf.fill.nta f88 = [r2], 32; \
835 ldf.fill.nta f89 = [r3], 32; \
836 ;; \
837 ldf.fill.nta f90 = [r2], 32; \
838 ldf.fill.nta f91 = [r3], 32; \
839 ;; \
840 ldf.fill.nta f92 = [r2], 32; \
841 ldf.fill.nta f93 = [r3], 32; \
842 ;; \
843 ldf.fill.nta f94 = [r2], 32; \
844 ldf.fill.nta f95 = [r3], 32; \
845 ;; \
846 ldf.fill.nta f96 = [r2], 32; \
847 ldf.fill.nta f97 = [r3], 32; \
848 ;; \
849 ldf.fill.nta f98 = [r2], 32; \
850 ldf.fill.nta f99 = [r3], 32; \
851 ;; \
852 ldf.fill.nta f100 = [r2], 32; \
853 ldf.fill.nta f101 = [r3], 32; \
854 ;; \
855 ldf.fill.nta f102 = [r2], 32; \
856 ldf.fill.nta f103 = [r3], 32; \
857 ;; \
858 ldf.fill.nta f104 = [r2], 32; \
859 ldf.fill.nta f105 = [r3], 32; \
860 ;; \
861 ldf.fill.nta f106 = [r2], 32; \
862 ldf.fill.nta f107 = [r3], 32; \
863 ;; \
864 ldf.fill.nta f108 = [r2], 32; \
865 ldf.fill.nta f109 = [r3], 32; \
866 ;; \
867 ldf.fill.nta f110 = [r2], 32; \
868 ldf.fill.nta f111 = [r3], 32; \
869 ;; \
870 ldf.fill.nta f112 = [r2], 32; \
871 ldf.fill.nta f113 = [r3], 32; \
872 ;; \
873 ldf.fill.nta f114 = [r2], 32; \
874 ldf.fill.nta f115 = [r3], 32; \
875 ;; \
876 ldf.fill.nta f116 = [r2], 32; \
877 ldf.fill.nta f117 = [r3], 32; \
878 ;; \
879 ldf.fill.nta f118 = [r2], 32; \
880 ldf.fill.nta f119 = [r3], 32; \
881 ;; \
882 ldf.fill.nta f120 = [r2], 32; \
883 ldf.fill.nta f121 = [r3], 32; \
884 ;; \
885 ldf.fill.nta f122 = [r2], 32; \
886 ldf.fill.nta f123 = [r3], 32; \
887 ;; \
888 ldf.fill.nta f124 = [r2], 32; \
889 ldf.fill.nta f125 = [r3], 32; \
890 ;; \
891 ldf.fill.nta f126 = [r2], 32; \
892 ldf.fill.nta f127 = [r3], 32; \
893 ;;
894
895 /*
896 * r32: context_t base address
897 */
898#define SAVE_PTK_REGS \
899 add r2=CTX(PKR0), r32; \
900 mov r16=7; \
901 ;; \
902 mov ar.lc=r16; \
903 mov r17=r0; \
904 ;; \
9051: \
906 mov r18=pkr[r17]; \
907 ;; \
908 srlz.i; \
909 ;; \
910 st8 [r2]=r18, 8; \
911 ;; \
912 add r17 =1,r17; \
913 ;; \
914 br.cloop.sptk 1b; \
915 ;;
916
917/*
918 * r33: point to context_t structure
 919 * ar.lc is corrupted.
920 */
921#define RESTORE_PTK_REGS \
922 add r2=CTX(PKR0), r33; \
923 mov r16=7; \
924 ;; \
925 mov ar.lc=r16; \
926 mov r17=r0; \
927 ;; \
9281: \
929 ld8 r18=[r2], 8; \
930 ;; \
931 mov pkr[r17]=r18; \
932 ;; \
933 srlz.i; \
934 ;; \
935 add r17 =1,r17; \
936 ;; \
937 br.cloop.sptk 1b; \
938 ;;
939
940
941/*
942 * void vmm_trampoline( context_t * from,
943 * context_t * to)
944 *
945 * from: r32
946 * to: r33
 947 * note: interrupts must be disabled before calling this function.
948 */
949GLOBAL_ENTRY(vmm_trampoline)
950 mov r16 = psr
951 adds r2 = CTX(PSR), r32
952 ;;
953 st8 [r2] = r16, 8 // psr
954 mov r17 = pr
955 ;;
956 st8 [r2] = r17, 8 // pr
957 mov r18 = ar.unat
958 ;;
959 st8 [r2] = r18
960 mov r17 = ar.rsc
961 ;;
962 adds r2 = CTX(RSC),r32
963 ;;
964 st8 [r2]= r17
965 mov ar.rsc =0
966 flushrs
967 ;;
968 SAVE_GENERAL_REGS
969 ;;
970 SAVE_KERNEL_REGS
971 ;;
972 SAVE_APP_REGS
973 ;;
974 SAVE_BRANCH_REGS
975 ;;
976 SAVE_CTL_REGS
977 ;;
978 SAVE_REGION_REGS
979 ;;
980 //SAVE_DEBUG_REGS
981 ;;
982 rsm psr.dfl
983 ;;
984 srlz.d
985 ;;
986 SAVE_FPU_LOW
987 ;;
988 rsm psr.dfh
989 ;;
990 srlz.d
991 ;;
992 SAVE_FPU_HIGH
993 ;;
994 SAVE_PTK_REGS
995 ;;
996 RESTORE_PTK_REGS
997 ;;
998 RESTORE_FPU_HIGH
999 ;;
1000 RESTORE_FPU_LOW
1001 ;;
1002 //RESTORE_DEBUG_REGS
1003 ;;
1004 RESTORE_REGION_REGS
1005 ;;
1006 RESTORE_CTL_REGS
1007 ;;
1008 RESTORE_BRANCH_REGS
1009 ;;
1010 RESTORE_APP_REGS
1011 ;;
1012 RESTORE_KERNEL_REGS
1013 ;;
1014 RESTORE_GENERAL_REGS
1015 ;;
1016 adds r2=CTX(PSR), r33
1017 ;;
1018 ld8 r16=[r2], 8 // psr
1019 ;;
1020 mov psr.l=r16
1021 ;;
1022 srlz.d
1023 ;;
1024 ld8 r16=[r2], 8 // pr
1025 ;;
1026 mov pr =r16,-1
1027 ld8 r16=[r2] // unat
1028 ;;
1029 mov ar.unat=r16
1030 ;;
1031 adds r2=CTX(RSC),r33
1032 ;;
1033 ld8 r16 =[r2]
1034 ;;
1035 mov ar.rsc = r16
1036 ;;
1037 br.ret.sptk.few b0
1038END(vmm_trampoline)
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
new file mode 100644
index 000000000000..e44027ce5667
--- /dev/null
+++ b/arch/ia64/kvm/vcpu.c
@@ -0,0 +1,2163 @@
1/*
 2 * kvm_vcpu.c: handling all virtual cpu related things.
3 * Copyright (c) 2005, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 * Shaofan Li (Susue Li) <susie.li@intel.com>
19 * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
20 * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
21 * Xiantao Zhang <xiantao.zhang@intel.com>
22 */
23
24#include <linux/kvm_host.h>
25#include <linux/types.h>
26
27#include <asm/processor.h>
28#include <asm/ia64regs.h>
29#include <asm/gcc_intrin.h>
30#include <asm/kregs.h>
31#include <asm/pgtable.h>
32#include <asm/tlb.h>
33
34#include "asm-offsets.h"
35#include "vcpu.h"
36
37/*
38 * Special notes:
39 * - Index by it/dt/rt sequence
40 * - Only existing mode transitions are allowed in this table
 41 * - RSE is placed in lazy mode when emulating guest partial mode
 42 * - If gva happens to be in rr0 or rr4, the only allowed case is
 43 *   identity mapping (gva = gpa), or panic! (How?)
44 */
45int mm_switch_table[8][8] = {
46 /* 2004/09/12(Kevin): Allow switch to self */
47 /*
48 * (it,dt,rt): (0,0,0) -> (1,1,1)
49 * This kind of transition usually occurs in the very early
50 * stage of Linux boot up procedure. Another case is in efi
51 * and pal calls. (see "arch/ia64/kernel/head.S")
52 *
53 * (it,dt,rt): (0,0,0) -> (0,1,1)
54 * This kind of transition is found when OSYa exits efi boot
 55 * service. Since gva = gpa in this case (same region),
 56 * data accesses can be satisfied even though the itlb entry for
 57 * physical emulation is hit.
58 */
59 {SW_SELF, 0, 0, SW_NOP, 0, 0, 0, SW_P2V},
60 {0, 0, 0, 0, 0, 0, 0, 0},
61 {0, 0, 0, 0, 0, 0, 0, 0},
62 /*
63 * (it,dt,rt): (0,1,1) -> (1,1,1)
64 * This kind of transition is found in OSYa.
65 *
66 * (it,dt,rt): (0,1,1) -> (0,0,0)
67 * This kind of transition is found in OSYa
68 */
69 {SW_NOP, 0, 0, SW_SELF, 0, 0, 0, SW_P2V},
70 /* (1,0,0)->(1,1,1) */
71 {0, 0, 0, 0, 0, 0, 0, SW_P2V},
72 /*
73 * (it,dt,rt): (1,0,1) -> (1,1,1)
74 * This kind of transition usually occurs when Linux returns
75 * from the low level TLB miss handlers.
76 * (see "arch/ia64/kernel/ivt.S")
77 */
78 {0, 0, 0, 0, 0, SW_SELF, 0, SW_P2V},
79 {0, 0, 0, 0, 0, 0, 0, 0},
80 /*
81 * (it,dt,rt): (1,1,1) -> (1,0,1)
82 * This kind of transition usually occurs in Linux low level
83 * TLB miss handler. (see "arch/ia64/kernel/ivt.S")
84 *
85 * (it,dt,rt): (1,1,1) -> (0,0,0)
86 * This kind of transition usually occurs in pal and efi calls,
87 * which requires running in physical mode.
88 * (see "arch/ia64/kernel/head.S")
89 * (1,1,1)->(1,0,0)
90 */
91
92 {SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF},
93};
94
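The notes above say the table is indexed by the it/dt/rt sequence; a minimal sketch of that indexing, assuming MODE_IND() (defined in vcpu.h) packs the three psr bits as (it<<2)|(dt<<1)|rt:

/* Illustration only: how an (it,dt,rt) triple selects a row or column. */
static inline int mode_index_sketch(unsigned int it, unsigned int dt,
				    unsigned int rt)
{
	return (it << 2) | (dt << 1) | rt;
}
/*
 * e.g. old mode (1,1,1) and new mode (1,0,1) give mm_switch_table[7][5],
 * which is SW_V2P: the guest is entering physical mode.
 */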
95void physical_mode_init(struct kvm_vcpu *vcpu)
96{
97 vcpu->arch.mode_flags = GUEST_IN_PHY;
98}
99
100void switch_to_physical_rid(struct kvm_vcpu *vcpu)
101{
102 unsigned long psr;
103
104 /* Save original virtual mode rr[0] and rr[4] */
105 psr = ia64_clear_ic();
106 ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0);
107 ia64_srlz_d();
108 ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4);
109 ia64_srlz_d();
110
111 ia64_set_psr(psr);
112 return;
113}
114
115
116void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
117{
118 unsigned long psr;
119
120 psr = ia64_clear_ic();
121 ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0);
122 ia64_srlz_d();
123 ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4);
124 ia64_srlz_d();
125 ia64_set_psr(psr);
126 return;
127}
128
129static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr)
130{
131 return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
132}
133
134void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
135 struct ia64_psr new_psr)
136{
137 int act;
138 act = mm_switch_action(old_psr, new_psr);
139 switch (act) {
140 case SW_V2P:
141 /*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n",
142 old_psr.val, new_psr.val);*/
143 switch_to_physical_rid(vcpu);
144 /*
145 * Set rse to enforced lazy, to prevent active rse
 146 * save/restore while in guest physical mode.
147 */
148 vcpu->arch.mode_flags |= GUEST_IN_PHY;
149 break;
150 case SW_P2V:
151 switch_to_virtual_rid(vcpu);
152 /*
 153 * recover the old mode that was saved when entering
 154 * guest physical mode
155 */
156 vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
157 break;
158 case SW_SELF:
159 break;
160 case SW_NOP:
161 break;
162 default:
163 /* Sanity check */
164 break;
165 }
166 return;
167}
168
169
170
171/*
 172 * In physical mode, tc/tr inserts for regions 0 and 4 use
 173 * RID[0] and RID[4], which are reserved for physical mode emulation.
 174 * However, what those inserted tc/tr entries want is the rid for
 175 * virtual mode, so the original virtual rid needs to be restored
 176 * before the insert.
 177 *
 178 * Operations which require such a switch include:
 179 * - insertions (itc.*, itr.*)
 180 * - purges (ptc.* and ptr.*)
 181 * - tpa
 182 * - tak
 183 * - thash?, ttag?
 184 * All of the above need the actual virtual rid for the destination entry.
185 */
186
187void check_mm_mode_switch(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
188 struct ia64_psr new_psr)
189{
190
191 if ((old_psr.dt != new_psr.dt)
192 || (old_psr.it != new_psr.it)
193 || (old_psr.rt != new_psr.rt))
194 switch_mm_mode(vcpu, old_psr, new_psr);
195
196 return;
197}
198
199
200/*
 201 * In physical mode, tc/tr inserts for regions 0 and 4 use
 202 * RID[0] and RID[4], which are reserved for physical mode emulation.
 203 * However, what those inserted tc/tr entries want is the rid for
 204 * virtual mode, so the original virtual rid needs to be restored
 205 * before the insert.
 206 *
 207 * Operations which require such a switch include:
 208 * - insertions (itc.*, itr.*)
 209 * - purges (ptc.* and ptr.*)
 210 * - tpa
 211 * - tak
 212 * - thash?, ttag?
 213 * All of the above need the actual virtual rid for the destination entry.
214 */
215
216void prepare_if_physical_mode(struct kvm_vcpu *vcpu)
217{
218 if (is_physical_mode(vcpu)) {
219 vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
220 switch_to_virtual_rid(vcpu);
221 }
222 return;
223}
224
225/* Recover always follows prepare */
226void recover_if_physical_mode(struct kvm_vcpu *vcpu)
227{
228 if (is_physical_mode(vcpu))
229 switch_to_physical_rid(vcpu);
230 vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
231 return;
232}
233
234#define RPT(x) ((u16) &((struct kvm_pt_regs *)0)->x)
235
236static u16 gr_info[32] = {
237 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
238 RPT(r1), RPT(r2), RPT(r3),
239 RPT(r4), RPT(r5), RPT(r6), RPT(r7),
240 RPT(r8), RPT(r9), RPT(r10), RPT(r11),
241 RPT(r12), RPT(r13), RPT(r14), RPT(r15),
242 RPT(r16), RPT(r17), RPT(r18), RPT(r19),
243 RPT(r20), RPT(r21), RPT(r22), RPT(r23),
244 RPT(r24), RPT(r25), RPT(r26), RPT(r27),
245 RPT(r28), RPT(r29), RPT(r30), RPT(r31)
246};
247
248#define IA64_FIRST_STACKED_GR 32
249#define IA64_FIRST_ROTATING_FR 32
250
251static inline unsigned long
252rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg)
253{
254 reg += rrb;
255 if (reg >= sor)
256 reg -= sor;
257 return reg;
258}
259
260/*
 261 * Return the (rotated) index for floating point register
 262 * REGNUM (REGNUM must be in the range 32-127;
 263 * the result is in the range 0-95).
264 */
265static inline unsigned long fph_index(struct kvm_pt_regs *regs,
266 long regnum)
267{
268 unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
269 return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
270}
271
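A quick worked example of the rotation above, with a standalone copy of the helper (illustration only):

/* Standalone copy of rotate_reg(), for illustration. */
static unsigned long rotate_reg_sketch(unsigned long sor, unsigned long rrb,
				       unsigned long reg)
{
	reg += rrb;
	if (reg >= sor)
		reg -= sor;
	return reg;
}
/*
 * With cr.ifs.rrb.fr == 3:
 *   fph_index(regs, 32)  -> rotate_reg(96, 3,  0) == 3
 *   fph_index(regs, 127) -> rotate_reg(96, 3, 95) == 2
 * so the architectural f127 currently lives in physical f34.
 */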
272
273/*
 274 * Given a backing store address and a register count, return the
 275 * address that many slots away, skipping RNaT collection slots.
276 */
277static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr,
278 long num_regs)
279{
280 long delta = ia64_rse_slot_num(addr) + num_regs;
281 int i = 0;
282
283 if (num_regs < 0)
284 delta -= 0x3e;
285 if (delta < 0) {
286 while (delta <= -0x3f) {
287 i--;
288 delta += 0x3f;
289 }
290 } else {
291 while (delta >= 0x3f) {
292 i++;
293 delta -= 0x3f;
294 }
295 }
296
297 return addr + num_regs + i;
298}
299
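The 0x3f/0x3e constants come from the RSE layout: every 64th 8-byte slot on the backing store (slot number 0x3f) holds RNaT collection bits rather than a stacked register, so crossing it costs one extra slot. A worked example (illustration only):

/*
 * Starting at a slot whose ia64_rse_slot_num() is 0x3d and skipping
 * 4 registers: delta = 0x3d + 4 = 0x41 >= 0x3f, so one RNaT slot is
 * crossed and the result is addr + 4 + 1.  For negative counts the
 * 0x3e adjustment accounts for the same boundary when walking backwards.
 */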
300static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
301 unsigned long *val, int *nat)
302{
303 unsigned long *bsp, *addr, *rnat_addr, *bspstore;
304 unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
305 unsigned long nat_mask;
306 unsigned long old_rsc, new_rsc;
307 long sof = (regs->cr_ifs) & 0x7f;
308 long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
309 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
310 long ridx = r1 - 32;
311
312 if (ridx < sor)
313 ridx = rotate_reg(sor, rrb_gr, ridx);
314
315 old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
316 new_rsc = old_rsc&(~(0x3));
317 ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
318
319 bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
320 bsp = kbs + (regs->loadrs >> 19);
321
322 addr = kvm_rse_skip_regs(bsp, -sof + ridx);
323 nat_mask = 1UL << ia64_rse_slot_num(addr);
324 rnat_addr = ia64_rse_rnat_addr(addr);
325
326 if (addr >= bspstore) {
327 ia64_flushrs();
328 ia64_mf();
329 bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
330 }
331 *val = *addr;
332 if (nat) {
333 if (bspstore < rnat_addr)
334 *nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT)
335 & nat_mask);
336 else
337 *nat = (int)!!((*rnat_addr) & nat_mask);
 338 	}
 339 	ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
340}
341
342void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
343 unsigned long val, unsigned long nat)
344{
345 unsigned long *bsp, *bspstore, *addr, *rnat_addr;
346 unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
347 unsigned long nat_mask;
348 unsigned long old_rsc, new_rsc, psr;
349 unsigned long rnat;
350 long sof = (regs->cr_ifs) & 0x7f;
351 long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
352 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
353 long ridx = r1 - 32;
354
355 if (ridx < sor)
356 ridx = rotate_reg(sor, rrb_gr, ridx);
357
358 old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
359 /* put RSC to lazy mode, and set loadrs 0 */
360 new_rsc = old_rsc & (~0x3fff0003);
361 ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
362 bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */
363
364 addr = kvm_rse_skip_regs(bsp, -sof + ridx);
365 nat_mask = 1UL << ia64_rse_slot_num(addr);
366 rnat_addr = ia64_rse_rnat_addr(addr);
367
368 local_irq_save(psr);
369 bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
370 if (addr >= bspstore) {
371
372 ia64_flushrs();
373 ia64_mf();
374 *addr = val;
375 bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
376 rnat = ia64_getreg(_IA64_REG_AR_RNAT);
377 if (bspstore < rnat_addr)
378 rnat = rnat & (~nat_mask);
379 else
380 *rnat_addr = (*rnat_addr)&(~nat_mask);
381
382 ia64_mf();
383 ia64_loadrs();
384 ia64_setreg(_IA64_REG_AR_RNAT, rnat);
385 } else {
386 rnat = ia64_getreg(_IA64_REG_AR_RNAT);
387 *addr = val;
388 if (bspstore < rnat_addr)
389 rnat = rnat&(~nat_mask);
390 else
391 *rnat_addr = (*rnat_addr) & (~nat_mask);
392
393 ia64_setreg(_IA64_REG_AR_BSPSTORE, bspstore);
394 ia64_setreg(_IA64_REG_AR_RNAT, rnat);
395 }
396 local_irq_restore(psr);
397 ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
398}
399
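The /* 16 + 3 */ note used by both RSE helpers above refers to the position of the loadrs field: regs->loadrs keeps the dirty-partition size in the ar.rsc.loadrs position (bits 29:16), so shifting right by 16 gives bytes and by 3 more gives 8-byte slots, which is plain pointer arithmetic on the unsigned long * backing store. A minimal sketch (illustration only, name hypothetical):

/* Illustration only: dirty-partition size in stacked-register slots. */
static inline unsigned long dirty_slots_sketch(unsigned long loadrs)
{
	return loadrs >> 19;	/* (loadrs >> 16) bytes, divided by 8 */
}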
400void getreg(unsigned long regnum, unsigned long *val,
401 int *nat, struct kvm_pt_regs *regs)
402{
403 unsigned long addr, *unat;
404 if (regnum >= IA64_FIRST_STACKED_GR) {
405 get_rse_reg(regs, regnum, val, nat);
406 return;
407 }
408
409 /*
410 * Now look at registers in [0-31] range and init correct UNAT
411 */
412 addr = (unsigned long)regs;
 413 	unat = &regs->eml_unat;
414
415 addr += gr_info[regnum];
416
417 *val = *(unsigned long *)addr;
418 /*
419 * do it only when requested
420 */
421 if (nat)
422 *nat = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL;
423}
424
425void setreg(unsigned long regnum, unsigned long val,
426 int nat, struct kvm_pt_regs *regs)
427{
428 unsigned long addr;
429 unsigned long bitmask;
430 unsigned long *unat;
431
432 /*
433 * First takes care of stacked registers
434 */
435 if (regnum >= IA64_FIRST_STACKED_GR) {
436 set_rse_reg(regs, regnum, val, nat);
437 return;
438 }
439
440 /*
441 * Now look at registers in [0-31] range and init correct UNAT
442 */
443 addr = (unsigned long)regs;
444 unat = &regs->eml_unat;
445 /*
446 * add offset from base of struct
447 * and do it !
448 */
449 addr += gr_info[regnum];
450
451 *(unsigned long *)addr = val;
452
453 /*
454 * We need to clear the corresponding UNAT bit to fully emulate the load
 455 * UNAT bit_pos = GR[r3]{8:3}, from EAS-2.4
456 */
457 bitmask = 1UL << ((addr >> 3) & 0x3f);
458 if (nat)
459 *unat |= bitmask;
460 else
461 *unat &= ~bitmask;
462
463}
464
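Both getreg() and setreg() locate the NaT bit with (addr >> 3) & 0x3f, i.e. UNAT bit addr{8:3}: consecutive 8-byte slots of the pt_regs area map to consecutive UNAT bits, wrapping every 512 bytes. A minimal sketch (illustration only):

/* Illustration only: which eml_unat bit guards the 8-byte slot at addr. */
static inline unsigned long unat_bit_sketch(unsigned long addr)
{
	return (addr >> 3) & 0x3f;	/* addr{8:3} */
}
/* Two registers stored 8 bytes apart therefore use adjacent UNAT bits. */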
465u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
466{
467 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
468 u64 val;
469
470 if (!reg)
471 return 0;
472 getreg(reg, &val, 0, regs);
473 return val;
474}
475
476void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat)
477{
478 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
479 long sof = (regs->cr_ifs) & 0x7f;
480
481 if (!reg)
482 return;
483 if (reg >= sof + 32)
484 return;
485 setreg(reg, value, nat, regs); /* FIXME: handle NATs later*/
486}
487
488void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
489 struct kvm_pt_regs *regs)
490{
491 /* Take floating register rotation into consideration*/
492 if (regnum >= IA64_FIRST_ROTATING_FR)
493 regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
494#define CASE_FIXED_FP(reg) \
495 case (reg) : \
496 ia64_stf_spill(fpval, reg); \
497 break
498
499 switch (regnum) {
500 CASE_FIXED_FP(0);
501 CASE_FIXED_FP(1);
502 CASE_FIXED_FP(2);
503 CASE_FIXED_FP(3);
504 CASE_FIXED_FP(4);
505 CASE_FIXED_FP(5);
506
507 CASE_FIXED_FP(6);
508 CASE_FIXED_FP(7);
509 CASE_FIXED_FP(8);
510 CASE_FIXED_FP(9);
511 CASE_FIXED_FP(10);
512 CASE_FIXED_FP(11);
513
514 CASE_FIXED_FP(12);
515 CASE_FIXED_FP(13);
516 CASE_FIXED_FP(14);
517 CASE_FIXED_FP(15);
518 CASE_FIXED_FP(16);
519 CASE_FIXED_FP(17);
520 CASE_FIXED_FP(18);
521 CASE_FIXED_FP(19);
522 CASE_FIXED_FP(20);
523 CASE_FIXED_FP(21);
524 CASE_FIXED_FP(22);
525 CASE_FIXED_FP(23);
526 CASE_FIXED_FP(24);
527 CASE_FIXED_FP(25);
528 CASE_FIXED_FP(26);
529 CASE_FIXED_FP(27);
530 CASE_FIXED_FP(28);
531 CASE_FIXED_FP(29);
532 CASE_FIXED_FP(30);
533 CASE_FIXED_FP(31);
534 CASE_FIXED_FP(32);
535 CASE_FIXED_FP(33);
536 CASE_FIXED_FP(34);
537 CASE_FIXED_FP(35);
538 CASE_FIXED_FP(36);
539 CASE_FIXED_FP(37);
540 CASE_FIXED_FP(38);
541 CASE_FIXED_FP(39);
542 CASE_FIXED_FP(40);
543 CASE_FIXED_FP(41);
544 CASE_FIXED_FP(42);
545 CASE_FIXED_FP(43);
546 CASE_FIXED_FP(44);
547 CASE_FIXED_FP(45);
548 CASE_FIXED_FP(46);
549 CASE_FIXED_FP(47);
550 CASE_FIXED_FP(48);
551 CASE_FIXED_FP(49);
552 CASE_FIXED_FP(50);
553 CASE_FIXED_FP(51);
554 CASE_FIXED_FP(52);
555 CASE_FIXED_FP(53);
556 CASE_FIXED_FP(54);
557 CASE_FIXED_FP(55);
558 CASE_FIXED_FP(56);
559 CASE_FIXED_FP(57);
560 CASE_FIXED_FP(58);
561 CASE_FIXED_FP(59);
562 CASE_FIXED_FP(60);
563 CASE_FIXED_FP(61);
564 CASE_FIXED_FP(62);
565 CASE_FIXED_FP(63);
566 CASE_FIXED_FP(64);
567 CASE_FIXED_FP(65);
568 CASE_FIXED_FP(66);
569 CASE_FIXED_FP(67);
570 CASE_FIXED_FP(68);
571 CASE_FIXED_FP(69);
572 CASE_FIXED_FP(70);
573 CASE_FIXED_FP(71);
574 CASE_FIXED_FP(72);
575 CASE_FIXED_FP(73);
576 CASE_FIXED_FP(74);
577 CASE_FIXED_FP(75);
578 CASE_FIXED_FP(76);
579 CASE_FIXED_FP(77);
580 CASE_FIXED_FP(78);
581 CASE_FIXED_FP(79);
582 CASE_FIXED_FP(80);
583 CASE_FIXED_FP(81);
584 CASE_FIXED_FP(82);
585 CASE_FIXED_FP(83);
586 CASE_FIXED_FP(84);
587 CASE_FIXED_FP(85);
588 CASE_FIXED_FP(86);
589 CASE_FIXED_FP(87);
590 CASE_FIXED_FP(88);
591 CASE_FIXED_FP(89);
592 CASE_FIXED_FP(90);
593 CASE_FIXED_FP(91);
594 CASE_FIXED_FP(92);
595 CASE_FIXED_FP(93);
596 CASE_FIXED_FP(94);
597 CASE_FIXED_FP(95);
598 CASE_FIXED_FP(96);
599 CASE_FIXED_FP(97);
600 CASE_FIXED_FP(98);
601 CASE_FIXED_FP(99);
602 CASE_FIXED_FP(100);
603 CASE_FIXED_FP(101);
604 CASE_FIXED_FP(102);
605 CASE_FIXED_FP(103);
606 CASE_FIXED_FP(104);
607 CASE_FIXED_FP(105);
608 CASE_FIXED_FP(106);
609 CASE_FIXED_FP(107);
610 CASE_FIXED_FP(108);
611 CASE_FIXED_FP(109);
612 CASE_FIXED_FP(110);
613 CASE_FIXED_FP(111);
614 CASE_FIXED_FP(112);
615 CASE_FIXED_FP(113);
616 CASE_FIXED_FP(114);
617 CASE_FIXED_FP(115);
618 CASE_FIXED_FP(116);
619 CASE_FIXED_FP(117);
620 CASE_FIXED_FP(118);
621 CASE_FIXED_FP(119);
622 CASE_FIXED_FP(120);
623 CASE_FIXED_FP(121);
624 CASE_FIXED_FP(122);
625 CASE_FIXED_FP(123);
626 CASE_FIXED_FP(124);
627 CASE_FIXED_FP(125);
628 CASE_FIXED_FP(126);
629 CASE_FIXED_FP(127);
630 }
631#undef CASE_FIXED_FP
632}
633
634void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
635 struct kvm_pt_regs *regs)
636{
637 /* Take floating register rotation into consideration*/
638 if (regnum >= IA64_FIRST_ROTATING_FR)
639 regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
640
641#define CASE_FIXED_FP(reg) \
642 case (reg) : \
643 ia64_ldf_fill(reg, fpval); \
644 break
645
646 switch (regnum) {
647 CASE_FIXED_FP(2);
648 CASE_FIXED_FP(3);
649 CASE_FIXED_FP(4);
650 CASE_FIXED_FP(5);
651
652 CASE_FIXED_FP(6);
653 CASE_FIXED_FP(7);
654 CASE_FIXED_FP(8);
655 CASE_FIXED_FP(9);
656 CASE_FIXED_FP(10);
657 CASE_FIXED_FP(11);
658
659 CASE_FIXED_FP(12);
660 CASE_FIXED_FP(13);
661 CASE_FIXED_FP(14);
662 CASE_FIXED_FP(15);
663 CASE_FIXED_FP(16);
664 CASE_FIXED_FP(17);
665 CASE_FIXED_FP(18);
666 CASE_FIXED_FP(19);
667 CASE_FIXED_FP(20);
668 CASE_FIXED_FP(21);
669 CASE_FIXED_FP(22);
670 CASE_FIXED_FP(23);
671 CASE_FIXED_FP(24);
672 CASE_FIXED_FP(25);
673 CASE_FIXED_FP(26);
674 CASE_FIXED_FP(27);
675 CASE_FIXED_FP(28);
676 CASE_FIXED_FP(29);
677 CASE_FIXED_FP(30);
678 CASE_FIXED_FP(31);
679 CASE_FIXED_FP(32);
680 CASE_FIXED_FP(33);
681 CASE_FIXED_FP(34);
682 CASE_FIXED_FP(35);
683 CASE_FIXED_FP(36);
684 CASE_FIXED_FP(37);
685 CASE_FIXED_FP(38);
686 CASE_FIXED_FP(39);
687 CASE_FIXED_FP(40);
688 CASE_FIXED_FP(41);
689 CASE_FIXED_FP(42);
690 CASE_FIXED_FP(43);
691 CASE_FIXED_FP(44);
692 CASE_FIXED_FP(45);
693 CASE_FIXED_FP(46);
694 CASE_FIXED_FP(47);
695 CASE_FIXED_FP(48);
696 CASE_FIXED_FP(49);
697 CASE_FIXED_FP(50);
698 CASE_FIXED_FP(51);
699 CASE_FIXED_FP(52);
700 CASE_FIXED_FP(53);
701 CASE_FIXED_FP(54);
702 CASE_FIXED_FP(55);
703 CASE_FIXED_FP(56);
704 CASE_FIXED_FP(57);
705 CASE_FIXED_FP(58);
706 CASE_FIXED_FP(59);
707 CASE_FIXED_FP(60);
708 CASE_FIXED_FP(61);
709 CASE_FIXED_FP(62);
710 CASE_FIXED_FP(63);
711 CASE_FIXED_FP(64);
712 CASE_FIXED_FP(65);
713 CASE_FIXED_FP(66);
714 CASE_FIXED_FP(67);
715 CASE_FIXED_FP(68);
716 CASE_FIXED_FP(69);
717 CASE_FIXED_FP(70);
718 CASE_FIXED_FP(71);
719 CASE_FIXED_FP(72);
720 CASE_FIXED_FP(73);
721 CASE_FIXED_FP(74);
722 CASE_FIXED_FP(75);
723 CASE_FIXED_FP(76);
724 CASE_FIXED_FP(77);
725 CASE_FIXED_FP(78);
726 CASE_FIXED_FP(79);
727 CASE_FIXED_FP(80);
728 CASE_FIXED_FP(81);
729 CASE_FIXED_FP(82);
730 CASE_FIXED_FP(83);
731 CASE_FIXED_FP(84);
732 CASE_FIXED_FP(85);
733 CASE_FIXED_FP(86);
734 CASE_FIXED_FP(87);
735 CASE_FIXED_FP(88);
736 CASE_FIXED_FP(89);
737 CASE_FIXED_FP(90);
738 CASE_FIXED_FP(91);
739 CASE_FIXED_FP(92);
740 CASE_FIXED_FP(93);
741 CASE_FIXED_FP(94);
742 CASE_FIXED_FP(95);
743 CASE_FIXED_FP(96);
744 CASE_FIXED_FP(97);
745 CASE_FIXED_FP(98);
746 CASE_FIXED_FP(99);
747 CASE_FIXED_FP(100);
748 CASE_FIXED_FP(101);
749 CASE_FIXED_FP(102);
750 CASE_FIXED_FP(103);
751 CASE_FIXED_FP(104);
752 CASE_FIXED_FP(105);
753 CASE_FIXED_FP(106);
754 CASE_FIXED_FP(107);
755 CASE_FIXED_FP(108);
756 CASE_FIXED_FP(109);
757 CASE_FIXED_FP(110);
758 CASE_FIXED_FP(111);
759 CASE_FIXED_FP(112);
760 CASE_FIXED_FP(113);
761 CASE_FIXED_FP(114);
762 CASE_FIXED_FP(115);
763 CASE_FIXED_FP(116);
764 CASE_FIXED_FP(117);
765 CASE_FIXED_FP(118);
766 CASE_FIXED_FP(119);
767 CASE_FIXED_FP(120);
768 CASE_FIXED_FP(121);
769 CASE_FIXED_FP(122);
770 CASE_FIXED_FP(123);
771 CASE_FIXED_FP(124);
772 CASE_FIXED_FP(125);
773 CASE_FIXED_FP(126);
774 CASE_FIXED_FP(127);
775 }
776}
777
778void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
779 struct ia64_fpreg *val)
780{
781 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
782
783 getfpreg(reg, val, regs); /* FIXME: handle NATs later*/
784}
785
786void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
787 struct ia64_fpreg *val)
788{
789 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
790
791 if (reg > 1)
792 setfpreg(reg, val, regs); /* FIXME: handle NATs later*/
793}
794
795/************************************************************************
796 * lsapic timer
797 ***********************************************************************/
798u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
799{
800 unsigned long guest_itc;
801 guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC);
802
803 if (guest_itc >= VMX(vcpu, last_itc)) {
804 VMX(vcpu, last_itc) = guest_itc;
805 return guest_itc;
806 } else
807 return VMX(vcpu, last_itc);
808}
809
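Guest time is the host ar.itc plus a per-VM offset, clamped so that it never appears to move backwards between reads. A minimal sketch of the same policy (illustration only, names hypothetical):

/* Illustration only: monotonic guest ITC from host counter + offset. */
static unsigned long guest_itc_sketch(unsigned long host_itc,
				      unsigned long itc_offset,
				      unsigned long *last_itc)
{
	unsigned long guest = host_itc + itc_offset;

	if (guest < *last_itc)		/* never step backwards */
		return *last_itc;
	*last_itc = guest;
	return guest;
}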
810static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
811static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
812{
813 struct kvm_vcpu *v;
814 int i;
815 long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC);
816 unsigned long vitv = VCPU(vcpu, itv);
817
818 if (vcpu->vcpu_id == 0) {
819 for (i = 0; i < MAX_VCPU_NUM; i++) {
820 v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
821 VMX(v, itc_offset) = itc_offset;
822 VMX(v, last_itc) = 0;
823 }
824 }
825 VMX(vcpu, last_itc) = 0;
826 if (VCPU(vcpu, itm) <= val) {
827 VMX(vcpu, itc_check) = 0;
828 vcpu_unpend_interrupt(vcpu, vitv);
829 } else {
830 VMX(vcpu, itc_check) = 1;
831 vcpu_set_itm(vcpu, VCPU(vcpu, itm));
832 }
833
834}
835
836static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu)
837{
838 return ((u64)VCPU(vcpu, itm));
839}
840
841static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val)
842{
843 unsigned long vitv = VCPU(vcpu, itv);
844 VCPU(vcpu, itm) = val;
845
846 if (val > vcpu_get_itc(vcpu)) {
847 VMX(vcpu, itc_check) = 1;
848 vcpu_unpend_interrupt(vcpu, vitv);
849 VMX(vcpu, timer_pending) = 0;
850 } else
851 VMX(vcpu, itc_check) = 0;
852}
853
854#define ITV_VECTOR(itv) (itv&0xff)
855#define ITV_IRQ_MASK(itv) (itv&(1<<16))
856
857static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val)
858{
859 VCPU(vcpu, itv) = val;
860 if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) {
861 vcpu_pend_interrupt(vcpu, ITV_VECTOR(val));
862 vcpu->arch.timer_pending = 0;
863 }
864}
865
866static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val)
867{
868 int vec;
869
870 vec = highest_inservice_irq(vcpu);
871 if (vec == NULL_VECTOR)
872 return;
873 VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63));
874 VCPU(vcpu, eoi) = 0;
875 vcpu->arch.irq_new_pending = 1;
876
877}
878
879/* See Table 5-8 in SDM vol2 for the definition */
880int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice)
881{
882 union ia64_tpr vtpr;
883
884 vtpr.val = VCPU(vcpu, tpr);
885
886 if (h_inservice == NMI_VECTOR)
887 return IRQ_MASKED_BY_INSVC;
888
889 if (h_pending == NMI_VECTOR) {
890 /* Non Maskable Interrupt */
891 return IRQ_NO_MASKED;
892 }
893
894 if (h_inservice == ExtINT_VECTOR)
895 return IRQ_MASKED_BY_INSVC;
896
897 if (h_pending == ExtINT_VECTOR) {
898 if (vtpr.mmi) {
899 /* mask all external IRQ */
900 return IRQ_MASKED_BY_VTPR;
901 } else
902 return IRQ_NO_MASKED;
903 }
904
905 if (is_higher_irq(h_pending, h_inservice)) {
906 if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4)))
907 return IRQ_NO_MASKED;
908 else
909 return IRQ_MASKED_BY_VTPR;
910 } else {
911 return IRQ_MASKED_BY_INSVC;
912 }
913}
914
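For an ordinary external vector, the decision above reduces to comparing the vector's priority class against tpr.mic, with tpr.mmi masking everything. A rough sketch, assuming the usual class mapping (class = vector >> 4) used by is_higher_class():

/* Illustration only: is an ordinary external vector masked by the TPR? */
static int masked_by_tpr_sketch(int vec, unsigned int mic, unsigned int mmi)
{
	if (mmi)			/* tpr.mmi masks all external interrupts */
		return 1;
	return (vec >> 4) <= mic;	/* class must exceed tpr.mic to deliver */
}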
915void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
916{
917 long spsr;
918 int ret;
919
920 local_irq_save(spsr);
921 ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0]));
922 local_irq_restore(spsr);
923
924 vcpu->arch.irq_new_pending = 1;
925}
926
927void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
928{
929 long spsr;
930 int ret;
931
932 local_irq_save(spsr);
933 ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0]));
934 local_irq_restore(spsr);
935 if (ret) {
936 vcpu->arch.irq_new_pending = 1;
937 wmb();
938 }
939}
940
941void update_vhpi(struct kvm_vcpu *vcpu, int vec)
942{
943 u64 vhpi;
944
945 if (vec == NULL_VECTOR)
946 vhpi = 0;
947 else if (vec == NMI_VECTOR)
948 vhpi = 32;
949 else if (vec == ExtINT_VECTOR)
950 vhpi = 16;
951 else
952 vhpi = vec >> 4;
953
954 VCPU(vcpu, vhpi) = vhpi;
955 if (VCPU(vcpu, vac).a_int)
956 ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT,
957 (u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0);
958}
959
960u64 vcpu_get_ivr(struct kvm_vcpu *vcpu)
961{
962 int vec, h_inservice, mask;
963
964 vec = highest_pending_irq(vcpu);
965 h_inservice = highest_inservice_irq(vcpu);
966 mask = irq_masked(vcpu, vec, h_inservice);
967 if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) {
968 if (VCPU(vcpu, vhpi))
969 update_vhpi(vcpu, NULL_VECTOR);
970 return IA64_SPURIOUS_INT_VECTOR;
971 }
972 if (mask == IRQ_MASKED_BY_VTPR) {
973 update_vhpi(vcpu, vec);
974 return IA64_SPURIOUS_INT_VECTOR;
975 }
976 VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63));
977 vcpu_unpend_interrupt(vcpu, vec);
978 return (u64)vec;
979}
980
981/**************************************************************************
982 Privileged operation emulation routines
983 **************************************************************************/
984u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr)
985{
986 union ia64_pta vpta;
987 union ia64_rr vrr;
988 u64 pval;
989 u64 vhpt_offset;
990
991 vpta.val = vcpu_get_pta(vcpu);
992 vrr.val = vcpu_get_rr(vcpu, vadr);
993 vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1);
994 if (vpta.vf) {
995 pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val,
996 vpta.val, 0, 0, 0, 0);
997 } else {
998 pval = (vadr & VRN_MASK) | vhpt_offset |
999 (vpta.val << 3 >> (vpta.size + 3) << (vpta.size));
1000 }
1001 return pval;
1002}
1003
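In the vpta.vf (long-format) case the hash is computed by PAL; the short-format branch above builds the VHPT entry address by hand. A standalone sketch of that arithmetic (illustration only):

/* Illustration only: short-format VHPT hash, as computed above. */
static unsigned long thash_sketch(unsigned long vadr, unsigned long ps,
				  unsigned long pta, unsigned long pta_size)
{
	/* table base: pta without region bits, aligned down to 2^pta_size */
	unsigned long base = pta << 3 >> (pta_size + 3) << pta_size;
	/* virtual page number * 8, wrapped to the table size */
	unsigned long off  = ((vadr >> ps) << 3) & ((1UL << pta_size) - 1);

	return (vadr & (0x7UL << 61)) | base | off;	/* keep region bits */
}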
1004u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr)
1005{
1006 union ia64_rr vrr;
1007 union ia64_pta vpta;
1008 u64 pval;
1009
1010 vpta.val = vcpu_get_pta(vcpu);
1011 vrr.val = vcpu_get_rr(vcpu, vadr);
1012 if (vpta.vf) {
1013 pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val,
1014 0, 0, 0, 0, 0);
1015 } else
1016 pval = 1;
1017
1018 return pval;
1019}
1020
1021u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
1022{
1023 struct thash_data *data;
1024 union ia64_pta vpta;
1025 u64 key;
1026
1027 vpta.val = vcpu_get_pta(vcpu);
1028 if (vpta.vf == 0) {
1029 key = 1;
1030 return key;
1031 }
1032 data = vtlb_lookup(vcpu, vadr, D_TLB);
1033 if (!data || !data->p)
1034 key = 1;
1035 else
1036 key = data->key;
1037
1038 return key;
1039}
1040
1041
1042
1043void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
1044{
1045 unsigned long thash, vadr;
1046
1047 vadr = vcpu_get_gr(vcpu, inst.M46.r3);
1048 thash = vcpu_thash(vcpu, vadr);
1049 vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
1050}
1051
1052
1053void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
1054{
1055 unsigned long tag, vadr;
1056
1057 vadr = vcpu_get_gr(vcpu, inst.M46.r3);
1058 tag = vcpu_ttag(vcpu, vadr);
1059 vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
1060}
1061
1062int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr)
1063{
1064 struct thash_data *data;
1065 union ia64_isr visr, pt_isr;
1066 struct kvm_pt_regs *regs;
1067 struct ia64_psr vpsr;
1068
1069 regs = vcpu_regs(vcpu);
1070 pt_isr.val = VMX(vcpu, cr_isr);
1071 visr.val = 0;
1072 visr.ei = pt_isr.ei;
1073 visr.ir = pt_isr.ir;
1074 vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
1075 visr.na = 1;
1076
1077 data = vhpt_lookup(vadr);
1078 if (data) {
1079 if (data->p == 0) {
1080 vcpu_set_isr(vcpu, visr.val);
1081 data_page_not_present(vcpu, vadr);
1082 return IA64_FAULT;
1083 } else if (data->ma == VA_MATTR_NATPAGE) {
1084 vcpu_set_isr(vcpu, visr.val);
1085 dnat_page_consumption(vcpu, vadr);
1086 return IA64_FAULT;
1087 } else {
1088 *padr = (data->gpaddr >> data->ps << data->ps) |
1089 (vadr & (PSIZE(data->ps) - 1));
1090 return IA64_NO_FAULT;
1091 }
1092 }
1093
1094 data = vtlb_lookup(vcpu, vadr, D_TLB);
1095 if (data) {
1096 if (data->p == 0) {
1097 vcpu_set_isr(vcpu, visr.val);
1098 data_page_not_present(vcpu, vadr);
1099 return IA64_FAULT;
1100 } else if (data->ma == VA_MATTR_NATPAGE) {
1101 vcpu_set_isr(vcpu, visr.val);
1102 dnat_page_consumption(vcpu, vadr);
1103 return IA64_FAULT;
1104 } else{
1105 *padr = ((data->ppn >> (data->ps - 12)) << data->ps)
1106 | (vadr & (PSIZE(data->ps) - 1));
1107 return IA64_NO_FAULT;
1108 }
1109 }
1110 if (!vhpt_enabled(vcpu, vadr, NA_REF)) {
1111 if (vpsr.ic) {
1112 vcpu_set_isr(vcpu, visr.val);
1113 alt_dtlb(vcpu, vadr);
1114 return IA64_FAULT;
1115 } else {
1116 nested_dtlb(vcpu);
1117 return IA64_FAULT;
1118 }
1119 } else {
1120 if (vpsr.ic) {
1121 vcpu_set_isr(vcpu, visr.val);
1122 dvhpt_fault(vcpu, vadr);
1123 return IA64_FAULT;
1124 } else{
1125 nested_dtlb(vcpu);
1126 return IA64_FAULT;
1127 }
1128 }
1129
1130 return IA64_NO_FAULT;
1131}
1132
1133
1134int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
1135{
1136 unsigned long r1, r3;
1137
1138 r3 = vcpu_get_gr(vcpu, inst.M46.r3);
1139
1140 if (vcpu_tpa(vcpu, r3, &r1))
1141 return IA64_FAULT;
1142
1143 vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
1144 return(IA64_NO_FAULT);
1145}
1146
1147void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
1148{
1149 unsigned long r1, r3;
1150
1151 r3 = vcpu_get_gr(vcpu, inst.M46.r3);
1152 r1 = vcpu_tak(vcpu, r3);
1153 vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
1154}
1155
1156
1157/************************************
1158 * Insert/Purge translation register/cache
1159 ************************************/
1160void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
1161{
1162 thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB);
1163}
1164
1165void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
1166{
1167 thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB);
1168}
1169
1170void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
1171{
1172 u64 ps, va, rid;
1173 struct thash_data *p_itr;
1174
1175 ps = itir_ps(itir);
1176 va = PAGEALIGN(ifa, ps);
1177 pte &= ~PAGE_FLAGS_RV_MASK;
1178 rid = vcpu_get_rr(vcpu, ifa);
1179 rid = rid & RR_RID_MASK;
1180 p_itr = (struct thash_data *)&vcpu->arch.itrs[slot];
1181 vcpu_set_tr(p_itr, pte, itir, va, rid);
1182 vcpu_quick_region_set(VMX(vcpu, itr_regions), va);
1183}
1184
1185
1186void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
1187{
1188 u64 gpfn;
1189 u64 ps, va, rid;
1190 struct thash_data *p_dtr;
1191
1192 ps = itir_ps(itir);
1193 va = PAGEALIGN(ifa, ps);
1194 pte &= ~PAGE_FLAGS_RV_MASK;
1195
1196 if (ps != _PAGE_SIZE_16M)
1197 thash_purge_entries(vcpu, va, ps);
1198 gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
1199 if (__gpfn_is_io(gpfn))
1200 pte |= VTLB_PTE_IO;
1201 rid = vcpu_get_rr(vcpu, va);
1202 rid = rid & RR_RID_MASK;
1203 p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot];
 1204 	vcpu_set_tr(p_dtr, pte, itir, va, rid);
1206 vcpu_quick_region_set(VMX(vcpu, dtr_regions), va);
1207}
1208
1209void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
1210{
1211 int index;
1212 u64 va;
1213
1214 va = PAGEALIGN(ifa, ps);
1215 while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0)
1216 vcpu->arch.dtrs[index].page_flags = 0;
1217
1218 thash_purge_entries(vcpu, va, ps);
1219}
1220
1221void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
1222{
1223 int index;
1224 u64 va;
1225
1226 va = PAGEALIGN(ifa, ps);
1227 while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0)
1228 vcpu->arch.itrs[index].page_flags = 0;
1229
1230 thash_purge_entries(vcpu, va, ps);
1231}
1232
1233void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps)
1234{
1235 va = PAGEALIGN(va, ps);
1236 thash_purge_entries(vcpu, va, ps);
1237}
1238
1239void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va)
1240{
1241 thash_purge_all(vcpu);
1242}
1243
1244void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps)
1245{
1246 struct exit_ctl_data *p = &vcpu->arch.exit_data;
1247 long psr;
1248 local_irq_save(psr);
1249 p->exit_reason = EXIT_REASON_PTC_G;
1250
1251 p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va);
1252 p->u.ptc_g_data.vaddr = va;
1253 p->u.ptc_g_data.ps = ps;
1254 vmm_transition(vcpu);
1255 /* Do Local Purge Here*/
1256 vcpu_ptc_l(vcpu, va, ps);
1257 local_irq_restore(psr);
1258}
1259
1260
1261void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps)
1262{
1263 vcpu_ptc_ga(vcpu, va, ps);
1264}
1265
1266void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst)
1267{
1268 unsigned long ifa;
1269
1270 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1271 vcpu_ptc_e(vcpu, ifa);
1272}
1273
1274void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst)
1275{
1276 unsigned long ifa, itir;
1277
1278 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1279 itir = vcpu_get_gr(vcpu, inst.M45.r2);
1280 vcpu_ptc_g(vcpu, ifa, itir_ps(itir));
1281}
1282
1283void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst)
1284{
1285 unsigned long ifa, itir;
1286
1287 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1288 itir = vcpu_get_gr(vcpu, inst.M45.r2);
1289 vcpu_ptc_ga(vcpu, ifa, itir_ps(itir));
1290}
1291
1292void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst)
1293{
1294 unsigned long ifa, itir;
1295
1296 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1297 itir = vcpu_get_gr(vcpu, inst.M45.r2);
1298 vcpu_ptc_l(vcpu, ifa, itir_ps(itir));
1299}
1300
1301void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst)
1302{
1303 unsigned long ifa, itir;
1304
1305 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1306 itir = vcpu_get_gr(vcpu, inst.M45.r2);
1307 vcpu_ptr_d(vcpu, ifa, itir_ps(itir));
1308}
1309
1310void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst)
1311{
1312 unsigned long ifa, itir;
1313
1314 ifa = vcpu_get_gr(vcpu, inst.M45.r3);
1315 itir = vcpu_get_gr(vcpu, inst.M45.r2);
1316 vcpu_ptr_i(vcpu, ifa, itir_ps(itir));
1317}
1318
1319void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst)
1320{
1321 unsigned long itir, ifa, pte, slot;
1322
1323 slot = vcpu_get_gr(vcpu, inst.M45.r3);
1324 pte = vcpu_get_gr(vcpu, inst.M45.r2);
1325 itir = vcpu_get_itir(vcpu);
1326 ifa = vcpu_get_ifa(vcpu);
1327 vcpu_itr_d(vcpu, slot, pte, itir, ifa);
1328}
1329
1330
1331
1332void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst)
1333{
1334 unsigned long itir, ifa, pte, slot;
1335
1336 slot = vcpu_get_gr(vcpu, inst.M45.r3);
1337 pte = vcpu_get_gr(vcpu, inst.M45.r2);
1338 itir = vcpu_get_itir(vcpu);
1339 ifa = vcpu_get_ifa(vcpu);
1340 vcpu_itr_i(vcpu, slot, pte, itir, ifa);
1341}
1342
1343void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst)
1344{
1345 unsigned long itir, ifa, pte;
1346
1347 itir = vcpu_get_itir(vcpu);
1348 ifa = vcpu_get_ifa(vcpu);
1349 pte = vcpu_get_gr(vcpu, inst.M45.r2);
1350 vcpu_itc_d(vcpu, pte, itir, ifa);
1351}
1352
1353void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst)
1354{
1355 unsigned long itir, ifa, pte;
1356
1357 itir = vcpu_get_itir(vcpu);
1358 ifa = vcpu_get_ifa(vcpu);
1359 pte = vcpu_get_gr(vcpu, inst.M45.r2);
1360 vcpu_itc_i(vcpu, pte, itir, ifa);
1361}
1362
1363/*************************************
1364 * Moves to semi-privileged registers
1365 *************************************/
1366
1367void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst)
1368{
1369 unsigned long imm;
1370
1371 if (inst.M30.s)
1372 imm = -inst.M30.imm;
1373 else
1374 imm = inst.M30.imm;
1375
1376 vcpu_set_itc(vcpu, imm);
1377}
1378
1379void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
1380{
1381 unsigned long r2;
1382
1383 r2 = vcpu_get_gr(vcpu, inst.M29.r2);
1384 vcpu_set_itc(vcpu, r2);
1385}
1386
1387
1388void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
1389{
1390 unsigned long r1;
1391
1392 r1 = vcpu_get_itc(vcpu);
1393 vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
1394}
1395/**************************************************************************
 1396 struct kvm_vcpu protection key register access routines
1397 **************************************************************************/
1398
1399unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
1400{
1401 return ((unsigned long)ia64_get_pkr(reg));
1402}
1403
1404void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
1405{
1406 ia64_set_pkr(reg, val);
1407}
1408
1409
1410unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa)
1411{
1412 union ia64_rr rr, rr1;
1413
1414 rr.val = vcpu_get_rr(vcpu, ifa);
1415 rr1.val = 0;
1416 rr1.ps = rr.ps;
1417 rr1.rid = rr.rid;
1418 return (rr1.val);
1419}
1420
1421
1422
1423/********************************
1424 * Moves to privileged registers
1425 ********************************/
1426unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
1427 unsigned long val)
1428{
1429 union ia64_rr oldrr, newrr;
1430 unsigned long rrval;
1431 struct exit_ctl_data *p = &vcpu->arch.exit_data;
1432 unsigned long psr;
1433
1434 oldrr.val = vcpu_get_rr(vcpu, reg);
1435 newrr.val = val;
1436 vcpu->arch.vrr[reg >> VRN_SHIFT] = val;
1437
1438 switch ((unsigned long)(reg >> VRN_SHIFT)) {
1439 case VRN6:
1440 vcpu->arch.vmm_rr = vrrtomrr(val);
1441 local_irq_save(psr);
1442 p->exit_reason = EXIT_REASON_SWITCH_RR6;
1443 vmm_transition(vcpu);
1444 local_irq_restore(psr);
1445 break;
1446 case VRN4:
1447 rrval = vrrtomrr(val);
1448 vcpu->arch.metaphysical_saved_rr4 = rrval;
1449 if (!is_physical_mode(vcpu))
1450 ia64_set_rr(reg, rrval);
1451 break;
1452 case VRN0:
1453 rrval = vrrtomrr(val);
1454 vcpu->arch.metaphysical_saved_rr0 = rrval;
1455 if (!is_physical_mode(vcpu))
1456 ia64_set_rr(reg, rrval);
1457 break;
1458 default:
1459 ia64_set_rr(reg, vrrtomrr(val));
1460 break;
1461 }
1462
1463 return (IA64_NO_FAULT);
1464}
1465
1466
1467
1468void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
1469{
1470 unsigned long r3, r2;
1471
1472 r3 = vcpu_get_gr(vcpu, inst.M42.r3);
1473 r2 = vcpu_get_gr(vcpu, inst.M42.r2);
1474 vcpu_set_rr(vcpu, r3, r2);
1475}
1476
1477void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst)
1478{
1479}
1480
1481void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst)
1482{
1483}
1484
1485void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst)
1486{
1487 unsigned long r3, r2;
1488
1489 r3 = vcpu_get_gr(vcpu, inst.M42.r3);
1490 r2 = vcpu_get_gr(vcpu, inst.M42.r2);
1491 vcpu_set_pmc(vcpu, r3, r2);
1492}
1493
1494void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst)
1495{
1496 unsigned long r3, r2;
1497
1498 r3 = vcpu_get_gr(vcpu, inst.M42.r3);
1499 r2 = vcpu_get_gr(vcpu, inst.M42.r2);
1500 vcpu_set_pmd(vcpu, r3, r2);
1501}
1502
1503void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
1504{
1505 u64 r3, r2;
1506
1507 r3 = vcpu_get_gr(vcpu, inst.M42.r3);
1508 r2 = vcpu_get_gr(vcpu, inst.M42.r2);
1509 vcpu_set_pkr(vcpu, r3, r2);
1510}
1511
1512
1513
1514void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
1515{
1516 unsigned long r3, r1;
1517
1518 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1519 r1 = vcpu_get_rr(vcpu, r3);
1520 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1521}
1522
1523void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst)
1524{
1525 unsigned long r3, r1;
1526
1527 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1528 r1 = vcpu_get_pkr(vcpu, r3);
1529 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1530}
1531
1532void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst)
1533{
1534 unsigned long r3, r1;
1535
1536 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1537 r1 = vcpu_get_dbr(vcpu, r3);
1538 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1539}
1540
1541void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst)
1542{
1543 unsigned long r3, r1;
1544
1545 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1546 r1 = vcpu_get_ibr(vcpu, r3);
1547 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1548}
1549
1550void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
1551{
1552 unsigned long r3, r1;
1553
1554 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1555 r1 = vcpu_get_pmc(vcpu, r3);
1556 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1557}
1558
1559
1560unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
1561{
1562 /* FIXME: This could get called as a result of a rsvd-reg fault */
1563 if (reg > (ia64_get_cpuid(3) & 0xff))
1564 return 0;
1565 else
1566 return ia64_get_cpuid(reg);
1567}
1568
1569void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst)
1570{
1571 unsigned long r3, r1;
1572
1573 r3 = vcpu_get_gr(vcpu, inst.M43.r3);
1574 r1 = vcpu_get_cpuid(vcpu, r3);
1575 vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
1576}
1577
1578void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val)
1579{
1580 VCPU(vcpu, tpr) = val;
1581 vcpu->arch.irq_check = 1;
1582}
1583
1584unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
1585{
1586 unsigned long r2;
1587
1588 r2 = vcpu_get_gr(vcpu, inst.M32.r2);
1589 VCPU(vcpu, vcr[inst.M32.cr3]) = r2;
1590
1591 switch (inst.M32.cr3) {
1592 case 0:
1593 vcpu_set_dcr(vcpu, r2);
1594 break;
1595 case 1:
1596 vcpu_set_itm(vcpu, r2);
1597 break;
1598 case 66:
1599 vcpu_set_tpr(vcpu, r2);
1600 break;
1601 case 67:
1602 vcpu_set_eoi(vcpu, r2);
1603 break;
1604 default:
1605 break;
1606 }
1607
1608 return 0;
1609}
1610
1611
1612unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
1613{
1614 unsigned long tgt = inst.M33.r1;
1615 unsigned long val;
1616
1617 switch (inst.M33.cr3) {
1618 case 65:
1619 val = vcpu_get_ivr(vcpu);
1620 vcpu_set_gr(vcpu, tgt, val, 0);
1621 break;
1622
1623 case 67:
1624 vcpu_set_gr(vcpu, tgt, 0L, 0);
1625 break;
1626 default:
1627 val = VCPU(vcpu, vcr[inst.M33.cr3]);
1628 vcpu_set_gr(vcpu, tgt, val, 0);
1629 break;
1630 }
1631
1632 return 0;
1633}
1634
1635
1636
1637void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
1638{
1639
1640 unsigned long mask;
1641 struct kvm_pt_regs *regs;
1642 struct ia64_psr old_psr, new_psr;
1643
1644 old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
1645
1646 regs = vcpu_regs(vcpu);
 1647 	/* We only support a guest with:
1648 * vpsr.pk = 0
1649 * vpsr.is = 0
1650 * Otherwise panic
1651 */
1652 if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
1653 panic_vm(vcpu);
1654
1655 /*
1656 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
 1657 	 * Since these bits will become 0 after successful execution of each
 1658 	 * instruction, we set them in mIA64_PSR instead of in the virtual PSR.
1659 */
1660 VCPU(vcpu, vpsr) = val
1661 & (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD |
1662 IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA));
1663
1664 if (!old_psr.i && (val & IA64_PSR_I)) {
1665 /* vpsr.i 0->1 */
1666 vcpu->arch.irq_check = 1;
1667 }
1668 new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
1669
1670 /*
 1671 	 * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr),
 1672 	 * except for the following bits:
1673 * ic/i/dt/si/rt/mc/it/bn/vm
1674 */
1675 mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
1676 IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
1677 IA64_PSR_VM;
1678
1679 regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask));
1680
1681 check_mm_mode_switch(vcpu, old_psr, new_psr);
1682
1683 return ;
1684}
1685
1686unsigned long vcpu_cover(struct kvm_vcpu *vcpu)
1687{
1688 struct ia64_psr vpsr;
1689
1690 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1691 vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
1692
1693 if (!vpsr.ic)
1694 VCPU(vcpu, ifs) = regs->cr_ifs;
1695 regs->cr_ifs = IA64_IFS_V;
1696 return (IA64_NO_FAULT);
1697}
1698
1699
1700
1701/**************************************************************************
1702 VCPU banked general register access routines
1703 **************************************************************************/
1704#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \
1705 do { \
1706 __asm__ __volatile__ ( \
1707 ";;extr.u %0 = %3,%6,16;;\n" \
1708 "dep %1 = %0, %1, 0, 16;;\n" \
1709 "st8 [%4] = %1\n" \
1710 "extr.u %0 = %2, 16, 16;;\n" \
1711 "dep %3 = %0, %3, %6, 16;;\n" \
1712 "st8 [%5] = %3\n" \
1713 ::"r"(i), "r"(*b1unat), "r"(*b0unat), \
1714 "r"(*runat), "r"(b1unat), "r"(runat), \
1715 "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \
1716 } while (0)
1717
1718void vcpu_bsw0(struct kvm_vcpu *vcpu)
1719{
1720 unsigned long i;
1721
1722 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1723 unsigned long *r = &regs->r16;
1724 unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
1725 unsigned long *b1 = &VCPU(vcpu, vgr[0]);
1726 unsigned long *runat = &regs->eml_unat;
1727 unsigned long *b0unat = &VCPU(vcpu, vbnat);
1728 unsigned long *b1unat = &VCPU(vcpu, vnat);
1729
1730
1731 if (VCPU(vcpu, vpsr) & IA64_PSR_BN) {
1732 for (i = 0; i < 16; i++) {
1733 *b1++ = *r;
1734 *r++ = *b0++;
1735 }
1736 vcpu_bsw0_unat(i, b0unat, b1unat, runat,
1737 VMM_PT_REGS_R16_SLOT);
1738 VCPU(vcpu, vpsr) &= ~IA64_PSR_BN;
1739 }
1740}
1741
1742#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \
1743 do { \
1744 __asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n" \
1745 "dep %1 = %0, %1, 16, 16;;\n" \
1746 "st8 [%4] = %1\n" \
1747 "extr.u %0 = %2, 0, 16;;\n" \
1748 "dep %3 = %0, %3, %6, 16;;\n" \
1749 "st8 [%5] = %3\n" \
1750 ::"r"(i), "r"(*b0unat), "r"(*b1unat), \
1751 "r"(*runat), "r"(b0unat), "r"(runat), \
1752 "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \
1753 } while (0)
1754
1755void vcpu_bsw1(struct kvm_vcpu *vcpu)
1756{
1757 unsigned long i;
1758 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1759 unsigned long *r = &regs->r16;
1760 unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
1761 unsigned long *b1 = &VCPU(vcpu, vgr[0]);
1762 unsigned long *runat = &regs->eml_unat;
1763 unsigned long *b0unat = &VCPU(vcpu, vbnat);
1764 unsigned long *b1unat = &VCPU(vcpu, vnat);
1765
1766 if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) {
1767 for (i = 0; i < 16; i++) {
1768 *b0++ = *r;
1769 *r++ = *b1++;
1770 }
1771 vcpu_bsw1_unat(i, b0unat, b1unat, runat,
1772 VMM_PT_REGS_R16_SLOT);
1773 VCPU(vcpu, vpsr) |= IA64_PSR_BN;
1774 }
1775}
1776
1777
1778
1779
1780void vcpu_rfi(struct kvm_vcpu *vcpu)
1781{
1782 unsigned long ifs, psr;
1783 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1784
1785 psr = VCPU(vcpu, ipsr);
1786 if (psr & IA64_PSR_BN)
1787 vcpu_bsw1(vcpu);
1788 else
1789 vcpu_bsw0(vcpu);
1790 vcpu_set_psr(vcpu, psr);
1791 ifs = VCPU(vcpu, ifs);
1792 if (ifs >> 63)
1793 regs->cr_ifs = ifs;
1794 regs->cr_iip = VCPU(vcpu, iip);
1795}
1796
1797
1798/*
1799 VPSR can't keep track of the guest PSR bits listed in the mask below,
1800 so this function assembles the full guest PSR from vpsr and cr.ipsr.
1801 */
1802
1803unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu)
1804{
1805 unsigned long mask;
1806 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1807
1808 mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
1809 IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI;
1810 return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask);
1811}
1812
1813void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst)
1814{
1815 unsigned long vpsr;
1816 unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21)
1817 | inst.M44.imm;
1818
1819 vpsr = vcpu_get_psr(vcpu);
1820 vpsr &= (~imm24);
1821 vcpu_set_psr(vcpu, vpsr);
1822}
1823
1824void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst)
1825{
1826 unsigned long vpsr;
1827 unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21)
1828 | inst.M44.imm;
1829
1830 vpsr = vcpu_get_psr(vcpu);
1831 vpsr |= imm24;
1832 vcpu_set_psr(vcpu, vpsr);
1833}
1834
1835/* Generate Mask
1836 * Parameter:
1837 * bit -- starting bit
1838 * len -- how many bits
1839 */
1840#define MASK(bit,len) \
1841({ \
1842 __u64 ret; \
1843 \
1844 __asm __volatile("dep %0=-1, r0, %1, %2"\
1845 : "=r" (ret): \
1846 "M" (bit), \
1847 "M" (len)); \
1848 ret; \
1849})
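The dep instruction above deposits len consecutive 1-bits starting at position bit into a zero register. A minimal portable sketch of the same semantics (illustration only, assuming 64-bit unsigned long; not part of the patch):

	/* Illustrative equivalent of MASK(bit, len). */
	static inline unsigned long mask_sketch(unsigned int bit, unsigned int len)
	{
		unsigned long field = (len >= 64) ? ~0UL : ((1UL << len) - 1);
		return field << bit;	/* e.g. mask_sketch(0, 32) == 0xffffffffUL */
	}

vcpu_set_psr_l() below relies on this, merging the new low half (MASK(0, 32)) with the currently held high half (MASK(32, 32)) of the PSR.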
1850
1851void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val)
1852{
1853 val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32));
1854 vcpu_set_psr(vcpu, val);
1855}
1856
1857void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst)
1858{
1859 unsigned long val;
1860
1861 val = vcpu_get_gr(vcpu, inst.M35.r2);
1862 vcpu_set_psr_l(vcpu, val);
1863}
1864
1865void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst)
1866{
1867 unsigned long val;
1868
1869 val = vcpu_get_psr(vcpu);
1870 val = (val & MASK(0, 32)) | (val & MASK(35, 2));
1871 vcpu_set_gr(vcpu, inst.M33.r1, val, 0);
1872}
1873
1874void vcpu_increment_iip(struct kvm_vcpu *vcpu)
1875{
1876 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1877 struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
1878 if (ipsr->ri == 2) {
1879 ipsr->ri = 0;
1880 regs->cr_iip += 16;
1881 } else
1882 ipsr->ri++;
1883}
1884
1885void vcpu_decrement_iip(struct kvm_vcpu *vcpu)
1886{
1887 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1888 struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
1889
1890 if (ipsr->ri == 0) {
1891 ipsr->ri = 2;
1892 regs->cr_iip -= 16;
1893 } else
1894 ipsr->ri--;
1895}
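Both helpers step the guest IP one instruction slot at a time: an IA-64 bundle is 16 bytes and holds three slots indexed by ipsr.ri (0..2). A standalone sketch of the same stepping logic (illustration only):

	/* Advance a (bundle address, slot index) pair by one instruction slot. */
	static void advance_slot_sketch(unsigned long *iip, unsigned int *ri)
	{
		if (*ri == 2) {		/* last slot: move on to the next 16-byte bundle */
			*ri = 0;
			*iip += 16;
		} else {
			++*ri;
		}
	}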
1896
1897/** Emulate a privileged operation.
1898 *
1899 *
1900 * @param vcpu virtual cpu
1901 * @cause the reason that caused the virtualization fault
1902 * @opcode the instruction code that caused the virtualization fault
1903 */
1904
1905void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs)
1906{
1907 unsigned long status, cause, opcode;
1908 INST64 inst;
1909
1910 status = IA64_NO_FAULT;
1911 cause = VMX(vcpu, cause);
1912 opcode = VMX(vcpu, opcode);
1913 inst.inst = opcode;
1914 /*
1915 * Switch to actual virtual rid in rr0 and rr4,
1916 * which is required by some tlb related instructions.
1917 */
1918 prepare_if_physical_mode(vcpu);
1919
1920 switch (cause) {
1921 case EVENT_RSM:
1922 kvm_rsm(vcpu, inst);
1923 break;
1924 case EVENT_SSM:
1925 kvm_ssm(vcpu, inst);
1926 break;
1927 case EVENT_MOV_TO_PSR:
1928 kvm_mov_to_psr(vcpu, inst);
1929 break;
1930 case EVENT_MOV_FROM_PSR:
1931 kvm_mov_from_psr(vcpu, inst);
1932 break;
1933 case EVENT_MOV_FROM_CR:
1934 kvm_mov_from_cr(vcpu, inst);
1935 break;
1936 case EVENT_MOV_TO_CR:
1937 kvm_mov_to_cr(vcpu, inst);
1938 break;
1939 case EVENT_BSW_0:
1940 vcpu_bsw0(vcpu);
1941 break;
1942 case EVENT_BSW_1:
1943 vcpu_bsw1(vcpu);
1944 break;
1945 case EVENT_COVER:
1946 vcpu_cover(vcpu);
1947 break;
1948 case EVENT_RFI:
1949 vcpu_rfi(vcpu);
1950 break;
1951 case EVENT_ITR_D:
1952 kvm_itr_d(vcpu, inst);
1953 break;
1954 case EVENT_ITR_I:
1955 kvm_itr_i(vcpu, inst);
1956 break;
1957 case EVENT_PTR_D:
1958 kvm_ptr_d(vcpu, inst);
1959 break;
1960 case EVENT_PTR_I:
1961 kvm_ptr_i(vcpu, inst);
1962 break;
1963 case EVENT_ITC_D:
1964 kvm_itc_d(vcpu, inst);
1965 break;
1966 case EVENT_ITC_I:
1967 kvm_itc_i(vcpu, inst);
1968 break;
1969 case EVENT_PTC_L:
1970 kvm_ptc_l(vcpu, inst);
1971 break;
1972 case EVENT_PTC_G:
1973 kvm_ptc_g(vcpu, inst);
1974 break;
1975 case EVENT_PTC_GA:
1976 kvm_ptc_ga(vcpu, inst);
1977 break;
1978 case EVENT_PTC_E:
1979 kvm_ptc_e(vcpu, inst);
1980 break;
1981 case EVENT_MOV_TO_RR:
1982 kvm_mov_to_rr(vcpu, inst);
1983 break;
1984 case EVENT_MOV_FROM_RR:
1985 kvm_mov_from_rr(vcpu, inst);
1986 break;
1987 case EVENT_THASH:
1988 kvm_thash(vcpu, inst);
1989 break;
1990 case EVENT_TTAG:
1991 kvm_ttag(vcpu, inst);
1992 break;
1993 case EVENT_TPA:
1994 status = kvm_tpa(vcpu, inst);
1995 break;
1996 case EVENT_TAK:
1997 kvm_tak(vcpu, inst);
1998 break;
1999 case EVENT_MOV_TO_AR_IMM:
2000 kvm_mov_to_ar_imm(vcpu, inst);
2001 break;
2002 case EVENT_MOV_TO_AR:
2003 kvm_mov_to_ar_reg(vcpu, inst);
2004 break;
2005 case EVENT_MOV_FROM_AR:
2006 kvm_mov_from_ar_reg(vcpu, inst);
2007 break;
2008 case EVENT_MOV_TO_DBR:
2009 kvm_mov_to_dbr(vcpu, inst);
2010 break;
2011 case EVENT_MOV_TO_IBR:
2012 kvm_mov_to_ibr(vcpu, inst);
2013 break;
2014 case EVENT_MOV_TO_PMC:
2015 kvm_mov_to_pmc(vcpu, inst);
2016 break;
2017 case EVENT_MOV_TO_PMD:
2018 kvm_mov_to_pmd(vcpu, inst);
2019 break;
2020 case EVENT_MOV_TO_PKR:
2021 kvm_mov_to_pkr(vcpu, inst);
2022 break;
2023 case EVENT_MOV_FROM_DBR:
2024 kvm_mov_from_dbr(vcpu, inst);
2025 break;
2026 case EVENT_MOV_FROM_IBR:
2027 kvm_mov_from_ibr(vcpu, inst);
2028 break;
2029 case EVENT_MOV_FROM_PMC:
2030 kvm_mov_from_pmc(vcpu, inst);
2031 break;
2032 case EVENT_MOV_FROM_PKR:
2033 kvm_mov_from_pkr(vcpu, inst);
2034 break;
2035 case EVENT_MOV_FROM_CPUID:
2036 kvm_mov_from_cpuid(vcpu, inst);
2037 break;
2038 case EVENT_VMSW:
2039 status = IA64_FAULT;
2040 break;
2041 default:
2042 break;
2043 };
2044 /* Assume all other cases leave status as IA64_NO_FAULT? */
2045 if (status == IA64_NO_FAULT && cause != EVENT_RFI)
2046 vcpu_increment_iip(vcpu);
2047
2048 recover_if_physical_mode(vcpu);
2049}
2050
2051void init_vcpu(struct kvm_vcpu *vcpu)
2052{
2053 int i;
2054
2055 vcpu->arch.mode_flags = GUEST_IN_PHY;
2056 VMX(vcpu, vrr[0]) = 0x38;
2057 VMX(vcpu, vrr[1]) = 0x38;
2058 VMX(vcpu, vrr[2]) = 0x38;
2059 VMX(vcpu, vrr[3]) = 0x38;
2060 VMX(vcpu, vrr[4]) = 0x38;
2061 VMX(vcpu, vrr[5]) = 0x38;
2062 VMX(vcpu, vrr[6]) = 0x38;
2063 VMX(vcpu, vrr[7]) = 0x38;
2064 VCPU(vcpu, vpsr) = IA64_PSR_BN;
2065 VCPU(vcpu, dcr) = 0;
2066 /* pta.size must not be 0. The minimum is 15 (32k) */
2067 VCPU(vcpu, pta) = 15 << 2;
2068 VCPU(vcpu, itv) = 0x10000;
2069 VCPU(vcpu, itm) = 0;
2070 VMX(vcpu, last_itc) = 0;
2071
2072 VCPU(vcpu, lid) = VCPU_LID(vcpu);
2073 VCPU(vcpu, ivr) = 0;
2074 VCPU(vcpu, tpr) = 0x10000;
2075 VCPU(vcpu, eoi) = 0;
2076 VCPU(vcpu, irr[0]) = 0;
2077 VCPU(vcpu, irr[1]) = 0;
2078 VCPU(vcpu, irr[2]) = 0;
2079 VCPU(vcpu, irr[3]) = 0;
2080 VCPU(vcpu, pmv) = 0x10000;
2081 VCPU(vcpu, cmcv) = 0x10000;
2082 VCPU(vcpu, lrr0) = 0x10000; /* default reset value? */
2083 VCPU(vcpu, lrr1) = 0x10000; /* default reset value? */
2084 update_vhpi(vcpu, NULL_VECTOR);
2085 VLSAPIC_XTP(vcpu) = 0x80; /* disabled */
2086
2087 for (i = 0; i < 4; i++)
2088 VLSAPIC_INSVC(vcpu, i) = 0;
2089}
2090
2091void kvm_init_all_rr(struct kvm_vcpu *vcpu)
2092{
2093 unsigned long psr;
2094
2095 local_irq_save(psr);
2096
2097 /* WARNING: virtual mode and physical mode must not co-exist
2098 * in the same region
2099 */
2100
2101 vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0]));
2102 vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4]));
2103
2104 if (is_physical_mode(vcpu)) {
2105 if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
2106 panic_vm(vcpu);
2107
2108 ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
2109 ia64_dv_serialize_data();
2110 ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4);
2111 ia64_dv_serialize_data();
2112 } else {
2113 ia64_set_rr((VRN0 << VRN_SHIFT),
2114 vcpu->arch.metaphysical_saved_rr0);
2115 ia64_dv_serialize_data();
2116 ia64_set_rr((VRN4 << VRN_SHIFT),
2117 vcpu->arch.metaphysical_saved_rr4);
2118 ia64_dv_serialize_data();
2119 }
2120 ia64_set_rr((VRN1 << VRN_SHIFT),
2121 vrrtomrr(VMX(vcpu, vrr[VRN1])));
2122 ia64_dv_serialize_data();
2123 ia64_set_rr((VRN2 << VRN_SHIFT),
2124 vrrtomrr(VMX(vcpu, vrr[VRN2])));
2125 ia64_dv_serialize_data();
2126 ia64_set_rr((VRN3 << VRN_SHIFT),
2127 vrrtomrr(VMX(vcpu, vrr[VRN3])));
2128 ia64_dv_serialize_data();
2129 ia64_set_rr((VRN5 << VRN_SHIFT),
2130 vrrtomrr(VMX(vcpu, vrr[VRN5])));
2131 ia64_dv_serialize_data();
2132 ia64_set_rr((VRN7 << VRN_SHIFT),
2133 vrrtomrr(VMX(vcpu, vrr[VRN7])));
2134 ia64_dv_serialize_data();
2135 ia64_srlz_d();
2136 ia64_set_psr(psr);
2137}
2138
2139int vmm_entry(void)
2140{
2141 struct kvm_vcpu *v;
2142 v = current_vcpu;
2143
2144 ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd,
2145 0, 0, 0, 0, 0, 0);
2146 kvm_init_vtlb(v);
2147 kvm_init_vhpt(v);
2148 init_vcpu(v);
2149 kvm_init_all_rr(v);
2150 vmm_reset_entry();
2151
2152 return 0;
2153}
2154
2155void panic_vm(struct kvm_vcpu *v)
2156{
2157 struct exit_ctl_data *p = &v->arch.exit_data;
2158
2159 p->exit_reason = EXIT_REASON_VM_PANIC;
2160 vmm_transition(v);
2161 /* Never returns */
2162 while (1);
2163}
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
new file mode 100644
index 000000000000..b0fcfb62c49e
--- /dev/null
+++ b/arch/ia64/kvm/vcpu.h
@@ -0,0 +1,740 @@
1/*
2 * vcpu.h: vcpu routines
3 * Copyright (c) 2005, Intel Corporation.
4 * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
5 * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
6 *
7 * Copyright (c) 2007, Intel Corporation.
8 * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
9 * Xiantao Zhang (xiantao.zhang@intel.com)
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms and conditions of the GNU General Public License,
13 * version 2, as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * more details.
19 *
20 * You should have received a copy of the GNU General Public License along with
21 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
22 * Place - Suite 330, Boston, MA 02111-1307 USA.
23 *
24 */
25
26
27#ifndef __KVM_VCPU_H__
28#define __KVM_VCPU_H__
29
30#include <asm/types.h>
31#include <asm/fpu.h>
32#include <asm/processor.h>
33
34#ifndef __ASSEMBLY__
35#include "vti.h"
36
37#include <linux/kvm_host.h>
38#include <linux/spinlock.h>
39
40typedef unsigned long IA64_INST;
41
42typedef union U_IA64_BUNDLE {
43 unsigned long i64[2];
44 struct { unsigned long template:5, slot0:41, slot1a:18,
45 slot1b:23, slot2:41; };
46 /* NOTE: following doesn't work because bitfields can't cross natural
47 size boundaries
48 struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */
49} IA64_BUNDLE;
50
51typedef union U_INST64_A5 {
52 IA64_INST inst;
53 struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5,
54 imm9d:9, s:1, major:4; };
55} INST64_A5;
56
57typedef union U_INST64_B4 {
58 IA64_INST inst;
59 struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6,
60 wh:2, d:1, un1:1, major:4; };
61} INST64_B4;
62
63typedef union U_INST64_B8 {
64 IA64_INST inst;
65 struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; };
66} INST64_B8;
67
68typedef union U_INST64_B9 {
69 IA64_INST inst;
70 struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
71} INST64_B9;
72
73typedef union U_INST64_I19 {
74 IA64_INST inst;
75 struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
76} INST64_I19;
77
78typedef union U_INST64_I26 {
79 IA64_INST inst;
80 struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
81} INST64_I26;
82
83typedef union U_INST64_I27 {
84 IA64_INST inst;
85 struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; };
86} INST64_I27;
87
88typedef union U_INST64_I28 { /* not privileged (mov from AR) */
89 IA64_INST inst;
90 struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
91} INST64_I28;
92
93typedef union U_INST64_M28 {
94 IA64_INST inst;
95 struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; };
96} INST64_M28;
97
98typedef union U_INST64_M29 {
99 IA64_INST inst;
100 struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
101} INST64_M29;
102
103typedef union U_INST64_M30 {
104 IA64_INST inst;
105 struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2,
106 x3:3, s:1, major:4; };
107} INST64_M30;
108
109typedef union U_INST64_M31 {
110 IA64_INST inst;
111 struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
112} INST64_M31;
113
114typedef union U_INST64_M32 {
115 IA64_INST inst;
116 struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; };
117} INST64_M32;
118
119typedef union U_INST64_M33 {
120 IA64_INST inst;
121 struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; };
122} INST64_M33;
123
124typedef union U_INST64_M35 {
125 IA64_INST inst;
126 struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
127
128} INST64_M35;
129
130typedef union U_INST64_M36 {
131 IA64_INST inst;
132 struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; };
133} INST64_M36;
134
135typedef union U_INST64_M37 {
136 IA64_INST inst;
137 struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3,
138 i:1, major:4; };
139} INST64_M37;
140
141typedef union U_INST64_M41 {
142 IA64_INST inst;
143 struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
144} INST64_M41;
145
146typedef union U_INST64_M42 {
147 IA64_INST inst;
148 struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
149} INST64_M42;
150
151typedef union U_INST64_M43 {
152 IA64_INST inst;
153 struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; };
154} INST64_M43;
155
156typedef union U_INST64_M44 {
157 IA64_INST inst;
158 struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; };
159} INST64_M44;
160
161typedef union U_INST64_M45 {
162 IA64_INST inst;
163 struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
164} INST64_M45;
165
166typedef union U_INST64_M46 {
167 IA64_INST inst;
168 struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6,
169 x3:3, un1:1, major:4; };
170} INST64_M46;
171
172typedef union U_INST64_M47 {
173 IA64_INST inst;
174 struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
175} INST64_M47;
176
177typedef union U_INST64_M1{
178 IA64_INST inst;
179 struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2,
180 x6:6, m:1, major:4; };
181} INST64_M1;
182
183typedef union U_INST64_M2{
184 IA64_INST inst;
185 struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2,
186 x6:6, m:1, major:4; };
187} INST64_M2;
188
189typedef union U_INST64_M3{
190 IA64_INST inst;
191 struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2,
192 x6:6, s:1, major:4; };
193} INST64_M3;
194
195typedef union U_INST64_M4 {
196 IA64_INST inst;
197 struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2,
198 x6:6, m:1, major:4; };
199} INST64_M4;
200
201typedef union U_INST64_M5 {
202 IA64_INST inst;
203 struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2,
204 x6:6, s:1, major:4; };
205} INST64_M5;
206
207typedef union U_INST64_M6 {
208 IA64_INST inst;
209 struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2,
210 x6:6, m:1, major:4; };
211} INST64_M6;
212
213typedef union U_INST64_M9 {
214 IA64_INST inst;
215 struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2,
216 x6:6, m:1, major:4; };
217} INST64_M9;
218
219typedef union U_INST64_M10 {
220 IA64_INST inst;
221 struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2,
222 x6:6, s:1, major:4; };
223} INST64_M10;
224
225typedef union U_INST64_M12 {
226 IA64_INST inst;
227 struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2,
228 x6:6, m:1, major:4; };
229} INST64_M12;
230
231typedef union U_INST64_M15 {
232 IA64_INST inst;
233 struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2,
234 x6:6, s:1, major:4; };
235} INST64_M15;
236
237typedef union U_INST64 {
238 IA64_INST inst;
239 struct { unsigned long :37, major:4; } generic;
240 INST64_A5 A5; /* used in build_hypercall_bundle only */
241 INST64_B4 B4; /* used in build_hypercall_bundle only */
242 INST64_B8 B8; /* rfi, bsw.[01] */
243 INST64_B9 B9; /* break.b */
244 INST64_I19 I19; /* used in build_hypercall_bundle only */
245 INST64_I26 I26; /* mov register to ar (I unit) */
246 INST64_I27 I27; /* mov immediate to ar (I unit) */
247 INST64_I28 I28; /* mov from ar (I unit) */
248 INST64_M1 M1; /* ld integer */
249 INST64_M2 M2;
250 INST64_M3 M3;
251 INST64_M4 M4; /* st integer */
252 INST64_M5 M5;
253 INST64_M6 M6; /* ldfd floating point */
254 INST64_M9 M9; /* stfd floating point */
255 INST64_M10 M10; /* stfd floating point */
256 INST64_M12 M12; /* ldfd pair floating point */
257 INST64_M15 M15; /* lfetch + imm update */
258 INST64_M28 M28; /* purge translation cache entry */
259 INST64_M29 M29; /* mov register to ar (M unit) */
260 INST64_M30 M30; /* mov immediate to ar (M unit) */
261 INST64_M31 M31; /* mov from ar (M unit) */
262 INST64_M32 M32; /* mov reg to cr */
263 INST64_M33 M33; /* mov from cr */
264 INST64_M35 M35; /* mov to psr */
265 INST64_M36 M36; /* mov from psr */
266 INST64_M37 M37; /* break.m */
267 INST64_M41 M41; /* translation cache insert */
268 INST64_M42 M42; /* mov to indirect reg/translation reg insert*/
269 INST64_M43 M43; /* mov from indirect reg */
270 INST64_M44 M44; /* set/reset system mask */
271 INST64_M45 M45; /* translation purge */
272 INST64_M46 M46; /* translation access (tpa,tak) */
273 INST64_M47 M47; /* purge translation entry */
274} INST64;
275
276#define MASK_41 ((unsigned long)0x1ffffffffff)
277
278/* Virtual address memory attributes encoding */
279#define VA_MATTR_WB 0x0
280#define VA_MATTR_UC 0x4
281#define VA_MATTR_UCE 0x5
282#define VA_MATTR_WC 0x6
283#define VA_MATTR_NATPAGE 0x7
284
285#define PMASK(size) (~((size) - 1))
286#define PSIZE(size) (1UL<<(size))
287#define CLEARLSB(ppn, nbits) (((ppn) >> (nbits)) << (nbits))
288#define PAGEALIGN(va, ps) CLEARLSB(va, ps)
289#define PAGE_FLAGS_RV_MASK (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53))
290#define _PAGE_MA_ST (0x1 << 2) /* is reserved for software use */
291
292#define ARCH_PAGE_SHIFT 12
293
294#define INVALID_TI_TAG (1UL << 63)
295
296#define VTLB_PTE_P_BIT 0
297#define VTLB_PTE_IO_BIT 60
298#define VTLB_PTE_IO (1UL<<VTLB_PTE_IO_BIT)
299#define VTLB_PTE_P (1UL<<VTLB_PTE_P_BIT)
300
301#define vcpu_quick_region_check(_tr_regions,_ifa) \
302 (_tr_regions & (1 << ((unsigned long)_ifa >> 61)))
303
304#define vcpu_quick_region_set(_tr_regions,_ifa) \
305 do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0)
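Both macros key off the virtual region number, i.e. the top three bits of the address, so the eight regions map onto one bit each of the _tr_regions mask. A minimal sketch (illustration only):

	/* Region number of a virtual address is bits 63:61. */
	static inline unsigned int region_of_sketch(unsigned long va)
	{
		return (unsigned int)(va >> 61);	/* 0 .. 7 */
	}
	/* set:   mask |= 1U << region_of_sketch(va);
	 * check: mask &  (1U << region_of_sketch(va)); */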
306
307static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir,
308 u64 va, u64 rid)
309{
310 trp->page_flags = pte;
311 trp->itir = itir;
312 trp->vadr = va;
313 trp->rid = rid;
314}
315
316extern u64 kvm_lookup_mpa(u64 gpfn);
317extern u64 kvm_gpa_to_mpa(u64 gpa);
318
319/* Return I/O type if true */
320#define __gpfn_is_io(gpfn) \
321 ({ \
322 u64 pte, ret = 0; \
323 pte = kvm_lookup_mpa(gpfn); \
324 if (!(pte & GPFN_INV_MASK)) \
325 ret = pte & GPFN_IO_MASK; \
326 ret; \
327 })
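A hedged usage sketch (the caller is hypothetical; GPFN_INV_MASK and GPFN_IO_MASK are defined elsewhere): a non-zero result means the looked-up mapping is valid and carries the I/O attribute, so callers typically branch on it to route the access to MMIO emulation:

	/* Hypothetical caller, for illustration only. */
	static inline int gpfn_needs_mmio_sketch(u64 gpfn)
	{
		return __gpfn_is_io(gpfn) != 0;	/* valid mapping with the IO bits set */
	}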
328
329#endif
330
331#define IA64_NO_FAULT 0
332#define IA64_FAULT 1
333
334#define VMM_RBS_OFFSET ((VMM_TASK_SIZE + 15) & ~15)
335
336#define SW_BAD 0 /* Bad mode transition */
337#define SW_V2P 1 /* Physical emulation is activated */
338#define SW_P2V 2 /* Exit physical mode emulation */
339#define SW_SELF 3 /* No mode transition */
340#define SW_NOP 4 /* Mode transition, but without action required */
341
342#define GUEST_IN_PHY 0x1
343#define GUEST_PHY_EMUL 0x2
344
345#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP))
346
347#define VRN_SHIFT 61
348#define VRN_MASK 0xe000000000000000
349#define VRN0 0x0UL
350#define VRN1 0x1UL
351#define VRN2 0x2UL
352#define VRN3 0x3UL
353#define VRN4 0x4UL
354#define VRN5 0x5UL
355#define VRN6 0x6UL
356#define VRN7 0x7UL
357
358#define IRQ_NO_MASKED 0
359#define IRQ_MASKED_BY_VTPR 1
360#define IRQ_MASKED_BY_INSVC 2 /* masked by inservice IRQ */
361
362#define PTA_BASE_SHIFT 15
363
364#define IA64_PSR_VM_BIT 46
365#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT)
366
367/* Interruption Function State */
368#define IA64_IFS_V_BIT 63
369#define IA64_IFS_V (__IA64_UL(1) << IA64_IFS_V_BIT)
370
371#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX)
372#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX)
373
374#ifndef __ASSEMBLY__
375
376#include <asm/gcc_intrin.h>
377
378#define is_physical_mode(v) \
379 ((v->arch.mode_flags) & GUEST_IN_PHY)
380
381#define is_virtual_mode(v) \
382 (!is_physical_mode(v))
383
384#define MODE_IND(psr) \
385 (((psr).it << 2) + ((psr).dt << 1) + (psr).rt)
386
387#define _vmm_raw_spin_lock(x) \
388 do { \
389 __u32 *ia64_spinlock_ptr = (__u32 *) (x); \
390 __u64 ia64_spinlock_val; \
391 ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
392 if (unlikely(ia64_spinlock_val)) { \
393 do { \
394 while (*ia64_spinlock_ptr) \
395 ia64_barrier(); \
396 ia64_spinlock_val = \
397 ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
398 } while (ia64_spinlock_val); \
399 } \
400 } while (0)
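The macro is a test-and-test-and-set acquire spinlock: try cmpxchg(0 -> 1) with acquire semantics, and on failure spin on plain reads until the word clears before retrying. A portable sketch of the same protocol (illustration only; the real code uses ia64_cmpxchg4_acq() and ia64_barrier()):

	static void spin_lock_sketch(volatile unsigned int *lock)
	{
		while (__sync_val_compare_and_swap(lock, 0, 1) != 0) {
			while (*lock)
				;	/* spin on reads until the holder releases it */
		}
	}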
401
402#define _vmm_raw_spin_unlock(x) \
403 do { barrier(); \
404 ((spinlock_t *)x)->raw_lock.lock = 0; } \
405while (0)
406
407void vmm_spin_lock(spinlock_t *lock);
408void vmm_spin_unlock(spinlock_t *lock);
409enum {
410 I_TLB = 1,
411 D_TLB = 2
412};
413
414union kvm_va {
415 struct {
416 unsigned long off : 60; /* intra-region offset */
417 unsigned long reg : 4; /* region number */
418 } f;
419 unsigned long l;
420 void *p;
421};
422
423#define __kvm_pa(x) ({union kvm_va _v; _v.l = (long) (x); \
424 _v.f.reg = 0; _v.l; })
425#define __kvm_va(x) ({union kvm_va _v; _v.l = (long) (x); \
426 _v.f.reg = -1; _v.p; })
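Both converters only touch the 4-bit region field: __kvm_pa() clears it and __kvm_va() forces it to all ones (region 0xf). A worked example, with the address chosen purely for illustration:

	/* __kvm_pa(0xf000000000001000UL) == 0x0000000000001000UL
	 * __kvm_va(0x0000000000001000UL) == (void *)0xf000000000001000UL */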
427
428#define _REGION_ID(x) ({union ia64_rr _v; _v.val = (long)(x); \
429 _v.rid; })
430#define _REGION_PAGE_SIZE(x) ({union ia64_rr _v; _v.val = (long)(x); \
431 _v.ps; })
432#define _REGION_HW_WALKER(x) ({union ia64_rr _v; _v.val = (long)(x); \
433 _v.ve; })
434
435enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF };
436enum tlb_miss_type { INSTRUCTION, DATA, REGISTER };
437
438#define VCPU(_v, _x) ((_v)->arch.vpd->_x)
439#define VMX(_v, _x) ((_v)->arch._x)
440
441#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i])
442#define VLSAPIC_XTP(_v) VMX(_v, xtp)
443
444static inline unsigned long itir_ps(unsigned long itir)
445{
446 return ((itir >> 2) & 0x3f);
447}
448
449
450/**************************************************************************
451 VCPU control register access routines
452 **************************************************************************/
453
454static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu)
455{
456 return ((u64)VCPU(vcpu, itir));
457}
458
459static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val)
460{
461 VCPU(vcpu, itir) = val;
462}
463
464static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu)
465{
466 return ((u64)VCPU(vcpu, ifa));
467}
468
469static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val)
470{
471 VCPU(vcpu, ifa) = val;
472}
473
474static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu)
475{
476 return ((u64)VCPU(vcpu, iva));
477}
478
479static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu)
480{
481 return ((u64)VCPU(vcpu, pta));
482}
483
484static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu)
485{
486 return ((u64)VCPU(vcpu, lid));
487}
488
489static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu)
490{
491 return ((u64)VCPU(vcpu, tpr));
492}
493
494static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu)
495{
496 return (0UL); /*reads of eoi always return 0 */
497}
498
499static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu)
500{
501 return ((u64)VCPU(vcpu, irr[0]));
502}
503
504static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu)
505{
506 return ((u64)VCPU(vcpu, irr[1]));
507}
508
509static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu)
510{
511 return ((u64)VCPU(vcpu, irr[2]));
512}
513
514static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu)
515{
516 return ((u64)VCPU(vcpu, irr[3]));
517}
518
519static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val)
520{
521 ia64_setreg(_IA64_REG_CR_DCR, val);
522}
523
524static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val)
525{
526 VCPU(vcpu, isr) = val;
527}
528
529static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val)
530{
531 VCPU(vcpu, lid) = val;
532}
533
534static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val)
535{
536 VCPU(vcpu, ipsr) = val;
537}
538
539static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val)
540{
541 VCPU(vcpu, iip) = val;
542}
543
544static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val)
545{
546 VCPU(vcpu, ifs) = val;
547}
548
549static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val)
550{
551 VCPU(vcpu, iipa) = val;
552}
553
554static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val)
555{
556 VCPU(vcpu, iha) = val;
557}
558
559
560static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg)
561{
562 return vcpu->arch.vrr[reg>>61];
563}
564
565/**************************************************************************
566 VCPU debug breakpoint register access routines
567 **************************************************************************/
568
569static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
570{
571 __ia64_set_dbr(reg, val);
572}
573
574static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
575{
576 ia64_set_ibr(reg, val);
577}
578
579static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg)
580{
581 return ((u64)__ia64_get_dbr(reg));
582}
583
584static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg)
585{
586 return ((u64)ia64_get_ibr(reg));
587}
588
589/**************************************************************************
590 VCPU performance monitor register access routines
591 **************************************************************************/
592static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val)
593{
594 /* NOTE: Writes to unimplemented PMC registers are discarded */
595 ia64_set_pmc(reg, val);
596}
597
598static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val)
599{
600 /* NOTE: Writes to unimplemented PMD registers are discarded */
601 ia64_set_pmd(reg, val);
602}
603
604static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg)
605{
606 /* NOTE: Reads from unimplemented PMC registers return zero */
607 return ((u64)ia64_get_pmc(reg));
608}
609
610static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg)
611{
612 /* NOTE: Reads from unimplemented PMD registers return zero */
613 return ((u64)ia64_get_pmd(reg));
614}
615
616static inline unsigned long vrrtomrr(unsigned long val)
617{
618 union ia64_rr rr;
619 rr.val = val;
620 rr.rid = (rr.rid << 4) | 0xe;
621 if (rr.ps > PAGE_SHIFT)
622 rr.ps = PAGE_SHIFT;
623 rr.ve = 1;
624 return rr.val;
625}
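A worked example of the translation (values assumed; PAGE_SHIFT taken as 14, i.e. 16KB host pages):

	/* guest rr: rid = 0x001, ps = 26, ve = 0
	 * host  rr: rid = (0x001 << 4) | 0xe = 0x01e, ps clamped to 14, ve forced to 1 */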
626
627
628static inline int highest_bits(int *dat)
629{
630 u32 bits, bitnum;
631 int i;
632
633 /* loop for all 256 bits */
634 for (i = 7; i >= 0 ; i--) {
635 bits = dat[i];
636 if (bits) {
637 bitnum = fls(bits);
638 return i * 32 + bitnum - 1;
639 }
640 }
641 return NULL_VECTOR;
642}
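A worked example (values assumed): with the eight 32-bit words all zero except dat[2] = 0x00000010, fls(0x10) is 5, so the function returns 2*32 + 5 - 1 = 68, i.e. vector 68 is the highest pending bit; with no bits set anywhere it returns NULL_VECTOR.

	/* dat[] = { 0, 0, 0x00000010, 0, 0, 0, 0, 0 }
	 *   -> fls(0x10) == 5, return 2*32 + 5 - 1 == 68 */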
643
644/*
645 * Return true if the pending irq has higher priority than the
646 * in-service one.
647 */
648static inline int is_higher_irq(int pending, int inservice)
649{
650 return ((pending > inservice)
651 || ((pending != NULL_VECTOR)
652 && (inservice == NULL_VECTOR)));
653}
654
655static inline int is_higher_class(int pending, int mic)
656{
657 return ((pending >> 4) > mic);
658}
659
660/*
661 * Return 0-255 for pending irq.
662 * NULL_VECTOR when nothing is pending.
663 */
664static inline int highest_pending_irq(struct kvm_vcpu *vcpu)
665{
666 if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR))
667 return NMI_VECTOR;
668 if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR))
669 return ExtINT_VECTOR;
670
671 return highest_bits((int *)&VCPU(vcpu, irr[0]));
672}
673
674static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
675{
676 if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR))
677 return NMI_VECTOR;
678 if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR))
679 return ExtINT_VECTOR;
680
681 return highest_bits((int *)&(VMX(vcpu, insvc[0])));
682}
683
684extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, u64 reg,
685 struct ia64_fpreg *val);
686extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, u64 reg,
687 struct ia64_fpreg *val);
688extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, u64 reg);
689extern void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 val, int nat);
690extern u64 vcpu_get_psr(struct kvm_vcpu *vcpu);
691extern void vcpu_set_psr(struct kvm_vcpu *vcpu, u64 val);
692extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
693extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
694extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
695 u64 itir, u64 va, int type);
696extern struct thash_data *vhpt_lookup(u64 va);
697extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
698extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
699extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
700extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
701extern int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
702 u64 itir, u64 ifa, int type);
703extern void thash_purge_all(struct kvm_vcpu *v);
704extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
705 u64 va, int is_data);
706extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va,
707 u64 ps, int is_data);
708
709extern void vcpu_increment_iip(struct kvm_vcpu *v);
710extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu);
711extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
712extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
713extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr);
714extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr);
715extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr);
716extern void nested_dtlb(struct kvm_vcpu *vcpu);
717extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr);
718extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref);
719
720extern void update_vhpi(struct kvm_vcpu *vcpu, int vec);
721extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice);
722
723extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle);
724extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma);
725extern void vmm_transition(struct kvm_vcpu *vcpu);
726extern void vmm_trampoline(union context *from, union context *to);
727extern int vmm_entry(void);
728extern u64 vcpu_get_itc(struct kvm_vcpu *vcpu);
729
730extern void vmm_reset_entry(void);
731void kvm_init_vtlb(struct kvm_vcpu *v);
732void kvm_init_vhpt(struct kvm_vcpu *v);
733void thash_init(struct thash_cb *hcb, u64 sz);
734
735void panic_vm(struct kvm_vcpu *v);
736
737extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
738 u64 arg4, u64 arg5, u64 arg6, u64 arg7);
739#endif
740#endif /* __KVM_VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
new file mode 100644
index 000000000000..2275bf4e681a
--- /dev/null
+++ b/arch/ia64/kvm/vmm.c
@@ -0,0 +1,66 @@
1/*
2 * vmm.c: vmm module interface with kvm module
3 *
4 * Copyright (c) 2007, Intel Corporation.
5 *
6 * Xiantao Zhang (xiantao.zhang@intel.com)
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 */
21
22
23#include <linux/module.h>
24#include <asm/fpswa.h>
25
26#include "vcpu.h"
27
28MODULE_AUTHOR("Intel");
29MODULE_LICENSE("GPL");
30
31extern char kvm_ia64_ivt;
32extern fpswa_interface_t *vmm_fpswa_interface;
33
34struct kvm_vmm_info vmm_info = {
35 .module = THIS_MODULE,
36 .vmm_entry = vmm_entry,
37 .tramp_entry = vmm_trampoline,
38 .vmm_ivt = (unsigned long)&kvm_ia64_ivt,
39};
40
41static int __init kvm_vmm_init(void)
42{
43
44 vmm_fpswa_interface = fpswa_interface;
45
46 /* Register vmm data with the kvm side */
47 return kvm_init(&vmm_info, 1024, THIS_MODULE);
48}
49
50static void __exit kvm_vmm_exit(void)
51{
52 kvm_exit();
53 return ;
54}
55
56void vmm_spin_lock(spinlock_t *lock)
57{
58 _vmm_raw_spin_lock(lock);
59}
60
61void vmm_spin_unlock(spinlock_t *lock)
62{
63 _vmm_raw_spin_unlock(lock);
64}
65module_init(kvm_vmm_init)
66module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
new file mode 100644
index 000000000000..3ee5f481c06d
--- /dev/null
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -0,0 +1,1424 @@
1/*
2 * /ia64/kvm_ivt.S
3 *
4 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com>
6 * David Mosberger <davidm@hpl.hp.com>
7 * Copyright (C) 2000, 2002-2003 Intel Co
8 * Asit Mallick <asit.k.mallick@intel.com>
9 * Suresh Siddha <suresh.b.siddha@intel.com>
10 * Kenneth Chen <kenneth.w.chen@intel.com>
11 * Fenghua Yu <fenghua.yu@intel.com>
12 *
13 *
14 * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling
15 * for SMP
16 * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB
17 * handler now uses virtual PT.
18 *
19 * 07/6/20 Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
20 * Supporting Intel virtualization architecture
21 *
22 */
23
24/*
25 * This file defines the interruption vector table used by the CPU.
26 * It does not include one entry per possible cause of interruption.
27 *
28 * The first 20 entries of the table contain 64 bundles each while the
29 * remaining 48 entries contain only 16 bundles each.
30 *
31 * The 64 bundles are used to allow inlining the whole handler for
32 * critical
33 * interruptions like TLB misses.
34 *
35 * For each entry, the comment is as follows:
36 *
37 * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
38 * entry offset ----/     /         /                  /
39 * entry number ---------/         /                  /
40 * size of the entry -------------/                  /
41 * vector name -------------------------------------/
42 * interruptions triggering this vector ----------------------/
49 *
50 * The table is 32KB in size and must be aligned on 32KB
51 * boundary.
52 * (The CPU ignores the 15 lower bits of the address)
53 *
54 * Table is based upon EAS2.6 (Oct 1999)
55 */
56
57
58#include <asm/asmmacro.h>
59#include <asm/cache.h>
60#include <asm/pgtable.h>
61
62#include "asm-offsets.h"
63#include "vcpu.h"
64#include "kvm_minstate.h"
65#include "vti.h"
66
67#if 1
68# define PSR_DEFAULT_BITS psr.ac
69#else
70# define PSR_DEFAULT_BITS 0
71#endif
72
73
74#define KVM_FAULT(n) \
75 kvm_fault_##n:; \
76 mov r19=n;; \
77 br.sptk.many kvm_fault_##n; \
78 ;; \
79
80
81#define KVM_REFLECT(n) \
82 mov r31=pr; \
83 mov r19=n; /* prepare to save predicates */ \
84 mov r29=cr.ipsr; \
85 ;; \
86 tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \
87(p7)br.sptk.many kvm_dispatch_reflection; \
88 br.sptk.many kvm_panic; \
89
90
91GLOBAL_ENTRY(kvm_panic)
92 br.sptk.many kvm_panic
93 ;;
94END(kvm_panic)
95
96
97
98
99
100 .section .text.ivt,"ax"
101
102 .align 32768 // align on 32KB boundary
103 .global kvm_ia64_ivt
104kvm_ia64_ivt:
105///////////////////////////////////////////////////////////////
106// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
107ENTRY(kvm_vhpt_miss)
108 KVM_FAULT(0)
109END(kvm_vhpt_miss)
110
111
112 .org kvm_ia64_ivt+0x400
113////////////////////////////////////////////////////////////////
114// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
115ENTRY(kvm_itlb_miss)
116 mov r31 = pr
117 mov r29=cr.ipsr;
118 ;;
119 tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
120 (p6) br.sptk kvm_alt_itlb_miss
121 mov r19 = 1
122 br.sptk kvm_itlb_miss_dispatch
123 KVM_FAULT(1);
124END(kvm_itlb_miss)
125
126 .org kvm_ia64_ivt+0x0800
127//////////////////////////////////////////////////////////////////
128// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
129ENTRY(kvm_dtlb_miss)
130 mov r31 = pr
131 mov r29=cr.ipsr;
132 ;;
133 tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
134(p6)br.sptk kvm_alt_dtlb_miss
135 br.sptk kvm_dtlb_miss_dispatch
136END(kvm_dtlb_miss)
137
138 .org kvm_ia64_ivt+0x0c00
139////////////////////////////////////////////////////////////////////
140// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
141ENTRY(kvm_alt_itlb_miss)
142 mov r16=cr.ifa // get address that caused the TLB miss
143 ;;
144 movl r17=PAGE_KERNEL
145 mov r24=cr.ipsr
146 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
147 ;;
148 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
149 ;;
150 or r19=r17,r19 // insert PTE control bits into r19
151 ;;
152 movl r20=IA64_GRANULE_SHIFT<<2
153 ;;
154 mov cr.itir=r20
155 ;;
156 itc.i r19 // insert the TLB entry
157 mov pr=r31,-1
158 rfi
159END(kvm_alt_itlb_miss)
160
161 .org kvm_ia64_ivt+0x1000
162/////////////////////////////////////////////////////////////////////
163// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
164ENTRY(kvm_alt_dtlb_miss)
165 mov r16=cr.ifa // get address that caused the TLB miss
166 ;;
167 movl r17=PAGE_KERNEL
168 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
169 mov r24=cr.ipsr
170 ;;
171 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
172 ;;
173 or r19=r19,r17 // insert PTE control bits into r19
174 ;;
175 movl r20=IA64_GRANULE_SHIFT<<2
176 ;;
177 mov cr.itir=r20
178 ;;
179 itc.d r19 // insert the TLB entry
180 mov pr=r31,-1
181 rfi
182END(kvm_alt_dtlb_miss)
183
184 .org kvm_ia64_ivt+0x1400
185//////////////////////////////////////////////////////////////////////
186// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
187ENTRY(kvm_nested_dtlb_miss)
188 KVM_FAULT(5)
189END(kvm_nested_dtlb_miss)
190
191 .org kvm_ia64_ivt+0x1800
192/////////////////////////////////////////////////////////////////////
193// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
194ENTRY(kvm_ikey_miss)
195 KVM_REFLECT(6)
196END(kvm_ikey_miss)
197
198 .org kvm_ia64_ivt+0x1c00
199/////////////////////////////////////////////////////////////////////
200// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
201ENTRY(kvm_dkey_miss)
202 KVM_REFLECT(7)
203END(kvm_dkey_miss)
204
205 .org kvm_ia64_ivt+0x2000
206////////////////////////////////////////////////////////////////////
207// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
208ENTRY(kvm_dirty_bit)
209 KVM_REFLECT(8)
210END(kvm_dirty_bit)
211
212 .org kvm_ia64_ivt+0x2400
213////////////////////////////////////////////////////////////////////
214// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
215ENTRY(kvm_iaccess_bit)
216 KVM_REFLECT(9)
217END(kvm_iaccess_bit)
218
219 .org kvm_ia64_ivt+0x2800
220///////////////////////////////////////////////////////////////////
221// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
222ENTRY(kvm_daccess_bit)
223 KVM_REFLECT(10)
224END(kvm_daccess_bit)
225
226 .org kvm_ia64_ivt+0x2c00
227/////////////////////////////////////////////////////////////////
228// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
229ENTRY(kvm_break_fault)
230 mov r31=pr
231 mov r19=11
232 mov r29=cr.ipsr
233 ;;
234 KVM_SAVE_MIN_WITH_COVER_R19
235 ;;
236 alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
237 mov out0=cr.ifa
238 mov out2=cr.isr // FIXME: pity to make this slow access twice
239 mov out3=cr.iim // FIXME: pity to make this slow access twice
240 adds r3=8,r2 // set up second base pointer
241 ;;
242 ssm psr.ic
243 ;;
244 srlz.i // guarantee that interruption collection is on
245 ;;
246 //(p15)ssm psr.i // restore psr.i
247 addl r14=@gprel(ia64_leave_hypervisor),gp
248 ;;
249 KVM_SAVE_REST
250 mov rp=r14
251 ;;
252 adds out1=16,sp
253 br.call.sptk.many b6=kvm_ia64_handle_break
254 ;;
255END(kvm_break_fault)
256
257 .org kvm_ia64_ivt+0x3000
258/////////////////////////////////////////////////////////////////
259// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
260ENTRY(kvm_interrupt)
261 mov r31=pr // prepare to save predicates
262 mov r19=12
263 mov r29=cr.ipsr
264 ;;
265 tbit.z p6,p7=r29,IA64_PSR_VM_BIT
266 tbit.z p0,p15=r29,IA64_PSR_I_BIT
267 ;;
268(p7) br.sptk kvm_dispatch_interrupt
269 ;;
270 mov r27=ar.rsc /* M */
271 mov r20=r1 /* A */
272 mov r25=ar.unat /* M */
273 mov r26=ar.pfs /* I */
274 mov r28=cr.iip /* M */
275 cover /* B (or nothing) */
276 ;;
277 mov r1=sp
278 ;;
279 invala /* M */
280 mov r30=cr.ifs
281 ;;
282 addl r1=-VMM_PT_REGS_SIZE,r1
283 ;;
284 adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */
285 adds r16=PT(CR_IPSR),r1
286 ;;
287 lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
288 st8 [r16]=r29 /* save cr.ipsr */
289 ;;
290 lfetch.fault.excl.nt1 [r17]
291 mov r29=b0
292 ;;
293 adds r16=PT(R8),r1 /* initialize first base pointer */
294 adds r17=PT(R9),r1 /* initialize second base pointer */
295 mov r18=r0 /* make sure r18 isn't NaT */
296 ;;
297.mem.offset 0,0; st8.spill [r16]=r8,16
298.mem.offset 8,0; st8.spill [r17]=r9,16
299 ;;
300.mem.offset 0,0; st8.spill [r16]=r10,24
301.mem.offset 8,0; st8.spill [r17]=r11,24
302 ;;
303 st8 [r16]=r28,16 /* save cr.iip */
304 st8 [r17]=r30,16 /* save cr.ifs */
305 mov r8=ar.fpsr /* M */
306 mov r9=ar.csd
307 mov r10=ar.ssd
308 movl r11=FPSR_DEFAULT /* L-unit */
309 ;;
310 st8 [r16]=r25,16 /* save ar.unat */
311 st8 [r17]=r26,16 /* save ar.pfs */
312 shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */
313 ;;
314 st8 [r16]=r27,16 /* save ar.rsc */
315 adds r17=16,r17 /* skip over ar_rnat field */
316 ;;
317 st8 [r17]=r31,16 /* save predicates */
318 adds r16=16,r16 /* skip over ar_bspstore field */
319 ;;
320 st8 [r16]=r29,16 /* save b0 */
321 st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */
322 ;;
323.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */
324.mem.offset 8,0; st8.spill [r17]=r12,16
325 adds r12=-16,r1
326 /* switch to kernel memory stack (with 16 bytes of scratch) */
327 ;;
328.mem.offset 0,0; st8.spill [r16]=r13,16
329.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
330 ;;
331.mem.offset 0,0; st8.spill [r16]=r15,16
332.mem.offset 8,0; st8.spill [r17]=r14,16
333 dep r14=-1,r0,60,4
334 ;;
335.mem.offset 0,0; st8.spill [r16]=r2,16
336.mem.offset 8,0; st8.spill [r17]=r3,16
337 adds r2=VMM_PT_REGS_R16_OFFSET,r1
338 adds r14 = VMM_VCPU_GP_OFFSET,r13
339 ;;
340 mov r8=ar.ccv
341 ld8 r14 = [r14]
342 ;;
343 mov r1=r14 /* establish kernel global pointer */
344 ;;
345 bsw.1
346 ;;
347 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
348 mov out0=r13
349 ;;
350 ssm psr.ic
351 ;;
352 srlz.i
353 ;;
354 //(p15) ssm psr.i
355 adds r3=8,r2 // set up second base pointer for SAVE_REST
356 srlz.i // ensure everybody knows psr.ic is back on
357 ;;
358.mem.offset 0,0; st8.spill [r2]=r16,16
359.mem.offset 8,0; st8.spill [r3]=r17,16
360 ;;
361.mem.offset 0,0; st8.spill [r2]=r18,16
362.mem.offset 8,0; st8.spill [r3]=r19,16
363 ;;
364.mem.offset 0,0; st8.spill [r2]=r20,16
365.mem.offset 8,0; st8.spill [r3]=r21,16
366 mov r18=b6
367 ;;
368.mem.offset 0,0; st8.spill [r2]=r22,16
369.mem.offset 8,0; st8.spill [r3]=r23,16
370 mov r19=b7
371 ;;
372.mem.offset 0,0; st8.spill [r2]=r24,16
373.mem.offset 8,0; st8.spill [r3]=r25,16
374 ;;
375.mem.offset 0,0; st8.spill [r2]=r26,16
376.mem.offset 8,0; st8.spill [r3]=r27,16
377 ;;
378.mem.offset 0,0; st8.spill [r2]=r28,16
379.mem.offset 8,0; st8.spill [r3]=r29,16
380 ;;
381.mem.offset 0,0; st8.spill [r2]=r30,16
382.mem.offset 8,0; st8.spill [r3]=r31,32
383 ;;
384 mov ar.fpsr=r11 /* M-unit */
385 st8 [r2]=r8,8 /* ar.ccv */
386 adds r24=PT(B6)-PT(F7),r3
387 ;;
388 stf.spill [r2]=f6,32
389 stf.spill [r3]=f7,32
390 ;;
391 stf.spill [r2]=f8,32
392 stf.spill [r3]=f9,32
393 ;;
394 stf.spill [r2]=f10
395 stf.spill [r3]=f11
396 adds r25=PT(B7)-PT(F11),r3
397 ;;
398 st8 [r24]=r18,16 /* b6 */
399 st8 [r25]=r19,16 /* b7 */
400 ;;
401 st8 [r24]=r9 /* ar.csd */
402 st8 [r25]=r10 /* ar.ssd */
403 ;;
404 srlz.d // make sure we see the effect of cr.ivr
405 addl r14=@gprel(ia64_leave_nested),gp
406 ;;
407 mov rp=r14
408 br.call.sptk.many b6=kvm_ia64_handle_irq
409 ;;
410END(kvm_interrupt)
411
412 .global kvm_dispatch_vexirq
413 .org kvm_ia64_ivt+0x3400
414//////////////////////////////////////////////////////////////////////
415// 0x3400 Entry 13 (size 64 bundles) Reserved
416ENTRY(kvm_virtual_exirq)
417 mov r31=pr
418 mov r19=13
419 mov r30 =r0
420 ;;
421kvm_dispatch_vexirq:
422 cmp.eq p6,p0 = 1,r30
423 ;;
424(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
425 ;;
426(p6)ld8 r1 = [r29]
427 ;;
428 KVM_SAVE_MIN_WITH_COVER_R19
429 alloc r14=ar.pfs,0,0,1,0
430 mov out0=r13
431
432 ssm psr.ic
433 ;;
434 srlz.i // guarantee that interruption collection is on
435 ;;
436 //(p15) ssm psr.i // restore psr.i
437 adds r3=8,r2 // set up second base pointer
438 ;;
439 KVM_SAVE_REST
440 addl r14=@gprel(ia64_leave_hypervisor),gp
441 ;;
442 mov rp=r14
443 br.call.sptk.many b6=kvm_vexirq
444END(kvm_virtual_exirq)
445
446 .org kvm_ia64_ivt+0x3800
447/////////////////////////////////////////////////////////////////////
448// 0x3800 Entry 14 (size 64 bundles) Reserved
449 KVM_FAULT(14)
450 // this code segment is from 2.6.16.13
451
452
453 .org kvm_ia64_ivt+0x3c00
454///////////////////////////////////////////////////////////////////////
455// 0x3c00 Entry 15 (size 64 bundles) Reserved
456 KVM_FAULT(15)
457
458
459 .org kvm_ia64_ivt+0x4000
460///////////////////////////////////////////////////////////////////////
461// 0x4000 Entry 16 (size 64 bundles) Reserved
462 KVM_FAULT(16)
463
464 .org kvm_ia64_ivt+0x4400
465//////////////////////////////////////////////////////////////////////
466// 0x4400 Entry 17 (size 64 bundles) Reserved
467 KVM_FAULT(17)
468
469 .org kvm_ia64_ivt+0x4800
470//////////////////////////////////////////////////////////////////////
471// 0x4800 Entry 18 (size 64 bundles) Reserved
472 KVM_FAULT(18)
473
474 .org kvm_ia64_ivt+0x4c00
475//////////////////////////////////////////////////////////////////////
476// 0x4c00 Entry 19 (size 64 bundles) Reserved
477 KVM_FAULT(19)
478
479 .org kvm_ia64_ivt+0x5000
480//////////////////////////////////////////////////////////////////////
481// 0x5000 Entry 20 (size 16 bundles) Page Not Present
482ENTRY(kvm_page_not_present)
483 KVM_REFLECT(20)
484END(kvm_page_not_present)
485
486 .org kvm_ia64_ivt+0x5100
487///////////////////////////////////////////////////////////////////////
488// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
489ENTRY(kvm_key_permission)
490 KVM_REFLECT(21)
491END(kvm_key_permission)
492
493 .org kvm_ia64_ivt+0x5200
494//////////////////////////////////////////////////////////////////////
495// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
496ENTRY(kvm_iaccess_rights)
497 KVM_REFLECT(22)
498END(kvm_iaccess_rights)
499
500 .org kvm_ia64_ivt+0x5300
501//////////////////////////////////////////////////////////////////////
502// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
503ENTRY(kvm_daccess_rights)
504 KVM_REFLECT(23)
505END(kvm_daccess_rights)
506
507 .org kvm_ia64_ivt+0x5400
508/////////////////////////////////////////////////////////////////////
509// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
510ENTRY(kvm_general_exception)
511 KVM_REFLECT(24)
512 KVM_FAULT(24)
513END(kvm_general_exception)
514
515 .org kvm_ia64_ivt+0x5500
516//////////////////////////////////////////////////////////////////////
517// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
518ENTRY(kvm_disabled_fp_reg)
519 KVM_REFLECT(25)
520END(kvm_disabled_fp_reg)
521
522 .org kvm_ia64_ivt+0x5600
523////////////////////////////////////////////////////////////////////
524// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
525ENTRY(kvm_nat_consumption)
526 KVM_REFLECT(26)
527END(kvm_nat_consumption)
528
529 .org kvm_ia64_ivt+0x5700
530/////////////////////////////////////////////////////////////////////
531// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
532ENTRY(kvm_speculation_vector)
533 KVM_REFLECT(27)
534END(kvm_speculation_vector)
535
536 .org kvm_ia64_ivt+0x5800
537/////////////////////////////////////////////////////////////////////
538// 0x5800 Entry 28 (size 16 bundles) Reserved
539 KVM_FAULT(28)
540
541 .org kvm_ia64_ivt+0x5900
542///////////////////////////////////////////////////////////////////
543// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
544ENTRY(kvm_debug_vector)
545 KVM_FAULT(29)
546END(kvm_debug_vector)
547
548 .org kvm_ia64_ivt+0x5a00
549///////////////////////////////////////////////////////////////
550// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
551ENTRY(kvm_unaligned_access)
552 KVM_REFLECT(30)
553END(kvm_unaligned_access)
554
555 .org kvm_ia64_ivt+0x5b00
556//////////////////////////////////////////////////////////////////////
557// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
558ENTRY(kvm_unsupported_data_reference)
559 KVM_REFLECT(31)
560END(kvm_unsupported_data_reference)
561
562 .org kvm_ia64_ivt+0x5c00
563////////////////////////////////////////////////////////////////////
564// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
565ENTRY(kvm_floating_point_fault)
566 KVM_REFLECT(32)
567END(kvm_floating_point_fault)
568
569 .org kvm_ia64_ivt+0x5d00
570/////////////////////////////////////////////////////////////////////
571// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
572ENTRY(kvm_floating_point_trap)
573 KVM_REFLECT(33)
574END(kvm_floating_point_trap)
575
576 .org kvm_ia64_ivt+0x5e00
577//////////////////////////////////////////////////////////////////////
578// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
579ENTRY(kvm_lower_privilege_trap)
580 KVM_REFLECT(34)
581END(kvm_lower_privilege_trap)
582
583 .org kvm_ia64_ivt+0x5f00
584//////////////////////////////////////////////////////////////////////
585// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
586ENTRY(kvm_taken_branch_trap)
587 KVM_REFLECT(35)
588END(kvm_taken_branch_trap)
589
590 .org kvm_ia64_ivt+0x6000
591////////////////////////////////////////////////////////////////////
592// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
593ENTRY(kvm_single_step_trap)
594 KVM_REFLECT(36)
595END(kvm_single_step_trap)
596 .global kvm_virtualization_fault_back
597 .org kvm_ia64_ivt+0x6100
598/////////////////////////////////////////////////////////////////////
599// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
600ENTRY(kvm_virtualization_fault)
601 mov r31=pr
602 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
603 ;;
604 st8 [r16] = r1
605 adds r17 = VMM_VCPU_GP_OFFSET, r21
606 ;;
607 ld8 r1 = [r17]
608 cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
609 cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
610 cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
611 cmp.eq p9,p0=EVENT_RSM,r24
612 cmp.eq p10,p0=EVENT_SSM,r24
613 cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
614 cmp.eq p12,p0=EVENT_THASH,r24
615 (p6) br.dptk.many kvm_asm_mov_from_ar
616 (p7) br.dptk.many kvm_asm_mov_from_rr
617 (p8) br.dptk.many kvm_asm_mov_to_rr
618 (p9) br.dptk.many kvm_asm_rsm
619 (p10) br.dptk.many kvm_asm_ssm
620 (p11) br.dptk.many kvm_asm_mov_to_psr
621 (p12) br.dptk.many kvm_asm_thash
622 ;;
623kvm_virtualization_fault_back:
624 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
625 ;;
626 ld8 r1 = [r16]
627 ;;
628 mov r19=37
629 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
630 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
631 ;;
632 st8 [r16] = r24
633 st8 [r17] = r25
634 ;;
635 cmp.ne p6,p0=EVENT_RFI, r24
636 (p6) br.sptk kvm_dispatch_virtualization_fault
637 ;;
638 adds r18=VMM_VPD_BASE_OFFSET,r21
639 ;;
640 ld8 r18=[r18]
641 ;;
642 adds r18=VMM_VPD_VIFS_OFFSET,r18
643 ;;
644 ld8 r18=[r18]
645 ;;
646 tbit.z p6,p0=r18,63
647 (p6) br.sptk kvm_dispatch_virtualization_fault
648 ;;
649 // if vifs.v=1, discard the current register frame
650 alloc r18=ar.pfs,0,0,0,0
651 br.sptk kvm_dispatch_virtualization_fault
652END(kvm_virtualization_fault)
653
654 .org kvm_ia64_ivt+0x6200
655//////////////////////////////////////////////////////////////
656// 0x6200 Entry 38 (size 16 bundles) Reserved
657 KVM_FAULT(38)
658
659 .org kvm_ia64_ivt+0x6300
660/////////////////////////////////////////////////////////////////
661// 0x6300 Entry 39 (size 16 bundles) Reserved
662 KVM_FAULT(39)
663
664 .org kvm_ia64_ivt+0x6400
665/////////////////////////////////////////////////////////////////
666// 0x6400 Entry 40 (size 16 bundles) Reserved
667 KVM_FAULT(40)
668
669 .org kvm_ia64_ivt+0x6500
670//////////////////////////////////////////////////////////////////
671// 0x6500 Entry 41 (size 16 bundles) Reserved
672 KVM_FAULT(41)
673
674 .org kvm_ia64_ivt+0x6600
675//////////////////////////////////////////////////////////////////
676// 0x6600 Entry 42 (size 16 bundles) Reserved
677 KVM_FAULT(42)
678
679 .org kvm_ia64_ivt+0x6700
680//////////////////////////////////////////////////////////////////
681// 0x6700 Entry 43 (size 16 bundles) Reserved
682 KVM_FAULT(43)
683
684 .org kvm_ia64_ivt+0x6800
685//////////////////////////////////////////////////////////////////
686// 0x6800 Entry 44 (size 16 bundles) Reserved
687 KVM_FAULT(44)
688
689 .org kvm_ia64_ivt+0x6900
690///////////////////////////////////////////////////////////////////
691// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception
692//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
693ENTRY(kvm_ia32_exception)
694 KVM_FAULT(45)
695END(kvm_ia32_exception)
696
697 .org kvm_ia64_ivt+0x6a00
698////////////////////////////////////////////////////////////////////
699// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
700ENTRY(kvm_ia32_intercept)
701 KVM_FAULT(47)
702END(kvm_ia32_intercept)
703
704 .org kvm_ia64_ivt+0x6c00
705/////////////////////////////////////////////////////////////////////
706// 0x6c00 Entry 48 (size 16 bundles) Reserved
707 KVM_FAULT(48)
708
709 .org kvm_ia64_ivt+0x6d00
710//////////////////////////////////////////////////////////////////////
711// 0x6d00 Entry 49 (size 16 bundles) Reserved
712 KVM_FAULT(49)
713
714 .org kvm_ia64_ivt+0x6e00
715//////////////////////////////////////////////////////////////////////
716// 0x6e00 Entry 50 (size 16 bundles) Reserved
717 KVM_FAULT(50)
718
719 .org kvm_ia64_ivt+0x6f00
720/////////////////////////////////////////////////////////////////////
721// 0x6f00 Entry 51 (size 16 bundles) Reserved
722	KVM_FAULT(51)
723
724 .org kvm_ia64_ivt+0x7100
725////////////////////////////////////////////////////////////////////
726// 0x7100 Entry 53 (size 16 bundles) Reserved
727 KVM_FAULT(53)
728
729 .org kvm_ia64_ivt+0x7200
730/////////////////////////////////////////////////////////////////////
731// 0x7200 Entry 54 (size 16 bundles) Reserved
732 KVM_FAULT(54)
733
734 .org kvm_ia64_ivt+0x7300
735////////////////////////////////////////////////////////////////////
736// 0x7300 Entry 55 (size 16 bundles) Reserved
737 KVM_FAULT(55)
738
739 .org kvm_ia64_ivt+0x7400
740////////////////////////////////////////////////////////////////////
741// 0x7400 Entry 56 (size 16 bundles) Reserved
742 KVM_FAULT(56)
743
744 .org kvm_ia64_ivt+0x7500
745/////////////////////////////////////////////////////////////////////
746// 0x7500 Entry 57 (size 16 bundles) Reserved
747 KVM_FAULT(57)
748
749 .org kvm_ia64_ivt+0x7600
750/////////////////////////////////////////////////////////////////////
751// 0x7600 Entry 58 (size 16 bundles) Reserved
752 KVM_FAULT(58)
753
754 .org kvm_ia64_ivt+0x7700
755////////////////////////////////////////////////////////////////////
756// 0x7700 Entry 59 (size 16 bundles) Reserved
757 KVM_FAULT(59)
758
759 .org kvm_ia64_ivt+0x7800
760////////////////////////////////////////////////////////////////////
761// 0x7800 Entry 60 (size 16 bundles) Reserved
762 KVM_FAULT(60)
763
764 .org kvm_ia64_ivt+0x7900
765/////////////////////////////////////////////////////////////////////
766// 0x7900 Entry 61 (size 16 bundles) Reserved
767 KVM_FAULT(61)
768
769 .org kvm_ia64_ivt+0x7a00
770/////////////////////////////////////////////////////////////////////
771// 0x7a00 Entry 62 (size 16 bundles) Reserved
772 KVM_FAULT(62)
773
774 .org kvm_ia64_ivt+0x7b00
775/////////////////////////////////////////////////////////////////////
776// 0x7b00 Entry 63 (size 16 bundles) Reserved
777 KVM_FAULT(63)
778
779 .org kvm_ia64_ivt+0x7c00
780////////////////////////////////////////////////////////////////////
781// 0x7c00 Entry 64 (size 16 bundles) Reserved
782 KVM_FAULT(64)
783
784 .org kvm_ia64_ivt+0x7d00
785/////////////////////////////////////////////////////////////////////
786// 0x7d00 Entry 65 (size 16 bundles) Reserved
787 KVM_FAULT(65)
788
789 .org kvm_ia64_ivt+0x7e00
790/////////////////////////////////////////////////////////////////////
791// 0x7e00 Entry 66 (size 16 bundles) Reserved
792 KVM_FAULT(66)
793
794 .org kvm_ia64_ivt+0x7f00
795////////////////////////////////////////////////////////////////////
796// 0x7f00 Entry 67 (size 16 bundles) Reserved
797 KVM_FAULT(67)
798
799 .org kvm_ia64_ivt+0x8000
800// There is no particular reason for this code to be here, other than that
801// there happens to be space here that would go unused otherwise. If this
802// fault ever gets "unreserved", simply move the following code to a more
803// suitable spot...
804
805
806ENTRY(kvm_dtlb_miss_dispatch)
807 mov r19 = 2
808 KVM_SAVE_MIN_WITH_COVER_R19
809 alloc r14=ar.pfs,0,0,3,0
810 mov out0=cr.ifa
811 mov out1=r15
812 adds r3=8,r2 // set up second base pointer
813 ;;
814 ssm psr.ic
815 ;;
816 srlz.i // guarantee that interruption collection is on
817 ;;
818 //(p15) ssm psr.i // restore psr.i
819 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
820 ;;
821 KVM_SAVE_REST
822 KVM_SAVE_EXTRA
823 mov rp=r14
824 ;;
825 adds out2=16,r12
826 br.call.sptk.many b6=kvm_page_fault
827END(kvm_dtlb_miss_dispatch)
828
829ENTRY(kvm_itlb_miss_dispatch)
830
831 KVM_SAVE_MIN_WITH_COVER_R19
832 alloc r14=ar.pfs,0,0,3,0
833 mov out0=cr.ifa
834 mov out1=r15
835 adds r3=8,r2 // set up second base pointer
836 ;;
837 ssm psr.ic
838 ;;
839 srlz.i // guarantee that interruption collection is on
840 ;;
841 //(p15) ssm psr.i // restore psr.i
842 addl r14=@gprel(ia64_leave_hypervisor),gp
843 ;;
844 KVM_SAVE_REST
845 mov rp=r14
846 ;;
847 adds out2=16,r12
848 br.call.sptk.many b6=kvm_page_fault
849END(kvm_itlb_miss_dispatch)
850
851ENTRY(kvm_dispatch_reflection)
852 /*
853 * Input:
854 * psr.ic: off
855 * r19: intr type (offset into ivt, see ia64_int.h)
856 * r31: contains saved predicates (pr)
857 */
858 KVM_SAVE_MIN_WITH_COVER_R19
859 alloc r14=ar.pfs,0,0,5,0
860 mov out0=cr.ifa
861 mov out1=cr.isr
862 mov out2=cr.iim
863 mov out3=r15
864 adds r3=8,r2 // set up second base pointer
865 ;;
866 ssm psr.ic
867 ;;
868 srlz.i // guarantee that interruption collection is on
869 ;;
870 //(p15) ssm psr.i // restore psr.i
871 addl r14=@gprel(ia64_leave_hypervisor),gp
872 ;;
873 KVM_SAVE_REST
874 mov rp=r14
875 ;;
876 adds out4=16,r12
877 br.call.sptk.many b6=reflect_interruption
878END(kvm_dispatch_reflection)
879
880ENTRY(kvm_dispatch_virtualization_fault)
881 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
882 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
883 ;;
884 st8 [r16] = r24
885 st8 [r17] = r25
886 ;;
887 KVM_SAVE_MIN_WITH_COVER_R19
888 ;;
889 alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
890 mov out0=r13 //vcpu
891 adds r3=8,r2 // set up second base pointer
892 ;;
893 ssm psr.ic
894 ;;
895 srlz.i // guarantee that interruption collection is on
896 ;;
897 //(p15) ssm psr.i // restore psr.i
898 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
899 ;;
900 KVM_SAVE_REST
901 KVM_SAVE_EXTRA
902 mov rp=r14
903 ;;
904 adds out1=16,sp //regs
905 br.call.sptk.many b6=kvm_emulate
906END(kvm_dispatch_virtualization_fault)
907
908
909ENTRY(kvm_dispatch_interrupt)
910 KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3
911 ;;
912 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
913 //mov out0=cr.ivr // pass cr.ivr as first arg
914 adds r3=8,r2 // set up second base pointer for SAVE_REST
915 ;;
916 ssm psr.ic
917 ;;
918 srlz.i
919 ;;
920 //(p15) ssm psr.i
921 addl r14=@gprel(ia64_leave_hypervisor),gp
922 ;;
923 KVM_SAVE_REST
924 mov rp=r14
925 ;;
926 mov out0=r13 // pass pointer to pt_regs as second arg
927 br.call.sptk.many b6=kvm_ia64_handle_irq
928END(kvm_dispatch_interrupt)
929
930
931
932
933GLOBAL_ENTRY(ia64_leave_nested)
934 rsm psr.i
935 ;;
936 adds r21=PT(PR)+16,r12
937 ;;
938 lfetch [r21],PT(CR_IPSR)-PT(PR)
939 adds r2=PT(B6)+16,r12
940 adds r3=PT(R16)+16,r12
941 ;;
942 lfetch [r21]
943 ld8 r28=[r2],8 // load b6
944 adds r29=PT(R24)+16,r12
945
946 ld8.fill r16=[r3]
947 adds r3=PT(AR_CSD)-PT(R16),r3
948 adds r30=PT(AR_CCV)+16,r12
949 ;;
950 ld8.fill r24=[r29]
951 ld8 r15=[r30] // load ar.ccv
952 ;;
953 ld8 r29=[r2],16 // load b7
954 ld8 r30=[r3],16 // load ar.csd
955 ;;
956 ld8 r31=[r2],16 // load ar.ssd
957 ld8.fill r8=[r3],16
958 ;;
959 ld8.fill r9=[r2],16
960 ld8.fill r10=[r3],PT(R17)-PT(R10)
961 ;;
962 ld8.fill r11=[r2],PT(R18)-PT(R11)
963 ld8.fill r17=[r3],16
964 ;;
965 ld8.fill r18=[r2],16
966 ld8.fill r19=[r3],16
967 ;;
968 ld8.fill r20=[r2],16
969 ld8.fill r21=[r3],16
970 mov ar.csd=r30
971 mov ar.ssd=r31
972 ;;
973 rsm psr.i | psr.ic
974 // initiate turning off of interrupt and interruption collection
975 invala // invalidate ALAT
976 ;;
977 srlz.i
978 ;;
979 ld8.fill r22=[r2],24
980 ld8.fill r23=[r3],24
981 mov b6=r28
982 ;;
983 ld8.fill r25=[r2],16
984 ld8.fill r26=[r3],16
985 mov b7=r29
986 ;;
987 ld8.fill r27=[r2],16
988 ld8.fill r28=[r3],16
989 ;;
990 ld8.fill r29=[r2],16
991 ld8.fill r30=[r3],24
992 ;;
993 ld8.fill r31=[r2],PT(F9)-PT(R31)
994 adds r3=PT(F10)-PT(F6),r3
995 ;;
996 ldf.fill f9=[r2],PT(F6)-PT(F9)
997 ldf.fill f10=[r3],PT(F8)-PT(F10)
998 ;;
999 ldf.fill f6=[r2],PT(F7)-PT(F6)
1000 ;;
1001 ldf.fill f7=[r2],PT(F11)-PT(F7)
1002 ldf.fill f8=[r3],32
1003 ;;
1004 srlz.i // ensure interruption collection is off
1005 mov ar.ccv=r15
1006 ;;
1007 bsw.0 // switch back to bank 0 (no stop bit required beforehand...)
1008 ;;
1009 ldf.fill f11=[r2]
1010// mov r18=r13
1011// mov r21=r13
1012 adds r16=PT(CR_IPSR)+16,r12
1013 adds r17=PT(CR_IIP)+16,r12
1014 ;;
1015 ld8 r29=[r16],16 // load cr.ipsr
1016 ld8 r28=[r17],16 // load cr.iip
1017 ;;
1018 ld8 r30=[r16],16 // load cr.ifs
1019 ld8 r25=[r17],16 // load ar.unat
1020 ;;
1021 ld8 r26=[r16],16 // load ar.pfs
1022 ld8 r27=[r17],16 // load ar.rsc
1023 cmp.eq p9,p0=r0,r0
1024 // set p9 to indicate that we should restore cr.ifs
1025 ;;
1026 ld8 r24=[r16],16 // load ar.rnat (may be garbage)
1027 ld8 r23=[r17],16// load ar.bspstore (may be garbage)
1028 ;;
1029 ld8 r31=[r16],16 // load predicates
1030 ld8 r22=[r17],16 // load b0
1031 ;;
1032 ld8 r19=[r16],16 // load ar.rsc value for "loadrs"
1033 ld8.fill r1=[r17],16 // load r1
1034 ;;
1035 ld8.fill r12=[r16],16
1036 ld8.fill r13=[r17],16
1037 ;;
1038 ld8 r20=[r16],16 // ar.fpsr
1039 ld8.fill r15=[r17],16
1040 ;;
1041 ld8.fill r14=[r16],16
1042 ld8.fill r2=[r17]
1043 ;;
1044 ld8.fill r3=[r16]
1045 ;;
1046 mov r16=ar.bsp // get existing backing store pointer
1047 ;;
1048 mov b0=r22
1049 mov ar.pfs=r26
1050 mov cr.ifs=r30
1051 mov cr.ipsr=r29
1052 mov ar.fpsr=r20
1053 mov cr.iip=r28
1054 ;;
1055 mov ar.rsc=r27
1056 mov ar.unat=r25
1057 mov pr=r31,-1
1058 rfi
1059END(ia64_leave_nested)
1060
1061
1062
1063GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
1064	/*
1065	 * work.need_resched etc. mustn't get changed
1066	 * by this CPU before it returns to
1067	 * user- or fsys-mode, hence we disable
1068	 * interrupts early on:
1069	 */
1070 adds r2 = PT(R4)+16,r12
1071 adds r3 = PT(R5)+16,r12
1072 adds r8 = PT(EML_UNAT)+16,r12
1073 ;;
1074 ld8 r8 = [r8]
1075 ;;
1076 mov ar.unat=r8
1077 ;;
1078 ld8.fill r4=[r2],16 //load r4
1079 ld8.fill r5=[r3],16 //load r5
1080 ;;
1081 ld8.fill r6=[r2] //load r6
1082 ld8.fill r7=[r3] //load r7
1083 ;;
1084END(ia64_leave_hypervisor_prepare)
1085//fall through
1086GLOBAL_ENTRY(ia64_leave_hypervisor)
1087 rsm psr.i
1088 ;;
1089 br.call.sptk.many b0=leave_hypervisor_tail
1090 ;;
1091 adds r20=PT(PR)+16,r12
1092 adds r8=PT(EML_UNAT)+16,r12
1093 ;;
1094 ld8 r8=[r8]
1095 ;;
1096 mov ar.unat=r8
1097 ;;
1098 lfetch [r20],PT(CR_IPSR)-PT(PR)
1099 adds r2 = PT(B6)+16,r12
1100 adds r3 = PT(B7)+16,r12
1101 ;;
1102 lfetch [r20]
1103 ;;
1104 ld8 r24=[r2],16 /* B6 */
1105 ld8 r25=[r3],16 /* B7 */
1106 ;;
1107 ld8 r26=[r2],16 /* ar_csd */
1108 ld8 r27=[r3],16 /* ar_ssd */
1109 mov b6 = r24
1110 ;;
1111 ld8.fill r8=[r2],16
1112 ld8.fill r9=[r3],16
1113 mov b7 = r25
1114 ;;
1115 mov ar.csd = r26
1116 mov ar.ssd = r27
1117 ;;
1118 ld8.fill r10=[r2],PT(R15)-PT(R10)
1119 ld8.fill r11=[r3],PT(R14)-PT(R11)
1120 ;;
1121 ld8.fill r15=[r2],PT(R16)-PT(R15)
1122 ld8.fill r14=[r3],PT(R17)-PT(R14)
1123 ;;
1124 ld8.fill r16=[r2],16
1125 ld8.fill r17=[r3],16
1126 ;;
1127 ld8.fill r18=[r2],16
1128 ld8.fill r19=[r3],16
1129 ;;
1130 ld8.fill r20=[r2],16
1131 ld8.fill r21=[r3],16
1132 ;;
1133 ld8.fill r22=[r2],16
1134 ld8.fill r23=[r3],16
1135 ;;
1136 ld8.fill r24=[r2],16
1137 ld8.fill r25=[r3],16
1138 ;;
1139 ld8.fill r26=[r2],16
1140 ld8.fill r27=[r3],16
1141 ;;
1142 ld8.fill r28=[r2],16
1143 ld8.fill r29=[r3],16
1144 ;;
1145 ld8.fill r30=[r2],PT(F6)-PT(R30)
1146 ld8.fill r31=[r3],PT(F7)-PT(R31)
1147 ;;
1148 rsm psr.i | psr.ic
1149 // initiate turning off of interrupt and interruption collection
1150 invala // invalidate ALAT
1151 ;;
1152 srlz.i // ensure interruption collection is off
1153 ;;
1154 bsw.0
1155 ;;
1156 adds r16 = PT(CR_IPSR)+16,r12
1157 adds r17 = PT(CR_IIP)+16,r12
1158 mov r21=r13 // get current
1159 ;;
1160 ld8 r31=[r16],16 // load cr.ipsr
1161 ld8 r30=[r17],16 // load cr.iip
1162 ;;
1163 ld8 r29=[r16],16 // load cr.ifs
1164 ld8 r28=[r17],16 // load ar.unat
1165 ;;
1166 ld8 r27=[r16],16 // load ar.pfs
1167 ld8 r26=[r17],16 // load ar.rsc
1168 ;;
1169 ld8 r25=[r16],16 // load ar.rnat
1170 ld8 r24=[r17],16 // load ar.bspstore
1171 ;;
1172 ld8 r23=[r16],16 // load predicates
1173 ld8 r22=[r17],16 // load b0
1174 ;;
1175 ld8 r20=[r16],16 // load ar.rsc value for "loadrs"
1176 ld8.fill r1=[r17],16 //load r1
1177 ;;
1178 ld8.fill r12=[r16],16 //load r12
1179 ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13
1180 ;;
1181 ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr
1182 ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2
1183 ;;
1184 ld8.fill r3=[r16] //load r3
1185 ld8 r18=[r17] //load ar_ccv
1186 ;;
1187 mov ar.fpsr=r19
1188 mov ar.ccv=r18
1189 shr.u r18=r20,16
1190 ;;
1191kvm_rbs_switch:
1192 mov r19=96
1193
1194kvm_dont_preserve_current_frame:
1195/*
1196 * To prevent leaking bits between the hypervisor and guest domain,
1197 * we must clear the stacked registers in the "invalid" partition here.
1198 * (the loop below clears about 5 registers/cycle on McKinley).
1199 */
1200# define pRecurse p6
1201# define pReturn p7
1202# define Nregs 14
1203
1204 alloc loc0=ar.pfs,2,Nregs-2,2,0
1205 shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
1206 sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize
1207 ;;
1208 mov ar.rsc=r20 // load ar.rsc to be used for "loadrs"
1209 shladd in0=loc1,3,r19
1210 mov in1=0
1211 ;;
1212 TEXT_ALIGN(32)
1213kvm_rse_clear_invalid:
1214 alloc loc0=ar.pfs,2,Nregs-2,2,0
1215 cmp.lt pRecurse,p0=Nregs*8,in0
1216 // if more than Nregs regs left to clear, (re)curse
1217 add out0=-Nregs*8,in0
1218 add out1=1,in1 // increment recursion count
1219 mov loc1=0
1220 mov loc2=0
1221 ;;
1222 mov loc3=0
1223 mov loc4=0
1224 mov loc5=0
1225 mov loc6=0
1226 mov loc7=0
1227(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
1228 ;;
1229 mov loc8=0
1230 mov loc9=0
1231 cmp.ne pReturn,p0=r0,in1
1232 // if recursion count != 0, we need to do a br.ret
1233 mov loc10=0
1234 mov loc11=0
1235(pReturn) br.ret.dptk.many b0
1236
1237# undef pRecurse
1238# undef pReturn
1239
1240// loadrs has already been shifted
1241 alloc r16=ar.pfs,0,0,0,0 // drop current register frame
1242 ;;
1243 loadrs
1244 ;;
1245 mov ar.bspstore=r24
1246 ;;
1247 mov ar.unat=r28
1248 mov ar.rnat=r25
1249 mov ar.rsc=r26
1250 ;;
1251 mov cr.ipsr=r31
1252 mov cr.iip=r30
1253 mov cr.ifs=r29
1254 mov ar.pfs=r27
1255 adds r18=VMM_VPD_BASE_OFFSET,r21
1256 ;;
1257 ld8 r18=[r18] //vpd
1258 adds r17=VMM_VCPU_ISR_OFFSET,r21
1259 ;;
1260 ld8 r17=[r17]
1261 adds r19=VMM_VPD_VPSR_OFFSET,r18
1262 ;;
1263 ld8 r19=[r19] //vpsr
1264 adds r20=VMM_VCPU_VSA_BASE_OFFSET,r21
1265 ;;
1266 ld8 r20=[r20]
1267 ;;
1268//vsa_sync_write_start
1269 mov r25=r18
1270 adds r16= VMM_VCPU_GP_OFFSET,r21
1271 ;;
1272	ld8 r16= [r16]		// load gp
1273 movl r24=@gprel(ia64_vmm_entry) // calculate return address
1274 ;;
1275 add r24=r24,r16
1276 ;;
1277 add r16=PAL_VPS_SYNC_WRITE,r20
1278 ;;
1279 mov b0=r16
1280 br.cond.sptk b0 // call the service
1281 ;;
1282END(ia64_leave_hypervisor)
1283// fall through
1284GLOBAL_ENTRY(ia64_vmm_entry)
1285/*
1286 * must be at bank 0
1287 * parameter:
1288 * r17:cr.isr
1289 * r18:vpd
1290 * r19:vpsr
1291 * r20:__vsa_base
1292 * r22:b0
1293 * r23:predicate
1294 */
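	// Choose the PAL VPS resume path: RESUME_NORMAL when the guest's
	// vpsr.ic (or cr.isr.ir) is set, RESUME_HANDLER otherwise.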
1295 mov r24=r22
1296 mov r25=r18
1297 tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
1298 ;;
1299 (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
1300 (p1) br.sptk.many ia64_vmm_entry_out
1301 ;;
1302 tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT //p1=cr.isr.ir
1303 ;;
1304 (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
1305 (p2) add r29=PAL_VPS_RESUME_HANDLER,r20
1306 (p2) ld8 r26=[r25]
1307 ;;
1308ia64_vmm_entry_out:
1309 mov pr=r23,-2
1310 mov b0=r29
1311 ;;
1312 br.cond.sptk b0 // call pal service
1313END(ia64_vmm_entry)
1314
1315
1316
1317/*
1318 * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
1319 * u64 arg3, u64 arg4, u64 arg5,
1320 * u64 arg6, u64 arg7);
1321 *
1322 * XXX: The currently defined services use only 4 args at the max. The
1323 * rest are not consumed.
1324 */
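/*
 * Mechanics: rp, ar.pfs and psr are saved, psr.i/psr.ic are turned off, and
 * control branches to __vsa_base + proc; the service returns to the 2: label
 * via hostret (r24) and its result is handed back in r8.
 *
 * Illustrative (hypothetical) call, not taken from this file:
 *
 *	ret = ia64_call_vsa(PAL_VPS_SYNC_READ, arg1, arg2, arg3, 0, 0, 0, 0);
 */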
1325GLOBAL_ENTRY(ia64_call_vsa)
1326 .regstk 4,4,0,0
1327
1328rpsave = loc0
1329pfssave = loc1
1330psrsave = loc2
1331entry = loc3
1332hostret = r24
1333
1334 alloc pfssave=ar.pfs,4,4,0,0
1335 mov rpsave=rp
1336 adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
1337 ;;
1338 ld8 entry=[entry]
13391: mov hostret=ip
1340 mov r25=in1 // copy arguments
1341 mov r26=in2
1342 mov r27=in3
1343 mov psrsave=psr
1344 ;;
1345 tbit.nz p6,p0=psrsave,14 // IA64_PSR_I
1346 tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC
1347 ;;
1348 add hostret=2f-1b,hostret // calculate return address
1349 add entry=entry,in0
1350 ;;
1351 rsm psr.i | psr.ic
1352 ;;
1353 srlz.i
1354 mov b6=entry
1355 br.cond.sptk b6 // call the service
13562:
1357 // Architectural sequence for enabling interrupts if necessary
1358(p7) ssm psr.ic
1359 ;;
1360(p7) srlz.i
1361 ;;
1362//(p6) ssm psr.i
1363 ;;
1364 mov rp=rpsave
1365 mov ar.pfs=pfssave
1366 mov r8=r31
1367 ;;
1368 srlz.d
1369 br.ret.sptk rp
1370
1371END(ia64_call_vsa)
1372
1373#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100)
1374
1375GLOBAL_ENTRY(vmm_reset_entry)
1376 //set up ipsr, iip, vpd.vpsr, dcr
1377 // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
1378 // For DCR: all bits 0
1379 adds r14=-VMM_PT_REGS_SIZE, r12
1380 ;;
1381 movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
1382 movl r10=0x8000000000000000
1383 adds r16=PT(CR_IIP), r14
1384 adds r20=PT(R1), r14
1385 ;;
1386 rsm psr.ic | psr.i
1387 ;;
1388 srlz.i
1389 ;;
1390 bsw.0
1391 ;;
1392 mov r21 =r13
1393 ;;
1394 bsw.1
1395 ;;
1396 mov ar.rsc = 0
1397 ;;
1398 flushrs
1399 ;;
1400 mov ar.bspstore = 0
1401 // clear BSPSTORE
1402 ;;
1403 mov cr.ipsr=r6
1404 mov cr.ifs=r10
1405 ld8 r4 = [r16] // Set init iip for first run.
1406 ld8 r1 = [r20]
1407 ;;
1408 mov cr.iip=r4
1409 ;;
1410 adds r16=VMM_VPD_BASE_OFFSET,r13
1411 adds r20=VMM_VCPU_VSA_BASE_OFFSET,r13
1412 ;;
1413 ld8 r18=[r16]
1414 ld8 r20=[r20]
1415 ;;
1416 adds r19=VMM_VPD_VPSR_OFFSET,r18
1417 ;;
1418 ld8 r19=[r19]
1419 mov r17=r0
1420 mov r22=r0
1421 mov r23=r0
1422 br.cond.sptk ia64_vmm_entry
1423 br.ret.sptk b0
1424END(vmm_reset_entry)
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
new file mode 100644
index 000000000000..f6c5617e16af
--- /dev/null
+++ b/arch/ia64/kvm/vti.h
@@ -0,0 +1,290 @@
1/*
2 * vti.h: prototypes for the general VT related interface
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
6 * Fred Yang (fred.yang@intel.com)
7 * Kun Tian (Kevin Tian) (kevin.tian@intel.com)
8 *
9 * Copyright (c) 2007, Intel Corporation.
10 * Zhang xiantao <xiantao.zhang@intel.com>
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms and conditions of the GNU General Public License,
14 * version 2, as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 * more details.
20 *
21 * You should have received a copy of the GNU General Public License along with
22 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
23 * Place - Suite 330, Boston, MA 02111-1307 USA.
24 */
25#ifndef _KVM_VT_I_H
26#define _KVM_VT_I_H
27
28#ifndef __ASSEMBLY__
29#include <asm/page.h>
30
31#include <linux/kvm_host.h>
32
33/* define itr.i and itr.d in ia64_itr function */
34#define ITR 0x01
35#define DTR 0x02
36#define IaDTR 0x03
37
38#define IA64_TR_VMM 6 /*itr6, dtr6 : maps vmm code, vmbuffer*/
39#define IA64_TR_VM_DATA 7 /*dtr7 : maps current vm data*/
40
41#define RR6 (6UL<<61)
42#define RR7 (7UL<<61)
43
44
45/* config_options in pal_vp_init_env */
46#define VP_INITIALIZE 1UL
47#define VP_FR_PMC 1UL<<1
48#define VP_OPCODE 1UL<<8
49#define VP_CAUSE 1UL<<9
50#define VP_FW_ACC 1UL<<63
51
52/* init vp env, also initializing vm_buffer */
53#define VP_INIT_ENV_INITALIZE (VP_INITIALIZE | VP_FR_PMC |\
54 VP_OPCODE | VP_CAUSE | VP_FW_ACC)
55/* init vp env without initializing vm_buffer */
56#define VP_INIT_ENV VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC
57
58#define PAL_VP_CREATE 265
59/* Stacked Virt. Initializes a new VPD for the operation of
60 * a new virtual processor in the virtual environment.
61 */
62#define PAL_VP_ENV_INFO 266
63/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/
64#define PAL_VP_EXIT_ENV 267
65/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/
66#define PAL_VP_INIT_ENV 268
67/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/
68#define PAL_VP_REGISTER 269
69/*Stacked Virt. Register a different host IVT for the virtual processor.*/
70#define PAL_VP_RESUME 270
71/* Renamed from PAL_VP_RESUME */
72#define PAL_VP_RESTORE 270
73/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/
74#define PAL_VP_SUSPEND 271
75/* Renamed from PAL_VP_SUSPEND */
76#define PAL_VP_SAVE 271
77/* Stacked Virt. Suspends operation for the specified virtual processor on
78 * the logical processor.
79 */
80#define PAL_VP_TERMINATE 272
81/* Stacked Virt. Terminates operation for the specified virtual processor.*/
82
83union vac {
84 unsigned long value;
85 struct {
86 int a_int:1;
87 int a_from_int_cr:1;
88 int a_to_int_cr:1;
89 int a_from_psr:1;
90 int a_from_cpuid:1;
91 int a_cover:1;
92 int a_bsw:1;
93 long reserved:57;
94 };
95};
96
97union vdc {
98 unsigned long value;
99 struct {
100 int d_vmsw:1;
101 int d_extint:1;
102 int d_ibr_dbr:1;
103 int d_pmc:1;
104 int d_to_pmd:1;
105 int d_itm:1;
106 long reserved:58;
107 };
108};
109
110struct vpd {
111 union vac vac;
112 union vdc vdc;
113 unsigned long virt_env_vaddr;
114 unsigned long reserved1[29];
115 unsigned long vhpi;
116 unsigned long reserved2[95];
117 unsigned long vgr[16];
118 unsigned long vbgr[16];
119 unsigned long vnat;
120 unsigned long vbnat;
121 unsigned long vcpuid[5];
122 unsigned long reserved3[11];
123 unsigned long vpsr;
124 unsigned long vpr;
125 unsigned long reserved4[76];
126 union {
127 unsigned long vcr[128];
128 struct {
129 unsigned long dcr;
130 unsigned long itm;
131 unsigned long iva;
132 unsigned long rsv1[5];
133 unsigned long pta;
134 unsigned long rsv2[7];
135 unsigned long ipsr;
136 unsigned long isr;
137 unsigned long rsv3;
138 unsigned long iip;
139 unsigned long ifa;
140 unsigned long itir;
141 unsigned long iipa;
142 unsigned long ifs;
143 unsigned long iim;
144 unsigned long iha;
145 unsigned long rsv4[38];
146 unsigned long lid;
147 unsigned long ivr;
148 unsigned long tpr;
149 unsigned long eoi;
150 unsigned long irr[4];
151 unsigned long itv;
152 unsigned long pmv;
153 unsigned long cmcv;
154 unsigned long rsv5[5];
155 unsigned long lrr0;
156 unsigned long lrr1;
157 unsigned long rsv6[46];
158 };
159 };
160 unsigned long reserved5[128];
161 unsigned long reserved6[3456];
162 unsigned long vmm_avail[128];
163 unsigned long reserved7[4096];
164};
165
166#define PAL_PROC_VM_BIT (1UL << 40)
167#define PAL_PROC_VMSW_BIT (1UL << 54)
168
169static inline s64 ia64_pal_vp_env_info(u64 *buffer_size,
170 u64 *vp_env_info)
171{
172 struct ia64_pal_retval iprv;
173 PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0);
174 *buffer_size = iprv.v0;
175 *vp_env_info = iprv.v1;
176 return iprv.status;
177}
178
179static inline s64 ia64_pal_vp_exit_env(u64 iva)
180{
181 struct ia64_pal_retval iprv;
182
183 PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0);
184 return iprv.status;
185}
186
187static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr,
188 u64 vbase_addr, u64 *vsa_base)
189{
190 struct ia64_pal_retval iprv;
191
192 PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr,
193 vbase_addr);
194 *vsa_base = iprv.v0;
195
196 return iprv.status;
197}
198
199static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector)
200{
201 struct ia64_pal_retval iprv;
202
203 PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0);
204
205 return iprv.status;
206}
207
208static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector)
209{
210 struct ia64_pal_retval iprv;
211
212 PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0);
213
214 return iprv.status;
215}
216
217#endif
218
219/*VPD field offset*/
220#define VPD_VAC_START_OFFSET 0
221#define VPD_VDC_START_OFFSET 8
222#define VPD_VHPI_START_OFFSET 256
223#define VPD_VGR_START_OFFSET 1024
224#define VPD_VBGR_START_OFFSET 1152
225#define VPD_VNAT_START_OFFSET 1280
226#define VPD_VBNAT_START_OFFSET 1288
227#define VPD_VCPUID_START_OFFSET 1296
228#define VPD_VPSR_START_OFFSET 1424
229#define VPD_VPR_START_OFFSET 1432
230#define VPD_VRSE_CFLE_START_OFFSET 1440
231#define VPD_VCR_START_OFFSET 2048
232#define VPD_VTPR_START_OFFSET 2576
233#define VPD_VRR_START_OFFSET 3072
234#define VPD_VMM_VAIL_START_OFFSET 31744
235
236/*Virtualization faults*/
237
238#define EVENT_MOV_TO_AR 1
239#define EVENT_MOV_TO_AR_IMM 2
240#define EVENT_MOV_FROM_AR 3
241#define EVENT_MOV_TO_CR 4
242#define EVENT_MOV_FROM_CR 5
243#define EVENT_MOV_TO_PSR 6
244#define EVENT_MOV_FROM_PSR 7
245#define EVENT_ITC_D 8
246#define EVENT_ITC_I 9
247#define EVENT_MOV_TO_RR 10
248#define EVENT_MOV_TO_DBR 11
249#define EVENT_MOV_TO_IBR 12
250#define EVENT_MOV_TO_PKR 13
251#define EVENT_MOV_TO_PMC 14
252#define EVENT_MOV_TO_PMD 15
253#define EVENT_ITR_D 16
254#define EVENT_ITR_I 17
255#define EVENT_MOV_FROM_RR 18
256#define EVENT_MOV_FROM_DBR 19
257#define EVENT_MOV_FROM_IBR 20
258#define EVENT_MOV_FROM_PKR 21
259#define EVENT_MOV_FROM_PMC 22
260#define EVENT_MOV_FROM_CPUID 23
261#define EVENT_SSM 24
262#define EVENT_RSM 25
263#define EVENT_PTC_L 26
264#define EVENT_PTC_G 27
265#define EVENT_PTC_GA 28
266#define EVENT_PTR_D 29
267#define EVENT_PTR_I 30
268#define EVENT_THASH 31
269#define EVENT_TTAG 32
270#define EVENT_TPA 33
271#define EVENT_TAK 34
272#define EVENT_PTC_E 35
273#define EVENT_COVER 36
274#define EVENT_RFI 37
275#define EVENT_BSW_0 38
276#define EVENT_BSW_1 39
277#define EVENT_VMSW 40
278
279/* PAL virtual services offsets */
280#define PAL_VPS_RESUME_NORMAL 0x0000
281#define PAL_VPS_RESUME_HANDLER 0x0400
282#define PAL_VPS_SYNC_READ 0x0800
283#define PAL_VPS_SYNC_WRITE 0x0c00
284#define PAL_VPS_SET_PENDING_INTERRUPT 0x1000
285#define PAL_VPS_THASH 0x1400
286#define PAL_VPS_TTAG 0x1800
287#define PAL_VPS_RESTORE 0x1c00
288#define PAL_VPS_SAVE 0x2000
289
290#endif	/* _KVM_VT_I_H */
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
new file mode 100644
index 000000000000..def4576d22b1
--- /dev/null
+++ b/arch/ia64/kvm/vtlb.c
@@ -0,0 +1,636 @@
1/*
2 * vtlb.c: guest virtual tlb handling module.
3 * Copyright (c) 2004, Intel Corporation.
4 * Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com>
5 * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
6 *
7 * Copyright (c) 2007, Intel Corporation.
8 * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
9 * Xiantao Zhang <xiantao.zhang@intel.com>
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms and conditions of the GNU General Public License,
13 * version 2, as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * more details.
19 *
20 * You should have received a copy of the GNU General Public License along with
21 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
22 * Place - Suite 330, Boston, MA 02111-1307 USA.
23 *
24 */
25
26#include "vcpu.h"
27
28#include <linux/rwsem.h>
29
30#include <asm/tlb.h>
31
32/*
33 * Check to see if the address rid:va is translated by the TLB
34 */
35
36static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va)
37{
38 return ((trp->p) && (trp->rid == rid)
39 && ((va-trp->vadr) < PSIZE(trp->ps)));
40}
41
42/*
43 * Only for GUEST TR format.
44 */
45static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva)
46{
47 u64 sa1, ea1;
48
49 if (!trp->p || trp->rid != rid)
50 return 0;
51
52 sa1 = trp->vadr;
53 ea1 = sa1 + PSIZE(trp->ps) - 1;
54 eva -= 1;
55 if ((sva > ea1) || (sa1 > eva))
56 return 0;
57 else
58 return 1;
59
60}
61
62void machine_tlb_purge(u64 va, u64 ps)
63{
64 ia64_ptcl(va, ps << 2);
65}
66
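/*
 * Purge the entire local TLB by walking the ptc.e address grid (the
 * base/count/stride parameters cached in the vcpu).
 */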
67void local_flush_tlb_all(void)
68{
69 int i, j;
70 unsigned long flags, count0, count1;
71 unsigned long stride0, stride1, addr;
72
73 addr = current_vcpu->arch.ptce_base;
74 count0 = current_vcpu->arch.ptce_count[0];
75 count1 = current_vcpu->arch.ptce_count[1];
76 stride0 = current_vcpu->arch.ptce_stride[0];
77 stride1 = current_vcpu->arch.ptce_stride[1];
78
79 local_irq_save(flags);
80 for (i = 0; i < count0; ++i) {
81 for (j = 0; j < count1; ++j) {
82 ia64_ptce(addr);
83 addr += stride1;
84 }
85 addr += stride0;
86 }
87 local_irq_restore(flags);
88 ia64_srlz_i(); /* srlz.i implies srlz.d */
89}
90
91int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref)
92{
93 union ia64_rr vrr;
94 union ia64_pta vpta;
95 struct ia64_psr vpsr;
96
97 vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
98 vrr.val = vcpu_get_rr(vcpu, vadr);
99 vpta.val = vcpu_get_pta(vcpu);
100
101 if (vrr.ve & vpta.ve) {
102 switch (ref) {
103 case DATA_REF:
104 case NA_REF:
105 return vpsr.dt;
106 case INST_REF:
107 return vpsr.dt && vpsr.it && vpsr.ic;
108 case RSE_REF:
109 return vpsr.dt && vpsr.rt;
110
111 }
112 }
113 return 0;
114}
115
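/*
 * Hash a guest address into the VTLB/VHPT hash table: entries are 32 bytes
 * (hence the final << 5); the index mixes the low 8 bits of the RID with the
 * page number, and the tag keeps the remaining RID and PFN bits so lookups
 * can detect collisions.
 */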
116struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag)
117{
118 u64 index, pfn, rid, pfn_bits;
119
120 pfn_bits = vpta.size - 5 - 8;
121 pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr);
122 rid = _REGION_ID(vrr);
123 index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1));
124 *tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16);
125
126 return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) +
127 (index << 5));
128}
129
130struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type)
131{
132
133 struct thash_data *trp;
134 int i;
135 u64 rid;
136
137 rid = vcpu_get_rr(vcpu, va);
138	rid = rid & RR_RID_MASK;
139 if (type == D_TLB) {
140 if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
141 for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
142 i < NDTRS; i++, trp++) {
143 if (__is_tr_translated(trp, rid, va))
144 return trp;
145 }
146 }
147 } else {
148 if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
149 for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
150 i < NITRS; i++, trp++) {
151 if (__is_tr_translated(trp, rid, va))
152 return trp;
153 }
154 }
155 }
156
157 return NULL;
158}
159
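/*
 * Insert a translation into the machine VHPT. The tag is set to
 * INVALID_TI_TAG before the entry is rewritten and only made valid again
 * afterwards, with an ia64_mf() in between, so the VHPT walker never sees
 * a half-updated entry.
 */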
160static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
161{
162 union ia64_rr rr;
163 struct thash_data *head;
164 unsigned long ps, gpaddr;
165
166 ps = itir_ps(itir);
167
168 gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
169 (ifa & ((1UL << ps) - 1));
170
171 rr.val = ia64_get_rr(ifa);
172 head = (struct thash_data *)ia64_thash(ifa);
173 head->etag = INVALID_TI_TAG;
174 ia64_mf();
175 head->page_flags = pte & ~PAGE_FLAGS_RV_MASK;
176 head->itir = rr.ps << 2;
177 head->etag = ia64_ttag(ifa);
178 head->gpaddr = gpaddr;
179}
180
181void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
182{
183 u64 i, dirty_pages = 1;
184 u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
185 spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
186 void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE)
187 + KVM_MEM_DIRTY_LOG_OFS;
188 dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
189
190 vmm_spin_lock(lock);
191 for (i = 0; i < dirty_pages; i++) {
192 /* avoid RMW */
193 if (!test_bit(base_gfn + i, dirty_bitmap))
194 set_bit(base_gfn + i , dirty_bitmap);
195 }
196 vmm_spin_unlock(lock);
197}
198
199void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type)
200{
201 u64 phy_pte, psr;
202 union ia64_rr mrr;
203
204 mrr.val = ia64_get_rr(va);
205 phy_pte = translate_phy_pte(&pte, itir, va);
206
207 if (itir_ps(itir) >= mrr.ps) {
208 vhpt_insert(phy_pte, itir, va, pte);
209 } else {
210 phy_pte &= ~PAGE_FLAGS_RV_MASK;
211 psr = ia64_clear_ic();
212 ia64_itc(type, va, phy_pte, itir_ps(itir));
213 ia64_set_psr(psr);
214 }
215
216 if (!(pte&VTLB_PTE_IO))
217 mark_pages_dirty(v, pte, itir_ps(itir));
218}
219
220/*
221 * vhpt lookup
222 */
223struct thash_data *vhpt_lookup(u64 va)
224{
225 struct thash_data *head;
226 u64 tag;
227
228 head = (struct thash_data *)ia64_thash(va);
229 tag = ia64_ttag(va);
230 if (head->etag == tag)
231 return head;
232 return NULL;
233}
234
235u64 guest_vhpt_lookup(u64 iha, u64 *pte)
236{
237 u64 ret;
238 struct thash_data *data;
239
240 data = __vtr_lookup(current_vcpu, iha, D_TLB);
241 if (data != NULL)
242 thash_vhpt_insert(current_vcpu, data->page_flags,
243 data->itir, iha, D_TLB);
244
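	/*
	 * Speculatively load the guest VHPT entry at iha with ld8.s: if the
	 * load faults, the NaT bit is set and 1 is returned; otherwise the low
	 * 53 bits of the entry are stored through 'pte' and 0 is returned.
	 */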
245 asm volatile ("rsm psr.ic|psr.i;;"
246 "srlz.d;;"
247 "ld8.s r9=[%1];;"
248 "tnat.nz p6,p7=r9;;"
249 "(p6) mov %0=1;"
250 "(p6) mov r9=r0;"
251 "(p7) extr.u r9=r9,0,53;;"
252 "(p7) mov %0=r0;"
253 "(p7) st8 [%2]=r9;;"
254 "ssm psr.ic;;"
255 "srlz.d;;"
256 /* "ssm psr.i;;" Once interrupts in vmm open, need fix*/
257 : "=r"(ret) : "r"(iha), "r"(pte):"memory");
258
259 return ret;
260}
261
262/*
263 * purge software guest tlb
264 */
265
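/*
 * psbits[] records, per virtual region (va >> 61), which page sizes have
 * been inserted into the guest VTLB, so the purge only needs to walk the
 * hash for those sizes.
 */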
266static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps)
267{
268 struct thash_data *cur;
269 u64 start, curadr, size, psbits, tag, rr_ps, num;
270 union ia64_rr vrr;
271 struct thash_cb *hcb = &v->arch.vtlb;
272
273 vrr.val = vcpu_get_rr(v, va);
274 psbits = VMX(v, psbits[(va >> 61)]);
275 start = va & ~((1UL << ps) - 1);
276 while (psbits) {
277 curadr = start;
278 rr_ps = __ffs(psbits);
279 psbits &= ~(1UL << rr_ps);
280 num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps));
281 size = PSIZE(rr_ps);
282 vrr.ps = rr_ps;
283 while (num) {
284 cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag);
285 if (cur->etag == tag && cur->ps == rr_ps)
286 cur->etag = INVALID_TI_TAG;
287 curadr += size;
288 num--;
289 }
290 }
291}
292
293
294/*
295 * purge VHPT and machine TLB
296 */
297static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps)
298{
299 struct thash_data *cur;
300 u64 start, size, tag, num;
301 union ia64_rr rr;
302
303 start = va & ~((1UL << ps) - 1);
304 rr.val = ia64_get_rr(va);
305 size = PSIZE(rr.ps);
306 num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps));
307 while (num) {
308 cur = (struct thash_data *)ia64_thash(start);
309 tag = ia64_ttag(start);
310 if (cur->etag == tag)
311 cur->etag = INVALID_TI_TAG;
312 start += size;
313 num--;
314 }
315 machine_tlb_purge(va, ps);
316}
317
318/*
319 * Insert an entry into the hash TLB or VHPT.
320 * NOTES:
321 * 1: When inserting a VHPT entry into the thash, "va" must be an address
322 * covered by the inserted machine VHPT entry.
323 * 2: The entry is always in TLB format.
324 * 3: The caller needs to make sure the new entry will not overlap
325 * with any existing entry.
326 */
327void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va)
328{
329 struct thash_data *head;
330 union ia64_rr vrr;
331 u64 tag;
332 struct thash_cb *hcb = &v->arch.vtlb;
333
334 vrr.val = vcpu_get_rr(v, va);
335 vrr.ps = itir_ps(itir);
336 VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
337 head = vsa_thash(hcb->pta, va, vrr.val, &tag);
338 head->page_flags = pte;
339 head->itir = itir;
340 head->etag = tag;
341}
342
343int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type)
344{
345 struct thash_data *trp;
346 int i;
347 u64 end, rid;
348
349 rid = vcpu_get_rr(vcpu, va);
350 rid = rid & RR_RID_MASK;
351 end = va + PSIZE(ps);
352 if (type == D_TLB) {
353 if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
354 for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
355 i < NDTRS; i++, trp++) {
356 if (__is_tr_overlap(trp, rid, va, end))
357 return i;
358 }
359 }
360 } else {
361 if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
362 for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
363 i < NITRS; i++, trp++) {
364 if (__is_tr_overlap(trp, rid, va, end))
365 return i;
366 }
367 }
368 }
369 return -1;
370}
371
372/*
373 * Purge entries in VTLB and VHPT
374 */
375void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps)
376{
377 if (vcpu_quick_region_check(v->arch.tc_regions, va))
378 vtlb_purge(v, va, ps);
379 vhpt_purge(v, va, ps);
380}
381
382void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps)
383{
384 u64 old_va = va;
385 va = REGION_OFFSET(va);
386 if (vcpu_quick_region_check(v->arch.tc_regions, old_va))
387 vtlb_purge(v, va, ps);
388 vhpt_purge(v, va, ps);
389}
390
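/*
 * Convert a guest pte into a machine ("physical") pte: the guest physical
 * page number is translated through the p2m table via kvm_lookup_mpa().
 * MMIO pages are flagged with VTLB_PTE_IO in the guest pte and get no
 * machine mapping (-1 is returned).
 */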
391u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
392{
393 u64 ps, ps_mask, paddr, maddr;
394 union pte_flags phy_pte;
395
396 ps = itir_ps(itir);
397 ps_mask = ~((1UL << ps) - 1);
398 phy_pte.val = *pte;
399 paddr = *pte;
400 paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask);
401 maddr = kvm_lookup_mpa(paddr >> PAGE_SHIFT);
402 if (maddr & GPFN_IO_MASK) {
403 *pte |= VTLB_PTE_IO;
404 return -1;
405 }
406 maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) |
407 (paddr & ~PAGE_MASK);
408 phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT;
409 return phy_pte.val;
410}
411
412/*
413 * Purge overlapping TCs and then insert the new entry, to emulate itc ops.
414 * Notes: Only TC entries can be purged and inserted this way.
415 * Returns 1 if the mapping is MMIO.
416 */
417int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
418 u64 ifa, int type)
419{
420 u64 ps;
421 u64 phy_pte;
422 union ia64_rr vrr, mrr;
423 int ret = 0;
424
425 ps = itir_ps(itir);
426 vrr.val = vcpu_get_rr(v, ifa);
427 mrr.val = ia64_get_rr(ifa);
428
429 phy_pte = translate_phy_pte(&pte, itir, ifa);
430
431 /* Ensure WB attribute if pte is related to a normal mem page,
432 * which is required by vga acceleration since qemu maps shared
433 * vram buffer with WB.
434 */
435 if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT)) {
436 pte &= ~_PAGE_MA_MASK;
437 phy_pte &= ~_PAGE_MA_MASK;
438 }
439
440 if (pte & VTLB_PTE_IO)
441 ret = 1;
442
443 vtlb_purge(v, ifa, ps);
444 vhpt_purge(v, ifa, ps);
445
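	/*
	 * Place the new translation according to guest page size (ps) vs. the
	 * host region page size (mrr.ps): equal sizes go straight into the
	 * machine VHPT (VTLB only for MMIO); a larger guest page is tracked in
	 * the VTLB and, for RAM, also in the VHPT; a smaller one is installed
	 * directly in the machine TLB with itc.
	 */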
446 if (ps == mrr.ps) {
447 if (!(pte&VTLB_PTE_IO)) {
448 vhpt_insert(phy_pte, itir, ifa, pte);
449 } else {
450 vtlb_insert(v, pte, itir, ifa);
451 vcpu_quick_region_set(VMX(v, tc_regions), ifa);
452 }
453 } else if (ps > mrr.ps) {
454 vtlb_insert(v, pte, itir, ifa);
455 vcpu_quick_region_set(VMX(v, tc_regions), ifa);
456 if (!(pte&VTLB_PTE_IO))
457 vhpt_insert(phy_pte, itir, ifa, pte);
458 } else {
459 u64 psr;
460 phy_pte &= ~PAGE_FLAGS_RV_MASK;
461 psr = ia64_clear_ic();
462 ia64_itc(type, ifa, phy_pte, ps);
463 ia64_set_psr(psr);
464 }
465 if (!(pte&VTLB_PTE_IO))
466 mark_pages_dirty(v, pte, ps);
467
468 return ret;
469}
470
471/*
472 * Purge all TC and VHPT entries, including those in the hash table.
473 *
474 */
475
476void thash_purge_all(struct kvm_vcpu *v)
477{
478 int i;
479 struct thash_data *head;
480 struct thash_cb *vtlb, *vhpt;
481 vtlb = &v->arch.vtlb;
482 vhpt = &v->arch.vhpt;
483
484 for (i = 0; i < 8; i++)
485 VMX(v, psbits[i]) = 0;
486
487 head = vtlb->hash;
488 for (i = 0; i < vtlb->num; i++) {
489 head->page_flags = 0;
490 head->etag = INVALID_TI_TAG;
491 head->itir = 0;
492 head->next = 0;
493 head++;
494	}
495
496 head = vhpt->hash;
497 for (i = 0; i < vhpt->num; i++) {
498 head->page_flags = 0;
499 head->etag = INVALID_TI_TAG;
500 head->itir = 0;
501 head->next = 0;
502 head++;
503	}
504
505 local_flush_tlb_all();
506}
507
508
509/*
510 * Look up the hash table and its collision chain to find an entry
511 * covering this address rid:va.
512 *
513 * INPUT:
514 * in: TLB format for both VHPT & TLB.
515 */
516
517struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
518{
519 struct thash_data *cch;
520 u64 psbits, ps, tag;
521 union ia64_rr vrr;
522
523 struct thash_cb *hcb = &v->arch.vtlb;
524
525	cch = __vtr_lookup(v, va, is_data);
526 if (cch)
527 return cch;
528
529 if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0)
530 return NULL;
531
532 psbits = VMX(v, psbits[(va >> 61)]);
533 vrr.val = vcpu_get_rr(v, va);
534 while (psbits) {
535 ps = __ffs(psbits);
536 psbits &= ~(1UL << ps);
537 vrr.ps = ps;
538 cch = vsa_thash(hcb->pta, va, vrr.val, &tag);
539 if (cch->etag == tag && cch->ps == ps)
540 return cch;
541 }
542
543 return NULL;
544}
545
546
547/*
548 * Initialize internal control data before service.
549 */
550void thash_init(struct thash_cb *hcb, u64 sz)
551{
552 int i;
553 struct thash_data *head;
554
555 hcb->pta.val = (unsigned long)hcb->hash;
556 hcb->pta.vf = 1;
557 hcb->pta.ve = 1;
558 hcb->pta.size = sz;
559 head = hcb->hash;
560 for (i = 0; i < hcb->num; i++) {
561 head->page_flags = 0;
562 head->itir = 0;
563 head->etag = INVALID_TI_TAG;
564 head->next = 0;
565 head++;
566 }
567}
568
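/*
 * The guest-physical to machine-physical (p2m) table is a flat array of
 * 8-byte entries at KVM_P2M_BASE, indexed by guest page frame number.
 */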
569u64 kvm_lookup_mpa(u64 gpfn)
570{
571 u64 *base = (u64 *) KVM_P2M_BASE;
572 return *(base + gpfn);
573}
574
575u64 kvm_gpa_to_mpa(u64 gpa)
576{
577 u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT);
578 return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
579}
580
581
582/*
583 * Fetch guest bundle code.
584 * INPUT:
585 * gip: guest ip
586 * pbundle: used to return fetched bundle.
587 */
588int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
589{
590 u64 gpip = 0; /* guest physical IP*/
591 u64 *vpa;
592 struct thash_data *tlb;
593 u64 maddr;
594
595 if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) {
596 /* I-side physical mode */
597 gpip = gip;
598 } else {
599 tlb = vtlb_lookup(vcpu, gip, I_TLB);
600 if (tlb)
601 gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) |
602 (gip & (PSIZE(tlb->ps) - 1));
603 }
604 if (gpip) {
605 maddr = kvm_gpa_to_mpa(gpip);
606 } else {
607 tlb = vhpt_lookup(gip);
608 if (tlb == NULL) {
609 ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2);
610 return IA64_FAULT;
611 }
612 maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps)
613 | (gip & (PSIZE(tlb->ps) - 1));
614 }
615 vpa = (u64 *)__kvm_va(maddr);
616
617 pbundle->i64[0] = *vpa++;
618 pbundle->i64[1] = *vpa;
619
620 return IA64_NO_FAULT;
621}
622
623
624void kvm_init_vhpt(struct kvm_vcpu *v)
625{
626 v->arch.vhpt.num = VHPT_NUM_ENTRIES;
627 thash_init(&v->arch.vhpt, VHPT_SHIFT);
628 ia64_set_pta(v->arch.vhpt.pta.val);
629 /*Enable VHPT here?*/
630}
631
632void kvm_init_vtlb(struct kvm_vcpu *v)
633{
634 v->arch.vtlb.num = VTLB_NUM_ENTRIES;
635 thash_init(&v->arch.vtlb, VTLB_SHIFT);
636}
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 20f45a8b87e3..4e40c122bf26 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -803,3 +803,4 @@ config PPC_CLOCK
803config PPC_LIB_RHEAP
804	bool
805
806source "arch/powerpc/kvm/Kconfig"
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index a86d8d853214..807a2dce6263 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -151,6 +151,9 @@ config BOOTX_TEXT
151
152config PPC_EARLY_DEBUG
153	bool "Early debugging (dangerous)"
154 # PPC_EARLY_DEBUG on 440 leaves AS=1 mappings above the TLB high water
155 # mark, which doesn't work with current 440 KVM.
156 depends on !KVM
157	help
158	  Say Y to enable some early debugging facilities that may be available
159	  for your processor/board combination. Those facilities are hacks
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index e2ec4a91ccef..9dcdc036cdf7 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -145,6 +145,7 @@ core-y += arch/powerpc/kernel/ \
145				   arch/powerpc/platforms/
146core-$(CONFIG_MATH_EMULATION)	+= arch/powerpc/math-emu/
147core-$(CONFIG_XMON)		+= arch/powerpc/xmon/
148core-$(CONFIG_KVM) += arch/powerpc/kvm/
149
150drivers-$(CONFIG_OPROFILE)	+= arch/powerpc/oprofile/
151
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index adf1d09d726f..62134845af08 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -23,6 +23,9 @@
23#include <linux/mm.h>
24#include <linux/suspend.h>
25#include <linux/hrtimer.h>
26#ifdef CONFIG_KVM
27#include <linux/kvm_host.h>
28#endif
29#ifdef CONFIG_PPC64
30#include <linux/time.h>
31#include <linux/hardirq.h>
@@ -324,5 +327,30 @@ int main(void)
327
328	DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
329
330#ifdef CONFIG_KVM
331 DEFINE(TLBE_BYTES, sizeof(struct tlbe));
332
333 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
334 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
335 DEFINE(VCPU_HOST_TLB, offsetof(struct kvm_vcpu, arch.host_tlb));
336 DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb));
337 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
338 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
339 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
340 DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
341 DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
342 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
343 DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr));
344 DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
345 DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
346 DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
347 DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7));
348 DEFINE(VCPU_PID, offsetof(struct kvm_vcpu, arch.pid));
349
350 DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
351 DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
352 DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
353#endif
354
355	return 0;
356}
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
new file mode 100644
index 000000000000..f5d7a5eab96e
--- /dev/null
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -0,0 +1,224 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2007
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <linux/types.h>
21#include <linux/string.h>
22#include <linux/kvm_host.h>
23#include <linux/highmem.h>
24#include <asm/mmu-44x.h>
25#include <asm/kvm_ppc.h>
26
27#include "44x_tlb.h"
28
29#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
30#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
31
32static unsigned int kvmppc_tlb_44x_pos;
33
34static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
35{
36 /* Mask off reserved bits. */
37 attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK;
38
39 if (!usermode) {
40 /* Guest is in supervisor mode, so we need to translate guest
41 * supervisor permissions into user permissions. */
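		/* In TLB word2 the U* permission bits sit three bit positions
		 * above the S* bits, hence the shift below. */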
42 attrib &= ~PPC44x_TLB_USER_PERM_MASK;
43 attrib |= (attrib & PPC44x_TLB_SUPER_PERM_MASK) << 3;
44 }
45
46 /* Make sure host can always access this memory. */
47 attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW;
48
49 return attrib;
50}
51
52/* Search the guest TLB for a matching entry. */
53int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
54 unsigned int as)
55{
56 int i;
57
58 /* XXX Replace loop with fancy data structures. */
59 for (i = 0; i < PPC44x_TLB_SIZE; i++) {
60 struct tlbe *tlbe = &vcpu->arch.guest_tlb[i];
61 unsigned int tid;
62
63 if (eaddr < get_tlb_eaddr(tlbe))
64 continue;
65
66 if (eaddr > get_tlb_end(tlbe))
67 continue;
68
69 tid = get_tlb_tid(tlbe);
70 if (tid && (tid != pid))
71 continue;
72
73 if (!get_tlb_v(tlbe))
74 continue;
75
76 if (get_tlb_ts(tlbe) != as)
77 continue;
78
79 return i;
80 }
81
82 return -1;
83}
84
85struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
86{
87 unsigned int as = !!(vcpu->arch.msr & MSR_IS);
88 unsigned int index;
89
90 index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
91 if (index == -1)
92 return NULL;
93 return &vcpu->arch.guest_tlb[index];
94}
95
96struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
97{
98 unsigned int as = !!(vcpu->arch.msr & MSR_DS);
99 unsigned int index;
100
101 index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
102 if (index == -1)
103 return NULL;
104 return &vcpu->arch.guest_tlb[index];
105}
106
107static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe)
108{
109 return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW);
110}
111
112/* Must be called with mmap_sem locked for writing. */
113static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
114 unsigned int index)
115{
116 struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
117 struct page *page = vcpu->arch.shadow_pages[index];
118
119 kunmap(vcpu->arch.shadow_pages[index]);
120
121 if (get_tlb_v(stlbe)) {
122 if (kvmppc_44x_tlbe_is_writable(stlbe))
123 kvm_release_page_dirty(page);
124 else
125 kvm_release_page_clean(page);
126 }
127}
128
129/* Caller must ensure that the specified guest TLB entry is safe to insert into
130 * the shadow TLB. */
131void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
132 u32 flags)
133{
134 struct page *new_page;
135 struct tlbe *stlbe;
136 hpa_t hpaddr;
137 unsigned int victim;
138
139 /* Future optimization: don't overwrite the TLB entry containing the
140 * current PC (or stack?). */
141 victim = kvmppc_tlb_44x_pos++;
142 if (kvmppc_tlb_44x_pos > tlb_44x_hwater)
143 kvmppc_tlb_44x_pos = 0;
144 stlbe = &vcpu->arch.shadow_tlb[victim];
145
146 /* Get reference to new page. */
147 down_write(&current->mm->mmap_sem);
148 new_page = gfn_to_page(vcpu->kvm, gfn);
149 if (is_error_page(new_page)) {
150 printk(KERN_ERR "Couldn't get guest page!\n");
151 kvm_release_page_clean(new_page);
152 return;
153 }
154 hpaddr = page_to_phys(new_page);
155
156 /* Drop reference to old page. */
157 kvmppc_44x_shadow_release(vcpu, victim);
158 up_write(&current->mm->mmap_sem);
159
160 vcpu->arch.shadow_pages[victim] = new_page;
161
162 /* XXX Make sure (va, size) doesn't overlap any other
163 * entries. 440x6 user manual says the result would be
164 * "undefined." */
165
166 /* XXX what about AS? */
167
168 stlbe->tid = asid & 0xff;
169
170 /* Force TS=1 for all guest mappings. */
171 /* For now we hardcode 4KB mappings, but it will be important to
172 * use host large pages in the future. */
173 stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS
174 | PPC44x_TLB_4K;
175
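	/* word1: real page number (RPN) in the upper bits plus the 4-bit ERPN
	 * (physical address bits 32-35) in the low nibble. */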
176 stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
177 stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags,
178 vcpu->arch.msr & MSR_PR);
179}
180
181void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, u64 eaddr, u64 asid)
182{
183 unsigned int pid = asid & 0xff;
184 int i;
185
186 /* XXX Replace loop with fancy data structures. */
187 down_write(&current->mm->mmap_sem);
188 for (i = 0; i <= tlb_44x_hwater; i++) {
189 struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
190 unsigned int tid;
191
192 if (!get_tlb_v(stlbe))
193 continue;
194
195 if (eaddr < get_tlb_eaddr(stlbe))
196 continue;
197
198 if (eaddr > get_tlb_end(stlbe))
199 continue;
200
201 tid = get_tlb_tid(stlbe);
202 if (tid && (tid != pid))
203 continue;
204
205 kvmppc_44x_shadow_release(vcpu, i);
206 stlbe->word0 = 0;
207 }
208 up_write(&current->mm->mmap_sem);
209}
210
211/* Invalidate all mappings, so that when they fault back in they will get the
212 * proper permission bits. */
213void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
214{
215 int i;
216
217 /* XXX Replace loop with fancy data structures. */
218 down_write(&current->mm->mmap_sem);
219 for (i = 0; i <= tlb_44x_hwater; i++) {
220 kvmppc_44x_shadow_release(vcpu, i);
221 vcpu->arch.shadow_tlb[i].word0 = 0;
222 }
223 up_write(&current->mm->mmap_sem);
224}
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
new file mode 100644
index 000000000000..2ccd46b6f6b7
--- /dev/null
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -0,0 +1,91 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2007
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#ifndef __KVM_POWERPC_TLB_H__
21#define __KVM_POWERPC_TLB_H__
22
23#include <linux/kvm_host.h>
24#include <asm/mmu-44x.h>
25
26extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
27 unsigned int pid, unsigned int as);
28extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
29extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
30
31/* TLB helper functions */
32static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
33{
34 return (tlbe->word0 >> 4) & 0xf;
35}
36
37static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
38{
39 return tlbe->word0 & 0xfffffc00;
40}
41
42static inline gva_t get_tlb_bytes(const struct tlbe *tlbe)
43{
44 unsigned int pgsize = get_tlb_size(tlbe);
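	/* Page sizes grow by powers of 4 starting at 1KB:
	 * SIZE=0 -> 1KB, 1 -> 4KB, 2 -> 16KB, ... */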
45 return 1 << 10 << (pgsize << 1);
46}
47
48static inline gva_t get_tlb_end(const struct tlbe *tlbe)
49{
50 return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
51}
52
53static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
54{
55 u64 word1 = tlbe->word1;
56 return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00);
57}
58
59static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
60{
61 return tlbe->tid & 0xff;
62}
63
64static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
65{
66 return (tlbe->word0 >> 8) & 0x1;
67}
68
69static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
70{
71 return (tlbe->word0 >> 9) & 0x1;
72}
73
74static inline unsigned int get_mmucr_stid(const struct kvm_vcpu *vcpu)
75{
76 return vcpu->arch.mmucr & 0xff;
77}
78
79static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
80{
81 return (vcpu->arch.mmucr >> 16) & 0x1;
82}
83
84static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr)
85{
86 unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
87
88 return get_tlb_raddr(tlbe) | (eaddr & pgmask);
89}
90
91#endif /* __KVM_POWERPC_TLB_H__ */
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
new file mode 100644
index 000000000000..6b076010213b
--- /dev/null
+++ b/arch/powerpc/kvm/Kconfig
@@ -0,0 +1,42 @@
1#
2# KVM configuration
3#
4
5menuconfig VIRTUALIZATION
6 bool "Virtualization"
7 ---help---
8 Say Y here to get to see options for using your Linux host to run
9 other operating systems inside virtual machines (guests).
10 This option alone does not add any kernel code.
11
12 If you say N, all options in this submenu will be skipped and
13 disabled.
14
15if VIRTUALIZATION
16
17config KVM
18 bool "Kernel-based Virtual Machine (KVM) support"
19 depends on 44x && EXPERIMENTAL
20 select PREEMPT_NOTIFIERS
21 select ANON_INODES
22 # We can only run on Book E hosts so far
23 select KVM_BOOKE_HOST
24 ---help---
25 Support hosting virtualized guest machines. You will also
26 need to select one or more of the processor modules below.
27
28 This module provides access to the hardware capabilities through
29 a character device node named /dev/kvm.
30
31 If unsure, say N.
32
33config KVM_BOOKE_HOST
34 bool "KVM host support for Book E PowerPC processors"
35 depends on KVM && 44x
36 ---help---
37 Provides host support for KVM on Book E PowerPC processors. Currently
38 this works on 440 processors only.
39
40source drivers/virtio/Kconfig
41
42endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
new file mode 100644
index 000000000000..d0d358d367ec
--- /dev/null
+++ b/arch/powerpc/kvm/Makefile
@@ -0,0 +1,15 @@
1#
2# Makefile for Kernel-based Virtual Machine module
3#
4
5EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm
6
7common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
8
9kvm-objs := $(common-objs) powerpc.o emulate.o booke_guest.o
10obj-$(CONFIG_KVM) += kvm.o
11
12AFLAGS_booke_interrupts.o := -I$(obj)
13
14kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o
15obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c
new file mode 100644
index 000000000000..6d9884a6884a
--- /dev/null
+++ b/arch/powerpc/kvm/booke_guest.c
@@ -0,0 +1,615 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2007
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
19 */
20
21#include <linux/errno.h>
22#include <linux/err.h>
23#include <linux/kvm_host.h>
24#include <linux/module.h>
25#include <linux/vmalloc.h>
26#include <linux/fs.h>
27#include <asm/cputable.h>
28#include <asm/uaccess.h>
29#include <asm/kvm_ppc.h>
30
31#include "44x_tlb.h"
32
33#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
34#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
35
36struct kvm_stats_debugfs_item debugfs_entries[] = {
37 { "exits", VCPU_STAT(sum_exits) },
38 { "mmio", VCPU_STAT(mmio_exits) },
39 { "dcr", VCPU_STAT(dcr_exits) },
40 { "sig", VCPU_STAT(signal_exits) },
41 { "light", VCPU_STAT(light_exits) },
42 { "itlb_r", VCPU_STAT(itlb_real_miss_exits) },
43 { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) },
44 { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) },
45 { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) },
46 { "sysc", VCPU_STAT(syscall_exits) },
47 { "isi", VCPU_STAT(isi_exits) },
48 { "dsi", VCPU_STAT(dsi_exits) },
49 { "inst_emu", VCPU_STAT(emulated_inst_exits) },
50 { "dec", VCPU_STAT(dec_exits) },
51 { "ext_intr", VCPU_STAT(ext_intr_exits) },
52 { NULL }
53};
54
55static const u32 interrupt_msr_mask[16] = {
56 [BOOKE_INTERRUPT_CRITICAL] = MSR_ME,
57 [BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
58 [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
59 [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
60 [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE,
61 [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE,
62 [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE,
63 [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
64 [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE,
65 [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
66 [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE,
67 [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE,
68 [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME,
69 [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
70 [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
71 [BOOKE_INTERRUPT_DEBUG] = MSR_ME,
72};
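/* How the table above is consumed (see kvmppc_deliver_interrupt() below): on
 * delivery the guest MSR is ANDed with the mask, so a critical interrupt
 * (mask MSR_ME) leaves at most machine checks enabled, while the common
 * MSR_CE|MSR_ME|MSR_DE mask keeps critical, machine-check and debug
 * interrupts enabled but clears EE, PR, IS and DS. */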
73
74const unsigned char exception_priority[] = {
75 [BOOKE_INTERRUPT_DATA_STORAGE] = 0,
76 [BOOKE_INTERRUPT_INST_STORAGE] = 1,
77 [BOOKE_INTERRUPT_ALIGNMENT] = 2,
78 [BOOKE_INTERRUPT_PROGRAM] = 3,
79 [BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
80 [BOOKE_INTERRUPT_SYSCALL] = 5,
81 [BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
82 [BOOKE_INTERRUPT_DTLB_MISS] = 7,
83 [BOOKE_INTERRUPT_ITLB_MISS] = 8,
84 [BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
85 [BOOKE_INTERRUPT_DEBUG] = 10,
86 [BOOKE_INTERRUPT_CRITICAL] = 11,
87 [BOOKE_INTERRUPT_WATCHDOG] = 12,
88 [BOOKE_INTERRUPT_EXTERNAL] = 13,
89 [BOOKE_INTERRUPT_FIT] = 14,
90 [BOOKE_INTERRUPT_DECREMENTER] = 15,
91};
92
93const unsigned char priority_exception[] = {
94 BOOKE_INTERRUPT_DATA_STORAGE,
95 BOOKE_INTERRUPT_INST_STORAGE,
96 BOOKE_INTERRUPT_ALIGNMENT,
97 BOOKE_INTERRUPT_PROGRAM,
98 BOOKE_INTERRUPT_FP_UNAVAIL,
99 BOOKE_INTERRUPT_SYSCALL,
100 BOOKE_INTERRUPT_AP_UNAVAIL,
101 BOOKE_INTERRUPT_DTLB_MISS,
102 BOOKE_INTERRUPT_ITLB_MISS,
103 BOOKE_INTERRUPT_MACHINE_CHECK,
104 BOOKE_INTERRUPT_DEBUG,
105 BOOKE_INTERRUPT_CRITICAL,
106 BOOKE_INTERRUPT_WATCHDOG,
107 BOOKE_INTERRUPT_EXTERNAL,
108 BOOKE_INTERRUPT_FIT,
109 BOOKE_INTERRUPT_DECREMENTER,
110};
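/* The two tables above are inverses of each other: for every interrupt i,
 * priority_exception[exception_priority[i]] == i.  exception_priority[] gives
 * the bit used in vcpu->arch.pending_exceptions when an exception is queued,
 * and priority_exception[] maps that bit back to an interrupt number when it
 * is delivered (see kvmppc_check_and_deliver_interrupts() below). */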
111
112
113void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
114{
115 struct tlbe *tlbe;
116 int i;
117
118 printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
119 printk("| %2s | %3s | %8s | %8s | %8s |\n",
120 "nr", "tid", "word0", "word1", "word2");
121
122 for (i = 0; i < PPC44x_TLB_SIZE; i++) {
123 tlbe = &vcpu->arch.guest_tlb[i];
124 if (tlbe->word0 & PPC44x_TLB_VALID)
125 printk(" G%2d | %02X | %08X | %08X | %08X |\n",
126 i, tlbe->tid, tlbe->word0, tlbe->word1,
127 tlbe->word2);
128 }
129
130 for (i = 0; i < PPC44x_TLB_SIZE; i++) {
131 tlbe = &vcpu->arch.shadow_tlb[i];
132 if (tlbe->word0 & PPC44x_TLB_VALID)
133 printk(" S%2d | %02X | %08X | %08X | %08X |\n",
134 i, tlbe->tid, tlbe->word0, tlbe->word1,
135 tlbe->word2);
136 }
137}
138
139/* TODO: use vcpu_printf() */
140void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
141{
142 int i;
143
144 printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr);
145 printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr);
146 printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1);
147
148 printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
149
150 for (i = 0; i < 32; i += 4) {
151 printk("gpr%02d: %08x %08x %08x %08x\n", i,
152 vcpu->arch.gpr[i],
153 vcpu->arch.gpr[i+1],
154 vcpu->arch.gpr[i+2],
155 vcpu->arch.gpr[i+3]);
156 }
157}
158
159/* Check if we are ready to deliver the interrupt */
160static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
161{
162 int r;
163
164 switch (interrupt) {
165 case BOOKE_INTERRUPT_CRITICAL:
166 r = vcpu->arch.msr & MSR_CE;
167 break;
168 case BOOKE_INTERRUPT_MACHINE_CHECK:
169 r = vcpu->arch.msr & MSR_ME;
170 break;
171 case BOOKE_INTERRUPT_EXTERNAL:
172 r = vcpu->arch.msr & MSR_EE;
173 break;
174 case BOOKE_INTERRUPT_DECREMENTER:
175 r = vcpu->arch.msr & MSR_EE;
176 break;
177 case BOOKE_INTERRUPT_FIT:
178 r = vcpu->arch.msr & MSR_EE;
179 break;
180 case BOOKE_INTERRUPT_WATCHDOG:
181 r = vcpu->arch.msr & MSR_CE;
182 break;
183 case BOOKE_INTERRUPT_DEBUG:
184 r = vcpu->arch.msr & MSR_DE;
185 break;
186 default:
187 r = 1;
188 }
189
190 return r;
191}
192
193static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
194{
195 switch (interrupt) {
196 case BOOKE_INTERRUPT_DECREMENTER:
197 vcpu->arch.tsr |= TSR_DIS;
198 break;
199 }
200
201 vcpu->arch.srr0 = vcpu->arch.pc;
202 vcpu->arch.srr1 = vcpu->arch.msr;
203 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
204 kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
205}
206
207/* Check pending exceptions and deliver one, if possible. */
208void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu)
209{
210 unsigned long *pending = &vcpu->arch.pending_exceptions;
211 unsigned int exception;
212 unsigned int priority;
213
214 priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending));
215 while (priority <= BOOKE_MAX_INTERRUPT) {
216 exception = priority_exception[priority];
217 if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
218 kvmppc_clear_exception(vcpu, exception);
219 kvmppc_deliver_interrupt(vcpu, exception);
220 break;
221 }
222
223 priority = find_next_bit(pending,
224 BITS_PER_BYTE * sizeof(*pending),
225 priority + 1);
226 }
227}
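/* Note: the bitmap is indexed by priority, so find_first_bit()/find_next_bit()
 * walk the pending exceptions from the lowest-numbered bit upwards, and the
 * loop above delivers at most one exception per call -- the first pending one
 * that the current guest MSR allows. */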
228
229static int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
230{
231 enum emulation_result er;
232 int r;
233
234 er = kvmppc_emulate_instruction(run, vcpu);
235 switch (er) {
236 case EMULATE_DONE:
237 /* Future optimization: only reload non-volatiles if they were
238 * actually modified. */
239 r = RESUME_GUEST_NV;
240 break;
241 case EMULATE_DO_MMIO:
242 run->exit_reason = KVM_EXIT_MMIO;
243 /* We must reload nonvolatiles because "update" load/store
244 * instructions modify register state. */
245 /* Future optimization: only reload non-volatiles if they were
246 * actually modified. */
247 r = RESUME_HOST_NV;
248 break;
249 case EMULATE_FAIL:
250 /* XXX Deliver Program interrupt to guest. */
251 printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__,
252 vcpu->arch.last_inst);
253 r = RESUME_HOST;
254 break;
255 default:
256 BUG();
257 }
258
259 return r;
260}
261
262/**
263 * kvmppc_handle_exit
264 *
265 * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
266 */
267int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
268 unsigned int exit_nr)
269{
270 enum emulation_result er;
271 int r = RESUME_HOST;
272
273 local_irq_enable();
274
275 run->exit_reason = KVM_EXIT_UNKNOWN;
276 run->ready_for_interrupt_injection = 1;
277
278 switch (exit_nr) {
279 case BOOKE_INTERRUPT_MACHINE_CHECK:
280 printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
281 kvmppc_dump_vcpu(vcpu);
282 r = RESUME_HOST;
283 break;
284
285 case BOOKE_INTERRUPT_EXTERNAL:
286 case BOOKE_INTERRUPT_DECREMENTER:
287 /* Since we switched IVPR back to the host's value, the host
288 * handled this interrupt the moment we enabled interrupts.
289 * Now we just offer it a chance to reschedule the guest. */
290
291 /* XXX At this point the TLB still holds our shadow TLB, so if
292 * we do reschedule the host will fault over it. Perhaps we
293 * should politely restore the host's entries to minimize
294 * misses before ceding control. */
295 if (need_resched())
296 cond_resched();
297 if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
298 vcpu->stat.dec_exits++;
299 else
300 vcpu->stat.ext_intr_exits++;
301 r = RESUME_GUEST;
302 break;
303
304 case BOOKE_INTERRUPT_PROGRAM:
305 if (vcpu->arch.msr & MSR_PR) {
306 /* Program traps generated by user-level software must be handled
307 * by the guest kernel. */
308 vcpu->arch.esr = vcpu->arch.fault_esr;
309 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
310 r = RESUME_GUEST;
311 break;
312 }
313
314 er = kvmppc_emulate_instruction(run, vcpu);
315 switch (er) {
316 case EMULATE_DONE:
317 /* Future optimization: only reload non-volatiles if
318 * they were actually modified by emulation. */
319 vcpu->stat.emulated_inst_exits++;
320 r = RESUME_GUEST_NV;
321 break;
322 case EMULATE_DO_DCR:
323 run->exit_reason = KVM_EXIT_DCR;
324 r = RESUME_HOST;
325 break;
326 case EMULATE_FAIL:
327 /* XXX Deliver Program interrupt to guest. */
328 printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n",
329 __func__, vcpu->arch.pc, vcpu->arch.last_inst);
330 /* For debugging, encode the failing instruction and
331 * report it to userspace. */
332 run->hw.hardware_exit_reason = ~0ULL << 32;
333 run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
334 r = RESUME_HOST;
335 break;
336 default:
337 BUG();
338 }
339 break;
340
341 case BOOKE_INTERRUPT_DATA_STORAGE:
342 vcpu->arch.dear = vcpu->arch.fault_dear;
343 vcpu->arch.esr = vcpu->arch.fault_esr;
344 kvmppc_queue_exception(vcpu, exit_nr);
345 vcpu->stat.dsi_exits++;
346 r = RESUME_GUEST;
347 break;
348
349 case BOOKE_INTERRUPT_INST_STORAGE:
350 vcpu->arch.esr = vcpu->arch.fault_esr;
351 kvmppc_queue_exception(vcpu, exit_nr);
352 vcpu->stat.isi_exits++;
353 r = RESUME_GUEST;
354 break;
355
356 case BOOKE_INTERRUPT_SYSCALL:
357 kvmppc_queue_exception(vcpu, exit_nr);
358 vcpu->stat.syscall_exits++;
359 r = RESUME_GUEST;
360 break;
361
362 case BOOKE_INTERRUPT_DTLB_MISS: {
363 struct tlbe *gtlbe;
364 unsigned long eaddr = vcpu->arch.fault_dear;
365 gfn_t gfn;
366
367 /* Check the guest TLB. */
368 gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
369 if (!gtlbe) {
370 /* The guest didn't have a mapping for it. */
371 kvmppc_queue_exception(vcpu, exit_nr);
372 vcpu->arch.dear = vcpu->arch.fault_dear;
373 vcpu->arch.esr = vcpu->arch.fault_esr;
374 vcpu->stat.dtlb_real_miss_exits++;
375 r = RESUME_GUEST;
376 break;
377 }
378
379 vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
380 gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
381
382 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
383 /* The guest TLB had a mapping, but the shadow TLB
384 * didn't, and it is RAM. This could be because:
385 * a) the entry is mapping the host kernel, or
386 * b) the guest used a large mapping which we're faking
387 * Either way, we need to satisfy the fault without
388 * invoking the guest. */
389 kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
390 gtlbe->word2);
391 vcpu->stat.dtlb_virt_miss_exits++;
392 r = RESUME_GUEST;
393 } else {
394 /* Guest has mapped and accessed a page which is not
395 * actually RAM. */
396 r = kvmppc_emulate_mmio(run, vcpu);
397 }
398
399 break;
400 }
401
402 case BOOKE_INTERRUPT_ITLB_MISS: {
403 struct tlbe *gtlbe;
404 unsigned long eaddr = vcpu->arch.pc;
405 gfn_t gfn;
406
407 r = RESUME_GUEST;
408
409 /* Check the guest TLB. */
410 gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
411 if (!gtlbe) {
412 /* The guest didn't have a mapping for it. */
413 kvmppc_queue_exception(vcpu, exit_nr);
414 vcpu->stat.itlb_real_miss_exits++;
415 break;
416 }
417
418 vcpu->stat.itlb_virt_miss_exits++;
419
420 gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT;
421
422 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
423 /* The guest TLB had a mapping, but the shadow TLB
424 * didn't. This could be because:
425 * a) the entry is mapping the host kernel, or
426 * b) the guest used a large mapping which we're faking
427 * Either way, we need to satisfy the fault without
428 * invoking the guest. */
429 kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
430 gtlbe->word2);
431 } else {
432 /* Guest mapped and leaped at non-RAM! */
433 kvmppc_queue_exception(vcpu,
434 BOOKE_INTERRUPT_MACHINE_CHECK);
435 }
436
437 break;
438 }
439
440 default:
441 printk(KERN_EMERG "exit_nr %d\n", exit_nr);
442 BUG();
443 }
444
445 local_irq_disable();
446
447 kvmppc_check_and_deliver_interrupts(vcpu);
448
449 /* Do some exit accounting. */
450 vcpu->stat.sum_exits++;
451 if (!(r & RESUME_HOST)) {
452 /* To avoid clobbering exit_reason, only check for signals if
453 * we aren't already exiting to userspace for some other
454 * reason. */
455 if (signal_pending(current)) {
456 run->exit_reason = KVM_EXIT_INTR;
457 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
458
459 vcpu->stat.signal_exits++;
460 } else {
461 vcpu->stat.light_exits++;
462 }
463 } else {
464 switch (run->exit_reason) {
465 case KVM_EXIT_MMIO:
466 vcpu->stat.mmio_exits++;
467 break;
468 case KVM_EXIT_DCR:
469 vcpu->stat.dcr_exits++;
470 break;
471 case KVM_EXIT_INTR:
472 vcpu->stat.signal_exits++;
473 break;
474 }
475 }
476
477 return r;
478}
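/* Sketch of how the encoded return value is meant to be unpacked (this only
 * restates the encoding visible above and the "srawi r3, r3, 2" in
 * booke_interrupts.S): a signal exit returns (-EINTR << 2) | RESUME_FLAG_HOST,
 * so an arithmetic right shift by 2 recovers -EINTR for kvm_vcpu_run()'s
 * caller, a clear RESUME_FLAG_HOST bit means "re-enter the guest", and
 * RESUME_FLAG_NV additionally requests a full non-volatile GPR reload. */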
479
480/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
481int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
482{
483 struct tlbe *tlbe = &vcpu->arch.guest_tlb[0];
484
485 tlbe->tid = 0;
486 tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
487 tlbe->word1 = 0;
488 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
489
490 tlbe++;
491 tlbe->tid = 0;
492 tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
493 tlbe->word1 = 0xef600000;
494 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
495 | PPC44x_TLB_I | PPC44x_TLB_G;
496
497 vcpu->arch.pc = 0;
498 vcpu->arch.msr = 0;
499 vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
500
501 /* Eye-catching number so we know if the guest takes an interrupt
502 * before it's programmed its own IVPR. */
503 vcpu->arch.ivpr = 0x55550000;
504
505 /* Since the guest can directly access the timebase, it must know the
506 * real timebase frequency. Accordingly, it must see the state of
507 * CCR1[TCS]. */
508 vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
509
510 return 0;
511}
512
513int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
514{
515 int i;
516
517 regs->pc = vcpu->arch.pc;
518 regs->cr = vcpu->arch.cr;
519 regs->ctr = vcpu->arch.ctr;
520 regs->lr = vcpu->arch.lr;
521 regs->xer = vcpu->arch.xer;
522 regs->msr = vcpu->arch.msr;
523 regs->srr0 = vcpu->arch.srr0;
524 regs->srr1 = vcpu->arch.srr1;
525 regs->pid = vcpu->arch.pid;
526 regs->sprg0 = vcpu->arch.sprg0;
527 regs->sprg1 = vcpu->arch.sprg1;
528 regs->sprg2 = vcpu->arch.sprg2;
529 regs->sprg3 = vcpu->arch.sprg3;
530	regs->sprg5 = vcpu->arch.sprg5;
531	regs->sprg6 = vcpu->arch.sprg6;
532	regs->sprg7 = vcpu->arch.sprg7;

533
534 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
535 regs->gpr[i] = vcpu->arch.gpr[i];
536
537 return 0;
538}
539
540int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
541{
542 int i;
543
544 vcpu->arch.pc = regs->pc;
545 vcpu->arch.cr = regs->cr;
546 vcpu->arch.ctr = regs->ctr;
547 vcpu->arch.lr = regs->lr;
548 vcpu->arch.xer = regs->xer;
549 vcpu->arch.msr = regs->msr;
550 vcpu->arch.srr0 = regs->srr0;
551 vcpu->arch.srr1 = regs->srr1;
552 vcpu->arch.sprg0 = regs->sprg0;
553 vcpu->arch.sprg1 = regs->sprg1;
554 vcpu->arch.sprg2 = regs->sprg2;
555 vcpu->arch.sprg3 = regs->sprg3;
556	vcpu->arch.sprg5 = regs->sprg5;
557	vcpu->arch.sprg6 = regs->sprg6;
558	vcpu->arch.sprg7 = regs->sprg7;
559
560 for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
561 vcpu->arch.gpr[i] = regs->gpr[i];
562
563 return 0;
564}
565
566int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
567 struct kvm_sregs *sregs)
568{
569 return -ENOTSUPP;
570}
571
572int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
573 struct kvm_sregs *sregs)
574{
575 return -ENOTSUPP;
576}
577
578int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
579{
580 return -ENOTSUPP;
581}
582
583int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
584{
585 return -ENOTSUPP;
586}
587
588/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
589int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
590 struct kvm_translation *tr)
591{
592 struct tlbe *gtlbe;
593 int index;
594 gva_t eaddr;
595 u8 pid;
596 u8 as;
597
598 eaddr = tr->linear_address;
599 pid = (tr->linear_address >> 32) & 0xff;
600 as = (tr->linear_address >> 40) & 0x1;
601
602 index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
603 if (index == -1) {
604 tr->valid = 0;
605 return 0;
606 }
607
608 gtlbe = &vcpu->arch.guest_tlb[index];
609
610 tr->physical_address = tlb_xlate(gtlbe, eaddr);
611 /* XXX what does "writeable" and "usermode" even mean? */
612 tr->valid = 1;
613
614 return 0;
615}
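/* Hypothetical userspace usage of the encoding above (illustration only; the
 * variable names are made up, the ioctl is the generic KVM_TRANSLATE):
 *
 *	struct kvm_translation tr = { 0 };
 *	tr.linear_address = ((u64)as << 40) | ((u64)pid << 32) | eaddr;
 *	ioctl(vcpu_fd, KVM_TRANSLATE, &tr);
 *	if (tr.valid)
 *		printf("gpa = 0x%llx\n", tr.physical_address);
 */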
diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c
new file mode 100644
index 000000000000..b480341bc31e
--- /dev/null
+++ b/arch/powerpc/kvm/booke_host.c
@@ -0,0 +1,83 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <linux/errno.h>
21#include <linux/kvm_host.h>
22#include <linux/module.h>
23#include <asm/cacheflush.h>
24#include <asm/kvm_ppc.h>
25
26unsigned long kvmppc_booke_handlers;
27
28static int kvmppc_booke_init(void)
29{
30 unsigned long ivor[16];
31 unsigned long max_ivor = 0;
32 int i;
33
34	/* We install our own exception handlers by hijacking IVPR. IVPR holds only
35	 * the top 16 bits of the base address, so we need a 64KB-aligned 64KB allocation. */
36 kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
37 VCPU_SIZE_ORDER);
38 if (!kvmppc_booke_handlers)
39 return -ENOMEM;
40
41 /* XXX make sure our handlers are smaller than Linux's */
42
43 /* Copy our interrupt handlers to match host IVORs. That way we don't
44 * have to swap the IVORs on every guest/host transition. */
45 ivor[0] = mfspr(SPRN_IVOR0);
46 ivor[1] = mfspr(SPRN_IVOR1);
47 ivor[2] = mfspr(SPRN_IVOR2);
48 ivor[3] = mfspr(SPRN_IVOR3);
49 ivor[4] = mfspr(SPRN_IVOR4);
50 ivor[5] = mfspr(SPRN_IVOR5);
51 ivor[6] = mfspr(SPRN_IVOR6);
52 ivor[7] = mfspr(SPRN_IVOR7);
53 ivor[8] = mfspr(SPRN_IVOR8);
54 ivor[9] = mfspr(SPRN_IVOR9);
55 ivor[10] = mfspr(SPRN_IVOR10);
56 ivor[11] = mfspr(SPRN_IVOR11);
57 ivor[12] = mfspr(SPRN_IVOR12);
58 ivor[13] = mfspr(SPRN_IVOR13);
59 ivor[14] = mfspr(SPRN_IVOR14);
60 ivor[15] = mfspr(SPRN_IVOR15);
61
62 for (i = 0; i < 16; i++) {
63 if (ivor[i] > max_ivor)
64 max_ivor = ivor[i];
65
66 memcpy((void *)kvmppc_booke_handlers + ivor[i],
67 kvmppc_handlers_start + i * kvmppc_handler_len,
68 kvmppc_handler_len);
69 }
70 flush_icache_range(kvmppc_booke_handlers,
71 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
72
73 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
74}
75
76static void __exit kvmppc_booke_exit(void)
77{
78 free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
79 kvm_exit();
80}
81
82module_init(kvmppc_booke_init)
83module_exit(kvmppc_booke_exit)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
new file mode 100644
index 000000000000..3b653b5309b8
--- /dev/null
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -0,0 +1,436 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2007
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <asm/ppc_asm.h>
21#include <asm/kvm_asm.h>
22#include <asm/reg.h>
23#include <asm/mmu-44x.h>
24#include <asm/page.h>
25#include <asm/asm-offsets.h>
26
27#define KVMPPC_MSR_MASK (MSR_CE|MSR_EE|MSR_PR|MSR_DE|MSR_ME|MSR_IS|MSR_DS)
28
29#define VCPU_GPR(n) (VCPU_GPRS + (n * 4))
30
31/* The host stack layout: */
32#define HOST_R1 0 /* Implied by stwu. */
33#define HOST_CALLEE_LR 4
34#define HOST_RUN 8
35/* r2 is special: it holds 'current', and it is made nonvolatile in the
36 * kernel with the -ffixed-r2 gcc option. */
37#define HOST_R2 12
38#define HOST_NV_GPRS 16
39#define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * 4))
40#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + 4)
41#define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
42#define HOST_STACK_LR (HOST_STACK_SIZE + 4) /* In caller stack frame. */
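/* Resulting frame, as laid out by the stwu in __kvmppc_vcpu_run below
 * (offsets from the new r1):  0: back chain (HOST_R1), 4: HOST_CALLEE_LR,
 * 8: HOST_RUN (kvm_run pointer), 12: HOST_R2, 16..84: saved r14-r31, rounded
 * up to a 16-byte multiple; the caller's LR slot is one frame up, at
 * HOST_STACK_LR. */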
43
44#define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
45 (1<<BOOKE_INTERRUPT_DTLB_MISS))
46
47#define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
48 (1<<BOOKE_INTERRUPT_DTLB_MISS))
49
50#define NEED_ESR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
51 (1<<BOOKE_INTERRUPT_INST_STORAGE) | \
52 (1<<BOOKE_INTERRUPT_PROGRAM) | \
53 (1<<BOOKE_INTERRUPT_DTLB_MISS))
54
55.macro KVM_HANDLER ivor_nr
56_GLOBAL(kvmppc_handler_\ivor_nr)
57 /* Get pointer to vcpu and record exit number. */
58 mtspr SPRN_SPRG0, r4
59 mfspr r4, SPRN_SPRG1
60 stw r5, VCPU_GPR(r5)(r4)
61 stw r6, VCPU_GPR(r6)(r4)
62 mfctr r5
63 lis r6, kvmppc_resume_host@h
64 stw r5, VCPU_CTR(r4)
65 li r5, \ivor_nr
66 ori r6, r6, kvmppc_resume_host@l
67 mtctr r6
68 bctr
69.endm
70
71_GLOBAL(kvmppc_handlers_start)
72KVM_HANDLER BOOKE_INTERRUPT_CRITICAL
73KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK
74KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE
75KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE
76KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL
77KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT
78KVM_HANDLER BOOKE_INTERRUPT_PROGRAM
79KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL
80KVM_HANDLER BOOKE_INTERRUPT_SYSCALL
81KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL
82KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER
83KVM_HANDLER BOOKE_INTERRUPT_FIT
84KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG
85KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS
86KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS
87KVM_HANDLER BOOKE_INTERRUPT_DEBUG
88
89_GLOBAL(kvmppc_handler_len)
90 .long kvmppc_handler_1 - kvmppc_handler_0
91
92
93/* Registers:
94 * SPRG0: guest r4
95 * r4: vcpu pointer
96 * r5: KVM exit number
97 */
98_GLOBAL(kvmppc_resume_host)
99 stw r3, VCPU_GPR(r3)(r4)
100 mfcr r3
101 stw r3, VCPU_CR(r4)
102 stw r7, VCPU_GPR(r7)(r4)
103 stw r8, VCPU_GPR(r8)(r4)
104 stw r9, VCPU_GPR(r9)(r4)
105
106 li r6, 1
107 slw r6, r6, r5
108
109 /* Save the faulting instruction and all GPRs for emulation. */
110 andi. r7, r6, NEED_INST_MASK
111 beq ..skip_inst_copy
112 mfspr r9, SPRN_SRR0
113 mfmsr r8
114 ori r7, r8, MSR_DS
115 mtmsr r7
116 isync
117 lwz r9, 0(r9)
118 mtmsr r8
119 isync
120 stw r9, VCPU_LAST_INST(r4)
121
122 stw r15, VCPU_GPR(r15)(r4)
123 stw r16, VCPU_GPR(r16)(r4)
124 stw r17, VCPU_GPR(r17)(r4)
125 stw r18, VCPU_GPR(r18)(r4)
126 stw r19, VCPU_GPR(r19)(r4)
127 stw r20, VCPU_GPR(r20)(r4)
128 stw r21, VCPU_GPR(r21)(r4)
129 stw r22, VCPU_GPR(r22)(r4)
130 stw r23, VCPU_GPR(r23)(r4)
131 stw r24, VCPU_GPR(r24)(r4)
132 stw r25, VCPU_GPR(r25)(r4)
133 stw r26, VCPU_GPR(r26)(r4)
134 stw r27, VCPU_GPR(r27)(r4)
135 stw r28, VCPU_GPR(r28)(r4)
136 stw r29, VCPU_GPR(r29)(r4)
137 stw r30, VCPU_GPR(r30)(r4)
138 stw r31, VCPU_GPR(r31)(r4)
139..skip_inst_copy:
140
141 /* Also grab DEAR and ESR before the host can clobber them. */
142
143 andi. r7, r6, NEED_DEAR_MASK
144 beq ..skip_dear
145 mfspr r9, SPRN_DEAR
146 stw r9, VCPU_FAULT_DEAR(r4)
147..skip_dear:
148
149 andi. r7, r6, NEED_ESR_MASK
150 beq ..skip_esr
151 mfspr r9, SPRN_ESR
152 stw r9, VCPU_FAULT_ESR(r4)
153..skip_esr:
154
155 /* Save remaining volatile guest register state to vcpu. */
156 stw r0, VCPU_GPR(r0)(r4)
157 stw r1, VCPU_GPR(r1)(r4)
158 stw r2, VCPU_GPR(r2)(r4)
159 stw r10, VCPU_GPR(r10)(r4)
160 stw r11, VCPU_GPR(r11)(r4)
161 stw r12, VCPU_GPR(r12)(r4)
162 stw r13, VCPU_GPR(r13)(r4)
163 stw r14, VCPU_GPR(r14)(r4) /* We need a NV GPR below. */
164 mflr r3
165 stw r3, VCPU_LR(r4)
166 mfxer r3
167 stw r3, VCPU_XER(r4)
168 mfspr r3, SPRN_SPRG0
169 stw r3, VCPU_GPR(r4)(r4)
170 mfspr r3, SPRN_SRR0
171 stw r3, VCPU_PC(r4)
172
173 /* Restore host stack pointer and PID before IVPR, since the host
174 * exception handlers use them. */
175 lwz r1, VCPU_HOST_STACK(r4)
176 lwz r3, VCPU_HOST_PID(r4)
177 mtspr SPRN_PID, r3
178
179 /* Restore host IVPR before re-enabling interrupts. We cheat and know
180 * that Linux IVPR is always 0xc0000000. */
181 lis r3, 0xc000
182 mtspr SPRN_IVPR, r3
183
184 /* Switch to kernel stack and jump to handler. */
185 LOAD_REG_ADDR(r3, kvmppc_handle_exit)
186 mtctr r3
187 lwz r3, HOST_RUN(r1)
188 lwz r2, HOST_R2(r1)
189 mr r14, r4 /* Save vcpu pointer. */
190
191 bctrl /* kvmppc_handle_exit() */
192
193 /* Restore vcpu pointer and the nonvolatiles we used. */
194 mr r4, r14
195 lwz r14, VCPU_GPR(r14)(r4)
196
197 /* Sometimes instruction emulation must restore complete GPR state. */
198 andi. r5, r3, RESUME_FLAG_NV
199 beq ..skip_nv_load
200 lwz r15, VCPU_GPR(r15)(r4)
201 lwz r16, VCPU_GPR(r16)(r4)
202 lwz r17, VCPU_GPR(r17)(r4)
203 lwz r18, VCPU_GPR(r18)(r4)
204 lwz r19, VCPU_GPR(r19)(r4)
205 lwz r20, VCPU_GPR(r20)(r4)
206 lwz r21, VCPU_GPR(r21)(r4)
207 lwz r22, VCPU_GPR(r22)(r4)
208 lwz r23, VCPU_GPR(r23)(r4)
209 lwz r24, VCPU_GPR(r24)(r4)
210 lwz r25, VCPU_GPR(r25)(r4)
211 lwz r26, VCPU_GPR(r26)(r4)
212 lwz r27, VCPU_GPR(r27)(r4)
213 lwz r28, VCPU_GPR(r28)(r4)
214 lwz r29, VCPU_GPR(r29)(r4)
215 lwz r30, VCPU_GPR(r30)(r4)
216 lwz r31, VCPU_GPR(r31)(r4)
217..skip_nv_load:
218
219 /* Should we return to the guest? */
220 andi. r5, r3, RESUME_FLAG_HOST
221 beq lightweight_exit
222
223 srawi r3, r3, 2 /* Shift -ERR back down. */
224
225heavyweight_exit:
226 /* Not returning to guest. */
227
228 /* We already saved guest volatile register state; now save the
229 * non-volatiles. */
230 stw r15, VCPU_GPR(r15)(r4)
231 stw r16, VCPU_GPR(r16)(r4)
232 stw r17, VCPU_GPR(r17)(r4)
233 stw r18, VCPU_GPR(r18)(r4)
234 stw r19, VCPU_GPR(r19)(r4)
235 stw r20, VCPU_GPR(r20)(r4)
236 stw r21, VCPU_GPR(r21)(r4)
237 stw r22, VCPU_GPR(r22)(r4)
238 stw r23, VCPU_GPR(r23)(r4)
239 stw r24, VCPU_GPR(r24)(r4)
240 stw r25, VCPU_GPR(r25)(r4)
241 stw r26, VCPU_GPR(r26)(r4)
242 stw r27, VCPU_GPR(r27)(r4)
243 stw r28, VCPU_GPR(r28)(r4)
244 stw r29, VCPU_GPR(r29)(r4)
245 stw r30, VCPU_GPR(r30)(r4)
246 stw r31, VCPU_GPR(r31)(r4)
247
248 /* Load host non-volatile register state from host stack. */
249 lwz r14, HOST_NV_GPR(r14)(r1)
250 lwz r15, HOST_NV_GPR(r15)(r1)
251 lwz r16, HOST_NV_GPR(r16)(r1)
252 lwz r17, HOST_NV_GPR(r17)(r1)
253 lwz r18, HOST_NV_GPR(r18)(r1)
254 lwz r19, HOST_NV_GPR(r19)(r1)
255 lwz r20, HOST_NV_GPR(r20)(r1)
256 lwz r21, HOST_NV_GPR(r21)(r1)
257 lwz r22, HOST_NV_GPR(r22)(r1)
258 lwz r23, HOST_NV_GPR(r23)(r1)
259 lwz r24, HOST_NV_GPR(r24)(r1)
260 lwz r25, HOST_NV_GPR(r25)(r1)
261 lwz r26, HOST_NV_GPR(r26)(r1)
262 lwz r27, HOST_NV_GPR(r27)(r1)
263 lwz r28, HOST_NV_GPR(r28)(r1)
264 lwz r29, HOST_NV_GPR(r29)(r1)
265 lwz r30, HOST_NV_GPR(r30)(r1)
266 lwz r31, HOST_NV_GPR(r31)(r1)
267
268 /* Return to kvm_vcpu_run(). */
269 lwz r4, HOST_STACK_LR(r1)
270 addi r1, r1, HOST_STACK_SIZE
271 mtlr r4
272 /* r3 still contains the return code from kvmppc_handle_exit(). */
273 blr
274
275
276/* Registers:
277 * r3: kvm_run pointer
278 * r4: vcpu pointer
279 */
280_GLOBAL(__kvmppc_vcpu_run)
281 stwu r1, -HOST_STACK_SIZE(r1)
282 stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
283
284 /* Save host state to stack. */
285 stw r3, HOST_RUN(r1)
286 mflr r3
287 stw r3, HOST_STACK_LR(r1)
288
289 /* Save host non-volatile register state to stack. */
290 stw r14, HOST_NV_GPR(r14)(r1)
291 stw r15, HOST_NV_GPR(r15)(r1)
292 stw r16, HOST_NV_GPR(r16)(r1)
293 stw r17, HOST_NV_GPR(r17)(r1)
294 stw r18, HOST_NV_GPR(r18)(r1)
295 stw r19, HOST_NV_GPR(r19)(r1)
296 stw r20, HOST_NV_GPR(r20)(r1)
297 stw r21, HOST_NV_GPR(r21)(r1)
298 stw r22, HOST_NV_GPR(r22)(r1)
299 stw r23, HOST_NV_GPR(r23)(r1)
300 stw r24, HOST_NV_GPR(r24)(r1)
301 stw r25, HOST_NV_GPR(r25)(r1)
302 stw r26, HOST_NV_GPR(r26)(r1)
303 stw r27, HOST_NV_GPR(r27)(r1)
304 stw r28, HOST_NV_GPR(r28)(r1)
305 stw r29, HOST_NV_GPR(r29)(r1)
306 stw r30, HOST_NV_GPR(r30)(r1)
307 stw r31, HOST_NV_GPR(r31)(r1)
308
309 /* Load guest non-volatiles. */
310 lwz r14, VCPU_GPR(r14)(r4)
311 lwz r15, VCPU_GPR(r15)(r4)
312 lwz r16, VCPU_GPR(r16)(r4)
313 lwz r17, VCPU_GPR(r17)(r4)
314 lwz r18, VCPU_GPR(r18)(r4)
315 lwz r19, VCPU_GPR(r19)(r4)
316 lwz r20, VCPU_GPR(r20)(r4)
317 lwz r21, VCPU_GPR(r21)(r4)
318 lwz r22, VCPU_GPR(r22)(r4)
319 lwz r23, VCPU_GPR(r23)(r4)
320 lwz r24, VCPU_GPR(r24)(r4)
321 lwz r25, VCPU_GPR(r25)(r4)
322 lwz r26, VCPU_GPR(r26)(r4)
323 lwz r27, VCPU_GPR(r27)(r4)
324 lwz r28, VCPU_GPR(r28)(r4)
325 lwz r29, VCPU_GPR(r29)(r4)
326 lwz r30, VCPU_GPR(r30)(r4)
327 lwz r31, VCPU_GPR(r31)(r4)
328
329lightweight_exit:
330 stw r2, HOST_R2(r1)
331
332 mfspr r3, SPRN_PID
333 stw r3, VCPU_HOST_PID(r4)
334 lwz r3, VCPU_PID(r4)
335 mtspr SPRN_PID, r3
336
337 /* Prevent all TLB updates. */
338 mfmsr r5
339 lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
340 ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
341 andc r6, r5, r6
342 mtmsr r6
343
344 /* Save the host's non-pinned TLB mappings, and load the guest mappings
345 * over them. Leave the host's "pinned" kernel mappings in place. */
346 /* XXX optimization: use generation count to avoid swapping unmodified
347 * entries. */
348 mfspr r10, SPRN_MMUCR /* Save host MMUCR. */
349 lis r8, tlb_44x_hwater@ha
350 lwz r8, tlb_44x_hwater@l(r8)
351 addi r3, r4, VCPU_HOST_TLB - 4
352 addi r9, r4, VCPU_SHADOW_TLB - 4
353 li r6, 0
3541:
355 /* Save host entry. */
356 tlbre r7, r6, PPC44x_TLB_PAGEID
357 mfspr r5, SPRN_MMUCR
358 stwu r5, 4(r3)
359 stwu r7, 4(r3)
360 tlbre r7, r6, PPC44x_TLB_XLAT
361 stwu r7, 4(r3)
362 tlbre r7, r6, PPC44x_TLB_ATTRIB
363 stwu r7, 4(r3)
364 /* Load guest entry. */
365 lwzu r7, 4(r9)
366 mtspr SPRN_MMUCR, r7
367 lwzu r7, 4(r9)
368 tlbwe r7, r6, PPC44x_TLB_PAGEID
369 lwzu r7, 4(r9)
370 tlbwe r7, r6, PPC44x_TLB_XLAT
371 lwzu r7, 4(r9)
372 tlbwe r7, r6, PPC44x_TLB_ATTRIB
373 /* Increment index. */
374 addi r6, r6, 1
375 cmpw r6, r8
376 blt 1b
377 mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */
378
379 iccci 0, 0 /* XXX hack */
380
381 /* Load some guest volatiles. */
382 lwz r0, VCPU_GPR(r0)(r4)
383 lwz r2, VCPU_GPR(r2)(r4)
384 lwz r9, VCPU_GPR(r9)(r4)
385 lwz r10, VCPU_GPR(r10)(r4)
386 lwz r11, VCPU_GPR(r11)(r4)
387 lwz r12, VCPU_GPR(r12)(r4)
388 lwz r13, VCPU_GPR(r13)(r4)
389 lwz r3, VCPU_LR(r4)
390 mtlr r3
391 lwz r3, VCPU_XER(r4)
392 mtxer r3
393
394 /* Switch the IVPR. XXX If we take a TLB miss after this we're screwed,
395 * so how do we make sure vcpu won't fault? */
396 lis r8, kvmppc_booke_handlers@ha
397 lwz r8, kvmppc_booke_handlers@l(r8)
398 mtspr SPRN_IVPR, r8
399
400 /* Save vcpu pointer for the exception handlers. */
401 mtspr SPRN_SPRG1, r4
402
403 /* Can't switch the stack pointer until after IVPR is switched,
404 * because host interrupt handlers would get confused. */
405 lwz r1, VCPU_GPR(r1)(r4)
406
407 /* XXX handle USPRG0 */
408 /* Host interrupt handlers may have clobbered these guest-readable
409 * SPRGs, so we need to reload them here with the guest's values. */
410 lwz r3, VCPU_SPRG4(r4)
411 mtspr SPRN_SPRG4, r3
412 lwz r3, VCPU_SPRG5(r4)
413 mtspr SPRN_SPRG5, r3
414 lwz r3, VCPU_SPRG6(r4)
415 mtspr SPRN_SPRG6, r3
416 lwz r3, VCPU_SPRG7(r4)
417 mtspr SPRN_SPRG7, r3
418
419 /* Finish loading guest volatiles and jump to guest. */
420 lwz r3, VCPU_CTR(r4)
421 mtctr r3
422 lwz r3, VCPU_CR(r4)
423 mtcr r3
424 lwz r5, VCPU_GPR(r5)(r4)
425 lwz r6, VCPU_GPR(r6)(r4)
426 lwz r7, VCPU_GPR(r7)(r4)
427 lwz r8, VCPU_GPR(r8)(r4)
428 lwz r3, VCPU_PC(r4)
429 mtsrr0 r3
430 lwz r3, VCPU_MSR(r4)
431 oris r3, r3, KVMPPC_MSR_MASK@h
432 ori r3, r3, KVMPPC_MSR_MASK@l
433 mtsrr1 r3
434 lwz r3, VCPU_GPR(r3)(r4)
435 lwz r4, VCPU_GPR(r4)(r4)
436 rfi
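/* Note on the mtsrr1 above: KVMPPC_MSR_MASK is OR'd into the guest MSR, not
 * AND'd, so the guest always physically runs with PR, EE, CE, ME, DE, IS and
 * DS set -- i.e. in host user mode with interrupts enabled; IS/DS=1 selects
 * the translation space the shadow TLB entries loaded in lightweight_exit are
 * presumably installed with (the shadow TLB management itself lives in
 * 44x_tlb.c, not in this file). */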
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
new file mode 100644
index 000000000000..a03fe0c80698
--- /dev/null
+++ b/arch/powerpc/kvm/emulate.c
@@ -0,0 +1,760 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2007
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <linux/jiffies.h>
21#include <linux/timer.h>
22#include <linux/types.h>
23#include <linux/string.h>
24#include <linux/kvm_host.h>
25
26#include <asm/dcr.h>
27#include <asm/dcr-regs.h>
28#include <asm/time.h>
29#include <asm/byteorder.h>
30#include <asm/kvm_ppc.h>
31
32#include "44x_tlb.h"
33
34/* Instruction decoding */
35static inline unsigned int get_op(u32 inst)
36{
37 return inst >> 26;
38}
39
40static inline unsigned int get_xop(u32 inst)
41{
42 return (inst >> 1) & 0x3ff;
43}
44
45static inline unsigned int get_sprn(u32 inst)
46{
47 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
48}
49
50static inline unsigned int get_dcrn(u32 inst)
51{
52 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
53}
54
55static inline unsigned int get_rt(u32 inst)
56{
57 return (inst >> 21) & 0x1f;
58}
59
60static inline unsigned int get_rs(u32 inst)
61{
62 return (inst >> 21) & 0x1f;
63}
64
65static inline unsigned int get_ra(u32 inst)
66{
67 return (inst >> 16) & 0x1f;
68}
69
70static inline unsigned int get_rb(u32 inst)
71{
72 return (inst >> 11) & 0x1f;
73}
74
75static inline unsigned int get_rc(u32 inst)
76{
77 return inst & 0x1;
78}
79
80static inline unsigned int get_ws(u32 inst)
81{
82 return (inst >> 11) & 0x1f;
83}
84
85static inline unsigned int get_d(u32 inst)
86{
87 return inst & 0xffff;
88}
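/* Decoding example (standard PowerPC instruction formats, for orientation
 * only): for a D-form load such as "lwz rD, d(rA)" the primary opcode from
 * get_op() is 32, get_rt() yields rD, get_ra() yields rA and get_d() the
 * 16-bit displacement; X-form instructions such as lhzx additionally use
 * get_rb() and the extended opcode from get_xop(). */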
89
90static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
91 const struct tlbe *tlbe)
92{
93 gpa_t gpa;
94
95 if (!get_tlb_v(tlbe))
96 return 0;
97
98 /* Does it match current guest AS? */
99 /* XXX what about IS != DS? */
100 if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
101 return 0;
102
103 gpa = get_tlb_raddr(tlbe);
104 if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
105 /* Mapping is not for RAM. */
106 return 0;
107
108 return 1;
109}
110
111static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
112{
113 u64 eaddr;
114 u64 raddr;
115 u64 asid;
116 u32 flags;
117 struct tlbe *tlbe;
118 unsigned int ra;
119 unsigned int rs;
120 unsigned int ws;
121 unsigned int index;
122
123 ra = get_ra(inst);
124 rs = get_rs(inst);
125 ws = get_ws(inst);
126
127 index = vcpu->arch.gpr[ra];
128	if (index >= PPC44x_TLB_SIZE) {
129 printk("%s: index %d\n", __func__, index);
130 kvmppc_dump_vcpu(vcpu);
131 return EMULATE_FAIL;
132 }
133
134 tlbe = &vcpu->arch.guest_tlb[index];
135
136 /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
137 if (tlbe->word0 & PPC44x_TLB_VALID) {
138 eaddr = get_tlb_eaddr(tlbe);
139 asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
140 kvmppc_mmu_invalidate(vcpu, eaddr, asid);
141 }
142
143 switch (ws) {
144 case PPC44x_TLB_PAGEID:
145 tlbe->tid = vcpu->arch.mmucr & 0xff;
146 tlbe->word0 = vcpu->arch.gpr[rs];
147 break;
148
149 case PPC44x_TLB_XLAT:
150 tlbe->word1 = vcpu->arch.gpr[rs];
151 break;
152
153 case PPC44x_TLB_ATTRIB:
154 tlbe->word2 = vcpu->arch.gpr[rs];
155 break;
156
157 default:
158 return EMULATE_FAIL;
159 }
160
161 if (tlbe_is_host_safe(vcpu, tlbe)) {
162 eaddr = get_tlb_eaddr(tlbe);
163 raddr = get_tlb_raddr(tlbe);
164 asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
165 flags = tlbe->word2 & 0xffff;
166
167 /* Create a 4KB mapping on the host. If the guest wanted a
168 * large page, only the first 4KB is mapped here and the rest
169 * are mapped on the fly. */
170 kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
171 }
172
173 return EMULATE_DONE;
174}
175
176static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
177{
178 if (vcpu->arch.tcr & TCR_DIE) {
179 /* The decrementer ticks at the same rate as the timebase, so
180 * that's how we convert the guest DEC value to the number of
181 * host ticks. */
182 unsigned long nr_jiffies;
183
184 nr_jiffies = vcpu->arch.dec / tb_ticks_per_jiffy;
185 mod_timer(&vcpu->arch.dec_timer,
186 get_jiffies_64() + nr_jiffies);
187 } else {
188 del_timer(&vcpu->arch.dec_timer);
189 }
190}
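/* Example of the conversion above (the numbers are purely illustrative): with
 * a 400MHz timebase and HZ=100, tb_ticks_per_jiffy is 4,000,000, so a guest
 * DEC write of 40,000,000 arms the host timer about 10 jiffies (100ms) out. */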
191
192static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
193{
194 vcpu->arch.pc = vcpu->arch.srr0;
195 kvmppc_set_msr(vcpu, vcpu->arch.srr1);
196}
197
198/* XXX to do:
199 * lhax
200 * lhaux
201 * lswx
202 * lswi
203 * stswx
204 * stswi
205 * lha
206 * lhau
207 * lmw
208 * stmw
209 *
210 * XXX is_bigendian should depend on MMU mapping or MSR[LE]
211 */
212int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
213{
214 u32 inst = vcpu->arch.last_inst;
215 u32 ea;
216 int ra;
217 int rb;
218 int rc;
219 int rs;
220 int rt;
221 int sprn;
222 int dcrn;
223 enum emulation_result emulated = EMULATE_DONE;
224 int advance = 1;
225
226 switch (get_op(inst)) {
227 case 3: /* trap */
228 printk("trap!\n");
229 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
230 advance = 0;
231 break;
232
233 case 19:
234 switch (get_xop(inst)) {
235 case 50: /* rfi */
236 kvmppc_emul_rfi(vcpu);
237 advance = 0;
238 break;
239
240 default:
241 emulated = EMULATE_FAIL;
242 break;
243 }
244 break;
245
246 case 31:
247 switch (get_xop(inst)) {
248
249 case 83: /* mfmsr */
250 rt = get_rt(inst);
251 vcpu->arch.gpr[rt] = vcpu->arch.msr;
252 break;
253
254 case 87: /* lbzx */
255 rt = get_rt(inst);
256 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
257 break;
258
259 case 131: /* wrtee */
260 rs = get_rs(inst);
261 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
262 | (vcpu->arch.gpr[rs] & MSR_EE);
263 break;
264
265 case 146: /* mtmsr */
266 rs = get_rs(inst);
267 kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
268 break;
269
270 case 163: /* wrteei */
271 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
272 | (inst & MSR_EE);
273 break;
274
275 case 215: /* stbx */
276 rs = get_rs(inst);
277 emulated = kvmppc_handle_store(run, vcpu,
278 vcpu->arch.gpr[rs],
279 1, 1);
280 break;
281
282 case 247: /* stbux */
283 rs = get_rs(inst);
284 ra = get_ra(inst);
285 rb = get_rb(inst);
286
287 ea = vcpu->arch.gpr[rb];
288 if (ra)
289 ea += vcpu->arch.gpr[ra];
290
291 emulated = kvmppc_handle_store(run, vcpu,
292 vcpu->arch.gpr[rs],
293 1, 1);
294		vcpu->arch.gpr[ra] = ea;
295 break;
296
297 case 279: /* lhzx */
298 rt = get_rt(inst);
299 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
300 break;
301
302 case 311: /* lhzux */
303 rt = get_rt(inst);
304 ra = get_ra(inst);
305 rb = get_rb(inst);
306
307 ea = vcpu->arch.gpr[rb];
308 if (ra)
309 ea += vcpu->arch.gpr[ra];
310
311 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
312 vcpu->arch.gpr[ra] = ea;
313 break;
314
315 case 323: /* mfdcr */
316 dcrn = get_dcrn(inst);
317 rt = get_rt(inst);
318
319 /* The guest may access CPR0 registers to determine the timebase
320 * frequency, and it must know the real host frequency because it
321 * can directly access the timebase registers.
322 *
323 * It would be possible to emulate those accesses in userspace,
324 * but userspace can really only figure out the end frequency.
325 * We could decompose that into the factors that compute it, but
326 * that's tricky math, and it's easier to just report the real
327 * CPR0 values.
328 */
329 switch (dcrn) {
330 case DCRN_CPR0_CONFIG_ADDR:
331 vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
332 break;
333 case DCRN_CPR0_CONFIG_DATA:
334 local_irq_disable();
335 mtdcr(DCRN_CPR0_CONFIG_ADDR,
336 vcpu->arch.cpr0_cfgaddr);
337 vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
338 local_irq_enable();
339 break;
340 default:
341 run->dcr.dcrn = dcrn;
342 run->dcr.data = 0;
343 run->dcr.is_write = 0;
344 vcpu->arch.io_gpr = rt;
345 vcpu->arch.dcr_needed = 1;
346 emulated = EMULATE_DO_DCR;
347 }
348
349 break;
350
351 case 339: /* mfspr */
352 sprn = get_sprn(inst);
353 rt = get_rt(inst);
354
355 switch (sprn) {
356 case SPRN_SRR0:
357 vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
358 case SPRN_SRR1:
359 vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
360 case SPRN_MMUCR:
361 vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
362 case SPRN_PID:
363 vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
364 case SPRN_IVPR:
365 vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
366 case SPRN_CCR0:
367 vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
368 case SPRN_CCR1:
369 vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
370 case SPRN_PVR:
371 vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
372 case SPRN_DEAR:
373 vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
374 case SPRN_ESR:
375 vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
376 case SPRN_DBCR0:
377 vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
378 case SPRN_DBCR1:
379 vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
380
381 /* Note: mftb and TBRL/TBWL are user-accessible, so
382 * the guest can always access the real TB anyways.
383 * In fact, we probably will never see these traps. */
384 case SPRN_TBWL:
385 vcpu->arch.gpr[rt] = mftbl(); break;
386 case SPRN_TBWU:
387 vcpu->arch.gpr[rt] = mftbu(); break;
388
389 case SPRN_SPRG0:
390 vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break;
391 case SPRN_SPRG1:
392 vcpu->arch.gpr[rt] = vcpu->arch.sprg1; break;
393 case SPRN_SPRG2:
394 vcpu->arch.gpr[rt] = vcpu->arch.sprg2; break;
395 case SPRN_SPRG3:
396 vcpu->arch.gpr[rt] = vcpu->arch.sprg3; break;
397 /* Note: SPRG4-7 are user-readable, so we don't get
398 * a trap. */
399
400 case SPRN_IVOR0:
401 vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
402 case SPRN_IVOR1:
403 vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
404 case SPRN_IVOR2:
405 vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
406 case SPRN_IVOR3:
407 vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
408 case SPRN_IVOR4:
409 vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
410 case SPRN_IVOR5:
411 vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
412 case SPRN_IVOR6:
413 vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
414 case SPRN_IVOR7:
415 vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
416 case SPRN_IVOR8:
417 vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
418 case SPRN_IVOR9:
419 vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
420 case SPRN_IVOR10:
421 vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
422 case SPRN_IVOR11:
423 vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
424 case SPRN_IVOR12:
425 vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
426 case SPRN_IVOR13:
427 vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
428 case SPRN_IVOR14:
429 vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
430 case SPRN_IVOR15:
431 vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
432
433 default:
434 printk("mfspr: unknown spr %x\n", sprn);
435 vcpu->arch.gpr[rt] = 0;
436 break;
437 }
438 break;
439
440 case 407: /* sthx */
441 rs = get_rs(inst);
442 ra = get_ra(inst);
443 rb = get_rb(inst);
444
445 emulated = kvmppc_handle_store(run, vcpu,
446 vcpu->arch.gpr[rs],
447 2, 1);
448 break;
449
450 case 439: /* sthux */
451 rs = get_rs(inst);
452 ra = get_ra(inst);
453 rb = get_rb(inst);
454
455 ea = vcpu->arch.gpr[rb];
456 if (ra)
457 ea += vcpu->arch.gpr[ra];
458
459 emulated = kvmppc_handle_store(run, vcpu,
460 vcpu->arch.gpr[rs],
461 2, 1);
462 vcpu->arch.gpr[ra] = ea;
463 break;
464
465 case 451: /* mtdcr */
466 dcrn = get_dcrn(inst);
467 rs = get_rs(inst);
468
469 /* emulate some access in kernel */
470 switch (dcrn) {
471 case DCRN_CPR0_CONFIG_ADDR:
472 vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
473 break;
474 default:
475 run->dcr.dcrn = dcrn;
476 run->dcr.data = vcpu->arch.gpr[rs];
477 run->dcr.is_write = 1;
478 vcpu->arch.dcr_needed = 1;
479 emulated = EMULATE_DO_DCR;
480 }
481
482 break;
483
484 case 467: /* mtspr */
485 sprn = get_sprn(inst);
486 rs = get_rs(inst);
487 switch (sprn) {
488 case SPRN_SRR0:
489 vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
490 case SPRN_SRR1:
491 vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
492 case SPRN_MMUCR:
493 vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
494 case SPRN_PID:
495 vcpu->arch.pid = vcpu->arch.gpr[rs]; break;
496 case SPRN_CCR0:
497 vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
498 case SPRN_CCR1:
499 vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
500 case SPRN_DEAR:
501 vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
502 case SPRN_ESR:
503 vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
504 case SPRN_DBCR0:
505 vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
506 case SPRN_DBCR1:
507 vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
508
509 /* XXX We need to context-switch the timebase for
510 * watchdog and FIT. */
511 case SPRN_TBWL: break;
512 case SPRN_TBWU: break;
513
514 case SPRN_DEC:
515 vcpu->arch.dec = vcpu->arch.gpr[rs];
516 kvmppc_emulate_dec(vcpu);
517 break;
518
519 case SPRN_TSR:
520 vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
521
522 case SPRN_TCR:
523 vcpu->arch.tcr = vcpu->arch.gpr[rs];
524 kvmppc_emulate_dec(vcpu);
525 break;
526
527 case SPRN_SPRG0:
528 vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
529 case SPRN_SPRG1:
530 vcpu->arch.sprg1 = vcpu->arch.gpr[rs]; break;
531 case SPRN_SPRG2:
532 vcpu->arch.sprg2 = vcpu->arch.gpr[rs]; break;
533 case SPRN_SPRG3:
534 vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
535
536 /* Note: SPRG4-7 are user-readable. These values are
537 * loaded into the real SPRGs when resuming the
538 * guest. */
539 case SPRN_SPRG4:
540 vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
541 case SPRN_SPRG5:
542 vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
543 case SPRN_SPRG6:
544 vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
545 case SPRN_SPRG7:
546 vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
547
548 case SPRN_IVPR:
549 vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
550 case SPRN_IVOR0:
551 vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
552 case SPRN_IVOR1:
553 vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
554 case SPRN_IVOR2:
555 vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
556 case SPRN_IVOR3:
557 vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
558 case SPRN_IVOR4:
559 vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
560 case SPRN_IVOR5:
561 vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
562 case SPRN_IVOR6:
563 vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
564 case SPRN_IVOR7:
565 vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
566 case SPRN_IVOR8:
567 vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
568 case SPRN_IVOR9:
569 vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
570 case SPRN_IVOR10:
571 vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
572 case SPRN_IVOR11:
573 vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
574 case SPRN_IVOR12:
575 vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
576 case SPRN_IVOR13:
577 vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
578 case SPRN_IVOR14:
579 vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
580 case SPRN_IVOR15:
581 vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
582
583 default:
584 printk("mtspr: unknown spr %x\n", sprn);
585 emulated = EMULATE_FAIL;
586 break;
587 }
588 break;
589
590 case 470: /* dcbi */
591 /* Do nothing. The guest is performing dcbi because
592 * hardware DMA is not snooped by the dcache, but
593 * emulated DMA either goes through the dcache as
594 * normal writes, or the host kernel has handled dcache
595 * coherence. */
596 break;
597
598 case 534: /* lwbrx */
599 rt = get_rt(inst);
600 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
601 break;
602
603 case 566: /* tlbsync */
604 break;
605
606 case 662: /* stwbrx */
607 rs = get_rs(inst);
608 ra = get_ra(inst);
609 rb = get_rb(inst);
610
611 emulated = kvmppc_handle_store(run, vcpu,
612 vcpu->arch.gpr[rs],
613 4, 0);
614 break;
615
616 case 978: /* tlbwe */
617 emulated = kvmppc_emul_tlbwe(vcpu, inst);
618 break;
619
620 case 914: { /* tlbsx */
621 int index;
622 unsigned int as = get_mmucr_sts(vcpu);
623 unsigned int pid = get_mmucr_stid(vcpu);
624
625 rt = get_rt(inst);
626 ra = get_ra(inst);
627 rb = get_rb(inst);
628 rc = get_rc(inst);
629
630 ea = vcpu->arch.gpr[rb];
631 if (ra)
632 ea += vcpu->arch.gpr[ra];
633
634 index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
635 if (rc) {
636 if (index < 0)
637 vcpu->arch.cr &= ~0x20000000;
638 else
639 vcpu->arch.cr |= 0x20000000;
640 }
641 vcpu->arch.gpr[rt] = index;
642
643 }
644 break;
645
646 case 790: /* lhbrx */
647 rt = get_rt(inst);
648 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
649 break;
650
651 case 918: /* sthbrx */
652 rs = get_rs(inst);
653 ra = get_ra(inst);
654 rb = get_rb(inst);
655
656 emulated = kvmppc_handle_store(run, vcpu,
657 vcpu->arch.gpr[rs],
658 2, 0);
659 break;
660
661 case 966: /* iccci */
662 break;
663
664 default:
665 printk("unknown: op %d xop %d\n", get_op(inst),
666 get_xop(inst));
667 emulated = EMULATE_FAIL;
668 break;
669 }
670 break;
671
672 case 32: /* lwz */
673 rt = get_rt(inst);
674 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
675 break;
676
677 case 33: /* lwzu */
678 ra = get_ra(inst);
679 rt = get_rt(inst);
680 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
681 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
682 break;
683
684 case 34: /* lbz */
685 rt = get_rt(inst);
686 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
687 break;
688
689 case 35: /* lbzu */
690 ra = get_ra(inst);
691 rt = get_rt(inst);
692 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
693 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
694 break;
695
696 case 36: /* stw */
697 rs = get_rs(inst);
698 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
699 4, 1);
700 break;
701
702 case 37: /* stwu */
703 ra = get_ra(inst);
704 rs = get_rs(inst);
705 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
706 4, 1);
707 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
708 break;
709
710 case 38: /* stb */
711 rs = get_rs(inst);
712 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
713 1, 1);
714 break;
715
716 case 39: /* stbu */
717 ra = get_ra(inst);
718 rs = get_rs(inst);
719 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
720 1, 1);
721 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
722 break;
723
724 case 40: /* lhz */
725 rt = get_rt(inst);
726 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
727 break;
728
729 case 41: /* lhzu */
730 ra = get_ra(inst);
731 rt = get_rt(inst);
732 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
733 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
734 break;
735
736 case 44: /* sth */
737 rs = get_rs(inst);
738 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
739 2, 1);
740 break;
741
742 case 45: /* sthu */
743 ra = get_ra(inst);
744 rs = get_rs(inst);
745 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
746 2, 1);
747 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
748 break;
749
750 default:
751 printk("unknown op %d\n", get_op(inst));
752 emulated = EMULATE_FAIL;
753 break;
754 }
755
756 if (advance)
757 vcpu->arch.pc += 4; /* Advance past emulated instruction. */
758
759 return emulated;
760}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
new file mode 100644
index 000000000000..bad40bd2d3ac
--- /dev/null
+++ b/arch/powerpc/kvm/powerpc.c
@@ -0,0 +1,436 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2007
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
19 */
20
21#include <linux/errno.h>
22#include <linux/err.h>
23#include <linux/kvm_host.h>
24#include <linux/module.h>
25#include <linux/vmalloc.h>
26#include <linux/fs.h>
27#include <asm/cputable.h>
28#include <asm/uaccess.h>
29#include <asm/kvm_ppc.h>
30
31
32gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
33{
34 return gfn;
35}
36
37int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
38{
39 /* XXX implement me */
40 return 0;
41}
42
43int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
44{
45 return 1;
46}
47
48
49int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
50{
51 enum emulation_result er;
52 int r;
53
54 er = kvmppc_emulate_instruction(run, vcpu);
55 switch (er) {
56 case EMULATE_DONE:
57 /* Future optimization: only reload non-volatiles if they were
58 * actually modified. */
59 r = RESUME_GUEST_NV;
60 break;
61 case EMULATE_DO_MMIO:
62 run->exit_reason = KVM_EXIT_MMIO;
63 /* We must reload nonvolatiles because "update" load/store
64 * instructions modify register state. */
65 /* Future optimization: only reload non-volatiles if they were
66 * actually modified. */
67 r = RESUME_HOST_NV;
68 break;
69 case EMULATE_FAIL:
70 /* XXX Deliver Program interrupt to guest. */
71 printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__,
72 vcpu->arch.last_inst);
73 r = RESUME_HOST;
74 break;
75 default:
76 BUG();
77 }
78
79 return r;
80}
81
82void kvm_arch_hardware_enable(void *garbage)
83{
84}
85
86void kvm_arch_hardware_disable(void *garbage)
87{
88}
89
90int kvm_arch_hardware_setup(void)
91{
92 return 0;
93}
94
95void kvm_arch_hardware_unsetup(void)
96{
97}
98
99void kvm_arch_check_processor_compat(void *rtn)
100{
101 int r;
102
103 if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
104 r = 0;
105 else
106 r = -ENOTSUPP;
107
108 *(int *)rtn = r;
109}
110
111struct kvm *kvm_arch_create_vm(void)
112{
113 struct kvm *kvm;
114
115 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
116 if (!kvm)
117 return ERR_PTR(-ENOMEM);
118
119 return kvm;
120}
121
122static void kvmppc_free_vcpus(struct kvm *kvm)
123{
124 unsigned int i;
125
126 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
127 if (kvm->vcpus[i]) {
128 kvm_arch_vcpu_free(kvm->vcpus[i]);
129 kvm->vcpus[i] = NULL;
130 }
131 }
132}
133
134void kvm_arch_destroy_vm(struct kvm *kvm)
135{
136 kvmppc_free_vcpus(kvm);
137 kvm_free_physmem(kvm);
138 kfree(kvm);
139}
140
141int kvm_dev_ioctl_check_extension(long ext)
142{
143 int r;
144
145 switch (ext) {
146 case KVM_CAP_USER_MEMORY:
147 r = 1;
148 break;
149 default:
150 r = 0;
151 break;
152 }
153 return r;
154
155}
156
157long kvm_arch_dev_ioctl(struct file *filp,
158 unsigned int ioctl, unsigned long arg)
159{
160 return -EINVAL;
161}
162
163int kvm_arch_set_memory_region(struct kvm *kvm,
164 struct kvm_userspace_memory_region *mem,
165 struct kvm_memory_slot old,
166 int user_alloc)
167{
168 return 0;
169}
170
171struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
172{
173 struct kvm_vcpu *vcpu;
174 int err;
175
176 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
177 if (!vcpu) {
178 err = -ENOMEM;
179 goto out;
180 }
181
182 err = kvm_vcpu_init(vcpu, kvm, id);
183 if (err)
184 goto free_vcpu;
185
186 return vcpu;
187
188free_vcpu:
189 kmem_cache_free(kvm_vcpu_cache, vcpu);
190out:
191 return ERR_PTR(err);
192}
193
194void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
195{
196 kvm_vcpu_uninit(vcpu);
197 kmem_cache_free(kvm_vcpu_cache, vcpu);
198}
199
200void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
201{
202 kvm_arch_vcpu_free(vcpu);
203}
204
205int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
206{
207 unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER];
208
209 return test_bit(priority, &vcpu->arch.pending_exceptions);
210}
211
212static void kvmppc_decrementer_func(unsigned long data)
213{
214 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
215
216 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
217}
218
219int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
220{
221 setup_timer(&vcpu->arch.dec_timer, kvmppc_decrementer_func,
222 (unsigned long)vcpu);
223
224 return 0;
225}
226
227void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
228{
229}
230
231void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
232{
233}
234
235void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
236{
237}
238
239void decache_vcpus_on_cpu(int cpu)
240{
241}
242
243int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
244 struct kvm_debug_guest *dbg)
245{
246 return -ENOTSUPP;
247}
248
249static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
250 struct kvm_run *run)
251{
252 u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
253 *gpr = run->dcr.data;
254}
255
256static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
257 struct kvm_run *run)
258{
259 u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
260
261 if (run->mmio.len > sizeof(*gpr)) {
262 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
263 return;
264 }
265
266 if (vcpu->arch.mmio_is_bigendian) {
267 switch (run->mmio.len) {
268 case 4: *gpr = *(u32 *)run->mmio.data; break;
269 case 2: *gpr = *(u16 *)run->mmio.data; break;
270 case 1: *gpr = *(u8 *)run->mmio.data; break;
271 }
272 } else {
273 /* Convert BE data from userland back to LE. */
274 switch (run->mmio.len) {
275 case 4: *gpr = ld_le32((u32 *)run->mmio.data); break;
276 case 2: *gpr = ld_le16((u16 *)run->mmio.data); break;
277 case 1: *gpr = *(u8 *)run->mmio.data; break;
278 }
279 }
280}
281
282int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
283 unsigned int rt, unsigned int bytes, int is_bigendian)
284{
285 if (bytes > sizeof(run->mmio.data)) {
286 printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
287 bytes);
288 }
289
290 run->mmio.phys_addr = vcpu->arch.paddr_accessed;
291 run->mmio.len = bytes;
292 run->mmio.is_write = 0;
293
294 vcpu->arch.io_gpr = rt;
295 vcpu->arch.mmio_is_bigendian = is_bigendian;
296 vcpu->mmio_needed = 1;
297 vcpu->mmio_is_write = 0;
298
299 return EMULATE_DO_MMIO;
300}
301
302int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
303 u32 val, unsigned int bytes, int is_bigendian)
304{
305 void *data = run->mmio.data;
306
307 if (bytes > sizeof(run->mmio.data)) {
308 printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
309 bytes);
310 }
311
312 run->mmio.phys_addr = vcpu->arch.paddr_accessed;
313 run->mmio.len = bytes;
314 run->mmio.is_write = 1;
315 vcpu->mmio_needed = 1;
316 vcpu->mmio_is_write = 1;
317
318 /* Store the value at the lowest bytes in 'data'. */
319 if (is_bigendian) {
320 switch (bytes) {
321 case 4: *(u32 *)data = val; break;
322 case 2: *(u16 *)data = val; break;
323 case 1: *(u8 *)data = val; break;
324 }
325 } else {
326 /* Store LE value into 'data'. */
327 switch (bytes) {
328 case 4: st_le32(data, val); break;
329 case 2: st_le16(data, val); break;
330 case 1: *(u8 *)data = val; break;
331 }
332 }
333
334 return EMULATE_DO_MMIO;
335}
336
337int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
338{
339 int r;
340 sigset_t sigsaved;
341
342 if (vcpu->sigset_active)
343 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
344
345 if (vcpu->mmio_needed) {
346 if (!vcpu->mmio_is_write)
347 kvmppc_complete_mmio_load(vcpu, run);
348 vcpu->mmio_needed = 0;
349 } else if (vcpu->arch.dcr_needed) {
350 if (!vcpu->arch.dcr_is_write)
351 kvmppc_complete_dcr_load(vcpu, run);
352 vcpu->arch.dcr_needed = 0;
353 }
354
355 kvmppc_check_and_deliver_interrupts(vcpu);
356
357 local_irq_disable();
358 kvm_guest_enter();
359 r = __kvmppc_vcpu_run(run, vcpu);
360 kvm_guest_exit();
361 local_irq_enable();
362
363 if (vcpu->sigset_active)
364 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
365
366 return r;
367}
368
369int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
370{
371 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
372 return 0;
373}
374
375int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
376 struct kvm_mp_state *mp_state)
377{
378 return -EINVAL;
379}
380
381int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
382 struct kvm_mp_state *mp_state)
383{
384 return -EINVAL;
385}
386
387long kvm_arch_vcpu_ioctl(struct file *filp,
388 unsigned int ioctl, unsigned long arg)
389{
390 struct kvm_vcpu *vcpu = filp->private_data;
391 void __user *argp = (void __user *)arg;
392 long r;
393
394 switch (ioctl) {
395 case KVM_INTERRUPT: {
396 struct kvm_interrupt irq;
397 r = -EFAULT;
398 if (copy_from_user(&irq, argp, sizeof(irq)))
399 goto out;
400 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
401 break;
402 }
403 default:
404 r = -EINVAL;
405 }
406
407out:
408 return r;
409}
410
411int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
412{
413 return -ENOTSUPP;
414}
415
416long kvm_arch_vm_ioctl(struct file *filp,
417 unsigned int ioctl, unsigned long arg)
418{
419 long r;
420
421 switch (ioctl) {
422 default:
423 r = -EINVAL;
424 }
425
426 return r;
427}
428
429int kvm_arch_init(void *opaque)
430{
431 return 0;
432}
433
434void kvm_arch_exit(void)
435{
436}
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f6a68e178fc5..8f5f02160ffc 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -62,6 +62,10 @@ config GENERIC_LOCKBREAK
 	default y
 	depends on SMP && PREEMPT
 
+config PGSTE
+	bool
+	default y if KVM
+
 mainmenu "Linux Kernel Configuration"
 
 config S390
@@ -69,6 +73,7 @@ config S390
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_KVM if 64BIT
 
 source "init/Kconfig"
 
@@ -515,6 +520,13 @@ config ZFCPDUMP
 	  Select this option if you want to build an zfcpdump enabled kernel.
 	  Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
 
+config S390_GUEST
+	bool "s390 guest support (EXPERIMENTAL)"
+	depends on 64BIT && EXPERIMENTAL
+	select VIRTIO
+	select VIRTIO_RING
+	help
+	  Select this option if you want to run the kernel under s390 linux
 endmenu
 
 source "net/Kconfig"
@@ -536,3 +548,5 @@ source "security/Kconfig"
536source "crypto/Kconfig" 548source "crypto/Kconfig"
537 549
538source "lib/Kconfig" 550source "lib/Kconfig"
551
552source "arch/s390/kvm/Kconfig"
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index f708be367b03..792a4e7743ce 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -87,7 +87,7 @@ LDFLAGS_vmlinux := -e start
 head-y		:= arch/s390/kernel/head.o arch/s390/kernel/init_task.o
 
 core-y		+= arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \
-		   arch/s390/appldata/ arch/s390/hypfs/
+		   arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/
 libs-y		+= arch/s390/lib/
 drivers-y	+= drivers/s390/
 drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 540a67f979b6..68ec4083bf73 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -144,6 +144,10 @@ static noinline __init void detect_machine_type(void)
 	/* Running on a P/390 ? */
 	if (cpuinfo->cpu_id.machine == 0x7490)
 		machine_flags |= 4;
+
+	/* Running under KVM ? */
+	if (cpuinfo->cpu_id.version == 0xfe)
+		machine_flags |= 64;
 }
 
 #ifdef CONFIG_64BIT
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 7141147e6b63..a9d18aafa5f4 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -316,7 +316,11 @@ static int __init early_parse_ipldelay(char *p)
 early_param("ipldelay", early_parse_ipldelay);
 
 #ifdef CONFIG_S390_SWITCH_AMODE
+#ifdef CONFIG_PGSTE
+unsigned int switch_amode = 1;
+#else
 unsigned int switch_amode = 0;
+#endif
 EXPORT_SYMBOL_GPL(switch_amode);
 
 static void set_amode_and_uaccess(unsigned long user_amode,
@@ -797,9 +801,13 @@ setup_arch(char **cmdline_p)
797 "This machine has an IEEE fpu\n" : 801 "This machine has an IEEE fpu\n" :
798 "This machine has no IEEE fpu\n"); 802 "This machine has no IEEE fpu\n");
799#else /* CONFIG_64BIT */ 803#else /* CONFIG_64BIT */
800 printk((MACHINE_IS_VM) ? 804 if (MACHINE_IS_VM)
801 "We are running under VM (64 bit mode)\n" : 805 printk("We are running under VM (64 bit mode)\n");
802 "We are running native (64 bit mode)\n"); 806 else if (MACHINE_IS_KVM) {
807 printk("We are running under KVM (64 bit mode)\n");
808 add_preferred_console("ttyS", 1, NULL);
809 } else
810 printk("We are running native (64 bit mode)\n");
803#endif /* CONFIG_64BIT */ 811#endif /* CONFIG_64BIT */
804 812
805 /* Save unparsed command line copy for /proc/cmdline */ 813 /* Save unparsed command line copy for /proc/cmdline */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c5f05b3fb2c3..ca90ee3f930e 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -110,6 +110,7 @@ void account_system_vtime(struct task_struct *tsk)
 	S390_lowcore.steal_clock -= cputime << 12;
 	account_system_time(tsk, 0, cputime);
 }
+EXPORT_SYMBOL_GPL(account_system_vtime);
 
 static inline void set_vtimer(__u64 expires)
 {
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
new file mode 100644
index 000000000000..1761b74d639b
--- /dev/null
+++ b/arch/s390/kvm/Kconfig
@@ -0,0 +1,46 @@
1#
2# KVM configuration
3#
4config HAVE_KVM
5 bool
6
7menuconfig VIRTUALIZATION
8 bool "Virtualization"
9 default y
10 ---help---
11 Say Y here to get to see options for using your Linux host to run other
12 operating systems inside virtual machines (guests).
13 This option alone does not add any kernel code.
14
15 If you say N, all options in this submenu will be skipped and disabled.
16
17if VIRTUALIZATION
18
19config KVM
20 tristate "Kernel-based Virtual Machine (KVM) support"
21 depends on HAVE_KVM && EXPERIMENTAL
22 select PREEMPT_NOTIFIERS
23 select ANON_INODES
24 select S390_SWITCH_AMODE
25 select PREEMPT
26 ---help---
27 Support hosting paravirtualized guest machines using the SIE
28 virtualization capability on the mainframe. This should work
29 on any 64bit machine.
30
31 This module provides access to the hardware capabilities through
32 a character device node named /dev/kvm.
33
34 To compile this as a module, choose M here: the module
35 will be called kvm.
36
37 If unsure, say N.
38
39config KVM_TRACE
40 bool
41
42# OK, it's a little counter-intuitive to do this, but it puts it neatly under
43# the virtualization menu.
44source drivers/virtio/Kconfig
45
46endif # VIRTUALIZATION
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
new file mode 100644
index 000000000000..e5221ec0b8e3
--- /dev/null
+++ b/arch/s390/kvm/Makefile
@@ -0,0 +1,14 @@
1# Makefile for kernel virtual machines on s390
2#
3# Copyright IBM Corp. 2008
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License (version 2 only)
7# as published by the Free Software Foundation.
8
9common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
10
11EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
12
13kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o diag.o
14obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
new file mode 100644
index 000000000000..f639a152869f
--- /dev/null
+++ b/arch/s390/kvm/diag.c
@@ -0,0 +1,67 @@
1/*
2 * diag.c - handling diagnose instructions
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 */
13
14#include <linux/kvm.h>
15#include <linux/kvm_host.h>
16#include "kvm-s390.h"
17
18static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
19{
20 VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
21 vcpu->stat.diagnose_44++;
22 vcpu_put(vcpu);
23 schedule();
24 vcpu_load(vcpu);
25 return 0;
26}
27
28static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
29{
30 unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
31 unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff;
32
33 VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
34 switch (subcode) {
35 case 3:
36 vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
37 break;
38 case 4:
39 vcpu->run->s390_reset_flags = 0;
40 break;
41 default:
42 return -ENOTSUPP;
43 }
44
45 atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
46 vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
47 vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
48 vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
49 vcpu->run->exit_reason = KVM_EXIT_S390_RESET;
50 VCPU_EVENT(vcpu, 3, "requesting userspace resets %lx",
51 vcpu->run->s390_reset_flags);
52 return -EREMOTE;
53}
54
55int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
56{
57 int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
58
59 switch (code) {
60 case 0x44:
61 return __diag_time_slice_end(vcpu);
62 case 0x308:
63 return __diag_ipl_functions(vcpu);
64 default:
65 return -ENOTSUPP;
66 }
67}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
new file mode 100644
index 000000000000..4e0633c413f3
--- /dev/null
+++ b/arch/s390/kvm/gaccess.h
@@ -0,0 +1,274 @@
1/*
2 * gaccess.h - access guest memory
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 */
12
13#ifndef __KVM_S390_GACCESS_H
14#define __KVM_S390_GACCESS_H
15
16#include <linux/compiler.h>
17#include <linux/kvm_host.h>
18#include <asm/uaccess.h>
19
20static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
21 u64 guestaddr)
22{
23 u64 prefix = vcpu->arch.sie_block->prefix;
24 u64 origin = vcpu->kvm->arch.guest_origin;
25 u64 memsize = vcpu->kvm->arch.guest_memsize;
26
27 if (guestaddr < 2 * PAGE_SIZE)
28 guestaddr += prefix;
29 else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE))
30 guestaddr -= prefix;
31
32 if (guestaddr > memsize)
33 return (void __user __force *) ERR_PTR(-EFAULT);
34
35 guestaddr += origin;
36
37 return (void __user *) guestaddr;
38}
39
40static inline int get_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
41 u64 *result)
42{
43 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
44
45 BUG_ON(guestaddr & 7);
46
47 if (IS_ERR((void __force *) uptr))
48 return PTR_ERR((void __force *) uptr);
49
50 return get_user(*result, (u64 __user *) uptr);
51}
52
53static inline int get_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
54 u32 *result)
55{
56 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
57
58 BUG_ON(guestaddr & 3);
59
60 if (IS_ERR((void __force *) uptr))
61 return PTR_ERR((void __force *) uptr);
62
63 return get_user(*result, (u32 __user *) uptr);
64}
65
66static inline int get_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
67 u16 *result)
68{
69 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
70
71 BUG_ON(guestaddr & 1);
72
73 if (IS_ERR(uptr))
74 return PTR_ERR(uptr);
75
76 return get_user(*result, (u16 __user *) uptr);
77}
78
79static inline int get_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
80 u8 *result)
81{
82 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
83
84 if (IS_ERR((void __force *) uptr))
85 return PTR_ERR((void __force *) uptr);
86
87 return get_user(*result, (u8 __user *) uptr);
88}
89
90static inline int put_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
91 u64 value)
92{
93 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
94
95 BUG_ON(guestaddr & 7);
96
97 if (IS_ERR((void __force *) uptr))
98 return PTR_ERR((void __force *) uptr);
99
100 return put_user(value, (u64 __user *) uptr);
101}
102
103static inline int put_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
104 u32 value)
105{
106 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
107
108 BUG_ON(guestaddr & 3);
109
110 if (IS_ERR((void __force *) uptr))
111 return PTR_ERR((void __force *) uptr);
112
113 return put_user(value, (u32 __user *) uptr);
114}
115
116static inline int put_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
117 u16 value)
118{
119 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
120
121 BUG_ON(guestaddr & 1);
122
123 if (IS_ERR((void __force *) uptr))
124 return PTR_ERR((void __force *) uptr);
125
126 return put_user(value, (u16 __user *) uptr);
127}
128
129static inline int put_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
130 u8 value)
131{
132 void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
133
134 if (IS_ERR((void __force *) uptr))
135 return PTR_ERR((void __force *) uptr);
136
137 return put_user(value, (u8 __user *) uptr);
138}
139
140
141static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, u64 guestdest,
142 const void *from, unsigned long n)
143{
144 int rc;
145 unsigned long i;
146 const u8 *data = from;
147
148 for (i = 0; i < n; i++) {
149 rc = put_guest_u8(vcpu, guestdest++, *(data++));
150 if (rc < 0)
151 return rc;
152 }
153 return 0;
154}
155
156static inline int copy_to_guest(struct kvm_vcpu *vcpu, u64 guestdest,
157 const void *from, unsigned long n)
158{
159 u64 prefix = vcpu->arch.sie_block->prefix;
160 u64 origin = vcpu->kvm->arch.guest_origin;
161 u64 memsize = vcpu->kvm->arch.guest_memsize;
162
163 if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
164 goto slowpath;
165
166 if ((guestdest < prefix) && (guestdest + n > prefix))
167 goto slowpath;
168
169 if ((guestdest < prefix + 2 * PAGE_SIZE)
170 && (guestdest + n > prefix + 2 * PAGE_SIZE))
171 goto slowpath;
172
173 if (guestdest < 2 * PAGE_SIZE)
174 guestdest += prefix;
175 else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE))
176 guestdest -= prefix;
177
178 if (guestdest + n > memsize)
179 return -EFAULT;
180
181 if (guestdest + n < guestdest)
182 return -EFAULT;
183
184 guestdest += origin;
185
186 return copy_to_user((void __user *) guestdest, from, n);
187slowpath:
188 return __copy_to_guest_slow(vcpu, guestdest, from, n);
189}
190
191static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to,
192 u64 guestsrc, unsigned long n)
193{
194 int rc;
195 unsigned long i;
196 u8 *data = to;
197
198 for (i = 0; i < n; i++) {
199 rc = get_guest_u8(vcpu, guestsrc++, data++);
200 if (rc < 0)
201 return rc;
202 }
203 return 0;
204}
205
206static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
207 u64 guestsrc, unsigned long n)
208{
209 u64 prefix = vcpu->arch.sie_block->prefix;
210 u64 origin = vcpu->kvm->arch.guest_origin;
211 u64 memsize = vcpu->kvm->arch.guest_memsize;
212
213 if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
214 goto slowpath;
215
216 if ((guestsrc < prefix) && (guestsrc + n > prefix))
217 goto slowpath;
218
219 if ((guestsrc < prefix + 2 * PAGE_SIZE)
220 && (guestsrc + n > prefix + 2 * PAGE_SIZE))
221 goto slowpath;
222
223 if (guestsrc < 2 * PAGE_SIZE)
224 guestsrc += prefix;
225 else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE))
226 guestsrc -= prefix;
227
228 if (guestsrc + n > memsize)
229 return -EFAULT;
230
231 if (guestsrc + n < guestsrc)
232 return -EFAULT;
233
234 guestsrc += origin;
235
236 return copy_from_user(to, (void __user *) guestsrc, n);
237slowpath:
238 return __copy_from_guest_slow(vcpu, to, guestsrc, n);
239}
240
241static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, u64 guestdest,
242 const void *from, unsigned long n)
243{
244 u64 origin = vcpu->kvm->arch.guest_origin;
245 u64 memsize = vcpu->kvm->arch.guest_memsize;
246
247 if (guestdest + n > memsize)
248 return -EFAULT;
249
250 if (guestdest + n < guestdest)
251 return -EFAULT;
252
253 guestdest += origin;
254
255 return copy_to_user((void __user *) guestdest, from, n);
256}
257
258static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
259 u64 guestsrc, unsigned long n)
260{
261 u64 origin = vcpu->kvm->arch.guest_origin;
262 u64 memsize = vcpu->kvm->arch.guest_memsize;
263
264 if (guestsrc + n > memsize)
265 return -EFAULT;
266
267 if (guestsrc + n < guestsrc)
268 return -EFAULT;
269
270 guestsrc += origin;
271
272 return copy_from_user(to, (void __user *) guestsrc, n);
273}
274#endif
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
new file mode 100644
index 000000000000..349581a26103
--- /dev/null
+++ b/arch/s390/kvm/intercept.c
@@ -0,0 +1,216 @@
1/*
2 * intercept.c - in-kernel handling for sie intercepts
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 */
13
14#include <linux/kvm_host.h>
15#include <linux/errno.h>
16#include <linux/pagemap.h>
17
18#include <asm/kvm_host.h>
19
20#include "kvm-s390.h"
21#include "gaccess.h"
22
23static int handle_lctg(struct kvm_vcpu *vcpu)
24{
25 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
26 int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
27 int base2 = vcpu->arch.sie_block->ipb >> 28;
28 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
29 ((vcpu->arch.sie_block->ipb & 0xff00) << 4);
30 u64 useraddr;
31 int reg, rc;
32
33 vcpu->stat.instruction_lctg++;
34 if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f)
35 return -ENOTSUPP;
36
37 useraddr = disp2;
38 if (base2)
39 useraddr += vcpu->arch.guest_gprs[base2];
40
41 reg = reg1;
42
43 VCPU_EVENT(vcpu, 5, "lctg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
44 disp2);
45
46 do {
47 rc = get_guest_u64(vcpu, useraddr,
48 &vcpu->arch.sie_block->gcr[reg]);
49 if (rc == -EFAULT) {
50 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
51 break;
52 }
53 useraddr += 8;
54 if (reg == reg3)
55 break;
56 reg = (reg + 1) % 16;
57 } while (1);
58 return 0;
59}
60
61static int handle_lctl(struct kvm_vcpu *vcpu)
62{
63 int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
64 int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
65 int base2 = vcpu->arch.sie_block->ipb >> 28;
66 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
67 u64 useraddr;
68 u32 val = 0;
69 int reg, rc;
70
71 vcpu->stat.instruction_lctl++;
72
73 useraddr = disp2;
74 if (base2)
75 useraddr += vcpu->arch.guest_gprs[base2];
76
77 VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
78 disp2);
79
80 reg = reg1;
81 do {
82 rc = get_guest_u32(vcpu, useraddr, &val);
83 if (rc == -EFAULT) {
84 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
85 break;
86 }
87 vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
88 vcpu->arch.sie_block->gcr[reg] |= val;
89 useraddr += 4;
90 if (reg == reg3)
91 break;
92 reg = (reg + 1) % 16;
93 } while (1);
94 return 0;
95}
96
97static intercept_handler_t instruction_handlers[256] = {
98 [0x83] = kvm_s390_handle_diag,
99 [0xae] = kvm_s390_handle_sigp,
100 [0xb2] = kvm_s390_handle_priv,
101 [0xb7] = handle_lctl,
102 [0xeb] = handle_lctg,
103};
104
105static int handle_noop(struct kvm_vcpu *vcpu)
106{
107 switch (vcpu->arch.sie_block->icptcode) {
108 case 0x10:
109 vcpu->stat.exit_external_request++;
110 break;
111 case 0x14:
112 vcpu->stat.exit_external_interrupt++;
113 break;
114 default:
115 break; /* nothing */
116 }
117 return 0;
118}
119
120static int handle_stop(struct kvm_vcpu *vcpu)
121{
122 int rc;
123
124 vcpu->stat.exit_stop_request++;
125 atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
126 spin_lock_bh(&vcpu->arch.local_int.lock);
127 if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
128 vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
129 rc = __kvm_s390_vcpu_store_status(vcpu,
130 KVM_S390_STORE_STATUS_NOADDR);
131 if (rc >= 0)
132 rc = -ENOTSUPP;
133 }
134
135 if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
136 vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
137 VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
138 rc = -ENOTSUPP;
139 } else
140 rc = 0;
141 spin_unlock_bh(&vcpu->arch.local_int.lock);
142 return rc;
143}
144
145static int handle_validity(struct kvm_vcpu *vcpu)
146{
147 int viwhy = vcpu->arch.sie_block->ipb >> 16;
148 vcpu->stat.exit_validity++;
149 if (viwhy == 0x37) {
150 fault_in_pages_writeable((char __user *)
151 vcpu->kvm->arch.guest_origin +
152 vcpu->arch.sie_block->prefix,
153 PAGE_SIZE);
154 return 0;
155 }
156 VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
157 viwhy);
158 return -ENOTSUPP;
159}
160
161static int handle_instruction(struct kvm_vcpu *vcpu)
162{
163 intercept_handler_t handler;
164
165 vcpu->stat.exit_instruction++;
166 handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
167 if (handler)
168 return handler(vcpu);
169 return -ENOTSUPP;
170}
171
172static int handle_prog(struct kvm_vcpu *vcpu)
173{
174 vcpu->stat.exit_program_interruption++;
175 return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
176}
177
178static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
179{
180 int rc, rc2;
181
182 vcpu->stat.exit_instr_and_program++;
183 rc = handle_instruction(vcpu);
184 rc2 = handle_prog(vcpu);
185
186 if (rc == -ENOTSUPP)
187 vcpu->arch.sie_block->icptcode = 0x04;
188 if (rc)
189 return rc;
190 return rc2;
191}
192
193static const intercept_handler_t intercept_funcs[0x48 >> 2] = {
194 [0x00 >> 2] = handle_noop,
195 [0x04 >> 2] = handle_instruction,
196 [0x08 >> 2] = handle_prog,
197 [0x0C >> 2] = handle_instruction_and_prog,
198 [0x10 >> 2] = handle_noop,
199 [0x14 >> 2] = handle_noop,
200 [0x1C >> 2] = kvm_s390_handle_wait,
201 [0x20 >> 2] = handle_validity,
202 [0x28 >> 2] = handle_stop,
203};
204
205int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
206{
207 intercept_handler_t func;
208 u8 code = vcpu->arch.sie_block->icptcode;
209
210 if (code & 3 || code > 0x48)
211 return -ENOTSUPP;
212 func = intercept_funcs[code >> 2];
213 if (func)
214 return func(vcpu);
215 return -ENOTSUPP;
216}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
new file mode 100644
index 000000000000..fcd1ed8015c1
--- /dev/null
+++ b/arch/s390/kvm/interrupt.c
@@ -0,0 +1,592 @@
1/*
2 * interrupt.c - handling kvm guest interrupts
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 */
12
13#include <asm/lowcore.h>
14#include <asm/uaccess.h>
15#include <linux/kvm_host.h>
16#include "kvm-s390.h"
17#include "gaccess.h"
18
19static int psw_extint_disabled(struct kvm_vcpu *vcpu)
20{
21 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
22}
23
24static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
25{
26 if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) ||
27 (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) ||
28 (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT))
29 return 0;
30 return 1;
31}
32
33static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
34 struct interrupt_info *inti)
35{
36 switch (inti->type) {
37 case KVM_S390_INT_EMERGENCY:
38 if (psw_extint_disabled(vcpu))
39 return 0;
40 if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
41 return 1;
42 return 0;
43 case KVM_S390_INT_SERVICE:
44 if (psw_extint_disabled(vcpu))
45 return 0;
46 if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
47 return 1;
48 return 0;
49 case KVM_S390_INT_VIRTIO:
50 if (psw_extint_disabled(vcpu))
51 return 0;
52 if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
53 return 1;
54 return 0;
55 case KVM_S390_PROGRAM_INT:
56 case KVM_S390_SIGP_STOP:
57 case KVM_S390_SIGP_SET_PREFIX:
58 case KVM_S390_RESTART:
59 return 1;
60 default:
61 BUG();
62 }
63 return 0;
64}
65
66static void __set_cpu_idle(struct kvm_vcpu *vcpu)
67{
68 BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
69 atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
70 set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
71}
72
73static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
74{
75 BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
76 atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
77 clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
78}
79
80static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
81{
82 atomic_clear_mask(CPUSTAT_ECALL_PEND |
83 CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
84 &vcpu->arch.sie_block->cpuflags);
85 vcpu->arch.sie_block->lctl = 0x0000;
86}
87
88static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
89{
90 atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
91}
92
93static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
94 struct interrupt_info *inti)
95{
96 switch (inti->type) {
97 case KVM_S390_INT_EMERGENCY:
98 case KVM_S390_INT_SERVICE:
99 case KVM_S390_INT_VIRTIO:
100 if (psw_extint_disabled(vcpu))
101 __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
102 else
103 vcpu->arch.sie_block->lctl |= LCTL_CR0;
104 break;
105 case KVM_S390_SIGP_STOP:
106 __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
107 break;
108 default:
109 BUG();
110 }
111}
112
113static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
114 struct interrupt_info *inti)
115{
116 const unsigned short table[] = { 2, 4, 4, 6 };
117 int rc, exception = 0;
118
119 switch (inti->type) {
120 case KVM_S390_INT_EMERGENCY:
121 VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
122 vcpu->stat.deliver_emergency_signal++;
123 rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201);
124 if (rc == -EFAULT)
125 exception = 1;
126
127 rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
128 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
129 if (rc == -EFAULT)
130 exception = 1;
131
132 rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
133 __LC_EXT_NEW_PSW, sizeof(psw_t));
134 if (rc == -EFAULT)
135 exception = 1;
136 break;
137
138 case KVM_S390_INT_SERVICE:
139 VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
140 inti->ext.ext_params);
141 vcpu->stat.deliver_service_signal++;
142 rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401);
143 if (rc == -EFAULT)
144 exception = 1;
145
146 rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
147 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
148 if (rc == -EFAULT)
149 exception = 1;
150
151 rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
152 __LC_EXT_NEW_PSW, sizeof(psw_t));
153 if (rc == -EFAULT)
154 exception = 1;
155
156 rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
157 if (rc == -EFAULT)
158 exception = 1;
159 break;
160
161 case KVM_S390_INT_VIRTIO:
162 VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%lx",
163 inti->ext.ext_params, inti->ext.ext_params2);
164 vcpu->stat.deliver_virtio_interrupt++;
165 rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603);
166 if (rc == -EFAULT)
167 exception = 1;
168
169 rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0d00);
170 if (rc == -EFAULT)
171 exception = 1;
172
173 rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
174 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
175 if (rc == -EFAULT)
176 exception = 1;
177
178 rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
179 __LC_EXT_NEW_PSW, sizeof(psw_t));
180 if (rc == -EFAULT)
181 exception = 1;
182
183 rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
184 if (rc == -EFAULT)
185 exception = 1;
186
187 rc = put_guest_u64(vcpu, __LC_PFAULT_INTPARM,
188 inti->ext.ext_params2);
189 if (rc == -EFAULT)
190 exception = 1;
191 break;
192
193 case KVM_S390_SIGP_STOP:
194 VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
195 vcpu->stat.deliver_stop_signal++;
196 __set_intercept_indicator(vcpu, inti);
197 break;
198
199 case KVM_S390_SIGP_SET_PREFIX:
200 VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
201 inti->prefix.address);
202 vcpu->stat.deliver_prefix_signal++;
203 vcpu->arch.sie_block->prefix = inti->prefix.address;
204 vcpu->arch.sie_block->ihcpu = 0xffff;
205 break;
206
207 case KVM_S390_RESTART:
208 VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
209 vcpu->stat.deliver_restart_signal++;
210 rc = copy_to_guest(vcpu, offsetof(struct _lowcore,
211 restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
212 if (rc == -EFAULT)
213 exception = 1;
214
215 rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
216 offsetof(struct _lowcore, restart_psw), sizeof(psw_t));
217 if (rc == -EFAULT)
218 exception = 1;
219 break;
220
221 case KVM_S390_PROGRAM_INT:
222 VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
223 inti->pgm.code,
224 table[vcpu->arch.sie_block->ipa >> 14]);
225 vcpu->stat.deliver_program_int++;
226 rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code);
227 if (rc == -EFAULT)
228 exception = 1;
229
230 rc = put_guest_u16(vcpu, __LC_PGM_ILC,
231 table[vcpu->arch.sie_block->ipa >> 14]);
232 if (rc == -EFAULT)
233 exception = 1;
234
235 rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
236 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
237 if (rc == -EFAULT)
238 exception = 1;
239
240 rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
241 __LC_PGM_NEW_PSW, sizeof(psw_t));
242 if (rc == -EFAULT)
243 exception = 1;
244 break;
245
246 default:
247 BUG();
248 }
249
250 if (exception) {
251 VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering"
252 " interrupt");
253 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
254 if (inti->type == KVM_S390_PROGRAM_INT) {
255 printk(KERN_WARNING "kvm: recursive program check\n");
256 BUG();
257 }
258 }
259}
260
261static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
262{
263 int rc, exception = 0;
264
265 if (psw_extint_disabled(vcpu))
266 return 0;
267 if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
268 return 0;
269 rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004);
270 if (rc == -EFAULT)
271 exception = 1;
272 rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
273 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
274 if (rc == -EFAULT)
275 exception = 1;
276 rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
277 __LC_EXT_NEW_PSW, sizeof(psw_t));
278 if (rc == -EFAULT)
279 exception = 1;
280
281 if (exception) {
282 VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" \
283 " ckc interrupt");
284 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
285 return 0;
286 }
287
288 return 1;
289}
290
291int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
292{
293 struct local_interrupt *li = &vcpu->arch.local_int;
294 struct float_interrupt *fi = vcpu->arch.local_int.float_int;
295 struct interrupt_info *inti;
296 int rc = 0;
297
298 if (atomic_read(&li->active)) {
299 spin_lock_bh(&li->lock);
300 list_for_each_entry(inti, &li->list, list)
301 if (__interrupt_is_deliverable(vcpu, inti)) {
302 rc = 1;
303 break;
304 }
305 spin_unlock_bh(&li->lock);
306 }
307
308 if ((!rc) && atomic_read(&fi->active)) {
309 spin_lock_bh(&fi->lock);
310 list_for_each_entry(inti, &fi->list, list)
311 if (__interrupt_is_deliverable(vcpu, inti)) {
312 rc = 1;
313 break;
314 }
315 spin_unlock_bh(&fi->lock);
316 }
317
318 if ((!rc) && (vcpu->arch.sie_block->ckc <
319 get_clock() + vcpu->arch.sie_block->epoch)) {
320 if ((!psw_extint_disabled(vcpu)) &&
321 (vcpu->arch.sie_block->gcr[0] & 0x800ul))
322 rc = 1;
323 }
324
325 return rc;
326}
327
328int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
329{
330 return 0;
331}
332
333int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
334{
335 u64 now, sltime;
336 DECLARE_WAITQUEUE(wait, current);
337
338 vcpu->stat.exit_wait_state++;
339 if (kvm_cpu_has_interrupt(vcpu))
340 return 0;
341
342 if (psw_interrupts_disabled(vcpu)) {
343 VCPU_EVENT(vcpu, 3, "%s", "disabled wait");
344 __unset_cpu_idle(vcpu);
345 return -ENOTSUPP; /* disabled wait */
346 }
347
348 if (psw_extint_disabled(vcpu) ||
349 (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) {
350 VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
351 goto no_timer;
352 }
353
354 now = get_clock() + vcpu->arch.sie_block->epoch;
355 if (vcpu->arch.sie_block->ckc < now) {
356 __unset_cpu_idle(vcpu);
357 return 0;
358 }
359
360 sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1;
361
362 vcpu->arch.ckc_timer.expires = jiffies + sltime;
363
364 add_timer(&vcpu->arch.ckc_timer);
365 VCPU_EVENT(vcpu, 5, "enabled wait timer:%lx jiffies", sltime);
366no_timer:
367 spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
368 spin_lock_bh(&vcpu->arch.local_int.lock);
369 __set_cpu_idle(vcpu);
370 vcpu->arch.local_int.timer_due = 0;
371 add_wait_queue(&vcpu->arch.local_int.wq, &wait);
372 while (list_empty(&vcpu->arch.local_int.list) &&
373 list_empty(&vcpu->arch.local_int.float_int->list) &&
374 (!vcpu->arch.local_int.timer_due) &&
375 !signal_pending(current)) {
376 set_current_state(TASK_INTERRUPTIBLE);
377 spin_unlock_bh(&vcpu->arch.local_int.lock);
378 spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
379 vcpu_put(vcpu);
380 schedule();
381 vcpu_load(vcpu);
382 spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
383 spin_lock_bh(&vcpu->arch.local_int.lock);
384 }
385 __unset_cpu_idle(vcpu);
386 __set_current_state(TASK_RUNNING);
387 remove_wait_queue(&vcpu->arch.local_int.wq, &wait);
388 spin_unlock_bh(&vcpu->arch.local_int.lock);
389 spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
390 del_timer(&vcpu->arch.ckc_timer);
391 return 0;
392}
393
394void kvm_s390_idle_wakeup(unsigned long data)
395{
396 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
397
398 spin_lock_bh(&vcpu->arch.local_int.lock);
399 vcpu->arch.local_int.timer_due = 1;
400 if (waitqueue_active(&vcpu->arch.local_int.wq))
401 wake_up_interruptible(&vcpu->arch.local_int.wq);
402 spin_unlock_bh(&vcpu->arch.local_int.lock);
403}
404
405
406void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
407{
408 struct local_interrupt *li = &vcpu->arch.local_int;
409 struct float_interrupt *fi = vcpu->arch.local_int.float_int;
410 struct interrupt_info *n, *inti = NULL;
411 int deliver;
412
413 __reset_intercept_indicators(vcpu);
414 if (atomic_read(&li->active)) {
415 do {
416 deliver = 0;
417 spin_lock_bh(&li->lock);
418 list_for_each_entry_safe(inti, n, &li->list, list) {
419 if (__interrupt_is_deliverable(vcpu, inti)) {
420 list_del(&inti->list);
421 deliver = 1;
422 break;
423 }
424 __set_intercept_indicator(vcpu, inti);
425 }
426 if (list_empty(&li->list))
427 atomic_set(&li->active, 0);
428 spin_unlock_bh(&li->lock);
429 if (deliver) {
430 __do_deliver_interrupt(vcpu, inti);
431 kfree(inti);
432 }
433 } while (deliver);
434 }
435
436 if ((vcpu->arch.sie_block->ckc <
437 get_clock() + vcpu->arch.sie_block->epoch))
438 __try_deliver_ckc_interrupt(vcpu);
439
440 if (atomic_read(&fi->active)) {
441 do {
442 deliver = 0;
443 spin_lock_bh(&fi->lock);
444 list_for_each_entry_safe(inti, n, &fi->list, list) {
445 if (__interrupt_is_deliverable(vcpu, inti)) {
446 list_del(&inti->list);
447 deliver = 1;
448 break;
449 }
450 __set_intercept_indicator(vcpu, inti);
451 }
452 if (list_empty(&fi->list))
453 atomic_set(&fi->active, 0);
454 spin_unlock_bh(&fi->lock);
455 if (deliver) {
456 __do_deliver_interrupt(vcpu, inti);
457 kfree(inti);
458 }
459 } while (deliver);
460 }
461}
462
463int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
464{
465 struct local_interrupt *li = &vcpu->arch.local_int;
466 struct interrupt_info *inti;
467
468 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
469 if (!inti)
470 return -ENOMEM;
471
472 inti->type = KVM_S390_PROGRAM_INT;
473 inti->pgm.code = code;
474
475 VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
476 spin_lock_bh(&li->lock);
477 list_add(&inti->list, &li->list);
478 atomic_set(&li->active, 1);
479 BUG_ON(waitqueue_active(&li->wq));
480 spin_unlock_bh(&li->lock);
481 return 0;
482}
483
484int kvm_s390_inject_vm(struct kvm *kvm,
485 struct kvm_s390_interrupt *s390int)
486{
487 struct local_interrupt *li;
488 struct float_interrupt *fi;
489 struct interrupt_info *inti;
490 int sigcpu;
491
492 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
493 if (!inti)
494 return -ENOMEM;
495
496 switch (s390int->type) {
497 case KVM_S390_INT_VIRTIO:
498 VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%lx",
499 s390int->parm, s390int->parm64);
500 inti->type = s390int->type;
501 inti->ext.ext_params = s390int->parm;
502 inti->ext.ext_params2 = s390int->parm64;
503 break;
504 case KVM_S390_INT_SERVICE:
505 VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
506 inti->type = s390int->type;
507 inti->ext.ext_params = s390int->parm;
508 break;
509 case KVM_S390_PROGRAM_INT:
510 case KVM_S390_SIGP_STOP:
511 case KVM_S390_INT_EMERGENCY:
512 default:
513 kfree(inti);
514 return -EINVAL;
515 }
516
517 mutex_lock(&kvm->lock);
518 fi = &kvm->arch.float_int;
519 spin_lock_bh(&fi->lock);
520 list_add_tail(&inti->list, &fi->list);
521 atomic_set(&fi->active, 1);
522 sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
523 if (sigcpu == KVM_MAX_VCPUS) {
524 do {
525 sigcpu = fi->next_rr_cpu++;
526 if (sigcpu == KVM_MAX_VCPUS)
527 sigcpu = fi->next_rr_cpu = 0;
528 } while (fi->local_int[sigcpu] == NULL);
529 }
530 li = fi->local_int[sigcpu];
531 spin_lock_bh(&li->lock);
532 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
533 if (waitqueue_active(&li->wq))
534 wake_up_interruptible(&li->wq);
535 spin_unlock_bh(&li->lock);
536 spin_unlock_bh(&fi->lock);
537 mutex_unlock(&kvm->lock);
538 return 0;
539}
540
541int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
542 struct kvm_s390_interrupt *s390int)
543{
544 struct local_interrupt *li;
545 struct interrupt_info *inti;
546
547 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
548 if (!inti)
549 return -ENOMEM;
550
551 switch (s390int->type) {
552 case KVM_S390_PROGRAM_INT:
553 if (s390int->parm & 0xffff0000) {
554 kfree(inti);
555 return -EINVAL;
556 }
557 inti->type = s390int->type;
558 inti->pgm.code = s390int->parm;
559 VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
560 s390int->parm);
561 break;
562 case KVM_S390_SIGP_STOP:
563 case KVM_S390_RESTART:
564 case KVM_S390_SIGP_SET_PREFIX:
565 case KVM_S390_INT_EMERGENCY:
566 VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
567 inti->type = s390int->type;
568 break;
569 case KVM_S390_INT_VIRTIO:
570 case KVM_S390_INT_SERVICE:
571 default:
572 kfree(inti);
573 return -EINVAL;
574 }
575
576 mutex_lock(&vcpu->kvm->lock);
577 li = &vcpu->arch.local_int;
578 spin_lock_bh(&li->lock);
579 if (inti->type == KVM_S390_PROGRAM_INT)
580 list_add(&inti->list, &li->list);
581 else
582 list_add_tail(&inti->list, &li->list);
583 atomic_set(&li->active, 1);
584 if (inti->type == KVM_S390_SIGP_STOP)
585 li->action_bits |= ACTION_STOP_ON_STOP;
586 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
587 if (waitqueue_active(&li->wq))
588 wake_up_interruptible(&vcpu->arch.local_int.wq);
589 spin_unlock_bh(&li->lock);
590 mutex_unlock(&vcpu->kvm->lock);
591 return 0;
592}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
new file mode 100644
index 000000000000..98d1e73e01f1
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.c
@@ -0,0 +1,685 @@
1/*
2 * kvm-s390.c -- hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 */
14
15#include <linux/compiler.h>
16#include <linux/err.h>
17#include <linux/fs.h>
18#include <linux/init.h>
19#include <linux/kvm.h>
20#include <linux/kvm_host.h>
21#include <linux/module.h>
22#include <linux/slab.h>
23#include <linux/timer.h>
24#include <asm/lowcore.h>
25#include <asm/pgtable.h>
26
27#include "kvm-s390.h"
28#include "gaccess.h"
29
30#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
31
32struct kvm_stats_debugfs_item debugfs_entries[] = {
33 { "userspace_handled", VCPU_STAT(exit_userspace) },
34 { "exit_validity", VCPU_STAT(exit_validity) },
35 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
36 { "exit_external_request", VCPU_STAT(exit_external_request) },
37 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
38 { "exit_instruction", VCPU_STAT(exit_instruction) },
39 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
40 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
41 { "instruction_lctg", VCPU_STAT(instruction_lctg) },
42 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
43 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
44 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
45 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
46 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
47 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
48 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
49 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
50 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
51 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
52 { "instruction_spx", VCPU_STAT(instruction_spx) },
53 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
54 { "instruction_stap", VCPU_STAT(instruction_stap) },
55 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
56 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
57 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
58 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
59 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
60 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
61 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
62 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
63 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
64 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
65 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
66 { "diagnose_44", VCPU_STAT(diagnose_44) },
67 { NULL }
68};
69
70
71/* Section: not file related */
72void kvm_arch_hardware_enable(void *garbage)
73{
74 /* every s390 is virtualization enabled ;-) */
75}
76
77void kvm_arch_hardware_disable(void *garbage)
78{
79}
80
81void decache_vcpus_on_cpu(int cpu)
82{
83}
84
85int kvm_arch_hardware_setup(void)
86{
87 return 0;
88}
89
90void kvm_arch_hardware_unsetup(void)
91{
92}
93
94void kvm_arch_check_processor_compat(void *rtn)
95{
96}
97
98int kvm_arch_init(void *opaque)
99{
100 return 0;
101}
102
103void kvm_arch_exit(void)
104{
105}
106
107/* Section: device related */
108long kvm_arch_dev_ioctl(struct file *filp,
109 unsigned int ioctl, unsigned long arg)
110{
111 if (ioctl == KVM_S390_ENABLE_SIE)
112 return s390_enable_sie();
113 return -EINVAL;
114}
115
116int kvm_dev_ioctl_check_extension(long ext)
117{
118 return 0;
119}
120
121/* Section: vm related */
122/*
123 * Get (and clear) the dirty memory log for a memory slot.
124 */
125int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
126 struct kvm_dirty_log *log)
127{
128 return 0;
129}
130
131long kvm_arch_vm_ioctl(struct file *filp,
132 unsigned int ioctl, unsigned long arg)
133{
134 struct kvm *kvm = filp->private_data;
135 void __user *argp = (void __user *)arg;
136 int r;
137
138 switch (ioctl) {
139 case KVM_S390_INTERRUPT: {
140 struct kvm_s390_interrupt s390int;
141
142 r = -EFAULT;
143 if (copy_from_user(&s390int, argp, sizeof(s390int)))
144 break;
145 r = kvm_s390_inject_vm(kvm, &s390int);
146 break;
147 }
148 default:
149 r = -EINVAL;
150 }
151
152 return r;
153}
154
155struct kvm *kvm_arch_create_vm(void)
156{
157 struct kvm *kvm;
158 int rc;
159 char debug_name[16];
160
161 rc = s390_enable_sie();
162 if (rc)
163 goto out_nokvm;
164
165 rc = -ENOMEM;
166 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
167 if (!kvm)
168 goto out_nokvm;
169
170 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
171 if (!kvm->arch.sca)
172 goto out_nosca;
173
174 sprintf(debug_name, "kvm-%u", current->pid);
175
176 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
177 if (!kvm->arch.dbf)
178 goto out_nodbf;
179
180 spin_lock_init(&kvm->arch.float_int.lock);
181 INIT_LIST_HEAD(&kvm->arch.float_int.list);
182
183 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
184 VM_EVENT(kvm, 3, "%s", "vm created");
185
186 try_module_get(THIS_MODULE);
187
188 return kvm;
189out_nodbf:
190 free_page((unsigned long)(kvm->arch.sca));
191out_nosca:
192 kfree(kvm);
193out_nokvm:
194 return ERR_PTR(rc);
195}
196
197void kvm_arch_destroy_vm(struct kvm *kvm)
198{
199 debug_unregister(kvm->arch.dbf);
200 free_page((unsigned long)(kvm->arch.sca));
201 kfree(kvm);
202 module_put(THIS_MODULE);
203}
204
205/* Section: vcpu related */
206int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
207{
208 return 0;
209}
210
211void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
212{
213 /* kvm common code refers to this, but doesn't call it */
214 BUG();
215}
216
217void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
218{
219 save_fp_regs(&vcpu->arch.host_fpregs);
220 save_access_regs(vcpu->arch.host_acrs);
221 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
222 restore_fp_regs(&vcpu->arch.guest_fpregs);
223 restore_access_regs(vcpu->arch.guest_acrs);
224
225 if (signal_pending(current))
226 atomic_set_mask(CPUSTAT_STOP_INT,
227 &vcpu->arch.sie_block->cpuflags);
228}
229
230void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
231{
232 save_fp_regs(&vcpu->arch.guest_fpregs);
233 save_access_regs(vcpu->arch.guest_acrs);
234 restore_fp_regs(&vcpu->arch.host_fpregs);
235 restore_access_regs(vcpu->arch.host_acrs);
236}
237
238static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
239{
240 /* this equals initial cpu reset in pop, but we don't switch to ESA */
241 vcpu->arch.sie_block->gpsw.mask = 0UL;
242 vcpu->arch.sie_block->gpsw.addr = 0UL;
243 vcpu->arch.sie_block->prefix = 0UL;
244 vcpu->arch.sie_block->ihcpu = 0xffff;
245 vcpu->arch.sie_block->cputm = 0UL;
246 vcpu->arch.sie_block->ckc = 0UL;
247 vcpu->arch.sie_block->todpr = 0;
248 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
249 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
250 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
251 vcpu->arch.guest_fpregs.fpc = 0;
252 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
253 vcpu->arch.sie_block->gbea = 1;
254}
255
256int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
257{
258 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
259 vcpu->arch.sie_block->gmslm = 0xffffffffffUL;
260 vcpu->arch.sie_block->gmsor = 0x000000000000;
261 vcpu->arch.sie_block->ecb = 2;
262 vcpu->arch.sie_block->eca = 0xC1002001U;
263 setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
264 (unsigned long) vcpu);
265 get_cpu_id(&vcpu->arch.cpu_id);
266 vcpu->arch.cpu_id.version = 0xfe;
267 return 0;
268}
269
270struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
271 unsigned int id)
272{
273 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
274 int rc = -ENOMEM;
275
276 if (!vcpu)
277 goto out_nomem;
278
279 vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL);
280
281 if (!vcpu->arch.sie_block)
282 goto out_free_cpu;
283
284 vcpu->arch.sie_block->icpua = id;
285 BUG_ON(!kvm->arch.sca);
286 BUG_ON(kvm->arch.sca->cpu[id].sda);
287 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
288 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
289 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
290
291 spin_lock_init(&vcpu->arch.local_int.lock);
292 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
293 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
294 spin_lock_bh(&kvm->arch.float_int.lock);
295 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
296 init_waitqueue_head(&vcpu->arch.local_int.wq);
297 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
298 spin_unlock_bh(&kvm->arch.float_int.lock);
299
300 rc = kvm_vcpu_init(vcpu, kvm, id);
301 if (rc)
302 goto out_free_cpu;
303 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
304 vcpu->arch.sie_block);
305
306 try_module_get(THIS_MODULE);
307
308 return vcpu;
309out_free_cpu:
310 kfree(vcpu);
311out_nomem:
312 return ERR_PTR(rc);
313}
314
315void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
316{
317 VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
318 free_page((unsigned long)(vcpu->arch.sie_block));
319 kfree(vcpu);
320 module_put(THIS_MODULE);
321}
322
323int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
324{
325 /* kvm common code refers to this, but never calls it */
326 BUG();
327 return 0;
328}
329
330static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
331{
332 vcpu_load(vcpu);
333 kvm_s390_vcpu_initial_reset(vcpu);
334 vcpu_put(vcpu);
335 return 0;
336}
337
338int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
339{
340 vcpu_load(vcpu);
341 memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
342 vcpu_put(vcpu);
343 return 0;
344}
345
346int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
347{
348 vcpu_load(vcpu);
349 memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
350 vcpu_put(vcpu);
351 return 0;
352}
353
354int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
355 struct kvm_sregs *sregs)
356{
357 vcpu_load(vcpu);
358 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
359 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
360 vcpu_put(vcpu);
361 return 0;
362}
363
364int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
365 struct kvm_sregs *sregs)
366{
367 vcpu_load(vcpu);
368 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
369 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
370 vcpu_put(vcpu);
371 return 0;
372}
373
374int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
375{
376 vcpu_load(vcpu);
377 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
378 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
379 vcpu_put(vcpu);
380 return 0;
381}
382
383int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
384{
385 vcpu_load(vcpu);
386 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
387 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
388 vcpu_put(vcpu);
389 return 0;
390}
391
392static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
393{
394 int rc = 0;
395
396 vcpu_load(vcpu);
397 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
398 rc = -EBUSY;
399 else
400 vcpu->arch.sie_block->gpsw = psw;
401 vcpu_put(vcpu);
402 return rc;
403}
404
405int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
406 struct kvm_translation *tr)
407{
408 return -EINVAL; /* not implemented yet */
409}
410
411int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
412 struct kvm_debug_guest *dbg)
413{
414 return -EINVAL; /* not implemented yet */
415}
416
417int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
418 struct kvm_mp_state *mp_state)
419{
420 return -EINVAL; /* not implemented yet */
421}
422
423int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
424 struct kvm_mp_state *mp_state)
425{
426 return -EINVAL; /* not implemented yet */
427}
428
429static void __vcpu_run(struct kvm_vcpu *vcpu)
430{
431 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
432
433 if (need_resched())
434 schedule();
435
436 vcpu->arch.sie_block->icptcode = 0;
437 local_irq_disable();
438 kvm_guest_enter();
439 local_irq_enable();
440 VCPU_EVENT(vcpu, 6, "entering sie flags %x",
441 atomic_read(&vcpu->arch.sie_block->cpuflags));
442 sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
443 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
444 vcpu->arch.sie_block->icptcode);
445 local_irq_disable();
446 kvm_guest_exit();
447 local_irq_enable();
448
449 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
450}
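A note on the two memcpy() calls framing the sie64a() call: the low-level entry code (sie64a.S, later in this patch) only loads and saves guest general registers 0-13 through the register save area passed in guest_gprs, so guest r14 and r15 are carried in the gg14/gg15 fields of the SIE control block instead. In outline:

	/*
	 * before SIE:  guest_gprs[14..15] -> sie_block->gg14 (16 bytes)
	 * after SIE:   sie_block->gg14    -> guest_gprs[14..15]
	 * guest r0-r13 are handled inside sie64a() via guest_gprs[]
	 */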
451
452int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
453{
454 int rc;
455 sigset_t sigsaved;
456
457 vcpu_load(vcpu);
458
459 if (vcpu->sigset_active)
460 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
461
462 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
463
464 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
465
466 switch (kvm_run->exit_reason) {
467 case KVM_EXIT_S390_SIEIC:
468 vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
469 vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
470 break;
471 case KVM_EXIT_UNKNOWN:
472 case KVM_EXIT_S390_RESET:
473 break;
474 default:
475 BUG();
476 }
477
478 might_sleep();
479
480 do {
481 kvm_s390_deliver_pending_interrupts(vcpu);
482 __vcpu_run(vcpu);
483 rc = kvm_handle_sie_intercept(vcpu);
484 } while (!signal_pending(current) && !rc);
485
486 if (signal_pending(current) && !rc)
487 rc = -EINTR;
488
489 if (rc == -ENOTSUPP) {
490 /* intercept cannot be handled in-kernel, prepare kvm-run */
491 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
492 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
493 kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
494 kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
495 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
496 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
497 rc = 0;
498 }
499
500 if (rc == -EREMOTE) {
501 /* intercept was handled, but userspace support is needed
502 * kvm_run has been prepared by the handler */
503 rc = 0;
504 }
505
506 if (vcpu->sigset_active)
507 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
508
509 vcpu_put(vcpu);
510
511 vcpu->stat.exit_userspace++;
512 return rc;
513}
514
515static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
516 unsigned long n, int prefix)
517{
518 if (prefix)
519 return copy_to_guest(vcpu, guestdest, from, n);
520 else
521 return copy_to_guest_absolute(vcpu, guestdest, from, n);
522}
523
524/*
525 * store status at address
526 * we have two special cases:
527 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
528 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
529 */
530int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
531{
532 const unsigned char archmode = 1;
533 int prefix;
534
535 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
536 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
537 return -EFAULT;
538 addr = SAVE_AREA_BASE;
539 prefix = 0;
540 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
541 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
542 return -EFAULT;
543 addr = SAVE_AREA_BASE;
544 prefix = 1;
545 } else
546 prefix = 0;
547
548 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
549 vcpu->arch.guest_fpregs.fprs, 128, prefix))
550 return -EFAULT;
551
552 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
553 vcpu->arch.guest_gprs, 128, prefix))
554 return -EFAULT;
555
556 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
557 &vcpu->arch.sie_block->gpsw, 16, prefix))
558 return -EFAULT;
559
560 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
561 &vcpu->arch.sie_block->prefix, 4, prefix))
562 return -EFAULT;
563
564 if (__guestcopy(vcpu,
565 addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
566 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
567 return -EFAULT;
568
569 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
570 &vcpu->arch.sie_block->todpr, 4, prefix))
571 return -EFAULT;
572
573 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
574 &vcpu->arch.sie_block->cputm, 8, prefix))
575 return -EFAULT;
576
577 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
578 &vcpu->arch.sie_block->ckc, 8, prefix))
579 return -EFAULT;
580
581 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
582 &vcpu->arch.guest_acrs, 64, prefix))
583 return -EFAULT;
584
585 if (__guestcopy(vcpu,
586 addr + offsetof(struct save_area_s390x, ctrl_regs),
587 &vcpu->arch.sie_block->gcr, 128, prefix))
588 return -EFAULT;
589 return 0;
590}
591
592static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
593{
594 int rc;
595
596 vcpu_load(vcpu);
597 rc = __kvm_s390_vcpu_store_status(vcpu, addr);
598 vcpu_put(vcpu);
599 return rc;
600}
601
602long kvm_arch_vcpu_ioctl(struct file *filp,
603 unsigned int ioctl, unsigned long arg)
604{
605 struct kvm_vcpu *vcpu = filp->private_data;
606 void __user *argp = (void __user *)arg;
607
608 switch (ioctl) {
609 case KVM_S390_INTERRUPT: {
610 struct kvm_s390_interrupt s390int;
611
612 if (copy_from_user(&s390int, argp, sizeof(s390int)))
613 return -EFAULT;
614 return kvm_s390_inject_vcpu(vcpu, &s390int);
615 }
616 case KVM_S390_STORE_STATUS:
617 return kvm_s390_vcpu_store_status(vcpu, arg);
618 case KVM_S390_SET_INITIAL_PSW: {
619 psw_t psw;
620
621 if (copy_from_user(&psw, argp, sizeof(psw)))
622 return -EFAULT;
623 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
624 }
625 case KVM_S390_INITIAL_RESET:
626 return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
627 default:
628 ;
629 }
630 return -EINVAL;
631}
632
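For orientation, a minimal userspace sketch of driving these ioctls (hypothetical helper, error handling trimmed; the request codes and the { mask, addr } PSW pair copied by KVM_S390_SET_INITIAL_PSW come from the kvm headers in this tree):

	#include <linux/types.h>
	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* reset a stopped vcpu and point it at its start PSW */
	static int reset_and_start(int vcpu_fd, __u64 psw_mask, __u64 psw_addr)
	{
		struct {
			__u64 mask;
			__u64 addr;
		} psw = { .mask = psw_mask, .addr = psw_addr };

		if (ioctl(vcpu_fd, KVM_S390_INITIAL_RESET, 0) < 0)
			return -1;
		/* rejected with EBUSY while the vcpu is running */
		return ioctl(vcpu_fd, KVM_S390_SET_INITIAL_PSW, &psw);
	}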
633/* Section: memory related */
634int kvm_arch_set_memory_region(struct kvm *kvm,
635 struct kvm_userspace_memory_region *mem,
636 struct kvm_memory_slot old,
637 int user_alloc)
638{
639 /* A few sanity checks. We can have exactly one memory slot which has
640 to start at guest virtual zero and which has to be located at a
641 page boundary in userland and which has to end at a page boundary.
642 The memory in userland is ok to be fragmented into various different
643 vmas. It is okay to mmap() and munmap() stuff in this slot after
644 doing this call at any time */
645
646 if (mem->slot)
647 return -EINVAL;
648
649 if (mem->guest_phys_addr)
650 return -EINVAL;
651
652 if (mem->userspace_addr & (PAGE_SIZE - 1))
653 return -EINVAL;
654
655 if (mem->memory_size & (PAGE_SIZE - 1))
656 return -EINVAL;
657
658 kvm->arch.guest_origin = mem->userspace_addr;
659 kvm->arch.guest_memsize = mem->memory_size;
660
661 /* FIXME: we do want to interrupt running CPUs and update their memory
662 configuration now to avoid race conditions. But hey, changing the
663 memory layout while virtual CPUs are running is usually bad
664 programming practice. */
665
666 return 0;
667}
668
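Concretely, a userspace setup that satisfies these checks might look like the following sketch (mmap flags and sizes are illustrative only):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>

	static int set_guest_memory(int vm_fd, size_t mem_size)
	{
		struct kvm_userspace_memory_region region;
		void *mem;

		/* any page aligned, page sized chunk of process memory will do */
		mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (mem == MAP_FAILED)
			return -1;

		region.slot = 0;			/* only slot 0 is accepted */
		region.flags = 0;
		region.guest_phys_addr = 0;		/* guest memory starts at 0 */
		region.memory_size = mem_size;		/* multiple of PAGE_SIZE */
		region.userspace_addr = (__u64)(unsigned long) mem;

		return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
	}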
669gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
670{
671 return gfn;
672}
673
674static int __init kvm_s390_init(void)
675{
676 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
677}
678
679static void __exit kvm_s390_exit(void)
680{
681 kvm_exit();
682}
683
684module_init(kvm_s390_init);
685module_exit(kvm_s390_exit);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
new file mode 100644
index 000000000000..3893cf12eacf
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.h
@@ -0,0 +1,64 @@
1/*
2 * kvm_s390.h - definition for kvm on s390
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 */
13
14#ifndef ARCH_S390_KVM_S390_H
15#define ARCH_S390_KVM_S390_H
16
17#include <linux/kvm.h>
18#include <linux/kvm_host.h>
19
20typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
21
22int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
23
24#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
25do { \
26 debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
27 d_args); \
28} while (0)
29
30#define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\
31do { \
32 debug_sprintf_event(d_vcpu->kvm->arch.dbf, d_loglevel, \
33 "%02d[%016lx-%016lx]: " d_string "\n", d_vcpu->vcpu_id, \
34 d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\
35 d_args); \
36} while (0)
37
38static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
39{
40 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT;
41}
42
43int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
44void kvm_s390_idle_wakeup(unsigned long data);
45void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
46int kvm_s390_inject_vm(struct kvm *kvm,
47 struct kvm_s390_interrupt *s390int);
48int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
49 struct kvm_s390_interrupt *s390int);
50int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
51
52/* implemented in priv.c */
53int kvm_s390_handle_priv(struct kvm_vcpu *vcpu);
54
55/* implemented in sigp.c */
56int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
57
58/* implemented in kvm-s390.c */
59int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
60 unsigned long addr);
61/* implemented in diag.c */
62int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
63
64#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
new file mode 100644
index 000000000000..1465946325c5
--- /dev/null
+++ b/arch/s390/kvm/priv.c
@@ -0,0 +1,323 @@
1/*
2 * priv.c - handling privileged instructions
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 */
13
14#include <linux/kvm.h>
15#include <linux/errno.h>
16#include <asm/current.h>
17#include <asm/debug.h>
18#include <asm/ebcdic.h>
19#include <asm/sysinfo.h>
20#include "gaccess.h"
21#include "kvm-s390.h"
22
23static int handle_set_prefix(struct kvm_vcpu *vcpu)
24{
25 int base2 = vcpu->arch.sie_block->ipb >> 28;
26 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
27 u64 operand2;
28 u32 address = 0;
29 u8 tmp;
30
31 vcpu->stat.instruction_spx++;
32
33 operand2 = disp2;
34 if (base2)
35 operand2 += vcpu->arch.guest_gprs[base2];
36
37 /* must be word boundary */
38 if (operand2 & 3) {
39 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
40 goto out;
41 }
42
43 /* get the value */
44 if (get_guest_u32(vcpu, operand2, &address)) {
45 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
46 goto out;
47 }
48
49 address = address & 0x7fffe000u;
50
51 /* make sure that the new value is valid memory */
52 if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
53 (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) {
54 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
55 goto out;
56 }
57
58 vcpu->arch.sie_block->prefix = address;
59 vcpu->arch.sie_block->ihcpu = 0xffff;
60
61 VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
62out:
63 return 0;
64}
65
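Every handler in this file (and several in sigp.c) repeats the same operand computation: for these S-format instructions the SIE block's ipb field carries the base register number in its top four bits and the 12-bit displacement just below. A common helper along these lines would capture what the open-coded lines above do (sketch only, not part of this patch):

	/* effective address of the second operand of an S-format instruction */
	static inline u64 kvm_s390_get_operand2(struct kvm_vcpu *vcpu)
	{
		int base2 = vcpu->arch.sie_block->ipb >> 28;
		int disp2 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
		u64 operand2 = disp2;

		if (base2)
			operand2 += vcpu->arch.guest_gprs[base2];
		return operand2;
	}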
66static int handle_store_prefix(struct kvm_vcpu *vcpu)
67{
68 int base2 = vcpu->arch.sie_block->ipb >> 28;
69 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
70 u64 operand2;
71 u32 address;
72
73 vcpu->stat.instruction_stpx++;
74 operand2 = disp2;
75 if (base2)
76 operand2 += vcpu->arch.guest_gprs[base2];
77
78 /* must be word boundary */
79 if (operand2 & 3) {
80 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
81 goto out;
82 }
83
84 address = vcpu->arch.sie_block->prefix;
85 address = address & 0x7fffe000u;
86
87 /* store the value */
88 if (put_guest_u32(vcpu, operand2, address)) {
89 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
90 goto out;
91 }
92
93 VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
94out:
95 return 0;
96}
97
98static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
99{
100 int base2 = vcpu->arch.sie_block->ipb >> 28;
101 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
102 u64 useraddr;
103 int rc;
104
105 vcpu->stat.instruction_stap++;
106 useraddr = disp2;
107 if (base2)
108 useraddr += vcpu->arch.guest_gprs[base2];
109
110 if (useraddr & 1) {
111 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
112 goto out;
113 }
114
115 rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id);
116 if (rc == -EFAULT) {
117 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
118 goto out;
119 }
120
121 VCPU_EVENT(vcpu, 5, "storing cpu address to %lx", useraddr);
122out:
123 return 0;
124}
125
126static int handle_skey(struct kvm_vcpu *vcpu)
127{
128 vcpu->stat.instruction_storage_key++;
129 vcpu->arch.sie_block->gpsw.addr -= 4;
130 VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
131 return 0;
132}
133
134static int handle_stsch(struct kvm_vcpu *vcpu)
135{
136 vcpu->stat.instruction_stsch++;
137 VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3");
138 /* condition code 3 */
139 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
140 vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
141 return 0;
142}
143
144static int handle_chsc(struct kvm_vcpu *vcpu)
145{
146 vcpu->stat.instruction_chsc++;
147 VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3");
148 /* condition code 3 */
149 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
150 vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
151 return 0;
152}
153
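handle_stsch() and handle_chsc() above (and handle_stsi() further down) set the guest condition code by hand; the pair of gpsw.mask operations implements "cc = 3", since the condition code lives in PSW bits 18-19, i.e. bits 44-45 from the low end of the 64-bit mask half. A small helper capturing the idiom (sketch only):

	/* set the guest PSW condition code */
	static inline void kvm_s390_set_cc(struct kvm_vcpu *vcpu, int cc)
	{
		vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
		vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
	}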
154static unsigned int kvm_stfl(void)
155{
156 asm volatile(
157 " .insn s,0xb2b10000,0(0)\n" /* stfl */
158 "0:\n"
159 EX_TABLE(0b, 0b));
160 return S390_lowcore.stfl_fac_list;
161}
162
163static int handle_stfl(struct kvm_vcpu *vcpu)
164{
165 unsigned int facility_list = kvm_stfl();
166 int rc;
167
168 vcpu->stat.instruction_stfl++;
169 facility_list &= ~(1UL<<24); /* no stfle */
170
171 rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
172 &facility_list, sizeof(facility_list));
173 if (rc == -EFAULT)
174 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
175 else
176 VCPU_EVENT(vcpu, 5, "store facility list value %x",
177 facility_list);
178 return 0;
179}
180
181static int handle_stidp(struct kvm_vcpu *vcpu)
182{
183 int base2 = vcpu->arch.sie_block->ipb >> 28;
184 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
185 u64 operand2;
186 int rc;
187
188 vcpu->stat.instruction_stidp++;
189 operand2 = disp2;
190 if (base2)
191 operand2 += vcpu->arch.guest_gprs[base2];
192
193 if (operand2 & 7) {
194 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
195 goto out;
196 }
197
198 rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data);
199 if (rc == -EFAULT) {
200 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
201 goto out;
202 }
203
204 VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
205out:
206 return 0;
207}
208
209static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
210{
211 struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
212 int cpus = 0;
213 int n;
214
215 spin_lock_bh(&fi->lock);
216 for (n = 0; n < KVM_MAX_VCPUS; n++)
217 if (fi->local_int[n])
218 cpus++;
219 spin_unlock_bh(&fi->lock);
220
221 /* deal with other level 3 hypervisors */
222 if (stsi(mem, 3, 2, 2) == -ENOSYS)
223 mem->count = 0;
224 if (mem->count < 8)
225 mem->count++;
226 for (n = mem->count - 1; n > 0 ; n--)
227 memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
228
229 mem->vm[0].cpus_total = cpus;
230 mem->vm[0].cpus_configured = cpus;
231 mem->vm[0].cpus_standby = 0;
232 mem->vm[0].cpus_reserved = 0;
233 mem->vm[0].caf = 1000;
234 memcpy(mem->vm[0].name, "KVMguest", 8);
235 ASCEBC(mem->vm[0].name, 8);
236 memcpy(mem->vm[0].cpi, "KVM/Linux ", 16);
237 ASCEBC(mem->vm[0].cpi, 16);
238}
239
240static int handle_stsi(struct kvm_vcpu *vcpu)
241{
242 int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28;
243 int sel1 = vcpu->arch.guest_gprs[0] & 0xff;
244 int sel2 = vcpu->arch.guest_gprs[1] & 0xffff;
245 int base2 = vcpu->arch.sie_block->ipb >> 28;
246 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
247 u64 operand2;
248 unsigned long mem;
249
250 vcpu->stat.instruction_stsi++;
251 VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
252
253 operand2 = disp2;
254 if (base2)
255 operand2 += vcpu->arch.guest_gprs[base2];
256
257 if (operand2 & 0xfff && fc > 0)
258 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
259
260 switch (fc) {
261 case 0:
262 vcpu->arch.guest_gprs[0] = 3 << 28;
263 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
264 return 0;
265 case 1: /* same handling for 1 and 2 */
266 case 2:
267 mem = get_zeroed_page(GFP_KERNEL);
268 if (!mem)
269 goto out_fail;
270 if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS)
271 goto out_mem;
272 break;
273 case 3:
274 if (sel1 != 2 || sel2 != 2)
275 goto out_fail;
276 mem = get_zeroed_page(GFP_KERNEL);
277 if (!mem)
278 goto out_fail;
279 handle_stsi_3_2_2(vcpu, (void *) mem);
280 break;
281 default:
282 goto out_fail;
283 }
284
285 if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
286 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
287 goto out_mem;
288 }
289 free_page(mem);
290 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
291 vcpu->arch.guest_gprs[0] = 0;
292 return 0;
293out_mem:
294 free_page(mem);
295out_fail:
296 /* condition code 3 */
297 vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
298 return 0;
299}
300
301static intercept_handler_t priv_handlers[256] = {
302 [0x02] = handle_stidp,
303 [0x10] = handle_set_prefix,
304 [0x11] = handle_store_prefix,
305 [0x12] = handle_store_cpu_address,
306 [0x29] = handle_skey,
307 [0x2a] = handle_skey,
308 [0x2b] = handle_skey,
309 [0x34] = handle_stsch,
310 [0x5f] = handle_chsc,
311 [0x7d] = handle_stsi,
312 [0xb1] = handle_stfl,
313};
314
315int kvm_s390_handle_priv(struct kvm_vcpu *vcpu)
316{
317 intercept_handler_t handler;
318
319 handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
320 if (handler)
321 return handler(vcpu);
322 return -ENOTSUPP;
323}
diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S
new file mode 100644
index 000000000000..934fd6a885f6
--- /dev/null
+++ b/arch/s390/kvm/sie64a.S
@@ -0,0 +1,47 @@
1/*
2 * sie64a.S - low level sie call
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
11 */
12
13#include <linux/errno.h>
14#include <asm/asm-offsets.h>
15
16SP_R5 = 5 * 8 # offset into stackframe
17SP_R6 = 6 * 8
18
19/*
20 * sie64a calling convention:
21 * %r2 pointer to sie control block
22 * %r3 guest register save area
23 */
24 .globl sie64a
25sie64a:
26 lgr %r5,%r3
27 stmg %r5,%r14,SP_R5(%r15) # save register on entry
28 lgr %r14,%r2 # pointer to sie control block
29 lmg %r0,%r13,0(%r3) # load guest gprs 0-13
30sie_inst:
31 sie 0(%r14)
32 lg %r14,SP_R5(%r15)
33 stmg %r0,%r13,0(%r14) # save guest gprs 0-13
34 lghi %r2,0
35 lmg %r6,%r14,SP_R6(%r15)
36 br %r14
37
38sie_err:
39 lg %r14,SP_R5(%r15)
40 stmg %r0,%r13,0(%r14) # save guest gprs 0-13
41 lghi %r2,-EFAULT
42 lmg %r6,%r14,SP_R6(%r15)
43 br %r14
44
45 .section __ex_table,"a"
46 .quad sie_inst,sie_err
47 .previous
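From C the entry point above is used roughly as int sie64a(struct sie_block *sblk, unsigned long *guest_gprs); see __vcpu_run() in kvm-s390.c earlier in this patch. It returns 0 after a normal SIE exit, and the __ex_table entry maps a fault on the sie instruction itself to the sie_err path, which returns -EFAULT instead. A plausible declaration matching that call site (the exact prototype lives elsewhere in the tree):

	extern int sie64a(struct sie_block *sblk, unsigned long *guest_gprs);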
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
new file mode 100644
index 000000000000..0a236acfb5f6
--- /dev/null
+++ b/arch/s390/kvm/sigp.c
@@ -0,0 +1,288 @@
1/*
2 * sigp.c - handling interprocessor communication
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 */
13
14#include <linux/kvm.h>
15#include <linux/kvm_host.h>
16#include "gaccess.h"
17#include "kvm-s390.h"
18
19/* sigp order codes */
20#define SIGP_SENSE 0x01
21#define SIGP_EXTERNAL_CALL 0x02
22#define SIGP_EMERGENCY 0x03
23#define SIGP_START 0x04
24#define SIGP_STOP 0x05
25#define SIGP_RESTART 0x06
26#define SIGP_STOP_STORE_STATUS 0x09
27#define SIGP_INITIAL_CPU_RESET 0x0b
28#define SIGP_CPU_RESET 0x0c
29#define SIGP_SET_PREFIX 0x0d
30#define SIGP_STORE_STATUS_ADDR 0x0e
31#define SIGP_SET_ARCH 0x12
32
33/* cpu status bits */
34#define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL
35#define SIGP_STAT_INCORRECT_STATE 0x00000200UL
36#define SIGP_STAT_INVALID_PARAMETER 0x00000100UL
37#define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL
38#define SIGP_STAT_STOPPED 0x00000040UL
39#define SIGP_STAT_OPERATOR_INTERV 0x00000020UL
40#define SIGP_STAT_CHECK_STOP 0x00000010UL
41#define SIGP_STAT_INOPERATIVE 0x00000004UL
42#define SIGP_STAT_INVALID_ORDER 0x00000002UL
43#define SIGP_STAT_RECEIVER_CHECK 0x00000001UL
44
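The handlers below return either a negative errno or the SIGP condition code, which kvm_s390_handle_sigp() at the bottom of this file folds into PSW bits 18-19. Summarizing the inline comments:

	/*
	 * rc 0 - order code accepted              (cc 0)
	 * rc 1 - status stored / incorrect state  (cc 1)
	 * rc 2 - busy                             (cc 2)
	 * rc 3 - not operational                  (cc 3)
	 */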
45
46static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg)
47{
48 struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
49 int rc;
50
51 if (cpu_addr >= KVM_MAX_VCPUS)
52 return 3; /* not operational */
53
54 spin_lock_bh(&fi->lock);
55 if (fi->local_int[cpu_addr] == NULL)
56 rc = 3; /* not operational */
57 else if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
58 & CPUSTAT_RUNNING) {
59 *reg &= 0xffffffff00000000UL;
60 rc = 1; /* status stored */
61 } else {
62 *reg &= 0xffffffff00000000UL;
63 *reg |= SIGP_STAT_STOPPED;
64 rc = 1; /* status stored */
65 }
66 spin_unlock_bh(&fi->lock);
67
68 VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
69 return rc;
70}
71
72static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
73{
74 struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
75 struct local_interrupt *li;
76 struct interrupt_info *inti;
77 int rc;
78
79 if (cpu_addr >= KVM_MAX_VCPUS)
80 return 3; /* not operational */
81
82 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
83 if (!inti)
84 return -ENOMEM;
85
86 inti->type = KVM_S390_INT_EMERGENCY;
87
88 spin_lock_bh(&fi->lock);
89 li = fi->local_int[cpu_addr];
90 if (li == NULL) {
91 rc = 3; /* not operational */
92 kfree(inti);
93 goto unlock;
94 }
95 spin_lock_bh(&li->lock);
96 list_add_tail(&inti->list, &li->list);
97 atomic_set(&li->active, 1);
98 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
99 if (waitqueue_active(&li->wq))
100 wake_up_interruptible(&li->wq);
101 spin_unlock_bh(&li->lock);
102 rc = 0; /* order accepted */
103unlock:
104 spin_unlock_bh(&fi->lock);
105 VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
106 return rc;
107}
108
109static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
110{
111 struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
112 struct local_interrupt *li;
113 struct interrupt_info *inti;
114 int rc;
115
116 if (cpu_addr >= KVM_MAX_VCPUS)
117 return 3; /* not operational */
118
119 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
120 if (!inti)
121 return -ENOMEM;
122
123 inti->type = KVM_S390_SIGP_STOP;
124
125 spin_lock_bh(&fi->lock);
126 li = fi->local_int[cpu_addr];
127 if (li == NULL) {
128 rc = 3; /* not operational */
129 kfree(inti);
130 goto unlock;
131 }
132 spin_lock_bh(&li->lock);
133 list_add_tail(&inti->list, &li->list);
134 atomic_set(&li->active, 1);
135 atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
136 if (store)
137 li->action_bits |= ACTION_STORE_ON_STOP;
138 li->action_bits |= ACTION_STOP_ON_STOP;
139 if (waitqueue_active(&li->wq))
140 wake_up_interruptible(&li->wq);
141 spin_unlock_bh(&li->lock);
142 rc = 0; /* order accepted */
143unlock:
144 spin_unlock_bh(&fi->lock);
145 VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
146 return rc;
147}
148
149static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
150{
151 int rc;
152
153 switch (parameter & 0xff) {
154 case 0:
155 printk(KERN_WARNING "kvm: request to switch to ESA/390 mode"
156 " not supported");
157 rc = 3; /* not operational */
158 break;
159 case 1:
160 case 2:
161 rc = 0; /* order accepted */
162 break;
163 default:
164 rc = -ENOTSUPP;
165 }
166 return rc;
167}
168
169static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
170 u64 *reg)
171{
172 struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
173 struct local_interrupt *li;
174 struct interrupt_info *inti;
175 int rc;
176 u8 tmp;
177
178 /* make sure that the new value is valid memory */
179 address = address & 0x7fffe000u;
180 if ((copy_from_guest(vcpu, &tmp,
181 (u64) (address + vcpu->kvm->arch.guest_origin) , 1)) ||
182 (copy_from_guest(vcpu, &tmp, (u64) (address +
183 vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) {
184 *reg |= SIGP_STAT_INVALID_PARAMETER;
185 return 1; /* invalid parameter */
186 }
187
188 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
189 if (!inti)
190 return 2; /* busy */
191
192 spin_lock_bh(&fi->lock);
193 li = fi->local_int[cpu_addr];
194
195 if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) {
196 rc = 1; /* incorrect state */
197 *reg &= SIGP_STAT_INCORRECT_STATE;
198 kfree(inti);
199 goto out_fi;
200 }
201
202 spin_lock_bh(&li->lock);
203 /* cpu must be in stopped state */
204 if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
205 rc = 1; /* incorrect state */
206 *reg &= SIGP_STAT_INCORRECT_STATE;
207 kfree(inti);
208 goto out_li;
209 }
210
211 inti->type = KVM_S390_SIGP_SET_PREFIX;
212 inti->prefix.address = address;
213
214 list_add_tail(&inti->list, &li->list);
215 atomic_set(&li->active, 1);
216 if (waitqueue_active(&li->wq))
217 wake_up_interruptible(&li->wq);
218 rc = 0; /* order accepted */
219
220 VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
221out_li:
222 spin_unlock_bh(&li->lock);
223out_fi:
224 spin_unlock_bh(&fi->lock);
225 return rc;
226}
227
228int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
229{
230 int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
231 int r3 = vcpu->arch.sie_block->ipa & 0x000f;
232 int base2 = vcpu->arch.sie_block->ipb >> 28;
233 int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
234 u32 parameter;
235 u16 cpu_addr = vcpu->arch.guest_gprs[r3];
236 u8 order_code;
237 int rc;
238
239 order_code = disp2;
240 if (base2)
241 order_code += vcpu->arch.guest_gprs[base2];
242
243 if (r1 % 2)
244 parameter = vcpu->arch.guest_gprs[r1];
245 else
246 parameter = vcpu->arch.guest_gprs[r1 + 1];
247
248 switch (order_code) {
249 case SIGP_SENSE:
250 vcpu->stat.instruction_sigp_sense++;
251 rc = __sigp_sense(vcpu, cpu_addr,
252 &vcpu->arch.guest_gprs[r1]);
253 break;
254 case SIGP_EMERGENCY:
255 vcpu->stat.instruction_sigp_emergency++;
256 rc = __sigp_emergency(vcpu, cpu_addr);
257 break;
258 case SIGP_STOP:
259 vcpu->stat.instruction_sigp_stop++;
260 rc = __sigp_stop(vcpu, cpu_addr, 0);
261 break;
262 case SIGP_STOP_STORE_STATUS:
263 vcpu->stat.instruction_sigp_stop++;
264 rc = __sigp_stop(vcpu, cpu_addr, 1);
265 break;
266 case SIGP_SET_ARCH:
267 vcpu->stat.instruction_sigp_arch++;
268 rc = __sigp_set_arch(vcpu, parameter);
269 break;
270 case SIGP_SET_PREFIX:
271 vcpu->stat.instruction_sigp_prefix++;
272 rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
273 &vcpu->arch.guest_gprs[r1]);
274 break;
275 case SIGP_RESTART:
276 vcpu->stat.instruction_sigp_restart++;
277 /* user space must know about restart */
278 default:
279 return -ENOTSUPP;
280 }
281
282 if (rc < 0)
283 return rc;
284
285 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
286 vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44;
287 return 0;
288}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index fd072013f88c..5c1aea97cd12 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -30,11 +30,27 @@
 #define TABLES_PER_PAGE 4
 #define FRAG_MASK 15UL
 #define SECOND_HALVES 10UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+	memset(table + 256, 0, PAGE_SIZE/4);
+	clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+	memset(table + 768, 0, PAGE_SIZE/4);
+}
+
 #else
 #define ALLOC_ORDER 2
 #define TABLES_PER_PAGE 2
 #define FRAG_MASK 3UL
 #define SECOND_HALVES 2UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+	memset(table + 256, 0, PAGE_SIZE/2);
+}
+
 #endif
 
 unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	unsigned long *table;
 	unsigned long bits;
 
-	bits = mm->context.noexec ? 3UL : 1UL;
+	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
 	spin_lock(&mm->page_table_lock);
 	page = NULL;
 	if (!list_empty(&mm->context.pgtable_list)) {
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 		pgtable_page_ctor(page);
 		page->flags &= ~FRAG_MASK;
 		table = (unsigned long *) page_to_phys(page);
-		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+		if (mm->context.pgstes)
+			clear_table_pgstes(table);
+		else
+			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
 		spin_lock(&mm->page_table_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
 	}
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	struct page *page;
 	unsigned long bits;
 
-	bits = mm->context.noexec ? 3UL : 1UL;
+	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
 	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	spin_lock(&mm->page_table_lock);
@@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
 	mm->context.noexec = 0;
 	update_mm(mm, tsk);
 }
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm;
+	int rc;
+
+	task_lock(tsk);
+
+	rc = 0;
+	if (tsk->mm->context.pgstes)
+		goto unlock;
+
+	rc = -EINVAL;
+	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
+		goto unlock;
+
+	tsk->mm->context.pgstes = 1;	/* dirty little tricks .. */
+	mm = dup_mm(tsk);
+	tsk->mm->context.pgstes = 0;
+
+	rc = -ENOMEM;
+	if (!mm)
+		goto unlock;
+	mmput(tsk->mm);
+	tsk->mm = tsk->active_mm = mm;
+	preempt_disable();
+	update_mm(mm, tsk);
+	cpu_set(smp_processor_id(), mm->cpu_vm_mask);
+	preempt_enable();
+	rc = 0;
+unlock:
+	task_unlock(tsk);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2fadf794483d..e5790fe9e330 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -373,6 +373,25 @@ config VMI
 	  at the moment), by linking the kernel to a GPL-ed ROM module
 	  provided by the hypervisor.
 
+config KVM_CLOCK
+	bool "KVM paravirtualized clock"
+	select PARAVIRT
+	depends on !(X86_VISWS || X86_VOYAGER)
+	help
+	  Turning on this option will allow you to run a paravirtualized clock
+	  when running over the KVM hypervisor. Instead of relying on a PIT
+	  (or probably other) emulation by the underlying device model, the host
+	  provides the guest with timing infrastructure such as time of day, and
+	  system time
+
+config KVM_GUEST
+	bool "KVM Guest support"
+	select PARAVIRT
+	depends on !(X86_VISWS || X86_VOYAGER)
+	help
+	  This option enables various optimizations for running under the KVM
+	  hypervisor.
+
 source "arch/x86/lguest/Kconfig"
 
 config PARAVIRT
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 90e092d0af0c..fa19c3819540 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -80,6 +80,8 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
 obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
 
 obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
+obj-$(CONFIG_KVM_GUEST) += kvm.o
+obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
 obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
 
 ifdef CONFIG_INPUT_PCSPKR
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 2251d0ae9570..268553817909 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -25,6 +25,7 @@
 #include <asm/hpet.h>
 #include <linux/kdebug.h>
 #include <asm/smp.h>
+#include <asm/reboot.h>
 
 #include <mach_ipi.h>
 
@@ -117,7 +118,7 @@ static void nmi_shootdown_cpus(void)
 }
 #endif
 
-void machine_crash_shutdown(struct pt_regs *regs)
+void native_machine_crash_shutdown(struct pt_regs *regs)
 {
 	/* This function is only called after the system
 	 * has panicked or is otherwise in a critical state.
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
new file mode 100644
index 000000000000..8b7a3cf37d2b
--- /dev/null
+++ b/arch/x86/kernel/kvm.c
@@ -0,0 +1,248 @@
1/*
2 * KVM paravirt_ops implementation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 *
18 * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
19 * Copyright IBM Corporation, 2007
20 * Authors: Anthony Liguori <aliguori@us.ibm.com>
21 */
22
23#include <linux/module.h>
24#include <linux/kernel.h>
25#include <linux/kvm_para.h>
26#include <linux/cpu.h>
27#include <linux/mm.h>
28#include <linux/highmem.h>
29#include <linux/hardirq.h>
30
31#define MMU_QUEUE_SIZE 1024
32
33struct kvm_para_state {
34 u8 mmu_queue[MMU_QUEUE_SIZE];
35 int mmu_queue_len;
36 enum paravirt_lazy_mode mode;
37};
38
39static DEFINE_PER_CPU(struct kvm_para_state, para_state);
40
41static struct kvm_para_state *kvm_para_state(void)
42{
43 return &per_cpu(para_state, raw_smp_processor_id());
44}
45
46/*
47 * No need for any "IO delay" on KVM
48 */
49static void kvm_io_delay(void)
50{
51}
52
53static void kvm_mmu_op(void *buffer, unsigned len)
54{
55 int r;
56 unsigned long a1, a2;
57
58 do {
59 a1 = __pa(buffer);
60 a2 = 0; /* on i386 __pa() always returns <4G */
61 r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2);
62 buffer += r;
63 len -= r;
64 } while (len);
65}
66
67static void mmu_queue_flush(struct kvm_para_state *state)
68{
69 if (state->mmu_queue_len) {
70 kvm_mmu_op(state->mmu_queue, state->mmu_queue_len);
71 state->mmu_queue_len = 0;
72 }
73}
74
75static void kvm_deferred_mmu_op(void *buffer, int len)
76{
77 struct kvm_para_state *state = kvm_para_state();
78
79 if (state->mode != PARAVIRT_LAZY_MMU) {
80 kvm_mmu_op(buffer, len);
81 return;
82 }
83 if (state->mmu_queue_len + len > sizeof state->mmu_queue)
84 mmu_queue_flush(state);
85 memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len);
86 state->mmu_queue_len += len;
87}
88
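The effect is that, inside a lazy-MMU section, a burst of page-table updates is sent to the host as one buffer instead of one hypercall per write: each queued entry is simply the op structure for that write, and mmu_queue_flush() hands the concatenation to kvm_mmu_op() in a single KVM_HC_MMU_OP call (looping only if the host consumes it piecewise). A sketch of the queue after three set_pte() calls in lazy mode:

	/*
	 * mmu_queue:
	 *   [kvm_mmu_op_write_pte][kvm_mmu_op_write_pte][kvm_mmu_op_write_pte]
	 * flushed by mmu_queue_flush() -> kvm_mmu_op() -> KVM_HC_MMU_OP
	 */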
89static void kvm_mmu_write(void *dest, u64 val)
90{
91 __u64 pte_phys;
92 struct kvm_mmu_op_write_pte wpte;
93
94#ifdef CONFIG_HIGHPTE
95 struct page *page;
96 unsigned long dst = (unsigned long) dest;
97
98 page = kmap_atomic_to_page(dest);
99 pte_phys = page_to_pfn(page);
100 pte_phys <<= PAGE_SHIFT;
101 pte_phys += (dst & ~(PAGE_MASK));
102#else
103 pte_phys = (unsigned long)__pa(dest);
104#endif
105 wpte.header.op = KVM_MMU_OP_WRITE_PTE;
106 wpte.pte_val = val;
107 wpte.pte_phys = pte_phys;
108
109 kvm_deferred_mmu_op(&wpte, sizeof wpte);
110}
111
112/*
113 * We only need to hook operations that are MMU writes. We hook these so that
114 * we can use lazy MMU mode to batch these operations. We could probably
115 * improve the performance of the host code if we used some of the information
116 * here to simplify processing of batched writes.
117 */
118static void kvm_set_pte(pte_t *ptep, pte_t pte)
119{
120 kvm_mmu_write(ptep, pte_val(pte));
121}
122
123static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
124 pte_t *ptep, pte_t pte)
125{
126 kvm_mmu_write(ptep, pte_val(pte));
127}
128
129static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
130{
131 kvm_mmu_write(pmdp, pmd_val(pmd));
132}
133
134#if PAGETABLE_LEVELS >= 3
135#ifdef CONFIG_X86_PAE
136static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
137{
138 kvm_mmu_write(ptep, pte_val(pte));
139}
140
141static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
142 pte_t *ptep, pte_t pte)
143{
144 kvm_mmu_write(ptep, pte_val(pte));
145}
146
147static void kvm_pte_clear(struct mm_struct *mm,
148 unsigned long addr, pte_t *ptep)
149{
150 kvm_mmu_write(ptep, 0);
151}
152
153static void kvm_pmd_clear(pmd_t *pmdp)
154{
155 kvm_mmu_write(pmdp, 0);
156}
157#endif
158
159static void kvm_set_pud(pud_t *pudp, pud_t pud)
160{
161 kvm_mmu_write(pudp, pud_val(pud));
162}
163
164#if PAGETABLE_LEVELS == 4
165static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
166{
167 kvm_mmu_write(pgdp, pgd_val(pgd));
168}
169#endif
170#endif /* PAGETABLE_LEVELS >= 3 */
171
172static void kvm_flush_tlb(void)
173{
174 struct kvm_mmu_op_flush_tlb ftlb = {
175 .header.op = KVM_MMU_OP_FLUSH_TLB,
176 };
177
178 kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
179}
180
181static void kvm_release_pt(u32 pfn)
182{
183 struct kvm_mmu_op_release_pt rpt = {
184 .header.op = KVM_MMU_OP_RELEASE_PT,
185 .pt_phys = (u64)pfn << PAGE_SHIFT,
186 };
187
188 kvm_mmu_op(&rpt, sizeof rpt);
189}
190
191static void kvm_enter_lazy_mmu(void)
192{
193 struct kvm_para_state *state = kvm_para_state();
194
195 paravirt_enter_lazy_mmu();
196 state->mode = paravirt_get_lazy_mode();
197}
198
199static void kvm_leave_lazy_mmu(void)
200{
201 struct kvm_para_state *state = kvm_para_state();
202
203 mmu_queue_flush(state);
204 paravirt_leave_lazy(paravirt_get_lazy_mode());
205 state->mode = paravirt_get_lazy_mode();
206}
207
208static void paravirt_ops_setup(void)
209{
210 pv_info.name = "KVM";
211 pv_info.paravirt_enabled = 1;
212
213 if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
214 pv_cpu_ops.io_delay = kvm_io_delay;
215
216 if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) {
217 pv_mmu_ops.set_pte = kvm_set_pte;
218 pv_mmu_ops.set_pte_at = kvm_set_pte_at;
219 pv_mmu_ops.set_pmd = kvm_set_pmd;
220#if PAGETABLE_LEVELS >= 3
221#ifdef CONFIG_X86_PAE
222 pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
223 pv_mmu_ops.set_pte_present = kvm_set_pte_present;
224 pv_mmu_ops.pte_clear = kvm_pte_clear;
225 pv_mmu_ops.pmd_clear = kvm_pmd_clear;
226#endif
227 pv_mmu_ops.set_pud = kvm_set_pud;
228#if PAGETABLE_LEVELS == 4
229 pv_mmu_ops.set_pgd = kvm_set_pgd;
230#endif
231#endif
232 pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
233 pv_mmu_ops.release_pte = kvm_release_pt;
234 pv_mmu_ops.release_pmd = kvm_release_pt;
235 pv_mmu_ops.release_pud = kvm_release_pt;
236
237 pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
238 pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
239 }
240}
241
242void __init kvm_guest_init(void)
243{
244 if (!kvm_para_available())
245 return;
246
247 paravirt_ops_setup();
248}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
new file mode 100644
index 000000000000..ddee04043aeb
--- /dev/null
+++ b/arch/x86/kernel/kvmclock.c
@@ -0,0 +1,187 @@
1/* KVM paravirtual clock driver. A clocksource implementation
2 Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17*/
18
19#include <linux/clocksource.h>
20#include <linux/kvm_para.h>
21#include <asm/arch_hooks.h>
22#include <asm/msr.h>
23#include <asm/apic.h>
24#include <linux/percpu.h>
25#include <asm/reboot.h>
26
27#define KVM_SCALE 22
28
29static int kvmclock = 1;
30
31static int parse_no_kvmclock(char *arg)
32{
33 kvmclock = 0;
34 return 0;
35}
36early_param("no-kvmclock", parse_no_kvmclock);
37
38/* The hypervisor will put information about time periodically here */
39static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
40#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
41
42static inline u64 kvm_get_delta(u64 last_tsc)
43{
44 int cpu = smp_processor_id();
45 u64 delta = native_read_tsc() - last_tsc;
46 return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
47}
48
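The shift by KVM_SCALE only works because the host is expected to pick tsc_to_system_mul so that (cycles * tsc_to_system_mul) >> KVM_SCALE yields nanoseconds for this vcpu's TSC rate; the guest just consumes the value. A worked example with made-up numbers:

	/*
	 * On a 2 GHz TSC (2 cycles per ns) the host would publish
	 *	tsc_to_system_mul ~= (1 << KVM_SCALE) / 2 = 0x200000
	 * so 2,000,000 elapsed cycles become
	 *	(2,000,000 * 0x200000) >> 22 = 1,000,000 ns.
	 */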
49static struct kvm_wall_clock wall_clock;
50static cycle_t kvm_clock_read(void);
51/*
52 * The wallclock is the time of day when we booted. Some time may have
53 * elapsed since the hypervisor wrote the data, so we try to account for
54 * that with the system time.
55 */
56unsigned long kvm_get_wallclock(void)
57{
58 u32 wc_sec, wc_nsec;
59 u64 delta;
60 struct timespec ts;
61 int version, nsec;
62 int low, high;
63
64 low = (int)__pa(&wall_clock);
65 high = ((u64)__pa(&wall_clock) >> 32);
66
67 delta = kvm_clock_read();
68
69 native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
70 do {
71 version = wall_clock.wc_version;
72 rmb();
73 wc_sec = wall_clock.wc_sec;
74 wc_nsec = wall_clock.wc_nsec;
75 rmb();
76 } while ((wall_clock.wc_version != version) || (version & 1));
77
78 delta = kvm_clock_read() - delta;
79 delta += wc_nsec;
80 nsec = do_div(delta, NSEC_PER_SEC);
81 set_normalized_timespec(&ts, wc_sec + delta, nsec);
82 /*
83 * Of all mechanisms of time adjustment I've tested, this one
84 * was the champion!
85 */
86 return ts.tv_sec + 1;
87}
88
89int kvm_set_wallclock(unsigned long now)
90{
91 return 0;
92}
93
94/*
95 * This is our read_clock function. The host puts a tsc timestamp each time
96 * it updates the system time. Without this tsc adjustment, we could have a
97 * situation in which a vcpu starts to run earlier (smaller system_time) but
98 * probes the time later than another vcpu, making time appear to go backwards.
99 */
100static cycle_t kvm_clock_read(void)
101{
102 u64 last_tsc, now;
103 int cpu;
104
105 preempt_disable();
106 cpu = smp_processor_id();
107
108 last_tsc = get_clock(cpu, tsc_timestamp);
109 now = get_clock(cpu, system_time);
110
111 now += kvm_get_delta(last_tsc);
112 preempt_enable();
113
114 return now;
115}
116static struct clocksource kvm_clock = {
117 .name = "kvm-clock",
118 .read = kvm_clock_read,
119 .rating = 400,
120 .mask = CLOCKSOURCE_MASK(64),
121 .mult = 1 << KVM_SCALE,
122 .shift = KVM_SCALE,
123 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
124};
125
126static int kvm_register_clock(void)
127{
128 int cpu = smp_processor_id();
129 int low, high;
130 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
131 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
132
133 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
134}
135
136static void kvm_setup_secondary_clock(void)
137{
138 /*
139 * Now that the first cpu already had this clocksource initialized,
140 * we shouldn't fail.
141 */
142 WARN_ON(kvm_register_clock());
143 /* ok, done with our trickery, call native */
144 setup_secondary_APIC_clock();
145}
146
147/*
148 * After the clock is registered, the host will keep writing to the
149 * registered memory location. If the guest happens to shutdown, this memory
150 * won't be valid. In cases like kexec, in which you install a new kernel, this
151 * means a random memory location will keep being written. So before any
152 * kind of shutdown from our side, we unregister the clock by writing anything
153 * that does not have the 'enable' bit set in the msr.
154 */
155#ifdef CONFIG_KEXEC
156static void kvm_crash_shutdown(struct pt_regs *regs)
157{
158 native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
159 native_machine_crash_shutdown(regs);
160}
161#endif
162
163static void kvm_shutdown(void)
164{
165 native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
166 native_machine_shutdown();
167}
168
169void __init kvmclock_init(void)
170{
171 if (!kvm_para_available())
172 return;
173
174 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
175 if (kvm_register_clock())
176 return;
177 pv_time_ops.get_wallclock = kvm_get_wallclock;
178 pv_time_ops.set_wallclock = kvm_set_wallclock;
179 pv_time_ops.sched_clock = kvm_clock_read;
180 pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
181 machine_ops.shutdown = kvm_shutdown;
182#ifdef CONFIG_KEXEC
183 machine_ops.crash_shutdown = kvm_crash_shutdown;
184#endif
185 clocksource_register(&kvm_clock);
186 }
187}
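The early_param() above gives guests an opt-out without rebuilding: adding the parameter below to the guest kernel command line makes the kernel skip kvmclock registration and fall back to its other clocksources.

	no-kvmclock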
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 1791a751a772..a4a838306b2c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -399,7 +399,7 @@ static void native_machine_emergency_restart(void)
 	}
 }
 
-static void native_machine_shutdown(void)
+void native_machine_shutdown(void)
 {
 	/* Stop the cpus and apics */
 #ifdef CONFIG_SMP
@@ -470,7 +470,10 @@ struct machine_ops machine_ops = {
 	.shutdown = native_machine_shutdown,
 	.emergency_restart = native_machine_emergency_restart,
 	.restart = native_machine_restart,
-	.halt = native_machine_halt
+	.halt = native_machine_halt,
+#ifdef CONFIG_KEXEC
+	.crash_shutdown = native_machine_crash_shutdown,
+#endif
 };
 
 void machine_power_off(void)
@@ -498,3 +501,9 @@ void machine_halt(void)
 	machine_ops.halt();
 }
 
+#ifdef CONFIG_KEXEC
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	machine_ops.crash_shutdown(regs);
+}
+#endif
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 44cc9b933932..2283422af794 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -47,6 +47,7 @@
 #include <linux/pfn.h>
 #include <linux/pci.h>
 #include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
 
 #include <video/edid.h>
 
@@ -820,6 +821,10 @@ void __init setup_arch(char **cmdline_p)
 
 	max_low_pfn = setup_memory();
 
+#ifdef CONFIG_KVM_CLOCK
+	kvmclock_init();
+#endif
+
 #ifdef CONFIG_VMI
 	/*
 	 * Must be after max_low_pfn is determined, and before kernel
@@ -827,6 +832,7 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	vmi_init();
 #endif
+	kvm_guest_init();
 
 	/*
 	 * NOTE: before this point _nobody_ is allowed to allocate
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 60e64c8eee92..a94fb959a87a 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -42,6 +42,7 @@
 #include <linux/ctype.h>
 #include <linux/uaccess.h>
 #include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
 
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
@@ -384,6 +385,10 @@ void __init setup_arch(char **cmdline_p)
 
 	io_delay_init();
 
+#ifdef CONFIG_KVM_CLOCK
+	kvmclock_init();
+#endif
+
 #ifdef CONFIG_SMP
 	/* setup to use the early static init tables during kernel startup */
 	x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
@@ -488,6 +493,8 @@ void __init setup_arch(char **cmdline_p)
 	init_apic_mappings();
 	ioapic_init_mappings();
 
+	kvm_guest_init();
+
 	/*
 	 * We trust e820 completely. No explicit ROM probing in memory.
 	 */
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 41962e793c0f..8d45fabc5f3b 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -19,7 +19,7 @@ if VIRTUALIZATION
 
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
-	depends on HAVE_KVM && EXPERIMENTAL
+	depends on HAVE_KVM
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	---help---
@@ -50,6 +50,17 @@ config KVM_AMD
 	  Provides support for KVM on AMD processors equipped with the AMD-V
 	  (SVM) extensions.
 
+config KVM_TRACE
+	bool "KVM trace support"
+	depends on KVM && MARKERS && SYSFS
+	select RELAY
+	select DEBUG_FS
+	default n
+	---help---
+	  This option allows reading a trace of kvm-related events through
+	  relayfs. Note the ABI is not considered stable and will be
+	  modified in future updates.
+
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/lguest/Kconfig
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ffdd0b310784..c97d35c218db 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -3,10 +3,14 @@
 #
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
+ifeq ($(CONFIG_KVM_TRACE),y)
+common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
+endif
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
+	i8254.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
new file mode 100644
index 000000000000..361e31611276
--- /dev/null
+++ b/arch/x86/kvm/i8254.c
@@ -0,0 +1,611 @@
1/*
2 * 8253/8254 interval timer emulation
3 *
4 * Copyright (c) 2003-2004 Fabrice Bellard
5 * Copyright (c) 2006 Intel Corporation
6 * Copyright (c) 2007 Keir Fraser, XenSource Inc
7 * Copyright (c) 2008 Intel Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a copy
10 * of this software and associated documentation files (the "Software"), to deal
11 * in the Software without restriction, including without limitation the rights
12 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 * copies of the Software, and to permit persons to whom the Software is
14 * furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included in
17 * all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 * THE SOFTWARE.
26 *
27 * Authors:
28 * Sheng Yang <sheng.yang@intel.com>
29 * Based on QEMU and Xen.
30 */
31
32#include <linux/kvm_host.h>
33
34#include "irq.h"
35#include "i8254.h"
36
37#ifndef CONFIG_X86_64
38#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
39#else
40#define mod_64(x, y) ((x) % (y))
41#endif
42
43#define RW_STATE_LSB 1
44#define RW_STATE_MSB 2
45#define RW_STATE_WORD0 3
46#define RW_STATE_WORD1 4
47
48/* Compute with 96 bit intermediate result: (a*b)/c */
49static u64 muldiv64(u64 a, u32 b, u32 c)
50{
51 union {
52 u64 ll;
53 struct {
54 u32 low, high;
55 } l;
56 } u, res;
57 u64 rl, rh;
58
59 u.ll = a;
60 rl = (u64)u.l.low * (u64)b;
61 rh = (u64)u.l.high * (u64)b;
62 rh += (rl >> 32);
63 res.l.high = div64_64(rh, c);
64 res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
65 return res.ll;
66}
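/*
 * A rough worked example of the 96-bit split above (illustrative values,
 * not taken from the patch): muldiv64(1ULL << 32, 3, 2) proceeds as
 *
 *	rl = 0x00000000 * 3              = 0
 *	rh = 0x00000001 * 3 + (rl >> 32) = 3
 *	res.l.high = 3 / 2               = 1
 *	res.l.low  = ((3 % 2) << 32) / 2 = 0x80000000
 *
 * and returns 0x180000000, i.e. (2^32 * 3) / 2, without overflowing the
 * 64-bit intermediates.
 */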
67
68static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
69{
70 struct kvm_kpit_channel_state *c =
71 &kvm->arch.vpit->pit_state.channels[channel];
72
73 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
74
75 switch (c->mode) {
76 default:
77 case 0:
78 case 4:
79 /* XXX: just disable/enable counting */
80 break;
81 case 1:
82 case 2:
83 case 3:
84 case 5:
85 /* Restart counting on rising edge. */
86 if (c->gate < val)
87 c->count_load_time = ktime_get();
88 break;
89 }
90
91 c->gate = val;
92}
93
94int pit_get_gate(struct kvm *kvm, int channel)
95{
96 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
97
98 return kvm->arch.vpit->pit_state.channels[channel].gate;
99}
100
101static int pit_get_count(struct kvm *kvm, int channel)
102{
103 struct kvm_kpit_channel_state *c =
104 &kvm->arch.vpit->pit_state.channels[channel];
105 s64 d, t;
106 int counter;
107
108 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
109
110 t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
111 d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
112
113 switch (c->mode) {
114 case 0:
115 case 1:
116 case 4:
117 case 5:
118 counter = (c->count - d) & 0xffff;
119 break;
120 case 3:
121 /* XXX: may be incorrect for odd counts */
122 counter = c->count - (mod_64((2 * d), c->count));
123 break;
124 default:
125 counter = c->count - mod_64(d, c->count);
126 break;
127 }
128 return counter;
129}
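/*
 * Sanity check of the time-to-ticks conversion above (illustrative only):
 * one millisecond of elapsed guest time gives
 *
 *	d = muldiv64(1000000, KVM_PIT_FREQ, NSEC_PER_SEC)
 *	  = 1000000 * 1193181 / 1000000000  ~ 1193 PIT ticks,
 *
 * which the switch then folds back into the 16-bit counter range
 * according to the channel mode.
 */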
130
131static int pit_get_out(struct kvm *kvm, int channel)
132{
133 struct kvm_kpit_channel_state *c =
134 &kvm->arch.vpit->pit_state.channels[channel];
135 s64 d, t;
136 int out;
137
138 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
139
140 t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
141 d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
142
143 switch (c->mode) {
144 default:
145 case 0:
146 out = (d >= c->count);
147 break;
148 case 1:
149 out = (d < c->count);
150 break;
151 case 2:
152 out = ((mod_64(d, c->count) == 0) && (d != 0));
153 break;
154 case 3:
155 out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
156 break;
157 case 4:
158 case 5:
159 out = (d == c->count);
160 break;
161 }
162
163 return out;
164}
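/*
 * Example for the mode 3 (square wave) case above (made-up numbers):
 * with count = 100, the output is high while mod_64(d, 100) < 50, so
 * d = 130 elapsed ticks gives out = 1 and d = 180 gives out = 0,
 * producing a roughly 50% duty cycle per reload period.
 */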
165
166static void pit_latch_count(struct kvm *kvm, int channel)
167{
168 struct kvm_kpit_channel_state *c =
169 &kvm->arch.vpit->pit_state.channels[channel];
170
171 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
172
173 if (!c->count_latched) {
174 c->latched_count = pit_get_count(kvm, channel);
175 c->count_latched = c->rw_mode;
176 }
177}
178
179static void pit_latch_status(struct kvm *kvm, int channel)
180{
181 struct kvm_kpit_channel_state *c =
182 &kvm->arch.vpit->pit_state.channels[channel];
183
184 WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
185
186 if (!c->status_latched) {
187 /* TODO: Return NULL COUNT (bit 6). */
188 c->status = ((pit_get_out(kvm, channel) << 7) |
189 (c->rw_mode << 4) |
190 (c->mode << 1) |
191 c->bcd);
192 c->status_latched = 1;
193 }
194}
195
196int __pit_timer_fn(struct kvm_kpit_state *ps)
197{
198 struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
199 struct kvm_kpit_timer *pt = &ps->pit_timer;
200
201 atomic_inc(&pt->pending);
202 smp_mb__after_atomic_inc();
203 /* FIXME: handle case where the guest is in guest mode */
204 if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
205 vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
206 wake_up_interruptible(&vcpu0->wq);
207 }
208
209 pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
210 pt->scheduled = ktime_to_ns(pt->timer.expires);
211
212 return (pt->period == 0 ? 0 : 1);
213}
214
215int pit_has_pending_timer(struct kvm_vcpu *vcpu)
216{
217 struct kvm_pit *pit = vcpu->kvm->arch.vpit;
218
219 if (pit && vcpu->vcpu_id == 0)
220 return atomic_read(&pit->pit_state.pit_timer.pending);
221
222 return 0;
223}
224
225static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
226{
227 struct kvm_kpit_state *ps;
228 int restart_timer = 0;
229
230 ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
231
232 restart_timer = __pit_timer_fn(ps);
233
234 if (restart_timer)
235 return HRTIMER_RESTART;
236 else
237 return HRTIMER_NORESTART;
238}
239
240static void destroy_pit_timer(struct kvm_kpit_timer *pt)
241{
242 pr_debug("pit: execute del timer!\n");
243 hrtimer_cancel(&pt->timer);
244}
245
246static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
247{
248 s64 interval;
249
250 interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
251
252 pr_debug("pit: create pit timer, interval is %llu nsec\n", interval);
253
254	/* TODO: The new value only takes effect after the counter is retriggered */
255 hrtimer_cancel(&pt->timer);
256 pt->period = (is_period == 0) ? 0 : interval;
257 pt->timer.function = pit_timer_fn;
258 atomic_set(&pt->pending, 0);
259
260 hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
261 HRTIMER_MODE_ABS);
262}
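/*
 * As a sanity check of the interval computed above (illustrative, not
 * part of the patch): the largest reload value, val = 0x10000, yields
 *
 *	interval = 65536 * NSEC_PER_SEC / KVM_PIT_FREQ  ~ 54.9 ms,
 *
 * i.e. the classic ~18.2 Hz PC timer tick.
 */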
263
264static void pit_load_count(struct kvm *kvm, int channel, u32 val)
265{
266 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
267
268 WARN_ON(!mutex_is_locked(&ps->lock));
269
270 pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
271
272	/*
273	 * Although the spec says the state of the 8254 is undefined after
274	 * power-up, some OSes such as Windows XP seem to depend on the IRQ0
275	 * interrupt while booting.
276	 * So initialize it with a rate here rather than a specific number.
277	 */
278 if (val == 0)
279 val = 0x10000;
280
281 ps->channels[channel].count_load_time = ktime_get();
282 ps->channels[channel].count = val;
283
284 if (channel != 0)
285 return;
286
287	/* Two types of timer:
288	 * mode 1 is one-shot, mode 2 is periodic; otherwise delete the timer. */
289 switch (ps->channels[0].mode) {
290 case 1:
291 create_pit_timer(&ps->pit_timer, val, 0);
292 break;
293 case 2:
294 create_pit_timer(&ps->pit_timer, val, 1);
295 break;
296 default:
297 destroy_pit_timer(&ps->pit_timer);
298 }
299}
300
301void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val)
302{
303 mutex_lock(&kvm->arch.vpit->pit_state.lock);
304 pit_load_count(kvm, channel, val);
305 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
306}
307
308static void pit_ioport_write(struct kvm_io_device *this,
309 gpa_t addr, int len, const void *data)
310{
311 struct kvm_pit *pit = (struct kvm_pit *)this->private;
312 struct kvm_kpit_state *pit_state = &pit->pit_state;
313 struct kvm *kvm = pit->kvm;
314 int channel, access;
315 struct kvm_kpit_channel_state *s;
316 u32 val = *(u32 *) data;
317
318 val &= 0xff;
319 addr &= KVM_PIT_CHANNEL_MASK;
320
321 mutex_lock(&pit_state->lock);
322
323 if (val != 0)
324 pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
325 (unsigned int)addr, len, val);
326
327 if (addr == 3) {
328 channel = val >> 6;
329 if (channel == 3) {
330 /* Read-Back Command. */
331 for (channel = 0; channel < 3; channel++) {
332 s = &pit_state->channels[channel];
333 if (val & (2 << channel)) {
334 if (!(val & 0x20))
335 pit_latch_count(kvm, channel);
336 if (!(val & 0x10))
337 pit_latch_status(kvm, channel);
338 }
339 }
340 } else {
341 /* Select Counter <channel>. */
342 s = &pit_state->channels[channel];
343 access = (val >> 4) & KVM_PIT_CHANNEL_MASK;
344 if (access == 0) {
345 pit_latch_count(kvm, channel);
346 } else {
347 s->rw_mode = access;
348 s->read_state = access;
349 s->write_state = access;
350 s->mode = (val >> 1) & 7;
351 if (s->mode > 5)
352 s->mode -= 4;
353 s->bcd = val & 1;
354 }
355 }
356 } else {
357 /* Write Count. */
358 s = &pit_state->channels[addr];
359 switch (s->write_state) {
360 default:
361 case RW_STATE_LSB:
362 pit_load_count(kvm, addr, val);
363 break;
364 case RW_STATE_MSB:
365 pit_load_count(kvm, addr, val << 8);
366 break;
367 case RW_STATE_WORD0:
368 s->write_latch = val;
369 s->write_state = RW_STATE_WORD1;
370 break;
371 case RW_STATE_WORD1:
372 pit_load_count(kvm, addr, s->write_latch | (val << 8));
373 s->write_state = RW_STATE_WORD0;
374 break;
375 }
376 }
377
378 mutex_unlock(&pit_state->lock);
379}
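/*
 * For orientation, the command byte decoded above follows the usual 8254
 * layout (a sketch, not normative):
 *
 *	bits 7-6  channel select (3 = read-back command)
 *	bits 5-4  access mode (0 = latch, 1 = LSB, 2 = MSB, 3 = LSB then MSB)
 *	bits 3-1  operating mode (0-5)
 *	bit  0    BCD flag
 *
 * e.g. 0x34 selects channel 0, LSB-then-MSB access, mode 2 (rate generator).
 */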
380
381static void pit_ioport_read(struct kvm_io_device *this,
382 gpa_t addr, int len, void *data)
383{
384 struct kvm_pit *pit = (struct kvm_pit *)this->private;
385 struct kvm_kpit_state *pit_state = &pit->pit_state;
386 struct kvm *kvm = pit->kvm;
387 int ret, count;
388 struct kvm_kpit_channel_state *s;
389
390 addr &= KVM_PIT_CHANNEL_MASK;
391 s = &pit_state->channels[addr];
392
393 mutex_lock(&pit_state->lock);
394
395 if (s->status_latched) {
396 s->status_latched = 0;
397 ret = s->status;
398 } else if (s->count_latched) {
399 switch (s->count_latched) {
400 default:
401 case RW_STATE_LSB:
402 ret = s->latched_count & 0xff;
403 s->count_latched = 0;
404 break;
405 case RW_STATE_MSB:
406 ret = s->latched_count >> 8;
407 s->count_latched = 0;
408 break;
409 case RW_STATE_WORD0:
410 ret = s->latched_count & 0xff;
411 s->count_latched = RW_STATE_MSB;
412 break;
413 }
414 } else {
415 switch (s->read_state) {
416 default:
417 case RW_STATE_LSB:
418 count = pit_get_count(kvm, addr);
419 ret = count & 0xff;
420 break;
421 case RW_STATE_MSB:
422 count = pit_get_count(kvm, addr);
423 ret = (count >> 8) & 0xff;
424 break;
425 case RW_STATE_WORD0:
426 count = pit_get_count(kvm, addr);
427 ret = count & 0xff;
428 s->read_state = RW_STATE_WORD1;
429 break;
430 case RW_STATE_WORD1:
431 count = pit_get_count(kvm, addr);
432 ret = (count >> 8) & 0xff;
433 s->read_state = RW_STATE_WORD0;
434 break;
435 }
436 }
437
438 if (len > sizeof(ret))
439 len = sizeof(ret);
440 memcpy(data, (char *)&ret, len);
441
442 mutex_unlock(&pit_state->lock);
443}
444
445static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
446{
447 return ((addr >= KVM_PIT_BASE_ADDRESS) &&
448 (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
449}
450
451static void speaker_ioport_write(struct kvm_io_device *this,
452 gpa_t addr, int len, const void *data)
453{
454 struct kvm_pit *pit = (struct kvm_pit *)this->private;
455 struct kvm_kpit_state *pit_state = &pit->pit_state;
456 struct kvm *kvm = pit->kvm;
457 u32 val = *(u32 *) data;
458
459 mutex_lock(&pit_state->lock);
460 pit_state->speaker_data_on = (val >> 1) & 1;
461 pit_set_gate(kvm, 2, val & 1);
462 mutex_unlock(&pit_state->lock);
463}
464
465static void speaker_ioport_read(struct kvm_io_device *this,
466 gpa_t addr, int len, void *data)
467{
468 struct kvm_pit *pit = (struct kvm_pit *)this->private;
469 struct kvm_kpit_state *pit_state = &pit->pit_state;
470 struct kvm *kvm = pit->kvm;
471 unsigned int refresh_clock;
472 int ret;
473
474 /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
475 refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
476
477 mutex_lock(&pit_state->lock);
478 ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
479 (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
480 if (len > sizeof(ret))
481 len = sizeof(ret);
482 memcpy(data, (char *)&ret, len);
483 mutex_unlock(&pit_state->lock);
484}
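/*
 * The refresh-clock approximation above trades accuracy for speed
 * (illustrative numbers): bit 14 of the nanosecond clock toggles every
 * 2^14 ns = 16384 ns ~ 16.4 us, versus the nominal ~15 us period, which
 * is close enough for guests that merely poll the bit at port 0x61.
 */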
485
486static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
487{
488 return (addr == KVM_SPEAKER_BASE_ADDRESS);
489}
490
491void kvm_pit_reset(struct kvm_pit *pit)
492{
493 int i;
494 struct kvm_kpit_channel_state *c;
495
496 mutex_lock(&pit->pit_state.lock);
497 for (i = 0; i < 3; i++) {
498 c = &pit->pit_state.channels[i];
499 c->mode = 0xff;
500 c->gate = (i != 2);
501 pit_load_count(pit->kvm, i, 0);
502 }
503 mutex_unlock(&pit->pit_state.lock);
504
505 atomic_set(&pit->pit_state.pit_timer.pending, 0);
506 pit->pit_state.inject_pending = 1;
507}
508
509struct kvm_pit *kvm_create_pit(struct kvm *kvm)
510{
511 struct kvm_pit *pit;
512 struct kvm_kpit_state *pit_state;
513
514 pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
515 if (!pit)
516 return NULL;
517
518 mutex_init(&pit->pit_state.lock);
519 mutex_lock(&pit->pit_state.lock);
520
521 /* Initialize PIO device */
522 pit->dev.read = pit_ioport_read;
523 pit->dev.write = pit_ioport_write;
524 pit->dev.in_range = pit_in_range;
525 pit->dev.private = pit;
526 kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
527
528 pit->speaker_dev.read = speaker_ioport_read;
529 pit->speaker_dev.write = speaker_ioport_write;
530 pit->speaker_dev.in_range = speaker_in_range;
531 pit->speaker_dev.private = pit;
532 kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
533
534 kvm->arch.vpit = pit;
535 pit->kvm = kvm;
536
537 pit_state = &pit->pit_state;
538 pit_state->pit = pit;
539 hrtimer_init(&pit_state->pit_timer.timer,
540 CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
541 mutex_unlock(&pit->pit_state.lock);
542
543 kvm_pit_reset(pit);
544
545 return pit;
546}
547
548void kvm_free_pit(struct kvm *kvm)
549{
550 struct hrtimer *timer;
551
552 if (kvm->arch.vpit) {
553 mutex_lock(&kvm->arch.vpit->pit_state.lock);
554 timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
555 hrtimer_cancel(timer);
556 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
557 kfree(kvm->arch.vpit);
558 }
559}
560
561void __inject_pit_timer_intr(struct kvm *kvm)
562{
563 mutex_lock(&kvm->lock);
564 kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
565 kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
566 kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
567 kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
568 mutex_unlock(&kvm->lock);
569}
570
571void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
572{
573 struct kvm_pit *pit = vcpu->kvm->arch.vpit;
574 struct kvm *kvm = vcpu->kvm;
575 struct kvm_kpit_state *ps;
576
577 if (vcpu && pit) {
578 ps = &pit->pit_state;
579
580		/* Try to inject pending interrupts when:
581		 * 1. an interrupt is pending, and
582		 * 2. the last one was accepted or has been waiting too long. */
583 if (atomic_read(&ps->pit_timer.pending) &&
584 (ps->inject_pending ||
585 (jiffies - ps->last_injected_time
586 >= KVM_MAX_PIT_INTR_INTERVAL))) {
587 ps->inject_pending = 0;
588 __inject_pit_timer_intr(kvm);
589 ps->last_injected_time = jiffies;
590 }
591 }
592}
593
594void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
595{
596 struct kvm_arch *arch = &vcpu->kvm->arch;
597 struct kvm_kpit_state *ps;
598
599 if (vcpu && arch->vpit) {
600 ps = &arch->vpit->pit_state;
601 if (atomic_read(&ps->pit_timer.pending) &&
602 (((arch->vpic->pics[0].imr & 1) == 0 &&
603 arch->vpic->pics[0].irq_base == vec) ||
604 (arch->vioapic->redirtbl[0].fields.vector == vec &&
605 arch->vioapic->redirtbl[0].fields.mask != 1))) {
606 ps->inject_pending = 1;
607 atomic_dec(&ps->pit_timer.pending);
608 ps->channels[0].count_load_time = ktime_get();
609 }
610 }
611}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
new file mode 100644
index 000000000000..db25c2a6c8c4
--- /dev/null
+++ b/arch/x86/kvm/i8254.h
@@ -0,0 +1,63 @@
1#ifndef __I8254_H
2#define __I8254_H
3
4#include "iodev.h"
5
6struct kvm_kpit_timer {
7 struct hrtimer timer;
8 int irq;
9 s64 period; /* unit: ns */
10 s64 scheduled;
11 ktime_t last_update;
12 atomic_t pending;
13};
14
15struct kvm_kpit_channel_state {
16 u32 count; /* can be 65536 */
17 u16 latched_count;
18 u8 count_latched;
19 u8 status_latched;
20 u8 status;
21 u8 read_state;
22 u8 write_state;
23 u8 write_latch;
24 u8 rw_mode;
25 u8 mode;
26 u8 bcd; /* not supported */
27 u8 gate; /* timer start */
28 ktime_t count_load_time;
29};
30
31struct kvm_kpit_state {
32 struct kvm_kpit_channel_state channels[3];
33 struct kvm_kpit_timer pit_timer;
34 u32 speaker_data_on;
35 struct mutex lock;
36 struct kvm_pit *pit;
 37	bool inject_pending; /* whether to inject pending interrupts */
38 unsigned long last_injected_time;
39};
40
41struct kvm_pit {
42 unsigned long base_addresss;
43 struct kvm_io_device dev;
44 struct kvm_io_device speaker_dev;
45 struct kvm *kvm;
46 struct kvm_kpit_state pit_state;
47};
48
49#define KVM_PIT_BASE_ADDRESS 0x40
50#define KVM_SPEAKER_BASE_ADDRESS 0x61
51#define KVM_PIT_MEM_LENGTH 4
52#define KVM_PIT_FREQ 1193181
 53#define KVM_MAX_PIT_INTR_INTERVAL (HZ / 100)
54#define KVM_PIT_CHANNEL_MASK 0x3
55
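/*
 * For orientation (not part of the patch): KVM_PIT_FREQ is the standard
 * PC PIT input clock, 14.31818 MHz / 12 ~ 1.193182 MHz, so one tick is
 * roughly 838 ns.
 */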
56void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
57void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
58void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
59struct kvm_pit *kvm_create_pit(struct kvm *kvm);
60void kvm_free_pit(struct kvm *kvm);
61void kvm_pit_reset(struct kvm_pit *pit);
62
63#endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index e5714759e97f..ce1f583459b1 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -23,6 +23,22 @@
23#include <linux/kvm_host.h> 23#include <linux/kvm_host.h>
24 24
25#include "irq.h" 25#include "irq.h"
26#include "i8254.h"
27
28/*
29 * check if there are pending timer events
30 * to be processed.
31 */
32int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
33{
34 int ret;
35
36 ret = pit_has_pending_timer(vcpu);
37 ret |= apic_has_pending_timer(vcpu);
38
39 return ret;
40}
41EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
26 42
27/* 43/*
28 * check if there is pending interrupt without 44 * check if there is pending interrupt without
@@ -66,6 +82,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
66void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) 82void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
67{ 83{
68 kvm_inject_apic_timer_irqs(vcpu); 84 kvm_inject_apic_timer_irqs(vcpu);
85 kvm_inject_pit_timer_irqs(vcpu);
69 /* TODO: PIT, RTC etc. */ 86 /* TODO: PIT, RTC etc. */
70} 87}
71EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); 88EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
@@ -73,6 +90,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
73void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) 90void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
74{ 91{
75 kvm_apic_timer_intr_post(vcpu, vec); 92 kvm_apic_timer_intr_post(vcpu, vec);
93 kvm_pit_timer_intr_post(vcpu, vec);
76 /* TODO: PIT, RTC etc. */ 94 /* TODO: PIT, RTC etc. */
77} 95}
78EXPORT_SYMBOL_GPL(kvm_timer_intr_post); 96EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index fa5ed5d59b5d..1802134b836f 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -85,4 +85,7 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
85void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); 85void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
86void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); 86void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
87 87
88int pit_has_pending_timer(struct kvm_vcpu *vcpu);
89int apic_has_pending_timer(struct kvm_vcpu *vcpu);
90
88#endif 91#endif
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index ecdfe97e4635..65ef0fc2c036 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -39,6 +39,8 @@ struct vcpu_svm {
39 unsigned long host_db_regs[NUM_DB_REGS]; 39 unsigned long host_db_regs[NUM_DB_REGS];
40 unsigned long host_dr6; 40 unsigned long host_dr6;
41 unsigned long host_dr7; 41 unsigned long host_dr7;
42
43 u32 *msrpm;
42}; 44};
43 45
44#endif 46#endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 68a6b1511934..57ac4e4c556a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -338,10 +338,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
338 } else 338 } else
339 apic_clear_vector(vector, apic->regs + APIC_TMR); 339 apic_clear_vector(vector, apic->regs + APIC_TMR);
340 340
341 if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE) 341 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
342 kvm_vcpu_kick(vcpu); 342 kvm_vcpu_kick(vcpu);
343 else if (vcpu->arch.mp_state == VCPU_MP_STATE_HALTED) { 343 else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
344 vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; 344 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
345 if (waitqueue_active(&vcpu->wq)) 345 if (waitqueue_active(&vcpu->wq))
346 wake_up_interruptible(&vcpu->wq); 346 wake_up_interruptible(&vcpu->wq);
347 } 347 }
@@ -362,11 +362,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
362 362
363 case APIC_DM_INIT: 363 case APIC_DM_INIT:
364 if (level) { 364 if (level) {
365 if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE) 365 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
366 printk(KERN_DEBUG 366 printk(KERN_DEBUG
367 "INIT on a runnable vcpu %d\n", 367 "INIT on a runnable vcpu %d\n",
368 vcpu->vcpu_id); 368 vcpu->vcpu_id);
369 vcpu->arch.mp_state = VCPU_MP_STATE_INIT_RECEIVED; 369 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
370 kvm_vcpu_kick(vcpu); 370 kvm_vcpu_kick(vcpu);
371 } else { 371 } else {
372 printk(KERN_DEBUG 372 printk(KERN_DEBUG
@@ -379,9 +379,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
379 case APIC_DM_STARTUP: 379 case APIC_DM_STARTUP:
380 printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n", 380 printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n",
381 vcpu->vcpu_id, vector); 381 vcpu->vcpu_id, vector);
382 if (vcpu->arch.mp_state == VCPU_MP_STATE_INIT_RECEIVED) { 382 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
383 vcpu->arch.sipi_vector = vector; 383 vcpu->arch.sipi_vector = vector;
384 vcpu->arch.mp_state = VCPU_MP_STATE_SIPI_RECEIVED; 384 vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
385 if (waitqueue_active(&vcpu->wq)) 385 if (waitqueue_active(&vcpu->wq))
386 wake_up_interruptible(&vcpu->wq); 386 wake_up_interruptible(&vcpu->wq);
387 } 387 }
@@ -658,7 +658,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
658 apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" 658 apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
659 PRIx64 ", " 659 PRIx64 ", "
660 "timer initial count 0x%x, period %lldns, " 660 "timer initial count 0x%x, period %lldns, "
661 "expire @ 0x%016" PRIx64 ".\n", __FUNCTION__, 661 "expire @ 0x%016" PRIx64 ".\n", __func__,
662 APIC_BUS_CYCLE_NS, ktime_to_ns(now), 662 APIC_BUS_CYCLE_NS, ktime_to_ns(now),
663 apic_get_reg(apic, APIC_TMICT), 663 apic_get_reg(apic, APIC_TMICT),
664 apic->timer.period, 664 apic->timer.period,
@@ -691,7 +691,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
691 /* too common printing */ 691 /* too common printing */
692 if (offset != APIC_EOI) 692 if (offset != APIC_EOI)
693 apic_debug("%s: offset 0x%x with length 0x%x, and value is " 693 apic_debug("%s: offset 0x%x with length 0x%x, and value is "
694 "0x%x\n", __FUNCTION__, offset, len, val); 694 "0x%x\n", __func__, offset, len, val);
695 695
696 offset &= 0xff0; 696 offset &= 0xff0;
697 697
@@ -822,6 +822,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
822 apic_set_tpr(apic, ((cr8 & 0x0f) << 4) 822 apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
823 | (apic_get_reg(apic, APIC_TASKPRI) & 4)); 823 | (apic_get_reg(apic, APIC_TASKPRI) & 4));
824} 824}
825EXPORT_SYMBOL_GPL(kvm_lapic_set_tpr);
825 826
826u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) 827u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
827{ 828{
@@ -869,7 +870,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
869 struct kvm_lapic *apic; 870 struct kvm_lapic *apic;
870 int i; 871 int i;
871 872
872 apic_debug("%s\n", __FUNCTION__); 873 apic_debug("%s\n", __func__);
873 874
874 ASSERT(vcpu); 875 ASSERT(vcpu);
875 apic = vcpu->arch.apic; 876 apic = vcpu->arch.apic;
@@ -907,7 +908,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
907 apic_update_ppr(apic); 908 apic_update_ppr(apic);
908 909
909 apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" 910 apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
910 "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__, 911 "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
911 vcpu, kvm_apic_id(apic), 912 vcpu, kvm_apic_id(apic),
912 vcpu->arch.apic_base, apic->base_address); 913 vcpu->arch.apic_base, apic->base_address);
913} 914}
@@ -940,7 +941,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
940 941
941 atomic_inc(&apic->timer.pending); 942 atomic_inc(&apic->timer.pending);
942 if (waitqueue_active(q)) { 943 if (waitqueue_active(q)) {
943 apic->vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; 944 apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
944 wake_up_interruptible(q); 945 wake_up_interruptible(q);
945 } 946 }
946 if (apic_lvtt_period(apic)) { 947 if (apic_lvtt_period(apic)) {
@@ -952,6 +953,16 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
952 return result; 953 return result;
953} 954}
954 955
956int apic_has_pending_timer(struct kvm_vcpu *vcpu)
957{
958 struct kvm_lapic *lapic = vcpu->arch.apic;
959
960 if (lapic)
961 return atomic_read(&lapic->timer.pending);
962
963 return 0;
964}
965
955static int __inject_apic_timer_irq(struct kvm_lapic *apic) 966static int __inject_apic_timer_irq(struct kvm_lapic *apic)
956{ 967{
957 int vector; 968 int vector;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e55af12e11b7..2ad6f5481671 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -27,11 +27,22 @@
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/swap.h> 29#include <linux/swap.h>
30#include <linux/hugetlb.h>
31#include <linux/compiler.h>
30 32
31#include <asm/page.h> 33#include <asm/page.h>
32#include <asm/cmpxchg.h> 34#include <asm/cmpxchg.h>
33#include <asm/io.h> 35#include <asm/io.h>
34 36
 37/*
 38 * Setting this variable to true enables Two-Dimensional Paging (TDP),
 39 * where the hardware walks two page tables:
 40 * 1. from guest-virtual to guest-physical, and
 41 * 2. while doing 1., from guest-physical to host-physical.
 42 * If the hardware supports this, we do not need shadow paging.
 43 */
44bool tdp_enabled = false;
45
35#undef MMU_DEBUG 46#undef MMU_DEBUG
36 47
37#undef AUDIT 48#undef AUDIT
@@ -101,8 +112,6 @@ static int dbg = 1;
101#define PT_FIRST_AVAIL_BITS_SHIFT 9 112#define PT_FIRST_AVAIL_BITS_SHIFT 9
102#define PT64_SECOND_AVAIL_BITS_SHIFT 52 113#define PT64_SECOND_AVAIL_BITS_SHIFT 52
103 114
104#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
105
106#define VALID_PAGE(x) ((x) != INVALID_PAGE) 115#define VALID_PAGE(x) ((x) != INVALID_PAGE)
107 116
108#define PT64_LEVEL_BITS 9 117#define PT64_LEVEL_BITS 9
@@ -159,6 +168,13 @@ static int dbg = 1;
159#define ACC_USER_MASK PT_USER_MASK 168#define ACC_USER_MASK PT_USER_MASK
160#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) 169#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
161 170
171struct kvm_pv_mmu_op_buffer {
172 void *ptr;
173 unsigned len;
174 unsigned processed;
175 char buf[512] __aligned(sizeof(long));
176};
177
162struct kvm_rmap_desc { 178struct kvm_rmap_desc {
163 u64 *shadow_ptes[RMAP_EXT]; 179 u64 *shadow_ptes[RMAP_EXT];
164 struct kvm_rmap_desc *more; 180 struct kvm_rmap_desc *more;
@@ -200,11 +216,15 @@ static int is_present_pte(unsigned long pte)
200 216
201static int is_shadow_present_pte(u64 pte) 217static int is_shadow_present_pte(u64 pte)
202{ 218{
203 pte &= ~PT_SHADOW_IO_MARK;
204 return pte != shadow_trap_nonpresent_pte 219 return pte != shadow_trap_nonpresent_pte
205 && pte != shadow_notrap_nonpresent_pte; 220 && pte != shadow_notrap_nonpresent_pte;
206} 221}
207 222
223static int is_large_pte(u64 pte)
224{
225 return pte & PT_PAGE_SIZE_MASK;
226}
227
208static int is_writeble_pte(unsigned long pte) 228static int is_writeble_pte(unsigned long pte)
209{ 229{
210 return pte & PT_WRITABLE_MASK; 230 return pte & PT_WRITABLE_MASK;
@@ -215,14 +235,14 @@ static int is_dirty_pte(unsigned long pte)
215 return pte & PT_DIRTY_MASK; 235 return pte & PT_DIRTY_MASK;
216} 236}
217 237
218static int is_io_pte(unsigned long pte) 238static int is_rmap_pte(u64 pte)
219{ 239{
220 return pte & PT_SHADOW_IO_MARK; 240 return is_shadow_present_pte(pte);
221} 241}
222 242
223static int is_rmap_pte(u64 pte) 243static pfn_t spte_to_pfn(u64 pte)
224{ 244{
225 return is_shadow_present_pte(pte); 245 return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
226} 246}
227 247
228static gfn_t pse36_gfn_delta(u32 gpte) 248static gfn_t pse36_gfn_delta(u32 gpte)
@@ -349,16 +369,100 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
349} 369}
350 370
351/* 371/*
372 * Return the pointer to the largepage write count for a given
373 * gfn, handling slots that are not large page aligned.
374 */
375static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot)
376{
377 unsigned long idx;
378
379 idx = (gfn / KVM_PAGES_PER_HPAGE) -
380 (slot->base_gfn / KVM_PAGES_PER_HPAGE);
381 return &slot->lpage_info[idx].write_count;
382}
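/*
 * A quick illustration of the index math above (made-up values, assuming
 * KVM_PAGES_PER_HPAGE = 512 as on x86 with 2MB pages): a slot starting at
 * base_gfn = 0x1100 and a request for gfn = 0x1634 give
 *
 *	idx = (0x1634 / 512) - (0x1100 / 512) = 11 - 8 = 3,
 *
 * so the fourth large-page-sized chunk of the slot is selected even
 * though the slot itself is not 2MB aligned.
 */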
383
384static void account_shadowed(struct kvm *kvm, gfn_t gfn)
385{
386 int *write_count;
387
388 write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
389 *write_count += 1;
390 WARN_ON(*write_count > KVM_PAGES_PER_HPAGE);
391}
392
393static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
394{
395 int *write_count;
396
397 write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
398 *write_count -= 1;
399 WARN_ON(*write_count < 0);
400}
401
402static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
403{
404 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
405 int *largepage_idx;
406
407 if (slot) {
408 largepage_idx = slot_largepage_idx(gfn, slot);
409 return *largepage_idx;
410 }
411
412 return 1;
413}
414
415static int host_largepage_backed(struct kvm *kvm, gfn_t gfn)
416{
417 struct vm_area_struct *vma;
418 unsigned long addr;
419
420 addr = gfn_to_hva(kvm, gfn);
421 if (kvm_is_error_hva(addr))
422 return 0;
423
424 vma = find_vma(current->mm, addr);
425 if (vma && is_vm_hugetlb_page(vma))
426 return 1;
427
428 return 0;
429}
430
431static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn)
432{
433 struct kvm_memory_slot *slot;
434
435 if (has_wrprotected_page(vcpu->kvm, large_gfn))
436 return 0;
437
438 if (!host_largepage_backed(vcpu->kvm, large_gfn))
439 return 0;
440
441 slot = gfn_to_memslot(vcpu->kvm, large_gfn);
442 if (slot && slot->dirty_bitmap)
443 return 0;
444
445 return 1;
446}
447
448/*
352 * Take gfn and return the reverse mapping to it. 449 * Take gfn and return the reverse mapping to it.
353 * Note: gfn must be unaliased before this function gets called 450 * Note: gfn must be unaliased before this function gets called
354 */ 451 */
355 452
356static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn) 453static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage)
357{ 454{
358 struct kvm_memory_slot *slot; 455 struct kvm_memory_slot *slot;
456 unsigned long idx;
359 457
360 slot = gfn_to_memslot(kvm, gfn); 458 slot = gfn_to_memslot(kvm, gfn);
361 return &slot->rmap[gfn - slot->base_gfn]; 459 if (!lpage)
460 return &slot->rmap[gfn - slot->base_gfn];
461
462 idx = (gfn / KVM_PAGES_PER_HPAGE) -
463 (slot->base_gfn / KVM_PAGES_PER_HPAGE);
464
465 return &slot->lpage_info[idx].rmap_pde;
362} 466}
363 467
364/* 468/*
@@ -370,7 +474,7 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
370 * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc 474 * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
371 * containing more mappings. 475 * containing more mappings.
372 */ 476 */
373static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) 477static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
374{ 478{
375 struct kvm_mmu_page *sp; 479 struct kvm_mmu_page *sp;
376 struct kvm_rmap_desc *desc; 480 struct kvm_rmap_desc *desc;
@@ -382,7 +486,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
382 gfn = unalias_gfn(vcpu->kvm, gfn); 486 gfn = unalias_gfn(vcpu->kvm, gfn);
383 sp = page_header(__pa(spte)); 487 sp = page_header(__pa(spte));
384 sp->gfns[spte - sp->spt] = gfn; 488 sp->gfns[spte - sp->spt] = gfn;
385 rmapp = gfn_to_rmap(vcpu->kvm, gfn); 489 rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
386 if (!*rmapp) { 490 if (!*rmapp) {
387 rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); 491 rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
388 *rmapp = (unsigned long)spte; 492 *rmapp = (unsigned long)spte;
@@ -435,20 +539,21 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
435 struct kvm_rmap_desc *desc; 539 struct kvm_rmap_desc *desc;
436 struct kvm_rmap_desc *prev_desc; 540 struct kvm_rmap_desc *prev_desc;
437 struct kvm_mmu_page *sp; 541 struct kvm_mmu_page *sp;
438 struct page *page; 542 pfn_t pfn;
439 unsigned long *rmapp; 543 unsigned long *rmapp;
440 int i; 544 int i;
441 545
442 if (!is_rmap_pte(*spte)) 546 if (!is_rmap_pte(*spte))
443 return; 547 return;
444 sp = page_header(__pa(spte)); 548 sp = page_header(__pa(spte));
445 page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); 549 pfn = spte_to_pfn(*spte);
446 mark_page_accessed(page); 550 if (*spte & PT_ACCESSED_MASK)
551 kvm_set_pfn_accessed(pfn);
447 if (is_writeble_pte(*spte)) 552 if (is_writeble_pte(*spte))
448 kvm_release_page_dirty(page); 553 kvm_release_pfn_dirty(pfn);
449 else 554 else
450 kvm_release_page_clean(page); 555 kvm_release_pfn_clean(pfn);
451 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt]); 556 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte));
452 if (!*rmapp) { 557 if (!*rmapp) {
453 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); 558 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
454 BUG(); 559 BUG();
@@ -514,7 +619,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
514 int write_protected = 0; 619 int write_protected = 0;
515 620
516 gfn = unalias_gfn(kvm, gfn); 621 gfn = unalias_gfn(kvm, gfn);
517 rmapp = gfn_to_rmap(kvm, gfn); 622 rmapp = gfn_to_rmap(kvm, gfn, 0);
518 623
519 spte = rmap_next(kvm, rmapp, NULL); 624 spte = rmap_next(kvm, rmapp, NULL);
520 while (spte) { 625 while (spte) {
@@ -527,8 +632,35 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
527 } 632 }
528 spte = rmap_next(kvm, rmapp, spte); 633 spte = rmap_next(kvm, rmapp, spte);
529 } 634 }
635 if (write_protected) {
636 pfn_t pfn;
637
638 spte = rmap_next(kvm, rmapp, NULL);
639 pfn = spte_to_pfn(*spte);
640 kvm_set_pfn_dirty(pfn);
641 }
642
643 /* check for huge page mappings */
644 rmapp = gfn_to_rmap(kvm, gfn, 1);
645 spte = rmap_next(kvm, rmapp, NULL);
646 while (spte) {
647 BUG_ON(!spte);
648 BUG_ON(!(*spte & PT_PRESENT_MASK));
649 BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
650 pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
651 if (is_writeble_pte(*spte)) {
652 rmap_remove(kvm, spte);
653 --kvm->stat.lpages;
654 set_shadow_pte(spte, shadow_trap_nonpresent_pte);
655 write_protected = 1;
656 }
657 spte = rmap_next(kvm, rmapp, spte);
658 }
659
530 if (write_protected) 660 if (write_protected)
531 kvm_flush_remote_tlbs(kvm); 661 kvm_flush_remote_tlbs(kvm);
662
663 account_shadowed(kvm, gfn);
532} 664}
533 665
534#ifdef MMU_DEBUG 666#ifdef MMU_DEBUG
@@ -538,8 +670,8 @@ static int is_empty_shadow_page(u64 *spt)
538 u64 *end; 670 u64 *end;
539 671
540 for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) 672 for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
541 if ((*pos & ~PT_SHADOW_IO_MARK) != shadow_trap_nonpresent_pte) { 673 if (*pos != shadow_trap_nonpresent_pte) {
542 printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__, 674 printk(KERN_ERR "%s: %p %llx\n", __func__,
543 pos, *pos); 675 pos, *pos);
544 return 0; 676 return 0;
545 } 677 }
@@ -559,7 +691,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
559 691
560static unsigned kvm_page_table_hashfn(gfn_t gfn) 692static unsigned kvm_page_table_hashfn(gfn_t gfn)
561{ 693{
562 return gfn; 694 return gfn & ((1 << KVM_MMU_HASH_SHIFT) - 1);
563} 695}
564 696
565static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, 697static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
@@ -662,13 +794,14 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
662 struct kvm_mmu_page *sp; 794 struct kvm_mmu_page *sp;
663 struct hlist_node *node; 795 struct hlist_node *node;
664 796
665 pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn); 797 pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
666 index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; 798 index = kvm_page_table_hashfn(gfn);
667 bucket = &kvm->arch.mmu_page_hash[index]; 799 bucket = &kvm->arch.mmu_page_hash[index];
668 hlist_for_each_entry(sp, node, bucket, hash_link) 800 hlist_for_each_entry(sp, node, bucket, hash_link)
669 if (sp->gfn == gfn && !sp->role.metaphysical) { 801 if (sp->gfn == gfn && !sp->role.metaphysical
802 && !sp->role.invalid) {
670 pgprintk("%s: found role %x\n", 803 pgprintk("%s: found role %x\n",
671 __FUNCTION__, sp->role.word); 804 __func__, sp->role.word);
672 return sp; 805 return sp;
673 } 806 }
674 return NULL; 807 return NULL;
@@ -699,27 +832,27 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
699 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; 832 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
700 role.quadrant = quadrant; 833 role.quadrant = quadrant;
701 } 834 }
702 pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__, 835 pgprintk("%s: looking gfn %lx role %x\n", __func__,
703 gfn, role.word); 836 gfn, role.word);
704 index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; 837 index = kvm_page_table_hashfn(gfn);
705 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 838 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
706 hlist_for_each_entry(sp, node, bucket, hash_link) 839 hlist_for_each_entry(sp, node, bucket, hash_link)
707 if (sp->gfn == gfn && sp->role.word == role.word) { 840 if (sp->gfn == gfn && sp->role.word == role.word) {
708 mmu_page_add_parent_pte(vcpu, sp, parent_pte); 841 mmu_page_add_parent_pte(vcpu, sp, parent_pte);
709 pgprintk("%s: found\n", __FUNCTION__); 842 pgprintk("%s: found\n", __func__);
710 return sp; 843 return sp;
711 } 844 }
712 ++vcpu->kvm->stat.mmu_cache_miss; 845 ++vcpu->kvm->stat.mmu_cache_miss;
713 sp = kvm_mmu_alloc_page(vcpu, parent_pte); 846 sp = kvm_mmu_alloc_page(vcpu, parent_pte);
714 if (!sp) 847 if (!sp)
715 return sp; 848 return sp;
716 pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word); 849 pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
717 sp->gfn = gfn; 850 sp->gfn = gfn;
718 sp->role = role; 851 sp->role = role;
719 hlist_add_head(&sp->hash_link, bucket); 852 hlist_add_head(&sp->hash_link, bucket);
720 vcpu->arch.mmu.prefetch_page(vcpu, sp);
721 if (!metaphysical) 853 if (!metaphysical)
722 rmap_write_protect(vcpu->kvm, gfn); 854 rmap_write_protect(vcpu->kvm, gfn);
855 vcpu->arch.mmu.prefetch_page(vcpu, sp);
723 return sp; 856 return sp;
724} 857}
725 858
@@ -745,11 +878,17 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
745 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { 878 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
746 ent = pt[i]; 879 ent = pt[i];
747 880
881 if (is_shadow_present_pte(ent)) {
882 if (!is_large_pte(ent)) {
883 ent &= PT64_BASE_ADDR_MASK;
884 mmu_page_remove_parent_pte(page_header(ent),
885 &pt[i]);
886 } else {
887 --kvm->stat.lpages;
888 rmap_remove(kvm, &pt[i]);
889 }
890 }
748 pt[i] = shadow_trap_nonpresent_pte; 891 pt[i] = shadow_trap_nonpresent_pte;
749 if (!is_shadow_present_pte(ent))
750 continue;
751 ent &= PT64_BASE_ADDR_MASK;
752 mmu_page_remove_parent_pte(page_header(ent), &pt[i]);
753 } 892 }
754 kvm_flush_remote_tlbs(kvm); 893 kvm_flush_remote_tlbs(kvm);
755} 894}
@@ -789,10 +928,15 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
789 } 928 }
790 kvm_mmu_page_unlink_children(kvm, sp); 929 kvm_mmu_page_unlink_children(kvm, sp);
791 if (!sp->root_count) { 930 if (!sp->root_count) {
931 if (!sp->role.metaphysical)
932 unaccount_shadowed(kvm, sp->gfn);
792 hlist_del(&sp->hash_link); 933 hlist_del(&sp->hash_link);
793 kvm_mmu_free_page(kvm, sp); 934 kvm_mmu_free_page(kvm, sp);
794 } else 935 } else {
795 list_move(&sp->link, &kvm->arch.active_mmu_pages); 936 list_move(&sp->link, &kvm->arch.active_mmu_pages);
937 sp->role.invalid = 1;
938 kvm_reload_remote_mmus(kvm);
939 }
796 kvm_mmu_reset_last_pte_updated(kvm); 940 kvm_mmu_reset_last_pte_updated(kvm);
797} 941}
798 942
@@ -838,13 +982,13 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
838 struct hlist_node *node, *n; 982 struct hlist_node *node, *n;
839 int r; 983 int r;
840 984
841 pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn); 985 pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
842 r = 0; 986 r = 0;
843 index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; 987 index = kvm_page_table_hashfn(gfn);
844 bucket = &kvm->arch.mmu_page_hash[index]; 988 bucket = &kvm->arch.mmu_page_hash[index];
845 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) 989 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
846 if (sp->gfn == gfn && !sp->role.metaphysical) { 990 if (sp->gfn == gfn && !sp->role.metaphysical) {
847 pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn, 991 pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
848 sp->role.word); 992 sp->role.word);
849 kvm_mmu_zap_page(kvm, sp); 993 kvm_mmu_zap_page(kvm, sp);
850 r = 1; 994 r = 1;
@@ -857,7 +1001,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
857 struct kvm_mmu_page *sp; 1001 struct kvm_mmu_page *sp;
858 1002
859 while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) { 1003 while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
860 pgprintk("%s: zap %lx %x\n", __FUNCTION__, gfn, sp->role.word); 1004 pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word);
861 kvm_mmu_zap_page(kvm, sp); 1005 kvm_mmu_zap_page(kvm, sp);
862 } 1006 }
863} 1007}
@@ -889,26 +1033,39 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
889static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1033static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
890 unsigned pt_access, unsigned pte_access, 1034 unsigned pt_access, unsigned pte_access,
891 int user_fault, int write_fault, int dirty, 1035 int user_fault, int write_fault, int dirty,
892 int *ptwrite, gfn_t gfn, struct page *page) 1036 int *ptwrite, int largepage, gfn_t gfn,
1037 pfn_t pfn, bool speculative)
893{ 1038{
894 u64 spte; 1039 u64 spte;
895 int was_rmapped = 0; 1040 int was_rmapped = 0;
896 int was_writeble = is_writeble_pte(*shadow_pte); 1041 int was_writeble = is_writeble_pte(*shadow_pte);
897 hfn_t host_pfn = (*shadow_pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
898 1042
899 pgprintk("%s: spte %llx access %x write_fault %d" 1043 pgprintk("%s: spte %llx access %x write_fault %d"
900 " user_fault %d gfn %lx\n", 1044 " user_fault %d gfn %lx\n",
901 __FUNCTION__, *shadow_pte, pt_access, 1045 __func__, *shadow_pte, pt_access,
902 write_fault, user_fault, gfn); 1046 write_fault, user_fault, gfn);
903 1047
904 if (is_rmap_pte(*shadow_pte)) { 1048 if (is_rmap_pte(*shadow_pte)) {
905 if (host_pfn != page_to_pfn(page)) { 1049 /*
1050 * If we overwrite a PTE page pointer with a 2MB PMD, unlink
1051 * the parent of the now unreachable PTE.
1052 */
1053 if (largepage && !is_large_pte(*shadow_pte)) {
1054 struct kvm_mmu_page *child;
1055 u64 pte = *shadow_pte;
1056
1057 child = page_header(pte & PT64_BASE_ADDR_MASK);
1058 mmu_page_remove_parent_pte(child, shadow_pte);
1059 } else if (pfn != spte_to_pfn(*shadow_pte)) {
906 pgprintk("hfn old %lx new %lx\n", 1060 pgprintk("hfn old %lx new %lx\n",
907 host_pfn, page_to_pfn(page)); 1061 spte_to_pfn(*shadow_pte), pfn);
908 rmap_remove(vcpu->kvm, shadow_pte); 1062 rmap_remove(vcpu->kvm, shadow_pte);
1063 } else {
1064 if (largepage)
1065 was_rmapped = is_large_pte(*shadow_pte);
1066 else
1067 was_rmapped = 1;
909 } 1068 }
910 else
911 was_rmapped = 1;
912 } 1069 }
913 1070
914 /* 1071 /*
@@ -917,6 +1074,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
917 * demand paging). 1074 * demand paging).
918 */ 1075 */
919 spte = PT_PRESENT_MASK | PT_DIRTY_MASK; 1076 spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
1077 if (!speculative)
1078 pte_access |= PT_ACCESSED_MASK;
920 if (!dirty) 1079 if (!dirty)
921 pte_access &= ~ACC_WRITE_MASK; 1080 pte_access &= ~ACC_WRITE_MASK;
922 if (!(pte_access & ACC_EXEC_MASK)) 1081 if (!(pte_access & ACC_EXEC_MASK))
@@ -925,15 +1084,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
925 spte |= PT_PRESENT_MASK; 1084 spte |= PT_PRESENT_MASK;
926 if (pte_access & ACC_USER_MASK) 1085 if (pte_access & ACC_USER_MASK)
927 spte |= PT_USER_MASK; 1086 spte |= PT_USER_MASK;
1087 if (largepage)
1088 spte |= PT_PAGE_SIZE_MASK;
928 1089
929 if (is_error_page(page)) { 1090 spte |= (u64)pfn << PAGE_SHIFT;
930 set_shadow_pte(shadow_pte,
931 shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
932 kvm_release_page_clean(page);
933 return;
934 }
935
936 spte |= page_to_phys(page);
937 1091
938 if ((pte_access & ACC_WRITE_MASK) 1092 if ((pte_access & ACC_WRITE_MASK)
939 || (write_fault && !is_write_protection(vcpu) && !user_fault)) { 1093 || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
@@ -946,9 +1100,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
946 } 1100 }
947 1101
948 shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); 1102 shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
949 if (shadow) { 1103 if (shadow ||
1104 (largepage && has_wrprotected_page(vcpu->kvm, gfn))) {
950 pgprintk("%s: found shadow page for %lx, marking ro\n", 1105 pgprintk("%s: found shadow page for %lx, marking ro\n",
951 __FUNCTION__, gfn); 1106 __func__, gfn);
952 pte_access &= ~ACC_WRITE_MASK; 1107 pte_access &= ~ACC_WRITE_MASK;
953 if (is_writeble_pte(spte)) { 1108 if (is_writeble_pte(spte)) {
954 spte &= ~PT_WRITABLE_MASK; 1109 spte &= ~PT_WRITABLE_MASK;
@@ -964,18 +1119,25 @@ unshadowed:
964 if (pte_access & ACC_WRITE_MASK) 1119 if (pte_access & ACC_WRITE_MASK)
965 mark_page_dirty(vcpu->kvm, gfn); 1120 mark_page_dirty(vcpu->kvm, gfn);
966 1121
967 pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte); 1122 pgprintk("%s: setting spte %llx\n", __func__, spte);
1123 pgprintk("instantiating %s PTE (%s) at %d (%llx) addr %llx\n",
1124 (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB",
1125 (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte);
968 set_shadow_pte(shadow_pte, spte); 1126 set_shadow_pte(shadow_pte, spte);
1127 if (!was_rmapped && (spte & PT_PAGE_SIZE_MASK)
1128 && (spte & PT_PRESENT_MASK))
1129 ++vcpu->kvm->stat.lpages;
1130
969 page_header_update_slot(vcpu->kvm, shadow_pte, gfn); 1131 page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
970 if (!was_rmapped) { 1132 if (!was_rmapped) {
971 rmap_add(vcpu, shadow_pte, gfn); 1133 rmap_add(vcpu, shadow_pte, gfn, largepage);
972 if (!is_rmap_pte(*shadow_pte)) 1134 if (!is_rmap_pte(*shadow_pte))
973 kvm_release_page_clean(page); 1135 kvm_release_pfn_clean(pfn);
974 } else { 1136 } else {
975 if (was_writeble) 1137 if (was_writeble)
976 kvm_release_page_dirty(page); 1138 kvm_release_pfn_dirty(pfn);
977 else 1139 else
978 kvm_release_page_clean(page); 1140 kvm_release_pfn_clean(pfn);
979 } 1141 }
980 if (!ptwrite || !*ptwrite) 1142 if (!ptwrite || !*ptwrite)
981 vcpu->arch.last_pte_updated = shadow_pte; 1143 vcpu->arch.last_pte_updated = shadow_pte;
@@ -985,10 +1147,10 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
985{ 1147{
986} 1148}
987 1149
988static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, 1150static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
989 gfn_t gfn, struct page *page) 1151 int largepage, gfn_t gfn, pfn_t pfn,
1152 int level)
990{ 1153{
991 int level = PT32E_ROOT_LEVEL;
992 hpa_t table_addr = vcpu->arch.mmu.root_hpa; 1154 hpa_t table_addr = vcpu->arch.mmu.root_hpa;
993 int pt_write = 0; 1155 int pt_write = 0;
994 1156
@@ -1001,8 +1163,14 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
1001 1163
1002 if (level == 1) { 1164 if (level == 1) {
1003 mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, 1165 mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
1004 0, write, 1, &pt_write, gfn, page); 1166 0, write, 1, &pt_write, 0, gfn, pfn, false);
1005 return pt_write || is_io_pte(table[index]); 1167 return pt_write;
1168 }
1169
1170 if (largepage && level == 2) {
1171 mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
1172 0, write, 1, &pt_write, 1, gfn, pfn, false);
1173 return pt_write;
1006 } 1174 }
1007 1175
1008 if (table[index] == shadow_trap_nonpresent_pte) { 1176 if (table[index] == shadow_trap_nonpresent_pte) {
@@ -1016,7 +1184,7 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
1016 1, ACC_ALL, &table[index]); 1184 1, ACC_ALL, &table[index]);
1017 if (!new_table) { 1185 if (!new_table) {
1018 pgprintk("nonpaging_map: ENOMEM\n"); 1186 pgprintk("nonpaging_map: ENOMEM\n");
1019 kvm_release_page_clean(page); 1187 kvm_release_pfn_clean(pfn);
1020 return -ENOMEM; 1188 return -ENOMEM;
1021 } 1189 }
1022 1190
@@ -1030,21 +1198,30 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
1030static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) 1198static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
1031{ 1199{
1032 int r; 1200 int r;
1033 1201 int largepage = 0;
1034 struct page *page; 1202 pfn_t pfn;
1035
1036 down_read(&vcpu->kvm->slots_lock);
1037 1203
1038 down_read(&current->mm->mmap_sem); 1204 down_read(&current->mm->mmap_sem);
1039 page = gfn_to_page(vcpu->kvm, gfn); 1205 if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
1206 gfn &= ~(KVM_PAGES_PER_HPAGE-1);
1207 largepage = 1;
1208 }
1209
1210 pfn = gfn_to_pfn(vcpu->kvm, gfn);
1040 up_read(&current->mm->mmap_sem); 1211 up_read(&current->mm->mmap_sem);
1041 1212
1213 /* mmio */
1214 if (is_error_pfn(pfn)) {
1215 kvm_release_pfn_clean(pfn);
1216 return 1;
1217 }
1218
1042 spin_lock(&vcpu->kvm->mmu_lock); 1219 spin_lock(&vcpu->kvm->mmu_lock);
1043 kvm_mmu_free_some_pages(vcpu); 1220 kvm_mmu_free_some_pages(vcpu);
1044 r = __nonpaging_map(vcpu, v, write, gfn, page); 1221 r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
1222 PT32E_ROOT_LEVEL);
1045 spin_unlock(&vcpu->kvm->mmu_lock); 1223 spin_unlock(&vcpu->kvm->mmu_lock);
1046 1224
1047 up_read(&vcpu->kvm->slots_lock);
1048 1225
1049 return r; 1226 return r;
1050} 1227}
@@ -1073,6 +1250,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
1073 1250
1074 sp = page_header(root); 1251 sp = page_header(root);
1075 --sp->root_count; 1252 --sp->root_count;
1253 if (!sp->root_count && sp->role.invalid)
1254 kvm_mmu_zap_page(vcpu->kvm, sp);
1076 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 1255 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
1077 spin_unlock(&vcpu->kvm->mmu_lock); 1256 spin_unlock(&vcpu->kvm->mmu_lock);
1078 return; 1257 return;
@@ -1085,6 +1264,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
1085 root &= PT64_BASE_ADDR_MASK; 1264 root &= PT64_BASE_ADDR_MASK;
1086 sp = page_header(root); 1265 sp = page_header(root);
1087 --sp->root_count; 1266 --sp->root_count;
1267 if (!sp->root_count && sp->role.invalid)
1268 kvm_mmu_zap_page(vcpu->kvm, sp);
1088 } 1269 }
1089 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; 1270 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
1090 } 1271 }
@@ -1097,6 +1278,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1097 int i; 1278 int i;
1098 gfn_t root_gfn; 1279 gfn_t root_gfn;
1099 struct kvm_mmu_page *sp; 1280 struct kvm_mmu_page *sp;
1281 int metaphysical = 0;
1100 1282
1101 root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; 1283 root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
1102 1284
@@ -1105,14 +1287,20 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1105 hpa_t root = vcpu->arch.mmu.root_hpa; 1287 hpa_t root = vcpu->arch.mmu.root_hpa;
1106 1288
1107 ASSERT(!VALID_PAGE(root)); 1289 ASSERT(!VALID_PAGE(root));
1290 if (tdp_enabled)
1291 metaphysical = 1;
1108 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 1292 sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
1109 PT64_ROOT_LEVEL, 0, ACC_ALL, NULL); 1293 PT64_ROOT_LEVEL, metaphysical,
1294 ACC_ALL, NULL);
1110 root = __pa(sp->spt); 1295 root = __pa(sp->spt);
1111 ++sp->root_count; 1296 ++sp->root_count;
1112 vcpu->arch.mmu.root_hpa = root; 1297 vcpu->arch.mmu.root_hpa = root;
1113 return; 1298 return;
1114 } 1299 }
1115#endif 1300#endif
1301 metaphysical = !is_paging(vcpu);
1302 if (tdp_enabled)
1303 metaphysical = 1;
1116 for (i = 0; i < 4; ++i) { 1304 for (i = 0; i < 4; ++i) {
1117 hpa_t root = vcpu->arch.mmu.pae_root[i]; 1305 hpa_t root = vcpu->arch.mmu.pae_root[i];
1118 1306
@@ -1126,7 +1314,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1126 } else if (vcpu->arch.mmu.root_level == 0) 1314 } else if (vcpu->arch.mmu.root_level == 0)
1127 root_gfn = 0; 1315 root_gfn = 0;
1128 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, 1316 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
1129 PT32_ROOT_LEVEL, !is_paging(vcpu), 1317 PT32_ROOT_LEVEL, metaphysical,
1130 ACC_ALL, NULL); 1318 ACC_ALL, NULL);
1131 root = __pa(sp->spt); 1319 root = __pa(sp->spt);
1132 ++sp->root_count; 1320 ++sp->root_count;
@@ -1146,7 +1334,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
1146 gfn_t gfn; 1334 gfn_t gfn;
1147 int r; 1335 int r;
1148 1336
1149 pgprintk("%s: gva %lx error %x\n", __FUNCTION__, gva, error_code); 1337 pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
1150 r = mmu_topup_memory_caches(vcpu); 1338 r = mmu_topup_memory_caches(vcpu);
1151 if (r) 1339 if (r)
1152 return r; 1340 return r;
@@ -1160,6 +1348,41 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
1160 error_code & PFERR_WRITE_MASK, gfn); 1348 error_code & PFERR_WRITE_MASK, gfn);
1161} 1349}
1162 1350
1351static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
1352 u32 error_code)
1353{
1354 pfn_t pfn;
1355 int r;
1356 int largepage = 0;
1357 gfn_t gfn = gpa >> PAGE_SHIFT;
1358
1359 ASSERT(vcpu);
1360 ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
1361
1362 r = mmu_topup_memory_caches(vcpu);
1363 if (r)
1364 return r;
1365
1366 down_read(&current->mm->mmap_sem);
1367 if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
1368 gfn &= ~(KVM_PAGES_PER_HPAGE-1);
1369 largepage = 1;
1370 }
1371 pfn = gfn_to_pfn(vcpu->kvm, gfn);
1372 up_read(&current->mm->mmap_sem);
1373 if (is_error_pfn(pfn)) {
1374 kvm_release_pfn_clean(pfn);
1375 return 1;
1376 }
1377 spin_lock(&vcpu->kvm->mmu_lock);
1378 kvm_mmu_free_some_pages(vcpu);
1379 r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
1380 largepage, gfn, pfn, TDP_ROOT_LEVEL);
1381 spin_unlock(&vcpu->kvm->mmu_lock);
1382
1383 return r;
1384}
1385
1163static void nonpaging_free(struct kvm_vcpu *vcpu) 1386static void nonpaging_free(struct kvm_vcpu *vcpu)
1164{ 1387{
1165 mmu_free_roots(vcpu); 1388 mmu_free_roots(vcpu);
@@ -1188,7 +1411,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
1188 1411
1189static void paging_new_cr3(struct kvm_vcpu *vcpu) 1412static void paging_new_cr3(struct kvm_vcpu *vcpu)
1190{ 1413{
1191 pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->arch.cr3); 1414 pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3);
1192 mmu_free_roots(vcpu); 1415 mmu_free_roots(vcpu);
1193} 1416}
1194 1417
@@ -1253,7 +1476,35 @@ static int paging32E_init_context(struct kvm_vcpu *vcpu)
1253 return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); 1476 return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
1254} 1477}
1255 1478
1256static int init_kvm_mmu(struct kvm_vcpu *vcpu) 1479static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
1480{
1481 struct kvm_mmu *context = &vcpu->arch.mmu;
1482
1483 context->new_cr3 = nonpaging_new_cr3;
1484 context->page_fault = tdp_page_fault;
1485 context->free = nonpaging_free;
1486 context->prefetch_page = nonpaging_prefetch_page;
1487 context->shadow_root_level = TDP_ROOT_LEVEL;
1488 context->root_hpa = INVALID_PAGE;
1489
1490 if (!is_paging(vcpu)) {
1491 context->gva_to_gpa = nonpaging_gva_to_gpa;
1492 context->root_level = 0;
1493 } else if (is_long_mode(vcpu)) {
1494 context->gva_to_gpa = paging64_gva_to_gpa;
1495 context->root_level = PT64_ROOT_LEVEL;
1496 } else if (is_pae(vcpu)) {
1497 context->gva_to_gpa = paging64_gva_to_gpa;
1498 context->root_level = PT32E_ROOT_LEVEL;
1499 } else {
1500 context->gva_to_gpa = paging32_gva_to_gpa;
1501 context->root_level = PT32_ROOT_LEVEL;
1502 }
1503
1504 return 0;
1505}
1506
1507static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
1257{ 1508{
1258 ASSERT(vcpu); 1509 ASSERT(vcpu);
1259 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); 1510 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -1268,6 +1519,16 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)
1268 return paging32_init_context(vcpu); 1519 return paging32_init_context(vcpu);
1269} 1520}
1270 1521
1522static int init_kvm_mmu(struct kvm_vcpu *vcpu)
1523{
1524 vcpu->arch.update_pte.pfn = bad_pfn;
1525
1526 if (tdp_enabled)
1527 return init_kvm_tdp_mmu(vcpu);
1528 else
1529 return init_kvm_softmmu(vcpu);
1530}
1531
1271static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) 1532static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
1272{ 1533{
1273 ASSERT(vcpu); 1534 ASSERT(vcpu);
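The restructuring above splits MMU setup in two: init_kvm_mmu() now only chooses between the TDP context (hardware nested paging) and the existing shadow-MMU setup, based on tdp_enabled. A rough user-space sketch of the same ops-table dispatch pattern (names are illustrative, not the kernel's):

	#include <stdio.h>
	#include <stdbool.h>

	struct mmu_ctx {
		int (*page_fault)(unsigned long addr, unsigned err);
	};

	static int tdp_fault(unsigned long addr, unsigned err)
	{
		printf("tdp fault at %#lx err %#x\n", addr, err);
		return 0;
	}

	static int shadow_fault(unsigned long addr, unsigned err)
	{
		printf("shadow fault at %#lx err %#x\n", addr, err);
		return 0;
	}

	static bool tdp_enabled = true;	/* would be set once by kvm_enable_tdp() */

	static void init_mmu(struct mmu_ctx *ctx)
	{
		/* pick the fault handler once, at vcpu setup time */
		ctx->page_fault = tdp_enabled ? tdp_fault : shadow_fault;
	}

	int main(void)
	{
		struct mmu_ctx ctx;

		init_mmu(&ctx);
		return ctx.page_fault(0x1000, 2);
	}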
@@ -1316,7 +1577,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
1316 1577
1317 pte = *spte; 1578 pte = *spte;
1318 if (is_shadow_present_pte(pte)) { 1579 if (is_shadow_present_pte(pte)) {
1319 if (sp->role.level == PT_PAGE_TABLE_LEVEL) 1580 if (sp->role.level == PT_PAGE_TABLE_LEVEL ||
1581 is_large_pte(pte))
1320 rmap_remove(vcpu->kvm, spte); 1582 rmap_remove(vcpu->kvm, spte);
1321 else { 1583 else {
1322 child = page_header(pte & PT64_BASE_ADDR_MASK); 1584 child = page_header(pte & PT64_BASE_ADDR_MASK);
@@ -1324,24 +1586,26 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
1324 } 1586 }
1325 } 1587 }
1326 set_shadow_pte(spte, shadow_trap_nonpresent_pte); 1588 set_shadow_pte(spte, shadow_trap_nonpresent_pte);
1589 if (is_large_pte(pte))
1590 --vcpu->kvm->stat.lpages;
1327} 1591}
1328 1592
1329static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, 1593static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
1330 struct kvm_mmu_page *sp, 1594 struct kvm_mmu_page *sp,
1331 u64 *spte, 1595 u64 *spte,
1332 const void *new, int bytes, 1596 const void *new)
1333 int offset_in_pte)
1334{ 1597{
1335 if (sp->role.level != PT_PAGE_TABLE_LEVEL) { 1598 if ((sp->role.level != PT_PAGE_TABLE_LEVEL)
1599 && !vcpu->arch.update_pte.largepage) {
1336 ++vcpu->kvm->stat.mmu_pde_zapped; 1600 ++vcpu->kvm->stat.mmu_pde_zapped;
1337 return; 1601 return;
1338 } 1602 }
1339 1603
1340 ++vcpu->kvm->stat.mmu_pte_updated; 1604 ++vcpu->kvm->stat.mmu_pte_updated;
1341 if (sp->role.glevels == PT32_ROOT_LEVEL) 1605 if (sp->role.glevels == PT32_ROOT_LEVEL)
1342 paging32_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte); 1606 paging32_update_pte(vcpu, sp, spte, new);
1343 else 1607 else
1344 paging64_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte); 1608 paging64_update_pte(vcpu, sp, spte, new);
1345} 1609}
1346 1610
1347static bool need_remote_flush(u64 old, u64 new) 1611static bool need_remote_flush(u64 old, u64 new)
@@ -1378,7 +1642,9 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1378 gfn_t gfn; 1642 gfn_t gfn;
1379 int r; 1643 int r;
1380 u64 gpte = 0; 1644 u64 gpte = 0;
1381 struct page *page; 1645 pfn_t pfn;
1646
1647 vcpu->arch.update_pte.largepage = 0;
1382 1648
1383 if (bytes != 4 && bytes != 8) 1649 if (bytes != 4 && bytes != 8)
1384 return; 1650 return;
@@ -1408,11 +1674,19 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1408 gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; 1674 gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
1409 1675
1410 down_read(&current->mm->mmap_sem); 1676 down_read(&current->mm->mmap_sem);
1411 page = gfn_to_page(vcpu->kvm, gfn); 1677 if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) {
1678 gfn &= ~(KVM_PAGES_PER_HPAGE-1);
1679 vcpu->arch.update_pte.largepage = 1;
1680 }
1681 pfn = gfn_to_pfn(vcpu->kvm, gfn);
1412 up_read(&current->mm->mmap_sem); 1682 up_read(&current->mm->mmap_sem);
1413 1683
1684 if (is_error_pfn(pfn)) {
1685 kvm_release_pfn_clean(pfn);
1686 return;
1687 }
1414 vcpu->arch.update_pte.gfn = gfn; 1688 vcpu->arch.update_pte.gfn = gfn;
1415 vcpu->arch.update_pte.page = page; 1689 vcpu->arch.update_pte.pfn = pfn;
1416} 1690}
1417 1691
1418void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 1692void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1423,7 +1697,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1423 struct hlist_node *node, *n; 1697 struct hlist_node *node, *n;
1424 struct hlist_head *bucket; 1698 struct hlist_head *bucket;
1425 unsigned index; 1699 unsigned index;
1426 u64 entry; 1700 u64 entry, gentry;
1427 u64 *spte; 1701 u64 *spte;
1428 unsigned offset = offset_in_page(gpa); 1702 unsigned offset = offset_in_page(gpa);
1429 unsigned pte_size; 1703 unsigned pte_size;
@@ -1433,8 +1707,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1433 int level; 1707 int level;
1434 int flooded = 0; 1708 int flooded = 0;
1435 int npte; 1709 int npte;
1710 int r;
1436 1711
1437 pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); 1712 pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
1438 mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); 1713 mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
1439 spin_lock(&vcpu->kvm->mmu_lock); 1714 spin_lock(&vcpu->kvm->mmu_lock);
1440 kvm_mmu_free_some_pages(vcpu); 1715 kvm_mmu_free_some_pages(vcpu);
@@ -1450,7 +1725,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1450 vcpu->arch.last_pt_write_count = 1; 1725 vcpu->arch.last_pt_write_count = 1;
1451 vcpu->arch.last_pte_updated = NULL; 1726 vcpu->arch.last_pte_updated = NULL;
1452 } 1727 }
1453 index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; 1728 index = kvm_page_table_hashfn(gfn);
1454 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 1729 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
1455 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { 1730 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
1456 if (sp->gfn != gfn || sp->role.metaphysical) 1731 if (sp->gfn != gfn || sp->role.metaphysical)
@@ -1496,20 +1771,29 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1496 continue; 1771 continue;
1497 } 1772 }
1498 spte = &sp->spt[page_offset / sizeof(*spte)]; 1773 spte = &sp->spt[page_offset / sizeof(*spte)];
1774 if ((gpa & (pte_size - 1)) || (bytes < pte_size)) {
1775 gentry = 0;
1776 r = kvm_read_guest_atomic(vcpu->kvm,
1777 gpa & ~(u64)(pte_size - 1),
1778 &gentry, pte_size);
1779 new = (const void *)&gentry;
1780 if (r < 0)
1781 new = NULL;
1782 }
1499 while (npte--) { 1783 while (npte--) {
1500 entry = *spte; 1784 entry = *spte;
1501 mmu_pte_write_zap_pte(vcpu, sp, spte); 1785 mmu_pte_write_zap_pte(vcpu, sp, spte);
1502 mmu_pte_write_new_pte(vcpu, sp, spte, new, bytes, 1786 if (new)
1503 page_offset & (pte_size - 1)); 1787 mmu_pte_write_new_pte(vcpu, sp, spte, new);
1504 mmu_pte_write_flush_tlb(vcpu, entry, *spte); 1788 mmu_pte_write_flush_tlb(vcpu, entry, *spte);
1505 ++spte; 1789 ++spte;
1506 } 1790 }
1507 } 1791 }
1508 kvm_mmu_audit(vcpu, "post pte write"); 1792 kvm_mmu_audit(vcpu, "post pte write");
1509 spin_unlock(&vcpu->kvm->mmu_lock); 1793 spin_unlock(&vcpu->kvm->mmu_lock);
1510 if (vcpu->arch.update_pte.page) { 1794 if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
1511 kvm_release_page_clean(vcpu->arch.update_pte.page); 1795 kvm_release_pfn_clean(vcpu->arch.update_pte.pfn);
1512 vcpu->arch.update_pte.page = NULL; 1796 vcpu->arch.update_pte.pfn = bad_pfn;
1513 } 1797 }
1514} 1798}
1515 1799
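In the kvm_mmu_pte_write() hunk above, a guest write that is misaligned or smaller than a full PTE no longer carries an offset into the update path; instead the whole naturally aligned entry is re-read from guest memory and used as the new PTE (or the slot is only zapped if that read fails). The alignment arithmetic in isolation, as a standalone sketch with made-up values:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t gpa = 0x1234cULL;	/* hypothetical write target inside a page table */
		unsigned bytes = 4;		/* guest wrote only 4 bytes */
		unsigned pte_size = 8;		/* 64-bit PTEs (PAE / long mode) */

		if ((gpa & (pte_size - 1)) || bytes < pte_size) {
			/* re-read the full entry at its natural alignment */
			uint64_t aligned = gpa & ~(uint64_t)(pte_size - 1);
			printf("partial write at %#llx, re-read full pte at %#llx\n",
			       (unsigned long long)gpa, (unsigned long long)aligned);
		}
		return 0;
	}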
@@ -1518,9 +1802,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
1518 gpa_t gpa; 1802 gpa_t gpa;
1519 int r; 1803 int r;
1520 1804
1521 down_read(&vcpu->kvm->slots_lock);
1522 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); 1805 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
1523 up_read(&vcpu->kvm->slots_lock);
1524 1806
1525 spin_lock(&vcpu->kvm->mmu_lock); 1807 spin_lock(&vcpu->kvm->mmu_lock);
1526 r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); 1808 r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
@@ -1577,6 +1859,12 @@ out:
1577} 1859}
1578EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); 1860EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
1579 1861
1862void kvm_enable_tdp(void)
1863{
1864 tdp_enabled = true;
1865}
1866EXPORT_SYMBOL_GPL(kvm_enable_tdp);
1867
1580static void free_mmu_pages(struct kvm_vcpu *vcpu) 1868static void free_mmu_pages(struct kvm_vcpu *vcpu)
1581{ 1869{
1582 struct kvm_mmu_page *sp; 1870 struct kvm_mmu_page *sp;
@@ -1677,7 +1965,53 @@ void kvm_mmu_zap_all(struct kvm *kvm)
1677 kvm_flush_remote_tlbs(kvm); 1965 kvm_flush_remote_tlbs(kvm);
1678} 1966}
1679 1967
1680void kvm_mmu_module_exit(void) 1968void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm)
1969{
1970 struct kvm_mmu_page *page;
1971
1972 page = container_of(kvm->arch.active_mmu_pages.prev,
1973 struct kvm_mmu_page, link);
1974 kvm_mmu_zap_page(kvm, page);
1975}
1976
1977static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
1978{
1979 struct kvm *kvm;
1980 struct kvm *kvm_freed = NULL;
1981 int cache_count = 0;
1982
1983 spin_lock(&kvm_lock);
1984
1985 list_for_each_entry(kvm, &vm_list, vm_list) {
1986 int npages;
1987
1988 spin_lock(&kvm->mmu_lock);
1989 npages = kvm->arch.n_alloc_mmu_pages -
1990 kvm->arch.n_free_mmu_pages;
1991 cache_count += npages;
1992 if (!kvm_freed && nr_to_scan > 0 && npages > 0) {
1993 kvm_mmu_remove_one_alloc_mmu_page(kvm);
1994 cache_count--;
1995 kvm_freed = kvm;
1996 }
1997 nr_to_scan--;
1998
1999 spin_unlock(&kvm->mmu_lock);
2000 }
2001 if (kvm_freed)
2002 list_move_tail(&kvm_freed->vm_list, &vm_list);
2003
2004 spin_unlock(&kvm_lock);
2005
2006 return cache_count;
2007}
2008
2009static struct shrinker mmu_shrinker = {
2010 .shrink = mmu_shrink,
2011 .seeks = DEFAULT_SEEKS * 10,
2012};
2013
2014void mmu_destroy_caches(void)
1681{ 2015{
1682 if (pte_chain_cache) 2016 if (pte_chain_cache)
1683 kmem_cache_destroy(pte_chain_cache); 2017 kmem_cache_destroy(pte_chain_cache);
@@ -1687,6 +2021,12 @@ void kvm_mmu_module_exit(void)
1687 kmem_cache_destroy(mmu_page_header_cache); 2021 kmem_cache_destroy(mmu_page_header_cache);
1688} 2022}
1689 2023
2024void kvm_mmu_module_exit(void)
2025{
2026 mmu_destroy_caches();
2027 unregister_shrinker(&mmu_shrinker);
2028}
2029
1690int kvm_mmu_module_init(void) 2030int kvm_mmu_module_init(void)
1691{ 2031{
1692 pte_chain_cache = kmem_cache_create("kvm_pte_chain", 2032 pte_chain_cache = kmem_cache_create("kvm_pte_chain",
@@ -1706,10 +2046,12 @@ int kvm_mmu_module_init(void)
1706 if (!mmu_page_header_cache) 2046 if (!mmu_page_header_cache)
1707 goto nomem; 2047 goto nomem;
1708 2048
2049 register_shrinker(&mmu_shrinker);
2050
1709 return 0; 2051 return 0;
1710 2052
1711nomem: 2053nomem:
1712 kvm_mmu_module_exit(); 2054 mmu_destroy_caches();
1713 return -ENOMEM; 2055 return -ENOMEM;
1714} 2056}
1715 2057
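The shrinker added above (and registered in kvm_mmu_module_init) hands shadow-MMU pages back under memory pressure: it walks the global VM list under kvm_lock, reports allocated-minus-free MMU pages as the cache size, zaps one page from the first VM that has any, and rotates that VM to the tail so repeated shrink calls spread the cost across guests. A toy model of that round-robin accounting, with plain arrays standing in for the kernel's locked lists:

	#include <stdio.h>

	#define NVMS 3

	/* stand-in for n_alloc_mmu_pages - n_free_mmu_pages per VM */
	static int mmu_pages[NVMS] = { 4, 0, 7 };

	/* scan order; the VM that gave up a page moves to the back */
	static int order[NVMS] = { 0, 1, 2 };

	static int shrink_once(void)
	{
		int cache = 0, freed = -1;
		int i;

		for (i = 0; i < NVMS; i++) {
			int vm = order[i];

			cache += mmu_pages[vm];
			if (freed < 0 && mmu_pages[vm] > 0) {
				mmu_pages[vm]--;	/* kvm_mmu_remove_one_alloc_mmu_page() */
				cache--;
				freed = i;
			}
		}
		if (freed >= 0) {			/* list_move_tail() */
			int vm = order[freed];

			for (i = freed; i < NVMS - 1; i++)
				order[i] = order[i + 1];
			order[NVMS - 1] = vm;
		}
		return cache;				/* remaining cache size reported back */
	}

	int main(void)
	{
		int round;

		for (round = 0; round < 4; round++)
			printf("round %d: cache now %d\n", round, shrink_once());
		return 0;
	}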
@@ -1732,6 +2074,127 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
1732 return nr_mmu_pages; 2074 return nr_mmu_pages;
1733} 2075}
1734 2076
2077static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer,
2078 unsigned len)
2079{
2080 if (len > buffer->len)
2081 return NULL;
2082 return buffer->ptr;
2083}
2084
2085static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer,
2086 unsigned len)
2087{
2088 void *ret;
2089
2090 ret = pv_mmu_peek_buffer(buffer, len);
2091 if (!ret)
2092 return ret;
2093 buffer->ptr += len;
2094 buffer->len -= len;
2095 buffer->processed += len;
2096 return ret;
2097}
2098
2099static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
2100 gpa_t addr, gpa_t value)
2101{
2102 int bytes = 8;
2103 int r;
2104
2105 if (!is_long_mode(vcpu) && !is_pae(vcpu))
2106 bytes = 4;
2107
2108 r = mmu_topup_memory_caches(vcpu);
2109 if (r)
2110 return r;
2111
2112 if (!emulator_write_phys(vcpu, addr, &value, bytes))
2113 return -EFAULT;
2114
2115 return 1;
2116}
2117
2118static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
2119{
2120 kvm_x86_ops->tlb_flush(vcpu);
2121 return 1;
2122}
2123
2124static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
2125{
2126 spin_lock(&vcpu->kvm->mmu_lock);
2127 mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
2128 spin_unlock(&vcpu->kvm->mmu_lock);
2129 return 1;
2130}
2131
2132static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu,
2133 struct kvm_pv_mmu_op_buffer *buffer)
2134{
2135 struct kvm_mmu_op_header *header;
2136
2137 header = pv_mmu_peek_buffer(buffer, sizeof *header);
2138 if (!header)
2139 return 0;
2140 switch (header->op) {
2141 case KVM_MMU_OP_WRITE_PTE: {
2142 struct kvm_mmu_op_write_pte *wpte;
2143
2144 wpte = pv_mmu_read_buffer(buffer, sizeof *wpte);
2145 if (!wpte)
2146 return 0;
2147 return kvm_pv_mmu_write(vcpu, wpte->pte_phys,
2148 wpte->pte_val);
2149 }
2150 case KVM_MMU_OP_FLUSH_TLB: {
2151 struct kvm_mmu_op_flush_tlb *ftlb;
2152
2153 ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb);
2154 if (!ftlb)
2155 return 0;
2156 return kvm_pv_mmu_flush_tlb(vcpu);
2157 }
2158 case KVM_MMU_OP_RELEASE_PT: {
2159 struct kvm_mmu_op_release_pt *rpt;
2160
2161 rpt = pv_mmu_read_buffer(buffer, sizeof *rpt);
2162 if (!rpt)
2163 return 0;
2164 return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys);
2165 }
2166 default: return 0;
2167 }
2168}
2169
2170int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
2171 gpa_t addr, unsigned long *ret)
2172{
2173 int r;
2174 struct kvm_pv_mmu_op_buffer buffer;
2175
2176 buffer.ptr = buffer.buf;
2177 buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
2178 buffer.processed = 0;
2179
2180 r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len);
2181 if (r)
2182 goto out;
2183
2184 while (buffer.len) {
2185 r = kvm_pv_mmu_op_one(vcpu, &buffer);
2186 if (r < 0)
2187 goto out;
2188 if (r == 0)
2189 break;
2190 }
2191
2192 r = 1;
2193out:
2194 *ret = buffer.processed;
2195 return r;
2196}
2197
1735#ifdef AUDIT 2198#ifdef AUDIT
1736 2199
1737static const char *audit_msg; 2200static const char *audit_msg;
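The paravirtual MMU path added in the hunk above copies the guest's request buffer in once and then consumes it record by record: pv_mmu_peek_buffer() only checks that enough bytes remain, while pv_mmu_read_buffer() also advances the cursor and the processed count reported back to the guest. A small standalone sketch of that cursor handling (hypothetical two-byte records, not the kernel structures):

	#include <stdio.h>
	#include <stddef.h>

	struct op_buffer {
		const unsigned char *ptr;
		size_t len;
		size_t processed;
	};

	/* look at the next 'len' bytes without consuming them */
	static const void *peek(struct op_buffer *b, size_t len)
	{
		return len > b->len ? NULL : b->ptr;
	}

	/* consume the next 'len' bytes and account for them */
	static const void *read_buf(struct op_buffer *b, size_t len)
	{
		const void *p = peek(b, len);

		if (!p)
			return NULL;
		b->ptr += len;
		b->len -= len;
		b->processed += len;
		return p;
	}

	int main(void)
	{
		unsigned char raw[] = { 1, 2, 3, 4, 5, 6 };
		struct op_buffer b = { raw, sizeof(raw), 0 };

		while (read_buf(&b, 2))		/* two-byte records, purely illustrative */
			;
		printf("processed %zu of %zu bytes\n", b.processed, sizeof(raw));
		return 0;
	}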
@@ -1768,8 +2231,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
1768 audit_mappings_page(vcpu, ent, va, level - 1); 2231 audit_mappings_page(vcpu, ent, va, level - 1);
1769 } else { 2232 } else {
1770 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); 2233 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
1771 struct page *page = gpa_to_page(vcpu, gpa); 2234 hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT;
1772 hpa_t hpa = page_to_phys(page);
1773 2235
1774 if (is_shadow_present_pte(ent) 2236 if (is_shadow_present_pte(ent)
1775 && (ent & PT64_BASE_ADDR_MASK) != hpa) 2237 && (ent & PT64_BASE_ADDR_MASK) != hpa)
@@ -1782,7 +2244,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
1782 && !is_error_hpa(hpa)) 2244 && !is_error_hpa(hpa))
1783 printk(KERN_ERR "audit: (%s) notrap shadow," 2245 printk(KERN_ERR "audit: (%s) notrap shadow,"
1784 " valid guest gva %lx\n", audit_msg, va); 2246 " valid guest gva %lx\n", audit_msg, va);
1785 kvm_release_page_clean(page); 2247 kvm_release_pfn_clean(pfn);
1786 2248
1787 } 2249 }
1788 } 2250 }
@@ -1867,7 +2329,7 @@ static void audit_rmap(struct kvm_vcpu *vcpu)
1867 2329
1868 if (n_rmap != n_actual) 2330 if (n_rmap != n_actual)
1869 printk(KERN_ERR "%s: (%s) rmap %d actual %d\n", 2331 printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
1870 __FUNCTION__, audit_msg, n_rmap, n_actual); 2332 __func__, audit_msg, n_rmap, n_actual);
1871} 2333}
1872 2334
1873static void audit_write_protection(struct kvm_vcpu *vcpu) 2335static void audit_write_protection(struct kvm_vcpu *vcpu)
@@ -1887,7 +2349,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
1887 if (*rmapp) 2349 if (*rmapp)
1888 printk(KERN_ERR "%s: (%s) shadow page has writable" 2350 printk(KERN_ERR "%s: (%s) shadow page has writable"
1889 " mappings: gfn %lx role %x\n", 2351 " mappings: gfn %lx role %x\n",
1890 __FUNCTION__, audit_msg, sp->gfn, 2352 __func__, audit_msg, sp->gfn,
1891 sp->role.word); 2353 sp->role.word);
1892 } 2354 }
1893} 2355}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 1fce19ec7a23..e64e9f56a65e 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -3,6 +3,12 @@
3 3
4#include <linux/kvm_host.h> 4#include <linux/kvm_host.h>
5 5
6#ifdef CONFIG_X86_64
7#define TDP_ROOT_LEVEL PT64_ROOT_LEVEL
8#else
9#define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL
10#endif
11
6static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) 12static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
7{ 13{
8 if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)) 14 if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ecc0856268c4..156fe10288ae 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -130,7 +130,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
130 unsigned index, pt_access, pte_access; 130 unsigned index, pt_access, pte_access;
131 gpa_t pte_gpa; 131 gpa_t pte_gpa;
132 132
133 pgprintk("%s: addr %lx\n", __FUNCTION__, addr); 133 pgprintk("%s: addr %lx\n", __func__, addr);
134walk: 134walk:
135 walker->level = vcpu->arch.mmu.root_level; 135 walker->level = vcpu->arch.mmu.root_level;
136 pte = vcpu->arch.cr3; 136 pte = vcpu->arch.cr3;
@@ -155,7 +155,7 @@ walk:
155 pte_gpa += index * sizeof(pt_element_t); 155 pte_gpa += index * sizeof(pt_element_t);
156 walker->table_gfn[walker->level - 1] = table_gfn; 156 walker->table_gfn[walker->level - 1] = table_gfn;
157 walker->pte_gpa[walker->level - 1] = pte_gpa; 157 walker->pte_gpa[walker->level - 1] = pte_gpa;
158 pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, 158 pgprintk("%s: table_gfn[%d] %lx\n", __func__,
159 walker->level - 1, table_gfn); 159 walker->level - 1, table_gfn);
160 160
161 kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); 161 kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
@@ -222,7 +222,7 @@ walk:
222 walker->pt_access = pt_access; 222 walker->pt_access = pt_access;
223 walker->pte_access = pte_access; 223 walker->pte_access = pte_access;
224 pgprintk("%s: pte %llx pte_access %x pt_access %x\n", 224 pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
225 __FUNCTION__, (u64)pte, pt_access, pte_access); 225 __func__, (u64)pte, pt_access, pte_access);
226 return 1; 226 return 1;
227 227
228not_present: 228not_present:
@@ -243,31 +243,30 @@ err:
243} 243}
244 244
245static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, 245static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
246 u64 *spte, const void *pte, int bytes, 246 u64 *spte, const void *pte)
247 int offset_in_pte)
248{ 247{
249 pt_element_t gpte; 248 pt_element_t gpte;
250 unsigned pte_access; 249 unsigned pte_access;
251 struct page *npage; 250 pfn_t pfn;
251 int largepage = vcpu->arch.update_pte.largepage;
252 252
253 gpte = *(const pt_element_t *)pte; 253 gpte = *(const pt_element_t *)pte;
254 if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { 254 if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
255 if (!offset_in_pte && !is_present_pte(gpte)) 255 if (!is_present_pte(gpte))
256 set_shadow_pte(spte, shadow_notrap_nonpresent_pte); 256 set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
257 return; 257 return;
258 } 258 }
259 if (bytes < sizeof(pt_element_t)) 259 pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
260 return;
261 pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
262 pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte); 260 pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
263 if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) 261 if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
264 return; 262 return;
265 npage = vcpu->arch.update_pte.page; 263 pfn = vcpu->arch.update_pte.pfn;
266 if (!npage) 264 if (is_error_pfn(pfn))
267 return; 265 return;
268 get_page(npage); 266 kvm_get_pfn(pfn);
269 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, 267 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
270 gpte & PT_DIRTY_MASK, NULL, gpte_to_gfn(gpte), npage); 268 gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
269 pfn, true);
271} 270}
272 271
273/* 272/*
@@ -275,8 +274,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
275 */ 274 */
276static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, 275static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
277 struct guest_walker *walker, 276 struct guest_walker *walker,
278 int user_fault, int write_fault, int *ptwrite, 277 int user_fault, int write_fault, int largepage,
279 struct page *page) 278 int *ptwrite, pfn_t pfn)
280{ 279{
281 hpa_t shadow_addr; 280 hpa_t shadow_addr;
282 int level; 281 int level;
@@ -304,11 +303,19 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
304 shadow_ent = ((u64 *)__va(shadow_addr)) + index; 303 shadow_ent = ((u64 *)__va(shadow_addr)) + index;
305 if (level == PT_PAGE_TABLE_LEVEL) 304 if (level == PT_PAGE_TABLE_LEVEL)
306 break; 305 break;
307 if (is_shadow_present_pte(*shadow_ent)) { 306
307 if (largepage && level == PT_DIRECTORY_LEVEL)
308 break;
309
310 if (is_shadow_present_pte(*shadow_ent)
311 && !is_large_pte(*shadow_ent)) {
308 shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; 312 shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
309 continue; 313 continue;
310 } 314 }
311 315
316 if (is_large_pte(*shadow_ent))
317 rmap_remove(vcpu->kvm, shadow_ent);
318
312 if (level - 1 == PT_PAGE_TABLE_LEVEL 319 if (level - 1 == PT_PAGE_TABLE_LEVEL
313 && walker->level == PT_DIRECTORY_LEVEL) { 320 && walker->level == PT_DIRECTORY_LEVEL) {
314 metaphysical = 1; 321 metaphysical = 1;
@@ -329,7 +336,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
329 walker->pte_gpa[level - 2], 336 walker->pte_gpa[level - 2],
330 &curr_pte, sizeof(curr_pte)); 337 &curr_pte, sizeof(curr_pte));
331 if (r || curr_pte != walker->ptes[level - 2]) { 338 if (r || curr_pte != walker->ptes[level - 2]) {
332 kvm_release_page_clean(page); 339 kvm_release_pfn_clean(pfn);
333 return NULL; 340 return NULL;
334 } 341 }
335 } 342 }
@@ -342,7 +349,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
342 mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, 349 mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
343 user_fault, write_fault, 350 user_fault, write_fault,
344 walker->ptes[walker->level-1] & PT_DIRTY_MASK, 351 walker->ptes[walker->level-1] & PT_DIRTY_MASK,
345 ptwrite, walker->gfn, page); 352 ptwrite, largepage, walker->gfn, pfn, false);
346 353
347 return shadow_ent; 354 return shadow_ent;
348} 355}
@@ -371,16 +378,16 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
371 u64 *shadow_pte; 378 u64 *shadow_pte;
372 int write_pt = 0; 379 int write_pt = 0;
373 int r; 380 int r;
374 struct page *page; 381 pfn_t pfn;
382 int largepage = 0;
375 383
376 pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code); 384 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
377 kvm_mmu_audit(vcpu, "pre page fault"); 385 kvm_mmu_audit(vcpu, "pre page fault");
378 386
379 r = mmu_topup_memory_caches(vcpu); 387 r = mmu_topup_memory_caches(vcpu);
380 if (r) 388 if (r)
381 return r; 389 return r;
382 390
383 down_read(&vcpu->kvm->slots_lock);
384 /* 391 /*
385 * Look up the shadow pte for the faulting address. 392 * Look up the shadow pte for the faulting address.
386 */ 393 */
@@ -391,40 +398,45 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
391 * The page is not mapped by the guest. Let the guest handle it. 398 * The page is not mapped by the guest. Let the guest handle it.
392 */ 399 */
393 if (!r) { 400 if (!r) {
394 pgprintk("%s: guest page fault\n", __FUNCTION__); 401 pgprintk("%s: guest page fault\n", __func__);
395 inject_page_fault(vcpu, addr, walker.error_code); 402 inject_page_fault(vcpu, addr, walker.error_code);
396 vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ 403 vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
397 up_read(&vcpu->kvm->slots_lock);
398 return 0; 404 return 0;
399 } 405 }
400 406
401 down_read(&current->mm->mmap_sem); 407 down_read(&current->mm->mmap_sem);
402 page = gfn_to_page(vcpu->kvm, walker.gfn); 408 if (walker.level == PT_DIRECTORY_LEVEL) {
409 gfn_t large_gfn;
410 large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
411 if (is_largepage_backed(vcpu, large_gfn)) {
412 walker.gfn = large_gfn;
413 largepage = 1;
414 }
415 }
416 pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
403 up_read(&current->mm->mmap_sem); 417 up_read(&current->mm->mmap_sem);
404 418
419 /* mmio */
420 if (is_error_pfn(pfn)) {
421 pgprintk("gfn %x is mmio\n", walker.gfn);
422 kvm_release_pfn_clean(pfn);
423 return 1;
424 }
425
405 spin_lock(&vcpu->kvm->mmu_lock); 426 spin_lock(&vcpu->kvm->mmu_lock);
406 kvm_mmu_free_some_pages(vcpu); 427 kvm_mmu_free_some_pages(vcpu);
407 shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, 428 shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
408 &write_pt, page); 429 largepage, &write_pt, pfn);
409 pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, 430
431 pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
410 shadow_pte, *shadow_pte, write_pt); 432 shadow_pte, *shadow_pte, write_pt);
411 433
412 if (!write_pt) 434 if (!write_pt)
413 vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ 435 vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
414 436
415 /*
416 * mmio: emulate if accessible, otherwise its a guest fault.
417 */
418 if (shadow_pte && is_io_pte(*shadow_pte)) {
419 spin_unlock(&vcpu->kvm->mmu_lock);
420 up_read(&vcpu->kvm->slots_lock);
421 return 1;
422 }
423
424 ++vcpu->stat.pf_fixed; 437 ++vcpu->stat.pf_fixed;
425 kvm_mmu_audit(vcpu, "post page fault (fixed)"); 438 kvm_mmu_audit(vcpu, "post page fault (fixed)");
426 spin_unlock(&vcpu->kvm->mmu_lock); 439 spin_unlock(&vcpu->kvm->mmu_lock);
427 up_read(&vcpu->kvm->slots_lock);
428 440
429 return write_pt; 441 return write_pt;
430} 442}
diff --git a/arch/x86/kvm/segment_descriptor.h b/arch/x86/kvm/segment_descriptor.h
deleted file mode 100644
index 56fc4c873389..000000000000
--- a/arch/x86/kvm/segment_descriptor.h
+++ /dev/null
@@ -1,29 +0,0 @@
1#ifndef __SEGMENT_DESCRIPTOR_H
2#define __SEGMENT_DESCRIPTOR_H
3
4struct segment_descriptor {
5 u16 limit_low;
6 u16 base_low;
7 u8 base_mid;
8 u8 type : 4;
9 u8 system : 1;
10 u8 dpl : 2;
11 u8 present : 1;
12 u8 limit_high : 4;
13 u8 avl : 1;
14 u8 long_mode : 1;
15 u8 default_op : 1;
16 u8 granularity : 1;
17 u8 base_high;
18} __attribute__((packed));
19
20#ifdef CONFIG_X86_64
21/* LDT or TSS descriptor in the GDT. 16 bytes. */
22struct segment_descriptor_64 {
23 struct segment_descriptor s;
24 u32 base_higher;
25 u32 pad_zero;
26};
27
28#endif
29#endif
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1a582f1090e8..89e0be2c10d0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -47,6 +47,18 @@ MODULE_LICENSE("GPL");
47#define SVM_FEATURE_LBRV (1 << 1) 47#define SVM_FEATURE_LBRV (1 << 1)
48#define SVM_DEATURE_SVML (1 << 2) 48#define SVM_DEATURE_SVML (1 << 2)
49 49
50#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
51
52/* enable NPT for AMD64 and X86 with PAE */
53#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
54static bool npt_enabled = true;
55#else
56static bool npt_enabled = false;
57#endif
58static int npt = 1;
59
60module_param(npt, int, S_IRUGO);
61
50static void kvm_reput_irq(struct vcpu_svm *svm); 62static void kvm_reput_irq(struct vcpu_svm *svm);
51 63
52static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) 64static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
@@ -54,8 +66,7 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
54 return container_of(vcpu, struct vcpu_svm, vcpu); 66 return container_of(vcpu, struct vcpu_svm, vcpu);
55} 67}
56 68
57unsigned long iopm_base; 69static unsigned long iopm_base;
58unsigned long msrpm_base;
59 70
60struct kvm_ldttss_desc { 71struct kvm_ldttss_desc {
61 u16 limit0; 72 u16 limit0;
@@ -182,7 +193,7 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
182 193
183static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) 194static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
184{ 195{
185 if (!(efer & EFER_LMA)) 196 if (!npt_enabled && !(efer & EFER_LMA))
186 efer &= ~EFER_LME; 197 efer &= ~EFER_LME;
187 198
188 to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; 199 to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
@@ -219,12 +230,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
219 struct vcpu_svm *svm = to_svm(vcpu); 230 struct vcpu_svm *svm = to_svm(vcpu);
220 231
221 if (!svm->next_rip) { 232 if (!svm->next_rip) {
222 printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__); 233 printk(KERN_DEBUG "%s: NOP\n", __func__);
223 return; 234 return;
224 } 235 }
225 if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE) 236 if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE)
226 printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", 237 printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n",
227 __FUNCTION__, 238 __func__,
228 svm->vmcb->save.rip, 239 svm->vmcb->save.rip,
229 svm->next_rip); 240 svm->next_rip);
230 241
@@ -279,11 +290,7 @@ static void svm_hardware_enable(void *garbage)
279 290
280 struct svm_cpu_data *svm_data; 291 struct svm_cpu_data *svm_data;
281 uint64_t efer; 292 uint64_t efer;
282#ifdef CONFIG_X86_64
283 struct desc_ptr gdt_descr;
284#else
285 struct desc_ptr gdt_descr; 293 struct desc_ptr gdt_descr;
286#endif
287 struct desc_struct *gdt; 294 struct desc_struct *gdt;
288 int me = raw_smp_processor_id(); 295 int me = raw_smp_processor_id();
289 296
@@ -302,7 +309,6 @@ static void svm_hardware_enable(void *garbage)
302 svm_data->asid_generation = 1; 309 svm_data->asid_generation = 1;
303 svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; 310 svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
304 svm_data->next_asid = svm_data->max_asid + 1; 311 svm_data->next_asid = svm_data->max_asid + 1;
305 svm_features = cpuid_edx(SVM_CPUID_FUNC);
306 312
307 asm volatile ("sgdt %0" : "=m"(gdt_descr)); 313 asm volatile ("sgdt %0" : "=m"(gdt_descr));
308 gdt = (struct desc_struct *)gdt_descr.address; 314 gdt = (struct desc_struct *)gdt_descr.address;
@@ -361,12 +367,51 @@ static void set_msr_interception(u32 *msrpm, unsigned msr,
361 BUG(); 367 BUG();
362} 368}
363 369
370static void svm_vcpu_init_msrpm(u32 *msrpm)
371{
372 memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
373
374#ifdef CONFIG_X86_64
375 set_msr_interception(msrpm, MSR_GS_BASE, 1, 1);
376 set_msr_interception(msrpm, MSR_FS_BASE, 1, 1);
377 set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1);
378 set_msr_interception(msrpm, MSR_LSTAR, 1, 1);
379 set_msr_interception(msrpm, MSR_CSTAR, 1, 1);
380 set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1);
381#endif
382 set_msr_interception(msrpm, MSR_K6_STAR, 1, 1);
383 set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1);
384 set_msr_interception(msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
385 set_msr_interception(msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
386}
387
388static void svm_enable_lbrv(struct vcpu_svm *svm)
389{
390 u32 *msrpm = svm->msrpm;
391
392 svm->vmcb->control.lbr_ctl = 1;
393 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
394 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
395 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
396 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
397}
398
399static void svm_disable_lbrv(struct vcpu_svm *svm)
400{
401 u32 *msrpm = svm->msrpm;
402
403 svm->vmcb->control.lbr_ctl = 0;
404 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
405 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
406 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
407 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
408}
409
364static __init int svm_hardware_setup(void) 410static __init int svm_hardware_setup(void)
365{ 411{
366 int cpu; 412 int cpu;
367 struct page *iopm_pages; 413 struct page *iopm_pages;
368 struct page *msrpm_pages; 414 void *iopm_va;
369 void *iopm_va, *msrpm_va;
370 int r; 415 int r;
371 416
372 iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER); 417 iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
@@ -379,41 +424,33 @@ static __init int svm_hardware_setup(void)
379 clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */ 424 clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */
380 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; 425 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
381 426
427 if (boot_cpu_has(X86_FEATURE_NX))
428 kvm_enable_efer_bits(EFER_NX);
382 429
383 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 430 for_each_online_cpu(cpu) {
431 r = svm_cpu_init(cpu);
432 if (r)
433 goto err;
434 }
384 435
385 r = -ENOMEM; 436 svm_features = cpuid_edx(SVM_CPUID_FUNC);
386 if (!msrpm_pages)
387 goto err_1;
388 437
389 msrpm_va = page_address(msrpm_pages); 438 if (!svm_has(SVM_FEATURE_NPT))
390 memset(msrpm_va, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); 439 npt_enabled = false;
391 msrpm_base = page_to_pfn(msrpm_pages) << PAGE_SHIFT;
392 440
393#ifdef CONFIG_X86_64 441 if (npt_enabled && !npt) {
394 set_msr_interception(msrpm_va, MSR_GS_BASE, 1, 1); 442 printk(KERN_INFO "kvm: Nested Paging disabled\n");
395 set_msr_interception(msrpm_va, MSR_FS_BASE, 1, 1); 443 npt_enabled = false;
396 set_msr_interception(msrpm_va, MSR_KERNEL_GS_BASE, 1, 1); 444 }
397 set_msr_interception(msrpm_va, MSR_LSTAR, 1, 1);
398 set_msr_interception(msrpm_va, MSR_CSTAR, 1, 1);
399 set_msr_interception(msrpm_va, MSR_SYSCALL_MASK, 1, 1);
400#endif
401 set_msr_interception(msrpm_va, MSR_K6_STAR, 1, 1);
402 set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_CS, 1, 1);
403 set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_ESP, 1, 1);
404 set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_EIP, 1, 1);
405 445
406 for_each_online_cpu(cpu) { 446 if (npt_enabled) {
407 r = svm_cpu_init(cpu); 447 printk(KERN_INFO "kvm: Nested Paging enabled\n");
408 if (r) 448 kvm_enable_tdp();
409 goto err_2;
410 } 449 }
450
411 return 0; 451 return 0;
412 452
413err_2: 453err:
414 __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
415 msrpm_base = 0;
416err_1:
417 __free_pages(iopm_pages, IOPM_ALLOC_ORDER); 454 __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
418 iopm_base = 0; 455 iopm_base = 0;
419 return r; 456 return r;
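The reworked svm_hardware_setup() above decides whether nested paging is actually used: the compile-time default (on for 64-bit and PAE builds) survives only if the SVM feature word reports NPT and the npt module parameter has not been cleared, and only then is kvm_enable_tdp() called. A compact sketch of that three-way gate, with the CPUID feature probe stubbed out:

	#include <stdio.h>
	#include <stdbool.h>

	static bool cpu_has_npt(void)
	{
		return true;	/* stand-in for checking the NPT bit in the SVM_CPUID_FUNC feature word */
	}

	int main(void)
	{
		bool npt_enabled = true;	/* compile-time default on 64-bit/PAE builds */
		int npt = 1;			/* module parameter: npt=0 forces it off */

		if (!cpu_has_npt())
			npt_enabled = false;
		if (npt_enabled && !npt) {
			printf("kvm: Nested Paging disabled\n");
			npt_enabled = false;
		}
		if (npt_enabled)
			printf("kvm: Nested Paging enabled\n");	/* kernel would call kvm_enable_tdp() here */
		return 0;
	}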
@@ -421,9 +458,8 @@ err_1:
421 458
422static __exit void svm_hardware_unsetup(void) 459static __exit void svm_hardware_unsetup(void)
423{ 460{
424 __free_pages(pfn_to_page(msrpm_base >> PAGE_SHIFT), MSRPM_ALLOC_ORDER);
425 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); 461 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
426 iopm_base = msrpm_base = 0; 462 iopm_base = 0;
427} 463}
428 464
429static void init_seg(struct vmcb_seg *seg) 465static void init_seg(struct vmcb_seg *seg)
@@ -443,15 +479,14 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
443 seg->base = 0; 479 seg->base = 0;
444} 480}
445 481
446static void init_vmcb(struct vmcb *vmcb) 482static void init_vmcb(struct vcpu_svm *svm)
447{ 483{
448 struct vmcb_control_area *control = &vmcb->control; 484 struct vmcb_control_area *control = &svm->vmcb->control;
449 struct vmcb_save_area *save = &vmcb->save; 485 struct vmcb_save_area *save = &svm->vmcb->save;
450 486
451 control->intercept_cr_read = INTERCEPT_CR0_MASK | 487 control->intercept_cr_read = INTERCEPT_CR0_MASK |
452 INTERCEPT_CR3_MASK | 488 INTERCEPT_CR3_MASK |
453 INTERCEPT_CR4_MASK | 489 INTERCEPT_CR4_MASK;
454 INTERCEPT_CR8_MASK;
455 490
456 control->intercept_cr_write = INTERCEPT_CR0_MASK | 491 control->intercept_cr_write = INTERCEPT_CR0_MASK |
457 INTERCEPT_CR3_MASK | 492 INTERCEPT_CR3_MASK |
@@ -471,23 +506,13 @@ static void init_vmcb(struct vmcb *vmcb)
471 INTERCEPT_DR7_MASK; 506 INTERCEPT_DR7_MASK;
472 507
473 control->intercept_exceptions = (1 << PF_VECTOR) | 508 control->intercept_exceptions = (1 << PF_VECTOR) |
474 (1 << UD_VECTOR); 509 (1 << UD_VECTOR) |
510 (1 << MC_VECTOR);
475 511
476 512
477 control->intercept = (1ULL << INTERCEPT_INTR) | 513 control->intercept = (1ULL << INTERCEPT_INTR) |
478 (1ULL << INTERCEPT_NMI) | 514 (1ULL << INTERCEPT_NMI) |
479 (1ULL << INTERCEPT_SMI) | 515 (1ULL << INTERCEPT_SMI) |
480 /*
481 * selective cr0 intercept bug?
482 * 0: 0f 22 d8 mov %eax,%cr3
483 * 3: 0f 20 c0 mov %cr0,%eax
484 * 6: 0d 00 00 00 80 or $0x80000000,%eax
485 * b: 0f 22 c0 mov %eax,%cr0
486 * set cr3 ->interception
487 * get cr0 ->interception
488 * set cr0 -> no interception
489 */
490 /* (1ULL << INTERCEPT_SELECTIVE_CR0) | */
491 (1ULL << INTERCEPT_CPUID) | 516 (1ULL << INTERCEPT_CPUID) |
492 (1ULL << INTERCEPT_INVD) | 517 (1ULL << INTERCEPT_INVD) |
493 (1ULL << INTERCEPT_HLT) | 518 (1ULL << INTERCEPT_HLT) |
@@ -508,7 +533,7 @@ static void init_vmcb(struct vmcb *vmcb)
508 (1ULL << INTERCEPT_MWAIT); 533 (1ULL << INTERCEPT_MWAIT);
509 534
510 control->iopm_base_pa = iopm_base; 535 control->iopm_base_pa = iopm_base;
511 control->msrpm_base_pa = msrpm_base; 536 control->msrpm_base_pa = __pa(svm->msrpm);
512 control->tsc_offset = 0; 537 control->tsc_offset = 0;
513 control->int_ctl = V_INTR_MASKING_MASK; 538 control->int_ctl = V_INTR_MASKING_MASK;
514 539
@@ -550,13 +575,30 @@ static void init_vmcb(struct vmcb *vmcb)
550 save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; 575 save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
551 save->cr4 = X86_CR4_PAE; 576 save->cr4 = X86_CR4_PAE;
552 /* rdx = ?? */ 577 /* rdx = ?? */
578
579 if (npt_enabled) {
580 /* Setup VMCB for Nested Paging */
581 control->nested_ctl = 1;
582 control->intercept &= ~(1ULL << INTERCEPT_TASK_SWITCH);
583 control->intercept_exceptions &= ~(1 << PF_VECTOR);
584 control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK|
585 INTERCEPT_CR3_MASK);
586 control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
587 INTERCEPT_CR3_MASK);
588 save->g_pat = 0x0007040600070406ULL;
589 /* enable caching because the QEMU Bios doesn't enable it */
590 save->cr0 = X86_CR0_ET;
591 save->cr3 = 0;
592 save->cr4 = 0;
593 }
594 force_new_asid(&svm->vcpu);
553} 595}
554 596
555static int svm_vcpu_reset(struct kvm_vcpu *vcpu) 597static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
556{ 598{
557 struct vcpu_svm *svm = to_svm(vcpu); 599 struct vcpu_svm *svm = to_svm(vcpu);
558 600
559 init_vmcb(svm->vmcb); 601 init_vmcb(svm);
560 602
561 if (vcpu->vcpu_id != 0) { 603 if (vcpu->vcpu_id != 0) {
562 svm->vmcb->save.rip = 0; 604 svm->vmcb->save.rip = 0;
@@ -571,6 +613,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
571{ 613{
572 struct vcpu_svm *svm; 614 struct vcpu_svm *svm;
573 struct page *page; 615 struct page *page;
616 struct page *msrpm_pages;
574 int err; 617 int err;
575 618
576 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 619 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -589,12 +632,19 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
589 goto uninit; 632 goto uninit;
590 } 633 }
591 634
635 err = -ENOMEM;
636 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
637 if (!msrpm_pages)
638 goto uninit;
639 svm->msrpm = page_address(msrpm_pages);
640 svm_vcpu_init_msrpm(svm->msrpm);
641
592 svm->vmcb = page_address(page); 642 svm->vmcb = page_address(page);
593 clear_page(svm->vmcb); 643 clear_page(svm->vmcb);
594 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; 644 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
595 svm->asid_generation = 0; 645 svm->asid_generation = 0;
596 memset(svm->db_regs, 0, sizeof(svm->db_regs)); 646 memset(svm->db_regs, 0, sizeof(svm->db_regs));
597 init_vmcb(svm->vmcb); 647 init_vmcb(svm);
598 648
599 fx_init(&svm->vcpu); 649 fx_init(&svm->vcpu);
600 svm->vcpu.fpu_active = 1; 650 svm->vcpu.fpu_active = 1;
@@ -617,6 +667,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
617 struct vcpu_svm *svm = to_svm(vcpu); 667 struct vcpu_svm *svm = to_svm(vcpu);
618 668
619 __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); 669 __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
670 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
620 kvm_vcpu_uninit(vcpu); 671 kvm_vcpu_uninit(vcpu);
621 kmem_cache_free(kvm_vcpu_cache, svm); 672 kmem_cache_free(kvm_vcpu_cache, svm);
622} 673}
@@ -731,6 +782,13 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
731 var->unusable = !var->present; 782 var->unusable = !var->present;
732} 783}
733 784
785static int svm_get_cpl(struct kvm_vcpu *vcpu)
786{
787 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
788
789 return save->cpl;
790}
791
734static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 792static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
735{ 793{
736 struct vcpu_svm *svm = to_svm(vcpu); 794 struct vcpu_svm *svm = to_svm(vcpu);
@@ -784,6 +842,9 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
784 } 842 }
785 } 843 }
786#endif 844#endif
845 if (npt_enabled)
846 goto set;
847
787 if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { 848 if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
788 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 849 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
789 vcpu->fpu_active = 1; 850 vcpu->fpu_active = 1;
@@ -791,18 +852,29 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
791 852
792 vcpu->arch.cr0 = cr0; 853 vcpu->arch.cr0 = cr0;
793 cr0 |= X86_CR0_PG | X86_CR0_WP; 854 cr0 |= X86_CR0_PG | X86_CR0_WP;
794 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
795 if (!vcpu->fpu_active) { 855 if (!vcpu->fpu_active) {
796 svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); 856 svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
797 cr0 |= X86_CR0_TS; 857 cr0 |= X86_CR0_TS;
798 } 858 }
859set:
860 /*
861 * re-enable caching here because the QEMU bios
862 * does not do it - this results in some delay at
863 * reboot
864 */
865 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
799 svm->vmcb->save.cr0 = cr0; 866 svm->vmcb->save.cr0 = cr0;
800} 867}
801 868
802static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 869static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
803{ 870{
804 vcpu->arch.cr4 = cr4; 871 unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
805 to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE; 872
873 vcpu->arch.cr4 = cr4;
874 if (!npt_enabled)
875 cr4 |= X86_CR4_PAE;
876 cr4 |= host_cr4_mce;
877 to_svm(vcpu)->vmcb->save.cr4 = cr4;
806} 878}
807 879
808static void svm_set_segment(struct kvm_vcpu *vcpu, 880static void svm_set_segment(struct kvm_vcpu *vcpu,
@@ -833,13 +905,6 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
833 905
834} 906}
835 907
836/* FIXME:
837
838 svm(vcpu)->vmcb->control.int_ctl &= ~V_TPR_MASK;
839 svm(vcpu)->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK);
840
841*/
842
843static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) 908static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
844{ 909{
845 return -EOPNOTSUPP; 910 return -EOPNOTSUPP;
@@ -920,7 +985,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
920 } 985 }
921 default: 986 default:
922 printk(KERN_DEBUG "%s: unexpected dr %u\n", 987 printk(KERN_DEBUG "%s: unexpected dr %u\n",
923 __FUNCTION__, dr); 988 __func__, dr);
924 *exception = UD_VECTOR; 989 *exception = UD_VECTOR;
925 return; 990 return;
926 } 991 }
@@ -962,6 +1027,19 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
962 return 1; 1027 return 1;
963} 1028}
964 1029
1030static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1031{
1032 /*
1033 * On an #MC intercept the MCE handler is not called automatically in
1034 * the host. So do it by hand here.
1035 */
1036 asm volatile (
1037 "int $0x12\n");
1038 /* not sure if we ever come back to this point */
1039
1040 return 1;
1041}
1042
965static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1043static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
966{ 1044{
967 /* 1045 /*
@@ -969,7 +1047,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
969 * so reinitialize it. 1047 * so reinitialize it.
970 */ 1048 */
971 clear_page(svm->vmcb); 1049 clear_page(svm->vmcb);
972 init_vmcb(svm->vmcb); 1050 init_vmcb(svm);
973 1051
974 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; 1052 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
975 return 0; 1053 return 0;
@@ -1033,9 +1111,18 @@ static int invalid_op_interception(struct vcpu_svm *svm,
1033static int task_switch_interception(struct vcpu_svm *svm, 1111static int task_switch_interception(struct vcpu_svm *svm,
1034 struct kvm_run *kvm_run) 1112 struct kvm_run *kvm_run)
1035{ 1113{
1036 pr_unimpl(&svm->vcpu, "%s: task switch is unsupported\n", __FUNCTION__); 1114 u16 tss_selector;
1037 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 1115
1038 return 0; 1116 tss_selector = (u16)svm->vmcb->control.exit_info_1;
1117 if (svm->vmcb->control.exit_info_2 &
1118 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
1119 return kvm_task_switch(&svm->vcpu, tss_selector,
1120 TASK_SWITCH_IRET);
1121 if (svm->vmcb->control.exit_info_2 &
1122 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
1123 return kvm_task_switch(&svm->vcpu, tss_selector,
1124 TASK_SWITCH_JMP);
1125 return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL);
1039} 1126}
1040 1127
1041static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1128static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
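For the new task-switch handler above, exit_info_1 carries the target TSS selector in its low 16 bits and exit_info_2 carries the reason flags it tests: bit 36 for a switch caused by IRET, bit 38 for a far jump, anything else treated as a CALL or gate. A minimal decode of those fields (values made up for illustration):

	#include <stdio.h>
	#include <stdint.h>

	#define TS_REASON_IRET_BIT 36
	#define TS_REASON_JMP_BIT  38

	int main(void)
	{
		uint64_t exit_info_1 = 0x0028;	/* hypothetical TSS selector */
		uint64_t exit_info_2 = 1ULL << TS_REASON_IRET_BIT;

		uint16_t tss_selector = (uint16_t)exit_info_1;
		const char *reason = "CALL";

		if (exit_info_2 & (1ULL << TS_REASON_IRET_BIT))
			reason = "IRET";
		else if (exit_info_2 & (1ULL << TS_REASON_JMP_BIT))
			reason = "JMP";

		printf("task switch to selector %#x, reason %s\n", tss_selector, reason);
		return 0;
	}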
@@ -1049,7 +1136,7 @@ static int emulate_on_interception(struct vcpu_svm *svm,
1049 struct kvm_run *kvm_run) 1136 struct kvm_run *kvm_run)
1050{ 1137{
1051 if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE) 1138 if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
1052 pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__); 1139 pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
1053 return 1; 1140 return 1;
1054} 1141}
1055 1142
@@ -1179,8 +1266,19 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
1179 svm->vmcb->save.sysenter_esp = data; 1266 svm->vmcb->save.sysenter_esp = data;
1180 break; 1267 break;
1181 case MSR_IA32_DEBUGCTLMSR: 1268 case MSR_IA32_DEBUGCTLMSR:
1182 pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", 1269 if (!svm_has(SVM_FEATURE_LBRV)) {
1183 __FUNCTION__, data); 1270 pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
1271 __func__, data);
1272 break;
1273 }
1274 if (data & DEBUGCTL_RESERVED_BITS)
1275 return 1;
1276
1277 svm->vmcb->save.dbgctl = data;
1278 if (data & (1ULL<<0))
1279 svm_enable_lbrv(svm);
1280 else
1281 svm_disable_lbrv(svm);
1184 break; 1282 break;
1185 case MSR_K7_EVNTSEL0: 1283 case MSR_K7_EVNTSEL0:
1186 case MSR_K7_EVNTSEL1: 1284 case MSR_K7_EVNTSEL1:
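The MSR_IA32_DEBUGCTLMSR write path above first ignores the write when the CPU lacks LBR virtualization, then rejects any value with a bit set outside the low six (DEBUGCTL_RESERVED_BITS is ~0x3f), and finally uses bit 0 to toggle LBR virtualization. A small sketch of that validation order (returning non-zero to mean "fault the write", mirroring the kernel convention):

	#include <stdio.h>
	#include <stdbool.h>
	#include <stdint.h>

	#define DEBUGCTL_RESERVED_BITS (~0x3fULL)

	static int write_debugctl(uint64_t data, bool have_lbrv, bool *lbr_on)
	{
		if (!have_lbrv) {
			printf("DEBUGCTL %#llx ignored, no LBR virtualization\n",
			       (unsigned long long)data);
			return 0;
		}
		if (data & DEBUGCTL_RESERVED_BITS)
			return 1;		/* reserved bit set: caller injects #GP */

		*lbr_on = data & 1;		/* bit 0 = LBR enable */
		return 0;
	}

	int main(void)
	{
		bool lbr = false;

		printf("rc=%d lbr=%d\n", write_debugctl(0x1, true, &lbr), lbr);
		printf("rc=%d\n", write_debugctl(0x100, true, &lbr));	/* reserved bit -> fault */
		return 0;
	}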
@@ -1265,6 +1363,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
1265 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, 1363 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
1266 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 1364 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
1267 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, 1365 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
1366 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
1268 [SVM_EXIT_INTR] = nop_on_interception, 1367 [SVM_EXIT_INTR] = nop_on_interception,
1269 [SVM_EXIT_NMI] = nop_on_interception, 1368 [SVM_EXIT_NMI] = nop_on_interception,
1270 [SVM_EXIT_SMI] = nop_on_interception, 1369 [SVM_EXIT_SMI] = nop_on_interception,
@@ -1290,14 +1389,34 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
1290 [SVM_EXIT_WBINVD] = emulate_on_interception, 1389 [SVM_EXIT_WBINVD] = emulate_on_interception,
1291 [SVM_EXIT_MONITOR] = invalid_op_interception, 1390 [SVM_EXIT_MONITOR] = invalid_op_interception,
1292 [SVM_EXIT_MWAIT] = invalid_op_interception, 1391 [SVM_EXIT_MWAIT] = invalid_op_interception,
1392 [SVM_EXIT_NPF] = pf_interception,
1293}; 1393};
1294 1394
1295
1296static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1395static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1297{ 1396{
1298 struct vcpu_svm *svm = to_svm(vcpu); 1397 struct vcpu_svm *svm = to_svm(vcpu);
1299 u32 exit_code = svm->vmcb->control.exit_code; 1398 u32 exit_code = svm->vmcb->control.exit_code;
1300 1399
1400 if (npt_enabled) {
1401 int mmu_reload = 0;
1402 if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
1403 svm_set_cr0(vcpu, svm->vmcb->save.cr0);
1404 mmu_reload = 1;
1405 }
1406 vcpu->arch.cr0 = svm->vmcb->save.cr0;
1407 vcpu->arch.cr3 = svm->vmcb->save.cr3;
1408 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
1409 if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
1410 kvm_inject_gp(vcpu, 0);
1411 return 1;
1412 }
1413 }
1414 if (mmu_reload) {
1415 kvm_mmu_reset_context(vcpu);
1416 kvm_mmu_load(vcpu);
1417 }
1418 }
1419
1301 kvm_reput_irq(svm); 1420 kvm_reput_irq(svm);
1302 1421
1303 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { 1422 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
@@ -1308,10 +1427,11 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1308 } 1427 }
1309 1428
1310 if (is_external_interrupt(svm->vmcb->control.exit_int_info) && 1429 if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
1311 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) 1430 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
1431 exit_code != SVM_EXIT_NPF)
1312 printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " 1432 printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
1313 "exit_code 0x%x\n", 1433 "exit_code 0x%x\n",
1314 __FUNCTION__, svm->vmcb->control.exit_int_info, 1434 __func__, svm->vmcb->control.exit_int_info,
1315 exit_code); 1435 exit_code);
1316 1436
1317 if (exit_code >= ARRAY_SIZE(svm_exit_handlers) 1437 if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
@@ -1364,6 +1484,27 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
1364 svm_inject_irq(svm, irq); 1484 svm_inject_irq(svm, irq);
1365} 1485}
1366 1486
1487static void update_cr8_intercept(struct kvm_vcpu *vcpu)
1488{
1489 struct vcpu_svm *svm = to_svm(vcpu);
1490 struct vmcb *vmcb = svm->vmcb;
1491 int max_irr, tpr;
1492
1493 if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr)
1494 return;
1495
1496 vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
1497
1498 max_irr = kvm_lapic_find_highest_irr(vcpu);
1499 if (max_irr == -1)
1500 return;
1501
1502 tpr = kvm_lapic_get_cr8(vcpu) << 4;
1503
1504 if (tpr >= (max_irr & 0xf0))
1505 vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
1506}
1507
1367static void svm_intr_assist(struct kvm_vcpu *vcpu) 1508static void svm_intr_assist(struct kvm_vcpu *vcpu)
1368{ 1509{
1369 struct vcpu_svm *svm = to_svm(vcpu); 1510 struct vcpu_svm *svm = to_svm(vcpu);
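update_cr8_intercept() above compares interrupt priority classes: the 4-bit TPR read from the local APIC is shifted left by four so it lines up with the high nibble of the highest pending vector, and the CR8-write intercept is re-armed only when that TPR already blocks the vector, since then a guest write lowering TPR is exactly the event KVM must observe. A tiny model of the comparison:

	#include <stdio.h>
	#include <stdbool.h>

	/* true if a pending vector is blocked by the given CR8/TPR value */
	static bool tpr_masks(int cr8, int max_irr)
	{
		int tpr = (cr8 & 0xf) << 4;	/* align TPR with the vector's priority class */

		return max_irr >= 0 && tpr >= (max_irr & 0xf0);
	}

	int main(void)
	{
		/* hypothetical: vector 0x61 pending (priority class 6) */
		printf("cr8=3: intercept CR8 writes? %d\n", tpr_masks(3, 0x61));	/* 0: deliverable */
		printf("cr8=8: intercept CR8 writes? %d\n", tpr_masks(8, 0x61));	/* 1: masked */
		return 0;
	}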
@@ -1376,14 +1517,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
1376 SVM_EVTINJ_VEC_MASK; 1517 SVM_EVTINJ_VEC_MASK;
1377 vmcb->control.exit_int_info = 0; 1518 vmcb->control.exit_int_info = 0;
1378 svm_inject_irq(svm, intr_vector); 1519 svm_inject_irq(svm, intr_vector);
1379 return; 1520 goto out;
1380 } 1521 }
1381 1522
1382 if (vmcb->control.int_ctl & V_IRQ_MASK) 1523 if (vmcb->control.int_ctl & V_IRQ_MASK)
1383 return; 1524 goto out;
1384 1525
1385 if (!kvm_cpu_has_interrupt(vcpu)) 1526 if (!kvm_cpu_has_interrupt(vcpu))
1386 return; 1527 goto out;
1387 1528
1388 if (!(vmcb->save.rflags & X86_EFLAGS_IF) || 1529 if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
1389 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || 1530 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
@@ -1391,12 +1532,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
1391 /* unable to deliver irq, set pending irq */ 1532 /* unable to deliver irq, set pending irq */
1392 vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR); 1533 vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
1393 svm_inject_irq(svm, 0x0); 1534 svm_inject_irq(svm, 0x0);
1394 return; 1535 goto out;
1395 } 1536 }
1396 /* Okay, we can deliver the interrupt: grab it and update PIC state. */ 1537 /* Okay, we can deliver the interrupt: grab it and update PIC state. */
1397 intr_vector = kvm_cpu_get_interrupt(vcpu); 1538 intr_vector = kvm_cpu_get_interrupt(vcpu);
1398 svm_inject_irq(svm, intr_vector); 1539 svm_inject_irq(svm, intr_vector);
1399 kvm_timer_intr_post(vcpu, intr_vector); 1540 kvm_timer_intr_post(vcpu, intr_vector);
1541out:
1542 update_cr8_intercept(vcpu);
1400} 1543}
1401 1544
1402static void kvm_reput_irq(struct vcpu_svm *svm) 1545static void kvm_reput_irq(struct vcpu_svm *svm)
@@ -1482,6 +1625,29 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
1482{ 1625{
1483} 1626}
1484 1627
1628static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
1629{
1630 struct vcpu_svm *svm = to_svm(vcpu);
1631
1632 if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
1633 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
1634 kvm_lapic_set_tpr(vcpu, cr8);
1635 }
1636}
1637
1638static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
1639{
1640 struct vcpu_svm *svm = to_svm(vcpu);
1641 u64 cr8;
1642
1643 if (!irqchip_in_kernel(vcpu->kvm))
1644 return;
1645
1646 cr8 = kvm_get_cr8(vcpu);
1647 svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
1648 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
1649}
1650
1485static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1651static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1486{ 1652{
1487 struct vcpu_svm *svm = to_svm(vcpu); 1653 struct vcpu_svm *svm = to_svm(vcpu);
@@ -1491,6 +1657,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1491 1657
1492 pre_svm_run(svm); 1658 pre_svm_run(svm);
1493 1659
1660 sync_lapic_to_cr8(vcpu);
1661
1494 save_host_msrs(vcpu); 1662 save_host_msrs(vcpu);
1495 fs_selector = read_fs(); 1663 fs_selector = read_fs();
1496 gs_selector = read_gs(); 1664 gs_selector = read_gs();
@@ -1499,6 +1667,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1499 svm->host_dr6 = read_dr6(); 1667 svm->host_dr6 = read_dr6();
1500 svm->host_dr7 = read_dr7(); 1668 svm->host_dr7 = read_dr7();
1501 svm->vmcb->save.cr2 = vcpu->arch.cr2; 1669 svm->vmcb->save.cr2 = vcpu->arch.cr2;
1670 /* required for live migration with NPT */
1671 if (npt_enabled)
1672 svm->vmcb->save.cr3 = vcpu->arch.cr3;
1502 1673
1503 if (svm->vmcb->save.dr7 & 0xff) { 1674 if (svm->vmcb->save.dr7 & 0xff) {
1504 write_dr7(0); 1675 write_dr7(0);
@@ -1635,6 +1806,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1635 1806
1636 stgi(); 1807 stgi();
1637 1808
1809 sync_cr8_to_lapic(vcpu);
1810
1638 svm->next_rip = 0; 1811 svm->next_rip = 0;
1639} 1812}
1640 1813
@@ -1642,6 +1815,12 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
1642{ 1815{
1643 struct vcpu_svm *svm = to_svm(vcpu); 1816 struct vcpu_svm *svm = to_svm(vcpu);
1644 1817
1818 if (npt_enabled) {
1819 svm->vmcb->control.nested_cr3 = root;
1820 force_new_asid(vcpu);
1821 return;
1822 }
1823
1645 svm->vmcb->save.cr3 = root; 1824 svm->vmcb->save.cr3 = root;
1646 force_new_asid(vcpu); 1825 force_new_asid(vcpu);
1647 1826
@@ -1709,6 +1888,7 @@ static struct kvm_x86_ops svm_x86_ops = {
1709 .get_segment_base = svm_get_segment_base, 1888 .get_segment_base = svm_get_segment_base,
1710 .get_segment = svm_get_segment, 1889 .get_segment = svm_get_segment,
1711 .set_segment = svm_set_segment, 1890 .set_segment = svm_set_segment,
1891 .get_cpl = svm_get_cpl,
1712 .get_cs_db_l_bits = kvm_get_cs_db_l_bits, 1892 .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
1713 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, 1893 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
1714 .set_cr0 = svm_set_cr0, 1894 .set_cr0 = svm_set_cr0,
diff --git a/arch/x86/kvm/svm.h b/arch/x86/kvm/svm.h
index 5fd50491b555..1b8afa78e869 100644
--- a/arch/x86/kvm/svm.h
+++ b/arch/x86/kvm/svm.h
@@ -238,6 +238,9 @@ struct __attribute__ ((__packed__)) vmcb {
238#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID 238#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
239#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR 239#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
240 240
241#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
242#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
243
241#define SVM_EXIT_READ_CR0 0x000 244#define SVM_EXIT_READ_CR0 0x000
242#define SVM_EXIT_READ_CR3 0x003 245#define SVM_EXIT_READ_CR3 0x003
243#define SVM_EXIT_READ_CR4 0x004 246#define SVM_EXIT_READ_CR4 0x004
diff --git a/arch/x86/kvm/tss.h b/arch/x86/kvm/tss.h
new file mode 100644
index 000000000000..622aa10f692f
--- /dev/null
+++ b/arch/x86/kvm/tss.h
@@ -0,0 +1,59 @@
1#ifndef __TSS_SEGMENT_H
2#define __TSS_SEGMENT_H
3
4struct tss_segment_32 {
5 u32 prev_task_link;
6 u32 esp0;
7 u32 ss0;
8 u32 esp1;
9 u32 ss1;
10 u32 esp2;
11 u32 ss2;
12 u32 cr3;
13 u32 eip;
14 u32 eflags;
15 u32 eax;
16 u32 ecx;
17 u32 edx;
18 u32 ebx;
19 u32 esp;
20 u32 ebp;
21 u32 esi;
22 u32 edi;
23 u32 es;
24 u32 cs;
25 u32 ss;
26 u32 ds;
27 u32 fs;
28 u32 gs;
29 u32 ldt_selector;
30 u16 t;
31 u16 io_map;
32};
33
34struct tss_segment_16 {
35 u16 prev_task_link;
36 u16 sp0;
37 u16 ss0;
38 u16 sp1;
39 u16 ss1;
40 u16 sp2;
41 u16 ss2;
42 u16 ip;
43 u16 flag;
44 u16 ax;
45 u16 cx;
46 u16 dx;
47 u16 bx;
48 u16 sp;
49 u16 bp;
50 u16 si;
51 u16 di;
52 u16 es;
53 u16 cs;
54 u16 ss;
55 u16 ds;
56 u16 ldt;
57};
58
59#endif
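The new header mirrors the hardware task-state-segment layouts that the task-switch emulation operates on. If the fields above are transcribed faithfully, the 32-bit TSS comes to 104 bytes and the 16-bit TSS to 44 bytes, matching the architectural sizes. Not part of the patch, a freestanding check of that arithmetic using stdint types in place of the kernel's u16/u32:

#include <stdint.h>

struct tss32_sketch { uint32_t dwords[25]; uint16_t t, io_map; };  /* 25 dwords + 2 words */
struct tss16_sketch { uint16_t words[22]; };                       /* 22 words */

_Static_assert(sizeof(struct tss32_sketch) == 104, "32-bit TSS is 104 bytes");
_Static_assert(sizeof(struct tss16_sketch) == 44,  "16-bit TSS is 44 bytes");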
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8e1462880d1f..8e5d6645b90d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -17,7 +17,6 @@
17 17
18#include "irq.h" 18#include "irq.h"
19#include "vmx.h" 19#include "vmx.h"
20#include "segment_descriptor.h"
21#include "mmu.h" 20#include "mmu.h"
22 21
23#include <linux/kvm_host.h> 22#include <linux/kvm_host.h>
@@ -37,6 +36,12 @@ MODULE_LICENSE("GPL");
37static int bypass_guest_pf = 1; 36static int bypass_guest_pf = 1;
38module_param(bypass_guest_pf, bool, 0); 37module_param(bypass_guest_pf, bool, 0);
39 38
39static int enable_vpid = 1;
40module_param(enable_vpid, bool, 0);
41
42static int flexpriority_enabled = 1;
43module_param(flexpriority_enabled, bool, 0);
44
40struct vmcs { 45struct vmcs {
41 u32 revision_id; 46 u32 revision_id;
42 u32 abort; 47 u32 abort;
@@ -71,6 +76,7 @@ struct vcpu_vmx {
71 unsigned rip; 76 unsigned rip;
72 } irq; 77 } irq;
73 } rmode; 78 } rmode;
79 int vpid;
74}; 80};
75 81
76static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) 82static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -85,6 +91,10 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
85 91
86static struct page *vmx_io_bitmap_a; 92static struct page *vmx_io_bitmap_a;
87static struct page *vmx_io_bitmap_b; 93static struct page *vmx_io_bitmap_b;
94static struct page *vmx_msr_bitmap;
95
96static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
97static DEFINE_SPINLOCK(vmx_vpid_lock);
88 98
89static struct vmcs_config { 99static struct vmcs_config {
90 int size; 100 int size;
@@ -176,6 +186,11 @@ static inline int is_external_interrupt(u32 intr_info)
176 == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); 186 == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
177} 187}
178 188
189static inline int cpu_has_vmx_msr_bitmap(void)
190{
191 return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS);
192}
193
179static inline int cpu_has_vmx_tpr_shadow(void) 194static inline int cpu_has_vmx_tpr_shadow(void)
180{ 195{
181 return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW); 196 return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW);
@@ -194,8 +209,9 @@ static inline int cpu_has_secondary_exec_ctrls(void)
194 209
195static inline bool cpu_has_vmx_virtualize_apic_accesses(void) 210static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
196{ 211{
197 return (vmcs_config.cpu_based_2nd_exec_ctrl & 212 return flexpriority_enabled
198 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); 213 && (vmcs_config.cpu_based_2nd_exec_ctrl &
214 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
199} 215}
200 216
201static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) 217static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
@@ -204,6 +220,12 @@ static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
204 (irqchip_in_kernel(kvm))); 220 (irqchip_in_kernel(kvm)));
205} 221}
206 222
223static inline int cpu_has_vmx_vpid(void)
224{
225 return (vmcs_config.cpu_based_2nd_exec_ctrl &
226 SECONDARY_EXEC_ENABLE_VPID);
227}
228
207static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) 229static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
208{ 230{
209 int i; 231 int i;
@@ -214,6 +236,20 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
214 return -1; 236 return -1;
215} 237}
216 238
239static inline void __invvpid(int ext, u16 vpid, gva_t gva)
240{
241 struct {
242 u64 vpid : 16;
243 u64 rsvd : 48;
244 u64 gva;
245 } operand = { vpid, 0, gva };
246
247 asm volatile (ASM_VMX_INVVPID
248 /* CF==1 or ZF==1 --> rc = -1 */
249 "; ja 1f ; ud2 ; 1:"
250 : : "a"(&operand), "c"(ext) : "cc", "memory");
251}
252
217static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) 253static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
218{ 254{
219 int i; 255 int i;
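__invvpid() above hands the CPU a 128-bit descriptor in memory (the VPID in the low 16 bits, 48 reserved bits, then a linear address) and the invalidation type in a register; vpid_sync_vcpu_all() later in this patch uses the single-context type to flush one guest's TLB tags. Not part of the patch, a standalone restatement of the descriptor layout with a size check (struct and constant names are illustrative; the extent values match the VMX_VPID_EXTENT_* definitions added to vmx.h below):

#include <stdint.h>

#define INVVPID_SINGLE_CONTEXT 1    /* flush all mappings tagged with one VPID */
#define INVVPID_ALL_CONTEXT    2    /* flush mappings for every VPID */

struct invvpid_desc_sketch {
        uint16_t vpid;        /* VPID to invalidate */
        uint16_t rsvd[3];     /* must be zero */
        uint64_t gva;         /* linear address, used only by address-specific types */
};

_Static_assert(sizeof(struct invvpid_desc_sketch) == 16, "INVVPID descriptor is 128 bits");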
@@ -257,6 +293,14 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
257 vmx->launched = 0; 293 vmx->launched = 0;
258} 294}
259 295
296static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
297{
298 if (vmx->vpid == 0)
299 return;
300
301 __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
302}
303
260static unsigned long vmcs_readl(unsigned long field) 304static unsigned long vmcs_readl(unsigned long field)
261{ 305{
262 unsigned long value; 306 unsigned long value;
@@ -353,7 +397,7 @@ static void reload_tss(void)
353 * VT restores TR but not its size. Useless. 397 * VT restores TR but not its size. Useless.
354 */ 398 */
355 struct descriptor_table gdt; 399 struct descriptor_table gdt;
356 struct segment_descriptor *descs; 400 struct desc_struct *descs;
357 401
358 get_gdt(&gdt); 402 get_gdt(&gdt);
359 descs = (void *)gdt.base; 403 descs = (void *)gdt.base;
@@ -485,11 +529,12 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
485{ 529{
486 struct vcpu_vmx *vmx = to_vmx(vcpu); 530 struct vcpu_vmx *vmx = to_vmx(vcpu);
487 u64 phys_addr = __pa(vmx->vmcs); 531 u64 phys_addr = __pa(vmx->vmcs);
488 u64 tsc_this, delta; 532 u64 tsc_this, delta, new_offset;
489 533
490 if (vcpu->cpu != cpu) { 534 if (vcpu->cpu != cpu) {
491 vcpu_clear(vmx); 535 vcpu_clear(vmx);
492 kvm_migrate_apic_timer(vcpu); 536 kvm_migrate_apic_timer(vcpu);
537 vpid_sync_vcpu_all(vmx);
493 } 538 }
494 539
495 if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { 540 if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
@@ -524,8 +569,11 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
524 * Make sure the time stamp counter is monotonous. 569 * Make sure the time stamp counter is monotonous.
525 */ 570 */
526 rdtscll(tsc_this); 571 rdtscll(tsc_this);
527 delta = vcpu->arch.host_tsc - tsc_this; 572 if (tsc_this < vcpu->arch.host_tsc) {
528 vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta); 573 delta = vcpu->arch.host_tsc - tsc_this;
574 new_offset = vmcs_read64(TSC_OFFSET) + delta;
575 vmcs_write64(TSC_OFFSET, new_offset);
576 }
529 } 577 }
530} 578}
531 579
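The rewritten block above only adds to TSC_OFFSET when the destination CPU's TSC is behind the value saved on the source CPU, so the guest-visible TSC (host TSC plus offset) can never step backwards when a vcpu moves between CPUs; if the new CPU is ahead, the offset is left alone and the guest simply sees time jump forward. Not part of the patch, the arithmetic in isolation (function name is illustrative):

#include <stdint.h>

/* Illustrative: keep guest TSC = host TSC + offset monotonic across CPU moves. */
static uint64_t adjusted_tsc_offset(uint64_t cur_offset,
                                    uint64_t tsc_on_old_cpu,
                                    uint64_t tsc_on_new_cpu)
{
        if (tsc_on_new_cpu < tsc_on_old_cpu)
                return cur_offset + (tsc_on_old_cpu - tsc_on_new_cpu);
        return cur_offset;
}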
@@ -596,7 +644,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
596{ 644{
597 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 645 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
598 nr | INTR_TYPE_EXCEPTION 646 nr | INTR_TYPE_EXCEPTION
599 | (has_error_code ? INTR_INFO_DELIEVER_CODE_MASK : 0) 647 | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
600 | INTR_INFO_VALID_MASK); 648 | INTR_INFO_VALID_MASK);
601 if (has_error_code) 649 if (has_error_code)
602 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); 650 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
@@ -959,6 +1007,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
959 CPU_BASED_MOV_DR_EXITING | 1007 CPU_BASED_MOV_DR_EXITING |
960 CPU_BASED_USE_TSC_OFFSETING; 1008 CPU_BASED_USE_TSC_OFFSETING;
961 opt = CPU_BASED_TPR_SHADOW | 1009 opt = CPU_BASED_TPR_SHADOW |
1010 CPU_BASED_USE_MSR_BITMAPS |
962 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; 1011 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
963 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, 1012 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
964 &_cpu_based_exec_control) < 0) 1013 &_cpu_based_exec_control) < 0)
@@ -971,7 +1020,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
971 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { 1020 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
972 min = 0; 1021 min = 0;
973 opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 1022 opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
974 SECONDARY_EXEC_WBINVD_EXITING; 1023 SECONDARY_EXEC_WBINVD_EXITING |
1024 SECONDARY_EXEC_ENABLE_VPID;
975 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2, 1025 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2,
976 &_cpu_based_2nd_exec_control) < 0) 1026 &_cpu_based_2nd_exec_control) < 0)
977 return -EIO; 1027 return -EIO;
@@ -1080,6 +1130,10 @@ static __init int hardware_setup(void)
1080{ 1130{
1081 if (setup_vmcs_config(&vmcs_config) < 0) 1131 if (setup_vmcs_config(&vmcs_config) < 0)
1082 return -EIO; 1132 return -EIO;
1133
1134 if (boot_cpu_has(X86_FEATURE_NX))
1135 kvm_enable_efer_bits(EFER_NX);
1136
1083 return alloc_kvm_area(); 1137 return alloc_kvm_area();
1084} 1138}
1085 1139
@@ -1214,7 +1268,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
1214 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); 1268 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
1215 if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { 1269 if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
1216 printk(KERN_DEBUG "%s: tss fixup for long mode. \n", 1270 printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
1217 __FUNCTION__); 1271 __func__);
1218 vmcs_write32(GUEST_TR_AR_BYTES, 1272 vmcs_write32(GUEST_TR_AR_BYTES,
1219 (guest_tr_ar & ~AR_TYPE_MASK) 1273 (guest_tr_ar & ~AR_TYPE_MASK)
1220 | AR_TYPE_BUSY_64_TSS); 1274 | AR_TYPE_BUSY_64_TSS);
@@ -1239,6 +1293,11 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
1239 1293
1240#endif 1294#endif
1241 1295
1296static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
1297{
1298 vpid_sync_vcpu_all(to_vmx(vcpu));
1299}
1300
1242static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 1301static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
1243{ 1302{
1244 vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK; 1303 vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK;
@@ -1275,6 +1334,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1275 1334
1276static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 1335static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1277{ 1336{
1337 vmx_flush_tlb(vcpu);
1278 vmcs_writel(GUEST_CR3, cr3); 1338 vmcs_writel(GUEST_CR3, cr3);
1279 if (vcpu->arch.cr0 & X86_CR0_PE) 1339 if (vcpu->arch.cr0 & X86_CR0_PE)
1280 vmx_fpu_deactivate(vcpu); 1340 vmx_fpu_deactivate(vcpu);
@@ -1288,14 +1348,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1288 vcpu->arch.cr4 = cr4; 1348 vcpu->arch.cr4 = cr4;
1289} 1349}
1290 1350
1291#ifdef CONFIG_X86_64
1292
1293static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) 1351static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
1294{ 1352{
1295 struct vcpu_vmx *vmx = to_vmx(vcpu); 1353 struct vcpu_vmx *vmx = to_vmx(vcpu);
1296 struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); 1354 struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
1297 1355
1298 vcpu->arch.shadow_efer = efer; 1356 vcpu->arch.shadow_efer = efer;
1357 if (!msr)
1358 return;
1299 if (efer & EFER_LMA) { 1359 if (efer & EFER_LMA) {
1300 vmcs_write32(VM_ENTRY_CONTROLS, 1360 vmcs_write32(VM_ENTRY_CONTROLS,
1301 vmcs_read32(VM_ENTRY_CONTROLS) | 1361 vmcs_read32(VM_ENTRY_CONTROLS) |
@@ -1312,8 +1372,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
1312 setup_msrs(vmx); 1372 setup_msrs(vmx);
1313} 1373}
1314 1374
1315#endif
1316
1317static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) 1375static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1318{ 1376{
1319 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 1377 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1344,6 +1402,20 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
1344 var->unusable = (ar >> 16) & 1; 1402 var->unusable = (ar >> 16) & 1;
1345} 1403}
1346 1404
1405static int vmx_get_cpl(struct kvm_vcpu *vcpu)
1406{
1407 struct kvm_segment kvm_seg;
1408
1409 if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */
1410 return 0;
1411
1412 if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
1413 return 3;
1414
1415 vmx_get_segment(vcpu, &kvm_seg, VCPU_SREG_CS);
1416 return kvm_seg.selector & 3;
1417}
1418
1347static u32 vmx_segment_access_rights(struct kvm_segment *var) 1419static u32 vmx_segment_access_rights(struct kvm_segment *var)
1348{ 1420{
1349 u32 ar; 1421 u32 ar;
@@ -1433,7 +1505,6 @@ static int init_rmode_tss(struct kvm *kvm)
1433 int ret = 0; 1505 int ret = 0;
1434 int r; 1506 int r;
1435 1507
1436 down_read(&kvm->slots_lock);
1437 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); 1508 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
1438 if (r < 0) 1509 if (r < 0)
1439 goto out; 1510 goto out;
@@ -1456,7 +1527,6 @@ static int init_rmode_tss(struct kvm *kvm)
1456 1527
1457 ret = 1; 1528 ret = 1;
1458out: 1529out:
1459 up_read(&kvm->slots_lock);
1460 return ret; 1530 return ret;
1461} 1531}
1462 1532
@@ -1494,6 +1564,46 @@ out:
1494 return r; 1564 return r;
1495} 1565}
1496 1566
1567static void allocate_vpid(struct vcpu_vmx *vmx)
1568{
1569 int vpid;
1570
1571 vmx->vpid = 0;
1572 if (!enable_vpid || !cpu_has_vmx_vpid())
1573 return;
1574 spin_lock(&vmx_vpid_lock);
1575 vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
1576 if (vpid < VMX_NR_VPIDS) {
1577 vmx->vpid = vpid;
1578 __set_bit(vpid, vmx_vpid_bitmap);
1579 }
1580 spin_unlock(&vmx_vpid_lock);
1581}
1582
1583void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
1584{
1585 void *va;
1586
1587 if (!cpu_has_vmx_msr_bitmap())
1588 return;
1589
1590 /*
1591 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
1592 * have the write-low and read-high bitmap offsets the wrong way round.
1593 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
1594 */
1595 va = kmap(msr_bitmap);
1596 if (msr <= 0x1fff) {
1597 __clear_bit(msr, va + 0x000); /* read-low */
1598 __clear_bit(msr, va + 0x800); /* write-low */
1599 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
1600 msr &= 0x1fff;
1601 __clear_bit(msr, va + 0x400); /* read-high */
1602 __clear_bit(msr, va + 0xc00); /* write-high */
1603 }
1604 kunmap(msr_bitmap);
1605}
1606
1497/* 1607/*
1498 * Sets up the vmcs for emulated real mode. 1608 * Sets up the vmcs for emulated real mode.
1499 */ 1609 */
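vmx_disable_intercept_for_msr() above clears bits in the 4 KiB MSR bitmap: one bit per MSR, with separate read and write quarters for the low (0x00000000-0x00001fff) and high (0xc0000000-0xc0001fff) MSR ranges, and a clear bit means the access no longer causes a vmexit. Not part of the patch, a sketch of how an (msr, write) pair maps to a byte and bit within that page, following the offsets used above (helper name is illustrative):

#include <stdint.h>

/*
 * MSR bitmap page layout (bit clear = no intercept):
 *   0x000-0x3ff  reads,  MSRs 0x00000000-0x00001fff
 *   0x400-0x7ff  reads,  MSRs 0xc0000000-0xc0001fff
 *   0x800-0xbff  writes, MSRs 0x00000000-0x00001fff
 *   0xc00-0xfff  writes, MSRs 0xc0000000-0xc0001fff
 */
static int msr_bitmap_slot(uint32_t msr, int write, uint32_t *byte, uint32_t *bit)
{
        uint32_t base;

        if (msr <= 0x1fff)
                base = write ? 0x800 : 0x000;
        else if (msr >= 0xc0000000 && msr <= 0xc0001fff)
                base = write ? 0xc00 : 0x400;
        else
                return -1;                /* MSR not covered by the bitmap */

        msr &= 0x1fff;
        *byte = base + msr / 8;
        *bit  = msr % 8;
        return 0;
}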
@@ -1511,6 +1621,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1511 vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a)); 1621 vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a));
1512 vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b)); 1622 vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b));
1513 1623
1624 if (cpu_has_vmx_msr_bitmap())
1625 vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap));
1626
1514 vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ 1627 vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
1515 1628
1516 /* Control */ 1629 /* Control */
@@ -1532,6 +1645,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1532 if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) 1645 if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
1533 exec_control &= 1646 exec_control &=
1534 ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 1647 ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
1648 if (vmx->vpid == 0)
1649 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
1535 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 1650 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
1536 } 1651 }
1537 1652
@@ -1613,6 +1728,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
1613 u64 msr; 1728 u64 msr;
1614 int ret; 1729 int ret;
1615 1730
1731 down_read(&vcpu->kvm->slots_lock);
1616 if (!init_rmode_tss(vmx->vcpu.kvm)) { 1732 if (!init_rmode_tss(vmx->vcpu.kvm)) {
1617 ret = -ENOMEM; 1733 ret = -ENOMEM;
1618 goto out; 1734 goto out;
@@ -1621,7 +1737,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
1621 vmx->vcpu.arch.rmode.active = 0; 1737 vmx->vcpu.arch.rmode.active = 0;
1622 1738
1623 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 1739 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
1624 set_cr8(&vmx->vcpu, 0); 1740 kvm_set_cr8(&vmx->vcpu, 0);
1625 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 1741 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
1626 if (vmx->vcpu.vcpu_id == 0) 1742 if (vmx->vcpu.vcpu_id == 0)
1627 msr |= MSR_IA32_APICBASE_BSP; 1743 msr |= MSR_IA32_APICBASE_BSP;
@@ -1704,18 +1820,22 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
1704 vmcs_write64(APIC_ACCESS_ADDR, 1820 vmcs_write64(APIC_ACCESS_ADDR,
1705 page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); 1821 page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
1706 1822
1823 if (vmx->vpid != 0)
1824 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
1825
1707 vmx->vcpu.arch.cr0 = 0x60000010; 1826 vmx->vcpu.arch.cr0 = 0x60000010;
1708 vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ 1827 vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */
1709 vmx_set_cr4(&vmx->vcpu, 0); 1828 vmx_set_cr4(&vmx->vcpu, 0);
1710#ifdef CONFIG_X86_64
1711 vmx_set_efer(&vmx->vcpu, 0); 1829 vmx_set_efer(&vmx->vcpu, 0);
1712#endif
1713 vmx_fpu_activate(&vmx->vcpu); 1830 vmx_fpu_activate(&vmx->vcpu);
1714 update_exception_bitmap(&vmx->vcpu); 1831 update_exception_bitmap(&vmx->vcpu);
1715 1832
1716 return 0; 1833 vpid_sync_vcpu_all(vmx);
1834
1835 ret = 0;
1717 1836
1718out: 1837out:
1838 up_read(&vcpu->kvm->slots_lock);
1719 return ret; 1839 return ret;
1720} 1840}
1721 1841
@@ -1723,6 +1843,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
1723{ 1843{
1724 struct vcpu_vmx *vmx = to_vmx(vcpu); 1844 struct vcpu_vmx *vmx = to_vmx(vcpu);
1725 1845
1846 KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
1847
1726 if (vcpu->arch.rmode.active) { 1848 if (vcpu->arch.rmode.active) {
1727 vmx->rmode.irq.pending = true; 1849 vmx->rmode.irq.pending = true;
1728 vmx->rmode.irq.vector = irq; 1850 vmx->rmode.irq.vector = irq;
@@ -1844,7 +1966,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1844 if ((vect_info & VECTORING_INFO_VALID_MASK) && 1966 if ((vect_info & VECTORING_INFO_VALID_MASK) &&
1845 !is_page_fault(intr_info)) 1967 !is_page_fault(intr_info))
1846 printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " 1968 printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
1847 "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info); 1969 "intr info 0x%x\n", __func__, vect_info, intr_info);
1848 1970
1849 if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) { 1971 if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) {
1850 int irq = vect_info & VECTORING_INFO_VECTOR_MASK; 1972 int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
@@ -1869,10 +1991,12 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1869 1991
1870 error_code = 0; 1992 error_code = 0;
1871 rip = vmcs_readl(GUEST_RIP); 1993 rip = vmcs_readl(GUEST_RIP);
1872 if (intr_info & INTR_INFO_DELIEVER_CODE_MASK) 1994 if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
1873 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 1995 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
1874 if (is_page_fault(intr_info)) { 1996 if (is_page_fault(intr_info)) {
1875 cr2 = vmcs_readl(EXIT_QUALIFICATION); 1997 cr2 = vmcs_readl(EXIT_QUALIFICATION);
1998 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
1999 (u32)((u64)cr2 >> 32), handler);
1876 return kvm_mmu_page_fault(vcpu, cr2, error_code); 2000 return kvm_mmu_page_fault(vcpu, cr2, error_code);
1877 } 2001 }
1878 2002
@@ -1901,6 +2025,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
1901 struct kvm_run *kvm_run) 2025 struct kvm_run *kvm_run)
1902{ 2026{
1903 ++vcpu->stat.irq_exits; 2027 ++vcpu->stat.irq_exits;
2028 KVMTRACE_1D(INTR, vcpu, vmcs_read32(VM_EXIT_INTR_INFO), handler);
1904 return 1; 2029 return 1;
1905} 2030}
1906 2031
@@ -1958,25 +2083,27 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1958 reg = (exit_qualification >> 8) & 15; 2083 reg = (exit_qualification >> 8) & 15;
1959 switch ((exit_qualification >> 4) & 3) { 2084 switch ((exit_qualification >> 4) & 3) {
1960 case 0: /* mov to cr */ 2085 case 0: /* mov to cr */
2086 KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)vcpu->arch.regs[reg],
2087 (u32)((u64)vcpu->arch.regs[reg] >> 32), handler);
1961 switch (cr) { 2088 switch (cr) {
1962 case 0: 2089 case 0:
1963 vcpu_load_rsp_rip(vcpu); 2090 vcpu_load_rsp_rip(vcpu);
1964 set_cr0(vcpu, vcpu->arch.regs[reg]); 2091 kvm_set_cr0(vcpu, vcpu->arch.regs[reg]);
1965 skip_emulated_instruction(vcpu); 2092 skip_emulated_instruction(vcpu);
1966 return 1; 2093 return 1;
1967 case 3: 2094 case 3:
1968 vcpu_load_rsp_rip(vcpu); 2095 vcpu_load_rsp_rip(vcpu);
1969 set_cr3(vcpu, vcpu->arch.regs[reg]); 2096 kvm_set_cr3(vcpu, vcpu->arch.regs[reg]);
1970 skip_emulated_instruction(vcpu); 2097 skip_emulated_instruction(vcpu);
1971 return 1; 2098 return 1;
1972 case 4: 2099 case 4:
1973 vcpu_load_rsp_rip(vcpu); 2100 vcpu_load_rsp_rip(vcpu);
1974 set_cr4(vcpu, vcpu->arch.regs[reg]); 2101 kvm_set_cr4(vcpu, vcpu->arch.regs[reg]);
1975 skip_emulated_instruction(vcpu); 2102 skip_emulated_instruction(vcpu);
1976 return 1; 2103 return 1;
1977 case 8: 2104 case 8:
1978 vcpu_load_rsp_rip(vcpu); 2105 vcpu_load_rsp_rip(vcpu);
1979 set_cr8(vcpu, vcpu->arch.regs[reg]); 2106 kvm_set_cr8(vcpu, vcpu->arch.regs[reg]);
1980 skip_emulated_instruction(vcpu); 2107 skip_emulated_instruction(vcpu);
1981 if (irqchip_in_kernel(vcpu->kvm)) 2108 if (irqchip_in_kernel(vcpu->kvm))
1982 return 1; 2109 return 1;
@@ -1990,6 +2117,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1990 vcpu->arch.cr0 &= ~X86_CR0_TS; 2117 vcpu->arch.cr0 &= ~X86_CR0_TS;
1991 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); 2118 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
1992 vmx_fpu_activate(vcpu); 2119 vmx_fpu_activate(vcpu);
2120 KVMTRACE_0D(CLTS, vcpu, handler);
1993 skip_emulated_instruction(vcpu); 2121 skip_emulated_instruction(vcpu);
1994 return 1; 2122 return 1;
1995 case 1: /*mov from cr*/ 2123 case 1: /*mov from cr*/
@@ -1998,18 +2126,24 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1998 vcpu_load_rsp_rip(vcpu); 2126 vcpu_load_rsp_rip(vcpu);
1999 vcpu->arch.regs[reg] = vcpu->arch.cr3; 2127 vcpu->arch.regs[reg] = vcpu->arch.cr3;
2000 vcpu_put_rsp_rip(vcpu); 2128 vcpu_put_rsp_rip(vcpu);
2129 KVMTRACE_3D(CR_READ, vcpu, (u32)cr,
2130 (u32)vcpu->arch.regs[reg],
2131 (u32)((u64)vcpu->arch.regs[reg] >> 32),
2132 handler);
2001 skip_emulated_instruction(vcpu); 2133 skip_emulated_instruction(vcpu);
2002 return 1; 2134 return 1;
2003 case 8: 2135 case 8:
2004 vcpu_load_rsp_rip(vcpu); 2136 vcpu_load_rsp_rip(vcpu);
2005 vcpu->arch.regs[reg] = get_cr8(vcpu); 2137 vcpu->arch.regs[reg] = kvm_get_cr8(vcpu);
2006 vcpu_put_rsp_rip(vcpu); 2138 vcpu_put_rsp_rip(vcpu);
2139 KVMTRACE_2D(CR_READ, vcpu, (u32)cr,
2140 (u32)vcpu->arch.regs[reg], handler);
2007 skip_emulated_instruction(vcpu); 2141 skip_emulated_instruction(vcpu);
2008 return 1; 2142 return 1;
2009 } 2143 }
2010 break; 2144 break;
2011 case 3: /* lmsw */ 2145 case 3: /* lmsw */
2012 lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); 2146 kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f);
2013 2147
2014 skip_emulated_instruction(vcpu); 2148 skip_emulated_instruction(vcpu);
2015 return 1; 2149 return 1;
@@ -2049,6 +2183,7 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2049 val = 0; 2183 val = 0;
2050 } 2184 }
2051 vcpu->arch.regs[reg] = val; 2185 vcpu->arch.regs[reg] = val;
2186 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
2052 } else { 2187 } else {
2053 /* mov to dr */ 2188 /* mov to dr */
2054 } 2189 }
@@ -2073,6 +2208,9 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2073 return 1; 2208 return 1;
2074 } 2209 }
2075 2210
2211 KVMTRACE_3D(MSR_READ, vcpu, ecx, (u32)data, (u32)(data >> 32),
2212 handler);
2213
2076 /* FIXME: handling of bits 32:63 of rax, rdx */ 2214 /* FIXME: handling of bits 32:63 of rax, rdx */
2077 vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u; 2215 vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u;
2078 vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u; 2216 vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u;
@@ -2086,6 +2224,9 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2086 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) 2224 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
2087 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); 2225 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
2088 2226
2227 KVMTRACE_3D(MSR_WRITE, vcpu, ecx, (u32)data, (u32)(data >> 32),
2228 handler);
2229
2089 if (vmx_set_msr(vcpu, ecx, data) != 0) { 2230 if (vmx_set_msr(vcpu, ecx, data) != 0) {
2090 kvm_inject_gp(vcpu, 0); 2231 kvm_inject_gp(vcpu, 0);
2091 return 1; 2232 return 1;
@@ -2110,6 +2251,9 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
2110 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 2251 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2111 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; 2252 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
2112 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 2253 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2254
2255 KVMTRACE_0D(PEND_INTR, vcpu, handler);
2256
2113 /* 2257 /*
2114 * If the user space waits to inject interrupts, exit as soon as 2258 * If the user space waits to inject interrupts, exit as soon as
2115 * possible 2259 * possible
@@ -2152,6 +2296,8 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2152 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); 2296 exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
2153 offset = exit_qualification & 0xffful; 2297 offset = exit_qualification & 0xffful;
2154 2298
2299 KVMTRACE_1D(APIC_ACCESS, vcpu, (u32)offset, handler);
2300
2155 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 2301 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
2156 2302
2157 if (er != EMULATE_DONE) { 2303 if (er != EMULATE_DONE) {
@@ -2163,6 +2309,20 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2163 return 1; 2309 return 1;
2164} 2310}
2165 2311
2312static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2313{
2314 unsigned long exit_qualification;
2315 u16 tss_selector;
2316 int reason;
2317
2318 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
2319
2320 reason = (u32)exit_qualification >> 30;
2321 tss_selector = exit_qualification;
2322
2323 return kvm_task_switch(vcpu, tss_selector, reason);
2324}
2325
2166/* 2326/*
2167 * The exit handlers return 1 if the exit was handled fully and guest execution 2327 * The exit handlers return 1 if the exit was handled fully and guest execution
2168 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 2328 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
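handle_task_switch() above pulls just two fields out of the exit qualification: bits 15:0 are the selector of the target TSS and bits 31:30 encode what caused the switch (per the Intel SDM: 0 = CALL, 1 = IRET, 2 = JMP, 3 = task gate in the IDT); the actual emulation is delegated to the common kvm_task_switch() helper. Not part of the patch, the decode with named constants (constant and function names are illustrative):

#include <stdint.h>

#define TS_REASON_CALL      0   /* exit qualification bits 31:30, per the Intel SDM */
#define TS_REASON_IRET      1
#define TS_REASON_JMP       2
#define TS_REASON_IDT_GATE  3

static void decode_task_switch_qual(uint64_t qual, uint16_t *tss_selector, int *reason)
{
        *tss_selector = (uint16_t)qual;            /* bits 15:0 */
        *reason = (int)(((uint32_t)qual) >> 30);   /* bits 31:30 */
}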
@@ -2185,6 +2345,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
2185 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, 2345 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
2186 [EXIT_REASON_APIC_ACCESS] = handle_apic_access, 2346 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
2187 [EXIT_REASON_WBINVD] = handle_wbinvd, 2347 [EXIT_REASON_WBINVD] = handle_wbinvd,
2348 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
2188}; 2349};
2189 2350
2190static const int kvm_vmx_max_exit_handlers = 2351static const int kvm_vmx_max_exit_handlers =
@@ -2200,6 +2361,9 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2200 struct vcpu_vmx *vmx = to_vmx(vcpu); 2361 struct vcpu_vmx *vmx = to_vmx(vcpu);
2201 u32 vectoring_info = vmx->idt_vectoring_info; 2362 u32 vectoring_info = vmx->idt_vectoring_info;
2202 2363
2364 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP),
2365 (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit);
2366
2203 if (unlikely(vmx->fail)) { 2367 if (unlikely(vmx->fail)) {
2204 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 2368 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
2205 kvm_run->fail_entry.hardware_entry_failure_reason 2369 kvm_run->fail_entry.hardware_entry_failure_reason
@@ -2210,7 +2374,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2210 if ((vectoring_info & VECTORING_INFO_VALID_MASK) && 2374 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
2211 exit_reason != EXIT_REASON_EXCEPTION_NMI) 2375 exit_reason != EXIT_REASON_EXCEPTION_NMI)
2212 printk(KERN_WARNING "%s: unexpected, valid vectoring info and " 2376 printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
2213 "exit reason is 0x%x\n", __FUNCTION__, exit_reason); 2377 "exit reason is 0x%x\n", __func__, exit_reason);
2214 if (exit_reason < kvm_vmx_max_exit_handlers 2378 if (exit_reason < kvm_vmx_max_exit_handlers
2215 && kvm_vmx_exit_handlers[exit_reason]) 2379 && kvm_vmx_exit_handlers[exit_reason])
2216 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); 2380 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -2221,10 +2385,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2221 return 0; 2385 return 0;
2222} 2386}
2223 2387
2224static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
2225{
2226}
2227
2228static void update_tpr_threshold(struct kvm_vcpu *vcpu) 2388static void update_tpr_threshold(struct kvm_vcpu *vcpu)
2229{ 2389{
2230 int max_irr, tpr; 2390 int max_irr, tpr;
@@ -2285,11 +2445,13 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
2285 return; 2445 return;
2286 } 2446 }
2287 2447
2448 KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler);
2449
2288 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); 2450 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
2289 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 2451 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
2290 vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); 2452 vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
2291 2453
2292 if (unlikely(idtv_info_field & INTR_INFO_DELIEVER_CODE_MASK)) 2454 if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK))
2293 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, 2455 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
2294 vmcs_read32(IDT_VECTORING_ERROR_CODE)); 2456 vmcs_read32(IDT_VECTORING_ERROR_CODE));
2295 if (unlikely(has_ext_irq)) 2457 if (unlikely(has_ext_irq))
@@ -2470,8 +2632,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2470 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 2632 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
2471 2633
2472 /* We need to handle NMIs before interrupts are enabled */ 2634 /* We need to handle NMIs before interrupts are enabled */
2473 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ 2635 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
2636 KVMTRACE_0D(NMI, vcpu, handler);
2474 asm("int $2"); 2637 asm("int $2");
2638 }
2475} 2639}
2476 2640
2477static void vmx_free_vmcs(struct kvm_vcpu *vcpu) 2641static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
@@ -2489,6 +2653,10 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
2489{ 2653{
2490 struct vcpu_vmx *vmx = to_vmx(vcpu); 2654 struct vcpu_vmx *vmx = to_vmx(vcpu);
2491 2655
2656 spin_lock(&vmx_vpid_lock);
2657 if (vmx->vpid != 0)
2658 __clear_bit(vmx->vpid, vmx_vpid_bitmap);
2659 spin_unlock(&vmx_vpid_lock);
2492 vmx_free_vmcs(vcpu); 2660 vmx_free_vmcs(vcpu);
2493 kfree(vmx->host_msrs); 2661 kfree(vmx->host_msrs);
2494 kfree(vmx->guest_msrs); 2662 kfree(vmx->guest_msrs);
@@ -2505,6 +2673,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
2505 if (!vmx) 2673 if (!vmx)
2506 return ERR_PTR(-ENOMEM); 2674 return ERR_PTR(-ENOMEM);
2507 2675
2676 allocate_vpid(vmx);
2677
2508 err = kvm_vcpu_init(&vmx->vcpu, kvm, id); 2678 err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
2509 if (err) 2679 if (err)
2510 goto free_vcpu; 2680 goto free_vcpu;
@@ -2591,14 +2761,13 @@ static struct kvm_x86_ops vmx_x86_ops = {
2591 .get_segment_base = vmx_get_segment_base, 2761 .get_segment_base = vmx_get_segment_base,
2592 .get_segment = vmx_get_segment, 2762 .get_segment = vmx_get_segment,
2593 .set_segment = vmx_set_segment, 2763 .set_segment = vmx_set_segment,
2764 .get_cpl = vmx_get_cpl,
2594 .get_cs_db_l_bits = vmx_get_cs_db_l_bits, 2765 .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
2595 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, 2766 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
2596 .set_cr0 = vmx_set_cr0, 2767 .set_cr0 = vmx_set_cr0,
2597 .set_cr3 = vmx_set_cr3, 2768 .set_cr3 = vmx_set_cr3,
2598 .set_cr4 = vmx_set_cr4, 2769 .set_cr4 = vmx_set_cr4,
2599#ifdef CONFIG_X86_64
2600 .set_efer = vmx_set_efer, 2770 .set_efer = vmx_set_efer,
2601#endif
2602 .get_idt = vmx_get_idt, 2771 .get_idt = vmx_get_idt,
2603 .set_idt = vmx_set_idt, 2772 .set_idt = vmx_set_idt,
2604 .get_gdt = vmx_get_gdt, 2773 .get_gdt = vmx_get_gdt,
@@ -2626,7 +2795,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
2626 2795
2627static int __init vmx_init(void) 2796static int __init vmx_init(void)
2628{ 2797{
2629 void *iova; 2798 void *va;
2630 int r; 2799 int r;
2631 2800
2632 vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); 2801 vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
@@ -2639,28 +2808,48 @@ static int __init vmx_init(void)
2639 goto out; 2808 goto out;
2640 } 2809 }
2641 2810
2811 vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
2812 if (!vmx_msr_bitmap) {
2813 r = -ENOMEM;
2814 goto out1;
2815 }
2816
2642 /* 2817 /*
2643 * Allow direct access to the PC debug port (it is often used for I/O 2818 * Allow direct access to the PC debug port (it is often used for I/O
2644 * delays, but the vmexits simply slow things down). 2819 * delays, but the vmexits simply slow things down).
2645 */ 2820 */
2646 iova = kmap(vmx_io_bitmap_a); 2821 va = kmap(vmx_io_bitmap_a);
2647 memset(iova, 0xff, PAGE_SIZE); 2822 memset(va, 0xff, PAGE_SIZE);
2648 clear_bit(0x80, iova); 2823 clear_bit(0x80, va);
2649 kunmap(vmx_io_bitmap_a); 2824 kunmap(vmx_io_bitmap_a);
2650 2825
2651 iova = kmap(vmx_io_bitmap_b); 2826 va = kmap(vmx_io_bitmap_b);
2652 memset(iova, 0xff, PAGE_SIZE); 2827 memset(va, 0xff, PAGE_SIZE);
2653 kunmap(vmx_io_bitmap_b); 2828 kunmap(vmx_io_bitmap_b);
2654 2829
2830 va = kmap(vmx_msr_bitmap);
2831 memset(va, 0xff, PAGE_SIZE);
2832 kunmap(vmx_msr_bitmap);
2833
2834 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
2835
2655 r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); 2836 r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
2656 if (r) 2837 if (r)
2657 goto out1; 2838 goto out2;
2839
2840 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_FS_BASE);
2841 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_GS_BASE);
2842 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_CS);
2843 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
2844 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
2658 2845
2659 if (bypass_guest_pf) 2846 if (bypass_guest_pf)
2660 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); 2847 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
2661 2848
2662 return 0; 2849 return 0;
2663 2850
2851out2:
2852 __free_page(vmx_msr_bitmap);
2664out1: 2853out1:
2665 __free_page(vmx_io_bitmap_b); 2854 __free_page(vmx_io_bitmap_b);
2666out: 2855out:
@@ -2670,6 +2859,7 @@ out:
2670 2859
2671static void __exit vmx_exit(void) 2860static void __exit vmx_exit(void)
2672{ 2861{
2862 __free_page(vmx_msr_bitmap);
2673 __free_page(vmx_io_bitmap_b); 2863 __free_page(vmx_io_bitmap_b);
2674 __free_page(vmx_io_bitmap_a); 2864 __free_page(vmx_io_bitmap_a);
2675 2865
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index d52ae8d7303d..5dff4606b988 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -49,6 +49,7 @@
49 * Definitions of Secondary Processor-Based VM-Execution Controls. 49 * Definitions of Secondary Processor-Based VM-Execution Controls.
50 */ 50 */
51#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 51#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
52#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
52#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 53#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
53 54
54 55
@@ -65,6 +66,7 @@
65 66
66/* VMCS Encodings */ 67/* VMCS Encodings */
67enum vmcs_field { 68enum vmcs_field {
69 VIRTUAL_PROCESSOR_ID = 0x00000000,
68 GUEST_ES_SELECTOR = 0x00000800, 70 GUEST_ES_SELECTOR = 0x00000800,
69 GUEST_CS_SELECTOR = 0x00000802, 71 GUEST_CS_SELECTOR = 0x00000802,
70 GUEST_SS_SELECTOR = 0x00000804, 72 GUEST_SS_SELECTOR = 0x00000804,
@@ -231,12 +233,12 @@ enum vmcs_field {
231 */ 233 */
232#define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ 234#define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */
233#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ 235#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */
234#define INTR_INFO_DELIEVER_CODE_MASK 0x800 /* 11 */ 236#define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */
235#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ 237#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */
236 238
237#define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK 239#define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK
238#define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK 240#define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK
239#define VECTORING_INFO_DELIEVER_CODE_MASK INTR_INFO_DELIEVER_CODE_MASK 241#define VECTORING_INFO_DELIVER_CODE_MASK INTR_INFO_DELIVER_CODE_MASK
240#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK 242#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK
241 243
242#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ 244#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
@@ -321,4 +323,8 @@ enum vmcs_field {
321 323
322#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 324#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9
323 325
326#define VMX_NR_VPIDS (1 << 16)
327#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
328#define VMX_VPID_EXTENT_ALL_CONTEXT 2
329
324#endif 330#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b01552bd1f1..0ce556372a4d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -15,10 +15,12 @@
15 */ 15 */
16 16
17#include <linux/kvm_host.h> 17#include <linux/kvm_host.h>
18#include "segment_descriptor.h"
19#include "irq.h" 18#include "irq.h"
20#include "mmu.h" 19#include "mmu.h"
20#include "i8254.h"
21#include "tss.h"
21 22
23#include <linux/clocksource.h>
22#include <linux/kvm.h> 24#include <linux/kvm.h>
23#include <linux/fs.h> 25#include <linux/fs.h>
24#include <linux/vmalloc.h> 26#include <linux/vmalloc.h>
@@ -28,6 +30,7 @@
28 30
29#include <asm/uaccess.h> 31#include <asm/uaccess.h>
30#include <asm/msr.h> 32#include <asm/msr.h>
33#include <asm/desc.h>
31 34
32#define MAX_IO_MSRS 256 35#define MAX_IO_MSRS 256
33#define CR0_RESERVED_BITS \ 36#define CR0_RESERVED_BITS \
@@ -41,7 +44,15 @@
41 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) 44 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
42 45
43#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) 46#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
44#define EFER_RESERVED_BITS 0xfffffffffffff2fe 47/* EFER defaults:
48 * - enable syscall by default because it is emulated by KVM
49 * - enable LME and LMA by default on 64 bit KVM
50 */
51#ifdef CONFIG_X86_64
52static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL;
53#else
54static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
55#endif
45 56
46#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM 57#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
47#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 58#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
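The new defaults decode as follows: the 64-bit mask 0xfffffffffffffafe reserves everything except SCE (bit 0), LME (bit 8) and LMA (bit 10), while the 32-bit default 0xfffffffffffffffe permits only SCE; kvm_enable_efer_bits() then clears additional bits at runtime, as the VMX hardware_setup() change in this patch does for EFER_NX when the host CPU reports NX. Not part of the patch, a check of that bit arithmetic (the EFER_* values are the architectural bit positions, not defined in this hunk):

#include <stdint.h>

#define EFER_SCE (1ULL << 0)    /* syscall enable */
#define EFER_LME (1ULL << 8)    /* long mode enable */
#define EFER_LMA (1ULL << 10)   /* long mode active */
#define EFER_NX  (1ULL << 11)   /* no-execute enable */

/* 64-bit default: everything reserved except SCE, LME and LMA. */
_Static_assert(~0xfffffffffffffafeULL == (EFER_SCE | EFER_LME | EFER_LMA),
               "64-bit default EFER mask");

/* 32-bit default: only SCE until kvm_enable_efer_bits() opens up more. */
_Static_assert(~0xfffffffffffffffeULL == EFER_SCE, "32-bit default EFER mask");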
@@ -63,6 +74,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
63 { "irq_window", VCPU_STAT(irq_window_exits) }, 74 { "irq_window", VCPU_STAT(irq_window_exits) },
64 { "halt_exits", VCPU_STAT(halt_exits) }, 75 { "halt_exits", VCPU_STAT(halt_exits) },
65 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 76 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
77 { "hypercalls", VCPU_STAT(hypercalls) },
66 { "request_irq", VCPU_STAT(request_irq_exits) }, 78 { "request_irq", VCPU_STAT(request_irq_exits) },
67 { "irq_exits", VCPU_STAT(irq_exits) }, 79 { "irq_exits", VCPU_STAT(irq_exits) },
68 { "host_state_reload", VCPU_STAT(host_state_reload) }, 80 { "host_state_reload", VCPU_STAT(host_state_reload) },
@@ -78,6 +90,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
78 { "mmu_recycled", VM_STAT(mmu_recycled) }, 90 { "mmu_recycled", VM_STAT(mmu_recycled) },
79 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, 91 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
80 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, 92 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
93 { "largepages", VM_STAT(lpages) },
81 { NULL } 94 { NULL }
82}; 95};
83 96
@@ -85,7 +98,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
85unsigned long segment_base(u16 selector) 98unsigned long segment_base(u16 selector)
86{ 99{
87 struct descriptor_table gdt; 100 struct descriptor_table gdt;
88 struct segment_descriptor *d; 101 struct desc_struct *d;
89 unsigned long table_base; 102 unsigned long table_base;
90 unsigned long v; 103 unsigned long v;
91 104
@@ -101,13 +114,12 @@ unsigned long segment_base(u16 selector)
101 asm("sldt %0" : "=g"(ldt_selector)); 114 asm("sldt %0" : "=g"(ldt_selector));
102 table_base = segment_base(ldt_selector); 115 table_base = segment_base(ldt_selector);
103 } 116 }
104 d = (struct segment_descriptor *)(table_base + (selector & ~7)); 117 d = (struct desc_struct *)(table_base + (selector & ~7));
105 v = d->base_low | ((unsigned long)d->base_mid << 16) | 118 v = d->base0 | ((unsigned long)d->base1 << 16) |
106 ((unsigned long)d->base_high << 24); 119 ((unsigned long)d->base2 << 24);
107#ifdef CONFIG_X86_64 120#ifdef CONFIG_X86_64
108 if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) 121 if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
109 v |= ((unsigned long) \ 122 v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
110 ((struct segment_descriptor_64 *)d)->base_higher) << 32;
111#endif 123#endif
112 return v; 124 return v;
113} 125}
@@ -145,11 +157,16 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
145 u32 error_code) 157 u32 error_code)
146{ 158{
147 ++vcpu->stat.pf_guest; 159 ++vcpu->stat.pf_guest;
148 if (vcpu->arch.exception.pending && vcpu->arch.exception.nr == PF_VECTOR) { 160 if (vcpu->arch.exception.pending) {
149 printk(KERN_DEBUG "kvm: inject_page_fault:" 161 if (vcpu->arch.exception.nr == PF_VECTOR) {
150 " double fault 0x%lx\n", addr); 162 printk(KERN_DEBUG "kvm: inject_page_fault:"
151 vcpu->arch.exception.nr = DF_VECTOR; 163 " double fault 0x%lx\n", addr);
152 vcpu->arch.exception.error_code = 0; 164 vcpu->arch.exception.nr = DF_VECTOR;
165 vcpu->arch.exception.error_code = 0;
166 } else if (vcpu->arch.exception.nr == DF_VECTOR) {
167 /* triple fault -> shutdown */
168 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
169 }
153 return; 170 return;
154 } 171 }
155 vcpu->arch.cr2 = addr; 172 vcpu->arch.cr2 = addr;
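The reworked check follows the usual x86 escalation: a page fault raised while a #PF is still pending is promoted to a double fault, and a fault while a #DF is pending becomes a triple fault, reported to the vcpu as a shutdown request; any other pending exception is simply left queued and the new #PF is dropped here. Not part of the patch, the decision in isolation (names are illustrative; 14 and 8 are the architectural #PF and #DF vectors):

enum pf_action { DELIVER_PF, PROMOTE_TO_DF, TRIPLE_FAULT, KEEP_PENDING };

static enum pf_action page_fault_action(int exception_pending, int pending_nr)
{
        if (!exception_pending)
                return DELIVER_PF;       /* queue the #PF normally */
        if (pending_nr == 14)            /* #PF already pending -> #DF */
                return PROMOTE_TO_DF;
        if (pending_nr == 8)             /* #DF already pending -> triple fault */
                return TRIPLE_FAULT;
        return KEEP_PENDING;             /* some other exception stays queued */
}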
@@ -184,7 +201,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
184 int ret; 201 int ret;
185 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; 202 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
186 203
187 down_read(&vcpu->kvm->slots_lock);
188 ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, 204 ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
189 offset * sizeof(u64), sizeof(pdpte)); 205 offset * sizeof(u64), sizeof(pdpte));
190 if (ret < 0) { 206 if (ret < 0) {
@@ -201,10 +217,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
201 217
202 memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); 218 memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
203out: 219out:
204 up_read(&vcpu->kvm->slots_lock);
205 220
206 return ret; 221 return ret;
207} 222}
223EXPORT_SYMBOL_GPL(load_pdptrs);
208 224
209static bool pdptrs_changed(struct kvm_vcpu *vcpu) 225static bool pdptrs_changed(struct kvm_vcpu *vcpu)
210{ 226{
@@ -215,18 +231,16 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
215 if (is_long_mode(vcpu) || !is_pae(vcpu)) 231 if (is_long_mode(vcpu) || !is_pae(vcpu))
216 return false; 232 return false;
217 233
218 down_read(&vcpu->kvm->slots_lock);
219 r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); 234 r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
220 if (r < 0) 235 if (r < 0)
221 goto out; 236 goto out;
222 changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; 237 changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
223out: 238out:
224 up_read(&vcpu->kvm->slots_lock);
225 239
226 return changed; 240 return changed;
227} 241}
228 242
229void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 243void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
230{ 244{
231 if (cr0 & CR0_RESERVED_BITS) { 245 if (cr0 & CR0_RESERVED_BITS) {
232 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", 246 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
@@ -284,15 +298,18 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
284 kvm_mmu_reset_context(vcpu); 298 kvm_mmu_reset_context(vcpu);
285 return; 299 return;
286} 300}
287EXPORT_SYMBOL_GPL(set_cr0); 301EXPORT_SYMBOL_GPL(kvm_set_cr0);
288 302
289void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 303void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
290{ 304{
291 set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); 305 kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
306 KVMTRACE_1D(LMSW, vcpu,
307 (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)),
308 handler);
292} 309}
293EXPORT_SYMBOL_GPL(lmsw); 310EXPORT_SYMBOL_GPL(kvm_lmsw);
294 311
295void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 312void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
296{ 313{
297 if (cr4 & CR4_RESERVED_BITS) { 314 if (cr4 & CR4_RESERVED_BITS) {
298 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); 315 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
@@ -323,9 +340,9 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
323 vcpu->arch.cr4 = cr4; 340 vcpu->arch.cr4 = cr4;
324 kvm_mmu_reset_context(vcpu); 341 kvm_mmu_reset_context(vcpu);
325} 342}
326EXPORT_SYMBOL_GPL(set_cr4); 343EXPORT_SYMBOL_GPL(kvm_set_cr4);
327 344
328void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 345void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
329{ 346{
330 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { 347 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
331 kvm_mmu_flush_tlb(vcpu); 348 kvm_mmu_flush_tlb(vcpu);
@@ -359,7 +376,6 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
359 */ 376 */
360 } 377 }
361 378
362 down_read(&vcpu->kvm->slots_lock);
363 /* 379 /*
364 * Does the new cr3 value map to physical memory? (Note, we 380 * Does the new cr3 value map to physical memory? (Note, we
365 * catch an invalid cr3 even in real-mode, because it would 381 * catch an invalid cr3 even in real-mode, because it would
@@ -375,11 +391,10 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
375 vcpu->arch.cr3 = cr3; 391 vcpu->arch.cr3 = cr3;
376 vcpu->arch.mmu.new_cr3(vcpu); 392 vcpu->arch.mmu.new_cr3(vcpu);
377 } 393 }
378 up_read(&vcpu->kvm->slots_lock);
379} 394}
380EXPORT_SYMBOL_GPL(set_cr3); 395EXPORT_SYMBOL_GPL(kvm_set_cr3);
381 396
382void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 397void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
383{ 398{
384 if (cr8 & CR8_RESERVED_BITS) { 399 if (cr8 & CR8_RESERVED_BITS) {
385 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); 400 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
@@ -391,16 +406,16 @@ void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
391 else 406 else
392 vcpu->arch.cr8 = cr8; 407 vcpu->arch.cr8 = cr8;
393} 408}
394EXPORT_SYMBOL_GPL(set_cr8); 409EXPORT_SYMBOL_GPL(kvm_set_cr8);
395 410
396unsigned long get_cr8(struct kvm_vcpu *vcpu) 411unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
397{ 412{
398 if (irqchip_in_kernel(vcpu->kvm)) 413 if (irqchip_in_kernel(vcpu->kvm))
399 return kvm_lapic_get_cr8(vcpu); 414 return kvm_lapic_get_cr8(vcpu);
400 else 415 else
401 return vcpu->arch.cr8; 416 return vcpu->arch.cr8;
402} 417}
403EXPORT_SYMBOL_GPL(get_cr8); 418EXPORT_SYMBOL_GPL(kvm_get_cr8);
404 419
405/* 420/*
406 * List of msr numbers which we expose to userspace through KVM_GET_MSRS 421 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
@@ -415,7 +430,8 @@ static u32 msrs_to_save[] = {
415#ifdef CONFIG_X86_64 430#ifdef CONFIG_X86_64
416 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 431 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
417#endif 432#endif
418 MSR_IA32_TIME_STAMP_COUNTER, 433 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
434 MSR_IA32_PERF_STATUS,
419}; 435};
420 436
421static unsigned num_msrs_to_save; 437static unsigned num_msrs_to_save;
@@ -424,11 +440,9 @@ static u32 emulated_msrs[] = {
424 MSR_IA32_MISC_ENABLE, 440 MSR_IA32_MISC_ENABLE,
425}; 441};
426 442
427#ifdef CONFIG_X86_64
428
429static void set_efer(struct kvm_vcpu *vcpu, u64 efer) 443static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
430{ 444{
431 if (efer & EFER_RESERVED_BITS) { 445 if (efer & efer_reserved_bits) {
432 printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n", 446 printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
433 efer); 447 efer);
434 kvm_inject_gp(vcpu, 0); 448 kvm_inject_gp(vcpu, 0);
@@ -450,7 +464,12 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
450 vcpu->arch.shadow_efer = efer; 464 vcpu->arch.shadow_efer = efer;
451} 465}
452 466
453#endif 467void kvm_enable_efer_bits(u64 mask)
468{
469 efer_reserved_bits &= ~mask;
470}
471EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
472
454 473
455/* 474/*
456 * Writes msr value into into the appropriate "register". 475 * Writes msr value into into the appropriate "register".
@@ -470,26 +489,86 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
470 return kvm_set_msr(vcpu, index, *data); 489 return kvm_set_msr(vcpu, index, *data);
471} 490}
472 491
492static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
493{
494 static int version;
495 struct kvm_wall_clock wc;
496 struct timespec wc_ts;
497
498 if (!wall_clock)
499 return;
500
501 version++;
502
503 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
504
505 wc_ts = current_kernel_time();
506 wc.wc_sec = wc_ts.tv_sec;
507 wc.wc_nsec = wc_ts.tv_nsec;
508 wc.wc_version = version;
509
510 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
511
512 version++;
513 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
514}
515
516static void kvm_write_guest_time(struct kvm_vcpu *v)
517{
518 struct timespec ts;
519 unsigned long flags;
520 struct kvm_vcpu_arch *vcpu = &v->arch;
521 void *shared_kaddr;
522
523 if ((!vcpu->time_page))
524 return;
525
526 /* Keep irq disabled to prevent changes to the clock */
527 local_irq_save(flags);
528 kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
529 &vcpu->hv_clock.tsc_timestamp);
530 ktime_get_ts(&ts);
531 local_irq_restore(flags);
532
533 /* With all the info we got, fill in the values */
534
535 vcpu->hv_clock.system_time = ts.tv_nsec +
536 (NSEC_PER_SEC * (u64)ts.tv_sec);
537 /*
538 * The interface expects us to write an even number signaling that the
539 * update is finished. Since the guest won't see the intermediate
540 * state, we just write "2" at the end
541 */
542 vcpu->hv_clock.version = 2;
543
544 shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
545
546 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
547 sizeof(vcpu->hv_clock));
548
549 kunmap_atomic(shared_kaddr, KM_USER0);
550
551 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
552}
553
473 554
474int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 555int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
475{ 556{
476 switch (msr) { 557 switch (msr) {
477#ifdef CONFIG_X86_64
478 case MSR_EFER: 558 case MSR_EFER:
479 set_efer(vcpu, data); 559 set_efer(vcpu, data);
480 break; 560 break;
481#endif
482 case MSR_IA32_MC0_STATUS: 561 case MSR_IA32_MC0_STATUS:
483 pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", 562 pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
484 __FUNCTION__, data); 563 __func__, data);
485 break; 564 break;
486 case MSR_IA32_MCG_STATUS: 565 case MSR_IA32_MCG_STATUS:
487 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", 566 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
488 __FUNCTION__, data); 567 __func__, data);
489 break; 568 break;
490 case MSR_IA32_MCG_CTL: 569 case MSR_IA32_MCG_CTL:
491 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", 570 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
492 __FUNCTION__, data); 571 __func__, data);
493 break; 572 break;
494 case MSR_IA32_UCODE_REV: 573 case MSR_IA32_UCODE_REV:
495 case MSR_IA32_UCODE_WRITE: 574 case MSR_IA32_UCODE_WRITE:
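kvm_write_wall_clock() above treats the version field as a sequence counter, bumping it to an odd value, writing the seconds/nanoseconds payload, then bumping it to an even value; the bare version writes go to the structure's base address, so wc_version must sit at offset 0 for this to work. A guest that observes an odd or changing version knows it raced with an update and must retry. Not part of the patch, the matching guest-side read loop; the field order after wc_version is assumed, since only the field names appear in this hunk:

#include <stdint.h>

struct kvm_wall_clock_sketch {    /* wc_version first, as implied by the version writes */
        uint32_t wc_version;
        uint32_t wc_sec;
        uint32_t wc_nsec;
};

static void read_wall_clock(volatile struct kvm_wall_clock_sketch *wc,
                            uint32_t *sec, uint32_t *nsec)
{
        uint32_t v;

        do {
                while ((v = wc->wc_version) & 1)
                        ;                 /* odd: host update in flight, spin */
                *sec  = wc->wc_sec;
                *nsec = wc->wc_nsec;
        } while (v != wc->wc_version);    /* changed underneath us: retry */
}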
@@ -501,6 +580,42 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
501 case MSR_IA32_MISC_ENABLE: 580 case MSR_IA32_MISC_ENABLE:
502 vcpu->arch.ia32_misc_enable_msr = data; 581 vcpu->arch.ia32_misc_enable_msr = data;
503 break; 582 break;
583 case MSR_KVM_WALL_CLOCK:
584 vcpu->kvm->arch.wall_clock = data;
585 kvm_write_wall_clock(vcpu->kvm, data);
586 break;
587 case MSR_KVM_SYSTEM_TIME: {
588 if (vcpu->arch.time_page) {
589 kvm_release_page_dirty(vcpu->arch.time_page);
590 vcpu->arch.time_page = NULL;
591 }
592
593 vcpu->arch.time = data;
594
595 /* we verify if the enable bit is set... */
596 if (!(data & 1))
597 break;
598
599 /* ...but clean it before doing the actual write */
600 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
601
602 vcpu->arch.hv_clock.tsc_to_system_mul =
603 clocksource_khz2mult(tsc_khz, 22);
604 vcpu->arch.hv_clock.tsc_shift = 22;
605
606 down_read(&current->mm->mmap_sem);
607 vcpu->arch.time_page =
608 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
609 up_read(&current->mm->mmap_sem);
610
611 if (is_error_page(vcpu->arch.time_page)) {
612 kvm_release_page_clean(vcpu->arch.time_page);
613 vcpu->arch.time_page = NULL;
614 }
615
616 kvm_write_guest_time(vcpu);
617 break;
618 }
504 default: 619 default:
505 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); 620 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
506 return 1; 621 return 1;
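The MSR_KVM_SYSTEM_TIME handler above is how a guest switches per-vcpu time publishing on and off: the written value is the guest physical address of its time structure with bit 0 as the enable flag, and the bits below the page boundary become the offset of the structure within the mapped page. Not part of the patch, a guest-side sketch; the wrmsr wrapper and function names are illustrative, and the MSR index itself comes from kvm_para.h rather than this hunk:

#include <stdint.h>

/* Illustrative wrmsr wrapper; a real guest would use its own MSR accessors. */
static inline void wrmsr64(uint32_t msr, uint64_t val)
{
        asm volatile("wrmsr" : : "c"(msr), "a"((uint32_t)val), "d"((uint32_t)(val >> 32)));
}

static void enable_kvmclock(uint32_t msr_kvm_system_time, uint64_t time_struct_gpa)
{
        /* bit 0 = enable; the rest is the guest physical address of the structure */
        wrmsr64(msr_kvm_system_time, time_struct_gpa | 1);
}

static void disable_kvmclock(uint32_t msr_kvm_system_time)
{
        wrmsr64(msr_kvm_system_time, 0);  /* enable bit clear: host releases the page */
}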
@@ -540,7 +655,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
540 case MSR_IA32_MC0_MISC+12: 655 case MSR_IA32_MC0_MISC+12:
541 case MSR_IA32_MC0_MISC+16: 656 case MSR_IA32_MC0_MISC+16:
542 case MSR_IA32_UCODE_REV: 657 case MSR_IA32_UCODE_REV:
543 case MSR_IA32_PERF_STATUS:
544 case MSR_IA32_EBL_CR_POWERON: 658 case MSR_IA32_EBL_CR_POWERON:
545 /* MTRR registers */ 659 /* MTRR registers */
546 case 0xfe: 660 case 0xfe:
@@ -556,11 +670,21 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
556 case MSR_IA32_MISC_ENABLE: 670 case MSR_IA32_MISC_ENABLE:
557 data = vcpu->arch.ia32_misc_enable_msr; 671 data = vcpu->arch.ia32_misc_enable_msr;
558 break; 672 break;
559#ifdef CONFIG_X86_64 673 case MSR_IA32_PERF_STATUS:
674 /* TSC increment by tick */
675 data = 1000ULL;
676 /* CPU multiplier */
677 data |= (((uint64_t)4ULL) << 40);
678 break;
560 case MSR_EFER: 679 case MSR_EFER:
561 data = vcpu->arch.shadow_efer; 680 data = vcpu->arch.shadow_efer;
562 break; 681 break;
563#endif 682 case MSR_KVM_WALL_CLOCK:
683 data = vcpu->kvm->arch.wall_clock;
684 break;
685 case MSR_KVM_SYSTEM_TIME:
686 data = vcpu->arch.time;
687 break;
564 default: 688 default:
565 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); 689 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
566 return 1; 690 return 1;
@@ -584,9 +708,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
584 708
585 vcpu_load(vcpu); 709 vcpu_load(vcpu);
586 710
711 down_read(&vcpu->kvm->slots_lock);
587 for (i = 0; i < msrs->nmsrs; ++i) 712 for (i = 0; i < msrs->nmsrs; ++i)
588 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 713 if (do_msr(vcpu, entries[i].index, &entries[i].data))
589 break; 714 break;
715 up_read(&vcpu->kvm->slots_lock);
590 716
591 vcpu_put(vcpu); 717 vcpu_put(vcpu);
592 718
@@ -688,11 +814,24 @@ int kvm_dev_ioctl_check_extension(long ext)
688 case KVM_CAP_USER_MEMORY: 814 case KVM_CAP_USER_MEMORY:
689 case KVM_CAP_SET_TSS_ADDR: 815 case KVM_CAP_SET_TSS_ADDR:
690 case KVM_CAP_EXT_CPUID: 816 case KVM_CAP_EXT_CPUID:
817 case KVM_CAP_CLOCKSOURCE:
818 case KVM_CAP_PIT:
819 case KVM_CAP_NOP_IO_DELAY:
820 case KVM_CAP_MP_STATE:
691 r = 1; 821 r = 1;
692 break; 822 break;
693 case KVM_CAP_VAPIC: 823 case KVM_CAP_VAPIC:
694 r = !kvm_x86_ops->cpu_has_accelerated_tpr(); 824 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
695 break; 825 break;
826 case KVM_CAP_NR_VCPUS:
827 r = KVM_MAX_VCPUS;
828 break;
829 case KVM_CAP_NR_MEMSLOTS:
830 r = KVM_MEMORY_SLOTS;
831 break;
832 case KVM_CAP_PV_MMU:
833 r = !tdp_enabled;
834 break;
696 default: 835 default:
697 r = 0; 836 r = 0;
698 break; 837 break;
@@ -763,6 +902,7 @@ out:
763void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 902void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
764{ 903{
765 kvm_x86_ops->vcpu_load(vcpu, cpu); 904 kvm_x86_ops->vcpu_load(vcpu, cpu);
905 kvm_write_guest_time(vcpu);
766} 906}
767 907
768void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 908void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -958,32 +1098,32 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
958 } 1098 }
959 /* function 4 and 0xb have additional index. */ 1099 /* function 4 and 0xb have additional index. */
960 case 4: { 1100 case 4: {
961 int index, cache_type; 1101 int i, cache_type;
962 1102
963 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1103 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
964 /* read more entries until cache_type is zero */ 1104 /* read more entries until cache_type is zero */
965 for (index = 1; *nent < maxnent; ++index) { 1105 for (i = 1; *nent < maxnent; ++i) {
966 cache_type = entry[index - 1].eax & 0x1f; 1106 cache_type = entry[i - 1].eax & 0x1f;
967 if (!cache_type) 1107 if (!cache_type)
968 break; 1108 break;
969 do_cpuid_1_ent(&entry[index], function, index); 1109 do_cpuid_1_ent(&entry[i], function, i);
970 entry[index].flags |= 1110 entry[i].flags |=
971 KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1111 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
972 ++*nent; 1112 ++*nent;
973 } 1113 }
974 break; 1114 break;
975 } 1115 }
976 case 0xb: { 1116 case 0xb: {
977 int index, level_type; 1117 int i, level_type;
978 1118
979 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1119 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
980 /* read more entries until level_type is zero */ 1120 /* read more entries until level_type is zero */
981 for (index = 1; *nent < maxnent; ++index) { 1121 for (i = 1; *nent < maxnent; ++i) {
982 level_type = entry[index - 1].ecx & 0xff; 1122 level_type = entry[i - 1].ecx & 0xff;
983 if (!level_type) 1123 if (!level_type)
984 break; 1124 break;
985 do_cpuid_1_ent(&entry[index], function, index); 1125 do_cpuid_1_ent(&entry[i], function, i);
986 entry[index].flags |= 1126 entry[i].flags |=
987 KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1127 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
988 ++*nent; 1128 ++*nent;
989 } 1129 }
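
Both loops above (CPUID leaves 4 and 0xb) keep emitting sub-leaf entries until the type field of the previous entry reads zero. A user-space sketch of the same enumeration for leaf 4, assuming GCC/Clang's <cpuid.h> and its __cpuid_count() helper (not part of this patch):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx, i;

	for (i = 0; ; i++) {
		__cpuid_count(4, i, eax, ebx, ecx, edx);
		if ((eax & 0x1f) == 0)		/* cache type 0: no more levels */
			break;
		printf("subleaf %u: cache type %u, level %u\n",
		       i, eax & 0x1f, (eax >> 5) & 0x7);
	}
	return 0;
}
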
@@ -1365,6 +1505,23 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
1365 return r; 1505 return r;
1366} 1506}
1367 1507
1508static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
1509{
1510 int r = 0;
1511
1512 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
1513 return r;
1514}
1515
1516static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
1517{
1518 int r = 0;
1519
1520 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
1521 kvm_pit_load_count(kvm, 0, ps->channels[0].count);
1522 return r;
1523}
1524
1368/* 1525/*
1369 * Get (and clear) the dirty memory log for a memory slot. 1526 * Get (and clear) the dirty memory log for a memory slot.
1370 */ 1527 */
@@ -1457,6 +1614,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
1457 } else 1614 } else
1458 goto out; 1615 goto out;
1459 break; 1616 break;
1617 case KVM_CREATE_PIT:
1618 r = -ENOMEM;
1619 kvm->arch.vpit = kvm_create_pit(kvm);
1620 if (kvm->arch.vpit)
1621 r = 0;
1622 break;
1460 case KVM_IRQ_LINE: { 1623 case KVM_IRQ_LINE: {
1461 struct kvm_irq_level irq_event; 1624 struct kvm_irq_level irq_event;
1462 1625
@@ -1512,6 +1675,37 @@ long kvm_arch_vm_ioctl(struct file *filp,
1512 r = 0; 1675 r = 0;
1513 break; 1676 break;
1514 } 1677 }
1678 case KVM_GET_PIT: {
1679 struct kvm_pit_state ps;
1680 r = -EFAULT;
1681 if (copy_from_user(&ps, argp, sizeof ps))
1682 goto out;
1683 r = -ENXIO;
1684 if (!kvm->arch.vpit)
1685 goto out;
1686 r = kvm_vm_ioctl_get_pit(kvm, &ps);
1687 if (r)
1688 goto out;
1689 r = -EFAULT;
1690 if (copy_to_user(argp, &ps, sizeof ps))
1691 goto out;
1692 r = 0;
1693 break;
1694 }
1695 case KVM_SET_PIT: {
1696 struct kvm_pit_state ps;
1697 r = -EFAULT;
1698 if (copy_from_user(&ps, argp, sizeof ps))
1699 goto out;
1700 r = -ENXIO;
1701 if (!kvm->arch.vpit)
1702 goto out;
1703 r = kvm_vm_ioctl_set_pit(kvm, &ps);
1704 if (r)
1705 goto out;
1706 r = 0;
1707 break;
1708 }
1515 default: 1709 default:
1516 ; 1710 ;
1517 } 1711 }
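
KVM_GET_PIT and KVM_SET_PIT simply copy struct kvm_pit_state in and out of the kernel, and the set path re-arms channel 0 through kvm_pit_load_count(). A minimal user-space sketch of the round trip, assuming a VM fd on which KVM_CREATE_PIT has already succeeded:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int reload_pit_channel0(int vm_fd, unsigned int count)
{
	struct kvm_pit_state ps;

	if (ioctl(vm_fd, KVM_GET_PIT, &ps) < 0) {
		perror("KVM_GET_PIT");
		return -1;
	}
	ps.channels[0].count = count;	/* picked up by kvm_pit_load_count() */
	if (ioctl(vm_fd, KVM_SET_PIT, &ps) < 0) {
		perror("KVM_SET_PIT");
		return -1;
	}
	return 0;
}
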
@@ -1570,7 +1764,6 @@ int emulator_read_std(unsigned long addr,
1570 void *data = val; 1764 void *data = val;
1571 int r = X86EMUL_CONTINUE; 1765 int r = X86EMUL_CONTINUE;
1572 1766
1573 down_read(&vcpu->kvm->slots_lock);
1574 while (bytes) { 1767 while (bytes) {
1575 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1768 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1576 unsigned offset = addr & (PAGE_SIZE-1); 1769 unsigned offset = addr & (PAGE_SIZE-1);
@@ -1592,7 +1785,6 @@ int emulator_read_std(unsigned long addr,
1592 addr += tocopy; 1785 addr += tocopy;
1593 } 1786 }
1594out: 1787out:
1595 up_read(&vcpu->kvm->slots_lock);
1596 return r; 1788 return r;
1597} 1789}
1598EXPORT_SYMBOL_GPL(emulator_read_std); 1790EXPORT_SYMBOL_GPL(emulator_read_std);
@@ -1611,9 +1803,7 @@ static int emulator_read_emulated(unsigned long addr,
1611 return X86EMUL_CONTINUE; 1803 return X86EMUL_CONTINUE;
1612 } 1804 }
1613 1805
1614 down_read(&vcpu->kvm->slots_lock);
1615 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1806 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1616 up_read(&vcpu->kvm->slots_lock);
1617 1807
1618 /* For APIC access vmexit */ 1808 /* For APIC access vmexit */
1619 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 1809 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -1646,19 +1836,15 @@ mmio:
1646 return X86EMUL_UNHANDLEABLE; 1836 return X86EMUL_UNHANDLEABLE;
1647} 1837}
1648 1838
1649static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, 1839int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1650 const void *val, int bytes) 1840 const void *val, int bytes)
1651{ 1841{
1652 int ret; 1842 int ret;
1653 1843
1654 down_read(&vcpu->kvm->slots_lock);
1655 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); 1844 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
1656 if (ret < 0) { 1845 if (ret < 0)
1657 up_read(&vcpu->kvm->slots_lock);
1658 return 0; 1846 return 0;
1659 }
1660 kvm_mmu_pte_write(vcpu, gpa, val, bytes); 1847 kvm_mmu_pte_write(vcpu, gpa, val, bytes);
1661 up_read(&vcpu->kvm->slots_lock);
1662 return 1; 1848 return 1;
1663} 1849}
1664 1850
@@ -1670,9 +1856,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
1670 struct kvm_io_device *mmio_dev; 1856 struct kvm_io_device *mmio_dev;
1671 gpa_t gpa; 1857 gpa_t gpa;
1672 1858
1673 down_read(&vcpu->kvm->slots_lock);
1674 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1859 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1675 up_read(&vcpu->kvm->slots_lock);
1676 1860
1677 if (gpa == UNMAPPED_GVA) { 1861 if (gpa == UNMAPPED_GVA) {
1678 kvm_inject_page_fault(vcpu, addr, 2); 1862 kvm_inject_page_fault(vcpu, addr, 2);
@@ -1749,7 +1933,6 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
1749 char *kaddr; 1933 char *kaddr;
1750 u64 val; 1934 u64 val;
1751 1935
1752 down_read(&vcpu->kvm->slots_lock);
1753 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1936 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1754 1937
1755 if (gpa == UNMAPPED_GVA || 1938 if (gpa == UNMAPPED_GVA ||
@@ -1769,9 +1952,8 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
1769 set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); 1952 set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
1770 kunmap_atomic(kaddr, KM_USER0); 1953 kunmap_atomic(kaddr, KM_USER0);
1771 kvm_release_page_dirty(page); 1954 kvm_release_page_dirty(page);
1772 emul_write:
1773 up_read(&vcpu->kvm->slots_lock);
1774 } 1955 }
1956emul_write:
1775#endif 1957#endif
1776 1958
1777 return emulator_write_emulated(addr, new, bytes, vcpu); 1959 return emulator_write_emulated(addr, new, bytes, vcpu);
@@ -1802,7 +1984,7 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
1802 *dest = kvm_x86_ops->get_dr(vcpu, dr); 1984 *dest = kvm_x86_ops->get_dr(vcpu, dr);
1803 return X86EMUL_CONTINUE; 1985 return X86EMUL_CONTINUE;
1804 default: 1986 default:
1805 pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr); 1987 pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
1806 return X86EMUL_UNHANDLEABLE; 1988 return X86EMUL_UNHANDLEABLE;
1807 } 1989 }
1808} 1990}
@@ -1840,7 +2022,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
1840} 2022}
1841EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 2023EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
1842 2024
1843struct x86_emulate_ops emulate_ops = { 2025static struct x86_emulate_ops emulate_ops = {
1844 .read_std = emulator_read_std, 2026 .read_std = emulator_read_std,
1845 .read_emulated = emulator_read_emulated, 2027 .read_emulated = emulator_read_emulated,
1846 .write_emulated = emulator_write_emulated, 2028 .write_emulated = emulator_write_emulated,
@@ -2091,6 +2273,13 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2091 vcpu->arch.pio.guest_page_offset = 0; 2273 vcpu->arch.pio.guest_page_offset = 0;
2092 vcpu->arch.pio.rep = 0; 2274 vcpu->arch.pio.rep = 0;
2093 2275
2276 if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
2277 KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
2278 handler);
2279 else
2280 KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
2281 handler);
2282
2094 kvm_x86_ops->cache_regs(vcpu); 2283 kvm_x86_ops->cache_regs(vcpu);
2095 memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4); 2284 memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4);
2096 kvm_x86_ops->decache_regs(vcpu); 2285 kvm_x86_ops->decache_regs(vcpu);
@@ -2129,6 +2318,13 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2129 vcpu->arch.pio.guest_page_offset = offset_in_page(address); 2318 vcpu->arch.pio.guest_page_offset = offset_in_page(address);
2130 vcpu->arch.pio.rep = rep; 2319 vcpu->arch.pio.rep = rep;
2131 2320
2321 if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
2322 KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
2323 handler);
2324 else
2325 KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
2326 handler);
2327
2132 if (!count) { 2328 if (!count) {
2133 kvm_x86_ops->skip_emulated_instruction(vcpu); 2329 kvm_x86_ops->skip_emulated_instruction(vcpu);
2134 return 1; 2330 return 1;
@@ -2163,10 +2359,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2163 kvm_x86_ops->skip_emulated_instruction(vcpu); 2359 kvm_x86_ops->skip_emulated_instruction(vcpu);
2164 2360
2165 for (i = 0; i < nr_pages; ++i) { 2361 for (i = 0; i < nr_pages; ++i) {
2166 down_read(&vcpu->kvm->slots_lock);
2167 page = gva_to_page(vcpu, address + i * PAGE_SIZE); 2362 page = gva_to_page(vcpu, address + i * PAGE_SIZE);
2168 vcpu->arch.pio.guest_pages[i] = page; 2363 vcpu->arch.pio.guest_pages[i] = page;
2169 up_read(&vcpu->kvm->slots_lock);
2170 if (!page) { 2364 if (!page) {
2171 kvm_inject_gp(vcpu, 0); 2365 kvm_inject_gp(vcpu, 0);
2172 free_pio_guest_pages(vcpu); 2366 free_pio_guest_pages(vcpu);
@@ -2238,10 +2432,13 @@ void kvm_arch_exit(void)
2238int kvm_emulate_halt(struct kvm_vcpu *vcpu) 2432int kvm_emulate_halt(struct kvm_vcpu *vcpu)
2239{ 2433{
2240 ++vcpu->stat.halt_exits; 2434 ++vcpu->stat.halt_exits;
2435 KVMTRACE_0D(HLT, vcpu, handler);
2241 if (irqchip_in_kernel(vcpu->kvm)) { 2436 if (irqchip_in_kernel(vcpu->kvm)) {
2242 vcpu->arch.mp_state = VCPU_MP_STATE_HALTED; 2437 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
2438 up_read(&vcpu->kvm->slots_lock);
2243 kvm_vcpu_block(vcpu); 2439 kvm_vcpu_block(vcpu);
2244 if (vcpu->arch.mp_state != VCPU_MP_STATE_RUNNABLE) 2440 down_read(&vcpu->kvm->slots_lock);
2441 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
2245 return -EINTR; 2442 return -EINTR;
2246 return 1; 2443 return 1;
2247 } else { 2444 } else {
@@ -2251,9 +2448,19 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
2251} 2448}
2252EXPORT_SYMBOL_GPL(kvm_emulate_halt); 2449EXPORT_SYMBOL_GPL(kvm_emulate_halt);
2253 2450
2451static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
2452 unsigned long a1)
2453{
2454 if (is_long_mode(vcpu))
2455 return a0;
2456 else
2457 return a0 | ((gpa_t)a1 << 32);
2458}
2459
2254int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) 2460int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2255{ 2461{
2256 unsigned long nr, a0, a1, a2, a3, ret; 2462 unsigned long nr, a0, a1, a2, a3, ret;
2463 int r = 1;
2257 2464
2258 kvm_x86_ops->cache_regs(vcpu); 2465 kvm_x86_ops->cache_regs(vcpu);
2259 2466
@@ -2263,6 +2470,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2263 a2 = vcpu->arch.regs[VCPU_REGS_RDX]; 2470 a2 = vcpu->arch.regs[VCPU_REGS_RDX];
2264 a3 = vcpu->arch.regs[VCPU_REGS_RSI]; 2471 a3 = vcpu->arch.regs[VCPU_REGS_RSI];
2265 2472
2473 KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
2474
2266 if (!is_long_mode(vcpu)) { 2475 if (!is_long_mode(vcpu)) {
2267 nr &= 0xFFFFFFFF; 2476 nr &= 0xFFFFFFFF;
2268 a0 &= 0xFFFFFFFF; 2477 a0 &= 0xFFFFFFFF;
@@ -2275,13 +2484,17 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2275 case KVM_HC_VAPIC_POLL_IRQ: 2484 case KVM_HC_VAPIC_POLL_IRQ:
2276 ret = 0; 2485 ret = 0;
2277 break; 2486 break;
2487 case KVM_HC_MMU_OP:
2488 r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
2489 break;
2278 default: 2490 default:
2279 ret = -KVM_ENOSYS; 2491 ret = -KVM_ENOSYS;
2280 break; 2492 break;
2281 } 2493 }
2282 vcpu->arch.regs[VCPU_REGS_RAX] = ret; 2494 vcpu->arch.regs[VCPU_REGS_RAX] = ret;
2283 kvm_x86_ops->decache_regs(vcpu); 2495 kvm_x86_ops->decache_regs(vcpu);
2284 return 0; 2496 ++vcpu->stat.hypercalls;
2497 return r;
2285} 2498}
2286EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); 2499EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
2287 2500
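
hc_gpa() exists because a guest that is not in long mode can only pass 32-bit register values, so a 64-bit guest-physical address for KVM_HC_MMU_OP arrives split across two hypercall arguments. A tiny stand-alone sketch of the reassembly (the example value is hypothetical):

#include <stdint.h>
#include <stdio.h>

static uint64_t join_gpa(uint32_t lo, uint32_t hi)
{
	return (uint64_t)lo | ((uint64_t)hi << 32);
}

int main(void)
{
	/* e.g. a buffer at guest-physical 0x123456000 split across a1/a2 */
	printf("gpa = 0x%llx\n",
	       (unsigned long long)join_gpa(0x23456000u, 0x1u));
	return 0;
}
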
@@ -2329,7 +2542,7 @@ void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
2329void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, 2542void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
2330 unsigned long *rflags) 2543 unsigned long *rflags)
2331{ 2544{
2332 lmsw(vcpu, msw); 2545 kvm_lmsw(vcpu, msw);
2333 *rflags = kvm_x86_ops->get_rflags(vcpu); 2546 *rflags = kvm_x86_ops->get_rflags(vcpu);
2334} 2547}
2335 2548
@@ -2346,9 +2559,9 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
2346 case 4: 2559 case 4:
2347 return vcpu->arch.cr4; 2560 return vcpu->arch.cr4;
2348 case 8: 2561 case 8:
2349 return get_cr8(vcpu); 2562 return kvm_get_cr8(vcpu);
2350 default: 2563 default:
2351 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr); 2564 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
2352 return 0; 2565 return 0;
2353 } 2566 }
2354} 2567}
@@ -2358,23 +2571,23 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
2358{ 2571{
2359 switch (cr) { 2572 switch (cr) {
2360 case 0: 2573 case 0:
2361 set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); 2574 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
2362 *rflags = kvm_x86_ops->get_rflags(vcpu); 2575 *rflags = kvm_x86_ops->get_rflags(vcpu);
2363 break; 2576 break;
2364 case 2: 2577 case 2:
2365 vcpu->arch.cr2 = val; 2578 vcpu->arch.cr2 = val;
2366 break; 2579 break;
2367 case 3: 2580 case 3:
2368 set_cr3(vcpu, val); 2581 kvm_set_cr3(vcpu, val);
2369 break; 2582 break;
2370 case 4: 2583 case 4:
2371 set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); 2584 kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
2372 break; 2585 break;
2373 case 8: 2586 case 8:
2374 set_cr8(vcpu, val & 0xfUL); 2587 kvm_set_cr8(vcpu, val & 0xfUL);
2375 break; 2588 break;
2376 default: 2589 default:
2377 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr); 2590 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
2378 } 2591 }
2379} 2592}
2380 2593
@@ -2447,6 +2660,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
2447 } 2660 }
2448 kvm_x86_ops->decache_regs(vcpu); 2661 kvm_x86_ops->decache_regs(vcpu);
2449 kvm_x86_ops->skip_emulated_instruction(vcpu); 2662 kvm_x86_ops->skip_emulated_instruction(vcpu);
2663 KVMTRACE_5D(CPUID, vcpu, function,
2664 (u32)vcpu->arch.regs[VCPU_REGS_RAX],
2665 (u32)vcpu->arch.regs[VCPU_REGS_RBX],
2666 (u32)vcpu->arch.regs[VCPU_REGS_RCX],
2667 (u32)vcpu->arch.regs[VCPU_REGS_RDX], handler);
2450} 2668}
2451EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); 2669EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
2452 2670
@@ -2469,7 +2687,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
2469 struct kvm_run *kvm_run) 2687 struct kvm_run *kvm_run)
2470{ 2688{
2471 kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; 2689 kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
2472 kvm_run->cr8 = get_cr8(vcpu); 2690 kvm_run->cr8 = kvm_get_cr8(vcpu);
2473 kvm_run->apic_base = kvm_get_apic_base(vcpu); 2691 kvm_run->apic_base = kvm_get_apic_base(vcpu);
2474 if (irqchip_in_kernel(vcpu->kvm)) 2692 if (irqchip_in_kernel(vcpu->kvm))
2475 kvm_run->ready_for_interrupt_injection = 1; 2693 kvm_run->ready_for_interrupt_injection = 1;
@@ -2509,16 +2727,17 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2509{ 2727{
2510 int r; 2728 int r;
2511 2729
2512 if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) { 2730 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
2513 pr_debug("vcpu %d received sipi with vector # %x\n", 2731 pr_debug("vcpu %d received sipi with vector # %x\n",
2514 vcpu->vcpu_id, vcpu->arch.sipi_vector); 2732 vcpu->vcpu_id, vcpu->arch.sipi_vector);
2515 kvm_lapic_reset(vcpu); 2733 kvm_lapic_reset(vcpu);
2516 r = kvm_x86_ops->vcpu_reset(vcpu); 2734 r = kvm_x86_ops->vcpu_reset(vcpu);
2517 if (r) 2735 if (r)
2518 return r; 2736 return r;
2519 vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; 2737 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2520 } 2738 }
2521 2739
2740 down_read(&vcpu->kvm->slots_lock);
2522 vapic_enter(vcpu); 2741 vapic_enter(vcpu);
2523 2742
2524preempted: 2743preempted:
@@ -2526,6 +2745,10 @@ preempted:
2526 kvm_x86_ops->guest_debug_pre(vcpu); 2745 kvm_x86_ops->guest_debug_pre(vcpu);
2527 2746
2528again: 2747again:
2748 if (vcpu->requests)
2749 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
2750 kvm_mmu_unload(vcpu);
2751
2529 r = kvm_mmu_reload(vcpu); 2752 r = kvm_mmu_reload(vcpu);
2530 if (unlikely(r)) 2753 if (unlikely(r))
2531 goto out; 2754 goto out;
@@ -2539,6 +2762,11 @@ again:
2539 r = 0; 2762 r = 0;
2540 goto out; 2763 goto out;
2541 } 2764 }
2765 if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
2766 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
2767 r = 0;
2768 goto out;
2769 }
2542 } 2770 }
2543 2771
2544 kvm_inject_pending_timer_irqs(vcpu); 2772 kvm_inject_pending_timer_irqs(vcpu);
@@ -2557,6 +2785,14 @@ again:
2557 goto out; 2785 goto out;
2558 } 2786 }
2559 2787
2788 if (vcpu->requests)
2789 if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) {
2790 local_irq_enable();
2791 preempt_enable();
2792 r = 1;
2793 goto out;
2794 }
2795
2560 if (signal_pending(current)) { 2796 if (signal_pending(current)) {
2561 local_irq_enable(); 2797 local_irq_enable();
2562 preempt_enable(); 2798 preempt_enable();
@@ -2566,6 +2802,13 @@ again:
2566 goto out; 2802 goto out;
2567 } 2803 }
2568 2804
2805 vcpu->guest_mode = 1;
2806 /*
2807 * Make sure that guest_mode assignment won't happen after
2808 * testing the pending IRQ vector bitmap.
2809 */
2810 smp_wmb();
2811
2569 if (vcpu->arch.exception.pending) 2812 if (vcpu->arch.exception.pending)
2570 __queue_exception(vcpu); 2813 __queue_exception(vcpu);
2571 else if (irqchip_in_kernel(vcpu->kvm)) 2814 else if (irqchip_in_kernel(vcpu->kvm))
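
Setting vcpu->guest_mode before the injection checks, with an smp_wmb() in between, is what keeps kvm_vcpu_kick() race-free: a kicker that queues work and then observes guest_mode == 1 knows it must send an IPI, while a vCPU that sees the queued work bails out before entering the guest. A rough user-space model of that publish-then-check handshake, using C11 sequentially-consistent atomics in place of the kernel's barrier plus IPI (illustrative only, not the kernel mechanism):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int guest_mode;
static atomic_int pending;

/* vCPU thread: publish "entering guest", then look for queued work. */
static bool vcpu_may_enter_guest(void)
{
	atomic_store(&guest_mode, 1);
	return atomic_load(&pending) == 0;	/* bail out if work arrived */
}

/* Kicker thread: queue work, then decide whether an interrupt is needed. */
static bool kick_needs_ipi(void)
{
	atomic_store(&pending, 1);
	return atomic_load(&guest_mode) == 1;
}

With sequentially consistent ordering at least one of the two threads is guaranteed to see the other's store, so either the vCPU refuses to enter the guest or the kicker sends the IPI.
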
@@ -2575,13 +2818,15 @@ again:
2575 2818
2576 kvm_lapic_sync_to_vapic(vcpu); 2819 kvm_lapic_sync_to_vapic(vcpu);
2577 2820
2578 vcpu->guest_mode = 1; 2821 up_read(&vcpu->kvm->slots_lock);
2822
2579 kvm_guest_enter(); 2823 kvm_guest_enter();
2580 2824
2581 if (vcpu->requests) 2825 if (vcpu->requests)
2582 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 2826 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
2583 kvm_x86_ops->tlb_flush(vcpu); 2827 kvm_x86_ops->tlb_flush(vcpu);
2584 2828
2829 KVMTRACE_0D(VMENTRY, vcpu, entryexit);
2585 kvm_x86_ops->run(vcpu, kvm_run); 2830 kvm_x86_ops->run(vcpu, kvm_run);
2586 2831
2587 vcpu->guest_mode = 0; 2832 vcpu->guest_mode = 0;
@@ -2601,6 +2846,8 @@ again:
2601 2846
2602 preempt_enable(); 2847 preempt_enable();
2603 2848
2849 down_read(&vcpu->kvm->slots_lock);
2850
2604 /* 2851 /*
2605 * Profile KVM exit RIPs: 2852 * Profile KVM exit RIPs:
2606 */ 2853 */
@@ -2628,14 +2875,18 @@ again:
2628 } 2875 }
2629 2876
2630out: 2877out:
2878 up_read(&vcpu->kvm->slots_lock);
2631 if (r > 0) { 2879 if (r > 0) {
2632 kvm_resched(vcpu); 2880 kvm_resched(vcpu);
2881 down_read(&vcpu->kvm->slots_lock);
2633 goto preempted; 2882 goto preempted;
2634 } 2883 }
2635 2884
2636 post_kvm_run_save(vcpu, kvm_run); 2885 post_kvm_run_save(vcpu, kvm_run);
2637 2886
2887 down_read(&vcpu->kvm->slots_lock);
2638 vapic_exit(vcpu); 2888 vapic_exit(vcpu);
2889 up_read(&vcpu->kvm->slots_lock);
2639 2890
2640 return r; 2891 return r;
2641} 2892}
@@ -2647,7 +2898,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2647 2898
2648 vcpu_load(vcpu); 2899 vcpu_load(vcpu);
2649 2900
2650 if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_UNINITIALIZED)) { 2901 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
2651 kvm_vcpu_block(vcpu); 2902 kvm_vcpu_block(vcpu);
2652 vcpu_put(vcpu); 2903 vcpu_put(vcpu);
2653 return -EAGAIN; 2904 return -EAGAIN;
@@ -2658,7 +2909,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2658 2909
2659 /* re-sync apic's tpr */ 2910 /* re-sync apic's tpr */
2660 if (!irqchip_in_kernel(vcpu->kvm)) 2911 if (!irqchip_in_kernel(vcpu->kvm))
2661 set_cr8(vcpu, kvm_run->cr8); 2912 kvm_set_cr8(vcpu, kvm_run->cr8);
2662 2913
2663 if (vcpu->arch.pio.cur_count) { 2914 if (vcpu->arch.pio.cur_count) {
2664 r = complete_pio(vcpu); 2915 r = complete_pio(vcpu);
@@ -2670,9 +2921,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2670 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); 2921 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
2671 vcpu->mmio_read_completed = 1; 2922 vcpu->mmio_read_completed = 1;
2672 vcpu->mmio_needed = 0; 2923 vcpu->mmio_needed = 0;
2924
2925 down_read(&vcpu->kvm->slots_lock);
2673 r = emulate_instruction(vcpu, kvm_run, 2926 r = emulate_instruction(vcpu, kvm_run,
2674 vcpu->arch.mmio_fault_cr2, 0, 2927 vcpu->arch.mmio_fault_cr2, 0,
2675 EMULTYPE_NO_DECODE); 2928 EMULTYPE_NO_DECODE);
2929 up_read(&vcpu->kvm->slots_lock);
2676 if (r == EMULATE_DO_MMIO) { 2930 if (r == EMULATE_DO_MMIO) {
2677 /* 2931 /*
2678 * Read-modify-write. Back to userspace. 2932 * Read-modify-write. Back to userspace.
@@ -2773,7 +3027,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2773static void get_segment(struct kvm_vcpu *vcpu, 3027static void get_segment(struct kvm_vcpu *vcpu,
2774 struct kvm_segment *var, int seg) 3028 struct kvm_segment *var, int seg)
2775{ 3029{
2776 return kvm_x86_ops->get_segment(vcpu, var, seg); 3030 kvm_x86_ops->get_segment(vcpu, var, seg);
2777} 3031}
2778 3032
2779void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 3033void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -2816,7 +3070,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2816 sregs->cr2 = vcpu->arch.cr2; 3070 sregs->cr2 = vcpu->arch.cr2;
2817 sregs->cr3 = vcpu->arch.cr3; 3071 sregs->cr3 = vcpu->arch.cr3;
2818 sregs->cr4 = vcpu->arch.cr4; 3072 sregs->cr4 = vcpu->arch.cr4;
2819 sregs->cr8 = get_cr8(vcpu); 3073 sregs->cr8 = kvm_get_cr8(vcpu);
2820 sregs->efer = vcpu->arch.shadow_efer; 3074 sregs->efer = vcpu->arch.shadow_efer;
2821 sregs->apic_base = kvm_get_apic_base(vcpu); 3075 sregs->apic_base = kvm_get_apic_base(vcpu);
2822 3076
@@ -2836,12 +3090,438 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2836 return 0; 3090 return 0;
2837} 3091}
2838 3092
3093int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3094 struct kvm_mp_state *mp_state)
3095{
3096 vcpu_load(vcpu);
3097 mp_state->mp_state = vcpu->arch.mp_state;
3098 vcpu_put(vcpu);
3099 return 0;
3100}
3101
3102int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3103 struct kvm_mp_state *mp_state)
3104{
3105 vcpu_load(vcpu);
3106 vcpu->arch.mp_state = mp_state->mp_state;
3107 vcpu_put(vcpu);
3108 return 0;
3109}
3110
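
These two handlers back the new KVM_GET_MP_STATE / KVM_SET_MP_STATE ioctls mentioned in the shortlog, letting user space save and restore a vCPU's multiprocessing state (runnable, halted, waiting for SIPI, and so on). A user-space usage sketch, assuming a vCPU fd and the <linux/kvm.h> definitions:

#include <sys/ioctl.h>
#include <linux/kvm.h>

int checkpoint_mpstate(int vcpu_fd)
{
	struct kvm_mp_state mp;

	if (ioctl(vcpu_fd, KVM_GET_MP_STATE, &mp) < 0)
		return -1;
	/* ... persist mp.mp_state alongside the rest of the vCPU state ... */
	return ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
}
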
2839static void set_segment(struct kvm_vcpu *vcpu, 3111static void set_segment(struct kvm_vcpu *vcpu,
2840 struct kvm_segment *var, int seg) 3112 struct kvm_segment *var, int seg)
2841{ 3113{
2842 return kvm_x86_ops->set_segment(vcpu, var, seg); 3114 kvm_x86_ops->set_segment(vcpu, var, seg);
3115}
3116
3117static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
3118 struct kvm_segment *kvm_desct)
3119{
3120 kvm_desct->base = seg_desc->base0;
3121 kvm_desct->base |= seg_desc->base1 << 16;
3122 kvm_desct->base |= seg_desc->base2 << 24;
3123 kvm_desct->limit = seg_desc->limit0;
3124 kvm_desct->limit |= seg_desc->limit << 16;
3125 kvm_desct->selector = selector;
3126 kvm_desct->type = seg_desc->type;
3127 kvm_desct->present = seg_desc->p;
3128 kvm_desct->dpl = seg_desc->dpl;
3129 kvm_desct->db = seg_desc->d;
3130 kvm_desct->s = seg_desc->s;
3131 kvm_desct->l = seg_desc->l;
3132 kvm_desct->g = seg_desc->g;
3133 kvm_desct->avl = seg_desc->avl;
3134 if (!selector)
3135 kvm_desct->unusable = 1;
3136 else
3137 kvm_desct->unusable = 0;
3138 kvm_desct->padding = 0;
3139}
3140
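
seg_desct_to_kvm_desct() is pure bit surgery: an x86 segment descriptor scatters the 32-bit base across three fields and the 20-bit limit across two, and the helper gathers them into the flat struct kvm_segment layout. A stand-alone sketch of the same gathering (the struct below is an illustrative byte-level view, not the kernel's struct desc_struct):

#include <stdint.h>

struct raw_desc {
	uint16_t limit0;	/* limit bits 0..15 */
	uint16_t base0;		/* base bits 0..15 */
	uint8_t  base1;		/* base bits 16..23 */
	uint8_t  access;	/* type, s, dpl, p */
	uint8_t  limit1_flags;	/* low nibble: limit bits 16..19 */
	uint8_t  base2;		/* base bits 24..31 */
};

static uint32_t desc_base(const struct raw_desc *d)
{
	return d->base0 | ((uint32_t)d->base1 << 16) | ((uint32_t)d->base2 << 24);
}

static uint32_t desc_limit(const struct raw_desc *d)
{
	return d->limit0 | (((uint32_t)d->limit1_flags & 0xf) << 16);
}
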
3141static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
3142 u16 selector,
3143 struct descriptor_table *dtable)
3144{
3145 if (selector & 1 << 2) {
3146 struct kvm_segment kvm_seg;
3147
3148 get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
3149
3150 if (kvm_seg.unusable)
3151 dtable->limit = 0;
3152 else
3153 dtable->limit = kvm_seg.limit;
3154 dtable->base = kvm_seg.base;
3155 }
3156 else
3157 kvm_x86_ops->get_gdt(vcpu, dtable);
3158}
3159
3160/* allowed just for 8 bytes segments */
3161static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3162 struct desc_struct *seg_desc)
3163{
3164 struct descriptor_table dtable;
3165 u16 index = selector >> 3;
3166
3167 get_segment_descritptor_dtable(vcpu, selector, &dtable);
3168
3169 if (dtable.limit < index * 8 + 7) {
3170 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
3171 return 1;
3172 }
3173 return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8);
3174}
3175
3176/* allowed just for 8 bytes segments */
3177static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3178 struct desc_struct *seg_desc)
3179{
3180 struct descriptor_table dtable;
3181 u16 index = selector >> 3;
3182
3183 get_segment_descritptor_dtable(vcpu, selector, &dtable);
3184
3185 if (dtable.limit < index * 8 + 7)
3186 return 1;
3187 return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8);
3188}
3189
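
The two descriptor accessors above decode the selector the same way the hardware does: bit 2 (TI) picks the LDT or GDT, bits 3..15 index into that table, and since each descriptor is 8 bytes the table limit must reach at least index * 8 + 7 or the access faults. A compact sketch of that decoding (names are local to the example):

#include <stdint.h>
#include <stdbool.h>

static bool selector_uses_ldt(uint16_t sel)	 { return sel & (1 << 2); }
static unsigned int selector_index(uint16_t sel) { return sel >> 3; }

static bool descriptor_in_bounds(uint16_t sel, uint32_t table_limit)
{
	return table_limit >= selector_index(sel) * 8u + 7u;
}
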
3190static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
3191 struct desc_struct *seg_desc)
3192{
3193 u32 base_addr;
3194
3195 base_addr = seg_desc->base0;
3196 base_addr |= (seg_desc->base1 << 16);
3197 base_addr |= (seg_desc->base2 << 24);
3198
3199 return base_addr;
3200}
3201
3202static int load_tss_segment32(struct kvm_vcpu *vcpu,
3203 struct desc_struct *seg_desc,
3204 struct tss_segment_32 *tss)
3205{
3206 u32 base_addr;
3207
3208 base_addr = get_tss_base_addr(vcpu, seg_desc);
3209
3210 return kvm_read_guest(vcpu->kvm, base_addr, tss,
3211 sizeof(struct tss_segment_32));
3212}
3213
3214static int save_tss_segment32(struct kvm_vcpu *vcpu,
3215 struct desc_struct *seg_desc,
3216 struct tss_segment_32 *tss)
3217{
3218 u32 base_addr;
3219
3220 base_addr = get_tss_base_addr(vcpu, seg_desc);
3221
3222 return kvm_write_guest(vcpu->kvm, base_addr, tss,
3223 sizeof(struct tss_segment_32));
3224}
3225
3226static int load_tss_segment16(struct kvm_vcpu *vcpu,
3227 struct desc_struct *seg_desc,
3228 struct tss_segment_16 *tss)
3229{
3230 u32 base_addr;
3231
3232 base_addr = get_tss_base_addr(vcpu, seg_desc);
3233
3234 return kvm_read_guest(vcpu->kvm, base_addr, tss,
3235 sizeof(struct tss_segment_16));
3236}
3237
3238static int save_tss_segment16(struct kvm_vcpu *vcpu,
3239 struct desc_struct *seg_desc,
3240 struct tss_segment_16 *tss)
3241{
3242 u32 base_addr;
3243
3244 base_addr = get_tss_base_addr(vcpu, seg_desc);
3245
3246 return kvm_write_guest(vcpu->kvm, base_addr, tss,
3247 sizeof(struct tss_segment_16));
3248}
3249
3250static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
3251{
3252 struct kvm_segment kvm_seg;
3253
3254 get_segment(vcpu, &kvm_seg, seg);
3255 return kvm_seg.selector;
3256}
3257
3258static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
3259 u16 selector,
3260 struct kvm_segment *kvm_seg)
3261{
3262 struct desc_struct seg_desc;
3263
3264 if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
3265 return 1;
3266 seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg);
3267 return 0;
3268}
3269
3270static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3271 int type_bits, int seg)
3272{
3273 struct kvm_segment kvm_seg;
3274
3275 if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
3276 return 1;
3277 kvm_seg.type |= type_bits;
3278
3279 if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
3280 seg != VCPU_SREG_LDTR)
3281 if (!kvm_seg.s)
3282 kvm_seg.unusable = 1;
3283
3284 set_segment(vcpu, &kvm_seg, seg);
3285 return 0;
3286}
3287
3288static void save_state_to_tss32(struct kvm_vcpu *vcpu,
3289 struct tss_segment_32 *tss)
3290{
3291 tss->cr3 = vcpu->arch.cr3;
3292 tss->eip = vcpu->arch.rip;
3293 tss->eflags = kvm_x86_ops->get_rflags(vcpu);
3294 tss->eax = vcpu->arch.regs[VCPU_REGS_RAX];
3295 tss->ecx = vcpu->arch.regs[VCPU_REGS_RCX];
3296 tss->edx = vcpu->arch.regs[VCPU_REGS_RDX];
3297 tss->ebx = vcpu->arch.regs[VCPU_REGS_RBX];
3298 tss->esp = vcpu->arch.regs[VCPU_REGS_RSP];
3299 tss->ebp = vcpu->arch.regs[VCPU_REGS_RBP];
3300 tss->esi = vcpu->arch.regs[VCPU_REGS_RSI];
3301 tss->edi = vcpu->arch.regs[VCPU_REGS_RDI];
3302
3303 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
3304 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
3305 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
3306 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
3307 tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
3308 tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
3309 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
3310 tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
3311}
3312
3313static int load_state_from_tss32(struct kvm_vcpu *vcpu,
3314 struct tss_segment_32 *tss)
3315{
3316 kvm_set_cr3(vcpu, tss->cr3);
3317
3318 vcpu->arch.rip = tss->eip;
3319 kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
3320
3321 vcpu->arch.regs[VCPU_REGS_RAX] = tss->eax;
3322 vcpu->arch.regs[VCPU_REGS_RCX] = tss->ecx;
3323 vcpu->arch.regs[VCPU_REGS_RDX] = tss->edx;
3324 vcpu->arch.regs[VCPU_REGS_RBX] = tss->ebx;
3325 vcpu->arch.regs[VCPU_REGS_RSP] = tss->esp;
3326 vcpu->arch.regs[VCPU_REGS_RBP] = tss->ebp;
3327 vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi;
3328 vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi;
3329
3330 if (load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
3331 return 1;
3332
3333 if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
3334 return 1;
3335
3336 if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
3337 return 1;
3338
3339 if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
3340 return 1;
3341
3342 if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
3343 return 1;
3344
3345 if (load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
3346 return 1;
3347
3348 if (load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
3349 return 1;
3350 return 0;
3351}
3352
3353static void save_state_to_tss16(struct kvm_vcpu *vcpu,
3354 struct tss_segment_16 *tss)
3355{
3356 tss->ip = vcpu->arch.rip;
3357 tss->flag = kvm_x86_ops->get_rflags(vcpu);
3358 tss->ax = vcpu->arch.regs[VCPU_REGS_RAX];
3359 tss->cx = vcpu->arch.regs[VCPU_REGS_RCX];
3360 tss->dx = vcpu->arch.regs[VCPU_REGS_RDX];
3361 tss->bx = vcpu->arch.regs[VCPU_REGS_RBX];
3362 tss->sp = vcpu->arch.regs[VCPU_REGS_RSP];
3363 tss->bp = vcpu->arch.regs[VCPU_REGS_RBP];
3364 tss->si = vcpu->arch.regs[VCPU_REGS_RSI];
3365 tss->di = vcpu->arch.regs[VCPU_REGS_RDI];
3366
3367 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
3368 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
3369 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
3370 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
3371 tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
3372 tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
3373}
3374
3375static int load_state_from_tss16(struct kvm_vcpu *vcpu,
3376 struct tss_segment_16 *tss)
3377{
3378 vcpu->arch.rip = tss->ip;
3379 kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
3380 vcpu->arch.regs[VCPU_REGS_RAX] = tss->ax;
3381 vcpu->arch.regs[VCPU_REGS_RCX] = tss->cx;
3382 vcpu->arch.regs[VCPU_REGS_RDX] = tss->dx;
3383 vcpu->arch.regs[VCPU_REGS_RBX] = tss->bx;
3384 vcpu->arch.regs[VCPU_REGS_RSP] = tss->sp;
3385 vcpu->arch.regs[VCPU_REGS_RBP] = tss->bp;
3386 vcpu->arch.regs[VCPU_REGS_RSI] = tss->si;
3387 vcpu->arch.regs[VCPU_REGS_RDI] = tss->di;
3388
3389 if (load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
3390 return 1;
3391
3392 if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
3393 return 1;
3394
3395 if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
3396 return 1;
3397
3398 if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
3399 return 1;
3400
3401 if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
3402 return 1;
3403 return 0;
3404}
3405
3406int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
3407 struct desc_struct *cseg_desc,
3408 struct desc_struct *nseg_desc)
3409{
3410 struct tss_segment_16 tss_segment_16;
3411 int ret = 0;
3412
3413 if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16))
3414 goto out;
3415
3416 save_state_to_tss16(vcpu, &tss_segment_16);
3417 save_tss_segment16(vcpu, cseg_desc, &tss_segment_16);
3418
3419 if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16))
3420 goto out;
3421 if (load_state_from_tss16(vcpu, &tss_segment_16))
3422 goto out;
3423
3424 ret = 1;
3425out:
3426 return ret;
3427}
3428
3429int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
3430 struct desc_struct *cseg_desc,
3431 struct desc_struct *nseg_desc)
3432{
3433 struct tss_segment_32 tss_segment_32;
3434 int ret = 0;
3435
3436 if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32))
3437 goto out;
3438
3439 save_state_to_tss32(vcpu, &tss_segment_32);
3440 save_tss_segment32(vcpu, cseg_desc, &tss_segment_32);
3441
3442 if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32))
3443 goto out;
3444 if (load_state_from_tss32(vcpu, &tss_segment_32))
3445 goto out;
3446
3447 ret = 1;
3448out:
3449 return ret;
2843} 3450}
2844 3451
3452int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3453{
3454 struct kvm_segment tr_seg;
3455 struct desc_struct cseg_desc;
3456 struct desc_struct nseg_desc;
3457 int ret = 0;
3458
3459 get_segment(vcpu, &tr_seg, VCPU_SREG_TR);
3460
3461 if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
3462 goto out;
3463
3464 if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc))
3465 goto out;
3466
3467
3468 if (reason != TASK_SWITCH_IRET) {
3469 int cpl;
3470
3471 cpl = kvm_x86_ops->get_cpl(vcpu);
3472 if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
3473 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
3474 return 1;
3475 }
3476 }
3477
3478 if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) {
3479 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
3480 return 1;
3481 }
3482
3483 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
3484 cseg_desc.type &= ~(1 << 8); //clear the B flag
3485 save_guest_segment_descriptor(vcpu, tr_seg.selector,
3486 &cseg_desc);
3487 }
3488
3489 if (reason == TASK_SWITCH_IRET) {
3490 u32 eflags = kvm_x86_ops->get_rflags(vcpu);
3491 kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
3492 }
3493
3494 kvm_x86_ops->skip_emulated_instruction(vcpu);
3495 kvm_x86_ops->cache_regs(vcpu);
3496
3497 if (nseg_desc.type & 8)
3498 ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc,
3499 &nseg_desc);
3500 else
3501 ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc,
3502 &nseg_desc);
3503
3504 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
3505 u32 eflags = kvm_x86_ops->get_rflags(vcpu);
3506 kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT);
3507 }
3508
3509 if (reason != TASK_SWITCH_IRET) {
3510 nseg_desc.type |= (1 << 8);
3511 save_guest_segment_descriptor(vcpu, tss_selector,
3512 &nseg_desc);
3513 }
3514
3515 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS);
3516 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
3517 tr_seg.type = 11;
3518 set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
3519out:
3520 kvm_x86_ops->decache_regs(vcpu);
3521 return ret;
3522}
3523EXPORT_SYMBOL_GPL(kvm_task_switch);
3524
2845int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 3525int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2846 struct kvm_sregs *sregs) 3526 struct kvm_sregs *sregs)
2847{ 3527{
@@ -2862,12 +3542,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2862 mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; 3542 mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
2863 vcpu->arch.cr3 = sregs->cr3; 3543 vcpu->arch.cr3 = sregs->cr3;
2864 3544
2865 set_cr8(vcpu, sregs->cr8); 3545 kvm_set_cr8(vcpu, sregs->cr8);
2866 3546
2867 mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer; 3547 mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
2868#ifdef CONFIG_X86_64
2869 kvm_x86_ops->set_efer(vcpu, sregs->efer); 3548 kvm_x86_ops->set_efer(vcpu, sregs->efer);
2870#endif
2871 kvm_set_apic_base(vcpu, sregs->apic_base); 3549 kvm_set_apic_base(vcpu, sregs->apic_base);
2872 3550
2873 kvm_x86_ops->decache_cr4_guest_bits(vcpu); 3551 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
@@ -3141,9 +3819,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
3141 3819
3142 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 3820 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
3143 if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0) 3821 if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
3144 vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; 3822 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3145 else 3823 else
3146 vcpu->arch.mp_state = VCPU_MP_STATE_UNINITIALIZED; 3824 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
3147 3825
3148 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 3826 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
3149 if (!page) { 3827 if (!page) {
@@ -3175,7 +3853,9 @@ fail:
3175void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 3853void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
3176{ 3854{
3177 kvm_free_lapic(vcpu); 3855 kvm_free_lapic(vcpu);
3856 down_read(&vcpu->kvm->slots_lock);
3178 kvm_mmu_destroy(vcpu); 3857 kvm_mmu_destroy(vcpu);
3858 up_read(&vcpu->kvm->slots_lock);
3179 free_page((unsigned long)vcpu->arch.pio_data); 3859 free_page((unsigned long)vcpu->arch.pio_data);
3180} 3860}
3181 3861
@@ -3219,10 +3899,13 @@ static void kvm_free_vcpus(struct kvm *kvm)
3219 3899
3220void kvm_arch_destroy_vm(struct kvm *kvm) 3900void kvm_arch_destroy_vm(struct kvm *kvm)
3221{ 3901{
3902 kvm_free_pit(kvm);
3222 kfree(kvm->arch.vpic); 3903 kfree(kvm->arch.vpic);
3223 kfree(kvm->arch.vioapic); 3904 kfree(kvm->arch.vioapic);
3224 kvm_free_vcpus(kvm); 3905 kvm_free_vcpus(kvm);
3225 kvm_free_physmem(kvm); 3906 kvm_free_physmem(kvm);
3907 if (kvm->arch.apic_access_page)
3908 put_page(kvm->arch.apic_access_page);
3226 kfree(kvm); 3909 kfree(kvm);
3227} 3910}
3228 3911
@@ -3278,8 +3961,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
3278 3961
3279int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 3962int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3280{ 3963{
3281 return vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE 3964 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
3282 || vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED; 3965 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED;
3283} 3966}
3284 3967
3285static void vcpu_kick_intr(void *info) 3968static void vcpu_kick_intr(void *info)
@@ -3293,11 +3976,17 @@ static void vcpu_kick_intr(void *info)
3293void kvm_vcpu_kick(struct kvm_vcpu *vcpu) 3976void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
3294{ 3977{
3295 int ipi_pcpu = vcpu->cpu; 3978 int ipi_pcpu = vcpu->cpu;
3979 int cpu = get_cpu();
3296 3980
3297 if (waitqueue_active(&vcpu->wq)) { 3981 if (waitqueue_active(&vcpu->wq)) {
3298 wake_up_interruptible(&vcpu->wq); 3982 wake_up_interruptible(&vcpu->wq);
3299 ++vcpu->stat.halt_wakeup; 3983 ++vcpu->stat.halt_wakeup;
3300 } 3984 }
3301 if (vcpu->guest_mode) 3985 /*
3986 * We may be called synchronously with irqs disabled in guest mode,
3987 * So need not to call smp_call_function_single() in that case.
3988 */
3989 if (vcpu->guest_mode && vcpu->cpu != cpu)
3302 smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); 3990 smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
3991 put_cpu();
3303} 3992}
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 79586003397a..2ca08386f993 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -65,6 +65,14 @@
65#define MemAbs (1<<9) /* Memory operand is absolute displacement */ 65#define MemAbs (1<<9) /* Memory operand is absolute displacement */
66#define String (1<<10) /* String instruction (rep capable) */ 66#define String (1<<10) /* String instruction (rep capable) */
67#define Stack (1<<11) /* Stack instruction (push/pop) */ 67#define Stack (1<<11) /* Stack instruction (push/pop) */
68#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
69#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
70#define GroupMask 0xff /* Group number stored in bits 0:7 */
71
72enum {
73 Group1_80, Group1_81, Group1_82, Group1_83,
74 Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
75};
68 76
69static u16 opcode_table[256] = { 77static u16 opcode_table[256] = {
70 /* 0x00 - 0x07 */ 78 /* 0x00 - 0x07 */
@@ -123,14 +131,14 @@ static u16 opcode_table[256] = {
123 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, 131 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
124 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, 132 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
125 /* 0x80 - 0x87 */ 133 /* 0x80 - 0x87 */
126 ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, 134 Group | Group1_80, Group | Group1_81,
127 ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, 135 Group | Group1_82, Group | Group1_83,
128 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 136 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
129 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 137 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
130 /* 0x88 - 0x8F */ 138 /* 0x88 - 0x8F */
131 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, 139 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
132 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, 140 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
133 0, ModRM | DstReg, 0, DstMem | SrcNone | ModRM | Mov | Stack, 141 0, ModRM | DstReg, 0, Group | Group1A,
134 /* 0x90 - 0x9F */ 142 /* 0x90 - 0x9F */
135 0, 0, 0, 0, 0, 0, 0, 0, 143 0, 0, 0, 0, 0, 0, 0, 0,
136 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, 144 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
@@ -164,16 +172,15 @@ static u16 opcode_table[256] = {
164 0, 0, 0, 0, 172 0, 0, 0, 0,
165 /* 0xF0 - 0xF7 */ 173 /* 0xF0 - 0xF7 */
166 0, 0, 0, 0, 174 0, 0, 0, 0,
167 ImplicitOps, ImplicitOps, 175 ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3,
168 ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
169 /* 0xF8 - 0xFF */ 176 /* 0xF8 - 0xFF */
170 ImplicitOps, 0, ImplicitOps, ImplicitOps, 177 ImplicitOps, 0, ImplicitOps, ImplicitOps,
171 0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM 178 0, 0, Group | Group4, Group | Group5,
172}; 179};
173 180
174static u16 twobyte_table[256] = { 181static u16 twobyte_table[256] = {
175 /* 0x00 - 0x0F */ 182 /* 0x00 - 0x0F */
176 0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0, 183 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
177 ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 184 ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
178 /* 0x10 - 0x1F */ 185 /* 0x10 - 0x1F */
179 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, 186 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
@@ -229,6 +236,56 @@ static u16 twobyte_table[256] = {
229 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 236 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
230}; 237};
231 238
239static u16 group_table[] = {
240 [Group1_80*8] =
241 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
242 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
243 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
244 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
245 [Group1_81*8] =
246 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
247 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
248 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
249 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
250 [Group1_82*8] =
251 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
252 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
253 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
254 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
255 [Group1_83*8] =
256 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
257 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
258 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
259 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
260 [Group1A*8] =
261 DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
262 [Group3_Byte*8] =
263 ByteOp | SrcImm | DstMem | ModRM, 0,
264 ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
265 0, 0, 0, 0,
266 [Group3*8] =
267 DstMem | SrcImm | ModRM | SrcImm, 0,
268 DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
269 0, 0, 0, 0,
270 [Group4*8] =
271 ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
272 0, 0, 0, 0, 0, 0,
273 [Group5*8] =
274 DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 0, 0,
275 SrcMem | ModRM, 0, SrcMem | ModRM | Stack, 0,
276 [Group7*8] =
277 0, 0, ModRM | SrcMem, ModRM | SrcMem,
278 SrcNone | ModRM | DstMem | Mov, 0,
279 SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
280};
281
282static u16 group2_table[] = {
283 [Group7*8] =
284 SrcNone | ModRM, 0, 0, 0,
285 SrcNone | ModRM | DstMem | Mov, 0,
286 SrcMem16 | ModRM | Mov, 0,
287};
288
232/* EFLAGS bit definitions. */ 289/* EFLAGS bit definitions. */
233#define EFLG_OF (1<<11) 290#define EFLG_OF (1<<11)
234#define EFLG_DF (1<<10) 291#define EFLG_DF (1<<10)
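
The new Group/GroupDual flags let one opcode_table slot stand in for eight sub-opcodes: when an entry is flagged Group, the decoder peeks at the ModRM byte and takes the real decode flags from group_table (or from group2_table when the entry is GroupDual and mod == 3), indexed by group number times eight plus the ModRM reg field. A minimal sketch of that lookup (names are local to the example, not the emulator's):

#include <stdint.h>

#define EX_GROUP_MASK	0xff	/* group number lives in the low byte */

static uint16_t group_decode_flags(const uint16_t *table,
				   uint16_t d_flags, uint8_t modrm)
{
	unsigned int group = d_flags & EX_GROUP_MASK;

	/* eight entries per group, selected by ModRM bits 3..5 */
	return table[(group << 3) + ((modrm >> 3) & 7)];
}
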
@@ -317,7 +374,7 @@ static u16 twobyte_table[256] = {
317 374
318#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ 375#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
319 do { \ 376 do { \
320 unsigned long _tmp; \ 377 unsigned long __tmp; \
321 switch ((_dst).bytes) { \ 378 switch ((_dst).bytes) { \
322 case 1: \ 379 case 1: \
323 __asm__ __volatile__ ( \ 380 __asm__ __volatile__ ( \
@@ -325,7 +382,7 @@ static u16 twobyte_table[256] = {
325 _op"b %"_bx"3,%1; " \ 382 _op"b %"_bx"3,%1; " \
326 _POST_EFLAGS("0", "4", "2") \ 383 _POST_EFLAGS("0", "4", "2") \
327 : "=m" (_eflags), "=m" ((_dst).val), \ 384 : "=m" (_eflags), "=m" ((_dst).val), \
328 "=&r" (_tmp) \ 385 "=&r" (__tmp) \
329 : _by ((_src).val), "i" (EFLAGS_MASK)); \ 386 : _by ((_src).val), "i" (EFLAGS_MASK)); \
330 break; \ 387 break; \
331 default: \ 388 default: \
@@ -426,29 +483,40 @@ static u16 twobyte_table[256] = {
426 (_type)_x; \ 483 (_type)_x; \
427}) 484})
428 485
486static inline unsigned long ad_mask(struct decode_cache *c)
487{
488 return (1UL << (c->ad_bytes << 3)) - 1;
489}
490
429/* Access/update address held in a register, based on addressing mode. */ 491/* Access/update address held in a register, based on addressing mode. */
430#define address_mask(reg) \ 492static inline unsigned long
431 ((c->ad_bytes == sizeof(unsigned long)) ? \ 493address_mask(struct decode_cache *c, unsigned long reg)
432 (reg) : ((reg) & ((1UL << (c->ad_bytes << 3)) - 1))) 494{
433#define register_address(base, reg) \ 495 if (c->ad_bytes == sizeof(unsigned long))
434 ((base) + address_mask(reg)) 496 return reg;
435#define register_address_increment(reg, inc) \ 497 else
436 do { \ 498 return reg & ad_mask(c);
437 /* signed type ensures sign extension to long */ \ 499}
438 int _inc = (inc); \
439 if (c->ad_bytes == sizeof(unsigned long)) \
440 (reg) += _inc; \
441 else \
442 (reg) = ((reg) & \
443 ~((1UL << (c->ad_bytes << 3)) - 1)) | \
444 (((reg) + _inc) & \
445 ((1UL << (c->ad_bytes << 3)) - 1)); \
446 } while (0)
447 500
448#define JMP_REL(rel) \ 501static inline unsigned long
449 do { \ 502register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
450 register_address_increment(c->eip, rel); \ 503{
451 } while (0) 504 return base + address_mask(c, reg);
505}
506
507static inline void
508register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
509{
510 if (c->ad_bytes == sizeof(unsigned long))
511 *reg += inc;
512 else
513 *reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
514}
515
516static inline void jmp_rel(struct decode_cache *c, int rel)
517{
518 register_address_increment(c, &c->eip, rel);
519}
452 520
453static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, 521static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
454 struct x86_emulate_ops *ops, 522 struct x86_emulate_ops *ops,
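
The old address_mask/register_address/register_address_increment macros and JMP_REL become inline functions taking the decode cache, but the arithmetic is unchanged: only the low ad_bytes * 8 bits of a register participate in address formation, so a 16-bit stack pointer wraps instead of borrowing into the upper bits. A stand-alone sketch of that wrapping behaviour (user-space, illustrative only):

#include <stdio.h>

static void reg_inc(unsigned long *reg, int ad_bytes, int inc)
{
	if (ad_bytes == sizeof(unsigned long)) {
		*reg += inc;
	} else {
		unsigned long mask = (1UL << (ad_bytes << 3)) - 1;

		*reg = (*reg & ~mask) | ((*reg + inc) & mask);
	}
}

int main(void)
{
	unsigned long sp = 0x10000;	/* bits above the address size are kept */

	reg_inc(&sp, 2, -2);		/* 16-bit addressing: wraps to 0xfffe */
	printf("sp = 0x%lx\n", sp);	/* prints 0x1fffe */
	return 0;
}
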
@@ -763,7 +831,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
763 struct decode_cache *c = &ctxt->decode; 831 struct decode_cache *c = &ctxt->decode;
764 int rc = 0; 832 int rc = 0;
765 int mode = ctxt->mode; 833 int mode = ctxt->mode;
766 int def_op_bytes, def_ad_bytes; 834 int def_op_bytes, def_ad_bytes, group;
767 835
768 /* Shadow copy of register state. Committed on successful emulation. */ 836 /* Shadow copy of register state. Committed on successful emulation. */
769 837
@@ -864,12 +932,24 @@ done_prefixes:
864 c->b = insn_fetch(u8, 1, c->eip); 932 c->b = insn_fetch(u8, 1, c->eip);
865 c->d = twobyte_table[c->b]; 933 c->d = twobyte_table[c->b];
866 } 934 }
935 }
867 936
868 /* Unrecognised? */ 937 if (c->d & Group) {
869 if (c->d == 0) { 938 group = c->d & GroupMask;
870 DPRINTF("Cannot emulate %02x\n", c->b); 939 c->modrm = insn_fetch(u8, 1, c->eip);
871 return -1; 940 --c->eip;
872 } 941
942 group = (group << 3) + ((c->modrm >> 3) & 7);
943 if ((c->d & GroupDual) && (c->modrm >> 6) == 3)
944 c->d = group2_table[group];
945 else
946 c->d = group_table[group];
947 }
948
949 /* Unrecognised? */
950 if (c->d == 0) {
951 DPRINTF("Cannot emulate %02x\n", c->b);
952 return -1;
873 } 953 }
874 954
875 if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) 955 if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
@@ -924,6 +1004,7 @@ done_prefixes:
924 */ 1004 */
925 if ((c->d & ModRM) && c->modrm_mod == 3) { 1005 if ((c->d & ModRM) && c->modrm_mod == 3) {
926 c->src.type = OP_REG; 1006 c->src.type = OP_REG;
1007 c->src.val = c->modrm_val;
927 break; 1008 break;
928 } 1009 }
929 c->src.type = OP_MEM; 1010 c->src.type = OP_MEM;
@@ -967,6 +1048,7 @@ done_prefixes:
967 case DstMem: 1048 case DstMem:
968 if ((c->d & ModRM) && c->modrm_mod == 3) { 1049 if ((c->d & ModRM) && c->modrm_mod == 3) {
969 c->dst.type = OP_REG; 1050 c->dst.type = OP_REG;
1051 c->dst.val = c->dst.orig_val = c->modrm_val;
970 break; 1052 break;
971 } 1053 }
972 c->dst.type = OP_MEM; 1054 c->dst.type = OP_MEM;
@@ -984,8 +1066,8 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
984 c->dst.type = OP_MEM; 1066 c->dst.type = OP_MEM;
985 c->dst.bytes = c->op_bytes; 1067 c->dst.bytes = c->op_bytes;
986 c->dst.val = c->src.val; 1068 c->dst.val = c->src.val;
987 register_address_increment(c->regs[VCPU_REGS_RSP], -c->op_bytes); 1069 register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
988 c->dst.ptr = (void *) register_address(ctxt->ss_base, 1070 c->dst.ptr = (void *) register_address(c, ctxt->ss_base,
989 c->regs[VCPU_REGS_RSP]); 1071 c->regs[VCPU_REGS_RSP]);
990} 1072}
991 1073
@@ -995,13 +1077,13 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
995 struct decode_cache *c = &ctxt->decode; 1077 struct decode_cache *c = &ctxt->decode;
996 int rc; 1078 int rc;
997 1079
998 rc = ops->read_std(register_address(ctxt->ss_base, 1080 rc = ops->read_std(register_address(c, ctxt->ss_base,
999 c->regs[VCPU_REGS_RSP]), 1081 c->regs[VCPU_REGS_RSP]),
1000 &c->dst.val, c->dst.bytes, ctxt->vcpu); 1082 &c->dst.val, c->dst.bytes, ctxt->vcpu);
1001 if (rc != 0) 1083 if (rc != 0)
1002 return rc; 1084 return rc;
1003 1085
1004 register_address_increment(c->regs[VCPU_REGS_RSP], c->dst.bytes); 1086 register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes);
1005 1087
1006 return 0; 1088 return 0;
1007} 1089}
@@ -1043,26 +1125,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
1043 1125
1044 switch (c->modrm_reg) { 1126 switch (c->modrm_reg) {
1045 case 0 ... 1: /* test */ 1127 case 0 ... 1: /* test */
1046 /*
1047 * Special case in Grp3: test has an immediate
1048 * source operand.
1049 */
1050 c->src.type = OP_IMM;
1051 c->src.ptr = (unsigned long *)c->eip;
1052 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1053 if (c->src.bytes == 8)
1054 c->src.bytes = 4;
1055 switch (c->src.bytes) {
1056 case 1:
1057 c->src.val = insn_fetch(s8, 1, c->eip);
1058 break;
1059 case 2:
1060 c->src.val = insn_fetch(s16, 2, c->eip);
1061 break;
1062 case 4:
1063 c->src.val = insn_fetch(s32, 4, c->eip);
1064 break;
1065 }
1066 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); 1128 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
1067 break; 1129 break;
1068 case 2: /* not */ 1130 case 2: /* not */
@@ -1076,7 +1138,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
1076 rc = X86EMUL_UNHANDLEABLE; 1138 rc = X86EMUL_UNHANDLEABLE;
1077 break; 1139 break;
1078 } 1140 }
1079done:
1080 return rc; 1141 return rc;
1081} 1142}
1082 1143
@@ -1084,7 +1145,6 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
1084 struct x86_emulate_ops *ops) 1145 struct x86_emulate_ops *ops)
1085{ 1146{
1086 struct decode_cache *c = &ctxt->decode; 1147 struct decode_cache *c = &ctxt->decode;
1087 int rc;
1088 1148
1089 switch (c->modrm_reg) { 1149 switch (c->modrm_reg) {
1090 case 0: /* inc */ 1150 case 0: /* inc */
@@ -1094,36 +1154,11 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
1094 emulate_1op("dec", c->dst, ctxt->eflags); 1154 emulate_1op("dec", c->dst, ctxt->eflags);
1095 break; 1155 break;
1096 case 4: /* jmp abs */ 1156 case 4: /* jmp abs */
1097 if (c->b == 0xff) 1157 c->eip = c->src.val;
1098 c->eip = c->dst.val;
1099 else {
1100 DPRINTF("Cannot emulate %02x\n", c->b);
1101 return X86EMUL_UNHANDLEABLE;
1102 }
1103 break; 1158 break;
1104 case 6: /* push */ 1159 case 6: /* push */
1105 1160 emulate_push(ctxt);
1106 /* 64-bit mode: PUSH always pushes a 64-bit operand. */
1107
1108 if (ctxt->mode == X86EMUL_MODE_PROT64) {
1109 c->dst.bytes = 8;
1110 rc = ops->read_std((unsigned long)c->dst.ptr,
1111 &c->dst.val, 8, ctxt->vcpu);
1112 if (rc != 0)
1113 return rc;
1114 }
1115 register_address_increment(c->regs[VCPU_REGS_RSP],
1116 -c->dst.bytes);
1117 rc = ops->write_emulated(register_address(ctxt->ss_base,
1118 c->regs[VCPU_REGS_RSP]), &c->dst.val,
1119 c->dst.bytes, ctxt->vcpu);
1120 if (rc != 0)
1121 return rc;
1122 c->dst.type = OP_NONE;
1123 break; 1161 break;
1124 default:
1125 DPRINTF("Cannot emulate %02x\n", c->b);
1126 return X86EMUL_UNHANDLEABLE;
1127 } 1162 }
1128 return 0; 1163 return 0;
1129} 1164}
@@ -1361,19 +1396,19 @@ special_insn:
1361 c->dst.type = OP_MEM; 1396 c->dst.type = OP_MEM;
1362 c->dst.bytes = c->op_bytes; 1397 c->dst.bytes = c->op_bytes;
1363 c->dst.val = c->src.val; 1398 c->dst.val = c->src.val;
1364 register_address_increment(c->regs[VCPU_REGS_RSP], 1399 register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1365 -c->op_bytes); 1400 -c->op_bytes);
1366 c->dst.ptr = (void *) register_address( 1401 c->dst.ptr = (void *) register_address(
1367 ctxt->ss_base, c->regs[VCPU_REGS_RSP]); 1402 c, ctxt->ss_base, c->regs[VCPU_REGS_RSP]);
1368 break; 1403 break;
1369 case 0x58 ... 0x5f: /* pop reg */ 1404 case 0x58 ... 0x5f: /* pop reg */
1370 pop_instruction: 1405 pop_instruction:
1371 if ((rc = ops->read_std(register_address(ctxt->ss_base, 1406 if ((rc = ops->read_std(register_address(c, ctxt->ss_base,
1372 c->regs[VCPU_REGS_RSP]), c->dst.ptr, 1407 c->regs[VCPU_REGS_RSP]), c->dst.ptr,
1373 c->op_bytes, ctxt->vcpu)) != 0) 1408 c->op_bytes, ctxt->vcpu)) != 0)
1374 goto done; 1409 goto done;
1375 1410
1376 register_address_increment(c->regs[VCPU_REGS_RSP], 1411 register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1377 c->op_bytes); 1412 c->op_bytes);
1378 c->dst.type = OP_NONE; /* Disable writeback. */ 1413 c->dst.type = OP_NONE; /* Disable writeback. */
1379 break; 1414 break;
@@ -1393,9 +1428,9 @@ special_insn:
1393 1, 1428 1,
1394 (c->d & ByteOp) ? 1 : c->op_bytes, 1429 (c->d & ByteOp) ? 1 : c->op_bytes,
1395 c->rep_prefix ? 1430 c->rep_prefix ?
1396 address_mask(c->regs[VCPU_REGS_RCX]) : 1, 1431 address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
1397 (ctxt->eflags & EFLG_DF), 1432 (ctxt->eflags & EFLG_DF),
1398 register_address(ctxt->es_base, 1433 register_address(c, ctxt->es_base,
1399 c->regs[VCPU_REGS_RDI]), 1434 c->regs[VCPU_REGS_RDI]),
1400 c->rep_prefix, 1435 c->rep_prefix,
1401 c->regs[VCPU_REGS_RDX]) == 0) { 1436 c->regs[VCPU_REGS_RDX]) == 0) {
@@ -1409,9 +1444,9 @@ special_insn:
1409 0, 1444 0,
1410 (c->d & ByteOp) ? 1 : c->op_bytes, 1445 (c->d & ByteOp) ? 1 : c->op_bytes,
1411 c->rep_prefix ? 1446 c->rep_prefix ?
1412 address_mask(c->regs[VCPU_REGS_RCX]) : 1, 1447 address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
1413 (ctxt->eflags & EFLG_DF), 1448 (ctxt->eflags & EFLG_DF),
1414 register_address(c->override_base ? 1449 register_address(c, c->override_base ?
1415 *c->override_base : 1450 *c->override_base :
1416 ctxt->ds_base, 1451 ctxt->ds_base,
1417 c->regs[VCPU_REGS_RSI]), 1452 c->regs[VCPU_REGS_RSI]),
@@ -1425,7 +1460,7 @@ special_insn:
1425 int rel = insn_fetch(s8, 1, c->eip); 1460 int rel = insn_fetch(s8, 1, c->eip);
1426 1461
1427 if (test_cc(c->b, ctxt->eflags)) 1462 if (test_cc(c->b, ctxt->eflags))
1428 JMP_REL(rel); 1463 jmp_rel(c, rel);
1429 break; 1464 break;
1430 } 1465 }
1431 case 0x80 ... 0x83: /* Grp1 */ 1466 case 0x80 ... 0x83: /* Grp1 */
@@ -1477,7 +1512,7 @@ special_insn:
1477 case 0x88 ... 0x8b: /* mov */ 1512 case 0x88 ... 0x8b: /* mov */
1478 goto mov; 1513 goto mov;
1479 case 0x8d: /* lea r16/r32, m */ 1514 case 0x8d: /* lea r16/r32, m */
1480 c->dst.val = c->modrm_val; 1515 c->dst.val = c->modrm_ea;
1481 break; 1516 break;
1482 case 0x8f: /* pop (sole member of Grp1a) */ 1517 case 0x8f: /* pop (sole member of Grp1a) */
1483 rc = emulate_grp1a(ctxt, ops); 1518 rc = emulate_grp1a(ctxt, ops);
@@ -1501,27 +1536,27 @@ special_insn:
1501 case 0xa4 ... 0xa5: /* movs */ 1536 case 0xa4 ... 0xa5: /* movs */
1502 c->dst.type = OP_MEM; 1537 c->dst.type = OP_MEM;
1503 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1538 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1504 c->dst.ptr = (unsigned long *)register_address( 1539 c->dst.ptr = (unsigned long *)register_address(c,
1505 ctxt->es_base, 1540 ctxt->es_base,
1506 c->regs[VCPU_REGS_RDI]); 1541 c->regs[VCPU_REGS_RDI]);
1507 if ((rc = ops->read_emulated(register_address( 1542 if ((rc = ops->read_emulated(register_address(c,
1508 c->override_base ? *c->override_base : 1543 c->override_base ? *c->override_base :
1509 ctxt->ds_base, 1544 ctxt->ds_base,
1510 c->regs[VCPU_REGS_RSI]), 1545 c->regs[VCPU_REGS_RSI]),
1511 &c->dst.val, 1546 &c->dst.val,
1512 c->dst.bytes, ctxt->vcpu)) != 0) 1547 c->dst.bytes, ctxt->vcpu)) != 0)
1513 goto done; 1548 goto done;
1514 register_address_increment(c->regs[VCPU_REGS_RSI], 1549 register_address_increment(c, &c->regs[VCPU_REGS_RSI],
1515 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 1550 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
1516 : c->dst.bytes); 1551 : c->dst.bytes);
1517 register_address_increment(c->regs[VCPU_REGS_RDI], 1552 register_address_increment(c, &c->regs[VCPU_REGS_RDI],
1518 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 1553 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
1519 : c->dst.bytes); 1554 : c->dst.bytes);
1520 break; 1555 break;
1521 case 0xa6 ... 0xa7: /* cmps */ 1556 case 0xa6 ... 0xa7: /* cmps */
1522 c->src.type = OP_NONE; /* Disable writeback. */ 1557 c->src.type = OP_NONE; /* Disable writeback. */
1523 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1558 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1524 c->src.ptr = (unsigned long *)register_address( 1559 c->src.ptr = (unsigned long *)register_address(c,
1525 c->override_base ? *c->override_base : 1560 c->override_base ? *c->override_base :
1526 ctxt->ds_base, 1561 ctxt->ds_base,
1527 c->regs[VCPU_REGS_RSI]); 1562 c->regs[VCPU_REGS_RSI]);
@@ -1533,7 +1568,7 @@ special_insn:
1533 1568
1534 c->dst.type = OP_NONE; /* Disable writeback. */ 1569 c->dst.type = OP_NONE; /* Disable writeback. */
1535 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1570 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1536 c->dst.ptr = (unsigned long *)register_address( 1571 c->dst.ptr = (unsigned long *)register_address(c,
1537 ctxt->es_base, 1572 ctxt->es_base,
1538 c->regs[VCPU_REGS_RDI]); 1573 c->regs[VCPU_REGS_RDI]);
1539 if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, 1574 if ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
@@ -1546,10 +1581,10 @@ special_insn:
1546 1581
1547 emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); 1582 emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
1548 1583
1549 register_address_increment(c->regs[VCPU_REGS_RSI], 1584 register_address_increment(c, &c->regs[VCPU_REGS_RSI],
1550 (ctxt->eflags & EFLG_DF) ? -c->src.bytes 1585 (ctxt->eflags & EFLG_DF) ? -c->src.bytes
1551 : c->src.bytes); 1586 : c->src.bytes);
1552 register_address_increment(c->regs[VCPU_REGS_RDI], 1587 register_address_increment(c, &c->regs[VCPU_REGS_RDI],
1553 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 1588 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
1554 : c->dst.bytes); 1589 : c->dst.bytes);
1555 1590
@@ -1557,11 +1592,11 @@ special_insn:
1557 case 0xaa ... 0xab: /* stos */ 1592 case 0xaa ... 0xab: /* stos */
1558 c->dst.type = OP_MEM; 1593 c->dst.type = OP_MEM;
1559 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1594 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1560 c->dst.ptr = (unsigned long *)register_address( 1595 c->dst.ptr = (unsigned long *)register_address(c,
1561 ctxt->es_base, 1596 ctxt->es_base,
1562 c->regs[VCPU_REGS_RDI]); 1597 c->regs[VCPU_REGS_RDI]);
1563 c->dst.val = c->regs[VCPU_REGS_RAX]; 1598 c->dst.val = c->regs[VCPU_REGS_RAX];
1564 register_address_increment(c->regs[VCPU_REGS_RDI], 1599 register_address_increment(c, &c->regs[VCPU_REGS_RDI],
1565 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 1600 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
1566 : c->dst.bytes); 1601 : c->dst.bytes);
1567 break; 1602 break;
@@ -1569,7 +1604,7 @@ special_insn:
1569 c->dst.type = OP_REG; 1604 c->dst.type = OP_REG;
1570 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1605 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1571 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; 1606 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
1572 if ((rc = ops->read_emulated(register_address( 1607 if ((rc = ops->read_emulated(register_address(c,
1573 c->override_base ? *c->override_base : 1608 c->override_base ? *c->override_base :
1574 ctxt->ds_base, 1609 ctxt->ds_base,
1575 c->regs[VCPU_REGS_RSI]), 1610 c->regs[VCPU_REGS_RSI]),
@@ -1577,7 +1612,7 @@ special_insn:
1577 c->dst.bytes, 1612 c->dst.bytes,
1578 ctxt->vcpu)) != 0) 1613 ctxt->vcpu)) != 0)
1579 goto done; 1614 goto done;
1580 register_address_increment(c->regs[VCPU_REGS_RSI], 1615 register_address_increment(c, &c->regs[VCPU_REGS_RSI],
1581 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 1616 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
1582 : c->dst.bytes); 1617 : c->dst.bytes);
1583 break; 1618 break;
@@ -1616,14 +1651,14 @@ special_insn:
1616 goto cannot_emulate; 1651 goto cannot_emulate;
1617 } 1652 }
1618 c->src.val = (unsigned long) c->eip; 1653 c->src.val = (unsigned long) c->eip;
1619 JMP_REL(rel); 1654 jmp_rel(c, rel);
1620 c->op_bytes = c->ad_bytes; 1655 c->op_bytes = c->ad_bytes;
1621 emulate_push(ctxt); 1656 emulate_push(ctxt);
1622 break; 1657 break;
1623 } 1658 }
1624 case 0xe9: /* jmp rel */ 1659 case 0xe9: /* jmp rel */
1625 case 0xeb: /* jmp rel short */ 1660 case 0xeb: /* jmp rel short */
1626 JMP_REL(c->src.val); 1661 jmp_rel(c, c->src.val);
1627 c->dst.type = OP_NONE; /* Disable writeback. */ 1662 c->dst.type = OP_NONE; /* Disable writeback. */
1628 break; 1663 break;
1629 case 0xf4: /* hlt */ 1664 case 0xf4: /* hlt */
@@ -1690,6 +1725,8 @@ twobyte_insn:
1690 goto done; 1725 goto done;
1691 1726
1692 kvm_emulate_hypercall(ctxt->vcpu); 1727 kvm_emulate_hypercall(ctxt->vcpu);
1728 /* Disable writeback. */
1729 c->dst.type = OP_NONE;
1693 break; 1730 break;
1694 case 2: /* lgdt */ 1731 case 2: /* lgdt */
1695 rc = read_descriptor(ctxt, ops, c->src.ptr, 1732 rc = read_descriptor(ctxt, ops, c->src.ptr,
@@ -1697,6 +1734,8 @@ twobyte_insn:
1697 if (rc) 1734 if (rc)
1698 goto done; 1735 goto done;
1699 realmode_lgdt(ctxt->vcpu, size, address); 1736 realmode_lgdt(ctxt->vcpu, size, address);
1737 /* Disable writeback. */
1738 c->dst.type = OP_NONE;
1700 break; 1739 break;
1701 case 3: /* lidt/vmmcall */ 1740 case 3: /* lidt/vmmcall */
1702 if (c->modrm_mod == 3 && c->modrm_rm == 1) { 1741 if (c->modrm_mod == 3 && c->modrm_rm == 1) {
@@ -1712,27 +1751,25 @@ twobyte_insn:
1712 goto done; 1751 goto done;
1713 realmode_lidt(ctxt->vcpu, size, address); 1752 realmode_lidt(ctxt->vcpu, size, address);
1714 } 1753 }
1754 /* Disable writeback. */
1755 c->dst.type = OP_NONE;
1715 break; 1756 break;
1716 case 4: /* smsw */ 1757 case 4: /* smsw */
1717 if (c->modrm_mod != 3) 1758 c->dst.bytes = 2;
1718 goto cannot_emulate; 1759 c->dst.val = realmode_get_cr(ctxt->vcpu, 0);
1719 *(u16 *)&c->regs[c->modrm_rm]
1720 = realmode_get_cr(ctxt->vcpu, 0);
1721 break; 1760 break;
1722 case 6: /* lmsw */ 1761 case 6: /* lmsw */
1723 if (c->modrm_mod != 3) 1762 realmode_lmsw(ctxt->vcpu, (u16)c->src.val,
1724 goto cannot_emulate; 1763 &ctxt->eflags);
1725 realmode_lmsw(ctxt->vcpu, (u16)c->modrm_val,
1726 &ctxt->eflags);
1727 break; 1764 break;
1728 case 7: /* invlpg*/ 1765 case 7: /* invlpg*/
1729 emulate_invlpg(ctxt->vcpu, memop); 1766 emulate_invlpg(ctxt->vcpu, memop);
1767 /* Disable writeback. */
1768 c->dst.type = OP_NONE;
1730 break; 1769 break;
1731 default: 1770 default:
1732 goto cannot_emulate; 1771 goto cannot_emulate;
1733 } 1772 }
1734 /* Disable writeback. */
1735 c->dst.type = OP_NONE;
1736 break; 1773 break;
1737 case 0x06: 1774 case 0x06:
1738 emulate_clts(ctxt->vcpu); 1775 emulate_clts(ctxt->vcpu);
@@ -1823,7 +1860,7 @@ twobyte_insn:
1823 goto cannot_emulate; 1860 goto cannot_emulate;
1824 } 1861 }
1825 if (test_cc(c->b, ctxt->eflags)) 1862 if (test_cc(c->b, ctxt->eflags))
1826 JMP_REL(rel); 1863 jmp_rel(c, rel);
1827 c->dst.type = OP_NONE; 1864 c->dst.type = OP_NONE;
1828 break; 1865 break;
1829 } 1866 }
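
The hunks above repeatedly swap the old JMP_REL() macro and the bare register_address()/register_address_increment()/address_mask() helpers for variants that take the decode cache, so effective-address arithmetic is masked to the instruction's address size (c->ad_bytes). The new helpers themselves are not part of this excerpt; a minimal sketch consistent with the call sites above (the signatures are taken from the calls, ad_mask() and the exact bodies are assumptions):

static inline unsigned long ad_mask(struct decode_cache *c)
{
	return (1UL << (c->ad_bytes << 3)) - 1;
}

static inline unsigned long address_mask(struct decode_cache *c, unsigned long reg)
{
	if (c->ad_bytes == sizeof(unsigned long))
		return reg;
	return reg & ad_mask(c);
}

static inline unsigned long register_address(struct decode_cache *c,
					     unsigned long base, unsigned long reg)
{
	return base + address_mask(c, reg);
}

static inline void register_address_increment(struct decode_cache *c,
					      unsigned long *reg, int inc)
{
	if (c->ad_bytes == sizeof(unsigned long))
		*reg += inc;
	else
		*reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
}

static inline void jmp_rel(struct decode_cache *c, int rel)
{
	register_address_increment(c, &c->eip, rel);
}
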
diff --git a/drivers/s390/Makefile b/drivers/s390/Makefile
index 5a888704a8d0..4f4e7cf105d4 100644
--- a/drivers/s390/Makefile
+++ b/drivers/s390/Makefile
@@ -5,7 +5,7 @@
5CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w 5CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
6 6
7obj-y += s390mach.o sysinfo.o s390_rdev.o 7obj-y += s390mach.o sysinfo.o s390_rdev.o
8obj-y += cio/ block/ char/ crypto/ net/ scsi/ 8obj-y += cio/ block/ char/ crypto/ net/ scsi/ kvm/
9 9
10drivers-y += drivers/s390/built-in.o 10drivers-y += drivers/s390/built-in.o
11 11
diff --git a/drivers/s390/kvm/Makefile b/drivers/s390/kvm/Makefile
new file mode 100644
index 000000000000..4a5ec39f9ca6
--- /dev/null
+++ b/drivers/s390/kvm/Makefile
@@ -0,0 +1,9 @@
1# Makefile for kvm guest drivers on s390
2#
3# Copyright IBM Corp. 2008
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License (version 2 only)
7# as published by the Free Software Foundation.
8
9obj-$(CONFIG_VIRTIO) += kvm_virtio.o
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
new file mode 100644
index 000000000000..bbef3764fbf8
--- /dev/null
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -0,0 +1,338 @@
1/*
2 * kvm_virtio.c - virtio for kvm on s390
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
11 */
12
13#include <linux/init.h>
14#include <linux/bootmem.h>
15#include <linux/err.h>
16#include <linux/virtio.h>
17#include <linux/virtio_config.h>
18#include <linux/interrupt.h>
19#include <linux/virtio_ring.h>
20#include <asm/io.h>
21#include <asm/kvm_para.h>
22#include <asm/kvm_virtio.h>
23#include <asm/setup.h>
24#include <asm/s390_ext.h>
25
26#define VIRTIO_SUBCODE_64 0x0D00
27
28/*
 29 * The pointer to our page of device descriptions.
30 */
31static void *kvm_devices;
32
33/*
34 * Unique numbering for kvm devices.
35 */
36static unsigned int dev_index;
37
38struct kvm_device {
39 struct virtio_device vdev;
40 struct kvm_device_desc *desc;
41};
42
43#define to_kvmdev(vd) container_of(vd, struct kvm_device, vdev)
44
45/*
46 * memory layout:
47 * - kvm_device_descriptor
48 * struct kvm_device_desc
49 * - configuration
50 * struct kvm_vqconfig
51 * - feature bits
52 * - config space
53 */
54static struct kvm_vqconfig *kvm_vq_config(const struct kvm_device_desc *desc)
55{
56 return (struct kvm_vqconfig *)(desc + 1);
57}
58
59static u8 *kvm_vq_features(const struct kvm_device_desc *desc)
60{
61 return (u8 *)(kvm_vq_config(desc) + desc->num_vq);
62}
63
64static u8 *kvm_vq_configspace(const struct kvm_device_desc *desc)
65{
66 return kvm_vq_features(desc) + desc->feature_len * 2;
67}
68
69/*
70 * The total size of the config page used by this device (incl. desc)
71 */
72static unsigned desc_size(const struct kvm_device_desc *desc)
73{
74 return sizeof(*desc)
75 + desc->num_vq * sizeof(struct kvm_vqconfig)
76 + desc->feature_len * 2
77 + desc->config_len;
78}
79
80/*
 81 * This tests (and acknowledges) a feature bit.
82 */
83static bool kvm_feature(struct virtio_device *vdev, unsigned fbit)
84{
85 struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
86 u8 *features;
87
88 if (fbit / 8 > desc->feature_len)
89 return false;
90
91 features = kvm_vq_features(desc);
92 if (!(features[fbit / 8] & (1 << (fbit % 8))))
93 return false;
94
95 /*
96 * We set the matching bit in the other half of the bitmap to tell the
97 * Host we want to use this feature.
98 */
99 features[desc->feature_len + fbit / 8] |= (1 << (fbit % 8));
100 return true;
101}
102
103/*
104 * Reading and writing elements in config space
105 */
106static void kvm_get(struct virtio_device *vdev, unsigned int offset,
107 void *buf, unsigned len)
108{
109 struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
110
111 BUG_ON(offset + len > desc->config_len);
112 memcpy(buf, kvm_vq_configspace(desc) + offset, len);
113}
114
115static void kvm_set(struct virtio_device *vdev, unsigned int offset,
116 const void *buf, unsigned len)
117{
118 struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
119
120 BUG_ON(offset + len > desc->config_len);
121 memcpy(kvm_vq_configspace(desc) + offset, buf, len);
122}
123
124/*
125 * The operations to get and set the status word just access
126 * the status field of the device descriptor. set_status will also
127 * make a hypercall to the host, to tell about status changes
128 */
129static u8 kvm_get_status(struct virtio_device *vdev)
130{
131 return to_kvmdev(vdev)->desc->status;
132}
133
134static void kvm_set_status(struct virtio_device *vdev, u8 status)
135{
136 BUG_ON(!status);
137 to_kvmdev(vdev)->desc->status = status;
138 kvm_hypercall1(KVM_S390_VIRTIO_SET_STATUS,
139 (unsigned long) to_kvmdev(vdev)->desc);
140}
141
142/*
143 * To reset the device, we use the KVM_VIRTIO_RESET hypercall, using the
144 * descriptor address. The Host will zero the status and all the
145 * features.
146 */
147static void kvm_reset(struct virtio_device *vdev)
148{
149 kvm_hypercall1(KVM_S390_VIRTIO_RESET,
150 (unsigned long) to_kvmdev(vdev)->desc);
151}
152
153/*
154 * When the virtio_ring code wants to notify the Host, it calls us here and we
155 * make a hypercall. We hand the address of the virtqueue so the Host
156 * knows which virtqueue we're talking about.
157 */
158static void kvm_notify(struct virtqueue *vq)
159{
160 struct kvm_vqconfig *config = vq->priv;
161
162 kvm_hypercall1(KVM_S390_VIRTIO_NOTIFY, config->address);
163}
164
165/*
 166 * This routine finds the virtqueue with the given index in the configuration
 167 * of this device and sets it up.
168 */
169static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
170 unsigned index,
171 void (*callback)(struct virtqueue *vq))
172{
173 struct kvm_device *kdev = to_kvmdev(vdev);
174 struct kvm_vqconfig *config;
175 struct virtqueue *vq;
176 int err;
177
178 if (index >= kdev->desc->num_vq)
179 return ERR_PTR(-ENOENT);
180
181 config = kvm_vq_config(kdev->desc)+index;
182
183 if (add_shared_memory(config->address,
184 vring_size(config->num, PAGE_SIZE))) {
185 err = -ENOMEM;
186 goto out;
187 }
188
189 vq = vring_new_virtqueue(config->num, vdev, (void *) config->address,
190 kvm_notify, callback);
191 if (!vq) {
192 err = -ENOMEM;
193 goto unmap;
194 }
195
196 /*
197 * register a callback token
 198 * The host will send this via the external interrupt parameter
199 */
200 config->token = (u64) vq;
201
202 vq->priv = config;
203 return vq;
204unmap:
205 remove_shared_memory(config->address, vring_size(config->num,
206 PAGE_SIZE));
207out:
208 return ERR_PTR(err);
209}
210
211static void kvm_del_vq(struct virtqueue *vq)
212{
213 struct kvm_vqconfig *config = vq->priv;
214
215 vring_del_virtqueue(vq);
216 remove_shared_memory(config->address,
217 vring_size(config->num, PAGE_SIZE));
218}
219
220/*
221 * The config ops structure as defined by virtio config
222 */
223static struct virtio_config_ops kvm_vq_configspace_ops = {
224 .feature = kvm_feature,
225 .get = kvm_get,
226 .set = kvm_set,
227 .get_status = kvm_get_status,
228 .set_status = kvm_set_status,
229 .reset = kvm_reset,
230 .find_vq = kvm_find_vq,
231 .del_vq = kvm_del_vq,
232};
233
234/*
235 * The root device for the kvm virtio devices.
236 * This makes them appear as /sys/devices/kvm_s390/0,1,2 not /sys/devices/0,1,2.
237 */
238static struct device kvm_root = {
239 .parent = NULL,
240 .bus_id = "kvm_s390",
241};
242
243/*
 244 * adds a new device and registers it with virtio
245 * appropriate drivers are loaded by the device model
246 */
247static void add_kvm_device(struct kvm_device_desc *d)
248{
249 struct kvm_device *kdev;
250
251 kdev = kzalloc(sizeof(*kdev), GFP_KERNEL);
252 if (!kdev) {
253 printk(KERN_EMERG "Cannot allocate kvm dev %u\n",
254 dev_index++);
255 return;
256 }
257
258 kdev->vdev.dev.parent = &kvm_root;
259 kdev->vdev.index = dev_index++;
260 kdev->vdev.id.device = d->type;
261 kdev->vdev.config = &kvm_vq_configspace_ops;
262 kdev->desc = d;
263
264 if (register_virtio_device(&kdev->vdev) != 0) {
265 printk(KERN_ERR "Failed to register kvm device %u\n",
266 kdev->vdev.index);
267 kfree(kdev);
268 }
269}
270
271/*
272 * scan_devices() simply iterates through the device page.
273 * The type 0 is reserved to mean "end of devices".
274 */
275static void scan_devices(void)
276{
277 unsigned int i;
278 struct kvm_device_desc *d;
279
280 for (i = 0; i < PAGE_SIZE; i += desc_size(d)) {
281 d = kvm_devices + i;
282
283 if (d->type == 0)
284 break;
285
286 add_kvm_device(d);
287 }
288}
289
290/*
291 * we emulate the request_irq behaviour on top of s390 extints
292 */
293static void kvm_extint_handler(u16 code)
294{
295 void *data = (void *) *(long *) __LC_PFAULT_INTPARM;
296 u16 subcode = S390_lowcore.cpu_addr;
297
298 if ((subcode & 0xff00) != VIRTIO_SUBCODE_64)
299 return;
300
301 vring_interrupt(0, data);
302}
303
304/*
305 * Init function for virtio
306 * devices are in a single page above top of "normal" mem
307 */
308static int __init kvm_devices_init(void)
309{
310 int rc;
311
312 if (!MACHINE_IS_KVM)
313 return -ENODEV;
314
315 rc = device_register(&kvm_root);
316 if (rc) {
317 printk(KERN_ERR "Could not register kvm_s390 root device");
318 return rc;
319 }
320
321 if (add_shared_memory((max_pfn) << PAGE_SHIFT, PAGE_SIZE)) {
322 device_unregister(&kvm_root);
323 return -ENOMEM;
324 }
325
326 kvm_devices = (void *) (max_pfn << PAGE_SHIFT);
327
328 ctl_set_bit(0, 9);
329 register_external_interrupt(0x2603, kvm_extint_handler);
330
331 scan_devices();
332 return 0;
333}
334
335/*
336 * We do this after core stuff, but before the drivers.
337 */
338postcore_initcall(kvm_devices_init);
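
The desc_size()/kvm_vq_config()/kvm_vq_features()/kvm_vq_configspace() accessors above walk a per-device descriptor defined in asm-s390/kvm_virtio.h, which is not part of this excerpt. A sketch of the layout they assume (field names inferred from the call sites; illustrative, not the authoritative definition):

/*
 * Each device occupies: header | num_vq * struct kvm_vqconfig
 *                       | feature_len bytes of host feature bits
 *                       | feature_len bytes of guest acknowledgement bits
 *                       | config_len bytes of device config space
 */
struct kvm_device_desc {
	__u8 type;		/* virtio device type; 0 terminates the page */
	__u8 num_vq;		/* number of struct kvm_vqconfig that follow */
	__u8 feature_len;	/* bytes per half of the feature bitmap */
	__u8 config_len;	/* bytes of device-specific config space */
	__u8 status;		/* written back by kvm_set_status() */
	__u8 config[0];
};
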
diff --git a/include/asm-ia64/gcc_intrin.h b/include/asm-ia64/gcc_intrin.h
index de2ed2cbdd84..2fe292c275fe 100644
--- a/include/asm-ia64/gcc_intrin.h
+++ b/include/asm-ia64/gcc_intrin.h
@@ -21,6 +21,10 @@
21 21
22#define ia64_invala_fr(regnum) asm volatile ("invala.e f%0" :: "i"(regnum)) 22#define ia64_invala_fr(regnum) asm volatile ("invala.e f%0" :: "i"(regnum))
23 23
24#define ia64_flushrs() asm volatile ("flushrs;;":::"memory")
25
26#define ia64_loadrs() asm volatile ("loadrs;;":::"memory")
27
24extern void ia64_bad_param_for_setreg (void); 28extern void ia64_bad_param_for_setreg (void);
25extern void ia64_bad_param_for_getreg (void); 29extern void ia64_bad_param_for_getreg (void);
26 30
@@ -517,6 +521,14 @@ do { \
517#define ia64_ptrd(addr, size) \ 521#define ia64_ptrd(addr, size) \
518 asm volatile ("ptr.d %0,%1" :: "r"(addr), "r"(size) : "memory") 522 asm volatile ("ptr.d %0,%1" :: "r"(addr), "r"(size) : "memory")
519 523
524#define ia64_ttag(addr) \
525({ \
526 __u64 ia64_intri_res; \
527 asm volatile ("ttag %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); \
528 ia64_intri_res; \
529})
530
531
520/* Values for lfhint in ia64_lfetch and ia64_lfetch_fault */ 532/* Values for lfhint in ia64_lfetch and ia64_lfetch_fault */
521 533
522#define ia64_lfhint_none 0 534#define ia64_lfhint_none 0
diff --git a/include/asm-ia64/kvm.h b/include/asm-ia64/kvm.h
index 030d29b4b26b..eb2d3559d089 100644
--- a/include/asm-ia64/kvm.h
+++ b/include/asm-ia64/kvm.h
@@ -1,6 +1,205 @@
1#ifndef __LINUX_KVM_IA64_H 1#ifndef __ASM_IA64_KVM_H
2#define __LINUX_KVM_IA64_H 2#define __ASM_IA64_KVM_H
3 3
4/* ia64 does not support KVM */ 4/*
5 * asm-ia64/kvm.h: kvm structure definitions for ia64
6 *
7 * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms and conditions of the GNU General Public License,
11 * version 2, as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
20 * Place - Suite 330, Boston, MA 02111-1307 USA.
21 *
22 */
23
24#include <asm/types.h>
25#include <asm/fpu.h>
26
27#include <linux/ioctl.h>
28
29/* Architectural interrupt line count. */
30#define KVM_NR_INTERRUPTS 256
31
32#define KVM_IOAPIC_NUM_PINS 24
33
34struct kvm_ioapic_state {
35 __u64 base_address;
36 __u32 ioregsel;
37 __u32 id;
38 __u32 irr;
39 __u32 pad;
40 union {
41 __u64 bits;
42 struct {
43 __u8 vector;
44 __u8 delivery_mode:3;
45 __u8 dest_mode:1;
46 __u8 delivery_status:1;
47 __u8 polarity:1;
48 __u8 remote_irr:1;
49 __u8 trig_mode:1;
50 __u8 mask:1;
51 __u8 reserve:7;
52 __u8 reserved[4];
53 __u8 dest_id;
54 } fields;
55 } redirtbl[KVM_IOAPIC_NUM_PINS];
56};
57
58#define KVM_IRQCHIP_PIC_MASTER 0
59#define KVM_IRQCHIP_PIC_SLAVE 1
60#define KVM_IRQCHIP_IOAPIC 2
61
62#define KVM_CONTEXT_SIZE 8*1024
63
64union context {
65 /* 8K size */
66 char dummy[KVM_CONTEXT_SIZE];
67 struct {
68 unsigned long psr;
69 unsigned long pr;
70 unsigned long caller_unat;
71 unsigned long pad;
72 unsigned long gr[32];
73 unsigned long ar[128];
74 unsigned long br[8];
75 unsigned long cr[128];
76 unsigned long rr[8];
77 unsigned long ibr[8];
78 unsigned long dbr[8];
79 unsigned long pkr[8];
80 struct ia64_fpreg fr[128];
81 };
82};
83
84struct thash_data {
85 union {
86 struct {
87 unsigned long p : 1; /* 0 */
88 unsigned long rv1 : 1; /* 1 */
89 unsigned long ma : 3; /* 2-4 */
90 unsigned long a : 1; /* 5 */
91 unsigned long d : 1; /* 6 */
92 unsigned long pl : 2; /* 7-8 */
93 unsigned long ar : 3; /* 9-11 */
94 unsigned long ppn : 38; /* 12-49 */
95 unsigned long rv2 : 2; /* 50-51 */
96 unsigned long ed : 1; /* 52 */
97 unsigned long ig1 : 11; /* 53-63 */
98 };
99 struct {
100 unsigned long __rv1 : 53; /* 0-52 */
101 unsigned long contiguous : 1; /*53 */
102 unsigned long tc : 1; /* 54 TR or TC */
103 unsigned long cl : 1;
104 /* 55 I side or D side cache line */
105 unsigned long len : 4; /* 56-59 */
106 unsigned long io : 1; /* 60 entry is for io or not */
107 unsigned long nomap : 1;
 108 /* 61 entry can't be inserted into machine TLB.*/
109 unsigned long checked : 1;
110 /* 62 for VTLB/VHPT sanity check */
111 unsigned long invalid : 1;
112 /* 63 invalid entry */
113 };
114 unsigned long page_flags;
115 }; /* same for VHPT and TLB */
116
117 union {
118 struct {
119 unsigned long rv3 : 2;
120 unsigned long ps : 6;
121 unsigned long key : 24;
122 unsigned long rv4 : 32;
123 };
124 unsigned long itir;
125 };
126 union {
127 struct {
128 unsigned long ig2 : 12;
129 unsigned long vpn : 49;
130 unsigned long vrn : 3;
131 };
132 unsigned long ifa;
133 unsigned long vadr;
134 struct {
135 unsigned long tag : 63;
136 unsigned long ti : 1;
137 };
138 unsigned long etag;
139 };
140 union {
141 struct thash_data *next;
142 unsigned long rid;
143 unsigned long gpaddr;
144 };
145};
146
147#define NITRS 8
148#define NDTRS 8
149
150struct saved_vpd {
151 unsigned long vhpi;
152 unsigned long vgr[16];
153 unsigned long vbgr[16];
154 unsigned long vnat;
155 unsigned long vbnat;
156 unsigned long vcpuid[5];
157 unsigned long vpsr;
158 unsigned long vpr;
159 unsigned long vcr[128];
160};
161
162struct kvm_regs {
163 char *saved_guest;
164 char *saved_stack;
165 struct saved_vpd vpd;
166 /*Arch-regs*/
167 int mp_state;
168 unsigned long vmm_rr;
169 /* TR and TC. */
170 struct thash_data itrs[NITRS];
171 struct thash_data dtrs[NDTRS];
172 /* Bit is set if there is a tr/tc for the region. */
173 unsigned char itr_regions;
174 unsigned char dtr_regions;
175 unsigned char tc_regions;
176
177 char irq_check;
178 unsigned long saved_itc;
179 unsigned long itc_check;
180 unsigned long timer_check;
181 unsigned long timer_pending;
182 unsigned long last_itc;
183
184 unsigned long vrr[8];
185 unsigned long ibr[8];
186 unsigned long dbr[8];
187 unsigned long insvc[4]; /* Interrupt in service. */
188 unsigned long xtp;
189
190 unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
191 unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */
192 unsigned long metaphysical_saved_rr0; /* from kvm_arch */
193 unsigned long metaphysical_saved_rr4; /* from kvm_arch */
194 unsigned long fp_psr; /*used for lazy float register */
195 unsigned long saved_gp;
 196 /*for physical emulation */
197};
198
199struct kvm_sregs {
200};
201
202struct kvm_fpu {
203};
5 204
6#endif 205#endif
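
The kvm_regs/kvm_sregs/kvm_fpu definitions above are what the generic KVM_GET_REGS/KVM_SET_REGS ioctls marshal on ia64. A minimal userspace sketch, assuming a vcpu fd already obtained through the usual /dev/kvm, KVM_CREATE_VM and KVM_CREATE_VCPU sequence (not shown in this patch; error handling trimmed):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int dump_ia64_vcpu_state(int vcpu_fd)
{
	struct kvm_regs regs;

	if (ioctl(vcpu_fd, KVM_GET_REGS, &regs) < 0)
		return -1;
	/* e.g. inspect regs.vpd.vpsr, regs.saved_itc, regs.itr_regions ... */
	return 0;
}
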
diff --git a/include/asm-ia64/kvm_host.h b/include/asm-ia64/kvm_host.h
new file mode 100644
index 000000000000..c082c208c1f3
--- /dev/null
+++ b/include/asm-ia64/kvm_host.h
@@ -0,0 +1,524 @@
1/*
2 * kvm_host.h: used for kvm module, and hold ia64-specific sections.
3 *
4 * Copyright (C) 2007, Intel Corporation.
5 *
6 * Xiantao Zhang <xiantao.zhang@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 *
21 */
22
23#ifndef __ASM_KVM_HOST_H
24#define __ASM_KVM_HOST_H
25
26
27#include <linux/types.h>
28#include <linux/mm.h>
29#include <linux/kvm.h>
30#include <linux/kvm_para.h>
31#include <linux/kvm_types.h>
32
33#include <asm/pal.h>
34#include <asm/sal.h>
35
36#define KVM_MAX_VCPUS 4
37#define KVM_MEMORY_SLOTS 32
 38/* memory slots that are not exposed to userspace */
39#define KVM_PRIVATE_MEM_SLOTS 4
40
41
42/* define exit reasons from vmm to kvm*/
43#define EXIT_REASON_VM_PANIC 0
44#define EXIT_REASON_MMIO_INSTRUCTION 1
45#define EXIT_REASON_PAL_CALL 2
46#define EXIT_REASON_SAL_CALL 3
47#define EXIT_REASON_SWITCH_RR6 4
48#define EXIT_REASON_VM_DESTROY 5
49#define EXIT_REASON_EXTERNAL_INTERRUPT 6
50#define EXIT_REASON_IPI 7
51#define EXIT_REASON_PTC_G 8
52
53/*Define vmm address space and vm data space.*/
54#define KVM_VMM_SIZE (16UL<<20)
55#define KVM_VMM_SHIFT 24
56#define KVM_VMM_BASE 0xD000000000000000UL
57#define VMM_SIZE (8UL<<20)
58
59/*
60 * Define vm_buffer, used by PAL Services, base address.
61 * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M
62 */
63#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
64#define KVM_VM_BUFFER_SIZE (8UL<<20)
65
66/*Define Virtual machine data layout.*/
67#define KVM_VM_DATA_SHIFT 24
68#define KVM_VM_DATA_SIZE (1UL << KVM_VM_DATA_SHIFT)
69#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VMM_SIZE)
70
71
72#define KVM_P2M_BASE KVM_VM_DATA_BASE
73#define KVM_P2M_OFS 0
74#define KVM_P2M_SIZE (8UL << 20)
75
76#define KVM_VHPT_BASE (KVM_P2M_BASE + KVM_P2M_SIZE)
77#define KVM_VHPT_OFS KVM_P2M_SIZE
78#define KVM_VHPT_BLOCK_SIZE (2UL << 20)
79#define VHPT_SHIFT 18
80#define VHPT_SIZE (1UL << VHPT_SHIFT)
81#define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5))
82
83#define KVM_VTLB_BASE (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE)
84#define KVM_VTLB_OFS (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE)
85#define KVM_VTLB_BLOCK_SIZE (1UL<<20)
86#define VTLB_SHIFT 17
87#define VTLB_SIZE (1UL<<VTLB_SHIFT)
88#define VTLB_NUM_ENTRIES (1<<(VTLB_SHIFT-5))
89
90#define KVM_VPD_BASE (KVM_VTLB_BASE+KVM_VTLB_BLOCK_SIZE)
91#define KVM_VPD_OFS (KVM_VTLB_OFS+KVM_VTLB_BLOCK_SIZE)
92#define KVM_VPD_BLOCK_SIZE (2UL<<20)
93#define VPD_SHIFT 16
94#define VPD_SIZE (1UL<<VPD_SHIFT)
95
96#define KVM_VCPU_BASE (KVM_VPD_BASE+KVM_VPD_BLOCK_SIZE)
97#define KVM_VCPU_OFS (KVM_VPD_OFS+KVM_VPD_BLOCK_SIZE)
98#define KVM_VCPU_BLOCK_SIZE (2UL<<20)
99#define VCPU_SHIFT 18
100#define VCPU_SIZE (1UL<<VCPU_SHIFT)
101#define MAX_VCPU_NUM KVM_VCPU_BLOCK_SIZE/VCPU_SIZE
102
103#define KVM_VM_BASE (KVM_VCPU_BASE+KVM_VCPU_BLOCK_SIZE)
104#define KVM_VM_OFS (KVM_VCPU_OFS+KVM_VCPU_BLOCK_SIZE)
105#define KVM_VM_BLOCK_SIZE (1UL<<19)
106
107#define KVM_MEM_DIRTY_LOG_BASE (KVM_VM_BASE+KVM_VM_BLOCK_SIZE)
108#define KVM_MEM_DIRTY_LOG_OFS (KVM_VM_OFS+KVM_VM_BLOCK_SIZE)
109#define KVM_MEM_DIRTY_LOG_SIZE (1UL<<19)
110
111/* Get vpd, vhpt, tlb, vcpu, base*/
112#define VPD_ADDR(n) (KVM_VPD_BASE+n*VPD_SIZE)
113#define VHPT_ADDR(n) (KVM_VHPT_BASE+n*VHPT_SIZE)
114#define VTLB_ADDR(n) (KVM_VTLB_BASE+n*VTLB_SIZE)
115#define VCPU_ADDR(n) (KVM_VCPU_BASE+n*VCPU_SIZE)
116
117/*IO section definitions*/
118#define IOREQ_READ 1
119#define IOREQ_WRITE 0
120
121#define STATE_IOREQ_NONE 0
122#define STATE_IOREQ_READY 1
123#define STATE_IOREQ_INPROCESS 2
124#define STATE_IORESP_READY 3
125
126/*Guest Physical address layout.*/
127#define GPFN_MEM (0UL << 60) /* Guest pfn is normal mem */
128#define GPFN_FRAME_BUFFER (1UL << 60) /* VGA framebuffer */
129#define GPFN_LOW_MMIO (2UL << 60) /* Low MMIO range */
130#define GPFN_PIB (3UL << 60) /* PIB base */
131#define GPFN_IOSAPIC (4UL << 60) /* IOSAPIC base */
132#define GPFN_LEGACY_IO (5UL << 60) /* Legacy I/O base */
133#define GPFN_GFW (6UL << 60) /* Guest Firmware */
134#define GPFN_HIGH_MMIO (7UL << 60) /* High MMIO range */
135
136#define GPFN_IO_MASK (7UL << 60) /* Guest pfn is I/O type */
137#define GPFN_INV_MASK (1UL << 63) /* Guest pfn is invalid */
138#define INVALID_MFN (~0UL)
139#define MEM_G (1UL << 30)
140#define MEM_M (1UL << 20)
141#define MMIO_START (3 * MEM_G)
142#define MMIO_SIZE (512 * MEM_M)
143#define VGA_IO_START 0xA0000UL
144#define VGA_IO_SIZE 0x20000
145#define LEGACY_IO_START (MMIO_START + MMIO_SIZE)
146#define LEGACY_IO_SIZE (64 * MEM_M)
147#define IO_SAPIC_START 0xfec00000UL
148#define IO_SAPIC_SIZE 0x100000
149#define PIB_START 0xfee00000UL
150#define PIB_SIZE 0x200000
151#define GFW_START (4 * MEM_G - 16 * MEM_M)
152#define GFW_SIZE (16 * MEM_M)
153
154/*Deliver mode, defined for ioapic.c*/
155#define dest_Fixed IOSAPIC_FIXED
156#define dest_LowestPrio IOSAPIC_LOWEST_PRIORITY
157
158#define NMI_VECTOR 2
159#define ExtINT_VECTOR 0
160#define NULL_VECTOR (-1)
161#define IA64_SPURIOUS_INT_VECTOR 0x0f
162
163#define VCPU_LID(v) (((u64)(v)->vcpu_id) << 24)
164
165/*
166 *Delivery mode
167 */
168#define SAPIC_DELIV_SHIFT 8
169#define SAPIC_FIXED 0x0
170#define SAPIC_LOWEST_PRIORITY 0x1
171#define SAPIC_PMI 0x2
172#define SAPIC_NMI 0x4
173#define SAPIC_INIT 0x5
174#define SAPIC_EXTINT 0x7
175
176/*
177 * vcpu->requests bit members for arch
178 */
179#define KVM_REQ_PTC_G 32
180#define KVM_REQ_RESUME 33
181
182#define KVM_PAGES_PER_HPAGE 1
183
184struct kvm;
185struct kvm_vcpu;
186struct kvm_guest_debug{
187};
188
189struct kvm_mmio_req {
190 uint64_t addr; /* physical address */
191 uint64_t size; /* size in bytes */
192 uint64_t data; /* data (or paddr of data) */
193 uint8_t state:4;
194 uint8_t dir:1; /* 1=read, 0=write */
195};
196
197/*Pal data struct */
198struct kvm_pal_call{
199 /*In area*/
200 uint64_t gr28;
201 uint64_t gr29;
202 uint64_t gr30;
203 uint64_t gr31;
204 /*Out area*/
205 struct ia64_pal_retval ret;
206};
207
208/* Sal data structure */
209struct kvm_sal_call{
210 /*In area*/
211 uint64_t in0;
212 uint64_t in1;
213 uint64_t in2;
214 uint64_t in3;
215 uint64_t in4;
216 uint64_t in5;
217 uint64_t in6;
218 uint64_t in7;
219 struct sal_ret_values ret;
220};
221
222/*Guest change rr6*/
223struct kvm_switch_rr6 {
224 uint64_t old_rr;
225 uint64_t new_rr;
226};
227
228union ia64_ipi_a{
229 unsigned long val;
230 struct {
231 unsigned long rv : 3;
232 unsigned long ir : 1;
233 unsigned long eid : 8;
234 unsigned long id : 8;
235 unsigned long ib_base : 44;
236 };
237};
238
239union ia64_ipi_d {
240 unsigned long val;
241 struct {
242 unsigned long vector : 8;
243 unsigned long dm : 3;
244 unsigned long ig : 53;
245 };
246};
247
248/*ipi check exit data*/
249struct kvm_ipi_data{
250 union ia64_ipi_a addr;
251 union ia64_ipi_d data;
252};
253
254/*global purge data*/
255struct kvm_ptc_g {
256 unsigned long vaddr;
257 unsigned long rr;
258 unsigned long ps;
259 struct kvm_vcpu *vcpu;
260};
261
262/*Exit control data */
263struct exit_ctl_data{
264 uint32_t exit_reason;
265 uint32_t vm_status;
266 union {
267 struct kvm_mmio_req ioreq;
268 struct kvm_pal_call pal_data;
269 struct kvm_sal_call sal_data;
270 struct kvm_switch_rr6 rr_data;
271 struct kvm_ipi_data ipi_data;
272 struct kvm_ptc_g ptc_g_data;
273 } u;
274};
275
276union pte_flags {
277 unsigned long val;
278 struct {
279 unsigned long p : 1; /*0 */
280 unsigned long : 1; /* 1 */
281 unsigned long ma : 3; /* 2-4 */
282 unsigned long a : 1; /* 5 */
283 unsigned long d : 1; /* 6 */
284 unsigned long pl : 2; /* 7-8 */
285 unsigned long ar : 3; /* 9-11 */
286 unsigned long ppn : 38; /* 12-49 */
287 unsigned long : 2; /* 50-51 */
288 unsigned long ed : 1; /* 52 */
289 };
290};
291
292union ia64_pta {
293 unsigned long val;
294 struct {
295 unsigned long ve : 1;
296 unsigned long reserved0 : 1;
297 unsigned long size : 6;
298 unsigned long vf : 1;
299 unsigned long reserved1 : 6;
300 unsigned long base : 49;
301 };
302};
303
304struct thash_cb {
305 /* THASH base information */
306 struct thash_data *hash; /* hash table pointer */
307 union ia64_pta pta;
308 int num;
309};
310
311struct kvm_vcpu_stat {
312};
313
314struct kvm_vcpu_arch {
315 int launched;
316 int last_exit;
317 int last_run_cpu;
318 int vmm_tr_slot;
319 int vm_tr_slot;
320
321#define KVM_MP_STATE_RUNNABLE 0
322#define KVM_MP_STATE_UNINITIALIZED 1
323#define KVM_MP_STATE_INIT_RECEIVED 2
324#define KVM_MP_STATE_HALTED 3
325 int mp_state;
326
327#define MAX_PTC_G_NUM 3
328 int ptc_g_count;
329 struct kvm_ptc_g ptc_g_data[MAX_PTC_G_NUM];
330
331 /*halt timer to wake up sleepy vcpus*/
332 struct hrtimer hlt_timer;
333 long ht_active;
334
335 struct kvm_lapic *apic; /* kernel irqchip context */
336 struct vpd *vpd;
337
338 /* Exit data for vmm_transition*/
339 struct exit_ctl_data exit_data;
340
341 cpumask_t cache_coherent_map;
342
343 unsigned long vmm_rr;
344 unsigned long host_rr6;
345 unsigned long psbits[8];
346 unsigned long cr_iipa;
347 unsigned long cr_isr;
348 unsigned long vsa_base;
349 unsigned long dirty_log_lock_pa;
350 unsigned long __gp;
351 /* TR and TC. */
352 struct thash_data itrs[NITRS];
353 struct thash_data dtrs[NDTRS];
354 /* Bit is set if there is a tr/tc for the region. */
355 unsigned char itr_regions;
356 unsigned char dtr_regions;
357 unsigned char tc_regions;
358 /* purge all */
359 unsigned long ptce_base;
360 unsigned long ptce_count[2];
361 unsigned long ptce_stride[2];
362 /* itc/itm */
363 unsigned long last_itc;
364 long itc_offset;
365 unsigned long itc_check;
366 unsigned long timer_check;
367 unsigned long timer_pending;
368
369 unsigned long vrr[8];
370 unsigned long ibr[8];
371 unsigned long dbr[8];
372 unsigned long insvc[4]; /* Interrupt in service. */
373 unsigned long xtp;
374
375 unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
376 unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */
377 unsigned long metaphysical_saved_rr0; /* from kvm_arch */
378 unsigned long metaphysical_saved_rr4; /* from kvm_arch */
379 unsigned long fp_psr; /*used for lazy float register */
380 unsigned long saved_gp;
 381 /*for physical emulation */
382 int mode_flags;
383 struct thash_cb vtlb;
384 struct thash_cb vhpt;
385 char irq_check;
386 char irq_new_pending;
387
388 unsigned long opcode;
389 unsigned long cause;
390 union context host;
391 union context guest;
392};
393
394struct kvm_vm_stat {
395 u64 remote_tlb_flush;
396};
397
398struct kvm_sal_data {
399 unsigned long boot_ip;
400 unsigned long boot_gp;
401};
402
403struct kvm_arch {
404 unsigned long vm_base;
405 unsigned long metaphysical_rr0;
406 unsigned long metaphysical_rr4;
407 unsigned long vmm_init_rr;
408 unsigned long vhpt_base;
409 unsigned long vtlb_base;
410 unsigned long vpd_base;
411 spinlock_t dirty_log_lock;
412 struct kvm_ioapic *vioapic;
413 struct kvm_vm_stat stat;
414 struct kvm_sal_data rdv_sal_data;
415};
416
417union cpuid3_t {
418 u64 value;
419 struct {
420 u64 number : 8;
421 u64 revision : 8;
422 u64 model : 8;
423 u64 family : 8;
424 u64 archrev : 8;
425 u64 rv : 24;
426 };
427};
428
429struct kvm_pt_regs {
430 /* The following registers are saved by SAVE_MIN: */
431 unsigned long b6; /* scratch */
432 unsigned long b7; /* scratch */
433
434 unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */
435 unsigned long ar_ssd; /* reserved for future use (scratch) */
436
437 unsigned long r8; /* scratch (return value register 0) */
438 unsigned long r9; /* scratch (return value register 1) */
439 unsigned long r10; /* scratch (return value register 2) */
440 unsigned long r11; /* scratch (return value register 3) */
441
442 unsigned long cr_ipsr; /* interrupted task's psr */
443 unsigned long cr_iip; /* interrupted task's instruction pointer */
444 unsigned long cr_ifs; /* interrupted task's function state */
445
446 unsigned long ar_unat; /* interrupted task's NaT register (preserved) */
447 unsigned long ar_pfs; /* prev function state */
448 unsigned long ar_rsc; /* RSE configuration */
449 /* The following two are valid only if cr_ipsr.cpl > 0: */
450 unsigned long ar_rnat; /* RSE NaT */
451 unsigned long ar_bspstore; /* RSE bspstore */
452
453 unsigned long pr; /* 64 predicate registers (1 bit each) */
454 unsigned long b0; /* return pointer (bp) */
455 unsigned long loadrs; /* size of dirty partition << 16 */
456
457 unsigned long r1; /* the gp pointer */
458 unsigned long r12; /* interrupted task's memory stack pointer */
459 unsigned long r13; /* thread pointer */
460
461 unsigned long ar_fpsr; /* floating point status (preserved) */
462 unsigned long r15; /* scratch */
463
464 /* The remaining registers are NOT saved for system calls. */
465 unsigned long r14; /* scratch */
466 unsigned long r2; /* scratch */
467 unsigned long r3; /* scratch */
468 unsigned long r16; /* scratch */
469 unsigned long r17; /* scratch */
470 unsigned long r18; /* scratch */
471 unsigned long r19; /* scratch */
472 unsigned long r20; /* scratch */
473 unsigned long r21; /* scratch */
474 unsigned long r22; /* scratch */
475 unsigned long r23; /* scratch */
476 unsigned long r24; /* scratch */
477 unsigned long r25; /* scratch */
478 unsigned long r26; /* scratch */
479 unsigned long r27; /* scratch */
480 unsigned long r28; /* scratch */
481 unsigned long r29; /* scratch */
482 unsigned long r30; /* scratch */
483 unsigned long r31; /* scratch */
484 unsigned long ar_ccv; /* compare/exchange value (scratch) */
485
486 /*
487 * Floating point registers that the kernel considers scratch:
488 */
489 struct ia64_fpreg f6; /* scratch */
490 struct ia64_fpreg f7; /* scratch */
491 struct ia64_fpreg f8; /* scratch */
492 struct ia64_fpreg f9; /* scratch */
493 struct ia64_fpreg f10; /* scratch */
494 struct ia64_fpreg f11; /* scratch */
495
496 unsigned long r4; /* preserved */
497 unsigned long r5; /* preserved */
498 unsigned long r6; /* preserved */
499 unsigned long r7; /* preserved */
500 unsigned long eml_unat; /* used for emulating instruction */
501 unsigned long pad0; /* alignment pad */
502};
503
504static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
505{
506 return (struct kvm_pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
507}
508
509typedef int kvm_vmm_entry(void);
510typedef void kvm_tramp_entry(union context *host, union context *guest);
511
512struct kvm_vmm_info{
513 struct module *module;
514 kvm_vmm_entry *vmm_entry;
515 kvm_tramp_entry *tramp_entry;
516 unsigned long vmm_ivt;
517};
518
519int kvm_highest_pending_irq(struct kvm_vcpu *vcpu);
520int kvm_emulate_halt(struct kvm_vcpu *vcpu);
521int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
522void kvm_sal_emul(struct kvm_vcpu *vcpu);
523
524#endif
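
Working through the block-size and offset #defines above, the 16M per-VM data area at KVM_VM_DATA_BASE (0xD000000001000000) is partitioned as follows (straight arithmetic from the values above):

    offset  0M     p2m table        8M
    offset  8M     VHPT block       2M    (VHPT_SIZE = 256K per vcpu)
    offset 10M     VTLB block       1M    (VTLB_SIZE = 128K per vcpu)
    offset 11M     VPD block        2M    (VPD_SIZE  =  64K per vcpu)
    offset 13M     VCPU block       2M    (VCPU_SIZE = 256K per vcpu)
    offset 15M     VM (kvm) block   512K
    offset 15.5M   dirty-log area   512K
                               total 16M  = KVM_VM_DATA_SIZE
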
diff --git a/include/asm-ia64/kvm_para.h b/include/asm-ia64/kvm_para.h
new file mode 100644
index 000000000000..9f9796bb3441
--- /dev/null
+++ b/include/asm-ia64/kvm_para.h
@@ -0,0 +1,29 @@
1#ifndef __IA64_KVM_PARA_H
2#define __IA64_KVM_PARA_H
3
4/*
5 * asm-ia64/kvm_para.h
6 *
7 * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms and conditions of the GNU General Public License,
11 * version 2, as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
20 * Place - Suite 330, Boston, MA 02111-1307 USA.
21 *
22 */
23
24static inline unsigned int kvm_arch_para_features(void)
25{
26 return 0;
27}
28
29#endif
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index 741f7ecb986a..6aff126fc07e 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -119,6 +119,69 @@ struct ia64_psr {
119 __u64 reserved4 : 19; 119 __u64 reserved4 : 19;
120}; 120};
121 121
122union ia64_isr {
123 __u64 val;
124 struct {
125 __u64 code : 16;
126 __u64 vector : 8;
127 __u64 reserved1 : 8;
128 __u64 x : 1;
129 __u64 w : 1;
130 __u64 r : 1;
131 __u64 na : 1;
132 __u64 sp : 1;
133 __u64 rs : 1;
134 __u64 ir : 1;
135 __u64 ni : 1;
136 __u64 so : 1;
137 __u64 ei : 2;
138 __u64 ed : 1;
139 __u64 reserved2 : 20;
140 };
141};
142
143union ia64_lid {
144 __u64 val;
145 struct {
146 __u64 rv : 16;
147 __u64 eid : 8;
148 __u64 id : 8;
149 __u64 ig : 32;
150 };
151};
152
153union ia64_tpr {
154 __u64 val;
155 struct {
156 __u64 ig0 : 4;
157 __u64 mic : 4;
158 __u64 rsv : 8;
159 __u64 mmi : 1;
160 __u64 ig1 : 47;
161 };
162};
163
164union ia64_itir {
165 __u64 val;
166 struct {
167 __u64 rv3 : 2; /* 0-1 */
168 __u64 ps : 6; /* 2-7 */
169 __u64 key : 24; /* 8-31 */
170 __u64 rv4 : 32; /* 32-63 */
171 };
172};
173
174union ia64_rr {
175 __u64 val;
176 struct {
177 __u64 ve : 1; /* enable hw walker */
178 __u64 reserved0: 1; /* reserved */
179 __u64 ps : 6; /* log page size */
180 __u64 rid : 24; /* region id */
181 __u64 reserved1: 32; /* reserved */
182 };
183};
184
122/* 185/*
123 * CPU type, hardware bug flags, and per-CPU state. Frequently used 186 * CPU type, hardware bug flags, and per-CPU state. Frequently used
124 * state comes earlier: 187 * state comes earlier:
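
The bitfield unions added above make register decoding explicit instead of open-coded shifts and masks. A small sketch using union ia64_rr (the values are hypothetical; ia64_set_rr() is mentioned only as the usual consumer of such a value):

	union ia64_rr rr;

	rr.val = 0;
	rr.ve  = 1;		/* enable the hardware VHPT walker */
	rr.ps  = 16;		/* log2 page size: 2^16 = 64K      */
	rr.rid = 0x1234;	/* region id                       */
	/* rr.val can then be written to a region register, e.g. via ia64_set_rr(). */
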
diff --git a/include/asm-powerpc/kvm.h b/include/asm-powerpc/kvm.h
index d1b530fbf8dd..f993e4198d5c 100644
--- a/include/asm-powerpc/kvm.h
+++ b/include/asm-powerpc/kvm.h
@@ -1,6 +1,55 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2007
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
1#ifndef __LINUX_KVM_POWERPC_H 20#ifndef __LINUX_KVM_POWERPC_H
2#define __LINUX_KVM_POWERPC_H 21#define __LINUX_KVM_POWERPC_H
3 22
4/* powerpc does not support KVM */ 23#include <asm/types.h>
24
25struct kvm_regs {
26 __u64 pc;
27 __u64 cr;
28 __u64 ctr;
29 __u64 lr;
30 __u64 xer;
31 __u64 msr;
32 __u64 srr0;
33 __u64 srr1;
34 __u64 pid;
35
36 __u64 sprg0;
37 __u64 sprg1;
38 __u64 sprg2;
39 __u64 sprg3;
40 __u64 sprg4;
41 __u64 sprg5;
42 __u64 sprg6;
43 __u64 sprg7;
44
45 __u64 gpr[32];
46};
47
48struct kvm_sregs {
49};
50
51struct kvm_fpu {
52 __u64 fpr[32];
53};
5 54
6#endif 55#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/include/asm-powerpc/kvm_asm.h b/include/asm-powerpc/kvm_asm.h
new file mode 100644
index 000000000000..2197764796d9
--- /dev/null
+++ b/include/asm-powerpc/kvm_asm.h
@@ -0,0 +1,55 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#ifndef __POWERPC_KVM_ASM_H__
21#define __POWERPC_KVM_ASM_H__
22
23/* IVPR must be 64KiB-aligned. */
24#define VCPU_SIZE_ORDER 4
25#define VCPU_SIZE_LOG (VCPU_SIZE_ORDER + 12)
26#define VCPU_TLB_PGSZ PPC44x_TLB_64K
27#define VCPU_SIZE_BYTES (1<<VCPU_SIZE_LOG)
28
29#define BOOKE_INTERRUPT_CRITICAL 0
30#define BOOKE_INTERRUPT_MACHINE_CHECK 1
31#define BOOKE_INTERRUPT_DATA_STORAGE 2
32#define BOOKE_INTERRUPT_INST_STORAGE 3
33#define BOOKE_INTERRUPT_EXTERNAL 4
34#define BOOKE_INTERRUPT_ALIGNMENT 5
35#define BOOKE_INTERRUPT_PROGRAM 6
36#define BOOKE_INTERRUPT_FP_UNAVAIL 7
37#define BOOKE_INTERRUPT_SYSCALL 8
38#define BOOKE_INTERRUPT_AP_UNAVAIL 9
39#define BOOKE_INTERRUPT_DECREMENTER 10
40#define BOOKE_INTERRUPT_FIT 11
41#define BOOKE_INTERRUPT_WATCHDOG 12
42#define BOOKE_INTERRUPT_DTLB_MISS 13
43#define BOOKE_INTERRUPT_ITLB_MISS 14
44#define BOOKE_INTERRUPT_DEBUG 15
45#define BOOKE_MAX_INTERRUPT 15
46
47#define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */
48#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
49
50#define RESUME_GUEST 0
51#define RESUME_GUEST_NV RESUME_FLAG_NV
52#define RESUME_HOST RESUME_FLAG_HOST
53#define RESUME_HOST_NV (RESUME_FLAG_HOST|RESUME_FLAG_NV)
54
55#endif /* __POWERPC_KVM_ASM_H__ */
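
The RESUME_* values above are composed from the two flag bits; a small sketch of how a handler return value decomposes (the handler itself is hypothetical):

	int r = RESUME_HOST_NV;	/* == RESUME_FLAG_HOST | RESUME_FLAG_NV */
	int reload_nonvolatiles = (r & RESUME_FLAG_NV) != 0;	/* 1 here */
	int return_to_host      = (r & RESUME_FLAG_HOST) != 0;	/* 1 here */
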
diff --git a/include/asm-powerpc/kvm_host.h b/include/asm-powerpc/kvm_host.h
new file mode 100644
index 000000000000..04ffbb8e0a35
--- /dev/null
+++ b/include/asm-powerpc/kvm_host.h
@@ -0,0 +1,152 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2007
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#ifndef __POWERPC_KVM_HOST_H__
21#define __POWERPC_KVM_HOST_H__
22
23#include <linux/mutex.h>
24#include <linux/timer.h>
25#include <linux/types.h>
26#include <linux/kvm_types.h>
27#include <asm/kvm_asm.h>
28
29#define KVM_MAX_VCPUS 1
30#define KVM_MEMORY_SLOTS 32
 31/* memory slots that are not exposed to userspace */
32#define KVM_PRIVATE_MEM_SLOTS 4
33
34/* We don't currently support large pages. */
35#define KVM_PAGES_PER_HPAGE (1<<31)
36
37struct kvm;
38struct kvm_run;
39struct kvm_vcpu;
40
41struct kvm_vm_stat {
42 u32 remote_tlb_flush;
43};
44
45struct kvm_vcpu_stat {
46 u32 sum_exits;
47 u32 mmio_exits;
48 u32 dcr_exits;
49 u32 signal_exits;
50 u32 light_exits;
51 /* Account for special types of light exits: */
52 u32 itlb_real_miss_exits;
53 u32 itlb_virt_miss_exits;
54 u32 dtlb_real_miss_exits;
55 u32 dtlb_virt_miss_exits;
56 u32 syscall_exits;
57 u32 isi_exits;
58 u32 dsi_exits;
59 u32 emulated_inst_exits;
60 u32 dec_exits;
61 u32 ext_intr_exits;
62};
63
64struct tlbe {
65 u32 tid; /* Only the low 8 bits are used. */
66 u32 word0;
67 u32 word1;
68 u32 word2;
69};
70
71struct kvm_arch {
72};
73
74struct kvm_vcpu_arch {
75 /* Unmodified copy of the guest's TLB. */
76 struct tlbe guest_tlb[PPC44x_TLB_SIZE];
77 /* TLB that's actually used when the guest is running. */
78 struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
79 /* Pages which are referenced in the shadow TLB. */
80 struct page *shadow_pages[PPC44x_TLB_SIZE];
81 /* Copy of the host's TLB. */
82 struct tlbe host_tlb[PPC44x_TLB_SIZE];
83
84 u32 host_stack;
85 u32 host_pid;
86
87 u64 fpr[32];
88 u32 gpr[32];
89
90 u32 pc;
91 u32 cr;
92 u32 ctr;
93 u32 lr;
94 u32 xer;
95
96 u32 msr;
97 u32 mmucr;
98 u32 sprg0;
99 u32 sprg1;
100 u32 sprg2;
101 u32 sprg3;
102 u32 sprg4;
103 u32 sprg5;
104 u32 sprg6;
105 u32 sprg7;
106 u32 srr0;
107 u32 srr1;
108 u32 csrr0;
109 u32 csrr1;
110 u32 dsrr0;
111 u32 dsrr1;
112 u32 dear;
113 u32 esr;
114 u32 dec;
115 u32 decar;
116 u32 tbl;
117 u32 tbu;
118 u32 tcr;
119 u32 tsr;
120 u32 ivor[16];
121 u32 ivpr;
122 u32 pir;
123 u32 pid;
124 u32 pvr;
125 u32 ccr0;
126 u32 ccr1;
127 u32 dbcr0;
128 u32 dbcr1;
129
130 u32 last_inst;
131 u32 fault_dear;
132 u32 fault_esr;
133 gpa_t paddr_accessed;
134
135 u8 io_gpr; /* GPR used as IO source/target */
136 u8 mmio_is_bigendian;
137 u8 dcr_needed;
138 u8 dcr_is_write;
139
140 u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
141
142 struct timer_list dec_timer;
143 unsigned long pending_exceptions;
144};
145
146struct kvm_guest_debug {
147 int enabled;
148 unsigned long bp[4];
149 int singlestep;
150};
151
152#endif /* __POWERPC_KVM_HOST_H__ */
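
Of the arrays above, shadow_tlb[] holds the entries actually programmed into hardware and shadow_pages[] is kept parallel to it, pinning the backing pages. A sketch of looking up the page behind one shadow entry (hypothetical helper; PPC44x_TLB_VALID is assumed from asm/mmu-44x.h):

static struct page *example_shadow_backing_page(struct kvm_vcpu *vcpu, int i)
{
	/* word0 of a 440 TLB entry carries the valid bit */
	if (!(vcpu->arch.shadow_tlb[i].word0 & PPC44x_TLB_VALID))
		return NULL;
	return vcpu->arch.shadow_pages[i];
}
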
diff --git a/include/asm-powerpc/kvm_para.h b/include/asm-powerpc/kvm_para.h
new file mode 100644
index 000000000000..2d48f6a63d0b
--- /dev/null
+++ b/include/asm-powerpc/kvm_para.h
@@ -0,0 +1,37 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#ifndef __POWERPC_KVM_PARA_H__
21#define __POWERPC_KVM_PARA_H__
22
23#ifdef __KERNEL__
24
25static inline int kvm_para_available(void)
26{
27 return 0;
28}
29
30static inline unsigned int kvm_arch_para_features(void)
31{
32 return 0;
33}
34
35#endif /* __KERNEL__ */
36
37#endif /* __POWERPC_KVM_PARA_H__ */
diff --git a/include/asm-powerpc/kvm_ppc.h b/include/asm-powerpc/kvm_ppc.h
new file mode 100644
index 000000000000..7ac820308a7e
--- /dev/null
+++ b/include/asm-powerpc/kvm_ppc.h
@@ -0,0 +1,88 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#ifndef __POWERPC_KVM_PPC_H__
21#define __POWERPC_KVM_PPC_H__
22
23/* This file exists just so we can dereference kvm_vcpu, avoiding nested header
24 * dependencies. */
25
26#include <linux/mutex.h>
27#include <linux/timer.h>
28#include <linux/types.h>
29#include <linux/kvm_types.h>
30#include <linux/kvm_host.h>
31
32struct kvm_tlb {
33 struct tlbe guest_tlb[PPC44x_TLB_SIZE];
34 struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
35};
36
37enum emulation_result {
38 EMULATE_DONE, /* no further processing */
39 EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */
40 EMULATE_DO_DCR, /* kvm_run filled with DCR request */
41 EMULATE_FAIL, /* can't emulate this instruction */
42};
43
44extern const unsigned char exception_priority[];
45extern const unsigned char priority_exception[];
46
47extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
48extern char kvmppc_handlers_start[];
49extern unsigned long kvmppc_handler_len;
50
51extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
52extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
53 unsigned int rt, unsigned int bytes,
54 int is_bigendian);
55extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
56 u32 val, unsigned int bytes, int is_bigendian);
57
58extern int kvmppc_emulate_instruction(struct kvm_run *run,
59 struct kvm_vcpu *vcpu);
60
61extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn,
62 u64 asid, u32 flags);
63extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, u64 eaddr, u64 asid);
64extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
65
66extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu);
67
68static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception)
69{
70 unsigned int priority = exception_priority[exception];
71 set_bit(priority, &vcpu->arch.pending_exceptions);
72}
73
74static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception)
75{
76 unsigned int priority = exception_priority[exception];
77 clear_bit(priority, &vcpu->arch.pending_exceptions);
78}
79
80static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
81{
82 if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
83 kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
84
85 vcpu->arch.msr = new_msr;
86}
87
88#endif /* __POWERPC_KVM_PPC_H__ */
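[Editor's note: a minimal illustrative sketch, not part of this patch. kvmppc_queue_exception() above records a pending exception by setting the bit given by exception_priority[] in vcpu->arch.pending_exceptions; a delivery loop can then consume that bitmap. This assumes lower bit numbers mean higher priority and that priority_exception[] maps the bit back to an exception number; the in-tree delivery code lives in arch/powerpc/kvm/ and may differ.]

	/* needs <linux/bitops.h>; example_deliver_one() is a hypothetical name */
	static void example_deliver_one(struct kvm_vcpu *vcpu)
	{
		unsigned long *pending = &vcpu->arch.pending_exceptions;
		unsigned int priority;

		priority = find_first_bit(pending, BITS_PER_LONG);
		if (priority >= BITS_PER_LONG)
			return;				/* nothing pending */

		/* priority_exception[priority] names the exception to inject;
		 * SRR0/SRR1 and the vector branch would be set up here. */

		clear_bit(priority, pending);		/* same bit kvmppc_clear_exception() clears */
	}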
diff --git a/include/asm-powerpc/mmu-44x.h b/include/asm-powerpc/mmu-44x.h
index c8b02d97f753..a825524c981a 100644
--- a/include/asm-powerpc/mmu-44x.h
+++ b/include/asm-powerpc/mmu-44x.h
@@ -53,6 +53,8 @@
53 53
54#ifndef __ASSEMBLY__ 54#ifndef __ASSEMBLY__
55 55
56extern unsigned int tlb_44x_hwater;
57
56typedef struct { 58typedef struct {
57 unsigned long id; 59 unsigned long id;
58 unsigned long vdso_base; 60 unsigned long vdso_base;
diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild
index e92b429d2be1..13c9805349f1 100644
--- a/include/asm-s390/Kbuild
+++ b/include/asm-s390/Kbuild
@@ -7,6 +7,7 @@ header-y += tape390.h
7header-y += ucontext.h 7header-y += ucontext.h
8header-y += vtoc.h 8header-y += vtoc.h
9header-y += zcrypt.h 9header-y += zcrypt.h
10header-y += kvm.h
10 11
11unifdef-y += cmb.h 12unifdef-y += cmb.h
12unifdef-y += debug.h 13unifdef-y += debug.h
diff --git a/include/asm-s390/kvm.h b/include/asm-s390/kvm.h
index 573f2a351386..d74002f95794 100644
--- a/include/asm-s390/kvm.h
+++ b/include/asm-s390/kvm.h
@@ -1,6 +1,45 @@
1#ifndef __LINUX_KVM_S390_H 1#ifndef __LINUX_KVM_S390_H
2#define __LINUX_KVM_S390_H 2#define __LINUX_KVM_S390_H
3 3
4/* s390 does not support KVM */ 4/*
5 * asm-s390/kvm.h - KVM s390 specific structures and definitions
6 *
7 * Copyright IBM Corp. 2008
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License (version 2 only)
11 * as published by the Free Software Foundation.
12 *
13 * Author(s): Carsten Otte <cotte@de.ibm.com>
14 * Christian Borntraeger <borntraeger@de.ibm.com>
15 */
16#include <asm/types.h>
17
18/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
19struct kvm_pic_state {
20 /* no PIC for s390 */
21};
22
23struct kvm_ioapic_state {
24 /* no IOAPIC for s390 */
25};
26
27/* for KVM_GET_REGS and KVM_SET_REGS */
28struct kvm_regs {
29 /* general purpose regs for s390 */
30 __u64 gprs[16];
31};
32
33/* for KVM_GET_SREGS and KVM_SET_SREGS */
34struct kvm_sregs {
35 __u32 acrs[16];
36 __u64 crs[16];
37};
38
39/* for KVM_GET_FPU and KVM_SET_FPU */
40struct kvm_fpu {
41 __u32 fpc;
42 __u64 fprs[16];
43};
5 44
6#endif 45#endif
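[Editor's note: an illustrative userspace sketch, not part of this patch, showing how the kvm_regs layout above is consumed. It assumes an already-created s390 vcpu file descriptor and the standard KVM_GET_REGS ioctl; dump_guest_gprs() is a hypothetical helper name.]

	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int dump_guest_gprs(int vcpu_fd)
	{
		struct kvm_regs regs;
		int i;

		if (ioctl(vcpu_fd, KVM_GET_REGS, &regs) < 0)
			return -1;

		/* on s390, kvm_regs carries the 16 guest general purpose registers */
		for (i = 0; i < 16; i++)
			printf("gpr%-2d = 0x%016llx\n", i,
			       (unsigned long long)regs.gprs[i]);
		return 0;
	}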
diff --git a/include/asm-s390/kvm_host.h b/include/asm-s390/kvm_host.h
new file mode 100644
index 000000000000..f8204a4f2e02
--- /dev/null
+++ b/include/asm-s390/kvm_host.h
@@ -0,0 +1,234 @@
1/*
2 * asm-s390/kvm_host.h - definition for kernel virtual machines on s390
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 */
12
13
14#ifndef ASM_KVM_HOST_H
15#define ASM_KVM_HOST_H
16#include <linux/kvm_host.h>
17#include <asm/debug.h>
18
19#define KVM_MAX_VCPUS 64
20#define KVM_MEMORY_SLOTS 32
 21/* memory slots that are not exposed to userspace */
22#define KVM_PRIVATE_MEM_SLOTS 4
23
24struct kvm_guest_debug {
25};
26
27struct sca_entry {
28 atomic_t scn;
29 __u64 reserved;
30 __u64 sda;
31 __u64 reserved2[2];
32} __attribute__((packed));
33
34
35struct sca_block {
36 __u64 ipte_control;
37 __u64 reserved[5];
38 __u64 mcn;
39 __u64 reserved2;
40 struct sca_entry cpu[64];
41} __attribute__((packed));
42
43#define KVM_PAGES_PER_HPAGE 256
44
45#define CPUSTAT_HOST 0x80000000
46#define CPUSTAT_WAIT 0x10000000
47#define CPUSTAT_ECALL_PEND 0x08000000
48#define CPUSTAT_STOP_INT 0x04000000
49#define CPUSTAT_IO_INT 0x02000000
50#define CPUSTAT_EXT_INT 0x01000000
51#define CPUSTAT_RUNNING 0x00800000
52#define CPUSTAT_RETAINED 0x00400000
53#define CPUSTAT_TIMING_SUB 0x00020000
54#define CPUSTAT_SIE_SUB 0x00010000
55#define CPUSTAT_RRF 0x00008000
56#define CPUSTAT_SLSV 0x00004000
57#define CPUSTAT_SLSR 0x00002000
58#define CPUSTAT_ZARCH 0x00000800
59#define CPUSTAT_MCDS 0x00000100
60#define CPUSTAT_SM 0x00000080
61#define CPUSTAT_G 0x00000008
62#define CPUSTAT_J 0x00000002
63#define CPUSTAT_P 0x00000001
64
65struct sie_block {
66 atomic_t cpuflags; /* 0x0000 */
67 __u32 prefix; /* 0x0004 */
68 __u8 reserved8[32]; /* 0x0008 */
69 __u64 cputm; /* 0x0028 */
70 __u64 ckc; /* 0x0030 */
71 __u64 epoch; /* 0x0038 */
72 __u8 reserved40[4]; /* 0x0040 */
73#define LCTL_CR0 0x8000
74 __u16 lctl; /* 0x0044 */
75 __s16 icpua; /* 0x0046 */
76 __u32 ictl; /* 0x0048 */
77 __u32 eca; /* 0x004c */
78 __u8 icptcode; /* 0x0050 */
79 __u8 reserved51; /* 0x0051 */
80 __u16 ihcpu; /* 0x0052 */
81 __u8 reserved54[2]; /* 0x0054 */
82 __u16 ipa; /* 0x0056 */
83 __u32 ipb; /* 0x0058 */
84 __u32 scaoh; /* 0x005c */
85 __u8 reserved60; /* 0x0060 */
86 __u8 ecb; /* 0x0061 */
87 __u8 reserved62[2]; /* 0x0062 */
88 __u32 scaol; /* 0x0064 */
89 __u8 reserved68[4]; /* 0x0068 */
90 __u32 todpr; /* 0x006c */
91 __u8 reserved70[16]; /* 0x0070 */
92 __u64 gmsor; /* 0x0080 */
93 __u64 gmslm; /* 0x0088 */
94 psw_t gpsw; /* 0x0090 */
95 __u64 gg14; /* 0x00a0 */
96 __u64 gg15; /* 0x00a8 */
97 __u8 reservedb0[30]; /* 0x00b0 */
98 __u16 iprcc; /* 0x00ce */
99 __u8 reservedd0[48]; /* 0x00d0 */
100 __u64 gcr[16]; /* 0x0100 */
101 __u64 gbea; /* 0x0180 */
102 __u8 reserved188[120]; /* 0x0188 */
103} __attribute__((packed));
104
105struct kvm_vcpu_stat {
106 u32 exit_userspace;
107 u32 exit_external_request;
108 u32 exit_external_interrupt;
109 u32 exit_stop_request;
110 u32 exit_validity;
111 u32 exit_instruction;
112 u32 instruction_lctl;
113 u32 instruction_lctg;
114 u32 exit_program_interruption;
115 u32 exit_instr_and_program;
116 u32 deliver_emergency_signal;
117 u32 deliver_service_signal;
118 u32 deliver_virtio_interrupt;
119 u32 deliver_stop_signal;
120 u32 deliver_prefix_signal;
121 u32 deliver_restart_signal;
122 u32 deliver_program_int;
123 u32 exit_wait_state;
124 u32 instruction_stidp;
125 u32 instruction_spx;
126 u32 instruction_stpx;
127 u32 instruction_stap;
128 u32 instruction_storage_key;
129 u32 instruction_stsch;
130 u32 instruction_chsc;
131 u32 instruction_stsi;
132 u32 instruction_stfl;
133 u32 instruction_sigp_sense;
134 u32 instruction_sigp_emergency;
135 u32 instruction_sigp_stop;
136 u32 instruction_sigp_arch;
137 u32 instruction_sigp_prefix;
138 u32 instruction_sigp_restart;
139 u32 diagnose_44;
140};
141
142struct io_info {
143 __u16 subchannel_id; /* 0x0b8 */
144 __u16 subchannel_nr; /* 0x0ba */
145 __u32 io_int_parm; /* 0x0bc */
146 __u32 io_int_word; /* 0x0c0 */
147};
148
149struct ext_info {
150 __u32 ext_params;
151 __u64 ext_params2;
152};
153
154#define PGM_OPERATION 0x01
155#define PGM_PRIVILEGED_OPERATION 0x02
156#define PGM_EXECUTE 0x03
157#define PGM_PROTECTION 0x04
158#define PGM_ADDRESSING 0x05
159#define PGM_SPECIFICATION 0x06
160#define PGM_DATA 0x07
161
162struct pgm_info {
163 __u16 code;
164};
165
166struct prefix_info {
167 __u32 address;
168};
169
170struct interrupt_info {
171 struct list_head list;
172 u64 type;
173 union {
174 struct io_info io;
175 struct ext_info ext;
176 struct pgm_info pgm;
177 struct prefix_info prefix;
178 };
179};
180
181/* for local_interrupt.action_flags */
182#define ACTION_STORE_ON_STOP 1
183#define ACTION_STOP_ON_STOP 2
184
185struct local_interrupt {
186 spinlock_t lock;
187 struct list_head list;
188 atomic_t active;
189 struct float_interrupt *float_int;
190 int timer_due; /* event indicator for waitqueue below */
191 wait_queue_head_t wq;
192 atomic_t *cpuflags;
193 unsigned int action_bits;
194};
195
196struct float_interrupt {
197 spinlock_t lock;
198 struct list_head list;
199 atomic_t active;
200 int next_rr_cpu;
201 unsigned long idle_mask [(64 + sizeof(long) - 1) / sizeof(long)];
202 struct local_interrupt *local_int[64];
203};
204
205
206struct kvm_vcpu_arch {
207 struct sie_block *sie_block;
208 unsigned long guest_gprs[16];
209 s390_fp_regs host_fpregs;
210 unsigned int host_acrs[NUM_ACRS];
211 s390_fp_regs guest_fpregs;
212 unsigned int guest_acrs[NUM_ACRS];
213 struct local_interrupt local_int;
214 struct timer_list ckc_timer;
215 union {
216 cpuid_t cpu_id;
217 u64 stidp_data;
218 };
219};
220
221struct kvm_vm_stat {
222 u32 remote_tlb_flush;
223};
224
225struct kvm_arch{
226 unsigned long guest_origin;
227 unsigned long guest_memsize;
228 struct sca_block *sca;
229 debug_info_t *dbf;
230 struct float_interrupt float_int;
231};
232
233extern int sie64a(struct sie_block *, __u64 *);
234#endif
diff --git a/include/asm-s390/kvm_para.h b/include/asm-s390/kvm_para.h
new file mode 100644
index 000000000000..2c503796b619
--- /dev/null
+++ b/include/asm-s390/kvm_para.h
@@ -0,0 +1,150 @@
1/*
2 * asm-s390/kvm_para.h - definition for paravirtual devices on s390
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
11 */
12
13#ifndef __S390_KVM_PARA_H
14#define __S390_KVM_PARA_H
15
16/*
17 * Hypercalls for KVM on s390. The calling convention is similar to the
18 * s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1
19 * as hypercall number and R7 as parameter 6. The return value is
20 * written to R2. We use the diagnose instruction as hypercall. To avoid
21 * conflicts with existing diagnoses for LPAR and z/VM, we do not use
22 * the instruction encoded number, but specify the number in R1 and
23 * use 0x500 as KVM hypercall
24 *
25 * Copyright IBM Corp. 2007,2008
26 * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
27 *
28 * This work is licensed under the terms of the GNU GPL, version 2.
29 */
30
31static inline long kvm_hypercall0(unsigned long nr)
32{
33 register unsigned long __nr asm("1") = nr;
34 register long __rc asm("2");
35
36 asm volatile ("diag 2,4,0x500\n"
37 : "=d" (__rc) : "d" (__nr): "memory", "cc");
38 return __rc;
39}
40
41static inline long kvm_hypercall1(unsigned long nr, unsigned long p1)
42{
43 register unsigned long __nr asm("1") = nr;
44 register unsigned long __p1 asm("2") = p1;
45 register long __rc asm("2");
46
47 asm volatile ("diag 2,4,0x500\n"
48 : "=d" (__rc) : "d" (__nr), "0" (__p1) : "memory", "cc");
49 return __rc;
50}
51
52static inline long kvm_hypercall2(unsigned long nr, unsigned long p1,
53 unsigned long p2)
54{
55 register unsigned long __nr asm("1") = nr;
56 register unsigned long __p1 asm("2") = p1;
57 register unsigned long __p2 asm("3") = p2;
58 register long __rc asm("2");
59
60 asm volatile ("diag 2,4,0x500\n"
61 : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2)
62 : "memory", "cc");
63 return __rc;
64}
65
66static inline long kvm_hypercall3(unsigned long nr, unsigned long p1,
67 unsigned long p2, unsigned long p3)
68{
69 register unsigned long __nr asm("1") = nr;
70 register unsigned long __p1 asm("2") = p1;
71 register unsigned long __p2 asm("3") = p2;
72 register unsigned long __p3 asm("4") = p3;
73 register long __rc asm("2");
74
75 asm volatile ("diag 2,4,0x500\n"
76 : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
77 "d" (__p3) : "memory", "cc");
78 return __rc;
79}
80
81
82static inline long kvm_hypercall4(unsigned long nr, unsigned long p1,
83 unsigned long p2, unsigned long p3,
84 unsigned long p4)
85{
86 register unsigned long __nr asm("1") = nr;
87 register unsigned long __p1 asm("2") = p1;
88 register unsigned long __p2 asm("3") = p2;
89 register unsigned long __p3 asm("4") = p3;
90 register unsigned long __p4 asm("5") = p4;
91 register long __rc asm("2");
92
93 asm volatile ("diag 2,4,0x500\n"
94 : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
95 "d" (__p3), "d" (__p4) : "memory", "cc");
96 return __rc;
97}
98
99static inline long kvm_hypercall5(unsigned long nr, unsigned long p1,
100 unsigned long p2, unsigned long p3,
101 unsigned long p4, unsigned long p5)
102{
103 register unsigned long __nr asm("1") = nr;
104 register unsigned long __p1 asm("2") = p1;
105 register unsigned long __p2 asm("3") = p2;
106 register unsigned long __p3 asm("4") = p3;
107 register unsigned long __p4 asm("5") = p4;
108 register unsigned long __p5 asm("6") = p5;
109 register long __rc asm("2");
110
111 asm volatile ("diag 2,4,0x500\n"
112 : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
113 "d" (__p3), "d" (__p4), "d" (__p5) : "memory", "cc");
114 return __rc;
115}
116
117static inline long kvm_hypercall6(unsigned long nr, unsigned long p1,
118 unsigned long p2, unsigned long p3,
119 unsigned long p4, unsigned long p5,
120 unsigned long p6)
121{
122 register unsigned long __nr asm("1") = nr;
123 register unsigned long __p1 asm("2") = p1;
124 register unsigned long __p2 asm("3") = p2;
125 register unsigned long __p3 asm("4") = p3;
126 register unsigned long __p4 asm("5") = p4;
127 register unsigned long __p5 asm("6") = p5;
128 register unsigned long __p6 asm("7") = p6;
129 register long __rc asm("2");
130
131 asm volatile ("diag 2,4,0x500\n"
132 : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
133 "d" (__p3), "d" (__p4), "d" (__p5), "d" (__p6)
134 : "memory", "cc");
135 return __rc;
136}
137
138/* kvm on s390 is always paravirtualization enabled */
139static inline int kvm_para_available(void)
140{
141 return 1;
142}
143
144/* No feature bits are currently assigned for kvm on s390 */
145static inline unsigned int kvm_arch_para_features(void)
146{
147 return 0;
148}
149
150#endif /* __S390_KVM_PARA_H */
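[Editor's note: a minimal guest-side sketch, not part of this patch, of the calling convention documented above: hypercall number in R1, parameters from R2 up, return value back in R2, all via diagnose 0x500. EXAMPLE_HC_NOTIFY is a hypothetical, unassigned hypercall number used purely for illustration.]

	#define EXAMPLE_HC_NOTIFY	1	/* hypothetical number, not assigned */

	static long example_notify_host(unsigned long queue_address)
	{
		long rc = kvm_hypercall1(EXAMPLE_HC_NOTIFY, queue_address);

		if (rc < 0)
			printk(KERN_WARNING "notify hypercall failed: %ld\n", rc);
		return rc;
	}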
diff --git a/include/asm-s390/kvm_virtio.h b/include/asm-s390/kvm_virtio.h
new file mode 100644
index 000000000000..5c871a990c29
--- /dev/null
+++ b/include/asm-s390/kvm_virtio.h
@@ -0,0 +1,53 @@
1/*
2 * kvm_virtio.h - definition for virtio for kvm on s390
3 *
4 * Copyright IBM Corp. 2008
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
11 */
12
13#ifndef __KVM_S390_VIRTIO_H
14#define __KVM_S390_VIRTIO_H
15
16#include <linux/types.h>
17
18struct kvm_device_desc {
19 /* The device type: console, network, disk etc. Type 0 terminates. */
20 __u8 type;
21 /* The number of virtqueues (first in config array) */
22 __u8 num_vq;
23 /*
24 * The number of bytes of feature bits. Multiply by 2: one for host
25 * features and one for guest acknowledgements.
26 */
27 __u8 feature_len;
28 /* The number of bytes of the config array after virtqueues. */
29 __u8 config_len;
30 /* A status byte, written by the Guest. */
31 __u8 status;
32 __u8 config[0];
33};
34
35/*
36 * This is how we expect the device configuration field for a virtqueue
37 * to be laid out in config space.
38 */
39struct kvm_vqconfig {
40 /* The token returned with an interrupt. Set by the guest */
41 __u64 token;
42 /* The address of the virtio ring */
43 __u64 address;
44 /* The number of entries in the virtio_ring */
45 __u16 num;
46
47};
48
49#define KVM_S390_VIRTIO_NOTIFY 0
50#define KVM_S390_VIRTIO_RESET 1
51#define KVM_S390_VIRTIO_SET_STATUS 2
52
53#endif
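[Editor's note: an illustrative sketch, not part of this patch, of the config-space layout the comments above describe: a kvm_device_desc is followed by num_vq struct kvm_vqconfig entries, then feature_len bytes of host feature bits, feature_len bytes of guest acknowledgements, and finally config_len bytes of device config. The in-tree accessors live in drivers/s390/kvm/kvm_virtio.c; these example_* helpers are hypothetical names showing only the arithmetic.]

	static struct kvm_vqconfig *example_vq(struct kvm_device_desc *desc, int index)
	{
		/* virtqueue configs start at the flexible config[] array */
		return (struct kvm_vqconfig *)desc->config + index;
	}

	static __u8 *example_features(struct kvm_device_desc *desc)
	{
		/* host feature bits follow the virtqueue configs */
		return desc->config + desc->num_vq * sizeof(struct kvm_vqconfig);
	}

	static __u8 *example_config(struct kvm_device_desc *desc)
	{
		/* device config follows both feature areas (host + guest ack) */
		return example_features(desc) + desc->feature_len * 2;
	}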
diff --git a/include/asm-s390/lowcore.h b/include/asm-s390/lowcore.h
index 5de3efb31445..0bc51d52a899 100644
--- a/include/asm-s390/lowcore.h
+++ b/include/asm-s390/lowcore.h
@@ -381,27 +381,32 @@ struct _lowcore
381 /* whether the kernel died with panic() or not */ 381 /* whether the kernel died with panic() or not */
382 __u32 panic_magic; /* 0xe00 */ 382 __u32 panic_magic; /* 0xe00 */
383 383
384 __u8 pad13[0x1200-0xe04]; /* 0xe04 */ 384 __u8 pad13[0x11b8-0xe04]; /* 0xe04 */
385
386 /* 64 bit extparam used for pfault, diag 250 etc */
387 __u64 ext_params2; /* 0x11B8 */
388
389 __u8 pad14[0x1200-0x11C0]; /* 0x11C0 */
385 390
386 /* System info area */ 391 /* System info area */
387 392
388 __u64 floating_pt_save_area[16]; /* 0x1200 */ 393 __u64 floating_pt_save_area[16]; /* 0x1200 */
389 __u64 gpregs_save_area[16]; /* 0x1280 */ 394 __u64 gpregs_save_area[16]; /* 0x1280 */
390 __u32 st_status_fixed_logout[4]; /* 0x1300 */ 395 __u32 st_status_fixed_logout[4]; /* 0x1300 */
391 __u8 pad14[0x1318-0x1310]; /* 0x1310 */ 396 __u8 pad15[0x1318-0x1310]; /* 0x1310 */
392 __u32 prefixreg_save_area; /* 0x1318 */ 397 __u32 prefixreg_save_area; /* 0x1318 */
393 __u32 fpt_creg_save_area; /* 0x131c */ 398 __u32 fpt_creg_save_area; /* 0x131c */
394 __u8 pad15[0x1324-0x1320]; /* 0x1320 */ 399 __u8 pad16[0x1324-0x1320]; /* 0x1320 */
395 __u32 tod_progreg_save_area; /* 0x1324 */ 400 __u32 tod_progreg_save_area; /* 0x1324 */
396 __u32 cpu_timer_save_area[2]; /* 0x1328 */ 401 __u32 cpu_timer_save_area[2]; /* 0x1328 */
397 __u32 clock_comp_save_area[2]; /* 0x1330 */ 402 __u32 clock_comp_save_area[2]; /* 0x1330 */
398 __u8 pad16[0x1340-0x1338]; /* 0x1338 */ 403 __u8 pad17[0x1340-0x1338]; /* 0x1338 */
399 __u32 access_regs_save_area[16]; /* 0x1340 */ 404 __u32 access_regs_save_area[16]; /* 0x1340 */
400 __u64 cregs_save_area[16]; /* 0x1380 */ 405 __u64 cregs_save_area[16]; /* 0x1380 */
401 406
402 /* align to the top of the prefix area */ 407 /* align to the top of the prefix area */
403 408
404 __u8 pad17[0x2000-0x1400]; /* 0x1400 */ 409 __u8 pad18[0x2000-0x1400]; /* 0x1400 */
405#endif /* !__s390x__ */ 410#endif /* !__s390x__ */
406} __attribute__((packed)); /* End structure*/ 411} __attribute__((packed)); /* End structure*/
407 412
diff --git a/include/asm-s390/mmu.h b/include/asm-s390/mmu.h
index 1698e29c5b20..5dd5e7b3476f 100644
--- a/include/asm-s390/mmu.h
+++ b/include/asm-s390/mmu.h
@@ -7,6 +7,7 @@ typedef struct {
7 unsigned long asce_bits; 7 unsigned long asce_bits;
8 unsigned long asce_limit; 8 unsigned long asce_limit;
9 int noexec; 9 int noexec;
10 int pgstes;
10} mm_context_t; 11} mm_context_t;
11 12
12#endif 13#endif
diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h
index b5a34c6f91a9..4c2fbf48c9c4 100644
--- a/include/asm-s390/mmu_context.h
+++ b/include/asm-s390/mmu_context.h
@@ -20,7 +20,13 @@ static inline int init_new_context(struct task_struct *tsk,
20#ifdef CONFIG_64BIT 20#ifdef CONFIG_64BIT
21 mm->context.asce_bits |= _ASCE_TYPE_REGION3; 21 mm->context.asce_bits |= _ASCE_TYPE_REGION3;
22#endif 22#endif
23 mm->context.noexec = s390_noexec; 23 if (current->mm->context.pgstes) {
24 mm->context.noexec = 0;
25 mm->context.pgstes = 1;
26 } else {
27 mm->context.noexec = s390_noexec;
28 mm->context.pgstes = 0;
29 }
24 mm->context.asce_limit = STACK_TOP_MAX; 30 mm->context.asce_limit = STACK_TOP_MAX;
25 crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); 31 crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
26 return 0; 32 return 0;
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index 65154dc9a9e5..4c0698c0dda5 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -30,6 +30,7 @@
30 */ 30 */
31#ifndef __ASSEMBLY__ 31#ifndef __ASSEMBLY__
32#include <linux/mm_types.h> 32#include <linux/mm_types.h>
33#include <asm/bitops.h>
33#include <asm/bug.h> 34#include <asm/bug.h>
34#include <asm/processor.h> 35#include <asm/processor.h>
35 36
@@ -258,6 +259,13 @@ extern char empty_zero_page[PAGE_SIZE];
258 * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid. 259 * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid.
259 */ 260 */
260 261
262/* Page status table bits for virtualization */
263#define RCP_PCL_BIT 55
264#define RCP_HR_BIT 54
265#define RCP_HC_BIT 53
266#define RCP_GR_BIT 50
267#define RCP_GC_BIT 49
268
261#ifndef __s390x__ 269#ifndef __s390x__
262 270
263/* Bits in the segment table address-space-control-element */ 271/* Bits in the segment table address-space-control-element */
@@ -513,6 +521,48 @@ static inline int pte_file(pte_t pte)
513#define __HAVE_ARCH_PTE_SAME 521#define __HAVE_ARCH_PTE_SAME
514#define pte_same(a,b) (pte_val(a) == pte_val(b)) 522#define pte_same(a,b) (pte_val(a) == pte_val(b))
515 523
524static inline void rcp_lock(pte_t *ptep)
525{
526#ifdef CONFIG_PGSTE
527 unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
528 preempt_disable();
529 while (test_and_set_bit(RCP_PCL_BIT, pgste))
530 ;
531#endif
532}
533
534static inline void rcp_unlock(pte_t *ptep)
535{
536#ifdef CONFIG_PGSTE
537 unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
538 clear_bit(RCP_PCL_BIT, pgste);
539 preempt_enable();
540#endif
541}
542
543/* forward declaration for SetPageUptodate in page-flags.h*/
544static inline void page_clear_dirty(struct page *page);
545#include <linux/page-flags.h>
546
547static inline void ptep_rcp_copy(pte_t *ptep)
548{
549#ifdef CONFIG_PGSTE
550 struct page *page = virt_to_page(pte_val(*ptep));
551 unsigned int skey;
552 unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
553
554 skey = page_get_storage_key(page_to_phys(page));
555 if (skey & _PAGE_CHANGED)
556 set_bit_simple(RCP_GC_BIT, pgste);
557 if (skey & _PAGE_REFERENCED)
558 set_bit_simple(RCP_GR_BIT, pgste);
559 if (test_and_clear_bit_simple(RCP_HC_BIT, pgste))
560 SetPageDirty(page);
561 if (test_and_clear_bit_simple(RCP_HR_BIT, pgste))
562 SetPageReferenced(page);
563#endif
564}
565
516/* 566/*
517 * query functions pte_write/pte_dirty/pte_young only work if 567 * query functions pte_write/pte_dirty/pte_young only work if
518 * pte_present() is true. Undefined behaviour if not.. 568 * pte_present() is true. Undefined behaviour if not..
@@ -599,6 +649,8 @@ static inline void pmd_clear(pmd_t *pmd)
599 649
600static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 650static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
601{ 651{
652 if (mm->context.pgstes)
653 ptep_rcp_copy(ptep);
602 pte_val(*ptep) = _PAGE_TYPE_EMPTY; 654 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
603 if (mm->context.noexec) 655 if (mm->context.noexec)
604 pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY; 656 pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY;
@@ -667,6 +719,24 @@ static inline pte_t pte_mkyoung(pte_t pte)
667static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, 719static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
668 unsigned long addr, pte_t *ptep) 720 unsigned long addr, pte_t *ptep)
669{ 721{
722#ifdef CONFIG_PGSTE
723 unsigned long physpage;
724 int young;
725 unsigned long *pgste;
726
727 if (!vma->vm_mm->context.pgstes)
728 return 0;
729 physpage = pte_val(*ptep) & PAGE_MASK;
730 pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
731
732 young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0);
733 rcp_lock(ptep);
734 if (young)
735 set_bit_simple(RCP_GR_BIT, pgste);
736 young |= test_and_clear_bit_simple(RCP_HR_BIT, pgste);
737 rcp_unlock(ptep);
738 return young;
739#endif
670 return 0; 740 return 0;
671} 741}
672 742
@@ -674,7 +744,13 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
674static inline int ptep_clear_flush_young(struct vm_area_struct *vma, 744static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
675 unsigned long address, pte_t *ptep) 745 unsigned long address, pte_t *ptep)
676{ 746{
677 /* No need to flush TLB; bits are in storage key */ 747 /* No need to flush TLB
748 * On s390 reference bits are in storage key and never in TLB
 749 * With virtualization we handle the reference bit; without it
 750 * we can simply return */

751#ifdef CONFIG_PGSTE
752 return ptep_test_and_clear_young(vma, address, ptep);
753#endif
678 return 0; 754 return 0;
679} 755}
680 756
@@ -693,15 +769,25 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
693 : "=m" (*ptep) : "m" (*ptep), 769 : "=m" (*ptep) : "m" (*ptep),
694 "a" (pto), "a" (address)); 770 "a" (pto), "a" (address));
695 } 771 }
696 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
697} 772}
698 773
699static inline void ptep_invalidate(struct mm_struct *mm, 774static inline void ptep_invalidate(struct mm_struct *mm,
700 unsigned long address, pte_t *ptep) 775 unsigned long address, pte_t *ptep)
701{ 776{
777 if (mm->context.pgstes) {
778 rcp_lock(ptep);
779 __ptep_ipte(address, ptep);
780 ptep_rcp_copy(ptep);
781 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
782 rcp_unlock(ptep);
783 return;
784 }
702 __ptep_ipte(address, ptep); 785 __ptep_ipte(address, ptep);
703 if (mm->context.noexec) 786 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
787 if (mm->context.noexec) {
704 __ptep_ipte(address, ptep + PTRS_PER_PTE); 788 __ptep_ipte(address, ptep + PTRS_PER_PTE);
789 pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY;
790 }
705} 791}
706 792
707/* 793/*
@@ -966,6 +1052,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
966 1052
967extern int add_shared_memory(unsigned long start, unsigned long size); 1053extern int add_shared_memory(unsigned long start, unsigned long size);
968extern int remove_shared_memory(unsigned long start, unsigned long size); 1054extern int remove_shared_memory(unsigned long start, unsigned long size);
1055extern int s390_enable_sie(void);
969 1056
970/* 1057/*
971 * No page table caches to initialise 1058 * No page table caches to initialise
diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h
index a76a6b8fd887..aaf4b518b940 100644
--- a/include/asm-s390/setup.h
+++ b/include/asm-s390/setup.h
@@ -62,6 +62,7 @@ extern unsigned long machine_flags;
62#define MACHINE_IS_VM (machine_flags & 1) 62#define MACHINE_IS_VM (machine_flags & 1)
63#define MACHINE_IS_P390 (machine_flags & 4) 63#define MACHINE_IS_P390 (machine_flags & 4)
64#define MACHINE_HAS_MVPG (machine_flags & 16) 64#define MACHINE_HAS_MVPG (machine_flags & 16)
65#define MACHINE_IS_KVM (machine_flags & 64)
65#define MACHINE_HAS_IDTE (machine_flags & 128) 66#define MACHINE_HAS_IDTE (machine_flags & 128)
66#define MACHINE_HAS_DIAG9C (machine_flags & 256) 67#define MACHINE_HAS_DIAG9C (machine_flags & 256)
67 68
diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
index 7a71120426a3..80eefef2cc76 100644
--- a/include/asm-x86/kvm.h
+++ b/include/asm-x86/kvm.h
@@ -188,4 +188,45 @@ struct kvm_cpuid2 {
188 struct kvm_cpuid_entry2 entries[0]; 188 struct kvm_cpuid_entry2 entries[0];
189}; 189};
190 190
191/* for KVM_GET_PIT and KVM_SET_PIT */
192struct kvm_pit_channel_state {
193 __u32 count; /* can be 65536 */
194 __u16 latched_count;
195 __u8 count_latched;
196 __u8 status_latched;
197 __u8 status;
198 __u8 read_state;
199 __u8 write_state;
200 __u8 write_latch;
201 __u8 rw_mode;
202 __u8 mode;
203 __u8 bcd;
204 __u8 gate;
205 __s64 count_load_time;
206};
207
208struct kvm_pit_state {
209 struct kvm_pit_channel_state channels[3];
210};
211
212#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
213#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
214#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04)
215#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05)
216#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06)
217#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07)
218#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08)
219#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09)
220#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A)
221#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B)
222#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C)
223#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D)
224#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E)
225#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F)
226#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10)
227#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11)
228#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12)
229#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13)
230#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14)
231
191#endif 232#endif
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 68ee390b2844..9d963cd6533c 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -20,6 +20,13 @@
20 20
21#include <asm/desc.h> 21#include <asm/desc.h>
22 22
23#define KVM_MAX_VCPUS 16
24#define KVM_MEMORY_SLOTS 32
 25/* memory slots that are not exposed to userspace */
26#define KVM_PRIVATE_MEM_SLOTS 4
27
28#define KVM_PIO_PAGE_OFFSET 1
29
23#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) 30#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
24#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) 31#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
25#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ 32#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \
@@ -39,6 +46,13 @@
39#define INVALID_PAGE (~(hpa_t)0) 46#define INVALID_PAGE (~(hpa_t)0)
40#define UNMAPPED_GVA (~(gpa_t)0) 47#define UNMAPPED_GVA (~(gpa_t)0)
41 48
49/* shadow tables are PAE even on non-PAE hosts */
50#define KVM_HPAGE_SHIFT 21
51#define KVM_HPAGE_SIZE (1UL << KVM_HPAGE_SHIFT)
52#define KVM_HPAGE_MASK (~(KVM_HPAGE_SIZE - 1))
53
54#define KVM_PAGES_PER_HPAGE (KVM_HPAGE_SIZE / PAGE_SIZE)
55
42#define DE_VECTOR 0 56#define DE_VECTOR 0
43#define UD_VECTOR 6 57#define UD_VECTOR 6
44#define NM_VECTOR 7 58#define NM_VECTOR 7
@@ -48,6 +62,7 @@
48#define SS_VECTOR 12 62#define SS_VECTOR 12
49#define GP_VECTOR 13 63#define GP_VECTOR 13
50#define PF_VECTOR 14 64#define PF_VECTOR 14
65#define MC_VECTOR 18
51 66
52#define SELECTOR_TI_MASK (1 << 2) 67#define SELECTOR_TI_MASK (1 << 2)
53#define SELECTOR_RPL_MASK 0x03 68#define SELECTOR_RPL_MASK 0x03
@@ -58,7 +73,8 @@
58 73
59#define KVM_PERMILLE_MMU_PAGES 20 74#define KVM_PERMILLE_MMU_PAGES 20
60#define KVM_MIN_ALLOC_MMU_PAGES 64 75#define KVM_MIN_ALLOC_MMU_PAGES 64
61#define KVM_NUM_MMU_PAGES 1024 76#define KVM_MMU_HASH_SHIFT 10
77#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
62#define KVM_MIN_FREE_MMU_PAGES 5 78#define KVM_MIN_FREE_MMU_PAGES 5
63#define KVM_REFILL_PAGES 25 79#define KVM_REFILL_PAGES 25
64#define KVM_MAX_CPUID_ENTRIES 40 80#define KVM_MAX_CPUID_ENTRIES 40
@@ -106,6 +122,12 @@ enum {
106 122
107#define KVM_NR_MEM_OBJS 40 123#define KVM_NR_MEM_OBJS 40
108 124
125struct kvm_guest_debug {
126 int enabled;
127 unsigned long bp[4];
128 int singlestep;
129};
130
109/* 131/*
110 * We don't want allocation failures within the mmu code, so we preallocate 132 * We don't want allocation failures within the mmu code, so we preallocate
111 * enough memory for a single page fault in a cache. 133 * enough memory for a single page fault in a cache.
@@ -140,6 +162,7 @@ union kvm_mmu_page_role {
140 unsigned pad_for_nice_hex_output:6; 162 unsigned pad_for_nice_hex_output:6;
141 unsigned metaphysical:1; 163 unsigned metaphysical:1;
142 unsigned access:3; 164 unsigned access:3;
165 unsigned invalid:1;
143 }; 166 };
144}; 167};
145 168
@@ -204,11 +227,6 @@ struct kvm_vcpu_arch {
204 u64 shadow_efer; 227 u64 shadow_efer;
205 u64 apic_base; 228 u64 apic_base;
206 struct kvm_lapic *apic; /* kernel irqchip context */ 229 struct kvm_lapic *apic; /* kernel irqchip context */
207#define VCPU_MP_STATE_RUNNABLE 0
208#define VCPU_MP_STATE_UNINITIALIZED 1
209#define VCPU_MP_STATE_INIT_RECEIVED 2
210#define VCPU_MP_STATE_SIPI_RECEIVED 3
211#define VCPU_MP_STATE_HALTED 4
212 int mp_state; 230 int mp_state;
213 int sipi_vector; 231 int sipi_vector;
214 u64 ia32_misc_enable_msr; 232 u64 ia32_misc_enable_msr;
@@ -226,8 +244,9 @@ struct kvm_vcpu_arch {
226 u64 *last_pte_updated; 244 u64 *last_pte_updated;
227 245
228 struct { 246 struct {
229 gfn_t gfn; /* presumed gfn during guest pte update */ 247 gfn_t gfn; /* presumed gfn during guest pte update */
230 struct page *page; /* page corresponding to that gfn */ 248 pfn_t pfn; /* pfn corresponding to that gfn */
249 int largepage;
231 } update_pte; 250 } update_pte;
232 251
233 struct i387_fxsave_struct host_fx_image; 252 struct i387_fxsave_struct host_fx_image;
@@ -261,6 +280,11 @@ struct kvm_vcpu_arch {
261 /* emulate context */ 280 /* emulate context */
262 281
263 struct x86_emulate_ctxt emulate_ctxt; 282 struct x86_emulate_ctxt emulate_ctxt;
283
284 gpa_t time;
285 struct kvm_vcpu_time_info hv_clock;
286 unsigned int time_offset;
287 struct page *time_page;
264}; 288};
265 289
266struct kvm_mem_alias { 290struct kvm_mem_alias {
@@ -283,10 +307,13 @@ struct kvm_arch{
283 struct list_head active_mmu_pages; 307 struct list_head active_mmu_pages;
284 struct kvm_pic *vpic; 308 struct kvm_pic *vpic;
285 struct kvm_ioapic *vioapic; 309 struct kvm_ioapic *vioapic;
310 struct kvm_pit *vpit;
286 311
287 int round_robin_prev_vcpu; 312 int round_robin_prev_vcpu;
288 unsigned int tss_addr; 313 unsigned int tss_addr;
289 struct page *apic_access_page; 314 struct page *apic_access_page;
315
316 gpa_t wall_clock;
290}; 317};
291 318
292struct kvm_vm_stat { 319struct kvm_vm_stat {
@@ -298,6 +325,7 @@ struct kvm_vm_stat {
298 u32 mmu_recycled; 325 u32 mmu_recycled;
299 u32 mmu_cache_miss; 326 u32 mmu_cache_miss;
300 u32 remote_tlb_flush; 327 u32 remote_tlb_flush;
328 u32 lpages;
301}; 329};
302 330
303struct kvm_vcpu_stat { 331struct kvm_vcpu_stat {
@@ -320,6 +348,7 @@ struct kvm_vcpu_stat {
320 u32 fpu_reload; 348 u32 fpu_reload;
321 u32 insn_emulation; 349 u32 insn_emulation;
322 u32 insn_emulation_fail; 350 u32 insn_emulation_fail;
351 u32 hypercalls;
323}; 352};
324 353
325struct descriptor_table { 354struct descriptor_table {
@@ -355,6 +384,7 @@ struct kvm_x86_ops {
355 u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); 384 u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
356 void (*get_segment)(struct kvm_vcpu *vcpu, 385 void (*get_segment)(struct kvm_vcpu *vcpu,
357 struct kvm_segment *var, int seg); 386 struct kvm_segment *var, int seg);
387 int (*get_cpl)(struct kvm_vcpu *vcpu);
358 void (*set_segment)(struct kvm_vcpu *vcpu, 388 void (*set_segment)(struct kvm_vcpu *vcpu,
359 struct kvm_segment *var, int seg); 389 struct kvm_segment *var, int seg);
360 void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); 390 void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
@@ -410,6 +440,15 @@ void kvm_mmu_zap_all(struct kvm *kvm);
410unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); 440unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
411void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); 441void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
412 442
443int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
444
445int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
446 const void *val, int bytes);
447int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
448 gpa_t addr, unsigned long *ret);
449
450extern bool tdp_enabled;
451
413enum emulation_result { 452enum emulation_result {
414 EMULATE_DONE, /* no further processing */ 453 EMULATE_DONE, /* no further processing */
415 EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ 454 EMULATE_DO_MMIO, /* kvm_run filled with mmio request */
@@ -429,6 +468,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
429unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr); 468unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr);
430void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, 469void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value,
431 unsigned long *rflags); 470 unsigned long *rflags);
471void kvm_enable_efer_bits(u64);
432int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); 472int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
433int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); 473int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
434 474
@@ -448,12 +488,14 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
448int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, 488int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
449 unsigned long value); 489 unsigned long value);
450 490
451void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); 491int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason);
452void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0); 492
453void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0); 493void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
454void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0); 494void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
455unsigned long get_cr8(struct kvm_vcpu *vcpu); 495void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
456void lmsw(struct kvm_vcpu *vcpu, unsigned long msw); 496void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
497unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
498void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
457void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); 499void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
458 500
459int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); 501int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
@@ -491,6 +533,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu);
491 533
492int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code); 534int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code);
493 535
536void kvm_enable_tdp(void);
537
494int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); 538int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
495int complete_pio(struct kvm_vcpu *vcpu); 539int complete_pio(struct kvm_vcpu *vcpu);
496 540
@@ -600,6 +644,7 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
600#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" 644#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
601#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" 645#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
602#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" 646#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
647#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
603 648
604#define MSR_IA32_TIME_STAMP_COUNTER 0x010 649#define MSR_IA32_TIME_STAMP_COUNTER 0x010
605 650
@@ -610,4 +655,30 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
610#define RMODE_TSS_SIZE \ 655#define RMODE_TSS_SIZE \
611 (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) 656 (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
612 657
658enum {
659 TASK_SWITCH_CALL = 0,
660 TASK_SWITCH_IRET = 1,
661 TASK_SWITCH_JMP = 2,
662 TASK_SWITCH_GATE = 3,
663};
664
665#define KVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5, name) \
666 trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
667 vcpu, 5, d1, d2, d3, d4, d5)
668#define KVMTRACE_4D(evt, vcpu, d1, d2, d3, d4, name) \
669 trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
670 vcpu, 4, d1, d2, d3, d4, 0)
671#define KVMTRACE_3D(evt, vcpu, d1, d2, d3, name) \
672 trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
673 vcpu, 3, d1, d2, d3, 0, 0)
674#define KVMTRACE_2D(evt, vcpu, d1, d2, name) \
675 trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
676 vcpu, 2, d1, d2, 0, 0, 0)
677#define KVMTRACE_1D(evt, vcpu, d1, name) \
678 trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
679 vcpu, 1, d1, 0, 0, 0, 0)
680#define KVMTRACE_0D(evt, vcpu, name) \
681 trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
682 vcpu, 0, 0, 0, 0, 0, 0)
683
613#endif 684#endif
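[Editor's note: an illustrative call-site sketch, not part of this patch, for the KVMTRACE_*D macros above. The trailing "name" argument selects the marker (kvm_trace_<name>); the real call sites are in the vmx/svm exit handlers, and example_trace_cr_read() is a hypothetical wrapper.]

	static void example_trace_cr_read(struct kvm_vcpu *vcpu, int cr, u64 val)
	{
		/* record a CR read with three extra data words on the
		 * "handler" marker */
		KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)val, (u32)(val >> 32),
			    handler);
	}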
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index c6f3fd8d8c53..509845942070 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -10,10 +10,65 @@
10 * paravirtualization, the appropriate feature bit should be checked. 10 * paravirtualization, the appropriate feature bit should be checked.
11 */ 11 */
12#define KVM_CPUID_FEATURES 0x40000001 12#define KVM_CPUID_FEATURES 0x40000001
13#define KVM_FEATURE_CLOCKSOURCE 0
14#define KVM_FEATURE_NOP_IO_DELAY 1
15#define KVM_FEATURE_MMU_OP 2
16
17#define MSR_KVM_WALL_CLOCK 0x11
18#define MSR_KVM_SYSTEM_TIME 0x12
19
20#define KVM_MAX_MMU_OP_BATCH 32
21
22/* Operations for KVM_HC_MMU_OP */
23#define KVM_MMU_OP_WRITE_PTE 1
24#define KVM_MMU_OP_FLUSH_TLB 2
25#define KVM_MMU_OP_RELEASE_PT 3
26
27/* Payload for KVM_HC_MMU_OP */
28struct kvm_mmu_op_header {
29 __u32 op;
30 __u32 pad;
31};
32
33struct kvm_mmu_op_write_pte {
34 struct kvm_mmu_op_header header;
35 __u64 pte_phys;
36 __u64 pte_val;
37};
38
39struct kvm_mmu_op_flush_tlb {
40 struct kvm_mmu_op_header header;
41};
42
43struct kvm_mmu_op_release_pt {
44 struct kvm_mmu_op_header header;
45 __u64 pt_phys;
46};
13 47
14#ifdef __KERNEL__ 48#ifdef __KERNEL__
15#include <asm/processor.h> 49#include <asm/processor.h>
16 50
51/* xen binary-compatible interface. See xen headers for details */
52struct kvm_vcpu_time_info {
53 uint32_t version;
54 uint32_t pad0;
55 uint64_t tsc_timestamp;
56 uint64_t system_time;
57 uint32_t tsc_to_system_mul;
58 int8_t tsc_shift;
59 int8_t pad[3];
60} __attribute__((__packed__)); /* 32 bytes */
61
62struct kvm_wall_clock {
63 uint32_t wc_version;
64 uint32_t wc_sec;
65 uint32_t wc_nsec;
66} __attribute__((__packed__));
67
68
69extern void kvmclock_init(void);
70
71
17/* This instruction is vmcall. On non-VT architectures, it will generate a 72/* This instruction is vmcall. On non-VT architectures, it will generate a
18 * trap that we will then rewrite to the appropriate instruction. 73 * trap that we will then rewrite to the appropriate instruction.
19 */ 74 */
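[Editor's note: a guest-side sketch, not part of this patch, of how MSR_KVM_SYSTEM_TIME and kvm_vcpu_time_info above could be wired up. It assumes the guest writes the physical address of its time structure with bit 0 used as an enable flag; the in-tree registration is arch/x86/kernel/kvmclock.c, and the example_* names are hypothetical.]

	static struct kvm_vcpu_time_info example_hv_clock;

	static void example_register_kvmclock(void)
	{
		u64 pa;

		if (!kvm_para_available() ||
		    !kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))
			return;

		pa = __pa(&example_hv_clock) | 1;	/* assumed: bit 0 enables */
		wrmsrl(MSR_KVM_SYSTEM_TIME, pa);
	}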
diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h
index 6b5233b4f84b..e63741f19392 100644
--- a/include/asm-x86/reboot.h
+++ b/include/asm-x86/reboot.h
@@ -15,5 +15,7 @@ struct machine_ops {
15extern struct machine_ops machine_ops; 15extern struct machine_ops machine_ops;
16 16
17void machine_real_restart(unsigned char *code, int length); 17void machine_real_restart(unsigned char *code, int length);
18void native_machine_crash_shutdown(struct pt_regs *regs);
19void native_machine_shutdown(void);
18 20
19#endif /* _ASM_REBOOT_H */ 21#endif /* _ASM_REBOOT_H */
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index c1ec04fd000d..a281afeddfbb 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -8,11 +8,18 @@
8 */ 8 */
9 9
10#include <asm/types.h> 10#include <asm/types.h>
11#include <linux/compiler.h>
11#include <linux/ioctl.h> 12#include <linux/ioctl.h>
12#include <asm/kvm.h> 13#include <asm/kvm.h>
13 14
14#define KVM_API_VERSION 12 15#define KVM_API_VERSION 12
15 16
17/* for KVM_TRACE_ENABLE */
18struct kvm_user_trace_setup {
 19 __u32 buf_size; /* size of each per-cpu sub-buffer */
 20 __u32 buf_nr; /* number of sub-buffers per cpu */
21};
22
16/* for KVM_CREATE_MEMORY_REGION */ 23/* for KVM_CREATE_MEMORY_REGION */
17struct kvm_memory_region { 24struct kvm_memory_region {
18 __u32 slot; 25 __u32 slot;
@@ -73,6 +80,9 @@ struct kvm_irqchip {
73#define KVM_EXIT_INTR 10 80#define KVM_EXIT_INTR 10
74#define KVM_EXIT_SET_TPR 11 81#define KVM_EXIT_SET_TPR 11
75#define KVM_EXIT_TPR_ACCESS 12 82#define KVM_EXIT_TPR_ACCESS 12
83#define KVM_EXIT_S390_SIEIC 13
84#define KVM_EXIT_S390_RESET 14
85#define KVM_EXIT_DCR 15
76 86
77/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ 87/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
78struct kvm_run { 88struct kvm_run {
@@ -137,6 +147,27 @@ struct kvm_run {
137 __u32 is_write; 147 __u32 is_write;
138 __u32 pad; 148 __u32 pad;
139 } tpr_access; 149 } tpr_access;
150 /* KVM_EXIT_S390_SIEIC */
151 struct {
152 __u8 icptcode;
153 __u64 mask; /* psw upper half */
154 __u64 addr; /* psw lower half */
155 __u16 ipa;
156 __u32 ipb;
157 } s390_sieic;
158 /* KVM_EXIT_S390_RESET */
159#define KVM_S390_RESET_POR 1
160#define KVM_S390_RESET_CLEAR 2
161#define KVM_S390_RESET_SUBSYSTEM 4
162#define KVM_S390_RESET_CPU_INIT 8
163#define KVM_S390_RESET_IPL 16
164 __u64 s390_reset_flags;
165 /* KVM_EXIT_DCR */
166 struct {
167 __u32 dcrn;
168 __u32 data;
169 __u8 is_write;
170 } dcr;
140 /* Fix the size of the union. */ 171 /* Fix the size of the union. */
141 char padding[256]; 172 char padding[256];
142 }; 173 };
@@ -204,6 +235,74 @@ struct kvm_vapic_addr {
204 __u64 vapic_addr; 235 __u64 vapic_addr;
205}; 236};
206 237
238/* for KVM_SET_MPSTATE */
239
240#define KVM_MP_STATE_RUNNABLE 0
241#define KVM_MP_STATE_UNINITIALIZED 1
242#define KVM_MP_STATE_INIT_RECEIVED 2
243#define KVM_MP_STATE_HALTED 3
244#define KVM_MP_STATE_SIPI_RECEIVED 4
245
246struct kvm_mp_state {
247 __u32 mp_state;
248};
249
250struct kvm_s390_psw {
251 __u64 mask;
252 __u64 addr;
253};
254
255/* valid values for type in kvm_s390_interrupt */
256#define KVM_S390_SIGP_STOP 0xfffe0000u
257#define KVM_S390_PROGRAM_INT 0xfffe0001u
258#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u
259#define KVM_S390_RESTART 0xfffe0003u
260#define KVM_S390_INT_VIRTIO 0xffff2603u
261#define KVM_S390_INT_SERVICE 0xffff2401u
262#define KVM_S390_INT_EMERGENCY 0xffff1201u
263
264struct kvm_s390_interrupt {
265 __u32 type;
266 __u32 parm;
267 __u64 parm64;
268};
269
270#define KVM_TRC_SHIFT 16
271/*
272 * kvm trace categories
273 */
274#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT)
275#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) /* only 12 bits */
276
277/*
278 * kvm trace action
279 */
280#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01)
281#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02)
282#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01)
283
284#define KVM_TRC_HEAD_SIZE 12
285#define KVM_TRC_CYCLE_SIZE 8
286#define KVM_TRC_EXTRA_MAX 7
287
288/* This structure represents a single trace buffer record. */
289struct kvm_trace_rec {
290 __u32 event:28;
291 __u32 extra_u32:3;
292 __u32 cycle_in:1;
293 __u32 pid;
294 __u32 vcpu_id;
295 union {
296 struct {
297 __u32 cycle_lo, cycle_hi;
298 __u32 extra_u32[KVM_TRC_EXTRA_MAX];
299 } cycle;
300 struct {
301 __u32 extra_u32[KVM_TRC_EXTRA_MAX];
302 } nocycle;
303 } u;
304};
305
207#define KVMIO 0xAE 306#define KVMIO 0xAE
208 307
209/* 308/*
@@ -212,6 +311,8 @@ struct kvm_vapic_addr {
212#define KVM_GET_API_VERSION _IO(KVMIO, 0x00) 311#define KVM_GET_API_VERSION _IO(KVMIO, 0x00)
213#define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */ 312#define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */
214#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list) 313#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list)
314
315#define KVM_S390_ENABLE_SIE _IO(KVMIO, 0x06)
215/* 316/*
216 * Check if a kvm extension is available. Argument is extension number, 317 * Check if a kvm extension is available. Argument is extension number,
217 * return is 1 (yes) or 0 (no, sorry). 318 * return is 1 (yes) or 0 (no, sorry).
@@ -222,7 +323,12 @@ struct kvm_vapic_addr {
222 */ 323 */
223#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ 324#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */
224#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) 325#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2)
225 326/*
327 * ioctls for kvm trace
328 */
329#define KVM_TRACE_ENABLE _IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
330#define KVM_TRACE_PAUSE _IO(KVMIO, 0x07)
331#define KVM_TRACE_DISABLE _IO(KVMIO, 0x08)
226/* 332/*
227 * Extension capability list. 333 * Extension capability list.
228 */ 334 */
@@ -233,6 +339,13 @@ struct kvm_vapic_addr {
233#define KVM_CAP_SET_TSS_ADDR 4 339#define KVM_CAP_SET_TSS_ADDR 4
234#define KVM_CAP_VAPIC 6 340#define KVM_CAP_VAPIC 6
235#define KVM_CAP_EXT_CPUID 7 341#define KVM_CAP_EXT_CPUID 7
342#define KVM_CAP_CLOCKSOURCE 8
343#define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */
344#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
345#define KVM_CAP_PIT 11
346#define KVM_CAP_NOP_IO_DELAY 12
347#define KVM_CAP_PV_MMU 13
348#define KVM_CAP_MP_STATE 14
236 349
237/* 350/*
238 * ioctls for VM fds 351 * ioctls for VM fds
@@ -255,6 +368,9 @@ struct kvm_vapic_addr {
255#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) 368#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
256#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) 369#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
257#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) 370#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
371#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
372#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state)
373#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state)
258 374
259/* 375/*
260 * ioctls for vcpu fds 376 * ioctls for vcpu fds
@@ -281,5 +397,17 @@ struct kvm_vapic_addr {
281#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl) 397#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl)
282/* Available with KVM_CAP_VAPIC */ 398/* Available with KVM_CAP_VAPIC */
283#define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr) 399#define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr)
400/* valid for virtual machine (for floating interrupt)_and_ vcpu */
401#define KVM_S390_INTERRUPT _IOW(KVMIO, 0x94, struct kvm_s390_interrupt)
402/* store status for s390 */
403#define KVM_S390_STORE_STATUS_NOADDR (-1ul)
404#define KVM_S390_STORE_STATUS_PREFIXED (-2ul)
405#define KVM_S390_STORE_STATUS _IOW(KVMIO, 0x95, unsigned long)
406/* initial ipl psw for s390 */
407#define KVM_S390_SET_INITIAL_PSW _IOW(KVMIO, 0x96, struct kvm_s390_psw)
408/* initial reset for s390 */
409#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
410#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state)
411#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
284 412
285#endif 413#endif
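[Editor's note: an illustrative userspace sketch, not part of this patch, exercising two of the ioctls added above: KVM_TRACE_ENABLE on the /dev/kvm fd and KVM_GET_MP_STATE on a vcpu fd. The buffer sizes are arbitrary example values and the example_* helpers are hypothetical names.]

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int example_enable_trace(int kvm_fd)
	{
		struct kvm_user_trace_setup setup = {
			.buf_size = 4096,	/* per-cpu sub-buffer size */
			.buf_nr   = 64,		/* sub-buffers per cpu */
		};

		return ioctl(kvm_fd, KVM_TRACE_ENABLE, &setup);
	}

	static int example_vcpu_is_runnable(int vcpu_fd)
	{
		struct kvm_mp_state mp;

		if (ioctl(vcpu_fd, KVM_GET_MP_STATE, &mp) < 0)
			return -1;
		return mp.mp_state == KVM_MP_STATE_RUNNABLE;
	}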
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 928b0d59e9ba..398978972b7a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -15,6 +15,7 @@
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/preempt.h> 17#include <linux/preempt.h>
18#include <linux/marker.h>
18#include <asm/signal.h> 19#include <asm/signal.h>
19 20
20#include <linux/kvm.h> 21#include <linux/kvm.h>
@@ -24,29 +25,18 @@
24 25
25#include <asm/kvm_host.h> 26#include <asm/kvm_host.h>
26 27
27#define KVM_MAX_VCPUS 4
28#define KVM_MEMORY_SLOTS 8
29/* memory slots that does not exposed to userspace */
30#define KVM_PRIVATE_MEM_SLOTS 4
31
32#define KVM_PIO_PAGE_OFFSET 1
33
34/* 28/*
35 * vcpu->requests bit members 29 * vcpu->requests bit members
36 */ 30 */
37#define KVM_REQ_TLB_FLUSH 0 31#define KVM_REQ_TLB_FLUSH 0
38#define KVM_REQ_MIGRATE_TIMER 1 32#define KVM_REQ_MIGRATE_TIMER 1
39#define KVM_REQ_REPORT_TPR_ACCESS 2 33#define KVM_REQ_REPORT_TPR_ACCESS 2
34#define KVM_REQ_MMU_RELOAD 3
35#define KVM_REQ_TRIPLE_FAULT 4
40 36
41struct kvm_vcpu; 37struct kvm_vcpu;
42extern struct kmem_cache *kvm_vcpu_cache; 38extern struct kmem_cache *kvm_vcpu_cache;
43 39
44struct kvm_guest_debug {
45 int enabled;
46 unsigned long bp[4];
47 int singlestep;
48};
49
50/* 40/*
51 * It would be nice to use something smarter than a linear search, TBD... 41 * It would be nice to use something smarter than a linear search, TBD...
52 * Thankfully we dont expect many devices to register (famous last words :), 42 * Thankfully we dont expect many devices to register (famous last words :),
@@ -67,7 +57,9 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
67 57
68struct kvm_vcpu { 58struct kvm_vcpu {
69 struct kvm *kvm; 59 struct kvm *kvm;
60#ifdef CONFIG_PREEMPT_NOTIFIERS
70 struct preempt_notifier preempt_notifier; 61 struct preempt_notifier preempt_notifier;
62#endif
71 int vcpu_id; 63 int vcpu_id;
72 struct mutex mutex; 64 struct mutex mutex;
73 int cpu; 65 int cpu;
@@ -100,6 +92,10 @@ struct kvm_memory_slot {
100 unsigned long flags; 92 unsigned long flags;
101 unsigned long *rmap; 93 unsigned long *rmap;
102 unsigned long *dirty_bitmap; 94 unsigned long *dirty_bitmap;
95 struct {
96 unsigned long rmap_pde;
97 int write_count;
98 } *lpage_info;
103 unsigned long userspace_addr; 99 unsigned long userspace_addr;
104 int user_alloc; 100 int user_alloc;
105}; 101};
@@ -114,11 +110,11 @@ struct kvm {
114 KVM_PRIVATE_MEM_SLOTS]; 110 KVM_PRIVATE_MEM_SLOTS];
115 struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; 111 struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
116 struct list_head vm_list; 112 struct list_head vm_list;
117 struct file *filp;
118 struct kvm_io_bus mmio_bus; 113 struct kvm_io_bus mmio_bus;
119 struct kvm_io_bus pio_bus; 114 struct kvm_io_bus pio_bus;
120 struct kvm_vm_stat stat; 115 struct kvm_vm_stat stat;
121 struct kvm_arch arch; 116 struct kvm_arch arch;
117 atomic_t users_count;
122}; 118};
123 119
124/* The guest did something we don't support. */ 120/* The guest did something we don't support. */
@@ -145,14 +141,19 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
145 struct module *module); 141 struct module *module);
146void kvm_exit(void); 142void kvm_exit(void);
147 143
144void kvm_get_kvm(struct kvm *kvm);
145void kvm_put_kvm(struct kvm *kvm);
146
148#define HPA_MSB ((sizeof(hpa_t) * 8) - 1) 147#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
149#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) 148#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
150static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } 149static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
151struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); 150struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
152 151
153extern struct page *bad_page; 152extern struct page *bad_page;
153extern pfn_t bad_pfn;
154 154
155int is_error_page(struct page *page); 155int is_error_page(struct page *page);
156int is_error_pfn(pfn_t pfn);
156int kvm_is_error_hva(unsigned long addr); 157int kvm_is_error_hva(unsigned long addr);
157int kvm_set_memory_region(struct kvm *kvm, 158int kvm_set_memory_region(struct kvm *kvm,
158 struct kvm_userspace_memory_region *mem, 159 struct kvm_userspace_memory_region *mem,
@@ -166,8 +167,19 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
166 int user_alloc); 167 int user_alloc);
167gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); 168gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
168struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); 169struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
170unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
169void kvm_release_page_clean(struct page *page); 171void kvm_release_page_clean(struct page *page);
170void kvm_release_page_dirty(struct page *page); 172void kvm_release_page_dirty(struct page *page);
173void kvm_set_page_dirty(struct page *page);
174void kvm_set_page_accessed(struct page *page);
175
176pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
177void kvm_release_pfn_dirty(pfn_t);
178void kvm_release_pfn_clean(pfn_t pfn);
179void kvm_set_pfn_dirty(pfn_t pfn);
180void kvm_set_pfn_accessed(pfn_t pfn);
181void kvm_get_pfn(pfn_t pfn);
182
171int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, 183int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
172 int len); 184 int len);
173int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, 185int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
@@ -188,6 +200,7 @@ void kvm_resched(struct kvm_vcpu *vcpu);
188void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); 200void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
189void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); 201void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
190void kvm_flush_remote_tlbs(struct kvm *kvm); 202void kvm_flush_remote_tlbs(struct kvm *kvm);
203void kvm_reload_remote_mmus(struct kvm *kvm);
191 204
192long kvm_arch_dev_ioctl(struct file *filp, 205long kvm_arch_dev_ioctl(struct file *filp,
193 unsigned int ioctl, unsigned long arg); 206 unsigned int ioctl, unsigned long arg);
@@ -223,6 +236,10 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
223 struct kvm_sregs *sregs); 236 struct kvm_sregs *sregs);
224int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 237int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
225 struct kvm_sregs *sregs); 238 struct kvm_sregs *sregs);
239int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
240 struct kvm_mp_state *mp_state);
241int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
242 struct kvm_mp_state *mp_state);
226int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, 243int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
227 struct kvm_debug_guest *dbg); 244 struct kvm_debug_guest *dbg);
228int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); 245int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
@@ -255,6 +272,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm);
255 272
256int kvm_cpu_get_interrupt(struct kvm_vcpu *v); 273int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
257int kvm_cpu_has_interrupt(struct kvm_vcpu *v); 274int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
275int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
258void kvm_vcpu_kick(struct kvm_vcpu *vcpu); 276void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
259 277
260static inline void kvm_guest_enter(void) 278static inline void kvm_guest_enter(void)
@@ -296,5 +314,18 @@ struct kvm_stats_debugfs_item {
296 struct dentry *dentry; 314 struct dentry *dentry;
297}; 315};
298extern struct kvm_stats_debugfs_item debugfs_entries[]; 316extern struct kvm_stats_debugfs_item debugfs_entries[];
317extern struct dentry *kvm_debugfs_dir;
318
319#ifdef CONFIG_KVM_TRACE
320int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg);
321void kvm_trace_cleanup(void);
322#else
323static inline
324int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
325{
326 return -EINVAL;
327}
328#define kvm_trace_cleanup() ((void)0)
329#endif
299 330
300#endif 331#endif
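
The kvm_host.h hunks above drop the old struct file *filp back-pointer in favour of an explicit users_count reference count (kvm_get_kvm/kvm_put_kvm) and add pfn-based accessors next to the page-based ones. A minimal in-kernel sketch of how a caller might combine the new interfaces; this is illustrative only, example_touch_gfn and my_gfn are invented names, while the kvm_* calls themselves come from the declarations above:

        #include <linux/kvm_host.h>
        #include <linux/sched.h>

        /* Sketch: pin the VM across the access, then use the pfn helpers. */
        static int example_touch_gfn(struct kvm *kvm, gfn_t my_gfn)
        {
                pfn_t pfn;

                kvm_get_kvm(kvm);               /* users_count++ */

                down_read(&current->mm->mmap_sem);
                pfn = gfn_to_pfn(kvm, my_gfn);  /* needs mmap_sem held */
                up_read(&current->mm->mmap_sem);

                if (is_error_pfn(pfn)) {
                        kvm_release_pfn_clean(pfn);
                        kvm_put_kvm(kvm);       /* may destroy the VM */
                        return -EFAULT;
                }

                /* ... write to the page ... */

                kvm_release_pfn_dirty(pfn);     /* mark dirty, then release */
                kvm_put_kvm(kvm);
                return 0;
        }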
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
index 5497aac0d2f8..3ddce03766ca 100644
--- a/include/linux/kvm_para.h
+++ b/include/linux/kvm_para.h
@@ -11,8 +11,11 @@
11 11
12/* Return values for hypercalls */ 12/* Return values for hypercalls */
13#define KVM_ENOSYS 1000 13#define KVM_ENOSYS 1000
14#define KVM_EFAULT EFAULT
15#define KVM_E2BIG E2BIG
14 16
15#define KVM_HC_VAPIC_POLL_IRQ 1 17#define KVM_HC_VAPIC_POLL_IRQ 1
18#define KVM_HC_MMU_OP 2
16 19
17/* 20/*
18 * hypercalls use architecture specific 21 * hypercalls use architecture specific
@@ -20,6 +23,12 @@
20#include <asm/kvm_para.h> 23#include <asm/kvm_para.h>
21 24
22#ifdef __KERNEL__ 25#ifdef __KERNEL__
26#ifdef CONFIG_KVM_GUEST
27void __init kvm_guest_init(void);
28#else
29#define kvm_guest_init() do { } while (0)
30#endif
31
23static inline int kvm_para_has_feature(unsigned int feature) 32static inline int kvm_para_has_feature(unsigned int feature)
24{ 33{
25 if (kvm_arch_para_features() & (1UL << feature)) 34 if (kvm_arch_para_features() & (1UL << feature))
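
The kvm_para.h hunk adds the KVM_HC_MMU_OP hypercall number, hypercall error codes, and a guest-side kvm_guest_init() hook that compiles away without CONFIG_KVM_GUEST. A hedged guest-side sketch; KVM_FEATURE_MMU_OP and the message text are assumptions for illustration, only kvm_guest_init() and kvm_para_has_feature() come from this header:

        #include <linux/init.h>
        #include <linux/kernel.h>
        #include <linux/kvm_para.h>

        static void __init example_pv_setup(void)
        {
                /* No-op unless the kernel is built with CONFIG_KVM_GUEST;
                 * normally invoked once from the arch setup code. */
                kvm_guest_init();

                /* KVM_FEATURE_MMU_OP is assumed here purely for illustration. */
                if (kvm_para_has_feature(KVM_FEATURE_MMU_OP))
                        printk(KERN_INFO "example: KVM_HC_MMU_OP available\n");
        }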
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 1c4e46decb22..9b6f395c9625 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -38,6 +38,8 @@ typedef unsigned long hva_t;
38typedef u64 hpa_t; 38typedef u64 hpa_t;
39typedef unsigned long hfn_t; 39typedef unsigned long hfn_t;
40 40
41typedef hfn_t pfn_t;
42
41struct kvm_pio_request { 43struct kvm_pio_request {
42 unsigned long count; 44 unsigned long count;
43 int cur_count; 45 int cur_count;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d0bd97044abd..9a4f3e63e3bf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1798,6 +1798,8 @@ extern void mmput(struct mm_struct *);
1798extern struct mm_struct *get_task_mm(struct task_struct *task); 1798extern struct mm_struct *get_task_mm(struct task_struct *task);
1799/* Remove the current task's stale references to the old mm_struct */ 1799/* Remove the current task's stale references to the old mm_struct */
1800extern void mm_release(struct task_struct *, struct mm_struct *); 1800extern void mm_release(struct task_struct *, struct mm_struct *);
1801/* Allocate a new mm structure and copy contents from tsk->mm */
1802extern struct mm_struct *dup_mm(struct task_struct *tsk);
1801 1803
1802extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); 1804extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
1803extern void flush_thread(void); 1805extern void flush_thread(void);
diff --git a/kernel/fork.c b/kernel/fork.c
index cb46befdd3a0..c674aa8d3c31 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -521,7 +521,7 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
521 * Allocate a new mm structure and copy contents from the 521 * Allocate a new mm structure and copy contents from the
522 * mm structure of the passed in task structure. 522 * mm structure of the passed in task structure.
523 */ 523 */
524static struct mm_struct *dup_mm(struct task_struct *tsk) 524struct mm_struct *dup_mm(struct task_struct *tsk)
525{ 525{
526 struct mm_struct *mm, *oldmm = current->mm; 526 struct mm_struct *mm, *oldmm = current->mm;
527 int err; 527 int err;
diff --git a/mm/rmap.c b/mm/rmap.c
index 997f06907b6d..e9bb6b1093f6 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -413,9 +413,6 @@ int page_referenced(struct page *page, int is_locked,
413{ 413{
414 int referenced = 0; 414 int referenced = 0;
415 415
416 if (page_test_and_clear_young(page))
417 referenced++;
418
419 if (TestClearPageReferenced(page)) 416 if (TestClearPageReferenced(page))
420 referenced++; 417 referenced++;
421 418
@@ -433,6 +430,10 @@ int page_referenced(struct page *page, int is_locked,
433 unlock_page(page); 430 unlock_page(page);
434 } 431 }
435 } 432 }
433
434 if (page_test_and_clear_young(page))
435 referenced++;
436
436 return referenced; 437 return referenced;
437} 438}
438 439
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b2e12893e3f4..c82cf15730a1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -40,6 +40,7 @@
40#include <linux/kvm_para.h> 40#include <linux/kvm_para.h>
41#include <linux/pagemap.h> 41#include <linux/pagemap.h>
42#include <linux/mman.h> 42#include <linux/mman.h>
43#include <linux/swap.h>
43 44
44#include <asm/processor.h> 45#include <asm/processor.h>
45#include <asm/io.h> 46#include <asm/io.h>
@@ -59,7 +60,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
59 60
60static __read_mostly struct preempt_ops kvm_preempt_ops; 61static __read_mostly struct preempt_ops kvm_preempt_ops;
61 62
62static struct dentry *debugfs_dir; 63struct dentry *kvm_debugfs_dir;
63 64
64static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, 65static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
65 unsigned long arg); 66 unsigned long arg);
@@ -119,6 +120,29 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
119 smp_call_function_mask(cpus, ack_flush, NULL, 1); 120 smp_call_function_mask(cpus, ack_flush, NULL, 1);
120} 121}
121 122
123void kvm_reload_remote_mmus(struct kvm *kvm)
124{
125 int i, cpu;
126 cpumask_t cpus;
127 struct kvm_vcpu *vcpu;
128
129 cpus_clear(cpus);
130 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
131 vcpu = kvm->vcpus[i];
132 if (!vcpu)
133 continue;
134 if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
135 continue;
136 cpu = vcpu->cpu;
137 if (cpu != -1 && cpu != raw_smp_processor_id())
138 cpu_set(cpu, cpus);
139 }
140 if (cpus_empty(cpus))
141 return;
142 smp_call_function_mask(cpus, ack_flush, NULL, 1);
143}
144
145
122int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) 146int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
123{ 147{
124 struct page *page; 148 struct page *page;
@@ -170,6 +194,7 @@ static struct kvm *kvm_create_vm(void)
170 mutex_init(&kvm->lock); 194 mutex_init(&kvm->lock);
171 kvm_io_bus_init(&kvm->mmio_bus); 195 kvm_io_bus_init(&kvm->mmio_bus);
172 init_rwsem(&kvm->slots_lock); 196 init_rwsem(&kvm->slots_lock);
197 atomic_set(&kvm->users_count, 1);
173 spin_lock(&kvm_lock); 198 spin_lock(&kvm_lock);
174 list_add(&kvm->vm_list, &vm_list); 199 list_add(&kvm->vm_list, &vm_list);
175 spin_unlock(&kvm_lock); 200 spin_unlock(&kvm_lock);
@@ -189,9 +214,13 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
189 if (!dont || free->dirty_bitmap != dont->dirty_bitmap) 214 if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
190 vfree(free->dirty_bitmap); 215 vfree(free->dirty_bitmap);
191 216
217 if (!dont || free->lpage_info != dont->lpage_info)
218 vfree(free->lpage_info);
219
192 free->npages = 0; 220 free->npages = 0;
193 free->dirty_bitmap = NULL; 221 free->dirty_bitmap = NULL;
194 free->rmap = NULL; 222 free->rmap = NULL;
223 free->lpage_info = NULL;
195} 224}
196 225
197void kvm_free_physmem(struct kvm *kvm) 226void kvm_free_physmem(struct kvm *kvm)
@@ -215,11 +244,25 @@ static void kvm_destroy_vm(struct kvm *kvm)
215 mmdrop(mm); 244 mmdrop(mm);
216} 245}
217 246
247void kvm_get_kvm(struct kvm *kvm)
248{
249 atomic_inc(&kvm->users_count);
250}
251EXPORT_SYMBOL_GPL(kvm_get_kvm);
252
253void kvm_put_kvm(struct kvm *kvm)
254{
255 if (atomic_dec_and_test(&kvm->users_count))
256 kvm_destroy_vm(kvm);
257}
258EXPORT_SYMBOL_GPL(kvm_put_kvm);
259
260
218static int kvm_vm_release(struct inode *inode, struct file *filp) 261static int kvm_vm_release(struct inode *inode, struct file *filp)
219{ 262{
220 struct kvm *kvm = filp->private_data; 263 struct kvm *kvm = filp->private_data;
221 264
222 kvm_destroy_vm(kvm); 265 kvm_put_kvm(kvm);
223 return 0; 266 return 0;
224} 267}
225 268
@@ -301,6 +344,25 @@ int __kvm_set_memory_region(struct kvm *kvm,
301 new.user_alloc = user_alloc; 344 new.user_alloc = user_alloc;
302 new.userspace_addr = mem->userspace_addr; 345 new.userspace_addr = mem->userspace_addr;
303 } 346 }
347 if (npages && !new.lpage_info) {
348 int largepages = npages / KVM_PAGES_PER_HPAGE;
349 if (npages % KVM_PAGES_PER_HPAGE)
350 largepages++;
351 if (base_gfn % KVM_PAGES_PER_HPAGE)
352 largepages++;
353
354 new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info));
355
356 if (!new.lpage_info)
357 goto out_free;
358
359 memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info));
360
361 if (base_gfn % KVM_PAGES_PER_HPAGE)
362 new.lpage_info[0].write_count = 1;
363 if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
364 new.lpage_info[largepages-1].write_count = 1;
365 }
304 366
305 /* Allocate page dirty bitmap if needed */ 367 /* Allocate page dirty bitmap if needed */
306 if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { 368 if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
@@ -397,6 +459,12 @@ int is_error_page(struct page *page)
397} 459}
398EXPORT_SYMBOL_GPL(is_error_page); 460EXPORT_SYMBOL_GPL(is_error_page);
399 461
462int is_error_pfn(pfn_t pfn)
463{
464 return pfn == bad_pfn;
465}
466EXPORT_SYMBOL_GPL(is_error_pfn);
467
400static inline unsigned long bad_hva(void) 468static inline unsigned long bad_hva(void)
401{ 469{
402 return PAGE_OFFSET; 470 return PAGE_OFFSET;
@@ -444,7 +512,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
444} 512}
445EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); 513EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
446 514
447static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) 515unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
448{ 516{
449 struct kvm_memory_slot *slot; 517 struct kvm_memory_slot *slot;
450 518
@@ -458,7 +526,7 @@ static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
458/* 526/*
459 * Requires current->mm->mmap_sem to be held 527 * Requires current->mm->mmap_sem to be held
460 */ 528 */
461struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) 529pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
462{ 530{
463 struct page *page[1]; 531 struct page *page[1];
464 unsigned long addr; 532 unsigned long addr;
@@ -469,7 +537,7 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
469 addr = gfn_to_hva(kvm, gfn); 537 addr = gfn_to_hva(kvm, gfn);
470 if (kvm_is_error_hva(addr)) { 538 if (kvm_is_error_hva(addr)) {
471 get_page(bad_page); 539 get_page(bad_page);
472 return bad_page; 540 return page_to_pfn(bad_page);
473 } 541 }
474 542
475 npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, 543 npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
@@ -477,27 +545,71 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
477 545
478 if (npages != 1) { 546 if (npages != 1) {
479 get_page(bad_page); 547 get_page(bad_page);
480 return bad_page; 548 return page_to_pfn(bad_page);
481 } 549 }
482 550
483 return page[0]; 551 return page_to_pfn(page[0]);
552}
553
554EXPORT_SYMBOL_GPL(gfn_to_pfn);
555
556struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
557{
558 return pfn_to_page(gfn_to_pfn(kvm, gfn));
484} 559}
485 560
486EXPORT_SYMBOL_GPL(gfn_to_page); 561EXPORT_SYMBOL_GPL(gfn_to_page);
487 562
488void kvm_release_page_clean(struct page *page) 563void kvm_release_page_clean(struct page *page)
489{ 564{
490 put_page(page); 565 kvm_release_pfn_clean(page_to_pfn(page));
491} 566}
492EXPORT_SYMBOL_GPL(kvm_release_page_clean); 567EXPORT_SYMBOL_GPL(kvm_release_page_clean);
493 568
569void kvm_release_pfn_clean(pfn_t pfn)
570{
571 put_page(pfn_to_page(pfn));
572}
573EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
574
494void kvm_release_page_dirty(struct page *page) 575void kvm_release_page_dirty(struct page *page)
495{ 576{
577 kvm_release_pfn_dirty(page_to_pfn(page));
578}
579EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
580
581void kvm_release_pfn_dirty(pfn_t pfn)
582{
583 kvm_set_pfn_dirty(pfn);
584 kvm_release_pfn_clean(pfn);
585}
586EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
587
588void kvm_set_page_dirty(struct page *page)
589{
590 kvm_set_pfn_dirty(page_to_pfn(page));
591}
592EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
593
594void kvm_set_pfn_dirty(pfn_t pfn)
595{
596 struct page *page = pfn_to_page(pfn);
496 if (!PageReserved(page)) 597 if (!PageReserved(page))
497 SetPageDirty(page); 598 SetPageDirty(page);
498 put_page(page);
499} 599}
500EXPORT_SYMBOL_GPL(kvm_release_page_dirty); 600EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
601
602void kvm_set_pfn_accessed(pfn_t pfn)
603{
604 mark_page_accessed(pfn_to_page(pfn));
605}
606EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
607
608void kvm_get_pfn(pfn_t pfn)
609{
610 get_page(pfn_to_page(pfn));
611}
612EXPORT_SYMBOL_GPL(kvm_get_pfn);
501 613
502static int next_segment(unsigned long len, int offset) 614static int next_segment(unsigned long len, int offset)
503{ 615{
@@ -554,7 +666,9 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
554 addr = gfn_to_hva(kvm, gfn); 666 addr = gfn_to_hva(kvm, gfn);
555 if (kvm_is_error_hva(addr)) 667 if (kvm_is_error_hva(addr))
556 return -EFAULT; 668 return -EFAULT;
669 pagefault_disable();
557 r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); 670 r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
671 pagefault_enable();
558 if (r) 672 if (r)
559 return -EFAULT; 673 return -EFAULT;
560 return 0; 674 return 0;
@@ -651,6 +765,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
651 * We will block until either an interrupt or a signal wakes us up 765 * We will block until either an interrupt or a signal wakes us up
652 */ 766 */
653 while (!kvm_cpu_has_interrupt(vcpu) 767 while (!kvm_cpu_has_interrupt(vcpu)
768 && !kvm_cpu_has_pending_timer(vcpu)
654 && !signal_pending(current) 769 && !signal_pending(current)
655 && !kvm_arch_vcpu_runnable(vcpu)) { 770 && !kvm_arch_vcpu_runnable(vcpu)) {
656 set_current_state(TASK_INTERRUPTIBLE); 771 set_current_state(TASK_INTERRUPTIBLE);
@@ -678,8 +793,10 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
678 793
679 if (vmf->pgoff == 0) 794 if (vmf->pgoff == 0)
680 page = virt_to_page(vcpu->run); 795 page = virt_to_page(vcpu->run);
796#ifdef CONFIG_X86
681 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) 797 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
682 page = virt_to_page(vcpu->arch.pio_data); 798 page = virt_to_page(vcpu->arch.pio_data);
799#endif
683 else 800 else
684 return VM_FAULT_SIGBUS; 801 return VM_FAULT_SIGBUS;
685 get_page(page); 802 get_page(page);
@@ -701,11 +818,11 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
701{ 818{
702 struct kvm_vcpu *vcpu = filp->private_data; 819 struct kvm_vcpu *vcpu = filp->private_data;
703 820
704 fput(vcpu->kvm->filp); 821 kvm_put_kvm(vcpu->kvm);
705 return 0; 822 return 0;
706} 823}
707 824
708static struct file_operations kvm_vcpu_fops = { 825static const struct file_operations kvm_vcpu_fops = {
709 .release = kvm_vcpu_release, 826 .release = kvm_vcpu_release,
710 .unlocked_ioctl = kvm_vcpu_ioctl, 827 .unlocked_ioctl = kvm_vcpu_ioctl,
711 .compat_ioctl = kvm_vcpu_ioctl, 828 .compat_ioctl = kvm_vcpu_ioctl,
@@ -723,9 +840,10 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
723 840
724 r = anon_inode_getfd(&fd, &inode, &file, 841 r = anon_inode_getfd(&fd, &inode, &file,
725 "kvm-vcpu", &kvm_vcpu_fops, vcpu); 842 "kvm-vcpu", &kvm_vcpu_fops, vcpu);
726 if (r) 843 if (r) {
844 kvm_put_kvm(vcpu->kvm);
727 return r; 845 return r;
728 atomic_inc(&vcpu->kvm->filp->f_count); 846 }
729 return fd; 847 return fd;
730} 848}
731 849
@@ -760,6 +878,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
760 mutex_unlock(&kvm->lock); 878 mutex_unlock(&kvm->lock);
761 879
762 /* Now it's all set up, let userspace reach it */ 880 /* Now it's all set up, let userspace reach it */
881 kvm_get_kvm(kvm);
763 r = create_vcpu_fd(vcpu); 882 r = create_vcpu_fd(vcpu);
764 if (r < 0) 883 if (r < 0)
765 goto unlink; 884 goto unlink;
@@ -802,28 +921,39 @@ static long kvm_vcpu_ioctl(struct file *filp,
802 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); 921 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
803 break; 922 break;
804 case KVM_GET_REGS: { 923 case KVM_GET_REGS: {
805 struct kvm_regs kvm_regs; 924 struct kvm_regs *kvm_regs;
806 925
807 memset(&kvm_regs, 0, sizeof kvm_regs); 926 r = -ENOMEM;
808 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &kvm_regs); 927 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
809 if (r) 928 if (!kvm_regs)
810 goto out; 929 goto out;
930 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
931 if (r)
932 goto out_free1;
811 r = -EFAULT; 933 r = -EFAULT;
812 if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs)) 934 if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
813 goto out; 935 goto out_free1;
814 r = 0; 936 r = 0;
937out_free1:
938 kfree(kvm_regs);
815 break; 939 break;
816 } 940 }
817 case KVM_SET_REGS: { 941 case KVM_SET_REGS: {
818 struct kvm_regs kvm_regs; 942 struct kvm_regs *kvm_regs;
819 943
820 r = -EFAULT; 944 r = -ENOMEM;
821 if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs)) 945 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
946 if (!kvm_regs)
822 goto out; 947 goto out;
823 r = kvm_arch_vcpu_ioctl_set_regs(vcpu, &kvm_regs); 948 r = -EFAULT;
949 if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs)))
950 goto out_free2;
951 r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
824 if (r) 952 if (r)
825 goto out; 953 goto out_free2;
826 r = 0; 954 r = 0;
955out_free2:
956 kfree(kvm_regs);
827 break; 957 break;
828 } 958 }
829 case KVM_GET_SREGS: { 959 case KVM_GET_SREGS: {
@@ -851,6 +981,30 @@ static long kvm_vcpu_ioctl(struct file *filp,
851 r = 0; 981 r = 0;
852 break; 982 break;
853 } 983 }
984 case KVM_GET_MP_STATE: {
985 struct kvm_mp_state mp_state;
986
987 r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
988 if (r)
989 goto out;
990 r = -EFAULT;
991 if (copy_to_user(argp, &mp_state, sizeof mp_state))
992 goto out;
993 r = 0;
994 break;
995 }
996 case KVM_SET_MP_STATE: {
997 struct kvm_mp_state mp_state;
998
999 r = -EFAULT;
1000 if (copy_from_user(&mp_state, argp, sizeof mp_state))
1001 goto out;
1002 r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
1003 if (r)
1004 goto out;
1005 r = 0;
1006 break;
1007 }
854 case KVM_TRANSLATE: { 1008 case KVM_TRANSLATE: {
855 struct kvm_translation tr; 1009 struct kvm_translation tr;
856 1010
@@ -1005,7 +1159,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
1005 return 0; 1159 return 0;
1006} 1160}
1007 1161
1008static struct file_operations kvm_vm_fops = { 1162static const struct file_operations kvm_vm_fops = {
1009 .release = kvm_vm_release, 1163 .release = kvm_vm_release,
1010 .unlocked_ioctl = kvm_vm_ioctl, 1164 .unlocked_ioctl = kvm_vm_ioctl,
1011 .compat_ioctl = kvm_vm_ioctl, 1165 .compat_ioctl = kvm_vm_ioctl,
@@ -1024,12 +1178,10 @@ static int kvm_dev_ioctl_create_vm(void)
1024 return PTR_ERR(kvm); 1178 return PTR_ERR(kvm);
1025 r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm); 1179 r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
1026 if (r) { 1180 if (r) {
1027 kvm_destroy_vm(kvm); 1181 kvm_put_kvm(kvm);
1028 return r; 1182 return r;
1029 } 1183 }
1030 1184
1031 kvm->filp = file;
1032
1033 return fd; 1185 return fd;
1034} 1186}
1035 1187
@@ -1059,7 +1211,15 @@ static long kvm_dev_ioctl(struct file *filp,
1059 r = -EINVAL; 1211 r = -EINVAL;
1060 if (arg) 1212 if (arg)
1061 goto out; 1213 goto out;
1062 r = 2 * PAGE_SIZE; 1214 r = PAGE_SIZE; /* struct kvm_run */
1215#ifdef CONFIG_X86
1216 r += PAGE_SIZE; /* pio data page */
1217#endif
1218 break;
1219 case KVM_TRACE_ENABLE:
1220 case KVM_TRACE_PAUSE:
1221 case KVM_TRACE_DISABLE:
1222 r = kvm_trace_ioctl(ioctl, arg);
1063 break; 1223 break;
1064 default: 1224 default:
1065 return kvm_arch_dev_ioctl(filp, ioctl, arg); 1225 return kvm_arch_dev_ioctl(filp, ioctl, arg);
@@ -1232,9 +1392,9 @@ static void kvm_init_debug(void)
1232{ 1392{
1233 struct kvm_stats_debugfs_item *p; 1393 struct kvm_stats_debugfs_item *p;
1234 1394
1235 debugfs_dir = debugfs_create_dir("kvm", NULL); 1395 kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
1236 for (p = debugfs_entries; p->name; ++p) 1396 for (p = debugfs_entries; p->name; ++p)
1237 p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir, 1397 p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
1238 (void *)(long)p->offset, 1398 (void *)(long)p->offset,
1239 stat_fops[p->kind]); 1399 stat_fops[p->kind]);
1240} 1400}
@@ -1245,7 +1405,7 @@ static void kvm_exit_debug(void)
1245 1405
1246 for (p = debugfs_entries; p->name; ++p) 1406 for (p = debugfs_entries; p->name; ++p)
1247 debugfs_remove(p->dentry); 1407 debugfs_remove(p->dentry);
1248 debugfs_remove(debugfs_dir); 1408 debugfs_remove(kvm_debugfs_dir);
1249} 1409}
1250 1410
1251static int kvm_suspend(struct sys_device *dev, pm_message_t state) 1411static int kvm_suspend(struct sys_device *dev, pm_message_t state)
@@ -1272,6 +1432,7 @@ static struct sys_device kvm_sysdev = {
1272}; 1432};
1273 1433
1274struct page *bad_page; 1434struct page *bad_page;
1435pfn_t bad_pfn;
1275 1436
1276static inline 1437static inline
1277struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) 1438struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
@@ -1313,6 +1474,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
1313 goto out; 1474 goto out;
1314 } 1475 }
1315 1476
1477 bad_pfn = page_to_pfn(bad_page);
1478
1316 r = kvm_arch_hardware_setup(); 1479 r = kvm_arch_hardware_setup();
1317 if (r < 0) 1480 if (r < 0)
1318 goto out_free_0; 1481 goto out_free_0;
@@ -1386,6 +1549,7 @@ EXPORT_SYMBOL_GPL(kvm_init);
1386 1549
1387void kvm_exit(void) 1550void kvm_exit(void)
1388{ 1551{
1552 kvm_trace_cleanup();
1389 misc_deregister(&kvm_dev); 1553 misc_deregister(&kvm_dev);
1390 kmem_cache_destroy(kvm_vcpu_cache); 1554 kmem_cache_destroy(kvm_vcpu_cache);
1391 sysdev_unregister(&kvm_sysdev); 1555 sysdev_unregister(&kvm_sysdev);
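
kvm_main.c now routes KVM_GET_MP_STATE/KVM_SET_MP_STATE to the arch helpers and reports the vcpu mmap size as one kvm_run page plus, on x86, one pio data page. A rough userspace sketch under those assumptions; error handling is trimmed and the fds are assumed to have come from KVM_CREATE_VM/KVM_CREATE_VCPU, which are not shown here:

        #include <stdio.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        static int halt_vcpu(int kvm_fd, int vcpu_fd)
        {
                struct kvm_mp_state mp;
                int mmap_size;

                /* System ioctl: size of the per-vcpu mmap region. */
                mmap_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
                if (mmap_size < 0)
                        return -1;
                printf("vcpu mmap size: %d bytes\n", mmap_size);

                /* vcpu ioctls added by this series. */
                if (ioctl(vcpu_fd, KVM_GET_MP_STATE, &mp) < 0)
                        return -1;

                mp.mp_state = KVM_MP_STATE_HALTED;  /* renamed from VCPU_MP_STATE_* */
                return ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
        }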
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
new file mode 100644
index 000000000000..0e495470788d
--- /dev/null
+++ b/virt/kvm/kvm_trace.c
@@ -0,0 +1,276 @@
1/*
2 * kvm trace
3 *
4 * It is designed to allow debugging traces of kvm to be generated
5 * on UP / SMP machines. Each trace entry can be timestamped so that
6 * it's possible to reconstruct a chronological record of trace events.
 7 * The implementation is modeled on the blktrace kernel support.
8 *
9 * Copyright (c) 2008 Intel Corporation
10 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
11 *
12 * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
13 *
14 * Date: Feb 2008
15 */
16
17#include <linux/module.h>
18#include <linux/relay.h>
19#include <linux/debugfs.h>
20
21#include <linux/kvm_host.h>
22
23#define KVM_TRACE_STATE_RUNNING (1 << 0)
24#define KVM_TRACE_STATE_PAUSE (1 << 1)
25#define KVM_TRACE_STATE_CLEARUP (1 << 2)
26
27struct kvm_trace {
28 int trace_state;
29 struct rchan *rchan;
30 struct dentry *lost_file;
31 atomic_t lost_records;
32};
33static struct kvm_trace *kvm_trace;
34
35struct kvm_trace_probe {
36 const char *name;
37 const char *format;
38 u32 cycle_in;
39 marker_probe_func *probe_func;
40};
41
42static inline int calc_rec_size(int cycle, int extra)
43{
44 int rec_size = KVM_TRC_HEAD_SIZE;
45
46 rec_size += extra;
47 return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
48}
49
50static void kvm_add_trace(void *probe_private, void *call_data,
51 const char *format, va_list *args)
52{
53 struct kvm_trace_probe *p = probe_private;
54 struct kvm_trace *kt = kvm_trace;
55 struct kvm_trace_rec rec;
56 struct kvm_vcpu *vcpu;
57 int i, extra, size;
58
59 if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
60 return;
61
62 rec.event = va_arg(*args, u32);
63 vcpu = va_arg(*args, struct kvm_vcpu *);
64 rec.pid = current->tgid;
65 rec.vcpu_id = vcpu->vcpu_id;
66
67 extra = va_arg(*args, u32);
68 WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
69 extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX);
70 rec.extra_u32 = extra;
71
72 rec.cycle_in = p->cycle_in;
73
74 if (rec.cycle_in) {
75 u64 cycle = 0;
76
77 cycle = get_cycles();
78 rec.u.cycle.cycle_lo = (u32)cycle;
79 rec.u.cycle.cycle_hi = (u32)(cycle >> 32);
80
81 for (i = 0; i < rec.extra_u32; i++)
82 rec.u.cycle.extra_u32[i] = va_arg(*args, u32);
83 } else {
84 for (i = 0; i < rec.extra_u32; i++)
85 rec.u.nocycle.extra_u32[i] = va_arg(*args, u32);
86 }
87
88 size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32));
89 relay_write(kt->rchan, &rec, size);
90}
91
92static struct kvm_trace_probe kvm_trace_probes[] = {
93 { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
94 { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
95};
96
97static int lost_records_get(void *data, u64 *val)
98{
99 struct kvm_trace *kt = data;
100
101 *val = atomic_read(&kt->lost_records);
102 return 0;
103}
104
105DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");
106
107/*
108 * The relay channel is used in "no-overwrite" mode: it keeps track of how
109 * many times we encountered a full subbuffer, so that user space can be
110 * told how many records were lost.
111 */
112static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
113 void *prev_subbuf, size_t prev_padding)
114{
115 struct kvm_trace *kt;
116
117 if (!relay_buf_full(buf))
118 return 1;
119
120 kt = buf->chan->private_data;
121 atomic_inc(&kt->lost_records);
122
123 return 0;
124}
125
126static struct dentry *kvm_create_buf_file_callack(const char *filename,
127 struct dentry *parent,
128 int mode,
129 struct rchan_buf *buf,
130 int *is_global)
131{
132 return debugfs_create_file(filename, mode, parent, buf,
133 &relay_file_operations);
134}
135
136static int kvm_remove_buf_file_callback(struct dentry *dentry)
137{
138 debugfs_remove(dentry);
139 return 0;
140}
141
142static struct rchan_callbacks kvm_relay_callbacks = {
143 .subbuf_start = kvm_subbuf_start_callback,
144 .create_buf_file = kvm_create_buf_file_callack,
145 .remove_buf_file = kvm_remove_buf_file_callback,
146};
147
148static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
149{
150 struct kvm_trace *kt;
151 int i, r = -ENOMEM;
152
153 if (!kuts->buf_size || !kuts->buf_nr)
154 return -EINVAL;
155
156 kt = kzalloc(sizeof(*kt), GFP_KERNEL);
157 if (!kt)
158 goto err;
159
160 r = -EIO;
161 atomic_set(&kt->lost_records, 0);
162 kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir,
163 kt, &kvm_trace_lost_ops);
164 if (!kt->lost_file)
165 goto err;
166
167 kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size,
168 kuts->buf_nr, &kvm_relay_callbacks, kt);
169 if (!kt->rchan)
170 goto err;
171
172 kvm_trace = kt;
173
174 for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
175 struct kvm_trace_probe *p = &kvm_trace_probes[i];
176
177 r = marker_probe_register(p->name, p->format, p->probe_func, p);
178 if (r)
179 printk(KERN_INFO "Unable to register probe %s\n",
180 p->name);
181 }
182
183 kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;
184
185 return 0;
186err:
187 if (kt) {
188 if (kt->lost_file)
189 debugfs_remove(kt->lost_file);
190 if (kt->rchan)
191 relay_close(kt->rchan);
192 kfree(kt);
193 }
194 return r;
195}
196
197static int kvm_trace_enable(char __user *arg)
198{
199 struct kvm_user_trace_setup kuts;
200 int ret;
201
202 ret = copy_from_user(&kuts, arg, sizeof(kuts));
203 if (ret)
204 return -EFAULT;
205
206 ret = do_kvm_trace_enable(&kuts);
207 if (ret)
208 return ret;
209
210 return 0;
211}
212
213static int kvm_trace_pause(void)
214{
215 struct kvm_trace *kt = kvm_trace;
216 int r = -EINVAL;
217
218 if (kt == NULL)
219 return r;
220
221 if (kt->trace_state == KVM_TRACE_STATE_RUNNING) {
222 kt->trace_state = KVM_TRACE_STATE_PAUSE;
223 relay_flush(kt->rchan);
224 r = 0;
225 }
226
227 return r;
228}
229
230void kvm_trace_cleanup(void)
231{
232 struct kvm_trace *kt = kvm_trace;
233 int i;
234
235 if (kt == NULL)
236 return;
237
238 if (kt->trace_state == KVM_TRACE_STATE_RUNNING ||
239 kt->trace_state == KVM_TRACE_STATE_PAUSE) {
240
241 kt->trace_state = KVM_TRACE_STATE_CLEARUP;
242
243 for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
244 struct kvm_trace_probe *p = &kvm_trace_probes[i];
245 marker_probe_unregister(p->name, p->probe_func, p);
246 }
247
248 relay_close(kt->rchan);
249 debugfs_remove(kt->lost_file);
250 kfree(kt);
251 }
252}
253
254int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
255{
256 void __user *argp = (void __user *)arg;
257 long r = -EINVAL;
258
259 if (!capable(CAP_SYS_ADMIN))
260 return -EPERM;
261
262 switch (ioctl) {
263 case KVM_TRACE_ENABLE:
264 r = kvm_trace_enable(argp);
265 break;
266 case KVM_TRACE_PAUSE:
267 r = kvm_trace_pause();
268 break;
269 case KVM_TRACE_DISABLE:
270 r = 0;
271 kvm_trace_cleanup();
272 break;
273 }
274
275 return r;
276}
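
kvm_trace_ioctl() above accepts KVM_TRACE_ENABLE/PAUSE/DISABLE on the /dev/kvm fd (CAP_SYS_ADMIN required) and streams records through relay files under the kvm debugfs directory. A hedged userspace sketch; the buffer sizes and the debugfs path are assumptions, and struct kvm_user_trace_setup is taken to carry just the buf_size and buf_nr fields used by do_kvm_trace_enable():

        #include <fcntl.h>
        #include <string.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        static int start_kvm_trace(void)
        {
                struct kvm_user_trace_setup kuts;
                int kvm_fd = open("/dev/kvm", O_RDWR);

                if (kvm_fd < 0)
                        return -1;

                memset(&kuts, 0, sizeof(kuts));
                kuts.buf_size = 64 * 1024;   /* bytes per relay sub-buffer (assumed) */
                kuts.buf_nr   = 8;           /* number of sub-buffers (assumed) */

                /* Records then show up as per-cpu relay files, e.g.
                 * /sys/kernel/debug/kvm/trace0, next to "lost_records". */
                return ioctl(kvm_fd, KVM_TRACE_ENABLE, &kuts);
        }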