author     Linus Torvalds <torvalds@linux-foundation.org>  2009-01-02 14:41:11 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2009-01-02 14:41:11 -0500
commit     597b0d21626da4e6f09f132442caf0cc2b0eb47c (patch)
tree       13c0074bb20f7b05a471e78d4ff52c665a10266a
parent     2640c9a90fa596871e142f42052608864335f102 (diff)
parent     87917239204d67a316cb89751750f86c9ed3640b (diff)
Merge branch 'kvm-updates/2.6.29' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'kvm-updates/2.6.29' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (140 commits)
  KVM: MMU: handle large host sptes on invlpg/resync
  KVM: Add locking to virtual i8259 interrupt controller
  KVM: MMU: Don't treat a global pte as such if cr4.pge is cleared
  MAINTAINERS: Maintainership changes for kvm/ia64
  KVM: ia64: Fix kvm_arch_vcpu_ioctl_[gs]et_regs()
  KVM: x86: Rework user space NMI injection as KVM_CAP_USER_NMI
  KVM: VMX: Fix pending NMI-vs.-IRQ race for user space irqchip
  KVM: fix handling of ACK from shared guest IRQ
  KVM: MMU: check for present pdptr shadow page in walk_shadow
  KVM: Consolidate userspace memory capability reporting into common code
  KVM: Advertise the bug in memory region destruction as fixed
  KVM: use cpumask_var_t for cpus_hardware_enabled
  KVM: use modern cpumask primitives, no cpumask_t on stack
  KVM: Extract core of kvm_flush_remote_tlbs/kvm_reload_remote_mmus
  KVM: set owner of cpu and vm file operations
  anon_inodes: use fops->owner for module refcount
  x86: KVM guest: kvm_get_tsc_khz: return khz, not lpj
  KVM: MMU: prepopulate the shadow on invlpg
  KVM: MMU: skip global pgtables on sync due to cr3 switch
  KVM: MMU: collapse remote TLB flushes on root sync
  ...
-rw-r--r--  MAINTAINERS | 2
-rw-r--r--  arch/ia64/include/asm/kvm.h | 6
-rw-r--r--  arch/ia64/include/asm/kvm_host.h | 196
-rw-r--r--  arch/ia64/kvm/Makefile | 2
-rw-r--r--  arch/ia64/kvm/asm-offsets.c | 11
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c | 107
-rw-r--r--  arch/ia64/kvm/kvm_lib.c | 15
-rw-r--r--  arch/ia64/kvm/kvm_minstate.h | 4
-rw-r--r--  arch/ia64/kvm/misc.h | 3
-rw-r--r--  arch/ia64/kvm/mmio.c | 38
-rw-r--r--  arch/ia64/kvm/process.c | 29
-rw-r--r--  arch/ia64/kvm/vcpu.c | 76
-rw-r--r--  arch/ia64/kvm/vcpu.h | 5
-rw-r--r--  arch/ia64/kvm/vmm.c | 29
-rw-r--r--  arch/ia64/kvm/vmm_ivt.S | 1469
-rw-r--r--  arch/ia64/kvm/vtlb.c | 4
-rw-r--r--  arch/powerpc/include/asm/disassemble.h | 80
-rw-r--r--  arch/powerpc/include/asm/kvm_44x.h | 61
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 116
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 83
-rw-r--r--  arch/powerpc/include/asm/mmu-44x.h | 1
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 21
-rw-r--r--  arch/powerpc/kvm/44x.c | 228
-rw-r--r--  arch/powerpc/kvm/44x_emulate.c | 371
-rw-r--r--  arch/powerpc/kvm/44x_tlb.c | 463
-rw-r--r--  arch/powerpc/kvm/44x_tlb.h | 26
-rw-r--r--  arch/powerpc/kvm/Kconfig | 28
-rw-r--r--  arch/powerpc/kvm/Makefile | 12
-rw-r--r--  arch/powerpc/kvm/booke.c (renamed from arch/powerpc/kvm/booke_guest.c) | 418
-rw-r--r--  arch/powerpc/kvm/booke.h | 60
-rw-r--r--  arch/powerpc/kvm/booke_host.c | 83
-rw-r--r--  arch/powerpc/kvm/booke_interrupts.S | 72
-rw-r--r--  arch/powerpc/kvm/emulate.c | 447
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 130
-rw-r--r--  arch/powerpc/kvm/timing.c | 239
-rw-r--r--  arch/powerpc/kvm/timing.h | 102
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 41
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 45
-rw-r--r--  arch/x86/include/asm/kvm_x86_emulate.h | 11
-rw-r--r--  arch/x86/include/asm/mtrr.h | 25
-rw-r--r--  arch/x86/include/asm/svm.h (renamed from arch/x86/kvm/svm.h) | 0
-rw-r--r--  arch/x86/include/asm/virtext.h | 132
-rw-r--r--  arch/x86/include/asm/vmx.h (renamed from arch/x86/kvm/vmx.h) | 27
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 12
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.h | 18
-rw-r--r--  arch/x86/kernel/crash.c | 18
-rw-r--r--  arch/x86/kernel/kvmclock.c | 10
-rw-r--r--  arch/x86/kernel/reboot.c | 62
-rw-r--r--  arch/x86/kvm/i8254.c | 19
-rw-r--r--  arch/x86/kvm/i8259.c | 52
-rw-r--r--  arch/x86/kvm/irq.h | 6
-rw-r--r--  arch/x86/kvm/kvm_svm.h | 2
-rw-r--r--  arch/x86/kvm/lapic.c | 58
-rw-r--r--  arch/x86/kvm/mmu.c | 444
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 44
-rw-r--r--  arch/x86/kvm/svm.c | 48
-rw-r--r--  arch/x86/kvm/vmx.c | 350
-rw-r--r--  arch/x86/kvm/x86.c | 117
-rw-r--r--  arch/x86/kvm/x86_emulate.c | 297
-rw-r--r--  fs/anon_inodes.c | 7
-rw-r--r--  include/linux/kvm.h | 18
-rw-r--r--  include/linux/kvm_host.h | 12
-rw-r--r--  virt/kvm/ioapic.c | 8
-rw-r--r--  virt/kvm/ioapic.h | 2
-rw-r--r--  virt/kvm/irq_comm.c | 19
-rw-r--r--  virt/kvm/kvm_main.c | 420
-rw-r--r--  virt/kvm/kvm_trace.c | 1
68 files changed, 4760 insertions, 2606 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 88f43e09aeb3..befacf07729f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2542,8 +2542,6 @@ W: http://kvm.qumranet.com
2542S: Supported 2542S: Supported
2543 2543
2544KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64) 2544KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64)
2545P: Anthony Xu
2546M: anthony.xu@intel.com
2547P: Xiantao Zhang 2545P: Xiantao Zhang
2548M: xiantao.zhang@intel.com 2546M: xiantao.zhang@intel.com
2549L: kvm-ia64@vger.kernel.org 2547L: kvm-ia64@vger.kernel.org
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index f38472ac2267..68aa6da807c1 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -166,8 +166,6 @@ struct saved_vpd {
166}; 166};
167 167
168struct kvm_regs { 168struct kvm_regs {
169 char *saved_guest;
170 char *saved_stack;
171 struct saved_vpd vpd; 169 struct saved_vpd vpd;
172 /*Arch-regs*/ 170 /*Arch-regs*/
173 int mp_state; 171 int mp_state;
@@ -200,6 +198,10 @@ struct kvm_regs {
200 unsigned long fp_psr; /*used for lazy float register */ 198 unsigned long fp_psr; /*used for lazy float register */
201 unsigned long saved_gp; 199 unsigned long saved_gp;
202 /*for phycial emulation */ 200 /*for phycial emulation */
201
202 union context saved_guest;
203
204 unsigned long reserved[64]; /* for future use */
203}; 205};
204 206
205struct kvm_sregs { 207struct kvm_sregs {
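
The kvm_regs change above replaces the old saved_guest/saved_stack user-space pointers with a context embedded directly in the structure, plus a reserved tail for future ABI growth; this is what later lets kvm_arch_vcpu_ioctl_[gs]et_regs() use memcpy() instead of copy_to_user()/copy_from_user(). A minimal user-space sketch of the resulting save/restore round trip (illustrative only, assuming a vcpu fd already obtained through the usual KVM_CREATE_VM/KVM_CREATE_VCPU sequence):

/* Illustrative sketch only, not part of the patch. */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int checkpoint_vcpu(int vcpu_fd, struct kvm_regs *snapshot)
{
	/* One ioctl now moves the whole context, saved_guest included. */
	return ioctl(vcpu_fd, KVM_GET_REGS, snapshot);
}

static int restore_vcpu(int vcpu_fd, const struct kvm_regs *snapshot)
{
	return ioctl(vcpu_fd, KVM_SET_REGS, snapshot);
}
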
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index c60d324da540..0560f3fae538 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -23,17 +23,6 @@
23#ifndef __ASM_KVM_HOST_H 23#ifndef __ASM_KVM_HOST_H
24#define __ASM_KVM_HOST_H 24#define __ASM_KVM_HOST_H
25 25
26
27#include <linux/types.h>
28#include <linux/mm.h>
29#include <linux/kvm.h>
30#include <linux/kvm_para.h>
31#include <linux/kvm_types.h>
32
33#include <asm/pal.h>
34#include <asm/sal.h>
35
36#define KVM_MAX_VCPUS 4
37#define KVM_MEMORY_SLOTS 32 26#define KVM_MEMORY_SLOTS 32
38/* memory slots that does not exposed to userspace */ 27/* memory slots that does not exposed to userspace */
39#define KVM_PRIVATE_MEM_SLOTS 4 28#define KVM_PRIVATE_MEM_SLOTS 4
@@ -50,70 +39,132 @@
50#define EXIT_REASON_EXTERNAL_INTERRUPT 6 39#define EXIT_REASON_EXTERNAL_INTERRUPT 6
51#define EXIT_REASON_IPI 7 40#define EXIT_REASON_IPI 7
52#define EXIT_REASON_PTC_G 8 41#define EXIT_REASON_PTC_G 8
42#define EXIT_REASON_DEBUG 20
53 43
54/*Define vmm address space and vm data space.*/ 44/*Define vmm address space and vm data space.*/
55#define KVM_VMM_SIZE (16UL<<20) 45#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
56#define KVM_VMM_SHIFT 24 46#define KVM_VMM_SHIFT 24
57#define KVM_VMM_BASE 0xD000000000000000UL 47#define KVM_VMM_BASE 0xD000000000000000
58#define VMM_SIZE (8UL<<20) 48#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
59 49
60/* 50/*
61 * Define vm_buffer, used by PAL Services, base address. 51 * Define vm_buffer, used by PAL Services, base address.
62 * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M 52 * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
63 */ 53 */
64#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE) 54#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
65#define KVM_VM_BUFFER_SIZE (8UL<<20) 55#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
66 56
67/*Define Virtual machine data layout.*/ 57/*
68#define KVM_VM_DATA_SHIFT 24 58 * kvm guest's data area looks as follow:
69#define KVM_VM_DATA_SIZE (1UL << KVM_VM_DATA_SHIFT) 59 *
70#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VMM_SIZE) 60 * +----------------------+ ------- KVM_VM_DATA_SIZE
71 61 * | vcpu[n]'s data | | ___________________KVM_STK_OFFSET
72 62 * | | | / |
73#define KVM_P2M_BASE KVM_VM_DATA_BASE 63 * | .......... | | /vcpu's struct&stack |
74#define KVM_P2M_OFS 0 64 * | .......... | | /---------------------|---- 0
75#define KVM_P2M_SIZE (8UL << 20) 65 * | vcpu[5]'s data | | / vpd |
76 66 * | vcpu[4]'s data | |/-----------------------|
77#define KVM_VHPT_BASE (KVM_P2M_BASE + KVM_P2M_SIZE) 67 * | vcpu[3]'s data | / vtlb |
78#define KVM_VHPT_OFS KVM_P2M_SIZE 68 * | vcpu[2]'s data | /|------------------------|
79#define KVM_VHPT_BLOCK_SIZE (2UL << 20) 69 * | vcpu[1]'s data |/ | vhpt |
80#define VHPT_SHIFT 18 70 * | vcpu[0]'s data |____________________________|
81#define VHPT_SIZE (1UL << VHPT_SHIFT) 71 * +----------------------+ |
82#define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5)) 72 * | memory dirty log | |
83 73 * +----------------------+ |
84#define KVM_VTLB_BASE (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE) 74 * | vm's data struct | |
85#define KVM_VTLB_OFS (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE) 75 * +----------------------+ |
86#define KVM_VTLB_BLOCK_SIZE (1UL<<20) 76 * | | |
87#define VTLB_SHIFT 17 77 * | | |
88#define VTLB_SIZE (1UL<<VTLB_SHIFT) 78 * | | |
89#define VTLB_NUM_ENTRIES (1<<(VTLB_SHIFT-5)) 79 * | | |
90 80 * | | |
91#define KVM_VPD_BASE (KVM_VTLB_BASE+KVM_VTLB_BLOCK_SIZE) 81 * | | |
92#define KVM_VPD_OFS (KVM_VTLB_OFS+KVM_VTLB_BLOCK_SIZE) 82 * | | |
93#define KVM_VPD_BLOCK_SIZE (2UL<<20) 83 * | vm's p2m table | |
94#define VPD_SHIFT 16 84 * | | |
95#define VPD_SIZE (1UL<<VPD_SHIFT) 85 * | | |
96 86 * | | | |
97#define KVM_VCPU_BASE (KVM_VPD_BASE+KVM_VPD_BLOCK_SIZE) 87 * vm's data->| | | |
98#define KVM_VCPU_OFS (KVM_VPD_OFS+KVM_VPD_BLOCK_SIZE) 88 * +----------------------+ ------- 0
99#define KVM_VCPU_BLOCK_SIZE (2UL<<20) 89 * To support large memory, needs to increase the size of p2m.
100#define VCPU_SHIFT 18 90 * To support more vcpus, needs to ensure it has enough space to
101#define VCPU_SIZE (1UL<<VCPU_SHIFT) 91 * hold vcpus' data.
102#define MAX_VCPU_NUM KVM_VCPU_BLOCK_SIZE/VCPU_SIZE 92 */
103 93
104#define KVM_VM_BASE (KVM_VCPU_BASE+KVM_VCPU_BLOCK_SIZE) 94#define KVM_VM_DATA_SHIFT 26
105#define KVM_VM_OFS (KVM_VCPU_OFS+KVM_VCPU_BLOCK_SIZE) 95#define KVM_VM_DATA_SIZE (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
106#define KVM_VM_BLOCK_SIZE (1UL<<19) 96#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VM_DATA_SIZE)
107 97
108#define KVM_MEM_DIRTY_LOG_BASE (KVM_VM_BASE+KVM_VM_BLOCK_SIZE) 98#define KVM_P2M_BASE KVM_VM_DATA_BASE
109#define KVM_MEM_DIRTY_LOG_OFS (KVM_VM_OFS+KVM_VM_BLOCK_SIZE) 99#define KVM_P2M_SIZE (__IA64_UL_CONST(24) << 20)
110#define KVM_MEM_DIRTY_LOG_SIZE (1UL<<19) 100
111 101#define VHPT_SHIFT 16
112/* Get vpd, vhpt, tlb, vcpu, base*/ 102#define VHPT_SIZE (__IA64_UL_CONST(1) << VHPT_SHIFT)
113#define VPD_ADDR(n) (KVM_VPD_BASE+n*VPD_SIZE) 103#define VHPT_NUM_ENTRIES (__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
114#define VHPT_ADDR(n) (KVM_VHPT_BASE+n*VHPT_SIZE) 104
115#define VTLB_ADDR(n) (KVM_VTLB_BASE+n*VTLB_SIZE) 105#define VTLB_SHIFT 16
116#define VCPU_ADDR(n) (KVM_VCPU_BASE+n*VCPU_SIZE) 106#define VTLB_SIZE (__IA64_UL_CONST(1) << VTLB_SHIFT)
107#define VTLB_NUM_ENTRIES (1UL << (VHPT_SHIFT-5))
108
109#define VPD_SHIFT 16
110#define VPD_SIZE (__IA64_UL_CONST(1) << VPD_SHIFT)
111
112#define VCPU_STRUCT_SHIFT 16
113#define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
114
115#define KVM_STK_OFFSET VCPU_STRUCT_SIZE
116
117#define KVM_VM_STRUCT_SHIFT 19
118#define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
119
120#define KVM_MEM_DIRY_LOG_SHIFT 19
121#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
122
123#ifndef __ASSEMBLY__
124
125/*Define the max vcpus and memory for Guests.*/
126#define KVM_MAX_VCPUS (KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
127 KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
128#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
129
130#define VMM_LOG_LEN 256
131
132#include <linux/types.h>
133#include <linux/mm.h>
134#include <linux/kvm.h>
135#include <linux/kvm_para.h>
136#include <linux/kvm_types.h>
137
138#include <asm/pal.h>
139#include <asm/sal.h>
140#include <asm/page.h>
141
142struct kvm_vcpu_data {
143 char vcpu_vhpt[VHPT_SIZE];
144 char vcpu_vtlb[VTLB_SIZE];
145 char vcpu_vpd[VPD_SIZE];
146 char vcpu_struct[VCPU_STRUCT_SIZE];
147};
148
149struct kvm_vm_data {
150 char kvm_p2m[KVM_P2M_SIZE];
151 char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
152 char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
153 struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
154};
155
156#define VCPU_BASE(n) KVM_VM_DATA_BASE + \
157 offsetof(struct kvm_vm_data, vcpu_data[n])
158#define VM_BASE KVM_VM_DATA_BASE + \
159 offsetof(struct kvm_vm_data, kvm_vm_struct)
160#define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \
161 offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
162
163#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
164#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
165#define VPD_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
166#define VCPU_STRUCT_BASE(n) (VCPU_BASE(n) + \
167 offsetof(struct kvm_vcpu_data, vcpu_struct))
117 168
118/*IO section definitions*/ 169/*IO section definitions*/
119#define IOREQ_READ 1 170#define IOREQ_READ 1
@@ -389,6 +440,7 @@ struct kvm_vcpu_arch {
389 440
390 unsigned long opcode; 441 unsigned long opcode;
391 unsigned long cause; 442 unsigned long cause;
443 char log_buf[VMM_LOG_LEN];
392 union context host; 444 union context host;
393 union context guest; 445 union context guest;
394}; 446};
@@ -403,14 +455,13 @@ struct kvm_sal_data {
403}; 455};
404 456
405struct kvm_arch { 457struct kvm_arch {
458 spinlock_t dirty_log_lock;
459
406 unsigned long vm_base; 460 unsigned long vm_base;
407 unsigned long metaphysical_rr0; 461 unsigned long metaphysical_rr0;
408 unsigned long metaphysical_rr4; 462 unsigned long metaphysical_rr4;
409 unsigned long vmm_init_rr; 463 unsigned long vmm_init_rr;
410 unsigned long vhpt_base; 464
411 unsigned long vtlb_base;
412 unsigned long vpd_base;
413 spinlock_t dirty_log_lock;
414 struct kvm_ioapic *vioapic; 465 struct kvm_ioapic *vioapic;
415 struct kvm_vm_stat stat; 466 struct kvm_vm_stat stat;
416 struct kvm_sal_data rdv_sal_data; 467 struct kvm_sal_data rdv_sal_data;
@@ -512,7 +563,7 @@ struct kvm_pt_regs {
512 563
513static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v) 564static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
514{ 565{
515 return (struct kvm_pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; 566 return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
516} 567}
517 568
518typedef int kvm_vmm_entry(void); 569typedef int kvm_vmm_entry(void);
@@ -531,5 +582,6 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
531void kvm_sal_emul(struct kvm_vcpu *vcpu); 582void kvm_sal_emul(struct kvm_vcpu *vcpu);
532 583
533static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {} 584static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {}
585#endif /* __ASSEMBLY__*/
534 586
535#endif 587#endif
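
With the offsetof()-based layout above, every per-vcpu region is just an offset into the single per-VM allocation rather than a separately tracked base. A standalone toy sketch of that idea follows; the toy_ names and sizes are invented here, only the structure mirrors kvm_vm_data/kvm_vcpu_data:

/* Toy sketch, not kernel code: sizes are made up, structure mirrors kvm_vm_data. */
#include <stdio.h>
#include <stddef.h>

struct toy_vcpu_data {
	char vcpu_vhpt[0x100];
	char vcpu_vtlb[0x100];
	char vcpu_vpd[0x80];
	char vcpu_struct[0x200];
};

struct toy_vm_data {
	char kvm_p2m[0x400];
	char kvm_vm_struct[0x200];
	char kvm_mem_dirty_log[0x100];
	struct toy_vcpu_data vcpu_data[4];
};

/* Same idea as VCPU_BASE(n)/VHPT_BASE(n), written with constant offsetof only. */
#define TOY_VCPU_BASE(base, n) ((base) + offsetof(struct toy_vm_data, vcpu_data) + \
				(n) * sizeof(struct toy_vcpu_data))
#define TOY_VHPT_BASE(base, n) (TOY_VCPU_BASE(base, n) + \
				offsetof(struct toy_vcpu_data, vcpu_vhpt))

int main(void)
{
	unsigned long base = 0x10000000UL;	/* stands in for KVM_VM_DATA_BASE */
	int n;

	for (n = 0; n < 4; n++)
		printf("vcpu%d: base=%#lx vhpt=%#lx\n", n,
		       TOY_VCPU_BASE(base, n), TOY_VHPT_BASE(base, n));
	return 0;
}
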
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 92cef66ca268..76464dc312e6 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -60,7 +60,7 @@ obj-$(CONFIG_KVM) += kvm.o
60 60
61CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 61CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
62kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ 62kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
63 vtlb.o process.o 63 vtlb.o process.o kvm_lib.o
64#Add link memcpy and memset to avoid possible structure assignment error 64#Add link memcpy and memset to avoid possible structure assignment error
65kvm-intel-objs += memcpy.o memset.o 65kvm-intel-objs += memcpy.o memset.o
66obj-$(CONFIG_KVM_INTEL) += kvm-intel.o 66obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
index 4e3dc13a619c..0c3564a7a033 100644
--- a/arch/ia64/kvm/asm-offsets.c
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -24,19 +24,10 @@
24 24
25#include <linux/autoconf.h> 25#include <linux/autoconf.h>
26#include <linux/kvm_host.h> 26#include <linux/kvm_host.h>
27#include <linux/kbuild.h>
27 28
28#include "vcpu.h" 29#include "vcpu.h"
29 30
30#define task_struct kvm_vcpu
31
32#define DEFINE(sym, val) \
33 asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
34
35#define BLANK() asm volatile("\n->" : :)
36
37#define OFFSET(_sym, _str, _mem) \
38 DEFINE(_sym, offsetof(_str, _mem));
39
40void foo(void) 31void foo(void)
41{ 32{
42 DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu)); 33 DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
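
The asm-offsets.c hunk above drops the file's private DEFINE/OFFSET/BLANK macros in favour of the common <linux/kbuild.h> copies; the underlying trick is unchanged. A standalone sketch of that trick, with toy names, meant to be compiled with "gcc -S" only (the "->" markers are deliberately not valid assembly and are turned into #defines by the kernel's sed step):

/* Toy sketch of the kbuild asm-offsets trick; not kernel code. */
#include <stddef.h>

#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " (%0) " #val : : "i" (val))

struct toy_vcpu { long flags; long regs[32]; };

void foo(void)
{
	/* "gcc -S" leaves lines like "-> TOY_REGS_OFFSET (8) ..." in the .s output. */
	DEFINE(TOY_VCPU_SIZE, sizeof(struct toy_vcpu));
	DEFINE(TOY_REGS_OFFSET, offsetof(struct toy_vcpu, regs));
}
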
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index af1464f7a6ad..0f5ebd948437 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -180,7 +180,6 @@ int kvm_dev_ioctl_check_extension(long ext)
180 180
181 switch (ext) { 181 switch (ext) {
182 case KVM_CAP_IRQCHIP: 182 case KVM_CAP_IRQCHIP:
183 case KVM_CAP_USER_MEMORY:
184 case KVM_CAP_MP_STATE: 183 case KVM_CAP_MP_STATE:
185 184
186 r = 1; 185 r = 1;
@@ -439,7 +438,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
439 expires = div64_u64(itc_diff, cyc_per_usec); 438 expires = div64_u64(itc_diff, cyc_per_usec);
440 kt = ktime_set(0, 1000 * expires); 439 kt = ktime_set(0, 1000 * expires);
441 440
442 down_read(&vcpu->kvm->slots_lock);
443 vcpu->arch.ht_active = 1; 441 vcpu->arch.ht_active = 1;
444 hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); 442 hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
445 443
@@ -452,7 +450,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
452 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) 450 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
453 vcpu->arch.mp_state = 451 vcpu->arch.mp_state =
454 KVM_MP_STATE_RUNNABLE; 452 KVM_MP_STATE_RUNNABLE;
455 up_read(&vcpu->kvm->slots_lock);
456 453
457 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) 454 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
458 return -EINTR; 455 return -EINTR;
@@ -476,6 +473,13 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
476 return 1; 473 return 1;
477} 474}
478 475
476static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
477 struct kvm_run *kvm_run)
478{
479 printk("VMM: %s", vcpu->arch.log_buf);
480 return 1;
481}
482
479static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, 483static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
480 struct kvm_run *kvm_run) = { 484 struct kvm_run *kvm_run) = {
481 [EXIT_REASON_VM_PANIC] = handle_vm_error, 485 [EXIT_REASON_VM_PANIC] = handle_vm_error,
@@ -487,6 +491,7 @@ static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
487 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, 491 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
488 [EXIT_REASON_IPI] = handle_ipi, 492 [EXIT_REASON_IPI] = handle_ipi,
489 [EXIT_REASON_PTC_G] = handle_global_purge, 493 [EXIT_REASON_PTC_G] = handle_global_purge,
494 [EXIT_REASON_DEBUG] = handle_vcpu_debug,
490 495
491}; 496};
492 497
@@ -698,27 +703,24 @@ out:
698 return r; 703 return r;
699} 704}
700 705
701/*
702 * Allocate 16M memory for every vm to hold its specific data.
703 * Its memory map is defined in kvm_host.h.
704 */
705static struct kvm *kvm_alloc_kvm(void) 706static struct kvm *kvm_alloc_kvm(void)
706{ 707{
707 708
708 struct kvm *kvm; 709 struct kvm *kvm;
709 uint64_t vm_base; 710 uint64_t vm_base;
710 711
712 BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
713
711 vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); 714 vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
712 715
713 if (!vm_base) 716 if (!vm_base)
714 return ERR_PTR(-ENOMEM); 717 return ERR_PTR(-ENOMEM);
715 printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
716 718
717 /* Zero all pages before use! */
718 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); 719 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
719 720 kvm = (struct kvm *)(vm_base +
720 kvm = (struct kvm *)(vm_base + KVM_VM_OFS); 721 offsetof(struct kvm_vm_data, kvm_vm_struct));
721 kvm->arch.vm_base = vm_base; 722 kvm->arch.vm_base = vm_base;
723 printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
722 724
723 return kvm; 725 return kvm;
724} 726}
@@ -760,21 +762,12 @@ static void kvm_build_io_pmt(struct kvm *kvm)
760 762
761static void kvm_init_vm(struct kvm *kvm) 763static void kvm_init_vm(struct kvm *kvm)
762{ 764{
763 long vm_base;
764
765 BUG_ON(!kvm); 765 BUG_ON(!kvm);
766 766
767 kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; 767 kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
768 kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; 768 kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
769 kvm->arch.vmm_init_rr = VMM_INIT_RR; 769 kvm->arch.vmm_init_rr = VMM_INIT_RR;
770 770
771 vm_base = kvm->arch.vm_base;
772 if (vm_base) {
773 kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
774 kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
775 kvm->arch.vpd_base = vm_base + KVM_VPD_OFS;
776 }
777
778 /* 771 /*
779 *Fill P2M entries for MMIO/IO ranges 772 *Fill P2M entries for MMIO/IO ranges
780 */ 773 */
@@ -838,9 +831,8 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
838 831
839int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 832int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
840{ 833{
841 int i;
842 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 834 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
843 int r; 835 int i;
844 836
845 vcpu_load(vcpu); 837 vcpu_load(vcpu);
846 838
@@ -857,18 +849,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
857 849
858 vpd->vpr = regs->vpd.vpr; 850 vpd->vpr = regs->vpd.vpr;
859 851
860 r = -EFAULT; 852 memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
861 r = copy_from_user(&vcpu->arch.guest, regs->saved_guest,
862 sizeof(union context));
863 if (r)
864 goto out;
865 r = copy_from_user(vcpu + 1, regs->saved_stack +
866 sizeof(struct kvm_vcpu),
867 IA64_STK_OFFSET - sizeof(struct kvm_vcpu));
868 if (r)
869 goto out;
870 vcpu->arch.exit_data =
871 ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
872 853
873 RESTORE_REGS(mp_state); 854 RESTORE_REGS(mp_state);
874 RESTORE_REGS(vmm_rr); 855 RESTORE_REGS(vmm_rr);
@@ -902,9 +883,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
902 set_bit(KVM_REQ_RESUME, &vcpu->requests); 883 set_bit(KVM_REQ_RESUME, &vcpu->requests);
903 884
904 vcpu_put(vcpu); 885 vcpu_put(vcpu);
905 r = 0; 886
906out: 887 return 0;
907 return r;
908} 888}
909 889
910long kvm_arch_vm_ioctl(struct file *filp, 890long kvm_arch_vm_ioctl(struct file *filp,
@@ -1166,10 +1146,11 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1166 /*Set entry address for first run.*/ 1146 /*Set entry address for first run.*/
1167 regs->cr_iip = PALE_RESET_ENTRY; 1147 regs->cr_iip = PALE_RESET_ENTRY;
1168 1148
1169 /*Initilize itc offset for vcpus*/ 1149 /*Initialize itc offset for vcpus*/
1170 itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); 1150 itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
1171 for (i = 0; i < MAX_VCPU_NUM; i++) { 1151 for (i = 0; i < KVM_MAX_VCPUS; i++) {
1172 v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); 1152 v = (struct kvm_vcpu *)((char *)vcpu +
1153 sizeof(struct kvm_vcpu_data) * i);
1173 v->arch.itc_offset = itc_offset; 1154 v->arch.itc_offset = itc_offset;
1174 v->arch.last_itc = 0; 1155 v->arch.last_itc = 0;
1175 } 1156 }
@@ -1183,7 +1164,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1183 vcpu->arch.apic->vcpu = vcpu; 1164 vcpu->arch.apic->vcpu = vcpu;
1184 1165
1185 p_ctx->gr[1] = 0; 1166 p_ctx->gr[1] = 0;
1186 p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET); 1167 p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
1187 p_ctx->gr[13] = (unsigned long)vmm_vcpu; 1168 p_ctx->gr[13] = (unsigned long)vmm_vcpu;
1188 p_ctx->psr = 0x1008522000UL; 1169 p_ctx->psr = 0x1008522000UL;
1189 p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ 1170 p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
@@ -1218,12 +1199,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1218 vcpu->arch.hlt_timer.function = hlt_timer_fn; 1199 vcpu->arch.hlt_timer.function = hlt_timer_fn;
1219 1200
1220 vcpu->arch.last_run_cpu = -1; 1201 vcpu->arch.last_run_cpu = -1;
1221 vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id); 1202 vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
1222 vcpu->arch.vsa_base = kvm_vsa_base; 1203 vcpu->arch.vsa_base = kvm_vsa_base;
1223 vcpu->arch.__gp = kvm_vmm_gp; 1204 vcpu->arch.__gp = kvm_vmm_gp;
1224 vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); 1205 vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
1225 vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id); 1206 vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
1226 vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id); 1207 vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
1227 init_ptce_info(vcpu); 1208 init_ptce_info(vcpu);
1228 1209
1229 r = 0; 1210 r = 0;
@@ -1273,12 +1254,22 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1273 int r; 1254 int r;
1274 int cpu; 1255 int cpu;
1275 1256
1257 BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
1258
1259 r = -EINVAL;
1260 if (id >= KVM_MAX_VCPUS) {
1261 printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
1262 KVM_MAX_VCPUS);
1263 goto fail;
1264 }
1265
1276 r = -ENOMEM; 1266 r = -ENOMEM;
1277 if (!vm_base) { 1267 if (!vm_base) {
1278 printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); 1268 printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
1279 goto fail; 1269 goto fail;
1280 } 1270 }
1281 vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id); 1271 vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
1272 vcpu_data[id].vcpu_struct));
1282 vcpu->kvm = kvm; 1273 vcpu->kvm = kvm;
1283 1274
1284 cpu = get_cpu(); 1275 cpu = get_cpu();
@@ -1374,9 +1365,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1374 1365
1375int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 1366int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1376{ 1367{
1377 int i;
1378 int r;
1379 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 1368 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1369 int i;
1370
1380 vcpu_load(vcpu); 1371 vcpu_load(vcpu);
1381 1372
1382 for (i = 0; i < 16; i++) { 1373 for (i = 0; i < 16; i++) {
@@ -1391,14 +1382,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1391 regs->vpd.vpsr = vpd->vpsr; 1382 regs->vpd.vpsr = vpd->vpsr;
1392 regs->vpd.vpr = vpd->vpr; 1383 regs->vpd.vpr = vpd->vpr;
1393 1384
1394 r = -EFAULT; 1385 memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
1395 r = copy_to_user(regs->saved_guest, &vcpu->arch.guest, 1386
1396 sizeof(union context));
1397 if (r)
1398 goto out;
1399 r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET);
1400 if (r)
1401 goto out;
1402 SAVE_REGS(mp_state); 1387 SAVE_REGS(mp_state);
1403 SAVE_REGS(vmm_rr); 1388 SAVE_REGS(vmm_rr);
1404 memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); 1389 memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
@@ -1426,10 +1411,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1426 SAVE_REGS(metaphysical_saved_rr4); 1411 SAVE_REGS(metaphysical_saved_rr4);
1427 SAVE_REGS(fp_psr); 1412 SAVE_REGS(fp_psr);
1428 SAVE_REGS(saved_gp); 1413 SAVE_REGS(saved_gp);
1414
1429 vcpu_put(vcpu); 1415 vcpu_put(vcpu);
1430 r = 0; 1416 return 0;
1431out:
1432 return r;
1433} 1417}
1434 1418
1435void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 1419void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
@@ -1457,6 +1441,9 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
1457 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; 1441 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
1458 unsigned long base_gfn = memslot->base_gfn; 1442 unsigned long base_gfn = memslot->base_gfn;
1459 1443
1444 if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
1445 return -ENOMEM;
1446
1460 for (i = 0; i < npages; i++) { 1447 for (i = 0; i < npages; i++) {
1461 pfn = gfn_to_pfn(kvm, base_gfn + i); 1448 pfn = gfn_to_pfn(kvm, base_gfn + i);
1462 if (!kvm_is_mmio_pfn(pfn)) { 1449 if (!kvm_is_mmio_pfn(pfn)) {
@@ -1631,8 +1618,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
1631 struct kvm_memory_slot *memslot; 1618 struct kvm_memory_slot *memslot;
1632 int r, i; 1619 int r, i;
1633 long n, base; 1620 long n, base;
1634 unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS 1621 unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
1635 + KVM_MEM_DIRTY_LOG_OFS); 1622 offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
1636 1623
1637 r = -EINVAL; 1624 r = -EINVAL;
1638 if (log->slot >= KVM_MEMORY_SLOTS) 1625 if (log->slot >= KVM_MEMORY_SLOTS)
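
handle_vcpu_debug() above is the host half of the VMM logging path added elsewhere in this merge: the VMM-side printk() in vmm.c fills vcpu->arch.log_buf and raises EXIT_REASON_DEBUG, and the host prints it through the same exit-reason handler table as the other exits. A toy model of that dispatch pattern (all toy_ names are invented for the sketch, not kernel code):

/* Toy model of the exit-reason dispatch table; not kernel code. */
#include <stdio.h>

enum { TOY_EXIT_MMIO, TOY_EXIT_DEBUG, TOY_NR_EXIT_REASONS };

struct toy_vcpu { char log_buf[256]; };

static int toy_handle_mmio(struct toy_vcpu *v)  { (void)v; return 1; }
static int toy_handle_debug(struct toy_vcpu *v) { printf("VMM: %s", v->log_buf); return 1; }

static int (*toy_exit_handlers[TOY_NR_EXIT_REASONS])(struct toy_vcpu *) = {
	[TOY_EXIT_MMIO]  = toy_handle_mmio,
	[TOY_EXIT_DEBUG] = toy_handle_debug,
};

int main(void)
{
	struct toy_vcpu vcpu = { .log_buf = "hello from the vmm\n" };
	unsigned int reason = TOY_EXIT_DEBUG;

	/* Unknown or unhandled reasons fall through instead of indexing past the table. */
	if (reason < TOY_NR_EXIT_REASONS && toy_exit_handlers[reason])
		toy_exit_handlers[reason](&vcpu);
	return 0;
}
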
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c
new file mode 100644
index 000000000000..a85cb611ecd7
--- /dev/null
+++ b/arch/ia64/kvm/kvm_lib.c
@@ -0,0 +1,15 @@
1/*
2 * kvm_lib.c: Compile some libraries for kvm-intel module.
3 *
4 * Just include kernel's library, and disable symbols export.
5 * Copyright (C) 2008, Intel Corporation.
6 * Xiantao Zhang (xiantao.zhang@intel.com)
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 */
13#undef CONFIG_MODULES
14#include "../../../lib/vsprintf.c"
15#include "../../../lib/ctype.c"
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
index 2cc41d17cf99..b2bcaa2787aa 100644
--- a/arch/ia64/kvm/kvm_minstate.h
+++ b/arch/ia64/kvm/kvm_minstate.h
@@ -24,6 +24,8 @@
24#include <asm/asmmacro.h> 24#include <asm/asmmacro.h>
25#include <asm/types.h> 25#include <asm/types.h>
26#include <asm/kregs.h> 26#include <asm/kregs.h>
27#include <asm/kvm_host.h>
28
27#include "asm-offsets.h" 29#include "asm-offsets.h"
28 30
29#define KVM_MINSTATE_START_SAVE_MIN \ 31#define KVM_MINSTATE_START_SAVE_MIN \
@@ -33,7 +35,7 @@
33 addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \ 35 addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \
34 ;; \ 36 ;; \
35 lfetch.fault.excl.nt1 [r22]; \ 37 lfetch.fault.excl.nt1 [r22]; \
36 addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ 38 addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1; \
37 mov r23 = ar.bspstore; /* save ar.bspstore */ \ 39 mov r23 = ar.bspstore; /* save ar.bspstore */ \
38 ;; \ 40 ;; \
39 mov ar.bspstore = r22; /* switch to kernel RBS */\ 41 mov ar.bspstore = r22; /* switch to kernel RBS */\
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
index e585c4607344..dd979e00b574 100644
--- a/arch/ia64/kvm/misc.h
+++ b/arch/ia64/kvm/misc.h
@@ -27,7 +27,8 @@
27 */ 27 */
28static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm) 28static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
29{ 29{
30 return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS); 30 return (uint64_t *)(kvm->arch.vm_base +
31 offsetof(struct kvm_vm_data, kvm_p2m));
31} 32}
32 33
33static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn, 34static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
index 7f1a858bc69f..21f63fffc379 100644
--- a/arch/ia64/kvm/mmio.c
+++ b/arch/ia64/kvm/mmio.c
@@ -66,31 +66,25 @@ void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
66 66
67 switch (addr) { 67 switch (addr) {
68 case PIB_OFST_INTA: 68 case PIB_OFST_INTA:
69 /*panic_domain(NULL, "Undefined write on PIB INTA\n");*/ 69 panic_vm(v, "Undefined write on PIB INTA\n");
70 panic_vm(v);
71 break; 70 break;
72 case PIB_OFST_XTP: 71 case PIB_OFST_XTP:
73 if (length == 1) { 72 if (length == 1) {
74 vlsapic_write_xtp(v, val); 73 vlsapic_write_xtp(v, val);
75 } else { 74 } else {
76 /*panic_domain(NULL, 75 panic_vm(v, "Undefined write on PIB XTP\n");
77 "Undefined write on PIB XTP\n");*/
78 panic_vm(v);
79 } 76 }
80 break; 77 break;
81 default: 78 default:
82 if (PIB_LOW_HALF(addr)) { 79 if (PIB_LOW_HALF(addr)) {
83 /*lower half */ 80 /*Lower half */
84 if (length != 8) 81 if (length != 8)
85 /*panic_domain(NULL, 82 panic_vm(v, "Can't LHF write with size %ld!\n",
86 "Can't LHF write with size %ld!\n", 83 length);
87 length);*/
88 panic_vm(v);
89 else 84 else
90 vlsapic_write_ipi(v, addr, val); 85 vlsapic_write_ipi(v, addr, val);
91 } else { /* upper half 86 } else { /*Upper half */
92 printk("IPI-UHF write %lx\n",addr);*/ 87 panic_vm(v, "IPI-UHF write %lx\n", addr);
93 panic_vm(v);
94 } 88 }
95 break; 89 break;
96 } 90 }
@@ -108,22 +102,18 @@ unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
108 if (length == 1) /* 1 byte load */ 102 if (length == 1) /* 1 byte load */
109 ; /* There is no i8259, there is no INTA access*/ 103 ; /* There is no i8259, there is no INTA access*/
110 else 104 else
111 /*panic_domain(NULL,"Undefined read on PIB INTA\n"); */ 105 panic_vm(v, "Undefined read on PIB INTA\n");
112 panic_vm(v);
113 106
114 break; 107 break;
115 case PIB_OFST_XTP: 108 case PIB_OFST_XTP:
116 if (length == 1) { 109 if (length == 1) {
117 result = VLSAPIC_XTP(v); 110 result = VLSAPIC_XTP(v);
118 /* printk("read xtp %lx\n", result); */
119 } else { 111 } else {
120 /*panic_domain(NULL, 112 panic_vm(v, "Undefined read on PIB XTP\n");
121 "Undefined read on PIB XTP\n");*/
122 panic_vm(v);
123 } 113 }
124 break; 114 break;
125 default: 115 default:
126 panic_vm(v); 116 panic_vm(v, "Undefined addr access for lsapic!\n");
127 break; 117 break;
128 } 118 }
129 return result; 119 return result;
@@ -162,7 +152,7 @@ static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
162 /* it's necessary to ensure zero extending */ 152 /* it's necessary to ensure zero extending */
163 *dest = p->u.ioreq.data & (~0UL >> (64-(s*8))); 153 *dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
164 } else 154 } else
165 panic_vm(vcpu); 155 panic_vm(vcpu, "Unhandled mmio access returned!\n");
166out: 156out:
167 local_irq_restore(psr); 157 local_irq_restore(psr);
168 return ; 158 return ;
@@ -324,7 +314,9 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
324 return; 314 return;
325 } else { 315 } else {
326 inst_type = -1; 316 inst_type = -1;
327 panic_vm(vcpu); 317 panic_vm(vcpu, "Unsupported MMIO access instruction! \
318 Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
319 bundle.i64[0], bundle.i64[1]);
328 } 320 }
329 321
330 size = 1 << size; 322 size = 1 << size;
@@ -335,7 +327,7 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
335 if (inst_type == SL_INTEGER) 327 if (inst_type == SL_INTEGER)
336 vcpu_set_gr(vcpu, inst.M1.r1, data, 0); 328 vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
337 else 329 else
338 panic_vm(vcpu); 330 panic_vm(vcpu, "Unsupported instruction type!\n");
339 331
340 } 332 }
341 vcpu_increment_iip(vcpu); 333 vcpu_increment_iip(vcpu);
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 800817307b7b..552d07724207 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -527,7 +527,8 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
527 vector = vec2off[vec]; 527 vector = vec2off[vec];
528 528
529 if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) { 529 if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
530 panic_vm(vcpu); 530 panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
531 "with psr.ic = 0\n", vector);
531 return; 532 return;
532 } 533 }
533 534
@@ -586,7 +587,7 @@ static void set_pal_call_result(struct kvm_vcpu *vcpu)
586 vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0); 587 vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
587 vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0); 588 vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
588 } else 589 } else
589 panic_vm(vcpu); 590 panic_vm(vcpu, "Mis-set for exit reason!\n");
590} 591}
591 592
592static void set_sal_call_data(struct kvm_vcpu *vcpu) 593static void set_sal_call_data(struct kvm_vcpu *vcpu)
@@ -614,7 +615,7 @@ static void set_sal_call_result(struct kvm_vcpu *vcpu)
614 vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0); 615 vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
615 vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0); 616 vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
616 } else 617 } else
617 panic_vm(vcpu); 618 panic_vm(vcpu, "Mis-set for exit reason!\n");
618} 619}
619 620
620void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, 621void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
@@ -680,7 +681,7 @@ static void generate_exirq(struct kvm_vcpu *vcpu)
680 vpsr = VCPU(vcpu, vpsr); 681 vpsr = VCPU(vcpu, vpsr);
681 isr = vpsr & IA64_PSR_RI; 682 isr = vpsr & IA64_PSR_RI;
682 if (!(vpsr & IA64_PSR_IC)) 683 if (!(vpsr & IA64_PSR_IC))
683 panic_vm(vcpu); 684 panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
684 reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ 685 reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
685} 686}
686 687
@@ -941,8 +942,20 @@ static void vcpu_do_resume(struct kvm_vcpu *vcpu)
941 ia64_set_pta(vcpu->arch.vhpt.pta.val); 942 ia64_set_pta(vcpu->arch.vhpt.pta.val);
942} 943}
943 944
945static void vmm_sanity_check(struct kvm_vcpu *vcpu)
946{
947 struct exit_ctl_data *p = &vcpu->arch.exit_data;
948
949 if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
950 panic_vm(vcpu, "Failed to do vmm sanity check,"
951 "it maybe caused by crashed vmm!!\n\n");
952 }
953}
954
944static void kvm_do_resume_op(struct kvm_vcpu *vcpu) 955static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
945{ 956{
957 vmm_sanity_check(vcpu); /*Guarantee vcpu runing on healthy vmm!*/
958
946 if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) { 959 if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
947 vcpu_do_resume(vcpu); 960 vcpu_do_resume(vcpu);
948 return; 961 return;
@@ -968,3 +981,11 @@ void vmm_transition(struct kvm_vcpu *vcpu)
968 1, 0, 0, 0, 0, 0); 981 1, 0, 0, 0, 0, 0);
969 kvm_do_resume_op(vcpu); 982 kvm_do_resume_op(vcpu);
970} 983}
984
985void vmm_panic_handler(u64 vec)
986{
987 struct kvm_vcpu *vcpu = current_vcpu;
988 vmm_sanity = 0;
989 panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
990 vec2off[vec]);
991}
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index e44027ce5667..ecd526b55323 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -816,8 +816,9 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
816 unsigned long vitv = VCPU(vcpu, itv); 816 unsigned long vitv = VCPU(vcpu, itv);
817 817
818 if (vcpu->vcpu_id == 0) { 818 if (vcpu->vcpu_id == 0) {
819 for (i = 0; i < MAX_VCPU_NUM; i++) { 819 for (i = 0; i < KVM_MAX_VCPUS; i++) {
820 v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); 820 v = (struct kvm_vcpu *)((char *)vcpu +
821 sizeof(struct kvm_vcpu_data) * i);
821 VMX(v, itc_offset) = itc_offset; 822 VMX(v, itc_offset) = itc_offset;
822 VMX(v, last_itc) = 0; 823 VMX(v, last_itc) = 0;
823 } 824 }
@@ -1650,7 +1651,8 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
1650 * Otherwise panic 1651 * Otherwise panic
1651 */ 1652 */
1652 if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) 1653 if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
1653 panic_vm(vcpu); 1654 panic_vm(vcpu, "Only support guests with vpsr.pk =0 \
1655 & vpsr.is=0\n");
1654 1656
1655 /* 1657 /*
1656 * For those IA64_PSR bits: id/da/dd/ss/ed/ia 1658 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
@@ -2103,7 +2105,7 @@ void kvm_init_all_rr(struct kvm_vcpu *vcpu)
2103 2105
2104 if (is_physical_mode(vcpu)) { 2106 if (is_physical_mode(vcpu)) {
2105 if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) 2107 if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
2106 panic_vm(vcpu); 2108 panic_vm(vcpu, "Machine Status conflicts!\n");
2107 2109
2108 ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); 2110 ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
2109 ia64_dv_serialize_data(); 2111 ia64_dv_serialize_data();
@@ -2152,10 +2154,70 @@ int vmm_entry(void)
2152 return 0; 2154 return 0;
2153} 2155}
2154 2156
2155void panic_vm(struct kvm_vcpu *v) 2157static void kvm_show_registers(struct kvm_pt_regs *regs)
2156{ 2158{
2159 unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
2160
2161 struct kvm_vcpu *vcpu = current_vcpu;
2162 if (vcpu != NULL)
2163 printk("vcpu 0x%p vcpu %d\n",
2164 vcpu, vcpu->vcpu_id);
2165
2166 printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n",
2167 regs->cr_ipsr, regs->cr_ifs, ip);
2168
2169 printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
2170 regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
2171 printk("rnat: %016lx bspstore: %016lx pr : %016lx\n",
2172 regs->ar_rnat, regs->ar_bspstore, regs->pr);
2173 printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
2174 regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
2175 printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
2176 printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0,
2177 regs->b6, regs->b7);
2178 printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
2179 regs->f6.u.bits[1], regs->f6.u.bits[0],
2180 regs->f7.u.bits[1], regs->f7.u.bits[0]);
2181 printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
2182 regs->f8.u.bits[1], regs->f8.u.bits[0],
2183 regs->f9.u.bits[1], regs->f9.u.bits[0]);
2184 printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
2185 regs->f10.u.bits[1], regs->f10.u.bits[0],
2186 regs->f11.u.bits[1], regs->f11.u.bits[0]);
2187
2188 printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1,
2189 regs->r2, regs->r3);
2190 printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8,
2191 regs->r9, regs->r10);
2192 printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
2193 regs->r12, regs->r13);
2194 printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
2195 regs->r15, regs->r16);
2196 printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
2197 regs->r18, regs->r19);
2198 printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
2199 regs->r21, regs->r22);
2200 printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
2201 regs->r24, regs->r25);
2202 printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
2203 regs->r27, regs->r28);
2204 printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
2205 regs->r30, regs->r31);
2206
2207}
2208
2209void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
2210{
2211 va_list args;
2212 char buf[256];
2213
2214 struct kvm_pt_regs *regs = vcpu_regs(v);
2157 struct exit_ctl_data *p = &v->arch.exit_data; 2215 struct exit_ctl_data *p = &v->arch.exit_data;
2158 2216 va_start(args, fmt);
2217 vsnprintf(buf, sizeof(buf), fmt, args);
2218 va_end(args);
2219 printk(buf);
2220 kvm_show_registers(regs);
2159 p->exit_reason = EXIT_REASON_VM_PANIC; 2221 p->exit_reason = EXIT_REASON_VM_PANIC;
2160 vmm_transition(v); 2222 vmm_transition(v);
2161 /*Never to return*/ 2223 /*Never to return*/
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index e9b2a4e121c0..b2f12a562bdf 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -737,9 +737,12 @@ void kvm_init_vtlb(struct kvm_vcpu *v);
737void kvm_init_vhpt(struct kvm_vcpu *v); 737void kvm_init_vhpt(struct kvm_vcpu *v);
738void thash_init(struct thash_cb *hcb, u64 sz); 738void thash_init(struct thash_cb *hcb, u64 sz);
739 739
740void panic_vm(struct kvm_vcpu *v); 740void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
741 741
742extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3, 742extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
743 u64 arg4, u64 arg5, u64 arg6, u64 arg7); 743 u64 arg4, u64 arg5, u64 arg6, u64 arg7);
744
745extern long vmm_sanity;
746
744#endif 747#endif
745#endif /* __VCPU_H__ */ 748#endif /* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index 2275bf4e681a..9eee5c04bacc 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
@@ -20,6 +20,7 @@
20 */ 20 */
21 21
22 22
23#include<linux/kernel.h>
23#include<linux/module.h> 24#include<linux/module.h>
24#include<asm/fpswa.h> 25#include<asm/fpswa.h>
25 26
@@ -31,6 +32,8 @@ MODULE_LICENSE("GPL");
31extern char kvm_ia64_ivt; 32extern char kvm_ia64_ivt;
32extern fpswa_interface_t *vmm_fpswa_interface; 33extern fpswa_interface_t *vmm_fpswa_interface;
33 34
35long vmm_sanity = 1;
36
34struct kvm_vmm_info vmm_info = { 37struct kvm_vmm_info vmm_info = {
35 .module = THIS_MODULE, 38 .module = THIS_MODULE,
36 .vmm_entry = vmm_entry, 39 .vmm_entry = vmm_entry,
@@ -62,5 +65,31 @@ void vmm_spin_unlock(spinlock_t *lock)
62{ 65{
63 _vmm_raw_spin_unlock(lock); 66 _vmm_raw_spin_unlock(lock);
64} 67}
68
69static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
70{
71 struct exit_ctl_data *p = &vcpu->arch.exit_data;
72 long psr;
73
74 local_irq_save(psr);
75 p->exit_reason = EXIT_REASON_DEBUG;
76 vmm_transition(vcpu);
77 local_irq_restore(psr);
78}
79
80asmlinkage int printk(const char *fmt, ...)
81{
82 struct kvm_vcpu *vcpu = current_vcpu;
83 va_list args;
84 int r;
85
86 memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
87 va_start(args, fmt);
88 r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
89 va_end(args);
90 vcpu_debug_exit(vcpu);
91 return r;
92}
93
65module_init(kvm_vmm_init) 94module_init(kvm_vmm_init)
66module_exit(kvm_vmm_exit) 95module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index c1d7251a1480..3ef1a017a318 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -1,5 +1,5 @@
1/* 1/*
2 * /ia64/kvm_ivt.S 2 * arch/ia64/kvm/vmm_ivt.S
3 * 3 *
4 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co 4 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com> 5 * Stephane Eranian <eranian@hpl.hp.com>
@@ -70,32 +70,39 @@
70# define PSR_DEFAULT_BITS 0 70# define PSR_DEFAULT_BITS 0
71#endif 71#endif
72 72
73
74#define KVM_FAULT(n) \ 73#define KVM_FAULT(n) \
75 kvm_fault_##n:; \ 74 kvm_fault_##n:; \
76 mov r19=n;; \ 75 mov r19=n;; \
77 br.sptk.many kvm_fault_##n; \ 76 br.sptk.many kvm_vmm_panic; \
78 ;; \ 77 ;; \
79
80 78
81#define KVM_REFLECT(n) \ 79#define KVM_REFLECT(n) \
82 mov r31=pr; \ 80 mov r31=pr; \
83 mov r19=n; /* prepare to save predicates */ \ 81 mov r19=n; /* prepare to save predicates */ \
84 mov r29=cr.ipsr; \ 82 mov r29=cr.ipsr; \
85 ;; \ 83 ;; \
86 tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ 84 tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \
87(p7)br.sptk.many kvm_dispatch_reflection; \ 85(p7) br.sptk.many kvm_dispatch_reflection; \
88 br.sptk.many kvm_panic; \ 86 br.sptk.many kvm_vmm_panic; \
89 87
90 88GLOBAL_ENTRY(kvm_vmm_panic)
91GLOBAL_ENTRY(kvm_panic) 89 KVM_SAVE_MIN_WITH_COVER_R19
92 br.sptk.many kvm_panic 90 alloc r14=ar.pfs,0,0,1,0
93 ;; 91 mov out0=r15
94END(kvm_panic) 92 adds r3=8,r2 // set up second base pointer
95 93 ;;
96 94 ssm psr.ic
97 95 ;;
98 96 srlz.i // guarantee that interruption collection is on
97 ;;
98 //(p15) ssm psr.i // restore psr.i
99 addl r14=@gprel(ia64_leave_hypervisor),gp
100 ;;
101 KVM_SAVE_REST
102 mov rp=r14
103 ;;
104 br.call.sptk.many b6=vmm_panic_handler;
105END(kvm_vmm_panic)
99 106
100 .section .text.ivt,"ax" 107 .section .text.ivt,"ax"
101 108
@@ -105,308 +112,307 @@ kvm_ia64_ivt:
105/////////////////////////////////////////////////////////////// 112///////////////////////////////////////////////////////////////
106// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) 113// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
107ENTRY(kvm_vhpt_miss) 114ENTRY(kvm_vhpt_miss)
108 KVM_FAULT(0) 115 KVM_FAULT(0)
109END(kvm_vhpt_miss) 116END(kvm_vhpt_miss)
110 117
111
112 .org kvm_ia64_ivt+0x400 118 .org kvm_ia64_ivt+0x400
113//////////////////////////////////////////////////////////////// 119////////////////////////////////////////////////////////////////
114// 0x0400 Entry 1 (size 64 bundles) ITLB (21) 120// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
115ENTRY(kvm_itlb_miss) 121ENTRY(kvm_itlb_miss)
116 mov r31 = pr 122 mov r31 = pr
117 mov r29=cr.ipsr; 123 mov r29=cr.ipsr;
118 ;; 124 ;;
119 tbit.z p6,p7=r29,IA64_PSR_VM_BIT; 125 tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
120 (p6) br.sptk kvm_alt_itlb_miss 126(p6) br.sptk kvm_alt_itlb_miss
121 mov r19 = 1 127 mov r19 = 1
122 br.sptk kvm_itlb_miss_dispatch 128 br.sptk kvm_itlb_miss_dispatch
123 KVM_FAULT(1); 129 KVM_FAULT(1);
124END(kvm_itlb_miss) 130END(kvm_itlb_miss)
125 131
126 .org kvm_ia64_ivt+0x0800 132 .org kvm_ia64_ivt+0x0800
127////////////////////////////////////////////////////////////////// 133//////////////////////////////////////////////////////////////////
128// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) 134// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
129ENTRY(kvm_dtlb_miss) 135ENTRY(kvm_dtlb_miss)
130 mov r31 = pr 136 mov r31 = pr
131 mov r29=cr.ipsr; 137 mov r29=cr.ipsr;
132 ;; 138 ;;
133 tbit.z p6,p7=r29,IA64_PSR_VM_BIT; 139 tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
134(p6)br.sptk kvm_alt_dtlb_miss 140(p6) br.sptk kvm_alt_dtlb_miss
135 br.sptk kvm_dtlb_miss_dispatch 141 br.sptk kvm_dtlb_miss_dispatch
136END(kvm_dtlb_miss) 142END(kvm_dtlb_miss)
137 143
138 .org kvm_ia64_ivt+0x0c00 144 .org kvm_ia64_ivt+0x0c00
139//////////////////////////////////////////////////////////////////// 145////////////////////////////////////////////////////////////////////
140// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) 146// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
141ENTRY(kvm_alt_itlb_miss) 147ENTRY(kvm_alt_itlb_miss)
142 mov r16=cr.ifa // get address that caused the TLB miss 148 mov r16=cr.ifa // get address that caused the TLB miss
143 ;; 149 ;;
144 movl r17=PAGE_KERNEL 150 movl r17=PAGE_KERNEL
145 mov r24=cr.ipsr 151 mov r24=cr.ipsr
146 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) 152 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
147 ;; 153 ;;
148 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits 154 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
149 ;; 155 ;;
150 or r19=r17,r19 // insert PTE control bits into r19 156 or r19=r17,r19 // insert PTE control bits into r19
151 ;; 157 ;;
152 movl r20=IA64_GRANULE_SHIFT<<2 158 movl r20=IA64_GRANULE_SHIFT<<2
153 ;; 159 ;;
154 mov cr.itir=r20 160 mov cr.itir=r20
155 ;; 161 ;;
156 itc.i r19 // insert the TLB entry 162 itc.i r19 // insert the TLB entry
157 mov pr=r31,-1 163 mov pr=r31,-1
158 rfi 164 rfi
159END(kvm_alt_itlb_miss) 165END(kvm_alt_itlb_miss)
160 166
161 .org kvm_ia64_ivt+0x1000 167 .org kvm_ia64_ivt+0x1000
162///////////////////////////////////////////////////////////////////// 168/////////////////////////////////////////////////////////////////////
163// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) 169// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
164ENTRY(kvm_alt_dtlb_miss) 170ENTRY(kvm_alt_dtlb_miss)
165 mov r16=cr.ifa // get address that caused the TLB miss 171 mov r16=cr.ifa // get address that caused the TLB miss
166 ;; 172 ;;
167 movl r17=PAGE_KERNEL 173 movl r17=PAGE_KERNEL
168 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) 174 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
169 mov r24=cr.ipsr 175 mov r24=cr.ipsr
170 ;; 176 ;;
171 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits 177 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
172 ;; 178 ;;
173 or r19=r19,r17 // insert PTE control bits into r19 179 or r19=r19,r17 // insert PTE control bits into r19
174 ;; 180 ;;
175 movl r20=IA64_GRANULE_SHIFT<<2 181 movl r20=IA64_GRANULE_SHIFT<<2
176 ;; 182 ;;
177 mov cr.itir=r20 183 mov cr.itir=r20
178 ;; 184 ;;
179 itc.d r19 // insert the TLB entry 185 itc.d r19 // insert the TLB entry
180 mov pr=r31,-1 186 mov pr=r31,-1
181 rfi 187 rfi
182END(kvm_alt_dtlb_miss) 188END(kvm_alt_dtlb_miss)
183 189
184 .org kvm_ia64_ivt+0x1400 190 .org kvm_ia64_ivt+0x1400
185////////////////////////////////////////////////////////////////////// 191//////////////////////////////////////////////////////////////////////
186// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) 192// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
187ENTRY(kvm_nested_dtlb_miss) 193ENTRY(kvm_nested_dtlb_miss)
188 KVM_FAULT(5) 194 KVM_FAULT(5)
189END(kvm_nested_dtlb_miss) 195END(kvm_nested_dtlb_miss)
190 196
191 .org kvm_ia64_ivt+0x1800 197 .org kvm_ia64_ivt+0x1800
192///////////////////////////////////////////////////////////////////// 198/////////////////////////////////////////////////////////////////////
193// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) 199// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
194ENTRY(kvm_ikey_miss) 200ENTRY(kvm_ikey_miss)
195 KVM_REFLECT(6) 201 KVM_REFLECT(6)
196END(kvm_ikey_miss) 202END(kvm_ikey_miss)
197 203
198 .org kvm_ia64_ivt+0x1c00 204 .org kvm_ia64_ivt+0x1c00
199///////////////////////////////////////////////////////////////////// 205/////////////////////////////////////////////////////////////////////
200// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) 206// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
201ENTRY(kvm_dkey_miss) 207ENTRY(kvm_dkey_miss)
202 KVM_REFLECT(7) 208 KVM_REFLECT(7)
203END(kvm_dkey_miss) 209END(kvm_dkey_miss)
204 210
205 .org kvm_ia64_ivt+0x2000 211 .org kvm_ia64_ivt+0x2000
206//////////////////////////////////////////////////////////////////// 212////////////////////////////////////////////////////////////////////
207// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) 213// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
208ENTRY(kvm_dirty_bit) 214ENTRY(kvm_dirty_bit)
209 KVM_REFLECT(8) 215 KVM_REFLECT(8)
210END(kvm_dirty_bit) 216END(kvm_dirty_bit)
211 217
212 .org kvm_ia64_ivt+0x2400 218 .org kvm_ia64_ivt+0x2400
213//////////////////////////////////////////////////////////////////// 219////////////////////////////////////////////////////////////////////
214// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) 220// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
215ENTRY(kvm_iaccess_bit) 221ENTRY(kvm_iaccess_bit)
216 KVM_REFLECT(9) 222 KVM_REFLECT(9)
217END(kvm_iaccess_bit) 223END(kvm_iaccess_bit)
218 224
219 .org kvm_ia64_ivt+0x2800 225 .org kvm_ia64_ivt+0x2800
220/////////////////////////////////////////////////////////////////// 226///////////////////////////////////////////////////////////////////
221// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) 227// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
222ENTRY(kvm_daccess_bit) 228ENTRY(kvm_daccess_bit)
223 KVM_REFLECT(10) 229 KVM_REFLECT(10)
224END(kvm_daccess_bit) 230END(kvm_daccess_bit)
225 231
226 .org kvm_ia64_ivt+0x2c00 232 .org kvm_ia64_ivt+0x2c00
227///////////////////////////////////////////////////////////////// 233/////////////////////////////////////////////////////////////////
228// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) 234// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
229ENTRY(kvm_break_fault) 235ENTRY(kvm_break_fault)
230 mov r31=pr 236 mov r31=pr
231 mov r19=11 237 mov r19=11
232 mov r29=cr.ipsr 238 mov r29=cr.ipsr
233 ;; 239 ;;
234 KVM_SAVE_MIN_WITH_COVER_R19 240 KVM_SAVE_MIN_WITH_COVER_R19
235 ;; 241 ;;
236 alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) 242 alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!)
237 mov out0=cr.ifa 243 mov out0=cr.ifa
238 mov out2=cr.isr // FIXME: pity to make this slow access twice 244 mov out2=cr.isr // FIXME: pity to make this slow access twice
239 mov out3=cr.iim // FIXME: pity to make this slow access twice 245 mov out3=cr.iim // FIXME: pity to make this slow access twice
240 adds r3=8,r2 // set up second base pointer 246 adds r3=8,r2 // set up second base pointer
241 ;; 247 ;;
242 ssm psr.ic 248 ssm psr.ic
243 ;; 249 ;;
244 srlz.i // guarantee that interruption collection is on 250 srlz.i // guarantee that interruption collection is on
245 ;; 251 ;;
246 //(p15)ssm psr.i // restore psr.i 252 //(p15)ssm psr.i // restore psr.i
247 addl r14=@gprel(ia64_leave_hypervisor),gp 253 addl r14=@gprel(ia64_leave_hypervisor),gp
248 ;; 254 ;;
249 KVM_SAVE_REST 255 KVM_SAVE_REST
250 mov rp=r14 256 mov rp=r14
251 ;; 257 ;;
252 adds out1=16,sp 258 adds out1=16,sp
253 br.call.sptk.many b6=kvm_ia64_handle_break 259 br.call.sptk.many b6=kvm_ia64_handle_break
254 ;; 260 ;;
255END(kvm_break_fault) 261END(kvm_break_fault)
256 262
257 .org kvm_ia64_ivt+0x3000 263 .org kvm_ia64_ivt+0x3000
258///////////////////////////////////////////////////////////////// 264/////////////////////////////////////////////////////////////////
259// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) 265// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
260ENTRY(kvm_interrupt) 266ENTRY(kvm_interrupt)
261 mov r31=pr // prepare to save predicates 267 mov r31=pr // prepare to save predicates
262 mov r19=12 268 mov r19=12
263 mov r29=cr.ipsr 269 mov r29=cr.ipsr
264 ;; 270 ;;
265 tbit.z p6,p7=r29,IA64_PSR_VM_BIT 271 tbit.z p6,p7=r29,IA64_PSR_VM_BIT
266 tbit.z p0,p15=r29,IA64_PSR_I_BIT 272 tbit.z p0,p15=r29,IA64_PSR_I_BIT
267 ;; 273 ;;
268(p7) br.sptk kvm_dispatch_interrupt 274(p7) br.sptk kvm_dispatch_interrupt
269 ;; 275 ;;
270 mov r27=ar.rsc /* M */ 276 mov r27=ar.rsc /* M */
271 mov r20=r1 /* A */ 277 mov r20=r1 /* A */
272 mov r25=ar.unat /* M */ 278 mov r25=ar.unat /* M */
273 mov r26=ar.pfs /* I */ 279 mov r26=ar.pfs /* I */
274 mov r28=cr.iip /* M */ 280 mov r28=cr.iip /* M */
275 cover /* B (or nothing) */ 281 cover /* B (or nothing) */
276 ;; 282 ;;
277 mov r1=sp 283 mov r1=sp
278 ;; 284 ;;
279 invala /* M */ 285 invala /* M */
280 mov r30=cr.ifs 286 mov r30=cr.ifs
281 ;; 287 ;;
282 addl r1=-VMM_PT_REGS_SIZE,r1 288 addl r1=-VMM_PT_REGS_SIZE,r1
283 ;; 289 ;;
284 adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ 290 adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */
285 adds r16=PT(CR_IPSR),r1 291 adds r16=PT(CR_IPSR),r1
286 ;; 292 ;;
287 lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES 293 lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
288 st8 [r16]=r29 /* save cr.ipsr */ 294 st8 [r16]=r29 /* save cr.ipsr */
289 ;; 295 ;;
290 lfetch.fault.excl.nt1 [r17] 296 lfetch.fault.excl.nt1 [r17]
291 mov r29=b0 297 mov r29=b0
292 ;; 298 ;;
293 adds r16=PT(R8),r1 /* initialize first base pointer */ 299 adds r16=PT(R8),r1 /* initialize first base pointer */
294 adds r17=PT(R9),r1 /* initialize second base pointer */ 300 adds r17=PT(R9),r1 /* initialize second base pointer */
295 mov r18=r0 /* make sure r18 isn't NaT */ 301 mov r18=r0 /* make sure r18 isn't NaT */
296 ;; 302 ;;
297.mem.offset 0,0; st8.spill [r16]=r8,16 303.mem.offset 0,0; st8.spill [r16]=r8,16
298.mem.offset 8,0; st8.spill [r17]=r9,16 304.mem.offset 8,0; st8.spill [r17]=r9,16
299 ;; 305 ;;
300.mem.offset 0,0; st8.spill [r16]=r10,24 306.mem.offset 0,0; st8.spill [r16]=r10,24
301.mem.offset 8,0; st8.spill [r17]=r11,24 307.mem.offset 8,0; st8.spill [r17]=r11,24
302 ;; 308 ;;
303 st8 [r16]=r28,16 /* save cr.iip */ 309 st8 [r16]=r28,16 /* save cr.iip */
304 st8 [r17]=r30,16 /* save cr.ifs */ 310 st8 [r17]=r30,16 /* save cr.ifs */
305 mov r8=ar.fpsr /* M */ 311 mov r8=ar.fpsr /* M */
306 mov r9=ar.csd 312 mov r9=ar.csd
307 mov r10=ar.ssd 313 mov r10=ar.ssd
308 movl r11=FPSR_DEFAULT /* L-unit */ 314 movl r11=FPSR_DEFAULT /* L-unit */
309 ;; 315 ;;
310 st8 [r16]=r25,16 /* save ar.unat */ 316 st8 [r16]=r25,16 /* save ar.unat */
311 st8 [r17]=r26,16 /* save ar.pfs */ 317 st8 [r17]=r26,16 /* save ar.pfs */
312 shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ 318 shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */
313 ;; 319 ;;
314 st8 [r16]=r27,16 /* save ar.rsc */ 320 st8 [r16]=r27,16 /* save ar.rsc */
315 adds r17=16,r17 /* skip over ar_rnat field */ 321 adds r17=16,r17 /* skip over ar_rnat field */
316 ;; 322 ;;
317 st8 [r17]=r31,16 /* save predicates */ 323 st8 [r17]=r31,16 /* save predicates */
318 adds r16=16,r16 /* skip over ar_bspstore field */ 324 adds r16=16,r16 /* skip over ar_bspstore field */
319 ;; 325 ;;
320 st8 [r16]=r29,16 /* save b0 */ 326 st8 [r16]=r29,16 /* save b0 */
321 st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ 327 st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */
322 ;; 328 ;;
323.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ 329.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */
324.mem.offset 8,0; st8.spill [r17]=r12,16 330.mem.offset 8,0; st8.spill [r17]=r12,16
325 adds r12=-16,r1 331 adds r12=-16,r1
326 /* switch to kernel memory stack (with 16 bytes of scratch) */ 332 /* switch to kernel memory stack (with 16 bytes of scratch) */
327 ;; 333 ;;
328.mem.offset 0,0; st8.spill [r16]=r13,16 334.mem.offset 0,0; st8.spill [r16]=r13,16
329.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ 335.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
330 ;; 336 ;;
331.mem.offset 0,0; st8.spill [r16]=r15,16 337.mem.offset 0,0; st8.spill [r16]=r15,16
332.mem.offset 8,0; st8.spill [r17]=r14,16 338.mem.offset 8,0; st8.spill [r17]=r14,16
333 dep r14=-1,r0,60,4 339 dep r14=-1,r0,60,4
334 ;; 340 ;;
335.mem.offset 0,0; st8.spill [r16]=r2,16 341.mem.offset 0,0; st8.spill [r16]=r2,16
336.mem.offset 8,0; st8.spill [r17]=r3,16 342.mem.offset 8,0; st8.spill [r17]=r3,16
337 adds r2=VMM_PT_REGS_R16_OFFSET,r1 343 adds r2=VMM_PT_REGS_R16_OFFSET,r1
338 adds r14 = VMM_VCPU_GP_OFFSET,r13 344 adds r14 = VMM_VCPU_GP_OFFSET,r13
339 ;; 345 ;;
340 mov r8=ar.ccv 346 mov r8=ar.ccv
341 ld8 r14 = [r14] 347 ld8 r14 = [r14]
342 ;; 348 ;;
343 mov r1=r14 /* establish kernel global pointer */ 349 mov r1=r14 /* establish kernel global pointer */
344 ;; \ 350 ;; \
345 bsw.1 351 bsw.1
346 ;; 352 ;;
347 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group 353 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
348 mov out0=r13 354 mov out0=r13
349 ;; 355 ;;
350 ssm psr.ic 356 ssm psr.ic
351 ;; 357 ;;
352 srlz.i 358 srlz.i
353 ;; 359 ;;
354 //(p15) ssm psr.i 360 //(p15) ssm psr.i
355 adds r3=8,r2 // set up second base pointer for SAVE_REST 361 adds r3=8,r2 // set up second base pointer for SAVE_REST
356 srlz.i // ensure everybody knows psr.ic is back on 362 srlz.i // ensure everybody knows psr.ic is back on
357 ;; 363 ;;
358.mem.offset 0,0; st8.spill [r2]=r16,16 364.mem.offset 0,0; st8.spill [r2]=r16,16
359.mem.offset 8,0; st8.spill [r3]=r17,16 365.mem.offset 8,0; st8.spill [r3]=r17,16
360 ;; 366 ;;
361.mem.offset 0,0; st8.spill [r2]=r18,16 367.mem.offset 0,0; st8.spill [r2]=r18,16
362.mem.offset 8,0; st8.spill [r3]=r19,16 368.mem.offset 8,0; st8.spill [r3]=r19,16
363 ;; 369 ;;
364.mem.offset 0,0; st8.spill [r2]=r20,16 370.mem.offset 0,0; st8.spill [r2]=r20,16
365.mem.offset 8,0; st8.spill [r3]=r21,16 371.mem.offset 8,0; st8.spill [r3]=r21,16
366 mov r18=b6 372 mov r18=b6
367 ;; 373 ;;
368.mem.offset 0,0; st8.spill [r2]=r22,16 374.mem.offset 0,0; st8.spill [r2]=r22,16
369.mem.offset 8,0; st8.spill [r3]=r23,16 375.mem.offset 8,0; st8.spill [r3]=r23,16
370 mov r19=b7 376 mov r19=b7
371 ;; 377 ;;
372.mem.offset 0,0; st8.spill [r2]=r24,16 378.mem.offset 0,0; st8.spill [r2]=r24,16
373.mem.offset 8,0; st8.spill [r3]=r25,16 379.mem.offset 8,0; st8.spill [r3]=r25,16
374 ;; 380 ;;
375.mem.offset 0,0; st8.spill [r2]=r26,16 381.mem.offset 0,0; st8.spill [r2]=r26,16
376.mem.offset 8,0; st8.spill [r3]=r27,16 382.mem.offset 8,0; st8.spill [r3]=r27,16
377 ;; 383 ;;
378.mem.offset 0,0; st8.spill [r2]=r28,16 384.mem.offset 0,0; st8.spill [r2]=r28,16
379.mem.offset 8,0; st8.spill [r3]=r29,16 385.mem.offset 8,0; st8.spill [r3]=r29,16
380 ;; 386 ;;
381.mem.offset 0,0; st8.spill [r2]=r30,16 387.mem.offset 0,0; st8.spill [r2]=r30,16
382.mem.offset 8,0; st8.spill [r3]=r31,32 388.mem.offset 8,0; st8.spill [r3]=r31,32
383 ;; 389 ;;
384 mov ar.fpsr=r11 /* M-unit */ 390 mov ar.fpsr=r11 /* M-unit */
385 st8 [r2]=r8,8 /* ar.ccv */ 391 st8 [r2]=r8,8 /* ar.ccv */
386 adds r24=PT(B6)-PT(F7),r3 392 adds r24=PT(B6)-PT(F7),r3
387 ;; 393 ;;
388 stf.spill [r2]=f6,32 394 stf.spill [r2]=f6,32
389 stf.spill [r3]=f7,32 395 stf.spill [r3]=f7,32
390 ;; 396 ;;
391 stf.spill [r2]=f8,32 397 stf.spill [r2]=f8,32
392 stf.spill [r3]=f9,32 398 stf.spill [r3]=f9,32
393 ;; 399 ;;
394 stf.spill [r2]=f10 400 stf.spill [r2]=f10
395 stf.spill [r3]=f11 401 stf.spill [r3]=f11
396 adds r25=PT(B7)-PT(F11),r3 402 adds r25=PT(B7)-PT(F11),r3
397 ;; 403 ;;
398 st8 [r24]=r18,16 /* b6 */ 404 st8 [r24]=r18,16 /* b6 */
399 st8 [r25]=r19,16 /* b7 */ 405 st8 [r25]=r19,16 /* b7 */
400 ;; 406 ;;
401 st8 [r24]=r9 /* ar.csd */ 407 st8 [r24]=r9 /* ar.csd */
402 st8 [r25]=r10 /* ar.ssd */ 408 st8 [r25]=r10 /* ar.ssd */
403 ;; 409 ;;
404 srlz.d // make sure we see the effect of cr.ivr 410 srlz.d // make sure we see the effect of cr.ivr
405 addl r14=@gprel(ia64_leave_nested),gp 411 addl r14=@gprel(ia64_leave_nested),gp
406 ;; 412 ;;
407 mov rp=r14 413 mov rp=r14
408 br.call.sptk.many b6=kvm_ia64_handle_irq 414 br.call.sptk.many b6=kvm_ia64_handle_irq
409 ;; 415 ;;
410END(kvm_interrupt) 416END(kvm_interrupt)
411 417
412 .global kvm_dispatch_vexirq 418 .global kvm_dispatch_vexirq
@@ -414,387 +420,385 @@ END(kvm_interrupt)
414////////////////////////////////////////////////////////////////////// 420//////////////////////////////////////////////////////////////////////
415// 0x3400 Entry 13 (size 64 bundles) Reserved 421// 0x3400 Entry 13 (size 64 bundles) Reserved
416ENTRY(kvm_virtual_exirq) 422ENTRY(kvm_virtual_exirq)
417 mov r31=pr 423 mov r31=pr
418 mov r19=13 424 mov r19=13
419 mov r30 =r0 425 mov r30 =r0
420 ;; 426 ;;
421kvm_dispatch_vexirq: 427kvm_dispatch_vexirq:
422 cmp.eq p6,p0 = 1,r30 428 cmp.eq p6,p0 = 1,r30
423 ;; 429 ;;
424(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 430(p6) add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
425 ;; 431 ;;
426(p6)ld8 r1 = [r29] 432(p6) ld8 r1 = [r29]
427 ;; 433 ;;
428 KVM_SAVE_MIN_WITH_COVER_R19 434 KVM_SAVE_MIN_WITH_COVER_R19
429 alloc r14=ar.pfs,0,0,1,0 435 alloc r14=ar.pfs,0,0,1,0
430 mov out0=r13 436 mov out0=r13
431 437
432 ssm psr.ic 438 ssm psr.ic
433 ;; 439 ;;
434 srlz.i // guarantee that interruption collection is on 440 srlz.i // guarantee that interruption collection is on
435 ;; 441 ;;
436 //(p15) ssm psr.i // restore psr.i 442 //(p15) ssm psr.i // restore psr.i
437 adds r3=8,r2 // set up second base pointer 443 adds r3=8,r2 // set up second base pointer
438 ;; 444 ;;
439 KVM_SAVE_REST 445 KVM_SAVE_REST
440 addl r14=@gprel(ia64_leave_hypervisor),gp 446 addl r14=@gprel(ia64_leave_hypervisor),gp
441 ;; 447 ;;
442 mov rp=r14 448 mov rp=r14
443 br.call.sptk.many b6=kvm_vexirq 449 br.call.sptk.many b6=kvm_vexirq
444END(kvm_virtual_exirq) 450END(kvm_virtual_exirq)
445 451
446 .org kvm_ia64_ivt+0x3800 452 .org kvm_ia64_ivt+0x3800
447///////////////////////////////////////////////////////////////////// 453/////////////////////////////////////////////////////////////////////
448// 0x3800 Entry 14 (size 64 bundles) Reserved 454// 0x3800 Entry 14 (size 64 bundles) Reserved
449 KVM_FAULT(14) 455 KVM_FAULT(14)
450 // this code segment is from 2.6.16.13 456 // this code segment is from 2.6.16.13
451
452 457
453 .org kvm_ia64_ivt+0x3c00 458 .org kvm_ia64_ivt+0x3c00
454/////////////////////////////////////////////////////////////////////// 459///////////////////////////////////////////////////////////////////////
455// 0x3c00 Entry 15 (size 64 bundles) Reserved 460// 0x3c00 Entry 15 (size 64 bundles) Reserved
456 KVM_FAULT(15) 461 KVM_FAULT(15)
457
458 462
459 .org kvm_ia64_ivt+0x4000 463 .org kvm_ia64_ivt+0x4000
460/////////////////////////////////////////////////////////////////////// 464///////////////////////////////////////////////////////////////////////
461// 0x4000 Entry 16 (size 64 bundles) Reserved 465// 0x4000 Entry 16 (size 64 bundles) Reserved
462 KVM_FAULT(16) 466 KVM_FAULT(16)
463 467
464 .org kvm_ia64_ivt+0x4400 468 .org kvm_ia64_ivt+0x4400
465////////////////////////////////////////////////////////////////////// 469//////////////////////////////////////////////////////////////////////
466// 0x4400 Entry 17 (size 64 bundles) Reserved 470// 0x4400 Entry 17 (size 64 bundles) Reserved
467 KVM_FAULT(17) 471 KVM_FAULT(17)
468 472
469 .org kvm_ia64_ivt+0x4800 473 .org kvm_ia64_ivt+0x4800
470////////////////////////////////////////////////////////////////////// 474//////////////////////////////////////////////////////////////////////
471// 0x4800 Entry 18 (size 64 bundles) Reserved 475// 0x4800 Entry 18 (size 64 bundles) Reserved
472 KVM_FAULT(18) 476 KVM_FAULT(18)
473 477
474 .org kvm_ia64_ivt+0x4c00 478 .org kvm_ia64_ivt+0x4c00
475////////////////////////////////////////////////////////////////////// 479//////////////////////////////////////////////////////////////////////
476// 0x4c00 Entry 19 (size 64 bundles) Reserved 480// 0x4c00 Entry 19 (size 64 bundles) Reserved
477 KVM_FAULT(19) 481 KVM_FAULT(19)
478 482
479 .org kvm_ia64_ivt+0x5000 483 .org kvm_ia64_ivt+0x5000
480////////////////////////////////////////////////////////////////////// 484//////////////////////////////////////////////////////////////////////
481// 0x5000 Entry 20 (size 16 bundles) Page Not Present 485// 0x5000 Entry 20 (size 16 bundles) Page Not Present
482ENTRY(kvm_page_not_present) 486ENTRY(kvm_page_not_present)
483 KVM_REFLECT(20) 487 KVM_REFLECT(20)
484END(kvm_page_not_present) 488END(kvm_page_not_present)
485 489
486 .org kvm_ia64_ivt+0x5100 490 .org kvm_ia64_ivt+0x5100
487/////////////////////////////////////////////////////////////////////// 491///////////////////////////////////////////////////////////////////////
488// 0x5100 Entry 21 (size 16 bundles) Key Permission vector 492// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
489ENTRY(kvm_key_permission) 493ENTRY(kvm_key_permission)
490 KVM_REFLECT(21) 494 KVM_REFLECT(21)
491END(kvm_key_permission) 495END(kvm_key_permission)
492 496
493 .org kvm_ia64_ivt+0x5200 497 .org kvm_ia64_ivt+0x5200
494////////////////////////////////////////////////////////////////////// 498//////////////////////////////////////////////////////////////////////
495// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) 499// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
496ENTRY(kvm_iaccess_rights) 500ENTRY(kvm_iaccess_rights)
497 KVM_REFLECT(22) 501 KVM_REFLECT(22)
498END(kvm_iaccess_rights) 502END(kvm_iaccess_rights)
499 503
500 .org kvm_ia64_ivt+0x5300 504 .org kvm_ia64_ivt+0x5300
501////////////////////////////////////////////////////////////////////// 505//////////////////////////////////////////////////////////////////////
502// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) 506// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
503ENTRY(kvm_daccess_rights) 507ENTRY(kvm_daccess_rights)
504 KVM_REFLECT(23) 508 KVM_REFLECT(23)
505END(kvm_daccess_rights) 509END(kvm_daccess_rights)
506 510
507 .org kvm_ia64_ivt+0x5400 511 .org kvm_ia64_ivt+0x5400
508///////////////////////////////////////////////////////////////////// 512/////////////////////////////////////////////////////////////////////
509// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) 513// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
510ENTRY(kvm_general_exception) 514ENTRY(kvm_general_exception)
511 KVM_REFLECT(24) 515 KVM_REFLECT(24)
512 KVM_FAULT(24) 516 KVM_FAULT(24)
513END(kvm_general_exception) 517END(kvm_general_exception)
514 518
515 .org kvm_ia64_ivt+0x5500 519 .org kvm_ia64_ivt+0x5500
516////////////////////////////////////////////////////////////////////// 520//////////////////////////////////////////////////////////////////////
517// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) 521// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
518ENTRY(kvm_disabled_fp_reg) 522ENTRY(kvm_disabled_fp_reg)
519 KVM_REFLECT(25) 523 KVM_REFLECT(25)
520END(kvm_disabled_fp_reg) 524END(kvm_disabled_fp_reg)
521 525
522 .org kvm_ia64_ivt+0x5600 526 .org kvm_ia64_ivt+0x5600
523//////////////////////////////////////////////////////////////////// 527////////////////////////////////////////////////////////////////////
524// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) 528// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
525ENTRY(kvm_nat_consumption) 529ENTRY(kvm_nat_consumption)
526 KVM_REFLECT(26) 530 KVM_REFLECT(26)
527END(kvm_nat_consumption) 531END(kvm_nat_consumption)
528 532
529 .org kvm_ia64_ivt+0x5700 533 .org kvm_ia64_ivt+0x5700
530///////////////////////////////////////////////////////////////////// 534/////////////////////////////////////////////////////////////////////
531// 0x5700 Entry 27 (size 16 bundles) Speculation (40) 535// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
532ENTRY(kvm_speculation_vector) 536ENTRY(kvm_speculation_vector)
533 KVM_REFLECT(27) 537 KVM_REFLECT(27)
534END(kvm_speculation_vector) 538END(kvm_speculation_vector)
535 539
536 .org kvm_ia64_ivt+0x5800 540 .org kvm_ia64_ivt+0x5800
537///////////////////////////////////////////////////////////////////// 541/////////////////////////////////////////////////////////////////////
538// 0x5800 Entry 28 (size 16 bundles) Reserved 542// 0x5800 Entry 28 (size 16 bundles) Reserved
539 KVM_FAULT(28) 543 KVM_FAULT(28)
540 544
541 .org kvm_ia64_ivt+0x5900 545 .org kvm_ia64_ivt+0x5900
542/////////////////////////////////////////////////////////////////// 546///////////////////////////////////////////////////////////////////
543// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) 547// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
544ENTRY(kvm_debug_vector) 548ENTRY(kvm_debug_vector)
545 KVM_FAULT(29) 549 KVM_FAULT(29)
546END(kvm_debug_vector) 550END(kvm_debug_vector)
547 551
548 .org kvm_ia64_ivt+0x5a00 552 .org kvm_ia64_ivt+0x5a00
549/////////////////////////////////////////////////////////////// 553///////////////////////////////////////////////////////////////
550// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) 554// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
551ENTRY(kvm_unaligned_access) 555ENTRY(kvm_unaligned_access)
552 KVM_REFLECT(30) 556 KVM_REFLECT(30)
553END(kvm_unaligned_access) 557END(kvm_unaligned_access)
554 558
555 .org kvm_ia64_ivt+0x5b00 559 .org kvm_ia64_ivt+0x5b00
556////////////////////////////////////////////////////////////////////// 560//////////////////////////////////////////////////////////////////////
557// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) 561// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
558ENTRY(kvm_unsupported_data_reference) 562ENTRY(kvm_unsupported_data_reference)
559 KVM_REFLECT(31) 563 KVM_REFLECT(31)
560END(kvm_unsupported_data_reference) 564END(kvm_unsupported_data_reference)
561 565
562 .org kvm_ia64_ivt+0x5c00 566 .org kvm_ia64_ivt+0x5c00
563//////////////////////////////////////////////////////////////////// 567////////////////////////////////////////////////////////////////////
564// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) 568// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
565ENTRY(kvm_floating_point_fault) 569ENTRY(kvm_floating_point_fault)
566 KVM_REFLECT(32) 570 KVM_REFLECT(32)
567END(kvm_floating_point_fault) 571END(kvm_floating_point_fault)
568 572
569 .org kvm_ia64_ivt+0x5d00 573 .org kvm_ia64_ivt+0x5d00
570///////////////////////////////////////////////////////////////////// 574/////////////////////////////////////////////////////////////////////
571// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) 575// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
572ENTRY(kvm_floating_point_trap) 576ENTRY(kvm_floating_point_trap)
573 KVM_REFLECT(33) 577 KVM_REFLECT(33)
574END(kvm_floating_point_trap) 578END(kvm_floating_point_trap)
575 579
576 .org kvm_ia64_ivt+0x5e00 580 .org kvm_ia64_ivt+0x5e00
577////////////////////////////////////////////////////////////////////// 581//////////////////////////////////////////////////////////////////////
578// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) 582// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
579ENTRY(kvm_lower_privilege_trap) 583ENTRY(kvm_lower_privilege_trap)
580 KVM_REFLECT(34) 584 KVM_REFLECT(34)
581END(kvm_lower_privilege_trap) 585END(kvm_lower_privilege_trap)
582 586
583 .org kvm_ia64_ivt+0x5f00 587 .org kvm_ia64_ivt+0x5f00
584////////////////////////////////////////////////////////////////////// 588//////////////////////////////////////////////////////////////////////
585// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) 589// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
586ENTRY(kvm_taken_branch_trap) 590ENTRY(kvm_taken_branch_trap)
587 KVM_REFLECT(35) 591 KVM_REFLECT(35)
588END(kvm_taken_branch_trap) 592END(kvm_taken_branch_trap)
589 593
590 .org kvm_ia64_ivt+0x6000 594 .org kvm_ia64_ivt+0x6000
591//////////////////////////////////////////////////////////////////// 595////////////////////////////////////////////////////////////////////
592// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) 596// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
593ENTRY(kvm_single_step_trap) 597ENTRY(kvm_single_step_trap)
594 KVM_REFLECT(36) 598 KVM_REFLECT(36)
595END(kvm_single_step_trap) 599END(kvm_single_step_trap)
596 .global kvm_virtualization_fault_back 600 .global kvm_virtualization_fault_back
597 .org kvm_ia64_ivt+0x6100 601 .org kvm_ia64_ivt+0x6100
598///////////////////////////////////////////////////////////////////// 602/////////////////////////////////////////////////////////////////////
599// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault 603// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
600ENTRY(kvm_virtualization_fault) 604ENTRY(kvm_virtualization_fault)
601 mov r31=pr 605 mov r31=pr
602 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 606 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
603 ;; 607 ;;
604 st8 [r16] = r1 608 st8 [r16] = r1
605 adds r17 = VMM_VCPU_GP_OFFSET, r21 609 adds r17 = VMM_VCPU_GP_OFFSET, r21
606 ;; 610 ;;
607 ld8 r1 = [r17] 611 ld8 r1 = [r17]
608 cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 612 cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
609 cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 613 cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
610 cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 614 cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
611 cmp.eq p9,p0=EVENT_RSM,r24 615 cmp.eq p9,p0=EVENT_RSM,r24
612 cmp.eq p10,p0=EVENT_SSM,r24 616 cmp.eq p10,p0=EVENT_SSM,r24
613 cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 617 cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
614 cmp.eq p12,p0=EVENT_THASH,r24 618 cmp.eq p12,p0=EVENT_THASH,r24
615 (p6) br.dptk.many kvm_asm_mov_from_ar 619(p6) br.dptk.many kvm_asm_mov_from_ar
616 (p7) br.dptk.many kvm_asm_mov_from_rr 620(p7) br.dptk.many kvm_asm_mov_from_rr
617 (p8) br.dptk.many kvm_asm_mov_to_rr 621(p8) br.dptk.many kvm_asm_mov_to_rr
618 (p9) br.dptk.many kvm_asm_rsm 622(p9) br.dptk.many kvm_asm_rsm
619 (p10) br.dptk.many kvm_asm_ssm 623(p10) br.dptk.many kvm_asm_ssm
620 (p11) br.dptk.many kvm_asm_mov_to_psr 624(p11) br.dptk.many kvm_asm_mov_to_psr
621 (p12) br.dptk.many kvm_asm_thash 625(p12) br.dptk.many kvm_asm_thash
622 ;; 626 ;;
623kvm_virtualization_fault_back: 627kvm_virtualization_fault_back:
624 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 628 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
625 ;; 629 ;;
626 ld8 r1 = [r16] 630 ld8 r1 = [r16]
627 ;; 631 ;;
628 mov r19=37 632 mov r19=37
629 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 633 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
630 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 634 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
631 ;; 635 ;;
632 st8 [r16] = r24 636 st8 [r16] = r24
633 st8 [r17] = r25 637 st8 [r17] = r25
634 ;; 638 ;;
635 cmp.ne p6,p0=EVENT_RFI, r24 639 cmp.ne p6,p0=EVENT_RFI, r24
636 (p6) br.sptk kvm_dispatch_virtualization_fault 640(p6) br.sptk kvm_dispatch_virtualization_fault
637 ;; 641 ;;
638 adds r18=VMM_VPD_BASE_OFFSET,r21 642 adds r18=VMM_VPD_BASE_OFFSET,r21
639 ;; 643 ;;
640 ld8 r18=[r18] 644 ld8 r18=[r18]
641 ;; 645 ;;
642 adds r18=VMM_VPD_VIFS_OFFSET,r18 646 adds r18=VMM_VPD_VIFS_OFFSET,r18
643 ;; 647 ;;
644 ld8 r18=[r18] 648 ld8 r18=[r18]
645 ;; 649 ;;
646 tbit.z p6,p0=r18,63 650 tbit.z p6,p0=r18,63
647 (p6) br.sptk kvm_dispatch_virtualization_fault 651(p6) br.sptk kvm_dispatch_virtualization_fault
648 ;; 652 ;;
649 //if vifs.v=1 desert current register frame 653//if vifs.v=1 desert current register frame
650 alloc r18=ar.pfs,0,0,0,0 654 alloc r18=ar.pfs,0,0,0,0
651 br.sptk kvm_dispatch_virtualization_fault 655 br.sptk kvm_dispatch_virtualization_fault
652END(kvm_virtualization_fault) 656END(kvm_virtualization_fault)
653 657
654 .org kvm_ia64_ivt+0x6200 658 .org kvm_ia64_ivt+0x6200
655////////////////////////////////////////////////////////////// 659//////////////////////////////////////////////////////////////
656// 0x6200 Entry 38 (size 16 bundles) Reserved 660// 0x6200 Entry 38 (size 16 bundles) Reserved
657 KVM_FAULT(38) 661 KVM_FAULT(38)
658 662
659 .org kvm_ia64_ivt+0x6300 663 .org kvm_ia64_ivt+0x6300
660///////////////////////////////////////////////////////////////// 664/////////////////////////////////////////////////////////////////
661// 0x6300 Entry 39 (size 16 bundles) Reserved 665// 0x6300 Entry 39 (size 16 bundles) Reserved
662 KVM_FAULT(39) 666 KVM_FAULT(39)
663 667
664 .org kvm_ia64_ivt+0x6400 668 .org kvm_ia64_ivt+0x6400
665///////////////////////////////////////////////////////////////// 669/////////////////////////////////////////////////////////////////
666// 0x6400 Entry 40 (size 16 bundles) Reserved 670// 0x6400 Entry 40 (size 16 bundles) Reserved
667 KVM_FAULT(40) 671 KVM_FAULT(40)
668 672
669 .org kvm_ia64_ivt+0x6500 673 .org kvm_ia64_ivt+0x6500
670////////////////////////////////////////////////////////////////// 674//////////////////////////////////////////////////////////////////
671// 0x6500 Entry 41 (size 16 bundles) Reserved 675// 0x6500 Entry 41 (size 16 bundles) Reserved
672 KVM_FAULT(41) 676 KVM_FAULT(41)
673 677
674 .org kvm_ia64_ivt+0x6600 678 .org kvm_ia64_ivt+0x6600
675////////////////////////////////////////////////////////////////// 679//////////////////////////////////////////////////////////////////
676// 0x6600 Entry 42 (size 16 bundles) Reserved 680// 0x6600 Entry 42 (size 16 bundles) Reserved
677 KVM_FAULT(42) 681 KVM_FAULT(42)
678 682
679 .org kvm_ia64_ivt+0x6700 683 .org kvm_ia64_ivt+0x6700
680////////////////////////////////////////////////////////////////// 684//////////////////////////////////////////////////////////////////
681// 0x6700 Entry 43 (size 16 bundles) Reserved 685// 0x6700 Entry 43 (size 16 bundles) Reserved
682 KVM_FAULT(43) 686 KVM_FAULT(43)
683 687
684 .org kvm_ia64_ivt+0x6800 688 .org kvm_ia64_ivt+0x6800
685////////////////////////////////////////////////////////////////// 689//////////////////////////////////////////////////////////////////
686// 0x6800 Entry 44 (size 16 bundles) Reserved 690// 0x6800 Entry 44 (size 16 bundles) Reserved
687 KVM_FAULT(44) 691 KVM_FAULT(44)
688 692
689 .org kvm_ia64_ivt+0x6900 693 .org kvm_ia64_ivt+0x6900
690/////////////////////////////////////////////////////////////////// 694///////////////////////////////////////////////////////////////////
691// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception 695// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception
692//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) 696//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
693ENTRY(kvm_ia32_exception) 697ENTRY(kvm_ia32_exception)
694 KVM_FAULT(45) 698 KVM_FAULT(45)
695END(kvm_ia32_exception) 699END(kvm_ia32_exception)
696 700
697 .org kvm_ia64_ivt+0x6a00 701 .org kvm_ia64_ivt+0x6a00
698//////////////////////////////////////////////////////////////////// 702////////////////////////////////////////////////////////////////////
699// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) 703// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
700ENTRY(kvm_ia32_intercept) 704ENTRY(kvm_ia32_intercept)
701 KVM_FAULT(47) 705 KVM_FAULT(47)
702END(kvm_ia32_intercept) 706END(kvm_ia32_intercept)
703 707
704 .org kvm_ia64_ivt+0x6c00 708 .org kvm_ia64_ivt+0x6c00
705///////////////////////////////////////////////////////////////////// 709/////////////////////////////////////////////////////////////////////
706// 0x6c00 Entry 48 (size 16 bundles) Reserved 710// 0x6c00 Entry 48 (size 16 bundles) Reserved
707 KVM_FAULT(48) 711 KVM_FAULT(48)
708 712
709 .org kvm_ia64_ivt+0x6d00 713 .org kvm_ia64_ivt+0x6d00
710////////////////////////////////////////////////////////////////////// 714//////////////////////////////////////////////////////////////////////
711// 0x6d00 Entry 49 (size 16 bundles) Reserved 715// 0x6d00 Entry 49 (size 16 bundles) Reserved
712 KVM_FAULT(49) 716 KVM_FAULT(49)
713 717
714 .org kvm_ia64_ivt+0x6e00 718 .org kvm_ia64_ivt+0x6e00
715////////////////////////////////////////////////////////////////////// 719//////////////////////////////////////////////////////////////////////
716// 0x6e00 Entry 50 (size 16 bundles) Reserved 720// 0x6e00 Entry 50 (size 16 bundles) Reserved
717 KVM_FAULT(50) 721 KVM_FAULT(50)
718 722
719 .org kvm_ia64_ivt+0x6f00 723 .org kvm_ia64_ivt+0x6f00
720///////////////////////////////////////////////////////////////////// 724/////////////////////////////////////////////////////////////////////
721// 0x6f00 Entry 51 (size 16 bundles) Reserved 725// 0x6f00 Entry 51 (size 16 bundles) Reserved
722 KVM_FAULT(52) 726 KVM_FAULT(52)
723 727
724 .org kvm_ia64_ivt+0x7100 728 .org kvm_ia64_ivt+0x7100
725//////////////////////////////////////////////////////////////////// 729////////////////////////////////////////////////////////////////////
726// 0x7100 Entry 53 (size 16 bundles) Reserved 730// 0x7100 Entry 53 (size 16 bundles) Reserved
727 KVM_FAULT(53) 731 KVM_FAULT(53)
728 732
729 .org kvm_ia64_ivt+0x7200 733 .org kvm_ia64_ivt+0x7200
730///////////////////////////////////////////////////////////////////// 734/////////////////////////////////////////////////////////////////////
731// 0x7200 Entry 54 (size 16 bundles) Reserved 735// 0x7200 Entry 54 (size 16 bundles) Reserved
732 KVM_FAULT(54) 736 KVM_FAULT(54)
733 737
734 .org kvm_ia64_ivt+0x7300 738 .org kvm_ia64_ivt+0x7300
735//////////////////////////////////////////////////////////////////// 739////////////////////////////////////////////////////////////////////
736// 0x7300 Entry 55 (size 16 bundles) Reserved 740// 0x7300 Entry 55 (size 16 bundles) Reserved
737 KVM_FAULT(55) 741 KVM_FAULT(55)
738 742
739 .org kvm_ia64_ivt+0x7400 743 .org kvm_ia64_ivt+0x7400
740//////////////////////////////////////////////////////////////////// 744////////////////////////////////////////////////////////////////////
741// 0x7400 Entry 56 (size 16 bundles) Reserved 745// 0x7400 Entry 56 (size 16 bundles) Reserved
742 KVM_FAULT(56) 746 KVM_FAULT(56)
743 747
744 .org kvm_ia64_ivt+0x7500 748 .org kvm_ia64_ivt+0x7500
745///////////////////////////////////////////////////////////////////// 749/////////////////////////////////////////////////////////////////////
746// 0x7500 Entry 57 (size 16 bundles) Reserved 750// 0x7500 Entry 57 (size 16 bundles) Reserved
747 KVM_FAULT(57) 751 KVM_FAULT(57)
748 752
749 .org kvm_ia64_ivt+0x7600 753 .org kvm_ia64_ivt+0x7600
750///////////////////////////////////////////////////////////////////// 754/////////////////////////////////////////////////////////////////////
751// 0x7600 Entry 58 (size 16 bundles) Reserved 755// 0x7600 Entry 58 (size 16 bundles) Reserved
752 KVM_FAULT(58) 756 KVM_FAULT(58)
753 757
754 .org kvm_ia64_ivt+0x7700 758 .org kvm_ia64_ivt+0x7700
755//////////////////////////////////////////////////////////////////// 759////////////////////////////////////////////////////////////////////
756// 0x7700 Entry 59 (size 16 bundles) Reserved 760// 0x7700 Entry 59 (size 16 bundles) Reserved
757 KVM_FAULT(59) 761 KVM_FAULT(59)
758 762
759 .org kvm_ia64_ivt+0x7800 763 .org kvm_ia64_ivt+0x7800
760//////////////////////////////////////////////////////////////////// 764////////////////////////////////////////////////////////////////////
761// 0x7800 Entry 60 (size 16 bundles) Reserved 765// 0x7800 Entry 60 (size 16 bundles) Reserved
762 KVM_FAULT(60) 766 KVM_FAULT(60)
763 767
764 .org kvm_ia64_ivt+0x7900 768 .org kvm_ia64_ivt+0x7900
765///////////////////////////////////////////////////////////////////// 769/////////////////////////////////////////////////////////////////////
766// 0x7900 Entry 61 (size 16 bundles) Reserved 770// 0x7900 Entry 61 (size 16 bundles) Reserved
767 KVM_FAULT(61) 771 KVM_FAULT(61)
768 772
769 .org kvm_ia64_ivt+0x7a00 773 .org kvm_ia64_ivt+0x7a00
770///////////////////////////////////////////////////////////////////// 774/////////////////////////////////////////////////////////////////////
771// 0x7a00 Entry 62 (size 16 bundles) Reserved 775// 0x7a00 Entry 62 (size 16 bundles) Reserved
772 KVM_FAULT(62) 776 KVM_FAULT(62)
773 777
774 .org kvm_ia64_ivt+0x7b00 778 .org kvm_ia64_ivt+0x7b00
775///////////////////////////////////////////////////////////////////// 779/////////////////////////////////////////////////////////////////////
776// 0x7b00 Entry 63 (size 16 bundles) Reserved 780// 0x7b00 Entry 63 (size 16 bundles) Reserved
777 KVM_FAULT(63) 781 KVM_FAULT(63)
778 782
779 .org kvm_ia64_ivt+0x7c00 783 .org kvm_ia64_ivt+0x7c00
780//////////////////////////////////////////////////////////////////// 784////////////////////////////////////////////////////////////////////
781// 0x7c00 Entry 64 (size 16 bundles) Reserved 785// 0x7c00 Entry 64 (size 16 bundles) Reserved
782 KVM_FAULT(64) 786 KVM_FAULT(64)
783 787
784 .org kvm_ia64_ivt+0x7d00 788 .org kvm_ia64_ivt+0x7d00
785///////////////////////////////////////////////////////////////////// 789/////////////////////////////////////////////////////////////////////
786// 0x7d00 Entry 65 (size 16 bundles) Reserved 790// 0x7d00 Entry 65 (size 16 bundles) Reserved
787 KVM_FAULT(65) 791 KVM_FAULT(65)
788 792
789 .org kvm_ia64_ivt+0x7e00 793 .org kvm_ia64_ivt+0x7e00
790///////////////////////////////////////////////////////////////////// 794/////////////////////////////////////////////////////////////////////
791// 0x7e00 Entry 66 (size 16 bundles) Reserved 795// 0x7e00 Entry 66 (size 16 bundles) Reserved
792 KVM_FAULT(66) 796 KVM_FAULT(66)
793 797
794 .org kvm_ia64_ivt+0x7f00 798 .org kvm_ia64_ivt+0x7f00
795//////////////////////////////////////////////////////////////////// 799////////////////////////////////////////////////////////////////////
796// 0x7f00 Entry 67 (size 16 bundles) Reserved 800// 0x7f00 Entry 67 (size 16 bundles) Reserved
797 KVM_FAULT(67) 801 KVM_FAULT(67)
798 802
799 .org kvm_ia64_ivt+0x8000 803 .org kvm_ia64_ivt+0x8000
800// There is no particular reason for this code to be here, other than that 804// There is no particular reason for this code to be here, other than that
@@ -804,132 +808,128 @@ END(kvm_ia32_intercept)
804 808
805 809
806ENTRY(kvm_dtlb_miss_dispatch) 810ENTRY(kvm_dtlb_miss_dispatch)
807 mov r19 = 2 811 mov r19 = 2
808 KVM_SAVE_MIN_WITH_COVER_R19 812 KVM_SAVE_MIN_WITH_COVER_R19
809 alloc r14=ar.pfs,0,0,3,0 813 alloc r14=ar.pfs,0,0,3,0
810 mov out0=cr.ifa 814 mov out0=cr.ifa
811 mov out1=r15 815 mov out1=r15
812 adds r3=8,r2 // set up second base pointer 816 adds r3=8,r2 // set up second base pointer
813 ;; 817 ;;
814 ssm psr.ic 818 ssm psr.ic
815 ;; 819 ;;
816 srlz.i // guarantee that interruption collection is on 820 srlz.i // guarantee that interruption collection is on
817 ;; 821 ;;
818 //(p15) ssm psr.i // restore psr.i 822 //(p15) ssm psr.i // restore psr.i
819 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp 823 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
820 ;; 824 ;;
821 KVM_SAVE_REST 825 KVM_SAVE_REST
822 KVM_SAVE_EXTRA 826 KVM_SAVE_EXTRA
823 mov rp=r14 827 mov rp=r14
824 ;; 828 ;;
825 adds out2=16,r12 829 adds out2=16,r12
826 br.call.sptk.many b6=kvm_page_fault 830 br.call.sptk.many b6=kvm_page_fault
827END(kvm_dtlb_miss_dispatch) 831END(kvm_dtlb_miss_dispatch)
828 832
829ENTRY(kvm_itlb_miss_dispatch) 833ENTRY(kvm_itlb_miss_dispatch)
830 834
831 KVM_SAVE_MIN_WITH_COVER_R19 835 KVM_SAVE_MIN_WITH_COVER_R19
832 alloc r14=ar.pfs,0,0,3,0 836 alloc r14=ar.pfs,0,0,3,0
833 mov out0=cr.ifa 837 mov out0=cr.ifa
834 mov out1=r15 838 mov out1=r15
835 adds r3=8,r2 // set up second base pointer 839 adds r3=8,r2 // set up second base pointer
836 ;; 840 ;;
837 ssm psr.ic 841 ssm psr.ic
838 ;; 842 ;;
839 srlz.i // guarantee that interruption collection is on 843 srlz.i // guarantee that interruption collection is on
840 ;; 844 ;;
841 //(p15) ssm psr.i // restore psr.i 845 //(p15) ssm psr.i // restore psr.i
842 addl r14=@gprel(ia64_leave_hypervisor),gp 846 addl r14=@gprel(ia64_leave_hypervisor),gp
843 ;; 847 ;;
844 KVM_SAVE_REST 848 KVM_SAVE_REST
845 mov rp=r14 849 mov rp=r14
846 ;; 850 ;;
847 adds out2=16,r12 851 adds out2=16,r12
848 br.call.sptk.many b6=kvm_page_fault 852 br.call.sptk.many b6=kvm_page_fault
849END(kvm_itlb_miss_dispatch) 853END(kvm_itlb_miss_dispatch)
850 854
851ENTRY(kvm_dispatch_reflection) 855ENTRY(kvm_dispatch_reflection)
852 /* 856/*
853 * Input: 857 * Input:
854 * psr.ic: off 858 * psr.ic: off
855 * r19: intr type (offset into ivt, see ia64_int.h) 859 * r19: intr type (offset into ivt, see ia64_int.h)
856 * r31: contains saved predicates (pr) 860 * r31: contains saved predicates (pr)
857 */ 861 */
858 KVM_SAVE_MIN_WITH_COVER_R19 862 KVM_SAVE_MIN_WITH_COVER_R19
859 alloc r14=ar.pfs,0,0,5,0 863 alloc r14=ar.pfs,0,0,5,0
860 mov out0=cr.ifa 864 mov out0=cr.ifa
861 mov out1=cr.isr 865 mov out1=cr.isr
862 mov out2=cr.iim 866 mov out2=cr.iim
863 mov out3=r15 867 mov out3=r15
864 adds r3=8,r2 // set up second base pointer 868 adds r3=8,r2 // set up second base pointer
865 ;; 869 ;;
866 ssm psr.ic 870 ssm psr.ic
867 ;; 871 ;;
868 srlz.i // guarantee that interruption collection is on 872 srlz.i // guarantee that interruption collection is on
869 ;; 873 ;;
870 //(p15) ssm psr.i // restore psr.i 874 //(p15) ssm psr.i // restore psr.i
871 addl r14=@gprel(ia64_leave_hypervisor),gp 875 addl r14=@gprel(ia64_leave_hypervisor),gp
872 ;; 876 ;;
873 KVM_SAVE_REST 877 KVM_SAVE_REST
874 mov rp=r14 878 mov rp=r14
875 ;; 879 ;;
876 adds out4=16,r12 880 adds out4=16,r12
877 br.call.sptk.many b6=reflect_interruption 881 br.call.sptk.many b6=reflect_interruption
878END(kvm_dispatch_reflection) 882END(kvm_dispatch_reflection)
879 883
880ENTRY(kvm_dispatch_virtualization_fault) 884ENTRY(kvm_dispatch_virtualization_fault)
881 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 885 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
882 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 886 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
883 ;; 887 ;;
884 st8 [r16] = r24 888 st8 [r16] = r24
885 st8 [r17] = r25 889 st8 [r17] = r25
886 ;; 890 ;;
887 KVM_SAVE_MIN_WITH_COVER_R19 891 KVM_SAVE_MIN_WITH_COVER_R19
888 ;; 892 ;;
889 alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) 893 alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!)
890 mov out0=r13 //vcpu 894 mov out0=r13 //vcpu
891 adds r3=8,r2 // set up second base pointer 895 adds r3=8,r2 // set up second base pointer
892 ;; 896 ;;
893 ssm psr.ic 897 ssm psr.ic
894 ;; 898 ;;
895 srlz.i // guarantee that interruption collection is on 899 srlz.i // guarantee that interruption collection is on
896 ;; 900 ;;
897 //(p15) ssm psr.i // restore psr.i 901 //(p15) ssm psr.i // restore psr.i
898 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp 902 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
899 ;; 903 ;;
900 KVM_SAVE_REST 904 KVM_SAVE_REST
901 KVM_SAVE_EXTRA 905 KVM_SAVE_EXTRA
902 mov rp=r14 906 mov rp=r14
903 ;; 907 ;;
904 adds out1=16,sp //regs 908 adds out1=16,sp //regs
905 br.call.sptk.many b6=kvm_emulate 909 br.call.sptk.many b6=kvm_emulate
906END(kvm_dispatch_virtualization_fault) 910END(kvm_dispatch_virtualization_fault)
907 911
908 912
909ENTRY(kvm_dispatch_interrupt) 913ENTRY(kvm_dispatch_interrupt)
910 KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 914 KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3
911 ;; 915 ;;
912 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group 916 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
913 //mov out0=cr.ivr // pass cr.ivr as first arg 917 adds r3=8,r2 // set up second base pointer for SAVE_REST
914 adds r3=8,r2 // set up second base pointer for SAVE_REST 918 ;;
915 ;; 919 ssm psr.ic
916 ssm psr.ic 920 ;;
917 ;; 921 srlz.i
918 srlz.i 922 ;;
919 ;; 923 //(p15) ssm psr.i
920 //(p15) ssm psr.i 924 addl r14=@gprel(ia64_leave_hypervisor),gp
921 addl r14=@gprel(ia64_leave_hypervisor),gp 925 ;;
922 ;; 926 KVM_SAVE_REST
923 KVM_SAVE_REST 927 mov rp=r14
924 mov rp=r14 928 ;;
925 ;; 929 mov out0=r13 // pass pointer to pt_regs as second arg
926 mov out0=r13 // pass pointer to pt_regs as second arg 930 br.call.sptk.many b6=kvm_ia64_handle_irq
927 br.call.sptk.many b6=kvm_ia64_handle_irq
928END(kvm_dispatch_interrupt) 931END(kvm_dispatch_interrupt)
929 932
930
931
932
933GLOBAL_ENTRY(ia64_leave_nested) 933GLOBAL_ENTRY(ia64_leave_nested)
934 rsm psr.i 934 rsm psr.i
935 ;; 935 ;;
@@ -1008,7 +1008,7 @@ GLOBAL_ENTRY(ia64_leave_nested)
1008 ;; 1008 ;;
1009 ldf.fill f11=[r2] 1009 ldf.fill f11=[r2]
1010// mov r18=r13 1010// mov r18=r13
1011// mov r21=r13 1011// mov r21=r13
1012 adds r16=PT(CR_IPSR)+16,r12 1012 adds r16=PT(CR_IPSR)+16,r12
1013 adds r17=PT(CR_IIP)+16,r12 1013 adds r17=PT(CR_IIP)+16,r12
1014 ;; 1014 ;;
@@ -1058,138 +1058,135 @@ GLOBAL_ENTRY(ia64_leave_nested)
1058 rfi 1058 rfi
1059END(ia64_leave_nested) 1059END(ia64_leave_nested)
1060 1060
1061
1062
1063GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) 1061GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
1064 /* 1062/*
1065 * work.need_resched etc. mustn't get changed 1063 * work.need_resched etc. mustn't get changed
1066 *by this CPU before it returns to 1064 *by this CPU before it returns to
1067 ;; 1065 * user- or fsys-mode, hence we disable interrupts early on:
1068 * user- or fsys-mode, hence we disable interrupts early on: 1066 */
1069 */ 1067 adds r2 = PT(R4)+16,r12
1070 adds r2 = PT(R4)+16,r12 1068 adds r3 = PT(R5)+16,r12
1071 adds r3 = PT(R5)+16,r12 1069 adds r8 = PT(EML_UNAT)+16,r12
1072 adds r8 = PT(EML_UNAT)+16,r12 1070 ;;
1073 ;; 1071 ld8 r8 = [r8]
1074 ld8 r8 = [r8] 1072 ;;
1075 ;; 1073 mov ar.unat=r8
1076 mov ar.unat=r8 1074 ;;
1077 ;; 1075 ld8.fill r4=[r2],16 //load r4
1078 ld8.fill r4=[r2],16 //load r4 1076 ld8.fill r5=[r3],16 //load r5
1079 ld8.fill r5=[r3],16 //load r5 1077 ;;
1080 ;; 1078 ld8.fill r6=[r2] //load r6
1081 ld8.fill r6=[r2] //load r6 1079 ld8.fill r7=[r3] //load r7
1082 ld8.fill r7=[r3] //load r7 1080 ;;
1083 ;;
1084END(ia64_leave_hypervisor_prepare) 1081END(ia64_leave_hypervisor_prepare)
1085//fall through 1082//fall through
1086GLOBAL_ENTRY(ia64_leave_hypervisor) 1083GLOBAL_ENTRY(ia64_leave_hypervisor)
1087 rsm psr.i 1084 rsm psr.i
1088 ;; 1085 ;;
1089 br.call.sptk.many b0=leave_hypervisor_tail 1086 br.call.sptk.many b0=leave_hypervisor_tail
1090 ;; 1087 ;;
1091 adds r20=PT(PR)+16,r12 1088 adds r20=PT(PR)+16,r12
1092 adds r8=PT(EML_UNAT)+16,r12 1089 adds r8=PT(EML_UNAT)+16,r12
1093 ;; 1090 ;;
1094 ld8 r8=[r8] 1091 ld8 r8=[r8]
1095 ;; 1092 ;;
1096 mov ar.unat=r8 1093 mov ar.unat=r8
1097 ;; 1094 ;;
1098 lfetch [r20],PT(CR_IPSR)-PT(PR) 1095 lfetch [r20],PT(CR_IPSR)-PT(PR)
1099 adds r2 = PT(B6)+16,r12 1096 adds r2 = PT(B6)+16,r12
1100 adds r3 = PT(B7)+16,r12 1097 adds r3 = PT(B7)+16,r12
1101 ;; 1098 ;;
1102 lfetch [r20] 1099 lfetch [r20]
1103 ;; 1100 ;;
1104 ld8 r24=[r2],16 /* B6 */ 1101 ld8 r24=[r2],16 /* B6 */
1105 ld8 r25=[r3],16 /* B7 */ 1102 ld8 r25=[r3],16 /* B7 */
1106 ;; 1103 ;;
1107 ld8 r26=[r2],16 /* ar_csd */ 1104 ld8 r26=[r2],16 /* ar_csd */
1108 ld8 r27=[r3],16 /* ar_ssd */ 1105 ld8 r27=[r3],16 /* ar_ssd */
1109 mov b6 = r24 1106 mov b6 = r24
1110 ;; 1107 ;;
1111 ld8.fill r8=[r2],16 1108 ld8.fill r8=[r2],16
1112 ld8.fill r9=[r3],16 1109 ld8.fill r9=[r3],16
1113 mov b7 = r25 1110 mov b7 = r25
1114 ;; 1111 ;;
1115 mov ar.csd = r26 1112 mov ar.csd = r26
1116 mov ar.ssd = r27 1113 mov ar.ssd = r27
1117 ;; 1114 ;;
1118 ld8.fill r10=[r2],PT(R15)-PT(R10) 1115 ld8.fill r10=[r2],PT(R15)-PT(R10)
1119 ld8.fill r11=[r3],PT(R14)-PT(R11) 1116 ld8.fill r11=[r3],PT(R14)-PT(R11)
1120 ;; 1117 ;;
1121 ld8.fill r15=[r2],PT(R16)-PT(R15) 1118 ld8.fill r15=[r2],PT(R16)-PT(R15)
1122 ld8.fill r14=[r3],PT(R17)-PT(R14) 1119 ld8.fill r14=[r3],PT(R17)-PT(R14)
1123 ;; 1120 ;;
1124 ld8.fill r16=[r2],16 1121 ld8.fill r16=[r2],16
1125 ld8.fill r17=[r3],16 1122 ld8.fill r17=[r3],16
1126 ;; 1123 ;;
1127 ld8.fill r18=[r2],16 1124 ld8.fill r18=[r2],16
1128 ld8.fill r19=[r3],16 1125 ld8.fill r19=[r3],16
1129 ;; 1126 ;;
1130 ld8.fill r20=[r2],16 1127 ld8.fill r20=[r2],16
1131 ld8.fill r21=[r3],16 1128 ld8.fill r21=[r3],16
1132 ;; 1129 ;;
1133 ld8.fill r22=[r2],16 1130 ld8.fill r22=[r2],16
1134 ld8.fill r23=[r3],16 1131 ld8.fill r23=[r3],16
1135 ;; 1132 ;;
1136 ld8.fill r24=[r2],16 1133 ld8.fill r24=[r2],16
1137 ld8.fill r25=[r3],16 1134 ld8.fill r25=[r3],16
1138 ;; 1135 ;;
1139 ld8.fill r26=[r2],16 1136 ld8.fill r26=[r2],16
1140 ld8.fill r27=[r3],16 1137 ld8.fill r27=[r3],16
1141 ;; 1138 ;;
1142 ld8.fill r28=[r2],16 1139 ld8.fill r28=[r2],16
1143 ld8.fill r29=[r3],16 1140 ld8.fill r29=[r3],16
1144 ;; 1141 ;;
1145 ld8.fill r30=[r2],PT(F6)-PT(R30) 1142 ld8.fill r30=[r2],PT(F6)-PT(R30)
1146 ld8.fill r31=[r3],PT(F7)-PT(R31) 1143 ld8.fill r31=[r3],PT(F7)-PT(R31)
1147 ;; 1144 ;;
1148 rsm psr.i | psr.ic 1145 rsm psr.i | psr.ic
1149 // initiate turning off of interrupt and interruption collection 1146 // initiate turning off of interrupt and interruption collection
1150 invala // invalidate ALAT 1147 invala // invalidate ALAT
1151 ;; 1148 ;;
1152 srlz.i // ensure interruption collection is off 1149 srlz.i // ensure interruption collection is off
1153 ;; 1150 ;;
1154 bsw.0 1151 bsw.0
1155 ;; 1152 ;;
1156 adds r16 = PT(CR_IPSR)+16,r12 1153 adds r16 = PT(CR_IPSR)+16,r12
1157 adds r17 = PT(CR_IIP)+16,r12 1154 adds r17 = PT(CR_IIP)+16,r12
1158 mov r21=r13 // get current 1155 mov r21=r13 // get current
1159 ;; 1156 ;;
1160 ld8 r31=[r16],16 // load cr.ipsr 1157 ld8 r31=[r16],16 // load cr.ipsr
1161 ld8 r30=[r17],16 // load cr.iip 1158 ld8 r30=[r17],16 // load cr.iip
1162 ;; 1159 ;;
1163 ld8 r29=[r16],16 // load cr.ifs 1160 ld8 r29=[r16],16 // load cr.ifs
1164 ld8 r28=[r17],16 // load ar.unat 1161 ld8 r28=[r17],16 // load ar.unat
1165 ;; 1162 ;;
1166 ld8 r27=[r16],16 // load ar.pfs 1163 ld8 r27=[r16],16 // load ar.pfs
1167 ld8 r26=[r17],16 // load ar.rsc 1164 ld8 r26=[r17],16 // load ar.rsc
1168 ;; 1165 ;;
1169 ld8 r25=[r16],16 // load ar.rnat 1166 ld8 r25=[r16],16 // load ar.rnat
1170 ld8 r24=[r17],16 // load ar.bspstore 1167 ld8 r24=[r17],16 // load ar.bspstore
1171 ;; 1168 ;;
1172 ld8 r23=[r16],16 // load predicates 1169 ld8 r23=[r16],16 // load predicates
1173 ld8 r22=[r17],16 // load b0 1170 ld8 r22=[r17],16 // load b0
1174 ;; 1171 ;;
1175 ld8 r20=[r16],16 // load ar.rsc value for "loadrs" 1172 ld8 r20=[r16],16 // load ar.rsc value for "loadrs"
1176 ld8.fill r1=[r17],16 //load r1 1173 ld8.fill r1=[r17],16 //load r1
1177 ;; 1174 ;;
1178 ld8.fill r12=[r16],16 //load r12 1175 ld8.fill r12=[r16],16 //load r12
1179 ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 1176 ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13
1180 ;; 1177 ;;
1181 ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr 1178 ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr
1182 ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 1179 ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2
1183 ;; 1180 ;;
1184 ld8.fill r3=[r16] //load r3 1181 ld8.fill r3=[r16] //load r3
1185 ld8 r18=[r17] //load ar_ccv 1182 ld8 r18=[r17] //load ar_ccv
1186 ;; 1183 ;;
1187 mov ar.fpsr=r19 1184 mov ar.fpsr=r19
1188 mov ar.ccv=r18 1185 mov ar.ccv=r18
1189 shr.u r18=r20,16 1186 shr.u r18=r20,16
1190 ;; 1187 ;;
1191kvm_rbs_switch: 1188kvm_rbs_switch:
1192 mov r19=96 1189 mov r19=96
1193 1190
1194kvm_dont_preserve_current_frame: 1191kvm_dont_preserve_current_frame:
1195/* 1192/*
@@ -1201,76 +1198,76 @@ kvm_dont_preserve_current_frame:
1201# define pReturn p7 1198# define pReturn p7
1202# define Nregs 14 1199# define Nregs 14
1203 1200
1204 alloc loc0=ar.pfs,2,Nregs-2,2,0 1201 alloc loc0=ar.pfs,2,Nregs-2,2,0
1205 shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) 1202 shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
1206 sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize 1203 sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize
1207 ;; 1204 ;;
1208 mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" 1205 mov ar.rsc=r20 // load ar.rsc to be used for "loadrs"
1209 shladd in0=loc1,3,r19 1206 shladd in0=loc1,3,r19
1210 mov in1=0 1207 mov in1=0
1211 ;; 1208 ;;
1212 TEXT_ALIGN(32) 1209 TEXT_ALIGN(32)
1213kvm_rse_clear_invalid: 1210kvm_rse_clear_invalid:
1214 alloc loc0=ar.pfs,2,Nregs-2,2,0 1211 alloc loc0=ar.pfs,2,Nregs-2,2,0
1215 cmp.lt pRecurse,p0=Nregs*8,in0 1212 cmp.lt pRecurse,p0=Nregs*8,in0
1216 // if more than Nregs regs left to clear, (re)curse 1213 // if more than Nregs regs left to clear, (re)curse
1217 add out0=-Nregs*8,in0 1214 add out0=-Nregs*8,in0
1218 add out1=1,in1 // increment recursion count 1215 add out1=1,in1 // increment recursion count
1219 mov loc1=0 1216 mov loc1=0
1220 mov loc2=0 1217 mov loc2=0
1221 ;; 1218 ;;
1222 mov loc3=0 1219 mov loc3=0
1223 mov loc4=0 1220 mov loc4=0
1224 mov loc5=0 1221 mov loc5=0
1225 mov loc6=0 1222 mov loc6=0
1226 mov loc7=0 1223 mov loc7=0
1227(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid 1224(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
1228 ;; 1225 ;;
1229 mov loc8=0 1226 mov loc8=0
1230 mov loc9=0 1227 mov loc9=0
1231 cmp.ne pReturn,p0=r0,in1 1228 cmp.ne pReturn,p0=r0,in1
1232 // if recursion count != 0, we need to do a br.ret 1229 // if recursion count != 0, we need to do a br.ret
1233 mov loc10=0 1230 mov loc10=0
1234 mov loc11=0 1231 mov loc11=0
1235(pReturn) br.ret.dptk.many b0 1232(pReturn) br.ret.dptk.many b0
1236 1233
1237# undef pRecurse 1234# undef pRecurse
1238# undef pReturn 1235# undef pReturn
1239 1236
1240// loadrs has already been shifted 1237// loadrs has already been shifted
1241 alloc r16=ar.pfs,0,0,0,0 // drop current register frame 1238 alloc r16=ar.pfs,0,0,0,0 // drop current register frame
1242 ;; 1239 ;;
1243 loadrs 1240 loadrs
1244 ;; 1241 ;;
1245 mov ar.bspstore=r24 1242 mov ar.bspstore=r24
1246 ;; 1243 ;;
1247 mov ar.unat=r28 1244 mov ar.unat=r28
1248 mov ar.rnat=r25 1245 mov ar.rnat=r25
1249 mov ar.rsc=r26 1246 mov ar.rsc=r26
1250 ;; 1247 ;;
1251 mov cr.ipsr=r31 1248 mov cr.ipsr=r31
1252 mov cr.iip=r30 1249 mov cr.iip=r30
1253 mov cr.ifs=r29 1250 mov cr.ifs=r29
1254 mov ar.pfs=r27 1251 mov ar.pfs=r27
1255 adds r18=VMM_VPD_BASE_OFFSET,r21 1252 adds r18=VMM_VPD_BASE_OFFSET,r21
1256 ;; 1253 ;;
1257 ld8 r18=[r18] //vpd 1254 ld8 r18=[r18] //vpd
1258 adds r17=VMM_VCPU_ISR_OFFSET,r21 1255 adds r17=VMM_VCPU_ISR_OFFSET,r21
1259 ;; 1256 ;;
1260 ld8 r17=[r17] 1257 ld8 r17=[r17]
1261 adds r19=VMM_VPD_VPSR_OFFSET,r18 1258 adds r19=VMM_VPD_VPSR_OFFSET,r18
1262 ;; 1259 ;;
1263 ld8 r19=[r19] //vpsr 1260 ld8 r19=[r19] //vpsr
1264 mov r25=r18 1261 mov r25=r18
1265 adds r16= VMM_VCPU_GP_OFFSET,r21 1262 adds r16= VMM_VCPU_GP_OFFSET,r21
1266 ;; 1263 ;;
1267 ld8 r16= [r16] // Put gp in r24 1264 ld8 r16= [r16] // Put gp in r24
1268 movl r24=@gprel(ia64_vmm_entry) // calculate return address 1265 movl r24=@gprel(ia64_vmm_entry) // calculate return address
1269 ;; 1266 ;;
1270 add r24=r24,r16 1267 add r24=r24,r16
1271 ;; 1268 ;;
1272 br.sptk.many kvm_vps_sync_write // call the service 1269 br.sptk.many kvm_vps_sync_write // call the service
1273 ;; 1270 ;;
1274END(ia64_leave_hypervisor) 1271END(ia64_leave_hypervisor)
1275// fall through 1272// fall through
1276GLOBAL_ENTRY(ia64_vmm_entry) 1273GLOBAL_ENTRY(ia64_vmm_entry)
@@ -1283,16 +1280,14 @@ GLOBAL_ENTRY(ia64_vmm_entry)
1283 * r22:b0 1280 * r22:b0
1284 * r23:predicate 1281 * r23:predicate
1285 */ 1282 */
1286 mov r24=r22 1283 mov r24=r22
1287 mov r25=r18 1284 mov r25=r18
1288 tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic 1285 tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
1289 (p1) br.cond.sptk.few kvm_vps_resume_normal 1286(p1) br.cond.sptk.few kvm_vps_resume_normal
1290 (p2) br.cond.sptk.many kvm_vps_resume_handler 1287(p2) br.cond.sptk.many kvm_vps_resume_handler
1291 ;; 1288 ;;
1292END(ia64_vmm_entry) 1289END(ia64_vmm_entry)
1293 1290
1294
1295
1296/* 1291/*
1297 * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, 1292 * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
1298 * u64 arg3, u64 arg4, u64 arg5, 1293 * u64 arg3, u64 arg4, u64 arg5,
@@ -1310,88 +1305,88 @@ psrsave = loc2
1310entry = loc3 1305entry = loc3
1311hostret = r24 1306hostret = r24
1312 1307
1313 alloc pfssave=ar.pfs,4,4,0,0 1308 alloc pfssave=ar.pfs,4,4,0,0
1314 mov rpsave=rp 1309 mov rpsave=rp
1315 adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 1310 adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
1316 ;; 1311 ;;
1317 ld8 entry=[entry] 1312 ld8 entry=[entry]
13181: mov hostret=ip 13131: mov hostret=ip
1319 mov r25=in1 // copy arguments 1314 mov r25=in1 // copy arguments
1320 mov r26=in2 1315 mov r26=in2
1321 mov r27=in3 1316 mov r27=in3
1322 mov psrsave=psr 1317 mov psrsave=psr
1323 ;; 1318 ;;
1324 tbit.nz p6,p0=psrsave,14 // IA64_PSR_I 1319 tbit.nz p6,p0=psrsave,14 // IA64_PSR_I
1325 tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC 1320 tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC
1326 ;; 1321 ;;
1327 add hostret=2f-1b,hostret // calculate return address 1322 add hostret=2f-1b,hostret // calculate return address
1328 add entry=entry,in0 1323 add entry=entry,in0
1329 ;; 1324 ;;
1330 rsm psr.i | psr.ic 1325 rsm psr.i | psr.ic
1331 ;; 1326 ;;
1332 srlz.i 1327 srlz.i
1333 mov b6=entry 1328 mov b6=entry
1334 br.cond.sptk b6 // call the service 1329 br.cond.sptk b6 // call the service
13352: 13302:
1336 // Architectural sequence for enabling interrupts if necessary 1331// Architectural sequence for enabling interrupts if necessary
1337(p7) ssm psr.ic 1332(p7) ssm psr.ic
1338 ;; 1333 ;;
1339(p7) srlz.i 1334(p7) srlz.i
1340 ;; 1335 ;;
1341//(p6) ssm psr.i 1336//(p6) ssm psr.i
1342 ;; 1337 ;;
1343 mov rp=rpsave 1338 mov rp=rpsave
1344 mov ar.pfs=pfssave 1339 mov ar.pfs=pfssave
1345 mov r8=r31 1340 mov r8=r31
1346 ;; 1341 ;;
1347 srlz.d 1342 srlz.d
1348 br.ret.sptk rp 1343 br.ret.sptk rp
1349 1344
1350END(ia64_call_vsa) 1345END(ia64_call_vsa)
1351 1346
1352#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) 1347#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100)
1353 1348
1354GLOBAL_ENTRY(vmm_reset_entry) 1349GLOBAL_ENTRY(vmm_reset_entry)
1355 //set up ipsr, iip, vpd.vpsr, dcr 1350 //set up ipsr, iip, vpd.vpsr, dcr
1356 // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 1351 // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
1357 // For DCR: all bits 0 1352 // For DCR: all bits 0
1358 bsw.0 1353 bsw.0
1359 ;; 1354 ;;
1360 mov r21 =r13 1355 mov r21 =r13
1361 adds r14=-VMM_PT_REGS_SIZE, r12 1356 adds r14=-VMM_PT_REGS_SIZE, r12
1362 ;; 1357 ;;
1363 movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 1358 movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
1364 movl r10=0x8000000000000000 1359 movl r10=0x8000000000000000
1365 adds r16=PT(CR_IIP), r14 1360 adds r16=PT(CR_IIP), r14
1366 adds r20=PT(R1), r14 1361 adds r20=PT(R1), r14
1367 ;; 1362 ;;
1368 rsm psr.ic | psr.i 1363 rsm psr.ic | psr.i
1369 ;; 1364 ;;
1370 srlz.i 1365 srlz.i
1371 ;; 1366 ;;
1372 mov ar.rsc = 0 1367 mov ar.rsc = 0
1373 ;; 1368 ;;
1374 flushrs 1369 flushrs
1375 ;; 1370 ;;
1376 mov ar.bspstore = 0 1371 mov ar.bspstore = 0
1377 // clear BSPSTORE 1372 // clear BSPSTORE
1378 ;; 1373 ;;
1379 mov cr.ipsr=r6 1374 mov cr.ipsr=r6
1380 mov cr.ifs=r10 1375 mov cr.ifs=r10
1381 ld8 r4 = [r16] // Set init iip for first run. 1376 ld8 r4 = [r16] // Set init iip for first run.
1382 ld8 r1 = [r20] 1377 ld8 r1 = [r20]
1383 ;; 1378 ;;
1384 mov cr.iip=r4 1379 mov cr.iip=r4
1385 adds r16=VMM_VPD_BASE_OFFSET,r13 1380 adds r16=VMM_VPD_BASE_OFFSET,r13
1386 ;; 1381 ;;
1387 ld8 r18=[r16] 1382 ld8 r18=[r16]
1388 ;; 1383 ;;
1389 adds r19=VMM_VPD_VPSR_OFFSET,r18 1384 adds r19=VMM_VPD_VPSR_OFFSET,r18
1390 ;; 1385 ;;
1391 ld8 r19=[r19] 1386 ld8 r19=[r19]
1392 mov r17=r0 1387 mov r17=r0
1393 mov r22=r0 1388 mov r22=r0
1394 mov r23=r0 1389 mov r23=r0
1395 br.cond.sptk ia64_vmm_entry 1390 br.cond.sptk ia64_vmm_entry
1396 br.ret.sptk b0 1391 br.ret.sptk b0
1397END(vmm_reset_entry) 1392END(vmm_reset_entry)
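Note on the handler table above: the repeated ".org kvm_ia64_ivt+0xNNNN" directives pin every interruption handler at a fixed offset from the table base. Entries 0 through 19 occupy 64-bundle slots (64 bundles x 16 bytes = 0x400 bytes each), while entries from 20 (offset 0x5000) onward occupy 16-bundle slots (0x100 bytes each). The following standalone C sketch is illustrative only (it is not kernel code); it just reproduces that layout arithmetic and checks it against offsets visible in the listing, such as Entry 8 at 0x2000 and Entry 37 at 0x6100.

/*
 * Illustrative only: compute the IVT offsets implied by the ".org"
 * directives above.  Entries 0-19 are 64 bundles (64 * 16 = 0x400 bytes)
 * apart; from entry 20 (offset 0x5000) onward each entry is 16 bundles
 * (0x100 bytes).  Not kernel code -- just a check of the layout.
 */
#include <assert.h>
#include <stdio.h>

static unsigned long kvm_ivt_offset(unsigned int entry)
{
	if (entry < 20)
		return entry * 0x400UL;			/* 64-bundle slots */
	return 0x5000UL + (entry - 20) * 0x100UL;	/* 16-bundle slots */
}

int main(void)
{
	/* Values taken from the listing: Entry 8 -> 0x2000, Entry 37 -> 0x6100 */
	assert(kvm_ivt_offset(8)  == 0x2000);
	assert(kvm_ivt_offset(37) == 0x6100);
	/* Entry 11 is the break-instruction vector, at 0x2c00 in the listing. */
	printf("entry 11 (break fault) at 0x%lx\n", kvm_ivt_offset(11));
	return 0;
}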
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index e22b93361e08..6b6307a3bd55 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -183,8 +183,8 @@ void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
183 u64 i, dirty_pages = 1; 183 u64 i, dirty_pages = 1;
184 u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; 184 u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
185 spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); 185 spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
186 void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE) 186 void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
187 + KVM_MEM_DIRTY_LOG_OFS; 187
188 dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; 188 dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
189 189
190 vmm_spin_lock(lock); 190 vmm_spin_lock(lock);
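The hunk above changes only where mark_pages_dirty() finds the dirty-log bitmap: it now starts from the fixed KVM_MEM_DIRTY_LOG_BASE address instead of deriving the address from the vcpu pointer. The unchanged line that follows still scales dirty_pages by the mapping's page-size order, so a guest translation larger than one host page marks several bitmap bits. The sketch below shows only that scaling; treating PAGE_SHIFT as 14 (16 KB host pages) is an assumption made for illustration, not something fixed by the patch.

/*
 * Sketch of the dirty-page scaling kept by the hunk above:
 *   dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
 * PAGE_SHIFT = 14 (16 KB host pages) is an assumption for illustration.
 */
#include <stdio.h>

#define PAGE_SHIFT 14	/* assumed host page size: 16 KB */

static unsigned long pages_covered(unsigned long ps)	/* ps = log2(mapping size) */
{
	unsigned long dirty_pages = 1;

	dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
	return dirty_pages;
}

int main(void)
{
	/* A 64 KB guest mapping (ps = 16) spans four 16 KB host pages. */
	printf("ps=16 -> %lu page(s)\n", pages_covered(16));	/* prints 4 */
	/* A mapping no larger than a host page marks exactly one bit. */
	printf("ps=14 -> %lu page(s)\n", pages_covered(14));	/* prints 1 */
	return 0;
}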
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
new file mode 100644
index 000000000000..9b198d1b3b2b
--- /dev/null
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -0,0 +1,80 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#ifndef __ASM_PPC_DISASSEMBLE_H__
21#define __ASM_PPC_DISASSEMBLE_H__
22
23#include <linux/types.h>
24
25static inline unsigned int get_op(u32 inst)
26{
27 return inst >> 26;
28}
29
30static inline unsigned int get_xop(u32 inst)
31{
32 return (inst >> 1) & 0x3ff;
33}
34
35static inline unsigned int get_sprn(u32 inst)
36{
37 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
38}
39
40static inline unsigned int get_dcrn(u32 inst)
41{
42 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
43}
44
45static inline unsigned int get_rt(u32 inst)
46{
47 return (inst >> 21) & 0x1f;
48}
49
50static inline unsigned int get_rs(u32 inst)
51{
52 return (inst >> 21) & 0x1f;
53}
54
55static inline unsigned int get_ra(u32 inst)
56{
57 return (inst >> 16) & 0x1f;
58}
59
60static inline unsigned int get_rb(u32 inst)
61{
62 return (inst >> 11) & 0x1f;
63}
64
65static inline unsigned int get_rc(u32 inst)
66{
67 return inst & 0x1;
68}
69
70static inline unsigned int get_ws(u32 inst)
71{
72 return (inst >> 11) & 0x1f;
73}
74
75static inline unsigned int get_d(u32 inst)
76{
77 return inst & 0xffff;
78}
79
80#endif /* __ASM_PPC_DISASSEMBLE_H__ */
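
The helpers above pull fixed bit fields out of a 32-bit PowerPC instruction word: the primary opcode sits in the top 6 bits, the extended opcode in bits 1-10, and the register operands in 5-bit fields. As a rough, user-space-only sketch (not part of the patch), decoding mfmsr r3, whose canonical encoding is 0x7c6000a6, with the same shifts gives op=31, xop=83 and rt=3:

/* Illustrative sketch only: the shifts mirror asm/disassemble.h, but this is
 * a stand-alone user-space program, not kernel code. */
#include <stdio.h>
#include <stdint.h>

static unsigned int get_op(uint32_t inst)  { return inst >> 26; }          /* primary opcode */
static unsigned int get_xop(uint32_t inst) { return (inst >> 1) & 0x3ff; } /* extended opcode */
static unsigned int get_rt(uint32_t inst)  { return (inst >> 21) & 0x1f; } /* destination reg */

int main(void)
{
	uint32_t inst = 0x7c6000a6;	/* mfmsr r3 */

	printf("op=%u xop=%u rt=%u\n", get_op(inst), get_xop(inst), get_rt(inst));
	/* Prints: op=31 xop=83 rt=3 */
	return 0;
}
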
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
new file mode 100644
index 000000000000..f49031b632ca
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -0,0 +1,61 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#ifndef __ASM_44X_H__
21#define __ASM_44X_H__
22
23#include <linux/kvm_host.h>
24
25#define PPC44x_TLB_SIZE 64
26
27/* If the guest is expecting it, this can be as large as we like; we'd just
28 * need to find some way of advertising it. */
29#define KVM44x_GUEST_TLB_SIZE 64
30
31struct kvmppc_44x_shadow_ref {
32 struct page *page;
33 u16 gtlb_index;
34 u8 writeable;
35 u8 tid;
36};
37
38struct kvmppc_vcpu_44x {
39 /* Unmodified copy of the guest's TLB. */
40 struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE];
41
42 /* References to guest pages in the hardware TLB. */
43 struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
44
45 /* State of the shadow TLB at guest context switch time. */
46 struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
47 u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
48
49 struct kvm_vcpu vcpu;
50};
51
52static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
53{
54 return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
55}
56
57void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
58void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
59void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
60
61#endif /* __ASM_44X_H__ */
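
to_44x() works because struct kvm_vcpu is embedded inside struct kvmppc_vcpu_44x: given a pointer to the embedded member, container_of() subtracts the member's offset to recover the wrapper. A minimal stand-alone sketch of the same pattern (the struct names here are placeholders, not the kernel's):

/* Sketch of the container_of pattern behind to_44x(); illustrative only. */
#include <stddef.h>
#include <stdio.h>

struct inner { int id; };
struct outer {
	char tag[8];
	struct inner core;	/* embedded, like vcpu inside kvmppc_vcpu_44x */
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct outer o = { "vcpu0", { 42 } };
	struct inner *ip = &o.core;
	struct outer *op = container_of(ip, struct outer, core);

	printf("%s %d\n", op->tag, op->core.id);	/* vcpu0 42 */
	return 0;
}
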
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 34b52b7180cd..c1e436fe7738 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -64,27 +64,58 @@ struct kvm_vcpu_stat {
64 u32 halt_wakeup; 64 u32 halt_wakeup;
65}; 65};
66 66
67struct tlbe { 67struct kvmppc_44x_tlbe {
68 u32 tid; /* Only the low 8 bits are used. */ 68 u32 tid; /* Only the low 8 bits are used. */
69 u32 word0; 69 u32 word0;
70 u32 word1; 70 u32 word1;
71 u32 word2; 71 u32 word2;
72}; 72};
73 73
74struct kvm_arch { 74enum kvm_exit_types {
75 MMIO_EXITS,
76 DCR_EXITS,
77 SIGNAL_EXITS,
78 ITLB_REAL_MISS_EXITS,
79 ITLB_VIRT_MISS_EXITS,
80 DTLB_REAL_MISS_EXITS,
81 DTLB_VIRT_MISS_EXITS,
82 SYSCALL_EXITS,
83 ISI_EXITS,
84 DSI_EXITS,
85 EMULATED_INST_EXITS,
86 EMULATED_MTMSRWE_EXITS,
87 EMULATED_WRTEE_EXITS,
88 EMULATED_MTSPR_EXITS,
89 EMULATED_MFSPR_EXITS,
90 EMULATED_MTMSR_EXITS,
91 EMULATED_MFMSR_EXITS,
92 EMULATED_TLBSX_EXITS,
93 EMULATED_TLBWE_EXITS,
94 EMULATED_RFI_EXITS,
95 DEC_EXITS,
96 EXT_INTR_EXITS,
97 HALT_WAKEUP,
98 USR_PR_INST,
99 FP_UNAVAIL,
100 DEBUG_EXITS,
101 TIMEINGUEST,
102 __NUMBER_OF_KVM_EXIT_TYPES
75}; 103};
76 104
77struct kvm_vcpu_arch { 105/* allow access to big endian 32bit upper/lower parts and 64bit var */
78 /* Unmodified copy of the guest's TLB. */ 106struct kvmppc_exit_timing {
79 struct tlbe guest_tlb[PPC44x_TLB_SIZE]; 107 union {
80 /* TLB that's actually used when the guest is running. */ 108 u64 tv64;
81 struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; 109 struct {
82 /* Pages which are referenced in the shadow TLB. */ 110 u32 tbu, tbl;
83 struct page *shadow_pages[PPC44x_TLB_SIZE]; 111 } tv32;
112 };
113};
84 114
85 /* Track which TLB entries we've modified in the current exit. */ 115struct kvm_arch {
86 u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; 116};
87 117
118struct kvm_vcpu_arch {
88 u32 host_stack; 119 u32 host_stack;
89 u32 host_pid; 120 u32 host_pid;
90 u32 host_dbcr0; 121 u32 host_dbcr0;
@@ -94,32 +125,32 @@ struct kvm_vcpu_arch {
94 u32 host_msr; 125 u32 host_msr;
95 126
96 u64 fpr[32]; 127 u64 fpr[32];
97 u32 gpr[32]; 128 ulong gpr[32];
98 129
99 u32 pc; 130 ulong pc;
100 u32 cr; 131 u32 cr;
101 u32 ctr; 132 ulong ctr;
102 u32 lr; 133 ulong lr;
103 u32 xer; 134 ulong xer;
104 135
105 u32 msr; 136 ulong msr;
106 u32 mmucr; 137 u32 mmucr;
107 u32 sprg0; 138 ulong sprg0;
108 u32 sprg1; 139 ulong sprg1;
109 u32 sprg2; 140 ulong sprg2;
110 u32 sprg3; 141 ulong sprg3;
111 u32 sprg4; 142 ulong sprg4;
112 u32 sprg5; 143 ulong sprg5;
113 u32 sprg6; 144 ulong sprg6;
114 u32 sprg7; 145 ulong sprg7;
115 u32 srr0; 146 ulong srr0;
116 u32 srr1; 147 ulong srr1;
117 u32 csrr0; 148 ulong csrr0;
118 u32 csrr1; 149 ulong csrr1;
119 u32 dsrr0; 150 ulong dsrr0;
120 u32 dsrr1; 151 ulong dsrr1;
121 u32 dear; 152 ulong dear;
122 u32 esr; 153 ulong esr;
123 u32 dec; 154 u32 dec;
124 u32 decar; 155 u32 decar;
125 u32 tbl; 156 u32 tbl;
@@ -127,7 +158,7 @@ struct kvm_vcpu_arch {
127 u32 tcr; 158 u32 tcr;
128 u32 tsr; 159 u32 tsr;
129 u32 ivor[16]; 160 u32 ivor[16];
130 u32 ivpr; 161 ulong ivpr;
131 u32 pir; 162 u32 pir;
132 163
133 u32 shadow_pid; 164 u32 shadow_pid;
@@ -140,9 +171,22 @@ struct kvm_vcpu_arch {
140 u32 dbcr0; 171 u32 dbcr0;
141 u32 dbcr1; 172 u32 dbcr1;
142 173
174#ifdef CONFIG_KVM_EXIT_TIMING
175 struct kvmppc_exit_timing timing_exit;
176 struct kvmppc_exit_timing timing_last_enter;
177 u32 last_exit_type;
178 u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
179 u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
180 u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
181 u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
182 u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
183 u64 timing_last_exit;
184 struct dentry *debugfs_exit_timing;
185#endif
186
143 u32 last_inst; 187 u32 last_inst;
144 u32 fault_dear; 188 ulong fault_dear;
145 u32 fault_esr; 189 ulong fault_esr;
146 gpa_t paddr_accessed; 190 gpa_t paddr_accessed;
147 191
148 u8 io_gpr; /* GPR used as IO source/target */ 192 u8 io_gpr; /* GPR used as IO source/target */
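
The kvmppc_exit_timing union added here lets the low-level exit path store the timebase as two 32-bit halves (tbu/tbl, the values the VCPU_TIMING_* offsets in asm-offsets.c point at) while the timing code reads the same storage back as a single 64-bit value; on a big-endian 32-bit host the two layouts coincide. A small sketch of that layout, assuming a big-endian build:

/* Sketch of the kvmppc_exit_timing layout trick.  On big-endian ppc32 the
 * {tbu, tbl} pair occupies the high and low words of tv64; on little-endian
 * the halves would be swapped, so this only lines up on BE hosts. */
#include <stdint.h>
#include <stdio.h>

union exit_timing {
	uint64_t tv64;
	struct { uint32_t tbu, tbl; } tv32;
};

int main(void)
{
	union exit_timing t;

	t.tv32.tbu = 0x00000001;	/* upper timebase word */
	t.tv32.tbl = 0x00000002;	/* lower timebase word */

	/* Prints tv64 = 0x100000002 on a big-endian build. */
	printf("tv64 = 0x%llx\n", (unsigned long long)t.tv64);
	return 0;
}
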
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index bb62ad876de3..36d2a50a8487 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -29,11 +29,6 @@
29#include <linux/kvm_types.h> 29#include <linux/kvm_types.h>
30#include <linux/kvm_host.h> 30#include <linux/kvm_host.h>
31 31
32struct kvm_tlb {
33 struct tlbe guest_tlb[PPC44x_TLB_SIZE];
34 struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
35};
36
37enum emulation_result { 32enum emulation_result {
38 EMULATE_DONE, /* no further processing */ 33 EMULATE_DONE, /* no further processing */
39 EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ 34 EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */
@@ -41,9 +36,6 @@ enum emulation_result {
41 EMULATE_FAIL, /* can't emulate this instruction */ 36 EMULATE_FAIL, /* can't emulate this instruction */
42}; 37};
43 38
44extern const unsigned char exception_priority[];
45extern const unsigned char priority_exception[];
46
47extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); 39extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
48extern char kvmppc_handlers_start[]; 40extern char kvmppc_handlers_start[];
49extern unsigned long kvmppc_handler_len; 41extern unsigned long kvmppc_handler_len;
@@ -58,51 +50,44 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
58extern int kvmppc_emulate_instruction(struct kvm_run *run, 50extern int kvmppc_emulate_instruction(struct kvm_run *run,
59 struct kvm_vcpu *vcpu); 51 struct kvm_vcpu *vcpu);
60extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); 52extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
53extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
61 54
62extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, 55extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
63 u64 asid, u32 flags); 56 u64 asid, u32 flags, u32 max_bytes,
64extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, 57 unsigned int gtlb_idx);
65 gva_t eend, u32 asid);
66extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); 58extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
67extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); 59extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
68 60
69/* XXX Book E specific */ 61/* Core-specific hooks */
70extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i); 62
71 63extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
72extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu); 64 unsigned int id);
73 65extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu);
74static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception) 66extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu);
75{ 67extern int kvmppc_core_check_processor_compat(void);
76 unsigned int priority = exception_priority[exception]; 68extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
77 set_bit(priority, &vcpu->arch.pending_exceptions); 69 struct kvm_translation *tr);
78} 70
79 71extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
80static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception) 72extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
81{ 73
82 unsigned int priority = exception_priority[exception]; 74extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu);
83 clear_bit(priority, &vcpu->arch.pending_exceptions); 75extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu);
84} 76
85 77extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
86/* Helper function for "full" MSR writes. No need to call this if only EE is 78extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
87 * changing. */ 79extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
88static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) 80extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
89{ 81extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
90 if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) 82 struct kvm_interrupt *irq);
91 kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); 83
92 84extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
93 vcpu->arch.msr = new_msr; 85 unsigned int op, int *advance);
94 86extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
95 if (vcpu->arch.msr & MSR_WE) 87extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
96 kvm_vcpu_block(vcpu); 88
97} 89extern int kvmppc_booke_init(void);
98 90extern void kvmppc_booke_exit(void);
99static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
100{
101 if (vcpu->arch.pid != new_pid) {
102 vcpu->arch.pid = new_pid;
103 vcpu->arch.swap_pid = 1;
104 }
105}
106 91
107extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); 92extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
108 93
diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index 8a97cfb08b7e..27cc6fdcd3b7 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -56,6 +56,7 @@
56#ifndef __ASSEMBLY__ 56#ifndef __ASSEMBLY__
57 57
58extern unsigned int tlb_44x_hwater; 58extern unsigned int tlb_44x_hwater;
59extern unsigned int tlb_44x_index;
59 60
60typedef struct { 61typedef struct {
61 unsigned int id; 62 unsigned int id;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 661d07d2146b..9937fe44555f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -23,9 +23,6 @@
23#include <linux/mm.h> 23#include <linux/mm.h>
24#include <linux/suspend.h> 24#include <linux/suspend.h>
25#include <linux/hrtimer.h> 25#include <linux/hrtimer.h>
26#ifdef CONFIG_KVM
27#include <linux/kvm_host.h>
28#endif
29#ifdef CONFIG_PPC64 26#ifdef CONFIG_PPC64
30#include <linux/time.h> 27#include <linux/time.h>
31#include <linux/hardirq.h> 28#include <linux/hardirq.h>
@@ -51,6 +48,9 @@
51#ifdef CONFIG_PPC_ISERIES 48#ifdef CONFIG_PPC_ISERIES
52#include <asm/iseries/alpaca.h> 49#include <asm/iseries/alpaca.h>
53#endif 50#endif
51#ifdef CONFIG_KVM
52#include <asm/kvm_44x.h>
53#endif
54 54
55#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) 55#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
56#include "head_booke.h" 56#include "head_booke.h"
@@ -357,12 +357,10 @@ int main(void)
357 DEFINE(PTE_SIZE, sizeof(pte_t)); 357 DEFINE(PTE_SIZE, sizeof(pte_t));
358 358
359#ifdef CONFIG_KVM 359#ifdef CONFIG_KVM
360 DEFINE(TLBE_BYTES, sizeof(struct tlbe)); 360 DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
361 361
362 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); 362 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
363 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); 363 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
364 DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb));
365 DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod));
366 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); 364 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
367 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); 365 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
368 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); 366 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
@@ -385,5 +383,16 @@ int main(void)
385 DEFINE(PTE_T_LOG2, PTE_T_LOG2); 383 DEFINE(PTE_T_LOG2, PTE_T_LOG2);
386#endif 384#endif
387 385
386#ifdef CONFIG_KVM_EXIT_TIMING
387 DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
388 arch.timing_exit.tv32.tbu));
389 DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
390 arch.timing_exit.tv32.tbl));
391 DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
392 arch.timing_last_enter.tv32.tbu));
393 DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
394 arch.timing_last_enter.tv32.tbl));
395#endif
396
388 return 0; 397 return 0;
389} 398}
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
new file mode 100644
index 000000000000..a66bec57265a
--- /dev/null
+++ b/arch/powerpc/kvm/44x.c
@@ -0,0 +1,228 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <linux/kvm_host.h>
21#include <linux/err.h>
22
23#include <asm/reg.h>
24#include <asm/cputable.h>
25#include <asm/tlbflush.h>
26#include <asm/kvm_44x.h>
27#include <asm/kvm_ppc.h>
28
29#include "44x_tlb.h"
30
31/* Note: clearing MSR[DE] just means that the debug interrupt will not be
32 * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
33 * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
34 * will be delivered as an "imprecise debug event" (which is indicated by
35 * DBSR[IDE]).
36 */
37static void kvm44x_disable_debug_interrupts(void)
38{
39 mtmsr(mfmsr() & ~MSR_DE);
40}
41
42void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
43{
44 kvm44x_disable_debug_interrupts();
45
46 mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
47 mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
48 mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
49 mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
50 mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
51 mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
52 mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
53 mtmsr(vcpu->arch.host_msr);
54}
55
56void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
57{
58 struct kvm_guest_debug *dbg = &vcpu->guest_debug;
59 u32 dbcr0 = 0;
60
61 vcpu->arch.host_msr = mfmsr();
62 kvm44x_disable_debug_interrupts();
63
64 /* Save host debug register state. */
65 vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
66 vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
67 vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
68 vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
69 vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
70 vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
71 vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
72
73 /* set registers up for guest */
74
75 if (dbg->bp[0]) {
76 mtspr(SPRN_IAC1, dbg->bp[0]);
77 dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
78 }
79 if (dbg->bp[1]) {
80 mtspr(SPRN_IAC2, dbg->bp[1]);
81 dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
82 }
83 if (dbg->bp[2]) {
84 mtspr(SPRN_IAC3, dbg->bp[2]);
85 dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
86 }
87 if (dbg->bp[3]) {
88 mtspr(SPRN_IAC4, dbg->bp[3]);
89 dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
90 }
91
92 mtspr(SPRN_DBCR0, dbcr0);
93 mtspr(SPRN_DBCR1, 0);
94 mtspr(SPRN_DBCR2, 0);
95}
96
97void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
98{
99 kvmppc_44x_tlb_load(vcpu);
100}
101
102void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
103{
104 kvmppc_44x_tlb_put(vcpu);
105}
106
107int kvmppc_core_check_processor_compat(void)
108{
109 int r;
110
111 if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
112 r = 0;
113 else
114 r = -ENOTSUPP;
115
116 return r;
117}
118
119int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
120{
121 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
122 struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0];
123 int i;
124
125 tlbe->tid = 0;
126 tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
127 tlbe->word1 = 0;
128 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
129
130 tlbe++;
131 tlbe->tid = 0;
132 tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
133 tlbe->word1 = 0xef600000;
134 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
135 | PPC44x_TLB_I | PPC44x_TLB_G;
136
137 /* Since the guest can directly access the timebase, it must know the
138 * real timebase frequency. Accordingly, it must see the state of
139 * CCR1[TCS]. */
140 vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
141
142 for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
143 vcpu_44x->shadow_refs[i].gtlb_index = -1;
144
145 return 0;
146}
147
148/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
149int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
150 struct kvm_translation *tr)
151{
152 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
153 struct kvmppc_44x_tlbe *gtlbe;
154 int index;
155 gva_t eaddr;
156 u8 pid;
157 u8 as;
158
159 eaddr = tr->linear_address;
160 pid = (tr->linear_address >> 32) & 0xff;
161 as = (tr->linear_address >> 40) & 0x1;
162
163 index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
164 if (index == -1) {
165 tr->valid = 0;
166 return 0;
167 }
168
169 gtlbe = &vcpu_44x->guest_tlb[index];
170
171 tr->physical_address = tlb_xlate(gtlbe, eaddr);
172 /* XXX what does "writeable" and "usermode" even mean? */
173 tr->valid = 1;
174
175 return 0;
176}
177
178struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
179{
180 struct kvmppc_vcpu_44x *vcpu_44x;
181 struct kvm_vcpu *vcpu;
182 int err;
183
184 vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
185 if (!vcpu_44x) {
186 err = -ENOMEM;
187 goto out;
188 }
189
190 vcpu = &vcpu_44x->vcpu;
191 err = kvm_vcpu_init(vcpu, kvm, id);
192 if (err)
193 goto free_vcpu;
194
195 return vcpu;
196
197free_vcpu:
198 kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
199out:
200 return ERR_PTR(err);
201}
202
203void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
204{
205 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
206
207 kvm_vcpu_uninit(vcpu);
208 kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
209}
210
211static int kvmppc_44x_init(void)
212{
213 int r;
214
215 r = kvmppc_booke_init();
216 if (r)
217 return r;
218
219 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
220}
221
222static void kvmppc_44x_exit(void)
223{
224 kvmppc_booke_exit();
225}
226
227module_init(kvmppc_44x_init);
228module_exit(kvmppc_44x_exit);
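
kvmppc_core_vcpu_translate() above unpacks kvm_translation.linear_address as AS|PID|EADDR: the effective address in the low 32 bits, the PID in bits 32-39 and the address-space bit in bit 40. A hypothetical helper showing how a caller could pack such a value before issuing the translation request (the function name is an illustration, not an existing KVM API):

/* Hypothetical packing helper matching the decode in
 * kvmppc_core_vcpu_translate(): EADDR in bits 0-31, PID in bits 32-39,
 * AS in bit 40. */
#include <stdint.h>

static uint64_t pack_linear_address(uint32_t eaddr, uint8_t pid, unsigned int as)
{
	return (uint64_t)eaddr |
	       ((uint64_t)pid << 32) |
	       ((uint64_t)(as & 1) << 40);
}

/* e.g. pack_linear_address(0xef600300, 1, 0) describes PID 1, AS 0,
 * effective address 0xef600300. */
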
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
new file mode 100644
index 000000000000..82489a743a6f
--- /dev/null
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -0,0 +1,371 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <asm/kvm_ppc.h>
21#include <asm/dcr.h>
22#include <asm/dcr-regs.h>
23#include <asm/disassemble.h>
24#include <asm/kvm_44x.h>
25#include "timing.h"
26
27#include "booke.h"
28#include "44x_tlb.h"
29
30#define OP_RFI 19
31
32#define XOP_RFI 50
33#define XOP_MFMSR 83
34#define XOP_WRTEE 131
35#define XOP_MTMSR 146
36#define XOP_WRTEEI 163
37#define XOP_MFDCR 323
38#define XOP_MTDCR 451
39#define XOP_TLBSX 914
40#define XOP_ICCCI 966
41#define XOP_TLBWE 978
42
43static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
44{
45 vcpu->arch.pc = vcpu->arch.srr0;
46 kvmppc_set_msr(vcpu, vcpu->arch.srr1);
47}
48
49int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
50 unsigned int inst, int *advance)
51{
52 int emulated = EMULATE_DONE;
53 int dcrn;
54 int ra;
55 int rb;
56 int rc;
57 int rs;
58 int rt;
59 int ws;
60
61 switch (get_op(inst)) {
62 case OP_RFI:
63 switch (get_xop(inst)) {
64 case XOP_RFI:
65 kvmppc_emul_rfi(vcpu);
66 kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
67 *advance = 0;
68 break;
69
70 default:
71 emulated = EMULATE_FAIL;
72 break;
73 }
74 break;
75
76 case 31:
77 switch (get_xop(inst)) {
78
79 case XOP_MFMSR:
80 rt = get_rt(inst);
81 vcpu->arch.gpr[rt] = vcpu->arch.msr;
82 kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
83 break;
84
85 case XOP_MTMSR:
86 rs = get_rs(inst);
87 kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
88 kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
89 break;
90
91 case XOP_WRTEE:
92 rs = get_rs(inst);
93 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
94 | (vcpu->arch.gpr[rs] & MSR_EE);
95 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
96 break;
97
98 case XOP_WRTEEI:
99 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
100 | (inst & MSR_EE);
101 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
102 break;
103
104 case XOP_MFDCR:
105 dcrn = get_dcrn(inst);
106 rt = get_rt(inst);
107
108 /* The guest may access CPR0 registers to determine the timebase
109 * frequency, and it must know the real host frequency because it
110 * can directly access the timebase registers.
111 *
112 * It would be possible to emulate those accesses in userspace,
113 * but userspace can really only figure out the end frequency.
114 * We could decompose that into the factors that compute it, but
115 * that's tricky math, and it's easier to just report the real
116 * CPR0 values.
117 */
118 switch (dcrn) {
119 case DCRN_CPR0_CONFIG_ADDR:
120 vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
121 break;
122 case DCRN_CPR0_CONFIG_DATA:
123 local_irq_disable();
124 mtdcr(DCRN_CPR0_CONFIG_ADDR,
125 vcpu->arch.cpr0_cfgaddr);
126 vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
127 local_irq_enable();
128 break;
129 default:
130 run->dcr.dcrn = dcrn;
131 run->dcr.data = 0;
132 run->dcr.is_write = 0;
133 vcpu->arch.io_gpr = rt;
134 vcpu->arch.dcr_needed = 1;
135 kvmppc_account_exit(vcpu, DCR_EXITS);
136 emulated = EMULATE_DO_DCR;
137 }
138
139 break;
140
141 case XOP_MTDCR:
142 dcrn = get_dcrn(inst);
143 rs = get_rs(inst);
144
145 /* emulate some access in kernel */
146 switch (dcrn) {
147 case DCRN_CPR0_CONFIG_ADDR:
148 vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
149 break;
150 default:
151 run->dcr.dcrn = dcrn;
152 run->dcr.data = vcpu->arch.gpr[rs];
153 run->dcr.is_write = 1;
154 vcpu->arch.dcr_needed = 1;
155 kvmppc_account_exit(vcpu, DCR_EXITS);
156 emulated = EMULATE_DO_DCR;
157 }
158
159 break;
160
161 case XOP_TLBWE:
162 ra = get_ra(inst);
163 rs = get_rs(inst);
164 ws = get_ws(inst);
165 emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
166 break;
167
168 case XOP_TLBSX:
169 rt = get_rt(inst);
170 ra = get_ra(inst);
171 rb = get_rb(inst);
172 rc = get_rc(inst);
173 emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
174 break;
175
176 case XOP_ICCCI:
177 break;
178
179 default:
180 emulated = EMULATE_FAIL;
181 }
182
183 break;
184
185 default:
186 emulated = EMULATE_FAIL;
187 }
188
189 return emulated;
190}
191
192int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
193{
194 switch (sprn) {
195 case SPRN_MMUCR:
196 vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
197 case SPRN_PID:
198 kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
199 case SPRN_CCR0:
200 vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
201 case SPRN_CCR1:
202 vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
203 case SPRN_DEAR:
204 vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
205 case SPRN_ESR:
206 vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
207 case SPRN_DBCR0:
208 vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
209 case SPRN_DBCR1:
210 vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
211 case SPRN_TSR:
212 vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
213 case SPRN_TCR:
214 vcpu->arch.tcr = vcpu->arch.gpr[rs];
215 kvmppc_emulate_dec(vcpu);
216 break;
217
218 /* Note: SPRG4-7 are user-readable. These values are
219 * loaded into the real SPRGs when resuming the
220 * guest. */
221 case SPRN_SPRG4:
222 vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
223 case SPRN_SPRG5:
224 vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
225 case SPRN_SPRG6:
226 vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
227 case SPRN_SPRG7:
228 vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
229
230 case SPRN_IVPR:
231 vcpu->arch.ivpr = vcpu->arch.gpr[rs];
232 break;
233 case SPRN_IVOR0:
234 vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
235 break;
236 case SPRN_IVOR1:
237 vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
238 break;
239 case SPRN_IVOR2:
240 vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
241 break;
242 case SPRN_IVOR3:
243 vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
244 break;
245 case SPRN_IVOR4:
246 vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
247 break;
248 case SPRN_IVOR5:
249 vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
250 break;
251 case SPRN_IVOR6:
252 vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
253 break;
254 case SPRN_IVOR7:
255 vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
256 break;
257 case SPRN_IVOR8:
258 vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
259 break;
260 case SPRN_IVOR9:
261 vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
262 break;
263 case SPRN_IVOR10:
264 vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
265 break;
266 case SPRN_IVOR11:
267 vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
268 break;
269 case SPRN_IVOR12:
270 vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
271 break;
272 case SPRN_IVOR13:
273 vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
274 break;
275 case SPRN_IVOR14:
276 vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
277 break;
278 case SPRN_IVOR15:
279 vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
280 break;
281
282 default:
283 return EMULATE_FAIL;
284 }
285
286 kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
287 return EMULATE_DONE;
288}
289
290int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
291{
292 switch (sprn) {
293 /* 440 */
294 case SPRN_MMUCR:
295 vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
296 case SPRN_CCR0:
297 vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
298 case SPRN_CCR1:
299 vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
300
301 /* Book E */
302 case SPRN_PID:
303 vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
304 case SPRN_IVPR:
305 vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
306 case SPRN_DEAR:
307 vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
308 case SPRN_ESR:
309 vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
310 case SPRN_DBCR0:
311 vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
312 case SPRN_DBCR1:
313 vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
314
315 case SPRN_IVOR0:
316 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
317 break;
318 case SPRN_IVOR1:
319 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
320 break;
321 case SPRN_IVOR2:
322 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
323 break;
324 case SPRN_IVOR3:
325 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
326 break;
327 case SPRN_IVOR4:
328 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
329 break;
330 case SPRN_IVOR5:
331 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
332 break;
333 case SPRN_IVOR6:
334 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
335 break;
336 case SPRN_IVOR7:
337 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
338 break;
339 case SPRN_IVOR8:
340 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
341 break;
342 case SPRN_IVOR9:
343 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
344 break;
345 case SPRN_IVOR10:
346 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
347 break;
348 case SPRN_IVOR11:
349 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
350 break;
351 case SPRN_IVOR12:
352 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
353 break;
354 case SPRN_IVOR13:
355 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
356 break;
357 case SPRN_IVOR14:
358 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
359 break;
360 case SPRN_IVOR15:
361 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
362 break;
363
364 default:
365 return EMULATE_FAIL;
366 }
367
368 kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
369 return EMULATE_DONE;
370}
371
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ad72c6f9811f..9a34b8edb9e2 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -22,20 +22,103 @@
22#include <linux/kvm.h> 22#include <linux/kvm.h>
23#include <linux/kvm_host.h> 23#include <linux/kvm_host.h>
24#include <linux/highmem.h> 24#include <linux/highmem.h>
25
26#include <asm/tlbflush.h>
25#include <asm/mmu-44x.h> 27#include <asm/mmu-44x.h>
26#include <asm/kvm_ppc.h> 28#include <asm/kvm_ppc.h>
29#include <asm/kvm_44x.h>
30#include "timing.h"
27 31
28#include "44x_tlb.h" 32#include "44x_tlb.h"
29 33
34#ifndef PPC44x_TLBE_SIZE
35#define PPC44x_TLBE_SIZE PPC44x_TLB_4K
36#endif
37
38#define PAGE_SIZE_4K (1<<12)
39#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))
40
41#define PPC44x_TLB_UATTR_MASK \
42 (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3)
30#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) 43#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
31#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) 44#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
32 45
33static unsigned int kvmppc_tlb_44x_pos; 46#ifdef DEBUG
47void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
48{
49 struct kvmppc_44x_tlbe *tlbe;
50 int i;
51
52 printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
53 printk("| %2s | %3s | %8s | %8s | %8s |\n",
54 "nr", "tid", "word0", "word1", "word2");
55
56 for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
57 tlbe = &vcpu_44x->guest_tlb[i];
58 if (tlbe->word0 & PPC44x_TLB_VALID)
59 printk(" G%2d | %02X | %08X | %08X | %08X |\n",
60 i, tlbe->tid, tlbe->word0, tlbe->word1,
61 tlbe->word2);
62 }
63}
64#endif
65
66static inline void kvmppc_44x_tlbie(unsigned int index)
67{
68 /* 0 <= index < 64, so the V bit is clear and we can use the index as
69 * word0. */
70 asm volatile(
71 "tlbwe %[index], %[index], 0\n"
72 :
73 : [index] "r"(index)
74 );
75}
76
77static inline void kvmppc_44x_tlbre(unsigned int index,
78 struct kvmppc_44x_tlbe *tlbe)
79{
80 asm volatile(
81 "tlbre %[word0], %[index], 0\n"
82 "mfspr %[tid], %[sprn_mmucr]\n"
83 "andi. %[tid], %[tid], 0xff\n"
84 "tlbre %[word1], %[index], 1\n"
85 "tlbre %[word2], %[index], 2\n"
86 : [word0] "=r"(tlbe->word0),
87 [word1] "=r"(tlbe->word1),
88 [word2] "=r"(tlbe->word2),
89 [tid] "=r"(tlbe->tid)
90 : [index] "r"(index),
91 [sprn_mmucr] "i"(SPRN_MMUCR)
92 : "cc"
93 );
94}
95
96static inline void kvmppc_44x_tlbwe(unsigned int index,
97 struct kvmppc_44x_tlbe *stlbe)
98{
99 unsigned long tmp;
100
101 asm volatile(
102 "mfspr %[tmp], %[sprn_mmucr]\n"
103 "rlwimi %[tmp], %[tid], 0, 0xff\n"
104 "mtspr %[sprn_mmucr], %[tmp]\n"
105 "tlbwe %[word0], %[index], 0\n"
106 "tlbwe %[word1], %[index], 1\n"
107 "tlbwe %[word2], %[index], 2\n"
108 : [tmp] "=&r"(tmp)
109 : [word0] "r"(stlbe->word0),
110 [word1] "r"(stlbe->word1),
111 [word2] "r"(stlbe->word2),
112 [tid] "r"(stlbe->tid),
113 [index] "r"(index),
114 [sprn_mmucr] "i"(SPRN_MMUCR)
115 );
116}
34 117
35static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) 118static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
36{ 119{
37 /* Mask off reserved bits. */ 120 /* We only care about the guest's permission and user bits. */
38 attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK; 121 attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK;
39 122
40 if (!usermode) { 123 if (!usermode) {
41 /* Guest is in supervisor mode, so we need to translate guest 124 /* Guest is in supervisor mode, so we need to translate guest
@@ -47,18 +130,60 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
47 /* Make sure host can always access this memory. */ 130 /* Make sure host can always access this memory. */
48 attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; 131 attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW;
49 132
133 /* WIMGE = 0b00100 */
134 attrib |= PPC44x_TLB_M;
135
50 return attrib; 136 return attrib;
51} 137}
52 138
139/* Load shadow TLB back into hardware. */
140void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu)
141{
142 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
143 int i;
144
145 for (i = 0; i <= tlb_44x_hwater; i++) {
146 struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
147
148 if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
149 kvmppc_44x_tlbwe(i, stlbe);
150 }
151}
152
153static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x,
154 unsigned int i)
155{
156 vcpu_44x->shadow_tlb_mod[i] = 1;
157}
158
159/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */
160void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu)
161{
162 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
163 int i;
164
165 for (i = 0; i <= tlb_44x_hwater; i++) {
166 struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
167
168 if (vcpu_44x->shadow_tlb_mod[i])
169 kvmppc_44x_tlbre(i, stlbe);
170
171 if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
172 kvmppc_44x_tlbie(i);
173 }
174}
175
176
53/* Search the guest TLB for a matching entry. */ 177/* Search the guest TLB for a matching entry. */
54int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, 178int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
55 unsigned int as) 179 unsigned int as)
56{ 180{
181 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
57 int i; 182 int i;
58 183
59 /* XXX Replace loop with fancy data structures. */ 184 /* XXX Replace loop with fancy data structures. */
60 for (i = 0; i < PPC44x_TLB_SIZE; i++) { 185 for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
61 struct tlbe *tlbe = &vcpu->arch.guest_tlb[i]; 186 struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i];
62 unsigned int tid; 187 unsigned int tid;
63 188
64 if (eaddr < get_tlb_eaddr(tlbe)) 189 if (eaddr < get_tlb_eaddr(tlbe))
@@ -83,78 +208,89 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
83 return -1; 208 return -1;
84} 209}
85 210
86struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) 211int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
87{ 212{
88 unsigned int as = !!(vcpu->arch.msr & MSR_IS); 213 unsigned int as = !!(vcpu->arch.msr & MSR_IS);
89 unsigned int index;
90 214
91 index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); 215 return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
92 if (index == -1)
93 return NULL;
94 return &vcpu->arch.guest_tlb[index];
95} 216}
96 217
97struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) 218int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
98{ 219{
99 unsigned int as = !!(vcpu->arch.msr & MSR_DS); 220 unsigned int as = !!(vcpu->arch.msr & MSR_DS);
100 unsigned int index;
101 221
102 index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); 222 return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
103 if (index == -1)
104 return NULL;
105 return &vcpu->arch.guest_tlb[index];
106} 223}
107 224
108static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe) 225static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
226 unsigned int stlb_index)
109{ 227{
110 return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW); 228 struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index];
111}
112 229
113static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, 230 if (!ref->page)
114 unsigned int index) 231 return;
115{
116 struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
117 struct page *page = vcpu->arch.shadow_pages[index];
118 232
119 if (get_tlb_v(stlbe)) { 233 /* Discard from the TLB. */
120 if (kvmppc_44x_tlbe_is_writable(stlbe)) 234 /* Note: we could actually invalidate a host mapping, if the host overwrote
121 kvm_release_page_dirty(page); 235 * this TLB entry since we inserted a guest mapping. */
122 else 236 kvmppc_44x_tlbie(stlb_index);
123 kvm_release_page_clean(page); 237
124 } 238 /* Now release the page. */
239 if (ref->writeable)
240 kvm_release_page_dirty(ref->page);
241 else
242 kvm_release_page_clean(ref->page);
243
244 ref->page = NULL;
245
246 /* XXX set tlb_44x_index to stlb_index? */
247
248 KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
125} 249}
126 250
127void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu) 251void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
128{ 252{
253 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
129 int i; 254 int i;
130 255
131 for (i = 0; i <= tlb_44x_hwater; i++) 256 for (i = 0; i <= tlb_44x_hwater; i++)
132 kvmppc_44x_shadow_release(vcpu, i); 257 kvmppc_44x_shadow_release(vcpu_44x, i);
133}
134
135void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
136{
137 vcpu->arch.shadow_tlb_mod[i] = 1;
138} 258}
139 259
140/* Caller must ensure that the specified guest TLB entry is safe to insert into 260/**
141 * the shadow TLB. */ 261 * kvmppc_mmu_map -- create a host mapping for guest memory
142void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, 262 *
143 u32 flags) 263 * If the guest wanted a larger page than the host supports, only the first
264 * host page is mapped here and the rest are demand faulted.
265 *
266 * If the guest wanted a smaller page than the host page size, we map only the
267 * guest-size page (i.e. not a full host page mapping).
268 *
269 * Caller must ensure that the specified guest TLB entry is safe to insert into
270 * the shadow TLB.
271 */
272void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
273 u32 flags, u32 max_bytes, unsigned int gtlb_index)
144{ 274{
275 struct kvmppc_44x_tlbe stlbe;
276 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
277 struct kvmppc_44x_shadow_ref *ref;
145 struct page *new_page; 278 struct page *new_page;
146 struct tlbe *stlbe;
147 hpa_t hpaddr; 279 hpa_t hpaddr;
280 gfn_t gfn;
148 unsigned int victim; 281 unsigned int victim;
149 282
150 /* Future optimization: don't overwrite the TLB entry containing the 283 /* Select TLB entry to clobber. Indirectly guard against races with the TLB
151 * current PC (or stack?). */ 284 * miss handler by disabling interrupts. */
152 victim = kvmppc_tlb_44x_pos++; 285 local_irq_disable();
153 if (kvmppc_tlb_44x_pos > tlb_44x_hwater) 286 victim = ++tlb_44x_index;
154 kvmppc_tlb_44x_pos = 0; 287 if (victim > tlb_44x_hwater)
155 stlbe = &vcpu->arch.shadow_tlb[victim]; 288 victim = 0;
289 tlb_44x_index = victim;
290 local_irq_enable();
156 291
157 /* Get reference to new page. */ 292 /* Get reference to new page. */
293 gfn = gpaddr >> PAGE_SHIFT;
158 new_page = gfn_to_page(vcpu->kvm, gfn); 294 new_page = gfn_to_page(vcpu->kvm, gfn);
159 if (is_error_page(new_page)) { 295 if (is_error_page(new_page)) {
160 printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); 296 printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
@@ -163,10 +299,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
163 } 299 }
164 hpaddr = page_to_phys(new_page); 300 hpaddr = page_to_phys(new_page);
165 301
166 /* Drop reference to old page. */ 302 /* Invalidate any previous shadow mappings. */
167 kvmppc_44x_shadow_release(vcpu, victim); 303 kvmppc_44x_shadow_release(vcpu_44x, victim);
168
169 vcpu->arch.shadow_pages[victim] = new_page;
170 304
171 /* XXX Make sure (va, size) doesn't overlap any other 305 /* XXX Make sure (va, size) doesn't overlap any other
172 * entries. 440x6 user manual says the result would be 306 * entries. 440x6 user manual says the result would be
@@ -174,78 +308,193 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
174 308
175 /* XXX what about AS? */ 309 /* XXX what about AS? */
176 310
177 stlbe->tid = !(asid & 0xff);
178
179 /* Force TS=1 for all guest mappings. */ 311 /* Force TS=1 for all guest mappings. */
180 /* For now we hardcode 4KB mappings, but it will be important to 312 stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
181 * use host large pages in the future. */ 313
182 stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS 314 if (max_bytes >= PAGE_SIZE) {
183 | PPC44x_TLB_4K; 315 /* Guest mapping is larger than or equal to host page size. We can use
184 stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); 316 * a "native" host mapping. */
185 stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, 317 stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE;
186 vcpu->arch.msr & MSR_PR); 318 } else {
187 kvmppc_tlbe_set_modified(vcpu, victim); 319 /* Guest mapping is smaller than host page size. We must restrict the
320 * size of the mapping to be at most the smaller of the two, but for
321 * simplicity we fall back to a 4K mapping (this is probably what the
322 * guest is using anyways). */
323 stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K;
324
325 /* 'hpaddr' is a host page, which is larger than the mapping we're
326 * inserting here. To compensate, we must add the in-page offset to the
327 * sub-page. */
328 hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);
329 }
188 330
189 KVMTRACE_5D(STLB_WRITE, vcpu, victim, 331 stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
190 stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2, 332 stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags,
191 handler); 333 vcpu->arch.msr & MSR_PR);
334 stlbe.tid = !(asid & 0xff);
335
336 /* Keep track of the reference so we can properly release it later. */
337 ref = &vcpu_44x->shadow_refs[victim];
338 ref->page = new_page;
339 ref->gtlb_index = gtlb_index;
340 ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW);
341 ref->tid = stlbe.tid;
342
343 /* Insert shadow mapping into hardware TLB. */
344 kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
345 kvmppc_44x_tlbwe(victim, &stlbe);
346 KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
347 stlbe.word2, handler);
192} 348}
193 349
194void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, 350/* For a particular guest TLB entry, invalidate the corresponding host TLB
195 gva_t eend, u32 asid) 351 * mappings and release the host pages. */
352static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu,
353 unsigned int gtlb_index)
196{ 354{
197 unsigned int pid = !(asid & 0xff); 355 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
198 int i; 356 int i;
199 357
200 /* XXX Replace loop with fancy data structures. */ 358 for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
201 for (i = 0; i <= tlb_44x_hwater; i++) { 359 struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
202 struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; 360 if (ref->gtlb_index == gtlb_index)
203 unsigned int tid; 361 kvmppc_44x_shadow_release(vcpu_44x, i);
362 }
363}
204 364
205 if (!get_tlb_v(stlbe)) 365void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
206 continue; 366{
367 vcpu->arch.shadow_pid = !usermode;
368}
207 369
208 if (eend < get_tlb_eaddr(stlbe)) 370void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
209 continue; 371{
372 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
373 int i;
210 374
211 if (eaddr > get_tlb_end(stlbe)) 375 if (unlikely(vcpu->arch.pid == new_pid))
212 continue; 376 return;
213 377
214 tid = get_tlb_tid(stlbe); 378 vcpu->arch.pid = new_pid;
215 if (tid && (tid != pid))
216 continue;
217 379
218 kvmppc_44x_shadow_release(vcpu, i); 380 /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it
219 stlbe->word0 = 0; 381 * can't access guest kernel mappings (TID=1). When we switch to a new
220 kvmppc_tlbe_set_modified(vcpu, i); 382 * guest PID, which will also use host PID=0, we must discard the old guest
221 KVMTRACE_5D(STLB_INVAL, vcpu, i, 383 * userspace mappings. */
222 stlbe->tid, stlbe->word0, stlbe->word1, 384 for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
223 stlbe->word2, handler); 385 struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
386
387 if (ref->tid == 0)
388 kvmppc_44x_shadow_release(vcpu_44x, i);
224 } 389 }
225} 390}
226 391
227/* Invalidate all mappings on the privilege switch after PID has been changed. 392static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
228 * The guest always runs with PID=1, so we must clear the entire TLB when 393 const struct kvmppc_44x_tlbe *tlbe)
229 * switching address spaces. */
230void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
231{ 394{
232 int i; 395 gpa_t gpa;
233 396
234 if (vcpu->arch.swap_pid) { 397 if (!get_tlb_v(tlbe))
235 /* XXX Replace loop with fancy data structures. */ 398 return 0;
236 for (i = 0; i <= tlb_44x_hwater; i++) { 399
237 struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; 400 /* Does it match current guest AS? */
238 401 /* XXX what about IS != DS? */
239 /* Future optimization: clear only userspace mappings. */ 402 if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
240 kvmppc_44x_shadow_release(vcpu, i); 403 return 0;
241 stlbe->word0 = 0; 404
242 kvmppc_tlbe_set_modified(vcpu, i); 405 gpa = get_tlb_raddr(tlbe);
243 KVMTRACE_5D(STLB_INVAL, vcpu, i, 406 if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
244 stlbe->tid, stlbe->word0, stlbe->word1, 407 /* Mapping is not for RAM. */
245 stlbe->word2, handler); 408 return 0;
246 } 409
247 vcpu->arch.swap_pid = 0; 410 return 1;
411}
412
413int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
414{
415 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
416 struct kvmppc_44x_tlbe *tlbe;
417 unsigned int gtlb_index;
418
419 gtlb_index = vcpu->arch.gpr[ra];
420 if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
421 printk("%s: index %d\n", __func__, gtlb_index);
422 kvmppc_dump_vcpu(vcpu);
423 return EMULATE_FAIL;
248 } 424 }
249 425
250 vcpu->arch.shadow_pid = !usermode; 426 tlbe = &vcpu_44x->guest_tlb[gtlb_index];
427
428 /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */
429 if (tlbe->word0 & PPC44x_TLB_VALID)
430 kvmppc_44x_invalidate(vcpu, gtlb_index);
431
432 switch (ws) {
433 case PPC44x_TLB_PAGEID:
434 tlbe->tid = get_mmucr_stid(vcpu);
435 tlbe->word0 = vcpu->arch.gpr[rs];
436 break;
437
438 case PPC44x_TLB_XLAT:
439 tlbe->word1 = vcpu->arch.gpr[rs];
440 break;
441
442 case PPC44x_TLB_ATTRIB:
443 tlbe->word2 = vcpu->arch.gpr[rs];
444 break;
445
446 default:
447 return EMULATE_FAIL;
448 }
449
450 if (tlbe_is_host_safe(vcpu, tlbe)) {
451 u64 asid;
452 gva_t eaddr;
453 gpa_t gpaddr;
454 u32 flags;
455 u32 bytes;
456
457 eaddr = get_tlb_eaddr(tlbe);
458 gpaddr = get_tlb_raddr(tlbe);
459
460 /* Use the advertised page size to mask effective and real addrs. */
461 bytes = get_tlb_bytes(tlbe);
462 eaddr &= ~(bytes - 1);
463 gpaddr &= ~(bytes - 1);
464
465 asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
466 flags = tlbe->word2 & 0xffff;
467
468 kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index);
469 }
470
471 KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
472 tlbe->word1, tlbe->word2, handler);
473
474 kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
475 return EMULATE_DONE;
476}
477
478int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
479{
480 u32 ea;
481 int gtlb_index;
482 unsigned int as = get_mmucr_sts(vcpu);
483 unsigned int pid = get_mmucr_stid(vcpu);
484
485 ea = vcpu->arch.gpr[rb];
486 if (ra)
487 ea += vcpu->arch.gpr[ra];
488
489 gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
490 if (rc) {
491 if (gtlb_index < 0)
492 vcpu->arch.cr &= ~0x20000000;
493 else
494 vcpu->arch.cr |= 0x20000000;
495 }
496 vcpu->arch.gpr[rt] = gtlb_index;
497
498 kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
499 return EMULATE_DONE;
251} 500}
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index 2ccd46b6f6b7..772191f29e62 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -25,48 +25,52 @@
25 25
26extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, 26extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
27 unsigned int pid, unsigned int as); 27 unsigned int pid, unsigned int as);
28extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); 28extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
29extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); 29extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
30
31extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
32 u8 rc);
33extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws);
30 34
31/* TLB helper functions */ 35/* TLB helper functions */
32static inline unsigned int get_tlb_size(const struct tlbe *tlbe) 36static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe)
33{ 37{
34 return (tlbe->word0 >> 4) & 0xf; 38 return (tlbe->word0 >> 4) & 0xf;
35} 39}
36 40
37static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) 41static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe)
38{ 42{
39 return tlbe->word0 & 0xfffffc00; 43 return tlbe->word0 & 0xfffffc00;
40} 44}
41 45
42static inline gva_t get_tlb_bytes(const struct tlbe *tlbe) 46static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe)
43{ 47{
44 unsigned int pgsize = get_tlb_size(tlbe); 48 unsigned int pgsize = get_tlb_size(tlbe);
45 return 1 << 10 << (pgsize << 1); 49 return 1 << 10 << (pgsize << 1);
46} 50}
47 51
48static inline gva_t get_tlb_end(const struct tlbe *tlbe) 52static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe)
49{ 53{
50 return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; 54 return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
51} 55}
52 56
53static inline u64 get_tlb_raddr(const struct tlbe *tlbe) 57static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe)
54{ 58{
55 u64 word1 = tlbe->word1; 59 u64 word1 = tlbe->word1;
56 return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); 60 return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00);
57} 61}
58 62
59static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) 63static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe)
60{ 64{
61 return tlbe->tid & 0xff; 65 return tlbe->tid & 0xff;
62} 66}
63 67
64static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) 68static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe)
65{ 69{
66 return (tlbe->word0 >> 8) & 0x1; 70 return (tlbe->word0 >> 8) & 0x1;
67} 71}
68 72
69static inline unsigned int get_tlb_v(const struct tlbe *tlbe) 73static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe)
70{ 74{
71 return (tlbe->word0 >> 9) & 0x1; 75 return (tlbe->word0 >> 9) & 0x1;
72} 76}
@@ -81,7 +85,7 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
81 return (vcpu->arch.mmucr >> 16) & 0x1; 85 return (vcpu->arch.mmucr >> 16) & 0x1;
82} 86}
83 87
84static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr) 88static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr)
85{ 89{
86 unsigned int pgmask = get_tlb_bytes(tlbe) - 1; 90 unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
87 91
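
get_tlb_bytes() converts the 4-bit SIZE field into a byte count as 1 KiB scaled by 4^SIZE, so SIZE=1 corresponds to the 4K encoding and SIZE=7 to the 16M encoding used for the initial guest mapping in kvmppc_core_vcpu_setup(). A quick stand-alone check of that decode:

/* Verifies the SIZE-field decode in get_tlb_bytes(): bytes = 1 KiB << (2*SIZE). */
#include <stdio.h>

int main(void)
{
	unsigned int size;

	for (size = 0; size <= 7; size++)
		printf("SIZE=%u -> %u bytes\n", size, 1u << 10 << (size << 1));
	/* SIZE=0 -> 1024, SIZE=1 -> 4096, ..., SIZE=7 -> 16777216 */
	return 0;
}
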
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 53aaa66b25e5..6dbdc4817d80 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -15,27 +15,33 @@ menuconfig VIRTUALIZATION
15if VIRTUALIZATION 15if VIRTUALIZATION
16 16
17config KVM 17config KVM
18 bool "Kernel-based Virtual Machine (KVM) support" 18 bool
19 depends on 44x && EXPERIMENTAL
20 select PREEMPT_NOTIFIERS 19 select PREEMPT_NOTIFIERS
21 select ANON_INODES 20 select ANON_INODES
22 # We can only run on Book E hosts so far 21
23 select KVM_BOOKE_HOST 22config KVM_440
23 bool "KVM support for PowerPC 440 processors"
24 depends on EXPERIMENTAL && 44x
25 select KVM
24 ---help--- 26 ---help---
25 Support hosting virtualized guest machines. You will also 27 Support running unmodified 440 guest kernels in virtual machines on
26 need to select one or more of the processor modules below. 28 440 host processors.
27 29
28 This module provides access to the hardware capabilities through 30 This module provides access to the hardware capabilities through
29 a character device node named /dev/kvm. 31 a character device node named /dev/kvm.
30 32
31 If unsure, say N. 33 If unsure, say N.
32 34
33config KVM_BOOKE_HOST 35config KVM_EXIT_TIMING
34 bool "KVM host support for Book E PowerPC processors" 36 bool "Detailed exit timing"
35 depends on KVM && 44x 37 depends on KVM
36 ---help--- 38 ---help---
37 Provides host support for KVM on Book E PowerPC processors. Currently 39 Calculate elapsed time for every exit/enter cycle. A per-vcpu
38 this works on 440 processors only. 40 report is available in debugfs kvm/vm#_vcpu#_timing.
41 The overhead is relatively small, however it is not recommended for
42 production environments.
43
44 If unsure, say N.
39 45
40config KVM_TRACE 46config KVM_TRACE
41 bool "KVM trace support" 47 bool "KVM trace support"
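
With this split, the user-visible choice moves from the old KVM prompt (which selected KVM_BOOKE_HOST) to a per-core option that selects a now-hidden KVM symbol. Assuming nothing else in the tree interferes, a 440 host would end up with roughly the following (.config fragment, illustrative only):

CONFIG_VIRTUALIZATION=y
# KVM itself is hidden now and gets pulled in by the per-core option:
CONFIG_KVM=y
CONFIG_KVM_440=y
# optional: per-vcpu exit/enter timing reports in debugfs
CONFIG_KVM_EXIT_TIMING=y
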
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 2a5d4397ac4b..df7ba59e6d53 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -8,10 +8,16 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
8 8
9common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o) 9common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o)
10 10
11kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o 11kvm-objs := $(common-objs-y) powerpc.o emulate.o
12obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
12obj-$(CONFIG_KVM) += kvm.o 13obj-$(CONFIG_KVM) += kvm.o
13 14
14AFLAGS_booke_interrupts.o := -I$(obj) 15AFLAGS_booke_interrupts.o := -I$(obj)
15 16
16kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o 17kvm-440-objs := \
17obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o 18 booke.o \
19 booke_interrupts.o \
20 44x.o \
21 44x_tlb.o \
22 44x_emulate.o
23obj-$(CONFIG_KVM_440) += kvm-440.o
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke.c
index 7b2591e26bae..35485dd6927e 100644
--- a/arch/powerpc/kvm/booke_guest.c
+++ b/arch/powerpc/kvm/booke.c
@@ -24,21 +24,26 @@
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/vmalloc.h> 25#include <linux/vmalloc.h>
26#include <linux/fs.h> 26#include <linux/fs.h>
27
27#include <asm/cputable.h> 28#include <asm/cputable.h>
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29#include <asm/kvm_ppc.h> 30#include <asm/kvm_ppc.h>
31#include "timing.h"
32#include <asm/cacheflush.h>
33#include <asm/kvm_44x.h>
30 34
35#include "booke.h"
31#include "44x_tlb.h" 36#include "44x_tlb.h"
32 37
38unsigned long kvmppc_booke_handlers;
39
33#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM 40#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
34#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 41#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
35 42
36struct kvm_stats_debugfs_item debugfs_entries[] = { 43struct kvm_stats_debugfs_item debugfs_entries[] = {
37 { "exits", VCPU_STAT(sum_exits) },
38 { "mmio", VCPU_STAT(mmio_exits) }, 44 { "mmio", VCPU_STAT(mmio_exits) },
39 { "dcr", VCPU_STAT(dcr_exits) }, 45 { "dcr", VCPU_STAT(dcr_exits) },
40 { "sig", VCPU_STAT(signal_exits) }, 46 { "sig", VCPU_STAT(signal_exits) },
41 { "light", VCPU_STAT(light_exits) },
42 { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, 47 { "itlb_r", VCPU_STAT(itlb_real_miss_exits) },
43 { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, 48 { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) },
44 { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, 49 { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) },
@@ -53,103 +58,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
53 { NULL } 58 { NULL }
54}; 59};
55 60
56static const u32 interrupt_msr_mask[16] = {
57 [BOOKE_INTERRUPT_CRITICAL] = MSR_ME,
58 [BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
59 [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
60 [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
61 [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE,
62 [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE,
63 [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE,
64 [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
65 [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE,
66 [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
67 [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE,
68 [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE,
69 [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME,
70 [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
71 [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
72 [BOOKE_INTERRUPT_DEBUG] = MSR_ME,
73};
74
75const unsigned char exception_priority[] = {
76 [BOOKE_INTERRUPT_DATA_STORAGE] = 0,
77 [BOOKE_INTERRUPT_INST_STORAGE] = 1,
78 [BOOKE_INTERRUPT_ALIGNMENT] = 2,
79 [BOOKE_INTERRUPT_PROGRAM] = 3,
80 [BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
81 [BOOKE_INTERRUPT_SYSCALL] = 5,
82 [BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
83 [BOOKE_INTERRUPT_DTLB_MISS] = 7,
84 [BOOKE_INTERRUPT_ITLB_MISS] = 8,
85 [BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
86 [BOOKE_INTERRUPT_DEBUG] = 10,
87 [BOOKE_INTERRUPT_CRITICAL] = 11,
88 [BOOKE_INTERRUPT_WATCHDOG] = 12,
89 [BOOKE_INTERRUPT_EXTERNAL] = 13,
90 [BOOKE_INTERRUPT_FIT] = 14,
91 [BOOKE_INTERRUPT_DECREMENTER] = 15,
92};
93
94const unsigned char priority_exception[] = {
95 BOOKE_INTERRUPT_DATA_STORAGE,
96 BOOKE_INTERRUPT_INST_STORAGE,
97 BOOKE_INTERRUPT_ALIGNMENT,
98 BOOKE_INTERRUPT_PROGRAM,
99 BOOKE_INTERRUPT_FP_UNAVAIL,
100 BOOKE_INTERRUPT_SYSCALL,
101 BOOKE_INTERRUPT_AP_UNAVAIL,
102 BOOKE_INTERRUPT_DTLB_MISS,
103 BOOKE_INTERRUPT_ITLB_MISS,
104 BOOKE_INTERRUPT_MACHINE_CHECK,
105 BOOKE_INTERRUPT_DEBUG,
106 BOOKE_INTERRUPT_CRITICAL,
107 BOOKE_INTERRUPT_WATCHDOG,
108 BOOKE_INTERRUPT_EXTERNAL,
109 BOOKE_INTERRUPT_FIT,
110 BOOKE_INTERRUPT_DECREMENTER,
111};
112
113
114void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
115{
116 struct tlbe *tlbe;
117 int i;
118
119 printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
120 printk("| %2s | %3s | %8s | %8s | %8s |\n",
121 "nr", "tid", "word0", "word1", "word2");
122
123 for (i = 0; i < PPC44x_TLB_SIZE; i++) {
124 tlbe = &vcpu->arch.guest_tlb[i];
125 if (tlbe->word0 & PPC44x_TLB_VALID)
126 printk(" G%2d | %02X | %08X | %08X | %08X |\n",
127 i, tlbe->tid, tlbe->word0, tlbe->word1,
128 tlbe->word2);
129 }
130
131 for (i = 0; i < PPC44x_TLB_SIZE; i++) {
132 tlbe = &vcpu->arch.shadow_tlb[i];
133 if (tlbe->word0 & PPC44x_TLB_VALID)
134 printk(" S%2d | %02X | %08X | %08X | %08X |\n",
135 i, tlbe->tid, tlbe->word0, tlbe->word1,
136 tlbe->word2);
137 }
138}
139
140/* TODO: use vcpu_printf() */ 61/* TODO: use vcpu_printf() */
141void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) 62void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
142{ 63{
143 int i; 64 int i;
144 65
145 printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr); 66 printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr);
146 printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr); 67 printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
147 printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1); 68 printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1);
148 69
149 printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); 70 printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
150 71
151 for (i = 0; i < 32; i += 4) { 72 for (i = 0; i < 32; i += 4) {
152 printk("gpr%02d: %08x %08x %08x %08x\n", i, 73 printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
153 vcpu->arch.gpr[i], 74 vcpu->arch.gpr[i],
154 vcpu->arch.gpr[i+1], 75 vcpu->arch.gpr[i+1],
155 vcpu->arch.gpr[i+2], 76 vcpu->arch.gpr[i+2],
@@ -157,69 +78,96 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
157 } 78 }
158} 79}
159 80
160/* Check if we are ready to deliver the interrupt */ 81static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
161static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) 82 unsigned int priority)
162{ 83{
163 int r; 84 set_bit(priority, &vcpu->arch.pending_exceptions);
85}
164 86
165 switch (interrupt) { 87void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
166 case BOOKE_INTERRUPT_CRITICAL: 88{
167 r = vcpu->arch.msr & MSR_CE; 89 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
168 break; 90}
169 case BOOKE_INTERRUPT_MACHINE_CHECK: 91
170 r = vcpu->arch.msr & MSR_ME; 92void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
171 break; 93{
172 case BOOKE_INTERRUPT_EXTERNAL: 94 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
173 r = vcpu->arch.msr & MSR_EE; 95}
96
97int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
98{
99 return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
100}
101
102void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
103 struct kvm_interrupt *irq)
104{
105 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL);
106}
107
108/* Deliver the interrupt of the corresponding priority, if possible. */
109static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
110 unsigned int priority)
111{
112 int allowed = 0;
113 ulong msr_mask;
114
115 switch (priority) {
116 case BOOKE_IRQPRIO_PROGRAM:
117 case BOOKE_IRQPRIO_DTLB_MISS:
118 case BOOKE_IRQPRIO_ITLB_MISS:
119 case BOOKE_IRQPRIO_SYSCALL:
120 case BOOKE_IRQPRIO_DATA_STORAGE:
121 case BOOKE_IRQPRIO_INST_STORAGE:
122 case BOOKE_IRQPRIO_FP_UNAVAIL:
123 case BOOKE_IRQPRIO_AP_UNAVAIL:
124 case BOOKE_IRQPRIO_ALIGNMENT:
125 allowed = 1;
126 msr_mask = MSR_CE|MSR_ME|MSR_DE;
174 break; 127 break;
175 case BOOKE_INTERRUPT_DECREMENTER: 128 case BOOKE_IRQPRIO_CRITICAL:
176 r = vcpu->arch.msr & MSR_EE; 129 case BOOKE_IRQPRIO_WATCHDOG:
130 allowed = vcpu->arch.msr & MSR_CE;
131 msr_mask = MSR_ME;
177 break; 132 break;
178 case BOOKE_INTERRUPT_FIT: 133 case BOOKE_IRQPRIO_MACHINE_CHECK:
179 r = vcpu->arch.msr & MSR_EE; 134 allowed = vcpu->arch.msr & MSR_ME;
135 msr_mask = 0;
180 break; 136 break;
181 case BOOKE_INTERRUPT_WATCHDOG: 137 case BOOKE_IRQPRIO_EXTERNAL:
182 r = vcpu->arch.msr & MSR_CE; 138 case BOOKE_IRQPRIO_DECREMENTER:
139 case BOOKE_IRQPRIO_FIT:
140 allowed = vcpu->arch.msr & MSR_EE;
141 msr_mask = MSR_CE|MSR_ME|MSR_DE;
183 break; 142 break;
184 case BOOKE_INTERRUPT_DEBUG: 143 case BOOKE_IRQPRIO_DEBUG:
185 r = vcpu->arch.msr & MSR_DE; 144 allowed = vcpu->arch.msr & MSR_DE;
145 msr_mask = MSR_ME;
186 break; 146 break;
187 default:
188 r = 1;
189 } 147 }
190 148
191 return r; 149 if (allowed) {
192} 150 vcpu->arch.srr0 = vcpu->arch.pc;
151 vcpu->arch.srr1 = vcpu->arch.msr;
152 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
153 kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
193 154
194static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) 155 clear_bit(priority, &vcpu->arch.pending_exceptions);
195{
196 switch (interrupt) {
197 case BOOKE_INTERRUPT_DECREMENTER:
198 vcpu->arch.tsr |= TSR_DIS;
199 break;
200 } 156 }
201 157
202 vcpu->arch.srr0 = vcpu->arch.pc; 158 return allowed;
203 vcpu->arch.srr1 = vcpu->arch.msr;
204 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
205 kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
206} 159}
207 160
208/* Check pending exceptions and deliver one, if possible. */ 161/* Check pending exceptions and deliver one, if possible. */
209void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu) 162void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
210{ 163{
211 unsigned long *pending = &vcpu->arch.pending_exceptions; 164 unsigned long *pending = &vcpu->arch.pending_exceptions;
212 unsigned int exception;
213 unsigned int priority; 165 unsigned int priority;
214 166
215 priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending)); 167 priority = __ffs(*pending);
216 while (priority <= BOOKE_MAX_INTERRUPT) { 168 while (priority <= BOOKE_MAX_INTERRUPT) {
217 exception = priority_exception[priority]; 169 if (kvmppc_booke_irqprio_deliver(vcpu, priority))
218 if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
219 kvmppc_clear_exception(vcpu, exception);
220 kvmppc_deliver_interrupt(vcpu, exception);
221 break; 170 break;
222 }
223 171
224 priority = find_next_bit(pending, 172 priority = find_next_bit(pending,
225 BITS_PER_BYTE * sizeof(*pending), 173 BITS_PER_BYTE * sizeof(*pending),
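
This hunk folds the old interrupt_msr_mask[], exception_priority[] and priority_exception[] tables into one pending-exceptions bitmap indexed directly by the BOOKE_IRQPRIO_* values (defined in the new booke.h further down): delivery is now "find the lowest set bit, check whether the MSR gate for that class (EE/CE/ME/DE) allows it, deliver at most one, clear its bit". A toy model of that loop, with made-up gating, just to show the shape:

#include <stdio.h>

#define NR_PRIORITIES	16

static unsigned long pending;
static int msr_ee;			/* pretend MSR[EE] is currently clear */

/* Toy gate: priorities 13..15 model EXTERNAL/FIT/DECREMENTER, gated on EE;
 * everything else is treated as always deliverable. */
static int deliver(unsigned int prio)
{
	if (prio >= 13 && !msr_ee)
		return 0;
	printf("delivered priority %u\n", prio);
	pending &= ~(1ul << prio);
	return 1;
}

int main(void)
{
	unsigned int prio;

	pending = (1ul << 15) | (1ul << 3);	/* decrementer + program check */

	for (prio = 0; prio < NR_PRIORITIES; prio++) {
		if (!(pending & (1ul << prio)))
			continue;
		if (deliver(prio))
			break;			/* one delivery per pass, as above */
	}

	/* prints 0x8000: the decrementer stays queued for a later pass */
	printf("still pending: 0x%lx\n", pending);
	return 0;
}
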
@@ -238,6 +186,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
238 enum emulation_result er; 186 enum emulation_result er;
239 int r = RESUME_HOST; 187 int r = RESUME_HOST;
240 188
189 /* update before a new last_exit_type is rewritten */
190 kvmppc_update_timing_stats(vcpu);
191
241 local_irq_enable(); 192 local_irq_enable();
242 193
243 run->exit_reason = KVM_EXIT_UNKNOWN; 194 run->exit_reason = KVM_EXIT_UNKNOWN;
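
kvmppc_update_timing_stats() here, like the kvmppc_account_exit()/kvmppc_account_exit_stat() calls added throughout the switch below, comes from the new timing.h. The names suggest the usual arrangement where the detailed bookkeeping compiles away unless CONFIG_KVM_EXIT_TIMING is set and only the plain kvm_stats counters remain; a sketch of that shape (an assumption about timing.h, not its actual contents):

/* Sketch only: the conventional "stub it out when the option is off" idiom.
 * The real timing.h added by this patch may differ in names and detail. */
struct kvm_vcpu;

#ifdef CONFIG_KVM_EXIT_TIMING
void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type);
#else
static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) { }
static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) { }
#endif
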
@@ -251,21 +202,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
251 break; 202 break;
252 203
253 case BOOKE_INTERRUPT_EXTERNAL: 204 case BOOKE_INTERRUPT_EXTERNAL:
205 kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
206 if (need_resched())
207 cond_resched();
208 r = RESUME_GUEST;
209 break;
210
254 case BOOKE_INTERRUPT_DECREMENTER: 211 case BOOKE_INTERRUPT_DECREMENTER:
255 /* Since we switched IVPR back to the host's value, the host 212 /* Since we switched IVPR back to the host's value, the host
256 * handled this interrupt the moment we enabled interrupts. 213 * handled this interrupt the moment we enabled interrupts.
257 * Now we just offer it a chance to reschedule the guest. */ 214 * Now we just offer it a chance to reschedule the guest. */
258 215 kvmppc_account_exit(vcpu, DEC_EXITS);
259 /* XXX At this point the TLB still holds our shadow TLB, so if
260 * we do reschedule the host will fault over it. Perhaps we
261 * should politely restore the host's entries to minimize
262 * misses before ceding control. */
263 if (need_resched()) 216 if (need_resched())
264 cond_resched(); 217 cond_resched();
265 if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
266 vcpu->stat.dec_exits++;
267 else
268 vcpu->stat.ext_intr_exits++;
269 r = RESUME_GUEST; 218 r = RESUME_GUEST;
270 break; 219 break;
271 220
@@ -274,17 +223,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
274 /* Program traps generated by user-level software must be handled 223 /* Program traps generated by user-level software must be handled
275 * by the guest kernel. */ 224 * by the guest kernel. */
276 vcpu->arch.esr = vcpu->arch.fault_esr; 225 vcpu->arch.esr = vcpu->arch.fault_esr;
277 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); 226 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
278 r = RESUME_GUEST; 227 r = RESUME_GUEST;
228 kvmppc_account_exit(vcpu, USR_PR_INST);
279 break; 229 break;
280 } 230 }
281 231
282 er = kvmppc_emulate_instruction(run, vcpu); 232 er = kvmppc_emulate_instruction(run, vcpu);
283 switch (er) { 233 switch (er) {
284 case EMULATE_DONE: 234 case EMULATE_DONE:
235 /* don't overwrite subtypes, just account kvm_stats */
236 kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
285 /* Future optimization: only reload non-volatiles if 237 /* Future optimization: only reload non-volatiles if
286 * they were actually modified by emulation. */ 238 * they were actually modified by emulation. */
287 vcpu->stat.emulated_inst_exits++;
288 r = RESUME_GUEST_NV; 239 r = RESUME_GUEST_NV;
289 break; 240 break;
290 case EMULATE_DO_DCR: 241 case EMULATE_DO_DCR:
@@ -293,7 +244,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
293 break; 244 break;
294 case EMULATE_FAIL: 245 case EMULATE_FAIL:
295 /* XXX Deliver Program interrupt to guest. */ 246 /* XXX Deliver Program interrupt to guest. */
296 printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n", 247 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
297 __func__, vcpu->arch.pc, vcpu->arch.last_inst); 248 __func__, vcpu->arch.pc, vcpu->arch.last_inst);
298 /* For debugging, encode the failing instruction and 249 /* For debugging, encode the failing instruction and
299 * report it to userspace. */ 250 * report it to userspace. */
@@ -307,48 +258,53 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
307 break; 258 break;
308 259
309 case BOOKE_INTERRUPT_FP_UNAVAIL: 260 case BOOKE_INTERRUPT_FP_UNAVAIL:
310 kvmppc_queue_exception(vcpu, exit_nr); 261 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
262 kvmppc_account_exit(vcpu, FP_UNAVAIL);
311 r = RESUME_GUEST; 263 r = RESUME_GUEST;
312 break; 264 break;
313 265
314 case BOOKE_INTERRUPT_DATA_STORAGE: 266 case BOOKE_INTERRUPT_DATA_STORAGE:
315 vcpu->arch.dear = vcpu->arch.fault_dear; 267 vcpu->arch.dear = vcpu->arch.fault_dear;
316 vcpu->arch.esr = vcpu->arch.fault_esr; 268 vcpu->arch.esr = vcpu->arch.fault_esr;
317 kvmppc_queue_exception(vcpu, exit_nr); 269 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
318 vcpu->stat.dsi_exits++; 270 kvmppc_account_exit(vcpu, DSI_EXITS);
319 r = RESUME_GUEST; 271 r = RESUME_GUEST;
320 break; 272 break;
321 273
322 case BOOKE_INTERRUPT_INST_STORAGE: 274 case BOOKE_INTERRUPT_INST_STORAGE:
323 vcpu->arch.esr = vcpu->arch.fault_esr; 275 vcpu->arch.esr = vcpu->arch.fault_esr;
324 kvmppc_queue_exception(vcpu, exit_nr); 276 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
325 vcpu->stat.isi_exits++; 277 kvmppc_account_exit(vcpu, ISI_EXITS);
326 r = RESUME_GUEST; 278 r = RESUME_GUEST;
327 break; 279 break;
328 280
329 case BOOKE_INTERRUPT_SYSCALL: 281 case BOOKE_INTERRUPT_SYSCALL:
330 kvmppc_queue_exception(vcpu, exit_nr); 282 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
331 vcpu->stat.syscall_exits++; 283 kvmppc_account_exit(vcpu, SYSCALL_EXITS);
332 r = RESUME_GUEST; 284 r = RESUME_GUEST;
333 break; 285 break;
334 286
287 /* XXX move to a 440-specific file. */
335 case BOOKE_INTERRUPT_DTLB_MISS: { 288 case BOOKE_INTERRUPT_DTLB_MISS: {
336 struct tlbe *gtlbe; 289 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
290 struct kvmppc_44x_tlbe *gtlbe;
337 unsigned long eaddr = vcpu->arch.fault_dear; 291 unsigned long eaddr = vcpu->arch.fault_dear;
292 int gtlb_index;
338 gfn_t gfn; 293 gfn_t gfn;
339 294
340 /* Check the guest TLB. */ 295 /* Check the guest TLB. */
341 gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr); 296 gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
342 if (!gtlbe) { 297 if (gtlb_index < 0) {
343 /* The guest didn't have a mapping for it. */ 298 /* The guest didn't have a mapping for it. */
344 kvmppc_queue_exception(vcpu, exit_nr); 299 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
345 vcpu->arch.dear = vcpu->arch.fault_dear; 300 vcpu->arch.dear = vcpu->arch.fault_dear;
346 vcpu->arch.esr = vcpu->arch.fault_esr; 301 vcpu->arch.esr = vcpu->arch.fault_esr;
347 vcpu->stat.dtlb_real_miss_exits++; 302 kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
348 r = RESUME_GUEST; 303 r = RESUME_GUEST;
349 break; 304 break;
350 } 305 }
351 306
307 gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
352 vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); 308 vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
353 gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT; 309 gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
354 310
@@ -359,38 +315,45 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
359 * b) the guest used a large mapping which we're faking 315 * b) the guest used a large mapping which we're faking
360 * Either way, we need to satisfy the fault without 316 * Either way, we need to satisfy the fault without
361 * invoking the guest. */ 317 * invoking the guest. */
362 kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, 318 kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
363 gtlbe->word2); 319 gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
364 vcpu->stat.dtlb_virt_miss_exits++; 320 kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
365 r = RESUME_GUEST; 321 r = RESUME_GUEST;
366 } else { 322 } else {
367 /* Guest has mapped and accessed a page which is not 323 /* Guest has mapped and accessed a page which is not
368 * actually RAM. */ 324 * actually RAM. */
369 r = kvmppc_emulate_mmio(run, vcpu); 325 r = kvmppc_emulate_mmio(run, vcpu);
326 kvmppc_account_exit(vcpu, MMIO_EXITS);
370 } 327 }
371 328
372 break; 329 break;
373 } 330 }
374 331
332 /* XXX move to a 440-specific file. */
375 case BOOKE_INTERRUPT_ITLB_MISS: { 333 case BOOKE_INTERRUPT_ITLB_MISS: {
376 struct tlbe *gtlbe; 334 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
335 struct kvmppc_44x_tlbe *gtlbe;
377 unsigned long eaddr = vcpu->arch.pc; 336 unsigned long eaddr = vcpu->arch.pc;
337 gpa_t gpaddr;
378 gfn_t gfn; 338 gfn_t gfn;
339 int gtlb_index;
379 340
380 r = RESUME_GUEST; 341 r = RESUME_GUEST;
381 342
382 /* Check the guest TLB. */ 343 /* Check the guest TLB. */
383 gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr); 344 gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
384 if (!gtlbe) { 345 if (gtlb_index < 0) {
385 /* The guest didn't have a mapping for it. */ 346 /* The guest didn't have a mapping for it. */
386 kvmppc_queue_exception(vcpu, exit_nr); 347 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
387 vcpu->stat.itlb_real_miss_exits++; 348 kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
388 break; 349 break;
389 } 350 }
390 351
391 vcpu->stat.itlb_virt_miss_exits++; 352 kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
392 353
393 gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT; 354 gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
355 gpaddr = tlb_xlate(gtlbe, eaddr);
356 gfn = gpaddr >> PAGE_SHIFT;
394 357
395 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { 358 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
396 /* The guest TLB had a mapping, but the shadow TLB 359 /* The guest TLB had a mapping, but the shadow TLB
@@ -399,12 +362,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
399 * b) the guest used a large mapping which we're faking 362 * b) the guest used a large mapping which we're faking
400 * Either way, we need to satisfy the fault without 363 * Either way, we need to satisfy the fault without
401 * invoking the guest. */ 364 * invoking the guest. */
402 kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, 365 kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
403 gtlbe->word2); 366 gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
404 } else { 367 } else {
405 /* Guest mapped and leaped at non-RAM! */ 368 /* Guest mapped and leaped at non-RAM! */
406 kvmppc_queue_exception(vcpu, 369 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
407 BOOKE_INTERRUPT_MACHINE_CHECK);
408 } 370 }
409 371
410 break; 372 break;
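
Both miss handlers now follow the same shape: look the faulting address up in the guest TLB by index, reflect the miss back to the guest if nothing is there, otherwise translate to a guest physical address and either back it with a host mapping via kvmppc_mmu_map() (which now takes the gpa, the page size and the guest TLB index) or fall through to MMIO/machine-check handling when the target is not RAM. A plain-C restatement of the data-side flow, with stand-in helpers rather than the real kernel functions:

#include <stdio.h>

enum miss_result { REFLECT_TO_GUEST, MAPPED, DO_MMIO };

static int lookup_guest_tlb(unsigned long eaddr)	/* ~ kvmppc_44x_dtlb_index() */
{
	return eaddr < 0x10000000ul ? 0 : -1;	/* pretend only low EAs are mapped */
}

static unsigned long translate(int idx, unsigned long eaddr)	/* ~ tlb_xlate() */
{
	(void)idx;
	return eaddr;				/* pretend an identity mapping */
}

static int page_is_ram(unsigned long gfn)	/* ~ kvm_is_visible_gfn() */
{
	return gfn < 0x1000;			/* pretend 16 MiB of guest RAM */
}

static enum miss_result handle_dtlb_miss(unsigned long eaddr)
{
	int gtlb_index = lookup_guest_tlb(eaddr);
	unsigned long gpaddr, gfn;

	if (gtlb_index < 0)
		return REFLECT_TO_GUEST;	/* queue a guest DTLB-miss interrupt */

	gpaddr = translate(gtlb_index, eaddr);
	gfn = gpaddr >> 12;

	if (page_is_ram(gfn))
		return MAPPED;			/* ~ kvmppc_mmu_map(eaddr, gpaddr, ...) */

	return DO_MMIO;				/* guest touched a non-RAM address */
}

int main(void)
{
	/* prints: 1 2 0 (MAPPED, DO_MMIO, REFLECT_TO_GUEST) */
	printf("%d %d %d\n",
	       handle_dtlb_miss(0x00001000ul),
	       handle_dtlb_miss(0x08000000ul),
	       handle_dtlb_miss(0xef600000ul));
	return 0;
}
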
@@ -421,6 +383,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
421 mtspr(SPRN_DBSR, dbsr); 383 mtspr(SPRN_DBSR, dbsr);
422 384
423 run->exit_reason = KVM_EXIT_DEBUG; 385 run->exit_reason = KVM_EXIT_DEBUG;
386 kvmppc_account_exit(vcpu, DEBUG_EXITS);
424 r = RESUME_HOST; 387 r = RESUME_HOST;
425 break; 388 break;
426 } 389 }
@@ -432,10 +395,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
432 395
433 local_irq_disable(); 396 local_irq_disable();
434 397
435 kvmppc_check_and_deliver_interrupts(vcpu); 398 kvmppc_core_deliver_interrupts(vcpu);
436 399
437 /* Do some exit accounting. */
438 vcpu->stat.sum_exits++;
439 if (!(r & RESUME_HOST)) { 400 if (!(r & RESUME_HOST)) {
440 /* To avoid clobbering exit_reason, only check for signals if 401 /* To avoid clobbering exit_reason, only check for signals if
441 * we aren't already exiting to userspace for some other 402 * we aren't already exiting to userspace for some other
@@ -443,22 +404,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
443 if (signal_pending(current)) { 404 if (signal_pending(current)) {
444 run->exit_reason = KVM_EXIT_INTR; 405 run->exit_reason = KVM_EXIT_INTR;
445 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); 406 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
446 407 kvmppc_account_exit(vcpu, SIGNAL_EXITS);
447 vcpu->stat.signal_exits++;
448 } else {
449 vcpu->stat.light_exits++;
450 }
451 } else {
452 switch (run->exit_reason) {
453 case KVM_EXIT_MMIO:
454 vcpu->stat.mmio_exits++;
455 break;
456 case KVM_EXIT_DCR:
457 vcpu->stat.dcr_exits++;
458 break;
459 case KVM_EXIT_INTR:
460 vcpu->stat.signal_exits++;
461 break;
462 } 408 }
463 } 409 }
464 410
@@ -468,20 +414,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
468/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ 414/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
469int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 415int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
470{ 416{
471 struct tlbe *tlbe = &vcpu->arch.guest_tlb[0];
472
473 tlbe->tid = 0;
474 tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
475 tlbe->word1 = 0;
476 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
477
478 tlbe++;
479 tlbe->tid = 0;
480 tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
481 tlbe->word1 = 0xef600000;
482 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
483 | PPC44x_TLB_I | PPC44x_TLB_G;
484
485 vcpu->arch.pc = 0; 417 vcpu->arch.pc = 0;
486 vcpu->arch.msr = 0; 418 vcpu->arch.msr = 0;
487 vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ 419 vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
@@ -492,12 +424,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
492 * before it's programmed its own IVPR. */ 424 * before it's programmed its own IVPR. */
493 vcpu->arch.ivpr = 0x55550000; 425 vcpu->arch.ivpr = 0x55550000;
494 426
495 /* Since the guest can directly access the timebase, it must know the 427 kvmppc_init_timing_stats(vcpu);
496 * real timebase frequency. Accordingly, it must see the state of
497 * CCR1[TCS]. */
498 vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
499 428
500 return 0; 429 return kvmppc_core_vcpu_setup(vcpu);
501} 430}
502 431
503int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 432int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -536,7 +465,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
536 vcpu->arch.ctr = regs->ctr; 465 vcpu->arch.ctr = regs->ctr;
537 vcpu->arch.lr = regs->lr; 466 vcpu->arch.lr = regs->lr;
538 vcpu->arch.xer = regs->xer; 467 vcpu->arch.xer = regs->xer;
539 vcpu->arch.msr = regs->msr; 468 kvmppc_set_msr(vcpu, regs->msr);
540 vcpu->arch.srr0 = regs->srr0; 469 vcpu->arch.srr0 = regs->srr0;
541 vcpu->arch.srr1 = regs->srr1; 470 vcpu->arch.srr1 = regs->srr1;
542 vcpu->arch.sprg0 = regs->sprg0; 471 vcpu->arch.sprg0 = regs->sprg0;
@@ -575,31 +504,62 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
575 return -ENOTSUPP; 504 return -ENOTSUPP;
576} 505}
577 506
578/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
579int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 507int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
580 struct kvm_translation *tr) 508 struct kvm_translation *tr)
581{ 509{
582 struct tlbe *gtlbe; 510 return kvmppc_core_vcpu_translate(vcpu, tr);
583 int index; 511}
584 gva_t eaddr;
585 u8 pid;
586 u8 as;
587
588 eaddr = tr->linear_address;
589 pid = (tr->linear_address >> 32) & 0xff;
590 as = (tr->linear_address >> 40) & 0x1;
591
592 index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
593 if (index == -1) {
594 tr->valid = 0;
595 return 0;
596 }
597 512
598 gtlbe = &vcpu->arch.guest_tlb[index]; 513int kvmppc_booke_init(void)
514{
515 unsigned long ivor[16];
516 unsigned long max_ivor = 0;
517 int i;
599 518
600 tr->physical_address = tlb_xlate(gtlbe, eaddr); 519 /* We install our own exception handlers by hijacking IVPR. IVPR must
601 /* XXX what does "writeable" and "usermode" even mean? */ 520 * be 16-bit aligned, so we need a 64KB allocation. */
602 tr->valid = 1; 521 kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
522 VCPU_SIZE_ORDER);
523 if (!kvmppc_booke_handlers)
524 return -ENOMEM;
525
526 /* XXX make sure our handlers are smaller than Linux's */
527
528 /* Copy our interrupt handlers to match host IVORs. That way we don't
529 * have to swap the IVORs on every guest/host transition. */
530 ivor[0] = mfspr(SPRN_IVOR0);
531 ivor[1] = mfspr(SPRN_IVOR1);
532 ivor[2] = mfspr(SPRN_IVOR2);
533 ivor[3] = mfspr(SPRN_IVOR3);
534 ivor[4] = mfspr(SPRN_IVOR4);
535 ivor[5] = mfspr(SPRN_IVOR5);
536 ivor[6] = mfspr(SPRN_IVOR6);
537 ivor[7] = mfspr(SPRN_IVOR7);
538 ivor[8] = mfspr(SPRN_IVOR8);
539 ivor[9] = mfspr(SPRN_IVOR9);
540 ivor[10] = mfspr(SPRN_IVOR10);
541 ivor[11] = mfspr(SPRN_IVOR11);
542 ivor[12] = mfspr(SPRN_IVOR12);
543 ivor[13] = mfspr(SPRN_IVOR13);
544 ivor[14] = mfspr(SPRN_IVOR14);
545 ivor[15] = mfspr(SPRN_IVOR15);
546
547 for (i = 0; i < 16; i++) {
548 if (ivor[i] > max_ivor)
549 max_ivor = ivor[i];
550
551 memcpy((void *)kvmppc_booke_handlers + ivor[i],
552 kvmppc_handlers_start + i * kvmppc_handler_len,
553 kvmppc_handler_len);
554 }
555 flush_icache_range(kvmppc_booke_handlers,
556 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
603 557
604 return 0; 558 return 0;
605} 559}
560
561void __exit kvmppc_booke_exit(void)
562{
563 free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
564 kvm_exit();
565}
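
kvmppc_booke_init() is absorbed here from the deleted booke_host.c below, minus the kvm_init() call, which presumably moves into the new per-core glue in 44x.c. The scheme itself is unchanged: grab a 64 KiB region for IVPR to point at and copy a small handler stub to the same offset as each host IVORn, so only IVPR has to change on guest entry and the sixteen IVORs never need to be swapped. A toy illustration of that placement (all sizes and offsets made up):

#include <stdio.h>

int main(void)
{
	unsigned long base = 0x10000;		/* pretend 64 KiB-aligned block */
	unsigned long ivor[4] = { 0x100, 0x180, 0x300, 0xf20 };	/* made-up host IVORs */
	unsigned long stub_len = 0x40;		/* made-up per-vector stub size */
	unsigned long max = 0;
	int i;

	for (i = 0; i < 4; i++) {
		printf("vector %d stub copied to 0x%lx\n", i, base + ivor[i]);
		if (ivor[i] > max)
			max = ivor[i];
	}
	/* the real code then flush_icache_range()s base .. base + max + stub_len */
	printf("icache flush covers 0x%lx .. 0x%lx\n", base, base + max + stub_len);
	return 0;
}
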
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
new file mode 100644
index 000000000000..cf7c94ca24bf
--- /dev/null
+++ b/arch/powerpc/kvm/booke.h
@@ -0,0 +1,60 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#ifndef __KVM_BOOKE_H__
21#define __KVM_BOOKE_H__
22
23#include <linux/types.h>
24#include <linux/kvm_host.h>
25#include "timing.h"
26
27/* interrupt priority ordering */
28#define BOOKE_IRQPRIO_DATA_STORAGE 0
29#define BOOKE_IRQPRIO_INST_STORAGE 1
30#define BOOKE_IRQPRIO_ALIGNMENT 2
31#define BOOKE_IRQPRIO_PROGRAM 3
32#define BOOKE_IRQPRIO_FP_UNAVAIL 4
33#define BOOKE_IRQPRIO_SYSCALL 5
34#define BOOKE_IRQPRIO_AP_UNAVAIL 6
35#define BOOKE_IRQPRIO_DTLB_MISS 7
36#define BOOKE_IRQPRIO_ITLB_MISS 8
37#define BOOKE_IRQPRIO_MACHINE_CHECK 9
38#define BOOKE_IRQPRIO_DEBUG 10
39#define BOOKE_IRQPRIO_CRITICAL 11
40#define BOOKE_IRQPRIO_WATCHDOG 12
41#define BOOKE_IRQPRIO_EXTERNAL 13
42#define BOOKE_IRQPRIO_FIT 14
43#define BOOKE_IRQPRIO_DECREMENTER 15
44
45/* Helper function for "full" MSR writes. No need to call this if only EE is
46 * changing. */
47static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
48{
49 if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
50 kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
51
52 vcpu->arch.msr = new_msr;
53
54 if (vcpu->arch.msr & MSR_WE) {
55 kvm_vcpu_block(vcpu);
56 kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
57 };
58}
59
60#endif /* __KVM_BOOKE_H__ */
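
Two details in kvmppc_set_msr() are worth calling out: a flip of MSR[PR] goes through kvmppc_mmu_priv_switch(), presumably so the shadow mappings track the new privilege level, and setting MSR[WE] (the Book E "wait" idle state) parks the vcpu with kvm_vcpu_block() instead of letting it spin. A toy model of those two checks (MSR bit positions shown for illustration only):

#include <stdio.h>

#define MSR_PR	(1u << 14)	/* problem state (user mode) */
#define MSR_WE	(1u << 18)	/* wait enable */

static unsigned int cur_msr;

static void set_msr(unsigned int new_msr)
{
	if ((new_msr ^ cur_msr) & MSR_PR)
		printf("privilege change -> switch shadow mappings\n");

	cur_msr = new_msr;

	if (cur_msr & MSR_WE)
		printf("guest idled -> block the vcpu until something is pending\n");
}

int main(void)
{
	set_msr(MSR_PR);		/* guest kernel returns to user space */
	set_msr(MSR_PR | MSR_WE);	/* guest idles */
	return 0;
}
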
diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c
deleted file mode 100644
index b480341bc31e..000000000000
--- a/arch/powerpc/kvm/booke_host.c
+++ /dev/null
@@ -1,83 +0,0 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <linux/errno.h>
21#include <linux/kvm_host.h>
22#include <linux/module.h>
23#include <asm/cacheflush.h>
24#include <asm/kvm_ppc.h>
25
26unsigned long kvmppc_booke_handlers;
27
28static int kvmppc_booke_init(void)
29{
30 unsigned long ivor[16];
31 unsigned long max_ivor = 0;
32 int i;
33
34 /* We install our own exception handlers by hijacking IVPR. IVPR must
35 * be 16-bit aligned, so we need a 64KB allocation. */
36 kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
37 VCPU_SIZE_ORDER);
38 if (!kvmppc_booke_handlers)
39 return -ENOMEM;
40
41 /* XXX make sure our handlers are smaller than Linux's */
42
43 /* Copy our interrupt handlers to match host IVORs. That way we don't
44 * have to swap the IVORs on every guest/host transition. */
45 ivor[0] = mfspr(SPRN_IVOR0);
46 ivor[1] = mfspr(SPRN_IVOR1);
47 ivor[2] = mfspr(SPRN_IVOR2);
48 ivor[3] = mfspr(SPRN_IVOR3);
49 ivor[4] = mfspr(SPRN_IVOR4);
50 ivor[5] = mfspr(SPRN_IVOR5);
51 ivor[6] = mfspr(SPRN_IVOR6);
52 ivor[7] = mfspr(SPRN_IVOR7);
53 ivor[8] = mfspr(SPRN_IVOR8);
54 ivor[9] = mfspr(SPRN_IVOR9);
55 ivor[10] = mfspr(SPRN_IVOR10);
56 ivor[11] = mfspr(SPRN_IVOR11);
57 ivor[12] = mfspr(SPRN_IVOR12);
58 ivor[13] = mfspr(SPRN_IVOR13);
59 ivor[14] = mfspr(SPRN_IVOR14);
60 ivor[15] = mfspr(SPRN_IVOR15);
61
62 for (i = 0; i < 16; i++) {
63 if (ivor[i] > max_ivor)
64 max_ivor = ivor[i];
65
66 memcpy((void *)kvmppc_booke_handlers + ivor[i],
67 kvmppc_handlers_start + i * kvmppc_handler_len,
68 kvmppc_handler_len);
69 }
70 flush_icache_range(kvmppc_booke_handlers,
71 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
72
73 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
74}
75
76static void __exit kvmppc_booke_exit(void)
77{
78 free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
79 kvm_exit();
80}
81
82module_init(kvmppc_booke_init)
83module_exit(kvmppc_booke_exit)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 95e165baf85f..084ebcd7dd83 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -107,6 +107,18 @@ _GLOBAL(kvmppc_resume_host)
107 li r6, 1 107 li r6, 1
108 slw r6, r6, r5 108 slw r6, r6, r5
109 109
110#ifdef CONFIG_KVM_EXIT_TIMING
111 /* save exit time */
1121:
113 mfspr r7, SPRN_TBRU
114 mfspr r8, SPRN_TBRL
115 mfspr r9, SPRN_TBRU
116 cmpw r9, r7
117 bne 1b
118 stw r8, VCPU_TIMING_EXIT_TBL(r4)
119 stw r9, VCPU_TIMING_EXIT_TBU(r4)
120#endif
121
110 /* Save the faulting instruction and all GPRs for emulation. */ 122 /* Save the faulting instruction and all GPRs for emulation. */
111 andi. r7, r6, NEED_INST_MASK 123 andi. r7, r6, NEED_INST_MASK
112 beq ..skip_inst_copy 124 beq ..skip_inst_copy
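
The CONFIG_KVM_EXIT_TIMING stanzas added to the exit path here (and to the entry path further down) read the 64-bit timebase with the classic TBU/TBL/TBU dance: if the upper half changed between the two reads, the lower half wrapped mid-sequence and the sample is retried. The same idiom in C, with the mfspr reads replaced by a fake counter so it can run anywhere:

#include <stdio.h>
#include <stdint.h>

/* Stand-ins for mfspr SPRN_TBRU / SPRN_TBRL: time "ticks" once per TBL read,
 * starting right at a low-word wrap so the retry path is exercised. */
static uint64_t fake_tb = 0x00000001ffffffffull;
static uint32_t read_tbu(void) { return fake_tb >> 32; }
static uint32_t read_tbl(void) { fake_tb++; return (uint32_t)fake_tb; }

static uint64_t read_timebase(void)
{
	uint32_t hi, lo, hi2;

	do {
		hi  = read_tbu();
		lo  = read_tbl();
		hi2 = read_tbu();
	} while (hi != hi2);	/* low word wrapped between reads: retry */

	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	/* prints: tb = 0x0000000200000001 (first sample was torn and retried) */
	printf("tb = 0x%016llx\n", (unsigned long long)read_timebase());
	return 0;
}
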
@@ -335,54 +347,6 @@ lightweight_exit:
335 lwz r3, VCPU_SHADOW_PID(r4) 347 lwz r3, VCPU_SHADOW_PID(r4)
336 mtspr SPRN_PID, r3 348 mtspr SPRN_PID, r3
337 349
338 /* Prevent all asynchronous TLB updates. */
339 mfmsr r5
340 lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
341 ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
342 andc r6, r5, r6
343 mtmsr r6
344
345 /* Load the guest mappings, leaving the host's "pinned" kernel mappings
346 * in place. */
347 mfspr r10, SPRN_MMUCR /* Save host MMUCR. */
348 li r5, PPC44x_TLB_SIZE
349 lis r5, tlb_44x_hwater@ha
350 lwz r5, tlb_44x_hwater@l(r5)
351 mtctr r5
352 addi r9, r4, VCPU_SHADOW_TLB
353 addi r5, r4, VCPU_SHADOW_MOD
354 li r3, 0
3551:
356 lbzx r7, r3, r5
357 cmpwi r7, 0
358 beq 3f
359
360 /* Load guest entry. */
361 mulli r11, r3, TLBE_BYTES
362 add r11, r11, r9
363 lwz r7, 0(r11)
364 mtspr SPRN_MMUCR, r7
365 lwz r7, 4(r11)
366 tlbwe r7, r3, PPC44x_TLB_PAGEID
367 lwz r7, 8(r11)
368 tlbwe r7, r3, PPC44x_TLB_XLAT
369 lwz r7, 12(r11)
370 tlbwe r7, r3, PPC44x_TLB_ATTRIB
3713:
372 addi r3, r3, 1 /* Increment index. */
373 bdnz 1b
374
375 mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */
376
377 /* Clear bitmap of modified TLB entries */
378 li r5, PPC44x_TLB_SIZE>>2
379 mtctr r5
380 addi r5, r4, VCPU_SHADOW_MOD - 4
381 li r6, 0
3821:
383 stwu r6, 4(r5)
384 bdnz 1b
385
386 iccci 0, 0 /* XXX hack */ 350 iccci 0, 0 /* XXX hack */
387 351
388 /* Load some guest volatiles. */ 352 /* Load some guest volatiles. */
@@ -423,6 +387,18 @@ lightweight_exit:
423 lwz r3, VCPU_SPRG7(r4) 387 lwz r3, VCPU_SPRG7(r4)
424 mtspr SPRN_SPRG7, r3 388 mtspr SPRN_SPRG7, r3
425 389
390#ifdef CONFIG_KVM_EXIT_TIMING
391 /* save enter time */
3921:
393 mfspr r6, SPRN_TBRU
394 mfspr r7, SPRN_TBRL
395 mfspr r8, SPRN_TBRU
396 cmpw r8, r6
397 bne 1b
398 stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4)
399 stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
400#endif
401
426 /* Finish loading guest volatiles and jump to guest. */ 402 /* Finish loading guest volatiles and jump to guest. */
427 lwz r3, VCPU_CTR(r4) 403 lwz r3, VCPU_CTR(r4)
428 mtctr r3 404 mtctr r3
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 0fce4fbdc20d..d1d38daa93fb 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -23,161 +23,14 @@
23#include <linux/string.h> 23#include <linux/string.h>
24#include <linux/kvm_host.h> 24#include <linux/kvm_host.h>
25 25
26#include <asm/dcr.h> 26#include <asm/reg.h>
27#include <asm/dcr-regs.h>
28#include <asm/time.h> 27#include <asm/time.h>
29#include <asm/byteorder.h> 28#include <asm/byteorder.h>
30#include <asm/kvm_ppc.h> 29#include <asm/kvm_ppc.h>
30#include <asm/disassemble.h>
31#include "timing.h"
31 32
32#include "44x_tlb.h" 33void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
33
34/* Instruction decoding */
35static inline unsigned int get_op(u32 inst)
36{
37 return inst >> 26;
38}
39
40static inline unsigned int get_xop(u32 inst)
41{
42 return (inst >> 1) & 0x3ff;
43}
44
45static inline unsigned int get_sprn(u32 inst)
46{
47 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
48}
49
50static inline unsigned int get_dcrn(u32 inst)
51{
52 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
53}
54
55static inline unsigned int get_rt(u32 inst)
56{
57 return (inst >> 21) & 0x1f;
58}
59
60static inline unsigned int get_rs(u32 inst)
61{
62 return (inst >> 21) & 0x1f;
63}
64
65static inline unsigned int get_ra(u32 inst)
66{
67 return (inst >> 16) & 0x1f;
68}
69
70static inline unsigned int get_rb(u32 inst)
71{
72 return (inst >> 11) & 0x1f;
73}
74
75static inline unsigned int get_rc(u32 inst)
76{
77 return inst & 0x1;
78}
79
80static inline unsigned int get_ws(u32 inst)
81{
82 return (inst >> 11) & 0x1f;
83}
84
85static inline unsigned int get_d(u32 inst)
86{
87 return inst & 0xffff;
88}
89
90static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
91 const struct tlbe *tlbe)
92{
93 gpa_t gpa;
94
95 if (!get_tlb_v(tlbe))
96 return 0;
97
98 /* Does it match current guest AS? */
99 /* XXX what about IS != DS? */
100 if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
101 return 0;
102
103 gpa = get_tlb_raddr(tlbe);
104 if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
105 /* Mapping is not for RAM. */
106 return 0;
107
108 return 1;
109}
110
111static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
112{
113 u64 eaddr;
114 u64 raddr;
115 u64 asid;
116 u32 flags;
117 struct tlbe *tlbe;
118 unsigned int ra;
119 unsigned int rs;
120 unsigned int ws;
121 unsigned int index;
122
123 ra = get_ra(inst);
124 rs = get_rs(inst);
125 ws = get_ws(inst);
126
127 index = vcpu->arch.gpr[ra];
128 if (index > PPC44x_TLB_SIZE) {
129 printk("%s: index %d\n", __func__, index);
130 kvmppc_dump_vcpu(vcpu);
131 return EMULATE_FAIL;
132 }
133
134 tlbe = &vcpu->arch.guest_tlb[index];
135
136 /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
137 if (tlbe->word0 & PPC44x_TLB_VALID) {
138 eaddr = get_tlb_eaddr(tlbe);
139 asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
140 kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid);
141 }
142
143 switch (ws) {
144 case PPC44x_TLB_PAGEID:
145 tlbe->tid = vcpu->arch.mmucr & 0xff;
146 tlbe->word0 = vcpu->arch.gpr[rs];
147 break;
148
149 case PPC44x_TLB_XLAT:
150 tlbe->word1 = vcpu->arch.gpr[rs];
151 break;
152
153 case PPC44x_TLB_ATTRIB:
154 tlbe->word2 = vcpu->arch.gpr[rs];
155 break;
156
157 default:
158 return EMULATE_FAIL;
159 }
160
161 if (tlbe_is_host_safe(vcpu, tlbe)) {
162 eaddr = get_tlb_eaddr(tlbe);
163 raddr = get_tlb_raddr(tlbe);
164 asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
165 flags = tlbe->word2 & 0xffff;
166
167 /* Create a 4KB mapping on the host. If the guest wanted a
168 * large page, only the first 4KB is mapped here and the rest
169 * are mapped on the fly. */
170 kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
171 }
172
173 KVMTRACE_5D(GTLB_WRITE, vcpu, index,
174 tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2,
175 handler);
176
177 return EMULATE_DONE;
178}
179
180static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
181{ 34{
182 if (vcpu->arch.tcr & TCR_DIE) { 35 if (vcpu->arch.tcr & TCR_DIE) {
183 /* The decrementer ticks at the same rate as the timebase, so 36 /* The decrementer ticks at the same rate as the timebase, so
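
The block of get_op()/get_xop()/get_rt()/get_ra()/... helpers deleted above moves into the new <asm/disassemble.h> included at the top of the file, so the field extraction itself does not change: primary opcode in the top six bits, extended opcode in bits 1-10, register operands in five-bit groups, a 16-bit displacement for D-form instructions. A quick standalone decode of one sample word using the same shifts (illustrative, not the header itself):

#include <stdio.h>
#include <stdint.h>

/* Same extraction as the helpers that moved to <asm/disassemble.h>. */
static unsigned int get_op(uint32_t inst) { return inst >> 26; }
static unsigned int get_rt(uint32_t inst) { return (inst >> 21) & 0x1f; }
static unsigned int get_ra(uint32_t inst) { return (inst >> 16) & 0x1f; }
static unsigned int get_d(uint32_t inst)  { return inst & 0xffff; }

int main(void)
{
	uint32_t inst = 0x80610008;	/* lwz r3, 8(r1) */

	/* prints: op=32 rt=3 ra=1 d=8 (lwz: primary opcode 32) */
	printf("op=%u rt=%u ra=%u d=%u\n",
	       get_op(inst), get_rt(inst), get_ra(inst), get_d(inst));
	return 0;
}
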
@@ -193,12 +46,6 @@ static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
193 } 46 }
194} 47}
195 48
196static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
197{
198 vcpu->arch.pc = vcpu->arch.srr0;
199 kvmppc_set_msr(vcpu, vcpu->arch.srr1);
200}
201
202/* XXX to do: 49/* XXX to do:
203 * lhax 50 * lhax
204 * lhaux 51 * lhaux
@@ -213,40 +60,30 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
213 * 60 *
214 * XXX is_bigendian should depend on MMU mapping or MSR[LE] 61 * XXX is_bigendian should depend on MMU mapping or MSR[LE]
215 */ 62 */
63/* XXX Should probably auto-generate instruction decoding for a particular core
64 * from opcode tables in the future. */
216int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 65int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
217{ 66{
218 u32 inst = vcpu->arch.last_inst; 67 u32 inst = vcpu->arch.last_inst;
219 u32 ea; 68 u32 ea;
220 int ra; 69 int ra;
221 int rb; 70 int rb;
222 int rc;
223 int rs; 71 int rs;
224 int rt; 72 int rt;
225 int sprn; 73 int sprn;
226 int dcrn;
227 enum emulation_result emulated = EMULATE_DONE; 74 enum emulation_result emulated = EMULATE_DONE;
228 int advance = 1; 75 int advance = 1;
229 76
77 /* this default type might be overwritten by subcategories */
78 kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
79
230 switch (get_op(inst)) { 80 switch (get_op(inst)) {
231 case 3: /* trap */ 81 case 3: /* trap */
232 printk("trap!\n"); 82 vcpu->arch.esr |= ESR_PTR;
233 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); 83 kvmppc_core_queue_program(vcpu);
234 advance = 0; 84 advance = 0;
235 break; 85 break;
236 86
237 case 19:
238 switch (get_xop(inst)) {
239 case 50: /* rfi */
240 kvmppc_emul_rfi(vcpu);
241 advance = 0;
242 break;
243
244 default:
245 emulated = EMULATE_FAIL;
246 break;
247 }
248 break;
249
250 case 31: 87 case 31:
251 switch (get_xop(inst)) { 88 switch (get_xop(inst)) {
252 89
@@ -255,27 +92,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
255 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 92 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
256 break; 93 break;
257 94
258 case 83: /* mfmsr */
259 rt = get_rt(inst);
260 vcpu->arch.gpr[rt] = vcpu->arch.msr;
261 break;
262
263 case 87: /* lbzx */ 95 case 87: /* lbzx */
264 rt = get_rt(inst); 96 rt = get_rt(inst);
265 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 97 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
266 break; 98 break;
267 99
268 case 131: /* wrtee */
269 rs = get_rs(inst);
270 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
271 | (vcpu->arch.gpr[rs] & MSR_EE);
272 break;
273
274 case 146: /* mtmsr */
275 rs = get_rs(inst);
276 kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
277 break;
278
279 case 151: /* stwx */ 100 case 151: /* stwx */
280 rs = get_rs(inst); 101 rs = get_rs(inst);
281 emulated = kvmppc_handle_store(run, vcpu, 102 emulated = kvmppc_handle_store(run, vcpu,
@@ -283,11 +104,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
283 4, 1); 104 4, 1);
284 break; 105 break;
285 106
286 case 163: /* wrteei */
287 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
288 | (inst & MSR_EE);
289 break;
290
291 case 215: /* stbx */ 107 case 215: /* stbx */
292 rs = get_rs(inst); 108 rs = get_rs(inst);
293 emulated = kvmppc_handle_store(run, vcpu, 109 emulated = kvmppc_handle_store(run, vcpu,
@@ -328,42 +144,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
328 vcpu->arch.gpr[ra] = ea; 144 vcpu->arch.gpr[ra] = ea;
329 break; 145 break;
330 146
331 case 323: /* mfdcr */
332 dcrn = get_dcrn(inst);
333 rt = get_rt(inst);
334
335 /* The guest may access CPR0 registers to determine the timebase
336 * frequency, and it must know the real host frequency because it
337 * can directly access the timebase registers.
338 *
339 * It would be possible to emulate those accesses in userspace,
340 * but userspace can really only figure out the end frequency.
341 * We could decompose that into the factors that compute it, but
342 * that's tricky math, and it's easier to just report the real
343 * CPR0 values.
344 */
345 switch (dcrn) {
346 case DCRN_CPR0_CONFIG_ADDR:
347 vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
348 break;
349 case DCRN_CPR0_CONFIG_DATA:
350 local_irq_disable();
351 mtdcr(DCRN_CPR0_CONFIG_ADDR,
352 vcpu->arch.cpr0_cfgaddr);
353 vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
354 local_irq_enable();
355 break;
356 default:
357 run->dcr.dcrn = dcrn;
358 run->dcr.data = 0;
359 run->dcr.is_write = 0;
360 vcpu->arch.io_gpr = rt;
361 vcpu->arch.dcr_needed = 1;
362 emulated = EMULATE_DO_DCR;
363 }
364
365 break;
366
367 case 339: /* mfspr */ 147 case 339: /* mfspr */
368 sprn = get_sprn(inst); 148 sprn = get_sprn(inst);
369 rt = get_rt(inst); 149 rt = get_rt(inst);
@@ -373,26 +153,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
373 vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; 153 vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
374 case SPRN_SRR1: 154 case SPRN_SRR1:
375 vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; 155 vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
376 case SPRN_MMUCR:
377 vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
378 case SPRN_PID:
379 vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
380 case SPRN_IVPR:
381 vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
382 case SPRN_CCR0:
383 vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
384 case SPRN_CCR1:
385 vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
386 case SPRN_PVR: 156 case SPRN_PVR:
387 vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; 157 vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
388 case SPRN_DEAR:
389 vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
390 case SPRN_ESR:
391 vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
392 case SPRN_DBCR0:
393 vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
394 case SPRN_DBCR1:
395 vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
396 158
397 /* Note: mftb and TBRL/TBWL are user-accessible, so 159 /* Note: mftb and TBRL/TBWL are user-accessible, so
398 * the guest can always access the real TB anyways. 160 * the guest can always access the real TB anyways.
@@ -413,42 +175,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
413 /* Note: SPRG4-7 are user-readable, so we don't get 175 /* Note: SPRG4-7 are user-readable, so we don't get
414 * a trap. */ 176 * a trap. */
415 177
416 case SPRN_IVOR0:
417 vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
418 case SPRN_IVOR1:
419 vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
420 case SPRN_IVOR2:
421 vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
422 case SPRN_IVOR3:
423 vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
424 case SPRN_IVOR4:
425 vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
426 case SPRN_IVOR5:
427 vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
428 case SPRN_IVOR6:
429 vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
430 case SPRN_IVOR7:
431 vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
432 case SPRN_IVOR8:
433 vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
434 case SPRN_IVOR9:
435 vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
436 case SPRN_IVOR10:
437 vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
438 case SPRN_IVOR11:
439 vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
440 case SPRN_IVOR12:
441 vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
442 case SPRN_IVOR13:
443 vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
444 case SPRN_IVOR14:
445 vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
446 case SPRN_IVOR15:
447 vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
448
449 default: 178 default:
450 printk("mfspr: unknown spr %x\n", sprn); 179 emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
451 vcpu->arch.gpr[rt] = 0; 180 if (emulated == EMULATE_FAIL) {
181 printk("mfspr: unknown spr %x\n", sprn);
182 vcpu->arch.gpr[rt] = 0;
183 }
452 break; 184 break;
453 } 185 }
454 break; 186 break;
@@ -478,25 +210,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
478 vcpu->arch.gpr[ra] = ea; 210 vcpu->arch.gpr[ra] = ea;
479 break; 211 break;
480 212
481 case 451: /* mtdcr */
482 dcrn = get_dcrn(inst);
483 rs = get_rs(inst);
484
485 /* emulate some access in kernel */
486 switch (dcrn) {
487 case DCRN_CPR0_CONFIG_ADDR:
488 vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
489 break;
490 default:
491 run->dcr.dcrn = dcrn;
492 run->dcr.data = vcpu->arch.gpr[rs];
493 run->dcr.is_write = 1;
494 vcpu->arch.dcr_needed = 1;
495 emulated = EMULATE_DO_DCR;
496 }
497
498 break;
499
500 case 467: /* mtspr */ 213 case 467: /* mtspr */
501 sprn = get_sprn(inst); 214 sprn = get_sprn(inst);
502 rs = get_rs(inst); 215 rs = get_rs(inst);
@@ -505,22 +218,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
505 vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; 218 vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
506 case SPRN_SRR1: 219 case SPRN_SRR1:
507 vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; 220 vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
508 case SPRN_MMUCR:
509 vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
510 case SPRN_PID:
511 kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
512 case SPRN_CCR0:
513 vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
514 case SPRN_CCR1:
515 vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
516 case SPRN_DEAR:
517 vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
518 case SPRN_ESR:
519 vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
520 case SPRN_DBCR0:
521 vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
522 case SPRN_DBCR1:
523 vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
524 221
525 /* XXX We need to context-switch the timebase for 222 /* XXX We need to context-switch the timebase for
526 * watchdog and FIT. */ 223 * watchdog and FIT. */
@@ -532,14 +229,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
532 kvmppc_emulate_dec(vcpu); 229 kvmppc_emulate_dec(vcpu);
533 break; 230 break;
534 231
535 case SPRN_TSR:
536 vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
537
538 case SPRN_TCR:
539 vcpu->arch.tcr = vcpu->arch.gpr[rs];
540 kvmppc_emulate_dec(vcpu);
541 break;
542
543 case SPRN_SPRG0: 232 case SPRN_SPRG0:
544 vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break; 233 vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
545 case SPRN_SPRG1: 234 case SPRN_SPRG1:
@@ -549,56 +238,10 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
549 case SPRN_SPRG3: 238 case SPRN_SPRG3:
550 vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; 239 vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
551 240
552 /* Note: SPRG4-7 are user-readable. These values are
553 * loaded into the real SPRGs when resuming the
554 * guest. */
555 case SPRN_SPRG4:
556 vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
557 case SPRN_SPRG5:
558 vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
559 case SPRN_SPRG6:
560 vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
561 case SPRN_SPRG7:
562 vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
563
564 case SPRN_IVPR:
565 vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
566 case SPRN_IVOR0:
567 vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
568 case SPRN_IVOR1:
569 vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
570 case SPRN_IVOR2:
571 vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
572 case SPRN_IVOR3:
573 vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
574 case SPRN_IVOR4:
575 vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
576 case SPRN_IVOR5:
577 vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
578 case SPRN_IVOR6:
579 vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
580 case SPRN_IVOR7:
581 vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
582 case SPRN_IVOR8:
583 vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
584 case SPRN_IVOR9:
585 vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
586 case SPRN_IVOR10:
587 vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
588 case SPRN_IVOR11:
589 vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
590 case SPRN_IVOR12:
591 vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
592 case SPRN_IVOR13:
593 vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
594 case SPRN_IVOR14:
595 vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
596 case SPRN_IVOR15:
597 vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
598
599 default: 241 default:
600 printk("mtspr: unknown spr %x\n", sprn); 242 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
601 emulated = EMULATE_FAIL; 243 if (emulated == EMULATE_FAIL)
244 printk("mtspr: unknown spr %x\n", sprn);
602 break; 245 break;
603 } 246 }
604 break; 247 break;
@@ -629,36 +272,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
629 4, 0); 272 4, 0);
630 break; 273 break;
631 274
632 case 978: /* tlbwe */
633 emulated = kvmppc_emul_tlbwe(vcpu, inst);
634 break;
635
636 case 914: { /* tlbsx */
637 int index;
638 unsigned int as = get_mmucr_sts(vcpu);
639 unsigned int pid = get_mmucr_stid(vcpu);
640
641 rt = get_rt(inst);
642 ra = get_ra(inst);
643 rb = get_rb(inst);
644 rc = get_rc(inst);
645
646 ea = vcpu->arch.gpr[rb];
647 if (ra)
648 ea += vcpu->arch.gpr[ra];
649
650 index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
651 if (rc) {
652 if (index < 0)
653 vcpu->arch.cr &= ~0x20000000;
654 else
655 vcpu->arch.cr |= 0x20000000;
656 }
657 vcpu->arch.gpr[rt] = index;
658
659 }
660 break;
661
662 case 790: /* lhbrx */ 275 case 790: /* lhbrx */
663 rt = get_rt(inst); 276 rt = get_rt(inst);
664 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); 277 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
@@ -674,14 +287,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
674 2, 0); 287 2, 0);
675 break; 288 break;
676 289
677 case 966: /* iccci */
678 break;
679
680 default: 290 default:
681 printk("unknown: op %d xop %d\n", get_op(inst), 291 /* Attempt core-specific emulation below. */
682 get_xop(inst));
683 emulated = EMULATE_FAIL; 292 emulated = EMULATE_FAIL;
684 break;
685 } 293 }
686 break; 294 break;
687 295
@@ -764,12 +372,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
764 break; 372 break;
765 373
766 default: 374 default:
767 printk("unknown op %d\n", get_op(inst));
768 emulated = EMULATE_FAIL; 375 emulated = EMULATE_FAIL;
769 break;
770 } 376 }
771 377
772 KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit); 378 if (emulated == EMULATE_FAIL) {
379 emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
380 if (emulated == EMULATE_FAIL) {
381 advance = 0;
382 printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
383 "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
384 }
385 }
386
387 KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit);
773 388
774 if (advance) 389 if (advance)
775 vcpu->arch.pc += 4; /* Advance past emulated instruction. */ 390 vcpu->arch.pc += 4; /* Advance past emulated instruction. */
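
The net effect of this emulate.c rewrite is a two-level dispatch: the generic switch keeps only the instructions common across Book E cores, and anything it does not recognise (rfi/mtmsr and friends, the 44x SPRs, tlbwe/tlbsx, DCR accesses) now falls through to kvmppc_core_emulate_op(), kvmppc_core_emulate_mtspr() and kvmppc_core_emulate_mfspr(), whose 440 versions live in the new 44x_emulate.c; only if both levels return EMULATE_FAIL is the instruction reported as unemulatable. A condensed model of that fallback chain (toy code, not the kernel interfaces):

#include <stdio.h>

enum { EMULATE_DONE, EMULATE_FAIL };

static int generic_emulate(unsigned int key)
{
	return (key == 31 || key == 32) ? EMULATE_DONE : EMULATE_FAIL;	/* shared ops */
}

static int core_emulate(unsigned int key)
{
	return (key == 978) ? EMULATE_DONE : EMULATE_FAIL;	/* e.g. 44x tlbwe (xop 978) */
}

static void emulate(unsigned int key)
{
	int r = generic_emulate(key);

	if (r == EMULATE_FAIL)
		r = core_emulate(key);		/* second chance: core-specific handler */

	printf("key %u: %s\n", key, r == EMULATE_FAIL ? "could not emulate" : "emulated");
}

int main(void)
{
	emulate(32);	/* handled by the generic level */
	emulate(978);	/* handled by the core-specific level */
	emulate(999);	/* neither: reported as a failure */
	return 0;
}
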
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8bef0efcdfe1..2822c8ccfaaf 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -28,9 +28,9 @@
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
29#include <asm/kvm_ppc.h> 29#include <asm/kvm_ppc.h>
30#include <asm/tlbflush.h> 30#include <asm/tlbflush.h>
31#include "timing.h"
31#include "../mm/mmu_decl.h" 32#include "../mm/mmu_decl.h"
32 33
33
34gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 34gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
35{ 35{
36 return gfn; 36 return gfn;
@@ -99,14 +99,7 @@ void kvm_arch_hardware_unsetup(void)
99 99
100void kvm_arch_check_processor_compat(void *rtn) 100void kvm_arch_check_processor_compat(void *rtn)
101{ 101{
102 int r; 102 *(int *)rtn = kvmppc_core_check_processor_compat();
103
104 if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
105 r = 0;
106 else
107 r = -ENOTSUPP;
108
109 *(int *)rtn = r;
110} 103}
111 104
112struct kvm *kvm_arch_create_vm(void) 105struct kvm *kvm_arch_create_vm(void)
@@ -144,9 +137,6 @@ int kvm_dev_ioctl_check_extension(long ext)
144 int r; 137 int r;
145 138
146 switch (ext) { 139 switch (ext) {
147 case KVM_CAP_USER_MEMORY:
148 r = 1;
149 break;
150 case KVM_CAP_COALESCED_MMIO: 140 case KVM_CAP_COALESCED_MMIO:
151 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 141 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
152 break; 142 break;
@@ -179,30 +169,15 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
179struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) 169struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
180{ 170{
181 struct kvm_vcpu *vcpu; 171 struct kvm_vcpu *vcpu;
182 int err; 172 vcpu = kvmppc_core_vcpu_create(kvm, id);
183 173 kvmppc_create_vcpu_debugfs(vcpu, id);
184 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
185 if (!vcpu) {
186 err = -ENOMEM;
187 goto out;
188 }
189
190 err = kvm_vcpu_init(vcpu, kvm, id);
191 if (err)
192 goto free_vcpu;
193
194 return vcpu; 174 return vcpu;
195
196free_vcpu:
197 kmem_cache_free(kvm_vcpu_cache, vcpu);
198out:
199 return ERR_PTR(err);
200} 175}
201 176
202void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 177void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
203{ 178{
204 kvm_vcpu_uninit(vcpu); 179 kvmppc_remove_vcpu_debugfs(vcpu);
205 kmem_cache_free(kvm_vcpu_cache, vcpu); 180 kvmppc_core_vcpu_free(vcpu);
206} 181}
207 182
208void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 183void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -212,16 +187,14 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
212 187
213int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 188int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
214{ 189{
215 unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER]; 190 return kvmppc_core_pending_dec(vcpu);
216
217 return test_bit(priority, &vcpu->arch.pending_exceptions);
218} 191}
219 192
220static void kvmppc_decrementer_func(unsigned long data) 193static void kvmppc_decrementer_func(unsigned long data)
221{ 194{
222 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; 195 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
223 196
224 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); 197 kvmppc_core_queue_dec(vcpu);
225 198
226 if (waitqueue_active(&vcpu->wq)) { 199 if (waitqueue_active(&vcpu->wq)) {
227 wake_up_interruptible(&vcpu->wq); 200 wake_up_interruptible(&vcpu->wq);
@@ -242,96 +215,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
242 kvmppc_core_destroy_mmu(vcpu); 215 kvmppc_core_destroy_mmu(vcpu);
243} 216}
244 217
245/* Note: clearing MSR[DE] just means that the debug interrupt will not be
246 * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
247 * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
248 * will be delivered as an "imprecise debug event" (which is indicated by
249 * DBSR[IDE]).
250 */
251static void kvmppc_disable_debug_interrupts(void)
252{
253 mtmsr(mfmsr() & ~MSR_DE);
254}
255
256static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu)
257{
258 kvmppc_disable_debug_interrupts();
259
260 mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
261 mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
262 mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
263 mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
264 mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
265 mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
266 mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
267 mtmsr(vcpu->arch.host_msr);
268}
269
270static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu)
271{
272 struct kvm_guest_debug *dbg = &vcpu->guest_debug;
273 u32 dbcr0 = 0;
274
275 vcpu->arch.host_msr = mfmsr();
276 kvmppc_disable_debug_interrupts();
277
278 /* Save host debug register state. */
279 vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
280 vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
281 vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
282 vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
283 vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
284 vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
285 vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
286
287 /* set registers up for guest */
288
289 if (dbg->bp[0]) {
290 mtspr(SPRN_IAC1, dbg->bp[0]);
291 dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
292 }
293 if (dbg->bp[1]) {
294 mtspr(SPRN_IAC2, dbg->bp[1]);
295 dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
296 }
297 if (dbg->bp[2]) {
298 mtspr(SPRN_IAC3, dbg->bp[2]);
299 dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
300 }
301 if (dbg->bp[3]) {
302 mtspr(SPRN_IAC4, dbg->bp[3]);
303 dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
304 }
305
306 mtspr(SPRN_DBCR0, dbcr0);
307 mtspr(SPRN_DBCR1, 0);
308 mtspr(SPRN_DBCR2, 0);
309}
310
311void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 218void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
312{ 219{
313 int i;
314
315 if (vcpu->guest_debug.enabled) 220 if (vcpu->guest_debug.enabled)
316 kvmppc_load_guest_debug_registers(vcpu); 221 kvmppc_core_load_guest_debugstate(vcpu);
317 222
318 /* Mark every guest entry in the shadow TLB entry modified, so that they 223 kvmppc_core_vcpu_load(vcpu, cpu);
319 * will all be reloaded on the next vcpu run (instead of being
320 * demand-faulted). */
321 for (i = 0; i <= tlb_44x_hwater; i++)
322 kvmppc_tlbe_set_modified(vcpu, i);
323} 224}
324 225
325void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 226void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
326{ 227{
327 if (vcpu->guest_debug.enabled) 228 if (vcpu->guest_debug.enabled)
328 kvmppc_restore_host_debug_state(vcpu); 229 kvmppc_core_load_host_debugstate(vcpu);
329 230
330 /* Don't leave guest TLB entries resident when being de-scheduled. */ 231 /* Don't leave guest TLB entries resident when being de-scheduled. */
331 /* XXX It would be nice to differentiate between heavyweight exit and 232 /* XXX It would be nice to differentiate between heavyweight exit and
332 * sched_out here, since we could avoid the TLB flush for heavyweight 233 * sched_out here, since we could avoid the TLB flush for heavyweight
333 * exits. */ 234 * exits. */
334 _tlbil_all(); 235 _tlbil_all();
236 kvmppc_core_vcpu_put(vcpu);
335} 237}
336 238
337int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, 239int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -355,14 +257,14 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
355static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, 257static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
356 struct kvm_run *run) 258 struct kvm_run *run)
357{ 259{
358 u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 260 ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
359 *gpr = run->dcr.data; 261 *gpr = run->dcr.data;
360} 262}
361 263
362static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, 264static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
363 struct kvm_run *run) 265 struct kvm_run *run)
364{ 266{
365 u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 267 ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
366 268
367 if (run->mmio.len > sizeof(*gpr)) { 269 if (run->mmio.len > sizeof(*gpr)) {
368 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); 270 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
@@ -460,7 +362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
460 vcpu->arch.dcr_needed = 0; 362 vcpu->arch.dcr_needed = 0;
461 } 363 }
462 364
463 kvmppc_check_and_deliver_interrupts(vcpu); 365 kvmppc_core_deliver_interrupts(vcpu);
464 366
465 local_irq_disable(); 367 local_irq_disable();
466 kvm_guest_enter(); 368 kvm_guest_enter();
@@ -478,7 +380,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
478 380
479int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) 381int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
480{ 382{
481 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); 383 kvmppc_core_queue_external(vcpu, irq);
482 384
483 if (waitqueue_active(&vcpu->wq)) { 385 if (waitqueue_active(&vcpu->wq)) {
484 wake_up_interruptible(&vcpu->wq); 386 wake_up_interruptible(&vcpu->wq);
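
With powerpc.c reduced to forwarding into per-core hooks, the ppc440 platform test that used to sit in kvm_arch_check_processor_compat() becomes one of those hooks. A sketch of what kvmppc_core_check_processor_compat() presumably looks like under the new contract (0 for a supported core, -ENOTSUPP otherwise), mirroring the check deleted above:

#include <linux/string.h>
#include <linux/errno.h>
#include <asm/cputable.h>

/* Illustrative sketch: per-core hook now called from
 * kvm_arch_check_processor_compat(); mirrors the ppc440 check that the
 * hunk above removes from the generic code. */
int kvmppc_core_check_processor_compat(void)
{
	if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
		return 0;

	return -ENOTSUPP;
}
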
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
new file mode 100644
index 000000000000..47ee603f558e
--- /dev/null
+++ b/arch/powerpc/kvm/timing.c
@@ -0,0 +1,239 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
19 */
20
21#include <linux/kvm_host.h>
22#include <linux/fs.h>
23#include <linux/seq_file.h>
24#include <linux/debugfs.h>
25#include <linux/uaccess.h>
26
27#include <asm/time.h>
28#include <asm-generic/div64.h>
29
30#include "timing.h"
31
32void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
33{
34 int i;
35
36 /* pause guest execution to avoid concurrent updates */
37 local_irq_disable();
38 mutex_lock(&vcpu->mutex);
39
40 vcpu->arch.last_exit_type = 0xDEAD;
41 for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
42 vcpu->arch.timing_count_type[i] = 0;
43 vcpu->arch.timing_max_duration[i] = 0;
44 vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF;
45 vcpu->arch.timing_sum_duration[i] = 0;
46 vcpu->arch.timing_sum_quad_duration[i] = 0;
47 }
48 vcpu->arch.timing_last_exit = 0;
49 vcpu->arch.timing_exit.tv64 = 0;
50 vcpu->arch.timing_last_enter.tv64 = 0;
51
52 mutex_unlock(&vcpu->mutex);
53 local_irq_enable();
54}
55
56static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
57{
58 u64 old;
59
60 do_div(duration, tb_ticks_per_usec);
61 if (unlikely(duration > 0xFFFFFFFF)) {
62 printk(KERN_ERR"%s - duration too big -> overflow"
63 " duration %lld type %d exit #%d\n",
64 __func__, duration, type,
65 vcpu->arch.timing_count_type[type]);
66 return;
67 }
68
69 vcpu->arch.timing_count_type[type]++;
70
71 /* sum */
72 old = vcpu->arch.timing_sum_duration[type];
73 vcpu->arch.timing_sum_duration[type] += duration;
74 if (unlikely(old > vcpu->arch.timing_sum_duration[type])) {
75 printk(KERN_ERR"%s - wrap adding sum of durations"
76 " old %lld new %lld type %d exit # of type %d\n",
77 __func__, old, vcpu->arch.timing_sum_duration[type],
78 type, vcpu->arch.timing_count_type[type]);
79 }
80
81 /* square sum */
82 old = vcpu->arch.timing_sum_quad_duration[type];
83 vcpu->arch.timing_sum_quad_duration[type] += (duration*duration);
84 if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) {
85 printk(KERN_ERR"%s - wrap adding sum of squared durations"
86 " old %lld new %lld type %d exit # of type %d\n",
87 __func__, old,
88 vcpu->arch.timing_sum_quad_duration[type],
89 type, vcpu->arch.timing_count_type[type]);
90 }
91
92 /* set min/max */
93 if (unlikely(duration < vcpu->arch.timing_min_duration[type]))
94 vcpu->arch.timing_min_duration[type] = duration;
95 if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
96 vcpu->arch.timing_max_duration[type] = duration;
97}
98
99void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
100{
101 u64 exit = vcpu->arch.timing_last_exit;
102 u64 enter = vcpu->arch.timing_last_enter.tv64;
103
104 /* save exit time, used next exit when the reenter time is known */
105 vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64;
106
107 if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0))
108 return; /* skip incomplete cycle (e.g. after reset) */
109
110 /* update statistics for average and standard deviation */
111 add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type);
112 /* enter -> timing_last_exit is time spent in guest - log this too */
113 add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter),
114 TIMEINGUEST);
115}
116
117static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
118 [MMIO_EXITS] = "MMIO",
119 [DCR_EXITS] = "DCR",
120 [SIGNAL_EXITS] = "SIGNAL",
121 [ITLB_REAL_MISS_EXITS] = "ITLBREAL",
122 [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT",
123 [DTLB_REAL_MISS_EXITS] = "DTLBREAL",
124 [DTLB_VIRT_MISS_EXITS] = "DTLBVIRT",
125 [SYSCALL_EXITS] = "SYSCALL",
126 [ISI_EXITS] = "ISI",
127 [DSI_EXITS] = "DSI",
128 [EMULATED_INST_EXITS] = "EMULINST",
129 [EMULATED_MTMSRWE_EXITS] = "EMUL_WAIT",
130 [EMULATED_WRTEE_EXITS] = "EMUL_WRTEE",
131 [EMULATED_MTSPR_EXITS] = "EMUL_MTSPR",
132 [EMULATED_MFSPR_EXITS] = "EMUL_MFSPR",
133 [EMULATED_MTMSR_EXITS] = "EMUL_MTMSR",
134 [EMULATED_MFMSR_EXITS] = "EMUL_MFMSR",
135 [EMULATED_TLBSX_EXITS] = "EMUL_TLBSX",
136 [EMULATED_TLBWE_EXITS] = "EMUL_TLBWE",
137 [EMULATED_RFI_EXITS] = "EMUL_RFI",
138 [DEC_EXITS] = "DEC",
139 [EXT_INTR_EXITS] = "EXTINT",
140 [HALT_WAKEUP] = "HALT",
141 [USR_PR_INST] = "USR_PR_INST",
142 [FP_UNAVAIL] = "FP_UNAVAIL",
143 [DEBUG_EXITS] = "DEBUG",
144 [TIMEINGUEST] = "TIMEINGUEST"
145};
146
147static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
148{
149 struct kvm_vcpu *vcpu = m->private;
150 int i;
151
152 seq_printf(m, "%s", "type count min max sum sum_squared\n");
153
154 for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
155 seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n",
156 kvm_exit_names[i],
157 vcpu->arch.timing_count_type[i],
158 vcpu->arch.timing_min_duration[i],
159 vcpu->arch.timing_max_duration[i],
160 vcpu->arch.timing_sum_duration[i],
161 vcpu->arch.timing_sum_quad_duration[i]);
162 }
163 return 0;
164}
165
166/* Write 'c' to clear the timing statistics. */
167static ssize_t kvmppc_exit_timing_write(struct file *file,
168 const char __user *user_buf,
169 size_t count, loff_t *ppos)
170{
171 int err = -EINVAL;
172 char c;
173
174 if (count > 1) {
175 goto done;
176 }
177
178 if (get_user(c, user_buf)) {
179 err = -EFAULT;
180 goto done;
181 }
182
183 if (c == 'c') {
184 struct seq_file *seqf = (struct seq_file *)file->private_data;
185 struct kvm_vcpu *vcpu = seqf->private;
186 /* Write does not affect our buffers previously generated with
187 * show. seq_file is locked here to prevent races of init with
188 * a show call */
189 mutex_lock(&seqf->lock);
190 kvmppc_init_timing_stats(vcpu);
191 mutex_unlock(&seqf->lock);
192 err = count;
193 }
194
195done:
196 return err;
197}
198
199static int kvmppc_exit_timing_open(struct inode *inode, struct file *file)
200{
201 return single_open(file, kvmppc_exit_timing_show, inode->i_private);
202}
203
204static struct file_operations kvmppc_exit_timing_fops = {
205 .owner = THIS_MODULE,
206 .open = kvmppc_exit_timing_open,
207 .read = seq_read,
208 .write = kvmppc_exit_timing_write,
209 .llseek = seq_lseek,
210 .release = single_release,
211};
212
213void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
214{
215 static char dbg_fname[50];
216 struct dentry *debugfs_file;
217
218 snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
219 current->pid, id);
220 debugfs_file = debugfs_create_file(dbg_fname, 0666,
221 kvm_debugfs_dir, vcpu,
222 &kvmppc_exit_timing_fops);
223
224 if (!debugfs_file) {
225 printk(KERN_ERR"%s: error creating debugfs file %s\n",
226 __func__, dbg_fname);
227 return;
228 }
229
230 vcpu->arch.debugfs_exit_timing = debugfs_file;
231}
232
233void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
234{
235 if (vcpu->arch.debugfs_exit_timing) {
236 debugfs_remove(vcpu->arch.debugfs_exit_timing);
237 vcpu->arch.debugfs_exit_timing = NULL;
238 }
239}
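
The statistics above are only exported through the per-vcpu debugfs file; nothing in the kernel reads them back. A hedged userspace sketch that dumps the table and then clears it by writing a single 'c', as kvmppc_exit_timing_write() expects; the path layout is an assumption (debugfs mounted at /sys/kernel/debug with kvm_debugfs_dir being the usual "kvm" directory):

#include <stdio.h>

/* Illustrative only: pass the full path of the vm<pid>_vcpu<id>_timing file. */
static int dump_and_clear(const char *path)
{
	char line[512];
	FILE *f;

	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return -1;
	}
	while (fgets(line, sizeof(line), f))	/* print the statistics table */
		fputs(line, stdout);
	fclose(f);

	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return -1;
	}
	fputc('c', f);				/* a single 'c' resets all counters */
	fclose(f);
	return 0;
}

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <timing-debugfs-file>\n", argv[0]);
		return 1;
	}
	return dump_and_clear(argv[1]) ? 1 : 0;
}
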
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
new file mode 100644
index 000000000000..bb13b1f3cd5a
--- /dev/null
+++ b/arch/powerpc/kvm/timing.h
@@ -0,0 +1,102 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
18 */
19
20#ifndef __POWERPC_KVM_EXITTIMING_H__
21#define __POWERPC_KVM_EXITTIMING_H__
22
23#include <linux/kvm_host.h>
24#include <asm/kvm_host.h>
25
26#ifdef CONFIG_KVM_EXIT_TIMING
27void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
28void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
29void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
30void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
31
32static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
33{
34 vcpu->arch.last_exit_type = type;
35}
36
37#else
38/* if exit timing is not configured there is no need to build the c file */
39static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
40static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
41static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
42 unsigned int id) {}
43static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
44static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
45#endif /* CONFIG_KVM_EXIT_TIMING */
46
47/* account the exit in kvm_stats */
48static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
49{
50 /* type has to be known at build time for optimization */
51 BUILD_BUG_ON(__builtin_constant_p(type));
52 switch (type) {
53 case EXT_INTR_EXITS:
54 vcpu->stat.ext_intr_exits++;
55 break;
56 case DEC_EXITS:
57 vcpu->stat.dec_exits++;
58 break;
59 case EMULATED_INST_EXITS:
60 vcpu->stat.emulated_inst_exits++;
61 break;
62 case DCR_EXITS:
63 vcpu->stat.dcr_exits++;
64 break;
65 case DSI_EXITS:
66 vcpu->stat.dsi_exits++;
67 break;
68 case ISI_EXITS:
69 vcpu->stat.isi_exits++;
70 break;
71 case SYSCALL_EXITS:
72 vcpu->stat.syscall_exits++;
73 break;
74 case DTLB_REAL_MISS_EXITS:
75 vcpu->stat.dtlb_real_miss_exits++;
76 break;
77 case DTLB_VIRT_MISS_EXITS:
78 vcpu->stat.dtlb_virt_miss_exits++;
79 break;
80 case MMIO_EXITS:
81 vcpu->stat.mmio_exits++;
82 break;
83 case ITLB_REAL_MISS_EXITS:
84 vcpu->stat.itlb_real_miss_exits++;
85 break;
86 case ITLB_VIRT_MISS_EXITS:
87 vcpu->stat.itlb_virt_miss_exits++;
88 break;
89 case SIGNAL_EXITS:
90 vcpu->stat.signal_exits++;
91 break;
92 }
93}
94
95/* wrapper to set exit time and account for it in kvm_stats */
96static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type)
97{
98 kvmppc_set_exit_type(vcpu, type);
99 kvmppc_account_exit_stat(vcpu, type);
100}
101
102#endif /* __POWERPC_KVM_EXITTIMING_H__ */
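
kvmppc_account_exit() is meant to be invoked with a compile-time-constant exit type so that, after inlining, the switch in kvmppc_account_exit_stat() collapses to a single vcpu->stat increment, and the timing bookkeeping compiles away entirely when CONFIG_KVM_EXIT_TIMING is off. A minimal sketch of the expected call pattern; the handler itself is illustrative (the real callers are the BookE exit handlers elsewhere in this series):

#include <linux/kvm_host.h>
#include <asm/kvm_ppc.h>
#include "timing.h"

/* Illustrative only: an exit handler accounts its own exit with a constant
 * type, so kvmppc_account_exit_stat() reduces to one counter increment. */
static int handle_guest_dcr(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	run->exit_reason = KVM_EXIT_DCR;
	kvmppc_account_exit(vcpu, DCR_EXITS);
	return RESUME_HOST;
}
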
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8b00eb2ddf57..be8497186b96 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -113,8 +113,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
113int kvm_dev_ioctl_check_extension(long ext) 113int kvm_dev_ioctl_check_extension(long ext)
114{ 114{
115 switch (ext) { 115 switch (ext) {
116 case KVM_CAP_USER_MEMORY:
117 return 1;
118 default: 116 default:
119 return 0; 117 return 0;
120 } 118 }
@@ -185,8 +183,6 @@ struct kvm *kvm_arch_create_vm(void)
185 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 183 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
186 VM_EVENT(kvm, 3, "%s", "vm created"); 184 VM_EVENT(kvm, 3, "%s", "vm created");
187 185
188 try_module_get(THIS_MODULE);
189
190 return kvm; 186 return kvm;
191out_nodbf: 187out_nodbf:
192 free_page((unsigned long)(kvm->arch.sca)); 188 free_page((unsigned long)(kvm->arch.sca));
@@ -196,13 +192,33 @@ out_nokvm:
196 return ERR_PTR(rc); 192 return ERR_PTR(rc);
197} 193}
198 194
195void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
196{
197 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
198 free_page((unsigned long)(vcpu->arch.sie_block));
199 kvm_vcpu_uninit(vcpu);
200 kfree(vcpu);
201}
202
203static void kvm_free_vcpus(struct kvm *kvm)
204{
205 unsigned int i;
206
207 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
208 if (kvm->vcpus[i]) {
209 kvm_arch_vcpu_destroy(kvm->vcpus[i]);
210 kvm->vcpus[i] = NULL;
211 }
212 }
213}
214
199void kvm_arch_destroy_vm(struct kvm *kvm) 215void kvm_arch_destroy_vm(struct kvm *kvm)
200{ 216{
201 debug_unregister(kvm->arch.dbf); 217 kvm_free_vcpus(kvm);
202 kvm_free_physmem(kvm); 218 kvm_free_physmem(kvm);
203 free_page((unsigned long)(kvm->arch.sca)); 219 free_page((unsigned long)(kvm->arch.sca));
220 debug_unregister(kvm->arch.dbf);
204 kfree(kvm); 221 kfree(kvm);
205 module_put(THIS_MODULE);
206} 222}
207 223
208/* Section: vcpu related */ 224/* Section: vcpu related */
@@ -213,8 +229,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
213 229
214void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 230void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
215{ 231{
216 /* kvm common code refers to this, but doesn't call it */ 232 /* Nothing to do */
217 BUG();
218} 233}
219 234
220void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 235void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -308,8 +323,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
308 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, 323 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
309 vcpu->arch.sie_block); 324 vcpu->arch.sie_block);
310 325
311 try_module_get(THIS_MODULE);
312
313 return vcpu; 326 return vcpu;
314out_free_cpu: 327out_free_cpu:
315 kfree(vcpu); 328 kfree(vcpu);
@@ -317,14 +330,6 @@ out_nomem:
317 return ERR_PTR(rc); 330 return ERR_PTR(rc);
318} 331}
319 332
320void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
321{
322 VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
323 free_page((unsigned long)(vcpu->arch.sie_block));
324 kfree(vcpu);
325 module_put(THIS_MODULE);
326}
327
328int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 333int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
329{ 334{
330 /* kvm common code refers to this, but never calls it */ 335 /* kvm common code refers to this, but never calls it */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8346be87cfa1..97215a458e5f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -21,6 +21,7 @@
21 21
22#include <asm/pvclock-abi.h> 22#include <asm/pvclock-abi.h>
23#include <asm/desc.h> 23#include <asm/desc.h>
24#include <asm/mtrr.h>
24 25
25#define KVM_MAX_VCPUS 16 26#define KVM_MAX_VCPUS 16
26#define KVM_MEMORY_SLOTS 32 27#define KVM_MEMORY_SLOTS 32
@@ -86,6 +87,7 @@
86#define KVM_MIN_FREE_MMU_PAGES 5 87#define KVM_MIN_FREE_MMU_PAGES 5
87#define KVM_REFILL_PAGES 25 88#define KVM_REFILL_PAGES 25
88#define KVM_MAX_CPUID_ENTRIES 40 89#define KVM_MAX_CPUID_ENTRIES 40
90#define KVM_NR_FIXED_MTRR_REGION 88
89#define KVM_NR_VAR_MTRR 8 91#define KVM_NR_VAR_MTRR 8
90 92
91extern spinlock_t kvm_lock; 93extern spinlock_t kvm_lock;
@@ -180,6 +182,8 @@ struct kvm_mmu_page {
180 struct list_head link; 182 struct list_head link;
181 struct hlist_node hash_link; 183 struct hlist_node hash_link;
182 184
185 struct list_head oos_link;
186
183 /* 187 /*
184 * The following two entries are used to key the shadow page in the 188 * The following two entries are used to key the shadow page in the
185 * hash table. 189 * hash table.
@@ -190,13 +194,16 @@ struct kvm_mmu_page {
190 u64 *spt; 194 u64 *spt;
191 /* hold the gfn of each spte inside spt */ 195 /* hold the gfn of each spte inside spt */
192 gfn_t *gfns; 196 gfn_t *gfns;
193 unsigned long slot_bitmap; /* One bit set per slot which has memory 197 /*
194 * in this shadow page. 198 * One bit set per slot which has memory
195 */ 199 * in this shadow page.
200 */
201 DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
196 int multimapped; /* More than one parent_pte? */ 202 int multimapped; /* More than one parent_pte? */
197 int root_count; /* Currently serving as active root */ 203 int root_count; /* Currently serving as active root */
198 bool unsync; 204 bool unsync;
199 bool unsync_children; 205 bool global;
206 unsigned int unsync_children;
200 union { 207 union {
201 u64 *parent_pte; /* !multimapped */ 208 u64 *parent_pte; /* !multimapped */
202 struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ 209 struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
@@ -327,8 +334,10 @@ struct kvm_vcpu_arch {
327 334
328 bool nmi_pending; 335 bool nmi_pending;
329 bool nmi_injected; 336 bool nmi_injected;
337 bool nmi_window_open;
330 338
331 u64 mtrr[0x100]; 339 struct mtrr_state_type mtrr_state;
340 u32 pat;
332}; 341};
333 342
334struct kvm_mem_alias { 343struct kvm_mem_alias {
@@ -350,11 +359,13 @@ struct kvm_arch{
350 */ 359 */
351 struct list_head active_mmu_pages; 360 struct list_head active_mmu_pages;
352 struct list_head assigned_dev_head; 361 struct list_head assigned_dev_head;
362 struct list_head oos_global_pages;
353 struct dmar_domain *intel_iommu_domain; 363 struct dmar_domain *intel_iommu_domain;
354 struct kvm_pic *vpic; 364 struct kvm_pic *vpic;
355 struct kvm_ioapic *vioapic; 365 struct kvm_ioapic *vioapic;
356 struct kvm_pit *vpit; 366 struct kvm_pit *vpit;
357 struct hlist_head irq_ack_notifier_list; 367 struct hlist_head irq_ack_notifier_list;
368 int vapics_in_nmi_mode;
358 369
359 int round_robin_prev_vcpu; 370 int round_robin_prev_vcpu;
360 unsigned int tss_addr; 371 unsigned int tss_addr;
@@ -378,6 +389,7 @@ struct kvm_vm_stat {
378 u32 mmu_recycled; 389 u32 mmu_recycled;
379 u32 mmu_cache_miss; 390 u32 mmu_cache_miss;
380 u32 mmu_unsync; 391 u32 mmu_unsync;
392 u32 mmu_unsync_global;
381 u32 remote_tlb_flush; 393 u32 remote_tlb_flush;
382 u32 lpages; 394 u32 lpages;
383}; 395};
@@ -397,6 +409,7 @@ struct kvm_vcpu_stat {
397 u32 halt_exits; 409 u32 halt_exits;
398 u32 halt_wakeup; 410 u32 halt_wakeup;
399 u32 request_irq_exits; 411 u32 request_irq_exits;
412 u32 request_nmi_exits;
400 u32 irq_exits; 413 u32 irq_exits;
401 u32 host_state_reload; 414 u32 host_state_reload;
402 u32 efer_reload; 415 u32 efer_reload;
@@ -405,6 +418,7 @@ struct kvm_vcpu_stat {
405 u32 insn_emulation_fail; 418 u32 insn_emulation_fail;
406 u32 hypercalls; 419 u32 hypercalls;
407 u32 irq_injections; 420 u32 irq_injections;
421 u32 nmi_injections;
408}; 422};
409 423
410struct descriptor_table { 424struct descriptor_table {
@@ -477,6 +491,7 @@ struct kvm_x86_ops {
477 491
478 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 492 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
479 int (*get_tdp_level)(void); 493 int (*get_tdp_level)(void);
494 int (*get_mt_mask_shift)(void);
480}; 495};
481 496
482extern struct kvm_x86_ops *kvm_x86_ops; 497extern struct kvm_x86_ops *kvm_x86_ops;
@@ -490,7 +505,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
490void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); 505void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
491void kvm_mmu_set_base_ptes(u64 base_pte); 506void kvm_mmu_set_base_ptes(u64 base_pte);
492void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 507void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
493 u64 dirty_mask, u64 nx_mask, u64 x_mask); 508 u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask);
494 509
495int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 510int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
496void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); 511void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
@@ -587,12 +602,14 @@ unsigned long segment_base(u16 selector);
587 602
588void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); 603void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
589void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 604void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
590 const u8 *new, int bytes); 605 const u8 *new, int bytes,
606 bool guest_initiated);
591int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); 607int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
592void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); 608void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
593int kvm_mmu_load(struct kvm_vcpu *vcpu); 609int kvm_mmu_load(struct kvm_vcpu *vcpu);
594void kvm_mmu_unload(struct kvm_vcpu *vcpu); 610void kvm_mmu_unload(struct kvm_vcpu *vcpu);
595void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); 611void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
612void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
596 613
597int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); 614int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
598 615
@@ -607,6 +624,8 @@ void kvm_disable_tdp(void);
607int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); 624int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
608int complete_pio(struct kvm_vcpu *vcpu); 625int complete_pio(struct kvm_vcpu *vcpu);
609 626
627struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
628
610static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) 629static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
611{ 630{
612 struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); 631 struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
@@ -702,18 +721,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
702 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); 721 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
703} 722}
704 723
705#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
706#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
707#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
708#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
709#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
710#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
711#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
712#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
713#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
714#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
715#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
716
717#define MSR_IA32_TIME_STAMP_COUNTER 0x010 724#define MSR_IA32_TIME_STAMP_COUNTER 0x010
718 725
719#define TSS_IOPB_BASE_OFFSET 0x66 726#define TSS_IOPB_BASE_OFFSET 0x66
diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h
index 25179a29f208..6a159732881a 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -123,6 +123,7 @@ struct decode_cache {
123 u8 ad_bytes; 123 u8 ad_bytes;
124 u8 rex_prefix; 124 u8 rex_prefix;
125 struct operand src; 125 struct operand src;
126 struct operand src2;
126 struct operand dst; 127 struct operand dst;
127 bool has_seg_override; 128 bool has_seg_override;
128 u8 seg_override; 129 u8 seg_override;
@@ -146,22 +147,18 @@ struct x86_emulate_ctxt {
146 /* Register state before/after emulation. */ 147 /* Register state before/after emulation. */
147 struct kvm_vcpu *vcpu; 148 struct kvm_vcpu *vcpu;
148 149
149 /* Linear faulting address (if emulating a page-faulting instruction) */
150 unsigned long eflags; 150 unsigned long eflags;
151
152 /* Emulated execution mode, represented by an X86EMUL_MODE value. */ 151 /* Emulated execution mode, represented by an X86EMUL_MODE value. */
153 int mode; 152 int mode;
154
155 u32 cs_base; 153 u32 cs_base;
156 154
157 /* decode cache */ 155 /* decode cache */
158
159 struct decode_cache decode; 156 struct decode_cache decode;
160}; 157};
161 158
162/* Repeat String Operation Prefix */ 159/* Repeat String Operation Prefix */
163#define REPE_PREFIX 1 160#define REPE_PREFIX 1
164#define REPNE_PREFIX 2 161#define REPNE_PREFIX 2
165 162
166/* Execution mode, passed to the emulator. */ 163/* Execution mode, passed to the emulator. */
167#define X86EMUL_MODE_REAL 0 /* Real mode. */ 164#define X86EMUL_MODE_REAL 0 /* Real mode. */
@@ -170,7 +167,7 @@ struct x86_emulate_ctxt {
170#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ 167#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
171 168
172/* Host execution mode. */ 169/* Host execution mode. */
173#if defined(__i386__) 170#if defined(CONFIG_X86_32)
174#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 171#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
175#elif defined(CONFIG_X86_64) 172#elif defined(CONFIG_X86_64)
176#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 173#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 7c1e4258b31e..cb988aab716d 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -57,6 +57,31 @@ struct mtrr_gentry {
57}; 57};
58#endif /* !__i386__ */ 58#endif /* !__i386__ */
59 59
60struct mtrr_var_range {
61 u32 base_lo;
62 u32 base_hi;
63 u32 mask_lo;
64 u32 mask_hi;
65};
66
67/* In the Intel processor's MTRR interface, the MTRR type is always held in
68 an 8 bit field: */
69typedef u8 mtrr_type;
70
71#define MTRR_NUM_FIXED_RANGES 88
72#define MTRR_MAX_VAR_RANGES 256
73
74struct mtrr_state_type {
75 struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
76 mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
77 unsigned char enabled;
78 unsigned char have_fixed;
79 mtrr_type def_type;
80};
81
82#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
83#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
84
60/* These are the various ioctls */ 85/* These are the various ioctls */
61#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) 86#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry)
62#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) 87#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry)
diff --git a/arch/x86/kvm/svm.h b/arch/x86/include/asm/svm.h
index 1b8afa78e869..1b8afa78e869 100644
--- a/arch/x86/kvm/svm.h
+++ b/arch/x86/include/asm/svm.h
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
new file mode 100644
index 000000000000..593636275238
--- /dev/null
+++ b/arch/x86/include/asm/virtext.h
@@ -0,0 +1,132 @@
1/* CPU virtualization extensions handling
2 *
3 * This should carry the code for handling CPU virtualization extensions
4 * that needs to live in the kernel core.
5 *
6 * Author: Eduardo Habkost <ehabkost@redhat.com>
7 *
8 * Copyright (C) 2008, Red Hat Inc.
9 *
10 * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2. See
13 * the COPYING file in the top-level directory.
14 */
15#ifndef _ASM_X86_VIRTEX_H
16#define _ASM_X86_VIRTEX_H
17
18#include <asm/processor.h>
19#include <asm/system.h>
20
21#include <asm/vmx.h>
22#include <asm/svm.h>
23
24/*
25 * VMX functions:
26 */
27
28static inline int cpu_has_vmx(void)
29{
30 unsigned long ecx = cpuid_ecx(1);
31 return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
32}
33
34
35/** Disable VMX on the current CPU
36 *
37 * vmxoff causes an undefined-opcode exception if vmxon was not run
38 * on the CPU previously. Only call this function if you know VMX
39 * is enabled.
40 */
41static inline void cpu_vmxoff(void)
42{
43 asm volatile (ASM_VMX_VMXOFF : : : "cc");
44 write_cr4(read_cr4() & ~X86_CR4_VMXE);
45}
46
47static inline int cpu_vmx_enabled(void)
48{
49 return read_cr4() & X86_CR4_VMXE;
50}
51
52/** Disable VMX if it is enabled on the current CPU
53 *
54 * You shouldn't call this if cpu_has_vmx() returns 0.
55 */
56static inline void __cpu_emergency_vmxoff(void)
57{
58 if (cpu_vmx_enabled())
59 cpu_vmxoff();
60}
61
62/** Disable VMX if it is supported and enabled on the current CPU
63 */
64static inline void cpu_emergency_vmxoff(void)
65{
66 if (cpu_has_vmx())
67 __cpu_emergency_vmxoff();
68}
69
70
71
72
73/*
74 * SVM functions:
75 */
76
77/** Check if the CPU has SVM support
78 *
79 * You can use the 'msg' arg to get a message describing the problem,
80 * if the function returns zero. Simply pass NULL if you are not interested
81 * in the messages; gcc should take care of not generating code for
82 * the messages in this case.
83 */
84static inline int cpu_has_svm(const char **msg)
85{
86 uint32_t eax, ebx, ecx, edx;
87
88 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
89 if (msg)
90 *msg = "not amd";
91 return 0;
92 }
93
94 cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
95 if (eax < SVM_CPUID_FUNC) {
96 if (msg)
97 *msg = "can't execute cpuid_8000000a";
98 return 0;
99 }
100
101 cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
102 if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
103 if (msg)
104 *msg = "svm not available";
105 return 0;
106 }
107 return 1;
108}
109
110
111/** Disable SVM on the current CPU
112 *
113 * You should call this only if cpu_has_svm() returned true.
114 */
115static inline void cpu_svm_disable(void)
116{
117 uint64_t efer;
118
119 wrmsrl(MSR_VM_HSAVE_PA, 0);
120 rdmsrl(MSR_EFER, efer);
121 wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
122}
123
124/** Makes sure SVM is disabled, if it is supported on the CPU
125 */
126static inline void cpu_emergency_svm_disable(void)
127{
128 if (cpu_has_svm(NULL))
129 cpu_svm_disable();
130}
131
132#endif /* _ASM_X86_VIRTEX_H */
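
All of these helpers are designed so that emergency code can call them unconditionally: the cpu_emergency_*() wrappers probe for the extension first and only then touch CR4.VMXE or EFER.SVME. A minimal sketch of the intended call sequence on a CPU that is about to kexec or reset (illustrative; the real call sites added by this series are the crash and reboot paths further below):

#include <asm/virtext.h>

/* Illustrative only: both helpers are safe to call whether or not the
 * extension is present; they check before touching any control register. */
static void disable_virt_on_this_cpu(void)
{
	cpu_emergency_vmxoff();		/* VMXOFF + clear CR4.VMXE only if enabled */
	cpu_emergency_svm_disable();	/* clear EFER.SVME only if SVM is present */
}
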
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/include/asm/vmx.h
index ec5edc339da6..d0238e6151d8 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -63,10 +63,13 @@
63 63
64#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 64#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
65#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 65#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
66#define VM_EXIT_SAVE_IA32_PAT 0x00040000
67#define VM_EXIT_LOAD_IA32_PAT 0x00080000
66 68
67#define VM_ENTRY_IA32E_MODE 0x00000200 69#define VM_ENTRY_IA32E_MODE 0x00000200
68#define VM_ENTRY_SMM 0x00000400 70#define VM_ENTRY_SMM 0x00000400
69#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 71#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
72#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
70 73
71/* VMCS Encodings */ 74/* VMCS Encodings */
72enum vmcs_field { 75enum vmcs_field {
@@ -112,6 +115,8 @@ enum vmcs_field {
112 VMCS_LINK_POINTER_HIGH = 0x00002801, 115 VMCS_LINK_POINTER_HIGH = 0x00002801,
113 GUEST_IA32_DEBUGCTL = 0x00002802, 116 GUEST_IA32_DEBUGCTL = 0x00002802,
114 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, 117 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
118 GUEST_IA32_PAT = 0x00002804,
119 GUEST_IA32_PAT_HIGH = 0x00002805,
115 GUEST_PDPTR0 = 0x0000280a, 120 GUEST_PDPTR0 = 0x0000280a,
116 GUEST_PDPTR0_HIGH = 0x0000280b, 121 GUEST_PDPTR0_HIGH = 0x0000280b,
117 GUEST_PDPTR1 = 0x0000280c, 122 GUEST_PDPTR1 = 0x0000280c,
@@ -120,6 +125,8 @@ enum vmcs_field {
120 GUEST_PDPTR2_HIGH = 0x0000280f, 125 GUEST_PDPTR2_HIGH = 0x0000280f,
121 GUEST_PDPTR3 = 0x00002810, 126 GUEST_PDPTR3 = 0x00002810,
122 GUEST_PDPTR3_HIGH = 0x00002811, 127 GUEST_PDPTR3_HIGH = 0x00002811,
128 HOST_IA32_PAT = 0x00002c00,
129 HOST_IA32_PAT_HIGH = 0x00002c01,
123 PIN_BASED_VM_EXEC_CONTROL = 0x00004000, 130 PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
124 CPU_BASED_VM_EXEC_CONTROL = 0x00004002, 131 CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
125 EXCEPTION_BITMAP = 0x00004004, 132 EXCEPTION_BITMAP = 0x00004004,
@@ -331,8 +338,9 @@ enum vmcs_field {
331 338
332#define AR_RESERVD_MASK 0xfffe0f00 339#define AR_RESERVD_MASK 0xfffe0f00
333 340
334#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 341#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0)
335#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 342#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1)
343#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2)
336 344
337#define VMX_NR_VPIDS (1 << 16) 345#define VMX_NR_VPIDS (1 << 16)
338#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 346#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
@@ -356,4 +364,19 @@ enum vmcs_field {
356 364
357#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul 365#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
358 366
367
368#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
369#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
370#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
371#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
372#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
373#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
374#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
375#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
376#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
377#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
378#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
379
380
381
359#endif 382#endif
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 4e8d77f01eeb..b59ddcc88cd8 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,14 +14,6 @@
14#include <asm/pat.h> 14#include <asm/pat.h>
15#include "mtrr.h" 15#include "mtrr.h"
16 16
17struct mtrr_state {
18 struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
19 mtrr_type fixed_ranges[NUM_FIXED_RANGES];
20 unsigned char enabled;
21 unsigned char have_fixed;
22 mtrr_type def_type;
23};
24
25struct fixed_range_block { 17struct fixed_range_block {
26 int base_msr; /* start address of an MTRR block */ 18 int base_msr; /* start address of an MTRR block */
27 int ranges; /* number of MTRRs in this block */ 19 int ranges; /* number of MTRRs in this block */
@@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = {
35}; 27};
36 28
37static unsigned long smp_changes_mask; 29static unsigned long smp_changes_mask;
38static struct mtrr_state mtrr_state = {};
39static int mtrr_state_set; 30static int mtrr_state_set;
40u64 mtrr_tom2; 31u64 mtrr_tom2;
41 32
33struct mtrr_state_type mtrr_state = {};
34EXPORT_SYMBOL_GPL(mtrr_state);
35
42#undef MODULE_PARAM_PREFIX 36#undef MODULE_PARAM_PREFIX
43#define MODULE_PARAM_PREFIX "mtrr." 37#define MODULE_PARAM_PREFIX "mtrr."
44 38
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 1159e269e596..d6ec7ec30274 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -49,7 +49,7 @@
49 49
50u32 num_var_ranges = 0; 50u32 num_var_ranges = 0;
51 51
52unsigned int mtrr_usage_table[MAX_VAR_RANGES]; 52unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
53static DEFINE_MUTEX(mtrr_mutex); 53static DEFINE_MUTEX(mtrr_mutex);
54 54
55u64 size_or_mask, size_and_mask; 55u64 size_or_mask, size_and_mask;
@@ -574,7 +574,7 @@ struct mtrr_value {
574 unsigned long lsize; 574 unsigned long lsize;
575}; 575};
576 576
577static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; 577static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
578 578
579static int mtrr_save(struct sys_device * sysdev, pm_message_t state) 579static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
580{ 580{
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2dc4ec656b23..ffd60409cc6d 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -8,11 +8,6 @@
8#define MTRRcap_MSR 0x0fe 8#define MTRRcap_MSR 0x0fe
9#define MTRRdefType_MSR 0x2ff 9#define MTRRdefType_MSR 0x2ff
10 10
11#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
12#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
13
14#define NUM_FIXED_RANGES 88
15#define MAX_VAR_RANGES 256
16#define MTRRfix64K_00000_MSR 0x250 11#define MTRRfix64K_00000_MSR 0x250
17#define MTRRfix16K_80000_MSR 0x258 12#define MTRRfix16K_80000_MSR 0x258
18#define MTRRfix16K_A0000_MSR 0x259 13#define MTRRfix16K_A0000_MSR 0x259
@@ -29,11 +24,7 @@
29#define MTRR_CHANGE_MASK_VARIABLE 0x02 24#define MTRR_CHANGE_MASK_VARIABLE 0x02
30#define MTRR_CHANGE_MASK_DEFTYPE 0x04 25#define MTRR_CHANGE_MASK_DEFTYPE 0x04
31 26
32/* In the Intel processor's MTRR interface, the MTRR type is always held in 27extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
33 an 8 bit field: */
34typedef u8 mtrr_type;
35
36extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
37 28
38struct mtrr_ops { 29struct mtrr_ops {
39 u32 vendor; 30 u32 vendor;
@@ -70,13 +61,6 @@ struct set_mtrr_context {
70 u32 ccr3; 61 u32 ccr3;
71}; 62};
72 63
73struct mtrr_var_range {
74 u32 base_lo;
75 u32 base_hi;
76 u32 mask_lo;
77 u32 mask_hi;
78};
79
80void set_mtrr_done(struct set_mtrr_context *ctxt); 64void set_mtrr_done(struct set_mtrr_context *ctxt);
81void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); 65void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
82void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); 66void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index d84a852e4cd7..c689d19e35ab 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -26,6 +26,7 @@
26#include <linux/kdebug.h> 26#include <linux/kdebug.h>
27#include <asm/smp.h> 27#include <asm/smp.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h>
29 30
30#include <mach_ipi.h> 31#include <mach_ipi.h>
31 32
@@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args)
49#endif 50#endif
50 crash_save_cpu(regs, cpu); 51 crash_save_cpu(regs, cpu);
51 52
53 /* Disable VMX or SVM if needed.
54 *
55 * We need to disable virtualization on all CPUs.
56 * Having VMX or SVM enabled on any CPU may break rebooting
57 * after the kdump kernel has finished its task.
58 */
59 cpu_emergency_vmxoff();
60 cpu_emergency_svm_disable();
61
52 disable_local_APIC(); 62 disable_local_APIC();
53} 63}
54 64
@@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
80 local_irq_disable(); 90 local_irq_disable();
81 91
82 kdump_nmi_shootdown_cpus(); 92 kdump_nmi_shootdown_cpus();
93
94 /* Booting kdump kernel with VMX or SVM enabled won't work,
95 * because (among other limitations) we can't disable paging
96 * with the virt flags.
97 */
98 cpu_emergency_vmxoff();
99 cpu_emergency_svm_disable();
100
83 lapic_shutdown(); 101 lapic_shutdown();
84#if defined(CONFIG_X86_IO_APIC) 102#if defined(CONFIG_X86_IO_APIC)
85 disable_IO_APIC(); 103 disable_IO_APIC();
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e169ae9b6a62..652fce6d2cce 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void)
89 */ 89 */
90static unsigned long kvm_get_tsc_khz(void) 90static unsigned long kvm_get_tsc_khz(void)
91{ 91{
92 return preset_lpj; 92 struct pvclock_vcpu_time_info *src;
93 src = &per_cpu(hv_clock, 0);
94 return pvclock_tsc_khz(src);
93} 95}
94 96
95static void kvm_get_preset_lpj(void) 97static void kvm_get_preset_lpj(void)
96{ 98{
97 struct pvclock_vcpu_time_info *src;
98 unsigned long khz; 99 unsigned long khz;
99 u64 lpj; 100 u64 lpj;
100 101
101 src = &per_cpu(hv_clock, 0); 102 khz = kvm_get_tsc_khz();
102 khz = pvclock_tsc_khz(src);
103 103
104 lpj = ((u64)khz * 1000); 104 lpj = ((u64)khz * 1000);
105 do_div(lpj, HZ); 105 do_div(lpj, HZ);
@@ -194,5 +194,7 @@ void __init kvmclock_init(void)
194#endif 194#endif
195 kvm_get_preset_lpj(); 195 kvm_get_preset_lpj();
196 clocksource_register(&kvm_clock); 196 clocksource_register(&kvm_clock);
197 pv_info.paravirt_enabled = 1;
198 pv_info.name = "KVM";
197 } 199 }
198} 200}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 61f718df6eec..72e0e4e712d6 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,7 @@
12#include <asm/proto.h> 12#include <asm/proto.h>
13#include <asm/reboot_fixups.h> 13#include <asm/reboot_fixups.h>
14#include <asm/reboot.h> 14#include <asm/reboot.h>
15#include <asm/virtext.h>
15 16
16#ifdef CONFIG_X86_32 17#ifdef CONFIG_X86_32
17# include <linux/dmi.h> 18# include <linux/dmi.h>
@@ -39,6 +40,12 @@ int reboot_force;
39static int reboot_cpu = -1; 40static int reboot_cpu = -1;
40#endif 41#endif
41 42
43/* This is set if we need to go through the 'emergency' path.
44 * When machine_emergency_restart() is called, we may be in
45 * an inconsistent state and won't be able to do a clean cleanup
46 */
47static int reboot_emergency;
48
42/* This is set by the PCI code if either type 1 or type 2 PCI is detected */ 49/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
43bool port_cf9_safe = false; 50bool port_cf9_safe = false;
44 51
@@ -368,6 +375,48 @@ static inline void kb_wait(void)
368 } 375 }
369} 376}
370 377
378static void vmxoff_nmi(int cpu, struct die_args *args)
379{
380 cpu_emergency_vmxoff();
381}
382
383/* Use NMIs as IPIs to tell all CPUs to disable virtualization
384 */
385static void emergency_vmx_disable_all(void)
386{
387 /* Just make sure we won't change CPUs while doing this */
388 local_irq_disable();
389
390 /* We need to disable VMX on all CPUs before rebooting, otherwise
391 * we risk hanging up the machine, because the CPU ignores INIT
392 * signals when VMX is enabled.
393 *
394 * We can't take any locks and we may be in an inconsistent
395 * state, so we use NMIs as IPIs to tell the other CPUs to disable
396 * VMX and halt.
397 *
398 * For safety, we will avoid running the nmi_shootdown_cpus()
399 * stuff unnecessarily, but we don't have a way to check
400 * if other CPUs have VMX enabled. So we will call it only if the
401 * CPU we are running on has VMX enabled.
402 *
403 * We will miss cases where VMX is not enabled on all CPUs. This
404 * shouldn't do much harm because KVM always enables VMX on all
405 * CPUs anyway. But we can miss it in the small window where KVM
406 * is still enabling VMX.
407 */
408 if (cpu_has_vmx() && cpu_vmx_enabled()) {
409 /* Disable VMX on this CPU.
410 */
411 cpu_vmxoff();
412
413 /* Halt and disable VMX on the other CPUs */
414 nmi_shootdown_cpus(vmxoff_nmi);
415
416 }
417}
418
419
371void __attribute__((weak)) mach_reboot_fixups(void) 420void __attribute__((weak)) mach_reboot_fixups(void)
372{ 421{
373} 422}
@@ -376,6 +425,9 @@ static void native_machine_emergency_restart(void)
376{ 425{
377 int i; 426 int i;
378 427
428 if (reboot_emergency)
429 emergency_vmx_disable_all();
430
379 /* Tell the BIOS if we want cold or warm reboot */ 431 /* Tell the BIOS if we want cold or warm reboot */
380 *((unsigned short *)__va(0x472)) = reboot_mode; 432 *((unsigned short *)__va(0x472)) = reboot_mode;
381 433
@@ -482,13 +534,19 @@ void native_machine_shutdown(void)
482#endif 534#endif
483} 535}
484 536
537static void __machine_emergency_restart(int emergency)
538{
539 reboot_emergency = emergency;
540 machine_ops.emergency_restart();
541}
542
485static void native_machine_restart(char *__unused) 543static void native_machine_restart(char *__unused)
486{ 544{
487 printk("machine restart\n"); 545 printk("machine restart\n");
488 546
489 if (!reboot_force) 547 if (!reboot_force)
490 machine_shutdown(); 548 machine_shutdown();
491 machine_emergency_restart(); 549 __machine_emergency_restart(0);
492} 550}
493 551
494static void native_machine_halt(void) 552static void native_machine_halt(void)
@@ -532,7 +590,7 @@ void machine_shutdown(void)
532 590
533void machine_emergency_restart(void) 591void machine_emergency_restart(void)
534{ 592{
535 machine_ops.emergency_restart(); 593 __machine_emergency_restart(1);
536} 594}
537 595
538void machine_restart(char *cmd) 596void machine_restart(char *cmd)
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 59ebd37ad79e..e665d1c623ca 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -603,10 +603,29 @@ void kvm_free_pit(struct kvm *kvm)
603 603
604static void __inject_pit_timer_intr(struct kvm *kvm) 604static void __inject_pit_timer_intr(struct kvm *kvm)
605{ 605{
606 struct kvm_vcpu *vcpu;
607 int i;
608
606 mutex_lock(&kvm->lock); 609 mutex_lock(&kvm->lock);
607 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); 610 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
608 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); 611 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
609 mutex_unlock(&kvm->lock); 612 mutex_unlock(&kvm->lock);
613
614 /*
615 * Provides NMI watchdog support via Virtual Wire mode.
616 * The route is: PIT -> PIC -> LVT0 in NMI mode.
617 *
618 * Note: Our Virtual Wire implementation is simplified, only
619 * propagating PIT interrupts to all VCPUs when they have set
620 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
621 * VCPU0, and only if its LVT0 is in EXTINT mode.
622 */
623 if (kvm->arch.vapics_in_nmi_mode > 0)
624 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
625 vcpu = kvm->vcpus[i];
626 if (vcpu)
627 kvm_apic_nmi_wd_deliver(vcpu);
628 }
610} 629}
611 630
612void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) 631void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 17e41e165f1a..179dcb0103fd 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -26,10 +26,40 @@
26 * Port from Qemu. 26 * Port from Qemu.
27 */ 27 */
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/bitops.h>
29#include "irq.h" 30#include "irq.h"
30 31
31#include <linux/kvm_host.h> 32#include <linux/kvm_host.h>
32 33
34static void pic_lock(struct kvm_pic *s)
35{
36 spin_lock(&s->lock);
37}
38
39static void pic_unlock(struct kvm_pic *s)
40{
41 struct kvm *kvm = s->kvm;
42 unsigned acks = s->pending_acks;
43 bool wakeup = s->wakeup_needed;
44 struct kvm_vcpu *vcpu;
45
46 s->pending_acks = 0;
47 s->wakeup_needed = false;
48
49 spin_unlock(&s->lock);
50
51 while (acks) {
52 kvm_notify_acked_irq(kvm, __ffs(acks));
53 acks &= acks - 1;
54 }
55
56 if (wakeup) {
57 vcpu = s->kvm->vcpus[0];
58 if (vcpu)
59 kvm_vcpu_kick(vcpu);
60 }
61}
62
33static void pic_clear_isr(struct kvm_kpic_state *s, int irq) 63static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
34{ 64{
35 s->isr &= ~(1 << irq); 65 s->isr &= ~(1 << irq);
@@ -136,17 +166,21 @@ static void pic_update_irq(struct kvm_pic *s)
136 166
137void kvm_pic_update_irq(struct kvm_pic *s) 167void kvm_pic_update_irq(struct kvm_pic *s)
138{ 168{
169 pic_lock(s);
139 pic_update_irq(s); 170 pic_update_irq(s);
171 pic_unlock(s);
140} 172}
141 173
142void kvm_pic_set_irq(void *opaque, int irq, int level) 174void kvm_pic_set_irq(void *opaque, int irq, int level)
143{ 175{
144 struct kvm_pic *s = opaque; 176 struct kvm_pic *s = opaque;
145 177
178 pic_lock(s);
146 if (irq >= 0 && irq < PIC_NUM_PINS) { 179 if (irq >= 0 && irq < PIC_NUM_PINS) {
147 pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); 180 pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
148 pic_update_irq(s); 181 pic_update_irq(s);
149 } 182 }
183 pic_unlock(s);
150} 184}
151 185
152/* 186/*
@@ -172,6 +206,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
172 int irq, irq2, intno; 206 int irq, irq2, intno;
173 struct kvm_pic *s = pic_irqchip(kvm); 207 struct kvm_pic *s = pic_irqchip(kvm);
174 208
209 pic_lock(s);
175 irq = pic_get_irq(&s->pics[0]); 210 irq = pic_get_irq(&s->pics[0]);
176 if (irq >= 0) { 211 if (irq >= 0) {
177 pic_intack(&s->pics[0], irq); 212 pic_intack(&s->pics[0], irq);
@@ -196,6 +231,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
196 intno = s->pics[0].irq_base + irq; 231 intno = s->pics[0].irq_base + irq;
197 } 232 }
198 pic_update_irq(s); 233 pic_update_irq(s);
234 pic_unlock(s);
199 kvm_notify_acked_irq(kvm, irq); 235 kvm_notify_acked_irq(kvm, irq);
200 236
201 return intno; 237 return intno;
@@ -203,7 +239,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
203 239
204void kvm_pic_reset(struct kvm_kpic_state *s) 240void kvm_pic_reset(struct kvm_kpic_state *s)
205{ 241{
206 int irq, irqbase; 242 int irq, irqbase, n;
207 struct kvm *kvm = s->pics_state->irq_request_opaque; 243 struct kvm *kvm = s->pics_state->irq_request_opaque;
208 struct kvm_vcpu *vcpu0 = kvm->vcpus[0]; 244 struct kvm_vcpu *vcpu0 = kvm->vcpus[0];
209 245
@@ -214,8 +250,10 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
214 250
215 for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { 251 for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
216 if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) 252 if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
217 if (s->irr & (1 << irq) || s->isr & (1 << irq)) 253 if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
218 kvm_notify_acked_irq(kvm, irq+irqbase); 254 n = irq + irqbase;
255 s->pics_state->pending_acks |= 1 << n;
256 }
219 } 257 }
220 s->last_irr = 0; 258 s->last_irr = 0;
221 s->irr = 0; 259 s->irr = 0;
@@ -406,6 +444,7 @@ static void picdev_write(struct kvm_io_device *this,
406 printk(KERN_ERR "PIC: non byte write\n"); 444 printk(KERN_ERR "PIC: non byte write\n");
407 return; 445 return;
408 } 446 }
447 pic_lock(s);
409 switch (addr) { 448 switch (addr) {
410 case 0x20: 449 case 0x20:
411 case 0x21: 450 case 0x21:
@@ -418,6 +457,7 @@ static void picdev_write(struct kvm_io_device *this,
418 elcr_ioport_write(&s->pics[addr & 1], addr, data); 457 elcr_ioport_write(&s->pics[addr & 1], addr, data);
419 break; 458 break;
420 } 459 }
460 pic_unlock(s);
421} 461}
422 462
423static void picdev_read(struct kvm_io_device *this, 463static void picdev_read(struct kvm_io_device *this,
@@ -431,6 +471,7 @@ static void picdev_read(struct kvm_io_device *this,
431 printk(KERN_ERR "PIC: non byte read\n"); 471 printk(KERN_ERR "PIC: non byte read\n");
432 return; 472 return;
433 } 473 }
474 pic_lock(s);
434 switch (addr) { 475 switch (addr) {
435 case 0x20: 476 case 0x20:
436 case 0x21: 477 case 0x21:
@@ -444,6 +485,7 @@ static void picdev_read(struct kvm_io_device *this,
444 break; 485 break;
445 } 486 }
446 *(unsigned char *)val = data; 487 *(unsigned char *)val = data;
488 pic_unlock(s);
447} 489}
448 490
449/* 491/*
@@ -459,7 +501,7 @@ static void pic_irq_request(void *opaque, int level)
459 s->output = level; 501 s->output = level;
460 if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { 502 if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
461 s->pics[0].isr_ack &= ~(1 << irq); 503 s->pics[0].isr_ack &= ~(1 << irq);
462 kvm_vcpu_kick(vcpu); 504 s->wakeup_needed = true;
463 } 505 }
464} 506}
465 507
@@ -469,6 +511,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
469 s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); 511 s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
470 if (!s) 512 if (!s)
471 return NULL; 513 return NULL;
514 spin_lock_init(&s->lock);
515 s->kvm = kvm;
472 s->pics[0].elcr_mask = 0xf8; 516 s->pics[0].elcr_mask = 0xf8;
473 s->pics[1].elcr_mask = 0xde; 517 s->pics[1].elcr_mask = 0xde;
474 s->irq_request = pic_irq_request; 518 s->irq_request = pic_irq_request;
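The i8259.c hunks above put a new per-PIC spinlock around every register access and defer both the IRQ-ack notifications and the vcpu wakeup to pic_unlock(), so neither callback ever runs with the lock held. A minimal sketch of that record-under-the-lock, act-after-unlock pattern, assuming kernel headers; pic_state, notify_acked() and wake_vcpu() are illustrative stand-ins for kvm_pic, kvm_notify_acked_irq() and kvm_vcpu_kick(), not real API:

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/types.h>

void notify_acked(int irq);             /* hypothetical ack callback */
void wake_vcpu(void);                   /* hypothetical wakeup callback */

struct pic_state {
        spinlock_t lock;
        unsigned int pending_acks;      /* bitmask of IRQs to ack once unlocked */
        bool wakeup_needed;
};

static void pic_state_unlock(struct pic_state *s)
{
        unsigned int acks = s->pending_acks;    /* snapshot while still locked */
        bool wakeup = s->wakeup_needed;

        s->pending_acks = 0;
        s->wakeup_needed = false;
        spin_unlock(&s->lock);

        /* The callbacks may take other locks, so run them lock-free. */
        while (acks) {
                notify_acked(__ffs(acks));
                acks &= acks - 1;               /* clear the lowest set bit */
        }
        if (wakeup)
                wake_vcpu();
}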
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index f17c8f5bbf31..2bf32a03ceec 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -25,6 +25,7 @@
25#include <linux/mm_types.h> 25#include <linux/mm_types.h>
26#include <linux/hrtimer.h> 26#include <linux/hrtimer.h>
27#include <linux/kvm_host.h> 27#include <linux/kvm_host.h>
28#include <linux/spinlock.h>
28 29
29#include "iodev.h" 30#include "iodev.h"
30#include "ioapic.h" 31#include "ioapic.h"
@@ -59,6 +60,10 @@ struct kvm_kpic_state {
59}; 60};
60 61
61struct kvm_pic { 62struct kvm_pic {
63 spinlock_t lock;
64 bool wakeup_needed;
65 unsigned pending_acks;
66 struct kvm *kvm;
62 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ 67 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
63 irq_request_func *irq_request; 68 irq_request_func *irq_request;
64 void *irq_request_opaque; 69 void *irq_request_opaque;
@@ -87,6 +92,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s);
87void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); 92void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
88void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); 93void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
89void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); 94void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
95void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
90void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); 96void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
91void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); 97void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
92void __kvm_migrate_timers(struct kvm_vcpu *vcpu); 98void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 65ef0fc2c036..8e5ee99551f6 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -7,7 +7,7 @@
7#include <linux/kvm_host.h> 7#include <linux/kvm_host.h>
8#include <asm/msr.h> 8#include <asm/msr.h>
9 9
10#include "svm.h" 10#include <asm/svm.h>
11 11
12static const u32 host_save_user_msrs[] = { 12static const u32 host_save_user_msrs[] = {
13#ifdef CONFIG_X86_64 13#ifdef CONFIG_X86_64
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0fc3cab48943..afac68c0815c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -130,6 +130,11 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic)
130 return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; 130 return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
131} 131}
132 132
133static inline int apic_lvt_nmi_mode(u32 lvt_val)
134{
135 return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
136}
137
133static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { 138static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
134 LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ 139 LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */
135 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ 140 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
@@ -354,6 +359,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
354 359
355 case APIC_DM_NMI: 360 case APIC_DM_NMI:
356 kvm_inject_nmi(vcpu); 361 kvm_inject_nmi(vcpu);
362 kvm_vcpu_kick(vcpu);
357 break; 363 break;
358 364
359 case APIC_DM_INIT: 365 case APIC_DM_INIT:
@@ -380,6 +386,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
380 } 386 }
381 break; 387 break;
382 388
389 case APIC_DM_EXTINT:
390 /*
391 * Should only be called by kvm_apic_local_deliver() with LVT0,
392 * before NMI watchdog was enabled. Already handled by
393 * kvm_apic_accept_pic_intr().
394 */
395 break;
396
383 default: 397 default:
384 printk(KERN_ERR "TODO: unsupported delivery mode %x\n", 398 printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
385 delivery_mode); 399 delivery_mode);
@@ -663,6 +677,20 @@ static void start_apic_timer(struct kvm_lapic *apic)
663 apic->timer.period))); 677 apic->timer.period)));
664} 678}
665 679
680static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
681{
682 int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0));
683
684 if (apic_lvt_nmi_mode(lvt0_val)) {
685 if (!nmi_wd_enabled) {
686 apic_debug("Receive NMI setting on APIC_LVT0 "
687 "for cpu %d\n", apic->vcpu->vcpu_id);
688 apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
689 }
690 } else if (nmi_wd_enabled)
691 apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
692}
693
666static void apic_mmio_write(struct kvm_io_device *this, 694static void apic_mmio_write(struct kvm_io_device *this,
667 gpa_t address, int len, const void *data) 695 gpa_t address, int len, const void *data)
668{ 696{
@@ -743,10 +771,11 @@ static void apic_mmio_write(struct kvm_io_device *this,
743 apic_set_reg(apic, APIC_ICR2, val & 0xff000000); 771 apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
744 break; 772 break;
745 773
774 case APIC_LVT0:
775 apic_manage_nmi_watchdog(apic, val);
746 case APIC_LVTT: 776 case APIC_LVTT:
747 case APIC_LVTTHMR: 777 case APIC_LVTTHMR:
748 case APIC_LVTPC: 778 case APIC_LVTPC:
749 case APIC_LVT0:
750 case APIC_LVT1: 779 case APIC_LVT1:
751 case APIC_LVTERR: 780 case APIC_LVTERR:
752 /* TODO: Check vector */ 781 /* TODO: Check vector */
@@ -961,12 +990,26 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
961 return 0; 990 return 0;
962} 991}
963 992
964static int __inject_apic_timer_irq(struct kvm_lapic *apic) 993static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
994{
995 u32 reg = apic_get_reg(apic, lvt_type);
996 int vector, mode, trig_mode;
997
998 if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
999 vector = reg & APIC_VECTOR_MASK;
1000 mode = reg & APIC_MODE_MASK;
1001 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
1002 return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
1003 }
1004 return 0;
1005}
1006
1007void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
965{ 1008{
966 int vector; 1009 struct kvm_lapic *apic = vcpu->arch.apic;
967 1010
968 vector = apic_lvt_vector(apic, APIC_LVTT); 1011 if (apic)
969 return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0); 1012 kvm_apic_local_deliver(apic, APIC_LVT0);
970} 1013}
971 1014
972static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) 1015static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
@@ -1061,9 +1104,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
1061{ 1104{
1062 struct kvm_lapic *apic = vcpu->arch.apic; 1105 struct kvm_lapic *apic = vcpu->arch.apic;
1063 1106
1064 if (apic && apic_lvt_enabled(apic, APIC_LVTT) && 1107 if (apic && atomic_read(&apic->timer.pending) > 0) {
1065 atomic_read(&apic->timer.pending) > 0) { 1108 if (kvm_apic_local_deliver(apic, APIC_LVTT))
1066 if (__inject_apic_timer_irq(apic))
1067 atomic_dec(&apic->timer.pending); 1109 atomic_dec(&apic->timer.pending);
1068 } 1110 }
1069} 1111}
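The new apic_lvt_nmi_mode()/apic_manage_nmi_watchdog() pair above maintains a per-VM count (arch.vapics_in_nmi_mode) of vcpus whose LVT0 entry is programmed as an unmasked NMI source. The predicate is a single mask-and-compare on the LVT register: the delivery-mode field must read NMI and the mask bit must be clear. A stand-alone sketch; the field values are written out here per the local APIC register layout rather than taken from kernel headers:

#include <stdint.h>
#include <stdio.h>

#define LVT_DELIVERY_MODE       0x00000700u     /* bits 10:8 */
#define LVT_MASKED              0x00010000u     /* bit 16 */
#define DELIVERY_MODE_NMI       0x00000400u     /* 100b in bits 10:8 */

/* True when the LVT entry delivers NMI and is not masked. */
static int lvt_is_nmi_watchdog(uint32_t lvt)
{
        return (lvt & (LVT_DELIVERY_MODE | LVT_MASKED)) == DELIVERY_MODE_NMI;
}

int main(void)
{
        printf("%d\n", lvt_is_nmi_watchdog(0x00400));   /* 1: NMI, unmasked */
        printf("%d\n", lvt_is_nmi_watchdog(0x10400));   /* 0: NMI but masked */
        printf("%d\n", lvt_is_nmi_watchdog(0x000fe));   /* 0: fixed vector 0xfe */
        return 0;
}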
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 410ddbc1aa2e..83f11c7474a1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -17,7 +17,6 @@
17 * 17 *
18 */ 18 */
19 19
20#include "vmx.h"
21#include "mmu.h" 20#include "mmu.h"
22 21
23#include <linux/kvm_host.h> 22#include <linux/kvm_host.h>
@@ -33,6 +32,7 @@
33#include <asm/page.h> 32#include <asm/page.h>
34#include <asm/cmpxchg.h> 33#include <asm/cmpxchg.h>
35#include <asm/io.h> 34#include <asm/io.h>
35#include <asm/vmx.h>
36 36
37/* 37/*
38 * When setting this variable to true it enables Two-Dimensional-Paging 38 * When setting this variable to true it enables Two-Dimensional-Paging
@@ -168,6 +168,7 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
168static u64 __read_mostly shadow_user_mask; 168static u64 __read_mostly shadow_user_mask;
169static u64 __read_mostly shadow_accessed_mask; 169static u64 __read_mostly shadow_accessed_mask;
170static u64 __read_mostly shadow_dirty_mask; 170static u64 __read_mostly shadow_dirty_mask;
171static u64 __read_mostly shadow_mt_mask;
171 172
172void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) 173void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
173{ 174{
@@ -183,13 +184,14 @@ void kvm_mmu_set_base_ptes(u64 base_pte)
183EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); 184EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
184 185
185void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 186void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
186 u64 dirty_mask, u64 nx_mask, u64 x_mask) 187 u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask)
187{ 188{
188 shadow_user_mask = user_mask; 189 shadow_user_mask = user_mask;
189 shadow_accessed_mask = accessed_mask; 190 shadow_accessed_mask = accessed_mask;
190 shadow_dirty_mask = dirty_mask; 191 shadow_dirty_mask = dirty_mask;
191 shadow_nx_mask = nx_mask; 192 shadow_nx_mask = nx_mask;
192 shadow_x_mask = x_mask; 193 shadow_x_mask = x_mask;
194 shadow_mt_mask = mt_mask;
193} 195}
194EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); 196EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
195 197
@@ -384,7 +386,9 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
384{ 386{
385 int *write_count; 387 int *write_count;
386 388
387 write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); 389 gfn = unalias_gfn(kvm, gfn);
390 write_count = slot_largepage_idx(gfn,
391 gfn_to_memslot_unaliased(kvm, gfn));
388 *write_count += 1; 392 *write_count += 1;
389} 393}
390 394
@@ -392,16 +396,20 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
392{ 396{
393 int *write_count; 397 int *write_count;
394 398
395 write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); 399 gfn = unalias_gfn(kvm, gfn);
400 write_count = slot_largepage_idx(gfn,
401 gfn_to_memslot_unaliased(kvm, gfn));
396 *write_count -= 1; 402 *write_count -= 1;
397 WARN_ON(*write_count < 0); 403 WARN_ON(*write_count < 0);
398} 404}
399 405
400static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) 406static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
401{ 407{
402 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); 408 struct kvm_memory_slot *slot;
403 int *largepage_idx; 409 int *largepage_idx;
404 410
411 gfn = unalias_gfn(kvm, gfn);
412 slot = gfn_to_memslot_unaliased(kvm, gfn);
405 if (slot) { 413 if (slot) {
406 largepage_idx = slot_largepage_idx(gfn, slot); 414 largepage_idx = slot_largepage_idx(gfn, slot);
407 return *largepage_idx; 415 return *largepage_idx;
@@ -613,7 +621,7 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
613 return NULL; 621 return NULL;
614} 622}
615 623
616static void rmap_write_protect(struct kvm *kvm, u64 gfn) 624static int rmap_write_protect(struct kvm *kvm, u64 gfn)
617{ 625{
618 unsigned long *rmapp; 626 unsigned long *rmapp;
619 u64 *spte; 627 u64 *spte;
@@ -659,8 +667,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
659 spte = rmap_next(kvm, rmapp, spte); 667 spte = rmap_next(kvm, rmapp, spte);
660 } 668 }
661 669
662 if (write_protected) 670 return write_protected;
663 kvm_flush_remote_tlbs(kvm);
664} 671}
665 672
666static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) 673static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
@@ -786,9 +793,11 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
786 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); 793 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
787 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 794 set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
788 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 795 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
796 INIT_LIST_HEAD(&sp->oos_link);
789 ASSERT(is_empty_shadow_page(sp->spt)); 797 ASSERT(is_empty_shadow_page(sp->spt));
790 sp->slot_bitmap = 0; 798 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
791 sp->multimapped = 0; 799 sp->multimapped = 0;
800 sp->global = 1;
792 sp->parent_pte = parent_pte; 801 sp->parent_pte = parent_pte;
793 --vcpu->kvm->arch.n_free_mmu_pages; 802 --vcpu->kvm->arch.n_free_mmu_pages;
794 return sp; 803 return sp;
@@ -900,8 +909,9 @@ static void kvm_mmu_update_unsync_bitmap(u64 *spte)
900 struct kvm_mmu_page *sp = page_header(__pa(spte)); 909 struct kvm_mmu_page *sp = page_header(__pa(spte));
901 910
902 index = spte - sp->spt; 911 index = spte - sp->spt;
903 __set_bit(index, sp->unsync_child_bitmap); 912 if (!__test_and_set_bit(index, sp->unsync_child_bitmap))
904 sp->unsync_children = 1; 913 sp->unsync_children++;
914 WARN_ON(!sp->unsync_children);
905} 915}
906 916
907static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) 917static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
@@ -928,7 +938,6 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
928 938
929static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 939static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
930{ 940{
931 sp->unsync_children = 1;
932 kvm_mmu_update_parents_unsync(sp); 941 kvm_mmu_update_parents_unsync(sp);
933 return 1; 942 return 1;
934} 943}
@@ -959,38 +968,66 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
959{ 968{
960} 969}
961 970
971#define KVM_PAGE_ARRAY_NR 16
972
973struct kvm_mmu_pages {
974 struct mmu_page_and_offset {
975 struct kvm_mmu_page *sp;
976 unsigned int idx;
977 } page[KVM_PAGE_ARRAY_NR];
978 unsigned int nr;
979};
980
962#define for_each_unsync_children(bitmap, idx) \ 981#define for_each_unsync_children(bitmap, idx) \
963 for (idx = find_first_bit(bitmap, 512); \ 982 for (idx = find_first_bit(bitmap, 512); \
964 idx < 512; \ 983 idx < 512; \
965 idx = find_next_bit(bitmap, 512, idx+1)) 984 idx = find_next_bit(bitmap, 512, idx+1))
966 985
967static int mmu_unsync_walk(struct kvm_mmu_page *sp, 986int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
968 struct kvm_unsync_walk *walker) 987 int idx)
969{ 988{
970 int i, ret; 989 int i;
971 990
972 if (!sp->unsync_children) 991 if (sp->unsync)
973 return 0; 992 for (i=0; i < pvec->nr; i++)
993 if (pvec->page[i].sp == sp)
994 return 0;
995
996 pvec->page[pvec->nr].sp = sp;
997 pvec->page[pvec->nr].idx = idx;
998 pvec->nr++;
999 return (pvec->nr == KVM_PAGE_ARRAY_NR);
1000}
1001
1002static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
1003 struct kvm_mmu_pages *pvec)
1004{
1005 int i, ret, nr_unsync_leaf = 0;
974 1006
975 for_each_unsync_children(sp->unsync_child_bitmap, i) { 1007 for_each_unsync_children(sp->unsync_child_bitmap, i) {
976 u64 ent = sp->spt[i]; 1008 u64 ent = sp->spt[i];
977 1009
978 if (is_shadow_present_pte(ent)) { 1010 if (is_shadow_present_pte(ent) && !is_large_pte(ent)) {
979 struct kvm_mmu_page *child; 1011 struct kvm_mmu_page *child;
980 child = page_header(ent & PT64_BASE_ADDR_MASK); 1012 child = page_header(ent & PT64_BASE_ADDR_MASK);
981 1013
982 if (child->unsync_children) { 1014 if (child->unsync_children) {
983 ret = mmu_unsync_walk(child, walker); 1015 if (mmu_pages_add(pvec, child, i))
984 if (ret) 1016 return -ENOSPC;
1017
1018 ret = __mmu_unsync_walk(child, pvec);
1019 if (!ret)
1020 __clear_bit(i, sp->unsync_child_bitmap);
1021 else if (ret > 0)
1022 nr_unsync_leaf += ret;
1023 else
985 return ret; 1024 return ret;
986 __clear_bit(i, sp->unsync_child_bitmap);
987 } 1025 }
988 1026
989 if (child->unsync) { 1027 if (child->unsync) {
990 ret = walker->entry(child, walker); 1028 nr_unsync_leaf++;
991 __clear_bit(i, sp->unsync_child_bitmap); 1029 if (mmu_pages_add(pvec, child, i))
992 if (ret) 1030 return -ENOSPC;
993 return ret;
994 } 1031 }
995 } 1032 }
996 } 1033 }
@@ -998,7 +1035,17 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
998 if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) 1035 if (find_first_bit(sp->unsync_child_bitmap, 512) == 512)
999 sp->unsync_children = 0; 1036 sp->unsync_children = 0;
1000 1037
1001 return 0; 1038 return nr_unsync_leaf;
1039}
1040
1041static int mmu_unsync_walk(struct kvm_mmu_page *sp,
1042 struct kvm_mmu_pages *pvec)
1043{
1044 if (!sp->unsync_children)
1045 return 0;
1046
1047 mmu_pages_add(pvec, sp, 0);
1048 return __mmu_unsync_walk(sp, pvec);
1002} 1049}
1003 1050
1004static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) 1051static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
@@ -1021,10 +1068,18 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
1021 return NULL; 1068 return NULL;
1022} 1069}
1023 1070
1071static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp)
1072{
1073 list_del(&sp->oos_link);
1074 --kvm->stat.mmu_unsync_global;
1075}
1076
1024static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1077static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1025{ 1078{
1026 WARN_ON(!sp->unsync); 1079 WARN_ON(!sp->unsync);
1027 sp->unsync = 0; 1080 sp->unsync = 0;
1081 if (sp->global)
1082 kvm_unlink_unsync_global(kvm, sp);
1028 --kvm->stat.mmu_unsync; 1083 --kvm->stat.mmu_unsync;
1029} 1084}
1030 1085
@@ -1037,7 +1092,8 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1037 return 1; 1092 return 1;
1038 } 1093 }
1039 1094
1040 rmap_write_protect(vcpu->kvm, sp->gfn); 1095 if (rmap_write_protect(vcpu->kvm, sp->gfn))
1096 kvm_flush_remote_tlbs(vcpu->kvm);
1041 kvm_unlink_unsync_page(vcpu->kvm, sp); 1097 kvm_unlink_unsync_page(vcpu->kvm, sp);
1042 if (vcpu->arch.mmu.sync_page(vcpu, sp)) { 1098 if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
1043 kvm_mmu_zap_page(vcpu->kvm, sp); 1099 kvm_mmu_zap_page(vcpu->kvm, sp);
@@ -1048,30 +1104,89 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1048 return 0; 1104 return 0;
1049} 1105}
1050 1106
1051struct sync_walker { 1107struct mmu_page_path {
1052 struct kvm_vcpu *vcpu; 1108 struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
1053 struct kvm_unsync_walk walker; 1109 unsigned int idx[PT64_ROOT_LEVEL-1];
1054}; 1110};
1055 1111
1056static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) 1112#define for_each_sp(pvec, sp, parents, i) \
1113 for (i = mmu_pages_next(&pvec, &parents, -1), \
1114 sp = pvec.page[i].sp; \
1115 i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
1116 i = mmu_pages_next(&pvec, &parents, i))
1117
1118int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
1119 int i)
1057{ 1120{
1058 struct sync_walker *sync_walk = container_of(walk, struct sync_walker, 1121 int n;
1059 walker);
1060 struct kvm_vcpu *vcpu = sync_walk->vcpu;
1061 1122
1062 kvm_sync_page(vcpu, sp); 1123 for (n = i+1; n < pvec->nr; n++) {
1063 return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)); 1124 struct kvm_mmu_page *sp = pvec->page[n].sp;
1125
1126 if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
1127 parents->idx[0] = pvec->page[n].idx;
1128 return n;
1129 }
1130
1131 parents->parent[sp->role.level-2] = sp;
1132 parents->idx[sp->role.level-1] = pvec->page[n].idx;
1133 }
1134
1135 return n;
1064} 1136}
1065 1137
1066static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1138void mmu_pages_clear_parents(struct mmu_page_path *parents)
1067{ 1139{
1068 struct sync_walker walker = { 1140 struct kvm_mmu_page *sp;
1069 .walker = { .entry = mmu_sync_fn, }, 1141 unsigned int level = 0;
1070 .vcpu = vcpu, 1142
1071 }; 1143 do {
1144 unsigned int idx = parents->idx[level];
1145
1146 sp = parents->parent[level];
1147 if (!sp)
1148 return;
1149
1150 --sp->unsync_children;
1151 WARN_ON((int)sp->unsync_children < 0);
1152 __clear_bit(idx, sp->unsync_child_bitmap);
1153 level++;
1154 } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
1155}
1156
1157static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
1158 struct mmu_page_path *parents,
1159 struct kvm_mmu_pages *pvec)
1160{
1161 parents->parent[parent->role.level-1] = NULL;
1162 pvec->nr = 0;
1163}
1164
1165static void mmu_sync_children(struct kvm_vcpu *vcpu,
1166 struct kvm_mmu_page *parent)
1167{
1168 int i;
1169 struct kvm_mmu_page *sp;
1170 struct mmu_page_path parents;
1171 struct kvm_mmu_pages pages;
1172
1173 kvm_mmu_pages_init(parent, &parents, &pages);
1174 while (mmu_unsync_walk(parent, &pages)) {
1175 int protected = 0;
1072 1176
1073 while (mmu_unsync_walk(sp, &walker.walker)) 1177 for_each_sp(pages, sp, parents, i)
1178 protected |= rmap_write_protect(vcpu->kvm, sp->gfn);
1179
1180 if (protected)
1181 kvm_flush_remote_tlbs(vcpu->kvm);
1182
1183 for_each_sp(pages, sp, parents, i) {
1184 kvm_sync_page(vcpu, sp);
1185 mmu_pages_clear_parents(&parents);
1186 }
1074 cond_resched_lock(&vcpu->kvm->mmu_lock); 1187 cond_resched_lock(&vcpu->kvm->mmu_lock);
1188 kvm_mmu_pages_init(parent, &parents, &pages);
1189 }
1075} 1190}
1076 1191
1077static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, 1192static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
@@ -1129,7 +1244,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1129 sp->role = role; 1244 sp->role = role;
1130 hlist_add_head(&sp->hash_link, bucket); 1245 hlist_add_head(&sp->hash_link, bucket);
1131 if (!metaphysical) { 1246 if (!metaphysical) {
1132 rmap_write_protect(vcpu->kvm, gfn); 1247 if (rmap_write_protect(vcpu->kvm, gfn))
1248 kvm_flush_remote_tlbs(vcpu->kvm);
1133 account_shadowed(vcpu->kvm, gfn); 1249 account_shadowed(vcpu->kvm, gfn);
1134 } 1250 }
1135 if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) 1251 if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
@@ -1153,6 +1269,8 @@ static int walk_shadow(struct kvm_shadow_walk *walker,
1153 if (level == PT32E_ROOT_LEVEL) { 1269 if (level == PT32E_ROOT_LEVEL) {
1154 shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; 1270 shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
1155 shadow_addr &= PT64_BASE_ADDR_MASK; 1271 shadow_addr &= PT64_BASE_ADDR_MASK;
1272 if (!shadow_addr)
1273 return 1;
1156 --level; 1274 --level;
1157 } 1275 }
1158 1276
@@ -1237,33 +1355,29 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
1237 } 1355 }
1238} 1356}
1239 1357
1240struct zap_walker { 1358static int mmu_zap_unsync_children(struct kvm *kvm,
1241 struct kvm_unsync_walk walker; 1359 struct kvm_mmu_page *parent)
1242 struct kvm *kvm;
1243 int zapped;
1244};
1245
1246static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
1247{ 1360{
1248 struct zap_walker *zap_walk = container_of(walk, struct zap_walker, 1361 int i, zapped = 0;
1249 walker); 1362 struct mmu_page_path parents;
1250 kvm_mmu_zap_page(zap_walk->kvm, sp); 1363 struct kvm_mmu_pages pages;
1251 zap_walk->zapped = 1;
1252 return 0;
1253}
1254 1364
1255static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp) 1365 if (parent->role.level == PT_PAGE_TABLE_LEVEL)
1256{
1257 struct zap_walker walker = {
1258 .walker = { .entry = mmu_zap_fn, },
1259 .kvm = kvm,
1260 .zapped = 0,
1261 };
1262
1263 if (sp->role.level == PT_PAGE_TABLE_LEVEL)
1264 return 0; 1366 return 0;
1265 mmu_unsync_walk(sp, &walker.walker); 1367
1266 return walker.zapped; 1368 kvm_mmu_pages_init(parent, &parents, &pages);
1369 while (mmu_unsync_walk(parent, &pages)) {
1370 struct kvm_mmu_page *sp;
1371
1372 for_each_sp(pages, sp, parents, i) {
1373 kvm_mmu_zap_page(kvm, sp);
1374 mmu_pages_clear_parents(&parents);
1375 }
1376 zapped += pages.nr;
1377 kvm_mmu_pages_init(parent, &parents, &pages);
1378 }
1379
1380 return zapped;
1267} 1381}
1268 1382
1269static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1383static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -1362,7 +1476,7 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
1362 int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); 1476 int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
1363 struct kvm_mmu_page *sp = page_header(__pa(pte)); 1477 struct kvm_mmu_page *sp = page_header(__pa(pte));
1364 1478
1365 __set_bit(slot, &sp->slot_bitmap); 1479 __set_bit(slot, sp->slot_bitmap);
1366} 1480}
1367 1481
1368static void mmu_convert_notrap(struct kvm_mmu_page *sp) 1482static void mmu_convert_notrap(struct kvm_mmu_page *sp)
@@ -1393,6 +1507,110 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
1393 return page; 1507 return page;
1394} 1508}
1395 1509
1510/*
1511 * The function is based on mtrr_type_lookup() in
1512 * arch/x86/kernel/cpu/mtrr/generic.c
1513 */
1514static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
1515 u64 start, u64 end)
1516{
1517 int i;
1518 u64 base, mask;
1519 u8 prev_match, curr_match;
1520 int num_var_ranges = KVM_NR_VAR_MTRR;
1521
1522 if (!mtrr_state->enabled)
1523 return 0xFF;
1524
 1525 /* Make end inclusive, instead of exclusive */
1526 end--;
1527
1528 /* Look in fixed ranges. Just return the type as per start */
1529 if (mtrr_state->have_fixed && (start < 0x100000)) {
1530 int idx;
1531
1532 if (start < 0x80000) {
1533 idx = 0;
1534 idx += (start >> 16);
1535 return mtrr_state->fixed_ranges[idx];
1536 } else if (start < 0xC0000) {
1537 idx = 1 * 8;
1538 idx += ((start - 0x80000) >> 14);
1539 return mtrr_state->fixed_ranges[idx];
1540 } else if (start < 0x1000000) {
1541 idx = 3 * 8;
1542 idx += ((start - 0xC0000) >> 12);
1543 return mtrr_state->fixed_ranges[idx];
1544 }
1545 }
1546
1547 /*
1548 * Look in variable ranges
 1549 * Look for multiple ranges matching this address and pick the type
1550 * as per MTRR precedence
1551 */
1552 if (!(mtrr_state->enabled & 2))
1553 return mtrr_state->def_type;
1554
1555 prev_match = 0xFF;
1556 for (i = 0; i < num_var_ranges; ++i) {
1557 unsigned short start_state, end_state;
1558
1559 if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11)))
1560 continue;
1561
1562 base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) +
1563 (mtrr_state->var_ranges[i].base_lo & PAGE_MASK);
1564 mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) +
1565 (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK);
1566
1567 start_state = ((start & mask) == (base & mask));
1568 end_state = ((end & mask) == (base & mask));
1569 if (start_state != end_state)
1570 return 0xFE;
1571
1572 if ((start & mask) != (base & mask))
1573 continue;
1574
1575 curr_match = mtrr_state->var_ranges[i].base_lo & 0xff;
1576 if (prev_match == 0xFF) {
1577 prev_match = curr_match;
1578 continue;
1579 }
1580
1581 if (prev_match == MTRR_TYPE_UNCACHABLE ||
1582 curr_match == MTRR_TYPE_UNCACHABLE)
1583 return MTRR_TYPE_UNCACHABLE;
1584
1585 if ((prev_match == MTRR_TYPE_WRBACK &&
1586 curr_match == MTRR_TYPE_WRTHROUGH) ||
1587 (prev_match == MTRR_TYPE_WRTHROUGH &&
1588 curr_match == MTRR_TYPE_WRBACK)) {
1589 prev_match = MTRR_TYPE_WRTHROUGH;
1590 curr_match = MTRR_TYPE_WRTHROUGH;
1591 }
1592
1593 if (prev_match != curr_match)
1594 return MTRR_TYPE_UNCACHABLE;
1595 }
1596
1597 if (prev_match != 0xFF)
1598 return prev_match;
1599
1600 return mtrr_state->def_type;
1601}
1602
1603static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
1604{
1605 u8 mtrr;
1606
1607 mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT,
1608 (gfn << PAGE_SHIFT) + PAGE_SIZE);
1609 if (mtrr == 0xfe || mtrr == 0xff)
1610 mtrr = MTRR_TYPE_WRBACK;
1611 return mtrr;
1612}
1613
1396static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1614static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1397{ 1615{
1398 unsigned index; 1616 unsigned index;
@@ -1409,9 +1627,15 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1409 if (s->role.word != sp->role.word) 1627 if (s->role.word != sp->role.word)
1410 return 1; 1628 return 1;
1411 } 1629 }
1412 kvm_mmu_mark_parents_unsync(vcpu, sp);
1413 ++vcpu->kvm->stat.mmu_unsync; 1630 ++vcpu->kvm->stat.mmu_unsync;
1414 sp->unsync = 1; 1631 sp->unsync = 1;
1632
1633 if (sp->global) {
1634 list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages);
1635 ++vcpu->kvm->stat.mmu_unsync_global;
1636 } else
1637 kvm_mmu_mark_parents_unsync(vcpu, sp);
1638
1415 mmu_convert_notrap(sp); 1639 mmu_convert_notrap(sp);
1416 return 0; 1640 return 0;
1417} 1641}
@@ -1437,11 +1661,24 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
1437static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1661static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1438 unsigned pte_access, int user_fault, 1662 unsigned pte_access, int user_fault,
1439 int write_fault, int dirty, int largepage, 1663 int write_fault, int dirty, int largepage,
1440 gfn_t gfn, pfn_t pfn, bool speculative, 1664 int global, gfn_t gfn, pfn_t pfn, bool speculative,
1441 bool can_unsync) 1665 bool can_unsync)
1442{ 1666{
1443 u64 spte; 1667 u64 spte;
1444 int ret = 0; 1668 int ret = 0;
1669 u64 mt_mask = shadow_mt_mask;
1670 struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
1671
1672 if (!(vcpu->arch.cr4 & X86_CR4_PGE))
1673 global = 0;
1674 if (!global && sp->global) {
1675 sp->global = 0;
1676 if (sp->unsync) {
1677 kvm_unlink_unsync_global(vcpu->kvm, sp);
1678 kvm_mmu_mark_parents_unsync(vcpu, sp);
1679 }
1680 }
1681
1445 /* 1682 /*
1446 * We don't set the accessed bit, since we sometimes want to see 1683 * We don't set the accessed bit, since we sometimes want to see
1447 * whether the guest actually used the pte (in order to detect 1684 * whether the guest actually used the pte (in order to detect
@@ -1460,6 +1697,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1460 spte |= shadow_user_mask; 1697 spte |= shadow_user_mask;
1461 if (largepage) 1698 if (largepage)
1462 spte |= PT_PAGE_SIZE_MASK; 1699 spte |= PT_PAGE_SIZE_MASK;
1700 if (mt_mask) {
1701 mt_mask = get_memory_type(vcpu, gfn) <<
1702 kvm_x86_ops->get_mt_mask_shift();
1703 spte |= mt_mask;
1704 }
1463 1705
1464 spte |= (u64)pfn << PAGE_SHIFT; 1706 spte |= (u64)pfn << PAGE_SHIFT;
1465 1707
@@ -1474,6 +1716,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1474 1716
1475 spte |= PT_WRITABLE_MASK; 1717 spte |= PT_WRITABLE_MASK;
1476 1718
1719 /*
1720 * Optimization: for pte sync, if spte was writable the hash
1721 * lookup is unnecessary (and expensive). Write protection
 1722 * is the responsibility of mmu_get_page / kvm_sync_page.
1723 * Same reasoning can be applied to dirty page accounting.
1724 */
1725 if (!can_unsync && is_writeble_pte(*shadow_pte))
1726 goto set_pte;
1727
1477 if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { 1728 if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
1478 pgprintk("%s: found shadow page for %lx, marking ro\n", 1729 pgprintk("%s: found shadow page for %lx, marking ro\n",
1479 __func__, gfn); 1730 __func__, gfn);
@@ -1495,8 +1746,8 @@ set_pte:
1495static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1746static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1496 unsigned pt_access, unsigned pte_access, 1747 unsigned pt_access, unsigned pte_access,
1497 int user_fault, int write_fault, int dirty, 1748 int user_fault, int write_fault, int dirty,
1498 int *ptwrite, int largepage, gfn_t gfn, 1749 int *ptwrite, int largepage, int global,
1499 pfn_t pfn, bool speculative) 1750 gfn_t gfn, pfn_t pfn, bool speculative)
1500{ 1751{
1501 int was_rmapped = 0; 1752 int was_rmapped = 0;
1502 int was_writeble = is_writeble_pte(*shadow_pte); 1753 int was_writeble = is_writeble_pte(*shadow_pte);
@@ -1529,7 +1780,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1529 } 1780 }
1530 } 1781 }
1531 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, 1782 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
1532 dirty, largepage, gfn, pfn, speculative, true)) { 1783 dirty, largepage, global, gfn, pfn, speculative, true)) {
1533 if (write_fault) 1784 if (write_fault)
1534 *ptwrite = 1; 1785 *ptwrite = 1;
1535 kvm_x86_ops->tlb_flush(vcpu); 1786 kvm_x86_ops->tlb_flush(vcpu);
@@ -1586,7 +1837,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk,
1586 || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { 1837 || (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
1587 mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, 1838 mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
1588 0, walk->write, 1, &walk->pt_write, 1839 0, walk->write, 1, &walk->pt_write,
1589 walk->largepage, gfn, walk->pfn, false); 1840 walk->largepage, 0, gfn, walk->pfn, false);
1590 ++vcpu->stat.pf_fixed; 1841 ++vcpu->stat.pf_fixed;
1591 return 1; 1842 return 1;
1592 } 1843 }
@@ -1773,6 +2024,15 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
1773 } 2024 }
1774} 2025}
1775 2026
2027static void mmu_sync_global(struct kvm_vcpu *vcpu)
2028{
2029 struct kvm *kvm = vcpu->kvm;
2030 struct kvm_mmu_page *sp, *n;
2031
2032 list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link)
2033 kvm_sync_page(vcpu, sp);
2034}
2035
1776void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) 2036void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
1777{ 2037{
1778 spin_lock(&vcpu->kvm->mmu_lock); 2038 spin_lock(&vcpu->kvm->mmu_lock);
@@ -1780,6 +2040,13 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
1780 spin_unlock(&vcpu->kvm->mmu_lock); 2040 spin_unlock(&vcpu->kvm->mmu_lock);
1781} 2041}
1782 2042
2043void kvm_mmu_sync_global(struct kvm_vcpu *vcpu)
2044{
2045 spin_lock(&vcpu->kvm->mmu_lock);
2046 mmu_sync_global(vcpu);
2047 spin_unlock(&vcpu->kvm->mmu_lock);
2048}
2049
1783static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) 2050static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
1784{ 2051{
1785 return vaddr; 2052 return vaddr;
@@ -2178,7 +2445,8 @@ static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn)
2178} 2445}
2179 2446
2180void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 2447void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2181 const u8 *new, int bytes) 2448 const u8 *new, int bytes,
2449 bool guest_initiated)
2182{ 2450{
2183 gfn_t gfn = gpa >> PAGE_SHIFT; 2451 gfn_t gfn = gpa >> PAGE_SHIFT;
2184 struct kvm_mmu_page *sp; 2452 struct kvm_mmu_page *sp;
@@ -2204,15 +2472,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2204 kvm_mmu_free_some_pages(vcpu); 2472 kvm_mmu_free_some_pages(vcpu);
2205 ++vcpu->kvm->stat.mmu_pte_write; 2473 ++vcpu->kvm->stat.mmu_pte_write;
2206 kvm_mmu_audit(vcpu, "pre pte write"); 2474 kvm_mmu_audit(vcpu, "pre pte write");
2207 if (gfn == vcpu->arch.last_pt_write_gfn 2475 if (guest_initiated) {
2208 && !last_updated_pte_accessed(vcpu)) { 2476 if (gfn == vcpu->arch.last_pt_write_gfn
2209 ++vcpu->arch.last_pt_write_count; 2477 && !last_updated_pte_accessed(vcpu)) {
2210 if (vcpu->arch.last_pt_write_count >= 3) 2478 ++vcpu->arch.last_pt_write_count;
2211 flooded = 1; 2479 if (vcpu->arch.last_pt_write_count >= 3)
2212 } else { 2480 flooded = 1;
2213 vcpu->arch.last_pt_write_gfn = gfn; 2481 } else {
2214 vcpu->arch.last_pt_write_count = 1; 2482 vcpu->arch.last_pt_write_gfn = gfn;
2215 vcpu->arch.last_pte_updated = NULL; 2483 vcpu->arch.last_pt_write_count = 1;
2484 vcpu->arch.last_pte_updated = NULL;
2485 }
2216 } 2486 }
2217 index = kvm_page_table_hashfn(gfn); 2487 index = kvm_page_table_hashfn(gfn);
2218 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 2488 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
@@ -2352,9 +2622,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
2352 2622
2353void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) 2623void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
2354{ 2624{
2355 spin_lock(&vcpu->kvm->mmu_lock);
2356 vcpu->arch.mmu.invlpg(vcpu, gva); 2625 vcpu->arch.mmu.invlpg(vcpu, gva);
2357 spin_unlock(&vcpu->kvm->mmu_lock);
2358 kvm_mmu_flush_tlb(vcpu); 2626 kvm_mmu_flush_tlb(vcpu);
2359 ++vcpu->stat.invlpg; 2627 ++vcpu->stat.invlpg;
2360} 2628}
@@ -2451,7 +2719,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
2451 int i; 2719 int i;
2452 u64 *pt; 2720 u64 *pt;
2453 2721
2454 if (!test_bit(slot, &sp->slot_bitmap)) 2722 if (!test_bit(slot, sp->slot_bitmap))
2455 continue; 2723 continue;
2456 2724
2457 pt = sp->spt; 2725 pt = sp->spt;
@@ -2860,8 +3128,8 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
2860 if (sp->role.metaphysical) 3128 if (sp->role.metaphysical)
2861 continue; 3129 continue;
2862 3130
2863 slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
2864 gfn = unalias_gfn(vcpu->kvm, sp->gfn); 3131 gfn = unalias_gfn(vcpu->kvm, sp->gfn);
3132 slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn);
2865 rmapp = &slot->rmap[gfn - slot->base_gfn]; 3133 rmapp = &slot->rmap[gfn - slot->base_gfn];
2866 if (*rmapp) 3134 if (*rmapp)
2867 printk(KERN_ERR "%s: (%s) shadow page has writable" 3135 printk(KERN_ERR "%s: (%s) shadow page has writable"
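get_mtrr_type() above mirrors mtrr_type_lookup(): for guest physical addresses below 1MB it indexes the fixed-range MTRRs, which cover 0-0x7FFFF in eight 64KB slots, 0x80000-0xBFFFF in sixteen 16KB slots and 0xC0000-0xFFFFF in sixty-four 4KB slots; higher addresses go through the variable ranges and the MTRR precedence rules. A small worked sketch of just the fixed-range index arithmetic, assuming the address is below 1MB (it reproduces the computation only, not the kernel structures):

#include <stdio.h>
#include <stdint.h>

/* Index into the 88-entry fixed-range MTRR array for an address below 1MB. */
static int fixed_mtrr_index(uint64_t addr)
{
        if (addr < 0x80000)                             /* 8 x 64KB slots  */
                return (int)(addr >> 16);
        if (addr < 0xC0000)                             /* 16 x 16KB slots */
                return (int)(8 + ((addr - 0x80000) >> 14));
        return (int)(24 + ((addr - 0xC0000) >> 12));    /* 64 x 4KB slots  */
}

int main(void)
{
        /* 0xB8000 (VGA text buffer): 8 + (0x38000 >> 14) = slot 22. */
        printf("%d\n", fixed_mtrr_index(0xB8000));
        return 0;
}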
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 84eee43bbe74..9fd78b6e17ad 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -82,6 +82,7 @@ struct shadow_walker {
82 int *ptwrite; 82 int *ptwrite;
83 pfn_t pfn; 83 pfn_t pfn;
84 u64 *sptep; 84 u64 *sptep;
85 gpa_t pte_gpa;
85}; 86};
86 87
87static gfn_t gpte_to_gfn(pt_element_t gpte) 88static gfn_t gpte_to_gfn(pt_element_t gpte)
@@ -222,7 +223,7 @@ walk:
222 if (ret) 223 if (ret)
223 goto walk; 224 goto walk;
224 pte |= PT_DIRTY_MASK; 225 pte |= PT_DIRTY_MASK;
225 kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte)); 226 kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
226 walker->ptes[walker->level - 1] = pte; 227 walker->ptes[walker->level - 1] = pte;
227 } 228 }
228 229
@@ -274,7 +275,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
274 return; 275 return;
275 kvm_get_pfn(pfn); 276 kvm_get_pfn(pfn);
276 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, 277 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
277 gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), 278 gpte & PT_DIRTY_MASK, NULL, largepage,
279 gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte),
278 pfn, true); 280 pfn, true);
279} 281}
280 282
@@ -301,8 +303,9 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
301 mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, 303 mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
302 sw->user_fault, sw->write_fault, 304 sw->user_fault, sw->write_fault,
303 gw->ptes[gw->level-1] & PT_DIRTY_MASK, 305 gw->ptes[gw->level-1] & PT_DIRTY_MASK,
304 sw->ptwrite, sw->largepage, gw->gfn, sw->pfn, 306 sw->ptwrite, sw->largepage,
305 false); 307 gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
308 gw->gfn, sw->pfn, false);
306 sw->sptep = sptep; 309 sw->sptep = sptep;
307 return 1; 310 return 1;
308 } 311 }
@@ -466,10 +469,22 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
466 struct kvm_vcpu *vcpu, u64 addr, 469 struct kvm_vcpu *vcpu, u64 addr,
467 u64 *sptep, int level) 470 u64 *sptep, int level)
468{ 471{
472 struct shadow_walker *sw =
473 container_of(_sw, struct shadow_walker, walker);
469 474
470 if (level == PT_PAGE_TABLE_LEVEL) { 475 /* FIXME: properly handle invlpg on large guest pages */
471 if (is_shadow_present_pte(*sptep)) 476 if (level == PT_PAGE_TABLE_LEVEL ||
477 ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
478 struct kvm_mmu_page *sp = page_header(__pa(sptep));
479
480 sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
481 sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
482
483 if (is_shadow_present_pte(*sptep)) {
472 rmap_remove(vcpu->kvm, sptep); 484 rmap_remove(vcpu->kvm, sptep);
485 if (is_large_pte(*sptep))
486 --vcpu->kvm->stat.lpages;
487 }
473 set_shadow_pte(sptep, shadow_trap_nonpresent_pte); 488 set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
474 return 1; 489 return 1;
475 } 490 }
@@ -480,11 +495,26 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
480 495
481static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) 496static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
482{ 497{
498 pt_element_t gpte;
483 struct shadow_walker walker = { 499 struct shadow_walker walker = {
484 .walker = { .entry = FNAME(shadow_invlpg_entry), }, 500 .walker = { .entry = FNAME(shadow_invlpg_entry), },
501 .pte_gpa = -1,
485 }; 502 };
486 503
504 spin_lock(&vcpu->kvm->mmu_lock);
487 walk_shadow(&walker.walker, vcpu, gva); 505 walk_shadow(&walker.walker, vcpu, gva);
506 spin_unlock(&vcpu->kvm->mmu_lock);
507 if (walker.pte_gpa == -1)
508 return;
509 if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
510 sizeof(pt_element_t)))
511 return;
512 if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
513 if (mmu_topup_memory_caches(vcpu))
514 return;
515 kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
516 sizeof(pt_element_t), 0);
517 }
488} 518}
489 519
490static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) 520static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -580,7 +610,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
580 nr_present++; 610 nr_present++;
581 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); 611 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
582 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, 612 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
583 is_dirty_pte(gpte), 0, gfn, 613 is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
584 spte_to_pfn(sp->spt[i]), true, false); 614 spte_to_pfn(sp->spt[i]), true, false);
585 } 615 }
586 616
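The reworked FNAME(invlpg) above records, under the mmu lock, the guest physical address of the PTE backing the invalidated shadow entry, then re-reads and prefetches it after the lock is dropped if it is still present and accessed. Recovering that address is plain arithmetic: the shadow page's gfn names the guest page-table page, and the spte's index within the shadow table, scaled by the guest PTE size, is the byte offset inside it. A tiny stand-alone sketch of the computation, assuming 64-bit guest PTEs and illustrative names:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT      12

/* table_gfn: gfn of the guest page-table page; index: slot of the entry. */
static uint64_t guest_pte_gpa(uint64_t table_gfn, unsigned int index)
{
        return (table_gfn << PAGE_SHIFT) + index * sizeof(uint64_t);
}

int main(void)
{
        /* Entry 3 of the guest page table at gfn 0x1234 sits at 0x1234018. */
        printf("0x%llx\n", (unsigned long long)guest_pte_gpa(0x1234, 3));
        return 0;
}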
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9c4ce657d963..1452851ae258 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -28,6 +28,8 @@
28 28
29#include <asm/desc.h> 29#include <asm/desc.h>
30 30
31#include <asm/virtext.h>
32
31#define __ex(x) __kvm_handle_fault_on_reboot(x) 33#define __ex(x) __kvm_handle_fault_on_reboot(x)
32 34
33MODULE_AUTHOR("Qumranet"); 35MODULE_AUTHOR("Qumranet");
@@ -245,34 +247,19 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
245 247
246static int has_svm(void) 248static int has_svm(void)
247{ 249{
248 uint32_t eax, ebx, ecx, edx; 250 const char *msg;
249
250 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
251 printk(KERN_INFO "has_svm: not amd\n");
252 return 0;
253 }
254 251
255 cpuid(0x80000000, &eax, &ebx, &ecx, &edx); 252 if (!cpu_has_svm(&msg)) {
 256 if (eax < SVM_CPUID_FUNC) { 253 printk(KERN_INFO "has_svm: %s\n", msg);
257 printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n");
258 return 0; 254 return 0;
259 } 255 }
260 256
261 cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
262 if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
263 printk(KERN_DEBUG "has_svm: svm not available\n");
264 return 0;
265 }
266 return 1; 257 return 1;
267} 258}
268 259
269static void svm_hardware_disable(void *garbage) 260static void svm_hardware_disable(void *garbage)
270{ 261{
271 uint64_t efer; 262 cpu_svm_disable();
272
273 wrmsrl(MSR_VM_HSAVE_PA, 0);
274 rdmsrl(MSR_EFER, efer);
275 wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
276} 263}
277 264
278static void svm_hardware_enable(void *garbage) 265static void svm_hardware_enable(void *garbage)
@@ -772,6 +759,22 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
772 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; 759 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
773 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; 760 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
774 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; 761 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
762
763 /*
764 * SVM always stores 0 for the 'G' bit in the CS selector in
765 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
766 * Intel's VMENTRY has a check on the 'G' bit.
767 */
768 if (seg == VCPU_SREG_CS)
769 var->g = s->limit > 0xfffff;
770
771 /*
772 * Work around a bug where the busy flag in the tr selector
773 * isn't exposed
774 */
775 if (seg == VCPU_SREG_TR)
776 var->type |= 0x2;
777
775 var->unusable = !var->present; 778 var->unusable = !var->present;
776} 779}
777 780
@@ -1099,6 +1102,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1099 rep = (io_info & SVM_IOIO_REP_MASK) != 0; 1102 rep = (io_info & SVM_IOIO_REP_MASK) != 0;
1100 down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; 1103 down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
1101 1104
1105 skip_emulated_instruction(&svm->vcpu);
1102 return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); 1106 return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
1103} 1107}
1104 1108
@@ -1912,6 +1916,11 @@ static int get_npt_level(void)
1912#endif 1916#endif
1913} 1917}
1914 1918
1919static int svm_get_mt_mask_shift(void)
1920{
1921 return 0;
1922}
1923
1915static struct kvm_x86_ops svm_x86_ops = { 1924static struct kvm_x86_ops svm_x86_ops = {
1916 .cpu_has_kvm_support = has_svm, 1925 .cpu_has_kvm_support = has_svm,
1917 .disabled_by_bios = is_disabled, 1926 .disabled_by_bios = is_disabled,
@@ -1967,6 +1976,7 @@ static struct kvm_x86_ops svm_x86_ops = {
1967 1976
1968 .set_tss_addr = svm_set_tss_addr, 1977 .set_tss_addr = svm_set_tss_addr,
1969 .get_tdp_level = get_npt_level, 1978 .get_tdp_level = get_npt_level,
1979 .get_mt_mask_shift = svm_get_mt_mask_shift,
1970}; 1980};
1971 1981
1972static int __init svm_init(void) 1982static int __init svm_init(void)
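The svm_get_segment() workaround above reconstructs the CS granularity bit that SVM reports as 0 on #VMEXIT: a byte-granular descriptor cannot produce an expanded limit above 0xfffff, so any larger limit implies page granularity. A tiny user-space sketch of that inference (struct and field names are illustrative, not the VMCB layout):

#include <stdio.h>
#include <stdint.h>

struct seg {
        uint32_t limit;         /* expanded segment limit read back from the save area */
        unsigned int g;         /* granularity: 0 = bytes, 1 = 4KB pages */
};

/* Recover 'G' for CS from the limit alone. */
static void fixup_cs_granularity(struct seg *cs)
{
        cs->g = cs->limit > 0xfffff;
}

int main(void)
{
        struct seg cs = { .limit = 0xffffffffu, .g = 0 };

        fixup_cs_granularity(&cs);
        printf("g=%u\n", cs.g); /* 1: a 4GB limit requires page granularity */
        return 0;
}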
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a4018b01e1f9..6259d7467648 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -16,7 +16,6 @@
16 */ 16 */
17 17
18#include "irq.h" 18#include "irq.h"
19#include "vmx.h"
20#include "mmu.h" 19#include "mmu.h"
21 20
22#include <linux/kvm_host.h> 21#include <linux/kvm_host.h>
@@ -31,6 +30,8 @@
31 30
32#include <asm/io.h> 31#include <asm/io.h>
33#include <asm/desc.h> 32#include <asm/desc.h>
33#include <asm/vmx.h>
34#include <asm/virtext.h>
34 35
35#define __ex(x) __kvm_handle_fault_on_reboot(x) 36#define __ex(x) __kvm_handle_fault_on_reboot(x)
36 37
@@ -90,6 +91,11 @@ struct vcpu_vmx {
90 } rmode; 91 } rmode;
91 int vpid; 92 int vpid;
92 bool emulation_required; 93 bool emulation_required;
94
95 /* Support for vnmi-less CPUs */
96 int soft_vnmi_blocked;
97 ktime_t entry_time;
98 s64 vnmi_blocked_time;
93}; 99};
94 100
95static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) 101static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -122,7 +128,7 @@ static struct vmcs_config {
122 u32 vmentry_ctrl; 128 u32 vmentry_ctrl;
123} vmcs_config; 129} vmcs_config;
124 130
125struct vmx_capability { 131static struct vmx_capability {
126 u32 ept; 132 u32 ept;
127 u32 vpid; 133 u32 vpid;
128} vmx_capability; 134} vmx_capability;
@@ -957,6 +963,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
957 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); 963 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
958 964
959 break; 965 break;
966 case MSR_IA32_CR_PAT:
967 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
968 vmcs_write64(GUEST_IA32_PAT, data);
969 vcpu->arch.pat = data;
970 break;
971 }
972 /* Otherwise falls through to kvm_set_msr_common */
960 default: 973 default:
961 vmx_load_host_state(vmx); 974 vmx_load_host_state(vmx);
962 msr = find_msr_entry(vmx, msr_index); 975 msr = find_msr_entry(vmx, msr_index);
@@ -1032,8 +1045,7 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu)
1032 1045
1033static __init int cpu_has_kvm_support(void) 1046static __init int cpu_has_kvm_support(void)
1034{ 1047{
1035 unsigned long ecx = cpuid_ecx(1); 1048 return cpu_has_vmx();
1036 return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
1037} 1049}
1038 1050
1039static __init int vmx_disabled_by_bios(void) 1051static __init int vmx_disabled_by_bios(void)
@@ -1079,13 +1091,22 @@ static void vmclear_local_vcpus(void)
1079 __vcpu_clear(vmx); 1091 __vcpu_clear(vmx);
1080} 1092}
1081 1093
1082static void hardware_disable(void *garbage) 1094
1095/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
1096 * tricks.
1097 */
1098static void kvm_cpu_vmxoff(void)
1083{ 1099{
1084 vmclear_local_vcpus();
1085 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); 1100 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
1086 write_cr4(read_cr4() & ~X86_CR4_VMXE); 1101 write_cr4(read_cr4() & ~X86_CR4_VMXE);
1087} 1102}
1088 1103
1104static void hardware_disable(void *garbage)
1105{
1106 vmclear_local_vcpus();
1107 kvm_cpu_vmxoff();
1108}
1109
1089static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, 1110static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
1090 u32 msr, u32 *result) 1111 u32 msr, u32 *result)
1091{ 1112{
@@ -1176,12 +1197,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1176#ifdef CONFIG_X86_64 1197#ifdef CONFIG_X86_64
1177 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; 1198 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
1178#endif 1199#endif
1179 opt = 0; 1200 opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
1180 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, 1201 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
1181 &_vmexit_control) < 0) 1202 &_vmexit_control) < 0)
1182 return -EIO; 1203 return -EIO;
1183 1204
1184 min = opt = 0; 1205 min = 0;
1206 opt = VM_ENTRY_LOAD_IA32_PAT;
1185 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, 1207 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
1186 &_vmentry_control) < 0) 1208 &_vmentry_control) < 0)
1187 return -EIO; 1209 return -EIO;
@@ -2087,8 +2109,9 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
2087 */ 2109 */
2088static int vmx_vcpu_setup(struct vcpu_vmx *vmx) 2110static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2089{ 2111{
2090 u32 host_sysenter_cs; 2112 u32 host_sysenter_cs, msr_low, msr_high;
2091 u32 junk; 2113 u32 junk;
2114 u64 host_pat;
2092 unsigned long a; 2115 unsigned long a;
2093 struct descriptor_table dt; 2116 struct descriptor_table dt;
2094 int i; 2117 int i;
@@ -2176,6 +2199,20 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2176 rdmsrl(MSR_IA32_SYSENTER_EIP, a); 2199 rdmsrl(MSR_IA32_SYSENTER_EIP, a);
2177 vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ 2200 vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */
2178 2201
2202 if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
2203 rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
2204 host_pat = msr_low | ((u64) msr_high << 32);
2205 vmcs_write64(HOST_IA32_PAT, host_pat);
2206 }
2207 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
2208 rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
2209 host_pat = msr_low | ((u64) msr_high << 32);
 2210 /* Write the default value, following the host PAT */
2211 vmcs_write64(GUEST_IA32_PAT, host_pat);
 2212 /* Keep arch.pat in sync with GUEST_IA32_PAT */
2213 vmx->vcpu.arch.pat = host_pat;
2214 }
2215
2179 for (i = 0; i < NR_VMX_MSR; ++i) { 2216 for (i = 0; i < NR_VMX_MSR; ++i) {
2180 u32 index = vmx_msr_index[i]; 2217 u32 index = vmx_msr_index[i];
2181 u32 data_low, data_high; 2218 u32 data_low, data_high;
@@ -2230,6 +2267,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2230 2267
2231 vmx->vcpu.arch.rmode.active = 0; 2268 vmx->vcpu.arch.rmode.active = 0;
2232 2269
2270 vmx->soft_vnmi_blocked = 0;
2271
2233 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 2272 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
2234 kvm_set_cr8(&vmx->vcpu, 0); 2273 kvm_set_cr8(&vmx->vcpu, 0);
2235 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 2274 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
@@ -2335,6 +2374,29 @@ out:
2335 return ret; 2374 return ret;
2336} 2375}
2337 2376
2377static void enable_irq_window(struct kvm_vcpu *vcpu)
2378{
2379 u32 cpu_based_vm_exec_control;
2380
2381 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2382 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
2383 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2384}
2385
2386static void enable_nmi_window(struct kvm_vcpu *vcpu)
2387{
2388 u32 cpu_based_vm_exec_control;
2389
2390 if (!cpu_has_virtual_nmis()) {
2391 enable_irq_window(vcpu);
2392 return;
2393 }
2394
2395 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2396 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
2397 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2398}
2399
2338static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) 2400static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
2339{ 2401{
2340 struct vcpu_vmx *vmx = to_vmx(vcpu); 2402 struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2358,10 +2420,54 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
2358 2420
2359static void vmx_inject_nmi(struct kvm_vcpu *vcpu) 2421static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
2360{ 2422{
2423 struct vcpu_vmx *vmx = to_vmx(vcpu);
2424
2425 if (!cpu_has_virtual_nmis()) {
2426 /*
2427 * Tracking the NMI-blocked state in software is built upon
2428 * finding the next open IRQ window. This, in turn, depends on
2429 * well-behaving guests: They have to keep IRQs disabled at
2430 * least as long as the NMI handler runs. Otherwise we may
2431 * cause NMI nesting, maybe breaking the guest. But as this is
2432 * highly unlikely, we can live with the residual risk.
2433 */
2434 vmx->soft_vnmi_blocked = 1;
2435 vmx->vnmi_blocked_time = 0;
2436 }
2437
2438 ++vcpu->stat.nmi_injections;
2439 if (vcpu->arch.rmode.active) {
2440 vmx->rmode.irq.pending = true;
2441 vmx->rmode.irq.vector = NMI_VECTOR;
2442 vmx->rmode.irq.rip = kvm_rip_read(vcpu);
2443 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
2444 NMI_VECTOR | INTR_TYPE_SOFT_INTR |
2445 INTR_INFO_VALID_MASK);
2446 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
2447 kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
2448 return;
2449 }
2361 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 2450 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
2362 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); 2451 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
2363} 2452}
2364 2453
2454static void vmx_update_window_states(struct kvm_vcpu *vcpu)
2455{
2456 u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2457
2458 vcpu->arch.nmi_window_open =
2459 !(guest_intr & (GUEST_INTR_STATE_STI |
2460 GUEST_INTR_STATE_MOV_SS |
2461 GUEST_INTR_STATE_NMI));
2462 if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
2463 vcpu->arch.nmi_window_open = 0;
2464
2465 vcpu->arch.interrupt_window_open =
2466 ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
2467 !(guest_intr & (GUEST_INTR_STATE_STI |
2468 GUEST_INTR_STATE_MOV_SS)));
2469}
2470
2365static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) 2471static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
2366{ 2472{
2367 int word_index = __ffs(vcpu->arch.irq_summary); 2473 int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2374,40 +2480,49 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
2374 kvm_queue_interrupt(vcpu, irq); 2480 kvm_queue_interrupt(vcpu, irq);
2375} 2481}
2376 2482
2377
2378static void do_interrupt_requests(struct kvm_vcpu *vcpu, 2483static void do_interrupt_requests(struct kvm_vcpu *vcpu,
2379 struct kvm_run *kvm_run) 2484 struct kvm_run *kvm_run)
2380{ 2485{
2381 u32 cpu_based_vm_exec_control; 2486 vmx_update_window_states(vcpu);
2382
2383 vcpu->arch.interrupt_window_open =
2384 ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
2385 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
2386 2487
2387 if (vcpu->arch.interrupt_window_open && 2488 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
2388 vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) 2489 if (vcpu->arch.interrupt.pending) {
2389 kvm_do_inject_irq(vcpu); 2490 enable_nmi_window(vcpu);
2491 } else if (vcpu->arch.nmi_window_open) {
2492 vcpu->arch.nmi_pending = false;
2493 vcpu->arch.nmi_injected = true;
2494 } else {
2495 enable_nmi_window(vcpu);
2496 return;
2497 }
2498 }
2499 if (vcpu->arch.nmi_injected) {
2500 vmx_inject_nmi(vcpu);
2501 if (vcpu->arch.nmi_pending)
2502 enable_nmi_window(vcpu);
2503 else if (vcpu->arch.irq_summary
2504 || kvm_run->request_interrupt_window)
2505 enable_irq_window(vcpu);
2506 return;
2507 }
2390 2508
2391 if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending) 2509 if (vcpu->arch.interrupt_window_open) {
2392 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); 2510 if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
2511 kvm_do_inject_irq(vcpu);
2393 2512
2394 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 2513 if (vcpu->arch.interrupt.pending)
2514 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
2515 }
2395 if (!vcpu->arch.interrupt_window_open && 2516 if (!vcpu->arch.interrupt_window_open &&
2396 (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) 2517 (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
2397 /* 2518 enable_irq_window(vcpu);
2398 * Interrupts blocked. Wait for unblock.
2399 */
2400 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
2401 else
2402 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
2403 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2404} 2519}
2405 2520
2406static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) 2521static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
2407{ 2522{
2408 int ret; 2523 int ret;
2409 struct kvm_userspace_memory_region tss_mem = { 2524 struct kvm_userspace_memory_region tss_mem = {
2410 .slot = 8, 2525 .slot = TSS_PRIVATE_MEMSLOT,
2411 .guest_phys_addr = addr, 2526 .guest_phys_addr = addr,
2412 .memory_size = PAGE_SIZE * 3, 2527 .memory_size = PAGE_SIZE * 3,
2413 .flags = 0, 2528 .flags = 0,
@@ -2492,7 +2607,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2492 set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); 2607 set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
2493 } 2608 }
2494 2609
2495 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ 2610 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
2496 return 1; /* already handled by vmx_vcpu_run() */ 2611 return 1; /* already handled by vmx_vcpu_run() */
2497 2612
2498 if (is_no_device(intr_info)) { 2613 if (is_no_device(intr_info)) {
@@ -2581,6 +2696,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2581 rep = (exit_qualification & 32) != 0; 2696 rep = (exit_qualification & 32) != 0;
2582 port = exit_qualification >> 16; 2697 port = exit_qualification >> 16;
2583 2698
2699 skip_emulated_instruction(vcpu);
2584 return kvm_emulate_pio(vcpu, kvm_run, in, size, port); 2700 return kvm_emulate_pio(vcpu, kvm_run, in, size, port);
2585} 2701}
2586 2702
@@ -2767,6 +2883,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
2767 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 2883 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2768 2884
2769 KVMTRACE_0D(PEND_INTR, vcpu, handler); 2885 KVMTRACE_0D(PEND_INTR, vcpu, handler);
2886 ++vcpu->stat.irq_window_exits;
2770 2887
2771 /* 2888 /*
2772 * If the user space waits to inject interrupts, exit as soon as 2889 * If the user space waits to inject interrupts, exit as soon as
@@ -2775,7 +2892,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
2775 if (kvm_run->request_interrupt_window && 2892 if (kvm_run->request_interrupt_window &&
2776 !vcpu->arch.irq_summary) { 2893 !vcpu->arch.irq_summary) {
2777 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 2894 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
2778 ++vcpu->stat.irq_window_exits;
2779 return 0; 2895 return 0;
2780 } 2896 }
2781 return 1; 2897 return 1;
@@ -2832,6 +2948,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2832 2948
2833static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2949static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2834{ 2950{
2951 struct vcpu_vmx *vmx = to_vmx(vcpu);
2835 unsigned long exit_qualification; 2952 unsigned long exit_qualification;
2836 u16 tss_selector; 2953 u16 tss_selector;
2837 int reason; 2954 int reason;
@@ -2839,6 +2956,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2839 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 2956 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
2840 2957
2841 reason = (u32)exit_qualification >> 30; 2958 reason = (u32)exit_qualification >> 30;
2959 if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
2960 (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
2961 (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
2962 == INTR_TYPE_NMI_INTR) {
2963 vcpu->arch.nmi_injected = false;
2964 if (cpu_has_virtual_nmis())
2965 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
2966 GUEST_INTR_STATE_NMI);
2967 }
2842 tss_selector = exit_qualification; 2968 tss_selector = exit_qualification;
2843 2969
2844 return kvm_task_switch(vcpu, tss_selector, reason); 2970 return kvm_task_switch(vcpu, tss_selector, reason);
@@ -2927,16 +3053,12 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
2927 while (!guest_state_valid(vcpu)) { 3053 while (!guest_state_valid(vcpu)) {
2928 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 3054 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
2929 3055
2930 switch (err) { 3056 if (err == EMULATE_DO_MMIO)
2931 case EMULATE_DONE: 3057 break;
2932 break; 3058
2933 case EMULATE_DO_MMIO: 3059 if (err != EMULATE_DONE) {
2934 kvm_report_emulation_failure(vcpu, "mmio"); 3060 kvm_report_emulation_failure(vcpu, "emulation failure");
2935 /* TODO: Handle MMIO */ 3061 return;
2936 return;
2937 default:
2938 kvm_report_emulation_failure(vcpu, "emulation failure");
2939 return;
2940 } 3062 }
2941 3063
2942 if (signal_pending(current)) 3064 if (signal_pending(current))
@@ -2948,8 +3070,10 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
2948 local_irq_disable(); 3070 local_irq_disable();
2949 preempt_disable(); 3071 preempt_disable();
2950 3072
2951 /* Guest state should be valid now, no more emulation should be needed */ 3073 /* Guest state should be valid now, unless we still need to
2952 vmx->emulation_required = 0; 3074 * emulate an MMIO access */
3075 if (guest_state_valid(vcpu))
3076 vmx->emulation_required = 0;
2953} 3077}
2954 3078
2955/* 3079/*
@@ -2996,6 +3120,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2996 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), 3120 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
2997 (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); 3121 (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
2998 3122
3123 /* If we need to emulate an MMIO access from handle_invalid_guest_state,
3124 * we just return 0 */
3125 if (vmx->emulation_required && emulate_invalid_guest_state)
3126 return 0;
3127
2999 /* Accessing CR3 doesn't cause a VM exit in paging mode, so we need 3128 /* Accessing CR3 doesn't cause a VM exit in paging mode, so we need
3000 * to sync with the guest's real CR3. */ 3129 * to sync with the guest's real CR3. */
3001 if (vm_need_ept() && is_paging(vcpu)) { 3130 if (vm_need_ept() && is_paging(vcpu)) {
@@ -3012,9 +3141,32 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3012 3141
3013 if ((vectoring_info & VECTORING_INFO_VALID_MASK) && 3142 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
3014 (exit_reason != EXIT_REASON_EXCEPTION_NMI && 3143 (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
3015 exit_reason != EXIT_REASON_EPT_VIOLATION)) 3144 exit_reason != EXIT_REASON_EPT_VIOLATION &&
3016 printk(KERN_WARNING "%s: unexpected, valid vectoring info and " 3145 exit_reason != EXIT_REASON_TASK_SWITCH))
3017 "exit reason is 0x%x\n", __func__, exit_reason); 3146 printk(KERN_WARNING "%s: unexpected, valid vectoring info "
3147 "(0x%x) and exit reason is 0x%x\n",
3148 __func__, vectoring_info, exit_reason);
3149
3150 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
3151 if (vcpu->arch.interrupt_window_open) {
3152 vmx->soft_vnmi_blocked = 0;
3153 vcpu->arch.nmi_window_open = 1;
3154 } else if (vmx->vnmi_blocked_time > 1000000000LL &&
3155 vcpu->arch.nmi_pending) {
3156 /*
3157 * This CPU doesn't help us find the end of an
3158 * NMI-blocked window if the guest runs with IRQs
3159 * disabled. So we pull the trigger after 1 s of
3160 * futile waiting, but inform the user about this.
3161 */
3162 printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
3163 "state on VCPU %d after 1 s timeout\n",
3164 __func__, vcpu->vcpu_id);
3165 vmx->soft_vnmi_blocked = 0;
3166 vmx->vcpu.arch.nmi_window_open = 1;
3167 }
3168 }
3169
3018 if (exit_reason < kvm_vmx_max_exit_handlers 3170 if (exit_reason < kvm_vmx_max_exit_handlers
3019 && kvm_vmx_exit_handlers[exit_reason]) 3171 && kvm_vmx_exit_handlers[exit_reason])
3020 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); 3172 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -3042,51 +3194,6 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu)
3042 vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4); 3194 vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
3043} 3195}
3044 3196
3045static void enable_irq_window(struct kvm_vcpu *vcpu)
3046{
3047 u32 cpu_based_vm_exec_control;
3048
3049 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
3050 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
3051 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
3052}
3053
3054static void enable_nmi_window(struct kvm_vcpu *vcpu)
3055{
3056 u32 cpu_based_vm_exec_control;
3057
3058 if (!cpu_has_virtual_nmis())
3059 return;
3060
3061 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
3062 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
3063 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
3064}
3065
3066static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
3067{
3068 u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
3069 return !(guest_intr & (GUEST_INTR_STATE_NMI |
3070 GUEST_INTR_STATE_MOV_SS |
3071 GUEST_INTR_STATE_STI));
3072}
3073
3074static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
3075{
3076 u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
3077 return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
3078 GUEST_INTR_STATE_STI)) &&
3079 (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
3080}
3081
3082static void enable_intr_window(struct kvm_vcpu *vcpu)
3083{
3084 if (vcpu->arch.nmi_pending)
3085 enable_nmi_window(vcpu);
3086 else if (kvm_cpu_has_interrupt(vcpu))
3087 enable_irq_window(vcpu);
3088}
3089
3090static void vmx_complete_interrupts(struct vcpu_vmx *vmx) 3197static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3091{ 3198{
3092 u32 exit_intr_info; 3199 u32 exit_intr_info;
@@ -3109,7 +3216,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3109 if (unblock_nmi && vector != DF_VECTOR) 3216 if (unblock_nmi && vector != DF_VECTOR)
3110 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 3217 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
3111 GUEST_INTR_STATE_NMI); 3218 GUEST_INTR_STATE_NMI);
3112 } 3219 } else if (unlikely(vmx->soft_vnmi_blocked))
3220 vmx->vnmi_blocked_time +=
3221 ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
3113 3222
3114 idt_vectoring_info = vmx->idt_vectoring_info; 3223 idt_vectoring_info = vmx->idt_vectoring_info;
3115 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; 3224 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
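(Reading aid, not part of the patch: vnmi_blocked_time accumulates, in nanoseconds, the guest-mode time spent while NMIs are soft-blocked; entry_time is stamped at the top of vmx_vcpu_run further down, so the 1000000000LL comparison in kvm_handle_exit above is exactly the one-second timeout announced by its warning message.)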
@@ -3147,26 +3256,29 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3147{ 3256{
3148 update_tpr_threshold(vcpu); 3257 update_tpr_threshold(vcpu);
3149 3258
3150 if (cpu_has_virtual_nmis()) { 3259 vmx_update_window_states(vcpu);
3151 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { 3260
3152 if (vcpu->arch.interrupt.pending) { 3261 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
3153 enable_nmi_window(vcpu); 3262 if (vcpu->arch.interrupt.pending) {
3154 } else if (vmx_nmi_enabled(vcpu)) { 3263 enable_nmi_window(vcpu);
3155 vcpu->arch.nmi_pending = false; 3264 } else if (vcpu->arch.nmi_window_open) {
3156 vcpu->arch.nmi_injected = true; 3265 vcpu->arch.nmi_pending = false;
3157 } else { 3266 vcpu->arch.nmi_injected = true;
3158 enable_intr_window(vcpu); 3267 } else {
3159 return; 3268 enable_nmi_window(vcpu);
3160 }
3161 }
3162 if (vcpu->arch.nmi_injected) {
3163 vmx_inject_nmi(vcpu);
3164 enable_intr_window(vcpu);
3165 return; 3269 return;
3166 } 3270 }
3167 } 3271 }
3272 if (vcpu->arch.nmi_injected) {
3273 vmx_inject_nmi(vcpu);
3274 if (vcpu->arch.nmi_pending)
3275 enable_nmi_window(vcpu);
3276 else if (kvm_cpu_has_interrupt(vcpu))
3277 enable_irq_window(vcpu);
3278 return;
3279 }
3168 if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { 3280 if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
3169 if (vmx_irq_enabled(vcpu)) 3281 if (vcpu->arch.interrupt_window_open)
3170 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); 3282 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
3171 else 3283 else
3172 enable_irq_window(vcpu); 3284 enable_irq_window(vcpu);
@@ -3174,6 +3286,8 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3174 if (vcpu->arch.interrupt.pending) { 3286 if (vcpu->arch.interrupt.pending) {
3175 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); 3287 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
3176 kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); 3288 kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
3289 if (kvm_cpu_has_interrupt(vcpu))
3290 enable_irq_window(vcpu);
3177 } 3291 }
3178} 3292}
3179 3293
@@ -3213,6 +3327,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3213 struct vcpu_vmx *vmx = to_vmx(vcpu); 3327 struct vcpu_vmx *vmx = to_vmx(vcpu);
3214 u32 intr_info; 3328 u32 intr_info;
3215 3329
3330 /* Record the guest's net vcpu time for enforced NMI injections. */
3331 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
3332 vmx->entry_time = ktime_get();
3333
3216 /* Handle invalid guest state instead of entering VMX */ 3334 /* Handle invalid guest state instead of entering VMX */
3217 if (vmx->emulation_required && emulate_invalid_guest_state) { 3335 if (vmx->emulation_required && emulate_invalid_guest_state) {
3218 handle_invalid_guest_state(vcpu, kvm_run); 3336 handle_invalid_guest_state(vcpu, kvm_run);
@@ -3327,9 +3445,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3327 if (vmx->rmode.irq.pending) 3445 if (vmx->rmode.irq.pending)
3328 fixup_rmode_irq(vmx); 3446 fixup_rmode_irq(vmx);
3329 3447
3330 vcpu->arch.interrupt_window_open = 3448 vmx_update_window_states(vcpu);
3331 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
3332 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
3333 3449
3334 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); 3450 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
3335 vmx->launched = 1; 3451 vmx->launched = 1;
@@ -3337,7 +3453,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3337 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 3453 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
3338 3454
3339 /* We need to handle NMIs before interrupts are enabled */ 3455 /* We need to handle NMIs before interrupts are enabled */
3340 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 && 3456 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
3341 (intr_info & INTR_INFO_VALID_MASK)) { 3457 (intr_info & INTR_INFO_VALID_MASK)) {
3342 KVMTRACE_0D(NMI, vcpu, handler); 3458 KVMTRACE_0D(NMI, vcpu, handler);
3343 asm("int $2"); 3459 asm("int $2");
@@ -3455,6 +3571,11 @@ static int get_ept_level(void)
3455 return VMX_EPT_DEFAULT_GAW + 1; 3571 return VMX_EPT_DEFAULT_GAW + 1;
3456} 3572}
3457 3573
3574static int vmx_get_mt_mask_shift(void)
3575{
3576 return VMX_EPT_MT_EPTE_SHIFT;
3577}
3578
3458static struct kvm_x86_ops vmx_x86_ops = { 3579static struct kvm_x86_ops vmx_x86_ops = {
3459 .cpu_has_kvm_support = cpu_has_kvm_support, 3580 .cpu_has_kvm_support = cpu_has_kvm_support,
3460 .disabled_by_bios = vmx_disabled_by_bios, 3581 .disabled_by_bios = vmx_disabled_by_bios,
@@ -3510,6 +3631,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
3510 3631
3511 .set_tss_addr = vmx_set_tss_addr, 3632 .set_tss_addr = vmx_set_tss_addr,
3512 .get_tdp_level = get_ept_level, 3633 .get_tdp_level = get_ept_level,
3634 .get_mt_mask_shift = vmx_get_mt_mask_shift,
3513}; 3635};
3514 3636
3515static int __init vmx_init(void) 3637static int __init vmx_init(void)
@@ -3566,10 +3688,10 @@ static int __init vmx_init(void)
3566 bypass_guest_pf = 0; 3688 bypass_guest_pf = 0;
3567 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | 3689 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3568 VMX_EPT_WRITABLE_MASK | 3690 VMX_EPT_WRITABLE_MASK |
3569 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
3570 VMX_EPT_IGMT_BIT); 3691 VMX_EPT_IGMT_BIT);
3571 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, 3692 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
3572 VMX_EPT_EXECUTABLE_MASK); 3693 VMX_EPT_EXECUTABLE_MASK,
3694 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
3573 kvm_enable_tdp(); 3695 kvm_enable_tdp();
3574 } else 3696 } else
3575 kvm_disable_tdp(); 3697 kvm_disable_tdp();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f1f8ff2f1fa2..0e6aa8141dcd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -39,6 +39,7 @@
39#include <asm/uaccess.h> 39#include <asm/uaccess.h>
40#include <asm/msr.h> 40#include <asm/msr.h>
41#include <asm/desc.h> 41#include <asm/desc.h>
42#include <asm/mtrr.h>
42 43
43#define MAX_IO_MSRS 256 44#define MAX_IO_MSRS 256
44#define CR0_RESERVED_BITS \ 45#define CR0_RESERVED_BITS \
@@ -86,6 +87,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
86 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 87 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
87 { "hypercalls", VCPU_STAT(hypercalls) }, 88 { "hypercalls", VCPU_STAT(hypercalls) },
88 { "request_irq", VCPU_STAT(request_irq_exits) }, 89 { "request_irq", VCPU_STAT(request_irq_exits) },
90 { "request_nmi", VCPU_STAT(request_nmi_exits) },
89 { "irq_exits", VCPU_STAT(irq_exits) }, 91 { "irq_exits", VCPU_STAT(irq_exits) },
90 { "host_state_reload", VCPU_STAT(host_state_reload) }, 92 { "host_state_reload", VCPU_STAT(host_state_reload) },
91 { "efer_reload", VCPU_STAT(efer_reload) }, 93 { "efer_reload", VCPU_STAT(efer_reload) },
@@ -93,6 +95,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
93 { "insn_emulation", VCPU_STAT(insn_emulation) }, 95 { "insn_emulation", VCPU_STAT(insn_emulation) },
94 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, 96 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
95 { "irq_injections", VCPU_STAT(irq_injections) }, 97 { "irq_injections", VCPU_STAT(irq_injections) },
98 { "nmi_injections", VCPU_STAT(nmi_injections) },
96 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, 99 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
97 { "mmu_pte_write", VM_STAT(mmu_pte_write) }, 100 { "mmu_pte_write", VM_STAT(mmu_pte_write) },
98 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, 101 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -101,6 +104,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
101 { "mmu_recycled", VM_STAT(mmu_recycled) }, 104 { "mmu_recycled", VM_STAT(mmu_recycled) },
102 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, 105 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
103 { "mmu_unsync", VM_STAT(mmu_unsync) }, 106 { "mmu_unsync", VM_STAT(mmu_unsync) },
107 { "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
104 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, 108 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
105 { "largepages", VM_STAT(lpages) }, 109 { "largepages", VM_STAT(lpages) },
106 { NULL } 110 { NULL }
@@ -312,6 +316,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
312 kvm_x86_ops->set_cr0(vcpu, cr0); 316 kvm_x86_ops->set_cr0(vcpu, cr0);
313 vcpu->arch.cr0 = cr0; 317 vcpu->arch.cr0 = cr0;
314 318
319 kvm_mmu_sync_global(vcpu);
315 kvm_mmu_reset_context(vcpu); 320 kvm_mmu_reset_context(vcpu);
316 return; 321 return;
317} 322}
@@ -355,6 +360,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
355 } 360 }
356 kvm_x86_ops->set_cr4(vcpu, cr4); 361 kvm_x86_ops->set_cr4(vcpu, cr4);
357 vcpu->arch.cr4 = cr4; 362 vcpu->arch.cr4 = cr4;
363 kvm_mmu_sync_global(vcpu);
358 kvm_mmu_reset_context(vcpu); 364 kvm_mmu_reset_context(vcpu);
359} 365}
360EXPORT_SYMBOL_GPL(kvm_set_cr4); 366EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -449,7 +455,7 @@ static u32 msrs_to_save[] = {
449 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 455 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
450#endif 456#endif
451 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 457 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
452 MSR_IA32_PERF_STATUS, 458 MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT
453}; 459};
454 460
455static unsigned num_msrs_to_save; 461static unsigned num_msrs_to_save;
@@ -648,10 +654,38 @@ static bool msr_mtrr_valid(unsigned msr)
648 654
649static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 655static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
650{ 656{
657 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
658
651 if (!msr_mtrr_valid(msr)) 659 if (!msr_mtrr_valid(msr))
652 return 1; 660 return 1;
653 661
654 vcpu->arch.mtrr[msr - 0x200] = data; 662 if (msr == MSR_MTRRdefType) {
663 vcpu->arch.mtrr_state.def_type = data;
664 vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
665 } else if (msr == MSR_MTRRfix64K_00000)
666 p[0] = data;
667 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
668 p[1 + msr - MSR_MTRRfix16K_80000] = data;
669 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
670 p[3 + msr - MSR_MTRRfix4K_C0000] = data;
671 else if (msr == MSR_IA32_CR_PAT)
672 vcpu->arch.pat = data;
673 else { /* Variable MTRRs */
674 int idx, is_mtrr_mask;
675 u64 *pt;
676
677 idx = (msr - 0x200) / 2;
678 is_mtrr_mask = msr - 0x200 - 2 * idx;
679 if (!is_mtrr_mask)
680 pt =
681 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
682 else
683 pt =
684 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
685 *pt = data;
686 }
687
688 kvm_mmu_reset_context(vcpu);
655 return 0; 689 return 0;
656} 690}
657 691
@@ -747,10 +781,37 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
747 781
748static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 782static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
749{ 783{
784 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
785
750 if (!msr_mtrr_valid(msr)) 786 if (!msr_mtrr_valid(msr))
751 return 1; 787 return 1;
752 788
753 *pdata = vcpu->arch.mtrr[msr - 0x200]; 789 if (msr == MSR_MTRRdefType)
790 *pdata = vcpu->arch.mtrr_state.def_type +
791 (vcpu->arch.mtrr_state.enabled << 10);
792 else if (msr == MSR_MTRRfix64K_00000)
793 *pdata = p[0];
794 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
795 *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
796 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
797 *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
798 else if (msr == MSR_IA32_CR_PAT)
799 *pdata = vcpu->arch.pat;
800 else { /* Variable MTRRs */
801 int idx, is_mtrr_mask;
802 u64 *pt;
803
804 idx = (msr - 0x200) / 2;
805 is_mtrr_mask = msr - 0x200 - 2 * idx;
806 if (!is_mtrr_mask)
807 pt =
808 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
809 else
810 pt =
811 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
812 *pdata = *pt;
813 }
814
754 return 0; 815 return 0;
755} 816}
756 817
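A worked example of the variable-range decode used in both hunks above (illustrative, not part of the patch): a write to MSR 0x203 gives idx = (0x203 - 0x200) / 2 = 1 and is_mtrr_mask = 3 - 2 * 1 = 1, so the value is stored as the mask of var_ranges[1] (MTRRphysMask1); MSR 0x202 gives is_mtrr_mask = 0 and selects the base of var_ranges[1] (MTRRphysBase1) instead.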
@@ -903,7 +964,6 @@ int kvm_dev_ioctl_check_extension(long ext)
903 case KVM_CAP_IRQCHIP: 964 case KVM_CAP_IRQCHIP:
904 case KVM_CAP_HLT: 965 case KVM_CAP_HLT:
905 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 966 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
906 case KVM_CAP_USER_MEMORY:
907 case KVM_CAP_SET_TSS_ADDR: 967 case KVM_CAP_SET_TSS_ADDR:
908 case KVM_CAP_EXT_CPUID: 968 case KVM_CAP_EXT_CPUID:
909 case KVM_CAP_CLOCKSOURCE: 969 case KVM_CAP_CLOCKSOURCE:
@@ -1188,6 +1248,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1188 int t, times = entry->eax & 0xff; 1248 int t, times = entry->eax & 0xff;
1189 1249
1190 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; 1250 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
1251 entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
1191 for (t = 1; t < times && *nent < maxnent; ++t) { 1252 for (t = 1; t < times && *nent < maxnent; ++t) {
1192 do_cpuid_1_ent(&entry[t], function, 0); 1253 do_cpuid_1_ent(&entry[t], function, 0);
1193 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; 1254 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
@@ -1218,7 +1279,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1218 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1279 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1219 /* read more entries until level_type is zero */ 1280 /* read more entries until level_type is zero */
1220 for (i = 1; *nent < maxnent; ++i) { 1281 for (i = 1; *nent < maxnent; ++i) {
1221 level_type = entry[i - 1].ecx & 0xff; 1282 level_type = entry[i - 1].ecx & 0xff00;
1222 if (!level_type) 1283 if (!level_type)
1223 break; 1284 break;
1224 do_cpuid_1_ent(&entry[i], function, i); 1285 do_cpuid_1_ent(&entry[i], function, i);
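(Reading aid, not part of the patch: this loop enumerates CPUID leaf 0x0b, where ECX bits 7:0 merely echo the requested sub-leaf number and bits 15:8 carry the level type that terminates the list. With the old 0xff mask the check saw sub-leaf 0's echoed level number, which is zero, and stopped after a single entry; 0xff00 tests the level-type field the loop is actually meant to check.)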
@@ -1318,6 +1379,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
1318 return 0; 1379 return 0;
1319} 1380}
1320 1381
1382static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
1383{
1384 vcpu_load(vcpu);
1385 kvm_inject_nmi(vcpu);
1386 vcpu_put(vcpu);
1387
1388 return 0;
1389}
1390
1321static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, 1391static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
1322 struct kvm_tpr_access_ctl *tac) 1392 struct kvm_tpr_access_ctl *tac)
1323{ 1393{
@@ -1377,6 +1447,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1377 r = 0; 1447 r = 0;
1378 break; 1448 break;
1379 } 1449 }
1450 case KVM_NMI: {
1451 r = kvm_vcpu_ioctl_nmi(vcpu);
1452 if (r)
1453 goto out;
1454 r = 0;
1455 break;
1456 }
1380 case KVM_SET_CPUID: { 1457 case KVM_SET_CPUID: {
1381 struct kvm_cpuid __user *cpuid_arg = argp; 1458 struct kvm_cpuid __user *cpuid_arg = argp;
1382 struct kvm_cpuid cpuid; 1459 struct kvm_cpuid cpuid;
@@ -1968,7 +2045,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1968 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); 2045 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
1969 if (ret < 0) 2046 if (ret < 0)
1970 return 0; 2047 return 0;
1971 kvm_mmu_pte_write(vcpu, gpa, val, bytes); 2048 kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
1972 return 1; 2049 return 1;
1973} 2050}
1974 2051
@@ -2404,8 +2481,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2404 val = kvm_register_read(vcpu, VCPU_REGS_RAX); 2481 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
2405 memcpy(vcpu->arch.pio_data, &val, 4); 2482 memcpy(vcpu->arch.pio_data, &val, 4);
2406 2483
2407 kvm_x86_ops->skip_emulated_instruction(vcpu);
2408
2409 pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); 2484 pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
2410 if (pio_dev) { 2485 if (pio_dev) {
2411 kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); 2486 kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
@@ -2541,7 +2616,7 @@ int kvm_arch_init(void *opaque)
2541 kvm_mmu_set_nonpresent_ptes(0ull, 0ull); 2616 kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
2542 kvm_mmu_set_base_ptes(PT_PRESENT_MASK); 2617 kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
2543 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, 2618 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
2544 PT_DIRTY_MASK, PT64_NX_MASK, 0); 2619 PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
2545 return 0; 2620 return 0;
2546 2621
2547out: 2622out:
@@ -2729,7 +2804,7 @@ static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
2729 2804
2730 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; 2805 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
2731 /* when no next entry is found, the current entry[i] is reselected */ 2806 /* when no next entry is found, the current entry[i] is reselected */
2732 for (j = i + 1; j == i; j = (j + 1) % nent) { 2807 for (j = i + 1; ; j = (j + 1) % nent) {
2733 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; 2808 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
2734 if (ej->function == e->function) { 2809 if (ej->function == e->function) {
2735 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; 2810 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
@@ -2973,7 +3048,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2973 pr_debug("vcpu %d received sipi with vector # %x\n", 3048 pr_debug("vcpu %d received sipi with vector # %x\n",
2974 vcpu->vcpu_id, vcpu->arch.sipi_vector); 3049 vcpu->vcpu_id, vcpu->arch.sipi_vector);
2975 kvm_lapic_reset(vcpu); 3050 kvm_lapic_reset(vcpu);
2976 r = kvm_x86_ops->vcpu_reset(vcpu); 3051 r = kvm_arch_vcpu_reset(vcpu);
2977 if (r) 3052 if (r)
2978 return r; 3053 return r;
2979 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 3054 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -3275,9 +3350,9 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
3275 kvm_desct->padding = 0; 3350 kvm_desct->padding = 0;
3276} 3351}
3277 3352
3278static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, 3353static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
3279 u16 selector, 3354 u16 selector,
3280 struct descriptor_table *dtable) 3355 struct descriptor_table *dtable)
3281{ 3356{
3282 if (selector & 1 << 2) { 3357 if (selector & 1 << 2) {
3283 struct kvm_segment kvm_seg; 3358 struct kvm_segment kvm_seg;
@@ -3302,7 +3377,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3302 struct descriptor_table dtable; 3377 struct descriptor_table dtable;
3303 u16 index = selector >> 3; 3378 u16 index = selector >> 3;
3304 3379
3305 get_segment_descritptor_dtable(vcpu, selector, &dtable); 3380 get_segment_descriptor_dtable(vcpu, selector, &dtable);
3306 3381
3307 if (dtable.limit < index * 8 + 7) { 3382 if (dtable.limit < index * 8 + 7) {
3308 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); 3383 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
@@ -3321,7 +3396,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3321 struct descriptor_table dtable; 3396 struct descriptor_table dtable;
3322 u16 index = selector >> 3; 3397 u16 index = selector >> 3;
3323 3398
3324 get_segment_descritptor_dtable(vcpu, selector, &dtable); 3399 get_segment_descriptor_dtable(vcpu, selector, &dtable);
3325 3400
3326 if (dtable.limit < index * 8 + 7) 3401 if (dtable.limit < index * 8 + 7)
3327 return 1; 3402 return 1;
@@ -3900,6 +3975,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
3900 /* We do fxsave: this must be aligned. */ 3975 /* We do fxsave: this must be aligned. */
3901 BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); 3976 BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
3902 3977
3978 vcpu->arch.mtrr_state.have_fixed = 1;
3903 vcpu_load(vcpu); 3979 vcpu_load(vcpu);
3904 r = kvm_arch_vcpu_reset(vcpu); 3980 r = kvm_arch_vcpu_reset(vcpu);
3905 if (r == 0) 3981 if (r == 0)
@@ -3925,6 +4001,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3925 4001
3926int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) 4002int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
3927{ 4003{
4004 vcpu->arch.nmi_pending = false;
4005 vcpu->arch.nmi_injected = false;
4006
3928 return kvm_x86_ops->vcpu_reset(vcpu); 4007 return kvm_x86_ops->vcpu_reset(vcpu);
3929} 4008}
3930 4009
@@ -4012,6 +4091,7 @@ struct kvm *kvm_arch_create_vm(void)
4012 return ERR_PTR(-ENOMEM); 4091 return ERR_PTR(-ENOMEM);
4013 4092
4014 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 4093 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
4094 INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
4015 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 4095 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
4016 4096
4017 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 4097 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
@@ -4048,8 +4128,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
4048 4128
4049void kvm_arch_destroy_vm(struct kvm *kvm) 4129void kvm_arch_destroy_vm(struct kvm *kvm)
4050{ 4130{
4051 kvm_iommu_unmap_guest(kvm);
4052 kvm_free_all_assigned_devices(kvm); 4131 kvm_free_all_assigned_devices(kvm);
4132 kvm_iommu_unmap_guest(kvm);
4053 kvm_free_pit(kvm); 4133 kvm_free_pit(kvm);
4054 kfree(kvm->arch.vpic); 4134 kfree(kvm->arch.vpic);
4055 kfree(kvm->arch.vioapic); 4135 kfree(kvm->arch.vioapic);
@@ -4127,7 +4207,8 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
4127int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 4207int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
4128{ 4208{
4129 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE 4209 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
4130 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED; 4210 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
4211 || vcpu->arch.nmi_pending;
4131} 4212}
4132 4213
4133static void vcpu_kick_intr(void *info) 4214static void vcpu_kick_intr(void *info)
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index ea051173b0da..d174db7a3370 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -58,6 +58,7 @@
58#define SrcMem32 (4<<4) /* Memory operand (32-bit). */ 58#define SrcMem32 (4<<4) /* Memory operand (32-bit). */
59#define SrcImm (5<<4) /* Immediate operand. */ 59#define SrcImm (5<<4) /* Immediate operand. */
60#define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ 60#define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */
61#define SrcOne (7<<4) /* Implied '1' */
61#define SrcMask (7<<4) 62#define SrcMask (7<<4)
62/* Generic ModRM decode. */ 63/* Generic ModRM decode. */
63#define ModRM (1<<7) 64#define ModRM (1<<7)
@@ -70,17 +71,23 @@
70#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ 71#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
71#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ 72#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
72#define GroupMask 0xff /* Group number stored in bits 0:7 */ 73#define GroupMask 0xff /* Group number stored in bits 0:7 */
74/* Source 2 operand type */
75#define Src2None (0<<29)
76#define Src2CL (1<<29)
77#define Src2ImmByte (2<<29)
78#define Src2One (3<<29)
79#define Src2Mask (7<<29)
73 80
74enum { 81enum {
75 Group1_80, Group1_81, Group1_82, Group1_83, 82 Group1_80, Group1_81, Group1_82, Group1_83,
76 Group1A, Group3_Byte, Group3, Group4, Group5, Group7, 83 Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
77}; 84};
78 85
79static u16 opcode_table[256] = { 86static u32 opcode_table[256] = {
80 /* 0x00 - 0x07 */ 87 /* 0x00 - 0x07 */
81 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 88 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
82 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 89 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
83 0, 0, 0, 0, 90 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
84 /* 0x08 - 0x0F */ 91 /* 0x08 - 0x0F */
85 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 92 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
86 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 93 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -195,7 +202,7 @@ static u16 opcode_table[256] = {
195 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, 202 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
196}; 203};
197 204
198static u16 twobyte_table[256] = { 205static u32 twobyte_table[256] = {
199 /* 0x00 - 0x0F */ 206 /* 0x00 - 0x0F */
200 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0, 207 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
201 ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 208 ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
@@ -230,9 +237,14 @@ static u16 twobyte_table[256] = {
230 /* 0x90 - 0x9F */ 237 /* 0x90 - 0x9F */
231 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 238 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
232 /* 0xA0 - 0xA7 */ 239 /* 0xA0 - 0xA7 */
233 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, 240 0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
241 DstMem | SrcReg | Src2ImmByte | ModRM,
242 DstMem | SrcReg | Src2CL | ModRM, 0, 0,
234 /* 0xA8 - 0xAF */ 243 /* 0xA8 - 0xAF */
235 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0, 244 0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
245 DstMem | SrcReg | Src2ImmByte | ModRM,
246 DstMem | SrcReg | Src2CL | ModRM,
247 ModRM, 0,
236 /* 0xB0 - 0xB7 */ 248 /* 0xB0 - 0xB7 */
237 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, 249 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
238 DstMem | SrcReg | ModRM | BitOp, 250 DstMem | SrcReg | ModRM | BitOp,
@@ -253,7 +265,7 @@ static u16 twobyte_table[256] = {
253 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 265 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
254}; 266};
255 267
256static u16 group_table[] = { 268static u32 group_table[] = {
257 [Group1_80*8] = 269 [Group1_80*8] =
258 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 270 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
259 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 271 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
@@ -297,9 +309,9 @@ static u16 group_table[] = {
297 SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, 309 SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
298}; 310};
299 311
300static u16 group2_table[] = { 312static u32 group2_table[] = {
301 [Group7*8] = 313 [Group7*8] =
302 SrcNone | ModRM, 0, 0, 0, 314 SrcNone | ModRM, 0, 0, SrcNone | ModRM,
303 SrcNone | ModRM | DstMem | Mov, 0, 315 SrcNone | ModRM | DstMem | Mov, 0,
304 SrcMem16 | ModRM | Mov, 0, 316 SrcMem16 | ModRM | Mov, 0,
305}; 317};
@@ -359,49 +371,48 @@ static u16 group2_table[] = {
359 "andl %"_msk",%"_LO32 _tmp"; " \ 371 "andl %"_msk",%"_LO32 _tmp"; " \
360 "orl %"_LO32 _tmp",%"_sav"; " 372 "orl %"_LO32 _tmp",%"_sav"; "
361 373
374#ifdef CONFIG_X86_64
375#define ON64(x) x
376#else
377#define ON64(x)
378#endif
379
380#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \
381 do { \
382 __asm__ __volatile__ ( \
383 _PRE_EFLAGS("0", "4", "2") \
384 _op _suffix " %"_x"3,%1; " \
385 _POST_EFLAGS("0", "4", "2") \
386 : "=m" (_eflags), "=m" ((_dst).val), \
387 "=&r" (_tmp) \
388 : _y ((_src).val), "i" (EFLAGS_MASK)); \
389 } while (0)
390
391
362/* Raw emulation: instruction has two explicit operands. */ 392/* Raw emulation: instruction has two explicit operands. */
363#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ 393#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
364 do { \ 394 do { \
365 unsigned long _tmp; \ 395 unsigned long _tmp; \
366 \ 396 \
367 switch ((_dst).bytes) { \ 397 switch ((_dst).bytes) { \
368 case 2: \ 398 case 2: \
369 __asm__ __volatile__ ( \ 399 ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
370 _PRE_EFLAGS("0", "4", "2") \ 400 break; \
371 _op"w %"_wx"3,%1; " \ 401 case 4: \
372 _POST_EFLAGS("0", "4", "2") \ 402 ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
373 : "=m" (_eflags), "=m" ((_dst).val), \ 403 break; \
374 "=&r" (_tmp) \ 404 case 8: \
375 : _wy ((_src).val), "i" (EFLAGS_MASK)); \ 405 ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
376 break; \ 406 break; \
377 case 4: \ 407 } \
378 __asm__ __volatile__ ( \
379 _PRE_EFLAGS("0", "4", "2") \
380 _op"l %"_lx"3,%1; " \
381 _POST_EFLAGS("0", "4", "2") \
382 : "=m" (_eflags), "=m" ((_dst).val), \
383 "=&r" (_tmp) \
384 : _ly ((_src).val), "i" (EFLAGS_MASK)); \
385 break; \
386 case 8: \
387 __emulate_2op_8byte(_op, _src, _dst, \
388 _eflags, _qx, _qy); \
389 break; \
390 } \
391 } while (0) 408 } while (0)
392 409
393#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ 410#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
394 do { \ 411 do { \
395 unsigned long __tmp; \ 412 unsigned long _tmp; \
396 switch ((_dst).bytes) { \ 413 switch ((_dst).bytes) { \
397 case 1: \ 414 case 1: \
398 __asm__ __volatile__ ( \ 415 ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \
399 _PRE_EFLAGS("0", "4", "2") \
400 _op"b %"_bx"3,%1; " \
401 _POST_EFLAGS("0", "4", "2") \
402 : "=m" (_eflags), "=m" ((_dst).val), \
403 "=&r" (__tmp) \
404 : _by ((_src).val), "i" (EFLAGS_MASK)); \
405 break; \ 416 break; \
406 default: \ 417 default: \
407 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ 418 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
@@ -425,71 +436,68 @@ static u16 group2_table[] = {
425 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ 436 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
426 "w", "r", _LO32, "r", "", "r") 437 "w", "r", _LO32, "r", "", "r")
427 438
428/* Instruction has only one explicit operand (no source operand). */ 439/* Instruction has three operands and one operand is stored in ECX register */
429#define emulate_1op(_op, _dst, _eflags) \ 440#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \
430 do { \ 441 do { \
431 unsigned long _tmp; \ 442 unsigned long _tmp; \
432 \ 443 _type _clv = (_cl).val; \
433 switch ((_dst).bytes) { \ 444 _type _srcv = (_src).val; \
434 case 1: \ 445 _type _dstv = (_dst).val; \
435 __asm__ __volatile__ ( \ 446 \
436 _PRE_EFLAGS("0", "3", "2") \ 447 __asm__ __volatile__ ( \
437 _op"b %1; " \ 448 _PRE_EFLAGS("0", "5", "2") \
438 _POST_EFLAGS("0", "3", "2") \ 449 _op _suffix " %4,%1 \n" \
439 : "=m" (_eflags), "=m" ((_dst).val), \ 450 _POST_EFLAGS("0", "5", "2") \
440 "=&r" (_tmp) \ 451 : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \
441 : "i" (EFLAGS_MASK)); \ 452 : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
442 break; \ 453 ); \
443 case 2: \ 454 \
444 __asm__ __volatile__ ( \ 455 (_cl).val = (unsigned long) _clv; \
445 _PRE_EFLAGS("0", "3", "2") \ 456 (_src).val = (unsigned long) _srcv; \
446 _op"w %1; " \ 457 (_dst).val = (unsigned long) _dstv; \
447 _POST_EFLAGS("0", "3", "2") \
448 : "=m" (_eflags), "=m" ((_dst).val), \
449 "=&r" (_tmp) \
450 : "i" (EFLAGS_MASK)); \
451 break; \
452 case 4: \
453 __asm__ __volatile__ ( \
454 _PRE_EFLAGS("0", "3", "2") \
455 _op"l %1; " \
456 _POST_EFLAGS("0", "3", "2") \
457 : "=m" (_eflags), "=m" ((_dst).val), \
458 "=&r" (_tmp) \
459 : "i" (EFLAGS_MASK)); \
460 break; \
461 case 8: \
462 __emulate_1op_8byte(_op, _dst, _eflags); \
463 break; \
464 } \
465 } while (0) 458 } while (0)
466 459
467/* Emulate an instruction with quadword operands (x86/64 only). */ 460#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \
468#if defined(CONFIG_X86_64) 461 do { \
469#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \ 462 switch ((_dst).bytes) { \
470 do { \ 463 case 2: \
471 __asm__ __volatile__ ( \ 464 __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
472 _PRE_EFLAGS("0", "4", "2") \ 465 "w", unsigned short); \
473 _op"q %"_qx"3,%1; " \ 466 break; \
474 _POST_EFLAGS("0", "4", "2") \ 467 case 4: \
475 : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ 468 __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
476 : _qy ((_src).val), "i" (EFLAGS_MASK)); \ 469 "l", unsigned int); \
470 break; \
471 case 8: \
472 ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
473 "q", unsigned long)); \
474 break; \
475 } \
477 } while (0) 476 } while (0)
478 477
479#define __emulate_1op_8byte(_op, _dst, _eflags) \ 478#define __emulate_1op(_op, _dst, _eflags, _suffix) \
480 do { \ 479 do { \
481 __asm__ __volatile__ ( \ 480 unsigned long _tmp; \
482 _PRE_EFLAGS("0", "3", "2") \ 481 \
483 _op"q %1; " \ 482 __asm__ __volatile__ ( \
484 _POST_EFLAGS("0", "3", "2") \ 483 _PRE_EFLAGS("0", "3", "2") \
485 : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ 484 _op _suffix " %1; " \
486 : "i" (EFLAGS_MASK)); \ 485 _POST_EFLAGS("0", "3", "2") \
486 : "=m" (_eflags), "+m" ((_dst).val), \
487 "=&r" (_tmp) \
488 : "i" (EFLAGS_MASK)); \
487 } while (0) 489 } while (0)
488 490
489#elif defined(__i386__) 491/* Instruction has only one explicit operand (no source operand). */
490#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) 492#define emulate_1op(_op, _dst, _eflags) \
491#define __emulate_1op_8byte(_op, _dst, _eflags) 493 do { \
492#endif /* __i386__ */ 494 switch ((_dst).bytes) { \
495 case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \
496 case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \
497 case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \
498 case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
499 } \
500 } while (0)
493 501
494/* Fetch next part of the instruction being emulated. */ 502/* Fetch next part of the instruction being emulated. */
495#define insn_fetch(_type, _size, _eip) \ 503#define insn_fetch(_type, _size, _eip) \
@@ -1041,6 +1049,33 @@ done_prefixes:
1041 c->src.bytes = 1; 1049 c->src.bytes = 1;
1042 c->src.val = insn_fetch(s8, 1, c->eip); 1050 c->src.val = insn_fetch(s8, 1, c->eip);
1043 break; 1051 break;
1052 case SrcOne:
1053 c->src.bytes = 1;
1054 c->src.val = 1;
1055 break;
1056 }
1057
1058 /*
1059 * Decode and fetch the second source operand: register, memory
1060 * or immediate.
1061 */
1062 switch (c->d & Src2Mask) {
1063 case Src2None:
1064 break;
1065 case Src2CL:
1066 c->src2.bytes = 1;
1067 c->src2.val = c->regs[VCPU_REGS_RCX] & 0xff; /* the count comes from CL */
1068 break;
1069 case Src2ImmByte:
1070 c->src2.type = OP_IMM;
1071 c->src2.ptr = (unsigned long *)c->eip;
1072 c->src2.bytes = 1;
1073 c->src2.val = insn_fetch(u8, 1, c->eip);
1074 break;
1075 case Src2One:
1076 c->src2.bytes = 1;
1077 c->src2.val = 1;
1078 break;
1044 } 1079 }
1045 1080
1046 /* Decode and fetch the destination operand: register or memory. */ 1081 /* Decode and fetch the destination operand: register or memory. */
@@ -1100,20 +1135,33 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
1100 c->regs[VCPU_REGS_RSP]); 1135 c->regs[VCPU_REGS_RSP]);
1101} 1136}
1102 1137
1103static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, 1138static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1104 struct x86_emulate_ops *ops) 1139 struct x86_emulate_ops *ops)
1105{ 1140{
1106 struct decode_cache *c = &ctxt->decode; 1141 struct decode_cache *c = &ctxt->decode;
1107 int rc; 1142 int rc;
1108 1143
1109 rc = ops->read_std(register_address(c, ss_base(ctxt), 1144 rc = ops->read_emulated(register_address(c, ss_base(ctxt),
1110 c->regs[VCPU_REGS_RSP]), 1145 c->regs[VCPU_REGS_RSP]),
1111 &c->dst.val, c->dst.bytes, ctxt->vcpu); 1146 &c->src.val, c->src.bytes, ctxt->vcpu);
1112 if (rc != 0) 1147 if (rc != 0)
1113 return rc; 1148 return rc;
1114 1149
1115 register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes); 1150 register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
1151 return rc;
1152}
1153
1154static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
1155 struct x86_emulate_ops *ops)
1156{
1157 struct decode_cache *c = &ctxt->decode;
1158 int rc;
1116 1159
1160 c->src.bytes = c->dst.bytes;
1161 rc = emulate_pop(ctxt, ops);
1162 if (rc != 0)
1163 return rc;
1164 c->dst.val = c->src.val;
1117 return 0; 1165 return 0;
1118} 1166}
1119 1167
@@ -1415,24 +1463,15 @@ special_insn:
1415 emulate_1op("dec", c->dst, ctxt->eflags); 1463 emulate_1op("dec", c->dst, ctxt->eflags);
1416 break; 1464 break;
1417 case 0x50 ... 0x57: /* push reg */ 1465 case 0x50 ... 0x57: /* push reg */
1418 c->dst.type = OP_MEM; 1466 emulate_push(ctxt);
1419 c->dst.bytes = c->op_bytes;
1420 c->dst.val = c->src.val;
1421 register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1422 -c->op_bytes);
1423 c->dst.ptr = (void *) register_address(
1424 c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]);
1425 break; 1467 break;
1426 case 0x58 ... 0x5f: /* pop reg */ 1468 case 0x58 ... 0x5f: /* pop reg */
1427 pop_instruction: 1469 pop_instruction:
1428 if ((rc = ops->read_std(register_address(c, ss_base(ctxt), 1470 c->src.bytes = c->op_bytes;
1429 c->regs[VCPU_REGS_RSP]), c->dst.ptr, 1471 rc = emulate_pop(ctxt, ops);
1430 c->op_bytes, ctxt->vcpu)) != 0) 1472 if (rc != 0)
1431 goto done; 1473 goto done;
1432 1474 c->dst.val = c->src.val;
1433 register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1434 c->op_bytes);
1435 c->dst.type = OP_NONE; /* Disable writeback. */
1436 break; 1475 break;
1437 case 0x63: /* movsxd */ 1476 case 0x63: /* movsxd */
1438 if (ctxt->mode != X86EMUL_MODE_PROT64) 1477 if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -1591,7 +1630,9 @@ special_insn:
1591 emulate_push(ctxt); 1630 emulate_push(ctxt);
1592 break; 1631 break;
1593 case 0x9d: /* popf */ 1632 case 0x9d: /* popf */
1633 c->dst.type = OP_REG;
1594 c->dst.ptr = (unsigned long *) &ctxt->eflags; 1634 c->dst.ptr = (unsigned long *) &ctxt->eflags;
1635 c->dst.bytes = c->op_bytes;
1595 goto pop_instruction; 1636 goto pop_instruction;
1596 case 0xa0 ... 0xa1: /* mov */ 1637 case 0xa0 ... 0xa1: /* mov */
1597 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; 1638 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
@@ -1689,7 +1730,9 @@ special_insn:
1689 emulate_grp2(ctxt); 1730 emulate_grp2(ctxt);
1690 break; 1731 break;
1691 case 0xc3: /* ret */ 1732 case 0xc3: /* ret */
1733 c->dst.type = OP_REG;
1692 c->dst.ptr = &c->eip; 1734 c->dst.ptr = &c->eip;
1735 c->dst.bytes = c->op_bytes;
1693 goto pop_instruction; 1736 goto pop_instruction;
1694 case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ 1737 case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
1695 mov: 1738 mov:
@@ -1778,7 +1821,7 @@ special_insn:
1778 c->eip = saved_eip; 1821 c->eip = saved_eip;
1779 goto cannot_emulate; 1822 goto cannot_emulate;
1780 } 1823 }
1781 return 0; 1824 break;
1782 case 0xf4: /* hlt */ 1825 case 0xf4: /* hlt */
1783 ctxt->vcpu->arch.halt_request = 1; 1826 ctxt->vcpu->arch.halt_request = 1;
1784 break; 1827 break;
@@ -1999,12 +2042,20 @@ twobyte_insn:
1999 c->src.val &= (c->dst.bytes << 3) - 1; 2042 c->src.val &= (c->dst.bytes << 3) - 1;
2000 emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags); 2043 emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
2001 break; 2044 break;
2045 case 0xa4: /* shld imm8, r, r/m */
2046 case 0xa5: /* shld cl, r, r/m */
2047 emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
2048 break;
2002 case 0xab: 2049 case 0xab:
2003 bts: /* bts */ 2050 bts: /* bts */
2004 /* only subword offset */ 2051 /* only subword offset */
2005 c->src.val &= (c->dst.bytes << 3) - 1; 2052 c->src.val &= (c->dst.bytes << 3) - 1;
2006 emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); 2053 emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
2007 break; 2054 break;
2055 case 0xac: /* shrd imm8, r, r/m */
2056 case 0xad: /* shrd cl, r, r/m */
2057 emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
2058 break;
2008 case 0xae: /* clflush */ 2059 case 0xae: /* clflush */
2009 break; 2060 break;
2010 case 0xb0 ... 0xb1: /* cmpxchg */ 2061 case 0xb0 ... 0xb1: /* cmpxchg */
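For orientation (illustrative, not part of the patch): the double-precision shifts added above behave as follows. shld %cl, %rbx, %rax shifts RAX left by CL bits and fills the vacated low-order bits from the high-order bits of RBX; shrd is the mirror image, shifting right and filling from RBX's low-order bits. In the decode, c->dst is the ModRM r/m operand being shifted, c->src is the register supplying the fill bits, and c->src2 carries the CL or imm8 shift count.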
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index c16d9be1b017..3bbdb9d02376 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -79,9 +79,12 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
79 if (IS_ERR(anon_inode_inode)) 79 if (IS_ERR(anon_inode_inode))
80 return -ENODEV; 80 return -ENODEV;
81 81
82 if (fops->owner && !try_module_get(fops->owner))
83 return -ENOENT;
84
82 error = get_unused_fd_flags(flags); 85 error = get_unused_fd_flags(flags);
83 if (error < 0) 86 if (error < 0)
84 return error; 87 goto err_module;
85 fd = error; 88 fd = error;
86 89
87 /* 90 /*
@@ -128,6 +131,8 @@ err_dput:
128 dput(dentry); 131 dput(dentry);
129err_put_unused_fd: 132err_put_unused_fd:
130 put_unused_fd(fd); 133 put_unused_fd(fd);
134err_module:
135 module_put(fops->owner);
131 return error; 136 return error;
132} 137}
133EXPORT_SYMBOL_GPL(anon_inode_getfd); 138EXPORT_SYMBOL_GPL(anon_inode_getfd);
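A minimal sketch of the calling convention this change protects (illustrative only; foo_fops, foo_release and foo_create_fd are made-up names): any driver that hands out anonymous-inode fds and sets fops->owner now keeps its module pinned for as long as those fds are open.

#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/module.h>

static int foo_release(struct inode *inode, struct file *filp)
{
        /* drop whatever filp->private_data references here */
        return 0;
}

static const struct file_operations foo_fops = {
        .owner   = THIS_MODULE,
        .release = foo_release,
};

int foo_create_fd(void *priv)
{
        /* with .owner set, the returned fd holds a module reference until release */
        return anon_inode_getfd("[foo]", &foo_fops, priv, 0);
}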
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index f18b86fa8655..35525ac63337 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -83,6 +83,7 @@ struct kvm_irqchip {
83#define KVM_EXIT_S390_SIEIC 13 83#define KVM_EXIT_S390_SIEIC 13
84#define KVM_EXIT_S390_RESET 14 84#define KVM_EXIT_S390_RESET 14
85#define KVM_EXIT_DCR 15 85#define KVM_EXIT_DCR 15
86#define KVM_EXIT_NMI 16
86 87
87/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ 88/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
88struct kvm_run { 89struct kvm_run {
@@ -387,6 +388,14 @@ struct kvm_trace_rec {
387#define KVM_CAP_DEVICE_ASSIGNMENT 17 388#define KVM_CAP_DEVICE_ASSIGNMENT 17
388#endif 389#endif
389#define KVM_CAP_IOMMU 18 390#define KVM_CAP_IOMMU 18
391#if defined(CONFIG_X86)
392#define KVM_CAP_DEVICE_MSI 20
393#endif
394/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
395#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
396#if defined(CONFIG_X86)
397#define KVM_CAP_USER_NMI 22
398#endif
390 399
391/* 400/*
392 * ioctls for VM fds 401 * ioctls for VM fds
@@ -458,6 +467,8 @@ struct kvm_trace_rec {
458#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) 467#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
459#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) 468#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state)
460#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) 469#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
470/* Available with KVM_CAP_USER_NMI */
471#define KVM_NMI _IO(KVMIO, 0x9a)
461 472
462#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) 473#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
463#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) 474#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
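A usage sketch for the new ioctl (illustrative only; inject_guest_nmi is a made-up helper, and it assumes /dev/kvm and a vCPU fd are already open and that KVM_CAP_USER_NMI is visible to userspace despite the CONFIG_X86 guard above):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Returns 0 on success, -1 if the capability is missing or the ioctl fails. */
static int inject_guest_nmi(int kvm_fd, int vcpu_fd)
{
        if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_NMI) <= 0)
                return -1;              /* kernel without KVM_NMI support */
        return ioctl(vcpu_fd, KVM_NMI); /* queue an NMI for this vCPU */
}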
@@ -500,10 +511,17 @@ struct kvm_assigned_irq {
500 __u32 guest_irq; 511 __u32 guest_irq;
501 __u32 flags; 512 __u32 flags;
502 union { 513 union {
514 struct {
515 __u32 addr_lo;
516 __u32 addr_hi;
517 __u32 data;
518 } guest_msi;
503 __u32 reserved[12]; 519 __u32 reserved[12];
504 }; 520 };
505}; 521};
506 522
507#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) 523#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
508 524
525#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0)
526
509#endif 527#endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bb92be2153bc..eafabd5c66b2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -16,6 +16,7 @@
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/preempt.h> 17#include <linux/preempt.h>
18#include <linux/marker.h> 18#include <linux/marker.h>
19#include <linux/msi.h>
19#include <asm/signal.h> 20#include <asm/signal.h>
20 21
21#include <linux/kvm.h> 22#include <linux/kvm.h>
@@ -306,8 +307,14 @@ struct kvm_assigned_dev_kernel {
306 int host_busnr; 307 int host_busnr;
307 int host_devfn; 308 int host_devfn;
308 int host_irq; 309 int host_irq;
310 bool host_irq_disabled;
309 int guest_irq; 311 int guest_irq;
310 int irq_requested; 312 struct msi_msg guest_msi;
313#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0)
314#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1)
315#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8)
316#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9)
317 unsigned long irq_requested_type;
311 int irq_source_id; 318 int irq_source_id;
312 struct pci_dev *dev; 319 struct pci_dev *dev;
313 struct kvm *kvm; 320 struct kvm *kvm;
@@ -316,8 +323,7 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
316void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); 323void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
317void kvm_register_irq_ack_notifier(struct kvm *kvm, 324void kvm_register_irq_ack_notifier(struct kvm *kvm,
318 struct kvm_irq_ack_notifier *kian); 325 struct kvm_irq_ack_notifier *kian);
319void kvm_unregister_irq_ack_notifier(struct kvm *kvm, 326void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
320 struct kvm_irq_ack_notifier *kian);
321int kvm_request_irq_source_id(struct kvm *kvm); 327int kvm_request_irq_source_id(struct kvm *kvm);
322void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); 328void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
323 329
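
In kvm_host.h the single irq_requested flag becomes irq_requested_type, a small bit set that records the guest-visible interrupt style (INTx or MSI, low bits) separately from how the host IRQ is actually wired up (bits 8 and up), alongside the guest's MSI message and a host_irq_disabled marker. A stand-alone illustration of how those bits combine is below; it reuses the #defines from the hunk, but the helper names are invented for the example.

    /* irq_type_bits.c - illustration of the irq_requested_type bit layout. */
    #include <stdio.h>

    #define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0)
    #define KVM_ASSIGNED_DEV_GUEST_MSI  (1 << 1)
    #define KVM_ASSIGNED_DEV_HOST_INTX  (1 << 8)
    #define KVM_ASSIGNED_DEV_HOST_MSI   (1 << 9)

    /* Hypothetical predicates: what the dispatch and teardown paths test for. */
    static int guest_wants_msi(unsigned long type)
    {
        return (type & KVM_ASSIGNED_DEV_GUEST_MSI) != 0;
    }

    static int host_uses_msi(unsigned long type)
    {
        return (type & KVM_ASSIGNED_DEV_HOST_MSI) != 0;
    }

    int main(void)
    {
        /* msi2intx-style setup: the guest sees INTx while the host IRQ is MSI. */
        unsigned long type = KVM_ASSIGNED_DEV_GUEST_INTX | KVM_ASSIGNED_DEV_HOST_MSI;

        printf("guest msi: %d, host msi: %d\n",
               guest_wants_msi(type), host_uses_msi(type));
        return 0;
    }
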
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 53772bb46320..23b81cf242af 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -150,10 +150,11 @@ static int ioapic_inj_irq(struct kvm_ioapic *ioapic,
150static void ioapic_inj_nmi(struct kvm_vcpu *vcpu) 150static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
151{ 151{
152 kvm_inject_nmi(vcpu); 152 kvm_inject_nmi(vcpu);
153 kvm_vcpu_kick(vcpu);
153} 154}
154 155
155static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, 156u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
156 u8 dest_mode) 157 u8 dest_mode)
157{ 158{
158 u32 mask = 0; 159 u32 mask = 0;
159 int i; 160 int i;
@@ -207,7 +208,8 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
207 "vector=%x trig_mode=%x\n", 208 "vector=%x trig_mode=%x\n",
208 dest, dest_mode, delivery_mode, vector, trig_mode); 209 dest, dest_mode, delivery_mode, vector, trig_mode);
209 210
210 deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode); 211 deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest,
212 dest_mode);
211 if (!deliver_bitmask) { 213 if (!deliver_bitmask) {
212 ioapic_debug("no target on destination\n"); 214 ioapic_debug("no target on destination\n");
213 return 0; 215 return 0;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index cd7ae7691c9d..49c9581d2586 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -85,5 +85,7 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
85int kvm_ioapic_init(struct kvm *kvm); 85int kvm_ioapic_init(struct kvm *kvm);
86void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); 86void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
87void kvm_ioapic_reset(struct kvm_ioapic *ioapic); 87void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
88u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
89 u8 dest_mode);
88 90
89#endif 91#endif
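
The ioapic change renames the static ioapic_get_delivery_bitmask() to kvm_ioapic_get_delivery_bitmask() and exports it through ioapic.h so the MSI dispatch code added in kvm_main.c can reuse it. The toy model below shows the general idea of mapping a destination (dest, dest_mode) to a bitmask of target vcpus; it is a deliberately simplified user-space model (flat logical mode, 8-bit ids, invented data), not the kernel's implementation.

    /* delivery_bitmask_model.c - simplified model of building a vcpu delivery
     * bitmask from an interrupt destination, in the spirit of
     * kvm_ioapic_get_delivery_bitmask(); the APIC ids and LDRs are made up. */
    #include <stdio.h>
    #include <stdint.h>

    #define NVCPUS 4

    struct vcpu_apic {
        uint8_t id;   /* physical APIC id */
        uint8_t ldr;  /* logical destination bitmap (flat model) */
    };

    static uint32_t delivery_bitmask(const struct vcpu_apic *v, int n,
                                     uint8_t dest, int dest_mode_logical)
    {
        uint32_t mask = 0;

        for (int i = 0; i < n; i++) {
            if (dest_mode_logical) {
                if (v[i].ldr & dest)                 /* logical: bitwise match */
                    mask |= 1u << i;
            } else {
                if (dest == 0xff || v[i].id == dest) /* physical: broadcast or exact id */
                    mask |= 1u << i;
            }
        }
        return mask;
    }

    int main(void)
    {
        struct vcpu_apic vcpus[NVCPUS] = {
            { .id = 0, .ldr = 1 }, { .id = 1, .ldr = 2 },
            { .id = 2, .ldr = 4 }, { .id = 3, .ldr = 8 },
        };

        printf("physical dest 2  -> 0x%x\n", delivery_bitmask(vcpus, NVCPUS, 2, 0));
        printf("logical dest 0x3 -> 0x%x\n", delivery_bitmask(vcpus, NVCPUS, 0x3, 1));
        return 0;
    }
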
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 55ad76ee2d09..aa5d1e5c497e 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -61,10 +61,9 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
61 hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); 61 hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
62} 62}
63 63
64void kvm_unregister_irq_ack_notifier(struct kvm *kvm, 64void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
65 struct kvm_irq_ack_notifier *kian)
66{ 65{
67 hlist_del(&kian->link); 66 hlist_del_init(&kian->link);
68} 67}
69 68
70/* The caller must hold kvm->lock mutex */ 69/* The caller must hold kvm->lock mutex */
@@ -73,11 +72,15 @@ int kvm_request_irq_source_id(struct kvm *kvm)
73 unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; 72 unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
74 int irq_source_id = find_first_zero_bit(bitmap, 73 int irq_source_id = find_first_zero_bit(bitmap,
75 sizeof(kvm->arch.irq_sources_bitmap)); 74 sizeof(kvm->arch.irq_sources_bitmap));
75
76 if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { 76 if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
77 printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); 77 printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n");
78 irq_source_id = -EFAULT; 78 return -EFAULT;
79 } else 79 }
80 set_bit(irq_source_id, bitmap); 80
81 ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
82 set_bit(irq_source_id, bitmap);
83
81 return irq_source_id; 84 return irq_source_id;
82} 85}
83 86
@@ -85,7 +88,9 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
85{ 88{
86 int i; 89 int i;
87 90
88 if (irq_source_id <= 0 || 91 ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
92
93 if (irq_source_id < 0 ||
89 irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { 94 irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
90 printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); 95 printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
91 return; 96 return;
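
The irq_comm.c hunks make kvm_request_irq_source_id() fail fast with -EFAULT when the bitmap is full and assert that the reserved user-space source id is never handed out or freed. A user-space analog of that bitmap allocator is sketched below; the names are invented, and the reserved id 0 only stands in for KVM_USERSPACE_IRQ_SOURCE_ID.

    /* irq_source_ids.c - user-space analog of bitmap-based source-id allocation,
     * mirroring the request/free pattern above (names invented for the example). */
    #include <stdio.h>

    #define MAX_IRQ_SOURCES          16
    #define USERSPACE_IRQ_SOURCE_ID  0   /* reserved, never recycled */

    static unsigned long source_bitmap = 1UL << USERSPACE_IRQ_SOURCE_ID;

    static int request_irq_source_id(void)
    {
        for (int id = 0; id < MAX_IRQ_SOURCES; id++) {
            if (!(source_bitmap & (1UL << id))) {
                source_bitmap |= 1UL << id;      /* claim the first free id */
                return id;
            }
        }
        return -1;                               /* exhausted, like the -EFAULT path */
    }

    static void free_irq_source_id(int id)
    {
        if (id < 0 || id >= MAX_IRQ_SOURCES || id == USERSPACE_IRQ_SOURCE_ID)
            return;                              /* out of range or reserved: ignore */
        source_bitmap &= ~(1UL << id);
    }

    int main(void)
    {
        int a = request_irq_source_id();
        int b = request_irq_source_id();
        printf("allocated %d and %d\n", a, b);   /* 1 and 2: id 0 stays reserved */
        free_irq_source_id(b);
        free_irq_source_id(a);
        return 0;
    }
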
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a87f45edfae8..fc6127cbea1f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,6 +47,10 @@
47#include <asm/uaccess.h> 47#include <asm/uaccess.h>
48#include <asm/pgtable.h> 48#include <asm/pgtable.h>
49 49
50#ifdef CONFIG_X86
51#include <asm/msidef.h>
52#endif
53
50#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 54#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
51#include "coalesced_mmio.h" 55#include "coalesced_mmio.h"
52#endif 56#endif
@@ -60,10 +64,13 @@
60MODULE_AUTHOR("Qumranet"); 64MODULE_AUTHOR("Qumranet");
61MODULE_LICENSE("GPL"); 65MODULE_LICENSE("GPL");
62 66
67static int msi2intx = 1;
68module_param(msi2intx, bool, 0);
69
63DEFINE_SPINLOCK(kvm_lock); 70DEFINE_SPINLOCK(kvm_lock);
64LIST_HEAD(vm_list); 71LIST_HEAD(vm_list);
65 72
66static cpumask_t cpus_hardware_enabled; 73static cpumask_var_t cpus_hardware_enabled;
67 74
68struct kmem_cache *kvm_vcpu_cache; 75struct kmem_cache *kvm_vcpu_cache;
69EXPORT_SYMBOL_GPL(kvm_vcpu_cache); 76EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
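
This hunk adds the msi2intx module parameter (MSI on the host side, INTx toward the guest, on by default) and turns cpus_hardware_enabled from a static cpumask_t into a cpumask_var_t allocated at init time, so the mask no longer embeds space for NR_CPUS bits in the object itself. The user-space analog below shows the same idea of sizing a CPU mask at runtime; the types and helpers are invented for the example, not the kernel API.

    /* cpumask_var_demo.c - user-space analog of the cpumask_var_t pattern:
     * size the mask for the number of CPUs discovered at runtime instead of
     * embedding a fixed, potentially huge array in a global or on the stack. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    #define BITS_PER_WORD (8 * sizeof(unsigned long))

    typedef unsigned long *cpumask_var;

    static int alloc_cpumask(cpumask_var *mask, long ncpus)
    {
        size_t words = (ncpus + BITS_PER_WORD - 1) / BITS_PER_WORD;
        *mask = calloc(words, sizeof(unsigned long));
        return *mask != NULL;
    }

    static void set_cpu(cpumask_var mask, long cpu)
    {
        mask[cpu / BITS_PER_WORD] |= 1UL << (cpu % BITS_PER_WORD);
    }

    static int test_cpu(cpumask_var mask, long cpu)
    {
        return (mask[cpu / BITS_PER_WORD] >> (cpu % BITS_PER_WORD)) & 1;
    }

    int main(void)
    {
        long ncpus = sysconf(_SC_NPROCESSORS_CONF);
        cpumask_var enabled;

        if (ncpus < 1)
            ncpus = 1;
        if (!alloc_cpumask(&enabled, ncpus))
            return 1;
        set_cpu(enabled, 0);
        printf("cpu0: %d, cpu%ld: %d\n",
               test_cpu(enabled, 0), ncpus - 1, test_cpu(enabled, ncpus - 1));
        free(enabled);
        return 0;
    }
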
@@ -75,9 +82,60 @@ struct dentry *kvm_debugfs_dir;
75static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, 82static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
76 unsigned long arg); 83 unsigned long arg);
77 84
78bool kvm_rebooting; 85static bool kvm_rebooting;
79 86
80#ifdef KVM_CAP_DEVICE_ASSIGNMENT 87#ifdef KVM_CAP_DEVICE_ASSIGNMENT
88
89#ifdef CONFIG_X86
90static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev)
91{
92 int vcpu_id;
93 struct kvm_vcpu *vcpu;
94 struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm);
95 int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK)
96 >> MSI_ADDR_DEST_ID_SHIFT;
97 int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK)
98 >> MSI_DATA_VECTOR_SHIFT;
99 int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
100 (unsigned long *)&dev->guest_msi.address_lo);
101 int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
102 (unsigned long *)&dev->guest_msi.data);
103 int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
104 (unsigned long *)&dev->guest_msi.data);
105 u32 deliver_bitmask;
106
107 BUG_ON(!ioapic);
108
109 deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
110 dest_id, dest_mode);
111 /* IOAPIC delivery mode value is the same as MSI here */
112 switch (delivery_mode) {
113 case IOAPIC_LOWEST_PRIORITY:
114 vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
115 deliver_bitmask);
116 if (vcpu != NULL)
117 kvm_apic_set_irq(vcpu, vector, trig_mode);
118 else
119 printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
120 break;
121 case IOAPIC_FIXED:
122 for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
123 if (!(deliver_bitmask & (1 << vcpu_id)))
124 continue;
125 deliver_bitmask &= ~(1 << vcpu_id);
126 vcpu = ioapic->kvm->vcpus[vcpu_id];
127 if (vcpu)
128 kvm_apic_set_irq(vcpu, vector, trig_mode);
129 }
130 break;
131 default:
132 printk(KERN_INFO "kvm: unsupported MSI delivery mode\n");
133 }
134}
135#else
136static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {}
137#endif
138
81static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, 139static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
82 int assigned_dev_id) 140 int assigned_dev_id)
83{ 141{
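
assigned_device_msi_dispatch() above has to take the guest-programmed MSI address/data pair apart before it can reuse the ioapic delivery code: destination id and mode come out of the address, vector, delivery mode and trigger mode out of the data. The small decoder below does the same field extraction in user space using the conventional x86 MSI layout; the macro names are local to the example, not the kernel's msidef.h definitions included earlier in this file.

    /* msi_decode.c - decode an MSI address/data pair the way the dispatch path
     * needs to.  Field positions follow the usual x86 MSI message format. */
    #include <stdio.h>
    #include <stdint.h>

    #define MSI_DEST_ID(addr)    (((addr) >> 12) & 0xff)   /* address bits 19:12 */
    #define MSI_DEST_MODE(addr)  (((addr) >> 2) & 0x1)     /* 0 physical, 1 logical */
    #define MSI_VECTOR(data)     ((data) & 0xff)           /* data bits 7:0 */
    #define MSI_DELIVERY(data)   (((data) >> 8) & 0x7)     /* 0 fixed, 1 lowest prio */
    #define MSI_TRIGGER(data)    (((data) >> 15) & 0x1)    /* 0 edge, 1 level */

    int main(void)
    {
        /* Example message: 0xfee01000 targets APIC id 1 in physical mode;
         * data 0x0041 is vector 0x41, fixed delivery, edge triggered. */
        uint32_t address_lo = 0xfee01000;
        uint32_t data = 0x0041;

        printf("dest id %u, %s, vector 0x%x, delivery %u, %s-triggered\n",
               MSI_DEST_ID(address_lo),
               MSI_DEST_MODE(address_lo) ? "logical" : "physical",
               MSI_VECTOR(data),
               MSI_DELIVERY(data),
               MSI_TRIGGER(data) ? "level" : "edge");
        return 0;
    }
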
@@ -104,9 +162,16 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
104 * finer-grained lock, update this 162 * finer-grained lock, update this
105 */ 163 */
106 mutex_lock(&assigned_dev->kvm->lock); 164 mutex_lock(&assigned_dev->kvm->lock);
107 kvm_set_irq(assigned_dev->kvm, 165 if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX)
108 assigned_dev->irq_source_id, 166 kvm_set_irq(assigned_dev->kvm,
109 assigned_dev->guest_irq, 1); 167 assigned_dev->irq_source_id,
168 assigned_dev->guest_irq, 1);
169 else if (assigned_dev->irq_requested_type &
170 KVM_ASSIGNED_DEV_GUEST_MSI) {
171 assigned_device_msi_dispatch(assigned_dev);
172 enable_irq(assigned_dev->host_irq);
173 assigned_dev->host_irq_disabled = false;
174 }
110 mutex_unlock(&assigned_dev->kvm->lock); 175 mutex_unlock(&assigned_dev->kvm->lock);
111 kvm_put_kvm(assigned_dev->kvm); 176 kvm_put_kvm(assigned_dev->kvm);
112} 177}
@@ -117,8 +182,12 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
117 (struct kvm_assigned_dev_kernel *) dev_id; 182 (struct kvm_assigned_dev_kernel *) dev_id;
118 183
119 kvm_get_kvm(assigned_dev->kvm); 184 kvm_get_kvm(assigned_dev->kvm);
185
120 schedule_work(&assigned_dev->interrupt_work); 186 schedule_work(&assigned_dev->interrupt_work);
187
121 disable_irq_nosync(irq); 188 disable_irq_nosync(irq);
189 assigned_dev->host_irq_disabled = true;
190
122 return IRQ_HANDLED; 191 return IRQ_HANDLED;
123} 192}
124 193
@@ -132,19 +201,32 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
132 201
133 dev = container_of(kian, struct kvm_assigned_dev_kernel, 202 dev = container_of(kian, struct kvm_assigned_dev_kernel,
134 ack_notifier); 203 ack_notifier);
204
135 kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); 205 kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
136 enable_irq(dev->host_irq); 206
207 /* The guest irq may be shared so this ack may be
208 * from another device.
209 */
210 if (dev->host_irq_disabled) {
211 enable_irq(dev->host_irq);
212 dev->host_irq_disabled = false;
213 }
137} 214}
138 215
139static void kvm_free_assigned_device(struct kvm *kvm, 216static void kvm_free_assigned_irq(struct kvm *kvm,
140 struct kvm_assigned_dev_kernel 217 struct kvm_assigned_dev_kernel *assigned_dev)
141 *assigned_dev)
142{ 218{
143 if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) 219 if (!irqchip_in_kernel(kvm))
144 free_irq(assigned_dev->host_irq, (void *)assigned_dev); 220 return;
221
222 kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
145 223
146 kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); 224 if (assigned_dev->irq_source_id != -1)
147 kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); 225 kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
226 assigned_dev->irq_source_id = -1;
227
228 if (!assigned_dev->irq_requested_type)
229 return;
148 230
149 if (cancel_work_sync(&assigned_dev->interrupt_work)) 231 if (cancel_work_sync(&assigned_dev->interrupt_work))
150 /* We had pending work. That means we will have to take 232 /* We had pending work. That means we will have to take
@@ -152,6 +234,23 @@ static void kvm_free_assigned_device(struct kvm *kvm,
152 */ 234 */
153 kvm_put_kvm(kvm); 235 kvm_put_kvm(kvm);
154 236
237 free_irq(assigned_dev->host_irq, (void *)assigned_dev);
238
239 if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
240 pci_disable_msi(assigned_dev->dev);
241
242 assigned_dev->irq_requested_type = 0;
243}
244
245
246static void kvm_free_assigned_device(struct kvm *kvm,
247 struct kvm_assigned_dev_kernel
248 *assigned_dev)
249{
250 kvm_free_assigned_irq(kvm, assigned_dev);
251
252 pci_reset_function(assigned_dev->dev);
253
155 pci_release_regions(assigned_dev->dev); 254 pci_release_regions(assigned_dev->dev);
156 pci_disable_device(assigned_dev->dev); 255 pci_disable_device(assigned_dev->dev);
157 pci_dev_put(assigned_dev->dev); 256 pci_dev_put(assigned_dev->dev);
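
The interrupt-forwarding hunks above introduce the host_irq_disabled handshake: the host handler masks the line with disable_irq_nosync() and records that it did so, and the guest-ack notifier re-enables it only if it is still marked disabled, because the guest GSI may be shared and the ack may have been triggered by another device. The single-threaded illustration below captures just that invariant; disable_line()/enable_line() and the function names are stand-ins, not kernel calls.

    /* irq_mask_handshake.c - illustration of the host_irq_disabled handshake:
     * mask on host interrupt, unmask on guest ack, but only if we were the
     * ones who masked it. */
    #include <stdbool.h>
    #include <stdio.h>

    static bool line_masked;
    static bool host_irq_disabled;

    static void disable_line(void) { line_masked = true; }
    static void enable_line(void)  { line_masked = false; }

    static void host_interrupt(void)
    {
        disable_line();              /* throttle the line until the guest handles it */
        host_irq_disabled = true;
        /* ...work is queued here to inject the interrupt into the guest... */
    }

    static void guest_ack(void)
    {
        if (host_irq_disabled) {     /* acks from other devices sharing the GSI: no-op */
            enable_line();
            host_irq_disabled = false;
        }
    }

    int main(void)
    {
        guest_ack();                 /* spurious ack before any interrupt: nothing happens */
        host_interrupt();
        printf("after interrupt: masked=%d\n", line_masked);
        guest_ack();
        printf("after ack:       masked=%d\n", line_masked);
        return 0;
    }
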
@@ -174,6 +273,95 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
174 } 273 }
175} 274}
176 275
276static int assigned_device_update_intx(struct kvm *kvm,
277 struct kvm_assigned_dev_kernel *adev,
278 struct kvm_assigned_irq *airq)
279{
280 adev->guest_irq = airq->guest_irq;
281 adev->ack_notifier.gsi = airq->guest_irq;
282
283 if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX)
284 return 0;
285
286 if (irqchip_in_kernel(kvm)) {
287 if (!msi2intx &&
288 adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
289 free_irq(adev->host_irq, (void *)kvm);
290 pci_disable_msi(adev->dev);
291 }
292
293 if (!capable(CAP_SYS_RAWIO))
294 return -EPERM;
295
296 if (airq->host_irq)
297 adev->host_irq = airq->host_irq;
298 else
299 adev->host_irq = adev->dev->irq;
300
301 /* Even though this is PCI, we don't want to use shared
302 * interrupts. Sharing host devices with guest-assigned devices
303 * on the same interrupt line is not a happy situation: there
304 * are going to be long delays in accepting, acking, etc.
305 */
306 if (request_irq(adev->host_irq, kvm_assigned_dev_intr,
307 0, "kvm_assigned_intx_device", (void *)adev))
308 return -EIO;
309 }
310
311 adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX |
312 KVM_ASSIGNED_DEV_HOST_INTX;
313 return 0;
314}
315
316#ifdef CONFIG_X86
317static int assigned_device_update_msi(struct kvm *kvm,
318 struct kvm_assigned_dev_kernel *adev,
319 struct kvm_assigned_irq *airq)
320{
321 int r;
322
323 if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
324 /* x86 don't care upper address of guest msi message addr */
325 adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
326 adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
327 adev->guest_msi.address_lo = airq->guest_msi.addr_lo;
328 adev->guest_msi.data = airq->guest_msi.data;
329 adev->ack_notifier.gsi = -1;
330 } else if (msi2intx) {
331 adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
332 adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
333 adev->guest_irq = airq->guest_irq;
334 adev->ack_notifier.gsi = airq->guest_irq;
335 }
336
337 if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
338 return 0;
339
340 if (irqchip_in_kernel(kvm)) {
341 if (!msi2intx) {
342 if (adev->irq_requested_type &
343 KVM_ASSIGNED_DEV_HOST_INTX)
344 free_irq(adev->host_irq, (void *)adev);
345
346 r = pci_enable_msi(adev->dev);
347 if (r)
348 return r;
349 }
350
351 adev->host_irq = adev->dev->irq;
352 if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0,
353 "kvm_assigned_msi_device", (void *)adev))
354 return -EIO;
355 }
356
357 if (!msi2intx)
358 adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI;
359
360 adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI;
361 return 0;
362}
363#endif
364
177static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, 365static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
178 struct kvm_assigned_irq 366 struct kvm_assigned_irq
179 *assigned_irq) 367 *assigned_irq)
@@ -190,49 +378,68 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
190 return -EINVAL; 378 return -EINVAL;
191 } 379 }
192 380
193 if (match->irq_requested) { 381 if (!match->irq_requested_type) {
194 match->guest_irq = assigned_irq->guest_irq; 382 INIT_WORK(&match->interrupt_work,
195 match->ack_notifier.gsi = assigned_irq->guest_irq; 383 kvm_assigned_dev_interrupt_work_handler);
196 mutex_unlock(&kvm->lock); 384 if (irqchip_in_kernel(kvm)) {
197 return 0; 385 /* Register ack nofitier */
198 } 386 match->ack_notifier.gsi = -1;
387 match->ack_notifier.irq_acked =
388 kvm_assigned_dev_ack_irq;
389 kvm_register_irq_ack_notifier(kvm,
390 &match->ack_notifier);
391
392 /* Request IRQ source ID */
393 r = kvm_request_irq_source_id(kvm);
394 if (r < 0)
395 goto out_release;
396 else
397 match->irq_source_id = r;
199 398
200 INIT_WORK(&match->interrupt_work, 399#ifdef CONFIG_X86
201 kvm_assigned_dev_interrupt_work_handler); 400 /* Determine host device irq type, we can know the
401 * result from dev->msi_enabled */
402 if (msi2intx)
403 pci_enable_msi(match->dev);
404#endif
405 }
406 }
202 407
203 if (irqchip_in_kernel(kvm)) { 408 if ((!msi2intx &&
204 if (!capable(CAP_SYS_RAWIO)) { 409 (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) ||
205 r = -EPERM; 410 (msi2intx && match->dev->msi_enabled)) {
411#ifdef CONFIG_X86
412 r = assigned_device_update_msi(kvm, match, assigned_irq);
413 if (r) {
414 printk(KERN_WARNING "kvm: failed to enable "
415 "MSI device!\n");
206 goto out_release; 416 goto out_release;
207 } 417 }
208 418#else
209 if (assigned_irq->host_irq) 419 r = -ENOTTY;
210 match->host_irq = assigned_irq->host_irq; 420#endif
211 else 421 } else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
212 match->host_irq = match->dev->irq; 422 /* Host device IRQ 0 means don't support INTx */
213 match->guest_irq = assigned_irq->guest_irq; 423 if (!msi2intx) {
214 match->ack_notifier.gsi = assigned_irq->guest_irq; 424 printk(KERN_WARNING
215 match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; 425 "kvm: wait device to enable MSI!\n");
216 kvm_register_irq_ack_notifier(kvm, &match->ack_notifier); 426 r = 0;
217 r = kvm_request_irq_source_id(kvm); 427 } else {
218 if (r < 0) 428 printk(KERN_WARNING
429 "kvm: failed to enable MSI device!\n");
430 r = -ENOTTY;
219 goto out_release; 431 goto out_release;
220 else 432 }
221 match->irq_source_id = r; 433 } else {
222 434 /* Non-sharing INTx mode */
223 /* Even though this is PCI, we don't want to use shared 435 r = assigned_device_update_intx(kvm, match, assigned_irq);
224 * interrupts. Sharing host devices with guest-assigned devices 436 if (r) {
225 * on the same interrupt line is not a happy situation: there 437 printk(KERN_WARNING "kvm: failed to enable "
226 * are going to be long delays in accepting, acking, etc. 438 "INTx device!\n");
227 */
228 if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
229 "kvm_assigned_device", (void *)match)) {
230 r = -EIO;
231 goto out_release; 439 goto out_release;
232 } 440 }
233 } 441 }
234 442
235 match->irq_requested = true;
236 mutex_unlock(&kvm->lock); 443 mutex_unlock(&kvm->lock);
237 return r; 444 return r;
238out_release: 445out_release:
@@ -283,11 +490,14 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
283 __func__); 490 __func__);
284 goto out_disable; 491 goto out_disable;
285 } 492 }
493
494 pci_reset_function(dev);
495
286 match->assigned_dev_id = assigned_dev->assigned_dev_id; 496 match->assigned_dev_id = assigned_dev->assigned_dev_id;
287 match->host_busnr = assigned_dev->busnr; 497 match->host_busnr = assigned_dev->busnr;
288 match->host_devfn = assigned_dev->devfn; 498 match->host_devfn = assigned_dev->devfn;
289 match->dev = dev; 499 match->dev = dev;
290 500 match->irq_source_id = -1;
291 match->kvm = kvm; 501 match->kvm = kvm;
292 502
293 list_add(&match->list, &kvm->arch.assigned_dev_head); 503 list_add(&match->list, &kvm->arch.assigned_dev_head);
@@ -355,57 +565,48 @@ static void ack_flush(void *_completed)
355{ 565{
356} 566}
357 567
358void kvm_flush_remote_tlbs(struct kvm *kvm) 568static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
359{ 569{
360 int i, cpu, me; 570 int i, cpu, me;
361 cpumask_t cpus; 571 cpumask_var_t cpus;
572 bool called = true;
362 struct kvm_vcpu *vcpu; 573 struct kvm_vcpu *vcpu;
363 574
575 if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
576 cpumask_clear(cpus);
577
364 me = get_cpu(); 578 me = get_cpu();
365 cpus_clear(cpus);
366 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 579 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
367 vcpu = kvm->vcpus[i]; 580 vcpu = kvm->vcpus[i];
368 if (!vcpu) 581 if (!vcpu)
369 continue; 582 continue;
370 if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 583 if (test_and_set_bit(req, &vcpu->requests))
371 continue; 584 continue;
372 cpu = vcpu->cpu; 585 cpu = vcpu->cpu;
373 if (cpu != -1 && cpu != me) 586 if (cpus != NULL && cpu != -1 && cpu != me)
374 cpu_set(cpu, cpus); 587 cpumask_set_cpu(cpu, cpus);
375 } 588 }
376 if (cpus_empty(cpus)) 589 if (unlikely(cpus == NULL))
377 goto out; 590 smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
378 ++kvm->stat.remote_tlb_flush; 591 else if (!cpumask_empty(cpus))
379 smp_call_function_mask(cpus, ack_flush, NULL, 1); 592 smp_call_function_many(cpus, ack_flush, NULL, 1);
380out: 593 else
594 called = false;
381 put_cpu(); 595 put_cpu();
596 free_cpumask_var(cpus);
597 return called;
382} 598}
383 599
384void kvm_reload_remote_mmus(struct kvm *kvm) 600void kvm_flush_remote_tlbs(struct kvm *kvm)
385{ 601{
386 int i, cpu, me; 602 if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
387 cpumask_t cpus; 603 ++kvm->stat.remote_tlb_flush;
388 struct kvm_vcpu *vcpu;
389
390 me = get_cpu();
391 cpus_clear(cpus);
392 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
393 vcpu = kvm->vcpus[i];
394 if (!vcpu)
395 continue;
396 if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
397 continue;
398 cpu = vcpu->cpu;
399 if (cpu != -1 && cpu != me)
400 cpu_set(cpu, cpus);
401 }
402 if (cpus_empty(cpus))
403 goto out;
404 smp_call_function_mask(cpus, ack_flush, NULL, 1);
405out:
406 put_cpu();
407} 604}
408 605
606void kvm_reload_remote_mmus(struct kvm *kvm)
607{
608 make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
609}
409 610
410int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) 611int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
411{ 612{
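
The hunk above folds the nearly identical bodies of kvm_flush_remote_tlbs() and kvm_reload_remote_mmus() into make_all_cpus_request(): set the request bit on every vcpu with an atomic test-and-set so nobody is kicked twice, collect the CPUs that actually need an IPI in a heap-allocated cpumask, and fall back to broadcasting to all online CPUs if that allocation fails. The user-space sketch below models only the dedup-and-kick part; the vcpu structure and the printed "kick" are stand-ins for the real request bits and IPIs.

    /* make_all_requests.c - analog of make_all_cpus_request(): raise a request
     * on every vcpu exactly once and only "kick" the ones that did not already
     * have it pending. */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define NVCPUS        4
    #define REQ_TLB_FLUSH (1u << 0)

    struct vcpu { atomic_uint requests; };

    static bool make_all_requests(struct vcpu *vcpus, int n, unsigned int req)
    {
        bool kicked_any = false;

        for (int i = 0; i < n; i++) {
            unsigned int old = atomic_fetch_or(&vcpus[i].requests, req);
            if (old & req)
                continue;                        /* already pending: skip the kick */
            printf("kick vcpu %d\n", i);         /* stands in for the cross-CPU IPI */
            kicked_any = true;
        }
        return kicked_any;
    }

    int main(void)
    {
        struct vcpu vcpus[NVCPUS] = { 0 };

        atomic_store(&vcpus[2].requests, REQ_TLB_FLUSH);   /* one already pending */
        if (make_all_requests(vcpus, NVCPUS, REQ_TLB_FLUSH))
            printf("remote tlb flush accounted\n");
        return 0;
    }
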
@@ -710,6 +911,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
710 goto out; 911 goto out;
711 if (mem->guest_phys_addr & (PAGE_SIZE - 1)) 912 if (mem->guest_phys_addr & (PAGE_SIZE - 1))
712 goto out; 913 goto out;
914 if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
915 goto out;
713 if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) 916 if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
714 goto out; 917 goto out;
715 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) 918 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
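
The new check above makes __kvm_set_memory_region() refuse user-allocated slots whose userspace_addr is not page aligned, alongside the existing alignment and overflow checks on the guest physical range. A minimal user-space sketch is below: backing the slot with mmap()ed memory keeps the address page aligned by construction. It assumes the standard KVM_CREATE_VM / KVM_SET_USER_MEMORY_REGION interface (KVM_CAP_USER_MEMORY, which this file reports as a generic capability further down) and omits the error handling a real VMM would have.

    /* memslot_align.c - register a page-aligned user memory slot with KVM. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <linux/kvm.h>

    int main(void)
    {
        int kvm = open("/dev/kvm", O_RDWR);
        int vm = kvm >= 0 ? ioctl(kvm, KVM_CREATE_VM, 0) : -1;
        if (kvm < 0 || vm < 0) { perror("kvm"); return 1; }

        size_t size = 0x10000;   /* 64 KiB, a multiple of the page size */
        void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (mem == MAP_FAILED) { perror("mmap"); return 1; }

        struct kvm_userspace_memory_region region = {
            .slot            = 0,
            .guest_phys_addr = 0x0,
            .memory_size     = size,
            .userspace_addr  = (unsigned long)mem,   /* page aligned by mmap() */
        };
        if (ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region) < 0)
            perror("KVM_SET_USER_MEMORY_REGION");

        /* Deleting the slot later is the same call with memory_size = 0, the
         * path the KVM_CAP_DESTROY_MEMORY_REGION_WORKS fix is about. */
        return 0;
    }
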
@@ -821,7 +1024,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
821 goto out_free; 1024 goto out_free;
822 } 1025 }
823 1026
824 kvm_free_physmem_slot(&old, &new); 1027 kvm_free_physmem_slot(&old, npages ? &new : NULL);
1028 /* Slot deletion case: we have to update the current slot */
1029 if (!npages)
1030 *memslot = old;
825#ifdef CONFIG_DMAR 1031#ifdef CONFIG_DMAR
826 /* map the pages in iommu page table */ 1032 /* map the pages in iommu page table */
827 r = kvm_iommu_map_pages(kvm, base_gfn, npages); 1033 r = kvm_iommu_map_pages(kvm, base_gfn, npages);
@@ -918,7 +1124,7 @@ int kvm_is_error_hva(unsigned long addr)
918} 1124}
919EXPORT_SYMBOL_GPL(kvm_is_error_hva); 1125EXPORT_SYMBOL_GPL(kvm_is_error_hva);
920 1126
921static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn) 1127struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
922{ 1128{
923 int i; 1129 int i;
924 1130
@@ -931,11 +1137,12 @@ static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
931 } 1137 }
932 return NULL; 1138 return NULL;
933} 1139}
1140EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
934 1141
935struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) 1142struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
936{ 1143{
937 gfn = unalias_gfn(kvm, gfn); 1144 gfn = unalias_gfn(kvm, gfn);
938 return __gfn_to_memslot(kvm, gfn); 1145 return gfn_to_memslot_unaliased(kvm, gfn);
939} 1146}
940 1147
941int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) 1148int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
@@ -959,7 +1166,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
959 struct kvm_memory_slot *slot; 1166 struct kvm_memory_slot *slot;
960 1167
961 gfn = unalias_gfn(kvm, gfn); 1168 gfn = unalias_gfn(kvm, gfn);
962 slot = __gfn_to_memslot(kvm, gfn); 1169 slot = gfn_to_memslot_unaliased(kvm, gfn);
963 if (!slot) 1170 if (!slot)
964 return bad_hva(); 1171 return bad_hva();
965 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); 1172 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
@@ -1210,7 +1417,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
1210 struct kvm_memory_slot *memslot; 1417 struct kvm_memory_slot *memslot;
1211 1418
1212 gfn = unalias_gfn(kvm, gfn); 1419 gfn = unalias_gfn(kvm, gfn);
1213 memslot = __gfn_to_memslot(kvm, gfn); 1420 memslot = gfn_to_memslot_unaliased(kvm, gfn);
1214 if (memslot && memslot->dirty_bitmap) { 1421 if (memslot && memslot->dirty_bitmap) {
1215 unsigned long rel_gfn = gfn - memslot->base_gfn; 1422 unsigned long rel_gfn = gfn - memslot->base_gfn;
1216 1423
@@ -1295,7 +1502,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
1295 return 0; 1502 return 0;
1296} 1503}
1297 1504
1298static const struct file_operations kvm_vcpu_fops = { 1505static struct file_operations kvm_vcpu_fops = {
1299 .release = kvm_vcpu_release, 1506 .release = kvm_vcpu_release,
1300 .unlocked_ioctl = kvm_vcpu_ioctl, 1507 .unlocked_ioctl = kvm_vcpu_ioctl,
1301 .compat_ioctl = kvm_vcpu_ioctl, 1508 .compat_ioctl = kvm_vcpu_ioctl,
@@ -1689,7 +1896,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
1689 return 0; 1896 return 0;
1690} 1897}
1691 1898
1692static const struct file_operations kvm_vm_fops = { 1899static struct file_operations kvm_vm_fops = {
1693 .release = kvm_vm_release, 1900 .release = kvm_vm_release,
1694 .unlocked_ioctl = kvm_vm_ioctl, 1901 .unlocked_ioctl = kvm_vm_ioctl,
1695 .compat_ioctl = kvm_vm_ioctl, 1902 .compat_ioctl = kvm_vm_ioctl,
@@ -1711,6 +1918,18 @@ static int kvm_dev_ioctl_create_vm(void)
1711 return fd; 1918 return fd;
1712} 1919}
1713 1920
1921static long kvm_dev_ioctl_check_extension_generic(long arg)
1922{
1923 switch (arg) {
1924 case KVM_CAP_USER_MEMORY:
1925 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
1926 return 1;
1927 default:
1928 break;
1929 }
1930 return kvm_dev_ioctl_check_extension(arg);
1931}
1932
1714static long kvm_dev_ioctl(struct file *filp, 1933static long kvm_dev_ioctl(struct file *filp,
1715 unsigned int ioctl, unsigned long arg) 1934 unsigned int ioctl, unsigned long arg)
1716{ 1935{
@@ -1730,7 +1949,7 @@ static long kvm_dev_ioctl(struct file *filp,
1730 r = kvm_dev_ioctl_create_vm(); 1949 r = kvm_dev_ioctl_create_vm();
1731 break; 1950 break;
1732 case KVM_CHECK_EXTENSION: 1951 case KVM_CHECK_EXTENSION:
1733 r = kvm_dev_ioctl_check_extension(arg); 1952 r = kvm_dev_ioctl_check_extension_generic(arg);
1734 break; 1953 break;
1735 case KVM_GET_VCPU_MMAP_SIZE: 1954 case KVM_GET_VCPU_MMAP_SIZE:
1736 r = -EINVAL; 1955 r = -EINVAL;
@@ -1771,9 +1990,9 @@ static void hardware_enable(void *junk)
1771{ 1990{
1772 int cpu = raw_smp_processor_id(); 1991 int cpu = raw_smp_processor_id();
1773 1992
1774 if (cpu_isset(cpu, cpus_hardware_enabled)) 1993 if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
1775 return; 1994 return;
1776 cpu_set(cpu, cpus_hardware_enabled); 1995 cpumask_set_cpu(cpu, cpus_hardware_enabled);
1777 kvm_arch_hardware_enable(NULL); 1996 kvm_arch_hardware_enable(NULL);
1778} 1997}
1779 1998
@@ -1781,9 +2000,9 @@ static void hardware_disable(void *junk)
1781{ 2000{
1782 int cpu = raw_smp_processor_id(); 2001 int cpu = raw_smp_processor_id();
1783 2002
1784 if (!cpu_isset(cpu, cpus_hardware_enabled)) 2003 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
1785 return; 2004 return;
1786 cpu_clear(cpu, cpus_hardware_enabled); 2005 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
1787 kvm_arch_hardware_disable(NULL); 2006 kvm_arch_hardware_disable(NULL);
1788} 2007}
1789 2008
@@ -2017,9 +2236,14 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
2017 2236
2018 bad_pfn = page_to_pfn(bad_page); 2237 bad_pfn = page_to_pfn(bad_page);
2019 2238
2239 if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
2240 r = -ENOMEM;
2241 goto out_free_0;
2242 }
2243
2020 r = kvm_arch_hardware_setup(); 2244 r = kvm_arch_hardware_setup();
2021 if (r < 0) 2245 if (r < 0)
2022 goto out_free_0; 2246 goto out_free_0a;
2023 2247
2024 for_each_online_cpu(cpu) { 2248 for_each_online_cpu(cpu) {
2025 smp_call_function_single(cpu, 2249 smp_call_function_single(cpu,
@@ -2053,6 +2277,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
2053 } 2277 }
2054 2278
2055 kvm_chardev_ops.owner = module; 2279 kvm_chardev_ops.owner = module;
2280 kvm_vm_fops.owner = module;
2281 kvm_vcpu_fops.owner = module;
2056 2282
2057 r = misc_register(&kvm_dev); 2283 r = misc_register(&kvm_dev);
2058 if (r) { 2284 if (r) {
@@ -2062,6 +2288,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
2062 2288
2063 kvm_preempt_ops.sched_in = kvm_sched_in; 2289 kvm_preempt_ops.sched_in = kvm_sched_in;
2064 kvm_preempt_ops.sched_out = kvm_sched_out; 2290 kvm_preempt_ops.sched_out = kvm_sched_out;
2291#ifndef CONFIG_X86
2292 msi2intx = 0;
2293#endif
2065 2294
2066 return 0; 2295 return 0;
2067 2296
@@ -2078,6 +2307,8 @@ out_free_2:
2078 on_each_cpu(hardware_disable, NULL, 1); 2307 on_each_cpu(hardware_disable, NULL, 1);
2079out_free_1: 2308out_free_1:
2080 kvm_arch_hardware_unsetup(); 2309 kvm_arch_hardware_unsetup();
2310out_free_0a:
2311 free_cpumask_var(cpus_hardware_enabled);
2081out_free_0: 2312out_free_0:
2082 __free_page(bad_page); 2313 __free_page(bad_page);
2083out: 2314out:
@@ -2101,6 +2332,7 @@ void kvm_exit(void)
2101 kvm_arch_hardware_unsetup(); 2332 kvm_arch_hardware_unsetup();
2102 kvm_arch_exit(); 2333 kvm_arch_exit();
2103 kvm_exit_debug(); 2334 kvm_exit_debug();
2335 free_cpumask_var(cpus_hardware_enabled);
2104 __free_page(bad_page); 2336 __free_page(bad_page);
2105} 2337}
2106EXPORT_SYMBOL_GPL(kvm_exit); 2338EXPORT_SYMBOL_GPL(kvm_exit);
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
index 41dcc845f78c..f59874446440 100644
--- a/virt/kvm/kvm_trace.c
+++ b/virt/kvm/kvm_trace.c
@@ -252,6 +252,7 @@ void kvm_trace_cleanup(void)
252 struct kvm_trace_probe *p = &kvm_trace_probes[i]; 252 struct kvm_trace_probe *p = &kvm_trace_probes[i];
253 marker_probe_unregister(p->name, p->probe_func, p); 253 marker_probe_unregister(p->name, p->probe_func, p);
254 } 254 }
255 marker_synchronize_unregister();
255 256
256 relay_close(kt->rchan); 257 relay_close(kt->rchan);
257 debugfs_remove(kt->lost_file); 258 debugfs_remove(kt->lost_file);
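
The kvm_trace.c change calls marker_synchronize_unregister() after unregistering the probes, so no probe callback can still be running when the relay channel and debugfs files are torn down. The user-space analog below shows the same "unregister, then wait for in-flight callers" idea with an atomic in-flight counter; the names are invented, it needs -pthread to build, and the real kernel primitive is of course more than a spin wait.

    /* probe_teardown.c - analog of unregister-then-synchronize: clear the probe
     * pointer, wait for any caller still inside it, then free its data. */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    typedef void (*probe_fn)(int *);

    static _Atomic(probe_fn) probe;
    static atomic_int in_flight;
    static atomic_bool stop;

    static void count_events(int *counter) { (*counter)++; }

    static void *event_loop(void *arg)
    {
        while (!atomic_load(&stop)) {
            atomic_fetch_add(&in_flight, 1);       /* announce before reading probe */
            probe_fn p = atomic_load(&probe);
            if (p)
                p(arg);
            atomic_fetch_sub(&in_flight, 1);
        }
        return NULL;
    }

    int main(void)
    {
        int *counter = calloc(1, sizeof(*counter));
        pthread_t tid;

        atomic_store(&probe, count_events);
        pthread_create(&tid, NULL, event_loop, counter);
        usleep(10000);

        atomic_store(&probe, NULL);                /* "unregister" the probe */
        while (atomic_load(&in_flight))            /* "synchronize": wait for callers */
            ;
        printf("events counted: %d\n", *counter);
        free(counter);                             /* now safe to free probe data */

        atomic_store(&stop, true);
        pthread_join(tid, NULL);
        return 0;
    }
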