-rw-r--r--  arch/ia64/include/asm/kvm.h                |   49
-rw-r--r--  arch/ia64/include/asm/kvm_host.h           |   18
-rw-r--r--  arch/ia64/include/asm/msidef.h             |   42
-rw-r--r--  arch/ia64/kernel/msi_ia64.c                |   55
-rw-r--r--  arch/ia64/kvm/Kconfig                      |    4
-rw-r--r--  arch/ia64/kvm/irq.h                        |    2
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c                   |  125
-rw-r--r--  arch/ia64/kvm/kvm_fw.c                     |  151
-rw-r--r--  arch/ia64/kvm/process.c                    |   71
-rw-r--r--  arch/ia64/kvm/vcpu.c                       |   44
-rw-r--r--  arch/ia64/kvm/vcpu.h                       |    4
-rw-r--r--  arch/ia64/kvm/vtlb.c                       |   44
-rw-r--r--  arch/powerpc/include/asm/kvm.h             |    7
-rw-r--r--  arch/powerpc/include/asm/kvm_44x.h         |    7
-rw-r--r--  arch/powerpc/include/asm/kvm_asm.h         |    7
-rw-r--r--  arch/powerpc/include/asm/kvm_e500.h        |   67
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h        |   21
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h         |   15
-rw-r--r--  arch/powerpc/include/asm/mmu-fsl-booke.h   |    2
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c          |    4
-rw-r--r--  arch/powerpc/kvm/44x.c                     |   72
-rw-r--r--  arch/powerpc/kvm/44x_emulate.c             |  217
-rw-r--r--  arch/powerpc/kvm/44x_tlb.c                 |   39
-rw-r--r--  arch/powerpc/kvm/44x_tlb.h                 |    9
-rw-r--r--  arch/powerpc/kvm/Kconfig                   |   16
-rw-r--r--  arch/powerpc/kvm/Makefile                  |   10
-rw-r--r--  arch/powerpc/kvm/booke.c                   |   50
-rw-r--r--  arch/powerpc/kvm/booke.h                   |   35
-rw-r--r--  arch/powerpc/kvm/booke_emulate.c           |  266
-rw-r--r--  arch/powerpc/kvm/booke_interrupts.S        |    5
-rw-r--r--  arch/powerpc/kvm/e500.c                    |  169
-rw-r--r--  arch/powerpc/kvm/e500_emulate.c            |  202
-rw-r--r--  arch/powerpc/kvm/e500_tlb.c                |  757
-rw-r--r--  arch/powerpc/kvm/e500_tlb.h                |  185
-rw-r--r--  arch/powerpc/kvm/emulate.c                 |   93
-rw-r--r--  arch/powerpc/kvm/powerpc.c                 |   31
-rw-r--r--  arch/s390/include/asm/kvm.h                |    7
-rw-r--r--  arch/s390/include/asm/kvm_host.h           |    3
-rw-r--r--  arch/s390/kvm/Kconfig                      |    3
-rw-r--r--  arch/s390/kvm/intercept.c                  |    2
-rw-r--r--  arch/s390/kvm/interrupt.c                  |    7
-rw-r--r--  arch/s390/kvm/kvm-s390.c                   |    4
-rw-r--r--  arch/s390/kvm/kvm-s390.h                   |    2
-rw-r--r--  arch/s390/kvm/priv.c                       |   18
-rw-r--r--  arch/s390/kvm/sigp.c                       |    2
-rw-r--r--  arch/x86/include/asm/kvm.h                 |   24
-rw-r--r--  arch/x86/include/asm/kvm_host.h            |   61
-rw-r--r--  arch/x86/include/asm/msr-index.h           |    9
-rw-r--r--  arch/x86/include/asm/svm.h                 |    4
-rw-r--r--  arch/x86/include/asm/virtext.h             |    2
-rw-r--r--  arch/x86/include/asm/vmx.h                 |    5
-rw-r--r--  arch/x86/kvm/Kconfig                       |    4
-rw-r--r--  arch/x86/kvm/i8254.c                       |   21
-rw-r--r--  arch/x86/kvm/i8254.h                       |    2
-rw-r--r--  arch/x86/kvm/i8259.c                       |   25
-rw-r--r--  arch/x86/kvm/irq.h                         |    2
-rw-r--r--  arch/x86/kvm/kvm_svm.h                     |   16
-rw-r--r--  arch/x86/kvm/mmu.c                         |  237
-rw-r--r--  arch/x86/kvm/mmu.h                         |    2
-rw-r--r--  arch/x86/kvm/paging_tmpl.h                 |  219
-rw-r--r--  arch/x86/kvm/svm.c                         |  916
-rw-r--r--  arch/x86/kvm/vmx.c                         |  393
-rw-r--r--  arch/x86/kvm/x86.c                         |  432
-rw-r--r--  arch/x86/kvm/x86_emulate.c                 |   56
-rw-r--r--  include/linux/kvm.h                        |  115
-rw-r--r--  include/linux/kvm_host.h                   |   62
-rw-r--r--  include/linux/kvm_types.h                  |   13
-rw-r--r--  virt/kvm/ioapic.c                          |   39
-rw-r--r--  virt/kvm/ioapic.h                          |    2
-rw-r--r--  virt/kvm/irq_comm.c                        |  297
-rw-r--r--  virt/kvm/kvm_main.c                        |  141

71 files changed, 4640 insertions(+), 1402 deletions(-)
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index bfa86b6af7cd..0ee5bd7a988f 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -166,7 +166,40 @@ struct saved_vpd {
 	unsigned long vcpuid[5];
 	unsigned long vpsr;
 	unsigned long vpr;
-	unsigned long vcr[128];
+	union {
+		unsigned long vcr[128];
+		struct {
+			unsigned long dcr;
+			unsigned long itm;
+			unsigned long iva;
+			unsigned long rsv1[5];
+			unsigned long pta;
+			unsigned long rsv2[7];
+			unsigned long ipsr;
+			unsigned long isr;
+			unsigned long rsv3;
+			unsigned long iip;
+			unsigned long ifa;
+			unsigned long itir;
+			unsigned long iipa;
+			unsigned long ifs;
+			unsigned long iim;
+			unsigned long iha;
+			unsigned long rsv4[38];
+			unsigned long lid;
+			unsigned long ivr;
+			unsigned long tpr;
+			unsigned long eoi;
+			unsigned long irr[4];
+			unsigned long itv;
+			unsigned long pmv;
+			unsigned long cmcv;
+			unsigned long rsv5[5];
+			unsigned long lrr0;
+			unsigned long lrr1;
+			unsigned long rsv6[46];
+		};
+	};
 };
 
 struct kvm_regs {
@@ -214,4 +247,18 @@ struct kvm_sregs {
 struct kvm_fpu {
 };
 
+#define KVM_IA64_VCPU_STACK_SHIFT	16
+#define KVM_IA64_VCPU_STACK_SIZE	(1UL << KVM_IA64_VCPU_STACK_SHIFT)
+
+struct kvm_ia64_vcpu_stack {
+	unsigned char stack[KVM_IA64_VCPU_STACK_SIZE];
+};
+
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
 #endif
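
Aside (not part of the patch): the new union overlays named control-register fields on the raw vcr[] save area, so the same slot can be reached either way. A minimal C sketch under that layout — the variable names and value are ours; iva is the third member, so it aliases vcr[2]:

	struct saved_vpd vpd;

	vpd.iva = 0x8000000000000000UL;		/* write through the named field */
	unsigned long same = vpd.vcr[2];	/* read it back through the array view */
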
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 348663661659..4542651e6acb 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -112,7 +112,11 @@
 #define VCPU_STRUCT_SHIFT	16
 #define VCPU_STRUCT_SIZE	(__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
 
-#define KVM_STK_OFFSET		VCPU_STRUCT_SIZE
+/*
+ * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h
+ */
+#define KVM_STK_SHIFT		16
+#define KVM_STK_OFFSET		(__IA64_UL_CONST(1)<< KVM_STK_SHIFT)
 
 #define KVM_VM_STRUCT_SHIFT	19
 #define KVM_VM_STRUCT_SIZE	(__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
@@ -153,10 +157,10 @@ struct kvm_vm_data {
 	struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
 };
 
-#define VCPU_BASE(n)	KVM_VM_DATA_BASE + \
-			offsetof(struct kvm_vm_data, vcpu_data[n])
-#define VM_BASE		KVM_VM_DATA_BASE + \
-			offsetof(struct kvm_vm_data, kvm_vm_struct)
+#define VCPU_BASE(n)	(KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, vcpu_data[n]))
+#define KVM_VM_BASE	(KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, kvm_vm_struct))
 #define KVM_MEM_DIRTY_LOG_BASE	KVM_VM_DATA_BASE + \
 				offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
 
@@ -235,8 +239,6 @@ struct kvm_vm_data {
 
 struct kvm;
 struct kvm_vcpu;
-struct kvm_guest_debug{
-};
 
 struct kvm_mmio_req {
 	uint64_t addr;	/* physical address */
@@ -462,6 +464,8 @@ struct kvm_arch {
 	unsigned long	metaphysical_rr4;
 	unsigned long	vmm_init_rr;
 
+	int		online_vcpus;
+
 	struct kvm_ioapic *vioapic;
 	struct kvm_vm_stat stat;
 	struct kvm_sal_data rdv_sal_data;
diff --git a/arch/ia64/include/asm/msidef.h b/arch/ia64/include/asm/msidef.h
new file mode 100644
index 000000000000..592c1047a0c5
--- /dev/null
+++ b/arch/ia64/include/asm/msidef.h
@@ -0,0 +1,42 @@
+#ifndef _IA64_MSI_DEF_H
+#define _IA64_MSI_DEF_H
+
+/*
+ * Shifts for APIC-based data
+ */
+
+#define	MSI_DATA_VECTOR_SHIFT		0
+#define	MSI_DATA_VECTOR(v)		(((u8)v) << MSI_DATA_VECTOR_SHIFT)
+#define	MSI_DATA_VECTOR_MASK		0xffffff00
+
+#define	MSI_DATA_DELIVERY_MODE_SHIFT	8
+#define	MSI_DATA_DELIVERY_FIXED		(0 << MSI_DATA_DELIVERY_MODE_SHIFT)
+#define	MSI_DATA_DELIVERY_LOWPRI	(1 << MSI_DATA_DELIVERY_MODE_SHIFT)
+
+#define	MSI_DATA_LEVEL_SHIFT		14
+#define	MSI_DATA_LEVEL_DEASSERT		(0 << MSI_DATA_LEVEL_SHIFT)
+#define	MSI_DATA_LEVEL_ASSERT		(1 << MSI_DATA_LEVEL_SHIFT)
+
+#define	MSI_DATA_TRIGGER_SHIFT		15
+#define	MSI_DATA_TRIGGER_EDGE		(0 << MSI_DATA_TRIGGER_SHIFT)
+#define	MSI_DATA_TRIGGER_LEVEL		(1 << MSI_DATA_TRIGGER_SHIFT)
+
+/*
+ * Shift/mask fields for APIC-based bus address
+ */
+
+#define	MSI_ADDR_DEST_ID_SHIFT		4
+#define	MSI_ADDR_HEADER			0xfee00000
+
+#define	MSI_ADDR_DEST_ID_MASK		0xfff0000f
+#define	MSI_ADDR_DEST_ID_CPU(cpu)	((cpu) << MSI_ADDR_DEST_ID_SHIFT)
+
+#define	MSI_ADDR_DEST_MODE_SHIFT	2
+#define	MSI_ADDR_DEST_MODE_PHYS		(0 << MSI_ADDR_DEST_MODE_SHIFT)
+#define	MSI_ADDR_DEST_MODE_LOGIC	(1 << MSI_ADDR_DEST_MODE_SHIFT)
+
+#define	MSI_ADDR_REDIRECTION_SHIFT	3
+#define	MSI_ADDR_REDIRECTION_CPU	(0 << MSI_ADDR_REDIRECTION_SHIFT)
+#define	MSI_ADDR_REDIRECTION_LOWPRI	(1 << MSI_ADDR_REDIRECTION_SHIFT)
+
+#endif/* _IA64_MSI_DEF_H */
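
Aside (not part of the patch): a minimal sketch of how a driver composes an MSI address/data pair from these macros. It mirrors the msi_compose_msg()/ia64_setup_msi_irq() callers updated below in this diff, but the helper itself is hypothetical:

	/* Hypothetical helper; dest_phys_id and vector are assumed inputs. */
	static void example_compose_msi(struct msi_msg *msg, u32 dest_phys_id, u8 vector)
	{
		msg->address_hi = 0;
		msg->address_lo = MSI_ADDR_HEADER |
				  MSI_ADDR_DEST_MODE_PHYS |
				  MSI_ADDR_REDIRECTION_CPU |
				  MSI_ADDR_DEST_ID_CPU(dest_phys_id);

		msg->data = MSI_DATA_TRIGGER_EDGE |
			    MSI_DATA_LEVEL_ASSERT |
			    MSI_DATA_DELIVERY_FIXED |
			    MSI_DATA_VECTOR(vector);
	}
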
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 890339339035..368ee4e5266d 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -7,44 +7,7 @@
 #include <linux/msi.h>
 #include <linux/dmar.h>
 #include <asm/smp.h>
-
-/*
- * Shifts for APIC-based data
- */
-
-#define	MSI_DATA_VECTOR_SHIFT		0
-#define	MSI_DATA_VECTOR(v)		(((u8)v) << MSI_DATA_VECTOR_SHIFT)
-#define	MSI_DATA_VECTOR_MASK		0xffffff00
-
-#define	MSI_DATA_DELIVERY_SHIFT		8
-#define	MSI_DATA_DELIVERY_FIXED		(0 << MSI_DATA_DELIVERY_SHIFT)
-#define	MSI_DATA_DELIVERY_LOWPRI	(1 << MSI_DATA_DELIVERY_SHIFT)
-
-#define	MSI_DATA_LEVEL_SHIFT		14
-#define	MSI_DATA_LEVEL_DEASSERT		(0 << MSI_DATA_LEVEL_SHIFT)
-#define	MSI_DATA_LEVEL_ASSERT		(1 << MSI_DATA_LEVEL_SHIFT)
-
-#define	MSI_DATA_TRIGGER_SHIFT		15
-#define	MSI_DATA_TRIGGER_EDGE		(0 << MSI_DATA_TRIGGER_SHIFT)
-#define	MSI_DATA_TRIGGER_LEVEL		(1 << MSI_DATA_TRIGGER_SHIFT)
-
-/*
- * Shift/mask fields for APIC-based bus address
- */
-
-#define	MSI_TARGET_CPU_SHIFT		4
-#define	MSI_ADDR_HEADER			0xfee00000
-
-#define	MSI_ADDR_DESTID_MASK		0xfff0000f
-#define	MSI_ADDR_DESTID_CPU(cpu)	((cpu) << MSI_TARGET_CPU_SHIFT)
-
-#define	MSI_ADDR_DESTMODE_SHIFT		2
-#define	MSI_ADDR_DESTMODE_PHYS		(0 << MSI_ADDR_DESTMODE_SHIFT)
-#define	MSI_ADDR_DESTMODE_LOGIC		(1 << MSI_ADDR_DESTMODE_SHIFT)
-
-#define	MSI_ADDR_REDIRECTION_SHIFT	3
-#define	MSI_ADDR_REDIRECTION_CPU	(0 << MSI_ADDR_REDIRECTION_SHIFT)
-#define	MSI_ADDR_REDIRECTION_LOWPRI	(1 << MSI_ADDR_REDIRECTION_SHIFT)
+#include <asm/msidef.h>
 
 static struct irq_chip	ia64_msi_chip;
 
@@ -65,8 +28,8 @@ static void ia64_set_msi_irq_affinity(unsigned int irq,
 	read_msi_msg(irq, &msg);
 
 	addr = msg.address_lo;
-	addr &= MSI_ADDR_DESTID_MASK;
-	addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
+	addr &= MSI_ADDR_DEST_ID_MASK;
+	addr |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
 	msg.address_lo = addr;
 
 	data = msg.data;
@@ -98,9 +61,9 @@ int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
 	msg.address_hi = 0;
 	msg.address_lo =
 		MSI_ADDR_HEADER |
-		MSI_ADDR_DESTMODE_PHYS |
+		MSI_ADDR_DEST_MODE_PHYS |
 		MSI_ADDR_REDIRECTION_CPU |
-		MSI_ADDR_DESTID_CPU(dest_phys_id);
+		MSI_ADDR_DEST_ID_CPU(dest_phys_id);
 
 	msg.data =
 		MSI_DATA_TRIGGER_EDGE |
@@ -183,8 +146,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 	msg.data &= ~MSI_DATA_VECTOR_MASK;
 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DESTID_MASK;
-	msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
 
 	dmar_msi_write(irq, &msg);
 	irq_desc[irq].affinity = *mask;
@@ -215,9 +178,9 @@ msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
 	msg->address_hi = 0;
 	msg->address_lo =
 		MSI_ADDR_HEADER |
-		MSI_ADDR_DESTMODE_PHYS |
+		MSI_ADDR_DEST_MODE_PHYS |
 		MSI_ADDR_REDIRECTION_CPU |
-		MSI_ADDR_DESTID_CPU(dest);
+		MSI_ADDR_DEST_ID_CPU(dest);
 
 	msg->data =
 		MSI_DATA_TRIGGER_EDGE |
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index f833a0b4188d..0a2d6b86075a 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -4,6 +4,10 @@
 config HAVE_KVM
 	bool
 
+config HAVE_KVM_IRQCHIP
+       bool
+       default y
+
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
 	depends on HAVE_KVM || IA64
diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
index c6786e8b1bf4..c0785a728271 100644
--- a/arch/ia64/kvm/irq.h
+++ b/arch/ia64/kvm/irq.h
@@ -23,6 +23,8 @@
 #ifndef __IRQ_H
 #define __IRQ_H
 
+#include "lapic.h"
+
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
 	return 1;
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 28f982045f29..076b00d1dbff 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -182,7 +182,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
 	case KVM_CAP_MP_STATE:
-
+	case KVM_CAP_IRQ_INJECT_STATUS:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -314,7 +314,7 @@ static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
 	union ia64_lid lid;
 	int i;
 
-	for (i = 0; i < KVM_MAX_VCPUS; i++) {
+	for (i = 0; i < kvm->arch.online_vcpus; i++) {
 		if (kvm->vcpus[i]) {
 			lid.val = VCPU_LID(kvm->vcpus[i]);
 			if (lid.id == id && lid.eid == eid)
@@ -388,7 +388,7 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	call_data.ptc_g_data = p->u.ptc_g_data;
 
-	for (i = 0; i < KVM_MAX_VCPUS; i++) {
+	for (i = 0; i < kvm->arch.online_vcpus; i++) {
 		if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state ==
 						KVM_MP_STATE_UNINITIALIZED ||
 					vcpu == kvm->vcpus[i])
@@ -788,6 +788,8 @@ struct kvm *kvm_arch_create_vm(void)
 		return ERR_PTR(-ENOMEM);
 	kvm_init_vm(kvm);
 
+	kvm->arch.online_vcpus = 0;
+
 	return kvm;
 
 }
@@ -919,7 +921,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_ioapic_init(kvm);
 		if (r)
 			goto out;
+		r = kvm_setup_default_irq_routing(kvm);
+		if (r) {
+			kfree(kvm->arch.vioapic);
+			goto out;
+		}
 		break;
+	case KVM_IRQ_LINE_STATUS:
 	case KVM_IRQ_LINE: {
 		struct kvm_irq_level irq_event;
 
@@ -927,10 +935,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (copy_from_user(&irq_event, argp, sizeof irq_event))
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
+			__s32 status;
 			mutex_lock(&kvm->lock);
-			kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 				    irq_event.irq, irq_event.level);
 			mutex_unlock(&kvm->lock);
+			if (ioctl == KVM_IRQ_LINE_STATUS) {
+				irq_event.status = status;
+				if (copy_to_user(argp, &irq_event,
+							sizeof irq_event))
+					goto out;
+			}
 			r = 0;
 		}
 		break;
@@ -1149,7 +1164,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 	/*Initialize itc offset for vcpus*/
 	itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
-	for (i = 0; i < KVM_MAX_VCPUS; i++) {
+	for (i = 0; i < kvm->arch.online_vcpus; i++) {
 		v = (struct kvm_vcpu *)((char *)vcpu +
 				sizeof(struct kvm_vcpu_data) * i);
 		v->arch.itc_offset = itc_offset;
@@ -1283,6 +1298,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 		goto fail;
 	}
 
+	kvm->arch.online_vcpus++;
+
 	return vcpu;
 fail:
 	return ERR_PTR(r);
@@ -1303,8 +1320,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 	return -EINVAL;
 }
 
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
-		struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
 {
 	return -EINVAL;
 }
@@ -1421,6 +1438,23 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return 0;
 }
 
+int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
+				  struct kvm_ia64_vcpu_stack *stack)
+{
+	memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
+				  struct kvm_ia64_vcpu_stack *stack)
+{
+	memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
+	       sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
+
+	vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
+	return 0;
+}
+
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 
@@ -1430,9 +1464,78 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 
 
 long kvm_arch_vcpu_ioctl(struct file *filp,
-		unsigned int ioctl, unsigned long arg)
+			 unsigned int ioctl, unsigned long arg)
 {
-	return -EINVAL;
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	struct kvm_ia64_vcpu_stack *stack = NULL;
+	long r;
+
+	switch (ioctl) {
+	case KVM_IA64_VCPU_GET_STACK: {
+		struct kvm_ia64_vcpu_stack __user *user_stack;
+		void __user *first_p = argp;
+
+		r = -EFAULT;
+		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+			goto out;
+
+		if (!access_ok(VERIFY_WRITE, user_stack,
+			       sizeof(struct kvm_ia64_vcpu_stack))) {
+			printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
+			       "Illegal user destination address for stack\n");
+			goto out;
+		}
+		stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+		if (!stack) {
+			r = -ENOMEM;
+			goto out;
+		}
+
+		r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
+		if (r)
+			goto out;
+
+		if (copy_to_user(user_stack, stack,
+				 sizeof(struct kvm_ia64_vcpu_stack)))
+			goto out;
+
+		break;
+	}
+	case KVM_IA64_VCPU_SET_STACK: {
+		struct kvm_ia64_vcpu_stack __user *user_stack;
+		void __user *first_p = argp;
+
+		r = -EFAULT;
+		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+			goto out;
+
+		if (!access_ok(VERIFY_READ, user_stack,
+			       sizeof(struct kvm_ia64_vcpu_stack))) {
+			printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
+			       "Illegal user address for stack\n");
+			goto out;
+		}
+		stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+		if (!stack) {
+			r = -ENOMEM;
+			goto out;
+		}
+		if (copy_from_user(stack, user_stack,
+				   sizeof(struct kvm_ia64_vcpu_stack)))
+			goto out;
+
+		r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
+		break;
+	}
+
+	default:
+		r = -EINVAL;
+	}
+
+out:
+	kfree(stack);
+	return r;
 }
 
 int kvm_arch_set_memory_region(struct kvm *kvm,
@@ -1472,7 +1575,7 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 }
 
 long kvm_arch_dev_ioctl(struct file *filp,
-		unsigned int ioctl, unsigned long arg)
+			unsigned int ioctl, unsigned long arg)
 {
 	return -EINVAL;
 }
@@ -1737,7 +1840,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
 	struct kvm_vcpu *lvcpu = kvm->vcpus[0];
 	int i;
 
-	for (i = 1; i < KVM_MAX_VCPUS; i++) {
+	for (i = 1; i < kvm->arch.online_vcpus; i++) {
 		if (!kvm->vcpus[i])
 			continue;
 		if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp)
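
Aside (not part of the patch): a rough sketch of how userspace might drive the new vcpu-stack ioctl. The handler above first copies a pointer out of the ioctl argument, so the argument is the address of the buffer pointer, not the buffer itself. KVM_IA64_VCPU_GET_STACK is assumed to be defined in include/linux/kvm.h (changed elsewhere in this series); error handling is abbreviated:

	/* Hypothetical userspace usage; sketch only. */
	struct kvm_ia64_vcpu_stack *stack = malloc(KVM_IA64_VCPU_STACK_SIZE);

	if (stack && ioctl(vcpu_fd, KVM_IA64_VCPU_GET_STACK, &stack) < 0)
		perror("KVM_IA64_VCPU_GET_STACK");	/* note: pass &stack, not stack */
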
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
index cb7600bdff9d..a8ae52ed5635 100644
--- a/arch/ia64/kvm/kvm_fw.c
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -227,6 +227,18 @@ static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
 	return result;
 }
 
+static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	long in0, in1, in2, in3;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	result.status = ia64_pal_register_info(in1, &result.v1, &result.v2);
+
+	return result;
+}
+
 static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
 {
 
@@ -268,8 +280,12 @@ static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
 static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
 {
 	struct ia64_pal_retval result;
+	unsigned long in0, in1, in2, in3;
 
-	INIT_PAL_STATUS_UNIMPLEMENTED(result);
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+
+	result.status = ia64_pal_vm_info(in1, in2,
+			(pal_tc_info_u_t *)&result.v1, &result.v2);
 
 	return result;
 }
@@ -292,6 +308,108 @@ static void prepare_for_halt(struct kvm_vcpu *vcpu)
 	vcpu->arch.timer_fired = 0;
 }
 
+static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu)
+{
+	long status;
+	unsigned long in0, in1, in2, in3, r9;
+	unsigned long pm_buffer[16];
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	status = ia64_pal_perf_mon_info(pm_buffer,
+				(pal_perf_mon_info_u_t *) &r9);
+	if (status != 0) {
+		printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status);
+	} else {
+		if (in1)
+			memcpy((void *)in1, pm_buffer, sizeof(pm_buffer));
+		else {
+			status = PAL_STATUS_EINVAL;
+			printk(KERN_WARNING"Invalid parameters "
+				"for PAL call:0x%lx!\n", in0);
+		}
+	}
+	return (struct ia64_pal_retval){status, r9, 0, 0};
+}
+
+static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu)
+{
+	unsigned long in0, in1, in2, in3;
+	long status;
+	unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32)
+			| (1UL << 61) | (1UL << 60);
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	if (in1) {
+		memcpy((void *)in1, &res, sizeof(res));
+		status = 0;
+	} else{
+		status = PAL_STATUS_EINVAL;
+		printk(KERN_WARNING"Invalid parameters "
+			"for PAL call:0x%lx!\n", in0);
+	}
+
+	return (struct ia64_pal_retval){status, 0, 0, 0};
+}
+
+static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu)
+{
+	unsigned long r9;
+	long status;
+
+	status = ia64_pal_mem_attrib(&r9);
+
+	return (struct ia64_pal_retval){status, r9, 0, 0};
+}
+
+static void remote_pal_prefetch_visibility(void *v)
+{
+	s64 trans_type = (s64)v;
+	ia64_pal_prefetch_visibility(trans_type);
+}
+
+static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	unsigned long in0, in1, in2, in3;
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	result.status = ia64_pal_prefetch_visibility(in1);
+	if (result.status == 0) {
+		/* Must be performed on all remote processors
+		in the coherence domain. */
+		smp_call_function(remote_pal_prefetch_visibility,
+					(void *)in1, 1);
+		/* Unnecessary on remote processor for other vcpus!*/
+		result.status = 1;
+	}
+	return result;
+}
+
+static void remote_pal_mc_drain(void *v)
+{
+	ia64_pal_mc_drain();
+}
+
+static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	unsigned long in0, in1, in2, in3;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+
+	if (in1 == 0 && in2) {
+		char brand_info[128];
+		result.status = ia64_pal_get_brand_info(brand_info);
+		if (result.status == PAL_STATUS_SUCCESS)
+			memcpy((void *)in2, brand_info, 128);
+	} else {
+		result.status = PAL_STATUS_REQUIRES_MEMORY;
+		printk(KERN_WARNING"Invalid parameters for "
+					"PAL call:0x%lx!\n", in0);
+	}
+
+	return result;
+}
+
 int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 
@@ -300,14 +418,22 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	int ret = 1;
 
 	gr28 = kvm_get_pal_call_index(vcpu);
-	/*printk("pal_call index:%lx\n",gr28);*/
 	switch (gr28) {
 	case PAL_CACHE_FLUSH:
 		result = pal_cache_flush(vcpu);
 		break;
+	case PAL_MEM_ATTRIB:
+		result = pal_mem_attrib(vcpu);
+		break;
 	case PAL_CACHE_SUMMARY:
 		result = pal_cache_summary(vcpu);
 		break;
+	case PAL_PERF_MON_INFO:
+		result = pal_perf_mon_info(vcpu);
+		break;
+	case PAL_HALT_INFO:
+		result = pal_halt_info(vcpu);
+		break;
 	case PAL_HALT_LIGHT:
 	{
 		INIT_PAL_STATUS_SUCCESS(result);
@@ -317,6 +443,16 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	}
 		break;
 
+	case PAL_PREFETCH_VISIBILITY:
+		result = pal_prefetch_visibility(vcpu);
+		break;
+	case PAL_MC_DRAIN:
+		result.status = ia64_pal_mc_drain();
+		/* FIXME: All vcpus likely call PAL_MC_DRAIN.
+		   That causes the congestion. */
+		smp_call_function(remote_pal_mc_drain, NULL, 1);
+		break;
+
 	case PAL_FREQ_RATIOS:
 		result = pal_freq_ratios(vcpu);
 		break;
@@ -346,6 +482,9 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		INIT_PAL_STATUS_SUCCESS(result);
 		result.v1 = (1L << 32) | 1L;
 		break;
+	case PAL_REGISTER_INFO:
+		result = pal_register_info(vcpu);
+		break;
 	case PAL_VM_PAGE_SIZE:
 		result.status = ia64_pal_vm_page_size(&result.v0,
 							&result.v1);
@@ -365,12 +504,18 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		result.status = ia64_pal_version(
 				(pal_version_u_t *)&result.v0,
 				(pal_version_u_t *)&result.v1);
-
 		break;
 	case PAL_FIXED_ADDR:
 		result.status = PAL_STATUS_SUCCESS;
 		result.v0 = vcpu->vcpu_id;
 		break;
+	case PAL_BRAND_INFO:
+		result = pal_get_brand_info(vcpu);
+		break;
+	case PAL_GET_PSTATE:
+	case PAL_CACHE_SHARED_INFO:
+		INIT_PAL_STATUS_UNIMPLEMENTED(result);
+		break;
 	default:
 		INIT_PAL_STATUS_UNIMPLEMENTED(result);
 		printk(KERN_WARNING"kvm: Unsupported pal call,"
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 230eae482f32..b1dc80952d91 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -167,7 +167,6 @@ static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
 	return (rr1.val);
 }
 
-
 /*
  * Set vIFA & vITIR & vIHA, when vPSR.ic = 1
  * Parameter:
@@ -222,8 +221,6 @@ void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
 	inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
 }
 
-
-
 /*
  * Data Nested TLB Fault
  * @ Data Nested TLB Vector
@@ -245,7 +242,6 @@ void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
 	inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
 }
 
-
 /*
  * Data TLB Fault
  * @ Data TLB vector
@@ -265,8 +261,6 @@ static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
 	/* If vPSR.ic, IFA, ITIR, IHA*/
 	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
 	inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
-
-
 }
 
 /*
@@ -279,7 +273,6 @@ void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
 	_vhpt_fault(vcpu, vadr);
 }
 
-
 /*
  * VHPT Data Fault
  * @ VHPT Translation vector
@@ -290,8 +283,6 @@ void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
 	_vhpt_fault(vcpu, vadr);
 }
 
-
-
 /*
  * Deal with:
  *  General Exception vector
@@ -301,7 +292,6 @@ void _general_exception(struct kvm_vcpu *vcpu)
 	inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
 }
 
-
 /*
  * Illegal Operation Fault
  * @ General Exception Vector
@@ -419,19 +409,16 @@ static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
 	inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
 }
 
-
 void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
 {
 	__page_not_present(vcpu, vadr);
 }
 
-
 void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
 {
 	__page_not_present(vcpu, vadr);
 }
 
-
 /* Deal with
  *  Data access rights vector
  */
@@ -563,22 +550,64 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
 	inject_guest_interruption(vcpu, vector);
 }
 
+static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu,
+		unsigned long arg)
+{
+	struct thash_data *data;
+	unsigned long gpa, poff;
+
+	if (!is_physical_mode(vcpu)) {
+		/* Depends on caller to provide the DTR or DTC mapping.*/
+		data = vtlb_lookup(vcpu, arg, D_TLB);
+		if (data)
+			gpa = data->page_flags & _PAGE_PPN_MASK;
+		else {
+			data = vhpt_lookup(arg);
+			if (!data)
+				return 0;
+			gpa = data->gpaddr & _PAGE_PPN_MASK;
+		}
+
+		poff = arg & (PSIZE(data->ps) - 1);
+		arg = PAGEALIGN(gpa, data->ps) | poff;
+	}
+	arg = kvm_gpa_to_mpa(arg << 1 >> 1);
+
+	return (unsigned long)__va(arg);
+}
+
 static void set_pal_call_data(struct kvm_vcpu *vcpu)
 {
 	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	unsigned long gr28 = vcpu_get_gr(vcpu, 28);
+	unsigned long gr29 = vcpu_get_gr(vcpu, 29);
+	unsigned long gr30 = vcpu_get_gr(vcpu, 30);
 
 	/*FIXME:For static and stacked convention, firmware
 	 * has put the parameters in gr28-gr31 before
 	 * break to vmm  !!*/
 
-	p->u.pal_data.gr28 = vcpu_get_gr(vcpu, 28);
-	p->u.pal_data.gr29 = vcpu_get_gr(vcpu, 29);
-	p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+	switch (gr28) {
+	case PAL_PERF_MON_INFO:
+	case PAL_HALT_INFO:
+		p->u.pal_data.gr29 = kvm_trans_pal_call_args(vcpu, gr29);
+		p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+		break;
+	case PAL_BRAND_INFO:
+		p->u.pal_data.gr29 = gr29;;
+		p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30);
+		break;
+	default:
+		p->u.pal_data.gr29 = gr29;;
+		p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+	}
+	p->u.pal_data.gr28 = gr28;
 	p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
+
 	p->exit_reason = EXIT_REASON_PAL_CALL;
 }
 
-static void set_pal_call_result(struct kvm_vcpu *vcpu)
+static void get_pal_call_result(struct kvm_vcpu *vcpu)
 {
 	struct exit_ctl_data *p = &vcpu->arch.exit_data;
 
@@ -606,7 +635,7 @@ static void set_sal_call_data(struct kvm_vcpu *vcpu)
 	p->exit_reason = EXIT_REASON_SAL_CALL;
 }
 
-static void set_sal_call_result(struct kvm_vcpu *vcpu)
+static void get_sal_call_result(struct kvm_vcpu *vcpu)
 {
 	struct exit_ctl_data *p = &vcpu->arch.exit_data;
 
@@ -629,13 +658,13 @@ void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
 	if (iim == DOMN_PAL_REQUEST) {
 		set_pal_call_data(v);
 		vmm_transition(v);
-		set_pal_call_result(v);
+		get_pal_call_result(v);
 		vcpu_increment_iip(v);
 		return;
 	} else if (iim == DOMN_SAL_REQUEST) {
 		set_sal_call_data(v);
 		vmm_transition(v);
-		set_sal_call_result(v);
+		get_sal_call_result(v);
 		vcpu_increment_iip(v);
 		return;
 	}
@@ -703,7 +732,6 @@ void vhpi_detection(struct kvm_vcpu *vcpu)
 	}
 }
 
-
 void leave_hypervisor_tail(void)
 {
 	struct kvm_vcpu *v = current_vcpu;
@@ -737,7 +765,6 @@ void leave_hypervisor_tail(void)
 	}
 }
 
-
 static inline void handle_lds(struct kvm_pt_regs *regs)
 {
 	regs->cr_ipsr |= IA64_PSR_ED;
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index ecd526b55323..d4d280505878 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -112,7 +112,6 @@ void switch_to_physical_rid(struct kvm_vcpu *vcpu)
 	return;
 }
 
-
 void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
 {
 	unsigned long psr;
@@ -166,8 +165,6 @@ void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
 	return;
 }
 
-
-
 /*
  * In physical mode, insert tc/tr for region 0 and 4 uses
  * RID[0] and RID[4] which is for physical mode emulation.
@@ -269,7 +266,6 @@ static inline unsigned long fph_index(struct kvm_pt_regs *regs,
 	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
 }
 
-
 /*
  * The inverse of the above: given bspstore and the number of
  * registers, calculate ar.bsp.
@@ -811,12 +807,15 @@ static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
 static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
 {
 	struct kvm_vcpu *v;
+	struct kvm *kvm;
 	int i;
 	long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC);
 	unsigned long vitv = VCPU(vcpu, itv);
 
+	kvm = (struct kvm *)KVM_VM_BASE;
+
 	if (vcpu->vcpu_id == 0) {
-		for (i = 0; i < KVM_MAX_VCPUS; i++) {
+		for (i = 0; i < kvm->arch.online_vcpus; i++) {
 			v = (struct kvm_vcpu *)((char *)vcpu +
 					sizeof(struct kvm_vcpu_data) * i);
 			VMX(v, itc_offset) = itc_offset;
@@ -1039,8 +1038,6 @@ u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
 	return key;
 }
 
-
-
 void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
 {
 	unsigned long thash, vadr;
@@ -1050,7 +1047,6 @@ void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
 	vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
 }
 
-
 void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
 {
 	unsigned long tag, vadr;
@@ -1131,7 +1127,6 @@ int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr)
 	return IA64_NO_FAULT;
 }
 
-
 int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
 {
 	unsigned long r1, r3;
@@ -1154,7 +1149,6 @@ void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
 	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
 }
 
-
 /************************************
  * Insert/Purge translation register/cache
  ************************************/
@@ -1385,7 +1379,6 @@ void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
 	vcpu_set_itc(vcpu, r2);
 }
 
-
 void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
 {
 	unsigned long r1;
@@ -1393,8 +1386,9 @@ void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
 	r1 = vcpu_get_itc(vcpu);
 	vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
 }
+
 /**************************************************************************
-  struct kvm_vcpu*protection key register access routines
+  struct kvm_vcpu protection key register access routines
  **************************************************************************/
 
 unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
@@ -1407,20 +1401,6 @@ void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
 	ia64_set_pkr(reg, val);
 }
 
-
-unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa)
-{
-	union ia64_rr rr, rr1;
-
-	rr.val = vcpu_get_rr(vcpu, ifa);
-	rr1.val = 0;
-	rr1.ps = rr.ps;
-	rr1.rid = rr.rid;
-	return (rr1.val);
-}
-
-
-
 /********************************
  * Moves to privileged registers
  ********************************/
@@ -1464,8 +1444,6 @@ unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
 	return (IA64_NO_FAULT);
 }
 
-
-
 void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
 {
 	unsigned long r3, r2;
@@ -1510,8 +1488,6 @@ void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
 	vcpu_set_pkr(vcpu, r3, r2);
 }
 
-
-
 void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
 {
 	unsigned long r3, r1;
@@ -1557,7 +1533,6 @@ void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
 	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
 }
 
-
 unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
 {
 	/* FIXME: This could get called as a result of a rsvd-reg fault */
@@ -1609,7 +1584,6 @@ unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
 	return 0;
 }
 
-
 unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
 {
 	unsigned long tgt = inst.M33.r1;
@@ -1633,8 +1607,6 @@ unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
 	return 0;
 }
 
-
-
 void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
 {
 
@@ -1776,9 +1748,6 @@ void vcpu_bsw1(struct kvm_vcpu *vcpu)
 	}
 }
 
-
-
-
 void vcpu_rfi(struct kvm_vcpu *vcpu)
 {
 	unsigned long ifs, psr;
@@ -1796,7 +1765,6 @@ void vcpu_rfi(struct kvm_vcpu *vcpu)
 	regs->cr_iip = VCPU(vcpu, iip);
 }
 
-
 /*
    VPSR can't keep track of below bits of guest PSR
    This function gets guest PSR
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index b2f12a562bdf..042af92ced83 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -703,7 +703,7 @@ extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
 extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
 extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
 extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
-extern int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
+extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
 		u64 itir, u64 ifa, int type);
 extern void thash_purge_all(struct kvm_vcpu *v);
 extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
@@ -738,7 +738,7 @@ void kvm_init_vhpt(struct kvm_vcpu *v);
 void thash_init(struct thash_cb *hcb, u64 sz);
 
 void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
-
+u64 kvm_gpa_to_mpa(u64 gpa);
 extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
 		u64 arg4, u64 arg5, u64 arg6, u64 arg7);
 
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index 6b6307a3bd55..38232b37668b 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -164,11 +164,11 @@ static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
 	unsigned long ps, gpaddr;
 
 	ps = itir_ps(itir);
+	rr.val = ia64_get_rr(ifa);
 
 	gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
 					(ifa & ((1UL << ps) - 1));
 
-	rr.val = ia64_get_rr(ifa);
 	head = (struct thash_data *)ia64_thash(ifa);
 	head->etag = INVALID_TI_TAG;
 	ia64_mf();
@@ -412,16 +412,14 @@ u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
 
 /*
  * Purge overlap TCs and then insert the new entry to emulate itc ops.
  * Notes: Only TC entry can purge and insert.
- * 1 indicates this is MMIO
  */
-int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
+void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
 						u64 ifa, int type)
 {
 	u64 ps;
 	u64 phy_pte, io_mask, index;
 	union ia64_rr vrr, mrr;
-	int ret = 0;
 
 	ps = itir_ps(itir);
 	vrr.val = vcpu_get_rr(v, ifa);
@@ -441,25 +439,19 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
 		phy_pte &= ~_PAGE_MA_MASK;
 	}
 
-	if (pte & VTLB_PTE_IO)
-		ret = 1;
-
 	vtlb_purge(v, ifa, ps);
 	vhpt_purge(v, ifa, ps);
 
-	if (ps == mrr.ps) {
-		if (!(pte&VTLB_PTE_IO)) {
-			vhpt_insert(phy_pte, itir, ifa, pte);
-		} else {
-			vtlb_insert(v, pte, itir, ifa);
-			vcpu_quick_region_set(VMX(v, tc_regions), ifa);
-		}
-	} else if (ps > mrr.ps) {
+	if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) {
 		vtlb_insert(v, pte, itir, ifa);
 		vcpu_quick_region_set(VMX(v, tc_regions), ifa);
-		if (!(pte&VTLB_PTE_IO))
-			vhpt_insert(phy_pte, itir, ifa, pte);
-	} else {
+	}
+	if (pte & VTLB_PTE_IO)
+		return;
+
+	if (ps >= mrr.ps)
+		vhpt_insert(phy_pte, itir, ifa, pte);
+	else {
 		u64 psr;
 		phy_pte &= ~PAGE_FLAGS_RV_MASK;
 		psr = ia64_clear_ic();
@@ -469,7 +461,6 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
 	if (!(pte&VTLB_PTE_IO))
 		mark_pages_dirty(v, pte, ps);
 
-	return ret;
 }
 
 /*
@@ -509,7 +500,6 @@ void thash_purge_all(struct kvm_vcpu *v)
 	local_flush_tlb_all();
 }
 
-
 /*
  * Lookup the hash table and its collision chain to find an entry
  * covering this address rid:va or the entry.
@@ -517,7 +507,6 @@ void thash_purge_all(struct kvm_vcpu *v)
  * INPUT:
  *  in: TLB format for both VHPT & TLB.
  */
-
 struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
 {
 	struct thash_data *cch;
@@ -547,7 +536,6 @@ struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
 	return NULL;
 }
 
-
 /*
  * Initialize internal control data before service.
  */
@@ -573,6 +561,10 @@ void thash_init(struct thash_cb *hcb, u64 sz)
 u64 kvm_get_mpt_entry(u64 gpfn)
 {
 	u64 *base = (u64 *) KVM_P2M_BASE;
+
+	if (gpfn >= (KVM_P2M_SIZE >> 3))
+		panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn);
+
 	return *(base + gpfn);
 }
 
@@ -589,7 +581,6 @@ u64 kvm_gpa_to_mpa(u64 gpa)
 	return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
 }
 
-
 /*
  * Fetch guest bundle code.
  * INPUT:
@@ -631,7 +622,6 @@ int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
 	return IA64_NO_FAULT;
 }
 
-
 void kvm_init_vhpt(struct kvm_vcpu *v)
 {
 	v->arch.vhpt.num = VHPT_NUM_ENTRIES;
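
Aside (not part of the patch): thash_purge_and_insert() used to return 1 for an MMIO mapping and is now void, so a caller that branched on the result would test the PTE flag itself. A hedged before/after sketch with a hypothetical helper:

	/* Before: the insert reported MMIO via its return value. */
	if (thash_purge_and_insert(v, pte, itir, ifa, type))
		handle_mmio(v, ifa);		/* hypothetical helper */

	/* After: the caller checks the VTLB_PTE_IO bit directly. */
	thash_purge_and_insert(v, pte, itir, ifa, type);
	if (pte & VTLB_PTE_IO)
		handle_mmio(v, ifa);
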
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index f993e4198d5c..755f1b1948c5 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -52,4 +52,11 @@ struct kvm_fpu {
 	__u64 fpr[32];
 };
 
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
index f49031b632ca..d22d39942a92 100644
--- a/arch/powerpc/include/asm/kvm_44x.h
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -28,6 +28,13 @@
  * need to find some way of advertising it. */
 #define KVM44x_GUEST_TLB_SIZE 64
 
+struct kvmppc_44x_tlbe {
+	u32 tid; /* Only the low 8 bits are used. */
+	u32 word0;
+	u32 word1;
+	u32 word2;
+};
+
 struct kvmppc_44x_shadow_ref {
 	struct page *page;
 	u16 gtlb_index;
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 2197764796d9..56bfae59837f 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -42,7 +42,12 @@
 #define BOOKE_INTERRUPT_DTLB_MISS 13
 #define BOOKE_INTERRUPT_ITLB_MISS 14
 #define BOOKE_INTERRUPT_DEBUG 15
-#define BOOKE_MAX_INTERRUPT 15
+
+/* E500 */
+#define BOOKE_INTERRUPT_SPE_UNAVAIL 32
+#define BOOKE_INTERRUPT_SPE_FP_DATA 33
+#define BOOKE_INTERRUPT_SPE_FP_ROUND 34
+#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
 
 #define RESUME_FLAG_NV          (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
new file mode 100644
index 000000000000..9d497ce49726
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/include/asm/kvm_44x.h,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_KVM_E500_H__
+#define __ASM_KVM_E500_H__
+
+#include <linux/kvm_host.h>
+
+#define BOOKE_INTERRUPT_SIZE 36
+
+#define E500_PID_NUM   3
+#define E500_TLB_NUM   2
+
+struct tlbe{
+	u32 mas1;
+	u32 mas2;
+	u32 mas3;
+	u32 mas7;
+};
+
+struct kvmppc_vcpu_e500 {
+	/* Unmodified copy of the guest's TLB. */
+	struct tlbe *guest_tlb[E500_TLB_NUM];
+	/* TLB that's actually used when the guest is running. */
+	struct tlbe *shadow_tlb[E500_TLB_NUM];
+	/* Pages which are referenced in the shadow TLB. */
+	struct page **shadow_pages[E500_TLB_NUM];
+
+	unsigned int guest_tlb_size[E500_TLB_NUM];
+	unsigned int shadow_tlb_size[E500_TLB_NUM];
+	unsigned int guest_tlb_nv[E500_TLB_NUM];
+
+	u32 host_pid[E500_PID_NUM];
+	u32 pid[E500_PID_NUM];
+
+	u32 mas0;
+	u32 mas1;
+	u32 mas2;
+	u32 mas3;
+	u32 mas4;
+	u32 mas5;
+	u32 mas6;
+	u32 mas7;
+	u32 l1csr1;
+	u32 hid0;
+	u32 hid1;
+
+	struct kvm_vcpu vcpu;
+};
+
+static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
+{
+	return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu);
+}
+
+#endif /* __ASM_KVM_E500_H__ */
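
Aside (not part of the patch): because struct kvm_vcpu is embedded in the wrapper rather than pointed to, to_e500() can recover the e500-specific state from the generic vcpu pointer with container_of(). A minimal sketch — the surrounding function is illustrative:

	static void example_set_pid0(struct kvm_vcpu *vcpu, u32 value)
	{
		struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);

		vcpu_e500->pid[0] = value;	/* valid only because vcpu is embedded */
	}
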
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index c1e436fe7738..dfdf13c9fefd 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -64,13 +64,6 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-struct kvmppc_44x_tlbe {
-	u32 tid; /* Only the low 8 bits are used. */
-	u32 word0;
-	u32 word1;
-	u32 word2;
-};
-
 enum kvm_exit_types {
 	MMIO_EXITS,
 	DCR_EXITS,
@@ -118,11 +111,6 @@ struct kvm_arch {
 struct kvm_vcpu_arch {
 	u32 host_stack;
 	u32 host_pid;
-	u32 host_dbcr0;
-	u32 host_dbcr1;
-	u32 host_dbcr2;
-	u32 host_iac[4];
-	u32 host_msr;
 
 	u64 fpr[32];
 	ulong gpr[32];
@@ -157,7 +145,7 @@ struct kvm_vcpu_arch {
 	u32 tbu;
 	u32 tcr;
 	u32 tsr;
-	u32 ivor[16];
+	u32 ivor[64];
 	ulong ivpr;
 	u32 pir;
 
@@ -170,6 +158,7 @@ struct kvm_vcpu_arch {
 	u32 ccr1;
 	u32 dbcr0;
 	u32 dbcr1;
+	u32 dbsr;
 
 #ifdef CONFIG_KVM_EXIT_TIMING
 	struct kvmppc_exit_timing timing_exit;
@@ -200,10 +189,4 @@ struct kvm_vcpu_arch {
 	unsigned long pending_exceptions;
 };
 
-struct kvm_guest_debug {
-	int enabled;
-	unsigned long bp[4];
-	int singlestep;
-};
-
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 36d2a50a8487..2c6ee349df5e 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -52,13 +52,19 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
52extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); 52extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
53extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); 53extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
54 54
55/* Core-specific hooks */
56
55extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, 57extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
56 u64 asid, u32 flags, u32 max_bytes,
57 unsigned int gtlb_idx); 58 unsigned int gtlb_idx);
58extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); 59extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
59extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); 60extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
60 61extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu);
61/* Core-specific hooks */ 62extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
63extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
64extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
65 gva_t eaddr);
66extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu);
67extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu);
62 68
63extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, 69extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
64 unsigned int id); 70 unsigned int id);
@@ -71,9 +77,6 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
71extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); 77extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
72extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); 78extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
73 79
74extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu);
75extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu);
76
77extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); 80extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
78extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); 81extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
79extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu); 82extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/mmu-fsl-booke.h b/arch/powerpc/include/asm/mmu-fsl-booke.h
index 3f941c0f7e8e..4285b64a65e0 100644
--- a/arch/powerpc/include/asm/mmu-fsl-booke.h
+++ b/arch/powerpc/include/asm/mmu-fsl-booke.h
@@ -75,6 +75,8 @@
75 75
76#ifndef __ASSEMBLY__ 76#ifndef __ASSEMBLY__
77 77
78extern unsigned int tlbcam_index;
79
78typedef struct { 80typedef struct {
79 unsigned int id; 81 unsigned int id;
80 unsigned int active; 82 unsigned int active;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 19ee491e9e23..42fe4da4e8ae 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -49,7 +49,7 @@
49#include <asm/iseries/alpaca.h> 49#include <asm/iseries/alpaca.h>
50#endif 50#endif
51#ifdef CONFIG_KVM 51#ifdef CONFIG_KVM
52#include <asm/kvm_44x.h> 52#include <linux/kvm_host.h>
53#endif 53#endif
54 54
55#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) 55#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
@@ -361,8 +361,6 @@ int main(void)
361 DEFINE(PTE_SIZE, sizeof(pte_t)); 361 DEFINE(PTE_SIZE, sizeof(pte_t));
362 362
363#ifdef CONFIG_KVM 363#ifdef CONFIG_KVM
364 DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
365
366 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); 364 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
367 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); 365 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
368 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); 366 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index a66bec57265a..0cef809cec21 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -28,72 +28,6 @@
28 28
29#include "44x_tlb.h" 29#include "44x_tlb.h"
30 30
31/* Note: clearing MSR[DE] just means that the debug interrupt will not be
32 * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
33 * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
34 * will be delivered as an "imprecise debug event" (which is indicated by
35 * DBSR[IDE]).
36 */
37static void kvm44x_disable_debug_interrupts(void)
38{
39 mtmsr(mfmsr() & ~MSR_DE);
40}
41
42void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
43{
44 kvm44x_disable_debug_interrupts();
45
46 mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
47 mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
48 mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
49 mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
50 mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
51 mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
52 mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
53 mtmsr(vcpu->arch.host_msr);
54}
55
56void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
57{
58 struct kvm_guest_debug *dbg = &vcpu->guest_debug;
59 u32 dbcr0 = 0;
60
61 vcpu->arch.host_msr = mfmsr();
62 kvm44x_disable_debug_interrupts();
63
64 /* Save host debug register state. */
65 vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
66 vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
67 vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
68 vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
69 vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
70 vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
71 vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
72
73 /* set registers up for guest */
74
75 if (dbg->bp[0]) {
76 mtspr(SPRN_IAC1, dbg->bp[0]);
77 dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
78 }
79 if (dbg->bp[1]) {
80 mtspr(SPRN_IAC2, dbg->bp[1]);
81 dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
82 }
83 if (dbg->bp[2]) {
84 mtspr(SPRN_IAC3, dbg->bp[2]);
85 dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
86 }
87 if (dbg->bp[3]) {
88 mtspr(SPRN_IAC4, dbg->bp[3]);
89 dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
90 }
91
92 mtspr(SPRN_DBCR0, dbcr0);
93 mtspr(SPRN_DBCR1, 0);
94 mtspr(SPRN_DBCR2, 0);
95}
96
97void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 31void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
98{ 32{
99 kvmppc_44x_tlb_load(vcpu); 33 kvmppc_44x_tlb_load(vcpu);
@@ -149,8 +83,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
149int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, 83int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
150 struct kvm_translation *tr) 84 struct kvm_translation *tr)
151{ 85{
152 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
153 struct kvmppc_44x_tlbe *gtlbe;
154 int index; 86 int index;
155 gva_t eaddr; 87 gva_t eaddr;
156 u8 pid; 88 u8 pid;
@@ -166,9 +98,7 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
166 return 0; 98 return 0;
167 } 99 }
168 100
169 gtlbe = &vcpu_44x->guest_tlb[index]; 101 tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
170
171 tr->physical_address = tlb_xlate(gtlbe, eaddr);
172 /* XXX what do "writeable" and "usermode" even mean? */ 102
173 tr->valid = 1; 103 tr->valid = 1;
174 104
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 82489a743a6f..61af58fcecee 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -27,25 +27,12 @@
27#include "booke.h" 27#include "booke.h"
28#include "44x_tlb.h" 28#include "44x_tlb.h"
29 29
30#define OP_RFI 19
31
32#define XOP_RFI 50
33#define XOP_MFMSR 83
34#define XOP_WRTEE 131
35#define XOP_MTMSR 146
36#define XOP_WRTEEI 163
37#define XOP_MFDCR 323 30#define XOP_MFDCR 323
38#define XOP_MTDCR 451 31#define XOP_MTDCR 451
39#define XOP_TLBSX 914 32#define XOP_TLBSX 914
40#define XOP_ICCCI 966 33#define XOP_ICCCI 966
41#define XOP_TLBWE 978 34#define XOP_TLBWE 978
42 35
43static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
44{
45 vcpu->arch.pc = vcpu->arch.srr0;
46 kvmppc_set_msr(vcpu, vcpu->arch.srr1);
47}
48
49int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 36int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
50 unsigned int inst, int *advance) 37 unsigned int inst, int *advance)
51{ 38{
@@ -59,48 +46,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
59 int ws; 46 int ws;
60 47
61 switch (get_op(inst)) { 48 switch (get_op(inst)) {
62 case OP_RFI:
63 switch (get_xop(inst)) {
64 case XOP_RFI:
65 kvmppc_emul_rfi(vcpu);
66 kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
67 *advance = 0;
68 break;
69
70 default:
71 emulated = EMULATE_FAIL;
72 break;
73 }
74 break;
75
76 case 31: 49 case 31:
77 switch (get_xop(inst)) { 50 switch (get_xop(inst)) {
78 51
79 case XOP_MFMSR:
80 rt = get_rt(inst);
81 vcpu->arch.gpr[rt] = vcpu->arch.msr;
82 kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
83 break;
84
85 case XOP_MTMSR:
86 rs = get_rs(inst);
87 kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
88 kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
89 break;
90
91 case XOP_WRTEE:
92 rs = get_rs(inst);
93 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
94 | (vcpu->arch.gpr[rs] & MSR_EE);
95 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
96 break;
97
98 case XOP_WRTEEI:
99 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
100 | (inst & MSR_EE);
101 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
102 break;
103
104 case XOP_MFDCR: 52 case XOP_MFDCR:
105 dcrn = get_dcrn(inst); 53 dcrn = get_dcrn(inst);
106 rt = get_rt(inst); 54 rt = get_rt(inst);
@@ -186,186 +134,51 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
186 emulated = EMULATE_FAIL; 134 emulated = EMULATE_FAIL;
187 } 135 }
188 136
137 if (emulated == EMULATE_FAIL)
138 emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
139
189 return emulated; 140 return emulated;
190} 141}
191 142
192int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) 143int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
193{ 144{
145 int emulated = EMULATE_DONE;
146
194 switch (sprn) { 147 switch (sprn) {
195 case SPRN_MMUCR:
196 vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
197 case SPRN_PID: 148 case SPRN_PID:
198 kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; 149 kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
150 case SPRN_MMUCR:
151 vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
199 case SPRN_CCR0: 152 case SPRN_CCR0:
200 vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; 153 vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
201 case SPRN_CCR1: 154 case SPRN_CCR1:
202 vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; 155 vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
203 case SPRN_DEAR:
204 vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
205 case SPRN_ESR:
206 vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
207 case SPRN_DBCR0:
208 vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
209 case SPRN_DBCR1:
210 vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
211 case SPRN_TSR:
212 vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
213 case SPRN_TCR:
214 vcpu->arch.tcr = vcpu->arch.gpr[rs];
215 kvmppc_emulate_dec(vcpu);
216 break;
217
218 /* Note: SPRG4-7 are user-readable. These values are
219 * loaded into the real SPRGs when resuming the
220 * guest. */
221 case SPRN_SPRG4:
222 vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
223 case SPRN_SPRG5:
224 vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
225 case SPRN_SPRG6:
226 vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
227 case SPRN_SPRG7:
228 vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
229
230 case SPRN_IVPR:
231 vcpu->arch.ivpr = vcpu->arch.gpr[rs];
232 break;
233 case SPRN_IVOR0:
234 vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
235 break;
236 case SPRN_IVOR1:
237 vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
238 break;
239 case SPRN_IVOR2:
240 vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
241 break;
242 case SPRN_IVOR3:
243 vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
244 break;
245 case SPRN_IVOR4:
246 vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
247 break;
248 case SPRN_IVOR5:
249 vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
250 break;
251 case SPRN_IVOR6:
252 vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
253 break;
254 case SPRN_IVOR7:
255 vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
256 break;
257 case SPRN_IVOR8:
258 vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
259 break;
260 case SPRN_IVOR9:
261 vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
262 break;
263 case SPRN_IVOR10:
264 vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
265 break;
266 case SPRN_IVOR11:
267 vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
268 break;
269 case SPRN_IVOR12:
270 vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
271 break;
272 case SPRN_IVOR13:
273 vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
274 break;
275 case SPRN_IVOR14:
276 vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
277 break;
278 case SPRN_IVOR15:
279 vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
280 break;
281
282 default: 156 default:
283 return EMULATE_FAIL; 157 emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
284 } 158 }
285 159
286 kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); 160 kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
287 return EMULATE_DONE; 161 return emulated;
288} 162}
289 163
290int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) 164int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
291{ 165{
166 int emulated = EMULATE_DONE;
167
292 switch (sprn) { 168 switch (sprn) {
293 /* 440 */ 169 case SPRN_PID:
170 vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
294 case SPRN_MMUCR: 171 case SPRN_MMUCR:
295 vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; 172 vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
296 case SPRN_CCR0: 173 case SPRN_CCR0:
297 vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; 174 vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
298 case SPRN_CCR1: 175 case SPRN_CCR1:
299 vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; 176 vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
300
301 /* Book E */
302 case SPRN_PID:
303 vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
304 case SPRN_IVPR:
305 vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
306 case SPRN_DEAR:
307 vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
308 case SPRN_ESR:
309 vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
310 case SPRN_DBCR0:
311 vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
312 case SPRN_DBCR1:
313 vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
314
315 case SPRN_IVOR0:
316 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
317 break;
318 case SPRN_IVOR1:
319 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
320 break;
321 case SPRN_IVOR2:
322 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
323 break;
324 case SPRN_IVOR3:
325 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
326 break;
327 case SPRN_IVOR4:
328 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
329 break;
330 case SPRN_IVOR5:
331 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
332 break;
333 case SPRN_IVOR6:
334 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
335 break;
336 case SPRN_IVOR7:
337 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
338 break;
339 case SPRN_IVOR8:
340 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
341 break;
342 case SPRN_IVOR9:
343 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
344 break;
345 case SPRN_IVOR10:
346 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
347 break;
348 case SPRN_IVOR11:
349 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
350 break;
351 case SPRN_IVOR12:
352 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
353 break;
354 case SPRN_IVOR13:
355 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
356 break;
357 case SPRN_IVOR14:
358 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
359 break;
360 case SPRN_IVOR15:
361 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
362 break;
363
364 default: 177 default:
365 return EMULATE_FAIL; 178 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
366 } 179 }
367 180
368 kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); 181 kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
369 return EMULATE_DONE; 182 return emulated;
370} 183}
371 184
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 9a34b8edb9e2..4a16f472cc18 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -208,20 +208,38 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
208 return -1; 208 return -1;
209} 209}
210 210
211int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) 211gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
212 gva_t eaddr)
213{
214 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
215 struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
216 unsigned int pgmask = get_tlb_bytes(gtlbe) - 1;
217
218 return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
219}
220
221int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
212{ 222{
213 unsigned int as = !!(vcpu->arch.msr & MSR_IS); 223 unsigned int as = !!(vcpu->arch.msr & MSR_IS);
214 224
215 return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); 225 return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
216} 226}
217 227
218int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) 228int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
219{ 229{
220 unsigned int as = !!(vcpu->arch.msr & MSR_DS); 230 unsigned int as = !!(vcpu->arch.msr & MSR_DS);
221 231
222 return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); 232 return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
223} 233}
224 234
235void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
236{
237}
238
239void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
240{
241}
242
225static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, 243static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
226 unsigned int stlb_index) 244 unsigned int stlb_index)
227{ 245{
@@ -248,7 +266,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
248 KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler); 266 KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
249} 267}
250 268
251void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu) 269void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
252{ 270{
253 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 271 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
254 int i; 272 int i;
@@ -269,15 +287,19 @@ void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
269 * Caller must ensure that the specified guest TLB entry is safe to insert into 287 * Caller must ensure that the specified guest TLB entry is safe to insert into
270 * the shadow TLB. 288 * the shadow TLB.
271 */ 289 */
272void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid, 290void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
273 u32 flags, u32 max_bytes, unsigned int gtlb_index) 291 unsigned int gtlb_index)
274{ 292{
275 struct kvmppc_44x_tlbe stlbe; 293 struct kvmppc_44x_tlbe stlbe;
276 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 294 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
295 struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
277 struct kvmppc_44x_shadow_ref *ref; 296 struct kvmppc_44x_shadow_ref *ref;
278 struct page *new_page; 297 struct page *new_page;
279 hpa_t hpaddr; 298 hpa_t hpaddr;
280 gfn_t gfn; 299 gfn_t gfn;
300 u32 asid = gtlbe->tid;
301 u32 flags = gtlbe->word2;
302 u32 max_bytes = get_tlb_bytes(gtlbe);
281 unsigned int victim; 303 unsigned int victim;
282 304
283 /* Select TLB entry to clobber. Indirectly guard against races with the TLB 305 /* Select TLB entry to clobber. Indirectly guard against races with the TLB
@@ -448,10 +470,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
448 } 470 }
449 471
450 if (tlbe_is_host_safe(vcpu, tlbe)) { 472 if (tlbe_is_host_safe(vcpu, tlbe)) {
451 u64 asid;
452 gva_t eaddr; 473 gva_t eaddr;
453 gpa_t gpaddr; 474 gpa_t gpaddr;
454 u32 flags;
455 u32 bytes; 475 u32 bytes;
456 476
457 eaddr = get_tlb_eaddr(tlbe); 477 eaddr = get_tlb_eaddr(tlbe);
@@ -462,10 +482,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
462 eaddr &= ~(bytes - 1); 482 eaddr &= ~(bytes - 1);
463 gpaddr &= ~(bytes - 1); 483 gpaddr &= ~(bytes - 1);
464 484
465 asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; 485 kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
466 flags = tlbe->word2 & 0xffff;
467
468 kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index);
469 } 486 }
470 487
471 KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0, 488 KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
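
kvmppc_mmu_xlate() above translates by keeping the TLB entry's real page base
and merging in the untranslated offset bits, which works for any power-of-two
mapping size. A standalone sketch of the arithmetic, with invented values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t bytes  = 4096;        /* get_tlb_bytes(gtlbe) */
            uint64_t raddr  = 0x01234000;  /* get_tlb_raddr(gtlbe) */
            uint64_t eaddr  = 0xc0000abc;  /* faulting effective address */
            uint64_t pgmask = bytes - 1;   /* 0xfff for a 4K page */

            /* Real page base | page offset, as in kvmppc_mmu_xlate(). */
            printf("%llx\n", (unsigned long long)(raddr | (eaddr & pgmask)));
            /* -> 1234abc */
            return 0;
    }
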
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index 772191f29e62..a9ff80e51526 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -25,8 +25,6 @@
25 25
26extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, 26extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
27 unsigned int pid, unsigned int as); 27 unsigned int pid, unsigned int as);
28extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
29extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
30 28
31extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, 29extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
32 u8 rc); 30 u8 rc);
@@ -85,11 +83,4 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
85 return (vcpu->arch.mmucr >> 16) & 0x1; 83 return (vcpu->arch.mmucr >> 16) & 0x1;
86} 84}
87 85
88static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr)
89{
90 unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
91
92 return get_tlb_raddr(tlbe) | (eaddr & pgmask);
93}
94
95#endif /* __KVM_POWERPC_TLB_H__ */ 86#endif /* __KVM_POWERPC_TLB_H__ */
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 6dbdc4817d80..5a152a52796f 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -2,6 +2,9 @@
2# KVM configuration 2# KVM configuration
3# 3#
4 4
5config HAVE_KVM_IRQCHIP
6 bool
7
5menuconfig VIRTUALIZATION 8menuconfig VIRTUALIZATION
6 bool "Virtualization" 9 bool "Virtualization"
7 ---help--- 10 ---help---
@@ -43,6 +46,19 @@ config KVM_EXIT_TIMING
43 46
44 If unsure, say N. 47 If unsure, say N.
45 48
49config KVM_E500
50 bool "KVM support for PowerPC E500 processors"
51 depends on EXPERIMENTAL && E500
52 select KVM
53 ---help---
54 Support running unmodified E500 guest kernels in virtual machines on
55 E500 host processors.
56
57 This module provides access to the hardware capabilities through
58 a character device node named /dev/kvm.
59
60 If unsure, say N.
61
46config KVM_TRACE 62config KVM_TRACE
47 bool "KVM trace support" 63 bool "KVM trace support"
48 depends on KVM && MARKERS && SYSFS 64 depends on KVM && MARKERS && SYSFS
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index df7ba59e6d53..4b2df66c79d8 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -16,8 +16,18 @@ AFLAGS_booke_interrupts.o := -I$(obj)
16 16
17kvm-440-objs := \ 17kvm-440-objs := \
18 booke.o \ 18 booke.o \
19 booke_emulate.o \
19 booke_interrupts.o \ 20 booke_interrupts.o \
20 44x.o \ 21 44x.o \
21 44x_tlb.o \ 22 44x_tlb.o \
22 44x_emulate.o 23 44x_emulate.o
23obj-$(CONFIG_KVM_440) += kvm-440.o 24obj-$(CONFIG_KVM_440) += kvm-440.o
25
26kvm-e500-objs := \
27 booke.o \
28 booke_emulate.o \
29 booke_interrupts.o \
30 e500.o \
31 e500_tlb.o \
32 e500_emulate.o
33obj-$(CONFIG_KVM_E500) += kvm-e500.o
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 35485dd6927e..642e4204cf25 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -30,10 +30,8 @@
30#include <asm/kvm_ppc.h> 30#include <asm/kvm_ppc.h>
31#include "timing.h" 31#include "timing.h"
32#include <asm/cacheflush.h> 32#include <asm/cacheflush.h>
33#include <asm/kvm_44x.h>
34 33
35#include "booke.h" 34#include "booke.h"
36#include "44x_tlb.h"
37 35
38unsigned long kvmppc_booke_handlers; 36unsigned long kvmppc_booke_handlers;
39 37
@@ -120,6 +118,9 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
120 case BOOKE_IRQPRIO_DATA_STORAGE: 118 case BOOKE_IRQPRIO_DATA_STORAGE:
121 case BOOKE_IRQPRIO_INST_STORAGE: 119 case BOOKE_IRQPRIO_INST_STORAGE:
122 case BOOKE_IRQPRIO_FP_UNAVAIL: 120 case BOOKE_IRQPRIO_FP_UNAVAIL:
121 case BOOKE_IRQPRIO_SPE_UNAVAIL:
122 case BOOKE_IRQPRIO_SPE_FP_DATA:
123 case BOOKE_IRQPRIO_SPE_FP_ROUND:
123 case BOOKE_IRQPRIO_AP_UNAVAIL: 124 case BOOKE_IRQPRIO_AP_UNAVAIL:
124 case BOOKE_IRQPRIO_ALIGNMENT: 125 case BOOKE_IRQPRIO_ALIGNMENT:
125 allowed = 1; 126 allowed = 1;
@@ -165,7 +166,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
165 unsigned int priority; 166 unsigned int priority;
166 167
167 priority = __ffs(*pending); 168 priority = __ffs(*pending);
168 while (priority <= BOOKE_MAX_INTERRUPT) { 169 while (priority <= BOOKE_IRQPRIO_MAX) {
169 if (kvmppc_booke_irqprio_deliver(vcpu, priority)) 170 if (kvmppc_booke_irqprio_deliver(vcpu, priority))
170 break; 171 break;
171 172
@@ -263,6 +264,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
263 r = RESUME_GUEST; 264 r = RESUME_GUEST;
264 break; 265 break;
265 266
267 case BOOKE_INTERRUPT_SPE_UNAVAIL:
268 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL);
269 r = RESUME_GUEST;
270 break;
271
272 case BOOKE_INTERRUPT_SPE_FP_DATA:
273 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
274 r = RESUME_GUEST;
275 break;
276
277 case BOOKE_INTERRUPT_SPE_FP_ROUND:
278 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
279 r = RESUME_GUEST;
280 break;
281
266 case BOOKE_INTERRUPT_DATA_STORAGE: 282 case BOOKE_INTERRUPT_DATA_STORAGE:
267 vcpu->arch.dear = vcpu->arch.fault_dear; 283 vcpu->arch.dear = vcpu->arch.fault_dear;
268 vcpu->arch.esr = vcpu->arch.fault_esr; 284 vcpu->arch.esr = vcpu->arch.fault_esr;
@@ -284,29 +300,27 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
284 r = RESUME_GUEST; 300 r = RESUME_GUEST;
285 break; 301 break;
286 302
287 /* XXX move to a 440-specific file. */
288 case BOOKE_INTERRUPT_DTLB_MISS: { 303 case BOOKE_INTERRUPT_DTLB_MISS: {
289 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
290 struct kvmppc_44x_tlbe *gtlbe;
291 unsigned long eaddr = vcpu->arch.fault_dear; 304 unsigned long eaddr = vcpu->arch.fault_dear;
292 int gtlb_index; 305 int gtlb_index;
306 gpa_t gpaddr;
293 gfn_t gfn; 307 gfn_t gfn;
294 308
295 /* Check the guest TLB. */ 309 /* Check the guest TLB. */
296 gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr); 310 gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
297 if (gtlb_index < 0) { 311 if (gtlb_index < 0) {
298 /* The guest didn't have a mapping for it. */ 312 /* The guest didn't have a mapping for it. */
299 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); 313 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
300 vcpu->arch.dear = vcpu->arch.fault_dear; 314 vcpu->arch.dear = vcpu->arch.fault_dear;
301 vcpu->arch.esr = vcpu->arch.fault_esr; 315 vcpu->arch.esr = vcpu->arch.fault_esr;
316 kvmppc_mmu_dtlb_miss(vcpu);
302 kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); 317 kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
303 r = RESUME_GUEST; 318 r = RESUME_GUEST;
304 break; 319 break;
305 } 320 }
306 321
307 gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; 322 gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
308 vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); 323 gfn = gpaddr >> PAGE_SHIFT;
309 gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
310 324
311 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { 325 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
312 /* The guest TLB had a mapping, but the shadow TLB 326 /* The guest TLB had a mapping, but the shadow TLB
@@ -315,13 +329,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
315 * b) the guest used a large mapping which we're faking 329 * b) the guest used a large mapping which we're faking
316 * Either way, we need to satisfy the fault without 330 * Either way, we need to satisfy the fault without
317 * invoking the guest. */ 331 * invoking the guest. */
318 kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid, 332 kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
319 gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
320 kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS); 333 kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
321 r = RESUME_GUEST; 334 r = RESUME_GUEST;
322 } else { 335 } else {
323 /* Guest has mapped and accessed a page which is not 336 /* Guest has mapped and accessed a page which is not
324 * actually RAM. */ 337 * actually RAM. */
338 vcpu->arch.paddr_accessed = gpaddr;
325 r = kvmppc_emulate_mmio(run, vcpu); 339 r = kvmppc_emulate_mmio(run, vcpu);
326 kvmppc_account_exit(vcpu, MMIO_EXITS); 340 kvmppc_account_exit(vcpu, MMIO_EXITS);
327 } 341 }
@@ -329,10 +343,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
329 break; 343 break;
330 } 344 }
331 345
332 /* XXX move to a 440-specific file. */
333 case BOOKE_INTERRUPT_ITLB_MISS: { 346 case BOOKE_INTERRUPT_ITLB_MISS: {
334 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
335 struct kvmppc_44x_tlbe *gtlbe;
336 unsigned long eaddr = vcpu->arch.pc; 347 unsigned long eaddr = vcpu->arch.pc;
337 gpa_t gpaddr; 348 gpa_t gpaddr;
338 gfn_t gfn; 349 gfn_t gfn;
@@ -341,18 +352,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
341 r = RESUME_GUEST; 352 r = RESUME_GUEST;
342 353
343 /* Check the guest TLB. */ 354 /* Check the guest TLB. */
344 gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr); 355 gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr);
345 if (gtlb_index < 0) { 356 if (gtlb_index < 0) {
346 /* The guest didn't have a mapping for it. */ 357 /* The guest didn't have a mapping for it. */
347 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); 358 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
359 kvmppc_mmu_itlb_miss(vcpu);
348 kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS); 360 kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
349 break; 361 break;
350 } 362 }
351 363
352 kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS); 364 kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
353 365
354 gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; 366 gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
355 gpaddr = tlb_xlate(gtlbe, eaddr);
356 gfn = gpaddr >> PAGE_SHIFT; 367 gfn = gpaddr >> PAGE_SHIFT;
357 368
358 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { 369 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
@@ -362,8 +373,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
362 * b) the guest used a large mapping which we're faking 373 * b) the guest used a large mapping which we're faking
363 * Either way, we need to satisfy the fault without 374 * Either way, we need to satisfy the fault without
364 * invoking the guest. */ 375 * invoking the guest. */
365 kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid, 376 kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
366 gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
367 } else { 377 } else {
368 /* Guest mapped and leaped at non-RAM! */ 378 /* Guest mapped and leaped at non-RAM! */
369 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK); 379 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
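
The delivery loop above treats pending_exceptions as a bitmap indexed by
priority: __ffs() returns the lowest set bit, so lower priority numbers are
serviced first. A user-space sketch of the same scan, using __builtin_ctzl()
in place of __ffs() and a stubbed deliverability check (the kernel advances
to the next set bit rather than clearing, but the effect is the same here):

    #include <stdio.h>

    #define IRQPRIO_MAX 19

    static int can_deliver(unsigned int prio)
    {
            return prio == 3;  /* stub: pretend only priority 3 is unmasked */
    }

    int main(void)
    {
            unsigned long pending = (1UL << 1) | (1UL << 3) | (1UL << 10);

            while (pending) {
                    unsigned int prio = (unsigned int)__builtin_ctzl(pending);

                    if (prio > IRQPRIO_MAX)
                            break;
                    if (can_deliver(prio)) {
                            printf("deliver priority %u\n", prio);
                            break;
                    }
                    pending &= ~(1UL << prio);  /* masked; try the next one */
            }
            return 0;
    }
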
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index cf7c94ca24bf..d59bcca1f9d8 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -22,6 +22,7 @@
22 22
23#include <linux/types.h> 23#include <linux/types.h>
24#include <linux/kvm_host.h> 24#include <linux/kvm_host.h>
25#include <asm/kvm_ppc.h>
25#include "timing.h" 26#include "timing.h"
26 27
27/* interrupt priority ordering */ 28
@@ -30,17 +31,24 @@
30#define BOOKE_IRQPRIO_ALIGNMENT 2 31#define BOOKE_IRQPRIO_ALIGNMENT 2
31#define BOOKE_IRQPRIO_PROGRAM 3 32#define BOOKE_IRQPRIO_PROGRAM 3
32#define BOOKE_IRQPRIO_FP_UNAVAIL 4 33#define BOOKE_IRQPRIO_FP_UNAVAIL 4
33#define BOOKE_IRQPRIO_SYSCALL 5 34#define BOOKE_IRQPRIO_SPE_UNAVAIL 5
34#define BOOKE_IRQPRIO_AP_UNAVAIL 6 35#define BOOKE_IRQPRIO_SPE_FP_DATA 6
35#define BOOKE_IRQPRIO_DTLB_MISS 7 36#define BOOKE_IRQPRIO_SPE_FP_ROUND 7
36#define BOOKE_IRQPRIO_ITLB_MISS 8 37#define BOOKE_IRQPRIO_SYSCALL 8
37#define BOOKE_IRQPRIO_MACHINE_CHECK 9 38#define BOOKE_IRQPRIO_AP_UNAVAIL 9
38#define BOOKE_IRQPRIO_DEBUG 10 39#define BOOKE_IRQPRIO_DTLB_MISS 10
39#define BOOKE_IRQPRIO_CRITICAL 11 40#define BOOKE_IRQPRIO_ITLB_MISS 11
40#define BOOKE_IRQPRIO_WATCHDOG 12 41#define BOOKE_IRQPRIO_MACHINE_CHECK 12
41#define BOOKE_IRQPRIO_EXTERNAL 13 42#define BOOKE_IRQPRIO_DEBUG 13
42#define BOOKE_IRQPRIO_FIT 14 43#define BOOKE_IRQPRIO_CRITICAL 14
43#define BOOKE_IRQPRIO_DECREMENTER 15 44#define BOOKE_IRQPRIO_WATCHDOG 15
45#define BOOKE_IRQPRIO_EXTERNAL 16
46#define BOOKE_IRQPRIO_FIT 17
47#define BOOKE_IRQPRIO_DECREMENTER 18
48#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19
49#define BOOKE_IRQPRIO_MAX 19
50
51extern unsigned long kvmppc_booke_handlers;
44 52
45/* Helper function for "full" MSR writes. No need to call this if only EE is 53/* Helper function for "full" MSR writes. No need to call this if only EE is
46 * changing. */ 54 * changing. */
@@ -57,4 +65,9 @@ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
57 }; 65 };
58} 66}
59 67
68int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
69 unsigned int inst, int *advance);
70int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
71int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
72
60#endif /* __KVM_BOOKE_H__ */ 73#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
new file mode 100644
index 000000000000..aebc65e93f4b
--- /dev/null
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -0,0 +1,266 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <linux/kvm_host.h>
21#include <asm/disassemble.h>
22
23#include "booke.h"
24
25#define OP_19_XOP_RFI 50
26
27#define OP_31_XOP_MFMSR 83
28#define OP_31_XOP_WRTEE 131
29#define OP_31_XOP_MTMSR 146
30#define OP_31_XOP_WRTEEI 163
31
32static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
33{
34 vcpu->arch.pc = vcpu->arch.srr0;
35 kvmppc_set_msr(vcpu, vcpu->arch.srr1);
36}
37
38int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
39 unsigned int inst, int *advance)
40{
41 int emulated = EMULATE_DONE;
42 int rs;
43 int rt;
44
45 switch (get_op(inst)) {
46 case 19:
47 switch (get_xop(inst)) {
48 case OP_19_XOP_RFI:
49 kvmppc_emul_rfi(vcpu);
50 kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
51 *advance = 0;
52 break;
53
54 default:
55 emulated = EMULATE_FAIL;
56 break;
57 }
58 break;
59
60 case 31:
61 switch (get_xop(inst)) {
62
63 case OP_31_XOP_MFMSR:
64 rt = get_rt(inst);
65 vcpu->arch.gpr[rt] = vcpu->arch.msr;
66 kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
67 break;
68
69 case OP_31_XOP_MTMSR:
70 rs = get_rs(inst);
71 kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
72 kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
73 break;
74
75 case OP_31_XOP_WRTEE:
76 rs = get_rs(inst);
77 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
78 | (vcpu->arch.gpr[rs] & MSR_EE);
79 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
80 break;
81
82 case OP_31_XOP_WRTEEI:
83 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
84 | (inst & MSR_EE);
85 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
86 break;
87
88 default:
89 emulated = EMULATE_FAIL;
90 }
91
92 break;
93
94 default:
95 emulated = EMULATE_FAIL;
96 }
97
98 return emulated;
99}
100
101int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
102{
103 int emulated = EMULATE_DONE;
104
105 switch (sprn) {
106 case SPRN_DEAR:
107 vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
108 case SPRN_ESR:
109 vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
110 case SPRN_DBCR0:
111 vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
112 case SPRN_DBCR1:
113 vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
114 case SPRN_DBSR:
115 vcpu->arch.dbsr &= ~vcpu->arch.gpr[rs]; break;
116 case SPRN_TSR:
117 vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
118 case SPRN_TCR:
119 vcpu->arch.tcr = vcpu->arch.gpr[rs];
120 kvmppc_emulate_dec(vcpu);
121 break;
122
123 /* Note: SPRG4-7 are user-readable. These values are
124 * loaded into the real SPRGs when resuming the
125 * guest. */
126 case SPRN_SPRG4:
127 vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
128 case SPRN_SPRG5:
129 vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
130 case SPRN_SPRG6:
131 vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
132 case SPRN_SPRG7:
133 vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
134
135 case SPRN_IVPR:
136 vcpu->arch.ivpr = vcpu->arch.gpr[rs];
137 break;
138 case SPRN_IVOR0:
139 vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
140 break;
141 case SPRN_IVOR1:
142 vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
143 break;
144 case SPRN_IVOR2:
145 vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
146 break;
147 case SPRN_IVOR3:
148 vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
149 break;
150 case SPRN_IVOR4:
151 vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
152 break;
153 case SPRN_IVOR5:
154 vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
155 break;
156 case SPRN_IVOR6:
157 vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
158 break;
159 case SPRN_IVOR7:
160 vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
161 break;
162 case SPRN_IVOR8:
163 vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
164 break;
165 case SPRN_IVOR9:
166 vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
167 break;
168 case SPRN_IVOR10:
169 vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
170 break;
171 case SPRN_IVOR11:
172 vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
173 break;
174 case SPRN_IVOR12:
175 vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
176 break;
177 case SPRN_IVOR13:
178 vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
179 break;
180 case SPRN_IVOR14:
181 vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
182 break;
183 case SPRN_IVOR15:
184 vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
185 break;
186
187 default:
188 emulated = EMULATE_FAIL;
189 }
190
191 return emulated;
192}
193
194int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
195{
196 int emulated = EMULATE_DONE;
197
198 switch (sprn) {
199 case SPRN_IVPR:
200 vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
201 case SPRN_DEAR:
202 vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
203 case SPRN_ESR:
204 vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
205 case SPRN_DBCR0:
206 vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
207 case SPRN_DBCR1:
208 vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
209 case SPRN_DBSR:
210 vcpu->arch.gpr[rt] = vcpu->arch.dbsr; break;
211
212 case SPRN_IVOR0:
213 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
214 break;
215 case SPRN_IVOR1:
216 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
217 break;
218 case SPRN_IVOR2:
219 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
220 break;
221 case SPRN_IVOR3:
222 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
223 break;
224 case SPRN_IVOR4:
225 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
226 break;
227 case SPRN_IVOR5:
228 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
229 break;
230 case SPRN_IVOR6:
231 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
232 break;
233 case SPRN_IVOR7:
234 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
235 break;
236 case SPRN_IVOR8:
237 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
238 break;
239 case SPRN_IVOR9:
240 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
241 break;
242 case SPRN_IVOR10:
243 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
244 break;
245 case SPRN_IVOR11:
246 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
247 break;
248 case SPRN_IVOR12:
249 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
250 break;
251 case SPRN_IVOR13:
252 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
253 break;
254 case SPRN_IVOR14:
255 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
256 break;
257 case SPRN_IVOR15:
258 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
259 break;
260
261 default:
262 emulated = EMULATE_FAIL;
263 }
264
265 return emulated;
266}
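
Both emulators dispatch on the primary opcode (get_op) and, for the opcode 19
and 31 forms, the extended opcode (get_xop). Those helpers from
asm/disassemble.h are plain field extractions from the 32-bit instruction
word; a standalone version of the bit layout, which should match the
definitions used here:

    #include <stdint.h>
    #include <stdio.h>

    /* Field extraction for a 32-bit PowerPC instruction word. */
    static unsigned int get_op(uint32_t inst)  { return inst >> 26; }
    static unsigned int get_xop(uint32_t inst) { return (inst >> 1) & 0x3ff; }
    static unsigned int get_rt(uint32_t inst)  { return (inst >> 21) & 0x1f; }

    int main(void)
    {
            uint32_t inst = 0x7c6000a6;  /* mfmsr r3 */

            /* -> op=31 xop=83 (OP_31_XOP_MFMSR) rt=3 */
            printf("op=%u xop=%u rt=%u\n",
                   get_op(inst), get_xop(inst), get_rt(inst));
            return 0;
    }
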
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 084ebcd7dd83..d0c6f841bbd1 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -86,6 +86,9 @@ KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG
86KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS 86KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS
87KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS 87KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS
88KVM_HANDLER BOOKE_INTERRUPT_DEBUG 88KVM_HANDLER BOOKE_INTERRUPT_DEBUG
89KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL
90KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA
91KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND
89 92
90_GLOBAL(kvmppc_handler_len) 93_GLOBAL(kvmppc_handler_len)
91 .long kvmppc_handler_1 - kvmppc_handler_0 94 .long kvmppc_handler_1 - kvmppc_handler_0
@@ -347,7 +350,9 @@ lightweight_exit:
347 lwz r3, VCPU_SHADOW_PID(r4) 350 lwz r3, VCPU_SHADOW_PID(r4)
348 mtspr SPRN_PID, r3 351 mtspr SPRN_PID, r3
349 352
353#ifdef CONFIG_44x
350 iccci 0, 0 /* XXX hack */ 354 iccci 0, 0 /* XXX hack */
355#endif
351 356
352 /* Load some guest volatiles. */ 357 /* Load some guest volatiles. */
353 lwz r0, VCPU_GPR(r0)(r4) 358 lwz r0, VCPU_GPR(r0)(r4)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
new file mode 100644
index 000000000000..d8067fd81cdd
--- /dev/null
+++ b/arch/powerpc/kvm/e500.c
@@ -0,0 +1,169 @@
1/*
2 * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
3 *
4 * Author: Yu Liu, <yu.liu@freescale.com>
5 *
6 * Description:
7 * This file is derived from arch/powerpc/kvm/44x.c,
8 * by Hollis Blanchard <hollisb@us.ibm.com>.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License, version 2, as
12 * published by the Free Software Foundation.
13 */
14
15#include <linux/kvm_host.h>
16#include <linux/err.h>
17
18#include <asm/reg.h>
19#include <asm/cputable.h>
20#include <asm/tlbflush.h>
21#include <asm/kvm_e500.h>
22#include <asm/kvm_ppc.h>
23
24#include "booke.h"
25#include "e500_tlb.h"
26
27void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
28{
29}
30
31void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
32{
33}
34
35void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
36{
37 kvmppc_e500_tlb_load(vcpu, cpu);
38}
39
40void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
41{
42 kvmppc_e500_tlb_put(vcpu);
43}
44
45int kvmppc_core_check_processor_compat(void)
46{
47 int r;
48
49 if (strcmp(cur_cpu_spec->cpu_name, "e500v2") == 0)
50 r = 0;
51 else
52 r = -ENOTSUPP;
53
54 return r;
55}
56
57int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
58{
59 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
60
61 kvmppc_e500_tlb_setup(vcpu_e500);
62
63 /* Use the same core version as the host's */
64 vcpu->arch.pvr = mfspr(SPRN_PVR);
65
66 return 0;
67}
68
69/* 'linear_address' is actually an encoding of AS|PID|EADDR. */
70int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
71 struct kvm_translation *tr)
72{
73 int index;
74 gva_t eaddr;
75 u8 pid;
76 u8 as;
77
78 eaddr = tr->linear_address;
79 pid = (tr->linear_address >> 32) & 0xff;
80 as = (tr->linear_address >> 40) & 0x1;
81
82 index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
83 if (index < 0) {
84 tr->valid = 0;
85 return 0;
86 }
87
88 tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
89 /* XXX what do "writeable" and "usermode" even mean? */
90 tr->valid = 1;
91
92 return 0;
93}
94
95struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
96{
97 struct kvmppc_vcpu_e500 *vcpu_e500;
98 struct kvm_vcpu *vcpu;
99 int err;
100
101 vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
102 if (!vcpu_e500) {
103 err = -ENOMEM;
104 goto out;
105 }
106
107 vcpu = &vcpu_e500->vcpu;
108 err = kvm_vcpu_init(vcpu, kvm, id);
109 if (err)
110 goto free_vcpu;
111
112 err = kvmppc_e500_tlb_init(vcpu_e500);
113 if (err)
114 goto uninit_vcpu;
115
116 return vcpu;
117
118uninit_vcpu:
119 kvm_vcpu_uninit(vcpu);
120free_vcpu:
121 kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
122out:
123 return ERR_PTR(err);
124}
125
126void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
127{
128 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
129
130 kvmppc_e500_tlb_uninit(vcpu_e500);
131 kvm_vcpu_uninit(vcpu);
132 kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
133}
134
135static int kvmppc_e500_init(void)
136{
137 int r, i;
138 unsigned long ivor[3];
139 unsigned long max_ivor = 0;
140
141 r = kvmppc_booke_init();
142 if (r)
143 return r;
144
145 /* copy extra E500 exception handlers */
146 ivor[0] = mfspr(SPRN_IVOR32);
147 ivor[1] = mfspr(SPRN_IVOR33);
148 ivor[2] = mfspr(SPRN_IVOR34);
149 for (i = 0; i < 3; i++) {
150 if (ivor[i] > max_ivor)
151 max_ivor = ivor[i];
152
153 memcpy((void *)kvmppc_booke_handlers + ivor[i],
154 kvmppc_handlers_start + (i + 16) * kvmppc_handler_len,
155 kvmppc_handler_len);
156 }
157 flush_icache_range(kvmppc_booke_handlers,
158 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
159
160 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE);
161}
162
163static void kvmppc_e500_exit(void)
164{
165 kvmppc_booke_exit();
166}
167
168module_init(kvmppc_e500_init);
169module_exit(kvmppc_e500_exit);
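
kvmppc_core_vcpu_translate() above expects userspace to pack the address
space and PID into the upper bits of linear_address: bits 0-31 hold the
effective address, bits 32-39 the PID, and bit 40 the AS. A sketch of the
packing side, with invented values:

    #include <stdint.h>
    #include <stdio.h>

    /* Pack AS|PID|EADDR the way kvmppc_core_vcpu_translate() unpacks it. */
    static uint64_t pack_xlate_addr(uint32_t eaddr, uint8_t pid, uint8_t as)
    {
            return ((uint64_t)(as & 1) << 40) | ((uint64_t)pid << 32) | eaddr;
    }

    int main(void)
    {
            uint64_t la = pack_xlate_addr(0xc0000000, 0x2a, 1);

            /* Round-trip the fields exactly as the kernel side does. */
            printf("eaddr=%08llx pid=%02llx as=%llx\n",
                   (unsigned long long)(la & 0xffffffff),
                   (unsigned long long)((la >> 32) & 0xff),
                   (unsigned long long)((la >> 40) & 1));
            return 0;
    }
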
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
new file mode 100644
index 000000000000..3f760414b9f8
--- /dev/null
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -0,0 +1,202 @@
1/*
2 * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
3 *
4 * Author: Yu Liu, <yu.liu@freescale.com>
5 *
6 * Description:
7 * This file is derived from arch/powerpc/kvm/44x_emulate.c,
8 * by Hollis Blanchard <hollisb@us.ibm.com>.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License, version 2, as
12 * published by the Free Software Foundation.
13 */
14
15#include <asm/kvm_ppc.h>
16#include <asm/disassemble.h>
17#include <asm/kvm_e500.h>
18
19#include "booke.h"
20#include "e500_tlb.h"
21
22#define XOP_TLBIVAX 786
23#define XOP_TLBSX 914
24#define XOP_TLBRE 946
25#define XOP_TLBWE 978
26
27int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
28 unsigned int inst, int *advance)
29{
30 int emulated = EMULATE_DONE;
31 int ra;
32 int rb;
33
34 switch (get_op(inst)) {
35 case 31:
36 switch (get_xop(inst)) {
37
38 case XOP_TLBRE:
39 emulated = kvmppc_e500_emul_tlbre(vcpu);
40 break;
41
42 case XOP_TLBWE:
43 emulated = kvmppc_e500_emul_tlbwe(vcpu);
44 break;
45
46 case XOP_TLBSX:
47 rb = get_rb(inst);
48 emulated = kvmppc_e500_emul_tlbsx(vcpu, rb);
49 break;
50
51 case XOP_TLBIVAX:
52 ra = get_ra(inst);
53 rb = get_rb(inst);
54 emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb);
55 break;
56
57 default:
58 emulated = EMULATE_FAIL;
59 }
60
61 break;
62
63 default:
64 emulated = EMULATE_FAIL;
65 }
66
67 if (emulated == EMULATE_FAIL)
68 emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
69
70 return emulated;
71}
72
73int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
74{
75 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
76 int emulated = EMULATE_DONE;
77
78 switch (sprn) {
79 case SPRN_PID:
80 vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
81 vcpu->arch.pid = vcpu->arch.gpr[rs];
82 break;
83 case SPRN_PID1:
84 vcpu_e500->pid[1] = vcpu->arch.gpr[rs]; break;
85 case SPRN_PID2:
86 vcpu_e500->pid[2] = vcpu->arch.gpr[rs]; break;
87 case SPRN_MAS0:
88 vcpu_e500->mas0 = vcpu->arch.gpr[rs]; break;
89 case SPRN_MAS1:
90 vcpu_e500->mas1 = vcpu->arch.gpr[rs]; break;
91 case SPRN_MAS2:
92 vcpu_e500->mas2 = vcpu->arch.gpr[rs]; break;
93 case SPRN_MAS3:
94 vcpu_e500->mas3 = vcpu->arch.gpr[rs]; break;
95 case SPRN_MAS4:
96 vcpu_e500->mas4 = vcpu->arch.gpr[rs]; break;
97 case SPRN_MAS6:
98 vcpu_e500->mas6 = vcpu->arch.gpr[rs]; break;
99 case SPRN_MAS7:
100 vcpu_e500->mas7 = vcpu->arch.gpr[rs]; break;
101 case SPRN_L1CSR1:
102 vcpu_e500->l1csr1 = vcpu->arch.gpr[rs]; break;
103 case SPRN_HID0:
104 vcpu_e500->hid0 = vcpu->arch.gpr[rs]; break;
105 case SPRN_HID1:
106 vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break;
107
108 case SPRN_MMUCSR0:
109 emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
110 vcpu->arch.gpr[rs]);
111 break;
112
113 /* extra exceptions */
114 case SPRN_IVOR32:
115 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs];
116 break;
117 case SPRN_IVOR33:
118 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = vcpu->arch.gpr[rs];
119 break;
120 case SPRN_IVOR34:
121 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = vcpu->arch.gpr[rs];
122 break;
123 case SPRN_IVOR35:
124 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = vcpu->arch.gpr[rs];
125 break;
126
127 default:
128 emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
129 }
130
131 return emulated;
132}
133
134int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
135{
136 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
137 int emulated = EMULATE_DONE;
138
139 switch (sprn) {
140 case SPRN_PID:
141 vcpu->arch.gpr[rt] = vcpu_e500->pid[0]; break;
142 case SPRN_PID1:
143 vcpu->arch.gpr[rt] = vcpu_e500->pid[1]; break;
144 case SPRN_PID2:
145 vcpu->arch.gpr[rt] = vcpu_e500->pid[2]; break;
146 case SPRN_MAS0:
147 vcpu->arch.gpr[rt] = vcpu_e500->mas0; break;
148 case SPRN_MAS1:
149 vcpu->arch.gpr[rt] = vcpu_e500->mas1; break;
150 case SPRN_MAS2:
151 vcpu->arch.gpr[rt] = vcpu_e500->mas2; break;
152 case SPRN_MAS3:
153 vcpu->arch.gpr[rt] = vcpu_e500->mas3; break;
154 case SPRN_MAS4:
155 vcpu->arch.gpr[rt] = vcpu_e500->mas4; break;
156 case SPRN_MAS6:
157 vcpu->arch.gpr[rt] = vcpu_e500->mas6; break;
158 case SPRN_MAS7:
159 vcpu->arch.gpr[rt] = vcpu_e500->mas7; break;
160
161 case SPRN_TLB0CFG:
162 vcpu->arch.gpr[rt] = mfspr(SPRN_TLB0CFG);
163 vcpu->arch.gpr[rt] &= ~0xfffUL;
164 vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[0];
165 break;
166
167 case SPRN_TLB1CFG:
168 vcpu->arch.gpr[rt] = mfspr(SPRN_TLB1CFG);
169 vcpu->arch.gpr[rt] &= ~0xfffUL;
170 vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[1];
171 break;
172
173 case SPRN_L1CSR1:
174 vcpu->arch.gpr[rt] = vcpu_e500->l1csr1; break;
175 case SPRN_HID0:
176 vcpu->arch.gpr[rt] = vcpu_e500->hid0; break;
177 case SPRN_HID1:
178 vcpu->arch.gpr[rt] = vcpu_e500->hid1; break;
179
180 case SPRN_MMUCSR0:
181 vcpu->arch.gpr[rt] = 0; break;
182
183 /* extra exceptions */
184 case SPRN_IVOR32:
185 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
186 break;
187 case SPRN_IVOR33:
188 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
189 break;
190 case SPRN_IVOR34:
191 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
192 break;
193 case SPRN_IVOR35:
194 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
195 break;
196 default:
197 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
198 }
199
200 return emulated;
201}
202
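
The SPRN_TLB0CFG/TLB1CFG reads above pass the host's configuration bits
through but overwrite the low 12 bits (the entry-count field) with the
guest's TLB size. The same masking as standalone arithmetic, with a
hypothetical host value:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t host_tlbncfg = 0x04110040; /* hypothetical host TLB1CFG */
            uint32_t guest_size   = 16;         /* guest_tlb_size[1] */

            /* Keep the upper config fields, substitute the guest's count. */
            uint32_t guest_tlbncfg = (host_tlbncfg & ~0xfffUL) | guest_size;

            printf("%08x\n", (unsigned)guest_tlbncfg);  /* -> 04110010 */
            return 0;
    }
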
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
new file mode 100644
index 000000000000..0e773fc2d5e4
--- /dev/null
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -0,0 +1,757 @@
1/*
2 * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
3 *
4 * Author: Yu Liu, yu.liu@freescale.com
5 *
6 * Description:
7 * This file is based on arch/powerpc/kvm/44x_tlb.c,
8 * by Hollis Blanchard <hollisb@us.ibm.com>.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License, version 2, as
12 * published by the Free Software Foundation.
13 */
14
15#include <linux/types.h>
16#include <linux/string.h>
17#include <linux/kvm.h>
18#include <linux/kvm_host.h>
19#include <linux/highmem.h>
20#include <asm/kvm_ppc.h>
21#include <asm/kvm_e500.h>
22
23#include "../mm/mmu_decl.h"
24#include "e500_tlb.h"
25
26#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
27
28static unsigned int tlb1_entry_num;
29
30void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
31{
32 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
33 struct tlbe *tlbe;
34 int i, tlbsel;
35
36 printk("| %8s | %8s | %8s | %8s | %8s |\n",
37 "nr", "mas1", "mas2", "mas3", "mas7");
38
39 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
40 printk("Guest TLB%d:\n", tlbsel);
41 for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) {
42 tlbe = &vcpu_e500->guest_tlb[tlbsel][i];
43 if (tlbe->mas1 & MAS1_VALID)
44 printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n",
45 tlbsel, i, tlbe->mas1, tlbe->mas2,
46 tlbe->mas3, tlbe->mas7);
47 }
48 }
49
50 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
51 printk("Shadow TLB%d:\n", tlbsel);
52 for (i = 0; i < vcpu_e500->shadow_tlb_size[tlbsel]; i++) {
53 tlbe = &vcpu_e500->shadow_tlb[tlbsel][i];
54 if (tlbe->mas1 & MAS1_VALID)
55 printk(" S[%d][%3d] | %08X | %08X | %08X | %08X |\n",
56 tlbsel, i, tlbe->mas1, tlbe->mas2,
57 tlbe->mas3, tlbe->mas7);
58 }
59 }
60}
61
62static inline unsigned int tlb0_get_next_victim(
63 struct kvmppc_vcpu_e500 *vcpu_e500)
64{
65 unsigned int victim;
66
67 victim = vcpu_e500->guest_tlb_nv[0]++;
68 if (unlikely(vcpu_e500->guest_tlb_nv[0] >= KVM_E500_TLB0_WAY_NUM))
69 vcpu_e500->guest_tlb_nv[0] = 0;
70
71 return victim;
72}
73
74static inline unsigned int tlb1_max_shadow_size(void)
75{
76 return tlb1_entry_num - tlbcam_index;
77}
78
79static inline int tlbe_is_writable(struct tlbe *tlbe)
80{
81 return tlbe->mas3 & (MAS3_SW|MAS3_UW);
82}
83
84static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
85{
86 /* Mask off reserved bits. */
87 mas3 &= MAS3_ATTRIB_MASK;
88
89 if (!usermode) {
90 /* Guest is in supervisor mode,
91 * so we need to translate guest
92 * supervisor permissions into user permissions. */
93 mas3 &= ~E500_TLB_USER_PERM_MASK;
94 mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1;
95 }
96
97 return mas3 | E500_TLB_SUPER_PERM_MASK;
98}
99
100static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
101{
102#ifdef CONFIG_SMP
103 return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
104#else
105 return mas2 & MAS2_ATTRIB_MASK;
106#endif
107}
108
109/*
110 * Write a shadow TLB entry into the host TLB.
111 */
112static inline void __write_host_tlbe(struct tlbe *stlbe)
113{
114 mtspr(SPRN_MAS1, stlbe->mas1);
115 mtspr(SPRN_MAS2, stlbe->mas2);
116 mtspr(SPRN_MAS3, stlbe->mas3);
117 mtspr(SPRN_MAS7, stlbe->mas7);
118 __asm__ __volatile__ ("tlbwe\n" : : );
119}
120
121static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
122 int tlbsel, int esel)
123{
124 struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
125
126 local_irq_disable();
127 if (tlbsel == 0) {
128 __write_host_tlbe(stlbe);
129 } else {
130 register unsigned int mas0;
131
132 mas0 = mfspr(SPRN_MAS0);
133
134 mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel)));
135 __write_host_tlbe(stlbe);
136
137 mtspr(SPRN_MAS0, mas0);
138 }
139 local_irq_enable();
140}
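to_htlb1_esel() fills host TLB1 from the top down, so shadow entries never collide with the host's own tlbcam mappings at the bottom. A worked example with hypothetical sizes:

    /* tlb1_entry_num == 16, tlbcam_index == 4:                      */
    /*   to_htlb1_esel(0)  == 15, to_htlb1_esel(11) == 4             */
    /* tlb1_max_shadow_size() == 12, so shadow esels 0..11 occupy    */
    /* host entries 15..4 and leave 0..3 to the host's own mappings. */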
141
142void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
143{
144 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
145 int i;
146 unsigned register mas0;
147
148 /* Load all valid TLB1 entries to reduce guest TLB miss faults */
149 local_irq_disable();
150 mas0 = mfspr(SPRN_MAS0);
151 for (i = 0; i < tlb1_max_shadow_size(); i++) {
152 struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
153
154 if (get_tlb_v(stlbe)) {
155 mtspr(SPRN_MAS0, MAS0_TLBSEL(1)
156 | MAS0_ESEL(to_htlb1_esel(i)));
157 __write_host_tlbe(stlbe);
158 }
159 }
160 mtspr(SPRN_MAS0, mas0);
161 local_irq_enable();
162}
163
164void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
165{
166 _tlbil_all();
167}
168
169/* Search the guest TLB for a matching entry. */
170static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
171 gva_t eaddr, int tlbsel, unsigned int pid, int as)
172{
173 int i;
174
175 /* XXX Replace loop with fancy data structures. */
176 for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) {
177 struct tlbe *tlbe = &vcpu_e500->guest_tlb[tlbsel][i];
178 unsigned int tid;
179
180 if (eaddr < get_tlb_eaddr(tlbe))
181 continue;
182
183 if (eaddr > get_tlb_end(tlbe))
184 continue;
185
186 tid = get_tlb_tid(tlbe);
187 if (tid && (tid != pid))
188 continue;
189
190 if (!get_tlb_v(tlbe))
191 continue;
192
193 if (get_tlb_ts(tlbe) != as && as != -1)
194 continue;
195
196 return i;
197 }
198
199 return -1;
200}
201
202static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500,
203 int tlbsel, int esel)
204{
205 struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
206 struct page *page = vcpu_e500->shadow_pages[tlbsel][esel];
207
208 if (page) {
209 vcpu_e500->shadow_pages[tlbsel][esel] = NULL;
210
211 if (get_tlb_v(stlbe)) {
212 if (tlbe_is_writable(stlbe))
213 kvm_release_page_dirty(page);
214 else
215 kvm_release_page_clean(page);
216 }
217 }
218}
219
220static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
221 int tlbsel, int esel)
222{
223 struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
224
225 kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
226 stlbe->mas1 = 0;
227 KVMTRACE_5D(STLB_INVAL, &vcpu_e500->vcpu, index_of(tlbsel, esel),
228 stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
229 handler);
230}
231
232static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
233 gva_t eaddr, gva_t eend, u32 tid)
234{
235 unsigned int pid = tid & 0xff;
236 unsigned int i;
237
238 /* XXX Replace loop with fancy data structures. */
239 for (i = 0; i < vcpu_e500->guest_tlb_size[1]; i++) {
240 struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
241 unsigned int tid;
242
243 if (!get_tlb_v(stlbe))
244 continue;
245
246 if (eend < get_tlb_eaddr(stlbe))
247 continue;
248
249 if (eaddr > get_tlb_end(stlbe))
250 continue;
251
252 tid = get_tlb_tid(stlbe);
253 if (tid && (tid != pid))
254 continue;
255
256 kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
257 write_host_tlbe(vcpu_e500, 1, i);
258 }
259}
260
261static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
262 unsigned int eaddr, int as)
263{
264 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
265 unsigned int victim, pidsel, tsized;
266 int tlbsel;
267
268 /* Since we only have two TLBs, only the lower bit is used. */
269 tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
270 victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
271 pidsel = (vcpu_e500->mas4 >> 16) & 0xf;
272 tsized = (vcpu_e500->mas4 >> 8) & 0xf;
273
274 vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
275 | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
276 vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
277 | MAS1_TID(vcpu_e500->pid[pidsel])
278 | MAS1_TSIZE(tsized);
279 vcpu_e500->mas2 = (eaddr & MAS2_EPN)
280 | (vcpu_e500->mas4 & MAS2_ATTRIB_MASK);
281 vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
282 vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1)
283 | (get_cur_pid(vcpu) << 16)
284 | (as ? MAS6_SAS : 0);
285 vcpu_e500->mas7 = 0;
286}
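A sketch of the MAS4 field decode performed above; the sample value is hypothetical and the field positions simply mirror the shifts in the code:

    u32 mas4   = 0x00000100;          /* hypothetical guest MAS4 defaults   */
    int tlbsel = (mas4 >> 28) & 0x1;  /* 0: refills default to TLB0         */
    int pidsel = (mas4 >> 16) & 0xf;  /* 0: MAS1.TID loaded from guest PID0 */
    int tsized = (mas4 >> 8) & 0xf;   /* 1: default TSIZE of 4KB            */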
287
288static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
289 u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel)
290{
291 struct page *new_page;
292 struct tlbe *stlbe;
293 hpa_t hpaddr;
294
295 stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
296
297 /* Get reference to new page. */
298 new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
299 if (is_error_page(new_page)) {
300 printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
301 kvm_release_page_clean(new_page);
302 return;
303 }
304 hpaddr = page_to_phys(new_page);
305
306 /* Drop reference to old page. */
307 kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
308
309 vcpu_e500->shadow_pages[tlbsel][esel] = new_page;
310
311 /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
312 stlbe->mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K)
313 | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
314 stlbe->mas2 = (gvaddr & MAS2_EPN)
315 | e500_shadow_mas2_attrib(gtlbe->mas2,
316 vcpu_e500->vcpu.arch.msr & MSR_PR);
317 stlbe->mas3 = (hpaddr & MAS3_RPN)
318 | e500_shadow_mas3_attrib(gtlbe->mas3,
319 vcpu_e500->vcpu.arch.msr & MSR_PR);
320 stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
321
322 KVMTRACE_5D(STLB_WRITE, &vcpu_e500->vcpu, index_of(tlbsel, esel),
323 stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
324 handler);
325}
326
327/* XXX only map the one-to-one case, for now use TLB0 */
328static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500,
329 int tlbsel, int esel)
330{
331 struct tlbe *gtlbe;
332
333 gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
334
335 kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
336 get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
337 gtlbe, tlbsel, esel);
338
339 return esel;
340}
341
342/* Caller must ensure that the specified guest TLB entry is safe to insert into
343 * the shadow TLB. */
344/* XXX for both one-to-one and one-to-many, for now use TLB1 */
345static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
346 u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe)
347{
348 unsigned int victim;
349
350 victim = vcpu_e500->guest_tlb_nv[1]++;
351
352 if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size()))
353 vcpu_e500->guest_tlb_nv[1] = 0;
354
355 kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim);
356
357 return victim;
358}
359
360/* Invalidate all guest kernel mappings when entering user mode,
361 * so that when they fault back in they will get the
362 * proper permission bits. */
363void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
364{
365 if (usermode) {
366 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
367 int i;
368
369 /* XXX Replace loop with fancy data structures. */
370 for (i = 0; i < tlb1_max_shadow_size(); i++)
371 kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
372
373 _tlbil_all();
374 }
375}
376
377static int kvmppc_e500_gtlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
378 int tlbsel, int esel)
379{
380 struct tlbe *gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
381
382 if (unlikely(get_tlb_iprot(gtlbe)))
383 return -1;
384
385 if (tlbsel == 1) {
386 kvmppc_e500_tlb1_invalidate(vcpu_e500, get_tlb_eaddr(gtlbe),
387 get_tlb_end(gtlbe),
388 get_tlb_tid(gtlbe));
389 } else {
390 kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
391 }
392
393 gtlbe->mas1 = 0;
394
395 return 0;
396}
397
398int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
399{
400 int esel;
401
402 if (value & MMUCSR0_TLB0FI)
403 for (esel = 0; esel < vcpu_e500->guest_tlb_size[0]; esel++)
404 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel);
405 if (value & MMUCSR0_TLB1FI)
406 for (esel = 0; esel < vcpu_e500->guest_tlb_size[1]; esel++)
407 kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
408
409 _tlbil_all();
410
411 return EMULATE_DONE;
412}
413
414int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
415{
416 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
417 unsigned int ia;
418 int esel, tlbsel;
419 gva_t ea;
420
421 ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb];
422
423 ia = (ea >> 2) & 0x1;
424
425 /* Since we only have two TLBs, only the lower bit is used. */
426 tlbsel = (ea >> 3) & 0x1;
427
428 if (ia) {
429 /* invalidate all entries */
430 for (esel = 0; esel < vcpu_e500->guest_tlb_size[tlbsel]; esel++)
431 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
432 } else {
433 ea &= 0xfffff000;
434 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel,
435 get_cur_pid(vcpu), -1);
436 if (esel >= 0)
437 kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
438 }
439
440 _tlbil_all();
441
442 return EMULATE_DONE;
443}
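For tlbivax the effective address doubles as the operand encoding; a sketch of the decode above, with a hypothetical EA:

    gva_t ea = 0x1000000c;       /* hypothetical tlbivax operand          */
    int ia   = (ea >> 2) & 0x1;  /* 1: invalidate-all flag is set ...     */
    int sel  = (ea >> 3) & 0x1;  /* 1: ... so flash-invalidate guest TLB1 */
    /* With ia == 0, only the entry matching ea & 0xfffff000 is dropped. */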
444
445int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
446{
447 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
448 int tlbsel, esel;
449 struct tlbe *gtlbe;
450
451 tlbsel = get_tlb_tlbsel(vcpu_e500);
452 esel = get_tlb_esel(vcpu_e500, tlbsel);
453
454 gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
455 vcpu_e500->mas0 &= ~MAS0_NV(~0);
456 vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
457 vcpu_e500->mas1 = gtlbe->mas1;
458 vcpu_e500->mas2 = gtlbe->mas2;
459 vcpu_e500->mas3 = gtlbe->mas3;
460 vcpu_e500->mas7 = gtlbe->mas7;
461
462 return EMULATE_DONE;
463}
464
465int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
466{
467 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
468 int as = !!get_cur_sas(vcpu_e500);
469 unsigned int pid = get_cur_spid(vcpu_e500);
470 int esel, tlbsel;
471 struct tlbe *gtlbe = NULL;
472 gva_t ea;
473
474 ea = vcpu->arch.gpr[rb];
475
476 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
477 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
478 if (esel >= 0) {
479 gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
480 break;
481 }
482 }
483
484 if (gtlbe) {
485 vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
486 | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
487 vcpu_e500->mas1 = gtlbe->mas1;
488 vcpu_e500->mas2 = gtlbe->mas2;
489 vcpu_e500->mas3 = gtlbe->mas3;
490 vcpu_e500->mas7 = gtlbe->mas7;
491 } else {
492 int victim;
493
494 /* Since we only have two TLBs, only the lower bit is used. */
495 tlbsel = vcpu_e500->mas4 >> 28 & 0x1;
496 victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
497
498 vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
499 | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
500 vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0)
501 | (vcpu_e500->mas6 & (MAS6_SAS ? MAS1_TS : 0))
502 | (vcpu_e500->mas4 & MAS4_TSIZED(~0));
503 vcpu_e500->mas2 &= MAS2_EPN;
504 vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK;
505 vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
506 vcpu_e500->mas7 = 0;
507 }
508
509 return EMULATE_DONE;
510}
511
512int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
513{
514 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
515 u64 eaddr;
516 u64 raddr;
517 u32 tid;
518 struct tlbe *gtlbe;
519 int tlbsel, esel, stlbsel, sesel;
520
521 tlbsel = get_tlb_tlbsel(vcpu_e500);
522 esel = get_tlb_esel(vcpu_e500, tlbsel);
523
524 gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
525
526 if (get_tlb_v(gtlbe) && tlbsel == 1) {
527 eaddr = get_tlb_eaddr(gtlbe);
528 tid = get_tlb_tid(gtlbe);
529 kvmppc_e500_tlb1_invalidate(vcpu_e500, eaddr,
530 get_tlb_end(gtlbe), tid);
531 }
532
533 gtlbe->mas1 = vcpu_e500->mas1;
534 gtlbe->mas2 = vcpu_e500->mas2;
535 gtlbe->mas3 = vcpu_e500->mas3;
536 gtlbe->mas7 = vcpu_e500->mas7;
537
538 KVMTRACE_5D(GTLB_WRITE, vcpu, vcpu_e500->mas0,
539 gtlbe->mas1, gtlbe->mas2, gtlbe->mas3, gtlbe->mas7,
540 handler);
541
542 /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
543 if (tlbe_is_host_safe(vcpu, gtlbe)) {
544 switch (tlbsel) {
545 case 0:
546 /* TLB0 */
547 gtlbe->mas1 &= ~MAS1_TSIZE(~0);
548 gtlbe->mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K);
549
550 stlbsel = 0;
551 sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
552
553 break;
554
555 case 1:
556 /* TLB1 */
557 eaddr = get_tlb_eaddr(gtlbe);
558 raddr = get_tlb_raddr(gtlbe);
559
560 /* Create a 4KB mapping on the host.
561 * If the guest wanted a large page,
562 * only the first 4KB is mapped here and the rest
563 * are mapped on the fly. */
564 stlbsel = 1;
565 sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
566 raddr >> PAGE_SHIFT, gtlbe);
567 break;
568
569 default:
570 BUG();
571 }
572 write_host_tlbe(vcpu_e500, stlbsel, sesel);
573 }
574
575 return EMULATE_DONE;
576}
577
578int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
579{
580 unsigned int as = !!(vcpu->arch.msr & MSR_IS);
581
582 return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
583}
584
585int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
586{
587 unsigned int as = !!(vcpu->arch.msr & MSR_DS);
588
589 return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
590}
591
592void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
593{
594 unsigned int as = !!(vcpu->arch.msr & MSR_IS);
595
596 kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as);
597}
598
599void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
600{
601 unsigned int as = !!(vcpu->arch.msr & MSR_DS);
602
603 kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as);
604}
605
606gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
607 gva_t eaddr)
608{
609 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
610 struct tlbe *gtlbe =
611 &vcpu_e500->guest_tlb[tlbsel_of(index)][esel_of(index)];
612 u64 pgmask = get_tlb_bytes(gtlbe) - 1;
613
614 return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
615}
616
617void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
618{
619 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
620 int tlbsel, i;
621
622 for (tlbsel = 0; tlbsel < 2; tlbsel++)
623 for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++)
624 kvmppc_e500_shadow_release(vcpu_e500, tlbsel, i);
625
626 /* Discard all guest mappings. */
627 _tlbil_all();
628}
629
630void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
631 unsigned int index)
632{
633 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
634 int tlbsel = tlbsel_of(index);
635 int esel = esel_of(index);
636 int stlbsel, sesel;
637
638 switch (tlbsel) {
639 case 0:
640 stlbsel = 0;
641 sesel = esel;
642 break;
643
644 case 1: {
645 gfn_t gfn = gpaddr >> PAGE_SHIFT;
646 struct tlbe *gtlbe
647 = &vcpu_e500->guest_tlb[tlbsel][esel];
648
649 stlbsel = 1;
650 sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe);
651 break;
652 }
653
654 default:
655 BUG();
656 break;
657 }
658 write_host_tlbe(vcpu_e500, stlbsel, sesel);
659}
660
661int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
662 gva_t eaddr, unsigned int pid, int as)
663{
664 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
665 int esel, tlbsel;
666
667 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
668 esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as);
669 if (esel >= 0)
670 return index_of(tlbsel, esel);
671 }
672
673 return -1;
674}
675
676void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
677{
678 struct tlbe *tlbe;
679
680 /* Insert a large initial mapping for the guest. */
681 tlbe = &vcpu_e500->guest_tlb[1][0];
682 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M);
683 tlbe->mas2 = 0;
684 tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
685 tlbe->mas7 = 0;
686
687 /* 4K map for serial output. Used by the kernel wrapper. */
688 tlbe = &vcpu_e500->guest_tlb[1][1];
689 tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K);
690 tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
691 tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
692 tlbe->mas7 = 0;
693}
694
695int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
696{
697 tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
698
699 vcpu_e500->guest_tlb_size[0] = KVM_E500_TLB0_SIZE;
700 vcpu_e500->guest_tlb[0] =
701 kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
702 if (vcpu_e500->guest_tlb[0] == NULL)
703 goto err_out;
704
705 vcpu_e500->shadow_tlb_size[0] = KVM_E500_TLB0_SIZE;
706 vcpu_e500->shadow_tlb[0] =
707 kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
708 if (vcpu_e500->shadow_tlb[0] == NULL)
709 goto err_out_guest0;
710
711 vcpu_e500->guest_tlb_size[1] = KVM_E500_TLB1_SIZE;
712 vcpu_e500->guest_tlb[1] =
713 kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
714 if (vcpu_e500->guest_tlb[1] == NULL)
715 goto err_out_shadow0;
716
717 vcpu_e500->shadow_tlb_size[1] = tlb1_entry_num;
718 vcpu_e500->shadow_tlb[1] =
719 kzalloc(sizeof(struct tlbe) * tlb1_entry_num, GFP_KERNEL);
720 if (vcpu_e500->shadow_tlb[1] == NULL)
721 goto err_out_guest1;
722
723 vcpu_e500->shadow_pages[0] = (struct page **)
724 kzalloc(sizeof(struct page *) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
725 if (vcpu_e500->shadow_pages[0] == NULL)
726 goto err_out_shadow1;
727
728 vcpu_e500->shadow_pages[1] = (struct page **)
729 kzalloc(sizeof(struct page *) * tlb1_entry_num, GFP_KERNEL);
730 if (vcpu_e500->shadow_pages[1] == NULL)
731 goto err_out_page0;
732
733 return 0;
734
735err_out_page0:
736 kfree(vcpu_e500->shadow_pages[0]);
737err_out_shadow1:
738 kfree(vcpu_e500->shadow_tlb[1]);
739err_out_guest1:
740 kfree(vcpu_e500->guest_tlb[1]);
741err_out_shadow0:
742 kfree(vcpu_e500->shadow_tlb[0]);
743err_out_guest0:
744 kfree(vcpu_e500->guest_tlb[0]);
745err_out:
746 return -1;
747}
748
749void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
750{
751 kfree(vcpu_e500->shadow_pages[1]);
752 kfree(vcpu_e500->shadow_pages[0]);
753 kfree(vcpu_e500->shadow_tlb[1]);
754 kfree(vcpu_e500->guest_tlb[1]);
755 kfree(vcpu_e500->shadow_tlb[0]);
756 kfree(vcpu_e500->guest_tlb[0]);
757}
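kvmppc_e500_tlb_init() above follows the kernel's goto-unwind convention: allocate in order, release in reverse on failure. A stripped-down user-space sketch of the same idiom (all names hypothetical):

    #include <stdlib.h>

    struct ctx { void *a, *b; };

    int ctx_init(struct ctx *c)
    {
            c->a = malloc(128);
            if (!c->a)
                    goto err_out;
            c->b = malloc(256);
            if (!c->b)
                    goto err_free_a;
            return 0;            /* every allocation succeeded */

    err_free_a:                  /* unwind in reverse order */
            free(c->a);
    err_out:
            return -1;
    }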
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
new file mode 100644
index 000000000000..45b064b76906
--- /dev/null
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -0,0 +1,185 @@
1/*
2 * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
3 *
4 * Author: Yu Liu, yu.liu@freescale.com
5 *
6 * Description:
7 * This file is based on arch/powerpc/kvm/44x_tlb.h,
8 * by Hollis Blanchard <hollisb@us.ibm.com>.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License, version 2, as
12 * published by the Free Software Foundation.
13 */
14
15#ifndef __KVM_E500_TLB_H__
16#define __KVM_E500_TLB_H__
17
18#include <linux/kvm_host.h>
19#include <asm/mmu-fsl-booke.h>
20#include <asm/tlb.h>
21#include <asm/kvm_e500.h>
22
23#define KVM_E500_TLB0_WAY_SIZE_BIT 7 /* Fixed */
24#define KVM_E500_TLB0_WAY_SIZE (1UL << KVM_E500_TLB0_WAY_SIZE_BIT)
25#define KVM_E500_TLB0_WAY_SIZE_MASK (KVM_E500_TLB0_WAY_SIZE - 1)
26
27#define KVM_E500_TLB0_WAY_NUM_BIT 1 /* No greater than 7 */
28#define KVM_E500_TLB0_WAY_NUM (1UL << KVM_E500_TLB0_WAY_NUM_BIT)
29#define KVM_E500_TLB0_WAY_NUM_MASK (KVM_E500_TLB0_WAY_NUM - 1)
30
31#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
32#define KVM_E500_TLB1_SIZE 16
33
34#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF))
35#define tlbsel_of(index) ((index) >> 16)
36#define esel_of(index) ((index) & 0xFFFF)
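A round-trip through the packing macros above:

    int idx = index_of(1, 5);   /* (1 << 16) | 5 == 0x10005 */
    int sel = tlbsel_of(idx);   /* 1: guest TLB1            */
    int e   = esel_of(idx);     /* 5                        */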
37
38#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
39#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
40#define MAS2_ATTRIB_MASK \
41 (MAS2_X0 | MAS2_X1)
42#define MAS3_ATTRIB_MASK \
43 (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
44 | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
45
46extern void kvmppc_dump_tlbs(struct kvm_vcpu *);
47extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong);
48extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *);
49extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *);
50extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int);
51extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int);
52extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int);
53extern void kvmppc_e500_tlb_put(struct kvm_vcpu *);
54extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int);
55extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *);
56extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *);
57extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
58
59/* TLB helper functions */
60static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
61{
62 return (tlbe->mas1 >> 8) & 0xf;
63}
64
65static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
66{
67 return tlbe->mas2 & 0xfffff000;
68}
69
70static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
71{
72 unsigned int pgsize = get_tlb_size(tlbe);
73 return 1ULL << 10 << (pgsize << 1);
74}
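The shift chain implements bytes = 1KB * 4^TSIZE. Two worked values, assuming BOOKE_PAGESZ_4K == 1 and BOOKE_PAGESZ_256M == 9 as the setup code in e500_tlb.c suggests:

    /* TSIZE = 1: 1ULL << 10 << (1 << 1) == 1 << 12 == 4KB   */
    /* TSIZE = 9: 1ULL << 10 << (9 << 1) == 1 << 28 == 256MB */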
75
76static inline gva_t get_tlb_end(const struct tlbe *tlbe)
77{
78 u64 bytes = get_tlb_bytes(tlbe);
79 return get_tlb_eaddr(tlbe) + bytes - 1;
80}
81
82static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
83{
84 u64 rpn = tlbe->mas7;
85 return (rpn << 32) | (tlbe->mas3 & 0xfffff000);
86}
87
88static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
89{
90 return (tlbe->mas1 >> 16) & 0xff;
91}
92
93static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
94{
95 return (tlbe->mas1 >> 12) & 0x1;
96}
97
98static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
99{
100 return (tlbe->mas1 >> 31) & 0x1;
101}
102
103static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe)
104{
105 return (tlbe->mas1 >> 30) & 0x1;
106}
107
108static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
109{
110 return vcpu->arch.pid & 0xff;
111}
112
113static inline unsigned int get_cur_spid(
114 const struct kvmppc_vcpu_e500 *vcpu_e500)
115{
116 return (vcpu_e500->mas6 >> 16) & 0xff;
117}
118
119static inline unsigned int get_cur_sas(
120 const struct kvmppc_vcpu_e500 *vcpu_e500)
121{
122 return vcpu_e500->mas6 & 0x1;
123}
124
125static inline unsigned int get_tlb_tlbsel(
126 const struct kvmppc_vcpu_e500 *vcpu_e500)
127{
128 /*
129 * The manual says tlbsel is 2 bits wide.
130 * Since we only have two TLBs, only the lower bit is used.
131 */
132 return (vcpu_e500->mas0 >> 28) & 0x1;
133}
134
135static inline unsigned int get_tlb_nv_bit(
136 const struct kvmppc_vcpu_e500 *vcpu_e500)
137{
138 return vcpu_e500->mas0 & 0xfff;
139}
140
141static inline unsigned int get_tlb_esel_bit(
142 const struct kvmppc_vcpu_e500 *vcpu_e500)
143{
144 return (vcpu_e500->mas0 >> 16) & 0xfff;
145}
146
147static inline unsigned int get_tlb_esel(
148 const struct kvmppc_vcpu_e500 *vcpu_e500,
149 int tlbsel)
150{
151 unsigned int esel = get_tlb_esel_bit(vcpu_e500);
152
153 if (tlbsel == 0) {
154 esel &= KVM_E500_TLB0_WAY_NUM_MASK;
155 esel |= ((vcpu_e500->mas2 >> 12) & KVM_E500_TLB0_WAY_SIZE_MASK)
156 << KVM_E500_TLB0_WAY_NUM_BIT;
157 } else {
158 esel &= KVM_E500_TLB1_SIZE - 1;
159 }
160
161 return esel;
162}
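For TLB0 the effective entry number combines the way (from MAS0[ESEL]) with the set index (from the EPN in MAS2). A worked example using the sizes defined above; the register values are hypothetical:

    /* MAS0 ESEL = 1 (way 1), (mas2 >> 12) & 0x7f = 3 (set 3):       */
    /*   esel = (1 & 0x1) | (3 << 1) == 7                            */
    /* i.e. entry 7 of the flat 256-entry guest TLB0 (set 3, way 1). */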
163
164static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
165 const struct tlbe *tlbe)
166{
167 gpa_t gpa;
168
169 if (!get_tlb_v(tlbe))
170 return 0;
171
172 /* Does it match current guest AS? */
173 /* XXX what about IS != DS? */
174 if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
175 return 0;
176
177 gpa = get_tlb_raddr(tlbe);
178 if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
179 /* Mapping is not for RAM. */
180 return 0;
181
182 return 1;
183}
184
185#endif /* __KVM_E500_TLB_H__ */
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index d1d38daa93fb..a561d6e8da1c 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -30,6 +30,39 @@
30#include <asm/disassemble.h> 30#include <asm/disassemble.h>
31#include "timing.h" 31#include "timing.h"
32 32
33#define OP_TRAP 3
34
35#define OP_31_XOP_LWZX 23
36#define OP_31_XOP_LBZX 87
37#define OP_31_XOP_STWX 151
38#define OP_31_XOP_STBX 215
39#define OP_31_XOP_STBUX 247
40#define OP_31_XOP_LHZX 279
41#define OP_31_XOP_LHZUX 311
42#define OP_31_XOP_MFSPR 339
43#define OP_31_XOP_STHX 407
44#define OP_31_XOP_STHUX 439
45#define OP_31_XOP_MTSPR 467
46#define OP_31_XOP_DCBI 470
47#define OP_31_XOP_LWBRX 534
48#define OP_31_XOP_TLBSYNC 566
49#define OP_31_XOP_STWBRX 662
50#define OP_31_XOP_LHBRX 790
51#define OP_31_XOP_STHBRX 918
52
53#define OP_LWZ 32
54#define OP_LWZU 33
55#define OP_LBZ 34
56#define OP_LBZU 35
57#define OP_STW 36
58#define OP_STWU 37
59#define OP_STB 38
60#define OP_STBU 39
61#define OP_LHZ 40
62#define OP_LHZU 41
63#define OP_STH 44
64#define OP_STHU 45
65
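These constants name the fields returned by the decode helpers in asm/disassemble.h; a sketch of the relationship (helper definitions assumed from that header):

    /* primary opcode, bits 0:5:  get_op(inst)  == inst >> 26          */
    /* extended opcode (op 31):   get_xop(inst) == (inst >> 1) & 0x3ff */
    /* e.g. lwzx rD,rA,rB encodes op = 31, xop = 23 == OP_31_XOP_LWZX. */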
33void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) 66void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
34{ 67{
35 if (vcpu->arch.tcr & TCR_DIE) { 68 if (vcpu->arch.tcr & TCR_DIE) {
@@ -78,7 +111,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
78 kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); 111 kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
79 112
80 switch (get_op(inst)) { 113 switch (get_op(inst)) {
81 case 3: /* trap */ 114 case OP_TRAP:
82 vcpu->arch.esr |= ESR_PTR; 115 vcpu->arch.esr |= ESR_PTR;
83 kvmppc_core_queue_program(vcpu); 116 kvmppc_core_queue_program(vcpu);
84 advance = 0; 117 advance = 0;
@@ -87,31 +120,31 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
87 case 31: 120 case 31:
88 switch (get_xop(inst)) { 121 switch (get_xop(inst)) {
89 122
90 case 23: /* lwzx */ 123 case OP_31_XOP_LWZX:
91 rt = get_rt(inst); 124 rt = get_rt(inst);
92 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 125 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
93 break; 126 break;
94 127
95 case 87: /* lbzx */ 128 case OP_31_XOP_LBZX:
96 rt = get_rt(inst); 129 rt = get_rt(inst);
97 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 130 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
98 break; 131 break;
99 132
100 case 151: /* stwx */ 133 case OP_31_XOP_STWX:
101 rs = get_rs(inst); 134 rs = get_rs(inst);
102 emulated = kvmppc_handle_store(run, vcpu, 135 emulated = kvmppc_handle_store(run, vcpu,
103 vcpu->arch.gpr[rs], 136 vcpu->arch.gpr[rs],
104 4, 1); 137 4, 1);
105 break; 138 break;
106 139
107 case 215: /* stbx */ 140 case OP_31_XOP_STBX:
108 rs = get_rs(inst); 141 rs = get_rs(inst);
109 emulated = kvmppc_handle_store(run, vcpu, 142 emulated = kvmppc_handle_store(run, vcpu,
110 vcpu->arch.gpr[rs], 143 vcpu->arch.gpr[rs],
111 1, 1); 144 1, 1);
112 break; 145 break;
113 146
114 case 247: /* stbux */ 147 case OP_31_XOP_STBUX:
115 rs = get_rs(inst); 148 rs = get_rs(inst);
116 ra = get_ra(inst); 149 ra = get_ra(inst);
117 rb = get_rb(inst); 150 rb = get_rb(inst);
@@ -126,12 +159,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
126 vcpu->arch.gpr[rs] = ea; 159 vcpu->arch.gpr[rs] = ea;
127 break; 160 break;
128 161
129 case 279: /* lhzx */ 162 case OP_31_XOP_LHZX:
130 rt = get_rt(inst); 163 rt = get_rt(inst);
131 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 164 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
132 break; 165 break;
133 166
134 case 311: /* lhzux */ 167 case OP_31_XOP_LHZUX:
135 rt = get_rt(inst); 168 rt = get_rt(inst);
136 ra = get_ra(inst); 169 ra = get_ra(inst);
137 rb = get_rb(inst); 170 rb = get_rb(inst);
@@ -144,7 +177,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
144 vcpu->arch.gpr[ra] = ea; 177 vcpu->arch.gpr[ra] = ea;
145 break; 178 break;
146 179
147 case 339: /* mfspr */ 180 case OP_31_XOP_MFSPR:
148 sprn = get_sprn(inst); 181 sprn = get_sprn(inst);
149 rt = get_rt(inst); 182 rt = get_rt(inst);
150 183
@@ -185,7 +218,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
185 } 218 }
186 break; 219 break;
187 220
188 case 407: /* sthx */ 221 case OP_31_XOP_STHX:
189 rs = get_rs(inst); 222 rs = get_rs(inst);
190 ra = get_ra(inst); 223 ra = get_ra(inst);
191 rb = get_rb(inst); 224 rb = get_rb(inst);
@@ -195,7 +228,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
195 2, 1); 228 2, 1);
196 break; 229 break;
197 230
198 case 439: /* sthux */ 231 case OP_31_XOP_STHUX:
199 rs = get_rs(inst); 232 rs = get_rs(inst);
200 ra = get_ra(inst); 233 ra = get_ra(inst);
201 rb = get_rb(inst); 234 rb = get_rb(inst);
@@ -210,7 +243,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
210 vcpu->arch.gpr[ra] = ea; 243 vcpu->arch.gpr[ra] = ea;
211 break; 244 break;
212 245
213 case 467: /* mtspr */ 246 case OP_31_XOP_MTSPR:
214 sprn = get_sprn(inst); 247 sprn = get_sprn(inst);
215 rs = get_rs(inst); 248 rs = get_rs(inst);
216 switch (sprn) { 249 switch (sprn) {
@@ -246,7 +279,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
246 } 279 }
247 break; 280 break;
248 281
249 case 470: /* dcbi */ 282 case OP_31_XOP_DCBI:
250 /* Do nothing. The guest is performing dcbi because 283 /* Do nothing. The guest is performing dcbi because
251 * hardware DMA is not snooped by the dcache, but 284 * hardware DMA is not snooped by the dcache, but
252 * emulated DMA either goes through the dcache as 285 * emulated DMA either goes through the dcache as
@@ -254,15 +287,15 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
254 * coherence. */ 287 * coherence. */
255 break; 288 break;
256 289
257 case 534: /* lwbrx */ 290 case OP_31_XOP_LWBRX:
258 rt = get_rt(inst); 291 rt = get_rt(inst);
259 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); 292 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
260 break; 293 break;
261 294
262 case 566: /* tlbsync */ 295 case OP_31_XOP_TLBSYNC:
263 break; 296 break;
264 297
265 case 662: /* stwbrx */ 298 case OP_31_XOP_STWBRX:
266 rs = get_rs(inst); 299 rs = get_rs(inst);
267 ra = get_ra(inst); 300 ra = get_ra(inst);
268 rb = get_rb(inst); 301 rb = get_rb(inst);
@@ -272,12 +305,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
272 4, 0); 305 4, 0);
273 break; 306 break;
274 307
275 case 790: /* lhbrx */ 308 case OP_31_XOP_LHBRX:
276 rt = get_rt(inst); 309 rt = get_rt(inst);
277 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); 310 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
278 break; 311 break;
279 312
280 case 918: /* sthbrx */ 313 case OP_31_XOP_STHBRX:
281 rs = get_rs(inst); 314 rs = get_rs(inst);
282 ra = get_ra(inst); 315 ra = get_ra(inst);
283 rb = get_rb(inst); 316 rb = get_rb(inst);
@@ -293,37 +326,37 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
293 } 326 }
294 break; 327 break;
295 328
296 case 32: /* lwz */ 329 case OP_LWZ:
297 rt = get_rt(inst); 330 rt = get_rt(inst);
298 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 331 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
299 break; 332 break;
300 333
301 case 33: /* lwzu */ 334 case OP_LWZU:
302 ra = get_ra(inst); 335 ra = get_ra(inst);
303 rt = get_rt(inst); 336 rt = get_rt(inst);
304 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 337 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
305 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 338 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
306 break; 339 break;
307 340
308 case 34: /* lbz */ 341 case OP_LBZ:
309 rt = get_rt(inst); 342 rt = get_rt(inst);
310 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 343 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
311 break; 344 break;
312 345
313 case 35: /* lbzu */ 346 case OP_LBZU:
314 ra = get_ra(inst); 347 ra = get_ra(inst);
315 rt = get_rt(inst); 348 rt = get_rt(inst);
316 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 349 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
317 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 350 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
318 break; 351 break;
319 352
320 case 36: /* stw */ 353 case OP_STW:
321 rs = get_rs(inst); 354 rs = get_rs(inst);
322 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 355 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
323 4, 1); 356 4, 1);
324 break; 357 break;
325 358
326 case 37: /* stwu */ 359 case OP_STWU:
327 ra = get_ra(inst); 360 ra = get_ra(inst);
328 rs = get_rs(inst); 361 rs = get_rs(inst);
329 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 362 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
@@ -331,13 +364,13 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
331 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 364 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
332 break; 365 break;
333 366
334 case 38: /* stb */ 367 case OP_STB:
335 rs = get_rs(inst); 368 rs = get_rs(inst);
336 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 369 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
337 1, 1); 370 1, 1);
338 break; 371 break;
339 372
340 case 39: /* stbu */ 373 case OP_STBU:
341 ra = get_ra(inst); 374 ra = get_ra(inst);
342 rs = get_rs(inst); 375 rs = get_rs(inst);
343 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 376 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
@@ -345,25 +378,25 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
345 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 378 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
346 break; 379 break;
347 380
348 case 40: /* lhz */ 381 case OP_LHZ:
349 rt = get_rt(inst); 382 rt = get_rt(inst);
350 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 383 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
351 break; 384 break;
352 385
353 case 41: /* lhzu */ 386 case OP_LHZU:
354 ra = get_ra(inst); 387 ra = get_ra(inst);
355 rt = get_rt(inst); 388 rt = get_rt(inst);
356 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 389 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
357 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 390 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
358 break; 391 break;
359 392
360 case 44: /* sth */ 393 case OP_STH:
361 rs = get_rs(inst); 394 rs = get_rs(inst);
362 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 395 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
363 2, 1); 396 2, 1);
364 break; 397 break;
365 398
366 case 45: /* sthu */ 399 case OP_STHU:
367 ra = get_ra(inst); 400 ra = get_ra(inst);
368 rs = get_rs(inst); 401 rs = get_rs(inst);
369 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 402 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 5f81256287f5..9057335fdc61 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -216,46 +216,23 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
216 216
217void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 217void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
218{ 218{
219 kvmppc_core_destroy_mmu(vcpu); 219 kvmppc_mmu_destroy(vcpu);
220} 220}
221 221
222void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 222void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
223{ 223{
224 if (vcpu->guest_debug.enabled)
225 kvmppc_core_load_guest_debugstate(vcpu);
226
227 kvmppc_core_vcpu_load(vcpu, cpu); 224 kvmppc_core_vcpu_load(vcpu, cpu);
228} 225}
229 226
230void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 227void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
231{ 228{
232 if (vcpu->guest_debug.enabled)
233 kvmppc_core_load_host_debugstate(vcpu);
234
235 /* Don't leave guest TLB entries resident when being de-scheduled. */
236 /* XXX It would be nice to differentiate between heavyweight exit and
237 * sched_out here, since we could avoid the TLB flush for heavyweight
238 * exits. */
239 _tlbil_all();
240 kvmppc_core_vcpu_put(vcpu); 229 kvmppc_core_vcpu_put(vcpu);
241} 230}
242 231
243int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, 232int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
244 struct kvm_debug_guest *dbg) 233 struct kvm_guest_debug *dbg)
245{ 234{
246 int i; 235 return -EINVAL;
247
248 vcpu->guest_debug.enabled = dbg->enabled;
249 if (vcpu->guest_debug.enabled) {
250 for (i=0; i < ARRAY_SIZE(vcpu->guest_debug.bp); i++) {
251 if (dbg->breakpoints[i].enabled)
252 vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
253 else
254 vcpu->guest_debug.bp[i] = 0;
255 }
256 }
257
258 return 0;
259} 236}
260 237
261static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, 238static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h
index e1f54654e3ae..0b2f829f6d50 100644
--- a/arch/s390/include/asm/kvm.h
+++ b/arch/s390/include/asm/kvm.h
@@ -42,4 +42,11 @@ struct kvm_fpu {
42 __u64 fprs[16]; 42 __u64 fprs[16];
43}; 43};
44 44
45struct kvm_debug_exit_arch {
46};
47
48/* for KVM_SET_GUEST_DEBUG */
49struct kvm_guest_debug_arch {
50};
51
45#endif 52#endif
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3c55e4107dcc..c6e674f5fca9 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -21,9 +21,6 @@
21/* memory slots that are not exposed to userspace */ 21/* memory slots that are not exposed to userspace */
22#define KVM_PRIVATE_MEM_SLOTS 4 22#define KVM_PRIVATE_MEM_SLOTS 4
23 23
24struct kvm_guest_debug {
25};
26
27struct sca_entry { 24struct sca_entry {
28 atomic_t scn; 25 atomic_t scn;
29 __u64 reserved; 26 __u64 reserved;
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index e051cad1f1e0..3e260b7e37b2 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -4,6 +4,9 @@
4config HAVE_KVM 4config HAVE_KVM
5 bool 5 bool
6 6
7config HAVE_KVM_IRQCHIP
8 bool
9
7menuconfig VIRTUALIZATION 10menuconfig VIRTUALIZATION
8 bool "Virtualization" 11 bool "Virtualization"
9 default y 12 default y
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 61236102203e..9d19803111ba 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -103,7 +103,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
103static intercept_handler_t instruction_handlers[256] = { 103static intercept_handler_t instruction_handlers[256] = {
104 [0x83] = kvm_s390_handle_diag, 104 [0x83] = kvm_s390_handle_diag,
105 [0xae] = kvm_s390_handle_sigp, 105 [0xae] = kvm_s390_handle_sigp,
106 [0xb2] = kvm_s390_handle_priv, 106 [0xb2] = kvm_s390_handle_b2,
107 [0xb7] = handle_lctl, 107 [0xb7] = handle_lctl,
108 [0xeb] = handle_lctlg, 108 [0xeb] = handle_lctlg,
109}; 109};
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f4fe28a2521a..0189356fe209 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -555,9 +555,14 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
555 VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", 555 VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
556 s390int->parm); 556 s390int->parm);
557 break; 557 break;
558 case KVM_S390_SIGP_SET_PREFIX:
559 inti->prefix.address = s390int->parm;
560 inti->type = s390int->type;
561 VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
562 s390int->parm);
563 break;
558 case KVM_S390_SIGP_STOP: 564 case KVM_S390_SIGP_STOP:
559 case KVM_S390_RESTART: 565 case KVM_S390_RESTART:
560 case KVM_S390_SIGP_SET_PREFIX:
561 case KVM_S390_INT_EMERGENCY: 566 case KVM_S390_INT_EMERGENCY:
562 VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); 567 VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
563 inti->type = s390int->type; 568 inti->type = s390int->type;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0d33893e1e89..cbfe91e10120 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -422,8 +422,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
422 return -EINVAL; /* not implemented yet */ 422 return -EINVAL; /* not implemented yet */
423} 423}
424 424
425int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, 425int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
426 struct kvm_debug_guest *dbg) 426 struct kvm_guest_debug *dbg)
427{ 427{
428 return -EINVAL; /* not implemented yet */ 428 return -EINVAL; /* not implemented yet */
429} 429}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 3893cf12eacf..00bbe69b78da 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -50,7 +50,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
50int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); 50int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
51 51
52/* implemented in priv.c */ 52/* implemented in priv.c */
53int kvm_s390_handle_priv(struct kvm_vcpu *vcpu); 53int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
54 54
55/* implemented in sigp.c */ 55/* implemented in sigp.c */
56int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); 56int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 3605df45dd41..4b88834b8dd8 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -304,12 +304,24 @@ static intercept_handler_t priv_handlers[256] = {
304 [0xb1] = handle_stfl, 304 [0xb1] = handle_stfl,
305}; 305};
306 306
307int kvm_s390_handle_priv(struct kvm_vcpu *vcpu) 307int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
308{ 308{
309 intercept_handler_t handler; 309 intercept_handler_t handler;
310 310
311 /*
312 * A lot of B2 instructions are privileged. We first check for
313 * the privileged ones that we can handle in the kernel. If the
314 * kernel can handle this instruction, we check for the problem
315 * state bit and (a) handle the instruction or (b) send a code 2
316 * program check.
317 * Anything else goes to userspace. */
311 handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; 318 handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
312 if (handler) 319 if (handler) {
313 return handler(vcpu); 320 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
321 return kvm_s390_inject_program_int(vcpu,
322 PGM_PRIVILEGED_OPERATION);
323 else
324 return handler(vcpu);
325 }
314 return -ENOTSUPP; 326 return -ENOTSUPP;
315} 327}
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 2a01b9e02801..f27dbedf0866 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -153,8 +153,6 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
153 153
154 switch (parameter & 0xff) { 154 switch (parameter & 0xff) {
155 case 0: 155 case 0:
156 printk(KERN_WARNING "kvm: request to switch to ESA/390 mode"
157 " not supported");
158 rc = 3; /* not operational */ 156 rc = 3; /* not operational */
159 break; 157 break;
160 case 1: 158 case 1:
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 886c9402ec45..dc3f6cf11704 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -15,6 +15,7 @@
15#define __KVM_HAVE_DEVICE_ASSIGNMENT 15#define __KVM_HAVE_DEVICE_ASSIGNMENT
16#define __KVM_HAVE_MSI 16#define __KVM_HAVE_MSI
17#define __KVM_HAVE_USER_NMI 17#define __KVM_HAVE_USER_NMI
18#define __KVM_HAVE_GUEST_DEBUG
18 19
19/* Architectural interrupt line count. */ 20/* Architectural interrupt line count. */
20#define KVM_NR_INTERRUPTS 256 21#define KVM_NR_INTERRUPTS 256
@@ -212,7 +213,30 @@ struct kvm_pit_channel_state {
212 __s64 count_load_time; 213 __s64 count_load_time;
213}; 214};
214 215
216struct kvm_debug_exit_arch {
217 __u32 exception;
218 __u32 pad;
219 __u64 pc;
220 __u64 dr6;
221 __u64 dr7;
222};
223
224#define KVM_GUESTDBG_USE_SW_BP 0x00010000
225#define KVM_GUESTDBG_USE_HW_BP 0x00020000
226#define KVM_GUESTDBG_INJECT_DB 0x00040000
227#define KVM_GUESTDBG_INJECT_BP 0x00080000
228
229/* for KVM_SET_GUEST_DEBUG */
230struct kvm_guest_debug_arch {
231 __u64 debugreg[8];
232};
233
215struct kvm_pit_state { 234struct kvm_pit_state {
216 struct kvm_pit_channel_state channels[3]; 235 struct kvm_pit_channel_state channels[3];
217}; 236};
237
238struct kvm_reinject_control {
239 __u8 pit_reinject;
240 __u8 reserved[31];
241};
218#endif /* _ASM_X86_KVM_H */ 242#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 730843d1d2fb..f0faf58044ff 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -22,6 +22,7 @@
22#include <asm/pvclock-abi.h> 22#include <asm/pvclock-abi.h>
23#include <asm/desc.h> 23#include <asm/desc.h>
24#include <asm/mtrr.h> 24#include <asm/mtrr.h>
25#include <asm/msr-index.h>
25 26
26#define KVM_MAX_VCPUS 16 27#define KVM_MAX_VCPUS 16
27#define KVM_MEMORY_SLOTS 32 28#define KVM_MEMORY_SLOTS 32
@@ -134,11 +135,18 @@ enum {
134 135
135#define KVM_NR_MEM_OBJS 40 136#define KVM_NR_MEM_OBJS 40
136 137
137struct kvm_guest_debug { 138#define KVM_NR_DB_REGS 4
138 int enabled; 139
139 unsigned long bp[4]; 140#define DR6_BD (1 << 13)
140 int singlestep; 141#define DR6_BS (1 << 14)
141}; 142#define DR6_FIXED_1 0xffff0ff0
143#define DR6_VOLATILE 0x0000e00f
144
145#define DR7_BP_EN_MASK 0x000000ff
146#define DR7_GE (1 << 9)
147#define DR7_GD (1 << 13)
148#define DR7_FIXED_1 0x00000400
149#define DR7_VOLATILE 0xffff23ff
142 150
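The DR7 low byte interleaves the local/global enable bits (L0, G0, ... L3, G3), which is exactly what DR7_BP_EN_MASK covers. A minimal, illustrative sketch of arming one hardware breakpoint:

    unsigned long dr7 = DR7_FIXED_1;  /* bit 10 always reads as 1       */
    dr7 |= 1UL << 1;                  /* G0: globally enable slot 0     */
    /* (dr7 & DR7_BP_EN_MASK) != 0 now reports an active breakpoint. */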
143/* 151/*
144 * We don't want allocation failures within the mmu code, so we preallocate 152 * We don't want allocation failures within the mmu code, so we preallocate
@@ -162,7 +170,8 @@ struct kvm_pte_chain {
162 * bits 0:3 - total guest paging levels (2-4, or zero for real mode) 170 * bits 0:3 - total guest paging levels (2-4, or zero for real mode)
163 * bits 4:7 - page table level for this shadow (1-4) 171 * bits 4:7 - page table level for this shadow (1-4)
164 * bits 8:9 - page table quadrant for 2-level guests 172 * bits 8:9 - page table quadrant for 2-level guests
165 * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) 173 * bit 16 - direct mapping of virtual to physical mapping at gfn
174 * used for real mode and two-dimensional paging
166 * bits 17:19 - common access permissions for all ptes in this shadow page 175 * bits 17:19 - common access permissions for all ptes in this shadow page
167 */ 176 */
168union kvm_mmu_page_role { 177union kvm_mmu_page_role {
@@ -172,9 +181,10 @@ union kvm_mmu_page_role {
172 unsigned level:4; 181 unsigned level:4;
173 unsigned quadrant:2; 182 unsigned quadrant:2;
174 unsigned pad_for_nice_hex_output:6; 183 unsigned pad_for_nice_hex_output:6;
175 unsigned metaphysical:1; 184 unsigned direct:1;
176 unsigned access:3; 185 unsigned access:3;
177 unsigned invalid:1; 186 unsigned invalid:1;
187 unsigned cr4_pge:1;
178 }; 188 };
179}; 189};
180 190
@@ -218,6 +228,18 @@ struct kvm_pv_mmu_op_buffer {
218 char buf[512] __aligned(sizeof(long)); 228 char buf[512] __aligned(sizeof(long));
219}; 229};
220 230
231struct kvm_pio_request {
232 unsigned long count;
233 int cur_count;
234 gva_t guest_gva;
235 int in;
236 int port;
237 int size;
238 int string;
239 int down;
240 int rep;
241};
242
221/* 243/*
222 * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level 244 * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
223 * 32-bit). The kvm_mmu structure abstracts the details of the current mmu 245 * 32-bit). The kvm_mmu structure abstracts the details of the current mmu
@@ -236,6 +258,7 @@ struct kvm_mmu {
236 hpa_t root_hpa; 258 hpa_t root_hpa;
237 int root_level; 259 int root_level;
238 int shadow_root_level; 260 int shadow_root_level;
261 union kvm_mmu_page_role base_role;
239 262
240 u64 *pae_root; 263 u64 *pae_root;
241}; 264};
@@ -258,6 +281,7 @@ struct kvm_vcpu_arch {
258 unsigned long cr3; 281 unsigned long cr3;
259 unsigned long cr4; 282 unsigned long cr4;
260 unsigned long cr8; 283 unsigned long cr8;
284 u32 hflags;
261 u64 pdptrs[4]; /* pae */ 285 u64 pdptrs[4]; /* pae */
262 u64 shadow_efer; 286 u64 shadow_efer;
263 u64 apic_base; 287 u64 apic_base;
@@ -338,6 +362,15 @@ struct kvm_vcpu_arch {
338 362
339 struct mtrr_state_type mtrr_state; 363 struct mtrr_state_type mtrr_state;
340 u32 pat; 364 u32 pat;
365
366 int switch_db_regs;
367 unsigned long host_db[KVM_NR_DB_REGS];
368 unsigned long host_dr6;
369 unsigned long host_dr7;
370 unsigned long db[KVM_NR_DB_REGS];
371 unsigned long dr6;
372 unsigned long dr7;
373 unsigned long eff_db[KVM_NR_DB_REGS];
341}; 374};
342 375
343struct kvm_mem_alias { 376struct kvm_mem_alias {
@@ -378,6 +411,7 @@ struct kvm_arch{
378 411
379 unsigned long irq_sources_bitmap; 412 unsigned long irq_sources_bitmap;
380 unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; 413 unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
414 u64 vm_init_tsc;
381}; 415};
382 416
383struct kvm_vm_stat { 417struct kvm_vm_stat {
@@ -446,8 +480,7 @@ struct kvm_x86_ops {
446 void (*vcpu_put)(struct kvm_vcpu *vcpu); 480 void (*vcpu_put)(struct kvm_vcpu *vcpu);
447 481
448 int (*set_guest_debug)(struct kvm_vcpu *vcpu, 482 int (*set_guest_debug)(struct kvm_vcpu *vcpu,
449 struct kvm_debug_guest *dbg); 483 struct kvm_guest_debug *dbg);
450 void (*guest_debug_pre)(struct kvm_vcpu *vcpu);
451 int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); 484 int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
452 int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); 485 int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
453 u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); 486 u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -583,16 +616,12 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
583void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, 616void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
584 u32 error_code); 617 u32 error_code);
585 618
586void kvm_pic_set_irq(void *opaque, int irq, int level); 619int kvm_pic_set_irq(void *opaque, int irq, int level);
587 620
588void kvm_inject_nmi(struct kvm_vcpu *vcpu); 621void kvm_inject_nmi(struct kvm_vcpu *vcpu);
589 622
590void fx_init(struct kvm_vcpu *vcpu); 623void fx_init(struct kvm_vcpu *vcpu);
591 624
592int emulator_read_std(unsigned long addr,
593 void *val,
594 unsigned int bytes,
595 struct kvm_vcpu *vcpu);
596int emulator_write_emulated(unsigned long addr, 625int emulator_write_emulated(unsigned long addr,
597 const void *val, 626 const void *val,
598 unsigned int bytes, 627 unsigned int bytes,
@@ -737,6 +766,10 @@ enum {
737 TASK_SWITCH_GATE = 3, 766 TASK_SWITCH_GATE = 3,
738}; 767};
739 768
769#define HF_GIF_MASK (1 << 0)
770#define HF_HIF_MASK (1 << 1)
771#define HF_VINTR_MASK (1 << 2)
772
740/* 773/*
741 * Hardware virtualization extension instructions may fault if a 774 * Hardware virtualization extension instructions may fault if a
742 * reboot turns off virtualization while processes are running. 775 * reboot turns off virtualization while processes are running.
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 358acc59ae04..f4e505f286bc 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -18,11 +18,15 @@
18#define _EFER_LME 8 /* Long mode enable */ 18#define _EFER_LME 8 /* Long mode enable */
19#define _EFER_LMA 10 /* Long mode active (read-only) */ 19#define _EFER_LMA 10 /* Long mode active (read-only) */
20#define _EFER_NX 11 /* No execute enable */ 20#define _EFER_NX 11 /* No execute enable */
21#define _EFER_SVME 12 /* Enable virtualization */
22#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
21 23
22#define EFER_SCE (1<<_EFER_SCE) 24#define EFER_SCE (1<<_EFER_SCE)
23#define EFER_LME (1<<_EFER_LME) 25#define EFER_LME (1<<_EFER_LME)
24#define EFER_LMA (1<<_EFER_LMA) 26#define EFER_LMA (1<<_EFER_LMA)
25#define EFER_NX (1<<_EFER_NX) 27#define EFER_NX (1<<_EFER_NX)
28#define EFER_SVME (1<<_EFER_SVME)
29#define EFER_FFXSR (1<<_EFER_FFXSR)
26 30
27/* Intel MSRs. Some also available on other CPUs */ 31/* Intel MSRs. Some also available on other CPUs */
28#define MSR_IA32_PERFCTR0 0x000000c1 32#define MSR_IA32_PERFCTR0 0x000000c1
@@ -360,4 +364,9 @@
360#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b 364#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
361#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c 365#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
362 366
367/* AMD-V MSRs */
368
369#define MSR_VM_CR 0xc0010114
370#define MSR_VM_HSAVE_PA 0xc0010117
371
363#endif /* _ASM_X86_MSR_INDEX_H */ 372#endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1b8afa78e869..82ada75f3ebf 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -174,10 +174,6 @@ struct __attribute__ ((__packed__)) vmcb {
174#define SVM_CPUID_FEATURE_SHIFT 2 174#define SVM_CPUID_FEATURE_SHIFT 2
175#define SVM_CPUID_FUNC 0x8000000a 175#define SVM_CPUID_FUNC 0x8000000a
176 176
177#define MSR_EFER_SVME_MASK (1ULL << 12)
178#define MSR_VM_CR 0xc0010114
179#define MSR_VM_HSAVE_PA 0xc0010117ULL
180
181#define SVM_VM_CR_SVM_DISABLE 4 177#define SVM_VM_CR_SVM_DISABLE 4
182 178
183#define SVM_SELECTOR_S_SHIFT 4 179#define SVM_SELECTOR_S_SHIFT 4
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 593636275238..e0f9aa16358b 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -118,7 +118,7 @@ static inline void cpu_svm_disable(void)
118 118
119 wrmsrl(MSR_VM_HSAVE_PA, 0); 119 wrmsrl(MSR_VM_HSAVE_PA, 0);
120 rdmsrl(MSR_EFER, efer); 120 rdmsrl(MSR_EFER, efer);
121 wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); 121 wrmsrl(MSR_EFER, efer & ~EFER_SVME);
122} 122}
123 123
124/** Makes sure SVM is disabled, if it is supported on the CPU 124/** Makes sure SVM is disabled, if it is supported on the CPU
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index d0238e6151d8..498f944010b9 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -270,8 +270,9 @@ enum vmcs_field {
270 270
271#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ 271#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
272#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ 272#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
273#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ 273#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
274#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ 274#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
275#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
275 276
276/* GUEST_INTERRUPTIBILITY_INFO flags. */ 277/* GUEST_INTERRUPTIBILITY_INFO flags. */
277#define GUEST_INTR_STATE_STI 0x00000001 278#define GUEST_INTR_STATE_STI 0x00000001
@@ -311,7 +312,7 @@ enum vmcs_field {
311#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */ 312#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */
312#define TYPE_MOV_TO_DR (0 << 4) 313#define TYPE_MOV_TO_DR (0 << 4)
313#define TYPE_MOV_FROM_DR (1 << 4) 314#define TYPE_MOV_FROM_DR (1 << 4)
314#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose reg. */ 315#define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */
315 316
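The macro now extracts the register field rather than just masking it; a quick illustrative use (the sample qualification is hypothetical):

    unsigned long eq = 0x00000310;        /* hypothetical exit qualification   */
    int gpreg = DEBUG_REG_ACCESS_REG(eq); /* (0x310 >> 8) & 0xf == 3, i.e. rbx */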
316 317
317/* segment AR */ 318/* segment AR */
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b81125f0bdee..0a303c3ed11f 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -4,6 +4,10 @@
4config HAVE_KVM 4config HAVE_KVM
5 bool 5 bool
6 6
7config HAVE_KVM_IRQCHIP
8 bool
9 default y
10
7menuconfig VIRTUALIZATION 11menuconfig VIRTUALIZATION
8 bool "Virtualization" 12 bool "Virtualization"
9 depends on HAVE_KVM || X86 13 depends on HAVE_KVM || X86
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 72bd275a9b5c..c13bb92d3157 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -201,6 +201,9 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
201 if (!atomic_inc_and_test(&pt->pending)) 201 if (!atomic_inc_and_test(&pt->pending))
202 set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); 202 set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
203 203
204 if (!pt->reinject)
205 atomic_set(&pt->pending, 1);
206
204 if (vcpu0 && waitqueue_active(&vcpu0->wq)) 207 if (vcpu0 && waitqueue_active(&vcpu0->wq))
205 wake_up_interruptible(&vcpu0->wq); 208 wake_up_interruptible(&vcpu0->wq);
206 209
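The effect of the new reinject flag, illustrated (not kernel code): with reinjection on, every missed tick is eventually delivered; with it off, pending is clamped so back-to-back expirations coalesce.

    /* Three timer expirations before the guest acks:                 */
    /*   reinject on:  pending 1 -> 2 -> 3, guest later sees 3 ticks  */
    /*   reinject off: pending clamped at 1, guest sees a single tick */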
@@ -536,6 +539,16 @@ void kvm_pit_reset(struct kvm_pit *pit)
536 pit->pit_state.irq_ack = 1; 539 pit->pit_state.irq_ack = 1;
537} 540}
538 541
542static void pit_mask_notifier(struct kvm_irq_mask_notifier *kimn, bool mask)
543{
544 struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
545
546 if (!mask) {
547 atomic_set(&pit->pit_state.pit_timer.pending, 0);
548 pit->pit_state.irq_ack = 1;
549 }
550}
551
539struct kvm_pit *kvm_create_pit(struct kvm *kvm) 552struct kvm_pit *kvm_create_pit(struct kvm *kvm)
540{ 553{
541 struct kvm_pit *pit; 554 struct kvm_pit *pit;
@@ -545,9 +558,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
545 if (!pit) 558 if (!pit)
546 return NULL; 559 return NULL;
547 560
548 mutex_lock(&kvm->lock);
549 pit->irq_source_id = kvm_request_irq_source_id(kvm); 561 pit->irq_source_id = kvm_request_irq_source_id(kvm);
550 mutex_unlock(&kvm->lock);
551 if (pit->irq_source_id < 0) { 562 if (pit->irq_source_id < 0) {
552 kfree(pit); 563 kfree(pit);
553 return NULL; 564 return NULL;
@@ -580,10 +591,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
580 pit_state->irq_ack_notifier.gsi = 0; 591 pit_state->irq_ack_notifier.gsi = 0;
581 pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; 592 pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
582 kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); 593 kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
594 pit_state->pit_timer.reinject = true;
583 mutex_unlock(&pit->pit_state.lock); 595 mutex_unlock(&pit->pit_state.lock);
584 596
585 kvm_pit_reset(pit); 597 kvm_pit_reset(pit);
586 598
599 pit->mask_notifier.func = pit_mask_notifier;
600 kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
601
587 return pit; 602 return pit;
588} 603}
589 604
@@ -592,6 +607,8 @@ void kvm_free_pit(struct kvm *kvm)
592 struct hrtimer *timer; 607 struct hrtimer *timer;
593 608
594 if (kvm->arch.vpit) { 609 if (kvm->arch.vpit) {
610 kvm_unregister_irq_mask_notifier(kvm, 0,
611 &kvm->arch.vpit->mask_notifier);
595 mutex_lock(&kvm->arch.vpit->pit_state.lock); 612 mutex_lock(&kvm->arch.vpit->pit_state.lock);
596 timer = &kvm->arch.vpit->pit_state.pit_timer.timer; 613 timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
597 hrtimer_cancel(timer); 614 hrtimer_cancel(timer);
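
The two i8254.c changes above tie together: with reinject off, the PIT no longer accumulates missed ticks for later re-injection but clamps pending to 1, and the new mask notifier drops any stale pending count when the guest unmasks the line. A toy model of the clamping; this is not kernel code, just the same accounting:

#include <stdio.h>

static int pending;		/* stands in for pit_timer.pending */
static int reinject;		/* stands in for pit_timer.reinject */

static void timer_fires(void)
{
	pending++;		/* atomic_inc_and_test() in the real code */
	if (!reinject)
		pending = 1;	/* drop coalesced ticks */
}

int main(void)
{
	int i;

	for (i = 0; i < 5; i++)
		timer_fires();	/* guest descheduled, ticks pile up */
	printf("pending = %d\n", pending);	/* 1, not 5 */
	return 0;
}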
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 4178022b97aa..6acbe4b505d5 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -9,6 +9,7 @@ struct kvm_kpit_timer {
9 s64 period; /* unit: ns */ 9 s64 period; /* unit: ns */
10 s64 scheduled; 10 s64 scheduled;
11 atomic_t pending; 11 atomic_t pending;
12 bool reinject;
12}; 13};
13 14
14struct kvm_kpit_channel_state { 15struct kvm_kpit_channel_state {
@@ -45,6 +46,7 @@ struct kvm_pit {
45 struct kvm *kvm; 46 struct kvm *kvm;
46 struct kvm_kpit_state pit_state; 47 struct kvm_kpit_state pit_state;
47 int irq_source_id; 48 int irq_source_id;
49 struct kvm_irq_mask_notifier mask_notifier;
48}; 50};
49 51
50#define KVM_PIT_BASE_ADDRESS 0x40 52#define KVM_PIT_BASE_ADDRESS 0x40
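
The mask_notifier member is embedded directly in struct kvm_pit so the callback can recover its enclosing PIT with container_of(), as pit_mask_notifier() above does. A self-contained sketch of that embedding pattern, with simplified stand-in types:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct mask_notifier {
	void (*func)(struct mask_notifier *kimn, int mask);
};

struct pit {
	int pending;
	struct mask_notifier mask_notifier;	/* embedded, as in kvm_pit */
};

static void pit_mask_cb(struct mask_notifier *kimn, int mask)
{
	struct pit *pit = container_of(kimn, struct pit, mask_notifier);

	if (!mask)
		pit->pending = 0;	/* unmask: forget stale ticks */
}

int main(void)
{
	struct pit pit = { .pending = 3 };

	pit.mask_notifier.func = pit_mask_cb;
	pit.mask_notifier.func(&pit.mask_notifier, 0);
	printf("pending = %d\n", pit.pending);	/* 0 */
	return 0;
}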
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 179dcb0103fd..1ccb50c74f18 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -32,11 +32,13 @@
32#include <linux/kvm_host.h> 32#include <linux/kvm_host.h>
33 33
34static void pic_lock(struct kvm_pic *s) 34static void pic_lock(struct kvm_pic *s)
35 __acquires(&s->lock)
35{ 36{
36 spin_lock(&s->lock); 37 spin_lock(&s->lock);
37} 38}
38 39
39static void pic_unlock(struct kvm_pic *s) 40static void pic_unlock(struct kvm_pic *s)
41 __releases(&s->lock)
40{ 42{
41 struct kvm *kvm = s->kvm; 43 struct kvm *kvm = s->kvm;
42 unsigned acks = s->pending_acks; 44 unsigned acks = s->pending_acks;
@@ -49,7 +51,8 @@ static void pic_unlock(struct kvm_pic *s)
49 spin_unlock(&s->lock); 51 spin_unlock(&s->lock);
50 52
51 while (acks) { 53 while (acks) {
52 kvm_notify_acked_irq(kvm, __ffs(acks)); 54 kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)),
55 __ffs(acks));
53 acks &= acks - 1; 56 acks &= acks - 1;
54 } 57 }
55 58
@@ -76,12 +79,13 @@ void kvm_pic_clear_isr_ack(struct kvm *kvm)
76/* 79/*
77 * set irq level. If an edge is detected, then the IRR is set to 1 80 * set irq level. If an edge is detected, then the IRR is set to 1
78 */ 81 */
79static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) 82static inline int pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
80{ 83{
81 int mask; 84 int mask, ret = 1;
82 mask = 1 << irq; 85 mask = 1 << irq;
83 if (s->elcr & mask) /* level triggered */ 86 if (s->elcr & mask) /* level triggered */
84 if (level) { 87 if (level) {
88 ret = !(s->irr & mask);
85 s->irr |= mask; 89 s->irr |= mask;
86 s->last_irr |= mask; 90 s->last_irr |= mask;
87 } else { 91 } else {
@@ -90,11 +94,15 @@ static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
90 } 94 }
91 else /* edge triggered */ 95 else /* edge triggered */
92 if (level) { 96 if (level) {
93 if ((s->last_irr & mask) == 0) 97 if ((s->last_irr & mask) == 0) {
98 ret = !(s->irr & mask);
94 s->irr |= mask; 99 s->irr |= mask;
100 }
95 s->last_irr |= mask; 101 s->last_irr |= mask;
96 } else 102 } else
97 s->last_irr &= ~mask; 103 s->last_irr &= ~mask;
104
105 return (s->imr & mask) ? -1 : ret;
98} 106}
99 107
100/* 108/*
@@ -171,16 +179,19 @@ void kvm_pic_update_irq(struct kvm_pic *s)
171 pic_unlock(s); 179 pic_unlock(s);
172} 180}
173 181
174void kvm_pic_set_irq(void *opaque, int irq, int level) 182int kvm_pic_set_irq(void *opaque, int irq, int level)
175{ 183{
176 struct kvm_pic *s = opaque; 184 struct kvm_pic *s = opaque;
185 int ret = -1;
177 186
178 pic_lock(s); 187 pic_lock(s);
179 if (irq >= 0 && irq < PIC_NUM_PINS) { 188 if (irq >= 0 && irq < PIC_NUM_PINS) {
180 pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); 189 ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
181 pic_update_irq(s); 190 pic_update_irq(s);
182 } 191 }
183 pic_unlock(s); 192 pic_unlock(s);
193
194 return ret;
184} 195}
185 196
186/* 197/*
@@ -232,7 +243,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
232 } 243 }
233 pic_update_irq(s); 244 pic_update_irq(s);
234 pic_unlock(s); 245 pic_unlock(s);
235 kvm_notify_acked_irq(kvm, irq); 246 kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq);
236 247
237 return intno; 248 return intno;
238} 249}
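
With the i8259 rewrite above, pic_set_irq1() reports what happened to the pin: -1 if the line is masked by the IMR, 0 if the interrupt coalesced with one already latched in the IRR, and 1 if a fresh interrupt was latched; kvm_pic_set_irq() forwards that value. A toy edge-triggered model of the return value, using the same imr/irr/last_irr bookkeeping but none of the kernel's locking or structs:

#include <stdio.h>

static int imr, irr, last_irr;

static int set_irq1(int irq, int level)
{
	int mask = 1 << irq, ret = 1;

	if (level) {			/* edge-triggered pin */
		if (!(last_irr & mask)) {
			ret = !(irr & mask);	/* 0 if already latched */
			irr |= mask;
		}
		last_irr |= mask;
	} else
		last_irr &= ~mask;

	return (imr & mask) ? -1 : ret;	/* -1: pin masked off */
}

int main(void)
{
	printf("%d\n", set_irq1(3, 1));	/* 1: new interrupt latched */
	set_irq1(3, 0);			/* falling edge */
	printf("%d\n", set_irq1(3, 1));	/* 0: coalesced, IRR still set */
	imr |= 1 << 3;
	printf("%d\n", set_irq1(3, 1));	/* -1: masked by the IMR */
	return 0;
}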
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 82579ee538d0..9f593188129e 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -32,6 +32,8 @@
32#include "lapic.h" 32#include "lapic.h"
33 33
34#define PIC_NUM_PINS 16 34#define PIC_NUM_PINS 16
35#define SELECT_PIC(irq) \
36 ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)
35 37
36struct kvm; 38struct kvm;
37struct kvm_vcpu; 39struct kvm_vcpu;
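
SELECT_PIC() maps a pin number to the irqchip identifier that kvm_notify_acked_irq() now takes, distinguishing the master PIC (pins 0-7) from the slave (pins 8-15). A quick standalone check; the enum values below are stand-ins, the real constants come from the KVM headers:

#include <stdio.h>

enum { KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE };	/* stand-ins */

#define SELECT_PIC(irq) \
	((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)

int main(void)
{
	printf("%d %d\n", SELECT_PIC(3), SELECT_PIC(10));	/* 0 1 */
	return 0;
}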
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 8e5ee99551f6..ed66e4c078dc 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -18,7 +18,6 @@ static const u32 host_save_user_msrs[] = {
18}; 18};
19 19
20#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) 20#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
21#define NUM_DB_REGS 4
22 21
23struct kvm_vcpu; 22struct kvm_vcpu;
24 23
@@ -29,18 +28,23 @@ struct vcpu_svm {
29 struct svm_cpu_data *svm_data; 28 struct svm_cpu_data *svm_data;
30 uint64_t asid_generation; 29 uint64_t asid_generation;
31 30
32 unsigned long db_regs[NUM_DB_REGS];
33
34 u64 next_rip; 31 u64 next_rip;
35 32
36 u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; 33 u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
37 u64 host_gs_base; 34 u64 host_gs_base;
38 unsigned long host_cr2; 35 unsigned long host_cr2;
39 unsigned long host_db_regs[NUM_DB_REGS];
40 unsigned long host_dr6;
41 unsigned long host_dr7;
42 36
43 u32 *msrpm; 37 u32 *msrpm;
38 struct vmcb *hsave;
39 u64 hsave_msr;
40
41 u64 nested_vmcb;
42
43 /* These are the merged vectors */
44 u32 *nested_msrpm;
45
46 /* gpa pointers to the real vectors */
47 u64 nested_vmcb_msrpm;
44}; 48};
45 49
46#endif 50#endif
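
The new fields sketch out nested SVM state: hsave holds the host save area, nested_vmcb tracks the L1 guest's VMCB address, and nested_msrpm is described as the merged MSR-permission vectors. Presumably the merge ORs the host and guest bitmaps so an MSR access exits whenever either side wants it intercepted; a hedged sketch of that idea follows (the loop, names, and bounds are assumptions, not code shown in this patch):

#include <stdint.h>
#include <stddef.h>

/* two pages (MSRPM_ALLOC_ORDER 1), expressed in 32-bit words */
#define MSRPM_WORDS	(2 * 4096 / sizeof(uint32_t))

static void merge_msrpm(uint32_t *merged, const uint32_t *host,
			const uint32_t *guest)
{
	size_t i;

	for (i = 0; i < MSRPM_WORDS; i++)
		merged[i] = host[i] | guest[i];	/* set bit == intercept */
}

int main(void)
{
	static uint32_t host[MSRPM_WORDS], guest[MSRPM_WORDS],
			merged[MSRPM_WORDS];

	host[0] = 0x1;
	guest[0] = 0x2;
	merge_msrpm(merged, host, guest);
	return merged[0] == 0x3 ? 0 : 1;
}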
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2d4477c71473..2a36f7f7c4c7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -145,11 +145,20 @@ struct kvm_rmap_desc {
145 struct kvm_rmap_desc *more; 145 struct kvm_rmap_desc *more;
146}; 146};
147 147
148struct kvm_shadow_walk { 148struct kvm_shadow_walk_iterator {
149 int (*entry)(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu, 149 u64 addr;
150 u64 addr, u64 *spte, int level); 150 hpa_t shadow_addr;
151 int level;
152 u64 *sptep;
153 unsigned index;
151}; 154};
152 155
156#define for_each_shadow_entry(_vcpu, _addr, _walker) \
157 for (shadow_walk_init(&(_walker), _vcpu, _addr); \
158 shadow_walk_okay(&(_walker)); \
159 shadow_walk_next(&(_walker)))
160
161
153struct kvm_unsync_walk { 162struct kvm_unsync_walk {
154 int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); 163 int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
155}; 164};
@@ -343,7 +352,6 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
343 352
344 BUG_ON(!mc->nobjs); 353 BUG_ON(!mc->nobjs);
345 p = mc->objects[--mc->nobjs]; 354 p = mc->objects[--mc->nobjs];
346 memset(p, 0, size);
347 return p; 355 return p;
348} 356}
349 357
@@ -794,10 +802,8 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
794 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 802 set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
795 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 803 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
796 INIT_LIST_HEAD(&sp->oos_link); 804 INIT_LIST_HEAD(&sp->oos_link);
797 ASSERT(is_empty_shadow_page(sp->spt));
798 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); 805 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
799 sp->multimapped = 0; 806 sp->multimapped = 0;
800 sp->global = 1;
801 sp->parent_pte = parent_pte; 807 sp->parent_pte = parent_pte;
802 --vcpu->kvm->arch.n_free_mmu_pages; 808 --vcpu->kvm->arch.n_free_mmu_pages;
803 return sp; 809 return sp;
@@ -983,8 +989,8 @@ struct kvm_mmu_pages {
983 idx < 512; \ 989 idx < 512; \
984 idx = find_next_bit(bitmap, 512, idx+1)) 990 idx = find_next_bit(bitmap, 512, idx+1))
985 991
986int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, 992static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
987 int idx) 993 int idx)
988{ 994{
989 int i; 995 int i;
990 996
@@ -1059,7 +1065,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
1059 index = kvm_page_table_hashfn(gfn); 1065 index = kvm_page_table_hashfn(gfn);
1060 bucket = &kvm->arch.mmu_page_hash[index]; 1066 bucket = &kvm->arch.mmu_page_hash[index];
1061 hlist_for_each_entry(sp, node, bucket, hash_link) 1067 hlist_for_each_entry(sp, node, bucket, hash_link)
1062 if (sp->gfn == gfn && !sp->role.metaphysical 1068 if (sp->gfn == gfn && !sp->role.direct
1063 && !sp->role.invalid) { 1069 && !sp->role.invalid) {
1064 pgprintk("%s: found role %x\n", 1070 pgprintk("%s: found role %x\n",
1065 __func__, sp->role.word); 1071 __func__, sp->role.word);
@@ -1115,8 +1121,9 @@ struct mmu_page_path {
1115 i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ 1121 i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
1116 i = mmu_pages_next(&pvec, &parents, i)) 1122 i = mmu_pages_next(&pvec, &parents, i))
1117 1123
1118int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, 1124static int mmu_pages_next(struct kvm_mmu_pages *pvec,
1119 int i) 1125 struct mmu_page_path *parents,
1126 int i)
1120{ 1127{
1121 int n; 1128 int n;
1122 1129
@@ -1135,7 +1142,7 @@ int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
1135 return n; 1142 return n;
1136} 1143}
1137 1144
1138void mmu_pages_clear_parents(struct mmu_page_path *parents) 1145static void mmu_pages_clear_parents(struct mmu_page_path *parents)
1139{ 1146{
1140 struct kvm_mmu_page *sp; 1147 struct kvm_mmu_page *sp;
1141 unsigned int level = 0; 1148 unsigned int level = 0;
@@ -1193,7 +1200,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1193 gfn_t gfn, 1200 gfn_t gfn,
1194 gva_t gaddr, 1201 gva_t gaddr,
1195 unsigned level, 1202 unsigned level,
1196 int metaphysical, 1203 int direct,
1197 unsigned access, 1204 unsigned access,
1198 u64 *parent_pte) 1205 u64 *parent_pte)
1199{ 1206{
@@ -1204,10 +1211,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1204 struct kvm_mmu_page *sp; 1211 struct kvm_mmu_page *sp;
1205 struct hlist_node *node, *tmp; 1212 struct hlist_node *node, *tmp;
1206 1213
1207 role.word = 0; 1214 role = vcpu->arch.mmu.base_role;
1208 role.glevels = vcpu->arch.mmu.root_level;
1209 role.level = level; 1215 role.level = level;
1210 role.metaphysical = metaphysical; 1216 role.direct = direct;
1211 role.access = access; 1217 role.access = access;
1212 if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { 1218 if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
1213 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); 1219 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
@@ -1242,8 +1248,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1242 pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word); 1248 pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
1243 sp->gfn = gfn; 1249 sp->gfn = gfn;
1244 sp->role = role; 1250 sp->role = role;
1251 sp->global = role.cr4_pge;
1245 hlist_add_head(&sp->hash_link, bucket); 1252 hlist_add_head(&sp->hash_link, bucket);
1246 if (!metaphysical) { 1253 if (!direct) {
1247 if (rmap_write_protect(vcpu->kvm, gfn)) 1254 if (rmap_write_protect(vcpu->kvm, gfn))
1248 kvm_flush_remote_tlbs(vcpu->kvm); 1255 kvm_flush_remote_tlbs(vcpu->kvm);
1249 account_shadowed(vcpu->kvm, gfn); 1256 account_shadowed(vcpu->kvm, gfn);
@@ -1255,35 +1262,35 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1255 return sp; 1262 return sp;
1256} 1263}
1257 1264
1258static int walk_shadow(struct kvm_shadow_walk *walker, 1265static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
1259 struct kvm_vcpu *vcpu, u64 addr) 1266 struct kvm_vcpu *vcpu, u64 addr)
1260{ 1267{
1261 hpa_t shadow_addr; 1268 iterator->addr = addr;
1262 int level; 1269 iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
1263 int r; 1270 iterator->level = vcpu->arch.mmu.shadow_root_level;
1264 u64 *sptep; 1271 if (iterator->level == PT32E_ROOT_LEVEL) {
1265 unsigned index; 1272 iterator->shadow_addr
1266 1273 = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
1267 shadow_addr = vcpu->arch.mmu.root_hpa; 1274 iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
1268 level = vcpu->arch.mmu.shadow_root_level; 1275 --iterator->level;
1269 if (level == PT32E_ROOT_LEVEL) { 1276 if (!iterator->shadow_addr)
1270 shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; 1277 iterator->level = 0;
1271 shadow_addr &= PT64_BASE_ADDR_MASK;
1272 if (!shadow_addr)
1273 return 1;
1274 --level;
1275 } 1278 }
1279}
1276 1280
1277 while (level >= PT_PAGE_TABLE_LEVEL) { 1281static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
1278 index = SHADOW_PT_INDEX(addr, level); 1282{
1279 sptep = ((u64 *)__va(shadow_addr)) + index; 1283 if (iterator->level < PT_PAGE_TABLE_LEVEL)
1280 r = walker->entry(walker, vcpu, addr, sptep, level); 1284 return false;
1281 if (r) 1285 iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level);
1282 return r; 1286 iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index;
1283 shadow_addr = *sptep & PT64_BASE_ADDR_MASK; 1287 return true;
1284 --level; 1288}
1285 } 1289
1286 return 0; 1290static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
1291{
1292 iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK;
1293 --iterator->level;
1287} 1294}
1288 1295
1289static void kvm_mmu_page_unlink_children(struct kvm *kvm, 1296static void kvm_mmu_page_unlink_children(struct kvm *kvm,
@@ -1388,7 +1395,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1388 kvm_mmu_page_unlink_children(kvm, sp); 1395 kvm_mmu_page_unlink_children(kvm, sp);
1389 kvm_mmu_unlink_parents(kvm, sp); 1396 kvm_mmu_unlink_parents(kvm, sp);
1390 kvm_flush_remote_tlbs(kvm); 1397 kvm_flush_remote_tlbs(kvm);
1391 if (!sp->role.invalid && !sp->role.metaphysical) 1398 if (!sp->role.invalid && !sp->role.direct)
1392 unaccount_shadowed(kvm, sp->gfn); 1399 unaccount_shadowed(kvm, sp->gfn);
1393 if (sp->unsync) 1400 if (sp->unsync)
1394 kvm_unlink_unsync_page(kvm, sp); 1401 kvm_unlink_unsync_page(kvm, sp);
@@ -1451,7 +1458,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
1451 index = kvm_page_table_hashfn(gfn); 1458 index = kvm_page_table_hashfn(gfn);
1452 bucket = &kvm->arch.mmu_page_hash[index]; 1459 bucket = &kvm->arch.mmu_page_hash[index];
1453 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) 1460 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
1454 if (sp->gfn == gfn && !sp->role.metaphysical) { 1461 if (sp->gfn == gfn && !sp->role.direct) {
1455 pgprintk("%s: gfn %lx role %x\n", __func__, gfn, 1462 pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
1456 sp->role.word); 1463 sp->role.word);
1457 r = 1; 1464 r = 1;
@@ -1463,11 +1470,20 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
1463 1470
1464static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) 1471static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
1465{ 1472{
1473 unsigned index;
1474 struct hlist_head *bucket;
1466 struct kvm_mmu_page *sp; 1475 struct kvm_mmu_page *sp;
1476 struct hlist_node *node, *nn;
1467 1477
1468 while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) { 1478 index = kvm_page_table_hashfn(gfn);
1469 pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word); 1479 bucket = &kvm->arch.mmu_page_hash[index];
1470 kvm_mmu_zap_page(kvm, sp); 1480 hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
1481 if (sp->gfn == gfn && !sp->role.direct
1482 && !sp->role.invalid) {
1483 pgprintk("%s: zap %lx %x\n",
1484 __func__, gfn, sp->role.word);
1485 kvm_mmu_zap_page(kvm, sp);
1486 }
1471 } 1487 }
1472} 1488}
1473 1489
@@ -1622,7 +1638,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1622 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 1638 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
1623 /* don't unsync if pagetable is shadowed with multiple roles */ 1639 /* don't unsync if pagetable is shadowed with multiple roles */
1624 hlist_for_each_entry_safe(s, node, n, bucket, hash_link) { 1640 hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
1625 if (s->gfn != sp->gfn || s->role.metaphysical) 1641 if (s->gfn != sp->gfn || s->role.direct)
1626 continue; 1642 continue;
1627 if (s->role.word != sp->role.word) 1643 if (s->role.word != sp->role.word)
1628 return 1; 1644 return 1;
@@ -1669,8 +1685,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1669 u64 mt_mask = shadow_mt_mask; 1685 u64 mt_mask = shadow_mt_mask;
1670 struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); 1686 struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
1671 1687
1672 if (!(vcpu->arch.cr4 & X86_CR4_PGE))
1673 global = 0;
1674 if (!global && sp->global) { 1688 if (!global && sp->global) {
1675 sp->global = 0; 1689 sp->global = 0;
1676 if (sp->unsync) { 1690 if (sp->unsync) {
@@ -1777,12 +1791,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1777 pgprintk("hfn old %lx new %lx\n", 1791 pgprintk("hfn old %lx new %lx\n",
1778 spte_to_pfn(*shadow_pte), pfn); 1792 spte_to_pfn(*shadow_pte), pfn);
1779 rmap_remove(vcpu->kvm, shadow_pte); 1793 rmap_remove(vcpu->kvm, shadow_pte);
1780 } else { 1794 } else
1781 if (largepage) 1795 was_rmapped = 1;
1782 was_rmapped = is_large_pte(*shadow_pte);
1783 else
1784 was_rmapped = 1;
1785 }
1786 } 1796 }
1787 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, 1797 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
1788 dirty, largepage, global, gfn, pfn, speculative, true)) { 1798 dirty, largepage, global, gfn, pfn, speculative, true)) {
@@ -1820,67 +1830,42 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
1820{ 1830{
1821} 1831}
1822 1832
1823struct direct_shadow_walk { 1833static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
1824 struct kvm_shadow_walk walker; 1834 int largepage, gfn_t gfn, pfn_t pfn)
1825 pfn_t pfn;
1826 int write;
1827 int largepage;
1828 int pt_write;
1829};
1830
1831static int direct_map_entry(struct kvm_shadow_walk *_walk,
1832 struct kvm_vcpu *vcpu,
1833 u64 addr, u64 *sptep, int level)
1834{ 1835{
1835 struct direct_shadow_walk *walk = 1836 struct kvm_shadow_walk_iterator iterator;
1836 container_of(_walk, struct direct_shadow_walk, walker);
1837 struct kvm_mmu_page *sp; 1837 struct kvm_mmu_page *sp;
1838 int pt_write = 0;
1838 gfn_t pseudo_gfn; 1839 gfn_t pseudo_gfn;
1839 gfn_t gfn = addr >> PAGE_SHIFT;
1840
1841 if (level == PT_PAGE_TABLE_LEVEL
1842 || (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
1843 mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
1844 0, walk->write, 1, &walk->pt_write,
1845 walk->largepage, 0, gfn, walk->pfn, false);
1846 ++vcpu->stat.pf_fixed;
1847 return 1;
1848 }
1849 1840
1850 if (*sptep == shadow_trap_nonpresent_pte) { 1841 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
1851 pseudo_gfn = (addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; 1842 if (iterator.level == PT_PAGE_TABLE_LEVEL
1852 sp = kvm_mmu_get_page(vcpu, pseudo_gfn, (gva_t)addr, level - 1, 1843 || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
1853 1, ACC_ALL, sptep); 1844 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
1854 if (!sp) { 1845 0, write, 1, &pt_write,
1855 pgprintk("nonpaging_map: ENOMEM\n"); 1846 largepage, 0, gfn, pfn, false);
1856 kvm_release_pfn_clean(walk->pfn); 1847 ++vcpu->stat.pf_fixed;
1857 return -ENOMEM; 1848 break;
1858 } 1849 }
1859 1850
1860 set_shadow_pte(sptep, 1851 if (*iterator.sptep == shadow_trap_nonpresent_pte) {
1861 __pa(sp->spt) 1852 pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
1862 | PT_PRESENT_MASK | PT_WRITABLE_MASK 1853 sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
1863 | shadow_user_mask | shadow_x_mask); 1854 iterator.level - 1,
1864 } 1855 1, ACC_ALL, iterator.sptep);
1865 return 0; 1856 if (!sp) {
1866} 1857 pgprintk("nonpaging_map: ENOMEM\n");
1858 kvm_release_pfn_clean(pfn);
1859 return -ENOMEM;
1860 }
1867 1861
1868static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, 1862 set_shadow_pte(iterator.sptep,
1869 int largepage, gfn_t gfn, pfn_t pfn) 1863 __pa(sp->spt)
1870{ 1864 | PT_PRESENT_MASK | PT_WRITABLE_MASK
1871 int r; 1865 | shadow_user_mask | shadow_x_mask);
1872 struct direct_shadow_walk walker = { 1866 }
1873 .walker = { .entry = direct_map_entry, }, 1867 }
1874 .pfn = pfn, 1868 return pt_write;
1875 .largepage = largepage,
1876 .write = write,
1877 .pt_write = 0,
1878 };
1879
1880 r = walk_shadow(&walker.walker, vcpu, gfn << PAGE_SHIFT);
1881 if (r < 0)
1882 return r;
1883 return walker.pt_write;
1884} 1869}
1885 1870
1886static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) 1871static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
@@ -1962,7 +1947,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1962 int i; 1947 int i;
1963 gfn_t root_gfn; 1948 gfn_t root_gfn;
1964 struct kvm_mmu_page *sp; 1949 struct kvm_mmu_page *sp;
1965 int metaphysical = 0; 1950 int direct = 0;
1966 1951
1967 root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; 1952 root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
1968 1953
@@ -1971,18 +1956,18 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1971 1956
1972 ASSERT(!VALID_PAGE(root)); 1957 ASSERT(!VALID_PAGE(root));
1973 if (tdp_enabled) 1958 if (tdp_enabled)
1974 metaphysical = 1; 1959 direct = 1;
1975 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 1960 sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
1976 PT64_ROOT_LEVEL, metaphysical, 1961 PT64_ROOT_LEVEL, direct,
1977 ACC_ALL, NULL); 1962 ACC_ALL, NULL);
1978 root = __pa(sp->spt); 1963 root = __pa(sp->spt);
1979 ++sp->root_count; 1964 ++sp->root_count;
1980 vcpu->arch.mmu.root_hpa = root; 1965 vcpu->arch.mmu.root_hpa = root;
1981 return; 1966 return;
1982 } 1967 }
1983 metaphysical = !is_paging(vcpu); 1968 direct = !is_paging(vcpu);
1984 if (tdp_enabled) 1969 if (tdp_enabled)
1985 metaphysical = 1; 1970 direct = 1;
1986 for (i = 0; i < 4; ++i) { 1971 for (i = 0; i < 4; ++i) {
1987 hpa_t root = vcpu->arch.mmu.pae_root[i]; 1972 hpa_t root = vcpu->arch.mmu.pae_root[i];
1988 1973
@@ -1996,7 +1981,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1996 } else if (vcpu->arch.mmu.root_level == 0) 1981 } else if (vcpu->arch.mmu.root_level == 0)
1997 root_gfn = 0; 1982 root_gfn = 0;
1998 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, 1983 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
1999 PT32_ROOT_LEVEL, metaphysical, 1984 PT32_ROOT_LEVEL, direct,
2000 ACC_ALL, NULL); 1985 ACC_ALL, NULL);
2001 root = __pa(sp->spt); 1986 root = __pa(sp->spt);
2002 ++sp->root_count; 1987 ++sp->root_count;
@@ -2251,17 +2236,23 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
2251 2236
2252static int init_kvm_softmmu(struct kvm_vcpu *vcpu) 2237static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
2253{ 2238{
2239 int r;
2240
2254 ASSERT(vcpu); 2241 ASSERT(vcpu);
2255 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); 2242 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
2256 2243
2257 if (!is_paging(vcpu)) 2244 if (!is_paging(vcpu))
2258 return nonpaging_init_context(vcpu); 2245 r = nonpaging_init_context(vcpu);
2259 else if (is_long_mode(vcpu)) 2246 else if (is_long_mode(vcpu))
2260 return paging64_init_context(vcpu); 2247 r = paging64_init_context(vcpu);
2261 else if (is_pae(vcpu)) 2248 else if (is_pae(vcpu))
2262 return paging32E_init_context(vcpu); 2249 r = paging32E_init_context(vcpu);
2263 else 2250 else
2264 return paging32_init_context(vcpu); 2251 r = paging32_init_context(vcpu);
2252
2253 vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level;
2254
2255 return r;
2265} 2256}
2266 2257
2267static int init_kvm_mmu(struct kvm_vcpu *vcpu) 2258static int init_kvm_mmu(struct kvm_vcpu *vcpu)
@@ -2492,7 +2483,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2492 index = kvm_page_table_hashfn(gfn); 2483 index = kvm_page_table_hashfn(gfn);
2493 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 2484 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
2494 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { 2485 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
2495 if (sp->gfn != gfn || sp->role.metaphysical || sp->role.invalid) 2486 if (sp->gfn != gfn || sp->role.direct || sp->role.invalid)
2496 continue; 2487 continue;
2497 pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; 2488 pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
2498 misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); 2489 misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
@@ -3130,7 +3121,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
3130 gfn_t gfn; 3121 gfn_t gfn;
3131 3122
3132 list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) { 3123 list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
3133 if (sp->role.metaphysical) 3124 if (sp->role.direct)
3134 continue; 3125 continue;
3135 3126
3136 gfn = unalias_gfn(vcpu->kvm, sp->gfn); 3127 gfn = unalias_gfn(vcpu->kvm, sp->gfn);
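
The recurring theme in the mmu.c hunks above is the replacement of the callback-based walk_shadow() with an open-coded iterator: users now keep their state in locals and loop with for_each_shadow_entry(), breaking out at the leaf level instead of returning nonzero from a callback. A toy standalone model of the init/okay/next idiom; the types are simplified stand-ins, and the real iterator follows PT64_BASE_ADDR_MASK links between levels:

#include <stdio.h>

struct walk_iterator {
	int level;			/* the real one also carries sptep */
};

static void walk_init(struct walk_iterator *it) { it->level = 3; }
static int  walk_okay(struct walk_iterator *it) { return it->level >= 1; }
static void walk_next(struct walk_iterator *it) { --it->level; }

#define for_each_entry(it) \
	for (walk_init(&(it)); walk_okay(&(it)); walk_next(&(it)))

int main(void)
{
	struct walk_iterator it;

	for_each_entry(it) {
		printf("visiting level %d\n", it.level);
		if (it.level == 1)
			break;		/* leaf reached, as in __direct_map() */
	}
	return 0;
}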
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 258e5d56298e..eaab2145f62b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -54,7 +54,7 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
54static inline int is_long_mode(struct kvm_vcpu *vcpu) 54static inline int is_long_mode(struct kvm_vcpu *vcpu)
55{ 55{
56#ifdef CONFIG_X86_64 56#ifdef CONFIG_X86_64
57 return vcpu->arch.shadow_efer & EFER_LME; 57 return vcpu->arch.shadow_efer & EFER_LMA;
58#else 58#else
59 return 0; 59 return 0;
60#endif 60#endif
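
The mmu.h one-liner is a behavioural fix, not a rename: EFER.LME only means long mode is enabled, while EFER.LMA is set by the processor once paging is turned on and long mode is actually active, so is_long_mode() must test LMA. Between writing LME and setting CR0.PG a guest still runs in legacy mode, and the old test got that window wrong. A sketch using the architectural bit positions (LME is bit 8, LMA bit 10):

#include <stdio.h>

#define EFER_LME	(1ULL << 8)	/* long mode enable: set by software */
#define EFER_LMA	(1ULL << 10)	/* long mode active: set by the CPU */

static int is_long_mode(unsigned long long efer)
{
	return !!(efer & EFER_LMA);
}

int main(void)
{
	unsigned long long efer = EFER_LME;	/* LME set, CR0.PG still clear */

	printf("%d\n", is_long_mode(efer));	/* 0: still legacy mode */
	efer |= EFER_LMA;			/* hardware sets LMA at CR0.PG=1 */
	printf("%d\n", is_long_mode(efer));	/* 1 */
	return 0;
}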
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 9fd78b6e17ad..6bd70206c561 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -25,7 +25,6 @@
25#if PTTYPE == 64 25#if PTTYPE == 64
26 #define pt_element_t u64 26 #define pt_element_t u64
27 #define guest_walker guest_walker64 27 #define guest_walker guest_walker64
28 #define shadow_walker shadow_walker64
29 #define FNAME(name) paging##64_##name 28 #define FNAME(name) paging##64_##name
30 #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK 29 #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
31 #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK 30 #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
@@ -42,7 +41,6 @@
42#elif PTTYPE == 32 41#elif PTTYPE == 32
43 #define pt_element_t u32 42 #define pt_element_t u32
44 #define guest_walker guest_walker32 43 #define guest_walker guest_walker32
45 #define shadow_walker shadow_walker32
46 #define FNAME(name) paging##32_##name 44 #define FNAME(name) paging##32_##name
47 #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK 45 #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
48 #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK 46 #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
@@ -73,18 +71,6 @@ struct guest_walker {
73 u32 error_code; 71 u32 error_code;
74}; 72};
75 73
76struct shadow_walker {
77 struct kvm_shadow_walk walker;
78 struct guest_walker *guest_walker;
79 int user_fault;
80 int write_fault;
81 int largepage;
82 int *ptwrite;
83 pfn_t pfn;
84 u64 *sptep;
85 gpa_t pte_gpa;
86};
87
88static gfn_t gpte_to_gfn(pt_element_t gpte) 74static gfn_t gpte_to_gfn(pt_element_t gpte)
89{ 75{
90 return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; 76 return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -283,91 +269,79 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
283/* 269/*
284 * Fetch a shadow pte for a specific level in the paging hierarchy. 270 * Fetch a shadow pte for a specific level in the paging hierarchy.
285 */ 271 */
286static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, 272static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
287 struct kvm_vcpu *vcpu, u64 addr, 273 struct guest_walker *gw,
288 u64 *sptep, int level) 274 int user_fault, int write_fault, int largepage,
275 int *ptwrite, pfn_t pfn)
289{ 276{
290 struct shadow_walker *sw =
291 container_of(_sw, struct shadow_walker, walker);
292 struct guest_walker *gw = sw->guest_walker;
293 unsigned access = gw->pt_access; 277 unsigned access = gw->pt_access;
294 struct kvm_mmu_page *shadow_page; 278 struct kvm_mmu_page *shadow_page;
295 u64 spte; 279 u64 spte, *sptep;
296 int metaphysical; 280 int direct;
297 gfn_t table_gfn; 281 gfn_t table_gfn;
298 int r; 282 int r;
283 int level;
299 pt_element_t curr_pte; 284 pt_element_t curr_pte;
285 struct kvm_shadow_walk_iterator iterator;
300 286
301 if (level == PT_PAGE_TABLE_LEVEL 287 if (!is_present_pte(gw->ptes[gw->level - 1]))
302 || (sw->largepage && level == PT_DIRECTORY_LEVEL)) { 288 return NULL;
303 mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
304 sw->user_fault, sw->write_fault,
305 gw->ptes[gw->level-1] & PT_DIRTY_MASK,
306 sw->ptwrite, sw->largepage,
307 gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
308 gw->gfn, sw->pfn, false);
309 sw->sptep = sptep;
310 return 1;
311 }
312 289
313 if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) 290 for_each_shadow_entry(vcpu, addr, iterator) {
314 return 0; 291 level = iterator.level;
292 sptep = iterator.sptep;
293 if (level == PT_PAGE_TABLE_LEVEL
294 || (largepage && level == PT_DIRECTORY_LEVEL)) {
295 mmu_set_spte(vcpu, sptep, access,
296 gw->pte_access & access,
297 user_fault, write_fault,
298 gw->ptes[gw->level-1] & PT_DIRTY_MASK,
299 ptwrite, largepage,
300 gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
301 gw->gfn, pfn, false);
302 break;
303 }
315 304
316 if (is_large_pte(*sptep)) { 305 if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
317 set_shadow_pte(sptep, shadow_trap_nonpresent_pte); 306 continue;
318 kvm_flush_remote_tlbs(vcpu->kvm);
319 rmap_remove(vcpu->kvm, sptep);
320 }
321 307
322 if (level == PT_DIRECTORY_LEVEL && gw->level == PT_DIRECTORY_LEVEL) { 308 if (is_large_pte(*sptep)) {
323 metaphysical = 1; 309 rmap_remove(vcpu->kvm, sptep);
324 if (!is_dirty_pte(gw->ptes[level - 1])) 310 set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
325 access &= ~ACC_WRITE_MASK; 311 kvm_flush_remote_tlbs(vcpu->kvm);
326 table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
327 } else {
328 metaphysical = 0;
329 table_gfn = gw->table_gfn[level - 2];
330 }
331 shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1,
332 metaphysical, access, sptep);
333 if (!metaphysical) {
334 r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
335 &curr_pte, sizeof(curr_pte));
336 if (r || curr_pte != gw->ptes[level - 2]) {
337 kvm_mmu_put_page(shadow_page, sptep);
338 kvm_release_pfn_clean(sw->pfn);
339 sw->sptep = NULL;
340 return 1;
341 } 312 }
342 }
343 313
344 spte = __pa(shadow_page->spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK 314 if (level == PT_DIRECTORY_LEVEL
345 | PT_WRITABLE_MASK | PT_USER_MASK; 315 && gw->level == PT_DIRECTORY_LEVEL) {
346 *sptep = spte; 316 direct = 1;
347 return 0; 317 if (!is_dirty_pte(gw->ptes[level - 1]))
348} 318 access &= ~ACC_WRITE_MASK;
349 319 table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
350static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, 320 } else {
351 struct guest_walker *guest_walker, 321 direct = 0;
352 int user_fault, int write_fault, int largepage, 322 table_gfn = gw->table_gfn[level - 2];
353 int *ptwrite, pfn_t pfn) 323 }
354{ 324 shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
355 struct shadow_walker walker = { 325 direct, access, sptep);
356 .walker = { .entry = FNAME(shadow_walk_entry), }, 326 if (!direct) {
357 .guest_walker = guest_walker, 327 r = kvm_read_guest_atomic(vcpu->kvm,
358 .user_fault = user_fault, 328 gw->pte_gpa[level - 2],
359 .write_fault = write_fault, 329 &curr_pte, sizeof(curr_pte));
360 .largepage = largepage, 330 if (r || curr_pte != gw->ptes[level - 2]) {
361 .ptwrite = ptwrite, 331 kvm_mmu_put_page(shadow_page, sptep);
362 .pfn = pfn, 332 kvm_release_pfn_clean(pfn);
363 }; 333 sptep = NULL;
364 334 break;
365 if (!is_present_pte(guest_walker->ptes[guest_walker->level - 1])) 335 }
366 return NULL; 336 }
367 337
368 walk_shadow(&walker.walker, vcpu, addr); 338 spte = __pa(shadow_page->spt)
339 | PT_PRESENT_MASK | PT_ACCESSED_MASK
340 | PT_WRITABLE_MASK | PT_USER_MASK;
341 *sptep = spte;
342 }
369 343
370 return walker.sptep; 344 return sptep;
371} 345}
372 346
373/* 347/*
@@ -465,54 +439,56 @@ out_unlock:
465 return 0; 439 return 0;
466} 440}
467 441
468static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, 442static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
469 struct kvm_vcpu *vcpu, u64 addr,
470 u64 *sptep, int level)
471{ 443{
472 struct shadow_walker *sw = 444 struct kvm_shadow_walk_iterator iterator;
473 container_of(_sw, struct shadow_walker, walker); 445 pt_element_t gpte;
474 446 gpa_t pte_gpa = -1;
475 /* FIXME: properly handle invlpg on large guest pages */ 447 int level;
476 if (level == PT_PAGE_TABLE_LEVEL || 448 u64 *sptep;
477 ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { 449 int need_flush = 0;
478 struct kvm_mmu_page *sp = page_header(__pa(sptep));
479 450
480 sw->pte_gpa = (sp->gfn << PAGE_SHIFT); 451 spin_lock(&vcpu->kvm->mmu_lock);
481 sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
482 452
483 if (is_shadow_present_pte(*sptep)) { 453 for_each_shadow_entry(vcpu, gva, iterator) {
484 rmap_remove(vcpu->kvm, sptep); 454 level = iterator.level;
485 if (is_large_pte(*sptep)) 455 sptep = iterator.sptep;
486 --vcpu->kvm->stat.lpages; 456
457 /* FIXME: properly handle invlpg on large guest pages */
458 if (level == PT_PAGE_TABLE_LEVEL ||
459 ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
460 struct kvm_mmu_page *sp = page_header(__pa(sptep));
461
462 pte_gpa = (sp->gfn << PAGE_SHIFT);
463 pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
464
465 if (is_shadow_present_pte(*sptep)) {
466 rmap_remove(vcpu->kvm, sptep);
467 if (is_large_pte(*sptep))
468 --vcpu->kvm->stat.lpages;
469 need_flush = 1;
470 }
471 set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
472 break;
487 } 473 }
488 set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
489 return 1;
490 }
491 if (!is_shadow_present_pte(*sptep))
492 return 1;
493 return 0;
494}
495 474
496static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) 475 if (!is_shadow_present_pte(*sptep))
497{ 476 break;
498 pt_element_t gpte; 477 }
499 struct shadow_walker walker = {
500 .walker = { .entry = FNAME(shadow_invlpg_entry), },
501 .pte_gpa = -1,
502 };
503 478
504 spin_lock(&vcpu->kvm->mmu_lock); 479 if (need_flush)
505 walk_shadow(&walker.walker, vcpu, gva); 480 kvm_flush_remote_tlbs(vcpu->kvm);
506 spin_unlock(&vcpu->kvm->mmu_lock); 481 spin_unlock(&vcpu->kvm->mmu_lock);
507 if (walker.pte_gpa == -1) 482
483 if (pte_gpa == -1)
508 return; 484 return;
509 if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte, 485 if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
510 sizeof(pt_element_t))) 486 sizeof(pt_element_t)))
511 return; 487 return;
512 if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) { 488 if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
513 if (mmu_topup_memory_caches(vcpu)) 489 if (mmu_topup_memory_caches(vcpu))
514 return; 490 return;
515 kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte, 491 kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
516 sizeof(pt_element_t), 0); 492 sizeof(pt_element_t), 0);
517 } 493 }
518} 494}
@@ -540,7 +516,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
540 pt_element_t pt[256 / sizeof(pt_element_t)]; 516 pt_element_t pt[256 / sizeof(pt_element_t)];
541 gpa_t pte_gpa; 517 gpa_t pte_gpa;
542 518
543 if (sp->role.metaphysical 519 if (sp->role.direct
544 || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) { 520 || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
545 nonpaging_prefetch_page(vcpu, sp); 521 nonpaging_prefetch_page(vcpu, sp);
546 return; 522 return;
@@ -619,7 +595,6 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
619 595
620#undef pt_element_t 596#undef pt_element_t
621#undef guest_walker 597#undef guest_walker
622#undef shadow_walker
623#undef FNAME 598#undef FNAME
624#undef PT_BASE_ADDR_MASK 599#undef PT_BASE_ADDR_MASK
625#undef PT_INDEX 600#undef PT_INDEX
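
A detail worth noting in the invlpg rewrite above: the remote TLB flush moves out of the walk. The old callback flushed as soon as it dropped a large spte; the new code records need_flush and issues a single kvm_flush_remote_tlbs() after the loop, still under mmu_lock. A generic sketch of that deferred-flush pattern, with invented names standing in for KVM's API:

#include <stdio.h>

static int entries[4] = { 1, 0, 1, 1 };

static void flush_tlbs(void)
{
	puts("flush (once)");
}

static void invalidate_range(int n)
{
	int i, need_flush = 0;

	/* lock();  -- mmu_lock in the real code */
	for (i = 0; i < n; i++)
		if (entries[i]) {
			entries[i] = 0;
			need_flush = 1;	/* remember, don't flush per entry */
		}
	if (need_flush)
		flush_tlbs();		/* one flush for the whole walk */
	/* unlock(); */
}

int main(void)
{
	invalidate_range(4);
	return 0;
}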
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a9e769e4e251..1821c2078199 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -38,9 +38,6 @@ MODULE_LICENSE("GPL");
38#define IOPM_ALLOC_ORDER 2 38#define IOPM_ALLOC_ORDER 2
39#define MSRPM_ALLOC_ORDER 1 39#define MSRPM_ALLOC_ORDER 1
40 40
41#define DR7_GD_MASK (1 << 13)
42#define DR6_BD_MASK (1 << 13)
43
44#define SEG_TYPE_LDT 2 41#define SEG_TYPE_LDT 2
45#define SEG_TYPE_BUSY_TSS16 3 42#define SEG_TYPE_BUSY_TSS16 3
46 43
@@ -50,6 +47,15 @@ MODULE_LICENSE("GPL");
50 47
51#define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) 48#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
52 49
50/* Turn on to get debugging output */
51/* #define NESTED_DEBUG */
52
53#ifdef NESTED_DEBUG
54#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
55#else
56#define nsvm_printk(fmt, args...) do {} while (0)
57#endif
58
53/* enable NPT for AMD64 and X86 with PAE */ 59/* enable NPT for AMD64 and X86 with PAE */
54#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 60#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
55static bool npt_enabled = true; 61static bool npt_enabled = true;
@@ -60,14 +66,29 @@ static int npt = 1;
60 66
61module_param(npt, int, S_IRUGO); 67module_param(npt, int, S_IRUGO);
62 68
69static int nested = 0;
70module_param(nested, int, S_IRUGO);
71
63static void kvm_reput_irq(struct vcpu_svm *svm); 72static void kvm_reput_irq(struct vcpu_svm *svm);
64static void svm_flush_tlb(struct kvm_vcpu *vcpu); 73static void svm_flush_tlb(struct kvm_vcpu *vcpu);
65 74
75static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
76static int nested_svm_vmexit(struct vcpu_svm *svm);
77static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
78 void *arg2, void *opaque);
79static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
80 bool has_error_code, u32 error_code);
81
66static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) 82static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
67{ 83{
68 return container_of(vcpu, struct vcpu_svm, vcpu); 84 return container_of(vcpu, struct vcpu_svm, vcpu);
69} 85}
70 86
87static inline bool is_nested(struct vcpu_svm *svm)
88{
89 return svm->nested_vmcb;
90}
91
71static unsigned long iopm_base; 92static unsigned long iopm_base;
72 93
73struct kvm_ldttss_desc { 94struct kvm_ldttss_desc {
@@ -157,32 +178,6 @@ static inline void kvm_write_cr2(unsigned long val)
157 asm volatile ("mov %0, %%cr2" :: "r" (val)); 178 asm volatile ("mov %0, %%cr2" :: "r" (val));
158} 179}
159 180
160static inline unsigned long read_dr6(void)
161{
162 unsigned long dr6;
163
164 asm volatile ("mov %%dr6, %0" : "=r" (dr6));
165 return dr6;
166}
167
168static inline void write_dr6(unsigned long val)
169{
170 asm volatile ("mov %0, %%dr6" :: "r" (val));
171}
172
173static inline unsigned long read_dr7(void)
174{
175 unsigned long dr7;
176
177 asm volatile ("mov %%dr7, %0" : "=r" (dr7));
178 return dr7;
179}
180
181static inline void write_dr7(unsigned long val)
182{
183 asm volatile ("mov %0, %%dr7" :: "r" (val));
184}
185
186static inline void force_new_asid(struct kvm_vcpu *vcpu) 181static inline void force_new_asid(struct kvm_vcpu *vcpu)
187{ 182{
188 to_svm(vcpu)->asid_generation--; 183 to_svm(vcpu)->asid_generation--;
@@ -198,7 +193,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
198 if (!npt_enabled && !(efer & EFER_LMA)) 193 if (!npt_enabled && !(efer & EFER_LMA))
199 efer &= ~EFER_LME; 194 efer &= ~EFER_LME;
200 195
201 to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; 196 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
202 vcpu->arch.shadow_efer = efer; 197 vcpu->arch.shadow_efer = efer;
203} 198}
204 199
@@ -207,6 +202,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
207{ 202{
208 struct vcpu_svm *svm = to_svm(vcpu); 203 struct vcpu_svm *svm = to_svm(vcpu);
209 204
205 /* If we are within a nested VM we'd better #VMEXIT and let the
206 guest handle the exception */
207 if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
208 return;
209
210 svm->vmcb->control.event_inj = nr 210 svm->vmcb->control.event_inj = nr
211 | SVM_EVTINJ_VALID 211 | SVM_EVTINJ_VALID
212 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) 212 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
@@ -242,7 +242,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
242 kvm_rip_write(vcpu, svm->next_rip); 242 kvm_rip_write(vcpu, svm->next_rip);
243 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; 243 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
244 244
245 vcpu->arch.interrupt_window_open = 1; 245 vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
246} 246}
247 247
248static int has_svm(void) 248static int has_svm(void)
@@ -250,7 +250,7 @@ static int has_svm(void)
250 const char *msg; 250 const char *msg;
251 251
252 if (!cpu_has_svm(&msg)) { 252 if (!cpu_has_svm(&msg)) {
253 printk(KERN_INFO "has_svn: %s\n", msg); 253 printk(KERN_INFO "has_svm: %s\n", msg);
254 return 0; 254 return 0;
255 } 255 }
256 256
@@ -292,7 +292,7 @@ static void svm_hardware_enable(void *garbage)
292 svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); 292 svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
293 293
294 rdmsrl(MSR_EFER, efer); 294 rdmsrl(MSR_EFER, efer);
295 wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK); 295 wrmsrl(MSR_EFER, efer | EFER_SVME);
296 296
297 wrmsrl(MSR_VM_HSAVE_PA, 297 wrmsrl(MSR_VM_HSAVE_PA,
298 page_to_pfn(svm_data->save_area) << PAGE_SHIFT); 298 page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
@@ -417,6 +417,14 @@ static __init int svm_hardware_setup(void)
417 if (boot_cpu_has(X86_FEATURE_NX)) 417 if (boot_cpu_has(X86_FEATURE_NX))
418 kvm_enable_efer_bits(EFER_NX); 418 kvm_enable_efer_bits(EFER_NX);
419 419
420 if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
421 kvm_enable_efer_bits(EFER_FFXSR);
422
423 if (nested) {
424 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
425 kvm_enable_efer_bits(EFER_SVME);
426 }
427
420 for_each_online_cpu(cpu) { 428 for_each_online_cpu(cpu) {
421 r = svm_cpu_init(cpu); 429 r = svm_cpu_init(cpu);
422 if (r) 430 if (r)
@@ -559,7 +567,7 @@ static void init_vmcb(struct vcpu_svm *svm)
559 init_sys_seg(&save->ldtr, SEG_TYPE_LDT); 567 init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
560 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); 568 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
561 569
562 save->efer = MSR_EFER_SVME_MASK; 570 save->efer = EFER_SVME;
563 save->dr6 = 0xffff0ff0; 571 save->dr6 = 0xffff0ff0;
564 save->dr7 = 0x400; 572 save->dr7 = 0x400;
565 save->rflags = 2; 573 save->rflags = 2;
@@ -591,6 +599,9 @@ static void init_vmcb(struct vcpu_svm *svm)
591 save->cr4 = 0; 599 save->cr4 = 0;
592 } 600 }
593 force_new_asid(&svm->vcpu); 601 force_new_asid(&svm->vcpu);
602
603 svm->nested_vmcb = 0;
604 svm->vcpu.arch.hflags = HF_GIF_MASK;
594} 605}
595 606
596static int svm_vcpu_reset(struct kvm_vcpu *vcpu) 607static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -615,6 +626,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
615 struct vcpu_svm *svm; 626 struct vcpu_svm *svm;
616 struct page *page; 627 struct page *page;
617 struct page *msrpm_pages; 628 struct page *msrpm_pages;
629 struct page *hsave_page;
630 struct page *nested_msrpm_pages;
618 int err; 631 int err;
619 632
620 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 633 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -637,14 +650,25 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
637 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 650 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
638 if (!msrpm_pages) 651 if (!msrpm_pages)
639 goto uninit; 652 goto uninit;
653
654 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
655 if (!nested_msrpm_pages)
656 goto uninit;
657
640 svm->msrpm = page_address(msrpm_pages); 658 svm->msrpm = page_address(msrpm_pages);
641 svm_vcpu_init_msrpm(svm->msrpm); 659 svm_vcpu_init_msrpm(svm->msrpm);
642 660
661 hsave_page = alloc_page(GFP_KERNEL);
662 if (!hsave_page)
663 goto uninit;
664 svm->hsave = page_address(hsave_page);
665
666 svm->nested_msrpm = page_address(nested_msrpm_pages);
667
643 svm->vmcb = page_address(page); 668 svm->vmcb = page_address(page);
644 clear_page(svm->vmcb); 669 clear_page(svm->vmcb);
645 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; 670 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
646 svm->asid_generation = 0; 671 svm->asid_generation = 0;
647 memset(svm->db_regs, 0, sizeof(svm->db_regs));
648 init_vmcb(svm); 672 init_vmcb(svm);
649 673
650 fx_init(&svm->vcpu); 674 fx_init(&svm->vcpu);
@@ -669,6 +693,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
669 693
670 __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); 694 __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
671 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); 695 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
696 __free_page(virt_to_page(svm->hsave));
697 __free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER);
672 kvm_vcpu_uninit(vcpu); 698 kvm_vcpu_uninit(vcpu);
673 kmem_cache_free(kvm_vcpu_cache, svm); 699 kmem_cache_free(kvm_vcpu_cache, svm);
674} 700}
@@ -718,6 +744,16 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
718 to_svm(vcpu)->vmcb->save.rflags = rflags; 744 to_svm(vcpu)->vmcb->save.rflags = rflags;
719} 745}
720 746
747static void svm_set_vintr(struct vcpu_svm *svm)
748{
749 svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
750}
751
752static void svm_clear_vintr(struct vcpu_svm *svm)
753{
754 svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
755}
756
721static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) 757static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
722{ 758{
723 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; 759 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
@@ -760,20 +796,37 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
760 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; 796 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
761 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; 797 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
762 798
763 /* 799 switch (seg) {
764 * SVM always stores 0 for the 'G' bit in the CS selector in 800 case VCPU_SREG_CS:
765 * the VMCB on a VMEXIT. This hurts cross-vendor migration: 801 /*
766 * Intel's VMENTRY has a check on the 'G' bit. 802 * SVM always stores 0 for the 'G' bit in the CS selector in
767 */ 803 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
768 if (seg == VCPU_SREG_CS) 804 * Intel's VMENTRY has a check on the 'G' bit.
805 */
769 var->g = s->limit > 0xfffff; 806 var->g = s->limit > 0xfffff;
770 807 break;
771 /* 808 case VCPU_SREG_TR:
772 * Work around a bug where the busy flag in the tr selector 809 /*
773 * isn't exposed 810 * Work around a bug where the busy flag in the tr selector
774 */ 811 * isn't exposed
775 if (seg == VCPU_SREG_TR) 812 */
776 var->type |= 0x2; 813 var->type |= 0x2;
814 break;
815 case VCPU_SREG_DS:
816 case VCPU_SREG_ES:
817 case VCPU_SREG_FS:
818 case VCPU_SREG_GS:
819 /*
820 * The accessed bit must always be set in the segment
821 * descriptor cache, although it can be cleared in the
822 * descriptor, the cached bit always remains at 1. Since
823 * Intel has a check on this, set it here to support
824 * cross-vendor migration.
825 */
826 if (!var->unusable)
827 var->type |= 0x1;
828 break;
829 }
777 830
778 var->unusable = !var->present; 831 var->unusable = !var->present;
779} 832}
@@ -905,9 +958,37 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
905 958
906} 959}
907 960
908static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) 961static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
909{ 962{
910 return -EOPNOTSUPP; 963 int old_debug = vcpu->guest_debug;
964 struct vcpu_svm *svm = to_svm(vcpu);
965
966 vcpu->guest_debug = dbg->control;
967
968 svm->vmcb->control.intercept_exceptions &=
969 ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
970 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
971 if (vcpu->guest_debug &
972 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
973 svm->vmcb->control.intercept_exceptions |=
974 1 << DB_VECTOR;
975 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
976 svm->vmcb->control.intercept_exceptions |=
977 1 << BP_VECTOR;
978 } else
979 vcpu->guest_debug = 0;
980
981 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
982 svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
983 else
984 svm->vmcb->save.dr7 = vcpu->arch.dr7;
985
986 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
987 svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
988 else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
989 svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
990
991 return 0;
911} 992}
912 993
913static int svm_get_irq(struct kvm_vcpu *vcpu) 994static int svm_get_irq(struct kvm_vcpu *vcpu)
@@ -949,7 +1030,29 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
949 1030
950static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) 1031static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
951{ 1032{
952 unsigned long val = to_svm(vcpu)->db_regs[dr]; 1033 struct vcpu_svm *svm = to_svm(vcpu);
1034 unsigned long val;
1035
1036 switch (dr) {
1037 case 0 ... 3:
1038 val = vcpu->arch.db[dr];
1039 break;
1040 case 6:
1041 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1042 val = vcpu->arch.dr6;
1043 else
1044 val = svm->vmcb->save.dr6;
1045 break;
1046 case 7:
1047 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1048 val = vcpu->arch.dr7;
1049 else
1050 val = svm->vmcb->save.dr7;
1051 break;
1052 default:
1053 val = 0;
1054 }
1055
953 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); 1056 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
954 return val; 1057 return val;
955} 1058}
@@ -959,33 +1062,40 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
959{ 1062{
960 struct vcpu_svm *svm = to_svm(vcpu); 1063 struct vcpu_svm *svm = to_svm(vcpu);
961 1064
962 *exception = 0; 1065 KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler);
963 1066
964 if (svm->vmcb->save.dr7 & DR7_GD_MASK) { 1067 *exception = 0;
965 svm->vmcb->save.dr7 &= ~DR7_GD_MASK;
966 svm->vmcb->save.dr6 |= DR6_BD_MASK;
967 *exception = DB_VECTOR;
968 return;
969 }
970 1068
971 switch (dr) { 1069 switch (dr) {
972 case 0 ... 3: 1070 case 0 ... 3:
973 svm->db_regs[dr] = value; 1071 vcpu->arch.db[dr] = value;
1072 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1073 vcpu->arch.eff_db[dr] = value;
974 return; 1074 return;
975 case 4 ... 5: 1075 case 4 ... 5:
976 if (vcpu->arch.cr4 & X86_CR4_DE) { 1076 if (vcpu->arch.cr4 & X86_CR4_DE)
977 *exception = UD_VECTOR; 1077 *exception = UD_VECTOR;
1078 return;
1079 case 6:
1080 if (value & 0xffffffff00000000ULL) {
1081 *exception = GP_VECTOR;
978 return; 1082 return;
979 } 1083 }
980 case 7: { 1084 vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
981 if (value & ~((1ULL << 32) - 1)) { 1085 return;
1086 case 7:
1087 if (value & 0xffffffff00000000ULL) {
982 *exception = GP_VECTOR; 1088 *exception = GP_VECTOR;
983 return; 1089 return;
984 } 1090 }
985 svm->vmcb->save.dr7 = value; 1091 vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
1092 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1093 svm->vmcb->save.dr7 = vcpu->arch.dr7;
1094 vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
1095 }
986 return; 1096 return;
987 }
988 default: 1097 default:
1098 /* FIXME: Possible case? */
989 printk(KERN_DEBUG "%s: unexpected dr %u\n", 1099 printk(KERN_DEBUG "%s: unexpected dr %u\n",
990 __func__, dr); 1100 __func__, dr);
991 *exception = UD_VECTOR; 1101 *exception = UD_VECTOR;
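
The rewritten svm_set_dr() above validates debug-register writes the way the architecture specifies: setting any of bits 63:32 in DR6 or DR7 raises #GP, and the stored value is normalized against fixed/volatile masks so reserved bits keep their architectural values. A standalone model of the DR6 path; the mask values mirror the usual x86 definitions but are restated here:

#include <stdio.h>

#define DR6_FIXED_1	0xffff0ff0ULL	/* bits that always read as one */
#define DR6_VOLATILE	0x0000e00fULL	/* bits software may change */

static int set_dr6(unsigned long long *dr6, unsigned long long value)
{
	if (value & 0xffffffff00000000ULL)
		return -1;			/* inject #GP */
	*dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
	return 0;
}

int main(void)
{
	unsigned long long dr6 = 0;

	printf("%d\n", set_dr6(&dr6, 1ULL << 35));	/* -1: #GP */
	printf("%d\n", set_dr6(&dr6, 0x1));		/* 0: accepted */
	printf("%#llx\n", dr6);				/* 0xffff0ff1 */
	return 0;
}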
@@ -1031,6 +1141,27 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1031 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); 1141 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
1032} 1142}
1033 1143
1144static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1145{
1146 if (!(svm->vcpu.guest_debug &
1147 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
1148 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
1149 return 1;
1150 }
1151 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1152 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1153 kvm_run->debug.arch.exception = DB_VECTOR;
1154 return 0;
1155}
1156
1157static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1158{
1159 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1160 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1161 kvm_run->debug.arch.exception = BP_VECTOR;
1162 return 0;
1163}
1164
1034static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1165static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1035{ 1166{
1036 int er; 1167 int er;
@@ -1080,7 +1211,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1080static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1211static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1081{ 1212{
1082 u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ 1213 u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
1083 int size, down, in, string, rep; 1214 int size, in, string;
1084 unsigned port; 1215 unsigned port;
1085 1216
1086 ++svm->vcpu.stat.io_exits; 1217 ++svm->vcpu.stat.io_exits;
@@ -1099,8 +1230,6 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1099 in = (io_info & SVM_IOIO_TYPE_MASK) != 0; 1230 in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
1100 port = io_info >> 16; 1231 port = io_info >> 16;
1101 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; 1232 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
1102 rep = (io_info & SVM_IOIO_REP_MASK) != 0;
1103 down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
1104 1233
1105 skip_emulated_instruction(&svm->vcpu); 1234 skip_emulated_instruction(&svm->vcpu);
1106 return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); 1235 return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
@@ -1139,6 +1268,567 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1139 return 1; 1268 return 1;
1140} 1269}
1141 1270
1271static int nested_svm_check_permissions(struct vcpu_svm *svm)
1272{
1273 if (!(svm->vcpu.arch.shadow_efer & EFER_SVME)
1274 || !is_paging(&svm->vcpu)) {
1275 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1276 return 1;
1277 }
1278
1279 if (svm->vmcb->save.cpl) {
1280 kvm_inject_gp(&svm->vcpu, 0);
1281 return 1;
1282 }
1283
1284 return 0;
1285}
1286
1287static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
1288 bool has_error_code, u32 error_code)
1289{
1290 if (is_nested(svm)) {
1291 svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
1292 svm->vmcb->control.exit_code_hi = 0;
1293 svm->vmcb->control.exit_info_1 = error_code;
1294 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
1295 if (nested_svm_exit_handled(svm, false)) {
1296 nsvm_printk("VMexit -> EXCP 0x%x\n", nr);
1297
1298 nested_svm_vmexit(svm);
1299 return 1;
1300 }
1301 }
1302
1303 return 0;
1304}
1305
1306static inline int nested_svm_intr(struct vcpu_svm *svm)
1307{
1308 if (is_nested(svm)) {
1309 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1310 return 0;
1311
1312 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
1313 return 0;
1314
1315 svm->vmcb->control.exit_code = SVM_EXIT_INTR;
1316
1317 if (nested_svm_exit_handled(svm, false)) {
1318 nsvm_printk("VMexit -> INTR\n");
1319 nested_svm_vmexit(svm);
1320 return 1;
1321 }
1322 }
1323
1324 return 0;
1325}
1326
1327static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
1328{
1329 struct page *page;
1330
1331 down_read(&current->mm->mmap_sem);
1332 page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
1333 up_read(&current->mm->mmap_sem);
1334
1335 if (is_error_page(page)) {
1336 printk(KERN_INFO "%s: could not find page at 0x%llx\n",
1337 __func__, gpa);
1338 kvm_release_page_clean(page);
1339 kvm_inject_gp(&svm->vcpu, 0);
1340 return NULL;
1341 }
1342 return page;
1343}
1344
1345static int nested_svm_do(struct vcpu_svm *svm,
1346 u64 arg1_gpa, u64 arg2_gpa, void *opaque,
1347 int (*handler)(struct vcpu_svm *svm,
1348 void *arg1,
1349 void *arg2,
1350 void *opaque))
1351{
1352 struct page *arg1_page;
1353 struct page *arg2_page = NULL;
1354 void *arg1;
1355 void *arg2 = NULL;
1356 int retval;
1357
1358 arg1_page = nested_svm_get_page(svm, arg1_gpa);
1359 if(arg1_page == NULL)
1360 return 1;
1361
1362 if (arg2_gpa) {
1363 arg2_page = nested_svm_get_page(svm, arg2_gpa);
1364 if(arg2_page == NULL) {
1365 kvm_release_page_clean(arg1_page);
1366 return 1;
1367 }
1368 }
1369
1370 arg1 = kmap_atomic(arg1_page, KM_USER0);
1371 if (arg2_gpa)
1372 arg2 = kmap_atomic(arg2_page, KM_USER1);
1373
1374 retval = handler(svm, arg1, arg2, opaque);
1375
1376 kunmap_atomic(arg1, KM_USER0);
1377 if (arg2_gpa)
1378 kunmap_atomic(arg2, KM_USER1);
1379
1380 kvm_release_page_dirty(arg1_page);
1381 if (arg2_gpa)
1382 kvm_release_page_dirty(arg2_page);
1383
1384 return retval;
1385}
1386
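
nested_svm_do() above factors a recurring pattern out of the nested-SVM handlers: translate one or two guest-physical arguments into host mappings, invoke a handler on them, then unmap and release the pages. A minimal userspace analogue of the same shape, with ordinary pointers standing in for the gfn_to_page()/kmap_atomic() steps:

#include <stdio.h>

typedef int (*nested_handler_t)(void *arg1, void *arg2, void *opaque);

static int with_args(void *arg1, void *arg2, void *opaque,
		     nested_handler_t handler)
{
	/* The kernel version maps and pins guest pages here ... */
	int ret = handler(arg1, arg2, opaque);
	/* ... and unmaps/releases them here. */
	return ret;
}

static int sum_handler(void *arg1, void *arg2, void *opaque)
{
	(void)opaque;
	return *(int *)arg1 + (arg2 ? *(int *)arg2 : 0);
}

int main(void)
{
	int a = 2, b = 3;

	printf("%d\n", with_args(&a, &b, NULL, sum_handler)); /* 5 */
	return 0;
}
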
1387static int nested_svm_exit_handled_real(struct vcpu_svm *svm,
1388 void *arg1,
1389 void *arg2,
1390 void *opaque)
1391{
1392 struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1393 bool kvm_overrides = *(bool *)opaque;
1394 u32 exit_code = svm->vmcb->control.exit_code;
1395
1396 if (kvm_overrides) {
1397 switch (exit_code) {
1398 case SVM_EXIT_INTR:
1399 case SVM_EXIT_NMI:
1400 return 0;
1401 /* For now we are always handling NPFs when using them */
1402 case SVM_EXIT_NPF:
1403 if (npt_enabled)
1404 return 0;
1405 break;
1406 /* When we're shadowing, trap PFs */
1407 case SVM_EXIT_EXCP_BASE + PF_VECTOR:
1408 if (!npt_enabled)
1409 return 0;
1410 break;
1411 default:
1412 break;
1413 }
1414 }
1415
1416 switch (exit_code) {
1417 case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
1418 u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
1419 if (nested_vmcb->control.intercept_cr_read & cr_bits)
1420 return 1;
1421 break;
1422 }
1423 case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
1424 u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
1425 if (nested_vmcb->control.intercept_cr_write & cr_bits)
1426 return 1;
1427 break;
1428 }
1429 case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
1430 u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
1431 if (nested_vmcb->control.intercept_dr_read & dr_bits)
1432 return 1;
1433 break;
1434 }
1435 case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
1436 u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
1437 if (nested_vmcb->control.intercept_dr_write & dr_bits)
1438 return 1;
1439 break;
1440 }
1441 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
1442 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
1443 if (nested_vmcb->control.intercept_exceptions & excp_bits)
1444 return 1;
1445 break;
1446 }
1447 default: {
1448 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
1449 nsvm_printk("exit code: 0x%x\n", exit_code);
1450 if (nested_vmcb->control.intercept & exit_bits)
1451 return 1;
1452 }
1453 }
1454
1455 return 0;
1456}
1457
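
The dispatcher above maps each exit-code class onto one bit of the corresponding nested-VMCB intercept mask, so deciding whether L1 wants the exit reduces to a subtraction and a bit test. A standalone illustration for a CR-read exit (SVM_EXIT_READ_CR0 is 0x000 in svm.h; the value is spelled out here only for the example):

#include <stdio.h>

#define SVM_EXIT_READ_CR0 0x000

int main(void)
{
	unsigned int exit_code = SVM_EXIT_READ_CR0 + 3;  /* guest read of CR3 */
	unsigned int nested_cr_read_intercepts = 1 << 3; /* L1 traps CR3 reads */
	unsigned int bit = 1 << (exit_code - SVM_EXIT_READ_CR0);

	printf("L1 handles exit: %d\n",
	       (nested_cr_read_intercepts & bit) != 0);  /* 1 */
	return 0;
}
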
1458static int nested_svm_exit_handled_msr(struct vcpu_svm *svm,
1459 void *arg1, void *arg2,
1460 void *opaque)
1461{
1462 struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1463 u8 *msrpm = (u8 *)arg2;
1464 u32 t0, t1;
1465 u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
1466 u32 param = svm->vmcb->control.exit_info_1 & 1;
1467
1468 if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT)))
1469 return 0;
1470
1471 switch(msr) {
1472 case 0 ... 0x1fff:
1473 t0 = (msr * 2) % 8;
1474 t1 = msr / 8;
1475 break;
1476 case 0xc0000000 ... 0xc0001fff:
1477 t0 = (8192 + msr - 0xc0000000) * 2;
1478 t1 = (t0 / 8);
1479 t0 %= 8;
1480 break;
1481 case 0xc0010000 ... 0xc0011fff:
1482 t0 = (16384 + msr - 0xc0010000) * 2;
1483 t1 = (t0 / 8);
1484 t0 %= 8;
1485 break;
1486 default:
1487 return 1;
1488 break;
1489 }
1490 if (msrpm[t1] & ((1 << param) << t0))
1491 return 1;
1492
1493 return 0;
1494}
1495
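
nested_svm_exit_handled_msr() above walks the nested MSR permission bitmap: each of the three architectural MSR ranges owns a region of the bitmap with two bits (read/write) per MSR, and exit_info_1 bit 0 selects which of the pair to test. A standalone version of the same offset arithmetic, deliberately mirroring the hunk above rather than restating the AMD manual:

#include <stdio.h>

/*
 * Byte index into the msrpm, or -1 when the MSR is outside the bitmap
 * (such accesses are always intercepted). *bit receives the offset of
 * the read bit; the write bit is the next one up.
 */
static int msrpm_offset(unsigned int msr, unsigned int *bit)
{
	unsigned int t0, t1;

	if (msr <= 0x1fff) {
		t0 = (msr * 2) % 8;
		t1 = msr / 8;
	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
		t0 = (8192 + msr - 0xc0000000) * 2;
		t1 = t0 / 8;
		t0 %= 8;
	} else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
		t0 = (16384 + msr - 0xc0010000) * 2;
		t1 = t0 / 8;
		t0 %= 8;
	} else {
		return -1;
	}
	*bit = t0;
	return (int)t1;
}

int main(void)
{
	unsigned int bit;
	int byte = msrpm_offset(0xc0000080 /* EFER */, &bit);

	printf("byte=%d bit=%u\n", byte, bit); /* byte=2080 bit=0 */
	return 0;
}
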
1496static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
1497{
1498 bool k = kvm_override;
1499
1500 switch (svm->vmcb->control.exit_code) {
1501 case SVM_EXIT_MSR:
1502 return nested_svm_do(svm, svm->nested_vmcb,
1503 svm->nested_vmcb_msrpm, NULL,
1504 nested_svm_exit_handled_msr);
1505 default: break;
1506 }
1507
1508 return nested_svm_do(svm, svm->nested_vmcb, 0, &k,
1509 nested_svm_exit_handled_real);
1510}
1511
1512static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
1513 void *arg2, void *opaque)
1514{
1515 struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1516 struct vmcb *hsave = svm->hsave;
1517 u64 nested_save[] = { nested_vmcb->save.cr0,
1518 nested_vmcb->save.cr3,
1519 nested_vmcb->save.cr4,
1520 nested_vmcb->save.efer,
1521 nested_vmcb->control.intercept_cr_read,
1522 nested_vmcb->control.intercept_cr_write,
1523 nested_vmcb->control.intercept_dr_read,
1524 nested_vmcb->control.intercept_dr_write,
1525 nested_vmcb->control.intercept_exceptions,
1526 nested_vmcb->control.intercept,
1527 nested_vmcb->control.msrpm_base_pa,
1528 nested_vmcb->control.iopm_base_pa,
1529 nested_vmcb->control.tsc_offset };
1530
1531 /* Give the current vmcb to the guest */
1532 memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb));
1533 nested_vmcb->save.cr0 = nested_save[0];
1534 if (!npt_enabled)
1535 nested_vmcb->save.cr3 = nested_save[1];
1536 nested_vmcb->save.cr4 = nested_save[2];
1537 nested_vmcb->save.efer = nested_save[3];
1538 nested_vmcb->control.intercept_cr_read = nested_save[4];
1539 nested_vmcb->control.intercept_cr_write = nested_save[5];
1540 nested_vmcb->control.intercept_dr_read = nested_save[6];
1541 nested_vmcb->control.intercept_dr_write = nested_save[7];
1542 nested_vmcb->control.intercept_exceptions = nested_save[8];
1543 nested_vmcb->control.intercept = nested_save[9];
1544 nested_vmcb->control.msrpm_base_pa = nested_save[10];
1545 nested_vmcb->control.iopm_base_pa = nested_save[11];
1546 nested_vmcb->control.tsc_offset = nested_save[12];
1547
1548 /* We always set V_INTR_MASKING and remember the old value in hflags */
1549 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1550 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
1551
1552 if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) &&
1553 (nested_vmcb->control.int_vector)) {
1554 nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n",
1555 nested_vmcb->control.int_vector);
1556 }
1557
1558 /* Restore the original control entries */
1559 svm->vmcb->control = hsave->control;
1560
1561 /* Kill any pending exceptions */
1562 if (svm->vcpu.arch.exception.pending == true)
1563 nsvm_printk("WARNING: Pending Exception\n");
1564 svm->vcpu.arch.exception.pending = false;
1565
1566 /* Restore selected save entries */
1567 svm->vmcb->save.es = hsave->save.es;
1568 svm->vmcb->save.cs = hsave->save.cs;
1569 svm->vmcb->save.ss = hsave->save.ss;
1570 svm->vmcb->save.ds = hsave->save.ds;
1571 svm->vmcb->save.gdtr = hsave->save.gdtr;
1572 svm->vmcb->save.idtr = hsave->save.idtr;
1573 svm->vmcb->save.rflags = hsave->save.rflags;
1574 svm_set_efer(&svm->vcpu, hsave->save.efer);
1575 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
1576 svm_set_cr4(&svm->vcpu, hsave->save.cr4);
1577 if (npt_enabled) {
1578 svm->vmcb->save.cr3 = hsave->save.cr3;
1579 svm->vcpu.arch.cr3 = hsave->save.cr3;
1580 } else {
1581 kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
1582 }
1583 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
1584 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
1585 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
1586 svm->vmcb->save.dr7 = 0;
1587 svm->vmcb->save.cpl = 0;
1588 svm->vmcb->control.exit_int_info = 0;
1589
1590 svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
1591 /* Exit nested SVM mode */
1592 svm->nested_vmcb = 0;
1593
1594 return 0;
1595}
1596
1597static int nested_svm_vmexit(struct vcpu_svm *svm)
1598{
1599 nsvm_printk("VMexit\n");
1600 if (nested_svm_do(svm, svm->nested_vmcb, 0,
1601 NULL, nested_svm_vmexit_real))
1602 return 1;
1603
1604 kvm_mmu_reset_context(&svm->vcpu);
1605 kvm_mmu_load(&svm->vcpu);
1606
1607 return 0;
1608}
1609
1610static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
1611 void *arg2, void *opaque)
1612{
1613 int i;
1614 u32 *nested_msrpm = (u32*)arg1;
1615 for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
1616 svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
1617 svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm);
1618
1619 return 0;
1620}
1621
1622static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
1623 void *arg2, void *opaque)
1624{
1625 struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1626 struct vmcb *hsave = svm->hsave;
1627
1628 /* nested_vmcb is our indicator if nested SVM is activated */
1629 svm->nested_vmcb = svm->vmcb->save.rax;
1630
1631 /* Clear internal status */
1632 svm->vcpu.arch.exception.pending = false;
1633
1634 /* Save the old vmcb, so we don't need to pick what we save, but
1635 can restore everything when a VMEXIT occurs */
1636 memcpy(hsave, svm->vmcb, sizeof(struct vmcb));
1637 /* We need to remember the original CR3 in the SPT case */
1638 if (!npt_enabled)
1639 hsave->save.cr3 = svm->vcpu.arch.cr3;
1640 hsave->save.cr4 = svm->vcpu.arch.cr4;
1641 hsave->save.rip = svm->next_rip;
1642
1643 if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
1644 svm->vcpu.arch.hflags |= HF_HIF_MASK;
1645 else
1646 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
1647
1648 /* Load the nested guest state */
1649 svm->vmcb->save.es = nested_vmcb->save.es;
1650 svm->vmcb->save.cs = nested_vmcb->save.cs;
1651 svm->vmcb->save.ss = nested_vmcb->save.ss;
1652 svm->vmcb->save.ds = nested_vmcb->save.ds;
1653 svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
1654 svm->vmcb->save.idtr = nested_vmcb->save.idtr;
1655 svm->vmcb->save.rflags = nested_vmcb->save.rflags;
1656 svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
1657 svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
1658 svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
1659 if (npt_enabled) {
1660 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
1661 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
1662 } else {
1663 kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
1664 kvm_mmu_reset_context(&svm->vcpu);
1665 }
1666 svm->vmcb->save.cr2 = nested_vmcb->save.cr2;
1667 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
1668 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
1669 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
1670 /* In case we don't even reach vcpu_run, the fields are not updated */
1671 svm->vmcb->save.rax = nested_vmcb->save.rax;
1672 svm->vmcb->save.rsp = nested_vmcb->save.rsp;
1673 svm->vmcb->save.rip = nested_vmcb->save.rip;
1674 svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
1675 svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
1676 svm->vmcb->save.cpl = nested_vmcb->save.cpl;
1677
1678 /* We don't want a nested guest to be more powerful than the guest,
1679 so all intercepts are ORed */
1680 svm->vmcb->control.intercept_cr_read |=
1681 nested_vmcb->control.intercept_cr_read;
1682 svm->vmcb->control.intercept_cr_write |=
1683 nested_vmcb->control.intercept_cr_write;
1684 svm->vmcb->control.intercept_dr_read |=
1685 nested_vmcb->control.intercept_dr_read;
1686 svm->vmcb->control.intercept_dr_write |=
1687 nested_vmcb->control.intercept_dr_write;
1688 svm->vmcb->control.intercept_exceptions |=
1689 nested_vmcb->control.intercept_exceptions;
1690
1691 svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
1692
1693 svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
1694
1695 force_new_asid(&svm->vcpu);
1696 svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
1697 svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
1698 svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
1699 if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
1700 nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
1701 nested_vmcb->control.int_ctl);
1702 }
1703 if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
1704 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
1705 else
1706 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
1707
1708 nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
1709 nested_vmcb->control.exit_int_info,
1710 nested_vmcb->control.int_state);
1711
1712 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
1713 svm->vmcb->control.int_state = nested_vmcb->control.int_state;
1714 svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
1715 if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
1716 nsvm_printk("Injecting Event: 0x%x\n",
1717 nested_vmcb->control.event_inj);
1718 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
1719 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
1720
1721 svm->vcpu.arch.hflags |= HF_GIF_MASK;
1722
1723 return 0;
1724}
1725
1726static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
1727{
1728 to_vmcb->save.fs = from_vmcb->save.fs;
1729 to_vmcb->save.gs = from_vmcb->save.gs;
1730 to_vmcb->save.tr = from_vmcb->save.tr;
1731 to_vmcb->save.ldtr = from_vmcb->save.ldtr;
1732 to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
1733 to_vmcb->save.star = from_vmcb->save.star;
1734 to_vmcb->save.lstar = from_vmcb->save.lstar;
1735 to_vmcb->save.cstar = from_vmcb->save.cstar;
1736 to_vmcb->save.sfmask = from_vmcb->save.sfmask;
1737 to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
1738 to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
1739 to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
1740
1741 return 1;
1742}
1743
1744static int nested_svm_vmload(struct vcpu_svm *svm, void *nested_vmcb,
1745 void *arg2, void *opaque)
1746{
1747 return nested_svm_vmloadsave((struct vmcb *)nested_vmcb, svm->vmcb);
1748}
1749
1750static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
1751 void *arg2, void *opaque)
1752{
1753 return nested_svm_vmloadsave(svm->vmcb, (struct vmcb *)nested_vmcb);
1754}
1755
1756static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1757{
1758 if (nested_svm_check_permissions(svm))
1759 return 1;
1760
1761 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1762 skip_emulated_instruction(&svm->vcpu);
1763
1764 nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmload);
1765
1766 return 1;
1767}
1768
1769static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1770{
1771 if (nested_svm_check_permissions(svm))
1772 return 1;
1773
1774 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1775 skip_emulated_instruction(&svm->vcpu);
1776
1777 nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmsave);
1778
1779 return 1;
1780}
1781
1782static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1783{
1784 nsvm_printk("VMrun\n");
1785 if (nested_svm_check_permissions(svm))
1786 return 1;
1787
1788 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1789 skip_emulated_instruction(&svm->vcpu);
1790
1791 if (nested_svm_do(svm, svm->vmcb->save.rax, 0,
1792 NULL, nested_svm_vmrun))
1793 return 1;
1794
1795 if (nested_svm_do(svm, svm->nested_vmcb_msrpm, 0,
1796 NULL, nested_svm_vmrun_msrpm))
1797 return 1;
1798
1799 return 1;
1800}
1801
1802static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1803{
1804 if (nested_svm_check_permissions(svm))
1805 return 1;
1806
1807 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1808 skip_emulated_instruction(&svm->vcpu);
1809
1810 svm->vcpu.arch.hflags |= HF_GIF_MASK;
1811
1812 return 1;
1813}
1814
1815static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1816{
1817 if (nested_svm_check_permissions(svm))
1818 return 1;
1819
1820 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1821 skip_emulated_instruction(&svm->vcpu);
1822
1823 svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
1824
1825 /* After a CLGI no interrupts should come */
1826 svm_clear_vintr(svm);
1827 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
1828
1829 return 1;
1830}
1831
1142static int invalid_op_interception(struct vcpu_svm *svm, 1832static int invalid_op_interception(struct vcpu_svm *svm,
1143 struct kvm_run *kvm_run) 1833 struct kvm_run *kvm_run)
1144{ 1834{
@@ -1250,6 +1940,15 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
1250 case MSR_IA32_LASTINTTOIP: 1940 case MSR_IA32_LASTINTTOIP:
1251 *data = svm->vmcb->save.last_excp_to; 1941 *data = svm->vmcb->save.last_excp_to;
1252 break; 1942 break;
1943 case MSR_VM_HSAVE_PA:
1944 *data = svm->hsave_msr;
1945 break;
1946 case MSR_VM_CR:
1947 *data = 0;
1948 break;
1949 case MSR_IA32_UCODE_REV:
1950 *data = 0x01000065;
1951 break;
1253 default: 1952 default:
1254 return kvm_get_msr_common(vcpu, ecx, data); 1953 return kvm_get_msr_common(vcpu, ecx, data);
1255 } 1954 }
@@ -1344,6 +2043,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
1344 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data); 2043 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
1345 2044
1346 break; 2045 break;
2046 case MSR_VM_HSAVE_PA:
2047 svm->hsave_msr = data;
2048 break;
1347 default: 2049 default:
1348 return kvm_set_msr_common(vcpu, ecx, data); 2050 return kvm_set_msr_common(vcpu, ecx, data);
1349 } 2051 }
@@ -1380,7 +2082,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
1380{ 2082{
1381 KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler); 2083 KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
1382 2084
1383 svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); 2085 svm_clear_vintr(svm);
1384 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; 2086 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
1385 /* 2087 /*
1386 * If the user space waits to inject interrupts, exit as soon as 2088 * If the user space waits to inject interrupts, exit as soon as
@@ -1417,6 +2119,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
1417 [SVM_EXIT_WRITE_DR3] = emulate_on_interception, 2119 [SVM_EXIT_WRITE_DR3] = emulate_on_interception,
1418 [SVM_EXIT_WRITE_DR5] = emulate_on_interception, 2120 [SVM_EXIT_WRITE_DR5] = emulate_on_interception,
1419 [SVM_EXIT_WRITE_DR7] = emulate_on_interception, 2121 [SVM_EXIT_WRITE_DR7] = emulate_on_interception,
2122 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
2123 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
1420 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, 2124 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
1421 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 2125 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
1422 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, 2126 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
@@ -1436,12 +2140,12 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
1436 [SVM_EXIT_MSR] = msr_interception, 2140 [SVM_EXIT_MSR] = msr_interception,
1437 [SVM_EXIT_TASK_SWITCH] = task_switch_interception, 2141 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
1438 [SVM_EXIT_SHUTDOWN] = shutdown_interception, 2142 [SVM_EXIT_SHUTDOWN] = shutdown_interception,
1439 [SVM_EXIT_VMRUN] = invalid_op_interception, 2143 [SVM_EXIT_VMRUN] = vmrun_interception,
1440 [SVM_EXIT_VMMCALL] = vmmcall_interception, 2144 [SVM_EXIT_VMMCALL] = vmmcall_interception,
1441 [SVM_EXIT_VMLOAD] = invalid_op_interception, 2145 [SVM_EXIT_VMLOAD] = vmload_interception,
1442 [SVM_EXIT_VMSAVE] = invalid_op_interception, 2146 [SVM_EXIT_VMSAVE] = vmsave_interception,
1443 [SVM_EXIT_STGI] = invalid_op_interception, 2147 [SVM_EXIT_STGI] = stgi_interception,
1444 [SVM_EXIT_CLGI] = invalid_op_interception, 2148 [SVM_EXIT_CLGI] = clgi_interception,
1445 [SVM_EXIT_SKINIT] = invalid_op_interception, 2149 [SVM_EXIT_SKINIT] = invalid_op_interception,
1446 [SVM_EXIT_WBINVD] = emulate_on_interception, 2150 [SVM_EXIT_WBINVD] = emulate_on_interception,
1447 [SVM_EXIT_MONITOR] = invalid_op_interception, 2151 [SVM_EXIT_MONITOR] = invalid_op_interception,
@@ -1457,6 +2161,17 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1457 KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip, 2161 KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
1458 (u32)((u64)svm->vmcb->save.rip >> 32), entryexit); 2162 (u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
1459 2163
2164 if (is_nested(svm)) {
2165 nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
2166 exit_code, svm->vmcb->control.exit_info_1,
2167 svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
2168 if (nested_svm_exit_handled(svm, true)) {
2169 nested_svm_vmexit(svm);
2170 nsvm_printk("-> #VMEXIT\n");
2171 return 1;
2172 }
2173 }
2174
1460 if (npt_enabled) { 2175 if (npt_enabled) {
1461 int mmu_reload = 0; 2176 int mmu_reload = 0;
1462 if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { 2177 if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
@@ -1544,6 +2259,8 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
1544{ 2259{
1545 struct vcpu_svm *svm = to_svm(vcpu); 2260 struct vcpu_svm *svm = to_svm(vcpu);
1546 2261
2262 nested_svm_intr(svm);
2263
1547 svm_inject_irq(svm, irq); 2264 svm_inject_irq(svm, irq);
1548} 2265}
1549 2266
@@ -1589,11 +2306,17 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
1589 if (!kvm_cpu_has_interrupt(vcpu)) 2306 if (!kvm_cpu_has_interrupt(vcpu))
1590 goto out; 2307 goto out;
1591 2308
2309 if (nested_svm_intr(svm))
2310 goto out;
2311
2312 if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
2313 goto out;
2314
1592 if (!(vmcb->save.rflags & X86_EFLAGS_IF) || 2315 if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
1593 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || 2316 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
1594 (vmcb->control.event_inj & SVM_EVTINJ_VALID)) { 2317 (vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
1595 /* unable to deliver irq, set pending irq */ 2318 /* unable to deliver irq, set pending irq */
1596 vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR); 2319 svm_set_vintr(svm);
1597 svm_inject_irq(svm, 0x0); 2320 svm_inject_irq(svm, 0x0);
1598 goto out; 2321 goto out;
1599 } 2322 }
@@ -1615,7 +2338,8 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
1615 } 2338 }
1616 2339
1617 svm->vcpu.arch.interrupt_window_open = 2340 svm->vcpu.arch.interrupt_window_open =
1618 !(control->int_state & SVM_INTERRUPT_SHADOW_MASK); 2341 !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
2342 (svm->vcpu.arch.hflags & HF_GIF_MASK);
1619} 2343}
1620 2344
1621static void svm_do_inject_vector(struct vcpu_svm *svm) 2345static void svm_do_inject_vector(struct vcpu_svm *svm)
@@ -1637,9 +2361,13 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
1637 struct vcpu_svm *svm = to_svm(vcpu); 2361 struct vcpu_svm *svm = to_svm(vcpu);
1638 struct vmcb_control_area *control = &svm->vmcb->control; 2362 struct vmcb_control_area *control = &svm->vmcb->control;
1639 2363
2364 if (nested_svm_intr(svm))
2365 return;
2366
1640 svm->vcpu.arch.interrupt_window_open = 2367 svm->vcpu.arch.interrupt_window_open =
1641 (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && 2368 (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
1642 (svm->vmcb->save.rflags & X86_EFLAGS_IF)); 2369 (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
2370 (svm->vcpu.arch.hflags & HF_GIF_MASK));
1643 2371
1644 if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary) 2372 if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
1645 /* 2373 /*
@@ -1652,9 +2380,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
1652 */ 2380 */
1653 if (!svm->vcpu.arch.interrupt_window_open && 2381 if (!svm->vcpu.arch.interrupt_window_open &&
1654 (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window)) 2382 (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
1655 control->intercept |= 1ULL << INTERCEPT_VINTR; 2383 svm_set_vintr(svm);
1656 else 2384 else
1657 control->intercept &= ~(1ULL << INTERCEPT_VINTR); 2385 svm_clear_vintr(svm);
1658} 2386}
1659 2387
1660static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) 2388static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
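
Taken together, the interrupt hunks above extend the window test with the global interrupt flag: after this series a virtual interrupt can only be delivered when the guest is out of an interrupt shadow, RFLAGS.IF is set, and GIF (toggled by the STGI/CLGI handlers earlier in the patch) is set. Restated as a standalone predicate:

#include <stdio.h>

static int window_open(int intr_shadow, int eflags_if, int gif)
{
	return !intr_shadow && eflags_if && gif;
}

int main(void)
{
	printf("%d\n", window_open(0, 1, 0)); /* 0: CLGI blocks delivery */
	printf("%d\n", window_open(0, 1, 1)); /* 1: window is open */
	return 0;
}
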
@@ -1662,22 +2390,6 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
1662 return 0; 2390 return 0;
1663} 2391}
1664 2392
1665static void save_db_regs(unsigned long *db_regs)
1666{
1667 asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
1668 asm volatile ("mov %%dr1, %0" : "=r"(db_regs[1]));
1669 asm volatile ("mov %%dr2, %0" : "=r"(db_regs[2]));
1670 asm volatile ("mov %%dr3, %0" : "=r"(db_regs[3]));
1671}
1672
1673static void load_db_regs(unsigned long *db_regs)
1674{
1675 asm volatile ("mov %0, %%dr0" : : "r"(db_regs[0]));
1676 asm volatile ("mov %0, %%dr1" : : "r"(db_regs[1]));
1677 asm volatile ("mov %0, %%dr2" : : "r"(db_regs[2]));
1678 asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3]));
1679}
1680
1681static void svm_flush_tlb(struct kvm_vcpu *vcpu) 2393static void svm_flush_tlb(struct kvm_vcpu *vcpu)
1682{ 2394{
1683 force_new_asid(vcpu); 2395 force_new_asid(vcpu);
@@ -1736,19 +2448,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1736 gs_selector = kvm_read_gs(); 2448 gs_selector = kvm_read_gs();
1737 ldt_selector = kvm_read_ldt(); 2449 ldt_selector = kvm_read_ldt();
1738 svm->host_cr2 = kvm_read_cr2(); 2450 svm->host_cr2 = kvm_read_cr2();
1739 svm->host_dr6 = read_dr6(); 2451 if (!is_nested(svm))
1740 svm->host_dr7 = read_dr7(); 2452 svm->vmcb->save.cr2 = vcpu->arch.cr2;
1741 svm->vmcb->save.cr2 = vcpu->arch.cr2;
1742 /* required for live migration with NPT */ 2453 /* required for live migration with NPT */
1743 if (npt_enabled) 2454 if (npt_enabled)
1744 svm->vmcb->save.cr3 = vcpu->arch.cr3; 2455 svm->vmcb->save.cr3 = vcpu->arch.cr3;
1745 2456
1746 if (svm->vmcb->save.dr7 & 0xff) {
1747 write_dr7(0);
1748 save_db_regs(svm->host_db_regs);
1749 load_db_regs(svm->db_regs);
1750 }
1751
1752 clgi(); 2457 clgi();
1753 2458
1754 local_irq_enable(); 2459 local_irq_enable();
@@ -1824,16 +2529,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1824#endif 2529#endif
1825 ); 2530 );
1826 2531
1827 if ((svm->vmcb->save.dr7 & 0xff))
1828 load_db_regs(svm->host_db_regs);
1829
1830 vcpu->arch.cr2 = svm->vmcb->save.cr2; 2532 vcpu->arch.cr2 = svm->vmcb->save.cr2;
1831 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; 2533 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
1832 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; 2534 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
1833 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; 2535 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
1834 2536
1835 write_dr6(svm->host_dr6);
1836 write_dr7(svm->host_dr7);
1837 kvm_write_cr2(svm->host_cr2); 2537 kvm_write_cr2(svm->host_cr2);
1838 2538
1839 kvm_load_fs(fs_selector); 2539 kvm_load_fs(fs_selector);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7611af576829..bb481330716f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -91,6 +91,7 @@ struct vcpu_vmx {
91 } rmode; 91 } rmode;
92 int vpid; 92 int vpid;
93 bool emulation_required; 93 bool emulation_required;
94 enum emulation_result invalid_state_emulation_result;
94 95
95 /* Support for vnmi-less CPUs */ 96 /* Support for vnmi-less CPUs */
96 int soft_vnmi_blocked; 97 int soft_vnmi_blocked;
@@ -189,21 +190,21 @@ static inline int is_page_fault(u32 intr_info)
189{ 190{
190 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 191 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
191 INTR_INFO_VALID_MASK)) == 192 INTR_INFO_VALID_MASK)) ==
192 (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); 193 (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
193} 194}
194 195
195static inline int is_no_device(u32 intr_info) 196static inline int is_no_device(u32 intr_info)
196{ 197{
197 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 198 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
198 INTR_INFO_VALID_MASK)) == 199 INTR_INFO_VALID_MASK)) ==
199 (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); 200 (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
200} 201}
201 202
202static inline int is_invalid_opcode(u32 intr_info) 203static inline int is_invalid_opcode(u32 intr_info)
203{ 204{
204 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 205 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
205 INTR_INFO_VALID_MASK)) == 206 INTR_INFO_VALID_MASK)) ==
206 (INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); 207 (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
207} 208}
208 209
209static inline int is_external_interrupt(u32 intr_info) 210static inline int is_external_interrupt(u32 intr_info)
@@ -480,8 +481,13 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
480 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR); 481 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
481 if (!vcpu->fpu_active) 482 if (!vcpu->fpu_active)
482 eb |= 1u << NM_VECTOR; 483 eb |= 1u << NM_VECTOR;
483 if (vcpu->guest_debug.enabled) 484 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
484 eb |= 1u << DB_VECTOR; 485 if (vcpu->guest_debug &
486 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
487 eb |= 1u << DB_VECTOR;
488 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
489 eb |= 1u << BP_VECTOR;
490 }
485 if (vcpu->arch.rmode.active) 491 if (vcpu->arch.rmode.active)
486 eb = ~0; 492 eb = ~0;
487 if (vm_need_ept()) 493 if (vm_need_ept())
@@ -747,29 +753,33 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
747 bool has_error_code, u32 error_code) 753 bool has_error_code, u32 error_code)
748{ 754{
749 struct vcpu_vmx *vmx = to_vmx(vcpu); 755 struct vcpu_vmx *vmx = to_vmx(vcpu);
756 u32 intr_info = nr | INTR_INFO_VALID_MASK;
750 757
751 if (has_error_code) 758 if (has_error_code) {
752 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); 759 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
760 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
761 }
753 762
754 if (vcpu->arch.rmode.active) { 763 if (vcpu->arch.rmode.active) {
755 vmx->rmode.irq.pending = true; 764 vmx->rmode.irq.pending = true;
756 vmx->rmode.irq.vector = nr; 765 vmx->rmode.irq.vector = nr;
757 vmx->rmode.irq.rip = kvm_rip_read(vcpu); 766 vmx->rmode.irq.rip = kvm_rip_read(vcpu);
758 if (nr == BP_VECTOR) 767 if (nr == BP_VECTOR || nr == OF_VECTOR)
759 vmx->rmode.irq.rip++; 768 vmx->rmode.irq.rip++;
760 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 769 intr_info |= INTR_TYPE_SOFT_INTR;
761 nr | INTR_TYPE_SOFT_INTR 770 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
762 | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
763 | INTR_INFO_VALID_MASK);
764 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); 771 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
765 kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); 772 kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
766 return; 773 return;
767 } 774 }
768 775
769 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 776 if (nr == BP_VECTOR || nr == OF_VECTOR) {
770 nr | INTR_TYPE_EXCEPTION 777 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
771 | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) 778 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
772 | INTR_INFO_VALID_MASK); 779 } else
780 intr_info |= INTR_TYPE_HARD_EXCEPTION;
781
782 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
773} 783}
774 784
775static bool vmx_exception_injected(struct kvm_vcpu *vcpu) 785static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
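
vmx_queue_exception() now builds the VM-entry interruption-information field incrementally: vector in bits 7:0, event type in bits 10:8 (hard vs. soft exception), the deliver-error-code flag in bit 11, and the valid bit in bit 31. A standalone encoder; the constants mirror the VMX definitions used in the hunk and are written out here only for illustration:

#include <stdio.h>

#define INTR_INFO_VALID_MASK        (1u << 31)
#define INTR_INFO_DELIVER_CODE_MASK (1u << 11)
#define INTR_TYPE_HARD_EXCEPTION    (3u << 8)
#define INTR_TYPE_SOFT_EXCEPTION    (6u << 8)

static unsigned int make_intr_info(unsigned int nr, int has_error_code,
				   int soft)
{
	unsigned int info = nr | INTR_INFO_VALID_MASK;

	if (has_error_code)
		info |= INTR_INFO_DELIVER_CODE_MASK;
	info |= soft ? INTR_TYPE_SOFT_EXCEPTION : INTR_TYPE_HARD_EXCEPTION;
	return info;
}

int main(void)
{
	/* #GP(0): hard exception carrying an error code */
	printf("0x%08x\n", make_intr_info(13, 1, 0)); /* 0x80000b0d */
	return 0;
}
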
@@ -856,11 +866,8 @@ static u64 guest_read_tsc(void)
856 * writes 'guest_tsc' into guest's timestamp counter "register" 866 * writes 'guest_tsc' into guest's timestamp counter "register"
857 * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc 867 * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
858 */ 868 */
859static void guest_write_tsc(u64 guest_tsc) 869static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
860{ 870{
861 u64 host_tsc;
862
863 rdtscll(host_tsc);
864 vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc); 871 vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
865} 872}
866 873
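
Passing host_tsc in explicitly makes guest_write_tsc() a pure expression of the identity in the comment above: guest_tsc = host_tsc + TSC_OFFSET, hence TSC_OFFSET = guest_tsc - host_tsc, with all arithmetic wrapping modulo 2^64. A two-line illustration:

#include <stdio.h>

int main(void)
{
	unsigned long long host_tsc = 1000000ULL, guest_tsc = 250ULL;
	unsigned long long tsc_offset = guest_tsc - host_tsc; /* wraps mod 2^64 */

	printf("guest sees: %llu\n", host_tsc + tsc_offset);  /* 250 */
	return 0;
}
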
@@ -925,14 +932,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
925{ 932{
926 struct vcpu_vmx *vmx = to_vmx(vcpu); 933 struct vcpu_vmx *vmx = to_vmx(vcpu);
927 struct kvm_msr_entry *msr; 934 struct kvm_msr_entry *msr;
935 u64 host_tsc;
928 int ret = 0; 936 int ret = 0;
929 937
930 switch (msr_index) { 938 switch (msr_index) {
931#ifdef CONFIG_X86_64
932 case MSR_EFER: 939 case MSR_EFER:
933 vmx_load_host_state(vmx); 940 vmx_load_host_state(vmx);
934 ret = kvm_set_msr_common(vcpu, msr_index, data); 941 ret = kvm_set_msr_common(vcpu, msr_index, data);
935 break; 942 break;
943#ifdef CONFIG_X86_64
936 case MSR_FS_BASE: 944 case MSR_FS_BASE:
937 vmcs_writel(GUEST_FS_BASE, data); 945 vmcs_writel(GUEST_FS_BASE, data);
938 break; 946 break;
@@ -950,7 +958,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
950 vmcs_writel(GUEST_SYSENTER_ESP, data); 958 vmcs_writel(GUEST_SYSENTER_ESP, data);
951 break; 959 break;
952 case MSR_IA32_TIME_STAMP_COUNTER: 960 case MSR_IA32_TIME_STAMP_COUNTER:
953 guest_write_tsc(data); 961 rdtscll(host_tsc);
962 guest_write_tsc(data, host_tsc);
954 break; 963 break;
955 case MSR_P6_PERFCTR0: 964 case MSR_P6_PERFCTR0:
956 case MSR_P6_PERFCTR1: 965 case MSR_P6_PERFCTR1:
@@ -999,40 +1008,28 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
999 } 1008 }
1000} 1009}
1001 1010
1002static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) 1011static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
1003{ 1012{
1004 unsigned long dr7 = 0x400; 1013 int old_debug = vcpu->guest_debug;
1005 int old_singlestep; 1014 unsigned long flags;
1006
1007 old_singlestep = vcpu->guest_debug.singlestep;
1008
1009 vcpu->guest_debug.enabled = dbg->enabled;
1010 if (vcpu->guest_debug.enabled) {
1011 int i;
1012 1015
1013 dr7 |= 0x200; /* exact */ 1016 vcpu->guest_debug = dbg->control;
1014 for (i = 0; i < 4; ++i) { 1017 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
1015 if (!dbg->breakpoints[i].enabled) 1018 vcpu->guest_debug = 0;
1016 continue;
1017 vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
1018 dr7 |= 2 << (i*2); /* global enable */
1019 dr7 |= 0 << (i*4+16); /* execution breakpoint */
1020 }
1021 1019
1022 vcpu->guest_debug.singlestep = dbg->singlestep; 1020 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1023 } else 1021 vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
1024 vcpu->guest_debug.singlestep = 0; 1022 else
1025 1023 vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
1026 if (old_singlestep && !vcpu->guest_debug.singlestep) {
1027 unsigned long flags;
1028 1024
1029 flags = vmcs_readl(GUEST_RFLAGS); 1025 flags = vmcs_readl(GUEST_RFLAGS);
1026 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
1027 flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
1028 else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
1030 flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); 1029 flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1031 vmcs_writel(GUEST_RFLAGS, flags); 1030 vmcs_writel(GUEST_RFLAGS, flags);
1032 }
1033 1031
1034 update_exception_bitmap(vcpu); 1032 update_exception_bitmap(vcpu);
1035 vmcs_writel(GUEST_DR7, dr7);
1036 1033
1037 return 0; 1034 return 0;
1038} 1035}
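
For reference, the userspace side of the reworked interface: a single KVM_SET_GUEST_DEBUG ioctl carries the control flags and, for hardware breakpoints, the architectural debug registers. A sketch assuming the struct kvm_guest_debug layout introduced by this series; error handling is elided and vcpu_fd is assumed to be an open vcpu file descriptor:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_singlestep(int vcpu_fd)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}
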
@@ -1433,6 +1430,29 @@ continue_rmode:
1433 init_rmode(vcpu->kvm); 1430 init_rmode(vcpu->kvm);
1434} 1431}
1435 1432
1433static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
1434{
1435 struct vcpu_vmx *vmx = to_vmx(vcpu);
1436 struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
1437
1438 vcpu->arch.shadow_efer = efer;
1439 if (!msr)
1440 return;
1441 if (efer & EFER_LMA) {
1442 vmcs_write32(VM_ENTRY_CONTROLS,
1443 vmcs_read32(VM_ENTRY_CONTROLS) |
1444 VM_ENTRY_IA32E_MODE);
1445 msr->data = efer;
1446 } else {
1447 vmcs_write32(VM_ENTRY_CONTROLS,
1448 vmcs_read32(VM_ENTRY_CONTROLS) &
1449 ~VM_ENTRY_IA32E_MODE);
1450
1451 msr->data = efer & ~EFER_LME;
1452 }
1453 setup_msrs(vmx);
1454}
1455
1436#ifdef CONFIG_X86_64 1456#ifdef CONFIG_X86_64
1437 1457
1438static void enter_lmode(struct kvm_vcpu *vcpu) 1458static void enter_lmode(struct kvm_vcpu *vcpu)
@@ -1447,13 +1467,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
1447 (guest_tr_ar & ~AR_TYPE_MASK) 1467 (guest_tr_ar & ~AR_TYPE_MASK)
1448 | AR_TYPE_BUSY_64_TSS); 1468 | AR_TYPE_BUSY_64_TSS);
1449 } 1469 }
1450
1451 vcpu->arch.shadow_efer |= EFER_LMA; 1470 vcpu->arch.shadow_efer |= EFER_LMA;
1452 1471 vmx_set_efer(vcpu, vcpu->arch.shadow_efer);
1453 find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME;
1454 vmcs_write32(VM_ENTRY_CONTROLS,
1455 vmcs_read32(VM_ENTRY_CONTROLS)
1456 | VM_ENTRY_IA32E_MODE);
1457} 1472}
1458 1473
1459static void exit_lmode(struct kvm_vcpu *vcpu) 1474static void exit_lmode(struct kvm_vcpu *vcpu)
@@ -1612,30 +1627,6 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1612 vmcs_writel(GUEST_CR4, hw_cr4); 1627 vmcs_writel(GUEST_CR4, hw_cr4);
1613} 1628}
1614 1629
1615static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
1616{
1617 struct vcpu_vmx *vmx = to_vmx(vcpu);
1618 struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
1619
1620 vcpu->arch.shadow_efer = efer;
1621 if (!msr)
1622 return;
1623 if (efer & EFER_LMA) {
1624 vmcs_write32(VM_ENTRY_CONTROLS,
1625 vmcs_read32(VM_ENTRY_CONTROLS) |
1626 VM_ENTRY_IA32E_MODE);
1627 msr->data = efer;
1628
1629 } else {
1630 vmcs_write32(VM_ENTRY_CONTROLS,
1631 vmcs_read32(VM_ENTRY_CONTROLS) &
1632 ~VM_ENTRY_IA32E_MODE);
1633
1634 msr->data = efer & ~EFER_LME;
1635 }
1636 setup_msrs(vmx);
1637}
1638
1639static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) 1630static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1640{ 1631{
1641 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 1632 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1653,7 +1644,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
1653 var->limit = vmcs_read32(sf->limit); 1644 var->limit = vmcs_read32(sf->limit);
1654 var->selector = vmcs_read16(sf->selector); 1645 var->selector = vmcs_read16(sf->selector);
1655 ar = vmcs_read32(sf->ar_bytes); 1646 ar = vmcs_read32(sf->ar_bytes);
1656 if (ar & AR_UNUSABLE_MASK) 1647 if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
1657 ar = 0; 1648 ar = 0;
1658 var->type = ar & 15; 1649 var->type = ar & 15;
1659 var->s = (ar >> 4) & 1; 1650 var->s = (ar >> 4) & 1;
@@ -1788,14 +1779,16 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
1788 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); 1779 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
1789 cs_rpl = cs.selector & SELECTOR_RPL_MASK; 1780 cs_rpl = cs.selector & SELECTOR_RPL_MASK;
1790 1781
1782 if (cs.unusable)
1783 return false;
1791 if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK)) 1784 if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
1792 return false; 1785 return false;
1793 if (!cs.s) 1786 if (!cs.s)
1794 return false; 1787 return false;
1795 if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) { 1788 if (cs.type & AR_TYPE_WRITEABLE_MASK) {
1796 if (cs.dpl > cs_rpl) 1789 if (cs.dpl > cs_rpl)
1797 return false; 1790 return false;
1798 } else if (cs.type & AR_TYPE_CODE_MASK) { 1791 } else {
1799 if (cs.dpl != cs_rpl) 1792 if (cs.dpl != cs_rpl)
1800 return false; 1793 return false;
1801 } 1794 }
@@ -1814,7 +1807,9 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu)
1814 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); 1807 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
1815 ss_rpl = ss.selector & SELECTOR_RPL_MASK; 1808 ss_rpl = ss.selector & SELECTOR_RPL_MASK;
1816 1809
1817 if ((ss.type != 3) || (ss.type != 7)) 1810 if (ss.unusable)
1811 return true;
1812 if (ss.type != 3 && ss.type != 7)
1818 return false; 1813 return false;
1819 if (!ss.s) 1814 if (!ss.s)
1820 return false; 1815 return false;
@@ -1834,6 +1829,8 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
1834 vmx_get_segment(vcpu, &var, seg); 1829 vmx_get_segment(vcpu, &var, seg);
1835 rpl = var.selector & SELECTOR_RPL_MASK; 1830 rpl = var.selector & SELECTOR_RPL_MASK;
1836 1831
1832 if (var.unusable)
1833 return true;
1837 if (!var.s) 1834 if (!var.s)
1838 return false; 1835 return false;
1839 if (!var.present) 1836 if (!var.present)
@@ -1855,9 +1852,11 @@ static bool tr_valid(struct kvm_vcpu *vcpu)
1855 1852
1856 vmx_get_segment(vcpu, &tr, VCPU_SREG_TR); 1853 vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
1857 1854
1855 if (tr.unusable)
1856 return false;
1858 if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */ 1857 if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */
1859 return false; 1858 return false;
1860 if ((tr.type != 3) || (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */ 1859 if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
1861 return false; 1860 return false;
1862 if (!tr.present) 1861 if (!tr.present)
1863 return false; 1862 return false;
@@ -1871,6 +1870,8 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu)
1871 1870
1872 vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR); 1871 vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
1873 1872
1873 if (ldtr.unusable)
1874 return true;
1874 if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */ 1875 if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */
1875 return false; 1876 return false;
1876 if (ldtr.type != 2) 1877 if (ldtr.type != 2)
@@ -2112,7 +2113,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2112{ 2113{
2113 u32 host_sysenter_cs, msr_low, msr_high; 2114 u32 host_sysenter_cs, msr_low, msr_high;
2114 u32 junk; 2115 u32 junk;
2115 u64 host_pat; 2116 u64 host_pat, tsc_this, tsc_base;
2116 unsigned long a; 2117 unsigned long a;
2117 struct descriptor_table dt; 2118 struct descriptor_table dt;
2118 int i; 2119 int i;
@@ -2240,6 +2241,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2240 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); 2241 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
2241 vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); 2242 vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
2242 2243
2244 tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
2245 rdtscll(tsc_this);
2246 if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc)
2247 tsc_base = tsc_this;
2248
2249 guest_write_tsc(0, tsc_base);
2243 2250
2244 return 0; 2251 return 0;
2245} 2252}
@@ -2319,7 +2326,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2319 kvm_rip_write(vcpu, 0); 2326 kvm_rip_write(vcpu, 0);
2320 kvm_register_write(vcpu, VCPU_REGS_RSP, 0); 2327 kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
2321 2328
2322 /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
2323 vmcs_writel(GUEST_DR7, 0x400); 2329 vmcs_writel(GUEST_DR7, 0x400);
2324 2330
2325 vmcs_writel(GUEST_GDTR_BASE, 0); 2331 vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -2332,8 +2338,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2332 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); 2338 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
2333 vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); 2339 vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
2334 2340
2335 guest_write_tsc(0);
2336
2337 /* Special registers */ 2341 /* Special registers */
2338 vmcs_write64(GUEST_IA32_DEBUGCTL, 0); 2342 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
2339 2343
@@ -2486,6 +2490,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
2486{ 2490{
2487 vmx_update_window_states(vcpu); 2491 vmx_update_window_states(vcpu);
2488 2492
2493 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
2494 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
2495 GUEST_INTR_STATE_STI |
2496 GUEST_INTR_STATE_MOV_SS);
2497
2489 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { 2498 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
2490 if (vcpu->arch.interrupt.pending) { 2499 if (vcpu->arch.interrupt.pending) {
2491 enable_nmi_window(vcpu); 2500 enable_nmi_window(vcpu);
@@ -2536,24 +2545,6 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
2536 return 0; 2545 return 0;
2537} 2546}
2538 2547
2539static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
2540{
2541 struct kvm_guest_debug *dbg = &vcpu->guest_debug;
2542
2543 set_debugreg(dbg->bp[0], 0);
2544 set_debugreg(dbg->bp[1], 1);
2545 set_debugreg(dbg->bp[2], 2);
2546 set_debugreg(dbg->bp[3], 3);
2547
2548 if (dbg->singlestep) {
2549 unsigned long flags;
2550
2551 flags = vmcs_readl(GUEST_RFLAGS);
2552 flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
2553 vmcs_writel(GUEST_RFLAGS, flags);
2554 }
2555}
2556
2557static int handle_rmode_exception(struct kvm_vcpu *vcpu, 2548static int handle_rmode_exception(struct kvm_vcpu *vcpu,
2558 int vec, u32 err_code) 2549 int vec, u32 err_code)
2559{ 2550{
@@ -2570,9 +2561,17 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
2570 * the required debugging infrastructure rework. 2561 * the required debugging infrastructure rework.
2571 */ 2562 */
2572 switch (vec) { 2563 switch (vec) {
2573 case DE_VECTOR:
2574 case DB_VECTOR: 2564 case DB_VECTOR:
2565 if (vcpu->guest_debug &
2566 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
2567 return 0;
2568 kvm_queue_exception(vcpu, vec);
2569 return 1;
2575 case BP_VECTOR: 2570 case BP_VECTOR:
2571 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
2572 return 0;
2573 /* fall through */
2574 case DE_VECTOR:
2576 case OF_VECTOR: 2575 case OF_VECTOR:
2577 case BR_VECTOR: 2576 case BR_VECTOR:
2578 case UD_VECTOR: 2577 case UD_VECTOR:
@@ -2589,8 +2588,8 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
2589static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2588static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2590{ 2589{
2591 struct vcpu_vmx *vmx = to_vmx(vcpu); 2590 struct vcpu_vmx *vmx = to_vmx(vcpu);
2592 u32 intr_info, error_code; 2591 u32 intr_info, ex_no, error_code;
2593 unsigned long cr2, rip; 2592 unsigned long cr2, rip, dr6;
2594 u32 vect_info; 2593 u32 vect_info;
2595 enum emulation_result er; 2594 enum emulation_result er;
2596 2595
@@ -2649,14 +2648,30 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2649 return 1; 2648 return 1;
2650 } 2649 }
2651 2650
2652 if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == 2651 ex_no = intr_info & INTR_INFO_VECTOR_MASK;
2653 (INTR_TYPE_EXCEPTION | 1)) { 2652 switch (ex_no) {
2653 case DB_VECTOR:
2654 dr6 = vmcs_readl(EXIT_QUALIFICATION);
2655 if (!(vcpu->guest_debug &
2656 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
2657 vcpu->arch.dr6 = dr6 | DR6_FIXED_1;
2658 kvm_queue_exception(vcpu, DB_VECTOR);
2659 return 1;
2660 }
2661 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
2662 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
2663 /* fall through */
2664 case BP_VECTOR:
2654 kvm_run->exit_reason = KVM_EXIT_DEBUG; 2665 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2655 return 0; 2666 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
2667 kvm_run->debug.arch.exception = ex_no;
2668 break;
2669 default:
2670 kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
2671 kvm_run->ex.exception = ex_no;
2672 kvm_run->ex.error_code = error_code;
2673 break;
2656 } 2674 }
2657 kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
2658 kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
2659 kvm_run->ex.error_code = error_code;
2660 return 0; 2675 return 0;
2661} 2676}
2662 2677
@@ -2677,7 +2692,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2677static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2692static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2678{ 2693{
2679 unsigned long exit_qualification; 2694 unsigned long exit_qualification;
2680 int size, down, in, string, rep; 2695 int size, in, string;
2681 unsigned port; 2696 unsigned port;
2682 2697
2683 ++vcpu->stat.io_exits; 2698 ++vcpu->stat.io_exits;
@@ -2693,8 +2708,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2693 2708
2694 size = (exit_qualification & 7) + 1; 2709 size = (exit_qualification & 7) + 1;
2695 in = (exit_qualification & 8) != 0; 2710 in = (exit_qualification & 8) != 0;
2696 down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
2697 rep = (exit_qualification & 32) != 0;
2698 port = exit_qualification >> 16; 2711 port = exit_qualification >> 16;
2699 2712
2700 skip_emulated_instruction(vcpu); 2713 skip_emulated_instruction(vcpu);
@@ -2795,21 +2808,44 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2795 unsigned long val; 2808 unsigned long val;
2796 int dr, reg; 2809 int dr, reg;
2797 2810
2798 /* 2811 dr = vmcs_readl(GUEST_DR7);
2799 * FIXME: this code assumes the host is debugging the guest. 2812 if (dr & DR7_GD) {
2800 * need to deal with guest debugging itself too. 2813 /*
2801 */ 2814 * As the vm-exit takes precedence over the debug trap, we
2815 * need to emulate the latter, either for the host or the
2816 * guest debugging itself.
2817 */
2818 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
2819 kvm_run->debug.arch.dr6 = vcpu->arch.dr6;
2820 kvm_run->debug.arch.dr7 = dr;
2821 kvm_run->debug.arch.pc =
2822 vmcs_readl(GUEST_CS_BASE) +
2823 vmcs_readl(GUEST_RIP);
2824 kvm_run->debug.arch.exception = DB_VECTOR;
2825 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2826 return 0;
2827 } else {
2828 vcpu->arch.dr7 &= ~DR7_GD;
2829 vcpu->arch.dr6 |= DR6_BD;
2830 vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
2831 kvm_queue_exception(vcpu, DB_VECTOR);
2832 return 1;
2833 }
2834 }
2835
2802 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 2836 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
2803 dr = exit_qualification & 7; 2837 dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
2804 reg = (exit_qualification >> 8) & 15; 2838 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
2805 if (exit_qualification & 16) { 2839 if (exit_qualification & TYPE_MOV_FROM_DR) {
2806 /* mov from dr */
2807 switch (dr) { 2840 switch (dr) {
2841 case 0 ... 3:
2842 val = vcpu->arch.db[dr];
2843 break;
2808 case 6: 2844 case 6:
2809 val = 0xffff0ff0; 2845 val = vcpu->arch.dr6;
2810 break; 2846 break;
2811 case 7: 2847 case 7:
2812 val = 0x400; 2848 val = vcpu->arch.dr7;
2813 break; 2849 break;
2814 default: 2850 default:
2815 val = 0; 2851 val = 0;
@@ -2817,7 +2853,38 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2817 kvm_register_write(vcpu, reg, val); 2853 kvm_register_write(vcpu, reg, val);
2818 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); 2854 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
2819 } else { 2855 } else {
2820 /* mov to dr */ 2856 val = vcpu->arch.regs[reg];
2857 switch (dr) {
2858 case 0 ... 3:
2859 vcpu->arch.db[dr] = val;
2860 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
2861 vcpu->arch.eff_db[dr] = val;
2862 break;
2863 case 4 ... 5:
2864 if (vcpu->arch.cr4 & X86_CR4_DE)
2865 kvm_queue_exception(vcpu, UD_VECTOR);
2866 break;
2867 case 6:
2868 if (val & 0xffffffff00000000ULL) {
2869 kvm_queue_exception(vcpu, GP_VECTOR);
2870 break;
2871 }
2872 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
2873 break;
2874 case 7:
2875 if (val & 0xffffffff00000000ULL) {
2876 kvm_queue_exception(vcpu, GP_VECTOR);
2877 break;
2878 }
2879 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
2880 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
2881 vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
2882 vcpu->arch.switch_db_regs =
2883 (val & DR7_BP_EN_MASK);
2884 }
2885 break;
2886 }
2887 KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler);
2821 } 2888 }
2822 skip_emulated_instruction(vcpu); 2889 skip_emulated_instruction(vcpu);
2823 return 1; 2890 return 1;
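
The mov-to-dr path above no longer trusts raw guest values: writes to dr6/dr7 keep only the architecturally volatile bits and force the fixed ones, and any bits set in the upper half raise #GP. A standalone sketch of the dr6 case, with DR6_FIXED_1/DR6_VOLATILE copied from this series' kvm_host.h and shown here for illustration:

#include <stdio.h>

#define DR6_FIXED_1  0xffff0ff0ULL
#define DR6_VOLATILE 0x0000e00fULL

/* Returns -1 when the write should inject #GP instead of landing. */
static int write_dr6(unsigned long long *dr6, unsigned long long val)
{
	if (val & 0xffffffff00000000ULL)
		return -1;
	*dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
	return 0;
}

int main(void)
{
	unsigned long long dr6 = 0;

	write_dr6(&dr6, 0x1);        /* breakpoint 0 hit */
	printf("dr6=0x%llx\n", dr6); /* 0xffff0ff1 */
	return 0;
}
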
@@ -2968,17 +3035,25 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2968 } 3035 }
2969 tss_selector = exit_qualification; 3036 tss_selector = exit_qualification;
2970 3037
2971 return kvm_task_switch(vcpu, tss_selector, reason); 3038 if (!kvm_task_switch(vcpu, tss_selector, reason))
3039 return 0;
3040
3041 /* clear all local breakpoint enable flags */
3042 vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
3043
3044 /*
3045 * TODO: What about debug traps on tss switch?
3046 * Are we supposed to inject them and update dr6?
3047 */
3048
3049 return 1;
2972} 3050}
2973 3051
2974static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3052static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2975{ 3053{
2976 u64 exit_qualification; 3054 u64 exit_qualification;
2977 enum emulation_result er;
2978 gpa_t gpa; 3055 gpa_t gpa;
2979 unsigned long hva;
2980 int gla_validity; 3056 int gla_validity;
2981 int r;
2982 3057
2983 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); 3058 exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
2984 3059
@@ -3001,32 +3076,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3001 } 3076 }
3002 3077
3003 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); 3078 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
3004 hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT); 3079 return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
3005 if (!kvm_is_error_hva(hva)) {
3006 r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
3007 if (r < 0) {
3008 printk(KERN_ERR "EPT: Not enough memory!\n");
3009 return -ENOMEM;
3010 }
3011 return 1;
3012 } else {
3013 /* must be MMIO */
3014 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
3015
3016 if (er == EMULATE_FAIL) {
3017 printk(KERN_ERR
3018 "EPT: Fail to handle EPT violation vmexit!er is %d\n",
3019 er);
3020 printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
3021 (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
3022 (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
3023 printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
3024 (long unsigned int)exit_qualification);
3025 return -ENOTSUPP;
3026 } else if (er == EMULATE_DO_MMIO)
3027 return 0;
3028 }
3029 return 1;
3030} 3080}
3031 3081
3032static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3082static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3046,7 +3096,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3046 struct kvm_run *kvm_run) 3096 struct kvm_run *kvm_run)
3047{ 3097{
3048 struct vcpu_vmx *vmx = to_vmx(vcpu); 3098 struct vcpu_vmx *vmx = to_vmx(vcpu);
3049 int err; 3099 enum emulation_result err = EMULATE_DONE;
3050 3100
3051 preempt_enable(); 3101 preempt_enable();
3052 local_irq_enable(); 3102 local_irq_enable();
@@ -3071,10 +3121,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3071 local_irq_disable(); 3121 local_irq_disable();
3072 preempt_disable(); 3122 preempt_disable();
3073 3123
3074 /* Guest state should be valid now except if we need to 3124 vmx->invalid_state_emulation_result = err;
3075 * emulate an MMIO */
3076 if (guest_state_valid(vcpu))
3077 vmx->emulation_required = 0;
3078} 3125}
3079 3126
3080/* 3127/*
@@ -3123,8 +3170,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3123 3170
3124 /* If we need to emulate an MMIO from handle_invalid_guest_state 3171 /* If we need to emulate an MMIO from handle_invalid_guest_state
3125 * we just return 0 */ 3172 * we just return 0 */
3126 if (vmx->emulation_required && emulate_invalid_guest_state) 3173 if (vmx->emulation_required && emulate_invalid_guest_state) {
3127 return 0; 3174 if (guest_state_valid(vcpu))
3175 vmx->emulation_required = 0;
3176 return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO;
3177 }
3128 3178
3129 /* CR3 accesses don't cause a VM exit in paging mode, so we need 3179 /* CR3 accesses don't cause a VM exit in paging mode, so we need
3130 * to sync with the guest's real CR3. */ 3180 * to sync with the guest's real CR3. */
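Paired with the handle_invalid_guest_state() hunk above, the emulation result now travels through the vmx struct instead of being recomputed: the loop records its last outcome, and kvm_handle_exit() returns 0 (exit to userspace) only for EMULATE_DO_MMIO. A condensed sketch of the producer side, with the scheduling checks and interrupt toggling of the real loop elided:

static void sketch_invalid_state_loop(struct kvm_vcpu *vcpu,
				      struct kvm_run *kvm_run)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	enum emulation_result err = EMULATE_DONE;

	while (!guest_state_valid(vcpu)) {
		err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
		if (err != EMULATE_DONE)
			break;		/* MMIO exit or emulation failure */
	}
	vmx->invalid_state_emulation_result = err;
}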
@@ -3238,7 +3288,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3238 vmx->vcpu.arch.nmi_injected = false; 3288 vmx->vcpu.arch.nmi_injected = false;
3239 } 3289 }
3240 kvm_clear_exception_queue(&vmx->vcpu); 3290 kvm_clear_exception_queue(&vmx->vcpu);
3241 if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) { 3291 if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION ||
3292 type == INTR_TYPE_SOFT_EXCEPTION)) {
3242 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { 3293 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
3243 error = vmcs_read32(IDT_VECTORING_ERROR_CODE); 3294 error = vmcs_read32(IDT_VECTORING_ERROR_CODE);
3244 kvm_queue_exception_e(&vmx->vcpu, vector, error); 3295 kvm_queue_exception_e(&vmx->vcpu, vector, error);
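Requeueing both exception flavors matters because INT3/INTO arrive as soft exceptions, and the guest-debug work later in this series plants INT3 breakpoints. For reference, a sketch of the interruption-type encoding in bits 10:8 of the IDT-vectoring field (values mirror the VMX format in the SDM; verify against asm/vmx.h before relying on them):

/* bits 10:8 of the IDT-vectoring / interrupt-information fields */
#define SKETCH_INTR_TYPE_EXT_INTR	(0 << 8)	/* external interrupt */
#define SKETCH_INTR_TYPE_NMI_INTR	(2 << 8)	/* NMI */
#define SKETCH_INTR_TYPE_HARD_EXCEPTION	(3 << 8)	/* #PF, #GP, ... */
#define SKETCH_INTR_TYPE_SOFT_EXCEPTION	(6 << 8)	/* INT3, INTO */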
@@ -3259,6 +3310,11 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3259 3310
3260 vmx_update_window_states(vcpu); 3311 vmx_update_window_states(vcpu);
3261 3312
3313 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
3314 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
3315 GUEST_INTR_STATE_STI |
3316 GUEST_INTR_STATE_MOV_SS);
3317
3262 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { 3318 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
3263 if (vcpu->arch.interrupt.pending) { 3319 if (vcpu->arch.interrupt.pending) {
3264 enable_nmi_window(vcpu); 3320 enable_nmi_window(vcpu);
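The vmcs_clear_bits() call added above is shorthand for a read-modify-write of the guest interruptibility state; single-stepping must drop the STI and MOV-SS blocking shadows, or the pending #DB would be held off. Spelled out with the plain accessors, purely as an illustration:

static void sketch_clear_intr_shadow(struct kvm_vcpu *vcpu)
{
	u32 state;

	if (!(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))
		return;

	state = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
	state &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, state);
}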
@@ -3347,6 +3403,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3347 */ 3403 */
3348 vmcs_writel(HOST_CR0, read_cr0()); 3404 vmcs_writel(HOST_CR0, read_cr0());
3349 3405
3406 set_debugreg(vcpu->arch.dr6, 6);
3407
3350 asm( 3408 asm(
3351 /* Store host registers */ 3409 /* Store host registers */
3352 "push %%"R"dx; push %%"R"bp;" 3410 "push %%"R"dx; push %%"R"bp;"
@@ -3441,6 +3499,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3441 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); 3499 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
3442 vcpu->arch.regs_dirty = 0; 3500 vcpu->arch.regs_dirty = 0;
3443 3501
3502 get_debugreg(vcpu->arch.dr6, 6);
3503
3444 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); 3504 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
3445 if (vmx->rmode.irq.pending) 3505 if (vmx->rmode.irq.pending)
3446 fixup_rmode_irq(vmx); 3506 fixup_rmode_irq(vmx);
@@ -3595,7 +3655,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
3595 .vcpu_put = vmx_vcpu_put, 3655 .vcpu_put = vmx_vcpu_put,
3596 3656
3597 .set_guest_debug = set_guest_debug, 3657 .set_guest_debug = set_guest_debug,
3598 .guest_debug_pre = kvm_guest_debug_pre,
3599 .get_msr = vmx_get_msr, 3658 .get_msr = vmx_get_msr,
3600 .set_msr = vmx_set_msr, 3659 .set_msr = vmx_set_msr,
3601 .get_segment_base = vmx_get_segment_base, 3660 .get_segment_base = vmx_get_segment_base,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 758b7a155ae9..8ca100a9ecac 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -36,6 +36,7 @@
36#include <linux/highmem.h> 36#include <linux/highmem.h>
37#include <linux/iommu.h> 37#include <linux/iommu.h>
38#include <linux/intel-iommu.h> 38#include <linux/intel-iommu.h>
39#include <linux/cpufreq.h>
39 40
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include <asm/msr.h> 42#include <asm/msr.h>
@@ -69,6 +70,8 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
69 70
70static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 71static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
71 struct kvm_cpuid_entry2 __user *entries); 72 struct kvm_cpuid_entry2 __user *entries);
73struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
74 u32 function, u32 index);
72 75
73struct kvm_x86_ops *kvm_x86_ops; 76struct kvm_x86_ops *kvm_x86_ops;
74EXPORT_SYMBOL_GPL(kvm_x86_ops); 77EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -173,6 +176,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
173 u32 error_code) 176 u32 error_code)
174{ 177{
175 ++vcpu->stat.pf_guest; 178 ++vcpu->stat.pf_guest;
179
176 if (vcpu->arch.exception.pending) { 180 if (vcpu->arch.exception.pending) {
177 if (vcpu->arch.exception.nr == PF_VECTOR) { 181 if (vcpu->arch.exception.nr == PF_VECTOR) {
178 printk(KERN_DEBUG "kvm: inject_page_fault:" 182 printk(KERN_DEBUG "kvm: inject_page_fault:"
@@ -361,6 +365,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
361 } 365 }
362 kvm_x86_ops->set_cr4(vcpu, cr4); 366 kvm_x86_ops->set_cr4(vcpu, cr4);
363 vcpu->arch.cr4 = cr4; 367 vcpu->arch.cr4 = cr4;
368 vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
364 kvm_mmu_sync_global(vcpu); 369 kvm_mmu_sync_global(vcpu);
365 kvm_mmu_reset_context(vcpu); 370 kvm_mmu_reset_context(vcpu);
366} 371}
@@ -442,6 +447,11 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
442} 447}
443EXPORT_SYMBOL_GPL(kvm_get_cr8); 448EXPORT_SYMBOL_GPL(kvm_get_cr8);
444 449
450static inline u32 bit(int bitno)
451{
452 return 1 << (bitno & 31);
453}
454
445/* 455/*
446 * List of msr numbers which we expose to userspace through KVM_GET_MSRS 456 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
447 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. 457 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -456,7 +466,7 @@ static u32 msrs_to_save[] = {
456 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 466 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
457#endif 467#endif
458 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 468 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
459 MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT 469 MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
460}; 470};
461 471
462static unsigned num_msrs_to_save; 472static unsigned num_msrs_to_save;
@@ -481,6 +491,28 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
481 return; 491 return;
482 } 492 }
483 493
494 if (efer & EFER_FFXSR) {
495 struct kvm_cpuid_entry2 *feat;
496
497 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
498 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
499 printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
500 kvm_inject_gp(vcpu, 0);
501 return;
502 }
503 }
504
505 if (efer & EFER_SVME) {
506 struct kvm_cpuid_entry2 *feat;
507
508 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
509 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
510 printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
511 kvm_inject_gp(vcpu, 0);
512 return;
513 }
514 }
515
484 kvm_x86_ops->set_efer(vcpu, efer); 516 kvm_x86_ops->set_efer(vcpu, efer);
485 517
486 efer &= ~EFER_LMA; 518 efer &= ~EFER_LMA;
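Both new EFER checks share one shape: consult the guest's CPUID leaf 0x80000001 and refuse to enable a feature bit the guest was never told about. A hypothetical helper capturing the pattern (not part of the patch; note that bit() masks with 31, so the word offset baked into X86_FEATURE_* constants folds away):

static bool guest_has_ext_feature(struct kvm_vcpu *vcpu, bool in_ecx,
				  int feature)
{
	struct kvm_cpuid_entry2 *feat;

	feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
	if (!feat)
		return false;
	return !!((in_ecx ? feat->ecx : feat->edx) & bit(feature));
}

With it, the SVME branch would reduce to checking guest_has_ext_feature(vcpu, true, X86_FEATURE_SVM) before injecting #GP.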
@@ -586,6 +618,8 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
586 hv_clock->tsc_to_system_mul); 618 hv_clock->tsc_to_system_mul);
587} 619}
588 620
621static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
622
589static void kvm_write_guest_time(struct kvm_vcpu *v) 623static void kvm_write_guest_time(struct kvm_vcpu *v)
590{ 624{
591 struct timespec ts; 625 struct timespec ts;
@@ -596,9 +630,9 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
596 if ((!vcpu->time_page)) 630 if ((!vcpu->time_page))
597 return; 631 return;
598 632
599 if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) { 633 if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
600 kvm_set_time_scale(tsc_khz, &vcpu->hv_clock); 634 kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
601 vcpu->hv_clock_tsc_khz = tsc_khz; 635 vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
602 } 636 }
603 637
604 /* Keep irq disabled to prevent changes to the clock */ 638 /* Keep irq disabled to prevent changes to the clock */
@@ -629,6 +663,16 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
629 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); 663 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
630} 664}
631 665
666static int kvm_request_guest_time_update(struct kvm_vcpu *v)
667{
668 struct kvm_vcpu_arch *vcpu = &v->arch;
669
670 if (!vcpu->time_page)
671 return 0;
672 set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
673 return 1;
674}
675
632static bool msr_mtrr_valid(unsigned msr) 676static bool msr_mtrr_valid(unsigned msr)
633{ 677{
634 switch (msr) { 678 switch (msr) {
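kvm_request_guest_time_update() swaps the direct kvm_write_guest_time() call for KVM's deferred-request idiom, which is the only safe option from contexts (such as the cpufreq notifier added later in this file) that may not touch another vcpu's state. The pattern in miniature, using the request bit defined elsewhere in this patch:

	/* producer: any context that learns the clock parameters changed */
	set_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests);

	/* consumer: vcpu_enter_guest(), with the vcpu loaded on this cpu */
	if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
		kvm_write_guest_time(vcpu);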
@@ -722,6 +766,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
722 break; 766 break;
723 case MSR_IA32_UCODE_REV: 767 case MSR_IA32_UCODE_REV:
724 case MSR_IA32_UCODE_WRITE: 768 case MSR_IA32_UCODE_WRITE:
769 case MSR_VM_HSAVE_PA:
725 break; 770 break;
726 case 0x200 ... 0x2ff: 771 case 0x200 ... 0x2ff:
727 return set_msr_mtrr(vcpu, msr, data); 772 return set_msr_mtrr(vcpu, msr, data);
@@ -758,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
758 vcpu->arch.time_page = NULL; 803 vcpu->arch.time_page = NULL;
759 } 804 }
760 805
761 kvm_write_guest_time(vcpu); 806 kvm_request_guest_time_update(vcpu);
762 break; 807 break;
763 } 808 }
764 default: 809 default:
@@ -843,6 +888,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
843 case MSR_IA32_LASTBRANCHTOIP: 888 case MSR_IA32_LASTBRANCHTOIP:
844 case MSR_IA32_LASTINTFROMIP: 889 case MSR_IA32_LASTINTFROMIP:
845 case MSR_IA32_LASTINTTOIP: 890 case MSR_IA32_LASTINTTOIP:
891 case MSR_VM_HSAVE_PA:
846 data = 0; 892 data = 0;
847 break; 893 break;
848 case MSR_MTRRcap: 894 case MSR_MTRRcap:
@@ -967,10 +1013,13 @@ int kvm_dev_ioctl_check_extension(long ext)
967 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 1013 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
968 case KVM_CAP_SET_TSS_ADDR: 1014 case KVM_CAP_SET_TSS_ADDR:
969 case KVM_CAP_EXT_CPUID: 1015 case KVM_CAP_EXT_CPUID:
1016 case KVM_CAP_CLOCKSOURCE:
970 case KVM_CAP_PIT: 1017 case KVM_CAP_PIT:
971 case KVM_CAP_NOP_IO_DELAY: 1018 case KVM_CAP_NOP_IO_DELAY:
972 case KVM_CAP_MP_STATE: 1019 case KVM_CAP_MP_STATE:
973 case KVM_CAP_SYNC_MMU: 1020 case KVM_CAP_SYNC_MMU:
1021 case KVM_CAP_REINJECT_CONTROL:
1022 case KVM_CAP_IRQ_INJECT_STATUS:
974 r = 1; 1023 r = 1;
975 break; 1024 break;
976 case KVM_CAP_COALESCED_MMIO: 1025 case KVM_CAP_COALESCED_MMIO:
@@ -991,9 +1040,6 @@ int kvm_dev_ioctl_check_extension(long ext)
991 case KVM_CAP_IOMMU: 1040 case KVM_CAP_IOMMU:
992 r = iommu_found(); 1041 r = iommu_found();
993 break; 1042 break;
994 case KVM_CAP_CLOCKSOURCE:
995 r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
996 break;
997 default: 1043 default:
998 r = 0; 1044 r = 0;
999 break; 1045 break;
@@ -1044,7 +1090,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
1044 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 1090 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
1045 goto out; 1091 goto out;
1046 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, 1092 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
1047 cpuid_arg->entries); 1093 cpuid_arg->entries);
1048 if (r) 1094 if (r)
1049 goto out; 1095 goto out;
1050 1096
@@ -1064,7 +1110,7 @@ out:
1064void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1110void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1065{ 1111{
1066 kvm_x86_ops->vcpu_load(vcpu, cpu); 1112 kvm_x86_ops->vcpu_load(vcpu, cpu);
1067 kvm_write_guest_time(vcpu); 1113 kvm_request_guest_time_update(vcpu);
1068} 1114}
1069 1115
1070void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1116void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1142,8 +1188,8 @@ out:
1142} 1188}
1143 1189
1144static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, 1190static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
1145 struct kvm_cpuid2 *cpuid, 1191 struct kvm_cpuid2 *cpuid,
1146 struct kvm_cpuid_entry2 __user *entries) 1192 struct kvm_cpuid_entry2 __user *entries)
1147{ 1193{
1148 int r; 1194 int r;
1149 1195
@@ -1162,8 +1208,8 @@ out:
1162} 1208}
1163 1209
1164static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, 1210static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
1165 struct kvm_cpuid2 *cpuid, 1211 struct kvm_cpuid2 *cpuid,
1166 struct kvm_cpuid_entry2 __user *entries) 1212 struct kvm_cpuid_entry2 __user *entries)
1167{ 1213{
1168 int r; 1214 int r;
1169 1215
@@ -1172,7 +1218,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
1172 goto out; 1218 goto out;
1173 r = -EFAULT; 1219 r = -EFAULT;
1174 if (copy_to_user(entries, &vcpu->arch.cpuid_entries, 1220 if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
1175 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) 1221 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
1176 goto out; 1222 goto out;
1177 return 0; 1223 return 0;
1178 1224
@@ -1181,18 +1227,13 @@ out:
1181 return r; 1227 return r;
1182} 1228}
1183 1229
1184static inline u32 bit(int bitno)
1185{
1186 return 1 << (bitno & 31);
1187}
1188
1189static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, 1230static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1190 u32 index) 1231 u32 index)
1191{ 1232{
1192 entry->function = function; 1233 entry->function = function;
1193 entry->index = index; 1234 entry->index = index;
1194 cpuid_count(entry->function, entry->index, 1235 cpuid_count(entry->function, entry->index,
1195 &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); 1236 &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
1196 entry->flags = 0; 1237 entry->flags = 0;
1197} 1238}
1198 1239
@@ -1222,15 +1263,17 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1222#ifdef CONFIG_X86_64 1263#ifdef CONFIG_X86_64
1223 bit(X86_FEATURE_LM) | 1264 bit(X86_FEATURE_LM) |
1224#endif 1265#endif
1266 bit(X86_FEATURE_FXSR_OPT) |
1225 bit(X86_FEATURE_MMXEXT) | 1267 bit(X86_FEATURE_MMXEXT) |
1226 bit(X86_FEATURE_3DNOWEXT) | 1268 bit(X86_FEATURE_3DNOWEXT) |
1227 bit(X86_FEATURE_3DNOW); 1269 bit(X86_FEATURE_3DNOW);
1228 const u32 kvm_supported_word3_x86_features = 1270 const u32 kvm_supported_word3_x86_features =
1229 bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16); 1271 bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
1230 const u32 kvm_supported_word6_x86_features = 1272 const u32 kvm_supported_word6_x86_features =
1231 bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY); 1273 bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
1274 bit(X86_FEATURE_SVM);
1232 1275
1233 /* all func 2 cpuid_count() should be called on the same cpu */ 1276 /* all calls to cpuid_count() should be made on the same cpu */
1234 get_cpu(); 1277 get_cpu();
1235 do_cpuid_1_ent(entry, function, index); 1278 do_cpuid_1_ent(entry, function, index);
1236 ++*nent; 1279 ++*nent;
@@ -1304,7 +1347,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1304} 1347}
1305 1348
1306static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 1349static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
1307 struct kvm_cpuid_entry2 __user *entries) 1350 struct kvm_cpuid_entry2 __user *entries)
1308{ 1351{
1309 struct kvm_cpuid_entry2 *cpuid_entries; 1352 struct kvm_cpuid_entry2 *cpuid_entries;
1310 int limit, nent = 0, r = -E2BIG; 1353 int limit, nent = 0, r = -E2BIG;
@@ -1321,7 +1364,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
1321 limit = cpuid_entries[0].eax; 1364 limit = cpuid_entries[0].eax;
1322 for (func = 1; func <= limit && nent < cpuid->nent; ++func) 1365 for (func = 1; func <= limit && nent < cpuid->nent; ++func)
1323 do_cpuid_ent(&cpuid_entries[nent], func, 0, 1366 do_cpuid_ent(&cpuid_entries[nent], func, 0,
1324 &nent, cpuid->nent); 1367 &nent, cpuid->nent);
1325 r = -E2BIG; 1368 r = -E2BIG;
1326 if (nent >= cpuid->nent) 1369 if (nent >= cpuid->nent)
1327 goto out_free; 1370 goto out_free;
@@ -1330,10 +1373,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
1330 limit = cpuid_entries[nent - 1].eax; 1373 limit = cpuid_entries[nent - 1].eax;
1331 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) 1374 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
1332 do_cpuid_ent(&cpuid_entries[nent], func, 0, 1375 do_cpuid_ent(&cpuid_entries[nent], func, 0,
1333 &nent, cpuid->nent); 1376 &nent, cpuid->nent);
1334 r = -EFAULT; 1377 r = -EFAULT;
1335 if (copy_to_user(entries, cpuid_entries, 1378 if (copy_to_user(entries, cpuid_entries,
1336 nent * sizeof(struct kvm_cpuid_entry2))) 1379 nent * sizeof(struct kvm_cpuid_entry2)))
1337 goto out_free; 1380 goto out_free;
1338 cpuid->nent = nent; 1381 cpuid->nent = nent;
1339 r = 0; 1382 r = 0;
@@ -1477,7 +1520,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1477 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 1520 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
1478 goto out; 1521 goto out;
1479 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid, 1522 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
1480 cpuid_arg->entries); 1523 cpuid_arg->entries);
1481 if (r) 1524 if (r)
1482 goto out; 1525 goto out;
1483 break; 1526 break;
@@ -1490,7 +1533,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1490 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 1533 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
1491 goto out; 1534 goto out;
1492 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid, 1535 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
1493 cpuid_arg->entries); 1536 cpuid_arg->entries);
1494 if (r) 1537 if (r)
1495 goto out; 1538 goto out;
1496 r = -EFAULT; 1539 r = -EFAULT;
@@ -1710,6 +1753,15 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
1710 return r; 1753 return r;
1711} 1754}
1712 1755
1756static int kvm_vm_ioctl_reinject(struct kvm *kvm,
1757 struct kvm_reinject_control *control)
1758{
1759 if (!kvm->arch.vpit)
1760 return -ENXIO;
1761 kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
1762 return 0;
1763}
1764
1713/* 1765/*
1714 * Get (and clear) the dirty memory log for a memory slot. 1766 * Get (and clear) the dirty memory log for a memory slot.
1715 */ 1767 */
@@ -1807,13 +1859,26 @@ long kvm_arch_vm_ioctl(struct file *filp,
1807 } 1859 }
1808 } else 1860 } else
1809 goto out; 1861 goto out;
1862 r = kvm_setup_default_irq_routing(kvm);
1863 if (r) {
1864 kfree(kvm->arch.vpic);
1865 kfree(kvm->arch.vioapic);
1866 goto out;
1867 }
1810 break; 1868 break;
1811 case KVM_CREATE_PIT: 1869 case KVM_CREATE_PIT:
1870 mutex_lock(&kvm->lock);
1871 r = -EEXIST;
1872 if (kvm->arch.vpit)
1873 goto create_pit_unlock;
1812 r = -ENOMEM; 1874 r = -ENOMEM;
1813 kvm->arch.vpit = kvm_create_pit(kvm); 1875 kvm->arch.vpit = kvm_create_pit(kvm);
1814 if (kvm->arch.vpit) 1876 if (kvm->arch.vpit)
1815 r = 0; 1877 r = 0;
1878 create_pit_unlock:
1879 mutex_unlock(&kvm->lock);
1816 break; 1880 break;
1881 case KVM_IRQ_LINE_STATUS:
1817 case KVM_IRQ_LINE: { 1882 case KVM_IRQ_LINE: {
1818 struct kvm_irq_level irq_event; 1883 struct kvm_irq_level irq_event;
1819 1884
@@ -1821,10 +1886,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
1821 if (copy_from_user(&irq_event, argp, sizeof irq_event)) 1886 if (copy_from_user(&irq_event, argp, sizeof irq_event))
1822 goto out; 1887 goto out;
1823 if (irqchip_in_kernel(kvm)) { 1888 if (irqchip_in_kernel(kvm)) {
1889 __s32 status;
1824 mutex_lock(&kvm->lock); 1890 mutex_lock(&kvm->lock);
1825 kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1891 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
1826 irq_event.irq, irq_event.level); 1892 irq_event.irq, irq_event.level);
1827 mutex_unlock(&kvm->lock); 1893 mutex_unlock(&kvm->lock);
1894 if (ioctl == KVM_IRQ_LINE_STATUS) {
1895 irq_event.status = status;
1896 if (copy_to_user(argp, &irq_event,
1897 sizeof irq_event))
1898 goto out;
1899 }
1828 r = 0; 1900 r = 0;
1829 } 1901 }
1830 break; 1902 break;
@@ -1907,6 +1979,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
1907 r = 0; 1979 r = 0;
1908 break; 1980 break;
1909 } 1981 }
1982 case KVM_REINJECT_CONTROL: {
1983 struct kvm_reinject_control control;
1984 r = -EFAULT;
1985 if (copy_from_user(&control, argp, sizeof(control)))
1986 goto out;
1987 r = kvm_vm_ioctl_reinject(kvm, &control);
1988 if (r)
1989 goto out;
1990 r = 0;
1991 break;
1992 }
1910 default: 1993 default:
1911 ; 1994 ;
1912 } 1995 }
@@ -1960,10 +2043,38 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
1960 return dev; 2043 return dev;
1961} 2044}
1962 2045
1963int emulator_read_std(unsigned long addr, 2046static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
1964 void *val, 2047 struct kvm_vcpu *vcpu)
1965 unsigned int bytes, 2048{
1966 struct kvm_vcpu *vcpu) 2049 void *data = val;
2050 int r = X86EMUL_CONTINUE;
2051
2052 while (bytes) {
2053 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
2054 unsigned offset = addr & (PAGE_SIZE-1);
2055 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
2056 int ret;
2057
2058 if (gpa == UNMAPPED_GVA) {
2059 r = X86EMUL_PROPAGATE_FAULT;
2060 goto out;
2061 }
2062 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
2063 if (ret < 0) {
2064 r = X86EMUL_UNHANDLEABLE;
2065 goto out;
2066 }
2067
2068 bytes -= toread;
2069 data += toread;
2070 addr += toread;
2071 }
2072out:
2073 return r;
2074}
2075
2076static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
2077 struct kvm_vcpu *vcpu)
1967{ 2078{
1968 void *data = val; 2079 void *data = val;
1969 int r = X86EMUL_CONTINUE; 2080 int r = X86EMUL_CONTINUE;
@@ -1971,27 +2082,27 @@ int emulator_read_std(unsigned long addr,
1971 while (bytes) { 2082 while (bytes) {
1972 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 2083 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1973 unsigned offset = addr & (PAGE_SIZE-1); 2084 unsigned offset = addr & (PAGE_SIZE-1);
1974 unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); 2085 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
1975 int ret; 2086 int ret;
1976 2087
1977 if (gpa == UNMAPPED_GVA) { 2088 if (gpa == UNMAPPED_GVA) {
1978 r = X86EMUL_PROPAGATE_FAULT; 2089 r = X86EMUL_PROPAGATE_FAULT;
1979 goto out; 2090 goto out;
1980 } 2091 }
1981 ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy); 2092 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
1982 if (ret < 0) { 2093 if (ret < 0) {
1983 r = X86EMUL_UNHANDLEABLE; 2094 r = X86EMUL_UNHANDLEABLE;
1984 goto out; 2095 goto out;
1985 } 2096 }
1986 2097
1987 bytes -= tocopy; 2098 bytes -= towrite;
1988 data += tocopy; 2099 data += towrite;
1989 addr += tocopy; 2100 addr += towrite;
1990 } 2101 }
1991out: 2102out:
1992 return r; 2103 return r;
1993} 2104}
1994EXPORT_SYMBOL_GPL(emulator_read_std); 2105
1995 2106
1996static int emulator_read_emulated(unsigned long addr, 2107static int emulator_read_emulated(unsigned long addr,
1997 void *val, 2108 void *val,
@@ -2013,8 +2124,8 @@ static int emulator_read_emulated(unsigned long addr,
2013 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 2124 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
2014 goto mmio; 2125 goto mmio;
2015 2126
2016 if (emulator_read_std(addr, val, bytes, vcpu) 2127 if (kvm_read_guest_virt(addr, val, bytes, vcpu)
2017 == X86EMUL_CONTINUE) 2128 == X86EMUL_CONTINUE)
2018 return X86EMUL_CONTINUE; 2129 return X86EMUL_CONTINUE;
2019 if (gpa == UNMAPPED_GVA) 2130 if (gpa == UNMAPPED_GVA)
2020 return X86EMUL_PROPAGATE_FAULT; 2131 return X86EMUL_PROPAGATE_FAULT;
@@ -2217,7 +2328,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
2217 2328
2218 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); 2329 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
2219 2330
2220 emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu); 2331 kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu);
2221 2332
2222 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", 2333 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
2223 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); 2334 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
@@ -2225,7 +2336,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
2225EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 2336EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
2226 2337
2227static struct x86_emulate_ops emulate_ops = { 2338static struct x86_emulate_ops emulate_ops = {
2228 .read_std = emulator_read_std, 2339 .read_std = kvm_read_guest_virt,
2229 .read_emulated = emulator_read_emulated, 2340 .read_emulated = emulator_read_emulated,
2230 .write_emulated = emulator_write_emulated, 2341 .write_emulated = emulator_write_emulated,
2231 .cmpxchg_emulated = emulator_cmpxchg_emulated, 2342 .cmpxchg_emulated = emulator_cmpxchg_emulated,
@@ -2327,40 +2438,19 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
2327} 2438}
2328EXPORT_SYMBOL_GPL(emulate_instruction); 2439EXPORT_SYMBOL_GPL(emulate_instruction);
2329 2440
2330static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
2331{
2332 int i;
2333
2334 for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i)
2335 if (vcpu->arch.pio.guest_pages[i]) {
2336 kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]);
2337 vcpu->arch.pio.guest_pages[i] = NULL;
2338 }
2339}
2340
2341static int pio_copy_data(struct kvm_vcpu *vcpu) 2441static int pio_copy_data(struct kvm_vcpu *vcpu)
2342{ 2442{
2343 void *p = vcpu->arch.pio_data; 2443 void *p = vcpu->arch.pio_data;
2344 void *q; 2444 gva_t q = vcpu->arch.pio.guest_gva;
2345 unsigned bytes; 2445 unsigned bytes;
2346 int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1; 2446 int ret;
2347 2447
2348 q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
2349 PAGE_KERNEL);
2350 if (!q) {
2351 free_pio_guest_pages(vcpu);
2352 return -ENOMEM;
2353 }
2354 q += vcpu->arch.pio.guest_page_offset;
2355 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; 2448 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
2356 if (vcpu->arch.pio.in) 2449 if (vcpu->arch.pio.in)
2357 memcpy(q, p, bytes); 2450 ret = kvm_write_guest_virt(q, p, bytes, vcpu);
2358 else 2451 else
2359 memcpy(p, q, bytes); 2452 ret = kvm_read_guest_virt(q, p, bytes, vcpu);
2360 q -= vcpu->arch.pio.guest_page_offset; 2453 return ret;
2361 vunmap(q);
2362 free_pio_guest_pages(vcpu);
2363 return 0;
2364} 2454}
2365 2455
2366int complete_pio(struct kvm_vcpu *vcpu) 2456int complete_pio(struct kvm_vcpu *vcpu)
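The vmap()-based page pinning is gone: pio_copy_data() now copies through guest virtual addresses with the helpers introduced above, which fault pages in on demand and report unmapped GVAs instead of requiring free_pio_guest_pages() cleanup. A sketch of the OUT direction (guest memory into the shared pio_data page); the helper name is invented, the X86EMUL_* convention is the real one:

static int sketch_pio_out_copy(struct kvm_vcpu *vcpu)
{
	unsigned bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
	int ret;

	ret = kvm_read_guest_virt(vcpu->arch.pio.guest_gva,
				  vcpu->arch.pio_data, bytes, vcpu);
	if (ret == X86EMUL_PROPAGATE_FAULT)
		kvm_inject_gp(vcpu, 0);	/* unmapped GVA: fault the guest */
	return ret;
}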
@@ -2471,7 +2561,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2471 vcpu->arch.pio.in = in; 2561 vcpu->arch.pio.in = in;
2472 vcpu->arch.pio.string = 0; 2562 vcpu->arch.pio.string = 0;
2473 vcpu->arch.pio.down = 0; 2563 vcpu->arch.pio.down = 0;
2474 vcpu->arch.pio.guest_page_offset = 0;
2475 vcpu->arch.pio.rep = 0; 2564 vcpu->arch.pio.rep = 0;
2476 2565
2477 if (vcpu->run->io.direction == KVM_EXIT_IO_IN) 2566 if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2499,9 +2588,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2499 gva_t address, int rep, unsigned port) 2588 gva_t address, int rep, unsigned port)
2500{ 2589{
2501 unsigned now, in_page; 2590 unsigned now, in_page;
2502 int i, ret = 0; 2591 int ret = 0;
2503 int nr_pages = 1;
2504 struct page *page;
2505 struct kvm_io_device *pio_dev; 2592 struct kvm_io_device *pio_dev;
2506 2593
2507 vcpu->run->exit_reason = KVM_EXIT_IO; 2594 vcpu->run->exit_reason = KVM_EXIT_IO;
@@ -2513,7 +2600,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2513 vcpu->arch.pio.in = in; 2600 vcpu->arch.pio.in = in;
2514 vcpu->arch.pio.string = 1; 2601 vcpu->arch.pio.string = 1;
2515 vcpu->arch.pio.down = down; 2602 vcpu->arch.pio.down = down;
2516 vcpu->arch.pio.guest_page_offset = offset_in_page(address);
2517 vcpu->arch.pio.rep = rep; 2603 vcpu->arch.pio.rep = rep;
2518 2604
2519 if (vcpu->run->io.direction == KVM_EXIT_IO_IN) 2605 if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2533,15 +2619,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2533 else 2619 else
2534 in_page = offset_in_page(address) + size; 2620 in_page = offset_in_page(address) + size;
2535 now = min(count, (unsigned long)in_page / size); 2621 now = min(count, (unsigned long)in_page / size);
2536 if (!now) { 2622 if (!now)
2537 /*
2538 * String I/O straddles page boundary. Pin two guest pages
2539 * so that we satisfy atomicity constraints. Do just one
2540 * transaction to avoid complexity.
2541 */
2542 nr_pages = 2;
2543 now = 1; 2623 now = 1;
2544 }
2545 if (down) { 2624 if (down) {
2546 /* 2625 /*
2547 * String I/O in reverse. Yuck. Kill the guest, fix later. 2626 * String I/O in reverse. Yuck. Kill the guest, fix later.
@@ -2556,15 +2635,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2556 if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count) 2635 if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
2557 kvm_x86_ops->skip_emulated_instruction(vcpu); 2636 kvm_x86_ops->skip_emulated_instruction(vcpu);
2558 2637
2559 for (i = 0; i < nr_pages; ++i) { 2638 vcpu->arch.pio.guest_gva = address;
2560 page = gva_to_page(vcpu, address + i * PAGE_SIZE);
2561 vcpu->arch.pio.guest_pages[i] = page;
2562 if (!page) {
2563 kvm_inject_gp(vcpu, 0);
2564 free_pio_guest_pages(vcpu);
2565 return 1;
2566 }
2567 }
2568 2639
2569 pio_dev = vcpu_find_pio_dev(vcpu, port, 2640 pio_dev = vcpu_find_pio_dev(vcpu, port,
2570 vcpu->arch.pio.cur_count, 2641 vcpu->arch.pio.cur_count,
@@ -2572,7 +2643,11 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2572 if (!vcpu->arch.pio.in) { 2643 if (!vcpu->arch.pio.in) {
2573 /* string PIO write */ 2644 /* string PIO write */
2574 ret = pio_copy_data(vcpu); 2645 ret = pio_copy_data(vcpu);
2575 if (ret >= 0 && pio_dev) { 2646 if (ret == X86EMUL_PROPAGATE_FAULT) {
2647 kvm_inject_gp(vcpu, 0);
2648 return 1;
2649 }
2650 if (ret == 0 && pio_dev) {
2576 pio_string_write(pio_dev, vcpu); 2651 pio_string_write(pio_dev, vcpu);
2577 complete_pio(vcpu); 2652 complete_pio(vcpu);
2578 if (vcpu->arch.pio.count == 0) 2653 if (vcpu->arch.pio.count == 0)
@@ -2587,9 +2662,72 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2587} 2662}
2588EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); 2663EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
2589 2664
2665static void bounce_off(void *info)
2666{
2667 /* nothing */
2668}
2669
2670static unsigned int ref_freq;
2671static unsigned long tsc_khz_ref;
2672
2673static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
2674 void *data)
2675{
2676 struct cpufreq_freqs *freq = data;
2677 struct kvm *kvm;
2678 struct kvm_vcpu *vcpu;
2679 int i, send_ipi = 0;
2680
2681 if (!ref_freq)
2682 ref_freq = freq->old;
2683
2684 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
2685 return 0;
2686 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
2687 return 0;
2688 per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
2689
2690 spin_lock(&kvm_lock);
2691 list_for_each_entry(kvm, &vm_list, vm_list) {
2692 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
2693 vcpu = kvm->vcpus[i];
2694 if (!vcpu)
2695 continue;
2696 if (vcpu->cpu != freq->cpu)
2697 continue;
2698 if (!kvm_request_guest_time_update(vcpu))
2699 continue;
2700 if (vcpu->cpu != smp_processor_id())
2701 send_ipi++;
2702 }
2703 }
2704 spin_unlock(&kvm_lock);
2705
2706 if (freq->old < freq->new && send_ipi) {
2707 /*
2708 * We are scaling the frequency up. We must make sure the
2709 * guest doesn't see old kvmclock values while running with
2710 * the new frequency; otherwise we risk the guest seeing
2711 * time go backwards.
2712 *
2713 * In case we update the frequency for another cpu
2714 * (which might be in guest context) send an interrupt
2715 * to kick the cpu out of guest context. Next time
2716 * guest context is entered kvmclock will be updated,
2717 * so the guest will not see stale values.
2718 */
2719 smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
2720 }
2721 return 0;
2722}
2723
2724static struct notifier_block kvmclock_cpufreq_notifier_block = {
2725 .notifier_call = kvmclock_cpufreq_notifier
2726};
2727
2590int kvm_arch_init(void *opaque) 2728int kvm_arch_init(void *opaque)
2591{ 2729{
2592 int r; 2730 int r, cpu;
2593 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; 2731 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
2594 2732
2595 if (kvm_x86_ops) { 2733 if (kvm_x86_ops) {
@@ -2620,6 +2758,15 @@ int kvm_arch_init(void *opaque)
2620 kvm_mmu_set_base_ptes(PT_PRESENT_MASK); 2758 kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
2621 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, 2759 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
2622 PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); 2760 PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
2761
2762 for_each_possible_cpu(cpu)
2763 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
2764 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
2765 tsc_khz_ref = tsc_khz;
2766 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
2767 CPUFREQ_TRANSITION_NOTIFIER);
2768 }
2769
2623 return 0; 2770 return 0;
2624 2771
2625out: 2772out:
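cpufreq_scale(old, div, mult) computes old * mult / div in fixed point, so per_cpu(cpu_tsc_khz) tracks each core's current frequency whenever the TSC is not constant. A plain-C equivalent with a worked case, for illustration only:

static unsigned long sketch_scale_tsc(unsigned long tsc_khz_ref,
				      unsigned int ref_freq,
				      unsigned int new_freq)
{
	return (unsigned long)((u64)tsc_khz_ref * new_freq / ref_freq);
}

/*
 * e.g. a 2.0 GHz part throttled to 1.0 GHz:
 *   sketch_scale_tsc(2000000, 2000000, 1000000) == 1000000 kHz,
 * after which kvm_write_guest_time() republishes the scale factor.
 */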
@@ -2827,25 +2974,20 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
2827 if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index) 2974 if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
2828 return 0; 2975 return 0;
2829 if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) && 2976 if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
2830 !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) 2977 !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
2831 return 0; 2978 return 0;
2832 return 1; 2979 return 1;
2833} 2980}
2834 2981
2835void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) 2982struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
2983 u32 function, u32 index)
2836{ 2984{
2837 int i; 2985 int i;
2838 u32 function, index; 2986 struct kvm_cpuid_entry2 *best = NULL;
2839 struct kvm_cpuid_entry2 *e, *best;
2840 2987
2841 function = kvm_register_read(vcpu, VCPU_REGS_RAX);
2842 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
2843 kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
2844 kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
2845 kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
2846 kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
2847 best = NULL;
2848 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { 2988 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
2989 struct kvm_cpuid_entry2 *e;
2990
2849 e = &vcpu->arch.cpuid_entries[i]; 2991 e = &vcpu->arch.cpuid_entries[i];
2850 if (is_matching_cpuid_entry(e, function, index)) { 2992 if (is_matching_cpuid_entry(e, function, index)) {
2851 if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) 2993 if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
@@ -2860,6 +3002,21 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
2860 if (!best || e->function > best->function) 3002 if (!best || e->function > best->function)
2861 best = e; 3003 best = e;
2862 } 3004 }
3005 return best;
3006}
3007
3008void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
3009{
3010 u32 function, index;
3011 struct kvm_cpuid_entry2 *best;
3012
3013 function = kvm_register_read(vcpu, VCPU_REGS_RAX);
3014 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
3015 kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
3016 kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
3017 kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
3018 kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
3019 best = kvm_find_cpuid_entry(vcpu, function, index);
2863 if (best) { 3020 if (best) {
2864 kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); 3021 kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
2865 kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); 3022 kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
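kvm_emulate_cpuid() is now a thin wrapper, and the extracted kvm_find_cpuid_entry() becomes reusable; set_efer() above is its first external caller. A trivial usage sketch, assuming the entries were installed through KVM_SET_CPUID2:

	u32 max_ext_leaf;
	struct kvm_cpuid_entry2 *e;

	e = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
	max_ext_leaf = e ? e->eax : 0;	/* 0 if the leaf was never set */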
@@ -2945,6 +3102,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2945 if (vcpu->requests) { 3102 if (vcpu->requests) {
2946 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) 3103 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
2947 __kvm_migrate_timers(vcpu); 3104 __kvm_migrate_timers(vcpu);
3105 if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
3106 kvm_write_guest_time(vcpu);
2948 if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) 3107 if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
2949 kvm_mmu_sync_roots(vcpu); 3108 kvm_mmu_sync_roots(vcpu);
2950 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 3109 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
@@ -2979,9 +3138,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2979 goto out; 3138 goto out;
2980 } 3139 }
2981 3140
2982 if (vcpu->guest_debug.enabled)
2983 kvm_x86_ops->guest_debug_pre(vcpu);
2984
2985 vcpu->guest_mode = 1; 3141 vcpu->guest_mode = 1;
2986 /* 3142 /*
2987 * Make sure that guest_mode assignment won't happen after 3143 * Make sure that guest_mode assignment won't happen after
@@ -3002,10 +3158,34 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3002 3158
3003 kvm_guest_enter(); 3159 kvm_guest_enter();
3004 3160
3161 get_debugreg(vcpu->arch.host_dr6, 6);
3162 get_debugreg(vcpu->arch.host_dr7, 7);
3163 if (unlikely(vcpu->arch.switch_db_regs)) {
3164 get_debugreg(vcpu->arch.host_db[0], 0);
3165 get_debugreg(vcpu->arch.host_db[1], 1);
3166 get_debugreg(vcpu->arch.host_db[2], 2);
3167 get_debugreg(vcpu->arch.host_db[3], 3);
3168
3169 set_debugreg(0, 7);
3170 set_debugreg(vcpu->arch.eff_db[0], 0);
3171 set_debugreg(vcpu->arch.eff_db[1], 1);
3172 set_debugreg(vcpu->arch.eff_db[2], 2);
3173 set_debugreg(vcpu->arch.eff_db[3], 3);
3174 }
3005 3175
3006 KVMTRACE_0D(VMENTRY, vcpu, entryexit); 3176 KVMTRACE_0D(VMENTRY, vcpu, entryexit);
3007 kvm_x86_ops->run(vcpu, kvm_run); 3177 kvm_x86_ops->run(vcpu, kvm_run);
3008 3178
3179 if (unlikely(vcpu->arch.switch_db_regs)) {
3180 set_debugreg(0, 7);
3181 set_debugreg(vcpu->arch.host_db[0], 0);
3182 set_debugreg(vcpu->arch.host_db[1], 1);
3183 set_debugreg(vcpu->arch.host_db[2], 2);
3184 set_debugreg(vcpu->arch.host_db[3], 3);
3185 }
3186 set_debugreg(vcpu->arch.host_dr6, 6);
3187 set_debugreg(vcpu->arch.host_dr7, 7);
3188
3009 vcpu->guest_mode = 0; 3189 vcpu->guest_mode = 0;
3010 local_irq_enable(); 3190 local_irq_enable();
3011 3191
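The switch_db_regs path is ordering-sensitive: on both edges DR7 is zeroed before DR0-DR3 are rewritten, so a stale breakpoint definition can never match against half-updated address registers. The guest-entry half in isolation, as a sketch:

static void sketch_load_guest_db(struct kvm_vcpu *vcpu)
{
	set_debugreg(0, 7);			/* disarm all breakpoints */
	set_debugreg(vcpu->arch.eff_db[0], 0);	/* then load addresses */
	set_debugreg(vcpu->arch.eff_db[1], 1);
	set_debugreg(vcpu->arch.eff_db[2], 2);
	set_debugreg(vcpu->arch.eff_db[3], 3);
	/* the vendor module arms the guest DR7 as part of vmentry */
}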
@@ -3192,7 +3372,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3192 /* 3372 /*
3193 * Don't leak debug flags in case they were set for guest debugging 3373 * Don't leak debug flags in case they were set for guest debugging
3194 */ 3374 */
3195 if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep) 3375 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
3196 regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); 3376 regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
3197 3377
3198 vcpu_put(vcpu); 3378 vcpu_put(vcpu);
@@ -3811,15 +3991,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3811 return 0; 3991 return 0;
3812} 3992}
3813 3993
3814int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, 3994int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3815 struct kvm_debug_guest *dbg) 3995 struct kvm_guest_debug *dbg)
3816{ 3996{
3817 int r; 3997 int i, r;
3818 3998
3819 vcpu_load(vcpu); 3999 vcpu_load(vcpu);
3820 4000
4001 if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
4002 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
4003 for (i = 0; i < KVM_NR_DB_REGS; ++i)
4004 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
4005 vcpu->arch.switch_db_regs =
4006 (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
4007 } else {
4008 for (i = 0; i < KVM_NR_DB_REGS; i++)
4009 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
4010 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
4011 }
4012
3821 r = kvm_x86_ops->set_guest_debug(vcpu, dbg); 4013 r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
3822 4014
4015 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
4016 kvm_queue_exception(vcpu, DB_VECTOR);
4017 else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
4018 kvm_queue_exception(vcpu, BP_VECTOR);
4019
3823 vcpu_put(vcpu); 4020 vcpu_put(vcpu);
3824 4021
3825 return r; 4022 return r;
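From userspace the new interface looks like this: a hypothetical snippet arming a single execute breakpoint, assuming the x86 kvm_guest_debug_arch layout (debugreg[0..3] are DR0-DR3, debugreg[7] is DR7) and KVM_GUESTDBG_USE_HW_BP from the arch header. DR7 value 0x2 is the standard global-enable bit for slot 0 with type/len 00, i.e. a 1-byte execute breakpoint:

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int arm_hw_breakpoint(int vcpu_fd, __u64 bp_addr)
{
	struct kvm_guest_debug dbg = {
		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
	};

	dbg.arch.debugreg[0] = bp_addr;		/* DR0 <- linear address */
	dbg.arch.debugreg[7] = 0x2;		/* DR7: global-enable slot 0 */
	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}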
@@ -4007,6 +4204,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
4007 vcpu->arch.nmi_pending = false; 4204 vcpu->arch.nmi_pending = false;
4008 vcpu->arch.nmi_injected = false; 4205 vcpu->arch.nmi_injected = false;
4009 4206
4207 vcpu->arch.switch_db_regs = 0;
4208 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
4209 vcpu->arch.dr6 = DR6_FIXED_1;
4210 vcpu->arch.dr7 = DR7_FIXED_1;
4211
4010 return kvm_x86_ops->vcpu_reset(vcpu); 4212 return kvm_x86_ops->vcpu_reset(vcpu);
4011} 4213}
4012 4214
@@ -4100,6 +4302,8 @@ struct kvm *kvm_arch_create_vm(void)
4100 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 4302 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
4101 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); 4303 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
4102 4304
4305 rdtscll(kvm->arch.vm_init_tsc);
4306
4103 return kvm; 4307 return kvm;
4104} 4308}
4105 4309
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index d174db7a3370..ca91749d2083 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -178,7 +178,7 @@ static u32 opcode_table[256] = {
178 0, ImplicitOps | Stack, 0, 0, 178 0, ImplicitOps | Stack, 0, 0,
179 ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, 179 ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
180 /* 0xC8 - 0xCF */ 180 /* 0xC8 - 0xCF */
181 0, 0, 0, 0, 0, 0, 0, 0, 181 0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0,
182 /* 0xD0 - 0xD7 */ 182 /* 0xD0 - 0xD7 */
183 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, 183 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
184 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, 184 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
@@ -1136,18 +1136,19 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
1136} 1136}
1137 1137
1138static int emulate_pop(struct x86_emulate_ctxt *ctxt, 1138static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1139 struct x86_emulate_ops *ops) 1139 struct x86_emulate_ops *ops,
1140 void *dest, int len)
1140{ 1141{
1141 struct decode_cache *c = &ctxt->decode; 1142 struct decode_cache *c = &ctxt->decode;
1142 int rc; 1143 int rc;
1143 1144
1144 rc = ops->read_emulated(register_address(c, ss_base(ctxt), 1145 rc = ops->read_emulated(register_address(c, ss_base(ctxt),
1145 c->regs[VCPU_REGS_RSP]), 1146 c->regs[VCPU_REGS_RSP]),
1146 &c->src.val, c->src.bytes, ctxt->vcpu); 1147 dest, len, ctxt->vcpu);
1147 if (rc != 0) 1148 if (rc != 0)
1148 return rc; 1149 return rc;
1149 1150
1150 register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes); 1151 register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
1151 return rc; 1152 return rc;
1152} 1153}
1153 1154
@@ -1157,11 +1158,9 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
1157 struct decode_cache *c = &ctxt->decode; 1158 struct decode_cache *c = &ctxt->decode;
1158 int rc; 1159 int rc;
1159 1160
1160 c->src.bytes = c->dst.bytes; 1161 rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
1161 rc = emulate_pop(ctxt, ops);
1162 if (rc != 0) 1162 if (rc != 0)
1163 return rc; 1163 return rc;
1164 c->dst.val = c->src.val;
1165 return 0; 1164 return 0;
1166} 1165}
1167 1166
@@ -1279,6 +1278,25 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1279 return 0; 1278 return 0;
1280} 1279}
1281 1280
1281static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
1282 struct x86_emulate_ops *ops)
1283{
1284 struct decode_cache *c = &ctxt->decode;
1285 int rc;
1286 unsigned long cs;
1287
1288 rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
1289 if (rc)
1290 return rc;
1291 if (c->op_bytes == 4)
1292 c->eip = (u32)c->eip;
1293 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1294 if (rc)
1295 return rc;
1296 rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS);
1297 return rc;
1298}
1299
1282static inline int writeback(struct x86_emulate_ctxt *ctxt, 1300static inline int writeback(struct x86_emulate_ctxt *ctxt,
1283 struct x86_emulate_ops *ops) 1301 struct x86_emulate_ops *ops)
1284{ 1302{
@@ -1467,11 +1485,9 @@ special_insn:
1467 break; 1485 break;
1468 case 0x58 ... 0x5f: /* pop reg */ 1486 case 0x58 ... 0x5f: /* pop reg */
1469 pop_instruction: 1487 pop_instruction:
1470 c->src.bytes = c->op_bytes; 1488 rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
1471 rc = emulate_pop(ctxt, ops);
1472 if (rc != 0) 1489 if (rc != 0)
1473 goto done; 1490 goto done;
1474 c->dst.val = c->src.val;
1475 break; 1491 break;
1476 case 0x63: /* movsxd */ 1492 case 0x63: /* movsxd */
1477 if (ctxt->mode != X86EMUL_MODE_PROT64) 1493 if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -1738,6 +1754,11 @@ special_insn:
1738 mov: 1754 mov:
1739 c->dst.val = c->src.val; 1755 c->dst.val = c->src.val;
1740 break; 1756 break;
1757 case 0xcb: /* ret far */
1758 rc = emulate_ret_far(ctxt, ops);
1759 if (rc)
1760 goto done;
1761 break;
1741 case 0xd0 ... 0xd1: /* Grp2 */ 1762 case 0xd0 ... 0xd1: /* Grp2 */
1742 c->src.val = 1; 1763 c->src.val = 1;
1743 emulate_grp2(ctxt); 1764 emulate_grp2(ctxt);
@@ -1908,11 +1929,16 @@ twobyte_insn:
1908 c->dst.type = OP_NONE; 1929 c->dst.type = OP_NONE;
1909 break; 1930 break;
1910 case 3: /* lidt/vmmcall */ 1931 case 3: /* lidt/vmmcall */
1911 if (c->modrm_mod == 3 && c->modrm_rm == 1) { 1932 if (c->modrm_mod == 3) {
1912 rc = kvm_fix_hypercall(ctxt->vcpu); 1933 switch (c->modrm_rm) {
1913 if (rc) 1934 case 1:
1914 goto done; 1935 rc = kvm_fix_hypercall(ctxt->vcpu);
1915 kvm_emulate_hypercall(ctxt->vcpu); 1936 if (rc)
1937 goto done;
1938 break;
1939 default:
1940 goto cannot_emulate;
1941 }
1916 } else { 1942 } else {
1917 rc = read_descriptor(ctxt, ops, c->src.ptr, 1943 rc = read_descriptor(ctxt, ops, c->src.ptr,
1918 &size, &address, 1944 &size, &address,
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 0424326f1679..311a073afe8a 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -48,7 +48,10 @@ struct kvm_irq_level {
48 * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. 48 * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
49 * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. 49 * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
50 */ 50 */
51 __u32 irq; 51 union {
52 __u32 irq;
53 __s32 status;
54 };
52 __u32 level; 55 __u32 level;
53}; 56};
54 57
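The union lets KVM_IRQ_LINE_STATUS reuse struct kvm_irq_level bidirectionally: userspace passes the GSI in, and the kernel returns the injection status in the same slot. A hedged userspace sketch (status semantics per this series: positive means delivered, zero means coalesced, negative means error):

	struct kvm_irq_level irq = { .irq = 10, .level = 1 };

	if (ioctl(vm_fd, KVM_IRQ_LINE_STATUS, &irq) == 0 && irq.status == 0)
		;	/* interrupt was coalesced; caller may account it */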
@@ -126,6 +129,7 @@ struct kvm_run {
126 __u64 data_offset; /* relative to kvm_run start */ 129 __u64 data_offset; /* relative to kvm_run start */
127 } io; 130 } io;
128 struct { 131 struct {
132 struct kvm_debug_exit_arch arch;
129 } debug; 133 } debug;
130 /* KVM_EXIT_MMIO */ 134 /* KVM_EXIT_MMIO */
131 struct { 135 struct {
@@ -217,21 +221,6 @@ struct kvm_interrupt {
217 __u32 irq; 221 __u32 irq;
218}; 222};
219 223
220struct kvm_breakpoint {
221 __u32 enabled;
222 __u32 padding;
223 __u64 address;
224};
225
226/* for KVM_DEBUG_GUEST */
227struct kvm_debug_guest {
228 /* int */
229 __u32 enabled;
230 __u32 pad;
231 struct kvm_breakpoint breakpoints[4];
232 __u32 singlestep;
233};
234
235/* for KVM_GET_DIRTY_LOG */ 224/* for KVM_GET_DIRTY_LOG */
236struct kvm_dirty_log { 225struct kvm_dirty_log {
237 __u32 slot; 226 __u32 slot;
@@ -292,6 +281,17 @@ struct kvm_s390_interrupt {
292 __u64 parm64; 281 __u64 parm64;
293}; 282};
294 283
284/* for KVM_SET_GUEST_DEBUG */
285
286#define KVM_GUESTDBG_ENABLE 0x00000001
287#define KVM_GUESTDBG_SINGLESTEP 0x00000002
288
289struct kvm_guest_debug {
290 __u32 control;
291 __u32 pad;
292 struct kvm_guest_debug_arch arch;
293};
294
295#define KVM_TRC_SHIFT 16 295#define KVM_TRC_SHIFT 16
296/* 296/*
297 * kvm trace categories 297 * kvm trace categories
@@ -396,6 +396,57 @@ struct kvm_trace_rec {
396#ifdef __KVM_HAVE_USER_NMI 396#ifdef __KVM_HAVE_USER_NMI
397#define KVM_CAP_USER_NMI 22 397#define KVM_CAP_USER_NMI 22
398#endif 398#endif
399#ifdef __KVM_HAVE_GUEST_DEBUG
400#define KVM_CAP_SET_GUEST_DEBUG 23
401#endif
402#ifdef __KVM_HAVE_PIT
403#define KVM_CAP_REINJECT_CONTROL 24
404#endif
405#ifdef __KVM_HAVE_IOAPIC
406#define KVM_CAP_IRQ_ROUTING 25
407#endif
408#define KVM_CAP_IRQ_INJECT_STATUS 26
409#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
410#define KVM_CAP_DEVICE_DEASSIGNMENT 27
411#endif
412
413#ifdef KVM_CAP_IRQ_ROUTING
414
415struct kvm_irq_routing_irqchip {
416 __u32 irqchip;
417 __u32 pin;
418};
419
420struct kvm_irq_routing_msi {
421 __u32 address_lo;
422 __u32 address_hi;
423 __u32 data;
424 __u32 pad;
425};
426
427/* gsi routing entry types */
428#define KVM_IRQ_ROUTING_IRQCHIP 1
429#define KVM_IRQ_ROUTING_MSI 2
430
431struct kvm_irq_routing_entry {
432 __u32 gsi;
433 __u32 type;
434 __u32 flags;
435 __u32 pad;
436 union {
437 struct kvm_irq_routing_irqchip irqchip;
438 struct kvm_irq_routing_msi msi;
439 __u32 pad[8];
440 } u;
441};
442
443struct kvm_irq_routing {
444 __u32 nr;
445 __u32 flags;
446 struct kvm_irq_routing_entry entries[0];
447};
448
449#endif
399 450
400/* 451/*
401 * ioctls for VM fds 452 * ioctls for VM fds
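These structures back the new KVM_SET_GSI_ROUTING ioctl defined below; the kernel replaces its whole routing table atomically with whatever userspace hands in. A minimal userspace sketch routing GSI 5 to IOAPIC pin 5 (KVM_IRQCHIP_IOAPIC is assumed to come from the x86 arch header; error handling trimmed):

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int route_gsi5_to_ioapic(int vm_fd)
{
	struct kvm_irq_routing *table;
	int ret;

	/* one flexible-array entry after the header */
	table = calloc(1, sizeof(*table) + sizeof(table->entries[0]));
	table->nr = 1;
	table->entries[0].gsi = 5;
	table->entries[0].type = KVM_IRQ_ROUTING_IRQCHIP;
	table->entries[0].u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC;
	table->entries[0].u.irqchip.pin = 5;
	ret = ioctl(vm_fd, KVM_SET_GSI_ROUTING, table);
	free(table);
	return ret;
}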
@@ -421,14 +472,19 @@ struct kvm_trace_rec {
421#define KVM_CREATE_PIT _IO(KVMIO, 0x64) 472#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
422#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state) 473#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state)
423#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state) 474#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state)
475#define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level)
424#define KVM_REGISTER_COALESCED_MMIO \ 476#define KVM_REGISTER_COALESCED_MMIO \
425 _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) 477 _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
426#define KVM_UNREGISTER_COALESCED_MMIO \ 478#define KVM_UNREGISTER_COALESCED_MMIO \
427 _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) 479 _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
428#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ 480#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
429 struct kvm_assigned_pci_dev) 481 struct kvm_assigned_pci_dev)
482#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
430#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ 483#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
431 struct kvm_assigned_irq) 484 struct kvm_assigned_irq)
485#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
486#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
487 struct kvm_assigned_pci_dev)
432 488
433/* 489/*
434 * ioctls for vcpu fds 490 * ioctls for vcpu fds
@@ -440,7 +496,8 @@ struct kvm_trace_rec {
440#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) 496#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs)
441#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) 497#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation)
442#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) 498#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt)
443#define KVM_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest) 499/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
500#define KVM_DEBUG_GUEST __KVM_DEPRECATED_DEBUG_GUEST
444#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) 501#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs)
445#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) 502#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs)
446#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) 503#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid)
@@ -469,6 +526,29 @@ struct kvm_trace_rec {
469#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) 526#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
470/* Available with KVM_CAP_NMI */ 527/* Available with KVM_CAP_NMI */
471#define KVM_NMI _IO(KVMIO, 0x9a) 528#define KVM_NMI _IO(KVMIO, 0x9a)
529/* Available with KVM_CAP_SET_GUEST_DEBUG */
530#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug)
531
532/*
533 * Deprecated interfaces
534 */
535struct kvm_breakpoint {
536 __u32 enabled;
537 __u32 padding;
538 __u64 address;
539};
540
541struct kvm_debug_guest {
542 __u32 enabled;
543 __u32 pad;
544 struct kvm_breakpoint breakpoints[4];
545 __u32 singlestep;
546};
547
548#define __KVM_DEPRECATED_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest)
549
550#define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *)
551#define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *)
472 552
473#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) 553#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
474#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) 554#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
@@ -522,6 +602,7 @@ struct kvm_assigned_irq {
522 602
523#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) 603#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
524 604
605#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
525#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) 606#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0)
526 607
527#endif 608#endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bf6f703642fc..894a56e365e8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -37,6 +37,7 @@
37#define KVM_REQ_PENDING_TIMER 5 37#define KVM_REQ_PENDING_TIMER 5
38#define KVM_REQ_UNHALT 6 38#define KVM_REQ_UNHALT 6
39#define KVM_REQ_MMU_SYNC 7 39#define KVM_REQ_MMU_SYNC 7
40#define KVM_REQ_KVMCLOCK_UPDATE 8
40 41
41#define KVM_USERSPACE_IRQ_SOURCE_ID 0 42#define KVM_USERSPACE_IRQ_SOURCE_ID 0
42 43
@@ -73,7 +74,7 @@ struct kvm_vcpu {
73 struct kvm_run *run; 74 struct kvm_run *run;
74 int guest_mode; 75 int guest_mode;
75 unsigned long requests; 76 unsigned long requests;
76 struct kvm_guest_debug guest_debug; 77 unsigned long guest_debug;
77 int fpu_active; 78 int fpu_active;
78 int guest_fpu_loaded; 79 int guest_fpu_loaded;
79 wait_queue_head_t wq; 80 wait_queue_head_t wq;
@@ -107,6 +108,20 @@ struct kvm_memory_slot {
107 int user_alloc; 108 int user_alloc;
108}; 109};
109 110
111struct kvm_kernel_irq_routing_entry {
112 u32 gsi;
113 int (*set)(struct kvm_kernel_irq_routing_entry *e,
114 struct kvm *kvm, int level);
115 union {
116 struct {
117 unsigned irqchip;
118 unsigned pin;
119 } irqchip;
120 struct msi_msg msi;
121 };
122 struct list_head link;
123};
124
110struct kvm { 125struct kvm {
111 struct mutex lock; /* protects the vcpus array and APIC accesses */ 126 struct mutex lock; /* protects the vcpus array and APIC accesses */
112 spinlock_t mmu_lock; 127 spinlock_t mmu_lock;
@@ -127,6 +142,11 @@ struct kvm {
127 struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; 142 struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
128#endif 143#endif
129 144
145#ifdef CONFIG_HAVE_KVM_IRQCHIP
146 struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
147 struct hlist_head mask_notifier_list;
148#endif
149
130#ifdef KVM_ARCH_WANT_MMU_NOTIFIER 150#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
131 struct mmu_notifier mmu_notifier; 151 struct mmu_notifier mmu_notifier;
132 unsigned long mmu_notifier_seq; 152 unsigned long mmu_notifier_seq;
@@ -237,7 +257,6 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
237 int user_alloc); 257 int user_alloc);
238long kvm_arch_vm_ioctl(struct file *filp, 258long kvm_arch_vm_ioctl(struct file *filp,
239 unsigned int ioctl, unsigned long arg); 259 unsigned int ioctl, unsigned long arg);
240void kvm_arch_destroy_vm(struct kvm *kvm);
241 260
242int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); 261int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
243int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); 262int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
@@ -255,8 +274,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
255 struct kvm_mp_state *mp_state); 274 struct kvm_mp_state *mp_state);
256int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 275int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
257 struct kvm_mp_state *mp_state); 276 struct kvm_mp_state *mp_state);
258int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, 277int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
259 struct kvm_debug_guest *dbg); 278 struct kvm_guest_debug *dbg);
260int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); 279int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
261 280
262int kvm_arch_init(void *opaque); 281int kvm_arch_init(void *opaque);
@@ -310,7 +329,6 @@ struct kvm_assigned_dev_kernel {
310 int host_irq; 329 int host_irq;
311 bool host_irq_disabled; 330 bool host_irq_disabled;
312 int guest_irq; 331 int guest_irq;
313 struct msi_msg guest_msi;
314#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) 332#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0)
315#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) 333#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1)
316#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) 334#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8)
@@ -321,8 +339,21 @@ struct kvm_assigned_dev_kernel {
321 struct pci_dev *dev; 339 struct pci_dev *dev;
322 struct kvm *kvm; 340 struct kvm *kvm;
323}; 341};
324void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); 342
325void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); 343struct kvm_irq_mask_notifier {
344 void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
345 int irq;
346 struct hlist_node link;
347};
348
349void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
350 struct kvm_irq_mask_notifier *kimn);
351void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
352 struct kvm_irq_mask_notifier *kimn);
353void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
354
355int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
356void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
326void kvm_register_irq_ack_notifier(struct kvm *kvm, 357void kvm_register_irq_ack_notifier(struct kvm *kvm,
327 struct kvm_irq_ack_notifier *kian); 358 struct kvm_irq_ack_notifier *kian);
328void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); 359void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
@@ -464,4 +495,21 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se
464} 495}
465#endif 496#endif
466 497
498#ifdef CONFIG_HAVE_KVM_IRQCHIP
499
500#define KVM_MAX_IRQ_ROUTES 1024
501
502int kvm_setup_default_irq_routing(struct kvm *kvm);
503int kvm_set_irq_routing(struct kvm *kvm,
504 const struct kvm_irq_routing_entry *entries,
505 unsigned nr,
506 unsigned flags);
507void kvm_free_irq_routing(struct kvm *kvm);
508
509#else
510
511static inline void kvm_free_irq_routing(struct kvm *kvm) {}
512
513#endif
514
467#endif 515#endif
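
The kvm_host.h hunks above introduce two kernel-internal interfaces: a per-VM routing table (kvm->irq_routing, a list of kvm_kernel_irq_routing_entry objects whose ->set() callback injects into a PIC pin, an IOAPIC pin, or an MSI target) and IRQ mask notifiers that fire when the guest masks or unmasks a pin. A minimal consumer sketch for the notifier API, using only the declarations added above (the my_* names are hypothetical):

    #include <linux/kvm_host.h>

    /* Invoked via kvm_fire_mask_notifiers() whenever the mask bit of
     * the watched pin changes. */
    static void my_mask_changed(struct kvm_irq_mask_notifier *kimn, bool masked)
    {
            pr_info("irq %d is now %s\n", kimn->irq,
                    masked ? "masked" : "unmasked");
    }

    static struct kvm_irq_mask_notifier my_kimn = {
            .func = my_mask_changed,
    };

    static void my_watch_pin(struct kvm *kvm, int irq)
    {
            /* fills in my_kimn.irq and links it on kvm->mask_notifier_list */
            kvm_register_irq_mask_notifier(kvm, irq, &my_kimn);
    }

Both the routing list and mask_notifier_list only exist under CONFIG_HAVE_KVM_IRQCHIP, so such a consumer has to live behind the same option.
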
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 9b6f395c9625..2b8318c83e53 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -40,17 +40,4 @@ typedef unsigned long hfn_t;
40 40
41typedef hfn_t pfn_t; 41typedef hfn_t pfn_t;
42 42
43struct kvm_pio_request {
44 unsigned long count;
45 int cur_count;
46 struct page *guest_pages[2];
47 unsigned guest_page_offset;
48 int in;
49 int port;
50 int size;
51 int string;
52 int down;
53 int rep;
54};
55
56#endif /* __KVM_TYPES_H__ */ 43#endif /* __KVM_TYPES_H__ */
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 23b81cf242af..c3b99def9cbc 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -83,24 +83,28 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
83 return result; 83 return result;
84} 84}
85 85
86static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) 86static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
87{ 87{
88 union ioapic_redir_entry *pent; 88 union ioapic_redir_entry *pent;
89 int injected = -1;
89 90
90 pent = &ioapic->redirtbl[idx]; 91 pent = &ioapic->redirtbl[idx];
91 92
92 if (!pent->fields.mask) { 93 if (!pent->fields.mask) {
93 int injected = ioapic_deliver(ioapic, idx); 94 injected = ioapic_deliver(ioapic, idx);
94 if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) 95 if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
95 pent->fields.remote_irr = 1; 96 pent->fields.remote_irr = 1;
96 } 97 }
97 if (!pent->fields.trig_mode) 98 if (!pent->fields.trig_mode)
98 ioapic->irr &= ~(1 << idx); 99 ioapic->irr &= ~(1 << idx);
100
101 return injected;
99} 102}
100 103
101static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) 104static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
102{ 105{
103 unsigned index; 106 unsigned index;
107 bool mask_before, mask_after;
104 108
105 switch (ioapic->ioregsel) { 109 switch (ioapic->ioregsel) {
106 case IOAPIC_REG_VERSION: 110 case IOAPIC_REG_VERSION:
@@ -120,6 +124,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
120 ioapic_debug("change redir index %x val %x\n", index, val); 124 ioapic_debug("change redir index %x val %x\n", index, val);
121 if (index >= IOAPIC_NUM_PINS) 125 if (index >= IOAPIC_NUM_PINS)
122 return; 126 return;
127 mask_before = ioapic->redirtbl[index].fields.mask;
123 if (ioapic->ioregsel & 1) { 128 if (ioapic->ioregsel & 1) {
124 ioapic->redirtbl[index].bits &= 0xffffffff; 129 ioapic->redirtbl[index].bits &= 0xffffffff;
125 ioapic->redirtbl[index].bits |= (u64) val << 32; 130 ioapic->redirtbl[index].bits |= (u64) val << 32;
@@ -128,6 +133,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
128 ioapic->redirtbl[index].bits |= (u32) val; 133 ioapic->redirtbl[index].bits |= (u32) val;
129 ioapic->redirtbl[index].fields.remote_irr = 0; 134 ioapic->redirtbl[index].fields.remote_irr = 0;
130 } 135 }
136 mask_after = ioapic->redirtbl[index].fields.mask;
137 if (mask_before != mask_after)
138 kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
131 if (ioapic->irr & (1 << index)) 139 if (ioapic->irr & (1 << index))
132 ioapic_service(ioapic, index); 140 ioapic_service(ioapic, index);
133 break; 141 break;
@@ -202,7 +210,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
202 u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode; 210 u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
203 u32 deliver_bitmask; 211 u32 deliver_bitmask;
204 struct kvm_vcpu *vcpu; 212 struct kvm_vcpu *vcpu;
205 int vcpu_id, r = 0; 213 int vcpu_id, r = -1;
206 214
207 ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " 215 ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
208 "vector=%x trig_mode=%x\n", 216 "vector=%x trig_mode=%x\n",
@@ -242,7 +250,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
242 deliver_bitmask &= ~(1 << vcpu_id); 250 deliver_bitmask &= ~(1 << vcpu_id);
243 vcpu = ioapic->kvm->vcpus[vcpu_id]; 251 vcpu = ioapic->kvm->vcpus[vcpu_id];
244 if (vcpu) { 252 if (vcpu) {
245 r = ioapic_inj_irq(ioapic, vcpu, vector, 253 if (r < 0)
254 r = 0;
255 r += ioapic_inj_irq(ioapic, vcpu, vector,
246 trig_mode, delivery_mode); 256 trig_mode, delivery_mode);
247 } 257 }
248 } 258 }
@@ -253,8 +263,10 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
253 continue; 263 continue;
254 deliver_bitmask &= ~(1 << vcpu_id); 264 deliver_bitmask &= ~(1 << vcpu_id);
255 vcpu = ioapic->kvm->vcpus[vcpu_id]; 265 vcpu = ioapic->kvm->vcpus[vcpu_id];
256 if (vcpu) 266 if (vcpu) {
257 ioapic_inj_nmi(vcpu); 267 ioapic_inj_nmi(vcpu);
268 r = 1;
269 }
258 else 270 else
259 ioapic_debug("NMI to vcpu %d failed\n", 271 ioapic_debug("NMI to vcpu %d failed\n",
260 vcpu->vcpu_id); 272 vcpu->vcpu_id);
@@ -268,11 +280,12 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
268 return r; 280 return r;
269} 281}
270 282
271void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) 283int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
272{ 284{
273 u32 old_irr = ioapic->irr; 285 u32 old_irr = ioapic->irr;
274 u32 mask = 1 << irq; 286 u32 mask = 1 << irq;
275 union ioapic_redir_entry entry; 287 union ioapic_redir_entry entry;
288 int ret = 1;
276 289
277 if (irq >= 0 && irq < IOAPIC_NUM_PINS) { 290 if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
278 entry = ioapic->redirtbl[irq]; 291 entry = ioapic->redirtbl[irq];
@@ -283,25 +296,26 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
283 ioapic->irr |= mask; 296 ioapic->irr |= mask;
284 if ((!entry.fields.trig_mode && old_irr != ioapic->irr) 297 if ((!entry.fields.trig_mode && old_irr != ioapic->irr)
285 || !entry.fields.remote_irr) 298 || !entry.fields.remote_irr)
286 ioapic_service(ioapic, irq); 299 ret = ioapic_service(ioapic, irq);
287 } 300 }
288 } 301 }
302 return ret;
289} 303}
290 304
291static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi, 305static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
292 int trigger_mode) 306 int trigger_mode)
293{ 307{
294 union ioapic_redir_entry *ent; 308 union ioapic_redir_entry *ent;
295 309
296 ent = &ioapic->redirtbl[gsi]; 310 ent = &ioapic->redirtbl[pin];
297 311
298 kvm_notify_acked_irq(ioapic->kvm, gsi); 312 kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
299 313
300 if (trigger_mode == IOAPIC_LEVEL_TRIG) { 314 if (trigger_mode == IOAPIC_LEVEL_TRIG) {
301 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); 315 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
302 ent->fields.remote_irr = 0; 316 ent->fields.remote_irr = 0;
303 if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) 317 if (!ent->fields.mask && (ioapic->irr & (1 << pin)))
304 ioapic_service(ioapic, gsi); 318 ioapic_service(ioapic, pin);
305 } 319 }
306} 320}
307 321
@@ -426,3 +440,4 @@ int kvm_ioapic_init(struct kvm *kvm)
426 kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev); 440 kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
427 return 0; 441 return 0;
428} 442}
443
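
The ioapic.c changes above thread a delivery result out of ioapic_deliver() through ioapic_service() and kvm_ioapic_set_irq(); together with the kvm_set_irq() rework below, a caller can now tell whether an interrupt was ignored, coalesced, or delivered. A sketch of interpreting the new return convention (my_stats and the function name are hypothetical):

    #include <linux/kvm_host.h>

    static struct { unsigned long ignored, coalesced, delivered; } my_stats;

    static void my_raise_level_irq(struct kvm *kvm, int gsi)
    {
            /* must be called with kvm->lock held */
            int r = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, gsi, 1);

            if (r < 0)
                    my_stats.ignored++;      /* masked or undeliverable */
            else if (r == 0)
                    my_stats.coalesced++;    /* previous irq still pending */
            else
                    my_stats.delivered += r; /* number of CPUs reached */
    }
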
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 49c9581d2586..a34bd5e6436b 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -83,7 +83,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
83 unsigned long bitmap); 83 unsigned long bitmap);
84void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); 84void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
85int kvm_ioapic_init(struct kvm *kvm); 85int kvm_ioapic_init(struct kvm *kvm);
86void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); 86int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
87void kvm_ioapic_reset(struct kvm_ioapic *ioapic); 87void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
88u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, 88u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
89 u8 dest_mode); 89 u8 dest_mode);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index aa5d1e5c497e..864ac5483baa 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -20,35 +20,132 @@
20 */ 20 */
21 21
22#include <linux/kvm_host.h> 22#include <linux/kvm_host.h>
23
24#include <asm/msidef.h>
25
23#include "irq.h" 26#include "irq.h"
24 27
25#include "ioapic.h" 28#include "ioapic.h"
26 29
27/* This should be called with the kvm->lock mutex held */ 30static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
28void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) 31 struct kvm *kvm, int level)
32{
33#ifdef CONFIG_X86
34 return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level);
35#else
36 return -1;
37#endif
38}
39
40static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
41 struct kvm *kvm, int level)
42{
43 return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
44}
45
46static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
47 struct kvm *kvm, int level)
48{
49 int vcpu_id, r = -1;
50 struct kvm_vcpu *vcpu;
51 struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
52 int dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK)
53 >> MSI_ADDR_DEST_ID_SHIFT;
54 int vector = (e->msi.data & MSI_DATA_VECTOR_MASK)
55 >> MSI_DATA_VECTOR_SHIFT;
56 int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
57 (unsigned long *)&e->msi.address_lo);
58 int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
59 (unsigned long *)&e->msi.data);
60 int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
61 (unsigned long *)&e->msi.data);
62 u32 deliver_bitmask;
63
64 BUG_ON(!ioapic);
65
66 deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
67 dest_id, dest_mode);
68 /* IOAPIC delivery mode value is the same as MSI here */
69 switch (delivery_mode) {
70 case IOAPIC_LOWEST_PRIORITY:
71 vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
72 deliver_bitmask);
73 if (vcpu != NULL)
74 r = kvm_apic_set_irq(vcpu, vector, trig_mode);
75 else
76 printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
77 break;
78 case IOAPIC_FIXED:
79 for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
80 if (!(deliver_bitmask & (1 << vcpu_id)))
81 continue;
82 deliver_bitmask &= ~(1 << vcpu_id);
83 vcpu = ioapic->kvm->vcpus[vcpu_id];
84 if (vcpu) {
85 if (r < 0)
86 r = 0;
87 r += kvm_apic_set_irq(vcpu, vector, trig_mode);
88 }
89 }
90 break;
91 default:
92 break;
93 }
94 return r;
95}
96
97/* This should be called with the kvm->lock mutex held
98 * Return value:
99 * < 0 Interrupt was ignored (masked or not delivered for other reasons)
100 * = 0 Interrupt was coalesced (previous irq is still pending)
 101 * > 0 Number of CPUs the interrupt was delivered to
102 */
103int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
29{ 104{
30 unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; 105 struct kvm_kernel_irq_routing_entry *e;
106 unsigned long *irq_state, sig_level;
107 int ret = -1;
108
109 if (irq < KVM_IOAPIC_NUM_PINS) {
110 irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
31 111
32 /* Logical OR for level trig interrupt */ 112 /* Logical OR for level trig interrupt */
33 if (level) 113 if (level)
34 set_bit(irq_source_id, irq_state); 114 set_bit(irq_source_id, irq_state);
35 else 115 else
36 clear_bit(irq_source_id, irq_state); 116 clear_bit(irq_source_id, irq_state);
117 sig_level = !!(*irq_state);
118 } else /* Deal with MSI/MSI-X */
119 sig_level = 1;
37 120
38 /* Not possible to detect if the guest uses the PIC or the 121 /* Not possible to detect if the guest uses the PIC or the
39 * IOAPIC. So set the bit in both. The guest will ignore 122 * IOAPIC. So set the bit in both. The guest will ignore
40 * writes to the unused one. 123 * writes to the unused one.
41 */ 124 */
42 kvm_ioapic_set_irq(kvm->arch.vioapic, irq, !!(*irq_state)); 125 list_for_each_entry(e, &kvm->irq_routing, link)
43#ifdef CONFIG_X86 126 if (e->gsi == irq) {
44 kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state)); 127 int r = e->set(e, kvm, sig_level);
45#endif 128 if (r < 0)
129 continue;
130
131 ret = r + ((ret < 0) ? 0 : ret);
132 }
133 return ret;
46} 134}
47 135
48void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi) 136void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
49{ 137{
138 struct kvm_kernel_irq_routing_entry *e;
50 struct kvm_irq_ack_notifier *kian; 139 struct kvm_irq_ack_notifier *kian;
51 struct hlist_node *n; 140 struct hlist_node *n;
141 unsigned gsi = pin;
142
143 list_for_each_entry(e, &kvm->irq_routing, link)
144 if (e->irqchip.irqchip == irqchip &&
145 e->irqchip.pin == pin) {
146 gsi = e->gsi;
147 break;
148 }
52 149
53 hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) 150 hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
54 if (kian->gsi == gsi) 151 if (kian->gsi == gsi)
@@ -99,3 +196,177 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
99 clear_bit(irq_source_id, &kvm->arch.irq_states[i]); 196 clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
100 clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); 197 clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
101} 198}
199
200void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
201 struct kvm_irq_mask_notifier *kimn)
202{
203 kimn->irq = irq;
204 hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
205}
206
207void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
208 struct kvm_irq_mask_notifier *kimn)
209{
210 hlist_del(&kimn->link);
211}
212
213void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
214{
215 struct kvm_irq_mask_notifier *kimn;
216 struct hlist_node *n;
217
218 hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
219 if (kimn->irq == irq)
220 kimn->func(kimn, mask);
221}
222
223static void __kvm_free_irq_routing(struct list_head *irq_routing)
224{
225 struct kvm_kernel_irq_routing_entry *e, *n;
226
227 list_for_each_entry_safe(e, n, irq_routing, link)
228 kfree(e);
229}
230
231void kvm_free_irq_routing(struct kvm *kvm)
232{
233 __kvm_free_irq_routing(&kvm->irq_routing);
234}
235
236static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
237 const struct kvm_irq_routing_entry *ue)
238{
239 int r = -EINVAL;
240 int delta;
241
242 e->gsi = ue->gsi;
243 switch (ue->type) {
244 case KVM_IRQ_ROUTING_IRQCHIP:
245 delta = 0;
246 switch (ue->u.irqchip.irqchip) {
247 case KVM_IRQCHIP_PIC_MASTER:
248 e->set = kvm_set_pic_irq;
249 break;
250 case KVM_IRQCHIP_PIC_SLAVE:
251 e->set = kvm_set_pic_irq;
252 delta = 8;
253 break;
254 case KVM_IRQCHIP_IOAPIC:
255 e->set = kvm_set_ioapic_irq;
256 break;
257 default:
258 goto out;
259 }
260 e->irqchip.irqchip = ue->u.irqchip.irqchip;
261 e->irqchip.pin = ue->u.irqchip.pin + delta;
262 break;
263 case KVM_IRQ_ROUTING_MSI:
264 e->set = kvm_set_msi;
265 e->msi.address_lo = ue->u.msi.address_lo;
266 e->msi.address_hi = ue->u.msi.address_hi;
267 e->msi.data = ue->u.msi.data;
268 break;
269 default:
270 goto out;
271 }
272 r = 0;
273out:
274 return r;
275}
276
277
278int kvm_set_irq_routing(struct kvm *kvm,
279 const struct kvm_irq_routing_entry *ue,
280 unsigned nr,
281 unsigned flags)
282{
283 struct list_head irq_list = LIST_HEAD_INIT(irq_list);
284 struct list_head tmp = LIST_HEAD_INIT(tmp);
285 struct kvm_kernel_irq_routing_entry *e = NULL;
286 unsigned i;
287 int r;
288
289 for (i = 0; i < nr; ++i) {
290 r = -EINVAL;
291 if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
292 goto out;
293 if (ue->flags)
294 goto out;
295 r = -ENOMEM;
296 e = kzalloc(sizeof(*e), GFP_KERNEL);
297 if (!e)
298 goto out;
299 r = setup_routing_entry(e, ue);
300 if (r)
301 goto out;
302 ++ue;
303 list_add(&e->link, &irq_list);
304 e = NULL;
305 }
306
307 mutex_lock(&kvm->lock);
308 list_splice(&kvm->irq_routing, &tmp);
309 INIT_LIST_HEAD(&kvm->irq_routing);
310 list_splice(&irq_list, &kvm->irq_routing);
311 INIT_LIST_HEAD(&irq_list);
312 list_splice(&tmp, &irq_list);
313 mutex_unlock(&kvm->lock);
314
315 r = 0;
316
317out:
318 kfree(e);
319 __kvm_free_irq_routing(&irq_list);
320 return r;
321}
322
323#define IOAPIC_ROUTING_ENTRY(irq) \
324 { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
325 .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }
326#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
327
328#ifdef CONFIG_X86
329# define PIC_ROUTING_ENTRY(irq) \
330 { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
331 .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 }
332# define ROUTING_ENTRY2(irq) \
333 IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
334#else
335# define ROUTING_ENTRY2(irq) \
336 IOAPIC_ROUTING_ENTRY(irq)
337#endif
338
339static const struct kvm_irq_routing_entry default_routing[] = {
340 ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
341 ROUTING_ENTRY2(2), ROUTING_ENTRY2(3),
342 ROUTING_ENTRY2(4), ROUTING_ENTRY2(5),
343 ROUTING_ENTRY2(6), ROUTING_ENTRY2(7),
344 ROUTING_ENTRY2(8), ROUTING_ENTRY2(9),
345 ROUTING_ENTRY2(10), ROUTING_ENTRY2(11),
346 ROUTING_ENTRY2(12), ROUTING_ENTRY2(13),
347 ROUTING_ENTRY2(14), ROUTING_ENTRY2(15),
348 ROUTING_ENTRY1(16), ROUTING_ENTRY1(17),
349 ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
350 ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
351 ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
352#ifdef CONFIG_IA64
353 ROUTING_ENTRY1(24), ROUTING_ENTRY1(25),
354 ROUTING_ENTRY1(26), ROUTING_ENTRY1(27),
355 ROUTING_ENTRY1(28), ROUTING_ENTRY1(29),
356 ROUTING_ENTRY1(30), ROUTING_ENTRY1(31),
357 ROUTING_ENTRY1(32), ROUTING_ENTRY1(33),
358 ROUTING_ENTRY1(34), ROUTING_ENTRY1(35),
359 ROUTING_ENTRY1(36), ROUTING_ENTRY1(37),
360 ROUTING_ENTRY1(38), ROUTING_ENTRY1(39),
361 ROUTING_ENTRY1(40), ROUTING_ENTRY1(41),
362 ROUTING_ENTRY1(42), ROUTING_ENTRY1(43),
363 ROUTING_ENTRY1(44), ROUTING_ENTRY1(45),
364 ROUTING_ENTRY1(46), ROUTING_ENTRY1(47),
365#endif
366};
367
368int kvm_setup_default_irq_routing(struct kvm *kvm)
369{
370 return kvm_set_irq_routing(kvm, default_routing,
371 ARRAY_SIZE(default_routing), 0);
372}
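
kvm_set_irq_routing() swaps the entire new list into kvm->irq_routing under kvm->lock and frees the old one, so callers always pass a complete table rather than a delta. Userspace reaches it through the new KVM_SET_GSI_ROUTING vm ioctl handled in kvm_main.c below; a sketch that installs a single MSI route (the fd, GSI, and message values are illustrative, and a real caller would also resend any default entries it wants to keep):

    #include <linux/kvm.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>

    static int my_set_msi_route(int vm_fd)
    {
            struct kvm_irq_routing *r;
            int ret;

            /* one-entry table; entries[] is a flexible array member */
            r = calloc(1, sizeof(*r) + sizeof(r->entries[0]));
            if (!r)
                    return -1;
            r->nr = 1;
            r->entries[0].gsi = 30;
            r->entries[0].type = KVM_IRQ_ROUTING_MSI;
            r->entries[0].u.msi.address_lo = 0xfee00000; /* dest APIC id 0 */
            r->entries[0].u.msi.data = 0x40;             /* vector 0x40 */
            ret = ioctl(vm_fd, KVM_SET_GSI_ROUTING, r);
            free(r);
            return ret;
    }
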
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 29a667ce35b0..605697e9c4dd 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,10 +47,6 @@
47#include <asm/uaccess.h> 47#include <asm/uaccess.h>
48#include <asm/pgtable.h> 48#include <asm/pgtable.h>
49 49
50#ifdef CONFIG_X86
51#include <asm/msidef.h>
52#endif
53
54#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 50#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
55#include "coalesced_mmio.h" 51#include "coalesced_mmio.h"
56#endif 52#endif
@@ -85,57 +81,6 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
85static bool kvm_rebooting; 81static bool kvm_rebooting;
86 82
87#ifdef KVM_CAP_DEVICE_ASSIGNMENT 83#ifdef KVM_CAP_DEVICE_ASSIGNMENT
88
89#ifdef CONFIG_X86
90static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev)
91{
92 int vcpu_id;
93 struct kvm_vcpu *vcpu;
94 struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm);
95 int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK)
96 >> MSI_ADDR_DEST_ID_SHIFT;
97 int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK)
98 >> MSI_DATA_VECTOR_SHIFT;
99 int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
100 (unsigned long *)&dev->guest_msi.address_lo);
101 int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
102 (unsigned long *)&dev->guest_msi.data);
103 int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
104 (unsigned long *)&dev->guest_msi.data);
105 u32 deliver_bitmask;
106
107 BUG_ON(!ioapic);
108
109 deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
110 dest_id, dest_mode);
111 /* IOAPIC delivery mode value is the same as MSI here */
112 switch (delivery_mode) {
113 case IOAPIC_LOWEST_PRIORITY:
114 vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
115 deliver_bitmask);
116 if (vcpu != NULL)
117 kvm_apic_set_irq(vcpu, vector, trig_mode);
118 else
119 printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
120 break;
121 case IOAPIC_FIXED:
122 for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
123 if (!(deliver_bitmask & (1 << vcpu_id)))
124 continue;
125 deliver_bitmask &= ~(1 << vcpu_id);
126 vcpu = ioapic->kvm->vcpus[vcpu_id];
127 if (vcpu)
128 kvm_apic_set_irq(vcpu, vector, trig_mode);
129 }
130 break;
131 default:
132 printk(KERN_INFO "kvm: unsupported MSI delivery mode\n");
133 }
134}
135#else
136static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {}
137#endif
138
139static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, 84static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
140 int assigned_dev_id) 85 int assigned_dev_id)
141{ 86{
@@ -162,13 +107,10 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
162 * finer-grained lock, update this 107 * finer-grained lock, update this
163 */ 108 */
164 mutex_lock(&assigned_dev->kvm->lock); 109 mutex_lock(&assigned_dev->kvm->lock);
165 if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX) 110 kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
166 kvm_set_irq(assigned_dev->kvm, 111 assigned_dev->guest_irq, 1);
167 assigned_dev->irq_source_id, 112
168 assigned_dev->guest_irq, 1); 113 if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) {
169 else if (assigned_dev->irq_requested_type &
170 KVM_ASSIGNED_DEV_GUEST_MSI) {
171 assigned_device_msi_dispatch(assigned_dev);
172 enable_irq(assigned_dev->host_irq); 114 enable_irq(assigned_dev->host_irq);
173 assigned_dev->host_irq_disabled = false; 115 assigned_dev->host_irq_disabled = false;
174 } 116 }
@@ -331,18 +273,24 @@ static int assigned_device_update_msi(struct kvm *kvm,
331{ 273{
332 int r; 274 int r;
333 275
276 adev->guest_irq = airq->guest_irq;
334 if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) { 277 if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
 335 /* x86 doesn't care about the upper address of the guest msi message addr */ 278 /* x86 doesn't care about the upper address of the guest msi message addr */
336 adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI; 279 adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
337 adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX; 280 adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
338 adev->guest_msi.address_lo = airq->guest_msi.addr_lo;
339 adev->guest_msi.data = airq->guest_msi.data;
340 adev->ack_notifier.gsi = -1; 281 adev->ack_notifier.gsi = -1;
341 } else if (msi2intx) { 282 } else if (msi2intx) {
342 adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX; 283 adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
343 adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI; 284 adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
344 adev->guest_irq = airq->guest_irq;
345 adev->ack_notifier.gsi = airq->guest_irq; 285 adev->ack_notifier.gsi = airq->guest_irq;
286 } else {
287 /*
288 * Guest require to disable device MSI, we disable MSI and
289 * re-enable INTx by default again. Notice it's only for
290 * non-msi2intx.
291 */
292 assigned_device_update_intx(kvm, adev, airq);
293 return 0;
346 } 294 }
347 295
348 if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) 296 if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
@@ -379,6 +327,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
379{ 327{
380 int r = 0; 328 int r = 0;
381 struct kvm_assigned_dev_kernel *match; 329 struct kvm_assigned_dev_kernel *match;
330 u32 current_flags = 0, changed_flags;
382 331
383 mutex_lock(&kvm->lock); 332 mutex_lock(&kvm->lock);
384 333
@@ -416,8 +365,13 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
416 } 365 }
417 } 366 }
418 367
419 if ((!msi2intx && 368 if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
420 (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) || 369 (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI))
370 current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
371
372 changed_flags = assigned_irq->flags ^ current_flags;
373
374 if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
421 (msi2intx && match->dev->msi_enabled)) { 375 (msi2intx && match->dev->msi_enabled)) {
422#ifdef CONFIG_X86 376#ifdef CONFIG_X86
423 r = assigned_device_update_msi(kvm, match, assigned_irq); 377 r = assigned_device_update_msi(kvm, match, assigned_irq);
@@ -563,7 +517,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
563 goto out; 517 goto out;
564 } 518 }
565 519
566 if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) 520 if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
567 kvm_deassign_device(kvm, match); 521 kvm_deassign_device(kvm, match);
568 522
569 kvm_free_assigned_device(kvm, match); 523 kvm_free_assigned_device(kvm, match);
@@ -581,8 +535,10 @@ static inline int valid_vcpu(int n)
581 535
582inline int kvm_is_mmio_pfn(pfn_t pfn) 536inline int kvm_is_mmio_pfn(pfn_t pfn)
583{ 537{
584 if (pfn_valid(pfn)) 538 if (pfn_valid(pfn)) {
585 return PageReserved(pfn_to_page(pfn)); 539 struct page *page = compound_head(pfn_to_page(pfn));
540 return PageReserved(page);
541 }
586 542
587 return true; 543 return true;
588} 544}
@@ -828,6 +784,10 @@ static struct kvm *kvm_create_vm(void)
828 784
829 if (IS_ERR(kvm)) 785 if (IS_ERR(kvm))
830 goto out; 786 goto out;
787#ifdef CONFIG_HAVE_KVM_IRQCHIP
788 INIT_LIST_HEAD(&kvm->irq_routing);
789 INIT_HLIST_HEAD(&kvm->mask_notifier_list);
790#endif
831 791
832#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 792#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
833 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 793 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -909,6 +869,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
909 spin_lock(&kvm_lock); 869 spin_lock(&kvm_lock);
910 list_del(&kvm->vm_list); 870 list_del(&kvm->vm_list);
911 spin_unlock(&kvm_lock); 871 spin_unlock(&kvm_lock);
872 kvm_free_irq_routing(kvm);
912 kvm_io_bus_destroy(&kvm->pio_bus); 873 kvm_io_bus_destroy(&kvm->pio_bus);
913 kvm_io_bus_destroy(&kvm->mmio_bus); 874 kvm_io_bus_destroy(&kvm->mmio_bus);
914#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 875#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
@@ -1755,13 +1716,13 @@ out_free2:
1755 r = 0; 1716 r = 0;
1756 break; 1717 break;
1757 } 1718 }
1758 case KVM_DEBUG_GUEST: { 1719 case KVM_SET_GUEST_DEBUG: {
1759 struct kvm_debug_guest dbg; 1720 struct kvm_guest_debug dbg;
1760 1721
1761 r = -EFAULT; 1722 r = -EFAULT;
1762 if (copy_from_user(&dbg, argp, sizeof dbg)) 1723 if (copy_from_user(&dbg, argp, sizeof dbg))
1763 goto out; 1724 goto out;
1764 r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg); 1725 r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
1765 if (r) 1726 if (r)
1766 goto out; 1727 goto out;
1767 r = 0; 1728 r = 0;
@@ -1929,6 +1890,36 @@ static long kvm_vm_ioctl(struct file *filp,
1929 break; 1890 break;
1930 } 1891 }
1931#endif 1892#endif
1893#ifdef KVM_CAP_IRQ_ROUTING
1894 case KVM_SET_GSI_ROUTING: {
1895 struct kvm_irq_routing routing;
1896 struct kvm_irq_routing __user *urouting;
1897 struct kvm_irq_routing_entry *entries;
1898
1899 r = -EFAULT;
1900 if (copy_from_user(&routing, argp, sizeof(routing)))
1901 goto out;
1902 r = -EINVAL;
1903 if (routing.nr >= KVM_MAX_IRQ_ROUTES)
1904 goto out;
1905 if (routing.flags)
1906 goto out;
1907 r = -ENOMEM;
1908 entries = vmalloc(routing.nr * sizeof(*entries));
1909 if (!entries)
1910 goto out;
1911 r = -EFAULT;
1912 urouting = argp;
1913 if (copy_from_user(entries, urouting->entries,
1914 routing.nr * sizeof(*entries)))
1915 goto out_free_irq_routing;
1916 r = kvm_set_irq_routing(kvm, entries, routing.nr,
1917 routing.flags);
1918 out_free_irq_routing:
1919 vfree(entries);
1920 break;
1921 }
1922#endif
1932 default: 1923 default:
1933 r = kvm_arch_vm_ioctl(filp, ioctl, arg); 1924 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
1934 } 1925 }
@@ -1995,6 +1986,10 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
1995 case KVM_CAP_USER_MEMORY: 1986 case KVM_CAP_USER_MEMORY:
1996 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: 1987 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
1997 return 1; 1988 return 1;
1989#ifdef CONFIG_HAVE_KVM_IRQCHIP
1990 case KVM_CAP_IRQ_ROUTING:
1991 return KVM_MAX_IRQ_ROUTES;
1992#endif
1998 default: 1993 default:
1999 break; 1994 break;
2000 } 1995 }
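
With the hunk above, KVM_CHECK_EXTENSION on the /dev/kvm fd answers KVM_CAP_IRQ_ROUTING with KVM_MAX_IRQ_ROUTES, so the same probe that detects the feature also reports the table-size limit. A userspace sketch (probe_irq_routing is a hypothetical helper):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Returns 0 if GSI routing is unsupported, otherwise the maximum
     * number of routing entries the kernel will accept. */
    static int probe_irq_routing(int kvm_fd)
    {
            int r = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQ_ROUTING);

            return r > 0 ? r : 0;
    }
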