Diffstat (limited to 'drivers/kvm/kvm_main.c')
-rw-r--r--  drivers/kvm/kvm_main.c | 1486
1 file changed, 950 insertions, 536 deletions
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index cd0557954e50..353e58527d15 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -18,6 +18,7 @@ | |||
18 | #include "kvm.h" | 18 | #include "kvm.h" |
19 | #include "x86_emulate.h" | 19 | #include "x86_emulate.h" |
20 | #include "segment_descriptor.h" | 20 | #include "segment_descriptor.h" |
21 | #include "irq.h" | ||
21 | 22 | ||
22 | #include <linux/kvm.h> | 23 | #include <linux/kvm.h> |
23 | #include <linux/module.h> | 24 | #include <linux/module.h> |
@@ -37,6 +38,7 @@ | |||
37 | #include <linux/cpumask.h> | 38 | #include <linux/cpumask.h> |
38 | #include <linux/smp.h> | 39 | #include <linux/smp.h> |
39 | #include <linux/anon_inodes.h> | 40 | #include <linux/anon_inodes.h> |
41 | #include <linux/profile.h> | ||
40 | 42 | ||
41 | #include <asm/processor.h> | 43 | #include <asm/processor.h> |
42 | #include <asm/msr.h> | 44 | #include <asm/msr.h> |
@@ -52,9 +54,11 @@ static LIST_HEAD(vm_list); | |||
52 | 54 | ||
53 | static cpumask_t cpus_hardware_enabled; | 55 | static cpumask_t cpus_hardware_enabled; |
54 | 56 | ||
55 | struct kvm_arch_ops *kvm_arch_ops; | 57 | struct kvm_x86_ops *kvm_x86_ops; |
58 | struct kmem_cache *kvm_vcpu_cache; | ||
59 | EXPORT_SYMBOL_GPL(kvm_vcpu_cache); | ||
56 | 60 | ||
57 | static void hardware_disable(void *ignored); | 61 | static __read_mostly struct preempt_ops kvm_preempt_ops; |
58 | 62 | ||
59 | #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) | 63 | #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) |
60 | 64 | ||
@@ -73,6 +77,7 @@ static struct kvm_stats_debugfs_item { | |||
73 | { "signal_exits", STAT_OFFSET(signal_exits) }, | 77 | { "signal_exits", STAT_OFFSET(signal_exits) }, |
74 | { "irq_window", STAT_OFFSET(irq_window_exits) }, | 78 | { "irq_window", STAT_OFFSET(irq_window_exits) }, |
75 | { "halt_exits", STAT_OFFSET(halt_exits) }, | 79 | { "halt_exits", STAT_OFFSET(halt_exits) }, |
80 | { "halt_wakeup", STAT_OFFSET(halt_wakeup) }, | ||
76 | { "request_irq", STAT_OFFSET(request_irq_exits) }, | 81 | { "request_irq", STAT_OFFSET(request_irq_exits) }, |
77 | { "irq_exits", STAT_OFFSET(irq_exits) }, | 82 | { "irq_exits", STAT_OFFSET(irq_exits) }, |
78 | { "light_exits", STAT_OFFSET(light_exits) }, | 83 | { "light_exits", STAT_OFFSET(light_exits) }, |
@@ -84,10 +89,17 @@ static struct dentry *debugfs_dir; | |||
84 | 89 | ||
85 | #define MAX_IO_MSRS 256 | 90 | #define MAX_IO_MSRS 256 |
86 | 91 | ||
87 | #define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL | 92 | #define CR0_RESERVED_BITS \ |
88 | #define LMSW_GUEST_MASK 0x0eULL | 93 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ |
89 | #define CR4_RESEVED_BITS (~((1ULL << 11) - 1)) | 94 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ |
90 | #define CR8_RESEVED_BITS (~0x0fULL) | 95 | | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) |
96 | #define CR4_RESERVED_BITS \ | ||
97 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | ||
98 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | ||
99 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ | ||
100 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) | ||
101 | |||
102 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | ||
91 | #define EFER_RESERVED_BITS 0xfffffffffffff2fe | 103 | #define EFER_RESERVED_BITS 0xfffffffffffff2fe |
92 | 104 | ||
93 | #ifdef CONFIG_X86_64 | 105 | #ifdef CONFIG_X86_64 |
@@ -139,82 +151,14 @@ static inline int valid_vcpu(int n) | |||
139 | return likely(n >= 0 && n < KVM_MAX_VCPUS); | 151 | return likely(n >= 0 && n < KVM_MAX_VCPUS); |
140 | } | 152 | } |
141 | 153 | ||
142 | int kvm_read_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size, | ||
143 | void *dest) | ||
144 | { | ||
145 | unsigned char *host_buf = dest; | ||
146 | unsigned long req_size = size; | ||
147 | |||
148 | while (size) { | ||
149 | hpa_t paddr; | ||
150 | unsigned now; | ||
151 | unsigned offset; | ||
152 | hva_t guest_buf; | ||
153 | |||
154 | paddr = gva_to_hpa(vcpu, addr); | ||
155 | |||
156 | if (is_error_hpa(paddr)) | ||
157 | break; | ||
158 | |||
159 | guest_buf = (hva_t)kmap_atomic( | ||
160 | pfn_to_page(paddr >> PAGE_SHIFT), | ||
161 | KM_USER0); | ||
162 | offset = addr & ~PAGE_MASK; | ||
163 | guest_buf |= offset; | ||
164 | now = min(size, PAGE_SIZE - offset); | ||
165 | memcpy(host_buf, (void*)guest_buf, now); | ||
166 | host_buf += now; | ||
167 | addr += now; | ||
168 | size -= now; | ||
169 | kunmap_atomic((void *)(guest_buf & PAGE_MASK), KM_USER0); | ||
170 | } | ||
171 | return req_size - size; | ||
172 | } | ||
173 | EXPORT_SYMBOL_GPL(kvm_read_guest); | ||
174 | |||
175 | int kvm_write_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size, | ||
176 | void *data) | ||
177 | { | ||
178 | unsigned char *host_buf = data; | ||
179 | unsigned long req_size = size; | ||
180 | |||
181 | while (size) { | ||
182 | hpa_t paddr; | ||
183 | unsigned now; | ||
184 | unsigned offset; | ||
185 | hva_t guest_buf; | ||
186 | gfn_t gfn; | ||
187 | |||
188 | paddr = gva_to_hpa(vcpu, addr); | ||
189 | |||
190 | if (is_error_hpa(paddr)) | ||
191 | break; | ||
192 | |||
193 | gfn = vcpu->mmu.gva_to_gpa(vcpu, addr) >> PAGE_SHIFT; | ||
194 | mark_page_dirty(vcpu->kvm, gfn); | ||
195 | guest_buf = (hva_t)kmap_atomic( | ||
196 | pfn_to_page(paddr >> PAGE_SHIFT), KM_USER0); | ||
197 | offset = addr & ~PAGE_MASK; | ||
198 | guest_buf |= offset; | ||
199 | now = min(size, PAGE_SIZE - offset); | ||
200 | memcpy((void*)guest_buf, host_buf, now); | ||
201 | host_buf += now; | ||
202 | addr += now; | ||
203 | size -= now; | ||
204 | kunmap_atomic((void *)(guest_buf & PAGE_MASK), KM_USER0); | ||
205 | } | ||
206 | return req_size - size; | ||
207 | } | ||
208 | EXPORT_SYMBOL_GPL(kvm_write_guest); | ||
209 | |||
210 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) | 154 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) |
211 | { | 155 | { |
212 | if (!vcpu->fpu_active || vcpu->guest_fpu_loaded) | 156 | if (!vcpu->fpu_active || vcpu->guest_fpu_loaded) |
213 | return; | 157 | return; |
214 | 158 | ||
215 | vcpu->guest_fpu_loaded = 1; | 159 | vcpu->guest_fpu_loaded = 1; |
216 | fx_save(vcpu->host_fx_image); | 160 | fx_save(&vcpu->host_fx_image); |
217 | fx_restore(vcpu->guest_fx_image); | 161 | fx_restore(&vcpu->guest_fx_image); |
218 | } | 162 | } |
219 | EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); | 163 | EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); |
220 | 164 | ||
@@ -224,8 +168,8 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | |||
224 | return; | 168 | return; |
225 | 169 | ||
226 | vcpu->guest_fpu_loaded = 0; | 170 | vcpu->guest_fpu_loaded = 0; |
227 | fx_save(vcpu->guest_fx_image); | 171 | fx_save(&vcpu->guest_fx_image); |
228 | fx_restore(vcpu->host_fx_image); | 172 | fx_restore(&vcpu->host_fx_image); |
229 | } | 173 | } |
230 | EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); | 174 | EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); |
231 | 175 | ||
@@ -234,13 +178,21 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); | |||
234 | */ | 178 | */ |
235 | static void vcpu_load(struct kvm_vcpu *vcpu) | 179 | static void vcpu_load(struct kvm_vcpu *vcpu) |
236 | { | 180 | { |
181 | int cpu; | ||
182 | |||
237 | mutex_lock(&vcpu->mutex); | 183 | mutex_lock(&vcpu->mutex); |
238 | kvm_arch_ops->vcpu_load(vcpu); | 184 | cpu = get_cpu(); |
185 | preempt_notifier_register(&vcpu->preempt_notifier); | ||
186 | kvm_x86_ops->vcpu_load(vcpu, cpu); | ||
187 | put_cpu(); | ||
239 | } | 188 | } |
240 | 189 | ||
241 | static void vcpu_put(struct kvm_vcpu *vcpu) | 190 | static void vcpu_put(struct kvm_vcpu *vcpu) |
242 | { | 191 | { |
243 | kvm_arch_ops->vcpu_put(vcpu); | 192 | preempt_disable(); |
193 | kvm_x86_ops->vcpu_put(vcpu); | ||
194 | preempt_notifier_unregister(&vcpu->preempt_notifier); | ||
195 | preempt_enable(); | ||
244 | mutex_unlock(&vcpu->mutex); | 196 | mutex_unlock(&vcpu->mutex); |
245 | } | 197 | } |
246 | 198 | ||
@@ -261,8 +213,10 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) | |||
261 | atomic_set(&completed, 0); | 213 | atomic_set(&completed, 0); |
262 | cpus_clear(cpus); | 214 | cpus_clear(cpus); |
263 | needed = 0; | 215 | needed = 0; |
264 | for (i = 0; i < kvm->nvcpus; ++i) { | 216 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { |
265 | vcpu = &kvm->vcpus[i]; | 217 | vcpu = kvm->vcpus[i]; |
218 | if (!vcpu) | ||
219 | continue; | ||
266 | if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) | 220 | if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) |
267 | continue; | 221 | continue; |
268 | cpu = vcpu->cpu; | 222 | cpu = vcpu->cpu; |
@@ -286,37 +240,79 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) | |||
286 | } | 240 | } |
287 | } | 241 | } |
288 | 242 | ||
243 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | ||
244 | { | ||
245 | struct page *page; | ||
246 | int r; | ||
247 | |||
248 | mutex_init(&vcpu->mutex); | ||
249 | vcpu->cpu = -1; | ||
250 | vcpu->mmu.root_hpa = INVALID_PAGE; | ||
251 | vcpu->kvm = kvm; | ||
252 | vcpu->vcpu_id = id; | ||
253 | if (!irqchip_in_kernel(kvm) || id == 0) | ||
254 | vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; | ||
255 | else | ||
256 | vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED; | ||
257 | init_waitqueue_head(&vcpu->wq); | ||
258 | |||
259 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
260 | if (!page) { | ||
261 | r = -ENOMEM; | ||
262 | goto fail; | ||
263 | } | ||
264 | vcpu->run = page_address(page); | ||
265 | |||
266 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
267 | if (!page) { | ||
268 | r = -ENOMEM; | ||
269 | goto fail_free_run; | ||
270 | } | ||
271 | vcpu->pio_data = page_address(page); | ||
272 | |||
273 | r = kvm_mmu_create(vcpu); | ||
274 | if (r < 0) | ||
275 | goto fail_free_pio_data; | ||
276 | |||
277 | return 0; | ||
278 | |||
279 | fail_free_pio_data: | ||
280 | free_page((unsigned long)vcpu->pio_data); | ||
281 | fail_free_run: | ||
282 | free_page((unsigned long)vcpu->run); | ||
283 | fail: | ||
284 | return -ENOMEM; | ||
285 | } | ||
286 | EXPORT_SYMBOL_GPL(kvm_vcpu_init); | ||
287 | |||
288 | void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) | ||
289 | { | ||
290 | kvm_mmu_destroy(vcpu); | ||
291 | if (vcpu->apic) | ||
292 | hrtimer_cancel(&vcpu->apic->timer.dev); | ||
293 | kvm_free_apic(vcpu->apic); | ||
294 | free_page((unsigned long)vcpu->pio_data); | ||
295 | free_page((unsigned long)vcpu->run); | ||
296 | } | ||
297 | EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); | ||
298 | |||
289 | static struct kvm *kvm_create_vm(void) | 299 | static struct kvm *kvm_create_vm(void) |
290 | { | 300 | { |
291 | struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); | 301 | struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); |
292 | int i; | ||
293 | 302 | ||
294 | if (!kvm) | 303 | if (!kvm) |
295 | return ERR_PTR(-ENOMEM); | 304 | return ERR_PTR(-ENOMEM); |
296 | 305 | ||
297 | kvm_io_bus_init(&kvm->pio_bus); | 306 | kvm_io_bus_init(&kvm->pio_bus); |
298 | spin_lock_init(&kvm->lock); | 307 | mutex_init(&kvm->lock); |
299 | INIT_LIST_HEAD(&kvm->active_mmu_pages); | 308 | INIT_LIST_HEAD(&kvm->active_mmu_pages); |
300 | kvm_io_bus_init(&kvm->mmio_bus); | 309 | kvm_io_bus_init(&kvm->mmio_bus); |
301 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||
302 | struct kvm_vcpu *vcpu = &kvm->vcpus[i]; | ||
303 | |||
304 | mutex_init(&vcpu->mutex); | ||
305 | vcpu->cpu = -1; | ||
306 | vcpu->kvm = kvm; | ||
307 | vcpu->mmu.root_hpa = INVALID_PAGE; | ||
308 | } | ||
309 | spin_lock(&kvm_lock); | 310 | spin_lock(&kvm_lock); |
310 | list_add(&kvm->vm_list, &vm_list); | 311 | list_add(&kvm->vm_list, &vm_list); |
311 | spin_unlock(&kvm_lock); | 312 | spin_unlock(&kvm_lock); |
312 | return kvm; | 313 | return kvm; |
313 | } | 314 | } |
314 | 315 | ||
315 | static int kvm_dev_open(struct inode *inode, struct file *filp) | ||
316 | { | ||
317 | return 0; | ||
318 | } | ||
319 | |||
320 | /* | 316 | /* |
321 | * Free any memory in @free but not in @dont. | 317 | * Free any memory in @free but not in @dont. |
322 | */ | 318 | */ |
@@ -353,7 +349,7 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu) | |||
353 | { | 349 | { |
354 | int i; | 350 | int i; |
355 | 351 | ||
356 | for (i = 0; i < 2; ++i) | 352 | for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i) |
357 | if (vcpu->pio.guest_pages[i]) { | 353 | if (vcpu->pio.guest_pages[i]) { |
358 | __free_page(vcpu->pio.guest_pages[i]); | 354 | __free_page(vcpu->pio.guest_pages[i]); |
359 | vcpu->pio.guest_pages[i] = NULL; | 355 | vcpu->pio.guest_pages[i] = NULL; |
@@ -362,30 +358,11 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu) | |||
362 | 358 | ||
363 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) | 359 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) |
364 | { | 360 | { |
365 | if (!vcpu->vmcs) | ||
366 | return; | ||
367 | |||
368 | vcpu_load(vcpu); | 361 | vcpu_load(vcpu); |
369 | kvm_mmu_unload(vcpu); | 362 | kvm_mmu_unload(vcpu); |
370 | vcpu_put(vcpu); | 363 | vcpu_put(vcpu); |
371 | } | 364 | } |
372 | 365 | ||
373 | static void kvm_free_vcpu(struct kvm_vcpu *vcpu) | ||
374 | { | ||
375 | if (!vcpu->vmcs) | ||
376 | return; | ||
377 | |||
378 | vcpu_load(vcpu); | ||
379 | kvm_mmu_destroy(vcpu); | ||
380 | vcpu_put(vcpu); | ||
381 | kvm_arch_ops->vcpu_free(vcpu); | ||
382 | free_page((unsigned long)vcpu->run); | ||
383 | vcpu->run = NULL; | ||
384 | free_page((unsigned long)vcpu->pio_data); | ||
385 | vcpu->pio_data = NULL; | ||
386 | free_pio_guest_pages(vcpu); | ||
387 | } | ||
388 | |||
389 | static void kvm_free_vcpus(struct kvm *kvm) | 366 | static void kvm_free_vcpus(struct kvm *kvm) |
390 | { | 367 | { |
391 | unsigned int i; | 368 | unsigned int i; |
@@ -394,14 +371,15 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
394 | * Unpin any mmu pages first. | 371 | * Unpin any mmu pages first. |
395 | */ | 372 | */ |
396 | for (i = 0; i < KVM_MAX_VCPUS; ++i) | 373 | for (i = 0; i < KVM_MAX_VCPUS; ++i) |
397 | kvm_unload_vcpu_mmu(&kvm->vcpus[i]); | 374 | if (kvm->vcpus[i]) |
398 | for (i = 0; i < KVM_MAX_VCPUS; ++i) | 375 | kvm_unload_vcpu_mmu(kvm->vcpus[i]); |
399 | kvm_free_vcpu(&kvm->vcpus[i]); | 376 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { |
400 | } | 377 | if (kvm->vcpus[i]) { |
378 | kvm_x86_ops->vcpu_free(kvm->vcpus[i]); | ||
379 | kvm->vcpus[i] = NULL; | ||
380 | } | ||
381 | } | ||
401 | 382 | ||
402 | static int kvm_dev_release(struct inode *inode, struct file *filp) | ||
403 | { | ||
404 | return 0; | ||
405 | } | 383 | } |
406 | 384 | ||
407 | static void kvm_destroy_vm(struct kvm *kvm) | 385 | static void kvm_destroy_vm(struct kvm *kvm) |
@@ -411,6 +389,8 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
411 | spin_unlock(&kvm_lock); | 389 | spin_unlock(&kvm_lock); |
412 | kvm_io_bus_destroy(&kvm->pio_bus); | 390 | kvm_io_bus_destroy(&kvm->pio_bus); |
413 | kvm_io_bus_destroy(&kvm->mmio_bus); | 391 | kvm_io_bus_destroy(&kvm->mmio_bus); |
392 | kfree(kvm->vpic); | ||
393 | kfree(kvm->vioapic); | ||
414 | kvm_free_vcpus(kvm); | 394 | kvm_free_vcpus(kvm); |
415 | kvm_free_physmem(kvm); | 395 | kvm_free_physmem(kvm); |
416 | kfree(kvm); | 396 | kfree(kvm); |
@@ -426,7 +406,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) | |||
426 | 406 | ||
427 | static void inject_gp(struct kvm_vcpu *vcpu) | 407 | static void inject_gp(struct kvm_vcpu *vcpu) |
428 | { | 408 | { |
429 | kvm_arch_ops->inject_gp(vcpu, 0); | 409 | kvm_x86_ops->inject_gp(vcpu, 0); |
430 | } | 410 | } |
431 | 411 | ||
432 | /* | 412 | /* |
@@ -437,58 +417,60 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
437 | gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; | 417 | gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; |
438 | unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; | 418 | unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; |
439 | int i; | 419 | int i; |
440 | u64 pdpte; | ||
441 | u64 *pdpt; | 420 | u64 *pdpt; |
442 | int ret; | 421 | int ret; |
443 | struct page *page; | 422 | struct page *page; |
423 | u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)]; | ||
444 | 424 | ||
445 | spin_lock(&vcpu->kvm->lock); | 425 | mutex_lock(&vcpu->kvm->lock); |
446 | page = gfn_to_page(vcpu->kvm, pdpt_gfn); | 426 | page = gfn_to_page(vcpu->kvm, pdpt_gfn); |
447 | /* FIXME: !page - emulate? 0xff? */ | 427 | if (!page) { |
428 | ret = 0; | ||
429 | goto out; | ||
430 | } | ||
431 | |||
448 | pdpt = kmap_atomic(page, KM_USER0); | 432 | pdpt = kmap_atomic(page, KM_USER0); |
433 | memcpy(pdpte, pdpt+offset, sizeof(pdpte)); | ||
434 | kunmap_atomic(pdpt, KM_USER0); | ||
449 | 435 | ||
450 | ret = 1; | 436 | for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { |
451 | for (i = 0; i < 4; ++i) { | 437 | if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) { |
452 | pdpte = pdpt[offset + i]; | ||
453 | if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) { | ||
454 | ret = 0; | 438 | ret = 0; |
455 | goto out; | 439 | goto out; |
456 | } | 440 | } |
457 | } | 441 | } |
442 | ret = 1; | ||
458 | 443 | ||
459 | for (i = 0; i < 4; ++i) | 444 | memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs)); |
460 | vcpu->pdptrs[i] = pdpt[offset + i]; | ||
461 | |||
462 | out: | 445 | out: |
463 | kunmap_atomic(pdpt, KM_USER0); | 446 | mutex_unlock(&vcpu->kvm->lock); |
464 | spin_unlock(&vcpu->kvm->lock); | ||
465 | 447 | ||
466 | return ret; | 448 | return ret; |
467 | } | 449 | } |
468 | 450 | ||
469 | void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 451 | void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
470 | { | 452 | { |
471 | if (cr0 & CR0_RESEVED_BITS) { | 453 | if (cr0 & CR0_RESERVED_BITS) { |
472 | printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", | 454 | printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", |
473 | cr0, vcpu->cr0); | 455 | cr0, vcpu->cr0); |
474 | inject_gp(vcpu); | 456 | inject_gp(vcpu); |
475 | return; | 457 | return; |
476 | } | 458 | } |
477 | 459 | ||
478 | if ((cr0 & CR0_NW_MASK) && !(cr0 & CR0_CD_MASK)) { | 460 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { |
479 | printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); | 461 | printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); |
480 | inject_gp(vcpu); | 462 | inject_gp(vcpu); |
481 | return; | 463 | return; |
482 | } | 464 | } |
483 | 465 | ||
484 | if ((cr0 & CR0_PG_MASK) && !(cr0 & CR0_PE_MASK)) { | 466 | if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { |
485 | printk(KERN_DEBUG "set_cr0: #GP, set PG flag " | 467 | printk(KERN_DEBUG "set_cr0: #GP, set PG flag " |
486 | "and a clear PE flag\n"); | 468 | "and a clear PE flag\n"); |
487 | inject_gp(vcpu); | 469 | inject_gp(vcpu); |
488 | return; | 470 | return; |
489 | } | 471 | } |
490 | 472 | ||
491 | if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) { | 473 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
492 | #ifdef CONFIG_X86_64 | 474 | #ifdef CONFIG_X86_64 |
493 | if ((vcpu->shadow_efer & EFER_LME)) { | 475 | if ((vcpu->shadow_efer & EFER_LME)) { |
494 | int cs_db, cs_l; | 476 | int cs_db, cs_l; |
@@ -499,7 +481,7 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
499 | inject_gp(vcpu); | 481 | inject_gp(vcpu); |
500 | return; | 482 | return; |
501 | } | 483 | } |
502 | kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 484 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
503 | if (cs_l) { | 485 | if (cs_l) { |
504 | printk(KERN_DEBUG "set_cr0: #GP, start paging " | 486 | printk(KERN_DEBUG "set_cr0: #GP, start paging " |
505 | "in long mode while CS.L == 1\n"); | 487 | "in long mode while CS.L == 1\n"); |
@@ -518,12 +500,12 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
518 | 500 | ||
519 | } | 501 | } |
520 | 502 | ||
521 | kvm_arch_ops->set_cr0(vcpu, cr0); | 503 | kvm_x86_ops->set_cr0(vcpu, cr0); |
522 | vcpu->cr0 = cr0; | 504 | vcpu->cr0 = cr0; |
523 | 505 | ||
524 | spin_lock(&vcpu->kvm->lock); | 506 | mutex_lock(&vcpu->kvm->lock); |
525 | kvm_mmu_reset_context(vcpu); | 507 | kvm_mmu_reset_context(vcpu); |
526 | spin_unlock(&vcpu->kvm->lock); | 508 | mutex_unlock(&vcpu->kvm->lock); |
527 | return; | 509 | return; |
528 | } | 510 | } |
529 | EXPORT_SYMBOL_GPL(set_cr0); | 511 | EXPORT_SYMBOL_GPL(set_cr0); |
@@ -536,62 +518,72 @@ EXPORT_SYMBOL_GPL(lmsw); | |||
536 | 518 | ||
537 | void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 519 | void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
538 | { | 520 | { |
539 | if (cr4 & CR4_RESEVED_BITS) { | 521 | if (cr4 & CR4_RESERVED_BITS) { |
540 | printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); | 522 | printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); |
541 | inject_gp(vcpu); | 523 | inject_gp(vcpu); |
542 | return; | 524 | return; |
543 | } | 525 | } |
544 | 526 | ||
545 | if (is_long_mode(vcpu)) { | 527 | if (is_long_mode(vcpu)) { |
546 | if (!(cr4 & CR4_PAE_MASK)) { | 528 | if (!(cr4 & X86_CR4_PAE)) { |
547 | printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " | 529 | printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " |
548 | "in long mode\n"); | 530 | "in long mode\n"); |
549 | inject_gp(vcpu); | 531 | inject_gp(vcpu); |
550 | return; | 532 | return; |
551 | } | 533 | } |
552 | } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & CR4_PAE_MASK) | 534 | } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE) |
553 | && !load_pdptrs(vcpu, vcpu->cr3)) { | 535 | && !load_pdptrs(vcpu, vcpu->cr3)) { |
554 | printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); | 536 | printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); |
555 | inject_gp(vcpu); | 537 | inject_gp(vcpu); |
538 | return; | ||
556 | } | 539 | } |
557 | 540 | ||
558 | if (cr4 & CR4_VMXE_MASK) { | 541 | if (cr4 & X86_CR4_VMXE) { |
559 | printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); | 542 | printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); |
560 | inject_gp(vcpu); | 543 | inject_gp(vcpu); |
561 | return; | 544 | return; |
562 | } | 545 | } |
563 | kvm_arch_ops->set_cr4(vcpu, cr4); | 546 | kvm_x86_ops->set_cr4(vcpu, cr4); |
564 | spin_lock(&vcpu->kvm->lock); | 547 | vcpu->cr4 = cr4; |
548 | mutex_lock(&vcpu->kvm->lock); | ||
565 | kvm_mmu_reset_context(vcpu); | 549 | kvm_mmu_reset_context(vcpu); |
566 | spin_unlock(&vcpu->kvm->lock); | 550 | mutex_unlock(&vcpu->kvm->lock); |
567 | } | 551 | } |
568 | EXPORT_SYMBOL_GPL(set_cr4); | 552 | EXPORT_SYMBOL_GPL(set_cr4); |
569 | 553 | ||
570 | void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 554 | void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
571 | { | 555 | { |
572 | if (is_long_mode(vcpu)) { | 556 | if (is_long_mode(vcpu)) { |
573 | if (cr3 & CR3_L_MODE_RESEVED_BITS) { | 557 | if (cr3 & CR3_L_MODE_RESERVED_BITS) { |
574 | printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); | 558 | printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); |
575 | inject_gp(vcpu); | 559 | inject_gp(vcpu); |
576 | return; | 560 | return; |
577 | } | 561 | } |
578 | } else { | 562 | } else { |
579 | if (cr3 & CR3_RESEVED_BITS) { | 563 | if (is_pae(vcpu)) { |
580 | printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); | 564 | if (cr3 & CR3_PAE_RESERVED_BITS) { |
581 | inject_gp(vcpu); | 565 | printk(KERN_DEBUG |
582 | return; | 566 | "set_cr3: #GP, reserved bits\n"); |
583 | } | 567 | inject_gp(vcpu); |
584 | if (is_paging(vcpu) && is_pae(vcpu) && | 568 | return; |
585 | !load_pdptrs(vcpu, cr3)) { | 569 | } |
586 | printk(KERN_DEBUG "set_cr3: #GP, pdptrs " | 570 | if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { |
587 | "reserved bits\n"); | 571 | printk(KERN_DEBUG "set_cr3: #GP, pdptrs " |
588 | inject_gp(vcpu); | 572 | "reserved bits\n"); |
589 | return; | 573 | inject_gp(vcpu); |
574 | return; | ||
575 | } | ||
576 | } else { | ||
577 | if (cr3 & CR3_NONPAE_RESERVED_BITS) { | ||
578 | printk(KERN_DEBUG | ||
579 | "set_cr3: #GP, reserved bits\n"); | ||
580 | inject_gp(vcpu); | ||
581 | return; | ||
582 | } | ||
590 | } | 583 | } |
591 | } | 584 | } |
592 | 585 | ||
593 | vcpu->cr3 = cr3; | 586 | mutex_lock(&vcpu->kvm->lock); |
594 | spin_lock(&vcpu->kvm->lock); | ||
595 | /* | 587 | /* |
596 | * Does the new cr3 value map to physical memory? (Note, we | 588 | * Does the new cr3 value map to physical memory? (Note, we |
597 | * catch an invalid cr3 even in real-mode, because it would | 589 | * catch an invalid cr3 even in real-mode, because it would |
@@ -603,46 +595,73 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
603 | */ | 595 | */ |
604 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) | 596 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) |
605 | inject_gp(vcpu); | 597 | inject_gp(vcpu); |
606 | else | 598 | else { |
599 | vcpu->cr3 = cr3; | ||
607 | vcpu->mmu.new_cr3(vcpu); | 600 | vcpu->mmu.new_cr3(vcpu); |
608 | spin_unlock(&vcpu->kvm->lock); | 601 | } |
602 | mutex_unlock(&vcpu->kvm->lock); | ||
609 | } | 603 | } |
610 | EXPORT_SYMBOL_GPL(set_cr3); | 604 | EXPORT_SYMBOL_GPL(set_cr3); |
611 | 605 | ||
612 | void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | 606 | void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) |
613 | { | 607 | { |
614 | if ( cr8 & CR8_RESEVED_BITS) { | 608 | if (cr8 & CR8_RESERVED_BITS) { |
615 | printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); | 609 | printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); |
616 | inject_gp(vcpu); | 610 | inject_gp(vcpu); |
617 | return; | 611 | return; |
618 | } | 612 | } |
619 | vcpu->cr8 = cr8; | 613 | if (irqchip_in_kernel(vcpu->kvm)) |
614 | kvm_lapic_set_tpr(vcpu, cr8); | ||
615 | else | ||
616 | vcpu->cr8 = cr8; | ||
620 | } | 617 | } |
621 | EXPORT_SYMBOL_GPL(set_cr8); | 618 | EXPORT_SYMBOL_GPL(set_cr8); |
622 | 619 | ||
623 | void fx_init(struct kvm_vcpu *vcpu) | 620 | unsigned long get_cr8(struct kvm_vcpu *vcpu) |
621 | { | ||
622 | if (irqchip_in_kernel(vcpu->kvm)) | ||
623 | return kvm_lapic_get_cr8(vcpu); | ||
624 | else | ||
625 | return vcpu->cr8; | ||
626 | } | ||
627 | EXPORT_SYMBOL_GPL(get_cr8); | ||
628 | |||
629 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) | ||
624 | { | 630 | { |
625 | struct __attribute__ ((__packed__)) fx_image_s { | 631 | if (irqchip_in_kernel(vcpu->kvm)) |
626 | u16 control; //fcw | 632 | return vcpu->apic_base; |
627 | u16 status; //fsw | 633 | else |
628 | u16 tag; // ftw | 634 | return vcpu->apic_base; |
629 | u16 opcode; //fop | 635 | } |
630 | u64 ip; // fpu ip | 636 | EXPORT_SYMBOL_GPL(kvm_get_apic_base); |
631 | u64 operand;// fpu dp | ||
632 | u32 mxcsr; | ||
633 | u32 mxcsr_mask; | ||
634 | 637 | ||
635 | } *fx_image; | 638 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) |
639 | { | ||
640 | /* TODO: reserve bits check */ | ||
641 | if (irqchip_in_kernel(vcpu->kvm)) | ||
642 | kvm_lapic_set_base(vcpu, data); | ||
643 | else | ||
644 | vcpu->apic_base = data; | ||
645 | } | ||
646 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); | ||
647 | |||
648 | void fx_init(struct kvm_vcpu *vcpu) | ||
649 | { | ||
650 | unsigned after_mxcsr_mask; | ||
636 | 651 | ||
637 | fx_save(vcpu->host_fx_image); | 652 | /* Initialize guest FPU by resetting ours and saving into guest's */ |
653 | preempt_disable(); | ||
654 | fx_save(&vcpu->host_fx_image); | ||
638 | fpu_init(); | 655 | fpu_init(); |
639 | fx_save(vcpu->guest_fx_image); | 656 | fx_save(&vcpu->guest_fx_image); |
640 | fx_restore(vcpu->host_fx_image); | 657 | fx_restore(&vcpu->host_fx_image); |
658 | preempt_enable(); | ||
641 | 659 | ||
642 | fx_image = (struct fx_image_s *)vcpu->guest_fx_image; | 660 | vcpu->cr0 |= X86_CR0_ET; |
643 | fx_image->mxcsr = 0x1f80; | 661 | after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space); |
644 | memset(vcpu->guest_fx_image + sizeof(struct fx_image_s), | 662 | vcpu->guest_fx_image.mxcsr = 0x1f80; |
645 | 0, FX_IMAGE_SIZE - sizeof(struct fx_image_s)); | 663 | memset((void *)&vcpu->guest_fx_image + after_mxcsr_mask, |
664 | 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask); | ||
646 | } | 665 | } |
647 | EXPORT_SYMBOL_GPL(fx_init); | 666 | EXPORT_SYMBOL_GPL(fx_init); |
648 | 667 | ||
@@ -661,7 +680,6 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, | |||
661 | unsigned long i; | 680 | unsigned long i; |
662 | struct kvm_memory_slot *memslot; | 681 | struct kvm_memory_slot *memslot; |
663 | struct kvm_memory_slot old, new; | 682 | struct kvm_memory_slot old, new; |
664 | int memory_config_version; | ||
665 | 683 | ||
666 | r = -EINVAL; | 684 | r = -EINVAL; |
667 | /* General sanity checks */ | 685 | /* General sanity checks */ |
@@ -681,10 +699,8 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, | |||
681 | if (!npages) | 699 | if (!npages) |
682 | mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; | 700 | mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; |
683 | 701 | ||
684 | raced: | 702 | mutex_lock(&kvm->lock); |
685 | spin_lock(&kvm->lock); | ||
686 | 703 | ||
687 | memory_config_version = kvm->memory_config_version; | ||
688 | new = old = *memslot; | 704 | new = old = *memslot; |
689 | 705 | ||
690 | new.base_gfn = base_gfn; | 706 | new.base_gfn = base_gfn; |
@@ -707,11 +723,6 @@ raced: | |||
707 | (base_gfn >= s->base_gfn + s->npages))) | 723 | (base_gfn >= s->base_gfn + s->npages))) |
708 | goto out_unlock; | 724 | goto out_unlock; |
709 | } | 725 | } |
710 | /* | ||
711 | * Do memory allocations outside lock. memory_config_version will | ||
712 | * detect any races. | ||
713 | */ | ||
714 | spin_unlock(&kvm->lock); | ||
715 | 726 | ||
716 | /* Deallocate if slot is being removed */ | 727 | /* Deallocate if slot is being removed */ |
717 | if (!npages) | 728 | if (!npages) |
@@ -728,14 +739,14 @@ raced: | |||
728 | new.phys_mem = vmalloc(npages * sizeof(struct page *)); | 739 | new.phys_mem = vmalloc(npages * sizeof(struct page *)); |
729 | 740 | ||
730 | if (!new.phys_mem) | 741 | if (!new.phys_mem) |
731 | goto out_free; | 742 | goto out_unlock; |
732 | 743 | ||
733 | memset(new.phys_mem, 0, npages * sizeof(struct page *)); | 744 | memset(new.phys_mem, 0, npages * sizeof(struct page *)); |
734 | for (i = 0; i < npages; ++i) { | 745 | for (i = 0; i < npages; ++i) { |
735 | new.phys_mem[i] = alloc_page(GFP_HIGHUSER | 746 | new.phys_mem[i] = alloc_page(GFP_HIGHUSER |
736 | | __GFP_ZERO); | 747 | | __GFP_ZERO); |
737 | if (!new.phys_mem[i]) | 748 | if (!new.phys_mem[i]) |
738 | goto out_free; | 749 | goto out_unlock; |
739 | set_page_private(new.phys_mem[i],0); | 750 | set_page_private(new.phys_mem[i],0); |
740 | } | 751 | } |
741 | } | 752 | } |
@@ -746,39 +757,25 @@ raced: | |||
746 | 757 | ||
747 | new.dirty_bitmap = vmalloc(dirty_bytes); | 758 | new.dirty_bitmap = vmalloc(dirty_bytes); |
748 | if (!new.dirty_bitmap) | 759 | if (!new.dirty_bitmap) |
749 | goto out_free; | 760 | goto out_unlock; |
750 | memset(new.dirty_bitmap, 0, dirty_bytes); | 761 | memset(new.dirty_bitmap, 0, dirty_bytes); |
751 | } | 762 | } |
752 | 763 | ||
753 | spin_lock(&kvm->lock); | ||
754 | |||
755 | if (memory_config_version != kvm->memory_config_version) { | ||
756 | spin_unlock(&kvm->lock); | ||
757 | kvm_free_physmem_slot(&new, &old); | ||
758 | goto raced; | ||
759 | } | ||
760 | |||
761 | r = -EAGAIN; | ||
762 | if (kvm->busy) | ||
763 | goto out_unlock; | ||
764 | |||
765 | if (mem->slot >= kvm->nmemslots) | 764 | if (mem->slot >= kvm->nmemslots) |
766 | kvm->nmemslots = mem->slot + 1; | 765 | kvm->nmemslots = mem->slot + 1; |
767 | 766 | ||
768 | *memslot = new; | 767 | *memslot = new; |
769 | ++kvm->memory_config_version; | ||
770 | 768 | ||
771 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 769 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
772 | kvm_flush_remote_tlbs(kvm); | 770 | kvm_flush_remote_tlbs(kvm); |
773 | 771 | ||
774 | spin_unlock(&kvm->lock); | 772 | mutex_unlock(&kvm->lock); |
775 | 773 | ||
776 | kvm_free_physmem_slot(&old, &new); | 774 | kvm_free_physmem_slot(&old, &new); |
777 | return 0; | 775 | return 0; |
778 | 776 | ||
779 | out_unlock: | 777 | out_unlock: |
780 | spin_unlock(&kvm->lock); | 778 | mutex_unlock(&kvm->lock); |
781 | out_free: | ||
782 | kvm_free_physmem_slot(&new, &old); | 779 | kvm_free_physmem_slot(&new, &old); |
783 | out: | 780 | out: |
784 | return r; | 781 | return r; |
@@ -795,14 +792,8 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
795 | int n; | 792 | int n; |
796 | unsigned long any = 0; | 793 | unsigned long any = 0; |
797 | 794 | ||
798 | spin_lock(&kvm->lock); | 795 | mutex_lock(&kvm->lock); |
799 | 796 | ||
800 | /* | ||
801 | * Prevent changes to guest memory configuration even while the lock | ||
802 | * is not taken. | ||
803 | */ | ||
804 | ++kvm->busy; | ||
805 | spin_unlock(&kvm->lock); | ||
806 | r = -EINVAL; | 797 | r = -EINVAL; |
807 | if (log->slot >= KVM_MEMORY_SLOTS) | 798 | if (log->slot >= KVM_MEMORY_SLOTS) |
808 | goto out; | 799 | goto out; |
@@ -821,18 +812,17 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
821 | if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) | 812 | if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) |
822 | goto out; | 813 | goto out; |
823 | 814 | ||
824 | spin_lock(&kvm->lock); | 815 | /* If nothing is dirty, don't bother messing with page tables. */ |
825 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | 816 | if (any) { |
826 | kvm_flush_remote_tlbs(kvm); | 817 | kvm_mmu_slot_remove_write_access(kvm, log->slot); |
827 | memset(memslot->dirty_bitmap, 0, n); | 818 | kvm_flush_remote_tlbs(kvm); |
828 | spin_unlock(&kvm->lock); | 819 | memset(memslot->dirty_bitmap, 0, n); |
820 | } | ||
829 | 821 | ||
830 | r = 0; | 822 | r = 0; |
831 | 823 | ||
832 | out: | 824 | out: |
833 | spin_lock(&kvm->lock); | 825 | mutex_unlock(&kvm->lock); |
834 | --kvm->busy; | ||
835 | spin_unlock(&kvm->lock); | ||
836 | return r; | 826 | return r; |
837 | } | 827 | } |
838 | 828 | ||
@@ -862,7 +852,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
862 | < alias->target_phys_addr) | 852 | < alias->target_phys_addr) |
863 | goto out; | 853 | goto out; |
864 | 854 | ||
865 | spin_lock(&kvm->lock); | 855 | mutex_lock(&kvm->lock); |
866 | 856 | ||
867 | p = &kvm->aliases[alias->slot]; | 857 | p = &kvm->aliases[alias->slot]; |
868 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | 858 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; |
@@ -876,7 +866,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
876 | 866 | ||
877 | kvm_mmu_zap_all(kvm); | 867 | kvm_mmu_zap_all(kvm); |
878 | 868 | ||
879 | spin_unlock(&kvm->lock); | 869 | mutex_unlock(&kvm->lock); |
880 | 870 | ||
881 | return 0; | 871 | return 0; |
882 | 872 | ||
@@ -884,6 +874,63 @@ out: | |||
884 | return r; | 874 | return r; |
885 | } | 875 | } |
886 | 876 | ||
877 | static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | ||
878 | { | ||
879 | int r; | ||
880 | |||
881 | r = 0; | ||
882 | switch (chip->chip_id) { | ||
883 | case KVM_IRQCHIP_PIC_MASTER: | ||
884 | memcpy (&chip->chip.pic, | ||
885 | &pic_irqchip(kvm)->pics[0], | ||
886 | sizeof(struct kvm_pic_state)); | ||
887 | break; | ||
888 | case KVM_IRQCHIP_PIC_SLAVE: | ||
889 | memcpy (&chip->chip.pic, | ||
890 | &pic_irqchip(kvm)->pics[1], | ||
891 | sizeof(struct kvm_pic_state)); | ||
892 | break; | ||
893 | case KVM_IRQCHIP_IOAPIC: | ||
894 | memcpy (&chip->chip.ioapic, | ||
895 | ioapic_irqchip(kvm), | ||
896 | sizeof(struct kvm_ioapic_state)); | ||
897 | break; | ||
898 | default: | ||
899 | r = -EINVAL; | ||
900 | break; | ||
901 | } | ||
902 | return r; | ||
903 | } | ||
904 | |||
905 | static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | ||
906 | { | ||
907 | int r; | ||
908 | |||
909 | r = 0; | ||
910 | switch (chip->chip_id) { | ||
911 | case KVM_IRQCHIP_PIC_MASTER: | ||
912 | memcpy (&pic_irqchip(kvm)->pics[0], | ||
913 | &chip->chip.pic, | ||
914 | sizeof(struct kvm_pic_state)); | ||
915 | break; | ||
916 | case KVM_IRQCHIP_PIC_SLAVE: | ||
917 | memcpy (&pic_irqchip(kvm)->pics[1], | ||
918 | &chip->chip.pic, | ||
919 | sizeof(struct kvm_pic_state)); | ||
920 | break; | ||
921 | case KVM_IRQCHIP_IOAPIC: | ||
922 | memcpy (ioapic_irqchip(kvm), | ||
923 | &chip->chip.ioapic, | ||
924 | sizeof(struct kvm_ioapic_state)); | ||
925 | break; | ||
926 | default: | ||
927 | r = -EINVAL; | ||
928 | break; | ||
929 | } | ||
930 | kvm_pic_update_irq(pic_irqchip(kvm)); | ||
931 | return r; | ||
932 | } | ||
933 | |||
887 | static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | 934 | static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) |
888 | { | 935 | { |
889 | int i; | 936 | int i; |
@@ -930,37 +977,26 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | |||
930 | } | 977 | } |
931 | EXPORT_SYMBOL_GPL(gfn_to_page); | 978 | EXPORT_SYMBOL_GPL(gfn_to_page); |
932 | 979 | ||
980 | /* WARNING: Does not work on aliased pages. */ | ||
933 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | 981 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn) |
934 | { | 982 | { |
935 | int i; | ||
936 | struct kvm_memory_slot *memslot; | 983 | struct kvm_memory_slot *memslot; |
937 | unsigned long rel_gfn; | ||
938 | 984 | ||
939 | for (i = 0; i < kvm->nmemslots; ++i) { | 985 | memslot = __gfn_to_memslot(kvm, gfn); |
940 | memslot = &kvm->memslots[i]; | 986 | if (memslot && memslot->dirty_bitmap) { |
941 | 987 | unsigned long rel_gfn = gfn - memslot->base_gfn; | |
942 | if (gfn >= memslot->base_gfn | ||
943 | && gfn < memslot->base_gfn + memslot->npages) { | ||
944 | 988 | ||
945 | if (!memslot->dirty_bitmap) | 989 | /* avoid RMW */ |
946 | return; | 990 | if (!test_bit(rel_gfn, memslot->dirty_bitmap)) |
947 | 991 | set_bit(rel_gfn, memslot->dirty_bitmap); | |
948 | rel_gfn = gfn - memslot->base_gfn; | ||
949 | |||
950 | /* avoid RMW */ | ||
951 | if (!test_bit(rel_gfn, memslot->dirty_bitmap)) | ||
952 | set_bit(rel_gfn, memslot->dirty_bitmap); | ||
953 | return; | ||
954 | } | ||
955 | } | 992 | } |
956 | } | 993 | } |
957 | 994 | ||
958 | static int emulator_read_std(unsigned long addr, | 995 | int emulator_read_std(unsigned long addr, |
959 | void *val, | 996 | void *val, |
960 | unsigned int bytes, | 997 | unsigned int bytes, |
961 | struct x86_emulate_ctxt *ctxt) | 998 | struct kvm_vcpu *vcpu) |
962 | { | 999 | { |
963 | struct kvm_vcpu *vcpu = ctxt->vcpu; | ||
964 | void *data = val; | 1000 | void *data = val; |
965 | 1001 | ||
966 | while (bytes) { | 1002 | while (bytes) { |
@@ -990,26 +1026,42 @@ static int emulator_read_std(unsigned long addr, | |||
990 | 1026 | ||
991 | return X86EMUL_CONTINUE; | 1027 | return X86EMUL_CONTINUE; |
992 | } | 1028 | } |
1029 | EXPORT_SYMBOL_GPL(emulator_read_std); | ||
993 | 1030 | ||
994 | static int emulator_write_std(unsigned long addr, | 1031 | static int emulator_write_std(unsigned long addr, |
995 | const void *val, | 1032 | const void *val, |
996 | unsigned int bytes, | 1033 | unsigned int bytes, |
997 | struct x86_emulate_ctxt *ctxt) | 1034 | struct kvm_vcpu *vcpu) |
998 | { | 1035 | { |
999 | printk(KERN_ERR "emulator_write_std: addr %lx n %d\n", | 1036 | pr_unimpl(vcpu, "emulator_write_std: addr %lx n %d\n", addr, bytes); |
1000 | addr, bytes); | ||
1001 | return X86EMUL_UNHANDLEABLE; | 1037 | return X86EMUL_UNHANDLEABLE; |
1002 | } | 1038 | } |
1003 | 1039 | ||
1040 | /* | ||
1041 | * Only apic need an MMIO device hook, so shortcut now.. | ||
1042 | */ | ||
1043 | static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, | ||
1044 | gpa_t addr) | ||
1045 | { | ||
1046 | struct kvm_io_device *dev; | ||
1047 | |||
1048 | if (vcpu->apic) { | ||
1049 | dev = &vcpu->apic->dev; | ||
1050 | if (dev->in_range(dev, addr)) | ||
1051 | return dev; | ||
1052 | } | ||
1053 | return NULL; | ||
1054 | } | ||
1055 | |||
1004 | static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, | 1056 | static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, |
1005 | gpa_t addr) | 1057 | gpa_t addr) |
1006 | { | 1058 | { |
1007 | /* | 1059 | struct kvm_io_device *dev; |
1008 | * Note that its important to have this wrapper function because | 1060 | |
1009 | * in the very near future we will be checking for MMIOs against | 1061 | dev = vcpu_find_pervcpu_dev(vcpu, addr); |
1010 | * the LAPIC as well as the general MMIO bus | 1062 | if (dev == NULL) |
1011 | */ | 1063 | dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); |
1012 | return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); | 1064 | return dev; |
1013 | } | 1065 | } |
1014 | 1066 | ||
1015 | static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, | 1067 | static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, |
@@ -1021,9 +1073,8 @@ static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, | |||
1021 | static int emulator_read_emulated(unsigned long addr, | 1073 | static int emulator_read_emulated(unsigned long addr, |
1022 | void *val, | 1074 | void *val, |
1023 | unsigned int bytes, | 1075 | unsigned int bytes, |
1024 | struct x86_emulate_ctxt *ctxt) | 1076 | struct kvm_vcpu *vcpu) |
1025 | { | 1077 | { |
1026 | struct kvm_vcpu *vcpu = ctxt->vcpu; | ||
1027 | struct kvm_io_device *mmio_dev; | 1078 | struct kvm_io_device *mmio_dev; |
1028 | gpa_t gpa; | 1079 | gpa_t gpa; |
1029 | 1080 | ||
@@ -1031,7 +1082,7 @@ static int emulator_read_emulated(unsigned long addr, | |||
1031 | memcpy(val, vcpu->mmio_data, bytes); | 1082 | memcpy(val, vcpu->mmio_data, bytes); |
1032 | vcpu->mmio_read_completed = 0; | 1083 | vcpu->mmio_read_completed = 0; |
1033 | return X86EMUL_CONTINUE; | 1084 | return X86EMUL_CONTINUE; |
1034 | } else if (emulator_read_std(addr, val, bytes, ctxt) | 1085 | } else if (emulator_read_std(addr, val, bytes, vcpu) |
1035 | == X86EMUL_CONTINUE) | 1086 | == X86EMUL_CONTINUE) |
1036 | return X86EMUL_CONTINUE; | 1087 | return X86EMUL_CONTINUE; |
1037 | 1088 | ||
@@ -1061,7 +1112,6 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1061 | { | 1112 | { |
1062 | struct page *page; | 1113 | struct page *page; |
1063 | void *virt; | 1114 | void *virt; |
1064 | unsigned offset = offset_in_page(gpa); | ||
1065 | 1115 | ||
1066 | if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) | 1116 | if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) |
1067 | return 0; | 1117 | return 0; |
@@ -1070,7 +1120,7 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1070 | return 0; | 1120 | return 0; |
1071 | mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); | 1121 | mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); |
1072 | virt = kmap_atomic(page, KM_USER0); | 1122 | virt = kmap_atomic(page, KM_USER0); |
1073 | kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes); | 1123 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); |
1074 | memcpy(virt + offset_in_page(gpa), val, bytes); | 1124 | memcpy(virt + offset_in_page(gpa), val, bytes); |
1075 | kunmap_atomic(virt, KM_USER0); | 1125 | kunmap_atomic(virt, KM_USER0); |
1076 | return 1; | 1126 | return 1; |
@@ -1079,14 +1129,13 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1079 | static int emulator_write_emulated_onepage(unsigned long addr, | 1129 | static int emulator_write_emulated_onepage(unsigned long addr, |
1080 | const void *val, | 1130 | const void *val, |
1081 | unsigned int bytes, | 1131 | unsigned int bytes, |
1082 | struct x86_emulate_ctxt *ctxt) | 1132 | struct kvm_vcpu *vcpu) |
1083 | { | 1133 | { |
1084 | struct kvm_vcpu *vcpu = ctxt->vcpu; | ||
1085 | struct kvm_io_device *mmio_dev; | 1134 | struct kvm_io_device *mmio_dev; |
1086 | gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); | 1135 | gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); |
1087 | 1136 | ||
1088 | if (gpa == UNMAPPED_GVA) { | 1137 | if (gpa == UNMAPPED_GVA) { |
1089 | kvm_arch_ops->inject_page_fault(vcpu, addr, 2); | 1138 | kvm_x86_ops->inject_page_fault(vcpu, addr, 2); |
1090 | return X86EMUL_PROPAGATE_FAULT; | 1139 | return X86EMUL_PROPAGATE_FAULT; |
1091 | } | 1140 | } |
1092 | 1141 | ||
@@ -1111,31 +1160,32 @@ static int emulator_write_emulated_onepage(unsigned long addr, | |||
1111 | return X86EMUL_CONTINUE; | 1160 | return X86EMUL_CONTINUE; |
1112 | } | 1161 | } |
1113 | 1162 | ||
1114 | static int emulator_write_emulated(unsigned long addr, | 1163 | int emulator_write_emulated(unsigned long addr, |
1115 | const void *val, | 1164 | const void *val, |
1116 | unsigned int bytes, | 1165 | unsigned int bytes, |
1117 | struct x86_emulate_ctxt *ctxt) | 1166 | struct kvm_vcpu *vcpu) |
1118 | { | 1167 | { |
1119 | /* Crossing a page boundary? */ | 1168 | /* Crossing a page boundary? */ |
1120 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { | 1169 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { |
1121 | int rc, now; | 1170 | int rc, now; |
1122 | 1171 | ||
1123 | now = -addr & ~PAGE_MASK; | 1172 | now = -addr & ~PAGE_MASK; |
1124 | rc = emulator_write_emulated_onepage(addr, val, now, ctxt); | 1173 | rc = emulator_write_emulated_onepage(addr, val, now, vcpu); |
1125 | if (rc != X86EMUL_CONTINUE) | 1174 | if (rc != X86EMUL_CONTINUE) |
1126 | return rc; | 1175 | return rc; |
1127 | addr += now; | 1176 | addr += now; |
1128 | val += now; | 1177 | val += now; |
1129 | bytes -= now; | 1178 | bytes -= now; |
1130 | } | 1179 | } |
1131 | return emulator_write_emulated_onepage(addr, val, bytes, ctxt); | 1180 | return emulator_write_emulated_onepage(addr, val, bytes, vcpu); |
1132 | } | 1181 | } |
1182 | EXPORT_SYMBOL_GPL(emulator_write_emulated); | ||
1133 | 1183 | ||
1134 | static int emulator_cmpxchg_emulated(unsigned long addr, | 1184 | static int emulator_cmpxchg_emulated(unsigned long addr, |
1135 | const void *old, | 1185 | const void *old, |
1136 | const void *new, | 1186 | const void *new, |
1137 | unsigned int bytes, | 1187 | unsigned int bytes, |
1138 | struct x86_emulate_ctxt *ctxt) | 1188 | struct kvm_vcpu *vcpu) |
1139 | { | 1189 | { |
1140 | static int reported; | 1190 | static int reported; |
1141 | 1191 | ||
@@ -1143,12 +1193,12 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
1143 | reported = 1; | 1193 | reported = 1; |
1144 | printk(KERN_WARNING "kvm: emulating exchange as write\n"); | 1194 | printk(KERN_WARNING "kvm: emulating exchange as write\n"); |
1145 | } | 1195 | } |
1146 | return emulator_write_emulated(addr, new, bytes, ctxt); | 1196 | return emulator_write_emulated(addr, new, bytes, vcpu); |
1147 | } | 1197 | } |
1148 | 1198 | ||
1149 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | 1199 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) |
1150 | { | 1200 | { |
1151 | return kvm_arch_ops->get_segment_base(vcpu, seg); | 1201 | return kvm_x86_ops->get_segment_base(vcpu, seg); |
1152 | } | 1202 | } |
1153 | 1203 | ||
1154 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | 1204 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) |
@@ -1158,10 +1208,8 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | |||
1158 | 1208 | ||
1159 | int emulate_clts(struct kvm_vcpu *vcpu) | 1209 | int emulate_clts(struct kvm_vcpu *vcpu) |
1160 | { | 1210 | { |
1161 | unsigned long cr0; | 1211 | vcpu->cr0 &= ~X86_CR0_TS; |
1162 | 1212 | kvm_x86_ops->set_cr0(vcpu, vcpu->cr0); | |
1163 | cr0 = vcpu->cr0 & ~CR0_TS_MASK; | ||
1164 | kvm_arch_ops->set_cr0(vcpu, cr0); | ||
1165 | return X86EMUL_CONTINUE; | 1213 | return X86EMUL_CONTINUE; |
1166 | } | 1214 | } |
1167 | 1215 | ||
@@ -1171,11 +1219,10 @@ int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, unsigned long *dest) | |||
1171 | 1219 | ||
1172 | switch (dr) { | 1220 | switch (dr) { |
1173 | case 0 ... 3: | 1221 | case 0 ... 3: |
1174 | *dest = kvm_arch_ops->get_dr(vcpu, dr); | 1222 | *dest = kvm_x86_ops->get_dr(vcpu, dr); |
1175 | return X86EMUL_CONTINUE; | 1223 | return X86EMUL_CONTINUE; |
1176 | default: | 1224 | default: |
1177 | printk(KERN_DEBUG "%s: unexpected dr %u\n", | 1225 | pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr); |
1178 | __FUNCTION__, dr); | ||
1179 | return X86EMUL_UNHANDLEABLE; | 1226 | return X86EMUL_UNHANDLEABLE; |
1180 | } | 1227 | } |
1181 | } | 1228 | } |
@@ -1185,7 +1232,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | |||
1185 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; | 1232 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; |
1186 | int exception; | 1233 | int exception; |
1187 | 1234 | ||
1188 | kvm_arch_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); | 1235 | kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); |
1189 | if (exception) { | 1236 | if (exception) { |
1190 | /* FIXME: better handling */ | 1237 | /* FIXME: better handling */ |
1191 | return X86EMUL_UNHANDLEABLE; | 1238 | return X86EMUL_UNHANDLEABLE; |
@@ -1193,25 +1240,25 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | |||
1193 | return X86EMUL_CONTINUE; | 1240 | return X86EMUL_CONTINUE; |
1194 | } | 1241 | } |
1195 | 1242 | ||
1196 | static void report_emulation_failure(struct x86_emulate_ctxt *ctxt) | 1243 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) |
1197 | { | 1244 | { |
1198 | static int reported; | 1245 | static int reported; |
1199 | u8 opcodes[4]; | 1246 | u8 opcodes[4]; |
1200 | unsigned long rip = ctxt->vcpu->rip; | 1247 | unsigned long rip = vcpu->rip; |
1201 | unsigned long rip_linear; | 1248 | unsigned long rip_linear; |
1202 | 1249 | ||
1203 | rip_linear = rip + get_segment_base(ctxt->vcpu, VCPU_SREG_CS); | 1250 | rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); |
1204 | 1251 | ||
1205 | if (reported) | 1252 | if (reported) |
1206 | return; | 1253 | return; |
1207 | 1254 | ||
1208 | emulator_read_std(rip_linear, (void *)opcodes, 4, ctxt); | 1255 | emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu); |
1209 | 1256 | ||
1210 | printk(KERN_ERR "emulation failed but !mmio_needed?" | 1257 | printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", |
1211 | " rip %lx %02x %02x %02x %02x\n", | 1258 | context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); |
1212 | rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); | ||
1213 | reported = 1; | 1259 | reported = 1; |
1214 | } | 1260 | } |
1261 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | ||
1215 | 1262 | ||
1216 | struct x86_emulate_ops emulate_ops = { | 1263 | struct x86_emulate_ops emulate_ops = { |
1217 | .read_std = emulator_read_std, | 1264 | .read_std = emulator_read_std, |
@@ -1231,12 +1278,12 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
1231 | int cs_db, cs_l; | 1278 | int cs_db, cs_l; |
1232 | 1279 | ||
1233 | vcpu->mmio_fault_cr2 = cr2; | 1280 | vcpu->mmio_fault_cr2 = cr2; |
1234 | kvm_arch_ops->cache_regs(vcpu); | 1281 | kvm_x86_ops->cache_regs(vcpu); |
1235 | 1282 | ||
1236 | kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 1283 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
1237 | 1284 | ||
1238 | emulate_ctxt.vcpu = vcpu; | 1285 | emulate_ctxt.vcpu = vcpu; |
1239 | emulate_ctxt.eflags = kvm_arch_ops->get_rflags(vcpu); | 1286 | emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); |
1240 | emulate_ctxt.cr2 = cr2; | 1287 | emulate_ctxt.cr2 = cr2; |
1241 | emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM) | 1288 | emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM) |
1242 | ? X86EMUL_MODE_REAL : cs_l | 1289 | ? X86EMUL_MODE_REAL : cs_l |
@@ -1259,9 +1306,13 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
1259 | emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS); | 1306 | emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS); |
1260 | 1307 | ||
1261 | vcpu->mmio_is_write = 0; | 1308 | vcpu->mmio_is_write = 0; |
1309 | vcpu->pio.string = 0; | ||
1262 | r = x86_emulate_memop(&emulate_ctxt, &emulate_ops); | 1310 | r = x86_emulate_memop(&emulate_ctxt, &emulate_ops); |
1311 | if (vcpu->pio.string) | ||
1312 | return EMULATE_DO_MMIO; | ||
1263 | 1313 | ||
1264 | if ((r || vcpu->mmio_is_write) && run) { | 1314 | if ((r || vcpu->mmio_is_write) && run) { |
1315 | run->exit_reason = KVM_EXIT_MMIO; | ||
1265 | run->mmio.phys_addr = vcpu->mmio_phys_addr; | 1316 | run->mmio.phys_addr = vcpu->mmio_phys_addr; |
1266 | memcpy(run->mmio.data, vcpu->mmio_data, 8); | 1317 | memcpy(run->mmio.data, vcpu->mmio_data, 8); |
1267 | run->mmio.len = vcpu->mmio_size; | 1318 | run->mmio.len = vcpu->mmio_size; |
@@ -1272,14 +1323,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
1272 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 1323 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) |
1273 | return EMULATE_DONE; | 1324 | return EMULATE_DONE; |
1274 | if (!vcpu->mmio_needed) { | 1325 | if (!vcpu->mmio_needed) { |
1275 | report_emulation_failure(&emulate_ctxt); | 1326 | kvm_report_emulation_failure(vcpu, "mmio"); |
1276 | return EMULATE_FAIL; | 1327 | return EMULATE_FAIL; |
1277 | } | 1328 | } |
1278 | return EMULATE_DO_MMIO; | 1329 | return EMULATE_DO_MMIO; |
1279 | } | 1330 | } |
1280 | 1331 | ||
1281 | kvm_arch_ops->decache_regs(vcpu); | 1332 | kvm_x86_ops->decache_regs(vcpu); |
1282 | kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags); | 1333 | kvm_x86_ops->set_rflags(vcpu, emulate_ctxt.eflags); |
1283 | 1334 | ||
1284 | if (vcpu->mmio_is_write) { | 1335 | if (vcpu->mmio_is_write) { |
1285 | vcpu->mmio_needed = 0; | 1336 | vcpu->mmio_needed = 0; |
@@ -1290,14 +1341,45 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
1290 | } | 1341 | } |
1291 | EXPORT_SYMBOL_GPL(emulate_instruction); | 1342 | EXPORT_SYMBOL_GPL(emulate_instruction); |
1292 | 1343 | ||
1293 | int kvm_emulate_halt(struct kvm_vcpu *vcpu) | 1344 | /* |
1345 | * The vCPU has executed a HLT instruction with in-kernel mode enabled. | ||
1346 | */ | ||
1347 | static void kvm_vcpu_block(struct kvm_vcpu *vcpu) | ||
1294 | { | 1348 | { |
1295 | if (vcpu->irq_summary) | 1349 | DECLARE_WAITQUEUE(wait, current); |
1296 | return 1; | ||
1297 | 1350 | ||
1298 | vcpu->run->exit_reason = KVM_EXIT_HLT; | 1351 | add_wait_queue(&vcpu->wq, &wait); |
1352 | |||
1353 | /* | ||
1354 | * We will block until either an interrupt or a signal wakes us up | ||
1355 | */ | ||
1356 | while (!kvm_cpu_has_interrupt(vcpu) | ||
1357 | && !signal_pending(current) | ||
1358 | && vcpu->mp_state != VCPU_MP_STATE_RUNNABLE | ||
1359 | && vcpu->mp_state != VCPU_MP_STATE_SIPI_RECEIVED) { | ||
1360 | set_current_state(TASK_INTERRUPTIBLE); | ||
1361 | vcpu_put(vcpu); | ||
1362 | schedule(); | ||
1363 | vcpu_load(vcpu); | ||
1364 | } | ||
1365 | |||
1366 | __set_current_state(TASK_RUNNING); | ||
1367 | remove_wait_queue(&vcpu->wq, &wait); | ||
1368 | } | ||
1369 | |||
1370 | int kvm_emulate_halt(struct kvm_vcpu *vcpu) | ||
1371 | { | ||
1299 | ++vcpu->stat.halt_exits; | 1372 | ++vcpu->stat.halt_exits; |
1300 | return 0; | 1373 | if (irqchip_in_kernel(vcpu->kvm)) { |
1374 | vcpu->mp_state = VCPU_MP_STATE_HALTED; | ||
1375 | kvm_vcpu_block(vcpu); | ||
1376 | if (vcpu->mp_state != VCPU_MP_STATE_RUNNABLE) | ||
1377 | return -EINTR; | ||
1378 | return 1; | ||
1379 | } else { | ||
1380 | vcpu->run->exit_reason = KVM_EXIT_HLT; | ||
1381 | return 0; | ||
1382 | } | ||
1301 | } | 1383 | } |
1302 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); | 1384 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); |
1303 | 1385 | ||
@@ -1305,7 +1387,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
1305 | { | 1387 | { |
1306 | unsigned long nr, a0, a1, a2, a3, a4, a5, ret; | 1388 | unsigned long nr, a0, a1, a2, a3, a4, a5, ret; |
1307 | 1389 | ||
1308 | kvm_arch_ops->cache_regs(vcpu); | 1390 | kvm_x86_ops->cache_regs(vcpu); |
1309 | ret = -KVM_EINVAL; | 1391 | ret = -KVM_EINVAL; |
1310 | #ifdef CONFIG_X86_64 | 1392 | #ifdef CONFIG_X86_64 |
1311 | if (is_long_mode(vcpu)) { | 1393 | if (is_long_mode(vcpu)) { |
@@ -1329,6 +1411,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
1329 | } | 1411 | } |
1330 | switch (nr) { | 1412 | switch (nr) { |
1331 | default: | 1413 | default: |
1414 | run->hypercall.nr = nr; | ||
1332 | run->hypercall.args[0] = a0; | 1415 | run->hypercall.args[0] = a0; |
1333 | run->hypercall.args[1] = a1; | 1416 | run->hypercall.args[1] = a1; |
1334 | run->hypercall.args[2] = a2; | 1417 | run->hypercall.args[2] = a2; |
@@ -1337,11 +1420,11 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
1337 | run->hypercall.args[5] = a5; | 1420 | run->hypercall.args[5] = a5; |
1338 | run->hypercall.ret = ret; | 1421 | run->hypercall.ret = ret; |
1339 | run->hypercall.longmode = is_long_mode(vcpu); | 1422 | run->hypercall.longmode = is_long_mode(vcpu); |
1340 | kvm_arch_ops->decache_regs(vcpu); | 1423 | kvm_x86_ops->decache_regs(vcpu); |
1341 | return 0; | 1424 | return 0; |
1342 | } | 1425 | } |
1343 | vcpu->regs[VCPU_REGS_RAX] = ret; | 1426 | vcpu->regs[VCPU_REGS_RAX] = ret; |
1344 | kvm_arch_ops->decache_regs(vcpu); | 1427 | kvm_x86_ops->decache_regs(vcpu); |
1345 | return 1; | 1428 | return 1; |
1346 | } | 1429 | } |
1347 | EXPORT_SYMBOL_GPL(kvm_hypercall); | 1430 | EXPORT_SYMBOL_GPL(kvm_hypercall); |
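The hypercall exit now also carries run->hypercall.nr, so a VMM no longer has to re-derive the hypercall number from guest RAX. A hedged sketch of the userspace side; handle_guest_hypercall() is a hypothetical device-model dispatcher, not part of this patch.

    #include <linux/kvm.h>

    /* Hypothetical dispatcher in the device model. */
    extern unsigned long handle_guest_hypercall(unsigned long nr, const __u64 *args,
                                                int longmode);

    static void handle_hypercall_exit(struct kvm_run *run)
    {
            run->hypercall.ret = handle_guest_hypercall(run->hypercall.nr,
                                                        run->hypercall.args,
                                                        run->hypercall.longmode);
            /* kvm_vcpu_ioctl_run() copies hypercall.ret back into guest RAX
             * before the next entry (see the KVM_EXIT_HYPERCALL handling below). */
    }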
@@ -1355,26 +1438,26 @@ void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | |||
1355 | { | 1438 | { |
1356 | struct descriptor_table dt = { limit, base }; | 1439 | struct descriptor_table dt = { limit, base }; |
1357 | 1440 | ||
1358 | kvm_arch_ops->set_gdt(vcpu, &dt); | 1441 | kvm_x86_ops->set_gdt(vcpu, &dt); |
1359 | } | 1442 | } |
1360 | 1443 | ||
1361 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 1444 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
1362 | { | 1445 | { |
1363 | struct descriptor_table dt = { limit, base }; | 1446 | struct descriptor_table dt = { limit, base }; |
1364 | 1447 | ||
1365 | kvm_arch_ops->set_idt(vcpu, &dt); | 1448 | kvm_x86_ops->set_idt(vcpu, &dt); |
1366 | } | 1449 | } |
1367 | 1450 | ||
1368 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | 1451 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, |
1369 | unsigned long *rflags) | 1452 | unsigned long *rflags) |
1370 | { | 1453 | { |
1371 | lmsw(vcpu, msw); | 1454 | lmsw(vcpu, msw); |
1372 | *rflags = kvm_arch_ops->get_rflags(vcpu); | 1455 | *rflags = kvm_x86_ops->get_rflags(vcpu); |
1373 | } | 1456 | } |
1374 | 1457 | ||
1375 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | 1458 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) |
1376 | { | 1459 | { |
1377 | kvm_arch_ops->decache_cr4_guest_bits(vcpu); | 1460 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); |
1378 | switch (cr) { | 1461 | switch (cr) { |
1379 | case 0: | 1462 | case 0: |
1380 | return vcpu->cr0; | 1463 | return vcpu->cr0; |
@@ -1396,7 +1479,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | |||
1396 | switch (cr) { | 1479 | switch (cr) { |
1397 | case 0: | 1480 | case 0: |
1398 | set_cr0(vcpu, mk_cr_64(vcpu->cr0, val)); | 1481 | set_cr0(vcpu, mk_cr_64(vcpu->cr0, val)); |
1399 | *rflags = kvm_arch_ops->get_rflags(vcpu); | 1482 | *rflags = kvm_x86_ops->get_rflags(vcpu); |
1400 | break; | 1483 | break; |
1401 | case 2: | 1484 | case 2: |
1402 | vcpu->cr2 = val; | 1485 | vcpu->cr2 = val; |
@@ -1439,7 +1522,7 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa) | |||
1439 | 1522 | ||
1440 | mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT); | 1523 | mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT); |
1441 | para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT); | 1524 | para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT); |
1442 | para_state = kmap_atomic(para_state_page, KM_USER0); | 1525 | para_state = kmap(para_state_page); |
1443 | 1526 | ||
1444 | printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version); | 1527 | printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version); |
1445 | printk(KERN_DEBUG ".... size: %d\n", para_state->size); | 1528 | printk(KERN_DEBUG ".... size: %d\n", para_state->size); |
@@ -1470,12 +1553,12 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa) | |||
1470 | mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT); | 1553 | mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT); |
1471 | hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT), | 1554 | hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT), |
1472 | KM_USER1) + (hypercall_hpa & ~PAGE_MASK); | 1555 | KM_USER1) + (hypercall_hpa & ~PAGE_MASK); |
1473 | kvm_arch_ops->patch_hypercall(vcpu, hypercall); | 1556 | kvm_x86_ops->patch_hypercall(vcpu, hypercall); |
1474 | kunmap_atomic(hypercall, KM_USER1); | 1557 | kunmap_atomic(hypercall, KM_USER1); |
1475 | 1558 | ||
1476 | para_state->ret = 0; | 1559 | para_state->ret = 0; |
1477 | err_kunmap_skip: | 1560 | err_kunmap_skip: |
1478 | kunmap_atomic(para_state, KM_USER0); | 1561 | kunmap(para_state_page); |
1479 | return 0; | 1562 | return 0; |
1480 | err_gp: | 1563 | err_gp: |
1481 | return 1; | 1564 | return 1; |
@@ -1511,7 +1594,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1511 | data = 3; | 1594 | data = 3; |
1512 | break; | 1595 | break; |
1513 | case MSR_IA32_APICBASE: | 1596 | case MSR_IA32_APICBASE: |
1514 | data = vcpu->apic_base; | 1597 | data = kvm_get_apic_base(vcpu); |
1515 | break; | 1598 | break; |
1516 | case MSR_IA32_MISC_ENABLE: | 1599 | case MSR_IA32_MISC_ENABLE: |
1517 | data = vcpu->ia32_misc_enable_msr; | 1600 | data = vcpu->ia32_misc_enable_msr; |
@@ -1522,7 +1605,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1522 | break; | 1605 | break; |
1523 | #endif | 1606 | #endif |
1524 | default: | 1607 | default: |
1525 | printk(KERN_ERR "kvm: unhandled rdmsr: 0x%x\n", msr); | 1608 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
1526 | return 1; | 1609 | return 1; |
1527 | } | 1610 | } |
1528 | *pdata = data; | 1611 | *pdata = data; |
@@ -1537,7 +1620,7 @@ EXPORT_SYMBOL_GPL(kvm_get_msr_common); | |||
1537 | */ | 1620 | */ |
1538 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | 1621 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) |
1539 | { | 1622 | { |
1540 | return kvm_arch_ops->get_msr(vcpu, msr_index, pdata); | 1623 | return kvm_x86_ops->get_msr(vcpu, msr_index, pdata); |
1541 | } | 1624 | } |
1542 | 1625 | ||
1543 | #ifdef CONFIG_X86_64 | 1626 | #ifdef CONFIG_X86_64 |
@@ -1558,7 +1641,7 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
1558 | return; | 1641 | return; |
1559 | } | 1642 | } |
1560 | 1643 | ||
1561 | kvm_arch_ops->set_efer(vcpu, efer); | 1644 | kvm_x86_ops->set_efer(vcpu, efer); |
1562 | 1645 | ||
1563 | efer &= ~EFER_LMA; | 1646 | efer &= ~EFER_LMA; |
1564 | efer |= vcpu->shadow_efer & EFER_LMA; | 1647 | efer |= vcpu->shadow_efer & EFER_LMA; |
@@ -1577,11 +1660,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1577 | break; | 1660 | break; |
1578 | #endif | 1661 | #endif |
1579 | case MSR_IA32_MC0_STATUS: | 1662 | case MSR_IA32_MC0_STATUS: |
1580 | printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", | 1663 | pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", |
1581 | __FUNCTION__, data); | 1664 | __FUNCTION__, data); |
1582 | break; | 1665 | break; |
1583 | case MSR_IA32_MCG_STATUS: | 1666 | case MSR_IA32_MCG_STATUS: |
1584 | printk(KERN_WARNING "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", | 1667 | pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", |
1585 | __FUNCTION__, data); | 1668 | __FUNCTION__, data); |
1586 | break; | 1669 | break; |
1587 | case MSR_IA32_UCODE_REV: | 1670 | case MSR_IA32_UCODE_REV: |
@@ -1589,7 +1672,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1589 | case 0x200 ... 0x2ff: /* MTRRs */ | 1672 | case 0x200 ... 0x2ff: /* MTRRs */ |
1590 | break; | 1673 | break; |
1591 | case MSR_IA32_APICBASE: | 1674 | case MSR_IA32_APICBASE: |
1592 | vcpu->apic_base = data; | 1675 | kvm_set_apic_base(vcpu, data); |
1593 | break; | 1676 | break; |
1594 | case MSR_IA32_MISC_ENABLE: | 1677 | case MSR_IA32_MISC_ENABLE: |
1595 | vcpu->ia32_misc_enable_msr = data; | 1678 | vcpu->ia32_misc_enable_msr = data; |
@@ -1601,7 +1684,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1601 | return vcpu_register_para(vcpu, data); | 1684 | return vcpu_register_para(vcpu, data); |
1602 | 1685 | ||
1603 | default: | 1686 | default: |
1604 | printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr); | 1687 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr); |
1605 | return 1; | 1688 | return 1; |
1606 | } | 1689 | } |
1607 | return 0; | 1690 | return 0; |
@@ -1615,44 +1698,24 @@ EXPORT_SYMBOL_GPL(kvm_set_msr_common); | |||
1615 | */ | 1698 | */ |
1616 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | 1699 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) |
1617 | { | 1700 | { |
1618 | return kvm_arch_ops->set_msr(vcpu, msr_index, data); | 1701 | return kvm_x86_ops->set_msr(vcpu, msr_index, data); |
1619 | } | 1702 | } |
1620 | 1703 | ||
1621 | void kvm_resched(struct kvm_vcpu *vcpu) | 1704 | void kvm_resched(struct kvm_vcpu *vcpu) |
1622 | { | 1705 | { |
1623 | if (!need_resched()) | 1706 | if (!need_resched()) |
1624 | return; | 1707 | return; |
1625 | vcpu_put(vcpu); | ||
1626 | cond_resched(); | 1708 | cond_resched(); |
1627 | vcpu_load(vcpu); | ||
1628 | } | 1709 | } |
1629 | EXPORT_SYMBOL_GPL(kvm_resched); | 1710 | EXPORT_SYMBOL_GPL(kvm_resched); |
1630 | 1711 | ||
1631 | void load_msrs(struct vmx_msr_entry *e, int n) | ||
1632 | { | ||
1633 | int i; | ||
1634 | |||
1635 | for (i = 0; i < n; ++i) | ||
1636 | wrmsrl(e[i].index, e[i].data); | ||
1637 | } | ||
1638 | EXPORT_SYMBOL_GPL(load_msrs); | ||
1639 | |||
1640 | void save_msrs(struct vmx_msr_entry *e, int n) | ||
1641 | { | ||
1642 | int i; | ||
1643 | |||
1644 | for (i = 0; i < n; ++i) | ||
1645 | rdmsrl(e[i].index, e[i].data); | ||
1646 | } | ||
1647 | EXPORT_SYMBOL_GPL(save_msrs); | ||
1648 | |||
1649 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | 1712 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) |
1650 | { | 1713 | { |
1651 | int i; | 1714 | int i; |
1652 | u32 function; | 1715 | u32 function; |
1653 | struct kvm_cpuid_entry *e, *best; | 1716 | struct kvm_cpuid_entry *e, *best; |
1654 | 1717 | ||
1655 | kvm_arch_ops->cache_regs(vcpu); | 1718 | kvm_x86_ops->cache_regs(vcpu); |
1656 | function = vcpu->regs[VCPU_REGS_RAX]; | 1719 | function = vcpu->regs[VCPU_REGS_RAX]; |
1657 | vcpu->regs[VCPU_REGS_RAX] = 0; | 1720 | vcpu->regs[VCPU_REGS_RAX] = 0; |
1658 | vcpu->regs[VCPU_REGS_RBX] = 0; | 1721 | vcpu->regs[VCPU_REGS_RBX] = 0; |
@@ -1678,8 +1741,8 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | |||
1678 | vcpu->regs[VCPU_REGS_RCX] = best->ecx; | 1741 | vcpu->regs[VCPU_REGS_RCX] = best->ecx; |
1679 | vcpu->regs[VCPU_REGS_RDX] = best->edx; | 1742 | vcpu->regs[VCPU_REGS_RDX] = best->edx; |
1680 | } | 1743 | } |
1681 | kvm_arch_ops->decache_regs(vcpu); | 1744 | kvm_x86_ops->decache_regs(vcpu); |
1682 | kvm_arch_ops->skip_emulated_instruction(vcpu); | 1745 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
1683 | } | 1746 | } |
1684 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | 1747 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); |
1685 | 1748 | ||
@@ -1690,11 +1753,9 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) | |||
1690 | unsigned bytes; | 1753 | unsigned bytes; |
1691 | int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; | 1754 | int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; |
1692 | 1755 | ||
1693 | kvm_arch_ops->vcpu_put(vcpu); | ||
1694 | q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, | 1756 | q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, |
1695 | PAGE_KERNEL); | 1757 | PAGE_KERNEL); |
1696 | if (!q) { | 1758 | if (!q) { |
1697 | kvm_arch_ops->vcpu_load(vcpu); | ||
1698 | free_pio_guest_pages(vcpu); | 1759 | free_pio_guest_pages(vcpu); |
1699 | return -ENOMEM; | 1760 | return -ENOMEM; |
1700 | } | 1761 | } |
@@ -1706,7 +1767,6 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) | |||
1706 | memcpy(p, q, bytes); | 1767 | memcpy(p, q, bytes); |
1707 | q -= vcpu->pio.guest_page_offset; | 1768 | q -= vcpu->pio.guest_page_offset; |
1708 | vunmap(q); | 1769 | vunmap(q); |
1709 | kvm_arch_ops->vcpu_load(vcpu); | ||
1710 | free_pio_guest_pages(vcpu); | 1770 | free_pio_guest_pages(vcpu); |
1711 | return 0; | 1771 | return 0; |
1712 | } | 1772 | } |
@@ -1717,7 +1777,7 @@ static int complete_pio(struct kvm_vcpu *vcpu) | |||
1717 | long delta; | 1777 | long delta; |
1718 | int r; | 1778 | int r; |
1719 | 1779 | ||
1720 | kvm_arch_ops->cache_regs(vcpu); | 1780 | kvm_x86_ops->cache_regs(vcpu); |
1721 | 1781 | ||
1722 | if (!io->string) { | 1782 | if (!io->string) { |
1723 | if (io->in) | 1783 | if (io->in) |
@@ -1727,7 +1787,7 @@ static int complete_pio(struct kvm_vcpu *vcpu) | |||
1727 | if (io->in) { | 1787 | if (io->in) { |
1728 | r = pio_copy_data(vcpu); | 1788 | r = pio_copy_data(vcpu); |
1729 | if (r) { | 1789 | if (r) { |
1730 | kvm_arch_ops->cache_regs(vcpu); | 1790 | kvm_x86_ops->cache_regs(vcpu); |
1731 | return r; | 1791 | return r; |
1732 | } | 1792 | } |
1733 | } | 1793 | } |
@@ -1750,79 +1810,109 @@ static int complete_pio(struct kvm_vcpu *vcpu) | |||
1750 | vcpu->regs[VCPU_REGS_RSI] += delta; | 1810 | vcpu->regs[VCPU_REGS_RSI] += delta; |
1751 | } | 1811 | } |
1752 | 1812 | ||
1753 | kvm_arch_ops->decache_regs(vcpu); | 1813 | kvm_x86_ops->decache_regs(vcpu); |
1754 | 1814 | ||
1755 | io->count -= io->cur_count; | 1815 | io->count -= io->cur_count; |
1756 | io->cur_count = 0; | 1816 | io->cur_count = 0; |
1757 | 1817 | ||
1758 | if (!io->count) | ||
1759 | kvm_arch_ops->skip_emulated_instruction(vcpu); | ||
1760 | return 0; | 1818 | return 0; |
1761 | } | 1819 | } |
1762 | 1820 | ||
1763 | void kernel_pio(struct kvm_io_device *pio_dev, struct kvm_vcpu *vcpu) | 1821 | static void kernel_pio(struct kvm_io_device *pio_dev, |
1822 | struct kvm_vcpu *vcpu, | ||
1823 | void *pd) | ||
1764 | { | 1824 | { |
1765 | /* TODO: String I/O for in kernel device */ | 1825 | /* TODO: String I/O for in kernel device */ |
1766 | 1826 | ||
1827 | mutex_lock(&vcpu->kvm->lock); | ||
1767 | if (vcpu->pio.in) | 1828 | if (vcpu->pio.in) |
1768 | kvm_iodevice_read(pio_dev, vcpu->pio.port, | 1829 | kvm_iodevice_read(pio_dev, vcpu->pio.port, |
1769 | vcpu->pio.size, | 1830 | vcpu->pio.size, |
1770 | vcpu->pio_data); | 1831 | pd); |
1771 | else | 1832 | else |
1772 | kvm_iodevice_write(pio_dev, vcpu->pio.port, | 1833 | kvm_iodevice_write(pio_dev, vcpu->pio.port, |
1773 | vcpu->pio.size, | 1834 | vcpu->pio.size, |
1774 | vcpu->pio_data); | 1835 | pd); |
1836 | mutex_unlock(&vcpu->kvm->lock); | ||
1837 | } | ||
1838 | |||
1839 | static void pio_string_write(struct kvm_io_device *pio_dev, | ||
1840 | struct kvm_vcpu *vcpu) | ||
1841 | { | ||
1842 | struct kvm_pio_request *io = &vcpu->pio; | ||
1843 | void *pd = vcpu->pio_data; | ||
1844 | int i; | ||
1845 | |||
1846 | mutex_lock(&vcpu->kvm->lock); | ||
1847 | for (i = 0; i < io->cur_count; i++) { | ||
1848 | kvm_iodevice_write(pio_dev, io->port, | ||
1849 | io->size, | ||
1850 | pd); | ||
1851 | pd += io->size; | ||
1852 | } | ||
1853 | mutex_unlock(&vcpu->kvm->lock); | ||
1775 | } | 1854 | } |
1776 | 1855 | ||
1777 | int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 1856 | int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in, |
1778 | int size, unsigned long count, int string, int down, | 1857 | int size, unsigned port) |
1858 | { | ||
1859 | struct kvm_io_device *pio_dev; | ||
1860 | |||
1861 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
1862 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
1863 | vcpu->run->io.size = vcpu->pio.size = size; | ||
1864 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
1865 | vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = 1; | ||
1866 | vcpu->run->io.port = vcpu->pio.port = port; | ||
1867 | vcpu->pio.in = in; | ||
1868 | vcpu->pio.string = 0; | ||
1869 | vcpu->pio.down = 0; | ||
1870 | vcpu->pio.guest_page_offset = 0; | ||
1871 | vcpu->pio.rep = 0; | ||
1872 | |||
1873 | kvm_x86_ops->cache_regs(vcpu); | ||
1874 | memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); | ||
1875 | kvm_x86_ops->decache_regs(vcpu); | ||
1876 | |||
1877 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
1878 | |||
1879 | pio_dev = vcpu_find_pio_dev(vcpu, port); | ||
1880 | if (pio_dev) { | ||
1881 | kernel_pio(pio_dev, vcpu, vcpu->pio_data); | ||
1882 | complete_pio(vcpu); | ||
1883 | return 1; | ||
1884 | } | ||
1885 | return 0; | ||
1886 | } | ||
1887 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); | ||
1888 | |||
1889 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | ||
1890 | int size, unsigned long count, int down, | ||
1779 | gva_t address, int rep, unsigned port) | 1891 | gva_t address, int rep, unsigned port) |
1780 | { | 1892 | { |
1781 | unsigned now, in_page; | 1893 | unsigned now, in_page; |
1782 | int i; | 1894 | int i, ret = 0; |
1783 | int nr_pages = 1; | 1895 | int nr_pages = 1; |
1784 | struct page *page; | 1896 | struct page *page; |
1785 | struct kvm_io_device *pio_dev; | 1897 | struct kvm_io_device *pio_dev; |
1786 | 1898 | ||
1787 | vcpu->run->exit_reason = KVM_EXIT_IO; | 1899 | vcpu->run->exit_reason = KVM_EXIT_IO; |
1788 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | 1900 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; |
1789 | vcpu->run->io.size = size; | 1901 | vcpu->run->io.size = vcpu->pio.size = size; |
1790 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | 1902 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; |
1791 | vcpu->run->io.count = count; | 1903 | vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = count; |
1792 | vcpu->run->io.port = port; | 1904 | vcpu->run->io.port = vcpu->pio.port = port; |
1793 | vcpu->pio.count = count; | ||
1794 | vcpu->pio.cur_count = count; | ||
1795 | vcpu->pio.size = size; | ||
1796 | vcpu->pio.in = in; | 1905 | vcpu->pio.in = in; |
1797 | vcpu->pio.port = port; | 1906 | vcpu->pio.string = 1; |
1798 | vcpu->pio.string = string; | ||
1799 | vcpu->pio.down = down; | 1907 | vcpu->pio.down = down; |
1800 | vcpu->pio.guest_page_offset = offset_in_page(address); | 1908 | vcpu->pio.guest_page_offset = offset_in_page(address); |
1801 | vcpu->pio.rep = rep; | 1909 | vcpu->pio.rep = rep; |
1802 | 1910 | ||
1803 | pio_dev = vcpu_find_pio_dev(vcpu, port); | ||
1804 | if (!string) { | ||
1805 | kvm_arch_ops->cache_regs(vcpu); | ||
1806 | memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); | ||
1807 | kvm_arch_ops->decache_regs(vcpu); | ||
1808 | if (pio_dev) { | ||
1809 | kernel_pio(pio_dev, vcpu); | ||
1810 | complete_pio(vcpu); | ||
1811 | return 1; | ||
1812 | } | ||
1813 | return 0; | ||
1814 | } | ||
1815 | /* TODO: String I/O for in kernel device */ | ||
1816 | if (pio_dev) | ||
1817 | printk(KERN_ERR "kvm_setup_pio: no string io support\n"); | ||
1818 | |||
1819 | if (!count) { | 1911 | if (!count) { |
1820 | kvm_arch_ops->skip_emulated_instruction(vcpu); | 1912 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
1821 | return 1; | 1913 | return 1; |
1822 | } | 1914 | } |
1823 | 1915 | ||
1824 | now = min(count, PAGE_SIZE / size); | ||
1825 | |||
1826 | if (!down) | 1916 | if (!down) |
1827 | in_page = PAGE_SIZE - offset_in_page(address); | 1917 | in_page = PAGE_SIZE - offset_in_page(address); |
1828 | else | 1918 | else |
@@ -1841,20 +1931,23 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
1841 | /* | 1931 | /* |
1842 | * String I/O in reverse. Yuck. Kill the guest, fix later. | 1932 | * String I/O in reverse. Yuck. Kill the guest, fix later. |
1843 | */ | 1933 | */ |
1844 | printk(KERN_ERR "kvm: guest string pio down\n"); | 1934 | pr_unimpl(vcpu, "guest string pio down\n"); |
1845 | inject_gp(vcpu); | 1935 | inject_gp(vcpu); |
1846 | return 1; | 1936 | return 1; |
1847 | } | 1937 | } |
1848 | vcpu->run->io.count = now; | 1938 | vcpu->run->io.count = now; |
1849 | vcpu->pio.cur_count = now; | 1939 | vcpu->pio.cur_count = now; |
1850 | 1940 | ||
1941 | if (vcpu->pio.cur_count == vcpu->pio.count) | ||
1942 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
1943 | |||
1851 | for (i = 0; i < nr_pages; ++i) { | 1944 | for (i = 0; i < nr_pages; ++i) { |
1852 | spin_lock(&vcpu->kvm->lock); | 1945 | mutex_lock(&vcpu->kvm->lock); |
1853 | page = gva_to_page(vcpu, address + i * PAGE_SIZE); | 1946 | page = gva_to_page(vcpu, address + i * PAGE_SIZE); |
1854 | if (page) | 1947 | if (page) |
1855 | get_page(page); | 1948 | get_page(page); |
1856 | vcpu->pio.guest_pages[i] = page; | 1949 | vcpu->pio.guest_pages[i] = page; |
1857 | spin_unlock(&vcpu->kvm->lock); | 1950 | mutex_unlock(&vcpu->kvm->lock); |
1858 | if (!page) { | 1951 | if (!page) { |
1859 | inject_gp(vcpu); | 1952 | inject_gp(vcpu); |
1860 | free_pio_guest_pages(vcpu); | 1953 | free_pio_guest_pages(vcpu); |
@@ -1862,11 +1955,145 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
1862 | } | 1955 | } |
1863 | } | 1956 | } |
1864 | 1957 | ||
1865 | if (!vcpu->pio.in) | 1958 | pio_dev = vcpu_find_pio_dev(vcpu, port); |
1866 | return pio_copy_data(vcpu); | 1959 | if (!vcpu->pio.in) { |
1867 | return 0; | 1960 | /* string PIO write */ |
1961 | ret = pio_copy_data(vcpu); | ||
1962 | if (ret >= 0 && pio_dev) { | ||
1963 | pio_string_write(pio_dev, vcpu); | ||
1964 | complete_pio(vcpu); | ||
1965 | if (vcpu->pio.count == 0) | ||
1966 | ret = 1; | ||
1967 | } | ||
1968 | } else if (pio_dev) | ||
1969 | pr_unimpl(vcpu, "no string pio read support yet, " | ||
1970 | "port %x size %d count %ld\n", | ||
1971 | port, size, count); | ||
1972 | |||
1973 | return ret; | ||
1974 | } | ||
1975 | EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); | ||
1976 | |||
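When no in-kernel device claims the port, both kvm_emulate_pio() and kvm_emulate_pio_string() still return 0 and the exit reaches userspace with the data staged in the shared pio page at run + run->io.data_offset. A sketch of the userspace side, assuming run was mapped from the vcpu fd; the single-byte port handlers are hypothetical.

    #include <linux/kvm.h>

    /* Hypothetical single-byte port handlers from the device model. */
    extern void outb_handler(unsigned short port, unsigned char val);
    extern unsigned char inb_handler(unsigned short port);

    static void handle_io_exit(struct kvm_run *run)
    {
            unsigned char *data = (unsigned char *)run + run->io.data_offset;
            unsigned int i;

            for (i = 0; i < run->io.count; i++, data += run->io.size) {
                    if (run->io.direction == KVM_EXIT_IO_OUT)
                            outb_handler(run->io.port, *data);  /* size == 1 assumed */
                    else
                            *data = inb_handler(run->io.port);
            }
    }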
1977 | /* | ||
1978 | * Check if userspace requested an interrupt window, and that the | ||
1979 | * interrupt window is open. | ||
1980 | * | ||
1981 | * No need to exit to userspace if we already have an interrupt queued. | ||
1982 | */ | ||
1983 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, | ||
1984 | struct kvm_run *kvm_run) | ||
1985 | { | ||
1986 | return (!vcpu->irq_summary && | ||
1987 | kvm_run->request_interrupt_window && | ||
1988 | vcpu->interrupt_window_open && | ||
1989 | (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF)); | ||
1990 | } | ||
1991 | |||
1992 | static void post_kvm_run_save(struct kvm_vcpu *vcpu, | ||
1993 | struct kvm_run *kvm_run) | ||
1994 | { | ||
1995 | kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; | ||
1996 | kvm_run->cr8 = get_cr8(vcpu); | ||
1997 | kvm_run->apic_base = kvm_get_apic_base(vcpu); | ||
1998 | if (irqchip_in_kernel(vcpu->kvm)) | ||
1999 | kvm_run->ready_for_interrupt_injection = 1; | ||
2000 | else | ||
2001 | kvm_run->ready_for_interrupt_injection = | ||
2002 | (vcpu->interrupt_window_open && | ||
2003 | vcpu->irq_summary == 0); | ||
2004 | } | ||
2005 | |||
2006 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
2007 | { | ||
2008 | int r; | ||
2009 | |||
2010 | if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) { | ||
2011 | printk("vcpu %d received sipi with vector # %x\n", | ||
2012 | vcpu->vcpu_id, vcpu->sipi_vector); | ||
2013 | kvm_lapic_reset(vcpu); | ||
2014 | kvm_x86_ops->vcpu_reset(vcpu); | ||
2015 | vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; | ||
2016 | } | ||
2017 | |||
2018 | preempted: | ||
2019 | if (vcpu->guest_debug.enabled) | ||
2020 | kvm_x86_ops->guest_debug_pre(vcpu); | ||
2021 | |||
2022 | again: | ||
2023 | r = kvm_mmu_reload(vcpu); | ||
2024 | if (unlikely(r)) | ||
2025 | goto out; | ||
2026 | |||
2027 | preempt_disable(); | ||
2028 | |||
2029 | kvm_x86_ops->prepare_guest_switch(vcpu); | ||
2030 | kvm_load_guest_fpu(vcpu); | ||
2031 | |||
2032 | local_irq_disable(); | ||
2033 | |||
2034 | if (signal_pending(current)) { | ||
2035 | local_irq_enable(); | ||
2036 | preempt_enable(); | ||
2037 | r = -EINTR; | ||
2038 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
2039 | ++vcpu->stat.signal_exits; | ||
2040 | goto out; | ||
2041 | } | ||
2042 | |||
2043 | if (irqchip_in_kernel(vcpu->kvm)) | ||
2044 | kvm_x86_ops->inject_pending_irq(vcpu); | ||
2045 | else if (!vcpu->mmio_read_completed) | ||
2046 | kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run); | ||
2047 | |||
2048 | vcpu->guest_mode = 1; | ||
2049 | |||
2050 | if (vcpu->requests) | ||
2051 | if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) | ||
2052 | kvm_x86_ops->tlb_flush(vcpu); | ||
2053 | |||
2054 | kvm_x86_ops->run(vcpu, kvm_run); | ||
2055 | |||
2056 | vcpu->guest_mode = 0; | ||
2057 | local_irq_enable(); | ||
2058 | |||
2059 | ++vcpu->stat.exits; | ||
2060 | |||
2061 | preempt_enable(); | ||
2062 | |||
2063 | /* | ||
2064 | * Profile KVM exit RIPs: | ||
2065 | */ | ||
2066 | if (unlikely(prof_on == KVM_PROFILING)) { | ||
2067 | kvm_x86_ops->cache_regs(vcpu); | ||
2068 | profile_hit(KVM_PROFILING, (void *)vcpu->rip); | ||
2069 | } | ||
2070 | |||
2071 | r = kvm_x86_ops->handle_exit(kvm_run, vcpu); | ||
2072 | |||
2073 | if (r > 0) { | ||
2074 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | ||
2075 | r = -EINTR; | ||
2076 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
2077 | ++vcpu->stat.request_irq_exits; | ||
2078 | goto out; | ||
2079 | } | ||
2080 | if (!need_resched()) { | ||
2081 | ++vcpu->stat.light_exits; | ||
2082 | goto again; | ||
2083 | } | ||
2084 | } | ||
2085 | |||
2086 | out: | ||
2087 | if (r > 0) { | ||
2088 | kvm_resched(vcpu); | ||
2089 | goto preempted; | ||
2090 | } | ||
2091 | |||
2092 | post_kvm_run_save(vcpu, kvm_run); | ||
2093 | |||
2094 | return r; | ||
1868 | } | 2095 | } |
1869 | EXPORT_SYMBOL_GPL(kvm_setup_pio); | 2096 | |
1870 | 2097 | ||
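__vcpu_run() above only drops back to userspace for signals or when dm_request_for_irq_injection() fires, i.e. when userspace asked for the interrupt window and it is open. With a userspace irqchip the injection protocol is therefore unchanged: set request_interrupt_window, wait for ready_for_interrupt_injection, then inject via KVM_INTERRUPT (which now returns -ENXIO if the irqchip is in the kernel). A sketch; pending_vector() is a hypothetical device-model helper.

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Hypothetical: highest-priority pending vector, or -1 if none. */
    extern int pending_vector(void);

    static void maybe_inject_irq(int vcpu_fd, struct kvm_run *run)
    {
            struct kvm_interrupt intr;
            int vec = pending_vector();

            /* Ask __vcpu_run() to exit as soon as the guest can take an interrupt. */
            run->request_interrupt_window = (vec >= 0);

            if (vec >= 0 && run->ready_for_interrupt_injection && run->if_flag) {
                    intr.irq = vec;
                    ioctl(vcpu_fd, KVM_INTERRUPT, &intr);
            }
    }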
1871 | static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2098 | static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
1872 | { | 2099 | { |
@@ -1875,11 +2102,18 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1875 | 2102 | ||
1876 | vcpu_load(vcpu); | 2103 | vcpu_load(vcpu); |
1877 | 2104 | ||
2105 | if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) { | ||
2106 | kvm_vcpu_block(vcpu); | ||
2107 | vcpu_put(vcpu); | ||
2108 | return -EAGAIN; | ||
2109 | } | ||
2110 | |||
1878 | if (vcpu->sigset_active) | 2111 | if (vcpu->sigset_active) |
1879 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 2112 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
1880 | 2113 | ||
1881 | /* re-sync apic's tpr */ | 2114 | /* re-sync apic's tpr */ |
1882 | vcpu->cr8 = kvm_run->cr8; | 2115 | if (!irqchip_in_kernel(vcpu->kvm)) |
2116 | set_cr8(vcpu, kvm_run->cr8); | ||
1883 | 2117 | ||
1884 | if (vcpu->pio.cur_count) { | 2118 | if (vcpu->pio.cur_count) { |
1885 | r = complete_pio(vcpu); | 2119 | r = complete_pio(vcpu); |
@@ -1897,19 +2131,18 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1897 | /* | 2131 | /* |
1898 | * Read-modify-write. Back to userspace. | 2132 | * Read-modify-write. Back to userspace. |
1899 | */ | 2133 | */ |
1900 | kvm_run->exit_reason = KVM_EXIT_MMIO; | ||
1901 | r = 0; | 2134 | r = 0; |
1902 | goto out; | 2135 | goto out; |
1903 | } | 2136 | } |
1904 | } | 2137 | } |
1905 | 2138 | ||
1906 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { | 2139 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { |
1907 | kvm_arch_ops->cache_regs(vcpu); | 2140 | kvm_x86_ops->cache_regs(vcpu); |
1908 | vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; | 2141 | vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; |
1909 | kvm_arch_ops->decache_regs(vcpu); | 2142 | kvm_x86_ops->decache_regs(vcpu); |
1910 | } | 2143 | } |
1911 | 2144 | ||
1912 | r = kvm_arch_ops->run(vcpu, kvm_run); | 2145 | r = __vcpu_run(vcpu, kvm_run); |
1913 | 2146 | ||
1914 | out: | 2147 | out: |
1915 | if (vcpu->sigset_active) | 2148 | if (vcpu->sigset_active) |
@@ -1924,7 +2157,7 @@ static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, | |||
1924 | { | 2157 | { |
1925 | vcpu_load(vcpu); | 2158 | vcpu_load(vcpu); |
1926 | 2159 | ||
1927 | kvm_arch_ops->cache_regs(vcpu); | 2160 | kvm_x86_ops->cache_regs(vcpu); |
1928 | 2161 | ||
1929 | regs->rax = vcpu->regs[VCPU_REGS_RAX]; | 2162 | regs->rax = vcpu->regs[VCPU_REGS_RAX]; |
1930 | regs->rbx = vcpu->regs[VCPU_REGS_RBX]; | 2163 | regs->rbx = vcpu->regs[VCPU_REGS_RBX]; |
@@ -1946,7 +2179,7 @@ static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, | |||
1946 | #endif | 2179 | #endif |
1947 | 2180 | ||
1948 | regs->rip = vcpu->rip; | 2181 | regs->rip = vcpu->rip; |
1949 | regs->rflags = kvm_arch_ops->get_rflags(vcpu); | 2182 | regs->rflags = kvm_x86_ops->get_rflags(vcpu); |
1950 | 2183 | ||
1951 | /* | 2184 | /* |
1952 | * Don't leak debug flags in case they were set for guest debugging | 2185 | * Don't leak debug flags in case they were set for guest debugging |
@@ -1984,9 +2217,9 @@ static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, | |||
1984 | #endif | 2217 | #endif |
1985 | 2218 | ||
1986 | vcpu->rip = regs->rip; | 2219 | vcpu->rip = regs->rip; |
1987 | kvm_arch_ops->set_rflags(vcpu, regs->rflags); | 2220 | kvm_x86_ops->set_rflags(vcpu, regs->rflags); |
1988 | 2221 | ||
1989 | kvm_arch_ops->decache_regs(vcpu); | 2222 | kvm_x86_ops->decache_regs(vcpu); |
1990 | 2223 | ||
1991 | vcpu_put(vcpu); | 2224 | vcpu_put(vcpu); |
1992 | 2225 | ||
@@ -1996,13 +2229,14 @@ static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, | |||
1996 | static void get_segment(struct kvm_vcpu *vcpu, | 2229 | static void get_segment(struct kvm_vcpu *vcpu, |
1997 | struct kvm_segment *var, int seg) | 2230 | struct kvm_segment *var, int seg) |
1998 | { | 2231 | { |
1999 | return kvm_arch_ops->get_segment(vcpu, var, seg); | 2232 | return kvm_x86_ops->get_segment(vcpu, var, seg); |
2000 | } | 2233 | } |
2001 | 2234 | ||
2002 | static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 2235 | static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, |
2003 | struct kvm_sregs *sregs) | 2236 | struct kvm_sregs *sregs) |
2004 | { | 2237 | { |
2005 | struct descriptor_table dt; | 2238 | struct descriptor_table dt; |
2239 | int pending_vec; | ||
2006 | 2240 | ||
2007 | vcpu_load(vcpu); | 2241 | vcpu_load(vcpu); |
2008 | 2242 | ||
@@ -2016,24 +2250,31 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
2016 | get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); | 2250 | get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); |
2017 | get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | 2251 | get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); |
2018 | 2252 | ||
2019 | kvm_arch_ops->get_idt(vcpu, &dt); | 2253 | kvm_x86_ops->get_idt(vcpu, &dt); |
2020 | sregs->idt.limit = dt.limit; | 2254 | sregs->idt.limit = dt.limit; |
2021 | sregs->idt.base = dt.base; | 2255 | sregs->idt.base = dt.base; |
2022 | kvm_arch_ops->get_gdt(vcpu, &dt); | 2256 | kvm_x86_ops->get_gdt(vcpu, &dt); |
2023 | sregs->gdt.limit = dt.limit; | 2257 | sregs->gdt.limit = dt.limit; |
2024 | sregs->gdt.base = dt.base; | 2258 | sregs->gdt.base = dt.base; |
2025 | 2259 | ||
2026 | kvm_arch_ops->decache_cr4_guest_bits(vcpu); | 2260 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); |
2027 | sregs->cr0 = vcpu->cr0; | 2261 | sregs->cr0 = vcpu->cr0; |
2028 | sregs->cr2 = vcpu->cr2; | 2262 | sregs->cr2 = vcpu->cr2; |
2029 | sregs->cr3 = vcpu->cr3; | 2263 | sregs->cr3 = vcpu->cr3; |
2030 | sregs->cr4 = vcpu->cr4; | 2264 | sregs->cr4 = vcpu->cr4; |
2031 | sregs->cr8 = vcpu->cr8; | 2265 | sregs->cr8 = get_cr8(vcpu); |
2032 | sregs->efer = vcpu->shadow_efer; | 2266 | sregs->efer = vcpu->shadow_efer; |
2033 | sregs->apic_base = vcpu->apic_base; | 2267 | sregs->apic_base = kvm_get_apic_base(vcpu); |
2034 | 2268 | ||
2035 | memcpy(sregs->interrupt_bitmap, vcpu->irq_pending, | 2269 | if (irqchip_in_kernel(vcpu->kvm)) { |
2036 | sizeof sregs->interrupt_bitmap); | 2270 | memset(sregs->interrupt_bitmap, 0, |
2271 | sizeof sregs->interrupt_bitmap); | ||
2272 | pending_vec = kvm_x86_ops->get_irq(vcpu); | ||
2273 | if (pending_vec >= 0) | ||
2274 | set_bit(pending_vec, (unsigned long *)sregs->interrupt_bitmap); | ||
2275 | } else | ||
2276 | memcpy(sregs->interrupt_bitmap, vcpu->irq_pending, | ||
2277 | sizeof sregs->interrupt_bitmap); | ||
2037 | 2278 | ||
2038 | vcpu_put(vcpu); | 2279 | vcpu_put(vcpu); |
2039 | 2280 | ||
@@ -2043,56 +2284,69 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
2043 | static void set_segment(struct kvm_vcpu *vcpu, | 2284 | static void set_segment(struct kvm_vcpu *vcpu, |
2044 | struct kvm_segment *var, int seg) | 2285 | struct kvm_segment *var, int seg) |
2045 | { | 2286 | { |
2046 | return kvm_arch_ops->set_segment(vcpu, var, seg); | 2287 | return kvm_x86_ops->set_segment(vcpu, var, seg); |
2047 | } | 2288 | } |
2048 | 2289 | ||
2049 | static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | 2290 | static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, |
2050 | struct kvm_sregs *sregs) | 2291 | struct kvm_sregs *sregs) |
2051 | { | 2292 | { |
2052 | int mmu_reset_needed = 0; | 2293 | int mmu_reset_needed = 0; |
2053 | int i; | 2294 | int i, pending_vec, max_bits; |
2054 | struct descriptor_table dt; | 2295 | struct descriptor_table dt; |
2055 | 2296 | ||
2056 | vcpu_load(vcpu); | 2297 | vcpu_load(vcpu); |
2057 | 2298 | ||
2058 | dt.limit = sregs->idt.limit; | 2299 | dt.limit = sregs->idt.limit; |
2059 | dt.base = sregs->idt.base; | 2300 | dt.base = sregs->idt.base; |
2060 | kvm_arch_ops->set_idt(vcpu, &dt); | 2301 | kvm_x86_ops->set_idt(vcpu, &dt); |
2061 | dt.limit = sregs->gdt.limit; | 2302 | dt.limit = sregs->gdt.limit; |
2062 | dt.base = sregs->gdt.base; | 2303 | dt.base = sregs->gdt.base; |
2063 | kvm_arch_ops->set_gdt(vcpu, &dt); | 2304 | kvm_x86_ops->set_gdt(vcpu, &dt); |
2064 | 2305 | ||
2065 | vcpu->cr2 = sregs->cr2; | 2306 | vcpu->cr2 = sregs->cr2; |
2066 | mmu_reset_needed |= vcpu->cr3 != sregs->cr3; | 2307 | mmu_reset_needed |= vcpu->cr3 != sregs->cr3; |
2067 | vcpu->cr3 = sregs->cr3; | 2308 | vcpu->cr3 = sregs->cr3; |
2068 | 2309 | ||
2069 | vcpu->cr8 = sregs->cr8; | 2310 | set_cr8(vcpu, sregs->cr8); |
2070 | 2311 | ||
2071 | mmu_reset_needed |= vcpu->shadow_efer != sregs->efer; | 2312 | mmu_reset_needed |= vcpu->shadow_efer != sregs->efer; |
2072 | #ifdef CONFIG_X86_64 | 2313 | #ifdef CONFIG_X86_64 |
2073 | kvm_arch_ops->set_efer(vcpu, sregs->efer); | 2314 | kvm_x86_ops->set_efer(vcpu, sregs->efer); |
2074 | #endif | 2315 | #endif |
2075 | vcpu->apic_base = sregs->apic_base; | 2316 | kvm_set_apic_base(vcpu, sregs->apic_base); |
2076 | 2317 | ||
2077 | kvm_arch_ops->decache_cr4_guest_bits(vcpu); | 2318 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); |
2078 | 2319 | ||
2079 | mmu_reset_needed |= vcpu->cr0 != sregs->cr0; | 2320 | mmu_reset_needed |= vcpu->cr0 != sregs->cr0; |
2080 | kvm_arch_ops->set_cr0(vcpu, sregs->cr0); | 2321 | vcpu->cr0 = sregs->cr0; |
2322 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); | ||
2081 | 2323 | ||
2082 | mmu_reset_needed |= vcpu->cr4 != sregs->cr4; | 2324 | mmu_reset_needed |= vcpu->cr4 != sregs->cr4; |
2083 | kvm_arch_ops->set_cr4(vcpu, sregs->cr4); | 2325 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
2084 | if (!is_long_mode(vcpu) && is_pae(vcpu)) | 2326 | if (!is_long_mode(vcpu) && is_pae(vcpu)) |
2085 | load_pdptrs(vcpu, vcpu->cr3); | 2327 | load_pdptrs(vcpu, vcpu->cr3); |
2086 | 2328 | ||
2087 | if (mmu_reset_needed) | 2329 | if (mmu_reset_needed) |
2088 | kvm_mmu_reset_context(vcpu); | 2330 | kvm_mmu_reset_context(vcpu); |
2089 | 2331 | ||
2090 | memcpy(vcpu->irq_pending, sregs->interrupt_bitmap, | 2332 | if (!irqchip_in_kernel(vcpu->kvm)) { |
2091 | sizeof vcpu->irq_pending); | 2333 | memcpy(vcpu->irq_pending, sregs->interrupt_bitmap, |
2092 | vcpu->irq_summary = 0; | 2334 | sizeof vcpu->irq_pending); |
2093 | for (i = 0; i < NR_IRQ_WORDS; ++i) | 2335 | vcpu->irq_summary = 0; |
2094 | if (vcpu->irq_pending[i]) | 2336 | for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i) |
2095 | __set_bit(i, &vcpu->irq_summary); | 2337 | if (vcpu->irq_pending[i]) |
2338 | __set_bit(i, &vcpu->irq_summary); | ||
2339 | } else { | ||
2340 | max_bits = (sizeof sregs->interrupt_bitmap) << 3; | ||
2341 | pending_vec = find_first_bit( | ||
2342 | (const unsigned long *)sregs->interrupt_bitmap, | ||
2343 | max_bits); | ||
2344 | /* Only pending external irq is handled here */ | ||
2345 | if (pending_vec < max_bits) { | ||
2346 | kvm_x86_ops->set_irq(vcpu, pending_vec); | ||
2347 | printk("Set back pending irq %d\n", pending_vec); | ||
2348 | } | ||
2349 | } | ||
2096 | 2350 | ||
2097 | set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 2351 | set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
2098 | set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); | 2352 | set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); |
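With the in-kernel irqchip, KVM_GET_SREGS encodes at most one pending external vector in interrupt_bitmap and KVM_SET_SREGS re-queues it through kvm_x86_ops->set_irq(), so the same sregs blob can be shuttled between vcpus or across a save/restore cycle in either mode. A minimal sketch of that round trip, assuming two open vcpu fds:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Copy special registers (and any encoded pending vector) between vcpu fds. */
    static int copy_sregs(int src_vcpu_fd, int dst_vcpu_fd)
    {
            struct kvm_sregs sregs;

            if (ioctl(src_vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
                    return -1;
            return ioctl(dst_vcpu_fd, KVM_SET_SREGS, &sregs);
    }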
@@ -2109,6 +2363,16 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
2109 | return 0; | 2363 | return 0; |
2110 | } | 2364 | } |
2111 | 2365 | ||
2366 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | ||
2367 | { | ||
2368 | struct kvm_segment cs; | ||
2369 | |||
2370 | get_segment(vcpu, &cs, VCPU_SREG_CS); | ||
2371 | *db = cs.db; | ||
2372 | *l = cs.l; | ||
2373 | } | ||
2374 | EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); | ||
2375 | |||
2112 | /* | 2376 | /* |
2113 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS | 2377 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS |
2114 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. | 2378 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. |
@@ -2236,13 +2500,13 @@ static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
2236 | gpa_t gpa; | 2500 | gpa_t gpa; |
2237 | 2501 | ||
2238 | vcpu_load(vcpu); | 2502 | vcpu_load(vcpu); |
2239 | spin_lock(&vcpu->kvm->lock); | 2503 | mutex_lock(&vcpu->kvm->lock); |
2240 | gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr); | 2504 | gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr); |
2241 | tr->physical_address = gpa; | 2505 | tr->physical_address = gpa; |
2242 | tr->valid = gpa != UNMAPPED_GVA; | 2506 | tr->valid = gpa != UNMAPPED_GVA; |
2243 | tr->writeable = 1; | 2507 | tr->writeable = 1; |
2244 | tr->usermode = 0; | 2508 | tr->usermode = 0; |
2245 | spin_unlock(&vcpu->kvm->lock); | 2509 | mutex_unlock(&vcpu->kvm->lock); |
2246 | vcpu_put(vcpu); | 2510 | vcpu_put(vcpu); |
2247 | 2511 | ||
2248 | return 0; | 2512 | return 0; |
@@ -2253,6 +2517,8 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | |||
2253 | { | 2517 | { |
2254 | if (irq->irq < 0 || irq->irq >= 256) | 2518 | if (irq->irq < 0 || irq->irq >= 256) |
2255 | return -EINVAL; | 2519 | return -EINVAL; |
2520 | if (irqchip_in_kernel(vcpu->kvm)) | ||
2521 | return -ENXIO; | ||
2256 | vcpu_load(vcpu); | 2522 | vcpu_load(vcpu); |
2257 | 2523 | ||
2258 | set_bit(irq->irq, vcpu->irq_pending); | 2524 | set_bit(irq->irq, vcpu->irq_pending); |
@@ -2270,7 +2536,7 @@ static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, | |||
2270 | 2536 | ||
2271 | vcpu_load(vcpu); | 2537 | vcpu_load(vcpu); |
2272 | 2538 | ||
2273 | r = kvm_arch_ops->set_guest_debug(vcpu, dbg); | 2539 | r = kvm_x86_ops->set_guest_debug(vcpu, dbg); |
2274 | 2540 | ||
2275 | vcpu_put(vcpu); | 2541 | vcpu_put(vcpu); |
2276 | 2542 | ||
@@ -2285,7 +2551,6 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, | |||
2285 | unsigned long pgoff; | 2551 | unsigned long pgoff; |
2286 | struct page *page; | 2552 | struct page *page; |
2287 | 2553 | ||
2288 | *type = VM_FAULT_MINOR; | ||
2289 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 2554 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; |
2290 | if (pgoff == 0) | 2555 | if (pgoff == 0) |
2291 | page = virt_to_page(vcpu->run); | 2556 | page = virt_to_page(vcpu->run); |
@@ -2294,6 +2559,9 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, | |||
2294 | else | 2559 | else |
2295 | return NOPAGE_SIGBUS; | 2560 | return NOPAGE_SIGBUS; |
2296 | get_page(page); | 2561 | get_page(page); |
2562 | if (type != NULL) | ||
2563 | *type = VM_FAULT_MINOR; | ||
2564 | |||
2297 | return page; | 2565 | return page; |
2298 | } | 2566 | } |
2299 | 2567 | ||
@@ -2346,74 +2614,52 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
2346 | { | 2614 | { |
2347 | int r; | 2615 | int r; |
2348 | struct kvm_vcpu *vcpu; | 2616 | struct kvm_vcpu *vcpu; |
2349 | struct page *page; | ||
2350 | 2617 | ||
2351 | r = -EINVAL; | ||
2352 | if (!valid_vcpu(n)) | 2618 | if (!valid_vcpu(n)) |
2353 | goto out; | 2619 | return -EINVAL; |
2354 | |||
2355 | vcpu = &kvm->vcpus[n]; | ||
2356 | |||
2357 | mutex_lock(&vcpu->mutex); | ||
2358 | |||
2359 | if (vcpu->vmcs) { | ||
2360 | mutex_unlock(&vcpu->mutex); | ||
2361 | return -EEXIST; | ||
2362 | } | ||
2363 | |||
2364 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
2365 | r = -ENOMEM; | ||
2366 | if (!page) | ||
2367 | goto out_unlock; | ||
2368 | vcpu->run = page_address(page); | ||
2369 | |||
2370 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
2371 | r = -ENOMEM; | ||
2372 | if (!page) | ||
2373 | goto out_free_run; | ||
2374 | vcpu->pio_data = page_address(page); | ||
2375 | 2620 | ||
2376 | vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, | 2621 | vcpu = kvm_x86_ops->vcpu_create(kvm, n); |
2377 | FX_IMAGE_ALIGN); | 2622 | if (IS_ERR(vcpu)) |
2378 | vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; | 2623 | return PTR_ERR(vcpu); |
2379 | vcpu->cr0 = 0x10; | ||
2380 | 2624 | ||
2381 | r = kvm_arch_ops->vcpu_create(vcpu); | 2625 | preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); |
2382 | if (r < 0) | ||
2383 | goto out_free_vcpus; | ||
2384 | 2626 | ||
2385 | r = kvm_mmu_create(vcpu); | 2627 | /* We do fxsave: this must be aligned. */ |
2386 | if (r < 0) | 2628 | BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF); |
2387 | goto out_free_vcpus; | ||
2388 | 2629 | ||
2389 | kvm_arch_ops->vcpu_load(vcpu); | 2630 | vcpu_load(vcpu); |
2390 | r = kvm_mmu_setup(vcpu); | 2631 | r = kvm_mmu_setup(vcpu); |
2391 | if (r >= 0) | ||
2392 | r = kvm_arch_ops->vcpu_setup(vcpu); | ||
2393 | vcpu_put(vcpu); | 2632 | vcpu_put(vcpu); |
2394 | |||
2395 | if (r < 0) | 2633 | if (r < 0) |
2396 | goto out_free_vcpus; | 2634 | goto free_vcpu; |
2397 | 2635 | ||
2636 | mutex_lock(&kvm->lock); | ||
2637 | if (kvm->vcpus[n]) { | ||
2638 | r = -EEXIST; | ||
2639 | mutex_unlock(&kvm->lock); | ||
2640 | goto mmu_unload; | ||
2641 | } | ||
2642 | kvm->vcpus[n] = vcpu; | ||
2643 | mutex_unlock(&kvm->lock); | ||
2644 | |||
2645 | /* Now it's all set up, let userspace reach it */ | ||
2398 | r = create_vcpu_fd(vcpu); | 2646 | r = create_vcpu_fd(vcpu); |
2399 | if (r < 0) | 2647 | if (r < 0) |
2400 | goto out_free_vcpus; | 2648 | goto unlink; |
2649 | return r; | ||
2401 | 2650 | ||
2402 | spin_lock(&kvm_lock); | 2651 | unlink: |
2403 | if (n >= kvm->nvcpus) | 2652 | mutex_lock(&kvm->lock); |
2404 | kvm->nvcpus = n + 1; | 2653 | kvm->vcpus[n] = NULL; |
2405 | spin_unlock(&kvm_lock); | 2654 | mutex_unlock(&kvm->lock); |
2406 | 2655 | ||
2407 | return r; | 2656 | mmu_unload: |
2657 | vcpu_load(vcpu); | ||
2658 | kvm_mmu_unload(vcpu); | ||
2659 | vcpu_put(vcpu); | ||
2408 | 2660 | ||
2409 | out_free_vcpus: | 2661 | free_vcpu: |
2410 | kvm_free_vcpu(vcpu); | 2662 | kvm_x86_ops->vcpu_free(vcpu); |
2411 | out_free_run: | ||
2412 | free_page((unsigned long)vcpu->run); | ||
2413 | vcpu->run = NULL; | ||
2414 | out_unlock: | ||
2415 | mutex_unlock(&vcpu->mutex); | ||
2416 | out: | ||
2417 | return r; | 2663 | return r; |
2418 | } | 2664 | } |
2419 | 2665 | ||
@@ -2493,7 +2739,7 @@ struct fxsave { | |||
2493 | 2739 | ||
2494 | static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 2740 | static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
2495 | { | 2741 | { |
2496 | struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; | 2742 | struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image; |
2497 | 2743 | ||
2498 | vcpu_load(vcpu); | 2744 | vcpu_load(vcpu); |
2499 | 2745 | ||
@@ -2513,7 +2759,7 @@ static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
2513 | 2759 | ||
2514 | static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 2760 | static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
2515 | { | 2761 | { |
2516 | struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; | 2762 | struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image; |
2517 | 2763 | ||
2518 | vcpu_load(vcpu); | 2764 | vcpu_load(vcpu); |
2519 | 2765 | ||
@@ -2531,6 +2777,27 @@ static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
2531 | return 0; | 2777 | return 0; |
2532 | } | 2778 | } |
2533 | 2779 | ||
2780 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | ||
2781 | struct kvm_lapic_state *s) | ||
2782 | { | ||
2783 | vcpu_load(vcpu); | ||
2784 | memcpy(s->regs, vcpu->apic->regs, sizeof *s); | ||
2785 | vcpu_put(vcpu); | ||
2786 | |||
2787 | return 0; | ||
2788 | } | ||
2789 | |||
2790 | static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, | ||
2791 | struct kvm_lapic_state *s) | ||
2792 | { | ||
2793 | vcpu_load(vcpu); | ||
2794 | memcpy(vcpu->apic->regs, s->regs, sizeof *s); | ||
2795 | kvm_apic_post_state_restore(vcpu); | ||
2796 | vcpu_put(vcpu); | ||
2797 | |||
2798 | return 0; | ||
2799 | } | ||
2800 | |||
2534 | static long kvm_vcpu_ioctl(struct file *filp, | 2801 | static long kvm_vcpu_ioctl(struct file *filp, |
2535 | unsigned int ioctl, unsigned long arg) | 2802 | unsigned int ioctl, unsigned long arg) |
2536 | { | 2803 | { |
@@ -2700,6 +2967,31 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
2700 | r = 0; | 2967 | r = 0; |
2701 | break; | 2968 | break; |
2702 | } | 2969 | } |
2970 | case KVM_GET_LAPIC: { | ||
2971 | struct kvm_lapic_state lapic; | ||
2972 | |||
2973 | memset(&lapic, 0, sizeof lapic); | ||
2974 | r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic); | ||
2975 | if (r) | ||
2976 | goto out; | ||
2977 | r = -EFAULT; | ||
2978 | if (copy_to_user(argp, &lapic, sizeof lapic)) | ||
2979 | goto out; | ||
2980 | r = 0; | ||
2981 | break; | ||
2982 | } | ||
2983 | case KVM_SET_LAPIC: { | ||
2984 | struct kvm_lapic_state lapic; | ||
2985 | |||
2986 | r = -EFAULT; | ||
2987 | if (copy_from_user(&lapic, argp, sizeof lapic)) | ||
2988 | goto out; | ||
2989 | r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic); | ||
2990 | if (r) | ||
2991 | goto out; | ||
2992 | r = 0; | ||
2993 | break; | ||
2994 | } | ||
2703 | default: | 2995 | default: |
2704 | ; | 2996 | ; |
2705 | } | 2997 | } |
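KVM_GET_LAPIC and KVM_SET_LAPIC expose the in-kernel local APIC as a flat register blob (struct kvm_lapic_state), which is enough for save/restore of the interrupt state now kept in the kernel. A sketch; both vcpu fds are assumed to belong to VMs created with KVM_CREATE_IRQCHIP.

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int copy_lapic(int src_vcpu_fd, int dst_vcpu_fd)
    {
            struct kvm_lapic_state lapic;

            if (ioctl(src_vcpu_fd, KVM_GET_LAPIC, &lapic) < 0)
                    return -1;
            return ioctl(dst_vcpu_fd, KVM_SET_LAPIC, &lapic);
    }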
@@ -2753,6 +3045,75 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2753 | goto out; | 3045 | goto out; |
2754 | break; | 3046 | break; |
2755 | } | 3047 | } |
3048 | case KVM_CREATE_IRQCHIP: | ||
3049 | r = -ENOMEM; | ||
3050 | kvm->vpic = kvm_create_pic(kvm); | ||
3051 | if (kvm->vpic) { | ||
3052 | r = kvm_ioapic_init(kvm); | ||
3053 | if (r) { | ||
3054 | kfree(kvm->vpic); | ||
3055 | kvm->vpic = NULL; | ||
3056 | goto out; | ||
3057 | } | ||
3058 | } | ||
3059 | else | ||
3060 | goto out; | ||
3061 | break; | ||
3062 | case KVM_IRQ_LINE: { | ||
3063 | struct kvm_irq_level irq_event; | ||
3064 | |||
3065 | r = -EFAULT; | ||
3066 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | ||
3067 | goto out; | ||
3068 | if (irqchip_in_kernel(kvm)) { | ||
3069 | mutex_lock(&kvm->lock); | ||
3070 | if (irq_event.irq < 16) | ||
3071 | kvm_pic_set_irq(pic_irqchip(kvm), | ||
3072 | irq_event.irq, | ||
3073 | irq_event.level); | ||
3074 | kvm_ioapic_set_irq(kvm->vioapic, | ||
3075 | irq_event.irq, | ||
3076 | irq_event.level); | ||
3077 | mutex_unlock(&kvm->lock); | ||
3078 | r = 0; | ||
3079 | } | ||
3080 | break; | ||
3081 | } | ||
3082 | case KVM_GET_IRQCHIP: { | ||
3083 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | ||
3084 | struct kvm_irqchip chip; | ||
3085 | |||
3086 | r = -EFAULT; | ||
3087 | if (copy_from_user(&chip, argp, sizeof chip)) | ||
3088 | goto out; | ||
3089 | r = -ENXIO; | ||
3090 | if (!irqchip_in_kernel(kvm)) | ||
3091 | goto out; | ||
3092 | r = kvm_vm_ioctl_get_irqchip(kvm, &chip); | ||
3093 | if (r) | ||
3094 | goto out; | ||
3095 | r = -EFAULT; | ||
3096 | if (copy_to_user(argp, &chip, sizeof chip)) | ||
3097 | goto out; | ||
3098 | r = 0; | ||
3099 | break; | ||
3100 | } | ||
3101 | case KVM_SET_IRQCHIP: { | ||
3102 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | ||
3103 | struct kvm_irqchip chip; | ||
3104 | |||
3105 | r = -EFAULT; | ||
3106 | if (copy_from_user(&chip, argp, sizeof chip)) | ||
3107 | goto out; | ||
3108 | r = -ENXIO; | ||
3109 | if (!irqchip_in_kernel(kvm)) | ||
3110 | goto out; | ||
3111 | r = kvm_vm_ioctl_set_irqchip(kvm, &chip); | ||
3112 | if (r) | ||
3113 | goto out; | ||
3114 | r = 0; | ||
3115 | break; | ||
3116 | } | ||
2756 | default: | 3117 | default: |
2757 | ; | 3118 | ; |
2758 | } | 3119 | } |
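These VM ioctls are the userspace face of the in-kernel irqchip: create the PIC/IOAPIC pair once per VM, drive guest interrupt lines with KVM_IRQ_LINE (IRQs below 16 are mirrored into the PIC, as above), and use KVM_GET_IRQCHIP/KVM_SET_IRQCHIP to snapshot chip state. A sketch against an open VM fd:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int setup_kernel_irqchip(int vm_fd)
    {
            return ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0);
    }

    static int set_irq_line(int vm_fd, unsigned int irq, int level)
    {
            struct kvm_irq_level event = {
                    .irq = irq,
                    .level = level,
            };

            return ioctl(vm_fd, KVM_IRQ_LINE, &event);
    }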
@@ -2768,12 +3129,14 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma, | |||
2768 | unsigned long pgoff; | 3129 | unsigned long pgoff; |
2769 | struct page *page; | 3130 | struct page *page; |
2770 | 3131 | ||
2771 | *type = VM_FAULT_MINOR; | ||
2772 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 3132 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; |
2773 | page = gfn_to_page(kvm, pgoff); | 3133 | page = gfn_to_page(kvm, pgoff); |
2774 | if (!page) | 3134 | if (!page) |
2775 | return NOPAGE_SIGBUS; | 3135 | return NOPAGE_SIGBUS; |
2776 | get_page(page); | 3136 | get_page(page); |
3137 | if (type != NULL) | ||
3138 | *type = VM_FAULT_MINOR; | ||
3139 | |||
2777 | return page; | 3140 | return page; |
2778 | } | 3141 | } |
2779 | 3142 | ||
@@ -2861,12 +3224,20 @@ static long kvm_dev_ioctl(struct file *filp, | |||
2861 | r = 0; | 3224 | r = 0; |
2862 | break; | 3225 | break; |
2863 | } | 3226 | } |
2864 | case KVM_CHECK_EXTENSION: | 3227 | case KVM_CHECK_EXTENSION: { |
2865 | /* | 3228 | int ext = (long)argp; |
2866 | * No extensions defined at present. | 3229 | |
2867 | */ | 3230 | switch (ext) { |
2868 | r = 0; | 3231 | case KVM_CAP_IRQCHIP: |
3232 | case KVM_CAP_HLT: | ||
3233 | r = 1; | ||
3234 | break; | ||
3235 | default: | ||
3236 | r = 0; | ||
3237 | break; | ||
3238 | } | ||
2869 | break; | 3239 | break; |
3240 | } | ||
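KVM_CHECK_EXTENSION finally advertises something: it returns 1 for KVM_CAP_IRQCHIP and KVM_CAP_HLT and 0 otherwise, so a VMM can probe /dev/kvm before depending on the new paths. A sketch:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int have_in_kernel_irqchip(int kvm_fd)
    {
            /* 0 on kernels without this patch, 1 with it. */
            return ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP) > 0;
    }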
2870 | case KVM_GET_VCPU_MMAP_SIZE: | 3241 | case KVM_GET_VCPU_MMAP_SIZE: |
2871 | r = -EINVAL; | 3242 | r = -EINVAL; |
2872 | if (arg) | 3243 | if (arg) |
@@ -2881,8 +3252,6 @@ out: | |||
2881 | } | 3252 | } |
2882 | 3253 | ||
2883 | static struct file_operations kvm_chardev_ops = { | 3254 | static struct file_operations kvm_chardev_ops = { |
2884 | .open = kvm_dev_open, | ||
2885 | .release = kvm_dev_release, | ||
2886 | .unlocked_ioctl = kvm_dev_ioctl, | 3255 | .unlocked_ioctl = kvm_dev_ioctl, |
2887 | .compat_ioctl = kvm_dev_ioctl, | 3256 | .compat_ioctl = kvm_dev_ioctl, |
2888 | }; | 3257 | }; |
@@ -2893,25 +3262,6 @@ static struct miscdevice kvm_dev = { | |||
2893 | &kvm_chardev_ops, | 3262 | &kvm_chardev_ops, |
2894 | }; | 3263 | }; |
2895 | 3264 | ||
2896 | static int kvm_reboot(struct notifier_block *notifier, unsigned long val, | ||
2897 | void *v) | ||
2898 | { | ||
2899 | if (val == SYS_RESTART) { | ||
2900 | /* | ||
2901 | * Some (well, at least mine) BIOSes hang on reboot if | ||
2902 | * in vmx root mode. | ||
2903 | */ | ||
2904 | printk(KERN_INFO "kvm: exiting hardware virtualization\n"); | ||
2905 | on_each_cpu(hardware_disable, NULL, 0, 1); | ||
2906 | } | ||
2907 | return NOTIFY_OK; | ||
2908 | } | ||
2909 | |||
2910 | static struct notifier_block kvm_reboot_notifier = { | ||
2911 | .notifier_call = kvm_reboot, | ||
2912 | .priority = 0, | ||
2913 | }; | ||
2914 | |||
2915 | /* | 3265 | /* |
2916 | * Make sure that a cpu that is being hot-unplugged does not have any vcpus | 3266 | * Make sure that a cpu that is being hot-unplugged does not have any vcpus |
2917 | * cached on it. | 3267 | * cached on it. |
@@ -2925,7 +3275,9 @@ static void decache_vcpus_on_cpu(int cpu) | |||
2925 | spin_lock(&kvm_lock); | 3275 | spin_lock(&kvm_lock); |
2926 | list_for_each_entry(vm, &vm_list, vm_list) | 3276 | list_for_each_entry(vm, &vm_list, vm_list) |
2927 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | 3277 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { |
2928 | vcpu = &vm->vcpus[i]; | 3278 | vcpu = vm->vcpus[i]; |
3279 | if (!vcpu) | ||
3280 | continue; | ||
2929 | /* | 3281 | /* |
2930 | * If the vcpu is locked, then it is running on some | 3282 | * If the vcpu is locked, then it is running on some |
2931 | * other cpu and therefore it is not cached on the | 3283 | * other cpu and therefore it is not cached on the |
@@ -2936,7 +3288,7 @@ static void decache_vcpus_on_cpu(int cpu) | |||
2936 | */ | 3288 | */ |
2937 | if (mutex_trylock(&vcpu->mutex)) { | 3289 | if (mutex_trylock(&vcpu->mutex)) { |
2938 | if (vcpu->cpu == cpu) { | 3290 | if (vcpu->cpu == cpu) { |
2939 | kvm_arch_ops->vcpu_decache(vcpu); | 3291 | kvm_x86_ops->vcpu_decache(vcpu); |
2940 | vcpu->cpu = -1; | 3292 | vcpu->cpu = -1; |
2941 | } | 3293 | } |
2942 | mutex_unlock(&vcpu->mutex); | 3294 | mutex_unlock(&vcpu->mutex); |
@@ -2952,7 +3304,7 @@ static void hardware_enable(void *junk) | |||
2952 | if (cpu_isset(cpu, cpus_hardware_enabled)) | 3304 | if (cpu_isset(cpu, cpus_hardware_enabled)) |
2953 | return; | 3305 | return; |
2954 | cpu_set(cpu, cpus_hardware_enabled); | 3306 | cpu_set(cpu, cpus_hardware_enabled); |
2955 | kvm_arch_ops->hardware_enable(NULL); | 3307 | kvm_x86_ops->hardware_enable(NULL); |
2956 | } | 3308 | } |
2957 | 3309 | ||
2958 | static void hardware_disable(void *junk) | 3310 | static void hardware_disable(void *junk) |
@@ -2963,7 +3315,7 @@ static void hardware_disable(void *junk) | |||
2963 | return; | 3315 | return; |
2964 | cpu_clear(cpu, cpus_hardware_enabled); | 3316 | cpu_clear(cpu, cpus_hardware_enabled); |
2965 | decache_vcpus_on_cpu(cpu); | 3317 | decache_vcpus_on_cpu(cpu); |
2966 | kvm_arch_ops->hardware_disable(NULL); | 3318 | kvm_x86_ops->hardware_disable(NULL); |
2967 | } | 3319 | } |
2968 | 3320 | ||
2969 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | 3321 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, |
@@ -2994,6 +3346,25 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | |||
2994 | return NOTIFY_OK; | 3346 | return NOTIFY_OK; |
2995 | } | 3347 | } |
2996 | 3348 | ||
3349 | static int kvm_reboot(struct notifier_block *notifier, unsigned long val, | ||
3350 | void *v) | ||
3351 | { | ||
3352 | if (val == SYS_RESTART) { | ||
3353 | /* | ||
3354 | * Some (well, at least mine) BIOSes hang on reboot if | ||
3355 | * in vmx root mode. | ||
3356 | */ | ||
3357 | printk(KERN_INFO "kvm: exiting hardware virtualization\n"); | ||
3358 | on_each_cpu(hardware_disable, NULL, 0, 1); | ||
3359 | } | ||
3360 | return NOTIFY_OK; | ||
3361 | } | ||
3362 | |||
3363 | static struct notifier_block kvm_reboot_notifier = { | ||
3364 | .notifier_call = kvm_reboot, | ||
3365 | .priority = 0, | ||
3366 | }; | ||
3367 | |||
2997 | void kvm_io_bus_init(struct kvm_io_bus *bus) | 3368 | void kvm_io_bus_init(struct kvm_io_bus *bus) |
2998 | { | 3369 | { |
2999 | memset(bus, 0, sizeof(*bus)); | 3370 | memset(bus, 0, sizeof(*bus)); |
@@ -3047,18 +3418,15 @@ static u64 stat_get(void *_offset) | |||
3047 | spin_lock(&kvm_lock); | 3418 | spin_lock(&kvm_lock); |
3048 | list_for_each_entry(kvm, &vm_list, vm_list) | 3419 | list_for_each_entry(kvm, &vm_list, vm_list) |
3049 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | 3420 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { |
3050 | vcpu = &kvm->vcpus[i]; | 3421 | vcpu = kvm->vcpus[i]; |
3051 | total += *(u32 *)((void *)vcpu + offset); | 3422 | if (vcpu) |
3423 | total += *(u32 *)((void *)vcpu + offset); | ||
3052 | } | 3424 | } |
3053 | spin_unlock(&kvm_lock); | 3425 | spin_unlock(&kvm_lock); |
3054 | return total; | 3426 | return total; |
3055 | } | 3427 | } |
3056 | 3428 | ||
3057 | static void stat_set(void *offset, u64 val) | 3429 | DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, NULL, "%llu\n"); |
3058 | { | ||
3059 | } | ||
3060 | |||
3061 | DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, stat_set, "%llu\n"); | ||
3062 | 3430 | ||
3063 | static __init void kvm_init_debug(void) | 3431 | static __init void kvm_init_debug(void) |
3064 | { | 3432 | { |
@@ -3105,11 +3473,34 @@ static struct sys_device kvm_sysdev = { | |||
3105 | 3473 | ||
3106 | hpa_t bad_page_address; | 3474 | hpa_t bad_page_address; |
3107 | 3475 | ||
3108 | int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) | 3476 | static inline |
3477 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) | ||
3478 | { | ||
3479 | return container_of(pn, struct kvm_vcpu, preempt_notifier); | ||
3480 | } | ||
3481 | |||
3482 | static void kvm_sched_in(struct preempt_notifier *pn, int cpu) | ||
3483 | { | ||
3484 | struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); | ||
3485 | |||
3486 | kvm_x86_ops->vcpu_load(vcpu, cpu); | ||
3487 | } | ||
3488 | |||
3489 | static void kvm_sched_out(struct preempt_notifier *pn, | ||
3490 | struct task_struct *next) | ||
3491 | { | ||
3492 | struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); | ||
3493 | |||
3494 | kvm_x86_ops->vcpu_put(vcpu); | ||
3495 | } | ||
3496 | |||
3497 | int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size, | ||
3498 | struct module *module) | ||
3109 | { | 3499 | { |
3110 | int r; | 3500 | int r; |
3501 | int cpu; | ||
3111 | 3502 | ||
3112 | if (kvm_arch_ops) { | 3503 | if (kvm_x86_ops) { |
3113 | printk(KERN_ERR "kvm: already loaded the other module\n"); | 3504 | printk(KERN_ERR "kvm: already loaded the other module\n"); |
3114 | return -EEXIST; | 3505 | return -EEXIST; |
3115 | } | 3506 | } |
@@ -3123,12 +3514,20 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) | |||
3123 | return -EOPNOTSUPP; | 3514 | return -EOPNOTSUPP; |
3124 | } | 3515 | } |
3125 | 3516 | ||
3126 | kvm_arch_ops = ops; | 3517 | kvm_x86_ops = ops; |
3127 | 3518 | ||
3128 | r = kvm_arch_ops->hardware_setup(); | 3519 | r = kvm_x86_ops->hardware_setup(); |
3129 | if (r < 0) | 3520 | if (r < 0) |
3130 | goto out; | 3521 | goto out; |
3131 | 3522 | ||
3523 | for_each_online_cpu(cpu) { | ||
3524 | smp_call_function_single(cpu, | ||
3525 | kvm_x86_ops->check_processor_compatibility, | ||
3526 | &r, 0, 1); | ||
3527 | if (r < 0) | ||
3528 | goto out_free_0; | ||
3529 | } | ||
3530 | |||
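The new loop runs the compatibility check on every online CPU via smp_call_function_single(), passing &r as the opaque argument; the callback is expected to write 0 or a negative errno into it. A hypothetical shape of such a callback (the real implementations live in the vendor modules):

        /* Hypothetical: executes on the target CPU; &r from the loop above
         * arrives as the opaque pointer. */
        static void example_check_processor_compat(void *rtn)
        {
                int supported = 1;      /* stand-in for a real CPUID/MSR probe */

                *(int *)rtn = supported ? 0 : -EOPNOTSUPP;
        }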
3132 | on_each_cpu(hardware_enable, NULL, 0, 1); | 3531 | on_each_cpu(hardware_enable, NULL, 0, 1); |
3133 | r = register_cpu_notifier(&kvm_cpu_notifier); | 3532 | r = register_cpu_notifier(&kvm_cpu_notifier); |
3134 | if (r) | 3533 | if (r) |
@@ -3143,6 +3542,14 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) | |||
3143 | if (r) | 3542 | if (r) |
3144 | goto out_free_3; | 3543 | goto out_free_3; |
3145 | 3544 | ||
3545 | /* A kmem cache lets us meet the alignment requirements of fx_save. */ | ||
3546 | kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, | ||
3547 | __alignof__(struct kvm_vcpu), 0, 0); | ||
3548 | if (!kvm_vcpu_cache) { | ||
3549 | r = -ENOMEM; | ||
3550 | goto out_free_4; | ||
3551 | } | ||
3552 | |||
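Because vcpu_size is supplied by the caller of kvm_init_x86(), the one cache serves whatever container structure the vendor module wraps around struct kvm_vcpu. A hedged sketch of the allocate/free pairing, assuming a hypothetical container that embeds struct kvm_vcpu as its first member:

        /* Illustrative only: the cache is sized for the vendor-specific
         * container, so allocations come back suitably aligned for fx_save. */
        struct example_vcpu {
                struct kvm_vcpu vcpu;   /* kept first so pointers convert cleanly */
                /* vendor-private state would follow */
        };

        static struct example_vcpu *example_vcpu_alloc(void)
        {
                return kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
        }

        static void example_vcpu_free(struct example_vcpu *v)
        {
                kmem_cache_free(kvm_vcpu_cache, v);
        }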
3146 | kvm_chardev_ops.owner = module; | 3553 | kvm_chardev_ops.owner = module; |
3147 | 3554 | ||
3148 | r = misc_register(&kvm_dev); | 3555 | r = misc_register(&kvm_dev); |
@@ -3151,9 +3558,14 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) | |||
3151 | goto out_free; | 3558 | goto out_free; |
3152 | } | 3559 | } |
3153 | 3560 | ||
3561 | kvm_preempt_ops.sched_in = kvm_sched_in; | ||
3562 | kvm_preempt_ops.sched_out = kvm_sched_out; | ||
3563 | |||
3154 | return r; | 3564 | return r; |
3155 | 3565 | ||
3156 | out_free: | 3566 | out_free: |
3567 | kmem_cache_destroy(kvm_vcpu_cache); | ||
3568 | out_free_4: | ||
3157 | sysdev_unregister(&kvm_sysdev); | 3569 | sysdev_unregister(&kvm_sysdev); |
3158 | out_free_3: | 3570 | out_free_3: |
3159 | sysdev_class_unregister(&kvm_sysdev_class); | 3571 | sysdev_class_unregister(&kvm_sysdev_class); |
@@ -3162,22 +3574,24 @@ out_free_2: | |||
3162 | unregister_cpu_notifier(&kvm_cpu_notifier); | 3574 | unregister_cpu_notifier(&kvm_cpu_notifier); |
3163 | out_free_1: | 3575 | out_free_1: |
3164 | on_each_cpu(hardware_disable, NULL, 0, 1); | 3576 | on_each_cpu(hardware_disable, NULL, 0, 1); |
3165 | kvm_arch_ops->hardware_unsetup(); | 3577 | out_free_0: |
3578 | kvm_x86_ops->hardware_unsetup(); | ||
3166 | out: | 3579 | out: |
3167 | kvm_arch_ops = NULL; | 3580 | kvm_x86_ops = NULL; |
3168 | return r; | 3581 | return r; |
3169 | } | 3582 | } |
3170 | 3583 | ||
3171 | void kvm_exit_arch(void) | 3584 | void kvm_exit_x86(void) |
3172 | { | 3585 | { |
3173 | misc_deregister(&kvm_dev); | 3586 | misc_deregister(&kvm_dev); |
3587 | kmem_cache_destroy(kvm_vcpu_cache); | ||
3174 | sysdev_unregister(&kvm_sysdev); | 3588 | sysdev_unregister(&kvm_sysdev); |
3175 | sysdev_class_unregister(&kvm_sysdev_class); | 3589 | sysdev_class_unregister(&kvm_sysdev_class); |
3176 | unregister_reboot_notifier(&kvm_reboot_notifier); | 3590 | unregister_reboot_notifier(&kvm_reboot_notifier); |
3177 | unregister_cpu_notifier(&kvm_cpu_notifier); | 3591 | unregister_cpu_notifier(&kvm_cpu_notifier); |
3178 | on_each_cpu(hardware_disable, NULL, 0, 1); | 3592 | on_each_cpu(hardware_disable, NULL, 0, 1); |
3179 | kvm_arch_ops->hardware_unsetup(); | 3593 | kvm_x86_ops->hardware_unsetup(); |
3180 | kvm_arch_ops = NULL; | 3594 | kvm_x86_ops = NULL; |
3181 | } | 3595 | } |
3182 | 3596 | ||
3183 | static __init int kvm_init(void) | 3597 | static __init int kvm_init(void) |
@@ -3220,5 +3634,5 @@ static __exit void kvm_exit(void) | |||
3220 | module_init(kvm_init) | 3634 | module_init(kvm_init) |
3221 | module_exit(kvm_exit) | 3635 | module_exit(kvm_exit) |
3222 | 3636 | ||
3223 | EXPORT_SYMBOL_GPL(kvm_init_arch); | 3637 | EXPORT_SYMBOL_GPL(kvm_init_x86); |
3224 | EXPORT_SYMBOL_GPL(kvm_exit_arch); | 3638 | EXPORT_SYMBOL_GPL(kvm_exit_x86); |
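The renamed exports are the entry points the vendor modules call from their own module_init()/module_exit(). A hedged sketch of that glue, with hypothetical names standing in for whatever the Intel module actually defines (vmx_x86_ops and struct vcpu_vmx are assumptions, not part of this patch):

        /* Hypothetical vendor-module glue built on the exported entry points. */
        static int __init vmx_init_example(void)
        {
                return kvm_init_x86(&vmx_x86_ops, sizeof(struct vcpu_vmx),
                                    THIS_MODULE);
        }

        static void __exit vmx_exit_example(void)
        {
                kvm_exit_x86();
        }

        module_init(vmx_init_example);
        module_exit(vmx_exit_example);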