diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-04 12:30:33 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-04 12:30:33 -0400 |
commit | ecefbd94b834fa32559d854646d777c56749ef1c (patch) | |
tree | ca8958900ad9e208a8e5fb7704f1b66dc76131b4 /arch/x86 | |
parent | ce57e981f2b996aaca2031003b3f866368307766 (diff) | |
parent | 3d11df7abbff013b811d5615320580cd5d9d7d31 (diff) |
Merge tag 'kvm-3.7-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Avi Kivity:
"Highlights of the changes for this release include support for vfio
level triggered interrupts, improved big real mode support on older
Intels, a streamlined guest page table walker, guest APIC speedups,
PIO optimizations, better overcommit handling, and read-only memory."
* tag 'kvm-3.7-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (138 commits)
KVM: s390: Fix vcpu_load handling in interrupt code
KVM: x86: Fix guest debug across vcpu INIT reset
KVM: Add resampling irqfds for level triggered interrupts
KVM: optimize apic interrupt delivery
KVM: MMU: Eliminate pointless temporary 'ac'
KVM: MMU: Avoid access/dirty update loop if all is well
KVM: MMU: Eliminate eperm temporary
KVM: MMU: Optimize is_last_gpte()
KVM: MMU: Simplify walk_addr_generic() loop
KVM: MMU: Optimize pte permission checks
KVM: MMU: Update accessed and dirty bits after guest pagetable walk
KVM: MMU: Move gpte_access() out of paging_tmpl.h
KVM: MMU: Optimize gpte_access() slightly
KVM: MMU: Push clean gpte write protection out of gpte_access()
KVM: clarify kvmclock documentation
KVM: make processes waiting on vcpu mutex killable
KVM: SVM: Make use of asm.h
KVM: VMX: Make use of asm.h
KVM: VMX: Make lto-friendly
KVM: x86: lapic: Clean up find_highest_vector() and count_vectors()
...
Conflicts:
arch/s390/include/asm/processor.h
arch/x86/kvm/i8259.c
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 21 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_emulate.h | 48 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 36 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_para.h | 6 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile | 3 | ||||
-rw-r--r-- | arch/x86/kernel/kvm.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/Kconfig | 2 | ||||
-rw-r--r-- | arch/x86/kvm/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kvm/cpuid.c | 14 | ||||
-rw-r--r-- | arch/x86/kvm/emulate.c | 538 | ||||
-rw-r--r-- | arch/x86/kvm/i8254.c | 64 | ||||
-rw-r--r-- | arch/x86/kvm/i8254.h | 6 | ||||
-rw-r--r-- | arch/x86/kvm/i8259.c | 70 | ||||
-rw-r--r-- | arch/x86/kvm/irq.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/kvm_timer.h | 18 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 484 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.h | 61 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 240 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.h | 25 | ||||
-rw-r--r-- | arch/x86/kvm/mmu_audit.c | 8 | ||||
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 199 | ||||
-rw-r--r-- | arch/x86/kvm/pmu.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 82 | ||||
-rw-r--r-- | arch/x86/kvm/timer.c | 47 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 233 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 384 | ||||
-rw-r--r-- | arch/x86/kvm/x86.h | 1 |
29 files changed, 1489 insertions, 1113 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7f9a395c5254..b72777ff32a9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -586,23 +586,18 @@ config PARAVIRT_TIME_ACCOUNTING | |||
586 | 586 | ||
587 | source "arch/x86/xen/Kconfig" | 587 | source "arch/x86/xen/Kconfig" |
588 | 588 | ||
589 | config KVM_CLOCK | ||
590 | bool "KVM paravirtualized clock" | ||
591 | select PARAVIRT | ||
592 | select PARAVIRT_CLOCK | ||
593 | ---help--- | ||
594 | Turning on this option will allow you to run a paravirtualized clock | ||
595 | when running over the KVM hypervisor. Instead of relying on a PIT | ||
596 | (or probably other) emulation by the underlying device model, the host | ||
597 | provides the guest with timing infrastructure such as time of day, and | ||
598 | system time | ||
599 | |||
600 | config KVM_GUEST | 589 | config KVM_GUEST |
601 | bool "KVM Guest support" | 590 | bool "KVM Guest support (including kvmclock)" |
591 | select PARAVIRT | ||
602 | select PARAVIRT | 592 | select PARAVIRT |
593 | select PARAVIRT_CLOCK | ||
594 | default y if PARAVIRT_GUEST | ||
603 | ---help--- | 595 | ---help--- |
604 | This option enables various optimizations for running under the KVM | 596 | This option enables various optimizations for running under the KVM |
605 | hypervisor. | 597 | hypervisor. It includes a paravirtualized clock, so that instead |
598 | of relying on a PIT (or probably other) emulation by the | ||
599 | underlying device model, the host provides the guest with | ||
600 | timing infrastructure such as time of day, and system time | ||
606 | 601 | ||
607 | source "arch/x86/lguest/Kconfig" | 602 | source "arch/x86/lguest/Kconfig" |
608 | 603 | ||
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 41e08cb6a092..a65ec29e6ffb 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
@@ -41,6 +41,7 @@ | |||
41 | #define __KVM_HAVE_DEBUGREGS | 41 | #define __KVM_HAVE_DEBUGREGS |
42 | #define __KVM_HAVE_XSAVE | 42 | #define __KVM_HAVE_XSAVE |
43 | #define __KVM_HAVE_XCRS | 43 | #define __KVM_HAVE_XCRS |
44 | #define __KVM_HAVE_READONLY_MEM | ||
44 | 45 | ||
45 | /* Architectural interrupt line count. */ | 46 | /* Architectural interrupt line count. */ |
46 | #define KVM_NR_INTERRUPTS 256 | 47 | #define KVM_NR_INTERRUPTS 256 |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index c764f43b71c5..15f960c06ff7 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -86,6 +86,19 @@ struct x86_instruction_info { | |||
86 | 86 | ||
87 | struct x86_emulate_ops { | 87 | struct x86_emulate_ops { |
88 | /* | 88 | /* |
89 | * read_gpr: read a general purpose register (rax - r15) | ||
90 | * | ||
91 | * @reg: gpr number. | ||
92 | */ | ||
93 | ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg); | ||
94 | /* | ||
95 | * write_gpr: write a general purpose register (rax - r15) | ||
96 | * | ||
97 | * @reg: gpr number. | ||
98 | * @val: value to write. | ||
99 | */ | ||
100 | void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val); | ||
101 | /* | ||
89 | * read_std: Read bytes of standard (non-emulated/special) memory. | 102 | * read_std: Read bytes of standard (non-emulated/special) memory. |
90 | * Used for descriptor reading. | 103 | * Used for descriptor reading. |
91 | * @addr: [IN ] Linear address from which to read. | 104 | * @addr: [IN ] Linear address from which to read. |
@@ -200,8 +213,9 @@ typedef u32 __attribute__((vector_size(16))) sse128_t; | |||
200 | 213 | ||
201 | /* Type, address-of, and value of an instruction's operand. */ | 214 | /* Type, address-of, and value of an instruction's operand. */ |
202 | struct operand { | 215 | struct operand { |
203 | enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_MM, OP_NONE } type; | 216 | enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type; |
204 | unsigned int bytes; | 217 | unsigned int bytes; |
218 | unsigned int count; | ||
205 | union { | 219 | union { |
206 | unsigned long orig_val; | 220 | unsigned long orig_val; |
207 | u64 orig_val64; | 221 | u64 orig_val64; |
@@ -221,6 +235,7 @@ struct operand { | |||
221 | char valptr[sizeof(unsigned long) + 2]; | 235 | char valptr[sizeof(unsigned long) + 2]; |
222 | sse128_t vec_val; | 236 | sse128_t vec_val; |
223 | u64 mm_val; | 237 | u64 mm_val; |
238 | void *data; | ||
224 | }; | 239 | }; |
225 | }; | 240 | }; |
226 | 241 | ||
@@ -236,14 +251,23 @@ struct read_cache { | |||
236 | unsigned long end; | 251 | unsigned long end; |
237 | }; | 252 | }; |
238 | 253 | ||
254 | /* Execution mode, passed to the emulator. */ | ||
255 | enum x86emul_mode { | ||
256 | X86EMUL_MODE_REAL, /* Real mode. */ | ||
257 | X86EMUL_MODE_VM86, /* Virtual 8086 mode. */ | ||
258 | X86EMUL_MODE_PROT16, /* 16-bit protected mode. */ | ||
259 | X86EMUL_MODE_PROT32, /* 32-bit protected mode. */ | ||
260 | X86EMUL_MODE_PROT64, /* 64-bit (long) mode. */ | ||
261 | }; | ||
262 | |||
239 | struct x86_emulate_ctxt { | 263 | struct x86_emulate_ctxt { |
240 | struct x86_emulate_ops *ops; | 264 | const struct x86_emulate_ops *ops; |
241 | 265 | ||
242 | /* Register state before/after emulation. */ | 266 | /* Register state before/after emulation. */ |
243 | unsigned long eflags; | 267 | unsigned long eflags; |
244 | unsigned long eip; /* eip before instruction emulation */ | 268 | unsigned long eip; /* eip before instruction emulation */ |
245 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ | 269 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ |
246 | int mode; | 270 | enum x86emul_mode mode; |
247 | 271 | ||
248 | /* interruptibility state, as a result of execution of STI or MOV SS */ | 272 | /* interruptibility state, as a result of execution of STI or MOV SS */ |
249 | int interruptibility; | 273 | int interruptibility; |
@@ -281,8 +305,10 @@ struct x86_emulate_ctxt { | |||
281 | bool rip_relative; | 305 | bool rip_relative; |
282 | unsigned long _eip; | 306 | unsigned long _eip; |
283 | struct operand memop; | 307 | struct operand memop; |
308 | u32 regs_valid; /* bitmaps of registers in _regs[] that can be read */ | ||
309 | u32 regs_dirty; /* bitmaps of registers in _regs[] that have been written */ | ||
284 | /* Fields above regs are cleared together. */ | 310 | /* Fields above regs are cleared together. */ |
285 | unsigned long regs[NR_VCPU_REGS]; | 311 | unsigned long _regs[NR_VCPU_REGS]; |
286 | struct operand *memopp; | 312 | struct operand *memopp; |
287 | struct fetch_cache fetch; | 313 | struct fetch_cache fetch; |
288 | struct read_cache io_read; | 314 | struct read_cache io_read; |
@@ -293,17 +319,6 @@ struct x86_emulate_ctxt { | |||
293 | #define REPE_PREFIX 0xf3 | 319 | #define REPE_PREFIX 0xf3 |
294 | #define REPNE_PREFIX 0xf2 | 320 | #define REPNE_PREFIX 0xf2 |
295 | 321 | ||
296 | /* Execution mode, passed to the emulator. */ | ||
297 | #define X86EMUL_MODE_REAL 0 /* Real mode. */ | ||
298 | #define X86EMUL_MODE_VM86 1 /* Virtual 8086 mode. */ | ||
299 | #define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ | ||
300 | #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ | ||
301 | #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ | ||
302 | |||
303 | /* any protected mode */ | ||
304 | #define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \ | ||
305 | X86EMUL_MODE_PROT64) | ||
306 | |||
307 | /* CPUID vendors */ | 322 | /* CPUID vendors */ |
308 | #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 | 323 | #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 |
309 | #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 | 324 | #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 |
@@ -394,4 +409,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | |||
394 | u16 tss_selector, int idt_index, int reason, | 409 | u16 tss_selector, int idt_index, int reason, |
395 | bool has_error_code, u32 error_code); | 410 | bool has_error_code, u32 error_code); |
396 | int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); | 411 | int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); |
412 | void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt); | ||
413 | void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt); | ||
414 | |||
397 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ | 415 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1eaa6b056670..b2e11f452435 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -271,10 +271,24 @@ struct kvm_mmu { | |||
271 | union kvm_mmu_page_role base_role; | 271 | union kvm_mmu_page_role base_role; |
272 | bool direct_map; | 272 | bool direct_map; |
273 | 273 | ||
274 | /* | ||
275 | * Bitmap; bit set = permission fault | ||
276 | * Byte index: page fault error code [4:1] | ||
277 | * Bit index: pte permissions in ACC_* format | ||
278 | */ | ||
279 | u8 permissions[16]; | ||
280 | |||
274 | u64 *pae_root; | 281 | u64 *pae_root; |
275 | u64 *lm_root; | 282 | u64 *lm_root; |
276 | u64 rsvd_bits_mask[2][4]; | 283 | u64 rsvd_bits_mask[2][4]; |
277 | 284 | ||
285 | /* | ||
286 | * Bitmap: bit set = last pte in walk | ||
287 | * index[0:1]: level (zero-based) | ||
288 | * index[2]: pte.ps | ||
289 | */ | ||
290 | u8 last_pte_bitmap; | ||
291 | |||
278 | bool nx; | 292 | bool nx; |
279 | 293 | ||
280 | u64 pdptrs[4]; /* pae */ | 294 | u64 pdptrs[4]; /* pae */ |
@@ -398,12 +412,15 @@ struct kvm_vcpu_arch { | |||
398 | struct x86_emulate_ctxt emulate_ctxt; | 412 | struct x86_emulate_ctxt emulate_ctxt; |
399 | bool emulate_regs_need_sync_to_vcpu; | 413 | bool emulate_regs_need_sync_to_vcpu; |
400 | bool emulate_regs_need_sync_from_vcpu; | 414 | bool emulate_regs_need_sync_from_vcpu; |
415 | int (*complete_userspace_io)(struct kvm_vcpu *vcpu); | ||
401 | 416 | ||
402 | gpa_t time; | 417 | gpa_t time; |
403 | struct pvclock_vcpu_time_info hv_clock; | 418 | struct pvclock_vcpu_time_info hv_clock; |
404 | unsigned int hw_tsc_khz; | 419 | unsigned int hw_tsc_khz; |
405 | unsigned int time_offset; | 420 | unsigned int time_offset; |
406 | struct page *time_page; | 421 | struct page *time_page; |
422 | /* set guest stopped flag in pvclock flags field */ | ||
423 | bool pvclock_set_guest_stopped_request; | ||
407 | 424 | ||
408 | struct { | 425 | struct { |
409 | u64 msr_val; | 426 | u64 msr_val; |
@@ -438,6 +455,7 @@ struct kvm_vcpu_arch { | |||
438 | unsigned long dr6; | 455 | unsigned long dr6; |
439 | unsigned long dr7; | 456 | unsigned long dr7; |
440 | unsigned long eff_db[KVM_NR_DB_REGS]; | 457 | unsigned long eff_db[KVM_NR_DB_REGS]; |
458 | unsigned long guest_debug_dr7; | ||
441 | 459 | ||
442 | u64 mcg_cap; | 460 | u64 mcg_cap; |
443 | u64 mcg_status; | 461 | u64 mcg_status; |
@@ -484,14 +502,24 @@ struct kvm_vcpu_arch { | |||
484 | }; | 502 | }; |
485 | 503 | ||
486 | struct kvm_lpage_info { | 504 | struct kvm_lpage_info { |
487 | unsigned long rmap_pde; | ||
488 | int write_count; | 505 | int write_count; |
489 | }; | 506 | }; |
490 | 507 | ||
491 | struct kvm_arch_memory_slot { | 508 | struct kvm_arch_memory_slot { |
509 | unsigned long *rmap[KVM_NR_PAGE_SIZES]; | ||
492 | struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; | 510 | struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; |
493 | }; | 511 | }; |
494 | 512 | ||
513 | struct kvm_apic_map { | ||
514 | struct rcu_head rcu; | ||
515 | u8 ldr_bits; | ||
516 | /* fields bellow are used to decode ldr values in different modes */ | ||
517 | u32 cid_shift, cid_mask, lid_mask; | ||
518 | struct kvm_lapic *phys_map[256]; | ||
519 | /* first index is cluster id second is cpu id in a cluster */ | ||
520 | struct kvm_lapic *logical_map[16][16]; | ||
521 | }; | ||
522 | |||
495 | struct kvm_arch { | 523 | struct kvm_arch { |
496 | unsigned int n_used_mmu_pages; | 524 | unsigned int n_used_mmu_pages; |
497 | unsigned int n_requested_mmu_pages; | 525 | unsigned int n_requested_mmu_pages; |
@@ -509,6 +537,8 @@ struct kvm_arch { | |||
509 | struct kvm_ioapic *vioapic; | 537 | struct kvm_ioapic *vioapic; |
510 | struct kvm_pit *vpit; | 538 | struct kvm_pit *vpit; |
511 | int vapics_in_nmi_mode; | 539 | int vapics_in_nmi_mode; |
540 | struct mutex apic_map_lock; | ||
541 | struct kvm_apic_map *apic_map; | ||
512 | 542 | ||
513 | unsigned int tss_addr; | 543 | unsigned int tss_addr; |
514 | struct page *apic_access_page; | 544 | struct page *apic_access_page; |
@@ -602,8 +632,7 @@ struct kvm_x86_ops { | |||
602 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); | 632 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); |
603 | void (*vcpu_put)(struct kvm_vcpu *vcpu); | 633 | void (*vcpu_put)(struct kvm_vcpu *vcpu); |
604 | 634 | ||
605 | void (*set_guest_debug)(struct kvm_vcpu *vcpu, | 635 | void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu); |
606 | struct kvm_guest_debug *dbg); | ||
607 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); | 636 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); |
608 | int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | 637 | int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); |
609 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); | 638 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); |
@@ -941,6 +970,7 @@ extern bool kvm_rebooting; | |||
941 | 970 | ||
942 | #define KVM_ARCH_WANT_MMU_NOTIFIER | 971 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
943 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | 972 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); |
973 | int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); | ||
944 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); | 974 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); |
945 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); | 975 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); |
946 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); | 976 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); |
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 2f7712e08b1e..eb3e9d85e1f1 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -102,21 +102,21 @@ struct kvm_vcpu_pv_apf_data { | |||
102 | extern void kvmclock_init(void); | 102 | extern void kvmclock_init(void); |
103 | extern int kvm_register_clock(char *txt); | 103 | extern int kvm_register_clock(char *txt); |
104 | 104 | ||
105 | #ifdef CONFIG_KVM_CLOCK | 105 | #ifdef CONFIG_KVM_GUEST |
106 | bool kvm_check_and_clear_guest_paused(void); | 106 | bool kvm_check_and_clear_guest_paused(void); |
107 | #else | 107 | #else |
108 | static inline bool kvm_check_and_clear_guest_paused(void) | 108 | static inline bool kvm_check_and_clear_guest_paused(void) |
109 | { | 109 | { |
110 | return false; | 110 | return false; |
111 | } | 111 | } |
112 | #endif /* CONFIG_KVMCLOCK */ | 112 | #endif /* CONFIG_KVM_GUEST */ |
113 | 113 | ||
114 | /* This instruction is vmcall. On non-VT architectures, it will generate a | 114 | /* This instruction is vmcall. On non-VT architectures, it will generate a |
115 | * trap that we will then rewrite to the appropriate instruction. | 115 | * trap that we will then rewrite to the appropriate instruction. |
116 | */ | 116 | */ |
117 | #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" | 117 | #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" |
118 | 118 | ||
119 | /* For KVM hypercalls, a three-byte sequence of either the vmrun or the vmmrun | 119 | /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall |
120 | * instruction. The hypervisor may replace it with something else but only the | 120 | * instruction. The hypervisor may replace it with something else but only the |
121 | * instructions are guaranteed to be supported. | 121 | * instructions are guaranteed to be supported. |
122 | * | 122 | * |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8d7a619718b5..a48ea05157d3 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -81,8 +81,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o | |||
81 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o | 81 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o |
82 | obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o | 82 | obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o |
83 | 83 | ||
84 | obj-$(CONFIG_KVM_GUEST) += kvm.o | 84 | obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o |
85 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o | ||
86 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o | 85 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o |
87 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o | 86 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o |
88 | obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o | 87 | obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index c1d61ee4b4f1..b3e5e51bc907 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -354,6 +354,7 @@ static void kvm_pv_guest_cpu_reboot(void *unused) | |||
354 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) | 354 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) |
355 | wrmsrl(MSR_KVM_PV_EOI_EN, 0); | 355 | wrmsrl(MSR_KVM_PV_EOI_EN, 0); |
356 | kvm_pv_disable_apf(); | 356 | kvm_pv_disable_apf(); |
357 | kvm_disable_steal_time(); | ||
357 | } | 358 | } |
358 | 359 | ||
359 | static int kvm_pv_reboot_notify(struct notifier_block *nb, | 360 | static int kvm_pv_reboot_notify(struct notifier_block *nb, |
@@ -396,9 +397,7 @@ void kvm_disable_steal_time(void) | |||
396 | #ifdef CONFIG_SMP | 397 | #ifdef CONFIG_SMP |
397 | static void __init kvm_smp_prepare_boot_cpu(void) | 398 | static void __init kvm_smp_prepare_boot_cpu(void) |
398 | { | 399 | { |
399 | #ifdef CONFIG_KVM_CLOCK | ||
400 | WARN_ON(kvm_register_clock("primary cpu clock")); | 400 | WARN_ON(kvm_register_clock("primary cpu clock")); |
401 | #endif | ||
402 | kvm_guest_cpu_init(); | 401 | kvm_guest_cpu_init(); |
403 | native_smp_prepare_boot_cpu(); | 402 | native_smp_prepare_boot_cpu(); |
404 | } | 403 | } |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4f165479c453..d609be046b57 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -957,7 +957,7 @@ void __init setup_arch(char **cmdline_p) | |||
957 | initmem_init(); | 957 | initmem_init(); |
958 | memblock_find_dma_reserve(); | 958 | memblock_find_dma_reserve(); |
959 | 959 | ||
960 | #ifdef CONFIG_KVM_CLOCK | 960 | #ifdef CONFIG_KVM_GUEST |
961 | kvmclock_init(); | 961 | kvmclock_init(); |
962 | #endif | 962 | #endif |
963 | 963 | ||
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a28f338843ea..586f00059805 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -20,6 +20,7 @@ if VIRTUALIZATION | |||
20 | config KVM | 20 | config KVM |
21 | tristate "Kernel-based Virtual Machine (KVM) support" | 21 | tristate "Kernel-based Virtual Machine (KVM) support" |
22 | depends on HAVE_KVM | 22 | depends on HAVE_KVM |
23 | depends on HIGH_RES_TIMERS | ||
23 | # for device assignment: | 24 | # for device assignment: |
24 | depends on PCI | 25 | depends on PCI |
25 | # for TASKSTATS/TASK_DELAY_ACCT: | 26 | # for TASKSTATS/TASK_DELAY_ACCT: |
@@ -37,6 +38,7 @@ config KVM | |||
37 | select TASK_DELAY_ACCT | 38 | select TASK_DELAY_ACCT |
38 | select PERF_EVENTS | 39 | select PERF_EVENTS |
39 | select HAVE_KVM_MSI | 40 | select HAVE_KVM_MSI |
41 | select HAVE_KVM_CPU_RELAX_INTERCEPT | ||
40 | ---help--- | 42 | ---help--- |
41 | Support hosting fully virtualized guest machines using hardware | 43 | Support hosting fully virtualized guest machines using hardware |
42 | virtualization extensions. You will need a fairly recent | 44 | virtualization extensions. You will need a fairly recent |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 4f579e8dcacf..04d30401c5cb 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -12,7 +12,7 @@ kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) | |||
12 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) | 12 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) |
13 | 13 | ||
14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | 14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ |
15 | i8254.o timer.o cpuid.o pmu.o | 15 | i8254.o cpuid.o pmu.o |
16 | kvm-intel-y += vmx.o | 16 | kvm-intel-y += vmx.o |
17 | kvm-amd-y += svm.o | 17 | kvm-amd-y += svm.o |
18 | 18 | ||
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0595f1397b7c..ec79e773342e 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -316,7 +316,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
316 | } | 316 | } |
317 | case 7: { | 317 | case 7: { |
318 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 318 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
319 | /* Mask ebx against host capbability word 9 */ | 319 | /* Mask ebx against host capability word 9 */ |
320 | if (index == 0) { | 320 | if (index == 0) { |
321 | entry->ebx &= kvm_supported_word9_x86_features; | 321 | entry->ebx &= kvm_supported_word9_x86_features; |
322 | cpuid_mask(&entry->ebx, 9); | 322 | cpuid_mask(&entry->ebx, 9); |
@@ -397,8 +397,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
397 | break; | 397 | break; |
398 | } | 398 | } |
399 | case KVM_CPUID_SIGNATURE: { | 399 | case KVM_CPUID_SIGNATURE: { |
400 | char signature[12] = "KVMKVMKVM\0\0"; | 400 | static const char signature[12] = "KVMKVMKVM\0\0"; |
401 | u32 *sigptr = (u32 *)signature; | 401 | const u32 *sigptr = (const u32 *)signature; |
402 | entry->eax = KVM_CPUID_FEATURES; | 402 | entry->eax = KVM_CPUID_FEATURES; |
403 | entry->ebx = sigptr[0]; | 403 | entry->ebx = sigptr[0]; |
404 | entry->ecx = sigptr[1]; | 404 | entry->ecx = sigptr[1]; |
@@ -484,10 +484,10 @@ struct kvm_cpuid_param { | |||
484 | u32 func; | 484 | u32 func; |
485 | u32 idx; | 485 | u32 idx; |
486 | bool has_leaf_count; | 486 | bool has_leaf_count; |
487 | bool (*qualifier)(struct kvm_cpuid_param *param); | 487 | bool (*qualifier)(const struct kvm_cpuid_param *param); |
488 | }; | 488 | }; |
489 | 489 | ||
490 | static bool is_centaur_cpu(struct kvm_cpuid_param *param) | 490 | static bool is_centaur_cpu(const struct kvm_cpuid_param *param) |
491 | { | 491 | { |
492 | return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; | 492 | return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; |
493 | } | 493 | } |
@@ -498,7 +498,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | |||
498 | struct kvm_cpuid_entry2 *cpuid_entries; | 498 | struct kvm_cpuid_entry2 *cpuid_entries; |
499 | int limit, nent = 0, r = -E2BIG, i; | 499 | int limit, nent = 0, r = -E2BIG, i; |
500 | u32 func; | 500 | u32 func; |
501 | static struct kvm_cpuid_param param[] = { | 501 | static const struct kvm_cpuid_param param[] = { |
502 | { .func = 0, .has_leaf_count = true }, | 502 | { .func = 0, .has_leaf_count = true }, |
503 | { .func = 0x80000000, .has_leaf_count = true }, | 503 | { .func = 0x80000000, .has_leaf_count = true }, |
504 | { .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true }, | 504 | { .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true }, |
@@ -517,7 +517,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | |||
517 | 517 | ||
518 | r = 0; | 518 | r = 0; |
519 | for (i = 0; i < ARRAY_SIZE(param); i++) { | 519 | for (i = 0; i < ARRAY_SIZE(param); i++) { |
520 | struct kvm_cpuid_param *ent = ¶m[i]; | 520 | const struct kvm_cpuid_param *ent = ¶m[i]; |
521 | 521 | ||
522 | if (ent->qualifier && !ent->qualifier(ent)) | 522 | if (ent->qualifier && !ent->qualifier(ent)) |
523 | continue; | 523 | continue; |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a3b57a27be88..39171cb307ea 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -161,9 +161,9 @@ struct opcode { | |||
161 | u64 intercept : 8; | 161 | u64 intercept : 8; |
162 | union { | 162 | union { |
163 | int (*execute)(struct x86_emulate_ctxt *ctxt); | 163 | int (*execute)(struct x86_emulate_ctxt *ctxt); |
164 | struct opcode *group; | 164 | const struct opcode *group; |
165 | struct group_dual *gdual; | 165 | const struct group_dual *gdual; |
166 | struct gprefix *gprefix; | 166 | const struct gprefix *gprefix; |
167 | } u; | 167 | } u; |
168 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); | 168 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); |
169 | }; | 169 | }; |
@@ -202,6 +202,42 @@ struct gprefix { | |||
202 | #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a | 202 | #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a |
203 | #define EFLG_RESERVED_ONE_MASK 2 | 203 | #define EFLG_RESERVED_ONE_MASK 2 |
204 | 204 | ||
205 | static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr) | ||
206 | { | ||
207 | if (!(ctxt->regs_valid & (1 << nr))) { | ||
208 | ctxt->regs_valid |= 1 << nr; | ||
209 | ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr); | ||
210 | } | ||
211 | return ctxt->_regs[nr]; | ||
212 | } | ||
213 | |||
214 | static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr) | ||
215 | { | ||
216 | ctxt->regs_valid |= 1 << nr; | ||
217 | ctxt->regs_dirty |= 1 << nr; | ||
218 | return &ctxt->_regs[nr]; | ||
219 | } | ||
220 | |||
221 | static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr) | ||
222 | { | ||
223 | reg_read(ctxt, nr); | ||
224 | return reg_write(ctxt, nr); | ||
225 | } | ||
226 | |||
227 | static void writeback_registers(struct x86_emulate_ctxt *ctxt) | ||
228 | { | ||
229 | unsigned reg; | ||
230 | |||
231 | for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16) | ||
232 | ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]); | ||
233 | } | ||
234 | |||
235 | static void invalidate_registers(struct x86_emulate_ctxt *ctxt) | ||
236 | { | ||
237 | ctxt->regs_dirty = 0; | ||
238 | ctxt->regs_valid = 0; | ||
239 | } | ||
240 | |||
205 | /* | 241 | /* |
206 | * Instruction emulation: | 242 | * Instruction emulation: |
207 | * Most instructions are emulated directly via a fragment of inline assembly | 243 | * Most instructions are emulated directly via a fragment of inline assembly |
@@ -374,8 +410,8 @@ struct gprefix { | |||
374 | #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ | 410 | #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ |
375 | do { \ | 411 | do { \ |
376 | unsigned long _tmp; \ | 412 | unsigned long _tmp; \ |
377 | ulong *rax = &(ctxt)->regs[VCPU_REGS_RAX]; \ | 413 | ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX); \ |
378 | ulong *rdx = &(ctxt)->regs[VCPU_REGS_RDX]; \ | 414 | ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX); \ |
379 | \ | 415 | \ |
380 | __asm__ __volatile__ ( \ | 416 | __asm__ __volatile__ ( \ |
381 | _PRE_EFLAGS("0", "5", "1") \ | 417 | _PRE_EFLAGS("0", "5", "1") \ |
@@ -494,7 +530,7 @@ register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, in | |||
494 | 530 | ||
495 | static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) | 531 | static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) |
496 | { | 532 | { |
497 | masked_increment(&ctxt->regs[VCPU_REGS_RSP], stack_mask(ctxt), inc); | 533 | masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc); |
498 | } | 534 | } |
499 | 535 | ||
500 | static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) | 536 | static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) |
@@ -632,8 +668,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
632 | 668 | ||
633 | la = seg_base(ctxt, addr.seg) + addr.ea; | 669 | la = seg_base(ctxt, addr.seg) + addr.ea; |
634 | switch (ctxt->mode) { | 670 | switch (ctxt->mode) { |
635 | case X86EMUL_MODE_REAL: | ||
636 | break; | ||
637 | case X86EMUL_MODE_PROT64: | 671 | case X86EMUL_MODE_PROT64: |
638 | if (((signed long)la << 16) >> 16 != la) | 672 | if (((signed long)la << 16) >> 16 != la) |
639 | return emulate_gp(ctxt, 0); | 673 | return emulate_gp(ctxt, 0); |
@@ -655,7 +689,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
655 | if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) | 689 | if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) |
656 | goto bad; | 690 | goto bad; |
657 | } else { | 691 | } else { |
658 | /* exapand-down segment */ | 692 | /* expand-down segment */ |
659 | if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) | 693 | if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) |
660 | goto bad; | 694 | goto bad; |
661 | lim = desc.d ? 0xffffffff : 0xffff; | 695 | lim = desc.d ? 0xffffffff : 0xffff; |
@@ -663,7 +697,10 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
663 | goto bad; | 697 | goto bad; |
664 | } | 698 | } |
665 | cpl = ctxt->ops->cpl(ctxt); | 699 | cpl = ctxt->ops->cpl(ctxt); |
666 | rpl = sel & 3; | 700 | if (ctxt->mode == X86EMUL_MODE_REAL) |
701 | rpl = 0; | ||
702 | else | ||
703 | rpl = sel & 3; | ||
667 | cpl = max(cpl, rpl); | 704 | cpl = max(cpl, rpl); |
668 | if (!(desc.type & 8)) { | 705 | if (!(desc.type & 8)) { |
669 | /* data segment */ | 706 | /* data segment */ |
@@ -688,9 +725,9 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
688 | return X86EMUL_CONTINUE; | 725 | return X86EMUL_CONTINUE; |
689 | bad: | 726 | bad: |
690 | if (addr.seg == VCPU_SREG_SS) | 727 | if (addr.seg == VCPU_SREG_SS) |
691 | return emulate_ss(ctxt, addr.seg); | 728 | return emulate_ss(ctxt, sel); |
692 | else | 729 | else |
693 | return emulate_gp(ctxt, addr.seg); | 730 | return emulate_gp(ctxt, sel); |
694 | } | 731 | } |
695 | 732 | ||
696 | static int linearize(struct x86_emulate_ctxt *ctxt, | 733 | static int linearize(struct x86_emulate_ctxt *ctxt, |
@@ -786,14 +823,15 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, | |||
786 | * pointer into the block that addresses the relevant register. | 823 | * pointer into the block that addresses the relevant register. |
787 | * @highbyte_regs specifies whether to decode AH,CH,DH,BH. | 824 | * @highbyte_regs specifies whether to decode AH,CH,DH,BH. |
788 | */ | 825 | */ |
789 | static void *decode_register(u8 modrm_reg, unsigned long *regs, | 826 | static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg, |
790 | int highbyte_regs) | 827 | int highbyte_regs) |
791 | { | 828 | { |
792 | void *p; | 829 | void *p; |
793 | 830 | ||
794 | p = &regs[modrm_reg]; | ||
795 | if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8) | 831 | if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8) |
796 | p = (unsigned char *)&regs[modrm_reg & 3] + 1; | 832 | p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1; |
833 | else | ||
834 | p = reg_rmw(ctxt, modrm_reg); | ||
797 | return p; | 835 | return p; |
798 | } | 836 | } |
799 | 837 | ||
@@ -871,23 +909,23 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) | |||
871 | { | 909 | { |
872 | ctxt->ops->get_fpu(ctxt); | 910 | ctxt->ops->get_fpu(ctxt); |
873 | switch (reg) { | 911 | switch (reg) { |
874 | case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break; | 912 | case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break; |
875 | case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break; | 913 | case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break; |
876 | case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break; | 914 | case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break; |
877 | case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break; | 915 | case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break; |
878 | case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break; | 916 | case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break; |
879 | case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break; | 917 | case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break; |
880 | case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break; | 918 | case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break; |
881 | case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break; | 919 | case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break; |
882 | #ifdef CONFIG_X86_64 | 920 | #ifdef CONFIG_X86_64 |
883 | case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break; | 921 | case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break; |
884 | case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break; | 922 | case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break; |
885 | case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break; | 923 | case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break; |
886 | case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break; | 924 | case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break; |
887 | case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break; | 925 | case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break; |
888 | case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break; | 926 | case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break; |
889 | case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break; | 927 | case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break; |
890 | case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break; | 928 | case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break; |
891 | #endif | 929 | #endif |
892 | default: BUG(); | 930 | default: BUG(); |
893 | } | 931 | } |
@@ -899,23 +937,23 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, | |||
899 | { | 937 | { |
900 | ctxt->ops->get_fpu(ctxt); | 938 | ctxt->ops->get_fpu(ctxt); |
901 | switch (reg) { | 939 | switch (reg) { |
902 | case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break; | 940 | case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break; |
903 | case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break; | 941 | case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break; |
904 | case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break; | 942 | case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break; |
905 | case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break; | 943 | case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break; |
906 | case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break; | 944 | case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break; |
907 | case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break; | 945 | case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break; |
908 | case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break; | 946 | case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break; |
909 | case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break; | 947 | case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break; |
910 | #ifdef CONFIG_X86_64 | 948 | #ifdef CONFIG_X86_64 |
911 | case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break; | 949 | case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break; |
912 | case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break; | 950 | case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break; |
913 | case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break; | 951 | case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break; |
914 | case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break; | 952 | case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break; |
915 | case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break; | 953 | case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break; |
916 | case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break; | 954 | case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break; |
917 | case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break; | 955 | case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break; |
918 | case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break; | 956 | case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break; |
919 | #endif | 957 | #endif |
920 | default: BUG(); | 958 | default: BUG(); |
921 | } | 959 | } |
@@ -982,10 +1020,10 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, | |||
982 | 1020 | ||
983 | op->type = OP_REG; | 1021 | op->type = OP_REG; |
984 | if (ctxt->d & ByteOp) { | 1022 | if (ctxt->d & ByteOp) { |
985 | op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs); | 1023 | op->addr.reg = decode_register(ctxt, reg, highbyte_regs); |
986 | op->bytes = 1; | 1024 | op->bytes = 1; |
987 | } else { | 1025 | } else { |
988 | op->addr.reg = decode_register(reg, ctxt->regs, 0); | 1026 | op->addr.reg = decode_register(ctxt, reg, 0); |
989 | op->bytes = ctxt->op_bytes; | 1027 | op->bytes = ctxt->op_bytes; |
990 | } | 1028 | } |
991 | fetch_register_operand(op); | 1029 | fetch_register_operand(op); |
@@ -1020,8 +1058,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
1020 | if (ctxt->modrm_mod == 3) { | 1058 | if (ctxt->modrm_mod == 3) { |
1021 | op->type = OP_REG; | 1059 | op->type = OP_REG; |
1022 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | 1060 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; |
1023 | op->addr.reg = decode_register(ctxt->modrm_rm, | 1061 | op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, ctxt->d & ByteOp); |
1024 | ctxt->regs, ctxt->d & ByteOp); | ||
1025 | if (ctxt->d & Sse) { | 1062 | if (ctxt->d & Sse) { |
1026 | op->type = OP_XMM; | 1063 | op->type = OP_XMM; |
1027 | op->bytes = 16; | 1064 | op->bytes = 16; |
@@ -1042,10 +1079,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
1042 | op->type = OP_MEM; | 1079 | op->type = OP_MEM; |
1043 | 1080 | ||
1044 | if (ctxt->ad_bytes == 2) { | 1081 | if (ctxt->ad_bytes == 2) { |
1045 | unsigned bx = ctxt->regs[VCPU_REGS_RBX]; | 1082 | unsigned bx = reg_read(ctxt, VCPU_REGS_RBX); |
1046 | unsigned bp = ctxt->regs[VCPU_REGS_RBP]; | 1083 | unsigned bp = reg_read(ctxt, VCPU_REGS_RBP); |
1047 | unsigned si = ctxt->regs[VCPU_REGS_RSI]; | 1084 | unsigned si = reg_read(ctxt, VCPU_REGS_RSI); |
1048 | unsigned di = ctxt->regs[VCPU_REGS_RDI]; | 1085 | unsigned di = reg_read(ctxt, VCPU_REGS_RDI); |
1049 | 1086 | ||
1050 | /* 16-bit ModR/M decode. */ | 1087 | /* 16-bit ModR/M decode. */ |
1051 | switch (ctxt->modrm_mod) { | 1088 | switch (ctxt->modrm_mod) { |
@@ -1102,17 +1139,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
1102 | if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0) | 1139 | if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0) |
1103 | modrm_ea += insn_fetch(s32, ctxt); | 1140 | modrm_ea += insn_fetch(s32, ctxt); |
1104 | else { | 1141 | else { |
1105 | modrm_ea += ctxt->regs[base_reg]; | 1142 | modrm_ea += reg_read(ctxt, base_reg); |
1106 | adjust_modrm_seg(ctxt, base_reg); | 1143 | adjust_modrm_seg(ctxt, base_reg); |
1107 | } | 1144 | } |
1108 | if (index_reg != 4) | 1145 | if (index_reg != 4) |
1109 | modrm_ea += ctxt->regs[index_reg] << scale; | 1146 | modrm_ea += reg_read(ctxt, index_reg) << scale; |
1110 | } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) { | 1147 | } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) { |
1111 | if (ctxt->mode == X86EMUL_MODE_PROT64) | 1148 | if (ctxt->mode == X86EMUL_MODE_PROT64) |
1112 | ctxt->rip_relative = 1; | 1149 | ctxt->rip_relative = 1; |
1113 | } else { | 1150 | } else { |
1114 | base_reg = ctxt->modrm_rm; | 1151 | base_reg = ctxt->modrm_rm; |
1115 | modrm_ea += ctxt->regs[base_reg]; | 1152 | modrm_ea += reg_read(ctxt, base_reg); |
1116 | adjust_modrm_seg(ctxt, base_reg); | 1153 | adjust_modrm_seg(ctxt, base_reg); |
1117 | } | 1154 | } |
1118 | switch (ctxt->modrm_mod) { | 1155 | switch (ctxt->modrm_mod) { |
@@ -1179,24 +1216,21 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, | |||
1179 | int rc; | 1216 | int rc; |
1180 | struct read_cache *mc = &ctxt->mem_read; | 1217 | struct read_cache *mc = &ctxt->mem_read; |
1181 | 1218 | ||
1182 | while (size) { | 1219 | if (mc->pos < mc->end) |
1183 | int n = min(size, 8u); | 1220 | goto read_cached; |
1184 | size -= n; | ||
1185 | if (mc->pos < mc->end) | ||
1186 | goto read_cached; | ||
1187 | 1221 | ||
1188 | rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, n, | 1222 | WARN_ON((mc->end + size) >= sizeof(mc->data)); |
1189 | &ctxt->exception); | ||
1190 | if (rc != X86EMUL_CONTINUE) | ||
1191 | return rc; | ||
1192 | mc->end += n; | ||
1193 | 1223 | ||
1194 | read_cached: | 1224 | rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size, |
1195 | memcpy(dest, mc->data + mc->pos, n); | 1225 | &ctxt->exception); |
1196 | mc->pos += n; | 1226 | if (rc != X86EMUL_CONTINUE) |
1197 | dest += n; | 1227 | return rc; |
1198 | addr += n; | 1228 | |
1199 | } | 1229 | mc->end += size; |
1230 | |||
1231 | read_cached: | ||
1232 | memcpy(dest, mc->data + mc->pos, size); | ||
1233 | mc->pos += size; | ||
1200 | return X86EMUL_CONTINUE; | 1234 | return X86EMUL_CONTINUE; |
1201 | } | 1235 | } |
1202 | 1236 | ||
@@ -1253,10 +1287,10 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | |||
1253 | if (rc->pos == rc->end) { /* refill pio read ahead */ | 1287 | if (rc->pos == rc->end) { /* refill pio read ahead */ |
1254 | unsigned int in_page, n; | 1288 | unsigned int in_page, n; |
1255 | unsigned int count = ctxt->rep_prefix ? | 1289 | unsigned int count = ctxt->rep_prefix ? |
1256 | address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) : 1; | 1290 | address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1; |
1257 | in_page = (ctxt->eflags & EFLG_DF) ? | 1291 | in_page = (ctxt->eflags & EFLG_DF) ? |
1258 | offset_in_page(ctxt->regs[VCPU_REGS_RDI]) : | 1292 | offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) : |
1259 | PAGE_SIZE - offset_in_page(ctxt->regs[VCPU_REGS_RDI]); | 1293 | PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)); |
1260 | n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, | 1294 | n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, |
1261 | count); | 1295 | count); |
1262 | if (n == 0) | 1296 | if (n == 0) |
@@ -1267,8 +1301,15 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | |||
1267 | rc->end = n * size; | 1301 | rc->end = n * size; |
1268 | } | 1302 | } |
1269 | 1303 | ||
1270 | memcpy(dest, rc->data + rc->pos, size); | 1304 | if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) { |
1271 | rc->pos += size; | 1305 | ctxt->dst.data = rc->data + rc->pos; |
1306 | ctxt->dst.type = OP_MEM_STR; | ||
1307 | ctxt->dst.count = (rc->end - rc->pos) / size; | ||
1308 | rc->pos = rc->end; | ||
1309 | } else { | ||
1310 | memcpy(dest, rc->data + rc->pos, size); | ||
1311 | rc->pos += size; | ||
1312 | } | ||
1272 | return 1; | 1313 | return 1; |
1273 | } | 1314 | } |
1274 | 1315 | ||
@@ -1291,7 +1332,7 @@ static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1291 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | 1332 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, |
1292 | u16 selector, struct desc_ptr *dt) | 1333 | u16 selector, struct desc_ptr *dt) |
1293 | { | 1334 | { |
1294 | struct x86_emulate_ops *ops = ctxt->ops; | 1335 | const struct x86_emulate_ops *ops = ctxt->ops; |
1295 | 1336 | ||
1296 | if (selector & 1 << 2) { | 1337 | if (selector & 1 << 2) { |
1297 | struct desc_struct desc; | 1338 | struct desc_struct desc; |
@@ -1355,19 +1396,15 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1355 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | 1396 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ |
1356 | ulong desc_addr; | 1397 | ulong desc_addr; |
1357 | int ret; | 1398 | int ret; |
1399 | u16 dummy; | ||
1358 | 1400 | ||
1359 | memset(&seg_desc, 0, sizeof seg_desc); | 1401 | memset(&seg_desc, 0, sizeof seg_desc); |
1360 | 1402 | ||
1361 | if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) | 1403 | if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) |
1362 | || ctxt->mode == X86EMUL_MODE_REAL) { | 1404 | || ctxt->mode == X86EMUL_MODE_REAL) { |
1363 | /* set real mode segment descriptor */ | 1405 | /* set real mode segment descriptor */ |
1406 | ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg); | ||
1364 | set_desc_base(&seg_desc, selector << 4); | 1407 | set_desc_base(&seg_desc, selector << 4); |
1365 | set_desc_limit(&seg_desc, 0xffff); | ||
1366 | seg_desc.type = 3; | ||
1367 | seg_desc.p = 1; | ||
1368 | seg_desc.s = 1; | ||
1369 | if (ctxt->mode == X86EMUL_MODE_VM86) | ||
1370 | seg_desc.dpl = 3; | ||
1371 | goto load; | 1408 | goto load; |
1372 | } | 1409 | } |
1373 | 1410 | ||
@@ -1396,7 +1433,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1396 | err_code = selector & 0xfffc; | 1433 | err_code = selector & 0xfffc; |
1397 | err_vec = GP_VECTOR; | 1434 | err_vec = GP_VECTOR; |
1398 | 1435 | ||
1399 | /* can't load system descriptor into segment selecor */ | 1436 | /* can't load system descriptor into segment selector */ |
1400 | if (seg <= VCPU_SREG_GS && !seg_desc.s) | 1437 | if (seg <= VCPU_SREG_GS && !seg_desc.s) |
1401 | goto exception; | 1438 | goto exception; |
1402 | 1439 | ||
@@ -1516,6 +1553,14 @@ static int writeback(struct x86_emulate_ctxt *ctxt) | |||
1516 | if (rc != X86EMUL_CONTINUE) | 1553 | if (rc != X86EMUL_CONTINUE) |
1517 | return rc; | 1554 | return rc; |
1518 | break; | 1555 | break; |
1556 | case OP_MEM_STR: | ||
1557 | rc = segmented_write(ctxt, | ||
1558 | ctxt->dst.addr.mem, | ||
1559 | ctxt->dst.data, | ||
1560 | ctxt->dst.bytes * ctxt->dst.count); | ||
1561 | if (rc != X86EMUL_CONTINUE) | ||
1562 | return rc; | ||
1563 | break; | ||
1519 | case OP_XMM: | 1564 | case OP_XMM: |
1520 | write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm); | 1565 | write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm); |
1521 | break; | 1566 | break; |
@@ -1536,7 +1581,7 @@ static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes) | |||
1536 | struct segmented_address addr; | 1581 | struct segmented_address addr; |
1537 | 1582 | ||
1538 | rsp_increment(ctxt, -bytes); | 1583 | rsp_increment(ctxt, -bytes); |
1539 | addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt); | 1584 | addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt); |
1540 | addr.seg = VCPU_SREG_SS; | 1585 | addr.seg = VCPU_SREG_SS; |
1541 | 1586 | ||
1542 | return segmented_write(ctxt, addr, data, bytes); | 1587 | return segmented_write(ctxt, addr, data, bytes); |
@@ -1555,7 +1600,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
1555 | int rc; | 1600 | int rc; |
1556 | struct segmented_address addr; | 1601 | struct segmented_address addr; |
1557 | 1602 | ||
1558 | addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt); | 1603 | addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt); |
1559 | addr.seg = VCPU_SREG_SS; | 1604 | addr.seg = VCPU_SREG_SS; |
1560 | rc = segmented_read(ctxt, addr, dest, len); | 1605 | rc = segmented_read(ctxt, addr, dest, len); |
1561 | if (rc != X86EMUL_CONTINUE) | 1606 | if (rc != X86EMUL_CONTINUE) |
@@ -1623,26 +1668,28 @@ static int em_enter(struct x86_emulate_ctxt *ctxt) | |||
1623 | int rc; | 1668 | int rc; |
1624 | unsigned frame_size = ctxt->src.val; | 1669 | unsigned frame_size = ctxt->src.val; |
1625 | unsigned nesting_level = ctxt->src2.val & 31; | 1670 | unsigned nesting_level = ctxt->src2.val & 31; |
1671 | ulong rbp; | ||
1626 | 1672 | ||
1627 | if (nesting_level) | 1673 | if (nesting_level) |
1628 | return X86EMUL_UNHANDLEABLE; | 1674 | return X86EMUL_UNHANDLEABLE; |
1629 | 1675 | ||
1630 | rc = push(ctxt, &ctxt->regs[VCPU_REGS_RBP], stack_size(ctxt)); | 1676 | rbp = reg_read(ctxt, VCPU_REGS_RBP); |
1677 | rc = push(ctxt, &rbp, stack_size(ctxt)); | ||
1631 | if (rc != X86EMUL_CONTINUE) | 1678 | if (rc != X86EMUL_CONTINUE) |
1632 | return rc; | 1679 | return rc; |
1633 | assign_masked(&ctxt->regs[VCPU_REGS_RBP], ctxt->regs[VCPU_REGS_RSP], | 1680 | assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP), |
1634 | stack_mask(ctxt)); | 1681 | stack_mask(ctxt)); |
1635 | assign_masked(&ctxt->regs[VCPU_REGS_RSP], | 1682 | assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), |
1636 | ctxt->regs[VCPU_REGS_RSP] - frame_size, | 1683 | reg_read(ctxt, VCPU_REGS_RSP) - frame_size, |
1637 | stack_mask(ctxt)); | 1684 | stack_mask(ctxt)); |
1638 | return X86EMUL_CONTINUE; | 1685 | return X86EMUL_CONTINUE; |
1639 | } | 1686 | } |
1640 | 1687 | ||
1641 | static int em_leave(struct x86_emulate_ctxt *ctxt) | 1688 | static int em_leave(struct x86_emulate_ctxt *ctxt) |
1642 | { | 1689 | { |
1643 | assign_masked(&ctxt->regs[VCPU_REGS_RSP], ctxt->regs[VCPU_REGS_RBP], | 1690 | assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP), |
1644 | stack_mask(ctxt)); | 1691 | stack_mask(ctxt)); |
1645 | return emulate_pop(ctxt, &ctxt->regs[VCPU_REGS_RBP], ctxt->op_bytes); | 1692 | return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes); |
1646 | } | 1693 | } |
1647 | 1694 | ||
1648 | static int em_push_sreg(struct x86_emulate_ctxt *ctxt) | 1695 | static int em_push_sreg(struct x86_emulate_ctxt *ctxt) |
@@ -1670,13 +1717,13 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) | |||
1670 | 1717 | ||
1671 | static int em_pusha(struct x86_emulate_ctxt *ctxt) | 1718 | static int em_pusha(struct x86_emulate_ctxt *ctxt) |
1672 | { | 1719 | { |
1673 | unsigned long old_esp = ctxt->regs[VCPU_REGS_RSP]; | 1720 | unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP); |
1674 | int rc = X86EMUL_CONTINUE; | 1721 | int rc = X86EMUL_CONTINUE; |
1675 | int reg = VCPU_REGS_RAX; | 1722 | int reg = VCPU_REGS_RAX; |
1676 | 1723 | ||
1677 | while (reg <= VCPU_REGS_RDI) { | 1724 | while (reg <= VCPU_REGS_RDI) { |
1678 | (reg == VCPU_REGS_RSP) ? | 1725 | (reg == VCPU_REGS_RSP) ? |
1679 | (ctxt->src.val = old_esp) : (ctxt->src.val = ctxt->regs[reg]); | 1726 | (ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg)); |
1680 | 1727 | ||
1681 | rc = em_push(ctxt); | 1728 | rc = em_push(ctxt); |
1682 | if (rc != X86EMUL_CONTINUE) | 1729 | if (rc != X86EMUL_CONTINUE) |
@@ -1705,7 +1752,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) | |||
1705 | --reg; | 1752 | --reg; |
1706 | } | 1753 | } |
1707 | 1754 | ||
1708 | rc = emulate_pop(ctxt, &ctxt->regs[reg], ctxt->op_bytes); | 1755 | rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes); |
1709 | if (rc != X86EMUL_CONTINUE) | 1756 | if (rc != X86EMUL_CONTINUE) |
1710 | break; | 1757 | break; |
1711 | --reg; | 1758 | --reg; |
@@ -1713,9 +1760,9 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) | |||
1713 | return rc; | 1760 | return rc; |
1714 | } | 1761 | } |
1715 | 1762 | ||
1716 | int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) | 1763 | static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) |
1717 | { | 1764 | { |
1718 | struct x86_emulate_ops *ops = ctxt->ops; | 1765 | const struct x86_emulate_ops *ops = ctxt->ops; |
1719 | int rc; | 1766 | int rc; |
1720 | struct desc_ptr dt; | 1767 | struct desc_ptr dt; |
1721 | gva_t cs_addr; | 1768 | gva_t cs_addr; |
@@ -1762,11 +1809,22 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) | |||
1762 | return rc; | 1809 | return rc; |
1763 | } | 1810 | } |
1764 | 1811 | ||
1812 | int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) | ||
1813 | { | ||
1814 | int rc; | ||
1815 | |||
1816 | invalidate_registers(ctxt); | ||
1817 | rc = __emulate_int_real(ctxt, irq); | ||
1818 | if (rc == X86EMUL_CONTINUE) | ||
1819 | writeback_registers(ctxt); | ||
1820 | return rc; | ||
1821 | } | ||
1822 | |||
1765 | static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq) | 1823 | static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq) |
1766 | { | 1824 | { |
1767 | switch(ctxt->mode) { | 1825 | switch(ctxt->mode) { |
1768 | case X86EMUL_MODE_REAL: | 1826 | case X86EMUL_MODE_REAL: |
1769 | return emulate_int_real(ctxt, irq); | 1827 | return __emulate_int_real(ctxt, irq); |
1770 | case X86EMUL_MODE_VM86: | 1828 | case X86EMUL_MODE_VM86: |
1771 | case X86EMUL_MODE_PROT16: | 1829 | case X86EMUL_MODE_PROT16: |
1772 | case X86EMUL_MODE_PROT32: | 1830 | case X86EMUL_MODE_PROT32: |
@@ -1973,14 +2031,14 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) | |||
1973 | { | 2031 | { |
1974 | u64 old = ctxt->dst.orig_val64; | 2032 | u64 old = ctxt->dst.orig_val64; |
1975 | 2033 | ||
1976 | if (((u32) (old >> 0) != (u32) ctxt->regs[VCPU_REGS_RAX]) || | 2034 | if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) || |
1977 | ((u32) (old >> 32) != (u32) ctxt->regs[VCPU_REGS_RDX])) { | 2035 | ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { |
1978 | ctxt->regs[VCPU_REGS_RAX] = (u32) (old >> 0); | 2036 | *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); |
1979 | ctxt->regs[VCPU_REGS_RDX] = (u32) (old >> 32); | 2037 | *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32); |
1980 | ctxt->eflags &= ~EFLG_ZF; | 2038 | ctxt->eflags &= ~EFLG_ZF; |
1981 | } else { | 2039 | } else { |
1982 | ctxt->dst.val64 = ((u64)ctxt->regs[VCPU_REGS_RCX] << 32) | | 2040 | ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) | |
1983 | (u32) ctxt->regs[VCPU_REGS_RBX]; | 2041 | (u32) reg_read(ctxt, VCPU_REGS_RBX); |
1984 | 2042 | ||
1985 | ctxt->eflags |= EFLG_ZF; | 2043 | ctxt->eflags |= EFLG_ZF; |
1986 | } | 2044 | } |
@@ -2016,7 +2074,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) | |||
2016 | { | 2074 | { |
2017 | /* Save real source value, then compare EAX against destination. */ | 2075 | /* Save real source value, then compare EAX against destination. */ |
2018 | ctxt->src.orig_val = ctxt->src.val; | 2076 | ctxt->src.orig_val = ctxt->src.val; |
2019 | ctxt->src.val = ctxt->regs[VCPU_REGS_RAX]; | 2077 | ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); |
2020 | emulate_2op_SrcV(ctxt, "cmp"); | 2078 | emulate_2op_SrcV(ctxt, "cmp"); |
2021 | 2079 | ||
2022 | if (ctxt->eflags & EFLG_ZF) { | 2080 | if (ctxt->eflags & EFLG_ZF) { |
@@ -2025,7 +2083,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) | |||
2025 | } else { | 2083 | } else { |
2026 | /* Failure: write the value we saw to EAX. */ | 2084 | /* Failure: write the value we saw to EAX. */ |
2027 | ctxt->dst.type = OP_REG; | 2085 | ctxt->dst.type = OP_REG; |
2028 | ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX]; | 2086 | ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); |
2029 | } | 2087 | } |
2030 | return X86EMUL_CONTINUE; | 2088 | return X86EMUL_CONTINUE; |
2031 | } | 2089 | } |
@@ -2050,12 +2108,6 @@ static void | |||
2050 | setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | 2108 | setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, |
2051 | struct desc_struct *cs, struct desc_struct *ss) | 2109 | struct desc_struct *cs, struct desc_struct *ss) |
2052 | { | 2110 | { |
2053 | u16 selector; | ||
2054 | |||
2055 | memset(cs, 0, sizeof(struct desc_struct)); | ||
2056 | ctxt->ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS); | ||
2057 | memset(ss, 0, sizeof(struct desc_struct)); | ||
2058 | |||
2059 | cs->l = 0; /* will be adjusted later */ | 2111 | cs->l = 0; /* will be adjusted later */ |
2060 | set_desc_base(cs, 0); /* flat segment */ | 2112 | set_desc_base(cs, 0); /* flat segment */ |
2061 | cs->g = 1; /* 4kb granularity */ | 2113 | cs->g = 1; /* 4kb granularity */ |
@@ -2065,6 +2117,7 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | |||
2065 | cs->dpl = 0; /* will be adjusted later */ | 2117 | cs->dpl = 0; /* will be adjusted later */ |
2066 | cs->p = 1; | 2118 | cs->p = 1; |
2067 | cs->d = 1; | 2119 | cs->d = 1; |
2120 | cs->avl = 0; | ||
2068 | 2121 | ||
2069 | set_desc_base(ss, 0); /* flat segment */ | 2122 | set_desc_base(ss, 0); /* flat segment */ |
2070 | set_desc_limit(ss, 0xfffff); /* 4GB limit */ | 2123 | set_desc_limit(ss, 0xfffff); /* 4GB limit */ |
@@ -2074,6 +2127,8 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | |||
2074 | ss->d = 1; /* 32bit stack segment */ | 2127 | ss->d = 1; /* 32bit stack segment */ |
2075 | ss->dpl = 0; | 2128 | ss->dpl = 0; |
2076 | ss->p = 1; | 2129 | ss->p = 1; |
2130 | ss->l = 0; | ||
2131 | ss->avl = 0; | ||
2077 | } | 2132 | } |
2078 | 2133 | ||
2079 | static bool vendor_intel(struct x86_emulate_ctxt *ctxt) | 2134 | static bool vendor_intel(struct x86_emulate_ctxt *ctxt) |
@@ -2089,7 +2144,7 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt) | |||
2089 | 2144 | ||
2090 | static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) | 2145 | static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) |
2091 | { | 2146 | { |
2092 | struct x86_emulate_ops *ops = ctxt->ops; | 2147 | const struct x86_emulate_ops *ops = ctxt->ops; |
2093 | u32 eax, ebx, ecx, edx; | 2148 | u32 eax, ebx, ecx, edx; |
2094 | 2149 | ||
2095 | /* | 2150 | /* |
@@ -2133,7 +2188,7 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) | |||
2133 | 2188 | ||
2134 | static int em_syscall(struct x86_emulate_ctxt *ctxt) | 2189 | static int em_syscall(struct x86_emulate_ctxt *ctxt) |
2135 | { | 2190 | { |
2136 | struct x86_emulate_ops *ops = ctxt->ops; | 2191 | const struct x86_emulate_ops *ops = ctxt->ops; |
2137 | struct desc_struct cs, ss; | 2192 | struct desc_struct cs, ss; |
2138 | u64 msr_data; | 2193 | u64 msr_data; |
2139 | u16 cs_sel, ss_sel; | 2194 | u16 cs_sel, ss_sel; |
@@ -2165,10 +2220,10 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) | |||
2165 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); | 2220 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); |
2166 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); | 2221 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); |
2167 | 2222 | ||
2168 | ctxt->regs[VCPU_REGS_RCX] = ctxt->_eip; | 2223 | *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip; |
2169 | if (efer & EFER_LMA) { | 2224 | if (efer & EFER_LMA) { |
2170 | #ifdef CONFIG_X86_64 | 2225 | #ifdef CONFIG_X86_64 |
2171 | ctxt->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; | 2226 | *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags & ~EFLG_RF; |
2172 | 2227 | ||
2173 | ops->get_msr(ctxt, | 2228 | ops->get_msr(ctxt, |
2174 | ctxt->mode == X86EMUL_MODE_PROT64 ? | 2229 | ctxt->mode == X86EMUL_MODE_PROT64 ? |
@@ -2191,7 +2246,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) | |||
2191 | 2246 | ||
2192 | static int em_sysenter(struct x86_emulate_ctxt *ctxt) | 2247 | static int em_sysenter(struct x86_emulate_ctxt *ctxt) |
2193 | { | 2248 | { |
2194 | struct x86_emulate_ops *ops = ctxt->ops; | 2249 | const struct x86_emulate_ops *ops = ctxt->ops; |
2195 | struct desc_struct cs, ss; | 2250 | struct desc_struct cs, ss; |
2196 | u64 msr_data; | 2251 | u64 msr_data; |
2197 | u16 cs_sel, ss_sel; | 2252 | u16 cs_sel, ss_sel; |
@@ -2228,6 +2283,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) | |||
2228 | if (msr_data == 0x0) | 2283 | if (msr_data == 0x0) |
2229 | return emulate_gp(ctxt, 0); | 2284 | return emulate_gp(ctxt, 0); |
2230 | break; | 2285 | break; |
2286 | default: | ||
2287 | break; | ||
2231 | } | 2288 | } |
2232 | 2289 | ||
2233 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); | 2290 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); |
@@ -2247,14 +2304,14 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) | |||
2247 | ctxt->_eip = msr_data; | 2304 | ctxt->_eip = msr_data; |
2248 | 2305 | ||
2249 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); | 2306 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); |
2250 | ctxt->regs[VCPU_REGS_RSP] = msr_data; | 2307 | *reg_write(ctxt, VCPU_REGS_RSP) = msr_data; |
2251 | 2308 | ||
2252 | return X86EMUL_CONTINUE; | 2309 | return X86EMUL_CONTINUE; |
2253 | } | 2310 | } |
2254 | 2311 | ||
2255 | static int em_sysexit(struct x86_emulate_ctxt *ctxt) | 2312 | static int em_sysexit(struct x86_emulate_ctxt *ctxt) |
2256 | { | 2313 | { |
2257 | struct x86_emulate_ops *ops = ctxt->ops; | 2314 | const struct x86_emulate_ops *ops = ctxt->ops; |
2258 | struct desc_struct cs, ss; | 2315 | struct desc_struct cs, ss; |
2259 | u64 msr_data; | 2316 | u64 msr_data; |
2260 | int usermode; | 2317 | int usermode; |
@@ -2297,8 +2354,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) | |||
2297 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); | 2354 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); |
2298 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); | 2355 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); |
2299 | 2356 | ||
2300 | ctxt->_eip = ctxt->regs[VCPU_REGS_RDX]; | 2357 | ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX); |
2301 | ctxt->regs[VCPU_REGS_RSP] = ctxt->regs[VCPU_REGS_RCX]; | 2358 | *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX); |
2302 | 2359 | ||
2303 | return X86EMUL_CONTINUE; | 2360 | return X86EMUL_CONTINUE; |
2304 | } | 2361 | } |
@@ -2317,7 +2374,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | |||
2317 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | 2374 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, |
2318 | u16 port, u16 len) | 2375 | u16 port, u16 len) |
2319 | { | 2376 | { |
2320 | struct x86_emulate_ops *ops = ctxt->ops; | 2377 | const struct x86_emulate_ops *ops = ctxt->ops; |
2321 | struct desc_struct tr_seg; | 2378 | struct desc_struct tr_seg; |
2322 | u32 base3; | 2379 | u32 base3; |
2323 | int r; | 2380 | int r; |
@@ -2367,14 +2424,14 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, | |||
2367 | { | 2424 | { |
2368 | tss->ip = ctxt->_eip; | 2425 | tss->ip = ctxt->_eip; |
2369 | tss->flag = ctxt->eflags; | 2426 | tss->flag = ctxt->eflags; |
2370 | tss->ax = ctxt->regs[VCPU_REGS_RAX]; | 2427 | tss->ax = reg_read(ctxt, VCPU_REGS_RAX); |
2371 | tss->cx = ctxt->regs[VCPU_REGS_RCX]; | 2428 | tss->cx = reg_read(ctxt, VCPU_REGS_RCX); |
2372 | tss->dx = ctxt->regs[VCPU_REGS_RDX]; | 2429 | tss->dx = reg_read(ctxt, VCPU_REGS_RDX); |
2373 | tss->bx = ctxt->regs[VCPU_REGS_RBX]; | 2430 | tss->bx = reg_read(ctxt, VCPU_REGS_RBX); |
2374 | tss->sp = ctxt->regs[VCPU_REGS_RSP]; | 2431 | tss->sp = reg_read(ctxt, VCPU_REGS_RSP); |
2375 | tss->bp = ctxt->regs[VCPU_REGS_RBP]; | 2432 | tss->bp = reg_read(ctxt, VCPU_REGS_RBP); |
2376 | tss->si = ctxt->regs[VCPU_REGS_RSI]; | 2433 | tss->si = reg_read(ctxt, VCPU_REGS_RSI); |
2377 | tss->di = ctxt->regs[VCPU_REGS_RDI]; | 2434 | tss->di = reg_read(ctxt, VCPU_REGS_RDI); |
2378 | 2435 | ||
2379 | tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); | 2436 | tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); |
2380 | tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); | 2437 | tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); |
@@ -2390,14 +2447,14 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | |||
2390 | 2447 | ||
2391 | ctxt->_eip = tss->ip; | 2448 | ctxt->_eip = tss->ip; |
2392 | ctxt->eflags = tss->flag | 2; | 2449 | ctxt->eflags = tss->flag | 2; |
2393 | ctxt->regs[VCPU_REGS_RAX] = tss->ax; | 2450 | *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax; |
2394 | ctxt->regs[VCPU_REGS_RCX] = tss->cx; | 2451 | *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx; |
2395 | ctxt->regs[VCPU_REGS_RDX] = tss->dx; | 2452 | *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx; |
2396 | ctxt->regs[VCPU_REGS_RBX] = tss->bx; | 2453 | *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx; |
2397 | ctxt->regs[VCPU_REGS_RSP] = tss->sp; | 2454 | *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp; |
2398 | ctxt->regs[VCPU_REGS_RBP] = tss->bp; | 2455 | *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp; |
2399 | ctxt->regs[VCPU_REGS_RSI] = tss->si; | 2456 | *reg_write(ctxt, VCPU_REGS_RSI) = tss->si; |
2400 | ctxt->regs[VCPU_REGS_RDI] = tss->di; | 2457 | *reg_write(ctxt, VCPU_REGS_RDI) = tss->di; |
2401 | 2458 | ||
2402 | /* | 2459 | /* |
2403 | * SDM says that segment selectors are loaded before segment | 2460 | * SDM says that segment selectors are loaded before segment |
@@ -2410,7 +2467,7 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | |||
2410 | set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); | 2467 | set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); |
2411 | 2468 | ||
2412 | /* | 2469 | /* |
2413 | * Now load segment descriptors. If fault happenes at this stage | 2470 | * Now load segment descriptors. If fault happens at this stage |
2414 | * it is handled in a context of new task | 2471 | * it is handled in a context of new task |
2415 | */ | 2472 | */ |
2416 | ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR); | 2473 | ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR); |
@@ -2436,7 +2493,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
2436 | u16 tss_selector, u16 old_tss_sel, | 2493 | u16 tss_selector, u16 old_tss_sel, |
2437 | ulong old_tss_base, struct desc_struct *new_desc) | 2494 | ulong old_tss_base, struct desc_struct *new_desc) |
2438 | { | 2495 | { |
2439 | struct x86_emulate_ops *ops = ctxt->ops; | 2496 | const struct x86_emulate_ops *ops = ctxt->ops; |
2440 | struct tss_segment_16 tss_seg; | 2497 | struct tss_segment_16 tss_seg; |
2441 | int ret; | 2498 | int ret; |
2442 | u32 new_tss_base = get_desc_base(new_desc); | 2499 | u32 new_tss_base = get_desc_base(new_desc); |
@@ -2482,14 +2539,14 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, | |||
2482 | tss->cr3 = ctxt->ops->get_cr(ctxt, 3); | 2539 | tss->cr3 = ctxt->ops->get_cr(ctxt, 3); |
2483 | tss->eip = ctxt->_eip; | 2540 | tss->eip = ctxt->_eip; |
2484 | tss->eflags = ctxt->eflags; | 2541 | tss->eflags = ctxt->eflags; |
2485 | tss->eax = ctxt->regs[VCPU_REGS_RAX]; | 2542 | tss->eax = reg_read(ctxt, VCPU_REGS_RAX); |
2486 | tss->ecx = ctxt->regs[VCPU_REGS_RCX]; | 2543 | tss->ecx = reg_read(ctxt, VCPU_REGS_RCX); |
2487 | tss->edx = ctxt->regs[VCPU_REGS_RDX]; | 2544 | tss->edx = reg_read(ctxt, VCPU_REGS_RDX); |
2488 | tss->ebx = ctxt->regs[VCPU_REGS_RBX]; | 2545 | tss->ebx = reg_read(ctxt, VCPU_REGS_RBX); |
2489 | tss->esp = ctxt->regs[VCPU_REGS_RSP]; | 2546 | tss->esp = reg_read(ctxt, VCPU_REGS_RSP); |
2490 | tss->ebp = ctxt->regs[VCPU_REGS_RBP]; | 2547 | tss->ebp = reg_read(ctxt, VCPU_REGS_RBP); |
2491 | tss->esi = ctxt->regs[VCPU_REGS_RSI]; | 2548 | tss->esi = reg_read(ctxt, VCPU_REGS_RSI); |
2492 | tss->edi = ctxt->regs[VCPU_REGS_RDI]; | 2549 | tss->edi = reg_read(ctxt, VCPU_REGS_RDI); |
2493 | 2550 | ||
2494 | tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); | 2551 | tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); |
2495 | tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); | 2552 | tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); |
@@ -2511,14 +2568,14 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
2511 | ctxt->eflags = tss->eflags | 2; | 2568 | ctxt->eflags = tss->eflags | 2; |
2512 | 2569 | ||
2513 | /* General purpose registers */ | 2570 | /* General purpose registers */ |
2514 | ctxt->regs[VCPU_REGS_RAX] = tss->eax; | 2571 | *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax; |
2515 | ctxt->regs[VCPU_REGS_RCX] = tss->ecx; | 2572 | *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx; |
2516 | ctxt->regs[VCPU_REGS_RDX] = tss->edx; | 2573 | *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx; |
2517 | ctxt->regs[VCPU_REGS_RBX] = tss->ebx; | 2574 | *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx; |
2518 | ctxt->regs[VCPU_REGS_RSP] = tss->esp; | 2575 | *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp; |
2519 | ctxt->regs[VCPU_REGS_RBP] = tss->ebp; | 2576 | *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp; |
2520 | ctxt->regs[VCPU_REGS_RSI] = tss->esi; | 2577 | *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi; |
2521 | ctxt->regs[VCPU_REGS_RDI] = tss->edi; | 2578 | *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi; |
2522 | 2579 | ||
2523 | /* | 2580 | /* |
2524 | * SDM says that segment selectors are loaded before segment | 2581 | * SDM says that segment selectors are loaded before segment |
@@ -2583,7 +2640,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2583 | u16 tss_selector, u16 old_tss_sel, | 2640 | u16 tss_selector, u16 old_tss_sel, |
2584 | ulong old_tss_base, struct desc_struct *new_desc) | 2641 | ulong old_tss_base, struct desc_struct *new_desc) |
2585 | { | 2642 | { |
2586 | struct x86_emulate_ops *ops = ctxt->ops; | 2643 | const struct x86_emulate_ops *ops = ctxt->ops; |
2587 | struct tss_segment_32 tss_seg; | 2644 | struct tss_segment_32 tss_seg; |
2588 | int ret; | 2645 | int ret; |
2589 | u32 new_tss_base = get_desc_base(new_desc); | 2646 | u32 new_tss_base = get_desc_base(new_desc); |
@@ -2627,7 +2684,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2627 | u16 tss_selector, int idt_index, int reason, | 2684 | u16 tss_selector, int idt_index, int reason, |
2628 | bool has_error_code, u32 error_code) | 2685 | bool has_error_code, u32 error_code) |
2629 | { | 2686 | { |
2630 | struct x86_emulate_ops *ops = ctxt->ops; | 2687 | const struct x86_emulate_ops *ops = ctxt->ops; |
2631 | struct desc_struct curr_tss_desc, next_tss_desc; | 2688 | struct desc_struct curr_tss_desc, next_tss_desc; |
2632 | int ret; | 2689 | int ret; |
2633 | u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR); | 2690 | u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR); |
@@ -2652,7 +2709,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2652 | * | 2709 | * |
2653 | * 1. jmp/call/int to task gate: Check against DPL of the task gate | 2710 | * 1. jmp/call/int to task gate: Check against DPL of the task gate |
2654 | * 2. Exception/IRQ/iret: No check is performed | 2711 | * 2. Exception/IRQ/iret: No check is performed |
2655 | * 3. jmp/call to TSS: Check agains DPL of the TSS | 2712 | * 3. jmp/call to TSS: Check against DPL of the TSS |
2656 | */ | 2713 | */ |
2657 | if (reason == TASK_SWITCH_GATE) { | 2714 | if (reason == TASK_SWITCH_GATE) { |
2658 | if (idt_index != -1) { | 2715 | if (idt_index != -1) { |
@@ -2693,7 +2750,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2693 | ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT; | 2750 | ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT; |
2694 | 2751 | ||
2695 | /* set back link to prev task only if NT bit is set in eflags | 2752 | /* set back link to prev task only if NT bit is set in eflags |
2696 | note that old_tss_sel is not used afetr this point */ | 2753 | note that old_tss_sel is not used after this point */ |
2697 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | 2754 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) |
2698 | old_tss_sel = 0xffff; | 2755 | old_tss_sel = 0xffff; |
2699 | 2756 | ||
@@ -2733,26 +2790,28 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2733 | { | 2790 | { |
2734 | int rc; | 2791 | int rc; |
2735 | 2792 | ||
2793 | invalidate_registers(ctxt); | ||
2736 | ctxt->_eip = ctxt->eip; | 2794 | ctxt->_eip = ctxt->eip; |
2737 | ctxt->dst.type = OP_NONE; | 2795 | ctxt->dst.type = OP_NONE; |
2738 | 2796 | ||
2739 | rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason, | 2797 | rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason, |
2740 | has_error_code, error_code); | 2798 | has_error_code, error_code); |
2741 | 2799 | ||
2742 | if (rc == X86EMUL_CONTINUE) | 2800 | if (rc == X86EMUL_CONTINUE) { |
2743 | ctxt->eip = ctxt->_eip; | 2801 | ctxt->eip = ctxt->_eip; |
2802 | writeback_registers(ctxt); | ||
2803 | } | ||
2744 | 2804 | ||
2745 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; | 2805 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; |
2746 | } | 2806 | } |
2747 | 2807 | ||
2748 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, | 2808 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg, |
2749 | int reg, struct operand *op) | 2809 | struct operand *op) |
2750 | { | 2810 | { |
2751 | int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; | 2811 | int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count; |
2752 | 2812 | ||
2753 | register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes); | 2813 | register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes); |
2754 | op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]); | 2814 | op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg)); |
2755 | op->addr.mem.seg = seg; | ||
2756 | } | 2815 | } |
2757 | 2816 | ||
2758 | static int em_das(struct x86_emulate_ctxt *ctxt) | 2817 | static int em_das(struct x86_emulate_ctxt *ctxt) |
@@ -2927,7 +2986,7 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) | |||
2927 | { | 2986 | { |
2928 | ctxt->dst.type = OP_REG; | 2987 | ctxt->dst.type = OP_REG; |
2929 | ctxt->dst.bytes = ctxt->src.bytes; | 2988 | ctxt->dst.bytes = ctxt->src.bytes; |
2930 | ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RDX]; | 2989 | ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX); |
2931 | ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1); | 2990 | ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1); |
2932 | 2991 | ||
2933 | return X86EMUL_CONTINUE; | 2992 | return X86EMUL_CONTINUE; |
@@ -2938,8 +2997,8 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt) | |||
2938 | u64 tsc = 0; | 2997 | u64 tsc = 0; |
2939 | 2998 | ||
2940 | ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc); | 2999 | ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc); |
2941 | ctxt->regs[VCPU_REGS_RAX] = (u32)tsc; | 3000 | *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc; |
2942 | ctxt->regs[VCPU_REGS_RDX] = tsc >> 32; | 3001 | *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32; |
2943 | return X86EMUL_CONTINUE; | 3002 | return X86EMUL_CONTINUE; |
2944 | } | 3003 | } |
2945 | 3004 | ||
@@ -2947,10 +3006,10 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt) | |||
2947 | { | 3006 | { |
2948 | u64 pmc; | 3007 | u64 pmc; |
2949 | 3008 | ||
2950 | if (ctxt->ops->read_pmc(ctxt, ctxt->regs[VCPU_REGS_RCX], &pmc)) | 3009 | if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc)) |
2951 | return emulate_gp(ctxt, 0); | 3010 | return emulate_gp(ctxt, 0); |
2952 | ctxt->regs[VCPU_REGS_RAX] = (u32)pmc; | 3011 | *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc; |
2953 | ctxt->regs[VCPU_REGS_RDX] = pmc >> 32; | 3012 | *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32; |
2954 | return X86EMUL_CONTINUE; | 3013 | return X86EMUL_CONTINUE; |
2955 | } | 3014 | } |
2956 | 3015 | ||
@@ -2992,9 +3051,9 @@ static int em_wrmsr(struct x86_emulate_ctxt *ctxt) | |||
2992 | { | 3051 | { |
2993 | u64 msr_data; | 3052 | u64 msr_data; |
2994 | 3053 | ||
2995 | msr_data = (u32)ctxt->regs[VCPU_REGS_RAX] | 3054 | msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX) |
2996 | | ((u64)ctxt->regs[VCPU_REGS_RDX] << 32); | 3055 | | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32); |
2997 | if (ctxt->ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data)) | 3056 | if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data)) |
2998 | return emulate_gp(ctxt, 0); | 3057 | return emulate_gp(ctxt, 0); |
2999 | 3058 | ||
3000 | return X86EMUL_CONTINUE; | 3059 | return X86EMUL_CONTINUE; |
@@ -3004,11 +3063,11 @@ static int em_rdmsr(struct x86_emulate_ctxt *ctxt) | |||
3004 | { | 3063 | { |
3005 | u64 msr_data; | 3064 | u64 msr_data; |
3006 | 3065 | ||
3007 | if (ctxt->ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data)) | 3066 | if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data)) |
3008 | return emulate_gp(ctxt, 0); | 3067 | return emulate_gp(ctxt, 0); |
3009 | 3068 | ||
3010 | ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data; | 3069 | *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data; |
3011 | ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32; | 3070 | *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32; |
3012 | return X86EMUL_CONTINUE; | 3071 | return X86EMUL_CONTINUE; |
3013 | } | 3072 | } |
3014 | 3073 | ||
@@ -3188,8 +3247,8 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt) | |||
3188 | 3247 | ||
3189 | static int em_loop(struct x86_emulate_ctxt *ctxt) | 3248 | static int em_loop(struct x86_emulate_ctxt *ctxt) |
3190 | { | 3249 | { |
3191 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1); | 3250 | register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); |
3192 | if ((address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) != 0) && | 3251 | if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) && |
3193 | (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags))) | 3252 | (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags))) |
3194 | jmp_rel(ctxt, ctxt->src.val); | 3253 | jmp_rel(ctxt, ctxt->src.val); |
3195 | 3254 | ||
@@ -3198,7 +3257,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt) | |||
3198 | 3257 | ||
3199 | static int em_jcxz(struct x86_emulate_ctxt *ctxt) | 3258 | static int em_jcxz(struct x86_emulate_ctxt *ctxt) |
3200 | { | 3259 | { |
3201 | if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0) | 3260 | if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) |
3202 | jmp_rel(ctxt, ctxt->src.val); | 3261 | jmp_rel(ctxt, ctxt->src.val); |
3203 | 3262 | ||
3204 | return X86EMUL_CONTINUE; | 3263 | return X86EMUL_CONTINUE; |
@@ -3286,20 +3345,20 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt) | |||
3286 | { | 3345 | { |
3287 | u32 eax, ebx, ecx, edx; | 3346 | u32 eax, ebx, ecx, edx; |
3288 | 3347 | ||
3289 | eax = ctxt->regs[VCPU_REGS_RAX]; | 3348 | eax = reg_read(ctxt, VCPU_REGS_RAX); |
3290 | ecx = ctxt->regs[VCPU_REGS_RCX]; | 3349 | ecx = reg_read(ctxt, VCPU_REGS_RCX); |
3291 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | 3350 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); |
3292 | ctxt->regs[VCPU_REGS_RAX] = eax; | 3351 | *reg_write(ctxt, VCPU_REGS_RAX) = eax; |
3293 | ctxt->regs[VCPU_REGS_RBX] = ebx; | 3352 | *reg_write(ctxt, VCPU_REGS_RBX) = ebx; |
3294 | ctxt->regs[VCPU_REGS_RCX] = ecx; | 3353 | *reg_write(ctxt, VCPU_REGS_RCX) = ecx; |
3295 | ctxt->regs[VCPU_REGS_RDX] = edx; | 3354 | *reg_write(ctxt, VCPU_REGS_RDX) = edx; |
3296 | return X86EMUL_CONTINUE; | 3355 | return X86EMUL_CONTINUE; |
3297 | } | 3356 | } |
3298 | 3357 | ||
3299 | static int em_lahf(struct x86_emulate_ctxt *ctxt) | 3358 | static int em_lahf(struct x86_emulate_ctxt *ctxt) |
3300 | { | 3359 | { |
3301 | ctxt->regs[VCPU_REGS_RAX] &= ~0xff00UL; | 3360 | *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL; |
3302 | ctxt->regs[VCPU_REGS_RAX] |= (ctxt->eflags & 0xff) << 8; | 3361 | *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8; |
3303 | return X86EMUL_CONTINUE; | 3362 | return X86EMUL_CONTINUE; |
3304 | } | 3363 | } |
3305 | 3364 | ||
@@ -3456,7 +3515,7 @@ static int check_svme(struct x86_emulate_ctxt *ctxt) | |||
3456 | 3515 | ||
3457 | static int check_svme_pa(struct x86_emulate_ctxt *ctxt) | 3516 | static int check_svme_pa(struct x86_emulate_ctxt *ctxt) |
3458 | { | 3517 | { |
3459 | u64 rax = ctxt->regs[VCPU_REGS_RAX]; | 3518 | u64 rax = reg_read(ctxt, VCPU_REGS_RAX); |
3460 | 3519 | ||
3461 | /* Valid physical address? */ | 3520 | /* Valid physical address? */ |
3462 | if (rax & 0xffff000000000000ULL) | 3521 | if (rax & 0xffff000000000000ULL) |
@@ -3478,7 +3537,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt) | |||
3478 | static int check_rdpmc(struct x86_emulate_ctxt *ctxt) | 3537 | static int check_rdpmc(struct x86_emulate_ctxt *ctxt) |
3479 | { | 3538 | { |
3480 | u64 cr4 = ctxt->ops->get_cr(ctxt, 4); | 3539 | u64 cr4 = ctxt->ops->get_cr(ctxt, 4); |
3481 | u64 rcx = ctxt->regs[VCPU_REGS_RCX]; | 3540 | u64 rcx = reg_read(ctxt, VCPU_REGS_RCX); |
3482 | 3541 | ||
3483 | if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || | 3542 | if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || |
3484 | (rcx > 3)) | 3543 | (rcx > 3)) |
@@ -3531,13 +3590,13 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) | |||
3531 | I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ | 3590 | I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ |
3532 | I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) | 3591 | I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) |
3533 | 3592 | ||
3534 | static struct opcode group7_rm1[] = { | 3593 | static const struct opcode group7_rm1[] = { |
3535 | DI(SrcNone | Priv, monitor), | 3594 | DI(SrcNone | Priv, monitor), |
3536 | DI(SrcNone | Priv, mwait), | 3595 | DI(SrcNone | Priv, mwait), |
3537 | N, N, N, N, N, N, | 3596 | N, N, N, N, N, N, |
3538 | }; | 3597 | }; |
3539 | 3598 | ||
3540 | static struct opcode group7_rm3[] = { | 3599 | static const struct opcode group7_rm3[] = { |
3541 | DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), | 3600 | DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), |
3542 | II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall), | 3601 | II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall), |
3543 | DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), | 3602 | DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), |
@@ -3548,13 +3607,13 @@ static struct opcode group7_rm3[] = { | |||
3548 | DIP(SrcNone | Prot | Priv, invlpga, check_svme), | 3607 | DIP(SrcNone | Prot | Priv, invlpga, check_svme), |
3549 | }; | 3608 | }; |
3550 | 3609 | ||
3551 | static struct opcode group7_rm7[] = { | 3610 | static const struct opcode group7_rm7[] = { |
3552 | N, | 3611 | N, |
3553 | DIP(SrcNone, rdtscp, check_rdtsc), | 3612 | DIP(SrcNone, rdtscp, check_rdtsc), |
3554 | N, N, N, N, N, N, | 3613 | N, N, N, N, N, N, |
3555 | }; | 3614 | }; |
3556 | 3615 | ||
3557 | static struct opcode group1[] = { | 3616 | static const struct opcode group1[] = { |
3558 | I(Lock, em_add), | 3617 | I(Lock, em_add), |
3559 | I(Lock | PageTable, em_or), | 3618 | I(Lock | PageTable, em_or), |
3560 | I(Lock, em_adc), | 3619 | I(Lock, em_adc), |
@@ -3565,11 +3624,11 @@ static struct opcode group1[] = { | |||
3565 | I(0, em_cmp), | 3624 | I(0, em_cmp), |
3566 | }; | 3625 | }; |
3567 | 3626 | ||
3568 | static struct opcode group1A[] = { | 3627 | static const struct opcode group1A[] = { |
3569 | I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, | 3628 | I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, |
3570 | }; | 3629 | }; |
3571 | 3630 | ||
3572 | static struct opcode group3[] = { | 3631 | static const struct opcode group3[] = { |
3573 | I(DstMem | SrcImm, em_test), | 3632 | I(DstMem | SrcImm, em_test), |
3574 | I(DstMem | SrcImm, em_test), | 3633 | I(DstMem | SrcImm, em_test), |
3575 | I(DstMem | SrcNone | Lock, em_not), | 3634 | I(DstMem | SrcNone | Lock, em_not), |
@@ -3580,13 +3639,13 @@ static struct opcode group3[] = { | |||
3580 | I(SrcMem, em_idiv_ex), | 3639 | I(SrcMem, em_idiv_ex), |
3581 | }; | 3640 | }; |
3582 | 3641 | ||
3583 | static struct opcode group4[] = { | 3642 | static const struct opcode group4[] = { |
3584 | I(ByteOp | DstMem | SrcNone | Lock, em_grp45), | 3643 | I(ByteOp | DstMem | SrcNone | Lock, em_grp45), |
3585 | I(ByteOp | DstMem | SrcNone | Lock, em_grp45), | 3644 | I(ByteOp | DstMem | SrcNone | Lock, em_grp45), |
3586 | N, N, N, N, N, N, | 3645 | N, N, N, N, N, N, |
3587 | }; | 3646 | }; |
3588 | 3647 | ||
3589 | static struct opcode group5[] = { | 3648 | static const struct opcode group5[] = { |
3590 | I(DstMem | SrcNone | Lock, em_grp45), | 3649 | I(DstMem | SrcNone | Lock, em_grp45), |
3591 | I(DstMem | SrcNone | Lock, em_grp45), | 3650 | I(DstMem | SrcNone | Lock, em_grp45), |
3592 | I(SrcMem | Stack, em_grp45), | 3651 | I(SrcMem | Stack, em_grp45), |
@@ -3596,7 +3655,7 @@ static struct opcode group5[] = { | |||
3596 | I(SrcMem | Stack, em_grp45), N, | 3655 | I(SrcMem | Stack, em_grp45), N, |
3597 | }; | 3656 | }; |
3598 | 3657 | ||
3599 | static struct opcode group6[] = { | 3658 | static const struct opcode group6[] = { |
3600 | DI(Prot, sldt), | 3659 | DI(Prot, sldt), |
3601 | DI(Prot, str), | 3660 | DI(Prot, str), |
3602 | II(Prot | Priv | SrcMem16, em_lldt, lldt), | 3661 | II(Prot | Priv | SrcMem16, em_lldt, lldt), |
@@ -3604,7 +3663,7 @@ static struct opcode group6[] = { | |||
3604 | N, N, N, N, | 3663 | N, N, N, N, |
3605 | }; | 3664 | }; |
3606 | 3665 | ||
3607 | static struct group_dual group7 = { { | 3666 | static const struct group_dual group7 = { { |
3608 | II(Mov | DstMem | Priv, em_sgdt, sgdt), | 3667 | II(Mov | DstMem | Priv, em_sgdt, sgdt), |
3609 | II(Mov | DstMem | Priv, em_sidt, sidt), | 3668 | II(Mov | DstMem | Priv, em_sidt, sidt), |
3610 | II(SrcMem | Priv, em_lgdt, lgdt), | 3669 | II(SrcMem | Priv, em_lgdt, lgdt), |
@@ -3621,7 +3680,7 @@ static struct group_dual group7 = { { | |||
3621 | EXT(0, group7_rm7), | 3680 | EXT(0, group7_rm7), |
3622 | } }; | 3681 | } }; |
3623 | 3682 | ||
3624 | static struct opcode group8[] = { | 3683 | static const struct opcode group8[] = { |
3625 | N, N, N, N, | 3684 | N, N, N, N, |
3626 | I(DstMem | SrcImmByte, em_bt), | 3685 | I(DstMem | SrcImmByte, em_bt), |
3627 | I(DstMem | SrcImmByte | Lock | PageTable, em_bts), | 3686 | I(DstMem | SrcImmByte | Lock | PageTable, em_bts), |
@@ -3629,26 +3688,26 @@ static struct opcode group8[] = { | |||
3629 | I(DstMem | SrcImmByte | Lock | PageTable, em_btc), | 3688 | I(DstMem | SrcImmByte | Lock | PageTable, em_btc), |
3630 | }; | 3689 | }; |
3631 | 3690 | ||
3632 | static struct group_dual group9 = { { | 3691 | static const struct group_dual group9 = { { |
3633 | N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, | 3692 | N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, |
3634 | }, { | 3693 | }, { |
3635 | N, N, N, N, N, N, N, N, | 3694 | N, N, N, N, N, N, N, N, |
3636 | } }; | 3695 | } }; |
3637 | 3696 | ||
3638 | static struct opcode group11[] = { | 3697 | static const struct opcode group11[] = { |
3639 | I(DstMem | SrcImm | Mov | PageTable, em_mov), | 3698 | I(DstMem | SrcImm | Mov | PageTable, em_mov), |
3640 | X7(D(Undefined)), | 3699 | X7(D(Undefined)), |
3641 | }; | 3700 | }; |
3642 | 3701 | ||
3643 | static struct gprefix pfx_0f_6f_0f_7f = { | 3702 | static const struct gprefix pfx_0f_6f_0f_7f = { |
3644 | I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), | 3703 | I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), |
3645 | }; | 3704 | }; |
3646 | 3705 | ||
3647 | static struct gprefix pfx_vmovntpx = { | 3706 | static const struct gprefix pfx_vmovntpx = { |
3648 | I(0, em_mov), N, N, N, | 3707 | I(0, em_mov), N, N, N, |
3649 | }; | 3708 | }; |
3650 | 3709 | ||
3651 | static struct opcode opcode_table[256] = { | 3710 | static const struct opcode opcode_table[256] = { |
3652 | /* 0x00 - 0x07 */ | 3711 | /* 0x00 - 0x07 */ |
3653 | I6ALU(Lock, em_add), | 3712 | I6ALU(Lock, em_add), |
3654 | I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), | 3713 | I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), |
@@ -3689,7 +3748,7 @@ static struct opcode opcode_table[256] = { | |||
3689 | I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), | 3748 | I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), |
3690 | I(SrcImmByte | Mov | Stack, em_push), | 3749 | I(SrcImmByte | Mov | Stack, em_push), |
3691 | I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), | 3750 | I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), |
3692 | I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */ | 3751 | I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */ |
3693 | I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */ | 3752 | I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */ |
3694 | /* 0x70 - 0x7F */ | 3753 | /* 0x70 - 0x7F */ |
3695 | X16(D(SrcImmByte)), | 3754 | X16(D(SrcImmByte)), |
@@ -3765,7 +3824,7 @@ static struct opcode opcode_table[256] = { | |||
3765 | D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), | 3824 | D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), |
3766 | }; | 3825 | }; |
3767 | 3826 | ||
3768 | static struct opcode twobyte_table[256] = { | 3827 | static const struct opcode twobyte_table[256] = { |
3769 | /* 0x00 - 0x0F */ | 3828 | /* 0x00 - 0x0F */ |
3770 | G(0, group6), GD(0, &group7), N, N, | 3829 | G(0, group6), GD(0, &group7), N, N, |
3771 | N, I(ImplicitOps | VendorSpecific, em_syscall), | 3830 | N, I(ImplicitOps | VendorSpecific, em_syscall), |
@@ -3936,7 +3995,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
3936 | case OpAcc: | 3995 | case OpAcc: |
3937 | op->type = OP_REG; | 3996 | op->type = OP_REG; |
3938 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | 3997 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; |
3939 | op->addr.reg = &ctxt->regs[VCPU_REGS_RAX]; | 3998 | op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); |
3940 | fetch_register_operand(op); | 3999 | fetch_register_operand(op); |
3941 | op->orig_val = op->val; | 4000 | op->orig_val = op->val; |
3942 | break; | 4001 | break; |
@@ -3944,19 +4003,20 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
3944 | op->type = OP_MEM; | 4003 | op->type = OP_MEM; |
3945 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | 4004 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; |
3946 | op->addr.mem.ea = | 4005 | op->addr.mem.ea = |
3947 | register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]); | 4006 | register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI)); |
3948 | op->addr.mem.seg = VCPU_SREG_ES; | 4007 | op->addr.mem.seg = VCPU_SREG_ES; |
3949 | op->val = 0; | 4008 | op->val = 0; |
4009 | op->count = 1; | ||
3950 | break; | 4010 | break; |
3951 | case OpDX: | 4011 | case OpDX: |
3952 | op->type = OP_REG; | 4012 | op->type = OP_REG; |
3953 | op->bytes = 2; | 4013 | op->bytes = 2; |
3954 | op->addr.reg = &ctxt->regs[VCPU_REGS_RDX]; | 4014 | op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX); |
3955 | fetch_register_operand(op); | 4015 | fetch_register_operand(op); |
3956 | break; | 4016 | break; |
3957 | case OpCL: | 4017 | case OpCL: |
3958 | op->bytes = 1; | 4018 | op->bytes = 1; |
3959 | op->val = ctxt->regs[VCPU_REGS_RCX] & 0xff; | 4019 | op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff; |
3960 | break; | 4020 | break; |
3961 | case OpImmByte: | 4021 | case OpImmByte: |
3962 | rc = decode_imm(ctxt, op, 1, true); | 4022 | rc = decode_imm(ctxt, op, 1, true); |
@@ -3987,9 +4047,10 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
3987 | op->type = OP_MEM; | 4047 | op->type = OP_MEM; |
3988 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | 4048 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; |
3989 | op->addr.mem.ea = | 4049 | op->addr.mem.ea = |
3990 | register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]); | 4050 | register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI)); |
3991 | op->addr.mem.seg = seg_override(ctxt); | 4051 | op->addr.mem.seg = seg_override(ctxt); |
3992 | op->val = 0; | 4052 | op->val = 0; |
4053 | op->count = 1; | ||
3993 | break; | 4054 | break; |
3994 | case OpImmFAddr: | 4055 | case OpImmFAddr: |
3995 | op->type = OP_IMM; | 4056 | op->type = OP_IMM; |
@@ -4293,9 +4354,10 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, | |||
4293 | read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); | 4354 | read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); |
4294 | } | 4355 | } |
4295 | 4356 | ||
4357 | |||
4296 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | 4358 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) |
4297 | { | 4359 | { |
4298 | struct x86_emulate_ops *ops = ctxt->ops; | 4360 | const struct x86_emulate_ops *ops = ctxt->ops; |
4299 | int rc = X86EMUL_CONTINUE; | 4361 | int rc = X86EMUL_CONTINUE; |
4300 | int saved_dst_type = ctxt->dst.type; | 4362 | int saved_dst_type = ctxt->dst.type; |
4301 | 4363 | ||
@@ -4356,7 +4418,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
4356 | } | 4418 | } |
4357 | 4419 | ||
4358 | /* Instruction can only be executed in protected mode */ | 4420 | /* Instruction can only be executed in protected mode */ |
4359 | if ((ctxt->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) { | 4421 | if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) { |
4360 | rc = emulate_ud(ctxt); | 4422 | rc = emulate_ud(ctxt); |
4361 | goto done; | 4423 | goto done; |
4362 | } | 4424 | } |
@@ -4377,7 +4439,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
4377 | 4439 | ||
4378 | if (ctxt->rep_prefix && (ctxt->d & String)) { | 4440 | if (ctxt->rep_prefix && (ctxt->d & String)) { |
4379 | /* All REP prefixes have the same first termination condition */ | 4441 | /* All REP prefixes have the same first termination condition */ |
4380 | if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0) { | 4442 | if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { |
4381 | ctxt->eip = ctxt->_eip; | 4443 | ctxt->eip = ctxt->_eip; |
4382 | goto done; | 4444 | goto done; |
4383 | } | 4445 | } |
@@ -4450,7 +4512,7 @@ special_insn: | |||
4450 | ctxt->dst.val = ctxt->src.addr.mem.ea; | 4512 | ctxt->dst.val = ctxt->src.addr.mem.ea; |
4451 | break; | 4513 | break; |
4452 | case 0x90 ... 0x97: /* nop / xchg reg, rax */ | 4514 | case 0x90 ... 0x97: /* nop / xchg reg, rax */ |
4453 | if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX]) | 4515 | if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX)) |
4454 | break; | 4516 | break; |
4455 | rc = em_xchg(ctxt); | 4517 | rc = em_xchg(ctxt); |
4456 | break; | 4518 | break; |
@@ -4478,7 +4540,7 @@ special_insn: | |||
4478 | rc = em_grp2(ctxt); | 4540 | rc = em_grp2(ctxt); |
4479 | break; | 4541 | break; |
4480 | case 0xd2 ... 0xd3: /* Grp2 */ | 4542 | case 0xd2 ... 0xd3: /* Grp2 */ |
4481 | ctxt->src.val = ctxt->regs[VCPU_REGS_RCX]; | 4543 | ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX); |
4482 | rc = em_grp2(ctxt); | 4544 | rc = em_grp2(ctxt); |
4483 | break; | 4545 | break; |
4484 | case 0xe9: /* jmp rel */ | 4546 | case 0xe9: /* jmp rel */ |
@@ -4524,23 +4586,27 @@ writeback: | |||
4524 | ctxt->dst.type = saved_dst_type; | 4586 | ctxt->dst.type = saved_dst_type; |
4525 | 4587 | ||
4526 | if ((ctxt->d & SrcMask) == SrcSI) | 4588 | if ((ctxt->d & SrcMask) == SrcSI) |
4527 | string_addr_inc(ctxt, seg_override(ctxt), | 4589 | string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src); |
4528 | VCPU_REGS_RSI, &ctxt->src); | ||
4529 | 4590 | ||
4530 | if ((ctxt->d & DstMask) == DstDI) | 4591 | if ((ctxt->d & DstMask) == DstDI) |
4531 | string_addr_inc(ctxt, VCPU_SREG_ES, VCPU_REGS_RDI, | 4592 | string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst); |
4532 | &ctxt->dst); | ||
4533 | 4593 | ||
4534 | if (ctxt->rep_prefix && (ctxt->d & String)) { | 4594 | if (ctxt->rep_prefix && (ctxt->d & String)) { |
4595 | unsigned int count; | ||
4535 | struct read_cache *r = &ctxt->io_read; | 4596 | struct read_cache *r = &ctxt->io_read; |
4536 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1); | 4597 | if ((ctxt->d & SrcMask) == SrcSI) |
4598 | count = ctxt->src.count; | ||
4599 | else | ||
4600 | count = ctxt->dst.count; | ||
4601 | register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), | ||
4602 | -count); | ||
4537 | 4603 | ||
4538 | if (!string_insn_completed(ctxt)) { | 4604 | if (!string_insn_completed(ctxt)) { |
4539 | /* | 4605 | /* |
4540 | * Re-enter guest when pio read ahead buffer is empty | 4606 | * Re-enter guest when pio read ahead buffer is empty |
4541 | * or, if it is not used, after each 1024 iteration. | 4607 | * or, if it is not used, after each 1024 iteration. |
4542 | */ | 4608 | */ |
4543 | if ((r->end != 0 || ctxt->regs[VCPU_REGS_RCX] & 0x3ff) && | 4609 | if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) && |
4544 | (r->end == 0 || r->end != r->pos)) { | 4610 | (r->end == 0 || r->end != r->pos)) { |
4545 | /* | 4611 | /* |
4546 | * Reset read cache. Usually happens before | 4612 | * Reset read cache. Usually happens before |
@@ -4548,6 +4614,7 @@ writeback: | |||
4548 | * we have to do it here. | 4614 | * we have to do it here. |
4549 | */ | 4615 | */ |
4550 | ctxt->mem_read.end = 0; | 4616 | ctxt->mem_read.end = 0; |
4617 | writeback_registers(ctxt); | ||
4551 | return EMULATION_RESTART; | 4618 | return EMULATION_RESTART; |
4552 | } | 4619 | } |
4553 | goto done; /* skip rip writeback */ | 4620 | goto done; /* skip rip writeback */ |
@@ -4562,6 +4629,9 @@ done: | |||
4562 | if (rc == X86EMUL_INTERCEPTED) | 4629 | if (rc == X86EMUL_INTERCEPTED) |
4563 | return EMULATION_INTERCEPTED; | 4630 | return EMULATION_INTERCEPTED; |
4564 | 4631 | ||
4632 | if (rc == X86EMUL_CONTINUE) | ||
4633 | writeback_registers(ctxt); | ||
4634 | |||
4565 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; | 4635 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; |
4566 | 4636 | ||
4567 | twobyte_insn: | 4637 | twobyte_insn: |
@@ -4634,3 +4704,13 @@ twobyte_insn: | |||
4634 | cannot_emulate: | 4704 | cannot_emulate: |
4635 | return EMULATION_FAILED; | 4705 | return EMULATION_FAILED; |
4636 | } | 4706 | } |
4707 | |||
4708 | void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt) | ||
4709 | { | ||
4710 | invalidate_registers(ctxt); | ||
4711 | } | ||
4712 | |||
4713 | void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt) | ||
4714 | { | ||
4715 | writeback_registers(ctxt); | ||
4716 | } | ||
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index adba28f88d1a..11300d2fa714 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -108,7 +108,7 @@ static s64 __kpit_elapsed(struct kvm *kvm) | |||
108 | ktime_t remaining; | 108 | ktime_t remaining; |
109 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; | 109 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; |
110 | 110 | ||
111 | if (!ps->pit_timer.period) | 111 | if (!ps->period) |
112 | return 0; | 112 | return 0; |
113 | 113 | ||
114 | /* | 114 | /* |
@@ -120,9 +120,9 @@ static s64 __kpit_elapsed(struct kvm *kvm) | |||
120 | * itself with the initial count and continues counting | 120 | * itself with the initial count and continues counting |
121 | * from there. | 121 | * from there. |
122 | */ | 122 | */ |
123 | remaining = hrtimer_get_remaining(&ps->pit_timer.timer); | 123 | remaining = hrtimer_get_remaining(&ps->timer); |
124 | elapsed = ps->pit_timer.period - ktime_to_ns(remaining); | 124 | elapsed = ps->period - ktime_to_ns(remaining); |
125 | elapsed = mod_64(elapsed, ps->pit_timer.period); | 125 | elapsed = mod_64(elapsed, ps->period); |
126 | 126 | ||
127 | return elapsed; | 127 | return elapsed; |
128 | } | 128 | } |
@@ -238,12 +238,12 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
238 | int value; | 238 | int value; |
239 | 239 | ||
240 | spin_lock(&ps->inject_lock); | 240 | spin_lock(&ps->inject_lock); |
241 | value = atomic_dec_return(&ps->pit_timer.pending); | 241 | value = atomic_dec_return(&ps->pending); |
242 | if (value < 0) | 242 | if (value < 0) |
243 | /* spurious acks can be generated if, for example, the | 243 | /* spurious acks can be generated if, for example, the |
244 | * PIC is being reset. Handle it gracefully here | 244 | * PIC is being reset. Handle it gracefully here |
245 | */ | 245 | */ |
246 | atomic_inc(&ps->pit_timer.pending); | 246 | atomic_inc(&ps->pending); |
247 | else if (value > 0) | 247 | else if (value > 0) |
248 | /* in this case, we had multiple outstanding pit interrupts | 248 | /* in this case, we had multiple outstanding pit interrupts |
249 | * that we needed to inject. Reinject | 249 | * that we needed to inject. Reinject |
@@ -261,28 +261,17 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | |||
261 | if (!kvm_vcpu_is_bsp(vcpu) || !pit) | 261 | if (!kvm_vcpu_is_bsp(vcpu) || !pit) |
262 | return; | 262 | return; |
263 | 263 | ||
264 | timer = &pit->pit_state.pit_timer.timer; | 264 | timer = &pit->pit_state.timer; |
265 | if (hrtimer_cancel(timer)) | 265 | if (hrtimer_cancel(timer)) |
266 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | 266 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); |
267 | } | 267 | } |
268 | 268 | ||
269 | static void destroy_pit_timer(struct kvm_pit *pit) | 269 | static void destroy_pit_timer(struct kvm_pit *pit) |
270 | { | 270 | { |
271 | hrtimer_cancel(&pit->pit_state.pit_timer.timer); | 271 | hrtimer_cancel(&pit->pit_state.timer); |
272 | flush_kthread_work(&pit->expired); | 272 | flush_kthread_work(&pit->expired); |
273 | } | 273 | } |
274 | 274 | ||
275 | static bool kpit_is_periodic(struct kvm_timer *ktimer) | ||
276 | { | ||
277 | struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state, | ||
278 | pit_timer); | ||
279 | return ps->is_periodic; | ||
280 | } | ||
281 | |||
282 | static struct kvm_timer_ops kpit_ops = { | ||
283 | .is_periodic = kpit_is_periodic, | ||
284 | }; | ||
285 | |||
286 | static void pit_do_work(struct kthread_work *work) | 275 | static void pit_do_work(struct kthread_work *work) |
287 | { | 276 | { |
288 | struct kvm_pit *pit = container_of(work, struct kvm_pit, expired); | 277 | struct kvm_pit *pit = container_of(work, struct kvm_pit, expired); |
@@ -322,16 +311,16 @@ static void pit_do_work(struct kthread_work *work) | |||
322 | 311 | ||
323 | static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) | 312 | static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) |
324 | { | 313 | { |
325 | struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); | 314 | struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer); |
326 | struct kvm_pit *pt = ktimer->kvm->arch.vpit; | 315 | struct kvm_pit *pt = ps->kvm->arch.vpit; |
327 | 316 | ||
328 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { | 317 | if (ps->reinject || !atomic_read(&ps->pending)) { |
329 | atomic_inc(&ktimer->pending); | 318 | atomic_inc(&ps->pending); |
330 | queue_kthread_work(&pt->worker, &pt->expired); | 319 | queue_kthread_work(&pt->worker, &pt->expired); |
331 | } | 320 | } |
332 | 321 | ||
333 | if (ktimer->t_ops->is_periodic(ktimer)) { | 322 | if (ps->is_periodic) { |
334 | hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); | 323 | hrtimer_add_expires_ns(&ps->timer, ps->period); |
335 | return HRTIMER_RESTART; | 324 | return HRTIMER_RESTART; |
336 | } else | 325 | } else |
337 | return HRTIMER_NORESTART; | 326 | return HRTIMER_NORESTART; |
@@ -340,7 +329,6 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) | |||
340 | static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) | 329 | static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) |
341 | { | 330 | { |
342 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; | 331 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; |
343 | struct kvm_timer *pt = &ps->pit_timer; | ||
344 | s64 interval; | 332 | s64 interval; |
345 | 333 | ||
346 | if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) | 334 | if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) |
@@ -351,19 +339,18 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) | |||
351 | pr_debug("create pit timer, interval is %llu nsec\n", interval); | 339 | pr_debug("create pit timer, interval is %llu nsec\n", interval); |
352 | 340 | ||
353 | /* TODO The new value only affected after the retriggered */ | 341 | /* TODO The new value only affected after the retriggered */ |
354 | hrtimer_cancel(&pt->timer); | 342 | hrtimer_cancel(&ps->timer); |
355 | flush_kthread_work(&ps->pit->expired); | 343 | flush_kthread_work(&ps->pit->expired); |
356 | pt->period = interval; | 344 | ps->period = interval; |
357 | ps->is_periodic = is_period; | 345 | ps->is_periodic = is_period; |
358 | 346 | ||
359 | pt->timer.function = pit_timer_fn; | 347 | ps->timer.function = pit_timer_fn; |
360 | pt->t_ops = &kpit_ops; | 348 | ps->kvm = ps->pit->kvm; |
361 | pt->kvm = ps->pit->kvm; | ||
362 | 349 | ||
363 | atomic_set(&pt->pending, 0); | 350 | atomic_set(&ps->pending, 0); |
364 | ps->irq_ack = 1; | 351 | ps->irq_ack = 1; |
365 | 352 | ||
366 | hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval), | 353 | hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), |
367 | HRTIMER_MODE_ABS); | 354 | HRTIMER_MODE_ABS); |
368 | } | 355 | } |
369 | 356 | ||
@@ -639,7 +626,7 @@ void kvm_pit_reset(struct kvm_pit *pit) | |||
639 | } | 626 | } |
640 | mutex_unlock(&pit->pit_state.lock); | 627 | mutex_unlock(&pit->pit_state.lock); |
641 | 628 | ||
642 | atomic_set(&pit->pit_state.pit_timer.pending, 0); | 629 | atomic_set(&pit->pit_state.pending, 0); |
643 | pit->pit_state.irq_ack = 1; | 630 | pit->pit_state.irq_ack = 1; |
644 | } | 631 | } |
645 | 632 | ||
@@ -648,7 +635,7 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) | |||
648 | struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier); | 635 | struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier); |
649 | 636 | ||
650 | if (!mask) { | 637 | if (!mask) { |
651 | atomic_set(&pit->pit_state.pit_timer.pending, 0); | 638 | atomic_set(&pit->pit_state.pending, 0); |
652 | pit->pit_state.irq_ack = 1; | 639 | pit->pit_state.irq_ack = 1; |
653 | } | 640 | } |
654 | } | 641 | } |
@@ -706,12 +693,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
706 | 693 | ||
707 | pit_state = &pit->pit_state; | 694 | pit_state = &pit->pit_state; |
708 | pit_state->pit = pit; | 695 | pit_state->pit = pit; |
709 | hrtimer_init(&pit_state->pit_timer.timer, | 696 | hrtimer_init(&pit_state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
710 | CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
711 | pit_state->irq_ack_notifier.gsi = 0; | 697 | pit_state->irq_ack_notifier.gsi = 0; |
712 | pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; | 698 | pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; |
713 | kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); | 699 | kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); |
714 | pit_state->pit_timer.reinject = true; | 700 | pit_state->reinject = true; |
715 | mutex_unlock(&pit->pit_state.lock); | 701 | mutex_unlock(&pit->pit_state.lock); |
716 | 702 | ||
717 | kvm_pit_reset(pit); | 703 | kvm_pit_reset(pit); |
@@ -761,7 +747,7 @@ void kvm_free_pit(struct kvm *kvm) | |||
761 | kvm_unregister_irq_ack_notifier(kvm, | 747 | kvm_unregister_irq_ack_notifier(kvm, |
762 | &kvm->arch.vpit->pit_state.irq_ack_notifier); | 748 | &kvm->arch.vpit->pit_state.irq_ack_notifier); |
763 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 749 | mutex_lock(&kvm->arch.vpit->pit_state.lock); |
764 | timer = &kvm->arch.vpit->pit_state.pit_timer.timer; | 750 | timer = &kvm->arch.vpit->pit_state.timer; |
765 | hrtimer_cancel(timer); | 751 | hrtimer_cancel(timer); |
766 | flush_kthread_work(&kvm->arch.vpit->expired); | 752 | flush_kthread_work(&kvm->arch.vpit->expired); |
767 | kthread_stop(kvm->arch.vpit->worker_task); | 753 | kthread_stop(kvm->arch.vpit->worker_task); |
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index fdf40425ea1d..dd1b16b611b0 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
@@ -24,8 +24,12 @@ struct kvm_kpit_channel_state { | |||
24 | struct kvm_kpit_state { | 24 | struct kvm_kpit_state { |
25 | struct kvm_kpit_channel_state channels[3]; | 25 | struct kvm_kpit_channel_state channels[3]; |
26 | u32 flags; | 26 | u32 flags; |
27 | struct kvm_timer pit_timer; | ||
28 | bool is_periodic; | 27 | bool is_periodic; |
28 | s64 period; /* unit: ns */ | ||
29 | struct hrtimer timer; | ||
30 | atomic_t pending; /* accumulated triggered timers */ | ||
31 | bool reinject; | ||
32 | struct kvm *kvm; | ||
29 | u32 speaker_data_on; | 33 | u32 speaker_data_on; |
30 | struct mutex lock; | 34 | struct mutex lock; |
31 | struct kvm_pit *pit; | 35 | struct kvm_pit *pit; |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 9fc9aa7ac703..848206df0967 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -190,17 +190,17 @@ void kvm_pic_update_irq(struct kvm_pic *s) | |||
190 | 190 | ||
191 | int kvm_pic_set_irq(struct kvm_pic *s, int irq, int irq_source_id, int level) | 191 | int kvm_pic_set_irq(struct kvm_pic *s, int irq, int irq_source_id, int level) |
192 | { | 192 | { |
193 | int ret = -1; | 193 | int ret, irq_level; |
194 | |||
195 | BUG_ON(irq < 0 || irq >= PIC_NUM_PINS); | ||
194 | 196 | ||
195 | pic_lock(s); | 197 | pic_lock(s); |
196 | if (irq >= 0 && irq < PIC_NUM_PINS) { | 198 | irq_level = __kvm_irq_line_state(&s->irq_states[irq], |
197 | int irq_level = __kvm_irq_line_state(&s->irq_states[irq], | 199 | irq_source_id, level); |
198 | irq_source_id, level); | 200 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level); |
199 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level); | 201 | pic_update_irq(s); |
200 | pic_update_irq(s); | 202 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, |
201 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, | 203 | s->pics[irq >> 3].imr, ret == 0); |
202 | s->pics[irq >> 3].imr, ret == 0); | ||
203 | } | ||
204 | pic_unlock(s); | 204 | pic_unlock(s); |
205 | 205 | ||
206 | return ret; | 206 | return ret; |
@@ -275,23 +275,20 @@ void kvm_pic_reset(struct kvm_kpic_state *s) | |||
275 | { | 275 | { |
276 | int irq, i; | 276 | int irq, i; |
277 | struct kvm_vcpu *vcpu; | 277 | struct kvm_vcpu *vcpu; |
278 | u8 irr = s->irr, isr = s->imr; | 278 | u8 edge_irr = s->irr & ~s->elcr; |
279 | bool found = false; | 279 | bool found = false; |
280 | 280 | ||
281 | s->last_irr = 0; | 281 | s->last_irr = 0; |
282 | s->irr = 0; | 282 | s->irr &= s->elcr; |
283 | s->imr = 0; | 283 | s->imr = 0; |
284 | s->isr = 0; | ||
285 | s->priority_add = 0; | 284 | s->priority_add = 0; |
286 | s->irq_base = 0; | ||
287 | s->read_reg_select = 0; | ||
288 | s->poll = 0; | ||
289 | s->special_mask = 0; | 285 | s->special_mask = 0; |
290 | s->init_state = 0; | 286 | s->read_reg_select = 0; |
291 | s->auto_eoi = 0; | 287 | if (!s->init4) { |
292 | s->rotate_on_auto_eoi = 0; | 288 | s->special_fully_nested_mode = 0; |
293 | s->special_fully_nested_mode = 0; | 289 | s->auto_eoi = 0; |
294 | s->init4 = 0; | 290 | } |
291 | s->init_state = 1; | ||
295 | 292 | ||
296 | kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm) | 293 | kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm) |
297 | if (kvm_apic_accept_pic_intr(vcpu)) { | 294 | if (kvm_apic_accept_pic_intr(vcpu)) { |
@@ -304,7 +301,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s) | |||
304 | return; | 301 | return; |
305 | 302 | ||
306 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) | 303 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) |
307 | if (irr & (1 << irq) || isr & (1 << irq)) | 304 | if (edge_irr & (1 << irq)) |
308 | pic_clear_isr(s, irq); | 305 | pic_clear_isr(s, irq); |
309 | } | 306 | } |
310 | 307 | ||
@@ -316,40 +313,13 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
316 | addr &= 1; | 313 | addr &= 1; |
317 | if (addr == 0) { | 314 | if (addr == 0) { |
318 | if (val & 0x10) { | 315 | if (val & 0x10) { |
319 | u8 edge_irr = s->irr & ~s->elcr; | ||
320 | int i; | ||
321 | bool found = false; | ||
322 | struct kvm_vcpu *vcpu; | ||
323 | |||
324 | s->init4 = val & 1; | 316 | s->init4 = val & 1; |
325 | s->last_irr = 0; | ||
326 | s->irr &= s->elcr; | ||
327 | s->imr = 0; | ||
328 | s->priority_add = 0; | ||
329 | s->special_mask = 0; | ||
330 | s->read_reg_select = 0; | ||
331 | if (!s->init4) { | ||
332 | s->special_fully_nested_mode = 0; | ||
333 | s->auto_eoi = 0; | ||
334 | } | ||
335 | s->init_state = 1; | ||
336 | if (val & 0x02) | 317 | if (val & 0x02) |
337 | pr_pic_unimpl("single mode not supported"); | 318 | pr_pic_unimpl("single mode not supported"); |
338 | if (val & 0x08) | 319 | if (val & 0x08) |
339 | pr_pic_unimpl( | 320 | pr_pic_unimpl( |
340 | "level sensitive irq not supported"); | 321 | "level sensitive irq not supported"); |
341 | 322 | kvm_pic_reset(s); | |
342 | kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm) | ||
343 | if (kvm_apic_accept_pic_intr(vcpu)) { | ||
344 | found = true; | ||
345 | break; | ||
346 | } | ||
347 | |||
348 | |||
349 | if (found) | ||
350 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) | ||
351 | if (edge_irr & (1 << irq)) | ||
352 | pic_clear_isr(s, irq); | ||
353 | } else if (val & 0x08) { | 323 | } else if (val & 0x08) { |
354 | if (val & 0x04) | 324 | if (val & 0x04) |
355 | s->poll = 1; | 325 | s->poll = 1; |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2086f2bfba33..2d03568e9498 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -70,7 +70,7 @@ struct kvm_pic { | |||
70 | struct kvm_io_device dev_slave; | 70 | struct kvm_io_device dev_slave; |
71 | struct kvm_io_device dev_eclr; | 71 | struct kvm_io_device dev_eclr; |
72 | void (*ack_notifier)(void *opaque, int irq); | 72 | void (*ack_notifier)(void *opaque, int irq); |
73 | unsigned long irq_states[16]; | 73 | unsigned long irq_states[PIC_NUM_PINS]; |
74 | }; | 74 | }; |
75 | 75 | ||
76 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); | 76 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); |
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h deleted file mode 100644 index 497dbaa366d4..000000000000 --- a/arch/x86/kvm/kvm_timer.h +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | |||
2 | struct kvm_timer { | ||
3 | struct hrtimer timer; | ||
4 | s64 period; /* unit: ns */ | ||
5 | u32 timer_mode_mask; | ||
6 | u64 tscdeadline; | ||
7 | atomic_t pending; /* accumulated triggered timers */ | ||
8 | bool reinject; | ||
9 | struct kvm_timer_ops *t_ops; | ||
10 | struct kvm *kvm; | ||
11 | struct kvm_vcpu *vcpu; | ||
12 | }; | ||
13 | |||
14 | struct kvm_timer_ops { | ||
15 | bool (*is_periodic)(struct kvm_timer *); | ||
16 | }; | ||
17 | |||
18 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); | ||
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ce878788a39f..c6e6b721b6ee 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <asm/current.h> | 34 | #include <asm/current.h> |
35 | #include <asm/apicdef.h> | 35 | #include <asm/apicdef.h> |
36 | #include <linux/atomic.h> | 36 | #include <linux/atomic.h> |
37 | #include <linux/jump_label.h> | ||
37 | #include "kvm_cache_regs.h" | 38 | #include "kvm_cache_regs.h" |
38 | #include "irq.h" | 39 | #include "irq.h" |
39 | #include "trace.h" | 40 | #include "trace.h" |
@@ -65,6 +66,7 @@ | |||
65 | #define APIC_DEST_NOSHORT 0x0 | 66 | #define APIC_DEST_NOSHORT 0x0 |
66 | #define APIC_DEST_MASK 0x800 | 67 | #define APIC_DEST_MASK 0x800 |
67 | #define MAX_APIC_VECTOR 256 | 68 | #define MAX_APIC_VECTOR 256 |
69 | #define APIC_VECTORS_PER_REG 32 | ||
68 | 70 | ||
69 | #define VEC_POS(v) ((v) & (32 - 1)) | 71 | #define VEC_POS(v) ((v) & (32 - 1)) |
70 | #define REG_POS(v) (((v) >> 5) << 4) | 72 | #define REG_POS(v) (((v) >> 5) << 4) |
@@ -72,11 +74,6 @@ | |||
72 | static unsigned int min_timer_period_us = 500; | 74 | static unsigned int min_timer_period_us = 500; |
73 | module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); | 75 | module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); |
74 | 76 | ||
75 | static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off) | ||
76 | { | ||
77 | return *((u32 *) (apic->regs + reg_off)); | ||
78 | } | ||
79 | |||
80 | static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) | 77 | static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) |
81 | { | 78 | { |
82 | *((u32 *) (apic->regs + reg_off)) = val; | 79 | *((u32 *) (apic->regs + reg_off)) = val; |
@@ -117,19 +114,23 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap) | |||
117 | return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | 114 | return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); |
118 | } | 115 | } |
119 | 116 | ||
120 | static inline int apic_hw_enabled(struct kvm_lapic *apic) | 117 | struct static_key_deferred apic_hw_disabled __read_mostly; |
121 | { | 118 | struct static_key_deferred apic_sw_disabled __read_mostly; |
122 | return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; | ||
123 | } | ||
124 | 119 | ||
125 | static inline int apic_sw_enabled(struct kvm_lapic *apic) | 120 | static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) |
126 | { | 121 | { |
127 | return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; | 122 | if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) { |
123 | if (val & APIC_SPIV_APIC_ENABLED) | ||
124 | static_key_slow_dec_deferred(&apic_sw_disabled); | ||
125 | else | ||
126 | static_key_slow_inc(&apic_sw_disabled.key); | ||
127 | } | ||
128 | apic_set_reg(apic, APIC_SPIV, val); | ||
128 | } | 129 | } |
129 | 130 | ||
130 | static inline int apic_enabled(struct kvm_lapic *apic) | 131 | static inline int apic_enabled(struct kvm_lapic *apic) |
131 | { | 132 | { |
132 | return apic_sw_enabled(apic) && apic_hw_enabled(apic); | 133 | return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic); |
133 | } | 134 | } |
134 | 135 | ||
135 | #define LVT_MASK \ | 136 | #define LVT_MASK \ |
@@ -139,36 +140,135 @@ static inline int apic_enabled(struct kvm_lapic *apic) | |||
139 | (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ | 140 | (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ |
140 | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) | 141 | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) |
141 | 142 | ||
143 | static inline int apic_x2apic_mode(struct kvm_lapic *apic) | ||
144 | { | ||
145 | return apic->vcpu->arch.apic_base & X2APIC_ENABLE; | ||
146 | } | ||
147 | |||
142 | static inline int kvm_apic_id(struct kvm_lapic *apic) | 148 | static inline int kvm_apic_id(struct kvm_lapic *apic) |
143 | { | 149 | { |
144 | return (apic_get_reg(apic, APIC_ID) >> 24) & 0xff; | 150 | return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; |
151 | } | ||
152 | |||
153 | static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) | ||
154 | { | ||
155 | u16 cid; | ||
156 | ldr >>= 32 - map->ldr_bits; | ||
157 | cid = (ldr >> map->cid_shift) & map->cid_mask; | ||
158 | |||
159 | BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); | ||
160 | |||
161 | return cid; | ||
162 | } | ||
163 | |||
164 | static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) | ||
165 | { | ||
166 | ldr >>= (32 - map->ldr_bits); | ||
167 | return ldr & map->lid_mask; | ||
168 | } | ||
169 | |||
170 | static void recalculate_apic_map(struct kvm *kvm) | ||
171 | { | ||
172 | struct kvm_apic_map *new, *old = NULL; | ||
173 | struct kvm_vcpu *vcpu; | ||
174 | int i; | ||
175 | |||
176 | new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL); | ||
177 | |||
178 | mutex_lock(&kvm->arch.apic_map_lock); | ||
179 | |||
180 | if (!new) | ||
181 | goto out; | ||
182 | |||
183 | new->ldr_bits = 8; | ||
184 | /* flat mode is default */ | ||
185 | new->cid_shift = 8; | ||
186 | new->cid_mask = 0; | ||
187 | new->lid_mask = 0xff; | ||
188 | |||
189 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
190 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
191 | u16 cid, lid; | ||
192 | u32 ldr; | ||
193 | |||
194 | if (!kvm_apic_present(vcpu)) | ||
195 | continue; | ||
196 | |||
197 | /* | ||
198 | * All APICs have to be configured in the same mode by an OS. | ||
199 | * We take advantage of this while building logical id lookup | ||
200 | * table. After reset APICs are in xapic/flat mode, so if we | ||
201 | * find apic with different setting we assume this is the mode | ||
202 | * OS wants all apics to be in; build lookup table accordingly. | ||
203 | */ | ||
204 | if (apic_x2apic_mode(apic)) { | ||
205 | new->ldr_bits = 32; | ||
206 | new->cid_shift = 16; | ||
207 | new->cid_mask = new->lid_mask = 0xffff; | ||
208 | } else if (kvm_apic_sw_enabled(apic) && | ||
209 | !new->cid_mask /* flat mode */ && | ||
210 | kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) { | ||
211 | new->cid_shift = 4; | ||
212 | new->cid_mask = 0xf; | ||
213 | new->lid_mask = 0xf; | ||
214 | } | ||
215 | |||
216 | new->phys_map[kvm_apic_id(apic)] = apic; | ||
217 | |||
218 | ldr = kvm_apic_get_reg(apic, APIC_LDR); | ||
219 | cid = apic_cluster_id(new, ldr); | ||
220 | lid = apic_logical_id(new, ldr); | ||
221 | |||
222 | if (lid) | ||
223 | new->logical_map[cid][ffs(lid) - 1] = apic; | ||
224 | } | ||
225 | out: | ||
226 | old = rcu_dereference_protected(kvm->arch.apic_map, | ||
227 | lockdep_is_held(&kvm->arch.apic_map_lock)); | ||
228 | rcu_assign_pointer(kvm->arch.apic_map, new); | ||
229 | mutex_unlock(&kvm->arch.apic_map_lock); | ||
230 | |||
231 | if (old) | ||
232 | kfree_rcu(old, rcu); | ||
233 | } | ||
234 | |||
235 | static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) | ||
236 | { | ||
237 | apic_set_reg(apic, APIC_ID, id << 24); | ||
238 | recalculate_apic_map(apic->vcpu->kvm); | ||
239 | } | ||
240 | |||
241 | static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) | ||
242 | { | ||
243 | apic_set_reg(apic, APIC_LDR, id); | ||
244 | recalculate_apic_map(apic->vcpu->kvm); | ||
145 | } | 245 | } |
146 | 246 | ||
147 | static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) | 247 | static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) |
148 | { | 248 | { |
149 | return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); | 249 | return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); |
150 | } | 250 | } |
151 | 251 | ||
152 | static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) | 252 | static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) |
153 | { | 253 | { |
154 | return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; | 254 | return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; |
155 | } | 255 | } |
156 | 256 | ||
157 | static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) | 257 | static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) |
158 | { | 258 | { |
159 | return ((apic_get_reg(apic, APIC_LVTT) & | 259 | return ((kvm_apic_get_reg(apic, APIC_LVTT) & |
160 | apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT); | 260 | apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT); |
161 | } | 261 | } |
162 | 262 | ||
163 | static inline int apic_lvtt_period(struct kvm_lapic *apic) | 263 | static inline int apic_lvtt_period(struct kvm_lapic *apic) |
164 | { | 264 | { |
165 | return ((apic_get_reg(apic, APIC_LVTT) & | 265 | return ((kvm_apic_get_reg(apic, APIC_LVTT) & |
166 | apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC); | 266 | apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC); |
167 | } | 267 | } |
168 | 268 | ||
169 | static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) | 269 | static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) |
170 | { | 270 | { |
171 | return ((apic_get_reg(apic, APIC_LVTT) & | 271 | return ((kvm_apic_get_reg(apic, APIC_LVTT) & |
172 | apic->lapic_timer.timer_mode_mask) == | 272 | apic->lapic_timer.timer_mode_mask) == |
173 | APIC_LVT_TIMER_TSCDEADLINE); | 273 | APIC_LVT_TIMER_TSCDEADLINE); |
174 | } | 274 | } |
@@ -184,7 +284,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) | |||
184 | struct kvm_cpuid_entry2 *feat; | 284 | struct kvm_cpuid_entry2 *feat; |
185 | u32 v = APIC_VERSION; | 285 | u32 v = APIC_VERSION; |
186 | 286 | ||
187 | if (!irqchip_in_kernel(vcpu->kvm)) | 287 | if (!kvm_vcpu_has_lapic(vcpu)) |
188 | return; | 288 | return; |
189 | 289 | ||
190 | feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); | 290 | feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); |
@@ -193,12 +293,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) | |||
193 | apic_set_reg(apic, APIC_LVR, v); | 293 | apic_set_reg(apic, APIC_LVR, v); |
194 | } | 294 | } |
195 | 295 | ||
196 | static inline int apic_x2apic_mode(struct kvm_lapic *apic) | 296 | static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = { |
197 | { | ||
198 | return apic->vcpu->arch.apic_base & X2APIC_ENABLE; | ||
199 | } | ||
200 | |||
201 | static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { | ||
202 | LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ | 297 | LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ |
203 | LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ | 298 | LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ |
204 | LVT_MASK | APIC_MODE_MASK, /* LVTPC */ | 299 | LVT_MASK | APIC_MODE_MASK, /* LVTPC */ |
@@ -208,25 +303,30 @@ static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { | |||
208 | 303 | ||
209 | static int find_highest_vector(void *bitmap) | 304 | static int find_highest_vector(void *bitmap) |
210 | { | 305 | { |
211 | u32 *word = bitmap; | 306 | int vec; |
212 | int word_offset = MAX_APIC_VECTOR >> 5; | 307 | u32 *reg; |
213 | 308 | ||
214 | while ((word_offset != 0) && (word[(--word_offset) << 2] == 0)) | 309 | for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; |
215 | continue; | 310 | vec >= 0; vec -= APIC_VECTORS_PER_REG) { |
311 | reg = bitmap + REG_POS(vec); | ||
312 | if (*reg) | ||
313 | return fls(*reg) - 1 + vec; | ||
314 | } | ||
216 | 315 | ||
217 | if (likely(!word_offset && !word[0])) | 316 | return -1; |
218 | return -1; | ||
219 | else | ||
220 | return fls(word[word_offset << 2]) - 1 + (word_offset << 5); | ||
221 | } | 317 | } |
222 | 318 | ||
223 | static u8 count_vectors(void *bitmap) | 319 | static u8 count_vectors(void *bitmap) |
224 | { | 320 | { |
225 | u32 *word = bitmap; | 321 | int vec; |
226 | int word_offset; | 322 | u32 *reg; |
227 | u8 count = 0; | 323 | u8 count = 0; |
228 | for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset) | 324 | |
229 | count += hweight32(word[word_offset << 2]); | 325 | for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) { |
326 | reg = bitmap + REG_POS(vec); | ||
327 | count += hweight32(*reg); | ||
328 | } | ||
329 | |||
230 | return count; | 330 | return count; |
231 | } | 331 | } |
232 | 332 | ||
@@ -285,7 +385,6 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) | |||
285 | 385 | ||
286 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) | 386 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) |
287 | { | 387 | { |
288 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
289 | int highest_irr; | 388 | int highest_irr; |
290 | 389 | ||
291 | /* This may race with setting of irr in __apic_accept_irq() and | 390 | /* This may race with setting of irr in __apic_accept_irq() and |
@@ -293,9 +392,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) | |||
293 | * will cause vmexit immediately and the value will be recalculated | 392 | * will cause vmexit immediately and the value will be recalculated |
294 | * on the next vmentry. | 393 | * on the next vmentry. |
295 | */ | 394 | */ |
296 | if (!apic) | 395 | if (!kvm_vcpu_has_lapic(vcpu)) |
297 | return 0; | 396 | return 0; |
298 | highest_irr = apic_find_highest_irr(apic); | 397 | highest_irr = apic_find_highest_irr(vcpu->arch.apic); |
299 | 398 | ||
300 | return highest_irr; | 399 | return highest_irr; |
301 | } | 400 | } |
@@ -378,8 +477,8 @@ static void apic_update_ppr(struct kvm_lapic *apic) | |||
378 | u32 tpr, isrv, ppr, old_ppr; | 477 | u32 tpr, isrv, ppr, old_ppr; |
379 | int isr; | 478 | int isr; |
380 | 479 | ||
381 | old_ppr = apic_get_reg(apic, APIC_PROCPRI); | 480 | old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI); |
382 | tpr = apic_get_reg(apic, APIC_TASKPRI); | 481 | tpr = kvm_apic_get_reg(apic, APIC_TASKPRI); |
383 | isr = apic_find_highest_isr(apic); | 482 | isr = apic_find_highest_isr(apic); |
384 | isrv = (isr != -1) ? isr : 0; | 483 | isrv = (isr != -1) ? isr : 0; |
385 | 484 | ||
@@ -415,13 +514,13 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) | |||
415 | u32 logical_id; | 514 | u32 logical_id; |
416 | 515 | ||
417 | if (apic_x2apic_mode(apic)) { | 516 | if (apic_x2apic_mode(apic)) { |
418 | logical_id = apic_get_reg(apic, APIC_LDR); | 517 | logical_id = kvm_apic_get_reg(apic, APIC_LDR); |
419 | return logical_id & mda; | 518 | return logical_id & mda; |
420 | } | 519 | } |
421 | 520 | ||
422 | logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR)); | 521 | logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR)); |
423 | 522 | ||
424 | switch (apic_get_reg(apic, APIC_DFR)) { | 523 | switch (kvm_apic_get_reg(apic, APIC_DFR)) { |
425 | case APIC_DFR_FLAT: | 524 | case APIC_DFR_FLAT: |
426 | if (logical_id & mda) | 525 | if (logical_id & mda) |
427 | result = 1; | 526 | result = 1; |
@@ -433,7 +532,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) | |||
433 | break; | 532 | break; |
434 | default: | 533 | default: |
435 | apic_debug("Bad DFR vcpu %d: %08x\n", | 534 | apic_debug("Bad DFR vcpu %d: %08x\n", |
436 | apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR)); | 535 | apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR)); |
437 | break; | 536 | break; |
438 | } | 537 | } |
439 | 538 | ||
@@ -478,6 +577,72 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | |||
478 | return result; | 577 | return result; |
479 | } | 578 | } |
480 | 579 | ||
580 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | ||
581 | struct kvm_lapic_irq *irq, int *r) | ||
582 | { | ||
583 | struct kvm_apic_map *map; | ||
584 | unsigned long bitmap = 1; | ||
585 | struct kvm_lapic **dst; | ||
586 | int i; | ||
587 | bool ret = false; | ||
588 | |||
589 | *r = -1; | ||
590 | |||
591 | if (irq->shorthand == APIC_DEST_SELF) { | ||
592 | *r = kvm_apic_set_irq(src->vcpu, irq); | ||
593 | return true; | ||
594 | } | ||
595 | |||
596 | if (irq->shorthand) | ||
597 | return false; | ||
598 | |||
599 | rcu_read_lock(); | ||
600 | map = rcu_dereference(kvm->arch.apic_map); | ||
601 | |||
602 | if (!map) | ||
603 | goto out; | ||
604 | |||
605 | if (irq->dest_mode == 0) { /* physical mode */ | ||
606 | if (irq->delivery_mode == APIC_DM_LOWEST || | ||
607 | irq->dest_id == 0xff) | ||
608 | goto out; | ||
609 | dst = &map->phys_map[irq->dest_id & 0xff]; | ||
610 | } else { | ||
611 | u32 mda = irq->dest_id << (32 - map->ldr_bits); | ||
612 | |||
613 | dst = map->logical_map[apic_cluster_id(map, mda)]; | ||
614 | |||
615 | bitmap = apic_logical_id(map, mda); | ||
616 | |||
617 | if (irq->delivery_mode == APIC_DM_LOWEST) { | ||
618 | int l = -1; | ||
619 | for_each_set_bit(i, &bitmap, 16) { | ||
620 | if (!dst[i]) | ||
621 | continue; | ||
622 | if (l < 0) | ||
623 | l = i; | ||
624 | else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0) | ||
625 | l = i; | ||
626 | } | ||
627 | |||
628 | bitmap = (l >= 0) ? 1 << l : 0; | ||
629 | } | ||
630 | } | ||
631 | |||
632 | for_each_set_bit(i, &bitmap, 16) { | ||
633 | if (!dst[i]) | ||
634 | continue; | ||
635 | if (*r < 0) | ||
636 | *r = 0; | ||
637 | *r += kvm_apic_set_irq(dst[i]->vcpu, irq); | ||
638 | } | ||
639 | |||
640 | ret = true; | ||
641 | out: | ||
642 | rcu_read_unlock(); | ||
643 | return ret; | ||
644 | } | ||
645 | |||
481 | /* | 646 | /* |
482 | * Add a pending IRQ into lapic. | 647 | * Add a pending IRQ into lapic. |
483 | * Return 1 if successfully added and 0 if discarded. | 648 | * Return 1 if successfully added and 0 if discarded. |
@@ -591,7 +756,7 @@ static int apic_set_eoi(struct kvm_lapic *apic) | |||
591 | apic_clear_isr(vector, apic); | 756 | apic_clear_isr(vector, apic); |
592 | apic_update_ppr(apic); | 757 | apic_update_ppr(apic); |
593 | 758 | ||
594 | if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && | 759 | if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && |
595 | kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { | 760 | kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { |
596 | int trigger_mode; | 761 | int trigger_mode; |
597 | if (apic_test_vector(vector, apic->regs + APIC_TMR)) | 762 | if (apic_test_vector(vector, apic->regs + APIC_TMR)) |
@@ -606,8 +771,8 @@ static int apic_set_eoi(struct kvm_lapic *apic) | |||
606 | 771 | ||
607 | static void apic_send_ipi(struct kvm_lapic *apic) | 772 | static void apic_send_ipi(struct kvm_lapic *apic) |
608 | { | 773 | { |
609 | u32 icr_low = apic_get_reg(apic, APIC_ICR); | 774 | u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR); |
610 | u32 icr_high = apic_get_reg(apic, APIC_ICR2); | 775 | u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2); |
611 | struct kvm_lapic_irq irq; | 776 | struct kvm_lapic_irq irq; |
612 | 777 | ||
613 | irq.vector = icr_low & APIC_VECTOR_MASK; | 778 | irq.vector = icr_low & APIC_VECTOR_MASK; |
@@ -642,7 +807,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) | |||
642 | ASSERT(apic != NULL); | 807 | ASSERT(apic != NULL); |
643 | 808 | ||
644 | /* if initial count is 0, current count should also be 0 */ | 809 | /* if initial count is 0, current count should also be 0 */ |
645 | if (apic_get_reg(apic, APIC_TMICT) == 0) | 810 | if (kvm_apic_get_reg(apic, APIC_TMICT) == 0) |
646 | return 0; | 811 | return 0; |
647 | 812 | ||
648 | remaining = hrtimer_get_remaining(&apic->lapic_timer.timer); | 813 | remaining = hrtimer_get_remaining(&apic->lapic_timer.timer); |
@@ -696,13 +861,15 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) | |||
696 | 861 | ||
697 | val = apic_get_tmcct(apic); | 862 | val = apic_get_tmcct(apic); |
698 | break; | 863 | break; |
699 | 864 | case APIC_PROCPRI: | |
865 | apic_update_ppr(apic); | ||
866 | val = kvm_apic_get_reg(apic, offset); | ||
867 | break; | ||
700 | case APIC_TASKPRI: | 868 | case APIC_TASKPRI: |
701 | report_tpr_access(apic, false); | 869 | report_tpr_access(apic, false); |
702 | /* fall thru */ | 870 | /* fall thru */ |
703 | default: | 871 | default: |
704 | apic_update_ppr(apic); | 872 | val = kvm_apic_get_reg(apic, offset); |
705 | val = apic_get_reg(apic, offset); | ||
706 | break; | 873 | break; |
707 | } | 874 | } |
708 | 875 | ||
@@ -719,7 +886,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len, | |||
719 | { | 886 | { |
720 | unsigned char alignment = offset & 0xf; | 887 | unsigned char alignment = offset & 0xf; |
721 | u32 result; | 888 | u32 result; |
722 | /* this bitmask has a bit cleared for each reserver register */ | 889 | /* this bitmask has a bit cleared for each reserved register */ |
723 | static const u64 rmask = 0x43ff01ffffffe70cULL; | 890 | static const u64 rmask = 0x43ff01ffffffe70cULL; |
724 | 891 | ||
725 | if ((alignment + len) > 4) { | 892 | if ((alignment + len) > 4) { |
@@ -754,7 +921,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len, | |||
754 | 921 | ||
755 | static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) | 922 | static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) |
756 | { | 923 | { |
757 | return apic_hw_enabled(apic) && | 924 | return kvm_apic_hw_enabled(apic) && |
758 | addr >= apic->base_address && | 925 | addr >= apic->base_address && |
759 | addr < apic->base_address + LAPIC_MMIO_LENGTH; | 926 | addr < apic->base_address + LAPIC_MMIO_LENGTH; |
760 | } | 927 | } |
@@ -777,7 +944,7 @@ static void update_divide_count(struct kvm_lapic *apic) | |||
777 | { | 944 | { |
778 | u32 tmp1, tmp2, tdcr; | 945 | u32 tmp1, tmp2, tdcr; |
779 | 946 | ||
780 | tdcr = apic_get_reg(apic, APIC_TDCR); | 947 | tdcr = kvm_apic_get_reg(apic, APIC_TDCR); |
781 | tmp1 = tdcr & 0xf; | 948 | tmp1 = tdcr & 0xf; |
782 | tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; | 949 | tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; |
783 | apic->divide_count = 0x1 << (tmp2 & 0x7); | 950 | apic->divide_count = 0x1 << (tmp2 & 0x7); |
@@ -792,9 +959,9 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
792 | atomic_set(&apic->lapic_timer.pending, 0); | 959 | atomic_set(&apic->lapic_timer.pending, 0); |
793 | 960 | ||
794 | if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { | 961 | if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { |
795 | /* lapic timer in oneshot or peroidic mode */ | 962 | /* lapic timer in oneshot or periodic mode */ |
796 | now = apic->lapic_timer.timer.base->get_time(); | 963 | now = apic->lapic_timer.timer.base->get_time(); |
797 | apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) | 964 | apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT) |
798 | * APIC_BUS_CYCLE_NS * apic->divide_count; | 965 | * APIC_BUS_CYCLE_NS * apic->divide_count; |
799 | 966 | ||
800 | if (!apic->lapic_timer.period) | 967 | if (!apic->lapic_timer.period) |
@@ -826,7 +993,7 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
826 | "timer initial count 0x%x, period %lldns, " | 993 | "timer initial count 0x%x, period %lldns, " |
827 | "expire @ 0x%016" PRIx64 ".\n", __func__, | 994 | "expire @ 0x%016" PRIx64 ".\n", __func__, |
828 | APIC_BUS_CYCLE_NS, ktime_to_ns(now), | 995 | APIC_BUS_CYCLE_NS, ktime_to_ns(now), |
829 | apic_get_reg(apic, APIC_TMICT), | 996 | kvm_apic_get_reg(apic, APIC_TMICT), |
830 | apic->lapic_timer.period, | 997 | apic->lapic_timer.period, |
831 | ktime_to_ns(ktime_add_ns(now, | 998 | ktime_to_ns(ktime_add_ns(now, |
832 | apic->lapic_timer.period))); | 999 | apic->lapic_timer.period))); |
@@ -858,7 +1025,7 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
858 | 1025 | ||
859 | static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) | 1026 | static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) |
860 | { | 1027 | { |
861 | int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0)); | 1028 | int nmi_wd_enabled = apic_lvt_nmi_mode(kvm_apic_get_reg(apic, APIC_LVT0)); |
862 | 1029 | ||
863 | if (apic_lvt_nmi_mode(lvt0_val)) { | 1030 | if (apic_lvt_nmi_mode(lvt0_val)) { |
864 | if (!nmi_wd_enabled) { | 1031 | if (!nmi_wd_enabled) { |
@@ -879,7 +1046,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) | |||
879 | switch (reg) { | 1046 | switch (reg) { |
880 | case APIC_ID: /* Local APIC ID */ | 1047 | case APIC_ID: /* Local APIC ID */ |
881 | if (!apic_x2apic_mode(apic)) | 1048 | if (!apic_x2apic_mode(apic)) |
882 | apic_set_reg(apic, APIC_ID, val); | 1049 | kvm_apic_set_id(apic, val >> 24); |
883 | else | 1050 | else |
884 | ret = 1; | 1051 | ret = 1; |
885 | break; | 1052 | break; |
@@ -895,29 +1062,30 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) | |||
895 | 1062 | ||
896 | case APIC_LDR: | 1063 | case APIC_LDR: |
897 | if (!apic_x2apic_mode(apic)) | 1064 | if (!apic_x2apic_mode(apic)) |
898 | apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK); | 1065 | kvm_apic_set_ldr(apic, val & APIC_LDR_MASK); |
899 | else | 1066 | else |
900 | ret = 1; | 1067 | ret = 1; |
901 | break; | 1068 | break; |
902 | 1069 | ||
903 | case APIC_DFR: | 1070 | case APIC_DFR: |
904 | if (!apic_x2apic_mode(apic)) | 1071 | if (!apic_x2apic_mode(apic)) { |
905 | apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); | 1072 | apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); |
906 | else | 1073 | recalculate_apic_map(apic->vcpu->kvm); |
1074 | } else | ||
907 | ret = 1; | 1075 | ret = 1; |
908 | break; | 1076 | break; |
909 | 1077 | ||
910 | case APIC_SPIV: { | 1078 | case APIC_SPIV: { |
911 | u32 mask = 0x3ff; | 1079 | u32 mask = 0x3ff; |
912 | if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) | 1080 | if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) |
913 | mask |= APIC_SPIV_DIRECTED_EOI; | 1081 | mask |= APIC_SPIV_DIRECTED_EOI; |
914 | apic_set_reg(apic, APIC_SPIV, val & mask); | 1082 | apic_set_spiv(apic, val & mask); |
915 | if (!(val & APIC_SPIV_APIC_ENABLED)) { | 1083 | if (!(val & APIC_SPIV_APIC_ENABLED)) { |
916 | int i; | 1084 | int i; |
917 | u32 lvt_val; | 1085 | u32 lvt_val; |
918 | 1086 | ||
919 | for (i = 0; i < APIC_LVT_NUM; i++) { | 1087 | for (i = 0; i < APIC_LVT_NUM; i++) { |
920 | lvt_val = apic_get_reg(apic, | 1088 | lvt_val = kvm_apic_get_reg(apic, |
921 | APIC_LVTT + 0x10 * i); | 1089 | APIC_LVTT + 0x10 * i); |
922 | apic_set_reg(apic, APIC_LVTT + 0x10 * i, | 1090 | apic_set_reg(apic, APIC_LVTT + 0x10 * i, |
923 | lvt_val | APIC_LVT_MASKED); | 1091 | lvt_val | APIC_LVT_MASKED); |
@@ -946,7 +1114,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) | |||
946 | case APIC_LVT1: | 1114 | case APIC_LVT1: |
947 | case APIC_LVTERR: | 1115 | case APIC_LVTERR: |
948 | /* TODO: Check vector */ | 1116 | /* TODO: Check vector */ |
949 | if (!apic_sw_enabled(apic)) | 1117 | if (!kvm_apic_sw_enabled(apic)) |
950 | val |= APIC_LVT_MASKED; | 1118 | val |= APIC_LVT_MASKED; |
951 | 1119 | ||
952 | val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; | 1120 | val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; |
@@ -955,12 +1123,12 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) | |||
955 | break; | 1123 | break; |
956 | 1124 | ||
957 | case APIC_LVTT: | 1125 | case APIC_LVTT: |
958 | if ((apic_get_reg(apic, APIC_LVTT) & | 1126 | if ((kvm_apic_get_reg(apic, APIC_LVTT) & |
959 | apic->lapic_timer.timer_mode_mask) != | 1127 | apic->lapic_timer.timer_mode_mask) != |
960 | (val & apic->lapic_timer.timer_mode_mask)) | 1128 | (val & apic->lapic_timer.timer_mode_mask)) |
961 | hrtimer_cancel(&apic->lapic_timer.timer); | 1129 | hrtimer_cancel(&apic->lapic_timer.timer); |
962 | 1130 | ||
963 | if (!apic_sw_enabled(apic)) | 1131 | if (!kvm_apic_sw_enabled(apic)) |
964 | val |= APIC_LVT_MASKED; | 1132 | val |= APIC_LVT_MASKED; |
965 | val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); | 1133 | val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); |
966 | apic_set_reg(apic, APIC_LVTT, val); | 1134 | apic_set_reg(apic, APIC_LVTT, val); |
@@ -1039,24 +1207,30 @@ static int apic_mmio_write(struct kvm_io_device *this, | |||
1039 | 1207 | ||
1040 | void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) | 1208 | void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) |
1041 | { | 1209 | { |
1042 | struct kvm_lapic *apic = vcpu->arch.apic; | 1210 | if (kvm_vcpu_has_lapic(vcpu)) |
1043 | |||
1044 | if (apic) | ||
1045 | apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); | 1211 | apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); |
1046 | } | 1212 | } |
1047 | EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); | 1213 | EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); |
1048 | 1214 | ||
1049 | void kvm_free_lapic(struct kvm_vcpu *vcpu) | 1215 | void kvm_free_lapic(struct kvm_vcpu *vcpu) |
1050 | { | 1216 | { |
1217 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
1218 | |||
1051 | if (!vcpu->arch.apic) | 1219 | if (!vcpu->arch.apic) |
1052 | return; | 1220 | return; |
1053 | 1221 | ||
1054 | hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer); | 1222 | hrtimer_cancel(&apic->lapic_timer.timer); |
1223 | |||
1224 | if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) | ||
1225 | static_key_slow_dec_deferred(&apic_hw_disabled); | ||
1226 | |||
1227 | if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED)) | ||
1228 | static_key_slow_dec_deferred(&apic_sw_disabled); | ||
1055 | 1229 | ||
1056 | if (vcpu->arch.apic->regs) | 1230 | if (apic->regs) |
1057 | free_page((unsigned long)vcpu->arch.apic->regs); | 1231 | free_page((unsigned long)apic->regs); |
1058 | 1232 | ||
1059 | kfree(vcpu->arch.apic); | 1233 | kfree(apic); |
1060 | } | 1234 | } |
1061 | 1235 | ||
1062 | /* | 1236 | /* |
@@ -1068,10 +1242,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) | |||
1068 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) | 1242 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) |
1069 | { | 1243 | { |
1070 | struct kvm_lapic *apic = vcpu->arch.apic; | 1244 | struct kvm_lapic *apic = vcpu->arch.apic; |
1071 | if (!apic) | ||
1072 | return 0; | ||
1073 | 1245 | ||
1074 | if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) | 1246 | if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || |
1247 | apic_lvtt_period(apic)) | ||
1075 | return 0; | 1248 | return 0; |
1076 | 1249 | ||
1077 | return apic->lapic_timer.tscdeadline; | 1250 | return apic->lapic_timer.tscdeadline; |
@@ -1080,10 +1253,9 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) | |||
1080 | void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) | 1253 | void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) |
1081 | { | 1254 | { |
1082 | struct kvm_lapic *apic = vcpu->arch.apic; | 1255 | struct kvm_lapic *apic = vcpu->arch.apic; |
1083 | if (!apic) | ||
1084 | return; | ||
1085 | 1256 | ||
1086 | if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) | 1257 | if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || |
1258 | apic_lvtt_period(apic)) | ||
1087 | return; | 1259 | return; |
1088 | 1260 | ||
1089 | hrtimer_cancel(&apic->lapic_timer.timer); | 1261 | hrtimer_cancel(&apic->lapic_timer.timer); |
@@ -1095,20 +1267,21 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) | |||
1095 | { | 1267 | { |
1096 | struct kvm_lapic *apic = vcpu->arch.apic; | 1268 | struct kvm_lapic *apic = vcpu->arch.apic; |
1097 | 1269 | ||
1098 | if (!apic) | 1270 | if (!kvm_vcpu_has_lapic(vcpu)) |
1099 | return; | 1271 | return; |
1272 | |||
1100 | apic_set_tpr(apic, ((cr8 & 0x0f) << 4) | 1273 | apic_set_tpr(apic, ((cr8 & 0x0f) << 4) |
1101 | | (apic_get_reg(apic, APIC_TASKPRI) & 4)); | 1274 | | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4)); |
1102 | } | 1275 | } |
1103 | 1276 | ||
1104 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) | 1277 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) |
1105 | { | 1278 | { |
1106 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
1107 | u64 tpr; | 1279 | u64 tpr; |
1108 | 1280 | ||
1109 | if (!apic) | 1281 | if (!kvm_vcpu_has_lapic(vcpu)) |
1110 | return 0; | 1282 | return 0; |
1111 | tpr = (u64) apic_get_reg(apic, APIC_TASKPRI); | 1283 | |
1284 | tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI); | ||
1112 | 1285 | ||
1113 | return (tpr & 0xf0) >> 4; | 1286 | return (tpr & 0xf0) >> 4; |
1114 | } | 1287 | } |
@@ -1123,6 +1296,15 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) | |||
1123 | return; | 1296 | return; |
1124 | } | 1297 | } |
1125 | 1298 | ||
1299 | /* update jump label if enable bit changes */ | ||
1300 | if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) { | ||
1301 | if (value & MSR_IA32_APICBASE_ENABLE) | ||
1302 | static_key_slow_dec_deferred(&apic_hw_disabled); | ||
1303 | else | ||
1304 | static_key_slow_inc(&apic_hw_disabled.key); | ||
1305 | recalculate_apic_map(vcpu->kvm); | ||
1306 | } | ||
1307 | |||
1126 | if (!kvm_vcpu_is_bsp(apic->vcpu)) | 1308 | if (!kvm_vcpu_is_bsp(apic->vcpu)) |
1127 | value &= ~MSR_IA32_APICBASE_BSP; | 1309 | value &= ~MSR_IA32_APICBASE_BSP; |
1128 | 1310 | ||
@@ -1130,7 +1312,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) | |||
1130 | if (apic_x2apic_mode(apic)) { | 1312 | if (apic_x2apic_mode(apic)) { |
1131 | u32 id = kvm_apic_id(apic); | 1313 | u32 id = kvm_apic_id(apic); |
1132 | u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf)); | 1314 | u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf)); |
1133 | apic_set_reg(apic, APIC_LDR, ldr); | 1315 | kvm_apic_set_ldr(apic, ldr); |
1134 | } | 1316 | } |
1135 | apic->base_address = apic->vcpu->arch.apic_base & | 1317 | apic->base_address = apic->vcpu->arch.apic_base & |
1136 | MSR_IA32_APICBASE_BASE; | 1318 | MSR_IA32_APICBASE_BASE; |
@@ -1155,7 +1337,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
1155 | /* Stop the timer in case it's a reset to an active apic */ | 1337 | /* Stop the timer in case it's a reset to an active apic */ |
1156 | hrtimer_cancel(&apic->lapic_timer.timer); | 1338 | hrtimer_cancel(&apic->lapic_timer.timer); |
1157 | 1339 | ||
1158 | apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24); | 1340 | kvm_apic_set_id(apic, vcpu->vcpu_id); |
1159 | kvm_apic_set_version(apic->vcpu); | 1341 | kvm_apic_set_version(apic->vcpu); |
1160 | 1342 | ||
1161 | for (i = 0; i < APIC_LVT_NUM; i++) | 1343 | for (i = 0; i < APIC_LVT_NUM; i++) |
@@ -1164,9 +1346,9 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
1164 | SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); | 1346 | SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); |
1165 | 1347 | ||
1166 | apic_set_reg(apic, APIC_DFR, 0xffffffffU); | 1348 | apic_set_reg(apic, APIC_DFR, 0xffffffffU); |
1167 | apic_set_reg(apic, APIC_SPIV, 0xff); | 1349 | apic_set_spiv(apic, 0xff); |
1168 | apic_set_reg(apic, APIC_TASKPRI, 0); | 1350 | apic_set_reg(apic, APIC_TASKPRI, 0); |
1169 | apic_set_reg(apic, APIC_LDR, 0); | 1351 | kvm_apic_set_ldr(apic, 0); |
1170 | apic_set_reg(apic, APIC_ESR, 0); | 1352 | apic_set_reg(apic, APIC_ESR, 0); |
1171 | apic_set_reg(apic, APIC_ICR, 0); | 1353 | apic_set_reg(apic, APIC_ICR, 0); |
1172 | apic_set_reg(apic, APIC_ICR2, 0); | 1354 | apic_set_reg(apic, APIC_ICR2, 0); |
@@ -1183,7 +1365,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
1183 | update_divide_count(apic); | 1365 | update_divide_count(apic); |
1184 | atomic_set(&apic->lapic_timer.pending, 0); | 1366 | atomic_set(&apic->lapic_timer.pending, 0); |
1185 | if (kvm_vcpu_is_bsp(vcpu)) | 1367 | if (kvm_vcpu_is_bsp(vcpu)) |
1186 | vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; | 1368 | kvm_lapic_set_base(vcpu, |
1369 | vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP); | ||
1187 | vcpu->arch.pv_eoi.msr_val = 0; | 1370 | vcpu->arch.pv_eoi.msr_val = 0; |
1188 | apic_update_ppr(apic); | 1371 | apic_update_ppr(apic); |
1189 | 1372 | ||
@@ -1196,45 +1379,34 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
1196 | vcpu->arch.apic_base, apic->base_address); | 1379 | vcpu->arch.apic_base, apic->base_address); |
1197 | } | 1380 | } |
1198 | 1381 | ||
1199 | bool kvm_apic_present(struct kvm_vcpu *vcpu) | ||
1200 | { | ||
1201 | return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic); | ||
1202 | } | ||
1203 | |||
1204 | int kvm_lapic_enabled(struct kvm_vcpu *vcpu) | ||
1205 | { | ||
1206 | return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic); | ||
1207 | } | ||
1208 | |||
1209 | /* | 1382 | /* |
1210 | *---------------------------------------------------------------------- | 1383 | *---------------------------------------------------------------------- |
1211 | * timer interface | 1384 | * timer interface |
1212 | *---------------------------------------------------------------------- | 1385 | *---------------------------------------------------------------------- |
1213 | */ | 1386 | */ |
1214 | 1387 | ||
1215 | static bool lapic_is_periodic(struct kvm_timer *ktimer) | 1388 | static bool lapic_is_periodic(struct kvm_lapic *apic) |
1216 | { | 1389 | { |
1217 | struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, | ||
1218 | lapic_timer); | ||
1219 | return apic_lvtt_period(apic); | 1390 | return apic_lvtt_period(apic); |
1220 | } | 1391 | } |
1221 | 1392 | ||
1222 | int apic_has_pending_timer(struct kvm_vcpu *vcpu) | 1393 | int apic_has_pending_timer(struct kvm_vcpu *vcpu) |
1223 | { | 1394 | { |
1224 | struct kvm_lapic *lapic = vcpu->arch.apic; | 1395 | struct kvm_lapic *apic = vcpu->arch.apic; |
1225 | 1396 | ||
1226 | if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT)) | 1397 | if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) && |
1227 | return atomic_read(&lapic->lapic_timer.pending); | 1398 | apic_lvt_enabled(apic, APIC_LVTT)) |
1399 | return atomic_read(&apic->lapic_timer.pending); | ||
1228 | 1400 | ||
1229 | return 0; | 1401 | return 0; |
1230 | } | 1402 | } |
1231 | 1403 | ||
1232 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) | 1404 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) |
1233 | { | 1405 | { |
1234 | u32 reg = apic_get_reg(apic, lvt_type); | 1406 | u32 reg = kvm_apic_get_reg(apic, lvt_type); |
1235 | int vector, mode, trig_mode; | 1407 | int vector, mode, trig_mode; |
1236 | 1408 | ||
1237 | if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { | 1409 | if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { |
1238 | vector = reg & APIC_VECTOR_MASK; | 1410 | vector = reg & APIC_VECTOR_MASK; |
1239 | mode = reg & APIC_MODE_MASK; | 1411 | mode = reg & APIC_MODE_MASK; |
1240 | trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; | 1412 | trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; |
@@ -1251,15 +1423,40 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) | |||
1251 | kvm_apic_local_deliver(apic, APIC_LVT0); | 1423 | kvm_apic_local_deliver(apic, APIC_LVT0); |
1252 | } | 1424 | } |
1253 | 1425 | ||
1254 | static struct kvm_timer_ops lapic_timer_ops = { | ||
1255 | .is_periodic = lapic_is_periodic, | ||
1256 | }; | ||
1257 | |||
1258 | static const struct kvm_io_device_ops apic_mmio_ops = { | 1426 | static const struct kvm_io_device_ops apic_mmio_ops = { |
1259 | .read = apic_mmio_read, | 1427 | .read = apic_mmio_read, |
1260 | .write = apic_mmio_write, | 1428 | .write = apic_mmio_write, |
1261 | }; | 1429 | }; |
1262 | 1430 | ||
1431 | static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) | ||
1432 | { | ||
1433 | struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); | ||
1434 | struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer); | ||
1435 | struct kvm_vcpu *vcpu = apic->vcpu; | ||
1436 | wait_queue_head_t *q = &vcpu->wq; | ||
1437 | |||
1438 | /* | ||
1439 | * There is a race window between reading and incrementing, but we do | ||
1440 | * not care about potentially losing timer events in the !reinject | ||
1441 | * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked | ||
1442 | * in vcpu_enter_guest. | ||
1443 | */ | ||
1444 | if (!atomic_read(&ktimer->pending)) { | ||
1445 | atomic_inc(&ktimer->pending); | ||
1446 | /* FIXME: this code should not know anything about vcpus */ | ||
1447 | kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); | ||
1448 | } | ||
1449 | |||
1450 | if (waitqueue_active(q)) | ||
1451 | wake_up_interruptible(q); | ||
1452 | |||
1453 | if (lapic_is_periodic(apic)) { | ||
1454 | hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); | ||
1455 | return HRTIMER_RESTART; | ||
1456 | } else | ||
1457 | return HRTIMER_NORESTART; | ||
1458 | } | ||
1459 | |||
1263 | int kvm_create_lapic(struct kvm_vcpu *vcpu) | 1460 | int kvm_create_lapic(struct kvm_vcpu *vcpu) |
1264 | { | 1461 | { |
1265 | struct kvm_lapic *apic; | 1462 | struct kvm_lapic *apic; |
@@ -1283,14 +1480,17 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) | |||
1283 | 1480 | ||
1284 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, | 1481 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, |
1285 | HRTIMER_MODE_ABS); | 1482 | HRTIMER_MODE_ABS); |
1286 | apic->lapic_timer.timer.function = kvm_timer_fn; | 1483 | apic->lapic_timer.timer.function = apic_timer_fn; |
1287 | apic->lapic_timer.t_ops = &lapic_timer_ops; | ||
1288 | apic->lapic_timer.kvm = vcpu->kvm; | ||
1289 | apic->lapic_timer.vcpu = vcpu; | ||
1290 | 1484 | ||
1291 | apic->base_address = APIC_DEFAULT_PHYS_BASE; | 1485 | /* |
1292 | vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE; | 1486 | * APIC is created enabled. This will prevent kvm_lapic_set_base from |
1487 | * thinking that APIC satet has changed. | ||
1488 | */ | ||
1489 | vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; | ||
1490 | kvm_lapic_set_base(vcpu, | ||
1491 | APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE); | ||
1293 | 1492 | ||
1493 | static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ | ||
1294 | kvm_lapic_reset(vcpu); | 1494 | kvm_lapic_reset(vcpu); |
1295 | kvm_iodevice_init(&apic->dev, &apic_mmio_ops); | 1495 | kvm_iodevice_init(&apic->dev, &apic_mmio_ops); |
1296 | 1496 | ||
@@ -1306,23 +1506,23 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) | |||
1306 | struct kvm_lapic *apic = vcpu->arch.apic; | 1506 | struct kvm_lapic *apic = vcpu->arch.apic; |
1307 | int highest_irr; | 1507 | int highest_irr; |
1308 | 1508 | ||
1309 | if (!apic || !apic_enabled(apic)) | 1509 | if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic)) |
1310 | return -1; | 1510 | return -1; |
1311 | 1511 | ||
1312 | apic_update_ppr(apic); | 1512 | apic_update_ppr(apic); |
1313 | highest_irr = apic_find_highest_irr(apic); | 1513 | highest_irr = apic_find_highest_irr(apic); |
1314 | if ((highest_irr == -1) || | 1514 | if ((highest_irr == -1) || |
1315 | ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI))) | 1515 | ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI))) |
1316 | return -1; | 1516 | return -1; |
1317 | return highest_irr; | 1517 | return highest_irr; |
1318 | } | 1518 | } |
1319 | 1519 | ||
1320 | int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) | 1520 | int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) |
1321 | { | 1521 | { |
1322 | u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0); | 1522 | u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0); |
1323 | int r = 0; | 1523 | int r = 0; |
1324 | 1524 | ||
1325 | if (!apic_hw_enabled(vcpu->arch.apic)) | 1525 | if (!kvm_apic_hw_enabled(vcpu->arch.apic)) |
1326 | r = 1; | 1526 | r = 1; |
1327 | if ((lvt0 & APIC_LVT_MASKED) == 0 && | 1527 | if ((lvt0 & APIC_LVT_MASKED) == 0 && |
1328 | GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) | 1528 | GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) |
@@ -1334,7 +1534,10 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) | |||
1334 | { | 1534 | { |
1335 | struct kvm_lapic *apic = vcpu->arch.apic; | 1535 | struct kvm_lapic *apic = vcpu->arch.apic; |
1336 | 1536 | ||
1337 | if (apic && atomic_read(&apic->lapic_timer.pending) > 0) { | 1537 | if (!kvm_vcpu_has_lapic(vcpu)) |
1538 | return; | ||
1539 | |||
1540 | if (atomic_read(&apic->lapic_timer.pending) > 0) { | ||
1338 | if (kvm_apic_local_deliver(apic, APIC_LVTT)) | 1541 | if (kvm_apic_local_deliver(apic, APIC_LVTT)) |
1339 | atomic_dec(&apic->lapic_timer.pending); | 1542 | atomic_dec(&apic->lapic_timer.pending); |
1340 | } | 1543 | } |
@@ -1354,12 +1557,17 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) | |||
1354 | return vector; | 1557 | return vector; |
1355 | } | 1558 | } |
1356 | 1559 | ||
1357 | void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) | 1560 | void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, |
1561 | struct kvm_lapic_state *s) | ||
1358 | { | 1562 | { |
1359 | struct kvm_lapic *apic = vcpu->arch.apic; | 1563 | struct kvm_lapic *apic = vcpu->arch.apic; |
1360 | 1564 | ||
1361 | apic->base_address = vcpu->arch.apic_base & | 1565 | kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); |
1362 | MSR_IA32_APICBASE_BASE; | 1566 | /* set SPIV separately to get count of SW disabled APICs right */ |
1567 | apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); | ||
1568 | memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); | ||
1569 | /* call kvm_apic_set_id() to put apic into apic_map */ | ||
1570 | kvm_apic_set_id(apic, kvm_apic_id(apic)); | ||
1363 | kvm_apic_set_version(vcpu); | 1571 | kvm_apic_set_version(vcpu); |
1364 | 1572 | ||
1365 | apic_update_ppr(apic); | 1573 | apic_update_ppr(apic); |
@@ -1374,13 +1582,12 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) | |||
1374 | 1582 | ||
1375 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | 1583 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) |
1376 | { | 1584 | { |
1377 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
1378 | struct hrtimer *timer; | 1585 | struct hrtimer *timer; |
1379 | 1586 | ||
1380 | if (!apic) | 1587 | if (!kvm_vcpu_has_lapic(vcpu)) |
1381 | return; | 1588 | return; |
1382 | 1589 | ||
1383 | timer = &apic->lapic_timer.timer; | 1590 | timer = &vcpu->arch.apic->lapic_timer.timer; |
1384 | if (hrtimer_cancel(timer)) | 1591 | if (hrtimer_cancel(timer)) |
1385 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | 1592 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); |
1386 | } | 1593 | } |
@@ -1478,7 +1685,7 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) | |||
1478 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) | 1685 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) |
1479 | return; | 1686 | return; |
1480 | 1687 | ||
1481 | tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; | 1688 | tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff; |
1482 | max_irr = apic_find_highest_irr(apic); | 1689 | max_irr = apic_find_highest_irr(apic); |
1483 | if (max_irr < 0) | 1690 | if (max_irr < 0) |
1484 | max_irr = 0; | 1691 | max_irr = 0; |
@@ -1537,7 +1744,7 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) | |||
1537 | { | 1744 | { |
1538 | struct kvm_lapic *apic = vcpu->arch.apic; | 1745 | struct kvm_lapic *apic = vcpu->arch.apic; |
1539 | 1746 | ||
1540 | if (!irqchip_in_kernel(vcpu->kvm)) | 1747 | if (!kvm_vcpu_has_lapic(vcpu)) |
1541 | return 1; | 1748 | return 1; |
1542 | 1749 | ||
1543 | /* if this is ICR write vector before command */ | 1750 | /* if this is ICR write vector before command */ |
@@ -1551,7 +1758,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) | |||
1551 | struct kvm_lapic *apic = vcpu->arch.apic; | 1758 | struct kvm_lapic *apic = vcpu->arch.apic; |
1552 | u32 low, high = 0; | 1759 | u32 low, high = 0; |
1553 | 1760 | ||
1554 | if (!irqchip_in_kernel(vcpu->kvm)) | 1761 | if (!kvm_vcpu_has_lapic(vcpu)) |
1555 | return 1; | 1762 | return 1; |
1556 | 1763 | ||
1557 | if (apic_reg_read(apic, reg, 4, &low)) | 1764 | if (apic_reg_read(apic, reg, 4, &low)) |
@@ -1576,3 +1783,10 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) | |||
1576 | return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, | 1783 | return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, |
1577 | addr); | 1784 | addr); |
1578 | } | 1785 | } |
1786 | |||
1787 | void kvm_lapic_init(void) | ||
1788 | { | ||
1789 | /* do not patch jump label more than once per second */ | ||
1790 | jump_label_rate_limit(&apic_hw_disabled, HZ); | ||
1791 | jump_label_rate_limit(&apic_sw_disabled, HZ); | ||
1792 | } | ||
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 4af5405ae1e2..e5ebf9f3571f 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -2,10 +2,17 @@ | |||
2 | #define __KVM_X86_LAPIC_H | 2 | #define __KVM_X86_LAPIC_H |
3 | 3 | ||
4 | #include "iodev.h" | 4 | #include "iodev.h" |
5 | #include "kvm_timer.h" | ||
6 | 5 | ||
7 | #include <linux/kvm_host.h> | 6 | #include <linux/kvm_host.h> |
8 | 7 | ||
8 | struct kvm_timer { | ||
9 | struct hrtimer timer; | ||
10 | s64 period; /* unit: ns */ | ||
11 | u32 timer_mode_mask; | ||
12 | u64 tscdeadline; | ||
13 | atomic_t pending; /* accumulated triggered timers */ | ||
14 | }; | ||
15 | |||
9 | struct kvm_lapic { | 16 | struct kvm_lapic { |
10 | unsigned long base_address; | 17 | unsigned long base_address; |
11 | struct kvm_io_device dev; | 18 | struct kvm_io_device dev; |
@@ -45,11 +52,13 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); | |||
45 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); | 52 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); |
46 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); | 53 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); |
47 | 54 | ||
55 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | ||
56 | struct kvm_lapic_irq *irq, int *r); | ||
57 | |||
48 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); | 58 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); |
49 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); | 59 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); |
50 | void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); | 60 | void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, |
51 | int kvm_lapic_enabled(struct kvm_vcpu *vcpu); | 61 | struct kvm_lapic_state *s); |
52 | bool kvm_apic_present(struct kvm_vcpu *vcpu); | ||
53 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); | 62 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); |
54 | 63 | ||
55 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); | 64 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); |
@@ -71,4 +80,48 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) | |||
71 | } | 80 | } |
72 | 81 | ||
73 | int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); | 82 | int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); |
83 | void kvm_lapic_init(void); | ||
84 | |||
85 | static inline u32 kvm_apic_get_reg(struct kvm_lapic *apic, int reg_off) | ||
86 | { | ||
87 | return *((u32 *) (apic->regs + reg_off)); | ||
88 | } | ||
89 | |||
90 | extern struct static_key kvm_no_apic_vcpu; | ||
91 | |||
92 | static inline bool kvm_vcpu_has_lapic(struct kvm_vcpu *vcpu) | ||
93 | { | ||
94 | if (static_key_false(&kvm_no_apic_vcpu)) | ||
95 | return vcpu->arch.apic; | ||
96 | return true; | ||
97 | } | ||
98 | |||
99 | extern struct static_key_deferred apic_hw_disabled; | ||
100 | |||
101 | static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic) | ||
102 | { | ||
103 | if (static_key_false(&apic_hw_disabled.key)) | ||
104 | return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; | ||
105 | return MSR_IA32_APICBASE_ENABLE; | ||
106 | } | ||
107 | |||
108 | extern struct static_key_deferred apic_sw_disabled; | ||
109 | |||
110 | static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic) | ||
111 | { | ||
112 | if (static_key_false(&apic_sw_disabled.key)) | ||
113 | return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; | ||
114 | return APIC_SPIV_APIC_ENABLED; | ||
115 | } | ||
116 | |||
117 | static inline bool kvm_apic_present(struct kvm_vcpu *vcpu) | ||
118 | { | ||
119 | return kvm_vcpu_has_lapic(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic); | ||
120 | } | ||
121 | |||
122 | static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu) | ||
123 | { | ||
124 | return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic); | ||
125 | } | ||
126 | |||
74 | #endif | 127 | #endif |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7fbd0d273ea8..d289fee1ffb8 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -556,6 +556,14 @@ static int mmu_spte_clear_track_bits(u64 *sptep) | |||
556 | return 0; | 556 | return 0; |
557 | 557 | ||
558 | pfn = spte_to_pfn(old_spte); | 558 | pfn = spte_to_pfn(old_spte); |
559 | |||
560 | /* | ||
561 | * KVM does not hold the refcount of the page used by | ||
562 | * kvm mmu, before reclaiming the page, we should | ||
563 | * unmap it from mmu first. | ||
564 | */ | ||
565 | WARN_ON(!kvm_is_mmio_pfn(pfn) && !page_count(pfn_to_page(pfn))); | ||
566 | |||
559 | if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) | 567 | if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) |
560 | kvm_set_pfn_accessed(pfn); | 568 | kvm_set_pfn_accessed(pfn); |
561 | if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask)) | 569 | if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask)) |
@@ -960,13 +968,10 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) | |||
960 | static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, | 968 | static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, |
961 | struct kvm_memory_slot *slot) | 969 | struct kvm_memory_slot *slot) |
962 | { | 970 | { |
963 | struct kvm_lpage_info *linfo; | 971 | unsigned long idx; |
964 | |||
965 | if (likely(level == PT_PAGE_TABLE_LEVEL)) | ||
966 | return &slot->rmap[gfn - slot->base_gfn]; | ||
967 | 972 | ||
968 | linfo = lpage_info_slot(gfn, slot, level); | 973 | idx = gfn_to_index(gfn, slot->base_gfn, level); |
969 | return &linfo->rmap_pde; | 974 | return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx]; |
970 | } | 975 | } |
971 | 976 | ||
972 | /* | 977 | /* |
@@ -1173,7 +1178,8 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, | |||
1173 | unsigned long *rmapp; | 1178 | unsigned long *rmapp; |
1174 | 1179 | ||
1175 | while (mask) { | 1180 | while (mask) { |
1176 | rmapp = &slot->rmap[gfn_offset + __ffs(mask)]; | 1181 | rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), |
1182 | PT_PAGE_TABLE_LEVEL, slot); | ||
1177 | __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false); | 1183 | __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false); |
1178 | 1184 | ||
1179 | /* clear the first set bit */ | 1185 | /* clear the first set bit */ |
@@ -1200,7 +1206,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
1200 | } | 1206 | } |
1201 | 1207 | ||
1202 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1208 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, |
1203 | unsigned long data) | 1209 | struct kvm_memory_slot *slot, unsigned long data) |
1204 | { | 1210 | { |
1205 | u64 *sptep; | 1211 | u64 *sptep; |
1206 | struct rmap_iterator iter; | 1212 | struct rmap_iterator iter; |
@@ -1218,7 +1224,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1218 | } | 1224 | } |
1219 | 1225 | ||
1220 | static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1226 | static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, |
1221 | unsigned long data) | 1227 | struct kvm_memory_slot *slot, unsigned long data) |
1222 | { | 1228 | { |
1223 | u64 *sptep; | 1229 | u64 *sptep; |
1224 | struct rmap_iterator iter; | 1230 | struct rmap_iterator iter; |
@@ -1259,43 +1265,67 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1259 | return 0; | 1265 | return 0; |
1260 | } | 1266 | } |
1261 | 1267 | ||
1262 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | 1268 | static int kvm_handle_hva_range(struct kvm *kvm, |
1263 | unsigned long data, | 1269 | unsigned long start, |
1264 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, | 1270 | unsigned long end, |
1265 | unsigned long data)) | 1271 | unsigned long data, |
1272 | int (*handler)(struct kvm *kvm, | ||
1273 | unsigned long *rmapp, | ||
1274 | struct kvm_memory_slot *slot, | ||
1275 | unsigned long data)) | ||
1266 | { | 1276 | { |
1267 | int j; | 1277 | int j; |
1268 | int ret; | 1278 | int ret = 0; |
1269 | int retval = 0; | ||
1270 | struct kvm_memslots *slots; | 1279 | struct kvm_memslots *slots; |
1271 | struct kvm_memory_slot *memslot; | 1280 | struct kvm_memory_slot *memslot; |
1272 | 1281 | ||
1273 | slots = kvm_memslots(kvm); | 1282 | slots = kvm_memslots(kvm); |
1274 | 1283 | ||
1275 | kvm_for_each_memslot(memslot, slots) { | 1284 | kvm_for_each_memslot(memslot, slots) { |
1276 | unsigned long start = memslot->userspace_addr; | 1285 | unsigned long hva_start, hva_end; |
1277 | unsigned long end; | 1286 | gfn_t gfn_start, gfn_end; |
1278 | 1287 | ||
1279 | end = start + (memslot->npages << PAGE_SHIFT); | 1288 | hva_start = max(start, memslot->userspace_addr); |
1280 | if (hva >= start && hva < end) { | 1289 | hva_end = min(end, memslot->userspace_addr + |
1281 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | 1290 | (memslot->npages << PAGE_SHIFT)); |
1282 | gfn_t gfn = memslot->base_gfn + gfn_offset; | 1291 | if (hva_start >= hva_end) |
1292 | continue; | ||
1293 | /* | ||
1294 | * {gfn(page) | page intersects with [hva_start, hva_end)} = | ||
1295 | * {gfn_start, gfn_start+1, ..., gfn_end-1}. | ||
1296 | */ | ||
1297 | gfn_start = hva_to_gfn_memslot(hva_start, memslot); | ||
1298 | gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); | ||
1283 | 1299 | ||
1284 | ret = handler(kvm, &memslot->rmap[gfn_offset], data); | 1300 | for (j = PT_PAGE_TABLE_LEVEL; |
1301 | j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { | ||
1302 | unsigned long idx, idx_end; | ||
1303 | unsigned long *rmapp; | ||
1285 | 1304 | ||
1286 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { | 1305 | /* |
1287 | struct kvm_lpage_info *linfo; | 1306 | * {idx(page_j) | page_j intersects with |
1307 | * [hva_start, hva_end)} = {idx, idx+1, ..., idx_end}. | ||
1308 | */ | ||
1309 | idx = gfn_to_index(gfn_start, memslot->base_gfn, j); | ||
1310 | idx_end = gfn_to_index(gfn_end - 1, memslot->base_gfn, j); | ||
1288 | 1311 | ||
1289 | linfo = lpage_info_slot(gfn, memslot, | 1312 | rmapp = __gfn_to_rmap(gfn_start, j, memslot); |
1290 | PT_DIRECTORY_LEVEL + j); | 1313 | |
1291 | ret |= handler(kvm, &linfo->rmap_pde, data); | 1314 | for (; idx <= idx_end; ++idx) |
1292 | } | 1315 | ret |= handler(kvm, rmapp++, memslot, data); |
1293 | trace_kvm_age_page(hva, memslot, ret); | ||
1294 | retval |= ret; | ||
1295 | } | 1316 | } |
1296 | } | 1317 | } |
1297 | 1318 | ||
1298 | return retval; | 1319 | return ret; |
1320 | } | ||
1321 | |||
1322 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | ||
1323 | unsigned long data, | ||
1324 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, | ||
1325 | struct kvm_memory_slot *slot, | ||
1326 | unsigned long data)) | ||
1327 | { | ||
1328 | return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); | ||
1299 | } | 1329 | } |
1300 | 1330 | ||
1301 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | 1331 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) |
@@ -1303,13 +1333,18 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | |||
1303 | return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); | 1333 | return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); |
1304 | } | 1334 | } |
1305 | 1335 | ||
1336 | int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) | ||
1337 | { | ||
1338 | return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp); | ||
1339 | } | ||
1340 | |||
1306 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) | 1341 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) |
1307 | { | 1342 | { |
1308 | kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); | 1343 | kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); |
1309 | } | 1344 | } |
1310 | 1345 | ||
1311 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1346 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, |
1312 | unsigned long data) | 1347 | struct kvm_memory_slot *slot, unsigned long data) |
1313 | { | 1348 | { |
1314 | u64 *sptep; | 1349 | u64 *sptep; |
1315 | struct rmap_iterator uninitialized_var(iter); | 1350 | struct rmap_iterator uninitialized_var(iter); |
@@ -1323,8 +1358,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1323 | * This has some overhead, but not as much as the cost of swapping | 1358 | * This has some overhead, but not as much as the cost of swapping |
1324 | * out actively used pages or breaking up actively used hugepages. | 1359 | * out actively used pages or breaking up actively used hugepages. |
1325 | */ | 1360 | */ |
1326 | if (!shadow_accessed_mask) | 1361 | if (!shadow_accessed_mask) { |
1327 | return kvm_unmap_rmapp(kvm, rmapp, data); | 1362 | young = kvm_unmap_rmapp(kvm, rmapp, slot, data); |
1363 | goto out; | ||
1364 | } | ||
1328 | 1365 | ||
1329 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; | 1366 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; |
1330 | sptep = rmap_get_next(&iter)) { | 1367 | sptep = rmap_get_next(&iter)) { |
@@ -1336,12 +1373,14 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1336 | (unsigned long *)sptep); | 1373 | (unsigned long *)sptep); |
1337 | } | 1374 | } |
1338 | } | 1375 | } |
1339 | 1376 | out: | |
1377 | /* @data has hva passed to kvm_age_hva(). */ | ||
1378 | trace_kvm_age_page(data, slot, young); | ||
1340 | return young; | 1379 | return young; |
1341 | } | 1380 | } |
1342 | 1381 | ||
1343 | static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1382 | static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, |
1344 | unsigned long data) | 1383 | struct kvm_memory_slot *slot, unsigned long data) |
1345 | { | 1384 | { |
1346 | u64 *sptep; | 1385 | u64 *sptep; |
1347 | struct rmap_iterator iter; | 1386 | struct rmap_iterator iter; |
@@ -1379,13 +1418,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
1379 | 1418 | ||
1380 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 1419 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); |
1381 | 1420 | ||
1382 | kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); | 1421 | kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0); |
1383 | kvm_flush_remote_tlbs(vcpu->kvm); | 1422 | kvm_flush_remote_tlbs(vcpu->kvm); |
1384 | } | 1423 | } |
1385 | 1424 | ||
1386 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | 1425 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) |
1387 | { | 1426 | { |
1388 | return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); | 1427 | return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp); |
1389 | } | 1428 | } |
1390 | 1429 | ||
1391 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) | 1430 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) |
@@ -2457,7 +2496,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2457 | rmap_recycle(vcpu, sptep, gfn); | 2496 | rmap_recycle(vcpu, sptep, gfn); |
2458 | } | 2497 | } |
2459 | } | 2498 | } |
2460 | kvm_release_pfn_clean(pfn); | 2499 | |
2500 | if (!is_error_pfn(pfn)) | ||
2501 | kvm_release_pfn_clean(pfn); | ||
2461 | } | 2502 | } |
2462 | 2503 | ||
2463 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | 2504 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) |
@@ -2469,17 +2510,12 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
2469 | bool no_dirty_log) | 2510 | bool no_dirty_log) |
2470 | { | 2511 | { |
2471 | struct kvm_memory_slot *slot; | 2512 | struct kvm_memory_slot *slot; |
2472 | unsigned long hva; | ||
2473 | 2513 | ||
2474 | slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); | 2514 | slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); |
2475 | if (!slot) { | 2515 | if (!slot) |
2476 | get_page(fault_page); | 2516 | return KVM_PFN_ERR_FAULT; |
2477 | return page_to_pfn(fault_page); | ||
2478 | } | ||
2479 | 2517 | ||
2480 | hva = gfn_to_hva_memslot(slot, gfn); | 2518 | return gfn_to_pfn_memslot_atomic(slot, gfn); |
2481 | |||
2482 | return hva_to_pfn_atomic(vcpu->kvm, hva); | ||
2483 | } | 2519 | } |
2484 | 2520 | ||
2485 | static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, | 2521 | static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, |
@@ -2580,11 +2616,6 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2580 | sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, | 2616 | sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, |
2581 | iterator.level - 1, | 2617 | iterator.level - 1, |
2582 | 1, ACC_ALL, iterator.sptep); | 2618 | 1, ACC_ALL, iterator.sptep); |
2583 | if (!sp) { | ||
2584 | pgprintk("nonpaging_map: ENOMEM\n"); | ||
2585 | kvm_release_pfn_clean(pfn); | ||
2586 | return -ENOMEM; | ||
2587 | } | ||
2588 | 2619 | ||
2589 | mmu_spte_set(iterator.sptep, | 2620 | mmu_spte_set(iterator.sptep, |
2590 | __pa(sp->spt) | 2621 | __pa(sp->spt) |
@@ -2611,8 +2642,16 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct * | |||
2611 | 2642 | ||
2612 | static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn) | 2643 | static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn) |
2613 | { | 2644 | { |
2614 | kvm_release_pfn_clean(pfn); | 2645 | /* |
2615 | if (is_hwpoison_pfn(pfn)) { | 2646 | * Do not cache the mmio info caused by writing the readonly gfn |
2647 | * into the spte otherwise read access on readonly gfn also can | ||
2648 | * caused mmio page fault and treat it as mmio access. | ||
2649 | * Return 1 to tell kvm to emulate it. | ||
2650 | */ | ||
2651 | if (pfn == KVM_PFN_ERR_RO_FAULT) | ||
2652 | return 1; | ||
2653 | |||
2654 | if (pfn == KVM_PFN_ERR_HWPOISON) { | ||
2616 | kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current); | 2655 | kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current); |
2617 | return 0; | 2656 | return 0; |
2618 | } | 2657 | } |
@@ -3236,8 +3275,6 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | |||
3236 | if (!async) | 3275 | if (!async) |
3237 | return false; /* *pfn has correct page already */ | 3276 | return false; /* *pfn has correct page already */ |
3238 | 3277 | ||
3239 | put_page(pfn_to_page(*pfn)); | ||
3240 | |||
3241 | if (!prefault && can_do_async_pf(vcpu)) { | 3278 | if (!prefault && can_do_async_pf(vcpu)) { |
3242 | trace_kvm_try_async_get_page(gva, gfn); | 3279 | trace_kvm_try_async_get_page(gva, gfn); |
3243 | if (kvm_find_async_pf_gfn(vcpu, gfn)) { | 3280 | if (kvm_find_async_pf_gfn(vcpu, gfn)) { |
@@ -3371,6 +3408,18 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) | |||
3371 | return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0; | 3408 | return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0; |
3372 | } | 3409 | } |
3373 | 3410 | ||
3411 | static inline void protect_clean_gpte(unsigned *access, unsigned gpte) | ||
3412 | { | ||
3413 | unsigned mask; | ||
3414 | |||
3415 | BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); | ||
3416 | |||
3417 | mask = (unsigned)~ACC_WRITE_MASK; | ||
3418 | /* Allow write access to dirty gptes */ | ||
3419 | mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK; | ||
3420 | *access &= mask; | ||
3421 | } | ||
3422 | |||
3374 | static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, | 3423 | static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, |
3375 | int *nr_present) | 3424 | int *nr_present) |
3376 | { | 3425 | { |
@@ -3388,6 +3437,25 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, | |||
3388 | return false; | 3437 | return false; |
3389 | } | 3438 | } |
3390 | 3439 | ||
3440 | static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte) | ||
3441 | { | ||
3442 | unsigned access; | ||
3443 | |||
3444 | access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; | ||
3445 | access &= ~(gpte >> PT64_NX_SHIFT); | ||
3446 | |||
3447 | return access; | ||
3448 | } | ||
3449 | |||
3450 | static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) | ||
3451 | { | ||
3452 | unsigned index; | ||
3453 | |||
3454 | index = level - 1; | ||
3455 | index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2); | ||
3456 | return mmu->last_pte_bitmap & (1 << index); | ||
3457 | } | ||
3458 | |||
3391 | #define PTTYPE 64 | 3459 | #define PTTYPE 64 |
3392 | #include "paging_tmpl.h" | 3460 | #include "paging_tmpl.h" |
3393 | #undef PTTYPE | 3461 | #undef PTTYPE |
@@ -3457,6 +3525,56 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | |||
3457 | } | 3525 | } |
3458 | } | 3526 | } |
3459 | 3527 | ||
3528 | static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) | ||
3529 | { | ||
3530 | unsigned bit, byte, pfec; | ||
3531 | u8 map; | ||
3532 | bool fault, x, w, u, wf, uf, ff, smep; | ||
3533 | |||
3534 | smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); | ||
3535 | for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { | ||
3536 | pfec = byte << 1; | ||
3537 | map = 0; | ||
3538 | wf = pfec & PFERR_WRITE_MASK; | ||
3539 | uf = pfec & PFERR_USER_MASK; | ||
3540 | ff = pfec & PFERR_FETCH_MASK; | ||
3541 | for (bit = 0; bit < 8; ++bit) { | ||
3542 | x = bit & ACC_EXEC_MASK; | ||
3543 | w = bit & ACC_WRITE_MASK; | ||
3544 | u = bit & ACC_USER_MASK; | ||
3545 | |||
3546 | /* Not really needed: !nx will cause pte.nx to fault */ | ||
3547 | x |= !mmu->nx; | ||
3548 | /* Allow supervisor writes if !cr0.wp */ | ||
3549 | w |= !is_write_protection(vcpu) && !uf; | ||
3550 | /* Disallow supervisor fetches of user code if cr4.smep */ | ||
3551 | x &= !(smep && u && !uf); | ||
3552 | |||
3553 | fault = (ff && !x) || (uf && !u) || (wf && !w); | ||
3554 | map |= fault << bit; | ||
3555 | } | ||
3556 | mmu->permissions[byte] = map; | ||
3557 | } | ||
3558 | } | ||
3559 | |||
3560 | static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) | ||
3561 | { | ||
3562 | u8 map; | ||
3563 | unsigned level, root_level = mmu->root_level; | ||
3564 | const unsigned ps_set_index = 1 << 2; /* bit 2 of index: ps */ | ||
3565 | |||
3566 | if (root_level == PT32E_ROOT_LEVEL) | ||
3567 | --root_level; | ||
3568 | /* PT_PAGE_TABLE_LEVEL always terminates */ | ||
3569 | map = 1 | (1 << ps_set_index); | ||
3570 | for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) { | ||
3571 | if (level <= PT_PDPE_LEVEL | ||
3572 | && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu))) | ||
3573 | map |= 1 << (ps_set_index | (level - 1)); | ||
3574 | } | ||
3575 | mmu->last_pte_bitmap = map; | ||
3576 | } | ||
3577 | |||
3460 | static int paging64_init_context_common(struct kvm_vcpu *vcpu, | 3578 | static int paging64_init_context_common(struct kvm_vcpu *vcpu, |
3461 | struct kvm_mmu *context, | 3579 | struct kvm_mmu *context, |
3462 | int level) | 3580 | int level) |
@@ -3465,6 +3583,8 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
3465 | context->root_level = level; | 3583 | context->root_level = level; |
3466 | 3584 | ||
3467 | reset_rsvds_bits_mask(vcpu, context); | 3585 | reset_rsvds_bits_mask(vcpu, context); |
3586 | update_permission_bitmask(vcpu, context); | ||
3587 | update_last_pte_bitmap(vcpu, context); | ||
3468 | 3588 | ||
3469 | ASSERT(is_pae(vcpu)); | 3589 | ASSERT(is_pae(vcpu)); |
3470 | context->new_cr3 = paging_new_cr3; | 3590 | context->new_cr3 = paging_new_cr3; |
@@ -3493,6 +3613,8 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, | |||
3493 | context->root_level = PT32_ROOT_LEVEL; | 3613 | context->root_level = PT32_ROOT_LEVEL; |
3494 | 3614 | ||
3495 | reset_rsvds_bits_mask(vcpu, context); | 3615 | reset_rsvds_bits_mask(vcpu, context); |
3616 | update_permission_bitmask(vcpu, context); | ||
3617 | update_last_pte_bitmap(vcpu, context); | ||
3496 | 3618 | ||
3497 | context->new_cr3 = paging_new_cr3; | 3619 | context->new_cr3 = paging_new_cr3; |
3498 | context->page_fault = paging32_page_fault; | 3620 | context->page_fault = paging32_page_fault; |
@@ -3553,6 +3675,9 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
3553 | context->gva_to_gpa = paging32_gva_to_gpa; | 3675 | context->gva_to_gpa = paging32_gva_to_gpa; |
3554 | } | 3676 | } |
3555 | 3677 | ||
3678 | update_permission_bitmask(vcpu, context); | ||
3679 | update_last_pte_bitmap(vcpu, context); | ||
3680 | |||
3556 | return 0; | 3681 | return 0; |
3557 | } | 3682 | } |
3558 | 3683 | ||
@@ -3628,6 +3753,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
3628 | g_context->gva_to_gpa = paging32_gva_to_gpa_nested; | 3753 | g_context->gva_to_gpa = paging32_gva_to_gpa_nested; |
3629 | } | 3754 | } |
3630 | 3755 | ||
3756 | update_permission_bitmask(vcpu, g_context); | ||
3757 | update_last_pte_bitmap(vcpu, g_context); | ||
3758 | |||
3631 | return 0; | 3759 | return 0; |
3632 | } | 3760 | } |
3633 | 3761 | ||
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index e374db9af021..69871080e866 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -18,8 +18,10 @@ | |||
18 | #define PT_PCD_MASK (1ULL << 4) | 18 | #define PT_PCD_MASK (1ULL << 4) |
19 | #define PT_ACCESSED_SHIFT 5 | 19 | #define PT_ACCESSED_SHIFT 5 |
20 | #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT) | 20 | #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT) |
21 | #define PT_DIRTY_MASK (1ULL << 6) | 21 | #define PT_DIRTY_SHIFT 6 |
22 | #define PT_PAGE_SIZE_MASK (1ULL << 7) | 22 | #define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT) |
23 | #define PT_PAGE_SIZE_SHIFT 7 | ||
24 | #define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT) | ||
23 | #define PT_PAT_MASK (1ULL << 7) | 25 | #define PT_PAT_MASK (1ULL << 7) |
24 | #define PT_GLOBAL_MASK (1ULL << 8) | 26 | #define PT_GLOBAL_MASK (1ULL << 8) |
25 | #define PT64_NX_SHIFT 63 | 27 | #define PT64_NX_SHIFT 63 |
@@ -88,17 +90,14 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu) | |||
88 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); | 90 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); |
89 | } | 91 | } |
90 | 92 | ||
91 | static inline bool check_write_user_access(struct kvm_vcpu *vcpu, | 93 | /* |
92 | bool write_fault, bool user_fault, | 94 | * Will a fault with a given page-fault error code (pfec) cause a permission |
93 | unsigned long pte) | 95 | * fault with the given access (in ACC_* format)? |
96 | */ | ||
97 | static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access, | ||
98 | unsigned pfec) | ||
94 | { | 99 | { |
95 | if (unlikely(write_fault && !is_writable_pte(pte) | 100 | return (mmu->permissions[pfec >> 1] >> pte_access) & 1; |
96 | && (user_fault || is_write_protection(vcpu)))) | ||
97 | return false; | ||
98 | |||
99 | if (unlikely(user_fault && !(pte & PT_USER_MASK))) | ||
100 | return false; | ||
101 | |||
102 | return true; | ||
103 | } | 101 | } |
102 | |||
104 | #endif | 103 | #endif |
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 7d7d0b9e23eb..daff69e21150 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
@@ -116,10 +116,8 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) | |||
116 | gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); | 116 | gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); |
117 | pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn); | 117 | pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn); |
118 | 118 | ||
119 | if (is_error_pfn(pfn)) { | 119 | if (is_error_pfn(pfn)) |
120 | kvm_release_pfn_clean(pfn); | ||
121 | return; | 120 | return; |
122 | } | ||
123 | 121 | ||
124 | hpa = pfn << PAGE_SHIFT; | 122 | hpa = pfn << PAGE_SHIFT; |
125 | if ((*sptep & PT64_BASE_ADDR_MASK) != hpa) | 123 | if ((*sptep & PT64_BASE_ADDR_MASK) != hpa) |
@@ -190,7 +188,6 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
190 | 188 | ||
191 | static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) | 189 | static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) |
192 | { | 190 | { |
193 | struct kvm_memory_slot *slot; | ||
194 | unsigned long *rmapp; | 191 | unsigned long *rmapp; |
195 | u64 *sptep; | 192 | u64 *sptep; |
196 | struct rmap_iterator iter; | 193 | struct rmap_iterator iter; |
@@ -198,8 +195,7 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
198 | if (sp->role.direct || sp->unsync || sp->role.invalid) | 195 | if (sp->role.direct || sp->unsync || sp->role.invalid) |
199 | return; | 196 | return; |
200 | 197 | ||
201 | slot = gfn_to_memslot(kvm, sp->gfn); | 198 | rmapp = gfn_to_rmap(kvm, sp->gfn, PT_PAGE_TABLE_LEVEL); |
202 | rmapp = &slot->rmap[sp->gfn - slot->base_gfn]; | ||
203 | 199 | ||
204 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; | 200 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; |
205 | sptep = rmap_get_next(&iter)) { | 201 | sptep = rmap_get_next(&iter)) { |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index bb7cf01cae76..714e2c01a6fe 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -63,10 +63,12 @@ | |||
63 | */ | 63 | */ |
64 | struct guest_walker { | 64 | struct guest_walker { |
65 | int level; | 65 | int level; |
66 | unsigned max_level; | ||
66 | gfn_t table_gfn[PT_MAX_FULL_LEVELS]; | 67 | gfn_t table_gfn[PT_MAX_FULL_LEVELS]; |
67 | pt_element_t ptes[PT_MAX_FULL_LEVELS]; | 68 | pt_element_t ptes[PT_MAX_FULL_LEVELS]; |
68 | pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; | 69 | pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; |
69 | gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; | 70 | gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; |
71 | pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; | ||
70 | unsigned pt_access; | 72 | unsigned pt_access; |
71 | unsigned pte_access; | 73 | unsigned pte_access; |
72 | gfn_t gfn; | 74 | gfn_t gfn; |
@@ -101,38 +103,41 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
101 | return (ret != orig_pte); | 103 | return (ret != orig_pte); |
102 | } | 104 | } |
103 | 105 | ||
104 | static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte, | 106 | static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, |
105 | bool last) | 107 | struct kvm_mmu *mmu, |
108 | struct guest_walker *walker, | ||
109 | int write_fault) | ||
106 | { | 110 | { |
107 | unsigned access; | 111 | unsigned level, index; |
108 | 112 | pt_element_t pte, orig_pte; | |
109 | access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; | 113 | pt_element_t __user *ptep_user; |
110 | if (last && !is_dirty_gpte(gpte)) | 114 | gfn_t table_gfn; |
111 | access &= ~ACC_WRITE_MASK; | 115 | int ret; |
112 | 116 | ||
113 | #if PTTYPE == 64 | 117 | for (level = walker->max_level; level >= walker->level; --level) { |
114 | if (vcpu->arch.mmu.nx) | 118 | pte = orig_pte = walker->ptes[level - 1]; |
115 | access &= ~(gpte >> PT64_NX_SHIFT); | 119 | table_gfn = walker->table_gfn[level - 1]; |
116 | #endif | 120 | ptep_user = walker->ptep_user[level - 1]; |
117 | return access; | 121 | index = offset_in_page(ptep_user) / sizeof(pt_element_t); |
118 | } | 122 | if (!(pte & PT_ACCESSED_MASK)) { |
119 | 123 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); | |
120 | static bool FNAME(is_last_gpte)(struct guest_walker *walker, | 124 | pte |= PT_ACCESSED_MASK; |
121 | struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | 125 | } |
122 | pt_element_t gpte) | 126 | if (level == walker->level && write_fault && !is_dirty_gpte(pte)) { |
123 | { | 127 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); |
124 | if (walker->level == PT_PAGE_TABLE_LEVEL) | 128 | pte |= PT_DIRTY_MASK; |
125 | return true; | 129 | } |
126 | 130 | if (pte == orig_pte) | |
127 | if ((walker->level == PT_DIRECTORY_LEVEL) && is_large_pte(gpte) && | 131 | continue; |
128 | (PTTYPE == 64 || is_pse(vcpu))) | ||
129 | return true; | ||
130 | 132 | ||
131 | if ((walker->level == PT_PDPE_LEVEL) && is_large_pte(gpte) && | 133 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); |
132 | (mmu->root_level == PT64_ROOT_LEVEL)) | 134 | if (ret) |
133 | return true; | 135 | return ret; |
134 | 136 | ||
135 | return false; | 137 | mark_page_dirty(vcpu->kvm, table_gfn); |
138 | walker->ptes[level] = pte; | ||
139 | } | ||
140 | return 0; | ||
136 | } | 141 | } |
137 | 142 | ||
138 | /* | 143 | /* |
@@ -142,21 +147,22 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, | |||
142 | struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | 147 | struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
143 | gva_t addr, u32 access) | 148 | gva_t addr, u32 access) |
144 | { | 149 | { |
150 | int ret; | ||
145 | pt_element_t pte; | 151 | pt_element_t pte; |
146 | pt_element_t __user *uninitialized_var(ptep_user); | 152 | pt_element_t __user *uninitialized_var(ptep_user); |
147 | gfn_t table_gfn; | 153 | gfn_t table_gfn; |
148 | unsigned index, pt_access, uninitialized_var(pte_access); | 154 | unsigned index, pt_access, pte_access, accessed_dirty, shift; |
149 | gpa_t pte_gpa; | 155 | gpa_t pte_gpa; |
150 | bool eperm, last_gpte; | ||
151 | int offset; | 156 | int offset; |
152 | const int write_fault = access & PFERR_WRITE_MASK; | 157 | const int write_fault = access & PFERR_WRITE_MASK; |
153 | const int user_fault = access & PFERR_USER_MASK; | 158 | const int user_fault = access & PFERR_USER_MASK; |
154 | const int fetch_fault = access & PFERR_FETCH_MASK; | 159 | const int fetch_fault = access & PFERR_FETCH_MASK; |
155 | u16 errcode = 0; | 160 | u16 errcode = 0; |
161 | gpa_t real_gpa; | ||
162 | gfn_t gfn; | ||
156 | 163 | ||
157 | trace_kvm_mmu_pagetable_walk(addr, access); | 164 | trace_kvm_mmu_pagetable_walk(addr, access); |
158 | retry_walk: | 165 | retry_walk: |
159 | eperm = false; | ||
160 | walker->level = mmu->root_level; | 166 | walker->level = mmu->root_level; |
161 | pte = mmu->get_cr3(vcpu); | 167 | pte = mmu->get_cr3(vcpu); |
162 | 168 | ||
@@ -169,15 +175,21 @@ retry_walk: | |||
169 | --walker->level; | 175 | --walker->level; |
170 | } | 176 | } |
171 | #endif | 177 | #endif |
178 | walker->max_level = walker->level; | ||
172 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || | 179 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || |
173 | (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); | 180 | (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); |
174 | 181 | ||
175 | pt_access = ACC_ALL; | 182 | accessed_dirty = PT_ACCESSED_MASK; |
183 | pt_access = pte_access = ACC_ALL; | ||
184 | ++walker->level; | ||
176 | 185 | ||
177 | for (;;) { | 186 | do { |
178 | gfn_t real_gfn; | 187 | gfn_t real_gfn; |
179 | unsigned long host_addr; | 188 | unsigned long host_addr; |
180 | 189 | ||
190 | pt_access &= pte_access; | ||
191 | --walker->level; | ||
192 | |||
181 | index = PT_INDEX(addr, walker->level); | 193 | index = PT_INDEX(addr, walker->level); |
182 | 194 | ||
183 | table_gfn = gpte_to_gfn(pte); | 195 | table_gfn = gpte_to_gfn(pte); |
@@ -199,6 +211,7 @@ retry_walk: | |||
199 | ptep_user = (pt_element_t __user *)((void *)host_addr + offset); | 211 | ptep_user = (pt_element_t __user *)((void *)host_addr + offset); |
200 | if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) | 212 | if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) |
201 | goto error; | 213 | goto error; |
214 | walker->ptep_user[walker->level - 1] = ptep_user; | ||
202 | 215 | ||
203 | trace_kvm_mmu_paging_element(pte, walker->level); | 216 | trace_kvm_mmu_paging_element(pte, walker->level); |
204 | 217 | ||
@@ -211,92 +224,48 @@ retry_walk: | |||
211 | goto error; | 224 | goto error; |
212 | } | 225 | } |
213 | 226 | ||
214 | if (!check_write_user_access(vcpu, write_fault, user_fault, | 227 | accessed_dirty &= pte; |
215 | pte)) | 228 | pte_access = pt_access & gpte_access(vcpu, pte); |
216 | eperm = true; | ||
217 | |||
218 | #if PTTYPE == 64 | ||
219 | if (unlikely(fetch_fault && (pte & PT64_NX_MASK))) | ||
220 | eperm = true; | ||
221 | #endif | ||
222 | |||
223 | last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte); | ||
224 | if (last_gpte) { | ||
225 | pte_access = pt_access & | ||
226 | FNAME(gpte_access)(vcpu, pte, true); | ||
227 | /* check if the kernel is fetching from user page */ | ||
228 | if (unlikely(pte_access & PT_USER_MASK) && | ||
229 | kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) | ||
230 | if (fetch_fault && !user_fault) | ||
231 | eperm = true; | ||
232 | } | ||
233 | |||
234 | if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) { | ||
235 | int ret; | ||
236 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, | ||
237 | sizeof(pte)); | ||
238 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, | ||
239 | pte, pte|PT_ACCESSED_MASK); | ||
240 | if (unlikely(ret < 0)) | ||
241 | goto error; | ||
242 | else if (ret) | ||
243 | goto retry_walk; | ||
244 | |||
245 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
246 | pte |= PT_ACCESSED_MASK; | ||
247 | } | ||
248 | 229 | ||
249 | walker->ptes[walker->level - 1] = pte; | 230 | walker->ptes[walker->level - 1] = pte; |
231 | } while (!is_last_gpte(mmu, walker->level, pte)); | ||
250 | 232 | ||
251 | if (last_gpte) { | 233 | if (unlikely(permission_fault(mmu, pte_access, access))) { |
252 | int lvl = walker->level; | 234 | errcode |= PFERR_PRESENT_MASK; |
253 | gpa_t real_gpa; | 235 | goto error; |
254 | gfn_t gfn; | 236 | } |
255 | u32 ac; | ||
256 | |||
257 | gfn = gpte_to_gfn_lvl(pte, lvl); | ||
258 | gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT; | ||
259 | |||
260 | if (PTTYPE == 32 && | ||
261 | walker->level == PT_DIRECTORY_LEVEL && | ||
262 | is_cpuid_PSE36()) | ||
263 | gfn += pse36_gfn_delta(pte); | ||
264 | |||
265 | ac = write_fault | fetch_fault | user_fault; | ||
266 | 237 | ||
267 | real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), | 238 | gfn = gpte_to_gfn_lvl(pte, walker->level); |
268 | ac); | 239 | gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT; |
269 | if (real_gpa == UNMAPPED_GVA) | ||
270 | return 0; | ||
271 | 240 | ||
272 | walker->gfn = real_gpa >> PAGE_SHIFT; | 241 | if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) |
242 | gfn += pse36_gfn_delta(pte); | ||
273 | 243 | ||
274 | break; | 244 | real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access); |
275 | } | 245 | if (real_gpa == UNMAPPED_GVA) |
246 | return 0; | ||
276 | 247 | ||
277 | pt_access &= FNAME(gpte_access)(vcpu, pte, false); | 248 | walker->gfn = real_gpa >> PAGE_SHIFT; |
278 | --walker->level; | ||
279 | } | ||
280 | 249 | ||
281 | if (unlikely(eperm)) { | 250 | if (!write_fault) |
282 | errcode |= PFERR_PRESENT_MASK; | 251 | protect_clean_gpte(&pte_access, pte); |
283 | goto error; | ||
284 | } | ||
285 | 252 | ||
286 | if (write_fault && unlikely(!is_dirty_gpte(pte))) { | 253 | /* |
287 | int ret; | 254 | * On a write fault, fold the dirty bit into accessed_dirty by shifting it one |
255 | * place right. | ||
256 | * | ||
257 | * On a read fault, do nothing. | ||
258 | */ | ||
259 | shift = write_fault >> ilog2(PFERR_WRITE_MASK); | ||
260 | shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT; | ||
261 | accessed_dirty &= pte >> shift; | ||
288 | 262 | ||
289 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); | 263 | if (unlikely(!accessed_dirty)) { |
290 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, | 264 | ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); |
291 | pte, pte|PT_DIRTY_MASK); | ||
292 | if (unlikely(ret < 0)) | 265 | if (unlikely(ret < 0)) |
293 | goto error; | 266 | goto error; |
294 | else if (ret) | 267 | else if (ret) |
295 | goto retry_walk; | 268 | goto retry_walk; |
296 | |||
297 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
298 | pte |= PT_DIRTY_MASK; | ||
299 | walker->ptes[walker->level - 1] = pte; | ||
300 | } | 269 | } |
301 | 270 | ||
302 | walker->pt_access = pt_access; | 271 | walker->pt_access = pt_access; |
@@ -368,12 +337,11 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
368 | return; | 337 | return; |
369 | 338 | ||
370 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 339 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
371 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true); | 340 | pte_access = sp->role.access & gpte_access(vcpu, gpte); |
341 | protect_clean_gpte(&pte_access, gpte); | ||
372 | pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); | 342 | pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); |
373 | if (mmu_invalid_pfn(pfn)) { | 343 | if (mmu_invalid_pfn(pfn)) |
374 | kvm_release_pfn_clean(pfn); | ||
375 | return; | 344 | return; |
376 | } | ||
377 | 345 | ||
378 | /* | 346 | /* |
379 | * we call mmu_set_spte() with host_writable = true because that | 347 | * we call mmu_set_spte() with host_writable = true because that |
@@ -443,15 +411,13 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, | |||
443 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) | 411 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) |
444 | continue; | 412 | continue; |
445 | 413 | ||
446 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, | 414 | pte_access = sp->role.access & gpte_access(vcpu, gpte); |
447 | true); | 415 | protect_clean_gpte(&pte_access, gpte); |
448 | gfn = gpte_to_gfn(gpte); | 416 | gfn = gpte_to_gfn(gpte); |
449 | pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, | 417 | pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, |
450 | pte_access & ACC_WRITE_MASK); | 418 | pte_access & ACC_WRITE_MASK); |
451 | if (mmu_invalid_pfn(pfn)) { | 419 | if (mmu_invalid_pfn(pfn)) |
452 | kvm_release_pfn_clean(pfn); | ||
453 | break; | 420 | break; |
454 | } | ||
455 | 421 | ||
456 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, | 422 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, |
457 | NULL, PT_PAGE_TABLE_LEVEL, gfn, | 423 | NULL, PT_PAGE_TABLE_LEVEL, gfn, |
@@ -798,7 +764,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
798 | 764 | ||
799 | gfn = gpte_to_gfn(gpte); | 765 | gfn = gpte_to_gfn(gpte); |
800 | pte_access = sp->role.access; | 766 | pte_access = sp->role.access; |
801 | pte_access &= FNAME(gpte_access)(vcpu, gpte, true); | 767 | pte_access &= gpte_access(vcpu, gpte); |
768 | protect_clean_gpte(&pte_access, gpte); | ||
802 | 769 | ||
803 | if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present)) | 770 | if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present)) |
804 | continue; | 771 | continue; |
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 9b7ec1150ab0..cfc258a6bf97 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Kernel-based Virtual Machine -- Performane Monitoring Unit support | 2 | * Kernel-based Virtual Machine -- Performance Monitoring Unit support |
3 | * | 3 | * |
4 | * Copyright 2011 Red Hat, Inc. and/or its affiliates. | 4 | * Copyright 2011 Red Hat, Inc. and/or its affiliates. |
5 | * | 5 | * |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index baead950d6c8..d017df3899ef 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -163,7 +163,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio); | |||
163 | 163 | ||
164 | #define MSR_INVALID 0xffffffffU | 164 | #define MSR_INVALID 0xffffffffU |
165 | 165 | ||
166 | static struct svm_direct_access_msrs { | 166 | static const struct svm_direct_access_msrs { |
167 | u32 index; /* Index of the MSR */ | 167 | u32 index; /* Index of the MSR */ |
168 | bool always; /* True if intercept is always on */ | 168 | bool always; /* True if intercept is always on */ |
169 | } direct_access_msrs[] = { | 169 | } direct_access_msrs[] = { |
@@ -400,7 +400,7 @@ struct svm_init_data { | |||
400 | int r; | 400 | int r; |
401 | }; | 401 | }; |
402 | 402 | ||
403 | static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; | 403 | static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; |
404 | 404 | ||
405 | #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges) | 405 | #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges) |
406 | #define MSRS_RANGE_SIZE 2048 | 406 | #define MSRS_RANGE_SIZE 2048 |
@@ -1146,7 +1146,6 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1146 | 1146 | ||
1147 | svm_set_efer(&svm->vcpu, 0); | 1147 | svm_set_efer(&svm->vcpu, 0); |
1148 | save->dr6 = 0xffff0ff0; | 1148 | save->dr6 = 0xffff0ff0; |
1149 | save->dr7 = 0x400; | ||
1150 | kvm_set_rflags(&svm->vcpu, 2); | 1149 | kvm_set_rflags(&svm->vcpu, 2); |
1151 | save->rip = 0x0000fff0; | 1150 | save->rip = 0x0000fff0; |
1152 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | 1151 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; |
@@ -1643,7 +1642,7 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, | |||
1643 | mark_dirty(svm->vmcb, VMCB_SEG); | 1642 | mark_dirty(svm->vmcb, VMCB_SEG); |
1644 | } | 1643 | } |
1645 | 1644 | ||
1646 | static void update_db_intercept(struct kvm_vcpu *vcpu) | 1645 | static void update_db_bp_intercept(struct kvm_vcpu *vcpu) |
1647 | { | 1646 | { |
1648 | struct vcpu_svm *svm = to_svm(vcpu); | 1647 | struct vcpu_svm *svm = to_svm(vcpu); |
1649 | 1648 | ||
@@ -1663,20 +1662,6 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) | |||
1663 | vcpu->guest_debug = 0; | 1662 | vcpu->guest_debug = 0; |
1664 | } | 1663 | } |
1665 | 1664 | ||
1666 | static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | ||
1667 | { | ||
1668 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1669 | |||
1670 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | ||
1671 | svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; | ||
1672 | else | ||
1673 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | ||
1674 | |||
1675 | mark_dirty(svm->vmcb, VMCB_DR); | ||
1676 | |||
1677 | update_db_intercept(vcpu); | ||
1678 | } | ||
1679 | |||
1680 | static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | 1665 | static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) |
1681 | { | 1666 | { |
1682 | if (sd->next_asid > sd->max_asid) { | 1667 | if (sd->next_asid > sd->max_asid) { |
@@ -1748,7 +1733,7 @@ static int db_interception(struct vcpu_svm *svm) | |||
1748 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) | 1733 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) |
1749 | svm->vmcb->save.rflags &= | 1734 | svm->vmcb->save.rflags &= |
1750 | ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | 1735 | ~(X86_EFLAGS_TF | X86_EFLAGS_RF); |
1751 | update_db_intercept(&svm->vcpu); | 1736 | update_db_bp_intercept(&svm->vcpu); |
1752 | } | 1737 | } |
1753 | 1738 | ||
1754 | if (svm->vcpu.guest_debug & | 1739 | if (svm->vcpu.guest_debug & |
@@ -2063,7 +2048,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm) | |||
2063 | if (svm->nested.intercept & 1ULL) { | 2048 | if (svm->nested.intercept & 1ULL) { |
2064 | /* | 2049 | /* |
2065 | * The #vmexit can't be emulated here directly because this | 2050 | * The #vmexit can't be emulated here directly because this |
2066 | * code path runs with irqs and preemtion disabled. A | 2051 | * code path runs with irqs and preemption disabled. A |
2067 | * #vmexit emulation might sleep. Only signal request for | 2052 | * #vmexit emulation might sleep. Only signal request for |
2068 | * the #vmexit here. | 2053 | * the #vmexit here. |
2069 | */ | 2054 | */ |
@@ -2105,7 +2090,6 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) | |||
2105 | return kmap(page); | 2090 | return kmap(page); |
2106 | 2091 | ||
2107 | error: | 2092 | error: |
2108 | kvm_release_page_clean(page); | ||
2109 | kvm_inject_gp(&svm->vcpu, 0); | 2093 | kvm_inject_gp(&svm->vcpu, 0); |
2110 | 2094 | ||
2111 | return NULL; | 2095 | return NULL; |
@@ -2409,7 +2393,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) | |||
2409 | { | 2393 | { |
2410 | /* | 2394 | /* |
2411 | * This function merges the msr permission bitmaps of kvm and the | 2395 | * This function merges the msr permission bitmaps of kvm and the |
2412 | * nested vmcb. It is omptimized in that it only merges the parts where | 2396 | * nested vmcb. It is optimized in that it only merges the parts where |
2413 | * the kvm msr permission bitmap may contain zero bits | 2397 | * the kvm msr permission bitmap may contain zero bits |
2414 | */ | 2398 | */ |
2415 | int i; | 2399 | int i; |
@@ -3268,7 +3252,7 @@ static int pause_interception(struct vcpu_svm *svm) | |||
3268 | return 1; | 3252 | return 1; |
3269 | } | 3253 | } |
3270 | 3254 | ||
3271 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | 3255 | static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { |
3272 | [SVM_EXIT_READ_CR0] = cr_interception, | 3256 | [SVM_EXIT_READ_CR0] = cr_interception, |
3273 | [SVM_EXIT_READ_CR3] = cr_interception, | 3257 | [SVM_EXIT_READ_CR3] = cr_interception, |
3274 | [SVM_EXIT_READ_CR4] = cr_interception, | 3258 | [SVM_EXIT_READ_CR4] = cr_interception, |
@@ -3660,7 +3644,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
3660 | */ | 3644 | */ |
3661 | svm->nmi_singlestep = true; | 3645 | svm->nmi_singlestep = true; |
3662 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 3646 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
3663 | update_db_intercept(vcpu); | 3647 | update_db_bp_intercept(vcpu); |
3664 | } | 3648 | } |
3665 | 3649 | ||
3666 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) | 3650 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) |
@@ -3783,12 +3767,6 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) | |||
3783 | svm_complete_interrupts(svm); | 3767 | svm_complete_interrupts(svm); |
3784 | } | 3768 | } |
3785 | 3769 | ||
3786 | #ifdef CONFIG_X86_64 | ||
3787 | #define R "r" | ||
3788 | #else | ||
3789 | #define R "e" | ||
3790 | #endif | ||
3791 | |||
3792 | static void svm_vcpu_run(struct kvm_vcpu *vcpu) | 3770 | static void svm_vcpu_run(struct kvm_vcpu *vcpu) |
3793 | { | 3771 | { |
3794 | struct vcpu_svm *svm = to_svm(vcpu); | 3772 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -3815,13 +3793,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3815 | local_irq_enable(); | 3793 | local_irq_enable(); |
3816 | 3794 | ||
3817 | asm volatile ( | 3795 | asm volatile ( |
3818 | "push %%"R"bp; \n\t" | 3796 | "push %%" _ASM_BP "; \n\t" |
3819 | "mov %c[rbx](%[svm]), %%"R"bx \n\t" | 3797 | "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" |
3820 | "mov %c[rcx](%[svm]), %%"R"cx \n\t" | 3798 | "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t" |
3821 | "mov %c[rdx](%[svm]), %%"R"dx \n\t" | 3799 | "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t" |
3822 | "mov %c[rsi](%[svm]), %%"R"si \n\t" | 3800 | "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t" |
3823 | "mov %c[rdi](%[svm]), %%"R"di \n\t" | 3801 | "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t" |
3824 | "mov %c[rbp](%[svm]), %%"R"bp \n\t" | 3802 | "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t" |
3825 | #ifdef CONFIG_X86_64 | 3803 | #ifdef CONFIG_X86_64 |
3826 | "mov %c[r8](%[svm]), %%r8 \n\t" | 3804 | "mov %c[r8](%[svm]), %%r8 \n\t" |
3827 | "mov %c[r9](%[svm]), %%r9 \n\t" | 3805 | "mov %c[r9](%[svm]), %%r9 \n\t" |
@@ -3834,20 +3812,20 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3834 | #endif | 3812 | #endif |
3835 | 3813 | ||
3836 | /* Enter guest mode */ | 3814 | /* Enter guest mode */ |
3837 | "push %%"R"ax \n\t" | 3815 | "push %%" _ASM_AX " \n\t" |
3838 | "mov %c[vmcb](%[svm]), %%"R"ax \n\t" | 3816 | "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t" |
3839 | __ex(SVM_VMLOAD) "\n\t" | 3817 | __ex(SVM_VMLOAD) "\n\t" |
3840 | __ex(SVM_VMRUN) "\n\t" | 3818 | __ex(SVM_VMRUN) "\n\t" |
3841 | __ex(SVM_VMSAVE) "\n\t" | 3819 | __ex(SVM_VMSAVE) "\n\t" |
3842 | "pop %%"R"ax \n\t" | 3820 | "pop %%" _ASM_AX " \n\t" |
3843 | 3821 | ||
3844 | /* Save guest registers, load host registers */ | 3822 | /* Save guest registers, load host registers */ |
3845 | "mov %%"R"bx, %c[rbx](%[svm]) \n\t" | 3823 | "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t" |
3846 | "mov %%"R"cx, %c[rcx](%[svm]) \n\t" | 3824 | "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t" |
3847 | "mov %%"R"dx, %c[rdx](%[svm]) \n\t" | 3825 | "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t" |
3848 | "mov %%"R"si, %c[rsi](%[svm]) \n\t" | 3826 | "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t" |
3849 | "mov %%"R"di, %c[rdi](%[svm]) \n\t" | 3827 | "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t" |
3850 | "mov %%"R"bp, %c[rbp](%[svm]) \n\t" | 3828 | "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t" |
3851 | #ifdef CONFIG_X86_64 | 3829 | #ifdef CONFIG_X86_64 |
3852 | "mov %%r8, %c[r8](%[svm]) \n\t" | 3830 | "mov %%r8, %c[r8](%[svm]) \n\t" |
3853 | "mov %%r9, %c[r9](%[svm]) \n\t" | 3831 | "mov %%r9, %c[r9](%[svm]) \n\t" |
@@ -3858,7 +3836,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3858 | "mov %%r14, %c[r14](%[svm]) \n\t" | 3836 | "mov %%r14, %c[r14](%[svm]) \n\t" |
3859 | "mov %%r15, %c[r15](%[svm]) \n\t" | 3837 | "mov %%r15, %c[r15](%[svm]) \n\t" |
3860 | #endif | 3838 | #endif |
3861 | "pop %%"R"bp" | 3839 | "pop %%" _ASM_BP |
3862 | : | 3840 | : |
3863 | : [svm]"a"(svm), | 3841 | : [svm]"a"(svm), |
3864 | [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), | 3842 | [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), |
@@ -3879,9 +3857,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3879 | [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) | 3857 | [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) |
3880 | #endif | 3858 | #endif |
3881 | : "cc", "memory" | 3859 | : "cc", "memory" |
3882 | , R"bx", R"cx", R"dx", R"si", R"di" | ||
3883 | #ifdef CONFIG_X86_64 | 3860 | #ifdef CONFIG_X86_64 |
3861 | , "rbx", "rcx", "rdx", "rsi", "rdi" | ||
3884 | , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" | 3862 | , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" |
3863 | #else | ||
3864 | , "ebx", "ecx", "edx", "esi", "edi" | ||
3885 | #endif | 3865 | #endif |
3886 | ); | 3866 | ); |
3887 | 3867 | ||
@@ -3941,8 +3921,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3941 | mark_all_clean(svm->vmcb); | 3921 | mark_all_clean(svm->vmcb); |
3942 | } | 3922 | } |
3943 | 3923 | ||
3944 | #undef R | ||
3945 | |||
3946 | static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) | 3924 | static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) |
3947 | { | 3925 | { |
3948 | struct vcpu_svm *svm = to_svm(vcpu); | 3926 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -4069,7 +4047,7 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | |||
4069 | #define POST_MEM(exit) { .exit_code = (exit), \ | 4047 | #define POST_MEM(exit) { .exit_code = (exit), \ |
4070 | .stage = X86_ICPT_POST_MEMACCESS, } | 4048 | .stage = X86_ICPT_POST_MEMACCESS, } |
4071 | 4049 | ||
4072 | static struct __x86_intercept { | 4050 | static const struct __x86_intercept { |
4073 | u32 exit_code; | 4051 | u32 exit_code; |
4074 | enum x86_intercept_stage stage; | 4052 | enum x86_intercept_stage stage; |
4075 | } x86_intercept_map[] = { | 4053 | } x86_intercept_map[] = { |
@@ -4260,7 +4238,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4260 | .vcpu_load = svm_vcpu_load, | 4238 | .vcpu_load = svm_vcpu_load, |
4261 | .vcpu_put = svm_vcpu_put, | 4239 | .vcpu_put = svm_vcpu_put, |
4262 | 4240 | ||
4263 | .set_guest_debug = svm_guest_debug, | 4241 | .update_db_bp_intercept = update_db_bp_intercept, |
4264 | .get_msr = svm_get_msr, | 4242 | .get_msr = svm_get_msr, |
4265 | .set_msr = svm_set_msr, | 4243 | .set_msr = svm_set_msr, |
4266 | .get_segment_base = svm_get_segment_base, | 4244 | .get_segment_base = svm_get_segment_base, |
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c deleted file mode 100644 index 6b85cc647f34..000000000000 --- a/arch/x86/kvm/timer.c +++ /dev/null | |||
@@ -1,47 +0,0 @@ | |||
1 | /* | ||
2 | * Kernel-based Virtual Machine driver for Linux | ||
3 | * | ||
4 | * This module enables machines with Intel VT-x extensions to run virtual | ||
5 | * machines without emulation or binary translation. | ||
6 | * | ||
7 | * timer support | ||
8 | * | ||
9 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
10 | * | ||
11 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
12 | * the COPYING file in the top-level directory. | ||
13 | */ | ||
14 | |||
15 | #include <linux/kvm_host.h> | ||
16 | #include <linux/kvm.h> | ||
17 | #include <linux/hrtimer.h> | ||
18 | #include <linux/atomic.h> | ||
19 | #include "kvm_timer.h" | ||
20 | |||
21 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data) | ||
22 | { | ||
23 | struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); | ||
24 | struct kvm_vcpu *vcpu = ktimer->vcpu; | ||
25 | wait_queue_head_t *q = &vcpu->wq; | ||
26 | |||
27 | /* | ||
28 | * There is a race window between reading and incrementing, but we do | ||
29 | * not care about potentially losing timer events in the !reinject | ||
30 | * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked | ||
31 | * in vcpu_enter_guest. | ||
32 | */ | ||
33 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { | ||
34 | atomic_inc(&ktimer->pending); | ||
35 | /* FIXME: this code should not know anything about vcpus */ | ||
36 | kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); | ||
37 | } | ||
38 | |||
39 | if (waitqueue_active(q)) | ||
40 | wake_up_interruptible(q); | ||
41 | |||
42 | if (ktimer->t_ops->is_periodic(ktimer)) { | ||
43 | hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); | ||
44 | return HRTIMER_RESTART; | ||
45 | } else | ||
46 | return HRTIMER_NORESTART; | ||
47 | } | ||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 851aa7c3b890..ad6b1dd06f8b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -127,6 +127,8 @@ module_param(ple_gap, int, S_IRUGO); | |||
127 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | 127 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; |
128 | module_param(ple_window, int, S_IRUGO); | 128 | module_param(ple_window, int, S_IRUGO); |
129 | 129 | ||
130 | extern const ulong vmx_return; | ||
131 | |||
130 | #define NR_AUTOLOAD_MSRS 8 | 132 | #define NR_AUTOLOAD_MSRS 8 |
131 | #define VMCS02_POOL_SIZE 1 | 133 | #define VMCS02_POOL_SIZE 1 |
132 | 134 | ||
@@ -405,16 +407,16 @@ struct vcpu_vmx { | |||
405 | struct { | 407 | struct { |
406 | int vm86_active; | 408 | int vm86_active; |
407 | ulong save_rflags; | 409 | ulong save_rflags; |
410 | struct kvm_segment segs[8]; | ||
411 | } rmode; | ||
412 | struct { | ||
413 | u32 bitmask; /* 4 bits per segment (1 bit per field) */ | ||
408 | struct kvm_save_segment { | 414 | struct kvm_save_segment { |
409 | u16 selector; | 415 | u16 selector; |
410 | unsigned long base; | 416 | unsigned long base; |
411 | u32 limit; | 417 | u32 limit; |
412 | u32 ar; | 418 | u32 ar; |
413 | } tr, es, ds, fs, gs; | 419 | } seg[8]; |
414 | } rmode; | ||
415 | struct { | ||
416 | u32 bitmask; /* 4 bits per segment (1 bit per field) */ | ||
417 | struct kvm_save_segment seg[8]; | ||
418 | } segment_cache; | 420 | } segment_cache; |
419 | int vpid; | 421 | int vpid; |
420 | bool emulation_required; | 422 | bool emulation_required; |
@@ -450,7 +452,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
450 | #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ | 452 | #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ |
451 | [number##_HIGH] = VMCS12_OFFSET(name)+4 | 453 | [number##_HIGH] = VMCS12_OFFSET(name)+4 |
452 | 454 | ||
453 | static unsigned short vmcs_field_to_offset_table[] = { | 455 | static const unsigned short vmcs_field_to_offset_table[] = { |
454 | FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), | 456 | FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), |
455 | FIELD(GUEST_ES_SELECTOR, guest_es_selector), | 457 | FIELD(GUEST_ES_SELECTOR, guest_es_selector), |
456 | FIELD(GUEST_CS_SELECTOR, guest_cs_selector), | 458 | FIELD(GUEST_CS_SELECTOR, guest_cs_selector), |
@@ -596,10 +598,9 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) | |||
596 | static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr) | 598 | static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr) |
597 | { | 599 | { |
598 | struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT); | 600 | struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT); |
599 | if (is_error_page(page)) { | 601 | if (is_error_page(page)) |
600 | kvm_release_page_clean(page); | ||
601 | return NULL; | 602 | return NULL; |
602 | } | 603 | |
603 | return page; | 604 | return page; |
604 | } | 605 | } |
605 | 606 | ||
@@ -667,7 +668,7 @@ static struct vmx_capability { | |||
667 | .ar_bytes = GUEST_##seg##_AR_BYTES, \ | 668 | .ar_bytes = GUEST_##seg##_AR_BYTES, \ |
668 | } | 669 | } |
669 | 670 | ||
670 | static struct kvm_vmx_segment_field { | 671 | static const struct kvm_vmx_segment_field { |
671 | unsigned selector; | 672 | unsigned selector; |
672 | unsigned base; | 673 | unsigned base; |
673 | unsigned limit; | 674 | unsigned limit; |
@@ -1343,7 +1344,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) | |||
1343 | guest_efer = vmx->vcpu.arch.efer; | 1344 | guest_efer = vmx->vcpu.arch.efer; |
1344 | 1345 | ||
1345 | /* | 1346 | /* |
1346 | * NX is emulated; LMA and LME handled by hardware; SCE meaninless | 1347 | * NX is emulated; LMA and LME handled by hardware; SCE meaningless |
1347 | * outside long mode | 1348 | * outside long mode |
1348 | */ | 1349 | */ |
1349 | ignore_bits = EFER_NX | EFER_SCE; | 1350 | ignore_bits = EFER_NX | EFER_SCE; |
@@ -1995,7 +1996,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
1995 | #endif | 1996 | #endif |
1996 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | | 1997 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | |
1997 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | | 1998 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | |
1998 | CPU_BASED_RDPMC_EXITING | | 1999 | CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | |
1999 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 2000 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
2000 | /* | 2001 | /* |
2001 | * We can allow some features even when not supported by the | 2002 | * We can allow some features even when not supported by the |
@@ -2291,16 +2292,6 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) | |||
2291 | } | 2292 | } |
2292 | } | 2293 | } |
2293 | 2294 | ||
2294 | static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | ||
2295 | { | ||
2296 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | ||
2297 | vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); | ||
2298 | else | ||
2299 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); | ||
2300 | |||
2301 | update_exception_bitmap(vcpu); | ||
2302 | } | ||
2303 | |||
2304 | static __init int cpu_has_kvm_support(void) | 2295 | static __init int cpu_has_kvm_support(void) |
2305 | { | 2296 | { |
2306 | return cpu_has_vmx(); | 2297 | return cpu_has_vmx(); |
@@ -2698,20 +2689,17 @@ static __exit void hardware_unsetup(void) | |||
2698 | free_kvm_area(); | 2689 | free_kvm_area(); |
2699 | } | 2690 | } |
2700 | 2691 | ||
2701 | static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save) | 2692 | static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) |
2702 | { | 2693 | { |
2703 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 2694 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
2695 | struct kvm_segment tmp = *save; | ||
2704 | 2696 | ||
2705 | if (vmcs_readl(sf->base) == save->base && (save->base & AR_S_MASK)) { | 2697 | if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) { |
2706 | vmcs_write16(sf->selector, save->selector); | 2698 | tmp.base = vmcs_readl(sf->base); |
2707 | vmcs_writel(sf->base, save->base); | 2699 | tmp.selector = vmcs_read16(sf->selector); |
2708 | vmcs_write32(sf->limit, save->limit); | 2700 | tmp.s = 1; |
2709 | vmcs_write32(sf->ar_bytes, save->ar); | ||
2710 | } else { | ||
2711 | u32 dpl = (vmcs_read16(sf->selector) & SELECTOR_RPL_MASK) | ||
2712 | << AR_DPL_SHIFT; | ||
2713 | vmcs_write32(sf->ar_bytes, 0x93 | dpl); | ||
2714 | } | 2701 | } |
2702 | vmx_set_segment(vcpu, &tmp, seg); | ||
2715 | } | 2703 | } |
2716 | 2704 | ||
2717 | static void enter_pmode(struct kvm_vcpu *vcpu) | 2705 | static void enter_pmode(struct kvm_vcpu *vcpu) |
@@ -2724,10 +2712,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
2724 | 2712 | ||
2725 | vmx_segment_cache_clear(vmx); | 2713 | vmx_segment_cache_clear(vmx); |
2726 | 2714 | ||
2727 | vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); | 2715 | vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); |
2728 | vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); | ||
2729 | vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); | ||
2730 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); | ||
2731 | 2716 | ||
2732 | flags = vmcs_readl(GUEST_RFLAGS); | 2717 | flags = vmcs_readl(GUEST_RFLAGS); |
2733 | flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; | 2718 | flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; |
@@ -2742,10 +2727,10 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
2742 | if (emulate_invalid_guest_state) | 2727 | if (emulate_invalid_guest_state) |
2743 | return; | 2728 | return; |
2744 | 2729 | ||
2745 | fix_pmode_dataseg(VCPU_SREG_ES, &vmx->rmode.es); | 2730 | fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); |
2746 | fix_pmode_dataseg(VCPU_SREG_DS, &vmx->rmode.ds); | 2731 | fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); |
2747 | fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs); | 2732 | fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); |
2748 | fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs); | 2733 | fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); |
2749 | 2734 | ||
2750 | vmx_segment_cache_clear(vmx); | 2735 | vmx_segment_cache_clear(vmx); |
2751 | 2736 | ||
@@ -2773,14 +2758,10 @@ static gva_t rmode_tss_base(struct kvm *kvm) | |||
2773 | return kvm->arch.tss_addr; | 2758 | return kvm->arch.tss_addr; |
2774 | } | 2759 | } |
2775 | 2760 | ||
2776 | static void fix_rmode_seg(int seg, struct kvm_save_segment *save) | 2761 | static void fix_rmode_seg(int seg, struct kvm_segment *save) |
2777 | { | 2762 | { |
2778 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 2763 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
2779 | 2764 | ||
2780 | save->selector = vmcs_read16(sf->selector); | ||
2781 | save->base = vmcs_readl(sf->base); | ||
2782 | save->limit = vmcs_read32(sf->limit); | ||
2783 | save->ar = vmcs_read32(sf->ar_bytes); | ||
2784 | vmcs_write16(sf->selector, save->base >> 4); | 2765 | vmcs_write16(sf->selector, save->base >> 4); |
2785 | vmcs_write32(sf->base, save->base & 0xffff0); | 2766 | vmcs_write32(sf->base, save->base & 0xffff0); |
2786 | vmcs_write32(sf->limit, 0xffff); | 2767 | vmcs_write32(sf->limit, 0xffff); |
@@ -2800,9 +2781,16 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
2800 | if (enable_unrestricted_guest) | 2781 | if (enable_unrestricted_guest) |
2801 | return; | 2782 | return; |
2802 | 2783 | ||
2784 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); | ||
2785 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); | ||
2786 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); | ||
2787 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); | ||
2788 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); | ||
2789 | |||
2803 | vmx->emulation_required = 1; | 2790 | vmx->emulation_required = 1; |
2804 | vmx->rmode.vm86_active = 1; | 2791 | vmx->rmode.vm86_active = 1; |
2805 | 2792 | ||
2793 | |||
2806 | /* | 2794 | /* |
2807 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering | 2795 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering |
2808 | * vcpu. Call it here with phys address pointing 16M below 4G. | 2796 | * vcpu. Call it here with phys address pointing 16M below 4G. |
@@ -2817,14 +2805,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
2817 | 2805 | ||
2818 | vmx_segment_cache_clear(vmx); | 2806 | vmx_segment_cache_clear(vmx); |
2819 | 2807 | ||
2820 | vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); | ||
2821 | vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); | ||
2822 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); | 2808 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); |
2823 | |||
2824 | vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT); | ||
2825 | vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); | 2809 | vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); |
2826 | |||
2827 | vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES); | ||
2828 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); | 2810 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); |
2829 | 2811 | ||
2830 | flags = vmcs_readl(GUEST_RFLAGS); | 2812 | flags = vmcs_readl(GUEST_RFLAGS); |
@@ -3117,35 +3099,24 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
3117 | struct kvm_segment *var, int seg) | 3099 | struct kvm_segment *var, int seg) |
3118 | { | 3100 | { |
3119 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3101 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3120 | struct kvm_save_segment *save; | ||
3121 | u32 ar; | 3102 | u32 ar; |
3122 | 3103 | ||
3123 | if (vmx->rmode.vm86_active | 3104 | if (vmx->rmode.vm86_active |
3124 | && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES | 3105 | && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES |
3125 | || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS | 3106 | || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS |
3126 | || seg == VCPU_SREG_GS) | 3107 | || seg == VCPU_SREG_GS)) { |
3127 | && !emulate_invalid_guest_state) { | 3108 | *var = vmx->rmode.segs[seg]; |
3128 | switch (seg) { | ||
3129 | case VCPU_SREG_TR: save = &vmx->rmode.tr; break; | ||
3130 | case VCPU_SREG_ES: save = &vmx->rmode.es; break; | ||
3131 | case VCPU_SREG_DS: save = &vmx->rmode.ds; break; | ||
3132 | case VCPU_SREG_FS: save = &vmx->rmode.fs; break; | ||
3133 | case VCPU_SREG_GS: save = &vmx->rmode.gs; break; | ||
3134 | default: BUG(); | ||
3135 | } | ||
3136 | var->selector = save->selector; | ||
3137 | var->base = save->base; | ||
3138 | var->limit = save->limit; | ||
3139 | ar = save->ar; | ||
3140 | if (seg == VCPU_SREG_TR | 3109 | if (seg == VCPU_SREG_TR |
3141 | || var->selector == vmx_read_guest_seg_selector(vmx, seg)) | 3110 | || var->selector == vmx_read_guest_seg_selector(vmx, seg)) |
3142 | goto use_saved_rmode_seg; | 3111 | return; |
3112 | var->base = vmx_read_guest_seg_base(vmx, seg); | ||
3113 | var->selector = vmx_read_guest_seg_selector(vmx, seg); | ||
3114 | return; | ||
3143 | } | 3115 | } |
3144 | var->base = vmx_read_guest_seg_base(vmx, seg); | 3116 | var->base = vmx_read_guest_seg_base(vmx, seg); |
3145 | var->limit = vmx_read_guest_seg_limit(vmx, seg); | 3117 | var->limit = vmx_read_guest_seg_limit(vmx, seg); |
3146 | var->selector = vmx_read_guest_seg_selector(vmx, seg); | 3118 | var->selector = vmx_read_guest_seg_selector(vmx, seg); |
3147 | ar = vmx_read_guest_seg_ar(vmx, seg); | 3119 | ar = vmx_read_guest_seg_ar(vmx, seg); |
3148 | use_saved_rmode_seg: | ||
3149 | if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) | 3120 | if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) |
3150 | ar = 0; | 3121 | ar = 0; |
3151 | var->type = ar & 15; | 3122 | var->type = ar & 15; |
@@ -3227,23 +3198,21 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
3227 | struct kvm_segment *var, int seg) | 3198 | struct kvm_segment *var, int seg) |
3228 | { | 3199 | { |
3229 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3200 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3230 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 3201 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
3231 | u32 ar; | 3202 | u32 ar; |
3232 | 3203 | ||
3233 | vmx_segment_cache_clear(vmx); | 3204 | vmx_segment_cache_clear(vmx); |
3234 | 3205 | ||
3235 | if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { | 3206 | if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { |
3236 | vmcs_write16(sf->selector, var->selector); | 3207 | vmcs_write16(sf->selector, var->selector); |
3237 | vmx->rmode.tr.selector = var->selector; | 3208 | vmx->rmode.segs[VCPU_SREG_TR] = *var; |
3238 | vmx->rmode.tr.base = var->base; | ||
3239 | vmx->rmode.tr.limit = var->limit; | ||
3240 | vmx->rmode.tr.ar = vmx_segment_access_rights(var); | ||
3241 | return; | 3209 | return; |
3242 | } | 3210 | } |
3243 | vmcs_writel(sf->base, var->base); | 3211 | vmcs_writel(sf->base, var->base); |
3244 | vmcs_write32(sf->limit, var->limit); | 3212 | vmcs_write32(sf->limit, var->limit); |
3245 | vmcs_write16(sf->selector, var->selector); | 3213 | vmcs_write16(sf->selector, var->selector); |
3246 | if (vmx->rmode.vm86_active && var->s) { | 3214 | if (vmx->rmode.vm86_active && var->s) { |
3215 | vmx->rmode.segs[seg] = *var; | ||
3247 | /* | 3216 | /* |
3248 | * Hack real-mode segments into vm86 compatibility. | 3217 | * Hack real-mode segments into vm86 compatibility. |
3249 | */ | 3218 | */ |
@@ -3258,7 +3227,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
3258 | * qemu binaries. | 3227 | * qemu binaries. |
3259 | * IA32 arch specifies that at the time of processor reset the | 3228 | * IA32 arch specifies that at the time of processor reset the |
3260 | * "Accessed" bit in the AR field of segment registers is 1. And qemu | 3229 | * "Accessed" bit in the AR field of segment registers is 1. And qemu |
3261 | * is setting it to 0 in the usedland code. This causes invalid guest | 3230 | * is setting it to 0 in the userland code. This causes invalid guest |
3262 | * state vmexit when "unrestricted guest" mode is turned on. | 3231 | * state vmexit when "unrestricted guest" mode is turned on. |
3263 | * Fix for this setup issue in cpu_reset is being pushed in the qemu | 3232 | * Fix for this setup issue in cpu_reset is being pushed in the qemu |
3264 | * tree. Newer qemu binaries with that qemu fix would not need this | 3233 | * tree. Newer qemu binaries with that qemu fix would not need this |
@@ -3288,16 +3257,10 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
3288 | vmcs_readl(GUEST_CS_BASE) >> 4); | 3257 | vmcs_readl(GUEST_CS_BASE) >> 4); |
3289 | break; | 3258 | break; |
3290 | case VCPU_SREG_ES: | 3259 | case VCPU_SREG_ES: |
3291 | fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es); | ||
3292 | break; | ||
3293 | case VCPU_SREG_DS: | 3260 | case VCPU_SREG_DS: |
3294 | fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds); | ||
3295 | break; | ||
3296 | case VCPU_SREG_GS: | 3261 | case VCPU_SREG_GS: |
3297 | fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs); | ||
3298 | break; | ||
3299 | case VCPU_SREG_FS: | 3262 | case VCPU_SREG_FS: |
3300 | fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs); | 3263 | fix_rmode_seg(seg, &vmx->rmode.segs[seg]); |
3301 | break; | 3264 | break; |
3302 | case VCPU_SREG_SS: | 3265 | case VCPU_SREG_SS: |
3303 | vmcs_write16(GUEST_SS_SELECTOR, | 3266 | vmcs_write16(GUEST_SS_SELECTOR, |
@@ -3351,9 +3314,9 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) | |||
3351 | 3314 | ||
3352 | if (var.base != (var.selector << 4)) | 3315 | if (var.base != (var.selector << 4)) |
3353 | return false; | 3316 | return false; |
3354 | if (var.limit != 0xffff) | 3317 | if (var.limit < 0xffff) |
3355 | return false; | 3318 | return false; |
3356 | if (ar != 0xf3) | 3319 | if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3) |
3357 | return false; | 3320 | return false; |
3358 | 3321 | ||
3359 | return true; | 3322 | return true; |
@@ -3605,7 +3568,7 @@ out: | |||
3605 | 3568 | ||
3606 | static void seg_setup(int seg) | 3569 | static void seg_setup(int seg) |
3607 | { | 3570 | { |
3608 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 3571 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
3609 | unsigned int ar; | 3572 | unsigned int ar; |
3610 | 3573 | ||
3611 | vmcs_write16(sf->selector, 0); | 3574 | vmcs_write16(sf->selector, 0); |
@@ -3770,8 +3733,7 @@ static void vmx_set_constant_host_state(void) | |||
3770 | native_store_idt(&dt); | 3733 | native_store_idt(&dt); |
3771 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ | 3734 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ |
3772 | 3735 | ||
3773 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(tmpl)); | 3736 | vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ |
3774 | vmcs_writel(HOST_RIP, tmpl); /* 22.2.5 */ | ||
3775 | 3737 | ||
3776 | rdmsr(MSR_IA32_SYSENTER_CS, low32, high32); | 3738 | rdmsr(MSR_IA32_SYSENTER_CS, low32, high32); |
3777 | vmcs_write32(HOST_IA32_SYSENTER_CS, low32); | 3739 | vmcs_write32(HOST_IA32_SYSENTER_CS, low32); |
@@ -4005,8 +3967,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4005 | kvm_rip_write(vcpu, 0); | 3967 | kvm_rip_write(vcpu, 0); |
4006 | kvm_register_write(vcpu, VCPU_REGS_RSP, 0); | 3968 | kvm_register_write(vcpu, VCPU_REGS_RSP, 0); |
4007 | 3969 | ||
4008 | vmcs_writel(GUEST_DR7, 0x400); | ||
4009 | |||
4010 | vmcs_writel(GUEST_GDTR_BASE, 0); | 3970 | vmcs_writel(GUEST_GDTR_BASE, 0); |
4011 | vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); | 3971 | vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); |
4012 | 3972 | ||
@@ -4456,7 +4416,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
4456 | hypercall[2] = 0xc1; | 4416 | hypercall[2] = 0xc1; |
4457 | } | 4417 | } |
4458 | 4418 | ||
4459 | /* called to set cr0 as approriate for a mov-to-cr0 exit. */ | 4419 | /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ |
4460 | static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) | 4420 | static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) |
4461 | { | 4421 | { |
4462 | if (to_vmx(vcpu)->nested.vmxon && | 4422 | if (to_vmx(vcpu)->nested.vmxon && |
@@ -5701,7 +5661,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) | |||
5701 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 5661 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
5702 | * to be done to userspace and return 0. | 5662 | * to be done to userspace and return 0. |
5703 | */ | 5663 | */ |
5704 | static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | 5664 | static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { |
5705 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception, | 5665 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception, |
5706 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, | 5666 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, |
5707 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, | 5667 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, |
@@ -6229,17 +6189,10 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) | |||
6229 | msrs[i].host); | 6189 | msrs[i].host); |
6230 | } | 6190 | } |
6231 | 6191 | ||
6232 | #ifdef CONFIG_X86_64 | ||
6233 | #define R "r" | ||
6234 | #define Q "q" | ||
6235 | #else | ||
6236 | #define R "e" | ||
6237 | #define Q "l" | ||
6238 | #endif | ||
6239 | |||
6240 | static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | 6192 | static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) |
6241 | { | 6193 | { |
6242 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 6194 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
6195 | unsigned long debugctlmsr; | ||
6243 | 6196 | ||
6244 | if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) { | 6197 | if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) { |
6245 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 6198 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
@@ -6279,34 +6232,35 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6279 | vmx_set_interrupt_shadow(vcpu, 0); | 6232 | vmx_set_interrupt_shadow(vcpu, 0); |
6280 | 6233 | ||
6281 | atomic_switch_perf_msrs(vmx); | 6234 | atomic_switch_perf_msrs(vmx); |
6235 | debugctlmsr = get_debugctlmsr(); | ||
6282 | 6236 | ||
6283 | vmx->__launched = vmx->loaded_vmcs->launched; | 6237 | vmx->__launched = vmx->loaded_vmcs->launched; |
6284 | asm( | 6238 | asm( |
6285 | /* Store host registers */ | 6239 | /* Store host registers */ |
6286 | "push %%"R"dx; push %%"R"bp;" | 6240 | "push %%" _ASM_DX "; push %%" _ASM_BP ";" |
6287 | "push %%"R"cx \n\t" /* placeholder for guest rcx */ | 6241 | "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */ |
6288 | "push %%"R"cx \n\t" | 6242 | "push %%" _ASM_CX " \n\t" |
6289 | "cmp %%"R"sp, %c[host_rsp](%0) \n\t" | 6243 | "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t" |
6290 | "je 1f \n\t" | 6244 | "je 1f \n\t" |
6291 | "mov %%"R"sp, %c[host_rsp](%0) \n\t" | 6245 | "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t" |
6292 | __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" | 6246 | __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" |
6293 | "1: \n\t" | 6247 | "1: \n\t" |
6294 | /* Reload cr2 if changed */ | 6248 | /* Reload cr2 if changed */ |
6295 | "mov %c[cr2](%0), %%"R"ax \n\t" | 6249 | "mov %c[cr2](%0), %%" _ASM_AX " \n\t" |
6296 | "mov %%cr2, %%"R"dx \n\t" | 6250 | "mov %%cr2, %%" _ASM_DX " \n\t" |
6297 | "cmp %%"R"ax, %%"R"dx \n\t" | 6251 | "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t" |
6298 | "je 2f \n\t" | 6252 | "je 2f \n\t" |
6299 | "mov %%"R"ax, %%cr2 \n\t" | 6253 | "mov %%" _ASM_AX", %%cr2 \n\t" |
6300 | "2: \n\t" | 6254 | "2: \n\t" |
6301 | /* Check if vmlaunch of vmresume is needed */ | 6255 | /* Check if vmlaunch of vmresume is needed */ |
6302 | "cmpl $0, %c[launched](%0) \n\t" | 6256 | "cmpl $0, %c[launched](%0) \n\t" |
6303 | /* Load guest registers. Don't clobber flags. */ | 6257 | /* Load guest registers. Don't clobber flags. */ |
6304 | "mov %c[rax](%0), %%"R"ax \n\t" | 6258 | "mov %c[rax](%0), %%" _ASM_AX " \n\t" |
6305 | "mov %c[rbx](%0), %%"R"bx \n\t" | 6259 | "mov %c[rbx](%0), %%" _ASM_BX " \n\t" |
6306 | "mov %c[rdx](%0), %%"R"dx \n\t" | 6260 | "mov %c[rdx](%0), %%" _ASM_DX " \n\t" |
6307 | "mov %c[rsi](%0), %%"R"si \n\t" | 6261 | "mov %c[rsi](%0), %%" _ASM_SI " \n\t" |
6308 | "mov %c[rdi](%0), %%"R"di \n\t" | 6262 | "mov %c[rdi](%0), %%" _ASM_DI " \n\t" |
6309 | "mov %c[rbp](%0), %%"R"bp \n\t" | 6263 | "mov %c[rbp](%0), %%" _ASM_BP " \n\t" |
6310 | #ifdef CONFIG_X86_64 | 6264 | #ifdef CONFIG_X86_64 |
6311 | "mov %c[r8](%0), %%r8 \n\t" | 6265 | "mov %c[r8](%0), %%r8 \n\t" |
6312 | "mov %c[r9](%0), %%r9 \n\t" | 6266 | "mov %c[r9](%0), %%r9 \n\t" |
@@ -6317,24 +6271,24 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6317 | "mov %c[r14](%0), %%r14 \n\t" | 6271 | "mov %c[r14](%0), %%r14 \n\t" |
6318 | "mov %c[r15](%0), %%r15 \n\t" | 6272 | "mov %c[r15](%0), %%r15 \n\t" |
6319 | #endif | 6273 | #endif |
6320 | "mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */ | 6274 | "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */ |
6321 | 6275 | ||
6322 | /* Enter guest mode */ | 6276 | /* Enter guest mode */ |
6323 | "jne .Llaunched \n\t" | 6277 | "jne 1f \n\t" |
6324 | __ex(ASM_VMX_VMLAUNCH) "\n\t" | 6278 | __ex(ASM_VMX_VMLAUNCH) "\n\t" |
6325 | "jmp .Lkvm_vmx_return \n\t" | 6279 | "jmp 2f \n\t" |
6326 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" | 6280 | "1: " __ex(ASM_VMX_VMRESUME) "\n\t" |
6327 | ".Lkvm_vmx_return: " | 6281 | "2: " |
6328 | /* Save guest registers, load host registers, keep flags */ | 6282 | /* Save guest registers, load host registers, keep flags */ |
6329 | "mov %0, %c[wordsize](%%"R"sp) \n\t" | 6283 | "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" |
6330 | "pop %0 \n\t" | 6284 | "pop %0 \n\t" |
6331 | "mov %%"R"ax, %c[rax](%0) \n\t" | 6285 | "mov %%" _ASM_AX ", %c[rax](%0) \n\t" |
6332 | "mov %%"R"bx, %c[rbx](%0) \n\t" | 6286 | "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" |
6333 | "pop"Q" %c[rcx](%0) \n\t" | 6287 | __ASM_SIZE(pop) " %c[rcx](%0) \n\t" |
6334 | "mov %%"R"dx, %c[rdx](%0) \n\t" | 6288 | "mov %%" _ASM_DX ", %c[rdx](%0) \n\t" |
6335 | "mov %%"R"si, %c[rsi](%0) \n\t" | 6289 | "mov %%" _ASM_SI ", %c[rsi](%0) \n\t" |
6336 | "mov %%"R"di, %c[rdi](%0) \n\t" | 6290 | "mov %%" _ASM_DI ", %c[rdi](%0) \n\t" |
6337 | "mov %%"R"bp, %c[rbp](%0) \n\t" | 6291 | "mov %%" _ASM_BP ", %c[rbp](%0) \n\t" |
6338 | #ifdef CONFIG_X86_64 | 6292 | #ifdef CONFIG_X86_64 |
6339 | "mov %%r8, %c[r8](%0) \n\t" | 6293 | "mov %%r8, %c[r8](%0) \n\t" |
6340 | "mov %%r9, %c[r9](%0) \n\t" | 6294 | "mov %%r9, %c[r9](%0) \n\t" |
@@ -6345,11 +6299,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6345 | "mov %%r14, %c[r14](%0) \n\t" | 6299 | "mov %%r14, %c[r14](%0) \n\t" |
6346 | "mov %%r15, %c[r15](%0) \n\t" | 6300 | "mov %%r15, %c[r15](%0) \n\t" |
6347 | #endif | 6301 | #endif |
6348 | "mov %%cr2, %%"R"ax \n\t" | 6302 | "mov %%cr2, %%" _ASM_AX " \n\t" |
6349 | "mov %%"R"ax, %c[cr2](%0) \n\t" | 6303 | "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" |
6350 | 6304 | ||
6351 | "pop %%"R"bp; pop %%"R"dx \n\t" | 6305 | "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" |
6352 | "setbe %c[fail](%0) \n\t" | 6306 | "setbe %c[fail](%0) \n\t" |
6307 | ".pushsection .rodata \n\t" | ||
6308 | ".global vmx_return \n\t" | ||
6309 | "vmx_return: " _ASM_PTR " 2b \n\t" | ||
6310 | ".popsection" | ||
6353 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), | 6311 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), |
6354 | [launched]"i"(offsetof(struct vcpu_vmx, __launched)), | 6312 | [launched]"i"(offsetof(struct vcpu_vmx, __launched)), |
6355 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), | 6313 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), |
@@ -6374,12 +6332,18 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6374 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), | 6332 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), |
6375 | [wordsize]"i"(sizeof(ulong)) | 6333 | [wordsize]"i"(sizeof(ulong)) |
6376 | : "cc", "memory" | 6334 | : "cc", "memory" |
6377 | , R"ax", R"bx", R"di", R"si" | ||
6378 | #ifdef CONFIG_X86_64 | 6335 | #ifdef CONFIG_X86_64 |
6336 | , "rax", "rbx", "rdi", "rsi" | ||
6379 | , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" | 6337 | , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" |
6338 | #else | ||
6339 | , "eax", "ebx", "edi", "esi" | ||
6380 | #endif | 6340 | #endif |
6381 | ); | 6341 | ); |
6382 | 6342 | ||
6343 | /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ | ||
6344 | if (debugctlmsr) | ||
6345 | update_debugctlmsr(debugctlmsr); | ||
6346 | |||
6383 | #ifndef CONFIG_X86_64 | 6347 | #ifndef CONFIG_X86_64 |
6384 | /* | 6348 | /* |
6385 | * The sysexit path does not restore ds/es, so we must set them to | 6349 | * The sysexit path does not restore ds/es, so we must set them to |
@@ -6424,9 +6388,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6424 | vmx_complete_interrupts(vmx); | 6388 | vmx_complete_interrupts(vmx); |
6425 | } | 6389 | } |
6426 | 6390 | ||
6427 | #undef R | ||
6428 | #undef Q | ||
6429 | |||
6430 | static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | 6391 | static void vmx_free_vcpu(struct kvm_vcpu *vcpu) |
6431 | { | 6392 | { |
6432 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 6393 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -7281,7 +7242,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
7281 | .vcpu_load = vmx_vcpu_load, | 7242 | .vcpu_load = vmx_vcpu_load, |
7282 | .vcpu_put = vmx_vcpu_put, | 7243 | .vcpu_put = vmx_vcpu_put, |
7283 | 7244 | ||
7284 | .set_guest_debug = set_guest_debug, | 7245 | .update_db_bp_intercept = update_exception_bitmap, |
7285 | .get_msr = vmx_get_msr, | 7246 | .get_msr = vmx_get_msr, |
7286 | .set_msr = vmx_set_msr, | 7247 | .set_msr = vmx_set_msr, |
7287 | .get_segment_base = vmx_get_segment_base, | 7248 | .get_segment_base = vmx_get_segment_base, |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1f09552572fa..1eefebe5d727 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -246,20 +246,14 @@ static void drop_user_return_notifiers(void *ignore) | |||
246 | 246 | ||
247 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) | 247 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) |
248 | { | 248 | { |
249 | if (irqchip_in_kernel(vcpu->kvm)) | 249 | return vcpu->arch.apic_base; |
250 | return vcpu->arch.apic_base; | ||
251 | else | ||
252 | return vcpu->arch.apic_base; | ||
253 | } | 250 | } |
254 | EXPORT_SYMBOL_GPL(kvm_get_apic_base); | 251 | EXPORT_SYMBOL_GPL(kvm_get_apic_base); |
255 | 252 | ||
256 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) | 253 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) |
257 | { | 254 | { |
258 | /* TODO: reserve bits check */ | 255 | /* TODO: reserve bits check */ |
259 | if (irqchip_in_kernel(vcpu->kvm)) | 256 | kvm_lapic_set_base(vcpu, data); |
260 | kvm_lapic_set_base(vcpu, data); | ||
261 | else | ||
262 | vcpu->arch.apic_base = data; | ||
263 | } | 257 | } |
264 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); | 258 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); |
265 | 259 | ||
@@ -698,6 +692,18 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | |||
698 | } | 692 | } |
699 | EXPORT_SYMBOL_GPL(kvm_get_cr8); | 693 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
700 | 694 | ||
695 | static void kvm_update_dr7(struct kvm_vcpu *vcpu) | ||
696 | { | ||
697 | unsigned long dr7; | ||
698 | |||
699 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | ||
700 | dr7 = vcpu->arch.guest_debug_dr7; | ||
701 | else | ||
702 | dr7 = vcpu->arch.dr7; | ||
703 | kvm_x86_ops->set_dr7(vcpu, dr7); | ||
704 | vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK); | ||
705 | } | ||
706 | |||
701 | static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | 707 | static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) |
702 | { | 708 | { |
703 | switch (dr) { | 709 | switch (dr) { |
@@ -723,10 +729,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | |||
723 | if (val & 0xffffffff00000000ULL) | 729 | if (val & 0xffffffff00000000ULL) |
724 | return -1; /* #GP */ | 730 | return -1; /* #GP */ |
725 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | 731 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; |
726 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | 732 | kvm_update_dr7(vcpu); |
727 | kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); | ||
728 | vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK); | ||
729 | } | ||
730 | break; | 733 | break; |
731 | } | 734 | } |
732 | 735 | ||
@@ -823,7 +826,7 @@ static u32 msrs_to_save[] = { | |||
823 | 826 | ||
824 | static unsigned num_msrs_to_save; | 827 | static unsigned num_msrs_to_save; |
825 | 828 | ||
826 | static u32 emulated_msrs[] = { | 829 | static const u32 emulated_msrs[] = { |
827 | MSR_IA32_TSCDEADLINE, | 830 | MSR_IA32_TSCDEADLINE, |
828 | MSR_IA32_MISC_ENABLE, | 831 | MSR_IA32_MISC_ENABLE, |
829 | MSR_IA32_MCG_STATUS, | 832 | MSR_IA32_MCG_STATUS, |
@@ -1097,7 +1100,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1097 | * For each generation, we track the original measured | 1100 | * For each generation, we track the original measured |
1098 | * nanosecond time, offset, and write, so if TSCs are in | 1101 | * nanosecond time, offset, and write, so if TSCs are in |
1099 | * sync, we can match exact offset, and if not, we can match | 1102 | * sync, we can match exact offset, and if not, we can match |
1100 | * exact software computaion in compute_guest_tsc() | 1103 | * exact software computation in compute_guest_tsc() |
1101 | * | 1104 | * |
1102 | * These values are tracked in kvm->arch.cur_xxx variables. | 1105 | * These values are tracked in kvm->arch.cur_xxx variables. |
1103 | */ | 1106 | */ |
@@ -1140,6 +1143,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1140 | unsigned long this_tsc_khz; | 1143 | unsigned long this_tsc_khz; |
1141 | s64 kernel_ns, max_kernel_ns; | 1144 | s64 kernel_ns, max_kernel_ns; |
1142 | u64 tsc_timestamp; | 1145 | u64 tsc_timestamp; |
1146 | u8 pvclock_flags; | ||
1143 | 1147 | ||
1144 | /* Keep irq disabled to prevent changes to the clock */ | 1148 | /* Keep irq disabled to prevent changes to the clock */ |
1145 | local_irq_save(flags); | 1149 | local_irq_save(flags); |
@@ -1221,7 +1225,14 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1221 | vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; | 1225 | vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; |
1222 | vcpu->last_kernel_ns = kernel_ns; | 1226 | vcpu->last_kernel_ns = kernel_ns; |
1223 | vcpu->last_guest_tsc = tsc_timestamp; | 1227 | vcpu->last_guest_tsc = tsc_timestamp; |
1224 | vcpu->hv_clock.flags = 0; | 1228 | |
1229 | pvclock_flags = 0; | ||
1230 | if (vcpu->pvclock_set_guest_stopped_request) { | ||
1231 | pvclock_flags |= PVCLOCK_GUEST_STOPPED; | ||
1232 | vcpu->pvclock_set_guest_stopped_request = false; | ||
1233 | } | ||
1234 | |||
1235 | vcpu->hv_clock.flags = pvclock_flags; | ||
1225 | 1236 | ||
1226 | /* | 1237 | /* |
1227 | * The interface expects us to write an even number signaling that the | 1238 | * The interface expects us to write an even number signaling that the |
@@ -1504,7 +1515,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) | |||
1504 | { | 1515 | { |
1505 | gpa_t gpa = data & ~0x3f; | 1516 | gpa_t gpa = data & ~0x3f; |
1506 | 1517 | ||
1507 | /* Bits 2:5 are resrved, Should be zero */ | 1518 | /* Bits 2:5 are reserved, Should be zero */ |
1508 | if (data & 0x3c) | 1519 | if (data & 0x3c) |
1509 | return 1; | 1520 | return 1; |
1510 | 1521 | ||
@@ -1639,10 +1650,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1639 | vcpu->arch.time_page = | 1650 | vcpu->arch.time_page = |
1640 | gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); | 1651 | gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); |
1641 | 1652 | ||
1642 | if (is_error_page(vcpu->arch.time_page)) { | 1653 | if (is_error_page(vcpu->arch.time_page)) |
1643 | kvm_release_page_clean(vcpu->arch.time_page); | ||
1644 | vcpu->arch.time_page = NULL; | 1654 | vcpu->arch.time_page = NULL; |
1645 | } | 1655 | |
1646 | break; | 1656 | break; |
1647 | } | 1657 | } |
1648 | case MSR_KVM_ASYNC_PF_EN: | 1658 | case MSR_KVM_ASYNC_PF_EN: |
@@ -1727,7 +1737,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1727 | * Ignore all writes to this no longer documented MSR. | 1737 | * Ignore all writes to this no longer documented MSR. |
1728 | * Writes are only relevant for old K7 processors, | 1738 | * Writes are only relevant for old K7 processors, |
1729 | * all pre-dating SVM, but a recommended workaround from | 1739 | * all pre-dating SVM, but a recommended workaround from |
1730 | * AMD for these chips. It is possible to speicify the | 1740 | * AMD for these chips. It is possible to specify the |
1731 | * affected processor models on the command line, hence | 1741 | * affected processor models on the command line, hence |
1732 | * the need to ignore the workaround. | 1742 | * the need to ignore the workaround. |
1733 | */ | 1743 | */ |
@@ -2177,6 +2187,8 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2177 | case KVM_CAP_GET_TSC_KHZ: | 2187 | case KVM_CAP_GET_TSC_KHZ: |
2178 | case KVM_CAP_PCI_2_3: | 2188 | case KVM_CAP_PCI_2_3: |
2179 | case KVM_CAP_KVMCLOCK_CTRL: | 2189 | case KVM_CAP_KVMCLOCK_CTRL: |
2190 | case KVM_CAP_READONLY_MEM: | ||
2191 | case KVM_CAP_IRQFD_RESAMPLE: | ||
2180 | r = 1; | 2192 | r = 1; |
2181 | break; | 2193 | break; |
2182 | case KVM_CAP_COALESCED_MMIO: | 2194 | case KVM_CAP_COALESCED_MMIO: |
@@ -2358,8 +2370,7 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | |||
2358 | static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, | 2370 | static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, |
2359 | struct kvm_lapic_state *s) | 2371 | struct kvm_lapic_state *s) |
2360 | { | 2372 | { |
2361 | memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); | 2373 | kvm_apic_post_state_restore(vcpu, s); |
2362 | kvm_apic_post_state_restore(vcpu); | ||
2363 | update_cr8_intercept(vcpu); | 2374 | update_cr8_intercept(vcpu); |
2364 | 2375 | ||
2365 | return 0; | 2376 | return 0; |
@@ -2368,7 +2379,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, | |||
2368 | static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | 2379 | static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, |
2369 | struct kvm_interrupt *irq) | 2380 | struct kvm_interrupt *irq) |
2370 | { | 2381 | { |
2371 | if (irq->irq < 0 || irq->irq >= 256) | 2382 | if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS) |
2372 | return -EINVAL; | 2383 | return -EINVAL; |
2373 | if (irqchip_in_kernel(vcpu->kvm)) | 2384 | if (irqchip_in_kernel(vcpu->kvm)) |
2374 | return -ENXIO; | 2385 | return -ENXIO; |
@@ -2635,11 +2646,9 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, | |||
2635 | */ | 2646 | */ |
2636 | static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) | 2647 | static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) |
2637 | { | 2648 | { |
2638 | struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock; | ||
2639 | if (!vcpu->arch.time_page) | 2649 | if (!vcpu->arch.time_page) |
2640 | return -EINVAL; | 2650 | return -EINVAL; |
2641 | src->flags |= PVCLOCK_GUEST_STOPPED; | 2651 | vcpu->arch.pvclock_set_guest_stopped_request = true; |
2642 | mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT); | ||
2643 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | 2652 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
2644 | return 0; | 2653 | return 0; |
2645 | } | 2654 | } |
@@ -3090,7 +3099,7 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, | |||
3090 | if (!kvm->arch.vpit) | 3099 | if (!kvm->arch.vpit) |
3091 | return -ENXIO; | 3100 | return -ENXIO; |
3092 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 3101 | mutex_lock(&kvm->arch.vpit->pit_state.lock); |
3093 | kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject; | 3102 | kvm->arch.vpit->pit_state.reinject = control->pit_reinject; |
3094 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 3103 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); |
3095 | return 0; | 3104 | return 0; |
3096 | } | 3105 | } |
@@ -3173,6 +3182,16 @@ out: | |||
3173 | return r; | 3182 | return r; |
3174 | } | 3183 | } |
3175 | 3184 | ||
3185 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event) | ||
3186 | { | ||
3187 | if (!irqchip_in_kernel(kvm)) | ||
3188 | return -ENXIO; | ||
3189 | |||
3190 | irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | ||
3191 | irq_event->irq, irq_event->level); | ||
3192 | return 0; | ||
3193 | } | ||
3194 | |||
3176 | long kvm_arch_vm_ioctl(struct file *filp, | 3195 | long kvm_arch_vm_ioctl(struct file *filp, |
3177 | unsigned int ioctl, unsigned long arg) | 3196 | unsigned int ioctl, unsigned long arg) |
3178 | { | 3197 | { |
@@ -3279,29 +3298,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3279 | create_pit_unlock: | 3298 | create_pit_unlock: |
3280 | mutex_unlock(&kvm->slots_lock); | 3299 | mutex_unlock(&kvm->slots_lock); |
3281 | break; | 3300 | break; |
3282 | case KVM_IRQ_LINE_STATUS: | ||
3283 | case KVM_IRQ_LINE: { | ||
3284 | struct kvm_irq_level irq_event; | ||
3285 | |||
3286 | r = -EFAULT; | ||
3287 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | ||
3288 | goto out; | ||
3289 | r = -ENXIO; | ||
3290 | if (irqchip_in_kernel(kvm)) { | ||
3291 | __s32 status; | ||
3292 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | ||
3293 | irq_event.irq, irq_event.level); | ||
3294 | if (ioctl == KVM_IRQ_LINE_STATUS) { | ||
3295 | r = -EFAULT; | ||
3296 | irq_event.status = status; | ||
3297 | if (copy_to_user(argp, &irq_event, | ||
3298 | sizeof irq_event)) | ||
3299 | goto out; | ||
3300 | } | ||
3301 | r = 0; | ||
3302 | } | ||
3303 | break; | ||
3304 | } | ||
3305 | case KVM_GET_IRQCHIP: { | 3301 | case KVM_GET_IRQCHIP: { |
3306 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | 3302 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ |
3307 | struct kvm_irqchip *chip; | 3303 | struct kvm_irqchip *chip; |
@@ -3689,20 +3685,17 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, | |||
3689 | gpa_t *gpa, struct x86_exception *exception, | 3685 | gpa_t *gpa, struct x86_exception *exception, |
3690 | bool write) | 3686 | bool write) |
3691 | { | 3687 | { |
3692 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3688 | u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0) |
3689 | | (write ? PFERR_WRITE_MASK : 0); | ||
3693 | 3690 | ||
3694 | if (vcpu_match_mmio_gva(vcpu, gva) && | 3691 | if (vcpu_match_mmio_gva(vcpu, gva) |
3695 | check_write_user_access(vcpu, write, access, | 3692 | && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) { |
3696 | vcpu->arch.access)) { | ||
3697 | *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT | | 3693 | *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT | |
3698 | (gva & (PAGE_SIZE - 1)); | 3694 | (gva & (PAGE_SIZE - 1)); |
3699 | trace_vcpu_match_mmio(gva, *gpa, write, false); | 3695 | trace_vcpu_match_mmio(gva, *gpa, write, false); |
3700 | return 1; | 3696 | return 1; |
3701 | } | 3697 | } |
3702 | 3698 | ||
3703 | if (write) | ||
3704 | access |= PFERR_WRITE_MASK; | ||
3705 | |||
3706 | *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); | 3699 | *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); |
3707 | 3700 | ||
3708 | if (*gpa == UNMAPPED_GVA) | 3701 | if (*gpa == UNMAPPED_GVA) |
@@ -3790,14 +3783,14 @@ static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3790 | return X86EMUL_CONTINUE; | 3783 | return X86EMUL_CONTINUE; |
3791 | } | 3784 | } |
3792 | 3785 | ||
3793 | static struct read_write_emulator_ops read_emultor = { | 3786 | static const struct read_write_emulator_ops read_emultor = { |
3794 | .read_write_prepare = read_prepare, | 3787 | .read_write_prepare = read_prepare, |
3795 | .read_write_emulate = read_emulate, | 3788 | .read_write_emulate = read_emulate, |
3796 | .read_write_mmio = vcpu_mmio_read, | 3789 | .read_write_mmio = vcpu_mmio_read, |
3797 | .read_write_exit_mmio = read_exit_mmio, | 3790 | .read_write_exit_mmio = read_exit_mmio, |
3798 | }; | 3791 | }; |
3799 | 3792 | ||
3800 | static struct read_write_emulator_ops write_emultor = { | 3793 | static const struct read_write_emulator_ops write_emultor = { |
3801 | .read_write_emulate = write_emulate, | 3794 | .read_write_emulate = write_emulate, |
3802 | .read_write_mmio = write_mmio, | 3795 | .read_write_mmio = write_mmio, |
3803 | .read_write_exit_mmio = write_exit_mmio, | 3796 | .read_write_exit_mmio = write_exit_mmio, |
@@ -3808,7 +3801,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val, | |||
3808 | unsigned int bytes, | 3801 | unsigned int bytes, |
3809 | struct x86_exception *exception, | 3802 | struct x86_exception *exception, |
3810 | struct kvm_vcpu *vcpu, | 3803 | struct kvm_vcpu *vcpu, |
3811 | struct read_write_emulator_ops *ops) | 3804 | const struct read_write_emulator_ops *ops) |
3812 | { | 3805 | { |
3813 | gpa_t gpa; | 3806 | gpa_t gpa; |
3814 | int handled, ret; | 3807 | int handled, ret; |
@@ -3857,7 +3850,7 @@ mmio: | |||
3857 | int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, | 3850 | int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, |
3858 | void *val, unsigned int bytes, | 3851 | void *val, unsigned int bytes, |
3859 | struct x86_exception *exception, | 3852 | struct x86_exception *exception, |
3860 | struct read_write_emulator_ops *ops) | 3853 | const struct read_write_emulator_ops *ops) |
3861 | { | 3854 | { |
3862 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | 3855 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); |
3863 | gpa_t gpa; | 3856 | gpa_t gpa; |
@@ -3962,10 +3955,8 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, | |||
3962 | goto emul_write; | 3955 | goto emul_write; |
3963 | 3956 | ||
3964 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 3957 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
3965 | if (is_error_page(page)) { | 3958 | if (is_error_page(page)) |
3966 | kvm_release_page_clean(page); | ||
3967 | goto emul_write; | 3959 | goto emul_write; |
3968 | } | ||
3969 | 3960 | ||
3970 | kaddr = kmap_atomic(page); | 3961 | kaddr = kmap_atomic(page); |
3971 | kaddr += offset_in_page(gpa); | 3962 | kaddr += offset_in_page(gpa); |
@@ -4332,7 +4323,19 @@ static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, | |||
4332 | kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx); | 4323 | kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx); |
4333 | } | 4324 | } |
4334 | 4325 | ||
4335 | static struct x86_emulate_ops emulate_ops = { | 4326 | static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) |
4327 | { | ||
4328 | return kvm_register_read(emul_to_vcpu(ctxt), reg); | ||
4329 | } | ||
4330 | |||
4331 | static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val) | ||
4332 | { | ||
4333 | kvm_register_write(emul_to_vcpu(ctxt), reg, val); | ||
4334 | } | ||
4335 | |||
4336 | static const struct x86_emulate_ops emulate_ops = { | ||
4337 | .read_gpr = emulator_read_gpr, | ||
4338 | .write_gpr = emulator_write_gpr, | ||
4336 | .read_std = kvm_read_guest_virt_system, | 4339 | .read_std = kvm_read_guest_virt_system, |
4337 | .write_std = kvm_write_guest_virt_system, | 4340 | .write_std = kvm_write_guest_virt_system, |
4338 | .fetch = kvm_fetch_guest_virt, | 4341 | .fetch = kvm_fetch_guest_virt, |
@@ -4367,14 +4370,6 @@ static struct x86_emulate_ops emulate_ops = { | |||
4367 | .get_cpuid = emulator_get_cpuid, | 4370 | .get_cpuid = emulator_get_cpuid, |
4368 | }; | 4371 | }; |
4369 | 4372 | ||
4370 | static void cache_all_regs(struct kvm_vcpu *vcpu) | ||
4371 | { | ||
4372 | kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
4373 | kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
4374 | kvm_register_read(vcpu, VCPU_REGS_RIP); | ||
4375 | vcpu->arch.regs_dirty = ~0; | ||
4376 | } | ||
4377 | |||
4378 | static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) | 4373 | static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) |
4379 | { | 4374 | { |
4380 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask); | 4375 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask); |
@@ -4401,12 +4396,10 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu) | |||
4401 | kvm_queue_exception(vcpu, ctxt->exception.vector); | 4396 | kvm_queue_exception(vcpu, ctxt->exception.vector); |
4402 | } | 4397 | } |
4403 | 4398 | ||
4404 | static void init_decode_cache(struct x86_emulate_ctxt *ctxt, | 4399 | static void init_decode_cache(struct x86_emulate_ctxt *ctxt) |
4405 | const unsigned long *regs) | ||
4406 | { | 4400 | { |
4407 | memset(&ctxt->twobyte, 0, | 4401 | memset(&ctxt->twobyte, 0, |
4408 | (void *)&ctxt->regs - (void *)&ctxt->twobyte); | 4402 | (void *)&ctxt->_regs - (void *)&ctxt->twobyte); |
4409 | memcpy(ctxt->regs, regs, sizeof(ctxt->regs)); | ||
4410 | 4403 | ||
4411 | ctxt->fetch.start = 0; | 4404 | ctxt->fetch.start = 0; |
4412 | ctxt->fetch.end = 0; | 4405 | ctxt->fetch.end = 0; |
@@ -4421,14 +4414,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) | |||
4421 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | 4414 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
4422 | int cs_db, cs_l; | 4415 | int cs_db, cs_l; |
4423 | 4416 | ||
4424 | /* | ||
4425 | * TODO: fix emulate.c to use guest_read/write_register | ||
4426 | * instead of direct ->regs accesses, can save hundred cycles | ||
4427 | * on Intel for instructions that don't read/change RSP, for | ||
4428 | * for example. | ||
4429 | */ | ||
4430 | cache_all_regs(vcpu); | ||
4431 | |||
4432 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 4417 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
4433 | 4418 | ||
4434 | ctxt->eflags = kvm_get_rflags(vcpu); | 4419 | ctxt->eflags = kvm_get_rflags(vcpu); |
@@ -4440,7 +4425,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) | |||
4440 | X86EMUL_MODE_PROT16; | 4425 | X86EMUL_MODE_PROT16; |
4441 | ctxt->guest_mode = is_guest_mode(vcpu); | 4426 | ctxt->guest_mode = is_guest_mode(vcpu); |
4442 | 4427 | ||
4443 | init_decode_cache(ctxt, vcpu->arch.regs); | 4428 | init_decode_cache(ctxt); |
4444 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; | 4429 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; |
4445 | } | 4430 | } |
4446 | 4431 | ||
@@ -4460,7 +4445,6 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) | |||
4460 | return EMULATE_FAIL; | 4445 | return EMULATE_FAIL; |
4461 | 4446 | ||
4462 | ctxt->eip = ctxt->_eip; | 4447 | ctxt->eip = ctxt->_eip; |
4463 | memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); | ||
4464 | kvm_rip_write(vcpu, ctxt->eip); | 4448 | kvm_rip_write(vcpu, ctxt->eip); |
4465 | kvm_set_rflags(vcpu, ctxt->eflags); | 4449 | kvm_set_rflags(vcpu, ctxt->eflags); |
4466 | 4450 | ||
@@ -4493,13 +4477,14 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) | |||
4493 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | 4477 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) |
4494 | { | 4478 | { |
4495 | gpa_t gpa; | 4479 | gpa_t gpa; |
4480 | pfn_t pfn; | ||
4496 | 4481 | ||
4497 | if (tdp_enabled) | 4482 | if (tdp_enabled) |
4498 | return false; | 4483 | return false; |
4499 | 4484 | ||
4500 | /* | 4485 | /* |
4501 | * if emulation was due to access to shadowed page table | 4486 | * if emulation was due to access to shadowed page table |
4502 | * and it failed try to unshadow page and re-entetr the | 4487 | * and it failed try to unshadow page and re-enter the |
4503 | * guest to let CPU execute the instruction. | 4488 | * guest to let CPU execute the instruction. |
4504 | */ | 4489 | */ |
4505 | if (kvm_mmu_unprotect_page_virt(vcpu, gva)) | 4490 | if (kvm_mmu_unprotect_page_virt(vcpu, gva)) |
@@ -4510,8 +4495,17 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | |||
4510 | if (gpa == UNMAPPED_GVA) | 4495 | if (gpa == UNMAPPED_GVA) |
4511 | return true; /* let cpu generate fault */ | 4496 | return true; /* let cpu generate fault */ |
4512 | 4497 | ||
4513 | if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT))) | 4498 | /* |
4499 | * Do not retry the unhandleable instruction if it faults on the | ||
4500 | * readonly host memory, otherwise it will goto a infinite loop: | ||
4501 | * retry instruction -> write #PF -> emulation fail -> retry | ||
4502 | * instruction -> ... | ||
4503 | */ | ||
4504 | pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); | ||
4505 | if (!is_error_pfn(pfn)) { | ||
4506 | kvm_release_pfn_clean(pfn); | ||
4514 | return true; | 4507 | return true; |
4508 | } | ||
4515 | 4509 | ||
4516 | return false; | 4510 | return false; |
4517 | } | 4511 | } |
@@ -4560,6 +4554,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, | |||
4560 | return true; | 4554 | return true; |
4561 | } | 4555 | } |
4562 | 4556 | ||
4557 | static int complete_emulated_mmio(struct kvm_vcpu *vcpu); | ||
4558 | static int complete_emulated_pio(struct kvm_vcpu *vcpu); | ||
4559 | |||
4563 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, | 4560 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, |
4564 | unsigned long cr2, | 4561 | unsigned long cr2, |
4565 | int emulation_type, | 4562 | int emulation_type, |
@@ -4608,7 +4605,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4608 | changes registers values during IO operation */ | 4605 | changes registers values during IO operation */ |
4609 | if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { | 4606 | if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { |
4610 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; | 4607 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; |
4611 | memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs); | 4608 | emulator_invalidate_register_cache(ctxt); |
4612 | } | 4609 | } |
4613 | 4610 | ||
4614 | restart: | 4611 | restart: |
@@ -4630,13 +4627,16 @@ restart: | |||
4630 | } else if (vcpu->arch.pio.count) { | 4627 | } else if (vcpu->arch.pio.count) { |
4631 | if (!vcpu->arch.pio.in) | 4628 | if (!vcpu->arch.pio.in) |
4632 | vcpu->arch.pio.count = 0; | 4629 | vcpu->arch.pio.count = 0; |
4633 | else | 4630 | else { |
4634 | writeback = false; | 4631 | writeback = false; |
4632 | vcpu->arch.complete_userspace_io = complete_emulated_pio; | ||
4633 | } | ||
4635 | r = EMULATE_DO_MMIO; | 4634 | r = EMULATE_DO_MMIO; |
4636 | } else if (vcpu->mmio_needed) { | 4635 | } else if (vcpu->mmio_needed) { |
4637 | if (!vcpu->mmio_is_write) | 4636 | if (!vcpu->mmio_is_write) |
4638 | writeback = false; | 4637 | writeback = false; |
4639 | r = EMULATE_DO_MMIO; | 4638 | r = EMULATE_DO_MMIO; |
4639 | vcpu->arch.complete_userspace_io = complete_emulated_mmio; | ||
4640 | } else if (r == EMULATION_RESTART) | 4640 | } else if (r == EMULATION_RESTART) |
4641 | goto restart; | 4641 | goto restart; |
4642 | else | 4642 | else |
@@ -4646,7 +4646,6 @@ restart: | |||
4646 | toggle_interruptibility(vcpu, ctxt->interruptibility); | 4646 | toggle_interruptibility(vcpu, ctxt->interruptibility); |
4647 | kvm_set_rflags(vcpu, ctxt->eflags); | 4647 | kvm_set_rflags(vcpu, ctxt->eflags); |
4648 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 4648 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
4649 | memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); | ||
4650 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | 4649 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; |
4651 | kvm_rip_write(vcpu, ctxt->eip); | 4650 | kvm_rip_write(vcpu, ctxt->eip); |
4652 | } else | 4651 | } else |
@@ -4929,6 +4928,7 @@ int kvm_arch_init(void *opaque) | |||
4929 | if (cpu_has_xsave) | 4928 | if (cpu_has_xsave) |
4930 | host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | 4929 | host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); |
4931 | 4930 | ||
4931 | kvm_lapic_init(); | ||
4932 | return 0; | 4932 | return 0; |
4933 | 4933 | ||
4934 | out: | 4934 | out: |
@@ -5499,6 +5499,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5499 | return r; | 5499 | return r; |
5500 | } | 5500 | } |
5501 | 5501 | ||
5502 | static inline int complete_emulated_io(struct kvm_vcpu *vcpu) | ||
5503 | { | ||
5504 | int r; | ||
5505 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
5506 | r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); | ||
5507 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
5508 | if (r != EMULATE_DONE) | ||
5509 | return 0; | ||
5510 | return 1; | ||
5511 | } | ||
5512 | |||
5513 | static int complete_emulated_pio(struct kvm_vcpu *vcpu) | ||
5514 | { | ||
5515 | BUG_ON(!vcpu->arch.pio.count); | ||
5516 | |||
5517 | return complete_emulated_io(vcpu); | ||
5518 | } | ||
5519 | |||
5502 | /* | 5520 | /* |
5503 | * Implements the following, as a state machine: | 5521 | * Implements the following, as a state machine: |
5504 | * | 5522 | * |
@@ -5515,47 +5533,37 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5515 | * copy data | 5533 | * copy data |
5516 | * exit | 5534 | * exit |
5517 | */ | 5535 | */ |
5518 | static int complete_mmio(struct kvm_vcpu *vcpu) | 5536 | static int complete_emulated_mmio(struct kvm_vcpu *vcpu) |
5519 | { | 5537 | { |
5520 | struct kvm_run *run = vcpu->run; | 5538 | struct kvm_run *run = vcpu->run; |
5521 | struct kvm_mmio_fragment *frag; | 5539 | struct kvm_mmio_fragment *frag; |
5522 | int r; | ||
5523 | 5540 | ||
5524 | if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) | 5541 | BUG_ON(!vcpu->mmio_needed); |
5525 | return 1; | ||
5526 | 5542 | ||
5527 | if (vcpu->mmio_needed) { | 5543 | /* Complete previous fragment */ |
5528 | /* Complete previous fragment */ | 5544 | frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++]; |
5529 | frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++]; | 5545 | if (!vcpu->mmio_is_write) |
5530 | if (!vcpu->mmio_is_write) | 5546 | memcpy(frag->data, run->mmio.data, frag->len); |
5531 | memcpy(frag->data, run->mmio.data, frag->len); | 5547 | if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { |
5532 | if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { | 5548 | vcpu->mmio_needed = 0; |
5533 | vcpu->mmio_needed = 0; | ||
5534 | if (vcpu->mmio_is_write) | ||
5535 | return 1; | ||
5536 | vcpu->mmio_read_completed = 1; | ||
5537 | goto done; | ||
5538 | } | ||
5539 | /* Initiate next fragment */ | ||
5540 | ++frag; | ||
5541 | run->exit_reason = KVM_EXIT_MMIO; | ||
5542 | run->mmio.phys_addr = frag->gpa; | ||
5543 | if (vcpu->mmio_is_write) | 5549 | if (vcpu->mmio_is_write) |
5544 | memcpy(run->mmio.data, frag->data, frag->len); | 5550 | return 1; |
5545 | run->mmio.len = frag->len; | 5551 | vcpu->mmio_read_completed = 1; |
5546 | run->mmio.is_write = vcpu->mmio_is_write; | 5552 | return complete_emulated_io(vcpu); |
5547 | return 0; | 5553 | } |
5548 | 5554 | /* Initiate next fragment */ | |
5549 | } | 5555 | ++frag; |
5550 | done: | 5556 | run->exit_reason = KVM_EXIT_MMIO; |
5551 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 5557 | run->mmio.phys_addr = frag->gpa; |
5552 | r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); | 5558 | if (vcpu->mmio_is_write) |
5553 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 5559 | memcpy(run->mmio.data, frag->data, frag->len); |
5554 | if (r != EMULATE_DONE) | 5560 | run->mmio.len = frag->len; |
5555 | return 0; | 5561 | run->mmio.is_write = vcpu->mmio_is_write; |
5556 | return 1; | 5562 | vcpu->arch.complete_userspace_io = complete_emulated_mmio; |
5563 | return 0; | ||
5557 | } | 5564 | } |
5558 | 5565 | ||
5566 | |||
5559 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 5567 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
5560 | { | 5568 | { |
5561 | int r; | 5569 | int r; |
@@ -5582,9 +5590,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
5582 | } | 5590 | } |
5583 | } | 5591 | } |
5584 | 5592 | ||
5585 | r = complete_mmio(vcpu); | 5593 | if (unlikely(vcpu->arch.complete_userspace_io)) { |
5586 | if (r <= 0) | 5594 | int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; |
5587 | goto out; | 5595 | vcpu->arch.complete_userspace_io = NULL; |
5596 | r = cui(vcpu); | ||
5597 | if (r <= 0) | ||
5598 | goto out; | ||
5599 | } else | ||
5600 | WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); | ||
5588 | 5601 | ||
5589 | r = __vcpu_run(vcpu); | 5602 | r = __vcpu_run(vcpu); |
5590 | 5603 | ||
@@ -5602,12 +5615,11 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
5602 | /* | 5615 | /* |
5603 | * We are here if userspace calls get_regs() in the middle of | 5616 | * We are here if userspace calls get_regs() in the middle of |
5604 | * instruction emulation. Registers state needs to be copied | 5617 | * instruction emulation. Registers state needs to be copied |
5605 | * back from emulation context to vcpu. Usrapace shouldn't do | 5618 | * back from emulation context to vcpu. Userspace shouldn't do |
5606 | * that usually, but some bad designed PV devices (vmware | 5619 | * that usually, but some bad designed PV devices (vmware |
5607 | * backdoor interface) need this to work | 5620 | * backdoor interface) need this to work |
5608 | */ | 5621 | */ |
5609 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | 5622 | emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt); |
5610 | memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); | ||
5611 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | 5623 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; |
5612 | } | 5624 | } |
5613 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 5625 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
@@ -5747,7 +5759,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, | |||
5747 | if (ret) | 5759 | if (ret) |
5748 | return EMULATE_FAIL; | 5760 | return EMULATE_FAIL; |
5749 | 5761 | ||
5750 | memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); | ||
5751 | kvm_rip_write(vcpu, ctxt->eip); | 5762 | kvm_rip_write(vcpu, ctxt->eip); |
5752 | kvm_set_rflags(vcpu, ctxt->eflags); | 5763 | kvm_set_rflags(vcpu, ctxt->eflags); |
5753 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5764 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
@@ -5799,7 +5810,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5799 | if (mmu_reset_needed) | 5810 | if (mmu_reset_needed) |
5800 | kvm_mmu_reset_context(vcpu); | 5811 | kvm_mmu_reset_context(vcpu); |
5801 | 5812 | ||
5802 | max_bits = (sizeof sregs->interrupt_bitmap) << 3; | 5813 | max_bits = KVM_NR_INTERRUPTS; |
5803 | pending_vec = find_first_bit( | 5814 | pending_vec = find_first_bit( |
5804 | (const unsigned long *)sregs->interrupt_bitmap, max_bits); | 5815 | (const unsigned long *)sregs->interrupt_bitmap, max_bits); |
5805 | if (pending_vec < max_bits) { | 5816 | if (pending_vec < max_bits) { |
@@ -5859,13 +5870,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
5859 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { | 5870 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { |
5860 | for (i = 0; i < KVM_NR_DB_REGS; ++i) | 5871 | for (i = 0; i < KVM_NR_DB_REGS; ++i) |
5861 | vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; | 5872 | vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; |
5862 | vcpu->arch.switch_db_regs = | 5873 | vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7]; |
5863 | (dbg->arch.debugreg[7] & DR7_BP_EN_MASK); | ||
5864 | } else { | 5874 | } else { |
5865 | for (i = 0; i < KVM_NR_DB_REGS; i++) | 5875 | for (i = 0; i < KVM_NR_DB_REGS; i++) |
5866 | vcpu->arch.eff_db[i] = vcpu->arch.db[i]; | 5876 | vcpu->arch.eff_db[i] = vcpu->arch.db[i]; |
5867 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); | ||
5868 | } | 5877 | } |
5878 | kvm_update_dr7(vcpu); | ||
5869 | 5879 | ||
5870 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 5880 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
5871 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + | 5881 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + |
@@ -5877,7 +5887,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
5877 | */ | 5887 | */ |
5878 | kvm_set_rflags(vcpu, rflags); | 5888 | kvm_set_rflags(vcpu, rflags); |
5879 | 5889 | ||
5880 | kvm_x86_ops->set_guest_debug(vcpu, dbg); | 5890 | kvm_x86_ops->update_db_bp_intercept(vcpu); |
5881 | 5891 | ||
5882 | r = 0; | 5892 | r = 0; |
5883 | 5893 | ||
@@ -6023,7 +6033,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
6023 | int r; | 6033 | int r; |
6024 | 6034 | ||
6025 | vcpu->arch.mtrr_state.have_fixed = 1; | 6035 | vcpu->arch.mtrr_state.have_fixed = 1; |
6026 | vcpu_load(vcpu); | 6036 | r = vcpu_load(vcpu); |
6037 | if (r) | ||
6038 | return r; | ||
6027 | r = kvm_arch_vcpu_reset(vcpu); | 6039 | r = kvm_arch_vcpu_reset(vcpu); |
6028 | if (r == 0) | 6040 | if (r == 0) |
6029 | r = kvm_mmu_setup(vcpu); | 6041 | r = kvm_mmu_setup(vcpu); |
@@ -6034,9 +6046,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
6034 | 6046 | ||
6035 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | 6047 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) |
6036 | { | 6048 | { |
6049 | int r; | ||
6037 | vcpu->arch.apf.msr_val = 0; | 6050 | vcpu->arch.apf.msr_val = 0; |
6038 | 6051 | ||
6039 | vcpu_load(vcpu); | 6052 | r = vcpu_load(vcpu); |
6053 | BUG_ON(r); | ||
6040 | kvm_mmu_unload(vcpu); | 6054 | kvm_mmu_unload(vcpu); |
6041 | vcpu_put(vcpu); | 6055 | vcpu_put(vcpu); |
6042 | 6056 | ||
@@ -6050,10 +6064,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
6050 | vcpu->arch.nmi_pending = 0; | 6064 | vcpu->arch.nmi_pending = 0; |
6051 | vcpu->arch.nmi_injected = false; | 6065 | vcpu->arch.nmi_injected = false; |
6052 | 6066 | ||
6053 | vcpu->arch.switch_db_regs = 0; | ||
6054 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); | 6067 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); |
6055 | vcpu->arch.dr6 = DR6_FIXED_1; | 6068 | vcpu->arch.dr6 = DR6_FIXED_1; |
6056 | vcpu->arch.dr7 = DR7_FIXED_1; | 6069 | vcpu->arch.dr7 = DR7_FIXED_1; |
6070 | kvm_update_dr7(vcpu); | ||
6057 | 6071 | ||
6058 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 6072 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
6059 | vcpu->arch.apf.msr_val = 0; | 6073 | vcpu->arch.apf.msr_val = 0; |
@@ -6132,7 +6146,7 @@ int kvm_arch_hardware_enable(void *garbage) | |||
6132 | * as we reset last_host_tsc on all VCPUs to stop this from being | 6146 | * as we reset last_host_tsc on all VCPUs to stop this from being |
6133 | * called multiple times (one for each physical CPU bringup). | 6147 | * called multiple times (one for each physical CPU bringup). |
6134 | * | 6148 | * |
6135 | * Platforms with unnreliable TSCs don't have to deal with this, they | 6149 | * Platforms with unreliable TSCs don't have to deal with this, they |
6136 | * will be compensated by the logic in vcpu_load, which sets the TSC to | 6150 | * will be compensated by the logic in vcpu_load, which sets the TSC to |
6137 | * catchup mode. This will catchup all VCPUs to real time, but cannot | 6151 | * catchup mode. This will catchup all VCPUs to real time, but cannot |
6138 | * guarantee that they stay in perfect synchronization. | 6152 | * guarantee that they stay in perfect synchronization. |
@@ -6185,6 +6199,8 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) | |||
6185 | return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); | 6199 | return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); |
6186 | } | 6200 | } |
6187 | 6201 | ||
6202 | struct static_key kvm_no_apic_vcpu __read_mostly; | ||
6203 | |||
6188 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | 6204 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) |
6189 | { | 6205 | { |
6190 | struct page *page; | 6206 | struct page *page; |
@@ -6217,7 +6233,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
6217 | r = kvm_create_lapic(vcpu); | 6233 | r = kvm_create_lapic(vcpu); |
6218 | if (r < 0) | 6234 | if (r < 0) |
6219 | goto fail_mmu_destroy; | 6235 | goto fail_mmu_destroy; |
6220 | } | 6236 | } else |
6237 | static_key_slow_inc(&kvm_no_apic_vcpu); | ||
6221 | 6238 | ||
6222 | vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, | 6239 | vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, |
6223 | GFP_KERNEL); | 6240 | GFP_KERNEL); |
@@ -6257,6 +6274,8 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
6257 | kvm_mmu_destroy(vcpu); | 6274 | kvm_mmu_destroy(vcpu); |
6258 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | 6275 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
6259 | free_page((unsigned long)vcpu->arch.pio_data); | 6276 | free_page((unsigned long)vcpu->arch.pio_data); |
6277 | if (!irqchip_in_kernel(vcpu->kvm)) | ||
6278 | static_key_slow_dec(&kvm_no_apic_vcpu); | ||
6260 | } | 6279 | } |
6261 | 6280 | ||
6262 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | 6281 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) |
@@ -6269,15 +6288,21 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
6269 | 6288 | ||
6270 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ | 6289 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ |
6271 | set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); | 6290 | set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); |
6291 | /* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */ | ||
6292 | set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | ||
6293 | &kvm->arch.irq_sources_bitmap); | ||
6272 | 6294 | ||
6273 | raw_spin_lock_init(&kvm->arch.tsc_write_lock); | 6295 | raw_spin_lock_init(&kvm->arch.tsc_write_lock); |
6296 | mutex_init(&kvm->arch.apic_map_lock); | ||
6274 | 6297 | ||
6275 | return 0; | 6298 | return 0; |
6276 | } | 6299 | } |
6277 | 6300 | ||
6278 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) | 6301 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) |
6279 | { | 6302 | { |
6280 | vcpu_load(vcpu); | 6303 | int r; |
6304 | r = vcpu_load(vcpu); | ||
6305 | BUG_ON(r); | ||
6281 | kvm_mmu_unload(vcpu); | 6306 | kvm_mmu_unload(vcpu); |
6282 | vcpu_put(vcpu); | 6307 | vcpu_put(vcpu); |
6283 | } | 6308 | } |
@@ -6321,6 +6346,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
6321 | put_page(kvm->arch.apic_access_page); | 6346 | put_page(kvm->arch.apic_access_page); |
6322 | if (kvm->arch.ept_identity_pagetable) | 6347 | if (kvm->arch.ept_identity_pagetable) |
6323 | put_page(kvm->arch.ept_identity_pagetable); | 6348 | put_page(kvm->arch.ept_identity_pagetable); |
6349 | kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); | ||
6324 | } | 6350 | } |
6325 | 6351 | ||
6326 | void kvm_arch_free_memslot(struct kvm_memory_slot *free, | 6352 | void kvm_arch_free_memslot(struct kvm_memory_slot *free, |
@@ -6328,10 +6354,18 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, | |||
6328 | { | 6354 | { |
6329 | int i; | 6355 | int i; |
6330 | 6356 | ||
6331 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | 6357 | for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { |
6332 | if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) { | 6358 | if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) { |
6333 | kvm_kvfree(free->arch.lpage_info[i]); | 6359 | kvm_kvfree(free->arch.rmap[i]); |
6334 | free->arch.lpage_info[i] = NULL; | 6360 | free->arch.rmap[i] = NULL; |
6361 | } | ||
6362 | if (i == 0) | ||
6363 | continue; | ||
6364 | |||
6365 | if (!dont || free->arch.lpage_info[i - 1] != | ||
6366 | dont->arch.lpage_info[i - 1]) { | ||
6367 | kvm_kvfree(free->arch.lpage_info[i - 1]); | ||
6368 | free->arch.lpage_info[i - 1] = NULL; | ||
6335 | } | 6369 | } |
6336 | } | 6370 | } |
6337 | } | 6371 | } |
@@ -6340,23 +6374,30 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
6340 | { | 6374 | { |
6341 | int i; | 6375 | int i; |
6342 | 6376 | ||
6343 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | 6377 | for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { |
6344 | unsigned long ugfn; | 6378 | unsigned long ugfn; |
6345 | int lpages; | 6379 | int lpages; |
6346 | int level = i + 2; | 6380 | int level = i + 1; |
6347 | 6381 | ||
6348 | lpages = gfn_to_index(slot->base_gfn + npages - 1, | 6382 | lpages = gfn_to_index(slot->base_gfn + npages - 1, |
6349 | slot->base_gfn, level) + 1; | 6383 | slot->base_gfn, level) + 1; |
6350 | 6384 | ||
6351 | slot->arch.lpage_info[i] = | 6385 | slot->arch.rmap[i] = |
6352 | kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i])); | 6386 | kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i])); |
6353 | if (!slot->arch.lpage_info[i]) | 6387 | if (!slot->arch.rmap[i]) |
6388 | goto out_free; | ||
6389 | if (i == 0) | ||
6390 | continue; | ||
6391 | |||
6392 | slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages * | ||
6393 | sizeof(*slot->arch.lpage_info[i - 1])); | ||
6394 | if (!slot->arch.lpage_info[i - 1]) | ||
6354 | goto out_free; | 6395 | goto out_free; |
6355 | 6396 | ||
6356 | if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) | 6397 | if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) |
6357 | slot->arch.lpage_info[i][0].write_count = 1; | 6398 | slot->arch.lpage_info[i - 1][0].write_count = 1; |
6358 | if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) | 6399 | if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) |
6359 | slot->arch.lpage_info[i][lpages - 1].write_count = 1; | 6400 | slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1; |
6360 | ugfn = slot->userspace_addr >> PAGE_SHIFT; | 6401 | ugfn = slot->userspace_addr >> PAGE_SHIFT; |
6361 | /* | 6402 | /* |
6362 | * If the gfn and userspace address are not aligned wrt each | 6403 | * If the gfn and userspace address are not aligned wrt each |
@@ -6368,16 +6409,21 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
6368 | unsigned long j; | 6409 | unsigned long j; |
6369 | 6410 | ||
6370 | for (j = 0; j < lpages; ++j) | 6411 | for (j = 0; j < lpages; ++j) |
6371 | slot->arch.lpage_info[i][j].write_count = 1; | 6412 | slot->arch.lpage_info[i - 1][j].write_count = 1; |
6372 | } | 6413 | } |
6373 | } | 6414 | } |
6374 | 6415 | ||
6375 | return 0; | 6416 | return 0; |
6376 | 6417 | ||
6377 | out_free: | 6418 | out_free: |
6378 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | 6419 | for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { |
6379 | kvm_kvfree(slot->arch.lpage_info[i]); | 6420 | kvm_kvfree(slot->arch.rmap[i]); |
6380 | slot->arch.lpage_info[i] = NULL; | 6421 | slot->arch.rmap[i] = NULL; |
6422 | if (i == 0) | ||
6423 | continue; | ||
6424 | |||
6425 | kvm_kvfree(slot->arch.lpage_info[i - 1]); | ||
6426 | slot->arch.lpage_info[i - 1] = NULL; | ||
6381 | } | 6427 | } |
6382 | return -ENOMEM; | 6428 | return -ENOMEM; |
6383 | } | 6429 | } |
@@ -6396,10 +6442,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
6396 | map_flags = MAP_SHARED | MAP_ANONYMOUS; | 6442 | map_flags = MAP_SHARED | MAP_ANONYMOUS; |
6397 | 6443 | ||
6398 | /*To keep backward compatibility with older userspace, | 6444 | /*To keep backward compatibility with older userspace, |
6399 | *x86 needs to hanlde !user_alloc case. | 6445 | *x86 needs to handle !user_alloc case. |
6400 | */ | 6446 | */ |
6401 | if (!user_alloc) { | 6447 | if (!user_alloc) { |
6402 | if (npages && !old.rmap) { | 6448 | if (npages && !old.npages) { |
6403 | unsigned long userspace_addr; | 6449 | unsigned long userspace_addr; |
6404 | 6450 | ||
6405 | userspace_addr = vm_mmap(NULL, 0, | 6451 | userspace_addr = vm_mmap(NULL, 0, |
@@ -6427,7 +6473,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
6427 | 6473 | ||
6428 | int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; | 6474 | int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; |
6429 | 6475 | ||
6430 | if (!user_alloc && !old.user_alloc && old.rmap && !npages) { | 6476 | if (!user_alloc && !old.user_alloc && old.npages && !npages) { |
6431 | int ret; | 6477 | int ret; |
6432 | 6478 | ||
6433 | ret = vm_munmap(old.userspace_addr, | 6479 | ret = vm_munmap(old.userspace_addr, |
@@ -6446,14 +6492,28 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
6446 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); | 6492 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); |
6447 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 6493 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
6448 | spin_unlock(&kvm->mmu_lock); | 6494 | spin_unlock(&kvm->mmu_lock); |
6495 | /* | ||
6496 | * If memory slot is created, or moved, we need to clear all | ||
6497 | * mmio sptes. | ||
6498 | */ | ||
6499 | if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) { | ||
6500 | kvm_mmu_zap_all(kvm); | ||
6501 | kvm_reload_remote_mmus(kvm); | ||
6502 | } | ||
6449 | } | 6503 | } |
6450 | 6504 | ||
6451 | void kvm_arch_flush_shadow(struct kvm *kvm) | 6505 | void kvm_arch_flush_shadow_all(struct kvm *kvm) |
6452 | { | 6506 | { |
6453 | kvm_mmu_zap_all(kvm); | 6507 | kvm_mmu_zap_all(kvm); |
6454 | kvm_reload_remote_mmus(kvm); | 6508 | kvm_reload_remote_mmus(kvm); |
6455 | } | 6509 | } |
6456 | 6510 | ||
6511 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | ||
6512 | struct kvm_memory_slot *slot) | ||
6513 | { | ||
6514 | kvm_arch_flush_shadow_all(kvm); | ||
6515 | } | ||
6516 | |||
6457 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 6517 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
6458 | { | 6518 | { |
6459 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && | 6519 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 3d1134ddb885..2b5219c12ac8 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -124,4 +124,5 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
124 | 124 | ||
125 | extern u64 host_xcr0; | 125 | extern u64 host_xcr0; |
126 | 126 | ||
127 | extern struct static_key kvm_no_apic_vcpu; | ||
127 | #endif | 128 | #endif |