diff options
author | Avi Kivity <avi@qumranet.com> | 2008-05-13 06:23:38 -0400 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2008-07-20 05:41:43 -0400 |
commit | 4ecac3fd6dc2629ad76a658a486f081c44aef10e (patch) | |
tree | a078acb4c626432ac8cf887b911d912f4a316d06 | |
parent | 1b7fcd3263e5f12dba43d27b64e1578bec070c28 (diff) |
KVM: Handle virtualization instruction #UD faults during reboot
KVM turns off hardware virtualization extensions during reboot, in order
to disassociate the memory used by the virtualization extensions from the
processor, and in order to have the system in a consistent state.
Unfortunately virtual machines may still be running while this goes on,
and once virtualization extensions are turned off, any virtualization
instruction will #UD on execution.
Fix by adding an exception handler to virtualization instructions; if we get
an exception during reboot, we simply spin waiting for the reset to complete.
If it's a true exception, BUG() so we can have our stack trace.
Signed-off-by: Avi Kivity <avi@qumranet.com>
-rw-r--r-- | arch/x86/kvm/svm.c | 20 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 25 | ||||
-rw-r--r-- | include/asm-x86/kvm_host.h | 24 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 15 |
4 files changed, 64 insertions, 20 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 992ab7115871..9390a31c06f4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -27,6 +27,8 @@ | |||
27 | 27 | ||
28 | #include <asm/desc.h> | 28 | #include <asm/desc.h> |
29 | 29 | ||
30 | #define __ex(x) __kvm_handle_fault_on_reboot(x) | ||
31 | |||
30 | MODULE_AUTHOR("Qumranet"); | 32 | MODULE_AUTHOR("Qumranet"); |
31 | MODULE_LICENSE("GPL"); | 33 | MODULE_LICENSE("GPL"); |
32 | 34 | ||
@@ -129,17 +131,17 @@ static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq) | |||
129 | 131 | ||
130 | static inline void clgi(void) | 132 | static inline void clgi(void) |
131 | { | 133 | { |
132 | asm volatile (SVM_CLGI); | 134 | asm volatile (__ex(SVM_CLGI)); |
133 | } | 135 | } |
134 | 136 | ||
135 | static inline void stgi(void) | 137 | static inline void stgi(void) |
136 | { | 138 | { |
137 | asm volatile (SVM_STGI); | 139 | asm volatile (__ex(SVM_STGI)); |
138 | } | 140 | } |
139 | 141 | ||
140 | static inline void invlpga(unsigned long addr, u32 asid) | 142 | static inline void invlpga(unsigned long addr, u32 asid) |
141 | { | 143 | { |
142 | asm volatile (SVM_INVLPGA :: "a"(addr), "c"(asid)); | 144 | asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid)); |
143 | } | 145 | } |
144 | 146 | ||
145 | static inline unsigned long kvm_read_cr2(void) | 147 | static inline unsigned long kvm_read_cr2(void) |
@@ -1758,17 +1760,17 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1758 | /* Enter guest mode */ | 1760 | /* Enter guest mode */ |
1759 | "push %%rax \n\t" | 1761 | "push %%rax \n\t" |
1760 | "mov %c[vmcb](%[svm]), %%rax \n\t" | 1762 | "mov %c[vmcb](%[svm]), %%rax \n\t" |
1761 | SVM_VMLOAD "\n\t" | 1763 | __ex(SVM_VMLOAD) "\n\t" |
1762 | SVM_VMRUN "\n\t" | 1764 | __ex(SVM_VMRUN) "\n\t" |
1763 | SVM_VMSAVE "\n\t" | 1765 | __ex(SVM_VMSAVE) "\n\t" |
1764 | "pop %%rax \n\t" | 1766 | "pop %%rax \n\t" |
1765 | #else | 1767 | #else |
1766 | /* Enter guest mode */ | 1768 | /* Enter guest mode */ |
1767 | "push %%eax \n\t" | 1769 | "push %%eax \n\t" |
1768 | "mov %c[vmcb](%[svm]), %%eax \n\t" | 1770 | "mov %c[vmcb](%[svm]), %%eax \n\t" |
1769 | SVM_VMLOAD "\n\t" | 1771 | __ex(SVM_VMLOAD) "\n\t" |
1770 | SVM_VMRUN "\n\t" | 1772 | __ex(SVM_VMRUN) "\n\t" |
1771 | SVM_VMSAVE "\n\t" | 1773 | __ex(SVM_VMSAVE) "\n\t" |
1772 | "pop %%eax \n\t" | 1774 | "pop %%eax \n\t" |
1773 | #endif | 1775 | #endif |
1774 | 1776 | ||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fff8e23433d6..b80b4d141637 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -30,6 +30,8 @@ | |||
30 | #include <asm/io.h> | 30 | #include <asm/io.h> |
31 | #include <asm/desc.h> | 31 | #include <asm/desc.h> |
32 | 32 | ||
33 | #define __ex(x) __kvm_handle_fault_on_reboot(x) | ||
34 | |||
33 | MODULE_AUTHOR("Qumranet"); | 35 | MODULE_AUTHOR("Qumranet"); |
34 | MODULE_LICENSE("GPL"); | 36 | MODULE_LICENSE("GPL"); |
35 | 37 | ||
@@ -278,7 +280,7 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva) | |||
278 | u64 gva; | 280 | u64 gva; |
279 | } operand = { vpid, 0, gva }; | 281 | } operand = { vpid, 0, gva }; |
280 | 282 | ||
281 | asm volatile (ASM_VMX_INVVPID | 283 | asm volatile (__ex(ASM_VMX_INVVPID) |
282 | /* CF==1 or ZF==1 --> rc = -1 */ | 284 | /* CF==1 or ZF==1 --> rc = -1 */ |
283 | "; ja 1f ; ud2 ; 1:" | 285 | "; ja 1f ; ud2 ; 1:" |
284 | : : "a"(&operand), "c"(ext) : "cc", "memory"); | 286 | : : "a"(&operand), "c"(ext) : "cc", "memory"); |
@@ -290,7 +292,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa) | |||
290 | u64 eptp, gpa; | 292 | u64 eptp, gpa; |
291 | } operand = {eptp, gpa}; | 293 | } operand = {eptp, gpa}; |
292 | 294 | ||
293 | asm volatile (ASM_VMX_INVEPT | 295 | asm volatile (__ex(ASM_VMX_INVEPT) |
294 | /* CF==1 or ZF==1 --> rc = -1 */ | 296 | /* CF==1 or ZF==1 --> rc = -1 */ |
295 | "; ja 1f ; ud2 ; 1:\n" | 297 | "; ja 1f ; ud2 ; 1:\n" |
296 | : : "a" (&operand), "c" (ext) : "cc", "memory"); | 298 | : : "a" (&operand), "c" (ext) : "cc", "memory"); |
@@ -311,7 +313,7 @@ static void vmcs_clear(struct vmcs *vmcs) | |||
311 | u64 phys_addr = __pa(vmcs); | 313 | u64 phys_addr = __pa(vmcs); |
312 | u8 error; | 314 | u8 error; |
313 | 315 | ||
314 | asm volatile (ASM_VMX_VMCLEAR_RAX "; setna %0" | 316 | asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0" |
315 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) | 317 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) |
316 | : "cc", "memory"); | 318 | : "cc", "memory"); |
317 | if (error) | 319 | if (error) |
@@ -378,7 +380,7 @@ static unsigned long vmcs_readl(unsigned long field) | |||
378 | { | 380 | { |
379 | unsigned long value; | 381 | unsigned long value; |
380 | 382 | ||
381 | asm volatile (ASM_VMX_VMREAD_RDX_RAX | 383 | asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX) |
382 | : "=a"(value) : "d"(field) : "cc"); | 384 | : "=a"(value) : "d"(field) : "cc"); |
383 | return value; | 385 | return value; |
384 | } | 386 | } |
@@ -413,7 +415,7 @@ static void vmcs_writel(unsigned long field, unsigned long value) | |||
413 | { | 415 | { |
414 | u8 error; | 416 | u8 error; |
415 | 417 | ||
416 | asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0" | 418 | asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0" |
417 | : "=q"(error) : "a"(value), "d"(field) : "cc"); | 419 | : "=q"(error) : "a"(value), "d"(field) : "cc"); |
418 | if (unlikely(error)) | 420 | if (unlikely(error)) |
419 | vmwrite_error(field, value); | 421 | vmwrite_error(field, value); |
@@ -621,7 +623,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
621 | u8 error; | 623 | u8 error; |
622 | 624 | ||
623 | per_cpu(current_vmcs, cpu) = vmx->vmcs; | 625 | per_cpu(current_vmcs, cpu) = vmx->vmcs; |
624 | asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" | 626 | asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" |
625 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) | 627 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) |
626 | : "cc"); | 628 | : "cc"); |
627 | if (error) | 629 | if (error) |
@@ -1030,13 +1032,14 @@ static void hardware_enable(void *garbage) | |||
1030 | MSR_IA32_FEATURE_CONTROL_LOCKED | | 1032 | MSR_IA32_FEATURE_CONTROL_LOCKED | |
1031 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED); | 1033 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED); |
1032 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ | 1034 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ |
1033 | asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) | 1035 | asm volatile (ASM_VMX_VMXON_RAX |
1036 | : : "a"(&phys_addr), "m"(phys_addr) | ||
1034 | : "memory", "cc"); | 1037 | : "memory", "cc"); |
1035 | } | 1038 | } |
1036 | 1039 | ||
1037 | static void hardware_disable(void *garbage) | 1040 | static void hardware_disable(void *garbage) |
1038 | { | 1041 | { |
1039 | asm volatile (ASM_VMX_VMXOFF : : : "cc"); | 1042 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); |
1040 | write_cr4(read_cr4() & ~X86_CR4_VMXE); | 1043 | write_cr4(read_cr4() & ~X86_CR4_VMXE); |
1041 | } | 1044 | } |
1042 | 1045 | ||
@@ -2834,7 +2837,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2834 | "push %%edx; push %%ebp;" | 2837 | "push %%edx; push %%ebp;" |
2835 | "push %%ecx \n\t" | 2838 | "push %%ecx \n\t" |
2836 | #endif | 2839 | #endif |
2837 | ASM_VMX_VMWRITE_RSP_RDX "\n\t" | 2840 | __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" |
2838 | /* Check if vmlaunch of vmresume is needed */ | 2841 | /* Check if vmlaunch of vmresume is needed */ |
2839 | "cmpl $0, %c[launched](%0) \n\t" | 2842 | "cmpl $0, %c[launched](%0) \n\t" |
2840 | /* Load guest registers. Don't clobber flags. */ | 2843 | /* Load guest registers. Don't clobber flags. */ |
@@ -2869,9 +2872,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2869 | #endif | 2872 | #endif |
2870 | /* Enter guest mode */ | 2873 | /* Enter guest mode */ |
2871 | "jne .Llaunched \n\t" | 2874 | "jne .Llaunched \n\t" |
2872 | ASM_VMX_VMLAUNCH "\n\t" | 2875 | __ex(ASM_VMX_VMLAUNCH) "\n\t" |
2873 | "jmp .Lkvm_vmx_return \n\t" | 2876 | "jmp .Lkvm_vmx_return \n\t" |
2874 | ".Llaunched: " ASM_VMX_VMRESUME "\n\t" | 2877 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" |
2875 | ".Lkvm_vmx_return: " | 2878 | ".Lkvm_vmx_return: " |
2876 | /* Save guest registers, load host registers, keep flags */ | 2879 | /* Save guest registers, load host registers, keep flags */ |
2877 | #ifdef CONFIG_X86_64 | 2880 | #ifdef CONFIG_X86_64 |
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index c2d066e185f4..0df9d5fa281a 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h | |||
@@ -692,4 +692,28 @@ enum { | |||
692 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | 692 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ |
693 | vcpu, 0, 0, 0, 0, 0, 0) | 693 | vcpu, 0, 0, 0, 0, 0, 0) |
694 | 694 | ||
695 | #ifdef CONFIG_64BIT | ||
696 | #define KVM_EX_ENTRY ".quad" | ||
697 | #else | ||
698 | #define KVM_EX_ENTRY ".long" | ||
699 | #endif | ||
700 | |||
701 | /* | ||
702 | * Hardware virtualization extension instructions may fault if a | ||
703 | * reboot turns off virtualization while processes are running. | ||
704 | * Trap the fault and ignore the instruction if that happens. | ||
705 | */ | ||
706 | asmlinkage void kvm_handle_fault_on_reboot(void); | ||
707 | |||
708 | #define __kvm_handle_fault_on_reboot(insn) \ | ||
709 | "666: " insn "\n\t" \ | ||
710 | ".pushsection .text.fixup, \"ax\" \n" \ | ||
711 | "667: \n\t" \ | ||
712 | "push $666b \n\t" \ | ||
713 | "jmp kvm_handle_fault_on_reboot \n\t" \ | ||
714 | ".popsection \n\t" \ | ||
715 | ".pushsection __ex_table, \"a\" \n\t" \ | ||
716 | KVM_EX_ENTRY " 666b, 667b \n\t" \ | ||
717 | ".popsection" | ||
718 | |||
695 | #endif | 719 | #endif |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f9dd20606c40..e4bf88a9ee4e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -65,6 +65,8 @@ struct dentry *kvm_debugfs_dir; | |||
65 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | 65 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, |
66 | unsigned long arg); | 66 | unsigned long arg); |
67 | 67 | ||
68 | bool kvm_rebooting; | ||
69 | |||
68 | static inline int valid_vcpu(int n) | 70 | static inline int valid_vcpu(int n) |
69 | { | 71 | { |
70 | return likely(n >= 0 && n < KVM_MAX_VCPUS); | 72 | return likely(n >= 0 && n < KVM_MAX_VCPUS); |
@@ -1301,6 +1303,18 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | |||
1301 | return NOTIFY_OK; | 1303 | return NOTIFY_OK; |
1302 | } | 1304 | } |
1303 | 1305 | ||
1306 | |||
1307 | asmlinkage void kvm_handle_fault_on_reboot(void) | ||
1308 | { | ||
1309 | if (kvm_rebooting) | ||
1310 | /* spin while reset goes on */ | ||
1311 | while (true) | ||
1312 | ; | ||
1313 | /* Fault while not rebooting. We want the trace. */ | ||
1314 | BUG(); | ||
1315 | } | ||
1316 | EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot); | ||
1317 | |||
1304 | static int kvm_reboot(struct notifier_block *notifier, unsigned long val, | 1318 | static int kvm_reboot(struct notifier_block *notifier, unsigned long val, |
1305 | void *v) | 1319 | void *v) |
1306 | { | 1320 | { |
@@ -1310,6 +1324,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val, | |||
1310 | * in vmx root mode. | 1324 | * in vmx root mode. |
1311 | */ | 1325 | */ |
1312 | printk(KERN_INFO "kvm: exiting hardware virtualization\n"); | 1326 | printk(KERN_INFO "kvm: exiting hardware virtualization\n"); |
1327 | kvm_rebooting = true; | ||
1313 | on_each_cpu(hardware_disable, NULL, 1); | 1328 | on_each_cpu(hardware_disable, NULL, 1); |
1314 | } | 1329 | } |
1315 | return NOTIFY_OK; | 1330 | return NOTIFY_OK; |