From 851ba6922ac575b749f63dee0ae072808163ba6a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 24 Aug 2009 11:10:17 +0300 Subject: KVM: Don't pass kvm_run arguments They're just copies of vcpu->run, which is readily accessible. Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 102 ++++++++++++++++++++++++++++------------------------- 1 file changed, 54 insertions(+), 48 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c17404add91f..92048a626d4e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -286,7 +286,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (!svm->next_rip) { - if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) != + if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != EMULATE_DONE) printk(KERN_DEBUG "%s: NOP\n", __func__); return; @@ -1180,7 +1180,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, } } -static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int pf_interception(struct vcpu_svm *svm) { u64 fault_address; u32 error_code; @@ -1194,8 +1194,10 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); } -static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int db_interception(struct vcpu_svm *svm) { + struct kvm_run *kvm_run = svm->vcpu.run; + if (!(svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && !svm->vcpu.arch.singlestep) { @@ -1223,25 +1225,27 @@ static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int bp_interception(struct vcpu_svm *svm) { + struct kvm_run *kvm_run = svm->vcpu.run; + kvm_run->exit_reason = KVM_EXIT_DEBUG; kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; kvm_run->debug.arch.exception = BP_VECTOR; return 0; } -static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int ud_interception(struct vcpu_svm *svm) { int er; - er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD); + er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; } -static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int nm_interception(struct vcpu_svm *svm) { svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); if (!(svm->vcpu.arch.cr0 & X86_CR0_TS)) @@ -1251,7 +1255,7 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int mc_interception(struct vcpu_svm *svm) { /* * On an #MC intercept the MCE handler is not called automatically in @@ -1264,8 +1268,10 @@ static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int shutdown_interception(struct vcpu_svm *svm) { + struct kvm_run *kvm_run = svm->vcpu.run; + /* * VMCB is undefined after a SHUTDOWN intercept * so reinitialize it. @@ -1277,7 +1283,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 0; } -static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int io_interception(struct vcpu_svm *svm) { u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ int size, in, string; @@ -1291,7 +1297,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) if (string) { if (emulate_instruction(&svm->vcpu, - kvm_run, 0, 0, 0) == EMULATE_DO_MMIO) + 0, 0, 0) == EMULATE_DO_MMIO) return 0; return 1; } @@ -1301,33 +1307,33 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; skip_emulated_instruction(&svm->vcpu); - return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); + return kvm_emulate_pio(&svm->vcpu, in, size, port); } -static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int nmi_interception(struct vcpu_svm *svm) { return 1; } -static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int intr_interception(struct vcpu_svm *svm) { ++svm->vcpu.stat.irq_exits; return 1; } -static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int nop_on_interception(struct vcpu_svm *svm) { return 1; } -static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int halt_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; skip_emulated_instruction(&svm->vcpu); return kvm_emulate_halt(&svm->vcpu); } -static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int vmmcall_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; skip_emulated_instruction(&svm->vcpu); @@ -1837,7 +1843,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; } -static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int vmload_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; @@ -1857,7 +1863,7 @@ static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int vmsave_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; @@ -1877,7 +1883,7 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int vmrun_interception(struct vcpu_svm *svm) { nsvm_printk("VMrun\n"); @@ -1907,7 +1913,7 @@ failed: return 1; } -static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int stgi_interception(struct vcpu_svm *svm) { if (nested_svm_check_permissions(svm)) return 1; @@ -1920,7 +1926,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int clgi_interception(struct vcpu_svm *svm) { if (nested_svm_check_permissions(svm)) return 1; @@ -1937,7 +1943,7 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int invlpga_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; nsvm_printk("INVLPGA\n"); @@ -1950,15 +1956,13 @@ static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int invalid_op_interception(struct vcpu_svm *svm, - struct kvm_run *kvm_run) +static int invalid_op_interception(struct vcpu_svm *svm) { kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; } -static int task_switch_interception(struct vcpu_svm *svm, - struct kvm_run *kvm_run) +static int task_switch_interception(struct vcpu_svm *svm) { u16 tss_selector; int reason; @@ -2008,14 +2012,14 @@ static int task_switch_interception(struct vcpu_svm *svm, return kvm_task_switch(&svm->vcpu, tss_selector, reason); } -static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int cpuid_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; kvm_emulate_cpuid(&svm->vcpu); return 1; } -static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int iret_interception(struct vcpu_svm *svm) { ++svm->vcpu.stat.nmi_window_exits; svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); @@ -2023,26 +2027,27 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int invlpg_interception(struct vcpu_svm *svm) { - if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) + if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); return 1; } -static int emulate_on_interception(struct vcpu_svm *svm, - struct kvm_run *kvm_run) +static int emulate_on_interception(struct vcpu_svm *svm) { - if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE) + if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); return 1; } -static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int cr8_write_interception(struct vcpu_svm *svm) { + struct kvm_run *kvm_run = svm->vcpu.run; + u8 cr8_prev = kvm_get_cr8(&svm->vcpu); /* instruction emulation calls kvm_set_cr8() */ - emulate_instruction(&svm->vcpu, NULL, 0, 0, 0); + emulate_instruction(&svm->vcpu, 0, 0, 0); if (irqchip_in_kernel(svm->vcpu.kvm)) { svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; return 1; @@ -2128,7 +2133,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) return 0; } -static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int rdmsr_interception(struct vcpu_svm *svm) { u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; u64 data; @@ -2221,7 +2226,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) return 0; } -static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int wrmsr_interception(struct vcpu_svm *svm) { u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) @@ -2237,17 +2242,18 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int msr_interception(struct vcpu_svm *svm) { if (svm->vmcb->control.exit_info_1) - return wrmsr_interception(svm, kvm_run); + return wrmsr_interception(svm); else - return rdmsr_interception(svm, kvm_run); + return rdmsr_interception(svm); } -static int interrupt_window_interception(struct vcpu_svm *svm, - struct kvm_run *kvm_run) +static int interrupt_window_interception(struct vcpu_svm *svm) { + struct kvm_run *kvm_run = svm->vcpu.run; + svm_clear_vintr(svm); svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; /* @@ -2265,8 +2271,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm, return 1; } -static int (*svm_exit_handlers[])(struct vcpu_svm *svm, - struct kvm_run *kvm_run) = { +static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR0] = emulate_on_interception, [SVM_EXIT_READ_CR3] = emulate_on_interception, [SVM_EXIT_READ_CR4] = emulate_on_interception, @@ -2321,9 +2326,10 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_NPF] = pf_interception, }; -static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +static int handle_exit(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + struct kvm_run *kvm_run = vcpu->run; u32 exit_code = svm->vmcb->control.exit_code; trace_kvm_exit(exit_code, svm->vmcb->save.rip); @@ -2383,7 +2389,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return 0; } - return svm_exit_handlers[exit_code](svm, kvm_run); + return svm_exit_handlers[exit_code](svm); } static void reload_tss(struct kvm_vcpu *vcpu) @@ -2588,7 +2594,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) #define R "e" #endif -static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static void svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); u16 fs_selector; -- cgit v1.2.2 From e8b3433a5c062e94e34cadb6144c10689a497bc3 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 8 Sep 2009 14:47:38 -0300 Subject: KVM: SVM: remove needless mmap_sem acquision from nested_svm_map nested_svm_map unnecessarily takes mmap_sem around gfn_to_page, since gfn_to_page / get_user_pages are responsible for it. Signed-off-by: Marcelo Tosatti Acked-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 92048a626d4e..f54c4f9d2865 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1396,10 +1396,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) { struct page *page; - down_read(¤t->mm->mmap_sem); page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); - up_read(¤t->mm->mmap_sem); - if (is_error_page(page)) goto error; -- cgit v1.2.2 From 10474ae8945ce08622fd1f3464e55bd817bf2376 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 15 Sep 2009 11:37:46 +0200 Subject: KVM: Activate Virtualization On Demand X86 CPUs need to have some magic happening to enable the virtualization extensions on them. This magic can result in unpleasant results for users, like blocking other VMMs from working (vmx) or using invalid TLB entries (svm). Currently KVM activates virtualization when the respective kernel module is loaded. This blocks us from autoloading KVM modules without breaking other VMMs. To circumvent this problem at least a bit, this patch introduces on demand activation of virtualization. This means, that instead virtualization is enabled on creation of the first virtual machine and disabled on destruction of the last one. So using this, KVM can be easily autoloaded, while keeping other hypervisors usable. Signed-off-by: Alexander Graf Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f54c4f9d2865..59fe4d54da11 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -316,7 +316,7 @@ static void svm_hardware_disable(void *garbage) cpu_svm_disable(); } -static void svm_hardware_enable(void *garbage) +static int svm_hardware_enable(void *garbage) { struct svm_cpu_data *svm_data; @@ -325,16 +325,20 @@ static void svm_hardware_enable(void *garbage) struct desc_struct *gdt; int me = raw_smp_processor_id(); + rdmsrl(MSR_EFER, efer); + if (efer & EFER_SVME) + return -EBUSY; + if (!has_svm()) { printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me); - return; + return -EINVAL; } svm_data = per_cpu(svm_data, me); if (!svm_data) { printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n", me); - return; + return -EINVAL; } svm_data->asid_generation = 1; @@ -345,11 +349,12 @@ static void svm_hardware_enable(void *garbage) gdt = (struct desc_struct *)gdt_descr.base; svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); - rdmsrl(MSR_EFER, efer); wrmsrl(MSR_EFER, efer | EFER_SVME); wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(svm_data->save_area) << PAGE_SHIFT); + + return 0; } static void svm_cpu_uninit(int cpu) -- cgit v1.2.2 From 7fcdb5103d10d2eb75876637a2efa9679cce14d3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 16 Sep 2009 15:24:15 +0200 Subject: KVM: SVM: reorganize svm_interrupt_allowed This patch reorganizes the logic in svm_interrupt_allowed to make it better to read. This is important because the logic is a lot more complicated with Nested SVM. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 59fe4d54da11..3f3fe815c21b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2472,10 +2472,18 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; - return (vmcb->save.rflags & X86_EFLAGS_IF) && - !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && - gif_set(svm) && - !(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK)); + int ret; + + if (!gif_set(svm) || + (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) + return 0; + + ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); + + if (is_nested(svm)) + return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); + + return ret; } static void enable_irq_window(struct kvm_vcpu *vcpu) -- cgit v1.2.2 From 33527ad7e12a5cb50b39165945464600ab2f7632 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 16 Sep 2009 15:24:16 +0200 Subject: KVM: SVM: don't copy exit_int_info on nested vmrun The exit_int_info field is only written by the hardware and never read. So it does not need to be copied on a vmrun emulation. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 3f3fe815c21b..41c996ab87e9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1797,8 +1797,6 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm->nested.intercept = nested_vmcb->control.intercept; force_new_asid(&svm->vcpu); - svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info; - svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err; svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; if (nested_vmcb->control.int_ctl & V_IRQ_MASK) { nsvm_printk("nSVM Injecting Interrupt: 0x%x\n", -- cgit v1.2.2 From e935d48e1b49451490218e1181d9834176200955 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 16 Sep 2009 15:24:19 +0200 Subject: KVM: SVM: Remove remaining occurences of rdtscll This patch replaces them with native_read_tsc() which can also be used in expressions and saves a variable on the stack in this case. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 41c996ab87e9..9a4dacab6d8a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -763,14 +763,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) int i; if (unlikely(cpu != vcpu->cpu)) { - u64 tsc_this, delta; + u64 delta; /* * Make sure that the guest sees a monotonically * increasing TSC. */ - rdtscll(tsc_this); - delta = vcpu->arch.host_tsc - tsc_this; + delta = vcpu->arch.host_tsc - native_read_tsc(); svm->vmcb->control.tsc_offset += delta; if (is_nested(svm)) svm->nested.hsave->control.tsc_offset += delta; @@ -792,7 +791,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); - rdtscll(vcpu->arch.host_tsc); + vcpu->arch.host_tsc = native_read_tsc(); } static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) -- cgit v1.2.2 From e6732a5af9dfcc87078706a1598df0efe5010f73 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 29 Sep 2009 11:38:36 -1000 Subject: KVM: Fix printk name error in svm.c Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9a4dacab6d8a..d1036ce8917e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -330,13 +330,14 @@ static int svm_hardware_enable(void *garbage) return -EBUSY; if (!has_svm()) { - printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me); + printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n", + me); return -EINVAL; } svm_data = per_cpu(svm_data, me); if (!svm_data) { - printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n", + printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n", me); return -EINVAL; } -- cgit v1.2.2 From 3230bb4707278dba25e24cd0a11ea7b2337678ee Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 29 Sep 2009 11:38:37 -1000 Subject: KVM: Fix hotplug of CPUs Both VMX and SVM require per-cpu memory allocation, which is done at module init time, for only online cpus. Backend was not allocating enough structure for all possible CPUs, so new CPUs coming online could not be hardware enabled. Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d1036ce8917e..02a4269be645 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -482,7 +482,7 @@ static __init int svm_hardware_setup(void) kvm_enable_efer_bits(EFER_SVME); } - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { r = svm_cpu_init(cpu); if (r) goto err; @@ -516,7 +516,7 @@ static __exit void svm_hardware_unsetup(void) { int cpu; - for_each_online_cpu(cpu) + for_each_possible_cpu(cpu) svm_cpu_uninit(cpu); __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); -- cgit v1.2.2 From 355be0b9300579e02275d7d19374806a974ce622 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 3 Oct 2009 00:31:21 +0200 Subject: KVM: x86: Refactor guest debug IOCTL handling Much of so far vendor-specific code for setting up guest debug can actually be handled by the generic code. This also fixes a minor deficit in the SVM part /wrt processing KVM_GUESTDBG_ENABLE. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 02a4269be645..279a2ae21b4f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1065,26 +1065,16 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) vcpu->guest_debug = 0; } -static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) +static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - int old_debug = vcpu->guest_debug; struct vcpu_svm *svm = to_svm(vcpu); - vcpu->guest_debug = dbg->control; - - update_db_intercept(vcpu); - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; else svm->vmcb->save.dr7 = vcpu->arch.dr7; - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; - else if (old_debug & KVM_GUESTDBG_SINGLESTEP) - svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - - return 0; + update_db_intercept(vcpu); } static void load_host_msrs(struct kvm_vcpu *vcpu) -- cgit v1.2.2 From 8d23c4662427507f432c96ac4fa3b76f0a8360cd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 9 Oct 2009 16:08:25 +0200 Subject: KVM: SVM: Notify nested hypervisor of lost event injections If event_inj is valid on a #vmexit the host CPU would write the contents to exit_int_info, so the hypervisor knows that the event wasn't injected. We don't do this in nested SVM by now which is a bug and fixed by this patch. Signed-off-by: Alexander Graf Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 279a2ae21b4f..e37285446cb7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1615,6 +1615,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; + + /* + * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have + * to make sure that we do not lose injected events. So check event_inj + * here and copy it to exit_int_info if it is valid. + * Exit_int_info and event_inj can't be both valid because the case + * below only happens on a VMRUN instruction intercept which has + * no valid exit_int_info set. + */ + if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { + struct vmcb_control_area *nc = &nested_vmcb->control; + + nc->exit_int_info = vmcb->control.event_inj; + nc->exit_int_info_err = vmcb->control.event_inj_err; + } + nested_vmcb->control.tlb_ctl = 0; nested_vmcb->control.event_inj = 0; nested_vmcb->control.event_inj_err = 0; -- cgit v1.2.2 From cd3ff653ae0b45bac7a19208e9c75034fcacc85f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:26 +0200 Subject: KVM: SVM: Move INTR vmexit out of atomic code The nested SVM code emulates a #vmexit caused by a request to open the irq window right in the request function. This is a bug because the request function runs with preemption and interrupts disabled but the #vmexit emulation might sleep. This can cause a schedule()-while-atomic bug and is fixed with this patch. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e37285446cb7..884bffc70c7f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -85,6 +85,9 @@ struct nested_state { /* gpa pointers to the real vectors */ u64 vmcb_msrpm; + /* A VMEXIT is required but not yet emulated */ + bool exit_required; + /* cache for intercepts of the guest */ u16 intercept_cr_read; u16 intercept_cr_write; @@ -1379,7 +1382,14 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) svm->vmcb->control.exit_code = SVM_EXIT_INTR; - if (nested_svm_exit_handled(svm)) { + if (svm->nested.intercept & 1ULL) { + /* + * The #vmexit can't be emulated here directly because this + * code path runs with irqs and preemtion disabled. A + * #vmexit emulation might sleep. Only signal request for + * the #vmexit here. + */ + svm->nested.exit_required = true; nsvm_printk("VMexit -> INTR\n"); return 1; } @@ -2340,6 +2350,13 @@ static int handle_exit(struct kvm_vcpu *vcpu) trace_kvm_exit(exit_code, svm->vmcb->save.rip); + if (unlikely(svm->nested.exit_required)) { + nested_svm_vmexit(svm); + svm->nested.exit_required = false; + + return 1; + } + if (is_nested(svm)) { int vmexit; @@ -2615,6 +2632,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) u16 gs_selector; u16 ldt_selector; + /* + * A vmexit emulation is required before the vcpu can be executed + * again. + */ + if (unlikely(svm->nested.exit_required)) + return; + svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; -- cgit v1.2.2 From 0ac406de8f3780c8e0801d5719e1ec531d4a6ec4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:27 +0200 Subject: KVM: SVM: Add tracepoint for nested vmrun This patch adds a dedicated kvm tracepoint for a nested vmrun. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 884bffc70c7f..907af3f3a7af 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1726,6 +1726,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) /* nested_vmcb is our indicator if nested SVM is activated */ svm->nested.vmcb = svm->vmcb->save.rax; + trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, + nested_vmcb->save.rip, + nested_vmcb->control.int_ctl, + nested_vmcb->control.event_inj, + nested_vmcb->control.nested_ctl); + /* Clear internal status */ kvm_clear_exception_queue(&svm->vcpu); kvm_clear_interrupt_queue(&svm->vcpu); -- cgit v1.2.2 From d8cabddf7e8fbdced2dd668c98d7762c7ef75245 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:28 +0200 Subject: KVM: SVM: Add tracepoint for nested #vmexit This patch adds a tracepoint for every #vmexit we get from a nested guest. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 907af3f3a7af..edf6e8b2b84e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2366,6 +2366,12 @@ static int handle_exit(struct kvm_vcpu *vcpu) if (is_nested(svm)) { int vmexit; + trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, + svm->vmcb->control.exit_info_1, + svm->vmcb->control.exit_info_2, + svm->vmcb->control.exit_int_info, + svm->vmcb->control.exit_int_info_err); + nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n", exit_code, svm->vmcb->control.exit_info_1, svm->vmcb->control.exit_info_2, svm->vmcb->save.rip); -- cgit v1.2.2 From 17897f366847a9ef8a13e3671a0eb1c15422abed Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:29 +0200 Subject: KVM: SVM: Add tracepoint for injected #vmexit This patch adds a tracepoint for a nested #vmexit that gets re-injected to the guest. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index edf6e8b2b84e..369eeb86e87c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1592,6 +1592,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) struct vmcb *hsave = svm->nested.hsave; struct vmcb *vmcb = svm->vmcb; + trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, + vmcb->control.exit_info_1, + vmcb->control.exit_info_2, + vmcb->control.exit_int_info, + vmcb->control.exit_int_info_err); + nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); if (!nested_vmcb) return 1; -- cgit v1.2.2 From 236649de3360916ef85f95c82723af17a25b9179 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:30 +0200 Subject: KVM: SVM: Add tracepoint for #vmexit because intr pending This patch adds a special tracepoint for the event that a nested #vmexit is injected because kvm wants to inject an interrupt into the guest. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 369eeb86e87c..78a391c60a75 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1390,7 +1390,7 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) * the #vmexit here. */ svm->nested.exit_required = true; - nsvm_printk("VMexit -> INTR\n"); + trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); return 1; } -- cgit v1.2.2 From ec1ff79084fccdae0dca9b04b89dcdf3235bbfa1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:31 +0200 Subject: KVM: SVM: Add tracepoint for invlpga instruction This patch adds a tracepoint for the event that the guest executed the INVLPGA instruction. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 78a391c60a75..ba18fb7d3657 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1976,6 +1976,9 @@ static int invlpga_interception(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; nsvm_printk("INVLPGA\n"); + trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX], + vcpu->arch.regs[VCPU_REGS_RAX]); + /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); -- cgit v1.2.2 From 532a46b98963f110e9425a251e127d6537915dde Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:32 +0200 Subject: KVM: SVM: Add tracepoint for skinit instruction This patch adds a tracepoint for the event that the guest executed the SKINIT instruction. This information is important because SKINIT is an SVM extenstion not yet implemented by nested SVM and we may need this information for debugging hypervisors that do not yet run on nested SVM. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ba18fb7d3657..8b9f6fbba48c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1987,6 +1987,14 @@ static int invlpga_interception(struct vcpu_svm *svm) return 1; } +static int skinit_interception(struct vcpu_svm *svm) +{ + trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]); + + kvm_queue_exception(&svm->vcpu, UD_VECTOR); + return 1; +} + static int invalid_op_interception(struct vcpu_svm *svm) { kvm_queue_exception(&svm->vcpu, UD_VECTOR); @@ -2350,7 +2358,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_VMSAVE] = vmsave_interception, [SVM_EXIT_STGI] = stgi_interception, [SVM_EXIT_CLGI] = clgi_interception, - [SVM_EXIT_SKINIT] = invalid_op_interception, + [SVM_EXIT_SKINIT] = skinit_interception, [SVM_EXIT_WBINVD] = emulate_on_interception, [SVM_EXIT_MONITOR] = invalid_op_interception, [SVM_EXIT_MWAIT] = invalid_op_interception, -- cgit v1.2.2 From d36f19e9ecd22dc035ef4cc6361b564be650f8e7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:33 +0200 Subject: KVM: SVM: Remove nsvm_printk debugging code With all important informations now delivered through tracepoints we can savely remove the nsvm_printk debugging code for nested svm. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8b9f6fbba48c..69610c5d6dea 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -53,15 +53,6 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) -/* Turn on to get debugging output*/ -/* #define NESTED_DEBUG */ - -#ifdef NESTED_DEBUG -#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args) -#else -#define nsvm_printk(fmt, args...) do {} while(0) -#endif - static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, @@ -1540,14 +1531,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) } default: { u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); - nsvm_printk("exit code: 0x%x\n", exit_code); if (svm->nested.intercept & exit_bits) vmexit = NESTED_EXIT_DONE; } } if (vmexit == NESTED_EXIT_DONE) { - nsvm_printk("#VMEXIT reason=%04x\n", exit_code); nested_svm_vmexit(svm); } @@ -1658,10 +1647,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) /* Restore the original control entries */ copy_vmcb_control_area(vmcb, hsave); - /* Kill any pending exceptions */ - if (svm->vcpu.arch.exception.pending == true) - nsvm_printk("WARNING: Pending Exception\n"); - kvm_clear_exception_queue(&svm->vcpu); kvm_clear_interrupt_queue(&svm->vcpu); @@ -1826,25 +1811,14 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) force_new_asid(&svm->vcpu); svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; - if (nested_vmcb->control.int_ctl & V_IRQ_MASK) { - nsvm_printk("nSVM Injecting Interrupt: 0x%x\n", - nested_vmcb->control.int_ctl); - } if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) svm->vcpu.arch.hflags |= HF_VINTR_MASK; else svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; - nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n", - nested_vmcb->control.exit_int_info, - nested_vmcb->control.int_state); - svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; svm->vmcb->control.int_state = nested_vmcb->control.int_state; svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; - if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID) - nsvm_printk("Injecting Event: 0x%x\n", - nested_vmcb->control.event_inj); svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; @@ -1913,8 +1887,6 @@ static int vmsave_interception(struct vcpu_svm *svm) static int vmrun_interception(struct vcpu_svm *svm) { - nsvm_printk("VMrun\n"); - if (nested_svm_check_permissions(svm)) return 1; @@ -1974,7 +1946,6 @@ static int clgi_interception(struct vcpu_svm *svm) static int invlpga_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; - nsvm_printk("INVLPGA\n"); trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX], vcpu->arch.regs[VCPU_REGS_RAX]); @@ -2389,10 +2360,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) svm->vmcb->control.exit_int_info, svm->vmcb->control.exit_int_info_err); - nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n", - exit_code, svm->vmcb->control.exit_info_1, - svm->vmcb->control.exit_info_2, svm->vmcb->save.rip); - vmexit = nested_svm_exit_special(svm); if (vmexit == NESTED_EXIT_CONTINUE) @@ -2539,7 +2506,6 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) static void enable_irq_window(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - nsvm_printk("Trying to open IRQ window\n"); nested_svm_intr(svm); -- cgit v1.2.2 From 565d0998ecac8373b9a9ecd5991abe74318cd235 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Tue, 6 Oct 2009 14:25:02 -0500 Subject: KVM: SVM: Support Pause Filter in AMD processors New AMD processors (Family 0x10 models 8+) support the Pause Filter Feature. This feature creates a new field in the VMCB called Pause Filter Count. If Pause Filter Count is greater than 0 and intercepting PAUSEs is enabled, the processor will increment an internal counter when a PAUSE instruction occurs instead of intercepting. When the internal counter reaches the Pause Filter Count value, a PAUSE intercept will occur. This feature can be used to detect contended spinlocks, especially when the lock holding VCPU is not scheduled. Rescheduling another VCPU prevents the VCPU seeking the lock from wasting its quantum by spinning idly. Experimental results show that most spinlocks are held for less than 1000 PAUSE cycles or more than a few thousand. Default the Pause Filter Counter to 3000 to detect the contended spinlocks. Processor support for this feature is indicated by a CPUID bit. On a 24 core system running 4 guests each with 16 VCPUs, this patch improved overall performance of each guest's 32 job kernbench by approximately 3-5% when combined with a scheduler algorithm thati caused the VCPU to sleep for a brief period. Further performance improvement may be possible with a more sophisticated yield algorithm. Signed-off-by: Mark Langsdorf Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 69610c5d6dea..170b2d9c6909 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -46,6 +46,7 @@ MODULE_LICENSE("GPL"); #define SVM_FEATURE_NPT (1 << 0) #define SVM_FEATURE_LBRV (1 << 1) #define SVM_FEATURE_SVML (1 << 2) +#define SVM_FEATURE_PAUSE_FILTER (1 << 10) #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ @@ -654,6 +655,11 @@ static void init_vmcb(struct vcpu_svm *svm) svm->nested.vmcb = 0; svm->vcpu.arch.hflags = 0; + if (svm_has(SVM_FEATURE_PAUSE_FILTER)) { + control->pause_filter_count = 3000; + control->intercept |= (1ULL << INTERCEPT_PAUSE); + } + enable_gif(svm); } @@ -2281,6 +2287,12 @@ static int interrupt_window_interception(struct vcpu_svm *svm) return 1; } +static int pause_interception(struct vcpu_svm *svm) +{ + kvm_vcpu_on_spin(&(svm->vcpu)); + return 1; +} + static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR0] = emulate_on_interception, [SVM_EXIT_READ_CR3] = emulate_on_interception, @@ -2316,6 +2328,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_CPUID] = cpuid_interception, [SVM_EXIT_IRET] = iret_interception, [SVM_EXIT_INVD] = emulate_on_interception, + [SVM_EXIT_PAUSE] = pause_interception, [SVM_EXIT_HLT] = halt_interception, [SVM_EXIT_INVLPG] = invlpg_interception, [SVM_EXIT_INVLPGA] = invlpga_interception, -- cgit v1.2.2 From 6be7d3062b59af891be7e40c6802350de5f78cef Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 18 Oct 2009 13:24:54 +0200 Subject: KVM: SVM: Cleanup NMI singlestep Push the NMI-related singlestep variable into vcpu_svm. It's dealing with an AMD-specific deficit, nothing generic for x86. Acked-by: Gleb Natapov Signed-off-by: Jan Kiszka arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/svm.c | 12 +++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 170b2d9c6909..ffa6ad216b71 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -107,6 +107,8 @@ struct vcpu_svm { u32 *msrpm; struct nested_state nested; + + bool nmi_singlestep; }; /* enable NPT for AMD64 and X86 with PAE */ @@ -1050,7 +1052,7 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) svm->vmcb->control.intercept_exceptions &= ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); - if (vcpu->arch.singlestep) + if (svm->nmi_singlestep) svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { @@ -1195,13 +1197,13 @@ static int db_interception(struct vcpu_svm *svm) if (!(svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && - !svm->vcpu.arch.singlestep) { + !svm->nmi_singlestep) { kvm_queue_exception(&svm->vcpu, DB_VECTOR); return 1; } - if (svm->vcpu.arch.singlestep) { - svm->vcpu.arch.singlestep = false; + if (svm->nmi_singlestep) { + svm->nmi_singlestep = false; if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); @@ -2543,7 +2545,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) /* Something prevents NMI from been injected. Single step over possible problem (IRET or exception injection or interrupt shadow) */ - vcpu->arch.singlestep = true; + svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); update_db_intercept(vcpu); } -- cgit v1.2.2 From 18fa000ae453767b59ab97477925895a3f0c46ea Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Sat, 24 Oct 2009 02:49:59 -0200 Subject: KVM: SVM: Reset cr0 properly on vcpu reset svm_vcpu_reset() was not properly resetting the contents of the guest-visible cr0 register, causing the following issue: https://bugzilla.redhat.com/show_bug.cgi?id=525699 Without resetting cr0 properly, the vcpu was running the SIPI bootstrap routine with paging enabled, making the vcpu get a pagefault exception while trying to run it. Instead of setting vmcb->save.cr0 directly, the new code just resets kvm->arch.cr0 and calls kvm_set_cr0(). The bits that were set/cleared on vmcb->save.cr0 (PG, WP, !CD, !NW) will be set properly by svm_set_cr0(). kvm_set_cr0() is used instead of calling svm_set_cr0() directly to make sure kvm_mmu_reset_context() is called to reset the mmu to nonpaging mode. Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ffa6ad216b71..c9ef6c0e1e98 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -628,11 +628,12 @@ static void init_vmcb(struct vcpu_svm *svm) save->rip = 0x0000fff0; svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; - /* - * cr0 val on cpu init should be 0x60000010, we enable cpu - * cache by default. the orderly way is to enable cache in bios. + /* This is the guest-visible cr0 value. + * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. */ - save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; + svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; + kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); + save->cr4 = X86_CR4_PAE; /* rdx = ?? */ -- cgit v1.2.2 From 3ce672d48400e0112fec7a3cb6bb2120493c6e11 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Sat, 24 Oct 2009 02:50:00 -0200 Subject: KVM: SVM: init_vmcb(): remove redundant save->cr0 initialization The svm_set_cr0() call will initialize save->cr0 properly even when npt is enabled, clearing the NW and CD bits as expected, so we don't need to initialize it manually for npt_enabled anymore. Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c9ef6c0e1e98..34b700f9e498 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -648,8 +648,6 @@ static void init_vmcb(struct vcpu_svm *svm) control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| INTERCEPT_CR3_MASK); save->g_pat = 0x0007040600070406ULL; - /* enable caching because the QEMU Bios doesn't enable it */ - save->cr0 = X86_CR0_ET; save->cr3 = 0; save->cr4 = 0; } -- cgit v1.2.2 From 3cfc3092f40bc37c57ba556cfd8de4218f2135ab Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 12 Nov 2009 01:04:25 +0100 Subject: KVM: x86: Add KVM_GET/SET_VCPU_EVENTS This new IOCTL exports all yet user-invisible states related to exceptions, interrupts, and NMIs. Together with appropriate user space changes, this fixes sporadic problems of vmsave/restore, live migration and system reset. [avi: future-proof abi by adding a flags field] Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 34b700f9e498..3de0b37ec038 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2499,6 +2499,26 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu) !(svm->vcpu.arch.hflags & HF_NMI_MASK); } +static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + return !!(svm->vcpu.arch.hflags & HF_NMI_MASK); +} + +static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if (masked) { + svm->vcpu.arch.hflags |= HF_NMI_MASK; + svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); + } else { + svm->vcpu.arch.hflags &= ~HF_NMI_MASK; + svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); + } +} + static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -2946,6 +2966,8 @@ static struct kvm_x86_ops svm_x86_ops = { .queue_exception = svm_queue_exception, .interrupt_allowed = svm_interrupt_allowed, .nmi_allowed = svm_nmi_allowed, + .get_nmi_mask = svm_get_nmi_mask, + .set_nmi_mask = svm_set_nmi_mask, .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, -- cgit v1.2.2