From f461a1d80c865e5ec4d24107adbab8b010b60e32 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 10 Sep 2008 13:57:43 +0200 Subject: arch/x86/kernel/kdebugfs.c: introduce missing kfree Error handling code following a kmalloc should free the allocated data. Note that at the point of the change, node has not yet been stored in d, so it is not affected by the existing cleanup code. The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,l; position p1,p2; expression *ptr != NULL; @@ ( if ((x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...)) == NULL) S | x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S ) <... when != x when != if (...) { <+...x...+> } x->f = E ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Signed-off-by: Ingo Molnar --- arch/x86/kernel/kdebugfs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index f2d43bc75514..ff7d3b0124f1 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -139,6 +139,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) if (PageHighMem(pg)) { data = ioremap_cache(pa_data, sizeof(*data)); if (!data) { + kfree(node); error = -ENXIO; goto err_dir; } -- cgit v1.2.2 From 44874f84918e37b64bec6df1587e5fe2fdf6ab62 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 27 Aug 2008 14:18:43 +0200 Subject: KVM: SVM: fix random segfaults with NPT enabled This patch introduces a guest TLB flush on every NPF exit in KVM. This fixes random segfaults and #UD exceptions in the guest seen under some workloads (e.g. long running compile workloads or tbench). A kernbench run with and without that fix showed that it has a slowdown lower than 0.5% Cc: stable@kernel.org Signed-off-by: Joerg Roedel Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e2ee264740c7..d1106cddab0d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -62,6 +62,7 @@ static int npt = 1; module_param(npt, int, S_IRUGO); static void kvm_reput_irq(struct vcpu_svm *svm); +static void svm_flush_tlb(struct kvm_vcpu *vcpu); static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) { @@ -1027,6 +1028,13 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code, (u32)fault_address, (u32)(fault_address >> 32), handler); + /* + * FIXME: Tis shouldn't be necessary here, but there is a flush + * missing in the MMU code. Until we find this bug, flush the + * complete TLB here on an NPF + */ + if (npt_enabled) + svm_flush_tlb(&svm->vcpu); if (event_injection) kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); -- cgit v1.2.2 From e5eab0cede4b1ffaca4ad857d840127622038e55 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Sep 2008 19:11:51 +0200 Subject: KVM: SVM: fix guest global tlb flushes with NPT Accesses to CR4 are intercepted even with Nested Paging enabled. But the code does not check if the guest wants to do a global TLB flush. So this flush gets lost. This patch adds the check and the flush to svm_set_cr4. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d1106cddab0d..8233b86c778c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -879,6 +879,10 @@ set: static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE; + unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; + + if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) + force_new_asid(vcpu); vcpu->arch.cr4 = cr4; if (!npt_enabled) -- cgit v1.2.2 From 534e38b447df47f129a6d3ec3af6705c1e3f651e Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 8 Sep 2008 15:12:30 +0800 Subject: KVM: VMX: Always return old for clear_flush_young() when using EPT As well as discard fake accessed bit and dirty bit of EPT. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 4 ++++ arch/x86/kvm/vmx.c | 3 +-- arch/x86/kvm/vmx.h | 2 -- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 0bfe2bd305eb..3da2508eb22a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -711,6 +711,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) u64 *spte; int young = 0; + /* always return old for EPT */ + if (!shadow_accessed_mask) + return 0; + spte = rmap_next(kvm, rmapp, NULL); while (spte) { int _young; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2a69773e3b26..7041cc52b562 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3301,8 +3301,7 @@ static int __init vmx_init(void) kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); - kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, - VMX_EPT_FAKE_DIRTY_MASK, 0ull, + kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, VMX_EPT_EXECUTABLE_MASK); kvm_enable_tdp(); } else diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 425a13436b3f..23e8373507ad 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -370,8 +370,6 @@ enum vmcs_field { #define VMX_EPT_READABLE_MASK 0x1ull #define VMX_EPT_WRITABLE_MASK 0x2ull #define VMX_EPT_EXECUTABLE_MASK 0x4ull -#define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62) -#define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63) #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul -- cgit v1.2.2 From 0ad5bce7409d681a5655c66e64bb0eb740b89c1f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 11 Sep 2008 16:42:00 -0700 Subject: x86: fix possible x86_64 and EFI regression Russ Anderson reported a boot crash with EFI and latest mainline: BIOS-e820: 00000000fffa0000 - 00000000fffac000 (reserved) Pid: 0, comm: swapper Not tainted 2.6.27-rc5-00100-gec0c15a-dirty #5 Call Trace: [] early_idt_handler+0x55/0x69 [] __memcpy+0x12/0xa4 [] efi_init+0xce/0x932 [] setup_early_serial8250_console+0x2d/0x36a [] __insert_resource+0x18/0xc8 [] setup_arch+0x3a7/0x632 [] start_kernel+0x91/0x367 [] x86_64_start_kernel+0xe3/0xe7 [] x86_64_start_kernel+0x0/0xe7 RIP 0x10 Such a crash is possible if the CPU in this system is a 64-bit processor which doesn't support NX (ie, old Intel P4 -based64-bit processors). Certainly, if we support such processors, then we should start with _PAGE_NX initially clear in __supported_pte_flags, and then set it once we've established that the processor does indeed support NX. That will prevent early_ioremap - or anything else - from trying to set it. The simple fix is to simply call check_efer() earlier. Reported-by: Russ Anderson Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 362d4e7f2d38..9838f2539dfc 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -670,6 +670,10 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); +#ifdef CONFIG_X86_64 + check_efer(); +#endif + #if defined(CONFIG_VMI) && defined(CONFIG_X86_32) /* * Must be before kernel pagetables are setup @@ -738,7 +742,6 @@ void __init setup_arch(char **cmdline_p) #else num_physpages = max_pfn; - check_efer(); /* How many end-of-memory variables you have, grandma! */ /* need this before calling reserve_initrd */ -- cgit v1.2.2 From 5670a43d710a12fcbbfaefd3991002768b488d82 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 14 Sep 2008 07:42:23 -0700 Subject: xen: fix for xen guest with mem > 3.7G PFN_PHYS() can truncate large addresses unless its passed a suitable large type. This is fixed more generally in the patch series introducing phys_addr_t, but we need a short-term fix to solve a Xen regression reported by Roberto De Ioris. Reported-by: Roberto De Ioris Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/xen/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index b6acc3a0af46..d67901083888 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -42,7 +42,7 @@ char * __init xen_memory_setup(void) e820.nr_map = 0; - e820_add_region(0, PFN_PHYS(max_pfn), E820_RAM); + e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM); /* * Even though this is normal, usable memory under Xen, reserve -- cgit v1.2.2 From 5132895f14a57607152f7865dc862fb076ce2585 Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Wed, 3 Sep 2008 14:36:40 +0100 Subject: x86/paravirt: Remove duplicate paravirt_pagetable_setup_{start, done}() They were already called once in arch/x86/kernel/setup.c - we don't need to call them again. fixes: http://bugzilla.kernel.org/show_bug.cgi?id=11485 Signed-off-by: Alex Nixon Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index d37f29376b0c..60ec1d08ff24 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -458,11 +458,7 @@ static void __init pagetable_init(void) { pgd_t *pgd_base = swapper_pg_dir; - paravirt_pagetable_setup_start(pgd_base); - permanent_kmaps_init(pgd_base); - - paravirt_pagetable_setup_done(pgd_base); } #ifdef CONFIG_ACPI_SLEEP -- cgit v1.2.2 From ba0593bf553c450a03dbc5f8c1f0ff58b778a0c8 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 16 Sep 2008 09:29:40 -0700 Subject: x86: completely disable NOPL on 32 bits Completely disable NOPL on 32 bits. It turns out that Microsoft Virtual PC is so broken it can't even reliably *fail* in the presence of NOPL. This leaves the infrastructure in place but disables it unconditionally. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8aab8517642e..4e456bd955bb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -344,31 +344,15 @@ static void __init early_cpu_detect(void) /* * The NOPL instruction is supposed to exist on all CPUs with - * family >= 6, unfortunately, that's not true in practice because + * family >= 6; unfortunately, that's not true in practice because * of early VIA chips and (more importantly) broken virtualizers that - * are not easy to detect. Hence, probe for it based on first - * principles. + * are not easy to detect. In the latter case it doesn't even *fail* + * reliably, so probing for it doesn't even work. Disable it completely + * unless we can find a reliable way to detect all the broken cases. */ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) { - const u32 nopl_signature = 0x888c53b1; /* Random number */ - u32 has_nopl = nopl_signature; - clear_cpu_cap(c, X86_FEATURE_NOPL); - if (c->x86 >= 6) { - asm volatile("\n" - "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ - "2:\n" - " .section .fixup,\"ax\"\n" - "3: xor %0,%0\n" - " jmp 2b\n" - " .previous\n" - _ASM_EXTABLE(1b,3b) - : "+a" (has_nopl)); - - if (has_nopl == nopl_signature) - set_cpu_cap(c, X86_FEATURE_NOPL); - } } static void __cpuinit generic_identify(struct cpuinfo_x86 *c) -- cgit v1.2.2