From 982d007a6eec4a0abb404d2355eeec2c041c61ea Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 30 Sep 2009 17:57:27 -0700 Subject: x86: Optimize cmpxchg64() at build-time some more Try to avoid the 'alternates()' code when we can statically determine that cmpxchg8b is fine. We already have that CONFIG_x86_CMPXCHG64 (enabled by PAE support), and we could easily also enable it for some of the CPU cases. Note, this patch only adds CMPXCHG8B for the obvious Intel CPU's, not for others. (There was something really messy about cmpxchg8b and clone CPU's, so if you enable it on other CPUs later, do it carefully.) If we avoid that asm-alternative thing when we can assume the instruction exists, we'll generate less support crud, and we'll avoid the whole issue with that extra 'nop' for padding instruction sizes etc. LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 527519b8a9f9..f2824fb8c79c 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -400,7 +400,7 @@ config X86_TSC config X86_CMPXCHG64 def_bool y - depends on X86_PAE || X86_64 + depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM # this should be set for all -march=.. options where the compiler # generates cmov. @@ -412,6 +412,7 @@ config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 default "6" if X86_32 && X86_P6_NOP + default "5" if X86_32 && X86_CMPXCHG64 default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) default "3" -- cgit v1.2.2 From d1716a60a8ea90788d24aa22d7eec83fbdd2d88a Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Wed, 30 Sep 2009 16:19:39 -0700 Subject: x86: Fix csum_ipv6_magic asm memory clobber Just like ip_fast_csum, the assembly snippet in csum_ipv6_magic needs a memory clobber, as it is only passed the address of the buffer, not a memory reference to the buffer itself. This caused failures in Hurd's pfinetv4 when we tried to compile it with gcc-4.3 (bogus checksums). Signed-off-by: Samuel Thibault Acked-by: David S. Miller Cc: Andi Kleen Cc: Signed-off-by: Andrew Morton Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/include/asm/checksum_32.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h index 7c5ef8b14d92..46fc474fd819 100644 --- a/arch/x86/include/asm/checksum_32.h +++ b/arch/x86/include/asm/checksum_32.h @@ -161,7 +161,8 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, "adcl $0, %0 ;\n" : "=&r" (sum) : "r" (saddr), "r" (daddr), - "r" (htonl(len)), "r" (htonl(proto)), "0" (sum)); + "r" (htonl(len)), "r" (htonl(proto)), "0" (sum) + : "memory"); return csum_fold(sum); } -- cgit v1.2.2 From 04edbdef02ec4386a2ae38cab7ae7d166e17a756 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Oct 2009 07:30:38 +0200 Subject: x86: Don't generate cmpxchg8b_emu if CONFIG_X86_CMPXCHG64=y Conditionaly compile cmpxchg8b_emu.o and EXPORT_SYMBOL(cmpxchg8b_emu). This reduces the kernel size a bit. 
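For reference, the emulation that becomes conditional here is logically equivalent to the following C sketch. This is illustrative only -- the real cmpxchg8b_emu is hand-written assembly with a custom register-based calling convention, and it is safe without a lock prefix only because CPUs lacking cmpxchg8b are not run in SMP configurations:

    /* Rough C model of cmpxchg8b_emu; not the kernel's actual code. */
    static u64 cmpxchg8b_emu_model(volatile u64 *ptr, u64 old, u64 new)
    {
        unsigned long flags;
        u64 cur;

        local_irq_save(flags);  /* NMIs aside, nothing can interleave on UP */
        cur = *ptr;
        if (cur == old)
            *ptr = new;
        local_irq_restore(flags);

        return cur;
    }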
Signed-off-by: Eric Dumazet Cc: Arjan van de Ven Cc: Martin Schwidefsky Cc: John Stultz Cc: Peter Zijlstra Cc: Linus Torvalds LKML-Reference: <4AC43E7E.1000600@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/i386_ksyms_32.c | 2 ++ arch/x86/lib/Makefile | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 1736c5a725aa..9c3bd4a2050e 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -15,8 +15,10 @@ EXPORT_SYMBOL(mcount); * the export, but dont use it from C code, it is used * by assembly code and is not using C calling convention! */ +#ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); EXPORT_SYMBOL(cmpxchg8b_emu); +#endif /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_generic); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 3e549b8ec8c9..85f5db95c60f 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -15,8 +15,10 @@ ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o lib-y += checksum_32.o lib-y += strstr_32.o - lib-y += semaphore_32.o string_32.o cmpxchg8b_emu.o - + lib-y += semaphore_32.o string_32.o +ifneq ($(CONFIG_X86_CMPXCHG64),y) + lib-y += cmpxchg8b_emu.o +endif lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o else obj-y += io_64.o iomap_copy_64.o -- cgit v1.2.2 From ea3acb199a5d7e4da1de0a4288eba993b29f33b9 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 24 Sep 2009 09:08:30 -0500 Subject: x86: earlyprintk: Fix regression to handle serial,ttySn as 1 arg Commit c953094 ("early_printk: Allow more than one early console") introduced a regression in the parsing of the earlyprintk= kernel arguments. If you specify "earlyprintk=serial,ttyS0,115200" as a kernel argument, the "serial,ttyS" should be parsed as a single argument and not as "serial" and then "ttyS". Also update the documentation to reflect you can specify the ttyS directly without the "serial" argument. Signed-off-by: Jason Wessel Cc: Len Brown Cc: Greg KH Cc: Linus Torvalds Cc: Andrew Morton Cc: Johannes Weiner LKML-Reference: <4ABB7D5E.6000301@windriver.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 41fd965c80c6..b9c830c12b4a 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -206,8 +206,11 @@ static int __init setup_early_printk(char *buf) while (*buf != '\0') { if (!strncmp(buf, "serial", 6)) { - early_serial_init(buf + 6); + buf += 6; + early_serial_init(buf); early_console_register(&early_serial_console, keep); + if (!strncmp(buf, ",ttyS", 5)) + buf += 5; } if (!strncmp(buf, "ttyS", 4)) { early_serial_init(buf + 4); -- cgit v1.2.2 From 4701472e441e41be2549a25228f703bc9cd13b5b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 26 Sep 2009 21:41:41 +0530 Subject: x86, SLUB: Remove unused CONFIG FAST_CMPXCHG_LOCAL Remove unused CONFIG FAST_CMPXCHG_LOCAL from Kconfig. Reported-by: Robert P. J. Day Signed-off-by: Jaswinder Singh Rajput Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: Matt Mackall Cc: Andrew Morton Cc: "Robert P. J. 
Day" Cc: linux-mm@kvack.org LKML-Reference: <1253981501.4568.61.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8da93745c087..c876bace8fdc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -86,10 +86,6 @@ config STACKTRACE_SUPPORT config HAVE_LATENCYTOP_SUPPORT def_bool y -config FAST_CMPXCHG_LOCAL - bool - default y - config MMU def_bool y -- cgit v1.2.2 From 24e35800cdc4350fc34e2bed37b608a9e13ab3b6 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 30 Sep 2009 11:22:11 +0100 Subject: x86: Don't leak 64-bit kernel register values to 32-bit processes While 32-bit processes can't directly access R8...R15, they can gain access to these registers by temporarily switching themselves into 64-bit mode. Therefore, registers not preserved anyway by called C functions (i.e. R8...R11) must be cleared prior to returning to user mode. Signed-off-by: Jan Beulich Cc: LKML-Reference: <4AC34D73020000780001744A@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 74619c4f9fda..1733f9f65e82 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -21,8 +21,8 @@ #define __AUDIT_ARCH_LE 0x40000000 #ifndef CONFIG_AUDITSYSCALL -#define sysexit_audit int_ret_from_sys_call -#define sysretl_audit int_ret_from_sys_call +#define sysexit_audit ia32_ret_from_sys_call +#define sysretl_audit ia32_ret_from_sys_call #endif #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) @@ -39,12 +39,12 @@ .endm /* clobbers %eax */ - .macro CLEAR_RREGS _r9=rax + .macro CLEAR_RREGS offset=0, _r9=rax xorl %eax,%eax - movq %rax,R11(%rsp) - movq %rax,R10(%rsp) - movq %\_r9,R9(%rsp) - movq %rax,R8(%rsp) + movq %rax,\offset+R11(%rsp) + movq %rax,\offset+R10(%rsp) + movq %\_r9,\offset+R9(%rsp) + movq %rax,\offset+R8(%rsp) .endm /* @@ -172,6 +172,10 @@ sysexit_from_sys_call: movl RIP-R11(%rsp),%edx /* User %eip */ CFI_REGISTER rip,rdx RESTORE_ARGS 1,24,1,1,1,1 + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 popfq CFI_ADJUST_CFA_OFFSET -8 /*CFI_RESTORE rflags*/ @@ -202,7 +206,7 @@ sysexit_from_sys_call: .macro auditsys_exit exit,ebpsave=RBP testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) - jnz int_ret_from_sys_call + jnz ia32_ret_from_sys_call TRACE_IRQS_ON sti movl %eax,%esi /* second arg, syscall return value */ @@ -218,8 +222,9 @@ sysexit_from_sys_call: cli TRACE_IRQS_OFF testl %edi,TI_flags(%r10) - jnz int_with_check - jmp \exit + jz \exit + CLEAR_RREGS -ARGOFFSET + jmp int_with_check .endm sysenter_auditsys: @@ -329,6 +334,9 @@ sysretl_from_sys_call: CFI_REGISTER rip,rcx movl EFLAGS-ARGOFFSET(%rsp),%r11d /*CFI_REGISTER rflags,r11*/ + xorq %r10,%r10 + xorq %r9,%r9 + xorq %r8,%r8 TRACE_IRQS_ON movl RSP-ARGOFFSET(%rsp),%esp CFI_RESTORE rsp @@ -353,7 +361,7 @@ cstar_tracesys: #endif xchgl %r9d,%ebp SAVE_REST - CLEAR_RREGS r9 + CLEAR_RREGS 0, r9 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter @@ -425,6 +433,8 @@ ia32_do_call: call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: movq %rax,RAX-ARGOFFSET(%rsp) +ia32_ret_from_sys_call: + CLEAR_RREGS -ARGOFFSET jmp int_ret_from_sys_call ia32_tracesys: @@ -442,8 +452,8 @@ 
END(ia32_syscall) ia32_badsys: movq $0,ORIG_RAX-ARGOFFSET(%rsp) - movq $-ENOSYS,RAX-ARGOFFSET(%rsp) - jmp int_ret_from_sys_call + movq $-ENOSYS,%rax + jmp ia32_sysret quiet_ni_syscall: movq $-ENOSYS,%rax -- cgit v1.2.2 From f436f8bb73138bc74eb1c6527723e00988ad8a8a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 1 Oct 2009 16:14:32 +0200 Subject: x86: EDAC: MCE: Fix MCE decoding callback logic Make decoding of MCEs happen only on AMD hardware by registering a non-default callback only on CPU families which support it. While looking at the interaction of decode_mce() with the other MCE code i also noticed a few other things and made the following cleanups/fixes: - Fixed the mce_decode() weak alias - a weak alias is really not good here, it should be a proper callback. A weak alias will be overriden if a piece of code is built into the kernel - not good, obviously. - The patch initializes the callback on AMD family 10h and 11h. - Added the more correct fallback printk of: No support for human readable MCE decoding on this CPU type. Transcribe the message and run it through 'mcelog --ascii' to decode. On CPUs that dont have a decoder. - Made the surrounding code more readable. Note that the callback allows us to have a default fallback - without having to check the CPU versions during the printout itself. When an EDAC module registers itself, it can install the decode-print function. (there's no unregister needed as this is core code.) version -v2 by Borislav Petkov: - add K8 to the set of supported CPUs - always build in edac_mce_amd since we use an early_initcall now - fix checkpatch warnings Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Andi Kleen LKML-Reference: <20091001141432.GA11410@aftab> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 2 ++ arch/x86/kernel/cpu/mcheck/mce.c | 58 +++++++++++++++++++++++++--------------- 2 files changed, 38 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index b608a64c5814..f1363b72364f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -133,6 +133,8 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} static inline void enable_p5_mce(void) {} #endif +extern void (*x86_mce_decode_callback)(struct mce *m); + void mce_setup(struct mce *m); void mce_log(struct mce *m); DECLARE_PER_CPU(struct sys_device, mce_dev); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 183c3457d2f4..b1598a9436d0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; +static void default_decode_mce(struct mce *m) +{ + pr_emerg("No human readable MCE decoding support on this CPU type.\n"); + pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); +} + +/* + * CPU/chipset specific EDAC code can register a callback here to print + * MCE errors in a human-readable form: + */ +void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; +EXPORT_SYMBOL(x86_mce_decode_callback); /* MCA banks polled by the period polling timer for corrected events */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { @@ -165,46 +177,46 @@ void mce_log(struct mce *mce) set_bit(0, &mce_need_notify); } -void __weak decode_mce(struct mce *m) -{ - return; -} - static void print_mce(struct mce *m) { - printk(KERN_EMERG - "CPU %d: Machine Check Exception: %16Lx 
Bank %d: %016Lx\n", + pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", m->extcpu, m->mcgstatus, m->bank, m->status); + if (m->ip) { - printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", - !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->ip); + pr_emerg("RIP%s %02x:<%016Lx> ", + !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", + m->cs, m->ip); + if (m->cs == __KERNEL_CS) print_symbol("{%s}", m->ip); - printk(KERN_CONT "\n"); + pr_cont("\n"); } - printk(KERN_EMERG "TSC %llx ", m->tsc); + + pr_emerg("TSC %llx ", m->tsc); if (m->addr) - printk(KERN_CONT "ADDR %llx ", m->addr); + pr_cont("ADDR %llx ", m->addr); if (m->misc) - printk(KERN_CONT "MISC %llx ", m->misc); - printk(KERN_CONT "\n"); - printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", - m->cpuvendor, m->cpuid, m->time, m->socketid, - m->apicid); + pr_cont("MISC %llx ", m->misc); + + pr_cont("\n"); + pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", + m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); - decode_mce(m); + /* + * Print out human-readable details about the MCE error, + * (if the CPU has an implementation for that): + */ + x86_mce_decode_callback(m); } static void print_mce_head(void) { - printk(KERN_EMERG "\nHARDWARE ERROR\n"); + pr_emerg("\nHARDWARE ERROR\n"); } static void print_mce_tail(void) { - printk(KERN_EMERG "This is not a software problem!\n" - "Run through mcelog --ascii to decode and contact your hardware vendor\n"); + pr_emerg("This is not a software problem!\n"); } #define PANIC_TIMEOUT 5 /* 5 seconds */ @@ -218,6 +230,7 @@ static atomic_t mce_fake_paniced; static void wait_for_panic(void) { long timeout = PANIC_TIMEOUT*USEC_PER_SEC; + preempt_disable(); local_irq_enable(); while (timeout-- > 0) @@ -285,6 +298,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) static int msr_to_offset(u32 msr) { unsigned bank = __get_cpu_var(injectm.bank); + if (msr == rip_msr) return offsetof(struct mce, ip); if (msr == MSR_IA32_MCx_STATUS(bank)) -- cgit v1.2.2 From 11879ba5d9ab8174af9b9cefbb2396a54dfbf8c1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 26 Sep 2009 20:51:50 +0200 Subject: x86: Simplify bound checks in the MTRR code The current bound checks for copy_from_user in the MTRR driver are not as obvious as they could be, and gcc agrees with that. This patch simplifies the boundary checks to the point that gcc can now prove to itself that the copy_from_user() is never going past its bounds. 
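The key trick, annotated -- this is a sketch of the same logic as the hunk below, wrapped in a hypothetical helper for illustration:

    /* Hypothetical wrapper showing why the signed working copy helps. */
    static ssize_t mtrr_len_check(size_t len)
    {
        int length = len;       /* signed working copy of the length */

        length--;               /* len == 0 becomes -1 here; it cannot
                                 * wrap to SIZE_MAX the way "len - 1"
                                 * does on an unsigned type */
        if (length > LINE_SIZE - 1)
            length = LINE_SIZE - 1;
        if (length < 0)
            return -EINVAL;     /* rejects the len == 0 case explicitly */

        /* 0 <= length <= LINE_SIZE - 1 is now locally provable, so a
         * copy_from_user(line, buf, length) is visibly in bounds. */
        return length;
    }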
Signed-off-by: Arjan van de Ven Cc: Yinghai Lu Cc: Linus Torvalds LKML-Reference: <20090926205150.30797709@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/if.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index f04e72527604..3c1b12d461d1 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) unsigned long long base, size; char *ptr; char line[LINE_SIZE]; + int length; size_t linelen; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!len) - return -EINVAL; memset(line, 0, LINE_SIZE); - if (len > LINE_SIZE) - len = LINE_SIZE; - if (copy_from_user(line, buf, len - 1)) + + length = len; + length--; + + if (length > LINE_SIZE - 1) + length = LINE_SIZE - 1; + + if (length < 0) + return -EINVAL; + + if (copy_from_user(line, buf, length)) return -EFAULT; linelen = strlen(line); -- cgit v1.2.2 From e3be785fb59f92c0df685037062d041619653b7a Mon Sep 17 00:00:00 2001 From: Marin Mitov Date: Sat, 3 Oct 2009 20:45:02 +0300 Subject: x86, pci: Correct spelling in a comment Signed-off-by: Marin Mitov Cc: Joerg Roedel Cc: Jesse Brandeburg LKML-Reference: <200910032045.02523.mitov@issp.bas.bg> Signed-off-by: Ingo Molnar ====================================================== --- arch/x86/kernel/pci-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 64b838eac18c..d20009b4e6ef 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -35,7 +35,7 @@ int iommu_detected __read_mostly = 0; /* * This variable becomes 1 if iommu=pt is passed on the kernel command line. - * If this variable is 1, IOMMU implementations do no DMA ranslation for + * If this variable is 1, IOMMU implementations do no DMA translation for * devices and allow every device to access to whole physical memory. This is * useful if a user want to use an IOMMU only for KVM device assignment to * guests and not for driver dma translation. -- cgit v1.2.2 From 77b1ab1732feb5e3dcbaf31d2f7547c5229f5f3a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 16 Sep 2009 15:24:17 +0200 Subject: KVM: SVM: Fix tsc offset adjustment when running nested When svm_vcpu_load is called while the vcpu is running in guest mode the tsc adjustment made there is lost on the next emulated #vmexit. This causes the tsc running backwards in the guest. This patch fixes the issue by also adjusting the tsc_offset in the emulated hsave area so that it will not get lost. 
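In outline, using a simplified model with hypothetical names rather than the actual KVM data structures:

    struct tsc_offsets {
        s64 vmcb;   /* offset seen by the currently-running guest */
        s64 hsave;  /* L1 state saved across a nested (L2) run */
    };

    static void vcpu_load_on_new_pcpu(struct tsc_offsets *t, s64 delta,
                                      bool nested)
    {
        t->vmcb += delta;           /* what the code already did */
        if (nested)
            t->hsave += delta;      /* the fix */
    }

    static void emulated_vmexit(struct tsc_offsets *t)
    {
        /* L1's offset is restored from hsave; without the fix, the
         * delta applied above is silently discarded at this point,
         * making the TSC appear to run backwards in the guest. */
        t->vmcb = t->hsave;
    }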
Cc: stable@kernel.org Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 944cc9c04b3c..bf5799dc4f9f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -767,6 +767,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) rdtscll(tsc_this); delta = vcpu->arch.host_tsc - tsc_this; svm->vmcb->control.tsc_offset += delta; + if (is_nested(svm)) + svm->nested.hsave->control.tsc_offset += delta; vcpu->cpu = cpu; kvm_migrate_timers(vcpu); svm->asid_generation = 0; -- cgit v1.2.2 From 20824f30bb0b8ae0a4099895fd4509f54cf2e1e2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 16 Sep 2009 15:24:18 +0200 Subject: KVM: SVM: Handle tsc in svm_get_msr/svm_set_msr correctly When running nested we need to touch the l1 guests tsc_offset. Otherwise changes will be lost or a wrong value be read. Cc: stable@kernel.org Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index bf5799dc4f9f..c17404add91f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2059,10 +2059,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) switch (ecx) { case MSR_IA32_TSC: { - u64 tsc; + u64 tsc_offset; - rdtscll(tsc); - *data = svm->vmcb->control.tsc_offset + tsc; + if (is_nested(svm)) + tsc_offset = svm->nested.hsave->control.tsc_offset; + else + tsc_offset = svm->vmcb->control.tsc_offset; + + *data = tsc_offset + native_read_tsc(); break; } case MSR_K6_STAR: @@ -2148,10 +2152,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) switch (ecx) { case MSR_IA32_TSC: { - u64 tsc; + u64 tsc_offset = data - native_read_tsc(); + u64 g_tsc_offset = 0; + + if (is_nested(svm)) { + g_tsc_offset = svm->vmcb->control.tsc_offset - + svm->nested.hsave->control.tsc_offset; + svm->nested.hsave->control.tsc_offset = tsc_offset; + } + + svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset; - rdtscll(tsc); - svm->vmcb->control.tsc_offset = data - tsc; break; } case MSR_K6_STAR: -- cgit v1.2.2 From b2d83cfa3fdefe5c6573d443d099a18dc3a93c5f Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Fri, 25 Sep 2009 11:09:37 +0200 Subject: KVM: fix LAPIC timer period overflow Don't overflow when computing the 64-bit period from 32-bit registers. Fixes sourceforge bug #2826486. Signed-off-by: Aurelien Jarno Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 1ae5ceba7eb2..7024224f0fc8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -664,7 +664,7 @@ static void start_apic_timer(struct kvm_lapic *apic) { ktime_t now = apic->lapic_timer.timer.base->get_time(); - apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) * + apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) * APIC_BUS_CYCLE_NS * apic->divide_count; atomic_set(&apic->lapic_timer.pending, 0); -- cgit v1.2.2 From eb5109e311b5152c0614a28d7d615d087f268f19 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 1 Oct 2009 19:16:58 -0300 Subject: KVM: VMX: flush TLB with INVEPT on cpu migration It is possible that stale EPTP-tagged mappings are used, if a vcpu migrates to a different pcpu. 
Set KVM_REQ_TLB_FLUSH in vmx_vcpu_load, when switching pcpus, which will invalidate both VPID and EPT mappings on the next vm-entry. Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f3812014bd0b..ed53b42caba1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -709,7 +709,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (vcpu->cpu != cpu) { vcpu_clear(vmx); kvm_migrate_timers(vcpu); - vpid_sync_vcpu_all(vmx); + set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); local_irq_disable(); list_add(&vmx->local_vcpus_link, &per_cpu(vcpus_on_cpu, cpu)); -- cgit v1.2.2 From 6a54435560efdab1a08f429a954df4d6c740bddf Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Oct 2009 16:45:13 +0200 Subject: KVM: Prevent overflow in KVM_GET_SUPPORTED_CPUID The number of entries is multiplied by the entry size, which can overflow on 32-bit hosts. Bound the entry count instead. Reported-by: David Wagner Cc: stable@kernel.org Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index be451ee44249..9b9695322f56 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1591,6 +1591,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, if (cpuid->nent < 1) goto out; + if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) + cpuid->nent = KVM_MAX_CPUID_ENTRIES; r = -ENOMEM; cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); if (!cpuid_entries) -- cgit v1.2.2 From acb66dd051d0834c8b36d147ff83a8d39da0fe0b Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Wed, 23 Sep 2009 21:47:16 +0300 Subject: KVM: MMU: don't hold pagecount reference for mapped sptes pages When using mmu notifiers, we are allowed to remove the page count reference taken by get_user_pages on a specific page that is mapped inside the shadow page tables. This is needed so we can balance the pagecount against the mapcount check. (Right now KVM increases the pagecount but does not increase the mapcount when mapping a page into a shadow page table entry, so comparing pagecount against mapcount gives no reliable result.)
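The change in reference handling, in outline (helper semantics paraphrased from the KVM sources, not the literal diff):

    /* Before: the get_user_pages() reference was held for the spte's
     * whole lifetime and dropped at teardown:
     *
     *     rmap_remove():
     *         kvm_release_pfn_dirty(pfn);  // set dirty bit + put_page()
     *
     * After: the reference is dropped as soon as the spte is created,
     * so teardown only transfers the dirty bit:
     *
     *     mmu_set_spte():
     *         kvm_release_pfn_clean(pfn);  // put_page() up front
     *     rmap_remove():
     *         kvm_set_pfn_dirty(pfn);      // set dirty bit, no put_page()
     */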
Signed-off-by: Izik Eidus Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index eca41ae9f453..6c67b230e958 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -634,9 +634,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) if (*spte & shadow_accessed_mask) kvm_set_pfn_accessed(pfn); if (is_writeble_pte(*spte)) - kvm_release_pfn_dirty(pfn); - else - kvm_release_pfn_clean(pfn); + kvm_set_pfn_dirty(pfn); rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); if (!*rmapp) { printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); @@ -1877,8 +1875,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, page_header_update_slot(vcpu->kvm, sptep, gfn); if (!was_rmapped) { rmap_count = rmap_add(vcpu, sptep, gfn); - if (!is_rmap_spte(*sptep)) - kvm_release_pfn_clean(pfn); + kvm_release_pfn_clean(pfn); if (rmap_count > RMAP_RECYCLE_THRESHOLD) rmap_recycle(vcpu, sptep, gfn); } else { -- cgit v1.2.2 From 1403283acca398e244ece35741ad251c1feb5972 Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Wed, 23 Sep 2009 21:47:17 +0300 Subject: KVM: MMU: add SPTE_HOST_WRITEABLE flag to the shadow ptes this flag notify that the host physical page we are pointing to from the spte is write protected, and therefore we cant change its access to be write unless we run get_user_pages(write = 1). (this is needed for change_pte support in kvm) Signed-off-by: Izik Eidus Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 15 +++++++++++---- arch/x86/kvm/paging_tmpl.h | 18 +++++++++++++++--- 2 files changed, 26 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6c67b230e958..5cd8b4ec3a01 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644); #define CREATE_TRACE_POINTS #include "mmutrace.h" +#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) + #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) struct kvm_rmap_desc { @@ -1754,7 +1756,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, int user_fault, int write_fault, int dirty, int level, gfn_t gfn, pfn_t pfn, bool speculative, - bool can_unsync) + bool can_unsync, bool reset_host_protection) { u64 spte; int ret = 0; @@ -1781,6 +1783,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, kvm_is_mmio_pfn(pfn)); + if (reset_host_protection) + spte |= SPTE_HOST_WRITEABLE; + spte |= (u64)pfn << PAGE_SHIFT; if ((pte_access & ACC_WRITE_MASK) @@ -1826,7 +1831,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pt_access, unsigned pte_access, int user_fault, int write_fault, int dirty, int *ptwrite, int level, gfn_t gfn, - pfn_t pfn, bool speculative) + pfn_t pfn, bool speculative, + bool reset_host_protection) { int was_rmapped = 0; int was_writeble = is_writeble_pte(*sptep); @@ -1858,7 +1864,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, } if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, - dirty, level, gfn, pfn, speculative, true)) { + dirty, level, gfn, pfn, speculative, true, + reset_host_protection)) { if (write_fault) *ptwrite = 1; kvm_x86_ops->tlb_flush(vcpu); @@ -1906,7 +1913,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, if (iterator.level == level) { mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, 
ACC_ALL, 0, write, 1, &pt_write, - level, gfn, pfn, false); + level, gfn, pfn, false, true); ++vcpu->stat.pf_fixed; break; } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index d2fec9c12d22..72558f8ff3f5 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -273,9 +273,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) return; kvm_get_pfn(pfn); + /* + * we call mmu_set_spte() with reset_host_protection = true beacuse that + * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). + */ mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, - gpte_to_gfn(gpte), pfn, true); + gpte_to_gfn(gpte), pfn, true, true); } /* @@ -308,7 +312,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, user_fault, write_fault, gw->ptes[gw->level-1] & PT_DIRTY_MASK, ptwrite, level, - gw->gfn, pfn, false); + gw->gfn, pfn, false, true); break; } @@ -558,6 +562,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { int i, offset, nr_present; + bool reset_host_protection; offset = nr_present = 0; @@ -595,9 +600,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) nr_present++; pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); + if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) { + pte_access &= ~ACC_WRITE_MASK; + reset_host_protection = 0; + } else { + reset_host_protection = 1; + } set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, - spte_to_pfn(sp->spt[i]), true, false); + spte_to_pfn(sp->spt[i]), true, false, + reset_host_protection); } return !nr_present; -- cgit v1.2.2 From 3da0dd433dc399a8c0124d0614d82a09b6a49bce Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Wed, 23 Sep 2009 21:47:18 +0300 Subject: KVM: add support for change_pte mmu notifiers this is needed for kvm if it want ksm to directly map pages into its shadow page tables. 
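A simplified sketch of how the generic side reaches the new arch hook -- the real wiring lives outside this diff, in virt/kvm/kvm_main.c and the mmu_notifier core; locking and sequence handling are abbreviated here:

    static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
                                            struct mm_struct *mm,
                                            unsigned long address,
                                            pte_t pte)
    {
        struct kvm *kvm = container_of(mn, struct kvm, mmu_notifier);

        spin_lock(&kvm->mmu_lock);
        kvm->mmu_notifier_seq++;
        kvm_set_spte_hva(kvm, address, pte);    /* declared in the hunk below */
        spin_unlock(&kvm->mmu_lock);
    }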
[marcelo: cast pfn assignment to u64] Signed-off-by: Izik Eidus Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 62 +++++++++++++++++++++++++++++++++++------ 2 files changed, 54 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3be000435fad..d83892226f73 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -796,6 +796,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_age_hva(struct kvm *kvm, unsigned long hva); +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5cd8b4ec3a01..685a4ffac8e6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -748,7 +748,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) return write_protected; } -static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) +static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) { u64 *spte; int need_tlb_flush = 0; @@ -763,8 +763,45 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) return need_tlb_flush; } -static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, - int (*handler)(struct kvm *kvm, unsigned long *rmapp)) +static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) +{ + int need_flush = 0; + u64 *spte, new_spte; + pte_t *ptep = (pte_t *)data; + pfn_t new_pfn; + + WARN_ON(pte_huge(*ptep)); + new_pfn = pte_pfn(*ptep); + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + BUG_ON(!is_shadow_present_pte(*spte)); + rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); + need_flush = 1; + if (pte_write(*ptep)) { + rmap_remove(kvm, spte); + __set_spte(spte, shadow_trap_nonpresent_pte); + spte = rmap_next(kvm, rmapp, NULL); + } else { + new_spte = *spte &~ (PT64_BASE_ADDR_MASK); + new_spte |= (u64)new_pfn << PAGE_SHIFT; + + new_spte &= ~PT_WRITABLE_MASK; + new_spte &= ~SPTE_HOST_WRITEABLE; + if (is_writeble_pte(*spte)) + kvm_set_pfn_dirty(spte_to_pfn(*spte)); + __set_spte(spte, new_spte); + spte = rmap_next(kvm, rmapp, spte); + } + } + if (need_flush) + kvm_flush_remote_tlbs(kvm); + + return 0; +} + +static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, u64 data, + int (*handler)(struct kvm *kvm, unsigned long *rmapp, + u64 data)) { int i, j; int retval = 0; @@ -786,13 +823,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, if (hva >= start && hva < end) { gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; - retval |= handler(kvm, &memslot->rmap[gfn_offset]); + retval |= handler(kvm, &memslot->rmap[gfn_offset], + data); for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { int idx = gfn_offset; idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); retval |= handler(kvm, - &memslot->lpage_info[j][idx].rmap_pde); + &memslot->lpage_info[j][idx].rmap_pde, + data); } } } @@ -802,10 +841,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) { - return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); + return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + kvm_handle_hva(kvm, hva, (u64)&pte, 
kvm_set_pte_rmapp); } -static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) { u64 *spte; int young = 0; @@ -841,13 +885,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) gfn = unalias_gfn(vcpu->kvm, gfn); rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); - kvm_unmap_rmapp(vcpu->kvm, rmapp); + kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); kvm_flush_remote_tlbs(vcpu->kvm); } int kvm_age_hva(struct kvm *kvm, unsigned long hva) { - return kvm_handle_hva(kvm, hva, kvm_age_rmapp); + return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); } #ifdef MMU_DEBUG -- cgit v1.2.2 From 9bcbdd9c58617f1301dd4f17c738bb9bc73aca70 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 8 Oct 2009 06:40:41 -0700 Subject: x86, timers: Check for pending timers after (device) interrupts Now that range timers and deferred timers are common, I found a problem with these using the "perf timechart" tool. Frans Pop also reported high scheduler latencies via LatencyTop, when using iwlagn. It turns out that on x86, these two 'opportunistic' timers only get checked when another "real" timer happens. These opportunistic timers have the objective to save power by hitchhiking on other wakeups, as to avoid CPU wakeups by themselves as much as possible. The change in this patch runs this check not only at timer interrupts, but at all (device) interrupts. The effect is that: 1) the deferred timers/range timers get delayed less 2) the range timers cause less wakeups by themselves because the percentage of hitchhiking on existing wakeup events goes up. I've verified the working of the patch using "perf timechart", the original exposed bug is gone with this patch. Frans also reported success - the latencies are now down in the expected ~10 msec range. Signed-off-by: Arjan van de Ven Tested-by: Frans Pop Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091008064041.67219b13@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 2 ++ arch/x86/kernel/smp.c | 1 + 2 files changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 74656d1d4e30..391206199515 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -244,6 +244,7 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) __func__, smp_processor_id(), vector, irq); } + run_local_timers(); irq_exit(); set_irq_regs(old_regs); @@ -268,6 +269,7 @@ void smp_generic_interrupt(struct pt_regs *regs) if (generic_interrupt_extension) generic_interrupt_extension(); + run_local_timers(); irq_exit(); set_irq_regs(old_regs); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index ec1de97600e7..d915d956e66d 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -198,6 +198,7 @@ void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); inc_irq_stat(irq_resched_count); + run_local_timers(); /* * KVM uses this interrupt to force a cpu out of guest mode */ -- cgit v1.2.2 From d0153ca35d344d9b640dc305031b0703ba3f30f0 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 29 Sep 2009 10:25:24 -0700 Subject: x86, vmi: Mark VMI deprecated and schedule it for removal Add text in feature-removal.txt indicating that VMI will be removed in the 2.6.37 timeframe. Signed-off-by: Alok N Kataria Acked-by: Chris Wright LKML-Reference: <1254193238.13456.48.camel@ank32.eng.vmware.com> [ removed a bogus Kconfig change, marked (DEPRECATED) in Kconfig ] Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 11 ++++++++++- arch/x86/kernel/vmi_32.c | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c876bace8fdc..07e01149e3bf 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -491,7 +491,7 @@ if PARAVIRT_GUEST source "arch/x86/xen/Kconfig" config VMI - bool "VMI Guest support" + bool "VMI Guest support (DEPRECATED)" select PARAVIRT depends on X86_32 ---help--- @@ -500,6 +500,15 @@ config VMI at the moment), by linking the kernel to a GPL-ed ROM module provided by the hypervisor. + As of September 2009, VMware has started a phased retirement + of this feature from VMware's products. Please see + feature-removal-schedule.txt for details. If you are + planning to enable this option, please note that you cannot + live migrate a VMI enabled VM to a future VMware product, + which doesn't support VMI. So if you expect your kernel to + seamlessly migrate to newer VMware products, keep this + disabled. + config KVM_CLOCK bool "KVM paravirtualized clock" select PARAVIRT diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 31e6f6cfe53e..d430e4c30193 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -648,7 +648,7 @@ static inline int __init activate_vmi(void) pv_info.paravirt_enabled = 1; pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; - pv_info.name = "vmi"; + pv_info.name = "vmi [deprecated]"; pv_init_ops.patch = vmi_patch; -- cgit v1.2.2 From e7ab0f7b50bc4688fb5cf65de5d42e3b882fb8d1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 9 Oct 2009 15:58:20 +0200 Subject: Revert "x86, timers: Check for pending timers after (device) interrupts" This reverts commit 9bcbdd9c58617f1301dd4f17c738bb9bc73aca70. The real bug producing LatencyTop latencies has been fixed in: f5dc375: sched: Update the clock of runqueue select_task_rq() selected And the commit being reverted here triggers local timer processing from every device IRQ. If device IRQs come in at a high frequency, this could cause a performance regression. The commit being reverted here purely 'fixed' the reported latency as a side effect, because CPUs were being moved out of idle more often. 
Acked-by: Peter Zijlstra Cc: Arjan van de Ven Cc: Frans Pop Cc: Linus Torvalds Cc: Mike Galbraith Cc: Thomas Gleixner LKML-Reference: <20091008064041.67219b13@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 2 -- arch/x86/kernel/smp.c | 1 - 2 files changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 391206199515..74656d1d4e30 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -244,7 +244,6 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) __func__, smp_processor_id(), vector, irq); } - run_local_timers(); irq_exit(); set_irq_regs(old_regs); @@ -269,7 +268,6 @@ void smp_generic_interrupt(struct pt_regs *regs) if (generic_interrupt_extension) generic_interrupt_extension(); - run_local_timers(); irq_exit(); set_irq_regs(old_regs); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index d915d956e66d..ec1de97600e7 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -198,7 +198,6 @@ void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); inc_irq_stat(irq_resched_count); - run_local_timers(); /* * KVM uses this interrupt to force a cpu out of guest mode */ -- cgit v1.2.2 From c5cca146aa03e1f60fb179df65f0dbaf17bc64ed Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 18:31:20 +0200 Subject: x86/amd-iommu: Workaround for erratum 63 There is an erratum for IOMMU hardware which documents undefined behavior when forwarding SMI requests from peripherals and the DTE of that peripheral has a sysmgt value of 01b. This problem caused weird IO_PAGE_FAULTS in my case. This patch implements the suggested workaround for that erratum into the AMD IOMMU driver. The erratum is documented with number 63. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu.h | 1 + arch/x86/kernel/amd_iommu.c | 2 ++ arch/x86/kernel/amd_iommu_init.c | 22 ++++++++++++++++++++++ 3 files changed, 25 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index bdf96f119f06..9dbd4030f0cd 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -30,6 +30,7 @@ extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_flush_all_domains(void); extern void amd_iommu_flush_all_devices(void); extern void amd_iommu_shutdown(void); +extern void amd_iommu_apply_erratum_63(u16 devid); #else static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6c99f5037801..f95dfe526444 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1114,6 +1114,8 @@ static void __detach_device(struct protection_domain *domain, u16 devid) amd_iommu_dev_table[devid].data[1] = 0; amd_iommu_dev_table[devid].data[2] = 0; + amd_iommu_apply_erratum_63(devid); + /* decrease reference counter */ domain->dev_cnt -= 1; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c1b17e97252e..498c8c79aaa5 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -509,6 +509,26 @@ static void set_dev_entry_bit(u16 devid, u8 bit) amd_iommu_dev_table[devid].data[i] |= (1 << _bit); } +static int get_dev_entry_bit(u16 devid, u8 bit) +{ + int i = (bit >> 5) & 0x07; + int _bit = bit & 0x1f; + + return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit; +} + + +void 
amd_iommu_apply_erratum_63(u16 devid) +{ + int sysmgt; + + sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) | + (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1); + + if (sysmgt == 0x01) + set_dev_entry_bit(devid, DEV_ENTRY_IW); +} + /* Writes the specific IOMMU for a device into the rlookup table */ static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) { @@ -537,6 +557,8 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, if (flags & ACPI_DEVFLAG_LINT1) set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); + amd_iommu_apply_erratum_63(devid); + set_iommu_for_device(iommu, devid); } -- cgit v1.2.2 From d43c36dc6b357fa1806800f18aa30123c747a6d1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 7 Oct 2009 17:09:06 +0400 Subject: headers: remove sched.h from interrupt.h After m68k's task_thread_info() doesn't refer to current, it's possible to remove sched.h from interrupt.h and not break m68k! Many thanks to Heiko Carstens for allowing this. Signed-off-by: Alexey Dobriyan --- arch/x86/kernel/cpu/mcheck/mce_intel.c | 1 + arch/x86/kernel/pci-gart_64.c | 1 + arch/x86/kernel/reboot.c | 1 + 3 files changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 889f665fe93d..7c785634af2b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 98a827ee9ed7..a7f1b64f86e0 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 27349f92a6d7..a1a3cdda06e1 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.2 From 15b812f1d0a5ca8f5efe7f5882f468af10682ca8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 11 Oct 2009 14:17:16 -0700 Subject: pci: increase alignment to make more space for hidden code As reported in http://bugzilla.kernel.org/show_bug.cgi?id=13940 on some system when acpi are enabled, acpi clears some BAR for some devices without reason, and kernel will need to allocate devices for them. It then apparently hits some undocumented resource conflict, resulting in non-working devices. Try to increase alignment to get more safe range for unassigned devices. Signed-off-by: Yinghai Lu Signed-off-by: Linus Torvalds --- arch/x86/kernel/e820.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 85419bb7d4ab..d17d482a04f4 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1378,8 +1378,8 @@ static unsigned long ram_alignment(resource_size_t pos) if (mb < 16) return 1024*1024; - /* To 32MB for anything above that */ - return 32*1024*1024; + /* To 64MB for anything above that */ + return 64*1024*1024; } #define MAX_RESOURCE_SIZE ((resource_size_t)-1) -- cgit v1.2.2 From 9a821b231644028f8e2a853eb33d1184e925b183 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 12 Oct 2009 12:59:29 +0100 Subject: x86: Move pci_iommu_init to rootfs_initcall() We want this to happen after the PCI quirks, which are now running at the very end of the fs_initcalls. 
This works around the BIOS problems which were originally addressed by commit db8be50c4307dac2b37305fc59c8dc0f978d09ea ('USB: Work around BIOS bugs by quiescing USB controllers earlier'), which was reverted in commit d93a8f829fe1d2f3002f2c6ddb553d12db420412. Signed-off-by: David Woodhouse --- arch/x86/kernel/pci-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 64b838eac18c..e0d9199d0eb9 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -311,7 +311,7 @@ void pci_iommu_shutdown(void) amd_iommu_shutdown(); } /* Must execute after PCI subsystem */ -fs_initcall(pci_iommu_init); +rootfs_initcall(pci_iommu_init); #ifdef CONFIG_PCI /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ -- cgit v1.2.2 From 7a4b7e5e741fe0a72a517b0367a2659aa53f7c44 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 6 Oct 2009 16:32:43 +0100 Subject: x86: Fix Suspend to RAM freeze on Acer Aspire 1511Lmi laptop Move the trampoline and accessors back out of .cpuinit.* for the case of 64-bits+ACPI_SLEEP. This solves s2ram hangs reported in: http://bugzilla.kernel.org/show_bug.cgi?id=14279 Reported-and-bisected-by: Christian Casteyde Signed-off-by: Jan Beulich Cc: Cc: "Andrew Morton" Cc: "Rafael J. Wysocki" Signed-off-by: Ingo Molnar --- arch/x86/kernel/trampoline.c | 12 ++++++++++-- arch/x86/kernel/trampoline_64.S | 4 ++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index 699f7eeb896a..cd022121cab6 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -3,8 +3,16 @@ #include #include +#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) +#define __trampinit +#define __trampinitdata +#else +#define __trampinit __cpuinit +#define __trampinitdata __cpuinitdata +#endif + /* ready for x86_64 and x86 */ -unsigned char *__cpuinitdata trampoline_base = __va(TRAMPOLINE_BASE); +unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE); void __init reserve_trampoline_memory(void) { @@ -26,7 +34,7 @@ void __init reserve_trampoline_memory(void) * bootstrap into the page concerned. The caller * has made sure it's suitably aligned. */ -unsigned long __cpuinit setup_trampoline(void) +unsigned long __trampinit setup_trampoline(void) { memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); return virt_to_phys(trampoline_base); diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 596d54c660a5..3af2dff58b21 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -32,8 +32,12 @@ #include #include +#ifdef CONFIG_ACPI_SLEEP +.section .rodata, "a", @progbits +#else /* We can free up the trampoline after bootup if cpu hotplug is not supported. */ __CPUINITRODATA +#endif .code16 ENTRY(trampoline_data) -- cgit v1.2.2 From d1705c558c95418378b11a0be963fe1b3e2fa381 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 12 Oct 2009 11:32:31 -0700 Subject: x86: fix kernel panic on 32 bits when profiling Latest kernel has a kernel panic in booting on i386 machine when profile=2 setting in cmdline. It is due to 'sp' being incorrect in profile_pc(). 
BUG: unable to handle kernel NULL pointer dereference at 00000246 IP: [] profile_pc+0x2a/0x48 *pde = 00000000 Oops: 0000 [#1] SMP This differs from the original version by Alex Shi in that we use the kernel_stack_pointer() inline already defined in for this purpose, instead of #ifdef. Originally-by: Alex Shi Cc: "Chen, Tim C" Cc: "Rafael J. Wysocki" Cc: Andrew Morton Signed-off-by: H. Peter Anvin --- arch/x86/kernel/time.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index dcb00d278512..be2573448ed9 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -38,7 +38,8 @@ unsigned long profile_pc(struct pt_regs *regs) #ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->bp + sizeof(long)); #else - unsigned long *sp = (unsigned long *)regs->sp; + unsigned long *sp = + (unsigned long *)kernel_stack_pointer(regs); /* * Return address is either directly at stack pointer * or above a saved flags. Eflags has bits 22-31 zero, -- cgit v1.2.2 From 71999d9862e667f1fd14f8fbfa0cce6d855bad3f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 12 Oct 2009 16:32:43 -0700 Subject: x86/paravirt: Use normal calling sequences for irq enable/disable Bastian Blank reported a boot crash with stackprotector enabled, and debugged it back to edx register corruption. For historical reasons irq enable/disable/save/restore had special calling sequences to make them more efficient. With the more recent introduction of higher-level and more general optimisations this is no longer necessary so we can just use the normal PVOP_ macros. This fixes some residual bugs in the old implementations which left edx liable to inadvertent clobbering. Also, fix some bugs in __PVOP_VCALLEESAVE which were revealed by actual use. 
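The failure mode, sketched with a hypothetical caller (not code from the tree):

    static u64 sample(void)
    {
        unsigned long flags = __raw_local_save_flags();
        u64 v = native_read_tsc();      /* lives in %edx:%eax on 32-bit */

        /* Old sequence: gcc was told only %eax (plus memory/cc) was
         * clobbered, so it assumed %edx survived this call.  A patched-in
         * implementation that actually used %edx corrupted v. */
        raw_local_irq_restore(flags);

        return v;                       /* upper half potentially garbage */
    }

The PVOP_(V)CALLEE* macros used below declare the callee-save contract explicitly, so the compiler never keeps live values in registers the patch site may clobber.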
Reported-by: Bastian Blank Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel Cc: Xen-devel LKML-Reference: <4AD3BC9B.7040501@goop.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt.h | 28 ++++------------------------ arch/x86/include/asm/paravirt_types.h | 10 ++++++---- 2 files changed, 10 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 8aebcc41041d..efb38994859c 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -840,42 +840,22 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) static inline unsigned long __raw_local_save_flags(void) { - unsigned long f; - - asm volatile(paravirt_alt(PARAVIRT_CALL) - : "=a"(f) - : paravirt_type(pv_irq_ops.save_fl), - paravirt_clobber(CLBR_EAX) - : "memory", "cc"); - return f; + return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); } static inline void raw_local_irq_restore(unsigned long f) { - asm volatile(paravirt_alt(PARAVIRT_CALL) - : "=a"(f) - : PV_FLAGS_ARG(f), - paravirt_type(pv_irq_ops.restore_fl), - paravirt_clobber(CLBR_EAX) - : "memory", "cc"); + PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); } static inline void raw_local_irq_disable(void) { - asm volatile(paravirt_alt(PARAVIRT_CALL) - : - : paravirt_type(pv_irq_ops.irq_disable), - paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc"); + PVOP_VCALLEE0(pv_irq_ops.irq_disable); } static inline void raw_local_irq_enable(void) { - asm volatile(paravirt_alt(PARAVIRT_CALL) - : - : paravirt_type(pv_irq_ops.irq_enable), - paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc"); + PVOP_VCALLEE0(pv_irq_ops.irq_enable); } static inline unsigned long __raw_local_irq_save(void) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index dd0f5b32489d..9357473c8da0 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -494,10 +494,11 @@ int paravirt_disable_iospace(void); #define EXTRA_CLOBBERS #define VEXTRA_CLOBBERS #else /* CONFIG_X86_64 */ +/* [re]ax isn't an arg, but the return val */ #define PVOP_VCALL_ARGS \ unsigned long __edi = __edi, __esi = __esi, \ - __edx = __edx, __ecx = __ecx -#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax + __edx = __edx, __ecx = __ecx, __eax = __eax +#define PVOP_CALL_ARGS PVOP_VCALL_ARGS #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) @@ -509,6 +510,7 @@ int paravirt_disable_iospace(void); "=c" (__ecx) #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) +/* void functions are still allowed [re]ax for scratch */ #define PVOP_VCALLEE_CLOBBERS "=a" (__eax) #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS @@ -583,8 +585,8 @@ int paravirt_disable_iospace(void); VEXTRA_CLOBBERS, \ pre, post, ##__VA_ARGS__) -#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ - ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ +#define __PVOP_VCALLEESAVE(op, pre, post, ...) \ + ____PVOP_VCALL(op.func, CLBR_RET_REG, \ PVOP_VCALLEE_CLOBBERS, , \ pre, post, ##__VA_ARGS__) -- cgit v1.2.2 From 89ccf465abe6b20d804a63ae20307970c441369d Mon Sep 17 00:00:00 2001 From: Li Hong Date: Wed, 14 Oct 2009 18:50:39 +0800 Subject: x86, perf_event: Rename 'performance counter interrupt' In 'cdd6c482c9ff9c55475ee7392ec8f672eddb7be6', we renamed Performance Counters -> Performance Events. The name showed up in /proc/interrupts also needs a change. 
I use PMI (Performance monitoring interrupt) here, since it is the official name used in Intel's documents. Signed-off-by: Li Hong Cc: Andrew Morton Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091014105039.GA22670@uhli> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 391206199515..86fb2c8e065a 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -63,10 +63,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "%*s: ", prec, "CNT"); + seq_printf(p, "%*s: ", prec, "PMI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); - seq_printf(p, " Performance counter interrupts\n"); + seq_printf(p, " Performance monitoring interrupts\n"); seq_printf(p, "%*s: ", prec, "PND"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); -- cgit v1.2.2 From 799e2205ec65e174f752b558c62a92c4752df313 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Oct 2009 12:16:40 +0200 Subject: sched: Disable SD_PREFER_LOCAL for MC/CPU domains Yanmin reported that both tbench and hackbench were significantly hurt by trying to keep tasks local on these domains, esp on small cache machines. So disable it in order to promote spreading outside of the cache domains. Reported-by: "Zhang, Yanmin" Signed-off-by: Peter Zijlstra CC: Mike Galbraith LKML-Reference: <1255083400.8802.15.camel@laptop> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 25a92842dd99..d823c245f63b 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -143,6 +143,7 @@ extern unsigned long node_remap_size[]; | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ + | 1*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ | 0*SD_SHARE_PKG_RESOURCES \ -- cgit v1.2.2 From e9a63a4e559fbdc522072281d05e6b13c1022f4b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 14 Oct 2009 14:16:38 -0700 Subject: x86: linker script syntax nits The linker scripts grew some use of weirdly wrong linker script syntax. It happens to work, but it's not what the syntax is documented to be. Clean it up to use the official syntax. Signed-off-by: Roland McGrath CC: Ian Lance Taylor --- arch/x86/kernel/acpi/realmode/wakeup.lds.S | 4 ++-- arch/x86/kernel/vmlinux.lds.S | 17 ++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S index 7da00b799cda..0e50e1e5c573 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S @@ -56,6 +56,6 @@ SECTIONS /DISCARD/ : { *(.note*) } - - . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); } + +ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 92929fb3f9fa..8d6001ad8d8d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -305,8 +305,8 @@ SECTIONS #ifdef CONFIG_X86_32 -. 
= ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); +ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #else /* * Per-cpu symbols which need to be offset from __per_cpu_load @@ -319,12 +319,12 @@ INIT_PER_CPU(irq_stack_union); /* * Build-time check on the image size: */ -. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); +ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP -. = ASSERT((per_cpu__irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); +ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); #endif #endif /* CONFIG_X86_32 */ @@ -332,7 +332,6 @@ INIT_PER_CPU(irq_stack_union); #ifdef CONFIG_KEXEC #include -. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big"); +ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big"); #endif - -- cgit v1.2.2 From db8590f5043f3436a65b24155a3a7af2604df876 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Oct 2009 08:08:12 +0200 Subject: Revert "x86: linker script syntax nits" This reverts commit e9a63a4e559fbdc522072281d05e6b13c1022f4b. This breaks older binutils, where sink-less asserts are broken. See this commit for further details: d2ba8b2: x86: Fix assert syntax in vmlinux.lds.S Acked-by: "H. Peter Anvin" Acked-by: Sam Ravnborg Cc: Linus Torvalds LKML-Reference: <4AD6523D.5030909@zytor.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/realmode/wakeup.lds.S | 4 ++-- arch/x86/kernel/vmlinux.lds.S | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S index 0e50e1e5c573..7da00b799cda 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S @@ -56,6 +56,6 @@ SECTIONS /DISCARD/ : { *(.note*) } -} -ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); + . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); +} diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8d6001ad8d8d..92929fb3f9fa 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -305,8 +305,8 @@ SECTIONS #ifdef CONFIG_X86_32 -ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); +. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #else /* * Per-cpu symbols which need to be offset from __per_cpu_load @@ -319,12 +319,12 @@ INIT_PER_CPU(irq_stack_union); /* * Build-time check on the image size: */ -ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); +. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP -ASSERT((per_cpu__irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); +. = ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); #endif #endif /* CONFIG_X86_32 */ @@ -332,6 +332,7 @@ ASSERT((per_cpu__irq_stack_union == 0), #ifdef CONFIG_KEXEC #include -ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big"); +. 
= ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big") #endif + -- cgit v1.2.2 From a5912f6b3e20c137172460e6d4dd180866c00963 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 16 Oct 2009 07:18:46 +0200 Subject: x86: Document linker script ASSERT() quirk Older binutils breaks if ASSERT() is used without a sink for the output. For example 2.14.90.0.6 is known to be broken, the link fails with: LD .tmp_vmlinux1 ld:arch/x86/kernel/vmlinux.lds:678: parse error Document this quirk in all three files that use it. See: http://marc.info/?l=linux-kbuild&m=124930110427870&w=2 See[2]: d2ba8b2 ("x86: Fix assert syntax in vmlinux.lds.S") Cc: Linus Torvalds Cc: Roland McGrath Cc: "H. Peter Anvin" Cc: Sam Ravnborg LKML-Reference: <4AD6523D.5030909@zytor.com> Signed-off-by: Ingo Molnar --- arch/x86/boot/setup.ld | 3 +++ arch/x86/kernel/acpi/realmode/wakeup.lds.S | 3 +++ arch/x86/kernel/vmlinux.lds.S | 3 +++ 3 files changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 0f6ec455a2b1..03c0683636b6 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -53,6 +53,9 @@ SECTIONS /DISCARD/ : { *(.note*) } + /* + * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: + */ . = ASSERT(_end <= 0x8000, "Setup too big!"); . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); /* Necessary for the very-old-loader check to work... */ diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S index 7da00b799cda..060fff8f5c5b 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S @@ -57,5 +57,8 @@ SECTIONS *(.note*) } + /* + * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: + */ . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 92929fb3f9fa..3c68fe2d46cf 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -305,6 +305,9 @@ SECTIONS #ifdef CONFIG_X86_32 +/* + * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: + */ . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), "kernel image bigger than KERNEL_IMAGE_SIZE"); #else -- cgit v1.2.2 From 036ed8ba61b72c19dc5759446d4fe0844aa88255 Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Thu, 15 Oct 2009 17:40:00 -0500 Subject: x86, UV: Fix information in __uv_hub_info structure A few parts of the uv_hub_info structure are initialized incorrectly. - n_val is being loaded with m_val. - gpa_mask is initialized as an int instead of an unsigned long. - Handle the case where none of the alias registers are used. Lastly, I converted the bau over to using uv_hub_info->m_val, which is the correct value. Without this patch, booting a large configuration hits a problem where the upper bits of the gnode affect the pnode and the bau will not operate.
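For illustration, a minimal userspace sketch of the gpa_mask truncation fixed here (hypothetical m_val/n_val values; not part of the patch):

    #include <stdio.h>

    int main(void)
    {
        int m_val = 28, n_val = 12;     /* hypothetical: m_val + n_val > 31 */

        /* int-typed shift: overflows before being widened to unsigned long */
        unsigned long bad  = (1 << (m_val + n_val)) - 1;
        /* unsigned-long-typed shift: produces the full mask */
        unsigned long good = (1UL << (m_val + n_val)) - 1;

        printf("bad=%#lx good=%#lx\n", bad, good);
        return 0;
    }

On a 64-bit build the second form yields the intended (m_val + n_val)-bit mask; the first loses the upper bits, which is how the upper gnode bits ended up corrupting the pnode.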
Signed-off-by: Robin Holt Acked-by: Jack Steiner Cc: Cliff Whickman Cc: stable@kernel.org LKML-Reference: <20091015224946.396355000@alcatraz.americas.sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 8 ++++---- arch/x86/kernel/tlb_uv.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f5f5886a6b53..326c25477d3d 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -352,14 +352,14 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { alias.v = uv_read_local_mmr(redir_addrs[i].alias); - if (alias.s.base == 0) { + if (alias.s.enable && alias.s.base == 0) { *size = (1UL << alias.s.m_alias); redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; return; } } - BUG(); + *base = *size = 0; } enum map_type {map_wb, map_uc}; @@ -619,12 +619,12 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; uv_cpu_hub_info(cpu)->m_val = m_val; - uv_cpu_hub_info(cpu)->n_val = m_val; + uv_cpu_hub_info(cpu)->n_val = n_val; uv_cpu_hub_info(cpu)->numa_blade_id = blade; uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; uv_cpu_hub_info(cpu)->pnode = pnode; uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; - uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; + uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 503c1f2e8835..f99fb6acfe34 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -843,8 +843,8 @@ static int __init uv_bau_init(void) GFP_KERNEL, cpu_to_node(cur_cpu)); uv_bau_retry_limit = 1; - uv_nshift = uv_hub_info->n_val; - uv_mmask = (1UL << uv_hub_info->n_val) - 1; + uv_nshift = uv_hub_info->m_val; + uv_mmask = (1UL << uv_hub_info->m_val) - 1; nblades = uv_num_possible_blades(); uv_bau_table_bases = (struct bau_control **) -- cgit v1.2.2 From 93ae5012a79b11e7fc855b52c7ce1e16fe1540b0 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 15 Oct 2009 14:21:14 -0700 Subject: x86: Don't print number of MCE banks for every CPU The MCE initialization code explicitly says it doesn't handle asymmetric configurations where different CPUs support different numbers of MCE banks, and it prints a big warning in that case. Therefore, printing the "mce: CPU supports MCE banks" message into the kernel log for every CPU is pure redundancy that clutters the log significantly for systems with lots of CPUs. 
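The fix below gates the message on the not-yet-initialized 'banks' state, so it prints only for the first CPU; when no such state is handy, printk_once() (in mainline since mid-2009) is the generic idiom for the same deduplication, e.g. a hypothetical one-liner:

    printk_once(KERN_INFO "mce: CPU supports %d MCE banks\n", b);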
Signed-off-by: Roland Dreier LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b1598a9436d0..721a77ca8115 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1214,7 +1214,8 @@ static int __cpuinit mce_cap_init(void) rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & MCG_BANKCNT_MASK; - printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); + if (!banks) + printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); if (b > MAX_NR_BANKS) { printk(KERN_WARNING -- cgit v1.2.2 From 1d21e6e3ffad2939f9d8179817c6f9bc3b811b68 Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Fri, 16 Oct 2009 06:29:20 -0500 Subject: x86, UV: Fix and clean up bau code to use uv_gpa_to_pnode() Create an inline function to extract the pnode from a global physical address and then convert the broadcast assist unit to use the newly created uv_gpa_to_pnode function. The open-coded code was wrong as well - it might explain a few of our unexplained bau hangs. Signed-off-by: Robin Holt Acked-by: Cliff Whickman Cc: linux-mm@kvack.org Cc: Jack Steiner LKML-Reference: <20091016112920.GZ8903@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 16 +++++++++++++++- arch/x86/kernel/tlb_uv.c | 7 ++----- 2 files changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 04eb6c958b9d..94908a08020a 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -114,7 +114,7 @@ /* * The largest possible NASID of a C or M brick (+ 2) */ -#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) +#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2) struct uv_scir_s { struct timer_list timer; @@ -230,6 +230,20 @@ static inline unsigned long uv_gpa(void *v) return uv_soc_phys_ram_to_gpa(__pa(v)); } +/* gnode -> pnode */ +static inline unsigned long uv_gpa_to_gnode(unsigned long gpa) +{ + return gpa >> uv_hub_info->m_val; +} + +/* gpa -> pnode */ +static inline int uv_gpa_to_pnode(unsigned long gpa) +{ + unsigned long n_mask = (1UL << uv_hub_info->n_val) - 1; + + return uv_gpa_to_gnode(gpa) & n_mask; +} + /* pnode, offset --> socket virtual */ static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) { diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index f99fb6acfe34..1740c85e24bb 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -23,8 +23,6 @@ static struct bau_control **uv_bau_table_bases __read_mostly; static int uv_bau_retry_limit __read_mostly; -/* position of pnode (which is nasid>>1): */ -static int uv_nshift __read_mostly; /* base pnode in this partition */ static int uv_partition_base_pnode __read_mostly; @@ -723,7 +721,7 @@ uv_activation_descriptor_init(int node, int pnode) BUG_ON(!adp); pa = uv_gpa(adp); /* need the real nasid*/ - n = pa >> uv_nshift; + n = uv_gpa_to_pnode(pa); m = pa & uv_mmask; uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, @@ -778,7 +776,7 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp) * need the pnode of where the memory was really allocated */ pa = uv_gpa(pqp); - pn = pa >> uv_nshift; + pn = uv_gpa_to_pnode(pa); uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | @@ -843,7 +841,6 @@ static 
int __init uv_bau_init(void) GFP_KERNEL, cpu_to_node(cur_cpu)); uv_bau_retry_limit = 1; - uv_nshift = uv_hub_info->m_val; uv_mmask = (1UL << uv_hub_info->m_val) - 1; nblades = uv_num_possible_blades(); -- cgit v1.2.2 From ace1546487a0fe4634e3251067f8a32cb2cdc099 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 8 Oct 2009 10:55:03 -0300 Subject: KVM: use proper hrtimer function to retrieve expiration time hrtimer->base can be temporarily NULL due to racing hrtimer_start. See switch_hrtimer_base/lock_hrtimer_base. Use hrtimer_get_remaining which is robust against it. CC: stable@kernel.org Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 2 +- arch/x86/kvm/lapic.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 82ad523b4901..144e7f60b5e2 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -116,7 +116,7 @@ static s64 __kpit_elapsed(struct kvm *kvm) * itself with the initial count and continues counting * from there. */ - remaining = hrtimer_expires_remaining(&ps->pit_timer.timer); + remaining = hrtimer_get_remaining(&ps->pit_timer.timer); elapsed = ps->pit_timer.period - ktime_to_ns(remaining); elapsed = mod_64(elapsed, ps->pit_timer.period); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 7024224f0fc8..23c217692ea9 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -521,7 +521,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) if (apic_get_reg(apic, APIC_TMICT) == 0) return 0; - remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer); + remaining = hrtimer_get_remaining(&apic->lapic_timer.timer); if (ktime_to_ns(remaining) < 0) remaining = ktime_set(0, 0); -- cgit v1.2.2 From 8a8365c560b8b631e0a2d1ac032fbca66a9645bc Mon Sep 17 00:00:00 2001 From: Frederik Deweerdt Date: Fri, 9 Oct 2009 11:42:56 +0000 Subject: KVM: MMU: fix pointer cast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a 32 bits compile, commit 3da0dd433dc399a8c0124d0614d82a09b6a49bce introduced the following warnings: arch/x86/kvm/mmu.c: In function ‘kvm_set_pte_rmapp’: arch/x86/kvm/mmu.c:770: warning: cast to pointer from integer of different size arch/x86/kvm/mmu.c: In function ‘kvm_set_spte_hva’: arch/x86/kvm/mmu.c:849: warning: cast from pointer to integer of different size The following patch uses 'unsigned long' instead of u64 to match the pointer size on both arches. 
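A userspace sketch of the two warning classes being fixed (hypothetical code; build for a 32-bit target, e.g. gcc -m32 -c, to reproduce):

    typedef unsigned long long u64;

    void take(u64 data);

    void warns_on_32bit(int *p, u64 data)
    {
        take((u64)p);           /* warning: cast from pointer to integer of different size */
        p = (int *)data;        /* warning: cast to pointer from integer of different size */
        take((unsigned long)p); /* ok: unsigned long matches the pointer width on both arches */
    }

uintptr_t would be the portable userspace spelling; the kernel convention is unsigned long.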
Signed-off-by: Frederik Deweerdt Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 685a4ffac8e6..818b92ad82cf 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -748,7 +748,8 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) return write_protected; } -static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) +static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, + unsigned long data) { u64 *spte; int need_tlb_flush = 0; @@ -763,7 +764,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) return need_tlb_flush; } -static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) +static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, + unsigned long data) { int need_flush = 0; u64 *spte, new_spte; @@ -799,9 +801,10 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) return 0; } -static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, u64 data, +static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, + unsigned long data, int (*handler)(struct kvm *kvm, unsigned long *rmapp, - u64 data)) + unsigned long data)) { int i, j; int retval = 0; @@ -846,10 +849,11 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) { - kvm_handle_hva(kvm, hva, (u64)&pte, kvm_set_pte_rmapp); + kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); } -static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, + unsigned long data) { u64 *spte; int young = 0; -- cgit v1.2.2 From 13b79b971564ddd0f14e706592472adc8199e912 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 20 Oct 2009 16:20:47 +0900 Subject: crypto: aesni-intel - Fix irq_fpu_usable usage When kernel_fpu_using was renamed to irq_fpu_usable, the semantics of the function changed too: from measuring whether the kernel is using the FPU (that is, the FPU is NOT available) to measuring whether the FPU is usable (that is, the FPU is available). But the usage of irq_fpu_usable in aesni-intel_glue.c was not changed accordingly. This patch fixes that.
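With the corrected semantics, the intended shape of each path is as follows (a sketch of the fixed logic, not a verbatim excerpt):

    if (irq_fpu_usable()) {
        /* FPU available: take the AES-NI fast path */
        kernel_fpu_begin();
        aesni_enc(ctx, dst, src);       /* hypothetical callee name */
        kernel_fpu_end();
    } else {
        /* FPU not usable in this context: C fallback */
        crypto_aes_encrypt_x86(ctx, dst, src);
    }

That is, the accelerated branch is taken when the FPU is usable — the inverse of what the pre-rename tests expressed.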
Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 585edebe12cf..49c552c060e9 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -82,7 +82,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, return -EINVAL; } - if (irq_fpu_usable()) + if (!irq_fpu_usable()) err = crypto_aes_expand_key(ctx, in_key, key_len); else { kernel_fpu_begin(); @@ -103,7 +103,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (irq_fpu_usable()) + if (!irq_fpu_usable()) crypto_aes_encrypt_x86(ctx, dst, src); else { kernel_fpu_begin(); @@ -116,7 +116,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (irq_fpu_usable()) + if (!irq_fpu_usable()) crypto_aes_decrypt_x86(ctx, dst, src); else { kernel_fpu_begin(); @@ -342,7 +342,7 @@ static int ablk_encrypt(struct ablkcipher_request *req) struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - if (irq_fpu_usable()) { + if (!irq_fpu_usable()) { struct ablkcipher_request *cryptd_req = ablkcipher_request_ctx(req); memcpy(cryptd_req, req, sizeof(*req)); @@ -363,7 +363,7 @@ static int ablk_decrypt(struct ablkcipher_request *req) struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - if (irq_fpu_usable()) { + if (!irq_fpu_usable()) { struct ablkcipher_request *cryptd_req = ablkcipher_request_ctx(req); memcpy(cryptd_req, req, sizeof(*req)); -- cgit v1.2.2 From 02dd0a0613e0d84c7dd8315e3fe6204d005b7c79 Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Tue, 20 Oct 2009 14:36:15 -0500 Subject: x86, UV: Set DELIVERY_MODE=4 for vector=NMI_VECTOR in uv_hub_send_ipi() When sending an NMI_VECTOR IPI using the UV_HUB_IPI_INT register, we need to ensure the delivery mode field of that register has NMI delivery selected. This makes those IPIs true NMIs, instead of flat IPIs. It matters to reboot sequences and KGDB, both of which use NMI IPIs.
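For illustration, the composed UVH_IPI_INT value then carries the delivery mode alongside the vector (a sketch using the patch's own field names):

    unsigned long dmode = (vector == NMI_VECTOR) ? dest_NMI : dest_Fixed;
    unsigned long val = (1UL << UVH_IPI_INT_SEND_SHFT) |
                        ((unsigned long)apicid << UVH_IPI_INT_APIC_ID_SHFT) |
                        (dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
                        ((unsigned long)vector << UVH_IPI_INT_VECTOR_SHFT);

Since dest_Fixed is 0, leaving the field unset silently encoded fixed delivery, which is why vector 2 arrived as an ordinary vectored interrupt rather than a real NMI.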
Signed-off-by: Robin Holt Acked-by: Jack Steiner Cc: Martin Hicks Cc: LKML-Reference: <20091020193620.877322000@alcatraz.americas.sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 94908a08020a..d1414af98559 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -19,6 +19,8 @@ #include #include #include +#include +#include /* @@ -435,9 +437,14 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) { unsigned long val; + unsigned long dmode = dest_Fixed; + + if (vector == NMI_VECTOR) + dmode = dest_NMI; val = (1UL << UVH_IPI_INT_SEND_SHFT) | ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | + (dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | (vector << UVH_IPI_INT_VECTOR_SHFT); uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } -- cgit v1.2.2 From 14a3f40aafacde1dfd6912327ae14df4baf10304 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 23 Oct 2009 07:31:01 -0700 Subject: x86: Remove STACKPROTECTOR_ALL STACKPROTECTOR_ALL has a really high overhead (runtime and stack footprint) and is not really worth it protection-wise (the normal STACKPROTECTOR is in effect for all functions with buffers already), so let's just remove the option entirely. Reported-by: Dave Jones Reported-by: Chuck Ebbert Signed-off-by: Arjan van de Ven Cc: Eric Sandeen LKML-Reference: <20091023073101.3dce4ebb@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ---- arch/x86/Makefile | 1 - 2 files changed, 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 07e01149e3bf..72ace9515a07 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1443,12 +1443,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. -config CC_STACKPROTECTOR_ALL - bool - config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" - select CC_STACKPROTECTOR_ALL ---help--- This option turns on the -fstack-protector GCC feature. This feature puts, at the beginning of functions, a canary value on diff --git a/arch/x86/Makefile b/arch/x86/Makefile index a012ee8ef803..d2d24c9ee64d 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -76,7 +76,6 @@ ifdef CONFIG_CC_STACKPROTECTOR cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y) stackp-y := -fstack-protector - stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all KBUILD_CFLAGS += $(stackp-y) else $(warning stack protector enabled but no compiler support) -- cgit v1.2.2 From ae1b22f6e46c03cede7cea234d0bf2253b4261cf Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 26 Oct 2009 14:26:04 +1030 Subject: x86: Side-step lguest problem by only building cmpxchg8b_emu for pre-Pentium Commit 79e1dd05d1a22 "x86: Provide an alternative() based cmpxchg64()" broke lguest, even on systems which have cmpxchg8b support. The emulation code gets used until alternatives get run, but it contains native instructions, not their paravirt alternatives. The simplest fix is to turn this code off except for 386 and 486 builds. Reported-by: Johannes Stezenbach Signed-off-by: Rusty Russell Acked-by: H. 
Peter Anvin Cc: lguest@ozlabs.org Cc: Arjan van de Ven Cc: Jeremy Fitzhardinge Cc: Linus Torvalds LKML-Reference: <200910261426.05769.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index f2824fb8c79c..2649840d888f 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -400,7 +400,7 @@ config X86_TSC config X86_CMPXCHG64 def_bool y - depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM + depends on !M386 && !M486 # this should be set for all -march=.. options where the compiler # generates cmov. -- cgit v1.2.2 From 72ed7de74e8f0fad0d8e567ae1f987b740accb3f Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 26 Oct 2009 11:11:43 +0100 Subject: x86: crash_dump: Fix non-pae kdump kernel memory accesses Non-PAE 32-bit dump kernels may wrap an address around 4G and poke unwanted space. ptes there are 32 bits long, and since pfn << PAGE_SHIFT may exceed this limit, high pfn bits are cropped and a wrong address is mapped by kmap_atomic_pfn in copy_oldmem_page. Don't allow this behavior in non-PAE kdump kernels by checking pfns passed into copy_oldmem_page. In the case of failure, the userspace process gets EFAULT. [v2] - fix comments - move ifdefs inside the function Signed-off-by: Jiri Slaby Cc: Vivek Goyal Cc: Eric W. Biederman Cc: Simon Horman Cc: Paul Mundt LKML-Reference: <1256551903-30567-1-git-send-email-jirislaby@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/crash_dump_32.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index f7cdb3b457aa..cd97ce18c29d 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c @@ -16,6 +16,22 @@ static void *kdump_buf_page; /* Stores the physical address of elf header of crash image. */ unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; +static inline bool is_crashed_pfn_valid(unsigned long pfn) +{ +#ifndef CONFIG_X86_PAE + /* + * non-PAE kdump kernel executed from a PAE one will crop high pte + * bits and poke unwanted space counting again from address 0, we + * don't want that. pte must fit into unsigned long. In fact the + * test checks high 12 bits for being zero (pfn will be shifted left + * by PAGE_SHIFT). + */ + return pte_pfn(pfn_pte(pfn, __pgprot(0))) == pfn; +#else + return true; +#endif +} + /** * copy_oldmem_page - copy one page from "oldmem" * @pfn: page frame number to be copied @@ -41,6 +57,9 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!csize) return 0; + if (!is_crashed_pfn_valid(pfn)) + return -EFAULT; + vaddr = kmap_atomic_pfn(pfn, KM_PTE0); if (!userbuf) { -- cgit v1.2.2 From 81766741fe1eee3884219e8daaf03f466f2ed52f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 26 Oct 2009 15:20:29 +0000 Subject: x86-64: Fix register leak in 32-bit syscall auditing Restoring %ebp after the call to audit_syscall_exit() is not only unnecessary (because the register didn't get clobbered), but in the sysenter case wasn't even doing the right thing: It loaded %ebp from a location below the top of stack (RBP < ARGOFFSET), i.e. arbitrary kernel data got passed back to user mode in the register.
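Stepping back to the kdump fix above, a userspace model of the pfn truncation that is_crashed_pfn_valid() guards against (PAGE_SHIFT assumed to be 12; pfn values hypothetical):

    #include <stdio.h>

    #define PAGE_SHIFT 12

    /* A non-PAE pte stores pfn << PAGE_SHIFT in a 32-bit unsigned long. */
    static int pfn_fits_nonpae_pte(unsigned long pfn)
    {
        unsigned long pte = pfn << PAGE_SHIFT;  /* high bits cropped on 32-bit */
        return (pte >> PAGE_SHIFT) == pfn;      /* round-trip, like pte_pfn(pfn_pte(...)) */
    }

    int main(void)
    {
        printf("%d\n", pfn_fits_nonpae_pte(0x000fffffUL)); /* last page below 4G: 1 */
        printf("%d\n", pfn_fits_nonpae_pte(0x00100000UL)); /* first page at 4G: 0 on 32-bit */
        return 0;
    }

On a 32-bit build the second pfn fails the round-trip, which is exactly the case copy_oldmem_page() now rejects with -EFAULT.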
Signed-off-by: Jan Beulich Acked-by: Roland McGrath Cc: LKML-Reference: <4AE5CC4D020000780001BD13@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 1733f9f65e82..581b0568fe19 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -204,7 +204,7 @@ sysexit_from_sys_call: movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ .endm - .macro auditsys_exit exit,ebpsave=RBP + .macro auditsys_exit exit testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) jnz ia32_ret_from_sys_call TRACE_IRQS_ON @@ -217,7 +217,6 @@ sysexit_from_sys_call: call audit_syscall_exit GET_THREAD_INFO(%r10) movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ - movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi cli TRACE_IRQS_OFF @@ -351,7 +350,7 @@ cstar_auditsys: jmp cstar_dispatch sysretl_audit: - auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */ + auditsys_exit sysretl_from_sys_call #endif cstar_tracesys: -- cgit v1.2.2 From 772be899bc022ef2b911c3611b487d417e3269c3 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 20 Oct 2009 12:54:02 +0800 Subject: x86: Make EFI RTC function depend on 32bit again The EFI RTC functions are only available on 32 bit. commit 7bd867df (x86: Move get/set_wallclock to x86_platform_ops) removed the 32bit dependency which leads to boot crashes on 64bit EFI systems. Add the dependency back. Solves: http://bugzilla.kernel.org/show_bug.cgi?id=14466 Tested-by: Matthew Garrett Signed-off-by: Feng Tang LKML-Reference: <20091020125402.028d66d5@feng-desktop> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/efi.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index ad5bd988fb79..cdcfb122f256 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -454,8 +454,10 @@ void __init efi_init(void) if (add_efi_memmap) do_add_efi_memmap(); +#ifdef CONFIG_X86_32 x86_platform.get_wallclock = efi_get_time; x86_platform.set_wallclock = efi_set_rtc_mmss; +#endif /* Setup for EFI runtime service */ reboot_type = BOOT_EFI; -- cgit v1.2.2 From 973df35ed9ff7806403e793a2ad7e9bd4c2fd2a9 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 27 Oct 2009 16:54:19 -0700 Subject: xen: set up mmu_ops before trying to set any ptes xen_setup_stackprotector() ends up trying to set page protections, so we need to have vm_mmu_ops set up before trying to do so. Failing to do so causes an early boot crash. [ Impact: Fix early crash under Xen. ] Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/enlighten.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 3439616d69f1..23a4d80fb39e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1075,6 +1075,8 @@ asmlinkage void __init xen_start_kernel(void) * Set up some pagetable state before starting to set any ptes. */ + xen_init_mmu_ops(); + /* Prevent unwanted bits from being set in PTEs. 
*/ __supported_pte_mask &= ~_PAGE_GLOBAL; if (!xen_initial_domain()) @@ -1099,7 +1101,6 @@ asmlinkage void __init xen_start_kernel(void) */ xen_setup_stackprotector(); - xen_init_mmu_ops(); xen_init_irq_ops(); xen_init_cpuid_mask(); -- cgit v1.2.2 From ca0207114f1708b563f510b7781a360ec5b98359 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 28 Oct 2009 18:02:26 +0100 Subject: x86/amd-iommu: Un__init function required on shutdown The function iommu_feature_disable is required on system shutdown to disable the IOMMU but it is marked as __init. This may result in a panic if the memory is reused. This patch fixes this bug. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 498c8c79aaa5..1e423b2add15 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -240,7 +240,7 @@ static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } -static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) +static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) { u32 ctrl; -- cgit v1.2.2 From 16121d70fdf9eeb05ead46b241a293156323dbbe Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sun, 1 Nov 2009 19:27:05 -0500 Subject: x86: Fix printk message typo in mtrr cleanup code Trivial typo. Signed-off-by: Dave Jones LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/cleanup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 315738c74aad..73c86db5acbe 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -846,7 +846,7 @@ int __init mtrr_cleanup(unsigned address_bits) sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); range_sums = sum_ranges(range, nr_range); - printk(KERN_INFO "total RAM coverred: %ldM\n", + printk(KERN_INFO "total RAM covered: %ldM\n", range_sums >> (20 - PAGE_SHIFT)); if (mtrr_chunk_size && mtrr_gran_size) { -- cgit v1.2.2 From 05154752cf3767c544b65b5e340793d40b3f1229 Mon Sep 17 00:00:00 2001 From: Gottfried Haider Date: Mon, 2 Nov 2009 11:51:11 +0100 Subject: x86: Add reboot quirk for 3 series Mac mini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reboot does not work out of the box on my "Early 2009" Mac mini (3,1). Detect this machine via DMI as we do for recent MacBooks. 
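For context, such DMI quirk tables are consulted once during setup; a rough sketch of the consumer side (the table and callback come from the patch, the initcall wrapper is paraphrased):

    static int __init pci_reboot_init(void)
    {
        /* walks the table and runs set_pci_reboot() for any entry whose
           DMI_MATCH fields all match the firmware-provided DMI strings */
        dmi_check_system(pci_reboot_dmi_table);
        return 0;
    }
    core_initcall(pci_reboot_init);

So adding the Macmini3,1 entry is all that is needed to switch this machine's reboot method to the PCI method.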
Signed-off-by: Gottfried Haider Cc: Ozan Çağlayan Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index a1a3cdda06e1..f93078746e00 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -436,6 +436,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"), }, }, + { /* Handle problems with rebooting on Apple Macmini3,1 */ + .callback = set_pci_reboot, + .ident = "Apple Macmini3,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), + }, + }, { } }; -- cgit v1.2.2 From 6b9de613ae9c79b637e070136585dde029578065 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 2 Nov 2009 20:36:51 +0100 Subject: sched: Disable SD_PREFER_LOCAL at node level Yanmin Zhang reported that SD_PREFER_LOCAL induces an order of magnitude increase in select_task_rq_fair() overhead while running heavy wakeup benchmarks (tbench and vmark). Since SD_BALANCE_WAKE is off at node level, turn SD_PREFER_LOCAL off as well pending further investigation. Reported-by: Zhang, Yanmin Signed-off-by: Mike Galbraith Cc: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index d823c245f63b..40e37b10c6c0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -143,7 +143,7 @@ extern unsigned long node_remap_size[]; | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ - | 1*SD_PREFER_LOCAL \ + | 0*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ | 0*SD_SHARE_PKG_RESOURCES \ -- cgit v1.2.2 From 82d6469916c6fcfa345636a49004c9d1753905d1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 22 Oct 2009 16:41:15 -0700 Subject: xen: mask extended topology info in cpuid A Xen guest never needs to know about extended topology, and knowing would just confuse it. This patch just zeros ebx in leaf 0xb which indicates no topology info, preventing a crash under Xen on cpus which support this leaf. Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/xen/enlighten.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 23a4d80fb39e..dfbf70e65860 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -178,6 +178,7 @@ static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { + unsigned maskebx = ~0; unsigned maskecx = ~0; unsigned maskedx = ~0; @@ -185,9 +186,16 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. 
*/ - if (*ax == 1) { + switch (*ax) { + case 1: maskecx = cpuid_leaf1_ecx_mask; maskedx = cpuid_leaf1_edx_mask; + break; + + case 0xb: + /* Suppress extended topology stuff */ + maskebx = 0; + break; } asm(XEN_EMULATE_PREFIX "cpuid" @@ -197,6 +205,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, "=d" (*dx) : "0" (*ax), "2" (*cx)); + *bx &= maskebx; *cx &= maskecx; *dx &= maskedx; } -- cgit v1.2.2 From 89240ba059ca468ae7a8346edf7f95082458c2fc Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Tue, 3 Nov 2009 10:22:40 +0100 Subject: x86, fs: Fix x86 procfs stack information for threads on 64-bit This patch fixes two issues in the procfs stack information on x86-64 Linux. The 32-bit loader compat_do_execve did not store the stack start (this was figured out by Alexey Dobriyan). The stack information on an x86_64 kernel always shows 0 kbyte stack usage, because of a missing implementation of the KSTK_ESP macro which always returned -1. The new implementation now returns the right value. Signed-off-by: Stefani Seibold Cc: Americo Wang Cc: Alexey Dobriyan Cc: Al Viro Cc: Andrew Morton LKML-Reference: <1257240160.4889.24.camel@wall-e> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/process_64.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c3429e8b2424..c9786480f0fe 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -1000,7 +1000,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8)) #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) -#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */ +extern unsigned long KSTK_ESP(struct task_struct *task); #endif /* CONFIG_X86_64 */ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ad535b683170..eb62cbcaa490 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -664,3 +664,8 @@ long sys_arch_prctl(int code, unsigned long addr) return do_arch_prctl(current, code, addr); } +unsigned long KSTK_ESP(struct task_struct *task) +{ + return (test_tsk_thread_flag(task, TIF_IA32)) ? + (task_pt_regs(task)->sp) : ((task)->thread.usersp); +} -- cgit v1.2.2 From a9e38c3e01ad242fe2a625354cf065c34b01e3aa Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 23 Oct 2009 09:37:00 +0200 Subject: KVM: x86: Catch potential overrun in MCE setup We only allocate memory for 32 MCE banks (KVM_MAX_MCE_BANKS) but we allow user space to fill up to 255 on setup (mcg_cap & 0xff), corrupting kernel memory. Catch these overflows.
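The shape of the fix (full diff below) is the usual bounds check of a user-supplied count against the compile-time array size before any per-bank state is touched; a sketch:

    unsigned bank_num = mcg_cap & 0xff;     /* count comes from user space */

    if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
        return -EINVAL;                     /* reject before writing banks[] */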
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9b9695322f56..8a93fa894ba6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1692,7 +1692,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, unsigned bank_num = mcg_cap & 0xff, bank; r = -EINVAL; - if (!bank_num) + if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) goto out; if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000)) goto out; -- cgit v1.2.2 From abb3911965c1bd8eea305f64d4840a314259d96d Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 25 Oct 2009 17:42:02 +0200 Subject: KVM: get_tss_base_addr() should return a gpa_t If the TSS we are switching to resides in high memory, the task switch will fail since the address will be truncated. Windows2k3 does this sometimes when running with more than 4G. Cc: stable@kernel.org Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8a93fa894ba6..ae07d261527c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4051,7 +4051,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); } -static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, +static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu, struct desc_struct *seg_desc) { u32 base_addr = get_desc_base(seg_desc); -- cgit v1.2.2 From 2c75910f1aa042be1dd769378d2611bf551721ac Mon Sep 17 00:00:00 2001 From: Chris Lalancette Date: Thu, 5 Nov 2009 11:47:08 +0100 Subject: x86: Make sure get_user_desc() doesn't sign extend. The current implementation of get_user_desc() sign extends the return value because of integer promotion rules. For the most part, this doesn't matter, because the top bit of base2 is usually 0. If, however, that bit is 1, then the entire value will be 0xffff... which is probably not what the caller intended. This patch casts the entire thing to unsigned before returning, which generates almost the same assembly as the current code but replaces the final "cltq" (sign extend) with a "mov %eax %eax" (zero-extend). This fixes booting certain guests under KVM. Signed-off-by: Chris Lalancette Signed-off-by: Linus Torvalds --- arch/x86/include/asm/desc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index e8de2f6f5ca5..617bd56b3070 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -288,7 +288,7 @@ static inline void load_LDT(mm_context_t *pc) static inline unsigned long get_desc_base(const struct desc_struct *desc) { - return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); + return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); } static inline void set_desc_base(struct desc_struct *desc, unsigned long base) -- cgit v1.2.2 From f1b291d4c47440cbfc1a478e88800e2742d60a80 Mon Sep 17 00:00:00 2001 From: Simon Kagstrom Date: Fri, 6 Nov 2009 15:44:04 +0100 Subject: x86: Add Phoenix/MSC BIOSes to lowmem corruption list We have a board with a Phoenix/MSC BIOS which also corrupts the low 64KB of RAM, so add an entry to the table.
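A self-contained userspace model of the get_user_desc()/get_desc_base() promotion issue fixed above (descriptor field values hypothetical):

    #include <stdio.h>

    int main(void)
    {
        unsigned short base0 = 0;
        unsigned char base1 = 0, base2 = 0x80;  /* top bit of base2 set */

        /* base2 promotes to int; 0x80 << 24 sets the sign bit, and the
           negative int then sign-extends when widened to unsigned long */
        unsigned long bad = base0 | (base1 << 16) | (base2 << 24);

        /* casting the int expression to unsigned forces zero-extension */
        unsigned long good = (unsigned)(base0 | (base1 << 16) | (base2 << 24));

        printf("bad=%#lx good=%#lx\n", bad, good);
        /* on a 64-bit build: bad=0xffffffff80000000 good=0x80000000 */
        return 0;
    }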
Signed-off-by: Simon Kagstrom LKML-Reference: <20091106154404.002648d9@marrow.netinsight.se> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e09f0e2c14b5..2a34f9c5be21 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -659,6 +659,13 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"), }, }, + { + .callback = dmi_low_memory_corruption, + .ident = "Phoenix/MSC BIOS", + .matches = { + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), + }, + }, { /* * AMI BIOS with low memory corruption was found on Intel DG45ID board. -- cgit v1.2.2 From de2a47cf2b3f59ef9664b277f4021b91af13598e Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Thu, 5 Nov 2009 10:43:51 +0800 Subject: x86: Fix error return sequence in __ioremap_caller() The kernel failed to free the memtype if get_vm_area_caller() failed in __ioremap_caller(). This patch introduces an error path to fix this and cleans up the repetitive error return sequences that contributed to the creation of the bug. Signed-off-by: Xiaotian Feng Acked-by: Suresh Siddha Cc: Venkatesh Pallipadi Cc: H. Peter Anvin LKML-Reference: <1257389031-20429-1-git-send-email-dfeng@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 334e63ca7b2b..2feb9bdedaaf 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -170,8 +170,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, (unsigned long long)phys_addr, (unsigned long long)(phys_addr + size), prot_val, new_prot_val); - free_memtype(phys_addr, phys_addr + size); - return NULL; + goto err_free_memtype; } prot_val = new_prot_val; } @@ -197,26 +196,25 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, */ area = get_vm_area_caller(size, VM_IOREMAP, caller); if (!area) - return NULL; + goto err_free_memtype; area->phys_addr = phys_addr; vaddr = (unsigned long) area->addr; - if (kernel_map_sync_memtype(phys_addr, size, prot_val)) { - free_memtype(phys_addr, phys_addr + size); - free_vm_area(area); - return NULL; - } + if (kernel_map_sync_memtype(phys_addr, size, prot_val)) + goto err_free_area; - if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) { - free_memtype(phys_addr, phys_addr + size); - free_vm_area(area); - return NULL; - } + if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) + goto err_free_area; ret_addr = (void __iomem *) (vaddr + offset); mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); return ret_addr; +err_free_area: + free_vm_area(area); +err_free_memtype: + free_memtype(phys_addr, phys_addr + size); + return NULL; } /** -- cgit v1.2.2 From eb647138acefc897c0eb6eddd5d3650966dfe627 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sun, 8 Nov 2009 12:12:14 +0100 Subject: x86/PCI: Adjust GFP mask handling for coherent allocations Rather than forcing GFP flags and DMA mask to be inconsistent, GFP flags should be determined even for the fallback device through dma_alloc_coherent_mask()/dma_alloc_coherent_gfp_flags().
This restores 64-bit behavior as it was prior to commits 8965eb19386fdf5ccd0ef8b02593eb8560aa3416 and 4a367f3a9dbf2e7ffcee4702203479809236ee6e (not sure why there are two of them), where GFP_DMA was forced on for 32-bit, but not for 64-bit, with the slight adjustment that afaict even 32-bit doesn't need this without CONFIG_ISA. Signed-off-by: Jan Beulich Acked-by: Takashi Iwai LKML-Reference: <4AF18187020000780001D8AA@vpn.id2.novell.com> Signed-off-by: Ingo Molnar Signed-off-by: Jesse Barnes --- arch/x86/include/asm/dma-mapping.h | 10 +++++++--- arch/x86/kernel/pci-dma.c | 6 ++---- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 0ee770d23d0e..6a25d5d42836 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -14,6 +14,12 @@ #include #include +#ifdef CONFIG_ISA +# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) +#else +# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) +#endif + extern dma_addr_t bad_dma_address; extern int iommu_merge; extern struct device x86_dma_fallback_dev; @@ -124,10 +130,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) return memory; - if (!dev) { + if (!dev) dev = &x86_dma_fallback_dev; - gfp |= GFP_DMA; - } if (!is_device_dma_capable(dev)) return NULL; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index b2a71dca5642..a6e804d16c35 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -45,12 +45,10 @@ int iommu_pass_through __read_mostly; dma_addr_t bad_dma_address __read_mostly = 0; EXPORT_SYMBOL(bad_dma_address); -/* Dummy device used for NULL arguments (normally ISA). Better would - be probably a smaller DMA mask, but this is bug-to-bug compatible - to older i386. */ +/* Dummy device used for NULL arguments (normally ISA). 
*/ struct device x86_dma_fallback_dev = { .init_name = "fallback device", - .coherent_dma_mask = DMA_BIT_MASK(32), + .coherent_dma_mask = ISA_DMA_BIT_MASK, .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, }; EXPORT_SYMBOL(x86_dma_fallback_dev); -- cgit v1.2.2 From 506f90eeae682dc96c11c7aefac0262b3a560b49 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 29 Oct 2009 14:45:52 +0100 Subject: x86, amd-ucode: Check UCODE_MAGIC before loading the container file Signed-off-by: Borislav Petkov Signed-off-by: Andreas Herrmann LKML-Reference: <20091029134552.GC30802@alberich.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 366baa179913..f4c538b681ca 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -317,6 +317,12 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) return UCODE_NFOUND; } + if (*(u32 *)firmware->data != UCODE_MAGIC) { + printk(KERN_ERR "microcode: invalid UCODE_MAGIC (0x%08x)\n", + *(u32 *)firmware->data); + return UCODE_ERROR; + } + ret = generic_load_microcode(cpu, firmware->data, firmware->size); release_firmware(firmware); -- cgit v1.2.2 From f7f3cad06080f14f60b1453af94463ff68ea2739 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Sat, 24 Oct 2009 17:25:38 +0200 Subject: [CPUFREQ] longhaul: select Longhaul version 2 for capable CPUs There is a typo in the longhaul detection code so only Longhaul v1 or Longhaul v3 is selected. Longhaul v2 is not selected even for CPUs which are capable of it. Tested on PCChips Giga Pro board. Frequency changes work and the Longhaul v2 detects that the board is not capable of changing CPU voltage. Signed-off-by: Krzysztof Helt Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/longhaul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index ce2ed3e4aad9..cabd2fa3fc93 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -813,7 +813,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr)); break; case 1 ... 15: - longhaul_version = TYPE_LONGHAUL_V1; + longhaul_version = TYPE_LONGHAUL_V2; if (c->x86_mask < 8) { cpu_model = CPU_SAMUEL2; cpuname = "C3 'Samuel 2' [C5B]"; -- cgit v1.2.2 From c53614ec17fe6296a696aa4ac71a799814bb50c1 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 6 Oct 2009 17:36:53 +0200 Subject: [CPUFREQ] powernow-k8: Fix test in get_transition_latency() The ! operator makes its operand a bool before the comparison, so !boot_cpu_data.x86 == 0x11 never tests the family value; compare with != directly instead. Signed-off-by: Roel Kluin Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 6394aa5c7985..3f12dabeab52 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1022,7 +1022,7 @@ static int get_transition_latency(struct powernow_k8_data *data) * set it to 1 to avoid problems in the future. * For all others it's a BIOS bug. 
*/ - if (!boot_cpu_data.x86 == 0x11) + if (boot_cpu_data.x86 != 0x11) printk(KERN_ERR FW_WARN PFX "Invalid zero transition " "latency\n"); max_latency = 1; -- cgit v1.2.2 From 293afe44d75abce4252db76cbb303a7de4297ce1 Mon Sep 17 00:00:00 2001 From: John Villalovos Date: Fri, 25 Sep 2009 13:30:08 -0400 Subject: [CPUFREQ] acpi-cpufreq: blacklist Intel 0f68: Fix HT detection and put in notification message Removing the SMT/HT check, since the Errata doesn't mention Hyper-Threading. Adding in a printk, so that the user knows why acpi-cpufreq refuses to load. Also, once system is blacklisted, don't repeat checks to see if blacklisted. This also causes the message to only be printed once, rather than for each CPU. Signed-off-by: John L. Villalovos Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 7d5c3b0ea8da..8b581d3905cb 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -526,15 +526,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = { static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) { - /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf + /* Intel Xeon Processor 7100 Series Specification Update + * http://www.intel.com/Assets/PDF/specupdate/314554.pdf * AL30: A Machine Check Exception (MCE) Occurring during an * Enhanced Intel SpeedStep Technology Ratio Change May Cause - * Both Processor Cores to Lock Up when HT is enabled*/ + * Both Processor Cores to Lock Up. */ if (c->x86_vendor == X86_VENDOR_INTEL) { if ((c->x86 == 15) && (c->x86_model == 6) && - (c->x86_mask == 8) && smt_capable()) + (c->x86_mask == 8)) { + printk(KERN_INFO "acpi-cpufreq: Intel(R) " + "Xeon(R) 7100 Errata AL30, processors may " + "lock up on frequency changes: disabling " + "acpi-cpufreq.\n"); return -ENODEV; + } } return 0; } @@ -549,13 +555,18 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) unsigned int result = 0; struct cpuinfo_x86 *c = &cpu_data(policy->cpu); struct acpi_processor_performance *perf; +#ifdef CONFIG_SMP + static int blacklisted; +#endif dprintk("acpi_cpufreq_cpu_init\n"); #ifdef CONFIG_SMP - result = acpi_cpufreq_blacklist(c); - if (result) - return result; + if (blacklisted) + return blacklisted; + blacklisted = acpi_cpufreq_blacklist(c); + if (blacklisted) + return blacklisted; #endif data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); -- cgit v1.2.2 From 8dca15e40889e5d5e9655b03ba79c26200f760ce Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 2 Nov 2009 23:35:30 -0800 Subject: [CPUFREQ] speedstep-ich: fix error caused by 394122ab144dae4b276d74644a2f11c44a60ac5c "[CPUFREQ] cpumask: avoid playing with cpus_allowed in speedstep-ich.c" changed the code to mistakenly pass the current cpu as the "processor" argument of speedstep_get_frequency(), whereas it should be the type of the processor. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=14340 Based on a patch by Dave Mueller. 
Signed-off-by: Rusty Russell Acked-by: Dominik Brodowski Reported-by: Dave Mueller Cc: Signed-off-by: Andrew Morton Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 6911e91fb4f6..3ae5a7a3a500 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -232,28 +232,23 @@ static unsigned int speedstep_detect_chipset(void) return 0; } -struct get_freq_data { - unsigned int speed; - unsigned int processor; -}; - -static void get_freq_data(void *_data) +static void get_freq_data(void *_speed) { - struct get_freq_data *data = _data; + unsigned int *speed = _speed; - data->speed = speedstep_get_frequency(data->processor); + *speed = speedstep_get_frequency(speedstep_processor); } static unsigned int speedstep_get(unsigned int cpu) { - struct get_freq_data data = { .processor = cpu }; + unsigned int speed; /* You're supposed to ensure CPU is online. */ - if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0) + if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0) BUG(); - dprintk("detected %u kHz as current frequency\n", data.speed); - return data.speed; + dprintk("detected %u kHz as current frequency\n", speed); + return speed; } /** -- cgit v1.2.2