From e517a5e97080bbe52857bd0d7df9b66602d53c4d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 10 Sep 2009 17:48:48 -0700 Subject: agp/intel: Fix the pre-9xx chipset flush. Ever since we enabled GEM, the pre-9xx chipsets (particularly 865) have had serious stability issues. Back in May a wbinvd was added to the DRM to work around much of the problem. Some failure remained -- easily visible by dragging a window around on an X -retro desktop, or by looking at bugzilla. The chipset flush was on the right track -- hitting the right amount of memory, and it appears to be the only way to flush on these chipsets, but the flush page was mapped uncached. As a result, the writes trying to clear the writeback cache ended up bypassing the cache, and not flushing anything! The wbinvd would flush out other writeback data and often cause the data we wanted to get flushed, but not always. By removing the setting of the page to UC and instead just clflushing the data we write to try to flush it, we get the desired behavior with no wbinvd. This exports clflush_cache_range(), which was laying around and happened to basically match the code I was otherwise going to copy from the DRM. Signed-off-by: Eric Anholt Signed-off-by: Brice Goglin Cc: stable@kernel.org --- arch/x86/mm/pageattr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7e600c1962db..5866b28eede1 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -143,6 +143,7 @@ void clflush_cache_range(void *vaddr, unsigned int size) mb(); } +EXPORT_SYMBOL_GPL(clflush_cache_range); static void __cpa_flush_all(void *arg) { -- cgit v1.2.2 From a6e04aa92965565968573a220a35b4e907385697 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:09 +0200 Subject: HWPOISON: x86: Add VM_FAULT_HWPOISON handling to x86 page fault handler v2 Add VM_FAULT_HWPOISON handling to the x86 page fault handler. This is very similar to VM_FAULT_OOM, the only difference is that a different si_code is passed to user space and the new addr_lsb field is initialized. v2: Make the printk more verbose/unique Cc: x86@kernel.org Signed-off-by: Andi Kleen --- arch/x86/mm/fault.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 775a020990a5..8ba562408200 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, info.si_errno = 0; info.si_code = si_code; info.si_addr = (void __user *)address; + info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0; force_sig_info(si_signo, &info, tsk); } @@ -790,10 +791,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code, } static void -do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) +do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, + unsigned int fault) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; + int code = BUS_ADRERR; up_read(&mm->mmap_sem); @@ -809,7 +812,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) tsk->thread.error_code = error_code; tsk->thread.trap_no = 14; - force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); +#ifdef CONFIG_MEMORY_FAILURE + if (fault & VM_FAULT_HWPOISON) { + printk(KERN_ERR + "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", + tsk->comm, tsk->pid, address); + code = BUS_MCEERR_AR; + } +#endif + force_sig_info_fault(SIGBUS, code, address, tsk); } static noinline void @@ -819,8 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, if (fault & VM_FAULT_OOM) { out_of_memory(regs, error_code, address); } else { - if (fault & VM_FAULT_SIGBUS) - do_sigbus(regs, error_code, address); + if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON)) + do_sigbus(regs, error_code, address, fault); else BUG(); } -- cgit v1.2.2 From 7715a1e887c5dedc9cf1e57a8f35df261c749b08 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 18 Sep 2009 03:41:10 -0700 Subject: x86/PCI: default pcibus cpumask to all cpus if it lacks affinity The early initialization of the pci bus to node mapping leaves all busses with a node id of -1 if it lacks memory affinity. Thus, cpumask_of_pcibus must return all online cpus for such busses. Signed-off-by: David Rientjes Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index f76a162c082c..ada8c201d513 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -143,7 +143,11 @@ static inline int __pcibus_to_node(const struct pci_bus *bus) static inline const struct cpumask * cpumask_of_pcibus(const struct pci_bus *bus) { - return cpumask_of_node(__pcibus_to_node(bus)); + int node; + + node = __pcibus_to_node(bus); + return (node == -1) ? cpu_online_mask : + cpumask_of_node(node); } #endif -- cgit v1.2.2 From 76baeebf7df493703eeb4428eac015bdb7fabda6 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 18 Sep 2009 09:13:57 -0700 Subject: x86/PCI: make 32 bit NUMA node array int, not unsigned char We use -1 to indicate no node affinity, so we need a signed type here or all sorts of bad things happen, like crashes in dev_attr_show as reported by Ingo: [ 158.058140] warning: `dbus-daemon' uses 32-bit capabilities (legacy support in use) [ 159.370562] BUG: unable to handle kernel NULL pointer dereference at (null) [ 159.372694] IP: [] bitmap_scnprintf+0x72/0xd0 [ 159.372694] PGD 71d3e067 PUD 7052e067 PMD 0 [ 159.372694] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC [ 159.372694] last sysfs file: /sys/devices/pci0000:00/0000:00:01.0/local_cpus [ 159.372694] CPU 0 [ 159.372694] Pid: 7364, comm: irqbalance Not tainted 2.6.31-tip #8043 System Product Name [ 159.372694] RIP: 0010:[] [] bitmap_scnprintf+0x72/0xd0 [ 159.372694] RSP: 0018:ffff8800712a1e38 EFLAGS: 00010246 [ 159.372694] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 159.372694] RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff880077dc5000 [ 159.372694] RBP: ffff8800712a1e68 R08: 0000000000000001 R09: 0000000000000001 [ 159.372694] R10: ffffffff8215c47c R11: 0000000000000000 R12: 0000000000000000 [ 159.372694] R13: 0000000000000000 R14: 0000000000000ffe R15: ffff880077dc5000 [ 159.372694] FS: 00007f5f578f76f0(0000) GS:ffff880007000000(0000) knlGS:0000000000000000 [ 159.372694] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 159.372694] CR2: 0000000000000000 CR3: 0000000071a77000 CR4: 00000000000006f0 [ 159.372694] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 159.372694] DR3: ffffffff835109dc DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 159.372694] Process irqbalance (pid: 7364, threadinfo ffff8800712a0000, task ffff880070773000) [ 159.372694] Stack: [ 159.372694] 2222222222222222 ffff880077dc5000 fffffffffffffffb ffff88007d366b40 [ 159.372694] <0> ffff8800712a1f48 ffff88007d3840a0 ffff8800712a1e88 ffffffff8146332b [ 159.372694] <0> fffffffffffffff4 ffffffff82450718 ffff8800712a1ea8 ffffffff815a9a1f [ 159.372694] Call Trace: [ 159.372694] [] local_cpus_show+0x3b/0x60 [ 159.372694] [] dev_attr_show+0x2f/0x60 [ 159.372694] [] sysfs_read_file+0xbf/0x1d0 [ 159.372694] [] vfs_read+0xc9/0x180 [ 159.372694] [] sys_read+0x55/0x90 [ 159.372694] [] system_call_fastpath+0x16/0x1b [ 159.372694] Code: 41 b9 01 00 00 00 44 8d 46 03 49 63 fc 0f 49 d3 c1 f8 1f 4c 01 ff c1 e8 1a c1 fa 06 41 c1 e8 02 8d 0c 03 48 63 d2 83 e1 3f 29 c1 <49> 8b 44 d5 00 48 c7 c2 8c 37 16 82 48 d3 e8 89 f1 44 89 f6 49 [ 159.372694] RIP [] bitmap_scnprintf+0x72/0xd0 [ 159.372694] RSP [ 159.372694] CR2: 0000000000000000 [ 159.600828] ---[ end trace 35550c356e84e60c ]--- Reported-by: Ingo Molnar Tested-by: Ingo Molnar Signed-off-by: Jesse Barnes --- arch/x86/pci/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 5db96d4304de..1331fcf26143 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -646,7 +646,7 @@ int get_mp_bus_to_node(int busnum) #else /* CONFIG_X86_32 */ -static unsigned char mp_bus_to_node[BUS_NR] = { +static int mp_bus_to_node[BUS_NR] = { [0 ... BUS_NR - 1] = -1 }; -- cgit v1.2.2 From d223246ef7e6d73c8e3d9b58f27f2eb3fe95e25d Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Wed, 16 Sep 2009 16:44:26 -0400 Subject: x86: fix fragile computation of vsyscall address Previously, the address of the vsyscall page (VSYSCALL_PHYS_ADDR, VSYSCALL_VIRT_ADDR) was computed by arithmetic on the address of the last section. This leads to bugs when new sections are inserted, such as the one fixed by commit d312ceda567ab91acd756cde95ac5fbc6b40ed40. Let's compute it from the current address instead. Signed-off-by: Anders Kaseorg Signed-off-by: H. Peter Anvin --- arch/x86/kernel/vmlinux.lds.S | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0ccb57d5ee35..b96ca472fa26 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -135,24 +135,21 @@ SECTIONS #ifdef CONFIG_X86_64 #define VSYSCALL_ADDR (-10*1024*1024) -#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data) + SIZEOF(.data) + \ - PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) -#define VSYSCALL_VIRT_ADDR ((ADDR(.data) + SIZEOF(.data) + \ - PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) -#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) +#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET) #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) -#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR) +#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0) #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) + . = ALIGN(4096); + __vsyscall_0 = .; + . = VSYSCALL_ADDR; - .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { + .vsyscall_0 : AT(VLOAD(.vsyscall_0)) { *(.vsyscall_0) } :user - __vsyscall_0 = VSYSCALL_VIRT_ADDR; - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) @@ -192,11 +189,9 @@ SECTIONS *(.vsyscall_3) } - . = VSYSCALL_VIRT_ADDR + PAGE_SIZE; + . = __vsyscall_0 + PAGE_SIZE; #undef VSYSCALL_ADDR -#undef VSYSCALL_PHYS_ADDR -#undef VSYSCALL_VIRT_ADDR #undef VLOAD_OFFSET #undef VLOAD #undef VVIRT_OFFSET -- cgit v1.2.2 From 1dc818c1c574a4caa7b575bd9018d04090fb6c86 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Wed, 16 Sep 2009 16:44:27 -0400 Subject: x86: convert compressed loader to use __HEAD and HEAD_TEXT macros. This has the consequence of changing the section name use for head code from ".text.head" to ".head.text". Linus suggested that we merge the ".text.head" section with ".text" (presumably while preserving the fact that the head code starts at 0). When I tried this it caused the kernel to not boot. Signed-off-by: Tim Abbott Cc: Ingo Molnar Cc: Sam Ravnborg Cc: Linus Torvalds Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 3 ++- arch/x86/boot/compressed/head_64.S | 3 ++- arch/x86/boot/compressed/vmlinux.lds.S | 6 ++++-- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 75e4f001e706..f543b70ffae2 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -23,13 +23,14 @@ */ .text +#include #include #include #include #include #include - .section ".text.head","ax",@progbits + __HEAD ENTRY(startup_32) cld /* diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index f62c284db9eb..077e1b69198e 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -24,6 +24,7 @@ .code32 .text +#include #include #include #include @@ -33,7 +34,7 @@ #include #include - .section ".text.head" + __HEAD .code32 ENTRY(startup_32) cld diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index cc353e1b3ffd..f4193bb48782 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -1,3 +1,5 @@ +#include + OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) #undef i386 @@ -18,9 +20,9 @@ SECTIONS * address 0. */ . = 0; - .text.head : { + .head.text : { _head = . ; - *(.text.head) + HEAD_TEXT _ehead = . ; } .rodata.compressed : { -- cgit v1.2.2 From 4ae59b916d269255800d69a07ac5b0c368a417f8 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Wed, 16 Sep 2009 16:44:28 -0400 Subject: x86: convert to use __HEAD and HEAD_TEXT macros. This has the consequence of changing the section name use for head code from ".text.head" to ".head.text". It also eliminates the ".text.head" output section (instead placing head code at the start of the .text output section), which should be harmless. This patch only changes the sections in the actual kernel, not those in the compressed boot loader. Signed-off-by: Tim Abbott Cc: Ingo Molnar Cc: Sam Ravnborg Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head_32.S | 2 +- arch/x86/kernel/head_64.S | 2 +- arch/x86/kernel/vmlinux.lds.S | 12 +++--------- 3 files changed, 5 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 7ffec6b3b331..64ad7696786f 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -79,7 +79,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE) * any particular GDT layout, because we load our own as soon as we * can. */ -.section .text.head,"ax",@progbits +__HEAD ENTRY(startup_32) /* test KEEP_SEGMENTS flag to see if the bootloader is asking us to not reload segments */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index fa54f78e2a05..26406601031c 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -40,7 +40,7 @@ L4_START_KERNEL = pgd_index(__START_KERNEL_map) L3_START_KERNEL = pud_index(__START_KERNEL_map) .text - .section .text.head + __HEAD .code64 .globl startup_64 startup_64: diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index b96ca472fa26..5c7826dd804b 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -65,17 +65,11 @@ SECTIONS #endif /* Text and read-only data */ - - /* bootstrapping code */ - .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { - _text = .; - *(.text.head) - } :text = 0x9090 - - /* The rest of the text */ .text : AT(ADDR(.text) - LOAD_OFFSET) { + _text = .; + /* bootstrapping code */ + HEAD_TEXT #ifdef CONFIG_X86_32 - /* not really needed, already page aligned */ . = ALIGN(PAGE_SIZE); *(.text.page_aligned) #endif -- cgit v1.2.2 From 07e81d61605f885920f31634a65aace52beb97db Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Wed, 16 Sep 2009 16:44:29 -0400 Subject: x86: Use section .data.page_aligned for the idt_table. The .data.idt section is just squashed into the .data.page_aligned output section by the linker script anyway, so it might as well be in the .data.page_aligned section. This eliminates all references to .data.idt on x86. Signed-off-by: Tim Abbott Cc: Ingo Molnar Signed-off-by: H. Peter Anvin --- arch/x86/kernel/traps.c | 6 ++---- arch/x86/kernel/vmlinux.lds.S | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 83264922a878..ea23d3b76f95 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -73,11 +73,9 @@ char ignore_fpu_irq; /* * The IDT has to be page-aligned to simplify the Pentium - * F0 0F bug workaround.. We have a special link segment - * for this. + * F0 0F bug workaround. */ -gate_desc idt_table[NR_VECTORS] - __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; +gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, }; #endif DECLARE_BITMAP(used_vectors, NR_VECTORS); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 5c7826dd804b..7d6cef363c47 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -112,7 +112,6 @@ SECTIONS #endif PAGE_ALIGNED_DATA(PAGE_SIZE) - *(.data.idt) CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES) -- cgit v1.2.2 From 123f3e1d76952759c0fc8b4fec55a3fc8084d56f Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Wed, 16 Sep 2009 16:44:30 -0400 Subject: x86: Cleanup linker script using new linker script macros. Signed-off-by: Tim Abbott Signed-off-by: H. Peter Anvin --- arch/x86/kernel/vmlinux.lds.S | 47 +++---------------------------------------- 1 file changed, 3 insertions(+), 44 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 7d6cef363c47..8d2d0a271e23 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -88,13 +88,7 @@ SECTIONS NOTES :text :note - /* Exception table */ - . = ALIGN(16); - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } :text = 0x9090 + EXCEPTION_TABLE(16) :text = 0x9090 RO_DATA(PAGE_SIZE) @@ -207,36 +201,12 @@ SECTIONS PERCPU_VADDR(0, :percpu) #endif - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - _sinittext = .; - INIT_TEXT - _einittext = .; - } + INIT_TEXT_SECTION(PAGE_SIZE) #ifdef CONFIG_X86_64 :init #endif - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - INIT_DATA - } - - . = ALIGN(16); - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } - - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } + INIT_DATA_SECTION(16) .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { __x86_cpu_dev_start = .; @@ -244,8 +214,6 @@ SECTIONS __x86_cpu_dev_end = .; } - SECURITY_INIT - . = ALIGN(8); .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { __parainstructions = .; @@ -276,15 +244,6 @@ SECTIONS EXIT_DATA } -#ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(PAGE_SIZE); - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } -#endif - #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) PERCPU(PAGE_SIZE) #endif -- cgit v1.2.2 From caa27b66bd7188fd063769eaf4b33533ef0709e6 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Mon, 20 Jul 2009 21:37:11 +0200 Subject: kbuild: use INSTALLKERNEL to select customized installkernel script Replace the use of CROSS_COMPILE to select a customized installkernel script with the possibility to set INSTALLKERNEL to select a custom installkernel script when running make: make INSTALLKERNEL=arm-installkernel install With this patch we are now more consistent across different architectures - they did not all support use of CROSS_COMPILE. The use of CROSS_COMPILE was a hack as this really belongs to gcc/binutils and the installkernel script does not change just because we change toolchain. The use of CROSS_COMPILE caused troubles with an upcoming patch that saves CROSS_COMPILE when a kernel is built - it would no longer be installable. [Thanks to Peter Z. for this hint] This patch undos what Ian did in commit: 0f8e2d62fa04441cd12c08ce521e84e5bd3f8a46 ("use ${CROSS_COMPILE}installkernel in arch/*/boot/install.sh") The patch has been lightly tested on x86 only - but all changes looks obvious. Acked-by: Peter Zijlstra Acked-by: Mike Frysinger [blackfin] Acked-by: Russell King [arm] Acked-by: Paul Mundt [sh] Acked-by: "H. Peter Anvin" [x86] Cc: Ian Campbell Cc: Tony Luck [ia64] Cc: Fenghua Yu [ia64] Cc: Hirokazu Takata [m32r] Cc: Geert Uytterhoeven [m68k] Cc: Kyle McMartin [parisc] Cc: Benjamin Herrenschmidt [powerpc] Cc: Martin Schwidefsky [s390] Cc: Thomas Gleixner [x86] Cc: Ingo Molnar [x86] Signed-off-by: Sam Ravnborg --- arch/x86/Makefile | 4 ++-- arch/x86/boot/install.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 7983c420eaf2..a012ee8ef803 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -179,8 +179,8 @@ archclean: define archhelp echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' echo ' install - Install kernel using' - echo ' (your) ~/bin/installkernel or' - echo ' (distribution) /sbin/installkernel or' + echo ' (your) ~/bin/$(INSTALLKERNEL) or' + echo ' (distribution) /sbin/$(INSTALLKERNEL) or' echo ' install to $$(INSTALL_PATH) and run lilo' echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh index 8d60ee15dfd9..d13ec1c38640 100644 --- a/arch/x86/boot/install.sh +++ b/arch/x86/boot/install.sh @@ -33,8 +33,8 @@ verify "$3" # User may have a custom install script -if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi -if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi # Default install - same as make zlilo -- cgit v1.2.2 From f86fd306605287d7c7f4f0f8e8e2a9d49d28b396 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 19 Sep 2009 10:14:33 +0200 Subject: kbuild: rename ld-option to cc-ldoption ld-option is misnamed as it test options to gcc, not to ld. Renamed it to reflect this. Cc: Andi Kleen Cc: Roland McGrath Signed-off-by: Sam Ravnborg --- arch/x86/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 88112b49f02c..6b4ffedb93c9 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -122,7 +122,7 @@ quiet_cmd_vdso = VDSO $@ $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) -VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) GCOV_PROFILE := n # -- cgit v1.2.2 From d200c922bc2b1ac88b8d33b6cfff2ed837af186a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 20 Sep 2009 18:14:13 -0400 Subject: Use new __init_task_data macro in arch init_task.c files. Signed-off-by: Joe Perches Signed-off-by: Tim Abbott Acked-by: Paul Mundt Signed-off-by: Sam Ravnborg --- arch/x86/kernel/init_task.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 270ff83efc11..3a54dcb9cd0e 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -20,9 +20,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. -- cgit v1.2.2 From 02b7da37f7acd49277dea1481dc0c5c246c09732 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Sun, 20 Sep 2009 18:14:14 -0400 Subject: Use macros for .bss.page_aligned section. This patch changes the remaining direct references to .bss.page_aligned in C and assembly code to use the macros in include/linux/linkage.h. Signed-off-by: Tim Abbott Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Acked-by: Paul Mundt Cc: Chris Zankel Signed-off-by: Sam Ravnborg --- arch/x86/kernel/head_32.S | 2 +- arch/x86/kernel/head_64.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index b766e8c7252d..1dac23958427 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -608,7 +608,7 @@ ENTRY(initial_code) /* * BSS section */ -.section ".bss.page_aligned","wa" +__PAGE_ALIGNED_BSS .align PAGE_SIZE_asm #ifdef CONFIG_X86_PAE swapper_pg_pmd: diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index fa54f78e2a05..d0bc0a13a437 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -418,7 +418,7 @@ ENTRY(phys_base) ENTRY(idt_table) .skip IDT_ENTRIES * 16 - .section .bss.page_aligned, "aw", @nobits + __PAGE_ALIGNED_BSS .align PAGE_SIZE ENTRY(empty_zero_page) .skip PAGE_SIZE -- cgit v1.2.2 From abe1ee3a221d53778c3e58747bbec6e518e5471b Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Sun, 20 Sep 2009 18:14:15 -0400 Subject: Use macros for .data.page_aligned section. This patch changes the remaining direct references to .data.page_aligned in C and assembly code to use the macros in include/linux/linkage.h. Signed-off-by: Tim Abbott Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Haavard Skinnemoen Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Signed-off-by: Sam Ravnborg --- arch/x86/include/asm/cache.h | 4 +++- arch/x86/kernel/head_32.S | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h index 5d367caa0e36..549860d3be8f 100644 --- a/arch/x86/include/asm/cache.h +++ b/arch/x86/include/asm/cache.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_CACHE_H #define _ASM_X86_CACHE_H +#include + /* L1 cache line size */ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) @@ -13,7 +15,7 @@ #ifdef CONFIG_SMP #define __cacheline_aligned_in_smp \ __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) \ - __attribute__((__section__(".data.page_aligned"))) + __page_aligned_data #endif #endif diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 1dac23958427..218aad7ee76e 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -626,7 +626,7 @@ ENTRY(empty_zero_page) * This starts the data section. */ #ifdef CONFIG_X86_PAE -.section ".data.page_aligned","wa" +__PAGE_ALIGNED_DATA /* Page-aligned for the benefit of paravirt? */ .align PAGE_SIZE_asm ENTRY(swapper_pg_dir) -- cgit v1.2.2 From b75fe4e5b869f8dbebd36df64a7fcda0c5b318ed Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 21 Sep 2009 13:34:06 -0700 Subject: xen: check EFER for NX before setting up GDT mapping x86-64 assumes NX is available by default, so we need to explicitly check for it before using NX. Some first-generation Intel x86-64 processors didn't support NX, and even recent systems allow it to be disabled in BIOS. [ Impact: prevent Xen crash on NX-less 64-bit machines ] Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/mm/Makefile | 1 + arch/x86/xen/enlighten.c | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 9b5a9f59a478..dd313d035de7 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -4,6 +4,7 @@ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_physaddr.o := $(nostackp) +CFLAGS_init.o := $(nostackp) obj-$(CONFIG_SMP) += tlb.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0dd0c2c6cae0..5d701bf66a23 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1083,6 +1083,11 @@ asmlinkage void __init xen_start_kernel(void) __supported_pte_mask |= _PAGE_IOMAP; +#ifdef CONFIG_X86_64 + /* Work out if we support NX */ + check_efer(); +#endif + xen_setup_features(); /* Get mfn list */ @@ -1123,11 +1128,6 @@ asmlinkage void __init xen_start_kernel(void) pgd = (pgd_t *)xen_start_info->pt_base; -#ifdef CONFIG_X86_64 - /* Work out if we support NX */ - check_efer(); -#endif - /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; -- cgit v1.2.2 From c44c9ec0f38b939b3200436e3aa95c1aa83c41c7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 21 Sep 2009 13:40:42 -0700 Subject: x86: split NX setup into separate file to limit unstack-protected code Move the NX setup into a separate file so that it can be compiled without stack-protection while leaving the rest of the mm/init code protected. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable_types.h | 1 + arch/x86/mm/Makefile | 4 +-- arch/x86/mm/init.c | 63 -------------------------------- arch/x86/mm/setup_nx.c | 69 ++++++++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 65 deletions(-) create mode 100644 arch/x86/mm/setup_nx.c (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 54cb697f4900..e9918d99f83c 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -277,6 +277,7 @@ static inline pteval_t pte_flags(pte_t pte) typedef struct page *pgtable_t; extern pteval_t __supported_pte_mask; +extern void set_nx(void); extern int nx_enabled; #define pgprot_writecombine pgprot_writecombine diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index dd313d035de7..06630d26e56d 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,10 +1,10 @@ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o physaddr.o gup.o + pat.o pgtable.o physaddr.o gup.o setup_nx.o # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_physaddr.o := $(nostackp) -CFLAGS_init.o := $(nostackp) +CFLAGS_setup_nx.o := $(nostackp) obj-$(CONFIG_SMP) += tlb.o diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 0607119cef94..73ffd5536f62 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -28,69 +28,6 @@ int direct_gbpages #endif ; -int nx_enabled; - -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) -static int disable_nx __cpuinitdata; - -/* - * noexec = on|off - * - * Control non-executable mappings for processes. - * - * on Enable - * off Disable - */ -static int __init noexec_setup(char *str) -{ - if (!str) - return -EINVAL; - if (!strncmp(str, "on", 2)) { - __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; - } else if (!strncmp(str, "off", 3)) { - disable_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } - return 0; -} -early_param("noexec", noexec_setup); -#endif - -#ifdef CONFIG_X86_PAE -static void __init set_nx(void) -{ - unsigned int v[4], l, h; - - if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { - cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); - - if ((v[3] & (1 << 20)) && !disable_nx) { - rdmsr(MSR_EFER, l, h); - l |= EFER_NX; - wrmsr(MSR_EFER, l, h); - nx_enabled = 1; - __supported_pte_mask |= _PAGE_NX; - } - } -} -#else -static inline void set_nx(void) -{ -} -#endif - -#ifdef CONFIG_X86_64 -void __cpuinit check_efer(void) -{ - unsigned long efer; - - rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || disable_nx) - __supported_pte_mask &= ~_PAGE_NX; -} -#endif - static void __init find_early_table_space(unsigned long end, int use_pse, int use_gbpages) { diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c new file mode 100644 index 000000000000..513d8ed5d2ec --- /dev/null +++ b/arch/x86/mm/setup_nx.c @@ -0,0 +1,69 @@ +#include +#include +#include + +#include + +int nx_enabled; + +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +static int disable_nx __cpuinitdata; + +/* + * noexec = on|off + * + * Control non-executable mappings for processes. + * + * on Enable + * off Disable + */ +static int __init noexec_setup(char *str) +{ + if (!str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; + disable_nx = 0; + } else if (!strncmp(str, "off", 3)) { + disable_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } + return 0; +} +early_param("noexec", noexec_setup); +#endif + +#ifdef CONFIG_X86_PAE +void __init set_nx(void) +{ + unsigned int v[4], l, h; + + if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { + cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); + + if ((v[3] & (1 << 20)) && !disable_nx) { + rdmsr(MSR_EFER, l, h); + l |= EFER_NX; + wrmsr(MSR_EFER, l, h); + nx_enabled = 1; + __supported_pte_mask |= _PAGE_NX; + } + } +} +#else +void set_nx(void) +{ +} +#endif + +#ifdef CONFIG_X86_64 +void __cpuinit check_efer(void) +{ + unsigned long efer; + + rdmsrl(MSR_EFER, efer); + if (!(efer & EFER_NX) || disable_nx) + __supported_pte_mask &= ~_PAGE_NX; +} +#endif + -- cgit v1.2.2 From 3967684006f30c253bc6d4a6604d1bad4a7fc672 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Sep 2009 15:50:24 +0200 Subject: x86: mce: Clean up thermal throttling state tracking code Instead of a mess of three separate percpu variables, consolidate the state into a single structure. Also clean up therm_throt_process(), use cleaner and more understandable variable names and a clearer logic. This, without changing the logic, makes the code more streamlined, more readable and smaller as well: text data bss dec hex filename 1487 169 4 1660 67c therm_throt.o.before 1432 176 4 1612 64c therm_throt.o.after Cc: Hidetoshi Seto Cc: Huang Ying Cc: Andi Kleen LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 63 ++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 63a56d147e4a..db80b577f601 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -34,20 +34,30 @@ /* How long to wait between reporting thermal events */ #define CHECK_INTERVAL (300 * HZ) -static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; -static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); -static DEFINE_PER_CPU(bool, thermal_throttle_active); +/* + * Current thermal throttling state: + */ +struct thermal_state { + bool is_throttled; + + u64 next_check; + unsigned long throttle_count; +}; + +static DEFINE_PER_CPU(struct thermal_state, thermal_state); -static atomic_t therm_throt_en = ATOMIC_INIT(0); +static atomic_t therm_throt_en = ATOMIC_INIT(0); #ifdef CONFIG_SYSFS #define define_therm_throt_sysdev_one_ro(_name) \ static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) #define define_therm_throt_sysdev_show_func(name) \ -static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ - struct sysdev_attribute *attr, \ - char *buf) \ + \ +static ssize_t therm_throt_sysdev_show_##name( \ + struct sys_device *dev, \ + struct sysdev_attribute *attr, \ + char *buf) \ { \ unsigned int cpu = dev->id; \ ssize_t ret; \ @@ -55,7 +65,7 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ preempt_disable(); /* CPU hotplug */ \ if (cpu_online(cpu)) \ ret = sprintf(buf, "%lu\n", \ - per_cpu(thermal_throttle_##name, cpu)); \ + per_cpu(thermal_state, cpu).name); \ else \ ret = 0; \ preempt_enable(); \ @@ -63,11 +73,11 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ return ret; \ } -define_therm_throt_sysdev_show_func(count); -define_therm_throt_sysdev_one_ro(count); +define_therm_throt_sysdev_show_func(throttle_count); +define_therm_throt_sysdev_one_ro(throttle_count); static struct attribute *thermal_throttle_attrs[] = { - &attr_count.attr, + &attr_throttle_count.attr, NULL }; @@ -93,33 +103,38 @@ static struct attribute_group thermal_throttle_attr_group = { * 1 : Event should be logged further, and a message has been * printed to the syslog. */ -static int therm_throt_process(int curr) +static int therm_throt_process(bool is_throttled) { - unsigned int cpu = smp_processor_id(); - __u64 tmp_jiffs = get_jiffies_64(); - bool was_throttled = __get_cpu_var(thermal_throttle_active); - bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr; + struct thermal_state *state; + unsigned int this_cpu; + bool was_throttled; + u64 now; + + this_cpu = smp_processor_id(); + now = get_jiffies_64(); + state = &per_cpu(thermal_state, this_cpu); + + was_throttled = state->is_throttled; + state->is_throttled = is_throttled; if (is_throttled) - __get_cpu_var(thermal_throttle_count)++; + state->throttle_count++; if (!(was_throttled ^ is_throttled) && - time_before64(tmp_jiffs, __get_cpu_var(next_check))) + time_before64(now, state->next_check)) return 0; - __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; + state->next_check = now + CHECK_INTERVAL; /* if we just entered the thermal event */ if (is_throttled) { - printk(KERN_CRIT "CPU%d: Temperature above threshold, " - "cpu clock throttled (total events = %lu)\n", - cpu, __get_cpu_var(thermal_throttle_count)); + printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count); add_taint(TAINT_MACHINE_CHECK); return 1; } if (was_throttled) { - printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); + printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu); return 1; } @@ -213,7 +228,7 @@ static void intel_thermal_interrupt(void) __u64 msr_val; rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) + if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0)) mce_log_therm_throt_event(msr_val); } -- cgit v1.2.2 From b417c9fd8690637f0c91479435ab3e2bf450c038 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Sep 2009 15:50:24 +0200 Subject: x86: mce: Fix thermal throttling message storm If a system switches back and forth between hot and cold mode, the MCE code will print a stream of critical kernel messages. Extend the throttling code to properly notice this, by only printing the first hot + cold transition and omitting the rest up to CHECK_INTERVAL (5 minutes). This way we'll only get a single incident of: [ 102.356584] CPU0: Temperature above threshold, cpu clock throttled (total events = 1) [ 102.357000] Disabling lock debugging due to kernel taint [ 102.369223] CPU0: Temperature/speed normal Every 5 minutes. The 'total events' count tells the number of cold/hot transitions detected, should overheating occur after 5 minutes again: [ 402.357580] CPU0: Temperature above threshold, cpu clock throttled (total events = 24891) [ 402.358001] CPU0: Temperature/speed normal [ 450.704142] Machine check events logged Cc: Hidetoshi Seto Cc: Huang Ying Cc: Andi Kleen LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index db80b577f601..b3a1dba75330 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -42,6 +42,7 @@ struct thermal_state { u64 next_check; unsigned long throttle_count; + unsigned long last_throttle_count; }; static DEFINE_PER_CPU(struct thermal_state, thermal_state); @@ -120,11 +121,12 @@ static int therm_throt_process(bool is_throttled) if (is_throttled) state->throttle_count++; - if (!(was_throttled ^ is_throttled) && - time_before64(now, state->next_check)) + if (time_before64(now, state->next_check) && + state->throttle_count != state->last_throttle_count) return 0; state->next_check = now + CHECK_INTERVAL; + state->last_throttle_count = state->throttle_count; /* if we just entered the thermal event */ if (is_throttled) { -- cgit v1.2.2 From 14c0abf14a5e67e793131116bd97f57da37ccce3 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 22 Sep 2009 14:35:57 +0800 Subject: x86: mce, inject: Use real inject-msg in raise_local Current raise_local() uses a struct mce that comes from mce_write() as a parameter instead of the real inject-msg, so when we set mce.finished = 0 to clear injected MCE, the real inject stays valid. This will cause the remaining inject-msg affect the next injection, which is not desired. To fix this, real inject-msg is used in raise_local instead of the one on the stack. This patch is based on the diagnosis and the fixes by Dean Nelson. Reported-by: Dean Nelson Signed-off-by: Huang Ying Cc: Hidetoshi Seto Cc: Andi Kleen LKML-Reference: <1253601357.15717.757.camel@yhuang-dev.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce-inject.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 7029f0e2acad..472763d92098 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -98,8 +98,9 @@ static struct notifier_block mce_raise_nb = { }; /* Inject mce on current CPU */ -static int raise_local(struct mce *m) +static int raise_local(void) { + struct mce *m = &__get_cpu_var(injectm); int context = MCJ_CTX(m->inject_flags); int ret = 0; int cpu = m->extcpu; @@ -167,12 +168,12 @@ static void raise_mce(struct mce *m) } cpu_relax(); } - raise_local(m); + raise_local(); put_cpu(); put_online_cpus(); } else #endif - raise_local(m); + raise_local(); } /* Error injection interface */ -- cgit v1.2.2 From 7d42896628202a551ad1107697cd215dc5fca099 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 Sep 2009 11:03:37 +0200 Subject: perf_event, x86: Fix 'perf sched record' crashing the machine Chris Malley reported that 'perf sched record' sometimes crashes his box with: [ 389.272175] BUG: unable to handle kernel paging request at ffffb300 [ 389.272294] IP: [] default_send_IPI_self+0x1d/0x50 [ 389.272366] *pde = 0073f067 *pte = 00000000 [ 389.274708] Call Trace: [ 389.274752] [] ? set_perf_event_pending+0x14/0x20 [ 389.274801] [] ? perf_output_unlock+0x121/0x1a0 [ 389.274848] [] ? perf_output_end+0x4a/0x70 [ 389.274893] [] ? __perf_event_overflow+0x240/0x2f0 [ 389.274942] [] ? atomic64_cmpxchg+0x1e/0x30 [ 389.274988] [] ? perf_swevent_ctx_event+0x1b4/0x1c0 [ 389.275035] [] ? perf_swevent_ctx_event+0x33/0x1c0 [ 389.275081] [] ? do_perf_sw_event+0xa7/0x160 [ 389.275127] [] ? perf_tp_event+0x82/0xa0 [ 389.275174] [] ? ftrace_profile_sched_stat_runtime+0xe6/0x120 [ 389.275224] [] ? ftrace_profile_sched_stat_runtime+0x0/0x120 [ 389.275273] [] ? update_curr+0x18a/0x230 [ 389.275318] [] ? put_prev_task_fair+0x155/0x160 [ 389.275366] [] ? sched_clock_cpu+0xd5/0x110 [ 389.275413] [] ? _spin_lock_irq+0x45/0x50 [ 389.275458] [] ? schedule+0x20e/0xb10 The problem is that the box has no lapic enabled: [ 0.042445] Local APIC not detected. Using dummy APIC emulation. The below seems like the best fix. We disabled all lapic bits, except the self-IPI-resend logic. Reported-by: Chris Malley Signed-off-by: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Frederic Weisbecker LKML-Reference: <7863dc4c0909221409v7893bfd3o4b590d5951a233ba@mail.gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index a3c7adb06b78..b5801c311846 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1790,6 +1790,9 @@ void smp_perf_pending_interrupt(struct pt_regs *regs) void set_perf_event_pending(void) { #ifdef CONFIG_X86_LOCAL_APIC + if (!x86_pmu.apic || !x86_pmu_initialized()) + return; + apic->send_IPI_self(LOCAL_PENDING_VECTOR); #endif } -- cgit v1.2.2 From 11868a2dc4f5e4f2f652bfd259e1360193fcee62 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 23 Sep 2009 17:49:55 +0200 Subject: x86: mce: Use safer ways to access MCE registers Use rdmsrl_safe() when accessing MCE registers. While in theory we always 'know' which ones are safe to access from the capability bits, there's a lot of hardware variations and reality might differ from theory, as it did in this case: http://bugzilla.kernel.org/show_bug.cgi?id=14204 [ 0.010016] mce: CPU supports 5 MCE banks [ 0.011029] general protection fault: 0000 [#1] [ 0.011998] last sysfs file: [ 0.011998] Modules linked in: [ 0.011998] [ 0.011998] Pid: 0, comm: swapper Not tainted (2.6.31_router #1) HP Vectra [ 0.011998] EIP: 0060:[] EFLAGS: 00010246 CPU: 0 [ 0.011998] EIP is at mce_rdmsrl+0x19/0x60 [ 0.011998] EAX: 00000000 EBX: 00000001 ECX: 00000407 EDX: 08000000 [ 0.011998] ESI: 00000000 EDI: 8c000000 EBP: 00000405 ESP: c17d5eac So WARN_ONCE() instead of crashing the box. ( also fix a number of stylistic inconsistencies in the code. ) Note, we might still crash in wrmsrl() if we get that far, but we shouldnt if the registers are truly inaccessible. Reported-by: GNUtoo Cc: Hidetoshi Seto Cc: Huang Ying Cc: Andi Kleen LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 2f5aab26320e..4b2af86e3e8d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -305,13 +305,25 @@ static int msr_to_offset(u32 msr) static u64 mce_rdmsrl(u32 msr) { u64 v; + if (__get_cpu_var(injectm).finished) { int offset = msr_to_offset(msr); + if (offset < 0) return 0; return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); } - rdmsrl(msr, v); + + if (rdmsrl_safe(msr, &v)) { + WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr); + /* + * Return zero in case the access faulted. This should + * not happen normally but can happen if the CPU does + * something weird, or if the code is buggy. + */ + v = 0; + } + return v; } @@ -319,6 +331,7 @@ static void mce_wrmsrl(u32 msr, u64 v) { if (__get_cpu_var(injectm).finished) { int offset = msr_to_offset(msr); + if (offset >= 0) *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; return; @@ -415,7 +428,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) m->ip = mce_rdmsrl(rip_msr); } -#ifdef CONFIG_X86_LOCAL_APIC +#ifdef CONFIG_X86_LOCAL_APIC /* * Called after interrupts have been reenabled again * when a MCE happened during an interrupts off region @@ -1172,6 +1185,7 @@ static int mce_banks_init(void) return -ENOMEM; for (i = 0; i < banks; i++) { struct mce_bank *b = &mce_banks[i]; + b->ctl = -1ULL; b->init = 1; } @@ -1203,6 +1217,7 @@ static int __cpuinit mce_cap_init(void) banks = b; if (!mce_banks) { int err = mce_banks_init(); + if (err) return err; } @@ -1237,6 +1252,7 @@ static void mce_init(void) for (i = 0; i < banks; i++) { struct mce_bank *b = &mce_banks[i]; + if (!b->init) continue; wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); @@ -1626,6 +1642,7 @@ static int mce_disable(void) for (i = 0; i < banks; i++) { struct mce_bank *b = &mce_banks[i]; + if (b->init) wrmsrl(MSR_IA32_MCx_CTL(i), 0); } @@ -1911,6 +1928,7 @@ static void mce_disable_cpu(void *h) cmci_clear(); for (i = 0; i < banks; i++) { struct mce_bank *b = &mce_banks[i]; + if (b->init) wrmsrl(MSR_IA32_MCx_CTL(i), 0); } @@ -1928,6 +1946,7 @@ static void mce_reenable_cpu(void *h) cmci_reenable(); for (i = 0; i < banks; i++) { struct mce_bank *b = &mce_banks[i]; + if (b->init) wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); } -- cgit v1.2.2 From 79f5599772ac2f138d7a75b8f3f06a93f09c75f7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 15 Jun 2009 14:58:26 +0800 Subject: cpumask: use zalloc_cpumask_var() where possible Remove open-coded zalloc_cpumask_var() and zalloc_cpumask_var_node(). Signed-off-by: Li Zefan Signed-off-by: Rusty Russell --- arch/x86/kernel/apic/io_apic.c | 7 ++----- arch/x86/kernel/process.c | 6 ++---- arch/x86/kernel/smpboot.c | 9 +++------ 3 files changed, 7 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 64970b9885f2..dc69f28489f5 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -227,17 +227,14 @@ static struct irq_cfg *get_one_free_irq_cfg(int node) cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); if (cfg) { - if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { + if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { kfree(cfg); cfg = NULL; - } else if (!alloc_cpumask_var_node(&cfg->old_domain, + } else if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_ATOMIC, node)) { free_cpumask_var(cfg->domain); kfree(cfg); cfg = NULL; - } else { - cpumask_clear(cfg->domain); - cpumask_clear(cfg->old_domain); } } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 847ab4160315..5284cd2b5776 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -555,10 +555,8 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) void __init init_c1e_mask(void) { /* If we're using c1e_idle, we need to allocate c1e_mask. */ - if (pm_idle == c1e_idle) { - alloc_cpumask_var(&c1e_mask, GFP_KERNEL); - cpumask_clear(c1e_mask); - } + if (pm_idle == c1e_idle) + zalloc_cpumask_var(&c1e_mask, GFP_KERNEL); } static int __init idle_setup(char *str) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 09c5e077dff7..565ebc65920e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1059,12 +1059,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) #endif current_thread_info()->cpu = 0; /* needed? */ for_each_possible_cpu(i) { - alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); - alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); - alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); - cpumask_clear(per_cpu(cpu_core_map, i)); - cpumask_clear(per_cpu(cpu_sibling_map, i)); - cpumask_clear(cpu_data(i).llc_shared_map); + zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); + zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); } set_cpu_sibling_map(0); -- cgit v1.2.2 From 1d1afc1957a441fc75a27517b17437d8af3b3b93 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:36 -0600 Subject: cpumask: remove last assignment to mask field of struct irqaction. This snuck in after the patch which removed all the others. Signed-off-by: Rusty Russell Cc: Ingo Molnar --- arch/x86/kernel/time.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index e293ac56c723..dcb00d278512 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -93,7 +93,6 @@ static struct irqaction irq0 = { void __init setup_default_timer_irq(void) { - irq0.mask = cpumask_of_cpu(0); setup_irq(0, &irq0); } -- cgit v1.2.2 From 0748bd01773395003208996c4c0b3f80caf80976 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:46 -0600 Subject: cpumask: remove arch_send_call_function_ipi Now everyone is converted to arch_send_call_function_ipi_mask, remove the shim and the #defines. Signed-off-by: Rusty Russell --- arch/x86/include/asm/smp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 6a84ed166aec..1e796782cd7b 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -121,7 +121,6 @@ static inline void arch_send_call_function_single_ipi(int cpu) smp_ops.send_call_func_single_ipi(cpu); } -#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) { smp_ops.send_call_func_ipi(mask); -- cgit v1.2.2 From 78f1c4d6b027993763a5aba83873b0462d06db8f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:51 -0600 Subject: cpumask: use mm_cpumask() wrapper: x86 Makes code futureproof against the impending change to mm->cpu_vm_mask (to be a pointer). It's also a chance to use the new cpumask_ ops which take a pointer (the older ones are deprecated, but there's no hurry for arch code). Signed-off-by: Rusty Russell --- arch/x86/include/asm/mmu_context.h | 6 +++--- arch/x86/kernel/ldt.c | 4 ++-- arch/x86/mm/tlb.c | 15 ++++++++------- arch/x86/xen/mmu.c | 4 ++-- 4 files changed, 15 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index f923203dc39a..4a2d4e0c18d9 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -37,12 +37,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, if (likely(prev != next)) { /* stop flush ipis for the previous mm */ - cpu_clear(cpu, prev->cpu_vm_mask); + cpumask_clear_cpu(cpu, mm_cpumask(prev)); #ifdef CONFIG_SMP percpu_write(cpu_tlbstate.state, TLBSTATE_OK); percpu_write(cpu_tlbstate.active_mm, next); #endif - cpu_set(cpu, next->cpu_vm_mask); + cpumask_set_cpu(cpu, mm_cpumask(next)); /* Re-load page tables */ load_cr3(next->pgd); @@ -58,7 +58,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, percpu_write(cpu_tlbstate.state, TLBSTATE_OK); BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); - if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { + if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) { /* We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 71f1d99a635d..ec6ef60cbd17 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -67,8 +67,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) #ifdef CONFIG_SMP preempt_disable(); load_LDT(pc); - if (!cpus_equal(current->mm->cpu_vm_mask, - cpumask_of_cpu(smp_processor_id()))) + if (!cpumask_equal(mm_cpumask(current->mm), + cpumask_of(smp_processor_id()))) smp_call_function(flush_ldt, current->mm, 1); preempt_enable(); #else diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index c814e144a3f0..36fe08eeb5c3 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -59,7 +59,8 @@ void leave_mm(int cpu) { if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) BUG(); - cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); + cpumask_clear_cpu(cpu, + mm_cpumask(percpu_read(cpu_tlbstate.active_mm))); load_cr3(swapper_pg_dir); } EXPORT_SYMBOL_GPL(leave_mm); @@ -234,8 +235,8 @@ void flush_tlb_current_task(void) preempt_disable(); local_flush_tlb(); - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL); preempt_enable(); } @@ -249,8 +250,8 @@ void flush_tlb_mm(struct mm_struct *mm) else leave_mm(smp_processor_id()); } - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL); preempt_enable(); } @@ -268,8 +269,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) leave_mm(smp_processor_id()); } - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, va); + if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(mm_cpumask(mm), mm, va); preempt_enable(); } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 093dd59b5385..3bf7b1d250ce 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1165,14 +1165,14 @@ static void xen_drop_mm_ref(struct mm_struct *mm) /* Get the "official" set of cpus referring to our pagetable. */ if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask) + if (!cpumask_test_cpu(cpu, mm_cpumask(mm)) && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) continue; smp_call_function_single(cpu, drop_other_mm_ref, mm, 1); } return; } - cpumask_copy(mask, &mm->cpu_vm_mask); + cpumask_copy(mask, mm_cpumask(mm)); /* It's possible that a vcpu may have a stale reference to our cr3, because its in lazy mode, and it hasn't yet flushed -- cgit v1.2.2 From 2bcd57ab61e7cabed626226a3771617981c11ce1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 24 Sep 2009 04:22:25 +0400 Subject: headers: utsname.h redux * remove asm/atomic.h inclusion from linux/utsname.h -- not needed after kref conversion * remove linux/utsname.h inclusion from files which do not need it NOTE: it looks like fs/binfmt_elf.c do not need utsname.h, however due to some personality stuff it _is_ needed -- cowardly leave ELF-related headers and files alone. Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- arch/x86/kernel/dumpstack_32.c | 1 - arch/x86/kernel/dumpstack_64.c | 1 - arch/x86/kernel/traps.c | 1 - 3 files changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index bca5fba91c9e..f7dd2a7c3bf4 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 54b0a3276766..a071e6be177e 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9346e102338d..a665c71352b8 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.2 From ea01c0d7315d6e3218fd22a6947c5b09305fcf65 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 23 Sep 2009 15:33:23 -0700 Subject: x86: Reduce verbosity of "TSC is reliable" message On modern systems, the kernel prints the message Skipping synchronization checks as TSC is reliable. once for every non-boot CPU. This gets kind of ridiculous on huge systems; for example, on a 64-thread system I was lucky enough to get: $ dmesg | grep 'TSC is reliable' | wc 63 567 4221 There's no point to doing this for every CPU, since the code is just checking the boot CPU anyway, so change this to a printk_once() to make the message appears only once. Signed-off-by: Roland Dreier LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 027b5b498993..f37930954d15 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -114,7 +114,7 @@ void __cpuinit check_tsc_sync_source(int cpu) return; if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { - pr_info("Skipping synchronization checks as TSC is reliable.\n"); + printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n"); return; } -- cgit v1.2.2 From e23a8b6a8f319c0f08b6ccef2dccbb37e7603dc2 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 23 Sep 2009 15:35:35 -0700 Subject: x86: Reduce verbosity of "PAT enabled" kernel message On modern systems, the kernel prints the message x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106 once for every CPU. This gets kind of ridiculous on huge systems; for example, on a 64-thread system I was lucky enough to get: dmesg| grep 'PAT enabled' | wc 64 704 5174 There is already a BUG() if non-boot CPUs have PAT capabilities that don't match the boot CPU, so just print the message on the boot CPU. (I kept the print after the wrmsrl() that enables PAT, so that the log output continues to mean that the system survived enabling PAT on the boot CPU) Signed-off-by: Roland Dreier Cc: Suresh Siddha Cc: Venkatesh Pallipadi LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 7257cf3decf9..e78cd0ec2bcf 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -81,6 +81,7 @@ enum { void pat_init(void) { u64 pat; + bool boot_cpu = !boot_pat_state; if (!pat_enabled) return; @@ -122,8 +123,10 @@ void pat_init(void) rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); wrmsrl(MSR_IA32_CR_PAT, pat); - printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n", - smp_processor_id(), boot_pat_state, pat); + + if (boot_cpu) + printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n", + smp_processor_id(), boot_pat_state, pat); } #undef PAT -- cgit v1.2.2 From 429a6e5e2c1fbe5d805aad123efbdb5f0c14769f Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Wed, 23 Sep 2009 18:13:13 -0500 Subject: x86: early_printk: Protect against using the same device twice If you use the kernel argument: earlyprintk=serial,ttyS0,115200 This will cause a recursive hang printing the same line again and again: BIOS-e820: 000000003fff3000 - 0000000040000000 (ACPI data) BIOS-e820: 00000000e0000000 - 00000000f0000000 (reserved) BIOS-e820: 00000000fec00000 - 0000000100000000 (reserved) bootconsole [earlyser0] enabled Linux version 2.6.31-07863-gb64ada6 (mingo@sirius) (gcc version 4.3.2 20081105 (Red Hat 4.3.2-7) (GCC) ) #16789 SMP Wed Sep 23 21:09:43 CEST 2009 Linux version 2.6.31-07863-gb64ada6 (mingo@sirius) (gcc version 4.3.2 20081105 (Red Hat 4.3.2-7) (GCC) ) #16789 SMP Wed Sep 23 21:09:43 CEST 2009 Linux version 2.6.31-07863-gb64ada6 (mingo@sirius) (gcc version 4.3.2 20081105 (Red Hat 4.3.2-7) (GCC) ) #16789 SMP Wed Sep 23 21:09:43 CEST 2009 Linux version 2.6.31-07863-gb64ada6 (mingo@sirius) (gcc version 4.3.2 20081105 (Red Hat 4.3.2-7) (GCC) ) #16789 SMP Wed Sep 23 21:09:43 CEST 2009 Linux version 2.6.31-07863-gb64ada6 (mingo@sirius) (gcc version 4.3.2 20081105 (Red Hat 4.3.2-7) (GCC) ) #16789 SMP Wed Sep 23 21:09:43 CEST 2009 Instead warn the end user that they specified the device a second time, and ignore that second console. Reported-by: Ingo Molnar Signed-off-by: Jason Wessel Cc: Len Brown Cc: Greg KH Cc: Andrew Morton Cc: Linus Torvalds LKML-Reference: <4ABAAB89.1080407@windriver.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 2acfd3fdc0cc..41fd965c80c6 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -178,6 +178,11 @@ asmlinkage void early_printk(const char *fmt, ...) static inline void early_console_register(struct console *con, int keep_early) { + if (early_console->index != -1) { + printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n", + con->name); + return; + } early_console = con; if (keep_early) early_console->flags &= ~CON_BOOT; -- cgit v1.2.2 From b0c6fbe458183cc7e1cab17be6efcbe7e435bad3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 17:48:44 +0930 Subject: x86: Remove redundant non-NUMA topology functions arch/x86/include/asm/topology.h declares inline fns cpu_to_node and cpumask_of_node for !NUMA, even though they are then declared as macros by asm-generic/topology.h, which is #included just below. The macros (which are the same) end up being used; these functions are just confusing. Noticed-by: Linus Torvalds Signed-off-by: Rusty Russell Cc: Jesse Barnes Cc: "Greg Kroah-Hartman" Cc: Yinghai Lu Cc: Tejun Heo LKML-Reference: <200909241748.45629.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 6f0695d744bf..25a92842dd99 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -165,21 +165,11 @@ static inline int numa_node_id(void) return 0; } -static inline int cpu_to_node(int cpu) -{ - return 0; -} - static inline int early_cpu_to_node(int cpu) { return 0; } -static inline const struct cpumask *cpumask_of_node(int node) -{ - return cpu_online_mask; -} - static inline void setup_node_to_cpumask_map(void) { } #endif -- cgit v1.2.2 From 8d65af789f3e2cf4cfbdbf71a0f7a61ebcd41d38 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 23 Sep 2009 15:57:19 -0700 Subject: sysctl: remove "struct file *" argument of ->proc_handler It's unused. It isn't needed -- read or write flag is already passed and sysctl shouldn't care about the rest. It _was_ used in two places at arch/frv for some reason. Signed-off-by: Alexey Dobriyan Cc: David Howells Cc: "Eric W. Biederman" Cc: Al Viro Cc: Ralf Baechle Cc: Martin Schwidefsky Cc: Ingo Molnar Cc: "David S. Miller" Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/nmi.h | 3 +-- arch/x86/kernel/apic/nmi.c | 4 ++-- arch/x86/kernel/vsyscall_64.c | 10 +--------- 3 files changed, 4 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index e63cf7d441e1..139d4c1a33a7 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -40,8 +40,7 @@ extern unsigned int nmi_watchdog; #define NMI_INVALID 3 struct ctl_table; -struct file; -extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, +extern int proc_nmi_enabled(struct ctl_table *, int , void __user *, size_t *, loff_t *); extern int unknown_nmi_panic; diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index cb66a22d98ad..7ff61d6a188a 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -508,14 +508,14 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) /* * proc handler for /proc/sys/kernel/nmi */ -int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, +int proc_nmi_enabled(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { int old_state; nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; old_state = nmi_watchdog_enabled; - proc_dointvec(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, buffer, length, ppos); if (!!old_state == !!nmi_watchdog_enabled) return 0; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index cf53a78e2dcf..8cb4974ff599 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -228,19 +228,11 @@ static long __vsyscall(3) venosys_1(void) } #ifdef CONFIG_SYSCTL - -static int -vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return proc_dointvec(ctl, write, filp, buffer, lenp, ppos); -} - static ctl_table kernel_table2[] = { { .procname = "vsyscall64", .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = vsyscall_sysctl_change }, + .proc_handler = proc_dointvec }, {} }; -- cgit v1.2.2 From 3e2ada5867b7e9fa0b296d30fa8f3726ebd0a8b7 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Sun, 27 Sep 2009 03:30:51 -0400 Subject: ACPI: fix Compaq Evo N800c (Pentium 4m) boot hang regression Don't disable ARB_DISABLE when the familary ID is 0x0F. http://bugzilla.kernel.org/show_bug.cgi?id=14211 This was a 2.6.31 regression, and so this patch needs to be applied to 2.6.31.stable Signed-off-by: Zhao Yakui Signed-off-by: Len Brown --- arch/x86/kernel/acpi/cstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 8c44c232efcb..59cdfa4686b2 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -48,7 +48,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, * P4, Core and beyond CPUs */ if (c->x86_vendor == X86_VENDOR_INTEL && - (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14))) + (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 14))) flags->bm_control = 0; } EXPORT_SYMBOL(acpi_processor_power_init_bm_check); -- cgit v1.2.2 From d949f36f1865c60239d4265b50c4b75354fcb8f3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 26 Sep 2009 09:35:07 -0700 Subject: x86: Fix hwpoison code related build failure on 32-bit NUMAQ This build failure triggers: In file included from include/linux/suspend.h:8, from arch/x86/kernel/asm-offsets_32.c:11, from arch/x86/kernel/asm-offsets.c:2: include/linux/mm.h:503:2: error: #error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS Because due to the hwpoison page flag we ran out of page flags on 32-bit. Dont turn on hwpoison on 32-bit NUMA (it's rare in any case). Also clean up the Kconfig dependencies in the generic MM code by introducing ARCH_SUPPORTS_MEMORY_FAILURE. Signed-off-by: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 93698794aa3a..8da93745c087 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -432,6 +432,17 @@ config X86_NUMAQ of Flat Logical. You will need a new lynxer.elf file to flash your firmware with - send email to . +config X86_SUPPORTS_MEMORY_FAILURE + bool + # MCE code calls memory_failure(): + depends on X86_MCE + # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags: + depends on !X86_NUMAQ + # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH: + depends on X86_64 || !SPARSEMEM + select ARCH_SUPPORTS_MEMORY_FAILURE + default y + config X86_VISWS bool "SGI 320/540 (Visual Workstation)" depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT -- cgit v1.2.2 From f0f37e2f77731b3473fa6bd5ee53255d9a9cdb40 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 27 Sep 2009 22:29:37 +0400 Subject: const: mark struct vm_struct_operations * mark struct vm_area_struct::vm_ops as const * mark vm_ops in AGP code But leave TTM code alone, something is fishy there with global vm_ops being used. Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- arch/x86/pci/i386.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 52e62e57fedd..b22d13b0c71d 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -266,7 +266,7 @@ void pcibios_set_master(struct pci_dev *dev) pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); } -static struct vm_operations_struct pci_mmap_ops = { +static const struct vm_operations_struct pci_mmap_ops = { .access = generic_access_phys, }; -- cgit v1.2.2 From e207e143e2fb6a2790b1ce3687c8aedc3ddc357b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 30 Sep 2009 07:48:37 -0700 Subject: Revert "x86, mce: do not compile mcelog message on AMD" This reverts commit 22223c9b417be5fd0ab2cf9ad17eb7bd1e19f7b9, as requested by Andi Kleen: "Obviously kernels compiled with AMD support can still run on non AMD systems, so messages like this can never be removed at compile time." Requsted-by: Andi Kleen Cc: Borislav Petkov Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/mcheck/mce.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 4b2af86e3e8d..183c3457d2f4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -204,10 +204,7 @@ static void print_mce_head(void) static void print_mce_tail(void) { printk(KERN_EMERG "This is not a software problem!\n" -#if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD)) - "Run through mcelog --ascii to decode and contact your hardware vendor\n" -#endif - ); + "Run through mcelog --ascii to decode and contact your hardware vendor\n"); } #define PANIC_TIMEOUT 5 /* 5 seconds */ -- cgit v1.2.2 From 79e1dd05d1a22e95ab6d54d21836f478b3b56976 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 30 Sep 2009 17:07:54 +0200 Subject: x86: Provide an alternative() based cmpxchg64() cmpxchg64() today generates, to quote Linus, "barf bag" code. cmpxchg64() is about to get used in the scheduler to fix a bug there, but it's a prerequisite that cmpxchg64() first be made non-sucking. This patch turns cmpxchg64() into an efficient implementation that uses the alternative() mechanism to just use the raw instruction on all modern systems. Note: the fallback is NOT smp safe, just like the current fallback is not SMP safe. (Interested parties with i486 based SMP systems are welcome to submit fix patches for that.) Signed-off-by: Arjan van de Ven Acked-by: Linus Torvalds [ fixed asm constraint bug ] Fixed-by: Eric Dumazet Cc: Martin Schwidefsky Cc: John Stultz Cc: Peter Zijlstra LKML-Reference: <20090930170754.0886ff2e@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cmpxchg_32.h | 30 ++++++++++++--------- arch/x86/kernel/i386_ksyms_32.c | 8 ++++++ arch/x86/lib/Makefile | 2 +- arch/x86/lib/cmpxchg8b_emu.S | 57 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 14 deletions(-) create mode 100644 arch/x86/lib/cmpxchg8b_emu.S (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 82ceb788a981..ee1931be6593 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -312,19 +312,23 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); -#define cmpxchg64(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - if (likely(boot_cpu_data.x86 > 4)) \ - __ret = (__typeof__(*(ptr)))__cmpxchg64((ptr), \ - (unsigned long long)(o), \ - (unsigned long long)(n)); \ - else \ - __ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr), \ - (unsigned long long)(o), \ - (unsigned long long)(n)); \ - __ret; \ -}) +#define cmpxchg64(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (o); \ + __typeof__(*(ptr)) __new = (n); \ + alternative_io("call cmpxchg8b_emu", \ + "lock; cmpxchg8b (%%esi)" , \ + X86_FEATURE_CX8, \ + "=A" (__ret), \ + "S" ((ptr)), "0" (__old), \ + "b" ((unsigned int)__new), \ + "c" ((unsigned int)(__new>>32)) \ + : "memory"); \ + __ret; }) + + + #define cmpxchg64_local(ptr, o, n) \ ({ \ __typeof__(*(ptr)) __ret; \ diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 43cec6bdda63..1736c5a725aa 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -10,6 +10,14 @@ EXPORT_SYMBOL(mcount); #endif +/* + * Note, this is a prototype to get at the symbol for + * the export, but dont use it from C code, it is used + * by assembly code and is not using C calling convention! + */ +extern void cmpxchg8b_emu(void); +EXPORT_SYMBOL(cmpxchg8b_emu); + /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_generic); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 9e609206fac9..3e549b8ec8c9 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -15,7 +15,7 @@ ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o lib-y += checksum_32.o lib-y += strstr_32.o - lib-y += semaphore_32.o string_32.o + lib-y += semaphore_32.o string_32.o cmpxchg8b_emu.o lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o else diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S new file mode 100644 index 000000000000..828cb710dec2 --- /dev/null +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -0,0 +1,57 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + * + */ + +#include +#include +#include +#include + + +.text + +/* + * Inputs: + * %esi : memory location to compare + * %eax : low 32 bits of old value + * %edx : high 32 bits of old value + * %ebx : low 32 bits of new value + * %ecx : high 32 bits of new value + */ +ENTRY(cmpxchg8b_emu) +CFI_STARTPROC + +# +# Emulate 'cmpxchg8b (%esi)' on UP except we don't +# set the whole ZF thing (caller will just compare +# eax:edx with the expected value) +# +cmpxchg8b_emu: + pushfl + cli + + cmpl (%esi), %eax + jne not_same + cmpl 4(%esi), %edx + jne half_same + + movl %ebx, (%esi) + movl %ecx, 4(%esi) + + popfl + ret + + not_same: + movl (%esi), %eax + half_same: + movl 4(%esi), %edx + + popfl + ret + +CFI_ENDPROC +ENDPROC(cmpxchg8b_emu) -- cgit v1.2.2 From 982d007a6eec4a0abb404d2355eeec2c041c61ea Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 30 Sep 2009 17:57:27 -0700 Subject: x86: Optimize cmpxchg64() at build-time some more Try to avoid the 'alternates()' code when we can statically determine that cmpxchg8b is fine. We already have that CONFIG_x86_CMPXCHG64 (enabled by PAE support), and we could easily also enable it for some of the CPU cases. Note, this patch only adds CMPXCHG8B for the obvious Intel CPU's, not for others. (There was something really messy about cmpxchg8b and clone CPU's, so if you enable it on other CPUs later, do it carefully.) If we avoid that asm-alternative thing when we can assume the instruction exists, we'll generate less support crud, and we'll avoid the whole issue with that extra 'nop' for padding instruction sizes etc. LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 527519b8a9f9..f2824fb8c79c 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -400,7 +400,7 @@ config X86_TSC config X86_CMPXCHG64 def_bool y - depends on X86_PAE || X86_64 + depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM # this should be set for all -march=.. options where the compiler # generates cmov. @@ -412,6 +412,7 @@ config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 default "6" if X86_32 && X86_P6_NOP + default "5" if X86_32 && X86_CMPXCHG64 default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) default "3" -- cgit v1.2.2 From d1716a60a8ea90788d24aa22d7eec83fbdd2d88a Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Wed, 30 Sep 2009 16:19:39 -0700 Subject: x86: Fix csum_ipv6_magic asm memory clobber Just like ip_fast_csum, the assembly snippet in csum_ipv6_magic needs a memory clobber, as it is only passed the address of the buffer, not a memory reference to the buffer itself. This caused failures in Hurd's pfinetv4 when we tried to compile it with gcc-4.3 (bogus checksums). Signed-off-by: Samuel Thibault Acked-by: David S. Miller Cc: Andi Kleen Cc: Signed-off-by: Andrew Morton Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/include/asm/checksum_32.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h index 7c5ef8b14d92..46fc474fd819 100644 --- a/arch/x86/include/asm/checksum_32.h +++ b/arch/x86/include/asm/checksum_32.h @@ -161,7 +161,8 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, "adcl $0, %0 ;\n" : "=&r" (sum) : "r" (saddr), "r" (daddr), - "r" (htonl(len)), "r" (htonl(proto)), "0" (sum)); + "r" (htonl(len)), "r" (htonl(proto)), "0" (sum) + : "memory"); return csum_fold(sum); } -- cgit v1.2.2 From 04edbdef02ec4386a2ae38cab7ae7d166e17a756 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Oct 2009 07:30:38 +0200 Subject: x86: Don't generate cmpxchg8b_emu if CONFIG_X86_CMPXCHG64=y Conditionaly compile cmpxchg8b_emu.o and EXPORT_SYMBOL(cmpxchg8b_emu). This reduces the kernel size a bit. Signed-off-by: Eric Dumazet Cc: Arjan van de Ven Cc: Martin Schwidefsky Cc: John Stultz Cc: Peter Zijlstra Cc: Linus Torvalds LKML-Reference: <4AC43E7E.1000600@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/i386_ksyms_32.c | 2 ++ arch/x86/lib/Makefile | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 1736c5a725aa..9c3bd4a2050e 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -15,8 +15,10 @@ EXPORT_SYMBOL(mcount); * the export, but dont use it from C code, it is used * by assembly code and is not using C calling convention! */ +#ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); EXPORT_SYMBOL(cmpxchg8b_emu); +#endif /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_generic); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 3e549b8ec8c9..85f5db95c60f 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -15,8 +15,10 @@ ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o lib-y += checksum_32.o lib-y += strstr_32.o - lib-y += semaphore_32.o string_32.o cmpxchg8b_emu.o - + lib-y += semaphore_32.o string_32.o +ifneq ($(CONFIG_X86_CMPXCHG64),y) + lib-y += cmpxchg8b_emu.o +endif lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o else obj-y += io_64.o iomap_copy_64.o -- cgit v1.2.2 From ea3acb199a5d7e4da1de0a4288eba993b29f33b9 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 24 Sep 2009 09:08:30 -0500 Subject: x86: earlyprintk: Fix regression to handle serial,ttySn as 1 arg Commit c953094 ("early_printk: Allow more than one early console") introduced a regression in the parsing of the earlyprintk= kernel arguments. If you specify "earlyprintk=serial,ttyS0,115200" as a kernel argument, the "serial,ttyS" should be parsed as a single argument and not as "serial" and then "ttyS". Also update the documentation to reflect you can specify the ttyS directly without the "serial" argument. Signed-off-by: Jason Wessel Cc: Len Brown Cc: Greg KH Cc: Linus Torvalds Cc: Andrew Morton Cc: Johannes Weiner LKML-Reference: <4ABB7D5E.6000301@windriver.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 41fd965c80c6..b9c830c12b4a 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -206,8 +206,11 @@ static int __init setup_early_printk(char *buf) while (*buf != '\0') { if (!strncmp(buf, "serial", 6)) { - early_serial_init(buf + 6); + buf += 6; + early_serial_init(buf); early_console_register(&early_serial_console, keep); + if (!strncmp(buf, ",ttyS", 5)) + buf += 5; } if (!strncmp(buf, "ttyS", 4)) { early_serial_init(buf + 4); -- cgit v1.2.2 From 4701472e441e41be2549a25228f703bc9cd13b5b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 26 Sep 2009 21:41:41 +0530 Subject: x86, SLUB: Remove unused CONFIG FAST_CMPXCHG_LOCAL Remove unused CONFIG FAST_CMPXCHG_LOCAL from Kconfig. Reported-by: Robert P. J. Day Signed-off-by: Jaswinder Singh Rajput Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: Matt Mackall Cc: Andrew Morton Cc: "Robert P. J. Day" Cc: linux-mm@kvack.org LKML-Reference: <1253981501.4568.61.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8da93745c087..c876bace8fdc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -86,10 +86,6 @@ config STACKTRACE_SUPPORT config HAVE_LATENCYTOP_SUPPORT def_bool y -config FAST_CMPXCHG_LOCAL - bool - default y - config MMU def_bool y -- cgit v1.2.2 From 24e35800cdc4350fc34e2bed37b608a9e13ab3b6 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 30 Sep 2009 11:22:11 +0100 Subject: x86: Don't leak 64-bit kernel register values to 32-bit processes While 32-bit processes can't directly access R8...R15, they can gain access to these registers by temporarily switching themselves into 64-bit mode. Therefore, registers not preserved anyway by called C functions (i.e. R8...R11) must be cleared prior to returning to user mode. Signed-off-by: Jan Beulich Cc: LKML-Reference: <4AC34D73020000780001744A@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 74619c4f9fda..1733f9f65e82 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -21,8 +21,8 @@ #define __AUDIT_ARCH_LE 0x40000000 #ifndef CONFIG_AUDITSYSCALL -#define sysexit_audit int_ret_from_sys_call -#define sysretl_audit int_ret_from_sys_call +#define sysexit_audit ia32_ret_from_sys_call +#define sysretl_audit ia32_ret_from_sys_call #endif #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) @@ -39,12 +39,12 @@ .endm /* clobbers %eax */ - .macro CLEAR_RREGS _r9=rax + .macro CLEAR_RREGS offset=0, _r9=rax xorl %eax,%eax - movq %rax,R11(%rsp) - movq %rax,R10(%rsp) - movq %\_r9,R9(%rsp) - movq %rax,R8(%rsp) + movq %rax,\offset+R11(%rsp) + movq %rax,\offset+R10(%rsp) + movq %\_r9,\offset+R9(%rsp) + movq %rax,\offset+R8(%rsp) .endm /* @@ -172,6 +172,10 @@ sysexit_from_sys_call: movl RIP-R11(%rsp),%edx /* User %eip */ CFI_REGISTER rip,rdx RESTORE_ARGS 1,24,1,1,1,1 + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 popfq CFI_ADJUST_CFA_OFFSET -8 /*CFI_RESTORE rflags*/ @@ -202,7 +206,7 @@ sysexit_from_sys_call: .macro auditsys_exit exit,ebpsave=RBP testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) - jnz int_ret_from_sys_call + jnz ia32_ret_from_sys_call TRACE_IRQS_ON sti movl %eax,%esi /* second arg, syscall return value */ @@ -218,8 +222,9 @@ sysexit_from_sys_call: cli TRACE_IRQS_OFF testl %edi,TI_flags(%r10) - jnz int_with_check - jmp \exit + jz \exit + CLEAR_RREGS -ARGOFFSET + jmp int_with_check .endm sysenter_auditsys: @@ -329,6 +334,9 @@ sysretl_from_sys_call: CFI_REGISTER rip,rcx movl EFLAGS-ARGOFFSET(%rsp),%r11d /*CFI_REGISTER rflags,r11*/ + xorq %r10,%r10 + xorq %r9,%r9 + xorq %r8,%r8 TRACE_IRQS_ON movl RSP-ARGOFFSET(%rsp),%esp CFI_RESTORE rsp @@ -353,7 +361,7 @@ cstar_tracesys: #endif xchgl %r9d,%ebp SAVE_REST - CLEAR_RREGS r9 + CLEAR_RREGS 0, r9 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter @@ -425,6 +433,8 @@ ia32_do_call: call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: movq %rax,RAX-ARGOFFSET(%rsp) +ia32_ret_from_sys_call: + CLEAR_RREGS -ARGOFFSET jmp int_ret_from_sys_call ia32_tracesys: @@ -442,8 +452,8 @@ END(ia32_syscall) ia32_badsys: movq $0,ORIG_RAX-ARGOFFSET(%rsp) - movq $-ENOSYS,RAX-ARGOFFSET(%rsp) - jmp int_ret_from_sys_call + movq $-ENOSYS,%rax + jmp ia32_sysret quiet_ni_syscall: movq $-ENOSYS,%rax -- cgit v1.2.2 From 828c09509b9695271bcbdc53e9fc9a6a737148d2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 1 Oct 2009 15:43:56 -0700 Subject: const: constify remaining file_operations [akpm@linux-foundation.org: fix KVM] Signed-off-by: Alexey Dobriyan Acked-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/xen/debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c index b53225d2cac3..e133ce25e290 100644 --- a/arch/x86/xen/debugfs.c +++ b/arch/x86/xen/debugfs.c @@ -100,7 +100,7 @@ static int xen_array_release(struct inode *inode, struct file *file) return 0; } -static struct file_operations u32_array_fops = { +static const struct file_operations u32_array_fops = { .owner = THIS_MODULE, .open = u32_array_open, .release= xen_array_release, -- cgit v1.2.2 From 392d814daf460a9564d29b2cebc51e1ea34e0504 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Thu, 1 Oct 2009 15:44:02 -0700 Subject: x86: fix csum_ipv6_magic asm memory clobber Just like ip_fast_csum, the assembly snippet in csum_ipv6_magic needs a memory clobber, as it is only passed the address of the buffer, not a memory reference to the buffer itself. This caused failures in Hurd's pfinetv4 when we tried to compile it with gcc-4.3 (bogus checksums). Signed-off-by: Samuel Thibault Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Acked-by: "David S. Miller" Cc: Andi Kleen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/checksum_32.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h index 7c5ef8b14d92..46fc474fd819 100644 --- a/arch/x86/include/asm/checksum_32.h +++ b/arch/x86/include/asm/checksum_32.h @@ -161,7 +161,8 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, "adcl $0, %0 ;\n" : "=&r" (sum) : "r" (saddr), "r" (daddr), - "r" (htonl(len)), "r" (htonl(proto)), "0" (sum)); + "r" (htonl(len)), "r" (htonl(proto)), "0" (sum) + : "memory"); return csum_fold(sum); } -- cgit v1.2.2 From f436f8bb73138bc74eb1c6527723e00988ad8a8a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 1 Oct 2009 16:14:32 +0200 Subject: x86: EDAC: MCE: Fix MCE decoding callback logic Make decoding of MCEs happen only on AMD hardware by registering a non-default callback only on CPU families which support it. While looking at the interaction of decode_mce() with the other MCE code i also noticed a few other things and made the following cleanups/fixes: - Fixed the mce_decode() weak alias - a weak alias is really not good here, it should be a proper callback. A weak alias will be overriden if a piece of code is built into the kernel - not good, obviously. - The patch initializes the callback on AMD family 10h and 11h. - Added the more correct fallback printk of: No support for human readable MCE decoding on this CPU type. Transcribe the message and run it through 'mcelog --ascii' to decode. On CPUs that dont have a decoder. - Made the surrounding code more readable. Note that the callback allows us to have a default fallback - without having to check the CPU versions during the printout itself. When an EDAC module registers itself, it can install the decode-print function. (there's no unregister needed as this is core code.) version -v2 by Borislav Petkov: - add K8 to the set of supported CPUs - always build in edac_mce_amd since we use an early_initcall now - fix checkpatch warnings Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Andi Kleen LKML-Reference: <20091001141432.GA11410@aftab> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 2 ++ arch/x86/kernel/cpu/mcheck/mce.c | 58 +++++++++++++++++++++++++--------------- 2 files changed, 38 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index b608a64c5814..f1363b72364f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -133,6 +133,8 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} static inline void enable_p5_mce(void) {} #endif +extern void (*x86_mce_decode_callback)(struct mce *m); + void mce_setup(struct mce *m); void mce_log(struct mce *m); DECLARE_PER_CPU(struct sys_device, mce_dev); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 183c3457d2f4..b1598a9436d0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; +static void default_decode_mce(struct mce *m) +{ + pr_emerg("No human readable MCE decoding support on this CPU type.\n"); + pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); +} + +/* + * CPU/chipset specific EDAC code can register a callback here to print + * MCE errors in a human-readable form: + */ +void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; +EXPORT_SYMBOL(x86_mce_decode_callback); /* MCA banks polled by the period polling timer for corrected events */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { @@ -165,46 +177,46 @@ void mce_log(struct mce *mce) set_bit(0, &mce_need_notify); } -void __weak decode_mce(struct mce *m) -{ - return; -} - static void print_mce(struct mce *m) { - printk(KERN_EMERG - "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", + pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", m->extcpu, m->mcgstatus, m->bank, m->status); + if (m->ip) { - printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", - !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->ip); + pr_emerg("RIP%s %02x:<%016Lx> ", + !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", + m->cs, m->ip); + if (m->cs == __KERNEL_CS) print_symbol("{%s}", m->ip); - printk(KERN_CONT "\n"); + pr_cont("\n"); } - printk(KERN_EMERG "TSC %llx ", m->tsc); + + pr_emerg("TSC %llx ", m->tsc); if (m->addr) - printk(KERN_CONT "ADDR %llx ", m->addr); + pr_cont("ADDR %llx ", m->addr); if (m->misc) - printk(KERN_CONT "MISC %llx ", m->misc); - printk(KERN_CONT "\n"); - printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", - m->cpuvendor, m->cpuid, m->time, m->socketid, - m->apicid); + pr_cont("MISC %llx ", m->misc); + + pr_cont("\n"); + pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", + m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); - decode_mce(m); + /* + * Print out human-readable details about the MCE error, + * (if the CPU has an implementation for that): + */ + x86_mce_decode_callback(m); } static void print_mce_head(void) { - printk(KERN_EMERG "\nHARDWARE ERROR\n"); + pr_emerg("\nHARDWARE ERROR\n"); } static void print_mce_tail(void) { - printk(KERN_EMERG "This is not a software problem!\n" - "Run through mcelog --ascii to decode and contact your hardware vendor\n"); + pr_emerg("This is not a software problem!\n"); } #define PANIC_TIMEOUT 5 /* 5 seconds */ @@ -218,6 +230,7 @@ static atomic_t mce_fake_paniced; static void wait_for_panic(void) { long timeout = PANIC_TIMEOUT*USEC_PER_SEC; + preempt_disable(); local_irq_enable(); while (timeout-- > 0) @@ -285,6 +298,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) static int msr_to_offset(u32 msr) { unsigned bank = __get_cpu_var(injectm.bank); + if (msr == rip_msr) return offsetof(struct mce, ip); if (msr == MSR_IA32_MCx_STATUS(bank)) -- cgit v1.2.2 From 11879ba5d9ab8174af9b9cefbb2396a54dfbf8c1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 26 Sep 2009 20:51:50 +0200 Subject: x86: Simplify bound checks in the MTRR code The current bound checks for copy_from_user in the MTRR driver are not as obvious as they could be, and gcc agrees with that. This patch simplifies the boundary checks to the point that gcc can now prove to itself that the copy_from_user() is never going past its bounds. Signed-off-by: Arjan van de Ven Cc: Yinghai Lu Cc: Linus Torvalds LKML-Reference: <20090926205150.30797709@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/if.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index f04e72527604..3c1b12d461d1 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) unsigned long long base, size; char *ptr; char line[LINE_SIZE]; + int length; size_t linelen; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!len) - return -EINVAL; memset(line, 0, LINE_SIZE); - if (len > LINE_SIZE) - len = LINE_SIZE; - if (copy_from_user(line, buf, len - 1)) + + length = len; + length--; + + if (length > LINE_SIZE - 1) + length = LINE_SIZE - 1; + + if (length < 0) + return -EINVAL; + + if (copy_from_user(line, buf, length)) return -EFAULT; linelen = strlen(line); -- cgit v1.2.2 From e3be785fb59f92c0df685037062d041619653b7a Mon Sep 17 00:00:00 2001 From: Marin Mitov Date: Sat, 3 Oct 2009 20:45:02 +0300 Subject: x86, pci: Correct spelling in a comment Signed-off-by: Marin Mitov Cc: Joerg Roedel Cc: Jesse Brandeburg LKML-Reference: <200910032045.02523.mitov@issp.bas.bg> Signed-off-by: Ingo Molnar ====================================================== --- arch/x86/kernel/pci-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 64b838eac18c..d20009b4e6ef 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -35,7 +35,7 @@ int iommu_detected __read_mostly = 0; /* * This variable becomes 1 if iommu=pt is passed on the kernel command line. - * If this variable is 1, IOMMU implementations do no DMA ranslation for + * If this variable is 1, IOMMU implementations do no DMA translation for * devices and allow every device to access to whole physical memory. This is * useful if a user want to use an IOMMU only for KVM device assignment to * guests and not for driver dma translation. -- cgit v1.2.2 From 77b1ab1732feb5e3dcbaf31d2f7547c5229f5f3a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 16 Sep 2009 15:24:17 +0200 Subject: KVM: SVM: Fix tsc offset adjustment when running nested When svm_vcpu_load is called while the vcpu is running in guest mode the tsc adjustment made there is lost on the next emulated #vmexit. This causes the tsc running backwards in the guest. This patch fixes the issue by also adjusting the tsc_offset in the emulated hsave area so that it will not get lost. Cc: stable@kernel.org Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 944cc9c04b3c..bf5799dc4f9f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -767,6 +767,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) rdtscll(tsc_this); delta = vcpu->arch.host_tsc - tsc_this; svm->vmcb->control.tsc_offset += delta; + if (is_nested(svm)) + svm->nested.hsave->control.tsc_offset += delta; vcpu->cpu = cpu; kvm_migrate_timers(vcpu); svm->asid_generation = 0; -- cgit v1.2.2 From 20824f30bb0b8ae0a4099895fd4509f54cf2e1e2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 16 Sep 2009 15:24:18 +0200 Subject: KVM: SVM: Handle tsc in svm_get_msr/svm_set_msr correctly When running nested we need to touch the l1 guests tsc_offset. Otherwise changes will be lost or a wrong value be read. Cc: stable@kernel.org Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index bf5799dc4f9f..c17404add91f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2059,10 +2059,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) switch (ecx) { case MSR_IA32_TSC: { - u64 tsc; + u64 tsc_offset; - rdtscll(tsc); - *data = svm->vmcb->control.tsc_offset + tsc; + if (is_nested(svm)) + tsc_offset = svm->nested.hsave->control.tsc_offset; + else + tsc_offset = svm->vmcb->control.tsc_offset; + + *data = tsc_offset + native_read_tsc(); break; } case MSR_K6_STAR: @@ -2148,10 +2152,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) switch (ecx) { case MSR_IA32_TSC: { - u64 tsc; + u64 tsc_offset = data - native_read_tsc(); + u64 g_tsc_offset = 0; + + if (is_nested(svm)) { + g_tsc_offset = svm->vmcb->control.tsc_offset - + svm->nested.hsave->control.tsc_offset; + svm->nested.hsave->control.tsc_offset = tsc_offset; + } + + svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset; - rdtscll(tsc); - svm->vmcb->control.tsc_offset = data - tsc; break; } case MSR_K6_STAR: -- cgit v1.2.2 From b2d83cfa3fdefe5c6573d443d099a18dc3a93c5f Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Fri, 25 Sep 2009 11:09:37 +0200 Subject: KVM: fix LAPIC timer period overflow Don't overflow when computing the 64-bit period from 32-bit registers. Fixes sourceforge bug #2826486. Signed-off-by: Aurelien Jarno Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 1ae5ceba7eb2..7024224f0fc8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -664,7 +664,7 @@ static void start_apic_timer(struct kvm_lapic *apic) { ktime_t now = apic->lapic_timer.timer.base->get_time(); - apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) * + apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) * APIC_BUS_CYCLE_NS * apic->divide_count; atomic_set(&apic->lapic_timer.pending, 0); -- cgit v1.2.2 From eb5109e311b5152c0614a28d7d615d087f268f19 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 1 Oct 2009 19:16:58 -0300 Subject: KVM: VMX: flush TLB with INVEPT on cpu migration It is possible that stale EPTP-tagged mappings are used, if a vcpu migrates to a different pcpu. Set KVM_REQ_TLB_FLUSH in vmx_vcpu_load, when switching pcpus, which will invalidate both VPID and EPT mappings on the next vm-entry. Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f3812014bd0b..ed53b42caba1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -709,7 +709,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (vcpu->cpu != cpu) { vcpu_clear(vmx); kvm_migrate_timers(vcpu); - vpid_sync_vcpu_all(vmx); + set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); local_irq_disable(); list_add(&vmx->local_vcpus_link, &per_cpu(vcpus_on_cpu, cpu)); -- cgit v1.2.2 From 6a54435560efdab1a08f429a954df4d6c740bddf Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Oct 2009 16:45:13 +0200 Subject: KVM: Prevent overflow in KVM_GET_SUPPORTED_CPUID The number of entries is multiplied by the entry size, which can overflow on 32-bit hosts. Bound the entry count instead. Reported-by: David Wagner Cc: stable@kernel.org Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index be451ee44249..9b9695322f56 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1591,6 +1591,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, if (cpuid->nent < 1) goto out; + if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) + cpuid->nent = KVM_MAX_CPUID_ENTRIES; r = -ENOMEM; cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); if (!cpuid_entries) -- cgit v1.2.2 From acb66dd051d0834c8b36d147ff83a8d39da0fe0b Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Wed, 23 Sep 2009 21:47:16 +0300 Subject: KVM: MMU: dont hold pagecount reference for mapped sptes pages When using mmu notifiers, we are allowed to remove the page count reference tooken by get_user_pages to a specific page that is mapped inside the shadow page tables. This is needed so we can balance the pagecount against mapcount checking. (Right now kvm increase the pagecount and does not increase the mapcount when mapping page into shadow page table entry, so when comparing pagecount against mapcount, you have no reliable result.) Signed-off-by: Izik Eidus Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index eca41ae9f453..6c67b230e958 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -634,9 +634,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) if (*spte & shadow_accessed_mask) kvm_set_pfn_accessed(pfn); if (is_writeble_pte(*spte)) - kvm_release_pfn_dirty(pfn); - else - kvm_release_pfn_clean(pfn); + kvm_set_pfn_dirty(pfn); rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); if (!*rmapp) { printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); @@ -1877,8 +1875,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, page_header_update_slot(vcpu->kvm, sptep, gfn); if (!was_rmapped) { rmap_count = rmap_add(vcpu, sptep, gfn); - if (!is_rmap_spte(*sptep)) - kvm_release_pfn_clean(pfn); + kvm_release_pfn_clean(pfn); if (rmap_count > RMAP_RECYCLE_THRESHOLD) rmap_recycle(vcpu, sptep, gfn); } else { -- cgit v1.2.2 From 1403283acca398e244ece35741ad251c1feb5972 Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Wed, 23 Sep 2009 21:47:17 +0300 Subject: KVM: MMU: add SPTE_HOST_WRITEABLE flag to the shadow ptes this flag notify that the host physical page we are pointing to from the spte is write protected, and therefore we cant change its access to be write unless we run get_user_pages(write = 1). (this is needed for change_pte support in kvm) Signed-off-by: Izik Eidus Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 15 +++++++++++---- arch/x86/kvm/paging_tmpl.h | 18 +++++++++++++++--- 2 files changed, 26 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6c67b230e958..5cd8b4ec3a01 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644); #define CREATE_TRACE_POINTS #include "mmutrace.h" +#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) + #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) struct kvm_rmap_desc { @@ -1754,7 +1756,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, int user_fault, int write_fault, int dirty, int level, gfn_t gfn, pfn_t pfn, bool speculative, - bool can_unsync) + bool can_unsync, bool reset_host_protection) { u64 spte; int ret = 0; @@ -1781,6 +1783,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, kvm_is_mmio_pfn(pfn)); + if (reset_host_protection) + spte |= SPTE_HOST_WRITEABLE; + spte |= (u64)pfn << PAGE_SHIFT; if ((pte_access & ACC_WRITE_MASK) @@ -1826,7 +1831,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pt_access, unsigned pte_access, int user_fault, int write_fault, int dirty, int *ptwrite, int level, gfn_t gfn, - pfn_t pfn, bool speculative) + pfn_t pfn, bool speculative, + bool reset_host_protection) { int was_rmapped = 0; int was_writeble = is_writeble_pte(*sptep); @@ -1858,7 +1864,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, } if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, - dirty, level, gfn, pfn, speculative, true)) { + dirty, level, gfn, pfn, speculative, true, + reset_host_protection)) { if (write_fault) *ptwrite = 1; kvm_x86_ops->tlb_flush(vcpu); @@ -1906,7 +1913,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, if (iterator.level == level) { mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, 0, write, 1, &pt_write, - level, gfn, pfn, false); + level, gfn, pfn, false, true); ++vcpu->stat.pf_fixed; break; } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index d2fec9c12d22..72558f8ff3f5 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -273,9 +273,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) return; kvm_get_pfn(pfn); + /* + * we call mmu_set_spte() with reset_host_protection = true beacuse that + * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). + */ mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, - gpte_to_gfn(gpte), pfn, true); + gpte_to_gfn(gpte), pfn, true, true); } /* @@ -308,7 +312,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, user_fault, write_fault, gw->ptes[gw->level-1] & PT_DIRTY_MASK, ptwrite, level, - gw->gfn, pfn, false); + gw->gfn, pfn, false, true); break; } @@ -558,6 +562,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { int i, offset, nr_present; + bool reset_host_protection; offset = nr_present = 0; @@ -595,9 +600,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) nr_present++; pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); + if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) { + pte_access &= ~ACC_WRITE_MASK; + reset_host_protection = 0; + } else { + reset_host_protection = 1; + } set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, - spte_to_pfn(sp->spt[i]), true, false); + spte_to_pfn(sp->spt[i]), true, false, + reset_host_protection); } return !nr_present; -- cgit v1.2.2 From 3da0dd433dc399a8c0124d0614d82a09b6a49bce Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Wed, 23 Sep 2009 21:47:18 +0300 Subject: KVM: add support for change_pte mmu notifiers this is needed for kvm if it want ksm to directly map pages into its shadow page tables. [marcelo: cast pfn assignment to u64] Signed-off-by: Izik Eidus Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 62 +++++++++++++++++++++++++++++++++++------ 2 files changed, 54 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3be000435fad..d83892226f73 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -796,6 +796,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_age_hva(struct kvm *kvm, unsigned long hva); +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5cd8b4ec3a01..685a4ffac8e6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -748,7 +748,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) return write_protected; } -static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) +static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) { u64 *spte; int need_tlb_flush = 0; @@ -763,8 +763,45 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) return need_tlb_flush; } -static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, - int (*handler)(struct kvm *kvm, unsigned long *rmapp)) +static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) +{ + int need_flush = 0; + u64 *spte, new_spte; + pte_t *ptep = (pte_t *)data; + pfn_t new_pfn; + + WARN_ON(pte_huge(*ptep)); + new_pfn = pte_pfn(*ptep); + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + BUG_ON(!is_shadow_present_pte(*spte)); + rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); + need_flush = 1; + if (pte_write(*ptep)) { + rmap_remove(kvm, spte); + __set_spte(spte, shadow_trap_nonpresent_pte); + spte = rmap_next(kvm, rmapp, NULL); + } else { + new_spte = *spte &~ (PT64_BASE_ADDR_MASK); + new_spte |= (u64)new_pfn << PAGE_SHIFT; + + new_spte &= ~PT_WRITABLE_MASK; + new_spte &= ~SPTE_HOST_WRITEABLE; + if (is_writeble_pte(*spte)) + kvm_set_pfn_dirty(spte_to_pfn(*spte)); + __set_spte(spte, new_spte); + spte = rmap_next(kvm, rmapp, spte); + } + } + if (need_flush) + kvm_flush_remote_tlbs(kvm); + + return 0; +} + +static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, u64 data, + int (*handler)(struct kvm *kvm, unsigned long *rmapp, + u64 data)) { int i, j; int retval = 0; @@ -786,13 +823,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, if (hva >= start && hva < end) { gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; - retval |= handler(kvm, &memslot->rmap[gfn_offset]); + retval |= handler(kvm, &memslot->rmap[gfn_offset], + data); for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { int idx = gfn_offset; idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); retval |= handler(kvm, - &memslot->lpage_info[j][idx].rmap_pde); + &memslot->lpage_info[j][idx].rmap_pde, + data); } } } @@ -802,10 +841,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) { - return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); + return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + kvm_handle_hva(kvm, hva, (u64)&pte, kvm_set_pte_rmapp); } -static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) { u64 *spte; int young = 0; @@ -841,13 +885,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) gfn = unalias_gfn(vcpu->kvm, gfn); rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); - kvm_unmap_rmapp(vcpu->kvm, rmapp); + kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); kvm_flush_remote_tlbs(vcpu->kvm); } int kvm_age_hva(struct kvm *kvm, unsigned long hva) { - return kvm_handle_hva(kvm, hva, kvm_age_rmapp); + return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); } #ifdef MMU_DEBUG -- cgit v1.2.2 From 9bcbdd9c58617f1301dd4f17c738bb9bc73aca70 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 8 Oct 2009 06:40:41 -0700 Subject: x86, timers: Check for pending timers after (device) interrupts Now that range timers and deferred timers are common, I found a problem with these using the "perf timechart" tool. Frans Pop also reported high scheduler latencies via LatencyTop, when using iwlagn. It turns out that on x86, these two 'opportunistic' timers only get checked when another "real" timer happens. These opportunistic timers have the objective to save power by hitchhiking on other wakeups, as to avoid CPU wakeups by themselves as much as possible. The change in this patch runs this check not only at timer interrupts, but at all (device) interrupts. The effect is that: 1) the deferred timers/range timers get delayed less 2) the range timers cause less wakeups by themselves because the percentage of hitchhiking on existing wakeup events goes up. I've verified the working of the patch using "perf timechart", the original exposed bug is gone with this patch. Frans also reported success - the latencies are now down in the expected ~10 msec range. Signed-off-by: Arjan van de Ven Tested-by: Frans Pop Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091008064041.67219b13@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 2 ++ arch/x86/kernel/smp.c | 1 + 2 files changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 74656d1d4e30..391206199515 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -244,6 +244,7 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) __func__, smp_processor_id(), vector, irq); } + run_local_timers(); irq_exit(); set_irq_regs(old_regs); @@ -268,6 +269,7 @@ void smp_generic_interrupt(struct pt_regs *regs) if (generic_interrupt_extension) generic_interrupt_extension(); + run_local_timers(); irq_exit(); set_irq_regs(old_regs); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index ec1de97600e7..d915d956e66d 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -198,6 +198,7 @@ void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); inc_irq_stat(irq_resched_count); + run_local_timers(); /* * KVM uses this interrupt to force a cpu out of guest mode */ -- cgit v1.2.2 From d0153ca35d344d9b640dc305031b0703ba3f30f0 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 29 Sep 2009 10:25:24 -0700 Subject: x86, vmi: Mark VMI deprecated and schedule it for removal Add text in feature-removal.txt indicating that VMI will be removed in the 2.6.37 timeframe. Signed-off-by: Alok N Kataria Acked-by: Chris Wright LKML-Reference: <1254193238.13456.48.camel@ank32.eng.vmware.com> [ removed a bogus Kconfig change, marked (DEPRECATED) in Kconfig ] Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 11 ++++++++++- arch/x86/kernel/vmi_32.c | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c876bace8fdc..07e01149e3bf 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -491,7 +491,7 @@ if PARAVIRT_GUEST source "arch/x86/xen/Kconfig" config VMI - bool "VMI Guest support" + bool "VMI Guest support (DEPRECATED)" select PARAVIRT depends on X86_32 ---help--- @@ -500,6 +500,15 @@ config VMI at the moment), by linking the kernel to a GPL-ed ROM module provided by the hypervisor. + As of September 2009, VMware has started a phased retirement + of this feature from VMware's products. Please see + feature-removal-schedule.txt for details. If you are + planning to enable this option, please note that you cannot + live migrate a VMI enabled VM to a future VMware product, + which doesn't support VMI. So if you expect your kernel to + seamlessly migrate to newer VMware products, keep this + disabled. + config KVM_CLOCK bool "KVM paravirtualized clock" select PARAVIRT diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 31e6f6cfe53e..d430e4c30193 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -648,7 +648,7 @@ static inline int __init activate_vmi(void) pv_info.paravirt_enabled = 1; pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; - pv_info.name = "vmi"; + pv_info.name = "vmi [deprecated]"; pv_init_ops.patch = vmi_patch; -- cgit v1.2.2 From e7ab0f7b50bc4688fb5cf65de5d42e3b882fb8d1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 9 Oct 2009 15:58:20 +0200 Subject: Revert "x86, timers: Check for pending timers after (device) interrupts" This reverts commit 9bcbdd9c58617f1301dd4f17c738bb9bc73aca70. The real bug producing LatencyTop latencies has been fixed in: f5dc375: sched: Update the clock of runqueue select_task_rq() selected And the commit being reverted here triggers local timer processing from every device IRQ. If device IRQs come in at a high frequency, this could cause a performance regression. The commit being reverted here purely 'fixed' the reported latency as a side effect, because CPUs were being moved out of idle more often. Acked-by: Peter Zijlstra Cc: Arjan van de Ven Cc: Frans Pop Cc: Linus Torvalds Cc: Mike Galbraith Cc: Thomas Gleixner LKML-Reference: <20091008064041.67219b13@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 2 -- arch/x86/kernel/smp.c | 1 - 2 files changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 391206199515..74656d1d4e30 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -244,7 +244,6 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) __func__, smp_processor_id(), vector, irq); } - run_local_timers(); irq_exit(); set_irq_regs(old_regs); @@ -269,7 +268,6 @@ void smp_generic_interrupt(struct pt_regs *regs) if (generic_interrupt_extension) generic_interrupt_extension(); - run_local_timers(); irq_exit(); set_irq_regs(old_regs); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index d915d956e66d..ec1de97600e7 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -198,7 +198,6 @@ void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); inc_irq_stat(irq_resched_count); - run_local_timers(); /* * KVM uses this interrupt to force a cpu out of guest mode */ -- cgit v1.2.2 From d43c36dc6b357fa1806800f18aa30123c747a6d1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 7 Oct 2009 17:09:06 +0400 Subject: headers: remove sched.h from interrupt.h After m68k's task_thread_info() doesn't refer to current, it's possible to remove sched.h from interrupt.h and not break m68k! Many thanks to Heiko Carstens for allowing this. Signed-off-by: Alexey Dobriyan --- arch/x86/kernel/cpu/mcheck/mce_intel.c | 1 + arch/x86/kernel/pci-gart_64.c | 1 + arch/x86/kernel/reboot.c | 1 + 3 files changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 889f665fe93d..7c785634af2b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 98a827ee9ed7..a7f1b64f86e0 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 27349f92a6d7..a1a3cdda06e1 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.2 From 15b812f1d0a5ca8f5efe7f5882f468af10682ca8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 11 Oct 2009 14:17:16 -0700 Subject: pci: increase alignment to make more space for hidden code As reported in http://bugzilla.kernel.org/show_bug.cgi?id=13940 on some system when acpi are enabled, acpi clears some BAR for some devices without reason, and kernel will need to allocate devices for them. It then apparently hits some undocumented resource conflict, resulting in non-working devices. Try to increase alignment to get more safe range for unassigned devices. Signed-off-by: Yinghai Lu Signed-off-by: Linus Torvalds --- arch/x86/kernel/e820.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 85419bb7d4ab..d17d482a04f4 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1378,8 +1378,8 @@ static unsigned long ram_alignment(resource_size_t pos) if (mb < 16) return 1024*1024; - /* To 32MB for anything above that */ - return 32*1024*1024; + /* To 64MB for anything above that */ + return 64*1024*1024; } #define MAX_RESOURCE_SIZE ((resource_size_t)-1) -- cgit v1.2.2 From 9a821b231644028f8e2a853eb33d1184e925b183 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 12 Oct 2009 12:59:29 +0100 Subject: x86: Move pci_iommu_init to rootfs_initcall() We want this to happen after the PCI quirks, which are now running at the very end of the fs_initcalls. This works around the BIOS problems which were originally addressed by commit db8be50c4307dac2b37305fc59c8dc0f978d09ea ('USB: Work around BIOS bugs by quiescing USB controllers earlier'), which was reverted in commit d93a8f829fe1d2f3002f2c6ddb553d12db420412. Signed-off-by: David Woodhouse --- arch/x86/kernel/pci-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 64b838eac18c..e0d9199d0eb9 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -311,7 +311,7 @@ void pci_iommu_shutdown(void) amd_iommu_shutdown(); } /* Must execute after PCI subsystem */ -fs_initcall(pci_iommu_init); +rootfs_initcall(pci_iommu_init); #ifdef CONFIG_PCI /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ -- cgit v1.2.2 From 7a4b7e5e741fe0a72a517b0367a2659aa53f7c44 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 6 Oct 2009 16:32:43 +0100 Subject: x86: Fix Suspend to RAM freeze on Acer Aspire 1511Lmi laptop Move the trampoline and accessors back out of .cpuinit.* for the case of 64-bits+ACPI_SLEEP. This solves s2ram hangs reported in: http://bugzilla.kernel.org/show_bug.cgi?id=14279 Reported-and-bisected-by: Christian Casteyde Signed-off-by: Jan Beulich Cc: Cc: "Andrew Morton" Cc: "Rafael J. Wysocki" Signed-off-by: Ingo Molnar --- arch/x86/kernel/trampoline.c | 12 ++++++++++-- arch/x86/kernel/trampoline_64.S | 4 ++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index 699f7eeb896a..cd022121cab6 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -3,8 +3,16 @@ #include #include +#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) +#define __trampinit +#define __trampinitdata +#else +#define __trampinit __cpuinit +#define __trampinitdata __cpuinitdata +#endif + /* ready for x86_64 and x86 */ -unsigned char *__cpuinitdata trampoline_base = __va(TRAMPOLINE_BASE); +unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE); void __init reserve_trampoline_memory(void) { @@ -26,7 +34,7 @@ void __init reserve_trampoline_memory(void) * bootstrap into the page concerned. The caller * has made sure it's suitably aligned. */ -unsigned long __cpuinit setup_trampoline(void) +unsigned long __trampinit setup_trampoline(void) { memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); return virt_to_phys(trampoline_base); diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 596d54c660a5..3af2dff58b21 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -32,8 +32,12 @@ #include #include +#ifdef CONFIG_ACPI_SLEEP +.section .rodata, "a", @progbits +#else /* We can free up the trampoline after bootup if cpu hotplug is not supported. */ __CPUINITRODATA +#endif .code16 ENTRY(trampoline_data) -- cgit v1.2.2 From d1705c558c95418378b11a0be963fe1b3e2fa381 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 12 Oct 2009 11:32:31 -0700 Subject: x86: fix kernel panic on 32 bits when profiling Latest kernel has a kernel panic in booting on i386 machine when profile=2 setting in cmdline. It is due to 'sp' being incorrect in profile_pc(). BUG: unable to handle kernel NULL pointer dereference at 00000246 IP: [] profile_pc+0x2a/0x48 *pde = 00000000 Oops: 0000 [#1] SMP This differs from the original version by Alex Shi in that we use the kernel_stack_pointer() inline already defined in for this purpose, instead of #ifdef. Originally-by: Alex Shi Cc: "Chen, Tim C" Cc: "Rafael J. Wysocki" Cc: Andrew Morton Signed-off-by: H. Peter Anvin --- arch/x86/kernel/time.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index dcb00d278512..be2573448ed9 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -38,7 +38,8 @@ unsigned long profile_pc(struct pt_regs *regs) #ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->bp + sizeof(long)); #else - unsigned long *sp = (unsigned long *)regs->sp; + unsigned long *sp = + (unsigned long *)kernel_stack_pointer(regs); /* * Return address is either directly at stack pointer * or above a saved flags. Eflags has bits 22-31 zero, -- cgit v1.2.2 From 71999d9862e667f1fd14f8fbfa0cce6d855bad3f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 12 Oct 2009 16:32:43 -0700 Subject: x86/paravirt: Use normal calling sequences for irq enable/disable Bastian Blank reported a boot crash with stackprotector enabled, and debugged it back to edx register corruption. For historical reasons irq enable/disable/save/restore had special calling sequences to make them more efficient. With the more recent introduction of higher-level and more general optimisations this is no longer necessary so we can just use the normal PVOP_ macros. This fixes some residual bugs in the old implementations which left edx liable to inadvertent clobbering. Also, fix some bugs in __PVOP_VCALLEESAVE which were revealed by actual use. Reported-by: Bastian Blank Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel Cc: Xen-devel LKML-Reference: <4AD3BC9B.7040501@goop.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt.h | 28 ++++------------------------ arch/x86/include/asm/paravirt_types.h | 10 ++++++---- 2 files changed, 10 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 8aebcc41041d..efb38994859c 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -840,42 +840,22 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) static inline unsigned long __raw_local_save_flags(void) { - unsigned long f; - - asm volatile(paravirt_alt(PARAVIRT_CALL) - : "=a"(f) - : paravirt_type(pv_irq_ops.save_fl), - paravirt_clobber(CLBR_EAX) - : "memory", "cc"); - return f; + return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); } static inline void raw_local_irq_restore(unsigned long f) { - asm volatile(paravirt_alt(PARAVIRT_CALL) - : "=a"(f) - : PV_FLAGS_ARG(f), - paravirt_type(pv_irq_ops.restore_fl), - paravirt_clobber(CLBR_EAX) - : "memory", "cc"); + PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); } static inline void raw_local_irq_disable(void) { - asm volatile(paravirt_alt(PARAVIRT_CALL) - : - : paravirt_type(pv_irq_ops.irq_disable), - paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc"); + PVOP_VCALLEE0(pv_irq_ops.irq_disable); } static inline void raw_local_irq_enable(void) { - asm volatile(paravirt_alt(PARAVIRT_CALL) - : - : paravirt_type(pv_irq_ops.irq_enable), - paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc"); + PVOP_VCALLEE0(pv_irq_ops.irq_enable); } static inline unsigned long __raw_local_irq_save(void) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index dd0f5b32489d..9357473c8da0 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -494,10 +494,11 @@ int paravirt_disable_iospace(void); #define EXTRA_CLOBBERS #define VEXTRA_CLOBBERS #else /* CONFIG_X86_64 */ +/* [re]ax isn't an arg, but the return val */ #define PVOP_VCALL_ARGS \ unsigned long __edi = __edi, __esi = __esi, \ - __edx = __edx, __ecx = __ecx -#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax + __edx = __edx, __ecx = __ecx, __eax = __eax +#define PVOP_CALL_ARGS PVOP_VCALL_ARGS #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) @@ -509,6 +510,7 @@ int paravirt_disable_iospace(void); "=c" (__ecx) #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) +/* void functions are still allowed [re]ax for scratch */ #define PVOP_VCALLEE_CLOBBERS "=a" (__eax) #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS @@ -583,8 +585,8 @@ int paravirt_disable_iospace(void); VEXTRA_CLOBBERS, \ pre, post, ##__VA_ARGS__) -#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ - ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ +#define __PVOP_VCALLEESAVE(op, pre, post, ...) \ + ____PVOP_VCALL(op.func, CLBR_RET_REG, \ PVOP_VCALLEE_CLOBBERS, , \ pre, post, ##__VA_ARGS__) -- cgit v1.2.2 From 89ccf465abe6b20d804a63ae20307970c441369d Mon Sep 17 00:00:00 2001 From: Li Hong Date: Wed, 14 Oct 2009 18:50:39 +0800 Subject: x86, perf_event: Rename 'performance counter interrupt' In 'cdd6c482c9ff9c55475ee7392ec8f672eddb7be6', we renamed Performance Counters -> Performance Events. The name showed up in /proc/interrupts also needs a change. I use PMI (Performance monitoring interrupt) here, since it is the official name used in Intel's documents. Signed-off-by: Li Hong Cc: Andrew Morton Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091014105039.GA22670@uhli> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 391206199515..86fb2c8e065a 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -63,10 +63,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "%*s: ", prec, "CNT"); + seq_printf(p, "%*s: ", prec, "PMI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); - seq_printf(p, " Performance counter interrupts\n"); + seq_printf(p, " Performance monitoring interrupts\n"); seq_printf(p, "%*s: ", prec, "PND"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); -- cgit v1.2.2 From e9a63a4e559fbdc522072281d05e6b13c1022f4b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 14 Oct 2009 14:16:38 -0700 Subject: x86: linker script syntax nits The linker scripts grew some use of weirdly wrong linker script syntax. It happens to work, but it's not what the syntax is documented to be. Clean it up to use the official syntax. Signed-off-by: Roland McGrath CC: Ian Lance Taylor --- arch/x86/kernel/acpi/realmode/wakeup.lds.S | 4 ++-- arch/x86/kernel/vmlinux.lds.S | 17 ++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S index 7da00b799cda..0e50e1e5c573 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S @@ -56,6 +56,6 @@ SECTIONS /DISCARD/ : { *(.note*) } - - . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); } + +ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 92929fb3f9fa..8d6001ad8d8d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -305,8 +305,8 @@ SECTIONS #ifdef CONFIG_X86_32 -. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); +ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #else /* * Per-cpu symbols which need to be offset from __per_cpu_load @@ -319,12 +319,12 @@ INIT_PER_CPU(irq_stack_union); /* * Build-time check on the image size: */ -. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); +ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP -. = ASSERT((per_cpu__irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); +ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); #endif #endif /* CONFIG_X86_32 */ @@ -332,7 +332,6 @@ INIT_PER_CPU(irq_stack_union); #ifdef CONFIG_KEXEC #include -. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big"); +ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big"); #endif - -- cgit v1.2.2 From db8590f5043f3436a65b24155a3a7af2604df876 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Oct 2009 08:08:12 +0200 Subject: Revert "x86: linker script syntax nits" This reverts commit e9a63a4e559fbdc522072281d05e6b13c1022f4b. This breaks older binutils, where sink-less asserts are broken. See this commit for further details: d2ba8b2: x86: Fix assert syntax in vmlinux.lds.S Acked-by: "H. Peter Anvin" Acked-by: Sam Ravnborg Cc: Linus Torvalds LKML-Reference: <4AD6523D.5030909@zytor.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/realmode/wakeup.lds.S | 4 ++-- arch/x86/kernel/vmlinux.lds.S | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S index 0e50e1e5c573..7da00b799cda 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S @@ -56,6 +56,6 @@ SECTIONS /DISCARD/ : { *(.note*) } -} -ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); + . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); +} diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8d6001ad8d8d..92929fb3f9fa 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -305,8 +305,8 @@ SECTIONS #ifdef CONFIG_X86_32 -ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); +. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #else /* * Per-cpu symbols which need to be offset from __per_cpu_load @@ -319,12 +319,12 @@ INIT_PER_CPU(irq_stack_union); /* * Build-time check on the image size: */ -ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); +. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP -ASSERT((per_cpu__irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); +. = ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); #endif #endif /* CONFIG_X86_32 */ @@ -332,6 +332,7 @@ ASSERT((per_cpu__irq_stack_union == 0), #ifdef CONFIG_KEXEC #include -ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big"); +. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big"); #endif + -- cgit v1.2.2