From 48ec4d9537282a55d602136724f069faafcac8c8 Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@infradead.org>
Date: Wed, 4 Feb 2009 15:54:45 -0500
Subject: x86, 64-bit: print DMI info in the oops trace

This patch echoes what we already do on 32-bit since
90f7d25c6b672137344f447a30a9159945ffea72, and prints the DMI
product name in show_regs, so that system specific problems can be
easily identified.

Signed-off-by: Kyle McMartin <kyle@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process_64.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 416fb9282f4f..85b4cb5c1980 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -40,6 +40,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/ftrace.h>
+#include <linux/dmi.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -151,14 +152,18 @@ void __show_regs(struct pt_regs *regs, int all)
 	unsigned long d0, d1, d2, d3, d6, d7;
 	unsigned int fsindex, gsindex;
 	unsigned int ds, cs, es;
+	const char *board;
 
 	printk("\n");
 	print_modules();
-	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
+	board = dmi_get_system_info(DMI_PRODUCT_NAME);
+	if (!board)
+		board = "";
+	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
 		current->pid, current->comm, print_tainted(),
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
-		init_utsname()->version);
+		init_utsname()->version, board);
 	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
 	printk_address(regs->ip, 1);
 	printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
-- 
cgit v1.2.2


From a6a95406c676ffe4f9dee708eb404a17c69f7fdd Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 4 Feb 2009 13:40:31 +0300
Subject: x86: fix hpet timer reinit for x86_64

There's a small problem with hpet_rtc_reinit function - it checks
for the:

	hpet_readl(HPET_COUNTER) - hpet_t1_cmp > 0

to continue increasing both the HPET_T1_CMP (register) and the
hpet_t1_cmp (variable).

But since the HPET_COUNTER is always 32-bit, if the hpet_t1_cmp
is 64-bit this condition will always be FALSE once the latter hits
the 32-bit boundary, and we can have a situation, when we don't
increase the HPET_T1_CMP register high enough.

The result - timer stops ticking, since HPET_T1_CMP becomes less,
than the COUNTER and never increased again.

The solution is (based on Linus's suggestion) to not compare 64-bits
(on 64-bit x86), but to do the comparison on 32-bit signed
integers.

Reported-by: Kirill Korotaev <dev@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 64d5ad0b8add..c761f914430a 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1075,7 +1075,7 @@ static void hpet_rtc_timer_reinit(void)
 		hpet_t1_cmp += delta;
 		hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
 		lost_ints++;
-	} while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0);
+	} while ((s32)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0);
 
 	if (lost_ints) {
 		if (hpet_rtc_flags & RTC_PIE)
-- 
cgit v1.2.2


From 4560839939f4b4a96e21e80584f87308ac93c1da Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Wed, 4 Feb 2009 16:44:01 -0700
Subject: x86: fix grammar in user-visible BIOS warning

Fix user-visible grammo.

Signed-off-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ae0d8042cf69..c461f6d69074 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -607,7 +607,7 @@ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
 static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
 {
 	printk(KERN_NOTICE
-		"%s detected: BIOS may corrupt low RAM, working it around.\n",
+		"%s detected: BIOS may corrupt low RAM, working around it.\n",
 		d->ident);
 
 	e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
-- 
cgit v1.2.2


From ff08f76d738d0ec0f334b187f61e160caa321d54 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Wed, 4 Feb 2009 13:40:31 +0300
Subject: x86: clean up hpet timer reinit

Implement Linus's suggestion: introduce the hpet_cnt_ahead()
helper function to compare hpet time values - like other
wrapping counter comparisons are abstracted away elsewhere.
(jiffies, ktime_t, etc.)

Reported-by: Kirill Korotaev <dev@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index c761f914430a..388254f69a2a 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -897,13 +897,21 @@ static unsigned long hpet_rtc_flags;
 static int hpet_prev_update_sec;
 static struct rtc_time hpet_alarm_time;
 static unsigned long hpet_pie_count;
-static unsigned long hpet_t1_cmp;
+static u32 hpet_t1_cmp;
 static unsigned long hpet_default_delta;
 static unsigned long hpet_pie_delta;
 static unsigned long hpet_pie_limit;
 
 static rtc_irq_handler irq_handler;
 
+/*
+ * Check that the hpet counter c1 is ahead of the c2
+ */
+static inline int hpet_cnt_ahead(u32 c1, u32 c2)
+{
+	return (s32)(c2 - c1) < 0;
+}
+
 /*
  * Registers a IRQ handler.
  */
@@ -1075,7 +1083,7 @@ static void hpet_rtc_timer_reinit(void)
 		hpet_t1_cmp += delta;
 		hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
 		lost_ints++;
-	} while ((s32)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0);
+	} while (!hpet_cnt_ahead(hpet_t1_cmp, hpet_readl(HPET_COUNTER)));
 
 	if (lost_ints) {
 		if (hpet_rtc_flags & RTC_PIE)
-- 
cgit v1.2.2


From e736ad548db152776de61d7a26805cfae77ce5ce Mon Sep 17 00:00:00 2001
From: "Pallipadi, Venkatesh" <venkatesh.pallipadi@intel.com>
Date: Fri, 6 Feb 2009 16:52:05 -0800
Subject: x86: add clflush before monitor for Intel 7400 series

For Intel 7400 series CPUs, the recommendation is to use a clflush on the
monitored address just before monitor and mwait pair [1].

This clflush makes sure that there are no false wakeups from mwait when the
monitored address was recently written to.

[1] "MONITOR/MWAIT Recommendations for Intel Xeon Processor 7400 series"
    section in specification update document of 7400 series
    http://download.intel.com/design/xeon/specupdt/32033601.pdf

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/intel.c | 3 +++
 arch/x86/kernel/process.c   | 6 ++++++
 2 files changed, 9 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 430e5c38a544..24ff26a38ade 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -291,6 +291,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		ds_init_intel(c);
 	}
 
+	if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush)
+		set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR);
+
 #ifdef CONFIG_X86_64
 	if (c->x86 == 15)
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e68bb9e30864..6d12f7e37f8c 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -180,6 +180,9 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 
 	trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
 	if (!need_resched()) {
+		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+			clflush((void *)&current_thread_info()->flags);
+
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
 		if (!need_resched())
@@ -194,6 +197,9 @@ static void mwait_idle(void)
 	struct power_trace it;
 	if (!need_resched()) {
 		trace_power_start(&it, POWER_CSTATE, 1);
+		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+			clflush((void *)&current_thread_info()->flags);
+
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
 		if (!need_resched())
-- 
cgit v1.2.2


From 3f4a739c6accd651a11fcf3c7a20ec8147c42660 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sun, 8 Feb 2009 16:18:03 -0800
Subject: x86: find nr_irqs_gsi with mp_ioapic_routing

Impact: find right nr_irqs_gsi on some systems.

One test-system has gap between gsi's:

[    0.000000] ACPI: IOAPIC (id[0x04] address[0xfec00000] gsi_base[0])
[    0.000000] IOAPIC[0]: apic_id 4, version 0, address 0xfec00000, GSI 0-23
[    0.000000] ACPI: IOAPIC (id[0x05] address[0xfeafd000] gsi_base[48])
[    0.000000] IOAPIC[1]: apic_id 5, version 0, address 0xfeafd000, GSI 48-54
[    0.000000] ACPI: IOAPIC (id[0x06] address[0xfeafc000] gsi_base[56])
[    0.000000] IOAPIC[2]: apic_id 6, version 0, address 0xfeafc000, GSI 56-62
...
[    0.000000] nr_irqs_gsi: 38

So nr_irqs_gsi is not right. some irq for MSI will overwrite with io_apic.

need to get that with acpi_probe_gsi when acpi io_apic is used

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c | 23 +++++++++++++++++++++++
 arch/x86/kernel/io_apic.c   | 20 +++++++++++++++-----
 2 files changed, 38 insertions(+), 5 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d37593c2f438..7678f10c4568 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -973,6 +973,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 	nr_ioapics++;
 }
 
+int __init acpi_probe_gsi(void)
+{
+	int idx;
+	int gsi;
+	int max_gsi = 0;
+
+	if (acpi_disabled)
+		return 0;
+
+	if (!acpi_ioapic)
+		return 0;
+
+	max_gsi = 0;
+	for (idx = 0; idx < nr_ioapics; idx++) {
+		gsi = mp_ioapic_routing[idx].gsi_end;
+
+		if (gsi > max_gsi)
+			max_gsi = gsi;
+	}
+
+	return max_gsi + 1;
+}
+
 static void assign_to_mp_irq(struct mp_config_intsrc *m,
 				    struct mp_config_intsrc *mp_irq)
 {
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9b0c480c383b..bc7ac4da90d7 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3841,14 +3841,24 @@ int __init io_apic_get_redir_entries (int ioapic)
 
 void __init probe_nr_irqs_gsi(void)
 {
-	int idx;
 	int nr = 0;
 
-	for (idx = 0; idx < nr_ioapics; idx++)
-		nr += io_apic_get_redir_entries(idx) + 1;
-
-	if (nr > nr_irqs_gsi)
+	nr = acpi_probe_gsi();
+	if (nr > nr_irqs_gsi) {
 		nr_irqs_gsi = nr;
+	} else {
+		/* for acpi=off or acpi is not compiled in */
+		int idx;
+
+		nr = 0;
+		for (idx = 0; idx < nr_ioapics; idx++)
+			nr += io_apic_get_redir_entries(idx) + 1;
+
+		if (nr > nr_irqs_gsi)
+			nr_irqs_gsi = nr;
+	}
+
+	printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
 }
 
 /* --------------------------------------------------------------------------
-- 
cgit v1.2.2


From 55a8ba4b7f76bebd7e8ce3f74c04b140627a1bad Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Fri, 6 Feb 2009 10:29:35 -0800
Subject: x86, vmi: put a missing paravirt_release_pmd in pgd_dtor

Commit 6194ba6ff6ccf8d5c54c857600843c67aa82c407 ("x86: don't special-case
pmd allocations as much") made changes to the way we handle pmd allocations,
and while doing that it dropped a call to  paravirt_release_pd on the
pgd page from the pgd_dtor code path.

As a result of this missing release, the hypervisor is now unaware of the
pgd page being freed, and as a result it ends up tracking this page as a
page table page.

After this the guest may start using the same page for other purposes, and
depending on what use the page is put to, it may result in various performance
and/or functional issues ( hangs, reboots).

Since this release is only required for VMI, I now release the pgd page from
the (vmi)_pgd_free hook.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>
---
 arch/x86/kernel/vmi_32.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 1d3302cc2ddf..bef58b4982db 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -320,6 +320,16 @@ static void vmi_release_pmd(unsigned long pfn)
 	vmi_ops.release_page(pfn, VMI_PAGE_L2);
 }
 
+/*
+ * We use the pgd_free hook for releasing the pgd page:
+ */
+static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	unsigned long pfn = __pa(pgd) >> PAGE_SHIFT;
+
+	vmi_ops.release_page(pfn, VMI_PAGE_L2);
+}
+
 /*
  * Helper macros for MMU update flags.  We can defer updates until a flush
  * or page invalidation only if the update is to the current address space
@@ -762,6 +772,7 @@ static inline int __init activate_vmi(void)
 	if (vmi_ops.release_page) {
 		pv_mmu_ops.release_pte = vmi_release_pte;
 		pv_mmu_ops.release_pmd = vmi_release_pmd;
+		pv_mmu_ops.pgd_free = vmi_pgd_free;
 	}
 
 	/* Set linear is needed in all cases */
-- 
cgit v1.2.2


From d315760ffa261c15ff92699ac6f514112543d7ca Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 9 Feb 2009 22:17:39 +0900
Subject: x86: fix math_emu register frame access

do_device_not_available() is the handler for #NM and it declares that
it takes a unsigned long and calls math_emu(), which takes a long
argument and surprisingly expects the stack frame starting at the zero
argument would match struct math_emu_info, which isn't true regardless
of configuration in the current code.

This patch makes do_device_not_available() take struct pt_regs like
other exception handlers and initialize struct math_emu_info with
pointer to it and pass pointer to the math_emu_info to math_emulate()
like normal C functions do.  This way, unless gcc makes a copy of
struct pt_regs in do_device_not_available(), the register frame is
correctly accessed regardless of kernel configuration or compiler
used.

This doesn't fix all math_emu problems but it at least gets it
somewhat working.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/traps.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 98c2d055284b..7932338d7cb3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -896,7 +896,7 @@ asmlinkage void math_state_restore(void)
 EXPORT_SYMBOL_GPL(math_state_restore);
 
 #ifndef CONFIG_MATH_EMULATION
-asmlinkage void math_emulate(long arg)
+void math_emulate(struct math_emu_info *info)
 {
 	printk(KERN_EMERG
 		"math-emulation not enabled and no coprocessor found.\n");
@@ -906,16 +906,19 @@ asmlinkage void math_emulate(long arg)
 }
 #endif /* CONFIG_MATH_EMULATION */
 
-dotraplinkage void __kprobes
-do_device_not_available(struct pt_regs *regs, long error)
+dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs)
 {
 #ifdef CONFIG_X86_32
 	if (read_cr0() & X86_CR0_EM) {
-		conditional_sti(regs);
-		math_emulate(0);
+		struct math_emu_info info = { };
+
+		conditional_sti(&regs);
+
+		info.regs = &regs;
+		math_emulate(&info);
 	} else {
 		math_state_restore(); /* interrupts still off */
-		conditional_sti(regs);
+		conditional_sti(&regs);
 	}
 #else
 	math_state_restore();
-- 
cgit v1.2.2


From b52af40923fc91a12e3c7152d833e0c0c6a508f6 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Tue, 10 Feb 2009 09:21:07 +0100
Subject: i8327: fix outb() parameter order

In i8237A_resume(), when resetting the DMA controller, the parameters to
dma_outb() were mixed up.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
[ cleaned up the file a tiny bit. ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/i8237.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c
index dbd6c1d1b638..b42ca694dc68 100644
--- a/arch/x86/kernel/i8237.c
+++ b/arch/x86/kernel/i8237.c
@@ -28,10 +28,10 @@ static int i8237A_resume(struct sys_device *dev)
 
 	flags = claim_dma_lock();
 
-	dma_outb(DMA1_RESET_REG, 0);
-	dma_outb(DMA2_RESET_REG, 0);
+	dma_outb(0, DMA1_RESET_REG);
+	dma_outb(0, DMA2_RESET_REG);
 
-	for (i = 0;i < 8;i++) {
+	for (i = 0; i < 8; i++) {
 		set_dma_addr(i, 0x000000);
 		/* DMA count is a bit weird so this is not 0 */
 		set_dma_count(i, 1);
@@ -51,14 +51,14 @@ static int i8237A_suspend(struct sys_device *dev, pm_message_t state)
 }
 
 static struct sysdev_class i8237_sysdev_class = {
-	.name = "i8237",
-	.suspend = i8237A_suspend,
-	.resume = i8237A_resume,
+	.name		= "i8237",
+	.suspend	= i8237A_suspend,
+	.resume		= i8237A_resume,
 };
 
 static struct sys_device device_i8237A = {
-	.id	= 0,
-	.cls	= &i8237_sysdev_class,
+	.id		= 0,
+	.cls		= &i8237_sysdev_class,
 };
 
 static int __init i8237A_init_sysfs(void)
@@ -68,5 +68,4 @@ static int __init i8237A_init_sysfs(void)
 		error = sysdev_register(&device_i8237A);
 	return error;
 }
-
 device_initcall(i8237A_init_sysfs);
-- 
cgit v1.2.2


From e3944bfac961cd7fc82f3b3143c55dc375748569 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 10 Feb 2009 13:07:13 -0500
Subject: tracing, x86: fix fixup section to return to original code

Impact: fix to prevent a kernel crash on fault

If for some reason the pointer to the parent function on the
stack takes a fault, the fix up code will not return back to
the original faulting code. This can lead to unpredictable
results and perhaps even a kernel panic.

A fault should not happen, but if it does, we should simply
disable the tracer, warn, and continue running the kernel.
It should not lead to a kernel crash.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 arch/x86/kernel/ftrace.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1b43086b097a..9d549e4fe880 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -491,13 +491,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		"1: " _ASM_MOV " (%[parent_old]), %[old]\n"
 		"2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n"
 		"   movl $0, %[faulted]\n"
+		"3:\n"
 
 		".section .fixup, \"ax\"\n"
-		"3: movl $1, %[faulted]\n"
+		"4: movl $1, %[faulted]\n"
+		"   jmp 3b\n"
 		".previous\n"
 
-		_ASM_EXTABLE(1b, 3b)
-		_ASM_EXTABLE(2b, 3b)
+		_ASM_EXTABLE(1b, 4b)
+		_ASM_EXTABLE(2b, 4b)
 
 		: [parent_replaced] "=r" (parent), [old] "=r" (old),
 		  [faulted] "=r" (faulted)
-- 
cgit v1.2.2


From f47a454db9129d2e61b224a40f4365cdd4f83042 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 10 Feb 2009 11:53:23 -0500
Subject: tracing, x86: fix constraint for parent variable

The constraint used for retrieving and restoring the parent function
pointer is incorrect. The parent variable is a pointer, and the
address of the pointer is modified by the asm statement and not
the pointer itself. It is incorrect to pass it in as an output
constraint since the asm will never update the pointer.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 9d549e4fe880..231bdd3c5b1c 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -488,8 +488,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 	 * ignore such a protection.
 	 */
 	asm volatile(
-		"1: " _ASM_MOV " (%[parent_old]), %[old]\n"
-		"2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n"
+		"1: " _ASM_MOV " (%[parent]), %[old]\n"
+		"2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
 		"   movl $0, %[faulted]\n"
 		"3:\n"
 
@@ -501,9 +501,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		_ASM_EXTABLE(1b, 4b)
 		_ASM_EXTABLE(2b, 4b)
 
-		: [parent_replaced] "=r" (parent), [old] "=r" (old),
-		  [faulted] "=r" (faulted)
-		: [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
+		: [old] "=r" (old), [faulted] "=r" (faulted)
+		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
 		: "memory"
 	);
 
-- 
cgit v1.2.2


From 9f339e7028e2855717af3193c938f9960ad13b38 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Wed, 11 Feb 2009 15:10:27 +0100
Subject: x86, ptrace, mm: fix double-free on race

Ptrace_detach() races with __ptrace_unlink() if the traced task is
reaped while detaching. This might cause a double-free of the BTS
buffer.

Change the ptrace_detach() path to only do the memory accounting in
ptrace_bts_detach() and leave the buffer free to ptrace_bts_untrace()
which will be called from __ptrace_unlink().

The fix follows a proposal from Oleg Nesterov.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ptrace.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a5df5f82fb9..5a4c23d89892 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child)
 
 static void ptrace_bts_detach(struct task_struct *child)
 {
-	if (unlikely(child->bts)) {
-		ds_release_bts(child->bts);
-		child->bts = NULL;
-
-		ptrace_bts_free_buffer(child);
-	}
+	/*
+	 * Ptrace_detach() races with ptrace_untrace() in case
+	 * the child dies and is reaped by another thread.
+	 *
+	 * We only do the memory accounting at this point and
+	 * leave the buffer deallocation and the bts tracer
+	 * release to ptrace_bts_untrace() which will be called
+	 * later on with tasklist_lock held.
+	 */
+	release_locked_buffer(child->bts_buffer, child->bts_size);
 }
 #else
 static inline void ptrace_bts_fork(struct task_struct *tsk) {}
-- 
cgit v1.2.2


From d85cf93da66977dbc645352be1b2084a659d8a0b Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 12 Feb 2009 10:02:56 -0800
Subject: x86/paravirt: make arch_flush_lazy_mmu/cpu disable preemption

Impact: avoid access to percpu vars in preempible context

They are intended to be used whenever there's the possibility
that there's some stale state which is going to be overwritten
with a queued update, or to force a state change when we may be
in lazy mode.  Either way, we could end up calling it with
preemption enabled, so wrap the functions in their own little
preempt-disable section so they can be safely called in any
context (though preemption should never be enabled if we're actually
in a lazy state).

(Move out of line to avoid #include dependencies.)

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e4c8fb608873..dcba6c567a2a 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -268,6 +268,30 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 	return __get_cpu_var(paravirt_lazy_mode);
 }
 
+void arch_flush_lazy_mmu_mode(void)
+{
+	preempt_disable();
+
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+		arch_leave_lazy_mmu_mode();
+		arch_enter_lazy_mmu_mode();
+	}
+
+	preempt_enable();
+}
+
+void arch_flush_lazy_cpu_mode(void)
+{
+	preempt_disable();
+
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
+		arch_leave_lazy_cpu_mode();
+		arch_enter_lazy_cpu_mode();
+	}
+
+	preempt_enable();
+}
+
 struct pv_info pv_info = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
-- 
cgit v1.2.2


From 34b0900d323122113683685b200aae9f9b75e63b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 12 Feb 2009 21:30:48 +0100
Subject: x86: warn if arch_flush_lazy_mmu_cpu is called in preemptible context

Impact: Catch cases where lazy MMU state is active in a preemtible context

arch_flush_lazy_mmu_cpu() has been changed to disable preemption so
the checks in enter/leave will never trigger. Put the preemtible()
check into arch_flush_lazy_mmu_cpu() to catch such cases.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index dcba6c567a2a..c6520a4e85d4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -273,6 +273,7 @@ void arch_flush_lazy_mmu_mode(void)
 	preempt_disable();
 
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+		WARN_ON(preempt_count() == 1);
 		arch_leave_lazy_mmu_mode();
 		arch_enter_lazy_mmu_mode();
 	}
@@ -285,6 +286,7 @@ void arch_flush_lazy_cpu_mode(void)
 	preempt_disable();
 
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
+		WARN_ON(preempt_count() == 1);
 		arch_leave_lazy_cpu_mode();
 		arch_enter_lazy_cpu_mode();
 	}
-- 
cgit v1.2.2


From b13e24644c138d0ddbc451403c30a96b09bfd556 Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Thu, 12 Feb 2009 18:48:53 -0800
Subject: x86, hpet: fix for LS21 + HPET = boot hang

Between 2.6.23 and 2.6.24-rc1 a change was made that broke IBM LS21
systems that had the HPET enabled in the BIOS, resulting in boot hangs
for x86_64.

Specifically commit b8ce33590687888ebb900d09557b8807c4539022, which
merges the i386 and x86_64 HPET code.

Prior to this commit, when we setup the HPET timers in x86_64, we did
the following:

	hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
                    HPET_TN_32BIT, HPET_T0_CFG);

However after the i386/x86_64 HPET merge, we do the following:

	cfg = hpet_readl(HPET_Tn_CFG(timer));
	cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
			HPET_TN_SETVAL | HPET_TN_32BIT;
	hpet_writel(cfg, HPET_Tn_CFG(timer));

However on LS21s with HPET enabled in the BIOS, the HPET_T0_CFG register
boots with Level triggered interrupts (HPET_TN_LEVEL) enabled. This
causes the periodic interrupt to be not so periodic, and that results in
the boot time hang I reported earlier in the delay calibration.

My fix: Always disable HPET_TN_LEVEL when setting up periodic mode.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 64d5ad0b8add..5c8da2c2c185 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode,
 		now = hpet_readl(HPET_COUNTER);
 		cmp = now + (unsigned long) delta;
 		cfg = hpet_readl(HPET_Tn_CFG(timer));
+		/* Make sure we use edge triggered interrupts */
+		cfg &= ~HPET_TN_LEVEL;
 		cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
 		       HPET_TN_SETVAL | HPET_TN_32BIT;
 		hpet_writel(cfg, HPET_Tn_CFG(timer));
-- 
cgit v1.2.2


From e49590b6dd356f8ef10ba3531a29e5086f6f2e3a Mon Sep 17 00:00:00 2001
From: Chris Ball <cjb@laptop.org>
Date: Fri, 13 Feb 2009 20:56:18 -0500
Subject: x86, olpc: fix model detection without OFW

Impact: fix "garbled display, laptop is unusable" bug

Commit e51a1ac2dfca9ad869471e88f828281db7e810c0 ("x86, olpc: fix endian
bug in openfirmware workaround") breaks model comparison on OLPC; the value
0xc2 needs to be scaled up by olpc_board().

The pre-patch version was wrong, but accidentally worked anyway
(big-endian 0xc2 is big enough to satisfy all other board revisions,
but little endian 0xc2 is not).

Signed-off-by: Chris Ball <cjb@laptop.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Andres Salomon <dilinger@queued.net>
Cc: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/olpc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 7a13fac63a1f..4006c522adc7 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -203,7 +203,7 @@ static void __init platform_detect(void)
 static void __init platform_detect(void)
 {
 	/* stopgap until OFW support is added to the kernel */
-	olpc_platform_info.boardrev = 0xc2;
+	olpc_platform_info.boardrev = olpc_board(0xc2);
 }
 #endif
 
-- 
cgit v1.2.2


From be716615fe596ee117292dc615e95f707fb67fd1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 13 Jan 2009 23:36:34 +0100
Subject: x86, vm86: fix preemption bug

Commit 3d2a71a596bd9c761c8487a2178e95f8a61da083 ("x86, traps: converge
do_debug handlers") changed the preemption disable logic of do_debug()
so vm86_handle_trap() is called with preemption disabled resulting in:

 BUG: sleeping function called from invalid context at include/linux/kernel.h:155
 in_atomic(): 1, irqs_disabled(): 0, pid: 3005, name: dosemu.bin
 Pid: 3005, comm: dosemu.bin Tainted: G        W  2.6.29-rc1 #51
 Call Trace:
  [<c050d669>] copy_to_user+0x33/0x108
  [<c04181f4>] save_v86_state+0x65/0x149
  [<c0418531>] handle_vm86_trap+0x20/0x8f
  [<c064e345>] do_debug+0x15b/0x1a4
  [<c064df1f>] debug_stack_correct+0x27/0x2c
  [<c040365b>] sysenter_do_call+0x12/0x2f
 BUG: scheduling while atomic: dosemu.bin/3005/0x10000001

Restore the original calling convention and reenable preemption before
calling handle_vm86_trap().

Reported-by: Michal Suchanek <hramrach@centrum.cz>
Cc: stable@kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/traps.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7932338d7cb3..a9e7548e1790 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -99,6 +99,12 @@ static inline void preempt_conditional_sti(struct pt_regs *regs)
 		local_irq_enable();
 }
 
+static inline void conditional_cli(struct pt_regs *regs)
+{
+	if (regs->flags & X86_EFLAGS_IF)
+		local_irq_disable();
+}
+
 static inline void preempt_conditional_cli(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
@@ -626,8 +632,10 @@ clear_dr7:
 
 #ifdef CONFIG_X86_32
 debug_vm86:
+	/* reenable preemption: handle_vm86_trap() might sleep */
+	dec_preempt_count();
 	handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
-	preempt_conditional_cli(regs);
+	conditional_cli(regs);
 	return;
 #endif
 
-- 
cgit v1.2.2


From a0abd520fd69295f4a3735e29a9448a32e101d47 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 16 Feb 2009 17:31:58 -0600
Subject: cpumask: fix powernow-k8: partial revert of
 2fdf66b491ac706657946442789ec644cc317e1a

Impact: fix powernow-k8 when acpi=off (or other error).

There was a spurious change introduced into powernow-k8 in this patch:
so that we try to "restore" the cpus_allowed we never saved.  We revert
that file.

See lkml "[PATCH] x86/powernow: fix cpus_allowed brokage when
acpi=off" from Yinghai for the bug report.

Cc: Mike Travis <travis@sgi.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index fb039cd345d8..6428aa17b40e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1157,8 +1157,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	data->cpu = pol->cpu;
 	data->currpstate = HW_PSTATE_INVALID;
 
-	rc = powernow_k8_cpu_init_acpi(data);
-	if (rc) {
+	if (powernow_k8_cpu_init_acpi(data)) {
 		/*
 		 * Use the PSB BIOS structure. This is only availabe on
 		 * an UP version, and is deprecated by AMD.
@@ -1176,17 +1175,20 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 			       "ACPI maintainers and complain to your BIOS "
 			       "vendor.\n");
 #endif
-			goto err_out;
+			kfree(data);
+			return -ENODEV;
 		}
 		if (pol->cpu != 0) {
 			printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
 			       "CPU other than CPU0. Complain to your BIOS "
 			       "vendor.\n");
-			goto err_out;
+			kfree(data);
+			return -ENODEV;
 		}
 		rc = find_psb_table(data);
 		if (rc) {
-			goto err_out;
+			kfree(data);
+			return -ENODEV;
 		}
 		/* Take a crude guess here.
 		 * That guess was in microseconds, so multiply with 1000 */
-- 
cgit v1.2.2


From bf51935f3e988e0ed6f34b55593e5912f990750a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 17 Feb 2009 06:01:30 -0800
Subject: x86, rcu: fix strange load average and ksoftirqd behavior

Damien Wyart reported high ksoftirqd CPU usage (20%) on an
otherwise idle system.

The function-graph trace Damien provided:

>   799.521187 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.521371 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.521555 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.521738 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.521934 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.522068 |   1)  ksoftir-2324  |               |                rcu_check_callbacks() {
>   799.522208 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.522392 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.522575 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.522759 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.522956 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.523074 |   1)  ksoftir-2324  |               |                  rcu_check_callbacks() {
>   799.523214 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.523397 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.523579 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.523762 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.523960 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.524079 |   1)  ksoftir-2324  |               |                  rcu_check_callbacks() {
>   799.524220 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.524403 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.524587 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
>   799.524770 |   1)    <idle>-0    |               |  rcu_check_callbacks() {
> [ . . . ]

Shows rcu_check_callbacks() being invoked way too often. It should be called
once per jiffy, and here it is called no less than 22 times in about
3.5 milliseconds, meaning one call every 160 microseconds or so.

Why do we need to call rcu_pending() and rcu_check_callbacks() from the
idle loop of 32-bit x86, especially given that no other architecture does
this?

The following patch removes the call to rcu_pending() and
rcu_check_callbacks() from the x86 32-bit idle loop in order to
reduce the softirq load on idle systems.

Reported-by: Damien Wyart <damien.wyart@free.fr>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process_32.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a546f55c77b4..bd4da2af08ae 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -104,9 +104,6 @@ void cpu_idle(void)
 			check_pgt_cache();
 			rmb();
 
-			if (rcu_pending(cpu))
-				rcu_check_callbacks(cpu, 0);
-
 			if (cpu_is_offline(cpu))
 				play_dead();
 
-- 
cgit v1.2.2


From 6ec68bff3c81e776a455f6aca95c8c5f1d630198 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Thu, 12 Feb 2009 13:39:26 +0100
Subject: x86, mce: reinitialize per cpu features on resume

Impact: Bug fix

This fixes a long standing bug in the machine check code. On resume the
boot CPU wouldn't get its vendor specific state like thermal handling
reinitialized. This means the boot cpu wouldn't ever get any thermal
events reported again.

Call the respective initialization functions on resume

v2: Remove ancient init because they don't have a resume device anyways.
    Pointed out by Thomas Gleixner.
v3: Now fix the Subject too to reflect v2 change

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/mcheck/mce_64.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 1c838032fd37..1f184efb6bc2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -734,6 +734,7 @@ __setup("mce=", mcheck_enable);
 static int mce_resume(struct sys_device *dev)
 {
 	mce_init(NULL);
+	mce_cpu_features(&current_cpu_data);
 	return 0;
 }
 
-- 
cgit v1.2.2


From 380851bc6b1b4107c61dfa2997f9095dcf779336 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Thu, 12 Feb 2009 13:39:33 +0100
Subject: x86, mce: use force_sig_info to kill process in machine check

Impact: bug fix (with tolerant == 3)

do_exit cannot be called directly from the exception handler because
it can sleep and the exception handler runs on the exception stack.
Use force_sig() instead.

Based on a earlier patch by Ying Huang who debugged the problem.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/mcheck/mce_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 1f184efb6bc2..25cf624eccb7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -295,11 +295,11 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 		 * If we know that the error was in user space, send a
 		 * SIGBUS.  Otherwise, panic if tolerance is low.
 		 *
-		 * do_exit() takes an awful lot of locks and has a slight
+		 * force_sig() takes an awful lot of locks and has a slight
 		 * risk of deadlocking.
 		 */
 		if (user_space) {
-			do_exit(SIGBUS);
+			force_sig(SIGBUS, current);
 		} else if (panic_on_oops || tolerant < 2) {
 			mce_panic("Uncorrected machine check",
 				&panicm, mcestart);
-- 
cgit v1.2.2


From 07db1c140eb233971341396e492cc73d4280e698 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Thu, 12 Feb 2009 13:39:35 +0100
Subject: x86, mce: fix ifdef for 64bit thermal apic vector clear on shutdown

Impact: Bugfix

The ifdef for the apic clear on shutdown for the 64bit intel thermal
vector was incorrect and never triggered. Fix that.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 115449f869ee..570f36e44e59 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -862,7 +862,7 @@ void clear_local_APIC(void)
 	}
 
 	/* lets not touch this if we didn't frob it */
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
+#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
 	if (maxlvt >= 5) {
 		v = apic_read(APIC_LVTTHMR);
 		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
-- 
cgit v1.2.2


From 48ffc70b675aa7798a52a2e92e20f6cce9140b3d Mon Sep 17 00:00:00 2001
From: Alok N Kataria <akataria@vmware.com>
Date: Wed, 18 Feb 2009 12:33:55 -0800
Subject: x86, vmi: TSC going backwards check in vmi clocksource

Impact: fix time warps under vmware

Similar to the check for TSC going backwards in the TSC clocksource,
we also need this check for VMI clocksource.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Cc: Zachary Amsden <zach@vmware.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: stable@kernel.org
---
 arch/x86/kernel/vmiclock_32.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index c4c1f9e09402..bde106cae0a9 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -283,10 +283,13 @@ void __devinit vmi_time_ap_init(void)
 #endif
 
 /** vmi clocksource */
+static struct clocksource clocksource_vmi;
 
 static cycle_t read_real_cycles(void)
 {
-	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
+	cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
+	return ret >= clocksource_vmi.cycle_last ?
+		ret : clocksource_vmi.cycle_last;
 }
 
 static struct clocksource clocksource_vmi = {
-- 
cgit v1.2.2


From cc3ca22063784076bd240fda87217387a8f2ae92 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 20 Feb 2009 23:35:51 -0800
Subject: x86, mce: remove incorrect __cpuinit for mce_cpu_features()

Impact: Bug fix on UP

Checkin 6ec68bff3c81e776a455f6aca95c8c5f1d630198:
    x86, mce: reinitialize per cpu features on resume

introduced a call to mce_cpu_features() in the resume path, in order
for the MCE machinery to get properly reinitialized after a resume.
However, this function (and its successors) was flagged __cpuinit,
which becomes __init on UP configurations (on SMP suspend/resume
requires CPU hotplug and so this would not be seen.)

Remove the offending __cpuinit annotations for mce_cpu_features() and
its successor functions.

Cc: Andi Kleen <ak@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/mcheck/mce_64.c       | 2 +-
 arch/x86/kernel/cpu/mcheck/mce_amd_64.c   | 2 +-
 arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 25cf624eccb7..fe79985ce0f2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -490,7 +490,7 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
 
 }
 
-static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
+static void mce_cpu_features(struct cpuinfo_x86 *c)
 {
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 8ae8c4ff094d..f2ee0ae29bd6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -121,7 +121,7 @@ static long threshold_restart_bank(void *_tr)
 }
 
 /* cpu init entry point, called from mce.c with preempt off */
-void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
+void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
 	unsigned int bank, block;
 	unsigned int cpu = smp_processor_id();
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 4b48f251fd39..f44c36624360 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -30,7 +30,7 @@ asmlinkage void smp_thermal_interrupt(void)
 	irq_exit();
 }
 
-static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c)
+static void intel_init_thermal(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
 	int tm2 = 0;
@@ -84,7 +84,7 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c)
 	return;
 }
 
-void __cpuinit mce_intel_feature_init(struct cpuinfo_x86 *c)
+void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
 	intel_init_thermal(c);
 }
-- 
cgit v1.2.2


From e6bd6760c92dc8475c79c4c4a8a16ac313c0b93d Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Sun, 15 Feb 2009 22:45:49 +0100
Subject: x86_64: acpi/wakeup_64 cleanup

- remove %ds re-set, it's already set in wakeup_long64
- remove double labels and alignment (ENTRY already adds both)
- use meaningful resume point labelname
- skip alignment while jumping from wakeup_long64 to the resume point
- remove .size, .type and unused labels
[v2]
- added ENDPROCs

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/acpi/wakeup_64.S | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index bcc293423a70..b5dee6a0de3a 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -13,7 +13,6 @@
 	 * Hooray, we are in Long 64-bit mode (but still running in low memory)
 	 */
 ENTRY(wakeup_long64)
-wakeup_long64:
 	movq	saved_magic, %rax
 	movq	$0x123456789abcdef0, %rdx
 	cmpq	%rdx, %rax
@@ -34,16 +33,12 @@ wakeup_long64:
 
 	movq	saved_rip, %rax
 	jmp	*%rax
+ENDPROC(wakeup_long64)
 
 bogus_64_magic:
 	jmp	bogus_64_magic
 
-	.align 2
-	.p2align 4,,15
-.globl do_suspend_lowlevel
-	.type	do_suspend_lowlevel,@function
-do_suspend_lowlevel:
-.LFB5:
+ENTRY(do_suspend_lowlevel)
 	subq	$8, %rsp
 	xorl	%eax, %eax
 	call	save_processor_state
@@ -67,7 +62,7 @@ do_suspend_lowlevel:
 	pushfq
 	popq	pt_regs_flags(%rax)
 
-	movq	$.L97, saved_rip(%rip)
+	movq	$resume_point, saved_rip(%rip)
 
 	movq	%rsp, saved_rsp
 	movq	%rbp, saved_rbp
@@ -79,13 +74,9 @@ do_suspend_lowlevel:
 	movl	$3, %edi
 	xorl	%eax, %eax
 	jmp	acpi_enter_sleep_state
-.L97:
-	.p2align 4,,7
-.L99:
-	.align 4
-	movl	$24, %eax
-	movw	%ax, %ds
 
+	.align 4
+resume_point:
 	/* We don't restore %rax, it must be 0 anyway */
 	movq	$saved_context, %rax
 	movq	saved_context_cr4(%rax), %rbx
@@ -117,12 +108,9 @@ do_suspend_lowlevel:
 	xorl	%eax, %eax
 	addq	$8, %rsp
 	jmp	restore_processor_state
-.LFE5:
-.Lfe5:
-	.size	do_suspend_lowlevel, .Lfe5-do_suspend_lowlevel
-	
+ENDPROC(do_suspend_lowlevel)
+
 .data
-ALIGN
 ENTRY(saved_rbp)	.quad	0
 ENTRY(saved_rsi)	.quad	0
 ENTRY(saved_rdi)	.quad	0
-- 
cgit v1.2.2


From 6defa2fe2019f3729933516fba5cfd75eecd07de Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Sun, 15 Feb 2009 22:46:45 +0100
Subject: x86_64: Fix S3 fail path

As acpi_enter_sleep_state can fail, take this into account in
do_suspend_lowlevel and don't return to the do_suspend_lowlevel's
caller. This would break (currently) fpu status and preempt count.

Technically, this means use `call' instead of `jmp' and `jmp' to
the `resume_point' after the `call' (i.e. if
acpi_enter_sleep_state returns=fails). `resume_point' will handle
the restore of fpu and preempt count gracefully.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/acpi/wakeup_64.S | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index b5dee6a0de3a..96258d9dc974 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -73,7 +73,9 @@ ENTRY(do_suspend_lowlevel)
 	addq	$8, %rsp
 	movl	$3, %edi
 	xorl	%eax, %eax
-	jmp	acpi_enter_sleep_state
+	call	acpi_enter_sleep_state
+	/* in case something went wrong, restore the machine status and go on */
+	jmp	resume_point
 
 	.align 4
 resume_point:
-- 
cgit v1.2.2


From 936577c61d0c10b8929608a92c98d839b22053bc Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 22 Feb 2009 10:27:49 -0800
Subject: x86: Add IRQF_TIMER to legacy x86 timer interrupt descriptors

Right now nobody cares, but the suspend/resume code will eventually want
to suspend device interrupts without suspending the timer, and will
depend on this flag to know.

The modern x86 timer infrastructure uses the local APIC timers and never
shows up as a device interrupt at all, so it isn't affected and doesn't
need any of this.

Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/time_64.c     | 2 +-
 arch/x86/kernel/vmiclock_32.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index e6e695acd725..241ec3923f61 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -115,7 +115,7 @@ unsigned long __init calibrate_cpu(void)
 
 static struct irqaction irq0 = {
 	.handler	= timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING,
+	.flags		= IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING | IRQF_TIMER,
 	.mask		= CPU_MASK_NONE,
 	.name		= "timer"
 };
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index bde106cae0a9..e5b088fffa40 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -202,7 +202,7 @@ static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
 static struct irqaction vmi_clock_action  = {
 	.name 		= "vmi-timer",
 	.handler 	= vmi_timer_interrupt,
-	.flags 		= IRQF_DISABLED | IRQF_NOBALANCING,
+	.flags 		= IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER,
 	.mask 		= CPU_MASK_ALL,
 };
 
-- 
cgit v1.2.2


From 770824bdc421ff58a64db608294323571c949f4c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 22 Feb 2009 18:38:50 +0100
Subject: PM: Split up sysdev_[suspend|resume] from device_power_[down|up]

Move the sysdev_suspend/resume from the callee to the callers, with
no real change in semantics, so that we can rework the disabling of
interrupts during suspend/hibernation.

This is based on an earlier patch from Linus.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/apm_32.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 98807bb095ad..266ec6c18b6c 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1192,6 +1192,7 @@ static int suspend(int vetoable)
 	device_suspend(PMSG_SUSPEND);
 	local_irq_disable();
 	device_power_down(PMSG_SUSPEND);
+	sysdev_suspend(PMSG_SUSPEND);
 
 	local_irq_enable();
 
@@ -1208,6 +1209,7 @@ static int suspend(int vetoable)
 	if (err != APM_SUCCESS)
 		apm_error("suspend", err);
 	err = (err == APM_SUCCESS) ? 0 : -EIO;
+	sysdev_resume();
 	device_power_up(PMSG_RESUME);
 	local_irq_enable();
 	device_resume(PMSG_RESUME);
@@ -1228,6 +1230,7 @@ static void standby(void)
 
 	local_irq_disable();
 	device_power_down(PMSG_SUSPEND);
+	sysdev_suspend(PMSG_SUSPEND);
 	local_irq_enable();
 
 	err = set_system_power_state(APM_STATE_STANDBY);
@@ -1235,6 +1238,7 @@ static void standby(void)
 		apm_error("standby", err);
 
 	local_irq_disable();
+	sysdev_resume();
 	device_power_up(PMSG_RESUME);
 	local_irq_enable();
 }
-- 
cgit v1.2.2


From ccbe495caa5e604b04d5a31d7459a6f6a76a756c Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Fri, 27 Feb 2009 19:03:24 -0800
Subject: x86-64: syscall-audit: fix 32/64 syscall hole

On x86-64, a 32-bit process (TIF_IA32) can switch to 64-bit mode with
ljmp, and then use the "syscall" instruction to make a 64-bit system
call.  A 64-bit process make a 32-bit system call with int $0x80.

In both these cases, audit_syscall_entry() will use the wrong system
call number table and the wrong system call argument registers.  This
could be used to circumvent a syscall audit configuration that filters
based on the syscall numbers or argument details.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/ptrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 5a4c23d89892..06ca07f6ad86 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1388,7 +1388,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
 #ifdef CONFIG_X86_32
 # define IS_IA32	1
 #elif defined CONFIG_IA32_EMULATION
-# define IS_IA32	test_thread_flag(TIF_IA32)
+# define IS_IA32	is_compat_task()
 #else
 # define IS_IA32	0
 #endif
-- 
cgit v1.2.2


From ff0c0874905fb312ca1491bbdac2653b0b48c20b Mon Sep 17 00:00:00 2001
From: Brian Maly <bmaly@redhat.com>
Date: Tue, 3 Mar 2009 21:55:31 -0500
Subject: x86: fix DMI on EFI

Impact: reactivate DMI quirks on EFI hardware

DMI tables are loaded by EFI, so the dmi calls must happen after
efi_init() and not before.

Currently Apple hardware uses DMI to determine the framebuffer mappings
for efifb. Without DMI working you also have no video on MacBook Pro.

This patch resolves the DMI issue for EFI hardware (DMI is now properly
detected at boot), and additionally efifb now loads on Apple hardware
(i.e. video works).

Signed-off-by: Brian Maly <bmaly@redhat>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: ying.huang@intel.com
LKML-Reference: <49ADEDA3.1030406@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

 arch/x86/kernel/setup.c |    5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
---
 arch/x86/kernel/setup.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c461f6d69074..6a8811a69324 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -770,6 +770,9 @@ void __init setup_arch(char **cmdline_p)
 
 	finish_e820_parsing();
 
+	if (efi_enabled)
+		efi_init();
+
 	dmi_scan_machine();
 
 	dmi_check_system(bad_bios_dmi_table);
@@ -789,8 +792,6 @@ void __init setup_arch(char **cmdline_p)
 	insert_resource(&iomem_resource, &data_resource);
 	insert_resource(&iomem_resource, &bss_resource);
 
-	if (efi_enabled)
-		efi_init();
 
 #ifdef CONFIG_X86_32
 	if (ppro_with_ram_bug()) {
-- 
cgit v1.2.2


From dd39ecf522ba86c70809715af46e6557f6491131 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Wed, 4 Mar 2009 10:58:33 +0800
Subject: x86: EFI: Back efi_ioremap with init_memory_mapping instead of
 FIX_MAP

Impact: Fix boot failure on EFI system with large runtime memory range

Brian Maly reported that some EFI system with large runtime memory
range can not boot. Because the FIX_MAP used to map runtime memory
range is smaller than run time memory range.

This patch fixes this issue by re-implement efi_ioremap() with
init_memory_mapping().

Reported-and-tested-by: Brian Maly <bmaly@redhat.com>
Signed-off-by: Huang Ying <ying.huang@intel.com>
Cc: Brian Maly <bmaly@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <1236135513.6204.306.camel@yhuang-dev.sh.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/efi.c    |  7 +++++--
 arch/x86/kernel/efi_64.c | 21 ++++-----------------
 2 files changed, 9 insertions(+), 19 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1119d247fe11..eb1ef3b67dd5 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -467,7 +467,7 @@ void __init efi_enter_virtual_mode(void)
 	efi_memory_desc_t *md;
 	efi_status_t status;
 	unsigned long size;
-	u64 end, systab, addr, npages;
+	u64 end, systab, addr, npages, end_pfn;
 	void *p, *va;
 
 	efi.systab = NULL;
@@ -479,7 +479,10 @@ void __init efi_enter_virtual_mode(void)
 		size = md->num_pages << EFI_PAGE_SHIFT;
 		end = md->phys_addr + size;
 
-		if (PFN_UP(end) <= max_low_pfn_mapped)
+		end_pfn = PFN_UP(end);
+		if (end_pfn <= max_low_pfn_mapped
+		    || (end_pfn > (1UL << (32 - PAGE_SHIFT))
+			&& end_pfn <= max_pfn_mapped))
 			va = __va(md->phys_addr);
 		else
 			va = efi_ioremap(md->phys_addr, size);
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 652c5287215f..cb783b92c50c 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -99,24 +99,11 @@ void __init efi_call_phys_epilog(void)
 
 void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size)
 {
-	static unsigned pages_mapped __initdata;
-	unsigned i, pages;
-	unsigned long offset;
+	unsigned long last_map_pfn;
 
-	pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr);
-	offset = phys_addr & ~PAGE_MASK;
-	phys_addr &= PAGE_MASK;
-
-	if (pages_mapped + pages > MAX_EFI_IO_PAGES)
+	last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
+	if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size)
 		return NULL;
 
-	for (i = 0; i < pages; i++) {
-		__set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
-			     phys_addr, PAGE_KERNEL);
-		phys_addr += PAGE_SIZE;
-		pages_mapped++;
-	}
-
-	return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \
-					     (pages_mapped - pages)) + offset;
+	return (void __iomem *)__va(phys_addr);
 }
-- 
cgit v1.2.2


From ab9e18587f4cdb5f3fb3854c732f27a36f98e8f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= <dg@emlix.com>
Date: Wed, 4 Mar 2009 19:42:27 +0100
Subject: x86, math-emu: fix init_fpu for task != current
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Impact: fix math-emu related crash while using GDB/ptrace

init_fpu() calls finit to initialize a task's xstate, while finit always
works on the current task. If we use PTRACE_GETFPREGS on another
process and both processes did not already use floating point, we get
a null pointer exception in finit.

This patch creates a new function finit_task that takes a task_struct
parameter. finit becomes a wrapper that simply calls finit_task with
current. On the plus side this avoids many calls to get_current which
would each resolve to an inline assembler mov instruction.

An empty finit_task has been added to i387.h to avoid linker errors in
case the compiler still emits the call in init_fpu when
CONFIG_MATH_EMULATION is not defined.

The declaration of finit in i387.h has been removed as the remaining
code using this function gets its prototype from fpu_proto.h.

Signed-off-by: Daniel Glöckner <dg@emlix.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Pallipadi Venkatesh" <venkatesh.pallipadi@intel.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Bill Metzenthen <billm@melbpc.org.au>
LKML-Reference: <E1Lew31-0004il-Fg@mailer.emlix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/i387.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index b0f61f0dcd0a..f2f8540a7f3d 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -136,7 +136,7 @@ int init_fpu(struct task_struct *tsk)
 #ifdef CONFIG_X86_32
 	if (!HAVE_HWFP) {
 		memset(tsk->thread.xstate, 0, xstate_size);
-		finit();
+		finit_task(tsk);
 		set_stopped_child_used_math(tsk);
 		return 0;
 	}
-- 
cgit v1.2.2


From dd4124a8a06bca89c077a16437edac010f0bb993 Mon Sep 17 00:00:00 2001
From: Leann Ogasawara <leann.ogasawara@canonical.com>
Date: Wed, 4 Mar 2009 11:53:00 -0800
Subject: x86: add Dell XPS710 reboot quirk

Dell XPS710 will hang on reboot.  This is resolved by adding a quirk to
set bios reboot.

Signed-off-by: Leann Ogasawara <leann.ogasawara@canonical.com>
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Cc: "manoj.iyer" <manoj.iyer@canonical.com>
Cc: <stable@kernel.org>
LKML-Reference: <1236196380.3231.89.camel@emiko>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/reboot.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2b46eb41643b..4526b3a75ed2 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -217,6 +217,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"),
 		},
 	},
+	{	/* Handle problems with rebooting on Dell XPS710 */
+		.callback = set_bios_reboot,
+		.ident = "Dell XPS710",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"),
+		},
+	},
 	{ }
 };
 
-- 
cgit v1.2.2


From 9ca0791dcaa666e8e8f4b4ca028b65b4bde9cb28 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Thu, 5 Mar 2009 08:49:54 +0100
Subject: x86, bts: remove bad warning

In case a ptraced task is reaped (while the tracer is still attached),
ds_exit_thread() is called before ptrace_exit(). The latter will
release the bts_tracer and remove the thread's ds_ctx.
The former will WARN() if the context is not NULL.

Oleg Nesterov submitted patches that move ptrace_exit() before
exit_thread() and thus reverse the order of the above calls.

Remove the bad warning. I will add it again when Oleg's changes are in.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
LKML-Reference: <20090305084954.A22000@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ds.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 169a120587be..de7cdbda1c36 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -1029,5 +1029,4 @@ void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
 
 void ds_exit_thread(struct task_struct *tsk)
 {
-	WARN_ON(tsk->thread.ds_ctx);
 }
-- 
cgit v1.2.2


From 73bf1b62f561fc8ecb00e2810efe4fe769f4933e Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Thu, 5 Mar 2009 08:57:21 +0100
Subject: x86, pebs: correct qualifier passed to ds_write_config() from
 ds_request_pebs()

ds_write_config() can write the BTS as well as the PEBS part of
the DS config. ds_request_pebs() passes the wrong qualifier, which
results in the wrong configuration to be written.

Reported-by: Stephane Eranian <eranian@googlemail.com>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
LKML-Reference: <20090305085721.A22550@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index de7cdbda1c36..87b67e3a765a 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -729,7 +729,7 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 
 	spin_unlock_irqrestore(&ds_lock, irq);
 
-	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
 	ds_resume_pebs(tracer);
 
 	return tracer;
-- 
cgit v1.2.2


From 129f8ae9b1b5be94517da76009ea956e89104ce8 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Mon, 9 Mar 2009 15:07:33 -0400
Subject: Revert "[CPUFREQ] Disable sysfs ui for p4-clockmod."

This reverts commit e088e4c9cdb618675874becb91b2fd581ee707e6.

Removing the sysfs interface for p4-clockmod was flagged as a
regression in bug 12826.

Course of action:
 - Find out the remaining causes of overheating, and fix them
   if possible. ACPI should be doing the right thing automatically.
   If it isn't, we need to fix that.
 - mark p4-clockmod ui as deprecated
 - try again with the removal in six months.

It's not really feasible to printk about the deprecation, because
it needs to happen at all the sysfs entry points, which means adding
a lot of strcmp("p4-clockmod".. calls to the core, which.. bleuch.

Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index b585e04cbc9e..3178c3acd97e 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -277,7 +277,6 @@ static struct cpufreq_driver p4clockmod_driver = {
 	.name		= "p4-clockmod",
 	.owner		= THIS_MODULE,
 	.attr		= p4clockmod_attr,
-	.hide_interface	= 1,
 };
 
 
-- 
cgit v1.2.2


From a6a80e1d8cf82b46a69f88e659da02749231eb36 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 17 Mar 2009 07:58:26 -0700
Subject: Fix potential fast PIT TSC calibration startup glitch

During bootup, when we reprogram the PIT (programmable interval timer)
to start counting down from 0xffff in order to use it for the fast TSC
calibration, we should also make sure to delay a bit afterwards to allow
the PIT hardware to actually start counting with the new value.

That will happens at the next CLK pulse (1.193182 MHz), so the easiest
way to do that is to just wait at least one microsecond after
programming the new PIT counter value.  We do that by just reading the
counter value back once - which will take about 2us on PC hardware.

Reported-and-tested-by: john stultz <johnstul@us.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/tsc.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 599e58168631..9e80207c96a2 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -315,6 +315,15 @@ static unsigned long quick_pit_calibrate(void)
 	outb(0xff, 0x42);
 	outb(0xff, 0x42);
 
+	/*
+	 * The PIT starts counting at the next edge, so we
+	 * need to delay for a microsecond. The easiest way
+	 * to do that is to just read back the 16-bit counter
+	 * once from the PIT.
+	 */
+	inb(0x42);
+	inb(0x42);
+
 	if (pit_expect_msb(0xff)) {
 		int i;
 		u64 t1, t2, delta;
-- 
cgit v1.2.2


From 9e8912e04e612b43897b4b722205408b92f423e5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 17 Mar 2009 08:13:17 -0700
Subject: Fast TSC calibration: calculate proper frequency error bounds

In order for ntpd to correctly synchronize the clocks, the frequency of
the system clock must not be off by more than 500 ppm (or, put another
way, 1:2000), or ntpd will end up giving up on trying to synchronize
properly, and ends up reseting the clock in jumps instead.

The fast TSC PIT calibration sometimes failed this test - it was
assuming that the PIT reads always took about one microsecond each (2us
for the two reads to get a 16-bit timer), and that calibrating TSC to
the PIT over 15ms should thus be sufficient to get much closer than
500ppm (max 2us error on both sides giving 4us over 15ms: a 270 ppm
error value).

However, that assumption does not always hold: apparently some hardware
is either very much slower at reading the PIT registers, or there was
other noise causing at least one machine to get 700+ ppm errors.

So instead of using a fixed 15ms timing loop, this changes the fast PIT
calibration to read the TSC delta over the individual PIT timer reads,
and use the result to calculate the error bars on the PIT read timing
properly.  We then successfully calibrate the TSC only if the maximum
error bars fall below 500ppm.

In the process, we also relax the timing to allow up to 25ms for the
calibration, although it can happen much faster depending on hardware.

Reported-and-tested-by: Jesper Krogh <jesper@krogh.cc>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/tsc.c | 101 ++++++++++++++++++++++++++++----------------------
 1 file changed, 56 insertions(+), 45 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 9e80207c96a2..d5cebb52d45b 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -273,30 +273,43 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
  * use the TSC value at the transitions to calculate a pretty
  * good value for the TSC frequencty.
  */
-static inline int pit_expect_msb(unsigned char val)
+static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
 {
-	int count = 0;
+	int count;
+	u64 tsc = 0;
 
 	for (count = 0; count < 50000; count++) {
 		/* Ignore LSB */
 		inb(0x42);
 		if (inb(0x42) != val)
 			break;
+		tsc = get_cycles();
 	}
-	return count > 50;
+	*deltap = get_cycles() - tsc;
+	*tscp = tsc;
+
+	/*
+	 * We require _some_ success, but the quality control
+	 * will be based on the error terms on the TSC values.
+	 */
+	return count > 5;
 }
 
 /*
- * How many MSB values do we want to see? We aim for a
- * 15ms calibration, which assuming a 2us counter read
- * error should give us roughly 150 ppm precision for
- * the calibration.
+ * How many MSB values do we want to see? We aim for
+ * a maximum error rate of 500ppm (in practice the
+ * real error is much smaller), but refuse to spend
+ * more than 25ms on it.
  */
-#define QUICK_PIT_MS 15
-#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
+#define MAX_QUICK_PIT_MS 25
+#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
 
 static unsigned long quick_pit_calibrate(void)
 {
+	int i;
+	u64 tsc, delta;
+	unsigned long d1, d2;
+
 	/* Set the Gate high, disable speaker */
 	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
 
@@ -324,45 +337,43 @@ static unsigned long quick_pit_calibrate(void)
 	inb(0x42);
 	inb(0x42);
 
-	if (pit_expect_msb(0xff)) {
-		int i;
-		u64 t1, t2, delta;
-		unsigned char expect = 0xfe;
-
-		t1 = get_cycles();
-		for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) {
-			if (!pit_expect_msb(expect))
-				goto failed;
+	if (pit_expect_msb(0xff, &tsc, &d1)) {
+		for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
+			if (!pit_expect_msb(0xff-i, &delta, &d2))
+				break;
+
+			/*
+			 * Iterate until the error is less than 500 ppm
+			 */
+			delta -= tsc;
+			if (d1+d2 < delta >> 11)
+				goto success;
 		}
-		t2 = get_cycles();
-
-		/*
-		 * Make sure we can rely on the second TSC timestamp:
-		 */
-		if (!pit_expect_msb(expect))
-			goto failed;
-
-		/*
-		 * Ok, if we get here, then we've seen the
-		 * MSB of the PIT decrement QUICK_PIT_ITERATIONS
-		 * times, and each MSB had many hits, so we never
-		 * had any sudden jumps.
-		 *
-		 * As a result, we can depend on there not being
-		 * any odd delays anywhere, and the TSC reads are
-		 * reliable.
-		 *
-		 * kHz = ticks / time-in-seconds / 1000;
-		 * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000
-		 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000)
-		 */
-		delta = (t2 - t1)*PIT_TICK_RATE;
-		do_div(delta, QUICK_PIT_ITERATIONS*256*1000);
-		printk("Fast TSC calibration using PIT\n");
-		return delta;
 	}
-failed:
+	printk("Fast TSC calibration failed\n");
 	return 0;
+
+success:
+	/*
+	 * Ok, if we get here, then we've seen the
+	 * MSB of the PIT decrement 'i' times, and the
+	 * error has shrunk to less than 500 ppm.
+	 *
+	 * As a result, we can depend on there not being
+	 * any odd delays anywhere, and the TSC reads are
+	 * reliable (within the error). We also adjust the
+	 * delta to the middle of the error bars, just
+	 * because it looks nicer.
+	 *
+	 * kHz = ticks / time-in-seconds / 1000;
+	 * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
+	 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
+	 */
+	delta += (long)(d2 - d1)/2;
+	delta *= PIT_TICK_RATE;
+	do_div(delta, i*256*1000);
+	printk("Fast TSC calibration using PIT\n");
+	return delta;
 }
 
 /**
-- 
cgit v1.2.2


From 30390880debce4a68fd23e87a787f27609e4bf4a Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Mon, 16 Mar 2009 18:57:22 -0400
Subject: prevent boosting kprobes on exception address

Don't boost at the addresses which are listed on exception tables,
because major page fault will occur on those addresses.  In that case,
kprobes can not ensure that when instruction buffer can be freed since
some processes will sleep on the buffer.

kprobes-ia64 already has same check.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/kprobes.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e948b28a5a9a..4558dd3918cf 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -193,6 +193,9 @@ static int __kprobes can_boost(kprobe_opcode_t *opcodes)
 	kprobe_opcode_t opcode;
 	kprobe_opcode_t *orig_opcodes = opcodes;
 
+	if (search_exception_tables(opcodes))
+		return 0;	/* Page fault may occur on this address. */
+
 retry:
 	if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
 		return 0;
-- 
cgit v1.2.2