From 8f820e976057261e249367514e9920cf20048c76 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:00 +0100
Subject: [PATCH] x86-64: Update defconfig

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/defconfig | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 0f5d44e86be5..96f226cfb339 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.19-rc2-git4
-# Sat Oct 21 03:38:52 2006
+# Linux kernel version: 2.6.19-git7
+# Wed Dec  6 23:50:47 2006
 #
 CONFIG_X86_64=y
 CONFIG_64BIT=y
@@ -47,13 +47,14 @@ CONFIG_POSIX_MQUEUE=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 # CONFIG_CPUSETS is not set
+CONFIG_SYSFS_DEPRECATED=y
 # CONFIG_RELAY is not set
 CONFIG_INITRAMFS_SOURCE=""
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 # CONFIG_EMBEDDED is not set
 CONFIG_UID16=y
-# CONFIG_SYSCTL_SYSCALL is not set
+CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -87,9 +88,7 @@ CONFIG_STOP_MACHINE=y
 # Block layer
 #
 CONFIG_BLOCK=y
-CONFIG_LBD=y
 # CONFIG_BLK_DEV_IO_TRACE is not set
-# CONFIG_LSF is not set
 
 #
 # IO Schedulers
@@ -111,10 +110,11 @@ CONFIG_X86_PC=y
 # CONFIG_X86_VSMP is not set
 # CONFIG_MK8 is not set
 # CONFIG_MPSC is not set
-CONFIG_GENERIC_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=128
-CONFIG_X86_L1_CACHE_SHIFT=7
-CONFIG_X86_INTERNODE_CACHE_BYTES=128
+CONFIG_MCORE2=y
+# CONFIG_GENERIC_CPU is not set
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_L1_CACHE_SHIFT=6
+CONFIG_X86_INTERNODE_CACHE_BYTES=64
 CONFIG_X86_TSC=y
 CONFIG_X86_GOOD_APIC=y
 # CONFIG_MICROCODE is not set
@@ -322,6 +322,7 @@ CONFIG_INET_TCP_DIAG=y
 # CONFIG_TCP_CONG_ADVANCED is not set
 CONFIG_TCP_CONG_CUBIC=y
 CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
 CONFIG_IPV6=y
 # CONFIG_IPV6_PRIVACY is not set
 # CONFIG_IPV6_ROUTER_PREF is not set
@@ -624,6 +625,7 @@ CONFIG_SATA_INTEL_COMBINED=y
 # CONFIG_PATA_IT821X is not set
 # CONFIG_PATA_JMICRON is not set
 # CONFIG_PATA_TRIFLEX is not set
+# CONFIG_PATA_MARVELL is not set
 # CONFIG_PATA_MPIIX is not set
 # CONFIG_PATA_OLDPIIX is not set
 # CONFIG_PATA_NETCELL is not set
@@ -795,6 +797,7 @@ CONFIG_BNX2=y
 CONFIG_S2IO=m
 # CONFIG_S2IO_NAPI is not set
 # CONFIG_MYRI10GE is not set
+# CONFIG_NETXEN_NIC is not set
 
 #
 # Token Ring devices
@@ -927,10 +930,6 @@ CONFIG_RTC=y
 # CONFIG_DTLK is not set
 # CONFIG_R3964 is not set
 # CONFIG_APPLICOM is not set
-
-#
-# Ftape, the floppy tape device driver
-#
 CONFIG_AGP=y
 CONFIG_AGP_AMD64=y
 CONFIG_AGP_INTEL=y
@@ -1135,6 +1134,7 @@ CONFIG_USB_DEVICEFS=y
 # CONFIG_USB_BANDWIDTH is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_SUSPEND is not set
+# CONFIG_USB_MULTITHREAD_PROBE is not set
 # CONFIG_USB_OTG is not set
 
 #
@@ -1212,6 +1212,7 @@ CONFIG_USB_HIDINPUT=y
 # CONFIG_USB_KAWETH is not set
 # CONFIG_USB_PEGASUS is not set
 # CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET_MII is not set
 # CONFIG_USB_USBNET is not set
 CONFIG_USB_MON=y
 
-- 
cgit v1.2.2


From b615ebdac97c648a2ae7d23c5a0bbb3972adf928 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:00 +0100
Subject: [PATCH] x86: shorten lines in unwinder to be <= 80 characters

Andrew complained about > 80 character lines in the new unwinder.
Fix that.

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/traps.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 0d65b22f229c..d3f43c958ca1 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -235,6 +235,8 @@ static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
 	return n;
 }
 
+#define MSG(txt) ops->warning(data, txt)
+
 /*
  * x86-64 can have upto three kernel stacks: 
  * process stack
@@ -248,11 +250,12 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
         return p > t && p < t + THREAD_SIZE - 3;
 }
 
-void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
+void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
+		unsigned long *stack,
 		struct stacktrace_ops *ops, void *data)
 {
 	const unsigned cpu = smp_processor_id();
-	unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
+	unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
 	unsigned used = 0;
 	struct thread_info *tinfo;
 
@@ -268,28 +271,30 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
 			if (unwind_init_frame_info(&info, tsk, regs) == 0)
 				unw_ret = dump_trace_unwind(&info, &oad);
 		} else if (tsk == current)
-			unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
+			unw_ret = unwind_init_running(&info, dump_trace_unwind,
+						      &oad);
 		else {
 			if (unwind_init_blocked(&info, tsk) == 0)
 				unw_ret = dump_trace_unwind(&info, &oad);
 		}
 		if (unw_ret > 0) {
 			if (call_trace == 1 && !arch_unw_user_mode(&info)) {
-				ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
+				ops->warning_symbol(data,
+					     "DWARF2 unwinder stuck at %s\n",
 					     UNW_PC(&info));
 				if ((long)UNW_SP(&info) < 0) {
-					ops->warning(data, "Leftover inexact backtrace:\n");
+					MSG("Leftover inexact backtrace:");
 					stack = (unsigned long *)UNW_SP(&info);
 					if (!stack)
 						return;
 				} else
-					ops->warning(data, "Full inexact backtrace again:\n");
+					MSG("Full inexact backtrace again:\n");
 			} else if (call_trace >= 1)
 				return;
 			else
-				ops->warning(data, "Full inexact backtrace again:\n");
+				MSG("Full inexact backtrace again:\n");
 		} else
-			ops->warning(data, "Inexact backtrace:\n");
+			MSG("Inexact backtrace:\n");
 	}
 	if (!stack) {
 		unsigned long dummy;
-- 
cgit v1.2.2


From dd315df1767cf56bd4fb8d730fdff4a3d7e15d84 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:00 +0100
Subject: [PATCH] x86: Compress stack unwinder output

The unwinder has some extra newlines, which eat up loads of screen
space when it spews. (See https://bugzilla.redhat.com/bugzilla/attachment.cgi?id=137900
for a nasty example).

warning_symbol-> and warning-> already printk a newline, so don't add one
in the strings passed to them.

[AK: redone for new code]

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/traps.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index d3f43c958ca1..eedd4e759c3a 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -280,7 +280,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 		if (unw_ret > 0) {
 			if (call_trace == 1 && !arch_unw_user_mode(&info)) {
 				ops->warning_symbol(data,
-					     "DWARF2 unwinder stuck at %s\n",
+					     "DWARF2 unwinder stuck at %s",
 					     UNW_PC(&info));
 				if ((long)UNW_SP(&info) < 0) {
 					MSG("Leftover inexact backtrace:");
@@ -288,13 +288,13 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 					if (!stack)
 						return;
 				} else
-					MSG("Full inexact backtrace again:\n");
+					MSG("Full inexact backtrace again:");
 			} else if (call_trace >= 1)
 				return;
 			else
-				MSG("Full inexact backtrace again:\n");
+				MSG("Full inexact backtrace again:");
 		} else
-			MSG("Inexact backtrace:\n");
+			MSG("Inexact backtrace:");
 	}
 	if (!stack) {
 		unsigned long dummy;
-- 
cgit v1.2.2


From 36b2a8d5aff4cb3ee83d5e40447a8f073bcfe2fb Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@hpl.hp.com>
Date: Thu, 7 Dec 2006 02:14:01 +0100
Subject: [PATCH] x86-64: add X86_FEATURE_PEBS and detection

Here is a patch (used by perfmon2) to detect the presence of the
Precise Event Based Sampling (PEBS) feature for Intel 64-bit processors.
The patch also adds the cpu_has_pebs macro.

changelog:
	- adds X86_FEATURE_PEBS
	- adds cpu_has_pebs to test for X86_FEATURE_PEBS

Signed-off-by: stephane eranian <eranian@hpl.hp.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/setup.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index fc944b5e8f4a..619af2e2fa26 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -835,6 +835,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 			set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
 	}
 
+	if (cpu_has_ds) {
+		unsigned int l1, l2;
+		rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
+		if (!(l1 & (1<<12)))
+			set_bit(X86_FEATURE_PEBS, c->x86_capability);
+	}
+
 	n = c->extended_cpuid_level;
 	if (n >= 0x80000008) {
 		unsigned eax = cpuid_eax(0x80000008);
-- 
cgit v1.2.2


From e2764a1e306c986053a52b33748c33463cf888de Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 7 Dec 2006 02:14:01 +0100
Subject: [PATCH] x86-64: use BUILD_BUG_ON in FPU code

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 arch/x86_64/kernel/i387.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c
index 3aa1e9bb781d..1d58c13bc6bc 100644
--- a/arch/x86_64/kernel/i387.c
+++ b/arch/x86_64/kernel/i387.c
@@ -82,11 +82,8 @@ int save_i387(struct _fpstate __user *buf)
 	struct task_struct *tsk = current;
 	int err = 0;
 
-	{ 
-		extern void bad_user_i387_struct(void); 
-		if (sizeof(struct user_i387_struct) != sizeof(tsk->thread.i387.fxsave))
-			bad_user_i387_struct();
-	} 
+	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
+			sizeof(tsk->thread.i387.fxsave));
 
 	if ((unsigned long)buf % 16) 
 		printk("save_i387: bad fpstate %p\n",buf); 
-- 
cgit v1.2.2


From bb81a09e55eaf7e5f798468ab971469b6f66a259 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 7 Dec 2006 02:14:01 +0100
Subject: [PATCH] x86: all cpu backtrace

When a spinlock lockup occurs, arrange for the NMI code to emit an all-cpu
backtrace, so we get to see which CPU is holding the lock, and where.

Cc: Andi Kleen <ak@muc.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/nmi.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 7af9cb3e2d99..27e95e7922c1 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -12,14 +12,15 @@
  *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
  */
 
+#include <linux/nmi.h>
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/sysdev.h>
-#include <linux/nmi.h>
 #include <linux/sysctl.h>
 #include <linux/kprobes.h>
+#include <linux/cpumask.h>
 
 #include <asm/smp.h>
 #include <asm/nmi.h>
@@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi;
 static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner);
 static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]);
 
+static cpumask_t backtrace_mask = CPU_MASK_NONE;
+
 /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
  * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now)
  */
@@ -782,6 +785,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
 {
 	int sum;
 	int touched = 0;
+	int cpu = smp_processor_id();
 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 	u64 dummy;
 	int rc=0;
@@ -799,6 +803,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
 		touched = 1;
 	}
 
+	if (cpu_isset(cpu, backtrace_mask)) {
+		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */
+
+		spin_lock(&lock);
+		printk("NMI backtrace for cpu %d\n", cpu);
+		dump_stack();
+		spin_unlock(&lock);
+		cpu_clear(cpu, backtrace_mask);
+	}
+
 #ifdef CONFIG_X86_MCE
 	/* Could check oops_in_progress here too, but it's safer
 	   not too */
@@ -931,6 +945,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
 
 #endif
 
+void __trigger_all_cpu_backtrace(void)
+{
+	int i;
+
+	backtrace_mask = cpu_online_map;
+	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
+	for (i = 0; i < 10 * 1000; i++) {
+		if (cpus_empty(backtrace_mask))
+			break;
+		mdelay(1);
+	}
+}
+
 EXPORT_SYMBOL(nmi_active);
 EXPORT_SYMBOL(nmi_watchdog);
 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
-- 
cgit v1.2.2


From 399287229c775a8962a852a761d65dc9475dec7c Mon Sep 17 00:00:00 2001
From: Aaron Durbin <adurbin@google.com>
Date: Thu, 7 Dec 2006 02:14:01 +0100
Subject: [PATCH] x86-64: Insert Local and IO APIC(s) into resource map

Insert the Local APIC and IO APIC(s) into the resource tree.  It allows the
APIC resources to be visible within /proc/iomem.  The patch also takes into
account IO APIC(s) mapped in the PCI space by deferring the insertion until
after PCI has allocated its necessary resources.

Signed-off-by: Aaron Durbin <adurbin@google.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@muc.de>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 arch/x86_64/kernel/apic.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 4d9d5ed942b2..5c468971e645 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -25,6 +25,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/module.h>
+#include <linux/ioport.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -45,6 +46,12 @@ int apic_calibrate_pmtmr __initdata;
 
 int disable_apic_timer __initdata;
 
+static struct resource *ioapic_resources;
+static struct resource lapic_resource = {
+	.name = "Local APIC",
+	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
+};
+
 /*
  * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
  * IPIs in place of local APIC timers
@@ -585,6 +592,64 @@ static int __init detect_init_APIC (void)
 	return 0;
 }
 
+#ifdef CONFIG_X86_IO_APIC
+static struct resource * __init ioapic_setup_resources(void)
+{
+#define IOAPIC_RESOURCE_NAME_SIZE 11
+	unsigned long n;
+	struct resource *res;
+	char *mem;
+	int i;
+
+	if (nr_ioapics <= 0)
+		return NULL;
+
+	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
+	n *= nr_ioapics;
+
+	mem = alloc_bootmem(n);
+	res = (void *)mem;
+
+	if (mem != NULL) {
+		memset(mem, 0, n);
+		mem += sizeof(struct resource) * nr_ioapics;
+
+		for (i = 0; i < nr_ioapics; i++) {
+			res[i].name = mem;
+			res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+			sprintf(mem,  "IOAPIC %u", i);
+			mem += IOAPIC_RESOURCE_NAME_SIZE;
+		}
+	}
+
+	ioapic_resources = res;
+
+	return res;
+}
+
+static int __init ioapic_insert_resources(void)
+{
+	int i;
+	struct resource *r = ioapic_resources;
+
+	if (!r) {
+		printk("IO APIC resources could be not be allocated.\n");
+		return -1;
+	}
+
+	for (i = 0; i < nr_ioapics; i++) {
+		insert_resource(&iomem_resource, r);
+		r++;
+	}
+
+	return 0;
+}
+
+/* Insert the IO APIC resources after PCI initialization has occured to handle
+ * IO APICS that are mapped in on a BAR in PCI space. */
+late_initcall(ioapic_insert_resources);
+#endif
+
 void __init init_apic_mappings(void)
 {
 	unsigned long apic_phys;
@@ -604,6 +669,11 @@ void __init init_apic_mappings(void)
 	apic_mapped = 1;
 	apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys);
 
+	/* Put local APIC into the resource map. */
+	lapic_resource.start = apic_phys;
+	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+	insert_resource(&iomem_resource, &lapic_resource);
+
 	/*
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
@@ -613,7 +683,9 @@ void __init init_apic_mappings(void)
 	{
 		unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
 		int i;
+		struct resource *ioapic_res;
 
+		ioapic_res = ioapic_setup_resources();
 		for (i = 0; i < nr_ioapics; i++) {
 			if (smp_found_config) {
 				ioapic_phys = mp_ioapics[i].mpc_apicaddr;
@@ -625,6 +697,12 @@ void __init init_apic_mappings(void)
 			apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n",
 					__fix_to_virt(idx), ioapic_phys);
 			idx++;
+
+			if (ioapic_res != NULL) {
+				ioapic_res->start = ioapic_phys;
+				ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+				ioapic_res++;
+			}
 		}
 	}
 }
-- 
cgit v1.2.2


From da68933e0a999fb13636653c710cca701b457ad2 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 7 Dec 2006 02:14:02 +0100
Subject: [PATCH] x86-64: dump_trace() atomicity fix

Fix

BUG: using smp_processor_id() in preemptible [00000001] code:

in backtracer on preemptible debug kernels.

Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/traps.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index eedd4e759c3a..e37b4d77d5a8 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -254,7 +254,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 		unsigned long *stack,
 		struct stacktrace_ops *ops, void *data)
 {
-	const unsigned cpu = smp_processor_id();
+	const unsigned cpu = get_cpu();
 	unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
 	unsigned used = 0;
 	struct thread_info *tinfo;
@@ -286,11 +286,11 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 					MSG("Leftover inexact backtrace:");
 					stack = (unsigned long *)UNW_SP(&info);
 					if (!stack)
-						return;
+						goto out;
 				} else
 					MSG("Full inexact backtrace again:");
 			} else if (call_trace >= 1)
-				return;
+				goto out;
 			else
 				MSG("Full inexact backtrace again:");
 		} else
@@ -385,6 +385,8 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 	tinfo = current_thread_info();
 	HANDLE_STACK (valid_stack_ptr(tinfo, stack));
 #undef HANDLE_STACK
+out:
+	put_cpu();
 }
 EXPORT_SYMBOL(dump_trace);
 
-- 
cgit v1.2.2


From bcddc0155f351ab3f06c6ede6d91fd399ef9e18f Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Thu, 7 Dec 2006 02:14:02 +0100
Subject: [PATCH] x86-64: miscellaneous entry.S adjustments

This patch:
- makes ret_from_sys_call no longer global (all external users were
  previously switched to use int_ret_from_sys_call)
- adjusts placement of a CFI_{REMEMBER,RESTORE}_STATE pair to better
  fit logic flow
- eliminates an unnecessary pair of CFI_{REMEMBER,RESTORE}_STATE
- glues together function- and unwinder-wise the previously separate
  system_call and int_ret_from_sys_call function fragments

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/entry.S | 36 ++++++------------------------------
 1 file changed, 6 insertions(+), 30 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 7d401b00d822..601d332c4b79 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -230,7 +230,6 @@ ENTRY(system_call)
 	CFI_REL_OFFSET rip,RIP-ARGOFFSET
 	GET_THREAD_INFO(%rcx)
 	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
-	CFI_REMEMBER_STATE
 	jnz tracesys
 	cmpq $__NR_syscall_max,%rax
 	ja badsys
@@ -241,7 +240,6 @@ ENTRY(system_call)
  * Syscall return path ending with SYSRET (fast path)
  * Has incomplete stack frame and undefined top of stack. 
  */		
-	.globl ret_from_sys_call
 ret_from_sys_call:
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi:	flagmask */
@@ -251,8 +249,8 @@ sysret_check:
 	TRACE_IRQS_OFF
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
-	CFI_REMEMBER_STATE
 	jnz  sysret_careful 
+	CFI_REMEMBER_STATE
 	/*
 	 * sysretq will re-enable interrupts:
 	 */
@@ -265,10 +263,10 @@ sysret_check:
 	swapgs
 	sysretq
 
+	CFI_RESTORE_STATE
 	/* Handle reschedules */
 	/* edx:	work, edi: workmask */	
 sysret_careful:
-	CFI_RESTORE_STATE
 	bt $TIF_NEED_RESCHED,%edx
 	jnc sysret_signal
 	TRACE_IRQS_ON
@@ -306,7 +304,6 @@ badsys:
 
 	/* Do syscall tracing */
 tracesys:			 
-	CFI_RESTORE_STATE
 	SAVE_REST
 	movq $-ENOSYS,RAX(%rsp)
 	FIXUP_TOP_OF_STACK %rdi
@@ -322,32 +319,13 @@ tracesys:
 	call *sys_call_table(,%rax,8)
 1:	movq %rax,RAX-ARGOFFSET(%rsp)
 	/* Use IRET because user could have changed frame */
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(system_call)
 		
 /* 
  * Syscall return path ending with IRET.
  * Has correct top of stack, but partial stack frame.
- */ 	
-ENTRY(int_ret_from_sys_call)
-	CFI_STARTPROC	simple
-	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
-	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
-	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
-	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
-	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
-	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
-	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
-	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
-	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
-	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
-	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
-	CFI_REL_OFFSET	r8,R8-ARGOFFSET
-	CFI_REL_OFFSET	r9,R9-ARGOFFSET
-	CFI_REL_OFFSET	r10,R10-ARGOFFSET
-	CFI_REL_OFFSET	r11,R11-ARGOFFSET
+ */
+	.globl int_ret_from_sys_call
+int_ret_from_sys_call:
 	cli
 	TRACE_IRQS_OFF
 	testl $3,CS-ARGOFFSET(%rsp)
@@ -394,8 +372,6 @@ int_very_careful:
 	popq %rdi
 	CFI_ADJUST_CFA_OFFSET -8
 	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
-	cli
-	TRACE_IRQS_OFF
 	jmp int_restore_rest
 	
 int_signal:
@@ -411,7 +387,7 @@ int_restore_rest:
 	TRACE_IRQS_OFF
 	jmp int_with_check
 	CFI_ENDPROC
-END(int_ret_from_sys_call)
+END(system_call)
 		
 /* 
  * Certain special system calls that need to save a complete full stack frame.
-- 
cgit v1.2.2


From 72690a21188586022a9e65cb6f1cc8845167555a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:03 +0100
Subject: [PATCH] x86: Don't use nested idle loops

Currently the idle loop has two nested loops -- one high level
in cpu_idle and in some low level idle functions another one.

Looping in the low level idle functions breaks the idle notifiers
because interrupts waking up sleep states need to execute
exit_idle() which is only in cpu_idle().

So don't do that, only loop in cpu_idle(). This only removes
code.

In some cases e.g. poll_idle the idle loop is a little longer
now because cpu_idle checks more things. I hope that isn't a problem
ACPI idle doesn't change behaviour because it never looped anyways.

Cc: len.brown@intel.com
Cc: eranian@hpl.hp.com
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/process.c | 30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 7451a4c43c16..0b7b4caa4f74 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -108,17 +108,15 @@ void exit_idle(void)
  */
 static void default_idle(void)
 {
-	local_irq_enable();
-
 	current_thread_info()->status &= ~TS_POLLING;
 	smp_mb__after_clear_bit();
-	while (!need_resched()) {
-		local_irq_disable();
-		if (!need_resched())
-			safe_halt();
-		else
-			local_irq_enable();
-	}
+	local_irq_disable();
+	if (!need_resched()) {
+		/* Enables interrupts one instruction before HLT.
+		   x86 special cases this so there is no race. */
+		safe_halt();
+	} else
+		local_irq_enable();
 	current_thread_info()->status |= TS_POLLING;
 }
 
@@ -129,16 +127,7 @@ static void default_idle(void)
  */
 static void poll_idle (void)
 {
-	local_irq_enable();
-
-	asm volatile(
-		"2:"
-		"testl %0,%1;"
-		"rep; nop;"
-		"je 2b;"
-		: :
-		"i" (_TIF_NEED_RESCHED),
-		"m" (current_thread_info()->flags));
+	cpu_relax();
 }
 
 void cpu_idle_wait(void)
@@ -257,8 +246,7 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 static void mwait_idle(void)
 {
 	local_irq_enable();
-	while (!need_resched())
-		mwait_idle_with_hints(0,0);
+	mwait_idle_with_hints(0,0);
 }
 
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
-- 
cgit v1.2.2


From 7cd8b6861eb586aabe4c725cc0c259ce2e653695 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Thu, 7 Dec 2006 02:14:03 +0100
Subject: [PATCH] x86: remove last two pci_find offenders in the core code

Resending as I believe the discussion about them established they were
correct.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 arch/x86_64/kernel/pci-calgary.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index 37a770859e71..d2ea87a95268 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -921,7 +921,7 @@ static int __init calgary_init(void)
 
 error:
 	do {
-		dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM,
+		dev = pci_get_device_reverse(PCI_VENDOR_ID_IBM,
 					      PCI_DEVICE_ID_IBM_CALGARY,
 					      dev);
 		if (!dev)
-- 
cgit v1.2.2


From 9c5f8be4625e73f17e28fea89399ed871a30e064 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:03 +0100
Subject: [PATCH] x86: Mention PCI instead of RAM in NMI parity error message

On modern systems RAM errors don't cause NMIs, but it's usually
caused by PCI SERR. Mention PCI instead of RAM in the printk.

Reported by r_hayashi@ctc-g.co.jp (Ryutaro Hayashi)

Cc:  r_hayashi@ctc-g.co.jp

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/traps.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index e37b4d77d5a8..70bfaab9822c 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -793,8 +793,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
 {
 	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
 		reason);
-	printk(KERN_EMERG "You probably have a hardware problem with your "
-		"RAM chips\n");
+	printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
 
 	if (panic_on_unrecovered_nmi)
 		panic("NMI: Not continuing");
-- 
cgit v1.2.2


From ea7322decb974a4a3e804f96a0201e893ff88ce3 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:05 +0100
Subject: [PATCH] x86-64: Speed and clean up cache flushing in change_page_attr

CLFLUSH is a lot faster than WBINVD so avoid the later if at all
possible.

Always pass the complete list of pages to other CPUs to cut down
the number of IPIs.

Minor other cleanup and sync with i386 version.

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/mm/pageattr.c | 58 ++++++++++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 26 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 3e231d762aaa..ccb91dd996a9 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -61,34 +61,40 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
 	return base;
 } 
 
-
-static void flush_kernel_map(void *address) 
+static void cache_flush_page(void *adr)
 {
-	if (0 && address && cpu_has_clflush) {
-		/* is this worth it? */ 
-		int i;
-		for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) 
-			asm volatile("clflush (%0)" :: "r" (address + i)); 
-	} else
-		asm volatile("wbinvd":::"memory"); 
-	if (address)
-		__flush_tlb_one(address);
-	else
-		__flush_tlb_all();
+	int i;
+	for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
+		asm volatile("clflush (%0)" :: "r" (adr + i));
 }
 
+static void flush_kernel_map(void *arg)
+{
+	struct list_head *l = (struct list_head *)arg;
+	struct page *pg;
+
+	/* When clflush is available always use it because it is
+	   much cheaper than WBINVD */
+	if (!cpu_has_clflush)
+		asm volatile("wbinvd" ::: "memory");
+	list_for_each_entry(pg, l, lru) {
+		void *adr = page_address(pg);
+		if (cpu_has_clflush)
+			cache_flush_page(adr);
+		__flush_tlb_one(adr);
+	}
+}
 
-static inline void flush_map(unsigned long address)
+static inline void flush_map(struct list_head *l)
 {	
-	on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
+	on_each_cpu(flush_kernel_map, l, 1, 1);
 }
 
-static struct page *deferred_pages; /* protected by init_mm.mmap_sem */
+static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */
 
 static inline void save_page(struct page *fpage)
 {
-	fpage->lru.next = (struct list_head *)deferred_pages;
-	deferred_pages = fpage;
+	list_add(&fpage->lru, &deferred_pages);
 }
 
 /* 
@@ -207,18 +213,18 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 
 void global_flush_tlb(void)
 { 
-	struct page *dpage;
+	struct page *pg, *next;
+	struct list_head l;
 
 	down_read(&init_mm.mmap_sem);
-	dpage = xchg(&deferred_pages, NULL);
+	list_replace_init(&deferred_pages, &l);
 	up_read(&init_mm.mmap_sem);
 
-	flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0);
-	while (dpage) {
-		struct page *tmp = dpage;
-		dpage = (struct page *)dpage->lru.next;
-		ClearPagePrivate(tmp);
-		__free_page(tmp);
+	flush_map(&l);
+
+	list_for_each_entry_safe(pg, next, &l, lru) {
+		ClearPagePrivate(pg);
+		__free_page(pg);
 	} 
 } 
 
-- 
cgit v1.2.2


From 5df0287ecc4f53e68bbd188fa8258b555e6b734f Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai.lu@amd.com>
Date: Thu, 7 Dec 2006 02:14:05 +0100
Subject: [PATCH] x86-64: Extend clear_irq_vector

Clear the irq releated entries in irq_vector, irq_domain and vector_irq
instead of clearing irq_vector only. So when new irq is created, it
could reuse that vector. (actually is the second loop scanning from
FIRST_DEVICE_VECTOR+8). This could avoid the vectors are used up
with enough module inserting and removing

Cc: Eric W. Biedierman <ebiederm@xmission.com>
Cc: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-By: Yinghai Lu <yinghai.lu@amd.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/io_apic.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index c80081a6ba41..f71461b1f03d 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -750,6 +750,22 @@ static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 	return vector;
 }
 
+static void __clear_irq_vector(int irq)
+{
+	cpumask_t mask;
+	int cpu, vector;
+
+	BUG_ON(!irq_vector[irq]);
+
+	vector = irq_vector[irq];
+	cpus_and(mask, irq_domain[irq], cpu_online_map);
+	for_each_cpu_mask(cpu, mask)
+		per_cpu(vector_irq, cpu)[vector] = -1;
+
+	irq_vector[irq] = 0;
+	irq_domain[irq] = CPU_MASK_NONE;
+}
+
 void __setup_vector_irq(int cpu)
 {
 	/* Initialize vector_irq on a new cpu */
@@ -1837,7 +1853,7 @@ void destroy_irq(unsigned int irq)
 	dynamic_irq_cleanup(irq);
 
 	spin_lock_irqsave(&vector_lock, flags);
-	irq_vector[irq] = 0;
+	__clear_irq_vector(irq);
 	spin_unlock_irqrestore(&vector_lock, flags);
 }
 
-- 
cgit v1.2.2


From ab2bf0c1c689905b628dca94d0acd9c50e152468 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:06 +0100
Subject: [PATCH] x86-64: Use probe_kernel_address in arch/x86_64/*

Instead of open coded __get_user

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/traps.c |  2 +-
 arch/x86_64/mm/fault.c     | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 70bfaab9822c..264db33476ab 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -30,9 +30,9 @@
 #include <linux/kprobes.h>
 #include <linux/kexec.h>
 #include <linux/unwind.h>
+#include <linux/uaccess.h>
 
 #include <asm/system.h>
-#include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/atomic.h>
 #include <asm/debugreg.h>
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 3751b4788e28..a65fc6f1dcaf 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -23,9 +23,9 @@
 #include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
+#include <linux/uaccess.h>
 
 #include <asm/system.h>
-#include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/smp.h>
 #include <asm/tlbflush.h>
@@ -96,7 +96,7 @@ void bust_spinlocks(int yes)
 static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
 				unsigned long error_code)
 { 
-	unsigned char __user *instr;
+	unsigned char *instr;
 	int scan_more = 1;
 	int prefetch = 0; 
 	unsigned char *max_instr;
@@ -116,7 +116,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
 		unsigned char instr_hi;
 		unsigned char instr_lo;
 
-		if (__get_user(opcode, (char __user *)instr))
+		if (probe_kernel_address(instr, opcode))
 			break; 
 
 		instr_hi = opcode & 0xf0; 
@@ -154,7 +154,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
 		case 0x00:
 			/* Prefetch instruction is 0x0F0D or 0x0F18 */
 			scan_more = 0;
-			if (__get_user(opcode, (char __user *)instr))
+			if (probe_kernel_address(instr, opcode))
 				break;
 			prefetch = (instr_lo == 0xF) &&
 				(opcode == 0x0D || opcode == 0x18);
@@ -170,7 +170,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
 static int bad_address(void *p) 
 { 
 	unsigned long dummy;
-	return __get_user(dummy, (unsigned long __user *)p);
+	return probe_kernel_address((unsigned long *)p, dummy);
 } 
 
 void dump_pagetable(unsigned long address)
-- 
cgit v1.2.2


From b026872601976f666bae77b609dc490d1834bf77 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:06 +0100
Subject: [PATCH] x86-64: Try multiple timer variants in check_timer

Instead of adding all kinds of more quirks try various timer
routing variants in check_timer.

In particular this tries to handle quirks from:
- Nvidia NF2-4 reference BIOS: wrong timer override
- Asus: Wrong timer override but no HPET table
- ATI: require timer disabled in 8259
- Some boards: require timer enabled in 8259

We just try many of the the known variants in the hopefully right order
in check_timer.

Trying pin 0/2 on Nvidia suggested by Tim Hockin.

TBD Experimental. Needs a lot of testing

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/early-quirks.c |   5 --
 arch/x86_64/kernel/io_apic.c      | 124 ++++++++++++++++----------------------
 2 files changed, 51 insertions(+), 78 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index 68273bff58cc..fb0c6da41b7e 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -69,11 +69,6 @@ static void nvidia_bugs(void)
 
 static void ati_bugs(void)
 {
-	if (timer_over_8254 == 1) {
-		timer_over_8254 = 0;
-		printk(KERN_INFO
-	 	"ATI board detected. Disabling timer routing over 8254.\n");
-	}
 }
 
 struct chipset {
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index f71461b1f03d..88fcc4ebbf6e 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -55,10 +55,6 @@ int sis_apic_bug; /* not actually supported, dummy for compile */
 
 static int no_timer_check;
 
-static int disable_timer_pin_1 __initdata;
-
-int timer_over_8254 __initdata = 1;
-
 /* Where if anywhere is the i8259 connect in external int mode */
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
@@ -348,29 +344,6 @@ static int __init disable_ioapic_setup(char *str)
 }
 early_param("noapic", disable_ioapic_setup);
 
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init disable_timer_pin_setup(char *arg)
-{
-	disable_timer_pin_1 = 1;
-	return 1;
-}
-__setup("disable_timer_pin_1", disable_timer_pin_setup);
-
-static int __init setup_disable_8254_timer(char *s)
-{
-	timer_over_8254 = -1;
-	return 1;
-}
-static int __init setup_enable_8254_timer(char *s)
-{
-	timer_over_8254 = 2;
-	return 1;
-}
-
-__setup("disable_8254_timer", setup_disable_8254_timer);
-__setup("enable_8254_timer", setup_enable_8254_timer);
-
-
 /*
  * Find the IRQ entry number of a certain pin.
  */
@@ -1579,10 +1552,33 @@ static inline void unlock_ExtINT_logic(void)
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
- *
- * FIXME: really need to revamp this for modern platforms only.
  */
-static inline void check_timer(void)
+
+static int try_apic_pin(int apic, int pin, char *msg)
+{
+	apic_printk(APIC_VERBOSE, KERN_INFO
+		    "..TIMER: trying IO-APIC=%d PIN=%d %s",
+		    apic, pin, msg);
+
+	/*
+	 * Ok, does IRQ0 through the IOAPIC work?
+	 */
+	if (!no_timer_check && timer_irq_works()) {
+		nmi_watchdog_default();
+		if (nmi_watchdog == NMI_IO_APIC) {
+			disable_8259A_irq(0);
+			setup_nmi();
+			enable_8259A_irq(0);
+		}
+		return 1;
+	}
+	clear_IO_APIC_pin(apic, pin);
+	apic_printk(APIC_QUIET, KERN_ERR " .. failed\n");
+	return 0;
+}
+
+/* The function from hell */
+static void check_timer(void)
 {
 	int apic1, pin1, apic2, pin2;
 	int vector;
@@ -1603,61 +1599,43 @@ static inline void check_timer(void)
 	 */
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	init_8259A(1);
-	if (timer_over_8254 > 0)
-		enable_8259A_irq(0);
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
 	pin2  = ioapic_i8259.pin;
 	apic2 = ioapic_i8259.apic;
 
-	apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
-		vector, apic1, pin1, apic2, pin2);
+	/* Do this first, otherwise we get double interrupts on ATI boards */
+	if ((pin1 != -1) && try_apic_pin(apic1, pin1,"with 8259 IRQ0 disabled"))
+		return;
 
-	if (pin1 != -1) {
-		/*
-		 * Ok, does IRQ0 through the IOAPIC work?
-		 */
-		unmask_IO_APIC_irq(0);
-		if (!no_timer_check && timer_irq_works()) {
-			nmi_watchdog_default();
-			if (nmi_watchdog == NMI_IO_APIC) {
-				disable_8259A_irq(0);
-				setup_nmi();
-				enable_8259A_irq(0);
-			}
-			if (disable_timer_pin_1 > 0)
-				clear_IO_APIC_pin(0, pin1);
-			return;
-		}
-		clear_IO_APIC_pin(apic1, pin1);
-		apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
-				"connected to IO-APIC\n");
-	}
+	/* Now try again with IRQ0 8259A enabled.
+	   Assumes timer is on IO-APIC 0 ?!? */
+	enable_8259A_irq(0);
+	unmask_IO_APIC_irq(0);
+	if (try_apic_pin(apic1, pin1, "with 8259 IRQ0 enabled"))
+		return;
+	disable_8259A_irq(0);
 
-	apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) "
-				"through the 8259A ... ");
+	/* Always try pin0 and pin2 on APIC 0 to handle buggy timer overrides
+	   on Nvidia boards */
+	if (!(apic1 == 0 && pin1 == 0) &&
+	    try_apic_pin(0, 0, "fallback with 8259 IRQ0 disabled"))
+		return;
+	if (!(apic1 == 0 && pin1 == 2) &&
+	    try_apic_pin(0, 2, "fallback with 8259 IRQ0 disabled"))
+		return;
+
+	/* Then try pure 8259A routing on the 8259 as reported by BIOS*/
+	enable_8259A_irq(0);
 	if (pin2 != -1) {
-		apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
-			apic2, pin2);
-		/*
-		 * legacy devices should be connected to IO APIC #0
-		 */
 		setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
-		if (timer_irq_works()) {
-			apic_printk(APIC_VERBOSE," works.\n");
-			nmi_watchdog_default();
-			if (nmi_watchdog == NMI_IO_APIC) {
-				setup_nmi();
-			}
+		if (try_apic_pin(apic2,pin2,"8259A broadcast ExtINT from BIOS"))
 			return;
-		}
-		/*
-		 * Cleanup, just in case ...
-		 */
-		clear_IO_APIC_pin(apic2, pin2);
 	}
-	apic_printk(APIC_VERBOSE," failed.\n");
+
+	/* Tried all possibilities to go through the IO-APIC. Now come the
+	   really cheesy fallbacks. */
 
 	if (nmi_watchdog == NMI_IO_APIC) {
 		printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
-- 
cgit v1.2.2


From 8e3de538eec95b57a5b86038988451c38ba83f7e Mon Sep 17 00:00:00 2001
From: Albert Cahalan <acahalan@gmail.com>
Date: Thu, 7 Dec 2006 02:14:06 +0100
Subject: [PATCH] x86-64: Support -mregparm arguments for signals with
 SA_SIGINFO in compat mode

The recent change to make x86_64 support i386 binaries compiled
with -mregparm=3 only covered signal handlers without SA_SIGINFO.
(the 3-arg "real-time" ones)

To be compatible with i386, both types should be supported.

Signed-off-by: Albert Cahalan <acahalan@gmail.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/ia32/ia32_signal.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
index 0e0a266d976f..ff499ef2a1ba 100644
--- a/arch/x86_64/ia32/ia32_signal.c
+++ b/arch/x86_64/ia32/ia32_signal.c
@@ -584,6 +584,11 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->rdx = (unsigned long) &frame->info;
 	regs->rcx = (unsigned long) &frame->uc;
 
+	/* Make -mregparm=3 work */
+	regs->rax = sig;
+	regs->rdx = (unsigned long) &frame->info;
+	regs->rcx = (unsigned long) &frame->uc;
+
 	asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); 
 	asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); 
 	
-- 
cgit v1.2.2


From 58db85482743f5e3495d168c641c60ce1d3dfb06 Mon Sep 17 00:00:00 2001
From: Muli Ben-Yehuda <muli@il.ibm.com>
Date: Thu, 7 Dec 2006 02:14:06 +0100
Subject: [PATCH] calgary: phb_shift can be int

Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/pci-calgary.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index d2ea87a95268..f53b581dfd0b 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -740,7 +740,7 @@ static void __init calgary_increase_split_completion_timeout(void __iomem *bbar,
 {
 	u64 val64;
 	void __iomem *target;
-	unsigned long phb_shift = -1;
+	unsigned int phb_shift = ~0; /* silence gcc */
 	u64 mask;
 
 	switch (busno_to_phbid(busnum)) {
-- 
cgit v1.2.2


From b34e90b8f0f30151349134f87b5dc6ef75a5218c Mon Sep 17 00:00:00 2001
From: Laurent Vivier <Laurent.Vivier@bull.net>
Date: Thu, 7 Dec 2006 02:14:06 +0100
Subject: [PATCH] Calgary: use BIOS supplied BBARs and topology information

Find the BBAR register address of each Calgary using the "Extended
BIOS Data Area" rather than calculating it ourselves. Also get the bus
topology (what PHB each bus is on) from Calgary rather than
calculating it ourselves.

This patch fixes http://bugzilla.kernel.org/show_bug.cgi?id=7407.

Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/pci-calgary.c | 161 ++++++++++++++++++++++++++++++---------
 1 file changed, 125 insertions(+), 36 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index f53b581dfd0b..afc0a53505f1 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -41,6 +41,7 @@
 #include <asm/pci-direct.h>
 #include <asm/system.h>
 #include <asm/dma.h>
+#include <asm/rio.h>
 
 #define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
 #define PCI_VENDOR_DEVICE_ID_CALGARY \
@@ -115,14 +116,35 @@ static const unsigned long phb_offsets[] = {
 	0xB000 /* PHB3 */
 };
 
+/* PHB debug registers */
+
+static const unsigned long phb_debug_offsets[] = {
+	0x4000	/* PHB 0 DEBUG */,
+	0x5000	/* PHB 1 DEBUG */,
+	0x6000	/* PHB 2 DEBUG */,
+	0x7000	/* PHB 3 DEBUG */
+};
+
+/*
+ * STUFF register for each debug PHB,
+ * byte 1 = start bus number, byte 2 = end bus number
+ */
+
+#define PHB_DEBUG_STUFF_OFFSET	0x0020
+
 unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
 static int translate_empty_slots __read_mostly = 0;
 static int calgary_detected __read_mostly = 0;
 
+static struct rio_table_hdr	*rio_table_hdr __initdata;
+static struct scal_detail	*scal_devs[MAX_NUMNODES] __initdata;
+static struct rio_detail	*rio_devs[MAX_NUMNODES*4] __initdata;
+
 struct calgary_bus_info {
 	void *tce_space;
 	unsigned char translation_disabled;
 	signed char phbid;
+	void __iomem *bbar;
 };
 
 static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
@@ -475,6 +497,11 @@ static struct dma_mapping_ops calgary_dma_ops = {
 	.unmap_sg = calgary_unmap_sg,
 };
 
+static inline void __iomem * busno_to_bbar(unsigned char num)
+{
+	return bus_info[num].bbar;
+}
+
 static inline int busno_to_phbid(unsigned char num)
 {
 	return bus_info[num].phbid;
@@ -828,31 +855,9 @@ static void __init calgary_disable_translation(struct pci_dev *dev)
 	del_timer_sync(&tbl->watchdog_timer);
 }
 
-static inline unsigned int __init locate_register_space(struct pci_dev *dev)
+static inline void __iomem * __init locate_register_space(struct pci_dev *dev)
 {
-	int rionodeid;
-	u32 address;
-
-	/*
-	 * Each Calgary has four busses. The first four busses (first Calgary)
-	 * have RIO node ID 2, then the next four (second Calgary) have RIO
-	 * node ID 3, the next four (third Calgary) have node ID 2 again, etc.
-	 * We use a gross hack - relying on the dev->bus->number ordering,
-	 * modulo 14 - to decide which Calgary a given bus is on. Busses 0, 1,
-	 * 2 and 4 are on the first Calgary (id 2), 6, 8, a and c are on the
-	 * second (id 3), and then it repeats modulo 14.
- 	 */
-	rionodeid = (dev->bus->number % 14 > 4) ? 3 : 2;
-	/*
-	 * register space address calculation as follows:
-	 * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase)
-	 * ChassisBase is always zero for x366/x260/x460
-	 * RioNodeId is 2 for first Calgary, 3 for second Calgary
-	 */
-	address = START_ADDRESS	-
-		(0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 14)) +
-		(0x100000) * (rionodeid - CHASSIS_BASE);
-	return address;
+	return busno_to_bbar(dev->bus->number);
 }
 
 static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
@@ -864,15 +869,12 @@ static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
 
 static int __init calgary_init_one(struct pci_dev *dev)
 {
-	u32 address;
 	void __iomem *bbar;
 	int ret;
 
 	BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM);
 
-	address = locate_register_space(dev);
-	/* map entire 1MB of Calgary config space */
-	bbar = ioremap_nocache(address, 1024 * 1024);
+	bbar = locate_register_space(dev);
 	if (!bbar) {
 		ret = -ENODATA;
 		goto done;
@@ -898,6 +900,35 @@ static int __init calgary_init(void)
 {
 	int ret = -ENODEV;
 	struct pci_dev *dev = NULL;
+	int rio, phb, bus;
+	void __iomem *bbar;
+	void __iomem *target;
+	u8 start_bus, end_bus;
+	u32 val;
+
+	for (rio = 0; rio < rio_table_hdr->num_rio_dev; rio++) {
+
+		if ( (rio_devs[rio]->type != COMPAT_CALGARY) &&
+		     (rio_devs[rio]->type != ALT_CALGARY) )
+			continue;
+
+		/* map entire 1MB of Calgary config space */
+		bbar = ioremap_nocache(rio_devs[rio]->BBAR, 1024 * 1024);
+
+		for (phb = 0; phb < PHBS_PER_CALGARY; phb++) {
+
+			target = calgary_reg(bbar, phb_debug_offsets[phb] |
+						   PHB_DEBUG_STUFF_OFFSET);
+			val = be32_to_cpu(readl(target));
+			start_bus = (u8)((val & 0x00FF0000) >> 16);
+			end_bus =   (u8)((val & 0x0000FF00) >> 8);
+			for (bus = start_bus; bus <= end_bus; bus++) {
+				bus_info[bus].bbar = bbar;
+				bus_info[bus].phbid = phb;
+			}
+		}
+	}
+
 
 	do {
 		dev = pci_get_device(PCI_VENDOR_ID_IBM,
@@ -962,13 +993,55 @@ static inline int __init determine_tce_table_size(u64 ram)
 	return ret;
 }
 
+static int __init build_detail_arrays(void)
+{
+	unsigned long ptr;
+	int i, scal_detail_size, rio_detail_size;
+
+	if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){
+		printk(KERN_WARNING
+			"Calgary: MAX_NUMNODES too low!  Defined as %d, "
+			"but system has %d nodes.\n",
+			MAX_NUMNODES, rio_table_hdr->num_scal_dev);
+		return -ENODEV;
+	}
+
+	switch (rio_table_hdr->version){
+	default:
+		printk(KERN_WARNING
+		       "Calgary: Invalid Rio Grande Table Version: %d\n",
+		       rio_table_hdr->version);
+		return -ENODEV;
+	case 2:
+		scal_detail_size = 11;
+		rio_detail_size = 13;
+		break;
+	case 3:
+		scal_detail_size = 12;
+		rio_detail_size = 15;
+		break;
+	}
+
+	ptr = ((unsigned long)rio_table_hdr) + 3;
+	for (i = 0; i < rio_table_hdr->num_scal_dev;
+		    i++, ptr += scal_detail_size)
+		scal_devs[i] = (struct scal_detail *)ptr;
+
+	for (i = 0; i < rio_table_hdr->num_rio_dev;
+		    i++, ptr += rio_detail_size)
+		rio_devs[i] = (struct rio_detail *)ptr;
+
+	return 0;
+}
+
 void __init detect_calgary(void)
 {
 	u32 val;
 	int bus;
 	void *tbl;
 	int calgary_found = 0;
-	int phb = -1;
+	unsigned long ptr;
+	int offset;
 
 	/*
 	 * if the user specified iommu=off or iommu=soft or we found
@@ -980,6 +1053,29 @@ void __init detect_calgary(void)
 	if (!early_pci_allowed())
 		return;
 
+	ptr = (unsigned long)phys_to_virt(get_bios_ebda());
+
+	rio_table_hdr = NULL;
+	offset = 0x180;
+	while (offset) {
+		/* The block id is stored in the 2nd word */
+		if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){
+			/* set the pointer past the offset & block id */
+			rio_table_hdr = (struct rio_table_hdr *)(ptr+offset+4);
+			break;
+		}
+		/* The next offset is stored in the 1st word. 0 means no more */
+		offset = *((unsigned short *)(ptr + offset));
+	}
+	if (!rio_table_hdr){
+		printk(KERN_ERR "Calgary: Unable to locate "
+				"Rio Grande Table in EBDA - bailing!\n");
+		return;
+	}
+
+	if (build_detail_arrays())
+		return;
+
 	specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
 
 	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
@@ -990,12 +1086,6 @@ void __init detect_calgary(void)
 		if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
 			continue;
 
-		/*
-		 * There are 4 PHBs per Calgary chip.  Set phb to which phb (0-3)
-		 * it is connected to releative to the clagary chip.
-		 */
-		phb = (phb + 1) % PHBS_PER_CALGARY;
-
 		if (info->translation_disabled)
 			continue;
 
@@ -1010,7 +1100,6 @@ void __init detect_calgary(void)
 				if (!tbl)
 					goto cleanup;
 				info->tce_space = tbl;
-				info->phbid = phb;
 				calgary_found = 1;
 				break;
 			}
-- 
cgit v1.2.2


From eae93755540bae18aff46b8a0e621b5d65bd5380 Mon Sep 17 00:00:00 2001
From: Muli Ben-Yehuda <muli@il.ibm.com>
Date: Thu, 7 Dec 2006 02:14:06 +0100
Subject: [PATCH] Calgary: check BBAR ioremap success when ioremapping

This patch cleans up the previous "Use BIOS supplied BBAR information"
patch. Mostly stylistic clenaups, but also check for ioremap failure
when we ioremap the BBAR rather than when trying to use it.

Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
Signed-off-by: Andi Kleen <ak@suse.de>
Acked-by: Laurent Vivier <Laurent.Vivier@bull.net>
---
 arch/x86_64/kernel/pci-calgary.c | 85 +++++++++++++++++++++++-----------------
 1 file changed, 49 insertions(+), 36 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index afc0a53505f1..8a1e4f35bc3c 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -138,7 +138,7 @@ static int calgary_detected __read_mostly = 0;
 
 static struct rio_table_hdr	*rio_table_hdr __initdata;
 static struct scal_detail	*scal_devs[MAX_NUMNODES] __initdata;
-static struct rio_detail	*rio_devs[MAX_NUMNODES*4] __initdata;
+static struct rio_detail	*rio_devs[MAX_NUMNODES * 4] __initdata;
 
 struct calgary_bus_info {
 	void *tce_space;
@@ -855,11 +855,6 @@ static void __init calgary_disable_translation(struct pci_dev *dev)
 	del_timer_sync(&tbl->watchdog_timer);
 }
 
-static inline void __iomem * __init locate_register_space(struct pci_dev *dev)
-{
-	return busno_to_bbar(dev->bus->number);
-}
-
 static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
 {
 	pci_dev_get(dev);
@@ -874,15 +869,10 @@ static int __init calgary_init_one(struct pci_dev *dev)
 
 	BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM);
 
-	bbar = locate_register_space(dev);
-	if (!bbar) {
-		ret = -ENODATA;
-		goto done;
-	}
-
+	bbar = busno_to_bbar(dev->bus->number);
 	ret = calgary_setup_tar(dev, bbar);
 	if (ret)
-		goto iounmap;
+		goto done;
 
 	pci_dev_get(dev);
 	dev->bus->self = dev;
@@ -890,38 +880,39 @@ static int __init calgary_init_one(struct pci_dev *dev)
 
 	return 0;
 
-iounmap:
-	iounmap(bbar);
 done:
 	return ret;
 }
 
-static int __init calgary_init(void)
+static int __init calgary_locate_bbars(void)
 {
-	int ret = -ENODEV;
-	struct pci_dev *dev = NULL;
-	int rio, phb, bus;
+	int ret;
+	int rioidx, phb, bus;
 	void __iomem *bbar;
 	void __iomem *target;
+	unsigned long offset;
 	u8 start_bus, end_bus;
 	u32 val;
 
-	for (rio = 0; rio < rio_table_hdr->num_rio_dev; rio++) {
+	ret = -ENODATA;
+	for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) {
+		struct rio_detail *rio = rio_devs[rioidx];
 
-		if ( (rio_devs[rio]->type != COMPAT_CALGARY) &&
-		     (rio_devs[rio]->type != ALT_CALGARY) )
+		if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY))
 			continue;
 
 		/* map entire 1MB of Calgary config space */
-		bbar = ioremap_nocache(rio_devs[rio]->BBAR, 1024 * 1024);
+		bbar = ioremap_nocache(rio->BBAR, 1024 * 1024);
+		if (!bbar)
+			goto error;
 
 		for (phb = 0; phb < PHBS_PER_CALGARY; phb++) {
+			offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET;
+			target = calgary_reg(bbar, offset);
 
-			target = calgary_reg(bbar, phb_debug_offsets[phb] |
-						   PHB_DEBUG_STUFF_OFFSET);
 			val = be32_to_cpu(readl(target));
 			start_bus = (u8)((val & 0x00FF0000) >> 16);
-			end_bus =   (u8)((val & 0x0000FF00) >> 8);
+			end_bus = (u8)((val & 0x0000FF00) >> 8);
 			for (bus = start_bus; bus <= end_bus; bus++) {
 				bus_info[bus].bbar = bbar;
 				bus_info[bus].phbid = phb;
@@ -929,6 +920,25 @@ static int __init calgary_init(void)
 		}
 	}
 
+	return 0;
+
+error:
+	/* scan bus_info and iounmap any bbars we previously ioremap'd */
+	for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++)
+		if (bus_info[bus].bbar)
+			iounmap(bus_info[bus].bbar);
+
+	return ret;
+}
+
+static int __init calgary_init(void)
+{
+	int ret;
+	struct pci_dev *dev = NULL;
+
+	ret = calgary_locate_bbars();
+	if (ret)
+		return ret;
 
 	do {
 		dev = pci_get_device(PCI_VENDOR_ID_IBM,
@@ -1000,18 +1010,13 @@ static int __init build_detail_arrays(void)
 
 	if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){
 		printk(KERN_WARNING
-			"Calgary: MAX_NUMNODES too low!  Defined as %d, "
+			"Calgary: MAX_NUMNODES too low! Defined as %d, "
 			"but system has %d nodes.\n",
 			MAX_NUMNODES, rio_table_hdr->num_scal_dev);
 		return -ENODEV;
 	}
 
 	switch (rio_table_hdr->version){
-	default:
-		printk(KERN_WARNING
-		       "Calgary: Invalid Rio Grande Table Version: %d\n",
-		       rio_table_hdr->version);
-		return -ENODEV;
 	case 2:
 		scal_detail_size = 11;
 		rio_detail_size = 13;
@@ -1020,6 +1025,11 @@ static int __init build_detail_arrays(void)
 		scal_detail_size = 12;
 		rio_detail_size = 15;
 		break;
+	default:
+		printk(KERN_WARNING
+		       "Calgary: Invalid Rio Grande Table Version: %d\n",
+		       rio_table_hdr->version);
+		return -EPROTO;
 	}
 
 	ptr = ((unsigned long)rio_table_hdr) + 3;
@@ -1042,6 +1052,7 @@ void __init detect_calgary(void)
 	int calgary_found = 0;
 	unsigned long ptr;
 	int offset;
+	int ret;
 
 	/*
 	 * if the user specified iommu=off or iommu=soft or we found
@@ -1061,27 +1072,29 @@ void __init detect_calgary(void)
 		/* The block id is stored in the 2nd word */
 		if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){
 			/* set the pointer past the offset & block id */
-			rio_table_hdr = (struct rio_table_hdr *)(ptr+offset+4);
+			rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
 			break;
 		}
 		/* The next offset is stored in the 1st word. 0 means no more */
 		offset = *((unsigned short *)(ptr + offset));
 	}
-	if (!rio_table_hdr){
+	if (!rio_table_hdr) {
 		printk(KERN_ERR "Calgary: Unable to locate "
 				"Rio Grande Table in EBDA - bailing!\n");
 		return;
 	}
 
-	if (build_detail_arrays())
+	ret = build_detail_arrays();
+	if (ret) {
+		printk(KERN_ERR "Calgary: build_detail_arrays ret %d\n", ret);
 		return;
+	}
 
 	specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
 
 	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
 		int dev;
 		struct calgary_bus_info *info = &bus_info[bus];
-		info->phbid = -1;
 
 		if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
 			continue;
-- 
cgit v1.2.2


From bff6547bb6a4e82c399d74e7fba78b12d2f162ed Mon Sep 17 00:00:00 2001
From: Muli Ben-Yehuda <muli@il.ibm.com>
Date: Thu, 7 Dec 2006 02:14:07 +0100
Subject: [PATCH] Calgary: allow compiling Calgary in but not using it by
 default

This patch makes it possible to compile Calgary in but not use it by
default. In this mode, use 'iommu=calgary' to activate it.

Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/Kconfig              | 11 +++++++++++
 arch/x86_64/kernel/pci-calgary.c |  9 +++++++++
 arch/x86_64/kernel/pci-dma.c     |  5 +++++
 3 files changed, 25 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 010d2265f1cf..5cb509dbffe4 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -455,6 +455,17 @@ config CALGARY_IOMMU
 	  Normally the kernel will make the right choice by itself.
 	  If unsure, say Y.
 
+config CALGARY_IOMMU_ENABLED_BY_DEFAULT
+	bool "Should Calgary be enabled by default?"
+	default y
+	depends on CALGARY_IOMMU
+	help
+	  Should Calgary be enabled by default? if you choose 'y', Calgary
+	  will be used (if it exists). If you choose 'n', Calgary will not be
+	  used even if it exists. If you choose 'n' and would like to use
+	  Calgary anyway, pass 'iommu=calgary' on the kernel command line.
+	  If unsure, say Y.
+
 # need this always selected by IOMMU for the VIA workaround
 config SWIOTLB
 	bool
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index 8a1e4f35bc3c..0ddf29dae7e0 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -43,6 +43,12 @@
 #include <asm/dma.h>
 #include <asm/rio.h>
 
+#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
+int use_calgary __read_mostly = 1;
+#else
+int use_calgary __read_mostly = 0;
+#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */
+
 #define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
 #define PCI_VENDOR_DEVICE_ID_CALGARY \
 	(PCI_VENDOR_ID_IBM | PCI_DEVICE_ID_IBM_CALGARY << 16)
@@ -1061,6 +1067,9 @@ void __init detect_calgary(void)
 	if (swiotlb || no_iommu || iommu_detected)
 		return;
 
+	if (!use_calgary)
+		return;
+
 	if (!early_pci_allowed())
 		return;
 
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index f8d857453f8a..683b7a5c1ab3 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -296,6 +296,11 @@ __init int iommu_setup(char *p)
 		gart_parse_options(p);
 #endif
 
+#ifdef CONFIG_CALGARY_IOMMU
+		if (!strncmp(p, "calgary", 7))
+			use_calgary = 1;
+#endif /* CONFIG_CALGARY_IOMMU */
+
 		p += strcspn(p, ",");
 		if (*p == ',')
 			++p;
-- 
cgit v1.2.2


From b9a8d94a47f8a41766f6f7944adfb1d641349903 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Thu, 7 Dec 2006 02:14:07 +0100
Subject: [PATCH] x86-64: Make x86_64 udelay() round up instead of down.

Port two patches from i386 to x86_64 delay.c to make sure all rounding is done
upward instead of downward.

There is no sign in commit messages that the mismatch was done on purpose, and
"delay() guarantees sleeping at least for the specified time" is still a valid
rule IMHO.

The original x86 patches are both from pre-GIT era, i.e.:

"[PATCH] round up  in __udelay()" in commit
54c7e1f5cc6771ff644d7bc21a2b829308bd126f

"[PATCH] add 1 in __const_udelay()" in commit
42c77a9801b8877d8b90f65f75db758822a0bccc

(both commits are from converted BK repository to x86_64).

AK: fixed gcc warning

linux/arch/x86_64/lib/delay.c:43: warning: suggest parentheses around + or - inside shift
(did this actually work?)

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/lib/delay.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c
index 50be90975d04..2dbebd308347 100644
--- a/arch/x86_64/lib/delay.c
+++ b/arch/x86_64/lib/delay.c
@@ -40,13 +40,13 @@ EXPORT_SYMBOL(__delay);
 
 inline void __const_udelay(unsigned long xloops)
 {
-	__delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32);
+	__delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1);
 }
 EXPORT_SYMBOL(__const_udelay);
 
 void __udelay(unsigned long usecs)
 {
-	__const_udelay(usecs * 0x000010c6);  /* 2**32 / 1000000 */
+	__const_udelay(usecs * 0x000010c7);  /* 2**32 / 1000000 (rounded up) */
 }
 EXPORT_SYMBOL(__udelay);
 
-- 
cgit v1.2.2


From 516d2836434d279fef164bb1b964e83425d312d2 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:07 +0100
Subject: [PATCH] x86-64: Fix race in IO-APIC routing entry setup.

Interrupt could happen between setting the IO-APIC entry
and setting its interrupt data.

Pointed out by Linus.

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/io_apic.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 88fcc4ebbf6e..eaf0b708e67d 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -174,14 +174,20 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
  * the interrupt, and we need to make sure the entry is fully populated
  * before that happens.
  */
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+static void
+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
-	unsigned long flags;
 	union entry_union eu;
 	eu.entry = e;
-	spin_lock_irqsave(&ioapic_lock, flags);
 	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
 	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+}
+
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__ioapic_write_entry(apic, pin, e);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-- 
cgit v1.2.2


From b6bcc4bb1cdfbc3c8612aad63a8703ac3d59f61a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:07 +0100
Subject: [PATCH] x86-64: Don't force inlining of do_csum

It's two big and used by two callers. Calls should be cheap enough anyways.

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/lib/csum-partial.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/lib/csum-partial.c b/arch/x86_64/lib/csum-partial.c
index 06ae630de82b..bc503f506903 100644
--- a/arch/x86_64/lib/csum-partial.c
+++ b/arch/x86_64/lib/csum-partial.c
@@ -9,8 +9,6 @@
 #include <linux/module.h>
 #include <asm/checksum.h>
 
-#define __force_inline inline __attribute__((always_inline))
-
 static inline unsigned short from32to16(unsigned a) 
 {
 	unsigned short b = a >> 16; 
@@ -33,7 +31,7 @@ static inline unsigned short from32to16(unsigned a)
  * Unrolling to an 128 bytes inner loop.
  * Using interleaving with more registers to break the carry chains.
  */
-static __force_inline unsigned do_csum(const unsigned char *buff, unsigned len)
+static unsigned do_csum(const unsigned char *buff, unsigned len)
 {
 	unsigned odd, count;
 	unsigned long result = 0;
-- 
cgit v1.2.2


From 713819989aa4dd141a37074dbc369e7c620bc619 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:09 +0100
Subject: [PATCH] x86-64: Add option to compile for Core2

Add an option to compile for Intel's Core 2

The Kconfig help is a mouthful due to the inventiveness of Intel's
product naming department.

Mainly for the 64bit cache line sizes because gcc doesn't support
optimizing for core2 yet. However it will and then the kernel
should be ready by passing the right option

Also fix the old MPSC help text to confirm better to reality.

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/Kconfig  | 27 +++++++++++++++++++++------
 arch/x86_64/Makefile |  4 ++++
 2 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 5cb509dbffe4..6eece27d579c 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -122,7 +122,7 @@ endchoice
 
 choice
 	prompt "Processor family"
-	default MK8
+	default GENERIC_CPU
 
 config MK8
 	bool "AMD-Opteron/Athlon64"
@@ -130,16 +130,31 @@ config MK8
 	  Optimize for AMD Opteron/Athlon64/Hammer/K8 CPUs.
 
 config MPSC
-       bool "Intel EM64T"
+       bool "Intel P4 / older Netburst based Xeon"
        help
-	  Optimize for Intel Pentium 4 and Xeon CPUs with Intel
-	  Extended Memory 64 Technology(EM64T). For details see
+	  Optimize for Intel Pentium 4 and older Nocona/Dempsey Xeon CPUs
+	  with Intel Extended Memory 64 Technology(EM64T). For details see
 	  <http://www.intel.com/technology/64bitextensions/>.
+	  Note the the latest Xeons (Xeon 51xx and 53xx) are not based on the
+          Netburst core and shouldn't use this option. You can distingush them
+	  using the cpu family field
+	  in /proc/cpuinfo. Family 15 is a older Xeon, Family 6 a newer one
+	  (this rule only applies to system that support EM64T)
+
+config MCORE2
+	bool "Intel Core2 / newer Xeon"
+	help
+	  Optimize for Intel Core2 and newer Xeons (51xx)
+	  You can distingush the newer Xeons from the older ones using
+	  the cpu family field in /proc/cpuinfo. 15 is a older Xeon
+	  (use CONFIG_MPSC then), 6 is a newer one. This rule only
+	  applies to CPUs that support EM64T.
 
 config GENERIC_CPU
 	bool "Generic-x86-64"
 	help
 	  Generic x86-64 CPU.
+	  Run equally well on all x86-64 CPUs.
 
 endchoice
 
@@ -149,12 +164,12 @@ endchoice
 config X86_L1_CACHE_BYTES
 	int
 	default "128" if GENERIC_CPU || MPSC
-	default "64" if MK8
+	default "64" if MK8 || MCORE2
 
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if GENERIC_CPU || MPSC
-	default "6" if MK8
+	default "6" if MK8 || MCORE2
 
 config X86_INTERNODE_CACHE_BYTES
 	int
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 6e38d4daeed7..b471b8550d03 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -30,6 +30,10 @@ cflags-y	:=
 cflags-kernel-y	:=
 cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
 cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
+# gcc doesn't support -march=core2 yet as of gcc 4.3, but I hope it
+# will eventually. Use -mtune=generic as fallback
+cflags-$(CONFIG_MCORE2) += \
+	$(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
 cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
 
 cflags-y += -m64
-- 
cgit v1.2.2


From 103efcd9aac1de4da6a1477f2f3b9fcf35273a18 Mon Sep 17 00:00:00 2001
From: Ernie Petrides <petrides@redhat.com>
Date: Thu, 7 Dec 2006 02:14:09 +0100
Subject: [PATCH] x86-64: fix perms/range of vsyscall vma in /proc/*/maps

The final line of /proc/<pid>/maps on x86_64 for native 64-bit
tasks shows an incorrect ending address and incorrect permissions.  There
is only a single page mapped in this vsyscall region, and it is accessible
for both read and execute.

The patch below fixes this.  (Since 32-bit-compat tasks have a real vma
with correct perms/range, no change is necessary for that scenario.)

Before the patch, a "cat /proc/self/maps | tail -1" shows this:

        ffffffffff600000-ffffffffffe00000 ---p 00000000 [...]

After the patch, this is the output:

        ffffffffff600000-ffffffffff601000 r-xp 00000000 [...]

Signed-off-by: Ernie Petrides <petrides@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/vsyscall.c | 1 +
 arch/x86_64/mm/init.c         | 7 ++++---
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 92546c1526f1..c3de9a09cd9f 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -290,6 +290,7 @@ static void __init map_vsyscall(void)
 	extern char __vsyscall_0;
 	unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
 
+	/* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
 	__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
 }
 
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 4c0c00ef3ca7..2968b90ef8ad 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -730,14 +730,15 @@ static __init int x8664_sysctl_init(void)
 __initcall(x8664_sysctl_init);
 #endif
 
-/* A pseudo VMAs to allow ptrace access for the vsyscall page.   This only
+/* A pseudo VMA to allow ptrace access for the vsyscall page.  This only
    covers the 64bit vsyscall page now. 32bit has a real VMA now and does
    not need special handling anymore. */
 
 static struct vm_area_struct gate_vma = {
 	.vm_start = VSYSCALL_START,
-	.vm_end = VSYSCALL_END,
-	.vm_page_prot = PAGE_READONLY
+	.vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
+	.vm_page_prot = PAGE_READONLY_EXEC,
+	.vm_flags = VM_READ | VM_EXEC
 };
 
 struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
-- 
cgit v1.2.2


From 9a457324229db34d3bcb0b67360130c287289401 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:09 +0100
Subject: [PATCH] x86-64: Rate limit no irq handler messages

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index e46c55856d40..0c06af6c13bc 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -120,7 +120,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 
 	if (likely(irq < NR_IRQS))
 		generic_handle_irq(irq);
-	else
+	else if (printk_ratelimit())
 		printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n",
 			__func__, smp_processor_id(), vector);
 
-- 
cgit v1.2.2


From 9899f826fc90beba4f78083f6230e06cbe1050c9 Mon Sep 17 00:00:00 2001
From: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Date: Thu, 7 Dec 2006 02:14:10 +0100
Subject: [PATCH] x86-64: add genapic_force

Add genapic_force. Used by the next Intel quirks patch.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@suse.de>
Cc: "Li, Shaohua" <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 arch/x86_64/kernel/genapic.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 8e78a75d1866..b007433f96bb 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -33,7 +33,7 @@ extern struct genapic apic_flat;
 extern struct genapic apic_physflat;
 
 struct genapic *genapic = &apic_flat;
-
+struct genapic *genapic_force;
 
 /*
  * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
@@ -46,6 +46,13 @@ void __init clustered_apic_check(void)
 	u8 cluster_cnt[NUM_APIC_CLUSTERS];
 	int max_apic = 0;
 
+	/* genapic selection can be forced because of certain quirks.
+	 */
+	if (genapic_force) {
+		genapic = genapic_force;
+		goto print;
+	}
+
 #if defined(CONFIG_ACPI)
 	/*
 	 * Some x86_64 machines use physical APIC mode regardless of how many
-- 
cgit v1.2.2


From b0d0a4ba45760b10ecee9035ed45b442c1a6cc84 Mon Sep 17 00:00:00 2001
From: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Date: Thu, 7 Dec 2006 02:14:10 +0100
Subject: [PATCH] x86: fix the irqbalance quirk for E7320/E7520/E7525

Move the irqbalance quirks for E7320/E7520/E7525(Errata 23 in
http://download.intel.com/design/chipsets/specupdt/30304203.pdf) to early
quirks.

And add a PCI quirk for these platforms to check(which happens very late
during the boot) if the APIC routing is indeed set to default flat mode.

This fixes the breakage(in x86_64) of this quirk due to cpu hotplug which
selects physical mode instead of the logical flat(as needed for this errata
workaround).

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@suse.de>
Cc: "Li, Shaohua" <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 arch/x86_64/kernel/early-quirks.c | 13 +++++++++++++
 arch/x86_64/kernel/smpboot.c      |  8 ++++++++
 2 files changed, 21 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index fb0c6da41b7e..829698f6d049 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -71,6 +71,18 @@ static void ati_bugs(void)
 {
 }
 
+static void intel_bugs(void)
+{
+	u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
+
+#ifdef CONFIG_SMP
+	if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
+	    device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
+	    device == PCI_DEVICE_ID_INTEL_E7525_MCH)
+		quirk_intel_irqbalance();
+#endif
+}
+
 struct chipset {
 	u16 vendor;
 	void (*f)(void);
@@ -80,6 +92,7 @@ static struct chipset early_qrk[] = {
 	{ PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
 	{ PCI_VENDOR_ID_VIA, via_bugs },
 	{ PCI_VENDOR_ID_ATI, ati_bugs },
+	{ PCI_VENDOR_ID_INTEL, intel_bugs},
 	{}
 };
 
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 62c2e747af58..4c161c208d5b 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -60,6 +60,7 @@
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
 #include <asm/numa.h>
+#include <asm/genapic.h>
 
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
@@ -1167,6 +1168,13 @@ int __cpuinit __cpu_up(unsigned int cpu)
 
 	while (!cpu_isset(cpu, cpu_online_map))
 		cpu_relax();
+
+	if (num_online_cpus() > 8 && genapic == &apic_flat) {
+		printk(KERN_WARNING
+		       "flat APIC routing can't be used with > 8 cpus\n");
+		BUG();
+	}
+
 	err = 0;
 
 	return err;
-- 
cgit v1.2.2


From ee58fad51a2a767cb2567706ace967705233d881 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@hpl.hp.com>
Date: Thu, 7 Dec 2006 02:14:11 +0100
Subject: [PATCH] x86-64: x86-64 add Intel BTS cpufeature bit and detection
 (take 2)

Here is a small patch for x86-64 which adds a cpufeature flag and
detection code for Intel's Branch Trace Store (BTS) feature. This
feature can be found on Intel P4 and Core 2 processors among others.
It can also be used by perfmon.

changelog:
	- add CPU_FEATURE_BTS
	- add Branch Trace Store detection

signed-off-by: stephane eranian <eranian@hpl.hp.com>

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/setup.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 619af2e2fa26..a570c81c8316 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -838,6 +838,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 	if (cpu_has_ds) {
 		unsigned int l1, l2;
 		rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
+		if (!(l1 & (1<<11)))
+			set_bit(X86_FEATURE_BTS, c->x86_capability);
 		if (!(l1 & (1<<12)))
 			set_bit(X86_FEATURE_PEBS, c->x86_capability);
 	}
-- 
cgit v1.2.2


From f990fff427d68af3e4e1d16fe799c106abc0bf53 Mon Sep 17 00:00:00 2001
From: Karsten Wiese <fzu@wemgehoertderstaat.de>
Date: Thu, 7 Dec 2006 02:14:11 +0100
Subject: [PATCH] x86: Regard MSRs in lapic_suspend()/lapic_resume()

Read/Write APIC_LVTPC and APIC_LVTTHMR only,
if get_maxlvt() returns certain values.
This is done like everywhere else in i386/kernel/apic.c,
so I guess its correct.
Suspends/Resumes to disk fine and eleminates an smp_error_interrupt()
here on a K8.

AK: ported to x86-64 too

Signed-off-by: Karsten Wiese <fzu@wemgehoertderstaat.de>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/apic.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 5c468971e645..f0b00d8731cb 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -459,23 +459,30 @@ static struct {
 static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 {
 	unsigned long flags;
+	int maxlvt;
 
 	if (!apic_pm_state.active)
 		return 0;
 
+	maxlvt = get_maxlvt();
+
 	apic_pm_state.apic_id = apic_read(APIC_ID);
 	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
 	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
 	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
 	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
 	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
-	apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+	if (maxlvt >= 4)
+		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
 	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
 	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
 	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
 	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
 	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-	apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#ifdef CONFIG_X86_MCE_INTEL
+	if (maxlvt >= 5)
+		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#endif
 	local_irq_save(flags);
 	disable_local_APIC();
 	local_irq_restore(flags);
@@ -486,10 +493,13 @@ static int lapic_resume(struct sys_device *dev)
 {
 	unsigned int l, h;
 	unsigned long flags;
+	int maxlvt;
 
 	if (!apic_pm_state.active)
 		return 0;
 
+	maxlvt = get_maxlvt();
+
 	local_irq_save(flags);
 	rdmsr(MSR_IA32_APICBASE, l, h);
 	l &= ~MSR_IA32_APICBASE_BASE;
@@ -503,8 +513,12 @@ static int lapic_resume(struct sys_device *dev)
 	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
 	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
 	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-	apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
-	apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
+#ifdef CONFIG_X86_MCE_INTEL
+	if (maxlvt >= 5)
+		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+#endif
+	if (maxlvt >= 4)
+		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
 	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
 	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
 	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
-- 
cgit v1.2.2


From 86bd58bf4c383fde4e99b83ce917b091a072040d Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@cs.washington.edu>
Date: Thu, 7 Dec 2006 02:14:11 +0100
Subject: [PATCH] x86-64: Remove unused GET_APIC_VERSION call from
 clear_local_APIC

Remove unused GET_APIC_VERSION call from clear_local_APIC() and
__setup_APIC_LVTT().

Reported by D Binderman <dcb314@hotmail.com>.

Cc: Andi Kleen <ak@suse.de>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: David Rientjes <rientjes@cs.washington.edu>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/apic.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index f0b00d8731cb..124b2d27b4ac 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -140,7 +140,6 @@ void clear_local_APIC(void)
 		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
 	if (maxlvt >= 4)
 		apic_write(APIC_LVTPC, APIC_LVT_MASKED);
-	v = GET_APIC_VERSION(apic_read(APIC_LVR));
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
 }
@@ -736,10 +735,9 @@ void __init init_apic_mappings(void)
 
 static void __setup_APIC_LVTT(unsigned int clocks)
 {
-	unsigned int lvtt_value, tmp_value, ver;
+	unsigned int lvtt_value, tmp_value;
 	int cpu = smp_processor_id();
 
-	ver = GET_APIC_VERSION(apic_read(APIC_LVR));
 	lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
 
 	if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask))
-- 
cgit v1.2.2


From 0741f4d207a644482d7a040f05cd264c98cf7ee8 Mon Sep 17 00:00:00 2001
From: Chuck Ebbert <76306.1226@compuserve.com>
Date: Thu, 7 Dec 2006 02:14:11 +0100
Subject: [PATCH] x86: add sysctl for kstack_depth_to_print

Add sysctl for kstack_depth_to_print. This lets users change
the amount of raw stack data printed in dump_stack() without
having to reboot.

Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/traps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 264db33476ab..75ceccee178c 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -108,7 +108,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
 	preempt_enable_no_resched();
 }
 
-static int kstack_depth_to_print = 12;
+int kstack_depth_to_print = 12;
 #ifdef CONFIG_STACK_UNWIND
 static int call_trace = 1;
 #else
-- 
cgit v1.2.2


From 3df0af0eb064a16bbdbe81b46bc72a4089f88d54 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai.lu@amd.com>
Date: Thu, 7 Dec 2006 02:14:12 +0100
Subject: [PATCH] x86_64: clear_bss before set_intr_gate with early_idt_handler

idt_table is in the .bss section, so clear_bss need to called at first

Signed-off-by: Yinghai Lu <yinghai.lu@amd.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/head64.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 9561eb3c5b5c..cc230b93cd1c 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -57,10 +57,12 @@ void __init x86_64_start_kernel(char * real_mode_data)
 {
 	int i;
 
-	for (i = 0; i < 256; i++)
+	/* clear bss before set_intr_gate with early_idt_handler */
+	clear_bss();
+
+	for (i = 0; i < IDT_ENTRIES; i++)
 		set_intr_gate(i, early_idt_handler);
 	asm volatile("lidt %0" :: "m" (idt_descr));
-	clear_bss();
 
 	early_printk("Kernel alive\n");
 
-- 
cgit v1.2.2


From 8fb6e5f5db860113e71ce7b854382ed40559395b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@amd.com>
Date: Thu, 7 Dec 2006 02:14:12 +0100
Subject: [PATCH] x86_64: interrupt array size should be aligned to NR_VECTORS

interrupt array is referred for idt vectors instead of NR_IRQS, so change size
to NR_VECTORS - FIRST_EXTERNAL_VECTOR. Also change to static.

Signed-off-by: Yinghai Lu <yinghai@amd.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/i8259.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index c4ef801b765b..d73c79e821f1 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -76,7 +76,8 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
 	IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
 	IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
 
-void (*interrupt[NR_IRQS])(void) = {
+/* for the irq vectors */
+static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
 					  IRQLIST_16(0x2), IRQLIST_16(0x3),
 	IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
 	IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
-- 
cgit v1.2.2


From 026c66bdda5f07959da7d74d29b18a7c480242f7 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 7 Dec 2006 02:14:12 +0100
Subject: [PATCH] x86-64: remove duplicate ARCH_DISCONTIGMEM_ENABLE option

One ARCH_DISCONTIGMEM_ENABLE option is enough.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/Kconfig | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 6eece27d579c..bfbb9bcae123 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -359,11 +359,6 @@ config ARCH_DISCONTIGMEM_ENABLE
        depends on NUMA
        default y
 
-
-config ARCH_DISCONTIGMEM_ENABLE
-	def_bool y
-	depends on NUMA
-
 config ARCH_DISCONTIGMEM_DEFAULT
 	def_bool y
 	depends on NUMA
-- 
cgit v1.2.2


From d4c45718b3c0d3045eab803cd15fabbed1ea5bcd Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 7 Dec 2006 02:14:12 +0100
Subject: [PATCH] x86-64: Fix kobject_init() WARN_ON on resume from disk

Make mce_remove_device() clean up the kobject in per_cpu(device_mce, cpu)
after it has been unregistered.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/mce.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index bbea88801d88..a7440cb9c14f 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -652,6 +652,7 @@ static void mce_remove_device(unsigned int cpu)
 	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
 	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
 	sysdev_unregister(&per_cpu(device_mce,cpu));
+	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
 }
 
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
-- 
cgit v1.2.2


From 73ad8355d7db6a3cdcf313d4a4586a8f81b19c2f Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai.lu@amd.com>
Date: Thu, 7 Dec 2006 02:14:12 +0100
Subject: [PATCH] x86-64: remove unused acpi_found_madt in mparse.

remove unused acpi_found_madt in mparse.c

Signed-off-by: Yinghai Lu <yinghai.lu@amd.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/mpparse.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index b147ab19fbd4..08072568847d 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -35,8 +35,6 @@
 int smp_found_config;
 unsigned int __initdata maxcpus = NR_CPUS;
 
-int acpi_found_madt;
-
 /*
  * Various Linux-internal data structures created from the
  * MP-table.
-- 
cgit v1.2.2


From 616779656989cb8c59177e35cb13e87028b1edc8 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:12 +0100
Subject: [PATCH] x86-64: Synchronize RDTSC on single core AMD

There is no guarantee that two RDTSCs in a row are monotonic,
so don't assume it on single core AMD systems.
This will make gettimeofday slower again
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/setup.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index a570c81c8316..05eaca41802b 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -732,11 +732,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	/* Fix cpuid4 emulation for more */
 	num_cache_leaves = 3;
 
-	/* When there is only one core no need to synchronize RDTSC */
-	if (num_possible_cpus() == 1)
-	        set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
-	else
-	        clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+	/* RDTSC can be speculated around */
+	clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
 }
 
 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
-- 
cgit v1.2.2


From e496a0da7f8110054d0ad4039245b0f49f9540f5 Mon Sep 17 00:00:00 2001
From: Muli Ben-Yehuda <muli@il.ibm.com>
Date: Thu, 7 Dec 2006 02:14:12 +0100
Subject: [PATCH] Calgary: remove unused variables

Spotted by d binderman <dcb314@hotmail.com>.

Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/pci-calgary.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index 0ddf29dae7e0..3215675ab128 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -653,14 +653,9 @@ static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
 static void __init calgary_reserve_regions(struct pci_dev *dev)
 {
 	unsigned int npages;
-	void __iomem *bbar;
-	unsigned char busnum;
 	u64 start;
 	struct iommu_table *tbl = dev->sysdata;
 
-	bbar = tbl->bbar;
-	busnum = dev->bus->number;
-
 	/* reserve bad_dma_address in case it's a legal address */
 	iommu_range_reserve(tbl, bad_dma_address, 1);
 
-- 
cgit v1.2.2


From f3d73707a1e84f0687a05144b70b660441e999c7 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 7 Dec 2006 02:14:12 +0100
Subject: [PATCH] x86-64: Mark rdtsc as sync only for netburst, not for core2

On the Core2 cpus, the rdtsc instruction is not serializing (as defined
in the architecture reference since rdtsc exists) and due to the deep
speculation of these cores, it's possible that you can observe time go
backwards between cores due to this speculation. Since the kernel
already deals with this with the SYNC_RDTSC flag, the solution is
simple, only assume that the instruction is serializing on family 15...

The price one pays for this is a slightly slower gettimeofday (by a
dozen or two cycles), but that increase is quite small to pay for a
really-going-forward tsc counter.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/setup.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 05eaca41802b..6595a4ebe7f1 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -860,7 +860,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
 	if (c->x86 == 6)
 		set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
-	set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+	if (c->x86 == 15)
+		set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+	else
+		clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
  	c->x86_max_cores = intel_num_cpu_cores(c);
 
 	srat_detect_node();
-- 
cgit v1.2.2


From 446f713ba1afd68568139ae4691335ba273fa7f4 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:12 +0100
Subject: [PATCH] unwinder: always use unlocked module list access in unwinder
 fallback

We're already well protected against module unloads because module
unload uses stop_machine(). The only exception is NMIs, but other
users already risk lockless accesses here.

This avoids some hackery in lockdep and also a potential deadlock

This matches what i386 does.

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/traps.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 75ceccee178c..9864d195c408 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -317,9 +317,9 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 #define HANDLE_STACK(cond) \
 	do while (cond) { \
 		unsigned long addr = *stack++; \
-		if (oops_in_progress ? 		\
-			__kernel_text_address(addr) : \
-			kernel_text_address(addr)) { \
+		/* Use unlocked access here because except for NMIs	\
+		   we should be already protected against module unloads */ \
+		if (__kernel_text_address(addr)) { \
 			/* \
 			 * If the address is either in the text segment of the \
 			 * kernel, or in the region which contains vmalloc'ed \
-- 
cgit v1.2.2


From 359ad0d4015a9ab39243f2ebc4eb07915bd618b2 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Thu, 7 Dec 2006 02:14:13 +0100
Subject: [PATCH] unwinder: more sanity checks in Dwarf2 unwinder

Tighten the requirements on both input to and output from the Dwarf2
unwinder.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/traps.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 9864d195c408..4fdd162f0bef 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -225,12 +225,19 @@ static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
 {
 	struct ops_and_data *oad = (struct ops_and_data *)context;
 	int n = 0;
+	unsigned long sp = UNW_SP(info);
 
+	if (arch_unw_user_mode(info))
+		return -1;
 	while (unwind(info) == 0 && UNW_PC(info)) {
 		n++;
 		oad->ops->address(oad->data, UNW_PC(info));
 		if (arch_unw_user_mode(info))
 			break;
+		if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
+		    && sp > UNW_SP(info))
+			break;
+		sp = UNW_SP(info);
 	}
 	return n;
 }
-- 
cgit v1.2.2


From a0429d0d7a6116dedcb71d9128da904bf135f189 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:13 +0100
Subject: [PATCH] x86-64: Remove unwind stack pointer alignment forcing again

This was added as a workaround for the fallback unwinder not supporting
unaligned stack pointers properly. But now it was fixed to do that,
so it's not needed anymore

Cc: mingo@elte.hu
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/traps.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 4fdd162f0bef..a1641ffdffcf 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -309,12 +309,6 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 		if (tsk && tsk != current)
 			stack = (unsigned long *)tsk->thread.rsp;
 	}
-	/*
-	 * Align the stack pointer on word boundary, later loops
-	 * rely on that (and corruption / debug info bugs can cause
-	 * unaligned values here):
-	 */
-	stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
 
 	/*
 	 * Print function call entries within a stack. 'cond' is the
-- 
cgit v1.2.2


From d331e739f5ad2aaa9d8553891ba6ca823bdbce37 Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Thu, 7 Dec 2006 02:14:13 +0100
Subject: [PATCH] x86-64: Fix interrupt race in idle callback (3rd try)

Idle callbacks has some races when enter_idle() sets isidle and subsequent
interrupts that can happen on that CPU, before CPU goes to idle. Due to this,
an IDLE_END can get called before IDLE_START. To avoid these races, disable
interrupts before enter_idle and make sure that all idle routines do not
enable interrupts before entering idle.

Note that poll_idle() still has a this race as it has to enable interrupts
before going to idle. But, all other idle routines have the race fixed.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/process.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 0b7b4caa4f74..a418ee4c8c62 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -127,6 +127,7 @@ static void default_idle(void)
  */
 static void poll_idle (void)
 {
+	local_irq_enable();
 	cpu_relax();
 }
 
@@ -208,6 +209,12 @@ void cpu_idle (void)
 				idle = default_idle;
 			if (cpu_is_offline(smp_processor_id()))
 				play_dead();
+			/*
+			 * Idle routines should keep interrupts disabled
+			 * from here on, until they go to idle.
+			 * Otherwise, idle callbacks can misfire.
+			 */
+			local_irq_disable();
 			enter_idle();
 			idle();
 			/* In many cases the interrupt that ended idle
@@ -245,8 +252,16 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 /* Default MONITOR/MWAIT with no hints, used for default C1 state */
 static void mwait_idle(void)
 {
-	local_irq_enable();
-	mwait_idle_with_hints(0,0);
+	if (!need_resched()) {
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		smp_mb();
+		if (!need_resched())
+			__sti_mwait(0, 0);
+		else
+			local_irq_enable();
+	} else {
+		local_irq_enable();
+	}
 }
 
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
-- 
cgit v1.2.2


From 3807fd46e94ab9f09e5ee3bff5e6515a94e9b3c7 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:13 +0100
Subject: [PATCH] x86-64: Clarify error message in GART code

- Remove "Disabling IOMMU" message because it confuses people
- Clarify that the GART IOMMU is refered to in other message

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/pci-gart.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 16261a8a3303..fc1960f1f243 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -601,10 +601,9 @@ void __init gart_iommu_init(void)
 	    (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
 	    !iommu_aperture ||
 	    (no_agp && init_k8_gatt(&info) < 0)) {
-		printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
 		if (end_pfn > MAX_DMA32_PFN) {
 			printk(KERN_ERR "WARNING more than 4GB of memory "
-					"but IOMMU not available.\n"
+					"but GART IOMMU not available.\n"
 			       KERN_ERR "WARNING 32bit PCI may malfunction.\n");
 		}
 		return;
-- 
cgit v1.2.2


From ad892f5e0d01f3c3b475a688d1ddc211cf3ea56d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai.lu@amd.com>
Date: Thu, 7 Dec 2006 02:14:19 +0100
Subject: [PATCH] x86-64: check vector in setup_ioapic_dest to verify if need
 setup_IO_APIC_irq

setup_IO_APIC_irqs could fail to get vector for some device when you have too
many devices, because at that time only boot cpu is online.  So check vector
for irq in setup_ioapic_dest and call setup_IO_APIC_irq to make sure IO-APIC
irq-routing table is initialized.

Also seperate setup_IO_APIC_irq from setup_IO_APIC_irqs.

Signed-off-by: Yinghai Lu <yinghai.lu@amd.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@suse.de>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 arch/x86_64/kernel/io_apic.c | 104 +++++++++++++++++++++++++------------------
 1 file changed, 61 insertions(+), 43 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index eaf0b708e67d..2a1dcd5f69c2 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -789,27 +789,65 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 					      handle_edge_irq, "edge");
 	}
 }
-
-static void __init setup_IO_APIC_irqs(void)
+static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq)
 {
 	struct IO_APIC_route_entry entry;
-	int apic, pin, idx, irq, first_notcon = 1, vector;
+	int vector;
 	unsigned long flags;
 
-	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-	for (apic = 0; apic < nr_ioapics; apic++) {
-	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+	/*
+	 * add it to the IO-APIC irq-routing table:
+	 */
+	memset(&entry,0,sizeof(entry));
 
-		/*
-		 * add it to the IO-APIC irq-routing table:
-		 */
-		memset(&entry,0,sizeof(entry));
+	entry.delivery_mode = INT_DELIVERY_MODE;
+	entry.dest_mode = INT_DEST_MODE;
+	entry.mask = 0;				/* enable IRQ */
+	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+
+	entry.trigger = irq_trigger(idx);
+	entry.polarity = irq_polarity(idx);
 
-		entry.delivery_mode = INT_DELIVERY_MODE;
-		entry.dest_mode = INT_DEST_MODE;
-		entry.mask = 0;				/* enable IRQ */
+	if (irq_trigger(idx)) {
+		entry.trigger = 1;
+		entry.mask = 1;
 		entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+	}
+
+	if (!apic && !IO_APIC_IRQ(irq))
+		return;
+
+	if (IO_APIC_IRQ(irq)) {
+		cpumask_t mask;
+		vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
+		if (vector < 0)
+			return;
+
+		entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
+		entry.vector = vector;
+
+		ioapic_register_intr(irq, vector, IOAPIC_AUTO);
+		if (!apic && (irq < 16))
+			disable_8259A_irq(irq);
+	}
+
+	ioapic_write_entry(apic, pin, entry);
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	set_native_irq_info(irq, TARGET_CPUS);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+}
+
+static void __init setup_IO_APIC_irqs(void)
+{
+	int apic, pin, idx, irq, first_notcon = 1;
+
+	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 
 		idx = find_irq_entry(apic,pin,mp_INT);
 		if (idx == -1) {
@@ -821,39 +859,11 @@ static void __init setup_IO_APIC_irqs(void)
 			continue;
 		}
 
-		entry.trigger = irq_trigger(idx);
-		entry.polarity = irq_polarity(idx);
-
-		if (irq_trigger(idx)) {
-			entry.trigger = 1;
-			entry.mask = 1;
-			entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
-		}
-
 		irq = pin_2_irq(idx, apic, pin);
 		add_pin_to_irq(irq, apic, pin);
 
-		if (!apic && !IO_APIC_IRQ(irq))
-			continue;
+		setup_IO_APIC_irq(apic, pin, idx, irq);
 
-		if (IO_APIC_IRQ(irq)) {
-			cpumask_t mask;
-			vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
-			if (vector < 0)
-				continue;
-
-			entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
-			entry.vector = vector;
-
-			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
-			if (!apic && (irq < 16))
-				disable_8259A_irq(irq);
-		}
-		ioapic_write_entry(apic, pin, entry);
-
-		spin_lock_irqsave(&ioapic_lock, flags);
-		set_native_irq_info(irq, TARGET_CPUS);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
 	}
 	}
 
@@ -2139,7 +2149,15 @@ void __init setup_ioapic_dest(void)
 			if (irq_entry == -1)
 				continue;
 			irq = pin_2_irq(irq_entry, ioapic, pin);
-			set_ioapic_affinity_irq(irq, TARGET_CPUS);
+
+			/* setup_IO_APIC_irqs could fail to get vector for some device
+			 * when you have too many devices, because at that time only boot
+			 * cpu is online.
+			 */
+			if(!irq_vector[irq])
+				setup_IO_APIC_irq(ioapic, pin, irq_entry, irq);
+			else
+				set_ioapic_affinity_irq(irq, TARGET_CPUS);
 		}
 
 	}
-- 
cgit v1.2.2


From b65780e123ba9b762276482bbfb52836e4d41fd9 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Thu, 7 Dec 2006 02:14:19 +0100
Subject: [PATCH] unwinder: move .eh_frame to RODATA

The .eh_frame section contents is never written to, so it can as well
benefit from CONFIG_DEBUG_RODATA.

Diff-ed against firstfloor tree.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/vmlinux.lds.S | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index d9534e750d4f..6a1f8f491e5d 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -51,15 +51,6 @@ SECTIONS
 
   RODATA
 
-#ifdef CONFIG_STACK_UNWIND
-  . = ALIGN(8);
-  .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
-	__start_unwind = .;
-  	*(.eh_frame)
-	__end_unwind = .;
-  }
-#endif
-
   . = ALIGN(PAGE_SIZE);        /* Align data segment to page size boundary */
 				/* Data */
   .data : AT(ADDR(.data) - LOAD_OFFSET) {
-- 
cgit v1.2.2


From 64a26a731235b59c9d73bbe82c1f896d57400d37 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:19 +0100
Subject: [PATCH] x86-64: Export smp_call_function_single

smp_call_function() is exported, makes sense to export this one too.

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/smp.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 9f74c883568c..1bf0e094f439 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -385,6 +385,7 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
 	put_cpu();
 	return 0;
 }
+EXPORT_SYMBOL(smp_call_function_single);
 
 /*
  * this function sends a 'generic call function' IPI to all other CPUs
-- 
cgit v1.2.2