From 97f7f8189fe54e3cfe324ef9ad35064f3d2d3bff Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Mon, 10 Oct 2011 16:21:10 +0200
Subject: oprofile, x86: Fix crash when unloading module (nmi timer mode)

If oprofile uses the nmi timer interrupt there is a crash while
unloading the module. The bug can be triggered with oprofile build as
module and kernel parameter nolapic set. This patch fixes this.

oprofile: using NMI timer interrupt.
BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
IP: [<ffffffff8123c226>] unregister_syscore_ops+0x41/0x58
PGD 42dbca067 PUD 41da6a067 PMD 0
Oops: 0002 [#1] PREEMPT SMP
CPU 5
Modules linked in: oprofile(-) [last unloaded: oprofile]

Pid: 2518, comm: modprobe Not tainted 3.1.0-rc7-00019-gb2fb49d #19 Advanced Micro Device Anaheim/Anaheim
RIP: 0010:[<ffffffff8123c226>]  [<ffffffff8123c226>] unregister_syscore_ops+0x41/0x58
RSP: 0018:ffff88041ef71e98  EFLAGS: 00010296
RAX: 0000000000000000 RBX: ffffffffa0017100 RCX: dead000000200200
RDX: 0000000000000000 RSI: dead000000100100 RDI: ffffffff8178c620
RBP: ffff88041ef71ea8 R08: 0000000000000001 R09: 0000000000000082
R10: 0000000000000000 R11: ffff88041ef71de8 R12: 0000000000000080
R13: fffffffffffffff5 R14: 0000000000000001 R15: 0000000000610210
FS:  00007fc902f20700(0000) GS:ffff88042fd40000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000008 CR3: 000000041cdb6000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process modprobe (pid: 2518, threadinfo ffff88041ef70000, task ffff88041d348040)
Stack:
 ffff88041ef71eb8 ffffffffa0017790 ffff88041ef71eb8 ffffffffa0013532
 ffff88041ef71ec8 ffffffffa00132d6 ffff88041ef71ed8 ffffffffa00159b2
 ffff88041ef71f78 ffffffff81073115 656c69666f72706f 0000000000610200
Call Trace:
 [<ffffffffa0013532>] op_nmi_exit+0x15/0x17 [oprofile]
 [<ffffffffa00132d6>] oprofile_arch_exit+0xe/0x10 [oprofile]
 [<ffffffffa00159b2>] oprofile_exit+0x1e/0x20 [oprofile]
 [<ffffffff81073115>] sys_delete_module+0x1c3/0x22f
 [<ffffffff811bf09e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
 [<ffffffff8148070b>] system_call_fastpath+0x16/0x1b
Code: 20 c6 78 81 e8 c5 cc 23 00 48 8b 13 48 8b 43 08 48 be 00 01 10 00 00 00 ad de 48 b9 00 02 20 00 00 00 ad de 48 c7 c7 20 c6 78 81
 89 42 08 48 89 10 48 89 33 48 89 4b 08 e8 a6 c0 23 00 5a 5b
RIP  [<ffffffff8123c226>] unregister_syscore_ops+0x41/0x58
 RSP <ffff88041ef71e98>
CR2: 0000000000000008
---[ end trace 43a541a52956b7b0 ]---

CC: stable@kernel.org # 2.6.37+
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/init.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch/x86')
diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c
index cdfe4c54deca..f148cf652678 100644
--- a/arch/x86/oprofile/init.c
+++ b/arch/x86/oprofile/init.c
@@ -21,6 +21,7 @@ extern int op_nmi_timer_init(struct oprofile_operations *ops);
 extern void op_nmi_exit(void);
 extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
 
+static int nmi_timer;
 
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
@@ -31,8 +32,9 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 #ifdef CONFIG_X86_LOCAL_APIC
 	ret = op_nmi_init(ops);
 #endif
+	nmi_timer = (ret != 0);
 #ifdef CONFIG_X86_IO_APIC
-	if (ret < 0)
+	if (nmi_timer)
 		ret = op_nmi_timer_init(ops);
 #endif
 	ops->backtrace = x86_backtrace;
@@ -44,6 +46,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 void oprofile_arch_exit(void)
 {
 #ifdef CONFIG_X86_LOCAL_APIC
-	op_nmi_exit();
+	if (!nmi_timer)
+		op_nmi_exit();
 #endif
 }
-- 
cgit v1.2.2


From 57d1c0c03c6b48b2b96870d831b9ce6b917f53ac Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 7 Oct 2011 13:36:40 +0200
Subject: perf/x86: Fix PEBS instruction unwind

Masami spotted that we always try to decode the instruction stream as
64bit instructions when running a 64bit kernel, this doesn't work for
ia32-compat proglets.

Use TIF_IA32 to detect if we need to use the 32bit instruction
decoder.

Reported-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: stable@kernel.org
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index c0d238f49db8..73da6b64f5b7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -493,6 +493,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	unsigned long from = cpuc->lbr_entries[0].from;
 	unsigned long old_to, to = cpuc->lbr_entries[0].to;
 	unsigned long ip = regs->ip;
+	int is_64bit = 0;
 
 	/*
 	 * We don't need to fixup if the PEBS assist is fault like
@@ -544,7 +545,10 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 		} else
 			kaddr = (void *)to;
 
-		kernel_insn_init(&insn, kaddr);
+#ifdef CONFIG_X86_64
+		is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
+#endif
+		insn_init(&insn, kaddr, is_64bit);
 		insn_get_length(&insn);
 		to += insn.length;
 	} while (to < ip);
-- 
cgit v1.2.2


From aa2bc1ade59003a379ffc485d6da2d92ea3370a6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 9 Nov 2011 17:56:37 +0100
Subject: perf: Don't use -ENOSPC for out of PMU resources

People (Linus) objected to using -ENOSPC to signal not having enough
resources on the PMU to satisfy the request. Use -EINVAL.

Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-xv8geaz2zpbjhlx0svmpp28n@git.kernel.org
[ merged to newer kernel, fixed up MIPS impact ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c    | 10 +++++-----
 arch/x86/kernel/cpu/perf_event_p4.c |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 640891014b2a..ff0e8d498750 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -588,7 +588,7 @@ done:
 				x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
 		}
 	}
-	return num ? -ENOSPC : 0;
+	return num ? -EINVAL : 0;
 }
 
 /*
@@ -607,7 +607,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 
 	if (is_x86_event(leader)) {
 		if (n >= max_count)
-			return -ENOSPC;
+			return -EINVAL;
 		cpuc->event_list[n] = leader;
 		n++;
 	}
@@ -620,7 +620,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 			continue;
 
 		if (n >= max_count)
-			return -ENOSPC;
+			return -EINVAL;
 
 		cpuc->event_list[n] = event;
 		n++;
@@ -1316,7 +1316,7 @@ static int validate_event(struct perf_event *event)
 	c = x86_pmu.get_event_constraints(fake_cpuc, event);
 
 	if (!c || !c->weight)
-		ret = -ENOSPC;
+		ret = -EINVAL;
 
 	if (x86_pmu.put_event_constraints)
 		x86_pmu.put_event_constraints(fake_cpuc, event);
@@ -1341,7 +1341,7 @@ static int validate_group(struct perf_event *event)
 {
 	struct perf_event *leader = event->group_leader;
 	struct cpu_hw_events *fake_cpuc;
-	int ret = -ENOSPC, n;
+	int ret = -EINVAL, n;
 
 	fake_cpuc = allocate_fake_cpuc();
 	if (IS_ERR(fake_cpuc))
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 492bf1358a7c..ef484d9d0a25 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1268,7 +1268,7 @@ reserve:
 	}
 
 done:
-	return num ? -ENOSPC : 0;
+	return num ? -EINVAL : 0;
 }
 
 static __initconst const struct x86_pmu p4_pmu = {
-- 
cgit v1.2.2


From ed13ec58bfe0d5dc95f748e6118432cb0fa283cb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 14 Nov 2011 10:03:25 +0100
Subject: perf/x86: Enable raw event access to Intel offcore events

Now that the core offcore support is fixed up (thanks Stephane) and we
have sane generic events utilizing them, re-enable the raw access to
the feature as well.

Note that it doesn't matter if you use event 0x1b7 or 0x1bb to specify
an offcore event, either one works and neither guarantees you'll end
up on a particular offcore MSR.

Based on original patch from: Vince Weaver <vweaver1@eecs.utk.edu>.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Vince Weaver <vweaver1@eecs.utk.edu>.
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/alpine.DEB.2.00.1108031200390.703@cl320.eecs.utk.edu
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ff0e8d498750..2bda212a0010 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -312,12 +312,8 @@ int x86_setup_perfctr(struct perf_event *event)
 			return -EOPNOTSUPP;
 	}
 
-	/*
-	 * Do not allow config1 (extended registers) to propagate,
-	 * there's no sane user-space generalization yet:
-	 */
 	if (attr->type == PERF_TYPE_RAW)
-		return 0;
+		return x86_pmu_extra_regs(event->attr.config, event);
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, event);
-- 
cgit v1.2.2


From 4cecf6d401a01d054afc1e5f605bcbfe553cb9b9 Mon Sep 17 00:00:00 2001
From: Salman Qazi <sqazi@google.com>
Date: Tue, 15 Nov 2011 14:12:06 -0800
Subject: sched, x86: Avoid unnecessary overflow in sched_clock

(Added the missing signed-off-by line)

In hundreds of days, the __cycles_2_ns calculation in sched_clock
has an overflow.  cyc * per_cpu(cyc2ns, cpu) exceeds 64 bits, causing
the final value to become zero.  We can solve this without losing
any precision.

We can decompose TSC into quotient and remainder of division by the
scale factor, and then use this to convert TSC into nanoseconds.

Signed-off-by: Salman Qazi <sqazi@google.com>
Acked-by: John Stultz <johnstul@us.ibm.com>
Reviewed-by: Paul Turner <pjt@google.com>
Cc: stable@kernel.org
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20111115221121.7262.88871.stgit@dungbeetle.mtv.corp.google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/timer.h | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index fa7b9176b76c..431793e5d484 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -32,6 +32,22 @@ extern int no_timer_check;
  *  (mathieu.desnoyers@polymtl.ca)
  *
  *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
+ *
+ * In:
+ *
+ * ns = cycles * cyc2ns_scale / SC
+ *
+ * Although we may still have enough bits to store the value of ns,
+ * in some cases, we may not have enough bits to store cycles * cyc2ns_scale,
+ * leading to an incorrect result.
+ *
+ * To avoid this, we can decompose 'cycles' into quotient and remainder
+ * of division by SC.  Then,
+ *
+ * ns = (quot * SC + rem) * cyc2ns_scale / SC
+ *    = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC
+ *
+ *			- sqazi@google.com
  */
 
 DECLARE_PER_CPU(unsigned long, cyc2ns);
@@ -41,9 +57,14 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
 
 static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
 {
+	unsigned long long quot;
+	unsigned long long rem;
 	int cpu = smp_processor_id();
 	unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
-	ns += cyc * per_cpu(cyc2ns, cpu) >> CYC2NS_SCALE_FACTOR;
+	quot = (cyc >> CYC2NS_SCALE_FACTOR);
+	rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1);
+	ns += quot * per_cpu(cyc2ns, cpu) +
+		((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR);
 	return ns;
 }
 
-- 
cgit v1.2.2


From e5fd47bfab2df0c2184cc0bf4245d8e1bb7724fb Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 21 Nov 2011 18:02:02 -0500
Subject: xen/pm_idle: Make pm_idle be default_idle under Xen.

The idea behind commit d91ee5863b71 ("cpuidle: replace xen access to x86
pm_idle and default_idle") was to have one call - disable_cpuidle()
which would make pm_idle not be molested by other code.  It disallows
cpuidle_idle_call to be set to pm_idle (which is excellent).

But in the select_idle_routine() and idle_setup(), the pm_idle can still
be set to either: amd_e400_idle, mwait_idle or default_idle.  This
depends on some CPU flags (MWAIT) and in AMD case on the type of CPU.

In case of mwait_idle we can hit some instances where the hypervisor
(Amazon EC2 specifically) sets the MWAIT and we get:

  Brought up 2 CPUs
  invalid opcode: 0000 [#1] SMP

  Pid: 0, comm: swapper Not tainted 3.1.0-0.rc6.git0.3.fc16.x86_64 #1
  RIP: e030:[<ffffffff81015d1d>]  [<ffffffff81015d1d>] mwait_idle+0x6f/0xb4
  ...
  Call Trace:
   [<ffffffff8100e2ed>] cpu_idle+0xae/0xe8
   [<ffffffff8149ee78>] cpu_bringup_and_idle+0xe/0x10
  RIP  [<ffffffff81015d1d>] mwait_idle+0x6f/0xb4
   RSP <ffff8801d28ddf10>

In the case of amd_e400_idle we don't get so spectacular crashes, but we
do end up making an MSR which is trapped in the hypervisor, and then
follow it up with a yield hypercall.  Meaning we end up going to
hypervisor twice instead of just once.

The previous behavior before v3.0 was that pm_idle was set to
default_idle regardless of select_idle_routine/idle_setup.

We want to do that, but only for one specific case: Xen.  This patch
does that.

Fixes RH BZ #739499 and Ubuntu #881076
Reported-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/system.h | 1 +
 arch/x86/kernel/process.c     | 8 ++++++++
 arch/x86/xen/setup.c          | 2 +-
 3 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index c2ff2a1d845e..2d2f01ce6dcb 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -401,6 +401,7 @@ extern unsigned long arch_align_stack(unsigned long sp);
 extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
 
 void default_idle(void);
+bool set_pm_idle_to_default(void);
 
 void stop_this_cpu(void *dummy);
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b9b3b1a51643..ee5d4fbd53b4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -403,6 +403,14 @@ void default_idle(void)
 EXPORT_SYMBOL(default_idle);
 #endif
 
+bool set_pm_idle_to_default(void)
+{
+	bool ret = !!pm_idle;
+
+	pm_idle = default_idle;
+
+	return ret;
+}
 void stop_this_cpu(void *dummy)
 {
 	local_irq_disable();
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 38d0af4fefec..1093f80c162d 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -410,6 +410,6 @@ void __init xen_arch_setup(void)
 #endif
 	disable_cpuidle();
 	boot_option_idle_override = IDLE_HALT;
-
+	WARN_ON(set_pm_idle_to_default());
 	fiddle_vdso();
 }
-- 
cgit v1.2.2


From 8e8da023f5af71662867729db5547dc54786093c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 4 Dec 2011 11:57:09 -0800
Subject: x86: Fix boot failures on older AMD CPU's

People with old AMD chips are getting hung boots, because commit
bcb80e53877c ("x86, microcode, AMD: Add microcode revision to
/proc/cpuinfo") moved the microcode detection too early into
"early_init_amd()".

At that point we are *so* early in the booth that the exception tables
haven't even been set up yet, so the whole

	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);

doesn't actually work: if the rdmsr does a GP fault (due to non-existant
MSR register on older CPU's), we can't fix it up yet, and the boot fails.

Fix it by simply moving the code to a slightly later point in the boot
(init_amd() instead of early_init_amd()), since the kernel itself
doesn't even really care about the microcode patchlevel at this point
(or really ever: it's made available to user space in /proc/cpuinfo, and
updated if you do a microcode load).

Reported-tested-and-bisected-by:  Larry Finger <Larry.Finger@lwfinger.net>
Tested-by: Bob Tracy <rct@gherkin.frus.com>
Acked-by: Borislav Petkov <borislav.petkov@amd.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/cpu/amd.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c7e46cb35327..0bab2b18bb20 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -442,8 +442,6 @@ static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c)
 
 static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 {
-	u32 dummy;
-
 	early_init_amd_mc(c);
 
 	/*
@@ -473,12 +471,12 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 			set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
 	}
 #endif
-
-	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
 }
 
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 {
+	u32 dummy;
+
 #ifdef CONFIG_SMP
 	unsigned long long value;
 
@@ -657,6 +655,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 			checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask);
 		}
 	}
+
+	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
 }
 
 #ifdef CONFIG_X86_32
-- 
cgit v1.2.2


From 6a600a8b8749566a7d81ad75dcb8bf5342b5a39a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 15 Nov 2011 10:51:15 +0100
Subject: perf, x86: Disable PEBS on SandyBridge chips

Cc: Stephane Eranian <eranian@google.com>
Cc: stable@kernel.org
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 2be5ebe99872..8d601b18bf9f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1545,6 +1545,13 @@ static void intel_clovertown_quirks(void)
 	x86_pmu.pebs_constraints = NULL;
 }
 
+static void intel_sandybridge_quirks(void)
+{
+	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
+	x86_pmu.pebs = 0;
+	x86_pmu.pebs_constraints = NULL;
+}
+
 __init int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
@@ -1694,6 +1701,7 @@ __init int intel_pmu_init(void)
 		break;
 
 	case 42: /* SandyBridge */
+		x86_pmu.quirks = intel_sandybridge_quirks;
 	case 45: /* SandyBridge, "Romely-EP" */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
-- 
cgit v1.2.2


From 16e5294e5f8303756a179cf218e37dfb9ed34417 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Tue, 8 Nov 2011 19:20:44 +0100
Subject: perf, x86: Force IBS LVT offset assignment for family 10h

On AMD family 10h we see firmware bug messages like the following:

 [Firmware Bug]: cpu 6, try to use APIC500 (LVT offset 0) for vector 0x10400, but the register is already in use for vector 0xf9 on another cpu
 [Firmware Bug]: cpu 6, IBS interrupt offset 0 not available (MSRC001103A=0x0000000000000100)
 [Firmware Bug]: using offset 1 for IBS interrupts
 [Firmware Bug]: workaround enabled for IBS LVT offset
 perf: AMD IBS detected (0x00000007)

We always see this, since the offsets are not assigned by the BIOS for
this family. Force LVT offset assignment in this case. If the OS
assignment fails, fallback to BIOS settings and try to setup this.

The fallback to BIOS settings weakens the family check since
force_ibs_eilvt_setup() may fail e.g. in case of virtual machines.
But setup may still succeed if BIOS offsets are correct.

Other families don't have a workaround implemented that assigns LVT
offsets. It's ok, to drop calling force_ibs_eilvt_setup() for that
families.

With the patch the [Firmware Bug] messages vanish. We see now:

 IBS: LVT offset 1 assigned
 perf: AMD IBS detected (0x00000007)

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20111109162225.GO12451@erda.amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_amd_ibs.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index ab6343d21825..3b8a2d30d14e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -199,8 +199,7 @@ static int force_ibs_eilvt_setup(void)
 		goto out;
 	}
 
-	pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset);
-	pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
+	pr_info("IBS: LVT offset %d assigned\n", offset);
 
 	return 0;
 out:
@@ -265,19 +264,23 @@ perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *h
 static __init int amd_ibs_init(void)
 {
 	u32 caps;
-	int ret;
+	int ret = -EINVAL;
 
 	caps = __get_ibs_caps();
 	if (!caps)
 		return -ENODEV;	/* ibs not supported by the cpu */
 
-	if (!ibs_eilvt_valid()) {
-		ret = force_ibs_eilvt_setup();
-		if (ret) {
-			pr_err("Failed to setup IBS, %d\n", ret);
-			return ret;
-		}
-	}
+	/*
+	 * Force LVT offset assignment for family 10h: The offsets are
+	 * not assigned by the BIOS for this family, so the OS is
+	 * responsible for doing it. If the OS assignment fails, fall
+	 * back to BIOS settings and try to setup this.
+	 */
+	if (boot_cpu_data.x86 == 0x10)
+		force_ibs_eilvt_setup();
+
+	if (!ibs_eilvt_valid())
+		goto out;
 
 	get_online_cpus();
 	ibs_caps = caps;
@@ -287,7 +290,11 @@ static __init int amd_ibs_init(void)
 	smp_call_function(setup_APIC_ibs, NULL, 1);
 	put_online_cpus();
 
-	return perf_event_ibs_init();
+	ret = perf_event_ibs_init();
+out:
+	if (ret)
+		pr_err("Failed to setup IBS, %d\n", ret);
+	return ret;
 }
 
 /* Since we need the pci subsystem to init ibs we can't do this earlier: */
-- 
cgit v1.2.2


From 69682b625a043b567873e6cda397969b502f0054 Mon Sep 17 00:00:00 2001
From: Mitsuo Hayasaka <mitsuo.hayasaka.hu@hitachi.com>
Date: Tue, 29 Nov 2011 15:08:21 +0900
Subject: x86: Add user_mode_vm check in stack_overflow_check

The kernel stack overflow is checked in stack_overflow_check(),
which may wrongly detect the overflow if the stack pointer in
user space points to the kernel stack intentionally or
accidentally. So, the actual overflow is never detected after
this misdetection because WARN_ONCE() is used on the detection
of it.

This patch adds user-mode-vm checking before it to avoid this
problem and bails out early if the user stack is used.

Signed-off-by: Mitsuo Hayasaka <mitsuo.hayasaka.hu@hitachi.com>
Cc: yrl.pp-manager.tt@hitachi.com
Cc: Randy Dunlap <rdunlap@xenotime.net>
Link: http://lkml.kernel.org/r/20111129060821.11076.55315.stgit@ltc219.sdl.hitachi.co.jp
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/kernel/irq_64.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index acf8fbf8fbda..69bca468c47a 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -38,6 +38,9 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 	u64 curbase = (u64)task_stack_page(current);
 
+	if (user_mode_vm(regs))
+		return;
+
 	WARN_ONCE(regs->sp >= curbase &&
 		  regs->sp <= curbase + THREAD_SIZE &&
 		  regs->sp <  curbase + sizeof(struct thread_info) +
-- 
cgit v1.2.2


From b495e039b4ce2ce4a96b3006004faf082f4d50e2 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 29 Nov 2011 15:00:58 -0600
Subject: x86, UV: Fix UV2 hub part number

There was a mixup when the SGI UV2 hub chip was sent to be
fabricated, and it ended up with the wrong part number in the
HRP_NODE_ID mmr. Future versions of the chip will (may) have the
correct part number. Change the UV infrastructure to recognize
both part numbers as valid IDs of a UV2 hub chip.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Link: http://lkml.kernel.org/r/20111129210058.GA20452@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uv/uv_mmrs.h  | 1 +
 arch/x86/kernel/apic/x2apic_uv_x.c | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 10474fb1185d..cf1d73643f60 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -57,6 +57,7 @@
 
 #define UV1_HUB_PART_NUMBER	0x88a5
 #define UV2_HUB_PART_NUMBER	0x8eb8
+#define UV2_HUB_PART_NUMBER_X	0x1111
 
 /* Compat: if this #define is present, UV headers support UV2 */
 #define UV2_HUB_IS_SUPPORTED	1
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 62ae3001ae02..9d59bbacd4e3 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -93,6 +93,8 @@ static int __init early_get_pnodeid(void)
 
 	if (node_id.s.part_number == UV2_HUB_PART_NUMBER)
 		uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
+	if (node_id.s.part_number == UV2_HUB_PART_NUMBER_X)
+		uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
 
 	uv_hub_info->hub_revision = uv_min_hub_revision_id;
 	pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
-- 
cgit v1.2.2


From 6be30bb7d7504ec687a65c9bbdae8d1d2f8eaa19 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 16 Nov 2011 00:19:51 +0100
Subject: x86/reboot: Blacklist Dell OptiPlex 990 known to require PCI reboot

Dell OptiPlex 990 is known to require PCI reboot, so add it to
the reboot blacklist in pci_reboot_dmi_table[].

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Link: http://lkml.kernel.org/r/201111160019.51303.rjw@sisk.pl
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/reboot.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e334be1182b9..4463d2fee4e3 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -443,6 +443,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"),
 		},
 	},
+	{	/* Handle problems with rebooting on the OptiPlex 990. */
+		.callback = set_pci_reboot,
+		.ident = "Dell OptiPlex 990",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"),
+		},
+	},
 	{ }
 };
 
-- 
cgit v1.2.2


From 48bc5562103412590e4487b666b49e7b6c84ba44 Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan@linux.intel.com>
Date: Wed, 16 Nov 2011 16:07:22 +0000
Subject: x86,mrst: Power control commands update

On the Intel MID devices SCU commands are issued to manage power
off and the like. We need to issue different ones for
non-Lincroft based devices.

Signed-off-by: Alek Du <alek.du@intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/intel_scu_ipc.h | 14 +++++++++-----
 arch/x86/platform/mrst/mrst.c        | 25 ++++++++++++++-----------
 2 files changed, 23 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h
index 4420993acc47..925b605eb5c6 100644
--- a/arch/x86/include/asm/intel_scu_ipc.h
+++ b/arch/x86/include/asm/intel_scu_ipc.h
@@ -3,11 +3,15 @@
 
 #include <linux/notifier.h>
 
-#define IPCMSG_VRTC	0xFA	 /* Set vRTC device */
-
-/* Command id associated with message IPCMSG_VRTC */
-#define IPC_CMD_VRTC_SETTIME      1 /* Set time */
-#define IPC_CMD_VRTC_SETALARM     2 /* Set alarm */
+#define IPCMSG_WARM_RESET	0xF0
+#define IPCMSG_COLD_RESET	0xF1
+#define IPCMSG_SOFT_RESET	0xF2
+#define IPCMSG_COLD_BOOT	0xF3
+
+#define IPCMSG_VRTC		0xFA	 /* Set vRTC device */
+	/* Command id associated with message IPCMSG_VRTC */
+	#define IPC_CMD_VRTC_SETTIME      1 /* Set time */
+	#define IPC_CMD_VRTC_SETALARM     2 /* Set alarm */
 
 /* Read single register */
 int intel_scu_ipc_ioread8(u16 addr, u8 *data);
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index b1489a06a49d..fedc3d7ffa77 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -76,6 +76,20 @@ struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
 EXPORT_SYMBOL_GPL(sfi_mrtc_array);
 int sfi_mrtc_num;
 
+static void mrst_power_off(void)
+{
+	if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT)
+		intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 1);
+}
+
+static void mrst_reboot(void)
+{
+	if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT)
+		intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
+	else
+		intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
+}
+
 /* parse all the mtimer info to a static mtimer array */
 static int __init sfi_parse_mtmr(struct sfi_table_header *table)
 {
@@ -265,17 +279,6 @@ static int mrst_i8042_detect(void)
 	return 0;
 }
 
-/* Reboot and power off are handled by the SCU on a MID device */
-static void mrst_power_off(void)
-{
-	intel_scu_ipc_simple_command(0xf1, 1);
-}
-
-static void mrst_reboot(void)
-{
-	intel_scu_ipc_simple_command(0xf1, 0);
-}
-
 /*
  * Moorestown does not have external NMI source nor port 0x61 to report
  * NMI status. The possible NMI sources are from pmu as a result of NMI
-- 
cgit v1.2.2


From 28744b3e9c85ea281d6371d4914c2498bceec10c Mon Sep 17 00:00:00 2001
From: Jekyll Lai <jekyll_lai@wistron.com>
Date: Wed, 16 Nov 2011 18:01:20 +0000
Subject: mrst: Added some platform data for the SFI translations

Add SFI glue for the following devices:

tca6416: a gpio expander compatible with max7315
mpu3050: gyro sensor

Both of these actual drivers are already upstream

Signed-off-by: Jekyll Lai <jekyll_lai@wistron.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/platform/mrst/mrst.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index fedc3d7ffa77..940a0807a5ca 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -487,6 +487,46 @@ static void __init *max7315_platform_data(void *info)
 	return max7315;
 }
 
+static void *tca6416_platform_data(void *info)
+{
+	static struct pca953x_platform_data tca6416;
+	struct i2c_board_info *i2c_info = info;
+	int gpio_base, intr;
+	char base_pin_name[SFI_NAME_LEN + 1];
+	char intr_pin_name[SFI_NAME_LEN + 1];
+
+	strcpy(i2c_info->type, "tca6416");
+	strcpy(base_pin_name, "tca6416_base");
+	strcpy(intr_pin_name, "tca6416_int");
+
+	gpio_base = get_gpio_by_name(base_pin_name);
+	intr = get_gpio_by_name(intr_pin_name);
+
+	if (gpio_base == -1)
+		return NULL;
+	tca6416.gpio_base = gpio_base;
+	if (intr != -1) {
+		i2c_info->irq = intr + MRST_IRQ_OFFSET;
+		tca6416.irq_base = gpio_base + MRST_IRQ_OFFSET;
+	} else {
+		i2c_info->irq = -1;
+		tca6416.irq_base = -1;
+	}
+	return &tca6416;
+}
+
+static void *mpu3050_platform_data(void *info)
+{
+	struct i2c_board_info *i2c_info = info;
+	int intr = get_gpio_by_name("mpu3050_int");
+
+	if (intr == -1)
+		return NULL;
+
+	i2c_info->irq = intr + MRST_IRQ_OFFSET;
+	return NULL;
+}
+
 static void __init *emc1403_platform_data(void *info)
 {
 	static short intr2nd_pdata;
@@ -652,9 +692,11 @@ static const struct devs_id __initconst device_ids[] = {
 	{"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data},
 	{"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
 	{"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
+	{"tca6416", SFI_DEV_TYPE_I2C, 1, &tca6416_platform_data},
 	{"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data},
 	{"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
 	{"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
+	{"mpu3050", SFI_DEV_TYPE_I2C, 1, &mpu3050_platform_data},
 
 	/* MSIC subdevices */
 	{"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data},
-- 
cgit v1.2.2


From efa221268566c2caa0e2a540068a2e8250fbb819 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Wed, 16 Nov 2011 18:32:34 +0000
Subject: x86, mrst: Change the pmic_gpio device type to IPC

In latest firmware's SFI tables, pmic_gpio has been set to
IPC type of device, so we need handle it too.

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/platform/mrst/mrst.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 940a0807a5ca..ad4ec1cb097e 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -689,6 +689,7 @@ static void *msic_ocd_platform_data(void *info)
 static const struct devs_id __initconst device_ids[] = {
 	{"bma023", SFI_DEV_TYPE_I2C, 1, &no_platform_data},
 	{"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
+	{"pmic_gpio", SFI_DEV_TYPE_IPC, 1, &pmic_gpio_platform_data},
 	{"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data},
 	{"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
 	{"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
-- 
cgit v1.2.2


From 9e6866686bdf2dcf3aeb0838076237ede532dcc8 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Sun, 25 Sep 2011 15:29:00 -0600
Subject: x86/mpparse: Account for bus types other than ISA and PCI

In commit f8924e770e04 ("x86: unify mp_bus_info"), the 32-bit
and 64-bit versions of MP_bus_info were rearranged to match each
other better.  Unfortunately it introduced a regression: prior
to that change we used to always set the mp_bus_not_pci bit,
then clear it if we found a PCI bus.  After it, we set
mp_bus_not_pci for ISA buses, clear it for PCI buses, and leave
it alone otherwise.

In the cases of ISA and PCI, there's not much difference.  But
ISA is not the only non-PCI bus, so it's better to always set
mp_bus_not_pci and clear it only for PCI.

Without this change, Dan's Dell PowerEdge 4200 panics on boot
with a log indicating interrupt routing trouble unless the
"noapic" option is supplied.  With this change, the machine
boots reliably without "noapic".

Fixes http://bugs.debian.org/586494

Reported-bisected-and-tested-by: Dan McGrath <troubledaemon@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: stable@vger.kernel.org	# 2.6.26+
Cc: Dan McGrath <troubledaemon@gmail.com>
Cc: Alexey Starikovskiy <aystarik@gmail.com>
[jrnieder@gmail.com: clarified commit message]
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Link: http://lkml.kernel.org/r/20111122215000.GA9151@elie.hsd1.il.comcast.net
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/mpparse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 9103b89c145a..0741b062a304 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_bus *m)
 	}
 #endif
 
+	set_bit(m->busid, mp_bus_not_pci);
 	if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
-		set_bit(m->busid, mp_bus_not_pci);
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 		mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
 #endif
-- 
cgit v1.2.2


From 644ddf588f5dba34df483a6ea8abe639cc102289 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Tue, 18 Oct 2011 13:24:10 -0400
Subject: Add TAINT_FIRMWARE_WORKAROUND on MTRR fixup

TAINT_FIRMWARE_WORKAROUND should be set when an MTRR fixup
is done.

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Link: http://lkml.kernel.org/r/1318958650-12447-1-git-send-email-prarit@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/mtrr/generic.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index a71efcdbb092..e1fe7f42e0fb 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -547,6 +547,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 
 		if (tmp != mask_lo) {
 			printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
+			add_taint(TAINT_FIRMWARE_WORKAROUND);
 			mask_lo = tmp;
 		}
 	}
-- 
cgit v1.2.2


From bd399063976c6c7a09beb4730ed1d93cadbcc739 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Mon, 7 Nov 2011 18:05:32 +0530
Subject: x86, microcode: Fix the failure path of microcode update driver init
 code

The microcode update driver's initialization code does not handle
failures correctly. This patch fixes this issue.

Signed-off-by: Jan Beulich <JBeulich@suse.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20111107123530.12164.31227.stgit@srivatsabhat.in.ibm.com
Link: http://lkml.kernel.org/r/4ED8E2270200007800065120@nat28.tlf.novell.com
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
 arch/x86/kernel/microcode_core.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index f2d2a664e797..9d46f5e43b51 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -256,7 +256,7 @@ static int __init microcode_dev_init(void)
 	return 0;
 }
 
-static void microcode_dev_exit(void)
+static void __exit microcode_dev_exit(void)
 {
 	misc_deregister(&microcode_dev);
 }
@@ -519,10 +519,8 @@ static int __init microcode_init(void)
 
 	microcode_pdev = platform_device_register_simple("microcode", -1,
 							 NULL, 0);
-	if (IS_ERR(microcode_pdev)) {
-		microcode_dev_exit();
+	if (IS_ERR(microcode_pdev))
 		return PTR_ERR(microcode_pdev);
-	}
 
 	get_online_cpus();
 	mutex_lock(&microcode_mutex);
@@ -532,14 +530,12 @@ static int __init microcode_init(void)
 	mutex_unlock(&microcode_mutex);
 	put_online_cpus();
 
-	if (error) {
-		platform_device_unregister(microcode_pdev);
-		return error;
-	}
+	if (error)
+		goto out_pdev;
 
 	error = microcode_dev_init();
 	if (error)
-		return error;
+		goto out_sysdev_driver;
 
 	register_syscore_ops(&mc_syscore_ops);
 	register_hotcpu_notifier(&mc_cpu_notifier);
@@ -548,6 +544,20 @@ static int __init microcode_init(void)
 		" <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n");
 
 	return 0;
+
+out_sysdev_driver:
+	get_online_cpus();
+	mutex_lock(&microcode_mutex);
+
+	sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
+
+	mutex_unlock(&microcode_mutex);
+	put_online_cpus();
+
+out_pdev:
+	platform_device_unregister(microcode_pdev);
+	return error;
+
 }
 module_init(microcode_init);
 
-- 
cgit v1.2.2


From ce37defc0f6673f5ca2c92ed5cfcaf290ae7dd16 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 5 Dec 2011 14:28:37 +0100
Subject: x86: Document rdmsr_safe restrictions

Recently, I got bitten by using rdmsr_safe too early in the boot
process. Document its shortcomings for future reference.

Link: http://lkml.kernel.org/r/4ED5B70F.606@lwfinger.net
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
 arch/x86/include/asm/msr.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 084ef95274cd..95203d40ffdd 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -169,7 +169,14 @@ static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
 	return native_write_msr_safe(msr, low, high);
 }
 
-/* rdmsr with exception handling */
+/*
+ * rdmsr with exception handling.
+ *
+ * Please note that the exception handling works only after we've
+ * switched to the "smart" #GP handler in trap_init() which knows about
+ * exception tables - using this macro earlier than that causes machine
+ * hangs on boxes which do not implement the @msr in the first argument.
+ */
 #define rdmsr_safe(msr, p1, p2)					\
 ({								\
 	int __err;						\
-- 
cgit v1.2.2


From 8dbf4a30033ff61091015f0076e872b5c8f717cc Mon Sep 17 00:00:00 2001
From: Ajaykumar Hotchandani <ajaykumar.hotchandani@oracle.com>
Date: Fri, 11 Nov 2011 18:31:57 +0530
Subject: x86/mtrr: Resolve inconsistency with Intel processor manual

Following is from Notes of section 11.5.3 of Intel processor
manual available at:

  http://www.intel.com/Assets/PDF/manual/325384.pdf

For the Pentium 4 and Intel Xeon processors, after the sequence of
steps given above has been executed, the cache lines containing the
code between the end of the WBINVD instruction and before the
MTRRS have actually been disabled may be retained in the cache
hierarchy. Here, to remove code from the cache completely, a
second WBINVD instruction must be executed after the MTRRs have
been disabled.

This patch provides resolution for that.

Ideally, I will like to make changes only for Pentium 4 and Xeon
processors. But, I am not finding easier way to do it.
And, extra wbinvd() instruction does not hurt much for other
processors.

Signed-off-by: Ajaykumar Hotchandani <ajaykumar.hotchandani@oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Link: http://lkml.kernel.org/r/4EBD1CC5.3030008@oracle.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/mtrr/generic.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index e1fe7f42e0fb..97b26356e9ee 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -694,6 +694,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 
 	/* Disable MTRRs, and set the default type to uncached */
 	mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
+	wbinvd();
 }
 
 static void post_set(void) __releases(set_atomicity_lock)
-- 
cgit v1.2.2


From 1ef03890969932e9359b9a4c658f7f87771910ac Mon Sep 17 00:00:00 2001
From: Peter Chubb <peter.chubb@nicta.com.au>
Date: Mon, 5 Dec 2011 16:53:53 +0300
Subject: x86: Fix "Acer Aspire 1" reboot hang
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Looks like on some Acer Aspire 1s with older bioses, reboot via bios
fails.  It works on my machine, (with BIOS version 0.3310) but
not on some others (BIOS version 0.3309).

There's a log of problems at:

  https://bbs.archlinux.org/viewtopic.php?id=124136

This patch adds a different callback to the reboot quirk table,
to allow rebooting via keybaord controller.

Reported-by: Uroš Vampl <mobile.leecher@gmail.com>
Tested-by: Vasily Khoruzhick <anarsoul@gmail.com>
Signed-off-by: Peter Chubb <peter.chubb@nicta.com.au>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: stable@kernel.org
Link: http://lkml.kernel.org/r/1323093233-9481-1-git-send-email-anarsoul@gmail.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/reboot.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 4463d2fee4e3..37a458b521a6 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -124,7 +124,7 @@ __setup("reboot=", reboot_setup);
  */
 
 /*
- * Some machines require the "reboot=b"  commandline option,
+ * Some machines require the "reboot=b" or "reboot=k"  commandline options,
  * this quirk makes that automatic.
  */
 static int __init set_bios_reboot(const struct dmi_system_id *d)
@@ -136,6 +136,15 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
 	return 0;
 }
 
+static int __init set_kbd_reboot(const struct dmi_system_id *d)
+{
+	if (reboot_type != BOOT_KBD) {
+		reboot_type = BOOT_KBD;
+		printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident);
+	}
+	return 0;
+}
+
 static struct dmi_system_id __initdata reboot_dmi_table[] = {
 	{	/* Handle problems with rebooting on Dell E520's */
 		.callback = set_bios_reboot,
@@ -295,7 +304,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 		},
 	},
 	{ /* Handle reboot issue on Acer Aspire one */
-		.callback = set_bios_reboot,
+		.callback = set_kbd_reboot,
 		.ident = "Acer Aspire One A110",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
-- 
cgit v1.2.2


From 2cd1c8d4dc7ecca9e9431e2dabe41ae9c7d89e51 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 15 Nov 2011 14:49:09 -0800
Subject: x86/paravirt: PTE updates in k(un)map_atomic need to be synchronous,
 regardless of lazy_mmu mode

Fix an outstanding issue that has been reported since 2.6.37.
Under a heavy loaded machine processing "fork()" calls could
crash with:

BUG: unable to handle kernel paging request at f573fc8c
IP: [<c01abc54>] swap_count_continued+0x104/0x180
*pdpt = 000000002a3b9027 *pde = 0000000001bed067 *pte = 0000000000000000 Oops: 0000 [#1] SMP
Modules linked in:
Pid: 1638, comm: apache2 Not tainted 3.0.4-linode37 #1
EIP: 0061:[<c01abc54>] EFLAGS: 00210246 CPU: 3
EIP is at swap_count_continued+0x104/0x180
.. snip..
Call Trace:
 [<c01ac222>] ? __swap_duplicate+0xc2/0x160
 [<c01040f7>] ? pte_mfn_to_pfn+0x87/0xe0
 [<c01ac2e4>] ? swap_duplicate+0x14/0x40
 [<c01a0a6b>] ? copy_pte_range+0x45b/0x500
 [<c01a0ca5>] ? copy_page_range+0x195/0x200
 [<c01328c6>] ? dup_mmap+0x1c6/0x2c0
 [<c0132cf8>] ? dup_mm+0xa8/0x130
 [<c013376a>] ? copy_process+0x98a/0xb30
 [<c013395f>] ? do_fork+0x4f/0x280
 [<c01573b3>] ? getnstimeofday+0x43/0x100
 [<c010f770>] ? sys_clone+0x30/0x40
 [<c06c048d>] ? ptregs_clone+0x15/0x48
 [<c06bfb71>] ? syscall_call+0x7/0xb

The problem is that in copy_page_range() we turn lazy mode on,
and then in swap_entry_free() we call swap_count_continued()
which ends up in:

         map = kmap_atomic(page, KM_USER0) + offset;

and then later we touch *map.

Since we are running in batched mode (lazy) we don't actually
set up the PTE mappings and the kmap_atomic is not done
synchronously and ends up trying to dereference a page that has
not been set.

Looking at kmap_atomic_prot_pfn(), it uses
'arch_flush_lazy_mmu_mode' and doing the same in
kmap_atomic_prot() and __kunmap_atomic() makes the problem go
away.

Interestingly, commit b8bcfe997e4615 ("x86/paravirt: remove lazy
mode in interrupts") removed part of this to fix an interrupt
issue - but it went to far and did not consider this scenario.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/highmem_32.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index b49962662101..f4f29b19fac5 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -45,6 +45,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	BUG_ON(!pte_none(*(kmap_pte-idx)));
 	set_pte(kmap_pte-idx, mk_pte(page, prot));
+	arch_flush_lazy_mmu_mode();
 
 	return (void *)vaddr;
 }
@@ -88,6 +89,7 @@ void __kunmap_atomic(void *kvaddr)
 		 */
 		kpte_clear_flush(kmap_pte-idx, vaddr);
 		kmap_atomic_idx_pop();
+		arch_flush_lazy_mmu_mode();
 	}
 #ifdef CONFIG_DEBUG_HIGHMEM
 	else {
-- 
cgit v1.2.2


From 35d476996288af6a4aaa8b172bcd31decd233de7 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Tue, 15 Nov 2011 14:46:52 -0800
Subject: x86/rtc, mrst: Don't register a platform RTC device for for Intel MID
 platforms

Intel MID x86 platforms have a memory mapped virtual RTC
instead.  No MID platform have the default ports (and
accessing them may do weird stuff).

Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Cc: feng.tang@intel.com
Cc: Feng Tang <feng.tang@intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mrst.h | 9 +++++++++
 arch/x86/kernel/rtc.c       | 5 +++++
 2 files changed, 14 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
index e6283129c821..93f79094c224 100644
--- a/arch/x86/include/asm/mrst.h
+++ b/arch/x86/include/asm/mrst.h
@@ -31,11 +31,20 @@ enum mrst_cpu_type {
 };
 
 extern enum mrst_cpu_type __mrst_cpu_chip;
+
+#ifdef CONFIG_X86_INTEL_MID
+
 static inline enum mrst_cpu_type mrst_identify_cpu(void)
 {
 	return __mrst_cpu_chip;
 }
 
+#else /* !CONFIG_X86_INTEL_MID */
+
+#define mrst_identify_cpu()    (0)
+
+#endif /* !CONFIG_X86_INTEL_MID */
+
 enum mrst_timer_options {
 	MRST_TIMER_DEFAULT,
 	MRST_TIMER_APBT_ONLY,
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 348ce016a835..af6db6ec5b2a 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -12,6 +12,7 @@
 #include <asm/vsyscall.h>
 #include <asm/x86_init.h>
 #include <asm/time.h>
+#include <asm/mrst.h>
 
 #ifdef CONFIG_X86_32
 /*
@@ -242,6 +243,10 @@ static __init int add_rtc_cmos(void)
 	if (of_have_populated_dt())
 		return 0;
 
+	/* Intel MID platforms don't have ioport rtc */
+	if (mrst_identify_cpu())
+		return -ENODEV;
+
 	platform_device_register(&rtc_device);
 	dev_info(&rtc_device.dev,
 		 "registered platform RTC device (no PNP device found)\n");
-- 
cgit v1.2.2


From f62ef5f3e9cff065aa845e2b7f487e1810b8e57e Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Fri, 2 Dec 2011 08:21:43 +0100
Subject: x86, amd: Fix up numa_node information for AMD CPU family 15h model
 0-0fh northbridge functions

I've received complaints that the numa_node attribute for family
15h model 00-0fh (e.g. Interlagos) northbridge functions shows
-1 instead of the proper node ID.

Correct this with attached quirks (similar to quirks for other
AMD CPU families used in multi-socket systems).

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Frank Arnold <frank.arnold@amd.com>
Cc: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/20111202072143.GA31916@alberich.amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/quirks.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index b78643d0f9a5..03920a15a632 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -553,4 +553,17 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC,
 			quirk_amd_nb_node);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK,
 			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F0,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F1,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F2,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F5,
+			quirk_amd_nb_node);
+
 #endif
-- 
cgit v1.2.2


From dd13752537d36cc6c145cb040f71ce7acda31e6e Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Mon, 5 Dec 2011 23:14:39 +0000
Subject: x86/intel_mid: Fix the Kconfig for MID selection

We currently fail to build on CONFIG_X86_INTEL_MID=y and
CONFIG_X86_MRST unset.

We could build all the bits to make generic MID work if you
picked MID platform alone but that's really silly. Instead use
select and two variables.

This looks a bit daft right now but once we add a Medfield
selection it'll start to look a good deal more sensible.

Reported-by: Ingo Molnar <mingo@elte.hu>
Reported-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Link: http://lkml.kernel.org/r/20111205231433.28811.51297.stgit@bob.linux.org.uk
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cb9a1044a771..5553f9f1a70d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -390,7 +390,7 @@ config X86_INTEL_CE
 	  This option compiles in support for the CE4100 SOC for settop
 	  boxes and media devices.
 
-config X86_INTEL_MID
+config X86_WANT_INTEL_MID
 	bool "Intel MID platform support"
 	depends on X86_32
 	depends on X86_EXTENDED_PLATFORM
@@ -399,7 +399,7 @@ config X86_INTEL_MID
 	  systems which do not have the PCI legacy interfaces (Moorestown,
 	  Medfield). If you are building for a PC class system say N here.
 
-if X86_INTEL_MID
+if X86_WANT_INTEL_MID
 
 config X86_MRST
        bool "Moorestown MID platform"
@@ -411,6 +411,7 @@ config X86_MRST
 	select SPI
 	select INTEL_SCU_IPC
 	select X86_PLATFORM_DEVICES
+	select X86_INTEL_MID
 	---help---
 	  Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
 	  Internet Device(MID) platform. Moorestown consists of two chips:
-- 
cgit v1.2.2


From 4e2b1c4f56227c742bbd2ea8e8f559567eb80c3f Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Tue, 6 Dec 2011 13:28:22 +0000
Subject: x86/intel_mid: Kconfig select fix

If we select a symbol it should have a type declared first
otherwise in some situations the config tools get upset. They
are currently perhaps a bit too resilient which is why this
wasn't noticed initially.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Link: http://lkml.kernel.org/r/20111206132811.4041.32549.stgit@bob.linux.org.uk
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5553f9f1a70d..efb42949cc09 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -401,6 +401,9 @@ config X86_WANT_INTEL_MID
 
 if X86_WANT_INTEL_MID
 
+config X86_INTEL_MID
+	bool
+
 config X86_MRST
        bool "Moorestown MID platform"
 	depends on PCI
-- 
cgit v1.2.2


From 2ded6e6a94c98ea453a156748cb7fabaf39a76b9 Mon Sep 17 00:00:00 2001
From: Mark Langsdorf <mark.langsdorf@amd.com>
Date: Fri, 18 Nov 2011 16:33:06 +0100
Subject: x86, hpet: Immediately disable HPET timer 1 if rtc irq is masked

When HPET is operating in RTC mode, the TN_ENABLE bit on timer1
controls whether the HPET or the RTC delivers interrupts to irq8. When
the system goes into suspend, the RTC driver sends a signal to the
HPET driver so that the HPET releases control of irq8, allowing the
RTC to wake the system from suspend. The switchover is accomplished by
a write to the HPET configuration registers which currently only
occurs while servicing the HPET interrupt.

On some systems, I have seen the system suspend before an HPET
interrupt occurs, preventing the write to the HPET configuration
register and leaving the HPET in control of the irq8. As the HPET is
not active during suspend, it does not generate a wake signal and RTC
alarms do not work.

This patch forces the HPET driver to immediately transfer control of
the irq8 channel to the RTC instead of waiting until the next
interrupt event.

Signed-off-by: Mark Langsdorf <mark.langsdorf@amd.com>
Link: http://lkml.kernel.org/r/20111118153306.GB16319@alberich.amd.com
Tested-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
---
 arch/x86/kernel/hpet.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b946a9eac7d9..1bb0bf4d92cd 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1049,6 +1049,14 @@ int hpet_rtc_timer_init(void)
 }
 EXPORT_SYMBOL_GPL(hpet_rtc_timer_init);
 
+static void hpet_disable_rtc_channel(void)
+{
+	unsigned long cfg;
+	cfg = hpet_readl(HPET_T1_CFG);
+	cfg &= ~HPET_TN_ENABLE;
+	hpet_writel(cfg, HPET_T1_CFG);
+}
+
 /*
  * The functions below are called from rtc driver.
  * Return 0 if HPET is not being used.
@@ -1060,6 +1068,9 @@ int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
 		return 0;
 
 	hpet_rtc_flags &= ~bit_mask;
+	if (unlikely(!hpet_rtc_flags))
+		hpet_disable_rtc_channel();
+
 	return 1;
 }
 EXPORT_SYMBOL_GPL(hpet_mask_rtc_irq_bit);
@@ -1125,15 +1136,11 @@ EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq);
 
 static void hpet_rtc_timer_reinit(void)
 {
-	unsigned int cfg, delta;
+	unsigned int delta;
 	int lost_ints = -1;
 
-	if (unlikely(!hpet_rtc_flags)) {
-		cfg = hpet_readl(HPET_T1_CFG);
-		cfg &= ~HPET_TN_ENABLE;
-		hpet_writel(cfg, HPET_T1_CFG);
-		return;
-	}
+	if (unlikely(!hpet_rtc_flags))
+		hpet_disable_rtc_channel();
 
 	if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit)
 		delta = hpet_default_delta;
-- 
cgit v1.2.2


From e8c7106280a305e1ff2a3a8a4dfce141469fb039 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Fri, 18 Nov 2011 13:09:11 +0000
Subject: x86, efi: Calling __pa() with an ioremap()ed address is invalid

If we encounter an efi_memory_desc_t without EFI_MEMORY_WB set
in ->attribute we currently call set_memory_uc(), which in turn
calls __pa() on a potentially ioremap'd address.

On CONFIG_X86_32 this is invalid, resulting in the following
oops on some machines:

  BUG: unable to handle kernel paging request at f7f22280
  IP: [<c10257b9>] reserve_ram_pages_type+0x89/0x210
  [...]

  Call Trace:
   [<c104f8ca>] ? page_is_ram+0x1a/0x40
   [<c1025aff>] reserve_memtype+0xdf/0x2f0
   [<c1024dc9>] set_memory_uc+0x49/0xa0
   [<c19334d0>] efi_enter_virtual_mode+0x1c2/0x3aa
   [<c19216d4>] start_kernel+0x291/0x2f2
   [<c19211c7>] ? loglevel+0x1b/0x1b
   [<c19210bf>] i386_start_kernel+0xbf/0xc8

A better approach to this problem is to map the memory region
with the correct attributes from the start, instead of modifying
it after the fact. The uncached case can be handled by
ioremap_nocache() and the cached by ioremap_cache().

Despite first impressions, it's not possible to use
ioremap_cache() to map all cached memory regions on
CONFIG_X86_64 because EFI_RUNTIME_SERVICES_DATA regions really
don't like being mapped into the vmalloc space, as detailed in
the following bug report,

	https://bugzilla.redhat.com/show_bug.cgi?id=748516

Therefore, we need to ensure that any EFI_RUNTIME_SERVICES_DATA
regions are covered by the direct kernel mapping table on
CONFIG_X86_64. To accomplish this we now map E820_RESERVED_EFI
regions via the direct kernel mapping with the initial call to
init_memory_mapping() in setup_arch(), whereas previously these
regions wouldn't be mapped if they were after the last E820_RAM
region until efi_ioremap() was called. Doing it this way allows
us to delete efi_ioremap() completely.

Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg@redhat.com>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Huang Ying <huang.ying.caritas@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/1321621751-3650-1-git-send-email-matt@console-pimps.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/e820.h    |  8 ++++++++
 arch/x86/include/asm/efi.h     |  5 -----
 arch/x86/kernel/e820.c         |  3 ++-
 arch/x86/kernel/setup.c        | 21 ++++++++++++++++++++-
 arch/x86/platform/efi/efi.c    | 29 ++++++++++++++++++-----------
 arch/x86/platform/efi/efi_64.c | 17 -----------------
 6 files changed, 48 insertions(+), 35 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 908b96957d88..c9547033e38e 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -53,6 +53,13 @@
  */
 #define E820_RESERVED_KERN        128
 
+/*
+ * Address ranges that need to be mapped by the kernel direct
+ * mapping. This is used to make sure regions such as
+ * EFI_RUNTIME_SERVICES_DATA are directly mapped. See setup_arch().
+ */
+#define E820_RESERVED_EFI         129
+
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
 struct e820entry {
@@ -115,6 +122,7 @@ static inline void early_memtest(unsigned long start, unsigned long end)
 }
 #endif
 
+extern unsigned long e820_end_pfn(unsigned long limit_pfn, unsigned type);
 extern unsigned long e820_end_of_ram_pfn(void);
 extern unsigned long e820_end_of_low_ram_pfn(void);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 7093e4a6a0bc..b8d8bfcd44a9 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -33,8 +33,6 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
 #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)	\
 	efi_call_virt(f, a1, a2, a3, a4, a5, a6)
 
-#define efi_ioremap(addr, size, type)		ioremap_cache(addr, size)
-
 #else /* !CONFIG_X86_32 */
 
 extern u64 efi_call0(void *fp);
@@ -84,9 +82,6 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
 	efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
 
-extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
-				 u32 type);
-
 #endif /* CONFIG_X86_32 */
 
 extern int add_efi_memmap;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 303a0e48f076..65ffd110a81b 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -135,6 +135,7 @@ static void __init e820_print_type(u32 type)
 		printk(KERN_CONT "(usable)");
 		break;
 	case E820_RESERVED:
+	case E820_RESERVED_EFI:
 		printk(KERN_CONT "(reserved)");
 		break;
 	case E820_ACPI:
@@ -783,7 +784,7 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
 /*
  * Find the highest page frame number we have available
  */
-static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
+unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
 {
 	int i;
 	unsigned long last_pfn = 0;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cf0ef986cb6d..9a9e40fb091c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -691,6 +691,8 @@ early_param("reservelow", parse_reservelow);
 
 void __init setup_arch(char **cmdline_p)
 {
+	unsigned long end_pfn;
+
 #ifdef CONFIG_X86_32
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
 	visws_early_detect();
@@ -932,7 +934,24 @@ void __init setup_arch(char **cmdline_p)
 	init_gbpages();
 
 	/* max_pfn_mapped is updated here */
-	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
+	end_pfn = max_low_pfn;
+
+#ifdef CONFIG_X86_64
+	/*
+	 * There may be regions after the last E820_RAM region that we
+	 * want to include in the kernel direct mapping, such as
+	 * EFI_RUNTIME_SERVICES_DATA.
+	 */
+	if (efi_enabled) {
+		unsigned long efi_end;
+
+		efi_end = e820_end_pfn(MAXMEM>>PAGE_SHIFT, E820_RESERVED_EFI);
+		if (efi_end > max_low_pfn)
+			end_pfn = efi_end;
+	}
+#endif
+
+	max_low_pfn_mapped = init_memory_mapping(0, end_pfn << PAGE_SHIFT);
 	max_pfn_mapped = max_low_pfn_mapped;
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 37718f0f053d..c9718a16be15 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -323,10 +323,13 @@ static void __init do_add_efi_memmap(void)
 		case EFI_UNUSABLE_MEMORY:
 			e820_type = E820_UNUSABLE;
 			break;
+		case EFI_RUNTIME_SERVICES_DATA:
+			e820_type = E820_RESERVED_EFI;
+			break;
 		default:
 			/*
 			 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
-			 * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
+			 * EFI_MEMORY_MAPPED_IO
 			 * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
 			 */
 			e820_type = E820_RESERVED;
@@ -671,10 +674,21 @@ void __init efi_enter_virtual_mode(void)
 		end_pfn = PFN_UP(end);
 		if (end_pfn <= max_low_pfn_mapped
 		    || (end_pfn > (1UL << (32 - PAGE_SHIFT))
-			&& end_pfn <= max_pfn_mapped))
+			&& end_pfn <= max_pfn_mapped)) {
 			va = __va(md->phys_addr);
-		else
-			va = efi_ioremap(md->phys_addr, size, md->type);
+
+			if (!(md->attribute & EFI_MEMORY_WB)) {
+				addr = (u64) (unsigned long)va;
+				npages = md->num_pages;
+				memrange_efi_to_native(&addr, &npages);
+				set_memory_uc(addr, npages);
+			}
+		} else {
+			if (!(md->attribute & EFI_MEMORY_WB))
+				va = ioremap_nocache(md->phys_addr, size);
+			else
+				va = ioremap_cache(md->phys_addr, size);
+		}
 
 		md->virt_addr = (u64) (unsigned long) va;
 
@@ -684,13 +698,6 @@ void __init efi_enter_virtual_mode(void)
 			continue;
 		}
 
-		if (!(md->attribute & EFI_MEMORY_WB)) {
-			addr = md->virt_addr;
-			npages = md->num_pages;
-			memrange_efi_to_native(&addr, &npages);
-			set_memory_uc(addr, npages);
-		}
-
 		systab = (u64) (unsigned long) efi_phys.systab;
 		if (md->phys_addr <= systab && systab < end) {
 			systab += md->virt_addr - md->phys_addr;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ac3aa54e2654..312250c6b2de 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -80,20 +80,3 @@ void __init efi_call_phys_epilog(void)
 	local_irq_restore(efi_flags);
 	early_code_mapping_set_exec(0);
 }
-
-void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
-				 u32 type)
-{
-	unsigned long last_map_pfn;
-
-	if (type == EFI_MEMORY_MAPPED_IO)
-		return ioremap(phys_addr, size);
-
-	last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
-	if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
-		unsigned long top = last_map_pfn << PAGE_SHIFT;
-		efi_ioremap(top, size - (top - phys_addr), type);
-	}
-
-	return (void __iomem *)__va(phys_addr);
-}
-- 
cgit v1.2.2


From b6999b19120931ede364fa3b685e698a61fed31d Mon Sep 17 00:00:00 2001
From: Youquan Song <youquan.song@intel.com>
Date: Thu, 8 Dec 2011 14:34:16 -0800
Subject: thp: add compound tail page _mapcount when mapped

With the 3.2-rc kernel, IOMMU 2M pages in KVM works.  But when I tried
to use IOMMU 1GB pages in KVM, I encountered an oops and the 1GB page
failed to be used.

The root cause is that 1GB page allocation calls gup_huge_pud() while 2M
page calls gup_huge_pmd.  If compound pages are used and the page is a
tail page, gup_huge_pmd() increases _mapcount to record tail page are
mapped while gup_huge_pud does not do that.

So when the mapped page is relesed, it will result in kernel oops
because the page is not marked mapped.

This patch add tail process for compound page in 1GB huge page which
keeps the same process as 2M page.

Reproduce like:
1. Add grub boot option: hugepagesz=1G hugepages=8
2. mount -t hugetlbfs -o pagesize=1G hugetlbfs /dev/hugepages
3. qemu-kvm -m 2048 -hda os-kvm.img -cpu kvm64 -smp 4 -mem-path /dev/hugepages
	-net none -device pci-assign,host=07:00.1

  kernel BUG at mm/swap.c:114!
  invalid opcode: 0000 [#1] SMP
  Call Trace:
    put_page+0x15/0x37
    kvm_release_pfn_clean+0x31/0x36
    kvm_iommu_put_pages+0x94/0xb1
    kvm_iommu_unmap_memslots+0x80/0xb6
    kvm_assign_device+0xba/0x117
    kvm_vm_ioctl_assigned_device+0x301/0xa47
    kvm_vm_ioctl+0x36c/0x3a2
    do_vfs_ioctl+0x49e/0x4e4
    sys_ioctl+0x5a/0x7c
    system_call_fastpath+0x16/0x1b
  RIP  put_compound_page+0xd4/0x168

Signed-off-by: Youquan Song <youquan.song@intel.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/gup.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index ea305856151c..dd74e46828c0 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -201,6 +201,8 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
 	do {
 		VM_BUG_ON(compound_head(page) != head);
 		pages[*nr] = page;
+		if (PageTail(page))
+			get_huge_page_tail(page);
 		(*nr)++;
 		page++;
 		refs++;
-- 
cgit v1.2.2