From fb21b84e7f809ef04b1e5aed5d463cf0d4866638 Mon Sep 17 00:00:00 2001
From: Stefan Bader <stefan.bader@canonical.com>
Date: Fri, 15 Aug 2014 10:57:46 +0200
Subject: x86_32, entry: Clean up sysenter_badsys declaration

commit 554086d85e "x86_32, entry: Do syscall exit work on badsys
(CVE-2014-4508)" introduced a new jump label (sysenter_badsys) but
somehow the END statements seem to have gone wrong (at least it
feels that way to me).
This does not seem to be a fatal problem, but just for the sake
of symmetry, change the second syscall_badsys to sysenter_badsys.

Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
Link: http://lkml.kernel.org/r/1408093066-31021-1-git-send-email-stefan.bader@canonical.com
Acked-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/entry_32.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 47c410d99f5d..4b0e1dfa2226 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -683,7 +683,7 @@ END(syscall_badsys)
 sysenter_badsys:
 	movl $-ENOSYS,%eax
 	jmp sysenter_after_call
-END(syscall_badsys)
+END(sysenter_badsys)
 	CFI_ENDPROC
 
 .macro FIXUP_ESPFIX_STACK
-- 
cgit v1.2.2


From a90b858cfe27a576f7e44a456af2ee432404ee8f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 21 Jul 2014 11:38:40 +0300
Subject: x86: Fix non-PC platform kernel crash on boot due to NULL dereference

Upstream commit:

  95d76acc7518d5 ("x86, irq: Count legacy IRQs by legacy_pic->nr_legacy_irqs instead of NR_IRQS_LEGACY")

removed reserved interrupts for the platforms that do not have a legacy IOAPIC.

Which breaks the boot on Intel MID platforms such as Medfield:

  BUG: unable to handle kernel NULL pointer dereference at 0000003a
  IP: [<c107079a>] setup_irq+0xf/0x4d
  *pdpt = 0000000000000000 *pde = 9bbf32453167e510

The culprit is an unconditional setting of IRQ2 which is used
as cascade IRQ on legacy platforms. It seems we have to check
if we have enough legacy IRQs reserved before we can call
setup_irq().

The fix adds such check in native_init_IRQ() and in setup_default_timer_irq().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Jiang Liu <jiang.liu@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: David Cohen <david.a.cohen@linux.intel.com>
Link: http://lkml.kernel.org/r/1405931920-12871-1-git-send-email-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/irqinit.c | 2 +-
 arch/x86/kernel/time.c    | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 1e6cff5814fa..44f1ed42fdf2 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -203,7 +203,7 @@ void __init native_init_IRQ(void)
 		set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
 	}
 
-	if (!acpi_ioapic && !of_ioapic)
+	if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
 		setup_irq(2, &irq2);
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index bf7ef5ce29df..0fa29609b2c4 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -68,6 +68,8 @@ static struct irqaction irq0  = {
 
 void __init setup_default_timer_irq(void)
 {
+	if (!nr_legacy_irqs())
+		return;
 	setup_irq(0, &irq0);
 }
 
-- 
cgit v1.2.2


From f395dcae7a68497751869cf0031fd8ce5e115f0a Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Wed, 27 Aug 2014 13:53:11 +0800
Subject: x86: irq: Fix bug in setting IOAPIC pin attributes

Commit 15a3c7cc9154321fc3 "x86, irq: Introduce two helper functions
to support irqdomain map operation" breaks LPSS ACPI enumerated
devices.

On startup, IOAPIC driver preallocates IRQ descriptors and programs
IOAPIC pins with default level and polarity attributes for all legacy
IRQs. Later legacy IRQ users may fail to set IOAPIC pin attributes
if the requested attributes conflict with the default IOAPIC pin
attributes. So change mp_irqdomain_map() to allow the first legacy IRQ
user to reprogram IOAPIC pin with different attributes.

Reported-and-tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Link: http://lkml.kernel.org/r/1409118795-17046-1-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/io_apic.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 29290f554e79..40a4aa3f4061 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1070,6 +1070,11 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,
 	}
 
 	if (flags & IOAPIC_MAP_ALLOC) {
+		/* special handling for legacy IRQs */
+		if (irq < nr_legacy_irqs() && info->count == 1 &&
+		    mp_irqdomain_map(domain, irq, pin) != 0)
+			irq = -1;
+
 		if (irq > 0)
 			info->count++;
 		else if (info->count == 0)
@@ -3896,7 +3901,15 @@ int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
 			info->polarity = 1;
 		}
 		info->node = NUMA_NO_NODE;
-		info->set = 1;
+
+		/*
+		 * setup_IO_APIC_irqs() programs all legacy IRQs with default
+		 * trigger and polarity attributes. Don't set the flag for that
+		 * case so the first legacy IRQ user could reprogram the pin
+		 * with real trigger and polarity attributes.
+		 */
+		if (virq >= nr_legacy_irqs() || info->count)
+			info->set = 1;
 	}
 	set_io_apic_irq_attr(&attr, ioapic, hwirq, info->trigger,
 			     info->polarity);
-- 
cgit v1.2.2


From 256aae5eac6d328067d1a986a7c5df6f19bdc8b4 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Mon, 28 Jul 2014 20:20:19 +0800
Subject: kprobes/x86: Free 'optinsn' cache when range check fails

This patch frees the 'optinsn' slot when we get a range check error,
to prevent memory leaks.

Before this patch, cache entry in kprobe_insn_cache() won't be freed
if kprobe optimizing fails due to range check failure.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Pei Feiyue <peifeiyue@huawei.com>
Link: http://lkml.kernel.org/r/1406550019-70935-1-git-send-email-wangnan0@huawei.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/kprobes/opt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index f304773285ae..f1314d0bcf0a 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -338,8 +338,10 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
 	 * a relative jump.
 	 */
 	rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
-	if (abs(rel) > 0x7fffffff)
+	if (abs(rel) > 0x7fffffff) {
+		__arch_remove_optimized_kprobe(op, 0);
 		return -ERANGE;
+	}
 
 	buf = (u8 *)op->optinsn.insn;
 
-- 
cgit v1.2.2


From 9eabc99a635a77cbf0948ce17d3cbc2b51680d4a Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Fri, 29 Aug 2014 17:26:23 +0800
Subject: x86, irq, PCI: Keep IRQ assignment for runtime power management

Now IOAPIC driver dynamically allocates IRQ numbers for IOAPIC pins.
We need to keep IRQ assignment for PCI devices during runtime power
management, otherwise it may cause failure of device wakeups.

Commit 3eec595235c17a7 "x86, irq, PCI: Keep IRQ assignment for PCI
devices during suspend/hibernation" has fixed the issue for suspend/
hibernation, we also need the same fix for runtime device sleep too.

Fix: https://bugzilla.kernel.org/show_bug.cgi?id=83271
Reported-and-Tested-by: EmanueL Czirai <amanual@openmailbox.org>
Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: EmanueL Czirai <amanual@openmailbox.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Grant Likely <grant.likely@linaro.org>
Link: http://lkml.kernel.org/r/1409304383-18806-1-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/io_apic.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 40a4aa3f4061..337ce5a9b15c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3959,6 +3959,18 @@ int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node)
 	return ret;
 }
 
+bool mp_should_keep_irq(struct device *dev)
+{
+	if (dev->power.is_prepared)
+		return true;
+#ifdef	CONFIG_PM_RUNTIME
+	if (dev->power.runtime_status == RPM_SUSPENDING)
+		return true;
+#endif
+
+	return false;
+}
+
 /* Enable IOAPIC early just for system timer */
 void __init pre_init_apic_IRQ0(void)
 {
-- 
cgit v1.2.2


From 74ca317c26a3f8543203b61d262c0ab2e30c384e Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 29 Aug 2014 15:18:46 -0700
Subject: kexec: create a new config option CONFIG_KEXEC_FILE for new syscall

Currently new system call kexec_file_load() and all the associated code
compiles if CONFIG_KEXEC=y.  But new syscall also compiles purgatory
code which currently uses gcc option -mcmodel=large.  This option seems
to be available only from gcc 4.4 onwards.

Hiding new functionality behind a new config option will not break
existing users of old gcc.  Those who wish to enable new functionality
will require new gcc.  Having said that, I am trying to figure out how
I can move away from using -mcmodel=large but that can take a while.

I think there are other advantages of introducing this new config
option.  As this option will be enabled only on x86_64, other arches
don't have to compile generic kexec code which will never be used.  This
new code selects CRYPTO=y and CRYPTO_SHA256=y.  And all other arches had
to do this for CONFIG_KEXEC.  Now with introduction of new config
option, we can remove crypto dependency from other arches.

Now CONFIG_KEXEC_FILE is available only on x86_64.  So wherever I had
CONFIG_X86_64 defined, I got rid of that.

For CONFIG_KEXEC_FILE, instead of doing select CRYPTO=y, I changed it to
"depends on CRYPTO=y".  This should be safer as "select" is not
recursive.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Tested-by: Shaun Ruffell <sruffell@digium.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/Makefile           |  2 +-
 arch/x86/kernel/crash.c            |  6 ++----
 arch/x86/kernel/machine_kexec_64.c | 11 +++++++++++
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b5ea75c4a4b4..ada2e2d6be3e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS)	+= ftrace.o
 obj-$(CONFIG_X86_TSC)		+= trace_clock.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
+obj-$(CONFIG_KEXEC_FILE)	+= kexec-bzimage64.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
 obj-y				+= kprobes/
 obj-$(CONFIG_MODULES)		+= module.o
@@ -118,5 +119,4 @@ ifeq ($(CONFIG_X86_64),y)
 
 	obj-$(CONFIG_PCI_MMCONFIG)	+= mmconf-fam10h_64.o
 	obj-y				+= vsmp_64.o
-	obj-$(CONFIG_KEXEC)		+= kexec-bzimage64.o
 endif
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 0553a34fa0df..a618fcd2c07d 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -182,8 +182,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 	crash_save_cpu(regs, safe_smp_processor_id());
 }
 
-#ifdef CONFIG_X86_64
-
+#ifdef CONFIG_KEXEC_FILE
 static int get_nr_ram_ranges_callback(unsigned long start_pfn,
 				unsigned long nr_pfn, void *arg)
 {
@@ -696,5 +695,4 @@ int crash_load_segments(struct kimage *image)
 
 	return ret;
 }
-
-#endif /* CONFIG_X86_64 */
+#endif /* CONFIG_KEXEC_FILE */
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 8b04018e5d1f..485981059a40 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -25,9 +25,11 @@
 #include <asm/debugreg.h>
 #include <asm/kexec-bzimage64.h>
 
+#ifdef CONFIG_KEXEC_FILE
 static struct kexec_file_ops *kexec_file_loaders[] = {
 		&kexec_bzImage64_ops,
 };
+#endif
 
 static void free_transition_pgtable(struct kimage *image)
 {
@@ -178,6 +180,7 @@ static void load_segments(void)
 		);
 }
 
+#ifdef CONFIG_KEXEC_FILE
 /* Update purgatory as needed after various image segments have been prepared */
 static int arch_update_purgatory(struct kimage *image)
 {
@@ -209,6 +212,12 @@ static int arch_update_purgatory(struct kimage *image)
 
 	return ret;
 }
+#else /* !CONFIG_KEXEC_FILE */
+static inline int arch_update_purgatory(struct kimage *image)
+{
+	return 0;
+}
+#endif /* CONFIG_KEXEC_FILE */
 
 int machine_kexec_prepare(struct kimage *image)
 {
@@ -329,6 +338,7 @@ void arch_crash_save_vmcoreinfo(void)
 
 /* arch-dependent functionality related to kexec file-based syscall */
 
+#ifdef CONFIG_KEXEC_FILE
 int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
 				  unsigned long buf_len)
 {
@@ -522,3 +532,4 @@ overflow:
 	       (int)ELF64_R_TYPE(rel[i].r_info), value);
 	return -ENOEXEC;
 }
+#endif /* CONFIG_KEXEC_FILE */
-- 
cgit v1.2.2


From 03bd4e1f7265548832a76e7919a81f3137c44fd1 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@linux.intel.com>
Date: Wed, 24 Sep 2014 16:38:05 +0800
Subject: sched: Fix unreleased llc_shared_mask bit during CPU hotplug

The following bug can be triggered by hot adding and removing a large number of
xen domain0's vcpus repeatedly:

	BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 IP: [..] find_busiest_group
	PGD 5a9d5067 PUD 13067 PMD 0
	Oops: 0000 [#3] SMP
	[...]
	Call Trace:
	load_balance
	? _raw_spin_unlock_irqrestore
	idle_balance
	__schedule
	schedule
	schedule_timeout
	? lock_timer_base
	schedule_timeout_uninterruptible
	msleep
	lock_device_hotplug_sysfs
	online_store
	dev_attr_store
	sysfs_write_file
	vfs_write
	SyS_write
	system_call_fastpath

Last level cache shared mask is built during CPU up and the
build_sched_domain() routine takes advantage of it to setup
the sched domain CPU topology.

However, llc_shared_mask is not released during CPU disable,
which leads to an invalid sched domain CPU topology.

This patch fixes it by releasing the llc_shared_mask correctly
during CPU disable.

Yasuaki also reported that this can happen on real hardware:

  https://lkml.org/lkml/2014/7/22/1018

His case is here:

	==
	Here is an example on my system.
	My system has 4 sockets and each socket has 15 cores and HT is
	enabled. In this case, each core of sockets is numbered as
	follows:

		 | CPU#
	Socket#0 | 0-14 , 60-74
	Socket#1 | 15-29, 75-89
	Socket#2 | 30-44, 90-104
	Socket#3 | 45-59, 105-119

	Then llc_shared_mask of CPU#30 has 0x3fff80000001fffc0000000.

	It means that last level cache of Socket#2 is shared with
	CPU#30-44 and 90-104.

	When hot-removing socket#2 and #3, each core of sockets is
	numbered as follows:

		 | CPU#
	Socket#0 | 0-14 , 60-74
	Socket#1 | 15-29, 75-89

	But llc_shared_mask is not cleared. So llc_shared_mask of CPU#30
	remains having 0x3fff80000001fffc0000000.

	After that, when hot-adding socket#2 and #3, each core of
	sockets is numbered as follows:

		 | CPU#
	Socket#0 | 0-14 , 60-74
	Socket#1 | 15-29, 75-89
	Socket#2 | 30-59
	Socket#3 | 90-119

	Then llc_shared_mask of CPU#30 becomes
	0x3fff8000fffffffc0000000. It means that last level cache of
	Socket#2 is shared with CPU#30-59 and 90-104. So the mask has
	the wrong value.

Signed-off-by: Wanpeng Li <wanpeng.li@linux.intel.com>
Tested-by: Linn Crosetto <linn@hp.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Toshi Kani <toshi.kani@hp.com>
Reviewed-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: <stable@vger.kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Steven Rostedt <srostedt@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1411547885-48165-1-git-send-email-wanpeng.li@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/smpboot.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2d872e08fab9..42a2dca984b3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1284,6 +1284,9 @@ static void remove_siblinginfo(int cpu)
 
 	for_each_cpu(sibling, cpu_sibling_mask(cpu))
 		cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling));
+	for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
+		cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
+	cpumask_clear(cpu_llc_shared_mask(cpu));
 	cpumask_clear(cpu_sibling_mask(cpu));
 	cpumask_clear(cpu_core_mask(cpu));
 	c->phys_proc_id = 0;
-- 
cgit v1.2.2