From fb21b84e7f809ef04b1e5aed5d463cf0d4866638 Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Fri, 15 Aug 2014 10:57:46 +0200 Subject: x86_32, entry: Clean up sysenter_badsys declaration commit 554086d85e "x86_32, entry: Do syscall exit work on badsys (CVE-2014-4508)" introduced a new jump label (sysenter_badsys) but somehow the END statements seem to have gone wrong (at least it feels that way to me). This does not seem to be a fatal problem, but just for the sake of symmetry, change the second syscall_badsys to sysenter_badsys. Signed-off-by: Stefan Bader Link: http://lkml.kernel.org/r/1408093066-31021-1-git-send-email-stefan.bader@canonical.com Acked-by: Andy Lutomirski Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 47c410d99f5d..4b0e1dfa2226 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -683,7 +683,7 @@ END(syscall_badsys) sysenter_badsys: movl $-ENOSYS,%eax jmp sysenter_after_call -END(syscall_badsys) +END(sysenter_badsys) CFI_ENDPROC .macro FIXUP_ESPFIX_STACK -- cgit v1.2.2 From a90b858cfe27a576f7e44a456af2ee432404ee8f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 21 Jul 2014 11:38:40 +0300 Subject: x86: Fix non-PC platform kernel crash on boot due to NULL dereference Upstream commit: 95d76acc7518d5 ("x86, irq: Count legacy IRQs by legacy_pic->nr_legacy_irqs instead of NR_IRQS_LEGACY") removed reserved interrupts for the platforms that do not have a legacy IOAPIC. Which breaks the boot on Intel MID platforms such as Medfield: BUG: unable to handle kernel NULL pointer dereference at 0000003a IP: [] setup_irq+0xf/0x4d [ 0.000000] *pdpt = 0000000000000000 *pde = 9bbf32453167e510 The culprit is an uncoditional setting of IRQ2 which is used as cascade IRQ on legacy platforms. It seems we have to check if we have enough legacy IRQs reserved before we can call setup_irq(). The fix adds such check in native_init_IRQ() and in setup_default_timer_irq(). Signed-off-by: Andy Shevchenko Reviewed-by: Jiang Liu Reviewed-by: Thomas Gleixner Cc: David Cohen Link: http://lkml.kernel.org/r/1405931920-12871-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/irqinit.c | 2 +- arch/x86/kernel/time.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 1e6cff5814fa..44f1ed42fdf2 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -203,7 +203,7 @@ void __init native_init_IRQ(void) set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); } - if (!acpi_ioapic && !of_ioapic) + if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) setup_irq(2, &irq2); #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index bf7ef5ce29df..0fa29609b2c4 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -68,6 +68,8 @@ static struct irqaction irq0 = { void __init setup_default_timer_irq(void) { + if (!nr_legacy_irqs()) + return; setup_irq(0, &irq0); } -- cgit v1.2.2 From f395dcae7a68497751869cf0031fd8ce5e115f0a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 27 Aug 2014 13:53:11 +0800 Subject: x86: irq: Fix bug in setting IOAPIC pin attributes Commit 15a3c7cc9154321fc3 "x86, irq: Introduce two helper functions to support irqdomain map operation" breaks LPSS ACPI enumerated devices. On startup, IOAPIC driver preallocates IRQ descriptors and programs IOAPIC pins with default level and polarity attributes for all legacy IRQs. Later legacy IRQ users may fail to set IOAPIC pin attributes if the requested attributes conflicts with the default IOAPIC pin attributes. So change mp_irqdomain_map() to allow the first legacy IRQ user to reprogram IOAPIC pin with different attributes. Reported-and-tested-by: Mika Westerberg Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Grant Likely Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1409118795-17046-1-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 29290f554e79..40a4aa3f4061 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1070,6 +1070,11 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, } if (flags & IOAPIC_MAP_ALLOC) { + /* special handling for legacy IRQs */ + if (irq < nr_legacy_irqs() && info->count == 1 && + mp_irqdomain_map(domain, irq, pin) != 0) + irq = -1; + if (irq > 0) info->count++; else if (info->count == 0) @@ -3896,7 +3901,15 @@ int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, info->polarity = 1; } info->node = NUMA_NO_NODE; - info->set = 1; + + /* + * setup_IO_APIC_irqs() programs all legacy IRQs with default + * trigger and polarity attributes. Don't set the flag for that + * case so the first legacy IRQ user could reprogram the pin + * with real trigger and polarity attributes. + */ + if (virq >= nr_legacy_irqs() || info->count) + info->set = 1; } set_io_apic_irq_attr(&attr, ioapic, hwirq, info->trigger, info->polarity); -- cgit v1.2.2 From 256aae5eac6d328067d1a986a7c5df6f19bdc8b4 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 28 Jul 2014 20:20:19 +0800 Subject: kprobes/x86: Free 'optinsn' cache when range check fails This patch frees the 'optinsn' slot when we get a range check error, to prevent memory leaks. Before this patch, cache entry in kprobe_insn_cache() won't be freed if kprobe optimizing fails due to range check failure. Signed-off-by: Wang Nan Acked-by: Masami Hiramatsu Cc: Pei Feiyue Link: http://lkml.kernel.org/r/1406550019-70935-1-git-send-email-wangnan0@huawei.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/opt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index f304773285ae..f1314d0bcf0a 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -338,8 +338,10 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op) * a relative jump. */ rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; - if (abs(rel) > 0x7fffffff) + if (abs(rel) > 0x7fffffff) { + __arch_remove_optimized_kprobe(op, 0); return -ERANGE; + } buf = (u8 *)op->optinsn.insn; -- cgit v1.2.2 From 9eabc99a635a77cbf0948ce17d3cbc2b51680d4a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 29 Aug 2014 17:26:23 +0800 Subject: x86, irq, PCI: Keep IRQ assignment for runtime power management Now IOAPIC driver dynamically allocates IRQ numbers for IOAPIC pins. We need to keep IRQ assignment for PCI devices during runtime power management, otherwise it may cause failure of device wakeups. Commit 3eec595235c17a7 "x86, irq, PCI: Keep IRQ assignment for PCI devices during suspend/hibernation" has fixed the issue for suspend/ hibernation, we also need the same fix for runtime device sleep too. Fix: https://bugzilla.kernel.org/show_bug.cgi?id=83271 Reported-and-Tested-by: EmanueL Czirai Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: EmanueL Czirai Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Grant Likely Link: http://lkml.kernel.org/r/1409304383-18806-1-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 40a4aa3f4061..337ce5a9b15c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3959,6 +3959,18 @@ int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node) return ret; } +bool mp_should_keep_irq(struct device *dev) +{ + if (dev->power.is_prepared) + return true; +#ifdef CONFIG_PM_RUNTIME + if (dev->power.runtime_status == RPM_SUSPENDING) + return true; +#endif + + return false; +} + /* Enable IOAPIC early just for system timer */ void __init pre_init_apic_IRQ0(void) { -- cgit v1.2.2 From 74ca317c26a3f8543203b61d262c0ab2e30c384e Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 29 Aug 2014 15:18:46 -0700 Subject: kexec: create a new config option CONFIG_KEXEC_FILE for new syscall Currently new system call kexec_file_load() and all the associated code compiles if CONFIG_KEXEC=y. But new syscall also compiles purgatory code which currently uses gcc option -mcmodel=large. This option seems to be available only gcc 4.4 onwards. Hiding new functionality behind a new config option will not break existing users of old gcc. Those who wish to enable new functionality will require new gcc. Having said that, I am trying to figure out how can I move away from using -mcmodel=large but that can take a while. I think there are other advantages of introducing this new config option. As this option will be enabled only on x86_64, other arches don't have to compile generic kexec code which will never be used. This new code selects CRYPTO=y and CRYPTO_SHA256=y. And all other arches had to do this for CONFIG_KEXEC. Now with introduction of new config option, we can remove crypto dependency from other arches. Now CONFIG_KEXEC_FILE is available only on x86_64. So whereever I had CONFIG_X86_64 defined, I got rid of that. For CONFIG_KEXEC_FILE, instead of doing select CRYPTO=y, I changed it to "depends on CRYPTO=y". This should be safer as "select" is not recursive. Signed-off-by: Vivek Goyal Cc: Eric Biederman Cc: H. Peter Anvin Tested-by: Shaun Ruffell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/crash.c | 6 ++---- arch/x86/kernel/machine_kexec_64.c | 11 +++++++++++ 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b5ea75c4a4b4..ada2e2d6be3e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -71,6 +71,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_X86_TSC) += trace_clock.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o +obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-y += kprobes/ obj-$(CONFIG_MODULES) += module.o @@ -118,5 +119,4 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o obj-y += vsmp_64.o - obj-$(CONFIG_KEXEC) += kexec-bzimage64.o endif diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 0553a34fa0df..a618fcd2c07d 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -182,8 +182,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) crash_save_cpu(regs, safe_smp_processor_id()); } -#ifdef CONFIG_X86_64 - +#ifdef CONFIG_KEXEC_FILE static int get_nr_ram_ranges_callback(unsigned long start_pfn, unsigned long nr_pfn, void *arg) { @@ -696,5 +695,4 @@ int crash_load_segments(struct kimage *image) return ret; } - -#endif /* CONFIG_X86_64 */ +#endif /* CONFIG_KEXEC_FILE */ diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 8b04018e5d1f..485981059a40 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -25,9 +25,11 @@ #include #include +#ifdef CONFIG_KEXEC_FILE static struct kexec_file_ops *kexec_file_loaders[] = { &kexec_bzImage64_ops, }; +#endif static void free_transition_pgtable(struct kimage *image) { @@ -178,6 +180,7 @@ static void load_segments(void) ); } +#ifdef CONFIG_KEXEC_FILE /* Update purgatory as needed after various image segments have been prepared */ static int arch_update_purgatory(struct kimage *image) { @@ -209,6 +212,12 @@ static int arch_update_purgatory(struct kimage *image) return ret; } +#else /* !CONFIG_KEXEC_FILE */ +static inline int arch_update_purgatory(struct kimage *image) +{ + return 0; +} +#endif /* CONFIG_KEXEC_FILE */ int machine_kexec_prepare(struct kimage *image) { @@ -329,6 +338,7 @@ void arch_crash_save_vmcoreinfo(void) /* arch-dependent functionality related to kexec file-based syscall */ +#ifdef CONFIG_KEXEC_FILE int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len) { @@ -522,3 +532,4 @@ overflow: (int)ELF64_R_TYPE(rel[i].r_info), value); return -ENOEXEC; } +#endif /* CONFIG_KEXEC_FILE */ -- cgit v1.2.2 From 03bd4e1f7265548832a76e7919a81f3137c44fd1 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 24 Sep 2014 16:38:05 +0800 Subject: sched: Fix unreleased llc_shared_mask bit during CPU hotplug The following bug can be triggered by hot adding and removing a large number of xen domain0's vcpus repeatedly: BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 IP: [..] find_busiest_group PGD 5a9d5067 PUD 13067 PMD 0 Oops: 0000 [#3] SMP [...] Call Trace: load_balance ? _raw_spin_unlock_irqrestore idle_balance __schedule schedule schedule_timeout ? lock_timer_base schedule_timeout_uninterruptible msleep lock_device_hotplug_sysfs online_store dev_attr_store sysfs_write_file vfs_write SyS_write system_call_fastpath Last level cache shared mask is built during CPU up and the build_sched_domain() routine takes advantage of it to setup the sched domain CPU topology. However, llc_shared_mask is not released during CPU disable, which leads to an invalid sched domainCPU topology. This patch fix it by releasing the llc_shared_mask correctly during CPU disable. Yasuaki also reported that this can happen on real hardware: https://lkml.org/lkml/2014/7/22/1018 His case is here: == Here is an example on my system. My system has 4 sockets and each socket has 15 cores and HT is enabled. In this case, each core of sockes is numbered as follows: | CPU# Socket#0 | 0-14 , 60-74 Socket#1 | 15-29, 75-89 Socket#2 | 30-44, 90-104 Socket#3 | 45-59, 105-119 Then llc_shared_mask of CPU#30 has 0x3fff80000001fffc0000000. It means that last level cache of Socket#2 is shared with CPU#30-44 and 90-104. When hot-removing socket#2 and #3, each core of sockets is numbered as follows: | CPU# Socket#0 | 0-14 , 60-74 Socket#1 | 15-29, 75-89 But llc_shared_mask is not cleared. So llc_shared_mask of CPU#30 remains having 0x3fff80000001fffc0000000. After that, when hot-adding socket#2 and #3, each core of sockets is numbered as follows: | CPU# Socket#0 | 0-14 , 60-74 Socket#1 | 15-29, 75-89 Socket#2 | 30-59 Socket#3 | 90-119 Then llc_shared_mask of CPU#30 becomes 0x3fff8000fffffffc0000000. It means that last level cache of Socket#2 is shared with CPU#30-59 and 90-104. So the mask has the wrong value. Signed-off-by: Wanpeng Li Tested-by: Linn Crosetto Reviewed-by: Borislav Petkov Reviewed-by: Toshi Kani Reviewed-by: Yasuaki Ishimatsu Cc: Cc: David Rientjes Cc: Prarit Bhargava Cc: Steven Rostedt Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1411547885-48165-1-git-send-email-wanpeng.li@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2d872e08fab9..42a2dca984b3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1284,6 +1284,9 @@ static void remove_siblinginfo(int cpu) for_each_cpu(sibling, cpu_sibling_mask(cpu)) cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling)); + for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) + cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling)); + cpumask_clear(cpu_llc_shared_mask(cpu)); cpumask_clear(cpu_sibling_mask(cpu)); cpumask_clear(cpu_core_mask(cpu)); c->phys_proc_id = 0; -- cgit v1.2.2