author    Chris Metcalf <cmetcalf@mellanox.com>             2016-10-07 20:02:52 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>   2016-10-07 21:46:30 -0400
commit    511f8389454e55ece5115dc3bc84a0947788ff4f (patch)
tree      dfc902d595e5f55141f84a29a7b11c750b09c615
parent    677664895278267a80bda0e3b26821d60cdbebf5 (diff)
arch/tile: adopt the new nmi_backtrace framework
Previously tile was rolling its own method of capturing backtrace data in the NMI handlers, but it was relying on running printk() from the NMI handler, which is not always safe. So adopt the nmi_backtrace model (with the new cpumask extension) instead.

So that we can call the nmi_backtrace code directly from the nmi handler, move the nmi_enter()/exit() into the top-level tile NMI handler.

The semantics of the routine change slightly since it is now synchronous with the remote cores completing the backtraces. Previously it was asynchronous, but with protection to avoid starting a new remote backtrace if the old one was still in progress.

Link: http://lkml.kernel.org/r/1472487169-14923-4-git-send-email-cmetcalf@mellanox.com
Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Daniel Thompson <daniel.thompson@linaro.org> [arm]
Cc: Petr Mladek <pmladek@suse.com>
Cc: Aaron Tomlin <atomlin@redhat.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
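[Editor's note] For context, the framework being adopted centralizes the raise-and-wait logic in the generic helper nmi_trigger_cpumask_backtrace(); the arch code only supplies a "send NMIs to these cpus" callback. The sketch below is a simplified paraphrase of that helper as of this series, not part of this patch: the backtrace_mask/backtrace_flag names and the roughly 10-second timeout are recalled from the generic code and may differ in detail. It shows why the commit message can say the routine is now synchronous:

	static cpumask_t backtrace_mask;	/* cpus still owing a dump */
	static unsigned long backtrace_flag;	/* "in progress" latch */

	void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self,
					   void (*raise)(cpumask_t *mask))
	{
		int i, this_cpu = get_cpu();

		/* Only one backtrace may be in flight; a second request is dropped. */
		if (test_and_set_bit(0, &backtrace_flag)) {
			put_cpu();
			return;
		}

		cpumask_copy(&backtrace_mask, mask);
		if (exclude_self)
			cpumask_clear_cpu(this_cpu, &backtrace_mask);

		/* Arch callback sends the NMIs (tile: nmi_raise_cpu_backtrace()). */
		if (!cpumask_empty(&backtrace_mask))
			raise(&backtrace_mask);

		/*
		 * Synchronous: each remote cpu clears its own bit from
		 * nmi_cpu_backtrace() after printing, so spin until the mask
		 * empties or the timeout expires.
		 */
		for (i = 0; i < 10 * 1000; i++) {
			if (cpumask_empty(&backtrace_mask))
				break;
			mdelay(1);
		}

		clear_bit(0, &backtrace_flag);
		put_cpu();
	}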
 arch/tile/include/asm/irq.h |  5
 arch/tile/kernel/pmc.c      |  3
 arch/tile/kernel/process.c  | 73
 arch/tile/kernel/traps.c    |  9
 4 files changed, 27 insertions(+), 63 deletions(-)
diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h
index 84a924034bdb..1fa1f2544ff9 100644
--- a/arch/tile/include/asm/irq.h
+++ b/arch/tile/include/asm/irq.h
@@ -79,8 +79,9 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type);
 void setup_irq_regs(void);
 
 #ifdef __tilegx__
-void arch_trigger_all_cpu_backtrace(bool self);
-#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
+				    bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
 #endif
 
 #endif /* _ASM_TILE_IRQ_H */
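[Editor's note] The #define is not redundant with the prototype: the generic <linux/nmi.h> header keys on it to decide whether the architecture provides the hook. A rough paraphrase of that pattern from the generic header of this era (an assumption about its exact shape, not part of this patch):

	#ifdef arch_trigger_cpumask_backtrace
	static inline bool trigger_all_cpu_backtrace(void)
	{
		arch_trigger_cpumask_backtrace(cpu_online_mask, false);
		return true;
	}

	static inline bool trigger_allbutself_cpu_backtrace(void)
	{
		arch_trigger_cpumask_backtrace(cpu_online_mask, true);
		return true;
	}
	#else
	/* Without the arch hook, report that no backtrace was sent. */
	static inline bool trigger_all_cpu_backtrace(void)
	{
		return false;
	}
	#endif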
diff --git a/arch/tile/kernel/pmc.c b/arch/tile/kernel/pmc.c
index db62cc34b955..81cf8743a3f3 100644
--- a/arch/tile/kernel/pmc.c
+++ b/arch/tile/kernel/pmc.c
@@ -16,7 +16,6 @@
 #include <linux/spinlock.h>
 #include <linux/module.h>
 #include <linux/atomic.h>
-#include <linux/interrupt.h>
 
 #include <asm/processor.h>
 #include <asm/pmc.h>
@@ -29,9 +28,7 @@ int handle_perf_interrupt(struct pt_regs *regs, int fault)
 	if (!perf_irq)
 		panic("Unexpected PERF_COUNT interrupt %d\n", fault);
 
-	nmi_enter();
 	retval = perf_irq(regs, fault);
-	nmi_exit();
 	return retval;
 }
 
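[Editor's note] Assembled from the hunk above, handle_perf_interrupt() shrinks to a plain dispatch. The signature comes from the hunk header; the retval declaration is assumed from context outside the hunk:

	int handle_perf_interrupt(struct pt_regs *regs, int fault)
	{
		int retval;

		if (!perf_irq)
			panic("Unexpected PERF_COUNT interrupt %d\n", fault);

		/*
		 * nmi_enter()/nmi_exit() are no longer done here; per the
		 * commit message, the top-level tile NMI handler now
		 * brackets NMI processing.
		 */
		retval = perf_irq(regs, fault);
		return retval;
	}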
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index a465d8372edd..9f37106ef93a 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -22,7 +22,7 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/compat.h>
-#include <linux/hardirq.h>
+#include <linux/nmi.h>
 #include <linux/syscalls.h>
 #include <linux/kernel.h>
 #include <linux/tracehook.h>
@@ -594,66 +594,18 @@ void show_regs(struct pt_regs *regs)
 	tile_show_stack(&kbt);
 }
 
-/* To ensure stack dump on tiles occurs one by one. */
-static DEFINE_SPINLOCK(backtrace_lock);
-/* To ensure no backtrace occurs before all of the stack dump are done. */
-static atomic_t backtrace_cpus;
-/* The cpu mask to avoid reentrance. */
-static struct cpumask backtrace_mask;
-
-void do_nmi_dump_stack(struct pt_regs *regs)
-{
-	int is_idle = is_idle_task(current) && !in_interrupt();
-	int cpu;
-
-	nmi_enter();
-	cpu = smp_processor_id();
-	if (WARN_ON_ONCE(!cpumask_test_and_clear_cpu(cpu, &backtrace_mask)))
-		goto done;
-
-	spin_lock(&backtrace_lock);
-	if (is_idle)
-		pr_info("CPU: %d idle\n", cpu);
-	else
-		show_regs(regs);
-	spin_unlock(&backtrace_lock);
-	atomic_dec(&backtrace_cpus);
-done:
-	nmi_exit();
-}
-
 #ifdef __tilegx__
-void arch_trigger_all_cpu_backtrace(bool self)
+void nmi_raise_cpu_backtrace(struct cpumask *in_mask)
 {
 	struct cpumask mask;
 	HV_Coord tile;
 	unsigned int timeout;
 	int cpu;
-	int ongoing;
 	HV_NMI_Info info[NR_CPUS];
 
-	ongoing = atomic_cmpxchg(&backtrace_cpus, 0, num_online_cpus() - 1);
-	if (ongoing != 0) {
-		pr_err("Trying to do all-cpu backtrace.\n");
-		pr_err("But another all-cpu backtrace is ongoing (%d cpus left)\n",
-		       ongoing);
-		if (self) {
-			pr_err("Reporting the stack on this cpu only.\n");
-			dump_stack();
-		}
-		return;
-	}
-
-	cpumask_copy(&mask, cpu_online_mask);
-	cpumask_clear_cpu(smp_processor_id(), &mask);
-	cpumask_copy(&backtrace_mask, &mask);
-
-	/* Backtrace for myself first. */
-	if (self)
-		dump_stack();
-
 	/* Tentatively dump stack on remote tiles via NMI. */
 	timeout = 100;
+	cpumask_copy(&mask, in_mask);
 	while (!cpumask_empty(&mask) && timeout) {
 		for_each_cpu(cpu, &mask) {
 			tile.x = cpu_x(cpu);
@@ -664,12 +616,17 @@ void arch_trigger_all_cpu_backtrace(bool self)
 		}
 
 		mdelay(10);
+		touch_softlockup_watchdog();
 		timeout--;
 	}
 
-	/* Warn about cpus stuck in ICS and decrement their counts here. */
+	/* Warn about cpus stuck in ICS. */
 	if (!cpumask_empty(&mask)) {
 		for_each_cpu(cpu, &mask) {
+
+			/* Clear the bit as if nmi_cpu_backtrace() ran. */
+			cpumask_clear_cpu(cpu, in_mask);
+
 			switch (info[cpu].result) {
 			case HV_NMI_RESULT_FAIL_ICS:
 				pr_warn("Skipping stack dump of cpu %d in ICS at pc %#llx\n",
@@ -680,16 +637,20 @@ void arch_trigger_all_cpu_backtrace(bool self)
 					cpu);
 				break;
 			case HV_ENOSYS:
-				pr_warn("Hypervisor too old to allow remote stack dumps.\n");
-				goto skip_for_each;
+				WARN_ONCE(1, "Hypervisor too old to allow remote stack dumps.\n");
+				break;
 			default:  /* should not happen */
 				pr_warn("Skipping stack dump of cpu %d [%d,%#llx]\n",
 					cpu, info[cpu].result, info[cpu].pc);
 				break;
 			}
 		}
-skip_for_each:
-		atomic_sub(cpumask_weight(&mask), &backtrace_cpus);
 	}
 }
+
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+{
+	nmi_trigger_cpumask_backtrace(mask, exclude_self,
+				      nmi_raise_cpu_backtrace);
+}
 #endif /* __tilegx_ */
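[Editor's note] With the two halves wired together, generic callers reach the tile code without knowing anything about hypervisor NMIs. A hypothetical call site for illustration: dump_all_other_cpus() is invented here, while arch_trigger_cpumask_backtrace() and cpu_online_mask are real:

	#include <linux/nmi.h>

	/* Illustrative only: how e.g. a sysrq-l style path reaches the tile code. */
	static void dump_all_other_cpus(void)
	{
		/*
		 * exclude_self = true skips the calling cpu. The call is
		 * synchronous: it returns once the remote cores have printed
		 * their backtraces, or the generic helper's timeout expires.
		 */
		arch_trigger_cpumask_backtrace(cpu_online_mask, true);
	}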
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index 4d9651c5b1ad..39f427bb0de2 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -20,6 +20,8 @@
 #include <linux/reboot.h>
 #include <linux/uaccess.h>
 #include <linux/ptrace.h>
+#include <linux/hardirq.h>
+#include <linux/nmi.h>
 #include <asm/stack.h>
 #include <asm/traps.h>
 #include <asm/setup.h>
@@ -392,14 +394,17 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 
 void do_nmi(struct pt_regs *regs, int fault_num, unsigned long reason)
 {
+	nmi_enter();
 	switch (reason) {
+#ifdef arch_trigger_cpumask_backtrace
 	case TILE_NMI_DUMP_STACK:
-		do_nmi_dump_stack(regs);
+		nmi_cpu_backtrace(regs);
 		break;
+#endif
 	default:
 		panic("Unexpected do_nmi type %ld", reason);
-		return;
 	}
+	nmi_exit();
 }
 
 /* Deprecated function currently only used here. */
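[Editor's note] Assembled from the hunk above, the post-patch NMI entry point reads as below. Dropping the return after panic() is safe because panic() never returns, and keeping it would have bypassed the new nmi_exit():

	void do_nmi(struct pt_regs *regs, int fault_num, unsigned long reason)
	{
		nmi_enter();			/* moved here from the leaf handlers */
		switch (reason) {
	#ifdef arch_trigger_cpumask_backtrace
		case TILE_NMI_DUMP_STACK:
			nmi_cpu_backtrace(regs);	/* generic per-cpu dump */
			break;
	#endif
		default:
			panic("Unexpected do_nmi type %ld", reason);
		}
		nmi_exit();
	}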