aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Metcalf <cmetcalf@ezchip.com>2015-05-04 17:26:35 -0400
committerChris Metcalf <cmetcalf@ezchip.com>2015-05-11 11:22:31 -0400
commite5701b74ccfdbbb0b4d9abcc7d0c569bf5e5375b (patch)
tree12fd2d5402bea262d4a191504f3ad25b5263ae3a
parentb4287df82991ca608f44d7ac12ad9b3bc39d5baa (diff)
tile: support delivering NMIs for multicore backtrace
A new hypervisor service was added some time ago (MDE 4.2.1 or later, or MDE 4.3 or later) that allows cores to request NMIs to be delivered to other cores. Use this facility to deliver a request that causes a backtrace to be generated on each core, and hook it into the magic SysRq functionality.
Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
-rw-r--r--arch/tile/include/asm/irq.h5
-rw-r--r--arch/tile/include/asm/traps.h8
-rw-r--r--arch/tile/include/hv/hypervisor.h60
-rw-r--r--arch/tile/kernel/hvglue.S3
-rw-r--r--arch/tile/kernel/hvglue_trace.c4
-rw-r--r--arch/tile/kernel/intvec_64.S6
-rw-r--r--arch/tile/kernel/process.c101
-rw-r--r--arch/tile/kernel/traps.c12
8 files changed, 197 insertions, 2 deletions
diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h
index 1fe86911838b..84a924034bdb 100644
--- a/arch/tile/include/asm/irq.h
+++ b/arch/tile/include/asm/irq.h
@@ -78,4 +78,9 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type);
78 78
79void setup_irq_regs(void); 79void setup_irq_regs(void);
80 80
81#ifdef __tilegx__
82void arch_trigger_all_cpu_backtrace(bool self);
83#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
84#endif
85
81#endif /* _ASM_TILE_IRQ_H */ 86#endif /* _ASM_TILE_IRQ_H */
diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
index 4b99a1c3aab2..11c82270c1f5 100644
--- a/arch/tile/include/asm/traps.h
+++ b/arch/tile/include/asm/traps.h
@@ -52,6 +52,14 @@ void do_timer_interrupt(struct pt_regs *, int fault_num);
52/* kernel/messaging.c */ 52/* kernel/messaging.c */
53void hv_message_intr(struct pt_regs *, int intnum); 53void hv_message_intr(struct pt_regs *, int intnum);
54 54
55#define TILE_NMI_DUMP_STACK 1 /* Dump stack for sysrq+'l' */
56
57/* kernel/process.c */
58void do_nmi_dump_stack(struct pt_regs *regs);
59
60/* kernel/traps.c */
61void do_nmi(struct pt_regs *, int fault_num, unsigned long reason);
62
55/* kernel/irq.c */ 63/* kernel/irq.c */
56void tile_dev_intr(struct pt_regs *, int intnum); 64void tile_dev_intr(struct pt_regs *, int intnum);
57 65
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index e0e6af4e783b..f10b332b3b65 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -321,8 +321,11 @@
321/** hv_console_set_ipi */ 321/** hv_console_set_ipi */
322#define HV_DISPATCH_CONSOLE_SET_IPI 63 322#define HV_DISPATCH_CONSOLE_SET_IPI 63
323 323
324/** hv_send_nmi */
325#define HV_DISPATCH_SEND_NMI 65
326
324/** One more than the largest dispatch value */ 327/** One more than the largest dispatch value */
325#define _HV_DISPATCH_END 64 328#define _HV_DISPATCH_END 66
326 329
327 330
328#ifndef __ASSEMBLER__ 331#ifndef __ASSEMBLER__
@@ -1253,6 +1256,11 @@ void hv_downcall_dispatch(void);
1253#define INT_DMATLB_ACCESS_DWNCL INT_DMA_CPL 1256#define INT_DMATLB_ACCESS_DWNCL INT_DMA_CPL
1254/** Device interrupt downcall interrupt vector */ 1257/** Device interrupt downcall interrupt vector */
1255#define INT_DEV_INTR_DWNCL INT_WORLD_ACCESS 1258#define INT_DEV_INTR_DWNCL INT_WORLD_ACCESS
1259/** NMI downcall interrupt vector */
1260#define INT_NMI_DWNCL 64
1261
1262#define HV_NMI_FLAG_FORCE 0x1 /**< Force an NMI downcall regardless of
1263 the ICS bit of the client. */
1256 1264
1257#ifndef __ASSEMBLER__ 1265#ifndef __ASSEMBLER__
1258 1266
@@ -1780,6 +1788,56 @@ int hv_dev_poll(int devhdl, __hv32 events, HV_IntArg intarg);
1780int hv_dev_poll_cancel(int devhdl); 1788int hv_dev_poll_cancel(int devhdl);
1781 1789
1782 1790
1791/** NMI information */
1792typedef struct
1793{
1794 /** Result: negative error, or HV_NMI_RESULT_xxx. */
1795 int result;
1796
1797 /** PC from interrupted remote core (if result != HV_NMI_RESULT_FAIL_HV). */
1798 HV_VirtAddr pc;
1799
1800} HV_NMI_Info;
1801
1802/** NMI issued successfully. */
1803#define HV_NMI_RESULT_OK 0
1804
1805/** NMI not issued: remote tile running at client PL with ICS set. */
1806#define HV_NMI_RESULT_FAIL_ICS 1
1807
1808/** NMI not issued: remote tile waiting in hypervisor. */
1809#define HV_NMI_RESULT_FAIL_HV 2
1810
1811/** Force an NMI downcall regardless of the ICS bit of the client. */
1812#define HV_NMI_FLAG_FORCE 0x1
1813
1814/** Send an NMI interrupt request to a particular tile.
1815 *
1816 * This will cause the NMI to be issued on the remote tile regardless
1817 * of the state of the client interrupt mask. However, if the remote
1818 * tile is in the hypervisor, it will not execute the NMI, and
1819 * HV_NMI_RESULT_FAIL_HV will be returned. Similarly, if the remote
1820 * tile is in a client interrupt critical section at the time of the
1821 * NMI, it will not execute the NMI, and HV_NMI_RESULT_FAIL_ICS will
1822 * be returned. In this second case, however, if HV_NMI_FLAG_FORCE
1823 * is set in flags, then the remote tile will enter its NMI interrupt
1824 * vector regardless. Forcing the NMI vector during an interrupt
1825 * critical section will mean that the client can not safely continue
1826 * execution after handling the interrupt.
1827 *
1828 * @param tile Tile to which the NMI request is sent.
1829 * @param info NMI information which is defined by and interpreted by the
1830 * supervisor, is passed to the specified tile, and is
1831 * stored in the SPR register SYSTEM_SAVE_{CLIENT_PL}_2 on the
1832 * specified tile when entering the NMI handler routine.
1833 * Typically, this parameter stores the NMI type, or an aligned
1834 * VA plus some special bits, etc.
1835 * @param flags Flags (HV_NMI_FLAG_xxx).
1836 * @return Information about the requested NMI.
1837 */
1838HV_NMI_Info hv_send_nmi(HV_Coord tile, unsigned long info, __hv64 flags);
1839
1840
1783/** Scatter-gather list for preada/pwritea calls. */ 1841/** Scatter-gather list for preada/pwritea calls. */
1784typedef struct 1842typedef struct
1785#if CHIP_VA_WIDTH() <= 32 1843#if CHIP_VA_WIDTH() <= 32
diff --git a/arch/tile/kernel/hvglue.S b/arch/tile/kernel/hvglue.S
index 2ab456622391..d78ee2ad610c 100644
--- a/arch/tile/kernel/hvglue.S
+++ b/arch/tile/kernel/hvglue.S
@@ -71,4 +71,5 @@ gensym hv_flush_all, 0x6e0, 32
71gensym hv_get_ipi_pte, 0x700, 32 71gensym hv_get_ipi_pte, 0x700, 32
72gensym hv_set_pte_super_shift, 0x720, 32 72gensym hv_set_pte_super_shift, 0x720, 32
73gensym hv_console_set_ipi, 0x7e0, 32 73gensym hv_console_set_ipi, 0x7e0, 32
74gensym hv_glue_internals, 0x800, 30720 74gensym hv_send_nmi, 0x820, 32
75gensym hv_glue_internals, 0x820, 30688
diff --git a/arch/tile/kernel/hvglue_trace.c b/arch/tile/kernel/hvglue_trace.c
index 85c74ad29312..add0d71395c6 100644
--- a/arch/tile/kernel/hvglue_trace.c
+++ b/arch/tile/kernel/hvglue_trace.c
@@ -75,6 +75,7 @@
75#define hv_get_ipi_pte _hv_get_ipi_pte 75#define hv_get_ipi_pte _hv_get_ipi_pte
76#define hv_set_pte_super_shift _hv_set_pte_super_shift 76#define hv_set_pte_super_shift _hv_set_pte_super_shift
77#define hv_console_set_ipi _hv_console_set_ipi 77#define hv_console_set_ipi _hv_console_set_ipi
78#define hv_send_nmi _hv_send_nmi
78#include <hv/hypervisor.h> 79#include <hv/hypervisor.h>
79#undef hv_init 80#undef hv_init
80#undef hv_install_context 81#undef hv_install_context
@@ -134,6 +135,7 @@
134#undef hv_get_ipi_pte 135#undef hv_get_ipi_pte
135#undef hv_set_pte_super_shift 136#undef hv_set_pte_super_shift
136#undef hv_console_set_ipi 137#undef hv_console_set_ipi
138#undef hv_send_nmi
137 139
138/* 140/*
139 * Provide macros based on <linux/syscalls.h> to provide a wrapper 141 * Provide macros based on <linux/syscalls.h> to provide a wrapper
@@ -264,3 +266,5 @@ HV_WRAP9(int, hv_flush_remote, HV_PhysAddr, cache_pa,
264 HV_VirtAddr, tlb_va, unsigned long, tlb_length, 266 HV_VirtAddr, tlb_va, unsigned long, tlb_length,
265 unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask, 267 unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask,
266 HV_Remote_ASID*, asids, int, asidcount) 268 HV_Remote_ASID*, asids, int, asidcount)
269HV_WRAP3(HV_NMI_Info, hv_send_nmi, HV_Coord, tile, unsigned long, info,
270 __hv64, flags)
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 5b67efcecabd..800b91d3f9dc 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -515,6 +515,10 @@ intvec_\vecname:
515 .ifc \c_routine, handle_perf_interrupt 515 .ifc \c_routine, handle_perf_interrupt
516 mfspr r2, AUX_PERF_COUNT_STS 516 mfspr r2, AUX_PERF_COUNT_STS
517 .endif 517 .endif
518 .ifc \c_routine, do_nmi
519 mfspr r2, SPR_SYSTEM_SAVE_K_2 /* nmi type */
520 .else
521 .endif
518 .endif 522 .endif
519 .endif 523 .endif
520 .endif 524 .endif
@@ -1571,3 +1575,5 @@ intrpt_start:
1571 1575
1572 /* Synthetic interrupt delivered only by the simulator */ 1576 /* Synthetic interrupt delivered only by the simulator */
1573 int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint 1577 int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint
1578 /* Synthetic interrupt delivered by hv */
1579 int_hand INT_NMI_DWNCL, NMI_DWNCL, do_nmi, handle_nmi
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index b403c2e3e263..0dddcf7e5bfa 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -27,6 +27,7 @@
27#include <linux/kernel.h> 27#include <linux/kernel.h>
28#include <linux/tracehook.h> 28#include <linux/tracehook.h>
29#include <linux/signal.h> 29#include <linux/signal.h>
30#include <linux/delay.h>
30#include <linux/context_tracking.h> 31#include <linux/context_tracking.h>
31#include <asm/stack.h> 32#include <asm/stack.h>
32#include <asm/switch_to.h> 33#include <asm/switch_to.h>
@@ -574,3 +575,103 @@ void show_regs(struct pt_regs *regs)
574 575
575 dump_stack_regs(regs); 576 dump_stack_regs(regs);
576} 577}
578
579/* To ensure stack dump on tiles occurs one by one. */
580static DEFINE_SPINLOCK(backtrace_lock);
581/* To ensure no backtrace occurs before all of the stack dump are done. */
582static atomic_t backtrace_cpus;
583/* The cpu mask to avoid reentrance. */
584static struct cpumask backtrace_mask;
585
586void do_nmi_dump_stack(struct pt_regs *regs)
587{
588 int is_idle = is_idle_task(current) && !in_interrupt();
589 int cpu;
590
591 nmi_enter();
592 cpu = smp_processor_id();
593 if (WARN_ON_ONCE(!cpumask_test_and_clear_cpu(cpu, &backtrace_mask)))
594 goto done;
595
596 spin_lock(&backtrace_lock);
597 if (is_idle)
598 pr_info("CPU: %d idle\n", cpu);
599 else
600 show_regs(regs);
601 spin_unlock(&backtrace_lock);
602 atomic_dec(&backtrace_cpus);
603done:
604 nmi_exit();
605}
606
607#ifdef __tilegx__
608void arch_trigger_all_cpu_backtrace(bool self)
609{
610 struct cpumask mask;
611 HV_Coord tile;
612 unsigned int timeout;
613 int cpu;
614 int ongoing;
615 HV_NMI_Info info[NR_CPUS];
616
617 ongoing = atomic_cmpxchg(&backtrace_cpus, 0, num_online_cpus() - 1);
618 if (ongoing != 0) {
619 pr_err("Trying to do all-cpu backtrace.\n");
620 pr_err("But another all-cpu backtrace is ongoing (%d cpus left)\n",
621 ongoing);
622 if (self) {
623 pr_err("Reporting the stack on this cpu only.\n");
624 dump_stack();
625 }
626 return;
627 }
628
629 cpumask_copy(&mask, cpu_online_mask);
630 cpumask_clear_cpu(smp_processor_id(), &mask);
631 cpumask_copy(&backtrace_mask, &mask);
632
633 /* Backtrace for myself first. */
634 if (self)
635 dump_stack();
636
637 /* Tentatively dump stack on remote tiles via NMI. */
638 timeout = 100;
639 while (!cpumask_empty(&mask) && timeout) {
640 for_each_cpu(cpu, &mask) {
641 tile.x = cpu_x(cpu);
642 tile.y = cpu_y(cpu);
643 info[cpu] = hv_send_nmi(tile, TILE_NMI_DUMP_STACK, 0);
644 if (info[cpu].result == HV_NMI_RESULT_OK)
645 cpumask_clear_cpu(cpu, &mask);
646 }
647
648 mdelay(10);
649 timeout--;
650 }
651
652 /* Warn about cpus stuck in ICS and decrement their counts here. */
653 if (!cpumask_empty(&mask)) {
654 for_each_cpu(cpu, &mask) {
655 switch (info[cpu].result) {
656 case HV_NMI_RESULT_FAIL_ICS:
657 pr_warn("Skipping stack dump of cpu %d in ICS at pc %#llx\n",
658 cpu, info[cpu].pc);
659 break;
660 case HV_NMI_RESULT_FAIL_HV:
661 pr_warn("Skipping stack dump of cpu %d in hypervisor\n",
662 cpu);
663 break;
664 case HV_ENOSYS:
665 pr_warn("Hypervisor too old to allow remote stack dumps.\n");
666 goto skip_for_each;
667 default: /* should not happen */
668 pr_warn("Skipping stack dump of cpu %d [%d,%#llx]\n",
669 cpu, info[cpu].result, info[cpu].pc);
670 break;
671 }
672 }
673skip_for_each:
674 atomic_sub(cpumask_weight(&mask), &backtrace_cpus);
675 }
676}
677#endif /* __tilegx__ */
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index 312fc134c1cb..855f7316f1ee 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -395,6 +395,18 @@ done:
395 exception_exit(prev_state); 395 exception_exit(prev_state);
396} 396}
397 397
398void do_nmi(struct pt_regs *regs, int fault_num, unsigned long reason)
399{
400 switch (reason) {
401 case TILE_NMI_DUMP_STACK:
402 do_nmi_dump_stack(regs);
403 break;
404 default:
405 panic("Unexpected do_nmi type %ld", reason);
406 return;
407 }
408}
409
398void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52) 410void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
399{ 411{
400 _dump_stack(dummy, pc, lr, sp, r52); 412 _dump_stack(dummy, pc, lr, sp, r52);