author		Mike Travis <travis@sgi.com>	2013-09-23 17:25:01 -0400
committer	Ingo Molnar <mingo@kernel.org>	2013-09-24 03:02:02 -0400
commit		0d12ef0c900078cc1f4e78dff2245521aa5d0c89
tree		153aa438154d72d7fd838bd6c26b594062654206 /arch
parent		1e019421bca68cfae1a61a09d9d49cf6a9e2143b
x86/UV: Update UV support for external NMI signals
The current UV NMI handler has not been updated for the changes in the
system NMI handler and the perf operations. The UV NMI handler reads an
MMR in the UV Hub to check whether the NMI event was caused by the
external 'system NMI' that the operator can initiate on the System Mgmt
Controller.

The problem arises when the perf tools are running, causing millions of
perf events per second on very large CPU count systems. Previously this
was okay because the perf NMI handler ran at a higher priority on the
NMI call chain, and if the NMI was a perf event it would stop calling
the remaining NMI handlers on the chain.

Now the system NMI handler calls all the handlers on the NMI call chain,
including the UV NMI handler. This causes the UV NMI handler to read the
MMRs at the same millions-per-second rate. This can lead to significant
performance loss and possible system failures. It also can cause
thousands of 'Dazed and Confused' messages to be sent to the system
console. This effectively makes the perf tools unusable on UV systems.

To avoid this excessive overhead when perf tools are running, this code
has been optimized to minimize reading of the MMRs as much as possible,
by moving to the NMI_UNKNOWN notifier chain. This chain is called only
when all the users on the standard NMI_LOCAL call chain have been called
and none of them have claimed this NMI.

There is an exception where the NMI_LOCAL notifier chain is used. When
the perf tools are in use, it's possible that the UV NMI was captured by
some other NMI handler and then either ignored or mistakenly processed
as a perf event. We set a per_cpu ('ping') flag for those CPUs that
ignored the initial NMI, and then send them an IPI NMI signal. The
NMI_LOCAL handler on each cpu does not need to read the MMR, but instead
checks the in-memory flag indicating it was pinged. There are two module
variables, 'ping_count' indicating how many requested NMI events
occurred, and 'ping_misses' indicating how many stray NMI events
occurred. The latter are most likely perf events, so together they show
the overhead of the perf NMI interrupts and how many MMR reads were
avoided.

This patch also minimizes the reads of the MMRs by having the first cpu
entering the NMI handler on each node set a per-HUB in-memory atomic
value. (Having a per-HUB value avoids sending lock traffic over
NumaLink.)

Both types of UV NMIs from the SMI layer are supported.

Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Dimitri Sivanich <sivanich@sgi.com>
Reviewed-by: Hedi Berriche <hedi@sgi.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Jason Wessel <jason.wessel@windriver.com>
Link: http://lkml.kernel.org/r/20130923212500.353547733@asylum.americas.sgi.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
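[Editor's sketch] The split between the two notifier chains described above can be condensed
into the following sketch. This is not code from the patch: the example_* names are invented,
and the real handlers (uv_handle_nmi() and uv_handle_nmi_ping() in the diff below) do
considerably more work. It only illustrates why the expensive MMR check lives on NMI_UNKNOWN
while the NMI_LOCAL handler is limited to an in-memory per-cpu flag.

	/*
	 * Condensed sketch of the two-chain arrangement described above.
	 * Not part of the patch; the example_* names are invented.
	 */
	#include <linux/nmi.h>
	#include <linux/percpu.h>
	#include <linux/printk.h>
	#include <linux/ptrace.h>
	#include <asm/nmi.h>

	static DEFINE_PER_CPU(atomic_t, example_pinged);

	/* Rare path: runs only after every NMI_LOCAL handler has declined the NMI. */
	static int example_unknown_nmi(unsigned int reason, struct pt_regs *regs)
	{
		/* This is where the real code reads the UV Hub MMR. */
		return NMI_DONE;
	}

	/* Hot path: runs on every NMI, so it never touches an MMR. */
	static int example_ping_nmi(unsigned int reason, struct pt_regs *regs)
	{
		if (!atomic_read(this_cpu_ptr(&example_pinged)))
			return NMI_DONE;	/* most likely a perf NMI, not ours */

		atomic_set(this_cpu_ptr(&example_pinged), 0);
		return example_unknown_nmi(reason, regs);
	}

	static void example_register(void)
	{
		if (register_nmi_handler(NMI_UNKNOWN, example_unknown_nmi, 0, "ex"))
			pr_warn("example: NMI_UNKNOWN handler failed to register\n");
		if (register_nmi_handler(NMI_LOCAL, example_ping_nmi, 0, "exping"))
			pr_warn("example: ping handler failed to register\n");
	}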
Diffstat (limited to 'arch')
-rw-r--r--	arch/x86/include/asm/uv/uv_hub.h	 57
-rw-r--r--	arch/x86/include/asm/uv/uv_mmrs.h	 31
-rw-r--r--	arch/x86/kernel/apic/x2apic_uv_x.c	  1
-rw-r--r--	arch/x86/platform/uv/uv_nmi.c		553
4 files changed, 600 insertions, 42 deletions
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 2c32df95bb78..a30836c8ac4d 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -502,8 +502,8 @@ struct uv_blade_info {
 	unsigned short	nr_online_cpus;
 	unsigned short	pnode;
 	short		memory_nid;
-	spinlock_t	nmi_lock;
-	unsigned long	nmi_count;
+	spinlock_t	nmi_lock;	/* obsolete, see uv_hub_nmi */
+	unsigned long	nmi_count;	/* obsolete, see uv_hub_nmi */
 };
 extern struct uv_blade_info *uv_blade_info;
 extern short *uv_node_to_blade;
@@ -576,6 +576,59 @@ static inline int uv_num_possible_blades(void)
 	return uv_possible_blades;
 }
 
+/* Per Hub NMI support */
+extern void uv_nmi_setup(void);
+
+/* BMC sets a bit this MMR non-zero before sending an NMI */
+#define UVH_NMI_MMR		UVH_SCRATCH5
+#define UVH_NMI_MMR_CLEAR	UVH_SCRATCH5_ALIAS
+#define UVH_NMI_MMR_SHIFT	63
+#define UVH_NMI_MMR_TYPE	"SCRATCH5"
+
+/* Newer SMM NMI handler, not present in all systems */
+#define UVH_NMI_MMRX		UVH_EVENT_OCCURRED0
+#define UVH_NMI_MMRX_CLEAR	UVH_EVENT_OCCURRED0_ALIAS
+#define UVH_NMI_MMRX_SHIFT	(is_uv1_hub() ? \
+					UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT :\
+					UVXH_EVENT_OCCURRED0_EXTIO_INT0_SHFT)
+#define UVH_NMI_MMRX_TYPE	"EXTIO_INT0"
+
+/* Non-zero indicates newer SMM NMI handler present */
+#define UVH_NMI_MMRX_SUPPORTED	UVH_EXTIO_INT0_BROADCAST
+
+/* Indicates to BIOS that we want to use the newer SMM NMI handler */
+#define UVH_NMI_MMRX_REQ	UVH_SCRATCH5_ALIAS_2
+#define UVH_NMI_MMRX_REQ_SHIFT	62
+
+struct uv_hub_nmi_s {
+	raw_spinlock_t	nmi_lock;
+	atomic_t	in_nmi;		/* flag this node in UV NMI IRQ */
+	atomic_t	cpu_owner;	/* last locker of this struct */
+	atomic_t	read_mmr_count;	/* count of MMR reads */
+	atomic_t	nmi_count;	/* count of true UV NMIs */
+	unsigned long	nmi_value;	/* last value read from NMI MMR */
+};
+
+struct uv_cpu_nmi_s {
+	struct uv_hub_nmi_s	*hub;
+	atomic_t		state;
+	atomic_t		pinging;
+	int			queries;
+	int			pings;
+};
+
+DECLARE_PER_CPU(struct uv_cpu_nmi_s, __uv_cpu_nmi);
+#define uv_cpu_nmi			(__get_cpu_var(__uv_cpu_nmi))
+#define uv_hub_nmi			(uv_cpu_nmi.hub)
+#define uv_cpu_nmi_per(cpu)		(per_cpu(__uv_cpu_nmi, cpu))
+#define uv_hub_nmi_per(cpu)		(uv_cpu_nmi_per(cpu).hub)
+
+/* uv_cpu_nmi_states */
+#define	UV_NMI_STATE_OUT		0
+#define	UV_NMI_STATE_IN			1
+#define	UV_NMI_STATE_DUMP		2
+#define	UV_NMI_STATE_DUMP_DONE		3
+
 /* Update SCIR state */
 static inline void uv_set_scir_bits(unsigned char value)
 {
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index bd5f80e58a23..e42249bcf7e1 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -461,6 +461,23 @@ union uvh_event_occurred0_u {
 
 
 /* ========================================================================= */
+/* UVH_EXTIO_INT0_BROADCAST */
+/* ========================================================================= */
+#define UVH_EXTIO_INT0_BROADCAST 0x61448UL
+#define UVH_EXTIO_INT0_BROADCAST_32 0x3f0
+
+#define UVH_EXTIO_INT0_BROADCAST_ENABLE_SHFT		0
+#define UVH_EXTIO_INT0_BROADCAST_ENABLE_MASK		0x0000000000000001UL
+
+union uvh_extio_int0_broadcast_u {
+	unsigned long	v;
+	struct uvh_extio_int0_broadcast_s {
+		unsigned long	enable:1;			/* RW */
+		unsigned long	rsvd_1_63:63;
+	} s;
+};
+
+/* ========================================================================= */
 /* UVH_GR0_TLB_INT0_CONFIG */
 /* ========================================================================= */
 #define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL
@@ -2606,6 +2623,20 @@ union uvh_scratch5_u {
 };
 
 /* ========================================================================= */
+/* UVH_SCRATCH5_ALIAS */
+/* ========================================================================= */
+#define UVH_SCRATCH5_ALIAS 0x2d0208UL
+#define UVH_SCRATCH5_ALIAS_32 0x780
+
+
+/* ========================================================================= */
+/* UVH_SCRATCH5_ALIAS_2 */
+/* ========================================================================= */
+#define UVH_SCRATCH5_ALIAS_2 0x2d0210UL
+#define UVH_SCRATCH5_ALIAS_2_32 0x788
+
+
+/* ========================================================================= */
 /* UVXH_EVENT_OCCURRED2 */
 /* ========================================================================= */
 #define UVXH_EVENT_OCCURRED2 0x70100UL
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 9e47c06ae5ab..0a5a4b8ae36c 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -977,6 +977,7 @@ void __init uv_system_init(void)
 	map_mmr_high(max_pnode);
 	map_mmioh_high(min_pnode, max_pnode);
 
+	uv_nmi_setup();
 	uv_cpu_init();
 	uv_scir_register_cpu_notifier();
 	uv_register_nmi_notifier();
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 37feb60618b1..fb02ea7d2b2d 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -20,72 +20,518 @@
20 */ 20 */
21 21
22#include <linux/cpu.h> 22#include <linux/cpu.h>
23#include <linux/delay.h>
24#include <linux/module.h>
23#include <linux/nmi.h> 25#include <linux/nmi.h>
26#include <linux/sched.h>
27#include <linux/slab.h>
24 28
25#include <asm/apic.h> 29#include <asm/apic.h>
30#include <asm/current.h>
31#include <asm/kdebug.h>
32#include <asm/local64.h>
26#include <asm/nmi.h> 33#include <asm/nmi.h>
27#include <asm/uv/uv.h> 34#include <asm/uv/uv.h>
28#include <asm/uv/uv_hub.h> 35#include <asm/uv/uv_hub.h>
29#include <asm/uv/uv_mmrs.h> 36#include <asm/uv/uv_mmrs.h>
30 37
31/* BMC sets a bit this MMR non-zero before sending an NMI */ 38/*
32#define UVH_NMI_MMR UVH_SCRATCH5 39 * UV handler for NMI
33#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8) 40 *
34#define UV_NMI_PENDING_MASK (1UL << 63) 41 * Handle system-wide NMI events generated by the global 'power nmi' command.
35DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count); 42 *
36static DEFINE_SPINLOCK(uv_nmi_lock); 43 * Basic operation is to field the NMI interrupt on each cpu and wait
44 * until all cpus have arrived into the nmi handler. If some cpus do not
45 * make it into the handler, try and force them in with the IPI(NMI) signal.
46 *
47 * We also have to lessen UV Hub MMR accesses as much as possible as this
48 * disrupts the UV Hub's primary mission of directing NumaLink traffic and
49 * can cause system problems to occur.
50 *
51 * To do this we register our primary NMI notifier on the NMI_UNKNOWN
52 * chain. This reduces the number of false NMI calls when the perf
53 * tools are running which generate an enormous number of NMIs per
54 * second (~4M/s for 1024 cpu threads). Our secondary NMI handler is
55 * very short as it only checks that if it has been "pinged" with the
56 * IPI(NMI) signal as mentioned above, and does not read the UV Hub's MMR.
57 *
58 */
59
60static struct uv_hub_nmi_s **uv_hub_nmi_list;
61
62DEFINE_PER_CPU(struct uv_cpu_nmi_s, __uv_cpu_nmi);
63EXPORT_PER_CPU_SYMBOL_GPL(__uv_cpu_nmi);
64
65static unsigned long nmi_mmr;
66static unsigned long nmi_mmr_clear;
67static unsigned long nmi_mmr_pending;
68
69static atomic_t uv_in_nmi;
70static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1);
71static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1);
72static atomic_t uv_nmi_slave_continue;
73static cpumask_var_t uv_nmi_cpu_mask;
74
75/* Values for uv_nmi_slave_continue */
76#define SLAVE_CLEAR 0
77#define SLAVE_CONTINUE 1
78#define SLAVE_EXIT 2
37 79
38/* 80/*
39 * When NMI is received, print a stack trace. 81 * Default is all stack dumps go to the console and buffer.
82 * Lower level to send to log buffer only.
40 */ 83 */
41int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) 84static int uv_nmi_loglevel = 7;
85module_param_named(dump_loglevel, uv_nmi_loglevel, int, 0644);
86
87/*
88 * The following values show statistics on how perf events are affecting
89 * this system.
90 */
91static int param_get_local64(char *buffer, const struct kernel_param *kp)
42{ 92{
43 unsigned long real_uv_nmi; 93 return sprintf(buffer, "%lu\n", local64_read((local64_t *)kp->arg));
44 int bid; 94}
45 95
46 /* 96static int param_set_local64(const char *val, const struct kernel_param *kp)
47 * Each blade has an MMR that indicates when an NMI has been sent 97{
48 * to cpus on the blade. If an NMI is detected, atomically 98 /* clear on any write */
49 * clear the MMR and update a per-blade NMI count used to 99 local64_set((local64_t *)kp->arg, 0);
50 * cause each cpu on the blade to notice a new NMI. 100 return 0;
51 */ 101}
52 bid = uv_numa_blade_id(); 102
53 real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); 103static struct kernel_param_ops param_ops_local64 = {
54 104 .get = param_get_local64,
55 if (unlikely(real_uv_nmi)) { 105 .set = param_set_local64,
56 spin_lock(&uv_blade_info[bid].nmi_lock); 106};
57 real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & 107#define param_check_local64(name, p) __param_check(name, p, local64_t)
58 UV_NMI_PENDING_MASK); 108
59 if (real_uv_nmi) { 109static local64_t uv_nmi_count;
60 uv_blade_info[bid].nmi_count++; 110module_param_named(nmi_count, uv_nmi_count, local64, 0644);
61 uv_write_local_mmr(UVH_NMI_MMR_CLEAR, 111
62 UV_NMI_PENDING_MASK); 112static local64_t uv_nmi_misses;
113module_param_named(nmi_misses, uv_nmi_misses, local64, 0644);
114
115static local64_t uv_nmi_ping_count;
116module_param_named(ping_count, uv_nmi_ping_count, local64, 0644);
117
118static local64_t uv_nmi_ping_misses;
119module_param_named(ping_misses, uv_nmi_ping_misses, local64, 0644);
120
121/*
122 * Following values allow tuning for large systems under heavy loading
123 */
124static int uv_nmi_initial_delay = 100;
125module_param_named(initial_delay, uv_nmi_initial_delay, int, 0644);
126
127static int uv_nmi_slave_delay = 100;
128module_param_named(slave_delay, uv_nmi_slave_delay, int, 0644);
129
130static int uv_nmi_loop_delay = 100;
131module_param_named(loop_delay, uv_nmi_loop_delay, int, 0644);
132
133static int uv_nmi_trigger_delay = 10000;
134module_param_named(trigger_delay, uv_nmi_trigger_delay, int, 0644);
135
136static int uv_nmi_wait_count = 100;
137module_param_named(wait_count, uv_nmi_wait_count, int, 0644);
138
139static int uv_nmi_retry_count = 500;
140module_param_named(retry_count, uv_nmi_retry_count, int, 0644);
141
142/* Setup which NMI support is present in system */
143static void uv_nmi_setup_mmrs(void)
144{
145 if (uv_read_local_mmr(UVH_NMI_MMRX_SUPPORTED)) {
146 uv_write_local_mmr(UVH_NMI_MMRX_REQ,
147 1UL << UVH_NMI_MMRX_REQ_SHIFT);
148 nmi_mmr = UVH_NMI_MMRX;
149 nmi_mmr_clear = UVH_NMI_MMRX_CLEAR;
150 nmi_mmr_pending = 1UL << UVH_NMI_MMRX_SHIFT;
151 pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMRX_TYPE);
152 } else {
153 nmi_mmr = UVH_NMI_MMR;
154 nmi_mmr_clear = UVH_NMI_MMR_CLEAR;
155 nmi_mmr_pending = 1UL << UVH_NMI_MMR_SHIFT;
156 pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMR_TYPE);
157 }
158}
159
160/* Read NMI MMR and check if NMI flag was set by BMC. */
161static inline int uv_nmi_test_mmr(struct uv_hub_nmi_s *hub_nmi)
162{
163 hub_nmi->nmi_value = uv_read_local_mmr(nmi_mmr);
164 atomic_inc(&hub_nmi->read_mmr_count);
165 return !!(hub_nmi->nmi_value & nmi_mmr_pending);
166}
167
168static inline void uv_local_mmr_clear_nmi(void)
169{
170 uv_write_local_mmr(nmi_mmr_clear, nmi_mmr_pending);
171}
172
173/*
174 * If first cpu in on this hub, set hub_nmi "in_nmi" and "owner" values and
175 * return true. If first cpu in on the system, set global "in_nmi" flag.
176 */
177static int uv_set_in_nmi(int cpu, struct uv_hub_nmi_s *hub_nmi)
178{
179 int first = atomic_add_unless(&hub_nmi->in_nmi, 1, 1);
180
181 if (first) {
182 atomic_set(&hub_nmi->cpu_owner, cpu);
183 if (atomic_add_unless(&uv_in_nmi, 1, 1))
184 atomic_set(&uv_nmi_cpu, cpu);
185
186 atomic_inc(&hub_nmi->nmi_count);
187 }
188 return first;
189}
190
191/* Check if this is a system NMI event */
192static int uv_check_nmi(struct uv_hub_nmi_s *hub_nmi)
193{
194 int cpu = smp_processor_id();
195 int nmi = 0;
196
197 local64_inc(&uv_nmi_count);
198 uv_cpu_nmi.queries++;
199
200 do {
201 nmi = atomic_read(&hub_nmi->in_nmi);
202 if (nmi)
203 break;
204
205 if (raw_spin_trylock(&hub_nmi->nmi_lock)) {
206
207 /* check hub MMR NMI flag */
208 if (uv_nmi_test_mmr(hub_nmi)) {
209 uv_set_in_nmi(cpu, hub_nmi);
210 nmi = 1;
211 break;
212 }
213
214 /* MMR NMI flag is clear */
215 raw_spin_unlock(&hub_nmi->nmi_lock);
216
217 } else {
218 /* wait a moment for the hub nmi locker to set flag */
219 cpu_relax();
220 udelay(uv_nmi_slave_delay);
221
222 /* re-check hub in_nmi flag */
223 nmi = atomic_read(&hub_nmi->in_nmi);
224 if (nmi)
225 break;
226 }
227
228 /* check if this BMC missed setting the MMR NMI flag */
229 if (!nmi) {
230 nmi = atomic_read(&uv_in_nmi);
231 if (nmi)
232 uv_set_in_nmi(cpu, hub_nmi);
233 }
234
235 } while (0);
236
237 if (!nmi)
238 local64_inc(&uv_nmi_misses);
239
240 return nmi;
241}
242
243/* Need to reset the NMI MMR register, but only once per hub. */
244static inline void uv_clear_nmi(int cpu)
245{
246 struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
247
248 if (cpu == atomic_read(&hub_nmi->cpu_owner)) {
249 atomic_set(&hub_nmi->cpu_owner, -1);
250 atomic_set(&hub_nmi->in_nmi, 0);
251 uv_local_mmr_clear_nmi();
252 raw_spin_unlock(&hub_nmi->nmi_lock);
253 }
254}
255
256/* Print non-responding cpus */
257static void uv_nmi_nr_cpus_pr(char *fmt)
258{
259 static char cpu_list[1024];
260 int len = sizeof(cpu_list);
261 int c = cpumask_weight(uv_nmi_cpu_mask);
262 int n = cpulist_scnprintf(cpu_list, len, uv_nmi_cpu_mask);
263
264 if (n >= len-1)
265 strcpy(&cpu_list[len - 6], "...\n");
266
267 printk(fmt, c, cpu_list);
268}
269
270/* Ping non-responding cpus attemping to force them into the NMI handler */
271static void uv_nmi_nr_cpus_ping(void)
272{
273 int cpu;
274
275 for_each_cpu(cpu, uv_nmi_cpu_mask)
276 atomic_set(&uv_cpu_nmi_per(cpu).pinging, 1);
277
278 apic->send_IPI_mask(uv_nmi_cpu_mask, APIC_DM_NMI);
279}
280
281/* Clean up flags for cpus that ignored both NMI and ping */
282static void uv_nmi_cleanup_mask(void)
283{
284 int cpu;
285
286 for_each_cpu(cpu, uv_nmi_cpu_mask) {
287 atomic_set(&uv_cpu_nmi_per(cpu).pinging, 0);
288 atomic_set(&uv_cpu_nmi_per(cpu).state, UV_NMI_STATE_OUT);
289 cpumask_clear_cpu(cpu, uv_nmi_cpu_mask);
290 }
291}
292
293/* Loop waiting as cpus enter nmi handler */
294static int uv_nmi_wait_cpus(int first)
295{
296 int i, j, k, n = num_online_cpus();
297 int last_k = 0, waiting = 0;
298
299 if (first) {
300 cpumask_copy(uv_nmi_cpu_mask, cpu_online_mask);
301 k = 0;
302 } else {
303 k = n - cpumask_weight(uv_nmi_cpu_mask);
304 }
305
306 udelay(uv_nmi_initial_delay);
307 for (i = 0; i < uv_nmi_retry_count; i++) {
308 int loop_delay = uv_nmi_loop_delay;
309
310 for_each_cpu(j, uv_nmi_cpu_mask) {
311 if (atomic_read(&uv_cpu_nmi_per(j).state)) {
312 cpumask_clear_cpu(j, uv_nmi_cpu_mask);
313 if (++k >= n)
314 break;
315 }
316 }
317 if (k >= n) { /* all in? */
318 k = n;
319 break;
320 }
321 if (last_k != k) { /* abort if no new cpus coming in */
322 last_k = k;
323 waiting = 0;
324 } else if (++waiting > uv_nmi_wait_count)
325 break;
326
327 /* extend delay if waiting only for cpu 0 */
328 if (waiting && (n - k) == 1 &&
329 cpumask_test_cpu(0, uv_nmi_cpu_mask))
330 loop_delay *= 100;
331
332 udelay(loop_delay);
333 }
334 atomic_set(&uv_nmi_cpus_in_nmi, k);
335 return n - k;
336}
337
338/* Wait until all slave cpus have entered UV NMI handler */
339static void uv_nmi_wait(int master)
340{
341 /* indicate this cpu is in */
342 atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_IN);
343
344 /* if not the first cpu in (the master), then we are a slave cpu */
345 if (!master)
346 return;
347
348 do {
349 /* wait for all other cpus to gather here */
350 if (!uv_nmi_wait_cpus(1))
351 break;
352
353 /* if not all made it in, send IPI NMI to them */
354 uv_nmi_nr_cpus_pr(KERN_ALERT
355 "UV: Sending NMI IPI to %d non-responding CPUs: %s\n");
356 uv_nmi_nr_cpus_ping();
357
358 /* if all cpus are in, then done */
359 if (!uv_nmi_wait_cpus(0))
360 break;
361
362 uv_nmi_nr_cpus_pr(KERN_ALERT
363 "UV: %d CPUs not in NMI loop: %s\n");
364 } while (0);
365
366 pr_alert("UV: %d of %d CPUs in NMI\n",
367 atomic_read(&uv_nmi_cpus_in_nmi), num_online_cpus());
368}
369
370/* Dump this cpu's state */
371static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs)
372{
373 const char *dots = " ................................. ";
374
375 printk(KERN_DEFAULT "UV:%sNMI process trace for CPU %d\n", dots, cpu);
376 show_regs(regs);
377 atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE);
378}
379
380/* Trigger a slave cpu to dump it's state */
381static void uv_nmi_trigger_dump(int cpu)
382{
383 int retry = uv_nmi_trigger_delay;
384
385 if (atomic_read(&uv_cpu_nmi_per(cpu).state) != UV_NMI_STATE_IN)
386 return;
387
388 atomic_set(&uv_cpu_nmi_per(cpu).state, UV_NMI_STATE_DUMP);
389 do {
390 cpu_relax();
391 udelay(10);
392 if (atomic_read(&uv_cpu_nmi_per(cpu).state)
393 != UV_NMI_STATE_DUMP)
394 return;
395 } while (--retry > 0);
396
397 pr_crit("UV: CPU %d stuck in process dump function\n", cpu);
398 atomic_set(&uv_cpu_nmi_per(cpu).state, UV_NMI_STATE_DUMP_DONE);
399}
400
401/* Wait until all cpus ready to exit */
402static void uv_nmi_sync_exit(int master)
403{
404 atomic_dec(&uv_nmi_cpus_in_nmi);
405 if (master) {
406 while (atomic_read(&uv_nmi_cpus_in_nmi) > 0)
407 cpu_relax();
408 atomic_set(&uv_nmi_slave_continue, SLAVE_CLEAR);
409 } else {
410 while (atomic_read(&uv_nmi_slave_continue))
411 cpu_relax();
412 }
413}
414
415/* Walk through cpu list and dump state of each */
416static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master)
417{
418 if (master) {
419 int tcpu;
420 int ignored = 0;
421 int saved_console_loglevel = console_loglevel;
422
423 pr_alert("UV: tracing processes for %d CPUs from CPU %d\n",
424 atomic_read(&uv_nmi_cpus_in_nmi), cpu);
425
426 console_loglevel = uv_nmi_loglevel;
427 atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
428 for_each_online_cpu(tcpu) {
429 if (cpumask_test_cpu(tcpu, uv_nmi_cpu_mask))
430 ignored++;
431 else if (tcpu == cpu)
432 uv_nmi_dump_state_cpu(tcpu, regs);
433 else
434 uv_nmi_trigger_dump(tcpu);
63 } 435 }
64 spin_unlock(&uv_blade_info[bid].nmi_lock); 436 if (ignored)
437 printk(KERN_DEFAULT "UV: %d CPUs ignored NMI\n",
438 ignored);
439
440 console_loglevel = saved_console_loglevel;
441 pr_alert("UV: process trace complete\n");
442 } else {
443 while (!atomic_read(&uv_nmi_slave_continue))
444 cpu_relax();
445 while (atomic_read(&uv_cpu_nmi.state) != UV_NMI_STATE_DUMP)
446 cpu_relax();
447 uv_nmi_dump_state_cpu(cpu, regs);
65 } 448 }
449 uv_nmi_sync_exit(master);
450}
66 451
67 if (likely(__get_cpu_var(cpu_last_nmi_count) == 452static void uv_nmi_touch_watchdogs(void)
68 uv_blade_info[bid].nmi_count)) 453{
454 touch_softlockup_watchdog_sync();
455 clocksource_touch_watchdog();
456 rcu_cpu_stall_reset();
457 touch_nmi_watchdog();
458}
459
460/*
461 * UV NMI handler
462 */
463int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
464{
465 struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
466 int cpu = smp_processor_id();
467 int master = 0;
468 unsigned long flags;
469
470 local_irq_save(flags);
471
472 /* If not a UV System NMI, ignore */
473 if (!atomic_read(&uv_cpu_nmi.pinging) && !uv_check_nmi(hub_nmi)) {
474 local_irq_restore(flags);
69 return NMI_DONE; 475 return NMI_DONE;
476 }
70 477
71 __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; 478 /* Indicate we are the first CPU into the NMI handler */
479 master = (atomic_read(&uv_nmi_cpu) == cpu);
72 480
73 /* 481 /* Pause as all cpus enter the NMI handler */
74 * Use a lock so only one cpu prints at a time. 482 uv_nmi_wait(master);
75 * This prevents intermixed output. 483
76 */ 484 /* Dump state of each cpu */
77 spin_lock(&uv_nmi_lock); 485 uv_nmi_dump_state(cpu, regs, master);
78 pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id()); 486
79 dump_stack(); 487 /* Clear per_cpu "in nmi" flag */
80 spin_unlock(&uv_nmi_lock); 488 atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_OUT);
489
490 /* Clear MMR NMI flag on each hub */
491 uv_clear_nmi(cpu);
492
493 /* Clear global flags */
494 if (master) {
495 if (cpumask_weight(uv_nmi_cpu_mask))
496 uv_nmi_cleanup_mask();
497 atomic_set(&uv_nmi_cpus_in_nmi, -1);
498 atomic_set(&uv_nmi_cpu, -1);
499 atomic_set(&uv_in_nmi, 0);
500 }
501
502 uv_nmi_touch_watchdogs();
503 local_irq_restore(flags);
81 504
82 return NMI_HANDLED; 505 return NMI_HANDLED;
83} 506}
84 507
508/*
509 * NMI handler for pulling in CPUs when perf events are grabbing our NMI
510 */
511int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs)
512{
513 int ret;
514
515 uv_cpu_nmi.queries++;
516 if (!atomic_read(&uv_cpu_nmi.pinging)) {
517 local64_inc(&uv_nmi_ping_misses);
518 return NMI_DONE;
519 }
520
521 uv_cpu_nmi.pings++;
522 local64_inc(&uv_nmi_ping_count);
523 ret = uv_handle_nmi(reason, regs);
524 atomic_set(&uv_cpu_nmi.pinging, 0);
525 return ret;
526}
527
85void uv_register_nmi_notifier(void) 528void uv_register_nmi_notifier(void)
86{ 529{
87 if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) 530 if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv"))
88 pr_warn("UV NMI handler failed to register\n"); 531 pr_warn("UV: NMI handler failed to register\n");
532
533 if (register_nmi_handler(NMI_LOCAL, uv_handle_nmi_ping, 0, "uvping"))
534 pr_warn("UV: PING NMI handler failed to register\n");
89} 535}
90 536
91void uv_nmi_init(void) 537void uv_nmi_init(void)
@@ -100,3 +546,30 @@ void uv_nmi_init(void)
100 apic_write(APIC_LVT1, value); 546 apic_write(APIC_LVT1, value);
101} 547}
102 548
549void uv_nmi_setup(void)
550{
551 int size = sizeof(void *) * (1 << NODES_SHIFT);
552 int cpu, nid;
553
554 /* Setup hub nmi info */
555 uv_nmi_setup_mmrs();
556 uv_hub_nmi_list = kzalloc(size, GFP_KERNEL);
557 pr_info("UV: NMI hub list @ 0x%p (%d)\n", uv_hub_nmi_list, size);
558 BUG_ON(!uv_hub_nmi_list);
559 size = sizeof(struct uv_hub_nmi_s);
560 for_each_present_cpu(cpu) {
561 nid = cpu_to_node(cpu);
562 if (uv_hub_nmi_list[nid] == NULL) {
563 uv_hub_nmi_list[nid] = kzalloc_node(size,
564 GFP_KERNEL, nid);
565 BUG_ON(!uv_hub_nmi_list[nid]);
566 raw_spin_lock_init(&(uv_hub_nmi_list[nid]->nmi_lock));
567 atomic_set(&uv_hub_nmi_list[nid]->cpu_owner, -1);
568 }
569 uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid];
570 }
571 alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL);
572 BUG_ON(!uv_nmi_cpu_mask);
573}
574
575
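[Editor's note] The comment block and uv_set_in_nmi() in the diff above rely on
atomic_add_unless() so that only the first cpu on each hub (and the first cpu system-wide)
claims the NMI, which is what keeps lock traffic and most MMR reads off NumaLink. A standalone
sketch of that claiming pattern, with invented example_* names and none of the patch's
bookkeeping, might look like:

	#include <linux/atomic.h>

	static atomic_t example_hub_in_nmi;	/* one instance per hub in the real code */
	static atomic_t example_sys_in_nmi;	/* single system-wide flag */

	/* Returns 2 for the system-wide master, 1 for a hub owner, 0 for followers. */
	static int example_claim(void)
	{
		/* atomic_add_unless(v, 1, 1) succeeds only for the cpu that sees 0. */
		if (!atomic_add_unless(&example_hub_in_nmi, 1, 1))
			return 0;		/* another cpu on this hub already won */

		if (atomic_add_unless(&example_sys_in_nmi, 1, 1))
			return 2;		/* first cpu into the NMI system-wide */

		return 1;			/* first cpu on this hub only */
	}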