aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/kernel/cpu/perf_event.c59
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c15
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c2
-rw-r--r--arch/x86/oprofile/nmi_int.c22
-rw-r--r--drivers/oprofile/buffer_sync.c27
-rw-r--r--drivers/oprofile/cpu_buffer.c2
-rw-r--r--kernel/perf_event.c26
-rw-r--r--kernel/trace/ftrace.c15
-rw-r--r--kernel/watchdog.c17
-rw-r--r--tools/perf/util/callchain.h1
10 files changed, 133 insertions, 53 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f2da20fda02d..3efdf2870a35 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1154,7 +1154,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1154 /* 1154 /*
1155 * event overflow 1155 * event overflow
1156 */ 1156 */
1157 handled = 1; 1157 handled++;
1158 data.period = event->hw.last_period; 1158 data.period = event->hw.last_period;
1159 1159
1160 if (!x86_perf_event_set_period(event)) 1160 if (!x86_perf_event_set_period(event))
@@ -1200,12 +1200,20 @@ void perf_events_lapic_init(void)
1200 apic_write(APIC_LVTPC, APIC_DM_NMI); 1200 apic_write(APIC_LVTPC, APIC_DM_NMI);
1201} 1201}
1202 1202
1203struct pmu_nmi_state {
1204 unsigned int marked;
1205 int handled;
1206};
1207
1208static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
1209
1203static int __kprobes 1210static int __kprobes
1204perf_event_nmi_handler(struct notifier_block *self, 1211perf_event_nmi_handler(struct notifier_block *self,
1205 unsigned long cmd, void *__args) 1212 unsigned long cmd, void *__args)
1206{ 1213{
1207 struct die_args *args = __args; 1214 struct die_args *args = __args;
1208 struct pt_regs *regs; 1215 unsigned int this_nmi;
1216 int handled;
1209 1217
1210 if (!atomic_read(&active_events)) 1218 if (!atomic_read(&active_events))
1211 return NOTIFY_DONE; 1219 return NOTIFY_DONE;
@@ -1214,22 +1222,47 @@ perf_event_nmi_handler(struct notifier_block *self,
1214 case DIE_NMI: 1222 case DIE_NMI:
1215 case DIE_NMI_IPI: 1223 case DIE_NMI_IPI:
1216 break; 1224 break;
1217 1225 case DIE_NMIUNKNOWN:
1226 this_nmi = percpu_read(irq_stat.__nmi_count);
1227 if (this_nmi != __get_cpu_var(pmu_nmi).marked)
1228 /* let the kernel handle the unknown nmi */
1229 return NOTIFY_DONE;
1230 /*
1231 * This one is a PMU back-to-back nmi. Two events
1232 * trigger 'simultaneously' raising two back-to-back
1233 * NMIs. If the first NMI handles both, the latter
1234 * will be empty and daze the CPU. So, we drop it to
1235 * avoid false-positive 'unknown nmi' messages.
1236 */
1237 return NOTIFY_STOP;
1218 default: 1238 default:
1219 return NOTIFY_DONE; 1239 return NOTIFY_DONE;
1220 } 1240 }
1221 1241
1222 regs = args->regs;
1223
1224 apic_write(APIC_LVTPC, APIC_DM_NMI); 1242 apic_write(APIC_LVTPC, APIC_DM_NMI);
1225 /* 1243
1226 * Can't rely on the handled return value to say it was our NMI, two 1244 handled = x86_pmu.handle_irq(args->regs);
1227 * events could trigger 'simultaneously' raising two back-to-back NMIs. 1245 if (!handled)
1228 * 1246 return NOTIFY_DONE;
1229 * If the first NMI handles both, the latter will be empty and daze 1247
1230 * the CPU. 1248 this_nmi = percpu_read(irq_stat.__nmi_count);
1231 */ 1249 if ((handled > 1) ||
1232 x86_pmu.handle_irq(regs); 1250 /* the next nmi could be a back-to-back nmi */
1251 ((__get_cpu_var(pmu_nmi).marked == this_nmi) &&
1252 (__get_cpu_var(pmu_nmi).handled > 1))) {
1253 /*
1254 * We could have two subsequent back-to-back nmis: The
1255 * first handles more than one counter, the 2nd
1256 * handles only one counter and the 3rd handles no
1257 * counter.
1258 *
1259 * This is the 2nd nmi because the previous was
1260 * handling more than one counter. We will mark the
1261 * next (3rd) and then drop it if unhandled.
1262 */
1263 __get_cpu_var(pmu_nmi).marked = this_nmi + 1;
1264 __get_cpu_var(pmu_nmi).handled = handled;
1265 }
1233 1266
1234 return NOTIFY_STOP; 1267 return NOTIFY_STOP;
1235} 1268}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index d8d86d014008..ee05c90012d2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -712,7 +712,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
712 struct perf_sample_data data; 712 struct perf_sample_data data;
713 struct cpu_hw_events *cpuc; 713 struct cpu_hw_events *cpuc;
714 int bit, loops; 714 int bit, loops;
715 u64 ack, status; 715 u64 status;
716 int handled = 0;
716 717
717 perf_sample_data_init(&data, 0); 718 perf_sample_data_init(&data, 0);
718 719
@@ -728,6 +729,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
728 729
729 loops = 0; 730 loops = 0;
730again: 731again:
732 intel_pmu_ack_status(status);
731 if (++loops > 100) { 733 if (++loops > 100) {
732 WARN_ONCE(1, "perfevents: irq loop stuck!\n"); 734 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
733 perf_event_print_debug(); 735 perf_event_print_debug();
@@ -736,19 +738,22 @@ again:
736 } 738 }
737 739
738 inc_irq_stat(apic_perf_irqs); 740 inc_irq_stat(apic_perf_irqs);
739 ack = status;
740 741
741 intel_pmu_lbr_read(); 742 intel_pmu_lbr_read();
742 743
743 /* 744 /*
744 * PEBS overflow sets bit 62 in the global status register 745 * PEBS overflow sets bit 62 in the global status register
745 */ 746 */
746 if (__test_and_clear_bit(62, (unsigned long *)&status)) 747 if (__test_and_clear_bit(62, (unsigned long *)&status)) {
748 handled++;
747 x86_pmu.drain_pebs(regs); 749 x86_pmu.drain_pebs(regs);
750 }
748 751
749 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { 752 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
750 struct perf_event *event = cpuc->events[bit]; 753 struct perf_event *event = cpuc->events[bit];
751 754
755 handled++;
756
752 if (!test_bit(bit, cpuc->active_mask)) 757 if (!test_bit(bit, cpuc->active_mask))
753 continue; 758 continue;
754 759
@@ -761,8 +766,6 @@ again:
761 x86_pmu_stop(event); 766 x86_pmu_stop(event);
762 } 767 }
763 768
764 intel_pmu_ack_status(ack);
765
766 /* 769 /*
767 * Repeat if there is more work to be done: 770 * Repeat if there is more work to be done:
768 */ 771 */
@@ -772,7 +775,7 @@ again:
772 775
773done: 776done:
774 intel_pmu_enable_all(0); 777 intel_pmu_enable_all(0);
775 return 1; 778 return handled;
776} 779}
777 780
778static struct event_constraint * 781static struct event_constraint *
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 7e578e9cc58b..b560db3305be 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -692,7 +692,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
692 inc_irq_stat(apic_perf_irqs); 692 inc_irq_stat(apic_perf_irqs);
693 } 693 }
694 694
695 return handled > 0; 695 return handled;
696} 696}
697 697
698/* 698/*
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index f6b48f6c5951..cfe4faabb0f6 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -568,8 +568,13 @@ static int __init init_sysfs(void)
568 int error; 568 int error;
569 569
570 error = sysdev_class_register(&oprofile_sysclass); 570 error = sysdev_class_register(&oprofile_sysclass);
571 if (!error) 571 if (error)
572 error = sysdev_register(&device_oprofile); 572 return error;
573
574 error = sysdev_register(&device_oprofile);
575 if (error)
576 sysdev_class_unregister(&oprofile_sysclass);
577
573 return error; 578 return error;
574} 579}
575 580
@@ -580,8 +585,10 @@ static void exit_sysfs(void)
580} 585}
581 586
582#else 587#else
583#define init_sysfs() do { } while (0) 588
584#define exit_sysfs() do { } while (0) 589static inline int init_sysfs(void) { return 0; }
590static inline void exit_sysfs(void) { }
591
585#endif /* CONFIG_PM */ 592#endif /* CONFIG_PM */
586 593
587static int __init p4_init(char **cpu_type) 594static int __init p4_init(char **cpu_type)
@@ -695,6 +702,8 @@ int __init op_nmi_init(struct oprofile_operations *ops)
695 char *cpu_type = NULL; 702 char *cpu_type = NULL;
696 int ret = 0; 703 int ret = 0;
697 704
705 using_nmi = 0;
706
698 if (!cpu_has_apic) 707 if (!cpu_has_apic)
699 return -ENODEV; 708 return -ENODEV;
700 709
@@ -774,7 +783,10 @@ int __init op_nmi_init(struct oprofile_operations *ops)
774 783
775 mux_init(ops); 784 mux_init(ops);
776 785
777 init_sysfs(); 786 ret = init_sysfs();
787 if (ret)
788 return ret;
789
778 using_nmi = 1; 790 using_nmi = 1;
779 printk(KERN_INFO "oprofile: using NMI interrupt.\n"); 791 printk(KERN_INFO "oprofile: using NMI interrupt.\n");
780 return 0; 792 return 0;
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index a9352b2c7ac4..b7e755f4178a 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -141,16 +141,6 @@ static struct notifier_block module_load_nb = {
141 .notifier_call = module_load_notify, 141 .notifier_call = module_load_notify,
142}; 142};
143 143
144
145static void end_sync(void)
146{
147 end_cpu_work();
148 /* make sure we don't leak task structs */
149 process_task_mortuary();
150 process_task_mortuary();
151}
152
153
154int sync_start(void) 144int sync_start(void)
155{ 145{
156 int err; 146 int err;
@@ -158,7 +148,7 @@ int sync_start(void)
158 if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL)) 148 if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL))
159 return -ENOMEM; 149 return -ENOMEM;
160 150
161 start_cpu_work(); 151 mutex_lock(&buffer_mutex);
162 152
163 err = task_handoff_register(&task_free_nb); 153 err = task_handoff_register(&task_free_nb);
164 if (err) 154 if (err)
@@ -173,7 +163,10 @@ int sync_start(void)
173 if (err) 163 if (err)
174 goto out4; 164 goto out4;
175 165
166 start_cpu_work();
167
176out: 168out:
169 mutex_unlock(&buffer_mutex);
177 return err; 170 return err;
178out4: 171out4:
179 profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); 172 profile_event_unregister(PROFILE_MUNMAP, &munmap_nb);
@@ -182,7 +175,6 @@ out3:
182out2: 175out2:
183 task_handoff_unregister(&task_free_nb); 176 task_handoff_unregister(&task_free_nb);
184out1: 177out1:
185 end_sync();
186 free_cpumask_var(marked_cpus); 178 free_cpumask_var(marked_cpus);
187 goto out; 179 goto out;
188} 180}
@@ -190,11 +182,20 @@ out1:
190 182
191void sync_stop(void) 183void sync_stop(void)
192{ 184{
185 /* flush buffers */
186 mutex_lock(&buffer_mutex);
187 end_cpu_work();
193 unregister_module_notifier(&module_load_nb); 188 unregister_module_notifier(&module_load_nb);
194 profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); 189 profile_event_unregister(PROFILE_MUNMAP, &munmap_nb);
195 profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); 190 profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb);
196 task_handoff_unregister(&task_free_nb); 191 task_handoff_unregister(&task_free_nb);
197 end_sync(); 192 mutex_unlock(&buffer_mutex);
193 flush_scheduled_work();
194
195 /* make sure we don't leak task structs */
196 process_task_mortuary();
197 process_task_mortuary();
198
198 free_cpumask_var(marked_cpus); 199 free_cpumask_var(marked_cpus);
199} 200}
200 201
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 219f79e2210a..f179ac2ea801 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -120,8 +120,6 @@ void end_cpu_work(void)
120 120
121 cancel_delayed_work(&b->work); 121 cancel_delayed_work(&b->work);
122 } 122 }
123
124 flush_scheduled_work();
125} 123}
126 124
127/* 125/*
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 403d1804b198..657555a5f30f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -402,11 +402,31 @@ static void perf_group_detach(struct perf_event *event)
402 } 402 }
403} 403}
404 404
405static inline int
406event_filter_match(struct perf_event *event)
407{
408 return event->cpu == -1 || event->cpu == smp_processor_id();
409}
410
405static void 411static void
406event_sched_out(struct perf_event *event, 412event_sched_out(struct perf_event *event,
407 struct perf_cpu_context *cpuctx, 413 struct perf_cpu_context *cpuctx,
408 struct perf_event_context *ctx) 414 struct perf_event_context *ctx)
409{ 415{
416 u64 delta;
417 /*
418 * An event which could not be activated because of
419 * filter mismatch still needs to have its timings
420 * maintained, otherwise bogus information is return
421 * via read() for time_enabled, time_running:
422 */
423 if (event->state == PERF_EVENT_STATE_INACTIVE
424 && !event_filter_match(event)) {
425 delta = ctx->time - event->tstamp_stopped;
426 event->tstamp_running += delta;
427 event->tstamp_stopped = ctx->time;
428 }
429
410 if (event->state != PERF_EVENT_STATE_ACTIVE) 430 if (event->state != PERF_EVENT_STATE_ACTIVE)
411 return; 431 return;
412 432
@@ -432,9 +452,7 @@ group_sched_out(struct perf_event *group_event,
432 struct perf_event_context *ctx) 452 struct perf_event_context *ctx)
433{ 453{
434 struct perf_event *event; 454 struct perf_event *event;
435 455 int state = group_event->state;
436 if (group_event->state != PERF_EVENT_STATE_ACTIVE)
437 return;
438 456
439 event_sched_out(group_event, cpuctx, ctx); 457 event_sched_out(group_event, cpuctx, ctx);
440 458
@@ -444,7 +462,7 @@ group_sched_out(struct perf_event *group_event,
444 list_for_each_entry(event, &group_event->sibling_list, group_entry) 462 list_for_each_entry(event, &group_event->sibling_list, group_entry)
445 event_sched_out(event, cpuctx, ctx); 463 event_sched_out(event, cpuctx, ctx);
446 464
447 if (group_event->attr.exclusive) 465 if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive)
448 cpuctx->exclusive = 0; 466 cpuctx->exclusive = 0;
449} 467}
450 468
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0d88ce9b9fb8..7cb1f45a1de1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -381,12 +381,19 @@ static int function_stat_show(struct seq_file *m, void *v)
381{ 381{
382 struct ftrace_profile *rec = v; 382 struct ftrace_profile *rec = v;
383 char str[KSYM_SYMBOL_LEN]; 383 char str[KSYM_SYMBOL_LEN];
384 int ret = 0;
384#ifdef CONFIG_FUNCTION_GRAPH_TRACER 385#ifdef CONFIG_FUNCTION_GRAPH_TRACER
385 static DEFINE_MUTEX(mutex);
386 static struct trace_seq s; 386 static struct trace_seq s;
387 unsigned long long avg; 387 unsigned long long avg;
388 unsigned long long stddev; 388 unsigned long long stddev;
389#endif 389#endif
390 mutex_lock(&ftrace_profile_lock);
391
392 /* we raced with function_profile_reset() */
393 if (unlikely(rec->counter == 0)) {
394 ret = -EBUSY;
395 goto out;
396 }
390 397
391 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 398 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
392 seq_printf(m, " %-30.30s %10lu", str, rec->counter); 399 seq_printf(m, " %-30.30s %10lu", str, rec->counter);
@@ -408,7 +415,6 @@ static int function_stat_show(struct seq_file *m, void *v)
408 do_div(stddev, (rec->counter - 1) * 1000); 415 do_div(stddev, (rec->counter - 1) * 1000);
409 } 416 }
410 417
411 mutex_lock(&mutex);
412 trace_seq_init(&s); 418 trace_seq_init(&s);
413 trace_print_graph_duration(rec->time, &s); 419 trace_print_graph_duration(rec->time, &s);
414 trace_seq_puts(&s, " "); 420 trace_seq_puts(&s, " ");
@@ -416,11 +422,12 @@ static int function_stat_show(struct seq_file *m, void *v)
416 trace_seq_puts(&s, " "); 422 trace_seq_puts(&s, " ");
417 trace_print_graph_duration(stddev, &s); 423 trace_print_graph_duration(stddev, &s);
418 trace_print_seq(m, &s); 424 trace_print_seq(m, &s);
419 mutex_unlock(&mutex);
420#endif 425#endif
421 seq_putc(m, '\n'); 426 seq_putc(m, '\n');
427out:
428 mutex_unlock(&ftrace_profile_lock);
422 429
423 return 0; 430 return ret;
424} 431}
425 432
426static void ftrace_profile_reset(struct ftrace_profile_stat *stat) 433static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 0d53c8e853b1..7f9c3c52ecc1 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -122,7 +122,7 @@ static void __touch_watchdog(void)
122 122
123void touch_softlockup_watchdog(void) 123void touch_softlockup_watchdog(void)
124{ 124{
125 __get_cpu_var(watchdog_touch_ts) = 0; 125 __raw_get_cpu_var(watchdog_touch_ts) = 0;
126} 126}
127EXPORT_SYMBOL(touch_softlockup_watchdog); 127EXPORT_SYMBOL(touch_softlockup_watchdog);
128 128
@@ -142,7 +142,14 @@ void touch_all_softlockup_watchdogs(void)
142#ifdef CONFIG_HARDLOCKUP_DETECTOR 142#ifdef CONFIG_HARDLOCKUP_DETECTOR
143void touch_nmi_watchdog(void) 143void touch_nmi_watchdog(void)
144{ 144{
145 __get_cpu_var(watchdog_nmi_touch) = true; 145 if (watchdog_enabled) {
146 unsigned cpu;
147
148 for_each_present_cpu(cpu) {
149 if (per_cpu(watchdog_nmi_touch, cpu) != true)
150 per_cpu(watchdog_nmi_touch, cpu) = true;
151 }
152 }
146 touch_softlockup_watchdog(); 153 touch_softlockup_watchdog();
147} 154}
148EXPORT_SYMBOL(touch_nmi_watchdog); 155EXPORT_SYMBOL(touch_nmi_watchdog);
@@ -433,6 +440,9 @@ static int watchdog_enable(int cpu)
433 wake_up_process(p); 440 wake_up_process(p);
434 } 441 }
435 442
443 /* if any cpu succeeds, watchdog is considered enabled for the system */
444 watchdog_enabled = 1;
445
436 return 0; 446 return 0;
437} 447}
438 448
@@ -455,9 +465,6 @@ static void watchdog_disable(int cpu)
455 per_cpu(softlockup_watchdog, cpu) = NULL; 465 per_cpu(softlockup_watchdog, cpu) = NULL;
456 kthread_stop(p); 466 kthread_stop(p);
457 } 467 }
458
459 /* if any cpu succeeds, watchdog is considered enabled for the system */
460 watchdog_enabled = 1;
461} 468}
462 469
463static void watchdog_enable_all_cpus(void) 470static void watchdog_enable_all_cpus(void)
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 624a96c636fd..6de4313924fb 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -50,6 +50,7 @@ static inline void callchain_init(struct callchain_node *node)
50 INIT_LIST_HEAD(&node->children); 50 INIT_LIST_HEAD(&node->children);
51 INIT_LIST_HEAD(&node->val); 51 INIT_LIST_HEAD(&node->val);
52 52
53 node->children_hit = 0;
53 node->parent = NULL; 54 node->parent = NULL;
54 node->hit = 0; 55 node->hit = 0;
55} 56}