aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/x86/oprofile/nmi_int.c199
-rw-r--r--arch/x86/oprofile/op_model_amd.c280
-rw-r--r--arch/x86/oprofile/op_model_p4.c52
-rw-r--r--arch/x86/oprofile/op_model_ppro.c77
-rw-r--r--arch/x86/oprofile/op_x86_model.h4
-rw-r--r--drivers/oprofile/cpu_buffer.c75
-rw-r--r--drivers/oprofile/oprof.c12
-rw-r--r--drivers/oprofile/oprof.h3
-rw-r--r--drivers/oprofile/timer_int.c78
-rw-r--r--include/linux/ftrace_event.h1
-rw-r--r--include/linux/module.h6
-rw-r--r--include/linux/ring_buffer.h6
-rw-r--r--include/trace/events/module.h18
-rw-r--r--include/trace/events/signal.h52
-rw-r--r--include/trace/ftrace.h33
-rw-r--r--kernel/module.c8
-rw-r--r--kernel/trace/ring_buffer.c101
-rw-r--r--kernel/trace/ring_buffer_benchmark.c2
-rw-r--r--kernel/trace/trace.c30
-rw-r--r--kernel/trace/trace_functions_graph.c5
-rw-r--r--kernel/trace/trace_selftest.c2
22 files changed, 609 insertions, 436 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index d329b053a718..033161805587 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4165,6 +4165,7 @@ OPROFILE
4165M: Robert Richter <robert.richter@amd.com> 4165M: Robert Richter <robert.richter@amd.com>
4166L: oprofile-list@lists.sf.net 4166L: oprofile-list@lists.sf.net
4167S: Maintained 4167S: Maintained
4168F: arch/*/include/asm/oprofile*.h
4168F: arch/*/oprofile/ 4169F: arch/*/oprofile/
4169F: drivers/oprofile/ 4170F: drivers/oprofile/
4170F: include/linux/oprofile.h 4171F: include/linux/oprofile.h
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 2c505ee71014..b28d2f1253bb 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -31,8 +31,9 @@ static struct op_x86_model_spec *model;
31static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); 31static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
32static DEFINE_PER_CPU(unsigned long, saved_lvtpc); 32static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
33 33
34/* 0 == registered but off, 1 == registered and on */ 34/* must be protected with get_online_cpus()/put_online_cpus(): */
35static int nmi_enabled = 0; 35static int nmi_enabled;
36static int ctr_running;
36 37
37struct op_counter_config counter_config[OP_MAX_COUNTER]; 38struct op_counter_config counter_config[OP_MAX_COUNTER];
38 39
@@ -61,12 +62,16 @@ static int profile_exceptions_notify(struct notifier_block *self,
61{ 62{
62 struct die_args *args = (struct die_args *)data; 63 struct die_args *args = (struct die_args *)data;
63 int ret = NOTIFY_DONE; 64 int ret = NOTIFY_DONE;
64 int cpu = smp_processor_id();
65 65
66 switch (val) { 66 switch (val) {
67 case DIE_NMI: 67 case DIE_NMI:
68 case DIE_NMI_IPI: 68 case DIE_NMI_IPI:
69 model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)); 69 if (ctr_running)
70 model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
71 else if (!nmi_enabled)
72 break;
73 else
74 model->stop(&__get_cpu_var(cpu_msrs));
70 ret = NOTIFY_STOP; 75 ret = NOTIFY_STOP;
71 break; 76 break;
72 default: 77 default:
@@ -95,24 +100,36 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs)
95static void nmi_cpu_start(void *dummy) 100static void nmi_cpu_start(void *dummy)
96{ 101{
97 struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); 102 struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
98 model->start(msrs); 103 if (!msrs->controls)
104 WARN_ON_ONCE(1);
105 else
106 model->start(msrs);
99} 107}
100 108
101static int nmi_start(void) 109static int nmi_start(void)
102{ 110{
111 get_online_cpus();
103 on_each_cpu(nmi_cpu_start, NULL, 1); 112 on_each_cpu(nmi_cpu_start, NULL, 1);
113 ctr_running = 1;
114 put_online_cpus();
104 return 0; 115 return 0;
105} 116}
106 117
107static void nmi_cpu_stop(void *dummy) 118static void nmi_cpu_stop(void *dummy)
108{ 119{
109 struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); 120 struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
110 model->stop(msrs); 121 if (!msrs->controls)
122 WARN_ON_ONCE(1);
123 else
124 model->stop(msrs);
111} 125}
112 126
113static void nmi_stop(void) 127static void nmi_stop(void)
114{ 128{
129 get_online_cpus();
115 on_each_cpu(nmi_cpu_stop, NULL, 1); 130 on_each_cpu(nmi_cpu_stop, NULL, 1);
131 ctr_running = 0;
132 put_online_cpus();
116} 133}
117 134
118#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX 135#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
@@ -252,7 +269,10 @@ static int nmi_switch_event(void)
252 if (nmi_multiplex_on() < 0) 269 if (nmi_multiplex_on() < 0)
253 return -EINVAL; /* not necessary */ 270 return -EINVAL; /* not necessary */
254 271
255 on_each_cpu(nmi_cpu_switch, NULL, 1); 272 get_online_cpus();
273 if (ctr_running)
274 on_each_cpu(nmi_cpu_switch, NULL, 1);
275 put_online_cpus();
256 276
257 return 0; 277 return 0;
258} 278}
@@ -295,6 +315,7 @@ static void free_msrs(void)
295 kfree(per_cpu(cpu_msrs, i).controls); 315 kfree(per_cpu(cpu_msrs, i).controls);
296 per_cpu(cpu_msrs, i).controls = NULL; 316 per_cpu(cpu_msrs, i).controls = NULL;
297 } 317 }
318 nmi_shutdown_mux();
298} 319}
299 320
300static int allocate_msrs(void) 321static int allocate_msrs(void)
@@ -307,14 +328,21 @@ static int allocate_msrs(void)
307 per_cpu(cpu_msrs, i).counters = kzalloc(counters_size, 328 per_cpu(cpu_msrs, i).counters = kzalloc(counters_size,
308 GFP_KERNEL); 329 GFP_KERNEL);
309 if (!per_cpu(cpu_msrs, i).counters) 330 if (!per_cpu(cpu_msrs, i).counters)
310 return 0; 331 goto fail;
311 per_cpu(cpu_msrs, i).controls = kzalloc(controls_size, 332 per_cpu(cpu_msrs, i).controls = kzalloc(controls_size,
312 GFP_KERNEL); 333 GFP_KERNEL);
313 if (!per_cpu(cpu_msrs, i).controls) 334 if (!per_cpu(cpu_msrs, i).controls)
314 return 0; 335 goto fail;
315 } 336 }
316 337
338 if (!nmi_setup_mux())
339 goto fail;
340
317 return 1; 341 return 1;
342
343fail:
344 free_msrs();
345 return 0;
318} 346}
319 347
320static void nmi_cpu_setup(void *dummy) 348static void nmi_cpu_setup(void *dummy)
@@ -336,49 +364,6 @@ static struct notifier_block profile_exceptions_nb = {
336 .priority = 2 364 .priority = 2
337}; 365};
338 366
339static int nmi_setup(void)
340{
341 int err = 0;
342 int cpu;
343
344 if (!allocate_msrs())
345 err = -ENOMEM;
346 else if (!nmi_setup_mux())
347 err = -ENOMEM;
348 else
349 err = register_die_notifier(&profile_exceptions_nb);
350
351 if (err) {
352 free_msrs();
353 nmi_shutdown_mux();
354 return err;
355 }
356
357 /* We need to serialize save and setup for HT because the subset
358 * of msrs are distinct for save and setup operations
359 */
360
361 /* Assume saved/restored counters are the same on all CPUs */
362 model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
363 for_each_possible_cpu(cpu) {
364 if (!cpu)
365 continue;
366
367 memcpy(per_cpu(cpu_msrs, cpu).counters,
368 per_cpu(cpu_msrs, 0).counters,
369 sizeof(struct op_msr) * model->num_counters);
370
371 memcpy(per_cpu(cpu_msrs, cpu).controls,
372 per_cpu(cpu_msrs, 0).controls,
373 sizeof(struct op_msr) * model->num_controls);
374
375 mux_clone(cpu);
376 }
377 on_each_cpu(nmi_cpu_setup, NULL, 1);
378 nmi_enabled = 1;
379 return 0;
380}
381
382static void nmi_cpu_restore_registers(struct op_msrs *msrs) 367static void nmi_cpu_restore_registers(struct op_msrs *msrs)
383{ 368{
384 struct op_msr *counters = msrs->counters; 369 struct op_msr *counters = msrs->counters;
@@ -412,20 +397,24 @@ static void nmi_cpu_shutdown(void *dummy)
412 apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); 397 apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
413 apic_write(APIC_LVTERR, v); 398 apic_write(APIC_LVTERR, v);
414 nmi_cpu_restore_registers(msrs); 399 nmi_cpu_restore_registers(msrs);
400 if (model->cpu_down)
401 model->cpu_down();
415} 402}
416 403
417static void nmi_shutdown(void) 404static void nmi_cpu_up(void *dummy)
418{ 405{
419 struct op_msrs *msrs; 406 if (nmi_enabled)
407 nmi_cpu_setup(dummy);
408 if (ctr_running)
409 nmi_cpu_start(dummy);
410}
420 411
421 nmi_enabled = 0; 412static void nmi_cpu_down(void *dummy)
422 on_each_cpu(nmi_cpu_shutdown, NULL, 1); 413{
423 unregister_die_notifier(&profile_exceptions_nb); 414 if (ctr_running)
424 nmi_shutdown_mux(); 415 nmi_cpu_stop(dummy);
425 msrs = &get_cpu_var(cpu_msrs); 416 if (nmi_enabled)
426 model->shutdown(msrs); 417 nmi_cpu_shutdown(dummy);
427 free_msrs();
428 put_cpu_var(cpu_msrs);
429} 418}
430 419
431static int nmi_create_files(struct super_block *sb, struct dentry *root) 420static int nmi_create_files(struct super_block *sb, struct dentry *root)
@@ -457,7 +446,6 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root)
457 return 0; 446 return 0;
458} 447}
459 448
460#ifdef CONFIG_SMP
461static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, 449static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
462 void *data) 450 void *data)
463{ 451{
@@ -465,10 +453,10 @@ static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
465 switch (action) { 453 switch (action) {
466 case CPU_DOWN_FAILED: 454 case CPU_DOWN_FAILED:
467 case CPU_ONLINE: 455 case CPU_ONLINE:
468 smp_call_function_single(cpu, nmi_cpu_start, NULL, 0); 456 smp_call_function_single(cpu, nmi_cpu_up, NULL, 0);
469 break; 457 break;
470 case CPU_DOWN_PREPARE: 458 case CPU_DOWN_PREPARE:
471 smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1); 459 smp_call_function_single(cpu, nmi_cpu_down, NULL, 1);
472 break; 460 break;
473 } 461 }
474 return NOTIFY_DONE; 462 return NOTIFY_DONE;
@@ -477,7 +465,75 @@ static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
477static struct notifier_block oprofile_cpu_nb = { 465static struct notifier_block oprofile_cpu_nb = {
478 .notifier_call = oprofile_cpu_notifier 466 .notifier_call = oprofile_cpu_notifier
479}; 467};
480#endif 468
469static int nmi_setup(void)
470{
471 int err = 0;
472 int cpu;
473
474 if (!allocate_msrs())
475 return -ENOMEM;
476
477 /* We need to serialize save and setup for HT because the subset
478 * of msrs are distinct for save and setup operations
479 */
480
481 /* Assume saved/restored counters are the same on all CPUs */
482 err = model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
483 if (err)
484 goto fail;
485
486 for_each_possible_cpu(cpu) {
487 if (!cpu)
488 continue;
489
490 memcpy(per_cpu(cpu_msrs, cpu).counters,
491 per_cpu(cpu_msrs, 0).counters,
492 sizeof(struct op_msr) * model->num_counters);
493
494 memcpy(per_cpu(cpu_msrs, cpu).controls,
495 per_cpu(cpu_msrs, 0).controls,
496 sizeof(struct op_msr) * model->num_controls);
497
498 mux_clone(cpu);
499 }
500
501 nmi_enabled = 0;
502 ctr_running = 0;
503 barrier();
504 err = register_die_notifier(&profile_exceptions_nb);
505 if (err)
506 goto fail;
507
508 get_online_cpus();
509 register_cpu_notifier(&oprofile_cpu_nb);
510 on_each_cpu(nmi_cpu_setup, NULL, 1);
511 nmi_enabled = 1;
512 put_online_cpus();
513
514 return 0;
515fail:
516 free_msrs();
517 return err;
518}
519
520static void nmi_shutdown(void)
521{
522 struct op_msrs *msrs;
523
524 get_online_cpus();
525 unregister_cpu_notifier(&oprofile_cpu_nb);
526 on_each_cpu(nmi_cpu_shutdown, NULL, 1);
527 nmi_enabled = 0;
528 ctr_running = 0;
529 put_online_cpus();
530 barrier();
531 unregister_die_notifier(&profile_exceptions_nb);
532 msrs = &get_cpu_var(cpu_msrs);
533 model->shutdown(msrs);
534 free_msrs();
535 put_cpu_var(cpu_msrs);
536}
481 537
482#ifdef CONFIG_PM 538#ifdef CONFIG_PM
483 539
@@ -687,9 +743,6 @@ int __init op_nmi_init(struct oprofile_operations *ops)
687 return -ENODEV; 743 return -ENODEV;
688 } 744 }
689 745
690#ifdef CONFIG_SMP
691 register_cpu_notifier(&oprofile_cpu_nb);
692#endif
693 /* default values, can be overwritten by model */ 746 /* default values, can be overwritten by model */
694 ops->create_files = nmi_create_files; 747 ops->create_files = nmi_create_files;
695 ops->setup = nmi_setup; 748 ops->setup = nmi_setup;
@@ -716,12 +769,6 @@ int __init op_nmi_init(struct oprofile_operations *ops)
716 769
717void op_nmi_exit(void) 770void op_nmi_exit(void)
718{ 771{
719 if (using_nmi) { 772 if (using_nmi)
720 exit_sysfs(); 773 exit_sysfs();
721#ifdef CONFIG_SMP
722 unregister_cpu_notifier(&oprofile_cpu_nb);
723#endif
724 }
725 if (model->exit)
726 model->exit();
727} 774}
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 090cbbec7dbd..b67a6b5aa8d4 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -30,13 +30,10 @@
30#include "op_counter.h" 30#include "op_counter.h"
31 31
32#define NUM_COUNTERS 4 32#define NUM_COUNTERS 4
33#define NUM_CONTROLS 4
34#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX 33#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
35#define NUM_VIRT_COUNTERS 32 34#define NUM_VIRT_COUNTERS 32
36#define NUM_VIRT_CONTROLS 32
37#else 35#else
38#define NUM_VIRT_COUNTERS NUM_COUNTERS 36#define NUM_VIRT_COUNTERS NUM_COUNTERS
39#define NUM_VIRT_CONTROLS NUM_CONTROLS
40#endif 37#endif
41 38
42#define OP_EVENT_MASK 0x0FFF 39#define OP_EVENT_MASK 0x0FFF
@@ -105,102 +102,6 @@ static u32 get_ibs_caps(void)
105 return ibs_caps; 102 return ibs_caps;
106} 103}
107 104
108#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
109
110static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
111 struct op_msrs const * const msrs)
112{
113 u64 val;
114 int i;
115
116 /* enable active counters */
117 for (i = 0; i < NUM_COUNTERS; ++i) {
118 int virt = op_x86_phys_to_virt(i);
119 if (!reset_value[virt])
120 continue;
121 rdmsrl(msrs->controls[i].addr, val);
122 val &= model->reserved;
123 val |= op_x86_get_ctrl(model, &counter_config[virt]);
124 wrmsrl(msrs->controls[i].addr, val);
125 }
126}
127
128#endif
129
130/* functions for op_amd_spec */
131
132static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
133{
134 int i;
135
136 for (i = 0; i < NUM_COUNTERS; i++) {
137 if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
138 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
139 }
140
141 for (i = 0; i < NUM_CONTROLS; i++) {
142 if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
143 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
144 }
145}
146
147static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
148 struct op_msrs const * const msrs)
149{
150 u64 val;
151 int i;
152
153 /* setup reset_value */
154 for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
155 if (counter_config[i].enabled
156 && msrs->counters[op_x86_virt_to_phys(i)].addr)
157 reset_value[i] = counter_config[i].count;
158 else
159 reset_value[i] = 0;
160 }
161
162 /* clear all counters */
163 for (i = 0; i < NUM_CONTROLS; ++i) {
164 if (unlikely(!msrs->controls[i].addr)) {
165 if (counter_config[i].enabled && !smp_processor_id())
166 /*
167 * counter is reserved, this is on all
168 * cpus, so report only for cpu #0
169 */
170 op_x86_warn_reserved(i);
171 continue;
172 }
173 rdmsrl(msrs->controls[i].addr, val);
174 if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
175 op_x86_warn_in_use(i);
176 val &= model->reserved;
177 wrmsrl(msrs->controls[i].addr, val);
178 }
179
180 /* avoid a false detection of ctr overflows in NMI handler */
181 for (i = 0; i < NUM_COUNTERS; ++i) {
182 if (unlikely(!msrs->counters[i].addr))
183 continue;
184 wrmsrl(msrs->counters[i].addr, -1LL);
185 }
186
187 /* enable active counters */
188 for (i = 0; i < NUM_COUNTERS; ++i) {
189 int virt = op_x86_phys_to_virt(i);
190 if (!reset_value[virt])
191 continue;
192
193 /* setup counter registers */
194 wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
195
196 /* setup control registers */
197 rdmsrl(msrs->controls[i].addr, val);
198 val &= model->reserved;
199 val |= op_x86_get_ctrl(model, &counter_config[virt]);
200 wrmsrl(msrs->controls[i].addr, val);
201 }
202}
203
204/* 105/*
205 * 16-bit Linear Feedback Shift Register (LFSR) 106 * 16-bit Linear Feedback Shift Register (LFSR)
206 * 107 *
@@ -365,6 +266,125 @@ static void op_amd_stop_ibs(void)
365 wrmsrl(MSR_AMD64_IBSOPCTL, 0); 266 wrmsrl(MSR_AMD64_IBSOPCTL, 0);
366} 267}
367 268
269#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
270
271static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
272 struct op_msrs const * const msrs)
273{
274 u64 val;
275 int i;
276
277 /* enable active counters */
278 for (i = 0; i < NUM_COUNTERS; ++i) {
279 int virt = op_x86_phys_to_virt(i);
280 if (!reset_value[virt])
281 continue;
282 rdmsrl(msrs->controls[i].addr, val);
283 val &= model->reserved;
284 val |= op_x86_get_ctrl(model, &counter_config[virt]);
285 wrmsrl(msrs->controls[i].addr, val);
286 }
287}
288
289#endif
290
291/* functions for op_amd_spec */
292
293static void op_amd_shutdown(struct op_msrs const * const msrs)
294{
295 int i;
296
297 for (i = 0; i < NUM_COUNTERS; ++i) {
298 if (!msrs->counters[i].addr)
299 continue;
300 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
301 release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
302 }
303}
304
305static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
306{
307 int i;
308
309 for (i = 0; i < NUM_COUNTERS; i++) {
310 if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
311 goto fail;
312 if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
313 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
314 goto fail;
315 }
316 /* both registers must be reserved */
317 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
318 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
319 continue;
320 fail:
321 if (!counter_config[i].enabled)
322 continue;
323 op_x86_warn_reserved(i);
324 op_amd_shutdown(msrs);
325 return -EBUSY;
326 }
327
328 return 0;
329}
330
331static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
332 struct op_msrs const * const msrs)
333{
334 u64 val;
335 int i;
336
337 /* setup reset_value */
338 for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
339 if (counter_config[i].enabled
340 && msrs->counters[op_x86_virt_to_phys(i)].addr)
341 reset_value[i] = counter_config[i].count;
342 else
343 reset_value[i] = 0;
344 }
345
346 /* clear all counters */
347 for (i = 0; i < NUM_COUNTERS; ++i) {
348 if (!msrs->controls[i].addr)
349 continue;
350 rdmsrl(msrs->controls[i].addr, val);
351 if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
352 op_x86_warn_in_use(i);
353 val &= model->reserved;
354 wrmsrl(msrs->controls[i].addr, val);
355 /*
356 * avoid a false detection of ctr overflows in NMI
357 * handler
358 */
359 wrmsrl(msrs->counters[i].addr, -1LL);
360 }
361
362 /* enable active counters */
363 for (i = 0; i < NUM_COUNTERS; ++i) {
364 int virt = op_x86_phys_to_virt(i);
365 if (!reset_value[virt])
366 continue;
367
368 /* setup counter registers */
369 wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
370
371 /* setup control registers */
372 rdmsrl(msrs->controls[i].addr, val);
373 val &= model->reserved;
374 val |= op_x86_get_ctrl(model, &counter_config[virt]);
375 wrmsrl(msrs->controls[i].addr, val);
376 }
377
378 if (ibs_caps)
379 setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0);
380}
381
382static void op_amd_cpu_shutdown(void)
383{
384 if (ibs_caps)
385 setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1);
386}
387
368static int op_amd_check_ctrs(struct pt_regs * const regs, 388static int op_amd_check_ctrs(struct pt_regs * const regs,
369 struct op_msrs const * const msrs) 389 struct op_msrs const * const msrs)
370{ 390{
@@ -425,42 +445,16 @@ static void op_amd_stop(struct op_msrs const * const msrs)
425 op_amd_stop_ibs(); 445 op_amd_stop_ibs();
426} 446}
427 447
428static void op_amd_shutdown(struct op_msrs const * const msrs) 448static int __init_ibs_nmi(void)
429{
430 int i;
431
432 for (i = 0; i < NUM_COUNTERS; ++i) {
433 if (msrs->counters[i].addr)
434 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
435 }
436 for (i = 0; i < NUM_CONTROLS; ++i) {
437 if (msrs->controls[i].addr)
438 release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
439 }
440}
441
442static u8 ibs_eilvt_off;
443
444static inline void apic_init_ibs_nmi_per_cpu(void *arg)
445{
446 ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0);
447}
448
449static inline void apic_clear_ibs_nmi_per_cpu(void *arg)
450{
451 setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1);
452}
453
454static int init_ibs_nmi(void)
455{ 449{
456#define IBSCTL_LVTOFFSETVAL (1 << 8) 450#define IBSCTL_LVTOFFSETVAL (1 << 8)
457#define IBSCTL 0x1cc 451#define IBSCTL 0x1cc
458 struct pci_dev *cpu_cfg; 452 struct pci_dev *cpu_cfg;
459 int nodes; 453 int nodes;
460 u32 value = 0; 454 u32 value = 0;
455 u8 ibs_eilvt_off;
461 456
462 /* per CPU setup */ 457 ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1);
463 on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 1);
464 458
465 nodes = 0; 459 nodes = 0;
466 cpu_cfg = NULL; 460 cpu_cfg = NULL;
@@ -490,22 +484,15 @@ static int init_ibs_nmi(void)
490 return 0; 484 return 0;
491} 485}
492 486
493/* uninitialize the APIC for the IBS interrupts if needed */
494static void clear_ibs_nmi(void)
495{
496 if (ibs_caps)
497 on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
498}
499
500/* initialize the APIC for the IBS interrupts if available */ 487/* initialize the APIC for the IBS interrupts if available */
501static void ibs_init(void) 488static void init_ibs(void)
502{ 489{
503 ibs_caps = get_ibs_caps(); 490 ibs_caps = get_ibs_caps();
504 491
505 if (!ibs_caps) 492 if (!ibs_caps)
506 return; 493 return;
507 494
508 if (init_ibs_nmi()) { 495 if (__init_ibs_nmi()) {
509 ibs_caps = 0; 496 ibs_caps = 0;
510 return; 497 return;
511 } 498 }
@@ -514,14 +501,6 @@ static void ibs_init(void)
514 (unsigned)ibs_caps); 501 (unsigned)ibs_caps);
515} 502}
516 503
517static void ibs_exit(void)
518{
519 if (!ibs_caps)
520 return;
521
522 clear_ibs_nmi();
523}
524
525static int (*create_arch_files)(struct super_block *sb, struct dentry *root); 504static int (*create_arch_files)(struct super_block *sb, struct dentry *root);
526 505
527static int setup_ibs_files(struct super_block *sb, struct dentry *root) 506static int setup_ibs_files(struct super_block *sb, struct dentry *root)
@@ -570,27 +549,22 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
570 549
571static int op_amd_init(struct oprofile_operations *ops) 550static int op_amd_init(struct oprofile_operations *ops)
572{ 551{
573 ibs_init(); 552 init_ibs();
574 create_arch_files = ops->create_files; 553 create_arch_files = ops->create_files;
575 ops->create_files = setup_ibs_files; 554 ops->create_files = setup_ibs_files;
576 return 0; 555 return 0;
577} 556}
578 557
579static void op_amd_exit(void)
580{
581 ibs_exit();
582}
583
584struct op_x86_model_spec op_amd_spec = { 558struct op_x86_model_spec op_amd_spec = {
585 .num_counters = NUM_COUNTERS, 559 .num_counters = NUM_COUNTERS,
586 .num_controls = NUM_CONTROLS, 560 .num_controls = NUM_COUNTERS,
587 .num_virt_counters = NUM_VIRT_COUNTERS, 561 .num_virt_counters = NUM_VIRT_COUNTERS,
588 .reserved = MSR_AMD_EVENTSEL_RESERVED, 562 .reserved = MSR_AMD_EVENTSEL_RESERVED,
589 .event_mask = OP_EVENT_MASK, 563 .event_mask = OP_EVENT_MASK,
590 .init = op_amd_init, 564 .init = op_amd_init,
591 .exit = op_amd_exit,
592 .fill_in_addresses = &op_amd_fill_in_addresses, 565 .fill_in_addresses = &op_amd_fill_in_addresses,
593 .setup_ctrs = &op_amd_setup_ctrs, 566 .setup_ctrs = &op_amd_setup_ctrs,
567 .cpu_down = &op_amd_cpu_shutdown,
594 .check_ctrs = &op_amd_check_ctrs, 568 .check_ctrs = &op_amd_check_ctrs,
595 .start = &op_amd_start, 569 .start = &op_amd_start,
596 .stop = &op_amd_stop, 570 .stop = &op_amd_stop,
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index e6a160a4684a..182558dd5515 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -385,8 +385,26 @@ static unsigned int get_stagger(void)
385 385
386static unsigned long reset_value[NUM_COUNTERS_NON_HT]; 386static unsigned long reset_value[NUM_COUNTERS_NON_HT];
387 387
388static void p4_shutdown(struct op_msrs const * const msrs)
389{
390 int i;
388 391
389static void p4_fill_in_addresses(struct op_msrs * const msrs) 392 for (i = 0; i < num_counters; ++i) {
393 if (msrs->counters[i].addr)
394 release_perfctr_nmi(msrs->counters[i].addr);
395 }
396 /*
397 * some of the control registers are specially reserved in
398 * conjunction with the counter registers (hence the starting offset).
399 * This saves a few bits.
400 */
401 for (i = num_counters; i < num_controls; ++i) {
402 if (msrs->controls[i].addr)
403 release_evntsel_nmi(msrs->controls[i].addr);
404 }
405}
406
407static int p4_fill_in_addresses(struct op_msrs * const msrs)
390{ 408{
391 unsigned int i; 409 unsigned int i;
392 unsigned int addr, cccraddr, stag; 410 unsigned int addr, cccraddr, stag;
@@ -468,6 +486,18 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
468 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; 486 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
469 } 487 }
470 } 488 }
489
490 for (i = 0; i < num_counters; ++i) {
491 if (!counter_config[i].enabled)
492 continue;
493 if (msrs->controls[i].addr)
494 continue;
495 op_x86_warn_reserved(i);
496 p4_shutdown(msrs);
497 return -EBUSY;
498 }
499
500 return 0;
471} 501}
472 502
473 503
@@ -668,26 +698,6 @@ static void p4_stop(struct op_msrs const * const msrs)
668 } 698 }
669} 699}
670 700
671static void p4_shutdown(struct op_msrs const * const msrs)
672{
673 int i;
674
675 for (i = 0; i < num_counters; ++i) {
676 if (msrs->counters[i].addr)
677 release_perfctr_nmi(msrs->counters[i].addr);
678 }
679 /*
680 * some of the control registers are specially reserved in
681 * conjunction with the counter registers (hence the starting offset).
682 * This saves a few bits.
683 */
684 for (i = num_counters; i < num_controls; ++i) {
685 if (msrs->controls[i].addr)
686 release_evntsel_nmi(msrs->controls[i].addr);
687 }
688}
689
690
691#ifdef CONFIG_SMP 701#ifdef CONFIG_SMP
692struct op_x86_model_spec op_p4_ht2_spec = { 702struct op_x86_model_spec op_p4_ht2_spec = {
693 .num_counters = NUM_COUNTERS_HT2, 703 .num_counters = NUM_COUNTERS_HT2,
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 2bf90fafa7b5..1fd17cfb956b 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -30,19 +30,46 @@ static int counter_width = 32;
30 30
31static u64 *reset_value; 31static u64 *reset_value;
32 32
33static void ppro_fill_in_addresses(struct op_msrs * const msrs) 33static void ppro_shutdown(struct op_msrs const * const msrs)
34{ 34{
35 int i; 35 int i;
36 36
37 for (i = 0; i < num_counters; i++) { 37 for (i = 0; i < num_counters; ++i) {
38 if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) 38 if (!msrs->counters[i].addr)
39 msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; 39 continue;
40 release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
41 release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
42 }
43 if (reset_value) {
44 kfree(reset_value);
45 reset_value = NULL;
40 } 46 }
47}
48
49static int ppro_fill_in_addresses(struct op_msrs * const msrs)
50{
51 int i;
41 52
42 for (i = 0; i < num_counters; i++) { 53 for (i = 0; i < num_counters; i++) {
43 if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) 54 if (!reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
44 msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; 55 goto fail;
56 if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) {
57 release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
58 goto fail;
59 }
60 /* both registers must be reserved */
61 msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
62 msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
63 continue;
64 fail:
65 if (!counter_config[i].enabled)
66 continue;
67 op_x86_warn_reserved(i);
68 ppro_shutdown(msrs);
69 return -EBUSY;
45 } 70 }
71
72 return 0;
46} 73}
47 74
48 75
@@ -78,26 +105,17 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
78 105
79 /* clear all counters */ 106 /* clear all counters */
80 for (i = 0; i < num_counters; ++i) { 107 for (i = 0; i < num_counters; ++i) {
81 if (unlikely(!msrs->controls[i].addr)) { 108 if (!msrs->controls[i].addr)
82 if (counter_config[i].enabled && !smp_processor_id())
83 /*
84 * counter is reserved, this is on all
85 * cpus, so report only for cpu #0
86 */
87 op_x86_warn_reserved(i);
88 continue; 109 continue;
89 }
90 rdmsrl(msrs->controls[i].addr, val); 110 rdmsrl(msrs->controls[i].addr, val);
91 if (val & ARCH_PERFMON_EVENTSEL_ENABLE) 111 if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
92 op_x86_warn_in_use(i); 112 op_x86_warn_in_use(i);
93 val &= model->reserved; 113 val &= model->reserved;
94 wrmsrl(msrs->controls[i].addr, val); 114 wrmsrl(msrs->controls[i].addr, val);
95 } 115 /*
96 116 * avoid a false detection of ctr overflows in NMI *
97 /* avoid a false detection of ctr overflows in NMI handler */ 117 * handler
98 for (i = 0; i < num_counters; ++i) { 118 */
99 if (unlikely(!msrs->counters[i].addr))
100 continue;
101 wrmsrl(msrs->counters[i].addr, -1LL); 119 wrmsrl(msrs->counters[i].addr, -1LL);
102 } 120 }
103 121
@@ -189,25 +207,6 @@ static void ppro_stop(struct op_msrs const * const msrs)
189 } 207 }
190} 208}
191 209
192static void ppro_shutdown(struct op_msrs const * const msrs)
193{
194 int i;
195
196 for (i = 0; i < num_counters; ++i) {
197 if (msrs->counters[i].addr)
198 release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
199 }
200 for (i = 0; i < num_counters; ++i) {
201 if (msrs->controls[i].addr)
202 release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
203 }
204 if (reset_value) {
205 kfree(reset_value);
206 reset_value = NULL;
207 }
208}
209
210
211struct op_x86_model_spec op_ppro_spec = { 210struct op_x86_model_spec op_ppro_spec = {
212 .num_counters = 2, 211 .num_counters = 2,
213 .num_controls = 2, 212 .num_controls = 2,
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index ff82a755edd4..89017fa1fd63 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -40,10 +40,10 @@ struct op_x86_model_spec {
40 u64 reserved; 40 u64 reserved;
41 u16 event_mask; 41 u16 event_mask;
42 int (*init)(struct oprofile_operations *ops); 42 int (*init)(struct oprofile_operations *ops);
43 void (*exit)(void); 43 int (*fill_in_addresses)(struct op_msrs * const msrs);
44 void (*fill_in_addresses)(struct op_msrs * const msrs);
45 void (*setup_ctrs)(struct op_x86_model_spec const *model, 44 void (*setup_ctrs)(struct op_x86_model_spec const *model,
46 struct op_msrs const * const msrs); 45 struct op_msrs const * const msrs);
46 void (*cpu_down)(void);
47 int (*check_ctrs)(struct pt_regs * const regs, 47 int (*check_ctrs)(struct pt_regs * const regs,
48 struct op_msrs const * const msrs); 48 struct op_msrs const * const msrs);
49 void (*start)(struct op_msrs const * const msrs); 49 void (*start)(struct op_msrs const * const msrs);
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 166b67ea622f..219f79e2210a 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -30,23 +30,7 @@
30 30
31#define OP_BUFFER_FLAGS 0 31#define OP_BUFFER_FLAGS 0
32 32
33/* 33static struct ring_buffer *op_ring_buffer;
34 * Read and write access is using spin locking. Thus, writing to the
35 * buffer by NMI handler (x86) could occur also during critical
36 * sections when reading the buffer. To avoid this, there are 2
37 * buffers for independent read and write access. Read access is in
38 * process context only, write access only in the NMI handler. If the
39 * read buffer runs empty, both buffers are swapped atomically. There
40 * is potentially a small window during swapping where the buffers are
41 * disabled and samples could be lost.
42 *
43 * Using 2 buffers is a little bit overhead, but the solution is clear
44 * and does not require changes in the ring buffer implementation. It
45 * can be changed to a single buffer solution when the ring buffer
46 * access is implemented as non-locking atomic code.
47 */
48static struct ring_buffer *op_ring_buffer_read;
49static struct ring_buffer *op_ring_buffer_write;
50DEFINE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer); 34DEFINE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer);
51 35
52static void wq_sync_buffer(struct work_struct *work); 36static void wq_sync_buffer(struct work_struct *work);
@@ -68,12 +52,9 @@ void oprofile_cpu_buffer_inc_smpl_lost(void)
68 52
69void free_cpu_buffers(void) 53void free_cpu_buffers(void)
70{ 54{
71 if (op_ring_buffer_read) 55 if (op_ring_buffer)
72 ring_buffer_free(op_ring_buffer_read); 56 ring_buffer_free(op_ring_buffer);
73 op_ring_buffer_read = NULL; 57 op_ring_buffer = NULL;
74 if (op_ring_buffer_write)
75 ring_buffer_free(op_ring_buffer_write);
76 op_ring_buffer_write = NULL;
77} 58}
78 59
79#define RB_EVENT_HDR_SIZE 4 60#define RB_EVENT_HDR_SIZE 4
@@ -86,11 +67,8 @@ int alloc_cpu_buffers(void)
86 unsigned long byte_size = buffer_size * (sizeof(struct op_sample) + 67 unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
87 RB_EVENT_HDR_SIZE); 68 RB_EVENT_HDR_SIZE);
88 69
89 op_ring_buffer_read = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS); 70 op_ring_buffer = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
90 if (!op_ring_buffer_read) 71 if (!op_ring_buffer)
91 goto fail;
92 op_ring_buffer_write = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
93 if (!op_ring_buffer_write)
94 goto fail; 72 goto fail;
95 73
96 for_each_possible_cpu(i) { 74 for_each_possible_cpu(i) {
@@ -162,16 +140,11 @@ struct op_sample
162*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size) 140*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size)
163{ 141{
164 entry->event = ring_buffer_lock_reserve 142 entry->event = ring_buffer_lock_reserve
165 (op_ring_buffer_write, sizeof(struct op_sample) + 143 (op_ring_buffer, sizeof(struct op_sample) +
166 size * sizeof(entry->sample->data[0])); 144 size * sizeof(entry->sample->data[0]));
167 if (entry->event) 145 if (!entry->event)
168 entry->sample = ring_buffer_event_data(entry->event);
169 else
170 entry->sample = NULL;
171
172 if (!entry->sample)
173 return NULL; 146 return NULL;
174 147 entry->sample = ring_buffer_event_data(entry->event);
175 entry->size = size; 148 entry->size = size;
176 entry->data = entry->sample->data; 149 entry->data = entry->sample->data;
177 150
@@ -180,25 +153,16 @@ struct op_sample
180 153
181int op_cpu_buffer_write_commit(struct op_entry *entry) 154int op_cpu_buffer_write_commit(struct op_entry *entry)
182{ 155{
183 return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event); 156 return ring_buffer_unlock_commit(op_ring_buffer, entry->event);
184} 157}
185 158
186struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu) 159struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
187{ 160{
188 struct ring_buffer_event *e; 161 struct ring_buffer_event *e;
189 e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); 162 e = ring_buffer_consume(op_ring_buffer, cpu, NULL, NULL);
190 if (e) 163 if (!e)
191 goto event;
192 if (ring_buffer_swap_cpu(op_ring_buffer_read,
193 op_ring_buffer_write,
194 cpu))
195 return NULL; 164 return NULL;
196 e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
197 if (e)
198 goto event;
199 return NULL;
200 165
201event:
202 entry->event = e; 166 entry->event = e;
203 entry->sample = ring_buffer_event_data(e); 167 entry->sample = ring_buffer_event_data(e);
204 entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample)) 168 entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample))
@@ -209,8 +173,7 @@ event:
209 173
210unsigned long op_cpu_buffer_entries(int cpu) 174unsigned long op_cpu_buffer_entries(int cpu)
211{ 175{
212 return ring_buffer_entries_cpu(op_ring_buffer_read, cpu) 176 return ring_buffer_entries_cpu(op_ring_buffer, cpu);
213 + ring_buffer_entries_cpu(op_ring_buffer_write, cpu);
214} 177}
215 178
216static int 179static int
@@ -356,8 +319,16 @@ void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
356 319
357void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) 320void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
358{ 321{
359 int is_kernel = !user_mode(regs); 322 int is_kernel;
360 unsigned long pc = profile_pc(regs); 323 unsigned long pc;
324
325 if (likely(regs)) {
326 is_kernel = !user_mode(regs);
327 pc = profile_pc(regs);
328 } else {
329 is_kernel = 0; /* This value will not be used */
330 pc = ESCAPE_CODE; /* as this causes an early return. */
331 }
361 332
362 __oprofile_add_ext_sample(pc, regs, event, is_kernel); 333 __oprofile_add_ext_sample(pc, regs, event, is_kernel);
363} 334}
diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c
index dc8a0428260d..b336cd9ee7a1 100644
--- a/drivers/oprofile/oprof.c
+++ b/drivers/oprofile/oprof.c
@@ -253,22 +253,26 @@ static int __init oprofile_init(void)
253 int err; 253 int err;
254 254
255 err = oprofile_arch_init(&oprofile_ops); 255 err = oprofile_arch_init(&oprofile_ops);
256
257 if (err < 0 || timer) { 256 if (err < 0 || timer) {
258 printk(KERN_INFO "oprofile: using timer interrupt.\n"); 257 printk(KERN_INFO "oprofile: using timer interrupt.\n");
259 oprofile_timer_init(&oprofile_ops); 258 err = oprofile_timer_init(&oprofile_ops);
259 if (err)
260 goto out_arch;
260 } 261 }
261
262 err = oprofilefs_register(); 262 err = oprofilefs_register();
263 if (err) 263 if (err)
264 oprofile_arch_exit(); 264 goto out_arch;
265 return 0;
265 266
267out_arch:
268 oprofile_arch_exit();
266 return err; 269 return err;
267} 270}
268 271
269 272
270static void __exit oprofile_exit(void) 273static void __exit oprofile_exit(void)
271{ 274{
275 oprofile_timer_exit();
272 oprofilefs_unregister(); 276 oprofilefs_unregister();
273 oprofile_arch_exit(); 277 oprofile_arch_exit();
274} 278}
diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h
index cb92f5c98c1a..47e12cb4ee8b 100644
--- a/drivers/oprofile/oprof.h
+++ b/drivers/oprofile/oprof.h
@@ -34,7 +34,8 @@ struct super_block;
34struct dentry; 34struct dentry;
35 35
36void oprofile_create_files(struct super_block *sb, struct dentry *root); 36void oprofile_create_files(struct super_block *sb, struct dentry *root);
37void oprofile_timer_init(struct oprofile_operations *ops); 37int oprofile_timer_init(struct oprofile_operations *ops);
38void oprofile_timer_exit(void);
38 39
39int oprofile_set_backtrace(unsigned long depth); 40int oprofile_set_backtrace(unsigned long depth);
40int oprofile_set_timeout(unsigned long time); 41int oprofile_set_timeout(unsigned long time);
diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c
index 333f915568c7..dc0ae4d14dff 100644
--- a/drivers/oprofile/timer_int.c
+++ b/drivers/oprofile/timer_int.c
@@ -13,34 +13,94 @@
13#include <linux/oprofile.h> 13#include <linux/oprofile.h>
14#include <linux/profile.h> 14#include <linux/profile.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/cpu.h>
17#include <linux/hrtimer.h>
18#include <asm/irq_regs.h>
16#include <asm/ptrace.h> 19#include <asm/ptrace.h>
17 20
18#include "oprof.h" 21#include "oprof.h"
19 22
20static int timer_notify(struct pt_regs *regs) 23static DEFINE_PER_CPU(struct hrtimer, oprofile_hrtimer);
24
25static enum hrtimer_restart oprofile_hrtimer_notify(struct hrtimer *hrtimer)
26{
27 oprofile_add_sample(get_irq_regs(), 0);
28 hrtimer_forward_now(hrtimer, ns_to_ktime(TICK_NSEC));
29 return HRTIMER_RESTART;
30}
31
32static void __oprofile_hrtimer_start(void *unused)
33{
34 struct hrtimer *hrtimer = &__get_cpu_var(oprofile_hrtimer);
35
36 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
37 hrtimer->function = oprofile_hrtimer_notify;
38
39 hrtimer_start(hrtimer, ns_to_ktime(TICK_NSEC),
40 HRTIMER_MODE_REL_PINNED);
41}
42
43static int oprofile_hrtimer_start(void)
21{ 44{
22 oprofile_add_sample(regs, 0); 45 on_each_cpu(__oprofile_hrtimer_start, NULL, 1);
23 return 0; 46 return 0;
24} 47}
25 48
26static int timer_start(void) 49static void __oprofile_hrtimer_stop(int cpu)
27{ 50{
28 return register_timer_hook(timer_notify); 51 struct hrtimer *hrtimer = &per_cpu(oprofile_hrtimer, cpu);
52
53 hrtimer_cancel(hrtimer);
29} 54}
30 55
56static void oprofile_hrtimer_stop(void)
57{
58 int cpu;
59
60 for_each_online_cpu(cpu)
61 __oprofile_hrtimer_stop(cpu);
62}
31 63
32static void timer_stop(void) 64static int __cpuinit oprofile_cpu_notify(struct notifier_block *self,
65 unsigned long action, void *hcpu)
33{ 66{
34 unregister_timer_hook(timer_notify); 67 long cpu = (long) hcpu;
68
69 switch (action) {
70 case CPU_ONLINE:
71 case CPU_ONLINE_FROZEN:
72 smp_call_function_single(cpu, __oprofile_hrtimer_start,
73 NULL, 1);
74 break;
75 case CPU_DEAD:
76 case CPU_DEAD_FROZEN:
77 __oprofile_hrtimer_stop(cpu);
78 break;
79 }
80 return NOTIFY_OK;
35} 81}
36 82
83static struct notifier_block __refdata oprofile_cpu_notifier = {
84 .notifier_call = oprofile_cpu_notify,
85};
37 86
38void __init oprofile_timer_init(struct oprofile_operations *ops) 87int __init oprofile_timer_init(struct oprofile_operations *ops)
39{ 88{
89 int rc;
90
91 rc = register_hotcpu_notifier(&oprofile_cpu_notifier);
92 if (rc)
93 return rc;
40 ops->create_files = NULL; 94 ops->create_files = NULL;
41 ops->setup = NULL; 95 ops->setup = NULL;
42 ops->shutdown = NULL; 96 ops->shutdown = NULL;
43 ops->start = timer_start; 97 ops->start = oprofile_hrtimer_start;
44 ops->stop = timer_stop; 98 ops->stop = oprofile_hrtimer_stop;
45 ops->cpu_type = "timer"; 99 ops->cpu_type = "timer";
100 return 0;
101}
102
103void __exit oprofile_timer_exit(void)
104{
105 unregister_hotcpu_notifier(&oprofile_cpu_notifier);
46} 106}
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index c0f4b364c711..39e71b0a3bfd 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -58,6 +58,7 @@ struct trace_iterator {
58 /* The below is zeroed out in pipe_read */ 58 /* The below is zeroed out in pipe_read */
59 struct trace_seq seq; 59 struct trace_seq seq;
60 struct trace_entry *ent; 60 struct trace_entry *ent;
61 unsigned long lost_events;
61 int leftover; 62 int leftover;
62 int cpu; 63 int cpu;
63 u64 ts; 64 u64 ts;
diff --git a/include/linux/module.h b/include/linux/module.h
index 515d53ae6a79..6914fcad4673 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -465,8 +465,7 @@ static inline void __module_get(struct module *module)
465 if (module) { 465 if (module) {
466 preempt_disable(); 466 preempt_disable();
467 __this_cpu_inc(module->refptr->incs); 467 __this_cpu_inc(module->refptr->incs);
468 trace_module_get(module, _THIS_IP_, 468 trace_module_get(module, _THIS_IP_);
469 __this_cpu_read(module->refptr->incs));
470 preempt_enable(); 469 preempt_enable();
471 } 470 }
472} 471}
@@ -480,8 +479,7 @@ static inline int try_module_get(struct module *module)
480 479
481 if (likely(module_is_live(module))) { 480 if (likely(module_is_live(module))) {
482 __this_cpu_inc(module->refptr->incs); 481 __this_cpu_inc(module->refptr->incs);
483 trace_module_get(module, _THIS_IP_, 482 trace_module_get(module, _THIS_IP_);
484 __this_cpu_read(module->refptr->incs));
485 } else 483 } else
486 ret = 0; 484 ret = 0;
487 485
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 5fcc31ed5771..c8297761e414 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -120,9 +120,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
120 unsigned long length, void *data); 120 unsigned long length, void *data);
121 121
122struct ring_buffer_event * 122struct ring_buffer_event *
123ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts); 123ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
124 unsigned long *lost_events);
124struct ring_buffer_event * 125struct ring_buffer_event *
125ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts); 126ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
127 unsigned long *lost_events);
126 128
127struct ring_buffer_iter * 129struct ring_buffer_iter *
128ring_buffer_read_start(struct ring_buffer *buffer, int cpu); 130ring_buffer_read_start(struct ring_buffer *buffer, int cpu);
diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index 4b0f48ba16a6..c7bb2f0482fe 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h
@@ -51,11 +51,14 @@ TRACE_EVENT(module_free,
51 TP_printk("%s", __get_str(name)) 51 TP_printk("%s", __get_str(name))
52); 52);
53 53
54#ifdef CONFIG_MODULE_UNLOAD
55/* trace_module_get/put are only used if CONFIG_MODULE_UNLOAD is defined */
56
54DECLARE_EVENT_CLASS(module_refcnt, 57DECLARE_EVENT_CLASS(module_refcnt,
55 58
56 TP_PROTO(struct module *mod, unsigned long ip, int refcnt), 59 TP_PROTO(struct module *mod, unsigned long ip),
57 60
58 TP_ARGS(mod, ip, refcnt), 61 TP_ARGS(mod, ip),
59 62
60 TP_STRUCT__entry( 63 TP_STRUCT__entry(
61 __field( unsigned long, ip ) 64 __field( unsigned long, ip )
@@ -65,7 +68,7 @@ DECLARE_EVENT_CLASS(module_refcnt,
65 68
66 TP_fast_assign( 69 TP_fast_assign(
67 __entry->ip = ip; 70 __entry->ip = ip;
68 __entry->refcnt = refcnt; 71 __entry->refcnt = __this_cpu_read(mod->refptr->incs) + __this_cpu_read(mod->refptr->decs);
69 __assign_str(name, mod->name); 72 __assign_str(name, mod->name);
70 ), 73 ),
71 74
@@ -75,17 +78,18 @@ DECLARE_EVENT_CLASS(module_refcnt,
75 78
76DEFINE_EVENT(module_refcnt, module_get, 79DEFINE_EVENT(module_refcnt, module_get,
77 80
78 TP_PROTO(struct module *mod, unsigned long ip, int refcnt), 81 TP_PROTO(struct module *mod, unsigned long ip),
79 82
80 TP_ARGS(mod, ip, refcnt) 83 TP_ARGS(mod, ip)
81); 84);
82 85
83DEFINE_EVENT(module_refcnt, module_put, 86DEFINE_EVENT(module_refcnt, module_put,
84 87
85 TP_PROTO(struct module *mod, unsigned long ip, int refcnt), 88 TP_PROTO(struct module *mod, unsigned long ip),
86 89
87 TP_ARGS(mod, ip, refcnt) 90 TP_ARGS(mod, ip)
88); 91);
92#endif /* CONFIG_MODULE_UNLOAD */
89 93
90TRACE_EVENT(module_request, 94TRACE_EVENT(module_request,
91 95
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index a510b75ac304..814566c99d29 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -100,18 +100,7 @@ TRACE_EVENT(signal_deliver,
100 __entry->sa_handler, __entry->sa_flags) 100 __entry->sa_handler, __entry->sa_flags)
101); 101);
102 102
103/** 103DECLARE_EVENT_CLASS(signal_queue_overflow,
104 * signal_overflow_fail - called when signal queue is overflow
105 * @sig: signal number
106 * @group: signal to process group or not (bool)
107 * @info: pointer to struct siginfo
108 *
109 * Kernel fails to generate 'sig' signal with 'info' siginfo, because
110 * siginfo queue is overflow, and the signal is dropped.
111 * 'group' is not 0 if the signal will be sent to a process group.
112 * 'sig' is always one of RT signals.
113 */
114TRACE_EVENT(signal_overflow_fail,
115 104
116 TP_PROTO(int sig, int group, struct siginfo *info), 105 TP_PROTO(int sig, int group, struct siginfo *info),
117 106
@@ -135,6 +124,24 @@ TRACE_EVENT(signal_overflow_fail,
135); 124);
136 125
137/** 126/**
127 * signal_overflow_fail - called when signal queue is overflow
128 * @sig: signal number
129 * @group: signal to process group or not (bool)
130 * @info: pointer to struct siginfo
131 *
132 * Kernel fails to generate 'sig' signal with 'info' siginfo, because
133 * siginfo queue is overflow, and the signal is dropped.
134 * 'group' is not 0 if the signal will be sent to a process group.
135 * 'sig' is always one of RT signals.
136 */
137DEFINE_EVENT(signal_queue_overflow, signal_overflow_fail,
138
139 TP_PROTO(int sig, int group, struct siginfo *info),
140
141 TP_ARGS(sig, group, info)
142);
143
144/**
138 * signal_lose_info - called when siginfo is lost 145 * signal_lose_info - called when siginfo is lost
139 * @sig: signal number 146 * @sig: signal number
140 * @group: signal to process group or not (bool) 147 * @group: signal to process group or not (bool)
@@ -145,28 +152,13 @@ TRACE_EVENT(signal_overflow_fail,
145 * 'group' is not 0 if the signal will be sent to a process group. 152 * 'group' is not 0 if the signal will be sent to a process group.
146 * 'sig' is always one of non-RT signals. 153 * 'sig' is always one of non-RT signals.
147 */ 154 */
148TRACE_EVENT(signal_lose_info, 155DEFINE_EVENT(signal_queue_overflow, signal_lose_info,
149 156
150 TP_PROTO(int sig, int group, struct siginfo *info), 157 TP_PROTO(int sig, int group, struct siginfo *info),
151 158
152 TP_ARGS(sig, group, info), 159 TP_ARGS(sig, group, info)
153
154 TP_STRUCT__entry(
155 __field( int, sig )
156 __field( int, group )
157 __field( int, errno )
158 __field( int, code )
159 ),
160
161 TP_fast_assign(
162 __entry->sig = sig;
163 __entry->group = group;
164 TP_STORE_SIGINFO(__entry, info);
165 ),
166
167 TP_printk("sig=%d group=%d errno=%d code=%d",
168 __entry->sig, __entry->group, __entry->errno, __entry->code)
169); 160);
161
170#endif /* _TRACE_SIGNAL_H */ 162#endif /* _TRACE_SIGNAL_H */
171 163
172/* This part must be outside protection */ 164/* This part must be outside protection */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index ea6f9d4a20e9..75dd7787fb37 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -154,9 +154,11 @@
154 * 154 *
155 * field = (typeof(field))entry; 155 * field = (typeof(field))entry;
156 * 156 *
157 * p = get_cpu_var(ftrace_event_seq); 157 * p = &get_cpu_var(ftrace_event_seq);
158 * trace_seq_init(p); 158 * trace_seq_init(p);
159 * ret = trace_seq_printf(s, <TP_printk> "\n"); 159 * ret = trace_seq_printf(s, "%s: ", <call>);
160 * if (ret)
161 * ret = trace_seq_printf(s, <TP_printk> "\n");
160 * put_cpu(); 162 * put_cpu();
161 * if (!ret) 163 * if (!ret)
162 * return TRACE_TYPE_PARTIAL_LINE; 164 * return TRACE_TYPE_PARTIAL_LINE;
@@ -450,38 +452,38 @@ perf_trace_disable_##name(struct ftrace_event_call *unused) \
450 * 452 *
451 * static void ftrace_raw_event_<call>(proto) 453 * static void ftrace_raw_event_<call>(proto)
452 * { 454 * {
455 * struct ftrace_data_offsets_<call> __maybe_unused __data_offsets;
453 * struct ring_buffer_event *event; 456 * struct ring_buffer_event *event;
454 * struct ftrace_raw_<call> *entry; <-- defined in stage 1 457 * struct ftrace_raw_<call> *entry; <-- defined in stage 1
455 * struct ring_buffer *buffer; 458 * struct ring_buffer *buffer;
456 * unsigned long irq_flags; 459 * unsigned long irq_flags;
460 * int __data_size;
457 * int pc; 461 * int pc;
458 * 462 *
459 * local_save_flags(irq_flags); 463 * local_save_flags(irq_flags);
460 * pc = preempt_count(); 464 * pc = preempt_count();
461 * 465 *
466 * __data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
467 *
462 * event = trace_current_buffer_lock_reserve(&buffer, 468 * event = trace_current_buffer_lock_reserve(&buffer,
463 * event_<call>.id, 469 * event_<call>.id,
464 * sizeof(struct ftrace_raw_<call>), 470 * sizeof(*entry) + __data_size,
465 * irq_flags, pc); 471 * irq_flags, pc);
466 * if (!event) 472 * if (!event)
467 * return; 473 * return;
468 * entry = ring_buffer_event_data(event); 474 * entry = ring_buffer_event_data(event);
469 * 475 *
470 * <assign>; <-- Here we assign the entries by the __field and 476 * { <assign>; } <-- Here we assign the entries by the __field and
471 * __array macros. 477 * __array macros.
472 * 478 *
473 * trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc); 479 * if (!filter_current_check_discard(buffer, event_call, entry, event))
480 * trace_current_buffer_unlock_commit(buffer,
481 * event, irq_flags, pc);
474 * } 482 * }
475 * 483 *
476 * static int ftrace_raw_reg_event_<call>(struct ftrace_event_call *unused) 484 * static int ftrace_raw_reg_event_<call>(struct ftrace_event_call *unused)
477 * { 485 * {
478 * int ret; 486 * return register_trace_<call>(ftrace_raw_event_<call>);
479 *
480 * ret = register_trace_<call>(ftrace_raw_event_<call>);
481 * if (!ret)
482 * pr_info("event trace: Could not activate trace point "
483 * "probe to <call>");
484 * return ret;
485 * } 487 * }
486 * 488 *
487 * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused) 489 * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
@@ -493,6 +495,8 @@ perf_trace_disable_##name(struct ftrace_event_call *unused) \
493 * .trace = ftrace_raw_output_<call>, <-- stage 2 495 * .trace = ftrace_raw_output_<call>, <-- stage 2
494 * }; 496 * };
495 * 497 *
498 * static const char print_fmt_<call>[] = <TP_printk>;
499 *
496 * static struct ftrace_event_call __used 500 * static struct ftrace_event_call __used
497 * __attribute__((__aligned__(4))) 501 * __attribute__((__aligned__(4)))
498 * __attribute__((section("_ftrace_events"))) event_<call> = { 502 * __attribute__((section("_ftrace_events"))) event_<call> = {
@@ -501,6 +505,8 @@ perf_trace_disable_##name(struct ftrace_event_call *unused) \
501 * .raw_init = trace_event_raw_init, 505 * .raw_init = trace_event_raw_init,
502 * .regfunc = ftrace_reg_event_<call>, 506 * .regfunc = ftrace_reg_event_<call>,
503 * .unregfunc = ftrace_unreg_event_<call>, 507 * .unregfunc = ftrace_unreg_event_<call>,
508 * .print_fmt = print_fmt_<call>,
509 * .define_fields = ftrace_define_fields_<call>,
504 * } 510 * }
505 * 511 *
506 */ 512 */
@@ -569,7 +575,6 @@ ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \
569 return; \ 575 return; \
570 entry = ring_buffer_event_data(event); \ 576 entry = ring_buffer_event_data(event); \
571 \ 577 \
572 \
573 tstruct \ 578 tstruct \
574 \ 579 \
575 { assign; } \ 580 { assign; } \
diff --git a/kernel/module.c b/kernel/module.c
index 1016b75b026a..b8a1e313448c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -59,8 +59,6 @@
59#define CREATE_TRACE_POINTS 59#define CREATE_TRACE_POINTS
60#include <trace/events/module.h> 60#include <trace/events/module.h>
61 61
62EXPORT_TRACEPOINT_SYMBOL(module_get);
63
64#if 0 62#if 0
65#define DEBUGP printk 63#define DEBUGP printk
66#else 64#else
@@ -515,6 +513,9 @@ MODINFO_ATTR(srcversion);
515static char last_unloaded_module[MODULE_NAME_LEN+1]; 513static char last_unloaded_module[MODULE_NAME_LEN+1];
516 514
517#ifdef CONFIG_MODULE_UNLOAD 515#ifdef CONFIG_MODULE_UNLOAD
516
517EXPORT_TRACEPOINT_SYMBOL(module_get);
518
518/* Init the unload section of the module. */ 519/* Init the unload section of the module. */
519static void module_unload_init(struct module *mod) 520static void module_unload_init(struct module *mod)
520{ 521{
@@ -867,8 +868,7 @@ void module_put(struct module *module)
867 smp_wmb(); /* see comment in module_refcount */ 868 smp_wmb(); /* see comment in module_refcount */
868 __this_cpu_inc(module->refptr->decs); 869 __this_cpu_inc(module->refptr->decs);
869 870
870 trace_module_put(module, _RET_IP_, 871 trace_module_put(module, _RET_IP_);
871 __this_cpu_read(module->refptr->decs));
872 /* Maybe they're waiting for us to drop reference? */ 872 /* Maybe they're waiting for us to drop reference? */
873 if (unlikely(!module_is_live(module))) 873 if (unlikely(!module_is_live(module)))
874 wake_up_process(module->waiter); 874 wake_up_process(module->waiter);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 41ca394feb22..5885cdfc41f3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -319,6 +319,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
319#define TS_MASK ((1ULL << TS_SHIFT) - 1) 319#define TS_MASK ((1ULL << TS_SHIFT) - 1)
320#define TS_DELTA_TEST (~TS_MASK) 320#define TS_DELTA_TEST (~TS_MASK)
321 321
322/* Flag when events were overwritten */
323#define RB_MISSED_EVENTS (1 << 31)
324/* Missed count stored at end */
325#define RB_MISSED_STORED (1 << 30)
326
322struct buffer_data_page { 327struct buffer_data_page {
323 u64 time_stamp; /* page time stamp */ 328 u64 time_stamp; /* page time stamp */
324 local_t commit; /* write committed index */ 329 local_t commit; /* write committed index */
@@ -338,6 +343,7 @@ struct buffer_page {
338 local_t write; /* index for next write */ 343 local_t write; /* index for next write */
339 unsigned read; /* index for next read */ 344 unsigned read; /* index for next read */
340 local_t entries; /* entries on this page */ 345 local_t entries; /* entries on this page */
346 unsigned long real_end; /* real end of data */
341 struct buffer_data_page *page; /* Actual data page */ 347 struct buffer_data_page *page; /* Actual data page */
342}; 348};
343 349
@@ -417,6 +423,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
417 (unsigned int)sizeof(field.commit), 423 (unsigned int)sizeof(field.commit),
418 (unsigned int)is_signed_type(long)); 424 (unsigned int)is_signed_type(long));
419 425
426 ret = trace_seq_printf(s, "\tfield: int overwrite;\t"
427 "offset:%u;\tsize:%u;\tsigned:%u;\n",
428 (unsigned int)offsetof(typeof(field), commit),
429 1,
430 (unsigned int)is_signed_type(long));
431
420 ret = trace_seq_printf(s, "\tfield: char data;\t" 432 ret = trace_seq_printf(s, "\tfield: char data;\t"
421 "offset:%u;\tsize:%u;\tsigned:%u;\n", 433 "offset:%u;\tsize:%u;\tsigned:%u;\n",
422 (unsigned int)offsetof(typeof(field), data), 434 (unsigned int)offsetof(typeof(field), data),
@@ -440,6 +452,8 @@ struct ring_buffer_per_cpu {
440 struct buffer_page *tail_page; /* write to tail */ 452 struct buffer_page *tail_page; /* write to tail */
441 struct buffer_page *commit_page; /* committed pages */ 453 struct buffer_page *commit_page; /* committed pages */
442 struct buffer_page *reader_page; 454 struct buffer_page *reader_page;
455 unsigned long lost_events;
456 unsigned long last_overrun;
443 local_t commit_overrun; 457 local_t commit_overrun;
444 local_t overrun; 458 local_t overrun;
445 local_t entries; 459 local_t entries;
@@ -1762,6 +1776,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1762 kmemcheck_annotate_bitfield(event, bitfield); 1776 kmemcheck_annotate_bitfield(event, bitfield);
1763 1777
1764 /* 1778 /*
1779 * Save the original length to the meta data.
1780 * This will be used by the reader to add lost event
1781 * counter.
1782 */
1783 tail_page->real_end = tail;
1784
1785 /*
1765 * If this event is bigger than the minimum size, then 1786 * If this event is bigger than the minimum size, then
1766 * we need to be careful that we don't subtract the 1787 * we need to be careful that we don't subtract the
1767 * write counter enough to allow another writer to slip 1788 * write counter enough to allow another writer to slip
@@ -2838,6 +2859,7 @@ static struct buffer_page *
2838rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) 2859rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2839{ 2860{
2840 struct buffer_page *reader = NULL; 2861 struct buffer_page *reader = NULL;
2862 unsigned long overwrite;
2841 unsigned long flags; 2863 unsigned long flags;
2842 int nr_loops = 0; 2864 int nr_loops = 0;
2843 int ret; 2865 int ret;
@@ -2879,6 +2901,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2879 local_set(&cpu_buffer->reader_page->write, 0); 2901 local_set(&cpu_buffer->reader_page->write, 0);
2880 local_set(&cpu_buffer->reader_page->entries, 0); 2902 local_set(&cpu_buffer->reader_page->entries, 0);
2881 local_set(&cpu_buffer->reader_page->page->commit, 0); 2903 local_set(&cpu_buffer->reader_page->page->commit, 0);
2904 cpu_buffer->reader_page->real_end = 0;
2882 2905
2883 spin: 2906 spin:
2884 /* 2907 /*
@@ -2899,6 +2922,18 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2899 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); 2922 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
2900 2923
2901 /* 2924 /*
2925 * We want to make sure we read the overruns after we set up our
2926 * pointers to the next object. The writer side does a
2927 * cmpxchg to cross pages which acts as the mb on the writer
2928 * side. Note, the reader will constantly fail the swap
2929 * while the writer is updating the pointers, so this
2930 * guarantees that the overwrite recorded here is the one we
2931 * want to compare with the last_overrun.
2932 */
2933 smp_mb();
2934 overwrite = local_read(&(cpu_buffer->overrun));
2935
2936 /*
2902 * Here's the tricky part. 2937 * Here's the tricky part.
2903 * 2938 *
2904 * We need to move the pointer past the header page. 2939 * We need to move the pointer past the header page.
@@ -2929,6 +2964,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2929 cpu_buffer->reader_page = reader; 2964 cpu_buffer->reader_page = reader;
2930 rb_reset_reader_page(cpu_buffer); 2965 rb_reset_reader_page(cpu_buffer);
2931 2966
2967 if (overwrite != cpu_buffer->last_overrun) {
2968 cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
2969 cpu_buffer->last_overrun = overwrite;
2970 }
2971
2932 goto again; 2972 goto again;
2933 2973
2934 out: 2974 out:
@@ -3005,8 +3045,14 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
3005 rb_advance_iter(iter); 3045 rb_advance_iter(iter);
3006} 3046}
3007 3047
3048static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
3049{
3050 return cpu_buffer->lost_events;
3051}
3052
3008static struct ring_buffer_event * 3053static struct ring_buffer_event *
3009rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) 3054rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3055 unsigned long *lost_events)
3010{ 3056{
3011 struct ring_buffer_event *event; 3057 struct ring_buffer_event *event;
3012 struct buffer_page *reader; 3058 struct buffer_page *reader;
@@ -3058,6 +3104,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
3058 ring_buffer_normalize_time_stamp(cpu_buffer->buffer, 3104 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3059 cpu_buffer->cpu, ts); 3105 cpu_buffer->cpu, ts);
3060 } 3106 }
3107 if (lost_events)
3108 *lost_events = rb_lost_events(cpu_buffer);
3061 return event; 3109 return event;
3062 3110
3063 default: 3111 default:
@@ -3168,12 +3216,14 @@ static inline int rb_ok_to_lock(void)
3168 * @buffer: The ring buffer to read 3216 * @buffer: The ring buffer to read
3169 * @cpu: The cpu to peak at 3217 * @cpu: The cpu to peak at
3170 * @ts: The timestamp counter of this event. 3218 * @ts: The timestamp counter of this event.
3219 * @lost_events: a variable to store if events were lost (may be NULL)
3171 * 3220 *
3172 * This will return the event that will be read next, but does 3221 * This will return the event that will be read next, but does
3173 * not consume the data. 3222 * not consume the data.
3174 */ 3223 */
3175struct ring_buffer_event * 3224struct ring_buffer_event *
3176ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) 3225ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
3226 unsigned long *lost_events)
3177{ 3227{
3178 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 3228 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
3179 struct ring_buffer_event *event; 3229 struct ring_buffer_event *event;
@@ -3188,7 +3238,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
3188 local_irq_save(flags); 3238 local_irq_save(flags);
3189 if (dolock) 3239 if (dolock)
3190 spin_lock(&cpu_buffer->reader_lock); 3240 spin_lock(&cpu_buffer->reader_lock);
3191 event = rb_buffer_peek(cpu_buffer, ts); 3241 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3192 if (event && event->type_len == RINGBUF_TYPE_PADDING) 3242 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3193 rb_advance_reader(cpu_buffer); 3243 rb_advance_reader(cpu_buffer);
3194 if (dolock) 3244 if (dolock)
@@ -3230,13 +3280,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3230/** 3280/**
3231 * ring_buffer_consume - return an event and consume it 3281 * ring_buffer_consume - return an event and consume it
3232 * @buffer: The ring buffer to get the next event from 3282 * @buffer: The ring buffer to get the next event from
3283 * @cpu: the cpu to read the buffer from
3284 * @ts: a variable to store the timestamp (may be NULL)
3285 * @lost_events: a variable to store if events were lost (may be NULL)
3233 * 3286 *
3234 * Returns the next event in the ring buffer, and that event is consumed. 3287 * Returns the next event in the ring buffer, and that event is consumed.
3235 * Meaning, that sequential reads will keep returning a different event, 3288 * Meaning, that sequential reads will keep returning a different event,
3236 * and eventually empty the ring buffer if the producer is slower. 3289 * and eventually empty the ring buffer if the producer is slower.
3237 */ 3290 */
3238struct ring_buffer_event * 3291struct ring_buffer_event *
3239ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 3292ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
3293 unsigned long *lost_events)
3240{ 3294{
3241 struct ring_buffer_per_cpu *cpu_buffer; 3295 struct ring_buffer_per_cpu *cpu_buffer;
3242 struct ring_buffer_event *event = NULL; 3296 struct ring_buffer_event *event = NULL;
@@ -3257,9 +3311,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
3257 if (dolock) 3311 if (dolock)
3258 spin_lock(&cpu_buffer->reader_lock); 3312 spin_lock(&cpu_buffer->reader_lock);
3259 3313
3260 event = rb_buffer_peek(cpu_buffer, ts); 3314 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3261 if (event) 3315 if (event) {
3316 cpu_buffer->lost_events = 0;
3262 rb_advance_reader(cpu_buffer); 3317 rb_advance_reader(cpu_buffer);
3318 }
3263 3319
3264 if (dolock) 3320 if (dolock)
3265 spin_unlock(&cpu_buffer->reader_lock); 3321 spin_unlock(&cpu_buffer->reader_lock);
@@ -3408,6 +3464,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
3408 cpu_buffer->write_stamp = 0; 3464 cpu_buffer->write_stamp = 0;
3409 cpu_buffer->read_stamp = 0; 3465 cpu_buffer->read_stamp = 0;
3410 3466
3467 cpu_buffer->lost_events = 0;
3468 cpu_buffer->last_overrun = 0;
3469
3411 rb_head_page_activate(cpu_buffer); 3470 rb_head_page_activate(cpu_buffer);
3412} 3471}
3413 3472
@@ -3683,6 +3742,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3683 struct ring_buffer_event *event; 3742 struct ring_buffer_event *event;
3684 struct buffer_data_page *bpage; 3743 struct buffer_data_page *bpage;
3685 struct buffer_page *reader; 3744 struct buffer_page *reader;
3745 unsigned long missed_events;
3686 unsigned long flags; 3746 unsigned long flags;
3687 unsigned int commit; 3747 unsigned int commit;
3688 unsigned int read; 3748 unsigned int read;
@@ -3719,6 +3779,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3719 read = reader->read; 3779 read = reader->read;
3720 commit = rb_page_commit(reader); 3780 commit = rb_page_commit(reader);
3721 3781
3782 /* Check if any events were dropped */
3783 missed_events = cpu_buffer->lost_events;
3784
3722 /* 3785 /*
3723 * If this page has been partially read or 3786 * If this page has been partially read or
3724 * if len is not big enough to read the rest of the page or 3787 * if len is not big enough to read the rest of the page or
@@ -3779,9 +3842,35 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3779 local_set(&reader->entries, 0); 3842 local_set(&reader->entries, 0);
3780 reader->read = 0; 3843 reader->read = 0;
3781 *data_page = bpage; 3844 *data_page = bpage;
3845
3846 /*
3847 * Use the real_end for the data size,
3848 * This gives us a chance to store the lost events
3849 * on the page.
3850 */
3851 if (reader->real_end)
3852 local_set(&bpage->commit, reader->real_end);
3782 } 3853 }
3783 ret = read; 3854 ret = read;
3784 3855
3856 cpu_buffer->lost_events = 0;
3857 /*
3858 * Set a flag in the commit field if we lost events
3859 */
3860 if (missed_events) {
3861 commit = local_read(&bpage->commit);
3862
3863 /* If there is room at the end of the page to save the
3864 * missed events, then record it there.
3865 */
3866 if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
3867 memcpy(&bpage->data[commit], &missed_events,
3868 sizeof(missed_events));
3869 local_add(RB_MISSED_STORED, &bpage->commit);
3870 }
3871 local_add(RB_MISSED_EVENTS, &bpage->commit);
3872 }
3873
3785 out_unlock: 3874 out_unlock:
3786 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3875 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3787 3876
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index df74c7982255..dc56556b55a2 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -81,7 +81,7 @@ static enum event_status read_event(int cpu)
81 int *entry; 81 int *entry;
82 u64 ts; 82 u64 ts;
83 83
84 event = ring_buffer_consume(buffer, cpu, &ts); 84 event = ring_buffer_consume(buffer, cpu, &ts, NULL);
85 if (!event) 85 if (!event)
86 return EVENT_DROPPED; 86 return EVENT_DROPPED;
87 87
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 44f916a04065..60f3b6289731 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1545,7 +1545,8 @@ static void trace_iterator_increment(struct trace_iterator *iter)
1545} 1545}
1546 1546
1547static struct trace_entry * 1547static struct trace_entry *
1548peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts) 1548peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
1549 unsigned long *lost_events)
1549{ 1550{
1550 struct ring_buffer_event *event; 1551 struct ring_buffer_event *event;
1551 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; 1552 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
@@ -1556,7 +1557,8 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1556 if (buf_iter) 1557 if (buf_iter)
1557 event = ring_buffer_iter_peek(buf_iter, ts); 1558 event = ring_buffer_iter_peek(buf_iter, ts);
1558 else 1559 else
1559 event = ring_buffer_peek(iter->tr->buffer, cpu, ts); 1560 event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
1561 lost_events);
1560 1562
1561 ftrace_enable_cpu(); 1563 ftrace_enable_cpu();
1562 1564
@@ -1564,10 +1566,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1564} 1566}
1565 1567
1566static struct trace_entry * 1568static struct trace_entry *
1567__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) 1569__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
1570 unsigned long *missing_events, u64 *ent_ts)
1568{ 1571{
1569 struct ring_buffer *buffer = iter->tr->buffer; 1572 struct ring_buffer *buffer = iter->tr->buffer;
1570 struct trace_entry *ent, *next = NULL; 1573 struct trace_entry *ent, *next = NULL;
1574 unsigned long lost_events, next_lost = 0;
1571 int cpu_file = iter->cpu_file; 1575 int cpu_file = iter->cpu_file;
1572 u64 next_ts = 0, ts; 1576 u64 next_ts = 0, ts;
1573 int next_cpu = -1; 1577 int next_cpu = -1;
@@ -1580,7 +1584,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1580 if (cpu_file > TRACE_PIPE_ALL_CPU) { 1584 if (cpu_file > TRACE_PIPE_ALL_CPU) {
1581 if (ring_buffer_empty_cpu(buffer, cpu_file)) 1585 if (ring_buffer_empty_cpu(buffer, cpu_file))
1582 return NULL; 1586 return NULL;
1583 ent = peek_next_entry(iter, cpu_file, ent_ts); 1587 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
1584 if (ent_cpu) 1588 if (ent_cpu)
1585 *ent_cpu = cpu_file; 1589 *ent_cpu = cpu_file;
1586 1590
@@ -1592,7 +1596,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1592 if (ring_buffer_empty_cpu(buffer, cpu)) 1596 if (ring_buffer_empty_cpu(buffer, cpu))
1593 continue; 1597 continue;
1594 1598
1595 ent = peek_next_entry(iter, cpu, &ts); 1599 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
1596 1600
1597 /* 1601 /*
1598 * Pick the entry with the smallest timestamp: 1602 * Pick the entry with the smallest timestamp:
@@ -1601,6 +1605,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1601 next = ent; 1605 next = ent;
1602 next_cpu = cpu; 1606 next_cpu = cpu;
1603 next_ts = ts; 1607 next_ts = ts;
1608 next_lost = lost_events;
1604 } 1609 }
1605 } 1610 }
1606 1611
@@ -1610,6 +1615,9 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1610 if (ent_ts) 1615 if (ent_ts)
1611 *ent_ts = next_ts; 1616 *ent_ts = next_ts;
1612 1617
1618 if (missing_events)
1619 *missing_events = next_lost;
1620
1613 return next; 1621 return next;
1614} 1622}
1615 1623
@@ -1617,13 +1625,14 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1617struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 1625struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1618 int *ent_cpu, u64 *ent_ts) 1626 int *ent_cpu, u64 *ent_ts)
1619{ 1627{
1620 return __find_next_entry(iter, ent_cpu, ent_ts); 1628 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
1621} 1629}
1622 1630
1623/* Find the next real entry, and increment the iterator to the next entry */ 1631/* Find the next real entry, and increment the iterator to the next entry */
1624static void *find_next_entry_inc(struct trace_iterator *iter) 1632static void *find_next_entry_inc(struct trace_iterator *iter)
1625{ 1633{
1626 iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts); 1634 iter->ent = __find_next_entry(iter, &iter->cpu,
1635 &iter->lost_events, &iter->ts);
1627 1636
1628 if (iter->ent) 1637 if (iter->ent)
1629 trace_iterator_increment(iter); 1638 trace_iterator_increment(iter);
@@ -1635,7 +1644,8 @@ static void trace_consume(struct trace_iterator *iter)
1635{ 1644{
1636 /* Don't allow ftrace to trace into the ring buffers */ 1645 /* Don't allow ftrace to trace into the ring buffers */
1637 ftrace_disable_cpu(); 1646 ftrace_disable_cpu();
1638 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts); 1647 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
1648 &iter->lost_events);
1639 ftrace_enable_cpu(); 1649 ftrace_enable_cpu();
1640} 1650}
1641 1651
@@ -2030,6 +2040,10 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
2030{ 2040{
2031 enum print_line_t ret; 2041 enum print_line_t ret;
2032 2042
2043 if (iter->lost_events)
2044 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2045 iter->cpu, iter->lost_events);
2046
2033 if (iter->trace && iter->trace->print_line) { 2047 if (iter->trace && iter->trace->print_line) {
2034 ret = iter->trace->print_line(iter); 2048 ret = iter->trace->print_line(iter);
2035 if (ret != TRACE_TYPE_UNHANDLED) 2049 if (ret != TRACE_TYPE_UNHANDLED)
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 9aed1a5cf553..669b9c31861d 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -490,9 +490,10 @@ get_return_for_leaf(struct trace_iterator *iter,
490 * We need to consume the current entry to see 490 * We need to consume the current entry to see
491 * the next one. 491 * the next one.
492 */ 492 */
493 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); 493 ring_buffer_consume(iter->tr->buffer, iter->cpu,
494 NULL, NULL);
494 event = ring_buffer_peek(iter->tr->buffer, iter->cpu, 495 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
495 NULL); 496 NULL, NULL);
496 } 497 }
497 498
498 if (!event) 499 if (!event)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 81003b4d617f..9398034f814a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -30,7 +30,7 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
30 struct trace_entry *entry; 30 struct trace_entry *entry;
31 unsigned int loops = 0; 31 unsigned int loops = 0;
32 32
33 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) { 33 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL, NULL))) {
34 entry = ring_buffer_event_data(event); 34 entry = ring_buffer_event_data(event);
35 35
36 /* 36 /*