aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Robert Richter <robert.richter@amd.com> 2008-12-08 19:21:32 -0500
committer Robert Richter <robert.richter@amd.com> 2008-12-10 08:20:18 -0500
commit 6dad828b76c7224a22ddc9ce7aa495d994f03b31 (patch)
tree 364de7a3efd56d60c0742145e3a8d3d4f73bcde4
parent e09373f22e76cc048ca5fe10a9ff9012f5d64309 (diff)
oprofile: port to the new ring_buffer
This patch replaces the current oprofile cpu buffer implementation with the ring buffer provided by the tracing framework. The motivation here is to leave the pain of implementing ring buffers to others. Oh, no, there are more advantages. The main reason is support for different sample sizes that could be stored in the buffer. Use cases for this are IBS and Cell spu profiling. Using the new ring buffer ensures valid and complete samples and allows copying the cpu buffer statelessly, without knowing its content. Second, it uses a generic kernel API and also reduces code size. And hopefully, there will be fewer bugs. Since the new tracing ring buffer implementation uses spin locks to protect the buffer during read/write access, it is difficult to use the buffer in an NMI handler. In this case, writing to the buffer by the NMI handler (x86) could also occur during critical sections when reading the buffer. To avoid this, there are 2 buffers for independent read and write access. Read access is in process context only, write access only in the NMI handler. If the read buffer runs empty, both buffers are swapped atomically. There is potentially a small window during swapping where the buffers are disabled and samples could be lost. Using 2 buffers adds a little overhead, but the solution is clear and does not require changes in the ring buffer implementation. It can be changed to a single buffer solution when the ring buffer access is implemented as non-locking atomic code. The new buffer requires more space to store the same number of samples because each sample includes a u32 header. Also, there is more code to execute for buffer access. Nonetheless, the buffer implementation is proven in the ftrace environment and is worth using in oprofile as well. Patches that change the internal IBS buffer usage will follow. Cc: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Robert Richter <robert.richter@amd.com>
-rw-r--r-- drivers/oprofile/buffer_sync.c 65
-rw-r--r-- drivers/oprofile/cpu_buffer.c 63
-rw-r--r-- drivers/oprofile/cpu_buffer.h 71
3 files changed, 114 insertions, 85 deletions
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 944a5832d9e4..737bd9484822 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -268,18 +268,6 @@ lookup_dcookie(struct mm_struct *mm, unsigned long addr, off_t *offset)
268 return cookie; 268 return cookie;
269} 269}
270 270
271static void increment_tail(struct oprofile_cpu_buffer *b)
272{
273 unsigned long new_tail = b->tail_pos + 1;
274
275 rmb(); /* be sure fifo pointers are synchronized */
276
277 if (new_tail < b->buffer_size)
278 b->tail_pos = new_tail;
279 else
280 b->tail_pos = 0;
281}
282
283static unsigned long last_cookie = INVALID_COOKIE; 271static unsigned long last_cookie = INVALID_COOKIE;
284 272
285static void add_cpu_switch(int i) 273static void add_cpu_switch(int i)
@@ -331,26 +319,25 @@ static void add_trace_begin(void)
331 319
332#define IBS_FETCH_CODE_SIZE 2 320#define IBS_FETCH_CODE_SIZE 2
333#define IBS_OP_CODE_SIZE 5 321#define IBS_OP_CODE_SIZE 5
334#define IBS_EIP(cpu_buf) ((cpu_buffer_read_entry(cpu_buf))->eip)
335#define IBS_EVENT(cpu_buf) ((cpu_buffer_read_entry(cpu_buf))->event)
336 322
337/* 323/*
338 * Add IBS fetch and op entries to event buffer 324 * Add IBS fetch and op entries to event buffer
339 */ 325 */
340static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code, 326static void add_ibs_begin(int cpu, int code, struct mm_struct *mm)
341 struct mm_struct *mm)
342{ 327{
343 unsigned long rip; 328 unsigned long rip;
344 int i, count; 329 int i, count;
345 unsigned long ibs_cookie = 0; 330 unsigned long ibs_cookie = 0;
346 off_t offset; 331 off_t offset;
332 struct op_sample *sample;
347 333
348 increment_tail(cpu_buf); /* move to RIP entry */ 334 sample = cpu_buffer_read_entry(cpu);
349 335 if (!sample)
350 rip = IBS_EIP(cpu_buf); 336 goto Error;
337 rip = sample->eip;
351 338
352#ifdef __LP64__ 339#ifdef __LP64__
353 rip += IBS_EVENT(cpu_buf) << 32; 340 rip += sample->event << 32;
354#endif 341#endif
355 342
356 if (mm) { 343 if (mm) {
@@ -374,8 +361,8 @@ static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code,
374 add_event_entry(offset); /* Offset from Dcookie */ 361 add_event_entry(offset); /* Offset from Dcookie */
375 362
376 /* we send the Dcookie offset, but send the raw Linear Add also*/ 363 /* we send the Dcookie offset, but send the raw Linear Add also*/
377 add_event_entry(IBS_EIP(cpu_buf)); 364 add_event_entry(sample->eip);
378 add_event_entry(IBS_EVENT(cpu_buf)); 365 add_event_entry(sample->event);
379 366
380 if (code == IBS_FETCH_CODE) 367 if (code == IBS_FETCH_CODE)
381 count = IBS_FETCH_CODE_SIZE; /*IBS FETCH is 2 int64s*/ 368 count = IBS_FETCH_CODE_SIZE; /*IBS FETCH is 2 int64s*/
@@ -383,10 +370,17 @@ static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code,
383 count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/ 370 count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/
384 371
385 for (i = 0; i < count; i++) { 372 for (i = 0; i < count; i++) {
386 increment_tail(cpu_buf); 373 sample = cpu_buffer_read_entry(cpu);
387 add_event_entry(IBS_EIP(cpu_buf)); 374 if (!sample)
388 add_event_entry(IBS_EVENT(cpu_buf)); 375 goto Error;
376 add_event_entry(sample->eip);
377 add_event_entry(sample->event);
389 } 378 }
379
380 return;
381
382Error:
383 return;
390} 384}
391 385
392#endif 386#endif
@@ -530,33 +524,26 @@ typedef enum {
530 */ 524 */
531void sync_buffer(int cpu) 525void sync_buffer(int cpu)
532{ 526{
533 struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu);
534 struct mm_struct *mm = NULL; 527 struct mm_struct *mm = NULL;
535 struct mm_struct *oldmm; 528 struct mm_struct *oldmm;
536 struct task_struct *new; 529 struct task_struct *new;
537 unsigned long cookie = 0; 530 unsigned long cookie = 0;
538 int in_kernel = 1; 531 int in_kernel = 1;
539 sync_buffer_state state = sb_buffer_start; 532 sync_buffer_state state = sb_buffer_start;
540#ifndef CONFIG_OPROFILE_IBS
541 unsigned int i; 533 unsigned int i;
542 unsigned long available; 534 unsigned long available;
543#endif
544 535
545 mutex_lock(&buffer_mutex); 536 mutex_lock(&buffer_mutex);
546 537
547 add_cpu_switch(cpu); 538 add_cpu_switch(cpu);
548 539
549 /* Remember, only we can modify tail_pos */
550
551 cpu_buffer_reset(cpu); 540 cpu_buffer_reset(cpu);
552#ifndef CONFIG_OPROFILE_IBS 541 available = cpu_buffer_entries(cpu);
553 available = cpu_buffer_entries(cpu_buf);
554 542
555 for (i = 0; i < available; ++i) { 543 for (i = 0; i < available; ++i) {
556#else 544 struct op_sample *s = cpu_buffer_read_entry(cpu);
557 while (cpu_buffer_entries(cpu_buf)) { 545 if (!s)
558#endif 546 break;
559 struct op_sample *s = cpu_buffer_read_entry(cpu_buf);
560 547
561 if (is_code(s->eip)) { 548 if (is_code(s->eip)) {
562 switch (s->event) { 549 switch (s->event) {
@@ -575,11 +562,11 @@ void sync_buffer(int cpu)
575#ifdef CONFIG_OPROFILE_IBS 562#ifdef CONFIG_OPROFILE_IBS
576 case IBS_FETCH_BEGIN: 563 case IBS_FETCH_BEGIN:
577 state = sb_bt_start; 564 state = sb_bt_start;
578 add_ibs_begin(cpu_buf, IBS_FETCH_CODE, mm); 565 add_ibs_begin(cpu, IBS_FETCH_CODE, mm);
579 break; 566 break;
580 case IBS_OP_BEGIN: 567 case IBS_OP_BEGIN:
581 state = sb_bt_start; 568 state = sb_bt_start;
582 add_ibs_begin(cpu_buf, IBS_OP_CODE, mm); 569 add_ibs_begin(cpu, IBS_OP_CODE, mm);
583 break; 570 break;
584#endif 571#endif
585 default: 572 default:
@@ -600,8 +587,6 @@ void sync_buffer(int cpu)
600 atomic_inc(&oprofile_stats.bt_lost_no_mapping); 587 atomic_inc(&oprofile_stats.bt_lost_no_mapping);
601 } 588 }
602 } 589 }
603
604 increment_tail(cpu_buf);
605 } 590 }
606 release_mm(mm); 591 release_mm(mm);
607 592
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 5cf7efe38e67..eb280ec96e24 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -28,6 +28,25 @@
28#include "buffer_sync.h" 28#include "buffer_sync.h"
29#include "oprof.h" 29#include "oprof.h"
30 30
31#define OP_BUFFER_FLAGS 0
32
33/*
34 * Read and write access is using spin locking. Thus, writing to the
35 * buffer by NMI handler (x86) could occur also during critical
36 * sections when reading the buffer. To avoid this, there are 2
37 * buffers for independent read and write access. Read access is in
38 * process context only, write access only in the NMI handler. If the
39 * read buffer runs empty, both buffers are swapped atomically. There
40 * is potentially a small window during swapping where the buffers are
41 * disabled and samples could be lost.
42 *
43 * Using 2 buffers is a little bit overhead, but the solution is clear
44 * and does not require changes in the ring buffer implementation. It
45 * can be changed to a single buffer solution when the ring buffer
46 * access is implemented as non-locking atomic code.
47 */
48struct ring_buffer *op_ring_buffer_read;
49struct ring_buffer *op_ring_buffer_write;
31DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer); 50DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer);
32 51
33static void wq_sync_buffer(struct work_struct *work); 52static void wq_sync_buffer(struct work_struct *work);
@@ -37,12 +56,12 @@ static int work_enabled;
37 56
38void free_cpu_buffers(void) 57void free_cpu_buffers(void)
39{ 58{
40 int i; 59 if (op_ring_buffer_read)
41 60 ring_buffer_free(op_ring_buffer_read);
42 for_each_possible_cpu(i) { 61 op_ring_buffer_read = NULL;
43 vfree(per_cpu(cpu_buffer, i).buffer); 62 if (op_ring_buffer_write)
44 per_cpu(cpu_buffer, i).buffer = NULL; 63 ring_buffer_free(op_ring_buffer_write);
45 } 64 op_ring_buffer_write = NULL;
46} 65}
47 66
48unsigned long oprofile_get_cpu_buffer_size(void) 67unsigned long oprofile_get_cpu_buffer_size(void)
@@ -64,14 +83,16 @@ int alloc_cpu_buffers(void)
64 83
65 unsigned long buffer_size = fs_cpu_buffer_size; 84 unsigned long buffer_size = fs_cpu_buffer_size;
66 85
86 op_ring_buffer_read = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS);
87 if (!op_ring_buffer_read)
88 goto fail;
89 op_ring_buffer_write = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS);
90 if (!op_ring_buffer_write)
91 goto fail;
92
67 for_each_possible_cpu(i) { 93 for_each_possible_cpu(i) {
68 struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i); 94 struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i);
69 95
70 b->buffer = vmalloc_node(sizeof(struct op_sample) * buffer_size,
71 cpu_to_node(i));
72 if (!b->buffer)
73 goto fail;
74
75 b->last_task = NULL; 96 b->last_task = NULL;
76 b->last_is_kernel = -1; 97 b->last_is_kernel = -1;
77 b->tracing = 0; 98 b->tracing = 0;
@@ -140,10 +161,22 @@ static inline void
140add_sample(struct oprofile_cpu_buffer *cpu_buf, 161add_sample(struct oprofile_cpu_buffer *cpu_buf,
141 unsigned long pc, unsigned long event) 162 unsigned long pc, unsigned long event)
142{ 163{
143 struct op_sample *entry = cpu_buffer_write_entry(cpu_buf); 164 struct op_entry entry;
144 entry->eip = pc; 165
145 entry->event = event; 166 if (cpu_buffer_write_entry(&entry))
146 cpu_buffer_write_commit(cpu_buf); 167 goto Error;
168
169 entry.sample->eip = pc;
170 entry.sample->event = event;
171
172 if (cpu_buffer_write_commit(&entry))
173 goto Error;
174
175 return;
176
177Error:
178 cpu_buf->sample_lost_overflow++;
179 return;
147} 180}
148 181
149static inline void 182static inline void
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index 895763f065e9..aacb0f0bc566 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -15,6 +15,7 @@
15#include <linux/workqueue.h> 15#include <linux/workqueue.h>
16#include <linux/cache.h> 16#include <linux/cache.h>
17#include <linux/sched.h> 17#include <linux/sched.h>
18#include <linux/ring_buffer.h>
18 19
19struct task_struct; 20struct task_struct;
20 21
@@ -32,6 +33,12 @@ struct op_sample {
32 unsigned long event; 33 unsigned long event;
33}; 34};
34 35
36struct op_entry {
37 struct ring_buffer_event *event;
38 struct op_sample *sample;
39 unsigned long irq_flags;
40};
41
35struct oprofile_cpu_buffer { 42struct oprofile_cpu_buffer {
36 volatile unsigned long head_pos; 43 volatile unsigned long head_pos;
37 volatile unsigned long tail_pos; 44 volatile unsigned long tail_pos;
@@ -39,7 +46,6 @@ struct oprofile_cpu_buffer {
39 struct task_struct *last_task; 46 struct task_struct *last_task;
40 int last_is_kernel; 47 int last_is_kernel;
41 int tracing; 48 int tracing;
42 struct op_sample *buffer;
43 unsigned long sample_received; 49 unsigned long sample_received;
44 unsigned long sample_lost_overflow; 50 unsigned long sample_lost_overflow;
45 unsigned long backtrace_aborted; 51 unsigned long backtrace_aborted;
@@ -48,6 +54,8 @@ struct oprofile_cpu_buffer {
48 struct delayed_work work; 54 struct delayed_work work;
49}; 55};
50 56
57extern struct ring_buffer *op_ring_buffer_read;
58extern struct ring_buffer *op_ring_buffer_write;
51DECLARE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer); 59DECLARE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer);
52 60
53/* 61/*
@@ -64,46 +72,49 @@ static inline void cpu_buffer_reset(int cpu)
64 cpu_buf->last_task = NULL; 72 cpu_buf->last_task = NULL;
65} 73}
66 74
67static inline 75static inline int cpu_buffer_write_entry(struct op_entry *entry)
68struct op_sample *cpu_buffer_write_entry(struct oprofile_cpu_buffer *cpu_buf)
69{ 76{
70 return &cpu_buf->buffer[cpu_buf->head_pos]; 77 entry->event = ring_buffer_lock_reserve(op_ring_buffer_write,
71} 78 sizeof(struct op_sample),
79 &entry->irq_flags);
80 if (entry->event)
81 entry->sample = ring_buffer_event_data(entry->event);
82 else
83 entry->sample = NULL;
72 84
73static inline 85 if (!entry->sample)
74void cpu_buffer_write_commit(struct oprofile_cpu_buffer *b) 86 return -ENOMEM;
75{
76 unsigned long new_head = b->head_pos + 1;
77 87
78 /* 88 return 0;
79 * Ensure anything written to the slot before we increment is 89}
80 * visible
81 */
82 wmb();
83 90
84 if (new_head < b->buffer_size) 91static inline int cpu_buffer_write_commit(struct op_entry *entry)
85 b->head_pos = new_head; 92{
86 else 93 return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event,
87 b->head_pos = 0; 94 entry->irq_flags);
88} 95}
89 96
90static inline 97static inline struct op_sample *cpu_buffer_read_entry(int cpu)
91struct op_sample *cpu_buffer_read_entry(struct oprofile_cpu_buffer *cpu_buf)
92{ 98{
93 return &cpu_buf->buffer[cpu_buf->tail_pos]; 99 struct ring_buffer_event *e;
100 e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
101 if (e)
102 return ring_buffer_event_data(e);
103 if (ring_buffer_swap_cpu(op_ring_buffer_read,
104 op_ring_buffer_write,
105 cpu))
106 return NULL;
107 e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
108 if (e)
109 return ring_buffer_event_data(e);
110 return NULL;
94} 111}
95 112
96/* "acquire" as many cpu buffer slots as we can */ 113/* "acquire" as many cpu buffer slots as we can */
97static inline 114static inline unsigned long cpu_buffer_entries(int cpu)
98unsigned long cpu_buffer_entries(struct oprofile_cpu_buffer *b)
99{ 115{
100 unsigned long head = b->head_pos; 116 return ring_buffer_entries_cpu(op_ring_buffer_read, cpu)
101 unsigned long tail = b->tail_pos; 117 + ring_buffer_entries_cpu(op_ring_buffer_write, cpu);
102
103 if (head >= tail)
104 return head - tail;
105
106 return head + (b->buffer_size - tail);
107} 118}
108 119
109/* transient events for the CPU buffer -> event buffer */ 120/* transient events for the CPU buffer -> event buffer */