about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/oprofile
diff options
context:
space:
mode:
author: Andi Kleen <andi@firstfloor.org> 2010-03-31 21:17:25 -0400
committer: Robert Richter <robert.richter@amd.com> 2010-04-23 09:30:38 -0400
commit: cb6e943ccf19ab6d3189147e9d625a992e016084 (patch)
tree: 8ecbe4eae825ea4f7bbb685cab5a45206d437f05 /drivers/oprofile
parent: a36bf32e9e8a86f291f746b7f8292e042ee04a46 (diff)
oprofile: remove double ring buffering
oprofile used a double buffer scheme for its cpu event buffer to avoid races on reading with the old locked ring buffer.

But that is obsolete now with the new ring buffer, so simply use a single buffer. This greatly simplifies the code and avoids a lot of sample drops on large runs, especially with call graph.

Based on suggestions from Steven Rostedt

For stable kernels from v2.6.32, but not earlier.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: stable <stable@kernel.org>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Diffstat (limited to 'drivers/oprofile')
-rw-r--r-- drivers/oprofile/cpu_buffer.c 63
1 files changed, 13 insertions, 50 deletions
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 166b67ea622f..de82183bb9b3 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -30,23 +30,7 @@
30 30
31#define OP_BUFFER_FLAGS 0 31#define OP_BUFFER_FLAGS 0
32 32
33/* 33static struct ring_buffer *op_ring_buffer;
34 * Read and write access is using spin locking. Thus, writing to the
35 * buffer by NMI handler (x86) could occur also during critical
36 * sections when reading the buffer. To avoid this, there are 2
37 * buffers for independent read and write access. Read access is in
38 * process context only, write access only in the NMI handler. If the
39 * read buffer runs empty, both buffers are swapped atomically. There
40 * is potentially a small window during swapping where the buffers are
41 * disabled and samples could be lost.
42 *
43 * Using 2 buffers is a little bit overhead, but the solution is clear
44 * and does not require changes in the ring buffer implementation. It
45 * can be changed to a single buffer solution when the ring buffer
46 * access is implemented as non-locking atomic code.
47 */
48static struct ring_buffer *op_ring_buffer_read;
49static struct ring_buffer *op_ring_buffer_write;
50DEFINE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer); 34DEFINE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer);
51 35
52static void wq_sync_buffer(struct work_struct *work); 36static void wq_sync_buffer(struct work_struct *work);
@@ -68,12 +52,9 @@ void oprofile_cpu_buffer_inc_smpl_lost(void)
68 52
69void free_cpu_buffers(void) 53void free_cpu_buffers(void)
70{ 54{
71 if (op_ring_buffer_read) 55 if (op_ring_buffer)
72 ring_buffer_free(op_ring_buffer_read); 56 ring_buffer_free(op_ring_buffer);
73 op_ring_buffer_read = NULL; 57 op_ring_buffer = NULL;
74 if (op_ring_buffer_write)
75 ring_buffer_free(op_ring_buffer_write);
76 op_ring_buffer_write = NULL;
77} 58}
78 59
79#define RB_EVENT_HDR_SIZE 4 60#define RB_EVENT_HDR_SIZE 4
@@ -86,11 +67,8 @@ int alloc_cpu_buffers(void)
86 unsigned long byte_size = buffer_size * (sizeof(struct op_sample) + 67 unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
87 RB_EVENT_HDR_SIZE); 68 RB_EVENT_HDR_SIZE);
88 69
89 op_ring_buffer_read = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS); 70 op_ring_buffer = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
90 if (!op_ring_buffer_read) 71 if (!op_ring_buffer)
91 goto fail;
92 op_ring_buffer_write = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
93 if (!op_ring_buffer_write)
94 goto fail; 72 goto fail;
95 73
96 for_each_possible_cpu(i) { 74 for_each_possible_cpu(i) {
@@ -162,16 +140,11 @@ struct op_sample
162*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size) 140*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size)
163{ 141{
164 entry->event = ring_buffer_lock_reserve 142 entry->event = ring_buffer_lock_reserve
165 (op_ring_buffer_write, sizeof(struct op_sample) + 143 (op_ring_buffer, sizeof(struct op_sample) +
166 size * sizeof(entry->sample->data[0])); 144 size * sizeof(entry->sample->data[0]));
167 if (entry->event) 145 if (!entry->event)
168 entry->sample = ring_buffer_event_data(entry->event);
169 else
170 entry->sample = NULL;
171
172 if (!entry->sample)
173 return NULL; 146 return NULL;
174 147 entry->sample = ring_buffer_event_data(entry->event);
175 entry->size = size; 148 entry->size = size;
176 entry->data = entry->sample->data; 149 entry->data = entry->sample->data;
177 150
@@ -180,25 +153,16 @@ struct op_sample
180 153
181int op_cpu_buffer_write_commit(struct op_entry *entry) 154int op_cpu_buffer_write_commit(struct op_entry *entry)
182{ 155{
183 return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event); 156 return ring_buffer_unlock_commit(op_ring_buffer, entry->event);
184} 157}
185 158
186struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu) 159struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
187{ 160{
188 struct ring_buffer_event *e; 161 struct ring_buffer_event *e;
189 e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); 162 e = ring_buffer_consume(op_ring_buffer, cpu, NULL);
190 if (e) 163 if (!e)
191 goto event;
192 if (ring_buffer_swap_cpu(op_ring_buffer_read,
193 op_ring_buffer_write,
194 cpu))
195 return NULL; 164 return NULL;
196 e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
197 if (e)
198 goto event;
199 return NULL;
200 165
201event:
202 entry->event = e; 166 entry->event = e;
203 entry->sample = ring_buffer_event_data(e); 167 entry->sample = ring_buffer_event_data(e);
204 entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample)) 168 entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample))
@@ -209,8 +173,7 @@ event:
209 173
210unsigned long op_cpu_buffer_entries(int cpu) 174unsigned long op_cpu_buffer_entries(int cpu)
211{ 175{
212 return ring_buffer_entries_cpu(op_ring_buffer_read, cpu) 176 return ring_buffer_entries_cpu(op_ring_buffer, cpu);
213 + ring_buffer_entries_cpu(op_ring_buffer_write, cpu);
214} 177}
215 178
216static int 179static int