aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorDavid Miller <davem@davemloft.net>2010-04-20 18:47:11 -0400
committerSteven Rostedt <rostedt@goodmis.org>2010-04-27 13:06:35 -0400
commit72c9ddfd4c5bf54ef03cfdf57026416cb678eeba (patch)
treebd2c2b6b411975a8219d7138ba7699ee5d324e77 /kernel
parent62b915f1060996a8e1f69be50e3b8e9e43b710cb (diff)
ring-buffer: Make non-consuming read less expensive with lots of cpus.
When performing a non-consuming read, a synchronize_sched() is performed once for every cpu which is actively tracing. This is very expensive, and can make it take several seconds to open up the 'trace' file with lots of cpus. Only one synchronize_sched() call is actually necessary. What is desired is for all cpus to see the disabling state change. So we transform the existing sequence: for_each_cpu() { ring_buffer_read_start(); } where each ring_buffer_read_start() call performs a synchronize_sched(), into the following: for_each_cpu() { ring_buffer_read_prepare(); } ring_buffer_read_prepare_sync(); for_each_cpu() { ring_buffer_read_start(); } wherein only the single ring_buffer_read_prepare_sync() call needs to do the synchronize_sched(). The first phase, via ring_buffer_read_prepare(), allocates the 'iter' memory and increments ->record_disabled. In the second phase, ring_buffer_read_prepare_sync() makes sure this ->record_disabled state is visible fully to all cpus. And in the final third phase, the ring_buffer_read_start() calls reset the 'iter' objects allocated in the first phase since we now know that none of the cpus are adding trace entries any more. This makes opening the 'trace' file nearly instantaneous on a sparc64 Niagara2 box with 128 cpus tracing. Signed-off-by: David S. Miller <davem@davemloft.net> LKML-Reference: <20100420.154711.11246950.davem@davemloft.net> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/trace/ring_buffer.c64
-rw-r--r--kernel/trace/trace.c11
2 files changed, 62 insertions, 13 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5885cdfc41f3..2a090448ef6b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3332,23 +3332,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
3332EXPORT_SYMBOL_GPL(ring_buffer_consume); 3332EXPORT_SYMBOL_GPL(ring_buffer_consume);
3333 3333
3334/** 3334/**
3335 * ring_buffer_read_start - start a non consuming read of the buffer 3335 * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
3336 * @buffer: The ring buffer to read from 3336 * @buffer: The ring buffer to read from
3337 * @cpu: The cpu buffer to iterate over 3337 * @cpu: The cpu buffer to iterate over
3338 * 3338 *
3339 * This starts up an iteration through the buffer. It also disables 3339 * This performs the initial preparations necessary to iterate
3340 * the recording to the buffer until the reading is finished. 3340 * through the buffer. Memory is allocated, buffer recording
3341 * This prevents the reading from being corrupted. This is not 3341 * is disabled, and the iterator pointer is returned to the caller.
3342 * a consuming read, so a producer is not expected.
3343 * 3342 *
3344 * Must be paired with ring_buffer_finish. 3343 * Disabling buffer recording prevents the reading from being
3344 * corrupted. This is not a consuming read, so a producer is not
3345 * expected.
3346 *
3347 * After a sequence of ring_buffer_read_prepare calls, the user is
3348 * expected to make at least one call to ring_buffer_read_prepare_sync.
3349 * Afterwards, ring_buffer_read_start is invoked to get things going
3350 * for real.
3351 *
3352 * This overall must be paired with ring_buffer_finish.
3345 */ 3353 */
3346struct ring_buffer_iter * 3354struct ring_buffer_iter *
3347ring_buffer_read_start(struct ring_buffer *buffer, int cpu) 3355ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
3348{ 3356{
3349 struct ring_buffer_per_cpu *cpu_buffer; 3357 struct ring_buffer_per_cpu *cpu_buffer;
3350 struct ring_buffer_iter *iter; 3358 struct ring_buffer_iter *iter;
3351 unsigned long flags;
3352 3359
3353 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 3360 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3354 return NULL; 3361 return NULL;
@@ -3362,15 +3369,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
3362 iter->cpu_buffer = cpu_buffer; 3369 iter->cpu_buffer = cpu_buffer;
3363 3370
3364 atomic_inc(&cpu_buffer->record_disabled); 3371 atomic_inc(&cpu_buffer->record_disabled);
3372
3373 return iter;
3374}
3375EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
3376
3377/**
3378 * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
3379 *
3380 * All previously invoked ring_buffer_read_prepare calls to prepare
3381 * iterators will be synchronized. Afterwards, ring_buffer_read_start
3382 * calls on those iterators are allowed.
3383 */
3384void
3385ring_buffer_read_prepare_sync(void)
3386{
3365 synchronize_sched(); 3387 synchronize_sched();
3388}
3389EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
3390
3391/**
3392 * ring_buffer_read_start - start a non consuming read of the buffer
3393 * @iter: The iterator returned by ring_buffer_read_prepare
3394 *
3395 * This finalizes the startup of an iteration through the buffer.
3396 * The iterator comes from a call to ring_buffer_read_prepare and
3397 * an intervening ring_buffer_read_prepare_sync must have been
3398 * performed.
3399 *
3400 * Must be paired with ring_buffer_finish.
3401 */
3402void
3403ring_buffer_read_start(struct ring_buffer_iter *iter)
3404{
3405 struct ring_buffer_per_cpu *cpu_buffer;
3406 unsigned long flags;
3407
3408 if (!iter)
3409 return;
3410
3411 cpu_buffer = iter->cpu_buffer;
3366 3412
3367 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3413 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3368 arch_spin_lock(&cpu_buffer->lock); 3414 arch_spin_lock(&cpu_buffer->lock);
3369 rb_iter_reset(iter); 3415 rb_iter_reset(iter);
3370 arch_spin_unlock(&cpu_buffer->lock); 3416 arch_spin_unlock(&cpu_buffer->lock);
3371 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3417 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3372
3373 return iter;
3374} 3418}
3375EXPORT_SYMBOL_GPL(ring_buffer_read_start); 3419EXPORT_SYMBOL_GPL(ring_buffer_read_start);
3376 3420
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8b9ba41ec146..756d7283318b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2201,15 +2201,20 @@ __tracing_open(struct inode *inode, struct file *file)
2201 2201
2202 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { 2202 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
2203 for_each_tracing_cpu(cpu) { 2203 for_each_tracing_cpu(cpu) {
2204
2205 iter->buffer_iter[cpu] = 2204 iter->buffer_iter[cpu] =
2206 ring_buffer_read_start(iter->tr->buffer, cpu); 2205 ring_buffer_read_prepare(iter->tr->buffer, cpu);
2206 }
2207 ring_buffer_read_prepare_sync();
2208 for_each_tracing_cpu(cpu) {
2209 ring_buffer_read_start(iter->buffer_iter[cpu]);
2207 tracing_iter_reset(iter, cpu); 2210 tracing_iter_reset(iter, cpu);
2208 } 2211 }
2209 } else { 2212 } else {
2210 cpu = iter->cpu_file; 2213 cpu = iter->cpu_file;
2211 iter->buffer_iter[cpu] = 2214 iter->buffer_iter[cpu] =
2212 ring_buffer_read_start(iter->tr->buffer, cpu); 2215 ring_buffer_read_prepare(iter->tr->buffer, cpu);
2216 ring_buffer_read_prepare_sync();
2217 ring_buffer_read_start(iter->buffer_iter[cpu]);
2213 tracing_iter_reset(iter, cpu); 2218 tracing_iter_reset(iter, cpu);
2214 } 2219 }
2215 2220