aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace/trace.c
diff options
context:
space:
mode:
author: Steven Rostedt <rostedt@goodmis.org> 2008-10-01 00:29:53 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-10-14 04:39:05 -0400
commit: d769041f865330034131525ee6a7f72eb4af2a24 (patch)
tree: 5c0d93063585c9a94d3c8e8105cc7ad27f4fe0e8 /kernel/trace/trace.c
parent: 70255b5e3f1bd1a5af5b1e425ec2c4db7c735112 (diff)
ring_buffer: implement new locking
The old "lock always" scheme had issues with lockdep, and was not very efficient anyway. This patch introduces a new design that is partially lockless on writes. Writes will add new entries to the per-CPU pages by simply disabling interrupts. When a write needs to go to another page, then it will grab the lock. A new "read page" has been added so that the reader can pull out a page from the ring buffer to read without worrying about the writer writing over it. This allows us to not take the lock for all reads. The lock is now only taken when a read needs to go to a new page. This is far from lockless, and interrupts still need to be disabled, but it is a step towards a more lockless solution, and it also solves a lot of the issues that were noticed by the first conversion of ftrace to the ring buffers. Note: the ring_buffer_{un}lock API has been removed. Signed-off-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/trace/trace.c')
-rw-r--r-- kernel/trace/trace.c | 113
1 files changed, 77 insertions, 36 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6a1c76bb56ba..b542f8837801 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -42,6 +42,20 @@
42unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 42unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
43unsigned long __read_mostly tracing_thresh; 43unsigned long __read_mostly tracing_thresh;
44 44
45static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
46
47static inline void ftrace_disable_cpu(void)
48{
49 preempt_disable();
50 local_inc(&__get_cpu_var(ftrace_cpu_disabled));
51}
52
53static inline void ftrace_enable_cpu(void)
54{
55 local_dec(&__get_cpu_var(ftrace_cpu_disabled));
56 preempt_enable();
57}
58
45static cpumask_t __read_mostly tracing_buffer_mask; 59static cpumask_t __read_mostly tracing_buffer_mask;
46 60
47#define for_each_tracing_cpu(cpu) \ 61#define for_each_tracing_cpu(cpu) \
@@ -406,7 +420,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
406 tr->buffer = max_tr.buffer; 420 tr->buffer = max_tr.buffer;
407 max_tr.buffer = buf; 421 max_tr.buffer = buf;
408 422
423 ftrace_disable_cpu();
409 ring_buffer_reset(tr->buffer); 424 ring_buffer_reset(tr->buffer);
425 ftrace_enable_cpu();
410 426
411 __update_max_tr(tr, tsk, cpu); 427 __update_max_tr(tr, tsk, cpu);
412 __raw_spin_unlock(&ftrace_max_lock); 428 __raw_spin_unlock(&ftrace_max_lock);
@@ -428,9 +444,13 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
428 WARN_ON_ONCE(!irqs_disabled()); 444 WARN_ON_ONCE(!irqs_disabled());
429 __raw_spin_lock(&ftrace_max_lock); 445 __raw_spin_lock(&ftrace_max_lock);
430 446
447 ftrace_disable_cpu();
448
431 ring_buffer_reset(max_tr.buffer); 449 ring_buffer_reset(max_tr.buffer);
432 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); 450 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
433 451
452 ftrace_enable_cpu();
453
434 WARN_ON_ONCE(ret); 454 WARN_ON_ONCE(ret);
435 455
436 __update_max_tr(tr, tsk, cpu); 456 __update_max_tr(tr, tsk, cpu);
@@ -543,7 +563,9 @@ void unregister_tracer(struct tracer *type)
543 563
544void tracing_reset(struct trace_array *tr, int cpu) 564void tracing_reset(struct trace_array *tr, int cpu)
545{ 565{
566 ftrace_disable_cpu();
546 ring_buffer_reset_cpu(tr->buffer, cpu); 567 ring_buffer_reset_cpu(tr->buffer, cpu);
568 ftrace_enable_cpu();
547} 569}
548 570
549#define SAVED_CMDLINES 128 571#define SAVED_CMDLINES 128
@@ -654,6 +676,10 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
654 struct ftrace_entry *entry; 676 struct ftrace_entry *entry;
655 unsigned long irq_flags; 677 unsigned long irq_flags;
656 678
679 /* If we are reading the ring buffer, don't trace */
680 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
681 return;
682
657 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 683 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
658 &irq_flags); 684 &irq_flags);
659 if (!event) 685 if (!event)
@@ -870,8 +896,14 @@ enum trace_file_type {
870 896
871static void trace_iterator_increment(struct trace_iterator *iter, int cpu) 897static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
872{ 898{
899 /* Don't allow ftrace to trace into the ring buffers */
900 ftrace_disable_cpu();
901
873 iter->idx++; 902 iter->idx++;
874 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); 903 if (iter->buffer_iter[iter->cpu])
904 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
905
906 ftrace_enable_cpu();
875} 907}
876 908
877static struct trace_entry * 909static struct trace_entry *
@@ -880,9 +912,19 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
880 struct ring_buffer_event *event; 912 struct ring_buffer_event *event;
881 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; 913 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
882 914
883 event = ring_buffer_iter_peek(buf_iter, ts); 915 /* Don't allow ftrace to trace into the ring buffers */
916 ftrace_disable_cpu();
917
918 if (buf_iter)
919 event = ring_buffer_iter_peek(buf_iter, ts);
920 else
921 event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
922
923 ftrace_enable_cpu();
924
884 return event ? ring_buffer_event_data(event) : NULL; 925 return event ? ring_buffer_event_data(event) : NULL;
885} 926}
927
886static struct trace_entry * 928static struct trace_entry *
887__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) 929__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
888{ 930{
@@ -938,7 +980,10 @@ static void *find_next_entry_inc(struct trace_iterator *iter)
938 980
939static void trace_consume(struct trace_iterator *iter) 981static void trace_consume(struct trace_iterator *iter)
940{ 982{
983 /* Don't allow ftrace to trace into the ring buffers */
984 ftrace_disable_cpu();
941 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts); 985 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
986 ftrace_enable_cpu();
942} 987}
943 988
944static void *s_next(struct seq_file *m, void *v, loff_t *pos) 989static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -991,10 +1036,14 @@ static void *s_start(struct seq_file *m, loff_t *pos)
991 iter->cpu = 0; 1036 iter->cpu = 0;
992 iter->idx = -1; 1037 iter->idx = -1;
993 1038
1039 ftrace_disable_cpu();
1040
994 for_each_tracing_cpu(cpu) { 1041 for_each_tracing_cpu(cpu) {
995 ring_buffer_iter_reset(iter->buffer_iter[cpu]); 1042 ring_buffer_iter_reset(iter->buffer_iter[cpu]);
996 } 1043 }
997 1044
1045 ftrace_enable_cpu();
1046
998 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1047 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
999 ; 1048 ;
1000 1049
@@ -1242,7 +1291,16 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1242 cont = (struct trace_field_cont *)ent; 1291 cont = (struct trace_field_cont *)ent;
1243 if (ok) 1292 if (ok)
1244 ok = (trace_seq_printf(s, "%s", cont->buf) > 0); 1293 ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
1245 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); 1294
1295 ftrace_disable_cpu();
1296
1297 if (iter->buffer_iter[iter->cpu])
1298 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1299 else
1300 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
1301
1302 ftrace_enable_cpu();
1303
1246 ent = peek_next_entry(iter, iter->cpu, NULL); 1304 ent = peek_next_entry(iter, iter->cpu, NULL);
1247 } while (ent && ent->type == TRACE_CONT); 1305 } while (ent && ent->type == TRACE_CONT);
1248 1306
@@ -1683,9 +1741,15 @@ static int trace_empty(struct trace_iterator *iter)
1683 int cpu; 1741 int cpu;
1684 1742
1685 for_each_tracing_cpu(cpu) { 1743 for_each_tracing_cpu(cpu) {
1686 if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) 1744 if (iter->buffer_iter[cpu]) {
1687 return 0; 1745 if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1746 return 0;
1747 } else {
1748 if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1749 return 0;
1750 }
1688 } 1751 }
1752
1689 return TRACE_TYPE_HANDLED; 1753 return TRACE_TYPE_HANDLED;
1690} 1754}
1691 1755
@@ -1776,8 +1840,10 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1776 iter->pos = -1; 1840 iter->pos = -1;
1777 1841
1778 for_each_tracing_cpu(cpu) { 1842 for_each_tracing_cpu(cpu) {
1843
1779 iter->buffer_iter[cpu] = 1844 iter->buffer_iter[cpu] =
1780 ring_buffer_read_start(iter->tr->buffer, cpu); 1845 ring_buffer_read_start(iter->tr->buffer, cpu);
1846
1781 if (!iter->buffer_iter[cpu]) 1847 if (!iter->buffer_iter[cpu])
1782 goto fail_buffer; 1848 goto fail_buffer;
1783 } 1849 }
@@ -2341,7 +2407,6 @@ static atomic_t tracing_reader;
2341static int tracing_open_pipe(struct inode *inode, struct file *filp) 2407static int tracing_open_pipe(struct inode *inode, struct file *filp)
2342{ 2408{
2343 struct trace_iterator *iter; 2409 struct trace_iterator *iter;
2344 int cpu;
2345 2410
2346 if (tracing_disabled) 2411 if (tracing_disabled)
2347 return -ENODEV; 2412 return -ENODEV;
@@ -2362,38 +2427,17 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2362 iter->trace = current_trace; 2427 iter->trace = current_trace;
2363 filp->private_data = iter; 2428 filp->private_data = iter;
2364 2429
2365 for_each_tracing_cpu(cpu) {
2366 iter->buffer_iter[cpu] =
2367 ring_buffer_read_start(iter->tr->buffer, cpu);
2368 if (!iter->buffer_iter[cpu])
2369 goto fail_buffer;
2370 }
2371
2372 if (iter->trace->pipe_open) 2430 if (iter->trace->pipe_open)
2373 iter->trace->pipe_open(iter); 2431 iter->trace->pipe_open(iter);
2374 mutex_unlock(&trace_types_lock); 2432 mutex_unlock(&trace_types_lock);
2375 2433
2376 return 0; 2434 return 0;
2377
2378 fail_buffer:
2379 for_each_tracing_cpu(cpu) {
2380 if (iter->buffer_iter[cpu])
2381 ring_buffer_read_finish(iter->buffer_iter[cpu]);
2382 }
2383 mutex_unlock(&trace_types_lock);
2384
2385 return -ENOMEM;
2386} 2435}
2387 2436
2388static int tracing_release_pipe(struct inode *inode, struct file *file) 2437static int tracing_release_pipe(struct inode *inode, struct file *file)
2389{ 2438{
2390 struct trace_iterator *iter = file->private_data; 2439 struct trace_iterator *iter = file->private_data;
2391 int cpu;
2392 2440
2393 for_each_tracing_cpu(cpu) {
2394 if (iter->buffer_iter[cpu])
2395 ring_buffer_read_finish(iter->buffer_iter[cpu]);
2396 }
2397 kfree(iter); 2441 kfree(iter);
2398 atomic_dec(&tracing_reader); 2442 atomic_dec(&tracing_reader);
2399 2443
@@ -2429,7 +2473,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2429 size_t cnt, loff_t *ppos) 2473 size_t cnt, loff_t *ppos)
2430{ 2474{
2431 struct trace_iterator *iter = filp->private_data; 2475 struct trace_iterator *iter = filp->private_data;
2432 unsigned long flags;
2433#ifdef CONFIG_FTRACE 2476#ifdef CONFIG_FTRACE
2434 int ftrace_save; 2477 int ftrace_save;
2435#endif 2478#endif
@@ -2528,7 +2571,6 @@ waitagain:
2528 ftrace_enabled = 0; 2571 ftrace_enabled = 0;
2529#endif 2572#endif
2530 smp_wmb(); 2573 smp_wmb();
2531 ring_buffer_lock(iter->tr->buffer, &flags);
2532 2574
2533 while (find_next_entry_inc(iter) != NULL) { 2575 while (find_next_entry_inc(iter) != NULL) {
2534 enum print_line_t ret; 2576 enum print_line_t ret;
@@ -2547,7 +2589,6 @@ waitagain:
2547 break; 2589 break;
2548 } 2590 }
2549 2591
2550 ring_buffer_unlock(iter->tr->buffer, flags);
2551#ifdef CONFIG_FTRACE 2592#ifdef CONFIG_FTRACE
2552 ftrace_enabled = ftrace_save; 2593 ftrace_enabled = ftrace_save;
2553#endif 2594#endif
@@ -3010,8 +3051,8 @@ void ftrace_dump(void)
3010 static struct trace_iterator iter; 3051 static struct trace_iterator iter;
3011 static cpumask_t mask; 3052 static cpumask_t mask;
3012 static int dump_ran; 3053 static int dump_ran;
3013 unsigned long flags, irq_flags; 3054 unsigned long flags;
3014 int cnt = 0; 3055 int cnt = 0, cpu;
3015 3056
3016 /* only one dump */ 3057 /* only one dump */
3017 spin_lock_irqsave(&ftrace_dump_lock, flags); 3058 spin_lock_irqsave(&ftrace_dump_lock, flags);
@@ -3023,6 +3064,10 @@ void ftrace_dump(void)
3023 /* No turning back! */ 3064 /* No turning back! */
3024 ftrace_kill_atomic(); 3065 ftrace_kill_atomic();
3025 3066
3067 for_each_tracing_cpu(cpu) {
3068 atomic_inc(&global_trace.data[cpu]->disabled);
3069 }
3070
3026 printk(KERN_TRACE "Dumping ftrace buffer:\n"); 3071 printk(KERN_TRACE "Dumping ftrace buffer:\n");
3027 3072
3028 iter.tr = &global_trace; 3073 iter.tr = &global_trace;
@@ -3037,8 +3082,6 @@ void ftrace_dump(void)
3037 3082
3038 cpus_clear(mask); 3083 cpus_clear(mask);
3039 3084
3040 ring_buffer_lock(iter.tr->buffer, &irq_flags);
3041
3042 while (!trace_empty(&iter)) { 3085 while (!trace_empty(&iter)) {
3043 3086
3044 if (!cnt) 3087 if (!cnt)
@@ -3066,8 +3109,6 @@ void ftrace_dump(void)
3066 else 3109 else
3067 printk(KERN_TRACE "---------------------------------\n"); 3110 printk(KERN_TRACE "---------------------------------\n");
3068 3111
3069 ring_buffer_unlock(iter.tr->buffer, irq_flags);
3070
3071 out: 3112 out:
3072 spin_unlock_irqrestore(&ftrace_dump_lock, flags); 3113 spin_unlock_irqrestore(&ftrace_dump_lock, flags);
3073} 3114}