author      Steven Rostedt <rostedt@goodmis.org>    2008-10-01 00:29:53 -0400
committer   Ingo Molnar <mingo@elte.hu>             2008-10-14 04:39:05 -0400
commit      d769041f865330034131525ee6a7f72eb4af2a24 (patch)
tree        5c0d93063585c9a94d3c8e8105cc7ad27f4fe0e8 /kernel/trace/trace.c
parent      70255b5e3f1bd1a5af5b1e425ec2c4db7c735112 (diff)
ring_buffer: implement new locking
The old "lock always" scheme had issues with lockdep, and was not very
efficient anyways.
This patch introduces a new design that is partially lockless on writes.
Writes add new entries to the per-CPU pages by simply disabling
interrupts. Only when a write needs to move to another page does it
take the lock.
A new "read page" has been added so that the reader can pull out a page
from the ring buffer to read without worrying about the writer writing over
it. This allows us to not take the lock for all reads. The lock is
now only taken when a read needs to go to a new page.
This is far from lockless, and interrupts still need to be disabled,
but it is a step towards a more lockless solution, and it also
solves a lot of the issues that were noticed with the first conversion
of ftrace to the ring buffers.
Note: the ring_buffer_{un}lock API has been removed.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
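
To make the write-side scheme described above concrete, here is a minimal sketch.
All names (rb_cpu_buffer, rb_reserve, rb_next_page) are hypothetical and do not
match the real kernel/trace/ring_buffer.c internals; only the locking pattern is
the point: every write runs with interrupts disabled on the local CPU, and the
per-cpu lock is taken only when the write has to move on to a new page.

/* Sketch only: hypothetical names, not the real ring_buffer.c code. */
struct rb_cpu_buffer {
        raw_spinlock_t  lock;           /* taken only on page transitions */
        void            *commit_page;   /* page currently being written to */
        unsigned int    write;          /* next free byte in commit_page */
};

static void *rb_reserve(struct rb_cpu_buffer *cpu_buf, unsigned int len,
                        unsigned long *flags)
{
        void *event;

        /* Every write disables interrupts on the local CPU ... */
        local_irq_save(*flags);

        if (cpu_buf->write + len <= PAGE_SIZE) {
                /* Fast path: room on the current page, no lock at all. */
                event = cpu_buf->commit_page + cpu_buf->write;
                cpu_buf->write += len;
                return event;   /* caller restores *flags when it commits */
        }

        /* Slow path: moving to a new page takes the per-cpu lock. */
        __raw_spin_lock(&cpu_buf->lock);
        cpu_buf->commit_page = rb_next_page(cpu_buf);   /* hypothetical */
        cpu_buf->write = len;
        event = cpu_buf->commit_page;
        __raw_spin_unlock(&cpu_buf->lock);

        return event;
}

Interrupts stay off between reserve and commit so the entry cannot be interleaved
with another trace entry on the same CPU; that is why this is "far from lockless"
but still much cheaper than the old "lock always" scheme.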
Diffstat (limited to 'kernel/trace/trace.c')
-rw-r--r--   kernel/trace/trace.c   113
1 file changed, 77 insertions, 36 deletions
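
The read side complements this design in two ways. In ring_buffer.c (not part of
the diffstat above), the reader swaps a dedicated page out of the ring, so the
writer can never overwrite what the reader currently holds, and the lock is only
needed for that swap when the reader moves on to a new page. In trace.c, shown in
the hunks below, every reader-side ring-buffer access is additionally bracketed by
a new per-cpu ftrace_cpu_disabled counter so that ftrace does not trace into the
ring buffer while it is being read. A rough sketch of the reader-page swap,
reusing the hypothetical rb_cpu_buffer from the sketch above:

/* Sketch only: hypothetical names, not the real ring_buffer.c code. */
struct rb_reader {
        void            *page;          /* page pulled out of the ring */
        unsigned int    read;           /* next unread byte on that page */
        unsigned int    commit;         /* bytes committed on that page */
};

static void rb_reader_refill(struct rb_cpu_buffer *cpu_buf,
                             struct rb_reader *reader)
{
        /*
         * Events on the page the reader already owns are consumed without
         * any locking.  Only when that page is exhausted is the per-cpu
         * lock taken, to swap the reader's now-empty page for the next
         * full page in the ring.
         */
        __raw_spin_lock(&cpu_buf->lock);
        reader->page   = rb_swap_reader_page(cpu_buf, reader->page); /* hypothetical */
        reader->read   = 0;
        reader->commit = rb_page_commit(reader->page);  /* hypothetical */
        __raw_spin_unlock(&cpu_buf->lock);
}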
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6a1c76bb56ba..b542f8837801 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -42,6 +42,20 @@
 unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
 unsigned long __read_mostly tracing_thresh;
 
+static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+
+static inline void ftrace_disable_cpu(void)
+{
+        preempt_disable();
+        local_inc(&__get_cpu_var(ftrace_cpu_disabled));
+}
+
+static inline void ftrace_enable_cpu(void)
+{
+        local_dec(&__get_cpu_var(ftrace_cpu_disabled));
+        preempt_enable();
+}
+
 static cpumask_t __read_mostly tracing_buffer_mask;
 
 #define for_each_tracing_cpu(cpu) \
@@ -406,7 +420,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
         tr->buffer = max_tr.buffer;
         max_tr.buffer = buf;
 
+        ftrace_disable_cpu();
         ring_buffer_reset(tr->buffer);
+        ftrace_enable_cpu();
 
         __update_max_tr(tr, tsk, cpu);
         __raw_spin_unlock(&ftrace_max_lock);
@@ -428,9 +444,13 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
         WARN_ON_ONCE(!irqs_disabled());
         __raw_spin_lock(&ftrace_max_lock);
 
+        ftrace_disable_cpu();
+
         ring_buffer_reset(max_tr.buffer);
         ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
 
+        ftrace_enable_cpu();
+
         WARN_ON_ONCE(ret);
 
         __update_max_tr(tr, tsk, cpu);
@@ -543,7 +563,9 @@ void unregister_tracer(struct tracer *type)
 
 void tracing_reset(struct trace_array *tr, int cpu)
 {
+        ftrace_disable_cpu();
         ring_buffer_reset_cpu(tr->buffer, cpu);
+        ftrace_enable_cpu();
 }
 
 #define SAVED_CMDLINES 128
@@ -654,6 +676,10 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
         struct ftrace_entry *entry;
         unsigned long irq_flags;
 
+        /* If we are reading the ring buffer, don't trace */
+        if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+                return;
+
         event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
                                          &irq_flags);
         if (!event)
@@ -870,8 +896,14 @@ enum trace_file_type {
 
 static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
 {
+        /* Don't allow ftrace to trace into the ring buffers */
+        ftrace_disable_cpu();
+
         iter->idx++;
-        ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+        if (iter->buffer_iter[iter->cpu])
+                ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+
+        ftrace_enable_cpu();
 }
 
 static struct trace_entry *
@@ -880,9 +912,19 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
         struct ring_buffer_event *event;
         struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
 
-        event = ring_buffer_iter_peek(buf_iter, ts);
+        /* Don't allow ftrace to trace into the ring buffers */
+        ftrace_disable_cpu();
+
+        if (buf_iter)
+                event = ring_buffer_iter_peek(buf_iter, ts);
+        else
+                event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
+
+        ftrace_enable_cpu();
+
         return event ? ring_buffer_event_data(event) : NULL;
 }
+
 static struct trace_entry *
 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 {
@@ -938,7 +980,10 @@ static void *find_next_entry_inc(struct trace_iterator *iter)
 
 static void trace_consume(struct trace_iterator *iter)
 {
+        /* Don't allow ftrace to trace into the ring buffers */
+        ftrace_disable_cpu();
         ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
+        ftrace_enable_cpu();
 }
 
 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -991,10 +1036,14 @@ static void *s_start(struct seq_file *m, loff_t *pos)
         iter->cpu = 0;
         iter->idx = -1;
 
+        ftrace_disable_cpu();
+
         for_each_tracing_cpu(cpu) {
                 ring_buffer_iter_reset(iter->buffer_iter[cpu]);
         }
 
+        ftrace_enable_cpu();
+
         for (p = iter; p && l < *pos; p = s_next(m, p, &l))
                 ;
 
@@ -1242,7 +1291,16 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
                 cont = (struct trace_field_cont *)ent;
                 if (ok)
                         ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
-                ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+
+                ftrace_disable_cpu();
+
+                if (iter->buffer_iter[iter->cpu])
+                        ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+                else
+                        ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+
+                ftrace_enable_cpu();
+
                 ent = peek_next_entry(iter, iter->cpu, NULL);
         } while (ent && ent->type == TRACE_CONT);
 
@@ -1683,9 +1741,15 @@ static int trace_empty(struct trace_iterator *iter)
         int cpu;
 
         for_each_tracing_cpu(cpu) {
-                if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
-                        return 0;
+                if (iter->buffer_iter[cpu]) {
+                        if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+                                return 0;
+                } else {
+                        if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
+                                return 0;
+                }
         }
+
         return TRACE_TYPE_HANDLED;
 }
 
@@ -1776,8 +1840,10 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
         iter->pos = -1;
 
         for_each_tracing_cpu(cpu) {
+
                 iter->buffer_iter[cpu] =
                         ring_buffer_read_start(iter->tr->buffer, cpu);
+
                 if (!iter->buffer_iter[cpu])
                         goto fail_buffer;
         }
@@ -2341,7 +2407,6 @@ static atomic_t tracing_reader;
 static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
         struct trace_iterator *iter;
-        int cpu;
 
         if (tracing_disabled)
                 return -ENODEV;
@@ -2362,38 +2427,17 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
         iter->trace = current_trace;
         filp->private_data = iter;
 
-        for_each_tracing_cpu(cpu) {
-                iter->buffer_iter[cpu] =
-                        ring_buffer_read_start(iter->tr->buffer, cpu);
-                if (!iter->buffer_iter[cpu])
-                        goto fail_buffer;
-        }
-
         if (iter->trace->pipe_open)
                 iter->trace->pipe_open(iter);
         mutex_unlock(&trace_types_lock);
 
         return 0;
-
- fail_buffer:
-        for_each_tracing_cpu(cpu) {
-                if (iter->buffer_iter[cpu])
-                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
-        }
-        mutex_unlock(&trace_types_lock);
-
-        return -ENOMEM;
 }
 
 static int tracing_release_pipe(struct inode *inode, struct file *file)
 {
         struct trace_iterator *iter = file->private_data;
-        int cpu;
 
-        for_each_tracing_cpu(cpu) {
-                if (iter->buffer_iter[cpu])
-                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
-        }
         kfree(iter);
         atomic_dec(&tracing_reader);
 
@@ -2429,7 +2473,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
                   size_t cnt, loff_t *ppos)
 {
         struct trace_iterator *iter = filp->private_data;
-        unsigned long flags;
 #ifdef CONFIG_FTRACE
         int ftrace_save;
 #endif
@@ -2528,7 +2571,6 @@ waitagain:
         ftrace_enabled = 0;
 #endif
         smp_wmb();
-        ring_buffer_lock(iter->tr->buffer, &flags);
 
         while (find_next_entry_inc(iter) != NULL) {
                 enum print_line_t ret;
@@ -2547,7 +2589,6 @@ waitagain:
                         break;
         }
 
-        ring_buffer_unlock(iter->tr->buffer, flags);
 #ifdef CONFIG_FTRACE
         ftrace_enabled = ftrace_save;
 #endif
@@ -3010,8 +3051,8 @@ void ftrace_dump(void)
         static struct trace_iterator iter;
         static cpumask_t mask;
         static int dump_ran;
-        unsigned long flags, irq_flags;
-        int cnt = 0;
+        unsigned long flags;
+        int cnt = 0, cpu;
 
         /* only one dump */
         spin_lock_irqsave(&ftrace_dump_lock, flags);
@@ -3023,6 +3064,10 @@ void ftrace_dump(void)
         /* No turning back! */
         ftrace_kill_atomic();
 
+        for_each_tracing_cpu(cpu) {
+                atomic_inc(&global_trace.data[cpu]->disabled);
+        }
+
         printk(KERN_TRACE "Dumping ftrace buffer:\n");
 
         iter.tr = &global_trace;
@@ -3037,8 +3082,6 @@ void ftrace_dump(void)
 
         cpus_clear(mask);
 
-        ring_buffer_lock(iter.tr->buffer, &irq_flags);
-
         while (!trace_empty(&iter)) {
 
                 if (!cnt)
@@ -3066,8 +3109,6 @@ void ftrace_dump(void)
         else
                 printk(KERN_TRACE "---------------------------------\n");
 
-        ring_buffer_unlock(iter.tr->buffer, irq_flags);
-
  out:
         spin_unlock_irqrestore(&ftrace_dump_lock, flags);
 }