diff options
author | Steven Rostedt <srostedt@redhat.com> | 2008-12-01 22:20:19 -0500 |
---|---|---|
committer | Steven Rostedt <srostedt@redhat.com> | 2009-03-03 21:01:55 -0500 |
commit | 2cadf9135eb3b6d84b6427314be827ddd443c308 (patch) | |
tree | 919e201f615b8de6d0263ded5693bad326196671 | |
parent | 474d32b68d6d842f3e710e9ae9fe2568c53339f8 (diff) |
tracing: add binary buffer files for use with splice
Impact: new feature
This patch creates a directory of files that correspond to the
per CPU ring buffers. These are binary files and are made to
be used with splice. This is the fastest way to extract data from
the ftrace ring buffers.
Thanks to Jiaying Zhang for pushing me to get this code fixed,
and to Eduard - Gabriel Munteanu for his splice code that helped
me debug my code.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
-rw-r--r-- | kernel/trace/trace.c | 274 | ||||
-rw-r--r-- | kernel/trace/trace.h | 1 |
2 files changed, 268 insertions, 7 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ea055aa21cd9..12539f72f4a5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -11,31 +11,30 @@ | |||
11 | * Copyright (C) 2004-2006 Ingo Molnar | 11 | * Copyright (C) 2004-2006 Ingo Molnar |
12 | * Copyright (C) 2004 William Lee Irwin III | 12 | * Copyright (C) 2004 William Lee Irwin III |
13 | */ | 13 | */ |
14 | #include <linux/ring_buffer.h> | ||
14 | #include <linux/utsrelease.h> | 15 | #include <linux/utsrelease.h> |
16 | #include <linux/stacktrace.h> | ||
17 | #include <linux/writeback.h> | ||
15 | #include <linux/kallsyms.h> | 18 | #include <linux/kallsyms.h> |
16 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
17 | #include <linux/notifier.h> | 20 | #include <linux/notifier.h> |
21 | #include <linux/irqflags.h> | ||
18 | #include <linux/debugfs.h> | 22 | #include <linux/debugfs.h> |
19 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
20 | #include <linux/hardirq.h> | 24 | #include <linux/hardirq.h> |
21 | #include <linux/linkage.h> | 25 | #include <linux/linkage.h> |
22 | #include <linux/uaccess.h> | 26 | #include <linux/uaccess.h> |
27 | #include <linux/kprobes.h> | ||
23 | #include <linux/ftrace.h> | 28 | #include <linux/ftrace.h> |
24 | #include <linux/module.h> | 29 | #include <linux/module.h> |
25 | #include <linux/percpu.h> | 30 | #include <linux/percpu.h> |
31 | #include <linux/splice.h> | ||
26 | #include <linux/kdebug.h> | 32 | #include <linux/kdebug.h> |
27 | #include <linux/ctype.h> | 33 | #include <linux/ctype.h> |
28 | #include <linux/init.h> | 34 | #include <linux/init.h> |
29 | #include <linux/poll.h> | 35 | #include <linux/poll.h> |
30 | #include <linux/gfp.h> | 36 | #include <linux/gfp.h> |
31 | #include <linux/fs.h> | 37 | #include <linux/fs.h> |
32 | #include <linux/kprobes.h> | ||
33 | #include <linux/writeback.h> | ||
34 | #include <linux/splice.h> | ||
35 | |||
36 | #include <linux/stacktrace.h> | ||
37 | #include <linux/ring_buffer.h> | ||
38 | #include <linux/irqflags.h> | ||
39 | 38 | ||
40 | #include "trace.h" | 39 | #include "trace.h" |
41 | #include "trace_output.h" | 40 | #include "trace_output.h" |
@@ -3005,6 +3004,246 @@ static struct file_operations tracing_mark_fops = { | |||
3005 | .write = tracing_mark_write, | 3004 | .write = tracing_mark_write, |
3006 | }; | 3005 | }; |
3007 | 3006 | ||
/*
 * Per-open state for a binary buffer file.  Allocated in
 * tracing_buffers_open() and kept in the file's private_data.
 */
struct ftrace_buffer_info {
	struct trace_array	*tr;	/* trace array whose ring buffer is read */
	void			*spare;	/* page from ring_buffer_alloc_read_page() */
	int			cpu;	/* CPU number taken from inode->i_private */
	unsigned int		read;	/* bytes of spare already copied to user space */
};
3013 | |||
3014 | static int tracing_buffers_open(struct inode *inode, struct file *filp) | ||
3015 | { | ||
3016 | int cpu = (int)(long)inode->i_private; | ||
3017 | struct ftrace_buffer_info *info; | ||
3018 | |||
3019 | if (tracing_disabled) | ||
3020 | return -ENODEV; | ||
3021 | |||
3022 | info = kzalloc(sizeof(*info), GFP_KERNEL); | ||
3023 | if (!info) | ||
3024 | return -ENOMEM; | ||
3025 | |||
3026 | info->tr = &global_trace; | ||
3027 | info->cpu = cpu; | ||
3028 | info->spare = ring_buffer_alloc_read_page(info->tr->buffer); | ||
3029 | /* Force reading ring buffer for first read */ | ||
3030 | info->read = (unsigned int)-1; | ||
3031 | if (!info->spare) | ||
3032 | goto out; | ||
3033 | |||
3034 | filp->private_data = info; | ||
3035 | |||
3036 | return 0; | ||
3037 | |||
3038 | out: | ||
3039 | kfree(info); | ||
3040 | return -ENOMEM; | ||
3041 | } | ||
3042 | |||
3043 | static ssize_t | ||
3044 | tracing_buffers_read(struct file *filp, char __user *ubuf, | ||
3045 | size_t count, loff_t *ppos) | ||
3046 | { | ||
3047 | struct ftrace_buffer_info *info = filp->private_data; | ||
3048 | unsigned int pos; | ||
3049 | ssize_t ret; | ||
3050 | size_t size; | ||
3051 | |||
3052 | /* Do we have previous read data to read? */ | ||
3053 | if (info->read < PAGE_SIZE) | ||
3054 | goto read; | ||
3055 | |||
3056 | info->read = 0; | ||
3057 | |||
3058 | ret = ring_buffer_read_page(info->tr->buffer, | ||
3059 | &info->spare, | ||
3060 | count, | ||
3061 | info->cpu, 0); | ||
3062 | if (ret < 0) | ||
3063 | return 0; | ||
3064 | |||
3065 | pos = ring_buffer_page_len(info->spare); | ||
3066 | |||
3067 | if (pos < PAGE_SIZE) | ||
3068 | memset(info->spare + pos, 0, PAGE_SIZE - pos); | ||
3069 | |||
3070 | read: | ||
3071 | size = PAGE_SIZE - info->read; | ||
3072 | if (size > count) | ||
3073 | size = count; | ||
3074 | |||
3075 | ret = copy_to_user(ubuf, info->spare + info->read, size); | ||
3076 | if (ret) | ||
3077 | return -EFAULT; | ||
3078 | *ppos += size; | ||
3079 | info->read += size; | ||
3080 | |||
3081 | return size; | ||
3082 | } | ||
3083 | |||
3084 | static int tracing_buffers_release(struct inode *inode, struct file *file) | ||
3085 | { | ||
3086 | struct ftrace_buffer_info *info = file->private_data; | ||
3087 | |||
3088 | ring_buffer_free_read_page(info->tr->buffer, info->spare); | ||
3089 | kfree(info); | ||
3090 | |||
3091 | return 0; | ||
3092 | } | ||
3093 | |||
/*
 * Reference-counted wrapper tying a ring buffer read page to the
 * pipe buffers that point at it.
 */
struct buffer_ref {
	struct ring_buffer	*buffer;	/* ring buffer the page is returned to */
	void			*page;		/* page from ring_buffer_alloc_read_page() */
	int			ref;		/* plain integer reference count */
};
3099 | |||
3100 | static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, | ||
3101 | struct pipe_buffer *buf) | ||
3102 | { | ||
3103 | struct buffer_ref *ref = (struct buffer_ref *)buf->private; | ||
3104 | |||
3105 | if (--ref->ref) | ||
3106 | return; | ||
3107 | |||
3108 | ring_buffer_free_read_page(ref->buffer, ref->page); | ||
3109 | kfree(ref); | ||
3110 | buf->private = 0; | ||
3111 | } | ||
3112 | |||
/*
 * Pipe buffer ->steal callback.  Unconditionally returns 1.
 * NOTE(review): presumably a non-zero return tells the pipe code that
 * the page cannot be stolen - confirm against pipe_buf_operations.
 */
static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe,
				 struct pipe_buffer *buf)
{
	return 1;
}
3118 | |||
3119 | static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, | ||
3120 | struct pipe_buffer *buf) | ||
3121 | { | ||
3122 | struct buffer_ref *ref = (struct buffer_ref *)buf->private; | ||
3123 | |||
3124 | ref->ref++; | ||
3125 | } | ||
3126 | |||
3127 | /* Pipe buffer operations for a buffer. */ | ||
3128 | static struct pipe_buf_operations buffer_pipe_buf_ops = { | ||
3129 | .can_merge = 0, | ||
3130 | .map = generic_pipe_buf_map, | ||
3131 | .unmap = generic_pipe_buf_unmap, | ||
3132 | .confirm = generic_pipe_buf_confirm, | ||
3133 | .release = buffer_pipe_buf_release, | ||
3134 | .steal = buffer_pipe_buf_steal, | ||
3135 | .get = buffer_pipe_buf_get, | ||
3136 | }; | ||
3137 | |||
3138 | /* | ||
3139 | * Callback from splice_to_pipe(), if we need to release some pages | ||
3140 | * at the end of the spd in case we error'ed out in filling the pipe. | ||
3141 | */ | ||
3142 | static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) | ||
3143 | { | ||
3144 | struct buffer_ref *ref = | ||
3145 | (struct buffer_ref *)spd->partial[i].private; | ||
3146 | |||
3147 | if (--ref->ref) | ||
3148 | return; | ||
3149 | |||
3150 | ring_buffer_free_read_page(ref->buffer, ref->page); | ||
3151 | kfree(ref); | ||
3152 | spd->partial[i].private = 0; | ||
3153 | } | ||
3154 | |||
3155 | static ssize_t | ||
3156 | tracing_buffers_splice_read(struct file *file, loff_t *ppos, | ||
3157 | struct pipe_inode_info *pipe, size_t len, | ||
3158 | unsigned int flags) | ||
3159 | { | ||
3160 | struct ftrace_buffer_info *info = file->private_data; | ||
3161 | struct partial_page partial[PIPE_BUFFERS]; | ||
3162 | struct page *pages[PIPE_BUFFERS]; | ||
3163 | struct splice_pipe_desc spd = { | ||
3164 | .pages = pages, | ||
3165 | .partial = partial, | ||
3166 | .flags = flags, | ||
3167 | .ops = &buffer_pipe_buf_ops, | ||
3168 | .spd_release = buffer_spd_release, | ||
3169 | }; | ||
3170 | struct buffer_ref *ref; | ||
3171 | int size, i; | ||
3172 | size_t ret; | ||
3173 | |||
3174 | /* | ||
3175 | * We can't seek on a buffer input | ||
3176 | */ | ||
3177 | if (unlikely(*ppos)) | ||
3178 | return -ESPIPE; | ||
3179 | |||
3180 | |||
3181 | for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) { | ||
3182 | struct page *page; | ||
3183 | int r; | ||
3184 | |||
3185 | ref = kzalloc(sizeof(*ref), GFP_KERNEL); | ||
3186 | if (!ref) | ||
3187 | break; | ||
3188 | |||
3189 | ref->buffer = info->tr->buffer; | ||
3190 | ref->page = ring_buffer_alloc_read_page(ref->buffer); | ||
3191 | if (!ref->page) { | ||
3192 | kfree(ref); | ||
3193 | break; | ||
3194 | } | ||
3195 | |||
3196 | r = ring_buffer_read_page(ref->buffer, &ref->page, | ||
3197 | len, info->cpu, 0); | ||
3198 | if (r < 0) { | ||
3199 | ring_buffer_free_read_page(ref->buffer, | ||
3200 | ref->page); | ||
3201 | kfree(ref); | ||
3202 | break; | ||
3203 | } | ||
3204 | |||
3205 | /* | ||
3206 | * zero out any left over data, this is going to | ||
3207 | * user land. | ||
3208 | */ | ||
3209 | size = ring_buffer_page_len(ref->page); | ||
3210 | if (size < PAGE_SIZE) | ||
3211 | memset(ref->page + size, 0, PAGE_SIZE - size); | ||
3212 | |||
3213 | page = virt_to_page(ref->page); | ||
3214 | |||
3215 | spd.pages[i] = page; | ||
3216 | spd.partial[i].len = PAGE_SIZE; | ||
3217 | spd.partial[i].offset = 0; | ||
3218 | spd.partial[i].private = (unsigned long)ref; | ||
3219 | spd.nr_pages++; | ||
3220 | } | ||
3221 | |||
3222 | spd.nr_pages = i; | ||
3223 | |||
3224 | /* did we read anything? */ | ||
3225 | if (!spd.nr_pages) { | ||
3226 | if (flags & SPLICE_F_NONBLOCK) | ||
3227 | ret = -EAGAIN; | ||
3228 | else | ||
3229 | ret = 0; | ||
3230 | /* TODO: block */ | ||
3231 | return ret; | ||
3232 | } | ||
3233 | |||
3234 | ret = splice_to_pipe(pipe, &spd); | ||
3235 | |||
3236 | return ret; | ||
3237 | } | ||
3238 | |||
3239 | static const struct file_operations tracing_buffers_fops = { | ||
3240 | .open = tracing_buffers_open, | ||
3241 | .read = tracing_buffers_read, | ||
3242 | .release = tracing_buffers_release, | ||
3243 | .splice_read = tracing_buffers_splice_read, | ||
3244 | .llseek = no_llseek, | ||
3245 | }; | ||
3246 | |||
3008 | #ifdef CONFIG_DYNAMIC_FTRACE | 3247 | #ifdef CONFIG_DYNAMIC_FTRACE |
3009 | 3248 | ||
3010 | int __weak ftrace_arch_read_dyn_info(char *buf, int size) | 3249 | int __weak ftrace_arch_read_dyn_info(char *buf, int size) |
@@ -3399,6 +3638,7 @@ static __init void create_trace_options_dir(void) | |||
3399 | static __init int tracer_init_debugfs(void) | 3638 | static __init int tracer_init_debugfs(void) |
3400 | { | 3639 | { |
3401 | struct dentry *d_tracer; | 3640 | struct dentry *d_tracer; |
3641 | struct dentry *buffers; | ||
3402 | struct dentry *entry; | 3642 | struct dentry *entry; |
3403 | int cpu; | 3643 | int cpu; |
3404 | 3644 | ||
@@ -3471,6 +3711,26 @@ static __init int tracer_init_debugfs(void) | |||
3471 | pr_warning("Could not create debugfs " | 3711 | pr_warning("Could not create debugfs " |
3472 | "'trace_marker' entry\n"); | 3712 | "'trace_marker' entry\n"); |
3473 | 3713 | ||
3714 | buffers = debugfs_create_dir("binary_buffers", d_tracer); | ||
3715 | |||
3716 | if (!buffers) | ||
3717 | pr_warning("Could not create buffers directory\n"); | ||
3718 | else { | ||
3719 | int cpu; | ||
3720 | char buf[64]; | ||
3721 | |||
3722 | for_each_tracing_cpu(cpu) { | ||
3723 | sprintf(buf, "%d", cpu); | ||
3724 | |||
3725 | entry = debugfs_create_file(buf, 0444, buffers, | ||
3726 | (void *)(long)cpu, | ||
3727 | &tracing_buffers_fops); | ||
3728 | if (!entry) | ||
3729 | pr_warning("Could not create debugfs buffers " | ||
3730 | "'%s' entry\n", buf); | ||
3731 | } | ||
3732 | } | ||
3733 | |||
3474 | #ifdef CONFIG_DYNAMIC_FTRACE | 3734 | #ifdef CONFIG_DYNAMIC_FTRACE |
3475 | entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, | 3735 | entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, |
3476 | &ftrace_update_tot_cnt, | 3736 | &ftrace_update_tot_cnt, |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e606633fb498..561bb5c5d988 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -217,6 +217,7 @@ enum trace_flag_type { | |||
217 | */ | 217 | */ |
218 | struct trace_array_cpu { | 218 | struct trace_array_cpu { |
219 | atomic_t disabled; | 219 | atomic_t disabled; |
220 | void *buffer_page; /* ring buffer spare */ | ||
220 | 221 | ||
221 | /* these fields get copied into max-trace: */ | 222 | /* these fields get copied into max-trace: */ |
222 | unsigned long trace_idx; | 223 | unsigned long trace_idx; |