diff options
Diffstat (limited to 'kernel/trace/trace.c')
| -rw-r--r-- | kernel/trace/trace.c | 150 |
1 files changed, 106 insertions, 44 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index eac6875cb990..ed01fdba4a55 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -32,6 +32,7 @@ | |||
| 32 | #include <linux/splice.h> | 32 | #include <linux/splice.h> |
| 33 | #include <linux/kdebug.h> | 33 | #include <linux/kdebug.h> |
| 34 | #include <linux/string.h> | 34 | #include <linux/string.h> |
| 35 | #include <linux/rwsem.h> | ||
| 35 | #include <linux/ctype.h> | 36 | #include <linux/ctype.h> |
| 36 | #include <linux/init.h> | 37 | #include <linux/init.h> |
| 37 | #include <linux/poll.h> | 38 | #include <linux/poll.h> |
| @@ -91,20 +92,17 @@ DEFINE_PER_CPU(int, ftrace_cpu_disabled); | |||
| 91 | static inline void ftrace_disable_cpu(void) | 92 | static inline void ftrace_disable_cpu(void) |
| 92 | { | 93 | { |
| 93 | preempt_disable(); | 94 | preempt_disable(); |
| 94 | __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled)); | 95 | __this_cpu_inc(ftrace_cpu_disabled); |
| 95 | } | 96 | } |
| 96 | 97 | ||
| 97 | static inline void ftrace_enable_cpu(void) | 98 | static inline void ftrace_enable_cpu(void) |
| 98 | { | 99 | { |
| 99 | __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled)); | 100 | __this_cpu_dec(ftrace_cpu_disabled); |
| 100 | preempt_enable(); | 101 | preempt_enable(); |
| 101 | } | 102 | } |
| 102 | 103 | ||
| 103 | static cpumask_var_t __read_mostly tracing_buffer_mask; | 104 | static cpumask_var_t __read_mostly tracing_buffer_mask; |
| 104 | 105 | ||
| 105 | /* Define which cpu buffers are currently read in trace_pipe */ | ||
| 106 | static cpumask_var_t tracing_reader_cpumask; | ||
| 107 | |||
| 108 | #define for_each_tracing_cpu(cpu) \ | 106 | #define for_each_tracing_cpu(cpu) \ |
| 109 | for_each_cpu(cpu, tracing_buffer_mask) | 107 | for_each_cpu(cpu, tracing_buffer_mask) |
| 110 | 108 | ||
| @@ -243,12 +241,91 @@ static struct tracer *current_trace __read_mostly; | |||
| 243 | 241 | ||
| 244 | /* | 242 | /* |
| 245 | * trace_types_lock is used to protect the trace_types list. | 243 | * trace_types_lock is used to protect the trace_types list. |
| 246 | * This lock is also used to keep user access serialized. | ||
| 247 | * Accesses from userspace will grab this lock while userspace | ||
| 248 | * activities happen inside the kernel. | ||
| 249 | */ | 244 | */ |
| 250 | static DEFINE_MUTEX(trace_types_lock); | 245 | static DEFINE_MUTEX(trace_types_lock); |
| 251 | 246 | ||
| 247 | /* | ||
| 248 | * Serialize access to the ring buffer. | |
| 249 | * | |
| 250 | * The ring buffer serializes readers, but that is only low-level protection. | |
| 251 | * The validity of the events (as returned by ring_buffer_peek() etc.) | |
| 252 | * is not protected by the ring buffer. | |
| 253 | * | ||
| 254 | * The content of events may become garbage if we allow another process to | |
| 255 | * consume these events concurrently: | |
| 256 | * A) The page of the consumed events may become a normal page | |
| 257 | * (not a reader page) in the ring buffer, and this page will be | |
| 258 | * rewritten by the event producer. | |
| 259 | * B) The page of the consumed events may become a page for splice_read, | |
| 260 | * and this page will be returned to the system. | |
| 261 | * | ||
| 262 | * These primitives allow multiple processes to access different per-cpu | |
| 263 | * ring buffers concurrently. | |
| 264 | * | |
| 265 | * These primitives don't distinguish read-only and read-consume access. | |
| 266 | * Multiple read-only accesses are also serialized. | |
| 267 | */ | ||
| 268 | |||
| 269 | #ifdef CONFIG_SMP | ||
| 270 | static DECLARE_RWSEM(all_cpu_access_lock); | ||
| 271 | static DEFINE_PER_CPU(struct mutex, cpu_access_lock); | ||
| 272 | |||
| 273 | static inline void trace_access_lock(int cpu) | ||
| 274 | { | ||
| 275 | if (cpu == TRACE_PIPE_ALL_CPU) { | ||
| 276 | /* gain it for accessing the whole ring buffer. */ | ||
| 277 | down_write(&all_cpu_access_lock); | ||
| 278 | } else { | ||
| 279 | /* gain it for accessing a cpu ring buffer. */ | ||
| 280 | |||
| 281 | /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */ | ||
| 282 | down_read(&all_cpu_access_lock); | ||
| 283 | |||
| 284 | /* Secondly block other access to this @cpu ring buffer. */ | ||
| 285 | mutex_lock(&per_cpu(cpu_access_lock, cpu)); | ||
| 286 | } | ||
| 287 | } | ||
| 288 | |||
| 289 | static inline void trace_access_unlock(int cpu) | ||
| 290 | { | ||
| 291 | if (cpu == TRACE_PIPE_ALL_CPU) { | ||
| 292 | up_write(&all_cpu_access_lock); | ||
| 293 | } else { | ||
| 294 | mutex_unlock(&per_cpu(cpu_access_lock, cpu)); | ||
| 295 | up_read(&all_cpu_access_lock); | ||
| 296 | } | ||
| 297 | } | ||
| 298 | |||
| 299 | static inline void trace_access_lock_init(void) | ||
| 300 | { | ||
| 301 | int cpu; | ||
| 302 | |||
| 303 | for_each_possible_cpu(cpu) | ||
| 304 | mutex_init(&per_cpu(cpu_access_lock, cpu)); | ||
| 305 | } | ||
| 306 | |||
| 307 | #else | ||
| 308 | |||
| 309 | static DEFINE_MUTEX(access_lock); | ||
| 310 | |||
| 311 | static inline void trace_access_lock(int cpu) | ||
| 312 | { | ||
| 313 | (void)cpu; | ||
| 314 | mutex_lock(&access_lock); | ||
| 315 | } | ||
| 316 | |||
| 317 | static inline void trace_access_unlock(int cpu) | ||
| 318 | { | ||
| 319 | (void)cpu; | ||
| 320 | mutex_unlock(&access_lock); | ||
| 321 | } | ||
| 322 | |||
| 323 | static inline void trace_access_lock_init(void) | ||
| 324 | { | ||
| 325 | } | ||
| 326 | |||
| 327 | #endif | ||
| 328 | |||
| 252 | /* trace_wait is a waitqueue for tasks blocked on trace_poll */ | 329 | /* trace_wait is a waitqueue for tasks blocked on trace_poll */ |
| 253 | static DECLARE_WAIT_QUEUE_HEAD(trace_wait); | 330 | static DECLARE_WAIT_QUEUE_HEAD(trace_wait); |
| 254 | 331 | ||
| @@ -1089,7 +1166,7 @@ trace_function(struct trace_array *tr, | |||
| 1089 | struct ftrace_entry *entry; | 1166 | struct ftrace_entry *entry; |
| 1090 | 1167 | ||
| 1091 | /* If we are reading the ring buffer, don't trace */ | 1168 | /* If we are reading the ring buffer, don't trace */ |
| 1092 | if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) | 1169 | if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) |
| 1093 | return; | 1170 | return; |
| 1094 | 1171 | ||
| 1095 | event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), | 1172 | event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), |
| @@ -1320,8 +1397,10 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) | |||
| 1320 | entry->fmt = fmt; | 1397 | entry->fmt = fmt; |
| 1321 | 1398 | ||
| 1322 | memcpy(entry->buf, trace_buf, sizeof(u32) * len); | 1399 | memcpy(entry->buf, trace_buf, sizeof(u32) * len); |
| 1323 | if (!filter_check_discard(call, entry, buffer, event)) | 1400 | if (!filter_check_discard(call, entry, buffer, event)) { |
| 1324 | ring_buffer_unlock_commit(buffer, event); | 1401 | ring_buffer_unlock_commit(buffer, event); |
| 1402 | ftrace_trace_stack(buffer, flags, 6, pc); | ||
| 1403 | } | ||
| 1325 | 1404 | ||
| 1326 | out_unlock: | 1405 | out_unlock: |
| 1327 | arch_spin_unlock(&trace_buf_lock); | 1406 | arch_spin_unlock(&trace_buf_lock); |
| @@ -1394,8 +1473,10 @@ int trace_array_vprintk(struct trace_array *tr, | |||
| 1394 | 1473 | ||
| 1395 | memcpy(&entry->buf, trace_buf, len); | 1474 | memcpy(&entry->buf, trace_buf, len); |
| 1396 | entry->buf[len] = '\0'; | 1475 | entry->buf[len] = '\0'; |
| 1397 | if (!filter_check_discard(call, entry, buffer, event)) | 1476 | if (!filter_check_discard(call, entry, buffer, event)) { |
| 1398 | ring_buffer_unlock_commit(buffer, event); | 1477 | ring_buffer_unlock_commit(buffer, event); |
| 1478 | ftrace_trace_stack(buffer, irq_flags, 6, pc); | ||
| 1479 | } | ||
| 1399 | 1480 | ||
| 1400 | out_unlock: | 1481 | out_unlock: |
| 1401 | arch_spin_unlock(&trace_buf_lock); | 1482 | arch_spin_unlock(&trace_buf_lock); |
| @@ -1585,12 +1666,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu) | |||
| 1585 | } | 1666 | } |
| 1586 | 1667 | ||
| 1587 | /* | 1668 | /* |
| 1588 | * No necessary locking here. The worst thing which can | ||
| 1589 | * happen is loosing events consumed at the same time | ||
| 1590 | * by a trace_pipe reader. | ||
| 1591 | * Other than that, we don't risk to crash the ring buffer | ||
| 1592 | * because it serializes the readers. | ||
| 1593 | * | ||
| 1594 | * The current tracer is copied to avoid a global locking | 1669 | * The current tracer is copied to avoid a global locking |
| 1595 | * all around. | 1670 | * all around. |
| 1596 | */ | 1671 | */ |
| @@ -1645,12 +1720,16 @@ static void *s_start(struct seq_file *m, loff_t *pos) | |||
| 1645 | } | 1720 | } |
| 1646 | 1721 | ||
| 1647 | trace_event_read_lock(); | 1722 | trace_event_read_lock(); |
| 1723 | trace_access_lock(cpu_file); | ||
| 1648 | return p; | 1724 | return p; |
| 1649 | } | 1725 | } |
| 1650 | 1726 | ||
| 1651 | static void s_stop(struct seq_file *m, void *p) | 1727 | static void s_stop(struct seq_file *m, void *p) |
| 1652 | { | 1728 | { |
| 1729 | struct trace_iterator *iter = m->private; | ||
| 1730 | |||
| 1653 | atomic_dec(&trace_record_cmdline_disabled); | 1731 | atomic_dec(&trace_record_cmdline_disabled); |
| 1732 | trace_access_unlock(iter->cpu_file); | ||
| 1654 | trace_event_read_unlock(); | 1733 | trace_event_read_unlock(); |
| 1655 | } | 1734 | } |
| 1656 | 1735 | ||
| @@ -2841,22 +2920,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
| 2841 | 2920 | ||
| 2842 | mutex_lock(&trace_types_lock); | 2921 | mutex_lock(&trace_types_lock); |
| 2843 | 2922 | ||
| 2844 | /* We only allow one reader per cpu */ | ||
| 2845 | if (cpu_file == TRACE_PIPE_ALL_CPU) { | ||
| 2846 | if (!cpumask_empty(tracing_reader_cpumask)) { | ||
| 2847 | ret = -EBUSY; | ||
| 2848 | goto out; | ||
| 2849 | } | ||
| 2850 | cpumask_setall(tracing_reader_cpumask); | ||
| 2851 | } else { | ||
| 2852 | if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask)) | ||
| 2853 | cpumask_set_cpu(cpu_file, tracing_reader_cpumask); | ||
| 2854 | else { | ||
| 2855 | ret = -EBUSY; | ||
| 2856 | goto out; | ||
| 2857 | } | ||
| 2858 | } | ||
| 2859 | |||
| 2860 | /* create a buffer to store the information to pass to userspace */ | 2923 | /* create a buffer to store the information to pass to userspace */ |
| 2861 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); | 2924 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); |
| 2862 | if (!iter) { | 2925 | if (!iter) { |
| @@ -2912,12 +2975,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) | |||
| 2912 | 2975 | ||
| 2913 | mutex_lock(&trace_types_lock); | 2976 | mutex_lock(&trace_types_lock); |
| 2914 | 2977 | ||
| 2915 | if (iter->cpu_file == TRACE_PIPE_ALL_CPU) | ||
| 2916 | cpumask_clear(tracing_reader_cpumask); | ||
| 2917 | else | ||
| 2918 | cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); | ||
| 2919 | |||
| 2920 | |||
| 2921 | if (iter->trace->pipe_close) | 2978 | if (iter->trace->pipe_close) |
| 2922 | iter->trace->pipe_close(iter); | 2979 | iter->trace->pipe_close(iter); |
| 2923 | 2980 | ||
| @@ -3079,6 +3136,7 @@ waitagain: | |||
| 3079 | iter->pos = -1; | 3136 | iter->pos = -1; |
| 3080 | 3137 | ||
| 3081 | trace_event_read_lock(); | 3138 | trace_event_read_lock(); |
| 3139 | trace_access_lock(iter->cpu_file); | ||
| 3082 | while (find_next_entry_inc(iter) != NULL) { | 3140 | while (find_next_entry_inc(iter) != NULL) { |
| 3083 | enum print_line_t ret; | 3141 | enum print_line_t ret; |
| 3084 | int len = iter->seq.len; | 3142 | int len = iter->seq.len; |
| @@ -3095,6 +3153,7 @@ waitagain: | |||
| 3095 | if (iter->seq.len >= cnt) | 3153 | if (iter->seq.len >= cnt) |
| 3096 | break; | 3154 | break; |
| 3097 | } | 3155 | } |
| 3156 | trace_access_unlock(iter->cpu_file); | ||
| 3098 | trace_event_read_unlock(); | 3157 | trace_event_read_unlock(); |
| 3099 | 3158 | ||
| 3100 | /* Now copy what we have to the user */ | 3159 | /* Now copy what we have to the user */ |
| @@ -3220,6 +3279,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
| 3220 | } | 3279 | } |
| 3221 | 3280 | ||
| 3222 | trace_event_read_lock(); | 3281 | trace_event_read_lock(); |
| 3282 | trace_access_lock(iter->cpu_file); | ||
| 3223 | 3283 | ||
| 3224 | /* Fill as many pages as possible. */ | 3284 | /* Fill as many pages as possible. */ |
| 3225 | for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { | 3285 | for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { |
| @@ -3243,6 +3303,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
| 3243 | trace_seq_init(&iter->seq); | 3303 | trace_seq_init(&iter->seq); |
| 3244 | } | 3304 | } |
| 3245 | 3305 | ||
| 3306 | trace_access_unlock(iter->cpu_file); | ||
| 3246 | trace_event_read_unlock(); | 3307 | trace_event_read_unlock(); |
| 3247 | mutex_unlock(&iter->mutex); | 3308 | mutex_unlock(&iter->mutex); |
| 3248 | 3309 | ||
| @@ -3544,10 +3605,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
| 3544 | 3605 | ||
| 3545 | info->read = 0; | 3606 | info->read = 0; |
| 3546 | 3607 | ||
| 3608 | trace_access_lock(info->cpu); | ||
| 3547 | ret = ring_buffer_read_page(info->tr->buffer, | 3609 | ret = ring_buffer_read_page(info->tr->buffer, |
| 3548 | &info->spare, | 3610 | &info->spare, |
| 3549 | count, | 3611 | count, |
| 3550 | info->cpu, 0); | 3612 | info->cpu, 0); |
| 3613 | trace_access_unlock(info->cpu); | ||
| 3551 | if (ret < 0) | 3614 | if (ret < 0) |
| 3552 | return 0; | 3615 | return 0; |
| 3553 | 3616 | ||
| @@ -3675,6 +3738,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
| 3675 | len &= PAGE_MASK; | 3738 | len &= PAGE_MASK; |
| 3676 | } | 3739 | } |
| 3677 | 3740 | ||
| 3741 | trace_access_lock(info->cpu); | ||
| 3678 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); | 3742 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); |
| 3679 | 3743 | ||
| 3680 | for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { | 3744 | for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { |
| @@ -3722,6 +3786,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
| 3722 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); | 3786 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); |
| 3723 | } | 3787 | } |
| 3724 | 3788 | ||
| 3789 | trace_access_unlock(info->cpu); | ||
| 3725 | spd.nr_pages = i; | 3790 | spd.nr_pages = i; |
| 3726 | 3791 | ||
| 3727 | /* did we read anything? */ | 3792 | /* did we read anything? */ |
| @@ -4158,6 +4223,8 @@ static __init int tracer_init_debugfs(void) | |||
| 4158 | struct dentry *d_tracer; | 4223 | struct dentry *d_tracer; |
| 4159 | int cpu; | 4224 | int cpu; |
| 4160 | 4225 | ||
| 4226 | trace_access_lock_init(); | ||
| 4227 | |||
| 4161 | d_tracer = tracing_init_dentry(); | 4228 | d_tracer = tracing_init_dentry(); |
| 4162 | 4229 | ||
| 4163 | trace_create_file("tracing_enabled", 0644, d_tracer, | 4230 | trace_create_file("tracing_enabled", 0644, d_tracer, |
| @@ -4392,9 +4459,6 @@ __init static int tracer_alloc_buffers(void) | |||
| 4392 | if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) | 4459 | if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) |
| 4393 | goto out_free_buffer_mask; | 4460 | goto out_free_buffer_mask; |
| 4394 | 4461 | ||
| 4395 | if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) | ||
| 4396 | goto out_free_tracing_cpumask; | ||
| 4397 | |||
| 4398 | /* To save memory, keep the ring buffer size to its minimum */ | 4462 | /* To save memory, keep the ring buffer size to its minimum */ |
| 4399 | if (ring_buffer_expanded) | 4463 | if (ring_buffer_expanded) |
| 4400 | ring_buf_size = trace_buf_size; | 4464 | ring_buf_size = trace_buf_size; |
| @@ -4452,8 +4516,6 @@ __init static int tracer_alloc_buffers(void) | |||
| 4452 | return 0; | 4516 | return 0; |
| 4453 | 4517 | ||
| 4454 | out_free_cpumask: | 4518 | out_free_cpumask: |
| 4455 | free_cpumask_var(tracing_reader_cpumask); | ||
| 4456 | out_free_tracing_cpumask: | ||
| 4457 | free_cpumask_var(tracing_cpumask); | 4519 | free_cpumask_var(tracing_cpumask); |
| 4458 | out_free_buffer_mask: | 4520 | out_free_buffer_mask: |
| 4459 | free_cpumask_var(tracing_buffer_mask); | 4521 | free_cpumask_var(tracing_buffer_mask); |
