Diffstat (limited to 'kernel/trace/trace.c')
 -rw-r--r--  kernel/trace/trace.c | 149
 1 file changed, 108 insertions(+), 41 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0df1b0f2cb9..032c57ca650 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -32,6 +32,7 @@
 #include <linux/splice.h>
 #include <linux/kdebug.h>
 #include <linux/string.h>
+#include <linux/rwsem.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/poll.h>
@@ -102,9 +103,6 @@ static inline void ftrace_enable_cpu(void)
 
 static cpumask_var_t __read_mostly tracing_buffer_mask;
 
-/* Define which cpu buffers are currently read in trace_pipe */
-static cpumask_var_t tracing_reader_cpumask;
-
 #define for_each_tracing_cpu(cpu)	\
 	for_each_cpu(cpu, tracing_buffer_mask)
 
@@ -243,12 +241,91 @@ static struct tracer *current_trace __read_mostly;
 
 /*
  * trace_types_lock is used to protect the trace_types list.
- * This lock is also used to keep user access serialized.
- * Accesses from userspace will grab this lock while userspace
- * activities happen inside the kernel.
  */
 static DEFINE_MUTEX(trace_types_lock);
 
+/*
+ * Serialize access to the ring buffer.
+ *
+ * The ring buffer serializes its readers, but that is only low-level
+ * protection; the validity of the events returned by ring_buffer_peek()
+ * and friends is not protected by the ring buffer itself.
+ *
+ * The content of events may become garbage if we allow another process
+ * to consume these events concurrently:
+ *   A) the page holding the consumed events may become a normal page
+ *      (not a reader page) in the ring buffer, and will then be
+ *      rewritten by the event producer.
+ *   B) the page holding the consumed events may become a page used for
+ *      splice_read, and will then be returned to the system.
+ *
+ * These primitives allow multiple processes to access different per-cpu
+ * ring buffers concurrently.
+ *
+ * They do not distinguish read-only from read-consume access; multiple
+ * read-only accesses are also serialized.
+ */
+
+#ifdef CONFIG_SMP
+static DECLARE_RWSEM(all_cpu_access_lock);
+static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
+
+static inline void trace_access_lock(int cpu)
+{
+	if (cpu == TRACE_PIPE_ALL_CPU) {
+		/* gain it for accessing the whole ring buffer. */
+		down_write(&all_cpu_access_lock);
+	} else {
+		/* gain it for accessing a cpu ring buffer. */
+
+		/* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
+		down_read(&all_cpu_access_lock);
+
+		/* Secondly block other access to this @cpu ring buffer. */
+		mutex_lock(&per_cpu(cpu_access_lock, cpu));
+	}
+}
+
+static inline void trace_access_unlock(int cpu)
+{
+	if (cpu == TRACE_PIPE_ALL_CPU) {
+		up_write(&all_cpu_access_lock);
+	} else {
+		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
+		up_read(&all_cpu_access_lock);
+	}
+}
+
+static inline void trace_access_lock_init(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		mutex_init(&per_cpu(cpu_access_lock, cpu));
+}
+
+#else
+
+static DEFINE_MUTEX(access_lock);
+
+static inline void trace_access_lock(int cpu)
+{
+	(void)cpu;
+	mutex_lock(&access_lock);
+}
+
+static inline void trace_access_unlock(int cpu)
+{
+	(void)cpu;
+	mutex_unlock(&access_lock);
+}
+
+static inline void trace_access_lock_init(void)
+{
+}
+
+#endif
+
 /* trace_wait is a waitqueue for tasks blocked on trace_poll */
 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 
@@ -951,6 +1028,11 @@ void trace_find_cmdline(int pid, char comm[])
 		return;
 	}
 
+	if (WARN_ON_ONCE(pid < 0)) {
+		strcpy(comm, "<XXX>");
+		return;
+	}
+
 	if (pid > PID_MAX_DEFAULT) {
 		strcpy(comm, "<...>");
 		return;
@@ -1315,8 +1397,10 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	entry->fmt			= fmt;
 
 	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
-	if (!filter_check_discard(call, entry, buffer, event))
+	if (!filter_check_discard(call, entry, buffer, event)) {
 		ring_buffer_unlock_commit(buffer, event);
+		ftrace_trace_stack(buffer, flags, 6, pc);
+	}
 
 out_unlock:
 	arch_spin_unlock(&trace_buf_lock);
@@ -1389,8 +1473,10 @@ int trace_array_vprintk(struct trace_array *tr,
 
 	memcpy(&entry->buf, trace_buf, len);
 	entry->buf[len] = '\0';
-	if (!filter_check_discard(call, entry, buffer, event))
+	if (!filter_check_discard(call, entry, buffer, event)) {
 		ring_buffer_unlock_commit(buffer, event);
+		ftrace_trace_stack(buffer, irq_flags, 6, pc);
+	}
 
 out_unlock:
 	arch_spin_unlock(&trace_buf_lock);
@@ -1580,12 +1666,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
 }
 
 /*
- * No necessary locking here. The worst thing which can
- * happen is loosing events consumed at the same time
- * by a trace_pipe reader.
- * Other than that, we don't risk to crash the ring buffer
- * because it serializes the readers.
- *
  * The current tracer is copied to avoid a global locking
  * all around.
  */
@@ -1640,12 +1720,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 	}
 
 	trace_event_read_lock();
+	trace_access_lock(cpu_file);
 	return p;
 }
 
 static void s_stop(struct seq_file *m, void *p)
 {
+	struct trace_iterator *iter = m->private;
+
 	atomic_dec(&trace_record_cmdline_disabled);
+	trace_access_unlock(iter->cpu_file);
 	trace_event_read_unlock();
 }
 
@@ -2836,22 +2920,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 
 	mutex_lock(&trace_types_lock);
 
-	/* We only allow one reader per cpu */
-	if (cpu_file == TRACE_PIPE_ALL_CPU) {
-		if (!cpumask_empty(tracing_reader_cpumask)) {
-			ret = -EBUSY;
-			goto out;
-		}
-		cpumask_setall(tracing_reader_cpumask);
-	} else {
-		if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
-			cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
-		else {
-			ret = -EBUSY;
-			goto out;
-		}
-	}
-
 	/* create a buffer to store the information to pass to userspace */
 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
 	if (!iter) {
@@ -2907,12 +2975,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 
 	mutex_lock(&trace_types_lock);
 
-	if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
-		cpumask_clear(tracing_reader_cpumask);
-	else
-		cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
-
-
 	if (iter->trace->pipe_close)
 		iter->trace->pipe_close(iter);
 
@@ -3074,6 +3136,7 @@ waitagain:
 	iter->pos = -1;
 
 	trace_event_read_lock();
+	trace_access_lock(iter->cpu_file);
 	while (find_next_entry_inc(iter) != NULL) {
 		enum print_line_t ret;
 		int len = iter->seq.len;
@@ -3090,6 +3153,7 @@ waitagain:
 		if (iter->seq.len >= cnt)
 			break;
 	}
+	trace_access_unlock(iter->cpu_file);
 	trace_event_read_unlock();
 
 	/* Now copy what we have to the user */
@@ -3215,6 +3279,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 	}
 
 	trace_event_read_lock();
+	trace_access_lock(iter->cpu_file);
 
 	/* Fill as many pages as possible. */
 	for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
@@ -3238,6 +3303,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 		trace_seq_init(&iter->seq);
 	}
 
+	trace_access_unlock(iter->cpu_file);
 	trace_event_read_unlock();
 	mutex_unlock(&iter->mutex);
 
@@ -3539,10 +3605,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 
 	info->read = 0;
 
+	trace_access_lock(info->cpu);
 	ret = ring_buffer_read_page(info->tr->buffer,
 				    &info->spare,
 				    count,
 				    info->cpu, 0);
+	trace_access_unlock(info->cpu);
 	if (ret < 0)
 		return 0;
 
@@ -3670,6 +3738,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		len &= PAGE_MASK;
 	}
 
+	trace_access_lock(info->cpu);
 	entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
 
 	for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
@@ -3717,6 +3786,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
 	}
 
+	trace_access_unlock(info->cpu);
 	spd.nr_pages = i;
 
 	/* did we read anything? */
@@ -4153,6 +4223,8 @@ static __init int tracer_init_debugfs(void)
 	struct dentry *d_tracer;
 	int cpu;
 
+	trace_access_lock_init();
+
 	d_tracer = tracing_init_dentry();
 
 	trace_create_file("tracing_enabled", 0644, d_tracer,
@@ -4387,9 +4459,6 @@ __init static int tracer_alloc_buffers(void)
 	if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
 		goto out_free_buffer_mask;
 
-	if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
-		goto out_free_tracing_cpumask;
-
 	/* To save memory, keep the ring buffer size to its minimum */
 	if (ring_buffer_expanded)
 		ring_buf_size = trace_buf_size;
@@ -4447,8 +4516,6 @@ __init static int tracer_alloc_buffers(void)
 	return 0;
 
 out_free_cpumask:
-	free_cpumask_var(tracing_reader_cpumask);
-out_free_tracing_cpumask:
 	free_cpumask_var(tracing_cpumask);
 out_free_buffer_mask:
 	free_cpumask_var(tracing_buffer_mask);
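
For context on how the new primitives are meant to be used: every reader path touched above (tracing_read_pipe, the splice reads, and the per-cpu buffer files) follows the same bracket-the-consume pattern. The fragment below is an illustrative sketch, not code from this patch; example_read() is a hypothetical helper, and the "consume events" step stands in for calls such as find_next_entry_inc() or ring_buffer_read_page().

/*
 * Sketch of the reader-side pattern established by this patch (not part
 * of the diff).  A reader bound to a single cpu takes all_cpu_access_lock
 * shared plus that cpu's mutex; a TRACE_PIPE_ALL_CPU reader takes
 * all_cpu_access_lock exclusively and so excludes every per-cpu reader.
 */
static ssize_t example_read(struct trace_iterator *iter)
{
	ssize_t ret = 0;

	trace_access_lock(iter->cpu_file);	/* serialize against other consumers */

	/* ... consume events here, e.g. find_next_entry_inc(iter) ... */

	trace_access_unlock(iter->cpu_file);
	return ret;
}

Because read-only and read-consume access are not distinguished, even two purely read-only readers of the same cpu buffer are serialized; the patch trades that small loss of concurrency for a simple locking rule.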
