diff options
| -rw-r--r-- | kernel/trace/trace.c | 136 |
1 files changed, 97 insertions, 39 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0df1b0f2cb9e..abdd333a0825 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -32,6 +32,7 @@ | |||
| 32 | #include <linux/splice.h> | 32 | #include <linux/splice.h> |
| 33 | #include <linux/kdebug.h> | 33 | #include <linux/kdebug.h> |
| 34 | #include <linux/string.h> | 34 | #include <linux/string.h> |
| 35 | #include <linux/rwsem.h> | ||
| 35 | #include <linux/ctype.h> | 36 | #include <linux/ctype.h> |
| 36 | #include <linux/init.h> | 37 | #include <linux/init.h> |
| 37 | #include <linux/poll.h> | 38 | #include <linux/poll.h> |
| @@ -102,9 +103,6 @@ static inline void ftrace_enable_cpu(void) | |||
| 102 | 103 | ||
| 103 | static cpumask_var_t __read_mostly tracing_buffer_mask; | 104 | static cpumask_var_t __read_mostly tracing_buffer_mask; |
| 104 | 105 | ||
| 105 | /* Define which cpu buffers are currently read in trace_pipe */ | ||
| 106 | static cpumask_var_t tracing_reader_cpumask; | ||
| 107 | |||
| 108 | #define for_each_tracing_cpu(cpu) \ | 106 | #define for_each_tracing_cpu(cpu) \ |
| 109 | for_each_cpu(cpu, tracing_buffer_mask) | 107 | for_each_cpu(cpu, tracing_buffer_mask) |
| 110 | 108 | ||
| @@ -243,12 +241,91 @@ static struct tracer *current_trace __read_mostly; | |||
| 243 | 241 | ||
| 244 | /* | 242 | /* |
| 245 | * trace_types_lock is used to protect the trace_types list. | 243 | * trace_types_lock is used to protect the trace_types list. |
| 246 | * This lock is also used to keep user access serialized. | ||
| 247 | * Accesses from userspace will grab this lock while userspace | ||
| 248 | * activities happen inside the kernel. | ||
| 249 | */ | 244 | */ |
| 250 | static DEFINE_MUTEX(trace_types_lock); | 245 | static DEFINE_MUTEX(trace_types_lock); |
| 251 | 246 | ||
| 247 | /* | ||
| 248 | * Serialize access to the ring buffer. | ||
| 249 | * | ||
| 250 | * The ring buffer serializes readers, but that is only low-level protection. | ||
| 251 | * The validity of the events (as returned by ring_buffer_peek() etc.) | ||
| 252 | * is not protected by the ring buffer. | ||
| 253 | * | ||
| 254 | * The content of events may become garbage if we allow another process to | ||
| 255 | * consume these events concurrently: | ||
| 256 | * A) The page of the consumed events may become a normal page | ||
| 257 | * (not the reader page) in the ring buffer, and this page will be | ||
| 258 | * rewritten by the events producer. | ||
| 259 | * B) The page of the consumed events may become a page for splice_read, | ||
| 260 | * and this page will be returned to the system. | ||
| 261 | * | ||
| 262 | * These primitives allow multiple processes to access different cpu ring | ||
| 263 | * buffers concurrently. | ||
| 264 | * | ||
| 265 | * These primitives don't distinguish read-only and read-consume access. | ||
| 266 | * Multiple read-only accesses are also serialized. | ||
| 267 | */ | ||
| 268 | |||
| 269 | #ifdef CONFIG_SMP | ||
| 270 | static DECLARE_RWSEM(all_cpu_access_lock); | ||
| 271 | static DEFINE_PER_CPU(struct mutex, cpu_access_lock); | ||
| 272 | |||
| 273 | static inline void trace_access_lock(int cpu) | ||
| 274 | { | ||
| 275 | if (cpu == TRACE_PIPE_ALL_CPU) { | ||
| 276 | /* gain it for accessing the whole ring buffer. */ | ||
| 277 | down_write(&all_cpu_access_lock); | ||
| 278 | } else { | ||
| 279 | /* gain it for accessing a cpu ring buffer. */ | ||
| 280 | |||
| 281 | /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */ | ||
| 282 | down_read(&all_cpu_access_lock); | ||
| 283 | |||
| 284 | /* Secondly block other access to this @cpu ring buffer. */ | ||
| 285 | mutex_lock(&per_cpu(cpu_access_lock, cpu)); | ||
| 286 | } | ||
| 287 | } | ||
| 288 | |||
| 289 | static inline void trace_access_unlock(int cpu) | ||
| 290 | { | ||
| 291 | if (cpu == TRACE_PIPE_ALL_CPU) { | ||
| 292 | up_write(&all_cpu_access_lock); | ||
| 293 | } else { | ||
| 294 | mutex_unlock(&per_cpu(cpu_access_lock, cpu)); | ||
| 295 | up_read(&all_cpu_access_lock); | ||
| 296 | } | ||
| 297 | } | ||
| 298 | |||
| 299 | static inline void trace_access_lock_init(void) | ||
| 300 | { | ||
| 301 | int cpu; | ||
| 302 | |||
| 303 | for_each_possible_cpu(cpu) | ||
| 304 | mutex_init(&per_cpu(cpu_access_lock, cpu)); | ||
| 305 | } | ||
| 306 | |||
| 307 | #else | ||
| 308 | |||
| 309 | static DEFINE_MUTEX(access_lock); | ||
| 310 | |||
| 311 | static inline void trace_access_lock(int cpu) | ||
| 312 | { | ||
| 313 | (void)cpu; | ||
| 314 | mutex_lock(&access_lock); | ||
| 315 | } | ||
| 316 | |||
| 317 | static inline void trace_access_unlock(int cpu) | ||
| 318 | { | ||
| 319 | (void)cpu; | ||
| 320 | mutex_unlock(&access_lock); | ||
| 321 | } | ||
| 322 | |||
| 323 | static inline void trace_access_lock_init(void) | ||
| 324 | { | ||
| 325 | } | ||
| 326 | |||
| 327 | #endif | ||
| 328 | |||
| 252 | /* trace_wait is a waitqueue for tasks blocked on trace_poll */ | 329 | /* trace_wait is a waitqueue for tasks blocked on trace_poll */ |
| 253 | static DECLARE_WAIT_QUEUE_HEAD(trace_wait); | 330 | static DECLARE_WAIT_QUEUE_HEAD(trace_wait); |
| 254 | 331 | ||
| @@ -1580,12 +1657,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu) | |||
| 1580 | } | 1657 | } |
| 1581 | 1658 | ||
| 1582 | /* | 1659 | /* |
| 1583 | * No necessary locking here. The worst thing which can | ||
| 1584 | * happen is loosing events consumed at the same time | ||
| 1585 | * by a trace_pipe reader. | ||
| 1586 | * Other than that, we don't risk to crash the ring buffer | ||
| 1587 | * because it serializes the readers. | ||
| 1588 | * | ||
| 1589 | * The current tracer is copied to avoid a global locking | 1660 | * The current tracer is copied to avoid a global locking |
| 1590 | * all around. | 1661 | * all around. |
| 1591 | */ | 1662 | */ |
| @@ -1640,12 +1711,16 @@ static void *s_start(struct seq_file *m, loff_t *pos) | |||
| 1640 | } | 1711 | } |
| 1641 | 1712 | ||
| 1642 | trace_event_read_lock(); | 1713 | trace_event_read_lock(); |
| 1714 | trace_access_lock(cpu_file); | ||
| 1643 | return p; | 1715 | return p; |
| 1644 | } | 1716 | } |
| 1645 | 1717 | ||
| 1646 | static void s_stop(struct seq_file *m, void *p) | 1718 | static void s_stop(struct seq_file *m, void *p) |
| 1647 | { | 1719 | { |
| 1720 | struct trace_iterator *iter = m->private; | ||
| 1721 | |||
| 1648 | atomic_dec(&trace_record_cmdline_disabled); | 1722 | atomic_dec(&trace_record_cmdline_disabled); |
| 1723 | trace_access_unlock(iter->cpu_file); | ||
| 1649 | trace_event_read_unlock(); | 1724 | trace_event_read_unlock(); |
| 1650 | } | 1725 | } |
| 1651 | 1726 | ||
| @@ -2836,22 +2911,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
| 2836 | 2911 | ||
| 2837 | mutex_lock(&trace_types_lock); | 2912 | mutex_lock(&trace_types_lock); |
| 2838 | 2913 | ||
| 2839 | /* We only allow one reader per cpu */ | ||
| 2840 | if (cpu_file == TRACE_PIPE_ALL_CPU) { | ||
| 2841 | if (!cpumask_empty(tracing_reader_cpumask)) { | ||
| 2842 | ret = -EBUSY; | ||
| 2843 | goto out; | ||
| 2844 | } | ||
| 2845 | cpumask_setall(tracing_reader_cpumask); | ||
| 2846 | } else { | ||
| 2847 | if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask)) | ||
| 2848 | cpumask_set_cpu(cpu_file, tracing_reader_cpumask); | ||
| 2849 | else { | ||
| 2850 | ret = -EBUSY; | ||
| 2851 | goto out; | ||
| 2852 | } | ||
| 2853 | } | ||
| 2854 | |||
| 2855 | /* create a buffer to store the information to pass to userspace */ | 2914 | /* create a buffer to store the information to pass to userspace */ |
| 2856 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); | 2915 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); |
| 2857 | if (!iter) { | 2916 | if (!iter) { |
| @@ -2907,12 +2966,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) | |||
| 2907 | 2966 | ||
| 2908 | mutex_lock(&trace_types_lock); | 2967 | mutex_lock(&trace_types_lock); |
| 2909 | 2968 | ||
| 2910 | if (iter->cpu_file == TRACE_PIPE_ALL_CPU) | ||
| 2911 | cpumask_clear(tracing_reader_cpumask); | ||
| 2912 | else | ||
| 2913 | cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); | ||
| 2914 | |||
| 2915 | |||
| 2916 | if (iter->trace->pipe_close) | 2969 | if (iter->trace->pipe_close) |
| 2917 | iter->trace->pipe_close(iter); | 2970 | iter->trace->pipe_close(iter); |
| 2918 | 2971 | ||
| @@ -3074,6 +3127,7 @@ waitagain: | |||
| 3074 | iter->pos = -1; | 3127 | iter->pos = -1; |
| 3075 | 3128 | ||
| 3076 | trace_event_read_lock(); | 3129 | trace_event_read_lock(); |
| 3130 | trace_access_lock(iter->cpu_file); | ||
| 3077 | while (find_next_entry_inc(iter) != NULL) { | 3131 | while (find_next_entry_inc(iter) != NULL) { |
| 3078 | enum print_line_t ret; | 3132 | enum print_line_t ret; |
| 3079 | int len = iter->seq.len; | 3133 | int len = iter->seq.len; |
| @@ -3090,6 +3144,7 @@ waitagain: | |||
| 3090 | if (iter->seq.len >= cnt) | 3144 | if (iter->seq.len >= cnt) |
| 3091 | break; | 3145 | break; |
| 3092 | } | 3146 | } |
| 3147 | trace_access_unlock(iter->cpu_file); | ||
| 3093 | trace_event_read_unlock(); | 3148 | trace_event_read_unlock(); |
| 3094 | 3149 | ||
| 3095 | /* Now copy what we have to the user */ | 3150 | /* Now copy what we have to the user */ |
| @@ -3215,6 +3270,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
| 3215 | } | 3270 | } |
| 3216 | 3271 | ||
| 3217 | trace_event_read_lock(); | 3272 | trace_event_read_lock(); |
| 3273 | trace_access_lock(iter->cpu_file); | ||
| 3218 | 3274 | ||
| 3219 | /* Fill as many pages as possible. */ | 3275 | /* Fill as many pages as possible. */ |
| 3220 | for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { | 3276 | for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { |
| @@ -3238,6 +3294,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
| 3238 | trace_seq_init(&iter->seq); | 3294 | trace_seq_init(&iter->seq); |
| 3239 | } | 3295 | } |
| 3240 | 3296 | ||
| 3297 | trace_access_unlock(iter->cpu_file); | ||
| 3241 | trace_event_read_unlock(); | 3298 | trace_event_read_unlock(); |
| 3242 | mutex_unlock(&iter->mutex); | 3299 | mutex_unlock(&iter->mutex); |
| 3243 | 3300 | ||
| @@ -3539,10 +3596,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
| 3539 | 3596 | ||
| 3540 | info->read = 0; | 3597 | info->read = 0; |
| 3541 | 3598 | ||
| 3599 | trace_access_lock(info->cpu); | ||
| 3542 | ret = ring_buffer_read_page(info->tr->buffer, | 3600 | ret = ring_buffer_read_page(info->tr->buffer, |
| 3543 | &info->spare, | 3601 | &info->spare, |
| 3544 | count, | 3602 | count, |
| 3545 | info->cpu, 0); | 3603 | info->cpu, 0); |
| 3604 | trace_access_unlock(info->cpu); | ||
| 3546 | if (ret < 0) | 3605 | if (ret < 0) |
| 3547 | return 0; | 3606 | return 0; |
| 3548 | 3607 | ||
| @@ -3670,6 +3729,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
| 3670 | len &= PAGE_MASK; | 3729 | len &= PAGE_MASK; |
| 3671 | } | 3730 | } |
| 3672 | 3731 | ||
| 3732 | trace_access_lock(info->cpu); | ||
| 3673 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); | 3733 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); |
| 3674 | 3734 | ||
| 3675 | for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { | 3735 | for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { |
| @@ -3717,6 +3777,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
| 3717 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); | 3777 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); |
| 3718 | } | 3778 | } |
| 3719 | 3779 | ||
| 3780 | trace_access_unlock(info->cpu); | ||
| 3720 | spd.nr_pages = i; | 3781 | spd.nr_pages = i; |
| 3721 | 3782 | ||
| 3722 | /* did we read anything? */ | 3783 | /* did we read anything? */ |
| @@ -4153,6 +4214,8 @@ static __init int tracer_init_debugfs(void) | |||
| 4153 | struct dentry *d_tracer; | 4214 | struct dentry *d_tracer; |
| 4154 | int cpu; | 4215 | int cpu; |
| 4155 | 4216 | ||
| 4217 | trace_access_lock_init(); | ||
| 4218 | |||
| 4156 | d_tracer = tracing_init_dentry(); | 4219 | d_tracer = tracing_init_dentry(); |
| 4157 | 4220 | ||
| 4158 | trace_create_file("tracing_enabled", 0644, d_tracer, | 4221 | trace_create_file("tracing_enabled", 0644, d_tracer, |
| @@ -4387,9 +4450,6 @@ __init static int tracer_alloc_buffers(void) | |||
| 4387 | if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) | 4450 | if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) |
| 4388 | goto out_free_buffer_mask; | 4451 | goto out_free_buffer_mask; |
| 4389 | 4452 | ||
| 4390 | if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) | ||
| 4391 | goto out_free_tracing_cpumask; | ||
| 4392 | |||
| 4393 | /* To save memory, keep the ring buffer size to its minimum */ | 4453 | /* To save memory, keep the ring buffer size to its minimum */ |
| 4394 | if (ring_buffer_expanded) | 4454 | if (ring_buffer_expanded) |
| 4395 | ring_buf_size = trace_buf_size; | 4455 | ring_buf_size = trace_buf_size; |
| @@ -4447,8 +4507,6 @@ __init static int tracer_alloc_buffers(void) | |||
| 4447 | return 0; | 4507 | return 0; |
| 4448 | 4508 | ||
| 4449 | out_free_cpumask: | 4509 | out_free_cpumask: |
| 4450 | free_cpumask_var(tracing_reader_cpumask); | ||
| 4451 | out_free_tracing_cpumask: | ||
| 4452 | free_cpumask_var(tracing_cpumask); | 4510 | free_cpumask_var(tracing_cpumask); |
| 4453 | out_free_buffer_mask: | 4511 | out_free_buffer_mask: |
| 4454 | free_cpumask_var(tracing_buffer_mask); | 4512 | free_cpumask_var(tracing_buffer_mask); |
