diff options
Diffstat (limited to 'kernel/trace/trace.c')
-rw-r--r-- | kernel/trace/trace.c | 144 |
1 files changed, 103 insertions, 41 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index eac6875cb990..032c57ca6502 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/splice.h> | 32 | #include <linux/splice.h> |
33 | #include <linux/kdebug.h> | 33 | #include <linux/kdebug.h> |
34 | #include <linux/string.h> | 34 | #include <linux/string.h> |
35 | #include <linux/rwsem.h> | ||
35 | #include <linux/ctype.h> | 36 | #include <linux/ctype.h> |
36 | #include <linux/init.h> | 37 | #include <linux/init.h> |
37 | #include <linux/poll.h> | 38 | #include <linux/poll.h> |
@@ -102,9 +103,6 @@ static inline void ftrace_enable_cpu(void) | |||
102 | 103 | ||
103 | static cpumask_var_t __read_mostly tracing_buffer_mask; | 104 | static cpumask_var_t __read_mostly tracing_buffer_mask; |
104 | 105 | ||
105 | /* Define which cpu buffers are currently read in trace_pipe */ | ||
106 | static cpumask_var_t tracing_reader_cpumask; | ||
107 | |||
108 | #define for_each_tracing_cpu(cpu) \ | 106 | #define for_each_tracing_cpu(cpu) \ |
109 | for_each_cpu(cpu, tracing_buffer_mask) | 107 | for_each_cpu(cpu, tracing_buffer_mask) |
110 | 108 | ||
@@ -243,12 +241,91 @@ static struct tracer *current_trace __read_mostly; | |||
243 | 241 | ||
244 | /* | 242 | /* |
245 | * trace_types_lock is used to protect the trace_types list. | 243 | * trace_types_lock is used to protect the trace_types list. |
246 | * This lock is also used to keep user access serialized. | ||
247 | * Accesses from userspace will grab this lock while userspace | ||
248 | * activities happen inside the kernel. | ||
249 | */ | 244 | */ |
250 | static DEFINE_MUTEX(trace_types_lock); | 245 | static DEFINE_MUTEX(trace_types_lock); |
251 | 246 | ||
247 | /* | ||
248 | * serialize the access of the ring buffer | ||
249 | * | ||
250 | * The ring buffer serializes readers, but that is only low-level protection. | ||
251 | * The validity of the events (which are returned by ring_buffer_peek() etc.) | ||
252 | * is not protected by the ring buffer. | ||
253 | * | ||
254 | * The content of events may become garbage if we allow other processes to | ||
255 | * consume these events concurrently: | ||
256 | * A) the page of the consumed events may become a normal page | ||
257 | * (not reader page) in ring buffer, and this page will be rewritten | ||
258 | * by the events producer. | ||
259 | * B) The page of the consumed events may become a page for splice_read, | ||
260 | * and this page will be returned to system. | ||
261 | * | ||
262 | * These primitives allow multiple processes to access different cpu ring | ||
263 | * buffers concurrently. | ||
264 | * | ||
265 | * These primitives don't distinguish read-only and read-consume access. | ||
266 | * Multiple read-only accesses are also serialized. | ||
267 | */ | ||
268 | |||
269 | #ifdef CONFIG_SMP | ||
270 | static DECLARE_RWSEM(all_cpu_access_lock); | ||
271 | static DEFINE_PER_CPU(struct mutex, cpu_access_lock); | ||
272 | |||
273 | static inline void trace_access_lock(int cpu) | ||
274 | { | ||
275 | if (cpu == TRACE_PIPE_ALL_CPU) { | ||
276 | /* gain it for accessing the whole ring buffer. */ | ||
277 | down_write(&all_cpu_access_lock); | ||
278 | } else { | ||
279 | /* gain it for accessing a cpu ring buffer. */ | ||
280 | |||
281 | /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */ | ||
282 | down_read(&all_cpu_access_lock); | ||
283 | |||
284 | /* Secondly block other access to this @cpu ring buffer. */ | ||
285 | mutex_lock(&per_cpu(cpu_access_lock, cpu)); | ||
286 | } | ||
287 | } | ||
288 | |||
289 | static inline void trace_access_unlock(int cpu) | ||
290 | { | ||
291 | if (cpu == TRACE_PIPE_ALL_CPU) { | ||
292 | up_write(&all_cpu_access_lock); | ||
293 | } else { | ||
294 | mutex_unlock(&per_cpu(cpu_access_lock, cpu)); | ||
295 | up_read(&all_cpu_access_lock); | ||
296 | } | ||
297 | } | ||
298 | |||
299 | static inline void trace_access_lock_init(void) | ||
300 | { | ||
301 | int cpu; | ||
302 | |||
303 | for_each_possible_cpu(cpu) | ||
304 | mutex_init(&per_cpu(cpu_access_lock, cpu)); | ||
305 | } | ||
306 | |||
307 | #else | ||
308 | |||
309 | static DEFINE_MUTEX(access_lock); | ||
310 | |||
311 | static inline void trace_access_lock(int cpu) | ||
312 | { | ||
313 | (void)cpu; | ||
314 | mutex_lock(&access_lock); | ||
315 | } | ||
316 | |||
317 | static inline void trace_access_unlock(int cpu) | ||
318 | { | ||
319 | (void)cpu; | ||
320 | mutex_unlock(&access_lock); | ||
321 | } | ||
322 | |||
323 | static inline void trace_access_lock_init(void) | ||
324 | { | ||
325 | } | ||
326 | |||
327 | #endif | ||
328 | |||
252 | /* trace_wait is a waitqueue for tasks blocked on trace_poll */ | 329 | /* trace_wait is a waitqueue for tasks blocked on trace_poll */ |
253 | static DECLARE_WAIT_QUEUE_HEAD(trace_wait); | 330 | static DECLARE_WAIT_QUEUE_HEAD(trace_wait); |
254 | 331 | ||
@@ -1320,8 +1397,10 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) | |||
1320 | entry->fmt = fmt; | 1397 | entry->fmt = fmt; |
1321 | 1398 | ||
1322 | memcpy(entry->buf, trace_buf, sizeof(u32) * len); | 1399 | memcpy(entry->buf, trace_buf, sizeof(u32) * len); |
1323 | if (!filter_check_discard(call, entry, buffer, event)) | 1400 | if (!filter_check_discard(call, entry, buffer, event)) { |
1324 | ring_buffer_unlock_commit(buffer, event); | 1401 | ring_buffer_unlock_commit(buffer, event); |
1402 | ftrace_trace_stack(buffer, flags, 6, pc); | ||
1403 | } | ||
1325 | 1404 | ||
1326 | out_unlock: | 1405 | out_unlock: |
1327 | arch_spin_unlock(&trace_buf_lock); | 1406 | arch_spin_unlock(&trace_buf_lock); |
@@ -1394,8 +1473,10 @@ int trace_array_vprintk(struct trace_array *tr, | |||
1394 | 1473 | ||
1395 | memcpy(&entry->buf, trace_buf, len); | 1474 | memcpy(&entry->buf, trace_buf, len); |
1396 | entry->buf[len] = '\0'; | 1475 | entry->buf[len] = '\0'; |
1397 | if (!filter_check_discard(call, entry, buffer, event)) | 1476 | if (!filter_check_discard(call, entry, buffer, event)) { |
1398 | ring_buffer_unlock_commit(buffer, event); | 1477 | ring_buffer_unlock_commit(buffer, event); |
1478 | ftrace_trace_stack(buffer, irq_flags, 6, pc); | ||
1479 | } | ||
1399 | 1480 | ||
1400 | out_unlock: | 1481 | out_unlock: |
1401 | arch_spin_unlock(&trace_buf_lock); | 1482 | arch_spin_unlock(&trace_buf_lock); |
@@ -1585,12 +1666,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu) | |||
1585 | } | 1666 | } |
1586 | 1667 | ||
1587 | /* | 1668 | /* |
1588 | * No necessary locking here. The worst thing which can | ||
1589 | * happen is loosing events consumed at the same time | ||
1590 | * by a trace_pipe reader. | ||
1591 | * Other than that, we don't risk to crash the ring buffer | ||
1592 | * because it serializes the readers. | ||
1593 | * | ||
1594 | * The current tracer is copied to avoid a global locking | 1669 | * The current tracer is copied to avoid a global locking |
1595 | * all around. | 1670 | * all around. |
1596 | */ | 1671 | */ |
@@ -1645,12 +1720,16 @@ static void *s_start(struct seq_file *m, loff_t *pos) | |||
1645 | } | 1720 | } |
1646 | 1721 | ||
1647 | trace_event_read_lock(); | 1722 | trace_event_read_lock(); |
1723 | trace_access_lock(cpu_file); | ||
1648 | return p; | 1724 | return p; |
1649 | } | 1725 | } |
1650 | 1726 | ||
1651 | static void s_stop(struct seq_file *m, void *p) | 1727 | static void s_stop(struct seq_file *m, void *p) |
1652 | { | 1728 | { |
1729 | struct trace_iterator *iter = m->private; | ||
1730 | |||
1653 | atomic_dec(&trace_record_cmdline_disabled); | 1731 | atomic_dec(&trace_record_cmdline_disabled); |
1732 | trace_access_unlock(iter->cpu_file); | ||
1654 | trace_event_read_unlock(); | 1733 | trace_event_read_unlock(); |
1655 | } | 1734 | } |
1656 | 1735 | ||
@@ -2841,22 +2920,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
2841 | 2920 | ||
2842 | mutex_lock(&trace_types_lock); | 2921 | mutex_lock(&trace_types_lock); |
2843 | 2922 | ||
2844 | /* We only allow one reader per cpu */ | ||
2845 | if (cpu_file == TRACE_PIPE_ALL_CPU) { | ||
2846 | if (!cpumask_empty(tracing_reader_cpumask)) { | ||
2847 | ret = -EBUSY; | ||
2848 | goto out; | ||
2849 | } | ||
2850 | cpumask_setall(tracing_reader_cpumask); | ||
2851 | } else { | ||
2852 | if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask)) | ||
2853 | cpumask_set_cpu(cpu_file, tracing_reader_cpumask); | ||
2854 | else { | ||
2855 | ret = -EBUSY; | ||
2856 | goto out; | ||
2857 | } | ||
2858 | } | ||
2859 | |||
2860 | /* create a buffer to store the information to pass to userspace */ | 2923 | /* create a buffer to store the information to pass to userspace */ |
2861 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); | 2924 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); |
2862 | if (!iter) { | 2925 | if (!iter) { |
@@ -2912,12 +2975,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) | |||
2912 | 2975 | ||
2913 | mutex_lock(&trace_types_lock); | 2976 | mutex_lock(&trace_types_lock); |
2914 | 2977 | ||
2915 | if (iter->cpu_file == TRACE_PIPE_ALL_CPU) | ||
2916 | cpumask_clear(tracing_reader_cpumask); | ||
2917 | else | ||
2918 | cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); | ||
2919 | |||
2920 | |||
2921 | if (iter->trace->pipe_close) | 2978 | if (iter->trace->pipe_close) |
2922 | iter->trace->pipe_close(iter); | 2979 | iter->trace->pipe_close(iter); |
2923 | 2980 | ||
@@ -3079,6 +3136,7 @@ waitagain: | |||
3079 | iter->pos = -1; | 3136 | iter->pos = -1; |
3080 | 3137 | ||
3081 | trace_event_read_lock(); | 3138 | trace_event_read_lock(); |
3139 | trace_access_lock(iter->cpu_file); | ||
3082 | while (find_next_entry_inc(iter) != NULL) { | 3140 | while (find_next_entry_inc(iter) != NULL) { |
3083 | enum print_line_t ret; | 3141 | enum print_line_t ret; |
3084 | int len = iter->seq.len; | 3142 | int len = iter->seq.len; |
@@ -3095,6 +3153,7 @@ waitagain: | |||
3095 | if (iter->seq.len >= cnt) | 3153 | if (iter->seq.len >= cnt) |
3096 | break; | 3154 | break; |
3097 | } | 3155 | } |
3156 | trace_access_unlock(iter->cpu_file); | ||
3098 | trace_event_read_unlock(); | 3157 | trace_event_read_unlock(); |
3099 | 3158 | ||
3100 | /* Now copy what we have to the user */ | 3159 | /* Now copy what we have to the user */ |
@@ -3220,6 +3279,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
3220 | } | 3279 | } |
3221 | 3280 | ||
3222 | trace_event_read_lock(); | 3281 | trace_event_read_lock(); |
3282 | trace_access_lock(iter->cpu_file); | ||
3223 | 3283 | ||
3224 | /* Fill as many pages as possible. */ | 3284 | /* Fill as many pages as possible. */ |
3225 | for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { | 3285 | for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { |
@@ -3243,6 +3303,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
3243 | trace_seq_init(&iter->seq); | 3303 | trace_seq_init(&iter->seq); |
3244 | } | 3304 | } |
3245 | 3305 | ||
3306 | trace_access_unlock(iter->cpu_file); | ||
3246 | trace_event_read_unlock(); | 3307 | trace_event_read_unlock(); |
3247 | mutex_unlock(&iter->mutex); | 3308 | mutex_unlock(&iter->mutex); |
3248 | 3309 | ||
@@ -3544,10 +3605,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
3544 | 3605 | ||
3545 | info->read = 0; | 3606 | info->read = 0; |
3546 | 3607 | ||
3608 | trace_access_lock(info->cpu); | ||
3547 | ret = ring_buffer_read_page(info->tr->buffer, | 3609 | ret = ring_buffer_read_page(info->tr->buffer, |
3548 | &info->spare, | 3610 | &info->spare, |
3549 | count, | 3611 | count, |
3550 | info->cpu, 0); | 3612 | info->cpu, 0); |
3613 | trace_access_unlock(info->cpu); | ||
3551 | if (ret < 0) | 3614 | if (ret < 0) |
3552 | return 0; | 3615 | return 0; |
3553 | 3616 | ||
@@ -3675,6 +3738,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
3675 | len &= PAGE_MASK; | 3738 | len &= PAGE_MASK; |
3676 | } | 3739 | } |
3677 | 3740 | ||
3741 | trace_access_lock(info->cpu); | ||
3678 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); | 3742 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); |
3679 | 3743 | ||
3680 | for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { | 3744 | for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { |
@@ -3722,6 +3786,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
3722 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); | 3786 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); |
3723 | } | 3787 | } |
3724 | 3788 | ||
3789 | trace_access_unlock(info->cpu); | ||
3725 | spd.nr_pages = i; | 3790 | spd.nr_pages = i; |
3726 | 3791 | ||
3727 | /* did we read anything? */ | 3792 | /* did we read anything? */ |
@@ -4158,6 +4223,8 @@ static __init int tracer_init_debugfs(void) | |||
4158 | struct dentry *d_tracer; | 4223 | struct dentry *d_tracer; |
4159 | int cpu; | 4224 | int cpu; |
4160 | 4225 | ||
4226 | trace_access_lock_init(); | ||
4227 | |||
4161 | d_tracer = tracing_init_dentry(); | 4228 | d_tracer = tracing_init_dentry(); |
4162 | 4229 | ||
4163 | trace_create_file("tracing_enabled", 0644, d_tracer, | 4230 | trace_create_file("tracing_enabled", 0644, d_tracer, |
@@ -4392,9 +4459,6 @@ __init static int tracer_alloc_buffers(void) | |||
4392 | if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) | 4459 | if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) |
4393 | goto out_free_buffer_mask; | 4460 | goto out_free_buffer_mask; |
4394 | 4461 | ||
4395 | if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) | ||
4396 | goto out_free_tracing_cpumask; | ||
4397 | |||
4398 | /* To save memory, keep the ring buffer size to its minimum */ | 4462 | /* To save memory, keep the ring buffer size to its minimum */ |
4399 | if (ring_buffer_expanded) | 4463 | if (ring_buffer_expanded) |
4400 | ring_buf_size = trace_buf_size; | 4464 | ring_buf_size = trace_buf_size; |
@@ -4452,8 +4516,6 @@ __init static int tracer_alloc_buffers(void) | |||
4452 | return 0; | 4516 | return 0; |
4453 | 4517 | ||
4454 | out_free_cpumask: | 4518 | out_free_cpumask: |
4455 | free_cpumask_var(tracing_reader_cpumask); | ||
4456 | out_free_tracing_cpumask: | ||
4457 | free_cpumask_var(tracing_cpumask); | 4519 | free_cpumask_var(tracing_cpumask); |
4458 | out_free_buffer_mask: | 4520 | out_free_buffer_mask: |
4459 | free_cpumask_var(tracing_buffer_mask); | 4521 | free_cpumask_var(tracing_buffer_mask); |