path: root/kernel/trace
author		Ingo Molnar <mingo@elte.hu>	2008-11-23 02:55:47 -0500
committer	Ingo Molnar <mingo@elte.hu>	2008-11-23 02:55:47 -0500
commit		ca9eed76133c00e7f4b1eeb4c1a6cb800cd2654c (patch)
tree		5f011f4bd00c921e41605b1915c227aeaf411a0d /kernel/trace
parent		8652cb4b0d87accbe78725fd2a13be2787059649 (diff)
parent		13d428afc007fcfcd6deeb215618f54cf9c0cae6 (diff)
Merge commit 'v2.6.28-rc6' into x86/debug
Diffstat (limited to 'kernel/trace')
-rw-r--r--	kernel/trace/Kconfig		4
-rw-r--r--	kernel/trace/ftrace.c		155
-rw-r--r--	kernel/trace/ring_buffer.c	174
-rw-r--r--	kernel/trace/trace.c		68
-rw-r--r--	kernel/trace/trace.h		20
5 files changed, 311 insertions(+), 110 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e0cea282e0c5..33dbefd471e8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -8,7 +8,6 @@ config NOP_TRACER
 
 config HAVE_FUNCTION_TRACER
 	bool
-	select NOP_TRACER
 
 config HAVE_DYNAMIC_FTRACE
 	bool
@@ -26,8 +25,9 @@ config TRACING
 	bool
 	select DEBUG_FS
 	select RING_BUFFER
-	select STACKTRACE
+	select STACKTRACE if STACKTRACE_SUPPORT
 	select TRACEPOINTS
+	select NOP_TRACER
 
 menu "Tracers"
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7618c528756b..78db083390f0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -185,7 +185,6 @@ enum {
 };
 
 static int ftrace_filtered;
-static int tracing_on;
 
 static LIST_HEAD(ftrace_new_addrs);
 
@@ -327,96 +326,89 @@ ftrace_record_ip(unsigned long ip)
 
 static int
 __ftrace_replace_code(struct dyn_ftrace *rec,
-		      unsigned char *old, unsigned char *new, int enable)
+		      unsigned char *nop, int enable)
 {
 	unsigned long ip, fl;
+	unsigned char *call, *old, *new;
 
 	ip = rec->ip;
 
-	if (ftrace_filtered && enable) {
+	/*
+	 * If this record is not to be traced and
+	 * it is not enabled then do nothing.
+	 *
+	 * If this record is not to be traced and
+	 * it is enabled then disabled it.
+	 *
+	 */
+	if (rec->flags & FTRACE_FL_NOTRACE) {
+		if (rec->flags & FTRACE_FL_ENABLED)
+			rec->flags &= ~FTRACE_FL_ENABLED;
+		else
+			return 0;
+
+	} else if (ftrace_filtered && enable) {
 		/*
-		 * If filtering is on:
-		 *
-		 * If this record is set to be filtered and
-		 * is enabled then do nothing.
-		 *
-		 * If this record is set to be filtered and
-		 * it is not enabled, enable it.
-		 *
-		 * If this record is not set to be filtered
-		 * and it is not enabled do nothing.
-		 *
-		 * If this record is set not to trace then
-		 * do nothing.
-		 *
-		 * If this record is set not to trace and
-		 * it is enabled then disable it.
-		 *
-		 * If this record is not set to be filtered and
-		 * it is enabled, disable it.
+		 * Filtering is on:
 		 */
 
-		fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE |
-				   FTRACE_FL_ENABLED);
+		fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
 
-		if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) ||
-		    (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) ||
-		    !fl || (fl == FTRACE_FL_NOTRACE))
+		/* Record is filtered and enabled, do nothing */
+		if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
 			return 0;
 
-		/*
-		 * If it is enabled disable it,
-		 * otherwise enable it!
-		 */
-		if (fl & FTRACE_FL_ENABLED) {
-			/* swap new and old */
-			new = old;
-			old = ftrace_call_replace(ip, FTRACE_ADDR);
+		/* Record is not filtered and is not enabled do nothing */
+		if (!fl)
+			return 0;
+
+		/* Record is not filtered but enabled, disable it */
+		if (fl == FTRACE_FL_ENABLED)
 			rec->flags &= ~FTRACE_FL_ENABLED;
-		} else {
-			new = ftrace_call_replace(ip, FTRACE_ADDR);
+		else
+		/* Otherwise record is filtered but not enabled, enable it */
 			rec->flags |= FTRACE_FL_ENABLED;
-		}
 	} else {
+		/* Disable or not filtered */
 
 		if (enable) {
-			/*
-			 * If this record is set not to trace and is
-			 * not enabled, do nothing.
-			 */
-			fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
-			if (fl == FTRACE_FL_NOTRACE)
-				return 0;
-
-			new = ftrace_call_replace(ip, FTRACE_ADDR);
-		} else
-			old = ftrace_call_replace(ip, FTRACE_ADDR);
-
-		if (enable) {
+			/* if record is enabled, do nothing */
 			if (rec->flags & FTRACE_FL_ENABLED)
 				return 0;
+
 			rec->flags |= FTRACE_FL_ENABLED;
+
 		} else {
+
+			/* if record is not enabled do nothing */
 			if (!(rec->flags & FTRACE_FL_ENABLED))
 				return 0;
+
 			rec->flags &= ~FTRACE_FL_ENABLED;
 		}
 	}
 
+	call = ftrace_call_replace(ip, FTRACE_ADDR);
+
+	if (rec->flags & FTRACE_FL_ENABLED) {
+		old = nop;
+		new = call;
+	} else {
+		old = call;
+		new = nop;
+	}
+
 	return ftrace_modify_code(ip, old, new);
 }
 
 static void ftrace_replace_code(int enable)
 {
 	int i, failed;
-	unsigned char *new = NULL, *old = NULL;
+	unsigned char *nop = NULL;
 	struct dyn_ftrace *rec;
 	struct ftrace_page *pg;
 
-	if (enable)
-		old = ftrace_nop_replace();
-	else
-		new = ftrace_nop_replace();
+	nop = ftrace_nop_replace();
 
 	for (pg = ftrace_pages_start; pg; pg = pg->next) {
 		for (i = 0; i < pg->index; i++) {
@@ -434,7 +426,7 @@ static void ftrace_replace_code(int enable)
 				unfreeze_record(rec);
 			}
 
-			failed = __ftrace_replace_code(rec, old, new, enable);
+			failed = __ftrace_replace_code(rec, nop, enable);
 			if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
 				rec->flags |= FTRACE_FL_FAILED;
 				if ((system_state == SYSTEM_BOOTING) ||
@@ -506,13 +498,10 @@ static int __ftrace_modify_code(void *data)
 {
 	int *command = data;
 
-	if (*command & FTRACE_ENABLE_CALLS) {
+	if (*command & FTRACE_ENABLE_CALLS)
 		ftrace_replace_code(1);
-		tracing_on = 1;
-	} else if (*command & FTRACE_DISABLE_CALLS) {
+	else if (*command & FTRACE_DISABLE_CALLS)
 		ftrace_replace_code(0);
-		tracing_on = 0;
-	}
 
 	if (*command & FTRACE_UPDATE_TRACE_FUNC)
 		ftrace_update_ftrace_func(ftrace_trace_function);
@@ -538,8 +527,7 @@ static void ftrace_startup(void)
 
 	mutex_lock(&ftrace_start_lock);
 	ftrace_start++;
-	if (ftrace_start == 1)
-		command |= FTRACE_ENABLE_CALLS;
+	command |= FTRACE_ENABLE_CALLS;
 
 	if (saved_ftrace_func != ftrace_trace_function) {
 		saved_ftrace_func = ftrace_trace_function;
@@ -677,7 +665,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
 
 	cnt = num_to_init / ENTRIES_PER_PAGE;
 	pr_info("ftrace: allocating %ld entries in %d pages\n",
-		num_to_init, cnt);
+		num_to_init, cnt + 1);
 
 	for (i = 0; i < cnt; i++) {
 		pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -738,6 +726,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 		    ((iter->flags & FTRACE_ITER_FAILURES) &&
 		     !(rec->flags & FTRACE_FL_FAILED)) ||
 
+		    ((iter->flags & FTRACE_ITER_FILTER) &&
+		     !(rec->flags & FTRACE_FL_FILTER)) ||
+
 		    ((iter->flags & FTRACE_ITER_NOTRACE) &&
 		     !(rec->flags & FTRACE_FL_NOTRACE))) {
 			rec = NULL;
@@ -757,13 +748,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 	void *p = NULL;
 	loff_t l = -1;
 
-	if (*pos != iter->pos) {
-		for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l))
-			;
-	} else {
-		l = *pos;
-		p = t_next(m, p, &l);
-	}
+	if (*pos > iter->pos)
+		*pos = iter->pos;
+
+	l = *pos;
+	p = t_next(m, p, &l);
 
 	return p;
 }
@@ -774,15 +763,21 @@ static void t_stop(struct seq_file *m, void *p)
 
 static int t_show(struct seq_file *m, void *v)
 {
+	struct ftrace_iterator *iter = m->private;
 	struct dyn_ftrace *rec = v;
 	char str[KSYM_SYMBOL_LEN];
+	int ret = 0;
 
 	if (!rec)
 		return 0;
 
 	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
 
-	seq_printf(m, "%s\n", str);
+	ret = seq_printf(m, "%s\n", str);
+	if (ret < 0) {
+		iter->pos--;
+		iter->idx--;
+	}
 
 	return 0;
 }
@@ -808,7 +803,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
 		return -ENOMEM;
 
 	iter->pg = ftrace_pages_start;
-	iter->pos = -1;
+	iter->pos = 0;
 
 	ret = seq_open(file, &show_ftrace_seq_ops);
 	if (!ret) {
@@ -895,7 +890,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
 
 	if (file->f_mode & FMODE_READ) {
 		iter->pg = ftrace_pages_start;
-		iter->pos = -1;
+		iter->pos = 0;
 		iter->flags = enable ? FTRACE_ITER_FILTER :
 			FTRACE_ITER_NOTRACE;
 
@@ -1186,7 +1181,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
 
 	mutex_lock(&ftrace_sysctl_lock);
 	mutex_lock(&ftrace_start_lock);
-	if (iter->filtered && ftrace_start && ftrace_enabled)
+	if (ftrace_start && ftrace_enabled)
 		ftrace_run_update_code(FTRACE_ENABLE_CALLS);
 	mutex_unlock(&ftrace_start_lock);
 	mutex_unlock(&ftrace_sysctl_lock);
@@ -1339,6 +1334,14 @@ void __init ftrace_init(void)
 }
 
 #else
+
+static int __init ftrace_nodyn_init(void)
+{
+	ftrace_enabled = 1;
+	return 0;
+}
+device_initcall(ftrace_nodyn_init);
+
 # define ftrace_startup()		do { } while (0)
 # define ftrace_shutdown()		do { } while (0)
 # define ftrace_startup_sysctl()	do { } while (0)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cedf4e268285..f780e9552f91 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -16,14 +16,49 @@
 #include <linux/list.h>
 #include <linux/fs.h>
 
+#include "trace.h"
+
+/* Global flag to disable all recording to ring buffers */
+static int ring_buffers_off __read_mostly;
+
+/**
+ * tracing_on - enable all tracing buffers
+ *
+ * This function enables all tracing buffers that may have been
+ * disabled with tracing_off.
+ */
+void tracing_on(void)
+{
+	ring_buffers_off = 0;
+}
+
+/**
+ * tracing_off - turn off all tracing buffers
+ *
+ * This function stops all tracing buffers from recording data.
+ * It does not disable any overhead the tracers themselves may
+ * be causing. This function simply causes all recording to
+ * the ring buffers to fail.
+ */
+void tracing_off(void)
+{
+	ring_buffers_off = 1;
+}
+
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
 /* FIXME!!! */
 u64 ring_buffer_time_stamp(int cpu)
 {
+	u64 time;
+
+	preempt_disable_notrace();
 	/* shift to debug/test normalization and TIME_EXTENTS */
-	return sched_clock() << DEBUG_SHIFT;
+	time = sched_clock() << DEBUG_SHIFT;
+	preempt_enable_notrace();
+
+	return time;
 }
 
 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
@@ -503,6 +538,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 	LIST_HEAD(pages);
 	int i, cpu;
 
+	/*
+	 * Always succeed at resizing a non-existent buffer:
+	 */
+	if (!buffer)
+		return size;
+
 	size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	size *= BUF_PAGE_SIZE;
 	buffer_size = buffer->pages * BUF_PAGE_SIZE;
@@ -576,6 +617,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 		list_del_init(&page->list);
 		free_buffer_page(page);
 	}
+	mutex_unlock(&buffer->mutex);
 	return -ENOMEM;
 }
 
@@ -1022,8 +1064,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	struct ring_buffer_event *event;
 	u64 ts, delta;
 	int commit = 0;
+	int nr_loops = 0;
 
  again:
+	/*
+	 * We allow for interrupts to reenter here and do a trace.
+	 * If one does, it will cause this original code to loop
+	 * back here. Even with heavy interrupts happening, this
+	 * should only happen a few times in a row. If this happens
+	 * 1000 times in a row, there must be either an interrupt
+	 * storm or we have something buggy.
+	 * Bail!
+	 */
+	if (unlikely(++nr_loops > 1000)) {
+		RB_WARN_ON(cpu_buffer, 1);
+		return NULL;
+	}
+
 	ts = ring_buffer_time_stamp(cpu_buffer->cpu);
 
 	/*
@@ -1045,7 +1102,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
 		/* Did the write stamp get updated already? */
 		if (unlikely(ts < cpu_buffer->write_stamp))
-			goto again;
+			delta = 0;
 
 		if (test_time_stamp(delta)) {
 
@@ -1118,6 +1175,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 	int cpu, resched;
 
+	if (ring_buffers_off)
+		return NULL;
+
 	if (atomic_read(&buffer->record_disabled))
 		return NULL;
 
@@ -1234,6 +1294,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	int ret = -EBUSY;
 	int cpu, resched;
 
+	if (ring_buffers_off)
+		return -EBUSY;
+
 	if (atomic_read(&buffer->record_disabled))
 		return -EBUSY;
 
@@ -1532,10 +1595,23 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *reader = NULL;
 	unsigned long flags;
+	int nr_loops = 0;
 
 	spin_lock_irqsave(&cpu_buffer->lock, flags);
 
  again:
+	/*
+	 * This should normally only loop twice. But because the
+	 * start of the reader inserts an empty page, it causes
+	 * a case where we will loop three times. There should be no
+	 * reason to loop four times (that I know of).
+	 */
+	if (unlikely(++nr_loops > 3)) {
+		RB_WARN_ON(cpu_buffer, 1);
+		reader = NULL;
+		goto out;
+	}
+
 	reader = cpu_buffer->reader_page;
 
 	/* If there's more to read, return this page */
@@ -1665,6 +1741,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
 	struct buffer_page *reader;
+	int nr_loops = 0;
 
 	if (!cpu_isset(cpu, buffer->cpumask))
 		return NULL;
@@ -1672,6 +1749,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	cpu_buffer = buffer->buffers[cpu];
 
  again:
+	/*
+	 * We repeat when a timestamp is encountered. It is possible
+	 * to get multiple timestamps from an interrupt entering just
+	 * as one timestamp is about to be written. The max times
+	 * that this can happen is the number of nested interrupts we
+	 * can have. Nesting 10 deep of interrupts is clearly
+	 * an anomaly.
+	 */
+	if (unlikely(++nr_loops > 10)) {
+		RB_WARN_ON(cpu_buffer, 1);
+		return NULL;
+	}
+
 	reader = rb_get_reader_page(cpu_buffer);
 	if (!reader)
 		return NULL;
@@ -1722,6 +1812,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	struct ring_buffer *buffer;
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
+	int nr_loops = 0;
 
 	if (ring_buffer_iter_empty(iter))
 		return NULL;
@@ -1730,6 +1821,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	buffer = cpu_buffer->buffer;
 
  again:
+	/*
+	 * We repeat when a timestamp is encountered. It is possible
+	 * to get multiple timestamps from an interrupt entering just
+	 * as one timestamp is about to be written. The max times
+	 * that this can happen is the number of nested interrupts we
+	 * can have. Nesting 10 deep of interrupts is clearly
+	 * an anomaly.
+	 */
+	if (unlikely(++nr_loops > 10)) {
+		RB_WARN_ON(cpu_buffer, 1);
+		return NULL;
+	}
+
 	if (rb_per_cpu_empty(cpu_buffer))
 		return NULL;
 
@@ -2014,3 +2118,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
 	return 0;
 }
 
+static ssize_t
+rb_simple_read(struct file *filp, char __user *ubuf,
+	       size_t cnt, loff_t *ppos)
+{
+	int *p = filp->private_data;
+	char buf[64];
+	int r;
+
+	/* !ring_buffers_off == tracing_on */
+	r = sprintf(buf, "%d\n", !*p);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+rb_simple_write(struct file *filp, const char __user *ubuf,
+		size_t cnt, loff_t *ppos)
+{
+	int *p = filp->private_data;
+	char buf[64];
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	/* !ring_buffers_off == tracing_on */
+	*p = !val;
+
+	(*ppos)++;
+
+	return cnt;
+}
+
+static struct file_operations rb_simple_fops = {
+	.open		= tracing_open_generic,
+	.read		= rb_simple_read,
+	.write		= rb_simple_write,
+};
+
+
+static __init int rb_init_debugfs(void)
+{
+	struct dentry *d_tracer;
+	struct dentry *entry;
+
+	d_tracer = tracing_init_dentry();
+
+	entry = debugfs_create_file("tracing_on", 0644, d_tracer,
+				    &ring_buffers_off, &rb_simple_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs 'tracing_on' entry\n");
+
+	return 0;
+}
+
+fs_initcall(rb_init_debugfs);
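
Side note on the ring_buffer.c additions above: rb_init_debugfs() exposes the new ring_buffers_off flag as a 0/1 file named tracing_on in the tracing debugfs directory. A minimal user-space sketch of driving that switch (assuming debugfs is mounted at the conventional /sys/kernel/debug; the helper name is made up for illustration):

/* Illustrative user-space helper (hypothetical, not part of this patch). */
#include <stdio.h>

/* Writes "0" or "1" to the tracing_on file created by rb_init_debugfs().
 * Path assumes debugfs is mounted at /sys/kernel/debug. */
static int set_tracing_on(int on)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/tracing_on", "w");

	if (!f)
		return -1;	/* debugfs not mounted or insufficient permissions */
	fprintf(f, "%d\n", on ? 1 : 0);
	fclose(f);
	return 0;
}

int main(void)
{
	set_tracing_on(0);	/* stop all recording into the ring buffers */
	/* ... reproduce the situation of interest here ... */
	set_tracing_on(1);	/* resume recording */
	return 0;
}
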
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a610ca771558..d86e3252f300 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -656,7 +656,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 	entry->preempt_count = pc & 0xff;
 	entry->pid = (tsk) ? tsk->pid : 0;
 	entry->flags =
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
+#else
+		TRACE_FLAG_IRQS_NOSUPPORT |
+#endif
 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
 		(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
@@ -701,6 +705,7 @@ static void ftrace_trace_stack(struct trace_array *tr,
 				 unsigned long flags,
 				 int skip, int pc)
 {
+#ifdef CONFIG_STACKTRACE
 	struct ring_buffer_event *event;
 	struct stack_entry *entry;
 	struct stack_trace trace;
@@ -726,6 +731,7 @@ static void ftrace_trace_stack(struct trace_array *tr,
 
 	save_stack_trace(&trace);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+#endif
 }
 
 void __trace_stack(struct trace_array *tr,
@@ -1082,17 +1088,20 @@ static void s_stop(struct seq_file *m, void *p)
 	mutex_unlock(&trace_types_lock);
 }
 
-#define KRETPROBE_MSG "[unknown/kretprobe'd]"
-
 #ifdef CONFIG_KRETPROBES
-static inline int kretprobed(unsigned long addr)
+static inline const char *kretprobed(const char *name)
 {
-	return addr == (unsigned long)kretprobe_trampoline;
+	static const char tramp_name[] = "kretprobe_trampoline";
+	int size = sizeof(tramp_name);
+
+	if (strncmp(tramp_name, name, size) == 0)
+		return "[unknown/kretprobe'd]";
+	return name;
 }
 #else
-static inline int kretprobed(unsigned long addr)
+static inline const char *kretprobed(const char *name)
 {
-	return 0;
+	return name;
 }
 #endif /* CONFIG_KRETPROBES */
 
@@ -1101,10 +1110,13 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
 {
 #ifdef CONFIG_KALLSYMS
 	char str[KSYM_SYMBOL_LEN];
+	const char *name;
 
 	kallsyms_lookup(address, NULL, NULL, NULL, str);
 
-	return trace_seq_printf(s, fmt, str);
+	name = kretprobed(str);
+
+	return trace_seq_printf(s, fmt, name);
 #endif
 	return 1;
 }
@@ -1115,9 +1127,12 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
 {
 #ifdef CONFIG_KALLSYMS
 	char str[KSYM_SYMBOL_LEN];
+	const char *name;
 
 	sprint_symbol(str, address);
-	return trace_seq_printf(s, fmt, str);
+	name = kretprobed(str);
+
+	return trace_seq_printf(s, fmt, name);
 #endif
 	return 1;
 }
@@ -1244,7 +1259,8 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
 	trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
 	trace_seq_printf(s, "%3d", cpu);
 	trace_seq_printf(s, "%c%c",
-			(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
+			(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
+			 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
 			((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
 
 	hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
@@ -1370,10 +1386,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 
 		seq_print_ip_sym(s, field->ip, sym_flags);
 		trace_seq_puts(s, " (");
-		if (kretprobed(field->parent_ip))
-			trace_seq_puts(s, KRETPROBE_MSG);
-		else
-			seq_print_ip_sym(s, field->parent_ip, sym_flags);
+		seq_print_ip_sym(s, field->parent_ip, sym_flags);
 		trace_seq_puts(s, ")\n");
 		break;
 	}
@@ -1489,12 +1502,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 			ret = trace_seq_printf(s, " <-");
 			if (!ret)
 				return TRACE_TYPE_PARTIAL_LINE;
-			if (kretprobed(field->parent_ip))
-				ret = trace_seq_puts(s, KRETPROBE_MSG);
-			else
-				ret = seq_print_ip_sym(s,
-						       field->parent_ip,
-						       sym_flags);
+			ret = seq_print_ip_sym(s,
+					       field->parent_ip,
+					       sym_flags);
 			if (!ret)
 				return TRACE_TYPE_PARTIAL_LINE;
 		}
@@ -1745,7 +1755,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 		return TRACE_TYPE_HANDLED;
 
 	SEQ_PUT_FIELD_RET(s, entry->pid);
-	SEQ_PUT_FIELD_RET(s, iter->cpu);
+	SEQ_PUT_FIELD_RET(s, entry->cpu);
 	SEQ_PUT_FIELD_RET(s, iter->ts);
 
 	switch (entry->type) {
@@ -1926,6 +1936,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
 	}
 	mutex_unlock(&trace_types_lock);
+	kfree(iter);
 
 	return ERR_PTR(-ENOMEM);
 }
@@ -2666,7 +2677,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 {
 	unsigned long val;
 	char buf[64];
-	int ret;
+	int ret, cpu;
 	struct trace_array *tr = filp->private_data;
 
 	if (cnt >= sizeof(buf))
@@ -2694,6 +2705,14 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 		goto out;
 	}
 
+	/* disable all cpu buffers */
+	for_each_tracing_cpu(cpu) {
+		if (global_trace.data[cpu])
+			atomic_inc(&global_trace.data[cpu]->disabled);
+		if (max_tr.data[cpu])
+			atomic_inc(&max_tr.data[cpu]->disabled);
+	}
+
 	if (val != global_trace.entries) {
 		ret = ring_buffer_resize(global_trace.buffer, val);
 		if (ret < 0) {
@@ -2725,6 +2744,13 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 	if (tracing_disabled)
 		cnt = -ENOMEM;
  out:
+	for_each_tracing_cpu(cpu) {
+		if (global_trace.data[cpu])
+			atomic_dec(&global_trace.data[cpu]->disabled);
+		if (max_tr.data[cpu])
+			atomic_dec(&max_tr.data[cpu]->disabled);
+	}
+
 	max_tr.entries = global_trace.entries;
 	mutex_unlock(&trace_types_lock);
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6889ca48f1f1..8465ad052707 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -120,18 +120,20 @@ struct trace_boot {
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
  *  IRQS_OFF	- interrupts were disabled
- *  NEED_RESCED	- reschedule is requested
- *  HARDIRQ	- inside an interrupt handler
- *  SOFTIRQ	- inside a softirq handler
- *  CONT	- multiple entries hold the trace item
+ *  IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
+ *  NEED_RESCED	- reschedule is requested
+ *  HARDIRQ	- inside an interrupt handler
+ *  SOFTIRQ	- inside a softirq handler
+ *  CONT	- multiple entries hold the trace item
  */
 enum trace_flag_type {
 	TRACE_FLAG_IRQS_OFF		= 0x01,
-	TRACE_FLAG_NEED_RESCHED		= 0x02,
-	TRACE_FLAG_HARDIRQ		= 0x04,
-	TRACE_FLAG_SOFTIRQ		= 0x08,
-	TRACE_FLAG_CONT			= 0x10,
+	TRACE_FLAG_IRQS_NOSUPPORT	= 0x02,
+	TRACE_FLAG_NEED_RESCHED		= 0x04,
+	TRACE_FLAG_HARDIRQ		= 0x08,
+	TRACE_FLAG_SOFTIRQ		= 0x10,
+	TRACE_FLAG_CONT			= 0x20,
 };
 
 #define TRACE_BUF_SIZE		1024
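
Side note on the trace.h renumbering above: TRACE_FLAG_IRQS_NOSUPPORT takes bit 0x02 and pushes the remaining flags up one bit, and lat_print_generic() in trace.c now renders it as 'X' in the irqs-off column. A standalone sketch of that decoding, using the same enum values (not kernel code, just an illustration):

/* Standalone illustration (not kernel code) of the flag decoding. */
#include <stdio.h>

enum trace_flag_type {
	TRACE_FLAG_IRQS_OFF		= 0x01,
	TRACE_FLAG_IRQS_NOSUPPORT	= 0x02,
	TRACE_FLAG_NEED_RESCHED		= 0x04,
	TRACE_FLAG_HARDIRQ		= 0x08,
	TRACE_FLAG_SOFTIRQ		= 0x10,
	TRACE_FLAG_CONT			= 0x20,
};

/* Mirrors the updated lat_print_generic() column: 'd' when irqs were off,
 * 'X' when the architecture cannot report the irq state, '.' otherwise. */
static char irq_char(unsigned int flags)
{
	if (flags & TRACE_FLAG_IRQS_OFF)
		return 'd';
	if (flags & TRACE_FLAG_IRQS_NOSUPPORT)
		return 'X';
	return '.';
}

int main(void)
{
	unsigned int flags = TRACE_FLAG_IRQS_NOSUPPORT | TRACE_FLAG_NEED_RESCHED;

	/* Prints "XN": irq state unknown, reschedule requested. */
	printf("%c%c\n", irq_char(flags),
	       (flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.');
	return 0;
}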