Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                  |  35
-rw-r--r--  kernel/trace/Makefile                 |   2
-rw-r--r--  kernel/trace/blktrace.c               |  21
-rw-r--r--  kernel/trace/ftrace.c                 | 179
-rw-r--r--  kernel/trace/ring_buffer.c            |  66
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c  |   2
-rw-r--r--  kernel/trace/trace.c                  | 330
-rw-r--r--  kernel/trace/trace.h                  |  63
-rw-r--r--  kernel/trace/trace_entries.h          |   3
-rw-r--r--  kernel/trace/trace_events.c           | 138
-rw-r--r--  kernel/trace/trace_events_filter.c    |  12
-rw-r--r--  kernel/trace/trace_functions.c        |   3
-rw-r--r--  kernel/trace/trace_functions_graph.c  | 225
-rw-r--r--  kernel/trace/trace_irqsoff.c          |   4
-rw-r--r--  kernel/trace/trace_kprobe.c           | 378
-rw-r--r--  kernel/trace/trace_mmiotrace.c        |   2
-rw-r--r--  kernel/trace/trace_output.c           |  11
-rw-r--r--  kernel/trace/trace_sched_wakeup.c     |   4
-rw-r--r--  kernel/trace/trace_stack.c            |  13
-rw-r--r--  kernel/trace/tracedump.c              | 682
-rw-r--r--  kernel/trace/tracelevel.c             | 142
21 files changed, 1797 insertions(+), 518 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2ad39e556cb..93168c0f991 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -82,7 +82,7 @@ config EVENT_POWER_TRACING_DEPRECATED
82 power:power_frequency 82 power:power_frequency
83 This is for userspace compatibility 83 This is for userspace compatibility
84 and will vanish after 5 kernel iterations, 84 and will vanish after 5 kernel iterations,
85 namely 2.6.41. 85 namely 3.1.
86 86
87config CONTEXT_SWITCH_TRACER 87config CONTEXT_SWITCH_TRACER
88 bool 88 bool
@@ -487,6 +487,39 @@ config RING_BUFFER_BENCHMARK
487 487
488 If unsure, say N. 488 If unsure, say N.
489 489
490config TRACELEVEL
491 bool "Add capability to prioritize traces"
492 depends on EVENT_TRACING
493 help
494 This option allows subsystem programmers to add priorities to trace
495 events by calling tracelevel_register. Traces of high priority
496 will automatically be enabled on kernel boot, and users can change
497 the trace level via a kernel parameter.
498
499config TRACEDUMP
500 bool "Dumping functionality for ftrace"
501 depends on FUNCTION_TRACER
502 help
503 This option adds functionality to dump tracing data in several forms.
504 Data can be dumped in ascii form or as raw pages from the tracing
505 ring buffers, along with the saved cmdlines. This is specified by
506 the module parameter tracedump_ascii. Data will be compressed
507 using zlib.
508
509config TRACEDUMP_PANIC
510 bool "Tracedump to console on panic"
511 depends on TRACEDUMP
512 help
513 With this option, tracedump will automatically dump to the console
514 on a kernel panic.
515
516config TRACEDUMP_PROCFS
517 bool "Tracedump via proc file"
518 depends on TRACEDUMP
519 help
520 With this option, tracedump can be dumped from user space by reading
521 from /proc/tracedump.
522
490endif # FTRACE 523endif # FTRACE
491 524
492endif # TRACING_SUPPORT 525endif # TRACING_SUPPORT
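The TRACELEVEL help text above refers to tracelevel_register(), but kernel/trace/tracelevel.c is a new file that is not shown in this excerpt, so the prototype below is an assumption. The sketch only illustrates how a subsystem might register a prioritized event at init time; the event name and priority are placeholders.

```c
/*
 * Illustrative sketch only: the tracelevel_register() prototype, the event
 * name and the priority value are assumptions, not taken from the patch.
 */
#include <linux/module.h>
#include <linux/init.h>

/* assumed export from kernel/trace/tracelevel.c */
extern int tracelevel_register(char *name, unsigned int level);

static int __init my_subsys_trace_init(void)
{
	/* "my_subsys_event" and priority 2 are placeholders */
	return tracelevel_register("my_subsys_event", 2);
}
module_init(my_subsys_trace_init);

MODULE_LICENSE("GPL");
```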
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 761c510a06c..1360a1a90d5 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -56,5 +56,7 @@ obj-$(CONFIG_TRACEPOINTS) += power-traces.o
56ifeq ($(CONFIG_TRACING),y) 56ifeq ($(CONFIG_TRACING),y)
57obj-$(CONFIG_KGDB_KDB) += trace_kdb.o 57obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
58endif 58endif
59obj-$(CONFIG_TRACELEVEL) += tracelevel.o
60obj-$(CONFIG_TRACEDUMP) += tracedump.o
59 61
60libftrace-y := ftrace.o 62libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 6957aa298df..7c910a5593a 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
206 what |= MASK_TC_BIT(rw, RAHEAD); 206 what |= MASK_TC_BIT(rw, RAHEAD);
207 what |= MASK_TC_BIT(rw, META); 207 what |= MASK_TC_BIT(rw, META);
208 what |= MASK_TC_BIT(rw, DISCARD); 208 what |= MASK_TC_BIT(rw, DISCARD);
209 what |= MASK_TC_BIT(rw, FLUSH);
210 what |= MASK_TC_BIT(rw, FUA);
209 211
210 pid = tsk->pid; 212 pid = tsk->pid;
211 if (act_log_check(bt, what, sector, pid)) 213 if (act_log_check(bt, what, sector, pid))
@@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
1054 goto out; 1056 goto out;
1055 } 1057 }
1056 1058
1059 if (tc & BLK_TC_FLUSH)
1060 rwbs[i++] = 'F';
1061
1057 if (tc & BLK_TC_DISCARD) 1062 if (tc & BLK_TC_DISCARD)
1058 rwbs[i++] = 'D'; 1063 rwbs[i++] = 'D';
1059 else if (tc & BLK_TC_WRITE) 1064 else if (tc & BLK_TC_WRITE)
@@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
1063 else 1068 else
1064 rwbs[i++] = 'N'; 1069 rwbs[i++] = 'N';
1065 1070
1071 if (tc & BLK_TC_FUA)
1072 rwbs[i++] = 'F';
1066 if (tc & BLK_TC_AHEAD) 1073 if (tc & BLK_TC_AHEAD)
1067 rwbs[i++] = 'A'; 1074 rwbs[i++] = 'A';
1068 if (tc & BLK_TC_BARRIER)
1069 rwbs[i++] = 'B';
1070 if (tc & BLK_TC_SYNC) 1075 if (tc & BLK_TC_SYNC)
1071 rwbs[i++] = 'S'; 1076 rwbs[i++] = 'S';
1072 if (tc & BLK_TC_META) 1077 if (tc & BLK_TC_META)
@@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
1132 1137
1133static int blk_log_action_classic(struct trace_iterator *iter, const char *act) 1138static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
1134{ 1139{
1135 char rwbs[6]; 1140 char rwbs[RWBS_LEN];
1136 unsigned long long ts = iter->ts; 1141 unsigned long long ts = iter->ts;
1137 unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); 1142 unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
1138 unsigned secs = (unsigned long)ts; 1143 unsigned secs = (unsigned long)ts;
@@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
1148 1153
1149static int blk_log_action(struct trace_iterator *iter, const char *act) 1154static int blk_log_action(struct trace_iterator *iter, const char *act)
1150{ 1155{
1151 char rwbs[6]; 1156 char rwbs[RWBS_LEN];
1152 const struct blk_io_trace *t = te_blk_io_trace(iter->ent); 1157 const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
1153 1158
1154 fill_rwbs(rwbs, t); 1159 fill_rwbs(rwbs, t);
@@ -1561,7 +1566,7 @@ static const struct {
1561} mask_maps[] = { 1566} mask_maps[] = {
1562 { BLK_TC_READ, "read" }, 1567 { BLK_TC_READ, "read" },
1563 { BLK_TC_WRITE, "write" }, 1568 { BLK_TC_WRITE, "write" },
1564 { BLK_TC_BARRIER, "barrier" }, 1569 { BLK_TC_FLUSH, "flush" },
1565 { BLK_TC_SYNC, "sync" }, 1570 { BLK_TC_SYNC, "sync" },
1566 { BLK_TC_QUEUE, "queue" }, 1571 { BLK_TC_QUEUE, "queue" },
1567 { BLK_TC_REQUEUE, "requeue" }, 1572 { BLK_TC_REQUEUE, "requeue" },
@@ -1573,6 +1578,7 @@ static const struct {
1573 { BLK_TC_META, "meta" }, 1578 { BLK_TC_META, "meta" },
1574 { BLK_TC_DISCARD, "discard" }, 1579 { BLK_TC_DISCARD, "discard" },
1575 { BLK_TC_DRV_DATA, "drv_data" }, 1580 { BLK_TC_DRV_DATA, "drv_data" },
1581 { BLK_TC_FUA, "fua" },
1576}; 1582};
1577 1583
1578static int blk_trace_str2mask(const char *str) 1584static int blk_trace_str2mask(const char *str)
@@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1788{ 1794{
1789 int i = 0; 1795 int i = 0;
1790 1796
1797 if (rw & REQ_FLUSH)
1798 rwbs[i++] = 'F';
1799
1791 if (rw & WRITE) 1800 if (rw & WRITE)
1792 rwbs[i++] = 'W'; 1801 rwbs[i++] = 'W';
1793 else if (rw & REQ_DISCARD) 1802 else if (rw & REQ_DISCARD)
@@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1797 else 1806 else
1798 rwbs[i++] = 'N'; 1807 rwbs[i++] = 'N';
1799 1808
1809 if (rw & REQ_FUA)
1810 rwbs[i++] = 'F';
1800 if (rw & REQ_RAHEAD) 1811 if (rw & REQ_RAHEAD)
1801 rwbs[i++] = 'A'; 1812 rwbs[i++] = 'A';
1802 if (rw & REQ_SYNC) 1813 if (rw & REQ_SYNC)
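Taken together, the blktrace hunks drop the old BARRIER decoding, add FLUSH and FUA, and widen the rwbs buffers from a fixed char rwbs[6] to RWBS_LEN. The stand-alone userspace sketch below reproduces the new decoding order to show why the buffer had to grow; the X_* flag values are arbitrary stand-ins for the real REQ_*/BLK_TC_* bits, not kernel definitions.

```c
/* Userspace illustration of the new rwbs ordering: leading 'F' for FLUSH,
 * the data direction, then 'F' for FUA, 'A', 'S', 'M'. */
#include <stdio.h>

#define X_FLUSH   (1 << 0)
#define X_WRITE   (1 << 1)
#define X_DISCARD (1 << 2)
#define X_READ    (1 << 3)
#define X_FUA     (1 << 4)
#define X_AHEAD   (1 << 5)
#define X_SYNC    (1 << 6)
#define X_META    (1 << 7)

static void fill_rwbs(char *rwbs, unsigned int rw)
{
	int i = 0;

	if (rw & X_FLUSH)
		rwbs[i++] = 'F';
	if (rw & X_WRITE)
		rwbs[i++] = 'W';
	else if (rw & X_DISCARD)
		rwbs[i++] = 'D';
	else if (rw & X_READ)
		rwbs[i++] = 'R';
	else
		rwbs[i++] = 'N';
	if (rw & X_FUA)
		rwbs[i++] = 'F';
	if (rw & X_AHEAD)
		rwbs[i++] = 'A';
	if (rw & X_SYNC)
		rwbs[i++] = 'S';
	if (rw & X_META)
		rwbs[i++] = 'M';
	rwbs[i] = '\0';
}

int main(void)
{
	/* up to 6 flag characters plus the NUL terminator: one more than
	 * the old char rwbs[6] could hold once FLUSH and FUA are decoded */
	char rwbs[8];

	fill_rwbs(rwbs, X_FLUSH | X_WRITE | X_FUA | X_SYNC);
	printf("%s\n", rwbs);	/* prints "FWFS" */
	return 0;
}
```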
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 908038f5744..798b16cd40f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -32,7 +32,6 @@
32 32
33#include <trace/events/sched.h> 33#include <trace/events/sched.h>
34 34
35#include <asm/ftrace.h>
36#include <asm/setup.h> 35#include <asm/setup.h>
37 36
38#include "trace_output.h" 37#include "trace_output.h"
@@ -82,14 +81,14 @@ static int ftrace_disabled __read_mostly;
82 81
83static DEFINE_MUTEX(ftrace_lock); 82static DEFINE_MUTEX(ftrace_lock);
84 83
85static struct ftrace_ops ftrace_list_end __read_mostly = 84static struct ftrace_ops ftrace_list_end __read_mostly = {
86{
87 .func = ftrace_stub, 85 .func = ftrace_stub,
88}; 86};
89 87
90static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; 88static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
91static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; 89static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
92ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; 90ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
91static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub;
93ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; 92ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
94ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 93ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
95static struct ftrace_ops global_ops; 94static struct ftrace_ops global_ops;
@@ -148,6 +147,7 @@ void clear_ftrace_function(void)
148{ 147{
149 ftrace_trace_function = ftrace_stub; 148 ftrace_trace_function = ftrace_stub;
150 __ftrace_trace_function = ftrace_stub; 149 __ftrace_trace_function = ftrace_stub;
150 __ftrace_trace_function_delay = ftrace_stub;
151 ftrace_pid_function = ftrace_stub; 151 ftrace_pid_function = ftrace_stub;
152} 152}
153 153
@@ -210,7 +210,12 @@ static void update_ftrace_function(void)
210#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST 210#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
211 ftrace_trace_function = func; 211 ftrace_trace_function = func;
212#else 212#else
213#ifdef CONFIG_DYNAMIC_FTRACE
214 /* do not update till all functions have been modified */
215 __ftrace_trace_function_delay = func;
216#else
213 __ftrace_trace_function = func; 217 __ftrace_trace_function = func;
218#endif
214 ftrace_trace_function = ftrace_test_stop_func; 219 ftrace_trace_function = ftrace_test_stop_func;
215#endif 220#endif
216} 221}
@@ -785,8 +790,7 @@ static void unregister_ftrace_profiler(void)
785 unregister_ftrace_graph(); 790 unregister_ftrace_graph();
786} 791}
787#else 792#else
788static struct ftrace_ops ftrace_profile_ops __read_mostly = 793static struct ftrace_ops ftrace_profile_ops __read_mostly = {
789{
790 .func = function_profile_call, 794 .func = function_profile_call,
791}; 795};
792 796
@@ -806,19 +810,10 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
806 size_t cnt, loff_t *ppos) 810 size_t cnt, loff_t *ppos)
807{ 811{
808 unsigned long val; 812 unsigned long val;
809 char buf[64]; /* big enough to hold a number */
810 int ret; 813 int ret;
811 814
812 if (cnt >= sizeof(buf)) 815 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
813 return -EINVAL; 816 if (ret)
814
815 if (copy_from_user(&buf, ubuf, cnt))
816 return -EFAULT;
817
818 buf[cnt] = 0;
819
820 ret = strict_strtoul(buf, 10, &val);
821 if (ret < 0)
822 return ret; 817 return ret;
823 818
824 val = !!val; 819 val = !!val;
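This hunk, like several below in ring_buffer.c and trace.c, swaps the open-coded copy_from_user()/strict_strtoul() sequence for kstrtoul_from_user(). A minimal sketch of the resulting pattern in a generic write handler; the handler, fops and variable names are placeholders, not part of this patch.

```c
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/fs.h>

static unsigned long my_val;

static ssize_t my_write(struct file *filp, const char __user *ubuf,
			size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	/* parses a base-10 number straight from the user buffer */
	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	my_val = val;
	*ppos += cnt;
	return cnt;
}

static const struct file_operations my_fops = {
	.write = my_write,
};
```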
@@ -952,7 +947,7 @@ struct ftrace_func_probe {
952}; 947};
953 948
954enum { 949enum {
955 FTRACE_ENABLE_CALLS = (1 << 0), 950 FTRACE_UPDATE_CALLS = (1 << 0),
956 FTRACE_DISABLE_CALLS = (1 << 1), 951 FTRACE_DISABLE_CALLS = (1 << 1),
957 FTRACE_UPDATE_TRACE_FUNC = (1 << 2), 952 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
958 FTRACE_START_FUNC_RET = (1 << 3), 953 FTRACE_START_FUNC_RET = (1 << 3),
@@ -1182,8 +1177,14 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
1182 return NULL; 1177 return NULL;
1183} 1178}
1184 1179
1180static void
1181ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash);
1182static void
1183ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash);
1184
1185static int 1185static int
1186ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) 1186ftrace_hash_move(struct ftrace_ops *ops, int enable,
1187 struct ftrace_hash **dst, struct ftrace_hash *src)
1187{ 1188{
1188 struct ftrace_func_entry *entry; 1189 struct ftrace_func_entry *entry;
1189 struct hlist_node *tp, *tn; 1190 struct hlist_node *tp, *tn;
@@ -1193,9 +1194,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
1193 unsigned long key; 1194 unsigned long key;
1194 int size = src->count; 1195 int size = src->count;
1195 int bits = 0; 1196 int bits = 0;
1197 int ret;
1196 int i; 1198 int i;
1197 1199
1198 /* 1200 /*
1201 * Remove the current set, update the hash and add
1202 * them back.
1203 */
1204 ftrace_hash_rec_disable(ops, enable);
1205
1206 /*
1199 * If the new source is empty, just free dst and assign it 1207 * If the new source is empty, just free dst and assign it
1200 * the empty_hash. 1208 * the empty_hash.
1201 */ 1209 */
@@ -1215,9 +1223,10 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
1215 if (bits > FTRACE_HASH_MAX_BITS) 1223 if (bits > FTRACE_HASH_MAX_BITS)
1216 bits = FTRACE_HASH_MAX_BITS; 1224 bits = FTRACE_HASH_MAX_BITS;
1217 1225
1226 ret = -ENOMEM;
1218 new_hash = alloc_ftrace_hash(bits); 1227 new_hash = alloc_ftrace_hash(bits);
1219 if (!new_hash) 1228 if (!new_hash)
1220 return -ENOMEM; 1229 goto out;
1221 1230
1222 size = 1 << src->size_bits; 1231 size = 1 << src->size_bits;
1223 for (i = 0; i < size; i++) { 1232 for (i = 0; i < size; i++) {
@@ -1236,7 +1245,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
1236 rcu_assign_pointer(*dst, new_hash); 1245 rcu_assign_pointer(*dst, new_hash);
1237 free_ftrace_hash_rcu(old_hash); 1246 free_ftrace_hash_rcu(old_hash);
1238 1247
1239 return 0; 1248 ret = 0;
1249 out:
1250 /*
1251 * Enable regardless of ret:
1252 * On success, we enable the new hash.
1253 * On failure, we re-enable the original hash.
1254 */
1255 ftrace_hash_rec_enable(ops, enable);
1256
1257 return ret;
1240} 1258}
1241 1259
1242/* 1260/*
@@ -1498,7 +1516,7 @@ int ftrace_text_reserved(void *start, void *end)
1498 1516
1499 1517
1500static int 1518static int
1501__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 1519__ftrace_replace_code(struct dyn_ftrace *rec, int update)
1502{ 1520{
1503 unsigned long ftrace_addr; 1521 unsigned long ftrace_addr;
1504 unsigned long flag = 0UL; 1522 unsigned long flag = 0UL;
@@ -1506,17 +1524,17 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1506 ftrace_addr = (unsigned long)FTRACE_ADDR; 1524 ftrace_addr = (unsigned long)FTRACE_ADDR;
1507 1525
1508 /* 1526 /*
1509 * If we are enabling tracing: 1527 * If we are updating calls:
1510 * 1528 *
1511 * If the record has a ref count, then we need to enable it 1529 * If the record has a ref count, then we need to enable it
1512 * because someone is using it. 1530 * because someone is using it.
1513 * 1531 *
1514 * Otherwise we make sure its disabled. 1532 * Otherwise we make sure its disabled.
1515 * 1533 *
1516 * If we are disabling tracing, then disable all records that 1534 * If we are disabling calls, then disable all records that
1517 * are enabled. 1535 * are enabled.
1518 */ 1536 */
1519 if (enable && (rec->flags & ~FTRACE_FL_MASK)) 1537 if (update && (rec->flags & ~FTRACE_FL_MASK))
1520 flag = FTRACE_FL_ENABLED; 1538 flag = FTRACE_FL_ENABLED;
1521 1539
1522 /* If the state of this record hasn't changed, then do nothing */ 1540 /* If the state of this record hasn't changed, then do nothing */
@@ -1532,7 +1550,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1532 return ftrace_make_nop(NULL, rec, ftrace_addr); 1550 return ftrace_make_nop(NULL, rec, ftrace_addr);
1533} 1551}
1534 1552
1535static void ftrace_replace_code(int enable) 1553static void ftrace_replace_code(int update)
1536{ 1554{
1537 struct dyn_ftrace *rec; 1555 struct dyn_ftrace *rec;
1538 struct ftrace_page *pg; 1556 struct ftrace_page *pg;
@@ -1546,7 +1564,7 @@ static void ftrace_replace_code(int enable)
1546 if (rec->flags & FTRACE_FL_FREE) 1564 if (rec->flags & FTRACE_FL_FREE)
1547 continue; 1565 continue;
1548 1566
1549 failed = __ftrace_replace_code(rec, enable); 1567 failed = __ftrace_replace_code(rec, update);
1550 if (failed) { 1568 if (failed) {
1551 ftrace_bug(failed, rec->ip); 1569 ftrace_bug(failed, rec->ip);
1552 /* Stop processing */ 1570 /* Stop processing */
@@ -1596,7 +1614,13 @@ static int __ftrace_modify_code(void *data)
1596{ 1614{
1597 int *command = data; 1615 int *command = data;
1598 1616
1599 if (*command & FTRACE_ENABLE_CALLS) 1617 /*
1618 * Do not call function tracer while we update the code.
1619 * We are in stop machine, no worrying about races.
1620 */
1621 function_trace_stop++;
1622
1623 if (*command & FTRACE_UPDATE_CALLS)
1600 ftrace_replace_code(1); 1624 ftrace_replace_code(1);
1601 else if (*command & FTRACE_DISABLE_CALLS) 1625 else if (*command & FTRACE_DISABLE_CALLS)
1602 ftrace_replace_code(0); 1626 ftrace_replace_code(0);
@@ -1609,6 +1633,18 @@ static int __ftrace_modify_code(void *data)
1609 else if (*command & FTRACE_STOP_FUNC_RET) 1633 else if (*command & FTRACE_STOP_FUNC_RET)
1610 ftrace_disable_ftrace_graph_caller(); 1634 ftrace_disable_ftrace_graph_caller();
1611 1635
1636#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
1637 /*
1638 * For archs that call ftrace_test_stop_func(), we must
1639 * wait till after we update all the function callers
1640 * before we update the callback. This keeps different
1641 * ops that record different functions from corrupting
1642 * each other.
1643 */
1644 __ftrace_trace_function = __ftrace_trace_function_delay;
1645#endif
1646 function_trace_stop--;
1647
1612 return 0; 1648 return 0;
1613} 1649}
1614 1650
@@ -1652,7 +1688,7 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
1652 return -ENODEV; 1688 return -ENODEV;
1653 1689
1654 ftrace_start_up++; 1690 ftrace_start_up++;
1655 command |= FTRACE_ENABLE_CALLS; 1691 command |= FTRACE_UPDATE_CALLS;
1656 1692
1657 /* ops marked global share the filter hashes */ 1693 /* ops marked global share the filter hashes */
1658 if (ops->flags & FTRACE_OPS_FL_GLOBAL) { 1694 if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
@@ -1704,8 +1740,7 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command)
1704 if (ops != &global_ops || !global_start_up) 1740 if (ops != &global_ops || !global_start_up)
1705 ops->flags &= ~FTRACE_OPS_FL_ENABLED; 1741 ops->flags &= ~FTRACE_OPS_FL_ENABLED;
1706 1742
1707 if (!ftrace_start_up) 1743 command |= FTRACE_UPDATE_CALLS;
1708 command |= FTRACE_DISABLE_CALLS;
1709 1744
1710 if (saved_ftrace_func != ftrace_trace_function) { 1745 if (saved_ftrace_func != ftrace_trace_function) {
1711 saved_ftrace_func = ftrace_trace_function; 1746 saved_ftrace_func = ftrace_trace_function;
@@ -1727,7 +1762,7 @@ static void ftrace_startup_sysctl(void)
1727 saved_ftrace_func = NULL; 1762 saved_ftrace_func = NULL;
1728 /* ftrace_start_up is true if we want ftrace running */ 1763 /* ftrace_start_up is true if we want ftrace running */
1729 if (ftrace_start_up) 1764 if (ftrace_start_up)
1730 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1765 ftrace_run_update_code(FTRACE_UPDATE_CALLS);
1731} 1766}
1732 1767
1733static void ftrace_shutdown_sysctl(void) 1768static void ftrace_shutdown_sysctl(void)
@@ -1744,10 +1779,36 @@ static cycle_t ftrace_update_time;
1744static unsigned long ftrace_update_cnt; 1779static unsigned long ftrace_update_cnt;
1745unsigned long ftrace_update_tot_cnt; 1780unsigned long ftrace_update_tot_cnt;
1746 1781
1782static int ops_traces_mod(struct ftrace_ops *ops)
1783{
1784 struct ftrace_hash *hash;
1785
1786 hash = ops->filter_hash;
1787 return !!(!hash || !hash->count);
1788}
1789
1747static int ftrace_update_code(struct module *mod) 1790static int ftrace_update_code(struct module *mod)
1748{ 1791{
1749 struct dyn_ftrace *p; 1792 struct dyn_ftrace *p;
1750 cycle_t start, stop; 1793 cycle_t start, stop;
1794 unsigned long ref = 0;
1795
1796 /*
1797 * When adding a module, we need to check if tracers are
1798 * currently enabled and if they are set to trace all functions.
1799 * If they are, we need to enable the module functions as well
1800 * as update the reference counts for those function records.
1801 */
1802 if (mod) {
1803 struct ftrace_ops *ops;
1804
1805 for (ops = ftrace_ops_list;
1806 ops != &ftrace_list_end; ops = ops->next) {
1807 if (ops->flags & FTRACE_OPS_FL_ENABLED &&
1808 ops_traces_mod(ops))
1809 ref++;
1810 }
1811 }
1751 1812
1752 start = ftrace_now(raw_smp_processor_id()); 1813 start = ftrace_now(raw_smp_processor_id());
1753 ftrace_update_cnt = 0; 1814 ftrace_update_cnt = 0;
@@ -1760,7 +1821,7 @@ static int ftrace_update_code(struct module *mod)
1760 1821
1761 p = ftrace_new_addrs; 1822 p = ftrace_new_addrs;
1762 ftrace_new_addrs = p->newlist; 1823 ftrace_new_addrs = p->newlist;
1763 p->flags = 0L; 1824 p->flags = ref;
1764 1825
1765 /* 1826 /*
1766 * Do the initial record conversion from mcount jump 1827 * Do the initial record conversion from mcount jump
@@ -1783,7 +1844,7 @@ static int ftrace_update_code(struct module *mod)
1783 * conversion puts the module to the correct state, thus 1844 * conversion puts the module to the correct state, thus
1784 * passing the ftrace_make_call check. 1845 * passing the ftrace_make_call check.
1785 */ 1846 */
1786 if (ftrace_start_up) { 1847 if (ftrace_start_up && ref) {
1787 int failed = __ftrace_replace_code(p, 1); 1848 int failed = __ftrace_replace_code(p, 1);
1788 if (failed) { 1849 if (failed) {
1789 ftrace_bug(failed, p->ip); 1850 ftrace_bug(failed, p->ip);
@@ -2407,10 +2468,9 @@ ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod)
2407 */ 2468 */
2408 2469
2409static int 2470static int
2410ftrace_mod_callback(char *func, char *cmd, char *param, int enable) 2471ftrace_mod_callback(struct ftrace_hash *hash,
2472 char *func, char *cmd, char *param, int enable)
2411{ 2473{
2412 struct ftrace_ops *ops = &global_ops;
2413 struct ftrace_hash *hash;
2414 char *mod; 2474 char *mod;
2415 int ret = -EINVAL; 2475 int ret = -EINVAL;
2416 2476
@@ -2430,11 +2490,6 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
2430 if (!strlen(mod)) 2490 if (!strlen(mod))
2431 return ret; 2491 return ret;
2432 2492
2433 if (enable)
2434 hash = ops->filter_hash;
2435 else
2436 hash = ops->notrace_hash;
2437
2438 ret = ftrace_match_module_records(hash, func, mod); 2493 ret = ftrace_match_module_records(hash, func, mod);
2439 if (!ret) 2494 if (!ret)
2440 ret = -EINVAL; 2495 ret = -EINVAL;
@@ -2760,7 +2815,7 @@ static int ftrace_process_regex(struct ftrace_hash *hash,
2760 mutex_lock(&ftrace_cmd_mutex); 2815 mutex_lock(&ftrace_cmd_mutex);
2761 list_for_each_entry(p, &ftrace_commands, list) { 2816 list_for_each_entry(p, &ftrace_commands, list) {
2762 if (strcmp(p->name, command) == 0) { 2817 if (strcmp(p->name, command) == 0) {
2763 ret = p->func(func, command, next, enable); 2818 ret = p->func(hash, func, command, next, enable);
2764 goto out_unlock; 2819 goto out_unlock;
2765 } 2820 }
2766 } 2821 }
@@ -2857,7 +2912,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
2857 ftrace_match_records(hash, buf, len); 2912 ftrace_match_records(hash, buf, len);
2858 2913
2859 mutex_lock(&ftrace_lock); 2914 mutex_lock(&ftrace_lock);
2860 ret = ftrace_hash_move(orig_hash, hash); 2915 ret = ftrace_hash_move(ops, enable, orig_hash, hash);
2916 if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED
2917 && ftrace_enabled)
2918 ftrace_run_update_code(FTRACE_UPDATE_CALLS);
2919
2861 mutex_unlock(&ftrace_lock); 2920 mutex_unlock(&ftrace_lock);
2862 2921
2863 mutex_unlock(&ftrace_regex_lock); 2922 mutex_unlock(&ftrace_regex_lock);
@@ -3040,18 +3099,12 @@ ftrace_regex_release(struct inode *inode, struct file *file)
3040 orig_hash = &iter->ops->notrace_hash; 3099 orig_hash = &iter->ops->notrace_hash;
3041 3100
3042 mutex_lock(&ftrace_lock); 3101 mutex_lock(&ftrace_lock);
3043 /* 3102 ret = ftrace_hash_move(iter->ops, filter_hash,
3044 * Remove the current set, update the hash and add 3103 orig_hash, iter->hash);
3045 * them back. 3104 if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED)
3046 */ 3105 && ftrace_enabled)
3047 ftrace_hash_rec_disable(iter->ops, filter_hash); 3106 ftrace_run_update_code(FTRACE_UPDATE_CALLS);
3048 ret = ftrace_hash_move(orig_hash, iter->hash); 3107
3049 if (!ret) {
3050 ftrace_hash_rec_enable(iter->ops, filter_hash);
3051 if (iter->ops->flags & FTRACE_OPS_FL_ENABLED
3052 && ftrace_enabled)
3053 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
3054 }
3055 mutex_unlock(&ftrace_lock); 3108 mutex_unlock(&ftrace_lock);
3056 } 3109 }
3057 free_ftrace_hash(iter->hash); 3110 free_ftrace_hash(iter->hash);
@@ -3330,7 +3383,7 @@ static int ftrace_process_locs(struct module *mod,
3330{ 3383{
3331 unsigned long *p; 3384 unsigned long *p;
3332 unsigned long addr; 3385 unsigned long addr;
3333 unsigned long flags; 3386 unsigned long flags = 0; /* Shut up gcc */
3334 3387
3335 mutex_lock(&ftrace_lock); 3388 mutex_lock(&ftrace_lock);
3336 p = start; 3389 p = start;
@@ -3348,12 +3401,18 @@ static int ftrace_process_locs(struct module *mod,
3348 } 3401 }
3349 3402
3350 /* 3403 /*
3351 * Disable interrupts to prevent interrupts from executing 3404 * We only need to disable interrupts on start up
3352 * code that is being modified. 3405 * because we are modifying code that an interrupt
3406 * may execute, and the modification is not atomic.
3407 * But for modules, nothing runs the code we modify
3408 * until we are finished with it, and there's no
3409 * reason to cause large interrupt latencies while we do it.
3353 */ 3410 */
3354 local_irq_save(flags); 3411 if (!mod)
3412 local_irq_save(flags);
3355 ftrace_update_code(mod); 3413 ftrace_update_code(mod);
3356 local_irq_restore(flags); 3414 if (!mod)
3415 local_irq_restore(flags);
3357 mutex_unlock(&ftrace_lock); 3416 mutex_unlock(&ftrace_lock);
3358 3417
3359 return 0; 3418 return 0;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b0c7aa40794..731201bf4ac 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -997,15 +997,21 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
997 unsigned nr_pages) 997 unsigned nr_pages)
998{ 998{
999 struct buffer_page *bpage, *tmp; 999 struct buffer_page *bpage, *tmp;
1000 unsigned long addr;
1001 LIST_HEAD(pages); 1000 LIST_HEAD(pages);
1002 unsigned i; 1001 unsigned i;
1003 1002
1004 WARN_ON(!nr_pages); 1003 WARN_ON(!nr_pages);
1005 1004
1006 for (i = 0; i < nr_pages; i++) { 1005 for (i = 0; i < nr_pages; i++) {
1006 struct page *page;
1007 /*
1008 * __GFP_NORETRY flag makes sure that the allocation fails
1009 * gracefully without invoking oom-killer and the system is
1010 * not destabilized.
1011 */
1007 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1012 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1008 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 1013 GFP_KERNEL | __GFP_NORETRY,
1014 cpu_to_node(cpu_buffer->cpu));
1009 if (!bpage) 1015 if (!bpage)
1010 goto free_pages; 1016 goto free_pages;
1011 1017
@@ -1013,10 +1019,11 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1013 1019
1014 list_add(&bpage->list, &pages); 1020 list_add(&bpage->list, &pages);
1015 1021
1016 addr = __get_free_page(GFP_KERNEL); 1022 page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
1017 if (!addr) 1023 GFP_KERNEL | __GFP_NORETRY, 0);
1024 if (!page)
1018 goto free_pages; 1025 goto free_pages;
1019 bpage->page = (void *)addr; 1026 bpage->page = page_address(page);
1020 rb_init_page(bpage->page); 1027 rb_init_page(bpage->page);
1021 } 1028 }
1022 1029
@@ -1045,7 +1052,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
1045{ 1052{
1046 struct ring_buffer_per_cpu *cpu_buffer; 1053 struct ring_buffer_per_cpu *cpu_buffer;
1047 struct buffer_page *bpage; 1054 struct buffer_page *bpage;
1048 unsigned long addr; 1055 struct page *page;
1049 int ret; 1056 int ret;
1050 1057
1051 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), 1058 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
@@ -1067,10 +1074,10 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
1067 rb_check_bpage(cpu_buffer, bpage); 1074 rb_check_bpage(cpu_buffer, bpage);
1068 1075
1069 cpu_buffer->reader_page = bpage; 1076 cpu_buffer->reader_page = bpage;
1070 addr = __get_free_page(GFP_KERNEL); 1077 page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
1071 if (!addr) 1078 if (!page)
1072 goto fail_free_reader; 1079 goto fail_free_reader;
1073 bpage->page = (void *)addr; 1080 bpage->page = page_address(page);
1074 rb_init_page(bpage->page); 1081 rb_init_page(bpage->page);
1075 1082
1076 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 1083 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
@@ -1314,7 +1321,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1314 unsigned nr_pages, rm_pages, new_pages; 1321 unsigned nr_pages, rm_pages, new_pages;
1315 struct buffer_page *bpage, *tmp; 1322 struct buffer_page *bpage, *tmp;
1316 unsigned long buffer_size; 1323 unsigned long buffer_size;
1317 unsigned long addr;
1318 LIST_HEAD(pages); 1324 LIST_HEAD(pages);
1319 int i, cpu; 1325 int i, cpu;
1320 1326
@@ -1375,16 +1381,24 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1375 1381
1376 for_each_buffer_cpu(buffer, cpu) { 1382 for_each_buffer_cpu(buffer, cpu) {
1377 for (i = 0; i < new_pages; i++) { 1383 for (i = 0; i < new_pages; i++) {
1384 struct page *page;
1385 /*
1386 * __GFP_NORETRY flag makes sure that the allocation
1387 * fails gracefully without invoking oom-killer and
1388 * the system is not destabilized.
1389 */
1378 bpage = kzalloc_node(ALIGN(sizeof(*bpage), 1390 bpage = kzalloc_node(ALIGN(sizeof(*bpage),
1379 cache_line_size()), 1391 cache_line_size()),
1380 GFP_KERNEL, cpu_to_node(cpu)); 1392 GFP_KERNEL | __GFP_NORETRY,
1393 cpu_to_node(cpu));
1381 if (!bpage) 1394 if (!bpage)
1382 goto free_pages; 1395 goto free_pages;
1383 list_add(&bpage->list, &pages); 1396 list_add(&bpage->list, &pages);
1384 addr = __get_free_page(GFP_KERNEL); 1397 page = alloc_pages_node(cpu_to_node(cpu),
1385 if (!addr) 1398 GFP_KERNEL | __GFP_NORETRY, 0);
1399 if (!page)
1386 goto free_pages; 1400 goto free_pages;
1387 bpage->page = (void *)addr; 1401 bpage->page = page_address(page);
1388 rb_init_page(bpage->page); 1402 rb_init_page(bpage->page);
1389 } 1403 }
1390 } 1404 }
@@ -3730,16 +3744,17 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
3730 * Returns: 3744 * Returns:
3731 * The page allocated, or NULL on error. 3745 * The page allocated, or NULL on error.
3732 */ 3746 */
3733void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) 3747void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
3734{ 3748{
3735 struct buffer_data_page *bpage; 3749 struct buffer_data_page *bpage;
3736 unsigned long addr; 3750 struct page *page;
3737 3751
3738 addr = __get_free_page(GFP_KERNEL); 3752 page = alloc_pages_node(cpu_to_node(cpu),
3739 if (!addr) 3753 GFP_KERNEL | __GFP_NORETRY, 0);
3754 if (!page)
3740 return NULL; 3755 return NULL;
3741 3756
3742 bpage = (void *)addr; 3757 bpage = page_address(page);
3743 3758
3744 rb_init_page(bpage); 3759 rb_init_page(bpage);
3745 3760
@@ -3978,20 +3993,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
3978 size_t cnt, loff_t *ppos) 3993 size_t cnt, loff_t *ppos)
3979{ 3994{
3980 unsigned long *p = filp->private_data; 3995 unsigned long *p = filp->private_data;
3981 char buf[64];
3982 unsigned long val; 3996 unsigned long val;
3983 int ret; 3997 int ret;
3984 3998
3985 if (cnt >= sizeof(buf)) 3999 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3986 return -EINVAL; 4000 if (ret)
3987
3988 if (copy_from_user(&buf, ubuf, cnt))
3989 return -EFAULT;
3990
3991 buf[cnt] = 0;
3992
3993 ret = strict_strtoul(buf, 10, &val);
3994 if (ret < 0)
3995 return ret; 4001 return ret;
3996 4002
3997 if (val) 4003 if (val)
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 302f8a61463..a5457d577b9 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -106,7 +106,7 @@ static enum event_status read_page(int cpu)
106 int inc; 106 int inc;
107 int i; 107 int i;
108 108
109 bpage = ring_buffer_alloc_read_page(buffer); 109 bpage = ring_buffer_alloc_read_page(buffer, cpu);
110 if (!bpage) 110 if (!bpage)
111 return EVENT_DROPPED; 111 return EVENT_DROPPED;
112 112
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ee9c921d7f2..17a2d44e1af 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -343,26 +343,27 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
343static int trace_stop_count; 343static int trace_stop_count;
344static DEFINE_SPINLOCK(tracing_start_lock); 344static DEFINE_SPINLOCK(tracing_start_lock);
345 345
346static void wakeup_work_handler(struct work_struct *work)
347{
348 wake_up(&trace_wait);
349}
350
351static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler);
352
346/** 353/**
347 * trace_wake_up - wake up tasks waiting for trace input 354 * trace_wake_up - wake up tasks waiting for trace input
348 * 355 *
349 * Simply wakes up any task that is blocked on the trace_wait 356 * Schedules a delayed work to wake up any task that is blocked on the
350 * queue. These is used with trace_poll for tasks polling the trace. 357 * trace_wait queue. These is used with trace_poll for tasks polling the
358 * trace.
351 */ 359 */
352void trace_wake_up(void) 360void trace_wake_up(void)
353{ 361{
354 int cpu; 362 const unsigned long delay = msecs_to_jiffies(2);
355 363
356 if (trace_flags & TRACE_ITER_BLOCK) 364 if (trace_flags & TRACE_ITER_BLOCK)
357 return; 365 return;
358 /* 366 schedule_delayed_work(&wakeup_work, delay);
359 * The runqueue_is_locked() can fail, but this is the best we
360 * have for now:
361 */
362 cpu = get_cpu();
363 if (!runqueue_is_locked(cpu))
364 wake_up(&trace_wait);
365 put_cpu();
366} 367}
367 368
368static int __init set_buf_size(char *str) 369static int __init set_buf_size(char *str)
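The rewritten trace_wake_up() defers the actual wake_up() to a delayed work item instead of poking the runqueue directly. A self-contained sketch of that pattern with placeholder names, mirroring the 2 ms delay used above:

```c
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/jiffies.h>

static DECLARE_WAIT_QUEUE_HEAD(my_wait);

static void my_wakeup_handler(struct work_struct *work)
{
	wake_up(&my_wait);
}

static DECLARE_DELAYED_WORK(my_wakeup_work, my_wakeup_handler);

static void my_trace_wake_up(void)
{
	/* callable where a direct wake_up() on the runqueue would be unsafe;
	 * the real wakeup runs a little later in workqueue context */
	schedule_delayed_work(&my_wakeup_work, msecs_to_jiffies(2));
}
```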
@@ -424,6 +425,7 @@ static const char *trace_options[] = {
424 "graph-time", 425 "graph-time",
425 "record-cmd", 426 "record-cmd",
426 "overwrite", 427 "overwrite",
428 "disable_on_free",
427 NULL 429 NULL
428}; 430};
429 431
@@ -1191,6 +1193,18 @@ void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
1191} 1193}
1192EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); 1194EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
1193 1195
1196void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1197 struct ring_buffer_event *event,
1198 unsigned long flags, int pc,
1199 struct pt_regs *regs)
1200{
1201 ring_buffer_unlock_commit(buffer, event);
1202
1203 ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1204 ftrace_trace_userstack(buffer, flags, pc);
1205}
1206EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs);
1207
1194void trace_current_buffer_discard_commit(struct ring_buffer *buffer, 1208void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1195 struct ring_buffer_event *event) 1209 struct ring_buffer_event *event)
1196{ 1210{
@@ -1234,30 +1248,103 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
1234} 1248}
1235 1249
1236#ifdef CONFIG_STACKTRACE 1250#ifdef CONFIG_STACKTRACE
1251
1252#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1253struct ftrace_stack {
1254 unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
1255};
1256
1257static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1258static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1259
1237static void __ftrace_trace_stack(struct ring_buffer *buffer, 1260static void __ftrace_trace_stack(struct ring_buffer *buffer,
1238 unsigned long flags, 1261 unsigned long flags,
1239 int skip, int pc) 1262 int skip, int pc, struct pt_regs *regs)
1240{ 1263{
1241 struct ftrace_event_call *call = &event_kernel_stack; 1264 struct ftrace_event_call *call = &event_kernel_stack;
1242 struct ring_buffer_event *event; 1265 struct ring_buffer_event *event;
1243 struct stack_entry *entry; 1266 struct stack_entry *entry;
1244 struct stack_trace trace; 1267 struct stack_trace trace;
1268 int use_stack;
1269 int size = FTRACE_STACK_ENTRIES;
1270
1271 trace.nr_entries = 0;
1272 trace.skip = skip;
1273
1274 /*
1275 * Since events can happen in NMIs there's no safe way to
1276 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1277 * or NMI comes in, it will just have to use the default
1278 * FTRACE_STACK_SIZE.
1279 */
1280 preempt_disable_notrace();
1281
1282 use_stack = ++__get_cpu_var(ftrace_stack_reserve);
1283 /*
1284 * We don't need any atomic variables, just a barrier.
1285 * If an interrupt comes in, we don't care, because it would
1286 * have exited and put the counter back to what we want.
1287 * We just need a barrier to keep gcc from moving things
1288 * around.
1289 */
1290 barrier();
1291 if (use_stack == 1) {
1292 trace.entries = &__get_cpu_var(ftrace_stack).calls[0];
1293 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
1294
1295 if (regs)
1296 save_stack_trace_regs(regs, &trace);
1297 else
1298 save_stack_trace(&trace);
1299
1300 if (trace.nr_entries > size)
1301 size = trace.nr_entries;
1302 } else
1303 /* From now on, use_stack is a boolean */
1304 use_stack = 0;
1305
1306 size *= sizeof(unsigned long);
1245 1307
1246 event = trace_buffer_lock_reserve(buffer, TRACE_STACK, 1308 event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1247 sizeof(*entry), flags, pc); 1309 sizeof(*entry) + size, flags, pc);
1248 if (!event) 1310 if (!event)
1249 return; 1311 goto out;
1250 entry = ring_buffer_event_data(event); 1312 entry = ring_buffer_event_data(event);
1251 memset(&entry->caller, 0, sizeof(entry->caller));
1252 1313
1253 trace.nr_entries = 0; 1314 memset(&entry->caller, 0, size);
1254 trace.max_entries = FTRACE_STACK_ENTRIES; 1315
1255 trace.skip = skip; 1316 if (use_stack)
1256 trace.entries = entry->caller; 1317 memcpy(&entry->caller, trace.entries,
1318 trace.nr_entries * sizeof(unsigned long));
1319 else {
1320 trace.max_entries = FTRACE_STACK_ENTRIES;
1321 trace.entries = entry->caller;
1322 if (regs)
1323 save_stack_trace_regs(regs, &trace);
1324 else
1325 save_stack_trace(&trace);
1326 }
1327
1328 entry->size = trace.nr_entries;
1257 1329
1258 save_stack_trace(&trace);
1259 if (!filter_check_discard(call, entry, buffer, event)) 1330 if (!filter_check_discard(call, entry, buffer, event))
1260 ring_buffer_unlock_commit(buffer, event); 1331 ring_buffer_unlock_commit(buffer, event);
1332
1333 out:
1334 /* Again, don't let gcc optimize things here */
1335 barrier();
1336 __get_cpu_var(ftrace_stack_reserve)--;
1337 preempt_enable_notrace();
1338
1339}
1340
1341void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1342 int skip, int pc, struct pt_regs *regs)
1343{
1344 if (!(trace_flags & TRACE_ITER_STACKTRACE))
1345 return;
1346
1347 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1261} 1348}
1262 1349
1263void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, 1350void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
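The reservation logic added to __ftrace_trace_stack() keeps interrupts and NMIs from trampling the shared per-cpu scratch stack without using atomics: the first user on a CPU owns the big buffer, nested entries fall back to a small local one. A stripped-down sketch of the same idea with placeholder names and buffer sizes:

```c
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/compiler.h>

struct scratch {
	unsigned long slots[256];
};

static DEFINE_PER_CPU(struct scratch, scratch_area);
static DEFINE_PER_CPU(int, scratch_reserve);

static void use_scratch(void)
{
	unsigned long small[8];
	unsigned long *buf;
	int first;

	preempt_disable_notrace();
	first = (++__get_cpu_var(scratch_reserve) == 1);
	barrier();		/* compiler barrier only: all accesses are CPU-local */

	if (first)
		buf = __get_cpu_var(scratch_area).slots;	/* exclusive owner */
	else
		buf = small;					/* nested irq/NMI entry */

	buf[0] = 0;		/* ... fill whichever buffer was chosen ... */

	barrier();
	__get_cpu_var(scratch_reserve)--;
	preempt_enable_notrace();
}
```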
@@ -1266,13 +1353,13 @@ void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1266 if (!(trace_flags & TRACE_ITER_STACKTRACE)) 1353 if (!(trace_flags & TRACE_ITER_STACKTRACE))
1267 return; 1354 return;
1268 1355
1269 __ftrace_trace_stack(buffer, flags, skip, pc); 1356 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1270} 1357}
1271 1358
1272void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, 1359void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1273 int pc) 1360 int pc)
1274{ 1361{
1275 __ftrace_trace_stack(tr->buffer, flags, skip, pc); 1362 __ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL);
1276} 1363}
1277 1364
1278/** 1365/**
@@ -1288,7 +1375,7 @@ void trace_dump_stack(void)
1288 local_save_flags(flags); 1375 local_save_flags(flags);
1289 1376
1290 /* skipping 3 traces, seems to get us at the caller of this function */ 1377 /* skipping 3 traces, seems to get us at the caller of this function */
1291 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); 1378 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL);
1292} 1379}
1293 1380
1294static DEFINE_PER_CPU(int, user_stack_count); 1381static DEFINE_PER_CPU(int, user_stack_count);
@@ -1536,7 +1623,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
1536 1623
1537 ftrace_enable_cpu(); 1624 ftrace_enable_cpu();
1538 1625
1539 return event ? ring_buffer_event_data(event) : NULL; 1626 if (event) {
1627 iter->ent_size = ring_buffer_event_length(event);
1628 return ring_buffer_event_data(event);
1629 }
1630 iter->ent_size = 0;
1631 return NULL;
1540} 1632}
1541 1633
1542static struct trace_entry * 1634static struct trace_entry *
@@ -2051,6 +2143,9 @@ void trace_default_header(struct seq_file *m)
2051{ 2143{
2052 struct trace_iterator *iter = m->private; 2144 struct trace_iterator *iter = m->private;
2053 2145
2146 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2147 return;
2148
2054 if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 2149 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2055 /* print nothing if the buffers are empty */ 2150 /* print nothing if the buffers are empty */
2056 if (trace_empty(iter)) 2151 if (trace_empty(iter))
@@ -2701,20 +2796,11 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2701 size_t cnt, loff_t *ppos) 2796 size_t cnt, loff_t *ppos)
2702{ 2797{
2703 struct trace_array *tr = filp->private_data; 2798 struct trace_array *tr = filp->private_data;
2704 char buf[64];
2705 unsigned long val; 2799 unsigned long val;
2706 int ret; 2800 int ret;
2707 2801
2708 if (cnt >= sizeof(buf)) 2802 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
2709 return -EINVAL; 2803 if (ret)
2710
2711 if (copy_from_user(&buf, ubuf, cnt))
2712 return -EFAULT;
2713
2714 buf[cnt] = 0;
2715
2716 ret = strict_strtoul(buf, 10, &val);
2717 if (ret < 0)
2718 return ret; 2804 return ret;
2719 2805
2720 val = !!val; 2806 val = !!val;
@@ -2767,7 +2853,7 @@ int tracer_init(struct tracer *t, struct trace_array *tr)
2767 return t->init(tr); 2853 return t->init(tr);
2768} 2854}
2769 2855
2770static int tracing_resize_ring_buffer(unsigned long size) 2856static int __tracing_resize_ring_buffer(unsigned long size)
2771{ 2857{
2772 int ret; 2858 int ret;
2773 2859
@@ -2819,6 +2905,41 @@ static int tracing_resize_ring_buffer(unsigned long size)
2819 return ret; 2905 return ret;
2820} 2906}
2821 2907
2908static ssize_t tracing_resize_ring_buffer(unsigned long size)
2909{
2910 int cpu, ret = size;
2911
2912 mutex_lock(&trace_types_lock);
2913
2914 tracing_stop();
2915
2916 /* disable all cpu buffers */
2917 for_each_tracing_cpu(cpu) {
2918 if (global_trace.data[cpu])
2919 atomic_inc(&global_trace.data[cpu]->disabled);
2920 if (max_tr.data[cpu])
2921 atomic_inc(&max_tr.data[cpu]->disabled);
2922 }
2923
2924 if (size != global_trace.entries)
2925 ret = __tracing_resize_ring_buffer(size);
2926
2927 if (ret < 0)
2928 ret = -ENOMEM;
2929
2930 for_each_tracing_cpu(cpu) {
2931 if (global_trace.data[cpu])
2932 atomic_dec(&global_trace.data[cpu]->disabled);
2933 if (max_tr.data[cpu])
2934 atomic_dec(&max_tr.data[cpu]->disabled);
2935 }
2936
2937 tracing_start();
2938 mutex_unlock(&trace_types_lock);
2939
2940 return ret;
2941}
2942
2822 2943
2823/** 2944/**
2824 * tracing_update_buffers - used by tracing facility to expand ring buffers 2945 * tracing_update_buffers - used by tracing facility to expand ring buffers
@@ -2836,7 +2957,7 @@ int tracing_update_buffers(void)
2836 2957
2837 mutex_lock(&trace_types_lock); 2958 mutex_lock(&trace_types_lock);
2838 if (!ring_buffer_expanded) 2959 if (!ring_buffer_expanded)
2839 ret = tracing_resize_ring_buffer(trace_buf_size); 2960 ret = __tracing_resize_ring_buffer(trace_buf_size);
2840 mutex_unlock(&trace_types_lock); 2961 mutex_unlock(&trace_types_lock);
2841 2962
2842 return ret; 2963 return ret;
@@ -2860,7 +2981,7 @@ static int tracing_set_tracer(const char *buf)
2860 mutex_lock(&trace_types_lock); 2981 mutex_lock(&trace_types_lock);
2861 2982
2862 if (!ring_buffer_expanded) { 2983 if (!ring_buffer_expanded) {
2863 ret = tracing_resize_ring_buffer(trace_buf_size); 2984 ret = __tracing_resize_ring_buffer(trace_buf_size);
2864 if (ret < 0) 2985 if (ret < 0)
2865 goto out; 2986 goto out;
2866 ret = 0; 2987 ret = 0;
@@ -2966,20 +3087,11 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2966 size_t cnt, loff_t *ppos) 3087 size_t cnt, loff_t *ppos)
2967{ 3088{
2968 unsigned long *ptr = filp->private_data; 3089 unsigned long *ptr = filp->private_data;
2969 char buf[64];
2970 unsigned long val; 3090 unsigned long val;
2971 int ret; 3091 int ret;
2972 3092
2973 if (cnt >= sizeof(buf)) 3093 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
2974 return -EINVAL; 3094 if (ret)
2975
2976 if (copy_from_user(&buf, ubuf, cnt))
2977 return -EFAULT;
2978
2979 buf[cnt] = 0;
2980
2981 ret = strict_strtoul(buf, 10, &val);
2982 if (ret < 0)
2983 return ret; 3095 return ret;
2984 3096
2985 *ptr = val * 1000; 3097 *ptr = val * 1000;
@@ -3434,67 +3546,54 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3434 size_t cnt, loff_t *ppos) 3546 size_t cnt, loff_t *ppos)
3435{ 3547{
3436 unsigned long val; 3548 unsigned long val;
3437 char buf[64]; 3549 int ret;
3438 int ret, cpu;
3439
3440 if (cnt >= sizeof(buf))
3441 return -EINVAL;
3442
3443 if (copy_from_user(&buf, ubuf, cnt))
3444 return -EFAULT;
3445
3446 buf[cnt] = 0;
3447 3550
3448 ret = strict_strtoul(buf, 10, &val); 3551 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3449 if (ret < 0) 3552 if (ret)
3450 return ret; 3553 return ret;
3451 3554
3452 /* must have at least 1 entry */ 3555 /* must have at least 1 entry */
3453 if (!val) 3556 if (!val)
3454 return -EINVAL; 3557 return -EINVAL;
3455 3558
3456 mutex_lock(&trace_types_lock);
3457
3458 tracing_stop();
3459
3460 /* disable all cpu buffers */
3461 for_each_tracing_cpu(cpu) {
3462 if (global_trace.data[cpu])
3463 atomic_inc(&global_trace.data[cpu]->disabled);
3464 if (max_tr.data[cpu])
3465 atomic_inc(&max_tr.data[cpu]->disabled);
3466 }
3467
3468 /* value is in KB */ 3559 /* value is in KB */
3469 val <<= 10; 3560 val <<= 10;
3470 3561
3471 if (val != global_trace.entries) { 3562 ret = tracing_resize_ring_buffer(val);
3472 ret = tracing_resize_ring_buffer(val); 3563 if (ret < 0)
3473 if (ret < 0) { 3564 return ret;
3474 cnt = ret;
3475 goto out;
3476 }
3477 }
3478 3565
3479 *ppos += cnt; 3566 *ppos += cnt;
3480 3567
3481 /* If check pages failed, return ENOMEM */ 3568 return cnt;
3482 if (tracing_disabled) 3569}
3483 cnt = -ENOMEM;
3484 out:
3485 for_each_tracing_cpu(cpu) {
3486 if (global_trace.data[cpu])
3487 atomic_dec(&global_trace.data[cpu]->disabled);
3488 if (max_tr.data[cpu])
3489 atomic_dec(&max_tr.data[cpu]->disabled);
3490 }
3491 3570
3492 tracing_start(); 3571static ssize_t
3493 mutex_unlock(&trace_types_lock); 3572tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
3573 size_t cnt, loff_t *ppos)
3574{
3575 /*
3576 * There is no need to read what the user has written, this function
3577 * is just to make sure that there is no error when "echo" is used
3578 */
3579
3580 *ppos += cnt;
3494 3581
3495 return cnt; 3582 return cnt;
3496} 3583}
3497 3584
3585static int
3586tracing_free_buffer_release(struct inode *inode, struct file *filp)
3587{
3588 /* disable tracing ? */
3589 if (trace_flags & TRACE_ITER_STOP_ON_FREE)
3590 tracing_off();
3591 /* resize the ring buffer to 0 */
3592 tracing_resize_ring_buffer(0);
3593
3594 return 0;
3595}
3596
3498static int mark_printk(const char *fmt, ...) 3597static int mark_printk(const char *fmt, ...)
3499{ 3598{
3500 int ret; 3599 int ret;
@@ -3640,6 +3739,11 @@ static const struct file_operations tracing_entries_fops = {
3640 .llseek = generic_file_llseek, 3739 .llseek = generic_file_llseek,
3641}; 3740};
3642 3741
3742static const struct file_operations tracing_free_buffer_fops = {
3743 .write = tracing_free_buffer_write,
3744 .release = tracing_free_buffer_release,
3745};
3746
3643static const struct file_operations tracing_mark_fops = { 3747static const struct file_operations tracing_mark_fops = {
3644 .open = tracing_open_generic, 3748 .open = tracing_open_generic,
3645 .write = tracing_mark_write, 3749 .write = tracing_mark_write,
@@ -3696,7 +3800,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3696 return 0; 3800 return 0;
3697 3801
3698 if (!info->spare) 3802 if (!info->spare)
3699 info->spare = ring_buffer_alloc_read_page(info->tr->buffer); 3803 info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu);
3700 if (!info->spare) 3804 if (!info->spare)
3701 return -ENOMEM; 3805 return -ENOMEM;
3702 3806
@@ -3704,8 +3808,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3704 if (info->read < PAGE_SIZE) 3808 if (info->read < PAGE_SIZE)
3705 goto read; 3809 goto read;
3706 3810
3707 info->read = 0;
3708
3709 trace_access_lock(info->cpu); 3811 trace_access_lock(info->cpu);
3710 ret = ring_buffer_read_page(info->tr->buffer, 3812 ret = ring_buffer_read_page(info->tr->buffer,
3711 &info->spare, 3813 &info->spare,
@@ -3715,6 +3817,8 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3715 if (ret < 0) 3817 if (ret < 0)
3716 return 0; 3818 return 0;
3717 3819
3820 info->read = 0;
3821
3718read: 3822read:
3719 size = PAGE_SIZE - info->read; 3823 size = PAGE_SIZE - info->read;
3720 if (size > count) 3824 if (size > count)
@@ -3853,7 +3957,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3853 3957
3854 ref->ref = 1; 3958 ref->ref = 1;
3855 ref->buffer = info->tr->buffer; 3959 ref->buffer = info->tr->buffer;
3856 ref->page = ring_buffer_alloc_read_page(ref->buffer); 3960 ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu);
3857 if (!ref->page) { 3961 if (!ref->page) {
3858 kfree(ref); 3962 kfree(ref);
3859 break; 3963 break;
@@ -3862,8 +3966,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3862 r = ring_buffer_read_page(ref->buffer, &ref->page, 3966 r = ring_buffer_read_page(ref->buffer, &ref->page,
3863 len, info->cpu, 1); 3967 len, info->cpu, 1);
3864 if (r < 0) { 3968 if (r < 0) {
3865 ring_buffer_free_read_page(ref->buffer, 3969 ring_buffer_free_read_page(ref->buffer, ref->page);
3866 ref->page);
3867 kfree(ref); 3970 kfree(ref);
3868 break; 3971 break;
3869 } 3972 }
@@ -4099,19 +4202,10 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
4099{ 4202{
4100 struct trace_option_dentry *topt = filp->private_data; 4203 struct trace_option_dentry *topt = filp->private_data;
4101 unsigned long val; 4204 unsigned long val;
4102 char buf[64];
4103 int ret; 4205 int ret;
4104 4206
4105 if (cnt >= sizeof(buf)) 4207 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4106 return -EINVAL; 4208 if (ret)
4107
4108 if (copy_from_user(&buf, ubuf, cnt))
4109 return -EFAULT;
4110
4111 buf[cnt] = 0;
4112
4113 ret = strict_strtoul(buf, 10, &val);
4114 if (ret < 0)
4115 return ret; 4209 return ret;
4116 4210
4117 if (val != 0 && val != 1) 4211 if (val != 0 && val != 1)
@@ -4159,20 +4253,11 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
4159 loff_t *ppos) 4253 loff_t *ppos)
4160{ 4254{
4161 long index = (long)filp->private_data; 4255 long index = (long)filp->private_data;
4162 char buf[64];
4163 unsigned long val; 4256 unsigned long val;
4164 int ret; 4257 int ret;
4165 4258
4166 if (cnt >= sizeof(buf)) 4259 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4167 return -EINVAL; 4260 if (ret)
4168
4169 if (copy_from_user(&buf, ubuf, cnt))
4170 return -EFAULT;
4171
4172 buf[cnt] = 0;
4173
4174 ret = strict_strtoul(buf, 10, &val);
4175 if (ret < 0)
4176 return ret; 4261 return ret;
4177 4262
4178 if (val != 0 && val != 1) 4263 if (val != 0 && val != 1)
@@ -4365,6 +4450,9 @@ static __init int tracer_init_debugfs(void)
4365 trace_create_file("buffer_size_kb", 0644, d_tracer, 4450 trace_create_file("buffer_size_kb", 0644, d_tracer,
4366 &global_trace, &tracing_entries_fops); 4451 &global_trace, &tracing_entries_fops);
4367 4452
4453 trace_create_file("free_buffer", 0644, d_tracer,
4454 &global_trace, &tracing_free_buffer_fops);
4455
4368 trace_create_file("trace_marker", 0220, d_tracer, 4456 trace_create_file("trace_marker", 0220, d_tracer,
4369 NULL, &tracing_mark_fops); 4457 NULL, &tracing_mark_fops);
4370 4458
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 229f8591f61..616846bcfee 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -2,7 +2,7 @@
2#define _LINUX_KERNEL_TRACE_H 2#define _LINUX_KERNEL_TRACE_H
3 3
4#include <linux/fs.h> 4#include <linux/fs.h>
5#include <asm/atomic.h> 5#include <linux/atomic.h>
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/clocksource.h> 7#include <linux/clocksource.h>
8#include <linux/ring_buffer.h> 8#include <linux/ring_buffer.h>
@@ -278,6 +278,29 @@ struct tracer {
278}; 278};
279 279
280 280
281/* Only current can touch trace_recursion */
282#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
283#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
284
285/* Ring buffer has the 10 LSB bits to count */
286#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
287
288/* for function tracing recursion */
289#define TRACE_INTERNAL_BIT (1<<11)
290#define TRACE_GLOBAL_BIT (1<<12)
291/*
292 * Abuse of the trace_recursion.
293 * As we need a way to maintain state if we are tracing the function
294 * graph in irq because we want to trace a particular function that
295 * was called in irq context but we have irq tracing off. Since this
296 * can only be modified by current, we can reuse trace_recursion.
297 */
298#define TRACE_IRQ_BIT (1<<13)
299
300#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0)
301#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0)
302#define trace_recursion_test(bit) ((current)->trace_recursion & (bit))
303
281#define TRACE_PIPE_ALL_CPU -1 304#define TRACE_PIPE_ALL_CPU -1
282 305
283int tracer_init(struct tracer *t, struct trace_array *tr); 306int tracer_init(struct tracer *t, struct trace_array *tr);
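These recursion helpers move from the bottom of trace.h to the top so that ftrace_graph_addr() further down in the header can use the new TRACE_IRQ_BIT. The pattern they support is a cheap, task-local recursion guard; a short illustrative caller is shown below. The function name is made up and the macros above are assumed to be in scope.

```c
/* Illustrative only: guard a tracing callback against re-entering itself
 * on the same task, using the helpers defined above in trace.h. */
static void my_trace_callback(unsigned long ip)
{
	if (trace_recursion_test(TRACE_INTERNAL_BIT))
		return;				/* already tracing on this task */

	trace_recursion_set(TRACE_INTERNAL_BIT);
	/* ... record the event for ip ... */
	trace_recursion_clear(TRACE_INTERNAL_BIT);
}
```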
@@ -389,6 +412,9 @@ void update_max_tr_single(struct trace_array *tr,
389void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, 412void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
390 int skip, int pc); 413 int skip, int pc);
391 414
415void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
416 int skip, int pc, struct pt_regs *regs);
417
392void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, 418void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
393 int pc); 419 int pc);
394 420
@@ -400,6 +426,12 @@ static inline void ftrace_trace_stack(struct ring_buffer *buffer,
400{ 426{
401} 427}
402 428
429static inline void ftrace_trace_stack_regs(struct ring_buffer *buffer,
430 unsigned long flags, int skip,
431 int pc, struct pt_regs *regs)
432{
433}
434
403static inline void ftrace_trace_userstack(struct ring_buffer *buffer, 435static inline void ftrace_trace_userstack(struct ring_buffer *buffer,
404 unsigned long flags, int pc) 436 unsigned long flags, int pc)
405{ 437{
@@ -507,8 +539,18 @@ static inline int ftrace_graph_addr(unsigned long addr)
507 return 1; 539 return 1;
508 540
509 for (i = 0; i < ftrace_graph_count; i++) { 541 for (i = 0; i < ftrace_graph_count; i++) {
510 if (addr == ftrace_graph_funcs[i]) 542 if (addr == ftrace_graph_funcs[i]) {
543 /*
544 * If no irqs are to be traced, but a set_graph_function
545 * is set, and called by an interrupt handler, we still
546 * want to trace it.
547 */
548 if (in_irq())
549 trace_recursion_set(TRACE_IRQ_BIT);
550 else
551 trace_recursion_clear(TRACE_IRQ_BIT);
511 return 1; 552 return 1;
553 }
512 } 554 }
513 555
514 return 0; 556 return 0;
@@ -609,6 +651,7 @@ enum trace_iterator_flags {
609 TRACE_ITER_GRAPH_TIME = 0x80000, 651 TRACE_ITER_GRAPH_TIME = 0x80000,
610 TRACE_ITER_RECORD_CMD = 0x100000, 652 TRACE_ITER_RECORD_CMD = 0x100000,
611 TRACE_ITER_OVERWRITE = 0x200000, 653 TRACE_ITER_OVERWRITE = 0x200000,
654 TRACE_ITER_STOP_ON_FREE = 0x400000,
612}; 655};
613 656
614/* 657/*
@@ -677,6 +720,7 @@ struct event_subsystem {
677 struct dentry *entry; 720 struct dentry *entry;
678 struct event_filter *filter; 721 struct event_filter *filter;
679 int nr_events; 722 int nr_events;
723 int ref_count;
680}; 724};
681 725
682#define FILTER_PRED_INVALID ((unsigned short)-1) 726#define FILTER_PRED_INVALID ((unsigned short)-1)
@@ -784,19 +828,4 @@ extern const char *__stop___trace_bprintk_fmt[];
784 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) 828 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
785#include "trace_entries.h" 829#include "trace_entries.h"
786 830
787/* Only current can touch trace_recursion */
788#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
789#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
790
791/* Ring buffer has the 10 LSB bits to count */
792#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
793
794/* for function tracing recursion */
795#define TRACE_INTERNAL_BIT (1<<11)
796#define TRACE_GLOBAL_BIT (1<<12)
797
798#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0)
799#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0)
800#define trace_recursion_test(bit) ((current)->trace_recursion & (bit))
801
802#endif /* _LINUX_KERNEL_TRACE_H */ 831#endif /* _LINUX_KERNEL_TRACE_H */
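
The trace_recursion helpers are moved from the bottom of trace.h (removed in the hunk above) to the top of the file so that the new TRACE_IRQ_BIT can be consumed by ftrace_graph_addr() later in the same header. A minimal sketch of how the bit is meant to be used, mirroring the ftrace_graph_ignore_irqs() change further below (example_skip_irqs is a hypothetical name, shown only for illustration):

/* Skip irq tracing unless this task hit a filtered function in irq context. */
static inline int example_skip_irqs(void)
{
	if (trace_recursion_test(TRACE_IRQ_BIT))
		return 0;	/* TRACE_IRQ_BIT was set by ftrace_graph_addr() */

	return in_irq();	/* otherwise skip while in hard irq context */
}
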
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e32744c84d9..93365907f21 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -161,7 +161,8 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
161 TRACE_STACK, 161 TRACE_STACK,
162 162
163 F_STRUCT( 163 F_STRUCT(
164 __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) 164 __field( int, size )
165 __dynamic_array(unsigned long, caller )
165 ), 166 ),
166 167
167 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" 168 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 686ec399f2a..c212a7f934e 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -244,6 +244,35 @@ static void ftrace_clear_events(void)
244 mutex_unlock(&event_mutex); 244 mutex_unlock(&event_mutex);
245} 245}
246 246
247static void __put_system(struct event_subsystem *system)
248{
249 struct event_filter *filter = system->filter;
250
251 WARN_ON_ONCE(system->ref_count == 0);
252 if (--system->ref_count)
253 return;
254
255 if (filter) {
256 kfree(filter->filter_string);
257 kfree(filter);
258 }
259 kfree(system->name);
260 kfree(system);
261}
262
263static void __get_system(struct event_subsystem *system)
264{
265 WARN_ON_ONCE(system->ref_count == 0);
266 system->ref_count++;
267}
268
269static void put_system(struct event_subsystem *system)
270{
271 mutex_lock(&event_mutex);
272 __put_system(system);
273 mutex_unlock(&event_mutex);
274}
275
247/* 276/*
248 * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. 277 * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
249 */ 278 */
@@ -486,20 +515,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
486 loff_t *ppos) 515 loff_t *ppos)
487{ 516{
488 struct ftrace_event_call *call = filp->private_data; 517 struct ftrace_event_call *call = filp->private_data;
489 char buf[64];
490 unsigned long val; 518 unsigned long val;
491 int ret; 519 int ret;
492 520
493 if (cnt >= sizeof(buf)) 521 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
494 return -EINVAL; 522 if (ret)
495
496 if (copy_from_user(&buf, ubuf, cnt))
497 return -EFAULT;
498
499 buf[cnt] = 0;
500
501 ret = strict_strtoul(buf, 10, &val);
502 if (ret < 0)
503 return ret; 523 return ret;
504 524
505 ret = tracing_update_buffers(); 525 ret = tracing_update_buffers();
@@ -528,7 +548,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
528 loff_t *ppos) 548 loff_t *ppos)
529{ 549{
530 const char set_to_char[4] = { '?', '0', '1', 'X' }; 550 const char set_to_char[4] = { '?', '0', '1', 'X' };
531 const char *system = filp->private_data; 551 struct event_subsystem *system = filp->private_data;
532 struct ftrace_event_call *call; 552 struct ftrace_event_call *call;
533 char buf[2]; 553 char buf[2];
534 int set = 0; 554 int set = 0;
@@ -539,7 +559,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
539 if (!call->name || !call->class || !call->class->reg) 559 if (!call->name || !call->class || !call->class->reg)
540 continue; 560 continue;
541 561
542 if (system && strcmp(call->class->system, system) != 0) 562 if (system && strcmp(call->class->system, system->name) != 0)
543 continue; 563 continue;
544 564
545 /* 565 /*
@@ -569,21 +589,13 @@ static ssize_t
569system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, 589system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
570 loff_t *ppos) 590 loff_t *ppos)
571{ 591{
572 const char *system = filp->private_data; 592 struct event_subsystem *system = filp->private_data;
593 const char *name = NULL;
573 unsigned long val; 594 unsigned long val;
574 char buf[64];
575 ssize_t ret; 595 ssize_t ret;
576 596
577 if (cnt >= sizeof(buf)) 597 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
578 return -EINVAL; 598 if (ret)
579
580 if (copy_from_user(&buf, ubuf, cnt))
581 return -EFAULT;
582
583 buf[cnt] = 0;
584
585 ret = strict_strtoul(buf, 10, &val);
586 if (ret < 0)
587 return ret; 599 return ret;
588 600
589 ret = tracing_update_buffers(); 601 ret = tracing_update_buffers();
@@ -593,7 +605,14 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
593 if (val != 0 && val != 1) 605 if (val != 0 && val != 1)
594 return -EINVAL; 606 return -EINVAL;
595 607
596 ret = __ftrace_set_clr_event(NULL, system, NULL, val); 608 /*
609 * Opening of "enable" adds a ref count to system,
610 * so the name is safe to use.
611 */
612 if (system)
613 name = system->name;
614
615 ret = __ftrace_set_clr_event(NULL, name, NULL, val);
597 if (ret) 616 if (ret)
598 goto out; 617 goto out;
599 618
@@ -826,6 +845,52 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
826 return cnt; 845 return cnt;
827} 846}
828 847
848static LIST_HEAD(event_subsystems);
849
850static int subsystem_open(struct inode *inode, struct file *filp)
851{
852 struct event_subsystem *system = NULL;
853 int ret;
854
855 if (!inode->i_private)
856 goto skip_search;
857
858 /* Make sure the system still exists */
859 mutex_lock(&event_mutex);
860 list_for_each_entry(system, &event_subsystems, list) {
861 if (system == inode->i_private) {
862 /* Don't open systems with no events */
863 if (!system->nr_events) {
864 system = NULL;
865 break;
866 }
867 __get_system(system);
868 break;
869 }
870 }
871 mutex_unlock(&event_mutex);
872
873 if (system != inode->i_private)
874 return -ENODEV;
875
876 skip_search:
877 ret = tracing_open_generic(inode, filp);
878 if (ret < 0 && system)
879 put_system(system);
880
881 return ret;
882}
883
884static int subsystem_release(struct inode *inode, struct file *file)
885{
886 struct event_subsystem *system = inode->i_private;
887
888 if (system)
889 put_system(system);
890
891 return 0;
892}
893
829static ssize_t 894static ssize_t
830subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, 895subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
831 loff_t *ppos) 896 loff_t *ppos)
@@ -963,17 +1028,19 @@ static const struct file_operations ftrace_event_filter_fops = {
963}; 1028};
964 1029
965static const struct file_operations ftrace_subsystem_filter_fops = { 1030static const struct file_operations ftrace_subsystem_filter_fops = {
966 .open = tracing_open_generic, 1031 .open = subsystem_open,
967 .read = subsystem_filter_read, 1032 .read = subsystem_filter_read,
968 .write = subsystem_filter_write, 1033 .write = subsystem_filter_write,
969 .llseek = default_llseek, 1034 .llseek = default_llseek,
1035 .release = subsystem_release,
970}; 1036};
971 1037
972static const struct file_operations ftrace_system_enable_fops = { 1038static const struct file_operations ftrace_system_enable_fops = {
973 .open = tracing_open_generic, 1039 .open = subsystem_open,
974 .read = system_enable_read, 1040 .read = system_enable_read,
975 .write = system_enable_write, 1041 .write = system_enable_write,
976 .llseek = default_llseek, 1042 .llseek = default_llseek,
1043 .release = subsystem_release,
977}; 1044};
978 1045
979static const struct file_operations ftrace_show_header_fops = { 1046static const struct file_operations ftrace_show_header_fops = {
@@ -1002,8 +1069,6 @@ static struct dentry *event_trace_events_dir(void)
1002 return d_events; 1069 return d_events;
1003} 1070}
1004 1071
1005static LIST_HEAD(event_subsystems);
1006
1007static struct dentry * 1072static struct dentry *
1008event_subsystem_dir(const char *name, struct dentry *d_events) 1073event_subsystem_dir(const char *name, struct dentry *d_events)
1009{ 1074{
@@ -1035,6 +1100,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
1035 } 1100 }
1036 1101
1037 system->nr_events = 1; 1102 system->nr_events = 1;
1103 system->ref_count = 1;
1038 system->name = kstrdup(name, GFP_KERNEL); 1104 system->name = kstrdup(name, GFP_KERNEL);
1039 if (!system->name) { 1105 if (!system->name) {
1040 debugfs_remove(system->entry); 1106 debugfs_remove(system->entry);
@@ -1062,8 +1128,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
1062 "'%s/filter' entry\n", name); 1128 "'%s/filter' entry\n", name);
1063 } 1129 }
1064 1130
1065 trace_create_file("enable", 0644, system->entry, 1131 trace_create_file("enable", 0644, system->entry, system,
1066 (void *)system->name,
1067 &ftrace_system_enable_fops); 1132 &ftrace_system_enable_fops);
1068 1133
1069 return system->entry; 1134 return system->entry;
@@ -1184,16 +1249,9 @@ static void remove_subsystem_dir(const char *name)
1184 list_for_each_entry(system, &event_subsystems, list) { 1249 list_for_each_entry(system, &event_subsystems, list) {
1185 if (strcmp(system->name, name) == 0) { 1250 if (strcmp(system->name, name) == 0) {
1186 if (!--system->nr_events) { 1251 if (!--system->nr_events) {
1187 struct event_filter *filter = system->filter;
1188
1189 debugfs_remove_recursive(system->entry); 1252 debugfs_remove_recursive(system->entry);
1190 list_del(&system->list); 1253 list_del(&system->list);
1191 if (filter) { 1254 __put_system(system);
1192 kfree(filter->filter_string);
1193 kfree(filter);
1194 }
1195 kfree(system->name);
1196 kfree(system);
1197 } 1255 }
1198 break; 1256 break;
1199 } 1257 }
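
The trace_events.c hunks above add reference counting to struct event_subsystem: subsystem_open() pins the subsystem while its "enable" or "filter" file is open, and remove_subsystem_dir() now drops only its own reference through __put_system() instead of freeing the structure outright. A minimal sketch of the get/put pattern under simplified assumptions (my_subsys and its helpers are hypothetical names, and the caller is assumed to hold the subsystem mutex):

struct my_subsys {
	int ref_count;
	char *name;
};

static void my_subsys_get(struct my_subsys *s)
{
	s->ref_count++;			/* an open file keeps the subsystem alive */
}

static void my_subsys_put(struct my_subsys *s)
{
	if (--s->ref_count)
		return;			/* still referenced by a file or the dir */
	kfree(s->name);			/* last reference dropped: free everything */
	kfree(s);
}
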
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 8008ddcfbf2..bd3c6369f80 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1766,7 +1766,7 @@ static int replace_system_preds(struct event_subsystem *system,
1766 * replace the filter for the call. 1766 * replace the filter for the call.
1767 */ 1767 */
1768 filter = call->filter; 1768 filter = call->filter;
1769 call->filter = filter_item->filter; 1769 rcu_assign_pointer(call->filter, filter_item->filter);
1770 filter_item->filter = filter; 1770 filter_item->filter = filter;
1771 1771
1772 fail = false; 1772 fail = false;
@@ -1821,7 +1821,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1821 filter = call->filter; 1821 filter = call->filter;
1822 if (!filter) 1822 if (!filter)
1823 goto out_unlock; 1823 goto out_unlock;
1824 call->filter = NULL; 1824 RCU_INIT_POINTER(call->filter, NULL);
1825 /* Make sure the filter is not being used */ 1825 /* Make sure the filter is not being used */
1826 synchronize_sched(); 1826 synchronize_sched();
1827 __free_filter(filter); 1827 __free_filter(filter);
@@ -1862,7 +1862,7 @@ out:
1862 * string 1862 * string
1863 */ 1863 */
1864 tmp = call->filter; 1864 tmp = call->filter;
1865 call->filter = filter; 1865 rcu_assign_pointer(call->filter, filter);
1866 if (tmp) { 1866 if (tmp) {
1867 /* Make sure the call is done with the filter */ 1867 /* Make sure the call is done with the filter */
1868 synchronize_sched(); 1868 synchronize_sched();
@@ -1886,6 +1886,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1886 1886
1887 mutex_lock(&event_mutex); 1887 mutex_lock(&event_mutex);
1888 1888
1889 /* Make sure the system still has events */
1890 if (!system->nr_events) {
1891 err = -ENODEV;
1892 goto out_unlock;
1893 }
1894
1889 if (!strcmp(strstrip(filter_string), "0")) { 1895 if (!strcmp(strstrip(filter_string), "0")) {
1890 filter_free_subsystem_preds(system); 1896 filter_free_subsystem_preds(system);
1891 remove_filter_string(system->filter); 1897 remove_filter_string(system->filter);
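
The filter changes above switch call->filter updates to the usual RCU publish-then-free sequence: the new filter is published with rcu_assign_pointer() (or cleared with RCU_INIT_POINTER()), and the old one is freed only after synchronize_sched() guarantees no tracer is still dereferencing it. A sketch of that sequence with illustrative names (example_filter, example_swap_filter), assuming the caller holds the event mutex and freeing with plain kfree() where the real code uses __free_filter():

struct example_filter {
	char *filter_string;
};

static void example_swap_filter(struct example_filter **slot,
				struct example_filter *new)
{
	struct example_filter *old = *slot;

	rcu_assign_pointer(*slot, new);	/* publish the new filter */
	if (old) {
		synchronize_sched();	/* wait out preempt-disabled readers */
		kfree(old->filter_string);
		kfree(old);		/* now safe to free the old filter */
	}
}
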
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8d0e1cc4e97..c7b0c6a7db0 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -324,7 +324,8 @@ ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param)
324} 324}
325 325
326static int 326static int
327ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable) 327ftrace_trace_onoff_callback(struct ftrace_hash *hash,
328 char *glob, char *cmd, char *param, int enable)
328{ 329{
329 struct ftrace_probe_ops *ops; 330 struct ftrace_probe_ops *ops;
330 void *count = (void *)-1; 331 void *count = (void *)-1;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 962cdb24ed8..a7d2a4c653d 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -74,6 +74,20 @@ static struct tracer_flags tracer_flags = {
74 74
75static struct trace_array *graph_array; 75static struct trace_array *graph_array;
76 76
77/*
78 * The DURATION column is also used to display IRQ signs; the
79 * following values are used by print_graph_irq and others
80 * to fill in space in the DURATION column.
81 */
82enum {
83 DURATION_FILL_FULL = -1,
84 DURATION_FILL_START = -2,
85 DURATION_FILL_END = -3,
86};
87
88static enum print_line_t
89print_graph_duration(unsigned long long duration, struct trace_seq *s,
90 u32 flags);
77 91
78/* Add a function return address to the trace stack on thread info.*/ 92/* Add a function return address to the trace stack on thread info.*/
79int 93int
@@ -213,7 +227,7 @@ int __trace_graph_entry(struct trace_array *tr,
213 227
214static inline int ftrace_graph_ignore_irqs(void) 228static inline int ftrace_graph_ignore_irqs(void)
215{ 229{
216 if (!ftrace_graph_skip_irqs) 230 if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT))
217 return 0; 231 return 0;
218 232
219 return in_irq(); 233 return in_irq();
@@ -577,32 +591,6 @@ get_return_for_leaf(struct trace_iterator *iter,
577 return next; 591 return next;
578} 592}
579 593
580/* Signal a overhead of time execution to the output */
581static int
582print_graph_overhead(unsigned long long duration, struct trace_seq *s,
583 u32 flags)
584{
585 /* If duration disappear, we don't need anything */
586 if (!(flags & TRACE_GRAPH_PRINT_DURATION))
587 return 1;
588
589 /* Non nested entry or return */
590 if (duration == -1)
591 return trace_seq_printf(s, " ");
592
593 if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
594 /* Duration exceeded 100 msecs */
595 if (duration > 100000ULL)
596 return trace_seq_printf(s, "! ");
597
598 /* Duration exceeded 10 msecs */
599 if (duration > 10000ULL)
600 return trace_seq_printf(s, "+ ");
601 }
602
603 return trace_seq_printf(s, " ");
604}
605
606static int print_graph_abs_time(u64 t, struct trace_seq *s) 594static int print_graph_abs_time(u64 t, struct trace_seq *s)
607{ 595{
608 unsigned long usecs_rem; 596 unsigned long usecs_rem;
@@ -625,34 +613,36 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
625 addr >= (unsigned long)__irqentry_text_end) 613 addr >= (unsigned long)__irqentry_text_end)
626 return TRACE_TYPE_UNHANDLED; 614 return TRACE_TYPE_UNHANDLED;
627 615
628 /* Absolute time */ 616 if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
629 if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { 617 /* Absolute time */
630 ret = print_graph_abs_time(iter->ts, s); 618 if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
631 if (!ret) 619 ret = print_graph_abs_time(iter->ts, s);
632 return TRACE_TYPE_PARTIAL_LINE; 620 if (!ret)
633 } 621 return TRACE_TYPE_PARTIAL_LINE;
622 }
634 623
635 /* Cpu */ 624 /* Cpu */
636 if (flags & TRACE_GRAPH_PRINT_CPU) { 625 if (flags & TRACE_GRAPH_PRINT_CPU) {
637 ret = print_graph_cpu(s, cpu); 626 ret = print_graph_cpu(s, cpu);
638 if (ret == TRACE_TYPE_PARTIAL_LINE) 627 if (ret == TRACE_TYPE_PARTIAL_LINE)
639 return TRACE_TYPE_PARTIAL_LINE; 628 return TRACE_TYPE_PARTIAL_LINE;
640 } 629 }
641 630
642 /* Proc */ 631 /* Proc */
643 if (flags & TRACE_GRAPH_PRINT_PROC) { 632 if (flags & TRACE_GRAPH_PRINT_PROC) {
644 ret = print_graph_proc(s, pid); 633 ret = print_graph_proc(s, pid);
645 if (ret == TRACE_TYPE_PARTIAL_LINE) 634 if (ret == TRACE_TYPE_PARTIAL_LINE)
646 return TRACE_TYPE_PARTIAL_LINE; 635 return TRACE_TYPE_PARTIAL_LINE;
647 ret = trace_seq_printf(s, " | "); 636 ret = trace_seq_printf(s, " | ");
648 if (!ret) 637 if (!ret)
649 return TRACE_TYPE_PARTIAL_LINE; 638 return TRACE_TYPE_PARTIAL_LINE;
639 }
650 } 640 }
651 641
652 /* No overhead */ 642 /* No overhead */
653 ret = print_graph_overhead(-1, s, flags); 643 ret = print_graph_duration(DURATION_FILL_START, s, flags);
654 if (!ret) 644 if (ret != TRACE_TYPE_HANDLED)
655 return TRACE_TYPE_PARTIAL_LINE; 645 return ret;
656 646
657 if (type == TRACE_GRAPH_ENT) 647 if (type == TRACE_GRAPH_ENT)
658 ret = trace_seq_printf(s, "==========>"); 648 ret = trace_seq_printf(s, "==========>");
@@ -662,9 +652,10 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
662 if (!ret) 652 if (!ret)
663 return TRACE_TYPE_PARTIAL_LINE; 653 return TRACE_TYPE_PARTIAL_LINE;
664 654
665 /* Don't close the duration column if haven't one */ 655 ret = print_graph_duration(DURATION_FILL_END, s, flags);
666 if (flags & TRACE_GRAPH_PRINT_DURATION) 656 if (ret != TRACE_TYPE_HANDLED)
667 trace_seq_printf(s, " |"); 657 return ret;
658
668 ret = trace_seq_printf(s, "\n"); 659 ret = trace_seq_printf(s, "\n");
669 660
670 if (!ret) 661 if (!ret)
@@ -716,9 +707,49 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
716} 707}
717 708
718static enum print_line_t 709static enum print_line_t
719print_graph_duration(unsigned long long duration, struct trace_seq *s) 710print_graph_duration(unsigned long long duration, struct trace_seq *s,
711 u32 flags)
720{ 712{
721 int ret; 713 int ret = -1;
714
715 if (!(flags & TRACE_GRAPH_PRINT_DURATION) ||
716 !(trace_flags & TRACE_ITER_CONTEXT_INFO))
717 return TRACE_TYPE_HANDLED;
718
719 /* No real data, just fill the column with spaces */
720 switch (duration) {
721 case DURATION_FILL_FULL:
722 ret = trace_seq_printf(s, " | ");
723 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
724 case DURATION_FILL_START:
725 ret = trace_seq_printf(s, " ");
726 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
727 case DURATION_FILL_END:
728 ret = trace_seq_printf(s, " |");
729 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
730 }
731
732 /* Signal an execution-time overhead in the output */
733 if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
734 /* Duration exceeded 100 msecs */
735 if (duration > 100000ULL)
736 ret = trace_seq_printf(s, "! ");
737 /* Duration exceeded 10 msecs */
738 else if (duration > 10000ULL)
739 ret = trace_seq_printf(s, "+ ");
740 }
741
742 /*
743 * The -1 means we either did not exceed the duration thresholds
744 * or we don't want to print out the overhead. Either way we need
745 * to fill out the space.
746 */
747 if (ret == -1)
748 ret = trace_seq_printf(s, " ");
749
750 /* Catch any failure that happened above */
751 if (!ret)
752 return TRACE_TYPE_PARTIAL_LINE;
722 753
723 ret = trace_print_graph_duration(duration, s); 754 ret = trace_print_graph_duration(duration, s);
724 if (ret != TRACE_TYPE_HANDLED) 755 if (ret != TRACE_TYPE_HANDLED)
@@ -767,18 +798,11 @@ print_graph_entry_leaf(struct trace_iterator *iter,
767 cpu_data->enter_funcs[call->depth] = 0; 798 cpu_data->enter_funcs[call->depth] = 0;
768 } 799 }
769 800
770 /* Overhead */ 801 /* Overhead and duration */
771 ret = print_graph_overhead(duration, s, flags); 802 ret = print_graph_duration(duration, s, flags);
772 if (!ret) 803 if (ret == TRACE_TYPE_PARTIAL_LINE)
773 return TRACE_TYPE_PARTIAL_LINE; 804 return TRACE_TYPE_PARTIAL_LINE;
774 805
775 /* Duration */
776 if (flags & TRACE_GRAPH_PRINT_DURATION) {
777 ret = print_graph_duration(duration, s);
778 if (ret == TRACE_TYPE_PARTIAL_LINE)
779 return TRACE_TYPE_PARTIAL_LINE;
780 }
781
782 /* Function */ 806 /* Function */
783 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 807 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
784 ret = trace_seq_printf(s, " "); 808 ret = trace_seq_printf(s, " ");
@@ -815,17 +839,10 @@ print_graph_entry_nested(struct trace_iterator *iter,
815 cpu_data->enter_funcs[call->depth] = call->func; 839 cpu_data->enter_funcs[call->depth] = call->func;
816 } 840 }
817 841
818 /* No overhead */
819 ret = print_graph_overhead(-1, s, flags);
820 if (!ret)
821 return TRACE_TYPE_PARTIAL_LINE;
822
823 /* No time */ 842 /* No time */
824 if (flags & TRACE_GRAPH_PRINT_DURATION) { 843 ret = print_graph_duration(DURATION_FILL_FULL, s, flags);
825 ret = trace_seq_printf(s, " | "); 844 if (ret != TRACE_TYPE_HANDLED)
826 if (!ret) 845 return ret;
827 return TRACE_TYPE_PARTIAL_LINE;
828 }
829 846
830 /* Function */ 847 /* Function */
831 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 848 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
@@ -865,6 +882,9 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
865 return TRACE_TYPE_PARTIAL_LINE; 882 return TRACE_TYPE_PARTIAL_LINE;
866 } 883 }
867 884
885 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
886 return 0;
887
868 /* Absolute time */ 888 /* Absolute time */
869 if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { 889 if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
870 ret = print_graph_abs_time(iter->ts, s); 890 ret = print_graph_abs_time(iter->ts, s);
@@ -1078,18 +1098,11 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
1078 if (print_graph_prologue(iter, s, 0, 0, flags)) 1098 if (print_graph_prologue(iter, s, 0, 0, flags))
1079 return TRACE_TYPE_PARTIAL_LINE; 1099 return TRACE_TYPE_PARTIAL_LINE;
1080 1100
1081 /* Overhead */ 1101 /* Overhead and duration */
1082 ret = print_graph_overhead(duration, s, flags); 1102 ret = print_graph_duration(duration, s, flags);
1083 if (!ret) 1103 if (ret == TRACE_TYPE_PARTIAL_LINE)
1084 return TRACE_TYPE_PARTIAL_LINE; 1104 return TRACE_TYPE_PARTIAL_LINE;
1085 1105
1086 /* Duration */
1087 if (flags & TRACE_GRAPH_PRINT_DURATION) {
1088 ret = print_graph_duration(duration, s);
1089 if (ret == TRACE_TYPE_PARTIAL_LINE)
1090 return TRACE_TYPE_PARTIAL_LINE;
1091 }
1092
1093 /* Closing brace */ 1106 /* Closing brace */
1094 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { 1107 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
1095 ret = trace_seq_printf(s, " "); 1108 ret = trace_seq_printf(s, " ");
@@ -1146,17 +1159,10 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
1146 if (print_graph_prologue(iter, s, 0, 0, flags)) 1159 if (print_graph_prologue(iter, s, 0, 0, flags))
1147 return TRACE_TYPE_PARTIAL_LINE; 1160 return TRACE_TYPE_PARTIAL_LINE;
1148 1161
1149 /* No overhead */
1150 ret = print_graph_overhead(-1, s, flags);
1151 if (!ret)
1152 return TRACE_TYPE_PARTIAL_LINE;
1153
1154 /* No time */ 1162 /* No time */
1155 if (flags & TRACE_GRAPH_PRINT_DURATION) { 1163 ret = print_graph_duration(DURATION_FILL_FULL, s, flags);
1156 ret = trace_seq_printf(s, " | "); 1164 if (ret != TRACE_TYPE_HANDLED)
1157 if (!ret) 1165 return ret;
1158 return TRACE_TYPE_PARTIAL_LINE;
1159 }
1160 1166
1161 /* Indentation */ 1167 /* Indentation */
1162 if (depth > 0) 1168 if (depth > 0)
@@ -1207,7 +1213,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
1207 1213
1208 1214
1209enum print_line_t 1215enum print_line_t
1210__print_graph_function_flags(struct trace_iterator *iter, u32 flags) 1216print_graph_function_flags(struct trace_iterator *iter, u32 flags)
1211{ 1217{
1212 struct ftrace_graph_ent_entry *field; 1218 struct ftrace_graph_ent_entry *field;
1213 struct fgraph_data *data = iter->private; 1219 struct fgraph_data *data = iter->private;
@@ -1270,18 +1276,7 @@ __print_graph_function_flags(struct trace_iterator *iter, u32 flags)
1270static enum print_line_t 1276static enum print_line_t
1271print_graph_function(struct trace_iterator *iter) 1277print_graph_function(struct trace_iterator *iter)
1272{ 1278{
1273 return __print_graph_function_flags(iter, tracer_flags.val); 1279 return print_graph_function_flags(iter, tracer_flags.val);
1274}
1275
1276enum print_line_t print_graph_function_flags(struct trace_iterator *iter,
1277 u32 flags)
1278{
1279 if (trace_flags & TRACE_ITER_LATENCY_FMT)
1280 flags |= TRACE_GRAPH_PRINT_DURATION;
1281 else
1282 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1283
1284 return __print_graph_function_flags(iter, flags);
1285} 1280}
1286 1281
1287static enum print_line_t 1282static enum print_line_t
@@ -1309,8 +1304,7 @@ static void print_lat_header(struct seq_file *s, u32 flags)
1309 seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces); 1304 seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces);
1310 seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces); 1305 seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces);
1311 seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces); 1306 seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces);
1312 seq_printf(s, "#%.*s||| / _-=> lock-depth \n", size, spaces); 1307 seq_printf(s, "#%.*s||| / \n", size, spaces);
1313 seq_printf(s, "#%.*s|||| / \n", size, spaces);
1314} 1308}
1315 1309
1316static void __print_graph_headers_flags(struct seq_file *s, u32 flags) 1310static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
@@ -1329,7 +1323,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
1329 if (flags & TRACE_GRAPH_PRINT_PROC) 1323 if (flags & TRACE_GRAPH_PRINT_PROC)
1330 seq_printf(s, " TASK/PID "); 1324 seq_printf(s, " TASK/PID ");
1331 if (lat) 1325 if (lat)
1332 seq_printf(s, "|||||"); 1326 seq_printf(s, "||||");
1333 if (flags & TRACE_GRAPH_PRINT_DURATION) 1327 if (flags & TRACE_GRAPH_PRINT_DURATION)
1334 seq_printf(s, " DURATION "); 1328 seq_printf(s, " DURATION ");
1335 seq_printf(s, " FUNCTION CALLS\n"); 1329 seq_printf(s, " FUNCTION CALLS\n");
@@ -1343,7 +1337,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
1343 if (flags & TRACE_GRAPH_PRINT_PROC) 1337 if (flags & TRACE_GRAPH_PRINT_PROC)
1344 seq_printf(s, " | | "); 1338 seq_printf(s, " | | ");
1345 if (lat) 1339 if (lat)
1346 seq_printf(s, "|||||"); 1340 seq_printf(s, "||||");
1347 if (flags & TRACE_GRAPH_PRINT_DURATION) 1341 if (flags & TRACE_GRAPH_PRINT_DURATION)
1348 seq_printf(s, " | | "); 1342 seq_printf(s, " | | ");
1349 seq_printf(s, " | | | |\n"); 1343 seq_printf(s, " | | | |\n");
@@ -1358,15 +1352,16 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags)
1358{ 1352{
1359 struct trace_iterator *iter = s->private; 1353 struct trace_iterator *iter = s->private;
1360 1354
1355 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
1356 return;
1357
1361 if (trace_flags & TRACE_ITER_LATENCY_FMT) { 1358 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
1362 /* print nothing if the buffers are empty */ 1359 /* print nothing if the buffers are empty */
1363 if (trace_empty(iter)) 1360 if (trace_empty(iter))
1364 return; 1361 return;
1365 1362
1366 print_trace_header(s, iter); 1363 print_trace_header(s, iter);
1367 flags |= TRACE_GRAPH_PRINT_DURATION; 1364 }
1368 } else
1369 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1370 1365
1371 __print_graph_headers_flags(s, flags); 1366 __print_graph_headers_flags(s, flags);
1372} 1367}
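
The graph tracer now funnels both the overhead marker and the column filling through the single print_graph_duration() helper, using the DURATION_FILL_* sentinels instead of the removed print_graph_overhead() plus open-coded "   |   " strings. A rough sketch of how a caller chooses between the two cases (example_print_duration_column is a hypothetical wrapper, not part of the patch):

static enum print_line_t
example_print_duration_column(struct trace_seq *s, u32 flags,
			      unsigned long long duration, bool have_duration)
{
	if (!have_duration)
		/* Nested entry or comment: just fill the column. */
		return print_graph_duration(DURATION_FILL_FULL, s, flags);

	/* Leaf entry or return: overhead marker ("! ", "+ ") plus the time. */
	return print_graph_duration(duration, s, flags);
}
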
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index c77424be284..667aa8cc0cf 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -226,7 +226,9 @@ static void irqsoff_trace_close(struct trace_iterator *iter)
226} 226}
227 227
228#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \ 228#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \
229 TRACE_GRAPH_PRINT_PROC) 229 TRACE_GRAPH_PRINT_PROC | \
230 TRACE_GRAPH_PRINT_ABS_TIME | \
231 TRACE_GRAPH_PRINT_DURATION)
230 232
231static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) 233static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
232{ 234{
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 27d13b36b8b..00d527c945a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -343,6 +343,14 @@ DEFINE_BASIC_FETCH_FUNCS(deref)
343DEFINE_FETCH_deref(string) 343DEFINE_FETCH_deref(string)
344DEFINE_FETCH_deref(string_size) 344DEFINE_FETCH_deref(string_size)
345 345
346static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
347{
348 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
349 update_deref_fetch_param(data->orig.data);
350 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
351 update_symbol_cache(data->orig.data);
352}
353
346static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) 354static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
347{ 355{
348 if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) 356 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
@@ -377,6 +385,19 @@ DEFINE_BASIC_FETCH_FUNCS(bitfield)
377#define fetch_bitfield_string_size NULL 385#define fetch_bitfield_string_size NULL
378 386
379static __kprobes void 387static __kprobes void
388update_bitfield_fetch_param(struct bitfield_fetch_param *data)
389{
390 /*
391 * Don't check the bitfield itself, because this must be the
392 * last fetch function.
393 */
394 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
395 update_deref_fetch_param(data->orig.data);
396 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
397 update_symbol_cache(data->orig.data);
398}
399
400static __kprobes void
380free_bitfield_fetch_param(struct bitfield_fetch_param *data) 401free_bitfield_fetch_param(struct bitfield_fetch_param *data)
381{ 402{
382 /* 403 /*
@@ -389,6 +410,7 @@ free_bitfield_fetch_param(struct bitfield_fetch_param *data)
389 free_symbol_cache(data->orig.data); 410 free_symbol_cache(data->orig.data);
390 kfree(data); 411 kfree(data);
391} 412}
413
392/* Default (unsigned long) fetch type */ 414/* Default (unsigned long) fetch type */
393#define __DEFAULT_FETCH_TYPE(t) u##t 415#define __DEFAULT_FETCH_TYPE(t) u##t
394#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) 416#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
@@ -536,6 +558,7 @@ struct probe_arg {
536/* Flags for trace_probe */ 558/* Flags for trace_probe */
537#define TP_FLAG_TRACE 1 559#define TP_FLAG_TRACE 1
538#define TP_FLAG_PROFILE 2 560#define TP_FLAG_PROFILE 2
561#define TP_FLAG_REGISTERED 4
539 562
540struct trace_probe { 563struct trace_probe {
541 struct list_head list; 564 struct list_head list;
@@ -555,16 +578,49 @@ struct trace_probe {
555 (sizeof(struct probe_arg) * (n))) 578 (sizeof(struct probe_arg) * (n)))
556 579
557 580
558static __kprobes int probe_is_return(struct trace_probe *tp) 581static __kprobes int trace_probe_is_return(struct trace_probe *tp)
559{ 582{
560 return tp->rp.handler != NULL; 583 return tp->rp.handler != NULL;
561} 584}
562 585
563static __kprobes const char *probe_symbol(struct trace_probe *tp) 586static __kprobes const char *trace_probe_symbol(struct trace_probe *tp)
564{ 587{
565 return tp->symbol ? tp->symbol : "unknown"; 588 return tp->symbol ? tp->symbol : "unknown";
566} 589}
567 590
591static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp)
592{
593 return tp->rp.kp.offset;
594}
595
596static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp)
597{
598 return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
599}
600
601static __kprobes bool trace_probe_is_registered(struct trace_probe *tp)
602{
603 return !!(tp->flags & TP_FLAG_REGISTERED);
604}
605
606static __kprobes bool trace_probe_has_gone(struct trace_probe *tp)
607{
608 return !!(kprobe_gone(&tp->rp.kp));
609}
610
611static __kprobes bool trace_probe_within_module(struct trace_probe *tp,
612 struct module *mod)
613{
614 int len = strlen(mod->name);
615 const char *name = trace_probe_symbol(tp);
616 return strncmp(mod->name, name, len) == 0 && name[len] == ':';
617}
618
619static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp)
620{
621 return !!strchr(trace_probe_symbol(tp), ':');
622}
623
568static int register_probe_event(struct trace_probe *tp); 624static int register_probe_event(struct trace_probe *tp);
569static void unregister_probe_event(struct trace_probe *tp); 625static void unregister_probe_event(struct trace_probe *tp);
570 626
@@ -646,6 +702,16 @@ error:
646 return ERR_PTR(ret); 702 return ERR_PTR(ret);
647} 703}
648 704
705static void update_probe_arg(struct probe_arg *arg)
706{
707 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
708 update_bitfield_fetch_param(arg->fetch.data);
709 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
710 update_deref_fetch_param(arg->fetch.data);
711 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
712 update_symbol_cache(arg->fetch.data);
713}
714
649static void free_probe_arg(struct probe_arg *arg) 715static void free_probe_arg(struct probe_arg *arg)
650{ 716{
651 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) 717 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
@@ -671,7 +737,7 @@ static void free_trace_probe(struct trace_probe *tp)
671 kfree(tp); 737 kfree(tp);
672} 738}
673 739
674static struct trace_probe *find_probe_event(const char *event, 740static struct trace_probe *find_trace_probe(const char *event,
675 const char *group) 741 const char *group)
676{ 742{
677 struct trace_probe *tp; 743 struct trace_probe *tp;
@@ -683,15 +749,104 @@ static struct trace_probe *find_probe_event(const char *event,
683 return NULL; 749 return NULL;
684} 750}
685 751
686/* Unregister a trace_probe and probe_event: call with locking probe_lock */ 752/* Enable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
687static void unregister_trace_probe(struct trace_probe *tp) 753static int enable_trace_probe(struct trace_probe *tp, int flag)
688{ 754{
689 if (probe_is_return(tp)) 755 int ret = 0;
690 unregister_kretprobe(&tp->rp); 756
757 tp->flags |= flag;
758 if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) &&
759 !trace_probe_has_gone(tp)) {
760 if (trace_probe_is_return(tp))
761 ret = enable_kretprobe(&tp->rp);
762 else
763 ret = enable_kprobe(&tp->rp.kp);
764 }
765
766 return ret;
767}
768
769/* Disable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
770static void disable_trace_probe(struct trace_probe *tp, int flag)
771{
772 tp->flags &= ~flag;
773 if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) {
774 if (trace_probe_is_return(tp))
775 disable_kretprobe(&tp->rp);
776 else
777 disable_kprobe(&tp->rp.kp);
778 }
779}
780
781/* Internal register function - just handle k*probes and flags */
782static int __register_trace_probe(struct trace_probe *tp)
783{
784 int i, ret;
785
786 if (trace_probe_is_registered(tp))
787 return -EINVAL;
788
789 for (i = 0; i < tp->nr_args; i++)
790 update_probe_arg(&tp->args[i]);
791
792 /* Set/clear the disabled flag according to tp->flags */
793 if (trace_probe_is_enabled(tp))
794 tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
795 else
796 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
797
798 if (trace_probe_is_return(tp))
799 ret = register_kretprobe(&tp->rp);
691 else 800 else
692 unregister_kprobe(&tp->rp.kp); 801 ret = register_kprobe(&tp->rp.kp);
802
803 if (ret == 0)
804 tp->flags |= TP_FLAG_REGISTERED;
805 else {
806 pr_warning("Could not insert probe at %s+%lu: %d\n",
807 trace_probe_symbol(tp), trace_probe_offset(tp), ret);
808 if (ret == -ENOENT && trace_probe_is_on_module(tp)) {
809 pr_warning("This probe might be able to register after"
810 "target module is loaded. Continue.\n");
811 ret = 0;
812 } else if (ret == -EILSEQ) {
813 pr_warning("Probing address(0x%p) is not an "
814 "instruction boundary.\n",
815 tp->rp.kp.addr);
816 ret = -EINVAL;
817 }
818 }
819
820 return ret;
821}
822
823/* Internal unregister function - just handle k*probes and flags */
824static void __unregister_trace_probe(struct trace_probe *tp)
825{
826 if (trace_probe_is_registered(tp)) {
827 if (trace_probe_is_return(tp))
828 unregister_kretprobe(&tp->rp);
829 else
830 unregister_kprobe(&tp->rp.kp);
831 tp->flags &= ~TP_FLAG_REGISTERED;
832 /* Cleanup kprobe for reuse */
833 if (tp->rp.kp.symbol_name)
834 tp->rp.kp.addr = NULL;
835 }
836}
837
838/* Unregister a trace_probe and probe_event: call with locking probe_lock */
839static int unregister_trace_probe(struct trace_probe *tp)
840{
841 /* Enabled event can not be unregistered */
842 if (trace_probe_is_enabled(tp))
843 return -EBUSY;
844
845 __unregister_trace_probe(tp);
693 list_del(&tp->list); 846 list_del(&tp->list);
694 unregister_probe_event(tp); 847 unregister_probe_event(tp);
848
849 return 0;
695} 850}
696 851
697/* Register a trace_probe and probe_event */ 852/* Register a trace_probe and probe_event */
@@ -702,41 +857,68 @@ static int register_trace_probe(struct trace_probe *tp)
702 857
703 mutex_lock(&probe_lock); 858 mutex_lock(&probe_lock);
704 859
705 /* register as an event */ 860 /* Delete old (same name) event if exist */
706 old_tp = find_probe_event(tp->call.name, tp->call.class->system); 861 old_tp = find_trace_probe(tp->call.name, tp->call.class->system);
707 if (old_tp) { 862 if (old_tp) {
708 /* delete old event */ 863 ret = unregister_trace_probe(old_tp);
709 unregister_trace_probe(old_tp); 864 if (ret < 0)
865 goto end;
710 free_trace_probe(old_tp); 866 free_trace_probe(old_tp);
711 } 867 }
868
869 /* Register new event */
712 ret = register_probe_event(tp); 870 ret = register_probe_event(tp);
713 if (ret) { 871 if (ret) {
714 pr_warning("Failed to register probe event(%d)\n", ret); 872 pr_warning("Failed to register probe event(%d)\n", ret);
715 goto end; 873 goto end;
716 } 874 }
717 875
718 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; 876 /* Register k*probe */
719 if (probe_is_return(tp)) 877 ret = __register_trace_probe(tp);
720 ret = register_kretprobe(&tp->rp); 878 if (ret < 0)
721 else
722 ret = register_kprobe(&tp->rp.kp);
723
724 if (ret) {
725 pr_warning("Could not insert probe(%d)\n", ret);
726 if (ret == -EILSEQ) {
727 pr_warning("Probing address(0x%p) is not an "
728 "instruction boundary.\n",
729 tp->rp.kp.addr);
730 ret = -EINVAL;
731 }
732 unregister_probe_event(tp); 879 unregister_probe_event(tp);
733 } else 880 else
734 list_add_tail(&tp->list, &probe_list); 881 list_add_tail(&tp->list, &probe_list);
882
735end: 883end:
736 mutex_unlock(&probe_lock); 884 mutex_unlock(&probe_lock);
737 return ret; 885 return ret;
738} 886}
739 887
888/* Module notifier call back, checking event on the module */
889static int trace_probe_module_callback(struct notifier_block *nb,
890 unsigned long val, void *data)
891{
892 struct module *mod = data;
893 struct trace_probe *tp;
894 int ret;
895
896 if (val != MODULE_STATE_COMING)
897 return NOTIFY_DONE;
898
899 /* Update probes on coming module */
900 mutex_lock(&probe_lock);
901 list_for_each_entry(tp, &probe_list, list) {
902 if (trace_probe_within_module(tp, mod)) {
903 /* Don't need to check busy - this should have gone. */
904 __unregister_trace_probe(tp);
905 ret = __register_trace_probe(tp);
906 if (ret)
907 pr_warning("Failed to re-register probe %s on"
908 "%s: %d\n",
909 tp->call.name, mod->name, ret);
910 }
911 }
912 mutex_unlock(&probe_lock);
913
914 return NOTIFY_DONE;
915}
916
917static struct notifier_block trace_probe_module_nb = {
918 .notifier_call = trace_probe_module_callback,
919 .priority = 1 /* Invoked after kprobe module callback */
920};
921
740/* Split symbol and offset. */ 922/* Split symbol and offset. */
741static int split_symbol_offset(char *symbol, unsigned long *offset) 923static int split_symbol_offset(char *symbol, unsigned long *offset)
742{ 924{
@@ -962,8 +1144,8 @@ static int create_trace_probe(int argc, char **argv)
962{ 1144{
963 /* 1145 /*
964 * Argument syntax: 1146 * Argument syntax:
965 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] 1147 * - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
966 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] 1148 * - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
967 * Fetch args: 1149 * Fetch args:
968 * $retval : fetch return value 1150 * $retval : fetch return value
969 * $stack : fetch stack address 1151 * $stack : fetch stack address
@@ -1025,17 +1207,18 @@ static int create_trace_probe(int argc, char **argv)
1025 return -EINVAL; 1207 return -EINVAL;
1026 } 1208 }
1027 mutex_lock(&probe_lock); 1209 mutex_lock(&probe_lock);
1028 tp = find_probe_event(event, group); 1210 tp = find_trace_probe(event, group);
1029 if (!tp) { 1211 if (!tp) {
1030 mutex_unlock(&probe_lock); 1212 mutex_unlock(&probe_lock);
1031 pr_info("Event %s/%s doesn't exist.\n", group, event); 1213 pr_info("Event %s/%s doesn't exist.\n", group, event);
1032 return -ENOENT; 1214 return -ENOENT;
1033 } 1215 }
1034 /* delete an event */ 1216 /* delete an event */
1035 unregister_trace_probe(tp); 1217 ret = unregister_trace_probe(tp);
1036 free_trace_probe(tp); 1218 if (ret == 0)
1219 free_trace_probe(tp);
1037 mutex_unlock(&probe_lock); 1220 mutex_unlock(&probe_lock);
1038 return 0; 1221 return ret;
1039 } 1222 }
1040 1223
1041 if (argc < 2) { 1224 if (argc < 2) {
@@ -1144,20 +1327,30 @@ error:
1144 return ret; 1327 return ret;
1145} 1328}
1146 1329
1147static void cleanup_all_probes(void) 1330static int release_all_trace_probes(void)
1148{ 1331{
1149 struct trace_probe *tp; 1332 struct trace_probe *tp;
1333 int ret = 0;
1150 1334
1151 mutex_lock(&probe_lock); 1335 mutex_lock(&probe_lock);
1336 /* Ensure no probe is in use. */
1337 list_for_each_entry(tp, &probe_list, list)
1338 if (trace_probe_is_enabled(tp)) {
1339 ret = -EBUSY;
1340 goto end;
1341 }
1152 /* TODO: Use batch unregistration */ 1342 /* TODO: Use batch unregistration */
1153 while (!list_empty(&probe_list)) { 1343 while (!list_empty(&probe_list)) {
1154 tp = list_entry(probe_list.next, struct trace_probe, list); 1344 tp = list_entry(probe_list.next, struct trace_probe, list);
1155 unregister_trace_probe(tp); 1345 unregister_trace_probe(tp);
1156 free_trace_probe(tp); 1346 free_trace_probe(tp);
1157 } 1347 }
1348
1349end:
1158 mutex_unlock(&probe_lock); 1350 mutex_unlock(&probe_lock);
1159}
1160 1351
1352 return ret;
1353}
1161 1354
1162/* Probes listing interfaces */ 1355/* Probes listing interfaces */
1163static void *probes_seq_start(struct seq_file *m, loff_t *pos) 1356static void *probes_seq_start(struct seq_file *m, loff_t *pos)
@@ -1181,15 +1374,16 @@ static int probes_seq_show(struct seq_file *m, void *v)
1181 struct trace_probe *tp = v; 1374 struct trace_probe *tp = v;
1182 int i; 1375 int i;
1183 1376
1184 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); 1377 seq_printf(m, "%c", trace_probe_is_return(tp) ? 'r' : 'p');
1185 seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name); 1378 seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
1186 1379
1187 if (!tp->symbol) 1380 if (!tp->symbol)
1188 seq_printf(m, " 0x%p", tp->rp.kp.addr); 1381 seq_printf(m, " 0x%p", tp->rp.kp.addr);
1189 else if (tp->rp.kp.offset) 1382 else if (tp->rp.kp.offset)
1190 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset); 1383 seq_printf(m, " %s+%u", trace_probe_symbol(tp),
1384 tp->rp.kp.offset);
1191 else 1385 else
1192 seq_printf(m, " %s", probe_symbol(tp)); 1386 seq_printf(m, " %s", trace_probe_symbol(tp));
1193 1387
1194 for (i = 0; i < tp->nr_args; i++) 1388 for (i = 0; i < tp->nr_args; i++)
1195 seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm); 1389 seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
@@ -1207,9 +1401,13 @@ static const struct seq_operations probes_seq_op = {
1207 1401
1208static int probes_open(struct inode *inode, struct file *file) 1402static int probes_open(struct inode *inode, struct file *file)
1209{ 1403{
1210 if ((file->f_mode & FMODE_WRITE) && 1404 int ret;
1211 (file->f_flags & O_TRUNC)) 1405
1212 cleanup_all_probes(); 1406 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
1407 ret = release_all_trace_probes();
1408 if (ret < 0)
1409 return ret;
1410 }
1213 1411
1214 return seq_open(file, &probes_seq_op); 1412 return seq_open(file, &probes_seq_op);
1215} 1413}
@@ -1397,7 +1595,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
1397 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1595 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1398 1596
1399 if (!filter_current_check_discard(buffer, call, entry, event)) 1597 if (!filter_current_check_discard(buffer, call, entry, event))
1400 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1598 trace_nowake_buffer_unlock_commit_regs(buffer, event,
1599 irq_flags, pc, regs);
1401} 1600}
1402 1601
1403/* Kretprobe handler */ 1602/* Kretprobe handler */
@@ -1429,7 +1628,8 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
1429 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1628 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1430 1629
1431 if (!filter_current_check_discard(buffer, call, entry, event)) 1630 if (!filter_current_check_discard(buffer, call, entry, event))
1432 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1631 trace_nowake_buffer_unlock_commit_regs(buffer, event,
1632 irq_flags, pc, regs);
1433} 1633}
1434 1634
1435/* Event entry printers */ 1635/* Event entry printers */
@@ -1511,30 +1711,6 @@ partial:
1511 return TRACE_TYPE_PARTIAL_LINE; 1711 return TRACE_TYPE_PARTIAL_LINE;
1512} 1712}
1513 1713
1514static int probe_event_enable(struct ftrace_event_call *call)
1515{
1516 struct trace_probe *tp = (struct trace_probe *)call->data;
1517
1518 tp->flags |= TP_FLAG_TRACE;
1519 if (probe_is_return(tp))
1520 return enable_kretprobe(&tp->rp);
1521 else
1522 return enable_kprobe(&tp->rp.kp);
1523}
1524
1525static void probe_event_disable(struct ftrace_event_call *call)
1526{
1527 struct trace_probe *tp = (struct trace_probe *)call->data;
1528
1529 tp->flags &= ~TP_FLAG_TRACE;
1530 if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1531 if (probe_is_return(tp))
1532 disable_kretprobe(&tp->rp);
1533 else
1534 disable_kprobe(&tp->rp.kp);
1535 }
1536}
1537
1538#undef DEFINE_FIELD 1714#undef DEFINE_FIELD
1539#define DEFINE_FIELD(type, item, name, is_signed) \ 1715#define DEFINE_FIELD(type, item, name, is_signed) \
1540 do { \ 1716 do { \
@@ -1596,7 +1772,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1596 1772
1597 const char *fmt, *arg; 1773 const char *fmt, *arg;
1598 1774
1599 if (!probe_is_return(tp)) { 1775 if (!trace_probe_is_return(tp)) {
1600 fmt = "(%lx)"; 1776 fmt = "(%lx)";
1601 arg = "REC->" FIELD_STRING_IP; 1777 arg = "REC->" FIELD_STRING_IP;
1602 } else { 1778 } else {
@@ -1713,49 +1889,25 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1713 head = this_cpu_ptr(call->perf_events); 1889 head = this_cpu_ptr(call->perf_events);
1714 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); 1890 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
1715} 1891}
1716
1717static int probe_perf_enable(struct ftrace_event_call *call)
1718{
1719 struct trace_probe *tp = (struct trace_probe *)call->data;
1720
1721 tp->flags |= TP_FLAG_PROFILE;
1722
1723 if (probe_is_return(tp))
1724 return enable_kretprobe(&tp->rp);
1725 else
1726 return enable_kprobe(&tp->rp.kp);
1727}
1728
1729static void probe_perf_disable(struct ftrace_event_call *call)
1730{
1731 struct trace_probe *tp = (struct trace_probe *)call->data;
1732
1733 tp->flags &= ~TP_FLAG_PROFILE;
1734
1735 if (!(tp->flags & TP_FLAG_TRACE)) {
1736 if (probe_is_return(tp))
1737 disable_kretprobe(&tp->rp);
1738 else
1739 disable_kprobe(&tp->rp.kp);
1740 }
1741}
1742#endif /* CONFIG_PERF_EVENTS */ 1892#endif /* CONFIG_PERF_EVENTS */
1743 1893
1744static __kprobes 1894static __kprobes
1745int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) 1895int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
1746{ 1896{
1897 struct trace_probe *tp = (struct trace_probe *)event->data;
1898
1747 switch (type) { 1899 switch (type) {
1748 case TRACE_REG_REGISTER: 1900 case TRACE_REG_REGISTER:
1749 return probe_event_enable(event); 1901 return enable_trace_probe(tp, TP_FLAG_TRACE);
1750 case TRACE_REG_UNREGISTER: 1902 case TRACE_REG_UNREGISTER:
1751 probe_event_disable(event); 1903 disable_trace_probe(tp, TP_FLAG_TRACE);
1752 return 0; 1904 return 0;
1753 1905
1754#ifdef CONFIG_PERF_EVENTS 1906#ifdef CONFIG_PERF_EVENTS
1755 case TRACE_REG_PERF_REGISTER: 1907 case TRACE_REG_PERF_REGISTER:
1756 return probe_perf_enable(event); 1908 return enable_trace_probe(tp, TP_FLAG_PROFILE);
1757 case TRACE_REG_PERF_UNREGISTER: 1909 case TRACE_REG_PERF_UNREGISTER:
1758 probe_perf_disable(event); 1910 disable_trace_probe(tp, TP_FLAG_PROFILE);
1759 return 0; 1911 return 0;
1760#endif 1912#endif
1761 } 1913 }
@@ -1805,7 +1957,7 @@ static int register_probe_event(struct trace_probe *tp)
1805 1957
1806 /* Initialize ftrace_event_call */ 1958 /* Initialize ftrace_event_call */
1807 INIT_LIST_HEAD(&call->class->fields); 1959 INIT_LIST_HEAD(&call->class->fields);
1808 if (probe_is_return(tp)) { 1960 if (trace_probe_is_return(tp)) {
1809 call->event.funcs = &kretprobe_funcs; 1961 call->event.funcs = &kretprobe_funcs;
1810 call->class->define_fields = kretprobe_event_define_fields; 1962 call->class->define_fields = kretprobe_event_define_fields;
1811 } else { 1963 } else {
@@ -1844,6 +1996,9 @@ static __init int init_kprobe_trace(void)
1844 struct dentry *d_tracer; 1996 struct dentry *d_tracer;
1845 struct dentry *entry; 1997 struct dentry *entry;
1846 1998
1999 if (register_module_notifier(&trace_probe_module_nb))
2000 return -EINVAL;
2001
1847 d_tracer = tracing_init_dentry(); 2002 d_tracer = tracing_init_dentry();
1848 if (!d_tracer) 2003 if (!d_tracer)
1849 return 0; 2004 return 0;
@@ -1897,12 +2052,12 @@ static __init int kprobe_trace_self_tests_init(void)
1897 warn++; 2052 warn++;
1898 } else { 2053 } else {
1899 /* Enable trace point */ 2054 /* Enable trace point */
1900 tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM); 2055 tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
1901 if (WARN_ON_ONCE(tp == NULL)) { 2056 if (WARN_ON_ONCE(tp == NULL)) {
1902 pr_warning("error on getting new probe.\n"); 2057 pr_warning("error on getting new probe.\n");
1903 warn++; 2058 warn++;
1904 } else 2059 } else
1905 probe_event_enable(&tp->call); 2060 enable_trace_probe(tp, TP_FLAG_TRACE);
1906 } 2061 }
1907 2062
1908 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " 2063 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
@@ -1912,12 +2067,12 @@ static __init int kprobe_trace_self_tests_init(void)
1912 warn++; 2067 warn++;
1913 } else { 2068 } else {
1914 /* Enable trace point */ 2069 /* Enable trace point */
1915 tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM); 2070 tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
1916 if (WARN_ON_ONCE(tp == NULL)) { 2071 if (WARN_ON_ONCE(tp == NULL)) {
1917 pr_warning("error on getting new probe.\n"); 2072 pr_warning("error on getting new probe.\n");
1918 warn++; 2073 warn++;
1919 } else 2074 } else
1920 probe_event_enable(&tp->call); 2075 enable_trace_probe(tp, TP_FLAG_TRACE);
1921 } 2076 }
1922 2077
1923 if (warn) 2078 if (warn)
@@ -1925,6 +2080,21 @@ static __init int kprobe_trace_self_tests_init(void)
1925 2080
1926 ret = target(1, 2, 3, 4, 5, 6); 2081 ret = target(1, 2, 3, 4, 5, 6);
1927 2082
2083 /* Disable trace points before removing it */
2084 tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
2085 if (WARN_ON_ONCE(tp == NULL)) {
2086 pr_warning("error on getting test probe.\n");
2087 warn++;
2088 } else
2089 disable_trace_probe(tp, TP_FLAG_TRACE);
2090
2091 tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
2092 if (WARN_ON_ONCE(tp == NULL)) {
2093 pr_warning("error on getting 2nd test probe.\n");
2094 warn++;
2095 } else
2096 disable_trace_probe(tp, TP_FLAG_TRACE);
2097
1928 ret = command_trace_probe("-:testprobe"); 2098 ret = command_trace_probe("-:testprobe");
1929 if (WARN_ON_ONCE(ret)) { 2099 if (WARN_ON_ONCE(ret)) {
1930 pr_warning("error on deleting a probe.\n"); 2100 pr_warning("error on deleting a probe.\n");
@@ -1938,7 +2108,7 @@ static __init int kprobe_trace_self_tests_init(void)
1938 } 2108 }
1939 2109
1940end: 2110end:
1941 cleanup_all_probes(); 2111 release_all_trace_probes();
1942 if (warn) 2112 if (warn)
1943 pr_cont("NG: Some tests are failed. Please check them.\n"); 2113 pr_cont("NG: Some tests are failed. Please check them.\n");
1944 else 2114 else
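
The kprobe tracer now listens for modules coming in so that probes placed on a not-yet-loaded module ("MOD:SYM" targets) can be re-registered when the module appears. A minimal sketch of that notifier pattern, with hypothetical names (example_module_cb, example_module_nb) and the probe re-registration reduced to a log line:

static int example_module_cb(struct notifier_block *nb,
			     unsigned long val, void *data)
{
	struct module *mod = data;

	if (val != MODULE_STATE_COMING)
		return NOTIFY_DONE;

	/* Walk the probe list here and re-register probes that name mod. */
	pr_info("module %s coming: re-registering its probes\n", mod->name);

	return NOTIFY_DONE;
}

static struct notifier_block example_module_nb = {
	.notifier_call	= example_module_cb,
	.priority	= 1,	/* run after the kprobes core's own callback */
};

/* Registered once at init time: register_module_notifier(&example_module_nb); */
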
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 017fa376505..fd3c8aae55e 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -12,7 +12,7 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/time.h> 13#include <linux/time.h>
14 14
15#include <asm/atomic.h> 15#include <linux/atomic.h>
16 16
17#include "trace.h" 17#include "trace.h"
18#include "trace_output.h" 18#include "trace_output.h"
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index e37de492a9e..51999309a6c 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1107,19 +1107,20 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
1107{ 1107{
1108 struct stack_entry *field; 1108 struct stack_entry *field;
1109 struct trace_seq *s = &iter->seq; 1109 struct trace_seq *s = &iter->seq;
1110 int i; 1110 unsigned long *p;
1111 unsigned long *end;
1111 1112
1112 trace_assign_type(field, iter->ent); 1113 trace_assign_type(field, iter->ent);
1114 end = (unsigned long *)((long)iter->ent + iter->ent_size);
1113 1115
1114 if (!trace_seq_puts(s, "<stack trace>\n")) 1116 if (!trace_seq_puts(s, "<stack trace>\n"))
1115 goto partial; 1117 goto partial;
1116 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { 1118
1117 if (!field->caller[i] || (field->caller[i] == ULONG_MAX)) 1119 for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) {
1118 break;
1119 if (!trace_seq_puts(s, " => ")) 1120 if (!trace_seq_puts(s, " => "))
1120 goto partial; 1121 goto partial;
1121 1122
1122 if (!seq_print_ip_sym(s, field->caller[i], flags)) 1123 if (!seq_print_ip_sym(s, *p, flags))
1123 goto partial; 1124 goto partial;
1124 if (!trace_seq_puts(s, "\n")) 1125 if (!trace_seq_puts(s, "\n"))
1125 goto partial; 1126 goto partial;
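
With stack_entry now carrying a size field and a dynamic caller array (see the trace_entries.h hunk earlier), the printer walks the record up to iter->ent_size instead of a fixed FTRACE_STACK_ENTRIES bound. A sketch of that walk in isolation (example_walk_stack is a hypothetical helper; the real code emits through trace_seq rather than printk):

static void example_walk_stack(struct stack_entry *field,
			       struct trace_entry *ent, int ent_size)
{
	unsigned long *p;
	unsigned long *end = (unsigned long *)((long)ent + ent_size);

	/* Stop at the ULONG_MAX terminator or at the end of the record. */
	for (p = field->caller; p && *p != ULONG_MAX && p < end; p++)
		printk(KERN_INFO " => %pS\n", (void *)*p);
}
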
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index f029dd4fd2c..e4a70c0c71b 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -227,7 +227,9 @@ static void wakeup_trace_close(struct trace_iterator *iter)
227 graph_trace_close(iter); 227 graph_trace_close(iter);
228} 228}
229 229
230#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC) 230#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC | \
231 TRACE_GRAPH_PRINT_ABS_TIME | \
232 TRACE_GRAPH_PRINT_DURATION)
231 233
232static enum print_line_t wakeup_print_line(struct trace_iterator *iter) 234static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
233{ 235{
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index b0b53b8e4c2..77575b386d9 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -156,20 +156,11 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
156{ 156{
157 long *ptr = filp->private_data; 157 long *ptr = filp->private_data;
158 unsigned long val, flags; 158 unsigned long val, flags;
159 char buf[64];
160 int ret; 159 int ret;
161 int cpu; 160 int cpu;
162 161
163 if (count >= sizeof(buf)) 162 ret = kstrtoul_from_user(ubuf, count, 10, &val);
164 return -EINVAL; 163 if (ret)
165
166 if (copy_from_user(&buf, ubuf, count))
167 return -EFAULT;
168
169 buf[count] = 0;
170
171 ret = strict_strtoul(buf, 10, &val);
172 if (ret < 0)
173 return ret; 164 return ret;
174 165
175 local_irq_save(flags); 166 local_irq_save(flags);
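
Several write handlers in this series (event enable, system enable, stack_max_size) drop the open-coded copy_from_user() + strict_strtoul() pair in favour of kstrtoul_from_user(), which copies, NUL-terminates and parses in one call. A minimal sketch of the resulting shape (example_write is a hypothetical debugfs write handler, not one of the converted functions):

static ssize_t example_write(struct file *filp, const char __user *ubuf,
			     size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);	/* copy + parse */
	if (ret)
		return ret;

	if (val > 1)		/* accept only 0 or 1, as the converted callers do */
		return -EINVAL;

	*ppos += cnt;
	return cnt;
}
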
diff --git a/kernel/trace/tracedump.c b/kernel/trace/tracedump.c
new file mode 100644
index 00000000000..a83532bc36d
--- /dev/null
+++ b/kernel/trace/tracedump.c
@@ -0,0 +1,682 @@
1/*
2 * kernel/trace/tracedump.c
3 *
4 * Copyright (c) 2011, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 */
20
21#include <linux/console.h>
22#include <linux/cpumask.h>
23#include <linux/init.h>
24#include <linux/irqflags.h>
25#include <linux/module.h>
26#include <linux/moduleparam.h>
27#include <linux/mutex.h>
28#include <linux/notifier.h>
29#include <linux/proc_fs.h>
30#include <linux/ring_buffer.h>
31#include <linux/sched.h>
32#include <linux/smp.h>
33#include <linux/string.h>
34#include <linux/threads.h>
35#include <linux/tracedump.h>
36#include <linux/uaccess.h>
37#include <linux/vmalloc.h>
38#include <linux/zlib.h>
39
40#include "trace.h"
41#include "trace_output.h"
42
43#define CPU_MAX (NR_CPUS-1)
44
45#define TRYM(fn, ...) do { \
46 int try_error = (fn); \
47 if (try_error < 0) { \
48 printk(__VA_ARGS__); \
49 return try_error; \
50 } \
51} while (0)
52
53#define TRY(fn) TRYM(fn, TAG "Caught error from %s in %s\n", #fn, __func__)
54
55/* Stolen from printk.c */
56#define for_each_console(con) \
57 for (con = console_drivers; con != NULL; con = con->next)
58
59#define TAG KERN_ERR "tracedump: "
60
61#define TD_MIN_CONSUME 2000
62#define TD_COMPRESS_CHUNK 0x8000
63
64static DEFINE_MUTEX(tracedump_proc_lock);
65
66static const char MAGIC_NUMBER[9] = "TRACEDUMP";
67static const char CPU_DELIM[7] = "CPU_END";
68#define CMDLINE_DELIM "|"
69
70/* Type of output */
71static bool current_format;
72static bool format_ascii;
73module_param(format_ascii, bool, S_IRUGO | S_IWUSR);
74MODULE_PARM_DESC(format_ascii, "Dump ascii or raw data");
75
76/* Max size of output */
77static uint panic_size = 0x80000;
78module_param(panic_size, uint, S_IRUGO | S_IWUSR);
79MODULE_PARM_DESC(panic_size, "Max dump size during kernel panic (bytes)");
80
81static uint compress_level = 9;
82module_param(compress_level, uint, S_IRUGO | S_IWUSR);
83MODULE_PARM_DESC(compress_level, "Level of compression to use. [0-9]");
84
85static char out_buf[TD_COMPRESS_CHUNK];
86static z_stream stream;
87static int compress_done;
88static int flush;
89
90static int old_trace_flags;
91
92static struct trace_iterator iter;
93static struct pager_s {
94 struct trace_array *tr;
95 void *spare;
96 int cpu;
97 int len;
98 char __user *ubuf;
99} pager;
100
101static char cmdline_buf[16+TASK_COMM_LEN];
102
103static int print_to_console(const char *buf, size_t len)
104{
105 struct console *con;
106
107 /* Stolen from printk.c */
108 for_each_console(con) {
109 if ((con->flags & CON_ENABLED) && con->write &&
110 (cpu_online(smp_processor_id()) ||
111 (con->flags & CON_ANYTIME)))
112 con->write(con, buf, len);
113 }
114 return 0;
115}
116
117static int print_to_user(const char *buf, size_t len)
118{
119 int size;
120 size = copy_to_user(pager.ubuf, buf, len);
121 if (size > 0) {
122 printk(TAG "Failed to copy to user %d bytes\n", size);
123 return -EINVAL;
124 }
125 return 0;
126}
127
128static int print(const char *buf, size_t len, int print_to)
129{
130 if (print_to == TD_PRINT_CONSOLE)
131 TRY(print_to_console(buf, len));
132 else if (print_to == TD_PRINT_USER)
133 TRY(print_to_user(buf, len));
134 return 0;
135}
136
137/* print_magic will print MAGIC_NUMBER using the
138 * print function selected by print_to.
139 */
140static inline ssize_t print_magic(int print_to)
141{
142 print(MAGIC_NUMBER, sizeof(MAGIC_NUMBER), print_to);
143 return sizeof(MAGIC_NUMBER);
144}
145
146static int iter_init(void)
147{
148 int cpu;
149
150 /* Make iter point to global ring buffer used in trace. */
151 trace_init_global_iter(&iter);
152
153 /* Disable tracing */
154 for_each_tracing_cpu(cpu) {
155 atomic_inc(&iter.tr->data[cpu]->disabled);
156 }
157
158 /* Save flags */
159 old_trace_flags = trace_flags;
160
 161 /* Don't look at memory in panic mode. */
162 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
163
164 /* Prepare ring buffer iter */
165 for_each_tracing_cpu(cpu) {
166 iter.buffer_iter[cpu] =
167 ring_buffer_read_prepare(iter.tr->buffer, cpu);
168 }
169 ring_buffer_read_prepare_sync();
170 for_each_tracing_cpu(cpu) {
171 ring_buffer_read_start(iter.buffer_iter[cpu]);
172 tracing_iter_reset(&iter, cpu);
173 }
174 return 0;
175}
176
177/* iter_next gets the next entry in the ring buffer, ordered by time.
178 * If there are no more entries, returns 0.
179 */
180static ssize_t iter_next(void)
181{
182 /* Zero out the iterator's seq */
183 memset(&iter.seq, 0,
184 sizeof(struct trace_iterator) -
185 offsetof(struct trace_iterator, seq));
186
187 while (!trace_empty(&iter)) {
188 if (trace_find_next_entry_inc(&iter) == NULL) {
189 printk(TAG "trace_find_next_entry failed!\n");
190 return -EINVAL;
191 }
192
193 /* Copy the ring buffer data to iterator's seq */
194 print_trace_line(&iter);
195 if (iter.seq.len != 0)
196 return iter.seq.len;
197 }
198 return 0;
199}
200
201static int iter_deinit(void)
202{
203 int cpu;
204 /* Enable tracing */
205 for_each_tracing_cpu(cpu) {
206 ring_buffer_read_finish(iter.buffer_iter[cpu]);
207 }
208 for_each_tracing_cpu(cpu) {
209 atomic_dec(&iter.tr->data[cpu]->disabled);
210 }
211
212 /* Restore flags */
213 trace_flags = old_trace_flags;
214 return 0;
215}
216
217static int pager_init(void)
218{
219 int cpu;
220
221 /* Need to do this to get a pointer to global_trace (iter.tr).
222 Lame, I know. */
223 trace_init_global_iter(&iter);
224
225 /* Turn off tracing */
226 for_each_tracing_cpu(cpu) {
227 atomic_inc(&iter.tr->data[cpu]->disabled);
228 }
229
230 memset(&pager, 0, sizeof(pager));
231 pager.tr = iter.tr;
232 pager.len = TD_COMPRESS_CHUNK;
233
234 return 0;
235}
236
237/* pager_next_cpu moves the pager to the next cpu.
238 * Returns 0 if pager is done, else 1.
239 */
240static ssize_t pager_next_cpu(void)
241{
242 if (pager.cpu <= CPU_MAX) {
243 pager.cpu += 1;
244 return 1;
245 }
246
247 return 0;
248}
249
250/* pager_next gets the next page of data from the ring buffer
251 * of the current cpu. Returns page size or 0 if no more data.
252 */
253static ssize_t pager_next(void)
254{
255 int ret;
256
257 if (pager.cpu > CPU_MAX)
258 return 0;
259
260 if (!pager.spare)
261 pager.spare = ring_buffer_alloc_read_page(pager.tr->buffer, pager.cpu);
262 if (!pager.spare) {
263 printk(TAG "ring_buffer_alloc_read_page failed!");
264 return -ENOMEM;
265 }
266
267 ret = ring_buffer_read_page(pager.tr->buffer,
268 &pager.spare,
269 pager.len,
270 pager.cpu, 0);
271 if (ret < 0)
272 return 0;
273
274 return PAGE_SIZE;
275}
276
277static int pager_deinit(void)
278{
279 int cpu;
280 if (pager.spare != NULL)
281 ring_buffer_free_read_page(pager.tr->buffer, pager.spare);
282
283 for_each_tracing_cpu(cpu) {
284 atomic_dec(&iter.tr->data[cpu]->disabled);
285 }
286 return 0;
287}
288
289/* cmdline_next gets the next saved cmdline from the trace and
 290 * puts it in cmdline_buf. Returns the size of the cmdline, or 0 once all
 291 * cmdlines have been returned; the scan resets itself on a subsequent call.
292 */
293static ssize_t cmdline_next(void)
294{
295 static int pid;
296 ssize_t size = 0;
297
298 if (pid >= PID_MAX_DEFAULT)
299 pid = -1;
300
301 while (size == 0 && pid < PID_MAX_DEFAULT) {
302 pid++;
303 trace_find_cmdline(pid, cmdline_buf);
304 if (!strncmp(cmdline_buf, "<...>", 5))
305 continue;
306
307 sprintf(&cmdline_buf[strlen(cmdline_buf)], " %d"
308 CMDLINE_DELIM, pid);
309 size = strlen(cmdline_buf);
310 }
311 return size;
312}
313
 314/* consume_events removes the first 'num' entries from the ring buffer. */
315static int consume_events(size_t num)
316{
317 TRY(iter_init());
318 for (; num > 0 && !trace_empty(&iter); num--) {
319 trace_find_next_entry_inc(&iter);
320 ring_buffer_consume(iter.tr->buffer, iter.cpu, &iter.ts,
321 &iter.lost_events);
322 }
323 TRY(iter_deinit());
324 return 0;
325}
326
327static int data_init(void)
328{
329 if (current_format)
330 TRY(iter_init());
331 else
332 TRY(pager_init());
333 return 0;
334}
335
336/* data_next will figure out the right 'next' function to
337 * call and will select the right buffer to pass back
338 * to compress_next.
339 *
340 * iter_next should be used to get data entry-by-entry, ordered
341 * by time, which is what we need in order to convert it to ascii.
342 *
343 * pager_next will return a full page of raw data at a time, one
344 * CPU at a time. pager_next_cpu must be called to get the next CPU.
345 * cmdline_next will get the next saved cmdline
346 */
347static ssize_t data_next(const char **buf)
348{
349 ssize_t size;
350
351 if (current_format) {
352 TRY(size = iter_next());
353 *buf = iter.seq.buffer;
354 } else {
355 TRY(size = pager_next());
356 *buf = pager.spare;
357 if (size == 0) {
358 if (pager_next_cpu()) {
359 size = sizeof(CPU_DELIM);
360 *buf = CPU_DELIM;
361 } else {
362 TRY(size = cmdline_next());
363 *buf = cmdline_buf;
364 }
365 }
366 }
367 return size;
368}
369
370static int data_deinit(void)
371{
372 if (current_format)
373 TRY(iter_deinit());
374 else
375 TRY(pager_deinit());
376 return 0;
377}
378
379static int compress_init(void)
380{
381 int workspacesize, ret;
382
383 compress_done = 0;
384 flush = Z_NO_FLUSH;
385 stream.data_type = current_format ? Z_ASCII : Z_BINARY;
386 workspacesize = zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL);
387 stream.workspace = vmalloc(workspacesize);
388 if (!stream.workspace) {
389 printk(TAG "Could not allocate "
390 "enough memory for zlib!\n");
391 return -ENOMEM;
392 }
393 memset(stream.workspace, 0, workspacesize);
394
395 ret = zlib_deflateInit(&stream, compress_level);
396 if (ret != Z_OK) {
397 printk(TAG "%s\n", stream.msg);
398 return ret;
399 }
400 stream.avail_in = 0;
401 stream.avail_out = 0;
402 TRY(data_init());
403 return 0;
404}
405
406/* compress_next will compress up to min(max_out, TD_COMPRESS_CHUNK) bytes
407 * of data into the output buffer. It gets the data by calling data_next.
408 * It will return the most data it possibly can. If it returns 0, then
409 * there is no more data.
410 *
411 * By the way that zlib works, each call to zlib_deflate will possibly
412 * consume up to avail_in bytes from next_in, and will fill up to
413 * avail_out bytes in next_out. Once flush == Z_FINISH, it can not take
414 * any more input. It will output until it is finished, and will return
415 * Z_STREAM_END.
416 */
417static ssize_t compress_next(size_t max_out)
418{
419 ssize_t ret;
420 max_out = min(max_out, (size_t)TD_COMPRESS_CHUNK);
421 stream.next_out = out_buf;
422 stream.avail_out = max_out;
423 while (stream.avail_out > 0 && !compress_done) {
424 if (stream.avail_in == 0 && flush != Z_FINISH) {
425 TRY(stream.avail_in =
426 data_next((const char **)&stream.next_in));
427 flush = (stream.avail_in == 0) ? Z_FINISH : Z_NO_FLUSH;
428 }
429 if (stream.next_in != NULL) {
430 TRYM((ret = zlib_deflate(&stream, flush)),
431 "zlib: %s\n", stream.msg);
432 compress_done = (ret == Z_STREAM_END);
433 }
434 }
435 ret = max_out - stream.avail_out;
436 return ret;
437}
438
439static int compress_deinit(void)
440{
441 TRY(data_deinit());
442
443 zlib_deflateEnd(&stream);
444 vfree(stream.workspace);
445
446 /* TODO: remove */
447 printk(TAG "Total in: %ld\n", stream.total_in);
448 printk(TAG "Total out: %ld\n", stream.total_out);
449 return stream.total_out;
450}
451
452static int compress_reset(void)
453{
454 TRY(compress_deinit());
455 TRY(compress_init());
456 return 0;
457}
458
459/* tracedump_init initializes all tracedump components.
460 * Call this before tracedump_next
461 */
462int tracedump_init(void)
463{
464 TRY(compress_init());
465 return 0;
466}
467
468/* tracedump_next will print up to max_out data from the tracing ring
469 * buffers using the print function selected by print_to. The data is
470 * compressed using zlib.
471 *
472 * The output type of the data is specified by the format_ascii module
473 * parameter. If format_ascii == 1, human-readable data will be output.
474 * Otherwise, it will output raw data from the ring buffer in cpu order,
475 * followed by the saved_cmdlines data.
476 */
477ssize_t tracedump_next(size_t max_out, int print_to)
478{
479 ssize_t size;
480 TRY(size = compress_next(max_out));
481 print(out_buf, size, print_to);
482 return size;
483}
484
485/* tracedump_all will print all data in the tracing ring buffers using
486 * the print function selected by print_to. The data is compressed using
487 * zlib, and is surrounded by MAGIC_NUMBER.
488 *
489 * The output type of the data is specified by the format_ascii module
490 * parameter. If format_ascii == 1, human-readable data will be output.
491 * Otherwise, it will output raw data from the ring buffer in cpu order,
492 * followed by the saved_cmdlines data.
493 */
494ssize_t tracedump_all(int print_to)
495{
496 ssize_t ret, size = 0;
497 TRY(size += print_magic(print_to));
498
499 do {
500 /* Here the size used doesn't really matter,
501 * since we're dumping everything. */
502 TRY(ret = tracedump_next(0xFFFFFFFF, print_to));
503 size += ret;
504 } while (ret > 0);
505
506 TRY(size += print_magic(print_to));
507
508 return size;
509}
510
511/* tracedump_deinit deinitializes all tracedump components.
512 * This must be called, even on error.
513 */
514int tracedump_deinit(void)
515{
516 TRY(compress_deinit());
517 return 0;
518}
519
520/* tracedump_reset reinitializes all tracedump components. */
521int tracedump_reset(void)
522{
523 TRY(compress_reset());
524 return 0;
525}
526
527
528
529/* tracedump_open opens the tracedump file for reading. */
530static int tracedump_open(struct inode *inode, struct file *file)
531{
532 int ret;
533 mutex_lock(&tracedump_proc_lock);
534 current_format = format_ascii;
535 ret = tracedump_init();
536 if (ret < 0)
537 goto err;
538
539 ret = nonseekable_open(inode, file);
540 if (ret < 0)
541 goto err;
542 return ret;
543
544err:
545 mutex_unlock(&tracedump_proc_lock);
546 return ret;
547}
548
 549/* tracedump_read reads data from tracedump_next and prints
550 * it to userspace. It will surround the data with MAGIC_NUMBER.
551 */
552static ssize_t tracedump_read(struct file *file, char __user *buf,
553 size_t len, loff_t *offset)
554{
555 static int done;
556 ssize_t size = 0;
557
558 pager.ubuf = buf;
559
560 if (*offset == 0) {
561 done = 0;
562 TRY(size = print_magic(TD_PRINT_USER));
563 } else if (!done) {
564 TRY(size = tracedump_next(len, TD_PRINT_USER));
565 if (size == 0) {
566 TRY(size = print_magic(TD_PRINT_USER));
567 done = 1;
568 }
569 }
570
571 *offset += size;
572
573 return size;
574}
575
576static int tracedump_release(struct inode *inode, struct file *file)
577{
578 int ret;
579 ret = tracedump_deinit();
580 mutex_unlock(&tracedump_proc_lock);
581 return ret;
582}
583
584/* tracedump_dump dumps all tracing data from the tracing ring buffers
585 * to all consoles. For details about the output format, see
586 * tracedump_all.
587
588 * At most max_out bytes are dumped. To accomplish this,
589 * tracedump_dump calls tracedump_all several times without writing the data,
590 * each time tossing out old data until it reaches its goal.
591 *
592 * Note: dumping raw pages currently does NOT follow the size limit.
593 */
594
595int tracedump_dump(size_t max_out)
596{
597 ssize_t size;
598 size_t consume;
599
600 printk(TAG "\n");
601
602 tracedump_init();
603
604 if (format_ascii) {
605 size = tracedump_all(TD_NO_PRINT);
606 if (size < 0) {
607 printk(TAG "failed to dump\n");
608 goto out;
609 }
610 while (size > max_out) {
611 TRY(tracedump_deinit());
612 /* Events take more or less 60 ascii bytes each,
613 not counting compression */
614 consume = TD_MIN_CONSUME + (size - max_out) /
615 (60 / (compress_level + 1));
616 TRY(consume_events(consume));
617 TRY(tracedump_init());
618 size = tracedump_all(TD_NO_PRINT);
619 if (size < 0) {
620 printk(TAG "failed to dump\n");
621 goto out;
622 }
623 }
624
625 TRY(tracedump_reset());
626 }
627 size = tracedump_all(TD_PRINT_CONSOLE);
628 if (size < 0) {
629 printk(TAG "failed to dump\n");
630 goto out;
631 }
632
633out:
634 tracedump_deinit();
635 printk(KERN_INFO "\n" TAG " end\n");
636 return size;
637}
638
639static const struct file_operations tracedump_fops = {
640 .owner = THIS_MODULE,
641 .open = tracedump_open,
642 .read = tracedump_read,
643 .release = tracedump_release,
644};
645
646#ifdef CONFIG_TRACEDUMP_PANIC
647static int tracedump_panic_handler(struct notifier_block *this,
648 unsigned long event, void *unused)
649{
650 tracedump_dump(panic_size);
651 return 0;
652}
653
654static struct notifier_block tracedump_panic_notifier = {
655 .notifier_call = tracedump_panic_handler,
656 .next = NULL,
657 .priority = 150 /* priority: INT_MAX >= x >= 0 */
658};
659#endif
660
661static int __init tracedump_initcall(void)
662{
663#ifdef CONFIG_TRACEDUMP_PROCFS
664 struct proc_dir_entry *entry;
665
666 /* Create a procfs file for easy dumping */
667 entry = create_proc_entry("tracedump", S_IFREG | S_IRUGO, NULL);
668 if (!entry)
669 printk(TAG "failed to create proc entry\n");
670 else
671 entry->proc_fops = &tracedump_fops;
672#endif
673
674#ifdef CONFIG_TRACEDUMP_PANIC
675 /* Automatically dump to console on a kernel panic */
676 atomic_notifier_chain_register(&panic_notifier_list,
677 &tracedump_panic_notifier);
678#endif
679 return 0;
680}
681
682early_initcall(tracedump_initcall);
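
Note on the proc interface above: tracedump_read() emits the 9-byte MAGIC_NUMBER, then the zlib-compressed payload produced by compress_next(), then the magic again, so a reader has to strip both markers before inflating. The following userspace sketch is not part of this patch; buffer sizes and the minimal error handling are arbitrary choices. With format_ascii=1 the inflated result is the human-readable trace; otherwise it is raw ring-buffer pages per CPU separated by CPU_END, followed by the saved cmdlines.

/* Userspace sketch: read /proc/tracedump, strip the "TRACEDUMP" markers,
 * and inflate the zlib stream between them to stdout.  Build with -lz. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zlib.h>

#define MAGIC_LEN 9			/* strlen("TRACEDUMP"); no NUL is dumped */

int main(void)
{
	unsigned char chunk[1 << 15], out[1 << 15], *buf = NULL, *tmp;
	size_t n, total = 0;
	z_stream zs;
	int ret;
	FILE *f = fopen("/proc/tracedump", "rb");

	if (!f)
		return 1;
	while ((n = fread(chunk, 1, sizeof(chunk), f)) > 0) {
		tmp = realloc(buf, total + n);	/* keep the whole dump in memory */
		if (!tmp) {
			free(buf);
			fclose(f);
			return 1;
		}
		buf = tmp;
		memcpy(buf + total, chunk, n);
		total += n;
	}
	fclose(f);
	if (!buf || total <= 2 * MAGIC_LEN)
		return 1;

	memset(&zs, 0, sizeof(zs));
	if (inflateInit(&zs) != Z_OK)
		return 1;
	zs.next_in = buf + MAGIC_LEN;		/* skip the leading magic */
	zs.avail_in = total - 2 * MAGIC_LEN;	/* and drop the trailing one */
	do {
		zs.next_out = out;
		zs.avail_out = sizeof(out);
		ret = inflate(&zs, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;			/* corrupt or truncated stream */
		fwrite(out, 1, sizeof(out) - zs.avail_out, stdout);
	} while (ret != Z_STREAM_END);
	inflateEnd(&zs);
	free(buf);
	return ret == Z_STREAM_END ? 0 : 1;
}
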
diff --git a/kernel/trace/tracelevel.c b/kernel/trace/tracelevel.c
new file mode 100644
index 00000000000..9f8b8eedbb5
--- /dev/null
+++ b/kernel/trace/tracelevel.c
@@ -0,0 +1,142 @@
1/*
2 * kernel/trace/tracelevel.c
3 *
4 * Copyright (c) 2011, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 */
20
21#include <linux/ftrace_event.h>
22#include <linux/list.h>
23#include <linux/moduleparam.h>
24#include <linux/mutex.h>
25#include <linux/tracelevel.h>
26#include <linux/vmalloc.h>
27
28#include "trace.h"
29
30#define TAG KERN_ERR "tracelevel: "
31
32struct tracelevel_record {
33 struct list_head list;
34 char *name;
35 int level;
36};
37
38static LIST_HEAD(tracelevel_list);
39
40static bool started;
41static unsigned int tracelevel_level = TRACELEVEL_DEFAULT;
42
43static DEFINE_MUTEX(tracelevel_record_lock);
44
45/* tracelevel_set_event sets a single event if set = 1, or
46 * clears an event if set = 0.
47 */
48static int tracelevel_set_event(struct tracelevel_record *evt, bool set)
49{
50 if (trace_set_clr_event(NULL, evt->name, set) < 0) {
51 printk(TAG "failed to set event %s\n", evt->name);
52 return -EINVAL;
53 }
54 return 0;
55}
56
57/* Registers an event. If possible, it also sets it.
58 * If not, we'll set it in tracelevel_init.
59 */
60int __tracelevel_register(char *name, unsigned int level)
61{
62 struct tracelevel_record *evt = (struct tracelevel_record *)
63 vmalloc(sizeof(struct tracelevel_record));
64 if (!evt) {
65 printk(TAG "failed to allocate tracelevel_record for %s\n",
66 name);
67 return -ENOMEM;
68 }
69
70 evt->name = name;
71 evt->level = level;
72
73 mutex_lock(&tracelevel_record_lock);
74 list_add(&evt->list, &tracelevel_list);
75 mutex_unlock(&tracelevel_record_lock);
76
77 if (level >= tracelevel_level && started)
78 tracelevel_set_event(evt, 1);
79 return 0;
80}
81
82/* tracelevel_set_level sets the global level, clears events
83 * lower than that level, and enables events greater or equal.
84 */
85int tracelevel_set_level(int level)
86{
87 struct tracelevel_record *evt = NULL;
88
89 if (level < 0 || level > TRACELEVEL_MAX)
90 return -EINVAL;
91 tracelevel_level = level;
92
93 mutex_lock(&tracelevel_record_lock);
94 list_for_each_entry(evt, &tracelevel_list, list) {
95 if (evt->level >= level)
96 tracelevel_set_event(evt, 1);
97 else
98 tracelevel_set_event(evt, 0);
99 }
100 mutex_unlock(&tracelevel_record_lock);
101 return 0;
102}
103
104static int param_set_level(const char *val, const struct kernel_param *kp)
105{
106 int level, ret;
107 ret = strict_strtol(val, 0, &level);
108 if (ret < 0)
109 return ret;
110 return tracelevel_set_level(level);
111}
112
113static int param_get_level(char *buffer, const struct kernel_param *kp)
114{
115 return param_get_int(buffer, kp);
116}
117
118static struct kernel_param_ops tracelevel_level_ops = {
119 .set = param_set_level,
120 .get = param_get_level
121};
122
123module_param_cb(level, &tracelevel_level_ops, &tracelevel_level, 0644);
124
125/* Turn on the tracing that has been registered thus far. */
126static int __init tracelevel_init(void)
127{
128 int ret;
129 started = true;
130
 131 /* The ring buffer is initialized to 1 page until the user sets a tracer.
 132 * Since we're enabling events manually, we need to ask for the expanded buffer.
133 */
134 ret = tracing_update_buffers();
135 if (ret < 0)
136 return ret;
137
138 return tracelevel_set_level(tracelevel_level);
139}
140
141/* Tracing mechanism is set up during fs_initcall. */
142fs_initcall_sync(tracelevel_init);
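
Usage sketch for the interface above (hypothetical: linux/tracelevel.h is not part of this hunk, so the exact declarations and the value of TRACELEVEL_MAX are assumptions). A subsystem registers an existing ftrace event by name together with a priority, typically from an initcall; events whose priority is at or above the current level are enabled once tracelevel_init() has run, and again whenever the 'level' module parameter changes.

/* Hypothetical caller, for illustration only.  The event name must refer
 * to an existing ftrace event and must stay allocated, since tracelevel
 * stores the pointer rather than copying the string. */
#include <linux/init.h>
#include <linux/tracelevel.h>

static char example_event_name[] = "sched_wakeup";	/* illustrative event */

static int __init example_tracelevel_setup(void)
{
	/* Priority 5 is arbitrary; presumably it should lie in [0, TRACELEVEL_MAX]. */
	return __tracelevel_register(example_event_name, 5);
}
late_initcall(example_tracelevel_setup);
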