about summary refs log tree commit diff stats
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r-- kernel/trace/Kconfig 13
-rw-r--r-- kernel/trace/blktrace.c 12
-rw-r--r-- kernel/trace/ftrace.c 124
-rw-r--r-- kernel/trace/kmemtrace.c 149
-rw-r--r-- kernel/trace/ring_buffer.c 1112
-rw-r--r-- kernel/trace/trace.c 691
-rw-r--r-- kernel/trace/trace.h 76
-rw-r--r-- kernel/trace/trace_boot.c 16
-rw-r--r-- kernel/trace/trace_events.c 146
-rw-r--r-- kernel/trace/trace_events_filter.c 261
-rw-r--r-- kernel/trace/trace_export.c 28
-rw-r--r-- kernel/trace/trace_functions.c 4
-rw-r--r-- kernel/trace/trace_functions_graph.c 166
-rw-r--r-- kernel/trace/trace_irqsoff.c 3
-rw-r--r-- kernel/trace/trace_mmiotrace.c 10
-rw-r--r-- kernel/trace/trace_power.c 22
-rw-r--r-- kernel/trace/trace_sched_switch.c 59
-rw-r--r-- kernel/trace/trace_sched_wakeup.c 7
-rw-r--r-- kernel/trace/trace_selftest.c 1
-rw-r--r-- kernel/trace/trace_stack.c 43
-rw-r--r-- kernel/trace/trace_stat.c 17
-rw-r--r-- kernel/trace/trace_stat.h 2
-rw-r--r-- kernel/trace/trace_syscalls.c 471
-rw-r--r-- kernel/trace/trace_workqueue.c 32
24 files changed, 2374 insertions, 1091 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 019f380fd76..1ea0d1234f4 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -41,7 +41,7 @@ config HAVE_FTRACE_MCOUNT_RECORD
41config HAVE_HW_BRANCH_TRACER 41config HAVE_HW_BRANCH_TRACER
42 bool 42 bool
43 43
44config HAVE_FTRACE_SYSCALLS 44config HAVE_SYSCALL_TRACEPOINTS
45 bool 45 bool
46 46
47config TRACER_MAX_TRACE 47config TRACER_MAX_TRACE
@@ -60,9 +60,14 @@ config EVENT_TRACING
60 bool 60 bool
61 61
62config CONTEXT_SWITCH_TRACER 62config CONTEXT_SWITCH_TRACER
63 select MARKERS
64 bool 63 bool
65 64
65config RING_BUFFER_ALLOW_SWAP
66 bool
67 help
68 Allow the use of ring_buffer_swap_cpu.
69 Adds a very slight overhead to tracing when enabled.
70
66# All tracer options should select GENERIC_TRACER. For those options that are 71# All tracer options should select GENERIC_TRACER. For those options that are
67# enabled by all tracers (context switch and event tracer) they select TRACING. 72# enabled by all tracers (context switch and event tracer) they select TRACING.
68# This allows those options to appear when no other tracer is selected. But the 73# This allows those options to appear when no other tracer is selected. But the
@@ -147,6 +152,7 @@ config IRQSOFF_TRACER
147 select TRACE_IRQFLAGS 152 select TRACE_IRQFLAGS
148 select GENERIC_TRACER 153 select GENERIC_TRACER
149 select TRACER_MAX_TRACE 154 select TRACER_MAX_TRACE
155 select RING_BUFFER_ALLOW_SWAP
150 help 156 help
151 This option measures the time spent in irqs-off critical 157 This option measures the time spent in irqs-off critical
152 sections, with microsecond accuracy. 158 sections, with microsecond accuracy.
@@ -168,6 +174,7 @@ config PREEMPT_TRACER
168 depends on PREEMPT 174 depends on PREEMPT
169 select GENERIC_TRACER 175 select GENERIC_TRACER
170 select TRACER_MAX_TRACE 176 select TRACER_MAX_TRACE
177 select RING_BUFFER_ALLOW_SWAP
171 help 178 help
172 This option measures the time spent in preemption off critical 179 This option measures the time spent in preemption off critical
173 sections, with microsecond accuracy. 180 sections, with microsecond accuracy.
@@ -211,7 +218,7 @@ config ENABLE_DEFAULT_TRACERS
211 218
212config FTRACE_SYSCALLS 219config FTRACE_SYSCALLS
213 bool "Trace syscalls" 220 bool "Trace syscalls"
214 depends on HAVE_FTRACE_SYSCALLS 221 depends on HAVE_SYSCALL_TRACEPOINTS
215 select GENERIC_TRACER 222 select GENERIC_TRACER
216 select KALLSYMS 223 select KALLSYMS
217 help 224 help
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7a34cb563fe..3eb159c277c 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -65,13 +65,15 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
65{ 65{
66 struct blk_io_trace *t; 66 struct blk_io_trace *t;
67 struct ring_buffer_event *event = NULL; 67 struct ring_buffer_event *event = NULL;
68 struct ring_buffer *buffer = NULL;
68 int pc = 0; 69 int pc = 0;
69 int cpu = smp_processor_id(); 70 int cpu = smp_processor_id();
70 bool blk_tracer = blk_tracer_enabled; 71 bool blk_tracer = blk_tracer_enabled;
71 72
72 if (blk_tracer) { 73 if (blk_tracer) {
74 buffer = blk_tr->buffer;
73 pc = preempt_count(); 75 pc = preempt_count();
74 event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, 76 event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
75 sizeof(*t) + len, 77 sizeof(*t) + len,
76 0, pc); 78 0, pc);
77 if (!event) 79 if (!event)
@@ -96,7 +98,7 @@ record_it:
96 memcpy((void *) t + sizeof(*t), data, len); 98 memcpy((void *) t + sizeof(*t), data, len);
97 99
98 if (blk_tracer) 100 if (blk_tracer)
99 trace_buffer_unlock_commit(blk_tr, event, 0, pc); 101 trace_buffer_unlock_commit(buffer, event, 0, pc);
100 } 102 }
101} 103}
102 104
@@ -179,6 +181,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
179{ 181{
180 struct task_struct *tsk = current; 182 struct task_struct *tsk = current;
181 struct ring_buffer_event *event = NULL; 183 struct ring_buffer_event *event = NULL;
184 struct ring_buffer *buffer = NULL;
182 struct blk_io_trace *t; 185 struct blk_io_trace *t;
183 unsigned long flags = 0; 186 unsigned long flags = 0;
184 unsigned long *sequence; 187 unsigned long *sequence;
@@ -204,8 +207,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
204 if (blk_tracer) { 207 if (blk_tracer) {
205 tracing_record_cmdline(current); 208 tracing_record_cmdline(current);
206 209
210 buffer = blk_tr->buffer;
207 pc = preempt_count(); 211 pc = preempt_count();
208 event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, 212 event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
209 sizeof(*t) + pdu_len, 213 sizeof(*t) + pdu_len,
210 0, pc); 214 0, pc);
211 if (!event) 215 if (!event)
@@ -252,7 +256,7 @@ record_it:
252 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); 256 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
253 257
254 if (blk_tracer) { 258 if (blk_tracer) {
255 trace_buffer_unlock_commit(blk_tr, event, 0, pc); 259 trace_buffer_unlock_commit(buffer, event, 0, pc);
256 return; 260 return;
257 } 261 }
258 } 262 }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1e1d23c2630..8c804e24f96 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1016,71 +1016,35 @@ static int
1016__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 1016__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1017{ 1017{
1018 unsigned long ftrace_addr; 1018 unsigned long ftrace_addr;
1019 unsigned long ip, fl; 1019 unsigned long flag = 0UL;
1020 1020
1021 ftrace_addr = (unsigned long)FTRACE_ADDR; 1021 ftrace_addr = (unsigned long)FTRACE_ADDR;
1022 1022
1023 ip = rec->ip;
1024
1025 /* 1023 /*
1026 * If this record is not to be traced and 1024 * If this record is not to be traced or we want to disable it,
1027 * it is not enabled then do nothing. 1025 * then disable it.
1028 * 1026 *
1029 * If this record is not to be traced and 1027 * If we want to enable it and filtering is off, then enable it.
1030 * it is enabled then disable it.
1031 * 1028 *
1029 * If we want to enable it and filtering is on, enable it only if
1030 * it's filtered
1032 */ 1031 */
1033 if (rec->flags & FTRACE_FL_NOTRACE) { 1032 if (enable && !(rec->flags & FTRACE_FL_NOTRACE)) {
1034 if (rec->flags & FTRACE_FL_ENABLED) 1033 if (!ftrace_filtered || (rec->flags & FTRACE_FL_FILTER))
1035 rec->flags &= ~FTRACE_FL_ENABLED; 1034 flag = FTRACE_FL_ENABLED;
1036 else 1035 }
1037 return 0;
1038
1039 } else if (ftrace_filtered && enable) {
1040 /*
1041 * Filtering is on:
1042 */
1043
1044 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
1045
1046 /* Record is filtered and enabled, do nothing */
1047 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
1048 return 0;
1049
1050 /* Record is not filtered or enabled, do nothing */
1051 if (!fl)
1052 return 0;
1053
1054 /* Record is not filtered but enabled, disable it */
1055 if (fl == FTRACE_FL_ENABLED)
1056 rec->flags &= ~FTRACE_FL_ENABLED;
1057 else
1058 /* Otherwise record is filtered but not enabled, enable it */
1059 rec->flags |= FTRACE_FL_ENABLED;
1060 } else {
1061 /* Disable or not filtered */
1062
1063 if (enable) {
1064 /* if record is enabled, do nothing */
1065 if (rec->flags & FTRACE_FL_ENABLED)
1066 return 0;
1067
1068 rec->flags |= FTRACE_FL_ENABLED;
1069
1070 } else {
1071 1036
1072 /* if record is not enabled, do nothing */ 1037 /* If the state of this record hasn't changed, then do nothing */
1073 if (!(rec->flags & FTRACE_FL_ENABLED)) 1038 if ((rec->flags & FTRACE_FL_ENABLED) == flag)
1074 return 0; 1039 return 0;
1075 1040
1076 rec->flags &= ~FTRACE_FL_ENABLED; 1041 if (flag) {
1077 } 1042 rec->flags |= FTRACE_FL_ENABLED;
1043 return ftrace_make_call(rec, ftrace_addr);
1078 } 1044 }
1079 1045
1080 if (rec->flags & FTRACE_FL_ENABLED) 1046 rec->flags &= ~FTRACE_FL_ENABLED;
1081 return ftrace_make_call(rec, ftrace_addr); 1047 return ftrace_make_nop(NULL, rec, ftrace_addr);
1082 else
1083 return ftrace_make_nop(NULL, rec, ftrace_addr);
1084} 1048}
1085 1049
1086static void ftrace_replace_code(int enable) 1050static void ftrace_replace_code(int enable)
@@ -1375,7 +1339,6 @@ struct ftrace_iterator {
1375 unsigned flags; 1339 unsigned flags;
1376 unsigned char buffer[FTRACE_BUFF_MAX+1]; 1340 unsigned char buffer[FTRACE_BUFF_MAX+1];
1377 unsigned buffer_idx; 1341 unsigned buffer_idx;
1378 unsigned filtered;
1379}; 1342};
1380 1343
1381static void * 1344static void *
@@ -1438,18 +1401,13 @@ static int t_hash_show(struct seq_file *m, void *v)
1438{ 1401{
1439 struct ftrace_func_probe *rec; 1402 struct ftrace_func_probe *rec;
1440 struct hlist_node *hnd = v; 1403 struct hlist_node *hnd = v;
1441 char str[KSYM_SYMBOL_LEN];
1442 1404
1443 rec = hlist_entry(hnd, struct ftrace_func_probe, node); 1405 rec = hlist_entry(hnd, struct ftrace_func_probe, node);
1444 1406
1445 if (rec->ops->print) 1407 if (rec->ops->print)
1446 return rec->ops->print(m, rec->ip, rec->ops, rec->data); 1408 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
1447 1409
1448 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 1410 seq_printf(m, "%pf:%pf", (void *)rec->ip, (void *)rec->ops->func);
1449 seq_printf(m, "%s:", str);
1450
1451 kallsyms_lookup((unsigned long)rec->ops->func, NULL, NULL, NULL, str);
1452 seq_printf(m, "%s", str);
1453 1411
1454 if (rec->data) 1412 if (rec->data)
1455 seq_printf(m, ":%p", rec->data); 1413 seq_printf(m, ":%p", rec->data);
@@ -1547,7 +1505,6 @@ static int t_show(struct seq_file *m, void *v)
1547{ 1505{
1548 struct ftrace_iterator *iter = m->private; 1506 struct ftrace_iterator *iter = m->private;
1549 struct dyn_ftrace *rec = v; 1507 struct dyn_ftrace *rec = v;
1550 char str[KSYM_SYMBOL_LEN];
1551 1508
1552 if (iter->flags & FTRACE_ITER_HASH) 1509 if (iter->flags & FTRACE_ITER_HASH)
1553 return t_hash_show(m, v); 1510 return t_hash_show(m, v);
@@ -1560,9 +1517,7 @@ static int t_show(struct seq_file *m, void *v)
1560 if (!rec) 1517 if (!rec)
1561 return 0; 1518 return 0;
1562 1519
1563 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 1520 seq_printf(m, "%pf\n", (void *)rec->ip);
1564
1565 seq_printf(m, "%s\n", str);
1566 1521
1567 return 0; 1522 return 0;
1568} 1523}
@@ -1601,17 +1556,6 @@ ftrace_avail_open(struct inode *inode, struct file *file)
1601 return ret; 1556 return ret;
1602} 1557}
1603 1558
1604int ftrace_avail_release(struct inode *inode, struct file *file)
1605{
1606 struct seq_file *m = (struct seq_file *)file->private_data;
1607 struct ftrace_iterator *iter = m->private;
1608
1609 seq_release(inode, file);
1610 kfree(iter);
1611
1612 return 0;
1613}
1614
1615static int 1559static int
1616ftrace_failures_open(struct inode *inode, struct file *file) 1560ftrace_failures_open(struct inode *inode, struct file *file)
1617{ 1561{
@@ -2278,7 +2222,11 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2278 read++; 2222 read++;
2279 cnt--; 2223 cnt--;
2280 2224
2281 if (!(iter->flags & ~FTRACE_ITER_CONT)) { 2225 /*
2226 * If the parser hasn't finished with the last write,
2227 * continue reading the user input without skipping spaces.
2228 */
2229 if (!(iter->flags & FTRACE_ITER_CONT)) {
2282 /* skip white space */ 2230 /* skip white space */
2283 while (cnt && isspace(ch)) { 2231 while (cnt && isspace(ch)) {
2284 ret = get_user(ch, ubuf++); 2232 ret = get_user(ch, ubuf++);
@@ -2288,8 +2236,9 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2288 cnt--; 2236 cnt--;
2289 } 2237 }
2290 2238
2239 /* only spaces were written */
2291 if (isspace(ch)) { 2240 if (isspace(ch)) {
2292 file->f_pos += read; 2241 *ppos += read;
2293 ret = read; 2242 ret = read;
2294 goto out; 2243 goto out;
2295 } 2244 }
@@ -2312,19 +2261,18 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2312 } 2261 }
2313 2262
2314 if (isspace(ch)) { 2263 if (isspace(ch)) {
2315 iter->filtered++;
2316 iter->buffer[iter->buffer_idx] = 0; 2264 iter->buffer[iter->buffer_idx] = 0;
2317 ret = ftrace_process_regex(iter->buffer, 2265 ret = ftrace_process_regex(iter->buffer,
2318 iter->buffer_idx, enable); 2266 iter->buffer_idx, enable);
2319 if (ret) 2267 if (ret)
2320 goto out; 2268 goto out;
2321 iter->buffer_idx = 0; 2269 iter->buffer_idx = 0;
2322 } else 2270 } else {
2323 iter->flags |= FTRACE_ITER_CONT; 2271 iter->flags |= FTRACE_ITER_CONT;
2272 iter->buffer[iter->buffer_idx++] = ch;
2273 }
2324 2274
2325 2275 *ppos += read;
2326 file->f_pos += read;
2327
2328 ret = read; 2276 ret = read;
2329 out: 2277 out:
2330 mutex_unlock(&ftrace_regex_lock); 2278 mutex_unlock(&ftrace_regex_lock);
@@ -2443,7 +2391,6 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
2443 iter = file->private_data; 2391 iter = file->private_data;
2444 2392
2445 if (iter->buffer_idx) { 2393 if (iter->buffer_idx) {
2446 iter->filtered++;
2447 iter->buffer[iter->buffer_idx] = 0; 2394 iter->buffer[iter->buffer_idx] = 0;
2448 ftrace_match_records(iter->buffer, iter->buffer_idx, enable); 2395 ftrace_match_records(iter->buffer, iter->buffer_idx, enable);
2449 } 2396 }
@@ -2474,14 +2421,14 @@ static const struct file_operations ftrace_avail_fops = {
2474 .open = ftrace_avail_open, 2421 .open = ftrace_avail_open,
2475 .read = seq_read, 2422 .read = seq_read,
2476 .llseek = seq_lseek, 2423 .llseek = seq_lseek,
2477 .release = ftrace_avail_release, 2424 .release = seq_release_private,
2478}; 2425};
2479 2426
2480static const struct file_operations ftrace_failures_fops = { 2427static const struct file_operations ftrace_failures_fops = {
2481 .open = ftrace_failures_open, 2428 .open = ftrace_failures_open,
2482 .read = seq_read, 2429 .read = seq_read,
2483 .llseek = seq_lseek, 2430 .llseek = seq_lseek,
2484 .release = ftrace_avail_release, 2431 .release = seq_release_private,
2485}; 2432};
2486 2433
2487static const struct file_operations ftrace_filter_fops = { 2434static const struct file_operations ftrace_filter_fops = {
@@ -2543,7 +2490,6 @@ static void g_stop(struct seq_file *m, void *p)
2543static int g_show(struct seq_file *m, void *v) 2490static int g_show(struct seq_file *m, void *v)
2544{ 2491{
2545 unsigned long *ptr = v; 2492 unsigned long *ptr = v;
2546 char str[KSYM_SYMBOL_LEN];
2547 2493
2548 if (!ptr) 2494 if (!ptr)
2549 return 0; 2495 return 0;
@@ -2553,9 +2499,7 @@ static int g_show(struct seq_file *m, void *v)
2553 return 0; 2499 return 0;
2554 } 2500 }
2555 2501
2556 kallsyms_lookup(*ptr, NULL, NULL, NULL, str); 2502 seq_printf(m, "%pf\n", v);
2557
2558 seq_printf(m, "%s\n", str);
2559 2503
2560 return 0; 2504 return 0;
2561} 2505}
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 1edaa9516e8..81b1645c854 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -183,11 +183,9 @@ static void kmemtrace_stop_probes(void)
183 183
184static int kmem_trace_init(struct trace_array *tr) 184static int kmem_trace_init(struct trace_array *tr)
185{ 185{
186 int cpu;
187 kmemtrace_array = tr; 186 kmemtrace_array = tr;
188 187
189 for_each_cpu(cpu, cpu_possible_mask) 188 tracing_reset_online_cpus(tr);
190 tracing_reset(tr, cpu);
191 189
192 kmemtrace_start_probes(); 190 kmemtrace_start_probes();
193 191
@@ -239,12 +237,52 @@ struct kmemtrace_user_event_alloc {
239}; 237};
240 238
241static enum print_line_t 239static enum print_line_t
242kmemtrace_print_alloc_user(struct trace_iterator *iter, 240kmemtrace_print_alloc(struct trace_iterator *iter, int flags)
243 struct kmemtrace_alloc_entry *entry)
244{ 241{
245 struct kmemtrace_user_event_alloc *ev_alloc;
246 struct trace_seq *s = &iter->seq; 242 struct trace_seq *s = &iter->seq;
243 struct kmemtrace_alloc_entry *entry;
244 int ret;
245
246 trace_assign_type(entry, iter->ent);
247
248 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu "
249 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
250 entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr,
251 (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc,
252 (unsigned long)entry->gfp_flags, entry->node);
253
254 if (!ret)
255 return TRACE_TYPE_PARTIAL_LINE;
256 return TRACE_TYPE_HANDLED;
257}
258
259static enum print_line_t
260kmemtrace_print_free(struct trace_iterator *iter, int flags)
261{
262 struct trace_seq *s = &iter->seq;
263 struct kmemtrace_free_entry *entry;
264 int ret;
265
266 trace_assign_type(entry, iter->ent);
267
268 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n",
269 entry->type_id, (void *)entry->call_site,
270 (unsigned long)entry->ptr);
271
272 if (!ret)
273 return TRACE_TYPE_PARTIAL_LINE;
274 return TRACE_TYPE_HANDLED;
275}
276
277static enum print_line_t
278kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags)
279{
280 struct trace_seq *s = &iter->seq;
281 struct kmemtrace_alloc_entry *entry;
247 struct kmemtrace_user_event *ev; 282 struct kmemtrace_user_event *ev;
283 struct kmemtrace_user_event_alloc *ev_alloc;
284
285 trace_assign_type(entry, iter->ent);
248 286
249 ev = trace_seq_reserve(s, sizeof(*ev)); 287 ev = trace_seq_reserve(s, sizeof(*ev));
250 if (!ev) 288 if (!ev)
@@ -271,12 +309,14 @@ kmemtrace_print_alloc_user(struct trace_iterator *iter,
271} 309}
272 310
273static enum print_line_t 311static enum print_line_t
274kmemtrace_print_free_user(struct trace_iterator *iter, 312kmemtrace_print_free_user(struct trace_iterator *iter, int flags)
275 struct kmemtrace_free_entry *entry)
276{ 313{
277 struct trace_seq *s = &iter->seq; 314 struct trace_seq *s = &iter->seq;
315 struct kmemtrace_free_entry *entry;
278 struct kmemtrace_user_event *ev; 316 struct kmemtrace_user_event *ev;
279 317
318 trace_assign_type(entry, iter->ent);
319
280 ev = trace_seq_reserve(s, sizeof(*ev)); 320 ev = trace_seq_reserve(s, sizeof(*ev));
281 if (!ev) 321 if (!ev)
282 return TRACE_TYPE_PARTIAL_LINE; 322 return TRACE_TYPE_PARTIAL_LINE;
@@ -294,12 +334,14 @@ kmemtrace_print_free_user(struct trace_iterator *iter,
294 334
295/* The two other following provide a more minimalistic output */ 335/* The two other following provide a more minimalistic output */
296static enum print_line_t 336static enum print_line_t
297kmemtrace_print_alloc_compress(struct trace_iterator *iter, 337kmemtrace_print_alloc_compress(struct trace_iterator *iter)
298 struct kmemtrace_alloc_entry *entry)
299{ 338{
339 struct kmemtrace_alloc_entry *entry;
300 struct trace_seq *s = &iter->seq; 340 struct trace_seq *s = &iter->seq;
301 int ret; 341 int ret;
302 342
343 trace_assign_type(entry, iter->ent);
344
303 /* Alloc entry */ 345 /* Alloc entry */
304 ret = trace_seq_printf(s, " + "); 346 ret = trace_seq_printf(s, " + ");
305 if (!ret) 347 if (!ret)
@@ -345,29 +387,24 @@ kmemtrace_print_alloc_compress(struct trace_iterator *iter,
345 if (!ret) 387 if (!ret)
346 return TRACE_TYPE_PARTIAL_LINE; 388 return TRACE_TYPE_PARTIAL_LINE;
347 389
348 /* Node */ 390 /* Node and call site */
349 ret = trace_seq_printf(s, "%4d ", entry->node); 391 ret = trace_seq_printf(s, "%4d %pf\n", entry->node,
350 if (!ret) 392 (void *)entry->call_site);
351 return TRACE_TYPE_PARTIAL_LINE;
352
353 /* Call site */
354 ret = seq_print_ip_sym(s, entry->call_site, 0);
355 if (!ret) 393 if (!ret)
356 return TRACE_TYPE_PARTIAL_LINE; 394 return TRACE_TYPE_PARTIAL_LINE;
357 395
358 if (!trace_seq_printf(s, "\n"))
359 return TRACE_TYPE_PARTIAL_LINE;
360
361 return TRACE_TYPE_HANDLED; 396 return TRACE_TYPE_HANDLED;
362} 397}
363 398
364static enum print_line_t 399static enum print_line_t
365kmemtrace_print_free_compress(struct trace_iterator *iter, 400kmemtrace_print_free_compress(struct trace_iterator *iter)
366 struct kmemtrace_free_entry *entry)
367{ 401{
402 struct kmemtrace_free_entry *entry;
368 struct trace_seq *s = &iter->seq; 403 struct trace_seq *s = &iter->seq;
369 int ret; 404 int ret;
370 405
406 trace_assign_type(entry, iter->ent);
407
371 /* Free entry */ 408 /* Free entry */
372 ret = trace_seq_printf(s, " - "); 409 ret = trace_seq_printf(s, " - ");
373 if (!ret) 410 if (!ret)
@@ -401,19 +438,11 @@ kmemtrace_print_free_compress(struct trace_iterator *iter,
401 if (!ret) 438 if (!ret)
402 return TRACE_TYPE_PARTIAL_LINE; 439 return TRACE_TYPE_PARTIAL_LINE;
403 440
404 /* Skip node */ 441 /* Skip node and print call site */
405 ret = trace_seq_printf(s, " "); 442 ret = trace_seq_printf(s, " %pf\n", (void *)entry->call_site);
406 if (!ret) 443 if (!ret)
407 return TRACE_TYPE_PARTIAL_LINE; 444 return TRACE_TYPE_PARTIAL_LINE;
408 445
409 /* Call site */
410 ret = seq_print_ip_sym(s, entry->call_site, 0);
411 if (!ret)
412 return TRACE_TYPE_PARTIAL_LINE;
413
414 if (!trace_seq_printf(s, "\n"))
415 return TRACE_TYPE_PARTIAL_LINE;
416
417 return TRACE_TYPE_HANDLED; 446 return TRACE_TYPE_HANDLED;
418} 447}
419 448
@@ -421,32 +450,31 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
421{ 450{
422 struct trace_entry *entry = iter->ent; 451 struct trace_entry *entry = iter->ent;
423 452
424 switch (entry->type) { 453 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
425 case TRACE_KMEM_ALLOC: { 454 return TRACE_TYPE_UNHANDLED;
426 struct kmemtrace_alloc_entry *field;
427
428 trace_assign_type(field, entry);
429 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
430 return kmemtrace_print_alloc_compress(iter, field);
431 else
432 return kmemtrace_print_alloc_user(iter, field);
433 }
434
435 case TRACE_KMEM_FREE: {
436 struct kmemtrace_free_entry *field;
437
438 trace_assign_type(field, entry);
439 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
440 return kmemtrace_print_free_compress(iter, field);
441 else
442 return kmemtrace_print_free_user(iter, field);
443 }
444 455
456 switch (entry->type) {
457 case TRACE_KMEM_ALLOC:
458 return kmemtrace_print_alloc_compress(iter);
459 case TRACE_KMEM_FREE:
460 return kmemtrace_print_free_compress(iter);
445 default: 461 default:
446 return TRACE_TYPE_UNHANDLED; 462 return TRACE_TYPE_UNHANDLED;
447 } 463 }
448} 464}
449 465
466static struct trace_event kmem_trace_alloc = {
467 .type = TRACE_KMEM_ALLOC,
468 .trace = kmemtrace_print_alloc,
469 .binary = kmemtrace_print_alloc_user,
470};
471
472static struct trace_event kmem_trace_free = {
473 .type = TRACE_KMEM_FREE,
474 .trace = kmemtrace_print_free,
475 .binary = kmemtrace_print_free_user,
476};
477
450static struct tracer kmem_tracer __read_mostly = { 478static struct tracer kmem_tracer __read_mostly = {
451 .name = "kmemtrace", 479 .name = "kmemtrace",
452 .init = kmem_trace_init, 480 .init = kmem_trace_init,
@@ -463,6 +491,21 @@ void kmemtrace_init(void)
463 491
464static int __init init_kmem_tracer(void) 492static int __init init_kmem_tracer(void)
465{ 493{
466 return register_tracer(&kmem_tracer); 494 if (!register_ftrace_event(&kmem_trace_alloc)) {
495 pr_warning("Warning: could not register kmem events\n");
496 return 1;
497 }
498
499 if (!register_ftrace_event(&kmem_trace_free)) {
500 pr_warning("Warning: could not register kmem events\n");
501 return 1;
502 }
503
504 if (!register_tracer(&kmem_tracer)) {
505 pr_warning("Warning: could not register the kmem tracer\n");
506 return 1;
507 }
508
509 return 0;
467} 510}
468device_initcall(init_kmem_tracer); 511device_initcall(init_kmem_tracer);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a330513d96c..454e74e718c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -218,17 +218,12 @@ enum {
218 218
219static inline int rb_null_event(struct ring_buffer_event *event) 219static inline int rb_null_event(struct ring_buffer_event *event)
220{ 220{
221 return event->type_len == RINGBUF_TYPE_PADDING 221 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
222 && event->time_delta == 0;
223}
224
225static inline int rb_discarded_event(struct ring_buffer_event *event)
226{
227 return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta;
228} 222}
229 223
230static void rb_event_set_padding(struct ring_buffer_event *event) 224static void rb_event_set_padding(struct ring_buffer_event *event)
231{ 225{
226 /* padding has a zero time_delta */
232 event->type_len = RINGBUF_TYPE_PADDING; 227 event->type_len = RINGBUF_TYPE_PADDING;
233 event->time_delta = 0; 228 event->time_delta = 0;
234} 229}
@@ -322,6 +317,14 @@ struct buffer_data_page {
322 unsigned char data[]; /* data of buffer page */ 317 unsigned char data[]; /* data of buffer page */
323}; 318};
324 319
320/*
321 * Note, the buffer_page list must be first. The buffer pages
322 * are allocated in cache lines, which means that each buffer
323 * page will be at the beginning of a cache line, and thus
324 * the least significant bits will be zero. We use this to
325 * add flags in the list struct pointers, to make the ring buffer
326 * lockless.
327 */
325struct buffer_page { 328struct buffer_page {
326 struct list_head list; /* list of buffer pages */ 329 struct list_head list; /* list of buffer pages */
327 local_t write; /* index for next write */ 330 local_t write; /* index for next write */
@@ -330,6 +333,21 @@ struct buffer_page {
330 struct buffer_data_page *page; /* Actual data page */ 333 struct buffer_data_page *page; /* Actual data page */
331}; 334};
332 335
336/*
337 * The buffer page counters, write and entries, must be reset
338 * atomically when crossing page boundaries. To synchronize this
339 * update, two counters are inserted into the number. One is
340 * the actual counter for the write position or count on the page.
341 *
342 * The other is a counter of updaters. Before an update happens
343 * the update partition of the counter is incremented. This will
344 * allow the updater to update the counter atomically.
345 *
346 * The counter is 20 bits, and the state data is 12.
347 */
348#define RB_WRITE_MASK 0xfffff
349#define RB_WRITE_INTCNT (1 << 20)
350
333static void rb_init_page(struct buffer_data_page *bpage) 351static void rb_init_page(struct buffer_data_page *bpage)
334{ 352{
335 local_set(&bpage->commit, 0); 353 local_set(&bpage->commit, 0);
@@ -403,21 +421,20 @@ int ring_buffer_print_page_header(struct trace_seq *s)
403struct ring_buffer_per_cpu { 421struct ring_buffer_per_cpu {
404 int cpu; 422 int cpu;
405 struct ring_buffer *buffer; 423 struct ring_buffer *buffer;
406 spinlock_t reader_lock; /* serialize readers */ 424 spinlock_t reader_lock; /* serialize readers */
407 raw_spinlock_t lock; 425 raw_spinlock_t lock;
408 struct lock_class_key lock_key; 426 struct lock_class_key lock_key;
409 struct list_head pages; 427 struct list_head *pages;
410 struct buffer_page *head_page; /* read from head */ 428 struct buffer_page *head_page; /* read from head */
411 struct buffer_page *tail_page; /* write to tail */ 429 struct buffer_page *tail_page; /* write to tail */
412 struct buffer_page *commit_page; /* committed pages */ 430 struct buffer_page *commit_page; /* committed pages */
413 struct buffer_page *reader_page; 431 struct buffer_page *reader_page;
414 unsigned long nmi_dropped; 432 local_t commit_overrun;
415 unsigned long commit_overrun; 433 local_t overrun;
416 unsigned long overrun;
417 unsigned long read;
418 local_t entries; 434 local_t entries;
419 local_t committing; 435 local_t committing;
420 local_t commits; 436 local_t commits;
437 unsigned long read;
421 u64 write_stamp; 438 u64 write_stamp;
422 u64 read_stamp; 439 u64 read_stamp;
423 atomic_t record_disabled; 440 atomic_t record_disabled;
@@ -450,14 +467,19 @@ struct ring_buffer_iter {
450}; 467};
451 468
452/* buffer may be either ring_buffer or ring_buffer_per_cpu */ 469/* buffer may be either ring_buffer or ring_buffer_per_cpu */
453#define RB_WARN_ON(buffer, cond) \ 470#define RB_WARN_ON(b, cond) \
454 ({ \ 471 ({ \
455 int _____ret = unlikely(cond); \ 472 int _____ret = unlikely(cond); \
456 if (_____ret) { \ 473 if (_____ret) { \
457 atomic_inc(&buffer->record_disabled); \ 474 if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
458 WARN_ON(1); \ 475 struct ring_buffer_per_cpu *__b = \
459 } \ 476 (void *)b; \
460 _____ret; \ 477 atomic_inc(&__b->buffer->record_disabled); \
478 } else \
479 atomic_inc(&b->record_disabled); \
480 WARN_ON(1); \
481 } \
482 _____ret; \
461 }) 483 })
462 484
463/* Up this if you want to test the TIME_EXTENTS and normalization */ 485/* Up this if you want to test the TIME_EXTENTS and normalization */
@@ -489,6 +511,390 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
489} 511}
490EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); 512EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
491 513
514/*
515 * Making the ring buffer lockless makes things tricky.
516 * Writes only happen on the CPU that they are on,
517 * and they only need to worry about interrupts. Reads, however,
518 * can happen on any CPU.
519 *
520 * The reader page is always off the ring buffer, but when the
521 * reader finishes with a page, it needs to swap its page with
522 * a new one from the buffer. The reader needs to take from
523 * the head (writes go to the tail). But if a writer is in overwrite
524 * mode and wraps, it must push the head page forward.
525 *
526 * Here lies the problem.
527 *
528 * The reader must be careful to replace only the head page, and
529 * not another one. As described at the top of the file in the
530 * ASCII art, the reader sets its old page to point to the next
531 * page after head. It then sets the page after head to point to
532 * the old reader page. But if the writer moves the head page
533 * during this operation, the reader could end up with the tail.
534 *
535 * We use cmpxchg to help prevent this race. We also do something
536 * special with the page before head. We set the LSB to 1.
537 *
538 * When the writer must push the page forward, it will clear the
539 * bit that points to the head page, move the head, and then set
540 * the bit that points to the new head page.
541 *
542 * We also don't want an interrupt coming in and moving the head
543 * page on another writer. Thus we use the second LSB to catch
544 * that too. Thus:
545 *
546 * head->list->prev->next bit 1 bit 0
547 * ------- -------
548 * Normal page 0 0
549 * Points to head page 0 1
550 * New head page 1 0
551 *
552 * Note we can not trust the prev pointer of the head page, because:
553 *
554 * +----+ +-----+ +-----+
555 * | |------>| T |---X--->| N |
556 * | |<------| | | |
557 * +----+ +-----+ +-----+
558 * ^ ^ |
559 * | +-----+ | |
560 * +----------| R |----------+ |
561 * | |<-----------+
562 * +-----+
563 *
564 * Key: ---X--> HEAD flag set in pointer
565 * T Tail page
566 * R Reader page
567 * N Next page
568 *
569 * (see __rb_reserve_next() to see where this happens)
570 *
571 * What the above shows is that the reader just swapped out
572 * the reader page with a page in the buffer, but before it
573 * could make the new header point back to the new page added
574 * it was preempted by a writer. The writer moved forward onto
575 * the new page added by the reader and is about to move forward
576 * again.
577 *
578 * You can see, it is legitimate for the previous pointer of
579 * the head (or any page) not to point back to itself. But only
580 * temporarily.
581 */
582
583#define RB_PAGE_NORMAL 0UL
584#define RB_PAGE_HEAD 1UL
585#define RB_PAGE_UPDATE 2UL
586
587
588#define RB_FLAG_MASK 3UL
589
590/* PAGE_MOVED is not part of the mask */
591#define RB_PAGE_MOVED 4UL
592
593/*
594 * rb_list_head - remove any bit
595 */
596static struct list_head *rb_list_head(struct list_head *list)
597{
598 unsigned long val = (unsigned long)list;
599
600 return (struct list_head *)(val & ~RB_FLAG_MASK);
601}
602
603/*
604 * rb_is_head_page - test if the given page is the head page
605 *
606 * Because the reader may move the head_page pointer, we can
607 * not trust what the head page is (it may be pointing to
608 * the reader page). But if the next page is a header page,
609 * its flags will be non zero.
610 */
611static int inline
612rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
613 struct buffer_page *page, struct list_head *list)
614{
615 unsigned long val;
616
617 val = (unsigned long)list->next;
618
619 if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
620 return RB_PAGE_MOVED;
621
622 return val & RB_FLAG_MASK;
623}
624
625/*
626 * rb_is_reader_page
627 *
628 * The unique thing about the reader page, is that, if the
629 * writer is ever on it, the previous pointer never points
630 * back to the reader page.
631 */
632static int rb_is_reader_page(struct buffer_page *page)
633{
634 struct list_head *list = page->list.prev;
635
636 return rb_list_head(list->next) != &page->list;
637}
638
639/*
640 * rb_set_list_to_head - set a list_head to be pointing to head.
641 */
642static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
643 struct list_head *list)
644{
645 unsigned long *ptr;
646
647 ptr = (unsigned long *)&list->next;
648 *ptr |= RB_PAGE_HEAD;
649 *ptr &= ~RB_PAGE_UPDATE;
650}
651
652/*
653 * rb_head_page_activate - sets up head page
654 */
655static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
656{
657 struct buffer_page *head;
658
659 head = cpu_buffer->head_page;
660 if (!head)
661 return;
662
663 /*
664 * Set the previous list pointer to have the HEAD flag.
665 */
666 rb_set_list_to_head(cpu_buffer, head->list.prev);
667}
668
669static void rb_list_head_clear(struct list_head *list)
670{
671 unsigned long *ptr = (unsigned long *)&list->next;
672
673 *ptr &= ~RB_FLAG_MASK;
674}
675
676/*
677 * rb_head_page_deactivate - clears head page ptr (for free list)
678 */
679static void
680rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
681{
682 struct list_head *hd;
683
684 /* Go through the whole list and clear any pointers found. */
685 rb_list_head_clear(cpu_buffer->pages);
686
687 list_for_each(hd, cpu_buffer->pages)
688 rb_list_head_clear(hd);
689}
690
691static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
692 struct buffer_page *head,
693 struct buffer_page *prev,
694 int old_flag, int new_flag)
695{
696 struct list_head *list;
697 unsigned long val = (unsigned long)&head->list;
698 unsigned long ret;
699
700 list = &prev->list;
701
702 val &= ~RB_FLAG_MASK;
703
704 ret = (unsigned long)cmpxchg(&list->next,
705 val | old_flag, val | new_flag);
706
707 /* check if the reader took the page */
708 if ((ret & ~RB_FLAG_MASK) != val)
709 return RB_PAGE_MOVED;
710
711 return ret & RB_FLAG_MASK;
712}
713
714static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
715 struct buffer_page *head,
716 struct buffer_page *prev,
717 int old_flag)
718{
719 return rb_head_page_set(cpu_buffer, head, prev,
720 old_flag, RB_PAGE_UPDATE);
721}
722
723static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
724 struct buffer_page *head,
725 struct buffer_page *prev,
726 int old_flag)
727{
728 return rb_head_page_set(cpu_buffer, head, prev,
729 old_flag, RB_PAGE_HEAD);
730}
731
732static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
733 struct buffer_page *head,
734 struct buffer_page *prev,
735 int old_flag)
736{
737 return rb_head_page_set(cpu_buffer, head, prev,
738 old_flag, RB_PAGE_NORMAL);
739}
740
741static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
742 struct buffer_page **bpage)
743{
744 struct list_head *p = rb_list_head((*bpage)->list.next);
745
746 *bpage = list_entry(p, struct buffer_page, list);
747}
748
749static struct buffer_page *
750rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
751{
752 struct buffer_page *head;
753 struct buffer_page *page;
754 struct list_head *list;
755 int i;
756
757 if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
758 return NULL;
759
760 /* sanity check */
761 list = cpu_buffer->pages;
762 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
763 return NULL;
764
765 page = head = cpu_buffer->head_page;
766 /*
767 * It is possible that the writer moves the header behind
768 * where we started, and we miss in one loop.
769 * A second loop should grab the header, but we'll do
770 * three loops just because I'm paranoid.
771 */
772 for (i = 0; i < 3; i++) {
773 do {
774 if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
775 cpu_buffer->head_page = page;
776 return page;
777 }
778 rb_inc_page(cpu_buffer, &page);
779 } while (page != head);
780 }
781
782 RB_WARN_ON(cpu_buffer, 1);
783
784 return NULL;
785}
786
787static int rb_head_page_replace(struct buffer_page *old,
788 struct buffer_page *new)
789{
790 unsigned long *ptr = (unsigned long *)&old->list.prev->next;
791 unsigned long val;
792 unsigned long ret;
793
794 val = *ptr & ~RB_FLAG_MASK;
795 val |= RB_PAGE_HEAD;
796
797 ret = cmpxchg(ptr, val, &new->list);
798
799 return ret == val;
800}
801
802/*
803 * rb_tail_page_update - move the tail page forward
804 *
805 * Returns 1 if moved tail page, 0 if someone else did.
806 */
807static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
808 struct buffer_page *tail_page,
809 struct buffer_page *next_page)
810{
811 struct buffer_page *old_tail;
812 unsigned long old_entries;
813 unsigned long old_write;
814 int ret = 0;
815
816 /*
817 * The tail page now needs to be moved forward.
818 *
819 * We need to reset the tail page, but without messing
820 * with possible erasing of data brought in by interrupts
821 * that have moved the tail page and are currently on it.
822 *
823 * We add a counter to the write field to denote this.
824 */
825 old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
826 old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
827
828 /*
829 * Just make sure we have seen our old_write and synchronize
830 * with any interrupts that come in.
831 */
832 barrier();
833
834 /*
835 * If the tail page is still the same as what we think
836 * it is, then it is up to us to update the tail
837 * pointer.
838 */
839 if (tail_page == cpu_buffer->tail_page) {
840 /* Zero the write counter */
841 unsigned long val = old_write & ~RB_WRITE_MASK;
842 unsigned long eval = old_entries & ~RB_WRITE_MASK;
843
844 /*
845 * This will only succeed if an interrupt did
846 * not come in and change it. In which case, we
847 * do not want to modify it.
848 *
849 * We add (void) to let the compiler know that we do not care
850 * about the return value of these functions. We use the
851 * cmpxchg to only update if an interrupt did not already
852 * do it for us. If the cmpxchg fails, we don't care.
853 */
854 (void)local_cmpxchg(&next_page->write, old_write, val);
855 (void)local_cmpxchg(&next_page->entries, old_entries, eval);
856
857 /*
858 * No need to worry about races with clearing out the commit.
859 * it only can increment when a commit takes place. But that
860 * only happens in the outer most nested commit.
861 */
862 local_set(&next_page->page->commit, 0);
863
864 old_tail = cmpxchg(&cpu_buffer->tail_page,
865 tail_page, next_page);
866
867 if (old_tail == tail_page)
868 ret = 1;
869 }
870
871 return ret;
872}
873
874static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
875 struct buffer_page *bpage)
876{
877 unsigned long val = (unsigned long)bpage;
878
879 if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
880 return 1;
881
882 return 0;
883}
884
885/**
886 * rb_check_list - make sure a pointer to a list has the last bits zero
887 */
888static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
889 struct list_head *list)
890{
891 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
892 return 1;
893 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
894 return 1;
895 return 0;
896}
897
492/** 898/**
493 * check_pages - integrity check of buffer pages 899 * check_pages - integrity check of buffer pages
494 * @cpu_buffer: CPU buffer with pages to test 900 * @cpu_buffer: CPU buffer with pages to test
@@ -498,14 +904,19 @@ EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
498 */ 904 */
499static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 905static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
500{ 906{
501 struct list_head *head = &cpu_buffer->pages; 907 struct list_head *head = cpu_buffer->pages;
502 struct buffer_page *bpage, *tmp; 908 struct buffer_page *bpage, *tmp;
503 909
910 rb_head_page_deactivate(cpu_buffer);
911
504 if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) 912 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
505 return -1; 913 return -1;
506 if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) 914 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
507 return -1; 915 return -1;
508 916
917 if (rb_check_list(cpu_buffer, head))
918 return -1;
919
509 list_for_each_entry_safe(bpage, tmp, head, list) { 920 list_for_each_entry_safe(bpage, tmp, head, list) {
510 if (RB_WARN_ON(cpu_buffer, 921 if (RB_WARN_ON(cpu_buffer,
511 bpage->list.next->prev != &bpage->list)) 922 bpage->list.next->prev != &bpage->list))
@@ -513,25 +924,33 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
513 if (RB_WARN_ON(cpu_buffer, 924 if (RB_WARN_ON(cpu_buffer,
514 bpage->list.prev->next != &bpage->list)) 925 bpage->list.prev->next != &bpage->list))
515 return -1; 926 return -1;
927 if (rb_check_list(cpu_buffer, &bpage->list))
928 return -1;
516 } 929 }
517 930
931 rb_head_page_activate(cpu_buffer);
932
518 return 0; 933 return 0;
519} 934}
520 935
521static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, 936static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
522 unsigned nr_pages) 937 unsigned nr_pages)
523{ 938{
524 struct list_head *head = &cpu_buffer->pages;
525 struct buffer_page *bpage, *tmp; 939 struct buffer_page *bpage, *tmp;
526 unsigned long addr; 940 unsigned long addr;
527 LIST_HEAD(pages); 941 LIST_HEAD(pages);
528 unsigned i; 942 unsigned i;
529 943
944 WARN_ON(!nr_pages);
945
530 for (i = 0; i < nr_pages; i++) { 946 for (i = 0; i < nr_pages; i++) {
531 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 947 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
532 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 948 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
533 if (!bpage) 949 if (!bpage)
534 goto free_pages; 950 goto free_pages;
951
952 rb_check_bpage(cpu_buffer, bpage);
953
535 list_add(&bpage->list, &pages); 954 list_add(&bpage->list, &pages);
536 955
537 addr = __get_free_page(GFP_KERNEL); 956 addr = __get_free_page(GFP_KERNEL);
@@ -541,7 +960,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
541 rb_init_page(bpage->page); 960 rb_init_page(bpage->page);
542 } 961 }
543 962
544 list_splice(&pages, head); 963 /*
964 * The ring buffer page list is a circular list that does not
965 * start and end with a list head. All page list items point to
966 * other pages.
967 */
968 cpu_buffer->pages = pages.next;
969 list_del(&pages);
545 970
546 rb_check_pages(cpu_buffer); 971 rb_check_pages(cpu_buffer);
547 972
@@ -573,13 +998,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
573 spin_lock_init(&cpu_buffer->reader_lock); 998 spin_lock_init(&cpu_buffer->reader_lock);
574 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); 999 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
575 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1000 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
576 INIT_LIST_HEAD(&cpu_buffer->pages);
577 1001
578 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1002 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
579 GFP_KERNEL, cpu_to_node(cpu)); 1003 GFP_KERNEL, cpu_to_node(cpu));
580 if (!bpage) 1004 if (!bpage)
581 goto fail_free_buffer; 1005 goto fail_free_buffer;
582 1006
1007 rb_check_bpage(cpu_buffer, bpage);
1008
583 cpu_buffer->reader_page = bpage; 1009 cpu_buffer->reader_page = bpage;
584 addr = __get_free_page(GFP_KERNEL); 1010 addr = __get_free_page(GFP_KERNEL);
585 if (!addr) 1011 if (!addr)
@@ -594,9 +1020,11 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
594 goto fail_free_reader; 1020 goto fail_free_reader;
595 1021
596 cpu_buffer->head_page 1022 cpu_buffer->head_page
597 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 1023 = list_entry(cpu_buffer->pages, struct buffer_page, list);
598 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; 1024 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
599 1025
1026 rb_head_page_activate(cpu_buffer);
1027
600 return cpu_buffer; 1028 return cpu_buffer;
601 1029
602 fail_free_reader: 1030 fail_free_reader:
@@ -609,15 +1037,22 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
609 1037
610static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 1038static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
611{ 1039{
612 struct list_head *head = &cpu_buffer->pages; 1040 struct list_head *head = cpu_buffer->pages;
613 struct buffer_page *bpage, *tmp; 1041 struct buffer_page *bpage, *tmp;
614 1042
615 free_buffer_page(cpu_buffer->reader_page); 1043 free_buffer_page(cpu_buffer->reader_page);
616 1044
617 list_for_each_entry_safe(bpage, tmp, head, list) { 1045 rb_head_page_deactivate(cpu_buffer);
618 list_del_init(&bpage->list); 1046
1047 if (head) {
1048 list_for_each_entry_safe(bpage, tmp, head, list) {
1049 list_del_init(&bpage->list);
1050 free_buffer_page(bpage);
1051 }
1052 bpage = list_entry(head, struct buffer_page, list);
619 free_buffer_page(bpage); 1053 free_buffer_page(bpage);
620 } 1054 }
1055
621 kfree(cpu_buffer); 1056 kfree(cpu_buffer);
622} 1057}
623 1058
@@ -760,15 +1195,17 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
760 atomic_inc(&cpu_buffer->record_disabled); 1195 atomic_inc(&cpu_buffer->record_disabled);
761 synchronize_sched(); 1196 synchronize_sched();
762 1197
1198 rb_head_page_deactivate(cpu_buffer);
1199
763 for (i = 0; i < nr_pages; i++) { 1200 for (i = 0; i < nr_pages; i++) {
764 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) 1201 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
765 return; 1202 return;
766 p = cpu_buffer->pages.next; 1203 p = cpu_buffer->pages->next;
767 bpage = list_entry(p, struct buffer_page, list); 1204 bpage = list_entry(p, struct buffer_page, list);
768 list_del_init(&bpage->list); 1205 list_del_init(&bpage->list);
769 free_buffer_page(bpage); 1206 free_buffer_page(bpage);
770 } 1207 }
771 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) 1208 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
772 return; 1209 return;
773 1210
774 rb_reset_cpu(cpu_buffer); 1211 rb_reset_cpu(cpu_buffer);
@@ -790,15 +1227,19 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
790 atomic_inc(&cpu_buffer->record_disabled); 1227 atomic_inc(&cpu_buffer->record_disabled);
791 synchronize_sched(); 1228 synchronize_sched();
792 1229
1230 spin_lock_irq(&cpu_buffer->reader_lock);
1231 rb_head_page_deactivate(cpu_buffer);
1232
793 for (i = 0; i < nr_pages; i++) { 1233 for (i = 0; i < nr_pages; i++) {
794 if (RB_WARN_ON(cpu_buffer, list_empty(pages))) 1234 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
795 return; 1235 return;
796 p = pages->next; 1236 p = pages->next;
797 bpage = list_entry(p, struct buffer_page, list); 1237 bpage = list_entry(p, struct buffer_page, list);
798 list_del_init(&bpage->list); 1238 list_del_init(&bpage->list);
799 list_add_tail(&bpage->list, &cpu_buffer->pages); 1239 list_add_tail(&bpage->list, cpu_buffer->pages);
800 } 1240 }
801 rb_reset_cpu(cpu_buffer); 1241 rb_reset_cpu(cpu_buffer);
1242 spin_unlock_irq(&cpu_buffer->reader_lock);
802 1243
803 rb_check_pages(cpu_buffer); 1244 rb_check_pages(cpu_buffer);
804 1245
@@ -949,21 +1390,14 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
949} 1390}
950 1391
951static inline struct ring_buffer_event * 1392static inline struct ring_buffer_event *
952rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
953{
954 return __rb_page_index(cpu_buffer->head_page,
955 cpu_buffer->head_page->read);
956}
957
958static inline struct ring_buffer_event *
959rb_iter_head_event(struct ring_buffer_iter *iter) 1393rb_iter_head_event(struct ring_buffer_iter *iter)
960{ 1394{
961 return __rb_page_index(iter->head_page, iter->head); 1395 return __rb_page_index(iter->head_page, iter->head);
962} 1396}
963 1397
964static inline unsigned rb_page_write(struct buffer_page *bpage) 1398static inline unsigned long rb_page_write(struct buffer_page *bpage)
965{ 1399{
966 return local_read(&bpage->write); 1400 return local_read(&bpage->write) & RB_WRITE_MASK;
967} 1401}
968 1402
969static inline unsigned rb_page_commit(struct buffer_page *bpage) 1403static inline unsigned rb_page_commit(struct buffer_page *bpage)
@@ -971,6 +1405,11 @@ static inline unsigned rb_page_commit(struct buffer_page *bpage)
971 return local_read(&bpage->page->commit); 1405 return local_read(&bpage->page->commit);
972} 1406}
973 1407
1408static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1409{
1410 return local_read(&bpage->entries) & RB_WRITE_MASK;
1411}
1412
974/* Size is determined by what has been commited */ 1413/* Size is determined by what has been commited */
975static inline unsigned rb_page_size(struct buffer_page *bpage) 1414static inline unsigned rb_page_size(struct buffer_page *bpage)
976{ 1415{
@@ -983,22 +1422,6 @@ rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
983 return rb_page_commit(cpu_buffer->commit_page); 1422 return rb_page_commit(cpu_buffer->commit_page);
984} 1423}
985 1424
986static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
987{
988 return rb_page_commit(cpu_buffer->head_page);
989}
990
991static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
992 struct buffer_page **bpage)
993{
994 struct list_head *p = (*bpage)->list.next;
995
996 if (p == &cpu_buffer->pages)
997 p = p->next;
998
999 *bpage = list_entry(p, struct buffer_page, list);
1000}
1001
1002static inline unsigned 1425static inline unsigned
1003rb_event_index(struct ring_buffer_event *event) 1426rb_event_index(struct ring_buffer_event *event)
1004{ 1427{
@@ -1024,6 +1447,8 @@ rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
1024static void 1447static void
1025rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) 1448rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1026{ 1449{
1450 unsigned long max_count;
1451
1027 /* 1452 /*
1028 * We only race with interrupts and NMIs on this CPU. 1453 * We only race with interrupts and NMIs on this CPU.
1029 * If we own the commit event, then we can commit 1454 * If we own the commit event, then we can commit
@@ -1033,9 +1458,16 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1033 * assign the commit to the tail. 1458 * assign the commit to the tail.
1034 */ 1459 */
1035 again: 1460 again:
1461 max_count = cpu_buffer->buffer->pages * 100;
1462
1036 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 1463 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
1037 cpu_buffer->commit_page->page->commit = 1464 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
1038 cpu_buffer->commit_page->write; 1465 return;
1466 if (RB_WARN_ON(cpu_buffer,
1467 rb_is_reader_page(cpu_buffer->tail_page)))
1468 return;
1469 local_set(&cpu_buffer->commit_page->page->commit,
1470 rb_page_write(cpu_buffer->commit_page));
1039 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 1471 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
1040 cpu_buffer->write_stamp = 1472 cpu_buffer->write_stamp =
1041 cpu_buffer->commit_page->page->time_stamp; 1473 cpu_buffer->commit_page->page->time_stamp;
@@ -1044,8 +1476,12 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1044 } 1476 }
1045 while (rb_commit_index(cpu_buffer) != 1477 while (rb_commit_index(cpu_buffer) !=
1046 rb_page_write(cpu_buffer->commit_page)) { 1478 rb_page_write(cpu_buffer->commit_page)) {
1047 cpu_buffer->commit_page->page->commit = 1479
1048 cpu_buffer->commit_page->write; 1480 local_set(&cpu_buffer->commit_page->page->commit,
1481 rb_page_write(cpu_buffer->commit_page));
1482 RB_WARN_ON(cpu_buffer,
1483 local_read(&cpu_buffer->commit_page->page->commit) &
1484 ~RB_WRITE_MASK);
1049 barrier(); 1485 barrier();
1050 } 1486 }
1051 1487
@@ -1078,7 +1514,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1078 * to the head page instead of next. 1514 * to the head page instead of next.
1079 */ 1515 */
1080 if (iter->head_page == cpu_buffer->reader_page) 1516 if (iter->head_page == cpu_buffer->reader_page)
1081 iter->head_page = cpu_buffer->head_page; 1517 iter->head_page = rb_set_head_page(cpu_buffer);
1082 else 1518 else
1083 rb_inc_page(cpu_buffer, &iter->head_page); 1519 rb_inc_page(cpu_buffer, &iter->head_page);
1084 1520
@@ -1122,6 +1558,163 @@ rb_update_event(struct ring_buffer_event *event,
1122 } 1558 }
1123} 1559}
1124 1560
1561/*
1562 * rb_handle_head_page - writer hit the head page
1563 *
1564 * Returns: +1 to retry page
1565 * 0 to continue
1566 * -1 on error
1567 */
1568static int
1569rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
1570 struct buffer_page *tail_page,
1571 struct buffer_page *next_page)
1572{
1573 struct buffer_page *new_head;
1574 int entries;
1575 int type;
1576 int ret;
1577
1578 entries = rb_page_entries(next_page);
1579
1580 /*
1581 * The hard part is here. We need to move the head
1582 * forward, and protect against both readers on
1583 * other CPUs and writers coming in via interrupts.
1584 */
1585 type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
1586 RB_PAGE_HEAD);
1587
1588 /*
1589 * type can be one of four:
1590 * NORMAL - an interrupt already moved it for us
1591 * HEAD - we are the first to get here.
1592 * UPDATE - we are the interrupt interrupting
1593 * a current move.
1594 * MOVED - a reader on another CPU moved the next
1595 * pointer to its reader page. Give up
1596 * and try again.
1597 */
1598
1599 switch (type) {
1600 case RB_PAGE_HEAD:
1601 /*
1602 * We changed the head to UPDATE, thus
1603 * it is our responsibility to update
1604 * the counters.
1605 */
1606 local_add(entries, &cpu_buffer->overrun);
1607
1608 /*
1609 * The entries will be zeroed out when we move the
1610 * tail page.
1611 */
1612
1613 /* still more to do */
1614 break;
1615
1616 case RB_PAGE_UPDATE:
1617 /*
1618 * This is an interrupt that interrupted the
1619 * previous update. Still more to do.
1620 */
1621 break;
1622 case RB_PAGE_NORMAL:
1623 /*
1624 * An interrupt came in before the update
1625 * and processed this for us.
1626 * Nothing left to do.
1627 */
1628 return 1;
1629 case RB_PAGE_MOVED:
1630 /*
1631 * The reader is on another CPU and just did
1632 * a swap with our next_page.
1633 * Try again.
1634 */
1635 return 1;
1636 default:
1637 RB_WARN_ON(cpu_buffer, 1); /* WTF??? */
1638 return -1;
1639 }
1640
1641 /*
1642 * Now that we are here, the old head pointer is
1643 * set to UPDATE. This will keep the reader from
1644 * swapping the head page with the reader page.
1645 * The reader (on another CPU) will spin till
1646 * we are finished.
1647 *
1648 * We just need to protect against interrupts
1649 * doing the job. We will set the next pointer
1650 * to HEAD. After that, we set the old pointer
1651 * to NORMAL, but only if it was HEAD before.
1652 * otherwise we are an interrupt, and only
1653 * want the outer most commit to reset it.
1654 */
1655 new_head = next_page;
1656 rb_inc_page(cpu_buffer, &new_head);
1657
1658 ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
1659 RB_PAGE_NORMAL);
1660
1661 /*
1662 * Valid returns are:
1663 * HEAD - an interrupt came in and already set it.
1664 * NORMAL - One of two things:
1665 * 1) We really set it.
1666 * 2) A bunch of interrupts came in and moved
1667 * the page forward again.
1668 */
1669 switch (ret) {
1670 case RB_PAGE_HEAD:
1671 case RB_PAGE_NORMAL:
1672 /* OK */
1673 break;
1674 default:
1675 RB_WARN_ON(cpu_buffer, 1);
1676 return -1;
1677 }
1678
1679 /*
1680 * It is possible that an interrupt came in,
1681 * set the head up, then more interrupts came in
1682 * and moved it again. When we get back here,
1683 * the page would have been set to NORMAL but we
1684 * just set it back to HEAD.
1685 *
1686 * How do you detect this? Well, if that happened
1687 * the tail page would have moved.
1688 */
1689 if (ret == RB_PAGE_NORMAL) {
1690 /*
1691 * If the tail had moved past next, then we need
1692 * to reset the pointer.
1693 */
1694 if (cpu_buffer->tail_page != tail_page &&
1695 cpu_buffer->tail_page != next_page)
1696 rb_head_page_set_normal(cpu_buffer, new_head,
1697 next_page,
1698 RB_PAGE_HEAD);
1699 }
1700
1701 /*
1702 * If this was the outer most commit (the one that
1703 * changed the original pointer from HEAD to UPDATE),
1704 * then it is up to us to reset it to NORMAL.
1705 */
1706 if (type == RB_PAGE_HEAD) {
1707 ret = rb_head_page_set_normal(cpu_buffer, next_page,
1708 tail_page,
1709 RB_PAGE_UPDATE);
1710 if (RB_WARN_ON(cpu_buffer,
1711 ret != RB_PAGE_UPDATE))
1712 return -1;
1713 }
1714
1715 return 0;
1716}
1717
1125static unsigned rb_calculate_event_length(unsigned length) 1718static unsigned rb_calculate_event_length(unsigned length)
1126{ 1719{
1127 struct ring_buffer_event event; /* Used only for sizeof array */ 1720 struct ring_buffer_event event; /* Used only for sizeof array */
@@ -1185,9 +1778,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1185 event->type_len = RINGBUF_TYPE_PADDING; 1778 event->type_len = RINGBUF_TYPE_PADDING;
1186 /* time delta must be non zero */ 1779 /* time delta must be non zero */
1187 event->time_delta = 1; 1780 event->time_delta = 1;
1188 /* Account for this as an entry */
1189 local_inc(&tail_page->entries);
1190 local_inc(&cpu_buffer->entries);
1191 1781
1192 /* Set write to end of buffer */ 1782 /* Set write to end of buffer */
1193 length = (tail + length) - BUF_PAGE_SIZE; 1783 length = (tail + length) - BUF_PAGE_SIZE;
@@ -1200,96 +1790,93 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1200 struct buffer_page *commit_page, 1790 struct buffer_page *commit_page,
1201 struct buffer_page *tail_page, u64 *ts) 1791 struct buffer_page *tail_page, u64 *ts)
1202{ 1792{
1203 struct buffer_page *next_page, *head_page, *reader_page;
1204 struct ring_buffer *buffer = cpu_buffer->buffer; 1793 struct ring_buffer *buffer = cpu_buffer->buffer;
1205 bool lock_taken = false; 1794 struct buffer_page *next_page;
1206 unsigned long flags; 1795 int ret;
1207 1796
1208 next_page = tail_page; 1797 next_page = tail_page;
1209 1798
1210 local_irq_save(flags);
1211 /*
1212 * Since the write to the buffer is still not
1213 * fully lockless, we must be careful with NMIs.
1214 * The locks in the writers are taken when a write
1215 * crosses to a new page. The locks protect against
1216 * races with the readers (this will soon be fixed
1217 * with a lockless solution).
1218 *
1219 * Because we can not protect against NMIs, and we
1220 * want to keep traces reentrant, we need to manage
1221 * what happens when we are in an NMI.
1222 *
1223 * NMIs can happen after we take the lock.
1224 * If we are in an NMI, only take the lock
1225 * if it is not already taken. Otherwise
1226 * simply fail.
1227 */
1228 if (unlikely(in_nmi())) {
1229 if (!__raw_spin_trylock(&cpu_buffer->lock)) {
1230 cpu_buffer->nmi_dropped++;
1231 goto out_reset;
1232 }
1233 } else
1234 __raw_spin_lock(&cpu_buffer->lock);
1235
1236 lock_taken = true;
1237
1238 rb_inc_page(cpu_buffer, &next_page); 1799 rb_inc_page(cpu_buffer, &next_page);
1239 1800
1240 head_page = cpu_buffer->head_page;
1241 reader_page = cpu_buffer->reader_page;
1242
1243 /* we grabbed the lock before incrementing */
1244 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
1245 goto out_reset;
1246
1247 /* 1801 /*
1248 * If for some reason, we had an interrupt storm that made 1802 * If for some reason, we had an interrupt storm that made
1249 * it all the way around the buffer, bail, and warn 1803 * it all the way around the buffer, bail, and warn
1250 * about it. 1804 * about it.
1251 */ 1805 */
1252 if (unlikely(next_page == commit_page)) { 1806 if (unlikely(next_page == commit_page)) {
1253 cpu_buffer->commit_overrun++; 1807 local_inc(&cpu_buffer->commit_overrun);
1254 goto out_reset; 1808 goto out_reset;
1255 } 1809 }
1256 1810
1257 if (next_page == head_page) { 1811 /*
1258 if (!(buffer->flags & RB_FL_OVERWRITE)) 1812 * This is where the fun begins!
1259 goto out_reset; 1813 *
1260 1814 * We are fighting against races between a reader that
1261 /* tail_page has not moved yet? */ 1815 * could be on another CPU trying to swap its reader
1262 if (tail_page == cpu_buffer->tail_page) { 1816 * page with the buffer head.
1263 /* count overflows */ 1817 *
1264 cpu_buffer->overrun += 1818 * We are also fighting against interrupts coming in and
1265 local_read(&head_page->entries); 1819 * moving the head or tail on us as well.
1820 *
1821 * If the next page is the head page then we have filled
1822 * the buffer, unless the commit page is still on the
1823 * reader page.
1824 */
1825 if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) {
1266 1826
1267 rb_inc_page(cpu_buffer, &head_page); 1827 /*
1268 cpu_buffer->head_page = head_page; 1828 * If the commit is not on the reader page, then
1269 cpu_buffer->head_page->read = 0; 1829 * move the header page.
1830 */
1831 if (!rb_is_reader_page(cpu_buffer->commit_page)) {
1832 /*
1833 * If we are not in overwrite mode,
1834 * this is easy, just stop here.
1835 */
1836 if (!(buffer->flags & RB_FL_OVERWRITE))
1837 goto out_reset;
1838
1839 ret = rb_handle_head_page(cpu_buffer,
1840 tail_page,
1841 next_page);
1842 if (ret < 0)
1843 goto out_reset;
1844 if (ret)
1845 goto out_again;
1846 } else {
1847 /*
1848 * We need to be careful here too. The
1849 * commit page could still be on the reader
1850 * page. We could have a small buffer, and
1851 * have filled up the buffer with events
1852 * from interrupts and such, and wrapped.
1853 *
1854 * Note, if the tail page is also on the
1855 * reader_page, we let it move out.
1856 */
1857 if (unlikely((cpu_buffer->commit_page !=
1858 cpu_buffer->tail_page) &&
1859 (cpu_buffer->commit_page ==
1860 cpu_buffer->reader_page))) {
1861 local_inc(&cpu_buffer->commit_overrun);
1862 goto out_reset;
1863 }
1270 } 1864 }
1271 } 1865 }
1272 1866
1273 /* 1867 ret = rb_tail_page_update(cpu_buffer, tail_page, next_page);
1274 * If the tail page is still the same as what we think 1868 if (ret) {
1275 * it is, then it is up to us to update the tail 1869 /*
1276 * pointer. 1870 * Nested commits always have zero deltas, so
1277 */ 1871 * just reread the time stamp
1278 if (tail_page == cpu_buffer->tail_page) { 1872 */
1279 local_set(&next_page->write, 0);
1280 local_set(&next_page->entries, 0);
1281 local_set(&next_page->page->commit, 0);
1282 cpu_buffer->tail_page = next_page;
1283
1284 /* reread the time stamp */
1285 *ts = rb_time_stamp(buffer, cpu_buffer->cpu); 1873 *ts = rb_time_stamp(buffer, cpu_buffer->cpu);
1286 cpu_buffer->tail_page->page->time_stamp = *ts; 1874 next_page->page->time_stamp = *ts;
1287 } 1875 }
1288 1876
1289 rb_reset_tail(cpu_buffer, tail_page, tail, length); 1877 out_again:
1290 1878
1291 __raw_spin_unlock(&cpu_buffer->lock); 1879 rb_reset_tail(cpu_buffer, tail_page, tail, length);
1292 local_irq_restore(flags);
1293 1880
1294 /* fail and let the caller try again */ 1881 /* fail and let the caller try again */
1295 return ERR_PTR(-EAGAIN); 1882 return ERR_PTR(-EAGAIN);
@@ -1298,9 +1885,6 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1298 /* reset write */ 1885 /* reset write */
1299 rb_reset_tail(cpu_buffer, tail_page, tail, length); 1886 rb_reset_tail(cpu_buffer, tail_page, tail, length);
1300 1887
1301 if (likely(lock_taken))
1302 __raw_spin_unlock(&cpu_buffer->lock);
1303 local_irq_restore(flags);
1304 return NULL; 1888 return NULL;
1305} 1889}
1306 1890
@@ -1317,6 +1901,9 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1317 barrier(); 1901 barrier();
1318 tail_page = cpu_buffer->tail_page; 1902 tail_page = cpu_buffer->tail_page;
1319 write = local_add_return(length, &tail_page->write); 1903 write = local_add_return(length, &tail_page->write);
1904
1905 /* set write to only the index of the write */
1906 write &= RB_WRITE_MASK;
1320 tail = write - length; 1907 tail = write - length;
1321 1908
1322 /* See if we shot past the end of this buffer page */ 1909 /* See if we shot past the end of this buffer page */
@@ -1361,12 +1948,16 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1361 bpage = cpu_buffer->tail_page; 1948 bpage = cpu_buffer->tail_page;
1362 1949
1363 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { 1950 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
1951 unsigned long write_mask =
1952 local_read(&bpage->write) & ~RB_WRITE_MASK;
1364 /* 1953 /*
1365 * This is on the tail page. It is possible that 1954 * This is on the tail page. It is possible that
1366 * a write could come in and move the tail page 1955 * a write could come in and move the tail page
1367 * and write to the next page. That is fine 1956 * and write to the next page. That is fine
1368 * because we just shorten what is on this page. 1957 * because we just shorten what is on this page.
1369 */ 1958 */
1959 old_index += write_mask;
1960 new_index += write_mask;
1370 index = local_cmpxchg(&bpage->write, old_index, new_index); 1961 index = local_cmpxchg(&bpage->write, old_index, new_index);
1371 if (index == old_index) 1962 if (index == old_index)
1372 return 1; 1963 return 1;
@@ -1482,7 +2073,8 @@ static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
1482} 2073}
1483 2074
1484static struct ring_buffer_event * 2075static struct ring_buffer_event *
1485rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, 2076rb_reserve_next_event(struct ring_buffer *buffer,
2077 struct ring_buffer_per_cpu *cpu_buffer,
1486 unsigned long length) 2078 unsigned long length)
1487{ 2079{
1488 struct ring_buffer_event *event; 2080 struct ring_buffer_event *event;
@@ -1492,6 +2084,21 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1492 2084
1493 rb_start_commit(cpu_buffer); 2085 rb_start_commit(cpu_buffer);
1494 2086
2087#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2088 /*
2089 * Due to the ability to swap a cpu buffer from a buffer
2090 * it is possible it was swapped before we committed.
2091 * (committing stops a swap). We check for it here and
2092 * if it happened, we have to fail the write.
2093 */
2094 barrier();
2095 if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
2096 local_dec(&cpu_buffer->committing);
2097 local_dec(&cpu_buffer->commits);
2098 return NULL;
2099 }
2100#endif
2101
1495 length = rb_calculate_event_length(length); 2102 length = rb_calculate_event_length(length);
1496 again: 2103 again:
1497 /* 2104 /*
@@ -1652,7 +2259,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1652 if (length > BUF_MAX_DATA_SIZE) 2259 if (length > BUF_MAX_DATA_SIZE)
1653 goto out; 2260 goto out;
1654 2261
1655 event = rb_reserve_next_event(cpu_buffer, length); 2262 event = rb_reserve_next_event(buffer, cpu_buffer, length);
1656 if (!event) 2263 if (!event)
1657 goto out; 2264 goto out;
1658 2265
@@ -1675,18 +2282,23 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1675} 2282}
1676EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); 2283EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
1677 2284
1678static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 2285static void
2286rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1679 struct ring_buffer_event *event) 2287 struct ring_buffer_event *event)
1680{ 2288{
1681 local_inc(&cpu_buffer->entries);
1682
1683 /* 2289 /*
1684 * The event first in the commit queue updates the 2290 * The event first in the commit queue updates the
1685 * time stamp. 2291 * time stamp.
1686 */ 2292 */
1687 if (rb_event_is_commit(cpu_buffer, event)) 2293 if (rb_event_is_commit(cpu_buffer, event))
1688 cpu_buffer->write_stamp += event->time_delta; 2294 cpu_buffer->write_stamp += event->time_delta;
2295}
1689 2296
2297static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2298 struct ring_buffer_event *event)
2299{
2300 local_inc(&cpu_buffer->entries);
2301 rb_update_write_stamp(cpu_buffer, event);
1690 rb_end_commit(cpu_buffer); 2302 rb_end_commit(cpu_buffer);
1691} 2303}
1692 2304
@@ -1733,32 +2345,57 @@ static inline void rb_event_discard(struct ring_buffer_event *event)
1733 event->time_delta = 1; 2345 event->time_delta = 1;
1734} 2346}
1735 2347
1736/** 2348/*
1737 * ring_buffer_event_discard - discard any event in the ring buffer 2349 * Decrement the entries to the page that an event is on.
1738 * @event: the event to discard 2350 * The event does not even need to exist, only the pointer
1739 * 2351 * to the page it is on. This may only be called before the commit
1740 * Sometimes a event that is in the ring buffer needs to be ignored. 2352 * takes place.
1741 * This function lets the user discard an event in the ring buffer
1742 * and then that event will not be read later.
1743 *
1744 * Note, it is up to the user to be careful with this, and protect
1745 * against races. If the user discards an event that has been consumed
1746 * it is possible that it could corrupt the ring buffer.
1747 */ 2353 */
1748void ring_buffer_event_discard(struct ring_buffer_event *event) 2354static inline void
2355rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
2356 struct ring_buffer_event *event)
1749{ 2357{
1750 rb_event_discard(event); 2358 unsigned long addr = (unsigned long)event;
2359 struct buffer_page *bpage = cpu_buffer->commit_page;
2360 struct buffer_page *start;
2361
2362 addr &= PAGE_MASK;
2363
2364 /* Do the likely case first */
2365 if (likely(bpage->page == (void *)addr)) {
2366 local_dec(&bpage->entries);
2367 return;
2368 }
2369
2370 /*
2371 * Because the commit page may be on the reader page we
2372 * start with the next page and check the end loop there.
2373 */
2374 rb_inc_page(cpu_buffer, &bpage);
2375 start = bpage;
2376 do {
2377 if (bpage->page == (void *)addr) {
2378 local_dec(&bpage->entries);
2379 return;
2380 }
2381 rb_inc_page(cpu_buffer, &bpage);
2382 } while (bpage != start);
2383
2384 /* commit not part of this buffer?? */
2385 RB_WARN_ON(cpu_buffer, 1);
1751} 2386}
1752EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
1753 2387
1754/** 2388/**
1755 * ring_buffer_commit_discard - discard an event that has not been committed 2389 * ring_buffer_commit_discard - discard an event that has not been committed
1756 * @buffer: the ring buffer 2390 * @buffer: the ring buffer
1757 * @event: non committed event to discard 2391 * @event: non committed event to discard
1758 * 2392 *
1759 * This is similar to ring_buffer_event_discard but must only be 2393 * Sometimes an event that is in the ring buffer needs to be ignored.
1760 * performed on an event that has not been committed yet. The difference 2394 * This function lets the user discard an event in the ring buffer
1761 * is that this will also try to free the event from the ring buffer 2395 * and then that event will not be read later.
2396 *
2397 * This function only works if it is called before the item has been
2398 * committed. It will try to free the event from the ring buffer
1762 * if another event has not been added behind it. 2399 * if another event has not been added behind it.
1763 * 2400 *
1764 * If another event has been added behind it, it will set the event 2401 * If another event has been added behind it, it will set the event
@@ -1786,14 +2423,15 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
1786 */ 2423 */
1787 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); 2424 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
1788 2425
2426 rb_decrement_entry(cpu_buffer, event);
1789 if (rb_try_to_discard(cpu_buffer, event)) 2427 if (rb_try_to_discard(cpu_buffer, event))
1790 goto out; 2428 goto out;
1791 2429
1792 /* 2430 /*
1793 * The commit is still visible by the reader, so we 2431 * The commit is still visible by the reader, so we
1794 * must increment entries. 2432 * must still update the timestamp.
1795 */ 2433 */
1796 local_inc(&cpu_buffer->entries); 2434 rb_update_write_stamp(cpu_buffer, event);
1797 out: 2435 out:
1798 rb_end_commit(cpu_buffer); 2436 rb_end_commit(cpu_buffer);
1799 2437
@@ -1854,7 +2492,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1854 if (length > BUF_MAX_DATA_SIZE) 2492 if (length > BUF_MAX_DATA_SIZE)
1855 goto out; 2493 goto out;
1856 2494
1857 event = rb_reserve_next_event(cpu_buffer, length); 2495 event = rb_reserve_next_event(buffer, cpu_buffer, length);
1858 if (!event) 2496 if (!event)
1859 goto out; 2497 goto out;
1860 2498
@@ -1875,9 +2513,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_write);
1875static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) 2513static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1876{ 2514{
1877 struct buffer_page *reader = cpu_buffer->reader_page; 2515 struct buffer_page *reader = cpu_buffer->reader_page;
1878 struct buffer_page *head = cpu_buffer->head_page; 2516 struct buffer_page *head = rb_set_head_page(cpu_buffer);
1879 struct buffer_page *commit = cpu_buffer->commit_page; 2517 struct buffer_page *commit = cpu_buffer->commit_page;
1880 2518
2519 /* In case of error, head will be NULL */
2520 if (unlikely(!head))
2521 return 1;
2522
1881 return reader->read == rb_page_commit(reader) && 2523 return reader->read == rb_page_commit(reader) &&
1882 (commit == reader || 2524 (commit == reader ||
1883 (commit == head && 2525 (commit == head &&
@@ -1968,7 +2610,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1968 return 0; 2610 return 0;
1969 2611
1970 cpu_buffer = buffer->buffers[cpu]; 2612 cpu_buffer = buffer->buffers[cpu];
1971 ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun) 2613 ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
1972 - cpu_buffer->read; 2614 - cpu_buffer->read;
1973 2615
1974 return ret; 2616 return ret;
@@ -1989,33 +2631,13 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1989 return 0; 2631 return 0;
1990 2632
1991 cpu_buffer = buffer->buffers[cpu]; 2633 cpu_buffer = buffer->buffers[cpu];
1992 ret = cpu_buffer->overrun; 2634 ret = local_read(&cpu_buffer->overrun);
1993 2635
1994 return ret; 2636 return ret;
1995} 2637}
1996EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); 2638EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
1997 2639
1998/** 2640/**
1999 * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped
2000 * @buffer: The ring buffer
2001 * @cpu: The per CPU buffer to get the number of overruns from
2002 */
2003unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
2004{
2005 struct ring_buffer_per_cpu *cpu_buffer;
2006 unsigned long ret;
2007
2008 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2009 return 0;
2010
2011 cpu_buffer = buffer->buffers[cpu];
2012 ret = cpu_buffer->nmi_dropped;
2013
2014 return ret;
2015}
2016EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
2017
2018/**
2019 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits 2641 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
2020 * @buffer: The ring buffer 2642 * @buffer: The ring buffer
2021 * @cpu: The per CPU buffer to get the number of overruns from 2643 * @cpu: The per CPU buffer to get the number of overruns from
@@ -2030,7 +2652,7 @@ ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
2030 return 0; 2652 return 0;
2031 2653
2032 cpu_buffer = buffer->buffers[cpu]; 2654 cpu_buffer = buffer->buffers[cpu];
2033 ret = cpu_buffer->commit_overrun; 2655 ret = local_read(&cpu_buffer->commit_overrun);
2034 2656
2035 return ret; 2657 return ret;
2036} 2658}
@@ -2053,7 +2675,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2053 for_each_buffer_cpu(buffer, cpu) { 2675 for_each_buffer_cpu(buffer, cpu) {
2054 cpu_buffer = buffer->buffers[cpu]; 2676 cpu_buffer = buffer->buffers[cpu];
2055 entries += (local_read(&cpu_buffer->entries) - 2677 entries += (local_read(&cpu_buffer->entries) -
2056 cpu_buffer->overrun) - cpu_buffer->read; 2678 local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
2057 } 2679 }
2058 2680
2059 return entries; 2681 return entries;
@@ -2076,7 +2698,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
2076 /* if you care about this being correct, lock the buffer */ 2698 /* if you care about this being correct, lock the buffer */
2077 for_each_buffer_cpu(buffer, cpu) { 2699 for_each_buffer_cpu(buffer, cpu) {
2078 cpu_buffer = buffer->buffers[cpu]; 2700 cpu_buffer = buffer->buffers[cpu];
2079 overruns += cpu_buffer->overrun; 2701 overruns += local_read(&cpu_buffer->overrun);
2080 } 2702 }
2081 2703
2082 return overruns; 2704 return overruns;
@@ -2089,8 +2711,10 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
2089 2711
2090 /* Iterator usage is expected to have record disabled */ 2712 /* Iterator usage is expected to have record disabled */
2091 if (list_empty(&cpu_buffer->reader_page->list)) { 2713 if (list_empty(&cpu_buffer->reader_page->list)) {
2092 iter->head_page = cpu_buffer->head_page; 2714 iter->head_page = rb_set_head_page(cpu_buffer);
2093 iter->head = cpu_buffer->head_page->read; 2715 if (unlikely(!iter->head_page))
2716 return;
2717 iter->head = iter->head_page->read;
2094 } else { 2718 } else {
2095 iter->head_page = cpu_buffer->reader_page; 2719 iter->head_page = cpu_buffer->reader_page;
2096 iter->head = cpu_buffer->reader_page->read; 2720 iter->head = cpu_buffer->reader_page->read;
@@ -2207,6 +2831,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2207 struct buffer_page *reader = NULL; 2831 struct buffer_page *reader = NULL;
2208 unsigned long flags; 2832 unsigned long flags;
2209 int nr_loops = 0; 2833 int nr_loops = 0;
2834 int ret;
2210 2835
2211 local_irq_save(flags); 2836 local_irq_save(flags);
2212 __raw_spin_lock(&cpu_buffer->lock); 2837 __raw_spin_lock(&cpu_buffer->lock);
@@ -2240,30 +2865,56 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2240 goto out; 2865 goto out;
2241 2866
2242 /* 2867 /*
2243 * Splice the empty reader page into the list around the head.
2244 * Reset the reader page to size zero. 2868 * Reset the reader page to size zero.
2245 */ 2869 */
2870 local_set(&cpu_buffer->reader_page->write, 0);
2871 local_set(&cpu_buffer->reader_page->entries, 0);
2872 local_set(&cpu_buffer->reader_page->page->commit, 0);
2246 2873
2247 reader = cpu_buffer->head_page; 2874 spin:
2875 /*
2876 * Splice the empty reader page into the list around the head.
2877 */
2878 reader = rb_set_head_page(cpu_buffer);
2248 cpu_buffer->reader_page->list.next = reader->list.next; 2879 cpu_buffer->reader_page->list.next = reader->list.next;
2249 cpu_buffer->reader_page->list.prev = reader->list.prev; 2880 cpu_buffer->reader_page->list.prev = reader->list.prev;
2250 2881
2251 local_set(&cpu_buffer->reader_page->write, 0); 2882 /*
2252 local_set(&cpu_buffer->reader_page->entries, 0); 2883 * cpu_buffer->pages just needs to point to the buffer, it
2253 local_set(&cpu_buffer->reader_page->page->commit, 0); 2884 * has no specific buffer page to point to. Lets move it out
2885 * of our way so we don't accidentally swap it.
2886 */
2887 cpu_buffer->pages = reader->list.prev;
2254 2888
2255 /* Make the reader page now replace the head */ 2889 /* The reader page will be pointing to the new head */
2256 reader->list.prev->next = &cpu_buffer->reader_page->list; 2890 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
2257 reader->list.next->prev = &cpu_buffer->reader_page->list;
2258 2891
2259 /* 2892 /*
2260 * If the tail is on the reader, then we must set the head 2893 * Here's the tricky part.
2261 * to the inserted page, otherwise we set it one before. 2894 *
2895 * We need to move the pointer past the header page.
2896 * But we can only do that if a writer is not currently
2897 * moving it. The page before the header page has the
2898 * flag bit '1' set if it is pointing to the page we want,
2899 * but if the writer is in the process of moving it
2900 * then it will be '2' or already moved '0'.
2262 */ 2901 */
2263 cpu_buffer->head_page = cpu_buffer->reader_page;
2264 2902
2265 if (cpu_buffer->commit_page != reader) 2903 ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
2266 rb_inc_page(cpu_buffer, &cpu_buffer->head_page); 2904
2905 /*
2906 * If we did not convert it, then we must try again.
2907 */
2908 if (!ret)
2909 goto spin;
2910
2911 /*
2912 * Yeah! We succeeded in replacing the page.
2913 *
2914 * Now make the new head point back to the reader page.
2915 */
2916 reader->list.next->prev = &cpu_buffer->reader_page->list;
2917 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
2267 2918
2268 /* Finally update the reader page to the new head */ 2919 /* Finally update the reader page to the new head */
2269 cpu_buffer->reader_page = reader; 2920 cpu_buffer->reader_page = reader;
@@ -2292,8 +2943,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
2292 2943
2293 event = rb_reader_event(cpu_buffer); 2944 event = rb_reader_event(cpu_buffer);
2294 2945
2295 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX 2946 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
2296 || rb_discarded_event(event))
2297 cpu_buffer->read++; 2947 cpu_buffer->read++;
2298 2948
2299 rb_update_read_stamp(cpu_buffer, event); 2949 rb_update_read_stamp(cpu_buffer, event);
@@ -2525,10 +3175,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2525 spin_unlock(&cpu_buffer->reader_lock); 3175 spin_unlock(&cpu_buffer->reader_lock);
2526 local_irq_restore(flags); 3176 local_irq_restore(flags);
2527 3177
2528 if (event && event->type_len == RINGBUF_TYPE_PADDING) { 3178 if (event && event->type_len == RINGBUF_TYPE_PADDING)
2529 cpu_relax();
2530 goto again; 3179 goto again;
2531 }
2532 3180
2533 return event; 3181 return event;
2534} 3182}
@@ -2553,10 +3201,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2553 event = rb_iter_peek(iter, ts); 3201 event = rb_iter_peek(iter, ts);
2554 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3202 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2555 3203
2556 if (event && event->type_len == RINGBUF_TYPE_PADDING) { 3204 if (event && event->type_len == RINGBUF_TYPE_PADDING)
2557 cpu_relax();
2558 goto again; 3205 goto again;
2559 }
2560 3206
2561 return event; 3207 return event;
2562} 3208}
@@ -2602,10 +3248,8 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2602 out: 3248 out:
2603 preempt_enable(); 3249 preempt_enable();
2604 3250
2605 if (event && event->type_len == RINGBUF_TYPE_PADDING) { 3251 if (event && event->type_len == RINGBUF_TYPE_PADDING)
2606 cpu_relax();
2607 goto again; 3252 goto again;
2608 }
2609 3253
2610 return event; 3254 return event;
2611} 3255}
@@ -2685,21 +3329,19 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
2685 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 3329 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2686 unsigned long flags; 3330 unsigned long flags;
2687 3331
2688 again:
2689 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3332 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3333 again:
2690 event = rb_iter_peek(iter, ts); 3334 event = rb_iter_peek(iter, ts);
2691 if (!event) 3335 if (!event)
2692 goto out; 3336 goto out;
2693 3337
3338 if (event->type_len == RINGBUF_TYPE_PADDING)
3339 goto again;
3340
2694 rb_advance_iter(iter); 3341 rb_advance_iter(iter);
2695 out: 3342 out:
2696 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3343 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2697 3344
2698 if (event && event->type_len == RINGBUF_TYPE_PADDING) {
2699 cpu_relax();
2700 goto again;
2701 }
2702
2703 return event; 3345 return event;
2704} 3346}
2705EXPORT_SYMBOL_GPL(ring_buffer_read); 3347EXPORT_SYMBOL_GPL(ring_buffer_read);
@@ -2717,8 +3359,10 @@ EXPORT_SYMBOL_GPL(ring_buffer_size);
2717static void 3359static void
2718rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) 3360rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2719{ 3361{
3362 rb_head_page_deactivate(cpu_buffer);
3363
2720 cpu_buffer->head_page 3364 cpu_buffer->head_page
2721 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 3365 = list_entry(cpu_buffer->pages, struct buffer_page, list);
2722 local_set(&cpu_buffer->head_page->write, 0); 3366 local_set(&cpu_buffer->head_page->write, 0);
2723 local_set(&cpu_buffer->head_page->entries, 0); 3367 local_set(&cpu_buffer->head_page->entries, 0);
2724 local_set(&cpu_buffer->head_page->page->commit, 0); 3368 local_set(&cpu_buffer->head_page->page->commit, 0);
@@ -2734,16 +3378,17 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2734 local_set(&cpu_buffer->reader_page->page->commit, 0); 3378 local_set(&cpu_buffer->reader_page->page->commit, 0);
2735 cpu_buffer->reader_page->read = 0; 3379 cpu_buffer->reader_page->read = 0;
2736 3380
2737 cpu_buffer->nmi_dropped = 0; 3381 local_set(&cpu_buffer->commit_overrun, 0);
2738 cpu_buffer->commit_overrun = 0; 3382 local_set(&cpu_buffer->overrun, 0);
2739 cpu_buffer->overrun = 0;
2740 cpu_buffer->read = 0;
2741 local_set(&cpu_buffer->entries, 0); 3383 local_set(&cpu_buffer->entries, 0);
2742 local_set(&cpu_buffer->committing, 0); 3384 local_set(&cpu_buffer->committing, 0);
2743 local_set(&cpu_buffer->commits, 0); 3385 local_set(&cpu_buffer->commits, 0);
3386 cpu_buffer->read = 0;
2744 3387
2745 cpu_buffer->write_stamp = 0; 3388 cpu_buffer->write_stamp = 0;
2746 cpu_buffer->read_stamp = 0; 3389 cpu_buffer->read_stamp = 0;
3390
3391 rb_head_page_activate(cpu_buffer);
2747} 3392}
2748 3393
2749/** 3394/**
@@ -2763,12 +3408,16 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2763 3408
2764 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3409 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2765 3410
3411 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
3412 goto out;
3413
2766 __raw_spin_lock(&cpu_buffer->lock); 3414 __raw_spin_lock(&cpu_buffer->lock);
2767 3415
2768 rb_reset_cpu(cpu_buffer); 3416 rb_reset_cpu(cpu_buffer);
2769 3417
2770 __raw_spin_unlock(&cpu_buffer->lock); 3418 __raw_spin_unlock(&cpu_buffer->lock);
2771 3419
3420 out:
2772 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3421 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2773 3422
2774 atomic_dec(&cpu_buffer->record_disabled); 3423 atomic_dec(&cpu_buffer->record_disabled);
@@ -2851,6 +3500,7 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
2851} 3500}
2852EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); 3501EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
2853 3502
3503#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2854/** 3504/**
2855 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers 3505 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
2856 * @buffer_a: One buffer to swap with 3506 * @buffer_a: One buffer to swap with
@@ -2905,20 +3555,28 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2905 atomic_inc(&cpu_buffer_a->record_disabled); 3555 atomic_inc(&cpu_buffer_a->record_disabled);
2906 atomic_inc(&cpu_buffer_b->record_disabled); 3556 atomic_inc(&cpu_buffer_b->record_disabled);
2907 3557
3558 ret = -EBUSY;
3559 if (local_read(&cpu_buffer_a->committing))
3560 goto out_dec;
3561 if (local_read(&cpu_buffer_b->committing))
3562 goto out_dec;
3563
2908 buffer_a->buffers[cpu] = cpu_buffer_b; 3564 buffer_a->buffers[cpu] = cpu_buffer_b;
2909 buffer_b->buffers[cpu] = cpu_buffer_a; 3565 buffer_b->buffers[cpu] = cpu_buffer_a;
2910 3566
2911 cpu_buffer_b->buffer = buffer_a; 3567 cpu_buffer_b->buffer = buffer_a;
2912 cpu_buffer_a->buffer = buffer_b; 3568 cpu_buffer_a->buffer = buffer_b;
2913 3569
3570 ret = 0;
3571
3572out_dec:
2914 atomic_dec(&cpu_buffer_a->record_disabled); 3573 atomic_dec(&cpu_buffer_a->record_disabled);
2915 atomic_dec(&cpu_buffer_b->record_disabled); 3574 atomic_dec(&cpu_buffer_b->record_disabled);
2916
2917 ret = 0;
2918out: 3575out:
2919 return ret; 3576 return ret;
2920} 3577}
2921EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); 3578EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
3579#endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */
2922 3580
2923/** 3581/**
2924 * ring_buffer_alloc_read_page - allocate a page to read from buffer 3582 * ring_buffer_alloc_read_page - allocate a page to read from buffer
@@ -3091,7 +3749,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3091 read = 0; 3749 read = 0;
3092 } else { 3750 } else {
3093 /* update the entry counter */ 3751 /* update the entry counter */
3094 cpu_buffer->read += local_read(&reader->entries); 3752 cpu_buffer->read += rb_page_entries(reader);
3095 3753
3096 /* swap the pages */ 3754 /* swap the pages */
3097 rb_init_page(bpage); 3755 rb_init_page(bpage);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c22b40f8f57..5c75deeefe3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -43,14 +43,11 @@
43 43
44#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) 44#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
45 45
46unsigned long __read_mostly tracing_max_latency;
47unsigned long __read_mostly tracing_thresh;
48
49/* 46/*
50 * On boot up, the ring buffer is set to the minimum size, so that 47 * On boot up, the ring buffer is set to the minimum size, so that
51 * we do not waste memory on systems that are not using tracing. 48 * we do not waste memory on systems that are not using tracing.
52 */ 49 */
53static int ring_buffer_expanded; 50int ring_buffer_expanded;
54 51
55/* 52/*
56 * We need to change this state when a selftest is running. 53 * We need to change this state when a selftest is running.
@@ -64,7 +61,7 @@ static bool __read_mostly tracing_selftest_running;
64/* 61/*
65 * If a tracer is running, we do not want to run SELFTEST. 62 * If a tracer is running, we do not want to run SELFTEST.
66 */ 63 */
67static bool __read_mostly tracing_selftest_disabled; 64bool __read_mostly tracing_selftest_disabled;
68 65
69/* For tracers that don't implement custom flags */ 66/* For tracers that don't implement custom flags */
70static struct tracer_opt dummy_tracer_opt[] = { 67static struct tracer_opt dummy_tracer_opt[] = {
@@ -89,7 +86,7 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
89 */ 86 */
90static int tracing_disabled = 1; 87static int tracing_disabled = 1;
91 88
92static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 89DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
93 90
94static inline void ftrace_disable_cpu(void) 91static inline void ftrace_disable_cpu(void)
95{ 92{
@@ -172,10 +169,11 @@ static struct trace_array global_trace;
172 169
173static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); 170static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
174 171
175int filter_current_check_discard(struct ftrace_event_call *call, void *rec, 172int filter_current_check_discard(struct ring_buffer *buffer,
173 struct ftrace_event_call *call, void *rec,
176 struct ring_buffer_event *event) 174 struct ring_buffer_event *event)
177{ 175{
178 return filter_check_discard(call, rec, global_trace.buffer, event); 176 return filter_check_discard(call, rec, buffer, event);
179} 177}
180EXPORT_SYMBOL_GPL(filter_current_check_discard); 178EXPORT_SYMBOL_GPL(filter_current_check_discard);
181 179
@@ -266,6 +264,9 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
266 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | 264 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
267 TRACE_ITER_GRAPH_TIME; 265 TRACE_ITER_GRAPH_TIME;
268 266
267static int trace_stop_count;
268static DEFINE_SPINLOCK(tracing_start_lock);
269
269/** 270/**
270 * trace_wake_up - wake up tasks waiting for trace input 271 * trace_wake_up - wake up tasks waiting for trace input
271 * 272 *
@@ -323,50 +324,20 @@ static const char *trace_options[] = {
323 "printk-msg-only", 324 "printk-msg-only",
324 "context-info", 325 "context-info",
325 "latency-format", 326 "latency-format",
326 "global-clock",
327 "sleep-time", 327 "sleep-time",
328 "graph-time", 328 "graph-time",
329 NULL 329 NULL
330}; 330};
331 331
332/* 332static struct {
333 * ftrace_max_lock is used to protect the swapping of buffers 333 u64 (*func)(void);
334 * when taking a max snapshot. The buffers themselves are 334 const char *name;
335 * protected by per_cpu spinlocks. But the action of the swap 335} trace_clocks[] = {
336 * needs its own lock. 336 { trace_clock_local, "local" },
337 * 337 { trace_clock_global, "global" },
338 * This is defined as a raw_spinlock_t in order to help 338};
339 * with performance when lockdep debugging is enabled.
340 */
341static raw_spinlock_t ftrace_max_lock =
342 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
343
344/*
345 * Copy the new maximum trace into the separate maximum-trace
346 * structure. (this way the maximum trace is permanently saved,
347 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
348 */
349static void
350__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
351{
352 struct trace_array_cpu *data = tr->data[cpu];
353
354 max_tr.cpu = cpu;
355 max_tr.time_start = data->preempt_timestamp;
356
357 data = max_tr.data[cpu];
358 data->saved_latency = tracing_max_latency;
359 339
360 memcpy(data->comm, tsk->comm, TASK_COMM_LEN); 340int trace_clock_id;
361 data->pid = tsk->pid;
362 data->uid = task_uid(tsk);
363 data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
364 data->policy = tsk->policy;
365 data->rt_priority = tsk->rt_priority;
366
367 /* record this tasks comm */
368 tracing_record_cmdline(tsk);
369}
370 341
371ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) 342ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
372{ 343{
@@ -411,6 +382,56 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
411 return cnt; 382 return cnt;
412} 383}
413 384
385/*
386 * ftrace_max_lock is used to protect the swapping of buffers
387 * when taking a max snapshot. The buffers themselves are
388 * protected by per_cpu spinlocks. But the action of the swap
389 * needs its own lock.
390 *
391 * This is defined as a raw_spinlock_t in order to help
392 * with performance when lockdep debugging is enabled.
393 *
394 * It is also used in other places outside the update_max_tr
395 * so it needs to be defined outside of the
396 * CONFIG_TRACER_MAX_TRACE.
397 */
398static raw_spinlock_t ftrace_max_lock =
399 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
400
401#ifdef CONFIG_TRACER_MAX_TRACE
402unsigned long __read_mostly tracing_max_latency;
403unsigned long __read_mostly tracing_thresh;
404
405/*
406 * Copy the new maximum trace into the separate maximum-trace
407 * structure. (this way the maximum trace is permanently saved,
408 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
409 */
410static void
411__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
412{
413 struct trace_array_cpu *data = tr->data[cpu];
414 struct trace_array_cpu *max_data = tr->data[cpu];
415
416 max_tr.cpu = cpu;
417 max_tr.time_start = data->preempt_timestamp;
418
419 max_data = max_tr.data[cpu];
420 max_data->saved_latency = tracing_max_latency;
421 max_data->critical_start = data->critical_start;
422 max_data->critical_end = data->critical_end;
423
424 memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
425 max_data->pid = tsk->pid;
426 max_data->uid = task_uid(tsk);
427 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
428 max_data->policy = tsk->policy;
429 max_data->rt_priority = tsk->rt_priority;
430
431 /* record this tasks comm */
432 tracing_record_cmdline(tsk);
433}
434
414/** 435/**
415 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 436 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
416 * @tr: tracer 437 * @tr: tracer
@@ -425,16 +446,15 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
425{ 446{
426 struct ring_buffer *buf = tr->buffer; 447 struct ring_buffer *buf = tr->buffer;
427 448
449 if (trace_stop_count)
450 return;
451
428 WARN_ON_ONCE(!irqs_disabled()); 452 WARN_ON_ONCE(!irqs_disabled());
429 __raw_spin_lock(&ftrace_max_lock); 453 __raw_spin_lock(&ftrace_max_lock);
430 454
431 tr->buffer = max_tr.buffer; 455 tr->buffer = max_tr.buffer;
432 max_tr.buffer = buf; 456 max_tr.buffer = buf;
433 457
434 ftrace_disable_cpu();
435 ring_buffer_reset(tr->buffer);
436 ftrace_enable_cpu();
437
438 __update_max_tr(tr, tsk, cpu); 458 __update_max_tr(tr, tsk, cpu);
439 __raw_spin_unlock(&ftrace_max_lock); 459 __raw_spin_unlock(&ftrace_max_lock);
440} 460}
@@ -452,21 +472,35 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
452{ 472{
453 int ret; 473 int ret;
454 474
475 if (trace_stop_count)
476 return;
477
455 WARN_ON_ONCE(!irqs_disabled()); 478 WARN_ON_ONCE(!irqs_disabled());
456 __raw_spin_lock(&ftrace_max_lock); 479 __raw_spin_lock(&ftrace_max_lock);
457 480
458 ftrace_disable_cpu(); 481 ftrace_disable_cpu();
459 482
460 ring_buffer_reset(max_tr.buffer);
461 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu); 483 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
462 484
485 if (ret == -EBUSY) {
486 /*
487 * We failed to swap the buffer due to a commit taking
488 * place on this CPU. We fail to record, but we reset
489 * the max trace buffer (no one writes directly to it)
490 * and flag that it failed.
491 */
492 trace_array_printk(&max_tr, _THIS_IP_,
493 "Failed to swap buffers due to commit in progress\n");
494 }
495
463 ftrace_enable_cpu(); 496 ftrace_enable_cpu();
464 497
465 WARN_ON_ONCE(ret && ret != -EAGAIN); 498 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
466 499
467 __update_max_tr(tr, tsk, cpu); 500 __update_max_tr(tr, tsk, cpu);
468 __raw_spin_unlock(&ftrace_max_lock); 501 __raw_spin_unlock(&ftrace_max_lock);
469} 502}
503#endif /* CONFIG_TRACER_MAX_TRACE */
470 504
471/** 505/**
472 * register_tracer - register a tracer with the ftrace system. 506 * register_tracer - register a tracer with the ftrace system.
@@ -523,7 +557,6 @@ __acquires(kernel_lock)
523 if (type->selftest && !tracing_selftest_disabled) { 557 if (type->selftest && !tracing_selftest_disabled) {
524 struct tracer *saved_tracer = current_trace; 558 struct tracer *saved_tracer = current_trace;
525 struct trace_array *tr = &global_trace; 559 struct trace_array *tr = &global_trace;
526 int i;
527 560
528 /* 561 /*
529 * Run a selftest on this tracer. 562 * Run a selftest on this tracer.
@@ -532,8 +565,7 @@ __acquires(kernel_lock)
532 * internal tracing to verify that everything is in order. 565 * internal tracing to verify that everything is in order.
533 * If we fail, we do not register this tracer. 566 * If we fail, we do not register this tracer.
534 */ 567 */
535 for_each_tracing_cpu(i) 568 tracing_reset_online_cpus(tr);
536 tracing_reset(tr, i);
537 569
538 current_trace = type; 570 current_trace = type;
539 /* the test is responsible for initializing and enabling */ 571 /* the test is responsible for initializing and enabling */
@@ -546,8 +578,7 @@ __acquires(kernel_lock)
546 goto out; 578 goto out;
547 } 579 }
548 /* Only reset on passing, to avoid touching corrupted buffers */ 580 /* Only reset on passing, to avoid touching corrupted buffers */
549 for_each_tracing_cpu(i) 581 tracing_reset_online_cpus(tr);
550 tracing_reset(tr, i);
551 582
552 printk(KERN_CONT "PASSED\n"); 583 printk(KERN_CONT "PASSED\n");
553 } 584 }
@@ -622,21 +653,42 @@ void unregister_tracer(struct tracer *type)
622 mutex_unlock(&trace_types_lock); 653 mutex_unlock(&trace_types_lock);
623} 654}
624 655
625void tracing_reset(struct trace_array *tr, int cpu) 656static void __tracing_reset(struct trace_array *tr, int cpu)
626{ 657{
627 ftrace_disable_cpu(); 658 ftrace_disable_cpu();
628 ring_buffer_reset_cpu(tr->buffer, cpu); 659 ring_buffer_reset_cpu(tr->buffer, cpu);
629 ftrace_enable_cpu(); 660 ftrace_enable_cpu();
630} 661}
631 662
663void tracing_reset(struct trace_array *tr, int cpu)
664{
665 struct ring_buffer *buffer = tr->buffer;
666
667 ring_buffer_record_disable(buffer);
668
669 /* Make sure all commits have finished */
670 synchronize_sched();
671 __tracing_reset(tr, cpu);
672
673 ring_buffer_record_enable(buffer);
674}
675
632void tracing_reset_online_cpus(struct trace_array *tr) 676void tracing_reset_online_cpus(struct trace_array *tr)
633{ 677{
678 struct ring_buffer *buffer = tr->buffer;
634 int cpu; 679 int cpu;
635 680
681 ring_buffer_record_disable(buffer);
682
683 /* Make sure all commits have finished */
684 synchronize_sched();
685
636 tr->time_start = ftrace_now(tr->cpu); 686 tr->time_start = ftrace_now(tr->cpu);
637 687
638 for_each_online_cpu(cpu) 688 for_each_online_cpu(cpu)
639 tracing_reset(tr, cpu); 689 __tracing_reset(tr, cpu);
690
691 ring_buffer_record_enable(buffer);
640} 692}
641 693
642void tracing_reset_current(int cpu) 694void tracing_reset_current(int cpu)
@@ -667,9 +719,6 @@ static void trace_init_cmdlines(void)
667 cmdline_idx = 0; 719 cmdline_idx = 0;
668} 720}
669 721
670static int trace_stop_count;
671static DEFINE_SPINLOCK(tracing_start_lock);
672
673/** 722/**
674 * ftrace_off_permanent - disable all ftrace code permanently 723 * ftrace_off_permanent - disable all ftrace code permanently
675 * 724 *
@@ -850,14 +899,15 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
850} 899}
851EXPORT_SYMBOL_GPL(tracing_generic_entry_update); 900EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
852 901
853struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, 902struct ring_buffer_event *
854 int type, 903trace_buffer_lock_reserve(struct ring_buffer *buffer,
855 unsigned long len, 904 int type,
856 unsigned long flags, int pc) 905 unsigned long len,
906 unsigned long flags, int pc)
857{ 907{
858 struct ring_buffer_event *event; 908 struct ring_buffer_event *event;
859 909
860 event = ring_buffer_lock_reserve(tr->buffer, len); 910 event = ring_buffer_lock_reserve(buffer, len);
861 if (event != NULL) { 911 if (event != NULL) {
862 struct trace_entry *ent = ring_buffer_event_data(event); 912 struct trace_entry *ent = ring_buffer_event_data(event);
863 913
@@ -867,58 +917,60 @@ struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
867 917
868 return event; 918 return event;
869} 919}
870static void ftrace_trace_stack(struct trace_array *tr,
871 unsigned long flags, int skip, int pc);
872static void ftrace_trace_userstack(struct trace_array *tr,
873 unsigned long flags, int pc);
874 920
875static inline void __trace_buffer_unlock_commit(struct trace_array *tr, 921static inline void
876 struct ring_buffer_event *event, 922__trace_buffer_unlock_commit(struct ring_buffer *buffer,
877 unsigned long flags, int pc, 923 struct ring_buffer_event *event,
878 int wake) 924 unsigned long flags, int pc,
925 int wake)
879{ 926{
880 ring_buffer_unlock_commit(tr->buffer, event); 927 ring_buffer_unlock_commit(buffer, event);
881 928
882 ftrace_trace_stack(tr, flags, 6, pc); 929 ftrace_trace_stack(buffer, flags, 6, pc);
883 ftrace_trace_userstack(tr, flags, pc); 930 ftrace_trace_userstack(buffer, flags, pc);
884 931
885 if (wake) 932 if (wake)
886 trace_wake_up(); 933 trace_wake_up();
887} 934}
888 935
889void trace_buffer_unlock_commit(struct trace_array *tr, 936void trace_buffer_unlock_commit(struct ring_buffer *buffer,
890 struct ring_buffer_event *event, 937 struct ring_buffer_event *event,
891 unsigned long flags, int pc) 938 unsigned long flags, int pc)
892{ 939{
893 __trace_buffer_unlock_commit(tr, event, flags, pc, 1); 940 __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
894} 941}
895 942
896struct ring_buffer_event * 943struct ring_buffer_event *
897trace_current_buffer_lock_reserve(int type, unsigned long len, 944trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
945 int type, unsigned long len,
898 unsigned long flags, int pc) 946 unsigned long flags, int pc)
899{ 947{
900 return trace_buffer_lock_reserve(&global_trace, 948 *current_rb = global_trace.buffer;
949 return trace_buffer_lock_reserve(*current_rb,
901 type, len, flags, pc); 950 type, len, flags, pc);
902} 951}
903EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); 952EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
904 953
905void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, 954void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
955 struct ring_buffer_event *event,
906 unsigned long flags, int pc) 956 unsigned long flags, int pc)
907{ 957{
908 __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); 958 __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
909} 959}
910EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); 960EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
911 961
912void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, 962void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
913 unsigned long flags, int pc) 963 struct ring_buffer_event *event,
964 unsigned long flags, int pc)
914{ 965{
915 __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); 966 __trace_buffer_unlock_commit(buffer, event, flags, pc, 0);
916} 967}
917EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); 968EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
918 969
919void trace_current_buffer_discard_commit(struct ring_buffer_event *event) 970void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
971 struct ring_buffer_event *event)
920{ 972{
921 ring_buffer_discard_commit(global_trace.buffer, event); 973 ring_buffer_discard_commit(buffer, event);
922} 974}
923EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit); 975EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
924 976
@@ -928,6 +980,7 @@ trace_function(struct trace_array *tr,
928 int pc) 980 int pc)
929{ 981{
930 struct ftrace_event_call *call = &event_function; 982 struct ftrace_event_call *call = &event_function;
983 struct ring_buffer *buffer = tr->buffer;
931 struct ring_buffer_event *event; 984 struct ring_buffer_event *event;
932 struct ftrace_entry *entry; 985 struct ftrace_entry *entry;
933 986
@@ -935,7 +988,7 @@ trace_function(struct trace_array *tr,
935 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 988 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
936 return; 989 return;
937 990
938 event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry), 991 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
939 flags, pc); 992 flags, pc);
940 if (!event) 993 if (!event)
941 return; 994 return;
@@ -943,57 +996,9 @@ trace_function(struct trace_array *tr,
943 entry->ip = ip; 996 entry->ip = ip;
944 entry->parent_ip = parent_ip; 997 entry->parent_ip = parent_ip;
945 998
946 if (!filter_check_discard(call, entry, tr->buffer, event)) 999 if (!filter_check_discard(call, entry, buffer, event))
947 ring_buffer_unlock_commit(tr->buffer, event); 1000 ring_buffer_unlock_commit(buffer, event);
948}
949
950#ifdef CONFIG_FUNCTION_GRAPH_TRACER
951static int __trace_graph_entry(struct trace_array *tr,
952 struct ftrace_graph_ent *trace,
953 unsigned long flags,
954 int pc)
955{
956 struct ftrace_event_call *call = &event_funcgraph_entry;
957 struct ring_buffer_event *event;
958 struct ftrace_graph_ent_entry *entry;
959
960 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
961 return 0;
962
963 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
964 sizeof(*entry), flags, pc);
965 if (!event)
966 return 0;
967 entry = ring_buffer_event_data(event);
968 entry->graph_ent = *trace;
969 if (!filter_current_check_discard(call, entry, event))
970 ring_buffer_unlock_commit(global_trace.buffer, event);
971
972 return 1;
973}
974
975static void __trace_graph_return(struct trace_array *tr,
976 struct ftrace_graph_ret *trace,
977 unsigned long flags,
978 int pc)
979{
980 struct ftrace_event_call *call = &event_funcgraph_exit;
981 struct ring_buffer_event *event;
982 struct ftrace_graph_ret_entry *entry;
983
984 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
985 return;
986
987 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET,
988 sizeof(*entry), flags, pc);
989 if (!event)
990 return;
991 entry = ring_buffer_event_data(event);
992 entry->ret = *trace;
993 if (!filter_current_check_discard(call, entry, event))
994 ring_buffer_unlock_commit(global_trace.buffer, event);
995} 1001}
996#endif
997 1002
998void 1003void
999ftrace(struct trace_array *tr, struct trace_array_cpu *data, 1004ftrace(struct trace_array *tr, struct trace_array_cpu *data,
@@ -1004,17 +1009,17 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
1004 trace_function(tr, ip, parent_ip, flags, pc); 1009 trace_function(tr, ip, parent_ip, flags, pc);
1005} 1010}
1006 1011
1007static void __ftrace_trace_stack(struct trace_array *tr, 1012#ifdef CONFIG_STACKTRACE
1013static void __ftrace_trace_stack(struct ring_buffer *buffer,
1008 unsigned long flags, 1014 unsigned long flags,
1009 int skip, int pc) 1015 int skip, int pc)
1010{ 1016{
1011#ifdef CONFIG_STACKTRACE
1012 struct ftrace_event_call *call = &event_kernel_stack; 1017 struct ftrace_event_call *call = &event_kernel_stack;
1013 struct ring_buffer_event *event; 1018 struct ring_buffer_event *event;
1014 struct stack_entry *entry; 1019 struct stack_entry *entry;
1015 struct stack_trace trace; 1020 struct stack_trace trace;
1016 1021
1017 event = trace_buffer_lock_reserve(tr, TRACE_STACK, 1022 event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1018 sizeof(*entry), flags, pc); 1023 sizeof(*entry), flags, pc);
1019 if (!event) 1024 if (!event)
1020 return; 1025 return;
@@ -1027,32 +1032,28 @@ static void __ftrace_trace_stack(struct trace_array *tr,
1027 trace.entries = entry->caller; 1032 trace.entries = entry->caller;
1028 1033
1029 save_stack_trace(&trace); 1034 save_stack_trace(&trace);
1030 if (!filter_check_discard(call, entry, tr->buffer, event)) 1035 if (!filter_check_discard(call, entry, buffer, event))
1031 ring_buffer_unlock_commit(tr->buffer, event); 1036 ring_buffer_unlock_commit(buffer, event);
1032#endif
1033} 1037}
1034 1038
1035static void ftrace_trace_stack(struct trace_array *tr, 1039void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1036 unsigned long flags, 1040 int skip, int pc)
1037 int skip, int pc)
1038{ 1041{
1039 if (!(trace_flags & TRACE_ITER_STACKTRACE)) 1042 if (!(trace_flags & TRACE_ITER_STACKTRACE))
1040 return; 1043 return;
1041 1044
1042 __ftrace_trace_stack(tr, flags, skip, pc); 1045 __ftrace_trace_stack(buffer, flags, skip, pc);
1043} 1046}
1044 1047
1045void __trace_stack(struct trace_array *tr, 1048void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1046 unsigned long flags, 1049 int pc)
1047 int skip, int pc)
1048{ 1050{
1049 __ftrace_trace_stack(tr, flags, skip, pc); 1051 __ftrace_trace_stack(tr->buffer, flags, skip, pc);
1050} 1052}
1051 1053
1052static void ftrace_trace_userstack(struct trace_array *tr, 1054void
1053 unsigned long flags, int pc) 1055ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1054{ 1056{
1055#ifdef CONFIG_STACKTRACE
1056 struct ftrace_event_call *call = &event_user_stack; 1057 struct ftrace_event_call *call = &event_user_stack;
1057 struct ring_buffer_event *event; 1058 struct ring_buffer_event *event;
1058 struct userstack_entry *entry; 1059 struct userstack_entry *entry;
@@ -1061,7 +1062,7 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1061 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) 1062 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1062 return; 1063 return;
1063 1064
1064 event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK, 1065 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1065 sizeof(*entry), flags, pc); 1066 sizeof(*entry), flags, pc);
1066 if (!event) 1067 if (!event)
1067 return; 1068 return;
@@ -1075,9 +1076,8 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1075 trace.entries = entry->caller; 1076 trace.entries = entry->caller;
1076 1077
1077 save_stack_trace_user(&trace); 1078 save_stack_trace_user(&trace);
1078 if (!filter_check_discard(call, entry, tr->buffer, event)) 1079 if (!filter_check_discard(call, entry, buffer, event))
1079 ring_buffer_unlock_commit(tr->buffer, event); 1080 ring_buffer_unlock_commit(buffer, event);
1080#endif
1081} 1081}
1082 1082
1083#ifdef UNUSED 1083#ifdef UNUSED
@@ -1087,6 +1087,8 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1087} 1087}
1088#endif /* UNUSED */ 1088#endif /* UNUSED */
1089 1089
1090#endif /* CONFIG_STACKTRACE */
1091
1090static void 1092static void
1091ftrace_trace_special(void *__tr, 1093ftrace_trace_special(void *__tr,
1092 unsigned long arg1, unsigned long arg2, unsigned long arg3, 1094 unsigned long arg1, unsigned long arg2, unsigned long arg3,
@@ -1094,9 +1096,10 @@ ftrace_trace_special(void *__tr,
1094{ 1096{
1095 struct ring_buffer_event *event; 1097 struct ring_buffer_event *event;
1096 struct trace_array *tr = __tr; 1098 struct trace_array *tr = __tr;
1099 struct ring_buffer *buffer = tr->buffer;
1097 struct special_entry *entry; 1100 struct special_entry *entry;
1098 1101
1099 event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL, 1102 event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL,
1100 sizeof(*entry), 0, pc); 1103 sizeof(*entry), 0, pc);
1101 if (!event) 1104 if (!event)
1102 return; 1105 return;
@@ -1104,7 +1107,7 @@ ftrace_trace_special(void *__tr,
1104 entry->arg1 = arg1; 1107 entry->arg1 = arg1;
1105 entry->arg2 = arg2; 1108 entry->arg2 = arg2;
1106 entry->arg3 = arg3; 1109 entry->arg3 = arg3;
1107 trace_buffer_unlock_commit(tr, event, 0, pc); 1110 trace_buffer_unlock_commit(buffer, event, 0, pc);
1108} 1111}
1109 1112
1110void 1113void
@@ -1115,62 +1118,6 @@ __trace_special(void *__tr, void *__data,
1115} 1118}
1116 1119
1117void 1120void
1118tracing_sched_switch_trace(struct trace_array *tr,
1119 struct task_struct *prev,
1120 struct task_struct *next,
1121 unsigned long flags, int pc)
1122{
1123 struct ftrace_event_call *call = &event_context_switch;
1124 struct ring_buffer_event *event;
1125 struct ctx_switch_entry *entry;
1126
1127 event = trace_buffer_lock_reserve(tr, TRACE_CTX,
1128 sizeof(*entry), flags, pc);
1129 if (!event)
1130 return;
1131 entry = ring_buffer_event_data(event);
1132 entry->prev_pid = prev->pid;
1133 entry->prev_prio = prev->prio;
1134 entry->prev_state = prev->state;
1135 entry->next_pid = next->pid;
1136 entry->next_prio = next->prio;
1137 entry->next_state = next->state;
1138 entry->next_cpu = task_cpu(next);
1139
1140 if (!filter_check_discard(call, entry, tr->buffer, event))
1141 trace_buffer_unlock_commit(tr, event, flags, pc);
1142}
1143
1144void
1145tracing_sched_wakeup_trace(struct trace_array *tr,
1146 struct task_struct *wakee,
1147 struct task_struct *curr,
1148 unsigned long flags, int pc)
1149{
1150 struct ftrace_event_call *call = &event_wakeup;
1151 struct ring_buffer_event *event;
1152 struct ctx_switch_entry *entry;
1153
1154 event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
1155 sizeof(*entry), flags, pc);
1156 if (!event)
1157 return;
1158 entry = ring_buffer_event_data(event);
1159 entry->prev_pid = curr->pid;
1160 entry->prev_prio = curr->prio;
1161 entry->prev_state = curr->state;
1162 entry->next_pid = wakee->pid;
1163 entry->next_prio = wakee->prio;
1164 entry->next_state = wakee->state;
1165 entry->next_cpu = task_cpu(wakee);
1166
1167 if (!filter_check_discard(call, entry, tr->buffer, event))
1168 ring_buffer_unlock_commit(tr->buffer, event);
1169 ftrace_trace_stack(tr, flags, 6, pc);
1170 ftrace_trace_userstack(tr, flags, pc);
1171}
1172
1173void
1174ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) 1121ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1175{ 1122{
1176 struct trace_array *tr = &global_trace; 1123 struct trace_array *tr = &global_trace;
@@ -1194,68 +1141,6 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1194 local_irq_restore(flags); 1141 local_irq_restore(flags);
1195} 1142}
1196 1143
1197#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1198int trace_graph_entry(struct ftrace_graph_ent *trace)
1199{
1200 struct trace_array *tr = &global_trace;
1201 struct trace_array_cpu *data;
1202 unsigned long flags;
1203 long disabled;
1204 int ret;
1205 int cpu;
1206 int pc;
1207
1208 if (!ftrace_trace_task(current))
1209 return 0;
1210
1211 if (!ftrace_graph_addr(trace->func))
1212 return 0;
1213
1214 local_irq_save(flags);
1215 cpu = raw_smp_processor_id();
1216 data = tr->data[cpu];
1217 disabled = atomic_inc_return(&data->disabled);
1218 if (likely(disabled == 1)) {
1219 pc = preempt_count();
1220 ret = __trace_graph_entry(tr, trace, flags, pc);
1221 } else {
1222 ret = 0;
1223 }
1224 /* Only do the atomic if it is not already set */
1225 if (!test_tsk_trace_graph(current))
1226 set_tsk_trace_graph(current);
1227
1228 atomic_dec(&data->disabled);
1229 local_irq_restore(flags);
1230
1231 return ret;
1232}
1233
1234void trace_graph_return(struct ftrace_graph_ret *trace)
1235{
1236 struct trace_array *tr = &global_trace;
1237 struct trace_array_cpu *data;
1238 unsigned long flags;
1239 long disabled;
1240 int cpu;
1241 int pc;
1242
1243 local_irq_save(flags);
1244 cpu = raw_smp_processor_id();
1245 data = tr->data[cpu];
1246 disabled = atomic_inc_return(&data->disabled);
1247 if (likely(disabled == 1)) {
1248 pc = preempt_count();
1249 __trace_graph_return(tr, trace, flags, pc);
1250 }
1251 if (!trace->depth)
1252 clear_tsk_trace_graph(current);
1253 atomic_dec(&data->disabled);
1254 local_irq_restore(flags);
1255}
1256#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1257
1258
1259/** 1144/**
1260 * trace_vbprintk - write binary msg to tracing buffer 1145 * trace_vbprintk - write binary msg to tracing buffer
1261 * 1146 *
@@ -1268,6 +1153,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1268 1153
1269 struct ftrace_event_call *call = &event_bprint; 1154 struct ftrace_event_call *call = &event_bprint;
1270 struct ring_buffer_event *event; 1155 struct ring_buffer_event *event;
1156 struct ring_buffer *buffer;
1271 struct trace_array *tr = &global_trace; 1157 struct trace_array *tr = &global_trace;
1272 struct trace_array_cpu *data; 1158 struct trace_array_cpu *data;
1273 struct bprint_entry *entry; 1159 struct bprint_entry *entry;
@@ -1300,7 +1186,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1300 goto out_unlock; 1186 goto out_unlock;
1301 1187
1302 size = sizeof(*entry) + sizeof(u32) * len; 1188 size = sizeof(*entry) + sizeof(u32) * len;
1303 event = trace_buffer_lock_reserve(tr, TRACE_BPRINT, size, flags, pc); 1189 buffer = tr->buffer;
1190 event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
1191 flags, pc);
1304 if (!event) 1192 if (!event)
1305 goto out_unlock; 1193 goto out_unlock;
1306 entry = ring_buffer_event_data(event); 1194 entry = ring_buffer_event_data(event);
@@ -1308,8 +1196,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1308 entry->fmt = fmt; 1196 entry->fmt = fmt;
1309 1197
1310 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1198 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1311 if (!filter_check_discard(call, entry, tr->buffer, event)) 1199 if (!filter_check_discard(call, entry, buffer, event))
1312 ring_buffer_unlock_commit(tr->buffer, event); 1200 ring_buffer_unlock_commit(buffer, event);
1313 1201
1314out_unlock: 1202out_unlock:
1315 __raw_spin_unlock(&trace_buf_lock); 1203 __raw_spin_unlock(&trace_buf_lock);
@@ -1324,14 +1212,30 @@ out:
1324} 1212}
1325EXPORT_SYMBOL_GPL(trace_vbprintk); 1213EXPORT_SYMBOL_GPL(trace_vbprintk);
1326 1214
1327int trace_vprintk(unsigned long ip, const char *fmt, va_list args) 1215int trace_array_printk(struct trace_array *tr,
1216 unsigned long ip, const char *fmt, ...)
1217{
1218 int ret;
1219 va_list ap;
1220
1221 if (!(trace_flags & TRACE_ITER_PRINTK))
1222 return 0;
1223
1224 va_start(ap, fmt);
1225 ret = trace_array_vprintk(tr, ip, fmt, ap);
1226 va_end(ap);
1227 return ret;
1228}
1229
1230int trace_array_vprintk(struct trace_array *tr,
1231 unsigned long ip, const char *fmt, va_list args)
1328{ 1232{
1329 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; 1233 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
1330 static char trace_buf[TRACE_BUF_SIZE]; 1234 static char trace_buf[TRACE_BUF_SIZE];
1331 1235
1332 struct ftrace_event_call *call = &event_print; 1236 struct ftrace_event_call *call = &event_print;
1333 struct ring_buffer_event *event; 1237 struct ring_buffer_event *event;
1334 struct trace_array *tr = &global_trace; 1238 struct ring_buffer *buffer;
1335 struct trace_array_cpu *data; 1239 struct trace_array_cpu *data;
1336 int cpu, len = 0, size, pc; 1240 int cpu, len = 0, size, pc;
1337 struct print_entry *entry; 1241 struct print_entry *entry;
@@ -1359,7 +1263,9 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1359 trace_buf[len] = 0; 1263 trace_buf[len] = 0;
1360 1264
1361 size = sizeof(*entry) + len + 1; 1265 size = sizeof(*entry) + len + 1;
1362 event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); 1266 buffer = tr->buffer;
1267 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
1268 irq_flags, pc);
1363 if (!event) 1269 if (!event)
1364 goto out_unlock; 1270 goto out_unlock;
1365 entry = ring_buffer_event_data(event); 1271 entry = ring_buffer_event_data(event);
@@ -1367,8 +1273,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1367 1273
1368 memcpy(&entry->buf, trace_buf, len); 1274 memcpy(&entry->buf, trace_buf, len);
1369 entry->buf[len] = 0; 1275 entry->buf[len] = 0;
1370 if (!filter_check_discard(call, entry, tr->buffer, event)) 1276 if (!filter_check_discard(call, entry, buffer, event))
1371 ring_buffer_unlock_commit(tr->buffer, event); 1277 ring_buffer_unlock_commit(buffer, event);
1372 1278
1373 out_unlock: 1279 out_unlock:
1374 __raw_spin_unlock(&trace_buf_lock); 1280 __raw_spin_unlock(&trace_buf_lock);
@@ -1380,6 +1286,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1380 1286
1381 return len; 1287 return len;
1382} 1288}
1289
1290int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1291{
1292 return trace_array_vprintk(&global_trace, ip, fmt, args); /* args is already a va_list: must use the v-variant, not the "..." one */
1293}
1383EXPORT_SYMBOL_GPL(trace_vprintk); 1294EXPORT_SYMBOL_GPL(trace_vprintk);
1384 1295
1385enum trace_file_type { 1296enum trace_file_type {
@@ -1519,6 +1430,37 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1519 return ent; 1430 return ent;
1520} 1431}
1521 1432
1433static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1434{
1435 struct trace_array *tr = iter->tr;
1436 struct ring_buffer_event *event;
1437 struct ring_buffer_iter *buf_iter;
1438 unsigned long entries = 0; /* entries stamped before tr->time_start */
1439 u64 ts;
1440
1441 tr->data[cpu]->skipped_entries = 0; /* cleared even when this cpu has no iterator */
1442
1443 if (!iter->buffer_iter[cpu])
1444 return;
1445
1446 buf_iter = iter->buffer_iter[cpu];
1447 ring_buffer_iter_reset(buf_iter);
1448
1449 /*
1450 * We could have the case with the max latency tracers
1451 * that a reset never took place on a cpu. This is evident
1452 * by the timestamp being before the start of the buffer.
1453 */
1454 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
1455 if (ts >= iter->tr->time_start)
1456 break;
1457 entries++;
1458 ring_buffer_read(buf_iter, NULL); /* presumably steps buf_iter past the peeked entry */
1459 }
1460
1461 tr->data[cpu]->skipped_entries = entries; /* read by print_trace_header() and test_cpu_buff_start() */
1462}
1463
1522/* 1464/*
1523 * No necessary locking here. The worst thing which can 1465 * No necessary locking here. The worst thing which can
1524 * happen is losing events consumed at the same time 1466 * happen is losing events consumed at the same time
@@ -1557,10 +1499,9 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1557 1499
1558 if (cpu_file == TRACE_PIPE_ALL_CPU) { 1500 if (cpu_file == TRACE_PIPE_ALL_CPU) {
1559 for_each_tracing_cpu(cpu) 1501 for_each_tracing_cpu(cpu)
1560 ring_buffer_iter_reset(iter->buffer_iter[cpu]); 1502 tracing_iter_reset(iter, cpu);
1561 } else 1503 } else
1562 ring_buffer_iter_reset(iter->buffer_iter[cpu_file]); 1504 tracing_iter_reset(iter, cpu_file);
1563
1564 1505
1565 ftrace_enable_cpu(); 1506 ftrace_enable_cpu();
1566 1507
@@ -1609,16 +1550,32 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1609 struct trace_array *tr = iter->tr; 1550 struct trace_array *tr = iter->tr;
1610 struct trace_array_cpu *data = tr->data[tr->cpu]; 1551 struct trace_array_cpu *data = tr->data[tr->cpu];
1611 struct tracer *type = current_trace; 1552 struct tracer *type = current_trace;
1612 unsigned long total; 1553 unsigned long entries = 0;
1613 unsigned long entries; 1554 unsigned long total = 0;
1555 unsigned long count;
1614 const char *name = "preemption"; 1556 const char *name = "preemption";
1557 int cpu;
1615 1558
1616 if (type) 1559 if (type)
1617 name = type->name; 1560 name = type->name;
1618 1561
1619 entries = ring_buffer_entries(iter->tr->buffer); 1562
1620 total = entries + 1563 for_each_tracing_cpu(cpu) {
1621 ring_buffer_overruns(iter->tr->buffer); 1564 count = ring_buffer_entries_cpu(tr->buffer, cpu);
1565 /*
1566 * If this buffer has skipped entries, then we hold all
1567 * entries for the trace and we need to ignore the
1568 * ones before the time stamp.
1569 */
1570 if (tr->data[cpu]->skipped_entries) {
1571 count -= tr->data[cpu]->skipped_entries;
1572 /* total is the same as the entries */
1573 total += count;
1574 } else
1575 total += count +
1576 ring_buffer_overrun_cpu(tr->buffer, cpu);
1577 entries += count;
1578 }
1622 1579
1623 seq_printf(m, "# %s latency trace v1.1.5 on %s\n", 1580 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
1624 name, UTS_RELEASE); 1581 name, UTS_RELEASE);
@@ -1660,7 +1617,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1660 seq_puts(m, "\n# => ended at: "); 1617 seq_puts(m, "\n# => ended at: ");
1661 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); 1618 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1662 trace_print_seq(m, &iter->seq); 1619 trace_print_seq(m, &iter->seq);
1663 seq_puts(m, "#\n"); 1620 seq_puts(m, "\n#\n");
1664 } 1621 }
1665 1622
1666 seq_puts(m, "#\n"); 1623 seq_puts(m, "#\n");
@@ -1679,6 +1636,9 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
1679 if (cpumask_test_cpu(iter->cpu, iter->started)) 1636 if (cpumask_test_cpu(iter->cpu, iter->started))
1680 return; 1637 return;
1681 1638
1639 if (iter->tr->data[iter->cpu]->skipped_entries)
1640 return;
1641
1682 cpumask_set_cpu(iter->cpu, iter->started); 1642 cpumask_set_cpu(iter->cpu, iter->started);
1683 1643
1684 /* Don't print started cpu buffer for the first entry of the trace */ 1644 /* Don't print started cpu buffer for the first entry of the trace */
@@ -1941,19 +1901,23 @@ __tracing_open(struct inode *inode, struct file *file)
1941 if (ring_buffer_overruns(iter->tr->buffer)) 1901 if (ring_buffer_overruns(iter->tr->buffer))
1942 iter->iter_flags |= TRACE_FILE_ANNOTATE; 1902 iter->iter_flags |= TRACE_FILE_ANNOTATE;
1943 1903
1904 /* stop the trace while dumping */
1905 tracing_stop();
1906
1944 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { 1907 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
1945 for_each_tracing_cpu(cpu) { 1908 for_each_tracing_cpu(cpu) {
1946 1909
1947 iter->buffer_iter[cpu] = 1910 iter->buffer_iter[cpu] =
1948 ring_buffer_read_start(iter->tr->buffer, cpu); 1911 ring_buffer_read_start(iter->tr->buffer, cpu);
1912 tracing_iter_reset(iter, cpu);
1949 } 1913 }
1950 } else { 1914 } else {
1951 cpu = iter->cpu_file; 1915 cpu = iter->cpu_file;
1952 iter->buffer_iter[cpu] = 1916 iter->buffer_iter[cpu] =
1953 ring_buffer_read_start(iter->tr->buffer, cpu); 1917 ring_buffer_read_start(iter->tr->buffer, cpu);
1918 tracing_iter_reset(iter, cpu);
1954 } 1919 }
1955 1920
1956 /* TODO stop tracer */
1957 ret = seq_open(file, &tracer_seq_ops); 1921 ret = seq_open(file, &tracer_seq_ops);
1958 if (ret < 0) { 1922 if (ret < 0) {
1959 fail_ret = ERR_PTR(ret); 1923 fail_ret = ERR_PTR(ret);
@@ -1963,9 +1927,6 @@ __tracing_open(struct inode *inode, struct file *file)
1963 m = file->private_data; 1927 m = file->private_data;
1964 m->private = iter; 1928 m->private = iter;
1965 1929
1966 /* stop the trace while dumping */
1967 tracing_stop();
1968
1969 mutex_unlock(&trace_types_lock); 1930 mutex_unlock(&trace_types_lock);
1970 1931
1971 return iter; 1932 return iter;
@@ -1976,6 +1937,7 @@ __tracing_open(struct inode *inode, struct file *file)
1976 ring_buffer_read_finish(iter->buffer_iter[cpu]); 1937 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1977 } 1938 }
1978 free_cpumask_var(iter->started); 1939 free_cpumask_var(iter->started);
1940 tracing_start();
1979 fail: 1941 fail:
1980 mutex_unlock(&trace_types_lock); 1942 mutex_unlock(&trace_types_lock);
1981 kfree(iter->trace); 1943 kfree(iter->trace);
@@ -2257,8 +2219,8 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
2257 len += 3; /* "no" and newline */ 2219 len += 3; /* "no" and newline */
2258 } 2220 }
2259 2221
2260 /* +2 for \n and \0 */ 2222 /* +1 for \0 */
2261 buf = kmalloc(len + 2, GFP_KERNEL); 2223 buf = kmalloc(len + 1, GFP_KERNEL);
2262 if (!buf) { 2224 if (!buf) {
2263 mutex_unlock(&trace_types_lock); 2225 mutex_unlock(&trace_types_lock);
2264 return -ENOMEM; 2226 return -ENOMEM;
@@ -2281,7 +2243,7 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
2281 } 2243 }
2282 mutex_unlock(&trace_types_lock); 2244 mutex_unlock(&trace_types_lock);
2283 2245
2284 WARN_ON(r >= len + 2); 2246 WARN_ON(r >= len + 1);
2285 2247
2286 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2248 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2287 2249
@@ -2292,23 +2254,23 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
2292/* Try to assign a tracer specific option */ 2254/* Try to assign a tracer specific option */
2293static int set_tracer_option(struct tracer *trace, char *cmp, int neg) 2255static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2294{ 2256{
2295 struct tracer_flags *trace_flags = trace->flags; 2257 struct tracer_flags *tracer_flags = trace->flags;
2296 struct tracer_opt *opts = NULL; 2258 struct tracer_opt *opts = NULL;
2297 int ret = 0, i = 0; 2259 int ret = 0, i = 0;
2298 int len; 2260 int len;
2299 2261
2300 for (i = 0; trace_flags->opts[i].name; i++) { 2262 for (i = 0; tracer_flags->opts[i].name; i++) {
2301 opts = &trace_flags->opts[i]; 2263 opts = &tracer_flags->opts[i];
2302 len = strlen(opts->name); 2264 len = strlen(opts->name);
2303 2265
2304 if (strncmp(cmp, opts->name, len) == 0) { 2266 if (strncmp(cmp, opts->name, len) == 0) {
2305 ret = trace->set_flag(trace_flags->val, 2267 ret = trace->set_flag(tracer_flags->val,
2306 opts->bit, !neg); 2268 opts->bit, !neg);
2307 break; 2269 break;
2308 } 2270 }
2309 } 2271 }
2310 /* Not found */ 2272 /* Not found */
2311 if (!trace_flags->opts[i].name) 2273 if (!tracer_flags->opts[i].name)
2312 return -EINVAL; 2274 return -EINVAL;
2313 2275
2314 /* Refused to handle */ 2276 /* Refused to handle */
@@ -2316,9 +2278,9 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2316 return ret; 2278 return ret;
2317 2279
2318 if (neg) 2280 if (neg)
2319 trace_flags->val &= ~opts->bit; 2281 tracer_flags->val &= ~opts->bit;
2320 else 2282 else
2321 trace_flags->val |= opts->bit; 2283 tracer_flags->val |= opts->bit;
2322 2284
2323 return 0; 2285 return 0;
2324} 2286}
@@ -2333,22 +2295,6 @@ static void set_tracer_flags(unsigned int mask, int enabled)
2333 trace_flags |= mask; 2295 trace_flags |= mask;
2334 else 2296 else
2335 trace_flags &= ~mask; 2297 trace_flags &= ~mask;
2336
2337 if (mask == TRACE_ITER_GLOBAL_CLK) {
2338 u64 (*func)(void);
2339
2340 if (enabled)
2341 func = trace_clock_global;
2342 else
2343 func = trace_clock_local;
2344
2345 mutex_lock(&trace_types_lock);
2346 ring_buffer_set_clock(global_trace.buffer, func);
2347
2348 if (max_tr.buffer)
2349 ring_buffer_set_clock(max_tr.buffer, func);
2350 mutex_unlock(&trace_types_lock);
2351 }
2352} 2298}
2353 2299
2354static ssize_t 2300static ssize_t
@@ -3316,6 +3262,62 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3316 return cnt; 3262 return cnt;
3317} 3263}
3318 3264
3265static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf,
3266 size_t cnt, loff_t *ppos)
3267{
3268 char buf[64]; /* holds all clock names, separators, "[]" and "\n" */
3269 int bufiter = 0; /* bytes actually stored in buf so far */
3270 int i;
3271
3272 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
3273 bufiter += scnprintf(buf + bufiter, sizeof(buf) - bufiter,
3274 "%s%s%s%s", i ? " " : "",
3275 i == trace_clock_id ? "[" : "", trace_clocks[i].name,
3276 i == trace_clock_id ? "]" : "");
3277 bufiter += scnprintf(buf + bufiter, sizeof(buf) - bufiter, "\n"); /* scnprintf: bufiter can never pass sizeof(buf) */
3278
3279 return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter);
3280}
3281
3282static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
3283 size_t cnt, loff_t *fpos)
3284{
3285 char buf[64];
3286 const char *clockstr;
3287 int i;
3288
3289 if (cnt >= sizeof(buf)) /* keep one byte for the terminating NUL */
3290 return -EINVAL;
3291
3292 if (copy_from_user(buf, ubuf, cnt))
3293 return -EFAULT;
3294
3295 buf[cnt] = 0;
3296
3297 clockstr = strstrip(buf);
3298
3299 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
3300 if (strcmp(trace_clocks[i].name, clockstr) == 0)
3301 break;
3302 }
3303 if (i == ARRAY_SIZE(trace_clocks)) /* no clock with that name */
3304 return -EINVAL;
3305
3306 mutex_lock(&trace_types_lock);
3307
3308 /* update the id under the lock so it always matches the installed clock */
3309 trace_clock_id = i;
3310 ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func);
3311 if (max_tr.buffer)
3312 ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
3313
3314 mutex_unlock(&trace_types_lock);
3315
3316 *fpos += cnt;
3317
3318 return cnt;
3319}
3320
3319static const struct file_operations tracing_max_lat_fops = { 3321static const struct file_operations tracing_max_lat_fops = {
3320 .open = tracing_open_generic, 3322 .open = tracing_open_generic,
3321 .read = tracing_max_lat_read, 3323 .read = tracing_max_lat_read,
@@ -3353,6 +3355,12 @@ static const struct file_operations tracing_mark_fops = {
3353 .write = tracing_mark_write, 3355 .write = tracing_mark_write,
3354}; 3356};
3355 3357
3358static const struct file_operations trace_clock_fops = { /* backs the "trace_clock" file created in tracer_init_debugfs() */
3359 .open = tracing_open_generic,
3360 .read = tracing_clock_read,
3361 .write = tracing_clock_write,
3362};
3363
3356struct ftrace_buffer_info { 3364struct ftrace_buffer_info {
3357 struct trace_array *tr; 3365 struct trace_array *tr;
3358 void *spare; 3366 void *spare;
@@ -3633,9 +3641,6 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3633 cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); 3641 cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
3634 trace_seq_printf(s, "commit overrun: %ld\n", cnt); 3642 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
3635 3643
3636 cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu);
3637 trace_seq_printf(s, "nmi dropped: %ld\n", cnt);
3638
3639 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); 3644 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
3640 3645
3641 kfree(s); 3646 kfree(s);
@@ -3896,17 +3901,9 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
3896 if (ret < 0) 3901 if (ret < 0)
3897 return ret; 3902 return ret;
3898 3903
3899 switch (val) { 3904 if (val != 0 && val != 1)
3900 case 0:
3901 trace_flags &= ~(1 << index);
3902 break;
3903 case 1:
3904 trace_flags |= 1 << index;
3905 break;
3906
3907 default:
3908 return -EINVAL; 3905 return -EINVAL;
3909 } 3906 set_tracer_flags(1 << index, val);
3910 3907
3911 *ppos += cnt; 3908 *ppos += cnt;
3912 3909
@@ -4074,11 +4071,13 @@ static __init int tracer_init_debugfs(void)
4074 trace_create_file("current_tracer", 0644, d_tracer, 4071 trace_create_file("current_tracer", 0644, d_tracer,
4075 &global_trace, &set_tracer_fops); 4072 &global_trace, &set_tracer_fops);
4076 4073
4074#ifdef CONFIG_TRACER_MAX_TRACE
4077 trace_create_file("tracing_max_latency", 0644, d_tracer, 4075 trace_create_file("tracing_max_latency", 0644, d_tracer,
4078 &tracing_max_latency, &tracing_max_lat_fops); 4076 &tracing_max_latency, &tracing_max_lat_fops);
4079 4077
4080 trace_create_file("tracing_thresh", 0644, d_tracer, 4078 trace_create_file("tracing_thresh", 0644, d_tracer,
4081 &tracing_thresh, &tracing_max_lat_fops); 4079 &tracing_thresh, &tracing_max_lat_fops);
4080#endif
4082 4081
4083 trace_create_file("README", 0444, d_tracer, 4082 trace_create_file("README", 0444, d_tracer,
4084 NULL, &tracing_readme_fops); 4083 NULL, &tracing_readme_fops);
@@ -4095,6 +4094,9 @@ static __init int tracer_init_debugfs(void)
4095 trace_create_file("saved_cmdlines", 0444, d_tracer, 4094 trace_create_file("saved_cmdlines", 0444, d_tracer,
4096 NULL, &tracing_saved_cmdlines_fops); 4095 NULL, &tracing_saved_cmdlines_fops);
4097 4096
4097 trace_create_file("trace_clock", 0644, d_tracer, NULL,
4098 &trace_clock_fops);
4099
4098#ifdef CONFIG_DYNAMIC_FTRACE 4100#ifdef CONFIG_DYNAMIC_FTRACE
4099 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, 4101 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
4100 &ftrace_update_tot_cnt, &tracing_dyn_info_fops); 4102 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
@@ -4273,7 +4275,6 @@ void ftrace_dump(void)
4273 4275
4274__init static int tracer_alloc_buffers(void) 4276__init static int tracer_alloc_buffers(void)
4275{ 4277{
4276 struct trace_array_cpu *data;
4277 int ring_buf_size; 4278 int ring_buf_size;
4278 int i; 4279 int i;
4279 int ret = -ENOMEM; 4280 int ret = -ENOMEM;
@@ -4323,7 +4324,7 @@ __init static int tracer_alloc_buffers(void)
4323 4324
4324 /* Allocate the first page for all buffers */ 4325 /* Allocate the first page for all buffers */
4325 for_each_tracing_cpu(i) { 4326 for_each_tracing_cpu(i) {
4326 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); 4327 global_trace.data[i] = &per_cpu(global_trace_cpu, i);
4327 max_tr.data[i] = &per_cpu(max_data, i); 4328 max_tr.data[i] = &per_cpu(max_data, i);
4328 } 4329 }
4329 4330
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8b9f4f6e955..fa1dccb579d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -34,8 +34,6 @@ enum trace_type {
34 TRACE_GRAPH_ENT, 34 TRACE_GRAPH_ENT,
35 TRACE_USER_STACK, 35 TRACE_USER_STACK,
36 TRACE_HW_BRANCHES, 36 TRACE_HW_BRANCHES,
37 TRACE_SYSCALL_ENTER,
38 TRACE_SYSCALL_EXIT,
39 TRACE_KMEM_ALLOC, 37 TRACE_KMEM_ALLOC,
40 TRACE_KMEM_FREE, 38 TRACE_KMEM_FREE,
41 TRACE_POWER, 39 TRACE_POWER,
@@ -236,9 +234,6 @@ struct trace_array_cpu {
236 atomic_t disabled; 234 atomic_t disabled;
237 void *buffer_page; /* ring buffer spare */ 235 void *buffer_page; /* ring buffer spare */
238 236
239 /* these fields get copied into max-trace: */
240 unsigned long trace_idx;
241 unsigned long overrun;
242 unsigned long saved_latency; 237 unsigned long saved_latency;
243 unsigned long critical_start; 238 unsigned long critical_start;
244 unsigned long critical_end; 239 unsigned long critical_end;
@@ -246,6 +241,7 @@ struct trace_array_cpu {
246 unsigned long nice; 241 unsigned long nice;
247 unsigned long policy; 242 unsigned long policy;
248 unsigned long rt_priority; 243 unsigned long rt_priority;
244 unsigned long skipped_entries;
249 cycle_t preempt_timestamp; 245 cycle_t preempt_timestamp;
250 pid_t pid; 246 pid_t pid;
251 uid_t uid; 247 uid_t uid;
@@ -319,10 +315,6 @@ extern void __ftrace_bad_type(void);
319 TRACE_KMEM_ALLOC); \ 315 TRACE_KMEM_ALLOC); \
320 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 316 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
321 TRACE_KMEM_FREE); \ 317 TRACE_KMEM_FREE); \
322 IF_ASSIGN(var, ent, struct syscall_trace_enter, \
323 TRACE_SYSCALL_ENTER); \
324 IF_ASSIGN(var, ent, struct syscall_trace_exit, \
325 TRACE_SYSCALL_EXIT); \
326 __ftrace_bad_type(); \ 318 __ftrace_bad_type(); \
327 } while (0) 319 } while (0)
328 320
@@ -423,12 +415,13 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
423 415
424struct ring_buffer_event; 416struct ring_buffer_event;
425 417
426struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, 418struct ring_buffer_event *
427 int type, 419trace_buffer_lock_reserve(struct ring_buffer *buffer,
428 unsigned long len, 420 int type,
429 unsigned long flags, 421 unsigned long len,
430 int pc); 422 unsigned long flags,
431void trace_buffer_unlock_commit(struct trace_array *tr, 423 int pc);
424void trace_buffer_unlock_commit(struct ring_buffer *buffer,
432 struct ring_buffer_event *event, 425 struct ring_buffer_event *event,
433 unsigned long flags, int pc); 426 unsigned long flags, int pc);
434 427
@@ -467,6 +460,7 @@ void trace_function(struct trace_array *tr,
467 460
468void trace_graph_return(struct ftrace_graph_ret *trace); 461void trace_graph_return(struct ftrace_graph_ret *trace);
469int trace_graph_entry(struct ftrace_graph_ent *trace); 462int trace_graph_entry(struct ftrace_graph_ent *trace);
463void set_graph_array(struct trace_array *tr);
470 464
471void tracing_start_cmdline_record(void); 465void tracing_start_cmdline_record(void);
472void tracing_stop_cmdline_record(void); 466void tracing_stop_cmdline_record(void);
@@ -478,16 +472,40 @@ void unregister_tracer(struct tracer *type);
478 472
479extern unsigned long nsecs_to_usecs(unsigned long nsecs); 473extern unsigned long nsecs_to_usecs(unsigned long nsecs);
480 474
475#ifdef CONFIG_TRACER_MAX_TRACE
481extern unsigned long tracing_max_latency; 476extern unsigned long tracing_max_latency;
482extern unsigned long tracing_thresh; 477extern unsigned long tracing_thresh;
483 478
484void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); 479void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
485void update_max_tr_single(struct trace_array *tr, 480void update_max_tr_single(struct trace_array *tr,
486 struct task_struct *tsk, int cpu); 481 struct task_struct *tsk, int cpu);
482#endif /* CONFIG_TRACER_MAX_TRACE */
487 483
488void __trace_stack(struct trace_array *tr, 484#ifdef CONFIG_STACKTRACE
489 unsigned long flags, 485void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
490 int skip, int pc); 486 int skip, int pc);
487
488void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
489 int pc);
490
491void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
492 int pc);
493#else
494static inline void ftrace_trace_stack(struct ring_buffer *buffer,
495 unsigned long flags, int skip, int pc)
496{ /* !CONFIG_STACKTRACE no-op; parameter types mirror the real prototype */
497}
498
499static inline void ftrace_trace_userstack(struct ring_buffer *buffer,
500 unsigned long flags, int pc)
501{ /* !CONFIG_STACKTRACE no-op; parameter types mirror the real prototype */
502}
503
504static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
505 int skip, int pc)
506{
507}
508#endif /* CONFIG_STACKTRACE */
491 509
492extern cycle_t ftrace_now(int cpu); 510extern cycle_t ftrace_now(int cpu);
493 511
@@ -513,6 +531,10 @@ extern unsigned long ftrace_update_tot_cnt;
513extern int DYN_FTRACE_TEST_NAME(void); 531extern int DYN_FTRACE_TEST_NAME(void);
514#endif 532#endif
515 533
534extern int ring_buffer_expanded;
535extern bool tracing_selftest_disabled;
536DECLARE_PER_CPU(local_t, ftrace_cpu_disabled);
537
516#ifdef CONFIG_FTRACE_STARTUP_TEST 538#ifdef CONFIG_FTRACE_STARTUP_TEST
517extern int trace_selftest_startup_function(struct tracer *trace, 539extern int trace_selftest_startup_function(struct tracer *trace,
518 struct trace_array *tr); 540 struct trace_array *tr);
@@ -544,9 +566,16 @@ extern int
544trace_vbprintk(unsigned long ip, const char *fmt, va_list args); 566trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
545extern int 567extern int
546trace_vprintk(unsigned long ip, const char *fmt, va_list args); 568trace_vprintk(unsigned long ip, const char *fmt, va_list args);
569extern int
570trace_array_vprintk(struct trace_array *tr,
571 unsigned long ip, const char *fmt, va_list args);
572int trace_array_printk(struct trace_array *tr,
573 unsigned long ip, const char *fmt, ...);
547 574
548extern unsigned long trace_flags; 575extern unsigned long trace_flags;
549 576
577extern int trace_clock_id;
578
550/* Standard output formatting function used for function return traces */ 579/* Standard output formatting function used for function return traces */
551#ifdef CONFIG_FUNCTION_GRAPH_TRACER 580#ifdef CONFIG_FUNCTION_GRAPH_TRACER
552extern enum print_line_t print_graph_function(struct trace_iterator *iter); 581extern enum print_line_t print_graph_function(struct trace_iterator *iter);
@@ -635,9 +664,8 @@ enum trace_iterator_flags {
635 TRACE_ITER_PRINTK_MSGONLY = 0x10000, 664 TRACE_ITER_PRINTK_MSGONLY = 0x10000,
636 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ 665 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */
637 TRACE_ITER_LATENCY_FMT = 0x40000, 666 TRACE_ITER_LATENCY_FMT = 0x40000,
638 TRACE_ITER_GLOBAL_CLK = 0x80000, 667 TRACE_ITER_SLEEP_TIME = 0x80000,
639 TRACE_ITER_SLEEP_TIME = 0x100000, 668 TRACE_ITER_GRAPH_TIME = 0x100000,
640 TRACE_ITER_GRAPH_TIME = 0x200000,
641}; 669};
642 670
643/* 671/*
@@ -734,6 +762,7 @@ struct ftrace_event_field {
734 struct list_head link; 762 struct list_head link;
735 char *name; 763 char *name;
736 char *type; 764 char *type;
765 int filter_type;
737 int offset; 766 int offset;
738 int size; 767 int size;
739 int is_signed; 768 int is_signed;
@@ -743,13 +772,15 @@ struct event_filter {
743 int n_preds; 772 int n_preds;
744 struct filter_pred **preds; 773 struct filter_pred **preds;
745 char *filter_string; 774 char *filter_string;
775 bool no_reset;
746}; 776};
747 777
748struct event_subsystem { 778struct event_subsystem {
749 struct list_head list; 779 struct list_head list;
750 const char *name; 780 const char *name;
751 struct dentry *entry; 781 struct dentry *entry;
752 void *filter; 782 struct event_filter *filter;
783 int nr_events;
753}; 784};
754 785
755struct filter_pred; 786struct filter_pred;
@@ -777,6 +808,7 @@ extern int apply_subsystem_event_filter(struct event_subsystem *system,
777 char *filter_string); 808 char *filter_string);
778extern void print_subsystem_event_filter(struct event_subsystem *system, 809extern void print_subsystem_event_filter(struct event_subsystem *system,
779 struct trace_seq *s); 810 struct trace_seq *s);
811extern int filter_assign_type(const char *type);
780 812
781static inline int 813static inline int
782filter_check_discard(struct ftrace_event_call *call, void *rec, 814filter_check_discard(struct ftrace_event_call *call, void *rec,
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index a29ef23ffb4..19bfc75d467 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -41,14 +41,12 @@ void disable_boot_trace(void)
41 41
42static int boot_trace_init(struct trace_array *tr) 42static int boot_trace_init(struct trace_array *tr)
43{ 43{
44 int cpu;
45 boot_trace = tr; 44 boot_trace = tr;
46 45
47 if (!tr) 46 if (!tr)
48 return 0; 47 return 0;
49 48
50 for_each_cpu(cpu, cpu_possible_mask) 49 tracing_reset_online_cpus(tr);
51 tracing_reset(tr, cpu);
52 50
53 tracing_sched_switch_assign_trace(tr); 51 tracing_sched_switch_assign_trace(tr);
54 return 0; 52 return 0;
@@ -132,6 +130,7 @@ struct tracer boot_tracer __read_mostly =
132void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) 130void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
133{ 131{
134 struct ring_buffer_event *event; 132 struct ring_buffer_event *event;
133 struct ring_buffer *buffer;
135 struct trace_boot_call *entry; 134 struct trace_boot_call *entry;
136 struct trace_array *tr = boot_trace; 135 struct trace_array *tr = boot_trace;
137 136
@@ -144,13 +143,14 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
144 sprint_symbol(bt->func, (unsigned long)fn); 143 sprint_symbol(bt->func, (unsigned long)fn);
145 preempt_disable(); 144 preempt_disable();
146 145
147 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_CALL, 146 buffer = tr->buffer;
147 event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL,
148 sizeof(*entry), 0, 0); 148 sizeof(*entry), 0, 0);
149 if (!event) 149 if (!event)
150 goto out; 150 goto out;
151 entry = ring_buffer_event_data(event); 151 entry = ring_buffer_event_data(event);
152 entry->boot_call = *bt; 152 entry->boot_call = *bt;
153 trace_buffer_unlock_commit(tr, event, 0, 0); 153 trace_buffer_unlock_commit(buffer, event, 0, 0);
154 out: 154 out:
155 preempt_enable(); 155 preempt_enable();
156} 156}
@@ -158,6 +158,7 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
158void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) 158void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
159{ 159{
160 struct ring_buffer_event *event; 160 struct ring_buffer_event *event;
161 struct ring_buffer *buffer;
161 struct trace_boot_ret *entry; 162 struct trace_boot_ret *entry;
162 struct trace_array *tr = boot_trace; 163 struct trace_array *tr = boot_trace;
163 164
@@ -167,13 +168,14 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
167 sprint_symbol(bt->func, (unsigned long)fn); 168 sprint_symbol(bt->func, (unsigned long)fn);
168 preempt_disable(); 169 preempt_disable();
169 170
170 event = trace_buffer_lock_reserve(tr, TRACE_BOOT_RET, 171 buffer = tr->buffer;
172 event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET,
171 sizeof(*entry), 0, 0); 173 sizeof(*entry), 0, 0);
172 if (!event) 174 if (!event)
173 goto out; 175 goto out;
174 entry = ring_buffer_event_data(event); 176 entry = ring_buffer_event_data(event);
175 entry->boot_ret = *bt; 177 entry->boot_ret = *bt;
176 trace_buffer_unlock_commit(tr, event, 0, 0); 178 trace_buffer_unlock_commit(buffer, event, 0, 0);
177 out: 179 out:
178 preempt_enable(); 180 preempt_enable();
179} 181}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index e75276a49cf..78b1ed23017 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -17,6 +17,8 @@
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/delay.h> 18#include <linux/delay.h>
19 19
20#include <asm/setup.h>
21
20#include "trace_output.h" 22#include "trace_output.h"
21 23
22#define TRACE_SYSTEM "TRACE_SYSTEM" 24#define TRACE_SYSTEM "TRACE_SYSTEM"
@@ -25,8 +27,9 @@ DEFINE_MUTEX(event_mutex);
25 27
26LIST_HEAD(ftrace_events); 28LIST_HEAD(ftrace_events);
27 29
28int trace_define_field(struct ftrace_event_call *call, char *type, 30int trace_define_field(struct ftrace_event_call *call, const char *type,
29 char *name, int offset, int size, int is_signed) 31 const char *name, int offset, int size, int is_signed,
32 int filter_type)
30{ 33{
31 struct ftrace_event_field *field; 34 struct ftrace_event_field *field;
32 35
@@ -42,9 +45,15 @@ int trace_define_field(struct ftrace_event_call *call, char *type,
42 if (!field->type) 45 if (!field->type)
43 goto err; 46 goto err;
44 47
48 if (filter_type == FILTER_OTHER)
49 field->filter_type = filter_assign_type(type);
50 else
51 field->filter_type = filter_type;
52
45 field->offset = offset; 53 field->offset = offset;
46 field->size = size; 54 field->size = size;
47 field->is_signed = is_signed; 55 field->is_signed = is_signed;
56
48 list_add(&field->link, &call->fields); 57 list_add(&field->link, &call->fields);
49 58
50 return 0; 59 return 0;
@@ -60,6 +69,29 @@ err:
60} 69}
61EXPORT_SYMBOL_GPL(trace_define_field); 70EXPORT_SYMBOL_GPL(trace_define_field);
62 71
72#define __common_field(type, item) \
73 ret = trace_define_field(call, #type, "common_" #item, \
74 offsetof(typeof(ent), item), \
75 sizeof(ent.item), \
76 is_signed_type(type), FILTER_OTHER); \
77 if (ret) \
78 return ret;
79
80int trace_define_common_fields(struct ftrace_event_call *call)
81{
82 int ret;
83 struct trace_entry ent; /* never read: only feeds offsetof()/sizeof() in __common_field */
84
85 __common_field(unsigned short, type); /* registered as "common_type", and so on below */
86 __common_field(unsigned char, flags);
87 __common_field(unsigned char, preempt_count);
88 __common_field(int, pid);
89 __common_field(int, tgid);
90
91 return ret; /* each __common_field returns early on a non-zero ret, so this is 0 */
92}
93EXPORT_SYMBOL_GPL(trace_define_common_fields);
94
63#ifdef CONFIG_MODULES 95#ifdef CONFIG_MODULES
64 96
65static void trace_destroy_fields(struct ftrace_event_call *call) 97static void trace_destroy_fields(struct ftrace_event_call *call)
@@ -84,14 +116,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
84 if (call->enabled) { 116 if (call->enabled) {
85 call->enabled = 0; 117 call->enabled = 0;
86 tracing_stop_cmdline_record(); 118 tracing_stop_cmdline_record();
87 call->unregfunc(); 119 call->unregfunc(call->data);
88 } 120 }
89 break; 121 break;
90 case 1: 122 case 1:
91 if (!call->enabled) { 123 if (!call->enabled) {
92 call->enabled = 1; 124 call->enabled = 1;
93 tracing_start_cmdline_record(); 125 tracing_start_cmdline_record();
94 call->regfunc(); 126 call->regfunc(call->data);
95 } 127 }
96 break; 128 break;
97 } 129 }
@@ -574,7 +606,7 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
574 trace_seq_printf(s, "format:\n"); 606 trace_seq_printf(s, "format:\n");
575 trace_write_header(s); 607 trace_write_header(s);
576 608
577 r = call->show_format(s); 609 r = call->show_format(call, s);
578 if (!r) { 610 if (!r) {
579 /* 611 /*
580 * ug! The format output is bigger than a PAGE!! 612 * ug! The format output is bigger than a PAGE!!
@@ -849,8 +881,10 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
849 881
850 /* First see if we did not already create this dir */ 882 /* First see if we did not already create this dir */
851 list_for_each_entry(system, &event_subsystems, list) { 883 list_for_each_entry(system, &event_subsystems, list) {
852 if (strcmp(system->name, name) == 0) 884 if (strcmp(system->name, name) == 0) {
885 system->nr_events++;
853 return system->entry; 886 return system->entry;
887 }
854 } 888 }
855 889
856 /* need to create new entry */ 890 /* need to create new entry */
@@ -869,6 +903,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
869 return d_events; 903 return d_events;
870 } 904 }
871 905
906 system->nr_events = 1;
872 system->name = kstrdup(name, GFP_KERNEL); 907 system->name = kstrdup(name, GFP_KERNEL);
873 if (!system->name) { 908 if (!system->name) {
874 debugfs_remove(system->entry); 909 debugfs_remove(system->entry);
@@ -920,15 +955,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
920 if (strcmp(call->system, TRACE_SYSTEM) != 0) 955 if (strcmp(call->system, TRACE_SYSTEM) != 0)
921 d_events = event_subsystem_dir(call->system, d_events); 956 d_events = event_subsystem_dir(call->system, d_events);
922 957
923 if (call->raw_init) {
924 ret = call->raw_init();
925 if (ret < 0) {
926 pr_warning("Could not initialize trace point"
927 " events/%s\n", call->name);
928 return ret;
929 }
930 }
931
932 call->dir = debugfs_create_dir(call->name, d_events); 958 call->dir = debugfs_create_dir(call->name, d_events);
933 if (!call->dir) { 959 if (!call->dir) {
934 pr_warning("Could not create debugfs " 960 pr_warning("Could not create debugfs "
@@ -945,7 +971,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
945 id); 971 id);
946 972
947 if (call->define_fields) { 973 if (call->define_fields) {
948 ret = call->define_fields(); 974 ret = call->define_fields(call);
949 if (ret < 0) { 975 if (ret < 0) {
950 pr_warning("Could not initialize trace point" 976 pr_warning("Could not initialize trace point"
951 " events/%s\n", call->name); 977 " events/%s\n", call->name);
@@ -987,6 +1013,32 @@ struct ftrace_module_file_ops {
987 struct file_operations filter; 1013 struct file_operations filter;
988}; 1014};
989 1015
1016static void remove_subsystem_dir(const char *name)
1017{
1018 struct event_subsystem *system;
1019
1020 if (strcmp(name, TRACE_SYSTEM) == 0)
1021 return;
1022
1023 list_for_each_entry(system, &event_subsystems, list) {
1024 if (strcmp(system->name, name) == 0) {
1025 if (!--system->nr_events) {
1026 struct event_filter *filter = system->filter;
1027
1028 debugfs_remove_recursive(system->entry);
1029 list_del(&system->list);
1030 if (filter) {
1031 kfree(filter->filter_string);
1032 kfree(filter);
1033 }
1034 kfree(system->name);
1035 kfree(system);
1036 }
1037 break;
1038 }
1039 }
1040}
1041
990static struct ftrace_module_file_ops * 1042static struct ftrace_module_file_ops *
991trace_create_file_ops(struct module *mod) 1043trace_create_file_ops(struct module *mod)
992{ 1044{
@@ -1027,6 +1079,7 @@ static void trace_module_add_events(struct module *mod)
1027 struct ftrace_module_file_ops *file_ops = NULL; 1079 struct ftrace_module_file_ops *file_ops = NULL;
1028 struct ftrace_event_call *call, *start, *end; 1080 struct ftrace_event_call *call, *start, *end;
1029 struct dentry *d_events; 1081 struct dentry *d_events;
1082 int ret;
1030 1083
1031 start = mod->trace_events; 1084 start = mod->trace_events;
1032 end = mod->trace_events + mod->num_trace_events; 1085 end = mod->trace_events + mod->num_trace_events;
@@ -1042,7 +1095,15 @@ static void trace_module_add_events(struct module *mod)
1042 /* The linker may leave blanks */ 1095 /* The linker may leave blanks */
1043 if (!call->name) 1096 if (!call->name)
1044 continue; 1097 continue;
1045 1098 if (call->raw_init) {
1099 ret = call->raw_init();
1100 if (ret < 0) {
1101 if (ret != -ENOSYS)
1102 pr_warning("Could not initialize trace "
1103 "point events/%s\n", call->name);
1104 continue;
1105 }
1106 }
1046 /* 1107 /*
1047 * This module has events, create file ops for this module 1108 * This module has events, create file ops for this module
1048 * if not already done. 1109 * if not already done.
@@ -1077,6 +1138,7 @@ static void trace_module_remove_events(struct module *mod)
1077 list_del(&call->list); 1138 list_del(&call->list);
1078 trace_destroy_fields(call); 1139 trace_destroy_fields(call);
1079 destroy_preds(call); 1140 destroy_preds(call);
1141 remove_subsystem_dir(call->system);
1080 } 1142 }
1081 } 1143 }
1082 1144
@@ -1133,6 +1195,18 @@ struct notifier_block trace_module_nb = {
1133extern struct ftrace_event_call __start_ftrace_events[]; 1195extern struct ftrace_event_call __start_ftrace_events[];
1134extern struct ftrace_event_call __stop_ftrace_events[]; 1196extern struct ftrace_event_call __stop_ftrace_events[];
1135 1197
1198static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1199
1200static __init int setup_trace_event(char *str)
1201{
1202 strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
1203 ring_buffer_expanded = 1;
1204 tracing_selftest_disabled = 1;
1205
1206 return 1;
1207}
1208__setup("trace_event=", setup_trace_event);
1209
1136static __init int event_trace_init(void) 1210static __init int event_trace_init(void)
1137{ 1211{
1138 struct ftrace_event_call *call; 1212 struct ftrace_event_call *call;
@@ -1140,6 +1214,8 @@ static __init int event_trace_init(void)
1140 struct dentry *entry; 1214 struct dentry *entry;
1141 struct dentry *d_events; 1215 struct dentry *d_events;
1142 int ret; 1216 int ret;
1217 char *buf = bootup_event_buf;
1218 char *token;
1143 1219
1144 d_tracer = tracing_init_dentry(); 1220 d_tracer = tracing_init_dentry();
1145 if (!d_tracer) 1221 if (!d_tracer)
@@ -1179,12 +1255,34 @@ static __init int event_trace_init(void)
1179 /* The linker may leave blanks */ 1255 /* The linker may leave blanks */
1180 if (!call->name) 1256 if (!call->name)
1181 continue; 1257 continue;
1258 if (call->raw_init) {
1259 ret = call->raw_init();
1260 if (ret < 0) {
1261 if (ret != -ENOSYS)
1262 pr_warning("Could not initialize trace "
1263 "point events/%s\n", call->name);
1264 continue;
1265 }
1266 }
1182 list_add(&call->list, &ftrace_events); 1267 list_add(&call->list, &ftrace_events);
1183 event_create_dir(call, d_events, &ftrace_event_id_fops, 1268 event_create_dir(call, d_events, &ftrace_event_id_fops,
1184 &ftrace_enable_fops, &ftrace_event_filter_fops, 1269 &ftrace_enable_fops, &ftrace_event_filter_fops,
1185 &ftrace_event_format_fops); 1270 &ftrace_event_format_fops);
1186 } 1271 }
1187 1272
1273 while (true) {
1274 token = strsep(&buf, ",");
1275
1276 if (!token)
1277 break;
1278 if (!*token)
1279 continue;
1280
1281 ret = ftrace_set_clr_event(token, 1);
1282 if (ret)
1283 pr_warning("Failed to enable trace event: %s\n", token);
1284 }
1285
1188 ret = register_module_notifier(&trace_module_nb); 1286 ret = register_module_notifier(&trace_module_nb);
1189 if (ret) 1287 if (ret)
1190 pr_warning("Failed to register trace events module notifier\n"); 1288 pr_warning("Failed to register trace events module notifier\n");
@@ -1340,6 +1438,7 @@ static void
1340function_test_events_call(unsigned long ip, unsigned long parent_ip) 1438function_test_events_call(unsigned long ip, unsigned long parent_ip)
1341{ 1439{
1342 struct ring_buffer_event *event; 1440 struct ring_buffer_event *event;
1441 struct ring_buffer *buffer;
1343 struct ftrace_entry *entry; 1442 struct ftrace_entry *entry;
1344 unsigned long flags; 1443 unsigned long flags;
1345 long disabled; 1444 long disabled;
@@ -1357,7 +1456,8 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1357 1456
1358 local_save_flags(flags); 1457 local_save_flags(flags);
1359 1458
1360 event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry), 1459 event = trace_current_buffer_lock_reserve(&buffer,
1460 TRACE_FN, sizeof(*entry),
1361 flags, pc); 1461 flags, pc);
1362 if (!event) 1462 if (!event)
1363 goto out; 1463 goto out;
@@ -1365,7 +1465,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1365 entry->ip = ip; 1465 entry->ip = ip;
1366 entry->parent_ip = parent_ip; 1466 entry->parent_ip = parent_ip;
1367 1467
1368 trace_nowake_buffer_unlock_commit(event, flags, pc); 1468 trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
1369 1469
1370 out: 1470 out:
1371 atomic_dec(&per_cpu(test_event_disable, cpu)); 1471 atomic_dec(&per_cpu(test_event_disable, cpu));
@@ -1392,10 +1492,10 @@ static __init void event_trace_self_test_with_function(void)
1392 1492
1393static __init int event_trace_self_tests_init(void) 1493static __init int event_trace_self_tests_init(void)
1394{ 1494{
1395 1495 if (!tracing_selftest_disabled) {
1396 event_trace_self_tests(); 1496 event_trace_self_tests();
1397 1497 event_trace_self_test_with_function();
1398 event_trace_self_test_with_function(); 1498 }
1399 1499
1400 return 0; 1500 return 0;
1401} 1501}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index f32dc9d1ea7..93660fbbf62 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -163,6 +163,20 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
163 return match; 163 return match;
164} 164}
165 165
166/* Filter predicate for char * pointers */
167static int filter_pred_pchar(struct filter_pred *pred, void *event,
168 int val1, int val2)
169{
170 char **addr = (char **)(event + pred->offset);
171 int cmp, match;
172
173 cmp = strncmp(*addr, pred->str_val, pred->str_len);
174
175 match = (!cmp) ^ pred->not;
176
177 return match;
178}
179
166/* 180/*
167 * Filter predicate for dynamic sized arrays of characters. 181 * Filter predicate for dynamic sized arrays of characters.
168 * These are implemented through a list of strings at the end 182 * These are implemented through a list of strings at the end
@@ -176,11 +190,13 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
176static int filter_pred_strloc(struct filter_pred *pred, void *event, 190static int filter_pred_strloc(struct filter_pred *pred, void *event,
177 int val1, int val2) 191 int val1, int val2)
178{ 192{
179 unsigned short str_loc = *(unsigned short *)(event + pred->offset); 193 u32 str_item = *(u32 *)(event + pred->offset);
194 int str_loc = str_item & 0xffff;
195 int str_len = str_item >> 16;
180 char *addr = (char *)(event + str_loc); 196 char *addr = (char *)(event + str_loc);
181 int cmp, match; 197 int cmp, match;
182 198
183 cmp = strncmp(addr, pred->str_val, pred->str_len); 199 cmp = strncmp(addr, pred->str_val, str_len);
184 200
185 match = (!cmp) ^ pred->not; 201 match = (!cmp) ^ pred->not;
186 202
@@ -293,7 +309,7 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
293 struct event_filter *filter = call->filter; 309 struct event_filter *filter = call->filter;
294 310
295 mutex_lock(&event_mutex); 311 mutex_lock(&event_mutex);
296 if (filter->filter_string) 312 if (filter && filter->filter_string)
297 trace_seq_printf(s, "%s\n", filter->filter_string); 313 trace_seq_printf(s, "%s\n", filter->filter_string);
298 else 314 else
299 trace_seq_printf(s, "none\n"); 315 trace_seq_printf(s, "none\n");
@@ -306,7 +322,7 @@ void print_subsystem_event_filter(struct event_subsystem *system,
306 struct event_filter *filter = system->filter; 322 struct event_filter *filter = system->filter;
307 323
308 mutex_lock(&event_mutex); 324 mutex_lock(&event_mutex);
309 if (filter->filter_string) 325 if (filter && filter->filter_string)
310 trace_seq_printf(s, "%s\n", filter->filter_string); 326 trace_seq_printf(s, "%s\n", filter->filter_string);
311 else 327 else
312 trace_seq_printf(s, "none\n"); 328 trace_seq_printf(s, "none\n");
@@ -374,6 +390,9 @@ void destroy_preds(struct ftrace_event_call *call)
374 struct event_filter *filter = call->filter; 390 struct event_filter *filter = call->filter;
375 int i; 391 int i;
376 392
393 if (!filter)
394 return;
395
377 for (i = 0; i < MAX_FILTER_PRED; i++) { 396 for (i = 0; i < MAX_FILTER_PRED; i++) {
378 if (filter->preds[i]) 397 if (filter->preds[i])
379 filter_free_pred(filter->preds[i]); 398 filter_free_pred(filter->preds[i]);
@@ -384,17 +403,19 @@ void destroy_preds(struct ftrace_event_call *call)
384 call->filter = NULL; 403 call->filter = NULL;
385} 404}
386 405
387int init_preds(struct ftrace_event_call *call) 406static int init_preds(struct ftrace_event_call *call)
388{ 407{
389 struct event_filter *filter; 408 struct event_filter *filter;
390 struct filter_pred *pred; 409 struct filter_pred *pred;
391 int i; 410 int i;
392 411
412 if (call->filter)
413 return 0;
414
393 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL); 415 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
394 if (!call->filter) 416 if (!call->filter)
395 return -ENOMEM; 417 return -ENOMEM;
396 418
397 call->filter_active = 0;
398 filter->n_preds = 0; 419 filter->n_preds = 0;
399 420
400 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); 421 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
@@ -416,30 +437,55 @@ oom:
416 437
417 return -ENOMEM; 438 return -ENOMEM;
418} 439}
419EXPORT_SYMBOL_GPL(init_preds);
420 440
421static void filter_free_subsystem_preds(struct event_subsystem *system) 441static int init_subsystem_preds(struct event_subsystem *system)
422{ 442{
423 struct event_filter *filter = system->filter;
424 struct ftrace_event_call *call; 443 struct ftrace_event_call *call;
425 int i; 444 int err;
426 445
427 if (filter->n_preds) { 446 list_for_each_entry(call, &ftrace_events, list) {
428 for (i = 0; i < filter->n_preds; i++) 447 if (!call->define_fields)
429 filter_free_pred(filter->preds[i]); 448 continue;
430 kfree(filter->preds); 449
431 filter->preds = NULL; 450 if (strcmp(call->system, system->name) != 0)
432 filter->n_preds = 0; 451 continue;
452
453 err = init_preds(call);
454 if (err)
455 return err;
433 } 456 }
434 457
458 return 0;
459}
460
461enum {
462 FILTER_DISABLE_ALL,
463 FILTER_INIT_NO_RESET,
464 FILTER_SKIP_NO_RESET,
465};
466
467static void filter_free_subsystem_preds(struct event_subsystem *system,
468 int flag)
469{
470 struct ftrace_event_call *call;
471
435 list_for_each_entry(call, &ftrace_events, list) { 472 list_for_each_entry(call, &ftrace_events, list) {
436 if (!call->define_fields) 473 if (!call->define_fields)
437 continue; 474 continue;
438 475
439 if (!strcmp(call->system, system->name)) { 476 if (strcmp(call->system, system->name) != 0)
440 filter_disable_preds(call); 477 continue;
441 remove_filter_string(call->filter); 478
479 if (flag == FILTER_INIT_NO_RESET) {
480 call->filter->no_reset = false;
481 continue;
442 } 482 }
483
484 if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset)
485 continue;
486
487 filter_disable_preds(call);
488 remove_filter_string(call->filter);
443 } 489 }
444} 490}
445 491
@@ -468,12 +514,7 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
468 return 0; 514 return 0;
469} 515}
470 516
471enum { 517int filter_assign_type(const char *type)
472 FILTER_STATIC_STRING = 1,
473 FILTER_DYN_STRING
474};
475
476static int is_string_field(const char *type)
477{ 518{
478 if (strstr(type, "__data_loc") && strstr(type, "char")) 519 if (strstr(type, "__data_loc") && strstr(type, "char"))
479 return FILTER_DYN_STRING; 520 return FILTER_DYN_STRING;
@@ -481,12 +522,19 @@ static int is_string_field(const char *type)
481 if (strchr(type, '[') && strstr(type, "char")) 522 if (strchr(type, '[') && strstr(type, "char"))
482 return FILTER_STATIC_STRING; 523 return FILTER_STATIC_STRING;
483 524
484 return 0; 525 return FILTER_OTHER;
526}
527
528static bool is_string_field(struct ftrace_event_field *field)
529{
530 return field->filter_type == FILTER_DYN_STRING ||
531 field->filter_type == FILTER_STATIC_STRING ||
532 field->filter_type == FILTER_PTR_STRING;
485} 533}
486 534
487static int is_legal_op(struct ftrace_event_field *field, int op) 535static int is_legal_op(struct ftrace_event_field *field, int op)
488{ 536{
489 if (is_string_field(field->type) && (op != OP_EQ && op != OP_NE)) 537 if (is_string_field(field) && (op != OP_EQ && op != OP_NE))
490 return 0; 538 return 0;
491 539
492 return 1; 540 return 1;
@@ -537,22 +585,24 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
537 585
538static int filter_add_pred(struct filter_parse_state *ps, 586static int filter_add_pred(struct filter_parse_state *ps,
539 struct ftrace_event_call *call, 587 struct ftrace_event_call *call,
540 struct filter_pred *pred) 588 struct filter_pred *pred,
589 bool dry_run)
541{ 590{
542 struct ftrace_event_field *field; 591 struct ftrace_event_field *field;
543 filter_pred_fn_t fn; 592 filter_pred_fn_t fn;
544 unsigned long long val; 593 unsigned long long val;
545 int string_type;
546 int ret; 594 int ret;
547 595
548 pred->fn = filter_pred_none; 596 pred->fn = filter_pred_none;
549 597
550 if (pred->op == OP_AND) { 598 if (pred->op == OP_AND) {
551 pred->pop_n = 2; 599 pred->pop_n = 2;
552 return filter_add_pred_fn(ps, call, pred, filter_pred_and); 600 fn = filter_pred_and;
601 goto add_pred_fn;
553 } else if (pred->op == OP_OR) { 602 } else if (pred->op == OP_OR) {
554 pred->pop_n = 2; 603 pred->pop_n = 2;
555 return filter_add_pred_fn(ps, call, pred, filter_pred_or); 604 fn = filter_pred_or;
605 goto add_pred_fn;
556 } 606 }
557 607
558 field = find_event_field(call, pred->field_name); 608 field = find_event_field(call, pred->field_name);
@@ -568,16 +618,17 @@ static int filter_add_pred(struct filter_parse_state *ps,
568 return -EINVAL; 618 return -EINVAL;
569 } 619 }
570 620
571 string_type = is_string_field(field->type); 621 if (is_string_field(field)) {
572 if (string_type) { 622 pred->str_len = field->size;
573 if (string_type == FILTER_STATIC_STRING) 623
624 if (field->filter_type == FILTER_STATIC_STRING)
574 fn = filter_pred_string; 625 fn = filter_pred_string;
575 else 626 else if (field->filter_type == FILTER_DYN_STRING)
576 fn = filter_pred_strloc; 627 fn = filter_pred_strloc;
577 pred->str_len = field->size; 628 else {
578 if (pred->op == OP_NE) 629 fn = filter_pred_pchar;
579 pred->not = 1; 630 pred->str_len = strlen(pred->str_val);
580 return filter_add_pred_fn(ps, call, pred, fn); 631 }
581 } else { 632 } else {
582 if (field->is_signed) 633 if (field->is_signed)
583 ret = strict_strtoll(pred->str_val, 0, &val); 634 ret = strict_strtoll(pred->str_val, 0, &val);
@@ -588,41 +639,33 @@ static int filter_add_pred(struct filter_parse_state *ps,
588 return -EINVAL; 639 return -EINVAL;
589 } 640 }
590 pred->val = val; 641 pred->val = val;
591 }
592 642
593 fn = select_comparison_fn(pred->op, field->size, field->is_signed); 643 fn = select_comparison_fn(pred->op, field->size,
594 if (!fn) { 644 field->is_signed);
595 parse_error(ps, FILT_ERR_INVALID_OP, 0); 645 if (!fn) {
596 return -EINVAL; 646 parse_error(ps, FILT_ERR_INVALID_OP, 0);
647 return -EINVAL;
648 }
597 } 649 }
598 650
599 if (pred->op == OP_NE) 651 if (pred->op == OP_NE)
600 pred->not = 1; 652 pred->not = 1;
601 653
602 return filter_add_pred_fn(ps, call, pred, fn); 654add_pred_fn:
655 if (!dry_run)
656 return filter_add_pred_fn(ps, call, pred, fn);
657 return 0;
603} 658}
604 659
605static int filter_add_subsystem_pred(struct filter_parse_state *ps, 660static int filter_add_subsystem_pred(struct filter_parse_state *ps,
606 struct event_subsystem *system, 661 struct event_subsystem *system,
607 struct filter_pred *pred, 662 struct filter_pred *pred,
608 char *filter_string) 663 char *filter_string,
664 bool dry_run)
609{ 665{
610 struct event_filter *filter = system->filter;
611 struct ftrace_event_call *call; 666 struct ftrace_event_call *call;
612 int err = 0; 667 int err = 0;
613 668 bool fail = true;
614 if (!filter->preds) {
615 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
616 GFP_KERNEL);
617
618 if (!filter->preds)
619 return -ENOMEM;
620 }
621
622 if (filter->n_preds == MAX_FILTER_PRED) {
623 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
624 return -ENOSPC;
625 }
626 669
627 list_for_each_entry(call, &ftrace_events, list) { 670 list_for_each_entry(call, &ftrace_events, list) {
628 671
@@ -632,19 +675,24 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
632 if (strcmp(call->system, system->name)) 675 if (strcmp(call->system, system->name))
633 continue; 676 continue;
634 677
635 err = filter_add_pred(ps, call, pred); 678 if (call->filter->no_reset)
636 if (err) { 679 continue;
637 filter_free_subsystem_preds(system); 680
638 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); 681 err = filter_add_pred(ps, call, pred, dry_run);
639 goto out; 682 if (err)
640 } 683 call->filter->no_reset = true;
641 replace_filter_string(call->filter, filter_string); 684 else
685 fail = false;
686
687 if (!dry_run)
688 replace_filter_string(call->filter, filter_string);
642 } 689 }
643 690
644 filter->preds[filter->n_preds] = pred; 691 if (fail) {
645 filter->n_preds++; 692 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
646out: 693 return err;
647 return err; 694 }
695 return 0;
648} 696}
649 697
650static void parse_init(struct filter_parse_state *ps, 698static void parse_init(struct filter_parse_state *ps,
@@ -1003,12 +1051,14 @@ static int check_preds(struct filter_parse_state *ps)
1003static int replace_preds(struct event_subsystem *system, 1051static int replace_preds(struct event_subsystem *system,
1004 struct ftrace_event_call *call, 1052 struct ftrace_event_call *call,
1005 struct filter_parse_state *ps, 1053 struct filter_parse_state *ps,
1006 char *filter_string) 1054 char *filter_string,
1055 bool dry_run)
1007{ 1056{
1008 char *operand1 = NULL, *operand2 = NULL; 1057 char *operand1 = NULL, *operand2 = NULL;
1009 struct filter_pred *pred; 1058 struct filter_pred *pred;
1010 struct postfix_elt *elt; 1059 struct postfix_elt *elt;
1011 int err; 1060 int err;
1061 int n_preds = 0;
1012 1062
1013 err = check_preds(ps); 1063 err = check_preds(ps);
1014 if (err) 1064 if (err)
@@ -1027,24 +1077,14 @@ static int replace_preds(struct event_subsystem *system,
1027 continue; 1077 continue;
1028 } 1078 }
1029 1079
1080 if (n_preds++ == MAX_FILTER_PRED) {
1081 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
1082 return -ENOSPC;
1083 }
1084
1030 if (elt->op == OP_AND || elt->op == OP_OR) { 1085 if (elt->op == OP_AND || elt->op == OP_OR) {
1031 pred = create_logical_pred(elt->op); 1086 pred = create_logical_pred(elt->op);
1032 if (!pred) 1087 goto add_pred;
1033 return -ENOMEM;
1034 if (call) {
1035 err = filter_add_pred(ps, call, pred);
1036 filter_free_pred(pred);
1037 } else {
1038 err = filter_add_subsystem_pred(ps, system,
1039 pred, filter_string);
1040 if (err)
1041 filter_free_pred(pred);
1042 }
1043 if (err)
1044 return err;
1045
1046 operand1 = operand2 = NULL;
1047 continue;
1048 } 1088 }
1049 1089
1050 if (!operand1 || !operand2) { 1090 if (!operand1 || !operand2) {
@@ -1053,17 +1093,15 @@ static int replace_preds(struct event_subsystem *system,
1053 } 1093 }
1054 1094
1055 pred = create_pred(elt->op, operand1, operand2); 1095 pred = create_pred(elt->op, operand1, operand2);
1096add_pred:
1056 if (!pred) 1097 if (!pred)
1057 return -ENOMEM; 1098 return -ENOMEM;
1058 if (call) { 1099 if (call)
1059 err = filter_add_pred(ps, call, pred); 1100 err = filter_add_pred(ps, call, pred, false);
1060 filter_free_pred(pred); 1101 else
1061 } else {
1062 err = filter_add_subsystem_pred(ps, system, pred, 1102 err = filter_add_subsystem_pred(ps, system, pred,
1063 filter_string); 1103 filter_string, dry_run);
1064 if (err) 1104 filter_free_pred(pred);
1065 filter_free_pred(pred);
1066 }
1067 if (err) 1105 if (err)
1068 return err; 1106 return err;
1069 1107
@@ -1081,6 +1119,10 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1081 1119
1082 mutex_lock(&event_mutex); 1120 mutex_lock(&event_mutex);
1083 1121
1122 err = init_preds(call);
1123 if (err)
1124 goto out_unlock;
1125
1084 if (!strcmp(strstrip(filter_string), "0")) { 1126 if (!strcmp(strstrip(filter_string), "0")) {
1085 filter_disable_preds(call); 1127 filter_disable_preds(call);
1086 remove_filter_string(call->filter); 1128 remove_filter_string(call->filter);
@@ -1103,7 +1145,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1103 goto out; 1145 goto out;
1104 } 1146 }
1105 1147
1106 err = replace_preds(NULL, call, ps, filter_string); 1148 err = replace_preds(NULL, call, ps, filter_string, false);
1107 if (err) 1149 if (err)
1108 append_filter_err(ps, call->filter); 1150 append_filter_err(ps, call->filter);
1109 1151
@@ -1126,8 +1168,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1126 1168
1127 mutex_lock(&event_mutex); 1169 mutex_lock(&event_mutex);
1128 1170
1171 err = init_subsystem_preds(system);
1172 if (err)
1173 goto out_unlock;
1174
1129 if (!strcmp(strstrip(filter_string), "0")) { 1175 if (!strcmp(strstrip(filter_string), "0")) {
1130 filter_free_subsystem_preds(system); 1176 filter_free_subsystem_preds(system, FILTER_DISABLE_ALL);
1131 remove_filter_string(system->filter); 1177 remove_filter_string(system->filter);
1132 mutex_unlock(&event_mutex); 1178 mutex_unlock(&event_mutex);
1133 return 0; 1179 return 0;
@@ -1138,7 +1184,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1138 if (!ps) 1184 if (!ps)
1139 goto out_unlock; 1185 goto out_unlock;
1140 1186
1141 filter_free_subsystem_preds(system);
1142 replace_filter_string(system->filter, filter_string); 1187 replace_filter_string(system->filter, filter_string);
1143 1188
1144 parse_init(ps, filter_ops, filter_string); 1189 parse_init(ps, filter_ops, filter_string);
@@ -1148,9 +1193,23 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1148 goto out; 1193 goto out;
1149 } 1194 }
1150 1195
1151 err = replace_preds(system, NULL, ps, filter_string); 1196 filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET);
1152 if (err) 1197
1198 /* try to see the filter can be applied to which events */
1199 err = replace_preds(system, NULL, ps, filter_string, true);
1200 if (err) {
1153 append_filter_err(ps, system->filter); 1201 append_filter_err(ps, system->filter);
1202 goto out;
1203 }
1204
1205 filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET);
1206
1207 /* really apply the filter to the events */
1208 err = replace_preds(system, NULL, ps, filter_string, false);
1209 if (err) {
1210 append_filter_err(ps, system->filter);
1211 filter_free_subsystem_preds(system, 2);
1212 }
1154 1213
1155out: 1214out:
1156 filter_opstack_clear(ps); 1215 filter_opstack_clear(ps);
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d06cf898dc8..df1bf6e48bb 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -60,7 +60,8 @@ extern void __bad_type_size(void);
60#undef TRACE_EVENT_FORMAT 60#undef TRACE_EVENT_FORMAT
61#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ 61#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
62static int \ 62static int \
63ftrace_format_##call(struct trace_seq *s) \ 63ftrace_format_##call(struct ftrace_event_call *unused, \
64 struct trace_seq *s) \
64{ \ 65{ \
65 struct args field; \ 66 struct args field; \
66 int ret; \ 67 int ret; \
@@ -76,7 +77,8 @@ ftrace_format_##call(struct trace_seq *s) \
76#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ 77#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
77 tpfmt) \ 78 tpfmt) \
78static int \ 79static int \
79ftrace_format_##call(struct trace_seq *s) \ 80ftrace_format_##call(struct ftrace_event_call *unused, \
81 struct trace_seq *s) \
80{ \ 82{ \
81 struct args field; \ 83 struct args field; \
82 int ret; \ 84 int ret; \
@@ -117,7 +119,7 @@ ftrace_format_##call(struct trace_seq *s) \
117 119
118#undef TRACE_EVENT_FORMAT 120#undef TRACE_EVENT_FORMAT
119#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ 121#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
120int ftrace_define_fields_##call(void); \ 122int ftrace_define_fields_##call(struct ftrace_event_call *event_call); \
121static int ftrace_raw_init_event_##call(void); \ 123static int ftrace_raw_init_event_##call(void); \
122 \ 124 \
123struct ftrace_event_call __used \ 125struct ftrace_event_call __used \
@@ -133,7 +135,6 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
133static int ftrace_raw_init_event_##call(void) \ 135static int ftrace_raw_init_event_##call(void) \
134{ \ 136{ \
135 INIT_LIST_HEAD(&event_##call.fields); \ 137 INIT_LIST_HEAD(&event_##call.fields); \
136 init_preds(&event_##call); \
137 return 0; \ 138 return 0; \
138} \ 139} \
139 140
@@ -156,7 +157,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
156#define TRACE_FIELD(type, item, assign) \ 157#define TRACE_FIELD(type, item, assign) \
157 ret = trace_define_field(event_call, #type, #item, \ 158 ret = trace_define_field(event_call, #type, #item, \
158 offsetof(typeof(field), item), \ 159 offsetof(typeof(field), item), \
159 sizeof(field.item), is_signed_type(type)); \ 160 sizeof(field.item), \
161 is_signed_type(type), FILTER_OTHER); \
160 if (ret) \ 162 if (ret) \
161 return ret; 163 return ret;
162 164
@@ -164,7 +166,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
164#define TRACE_FIELD_SPECIAL(type, item, len, cmd) \ 166#define TRACE_FIELD_SPECIAL(type, item, len, cmd) \
165 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 167 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
166 offsetof(typeof(field), item), \ 168 offsetof(typeof(field), item), \
167 sizeof(field.item), 0); \ 169 sizeof(field.item), 0, FILTER_OTHER); \
168 if (ret) \ 170 if (ret) \
169 return ret; 171 return ret;
170 172
@@ -172,7 +174,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
172#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ 174#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
173 ret = trace_define_field(event_call, #type, #item, \ 175 ret = trace_define_field(event_call, #type, #item, \
174 offsetof(typeof(field), item), \ 176 offsetof(typeof(field), item), \
175 sizeof(field.item), is_signed); \ 177 sizeof(field.item), is_signed, \
178 FILTER_OTHER); \
176 if (ret) \ 179 if (ret) \
177 return ret; 180 return ret;
178 181
@@ -182,17 +185,14 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
182#undef TRACE_EVENT_FORMAT 185#undef TRACE_EVENT_FORMAT
183#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ 186#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
184int \ 187int \
185ftrace_define_fields_##call(void) \ 188ftrace_define_fields_##call(struct ftrace_event_call *event_call) \
186{ \ 189{ \
187 struct ftrace_event_call *event_call = &event_##call; \
188 struct args field; \ 190 struct args field; \
189 int ret; \ 191 int ret; \
190 \ 192 \
191 __common_field(unsigned char, type, 0); \ 193 ret = trace_define_common_fields(event_call); \
192 __common_field(unsigned char, flags, 0); \ 194 if (ret) \
193 __common_field(unsigned char, preempt_count, 0); \ 195 return ret; \
194 __common_field(int, pid, 1); \
195 __common_field(int, tgid, 1); \
196 \ 196 \
197 tstruct; \ 197 tstruct; \
198 \ 198 \
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 75ef000613c..5b01b94518f 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -288,11 +288,9 @@ static int
288ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, 288ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
289 struct ftrace_probe_ops *ops, void *data) 289 struct ftrace_probe_ops *ops, void *data)
290{ 290{
291 char str[KSYM_SYMBOL_LEN];
292 long count = (long)data; 291 long count = (long)data;
293 292
294 kallsyms_lookup(ip, NULL, NULL, NULL, str); 293 seq_printf(m, "%pf:", (void *)ip);
295 seq_printf(m, "%s:", str);
296 294
297 if (ops == &traceon_probe_ops) 295 if (ops == &traceon_probe_ops)
298 seq_printf(m, "traceon"); 296 seq_printf(m, "traceon");
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 420ec348757..b3749a2c313 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -52,7 +52,7 @@ static struct tracer_flags tracer_flags = {
52 .opts = trace_opts 52 .opts = trace_opts
53}; 53};
54 54
55/* pid on the last trace processed */ 55static struct trace_array *graph_array;
56 56
57 57
58/* Add a function return address to the trace stack on thread info.*/ 58/* Add a function return address to the trace stack on thread info.*/
@@ -166,10 +166,123 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
166 return ret; 166 return ret;
167} 167}
168 168
169static int __trace_graph_entry(struct trace_array *tr,
170 struct ftrace_graph_ent *trace,
171 unsigned long flags,
172 int pc)
173{
174 struct ftrace_event_call *call = &event_funcgraph_entry;
175 struct ring_buffer_event *event;
176 struct ring_buffer *buffer = tr->buffer;
177 struct ftrace_graph_ent_entry *entry;
178
179 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
180 return 0;
181
182 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
183 sizeof(*entry), flags, pc);
184 if (!event)
185 return 0;
186 entry = ring_buffer_event_data(event);
187 entry->graph_ent = *trace;
188 if (!filter_current_check_discard(buffer, call, entry, event))
189 ring_buffer_unlock_commit(buffer, event);
190
191 return 1;
192}
193
194int trace_graph_entry(struct ftrace_graph_ent *trace)
195{
196 struct trace_array *tr = graph_array;
197 struct trace_array_cpu *data;
198 unsigned long flags;
199 long disabled;
200 int ret;
201 int cpu;
202 int pc;
203
204 if (unlikely(!tr))
205 return 0;
206
207 if (!ftrace_trace_task(current))
208 return 0;
209
210 if (!ftrace_graph_addr(trace->func))
211 return 0;
212
213 local_irq_save(flags);
214 cpu = raw_smp_processor_id();
215 data = tr->data[cpu];
216 disabled = atomic_inc_return(&data->disabled);
217 if (likely(disabled == 1)) {
218 pc = preempt_count();
219 ret = __trace_graph_entry(tr, trace, flags, pc);
220 } else {
221 ret = 0;
222 }
223 /* Only do the atomic if it is not already set */
224 if (!test_tsk_trace_graph(current))
225 set_tsk_trace_graph(current);
226
227 atomic_dec(&data->disabled);
228 local_irq_restore(flags);
229
230 return ret;
231}
232
233static void __trace_graph_return(struct trace_array *tr,
234 struct ftrace_graph_ret *trace,
235 unsigned long flags,
236 int pc)
237{
238 struct ftrace_event_call *call = &event_funcgraph_exit;
239 struct ring_buffer_event *event;
240 struct ring_buffer *buffer = tr->buffer;
241 struct ftrace_graph_ret_entry *entry;
242
243 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
244 return;
245
246 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
247 sizeof(*entry), flags, pc);
248 if (!event)
249 return;
250 entry = ring_buffer_event_data(event);
251 entry->ret = *trace;
252 if (!filter_current_check_discard(buffer, call, entry, event))
253 ring_buffer_unlock_commit(buffer, event);
254}
255
256void trace_graph_return(struct ftrace_graph_ret *trace)
257{
258 struct trace_array *tr = graph_array;
259 struct trace_array_cpu *data;
260 unsigned long flags;
261 long disabled;
262 int cpu;
263 int pc;
264
265 local_irq_save(flags);
266 cpu = raw_smp_processor_id();
267 data = tr->data[cpu];
268 disabled = atomic_inc_return(&data->disabled);
269 if (likely(disabled == 1)) {
270 pc = preempt_count();
271 __trace_graph_return(tr, trace, flags, pc);
272 }
273 if (!trace->depth)
274 clear_tsk_trace_graph(current);
275 atomic_dec(&data->disabled);
276 local_irq_restore(flags);
277}
278
169static int graph_trace_init(struct trace_array *tr) 279static int graph_trace_init(struct trace_array *tr)
170{ 280{
171 int ret = register_ftrace_graph(&trace_graph_return, 281 int ret;
172 &trace_graph_entry); 282
283 graph_array = tr;
284 ret = register_ftrace_graph(&trace_graph_return,
285 &trace_graph_entry);
173 if (ret) 286 if (ret)
174 return ret; 287 return ret;
175 tracing_start_cmdline_record(); 288 tracing_start_cmdline_record();
@@ -177,49 +290,30 @@ static int graph_trace_init(struct trace_array *tr)
177 return 0; 290 return 0;
178} 291}
179 292
293void set_graph_array(struct trace_array *tr)
294{
295 graph_array = tr;
296}
297
180static void graph_trace_reset(struct trace_array *tr) 298static void graph_trace_reset(struct trace_array *tr)
181{ 299{
182 tracing_stop_cmdline_record(); 300 tracing_stop_cmdline_record();
183 unregister_ftrace_graph(); 301 unregister_ftrace_graph();
184} 302}
185 303
186static inline int log10_cpu(int nb) 304static int max_bytes_for_cpu;
187{
188 if (nb / 100)
189 return 3;
190 if (nb / 10)
191 return 2;
192 return 1;
193}
194 305
195static enum print_line_t 306static enum print_line_t
196print_graph_cpu(struct trace_seq *s, int cpu) 307print_graph_cpu(struct trace_seq *s, int cpu)
197{ 308{
198 int i;
199 int ret; 309 int ret;
200 int log10_this = log10_cpu(cpu);
201 int log10_all = log10_cpu(cpumask_weight(cpu_online_mask));
202
203 310
204 /* 311 /*
205 * Start with a space character - to make it stand out 312 * Start with a space character - to make it stand out
206 * to the right a bit when trace output is pasted into 313 * to the right a bit when trace output is pasted into
207 * email: 314 * email:
208 */ 315 */
209 ret = trace_seq_printf(s, " "); 316 ret = trace_seq_printf(s, " %*d) ", max_bytes_for_cpu, cpu);
210
211 /*
212 * Tricky - we space the CPU field according to the max
213 * number of online CPUs. On a 2-cpu system it would take
214 * a maximum of 1 digit - on a 128 cpu system it would
215 * take up to 3 digits:
216 */
217 for (i = 0; i < log10_all - log10_this; i++) {
218 ret = trace_seq_printf(s, " ");
219 if (!ret)
220 return TRACE_TYPE_PARTIAL_LINE;
221 }
222 ret = trace_seq_printf(s, "%d) ", cpu);
223 if (!ret) 317 if (!ret)
224 return TRACE_TYPE_PARTIAL_LINE; 318 return TRACE_TYPE_PARTIAL_LINE;
225 319
@@ -565,11 +659,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
565 return TRACE_TYPE_PARTIAL_LINE; 659 return TRACE_TYPE_PARTIAL_LINE;
566 } 660 }
567 661
568 ret = seq_print_ip_sym(s, call->func, 0); 662 ret = trace_seq_printf(s, "%pf();\n", (void *)call->func);
569 if (!ret)
570 return TRACE_TYPE_PARTIAL_LINE;
571
572 ret = trace_seq_printf(s, "();\n");
573 if (!ret) 663 if (!ret)
574 return TRACE_TYPE_PARTIAL_LINE; 664 return TRACE_TYPE_PARTIAL_LINE;
575 665
@@ -612,11 +702,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
612 return TRACE_TYPE_PARTIAL_LINE; 702 return TRACE_TYPE_PARTIAL_LINE;
613 } 703 }
614 704
615 ret = seq_print_ip_sym(s, call->func, 0); 705 ret = trace_seq_printf(s, "%pf() {\n", (void *)call->func);
616 if (!ret)
617 return TRACE_TYPE_PARTIAL_LINE;
618
619 ret = trace_seq_printf(s, "() {\n");
620 if (!ret) 706 if (!ret)
621 return TRACE_TYPE_PARTIAL_LINE; 707 return TRACE_TYPE_PARTIAL_LINE;
622 708
@@ -934,6 +1020,8 @@ static struct tracer graph_trace __read_mostly = {
934 1020
935static __init int init_graph_trace(void) 1021static __init int init_graph_trace(void)
936{ 1022{
1023 max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
1024
937 return register_tracer(&graph_trace); 1025 return register_tracer(&graph_trace);
938} 1026}
939 1027
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index b923d13e2fa..5555b75a0d1 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -178,7 +178,6 @@ out_unlock:
178out: 178out:
179 data->critical_sequence = max_sequence; 179 data->critical_sequence = max_sequence;
180 data->preempt_timestamp = ftrace_now(cpu); 180 data->preempt_timestamp = ftrace_now(cpu);
181 tracing_reset(tr, cpu);
182 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 181 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
183} 182}
184 183
@@ -208,7 +207,6 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
208 data->critical_sequence = max_sequence; 207 data->critical_sequence = max_sequence;
209 data->preempt_timestamp = ftrace_now(cpu); 208 data->preempt_timestamp = ftrace_now(cpu);
210 data->critical_start = parent_ip ? : ip; 209 data->critical_start = parent_ip ? : ip;
211 tracing_reset(tr, cpu);
212 210
213 local_save_flags(flags); 211 local_save_flags(flags);
214 212
@@ -379,6 +377,7 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
379 irqsoff_trace = tr; 377 irqsoff_trace = tr;
380 /* make sure that the tracer is visible */ 378 /* make sure that the tracer is visible */
381 smp_wmb(); 379 smp_wmb();
380 tracing_reset_online_cpus(tr);
382 start_irqsoff_tracer(tr); 381 start_irqsoff_tracer(tr);
383} 382}
384 383
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index d53b45ed080..c4c9bbda53d 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -307,11 +307,12 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
307 struct trace_array_cpu *data, 307 struct trace_array_cpu *data,
308 struct mmiotrace_rw *rw) 308 struct mmiotrace_rw *rw)
309{ 309{
310 struct ring_buffer *buffer = tr->buffer;
310 struct ring_buffer_event *event; 311 struct ring_buffer_event *event;
311 struct trace_mmiotrace_rw *entry; 312 struct trace_mmiotrace_rw *entry;
312 int pc = preempt_count(); 313 int pc = preempt_count();
313 314
314 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW, 315 event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW,
315 sizeof(*entry), 0, pc); 316 sizeof(*entry), 0, pc);
316 if (!event) { 317 if (!event) {
317 atomic_inc(&dropped_count); 318 atomic_inc(&dropped_count);
@@ -319,7 +320,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
319 } 320 }
320 entry = ring_buffer_event_data(event); 321 entry = ring_buffer_event_data(event);
321 entry->rw = *rw; 322 entry->rw = *rw;
322 trace_buffer_unlock_commit(tr, event, 0, pc); 323 trace_buffer_unlock_commit(buffer, event, 0, pc);
323} 324}
324 325
325void mmio_trace_rw(struct mmiotrace_rw *rw) 326void mmio_trace_rw(struct mmiotrace_rw *rw)
@@ -333,11 +334,12 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
333 struct trace_array_cpu *data, 334 struct trace_array_cpu *data,
334 struct mmiotrace_map *map) 335 struct mmiotrace_map *map)
335{ 336{
337 struct ring_buffer *buffer = tr->buffer;
336 struct ring_buffer_event *event; 338 struct ring_buffer_event *event;
337 struct trace_mmiotrace_map *entry; 339 struct trace_mmiotrace_map *entry;
338 int pc = preempt_count(); 340 int pc = preempt_count();
339 341
340 event = trace_buffer_lock_reserve(tr, TRACE_MMIO_MAP, 342 event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_MAP,
341 sizeof(*entry), 0, pc); 343 sizeof(*entry), 0, pc);
342 if (!event) { 344 if (!event) {
343 atomic_inc(&dropped_count); 345 atomic_inc(&dropped_count);
@@ -345,7 +347,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
345 } 347 }
346 entry = ring_buffer_event_data(event); 348 entry = ring_buffer_event_data(event);
347 entry->map = *map; 349 entry->map = *map;
348 trace_buffer_unlock_commit(tr, event, 0, pc); 350 trace_buffer_unlock_commit(buffer, event, 0, pc);
349} 351}
350 352
351void mmio_trace_mapping(struct mmiotrace_map *map) 353void mmio_trace_mapping(struct mmiotrace_map *map)
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 8a30d9874cd..fe1a00f1445 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -38,6 +38,7 @@ static void probe_power_end(struct power_trace *it)
38{ 38{
39 struct ftrace_event_call *call = &event_power; 39 struct ftrace_event_call *call = &event_power;
40 struct ring_buffer_event *event; 40 struct ring_buffer_event *event;
41 struct ring_buffer *buffer;
41 struct trace_power *entry; 42 struct trace_power *entry;
42 struct trace_array_cpu *data; 43 struct trace_array_cpu *data;
43 struct trace_array *tr = power_trace; 44 struct trace_array *tr = power_trace;
@@ -45,18 +46,20 @@ static void probe_power_end(struct power_trace *it)
45 if (!trace_power_enabled) 46 if (!trace_power_enabled)
46 return; 47 return;
47 48
49 buffer = tr->buffer;
50
48 preempt_disable(); 51 preempt_disable();
49 it->end = ktime_get(); 52 it->end = ktime_get();
50 data = tr->data[smp_processor_id()]; 53 data = tr->data[smp_processor_id()];
51 54
52 event = trace_buffer_lock_reserve(tr, TRACE_POWER, 55 event = trace_buffer_lock_reserve(buffer, TRACE_POWER,
53 sizeof(*entry), 0, 0); 56 sizeof(*entry), 0, 0);
54 if (!event) 57 if (!event)
55 goto out; 58 goto out;
56 entry = ring_buffer_event_data(event); 59 entry = ring_buffer_event_data(event);
57 entry->state_data = *it; 60 entry->state_data = *it;
58 if (!filter_check_discard(call, entry, tr->buffer, event)) 61 if (!filter_check_discard(call, entry, buffer, event))
59 trace_buffer_unlock_commit(tr, event, 0, 0); 62 trace_buffer_unlock_commit(buffer, event, 0, 0);
60 out: 63 out:
61 preempt_enable(); 64 preempt_enable();
62} 65}
@@ -66,6 +69,7 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
66{ 69{
67 struct ftrace_event_call *call = &event_power; 70 struct ftrace_event_call *call = &event_power;
68 struct ring_buffer_event *event; 71 struct ring_buffer_event *event;
72 struct ring_buffer *buffer;
69 struct trace_power *entry; 73 struct trace_power *entry;
70 struct trace_array_cpu *data; 74 struct trace_array_cpu *data;
71 struct trace_array *tr = power_trace; 75 struct trace_array *tr = power_trace;
@@ -73,6 +77,8 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
73 if (!trace_power_enabled) 77 if (!trace_power_enabled)
74 return; 78 return;
75 79
80 buffer = tr->buffer;
81
76 memset(it, 0, sizeof(struct power_trace)); 82 memset(it, 0, sizeof(struct power_trace));
77 it->state = level; 83 it->state = level;
78 it->type = type; 84 it->type = type;
@@ -81,14 +87,14 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
81 it->end = it->stamp; 87 it->end = it->stamp;
82 data = tr->data[smp_processor_id()]; 88 data = tr->data[smp_processor_id()];
83 89
84 event = trace_buffer_lock_reserve(tr, TRACE_POWER, 90 event = trace_buffer_lock_reserve(buffer, TRACE_POWER,
85 sizeof(*entry), 0, 0); 91 sizeof(*entry), 0, 0);
86 if (!event) 92 if (!event)
87 goto out; 93 goto out;
88 entry = ring_buffer_event_data(event); 94 entry = ring_buffer_event_data(event);
89 entry->state_data = *it; 95 entry->state_data = *it;
90 if (!filter_check_discard(call, entry, tr->buffer, event)) 96 if (!filter_check_discard(call, entry, buffer, event))
91 trace_buffer_unlock_commit(tr, event, 0, 0); 97 trace_buffer_unlock_commit(buffer, event, 0, 0);
92 out: 98 out:
93 preempt_enable(); 99 preempt_enable();
94} 100}
@@ -144,14 +150,12 @@ static void power_trace_reset(struct trace_array *tr)
144 150
145static int power_trace_init(struct trace_array *tr) 151static int power_trace_init(struct trace_array *tr)
146{ 152{
147 int cpu;
148 power_trace = tr; 153 power_trace = tr;
149 154
150 trace_power_enabled = 1; 155 trace_power_enabled = 1;
151 tracing_power_register(); 156 tracing_power_register();
152 157
153 for_each_cpu(cpu, cpu_possible_mask) 158 tracing_reset_online_cpus(tr);
154 tracing_reset(tr, cpu);
155 return 0; 159 return 0;
156} 160}
157 161
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index a98106dd979..5fca0f51fde 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -20,6 +20,35 @@ static int sched_ref;
20static DEFINE_MUTEX(sched_register_mutex); 20static DEFINE_MUTEX(sched_register_mutex);
21static int sched_stopped; 21static int sched_stopped;
22 22
23
24void
25tracing_sched_switch_trace(struct trace_array *tr,
26 struct task_struct *prev,
27 struct task_struct *next,
28 unsigned long flags, int pc)
29{
30 struct ftrace_event_call *call = &event_context_switch;
31 struct ring_buffer *buffer = tr->buffer;
32 struct ring_buffer_event *event;
33 struct ctx_switch_entry *entry;
34
35 event = trace_buffer_lock_reserve(buffer, TRACE_CTX,
36 sizeof(*entry), flags, pc);
37 if (!event)
38 return;
39 entry = ring_buffer_event_data(event);
40 entry->prev_pid = prev->pid;
41 entry->prev_prio = prev->prio;
42 entry->prev_state = prev->state;
43 entry->next_pid = next->pid;
44 entry->next_prio = next->prio;
45 entry->next_state = next->state;
46 entry->next_cpu = task_cpu(next);
47
48 if (!filter_check_discard(call, entry, buffer, event))
49 trace_buffer_unlock_commit(buffer, event, flags, pc);
50}
51
23static void 52static void
24probe_sched_switch(struct rq *__rq, struct task_struct *prev, 53probe_sched_switch(struct rq *__rq, struct task_struct *prev,
25 struct task_struct *next) 54 struct task_struct *next)
@@ -49,6 +78,36 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
49 local_irq_restore(flags); 78 local_irq_restore(flags);
50} 79}
51 80
81void
82tracing_sched_wakeup_trace(struct trace_array *tr,
83 struct task_struct *wakee,
84 struct task_struct *curr,
85 unsigned long flags, int pc)
86{
87 struct ftrace_event_call *call = &event_wakeup;
88 struct ring_buffer_event *event;
89 struct ctx_switch_entry *entry;
90 struct ring_buffer *buffer = tr->buffer;
91
92 event = trace_buffer_lock_reserve(buffer, TRACE_WAKE,
93 sizeof(*entry), flags, pc);
94 if (!event)
95 return;
96 entry = ring_buffer_event_data(event);
97 entry->prev_pid = curr->pid;
98 entry->prev_prio = curr->prio;
99 entry->prev_state = curr->state;
100 entry->next_pid = wakee->pid;
101 entry->next_prio = wakee->prio;
102 entry->next_state = wakee->state;
103 entry->next_cpu = task_cpu(wakee);
104
105 if (!filter_check_discard(call, entry, buffer, event))
106 ring_buffer_unlock_commit(buffer, event);
107 ftrace_trace_stack(tr->buffer, flags, 6, pc);
108 ftrace_trace_userstack(tr->buffer, flags, pc);
109}
110
52static void 111static void
53probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) 112probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
54{ 113{
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index eacb2722517..ad69f105a7c 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -186,11 +186,6 @@ out:
186 186
187static void __wakeup_reset(struct trace_array *tr) 187static void __wakeup_reset(struct trace_array *tr)
188{ 188{
189 int cpu;
190
191 for_each_possible_cpu(cpu)
192 tracing_reset(tr, cpu);
193
194 wakeup_cpu = -1; 189 wakeup_cpu = -1;
195 wakeup_prio = -1; 190 wakeup_prio = -1;
196 191
@@ -204,6 +199,8 @@ static void wakeup_reset(struct trace_array *tr)
204{ 199{
205 unsigned long flags; 200 unsigned long flags;
206 201
202 tracing_reset_online_cpus(tr);
203
207 local_irq_save(flags); 204 local_irq_save(flags);
208 __raw_spin_lock(&wakeup_lock); 205 __raw_spin_lock(&wakeup_lock);
209 __wakeup_reset(tr); 206 __wakeup_reset(tr);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 00dd6485bdd..d2cdbabb4ea 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -288,6 +288,7 @@ trace_selftest_startup_function_graph(struct tracer *trace,
288 * to detect and recover from possible hangs 288 * to detect and recover from possible hangs
289 */ 289 */
290 tracing_reset_online_cpus(tr); 290 tracing_reset_online_cpus(tr);
291 set_graph_array(tr);
291 ret = register_ftrace_graph(&trace_graph_return, 292 ret = register_ftrace_graph(&trace_graph_return,
292 &trace_graph_entry_watchdog); 293 &trace_graph_entry_watchdog);
293 if (ret) { 294 if (ret) {
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 6a2a9d484cd..0f6facb050a 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -186,43 +186,33 @@ static const struct file_operations stack_max_size_fops = {
186}; 186};
187 187
188static void * 188static void *
189t_next(struct seq_file *m, void *v, loff_t *pos) 189__next(struct seq_file *m, loff_t *pos)
190{ 190{
191 long i; 191 long n = *pos - 1;
192 192
193 (*pos)++; 193 if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
194
195 if (v == SEQ_START_TOKEN)
196 i = 0;
197 else {
198 i = *(long *)v;
199 i++;
200 }
201
202 if (i >= max_stack_trace.nr_entries ||
203 stack_dump_trace[i] == ULONG_MAX)
204 return NULL; 194 return NULL;
205 195
206 m->private = (void *)i; 196 m->private = (void *)n;
207
208 return &m->private; 197 return &m->private;
209} 198}
210 199
211static void *t_start(struct seq_file *m, loff_t *pos) 200static void *
201t_next(struct seq_file *m, void *v, loff_t *pos)
212{ 202{
213 void *t = SEQ_START_TOKEN; 203 (*pos)++;
214 loff_t l = 0; 204 return __next(m, pos);
205}
215 206
207static void *t_start(struct seq_file *m, loff_t *pos)
208{
216 local_irq_disable(); 209 local_irq_disable();
217 __raw_spin_lock(&max_stack_lock); 210 __raw_spin_lock(&max_stack_lock);
218 211
219 if (*pos == 0) 212 if (*pos == 0)
220 return SEQ_START_TOKEN; 213 return SEQ_START_TOKEN;
221 214
222 for (; t && l < *pos; t = t_next(m, t, &l)) 215 return __next(m, pos);
223 ;
224
225 return t;
226} 216}
227 217
228static void t_stop(struct seq_file *m, void *p) 218static void t_stop(struct seq_file *m, void *p)
@@ -234,15 +224,8 @@ static void t_stop(struct seq_file *m, void *p)
234static int trace_lookup_stack(struct seq_file *m, long i) 224static int trace_lookup_stack(struct seq_file *m, long i)
235{ 225{
236 unsigned long addr = stack_dump_trace[i]; 226 unsigned long addr = stack_dump_trace[i];
237#ifdef CONFIG_KALLSYMS
238 char str[KSYM_SYMBOL_LEN];
239
240 sprint_symbol(str, addr);
241 227
242 return seq_printf(m, "%s\n", str); 228 return seq_printf(m, "%pF\n", (void *)addr);
243#else
244 return seq_printf(m, "%p\n", (void*)addr);
245#endif
246} 229}
247 230
248static void print_disabled(struct seq_file *m) 231static void print_disabled(struct seq_file *m)
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index aea321c82fa..a4bb239eb98 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -49,7 +49,8 @@ static struct dentry *stat_dir;
49 * but it will at least advance closer to the next one 49 * but it will at least advance closer to the next one
50 * to be released. 50 * to be released.
51 */ 51 */
52static struct rb_node *release_next(struct rb_node *node) 52static struct rb_node *release_next(struct tracer_stat *ts,
53 struct rb_node *node)
53{ 54{
54 struct stat_node *snode; 55 struct stat_node *snode;
55 struct rb_node *parent = rb_parent(node); 56 struct rb_node *parent = rb_parent(node);
@@ -67,6 +68,8 @@ static struct rb_node *release_next(struct rb_node *node)
67 parent->rb_right = NULL; 68 parent->rb_right = NULL;
68 69
69 snode = container_of(node, struct stat_node, node); 70 snode = container_of(node, struct stat_node, node);
71 if (ts->stat_release)
72 ts->stat_release(snode->stat);
70 kfree(snode); 73 kfree(snode);
71 74
72 return parent; 75 return parent;
@@ -78,7 +81,7 @@ static void __reset_stat_session(struct stat_session *session)
78 struct rb_node *node = session->stat_root.rb_node; 81 struct rb_node *node = session->stat_root.rb_node;
79 82
80 while (node) 83 while (node)
81 node = release_next(node); 84 node = release_next(session->ts, node);
82 85
83 session->stat_root = RB_ROOT; 86 session->stat_root = RB_ROOT;
84} 87}
@@ -200,17 +203,21 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos)
200{ 203{
201 struct stat_session *session = s->private; 204 struct stat_session *session = s->private;
202 struct rb_node *node; 205 struct rb_node *node;
206 int n = *pos;
203 int i; 207 int i;
204 208
205 /* Prevent from tracer switch or rbtree modification */ 209 /* Prevent from tracer switch or rbtree modification */
206 mutex_lock(&session->stat_mutex); 210 mutex_lock(&session->stat_mutex);
207 211
208 /* If we are in the beginning of the file, print the headers */ 212 /* If we are in the beginning of the file, print the headers */
209 if (!*pos && session->ts->stat_headers) 213 if (session->ts->stat_headers) {
210 return SEQ_START_TOKEN; 214 if (n == 0)
215 return SEQ_START_TOKEN;
216 n--;
217 }
211 218
212 node = rb_first(&session->stat_root); 219 node = rb_first(&session->stat_root);
213 for (i = 0; node && i < *pos; i++) 220 for (i = 0; node && i < n; i++)
214 node = rb_next(node); 221 node = rb_next(node);
215 222
216 return node; 223 return node;
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
index f3546a2cd82..8f03914b9a6 100644
--- a/kernel/trace/trace_stat.h
+++ b/kernel/trace/trace_stat.h
@@ -18,6 +18,8 @@ struct tracer_stat {
18 int (*stat_cmp)(void *p1, void *p2); 18 int (*stat_cmp)(void *p1, void *p2);
19 /* Print a stat entry */ 19 /* Print a stat entry */
20 int (*stat_show)(struct seq_file *s, void *p); 20 int (*stat_show)(struct seq_file *s, void *p);
21 /* Release an entry */
22 void (*stat_release)(void *stat);
21 /* Print the headers of your stat entries */ 23 /* Print the headers of your stat entries */
22 int (*stat_headers)(struct seq_file *s); 24 int (*stat_headers)(struct seq_file *s);
23}; 25};
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 5e579645ac8..8712ce3c6a0 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,30 +1,18 @@
1#include <trace/syscall.h> 1#include <trace/syscall.h>
2#include <trace/events/syscalls.h>
2#include <linux/kernel.h> 3#include <linux/kernel.h>
4#include <linux/ftrace.h>
5#include <linux/perf_counter.h>
3#include <asm/syscall.h> 6#include <asm/syscall.h>
4 7
5#include "trace_output.h" 8#include "trace_output.h"
6#include "trace.h" 9#include "trace.h"
7 10
8/* Keep a counter of the syscall tracing users */
9static int refcount;
10
11/* Prevent from races on thread flags toggling */
12static DEFINE_MUTEX(syscall_trace_lock); 11static DEFINE_MUTEX(syscall_trace_lock);
13 12static int sys_refcount_enter;
14/* Option to display the parameters types */ 13static int sys_refcount_exit;
15enum { 14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
16 TRACE_SYSCALLS_OPT_TYPES = 0x1, 15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
17};
18
19static struct tracer_opt syscalls_opts[] = {
20 { TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) },
21 { }
22};
23
24static struct tracer_flags syscalls_flags = {
25 .val = 0, /* By default: no parameters types */
26 .opts = syscalls_opts
27};
28 16
29enum print_line_t 17enum print_line_t
30print_syscall_enter(struct trace_iterator *iter, int flags) 18print_syscall_enter(struct trace_iterator *iter, int flags)
@@ -35,35 +23,46 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
35 struct syscall_metadata *entry; 23 struct syscall_metadata *entry;
36 int i, ret, syscall; 24 int i, ret, syscall;
37 25
38 trace_assign_type(trace, ent); 26 trace = (typeof(trace))ent;
39
40 syscall = trace->nr; 27 syscall = trace->nr;
41
42 entry = syscall_nr_to_meta(syscall); 28 entry = syscall_nr_to_meta(syscall);
29
43 if (!entry) 30 if (!entry)
44 goto end; 31 goto end;
45 32
33 if (entry->enter_id != ent->type) {
34 WARN_ON_ONCE(1);
35 goto end;
36 }
37
46 ret = trace_seq_printf(s, "%s(", entry->name); 38 ret = trace_seq_printf(s, "%s(", entry->name);
47 if (!ret) 39 if (!ret)
48 return TRACE_TYPE_PARTIAL_LINE; 40 return TRACE_TYPE_PARTIAL_LINE;
49 41
50 for (i = 0; i < entry->nb_args; i++) { 42 for (i = 0; i < entry->nb_args; i++) {
51 /* parameter types */ 43 /* parameter types */
52 if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) { 44 if (trace_flags & TRACE_ITER_VERBOSE) {
53 ret = trace_seq_printf(s, "%s ", entry->types[i]); 45 ret = trace_seq_printf(s, "%s ", entry->types[i]);
54 if (!ret) 46 if (!ret)
55 return TRACE_TYPE_PARTIAL_LINE; 47 return TRACE_TYPE_PARTIAL_LINE;
56 } 48 }
57 /* parameter values */ 49 /* parameter values */
58 ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i], 50 ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
59 trace->args[i], 51 trace->args[i],
60 i == entry->nb_args - 1 ? ")" : ","); 52 i == entry->nb_args - 1 ? "" : ", ");
61 if (!ret) 53 if (!ret)
62 return TRACE_TYPE_PARTIAL_LINE; 54 return TRACE_TYPE_PARTIAL_LINE;
63 } 55 }
64 56
57 ret = trace_seq_putc(s, ')');
58 if (!ret)
59 return TRACE_TYPE_PARTIAL_LINE;
60
65end: 61end:
66 trace_seq_printf(s, "\n"); 62 ret = trace_seq_putc(s, '\n');
63 if (!ret)
64 return TRACE_TYPE_PARTIAL_LINE;
65
67 return TRACE_TYPE_HANDLED; 66 return TRACE_TYPE_HANDLED;
68} 67}
69 68
@@ -77,16 +76,20 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
77 struct syscall_metadata *entry; 76 struct syscall_metadata *entry;
78 int ret; 77 int ret;
79 78
80 trace_assign_type(trace, ent); 79 trace = (typeof(trace))ent;
81
82 syscall = trace->nr; 80 syscall = trace->nr;
83
84 entry = syscall_nr_to_meta(syscall); 81 entry = syscall_nr_to_meta(syscall);
82
85 if (!entry) { 83 if (!entry) {
86 trace_seq_printf(s, "\n"); 84 trace_seq_printf(s, "\n");
87 return TRACE_TYPE_HANDLED; 85 return TRACE_TYPE_HANDLED;
88 } 86 }
89 87
88 if (entry->exit_id != ent->type) {
89 WARN_ON_ONCE(1);
90 return TRACE_TYPE_UNHANDLED;
91 }
92
90 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, 93 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
91 trace->ret); 94 trace->ret);
92 if (!ret) 95 if (!ret)
@@ -95,62 +98,140 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
95 return TRACE_TYPE_HANDLED; 98 return TRACE_TYPE_HANDLED;
96} 99}
97 100
98void start_ftrace_syscalls(void) 101extern char *__bad_type_size(void);
102
103#define SYSCALL_FIELD(type, name) \
104 sizeof(type) != sizeof(trace.name) ? \
105 __bad_type_size() : \
106 #type, #name, offsetof(typeof(trace), name), sizeof(trace.name)
107
108int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
99{ 109{
100 unsigned long flags; 110 int i;
101 struct task_struct *g, *t; 111 int nr;
112 int ret;
113 struct syscall_metadata *entry;
114 struct syscall_trace_enter trace;
115 int offset = offsetof(struct syscall_trace_enter, args);
102 116
103 mutex_lock(&syscall_trace_lock); 117 nr = syscall_name_to_nr(call->data);
118 entry = syscall_nr_to_meta(nr);
104 119
105 /* Don't enable the flag on the tasks twice */ 120 if (!entry)
106 if (++refcount != 1) 121 return 0;
107 goto unlock;
108 122
109 arch_init_ftrace_syscalls(); 123 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
110 read_lock_irqsave(&tasklist_lock, flags); 124 SYSCALL_FIELD(int, nr));
125 if (!ret)
126 return 0;
111 127
112 do_each_thread(g, t) { 128 for (i = 0; i < entry->nb_args; i++) {
113 set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); 129 ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
114 } while_each_thread(g, t); 130 entry->args[i]);
131 if (!ret)
132 return 0;
133 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
134 sizeof(unsigned long));
135 if (!ret)
136 return 0;
137 offset += sizeof(unsigned long);
138 }
115 139
116 read_unlock_irqrestore(&tasklist_lock, flags); 140 trace_seq_puts(s, "\nprint fmt: \"");
141 for (i = 0; i < entry->nb_args; i++) {
142 ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
143 sizeof(unsigned long),
144 i == entry->nb_args - 1 ? "" : ", ");
145 if (!ret)
146 return 0;
147 }
148 trace_seq_putc(s, '"');
117 149
118unlock: 150 for (i = 0; i < entry->nb_args; i++) {
119 mutex_unlock(&syscall_trace_lock); 151 ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
152 entry->args[i]);
153 if (!ret)
154 return 0;
155 }
156
157 return trace_seq_putc(s, '\n');
120} 158}
121 159
122void stop_ftrace_syscalls(void) 160int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
123{ 161{
124 unsigned long flags; 162 int ret;
125 struct task_struct *g, *t; 163 struct syscall_trace_exit trace;
126 164
127 mutex_lock(&syscall_trace_lock); 165 ret = trace_seq_printf(s,
166 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
167 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
168 SYSCALL_FIELD(int, nr),
169 SYSCALL_FIELD(unsigned long, ret));
170 if (!ret)
171 return 0;
128 172
129 /* There are perhaps still some users */ 173 return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
130 if (--refcount) 174}
131 goto unlock;
132 175
133 read_lock_irqsave(&tasklist_lock, flags); 176int syscall_enter_define_fields(struct ftrace_event_call *call)
177{
178 struct syscall_trace_enter trace;
179 struct syscall_metadata *meta;
180 int ret;
181 int nr;
182 int i;
183 int offset = offsetof(typeof(trace), args);
184
185 nr = syscall_name_to_nr(call->data);
186 meta = syscall_nr_to_meta(nr);
187
188 if (!meta)
189 return 0;
190
191 ret = trace_define_common_fields(call);
192 if (ret)
193 return ret;
194
195 for (i = 0; i < meta->nb_args; i++) {
196 ret = trace_define_field(call, meta->types[i],
197 meta->args[i], offset,
198 sizeof(unsigned long), 0,
199 FILTER_OTHER);
200 offset += sizeof(unsigned long);
201 }
134 202
135 do_each_thread(g, t) { 203 return ret;
136 clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); 204}
137 } while_each_thread(g, t);
138 205
139 read_unlock_irqrestore(&tasklist_lock, flags); 206int syscall_exit_define_fields(struct ftrace_event_call *call)
207{
208 struct syscall_trace_exit trace;
209 int ret;
140 210
141unlock: 211 ret = trace_define_common_fields(call);
142 mutex_unlock(&syscall_trace_lock); 212 if (ret)
213 return ret;
214
215 ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0,
216 FILTER_OTHER);
217
218 return ret;
143} 219}
144 220
145void ftrace_syscall_enter(struct pt_regs *regs) 221void ftrace_syscall_enter(struct pt_regs *regs, long id)
146{ 222{
147 struct syscall_trace_enter *entry; 223 struct syscall_trace_enter *entry;
148 struct syscall_metadata *sys_data; 224 struct syscall_metadata *sys_data;
149 struct ring_buffer_event *event; 225 struct ring_buffer_event *event;
226 struct ring_buffer *buffer;
150 int size; 227 int size;
151 int syscall_nr; 228 int syscall_nr;
152 229
153 syscall_nr = syscall_get_nr(current, regs); 230 syscall_nr = syscall_get_nr(current, regs);
231 if (syscall_nr < 0)
232 return;
233 if (!test_bit(syscall_nr, enabled_enter_syscalls))
234 return;
154 235
155 sys_data = syscall_nr_to_meta(syscall_nr); 236 sys_data = syscall_nr_to_meta(syscall_nr);
156 if (!sys_data) 237 if (!sys_data)
@@ -158,8 +239,8 @@ void ftrace_syscall_enter(struct pt_regs *regs)
158 239
159 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 240 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
160 241
161 event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size, 242 event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
162 0, 0); 243 size, 0, 0);
163 if (!event) 244 if (!event)
164 return; 245 return;
165 246
@@ -167,24 +248,30 @@ void ftrace_syscall_enter(struct pt_regs *regs)
167 entry->nr = syscall_nr; 248 entry->nr = syscall_nr;
168 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); 249 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
169 250
170 trace_current_buffer_unlock_commit(event, 0, 0); 251 if (!filter_current_check_discard(buffer, sys_data->enter_event,
171 trace_wake_up(); 252 entry, event))
253 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
172} 254}
173 255
174void ftrace_syscall_exit(struct pt_regs *regs) 256void ftrace_syscall_exit(struct pt_regs *regs, long ret)
175{ 257{
176 struct syscall_trace_exit *entry; 258 struct syscall_trace_exit *entry;
177 struct syscall_metadata *sys_data; 259 struct syscall_metadata *sys_data;
178 struct ring_buffer_event *event; 260 struct ring_buffer_event *event;
261 struct ring_buffer *buffer;
179 int syscall_nr; 262 int syscall_nr;
180 263
181 syscall_nr = syscall_get_nr(current, regs); 264 syscall_nr = syscall_get_nr(current, regs);
265 if (syscall_nr < 0)
266 return;
267 if (!test_bit(syscall_nr, enabled_exit_syscalls))
268 return;
182 269
183 sys_data = syscall_nr_to_meta(syscall_nr); 270 sys_data = syscall_nr_to_meta(syscall_nr);
184 if (!sys_data) 271 if (!sys_data)
185 return; 272 return;
186 273
187 event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT, 274 event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
188 sizeof(*entry), 0, 0); 275 sizeof(*entry), 0, 0);
189 if (!event) 276 if (!event)
190 return; 277 return;
@@ -193,58 +280,244 @@ void ftrace_syscall_exit(struct pt_regs *regs)
193 entry->nr = syscall_nr; 280 entry->nr = syscall_nr;
194 entry->ret = syscall_get_return_value(current, regs); 281 entry->ret = syscall_get_return_value(current, regs);
195 282
196 trace_current_buffer_unlock_commit(event, 0, 0); 283 if (!filter_current_check_discard(buffer, sys_data->exit_event,
197 trace_wake_up(); 284 entry, event))
285 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
198} 286}
199 287
200static int init_syscall_tracer(struct trace_array *tr) 288int reg_event_syscall_enter(void *ptr)
201{ 289{
202 start_ftrace_syscalls(); 290 int ret = 0;
291 int num;
292 char *name;
293
294 name = (char *)ptr;
295 num = syscall_name_to_nr(name);
296 if (num < 0 || num >= NR_syscalls)
297 return -ENOSYS;
298 mutex_lock(&syscall_trace_lock);
299 if (!sys_refcount_enter)
300 ret = register_trace_sys_enter(ftrace_syscall_enter);
301 if (ret) {
302 pr_info("event trace: Could not activate"
303 "syscall entry trace point");
304 } else {
305 set_bit(num, enabled_enter_syscalls);
306 sys_refcount_enter++;
307 }
308 mutex_unlock(&syscall_trace_lock);
309 return ret;
310}
311
312void unreg_event_syscall_enter(void *ptr)
313{
314 int num;
315 char *name;
203 316
204 return 0; 317 name = (char *)ptr;
318 num = syscall_name_to_nr(name);
319 if (num < 0 || num >= NR_syscalls)
320 return;
321 mutex_lock(&syscall_trace_lock);
322 sys_refcount_enter--;
323 clear_bit(num, enabled_enter_syscalls);
324 if (!sys_refcount_enter)
325 unregister_trace_sys_enter(ftrace_syscall_enter);
326 mutex_unlock(&syscall_trace_lock);
205} 327}
206 328
207static void reset_syscall_tracer(struct trace_array *tr) 329int reg_event_syscall_exit(void *ptr)
208{ 330{
209 stop_ftrace_syscalls(); 331 int ret = 0;
210 tracing_reset_online_cpus(tr); 332 int num;
333 char *name;
334
335 name = (char *)ptr;
336 num = syscall_name_to_nr(name);
337 if (num < 0 || num >= NR_syscalls)
338 return -ENOSYS;
339 mutex_lock(&syscall_trace_lock);
340 if (!sys_refcount_exit)
341 ret = register_trace_sys_exit(ftrace_syscall_exit);
342 if (ret) {
343 pr_info("event trace: Could not activate"
344 "syscall exit trace point");
345 } else {
346 set_bit(num, enabled_exit_syscalls);
347 sys_refcount_exit++;
348 }
349 mutex_unlock(&syscall_trace_lock);
350 return ret;
211} 351}
212 352
213static struct trace_event syscall_enter_event = { 353void unreg_event_syscall_exit(void *ptr)
214 .type = TRACE_SYSCALL_ENTER, 354{
215 .trace = print_syscall_enter, 355 int num;
216}; 356 char *name;
357
358 name = (char *)ptr;
359 num = syscall_name_to_nr(name);
360 if (num < 0 || num >= NR_syscalls)
361 return;
362 mutex_lock(&syscall_trace_lock);
363 sys_refcount_exit--;
364 clear_bit(num, enabled_exit_syscalls);
365 if (!sys_refcount_exit)
366 unregister_trace_sys_exit(ftrace_syscall_exit);
367 mutex_unlock(&syscall_trace_lock);
368}
217 369
218static struct trace_event syscall_exit_event = { 370struct trace_event event_syscall_enter = {
219 .type = TRACE_SYSCALL_EXIT, 371 .trace = print_syscall_enter,
220 .trace = print_syscall_exit,
221}; 372};
222 373
223static struct tracer syscall_tracer __read_mostly = { 374struct trace_event event_syscall_exit = {
224 .name = "syscall", 375 .trace = print_syscall_exit,
225 .init = init_syscall_tracer,
226 .reset = reset_syscall_tracer,
227 .flags = &syscalls_flags,
228}; 376};
229 377
230__init int register_ftrace_syscalls(void) 378#ifdef CONFIG_EVENT_PROFILE
379
380static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
381static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
382static int sys_prof_refcount_enter;
383static int sys_prof_refcount_exit;
384
385static void prof_syscall_enter(struct pt_regs *regs, long id)
231{ 386{
232 int ret; 387 struct syscall_trace_enter *rec;
388 struct syscall_metadata *sys_data;
389 int syscall_nr;
390 int size;
233 391
234 ret = register_ftrace_event(&syscall_enter_event); 392 syscall_nr = syscall_get_nr(current, regs);
235 if (!ret) { 393 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
236 printk(KERN_WARNING "event %d failed to register\n", 394 return;
237 syscall_enter_event.type); 395
238 WARN_ON_ONCE(1); 396 sys_data = syscall_nr_to_meta(syscall_nr);
397 if (!sys_data)
398 return;
399
400 /* get the size after alignment with the u32 buffer size field */
401 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
402 size = ALIGN(size + sizeof(u32), sizeof(u64));
403 size -= sizeof(u32);
404
405 do {
406 char raw_data[size];
407
408 /* zero the dead bytes from align to not leak stack to user */
409 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
410
411 rec = (struct syscall_trace_enter *) raw_data;
412 tracing_generic_entry_update(&rec->ent, 0, 0);
413 rec->ent.type = sys_data->enter_id;
414 rec->nr = syscall_nr;
415 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
416 (unsigned long *)&rec->args);
417 perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
418 } while(0);
419}
420
421int reg_prof_syscall_enter(char *name)
422{
423 int ret = 0;
424 int num;
425
426 num = syscall_name_to_nr(name);
427 if (num < 0 || num >= NR_syscalls)
428 return -ENOSYS;
429
430 mutex_lock(&syscall_trace_lock);
431 if (!sys_prof_refcount_enter)
432 ret = register_trace_sys_enter(prof_syscall_enter);
433 if (ret) {
434 pr_info("event trace: Could not activate"
435 "syscall entry trace point");
436 } else {
437 set_bit(num, enabled_prof_enter_syscalls);
438 sys_prof_refcount_enter++;
239 } 439 }
440 mutex_unlock(&syscall_trace_lock);
441 return ret;
442}
240 443
241 ret = register_ftrace_event(&syscall_exit_event); 444void unreg_prof_syscall_enter(char *name)
242 if (!ret) { 445{
243 printk(KERN_WARNING "event %d failed to register\n", 446 int num;
244 syscall_exit_event.type); 447
245 WARN_ON_ONCE(1); 448 num = syscall_name_to_nr(name);
449 if (num < 0 || num >= NR_syscalls)
450 return;
451
452 mutex_lock(&syscall_trace_lock);
453 sys_prof_refcount_enter--;
454 clear_bit(num, enabled_prof_enter_syscalls);
455 if (!sys_prof_refcount_enter)
456 unregister_trace_sys_enter(prof_syscall_enter);
457 mutex_unlock(&syscall_trace_lock);
458}
459
460static void prof_syscall_exit(struct pt_regs *regs, long ret)
461{
462 struct syscall_metadata *sys_data;
463 struct syscall_trace_exit rec;
464 int syscall_nr;
465
466 syscall_nr = syscall_get_nr(current, regs);
467 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
468 return;
469
470 sys_data = syscall_nr_to_meta(syscall_nr);
471 if (!sys_data)
472 return;
473
474 tracing_generic_entry_update(&rec.ent, 0, 0);
475 rec.ent.type = sys_data->exit_id;
476 rec.nr = syscall_nr;
477 rec.ret = syscall_get_return_value(current, regs);
478
479 perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
480}
481
482int reg_prof_syscall_exit(char *name)
483{
484 int ret = 0;
485 int num;
486
487 num = syscall_name_to_nr(name);
488 if (num < 0 || num >= NR_syscalls)
489 return -ENOSYS;
490
491 mutex_lock(&syscall_trace_lock);
492 if (!sys_prof_refcount_exit)
493 ret = register_trace_sys_exit(prof_syscall_exit);
494 if (ret) {
495 pr_info("event trace: Could not activate"
496 "syscall entry trace point");
497 } else {
498 set_bit(num, enabled_prof_exit_syscalls);
499 sys_prof_refcount_exit++;
246 } 500 }
501 mutex_unlock(&syscall_trace_lock);
502 return ret;
503}
247 504
248 return register_tracer(&syscall_tracer); 505void unreg_prof_syscall_exit(char *name)
506{
507 int num;
508
509 num = syscall_name_to_nr(name);
510 if (num < 0 || num >= NR_syscalls)
511 return;
512
513 mutex_lock(&syscall_trace_lock);
514 sys_prof_refcount_exit--;
515 clear_bit(num, enabled_prof_exit_syscalls);
516 if (!sys_prof_refcount_exit)
517 unregister_trace_sys_exit(prof_syscall_exit);
518 mutex_unlock(&syscall_trace_lock);
249} 519}
250device_initcall(register_ftrace_syscalls); 520
521#endif
522
523
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 97fcea4acce..40cafb07dff 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -9,6 +9,7 @@
9#include <trace/events/workqueue.h> 9#include <trace/events/workqueue.h>
10#include <linux/list.h> 10#include <linux/list.h>
11#include <linux/percpu.h> 11#include <linux/percpu.h>
12#include <linux/kref.h>
12#include "trace_stat.h" 13#include "trace_stat.h"
13#include "trace.h" 14#include "trace.h"
14 15
@@ -16,6 +17,7 @@
16/* A cpu workqueue thread */ 17/* A cpu workqueue thread */
17struct cpu_workqueue_stats { 18struct cpu_workqueue_stats {
18 struct list_head list; 19 struct list_head list;
20 struct kref kref;
19 int cpu; 21 int cpu;
20 pid_t pid; 22 pid_t pid;
21/* Can be inserted from interrupt or user context, need to be atomic */ 23/* Can be inserted from interrupt or user context, need to be atomic */
@@ -39,6 +41,11 @@ struct workqueue_global_stats {
39static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat); 41static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
40#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu)) 42#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
41 43
44static void cpu_workqueue_stat_free(struct kref *kref)
45{
46 kfree(container_of(kref, struct cpu_workqueue_stats, kref));
47}
48
42/* Insertion of a work */ 49/* Insertion of a work */
43static void 50static void
44probe_workqueue_insertion(struct task_struct *wq_thread, 51probe_workqueue_insertion(struct task_struct *wq_thread,
@@ -96,8 +103,8 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
96 return; 103 return;
97 } 104 }
98 INIT_LIST_HEAD(&cws->list); 105 INIT_LIST_HEAD(&cws->list);
106 kref_init(&cws->kref);
99 cws->cpu = cpu; 107 cws->cpu = cpu;
100
101 cws->pid = wq_thread->pid; 108 cws->pid = wq_thread->pid;
102 109
103 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 110 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
@@ -118,7 +125,7 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
118 list) { 125 list) {
119 if (node->pid == wq_thread->pid) { 126 if (node->pid == wq_thread->pid) {
120 list_del(&node->list); 127 list_del(&node->list);
121 kfree(node); 128 kref_put(&node->kref, cpu_workqueue_stat_free);
122 goto found; 129 goto found;
123 } 130 }
124 } 131 }
@@ -137,9 +144,11 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
137 144
138 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 145 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
139 146
140 if (!list_empty(&workqueue_cpu_stat(cpu)->list)) 147 if (!list_empty(&workqueue_cpu_stat(cpu)->list)) {
141 ret = list_entry(workqueue_cpu_stat(cpu)->list.next, 148 ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
142 struct cpu_workqueue_stats, list); 149 struct cpu_workqueue_stats, list);
150 kref_get(&ret->kref);
151 }
143 152
144 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); 153 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
145 154
@@ -162,9 +171,9 @@ static void *workqueue_stat_start(struct tracer_stat *trace)
162static void *workqueue_stat_next(void *prev, int idx) 171static void *workqueue_stat_next(void *prev, int idx)
163{ 172{
164 struct cpu_workqueue_stats *prev_cws = prev; 173 struct cpu_workqueue_stats *prev_cws = prev;
174 struct cpu_workqueue_stats *ret;
165 int cpu = prev_cws->cpu; 175 int cpu = prev_cws->cpu;
166 unsigned long flags; 176 unsigned long flags;
167 void *ret = NULL;
168 177
169 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 178 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
170 if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) { 179 if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
@@ -175,11 +184,14 @@ static void *workqueue_stat_next(void *prev, int idx)
175 return NULL; 184 return NULL;
176 } while (!(ret = workqueue_stat_start_cpu(cpu))); 185 } while (!(ret = workqueue_stat_start_cpu(cpu)));
177 return ret; 186 return ret;
187 } else {
188 ret = list_entry(prev_cws->list.next,
189 struct cpu_workqueue_stats, list);
190 kref_get(&ret->kref);
178 } 191 }
179 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); 192 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
180 193
181 return list_entry(prev_cws->list.next, struct cpu_workqueue_stats, 194 return ret;
182 list);
183} 195}
184 196
185static int workqueue_stat_show(struct seq_file *s, void *p) 197static int workqueue_stat_show(struct seq_file *s, void *p)
@@ -203,6 +215,13 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
203 return 0; 215 return 0;
204} 216}
205 217
218static void workqueue_stat_release(void *stat)
219{
220 struct cpu_workqueue_stats *node = stat;
221
222 kref_put(&node->kref, cpu_workqueue_stat_free);
223}
224
206static int workqueue_stat_headers(struct seq_file *s) 225static int workqueue_stat_headers(struct seq_file *s)
207{ 226{
208 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); 227 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
@@ -215,6 +234,7 @@ struct tracer_stat workqueue_stats __read_mostly = {
215 .stat_start = workqueue_stat_start, 234 .stat_start = workqueue_stat_start,
216 .stat_next = workqueue_stat_next, 235 .stat_next = workqueue_stat_next,
217 .stat_show = workqueue_stat_show, 236 .stat_show = workqueue_stat_show,
237 .stat_release = workqueue_stat_release,
218 .stat_headers = workqueue_stat_headers 238 .stat_headers = workqueue_stat_headers
219}; 239};
220 240