Diffstat (limited to 'kernel')
-rw-r--r--  kernel/kprobes.c                     |  30
-rw-r--r--  kernel/trace/ftrace.c                |  92
-rw-r--r--  kernel/trace/kmemtrace.c             | 145
-rw-r--r--  kernel/trace/ring_buffer.c           | 940
-rw-r--r--  kernel/trace/trace.c                 | 208
-rw-r--r--  kernel/trace/trace.h                 |  37
-rw-r--r--  kernel/trace/trace_events.c          |  69
-rw-r--r--  kernel/trace/trace_events_filter.c   | 169
-rw-r--r--  kernel/trace/trace_functions.c       |   4
-rw-r--r--  kernel/trace/trace_functions_graph.c | 164
-rw-r--r--  kernel/trace/trace_sched_switch.c    |  57
-rw-r--r--  kernel/trace/trace_selftest.c        |   1
-rw-r--r--  kernel/trace/trace_stack.c           |   9
-rw-r--r--  kernel/trace/trace_stat.c            |   7
-rw-r--r--  kernel/trace/trace_stat.h            |   2
-rw-r--r--  kernel/trace/trace_workqueue.c       |  32
16 files changed, 1320 insertions, 646 deletions
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 0540948e29ab..ef177d653b2c 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -103,7 +103,7 @@ static struct kprobe_blackpoint kprobe_blacklist[] = {
103#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) 103#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
104 104
105struct kprobe_insn_page { 105struct kprobe_insn_page {
106 struct hlist_node hlist; 106 struct list_head list;
107 kprobe_opcode_t *insns; /* Page of instruction slots */ 107 kprobe_opcode_t *insns; /* Page of instruction slots */
108 char slot_used[INSNS_PER_PAGE]; 108 char slot_used[INSNS_PER_PAGE];
109 int nused; 109 int nused;
@@ -117,7 +117,7 @@ enum kprobe_slot_state {
117}; 117};
118 118
119static DEFINE_MUTEX(kprobe_insn_mutex); /* Protects kprobe_insn_pages */ 119static DEFINE_MUTEX(kprobe_insn_mutex); /* Protects kprobe_insn_pages */
120static struct hlist_head kprobe_insn_pages; 120static LIST_HEAD(kprobe_insn_pages);
121static int kprobe_garbage_slots; 121static int kprobe_garbage_slots;
122static int collect_garbage_slots(void); 122static int collect_garbage_slots(void);
123 123
@@ -152,10 +152,9 @@ loop_end:
152static kprobe_opcode_t __kprobes *__get_insn_slot(void) 152static kprobe_opcode_t __kprobes *__get_insn_slot(void)
153{ 153{
154 struct kprobe_insn_page *kip; 154 struct kprobe_insn_page *kip;
155 struct hlist_node *pos;
156 155
157 retry: 156 retry:
158 hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) { 157 list_for_each_entry(kip, &kprobe_insn_pages, list) {
159 if (kip->nused < INSNS_PER_PAGE) { 158 if (kip->nused < INSNS_PER_PAGE) {
160 int i; 159 int i;
161 for (i = 0; i < INSNS_PER_PAGE; i++) { 160 for (i = 0; i < INSNS_PER_PAGE; i++) {
@@ -189,8 +188,8 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(void)
189 kfree(kip); 188 kfree(kip);
190 return NULL; 189 return NULL;
191 } 190 }
192 INIT_HLIST_NODE(&kip->hlist); 191 INIT_LIST_HEAD(&kip->list);
193 hlist_add_head(&kip->hlist, &kprobe_insn_pages); 192 list_add(&kip->list, &kprobe_insn_pages);
194 memset(kip->slot_used, SLOT_CLEAN, INSNS_PER_PAGE); 193 memset(kip->slot_used, SLOT_CLEAN, INSNS_PER_PAGE);
195 kip->slot_used[0] = SLOT_USED; 194 kip->slot_used[0] = SLOT_USED;
196 kip->nused = 1; 195 kip->nused = 1;
@@ -219,12 +218,8 @@ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
219 * so as not to have to set it up again the 218 * so as not to have to set it up again the
220 * next time somebody inserts a probe. 219 * next time somebody inserts a probe.
221 */ 220 */
222 hlist_del(&kip->hlist); 221 if (!list_is_singular(&kprobe_insn_pages)) {
223 if (hlist_empty(&kprobe_insn_pages)) { 222 list_del(&kip->list);
224 INIT_HLIST_NODE(&kip->hlist);
225 hlist_add_head(&kip->hlist,
226 &kprobe_insn_pages);
227 } else {
228 module_free(NULL, kip->insns); 223 module_free(NULL, kip->insns);
229 kfree(kip); 224 kfree(kip);
230 } 225 }
@@ -235,14 +230,13 @@ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
235 230
236static int __kprobes collect_garbage_slots(void) 231static int __kprobes collect_garbage_slots(void)
237{ 232{
238 struct kprobe_insn_page *kip; 233 struct kprobe_insn_page *kip, *next;
239 struct hlist_node *pos, *next;
240 234
241 /* Ensure no-one is preepmted on the garbages */ 235 /* Ensure no-one is preepmted on the garbages */
242 if (check_safety()) 236 if (check_safety())
243 return -EAGAIN; 237 return -EAGAIN;
244 238
245 hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) { 239 list_for_each_entry_safe(kip, next, &kprobe_insn_pages, list) {
246 int i; 240 int i;
247 if (kip->ngarbage == 0) 241 if (kip->ngarbage == 0)
248 continue; 242 continue;
@@ -260,19 +254,17 @@ static int __kprobes collect_garbage_slots(void)
260void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) 254void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
261{ 255{
262 struct kprobe_insn_page *kip; 256 struct kprobe_insn_page *kip;
263 struct hlist_node *pos;
264 257
265 mutex_lock(&kprobe_insn_mutex); 258 mutex_lock(&kprobe_insn_mutex);
266 hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) { 259 list_for_each_entry(kip, &kprobe_insn_pages, list) {
267 if (kip->insns <= slot && 260 if (kip->insns <= slot &&
268 slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) { 261 slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
269 int i = (slot - kip->insns) / MAX_INSN_SIZE; 262 int i = (slot - kip->insns) / MAX_INSN_SIZE;
270 if (dirty) { 263 if (dirty) {
271 kip->slot_used[i] = SLOT_DIRTY; 264 kip->slot_used[i] = SLOT_DIRTY;
272 kip->ngarbage++; 265 kip->ngarbage++;
273 } else { 266 } else
274 collect_one_slot(kip, i); 267 collect_one_slot(kip, i);
275 }
276 break; 268 break;
277 } 269 }
278 } 270 }
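
The kprobes hunks above convert the instruction-slot page list from an hlist to a plain doubly linked list, and collect_one_slot() now keeps the last remaining page cached, freeing a page only when it is not the sole entry (the list_is_singular() check). A minimal userspace sketch of that pattern follows; the node type and helper names are illustrative stand-ins, not the kernel list API.

    /*
     * Minimal userspace sketch (not kernel code) of the list pattern the
     * kprobes patch switches to: a circular doubly linked list with a
     * sentinel head, where a page is freed only if it is not the last one
     * on the list, mirroring the list_is_singular() check above.
     */
    #include <stdio.h>
    #include <stdlib.h>

    struct page_node {
        struct page_node *next, *prev;
    };

    static struct page_node head = { &head, &head };    /* empty circular list */

    static void node_add(struct page_node *n)
    {
        n->next = head.next;
        n->prev = &head;
        head.next->prev = n;
        head.next = n;
    }

    static void node_del(struct page_node *n)
    {
        n->prev->next = n->next;
        n->next->prev = n->prev;
    }

    static int list_is_singular_sketch(void)
    {
        return head.next != &head && head.next->next == &head;
    }

    int main(void)
    {
        struct page_node *a = calloc(1, sizeof(*a));
        struct page_node *b = calloc(1, sizeof(*b));

        node_add(a);
        node_add(b);

        /* Free an unused page only if another page is still cached. */
        if (!list_is_singular_sketch()) {
            node_del(b);
            free(b);
        }
        printf("singular now: %d\n", list_is_singular_sketch());    /* 1 */
        return 0;
    }
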
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1e1d23c26308..094863416b2e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1016,71 +1016,35 @@ static int
1016__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 1016__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1017{ 1017{
1018 unsigned long ftrace_addr; 1018 unsigned long ftrace_addr;
1019 unsigned long ip, fl; 1019 unsigned long flag = 0UL;
1020 1020
1021 ftrace_addr = (unsigned long)FTRACE_ADDR; 1021 ftrace_addr = (unsigned long)FTRACE_ADDR;
1022 1022
1023 ip = rec->ip;
1024
1025 /* 1023 /*
1026 * If this record is not to be traced and 1024 * If this record is not to be traced or we want to disable it,
1027 * it is not enabled then do nothing. 1025 * then disable it.
1028 * 1026 *
1029 * If this record is not to be traced and 1027 * If we want to enable it and filtering is off, then enable it.
1030 * it is enabled then disable it.
1031 * 1028 *
1029 * If we want to enable it and filtering is on, enable it only if
1030 * it's filtered
1032 */ 1031 */
1033 if (rec->flags & FTRACE_FL_NOTRACE) { 1032 if (enable && !(rec->flags & FTRACE_FL_NOTRACE)) {
1034 if (rec->flags & FTRACE_FL_ENABLED) 1033 if (!ftrace_filtered || (rec->flags & FTRACE_FL_FILTER))
1035 rec->flags &= ~FTRACE_FL_ENABLED; 1034 flag = FTRACE_FL_ENABLED;
1036 else 1035 }
1037 return 0;
1038
1039 } else if (ftrace_filtered && enable) {
1040 /*
1041 * Filtering is on:
1042 */
1043
1044 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
1045
1046 /* Record is filtered and enabled, do nothing */
1047 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
1048 return 0;
1049
1050 /* Record is not filtered or enabled, do nothing */
1051 if (!fl)
1052 return 0;
1053
1054 /* Record is not filtered but enabled, disable it */
1055 if (fl == FTRACE_FL_ENABLED)
1056 rec->flags &= ~FTRACE_FL_ENABLED;
1057 else
1058 /* Otherwise record is filtered but not enabled, enable it */
1059 rec->flags |= FTRACE_FL_ENABLED;
1060 } else {
1061 /* Disable or not filtered */
1062
1063 if (enable) {
1064 /* if record is enabled, do nothing */
1065 if (rec->flags & FTRACE_FL_ENABLED)
1066 return 0;
1067
1068 rec->flags |= FTRACE_FL_ENABLED;
1069
1070 } else {
1071 1036
1072 /* if record is not enabled, do nothing */ 1037 /* If the state of this record hasn't changed, then do nothing */
1073 if (!(rec->flags & FTRACE_FL_ENABLED)) 1038 if ((rec->flags & FTRACE_FL_ENABLED) == flag)
1074 return 0; 1039 return 0;
1075 1040
1076 rec->flags &= ~FTRACE_FL_ENABLED; 1041 if (flag) {
1077 } 1042 rec->flags |= FTRACE_FL_ENABLED;
1043 return ftrace_make_call(rec, ftrace_addr);
1078 } 1044 }
1079 1045
1080 if (rec->flags & FTRACE_FL_ENABLED) 1046 rec->flags &= ~FTRACE_FL_ENABLED;
1081 return ftrace_make_call(rec, ftrace_addr); 1047 return ftrace_make_nop(NULL, rec, ftrace_addr);
1082 else
1083 return ftrace_make_nop(NULL, rec, ftrace_addr);
1084} 1048}
1085 1049
1086static void ftrace_replace_code(int enable) 1050static void ftrace_replace_code(int enable)
@@ -1375,7 +1339,6 @@ struct ftrace_iterator {
1375 unsigned flags; 1339 unsigned flags;
1376 unsigned char buffer[FTRACE_BUFF_MAX+1]; 1340 unsigned char buffer[FTRACE_BUFF_MAX+1];
1377 unsigned buffer_idx; 1341 unsigned buffer_idx;
1378 unsigned filtered;
1379}; 1342};
1380 1343
1381static void * 1344static void *
@@ -1438,18 +1401,13 @@ static int t_hash_show(struct seq_file *m, void *v)
1438{ 1401{
1439 struct ftrace_func_probe *rec; 1402 struct ftrace_func_probe *rec;
1440 struct hlist_node *hnd = v; 1403 struct hlist_node *hnd = v;
1441 char str[KSYM_SYMBOL_LEN];
1442 1404
1443 rec = hlist_entry(hnd, struct ftrace_func_probe, node); 1405 rec = hlist_entry(hnd, struct ftrace_func_probe, node);
1444 1406
1445 if (rec->ops->print) 1407 if (rec->ops->print)
1446 return rec->ops->print(m, rec->ip, rec->ops, rec->data); 1408 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
1447 1409
1448 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 1410 seq_printf(m, "%pf:%pf", (void *)rec->ip, (void *)rec->ops->func);
1449 seq_printf(m, "%s:", str);
1450
1451 kallsyms_lookup((unsigned long)rec->ops->func, NULL, NULL, NULL, str);
1452 seq_printf(m, "%s", str);
1453 1411
1454 if (rec->data) 1412 if (rec->data)
1455 seq_printf(m, ":%p", rec->data); 1413 seq_printf(m, ":%p", rec->data);
@@ -1547,7 +1505,6 @@ static int t_show(struct seq_file *m, void *v)
1547{ 1505{
1548 struct ftrace_iterator *iter = m->private; 1506 struct ftrace_iterator *iter = m->private;
1549 struct dyn_ftrace *rec = v; 1507 struct dyn_ftrace *rec = v;
1550 char str[KSYM_SYMBOL_LEN];
1551 1508
1552 if (iter->flags & FTRACE_ITER_HASH) 1509 if (iter->flags & FTRACE_ITER_HASH)
1553 return t_hash_show(m, v); 1510 return t_hash_show(m, v);
@@ -1560,9 +1517,7 @@ static int t_show(struct seq_file *m, void *v)
1560 if (!rec) 1517 if (!rec)
1561 return 0; 1518 return 0;
1562 1519
1563 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 1520 seq_printf(m, "%pf\n", (void *)rec->ip);
1564
1565 seq_printf(m, "%s\n", str);
1566 1521
1567 return 0; 1522 return 0;
1568} 1523}
@@ -2312,7 +2267,6 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2312 } 2267 }
2313 2268
2314 if (isspace(ch)) { 2269 if (isspace(ch)) {
2315 iter->filtered++;
2316 iter->buffer[iter->buffer_idx] = 0; 2270 iter->buffer[iter->buffer_idx] = 0;
2317 ret = ftrace_process_regex(iter->buffer, 2271 ret = ftrace_process_regex(iter->buffer,
2318 iter->buffer_idx, enable); 2272 iter->buffer_idx, enable);
@@ -2443,7 +2397,6 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
2443 iter = file->private_data; 2397 iter = file->private_data;
2444 2398
2445 if (iter->buffer_idx) { 2399 if (iter->buffer_idx) {
2446 iter->filtered++;
2447 iter->buffer[iter->buffer_idx] = 0; 2400 iter->buffer[iter->buffer_idx] = 0;
2448 ftrace_match_records(iter->buffer, iter->buffer_idx, enable); 2401 ftrace_match_records(iter->buffer, iter->buffer_idx, enable);
2449 } 2402 }
@@ -2543,7 +2496,6 @@ static void g_stop(struct seq_file *m, void *p)
2543static int g_show(struct seq_file *m, void *v) 2496static int g_show(struct seq_file *m, void *v)
2544{ 2497{
2545 unsigned long *ptr = v; 2498 unsigned long *ptr = v;
2546 char str[KSYM_SYMBOL_LEN];
2547 2499
2548 if (!ptr) 2500 if (!ptr)
2549 return 0; 2501 return 0;
@@ -2553,9 +2505,7 @@ static int g_show(struct seq_file *m, void *v)
2553 return 0; 2505 return 0;
2554 } 2506 }
2555 2507
2556 kallsyms_lookup(*ptr, NULL, NULL, NULL, str); 2508 seq_printf(m, "%pf\n", v);
2557
2558 seq_printf(m, "%s\n", str);
2559 2509
2560 return 0; 2510 return 0;
2561} 2511}
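
The __ftrace_replace_code() rewrite above collapses the old branch maze into one computed flag: decide whether the record should be enabled, bail out if its current state already matches, and otherwise patch the call site one way or the other. A small standalone sketch of that decision flow, with illustrative flag names and helpers rather than the real ftrace symbols:

    /*
     * Sketch of the simplified decision logic in __ftrace_replace_code():
     * compute the desired ENABLED state as a single flag, do nothing if the
     * record already matches, otherwise patch the site one way or the other.
     * Flag values and names are illustrative, not the ftrace API.
     */
    #include <stdio.h>

    #define FL_FILTER   0x1
    #define FL_ENABLED  0x2
    #define FL_NOTRACE  0x4

    static int filtering_on;        /* stand-in for ftrace_filtered */

    /* Returns 1 if the call site would be patched, 0 if left untouched. */
    static int replace_code(unsigned int *flags, int enable)
    {
        unsigned int flag = 0;

        if (enable && !(*flags & FL_NOTRACE)) {
            if (!filtering_on || (*flags & FL_FILTER))
                flag = FL_ENABLED;
        }

        if ((*flags & FL_ENABLED) == flag)
            return 0;               /* state unchanged: nothing to do */

        if (flag) {
            *flags |= FL_ENABLED;   /* would call ftrace_make_call() */
            return 1;
        }
        *flags &= ~FL_ENABLED;      /* would call ftrace_make_nop() */
        return 1;
    }

    int main(void)
    {
        unsigned int rec = FL_FILTER;

        filtering_on = 1;
        printf("enable filtered rec: patched=%d\n", replace_code(&rec, 1)); /* 1 */
        printf("enable again:        patched=%d\n", replace_code(&rec, 1)); /* 0 */
        printf("disable:             patched=%d\n", replace_code(&rec, 0)); /* 1 */
        return 0;
    }
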
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 1edaa9516e81..dda53ccf749b 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -239,12 +239,52 @@ struct kmemtrace_user_event_alloc {
239}; 239};
240 240
241static enum print_line_t 241static enum print_line_t
242kmemtrace_print_alloc_user(struct trace_iterator *iter, 242kmemtrace_print_alloc(struct trace_iterator *iter, int flags)
243 struct kmemtrace_alloc_entry *entry)
244{ 243{
245 struct kmemtrace_user_event_alloc *ev_alloc;
246 struct trace_seq *s = &iter->seq; 244 struct trace_seq *s = &iter->seq;
245 struct kmemtrace_alloc_entry *entry;
246 int ret;
247
248 trace_assign_type(entry, iter->ent);
249
250 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu "
251 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
252 entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr,
253 (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc,
254 (unsigned long)entry->gfp_flags, entry->node);
255
256 if (!ret)
257 return TRACE_TYPE_PARTIAL_LINE;
258 return TRACE_TYPE_HANDLED;
259}
260
261static enum print_line_t
262kmemtrace_print_free(struct trace_iterator *iter, int flags)
263{
264 struct trace_seq *s = &iter->seq;
265 struct kmemtrace_free_entry *entry;
266 int ret;
267
268 trace_assign_type(entry, iter->ent);
269
270 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n",
271 entry->type_id, (void *)entry->call_site,
272 (unsigned long)entry->ptr);
273
274 if (!ret)
275 return TRACE_TYPE_PARTIAL_LINE;
276 return TRACE_TYPE_HANDLED;
277}
278
279static enum print_line_t
280kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags)
281{
282 struct trace_seq *s = &iter->seq;
283 struct kmemtrace_alloc_entry *entry;
247 struct kmemtrace_user_event *ev; 284 struct kmemtrace_user_event *ev;
285 struct kmemtrace_user_event_alloc *ev_alloc;
286
287 trace_assign_type(entry, iter->ent);
248 288
249 ev = trace_seq_reserve(s, sizeof(*ev)); 289 ev = trace_seq_reserve(s, sizeof(*ev));
250 if (!ev) 290 if (!ev)
@@ -271,12 +311,14 @@ kmemtrace_print_alloc_user(struct trace_iterator *iter,
271} 311}
272 312
273static enum print_line_t 313static enum print_line_t
274kmemtrace_print_free_user(struct trace_iterator *iter, 314kmemtrace_print_free_user(struct trace_iterator *iter, int flags)
275 struct kmemtrace_free_entry *entry)
276{ 315{
277 struct trace_seq *s = &iter->seq; 316 struct trace_seq *s = &iter->seq;
317 struct kmemtrace_free_entry *entry;
278 struct kmemtrace_user_event *ev; 318 struct kmemtrace_user_event *ev;
279 319
320 trace_assign_type(entry, iter->ent);
321
280 ev = trace_seq_reserve(s, sizeof(*ev)); 322 ev = trace_seq_reserve(s, sizeof(*ev));
281 if (!ev) 323 if (!ev)
282 return TRACE_TYPE_PARTIAL_LINE; 324 return TRACE_TYPE_PARTIAL_LINE;
@@ -294,12 +336,14 @@ kmemtrace_print_free_user(struct trace_iterator *iter,
294 336
295/* The two other following provide a more minimalistic output */ 337/* The two other following provide a more minimalistic output */
296static enum print_line_t 338static enum print_line_t
297kmemtrace_print_alloc_compress(struct trace_iterator *iter, 339kmemtrace_print_alloc_compress(struct trace_iterator *iter)
298 struct kmemtrace_alloc_entry *entry)
299{ 340{
341 struct kmemtrace_alloc_entry *entry;
300 struct trace_seq *s = &iter->seq; 342 struct trace_seq *s = &iter->seq;
301 int ret; 343 int ret;
302 344
345 trace_assign_type(entry, iter->ent);
346
303 /* Alloc entry */ 347 /* Alloc entry */
304 ret = trace_seq_printf(s, " + "); 348 ret = trace_seq_printf(s, " + ");
305 if (!ret) 349 if (!ret)
@@ -345,29 +389,24 @@ kmemtrace_print_alloc_compress(struct trace_iterator *iter,
345 if (!ret) 389 if (!ret)
346 return TRACE_TYPE_PARTIAL_LINE; 390 return TRACE_TYPE_PARTIAL_LINE;
347 391
348 /* Node */ 392 /* Node and call site*/
349 ret = trace_seq_printf(s, "%4d ", entry->node); 393 ret = trace_seq_printf(s, "%4d %pf\n", entry->node,
350 if (!ret) 394 (void *)entry->call_site);
351 return TRACE_TYPE_PARTIAL_LINE;
352
353 /* Call site */
354 ret = seq_print_ip_sym(s, entry->call_site, 0);
355 if (!ret) 395 if (!ret)
356 return TRACE_TYPE_PARTIAL_LINE; 396 return TRACE_TYPE_PARTIAL_LINE;
357 397
358 if (!trace_seq_printf(s, "\n"))
359 return TRACE_TYPE_PARTIAL_LINE;
360
361 return TRACE_TYPE_HANDLED; 398 return TRACE_TYPE_HANDLED;
362} 399}
363 400
364static enum print_line_t 401static enum print_line_t
365kmemtrace_print_free_compress(struct trace_iterator *iter, 402kmemtrace_print_free_compress(struct trace_iterator *iter)
366 struct kmemtrace_free_entry *entry)
367{ 403{
404 struct kmemtrace_free_entry *entry;
368 struct trace_seq *s = &iter->seq; 405 struct trace_seq *s = &iter->seq;
369 int ret; 406 int ret;
370 407
408 trace_assign_type(entry, iter->ent);
409
371 /* Free entry */ 410 /* Free entry */
372 ret = trace_seq_printf(s, " - "); 411 ret = trace_seq_printf(s, " - ");
373 if (!ret) 412 if (!ret)
@@ -401,19 +440,11 @@ kmemtrace_print_free_compress(struct trace_iterator *iter,
401 if (!ret) 440 if (!ret)
402 return TRACE_TYPE_PARTIAL_LINE; 441 return TRACE_TYPE_PARTIAL_LINE;
403 442
404 /* Skip node */ 443 /* Skip node and print call site*/
405 ret = trace_seq_printf(s, " "); 444 ret = trace_seq_printf(s, " %pf\n", (void *)entry->call_site);
406 if (!ret) 445 if (!ret)
407 return TRACE_TYPE_PARTIAL_LINE; 446 return TRACE_TYPE_PARTIAL_LINE;
408 447
409 /* Call site */
410 ret = seq_print_ip_sym(s, entry->call_site, 0);
411 if (!ret)
412 return TRACE_TYPE_PARTIAL_LINE;
413
414 if (!trace_seq_printf(s, "\n"))
415 return TRACE_TYPE_PARTIAL_LINE;
416
417 return TRACE_TYPE_HANDLED; 448 return TRACE_TYPE_HANDLED;
418} 449}
419 450
@@ -421,32 +452,31 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
421{ 452{
422 struct trace_entry *entry = iter->ent; 453 struct trace_entry *entry = iter->ent;
423 454
424 switch (entry->type) { 455 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
425 case TRACE_KMEM_ALLOC: { 456 return TRACE_TYPE_UNHANDLED;
426 struct kmemtrace_alloc_entry *field;
427
428 trace_assign_type(field, entry);
429 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
430 return kmemtrace_print_alloc_compress(iter, field);
431 else
432 return kmemtrace_print_alloc_user(iter, field);
433 }
434
435 case TRACE_KMEM_FREE: {
436 struct kmemtrace_free_entry *field;
437
438 trace_assign_type(field, entry);
439 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
440 return kmemtrace_print_free_compress(iter, field);
441 else
442 return kmemtrace_print_free_user(iter, field);
443 }
444 457
458 switch (entry->type) {
459 case TRACE_KMEM_ALLOC:
460 return kmemtrace_print_alloc_compress(iter);
461 case TRACE_KMEM_FREE:
462 return kmemtrace_print_free_compress(iter);
445 default: 463 default:
446 return TRACE_TYPE_UNHANDLED; 464 return TRACE_TYPE_UNHANDLED;
447 } 465 }
448} 466}
449 467
468static struct trace_event kmem_trace_alloc = {
469 .type = TRACE_KMEM_ALLOC,
470 .trace = kmemtrace_print_alloc,
471 .binary = kmemtrace_print_alloc_user,
472};
473
474static struct trace_event kmem_trace_free = {
475 .type = TRACE_KMEM_FREE,
476 .trace = kmemtrace_print_free,
477 .binary = kmemtrace_print_free_user,
478};
479
450static struct tracer kmem_tracer __read_mostly = { 480static struct tracer kmem_tracer __read_mostly = {
451 .name = "kmemtrace", 481 .name = "kmemtrace",
452 .init = kmem_trace_init, 482 .init = kmem_trace_init,
@@ -463,6 +493,21 @@ void kmemtrace_init(void)
463 493
464static int __init init_kmem_tracer(void) 494static int __init init_kmem_tracer(void)
465{ 495{
466 return register_tracer(&kmem_tracer); 496 if (!register_ftrace_event(&kmem_trace_alloc)) {
497 pr_warning("Warning: could not register kmem events\n");
498 return 1;
499 }
500
501 if (!register_ftrace_event(&kmem_trace_free)) {
502 pr_warning("Warning: could not register kmem events\n");
503 return 1;
504 }
505
506 if (!register_tracer(&kmem_tracer)) {
507 pr_warning("Warning: could not register the kmem tracer\n");
508 return 1;
509 }
510
511 return 0;
467} 512}
468device_initcall(init_kmem_tracer); 513device_initcall(init_kmem_tracer);
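
The kmemtrace change above stops switching on the entry type inside kmemtrace_print_line() and instead registers a struct trace_event per event type, whose .trace/.binary callbacks the tracing core invokes. A rough sketch of that callback-registration shape; the types and the register_event() helper are invented for the example, and the zero-on-failure return only loosely mirrors how the patch checks register_ftrace_event():

    /*
     * Sketch of dispatching output through registered per-type handlers
     * instead of one big switch. Names are illustrative, not the real
     * trace_event / register_ftrace_event() interface.
     */
    #include <stdio.h>

    enum { EV_ALLOC, EV_FREE, EV_MAX };

    struct event_ops {
        int type;
        int (*print)(const void *data);
    };

    static const struct event_ops *registered[EV_MAX];

    static int register_event(const struct event_ops *ops)
    {
        if (ops->type < 0 || ops->type >= EV_MAX || registered[ops->type])
            return 0;               /* zero means registration failed */
        registered[ops->type] = ops;
        return ops->type + 1;
    }

    static int print_alloc(const void *data) { return printf("alloc %s\n", (const char *)data); }
    static int print_free(const void *data)  { return printf("free  %s\n", (const char *)data); }

    static const struct event_ops alloc_ev = { EV_ALLOC, print_alloc };
    static const struct event_ops free_ev  = { EV_FREE,  print_free  };

    int main(void)
    {
        if (!register_event(&alloc_ev) || !register_event(&free_ev)) {
            fprintf(stderr, "could not register events\n");
            return 1;
        }
        /* Core side: dispatch on the recorded type, no per-tracer switch. */
        registered[EV_ALLOC]->print("16 bytes");
        registered[EV_FREE]->print("16 bytes");
        return 0;
    }
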
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a330513d96ce..da2c59d8f486 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -322,6 +322,14 @@ struct buffer_data_page {
322 unsigned char data[]; /* data of buffer page */ 322 unsigned char data[]; /* data of buffer page */
323}; 323};
324 324
325/*
326 * Note, the buffer_page list must be first. The buffer pages
327 * are allocated in cache lines, which means that each buffer
328 * page will be at the beginning of a cache line, and thus
329 * the least significant bits will be zero. We use this to
330 * add flags in the list struct pointers, to make the ring buffer
331 * lockless.
332 */
325struct buffer_page { 333struct buffer_page {
326 struct list_head list; /* list of buffer pages */ 334 struct list_head list; /* list of buffer pages */
327 local_t write; /* index for next write */ 335 local_t write; /* index for next write */
@@ -330,6 +338,21 @@ struct buffer_page {
330 struct buffer_data_page *page; /* Actual data page */ 338 struct buffer_data_page *page; /* Actual data page */
331}; 339};
332 340
341/*
342 * The buffer page counters, write and entries, must be reset
343 * atomically when crossing page boundaries. To synchronize this
344 * update, two counters are inserted into the number. One is
345 * the actual counter for the write position or count on the page.
346 *
347 * The other is a counter of updaters. Before an update happens
348 * the update partition of the counter is incremented. This will
349 * allow the updater to update the counter atomically.
350 *
351 * The counter is 20 bits, and the state data is 12.
352 */
353#define RB_WRITE_MASK 0xfffff
354#define RB_WRITE_INTCNT (1 << 20)
355
333static void rb_init_page(struct buffer_data_page *bpage) 356static void rb_init_page(struct buffer_data_page *bpage)
334{ 357{
335 local_set(&bpage->commit, 0); 358 local_set(&bpage->commit, 0);
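
The RB_WRITE_MASK/RB_WRITE_INTCNT comment above describes a split counter: the low 20 bits of a page's write field hold the byte index, the upper bits count in-flight updaters. An updater first bumps the upper bits, then resets the index with a compare-and-swap that can only succeed if no nested update slipped in between. A compact C11 sketch of the idea, with made-up names but the same 20-bit split:

    /*
     * Split-counter sketch: low 20 bits are the write index, upper bits
     * count updaters. Bumping the updater count first means the final
     * compare-and-swap fails if anyone else updated in the meantime.
     */
    #include <stdatomic.h>
    #include <stdio.h>

    #define WRITE_MASK      0xfffffUL
    #define WRITE_INTCNT    (1UL << 20)

    static atomic_ulong page_write;

    static void reset_write_if_unchanged(void)
    {
        /* Announce the update and snapshot the resulting value. */
        unsigned long old = atomic_fetch_add(&page_write, WRITE_INTCNT)
                            + WRITE_INTCNT;
        /* Zero the index but keep the updater count we just observed. */
        unsigned long val = old & ~WRITE_MASK;

        /* Only reset if nobody else touched the counter meanwhile. */
        atomic_compare_exchange_strong(&page_write, &old, val);
    }

    int main(void)
    {
        atomic_store(&page_write, 123);     /* pretend 123 bytes written */
        reset_write_if_unchanged();
        printf("index now %lu\n",
               atomic_load(&page_write) & WRITE_MASK);      /* prints 0 */
        return 0;
    }
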
@@ -403,21 +426,20 @@ int ring_buffer_print_page_header(struct trace_seq *s)
403struct ring_buffer_per_cpu { 426struct ring_buffer_per_cpu {
404 int cpu; 427 int cpu;
405 struct ring_buffer *buffer; 428 struct ring_buffer *buffer;
406 spinlock_t reader_lock; /* serialize readers */ 429 spinlock_t reader_lock; /* serialize readers */
407 raw_spinlock_t lock; 430 raw_spinlock_t lock;
408 struct lock_class_key lock_key; 431 struct lock_class_key lock_key;
409 struct list_head pages; 432 struct list_head *pages;
410 struct buffer_page *head_page; /* read from head */ 433 struct buffer_page *head_page; /* read from head */
411 struct buffer_page *tail_page; /* write to tail */ 434 struct buffer_page *tail_page; /* write to tail */
412 struct buffer_page *commit_page; /* committed pages */ 435 struct buffer_page *commit_page; /* committed pages */
413 struct buffer_page *reader_page; 436 struct buffer_page *reader_page;
414 unsigned long nmi_dropped; 437 local_t commit_overrun;
415 unsigned long commit_overrun; 438 local_t overrun;
416 unsigned long overrun;
417 unsigned long read;
418 local_t entries; 439 local_t entries;
419 local_t committing; 440 local_t committing;
420 local_t commits; 441 local_t commits;
442 unsigned long read;
421 u64 write_stamp; 443 u64 write_stamp;
422 u64 read_stamp; 444 u64 read_stamp;
423 atomic_t record_disabled; 445 atomic_t record_disabled;
@@ -489,6 +511,390 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
489} 511}
490EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); 512EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
491 513
514/*
515 * Making the ring buffer lockless makes things tricky.
516 * Although writes only happen on the CPU that they are on,
517 * and they only need to worry about interrupts. Reads can
518 * happen on any CPU.
519 *
520 * The reader page is always off the ring buffer, but when the
521 * reader finishes with a page, it needs to swap its page with
522 * a new one from the buffer. The reader needs to take from
523 * the head (writes go to the tail). But if a writer is in overwrite
524 * mode and wraps, it must push the head page forward.
525 *
526 * Here lies the problem.
527 *
528 * The reader must be careful to replace only the head page, and
529 * not another one. As described at the top of the file in the
530 * ASCII art, the reader sets its old page to point to the next
531 * page after head. It then sets the page after head to point to
532 * the old reader page. But if the writer moves the head page
533 * during this operation, the reader could end up with the tail.
534 *
535 * We use cmpxchg to help prevent this race. We also do something
536 * special with the page before head. We set the LSB to 1.
537 *
538 * When the writer must push the page forward, it will clear the
539 * bit that points to the head page, move the head, and then set
540 * the bit that points to the new head page.
541 *
542 * We also don't want an interrupt coming in and moving the head
543 * page on another writer. Thus we use the second LSB to catch
544 * that too. Thus:
545 *
546 * head->list->prev->next bit 1 bit 0
547 * ------- -------
548 * Normal page 0 0
549 * Points to head page 0 1
550 * New head page 1 0
551 *
552 * Note we can not trust the prev pointer of the head page, because:
553 *
554 * +----+ +-----+ +-----+
555 * | |------>| T |---X--->| N |
556 * | |<------| | | |
557 * +----+ +-----+ +-----+
558 * ^ ^ |
559 * | +-----+ | |
560 * +----------| R |----------+ |
561 * | |<-----------+
562 * +-----+
563 *
564 * Key: ---X--> HEAD flag set in pointer
565 * T Tail page
566 * R Reader page
567 * N Next page
568 *
569 * (see __rb_reserve_next() to see where this happens)
570 *
571 * What the above shows is that the reader just swapped out
572 * the reader page with a page in the buffer, but before it
573 * could make the new header point back to the new page added
574 * it was preempted by a writer. The writer moved forward onto
575 * the new page added by the reader and is about to move forward
576 * again.
577 *
578 * You can see, it is legitimate for the previous pointer of
579 * the head (or any page) not to point back to itself. But only
580 * temporarially.
581 */
582
583#define RB_PAGE_NORMAL 0UL
584#define RB_PAGE_HEAD 1UL
585#define RB_PAGE_UPDATE 2UL
586
587
588#define RB_FLAG_MASK 3UL
589
590/* PAGE_MOVED is not part of the mask */
591#define RB_PAGE_MOVED 4UL
592
593/*
594 * rb_list_head - remove any bit
595 */
596static struct list_head *rb_list_head(struct list_head *list)
597{
598 unsigned long val = (unsigned long)list;
599
600 return (struct list_head *)(val & ~RB_FLAG_MASK);
601}
602
603/*
604 * rb_is_head_page - test if the give page is the head page
605 *
606 * Because the reader may move the head_page pointer, we can
607 * not trust what the head page is (it may be pointing to
608 * the reader page). But if the next page is a header page,
609 * its flags will be non zero.
610 */
611static int inline
612rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
613 struct buffer_page *page, struct list_head *list)
614{
615 unsigned long val;
616
617 val = (unsigned long)list->next;
618
619 if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
620 return RB_PAGE_MOVED;
621
622 return val & RB_FLAG_MASK;
623}
624
625/*
626 * rb_is_reader_page
627 *
628 * The unique thing about the reader page, is that, if the
629 * writer is ever on it, the previous pointer never points
630 * back to the reader page.
631 */
632static int rb_is_reader_page(struct buffer_page *page)
633{
634 struct list_head *list = page->list.prev;
635
636 return rb_list_head(list->next) != &page->list;
637}
638
639/*
640 * rb_set_list_to_head - set a list_head to be pointing to head.
641 */
642static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
643 struct list_head *list)
644{
645 unsigned long *ptr;
646
647 ptr = (unsigned long *)&list->next;
648 *ptr |= RB_PAGE_HEAD;
649 *ptr &= ~RB_PAGE_UPDATE;
650}
651
652/*
653 * rb_head_page_activate - sets up head page
654 */
655static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
656{
657 struct buffer_page *head;
658
659 head = cpu_buffer->head_page;
660 if (!head)
661 return;
662
663 /*
664 * Set the previous list pointer to have the HEAD flag.
665 */
666 rb_set_list_to_head(cpu_buffer, head->list.prev);
667}
668
669static void rb_list_head_clear(struct list_head *list)
670{
671 unsigned long *ptr = (unsigned long *)&list->next;
672
673 *ptr &= ~RB_FLAG_MASK;
674}
675
676/*
677 * rb_head_page_dactivate - clears head page ptr (for free list)
678 */
679static void
680rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
681{
682 struct list_head *hd;
683
684 /* Go through the whole list and clear any pointers found. */
685 rb_list_head_clear(cpu_buffer->pages);
686
687 list_for_each(hd, cpu_buffer->pages)
688 rb_list_head_clear(hd);
689}
690
691static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
692 struct buffer_page *head,
693 struct buffer_page *prev,
694 int old_flag, int new_flag)
695{
696 struct list_head *list;
697 unsigned long val = (unsigned long)&head->list;
698 unsigned long ret;
699
700 list = &prev->list;
701
702 val &= ~RB_FLAG_MASK;
703
704 ret = (unsigned long)cmpxchg(&list->next,
705 val | old_flag, val | new_flag);
706
707 /* check if the reader took the page */
708 if ((ret & ~RB_FLAG_MASK) != val)
709 return RB_PAGE_MOVED;
710
711 return ret & RB_FLAG_MASK;
712}
713
714static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
715 struct buffer_page *head,
716 struct buffer_page *prev,
717 int old_flag)
718{
719 return rb_head_page_set(cpu_buffer, head, prev,
720 old_flag, RB_PAGE_UPDATE);
721}
722
723static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
724 struct buffer_page *head,
725 struct buffer_page *prev,
726 int old_flag)
727{
728 return rb_head_page_set(cpu_buffer, head, prev,
729 old_flag, RB_PAGE_HEAD);
730}
731
732static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
733 struct buffer_page *head,
734 struct buffer_page *prev,
735 int old_flag)
736{
737 return rb_head_page_set(cpu_buffer, head, prev,
738 old_flag, RB_PAGE_NORMAL);
739}
740
741static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
742 struct buffer_page **bpage)
743{
744 struct list_head *p = rb_list_head((*bpage)->list.next);
745
746 *bpage = list_entry(p, struct buffer_page, list);
747}
748
749static struct buffer_page *
750rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
751{
752 struct buffer_page *head;
753 struct buffer_page *page;
754 struct list_head *list;
755 int i;
756
757 if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
758 return NULL;
759
760 /* sanity check */
761 list = cpu_buffer->pages;
762 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
763 return NULL;
764
765 page = head = cpu_buffer->head_page;
766 /*
767 * It is possible that the writer moves the header behind
768 * where we started, and we miss in one loop.
769 * A second loop should grab the header, but we'll do
770 * three loops just because I'm paranoid.
771 */
772 for (i = 0; i < 3; i++) {
773 do {
774 if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
775 cpu_buffer->head_page = page;
776 return page;
777 }
778 rb_inc_page(cpu_buffer, &page);
779 } while (page != head);
780 }
781
782 RB_WARN_ON(cpu_buffer, 1);
783
784 return NULL;
785}
786
787static int rb_head_page_replace(struct buffer_page *old,
788 struct buffer_page *new)
789{
790 unsigned long *ptr = (unsigned long *)&old->list.prev->next;
791 unsigned long val;
792 unsigned long ret;
793
794 val = *ptr & ~RB_FLAG_MASK;
795 val |= RB_PAGE_HEAD;
796
797 ret = cmpxchg(ptr, val, &new->list);
798
799 return ret == val;
800}
801
802/*
803 * rb_tail_page_update - move the tail page forward
804 *
805 * Returns 1 if moved tail page, 0 if someone else did.
806 */
807static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
808 struct buffer_page *tail_page,
809 struct buffer_page *next_page)
810{
811 struct buffer_page *old_tail;
812 unsigned long old_entries;
813 unsigned long old_write;
814 int ret = 0;
815
816 /*
817 * The tail page now needs to be moved forward.
818 *
819 * We need to reset the tail page, but without messing
820 * with possible erasing of data brought in by interrupts
821 * that have moved the tail page and are currently on it.
822 *
823 * We add a counter to the write field to denote this.
824 */
825 old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
826 old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
827
828 /*
829 * Just make sure we have seen our old_write and synchronize
830 * with any interrupts that come in.
831 */
832 barrier();
833
834 /*
835 * If the tail page is still the same as what we think
836 * it is, then it is up to us to update the tail
837 * pointer.
838 */
839 if (tail_page == cpu_buffer->tail_page) {
840 /* Zero the write counter */
841 unsigned long val = old_write & ~RB_WRITE_MASK;
842 unsigned long eval = old_entries & ~RB_WRITE_MASK;
843
844 /*
845 * This will only succeed if an interrupt did
846 * not come in and change it. In which case, we
847 * do not want to modify it.
848 *
849 * We add (void) to let the compiler know that we do not care
850 * about the return value of these functions. We use the
851 * cmpxchg to only update if an interrupt did not already
852 * do it for us. If the cmpxchg fails, we don't care.
853 */
854 (void)local_cmpxchg(&next_page->write, old_write, val);
855 (void)local_cmpxchg(&next_page->entries, old_entries, eval);
856
857 /*
858 * No need to worry about races with clearing out the commit.
859 * it only can increment when a commit takes place. But that
860 * only happens in the outer most nested commit.
861 */
862 local_set(&next_page->page->commit, 0);
863
864 old_tail = cmpxchg(&cpu_buffer->tail_page,
865 tail_page, next_page);
866
867 if (old_tail == tail_page)
868 ret = 1;
869 }
870
871 return ret;
872}
873
874static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
875 struct buffer_page *bpage)
876{
877 unsigned long val = (unsigned long)bpage;
878
879 if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
880 return 1;
881
882 return 0;
883}
884
885/**
886 * rb_check_list - make sure a pointer to a list has the last bits zero
887 */
888static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
889 struct list_head *list)
890{
891 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
892 return 1;
893 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
894 return 1;
895 return 0;
896}
897
492/** 898/**
493 * check_pages - integrity check of buffer pages 899 * check_pages - integrity check of buffer pages
494 * @cpu_buffer: CPU buffer with pages to test 900 * @cpu_buffer: CPU buffer with pages to test
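
The block above adds the core lockless machinery: because buffer pages are cache-line aligned, the two least significant bits of the list pointers are always zero, so the HEAD/UPDATE state can live inside the pointer itself and move atomically together with it. A tiny illustration of stashing and masking a flag bit in a pointer's low bits; the struct and helper names are invented for the example, not the ring buffer code:

    /*
     * Flag-in-pointer sketch: allocation alignment leaves the low bits of
     * a pointer clear, so they can carry page state. The real code keeps
     * HEAD/UPDATE in list_head pointers and moves them with cmpxchg().
     */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define FLAG_MASK   3UL
    #define PAGE_HEAD   1UL

    struct page { struct page *next; };

    /* Strip any flag bits to recover the real pointer. */
    static struct page *clean_ptr(struct page *p)
    {
        return (struct page *)((uintptr_t)p & ~FLAG_MASK);
    }

    static int is_head(struct page *p)
    {
        return ((uintptr_t)p->next & FLAG_MASK) == PAGE_HEAD;
    }

    int main(void)
    {
        /* malloc's alignment guarantee already leaves the low bits clear. */
        struct page *a = malloc(sizeof(*a));
        struct page *b = malloc(sizeof(*b));

        /* a points at b and marks b as the head page. */
        a->next = (struct page *)((uintptr_t)b | PAGE_HEAD);

        printf("a marks head: %d, real next == b: %d\n",
               is_head(a), clean_ptr(a->next) == b);        /* 1, 1 */

        free(a);
        free(b);
        return 0;
    }
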
@@ -498,14 +904,19 @@ EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
498 */ 904 */
499static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 905static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
500{ 906{
501 struct list_head *head = &cpu_buffer->pages; 907 struct list_head *head = cpu_buffer->pages;
502 struct buffer_page *bpage, *tmp; 908 struct buffer_page *bpage, *tmp;
503 909
910 rb_head_page_deactivate(cpu_buffer);
911
504 if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) 912 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
505 return -1; 913 return -1;
506 if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) 914 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
507 return -1; 915 return -1;
508 916
917 if (rb_check_list(cpu_buffer, head))
918 return -1;
919
509 list_for_each_entry_safe(bpage, tmp, head, list) { 920 list_for_each_entry_safe(bpage, tmp, head, list) {
510 if (RB_WARN_ON(cpu_buffer, 921 if (RB_WARN_ON(cpu_buffer,
511 bpage->list.next->prev != &bpage->list)) 922 bpage->list.next->prev != &bpage->list))
@@ -513,25 +924,33 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
513 if (RB_WARN_ON(cpu_buffer, 924 if (RB_WARN_ON(cpu_buffer,
514 bpage->list.prev->next != &bpage->list)) 925 bpage->list.prev->next != &bpage->list))
515 return -1; 926 return -1;
927 if (rb_check_list(cpu_buffer, &bpage->list))
928 return -1;
516 } 929 }
517 930
931 rb_head_page_activate(cpu_buffer);
932
518 return 0; 933 return 0;
519} 934}
520 935
521static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, 936static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
522 unsigned nr_pages) 937 unsigned nr_pages)
523{ 938{
524 struct list_head *head = &cpu_buffer->pages;
525 struct buffer_page *bpage, *tmp; 939 struct buffer_page *bpage, *tmp;
526 unsigned long addr; 940 unsigned long addr;
527 LIST_HEAD(pages); 941 LIST_HEAD(pages);
528 unsigned i; 942 unsigned i;
529 943
944 WARN_ON(!nr_pages);
945
530 for (i = 0; i < nr_pages; i++) { 946 for (i = 0; i < nr_pages; i++) {
531 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 947 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
532 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 948 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
533 if (!bpage) 949 if (!bpage)
534 goto free_pages; 950 goto free_pages;
951
952 rb_check_bpage(cpu_buffer, bpage);
953
535 list_add(&bpage->list, &pages); 954 list_add(&bpage->list, &pages);
536 955
537 addr = __get_free_page(GFP_KERNEL); 956 addr = __get_free_page(GFP_KERNEL);
@@ -541,7 +960,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
541 rb_init_page(bpage->page); 960 rb_init_page(bpage->page);
542 } 961 }
543 962
544 list_splice(&pages, head); 963 /*
964 * The ring buffer page list is a circular list that does not
965 * start and end with a list head. All page list items point to
966 * other pages.
967 */
968 cpu_buffer->pages = pages.next;
969 list_del(&pages);
545 970
546 rb_check_pages(cpu_buffer); 971 rb_check_pages(cpu_buffer);
547 972
@@ -573,13 +998,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
573 spin_lock_init(&cpu_buffer->reader_lock); 998 spin_lock_init(&cpu_buffer->reader_lock);
574 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); 999 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
575 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1000 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
576 INIT_LIST_HEAD(&cpu_buffer->pages);
577 1001
578 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1002 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
579 GFP_KERNEL, cpu_to_node(cpu)); 1003 GFP_KERNEL, cpu_to_node(cpu));
580 if (!bpage) 1004 if (!bpage)
581 goto fail_free_buffer; 1005 goto fail_free_buffer;
582 1006
1007 rb_check_bpage(cpu_buffer, bpage);
1008
583 cpu_buffer->reader_page = bpage; 1009 cpu_buffer->reader_page = bpage;
584 addr = __get_free_page(GFP_KERNEL); 1010 addr = __get_free_page(GFP_KERNEL);
585 if (!addr) 1011 if (!addr)
@@ -594,9 +1020,11 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
594 goto fail_free_reader; 1020 goto fail_free_reader;
595 1021
596 cpu_buffer->head_page 1022 cpu_buffer->head_page
597 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 1023 = list_entry(cpu_buffer->pages, struct buffer_page, list);
598 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; 1024 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
599 1025
1026 rb_head_page_activate(cpu_buffer);
1027
600 return cpu_buffer; 1028 return cpu_buffer;
601 1029
602 fail_free_reader: 1030 fail_free_reader:
@@ -609,15 +1037,22 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
609 1037
610static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 1038static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
611{ 1039{
612 struct list_head *head = &cpu_buffer->pages; 1040 struct list_head *head = cpu_buffer->pages;
613 struct buffer_page *bpage, *tmp; 1041 struct buffer_page *bpage, *tmp;
614 1042
615 free_buffer_page(cpu_buffer->reader_page); 1043 free_buffer_page(cpu_buffer->reader_page);
616 1044
617 list_for_each_entry_safe(bpage, tmp, head, list) { 1045 rb_head_page_deactivate(cpu_buffer);
618 list_del_init(&bpage->list); 1046
1047 if (head) {
1048 list_for_each_entry_safe(bpage, tmp, head, list) {
1049 list_del_init(&bpage->list);
1050 free_buffer_page(bpage);
1051 }
1052 bpage = list_entry(head, struct buffer_page, list);
619 free_buffer_page(bpage); 1053 free_buffer_page(bpage);
620 } 1054 }
1055
621 kfree(cpu_buffer); 1056 kfree(cpu_buffer);
622} 1057}
623 1058
@@ -760,15 +1195,17 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
760 atomic_inc(&cpu_buffer->record_disabled); 1195 atomic_inc(&cpu_buffer->record_disabled);
761 synchronize_sched(); 1196 synchronize_sched();
762 1197
1198 rb_head_page_deactivate(cpu_buffer);
1199
763 for (i = 0; i < nr_pages; i++) { 1200 for (i = 0; i < nr_pages; i++) {
764 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) 1201 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
765 return; 1202 return;
766 p = cpu_buffer->pages.next; 1203 p = cpu_buffer->pages->next;
767 bpage = list_entry(p, struct buffer_page, list); 1204 bpage = list_entry(p, struct buffer_page, list);
768 list_del_init(&bpage->list); 1205 list_del_init(&bpage->list);
769 free_buffer_page(bpage); 1206 free_buffer_page(bpage);
770 } 1207 }
771 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages))) 1208 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
772 return; 1209 return;
773 1210
774 rb_reset_cpu(cpu_buffer); 1211 rb_reset_cpu(cpu_buffer);
@@ -790,15 +1227,19 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
790 atomic_inc(&cpu_buffer->record_disabled); 1227 atomic_inc(&cpu_buffer->record_disabled);
791 synchronize_sched(); 1228 synchronize_sched();
792 1229
1230 spin_lock_irq(&cpu_buffer->reader_lock);
1231 rb_head_page_deactivate(cpu_buffer);
1232
793 for (i = 0; i < nr_pages; i++) { 1233 for (i = 0; i < nr_pages; i++) {
794 if (RB_WARN_ON(cpu_buffer, list_empty(pages))) 1234 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
795 return; 1235 return;
796 p = pages->next; 1236 p = pages->next;
797 bpage = list_entry(p, struct buffer_page, list); 1237 bpage = list_entry(p, struct buffer_page, list);
798 list_del_init(&bpage->list); 1238 list_del_init(&bpage->list);
799 list_add_tail(&bpage->list, &cpu_buffer->pages); 1239 list_add_tail(&bpage->list, cpu_buffer->pages);
800 } 1240 }
801 rb_reset_cpu(cpu_buffer); 1241 rb_reset_cpu(cpu_buffer);
1242 spin_unlock_irq(&cpu_buffer->reader_lock);
802 1243
803 rb_check_pages(cpu_buffer); 1244 rb_check_pages(cpu_buffer);
804 1245
@@ -949,21 +1390,14 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
949} 1390}
950 1391
951static inline struct ring_buffer_event * 1392static inline struct ring_buffer_event *
952rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
953{
954 return __rb_page_index(cpu_buffer->head_page,
955 cpu_buffer->head_page->read);
956}
957
958static inline struct ring_buffer_event *
959rb_iter_head_event(struct ring_buffer_iter *iter) 1393rb_iter_head_event(struct ring_buffer_iter *iter)
960{ 1394{
961 return __rb_page_index(iter->head_page, iter->head); 1395 return __rb_page_index(iter->head_page, iter->head);
962} 1396}
963 1397
964static inline unsigned rb_page_write(struct buffer_page *bpage) 1398static inline unsigned long rb_page_write(struct buffer_page *bpage)
965{ 1399{
966 return local_read(&bpage->write); 1400 return local_read(&bpage->write) & RB_WRITE_MASK;
967} 1401}
968 1402
969static inline unsigned rb_page_commit(struct buffer_page *bpage) 1403static inline unsigned rb_page_commit(struct buffer_page *bpage)
@@ -971,6 +1405,11 @@ static inline unsigned rb_page_commit(struct buffer_page *bpage)
971 return local_read(&bpage->page->commit); 1405 return local_read(&bpage->page->commit);
972} 1406}
973 1407
1408static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1409{
1410 return local_read(&bpage->entries) & RB_WRITE_MASK;
1411}
1412
974/* Size is determined by what has been commited */ 1413/* Size is determined by what has been commited */
975static inline unsigned rb_page_size(struct buffer_page *bpage) 1414static inline unsigned rb_page_size(struct buffer_page *bpage)
976{ 1415{
@@ -983,22 +1422,6 @@ rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
983 return rb_page_commit(cpu_buffer->commit_page); 1422 return rb_page_commit(cpu_buffer->commit_page);
984} 1423}
985 1424
986static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
987{
988 return rb_page_commit(cpu_buffer->head_page);
989}
990
991static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
992 struct buffer_page **bpage)
993{
994 struct list_head *p = (*bpage)->list.next;
995
996 if (p == &cpu_buffer->pages)
997 p = p->next;
998
999 *bpage = list_entry(p, struct buffer_page, list);
1000}
1001
1002static inline unsigned 1425static inline unsigned
1003rb_event_index(struct ring_buffer_event *event) 1426rb_event_index(struct ring_buffer_event *event)
1004{ 1427{
@@ -1024,6 +1447,8 @@ rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
1024static void 1447static void
1025rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) 1448rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1026{ 1449{
1450 unsigned long max_count;
1451
1027 /* 1452 /*
1028 * We only race with interrupts and NMIs on this CPU. 1453 * We only race with interrupts and NMIs on this CPU.
1029 * If we own the commit event, then we can commit 1454 * If we own the commit event, then we can commit
@@ -1033,9 +1458,16 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1033 * assign the commit to the tail. 1458 * assign the commit to the tail.
1034 */ 1459 */
1035 again: 1460 again:
1461 max_count = cpu_buffer->buffer->pages * 100;
1462
1036 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 1463 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
1037 cpu_buffer->commit_page->page->commit = 1464 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
1038 cpu_buffer->commit_page->write; 1465 return;
1466 if (RB_WARN_ON(cpu_buffer,
1467 rb_is_reader_page(cpu_buffer->tail_page)))
1468 return;
1469 local_set(&cpu_buffer->commit_page->page->commit,
1470 rb_page_write(cpu_buffer->commit_page));
1039 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 1471 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
1040 cpu_buffer->write_stamp = 1472 cpu_buffer->write_stamp =
1041 cpu_buffer->commit_page->page->time_stamp; 1473 cpu_buffer->commit_page->page->time_stamp;
@@ -1044,8 +1476,12 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1044 } 1476 }
1045 while (rb_commit_index(cpu_buffer) != 1477 while (rb_commit_index(cpu_buffer) !=
1046 rb_page_write(cpu_buffer->commit_page)) { 1478 rb_page_write(cpu_buffer->commit_page)) {
1047 cpu_buffer->commit_page->page->commit = 1479
1048 cpu_buffer->commit_page->write; 1480 local_set(&cpu_buffer->commit_page->page->commit,
1481 rb_page_write(cpu_buffer->commit_page));
1482 RB_WARN_ON(cpu_buffer,
1483 local_read(&cpu_buffer->commit_page->page->commit) &
1484 ~RB_WRITE_MASK);
1049 barrier(); 1485 barrier();
1050 } 1486 }
1051 1487
@@ -1078,7 +1514,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1078 * to the head page instead of next. 1514 * to the head page instead of next.
1079 */ 1515 */
1080 if (iter->head_page == cpu_buffer->reader_page) 1516 if (iter->head_page == cpu_buffer->reader_page)
1081 iter->head_page = cpu_buffer->head_page; 1517 iter->head_page = rb_set_head_page(cpu_buffer);
1082 else 1518 else
1083 rb_inc_page(cpu_buffer, &iter->head_page); 1519 rb_inc_page(cpu_buffer, &iter->head_page);
1084 1520
@@ -1122,6 +1558,163 @@ rb_update_event(struct ring_buffer_event *event,
1122 } 1558 }
1123} 1559}
1124 1560
1561/*
1562 * rb_handle_head_page - writer hit the head page
1563 *
1564 * Returns: +1 to retry page
1565 * 0 to continue
1566 * -1 on error
1567 */
1568static int
1569rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
1570 struct buffer_page *tail_page,
1571 struct buffer_page *next_page)
1572{
1573 struct buffer_page *new_head;
1574 int entries;
1575 int type;
1576 int ret;
1577
1578 entries = rb_page_entries(next_page);
1579
1580 /*
1581 * The hard part is here. We need to move the head
1582 * forward, and protect against both readers on
1583 * other CPUs and writers coming in via interrupts.
1584 */
1585 type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
1586 RB_PAGE_HEAD);
1587
1588 /*
1589 * type can be one of four:
1590 * NORMAL - an interrupt already moved it for us
1591 * HEAD - we are the first to get here.
1592 * UPDATE - we are the interrupt interrupting
1593 * a current move.
1594 * MOVED - a reader on another CPU moved the next
1595 * pointer to its reader page. Give up
1596 * and try again.
1597 */
1598
1599 switch (type) {
1600 case RB_PAGE_HEAD:
1601 /*
1602 * We changed the head to UPDATE, thus
1603 * it is our responsibility to update
1604 * the counters.
1605 */
1606 local_add(entries, &cpu_buffer->overrun);
1607
1608 /*
1609 * The entries will be zeroed out when we move the
1610 * tail page.
1611 */
1612
1613 /* still more to do */
1614 break;
1615
1616 case RB_PAGE_UPDATE:
1617 /*
1618 * This is an interrupt that interrupt the
1619 * previous update. Still more to do.
1620 */
1621 break;
1622 case RB_PAGE_NORMAL:
1623 /*
1624 * An interrupt came in before the update
1625 * and processed this for us.
1626 * Nothing left to do.
1627 */
1628 return 1;
1629 case RB_PAGE_MOVED:
1630 /*
1631 * The reader is on another CPU and just did
1632 * a swap with our next_page.
1633 * Try again.
1634 */
1635 return 1;
1636 default:
1637 RB_WARN_ON(cpu_buffer, 1); /* WTF??? */
1638 return -1;
1639 }
1640
1641 /*
1642 * Now that we are here, the old head pointer is
1643 * set to UPDATE. This will keep the reader from
1644 * swapping the head page with the reader page.
1645 * The reader (on another CPU) will spin till
1646 * we are finished.
1647 *
1648 * We just need to protect against interrupts
1649 * doing the job. We will set the next pointer
1650 * to HEAD. After that, we set the old pointer
1651 * to NORMAL, but only if it was HEAD before.
1652 * otherwise we are an interrupt, and only
1653 * want the outer most commit to reset it.
1654 */
1655 new_head = next_page;
1656 rb_inc_page(cpu_buffer, &new_head);
1657
1658 ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
1659 RB_PAGE_NORMAL);
1660
1661 /*
1662 * Valid returns are:
1663 * HEAD - an interrupt came in and already set it.
1664 * NORMAL - One of two things:
1665 * 1) We really set it.
1666 * 2) A bunch of interrupts came in and moved
1667 * the page forward again.
1668 */
1669 switch (ret) {
1670 case RB_PAGE_HEAD:
1671 case RB_PAGE_NORMAL:
1672 /* OK */
1673 break;
1674 default:
1675 RB_WARN_ON(cpu_buffer, 1);
1676 return -1;
1677 }
1678
1679 /*
1680 * It is possible that an interrupt came in,
1681 * set the head up, then more interrupts came in
1682 * and moved it again. When we get back here,
1683 * the page would have been set to NORMAL but we
1684 * just set it back to HEAD.
1685 *
1686 * How do you detect this? Well, if that happened
1687 * the tail page would have moved.
1688 */
1689 if (ret == RB_PAGE_NORMAL) {
1690 /*
1691 * If the tail had moved passed next, then we need
1692 * to reset the pointer.
1693 */
1694 if (cpu_buffer->tail_page != tail_page &&
1695 cpu_buffer->tail_page != next_page)
1696 rb_head_page_set_normal(cpu_buffer, new_head,
1697 next_page,
1698 RB_PAGE_HEAD);
1699 }
1700
1701 /*
1702 * If this was the outer most commit (the one that
1703 * changed the original pointer from HEAD to UPDATE),
1704 * then it is up to us to reset it to NORMAL.
1705 */
1706 if (type == RB_PAGE_HEAD) {
1707 ret = rb_head_page_set_normal(cpu_buffer, next_page,
1708 tail_page,
1709 RB_PAGE_UPDATE);
1710 if (RB_WARN_ON(cpu_buffer,
1711 ret != RB_PAGE_UPDATE))
1712 return -1;
1713 }
1714
1715 return 0;
1716}
1717
1125static unsigned rb_calculate_event_length(unsigned length) 1718static unsigned rb_calculate_event_length(unsigned length)
1126{ 1719{
1127 struct ring_buffer_event event; /* Used only for sizeof array */ 1720 struct ring_buffer_event event; /* Used only for sizeof array */
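
rb_handle_head_page() above resolves the writer/reader race with a single compare-and-swap on the head flag: finding HEAD or UPDATE means this writer (or the update it nests inside) must finish moving the head, while NORMAL or MOVED means someone else handled it and the caller retries. A reduced sketch of that outcome handling, using a plain atomic int in place of the flagged list pointers:

    /*
     * Sketch of the head-page handoff: try to flip HEAD to UPDATE, then
     * decide from what was actually found whether to finish the move or
     * retry. Illustrative only; the real code cmpxchg()es list pointers.
     */
    #include <stdatomic.h>
    #include <stdio.h>

    enum { PAGE_NORMAL, PAGE_HEAD, PAGE_UPDATE, PAGE_MOVED };

    static atomic_int head_flag = PAGE_HEAD;

    /* Returns 0 when this caller must finish moving the head, 1 to retry. */
    static int handle_head_page(void)
    {
        int expected = PAGE_HEAD;

        if (atomic_compare_exchange_strong(&head_flag, &expected, PAGE_UPDATE))
            return 0;               /* we own the update */

        switch (expected) {         /* value actually found */
        case PAGE_UPDATE:           /* nested inside an ongoing update */
            return 0;
        case PAGE_NORMAL:           /* someone already moved it */
        case PAGE_MOVED:            /* reader swapped the page */
        default:
            return 1;               /* give up and retry */
        }
    }

    int main(void)
    {
        printf("first caller retry:  %d\n", handle_head_page());    /* 0 */
        printf("nested caller retry: %d\n", handle_head_page());    /* 0 */
        atomic_store(&head_flag, PAGE_NORMAL);
        printf("after move, retry:   %d\n", handle_head_page());    /* 1 */
        return 0;
    }
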
@@ -1200,96 +1793,93 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1200 struct buffer_page *commit_page, 1793 struct buffer_page *commit_page,
1201 struct buffer_page *tail_page, u64 *ts) 1794 struct buffer_page *tail_page, u64 *ts)
1202{ 1795{
1203 struct buffer_page *next_page, *head_page, *reader_page;
1204 struct ring_buffer *buffer = cpu_buffer->buffer; 1796 struct ring_buffer *buffer = cpu_buffer->buffer;
1205 bool lock_taken = false; 1797 struct buffer_page *next_page;
1206 unsigned long flags; 1798 int ret;
1207 1799
1208 next_page = tail_page; 1800 next_page = tail_page;
1209 1801
1210 local_irq_save(flags);
1211 /*
1212 * Since the write to the buffer is still not
1213 * fully lockless, we must be careful with NMIs.
1214 * The locks in the writers are taken when a write
1215 * crosses to a new page. The locks protect against
1216 * races with the readers (this will soon be fixed
1217 * with a lockless solution).
1218 *
1219 * Because we can not protect against NMIs, and we
1220 * want to keep traces reentrant, we need to manage
1221 * what happens when we are in an NMI.
1222 *
1223 * NMIs can happen after we take the lock.
1224 * If we are in an NMI, only take the lock
1225 * if it is not already taken. Otherwise
1226 * simply fail.
1227 */
1228 if (unlikely(in_nmi())) {
1229 if (!__raw_spin_trylock(&cpu_buffer->lock)) {
1230 cpu_buffer->nmi_dropped++;
1231 goto out_reset;
1232 }
1233 } else
1234 __raw_spin_lock(&cpu_buffer->lock);
1235
1236 lock_taken = true;
1237
1238 rb_inc_page(cpu_buffer, &next_page); 1802 rb_inc_page(cpu_buffer, &next_page);
1239 1803
1240 head_page = cpu_buffer->head_page;
1241 reader_page = cpu_buffer->reader_page;
1242
1243 /* we grabbed the lock before incrementing */
1244 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
1245 goto out_reset;
1246
1247 /* 1804 /*
1248 * If for some reason, we had an interrupt storm that made 1805 * If for some reason, we had an interrupt storm that made
1249 * it all the way around the buffer, bail, and warn 1806 * it all the way around the buffer, bail, and warn
1250 * about it. 1807 * about it.
1251 */ 1808 */
1252 if (unlikely(next_page == commit_page)) { 1809 if (unlikely(next_page == commit_page)) {
1253 cpu_buffer->commit_overrun++; 1810 local_inc(&cpu_buffer->commit_overrun);
1254 goto out_reset; 1811 goto out_reset;
1255 } 1812 }
1256 1813
1257 if (next_page == head_page) { 1814 /*
1258 if (!(buffer->flags & RB_FL_OVERWRITE)) 1815 * This is where the fun begins!
1259 goto out_reset; 1816 *
1260 1817 * We are fighting against races between a reader that
1261 /* tail_page has not moved yet? */ 1818 * could be on another CPU trying to swap its reader
1262 if (tail_page == cpu_buffer->tail_page) { 1819 * page with the buffer head.
1263 /* count overflows */ 1820 *
1264 cpu_buffer->overrun += 1821 * We are also fighting against interrupts coming in and
1265 local_read(&head_page->entries); 1822 * moving the head or tail on us as well.
1823 *
1824 * If the next page is the head page then we have filled
1825 * the buffer, unless the commit page is still on the
1826 * reader page.
1827 */
1828 if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) {
1266 1829
1267 rb_inc_page(cpu_buffer, &head_page); 1830 /*
1268 cpu_buffer->head_page = head_page; 1831 * If the commit is not on the reader page, then
1269 cpu_buffer->head_page->read = 0; 1832 * move the header page.
1833 */
1834 if (!rb_is_reader_page(cpu_buffer->commit_page)) {
1835 /*
1836 * If we are not in overwrite mode,
1837 * this is easy, just stop here.
1838 */
1839 if (!(buffer->flags & RB_FL_OVERWRITE))
1840 goto out_reset;
1841
1842 ret = rb_handle_head_page(cpu_buffer,
1843 tail_page,
1844 next_page);
1845 if (ret < 0)
1846 goto out_reset;
1847 if (ret)
1848 goto out_again;
1849 } else {
1850 /*
1851 * We need to be careful here too. The
1852 * commit page could still be on the reader
1853 * page. We could have a small buffer, and
1854 * have filled up the buffer with events
1855 * from interrupts and such, and wrapped.
1856 *
1857 * Note, if the tail page is also on the
1858 * reader_page, we let it move out.
1859 */
1860 if (unlikely((cpu_buffer->commit_page !=
1861 cpu_buffer->tail_page) &&
1862 (cpu_buffer->commit_page ==
1863 cpu_buffer->reader_page))) {
1864 local_inc(&cpu_buffer->commit_overrun);
1865 goto out_reset;
1866 }
1270 } 1867 }
1271 } 1868 }
1272 1869
1273 /* 1870 ret = rb_tail_page_update(cpu_buffer, tail_page, next_page);
1274 * If the tail page is still the same as what we think 1871 if (ret) {
1275 * it is, then it is up to us to update the tail 1872 /*
1276 * pointer. 1873 * Nested commits always have zero deltas, so
1277 */ 1874 * just reread the time stamp
1278 if (tail_page == cpu_buffer->tail_page) { 1875 */
1279 local_set(&next_page->write, 0);
1280 local_set(&next_page->entries, 0);
1281 local_set(&next_page->page->commit, 0);
1282 cpu_buffer->tail_page = next_page;
1283
1284 /* reread the time stamp */
1285 *ts = rb_time_stamp(buffer, cpu_buffer->cpu); 1876 *ts = rb_time_stamp(buffer, cpu_buffer->cpu);
1286 cpu_buffer->tail_page->page->time_stamp = *ts; 1877 next_page->page->time_stamp = *ts;
1287 } 1878 }
1288 1879
1289 rb_reset_tail(cpu_buffer, tail_page, tail, length); 1880 out_again:
1290 1881
1291 __raw_spin_unlock(&cpu_buffer->lock); 1882 rb_reset_tail(cpu_buffer, tail_page, tail, length);
1292 local_irq_restore(flags);
1293 1883
1294 /* fail and let the caller try again */ 1884 /* fail and let the caller try again */
1295 return ERR_PTR(-EAGAIN); 1885 return ERR_PTR(-EAGAIN);
@@ -1298,9 +1888,6 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1298 /* reset write */ 1888 /* reset write */
1299 rb_reset_tail(cpu_buffer, tail_page, tail, length); 1889 rb_reset_tail(cpu_buffer, tail_page, tail, length);
1300 1890
1301 if (likely(lock_taken))
1302 __raw_spin_unlock(&cpu_buffer->lock);
1303 local_irq_restore(flags);
1304 return NULL; 1891 return NULL;
1305} 1892}
1306 1893
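The rb_move_tail() hunks above drop the local_irq_save() + __raw_spin_lock() path, and with it the in_nmi() trylock fallback and the nmi_dropped counter: the tail page is now advanced through rb_tail_page_update() and contention is handled by failing and retrying. A minimal userspace sketch of that compare-and-swap idea, using made-up names (advance_tail, struct page_desc) rather than the kernel implementation:

#include <stdatomic.h>
#include <stdbool.h>

struct page_desc { struct page_desc *next; };

/* Only the writer that still sees the old tail gets to move it; any
 * other writer (or an NMI) simply fails and retries with a fresh
 * snapshot, so no IRQ disabling or NMI special-casing is needed. */
static bool advance_tail(_Atomic(struct page_desc *) *tail,
                         struct page_desc *old_tail,
                         struct page_desc *next)
{
        return atomic_compare_exchange_strong(tail, &old_tail, next);
}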
@@ -1317,6 +1904,9 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1317 barrier(); 1904 barrier();
1318 tail_page = cpu_buffer->tail_page; 1905 tail_page = cpu_buffer->tail_page;
1319 write = local_add_return(length, &tail_page->write); 1906 write = local_add_return(length, &tail_page->write);
1907
1908 /* set write to only the index of the write */
1909 write &= RB_WRITE_MASK;
1320 tail = write - length; 1910 tail = write - length;
1321 1911
1322 /* See if we shot past the end of this buffer page */ 1912
@@ -1361,12 +1951,16 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1361 bpage = cpu_buffer->tail_page; 1951 bpage = cpu_buffer->tail_page;
1362 1952
1363 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { 1953 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
1954 unsigned long write_mask =
1955 local_read(&bpage->write) & ~RB_WRITE_MASK;
1364 /* 1956 /*
1365 * This is on the tail page. It is possible that 1957 * This is on the tail page. It is possible that
1366 * a write could come in and move the tail page 1958 * a write could come in and move the tail page
1367 * and write to the next page. That is fine 1959 * and write to the next page. That is fine
1368 * because we just shorten what is on this page. 1960 * because we just shorten what is on this page.
1369 */ 1961 */
1962 old_index += write_mask;
1963 new_index += write_mask;
1370 index = local_cmpxchg(&bpage->write, old_index, new_index); 1964 index = local_cmpxchg(&bpage->write, old_index, new_index);
1371 if (index == old_index) 1965 if (index == old_index)
1372 return 1; 1966 return 1;
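Both hunks above revolve around RB_WRITE_MASK: the page's write field now keeps the byte index in its low bits and a counter in the high bits, and rb_try_to_discard() re-attaches those high bits before its cmpxchg so a stale discard cannot land on a page that has since been recycled. A hedged sketch of the packing; the mask width here is illustrative, not the kernel's actual constant:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define INDEX_MASK  0x000fffffUL          /* low bits: offset within the page */
#define TAG_MASK    (~INDEX_MASK)         /* high bits: page-reuse counter    */

static bool try_discard(_Atomic(uintptr_t) *write,
                        uintptr_t old_index, uintptr_t new_index)
{
        uintptr_t tag = atomic_load(write) & TAG_MASK;

        /* Re-attach the tag so a cmpxchg against a recycled page fails. */
        old_index += tag;
        new_index += tag;
        return atomic_compare_exchange_strong(write, &old_index, new_index);
}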
@@ -1875,9 +2469,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_write);
1875static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) 2469static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1876{ 2470{
1877 struct buffer_page *reader = cpu_buffer->reader_page; 2471 struct buffer_page *reader = cpu_buffer->reader_page;
1878 struct buffer_page *head = cpu_buffer->head_page; 2472 struct buffer_page *head = rb_set_head_page(cpu_buffer);
1879 struct buffer_page *commit = cpu_buffer->commit_page; 2473 struct buffer_page *commit = cpu_buffer->commit_page;
1880 2474
2475 /* In case of error, head will be NULL */
2476 if (unlikely(!head))
2477 return 1;
2478
1881 return reader->read == rb_page_commit(reader) && 2479 return reader->read == rb_page_commit(reader) &&
1882 (commit == reader || 2480 (commit == reader ||
1883 (commit == head && 2481 (commit == head &&
@@ -1968,7 +2566,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1968 return 0; 2566 return 0;
1969 2567
1970 cpu_buffer = buffer->buffers[cpu]; 2568 cpu_buffer = buffer->buffers[cpu];
1971 ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun) 2569 ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
1972 - cpu_buffer->read; 2570 - cpu_buffer->read;
1973 2571
1974 return ret; 2572 return ret;
@@ -1989,33 +2587,13 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1989 return 0; 2587 return 0;
1990 2588
1991 cpu_buffer = buffer->buffers[cpu]; 2589 cpu_buffer = buffer->buffers[cpu];
1992 ret = cpu_buffer->overrun; 2590 ret = local_read(&cpu_buffer->overrun);
1993 2591
1994 return ret; 2592 return ret;
1995} 2593}
1996EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); 2594EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
1997 2595
1998/** 2596/**
1999 * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped
2000 * @buffer: The ring buffer
2001 * @cpu: The per CPU buffer to get the number of overruns from
2002 */
2003unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
2004{
2005 struct ring_buffer_per_cpu *cpu_buffer;
2006 unsigned long ret;
2007
2008 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2009 return 0;
2010
2011 cpu_buffer = buffer->buffers[cpu];
2012 ret = cpu_buffer->nmi_dropped;
2013
2014 return ret;
2015}
2016EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
2017
2018/**
2019 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits 2597 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
2020 * @buffer: The ring buffer 2598 * @buffer: The ring buffer
2021 * @cpu: The per CPU buffer to get the number of overruns from 2599 * @cpu: The per CPU buffer to get the number of overruns from
@@ -2030,7 +2608,7 @@ ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
2030 return 0; 2608 return 0;
2031 2609
2032 cpu_buffer = buffer->buffers[cpu]; 2610 cpu_buffer = buffer->buffers[cpu];
2033 ret = cpu_buffer->commit_overrun; 2611 ret = local_read(&cpu_buffer->commit_overrun);
2034 2612
2035 return ret; 2613 return ret;
2036} 2614}
@@ -2053,7 +2631,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2053 for_each_buffer_cpu(buffer, cpu) { 2631 for_each_buffer_cpu(buffer, cpu) {
2054 cpu_buffer = buffer->buffers[cpu]; 2632 cpu_buffer = buffer->buffers[cpu];
2055 entries += (local_read(&cpu_buffer->entries) - 2633 entries += (local_read(&cpu_buffer->entries) -
2056 cpu_buffer->overrun) - cpu_buffer->read; 2634 local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
2057 } 2635 }
2058 2636
2059 return entries; 2637 return entries;
@@ -2076,7 +2654,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
2076 /* if you care about this being correct, lock the buffer */ 2654 /* if you care about this being correct, lock the buffer */
2077 for_each_buffer_cpu(buffer, cpu) { 2655 for_each_buffer_cpu(buffer, cpu) {
2078 cpu_buffer = buffer->buffers[cpu]; 2656 cpu_buffer = buffer->buffers[cpu];
2079 overruns += cpu_buffer->overrun; 2657 overruns += local_read(&cpu_buffer->overrun);
2080 } 2658 }
2081 2659
2082 return overruns; 2660 return overruns;
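The overrun statistics switch here from plain unsigned long to local_t: the writer bumps them with local_inc() and readers sample them with local_read(), no lock required. A userspace stand-in for that pattern using relaxed C11 atomics (local_t itself is cheaper still, because only the owning CPU ever writes the counter):

#include <stdatomic.h>

struct cpu_stats { atomic_ulong overrun; };

static void note_overrun(struct cpu_stats *s)
{
        atomic_fetch_add_explicit(&s->overrun, 1, memory_order_relaxed);
}

static unsigned long read_overrun(struct cpu_stats *s)
{
        /* Racy but self-consistent snapshot, which is all the stat files need. */
        return atomic_load_explicit(&s->overrun, memory_order_relaxed);
}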
@@ -2089,8 +2667,10 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
2089 2667
2090 /* Iterator usage is expected to have record disabled */ 2668 /* Iterator usage is expected to have record disabled */
2091 if (list_empty(&cpu_buffer->reader_page->list)) { 2669 if (list_empty(&cpu_buffer->reader_page->list)) {
2092 iter->head_page = cpu_buffer->head_page; 2670 iter->head_page = rb_set_head_page(cpu_buffer);
2093 iter->head = cpu_buffer->head_page->read; 2671 if (unlikely(!iter->head_page))
2672 return;
2673 iter->head = iter->head_page->read;
2094 } else { 2674 } else {
2095 iter->head_page = cpu_buffer->reader_page; 2675 iter->head_page = cpu_buffer->reader_page;
2096 iter->head = cpu_buffer->reader_page->read; 2676 iter->head = cpu_buffer->reader_page->read;
@@ -2207,6 +2787,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2207 struct buffer_page *reader = NULL; 2787 struct buffer_page *reader = NULL;
2208 unsigned long flags; 2788 unsigned long flags;
2209 int nr_loops = 0; 2789 int nr_loops = 0;
2790 int ret;
2210 2791
2211 local_irq_save(flags); 2792 local_irq_save(flags);
2212 __raw_spin_lock(&cpu_buffer->lock); 2793 __raw_spin_lock(&cpu_buffer->lock);
@@ -2240,30 +2821,56 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2240 goto out; 2821 goto out;
2241 2822
2242 /* 2823 /*
2243 * Splice the empty reader page into the list around the head.
2244 * Reset the reader page to size zero. 2824 * Reset the reader page to size zero.
2245 */ 2825 */
2826 local_set(&cpu_buffer->reader_page->write, 0);
2827 local_set(&cpu_buffer->reader_page->entries, 0);
2828 local_set(&cpu_buffer->reader_page->page->commit, 0);
2246 2829
2247 reader = cpu_buffer->head_page; 2830 spin:
2831 /*
2832 * Splice the empty reader page into the list around the head.
2833 */
2834 reader = rb_set_head_page(cpu_buffer);
2248 cpu_buffer->reader_page->list.next = reader->list.next; 2835 cpu_buffer->reader_page->list.next = reader->list.next;
2249 cpu_buffer->reader_page->list.prev = reader->list.prev; 2836 cpu_buffer->reader_page->list.prev = reader->list.prev;
2250 2837
2251 local_set(&cpu_buffer->reader_page->write, 0); 2838 /*
2252 local_set(&cpu_buffer->reader_page->entries, 0); 2839 * cpu_buffer->pages just needs to point to the buffer, it
2253 local_set(&cpu_buffer->reader_page->page->commit, 0); 2840 * has no specific buffer page to point to. Let's move it out
2841 * of our way so we don't accidentally swap it.
2842 */
2843 cpu_buffer->pages = reader->list.prev;
2254 2844
2255 /* Make the reader page now replace the head */ 2845 /* The reader page will be pointing to the new head */
2256 reader->list.prev->next = &cpu_buffer->reader_page->list; 2846 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
2257 reader->list.next->prev = &cpu_buffer->reader_page->list; 2847
2848 /*
2849 * Here's the tricky part.
2850 *
2851 * We need to move the pointer past the header page.
2852 * But we can only do that if a writer is not currently
2853 * moving it. The page before the header page has the
2854 * flag bit '1' set if it is pointing to the page we want.
2855 * But if the writer is in the process of moving it
2856 * then it will be '2' or already moved '0'.
2857 */
2858
2859 ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
2258 2860
2259 /* 2861 /*
2260 * If the tail is on the reader, then we must set the head 2862 * If we did not convert it, then we must try again.
2261 * to the inserted page, otherwise we set it one before.
2262 */ 2863 */
2263 cpu_buffer->head_page = cpu_buffer->reader_page; 2864 if (!ret)
2865 goto spin;
2264 2866
2265 if (cpu_buffer->commit_page != reader) 2867 /*
2266 rb_inc_page(cpu_buffer, &cpu_buffer->head_page); 2868 * Yeah! We succeeded in replacing the page.
2869 *
2870 * Now make the new head point back to the reader page.
2871 */
2872 reader->list.next->prev = &cpu_buffer->reader_page->list;
2873 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
2267 2874
2268 /* Finally update the reader page to the new head */ 2875 /* Finally update the reader page to the new head */
2269 cpu_buffer->reader_page = reader; 2876 cpu_buffer->reader_page = reader;
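The comment in this hunk describes the flag kept in the low bits of the list pointer that precedes the head page: '1' means it really points at the head, '2' means a writer is moving it, '0' means it has already moved. That lets rb_head_page_replace() swap the reader page in with a single compare-and-swap on the tagged pointer, and the reader loops back to the spin: label if it loses the race. A simplified userspace sketch of the pointer-tagging trick (constants and names are illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define RB_PAGE_HEAD  1UL     /* pages are aligned, so the low bits are free */

static bool replace_head(_Atomic(uintptr_t) *prev_next,
                         void *old_head, void *new_head)
{
        uintptr_t expect = (uintptr_t)old_head | RB_PAGE_HEAD;
        uintptr_t val    = (uintptr_t)new_head | RB_PAGE_HEAD;

        /* Fails if a writer already cleared or changed the HEAD flag,
         * which is exactly when the reader has to try again. */
        return atomic_compare_exchange_strong(prev_next, &expect, val);
}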
@@ -2717,8 +3324,10 @@ EXPORT_SYMBOL_GPL(ring_buffer_size);
2717static void 3324static void
2718rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) 3325rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2719{ 3326{
3327 rb_head_page_deactivate(cpu_buffer);
3328
2720 cpu_buffer->head_page 3329 cpu_buffer->head_page
2721 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 3330 = list_entry(cpu_buffer->pages, struct buffer_page, list);
2722 local_set(&cpu_buffer->head_page->write, 0); 3331 local_set(&cpu_buffer->head_page->write, 0);
2723 local_set(&cpu_buffer->head_page->entries, 0); 3332 local_set(&cpu_buffer->head_page->entries, 0);
2724 local_set(&cpu_buffer->head_page->page->commit, 0); 3333 local_set(&cpu_buffer->head_page->page->commit, 0);
@@ -2734,16 +3343,17 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2734 local_set(&cpu_buffer->reader_page->page->commit, 0); 3343 local_set(&cpu_buffer->reader_page->page->commit, 0);
2735 cpu_buffer->reader_page->read = 0; 3344 cpu_buffer->reader_page->read = 0;
2736 3345
2737 cpu_buffer->nmi_dropped = 0; 3346 local_set(&cpu_buffer->commit_overrun, 0);
2738 cpu_buffer->commit_overrun = 0; 3347 local_set(&cpu_buffer->overrun, 0);
2739 cpu_buffer->overrun = 0;
2740 cpu_buffer->read = 0;
2741 local_set(&cpu_buffer->entries, 0); 3348 local_set(&cpu_buffer->entries, 0);
2742 local_set(&cpu_buffer->committing, 0); 3349 local_set(&cpu_buffer->committing, 0);
2743 local_set(&cpu_buffer->commits, 0); 3350 local_set(&cpu_buffer->commits, 0);
3351 cpu_buffer->read = 0;
2744 3352
2745 cpu_buffer->write_stamp = 0; 3353 cpu_buffer->write_stamp = 0;
2746 cpu_buffer->read_stamp = 0; 3354 cpu_buffer->read_stamp = 0;
3355
3356 rb_head_page_activate(cpu_buffer);
2747} 3357}
2748 3358
2749/** 3359/**
@@ -3091,7 +3701,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3091 read = 0; 3701 read = 0;
3092 } else { 3702 } else {
3093 /* update the entry counter */ 3703 /* update the entry counter */
3094 cpu_buffer->read += local_read(&reader->entries); 3704 cpu_buffer->read += rb_page_entries(reader);
3095 3705
3096 /* swap the pages */ 3706 /* swap the pages */
3097 rb_init_page(bpage); 3707 rb_init_page(bpage);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c22b40f8f576..e793cda91dd3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -50,7 +50,7 @@ unsigned long __read_mostly tracing_thresh;
50 * On boot up, the ring buffer is set to the minimum size, so that 50 * On boot up, the ring buffer is set to the minimum size, so that
51 * we do not waste memory on systems that are not using tracing. 51 * we do not waste memory on systems that are not using tracing.
52 */ 52 */
53static int ring_buffer_expanded; 53int ring_buffer_expanded;
54 54
55/* 55/*
56 * We need to change this state when a selftest is running. 56 * We need to change this state when a selftest is running.
@@ -64,7 +64,7 @@ static bool __read_mostly tracing_selftest_running;
64/* 64/*
65 * If a tracer is running, we do not want to run SELFTEST. 65 * If a tracer is running, we do not want to run SELFTEST.
66 */ 66 */
67static bool __read_mostly tracing_selftest_disabled; 67bool __read_mostly tracing_selftest_disabled;
68 68
69/* For tracers that don't implement custom flags */ 69/* For tracers that don't implement custom flags */
70static struct tracer_opt dummy_tracer_opt[] = { 70static struct tracer_opt dummy_tracer_opt[] = {
@@ -89,7 +89,7 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
89 */ 89 */
90static int tracing_disabled = 1; 90static int tracing_disabled = 1;
91 91
92static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 92DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
93 93
94static inline void ftrace_disable_cpu(void) 94static inline void ftrace_disable_cpu(void)
95{ 95{
@@ -867,10 +867,6 @@ struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
867 867
868 return event; 868 return event;
869} 869}
870static void ftrace_trace_stack(struct trace_array *tr,
871 unsigned long flags, int skip, int pc);
872static void ftrace_trace_userstack(struct trace_array *tr,
873 unsigned long flags, int pc);
874 870
875static inline void __trace_buffer_unlock_commit(struct trace_array *tr, 871static inline void __trace_buffer_unlock_commit(struct trace_array *tr,
876 struct ring_buffer_event *event, 872 struct ring_buffer_event *event,
@@ -947,54 +943,6 @@ trace_function(struct trace_array *tr,
947 ring_buffer_unlock_commit(tr->buffer, event); 943 ring_buffer_unlock_commit(tr->buffer, event);
948} 944}
949 945
950#ifdef CONFIG_FUNCTION_GRAPH_TRACER
951static int __trace_graph_entry(struct trace_array *tr,
952 struct ftrace_graph_ent *trace,
953 unsigned long flags,
954 int pc)
955{
956 struct ftrace_event_call *call = &event_funcgraph_entry;
957 struct ring_buffer_event *event;
958 struct ftrace_graph_ent_entry *entry;
959
960 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
961 return 0;
962
963 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
964 sizeof(*entry), flags, pc);
965 if (!event)
966 return 0;
967 entry = ring_buffer_event_data(event);
968 entry->graph_ent = *trace;
969 if (!filter_current_check_discard(call, entry, event))
970 ring_buffer_unlock_commit(global_trace.buffer, event);
971
972 return 1;
973}
974
975static void __trace_graph_return(struct trace_array *tr,
976 struct ftrace_graph_ret *trace,
977 unsigned long flags,
978 int pc)
979{
980 struct ftrace_event_call *call = &event_funcgraph_exit;
981 struct ring_buffer_event *event;
982 struct ftrace_graph_ret_entry *entry;
983
984 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
985 return;
986
987 event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET,
988 sizeof(*entry), flags, pc);
989 if (!event)
990 return;
991 entry = ring_buffer_event_data(event);
992 entry->ret = *trace;
993 if (!filter_current_check_discard(call, entry, event))
994 ring_buffer_unlock_commit(global_trace.buffer, event);
995}
996#endif
997
998void 946void
999ftrace(struct trace_array *tr, struct trace_array_cpu *data, 947ftrace(struct trace_array *tr, struct trace_array_cpu *data,
1000 unsigned long ip, unsigned long parent_ip, unsigned long flags, 948 unsigned long ip, unsigned long parent_ip, unsigned long flags,
@@ -1004,11 +952,11 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
1004 trace_function(tr, ip, parent_ip, flags, pc); 952 trace_function(tr, ip, parent_ip, flags, pc);
1005} 953}
1006 954
955#ifdef CONFIG_STACKTRACE
1007static void __ftrace_trace_stack(struct trace_array *tr, 956static void __ftrace_trace_stack(struct trace_array *tr,
1008 unsigned long flags, 957 unsigned long flags,
1009 int skip, int pc) 958 int skip, int pc)
1010{ 959{
1011#ifdef CONFIG_STACKTRACE
1012 struct ftrace_event_call *call = &event_kernel_stack; 960 struct ftrace_event_call *call = &event_kernel_stack;
1013 struct ring_buffer_event *event; 961 struct ring_buffer_event *event;
1014 struct stack_entry *entry; 962 struct stack_entry *entry;
@@ -1029,12 +977,10 @@ static void __ftrace_trace_stack(struct trace_array *tr,
1029 save_stack_trace(&trace); 977 save_stack_trace(&trace);
1030 if (!filter_check_discard(call, entry, tr->buffer, event)) 978 if (!filter_check_discard(call, entry, tr->buffer, event))
1031 ring_buffer_unlock_commit(tr->buffer, event); 979 ring_buffer_unlock_commit(tr->buffer, event);
1032#endif
1033} 980}
1034 981
1035static void ftrace_trace_stack(struct trace_array *tr, 982void ftrace_trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1036 unsigned long flags, 983 int pc)
1037 int skip, int pc)
1038{ 984{
1039 if (!(trace_flags & TRACE_ITER_STACKTRACE)) 985 if (!(trace_flags & TRACE_ITER_STACKTRACE))
1040 return; 986 return;
@@ -1042,17 +988,14 @@ static void ftrace_trace_stack(struct trace_array *tr,
1042 __ftrace_trace_stack(tr, flags, skip, pc); 988 __ftrace_trace_stack(tr, flags, skip, pc);
1043} 989}
1044 990
1045void __trace_stack(struct trace_array *tr, 991void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1046 unsigned long flags, 992 int pc)
1047 int skip, int pc)
1048{ 993{
1049 __ftrace_trace_stack(tr, flags, skip, pc); 994 __ftrace_trace_stack(tr, flags, skip, pc);
1050} 995}
1051 996
1052static void ftrace_trace_userstack(struct trace_array *tr, 997void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags, int pc)
1053 unsigned long flags, int pc)
1054{ 998{
1055#ifdef CONFIG_STACKTRACE
1056 struct ftrace_event_call *call = &event_user_stack; 999 struct ftrace_event_call *call = &event_user_stack;
1057 struct ring_buffer_event *event; 1000 struct ring_buffer_event *event;
1058 struct userstack_entry *entry; 1001 struct userstack_entry *entry;
@@ -1077,7 +1020,6 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1077 save_stack_trace_user(&trace); 1020 save_stack_trace_user(&trace);
1078 if (!filter_check_discard(call, entry, tr->buffer, event)) 1021 if (!filter_check_discard(call, entry, tr->buffer, event))
1079 ring_buffer_unlock_commit(tr->buffer, event); 1022 ring_buffer_unlock_commit(tr->buffer, event);
1080#endif
1081} 1023}
1082 1024
1083#ifdef UNUSED 1025#ifdef UNUSED
@@ -1087,6 +1029,8 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1087} 1029}
1088#endif /* UNUSED */ 1030#endif /* UNUSED */
1089 1031
1032#endif /* CONFIG_STACKTRACE */
1033
1090static void 1034static void
1091ftrace_trace_special(void *__tr, 1035ftrace_trace_special(void *__tr,
1092 unsigned long arg1, unsigned long arg2, unsigned long arg3, 1036 unsigned long arg1, unsigned long arg2, unsigned long arg3,
@@ -1115,62 +1059,6 @@ __trace_special(void *__tr, void *__data,
1115} 1059}
1116 1060
1117void 1061void
1118tracing_sched_switch_trace(struct trace_array *tr,
1119 struct task_struct *prev,
1120 struct task_struct *next,
1121 unsigned long flags, int pc)
1122{
1123 struct ftrace_event_call *call = &event_context_switch;
1124 struct ring_buffer_event *event;
1125 struct ctx_switch_entry *entry;
1126
1127 event = trace_buffer_lock_reserve(tr, TRACE_CTX,
1128 sizeof(*entry), flags, pc);
1129 if (!event)
1130 return;
1131 entry = ring_buffer_event_data(event);
1132 entry->prev_pid = prev->pid;
1133 entry->prev_prio = prev->prio;
1134 entry->prev_state = prev->state;
1135 entry->next_pid = next->pid;
1136 entry->next_prio = next->prio;
1137 entry->next_state = next->state;
1138 entry->next_cpu = task_cpu(next);
1139
1140 if (!filter_check_discard(call, entry, tr->buffer, event))
1141 trace_buffer_unlock_commit(tr, event, flags, pc);
1142}
1143
1144void
1145tracing_sched_wakeup_trace(struct trace_array *tr,
1146 struct task_struct *wakee,
1147 struct task_struct *curr,
1148 unsigned long flags, int pc)
1149{
1150 struct ftrace_event_call *call = &event_wakeup;
1151 struct ring_buffer_event *event;
1152 struct ctx_switch_entry *entry;
1153
1154 event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
1155 sizeof(*entry), flags, pc);
1156 if (!event)
1157 return;
1158 entry = ring_buffer_event_data(event);
1159 entry->prev_pid = curr->pid;
1160 entry->prev_prio = curr->prio;
1161 entry->prev_state = curr->state;
1162 entry->next_pid = wakee->pid;
1163 entry->next_prio = wakee->prio;
1164 entry->next_state = wakee->state;
1165 entry->next_cpu = task_cpu(wakee);
1166
1167 if (!filter_check_discard(call, entry, tr->buffer, event))
1168 ring_buffer_unlock_commit(tr->buffer, event);
1169 ftrace_trace_stack(tr, flags, 6, pc);
1170 ftrace_trace_userstack(tr, flags, pc);
1171}
1172
1173void
1174ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) 1062ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1175{ 1063{
1176 struct trace_array *tr = &global_trace; 1064 struct trace_array *tr = &global_trace;
@@ -1194,68 +1082,6 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1194 local_irq_restore(flags); 1082 local_irq_restore(flags);
1195} 1083}
1196 1084
1197#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1198int trace_graph_entry(struct ftrace_graph_ent *trace)
1199{
1200 struct trace_array *tr = &global_trace;
1201 struct trace_array_cpu *data;
1202 unsigned long flags;
1203 long disabled;
1204 int ret;
1205 int cpu;
1206 int pc;
1207
1208 if (!ftrace_trace_task(current))
1209 return 0;
1210
1211 if (!ftrace_graph_addr(trace->func))
1212 return 0;
1213
1214 local_irq_save(flags);
1215 cpu = raw_smp_processor_id();
1216 data = tr->data[cpu];
1217 disabled = atomic_inc_return(&data->disabled);
1218 if (likely(disabled == 1)) {
1219 pc = preempt_count();
1220 ret = __trace_graph_entry(tr, trace, flags, pc);
1221 } else {
1222 ret = 0;
1223 }
1224 /* Only do the atomic if it is not already set */
1225 if (!test_tsk_trace_graph(current))
1226 set_tsk_trace_graph(current);
1227
1228 atomic_dec(&data->disabled);
1229 local_irq_restore(flags);
1230
1231 return ret;
1232}
1233
1234void trace_graph_return(struct ftrace_graph_ret *trace)
1235{
1236 struct trace_array *tr = &global_trace;
1237 struct trace_array_cpu *data;
1238 unsigned long flags;
1239 long disabled;
1240 int cpu;
1241 int pc;
1242
1243 local_irq_save(flags);
1244 cpu = raw_smp_processor_id();
1245 data = tr->data[cpu];
1246 disabled = atomic_inc_return(&data->disabled);
1247 if (likely(disabled == 1)) {
1248 pc = preempt_count();
1249 __trace_graph_return(tr, trace, flags, pc);
1250 }
1251 if (!trace->depth)
1252 clear_tsk_trace_graph(current);
1253 atomic_dec(&data->disabled);
1254 local_irq_restore(flags);
1255}
1256#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1257
1258
1259/** 1085/**
1260 * trace_vbprintk - write binary msg to tracing buffer 1086 * trace_vbprintk - write binary msg to tracing buffer
1261 * 1087 *
@@ -2257,8 +2083,8 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
2257 len += 3; /* "no" and newline */ 2083 len += 3; /* "no" and newline */
2258 } 2084 }
2259 2085
2260 /* +2 for \n and \0 */ 2086 /* +1 for \0 */
2261 buf = kmalloc(len + 2, GFP_KERNEL); 2087 buf = kmalloc(len + 1, GFP_KERNEL);
2262 if (!buf) { 2088 if (!buf) {
2263 mutex_unlock(&trace_types_lock); 2089 mutex_unlock(&trace_types_lock);
2264 return -ENOMEM; 2090 return -ENOMEM;
@@ -2281,7 +2107,7 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
2281 } 2107 }
2282 mutex_unlock(&trace_types_lock); 2108 mutex_unlock(&trace_types_lock);
2283 2109
2284 WARN_ON(r >= len + 2); 2110 WARN_ON(r >= len + 1);
2285 2111
2286 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2112 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2287 2113
@@ -3633,9 +3459,6 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3633 cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); 3459 cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
3634 trace_seq_printf(s, "commit overrun: %ld\n", cnt); 3460 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
3635 3461
3636 cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu);
3637 trace_seq_printf(s, "nmi dropped: %ld\n", cnt);
3638
3639 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); 3462 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
3640 3463
3641 kfree(s); 3464 kfree(s);
@@ -4273,7 +4096,6 @@ void ftrace_dump(void)
4273 4096
4274__init static int tracer_alloc_buffers(void) 4097__init static int tracer_alloc_buffers(void)
4275{ 4098{
4276 struct trace_array_cpu *data;
4277 int ring_buf_size; 4099 int ring_buf_size;
4278 int i; 4100 int i;
4279 int ret = -ENOMEM; 4101 int ret = -ENOMEM;
@@ -4323,7 +4145,7 @@ __init static int tracer_alloc_buffers(void)
4323 4145
4324 /* Allocate the first page for all buffers */ 4146 /* Allocate the first page for all buffers */
4325 for_each_tracing_cpu(i) { 4147 for_each_tracing_cpu(i) {
4326 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); 4148 global_trace.data[i] = &per_cpu(global_trace_cpu, i);
4327 max_tr.data[i] = &per_cpu(max_data, i); 4149 max_tr.data[i] = &per_cpu(max_data, i);
4328 } 4150 }
4329 4151
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8b9f4f6e9559..d682357e4b1f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -467,6 +467,7 @@ void trace_function(struct trace_array *tr,
467 467
468void trace_graph_return(struct ftrace_graph_ret *trace); 468void trace_graph_return(struct ftrace_graph_ret *trace);
469int trace_graph_entry(struct ftrace_graph_ent *trace); 469int trace_graph_entry(struct ftrace_graph_ent *trace);
470void set_graph_array(struct trace_array *tr);
470 471
471void tracing_start_cmdline_record(void); 472void tracing_start_cmdline_record(void);
472void tracing_stop_cmdline_record(void); 473void tracing_stop_cmdline_record(void);
@@ -485,9 +486,31 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
485void update_max_tr_single(struct trace_array *tr, 486void update_max_tr_single(struct trace_array *tr,
486 struct task_struct *tsk, int cpu); 487 struct task_struct *tsk, int cpu);
487 488
488void __trace_stack(struct trace_array *tr, 489#ifdef CONFIG_STACKTRACE
489 unsigned long flags, 490void ftrace_trace_stack(struct trace_array *tr, unsigned long flags,
490 int skip, int pc); 491 int skip, int pc);
492
493void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags,
494 int pc);
495
496void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
497 int pc);
498#else
499static inline void ftrace_trace_stack(struct trace_array *tr,
500 unsigned long flags, int skip, int pc)
501{
502}
503
504static inline void ftrace_trace_userstack(struct trace_array *tr,
505 unsigned long flags, int pc)
506{
507}
508
509static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
510 int skip, int pc)
511{
512}
513#endif /* CONFIG_STACKTRACE */
491 514
492extern cycle_t ftrace_now(int cpu); 515extern cycle_t ftrace_now(int cpu);
493 516
@@ -513,6 +536,10 @@ extern unsigned long ftrace_update_tot_cnt;
513extern int DYN_FTRACE_TEST_NAME(void); 536extern int DYN_FTRACE_TEST_NAME(void);
514#endif 537#endif
515 538
539extern int ring_buffer_expanded;
540extern bool tracing_selftest_disabled;
541DECLARE_PER_CPU(local_t, ftrace_cpu_disabled);
542
516#ifdef CONFIG_FTRACE_STARTUP_TEST 543#ifdef CONFIG_FTRACE_STARTUP_TEST
517extern int trace_selftest_startup_function(struct tracer *trace, 544extern int trace_selftest_startup_function(struct tracer *trace,
518 struct trace_array *tr); 545 struct trace_array *tr);
@@ -743,13 +770,15 @@ struct event_filter {
743 int n_preds; 770 int n_preds;
744 struct filter_pred **preds; 771 struct filter_pred **preds;
745 char *filter_string; 772 char *filter_string;
773 bool no_reset;
746}; 774};
747 775
748struct event_subsystem { 776struct event_subsystem {
749 struct list_head list; 777 struct list_head list;
750 const char *name; 778 const char *name;
751 struct dentry *entry; 779 struct dentry *entry;
752 void *filter; 780 struct event_filter *filter;
781 int nr_events;
753}; 782};
754 783
755struct filter_pred; 784struct filter_pred;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index e75276a49cf5..e0cbede96783 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -17,6 +17,8 @@
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/delay.h> 18#include <linux/delay.h>
19 19
20#include <asm/setup.h>
21
20#include "trace_output.h" 22#include "trace_output.h"
21 23
22#define TRACE_SYSTEM "TRACE_SYSTEM" 24#define TRACE_SYSTEM "TRACE_SYSTEM"
@@ -849,8 +851,10 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
849 851
850 /* First see if we did not already create this dir */ 852 /* First see if we did not already create this dir */
851 list_for_each_entry(system, &event_subsystems, list) { 853 list_for_each_entry(system, &event_subsystems, list) {
852 if (strcmp(system->name, name) == 0) 854 if (strcmp(system->name, name) == 0) {
855 system->nr_events++;
853 return system->entry; 856 return system->entry;
857 }
854 } 858 }
855 859
856 /* need to create new entry */ 860 /* need to create new entry */
@@ -869,6 +873,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
869 return d_events; 873 return d_events;
870 } 874 }
871 875
876 system->nr_events = 1;
872 system->name = kstrdup(name, GFP_KERNEL); 877 system->name = kstrdup(name, GFP_KERNEL);
873 if (!system->name) { 878 if (!system->name) {
874 debugfs_remove(system->entry); 879 debugfs_remove(system->entry);
@@ -987,6 +992,32 @@ struct ftrace_module_file_ops {
987 struct file_operations filter; 992 struct file_operations filter;
988}; 993};
989 994
995static void remove_subsystem_dir(const char *name)
996{
997 struct event_subsystem *system;
998
999 if (strcmp(name, TRACE_SYSTEM) == 0)
1000 return;
1001
1002 list_for_each_entry(system, &event_subsystems, list) {
1003 if (strcmp(system->name, name) == 0) {
1004 if (!--system->nr_events) {
1005 struct event_filter *filter = system->filter;
1006
1007 debugfs_remove_recursive(system->entry);
1008 list_del(&system->list);
1009 if (filter) {
1010 kfree(filter->filter_string);
1011 kfree(filter);
1012 }
1013 kfree(system->name);
1014 kfree(system);
1015 }
1016 break;
1017 }
1018 }
1019}
1020
990static struct ftrace_module_file_ops * 1021static struct ftrace_module_file_ops *
991trace_create_file_ops(struct module *mod) 1022trace_create_file_ops(struct module *mod)
992{ 1023{
@@ -1077,6 +1108,7 @@ static void trace_module_remove_events(struct module *mod)
1077 list_del(&call->list); 1108 list_del(&call->list);
1078 trace_destroy_fields(call); 1109 trace_destroy_fields(call);
1079 destroy_preds(call); 1110 destroy_preds(call);
1111 remove_subsystem_dir(call->system);
1080 } 1112 }
1081 } 1113 }
1082 1114
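event_subsystem now carries an nr_events count: event_subsystem_dir() bumps it when another event reuses an existing directory, and remove_subsystem_dir() tears down the directory, filter and name once the last event from a module is gone. A minimal model of that lifetime rule (hypothetical types, no locking shown; the kernel holds event_mutex around these paths):

struct subsys {
        int nr_events;
        /* dentry, filter, name, ... */
};

static void subsys_get(struct subsys *s)
{
        s->nr_events++;                 /* another event shares the directory */
}

static void subsys_put(struct subsys *s)
{
        if (!--s->nr_events) {
                /* debugfs_remove_recursive(), free the filter and name, free s */
        }
}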
@@ -1133,6 +1165,18 @@ struct notifier_block trace_module_nb = {
1133extern struct ftrace_event_call __start_ftrace_events[]; 1165extern struct ftrace_event_call __start_ftrace_events[];
1134extern struct ftrace_event_call __stop_ftrace_events[]; 1166extern struct ftrace_event_call __stop_ftrace_events[];
1135 1167
1168static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1169
1170static __init int setup_trace_event(char *str)
1171{
1172 strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
1173 ring_buffer_expanded = 1;
1174 tracing_selftest_disabled = 1;
1175
1176 return 1;
1177}
1178__setup("trace_event=", setup_trace_event);
1179
1136static __init int event_trace_init(void) 1180static __init int event_trace_init(void)
1137{ 1181{
1138 struct ftrace_event_call *call; 1182 struct ftrace_event_call *call;
@@ -1140,6 +1184,8 @@ static __init int event_trace_init(void)
1140 struct dentry *entry; 1184 struct dentry *entry;
1141 struct dentry *d_events; 1185 struct dentry *d_events;
1142 int ret; 1186 int ret;
1187 char *buf = bootup_event_buf;
1188 char *token;
1143 1189
1144 d_tracer = tracing_init_dentry(); 1190 d_tracer = tracing_init_dentry();
1145 if (!d_tracer) 1191 if (!d_tracer)
@@ -1185,6 +1231,19 @@ static __init int event_trace_init(void)
1185 &ftrace_event_format_fops); 1231 &ftrace_event_format_fops);
1186 } 1232 }
1187 1233
1234 while (true) {
1235 token = strsep(&buf, ",");
1236
1237 if (!token)
1238 break;
1239 if (!*token)
1240 continue;
1241
1242 ret = ftrace_set_clr_event(token, 1);
1243 if (ret)
1244 pr_warning("Failed to enable trace event: %s\n", token);
1245 }
1246
1188 ret = register_module_notifier(&trace_module_nb); 1247 ret = register_module_notifier(&trace_module_nb);
1189 if (ret) 1248 if (ret)
1190 pr_warning("Failed to register trace events module notifier\n"); 1249 pr_warning("Failed to register trace events module notifier\n");
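The new trace_event= boot parameter is stashed in bootup_event_buf and split on commas with strsep(), skipping empty tokens, before each name is handed to ftrace_set_clr_event(). A userspace sketch of just the parsing, assuming a libc that provides strsep():

#include <stdio.h>
#include <string.h>

static void enable_boot_events(char *list)           /* e.g. "sched,irq,,kmem" */
{
        char *token;

        while ((token = strsep(&list, ",")) != NULL) {
                if (!*token)                          /* tolerate empty entries */
                        continue;
                printf("enable %s\n", token);         /* ftrace_set_clr_event() in-kernel */
        }
}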
@@ -1392,10 +1451,10 @@ static __init void event_trace_self_test_with_function(void)
1392 1451
1393static __init int event_trace_self_tests_init(void) 1452static __init int event_trace_self_tests_init(void)
1394{ 1453{
1395 1454 if (!tracing_selftest_disabled) {
1396 event_trace_self_tests(); 1455 event_trace_self_tests();
1397 1456 event_trace_self_test_with_function();
1398 event_trace_self_test_with_function(); 1457 }
1399 1458
1400 return 0; 1459 return 0;
1401} 1460}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index f32dc9d1ea7b..490337abed75 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -176,11 +176,13 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
176static int filter_pred_strloc(struct filter_pred *pred, void *event, 176static int filter_pred_strloc(struct filter_pred *pred, void *event,
177 int val1, int val2) 177 int val1, int val2)
178{ 178{
179 unsigned short str_loc = *(unsigned short *)(event + pred->offset); 179 u32 str_item = *(u32 *)(event + pred->offset);
180 int str_loc = str_item & 0xffff;
181 int str_len = str_item >> 16;
180 char *addr = (char *)(event + str_loc); 182 char *addr = (char *)(event + str_loc);
181 int cmp, match; 183 int cmp, match;
182 184
183 cmp = strncmp(addr, pred->str_val, pred->str_len); 185 cmp = strncmp(addr, pred->str_val, str_len);
184 186
185 match = (!cmp) ^ pred->not; 187 match = (!cmp) ^ pred->not;
186 188
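filter_pred_strloc() now reads a single u32 "data location" word and unpacks it exactly as the hunk shows: the string's offset in the low 16 bits and its length in the high 16 bits, so the comparison uses the recorded length rather than the field size. A small standalone version of that unpacking:

#include <stdint.h>
#include <string.h>

static int str_matches(const void *event, int field_offset, const char *want)
{
        uint32_t item = *(const uint32_t *)((const char *)event + field_offset);
        uint32_t off  = item & 0xffff;    /* where the string lives in the record */
        uint32_t len  = item >> 16;       /* how long it is                       */

        return strncmp((const char *)event + off, want, len) == 0;
}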
@@ -418,24 +420,29 @@ oom:
418} 420}
419EXPORT_SYMBOL_GPL(init_preds); 421EXPORT_SYMBOL_GPL(init_preds);
420 422
421static void filter_free_subsystem_preds(struct event_subsystem *system) 423enum {
424 FILTER_DISABLE_ALL,
425 FILTER_INIT_NO_RESET,
426 FILTER_SKIP_NO_RESET,
427};
428
429static void filter_free_subsystem_preds(struct event_subsystem *system,
430 int flag)
422{ 431{
423 struct event_filter *filter = system->filter;
424 struct ftrace_event_call *call; 432 struct ftrace_event_call *call;
425 int i;
426
427 if (filter->n_preds) {
428 for (i = 0; i < filter->n_preds; i++)
429 filter_free_pred(filter->preds[i]);
430 kfree(filter->preds);
431 filter->preds = NULL;
432 filter->n_preds = 0;
433 }
434 433
435 list_for_each_entry(call, &ftrace_events, list) { 434 list_for_each_entry(call, &ftrace_events, list) {
436 if (!call->define_fields) 435 if (!call->define_fields)
437 continue; 436 continue;
438 437
438 if (flag == FILTER_INIT_NO_RESET) {
439 call->filter->no_reset = false;
440 continue;
441 }
442
443 if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset)
444 continue;
445
439 if (!strcmp(call->system, system->name)) { 446 if (!strcmp(call->system, system->name)) {
440 filter_disable_preds(call); 447 filter_disable_preds(call);
441 remove_filter_string(call->filter); 448 remove_filter_string(call->filter);
@@ -537,7 +544,8 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
537 544
538static int filter_add_pred(struct filter_parse_state *ps, 545static int filter_add_pred(struct filter_parse_state *ps,
539 struct ftrace_event_call *call, 546 struct ftrace_event_call *call,
540 struct filter_pred *pred) 547 struct filter_pred *pred,
548 bool dry_run)
541{ 549{
542 struct ftrace_event_field *field; 550 struct ftrace_event_field *field;
543 filter_pred_fn_t fn; 551 filter_pred_fn_t fn;
@@ -549,10 +557,12 @@ static int filter_add_pred(struct filter_parse_state *ps,
549 557
550 if (pred->op == OP_AND) { 558 if (pred->op == OP_AND) {
551 pred->pop_n = 2; 559 pred->pop_n = 2;
552 return filter_add_pred_fn(ps, call, pred, filter_pred_and); 560 fn = filter_pred_and;
561 goto add_pred_fn;
553 } else if (pred->op == OP_OR) { 562 } else if (pred->op == OP_OR) {
554 pred->pop_n = 2; 563 pred->pop_n = 2;
555 return filter_add_pred_fn(ps, call, pred, filter_pred_or); 564 fn = filter_pred_or;
565 goto add_pred_fn;
556 } 566 }
557 567
558 field = find_event_field(call, pred->field_name); 568 field = find_event_field(call, pred->field_name);
@@ -575,9 +585,6 @@ static int filter_add_pred(struct filter_parse_state *ps,
575 else 585 else
576 fn = filter_pred_strloc; 586 fn = filter_pred_strloc;
577 pred->str_len = field->size; 587 pred->str_len = field->size;
578 if (pred->op == OP_NE)
579 pred->not = 1;
580 return filter_add_pred_fn(ps, call, pred, fn);
581 } else { 588 } else {
582 if (field->is_signed) 589 if (field->is_signed)
583 ret = strict_strtoll(pred->str_val, 0, &val); 590 ret = strict_strtoll(pred->str_val, 0, &val);
@@ -588,41 +595,33 @@ static int filter_add_pred(struct filter_parse_state *ps,
588 return -EINVAL; 595 return -EINVAL;
589 } 596 }
590 pred->val = val; 597 pred->val = val;
591 }
592 598
593 fn = select_comparison_fn(pred->op, field->size, field->is_signed); 599 fn = select_comparison_fn(pred->op, field->size,
594 if (!fn) { 600 field->is_signed);
595 parse_error(ps, FILT_ERR_INVALID_OP, 0); 601 if (!fn) {
596 return -EINVAL; 602 parse_error(ps, FILT_ERR_INVALID_OP, 0);
603 return -EINVAL;
604 }
597 } 605 }
598 606
599 if (pred->op == OP_NE) 607 if (pred->op == OP_NE)
600 pred->not = 1; 608 pred->not = 1;
601 609
602 return filter_add_pred_fn(ps, call, pred, fn); 610add_pred_fn:
611 if (!dry_run)
612 return filter_add_pred_fn(ps, call, pred, fn);
613 return 0;
603} 614}
604 615
605static int filter_add_subsystem_pred(struct filter_parse_state *ps, 616static int filter_add_subsystem_pred(struct filter_parse_state *ps,
606 struct event_subsystem *system, 617 struct event_subsystem *system,
607 struct filter_pred *pred, 618 struct filter_pred *pred,
608 char *filter_string) 619 char *filter_string,
620 bool dry_run)
609{ 621{
610 struct event_filter *filter = system->filter;
611 struct ftrace_event_call *call; 622 struct ftrace_event_call *call;
612 int err = 0; 623 int err = 0;
613 624 bool fail = true;
614 if (!filter->preds) {
615 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
616 GFP_KERNEL);
617
618 if (!filter->preds)
619 return -ENOMEM;
620 }
621
622 if (filter->n_preds == MAX_FILTER_PRED) {
623 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
624 return -ENOSPC;
625 }
626 625
627 list_for_each_entry(call, &ftrace_events, list) { 626 list_for_each_entry(call, &ftrace_events, list) {
628 627
@@ -632,19 +631,24 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
632 if (strcmp(call->system, system->name)) 631 if (strcmp(call->system, system->name))
633 continue; 632 continue;
634 633
635 err = filter_add_pred(ps, call, pred); 634 if (call->filter->no_reset)
636 if (err) { 635 continue;
637 filter_free_subsystem_preds(system); 636
638 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); 637 err = filter_add_pred(ps, call, pred, dry_run);
639 goto out; 638 if (err)
640 } 639 call->filter->no_reset = true;
641 replace_filter_string(call->filter, filter_string); 640 else
641 fail = false;
642
643 if (!dry_run)
644 replace_filter_string(call->filter, filter_string);
642 } 645 }
643 646
644 filter->preds[filter->n_preds] = pred; 647 if (fail) {
645 filter->n_preds++; 648 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
646out: 649 return err;
647 return err; 650 }
651 return 0;
648} 652}
649 653
650static void parse_init(struct filter_parse_state *ps, 654static void parse_init(struct filter_parse_state *ps,
@@ -1003,12 +1007,14 @@ static int check_preds(struct filter_parse_state *ps)
1003static int replace_preds(struct event_subsystem *system, 1007static int replace_preds(struct event_subsystem *system,
1004 struct ftrace_event_call *call, 1008 struct ftrace_event_call *call,
1005 struct filter_parse_state *ps, 1009 struct filter_parse_state *ps,
1006 char *filter_string) 1010 char *filter_string,
1011 bool dry_run)
1007{ 1012{
1008 char *operand1 = NULL, *operand2 = NULL; 1013 char *operand1 = NULL, *operand2 = NULL;
1009 struct filter_pred *pred; 1014 struct filter_pred *pred;
1010 struct postfix_elt *elt; 1015 struct postfix_elt *elt;
1011 int err; 1016 int err;
1017 int n_preds = 0;
1012 1018
1013 err = check_preds(ps); 1019 err = check_preds(ps);
1014 if (err) 1020 if (err)
@@ -1027,24 +1033,14 @@ static int replace_preds(struct event_subsystem *system,
1027 continue; 1033 continue;
1028 } 1034 }
1029 1035
1036 if (n_preds++ == MAX_FILTER_PRED) {
1037 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
1038 return -ENOSPC;
1039 }
1040
1030 if (elt->op == OP_AND || elt->op == OP_OR) { 1041 if (elt->op == OP_AND || elt->op == OP_OR) {
1031 pred = create_logical_pred(elt->op); 1042 pred = create_logical_pred(elt->op);
1032 if (!pred) 1043 goto add_pred;
1033 return -ENOMEM;
1034 if (call) {
1035 err = filter_add_pred(ps, call, pred);
1036 filter_free_pred(pred);
1037 } else {
1038 err = filter_add_subsystem_pred(ps, system,
1039 pred, filter_string);
1040 if (err)
1041 filter_free_pred(pred);
1042 }
1043 if (err)
1044 return err;
1045
1046 operand1 = operand2 = NULL;
1047 continue;
1048 } 1044 }
1049 1045
1050 if (!operand1 || !operand2) { 1046 if (!operand1 || !operand2) {
@@ -1053,17 +1049,15 @@ static int replace_preds(struct event_subsystem *system,
1053 } 1049 }
1054 1050
1055 pred = create_pred(elt->op, operand1, operand2); 1051 pred = create_pred(elt->op, operand1, operand2);
1052add_pred:
1056 if (!pred) 1053 if (!pred)
1057 return -ENOMEM; 1054 return -ENOMEM;
1058 if (call) { 1055 if (call)
1059 err = filter_add_pred(ps, call, pred); 1056 err = filter_add_pred(ps, call, pred, false);
1060 filter_free_pred(pred); 1057 else
1061 } else {
1062 err = filter_add_subsystem_pred(ps, system, pred, 1058 err = filter_add_subsystem_pred(ps, system, pred,
1063 filter_string); 1059 filter_string, dry_run);
1064 if (err) 1060 filter_free_pred(pred);
1065 filter_free_pred(pred);
1066 }
1067 if (err) 1061 if (err)
1068 return err; 1062 return err;
1069 1063
@@ -1103,7 +1097,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1103 goto out; 1097 goto out;
1104 } 1098 }
1105 1099
1106 err = replace_preds(NULL, call, ps, filter_string); 1100 err = replace_preds(NULL, call, ps, filter_string, false);
1107 if (err) 1101 if (err)
1108 append_filter_err(ps, call->filter); 1102 append_filter_err(ps, call->filter);
1109 1103
@@ -1127,7 +1121,7 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1127 mutex_lock(&event_mutex); 1121 mutex_lock(&event_mutex);
1128 1122
1129 if (!strcmp(strstrip(filter_string), "0")) { 1123 if (!strcmp(strstrip(filter_string), "0")) {
1130 filter_free_subsystem_preds(system); 1124 filter_free_subsystem_preds(system, FILTER_DISABLE_ALL);
1131 remove_filter_string(system->filter); 1125 remove_filter_string(system->filter);
1132 mutex_unlock(&event_mutex); 1126 mutex_unlock(&event_mutex);
1133 return 0; 1127 return 0;
@@ -1138,7 +1132,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1138 if (!ps) 1132 if (!ps)
1139 goto out_unlock; 1133 goto out_unlock;
1140 1134
1141 filter_free_subsystem_preds(system);
1142 replace_filter_string(system->filter, filter_string); 1135 replace_filter_string(system->filter, filter_string);
1143 1136
1144 parse_init(ps, filter_ops, filter_string); 1137 parse_init(ps, filter_ops, filter_string);
@@ -1148,9 +1141,23 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1148 goto out; 1141 goto out;
1149 } 1142 }
1150 1143
1151 err = replace_preds(system, NULL, ps, filter_string); 1144 filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET);
1152 if (err) 1145
1146 /* try to see which events the filter can be applied to */
1147 err = replace_preds(system, NULL, ps, filter_string, true);
1148 if (err) {
1153 append_filter_err(ps, system->filter); 1149 append_filter_err(ps, system->filter);
1150 goto out;
1151 }
1152
1153 filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET);
1154
1155 /* really apply the filter to the events */
1156 err = replace_preds(system, NULL, ps, filter_string, false);
1157 if (err) {
1158 append_filter_err(ps, system->filter);
1159 filter_free_subsystem_preds(system, 2);
1160 }
1154 1161
1155out: 1162out:
1156 filter_opstack_clear(ps); 1163 filter_opstack_clear(ps);
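Subsystem filters are now applied in two passes: a dry run first marks which events cannot take the predicates (setting no_reset on them), and only then is the filter really installed on the rest, so a partly-invalid filter no longer wipes every event's existing filter. The shape of that pattern, with stand-in helpers rather than the real replace_preds():

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for "walk every event in the subsystem and add the predicates";
 * the real code iterates ftrace_events and calls filter_add_pred(..., dry_run). */
static int replace_all_preds(const char *filter, bool dry_run)
{
        printf("%s pass for '%s'\n", dry_run ? "dry-run" : "apply", filter);
        return 0;                        /* nonzero: no event accepted the filter */
}

static int apply_subsystem_filter(const char *filter)
{
        int err = replace_all_preds(filter, true);   /* validate only          */
        if (err)
                return err;                          /* nothing modified yet   */
        return replace_all_preds(filter, false);     /* really install it      */
}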
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 75ef000613c3..5b01b94518fc 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -288,11 +288,9 @@ static int
288ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, 288ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
289 struct ftrace_probe_ops *ops, void *data) 289 struct ftrace_probe_ops *ops, void *data)
290{ 290{
291 char str[KSYM_SYMBOL_LEN];
292 long count = (long)data; 291 long count = (long)data;
293 292
294 kallsyms_lookup(ip, NULL, NULL, NULL, str); 293 seq_printf(m, "%pf:", (void *)ip);
295 seq_printf(m, "%s:", str);
296 294
297 if (ops == &traceon_probe_ops) 295 if (ops == &traceon_probe_ops)
298 seq_printf(m, "traceon"); 296 seq_printf(m, "traceon");
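This hunk (and the trace_stack.c one further down) lean on the kernel's %pf / %pF vsnprintf extensions, which symbolize a function pointer directly and make the kallsyms_lookup() call plus the KSYM_SYMBOL_LEN stack buffer unnecessary. Kernel-context sketch only, since seq_file and %pf do not exist in userspace, and print_probe_sym is a made-up name:

/* %pf prints the symbol name for a function address, e.g. "schedule:";
 * %pF (used in trace_stack.c) also appends the offset. */
static int print_probe_sym(struct seq_file *m, unsigned long ip)
{
        return seq_printf(m, "%pf:", (void *)ip);
}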
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 420ec3487579..3f4a251b7d16 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -52,7 +52,7 @@ static struct tracer_flags tracer_flags = {
52 .opts = trace_opts 52 .opts = trace_opts
53}; 53};
54 54
55/* pid on the last trace processed */ 55static struct trace_array *graph_array;
56 56
57 57
58/* Add a function return address to the trace stack on thread info.*/ 58/* Add a function return address to the trace stack on thread info.*/
@@ -166,10 +166,121 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
166 return ret; 166 return ret;
167} 167}
168 168
169static int __trace_graph_entry(struct trace_array *tr,
170 struct ftrace_graph_ent *trace,
171 unsigned long flags,
172 int pc)
173{
174 struct ftrace_event_call *call = &event_funcgraph_entry;
175 struct ring_buffer_event *event;
176 struct ftrace_graph_ent_entry *entry;
177
178 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
179 return 0;
180
181 event = trace_buffer_lock_reserve(tr, TRACE_GRAPH_ENT,
182 sizeof(*entry), flags, pc);
183 if (!event)
184 return 0;
185 entry = ring_buffer_event_data(event);
186 entry->graph_ent = *trace;
187 if (!filter_current_check_discard(call, entry, event))
188 ring_buffer_unlock_commit(tr->buffer, event);
189
190 return 1;
191}
192
193int trace_graph_entry(struct ftrace_graph_ent *trace)
194{
195 struct trace_array *tr = graph_array;
196 struct trace_array_cpu *data;
197 unsigned long flags;
198 long disabled;
199 int ret;
200 int cpu;
201 int pc;
202
203 if (unlikely(!tr))
204 return 0;
205
206 if (!ftrace_trace_task(current))
207 return 0;
208
209 if (!ftrace_graph_addr(trace->func))
210 return 0;
211
212 local_irq_save(flags);
213 cpu = raw_smp_processor_id();
214 data = tr->data[cpu];
215 disabled = atomic_inc_return(&data->disabled);
216 if (likely(disabled == 1)) {
217 pc = preempt_count();
218 ret = __trace_graph_entry(tr, trace, flags, pc);
219 } else {
220 ret = 0;
221 }
222 /* Only do the atomic if it is not already set */
223 if (!test_tsk_trace_graph(current))
224 set_tsk_trace_graph(current);
225
226 atomic_dec(&data->disabled);
227 local_irq_restore(flags);
228
229 return ret;
230}
231
232static void __trace_graph_return(struct trace_array *tr,
233 struct ftrace_graph_ret *trace,
234 unsigned long flags,
235 int pc)
236{
237 struct ftrace_event_call *call = &event_funcgraph_exit;
238 struct ring_buffer_event *event;
239 struct ftrace_graph_ret_entry *entry;
240
241 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
242 return;
243
244 event = trace_buffer_lock_reserve(tr, TRACE_GRAPH_RET,
245 sizeof(*entry), flags, pc);
246 if (!event)
247 return;
248 entry = ring_buffer_event_data(event);
249 entry->ret = *trace;
250 if (!filter_current_check_discard(call, entry, event))
251 ring_buffer_unlock_commit(tr->buffer, event);
252}
253
254void trace_graph_return(struct ftrace_graph_ret *trace)
255{
256 struct trace_array *tr = graph_array;
257 struct trace_array_cpu *data;
258 unsigned long flags;
259 long disabled;
260 int cpu;
261 int pc;
262
263 local_irq_save(flags);
264 cpu = raw_smp_processor_id();
265 data = tr->data[cpu];
266 disabled = atomic_inc_return(&data->disabled);
267 if (likely(disabled == 1)) {
268 pc = preempt_count();
269 __trace_graph_return(tr, trace, flags, pc);
270 }
271 if (!trace->depth)
272 clear_tsk_trace_graph(current);
273 atomic_dec(&data->disabled);
274 local_irq_restore(flags);
275}
276
169static int graph_trace_init(struct trace_array *tr) 277static int graph_trace_init(struct trace_array *tr)
170{ 278{
171 int ret = register_ftrace_graph(&trace_graph_return, 279 int ret;
172 &trace_graph_entry); 280
281 graph_array = tr;
282 ret = register_ftrace_graph(&trace_graph_return,
283 &trace_graph_entry);
173 if (ret) 284 if (ret)
174 return ret; 285 return ret;
175 tracing_start_cmdline_record(); 286 tracing_start_cmdline_record();
@@ -177,49 +288,30 @@ static int graph_trace_init(struct trace_array *tr)
177 return 0; 288 return 0;
178} 289}
179 290
291void set_graph_array(struct trace_array *tr)
292{
293 graph_array = tr;
294}
295
180static void graph_trace_reset(struct trace_array *tr) 296static void graph_trace_reset(struct trace_array *tr)
181{ 297{
182 tracing_stop_cmdline_record(); 298 tracing_stop_cmdline_record();
183 unregister_ftrace_graph(); 299 unregister_ftrace_graph();
184} 300}
185 301
186static inline int log10_cpu(int nb) 302static int max_bytes_for_cpu;
187{
188 if (nb / 100)
189 return 3;
190 if (nb / 10)
191 return 2;
192 return 1;
193}
194 303
195static enum print_line_t 304static enum print_line_t
196print_graph_cpu(struct trace_seq *s, int cpu) 305print_graph_cpu(struct trace_seq *s, int cpu)
197{ 306{
198 int i;
199 int ret; 307 int ret;
200 int log10_this = log10_cpu(cpu);
201 int log10_all = log10_cpu(cpumask_weight(cpu_online_mask));
202
203 308
204 /* 309 /*
205 * Start with a space character - to make it stand out 310 * Start with a space character - to make it stand out
206 * to the right a bit when trace output is pasted into 311 * to the right a bit when trace output is pasted into
207 * email: 312 * email:
208 */ 313 */
209 ret = trace_seq_printf(s, " "); 314 ret = trace_seq_printf(s, " %*d) ", max_bytes_for_cpu, cpu);
210
211 /*
212 * Tricky - we space the CPU field according to the max
213 * number of online CPUs. On a 2-cpu system it would take
214 * a maximum of 1 digit - on a 128 cpu system it would
215 * take up to 3 digits:
216 */
217 for (i = 0; i < log10_all - log10_this; i++) {
218 ret = trace_seq_printf(s, " ");
219 if (!ret)
220 return TRACE_TYPE_PARTIAL_LINE;
221 }
222 ret = trace_seq_printf(s, "%d) ", cpu);
223 if (!ret) 315 if (!ret)
224 return TRACE_TYPE_PARTIAL_LINE; 316 return TRACE_TYPE_PARTIAL_LINE;
225 317
@@ -565,11 +657,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
565 return TRACE_TYPE_PARTIAL_LINE; 657 return TRACE_TYPE_PARTIAL_LINE;
566 } 658 }
567 659
568 ret = seq_print_ip_sym(s, call->func, 0); 660 ret = trace_seq_printf(s, "%pf();\n", (void *)call->func);
569 if (!ret)
570 return TRACE_TYPE_PARTIAL_LINE;
571
572 ret = trace_seq_printf(s, "();\n");
573 if (!ret) 661 if (!ret)
574 return TRACE_TYPE_PARTIAL_LINE; 662 return TRACE_TYPE_PARTIAL_LINE;
575 663
@@ -612,11 +700,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
612 return TRACE_TYPE_PARTIAL_LINE; 700 return TRACE_TYPE_PARTIAL_LINE;
613 } 701 }
614 702
615 ret = seq_print_ip_sym(s, call->func, 0); 703 ret = trace_seq_printf(s, "%pf() {\n", (void *)call->func);
616 if (!ret)
617 return TRACE_TYPE_PARTIAL_LINE;
618
619 ret = trace_seq_printf(s, "() {\n");
620 if (!ret) 704 if (!ret)
621 return TRACE_TYPE_PARTIAL_LINE; 705 return TRACE_TYPE_PARTIAL_LINE;
622 706
@@ -934,6 +1018,8 @@ static struct tracer graph_trace __read_mostly = {
934 1018
935static __init int init_graph_trace(void) 1019static __init int init_graph_trace(void)
936{ 1020{
1021 max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
1022
937 return register_tracer(&graph_trace); 1023 return register_tracer(&graph_trace);
938} 1024}
939 1025
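The graph tracer drops the old log10_cpu() padding logic: it computes the CPU column width once with snprintf(NULL, 0, ...) at init time and then prints with a %*d field width. A userspace demo of the same trick, with a pretend nr_cpu_ids:

#include <stdio.h>

int main(void)
{
        int nr_cpu_ids = 128;                                   /* pretend value */
        int width = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);    /* -> 3          */

        for (int cpu = 0; cpu < 4; cpu++)
                printf(" %*d) ...\n", width, cpu);              /* "   0) ..."   */
        return 0;
}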
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index a98106dd979c..e1285d7b5488 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -20,6 +20,34 @@ static int sched_ref;
20static DEFINE_MUTEX(sched_register_mutex); 20static DEFINE_MUTEX(sched_register_mutex);
21static int sched_stopped; 21static int sched_stopped;
22 22
23
24void
25tracing_sched_switch_trace(struct trace_array *tr,
26 struct task_struct *prev,
27 struct task_struct *next,
28 unsigned long flags, int pc)
29{
30 struct ftrace_event_call *call = &event_context_switch;
31 struct ring_buffer_event *event;
32 struct ctx_switch_entry *entry;
33
34 event = trace_buffer_lock_reserve(tr, TRACE_CTX,
35 sizeof(*entry), flags, pc);
36 if (!event)
37 return;
38 entry = ring_buffer_event_data(event);
39 entry->prev_pid = prev->pid;
40 entry->prev_prio = prev->prio;
41 entry->prev_state = prev->state;
42 entry->next_pid = next->pid;
43 entry->next_prio = next->prio;
44 entry->next_state = next->state;
45 entry->next_cpu = task_cpu(next);
46
47 if (!filter_check_discard(call, entry, tr->buffer, event))
48 trace_buffer_unlock_commit(tr, event, flags, pc);
49}
50
23static void 51static void
24probe_sched_switch(struct rq *__rq, struct task_struct *prev, 52probe_sched_switch(struct rq *__rq, struct task_struct *prev,
25 struct task_struct *next) 53 struct task_struct *next)
@@ -49,6 +77,35 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
49 local_irq_restore(flags); 77 local_irq_restore(flags);
50} 78}
51 79
80void
81tracing_sched_wakeup_trace(struct trace_array *tr,
82 struct task_struct *wakee,
83 struct task_struct *curr,
84 unsigned long flags, int pc)
85{
86 struct ftrace_event_call *call = &event_wakeup;
87 struct ring_buffer_event *event;
88 struct ctx_switch_entry *entry;
89
90 event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
91 sizeof(*entry), flags, pc);
92 if (!event)
93 return;
94 entry = ring_buffer_event_data(event);
95 entry->prev_pid = curr->pid;
96 entry->prev_prio = curr->prio;
97 entry->prev_state = curr->state;
98 entry->next_pid = wakee->pid;
99 entry->next_prio = wakee->prio;
100 entry->next_state = wakee->state;
101 entry->next_cpu = task_cpu(wakee);
102
103 if (!filter_check_discard(call, entry, tr->buffer, event))
104 ring_buffer_unlock_commit(tr->buffer, event);
105 ftrace_trace_stack(tr, flags, 6, pc);
106 ftrace_trace_userstack(tr, flags, pc);
107}
108
52static void 109static void
53probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) 110probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
54{ 111{
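[Editor's note] Both helpers moved into trace_sched_switch.c above follow the same reserve / fill / commit discipline: reserve an event slot, fill the entry in place, then either drop it when the event filter rejects it or commit it so readers can see it. A minimal userspace analogue of that pattern, assuming a trivial linear buffer (names and layout here are illustrative only, not the kernel API):

/* Userspace analogue of the reserve / fill / commit pattern. */
#include <stdio.h>

struct ctx_entry {                       /* toy stand-in for ctx_switch_entry */
	int prev_pid, next_pid;
};

static unsigned char buf[1024];
static size_t reserved, committed;

static void *buffer_reserve(size_t size)
{
	if (reserved + size > sizeof(buf))
		return NULL;             /* no room: caller just drops the event */
	void *slot = buf + reserved;
	reserved += size;
	return slot;
}

static void buffer_commit(void)          { committed = reserved; }
static void buffer_discard(size_t size)  { reserved -= size; }

static void trace_ctx_switch(int prev_pid, int next_pid, int filter_pid)
{
	struct ctx_entry *entry = buffer_reserve(sizeof(*entry));

	if (!entry)
		return;
	entry->prev_pid = prev_pid;      /* fill the reserved slot in place */
	entry->next_pid = next_pid;

	if (filter_pid >= 0 && entry->next_pid != filter_pid)
		buffer_discard(sizeof(*entry));  /* filtered out, never published */
	else
		buffer_commit();                 /* publish the event to readers */
}

int main(void)
{
	trace_ctx_switch(100, 200, -1);   /* kept */
	trace_ctx_switch(200, 300, 999);  /* discarded by the "filter" */
	printf("committed bytes: %zu\n", committed);
	return 0;
}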
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 00dd6485bdd7..d2cdbabb4ead 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -288,6 +288,7 @@ trace_selftest_startup_function_graph(struct tracer *trace,
288 * to detect and recover from possible hangs 288 * to detect and recover from possible hangs
289 */ 289 */
290 tracing_reset_online_cpus(tr); 290 tracing_reset_online_cpus(tr);
291 set_graph_array(tr);
291 ret = register_ftrace_graph(&trace_graph_return, 292 ret = register_ftrace_graph(&trace_graph_return,
292 &trace_graph_entry_watchdog); 293 &trace_graph_entry_watchdog);
293 if (ret) { 294 if (ret) {
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 6a2a9d484cd6..0da1cff08d67 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -234,15 +234,8 @@ static void t_stop(struct seq_file *m, void *p)
234static int trace_lookup_stack(struct seq_file *m, long i) 234static int trace_lookup_stack(struct seq_file *m, long i)
235{ 235{
236 unsigned long addr = stack_dump_trace[i]; 236 unsigned long addr = stack_dump_trace[i];
237#ifdef CONFIG_KALLSYMS
238 char str[KSYM_SYMBOL_LEN];
239 237
240 sprint_symbol(str, addr); 238 return seq_printf(m, "%pF\n", (void *)addr);
241
242 return seq_printf(m, "%s\n", str);
243#else
244 return seq_printf(m, "%p\n", (void*)addr);
245#endif
246} 239}
247 240
248static void print_disabled(struct seq_file *m) 241static void print_disabled(struct seq_file *m)
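[Editor's note] The trace_stack.c change above leans on the printk %p extensions instead of open-coding sprint_symbol() behind a CONFIG_KALLSYMS #ifdef; when kallsyms is not built in, the format falls back to printing the raw address. An illustrative usage snippet (editor-added, assuming addr holds a kernel text address):

/* %pF resolves an address to "symbol+offset/size"; the lower-case %pf
 * used by the graph tracer prints just the bare symbol name. */
pr_info("stack entry: %pF\n", (void *)addr);   /* e.g. "do_sys_open+0x4c/0x120" */
pr_info("entered:     %pf()\n", (void *)addr); /* e.g. "do_sys_open()" */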
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index aea321c82fa0..07c60b09258f 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -49,7 +49,8 @@ static struct dentry *stat_dir;
49 * but it will at least advance closer to the next one 49 * but it will at least advance closer to the next one
50 * to be released. 50 * to be released.
51 */ 51 */
52static struct rb_node *release_next(struct rb_node *node) 52static struct rb_node *release_next(struct tracer_stat *ts,
53 struct rb_node *node)
53{ 54{
54 struct stat_node *snode; 55 struct stat_node *snode;
55 struct rb_node *parent = rb_parent(node); 56 struct rb_node *parent = rb_parent(node);
@@ -67,6 +68,8 @@ static struct rb_node *release_next(struct rb_node *node)
67 parent->rb_right = NULL; 68 parent->rb_right = NULL;
68 69
69 snode = container_of(node, struct stat_node, node); 70 snode = container_of(node, struct stat_node, node);
71 if (ts->stat_release)
72 ts->stat_release(snode->stat);
70 kfree(snode); 73 kfree(snode);
71 74
72 return parent; 75 return parent;
@@ -78,7 +81,7 @@ static void __reset_stat_session(struct stat_session *session)
78 struct rb_node *node = session->stat_root.rb_node; 81 struct rb_node *node = session->stat_root.rb_node;
79 82
80 while (node) 83 while (node)
81 node = release_next(node); 84 node = release_next(session->ts, node);
82 85
83 session->stat_root = RB_ROOT; 86 session->stat_root = RB_ROOT;
84} 87}
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
index f3546a2cd826..8f03914b9a6a 100644
--- a/kernel/trace/trace_stat.h
+++ b/kernel/trace/trace_stat.h
@@ -18,6 +18,8 @@ struct tracer_stat {
18 int (*stat_cmp)(void *p1, void *p2); 18 int (*stat_cmp)(void *p1, void *p2);
19 /* Print a stat entry */ 19 /* Print a stat entry */
20 int (*stat_show)(struct seq_file *s, void *p); 20 int (*stat_show)(struct seq_file *s, void *p);
21 /* Release an entry */
22 void (*stat_release)(void *stat);
21 /* Print the headers of your stat entries */ 23 /* Print the headers of your stat entries */
22 int (*stat_headers)(struct seq_file *s); 24 int (*stat_headers)(struct seq_file *s);
23}; 25};
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 97fcea4acce1..40cafb07dffd 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -9,6 +9,7 @@
9#include <trace/events/workqueue.h> 9#include <trace/events/workqueue.h>
10#include <linux/list.h> 10#include <linux/list.h>
11#include <linux/percpu.h> 11#include <linux/percpu.h>
12#include <linux/kref.h>
12#include "trace_stat.h" 13#include "trace_stat.h"
13#include "trace.h" 14#include "trace.h"
14 15
@@ -16,6 +17,7 @@
16/* A cpu workqueue thread */ 17/* A cpu workqueue thread */
17struct cpu_workqueue_stats { 18struct cpu_workqueue_stats {
18 struct list_head list; 19 struct list_head list;
20 struct kref kref;
19 int cpu; 21 int cpu;
20 pid_t pid; 22 pid_t pid;
21/* Can be inserted from interrupt or user context, need to be atomic */ 23/* Can be inserted from interrupt or user context, need to be atomic */
@@ -39,6 +41,11 @@ struct workqueue_global_stats {
39static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat); 41static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
40#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu)) 42#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
41 43
44static void cpu_workqueue_stat_free(struct kref *kref)
45{
46 kfree(container_of(kref, struct cpu_workqueue_stats, kref));
47}
48
42/* Insertion of a work */ 49/* Insertion of a work */
43static void 50static void
44probe_workqueue_insertion(struct task_struct *wq_thread, 51probe_workqueue_insertion(struct task_struct *wq_thread,
@@ -96,8 +103,8 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
96 return; 103 return;
97 } 104 }
98 INIT_LIST_HEAD(&cws->list); 105 INIT_LIST_HEAD(&cws->list);
106 kref_init(&cws->kref);
99 cws->cpu = cpu; 107 cws->cpu = cpu;
100
101 cws->pid = wq_thread->pid; 108 cws->pid = wq_thread->pid;
102 109
103 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 110 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
@@ -118,7 +125,7 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
118 list) { 125 list) {
119 if (node->pid == wq_thread->pid) { 126 if (node->pid == wq_thread->pid) {
120 list_del(&node->list); 127 list_del(&node->list);
121 kfree(node); 128 kref_put(&node->kref, cpu_workqueue_stat_free);
122 goto found; 129 goto found;
123 } 130 }
124 } 131 }
@@ -137,9 +144,11 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
137 144
138 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 145 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
139 146
140 if (!list_empty(&workqueue_cpu_stat(cpu)->list)) 147 if (!list_empty(&workqueue_cpu_stat(cpu)->list)) {
141 ret = list_entry(workqueue_cpu_stat(cpu)->list.next, 148 ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
142 struct cpu_workqueue_stats, list); 149 struct cpu_workqueue_stats, list);
150 kref_get(&ret->kref);
151 }
143 152
144 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); 153 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
145 154
@@ -162,9 +171,9 @@ static void *workqueue_stat_start(struct tracer_stat *trace)
162static void *workqueue_stat_next(void *prev, int idx) 171static void *workqueue_stat_next(void *prev, int idx)
163{ 172{
164 struct cpu_workqueue_stats *prev_cws = prev; 173 struct cpu_workqueue_stats *prev_cws = prev;
174 struct cpu_workqueue_stats *ret;
165 int cpu = prev_cws->cpu; 175 int cpu = prev_cws->cpu;
166 unsigned long flags; 176 unsigned long flags;
167 void *ret = NULL;
168 177
169 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); 178 spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
170 if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) { 179 if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
@@ -175,11 +184,14 @@ static void *workqueue_stat_next(void *prev, int idx)
175 return NULL; 184 return NULL;
176 } while (!(ret = workqueue_stat_start_cpu(cpu))); 185 } while (!(ret = workqueue_stat_start_cpu(cpu)));
177 return ret; 186 return ret;
187 } else {
188 ret = list_entry(prev_cws->list.next,
189 struct cpu_workqueue_stats, list);
190 kref_get(&ret->kref);
178 } 191 }
179 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); 192 spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
180 193
181 return list_entry(prev_cws->list.next, struct cpu_workqueue_stats, 194 return ret;
182 list);
183} 195}
184 196
185static int workqueue_stat_show(struct seq_file *s, void *p) 197static int workqueue_stat_show(struct seq_file *s, void *p)
@@ -203,6 +215,13 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
203 return 0; 215 return 0;
204} 216}
205 217
218static void workqueue_stat_release(void *stat)
219{
220 struct cpu_workqueue_stats *node = stat;
221
222 kref_put(&node->kref, cpu_workqueue_stat_free);
223}
224
206static int workqueue_stat_headers(struct seq_file *s) 225static int workqueue_stat_headers(struct seq_file *s)
207{ 226{
208 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); 227 seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
@@ -215,6 +234,7 @@ struct tracer_stat workqueue_stats __read_mostly = {
215 .stat_start = workqueue_stat_start, 234 .stat_start = workqueue_stat_start,
216 .stat_next = workqueue_stat_next, 235 .stat_next = workqueue_stat_next,
217 .stat_show = workqueue_stat_show, 236 .stat_show = workqueue_stat_show,
237 .stat_release = workqueue_stat_release,
218 .stat_headers = workqueue_stat_headers 238 .stat_headers = workqueue_stat_headers
219}; 239};
220 240
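[Editor's note] Taken together, the trace_workqueue.c changes reference-count each cpu_workqueue_stats entry: the list holds one reference from kref_init(), stat_start()/stat_next() take an extra one under the lock before handing an entry to the stat reader, and the new stat_release() callback drops it again, so an entry stays valid even if its workqueue is destroyed mid-iteration. A minimal single-threaded userspace analogue of that get/put/release lifecycle (editor's sketch, not the kernel kref API, which uses an atomic counter):

#include <stdio.h>
#include <stdlib.h>

struct ref {
	int count;                       /* the real kref uses an atomic counter */
	void (*release)(struct ref *r);
};

static void ref_init(struct ref *r, void (*release)(struct ref *r))
{
	r->count = 1;                    /* the owning list's reference */
	r->release = release;
}

static void ref_get(struct ref *r)
{
	r->count++;
}

static void ref_put(struct ref *r)
{
	if (--r->count == 0)
		r->release(r);
}

struct stat_node {
	struct ref ref;                  /* first member, so the cast below is valid */
	int cpu;
};

static void stat_node_free(struct ref *r)
{
	struct stat_node *node = (struct stat_node *)r;

	printf("freeing stat node for cpu %d\n", node->cpu);
	free(node);
}

int main(void)
{
	struct stat_node *node = malloc(sizeof(*node));

	if (!node)
		return 1;
	ref_init(&node->ref, stat_node_free);
	node->cpu = 0;

	ref_get(&node->ref);   /* reader takes a reference in stat_start/stat_next */
	ref_put(&node->ref);   /* workqueue destroyed: the list drops its reference */
	ref_put(&node->ref);   /* reader finishes (stat_release): freed only now */

	return 0;
}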