Diffstat (limited to 'kernel/trace'):

 -rw-r--r--  kernel/trace/Makefile                      2
 -rw-r--r--  kernel/trace/ftrace.c                      8
 -rw-r--r--  kernel/trace/ring_buffer.c                70
 -rw-r--r--  kernel/trace/trace.c                     173
 -rw-r--r--  kernel/trace/trace.h                      16
 -rw-r--r--  kernel/trace/trace_clock.c                12
 -rw-r--r--  kernel/trace/trace_events_filter.c       795
 -rw-r--r--  kernel/trace/trace_events_filter_test.h   50
 -rw-r--r--  kernel/trace/trace_irqsoff.c               4
 -rw-r--r--  kernel/trace/trace_printk.c               19

 10 files changed, 766 insertions, 383 deletions
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 761c510a06c5..b384ed512bac 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -15,6 +15,8 @@ ifdef CONFIG_TRACING_BRANCHES
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 endif
 
+CFLAGS_trace_events_filter.o := -I$(src)
+
 #
 # Make the trace clocks available generally: it's infrastructure
 # relied on by ptrace for example:
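
The new CFLAGS_trace_events_filter.o line adds the source directory to the include path for that single object. That is what lets the filter self-test, added at the bottom of trace_events_filter.c in this series, pull in a locally defined tracepoint header with CREATE_TRACE_POINTS. A minimal sketch of the pattern, using the header this series introduces:

/*
 * In exactly one .c file, instantiate the tracepoints declared in the
 * local header.  The -I$(src) flag makes the quoted include resolve
 * against the kernel/trace/ source directory rather than the build dir.
 */
#define CREATE_TRACE_POINTS
#include "trace_events_filter_test.h"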
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c3e4575e7829..077d85387908 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3863,6 +3863,14 @@ void ftrace_kill(void)
 }
 
 /**
+ * Test if ftrace is dead or not.
+ */
+int ftrace_is_dead(void)
+{
+	return ftrace_disabled;
+}
+
+/**
  * register_ftrace_function - register a function for profiling
  * @ops - ops structure that holds the function for profiling.
  *
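
ftrace_is_dead() simply reports whether ftrace_disabled has been set, for example by ftrace_kill() when function tracing shuts itself down after detecting a problem. A hedged sketch of a caller, modeled on the test_ftrace_alive() helper added to trace.c later in this diff (the function name here is illustrative):

/* Sketch only: warn readers of a trace that function events may be missing. */
static void report_ftrace_health(struct seq_file *m)
{
	if (!ftrace_is_dead())
		return;
	seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
	seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
}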
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 731201bf4acc..acf6b68dc4a8 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -488,12 +488,14 @@ struct ring_buffer_per_cpu {
 	struct buffer_page		*reader_page;
 	unsigned long			lost_events;
 	unsigned long			last_overrun;
+	local_t				entries_bytes;
 	local_t				commit_overrun;
 	local_t				overrun;
 	local_t				entries;
 	local_t				committing;
 	local_t				commits;
 	unsigned long			read;
+	unsigned long			read_bytes;
 	u64				write_stamp;
 	u64				read_stamp;
 };
@@ -1708,6 +1710,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
 	 * the counters.
 	 */
 	local_add(entries, &cpu_buffer->overrun);
+	local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
 
 	/*
 	 * The entries will be zeroed out when we move the
@@ -1863,6 +1866,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	event = __rb_page_index(tail_page, tail);
 	kmemcheck_annotate_bitfield(event, bitfield);
 
+	/* account for padding bytes */
+	local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
+
 	/*
 	 * Save the original length to the meta data.
 	 * This will be used by the reader to add lost event
@@ -2054,6 +2060,9 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	if (!tail)
 		tail_page->page->time_stamp = ts;
 
+	/* account for these added bytes */
+	local_add(length, &cpu_buffer->entries_bytes);
+
 	return event;
 }
 
@@ -2076,6 +2085,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
 	if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
 		unsigned long write_mask =
 			local_read(&bpage->write) & ~RB_WRITE_MASK;
+		unsigned long event_length = rb_event_length(event);
 		/*
 		 * This is on the tail page. It is possible that
 		 * a write could come in and move the tail page
@@ -2085,8 +2095,11 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
 		old_index += write_mask;
 		new_index += write_mask;
 		index = local_cmpxchg(&bpage->write, old_index, new_index);
-		if (index == old_index)
+		if (index == old_index) {
+			/* update counters */
+			local_sub(event_length, &cpu_buffer->entries_bytes);
 			return 1;
+		}
 	}
 
 	/* could not discard */
@@ -2661,6 +2674,58 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
 }
 
 /**
+ * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to read from.
+ */
+unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
+{
+	unsigned long flags;
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct buffer_page *bpage;
+	unsigned long ret;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	/*
+	 * if the tail is on reader_page, oldest time stamp is on the reader
+	 * page
+	 */
+	if (cpu_buffer->tail_page == cpu_buffer->reader_page)
+		bpage = cpu_buffer->reader_page;
+	else
+		bpage = rb_set_head_page(cpu_buffer);
+	ret = bpage->page->time_stamp;
+	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
+
+/**
+ * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to read from.
+ */
+unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long ret;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
+
+/**
  * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
  * @buffer: The ring buffer
  * @cpu: The per CPU buffer to get the entries from.
@@ -3527,11 +3592,13 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->reader_page->read = 0;
 
 	local_set(&cpu_buffer->commit_overrun, 0);
+	local_set(&cpu_buffer->entries_bytes, 0);
 	local_set(&cpu_buffer->overrun, 0);
 	local_set(&cpu_buffer->entries, 0);
 	local_set(&cpu_buffer->committing, 0);
 	local_set(&cpu_buffer->commits, 0);
 	cpu_buffer->read = 0;
+	cpu_buffer->read_bytes = 0;
 
 	cpu_buffer->write_stamp = 0;
 	cpu_buffer->read_stamp = 0;
@@ -3918,6 +3985,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	} else {
 		/* update the entry counter */
 		cpu_buffer->read += rb_page_entries(reader);
+		cpu_buffer->read_bytes += BUF_PAGE_SIZE;
 
 		/* swap the pages */
 		rb_init_page(bpage);
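
The new entries_bytes and read_bytes counters make it possible to ask how many bytes of unread data a per-CPU buffer holds, while ring_buffer_oldest_event_ts() exposes the timestamp of its oldest page. A sketch of a consumer, assuming the same ns2usecs()/do_div() conversion that tracing_stats_read() in trace.c uses below (the helper name is made up for illustration):

/* Illustrative only: report byte usage and the time window of one CPU buffer. */
static void dump_cpu_buffer_stats(struct ring_buffer *buffer, int cpu)
{
	unsigned long bytes = ring_buffer_bytes_cpu(buffer, cpu);
	unsigned long long t;
	unsigned long usec_rem;

	pr_info("cpu%d: %lu bytes unread\n", cpu, bytes);

	/* ns2usecs() is local to trace.c; shown here only for illustration */
	t = ns2usecs(ring_buffer_oldest_event_ts(buffer, cpu));
	usec_rem = do_div(t, USEC_PER_SEC);
	pr_info("cpu%d: oldest event ts: %llu.%06lu\n", cpu, t, usec_rem);

	t = ns2usecs(ring_buffer_time_stamp(buffer, cpu));
	usec_rem = do_div(t, USEC_PER_SEC);
	pr_info("cpu%d: now ts: %llu.%06lu\n", cpu, t, usec_rem);
}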
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e5df02c69b1d..f86efe90ca45 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -435,6 +435,7 @@ static struct {
 } trace_clocks[] = {
 	{ trace_clock_local,	"local" },
 	{ trace_clock_global,	"global" },
+	{ trace_clock_counter,	"counter" },
 };
 
 int trace_clock_id;
@@ -2159,6 +2160,14 @@ void trace_default_header(struct seq_file *m)
 	}
 }
 
+static void test_ftrace_alive(struct seq_file *m)
+{
+	if (!ftrace_is_dead())
+		return;
+	seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
+	seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
+}
+
 static int s_show(struct seq_file *m, void *v)
 {
 	struct trace_iterator *iter = v;
@@ -2168,6 +2177,7 @@ static int s_show(struct seq_file *m, void *v)
 	if (iter->tr) {
 		seq_printf(m, "# tracer: %s\n", iter->trace->name);
 		seq_puts(m, "#\n");
+		test_ftrace_alive(m);
 	}
 	if (iter->trace && iter->trace->print_header)
 		iter->trace->print_header(m);
@@ -3569,6 +3579,30 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 }
 
 static ssize_t
+tracing_total_entries_read(struct file *filp, char __user *ubuf,
+				size_t cnt, loff_t *ppos)
+{
+	struct trace_array *tr = filp->private_data;
+	char buf[64];
+	int r, cpu;
+	unsigned long size = 0, expanded_size = 0;
+
+	mutex_lock(&trace_types_lock);
+	for_each_tracing_cpu(cpu) {
+		size += tr->entries >> 10;
+		if (!ring_buffer_expanded)
+			expanded_size += trace_buf_size >> 10;
+	}
+	if (ring_buffer_expanded)
+		r = sprintf(buf, "%lu\n", size);
+	else
+		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
+	mutex_unlock(&trace_types_lock);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
 			  size_t cnt, loff_t *ppos)
 {
@@ -3594,22 +3628,24 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static int mark_printk(const char *fmt, ...)
-{
-	int ret;
-	va_list args;
-	va_start(args, fmt);
-	ret = trace_vprintk(0, fmt, args);
-	va_end(args);
-	return ret;
-}
-
 static ssize_t
 tracing_mark_write(struct file *filp, const char __user *ubuf,
 					size_t cnt, loff_t *fpos)
 {
-	char *buf;
-	size_t written;
+	unsigned long addr = (unsigned long)ubuf;
+	struct ring_buffer_event *event;
+	struct ring_buffer *buffer;
+	struct print_entry *entry;
+	unsigned long irq_flags;
+	struct page *pages[2];
+	int nr_pages = 1;
+	ssize_t written;
+	void *page1;
+	void *page2;
+	int offset;
+	int size;
+	int len;
+	int ret;
 
 	if (tracing_disabled)
 		return -EINVAL;
@@ -3617,28 +3653,81 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
 	if (cnt > TRACE_BUF_SIZE)
 		cnt = TRACE_BUF_SIZE;
 
-	buf = kmalloc(cnt + 2, GFP_KERNEL);
-	if (buf == NULL)
-		return -ENOMEM;
+	/*
+	 * Userspace is injecting traces into the kernel trace buffer.
+	 * We want to be as non intrusive as possible.
+	 * To do so, we do not want to allocate any special buffers
+	 * or take any locks, but instead write the userspace data
+	 * straight into the ring buffer.
+	 *
+	 * First we need to pin the userspace buffer into memory,
+	 * which, most likely it is, because it just referenced it.
+	 * But there's no guarantee that it is. By using get_user_pages_fast()
+	 * and kmap_atomic/kunmap_atomic() we can get access to the
+	 * pages directly. We then write the data directly into the
+	 * ring buffer.
+	 */
+	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
 
-	if (copy_from_user(buf, ubuf, cnt)) {
-		kfree(buf);
-		return -EFAULT;
+	/* check if we cross pages */
+	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
+		nr_pages = 2;
+
+	offset = addr & (PAGE_SIZE - 1);
+	addr &= PAGE_MASK;
+
+	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
+	if (ret < nr_pages) {
+		while (--ret >= 0)
+			put_page(pages[ret]);
+		written = -EFAULT;
+		goto out;
 	}
-	if (buf[cnt-1] != '\n') {
-		buf[cnt] = '\n';
-		buf[cnt+1] = '\0';
+
+	page1 = kmap_atomic(pages[0]);
+	if (nr_pages == 2)
+		page2 = kmap_atomic(pages[1]);
+
+	local_save_flags(irq_flags);
+	size = sizeof(*entry) + cnt + 2; /* possible \n added */
+	buffer = global_trace.buffer;
+	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
+					  irq_flags, preempt_count());
+	if (!event) {
+		/* Ring buffer disabled, return as if not open for write */
+		written = -EBADF;
+		goto out_unlock;
+	}
+
+	entry = ring_buffer_event_data(event);
+	entry->ip = _THIS_IP_;
+
+	if (nr_pages == 2) {
+		len = PAGE_SIZE - offset;
+		memcpy(&entry->buf, page1 + offset, len);
+		memcpy(&entry->buf[len], page2, cnt - len);
 	} else
-		buf[cnt] = '\0';
+		memcpy(&entry->buf, page1 + offset, cnt);
 
-	written = mark_printk("%s", buf);
-	kfree(buf);
-	*fpos += written;
+	if (entry->buf[cnt - 1] != '\n') {
+		entry->buf[cnt] = '\n';
+		entry->buf[cnt + 1] = '\0';
+	} else
+		entry->buf[cnt] = '\0';
 
-	/* don't tell userspace we wrote more - it might confuse them */
-	if (written > cnt)
-		written = cnt;
+	ring_buffer_unlock_commit(buffer, event);
+
+	written = cnt;
+
+	*fpos += written;
 
+ out_unlock:
+	if (nr_pages == 2)
+		kunmap_atomic(page2);
+	kunmap_atomic(page1);
+	while (nr_pages > 0)
+		put_page(pages[--nr_pages]);
+ out:
 	return written;
 }
 
@@ -3739,6 +3828,12 @@ static const struct file_operations tracing_entries_fops = {
 	.llseek		= generic_file_llseek,
 };
 
+static const struct file_operations tracing_total_entries_fops = {
+	.open		= tracing_open_generic,
+	.read		= tracing_total_entries_read,
+	.llseek		= generic_file_llseek,
+};
+
 static const struct file_operations tracing_free_buffer_fops = {
 	.write		= tracing_free_buffer_write,
 	.release	= tracing_free_buffer_release,
@@ -4026,6 +4121,8 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 	struct trace_array *tr = &global_trace;
 	struct trace_seq *s;
 	unsigned long cnt;
+	unsigned long long t;
+	unsigned long usec_rem;
 
 	s = kmalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
@@ -4042,6 +4139,17 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 	cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
 
+	cnt = ring_buffer_bytes_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "bytes: %ld\n", cnt);
+
+	t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu));
+	usec_rem = do_div(t, USEC_PER_SEC);
+	trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem);
+
+	t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu));
+	usec_rem = do_div(t, USEC_PER_SEC);
+	trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
+
 	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -4450,6 +4558,9 @@ static __init int tracer_init_debugfs(void)
 	trace_create_file("buffer_size_kb", 0644, d_tracer,
 			&global_trace, &tracing_entries_fops);
 
+	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
+			&global_trace, &tracing_total_entries_fops);
+
 	trace_create_file("free_buffer", 0644, d_tracer,
 			&global_trace, &tracing_free_buffer_fops);
 
@@ -4566,6 +4677,12 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
 
 	tracing_off();
 
+	/* Did function tracer already get disabled? */
+	if (ftrace_is_dead()) {
+		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
+		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
+	}
+
 	if (disable_tracing)
 		ftrace_kill();
 
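
With the tracing_mark_write() rewrite above, writes to trace_marker land in the ring buffer without a temporary kernel allocation; the user buffer is pinned with get_user_pages_fast() and may legitimately straddle a page boundary. A hedged userspace sketch that exercises exactly that path, assuming the usual /sys/kernel/debug debugfs mount point:

/* Illustrative userspace test: write a marker whose buffer crosses a
 * page boundary, forcing the two-page get_user_pages_fast() path.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	char *map = mmap(NULL, 2 * psz, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
	char *msg;

	if (map == MAP_FAILED || fd < 0) {
		perror("setup");
		return 1;
	}
	msg = map + psz - 8;			/* starts 8 bytes before a page end */
	strcpy(msg, "hello from userspace");	/* so the string spans two pages */
	if (write(fd, msg, strlen(msg)) < 0)
		perror("write");
	close(fd);
	return 0;
}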
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 616846bcfee5..092e1f8d18dc 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -579,11 +579,13 @@ static inline int ftrace_trace_task(struct task_struct *task)
 
 	return test_tsk_trace_trace(task);
 }
+extern int ftrace_is_dead(void);
 #else
 static inline int ftrace_trace_task(struct task_struct *task)
 {
 	return 1;
 }
+static inline int ftrace_is_dead(void) { return 0; }
 #endif
 
 /*
@@ -761,16 +763,10 @@ struct filter_pred {
 	filter_pred_fn_t 	fn;
 	u64 			val;
 	struct regex		regex;
-	/*
-	 * Leaf nodes use field_name, ops is used by AND and OR
-	 * nodes. The field_name is always freed when freeing a pred.
-	 * We can overload field_name for ops and have it freed
-	 * as well.
-	 */
-	union {
-		char		*field_name;
-		unsigned short	*ops;
-	};
+	unsigned short		*ops;
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+	struct ftrace_event_field *field;
+#endif
 	int 			offset;
 	int 			not;
 	int 			op;
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 6302747a1398..394783531cbb 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -113,3 +113,15 @@ u64 notrace trace_clock_global(void)
 
 	return now;
 }
+
+static atomic64_t trace_counter;
+
+/*
+ * trace_clock_counter(): simply an atomic counter.
+ * Use the trace_counter "counter" for cases where you do not care
+ * about timings, but are interested in strict ordering.
+ */
+u64 notrace trace_clock_counter(void)
+{
+	return atomic64_add_return(1, &trace_counter);
+}
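
trace_clock_counter() trades timestamps for a strict global ordering: every event gets the next value of one shared atomic64. Once it is registered in the trace_clocks[] table (see the trace.c hunk above), it can be selected like any other trace clock. A hedged userspace sketch, assuming the usual debugfs mount point:

/* Illustrative only: switch the tracer to the new "counter" clock. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/trace_clock", "w");

	if (!f) {
		perror("trace_clock");
		return 1;
	}
	fputs("counter", f);	/* events are now stamped 1, 2, 3, ... */
	fclose(f);
	return 0;
}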
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 256764ecccd6..816d3d074979 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -381,6 +381,63 @@ get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
 	return pred;
 }
 
+enum walk_return {
+	WALK_PRED_ABORT,
+	WALK_PRED_PARENT,
+	WALK_PRED_DEFAULT,
+};
+
+typedef int (*filter_pred_walkcb_t) (enum move_type move,
+				     struct filter_pred *pred,
+				     int *err, void *data);
+
+static int walk_pred_tree(struct filter_pred *preds,
+			  struct filter_pred *root,
+			  filter_pred_walkcb_t cb, void *data)
+{
+	struct filter_pred *pred = root;
+	enum move_type move = MOVE_DOWN;
+	int done = 0;
+
+	if (!preds)
+		return -EINVAL;
+
+	do {
+		int err = 0, ret;
+
+		ret = cb(move, pred, &err, data);
+		if (ret == WALK_PRED_ABORT)
+			return err;
+		if (ret == WALK_PRED_PARENT)
+			goto get_parent;
+
+		switch (move) {
+		case MOVE_DOWN:
+			if (pred->left != FILTER_PRED_INVALID) {
+				pred = &preds[pred->left];
+				continue;
+			}
+			goto get_parent;
+		case MOVE_UP_FROM_LEFT:
+			pred = &preds[pred->right];
+			move = MOVE_DOWN;
+			continue;
+		case MOVE_UP_FROM_RIGHT:
+ get_parent:
+			if (pred == root)
+				break;
+			pred = get_pred_parent(pred, preds,
+					       pred->parent,
+					       &move);
+			continue;
+		}
+		done = 1;
+	} while (!done);
+
+	/* We are fine. */
+	return 0;
+}
+
 /*
  * A series of AND or ORs where found together. Instead of
  * climbing up and down the tree branches, an array of the
@@ -410,99 +467,91 @@ static int process_ops(struct filter_pred *preds,
 
 	for (i = 0; i < op->val; i++) {
 		pred = &preds[op->ops[i]];
-		match = pred->fn(pred, rec);
+		if (!WARN_ON_ONCE(!pred->fn))
+			match = pred->fn(pred, rec);
 		if (!!match == type)
 			return match;
 	}
 	return match;
 }
 
+struct filter_match_preds_data {
+	struct filter_pred *preds;
+	int match;
+	void *rec;
+};
+
+static int filter_match_preds_cb(enum move_type move, struct filter_pred *pred,
+				 int *err, void *data)
+{
+	struct filter_match_preds_data *d = data;
+
+	*err = 0;
+	switch (move) {
+	case MOVE_DOWN:
+		/* only AND and OR have children */
+		if (pred->left != FILTER_PRED_INVALID) {
+			/* If ops is set, then it was folded. */
+			if (!pred->ops)
+				return WALK_PRED_DEFAULT;
+			/* We can treat folded ops as a leaf node */
+			d->match = process_ops(d->preds, pred, d->rec);
+		} else {
+			if (!WARN_ON_ONCE(!pred->fn))
+				d->match = pred->fn(pred, d->rec);
+		}
+
+		return WALK_PRED_PARENT;
+	case MOVE_UP_FROM_LEFT:
+		/*
+		 * Check for short circuits.
+		 *
+		 * Optimization: !!match == (pred->op == OP_OR)
+		 *   is the same as:
+		 * if ((match && pred->op == OP_OR) ||
+		 *     (!match && pred->op == OP_AND))
+		 */
+		if (!!d->match == (pred->op == OP_OR))
+			return WALK_PRED_PARENT;
+		break;
+	case MOVE_UP_FROM_RIGHT:
+		break;
+	}
+
+	return WALK_PRED_DEFAULT;
+}
+
 /* return 1 if event matches, 0 otherwise (discard) */
 int filter_match_preds(struct event_filter *filter, void *rec)
 {
-	int match = -1;
-	enum move_type move = MOVE_DOWN;
 	struct filter_pred *preds;
-	struct filter_pred *pred;
 	struct filter_pred *root;
-	int n_preds;
-	int done = 0;
+	struct filter_match_preds_data data = {
+		/* match is currently meaningless */
+		.match = -1,
+		.rec   = rec,
+	};
+	int n_preds, ret;
 
 	/* no filter is considered a match */
 	if (!filter)
 		return 1;
 
 	n_preds = filter->n_preds;
-
 	if (!n_preds)
 		return 1;
 
 	/*
 	 * n_preds, root and filter->preds are protect with preemption disabled.
 	 */
-	preds = rcu_dereference_sched(filter->preds);
 	root = rcu_dereference_sched(filter->root);
 	if (!root)
 		return 1;
 
-	pred = root;
-
-	/* match is currently meaningless */
-	match = -1;
-
-	do {
-		switch (move) {
-		case MOVE_DOWN:
-			/* only AND and OR have children */
-			if (pred->left != FILTER_PRED_INVALID) {
-				/* If ops is set, then it was folded. */
-				if (!pred->ops) {
-					/* keep going to down the left side */
-					pred = &preds[pred->left];
-					continue;
-				}
-				/* We can treat folded ops as a leaf node */
-				match = process_ops(preds, pred, rec);
-			} else
-				match = pred->fn(pred, rec);
-			/* If this pred is the only pred */
-			if (pred == root)
-				break;
-			pred = get_pred_parent(pred, preds,
-					       pred->parent, &move);
-			continue;
-		case MOVE_UP_FROM_LEFT:
-			/*
-			 * Check for short circuits.
-			 *
-			 * Optimization: !!match == (pred->op == OP_OR)
-			 *   is the same as:
-			 * if ((match && pred->op == OP_OR) ||
-			 *     (!match && pred->op == OP_AND))
-			 */
-			if (!!match == (pred->op == OP_OR)) {
-				if (pred == root)
-					break;
-				pred = get_pred_parent(pred, preds,
-						       pred->parent, &move);
-				continue;
-			}
-			/* now go down the right side of the tree. */
-			pred = &preds[pred->right];
-			move = MOVE_DOWN;
-			continue;
-		case MOVE_UP_FROM_RIGHT:
-			/* We finished this equation. */
-			if (pred == root)
-				break;
-			pred = get_pred_parent(pred, preds,
-					       pred->parent, &move);
-			continue;
-		}
-		done = 1;
-	} while (!done);
-
-	return match;
+	data.preds = preds = rcu_dereference_sched(filter->preds);
+	ret = walk_pred_tree(preds, root, filter_match_preds_cb, &data);
+	WARN_ON(ret);
+	return data.match;
 }
 EXPORT_SYMBOL_GPL(filter_match_preds);
 
@@ -628,22 +677,6 @@ find_event_field(struct ftrace_event_call *call, char *name)
 	return __find_event_field(head, name);
 }
 
-static void filter_free_pred(struct filter_pred *pred)
-{
-	if (!pred)
-		return;
-
-	kfree(pred->field_name);
-	kfree(pred);
-}
-
-static void filter_clear_pred(struct filter_pred *pred)
-{
-	kfree(pred->field_name);
-	pred->field_name = NULL;
-	pred->regex.len = 0;
-}
-
 static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
 {
 	stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL);
@@ -689,20 +722,13 @@ __pop_pred_stack(struct pred_stack *stack)
 static int filter_set_pred(struct event_filter *filter,
 			   int idx,
 			   struct pred_stack *stack,
-			   struct filter_pred *src,
-			   filter_pred_fn_t fn)
+			   struct filter_pred *src)
 {
 	struct filter_pred *dest = &filter->preds[idx];
 	struct filter_pred *left;
 	struct filter_pred *right;
 
 	*dest = *src;
-	if (src->field_name) {
-		dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
-		if (!dest->field_name)
-			return -ENOMEM;
-	}
-	dest->fn = fn;
 	dest->index = idx;
 
 	if (dest->op == OP_OR || dest->op == OP_AND) {
@@ -743,11 +769,7 @@ static int filter_set_pred(struct event_filter *filter,
 
 static void __free_preds(struct event_filter *filter)
 {
-	int i;
-
 	if (filter->preds) {
-		for (i = 0; i < filter->a_preds; i++)
-			kfree(filter->preds[i].field_name);
 		kfree(filter->preds);
 		filter->preds = NULL;
 	}
@@ -840,23 +862,19 @@ static void filter_free_subsystem_filters(struct event_subsystem *system)
 	}
 }
 
-static int filter_add_pred_fn(struct filter_parse_state *ps,
-			      struct ftrace_event_call *call,
-			      struct event_filter *filter,
-			      struct filter_pred *pred,
-			      struct pred_stack *stack,
-			      filter_pred_fn_t fn)
+static int filter_add_pred(struct filter_parse_state *ps,
+			   struct event_filter *filter,
+			   struct filter_pred *pred,
+			   struct pred_stack *stack)
 {
-	int idx, err;
+	int err;
 
 	if (WARN_ON(filter->n_preds == filter->a_preds)) {
 		parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
 		return -ENOSPC;
 	}
 
-	idx = filter->n_preds;
-	filter_clear_pred(&filter->preds[idx]);
-	err = filter_set_pred(filter, idx, stack, pred, fn);
+	err = filter_set_pred(filter, filter->n_preds, stack, pred);
 	if (err)
 		return err;
 
@@ -937,31 +955,15 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
 	return fn;
 }
 
-static int filter_add_pred(struct filter_parse_state *ps,
-			   struct ftrace_event_call *call,
-			   struct event_filter *filter,
-			   struct filter_pred *pred,
-			   struct pred_stack *stack,
-			   bool dry_run)
+static int init_pred(struct filter_parse_state *ps,
+		     struct ftrace_event_field *field,
+		     struct filter_pred *pred)
+
 {
-	struct ftrace_event_field *field;
-	filter_pred_fn_t fn;
+	filter_pred_fn_t fn = filter_pred_none;
 	unsigned long long val;
 	int ret;
 
-	fn = pred->fn = filter_pred_none;
-
-	if (pred->op == OP_AND)
-		goto add_pred_fn;
-	else if (pred->op == OP_OR)
-		goto add_pred_fn;
-
-	field = find_event_field(call, pred->field_name);
-	if (!field) {
-		parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
-		return -EINVAL;
-	}
-
 	pred->offset = field->offset;
 
 	if (!is_legal_op(field, pred->op)) {
@@ -1001,9 +1003,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
 	if (pred->op == OP_NE)
 		pred->not = 1;
 
-add_pred_fn:
-	if (!dry_run)
-		return filter_add_pred_fn(ps, call, filter, pred, stack, fn);
+	pred->fn = fn;
 	return 0;
 }
 
@@ -1302,39 +1302,37 @@ parse_operand:
 	return 0;
 }
 
-static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
+static struct filter_pred *create_pred(struct filter_parse_state *ps,
+				       struct ftrace_event_call *call,
+				       int op, char *operand1, char *operand2)
 {
-	struct filter_pred *pred;
+	struct ftrace_event_field *field;
+	static struct filter_pred pred;
 
-	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
-	if (!pred)
-		return NULL;
+	memset(&pred, 0, sizeof(pred));
+	pred.op = op;
 
-	pred->field_name = kstrdup(operand1, GFP_KERNEL);
-	if (!pred->field_name) {
-		kfree(pred);
+	if (op == OP_AND || op == OP_OR)
+		return &pred;
+
+	if (!operand1 || !operand2) {
+		parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
 		return NULL;
 	}
 
-	strcpy(pred->regex.pattern, operand2);
-	pred->regex.len = strlen(pred->regex.pattern);
-
-	pred->op = op;
-
-	return pred;
-}
-
-static struct filter_pred *create_logical_pred(int op)
-{
-	struct filter_pred *pred;
-
-	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
-	if (!pred)
+	field = find_event_field(call, operand1);
+	if (!field) {
+		parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
 		return NULL;
+	}
 
-	pred->op = op;
+	strcpy(pred.regex.pattern, operand2);
+	pred.regex.len = strlen(pred.regex.pattern);
 
-	return pred;
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+	pred.field = field;
+#endif
+	return init_pred(ps, field, &pred) ? NULL : &pred;
 }
 
 static int check_preds(struct filter_parse_state *ps)
@@ -1375,6 +1373,23 @@ static int count_preds(struct filter_parse_state *ps)
 	return n_preds;
 }
 
+struct check_pred_data {
+	int count;
+	int max;
+};
+
+static int check_pred_tree_cb(enum move_type move, struct filter_pred *pred,
+			      int *err, void *data)
+{
+	struct check_pred_data *d = data;
+
+	if (WARN_ON(d->count++ > d->max)) {
+		*err = -EINVAL;
+		return WALK_PRED_ABORT;
+	}
+	return WALK_PRED_DEFAULT;
+}
+
 /*
  * The tree is walked at filtering of an event. If the tree is not correctly
  * built, it may cause an infinite loop. Check here that the tree does
@@ -1383,107 +1398,76 @@ static int count_preds(struct filter_parse_state *ps)
 static int check_pred_tree(struct event_filter *filter,
 			   struct filter_pred *root)
 {
-	struct filter_pred *preds;
-	struct filter_pred *pred;
-	enum move_type move = MOVE_DOWN;
-	int count = 0;
-	int done = 0;
-	int max;
-
-	/*
-	 * The max that we can hit a node is three times.
-	 * Once going down, once coming up from left, and
-	 * once coming up from right. This is more than enough
-	 * since leafs are only hit a single time.
-	 */
-	max = 3 * filter->n_preds;
+	struct check_pred_data data = {
+		/*
+		 * The max that we can hit a node is three times.
+		 * Once going down, once coming up from left, and
+		 * once coming up from right. This is more than enough
+		 * since leafs are only hit a single time.
+		 */
+		.max   = 3 * filter->n_preds,
+		.count = 0,
+	};
 
-	preds = filter->preds;
-	if (!preds)
-		return -EINVAL;
-	pred = root;
+	return walk_pred_tree(filter->preds, root,
+			      check_pred_tree_cb, &data);
+}
 
-	do {
-		if (WARN_ON(count++ > max))
-			return -EINVAL;
+static int count_leafs_cb(enum move_type move, struct filter_pred *pred,
+			  int *err, void *data)
+{
+	int *count = data;
 
-		switch (move) {
-		case MOVE_DOWN:
-			if (pred->left != FILTER_PRED_INVALID) {
-				pred = &preds[pred->left];
-				continue;
-			}
-			/* A leaf at the root is just a leaf in the tree */
-			if (pred == root)
-				break;
-			pred = get_pred_parent(pred, preds,
-					       pred->parent, &move);
-			continue;
-		case MOVE_UP_FROM_LEFT:
-			pred = &preds[pred->right];
-			move = MOVE_DOWN;
-			continue;
-		case MOVE_UP_FROM_RIGHT:
-			if (pred == root)
-				break;
-			pred = get_pred_parent(pred, preds,
-					       pred->parent, &move);
-			continue;
-		}
-		done = 1;
-	} while (!done);
+	if ((move == MOVE_DOWN) &&
+	    (pred->left == FILTER_PRED_INVALID))
+		(*count)++;
 
-	/* We are fine. */
-	return 0;
+	return WALK_PRED_DEFAULT;
 }
 
 static int count_leafs(struct filter_pred *preds, struct filter_pred *root)
 {
-	struct filter_pred *pred;
-	enum move_type move = MOVE_DOWN;
-	int count = 0;
-	int done = 0;
+	int count = 0, ret;
 
-	pred = root;
+	ret = walk_pred_tree(preds, root, count_leafs_cb, &count);
+	WARN_ON(ret);
+	return count;
+}
 
-	do {
-		switch (move) {
-		case MOVE_DOWN:
-			if (pred->left != FILTER_PRED_INVALID) {
-				pred = &preds[pred->left];
-				continue;
-			}
-			/* A leaf at the root is just a leaf in the tree */
-			if (pred == root)
-				return 1;
-			count++;
-			pred = get_pred_parent(pred, preds,
-					       pred->parent, &move);
-			continue;
-		case MOVE_UP_FROM_LEFT:
-			pred = &preds[pred->right];
-			move = MOVE_DOWN;
-			continue;
-		case MOVE_UP_FROM_RIGHT:
-			if (pred == root)
-				break;
-			pred = get_pred_parent(pred, preds,
-					       pred->parent, &move);
-			continue;
-		}
-		done = 1;
-	} while (!done);
+struct fold_pred_data {
+	struct filter_pred *root;
+	int count;
+	int children;
+};
 
-	return count;
+static int fold_pred_cb(enum move_type move, struct filter_pred *pred,
+			int *err, void *data)
+{
+	struct fold_pred_data *d = data;
+	struct filter_pred *root = d->root;
+
+	if (move != MOVE_DOWN)
+		return WALK_PRED_DEFAULT;
+	if (pred->left != FILTER_PRED_INVALID)
+		return WALK_PRED_DEFAULT;
+
+	if (WARN_ON(d->count == d->children)) {
+		*err = -EINVAL;
+		return WALK_PRED_ABORT;
+	}
+
+	pred->index &= ~FILTER_PRED_FOLD;
+	root->ops[d->count++] = pred->index;
+	return WALK_PRED_DEFAULT;
 }
 
 static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
 {
-	struct filter_pred *pred;
-	enum move_type move = MOVE_DOWN;
-	int count = 0;
+	struct fold_pred_data data = {
+		.root  = root,
+		.count = 0,
+	};
 	int children;
-	int done = 0;
 
 	/* No need to keep the fold flag */
 	root->index &= ~FILTER_PRED_FOLD;
@@ -1501,37 +1485,26 @@ static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
 		return -ENOMEM;
 
 	root->val = children;
+	data.children = children;
+	return walk_pred_tree(preds, root, fold_pred_cb, &data);
+}
 
-	pred = root;
-	do {
-		switch (move) {
-		case MOVE_DOWN:
-			if (pred->left != FILTER_PRED_INVALID) {
-				pred = &preds[pred->left];
-				continue;
-			}
-			if (WARN_ON(count == children))
-				return -EINVAL;
-			pred->index &= ~FILTER_PRED_FOLD;
-			root->ops[count++] = pred->index;
-			pred = get_pred_parent(pred, preds,
-					       pred->parent, &move);
-			continue;
-		case MOVE_UP_FROM_LEFT:
-			pred = &preds[pred->right];
-			move = MOVE_DOWN;
-			continue;
-		case MOVE_UP_FROM_RIGHT:
-			if (pred == root)
-				break;
-			pred = get_pred_parent(pred, preds,
-					       pred->parent, &move);
-			continue;
-		}
-		done = 1;
-	} while (!done);
+static int fold_pred_tree_cb(enum move_type move, struct filter_pred *pred,
+			     int *err, void *data)
+{
+	struct filter_pred *preds = data;
 
-	return 0;
+	if (move != MOVE_DOWN)
+		return WALK_PRED_DEFAULT;
+	if (!(pred->index & FILTER_PRED_FOLD))
+		return WALK_PRED_DEFAULT;
+
+	*err = fold_pred(preds, pred);
+	if (*err)
+		return WALK_PRED_ABORT;
+
+	/* eveyrhing below is folded, continue with parent */
+	return WALK_PRED_PARENT;
 }
 
 /*
@@ -1542,51 +1515,8 @@ static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
 static int fold_pred_tree(struct event_filter *filter,
 			  struct filter_pred *root)
 {
-	struct filter_pred *preds;
-	struct filter_pred *pred;
-	enum move_type move = MOVE_DOWN;
-	int done = 0;
-	int err;
-
-	preds = filter->preds;
-	if (!preds)
-		return -EINVAL;
-	pred = root;
-
-	do {
-		switch (move) {
-		case MOVE_DOWN:
-			if (pred->index & FILTER_PRED_FOLD) {
-				err = fold_pred(preds, pred);
-				if (err)
-					return err;
-				/* Folded nodes are like leafs */
-			} else if (pred->left != FILTER_PRED_INVALID) {
-				pred = &preds[pred->left];
-				continue;
-			}
-
-			/* A leaf at the root is just a leaf in the tree */
-			if (pred == root)
-				break;
-			pred = get_pred_parent(pred, preds,
-					       pred->parent, &move);
-			continue;
-		case MOVE_UP_FROM_LEFT:
-			pred = &preds[pred->right];
-			move = MOVE_DOWN;
-			continue;
-		case MOVE_UP_FROM_RIGHT:
-			if (pred == root)
-				break;
-			pred = get_pred_parent(pred, preds,
-					       pred->parent, &move);
-			continue;
-		}
-		done = 1;
-	} while (!done);
-
-	return 0;
+	return walk_pred_tree(filter->preds, root, fold_pred_tree_cb,
+			      filter->preds);
 }
 
 static int replace_preds(struct ftrace_event_call *call,
@@ -1643,27 +1573,17 @@ static int replace_preds(struct ftrace_event_call *call,
 			goto fail;
 		}
 
-		if (elt->op == OP_AND || elt->op == OP_OR) {
-			pred = create_logical_pred(elt->op);
-			goto add_pred;
-		}
-
-		if (!operand1 || !operand2) {
-			parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
+		pred = create_pred(ps, call, elt->op, operand1, operand2);
+		if (!pred) {
 			err = -EINVAL;
 			goto fail;
 		}
 
-		pred = create_pred(elt->op, operand1, operand2);
-add_pred:
-		if (!pred) {
-			err = -ENOMEM;
-			goto fail;
+		if (!dry_run) {
+			err = filter_add_pred(ps, filter, pred, &stack);
+			if (err)
+				goto fail;
 		}
-		err = filter_add_pred(ps, call, filter, pred, &stack, dry_run);
-		filter_free_pred(pred);
-		if (err)
-			goto fail;
 
 		operand1 = operand2 = NULL;
 	}
@@ -1958,17 +1878,14 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
 	int err;
 	struct event_filter *filter;
 	struct filter_parse_state *ps;
-	struct ftrace_event_call *call = NULL;
+	struct ftrace_event_call *call;
 
 	mutex_lock(&event_mutex);
 
-	list_for_each_entry(call, &ftrace_events, list) {
-		if (call->event.type == event_id)
-			break;
-	}
+	call = event->tp_event;
 
 	err = -EINVAL;
-	if (&call->list == &ftrace_events)
+	if (!call)
 		goto out_unlock;
 
 	err = -EEXIST;
@@ -2012,3 +1929,215 @@ out_unlock:
 
 #endif /* CONFIG_PERF_EVENTS */
 
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace_events_filter_test.h"
+
+static int test_get_filter(char *filter_str, struct ftrace_event_call *call,
+			   struct event_filter **pfilter)
+{
+	struct event_filter *filter;
+	struct filter_parse_state *ps;
+	int err = -ENOMEM;
+
+	filter = __alloc_filter();
+	if (!filter)
+		goto out;
+
+	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+	if (!ps)
+		goto free_filter;
+
+	parse_init(ps, filter_ops, filter_str);
+	err = filter_parse(ps);
+	if (err)
+		goto free_ps;
+
+	err = replace_preds(call, filter, ps, filter_str, false);
+	if (!err)
+		*pfilter = filter;
+
+ free_ps:
+	filter_opstack_clear(ps);
+	postfix_clear(ps);
+	kfree(ps);
+
+ free_filter:
+	if (err)
+		__free_filter(filter);
+
+ out:
+	return err;
+}
+
+#define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \
+{ \
+	.filter = FILTER, \
+	.rec    = { .a = va, .b = vb, .c = vc, .d = vd, \
+		    .e = ve, .f = vf, .g = vg, .h = vh }, \
+	.match  = m, \
+	.not_visited = nvisit, \
+}
+#define YES 1
+#define NO  0
+
+static struct test_filter_data_t {
+	char *filter;
+	struct ftrace_raw_ftrace_test_filter rec;
+	int match;
+	char *not_visited;
+} test_filter_data[] = {
+#define FILTER "a == 1 && b == 1 && c == 1 && d == 1 && " \
+	       "e == 1 && f == 1 && g == 1 && h == 1"
+	DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, ""),
+	DATA_REC(NO, 0, 1, 1, 1, 1, 1, 1, 1, "bcdefgh"),
+	DATA_REC(NO, 1, 1, 1, 1, 1, 1, 1, 0, ""),
+#undef FILTER
+#define FILTER "a == 1 || b == 1 || c == 1 || d == 1 || " \
+	       "e == 1 || f == 1 || g == 1 || h == 1"
+	DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 0, ""),
+	DATA_REC(YES, 0, 0, 0, 0, 0, 0, 0, 1, ""),
+	DATA_REC(YES, 1, 0, 0, 0, 0, 0, 0, 0, "bcdefgh"),
+#undef FILTER
+#define FILTER "(a == 1 || b == 1) && (c == 1 || d == 1) && " \
+	       "(e == 1 || f == 1) && (g == 1 || h == 1)"
+	DATA_REC(NO, 0, 0, 1, 1, 1, 1, 1, 1, "dfh"),
+	DATA_REC(YES, 0, 1, 0, 1, 0, 1, 0, 1, ""),
+	DATA_REC(YES, 1, 0, 1, 0, 0, 1, 0, 1, "bd"),
+	DATA_REC(NO, 1, 0, 1, 0, 0, 1, 0, 0, "bd"),
+#undef FILTER
+#define FILTER "(a == 1 && b == 1) || (c == 1 && d == 1) || " \
+	       "(e == 1 && f == 1) || (g == 1 && h == 1)"
+	DATA_REC(YES, 1, 0, 1, 1, 1, 1, 1, 1, "efgh"),
+	DATA_REC(YES, 0, 0, 0, 0, 0, 0, 1, 1, ""),
+	DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 1, ""),
+#undef FILTER
+#define FILTER "(a == 1 && b == 1) && (c == 1 && d == 1) && " \
+	       "(e == 1 && f == 1) || (g == 1 && h == 1)"
+	DATA_REC(YES, 1, 1, 1, 1, 1, 1, 0, 0, "gh"),
+	DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 1, ""),
+	DATA_REC(YES, 1, 1, 1, 1, 1, 0, 1, 1, ""),
+#undef FILTER
+#define FILTER "((a == 1 || b == 1) || (c == 1 || d == 1) || " \
+	       "(e == 1 || f == 1)) && (g == 1 || h == 1)"
+	DATA_REC(YES, 1, 1, 1, 1, 1, 1, 0, 1, "bcdef"),
+	DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 0, ""),
+	DATA_REC(YES, 1, 1, 1, 1, 1, 0, 1, 1, "h"),
+#undef FILTER
+#define FILTER "((((((((a == 1) && (b == 1)) || (c == 1)) && (d == 1)) || " \
+	       "(e == 1)) && (f == 1)) || (g == 1)) && (h == 1))"
+	DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, "ceg"),
+	DATA_REC(NO, 0, 1, 0, 1, 0, 1, 0, 1, ""),
+	DATA_REC(NO, 1, 0, 1, 0, 1, 0, 1, 0, ""),
+#undef FILTER
+#define FILTER "((((((((a == 1) || (b == 1)) && (c == 1)) || (d == 1)) && " \
+	       "(e == 1)) || (f == 1)) && (g == 1)) || (h == 1))"
+	DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, "bdfh"),
+	DATA_REC(YES, 0, 1, 0, 1, 0, 1, 0, 1, ""),
+	DATA_REC(YES, 1, 0, 1, 0, 1, 0, 1, 0, "bdfh"),
+};
+
+#undef DATA_REC
+#undef FILTER
+#undef YES
+#undef NO
+
+#define DATA_CNT (sizeof(test_filter_data)/sizeof(struct test_filter_data_t))
+
+static int test_pred_visited;
+
+static int test_pred_visited_fn(struct filter_pred *pred, void *event)
+{
+	struct ftrace_event_field *field = pred->field;
+
+	test_pred_visited = 1;
+	printk(KERN_INFO "\npred visited %s\n", field->name);
+	return 1;
+}
+
+static int test_walk_pred_cb(enum move_type move, struct filter_pred *pred,
+			     int *err, void *data)
+{
+	char *fields = data;
+
+	if ((move == MOVE_DOWN) &&
+	    (pred->left == FILTER_PRED_INVALID)) {
+		struct ftrace_event_field *field = pred->field;
+
+		if (!field) {
+			WARN(1, "all leafs should have field defined");
+			return WALK_PRED_DEFAULT;
+		}
+		if (!strchr(fields, *field->name))
+			return WALK_PRED_DEFAULT;
+
+		WARN_ON(!pred->fn);
+		pred->fn = test_pred_visited_fn;
+	}
+	return WALK_PRED_DEFAULT;
+}
+
+static __init int ftrace_test_event_filter(void)
+{
+	int i;
+
+	printk(KERN_INFO "Testing ftrace filter: ");
+
+	for (i = 0; i < DATA_CNT; i++) {
+		struct event_filter *filter = NULL;
+		struct test_filter_data_t *d = &test_filter_data[i];
+		int err;
+
+		err = test_get_filter(d->filter, &event_ftrace_test_filter,
+				      &filter);
+		if (err) {
+			printk(KERN_INFO
+			       "Failed to get filter for '%s', err %d\n",
+			       d->filter, err);
+			break;
+		}
+
+		/*
+		 * The preemption disabling is not really needed for self
+		 * tests, but the rcu dereference will complain without it.
+		 */
+		preempt_disable();
+		if (*d->not_visited)
+			walk_pred_tree(filter->preds, filter->root,
+				       test_walk_pred_cb,
+				       d->not_visited);
+
+		test_pred_visited = 0;
+		err = filter_match_preds(filter, &d->rec);
+		preempt_enable();
+
+		__free_filter(filter);
+
+		if (test_pred_visited) {
+			printk(KERN_INFO
+			       "Failed, unwanted pred visited for filter %s\n",
+			       d->filter);
+			break;
+		}
+
+		if (err != d->match) {
+			printk(KERN_INFO
+			       "Failed to match filter '%s', expected %d\n",
+			       d->filter, d->match);
+			break;
+		}
+	}
+
+	if (i == DATA_CNT)
+		printk(KERN_CONT "OK\n");
+
+	return 0;
+}
+
+late_initcall(ftrace_test_event_filter);
+
+#endif /* CONFIG_FTRACE_STARTUP_TEST */
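
All of the open-coded tree walks above collapse into walk_pred_tree() plus a small callback. The callback sees every node on the way down (MOVE_DOWN) and on the way back up (MOVE_UP_FROM_LEFT / MOVE_UP_FROM_RIGHT), and steers the walk by returning WALK_PRED_DEFAULT, WALK_PRED_PARENT, or WALK_PRED_ABORT. A hedged sketch of one more callback in the same style, counting the OR nodes of a filter (not part of this patch):

/* Illustrative only: count OR nodes with the new generic walker. */
static int count_or_nodes_cb(enum move_type move, struct filter_pred *pred,
			     int *err, void *data)
{
	int *count = data;

	/* interior nodes have a valid left child; leaves do not */
	if ((move == MOVE_DOWN) &&
	    (pred->left != FILTER_PRED_INVALID) &&
	    (pred->op == OP_OR))
		(*count)++;

	return WALK_PRED_DEFAULT;
}

static int count_or_nodes(struct event_filter *filter)
{
	int count = 0;

	WARN_ON(walk_pred_tree(filter->preds, filter->root,
			       count_or_nodes_cb, &count));
	return count;
}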
diff --git a/kernel/trace/trace_events_filter_test.h b/kernel/trace/trace_events_filter_test.h
new file mode 100644
index 000000000000..bfd4dba0d603
--- /dev/null
+++ b/kernel/trace/trace_events_filter_test.h
@@ -0,0 +1,50 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM test
+
+#if !defined(_TRACE_TEST_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TEST_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(ftrace_test_filter,
+
+	TP_PROTO(int a, int b, int c, int d, int e, int f, int g, int h),
+
+	TP_ARGS(a, b, c, d, e, f, g, h),
+
+	TP_STRUCT__entry(
+		__field(int, a)
+		__field(int, b)
+		__field(int, c)
+		__field(int, d)
+		__field(int, e)
+		__field(int, f)
+		__field(int, g)
+		__field(int, h)
+	),
+
+	TP_fast_assign(
+		__entry->a = a;
+		__entry->b = b;
+		__entry->c = c;
+		__entry->d = d;
+		__entry->e = e;
+		__entry->f = f;
+		__entry->g = g;
+		__entry->h = h;
+	),
+
+	TP_printk("a %d, b %d, c %d, d %d, e %d, f %d, g %d, h %d",
+		  __entry->a, __entry->b, __entry->c, __entry->d,
+		  __entry->e, __entry->f, __entry->g, __entry->h)
+);
+
+#endif /* _TRACE_TEST_H || TRACE_HEADER_MULTI_READ */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_events_filter_test
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
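
The header is a standard self-contained TRACE_EVENT() definition. Including it once with CREATE_TRACE_POINTS, as trace_events_filter.c does above, generates both the trace_ftrace_test_filter() call helper and the ftrace_raw_ftrace_test_filter record layout that the self-test filters against. A hedged sketch of what firing the tracepoint would look like if some code wanted to (the self-test itself only matches filters against hand-built records and never calls it):

/* Illustrative only: firing the test tracepoint from kernel code that
 * has included trace_events_filter_test.h.
 */
static void example_fire_test_event(void)
{
	trace_ftrace_test_filter(1, 1, 1, 1, 0, 0, 0, 0);
}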
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 667aa8cc0cfc..a1a3359996a7 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -505,13 +505,13 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
 #ifdef CONFIG_PREEMPT_TRACER
 void trace_preempt_on(unsigned long a0, unsigned long a1)
 {
-	if (preempt_trace())
+	if (preempt_trace() && !irq_trace())
 		stop_critical_timing(a0, a1);
 }
 
 void trace_preempt_off(unsigned long a0, unsigned long a1)
 {
-	if (preempt_trace())
+	if (preempt_trace() && !irq_trace())
 		start_critical_timing(a0, a1);
 }
 #endif /* CONFIG_PREEMPT_TRACER */
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 1f06468a10d7..6fd4ffd042f9 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -59,18 +59,19 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
 			continue;
 		}
 
+		fmt = NULL;
 		tb_fmt = kmalloc(sizeof(*tb_fmt), GFP_KERNEL);
-		if (tb_fmt)
+		if (tb_fmt) {
 			fmt = kmalloc(strlen(*iter) + 1, GFP_KERNEL);
-		if (tb_fmt && fmt) {
+			if (fmt) {
 			list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list);
 			strcpy(fmt, *iter);
 			tb_fmt->fmt = fmt;
-			*iter = tb_fmt->fmt;
-		} else {
-			kfree(tb_fmt);
-			*iter = NULL;
+			} else
+				kfree(tb_fmt);
 		}
+		*iter = fmt;
+
 	}
 	mutex_unlock(&btrace_mutex);
 }