aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
authorSteven Rostedt (Red Hat) <rostedt@goodmis.org>2016-05-03 17:15:43 -0400
committerSteven Rostedt <rostedt@goodmis.org>2016-05-03 17:59:24 -0400
commit0fc1b09ff1ff404ddf753f5ffa5cd0adc8fdcdc9 (patch)
tree2607e3d3e242fb4d3d426fafe6295e8d7d6ac5a7 /kernel/trace
parentdcb0b5575d24a32f51a3f1003312fb94ed4e214a (diff)
tracing: Use temp buffer when filtering events
Filtering of events requires the data to be written to the ring buffer before it can be decided to filter or not. This is because the parameters of the filter are based on the result that is written to the ring buffer and not on the parameters that are passed into the trace functions. The ftrace ring buffer is optimized for writing into the ring buffer and committing. The discard procedure used when filtering decides the event should be discarded is much more heavy weight. Thus, using a temporary filter when filtering events can speed things up drastically. Without a temp buffer we have: # trace-cmd start -p nop # perf stat -r 10 hackbench 50 0.790706626 seconds time elapsed ( +- 0.71% ) # trace-cmd start -e all # perf stat -r 10 hackbench 50 1.566904059 seconds time elapsed ( +- 0.27% ) # trace-cmd start -e all -f 'common_preempt_count==20' # perf stat -r 10 hackbench 50 1.690598511 seconds time elapsed ( +- 0.19% ) # trace-cmd start -e all -f 'common_preempt_count!=20' # perf stat -r 10 hackbench 50 1.707486364 seconds time elapsed ( +- 0.30% ) The first run above is without any tracing, just to get a based figure. hackbench takes ~0.79 seconds to run on the system. The second run enables tracing all events where nothing is filtered. This increases the time by 100% and hackbench takes 1.57 seconds to run. The third run filters all events where the preempt count will equal "20" (this should never happen) thus all events are discarded. This takes 1.69 seconds to run. This is 10% slower than just committing the events! The last run enables all events and filters where the filter will commit all events, and this takes 1.70 seconds to run. The filtering overhead is approximately 10%. Thus, the discard and commit of an event from the ring buffer may be about the same time. With this patch, the numbers change: # trace-cmd start -p nop # perf stat -r 10 hackbench 50 0.778233033 seconds time elapsed ( +- 0.38% ) # trace-cmd start -e all # perf stat -r 10 hackbench 50 1.582102692 seconds time elapsed ( +- 0.28% ) # trace-cmd start -e all -f 'common_preempt_count==20' # perf stat -r 10 hackbench 50 1.309230710 seconds time elapsed ( +- 0.22% ) # trace-cmd start -e all -f 'common_preempt_count!=20' # perf stat -r 10 hackbench 50 1.786001924 seconds time elapsed ( +- 0.20% ) The first run is again the base with no tracing. The second run is all tracing with no filtering. It is a little slower, but that may be well within the noise. The third run shows that discarding all events only took 1.3 seconds. This is a speed up of 23%! The discard is much faster than even the commit. The one downside is shown in the last run. Events that are not discarded by the filter will take longer to add, this is due to the extra copy of the event. Cc: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/trace.c154
-rw-r--r--kernel/trace/trace.h19
-rw-r--r--kernel/trace/trace_events.c10
-rw-r--r--kernel/trace/trace_events_filter.c10
4 files changed, 185 insertions, 8 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c09e8ffadc73..8a4bd6b68a0b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -312,7 +312,7 @@ int call_filter_check_discard(struct trace_event_call *call, void *rec,
312{ 312{
313 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) && 313 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314 !filter_match_preds(call->filter, rec)) { 314 !filter_match_preds(call->filter, rec)) {
315 ring_buffer_discard_commit(buffer, event); 315 __trace_event_discard_commit(buffer, event);
316 return 1; 316 return 1;
317 } 317 }
318 318
@@ -1660,6 +1660,16 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1660} 1660}
1661EXPORT_SYMBOL_GPL(tracing_generic_entry_update); 1661EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1662 1662
1663static __always_inline void
1664trace_event_setup(struct ring_buffer_event *event,
1665 int type, unsigned long flags, int pc)
1666{
1667 struct trace_entry *ent = ring_buffer_event_data(event);
1668
1669 tracing_generic_entry_update(ent, flags, pc);
1670 ent->type = type;
1671}
1672
1663struct ring_buffer_event * 1673struct ring_buffer_event *
1664trace_buffer_lock_reserve(struct ring_buffer *buffer, 1674trace_buffer_lock_reserve(struct ring_buffer *buffer,
1665 int type, 1675 int type,
@@ -1669,21 +1679,136 @@ trace_buffer_lock_reserve(struct ring_buffer *buffer,
1669 struct ring_buffer_event *event; 1679 struct ring_buffer_event *event;
1670 1680
1671 event = ring_buffer_lock_reserve(buffer, len); 1681 event = ring_buffer_lock_reserve(buffer, len);
1672 if (event != NULL) { 1682 if (event != NULL)
1673 struct trace_entry *ent = ring_buffer_event_data(event); 1683 trace_event_setup(event, type, flags, pc);
1684
1685 return event;
1686}
1687
1688DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1689DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1690static int trace_buffered_event_ref;
1691
1692/**
1693 * trace_buffered_event_enable - enable buffering events
1694 *
1695 * When events are being filtered, it is quicker to use a temporary
1696 * buffer to write the event data into if there's a likely chance
1697 * that it will not be committed. The discard of the ring buffer
1698 * is not as fast as committing, and is much slower than copying
1699 * a commit.
1700 *
1701 * When an event is to be filtered, allocate per cpu buffers to
1702 * write the event data into, and if the event is filtered and discarded
1703 * it is simply dropped, otherwise, the entire data is to be committed
1704 * in one shot.
1705 */
1706void trace_buffered_event_enable(void)
1707{
1708 struct ring_buffer_event *event;
1709 struct page *page;
1710 int cpu;
1674 1711
1675 tracing_generic_entry_update(ent, flags, pc); 1712 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1676 ent->type = type; 1713
1714 if (trace_buffered_event_ref++)
1715 return;
1716
1717 for_each_tracing_cpu(cpu) {
1718 page = alloc_pages_node(cpu_to_node(cpu),
1719 GFP_KERNEL | __GFP_NORETRY, 0);
1720 if (!page)
1721 goto failed;
1722
1723 event = page_address(page);
1724 memset(event, 0, sizeof(*event));
1725
1726 per_cpu(trace_buffered_event, cpu) = event;
1727
1728 preempt_disable();
1729 if (cpu == smp_processor_id() &&
1730 this_cpu_read(trace_buffered_event) !=
1731 per_cpu(trace_buffered_event, cpu))
1732 WARN_ON_ONCE(1);
1733 preempt_enable();
1677 } 1734 }
1678 1735
1679 return event; 1736 return;
1737 failed:
1738 trace_buffered_event_disable();
1739}
1740
1741static void enable_trace_buffered_event(void *data)
1742{
1743 /* Probably not needed, but do it anyway */
1744 smp_rmb();
1745 this_cpu_dec(trace_buffered_event_cnt);
1746}
1747
1748static void disable_trace_buffered_event(void *data)
1749{
1750 this_cpu_inc(trace_buffered_event_cnt);
1751}
1752
1753/**
1754 * trace_buffered_event_disable - disable buffering events
1755 *
1756 * When a filter is removed, it is faster to not use the buffered
1757 * events, and to commit directly into the ring buffer. Free up
1758 * the temp buffers when there are no more users. This requires
1759 * special synchronization with current events.
1760 */
1761void trace_buffered_event_disable(void)
1762{
1763 int cpu;
1764
1765 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1766
1767 if (WARN_ON_ONCE(!trace_buffered_event_ref))
1768 return;
1769
1770 if (--trace_buffered_event_ref)
1771 return;
1772
1773 preempt_disable();
1774 /* For each CPU, set the buffer as used. */
1775 smp_call_function_many(tracing_buffer_mask,
1776 disable_trace_buffered_event, NULL, 1);
1777 preempt_enable();
1778
1779 /* Wait for all current users to finish */
1780 synchronize_sched();
1781
1782 for_each_tracing_cpu(cpu) {
1783 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
1784 per_cpu(trace_buffered_event, cpu) = NULL;
1785 }
1786 /*
1787 * Make sure trace_buffered_event is NULL before clearing
1788 * trace_buffered_event_cnt.
1789 */
1790 smp_wmb();
1791
1792 preempt_disable();
1793 /* Do the work on each cpu */
1794 smp_call_function_many(tracing_buffer_mask,
1795 enable_trace_buffered_event, NULL, 1);
1796 preempt_enable();
1680} 1797}
1681 1798
1682void 1799void
1683__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) 1800__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1684{ 1801{
1685 __this_cpu_write(trace_cmdline_save, true); 1802 __this_cpu_write(trace_cmdline_save, true);
1686 ring_buffer_unlock_commit(buffer, event); 1803
1804 /* If this is the temp buffer, we need to commit fully */
1805 if (this_cpu_read(trace_buffered_event) == event) {
1806 /* Length is in event->array[0] */
1807 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1808 /* Release the temp buffer */
1809 this_cpu_dec(trace_buffered_event_cnt);
1810 } else
1811 ring_buffer_unlock_commit(buffer, event);
1687} 1812}
1688 1813
1689static struct ring_buffer *temp_buffer; 1814static struct ring_buffer *temp_buffer;
@@ -1695,8 +1820,23 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1695 unsigned long flags, int pc) 1820 unsigned long flags, int pc)
1696{ 1821{
1697 struct ring_buffer_event *entry; 1822 struct ring_buffer_event *entry;
1823 int val;
1698 1824
1699 *current_rb = trace_file->tr->trace_buffer.buffer; 1825 *current_rb = trace_file->tr->trace_buffer.buffer;
1826
1827 if ((trace_file->flags &
1828 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
1829 (entry = this_cpu_read(trace_buffered_event))) {
1830 /* Try to use the per cpu buffer first */
1831 val = this_cpu_inc_return(trace_buffered_event_cnt);
1832 if (val == 1) {
1833 trace_event_setup(entry, type, flags, pc);
1834 entry->array[0] = len;
1835 return entry;
1836 }
1837 this_cpu_dec(trace_buffered_event_cnt);
1838 }
1839
1700 entry = trace_buffer_lock_reserve(*current_rb, 1840 entry = trace_buffer_lock_reserve(*current_rb,
1701 type, len, flags, pc); 1841 type, len, flags, pc);
1702 /* 1842 /*
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 10156a09103f..5167c366d6b7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1083,6 +1083,23 @@ static inline void trace_buffer_unlock_commit(struct trace_array *tr,
1083 trace_buffer_unlock_commit_regs(tr, buffer, event, flags, pc, NULL); 1083 trace_buffer_unlock_commit_regs(tr, buffer, event, flags, pc, NULL);
1084} 1084}
1085 1085
1086DECLARE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1087DECLARE_PER_CPU(int, trace_buffered_event_cnt);
1088void trace_buffered_event_disable(void);
1089void trace_buffered_event_enable(void);
1090
1091static inline void
1092__trace_event_discard_commit(struct ring_buffer *buffer,
1093 struct ring_buffer_event *event)
1094{
1095 if (this_cpu_read(trace_buffered_event) == event) {
1096 /* Simply release the temp buffer */
1097 this_cpu_dec(trace_buffered_event_cnt);
1098 return;
1099 }
1100 ring_buffer_discard_commit(buffer, event);
1101}
1102
1086/* 1103/*
1087 * Helper function for event_trigger_unlock_commit{_regs}(). 1104 * Helper function for event_trigger_unlock_commit{_regs}().
1088 * If there are event triggers attached to this event that requires 1105 * If there are event triggers attached to this event that requires
@@ -1111,7 +1128,7 @@ __event_trigger_test_discard(struct trace_event_file *file,
1111 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || 1128 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
1112 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && 1129 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
1113 !filter_match_preds(file->filter, entry))) { 1130 !filter_match_preds(file->filter, entry))) {
1114 ring_buffer_discard_commit(buffer, event); 1131 __trace_event_discard_commit(buffer, event);
1115 return true; 1132 return true;
1116 } 1133 }
1117 1134
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index da1eeb6190e3..4d006707b947 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -363,6 +363,7 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
363{ 363{
364 struct trace_event_call *call = file->event_call; 364 struct trace_event_call *call = file->event_call;
365 struct trace_array *tr = file->tr; 365 struct trace_array *tr = file->tr;
366 unsigned long file_flags = file->flags;
366 int ret = 0; 367 int ret = 0;
367 int disable; 368 int disable;
368 369
@@ -445,6 +446,15 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
445 break; 446 break;
446 } 447 }
447 448
449 /* Enable or disable use of trace_buffered_event */
450 if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) !=
451 (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) {
452 if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
453 trace_buffered_event_enable();
454 else
455 trace_buffered_event_disable();
456 }
457
448 return ret; 458 return ret;
449} 459}
450 460
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index d1d27bf37a19..9daa9b3bc6d9 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -823,7 +823,12 @@ static void __free_preds(struct event_filter *filter)
823 823
824static void filter_disable(struct trace_event_file *file) 824static void filter_disable(struct trace_event_file *file)
825{ 825{
826 unsigned long old_flags = file->flags;
827
826 file->flags &= ~EVENT_FILE_FL_FILTERED; 828 file->flags &= ~EVENT_FILE_FL_FILTERED;
829
830 if (old_flags != file->flags)
831 trace_buffered_event_disable();
827} 832}
828 833
829static void __free_filter(struct event_filter *filter) 834static void __free_filter(struct event_filter *filter)
@@ -1698,7 +1703,12 @@ fail:
1698 1703
1699static inline void event_set_filtered_flag(struct trace_event_file *file) 1704static inline void event_set_filtered_flag(struct trace_event_file *file)
1700{ 1705{
1706 unsigned long old_flags = file->flags;
1707
1701 file->flags |= EVENT_FILE_FL_FILTERED; 1708 file->flags |= EVENT_FILE_FL_FILTERED;
1709
1710 if (old_flags != file->flags)
1711 trace_buffered_event_enable();
1702} 1712}
1703 1713
1704static inline void event_set_filter(struct trace_event_file *file, 1714static inline void event_set_filter(struct trace_event_file *file,