author		Steven Rostedt <srostedt@redhat.com>	2012-11-02 17:03:03 -0400
committer	Steven Rostedt <rostedt@goodmis.org>	2013-01-22 23:37:59 -0500
commit		0a016409e42f273415f8225ddf2c58eb2df88034 (patch)
tree		d18d106e6041e516d8ffa05e2bb62d76306c6afd /kernel/trace
parent		9640388b63556b4cfecbb5aaf91a5c99d272f429 (diff)
ftrace: Optimize the function tracer list loop
There are lots of places that perform:

	op = rcu_dereference_raw(ftrace_control_list);
	while (op != &ftrace_list_end) {

Add a helper macro to do this, and also optimize for a single entity.
That is, gcc will optimize a loop for either no iterations or more than
one iteration. But usually only a single callback is registered to the
function tracer, thus the optimized case should be a single pass. To do
this we now do:

	op = rcu_dereference_raw(list);
	do {
		[...]
	} while (likely(op = rcu_dereference_raw((op)->next)) &&
		 unlikely((op) != &ftrace_list_end));

An op is always registered (ftrace_list_end when no callbacks are
registered), thus when a single callback is registered, the linked list
looks like:

	top => callback => ftrace_list_end => NULL.

The likely(op = op->next) still must be performed due to the race of
removing the callback, where the first op assignment could equal
ftrace_list_end. In that case, op->next would be NULL. But this is
unlikely (it only happens in a race condition when removing the
callback).

It is very likely, however, that the next op would be ftrace_list_end,
unless more than one callback has been registered. This tells gcc what
the most common case is, and makes the fast path take the fewest
branches.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
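As a rough illustration of the pattern (not part of the patch), here is a minimal user-space sketch that exercises the two macros exactly as added below. The stand-ins are invented for this sketch: plain loads replace rcu_dereference_raw(), likely()/unlikely() map to __builtin_expect(), and struct ftrace_ops is reduced to a handler plus a next pointer; only the sentinel keeps the kernel's name because the macro references it directly.

```c
#include <stdio.h>

/*
 * Illustrative user-space stand-ins (NOT the real kernel definitions).
 */
#define rcu_dereference_raw(p)	(p)
#define likely(x)		__builtin_expect(!!(x), 1)
#define unlikely(x)		__builtin_expect(!!(x), 0)

struct ftrace_ops {
	void (*func)(unsigned long ip);
	struct ftrace_ops *next;
};

static void ftrace_stub(unsigned long ip) { (void)ip; }

/* End-of-list sentinel: harmless stub handler, NULL ->next. */
static struct ftrace_ops ftrace_list_end = { .func = ftrace_stub };

/* The two macros, as added by the patch below. */
#define do_for_each_ftrace_op(op, list)			\
	op = rcu_dereference_raw(list);			\
	do

#define while_for_each_ftrace_op(op)				\
	while (likely(op = rcu_dereference_raw((op)->next)) &&	\
	       unlikely((op) != &ftrace_list_end))

static void my_callback(unsigned long ip)
{
	printf("callback hit, ip=%#lx\n", ip);
}

int main(void)
{
	/* The common case: exactly one registered callback. */
	struct ftrace_ops one = { .func = my_callback, .next = &ftrace_list_end };
	struct ftrace_ops *list = &one;
	struct ftrace_ops *op;

	/*
	 * The body runs once for "one", then the first loop test loads
	 * ftrace_list_end and exits: a single pass through the list.
	 * With an empty list (list == &ftrace_list_end) the body would
	 * run once on the stub and the NULL ->next ends the loop, which
	 * is the same guard that covers the unregister race above.
	 */
	do_for_each_ftrace_op(op, list) {
		op->func(0x1234);
	} while_for_each_ftrace_op(op);

	return 0;
}
```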
Diffstat (limited to 'kernel/trace')
-rw-r--r--	kernel/trace/ftrace.c	48
1 file changed, 26 insertions(+), 22 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 789cbec24e81..1330969d8447 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -111,6 +111,26 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
 #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
 #endif
 
+/*
+ * Traverse the ftrace_global_list, invoking all entries.  The reason that we
+ * can use rcu_dereference_raw() is that elements removed from this list
+ * are simply leaked, so there is no need to interact with a grace-period
+ * mechanism.  The rcu_dereference_raw() calls are needed to handle
+ * concurrent insertions into the ftrace_global_list.
+ *
+ * Silly Alpha and silly pointer-speculation compiler optimizations!
+ */
+#define do_for_each_ftrace_op(op, list)			\
+	op = rcu_dereference_raw(list);			\
+	do
+
+/*
+ * Optimized for just a single item in the list (as that is the normal case).
+ */
+#define while_for_each_ftrace_op(op)				\
+	while (likely(op = rcu_dereference_raw((op)->next)) &&	\
+	       unlikely((op) != &ftrace_list_end))
+
 /**
  * ftrace_nr_registered_ops - return number of ops registered
  *
@@ -132,15 +152,6 @@ int ftrace_nr_registered_ops(void)
 	return cnt;
 }
 
-/*
- * Traverse the ftrace_global_list, invoking all entries.  The reason that we
- * can use rcu_dereference_raw() is that elements removed from this list
- * are simply leaked, so there is no need to interact with a grace-period
- * mechanism.  The rcu_dereference_raw() calls are needed to handle
- * concurrent insertions into the ftrace_global_list.
- *
- * Silly Alpha and silly pointer-speculation compiler optimizations!
- */
 static void
 ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
 			struct ftrace_ops *op, struct pt_regs *regs)
@@ -149,11 +160,9 @@ ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
 		return;
 
 	trace_recursion_set(TRACE_GLOBAL_BIT);
-	op = rcu_dereference_raw(ftrace_global_list); /*see above*/
-	while (op != &ftrace_list_end) {
+	do_for_each_ftrace_op(op, ftrace_global_list) {
 		op->func(ip, parent_ip, op, regs);
-		op = rcu_dereference_raw(op->next); /*see above*/
-	};
+	} while_for_each_ftrace_op(op);
 	trace_recursion_clear(TRACE_GLOBAL_BIT);
 }
 
@@ -4104,14 +4113,11 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
 	 */
 	preempt_disable_notrace();
 	trace_recursion_set(TRACE_CONTROL_BIT);
-	op = rcu_dereference_raw(ftrace_control_list);
-	while (op != &ftrace_list_end) {
+	do_for_each_ftrace_op(op, ftrace_control_list) {
 		if (!ftrace_function_local_disabled(op) &&
 		    ftrace_ops_test(op, ip))
 			op->func(ip, parent_ip, op, regs);
-
-		op = rcu_dereference_raw(op->next);
-	};
+	} while_for_each_ftrace_op(op);
 	trace_recursion_clear(TRACE_CONTROL_BIT);
 	preempt_enable_notrace();
 }
@@ -4139,12 +4145,10 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 	 * they must be freed after a synchronize_sched().
 	 */
 	preempt_disable_notrace();
-	op = rcu_dereference_raw(ftrace_ops_list);
-	while (op != &ftrace_list_end) {
+	do_for_each_ftrace_op(op, ftrace_ops_list) {
 		if (ftrace_ops_test(op, ip))
 			op->func(ip, parent_ip, op, regs);
-		op = rcu_dereference_raw(op->next);
-	};
+	} while_for_each_ftrace_op(op);
 	preempt_enable_notrace();
 	trace_recursion_clear(TRACE_INTERNAL_BIT);
 }