author    | Steven Rostedt <srostedt@redhat.com>  | 2012-11-02 17:03:03 -0400
committer | Steven Rostedt <rostedt@goodmis.org>  | 2013-01-22 23:37:59 -0500
commit    | 0a016409e42f273415f8225ddf2c58eb2df88034 (patch)
tree      | d18d106e6041e516d8ffa05e2bb62d76306c6afd /kernel/trace
parent    | 9640388b63556b4cfecbb5aaf91a5c99d272f429 (diff)
ftrace: Optimize the function tracer list loop
There are many places that perform:
op = rcu_dereference_raw(ftrace_control_list);
while (op != &ftrace_list_end) {
Add a helper macro to do this, and also optimize for the single-entry
case. That is, gcc will optimize a loop for either no iterations or
more than one iteration, but usually only a single callback is
registered with the function tracer, so the common case should be a
single pass. To do this we now do:
	op = rcu_dereference_raw(list);
	do {
		[...]
	} while (likely(op = rcu_dereference_raw((op)->next)) &&
		 unlikely((op) != &ftrace_list_end));
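In source form, with the two helpers this patch adds, a call site
(here the loop from ftrace_global_list_func in the diff below) reads:

	do_for_each_ftrace_op(op, ftrace_global_list) {
		op->func(ip, parent_ip, op, regs);
	} while_for_each_ftrace_op(op);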
An op is always registered (ftrace_list_end when no callbacks are
registered), so when a single callback is registered, the linked list
looks like:

	top => callback => ftrace_list_end => NULL
The likely(op = op->next) test must still be performed because of the
race with removing a callback: the first op assignment could already be
ftrace_list_end, in which case op->next would be NULL. But this is
unlikely, as it only happens in the window while a callback is being
removed.
It is very likely, though, that the next op will be ftrace_list_end,
unless more than one callback has been registered. This tells gcc what
the most common case is, and gives the fast path the fewest branches.
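To make the pattern concrete outside the kernel, here is a minimal
standalone userspace sketch of the same loop shape. It is illustrative
only: the struct layout, my_callback, and the sample ip value are
invented, and plain pointer loads stand in for rcu_dereference_raw(),
which is a kernel RCU primitive. The likely()/unlikely() hints are
defined the way the kernel defines them, on top of gcc's
__builtin_expect().

	/* list_loop_sketch.c -- standalone illustration of the loop
	 * shape; not kernel code.  Build with: gcc -O2 list_loop_sketch.c
	 */
	#include <stdio.h>

	/* Branch hints, defined as the kernel defines them. */
	#define likely(x)	__builtin_expect(!!(x), 1)
	#define unlikely(x)	__builtin_expect(!!(x), 0)

	/* Simplified, hypothetical stand-in for the kernel's
	 * struct ftrace_ops. */
	struct ftrace_ops {
		void (*func)(unsigned long ip);
		struct ftrace_ops *next;
	};

	/* No-op callback for the sentinel, so the body is harmless even
	 * if a racing removal leaves it running once on the sentinel. */
	static void ftrace_stub(unsigned long ip) { (void)ip; }

	/* The sentinel terminating every list; its ->next is NULL. */
	static struct ftrace_ops ftrace_list_end = {
		.func = ftrace_stub,
		.next = NULL,
	};

	/* The loop pair from this patch, with plain loads standing in
	 * for rcu_dereference_raw(). */
	#define do_for_each_ftrace_op(op, list)		\
		op = (list);				\
		do

	#define while_for_each_ftrace_op(op)		\
		while (likely(op = (op)->next) &&	\
		       unlikely((op) != &ftrace_list_end))

	static void my_callback(unsigned long ip)	/* invented example */
	{
		printf("callback invoked, ip=%#lx\n", ip);
	}

	/* One registered callback:
	 * list => my_callback => ftrace_list_end => NULL */
	static struct ftrace_ops my_ops = {
		.func = my_callback,
		.next = &ftrace_list_end,
	};
	static struct ftrace_ops *ftrace_ops_list = &my_ops;

	int main(void)
	{
		struct ftrace_ops *op;

		/* Single-callback case: the body runs once; then
		 * likely(op = op->next) loads ftrace_list_end (non-NULL,
		 * so the && does not short-circuit) and
		 * unlikely(op != &ftrace_list_end) is false, ending the
		 * loop after exactly one pass.  Had op started at
		 * ftrace_list_end (racing removal), op->next would be
		 * NULL and likely() would end the loop instead. */
		do_for_each_ftrace_op(op, ftrace_ops_list) {
			op->func(0x1234);
		} while_for_each_ftrace_op(op);

		return 0;
	}

With a single registered callback this prints one line and leaves the
loop on its first test, which is exactly the fast path the branch
hints are arranged around.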
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/ftrace.c | 48
1 file changed, 26 insertions(+), 22 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 789cbec24e81..1330969d8447 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -111,6 +111,26 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
 #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
 #endif
 
+/*
+ * Traverse the ftrace_global_list, invoking all entries.  The reason that we
+ * can use rcu_dereference_raw() is that elements removed from this list
+ * are simply leaked, so there is no need to interact with a grace-period
+ * mechanism.  The rcu_dereference_raw() calls are needed to handle
+ * concurrent insertions into the ftrace_global_list.
+ *
+ * Silly Alpha and silly pointer-speculation compiler optimizations!
+ */
+#define do_for_each_ftrace_op(op, list)                        \
+        op = rcu_dereference_raw(list);                        \
+        do
+
+/*
+ * Optimized for just a single item in the list (as that is the normal case).
+ */
+#define while_for_each_ftrace_op(op)                           \
+        while (likely(op = rcu_dereference_raw((op)->next)) && \
+               unlikely((op) != &ftrace_list_end))
+
 /**
  * ftrace_nr_registered_ops - return number of ops registered
  *
@@ -132,15 +152,6 @@ int ftrace_nr_registered_ops(void)
         return cnt;
 }
 
-/*
- * Traverse the ftrace_global_list, invoking all entries.  The reason that we
- * can use rcu_dereference_raw() is that elements removed from this list
- * are simply leaked, so there is no need to interact with a grace-period
- * mechanism.  The rcu_dereference_raw() calls are needed to handle
- * concurrent insertions into the ftrace_global_list.
- *
- * Silly Alpha and silly pointer-speculation compiler optimizations!
- */
 static void
 ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
                         struct ftrace_ops *op, struct pt_regs *regs)
@@ -149,11 +160,9 @@ ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
                 return;
 
         trace_recursion_set(TRACE_GLOBAL_BIT);
-        op = rcu_dereference_raw(ftrace_global_list); /*see above*/
-        while (op != &ftrace_list_end) {
+        do_for_each_ftrace_op(op, ftrace_global_list) {
                 op->func(ip, parent_ip, op, regs);
-                op = rcu_dereference_raw(op->next); /*see above*/
-        };
+        } while_for_each_ftrace_op(op);
         trace_recursion_clear(TRACE_GLOBAL_BIT);
 }
 
@@ -4104,14 +4113,11 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
          */
         preempt_disable_notrace();
         trace_recursion_set(TRACE_CONTROL_BIT);
-        op = rcu_dereference_raw(ftrace_control_list);
-        while (op != &ftrace_list_end) {
+        do_for_each_ftrace_op(op, ftrace_control_list) {
                 if (!ftrace_function_local_disabled(op) &&
                     ftrace_ops_test(op, ip))
                         op->func(ip, parent_ip, op, regs);
-
-                op = rcu_dereference_raw(op->next);
-        };
+        } while_for_each_ftrace_op(op);
         trace_recursion_clear(TRACE_CONTROL_BIT);
         preempt_enable_notrace();
 }
@@ -4139,12 +4145,10 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
          * they must be freed after a synchronize_sched().
          */
         preempt_disable_notrace();
-        op = rcu_dereference_raw(ftrace_ops_list);
-        while (op != &ftrace_list_end) {
+        do_for_each_ftrace_op(op, ftrace_ops_list) {
                 if (ftrace_ops_test(op, ip))
                         op->func(ip, parent_ip, op, regs);
-                op = rcu_dereference_raw(op->next);
-        };
+        } while_for_each_ftrace_op(op);
         preempt_enable_notrace();
         trace_recursion_clear(TRACE_INTERNAL_BIT);
 }
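For reference, since the pair simply splits a do/while statement across
two macros, the rewritten loop in __ftrace_ops_list_func preprocesses to
exactly the shape quoted in the commit message:

	op = rcu_dereference_raw(ftrace_ops_list);
	do {
		if (ftrace_ops_test(op, ip))
			op->func(ip, parent_ip, op, regs);
	} while (likely(op = rcu_dereference_raw((op)->next)) &&
		 unlikely((op) != &ftrace_list_end));

Splitting the do and the while this way lets call sites keep a for-each
look, while the compiler still sees a single do/while whose body always
runs at least once; that is safe only because the list is never empty,
as it always ends with ftrace_list_end.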