aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorSteven Rostedt (Red Hat) <rostedt@goodmis.org>2013-07-30 00:04:32 -0400
committerSteven Rostedt <rostedt@goodmis.org>2013-07-30 20:52:51 -0400
commit8c4f3c3fa9681dc549cd35419b259496082fef8b (patch)
treebcec6232d02770494503abed317a778808fa0aab /kernel
parent1c80c43290ee576afe8d39ecc905fa3958a5858c (diff)
ftrace: Check module functions being traced on reload
There's been a nasty bug that would show up and not give much info. The bug displayed the following warning: WARNING: at kernel/trace/ftrace.c:1529 __ftrace_hash_rec_update+0x1e3/0x230() Pid: 20903, comm: bash Tainted: G O 3.6.11+ #38405.trunk Call Trace: [<ffffffff8103e5ff>] warn_slowpath_common+0x7f/0xc0 [<ffffffff8103e65a>] warn_slowpath_null+0x1a/0x20 [<ffffffff810c2ee3>] __ftrace_hash_rec_update+0x1e3/0x230 [<ffffffff810c4f28>] ftrace_hash_move+0x28/0x1d0 [<ffffffff811401cc>] ? kfree+0x2c/0x110 [<ffffffff810c68ee>] ftrace_regex_release+0x8e/0x150 [<ffffffff81149f1e>] __fput+0xae/0x220 [<ffffffff8114a09e>] ____fput+0xe/0x10 [<ffffffff8105fa22>] task_work_run+0x72/0x90 [<ffffffff810028ec>] do_notify_resume+0x6c/0xc0 [<ffffffff8126596e>] ? trace_hardirqs_on_thunk+0x3a/0x3c [<ffffffff815c0f88>] int_signal+0x12/0x17 ---[ end trace 793179526ee09b2c ]--- It was finally narrowed down to unloading a module that was being traced. It was actually more than that. When functions are being traced, there's a table of all functions that have a ref count of the number of active tracers attached to that function. When a function trace callback is registered to a function, the function's record ref count is incremented. When it is unregistered, the function's record ref count is decremented. If an inconsistency is detected (ref count goes below zero) the above warning is shown and the function tracing is permanently disabled until reboot. The ftrace callback ops holds a hash of functions that it filters on (and/or filters off). If the hash is empty, the default means to filter all functions (for the filter_hash) or to disable no functions (for the notrace_hash). When a module is unloaded, it frees the function records that represent the module functions. These records exist on their own pages, that is function records for one module will not exist on the same page as function records for other modules or even the core kernel. Now when a module unloads, the records that represents its functions are freed. When the module is loaded again, the records are recreated with a default ref count of zero (unless there's a callback that traces all functions, then they will also be traced, and the ref count will be incremented). The problem is that if an ftrace callback hash includes functions of the module being unloaded, those hash entries will not be removed. If the module is reloaded in the same location, the hash entries still point to the functions of the module but the module's ref counts do not reflect that. With the help of Steve and Joern, we found a reproducer: Using uinput module and uinput_release function. cd /sys/kernel/debug/tracing modprobe uinput echo uinput_release > set_ftrace_filter echo function > current_tracer rmmod uinput modprobe uinput # check /proc/modules to see if loaded in same addr, otherwise try again echo nop > current_tracer [BOOM] The above loads the uinput module, which creates a table of functions that can be traced within the module. We add uinput_release to the filter_hash to trace just that function. Enable function tracincg, which increments the ref count of the record associated to uinput_release. Remove uinput, which frees the records including the one that represents uinput_release. Load the uinput module again (and make sure it's at the same address). This recreates the function records all with a ref count of zero, including uinput_release. Disable function tracing, which will decrement the ref count for uinput_release which is now zero because of the module removal and reload, and we have a mismatch (below zero ref count). The solution is to check all currently tracing ftrace callbacks to see if any are tracing any of the module's functions when a module is loaded (it already does that with callbacks that trace all functions). If a callback happens to have a module function being traced, it increments that records ref count and starts tracing that function. There may be a strange side effect with this, where tracing module functions on unload and then reloading a new module may have that new module's functions being traced. This may be something that confuses the user, but it's not a big deal. Another approach is to disable all callback hashes on module unload, but this leaves some ftrace callbacks that may not be registered, but can still have hashes tracing the module's function where ftrace doesn't know about it. That situation can cause the same bug. This solution solves that case too. Another benefit of this solution, is it is possible to trace a module's function on unload and load. Link: http://lkml.kernel.org/r/20130705142629.GA325@redhat.com Reported-by: Jörn Engel <joern@logfs.org> Reported-by: Dave Jones <davej@redhat.com> Reported-by: Steve Hodgson <steve@purestorage.com> Tested-by: Steve Hodgson <steve@purestorage.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/trace/ftrace.c71
1 files changed, 62 insertions, 9 deletions
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 92d3334de0c3..a6d098c6df3f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2169,12 +2169,57 @@ static cycle_t ftrace_update_time;
2169static unsigned long ftrace_update_cnt; 2169static unsigned long ftrace_update_cnt;
2170unsigned long ftrace_update_tot_cnt; 2170unsigned long ftrace_update_tot_cnt;
2171 2171
2172static int ops_traces_mod(struct ftrace_ops *ops) 2172static inline int ops_traces_mod(struct ftrace_ops *ops)
2173{ 2173{
2174 struct ftrace_hash *hash; 2174 /*
2175 * Filter_hash being empty will default to trace module.
2176 * But notrace hash requires a test of individual module functions.
2177 */
2178 return ftrace_hash_empty(ops->filter_hash) &&
2179 ftrace_hash_empty(ops->notrace_hash);
2180}
2181
2182/*
2183 * Check if the current ops references the record.
2184 *
2185 * If the ops traces all functions, then it was already accounted for.
2186 * If the ops does not trace the current record function, skip it.
2187 * If the ops ignores the function via notrace filter, skip it.
2188 */
2189static inline bool
2190ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
2191{
2192 /* If ops isn't enabled, ignore it */
2193 if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
2194 return 0;
2195
2196 /* If ops traces all mods, we already accounted for it */
2197 if (ops_traces_mod(ops))
2198 return 0;
2199
2200 /* The function must be in the filter */
2201 if (!ftrace_hash_empty(ops->filter_hash) &&
2202 !ftrace_lookup_ip(ops->filter_hash, rec->ip))
2203 return 0;
2175 2204
2176 hash = ops->filter_hash; 2205 /* If in notrace hash, we ignore it too */
2177 return ftrace_hash_empty(hash); 2206 if (ftrace_lookup_ip(ops->notrace_hash, rec->ip))
2207 return 0;
2208
2209 return 1;
2210}
2211
2212static int referenced_filters(struct dyn_ftrace *rec)
2213{
2214 struct ftrace_ops *ops;
2215 int cnt = 0;
2216
2217 for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) {
2218 if (ops_references_rec(ops, rec))
2219 cnt++;
2220 }
2221
2222 return cnt;
2178} 2223}
2179 2224
2180static int ftrace_update_code(struct module *mod) 2225static int ftrace_update_code(struct module *mod)
@@ -2183,6 +2228,7 @@ static int ftrace_update_code(struct module *mod)
2183 struct dyn_ftrace *p; 2228 struct dyn_ftrace *p;
2184 cycle_t start, stop; 2229 cycle_t start, stop;
2185 unsigned long ref = 0; 2230 unsigned long ref = 0;
2231 bool test = false;
2186 int i; 2232 int i;
2187 2233
2188 /* 2234 /*
@@ -2196,9 +2242,12 @@ static int ftrace_update_code(struct module *mod)
2196 2242
2197 for (ops = ftrace_ops_list; 2243 for (ops = ftrace_ops_list;
2198 ops != &ftrace_list_end; ops = ops->next) { 2244 ops != &ftrace_list_end; ops = ops->next) {
2199 if (ops->flags & FTRACE_OPS_FL_ENABLED && 2245 if (ops->flags & FTRACE_OPS_FL_ENABLED) {
2200 ops_traces_mod(ops)) 2246 if (ops_traces_mod(ops))
2201 ref++; 2247 ref++;
2248 else
2249 test = true;
2250 }
2202 } 2251 }
2203 } 2252 }
2204 2253
@@ -2208,12 +2257,16 @@ static int ftrace_update_code(struct module *mod)
2208 for (pg = ftrace_new_pgs; pg; pg = pg->next) { 2257 for (pg = ftrace_new_pgs; pg; pg = pg->next) {
2209 2258
2210 for (i = 0; i < pg->index; i++) { 2259 for (i = 0; i < pg->index; i++) {
2260 int cnt = ref;
2261
2211 /* If something went wrong, bail without enabling anything */ 2262 /* If something went wrong, bail without enabling anything */
2212 if (unlikely(ftrace_disabled)) 2263 if (unlikely(ftrace_disabled))
2213 return -1; 2264 return -1;
2214 2265
2215 p = &pg->records[i]; 2266 p = &pg->records[i];
2216 p->flags = ref; 2267 if (test)
2268 cnt += referenced_filters(p);
2269 p->flags = cnt;
2217 2270
2218 /* 2271 /*
2219 * Do the initial record conversion from mcount jump 2272 * Do the initial record conversion from mcount jump
@@ -2233,7 +2286,7 @@ static int ftrace_update_code(struct module *mod)
2233 * conversion puts the module to the correct state, thus 2286 * conversion puts the module to the correct state, thus
2234 * passing the ftrace_make_call check. 2287 * passing the ftrace_make_call check.
2235 */ 2288 */
2236 if (ftrace_start_up && ref) { 2289 if (ftrace_start_up && cnt) {
2237 int failed = __ftrace_replace_code(p, 1); 2290 int failed = __ftrace_replace_code(p, 1);
2238 if (failed) 2291 if (failed)
2239 ftrace_bug(failed, p->ip); 2292 ftrace_bug(failed, p->ip);