aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace/ftrace.c
diff options
context:
space:
mode:
authorSteven Rostedt (Red Hat) <rostedt@goodmis.org>2013-07-30 00:04:32 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2013-08-29 12:47:34 -0400
commitf05e999f16aed044c0b8b0837bdb084cbeb8cee6 (patch)
tree05ad1c3fc1b21128e9d98f786c48ec33fa4984cf /kernel/trace/ftrace.c
parent396dd500521287215d836b84b2ae61cd9163bba5 (diff)
ftrace: Check module functions being traced on reload
commit 8c4f3c3fa9681dc549cd35419b259496082fef8b upstream. There's been a nasty bug that would show up and not give much info. The bug displayed the following warning: WARNING: at kernel/trace/ftrace.c:1529 __ftrace_hash_rec_update+0x1e3/0x230() Pid: 20903, comm: bash Tainted: G O 3.6.11+ #38405.trunk Call Trace: [<ffffffff8103e5ff>] warn_slowpath_common+0x7f/0xc0 [<ffffffff8103e65a>] warn_slowpath_null+0x1a/0x20 [<ffffffff810c2ee3>] __ftrace_hash_rec_update+0x1e3/0x230 [<ffffffff810c4f28>] ftrace_hash_move+0x28/0x1d0 [<ffffffff811401cc>] ? kfree+0x2c/0x110 [<ffffffff810c68ee>] ftrace_regex_release+0x8e/0x150 [<ffffffff81149f1e>] __fput+0xae/0x220 [<ffffffff8114a09e>] ____fput+0xe/0x10 [<ffffffff8105fa22>] task_work_run+0x72/0x90 [<ffffffff810028ec>] do_notify_resume+0x6c/0xc0 [<ffffffff8126596e>] ? trace_hardirqs_on_thunk+0x3a/0x3c [<ffffffff815c0f88>] int_signal+0x12/0x17 ---[ end trace 793179526ee09b2c ]--- It was finally narrowed down to unloading a module that was being traced. It was actually more than that. When functions are being traced, there's a table of all functions that have a ref count of the number of active tracers attached to that function. When a function trace callback is registered to a function, the function's record ref count is incremented. When it is unregistered, the function's record ref count is decremented. If an inconsistency is detected (ref count goes below zero) the above warning is shown and the function tracing is permanently disabled until reboot. The ftrace callback ops holds a hash of functions that it filters on (and/or filters off). If the hash is empty, the default means to filter all functions (for the filter_hash) or to disable no functions (for the notrace_hash). When a module is unloaded, it frees the function records that represent the module functions. These records exist on their own pages, that is function records for one module will not exist on the same page as function records for other modules or even the core kernel. Now when a module unloads, the records that represents its functions are freed. When the module is loaded again, the records are recreated with a default ref count of zero (unless there's a callback that traces all functions, then they will also be traced, and the ref count will be incremented). The problem is that if an ftrace callback hash includes functions of the module being unloaded, those hash entries will not be removed. If the module is reloaded in the same location, the hash entries still point to the functions of the module but the module's ref counts do not reflect that. With the help of Steve and Joern, we found a reproducer: Using uinput module and uinput_release function. cd /sys/kernel/debug/tracing modprobe uinput echo uinput_release > set_ftrace_filter echo function > current_tracer rmmod uinput modprobe uinput # check /proc/modules to see if loaded in same addr, otherwise try again echo nop > current_tracer [BOOM] The above loads the uinput module, which creates a table of functions that can be traced within the module. We add uinput_release to the filter_hash to trace just that function. Enable function tracincg, which increments the ref count of the record associated to uinput_release. Remove uinput, which frees the records including the one that represents uinput_release. Load the uinput module again (and make sure it's at the same address). This recreates the function records all with a ref count of zero, including uinput_release. Disable function tracing, which will decrement the ref count for uinput_release which is now zero because of the module removal and reload, and we have a mismatch (below zero ref count). The solution is to check all currently tracing ftrace callbacks to see if any are tracing any of the module's functions when a module is loaded (it already does that with callbacks that trace all functions). If a callback happens to have a module function being traced, it increments that records ref count and starts tracing that function. There may be a strange side effect with this, where tracing module functions on unload and then reloading a new module may have that new module's functions being traced. This may be something that confuses the user, but it's not a big deal. Another approach is to disable all callback hashes on module unload, but this leaves some ftrace callbacks that may not be registered, but can still have hashes tracing the module's function where ftrace doesn't know about it. That situation can cause the same bug. This solution solves that case too. Another benefit of this solution, is it is possible to trace a module's function on unload and load. Link: http://lkml.kernel.org/r/20130705142629.GA325@redhat.com Reported-by: Jörn Engel <joern@logfs.org> Reported-by: Dave Jones <davej@redhat.com> Reported-by: Steve Hodgson <steve@purestorage.com> Tested-by: Steve Hodgson <steve@purestorage.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'kernel/trace/ftrace.c')
-rw-r--r--kernel/trace/ftrace.c71
1 files changed, 62 insertions, 9 deletions
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 13f12351b350..f23449d0650e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2144,12 +2144,57 @@ static cycle_t ftrace_update_time;
2144static unsigned long ftrace_update_cnt; 2144static unsigned long ftrace_update_cnt;
2145unsigned long ftrace_update_tot_cnt; 2145unsigned long ftrace_update_tot_cnt;
2146 2146
2147static int ops_traces_mod(struct ftrace_ops *ops) 2147static inline int ops_traces_mod(struct ftrace_ops *ops)
2148{ 2148{
2149 struct ftrace_hash *hash; 2149 /*
2150 * Filter_hash being empty will default to trace module.
2151 * But notrace hash requires a test of individual module functions.
2152 */
2153 return ftrace_hash_empty(ops->filter_hash) &&
2154 ftrace_hash_empty(ops->notrace_hash);
2155}
2156
2157/*
2158 * Check if the current ops references the record.
2159 *
2160 * If the ops traces all functions, then it was already accounted for.
2161 * If the ops does not trace the current record function, skip it.
2162 * If the ops ignores the function via notrace filter, skip it.
2163 */
2164static inline bool
2165ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
2166{
2167 /* If ops isn't enabled, ignore it */
2168 if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
2169 return 0;
2170
2171 /* If ops traces all mods, we already accounted for it */
2172 if (ops_traces_mod(ops))
2173 return 0;
2174
2175 /* The function must be in the filter */
2176 if (!ftrace_hash_empty(ops->filter_hash) &&
2177 !ftrace_lookup_ip(ops->filter_hash, rec->ip))
2178 return 0;
2150 2179
2151 hash = ops->filter_hash; 2180 /* If in notrace hash, we ignore it too */
2152 return ftrace_hash_empty(hash); 2181 if (ftrace_lookup_ip(ops->notrace_hash, rec->ip))
2182 return 0;
2183
2184 return 1;
2185}
2186
2187static int referenced_filters(struct dyn_ftrace *rec)
2188{
2189 struct ftrace_ops *ops;
2190 int cnt = 0;
2191
2192 for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) {
2193 if (ops_references_rec(ops, rec))
2194 cnt++;
2195 }
2196
2197 return cnt;
2153} 2198}
2154 2199
2155static int ftrace_update_code(struct module *mod) 2200static int ftrace_update_code(struct module *mod)
@@ -2158,6 +2203,7 @@ static int ftrace_update_code(struct module *mod)
2158 struct dyn_ftrace *p; 2203 struct dyn_ftrace *p;
2159 cycle_t start, stop; 2204 cycle_t start, stop;
2160 unsigned long ref = 0; 2205 unsigned long ref = 0;
2206 bool test = false;
2161 int i; 2207 int i;
2162 2208
2163 /* 2209 /*
@@ -2171,9 +2217,12 @@ static int ftrace_update_code(struct module *mod)
2171 2217
2172 for (ops = ftrace_ops_list; 2218 for (ops = ftrace_ops_list;
2173 ops != &ftrace_list_end; ops = ops->next) { 2219 ops != &ftrace_list_end; ops = ops->next) {
2174 if (ops->flags & FTRACE_OPS_FL_ENABLED && 2220 if (ops->flags & FTRACE_OPS_FL_ENABLED) {
2175 ops_traces_mod(ops)) 2221 if (ops_traces_mod(ops))
2176 ref++; 2222 ref++;
2223 else
2224 test = true;
2225 }
2177 } 2226 }
2178 } 2227 }
2179 2228
@@ -2183,12 +2232,16 @@ static int ftrace_update_code(struct module *mod)
2183 for (pg = ftrace_new_pgs; pg; pg = pg->next) { 2232 for (pg = ftrace_new_pgs; pg; pg = pg->next) {
2184 2233
2185 for (i = 0; i < pg->index; i++) { 2234 for (i = 0; i < pg->index; i++) {
2235 int cnt = ref;
2236
2186 /* If something went wrong, bail without enabling anything */ 2237 /* If something went wrong, bail without enabling anything */
2187 if (unlikely(ftrace_disabled)) 2238 if (unlikely(ftrace_disabled))
2188 return -1; 2239 return -1;
2189 2240
2190 p = &pg->records[i]; 2241 p = &pg->records[i];
2191 p->flags = ref; 2242 if (test)
2243 cnt += referenced_filters(p);
2244 p->flags = cnt;
2192 2245
2193 /* 2246 /*
2194 * Do the initial record conversion from mcount jump 2247 * Do the initial record conversion from mcount jump
@@ -2208,7 +2261,7 @@ static int ftrace_update_code(struct module *mod)
2208 * conversion puts the module to the correct state, thus 2261 * conversion puts the module to the correct state, thus
2209 * passing the ftrace_make_call check. 2262 * passing the ftrace_make_call check.
2210 */ 2263 */
2211 if (ftrace_start_up && ref) { 2264 if (ftrace_start_up && cnt) {
2212 int failed = __ftrace_replace_code(p, 1); 2265 int failed = __ftrace_replace_code(p, 1);
2213 if (failed) 2266 if (failed)
2214 ftrace_bug(failed, p->ip); 2267 ftrace_bug(failed, p->ip);