diff options
author | Peter Zijlstra <peterz@infradead.org> | 2015-05-26 21:39:37 -0400 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2015-05-27 22:02:07 -0400 |
commit | 93c2e105f6bcee231c951ba0e56e84505c4b0483 (patch) | |
tree | c81d9957d95194807d6907b1318047af16c71c5a /include/linux/module.h | |
parent | ade3f510f93a5613b672febe88eff8ea7f1c63b7 (diff) |
module: Optimize __module_address() using a latched RB-tree
Currently __module_address() is using a linear search through all
modules in order to find the module corresponding to the provided
address. With a lot of modules this can take a lot of time.
One of the users of this is kernel_text_address(), which is employed
in many stack unwinders, which in turn are used by perf-callchain and
ftrace (possibly from NMI context).
So by optimizing __module_address() we optimize many stack unwinders
which are used by both perf and tracing in performance sensitive code.
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'include/linux/module.h')
-rw-r--r-- | include/linux/module.h | 29 |
1 files changed, 26 insertions, 3 deletions
diff --git a/include/linux/module.h b/include/linux/module.h index fb56dd85a862..ddf35a3368fb 100644 --- a/include/linux/module.h +++ b/include/linux/module.h | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/moduleparam.h> | 17 | #include <linux/moduleparam.h> |
18 | #include <linux/jump_label.h> | 18 | #include <linux/jump_label.h> |
19 | #include <linux/export.h> | 19 | #include <linux/export.h> |
20 | #include <linux/rbtree_latch.h> | ||
20 | 21 | ||
21 | #include <linux/percpu.h> | 22 | #include <linux/percpu.h> |
22 | #include <asm/module.h> | 23 | #include <asm/module.h> |
@@ -210,6 +211,13 @@ enum module_state { | |||
210 | MODULE_STATE_UNFORMED, /* Still setting it up. */ | 211 | MODULE_STATE_UNFORMED, /* Still setting it up. */ |
211 | }; | 212 | }; |
212 | 213 | ||
214 | struct module; | ||
215 | |||
216 | struct mod_tree_node { | ||
217 | struct module *mod; | ||
218 | struct latch_tree_node node; | ||
219 | }; | ||
220 | |||
213 | struct module { | 221 | struct module { |
214 | enum module_state state; | 222 | enum module_state state; |
215 | 223 | ||
@@ -269,8 +277,15 @@ struct module { | |||
269 | /* Startup function. */ | 277 | /* Startup function. */ |
270 | int (*init)(void); | 278 | int (*init)(void); |
271 | 279 | ||
272 | /* If this is non-NULL, vfree after init() returns */ | 280 | /* |
273 | void *module_init; | 281 | * If this is non-NULL, vfree() after init() returns. |
282 | * | ||
283 | * Cacheline align here, such that: | ||
284 | * module_init, module_core, init_size, core_size, | ||
285 | * init_text_size, core_text_size and mtn_core::{mod,node[0]} | |
286 | * are on the same cacheline. | ||
287 | */ | ||
288 | void *module_init ____cacheline_aligned; | ||
274 | 289 | ||
275 | /* Here is the actual code + data, vfree'd on unload. */ | 290 | /* Here is the actual code + data, vfree'd on unload. */ |
276 | void *module_core; | 291 | void *module_core; |
@@ -281,6 +296,14 @@ struct module { | |||
281 | /* The size of the executable code in each section. */ | 296 | /* The size of the executable code in each section. */ |
282 | unsigned int init_text_size, core_text_size; | 297 | unsigned int init_text_size, core_text_size; |
283 | 298 | ||
299 | /* | ||
300 | * We want mtn_core::{mod,node[0]} to be in the same cacheline as the | ||
301 | * above entries such that a regular lookup will only touch one | ||
302 | * cacheline. | ||
303 | */ | ||
304 | struct mod_tree_node mtn_core; | ||
305 | struct mod_tree_node mtn_init; | ||
306 | |||
284 | /* Size of RO sections of the module (text+rodata) */ | 307 | /* Size of RO sections of the module (text+rodata) */ |
285 | unsigned int init_ro_size, core_ro_size; | 308 | unsigned int init_ro_size, core_ro_size; |
286 | 309 | ||
@@ -367,7 +390,7 @@ struct module { | |||
367 | ctor_fn_t *ctors; | 390 | ctor_fn_t *ctors; |
368 | unsigned int num_ctors; | 391 | unsigned int num_ctors; |
369 | #endif | 392 | #endif |
370 | }; | 393 | } ____cacheline_aligned; |
371 | #ifndef MODULE_ARCH_INIT | 394 | #ifndef MODULE_ARCH_INIT |
372 | #define MODULE_ARCH_INIT {} | 395 | #define MODULE_ARCH_INIT {} |
373 | #endif | 396 | #endif |