author		Peter Zijlstra <peterz@infradead.org>	2015-05-26 21:39:37 -0400
committer	Rusty Russell <rusty@rustcorp.com.au>	2015-05-27 22:02:07 -0400
commit		93c2e105f6bcee231c951ba0e56e84505c4b0483
tree		c81d9957d95194807d6907b1318047af16c71c5a
parent		ade3f510f93a5613b672febe88eff8ea7f1c63b7
module: Optimize __module_address() using a latched RB-tree
Currently __module_address() uses a linear search through all modules
in order to find the module corresponding to the provided address. With
a lot of modules loaded this can take a long time.

One of the users of this is kernel_text_address(), which is employed
by many stack unwinders, which in turn are used by perf-callchain and
ftrace (possibly from NMI context).

So by optimizing __module_address() we optimize the many stack
unwinders used by both perf and tracing in performance-sensitive code.
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
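
For readers new to the latch technique this patch builds on: the idea is to
keep two copies of the lookup structure plus a sequence counter, so the
writer (serialized by module_mutex in the kernel) always modifies the copy
readers are not currently using, and lock-free readers retry only if they
overlapped a write. The following is a minimal user-space sketch of that
scheme, not code from the patch: a fixed array of address ranges stands in
for the kernel's RB-trees, the names (struct range, latch_update,
latch_find) are invented for illustration, and C11 seq_cst atomics are a
simplification of the barriers in the kernel's raw_write_seqcount_latch().

	#include <stdatomic.h>
	#include <stdio.h>

	/* One [start, start+size) mapping, standing in for a module's range. */
	struct range {
		unsigned long start, size;
		const char *name;
	};

	#define NR_RANGES 4

	static struct range copies[2][NR_RANGES];	/* the two latched copies */
	static atomic_uint seq;		/* odd while copy 0 is being modified */

	/*
	 * Writer side; must be serialized externally, as module_mutex does in
	 * the kernel. This sketch hand-waves the C11 data-race rules that a
	 * real implementation must respect with its own memory-model
	 * primitives.
	 */
	static void latch_update(int idx, struct range r)
	{
		atomic_fetch_add(&seq, 1);	/* odd: readers switch to copy 1 */
		copies[0][idx] = r;
		atomic_fetch_add(&seq, 1);	/* even: readers may use copy 0 */
		copies[1][idx] = r;
	}

	/* Reader side: lock-free; the kernel version is safe even from NMI. */
	static const char *latch_find(unsigned long addr)
	{
		unsigned int s;
		const char *name;

		do {
			s = atomic_load(&seq);
			const struct range *t = copies[s & 1];	/* stable copy */
			name = NULL;
			for (int i = 0; i < NR_RANGES; i++) {
				/* unsigned wrap makes this a [start, end) test */
				if (addr - t[i].start < t[i].size) {
					name = t[i].name;
					break;
				}
			}
		} while (atomic_load(&seq) != s);	/* raced with writer: retry */

		return name;
	}

	int main(void)
	{
		latch_update(0, (struct range){ 0x1000, 0x200, "mod_a" });
		latch_update(1, (struct range){ 0x4000, 0x100, "mod_b" });

		const char *m = latch_find(0x1080);
		printf("0x1080 -> %s\n", m ? m : "(none)");
		m = latch_find(0x4100);	/* one past the end of mod_b */
		printf("0x4100 -> %s\n", m ? m : "(none)");
		return 0;
	}

The patch applies the same scheme, via the latch_tree_* helpers from the
newly included <linux/rbtree_latch.h>, to a pair of RB-trees keyed on
module address ranges; that is what makes mod_tree_find() below safe to
call without module_mutex.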
-rw-r--r--	include/linux/module.h |  29
-rw-r--r--	kernel/module.c        | 115
2 files changed, 136 insertions, 8 deletions
diff --git a/include/linux/module.h b/include/linux/module.h
index fb56dd85a862..ddf35a3368fb 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -17,6 +17,7 @@
 #include <linux/moduleparam.h>
 #include <linux/jump_label.h>
 #include <linux/export.h>
+#include <linux/rbtree_latch.h>
 
 #include <linux/percpu.h>
 #include <asm/module.h>
@@ -210,6 +211,13 @@ enum module_state {
 	MODULE_STATE_UNFORMED,	/* Still setting it up. */
 };
 
+struct module;
+
+struct mod_tree_node {
+	struct module *mod;
+	struct latch_tree_node node;
+};
+
 struct module {
 	enum module_state state;
 
@@ -269,8 +277,15 @@ struct module {
 	/* Startup function. */
 	int (*init)(void);
 
-	/* If this is non-NULL, vfree after init() returns */
-	void *module_init;
+	/*
+	 * If this is non-NULL, vfree() after init() returns.
+	 *
+	 * Cacheline align here, such that:
+	 *   module_init, module_core, init_size, core_size,
+	 *   init_text_size, core_text_size and ltn_core.node[0]
+	 *   are on the same cacheline.
+	 */
+	void *module_init	____cacheline_aligned;
 
 	/* Here is the actual code + data, vfree'd on unload. */
 	void *module_core;
@@ -281,6 +296,14 @@ struct module {
 	/* The size of the executable code in each section. */
 	unsigned int init_text_size, core_text_size;
 
+	/*
+	 * We want mtn_core::{mod,node[0]} to be in the same cacheline as the
+	 * above entries such that a regular lookup will only touch one
+	 * cacheline.
+	 */
+	struct mod_tree_node	mtn_core;
+	struct mod_tree_node	mtn_init;
+
 	/* Size of RO sections of the module (text+rodata) */
 	unsigned int init_ro_size, core_ro_size;
 
@@ -367,7 +390,7 @@ struct module {
 	ctor_fn_t *ctors;
 	unsigned int num_ctors;
 #endif
-};
+} ____cacheline_aligned;
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
 #endif
diff --git a/kernel/module.c b/kernel/module.c
index a15899e00ca9..e0db5c31cb53 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -101,6 +101,108 @@
 DEFINE_MUTEX(module_mutex);
 EXPORT_SYMBOL_GPL(module_mutex);
 static LIST_HEAD(modules);
+
+/*
+ * Use a latched RB-tree for __module_address(); this allows us to use
+ * RCU-sched lookups of the address from any context.
+ *
+ * Because modules have two address ranges: init and core, we need two
+ * latch_tree_nodes entries. Therefore we need the back-pointer from
+ * mod_tree_node.
+ *
+ * Because init ranges are short lived we mark them unlikely and have placed
+ * them outside the critical cacheline in struct module.
+ */
+
+static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n)
+{
+	struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
+	struct module *mod = mtn->mod;
+
+	if (unlikely(mtn == &mod->mtn_init))
+		return (unsigned long)mod->module_init;
+
+	return (unsigned long)mod->module_core;
+}
+
+static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n)
+{
+	struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
+	struct module *mod = mtn->mod;
+
+	if (unlikely(mtn == &mod->mtn_init))
+		return (unsigned long)mod->init_size;
+
+	return (unsigned long)mod->core_size;
+}
+
+static __always_inline bool
+mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b)
+{
+	return __mod_tree_val(a) < __mod_tree_val(b);
+}
+
+static __always_inline int
+mod_tree_comp(void *key, struct latch_tree_node *n)
+{
+	unsigned long val = (unsigned long)key;
+	unsigned long start, end;
+
+	start = __mod_tree_val(n);
+	if (val < start)
+		return -1;
+
+	end = start + __mod_tree_size(n);
+	if (val >= end)
+		return 1;
+
+	return 0;
+}
+
+static const struct latch_tree_ops mod_tree_ops = {
+	.less = mod_tree_less,
+	.comp = mod_tree_comp,
+};
+
+static struct latch_tree_root mod_tree __cacheline_aligned;
+
+/*
+ * These modifications: insert, remove_init and remove; are serialized by the
+ * module_mutex.
+ */
+static void mod_tree_insert(struct module *mod)
+{
+	mod->mtn_core.mod = mod;
+	mod->mtn_init.mod = mod;
+
+	latch_tree_insert(&mod->mtn_core.node, &mod_tree, &mod_tree_ops);
+	if (mod->init_size)
+		latch_tree_insert(&mod->mtn_init.node, &mod_tree, &mod_tree_ops);
+}
+
+static void mod_tree_remove_init(struct module *mod)
+{
+	if (mod->init_size)
+		latch_tree_erase(&mod->mtn_init.node, &mod_tree, &mod_tree_ops);
+}
+
+static void mod_tree_remove(struct module *mod)
+{
+	latch_tree_erase(&mod->mtn_core.node, &mod_tree, &mod_tree_ops);
+	mod_tree_remove_init(mod);
+}
+
+static struct module *mod_tree_find(unsigned long addr)
+{
+	struct latch_tree_node *ltn;
+
+	ltn = latch_tree_find((void *)addr, &mod_tree, &mod_tree_ops);
+	if (!ltn)
+		return NULL;
+
+	return container_of(ltn, struct mod_tree_node, node)->mod;
+}
+
 #ifdef CONFIG_KGDB_KDB
 struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
 #endif /* CONFIG_KGDB_KDB */
@@ -1878,6 +1980,7 @@ static void free_module(struct module *mod)
 	mutex_lock(&module_mutex);
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
+	mod_tree_remove(mod);
 	/* Remove this module from bug list, this uses list_del_rcu */
 	module_bug_cleanup(mod);
 	/* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */
@@ -3145,6 +3248,7 @@ static noinline int do_init_module(struct module *mod)
 	mod->symtab = mod->core_symtab;
 	mod->strtab = mod->core_strtab;
 #endif
+	mod_tree_remove_init(mod);
 	unset_module_init_ro_nx(mod);
 	module_arch_freeing_init(mod);
 	mod->module_init = NULL;
@@ -3215,6 +3319,7 @@ again:
 		goto out;
 	}
 	list_add_rcu(&mod->list, &modules);
+	mod_tree_insert(mod);
 	err = 0;
 
 out:
@@ -3861,13 +3966,13 @@ struct module *__module_address(unsigned long addr)
 
 	module_assert_mutex_or_preempt();
 
-	list_for_each_entry_rcu(mod, &modules, list) {
+	mod = mod_tree_find(addr);
+	if (mod) {
+		BUG_ON(!within_module(addr, mod));
 		if (mod->state == MODULE_STATE_UNFORMED)
-			continue;
-		if (within_module(addr, mod))
-			return mod;
+			mod = NULL;
 	}
-	return NULL;
+	return mod;
 }
 EXPORT_SYMBOL_GPL(__module_address);
 
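
On the lookup itself: mod_tree_less() orders nodes by their start address
and mod_tree_comp() answers below/inside/above (-1/0/1) for a key, and
together they let an ordinary binary-search-tree descent find the range
enclosing an address, which works because module mappings never overlap.
A small stand-alone sketch of that descent, using a plain unbalanced BST
in place of the kernel's balanced, latched RB-tree; all names here are
invented for illustration:

	#include <stdio.h>

	/* Node holding a [start, start+size) range, in the role of mod_tree_node. */
	struct rnode {
		unsigned long start, size;
		const char *name;
		struct rnode *left, *right;
	};

	/* Same contract as mod_tree_comp(): -1 below, 1 at/above end, 0 inside. */
	static int range_comp(unsigned long addr, const struct rnode *n)
	{
		if (addr < n->start)
			return -1;
		if (addr >= n->start + n->size)
			return 1;
		return 0;
	}

	/* Insert keyed on start address, as mod_tree_less() orders the kernel tree. */
	static struct rnode *range_insert(struct rnode *root, struct rnode *n)
	{
		if (!root)
			return n;
		if (n->start < root->start)
			root->left = range_insert(root->left, n);
		else
			root->right = range_insert(root->right, n);
		return root;
	}

	/* Logarithmic descent (when balanced); valid because ranges don't overlap. */
	static const struct rnode *range_find(const struct rnode *root,
					      unsigned long addr)
	{
		while (root) {
			int c = range_comp(addr, root);
			if (c == 0)
				return root;
			root = (c < 0) ? root->left : root->right;
		}
		return NULL;
	}

	int main(void)
	{
		struct rnode a = { 0x1000, 0x200, "mod_a", NULL, NULL };
		struct rnode b = { 0x4000, 0x100, "mod_b", NULL, NULL };
		struct rnode c = { 0x8000, 0x300, "mod_c", NULL, NULL };
		struct rnode *root = NULL;

		root = range_insert(root, &b);
		root = range_insert(root, &a);
		root = range_insert(root, &c);

		const struct rnode *hit = range_find(root, 0x40f0);
		printf("0x40f0 -> %s\n", hit ? hit->name : "(none)");	/* mod_b */
		hit = range_find(root, 0x4100);	/* one past the end of mod_b */
		printf("0x4100 -> %s\n", hit ? hit->name : "(none)");
		return 0;
	}

This replaces the O(n) list walk that the old __module_address() performed
under RCU with an O(log n) descent, which is the entire point of the patch.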