diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2013-01-11 21:57:34 -0500 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2013-01-11 21:57:46 -0500 |
commit | 1fb9341ac34825aa40354e74d9a2c69df7d2c304 (patch) | |
tree | ebb2b836673d722845cd0a17c836799d800ae810 /kernel | |
parent | 0d21b0e3477395e7ff2acc269f15df6e6a8d356d (diff) |
module: put modules in list much earlier.
Prarit's excellent bug report:
> In recent Fedora releases (F17 & F18) some users have reported seeing
> messages similar to
>
> [ 15.478160] kvm: Could not allocate 304 bytes percpu data
> [ 15.478174] PERCPU: allocation failed, size=304 align=32, alloc from
> reserved chunk failed
>
> during system boot. In some cases, users have also reported seeing this
> message along with a failed load of other modules.
>
> What is happening is systemd is loading an instance of the kvm module for
> each cpu found (see commit e9bda3b). When the module load occurs the kernel
> currently allocates the modules percpu data area prior to checking to see
> if the module is already loaded or is in the process of being loaded. If
> the module is already loaded, or finishes load, the module loading code
> releases the current instance's module's percpu data.
Now we have a new state MODULE_STATE_UNFORMED, we can insert the
module into the list (and thus guarantee its uniqueness) before we
allocate the per-cpu region.
Reported-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Tested-by: Prarit Bhargava <prarit@redhat.com>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/module.c | 90 |
1 files changed, 49 insertions, 41 deletions
diff --git a/kernel/module.c b/kernel/module.c index c3a2ee8e3679..ec535aa63c98 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -3017,7 +3017,7 @@ static bool finished_loading(const char *name) | |||
3017 | bool ret; | 3017 | bool ret; |
3018 | 3018 | ||
3019 | mutex_lock(&module_mutex); | 3019 | mutex_lock(&module_mutex); |
3020 | mod = find_module(name); | 3020 | mod = find_module_all(name, true); |
3021 | ret = !mod || mod->state == MODULE_STATE_LIVE | 3021 | ret = !mod || mod->state == MODULE_STATE_LIVE |
3022 | || mod->state == MODULE_STATE_GOING; | 3022 | || mod->state == MODULE_STATE_GOING; |
3023 | mutex_unlock(&module_mutex); | 3023 | mutex_unlock(&module_mutex); |
@@ -3141,6 +3141,32 @@ static int load_module(struct load_info *info, const char __user *uargs, | |||
3141 | goto free_copy; | 3141 | goto free_copy; |
3142 | } | 3142 | } |
3143 | 3143 | ||
3144 | /* | ||
3145 | * We try to place it in the list now to make sure it's unique | ||
3146 | * before we dedicate too many resources. In particular, | ||
3147 | * temporary percpu memory exhaustion. | ||
3148 | */ | ||
3149 | mod->state = MODULE_STATE_UNFORMED; | ||
3150 | again: | ||
3151 | mutex_lock(&module_mutex); | ||
3152 | if ((old = find_module_all(mod->name, true)) != NULL) { | ||
3153 | if (old->state == MODULE_STATE_COMING | ||
3154 | || old->state == MODULE_STATE_UNFORMED) { | ||
3155 | /* Wait in case it fails to load. */ | ||
3156 | mutex_unlock(&module_mutex); | ||
3157 | err = wait_event_interruptible(module_wq, | ||
3158 | finished_loading(mod->name)); | ||
3159 | if (err) | ||
3160 | goto free_module; | ||
3161 | goto again; | ||
3162 | } | ||
3163 | err = -EEXIST; | ||
3164 | mutex_unlock(&module_mutex); | ||
3165 | goto free_module; | ||
3166 | } | ||
3167 | list_add_rcu(&mod->list, &modules); | ||
3168 | mutex_unlock(&module_mutex); | ||
3169 | |||
3144 | #ifdef CONFIG_MODULE_SIG | 3170 | #ifdef CONFIG_MODULE_SIG |
3145 | mod->sig_ok = info->sig_ok; | 3171 | mod->sig_ok = info->sig_ok; |
3146 | if (!mod->sig_ok) | 3172 | if (!mod->sig_ok) |
@@ -3150,7 +3176,7 @@ static int load_module(struct load_info *info, const char __user *uargs, | |||
3150 | /* Now module is in final location, initialize linked lists, etc. */ | 3176 | /* Now module is in final location, initialize linked lists, etc. */ |
3151 | err = module_unload_init(mod); | 3177 | err = module_unload_init(mod); |
3152 | if (err) | 3178 | if (err) |
3153 | goto free_module; | 3179 | goto unlink_mod; |
3154 | 3180 | ||
3155 | /* Now we've got everything in the final locations, we can | 3181 | /* Now we've got everything in the final locations, we can |
3156 | * find optional sections. */ | 3182 | * find optional sections. */ |
@@ -3185,54 +3211,33 @@ static int load_module(struct load_info *info, const char __user *uargs, | |||
3185 | goto free_arch_cleanup; | 3211 | goto free_arch_cleanup; |
3186 | } | 3212 | } |
3187 | 3213 | ||
3188 | /* Mark state as coming so strong_try_module_get() ignores us. */ | ||
3189 | mod->state = MODULE_STATE_COMING; | ||
3190 | |||
3191 | /* Now sew it into the lists so we can get lockdep and oops | ||
3192 | * info during argument parsing. No one should access us, since | ||
3193 | * strong_try_module_get() will fail. | ||
3194 | * lockdep/oops can run asynchronous, so use the RCU list insertion | ||
3195 | * function to insert in a way safe to concurrent readers. | ||
3196 | * The mutex protects against concurrent writers. | ||
3197 | */ | ||
3198 | again: | ||
3199 | mutex_lock(&module_mutex); | ||
3200 | if ((old = find_module(mod->name)) != NULL) { | ||
3201 | if (old->state == MODULE_STATE_COMING) { | ||
3202 | /* Wait in case it fails to load. */ | ||
3203 | mutex_unlock(&module_mutex); | ||
3204 | err = wait_event_interruptible(module_wq, | ||
3205 | finished_loading(mod->name)); | ||
3206 | if (err) | ||
3207 | goto free_arch_cleanup; | ||
3208 | goto again; | ||
3209 | } | ||
3210 | err = -EEXIST; | ||
3211 | goto unlock; | ||
3212 | } | ||
3213 | |||
3214 | /* This has to be done once we're sure module name is unique. */ | ||
3215 | dynamic_debug_setup(info->debug, info->num_debug); | 3214 | dynamic_debug_setup(info->debug, info->num_debug); |
3216 | 3215 | ||
3217 | /* Find duplicate symbols */ | 3216 | mutex_lock(&module_mutex); |
3217 | /* Find duplicate symbols (must be called under lock). */ | ||
3218 | err = verify_export_symbols(mod); | 3218 | err = verify_export_symbols(mod); |
3219 | if (err < 0) | 3219 | if (err < 0) |
3220 | goto ddebug; | 3220 | goto ddebug_cleanup; |
3221 | 3221 | ||
3222 | /* This relies on module_mutex for list integrity. */ | ||
3222 | module_bug_finalize(info->hdr, info->sechdrs, mod); | 3223 | module_bug_finalize(info->hdr, info->sechdrs, mod); |
3223 | list_add_rcu(&mod->list, &modules); | 3224 | |
3225 | /* Mark state as coming so strong_try_module_get() ignores us, | ||
3226 | * but kallsyms etc. can see us. */ | ||
3227 | mod->state = MODULE_STATE_COMING; | ||
3228 | |||
3224 | mutex_unlock(&module_mutex); | 3229 | mutex_unlock(&module_mutex); |
3225 | 3230 | ||
3226 | /* Module is ready to execute: parsing args may do that. */ | 3231 | /* Module is ready to execute: parsing args may do that. */ |
3227 | err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, | 3232 | err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, |
3228 | -32768, 32767, &ddebug_dyndbg_module_param_cb); | 3233 | -32768, 32767, &ddebug_dyndbg_module_param_cb); |
3229 | if (err < 0) | 3234 | if (err < 0) |
3230 | goto unlink; | 3235 | goto bug_cleanup; |
3231 | 3236 | ||
3232 | /* Link in to syfs. */ | 3237 | /* Link in to syfs. */ |
3233 | err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp); | 3238 | err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp); |
3234 | if (err < 0) | 3239 | if (err < 0) |
3235 | goto unlink; | 3240 | goto bug_cleanup; |
3236 | 3241 | ||
3237 | /* Get rid of temporary copy. */ | 3242 | /* Get rid of temporary copy. */ |
3238 | free_copy(info); | 3243 | free_copy(info); |
@@ -3242,16 +3247,13 @@ again: | |||
3242 | 3247 | ||
3243 | return do_init_module(mod); | 3248 | return do_init_module(mod); |
3244 | 3249 | ||
3245 | unlink: | 3250 | bug_cleanup: |
3251 | /* module_bug_cleanup needs module_mutex protection */ | ||
3246 | mutex_lock(&module_mutex); | 3252 | mutex_lock(&module_mutex); |
3247 | /* Unlink carefully: kallsyms could be walking list. */ | ||
3248 | list_del_rcu(&mod->list); | ||
3249 | module_bug_cleanup(mod); | 3253 | module_bug_cleanup(mod); |
3250 | wake_up_all(&module_wq); | ||
3251 | ddebug: | ||
3252 | dynamic_debug_remove(info->debug); | ||
3253 | unlock: | ||
3254 | mutex_unlock(&module_mutex); | 3254 | mutex_unlock(&module_mutex); |
3255 | ddebug_cleanup: | ||
3256 | dynamic_debug_remove(info->debug); | ||
3255 | synchronize_sched(); | 3257 | synchronize_sched(); |
3256 | kfree(mod->args); | 3258 | kfree(mod->args); |
3257 | free_arch_cleanup: | 3259 | free_arch_cleanup: |
@@ -3260,6 +3262,12 @@ again: | |||
3260 | free_modinfo(mod); | 3262 | free_modinfo(mod); |
3261 | free_unload: | 3263 | free_unload: |
3262 | module_unload_free(mod); | 3264 | module_unload_free(mod); |
3265 | unlink_mod: | ||
3266 | mutex_lock(&module_mutex); | ||
3267 | /* Unlink carefully: kallsyms could be walking list. */ | ||
3268 | list_del_rcu(&mod->list); | ||
3269 | wake_up_all(&module_wq); | ||
3270 | mutex_unlock(&module_mutex); | ||
3263 | free_module: | 3271 | free_module: |
3264 | module_deallocate(mod, info); | 3272 | module_deallocate(mod, info); |
3265 | free_copy: | 3273 | free_copy: |