From 3d9a854c2dac3e888393b23ba7adafcce4d6d4b9 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 20 Feb 2010 01:03:43 +0100 Subject: Rename .data[.percpu][.XXX] to .data[..percpu][..XXX]. Signed-off-by: Denys Vlasenko Signed-off-by: Michal Marek --- kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index f82386bd9ee9..5daf0abd63c1 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -397,7 +397,7 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, const char *secstrings) { - return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); + return find_sec(hdr, sechdrs, secstrings, ".data..percpu"); } static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) -- cgit v1.2.2 From 259354deaaf03d49a02dbb9975d6ec2a54675672 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Mar 2010 18:56:10 +0900 Subject: module: encapsulate percpu handling better and record percpu_size Better encapsulate module static percpu area handling so that code outsidef of CONFIG_SMP ifdef doesn't deal with mod->percpu directly and add mod->percpu_size and record percpu_size in it. Both percpu fields are compiled out on UP. While at it, mark mod->percpu w/ __percpu. This is to prepare for is_module_percpu_address(). Signed-off-by: Tejun Heo Acked-by: Rusty Russell --- kernel/module.c | 66 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 31 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index c968d3606dca..e7a6e53fc73e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -370,27 +370,33 @@ EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP -static void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +static inline void __percpu *mod_percpu(struct module *mod) { - void *ptr; + return mod->percpu; +} +static int percpu_modalloc(struct module *mod, + unsigned long size, unsigned long align) +{ if (align > PAGE_SIZE) { printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", - name, align, PAGE_SIZE); + mod->name, align, PAGE_SIZE); align = PAGE_SIZE; } - ptr = __alloc_reserved_percpu(size, align); - if (!ptr) + mod->percpu = __alloc_reserved_percpu(size, align); + if (!mod->percpu) { printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", size); - return ptr; + return -ENOMEM; + } + mod->percpu_size = size; + return 0; } -static void percpu_modfree(void *freeme) +static void percpu_modfree(struct module *mod) { - free_percpu(freeme); + free_percpu(mod->percpu); } static unsigned int find_pcpusec(Elf_Ehdr *hdr, @@ -400,24 +406,28 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr, return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); } -static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) +static void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { int cpu; for_each_possible_cpu(cpu) - memcpy(pcpudest + per_cpu_offset(cpu), from, size); + memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); } #else /* ... !CONFIG_SMP */ -static inline void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +static inline void __percpu *mod_percpu(struct module *mod) { return NULL; } -static inline void percpu_modfree(void *pcpuptr) +static inline int percpu_modalloc(struct module *mod, + unsigned long size, unsigned long align) +{ + return -ENOMEM; +} +static inline void percpu_modfree(struct module *mod) { - BUG(); } static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, @@ -425,8 +435,8 @@ static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, { return 0; } -static inline void percpu_modcopy(void *pcpudst, const void *src, - unsigned long size) +static inline void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { /* pcpusec should be 0, and size of that section should be 0. */ BUG_ON(size != 0); @@ -1400,8 +1410,7 @@ static void free_module(struct module *mod) /* This may be NULL, but that's OK */ module_free(mod, mod->module_init); kfree(mod->args); - if (mod->percpu) - percpu_modfree(mod->percpu); + percpu_modfree(mod); #if defined(CONFIG_MODULE_UNLOAD) if (mod->refptr) free_percpu(mod->refptr); @@ -1520,7 +1529,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs, default: /* Divert to percpu allocation if a percpu var. */ if (sym[i].st_shndx == pcpuindex) - secbase = (unsigned long)mod->percpu; + secbase = (unsigned long)mod_percpu(mod); else secbase = sechdrs[sym[i].st_shndx].sh_addr; sym[i].st_value += secbase; @@ -1954,7 +1963,7 @@ static noinline struct module *load_module(void __user *umod, unsigned int modindex, versindex, infoindex, pcpuindex; struct module *mod; long err = 0; - void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ + void *ptr = NULL; /* Stops spurious gcc warning */ unsigned long symoffs, stroffs, *strmap; mm_segment_t old_fs; @@ -2094,15 +2103,11 @@ static noinline struct module *load_module(void __user *umod, if (pcpuindex) { /* We have a special allocation for this section. */ - percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, - sechdrs[pcpuindex].sh_addralign, - mod->name); - if (!percpu) { - err = -ENOMEM; + err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size, + sechdrs[pcpuindex].sh_addralign); + if (err) goto free_mod; - } sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; - mod->percpu = percpu; } /* Determine total sizes, and put offsets in sh_entsize. For now @@ -2317,7 +2322,7 @@ static noinline struct module *load_module(void __user *umod, sort_extable(mod->extable, mod->extable + mod->num_exentries); /* Finally, copy percpu area over. */ - percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, + percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, @@ -2409,8 +2414,7 @@ static noinline struct module *load_module(void __user *umod, module_free(mod, mod->module_core); /* mod will be freed with core. Don't access it beyond this line! */ free_percpu: - if (percpu) - percpu_modfree(percpu); + percpu_modfree(mod); free_mod: kfree(args); kfree(strmap); -- cgit v1.2.2 From 10fad5e46f6c7bdfb01b1a012380a38e3c6ab346 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Mar 2010 18:57:54 +0900 Subject: percpu, module: implement and use is_kernel/module_percpu_address() lockdep has custom code to check whether a pointer belongs to static percpu area which is somewhat broken. Implement proper is_kernel/module_percpu_address() and replace the custom code. On UP, percpu variables are regular static variables and can't be distinguished from them. Always return %false on UP. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Cc: Rusty Russell Cc: Ingo Molnar --- kernel/module.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index e7a6e53fc73e..9f8d23d8b3a8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -415,6 +415,40 @@ static void percpu_modcopy(struct module *mod, memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); } +/** + * is_module_percpu_address - test whether address is from module static percpu + * @addr: address to test + * + * Test whether @addr belongs to module static percpu area. + * + * RETURNS: + * %true if @addr is from module static percpu area + */ +bool is_module_percpu_address(unsigned long addr) +{ + struct module *mod; + unsigned int cpu; + + preempt_disable(); + + list_for_each_entry_rcu(mod, &modules, list) { + if (!mod->percpu_size) + continue; + for_each_possible_cpu(cpu) { + void *start = per_cpu_ptr(mod->percpu, cpu); + + if ((void *)addr >= start && + (void *)addr < start + mod->percpu_size) { + preempt_enable(); + return true; + } + } + } + + preempt_enable(); + return false; +} + #else /* ... !CONFIG_SMP */ static inline void __percpu *mod_percpu(struct module *mod) @@ -441,6 +475,10 @@ static inline void percpu_modcopy(struct module *mod, /* pcpusec should be 0, and size of that section should be 0. */ BUG_ON(size != 0); } +bool is_module_percpu_address(unsigned long addr) +{ + return false; +} #endif /* CONFIG_SMP */ -- cgit v1.2.2 From ae832d1e03ac9bf09fb8a07fb37908ab40c7cd0e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 24 Mar 2010 10:57:43 +0800 Subject: tracing: Remove side effect from module tracepoints that caused a GPF Remove the @refcnt argument, because it has side-effects, and arguments with side-effects are not skipped by the jump over disabled instrumentation and are executed even when the tracepoint is disabled. This was also causing a GPF as found by Randy Dunlap: Subject: 2.6.33 GP fault only when built with tracing LKML-Reference: <4BA2B69D.3000309@oracle.com> Note, the current 2.6.34-rc has a fix for the actual cause of the GPF, but this fixes one of its triggers. Tested-by: Randy Dunlap Acked-by: Mathieu Desnoyers Signed-off-by: Li Zefan LKML-Reference: <4BA97FA7.6040406@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/module.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index c968d3606dca..21591ad921f3 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -800,8 +800,7 @@ void module_put(struct module *module) preempt_disable(); __this_cpu_dec(module->refptr->count); - trace_module_put(module, _RET_IP_, - __this_cpu_read(module->refptr->count)); + trace_module_put(module, _RET_IP_); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); -- cgit v1.2.2 From eb0c53771fb2f5f66b0edb3ebce33be4bbf1c285 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 29 Mar 2010 14:25:18 -0400 Subject: tracing: Fix compile error in module tracepoints when MODULE_UNLOAD not set If modules are configured in the build but unloading of modules is not, then the refcnt is not defined. Place the get/put module tracepoints under CONFIG_MODULE_UNLOAD since it references this field in the module structure. As a side-effect, this patch also reduces the code when MODULE_UNLOAD is not set, because these unused tracepoints are not created. Signed-off-by: Steven Rostedt --- kernel/module.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 21591ad921f3..d9e237926b69 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -59,8 +59,6 @@ #define CREATE_TRACE_POINTS #include -EXPORT_TRACEPOINT_SYMBOL(module_get); - #if 0 #define DEBUGP printk #else @@ -467,6 +465,9 @@ MODINFO_ATTR(srcversion); static char last_unloaded_module[MODULE_NAME_LEN+1]; #ifdef CONFIG_MODULE_UNLOAD + +EXPORT_TRACEPOINT_SYMBOL(module_get); + /* Init the unload section of the module. */ static void module_unload_init(struct module *mod) { -- cgit v1.2.2 From 5fbfb18d7a5b846946d52c4a10e3aaa213ec31b6 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 1 Apr 2010 19:09:40 +1100 Subject: Fix up possibly racy module refcounting Module refcounting is implemented with a per-cpu counter for speed. However there is a race when tallying the counter where a reference may be taken by one CPU and released by another. Reference count summation may then see the decrement without having seen the previous increment, leading to lower than expected count. A module which never has its actual reference drop below 1 may return a reference count of 0 due to this race. Module removal generally runs under stop_machine, which prevents this race causing bugs due to removal of in-use modules. However there are other real bugs in module.c code and driver code (module_refcount is exported) where the callers do not run under stop_machine. Fix this by maintaining running per-cpu counters for the number of module refcount increments and the number of refcount decrements. The increments are tallied after the decrements, so any decrement seen will always have its corresponding increment counted. The final refcount is the difference of the total increments and decrements, preventing a low-refcount from being returned. Signed-off-by: Nick Piggin Acked-by: Rusty Russell Signed-off-by: Linus Torvalds --- kernel/module.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 9f8d23d8b3a8..1016b75b026a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -521,11 +521,13 @@ static void module_unload_init(struct module *mod) int cpu; INIT_LIST_HEAD(&mod->modules_which_use_me); - for_each_possible_cpu(cpu) - per_cpu_ptr(mod->refptr, cpu)->count = 0; + for_each_possible_cpu(cpu) { + per_cpu_ptr(mod->refptr, cpu)->incs = 0; + per_cpu_ptr(mod->refptr, cpu)->decs = 0; + } /* Hold reference count during initialization. */ - __this_cpu_write(mod->refptr->count, 1); + __this_cpu_write(mod->refptr->incs, 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } @@ -664,12 +666,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced) unsigned int module_refcount(struct module *mod) { - unsigned int total = 0; + unsigned int incs = 0, decs = 0; int cpu; for_each_possible_cpu(cpu) - total += per_cpu_ptr(mod->refptr, cpu)->count; - return total; + decs += per_cpu_ptr(mod->refptr, cpu)->decs; + /* + * ensure the incs are added up after the decs. + * module_put ensures incs are visible before decs with smp_wmb. + * + * This 2-count scheme avoids the situation where the refcount + * for CPU0 is read, then CPU0 increments the module refcount, + * then CPU1 drops that refcount, then the refcount for CPU1 is + * read. We would record a decrement but not its corresponding + * increment so we would see a low count (disaster). + * + * Rare situation? But module_refcount can be preempted, and we + * might be tallying up 4096+ CPUs. So it is not impossible. + */ + smp_rmb(); + for_each_possible_cpu(cpu) + incs += per_cpu_ptr(mod->refptr, cpu)->incs; + return incs - decs; } EXPORT_SYMBOL(module_refcount); @@ -846,10 +864,11 @@ void module_put(struct module *module) { if (module) { preempt_disable(); - __this_cpu_dec(module->refptr->count); + smp_wmb(); /* see comment in module_refcount */ + __this_cpu_inc(module->refptr->decs); trace_module_put(module, _RET_IP_, - __this_cpu_read(module->refptr->count)); + __this_cpu_read(module->refptr->decs)); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); -- cgit v1.2.2 From 3fc1f1e27a5b807791d72e5d992aa33b668a6626 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 6 May 2010 18:49:20 +0200 Subject: stop_machine: reimplement using cpu_stop Reimplement stop_machine using cpu_stop. As cpu stoppers are guaranteed to be available for all online cpus, stop_machine_create/destroy() are no longer necessary and removed. With resource management and synchronization handled by cpu_stop, the new implementation is much simpler. Asking the cpu_stop to execute the stop_cpu() state machine on all online cpus with cpu hotplug disabled is enough. stop_machine itself doesn't need to manage any global resources anymore, so all per-instance information is rolled into struct stop_machine_data and the mutex and all static data variables are removed. The previous implementation created and destroyed RT workqueues as necessary which made stop_machine() calls highly expensive on very large machines. According to Dimitri Sivanich, preventing the dynamic creation/destruction makes booting faster more than twice on very large machines. cpu_stop resources are preallocated for all online cpus and should have the same effect. Signed-off-by: Tejun Heo Acked-by: Rusty Russell Acked-by: Peter Zijlstra Cc: Oleg Nesterov Cc: Dimitri Sivanich --- kernel/module.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 1016b75b026a..0838246d8c94 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -723,16 +723,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, return -EFAULT; name[MODULE_NAME_LEN-1] = '\0'; - /* Create stop_machine threads since free_module relies on - * a non-failing stop_machine call. */ - ret = stop_machine_create(); - if (ret) - return ret; - - if (mutex_lock_interruptible(&module_mutex) != 0) { - ret = -EINTR; - goto out_stop; - } + if (mutex_lock_interruptible(&module_mutex) != 0) + return -EINTR; mod = find_module(name); if (!mod) { @@ -792,8 +784,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, out: mutex_unlock(&module_mutex); -out_stop: - stop_machine_destroy(); return ret; } -- cgit v1.2.2 From 480b02df3aa9f07d1c7df0cd8be7a5ca73893455 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 19 May 2010 17:33:39 -0600 Subject: module: drop the lock while waiting for module to complete initialization. This fixes "gave up waiting for init of module libcrc32c." which happened at boot time due to multiple parallel module loads. The problem was a deadlock: we wait for a module to finish initializing, but we keep the module_lock mutex so it can't complete. In particular, this could reasonably happen if a module does a request_module() in its initialization routine. So we change use_module() to return an errno rather than a bool, and if it's -EBUSY we drop the lock and wait in the caller, then reaquire the lock. Reported-by: Brandon Philips Signed-off-by: Rusty Russell Tested-by: Brandon Philips --- kernel/module.c | 59 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 22 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index e2564580f3f1..970d773aec62 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -561,33 +561,26 @@ int use_module(struct module *a, struct module *b) struct module_use *use; int no_warn, err; - if (b == NULL || already_uses(a, b)) return 1; - - /* If we're interrupted or time out, we fail. */ - if (wait_event_interruptible_timeout( - module_wq, (err = strong_try_module_get(b)) != -EBUSY, - 30 * HZ) <= 0) { - printk("%s: gave up waiting for init of module %s.\n", - a->name, b->name); + if (b == NULL || already_uses(a, b)) return 0; - } - /* If strong_try_module_get() returned a different error, we fail. */ + /* If we're interrupted or time out, we fail. */ + err = strong_try_module_get(b); if (err) - return 0; + return err; DEBUGP("Allocating new usage for %s.\n", a->name); use = kmalloc(sizeof(*use), GFP_ATOMIC); if (!use) { printk("%s: out of memory loading\n", a->name); module_put(b); - return 0; + return -ENOMEM; } use->module_which_uses = a; list_add(&use->list, &b->modules_which_use_me); no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); - return 1; + return 0; } EXPORT_SYMBOL_GPL(use_module); @@ -880,7 +873,7 @@ static inline void module_unload_free(struct module *mod) int use_module(struct module *a, struct module *b) { - return strong_try_module_get(b) == 0; + return strong_try_module_get(b); } EXPORT_SYMBOL_GPL(use_module); @@ -1051,17 +1044,39 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, struct module *owner; const struct kernel_symbol *sym; const unsigned long *crc; + DEFINE_WAIT(wait); + int err; + long timeleft = 30 * HZ; +again: sym = find_symbol(name, &owner, &crc, !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); - /* use_module can fail due to OOM, - or module initialization or unloading */ - if (sym) { - if (!check_version(sechdrs, versindex, name, mod, crc, owner) - || !use_module(mod, owner)) - sym = NULL; - } - return sym; + if (!sym) + return NULL; + + if (!check_version(sechdrs, versindex, name, mod, crc, owner)) + return NULL; + + prepare_to_wait(&module_wq, &wait, TASK_INTERRUPTIBLE); + err = use_module(mod, owner); + if (likely(!err) || err != -EBUSY || signal_pending(current)) { + finish_wait(&module_wq, &wait); + return err ? NULL : sym; + } + + /* Module is still loading. Drop lock and wait. */ + mutex_unlock(&module_mutex); + timeleft = schedule_timeout(timeleft); + mutex_lock(&module_mutex); + finish_wait(&module_wq, &wait); + + /* Module might be gone entirely, or replaced. Re-lookup. */ + if (timeleft) + goto again; + + printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n", + mod->name, owner->name); + return NULL; } /* -- cgit v1.2.2 From 67fc4e0cb931d6b4ccf21248e4199b154478ecea Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 20 May 2010 21:04:21 -0500 Subject: kdb: core for kgdb back end (2 of 2) This patch contains the hooks and instrumentation into kernel which live outside the kernel/debug directory, which the kdb core will call to run commands like lsmod, dmesg, bt etc... CC: linux-arch@vger.kernel.org Signed-off-by: Jason Wessel Signed-off-by: Martin Hicks --- kernel/module.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index e2564580f3f1..b751f1902476 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -77,6 +77,10 @@ DEFINE_MUTEX(module_mutex); EXPORT_SYMBOL_GPL(module_mutex); static LIST_HEAD(modules); +#ifdef CONFIG_KGDB_KDB +struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ +#endif /* CONFIG_KGDB_KDB */ + /* Block module loading/unloading? */ int modules_disabled = 0; -- cgit v1.2.2 From 2c3c8bea608866d8bd9dcf92657d57fdcac011c5 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Wed, 12 May 2010 18:28:57 -0700 Subject: sysfs: add struct file* to bin_attr callbacks This allows bin_attr->read,write,mmap callbacks to check file specific data (such as inode owner) as part of any privilege validation. Signed-off-by: Chris Wright Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index e2564580f3f1..5e14483768bb 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1182,7 +1182,7 @@ struct module_notes_attrs { struct bin_attribute attrs[0]; }; -static ssize_t module_notes_read(struct kobject *kobj, +static ssize_t module_notes_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { -- cgit v1.2.2 From 7d52669b14e36f8365070324be009486d387ad00 Mon Sep 17 00:00:00 2001 From: Wenji Huang Date: Mon, 24 May 2010 14:33:12 -0700 Subject: module: remove duplicate declaration of __ksymtab_gpl_future Minor cleanup on duplicate __{start/stop}__ksymtab_gpl_future. Signed-off-by: Wenji Huang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/module.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index a8014bfb5a4e..625985e70e9d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -180,8 +180,6 @@ extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; extern const struct kernel_symbol __start___ksymtab_gpl_future[]; extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; -extern const struct kernel_symbol __start___ksymtab_gpl_future[]; -extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; extern const unsigned long __start___kcrctab[]; extern const unsigned long __start___kcrctab_gpl[]; extern const unsigned long __start___kcrctab_gpl_future[]; -- cgit v1.2.2 From 218ce7351413b8287a80fab1d7b94906a5559f01 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 25 May 2010 16:48:30 -0700 Subject: Revert "module: drop the lock while waiting for module to complete initialization." This reverts commit 480b02df3aa9f07d1c7df0cd8be7a5ca73893455, since Rafael reports that it causes occasional kernel paging request faults in load_module(). Dropping the module lock and re-taking it deep in the call-chain is definitely not the right thing to do. That just turns the mutex from a lock into a "random non-locking data structure" that doesn't actually protect what it's supposed to protect. Requested-and-tested-by: Rafael J. Wysocki Cc: Rusty Russell Cc: Brandon Philips Cc: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/module.c | 59 +++++++++++++++++++++------------------------------------ 1 file changed, 22 insertions(+), 37 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 625985e70e9d..333fbcc96978 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -563,26 +563,33 @@ int use_module(struct module *a, struct module *b) struct module_use *use; int no_warn, err; - if (b == NULL || already_uses(a, b)) - return 0; + if (b == NULL || already_uses(a, b)) return 1; /* If we're interrupted or time out, we fail. */ - err = strong_try_module_get(b); + if (wait_event_interruptible_timeout( + module_wq, (err = strong_try_module_get(b)) != -EBUSY, + 30 * HZ) <= 0) { + printk("%s: gave up waiting for init of module %s.\n", + a->name, b->name); + return 0; + } + + /* If strong_try_module_get() returned a different error, we fail. */ if (err) - return err; + return 0; DEBUGP("Allocating new usage for %s.\n", a->name); use = kmalloc(sizeof(*use), GFP_ATOMIC); if (!use) { printk("%s: out of memory loading\n", a->name); module_put(b); - return -ENOMEM; + return 0; } use->module_which_uses = a; list_add(&use->list, &b->modules_which_use_me); no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); - return 0; + return 1; } EXPORT_SYMBOL_GPL(use_module); @@ -875,7 +882,7 @@ static inline void module_unload_free(struct module *mod) int use_module(struct module *a, struct module *b) { - return strong_try_module_get(b); + return strong_try_module_get(b) == 0; } EXPORT_SYMBOL_GPL(use_module); @@ -1046,39 +1053,17 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, struct module *owner; const struct kernel_symbol *sym; const unsigned long *crc; - DEFINE_WAIT(wait); - int err; - long timeleft = 30 * HZ; -again: sym = find_symbol(name, &owner, &crc, !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); - if (!sym) - return NULL; - - if (!check_version(sechdrs, versindex, name, mod, crc, owner)) - return NULL; - - prepare_to_wait(&module_wq, &wait, TASK_INTERRUPTIBLE); - err = use_module(mod, owner); - if (likely(!err) || err != -EBUSY || signal_pending(current)) { - finish_wait(&module_wq, &wait); - return err ? NULL : sym; - } - - /* Module is still loading. Drop lock and wait. */ - mutex_unlock(&module_mutex); - timeleft = schedule_timeout(timeleft); - mutex_lock(&module_mutex); - finish_wait(&module_wq, &wait); - - /* Module might be gone entirely, or replaced. Re-lookup. */ - if (timeleft) - goto again; - - printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n", - mod->name, owner->name); - return NULL; + /* use_module can fail due to OOM, + or module initialization or unloading */ + if (sym) { + if (!check_version(sechdrs, versindex, name, mod, crc, owner) + || !use_module(mod, owner)) + sym = NULL; + } + return sym; } /* -- cgit v1.2.2 From 293a7cfeedc2b2380a7c7274902323c3cf5f7575 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 31 May 2010 19:53:50 +0930 Subject: module: fix reference to mod->percpu after freeing module. Rafael sees a sometimes crash at precpu_modfree from kernel/module.c; it only occurred with another (since-reverted) patch, but that patch simply changed timing to uncover this bug, it was otherwise unrelated. The comment about the mod being freed is self-explanatory, but neither Tejun nor I read it. This bug was introduced in 259354deaa, after it had previously been fixed in 6e2b75740b. How embarrassing. Reported-by: "Rafael J. Wysocki" Signed-off-by: Rusty Russell Embarrassingly-Acked-by: Tejun Heo Cc: Masami Hiramatsu Tested-by: "Rafael J. Wysocki" Signed-off-by: Linus Torvalds --- kernel/module.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 333fbcc96978..d806e00e4450 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2014,6 +2014,7 @@ static noinline struct module *load_module(void __user *umod, long err = 0; void *ptr = NULL; /* Stops spurious gcc warning */ unsigned long symoffs, stroffs, *strmap; + void __percpu *percpu; mm_segment_t old_fs; @@ -2158,6 +2159,8 @@ static noinline struct module *load_module(void __user *umod, goto free_mod; sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; } + /* Keep this around for failure path. */ + percpu = mod_percpu(mod); /* Determine total sizes, and put offsets in sh_entsize. For now this is done generically; there doesn't appear to be any @@ -2463,7 +2466,7 @@ static noinline struct module *load_module(void __user *umod, module_free(mod, mod->module_core); /* mod will be freed with core. Don't access it beyond this line! */ free_percpu: - percpu_modfree(mod); + free_percpu(percpu); free_mod: kfree(args); kfree(strmap); -- cgit v1.2.2 From 2c02dfe7fe3fba97a5665d329d039d2415ea5607 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 31 May 2010 12:19:37 -0700 Subject: module: Make the 'usage' lists be two-way When adding a module that depends on another one, we used to create a one-way list of "modules_which_use_me", so that module unloading could see who needs a module. It's actually quite simple to make that list go both ways: so that we not only can see "who uses me", but also see a list of modules that are "used by me". In fact, we always wanted that list in "module_unload_free()": when we unload a module, we want to also release all the other modules that are used by that module. But because we didn't have that list, we used to first iterate over all modules, and then iterate over each "used by me" list of that module. By making the list two-way, we simplify module_unload_free(), and it allows for some trivial fixes later too. Signed-off-by: Linus Torvalds Signed-off-by: Rusty Russell (cleaned & rebased) --- kernel/module.c | 79 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 31 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 0129769301e3..be18c3e34684 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -523,7 +523,8 @@ static void module_unload_init(struct module *mod) { int cpu; - INIT_LIST_HEAD(&mod->modules_which_use_me); + INIT_LIST_HEAD(&mod->source_list); + INIT_LIST_HEAD(&mod->target_list); for_each_possible_cpu(cpu) { per_cpu_ptr(mod->refptr, cpu)->incs = 0; per_cpu_ptr(mod->refptr, cpu)->decs = 0; @@ -538,8 +539,9 @@ static void module_unload_init(struct module *mod) /* modules using other modules */ struct module_use { - struct list_head list; - struct module *module_which_uses; + struct list_head source_list; + struct list_head target_list; + struct module *source, *target; }; /* Does a already use b? */ @@ -547,8 +549,8 @@ static int already_uses(struct module *a, struct module *b) { struct module_use *use; - list_for_each_entry(use, &b->modules_which_use_me, list) { - if (use->module_which_uses == a) { + list_for_each_entry(use, &b->source_list, source_list) { + if (use->source == a) { DEBUGP("%s uses %s!\n", a->name, b->name); return 1; } @@ -557,6 +559,33 @@ static int already_uses(struct module *a, struct module *b) return 0; } +/* + * Module a uses b + * - we add 'a' as a "source", 'b' as a "target" of module use + * - the module_use is added to the list of 'b' sources (so + * 'b' can walk the list to see who sourced them), and of 'a' + * targets (so 'a' can see what modules it targets). + */ +static int add_module_usage(struct module *a, struct module *b) +{ + int no_warn; + struct module_use *use; + + DEBUGP("Allocating new usage for %s.\n", a->name); + use = kmalloc(sizeof(*use), GFP_ATOMIC); + if (!use) { + printk(KERN_WARNING "%s: out of memory loading\n", a->name); + return -ENOMEM; + } + + use->source = a; + use->target = b; + list_add(&use->source_list, &b->source_list); + list_add(&use->target_list, &a->target_list); + no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); + return 0; +} + /* Module a uses b */ int use_module(struct module *a, struct module *b) { @@ -578,17 +607,11 @@ int use_module(struct module *a, struct module *b) if (err) return 0; - DEBUGP("Allocating new usage for %s.\n", a->name); - use = kmalloc(sizeof(*use), GFP_ATOMIC); - if (!use) { - printk("%s: out of memory loading\n", a->name); + err = add_module_usage(a, b); + if (err) { module_put(b); return 0; } - - use->module_which_uses = a; - list_add(&use->list, &b->modules_which_use_me); - no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); return 1; } EXPORT_SYMBOL_GPL(use_module); @@ -596,22 +619,16 @@ EXPORT_SYMBOL_GPL(use_module); /* Clear the unload stuff of the module. */ static void module_unload_free(struct module *mod) { - struct module *i; - - list_for_each_entry(i, &modules, list) { - struct module_use *use; + struct module_use *use, *tmp; - list_for_each_entry(use, &i->modules_which_use_me, list) { - if (use->module_which_uses == mod) { - DEBUGP("%s unusing %s\n", mod->name, i->name); - module_put(i); - list_del(&use->list); - kfree(use); - sysfs_remove_link(i->holders_dir, mod->name); - /* There can be at most one match. */ - break; - } - } + list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) { + struct module *i = use->target; + DEBUGP("%s unusing %s\n", mod->name, i->name); + module_put(i); + list_del(&use->source_list); + list_del(&use->target_list); + kfree(use); + sysfs_remove_link(i->holders_dir, mod->name); } } @@ -735,7 +752,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, goto out; } - if (!list_empty(&mod->modules_which_use_me)) { + if (!list_empty(&mod->source_list)) { /* Other modules depend on us: get rid of them first. */ ret = -EWOULDBLOCK; goto out; @@ -799,9 +816,9 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) /* Always include a trailing , so userspace can differentiate between this and the old multi-field proc format. */ - list_for_each_entry(use, &mod->modules_which_use_me, list) { + list_for_each_entry(use, &mod->source_list, source_list) { printed_something = 1; - seq_printf(m, "%s,", use->module_which_uses->name); + seq_printf(m, "%s,", use->source->name); } if (mod->init != NULL && mod->exit == NULL) { -- cgit v1.2.2 From c8e21ced08b39ef8dfe7236fb2a923a95f645262 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 5 Jun 2010 11:17:35 -0600 Subject: module: fix kdb's illicit use of struct module_use. Linus changed the structure, and luckily this didn't compile any more. Reported-by: Stephen Rothwell Signed-off-by: Rusty Russell Cc: Jason Wessel Cc: Martin Hicks --- kernel/module.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index be18c3e34684..bbb1d812c79c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -536,14 +536,6 @@ static void module_unload_init(struct module *mod) mod->waiter = current; } -/* modules using other modules */ -struct module_use -{ - struct list_head source_list; - struct list_head target_list; - struct module *source, *target; -}; - /* Does a already use b? */ static int already_uses(struct module *a, struct module *b) { @@ -589,8 +581,7 @@ static int add_module_usage(struct module *a, struct module *b) /* Module a uses b */ int use_module(struct module *a, struct module *b) { - struct module_use *use; - int no_warn, err; + int err; if (b == NULL || already_uses(a, b)) return 1; -- cgit v1.2.2 From 80a3d1bb410e000e176931a076cdf19a1e89a955 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 5 Jun 2010 11:17:36 -0600 Subject: module: move sysfs exposure to end of load_module This means a little extra work, but is more logical: we don't put anything in sysfs until we're about to put the module into the global list an parse its parameters. This also gives us a logical place to put duplicate module detection in the next patch. Signed-off-by: Rusty Russell --- kernel/module.c | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index bbb1d812c79c..c690d9885797 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -560,7 +560,6 @@ static int already_uses(struct module *a, struct module *b) */ static int add_module_usage(struct module *a, struct module *b) { - int no_warn; struct module_use *use; DEBUGP("Allocating new usage for %s.\n", a->name); @@ -574,7 +573,6 @@ static int add_module_usage(struct module *a, struct module *b) use->target = b; list_add(&use->source_list, &b->source_list); list_add(&use->target_list, &a->target_list); - no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); return 0; } @@ -619,7 +617,6 @@ static void module_unload_free(struct module *mod) list_del(&use->source_list); list_del(&use->target_list); kfree(use); - sysfs_remove_link(i->holders_dir, mod->name); } } @@ -1303,6 +1300,29 @@ static inline void remove_notes_attrs(struct module *mod) #endif #ifdef CONFIG_SYSFS +static void add_usage_links(struct module *mod) +{ +#ifdef CONFIG_MODULE_UNLOAD + struct module_use *use; + int nowarn; + + list_for_each_entry(use, &mod->target_list, target_list) { + nowarn = sysfs_create_link(use->target->holders_dir, + &mod->mkobj.kobj, mod->name); + } +#endif +} + +static void del_usage_links(struct module *mod) +{ +#ifdef CONFIG_MODULE_UNLOAD + struct module_use *use; + + list_for_each_entry(use, &mod->target_list, target_list) + sysfs_remove_link(use->target->holders_dir, mod->name); +#endif +} + int module_add_modinfo_attrs(struct module *mod) { struct module_attribute *attr; @@ -1385,6 +1405,10 @@ int mod_sysfs_setup(struct module *mod, { int err; + err = mod_sysfs_init(mod); + if (err) + goto out; + mod->holders_dir = kobject_create_and_add("holders", &mod->mkobj.kobj); if (!mod->holders_dir) { err = -ENOMEM; @@ -1399,6 +1423,8 @@ int mod_sysfs_setup(struct module *mod, if (err) goto out_unreg_param; + add_usage_links(mod); + kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); return 0; @@ -1408,6 +1434,7 @@ out_unreg_holders: kobject_put(mod->holders_dir); out_unreg: kobject_put(&mod->mkobj.kobj); +out: return err; } @@ -1422,10 +1449,15 @@ static void mod_sysfs_fini(struct module *mod) { } +static void del_usage_links(struct module *mod) +{ +} + #endif /* CONFIG_SYSFS */ static void mod_kobject_remove(struct module *mod) { + del_usage_links(mod); module_remove_modinfo_attrs(mod); module_param_sysfs_remove(mod); kobject_put(mod->mkobj.drivers_dir); @@ -2242,11 +2274,6 @@ static noinline struct module *load_module(void __user *umod, /* Now we've moved module, initialize linked lists, etc. */ module_unload_init(mod); - /* add kobject, so we can reference it. */ - err = mod_sysfs_init(mod); - if (err) - goto free_unload; - /* Set up license info based on the info section */ set_license(mod, get_modinfo(sechdrs, infoindex, "license")); @@ -2443,6 +2470,7 @@ static noinline struct module *load_module(void __user *umod, err = mod_sysfs_setup(mod, mod->kp, mod->num_kp); if (err < 0) goto unlink; + add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); @@ -2461,9 +2489,6 @@ static noinline struct module *load_module(void __user *umod, module_arch_cleanup(mod); cleanup: free_modinfo(mod); - kobject_del(&mod->mkobj.kobj); - kobject_put(&mod->mkobj.kobj); - free_unload: module_unload_free(mod); #if defined(CONFIG_MODULE_UNLOAD) free_percpu(mod->refptr); -- cgit v1.2.2 From 6407ebb271fc34440b306f305e1efb7685eece26 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 5 Jun 2010 11:17:36 -0600 Subject: module: Make module sysfs functions private. These were placed in the header in ef665c1a06 to get the various SYSFS/MODULE config combintations to compile. That may have been necessary then, but it's not now. These functions are all local to module.c. Signed-off-by: Rusty Russell Cc: Randy Dunlap --- kernel/module.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index c690d9885797..808aa18dd661 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1323,7 +1323,7 @@ static void del_usage_links(struct module *mod) #endif } -int module_add_modinfo_attrs(struct module *mod) +static int module_add_modinfo_attrs(struct module *mod) { struct module_attribute *attr; struct module_attribute *temp_attr; @@ -1349,7 +1349,7 @@ int module_add_modinfo_attrs(struct module *mod) return error; } -void module_remove_modinfo_attrs(struct module *mod) +static void module_remove_modinfo_attrs(struct module *mod) { struct module_attribute *attr; int i; @@ -1365,7 +1365,7 @@ void module_remove_modinfo_attrs(struct module *mod) kfree(mod->modinfo_attrs); } -int mod_sysfs_init(struct module *mod) +static int mod_sysfs_init(struct module *mod) { int err; struct kobject *kobj; @@ -1399,7 +1399,7 @@ out: return err; } -int mod_sysfs_setup(struct module *mod, +static int mod_sysfs_setup(struct module *mod, struct kernel_param *kparam, unsigned int num_params) { @@ -1445,6 +1445,27 @@ static void mod_sysfs_fini(struct module *mod) #else /* CONFIG_SYSFS */ +static inline int mod_sysfs_init(struct module *mod) +{ + return 0; +} + +static inline int mod_sysfs_setup(struct module *mod, + struct kernel_param *kparam, + unsigned int num_params) +{ + return 0; +} + +static inline int module_add_modinfo_attrs(struct module *mod) +{ + return 0; +} + +static inline void module_remove_modinfo_attrs(struct module *mod) +{ +} + static void mod_sysfs_fini(struct module *mod) { } -- cgit v1.2.2 From 75676500f8298f0ee89db12db97294883c4b768e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 5 Jun 2010 11:17:36 -0600 Subject: module: make locking more fine-grained. Kay Sievers reports that we still have some contention over module loading which is slowing boot. Linus also disliked a previous "drop lock and regrab" patch to fix the bne2 "gave up waiting for init of module libcrc32c" message. This is more ambitious: we only grab the lock where we need it. Signed-off-by: Rusty Russell Cc: Brandon Philips Cc: Kay Sievers Cc: Linus Torvalds --- kernel/module.c | 65 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 23 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 808aa18dd661..d293c213c22c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -72,7 +72,11 @@ /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) -/* List of modules, protected by module_mutex or preempt_disable +/* + * Mutex protects: + * 1) List of modules (also safely readable with preempt_disable), + * 2) module_use links, + * 3) module_addr_min/module_addr_max. * (delete uses stop_machine/add uses RCU list operations). */ DEFINE_MUTEX(module_mutex); EXPORT_SYMBOL_GPL(module_mutex); @@ -90,7 +94,8 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq); static BLOCKING_NOTIFIER_HEAD(module_notify_list); -/* Bounds of module allocation, for speeding __module_address */ +/* Bounds of module allocation, for speeding __module_address. + * Protected by module_mutex. */ static unsigned long module_addr_min = -1UL, module_addr_max = 0; int register_module_notifier(struct notifier_block * nb) @@ -329,7 +334,7 @@ static bool find_symbol_in_section(const struct symsearch *syms, } /* Find a symbol and return it, along with, (optional) crc and - * (optional) module which owns it */ + * (optional) module which owns it. Needs preempt disabled or module_mutex. */ const struct kernel_symbol *find_symbol(const char *name, struct module **owner, const unsigned long **crc, @@ -576,7 +581,7 @@ static int add_module_usage(struct module *a, struct module *b) return 0; } -/* Module a uses b */ +/* Module a uses b: caller needs module_mutex() */ int use_module(struct module *a, struct module *b) { int err; @@ -610,6 +615,7 @@ static void module_unload_free(struct module *mod) { struct module_use *use, *tmp; + mutex_lock(&module_mutex); list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) { struct module *i = use->target; DEBUGP("%s unusing %s\n", mod->name, i->name); @@ -618,6 +624,7 @@ static void module_unload_free(struct module *mod) list_del(&use->target_list); kfree(use); } + mutex_unlock(&module_mutex); } #ifdef CONFIG_MODULE_FORCE_UNLOAD @@ -784,13 +791,14 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); async_synchronize_full(); - mutex_lock(&module_mutex); + /* Store the name of the last unloaded module for diagnostic purposes */ strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); ddebug_remove_module(mod->name); - free_module(mod); - out: + free_module(mod); + return 0; +out: mutex_unlock(&module_mutex); return ret; } @@ -1006,6 +1014,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, { const unsigned long *crc; + /* Since this should be found in kernel (which can't be removed), + * no locking is necessary. */ if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL, &crc, true, false)) BUG(); @@ -1048,8 +1058,7 @@ static inline int same_magic(const char *amagic, const char *bmagic, } #endif /* CONFIG_MODVERSIONS */ -/* Resolve a symbol for this module. I.e. if we find one, record usage. - Must be holding module_mutex. */ +/* Resolve a symbol for this module. I.e. if we find one, record usage. */ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, unsigned int versindex, const char *name, @@ -1059,6 +1068,7 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, const struct kernel_symbol *sym; const unsigned long *crc; + mutex_lock(&module_mutex); sym = find_symbol(name, &owner, &crc, !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); /* use_module can fail due to OOM, @@ -1068,6 +1078,7 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, || !use_module(mod, owner)) sym = NULL; } + mutex_unlock(&module_mutex); return sym; } @@ -1306,10 +1317,12 @@ static void add_usage_links(struct module *mod) struct module_use *use; int nowarn; + mutex_lock(&module_mutex); list_for_each_entry(use, &mod->target_list, target_list) { nowarn = sysfs_create_link(use->target->holders_dir, &mod->mkobj.kobj, mod->name); } + mutex_unlock(&module_mutex); #endif } @@ -1318,8 +1331,10 @@ static void del_usage_links(struct module *mod) #ifdef CONFIG_MODULE_UNLOAD struct module_use *use; + mutex_lock(&module_mutex); list_for_each_entry(use, &mod->target_list, target_list) sysfs_remove_link(use->target->holders_dir, mod->name); + mutex_unlock(&module_mutex); #endif } @@ -1497,13 +1512,15 @@ static int __unlink_module(void *_mod) return 0; } -/* Free a module, remove from lists, etc (must hold module_mutex). */ +/* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { trace_module_free(mod); /* Delete from various lists */ + mutex_lock(&module_mutex); stop_machine(__unlink_module, mod, NULL); + mutex_unlock(&module_mutex); remove_notes_attrs(mod); remove_sect_attrs(mod); mod_kobject_remove(mod); @@ -1575,7 +1592,14 @@ static int verify_export_symbols(struct module *mod) for (i = 0; i < ARRAY_SIZE(arr); i++) { for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { - if (find_symbol(s->name, &owner, NULL, true, false)) { + const struct kernel_symbol *sym; + + /* Stopping preemption makes find_symbol safe. */ + preempt_disable(); + sym = find_symbol(s->name, &owner, NULL, true, false); + preempt_enable(); + + if (sym) { printk(KERN_ERR "%s: exports duplicate symbol %s" " (owned by %s)\n", @@ -2021,11 +2045,13 @@ static void *module_alloc_update_bounds(unsigned long size) void *ret = module_alloc(size); if (ret) { + mutex_lock(&module_mutex); /* Update module bounds. */ if ((unsigned long)ret < module_addr_min) module_addr_min = (unsigned long)ret; if ((unsigned long)ret + size > module_addr_max) module_addr_max = (unsigned long)ret + size; + mutex_unlock(&module_mutex); } return ret; } @@ -2482,7 +2508,9 @@ static noinline struct module *load_module(void __user *umod, * function to insert in a way safe to concurrent readers. * The mutex protects against concurrent writers. */ + mutex_lock(&module_mutex); list_add_rcu(&mod->list, &modules); + mutex_unlock(&module_mutex); err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL); if (err < 0) @@ -2504,8 +2532,10 @@ static noinline struct module *load_module(void __user *umod, return mod; unlink: + mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + mutex_unlock(&module_mutex); synchronize_sched(); module_arch_cleanup(mod); cleanup: @@ -2556,19 +2586,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, if (!capable(CAP_SYS_MODULE) || modules_disabled) return -EPERM; - /* Only one module load at a time, please */ - if (mutex_lock_interruptible(&module_mutex) != 0) - return -EINTR; - /* Do all the hard work */ mod = load_module(umod, len, uargs); - if (IS_ERR(mod)) { - mutex_unlock(&module_mutex); + if (IS_ERR(mod)) return PTR_ERR(mod); - } - - /* Drop lock so they can recurse */ - mutex_unlock(&module_mutex); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); @@ -2585,9 +2606,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, module_put(mod); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); - mutex_lock(&module_mutex); free_module(mod); - mutex_unlock(&module_mutex); wake_up(&module_wq); return ret; } -- cgit v1.2.2 From 3bafeb6247042dcbb72b0141ec7c7107de9f0b99 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 5 Jun 2010 11:17:36 -0600 Subject: module: move find_module check to end I think Rusty may have made the lock a bit _too_ finegrained there, and didn't add it to some places that needed it. It looks, for example, like PATCH 1/2 actually drops the lock in places where it's needed ("find_module()" is documented to need it, but now load_module() didn't hold it at all when it did the find_module()). Rather than adding a new "module_loading" list, I think we should be able to just use the existing "modules" list, and just fix up the locking a bit. In fact, maybe we could just move the "look up existing module" a bit later - optimistically assuming that the module doesn't exist, and then just undoing the work if it turns out that we were wrong, just before adding ourselves to the list. Signed-off-by: Rusty Russell --- kernel/module.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index d293c213c22c..28450047852a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2226,11 +2226,6 @@ static noinline struct module *load_module(void __user *umod, goto free_mod; } - if (find_module(mod->name)) { - err = -EEXIST; - goto free_mod; - } - mod->state = MODULE_STATE_COMING; /* Allow arches to frob section contents and sizes. */ @@ -2509,6 +2504,12 @@ static noinline struct module *load_module(void __user *umod, * The mutex protects against concurrent writers. */ mutex_lock(&module_mutex); + if (find_module(mod->name)) { + err = -EEXIST; + /* This will also unlock the mutex */ + goto already_exists; + } + list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -2535,6 +2536,7 @@ static noinline struct module *load_module(void __user *umod, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + already_exists: mutex_unlock(&module_mutex); synchronize_sched(); module_arch_cleanup(mod); -- cgit v1.2.2 From be593f4ce4eb1bd40e38fdc403371f149f6f12eb Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 5 Jun 2010 11:17:37 -0600 Subject: module: verify_export_symbols under the lock It disabled preempt so it was "safe", but nothing stops another module slipping in before this module is added to the global list now we don't hold the lock the whole time. So we check this just after we check for duplicate modules, and just before we put the module in the global list. (find_symbol finds symbols in coming and going modules, too). Signed-off-by: Rusty Russell --- kernel/module.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 28450047852a..f99558e1945a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1571,6 +1571,8 @@ EXPORT_SYMBOL_GPL(__symbol_get); /* * Ensure that an exported symbol [global namespace] does not already exist * in the kernel or in some other module's exported symbol table. + * + * You must hold the module_mutex. */ static int verify_export_symbols(struct module *mod) { @@ -1592,14 +1594,7 @@ static int verify_export_symbols(struct module *mod) for (i = 0; i < ARRAY_SIZE(arr); i++) { for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { - const struct kernel_symbol *sym; - - /* Stopping preemption makes find_symbol safe. */ - preempt_disable(); - sym = find_symbol(s->name, &owner, NULL, true, false); - preempt_enable(); - - if (sym) { + if (find_symbol(s->name, &owner, NULL, true, false)) { printk(KERN_ERR "%s: exports duplicate symbol %s" " (owned by %s)\n", @@ -2440,11 +2435,6 @@ static noinline struct module *load_module(void __user *umod, goto cleanup; } - /* Find duplicate symbols */ - err = verify_export_symbols(mod); - if (err < 0) - goto cleanup; - /* Set up and sort exception table */ mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); @@ -2506,10 +2496,14 @@ static noinline struct module *load_module(void __user *umod, mutex_lock(&module_mutex); if (find_module(mod->name)) { err = -EEXIST; - /* This will also unlock the mutex */ - goto already_exists; + goto unlock; } + /* Find duplicate symbols */ + err = verify_export_symbols(mod); + if (err < 0) + goto unlock; + list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -2536,7 +2530,7 @@ static noinline struct module *load_module(void __user *umod, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); - already_exists: + unlock: mutex_unlock(&module_mutex); synchronize_sched(); module_arch_cleanup(mod); -- cgit v1.2.2 From 9bea7f23952d5948f8e5dfdff4de09bb9981fb5f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 5 Jun 2010 11:17:37 -0600 Subject: module: fix bne2 "gave up waiting for init of module libcrc32c" Problem: it's hard to avoid an init routine stumbling over a request_module these days. And it's not clear it's always a bad idea: for example, a module like kvm with dynamic dependencies on kvm-intel or kvm-amd would be neater if it could simply request_module the right one. In this particular case, it's libcrc32c: libcrc32c_mod_init crypto_alloc_shash crypto_alloc_tfm crypto_find_alg crypto_alg_mod_lookup crypto_larval_lookup request_module If another module is waiting inside resolve_symbol() for libcrc32c to finish initializing (ie. bne2 depends on libcrc32c) then it does so holding the module lock, and our request_module() can't make progress until that is released. Waiting inside resolve_symbol() without the lock isn't all that hard: we just need to pass the -EBUSY up the call chain so we can sleep where we don't hold the lock. Error reporting is a bit trickier: we need to copy the name of the unfinished module before releasing the lock. Other notes: 1) This also fixes a theoretical issue where a weak dependency would allow symbol version mismatches to be ignored. 2) We rename use_module to ref_module to make life easier for the only external user (the out-of-tree ksplice patches). Signed-off-by: Rusty Russell Cc: Linus Torvalds Cc: Tim Abbot Tested-by: Brandon Philips --- kernel/module.c | 91 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 32 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index f99558e1945a..8c6b42840dd1 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -582,33 +582,26 @@ static int add_module_usage(struct module *a, struct module *b) } /* Module a uses b: caller needs module_mutex() */ -int use_module(struct module *a, struct module *b) +int ref_module(struct module *a, struct module *b) { int err; - if (b == NULL || already_uses(a, b)) return 1; - - /* If we're interrupted or time out, we fail. */ - if (wait_event_interruptible_timeout( - module_wq, (err = strong_try_module_get(b)) != -EBUSY, - 30 * HZ) <= 0) { - printk("%s: gave up waiting for init of module %s.\n", - a->name, b->name); + if (b == NULL || already_uses(a, b)) return 0; - } - /* If strong_try_module_get() returned a different error, we fail. */ + /* If module isn't available, we fail. */ + err = strong_try_module_get(b); if (err) - return 0; + return err; err = add_module_usage(a, b); if (err) { module_put(b); - return 0; + return err; } - return 1; + return 0; } -EXPORT_SYMBOL_GPL(use_module); +EXPORT_SYMBOL_GPL(ref_module); /* Clear the unload stuff of the module. */ static void module_unload_free(struct module *mod) @@ -893,11 +886,11 @@ static inline void module_unload_free(struct module *mod) { } -int use_module(struct module *a, struct module *b) +int ref_module(struct module *a, struct module *b) { - return strong_try_module_get(b) == 0; + return strong_try_module_get(b); } -EXPORT_SYMBOL_GPL(use_module); +EXPORT_SYMBOL_GPL(ref_module); static inline void module_unload_init(struct module *mod) { @@ -1062,26 +1055,58 @@ static inline int same_magic(const char *amagic, const char *bmagic, static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, unsigned int versindex, const char *name, - struct module *mod) + struct module *mod, + char ownername[]) { struct module *owner; const struct kernel_symbol *sym; const unsigned long *crc; + int err; mutex_lock(&module_mutex); sym = find_symbol(name, &owner, &crc, !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); - /* use_module can fail due to OOM, - or module initialization or unloading */ - if (sym) { - if (!check_version(sechdrs, versindex, name, mod, crc, owner) - || !use_module(mod, owner)) - sym = NULL; + if (!sym) + goto unlock; + + if (!check_version(sechdrs, versindex, name, mod, crc, owner)) { + sym = ERR_PTR(-EINVAL); + goto getname; } + + err = ref_module(mod, owner); + if (err) { + sym = ERR_PTR(err); + goto getname; + } + +getname: + /* We must make copy under the lock if we failed to get ref. */ + strncpy(ownername, module_name(owner), MODULE_NAME_LEN); +unlock: mutex_unlock(&module_mutex); return sym; } +static const struct kernel_symbol *resolve_symbol_wait(Elf_Shdr *sechdrs, + unsigned int versindex, + const char *name, + struct module *mod) +{ + const struct kernel_symbol *ksym; + char ownername[MODULE_NAME_LEN]; + + if (wait_event_interruptible_timeout(module_wq, + !IS_ERR(ksym = resolve_symbol(sechdrs, versindex, name, + mod, ownername)) || + PTR_ERR(ksym) != -EBUSY, + 30 * HZ) <= 0) { + printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n", + mod->name, ownername); + } + return ksym; +} + /* * /sys/module/foo/sections stuff * J. Corbet @@ -1638,21 +1663,23 @@ static int simplify_symbols(Elf_Shdr *sechdrs, break; case SHN_UNDEF: - ksym = resolve_symbol(sechdrs, versindex, - strtab + sym[i].st_name, mod); + ksym = resolve_symbol_wait(sechdrs, versindex, + strtab + sym[i].st_name, + mod); /* Ok if resolved. */ - if (ksym) { + if (ksym && !IS_ERR(ksym)) { sym[i].st_value = ksym->value; break; } /* Ok if weak. */ - if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK) + if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK) break; - printk(KERN_WARNING "%s: Unknown symbol %s\n", - mod->name, strtab + sym[i].st_name); - ret = -ENOENT; + printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n", + mod->name, strtab + sym[i].st_name, + PTR_ERR(ksym)); + ret = PTR_ERR(ksym) ?: -ENOENT; break; default: -- cgit v1.2.2 From ff49d74ad383f54041378144ca1a229ee9aeaa59 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Sat, 3 Jul 2010 13:07:35 +1000 Subject: module: initialize module dynamic debug later We should initialize the module dynamic debug datastructures only after determining that the module is not loaded yet. This fixes a bug that introduced in 2.6.35-rc2, where when a trying to load a module twice, we also load it's dynamic printing data twice which causes all sorts of nasty issues. Also handle the dynamic debug cleanup later on failure. Signed-off-by: Yehuda Sadeh Signed-off-by: Rusty Russell (removed a #ifdef) Signed-off-by: Linus Torvalds --- kernel/module.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 8c6b42840dd1..5d2d28197c82 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2062,6 +2062,12 @@ static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num) #endif } +static void dynamic_debug_remove(struct _ddebug *debug) +{ + if (debug) + ddebug_remove_module(debug->modname); +} + static void *module_alloc_update_bounds(unsigned long size) { void *ret = module_alloc(size); @@ -2124,6 +2130,8 @@ static noinline struct module *load_module(void __user *umod, void *ptr = NULL; /* Stops spurious gcc warning */ unsigned long symoffs, stroffs, *strmap; void __percpu *percpu; + struct _ddebug *debug = NULL; + unsigned int num_debug = 0; mm_segment_t old_fs; @@ -2476,15 +2484,9 @@ static noinline struct module *load_module(void __user *umod, kfree(strmap); strmap = NULL; - if (!mod->taints) { - struct _ddebug *debug; - unsigned int num_debug; - + if (!mod->taints) debug = section_objs(hdr, sechdrs, secstrings, "__verbose", sizeof(*debug), &num_debug); - if (debug) - dynamic_debug_setup(debug, num_debug); - } err = module_finalize(hdr, sechdrs, mod); if (err < 0) @@ -2526,10 +2528,13 @@ static noinline struct module *load_module(void __user *umod, goto unlock; } + if (debug) + dynamic_debug_setup(debug, num_debug); + /* Find duplicate symbols */ err = verify_export_symbols(mod); if (err < 0) - goto unlock; + goto ddebug; list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -2557,6 +2562,8 @@ static noinline struct module *load_module(void __user *umod, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + ddebug: + dynamic_debug_remove(debug); unlock: mutex_unlock(&module_mutex); synchronize_sched(); -- cgit v1.2.2 From b82bab4bbe9efa7bc7177fc20620fff19bd95484 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Tue, 27 Jul 2010 13:18:01 -0700 Subject: dynamic debug: move ddebug_remove_module() down into free_module() The command echo "file ec.c +p" >/sys/kernel/debug/dynamic_debug/control causes an oops. Move the call to ddebug_remove_module() down into free_module(). In this way it should be called from all error paths. Currently, we are missing the remove if the module init routine fails. Signed-off-by: Jason Baron Reported-by: Thomas Renninger Tested-by: Thomas Renninger Cc: [2.6.32+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/module.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 5d2d28197c82..6c562828c85c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -787,7 +787,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, /* Store the name of the last unloaded module for diagnostic purposes */ strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); - ddebug_remove_module(mod->name); free_module(mod); return 0; @@ -1550,6 +1549,9 @@ static void free_module(struct module *mod) remove_sect_attrs(mod); mod_kobject_remove(mod); + /* Remove dynamic debug info */ + ddebug_remove_module(mod->name); + /* Arch-specific cleanup. */ module_arch_cleanup(mod); -- cgit v1.2.2