aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Nick Piggin <npiggin@suse.de> 2010-04-01 04:09:40 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-04-05 22:50:02 -0400
commit: 5fbfb18d7a5b846946d52c4a10e3aaa213ec31b6 (patch)
tree: bcfa13dec8cb2527c3007b3e5f957cb50e571c64
parent: 7da23b86e14b77c094b11a9fa5ef5b3758fc9193 (diff)
Fix up possibly racy module refcounting
Module refcounting is implemented with a per-cpu counter for speed. However, there is a race when tallying the counter where a reference may be taken by one CPU and released by another. Reference count summation may then see the decrement without having seen the previous increment, leading to a lower-than-expected count. A module which never has its actual reference count drop below 1 may return a reference count of 0 due to this race.

Module removal generally runs under stop_machine, which prevents this race from causing bugs due to removal of in-use modules. However, there are other real bugs in module.c code and driver code (module_refcount is exported) where the callers do not run under stop_machine.

Fix this by maintaining running per-cpu counters for the number of module refcount increments and the number of refcount decrements. The increments are tallied after the decrements, so any decrement seen will always have its corresponding increment counted. The final refcount is the difference of the total increments and decrements, preventing a low refcount from being returned.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/module.h 14
-rw-r--r-- kernel/module.c 35
2 files changed, 34 insertions, 15 deletions
diff --git a/include/linux/module.h b/include/linux/module.h
index 8bd399a00343..515d53ae6a79 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -368,7 +368,8 @@ struct module
368 void (*exit)(void); 368 void (*exit)(void);
369 369
370 struct module_ref { 370 struct module_ref {
371 int count; 371 unsigned int incs;
372 unsigned int decs;
372 } __percpu *refptr; 373 } __percpu *refptr;
373#endif 374#endif
374 375
@@ -463,9 +464,9 @@ static inline void __module_get(struct module *module)
463{ 464{
464 if (module) { 465 if (module) {
465 preempt_disable(); 466 preempt_disable();
466 __this_cpu_inc(module->refptr->count); 467 __this_cpu_inc(module->refptr->incs);
467 trace_module_get(module, _THIS_IP_, 468 trace_module_get(module, _THIS_IP_,
468 __this_cpu_read(module->refptr->count)); 469 __this_cpu_read(module->refptr->incs));
469 preempt_enable(); 470 preempt_enable();
470 } 471 }
471} 472}
@@ -478,11 +479,10 @@ static inline int try_module_get(struct module *module)
478 preempt_disable(); 479 preempt_disable();
479 480
480 if (likely(module_is_live(module))) { 481 if (likely(module_is_live(module))) {
481 __this_cpu_inc(module->refptr->count); 482 __this_cpu_inc(module->refptr->incs);
482 trace_module_get(module, _THIS_IP_, 483 trace_module_get(module, _THIS_IP_,
483 __this_cpu_read(module->refptr->count)); 484 __this_cpu_read(module->refptr->incs));
484 } 485 } else
485 else
486 ret = 0; 486 ret = 0;
487 487
488 preempt_enable(); 488 preempt_enable();
diff --git a/kernel/module.c b/kernel/module.c
index 9f8d23d8b3a8..1016b75b026a 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -521,11 +521,13 @@ static void module_unload_init(struct module *mod)
521 int cpu; 521 int cpu;
522 522
523 INIT_LIST_HEAD(&mod->modules_which_use_me); 523 INIT_LIST_HEAD(&mod->modules_which_use_me);
524 for_each_possible_cpu(cpu) 524 for_each_possible_cpu(cpu) {
525 per_cpu_ptr(mod->refptr, cpu)->count = 0; 525 per_cpu_ptr(mod->refptr, cpu)->incs = 0;
526 per_cpu_ptr(mod->refptr, cpu)->decs = 0;
527 }
526 528
527 /* Hold reference count during initialization. */ 529 /* Hold reference count during initialization. */
528 __this_cpu_write(mod->refptr->count, 1); 530 __this_cpu_write(mod->refptr->incs, 1);
529 /* Backwards compatibility macros put refcount during init. */ 531 /* Backwards compatibility macros put refcount during init. */
530 mod->waiter = current; 532 mod->waiter = current;
531} 533}
@@ -664,12 +666,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
664 666
665unsigned int module_refcount(struct module *mod) 667unsigned int module_refcount(struct module *mod)
666{ 668{
667 unsigned int total = 0; 669 unsigned int incs = 0, decs = 0;
668 int cpu; 670 int cpu;
669 671
670 for_each_possible_cpu(cpu) 672 for_each_possible_cpu(cpu)
671 total += per_cpu_ptr(mod->refptr, cpu)->count; 673 decs += per_cpu_ptr(mod->refptr, cpu)->decs;
672 return total; 674 /*
675 * ensure the incs are added up after the decs.
676 * module_put ensures incs are visible before decs with smp_wmb.
677 *
678 * This 2-count scheme avoids the situation where the refcount
679 * for CPU0 is read, then CPU0 increments the module refcount,
680 * then CPU1 drops that refcount, then the refcount for CPU1 is
681 * read. We would record a decrement but not its corresponding
682 * increment so we would see a low count (disaster).
683 *
684 * Rare situation? But module_refcount can be preempted, and we
685 * might be tallying up 4096+ CPUs. So it is not impossible.
686 */
687 smp_rmb();
688 for_each_possible_cpu(cpu)
689 incs += per_cpu_ptr(mod->refptr, cpu)->incs;
690 return incs - decs;
673} 691}
674EXPORT_SYMBOL(module_refcount); 692EXPORT_SYMBOL(module_refcount);
675 693
@@ -846,10 +864,11 @@ void module_put(struct module *module)
846{ 864{
847 if (module) { 865 if (module) {
848 preempt_disable(); 866 preempt_disable();
849 __this_cpu_dec(module->refptr->count); 867 smp_wmb(); /* see comment in module_refcount */
868 __this_cpu_inc(module->refptr->decs);
850 869
851 trace_module_put(module, _RET_IP_, 870 trace_module_put(module, _RET_IP_,
852 __this_cpu_read(module->refptr->count)); 871 __this_cpu_read(module->refptr->decs));
853 /* Maybe they're waiting for us to drop reference? */ 872 /* Maybe they're waiting for us to drop reference? */
854 if (unlikely(!module_is_live(module))) 873 if (unlikely(!module_is_live(module)))
855 wake_up_process(module->waiter); 874 wake_up_process(module->waiter);