diff options
Diffstat (limited to 'kernel/module.c')
-rw-r--r-- | kernel/module.c | 181 |
1 files changed, 118 insertions, 63 deletions
diff --git a/kernel/module.c b/kernel/module.c index f82386bd9ee9..e2564580f3f1 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -59,8 +59,6 @@ | |||
59 | #define CREATE_TRACE_POINTS | 59 | #define CREATE_TRACE_POINTS |
60 | #include <trace/events/module.h> | 60 | #include <trace/events/module.h> |
61 | 61 | ||
62 | EXPORT_TRACEPOINT_SYMBOL(module_get); | ||
63 | |||
64 | #if 0 | 62 | #if 0 |
65 | #define DEBUGP printk | 63 | #define DEBUGP printk |
66 | #else | 64 | #else |
@@ -370,27 +368,33 @@ EXPORT_SYMBOL_GPL(find_module); | |||
370 | 368 | ||
371 | #ifdef CONFIG_SMP | 369 | #ifdef CONFIG_SMP |
372 | 370 | ||
373 | static void *percpu_modalloc(unsigned long size, unsigned long align, | 371 | static inline void __percpu *mod_percpu(struct module *mod) |
374 | const char *name) | ||
375 | { | 372 | { |
376 | void *ptr; | 373 | return mod->percpu; |
374 | } | ||
377 | 375 | ||
376 | static int percpu_modalloc(struct module *mod, | ||
377 | unsigned long size, unsigned long align) | ||
378 | { | ||
378 | if (align > PAGE_SIZE) { | 379 | if (align > PAGE_SIZE) { |
379 | printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", | 380 | printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", |
380 | name, align, PAGE_SIZE); | 381 | mod->name, align, PAGE_SIZE); |
381 | align = PAGE_SIZE; | 382 | align = PAGE_SIZE; |
382 | } | 383 | } |
383 | 384 | ||
384 | ptr = __alloc_reserved_percpu(size, align); | 385 | mod->percpu = __alloc_reserved_percpu(size, align); |
385 | if (!ptr) | 386 | if (!mod->percpu) { |
386 | printk(KERN_WARNING | 387 | printk(KERN_WARNING |
387 | "Could not allocate %lu bytes percpu data\n", size); | 388 | "Could not allocate %lu bytes percpu data\n", size); |
388 | return ptr; | 389 | return -ENOMEM; |
390 | } | ||
391 | mod->percpu_size = size; | ||
392 | return 0; | ||
389 | } | 393 | } |
390 | 394 | ||
391 | static void percpu_modfree(void *freeme) | 395 | static void percpu_modfree(struct module *mod) |
392 | { | 396 | { |
393 | free_percpu(freeme); | 397 | free_percpu(mod->percpu); |
394 | } | 398 | } |
395 | 399 | ||
396 | static unsigned int find_pcpusec(Elf_Ehdr *hdr, | 400 | static unsigned int find_pcpusec(Elf_Ehdr *hdr, |
@@ -400,24 +404,62 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr, | |||
400 | return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); | 404 | return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); |
401 | } | 405 | } |
402 | 406 | ||
403 | static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) | 407 | static void percpu_modcopy(struct module *mod, |
408 | const void *from, unsigned long size) | ||
404 | { | 409 | { |
405 | int cpu; | 410 | int cpu; |
406 | 411 | ||
407 | for_each_possible_cpu(cpu) | 412 | for_each_possible_cpu(cpu) |
408 | memcpy(pcpudest + per_cpu_offset(cpu), from, size); | 413 | memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); |
414 | } | ||
415 | |||
416 | /** | ||
417 | * is_module_percpu_address - test whether address is from module static percpu | ||
418 | * @addr: address to test | ||
419 | * | ||
420 | * Test whether @addr belongs to module static percpu area. | ||
421 | * | ||
422 | * RETURNS: | ||
423 | * %true if @addr is from module static percpu area | ||
424 | */ | ||
425 | bool is_module_percpu_address(unsigned long addr) | ||
426 | { | ||
427 | struct module *mod; | ||
428 | unsigned int cpu; | ||
429 | |||
430 | preempt_disable(); | ||
431 | |||
432 | list_for_each_entry_rcu(mod, &modules, list) { | ||
433 | if (!mod->percpu_size) | ||
434 | continue; | ||
435 | for_each_possible_cpu(cpu) { | ||
436 | void *start = per_cpu_ptr(mod->percpu, cpu); | ||
437 | |||
438 | if ((void *)addr >= start && | ||
439 | (void *)addr < start + mod->percpu_size) { | ||
440 | preempt_enable(); | ||
441 | return true; | ||
442 | } | ||
443 | } | ||
444 | } | ||
445 | |||
446 | preempt_enable(); | ||
447 | return false; | ||
409 | } | 448 | } |
410 | 449 | ||
411 | #else /* ... !CONFIG_SMP */ | 450 | #else /* ... !CONFIG_SMP */ |
412 | 451 | ||
413 | static inline void *percpu_modalloc(unsigned long size, unsigned long align, | 452 | static inline void __percpu *mod_percpu(struct module *mod) |
414 | const char *name) | ||
415 | { | 453 | { |
416 | return NULL; | 454 | return NULL; |
417 | } | 455 | } |
418 | static inline void percpu_modfree(void *pcpuptr) | 456 | static inline int percpu_modalloc(struct module *mod, |
457 | unsigned long size, unsigned long align) | ||
458 | { | ||
459 | return -ENOMEM; | ||
460 | } | ||
461 | static inline void percpu_modfree(struct module *mod) | ||
419 | { | 462 | { |
420 | BUG(); | ||
421 | } | 463 | } |
422 | static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, | 464 | static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, |
423 | Elf_Shdr *sechdrs, | 465 | Elf_Shdr *sechdrs, |
@@ -425,12 +467,16 @@ static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, | |||
425 | { | 467 | { |
426 | return 0; | 468 | return 0; |
427 | } | 469 | } |
428 | static inline void percpu_modcopy(void *pcpudst, const void *src, | 470 | static inline void percpu_modcopy(struct module *mod, |
429 | unsigned long size) | 471 | const void *from, unsigned long size) |
430 | { | 472 | { |
431 | /* pcpusec should be 0, and size of that section should be 0. */ | 473 | /* pcpusec should be 0, and size of that section should be 0. */ |
432 | BUG_ON(size != 0); | 474 | BUG_ON(size != 0); |
433 | } | 475 | } |
476 | bool is_module_percpu_address(unsigned long addr) | ||
477 | { | ||
478 | return false; | ||
479 | } | ||
434 | 480 | ||
435 | #endif /* CONFIG_SMP */ | 481 | #endif /* CONFIG_SMP */ |
436 | 482 | ||
@@ -467,16 +513,22 @@ MODINFO_ATTR(srcversion); | |||
467 | static char last_unloaded_module[MODULE_NAME_LEN+1]; | 513 | static char last_unloaded_module[MODULE_NAME_LEN+1]; |
468 | 514 | ||
469 | #ifdef CONFIG_MODULE_UNLOAD | 515 | #ifdef CONFIG_MODULE_UNLOAD |
516 | |||
517 | EXPORT_TRACEPOINT_SYMBOL(module_get); | ||
518 | |||
470 | /* Init the unload section of the module. */ | 519 | /* Init the unload section of the module. */ |
471 | static void module_unload_init(struct module *mod) | 520 | static void module_unload_init(struct module *mod) |
472 | { | 521 | { |
473 | int cpu; | 522 | int cpu; |
474 | 523 | ||
475 | INIT_LIST_HEAD(&mod->modules_which_use_me); | 524 | INIT_LIST_HEAD(&mod->modules_which_use_me); |
476 | for_each_possible_cpu(cpu) | 525 | for_each_possible_cpu(cpu) { |
477 | local_set(__module_ref_addr(mod, cpu), 0); | 526 | per_cpu_ptr(mod->refptr, cpu)->incs = 0; |
527 | per_cpu_ptr(mod->refptr, cpu)->decs = 0; | ||
528 | } | ||
529 | |||
478 | /* Hold reference count during initialization. */ | 530 | /* Hold reference count during initialization. */ |
479 | local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1); | 531 | __this_cpu_write(mod->refptr->incs, 1); |
480 | /* Backwards compatibility macros put refcount during init. */ | 532 | /* Backwards compatibility macros put refcount during init. */ |
481 | mod->waiter = current; | 533 | mod->waiter = current; |
482 | } | 534 | } |
@@ -615,12 +667,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced) | |||
615 | 667 | ||
616 | unsigned int module_refcount(struct module *mod) | 668 | unsigned int module_refcount(struct module *mod) |
617 | { | 669 | { |
618 | unsigned int total = 0; | 670 | unsigned int incs = 0, decs = 0; |
619 | int cpu; | 671 | int cpu; |
620 | 672 | ||
621 | for_each_possible_cpu(cpu) | 673 | for_each_possible_cpu(cpu) |
622 | total += local_read(__module_ref_addr(mod, cpu)); | 674 | decs += per_cpu_ptr(mod->refptr, cpu)->decs; |
623 | return total; | 675 | /* |
676 | * ensure the incs are added up after the decs. | ||
677 | * module_put ensures incs are visible before decs with smp_wmb. | ||
678 | * | ||
679 | * This 2-count scheme avoids the situation where the refcount | ||
680 | * for CPU0 is read, then CPU0 increments the module refcount, | ||
681 | * then CPU1 drops that refcount, then the refcount for CPU1 is | ||
682 | * read. We would record a decrement but not its corresponding | ||
683 | * increment so we would see a low count (disaster). | ||
684 | * | ||
685 | * Rare situation? But module_refcount can be preempted, and we | ||
686 | * might be tallying up 4096+ CPUs. So it is not impossible. | ||
687 | */ | ||
688 | smp_rmb(); | ||
689 | for_each_possible_cpu(cpu) | ||
690 | incs += per_cpu_ptr(mod->refptr, cpu)->incs; | ||
691 | return incs - decs; | ||
624 | } | 692 | } |
625 | EXPORT_SYMBOL(module_refcount); | 693 | EXPORT_SYMBOL(module_refcount); |
626 | 694 | ||
@@ -656,16 +724,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
656 | return -EFAULT; | 724 | return -EFAULT; |
657 | name[MODULE_NAME_LEN-1] = '\0'; | 725 | name[MODULE_NAME_LEN-1] = '\0'; |
658 | 726 | ||
659 | /* Create stop_machine threads since free_module relies on | 727 | if (mutex_lock_interruptible(&module_mutex) != 0) |
660 | * a non-failing stop_machine call. */ | 728 | return -EINTR; |
661 | ret = stop_machine_create(); | ||
662 | if (ret) | ||
663 | return ret; | ||
664 | |||
665 | if (mutex_lock_interruptible(&module_mutex) != 0) { | ||
666 | ret = -EINTR; | ||
667 | goto out_stop; | ||
668 | } | ||
669 | 729 | ||
670 | mod = find_module(name); | 730 | mod = find_module(name); |
671 | if (!mod) { | 731 | if (!mod) { |
@@ -725,8 +785,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
725 | 785 | ||
726 | out: | 786 | out: |
727 | mutex_unlock(&module_mutex); | 787 | mutex_unlock(&module_mutex); |
728 | out_stop: | ||
729 | stop_machine_destroy(); | ||
730 | return ret; | 788 | return ret; |
731 | } | 789 | } |
732 | 790 | ||
@@ -796,14 +854,15 @@ static struct module_attribute refcnt = { | |||
796 | void module_put(struct module *module) | 854 | void module_put(struct module *module) |
797 | { | 855 | { |
798 | if (module) { | 856 | if (module) { |
799 | unsigned int cpu = get_cpu(); | 857 | preempt_disable(); |
800 | local_dec(__module_ref_addr(module, cpu)); | 858 | smp_wmb(); /* see comment in module_refcount */ |
801 | trace_module_put(module, _RET_IP_, | 859 | __this_cpu_inc(module->refptr->decs); |
802 | local_read(__module_ref_addr(module, cpu))); | 860 | |
861 | trace_module_put(module, _RET_IP_); | ||
803 | /* Maybe they're waiting for us to drop reference? */ | 862 | /* Maybe they're waiting for us to drop reference? */ |
804 | if (unlikely(!module_is_live(module))) | 863 | if (unlikely(!module_is_live(module))) |
805 | wake_up_process(module->waiter); | 864 | wake_up_process(module->waiter); |
806 | put_cpu(); | 865 | preempt_enable(); |
807 | } | 866 | } |
808 | } | 867 | } |
809 | EXPORT_SYMBOL(module_put); | 868 | EXPORT_SYMBOL(module_put); |
@@ -1083,6 +1142,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, | |||
1083 | if (sattr->name == NULL) | 1142 | if (sattr->name == NULL) |
1084 | goto out; | 1143 | goto out; |
1085 | sect_attrs->nsections++; | 1144 | sect_attrs->nsections++; |
1145 | sysfs_attr_init(&sattr->mattr.attr); | ||
1086 | sattr->mattr.show = module_sect_show; | 1146 | sattr->mattr.show = module_sect_show; |
1087 | sattr->mattr.store = NULL; | 1147 | sattr->mattr.store = NULL; |
1088 | sattr->mattr.attr.name = sattr->name; | 1148 | sattr->mattr.attr.name = sattr->name; |
@@ -1178,6 +1238,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect, | |||
1178 | if (sect_empty(&sechdrs[i])) | 1238 | if (sect_empty(&sechdrs[i])) |
1179 | continue; | 1239 | continue; |
1180 | if (sechdrs[i].sh_type == SHT_NOTE) { | 1240 | if (sechdrs[i].sh_type == SHT_NOTE) { |
1241 | sysfs_bin_attr_init(nattr); | ||
1181 | nattr->attr.name = mod->sect_attrs->attrs[loaded].name; | 1242 | nattr->attr.name = mod->sect_attrs->attrs[loaded].name; |
1182 | nattr->attr.mode = S_IRUGO; | 1243 | nattr->attr.mode = S_IRUGO; |
1183 | nattr->size = sechdrs[i].sh_size; | 1244 | nattr->size = sechdrs[i].sh_size; |
@@ -1250,6 +1311,7 @@ int module_add_modinfo_attrs(struct module *mod) | |||
1250 | if (!attr->test || | 1311 | if (!attr->test || |
1251 | (attr->test && attr->test(mod))) { | 1312 | (attr->test && attr->test(mod))) { |
1252 | memcpy(temp_attr, attr, sizeof(*temp_attr)); | 1313 | memcpy(temp_attr, attr, sizeof(*temp_attr)); |
1314 | sysfs_attr_init(&temp_attr->attr); | ||
1253 | error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); | 1315 | error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); |
1254 | ++temp_attr; | 1316 | ++temp_attr; |
1255 | } | 1317 | } |
@@ -1395,11 +1457,10 @@ static void free_module(struct module *mod) | |||
1395 | /* This may be NULL, but that's OK */ | 1457 | /* This may be NULL, but that's OK */ |
1396 | module_free(mod, mod->module_init); | 1458 | module_free(mod, mod->module_init); |
1397 | kfree(mod->args); | 1459 | kfree(mod->args); |
1398 | if (mod->percpu) | 1460 | percpu_modfree(mod); |
1399 | percpu_modfree(mod->percpu); | 1461 | #if defined(CONFIG_MODULE_UNLOAD) |
1400 | #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) | ||
1401 | if (mod->refptr) | 1462 | if (mod->refptr) |
1402 | percpu_modfree(mod->refptr); | 1463 | free_percpu(mod->refptr); |
1403 | #endif | 1464 | #endif |
1404 | /* Free lock-classes: */ | 1465 | /* Free lock-classes: */ |
1405 | lockdep_free_key_range(mod->module_core, mod->core_size); | 1466 | lockdep_free_key_range(mod->module_core, mod->core_size); |
@@ -1515,7 +1576,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs, | |||
1515 | default: | 1576 | default: |
1516 | /* Divert to percpu allocation if a percpu var. */ | 1577 | /* Divert to percpu allocation if a percpu var. */ |
1517 | if (sym[i].st_shndx == pcpuindex) | 1578 | if (sym[i].st_shndx == pcpuindex) |
1518 | secbase = (unsigned long)mod->percpu; | 1579 | secbase = (unsigned long)mod_percpu(mod); |
1519 | else | 1580 | else |
1520 | secbase = sechdrs[sym[i].st_shndx].sh_addr; | 1581 | secbase = sechdrs[sym[i].st_shndx].sh_addr; |
1521 | sym[i].st_value += secbase; | 1582 | sym[i].st_value += secbase; |
@@ -1949,7 +2010,7 @@ static noinline struct module *load_module(void __user *umod, | |||
1949 | unsigned int modindex, versindex, infoindex, pcpuindex; | 2010 | unsigned int modindex, versindex, infoindex, pcpuindex; |
1950 | struct module *mod; | 2011 | struct module *mod; |
1951 | long err = 0; | 2012 | long err = 0; |
1952 | void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ | 2013 | void *ptr = NULL; /* Stops spurious gcc warning */ |
1953 | unsigned long symoffs, stroffs, *strmap; | 2014 | unsigned long symoffs, stroffs, *strmap; |
1954 | 2015 | ||
1955 | mm_segment_t old_fs; | 2016 | mm_segment_t old_fs; |
@@ -2089,15 +2150,11 @@ static noinline struct module *load_module(void __user *umod, | |||
2089 | 2150 | ||
2090 | if (pcpuindex) { | 2151 | if (pcpuindex) { |
2091 | /* We have a special allocation for this section. */ | 2152 | /* We have a special allocation for this section. */ |
2092 | percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, | 2153 | err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size, |
2093 | sechdrs[pcpuindex].sh_addralign, | 2154 | sechdrs[pcpuindex].sh_addralign); |
2094 | mod->name); | 2155 | if (err) |
2095 | if (!percpu) { | ||
2096 | err = -ENOMEM; | ||
2097 | goto free_mod; | 2156 | goto free_mod; |
2098 | } | ||
2099 | sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; | 2157 | sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; |
2100 | mod->percpu = percpu; | ||
2101 | } | 2158 | } |
2102 | 2159 | ||
2103 | /* Determine total sizes, and put offsets in sh_entsize. For now | 2160 | /* Determine total sizes, and put offsets in sh_entsize. For now |
@@ -2162,9 +2219,8 @@ static noinline struct module *load_module(void __user *umod, | |||
2162 | mod = (void *)sechdrs[modindex].sh_addr; | 2219 | mod = (void *)sechdrs[modindex].sh_addr; |
2163 | kmemleak_load_module(mod, hdr, sechdrs, secstrings); | 2220 | kmemleak_load_module(mod, hdr, sechdrs, secstrings); |
2164 | 2221 | ||
2165 | #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) | 2222 | #if defined(CONFIG_MODULE_UNLOAD) |
2166 | mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), | 2223 | mod->refptr = alloc_percpu(struct module_ref); |
2167 | mod->name); | ||
2168 | if (!mod->refptr) { | 2224 | if (!mod->refptr) { |
2169 | err = -ENOMEM; | 2225 | err = -ENOMEM; |
2170 | goto free_init; | 2226 | goto free_init; |
@@ -2313,7 +2369,7 @@ static noinline struct module *load_module(void __user *umod, | |||
2313 | sort_extable(mod->extable, mod->extable + mod->num_exentries); | 2369 | sort_extable(mod->extable, mod->extable + mod->num_exentries); |
2314 | 2370 | ||
2315 | /* Finally, copy percpu area over. */ | 2371 | /* Finally, copy percpu area over. */ |
2316 | percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, | 2372 | percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr, |
2317 | sechdrs[pcpuindex].sh_size); | 2373 | sechdrs[pcpuindex].sh_size); |
2318 | 2374 | ||
2319 | add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, | 2375 | add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, |
@@ -2396,8 +2452,8 @@ static noinline struct module *load_module(void __user *umod, | |||
2396 | kobject_put(&mod->mkobj.kobj); | 2452 | kobject_put(&mod->mkobj.kobj); |
2397 | free_unload: | 2453 | free_unload: |
2398 | module_unload_free(mod); | 2454 | module_unload_free(mod); |
2399 | #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) | 2455 | #if defined(CONFIG_MODULE_UNLOAD) |
2400 | percpu_modfree(mod->refptr); | 2456 | free_percpu(mod->refptr); |
2401 | free_init: | 2457 | free_init: |
2402 | #endif | 2458 | #endif |
2403 | module_free(mod, mod->module_init); | 2459 | module_free(mod, mod->module_init); |
@@ -2405,8 +2461,7 @@ static noinline struct module *load_module(void __user *umod, | |||
2405 | module_free(mod, mod->module_core); | 2461 | module_free(mod, mod->module_core); |
2406 | /* mod will be freed with core. Don't access it beyond this line! */ | 2462 | /* mod will be freed with core. Don't access it beyond this line! */ |
2407 | free_percpu: | 2463 | free_percpu: |
2408 | if (percpu) | 2464 | percpu_modfree(mod); |
2409 | percpu_modfree(percpu); | ||
2410 | free_mod: | 2465 | free_mod: |
2411 | kfree(args); | 2466 | kfree(args); |
2412 | kfree(strmap); | 2467 | kfree(strmap); |