diff options
Diffstat (limited to 'kernel/module.c')
-rw-r--r-- | kernel/module.c | 137 |
1 file changed, 99 insertions, 38 deletions
diff --git a/kernel/module.c b/kernel/module.c
index d9e237926b69..b8a1e313448c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -368,27 +368,33 @@ EXPORT_SYMBOL_GPL(find_module); | |||
368 | 368 | ||
369 | #ifdef CONFIG_SMP | 369 | #ifdef CONFIG_SMP |
370 | 370 | ||
371 | static void *percpu_modalloc(unsigned long size, unsigned long align, | 371 | static inline void __percpu *mod_percpu(struct module *mod) |
372 | const char *name) | ||
373 | { | 372 | { |
374 | void *ptr; | 373 | return mod->percpu; |
374 | } | ||
375 | 375 | ||
376 | static int percpu_modalloc(struct module *mod, | ||
377 | unsigned long size, unsigned long align) | ||
378 | { | ||
376 | if (align > PAGE_SIZE) { | 379 | if (align > PAGE_SIZE) { |
377 | printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", | 380 | printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", |
378 | name, align, PAGE_SIZE); | 381 | mod->name, align, PAGE_SIZE); |
379 | align = PAGE_SIZE; | 382 | align = PAGE_SIZE; |
380 | } | 383 | } |
381 | 384 | ||
382 | ptr = __alloc_reserved_percpu(size, align); | 385 | mod->percpu = __alloc_reserved_percpu(size, align); |
383 | if (!ptr) | 386 | if (!mod->percpu) { |
384 | printk(KERN_WARNING | 387 | printk(KERN_WARNING |
385 | "Could not allocate %lu bytes percpu data\n", size); | 388 | "Could not allocate %lu bytes percpu data\n", size); |
386 | return ptr; | 389 | return -ENOMEM; |
390 | } | ||
391 | mod->percpu_size = size; | ||
392 | return 0; | ||
387 | } | 393 | } |
388 | 394 | ||
389 | static void percpu_modfree(void *freeme) | 395 | static void percpu_modfree(struct module *mod) |
390 | { | 396 | { |
391 | free_percpu(freeme); | 397 | free_percpu(mod->percpu); |
392 | } | 398 | } |
393 | 399 | ||
394 | static unsigned int find_pcpusec(Elf_Ehdr *hdr, | 400 | static unsigned int find_pcpusec(Elf_Ehdr *hdr, |
@@ -398,24 +404,62 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr, | |||
398 | return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); | 404 | return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); |
399 | } | 405 | } |
400 | 406 | ||
401 | static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) | 407 | static void percpu_modcopy(struct module *mod, |
408 | const void *from, unsigned long size) | ||
402 | { | 409 | { |
403 | int cpu; | 410 | int cpu; |
404 | 411 | ||
405 | for_each_possible_cpu(cpu) | 412 | for_each_possible_cpu(cpu) |
406 | memcpy(pcpudest + per_cpu_offset(cpu), from, size); | 413 | memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); |
414 | } | ||
415 | |||
416 | /** | ||
417 | * is_module_percpu_address - test whether address is from module static percpu | ||
418 | * @addr: address to test | ||
419 | * | ||
420 | * Test whether @addr belongs to module static percpu area. | ||
421 | * | ||
422 | * RETURNS: | ||
423 | * %true if @addr is from module static percpu area | ||
424 | */ | ||
425 | bool is_module_percpu_address(unsigned long addr) | ||
426 | { | ||
427 | struct module *mod; | ||
428 | unsigned int cpu; | ||
429 | |||
430 | preempt_disable(); | ||
431 | |||
432 | list_for_each_entry_rcu(mod, &modules, list) { | ||
433 | if (!mod->percpu_size) | ||
434 | continue; | ||
435 | for_each_possible_cpu(cpu) { | ||
436 | void *start = per_cpu_ptr(mod->percpu, cpu); | ||
437 | |||
438 | if ((void *)addr >= start && | ||
439 | (void *)addr < start + mod->percpu_size) { | ||
440 | preempt_enable(); | ||
441 | return true; | ||
442 | } | ||
443 | } | ||
444 | } | ||
445 | |||
446 | preempt_enable(); | ||
447 | return false; | ||
407 | } | 448 | } |
408 | 449 | ||
409 | #else /* ... !CONFIG_SMP */ | 450 | #else /* ... !CONFIG_SMP */ |
410 | 451 | ||
411 | static inline void *percpu_modalloc(unsigned long size, unsigned long align, | 452 | static inline void __percpu *mod_percpu(struct module *mod) |
412 | const char *name) | ||
413 | { | 453 | { |
414 | return NULL; | 454 | return NULL; |
415 | } | 455 | } |
416 | static inline void percpu_modfree(void *pcpuptr) | 456 | static inline int percpu_modalloc(struct module *mod, |
457 | unsigned long size, unsigned long align) | ||
458 | { | ||
459 | return -ENOMEM; | ||
460 | } | ||
461 | static inline void percpu_modfree(struct module *mod) | ||
417 | { | 462 | { |
418 | BUG(); | ||
419 | } | 463 | } |
420 | static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, | 464 | static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, |
421 | Elf_Shdr *sechdrs, | 465 | Elf_Shdr *sechdrs, |
@@ -423,12 +467,16 @@ static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, | |||
423 | { | 467 | { |
424 | return 0; | 468 | return 0; |
425 | } | 469 | } |
426 | static inline void percpu_modcopy(void *pcpudst, const void *src, | 470 | static inline void percpu_modcopy(struct module *mod, |
427 | unsigned long size) | 471 | const void *from, unsigned long size) |
428 | { | 472 | { |
429 | /* pcpusec should be 0, and size of that section should be 0. */ | 473 | /* pcpusec should be 0, and size of that section should be 0. */ |
430 | BUG_ON(size != 0); | 474 | BUG_ON(size != 0); |
431 | } | 475 | } |
476 | bool is_module_percpu_address(unsigned long addr) | ||
477 | { | ||
478 | return false; | ||
479 | } | ||
432 | 480 | ||
433 | #endif /* CONFIG_SMP */ | 481 | #endif /* CONFIG_SMP */ |
434 | 482 | ||
@@ -474,11 +522,13 @@ static void module_unload_init(struct module *mod) | |||
474 | int cpu; | 522 | int cpu; |
475 | 523 | ||
476 | INIT_LIST_HEAD(&mod->modules_which_use_me); | 524 | INIT_LIST_HEAD(&mod->modules_which_use_me); |
477 | for_each_possible_cpu(cpu) | 525 | for_each_possible_cpu(cpu) { |
478 | per_cpu_ptr(mod->refptr, cpu)->count = 0; | 526 | per_cpu_ptr(mod->refptr, cpu)->incs = 0; |
527 | per_cpu_ptr(mod->refptr, cpu)->decs = 0; | ||
528 | } | ||
479 | 529 | ||
480 | /* Hold reference count during initialization. */ | 530 | /* Hold reference count during initialization. */ |
481 | __this_cpu_write(mod->refptr->count, 1); | 531 | __this_cpu_write(mod->refptr->incs, 1); |
482 | /* Backwards compatibility macros put refcount during init. */ | 532 | /* Backwards compatibility macros put refcount during init. */ |
483 | mod->waiter = current; | 533 | mod->waiter = current; |
484 | } | 534 | } |
@@ -617,12 +667,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced) | |||
617 | 667 | ||
618 | unsigned int module_refcount(struct module *mod) | 668 | unsigned int module_refcount(struct module *mod) |
619 | { | 669 | { |
620 | unsigned int total = 0; | 670 | unsigned int incs = 0, decs = 0; |
621 | int cpu; | 671 | int cpu; |
622 | 672 | ||
623 | for_each_possible_cpu(cpu) | 673 | for_each_possible_cpu(cpu) |
624 | total += per_cpu_ptr(mod->refptr, cpu)->count; | 674 | decs += per_cpu_ptr(mod->refptr, cpu)->decs; |
625 | return total; | 675 | /* |
676 | * ensure the incs are added up after the decs. | ||
677 | * module_put ensures incs are visible before decs with smp_wmb. | ||
678 | * | ||
679 | * This 2-count scheme avoids the situation where the refcount | ||
680 | * for CPU0 is read, then CPU0 increments the module refcount, | ||
681 | * then CPU1 drops that refcount, then the refcount for CPU1 is | ||
682 | * read. We would record a decrement but not its corresponding | ||
683 | * increment so we would see a low count (disaster). | ||
684 | * | ||
685 | * Rare situation? But module_refcount can be preempted, and we | ||
686 | * might be tallying up 4096+ CPUs. So it is not impossible. | ||
687 | */ | ||
688 | smp_rmb(); | ||
689 | for_each_possible_cpu(cpu) | ||
690 | incs += per_cpu_ptr(mod->refptr, cpu)->incs; | ||
691 | return incs - decs; | ||
626 | } | 692 | } |
627 | EXPORT_SYMBOL(module_refcount); | 693 | EXPORT_SYMBOL(module_refcount); |
628 | 694 | ||
@@ -799,7 +865,8 @@ void module_put(struct module *module) | |||
799 | { | 865 | { |
800 | if (module) { | 866 | if (module) { |
801 | preempt_disable(); | 867 | preempt_disable(); |
802 | __this_cpu_dec(module->refptr->count); | 868 | smp_wmb(); /* see comment in module_refcount */ |
869 | __this_cpu_inc(module->refptr->decs); | ||
803 | 870 | ||
804 | trace_module_put(module, _RET_IP_); | 871 | trace_module_put(module, _RET_IP_); |
805 | /* Maybe they're waiting for us to drop reference? */ | 872 | /* Maybe they're waiting for us to drop reference? */ |
@@ -1400,8 +1467,7 @@ static void free_module(struct module *mod) | |||
1400 | /* This may be NULL, but that's OK */ | 1467 | /* This may be NULL, but that's OK */ |
1401 | module_free(mod, mod->module_init); | 1468 | module_free(mod, mod->module_init); |
1402 | kfree(mod->args); | 1469 | kfree(mod->args); |
1403 | if (mod->percpu) | 1470 | percpu_modfree(mod); |
1404 | percpu_modfree(mod->percpu); | ||
1405 | #if defined(CONFIG_MODULE_UNLOAD) | 1471 | #if defined(CONFIG_MODULE_UNLOAD) |
1406 | if (mod->refptr) | 1472 | if (mod->refptr) |
1407 | free_percpu(mod->refptr); | 1473 | free_percpu(mod->refptr); |
@@ -1520,7 +1586,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs, | |||
1520 | default: | 1586 | default: |
1521 | /* Divert to percpu allocation if a percpu var. */ | 1587 | /* Divert to percpu allocation if a percpu var. */ |
1522 | if (sym[i].st_shndx == pcpuindex) | 1588 | if (sym[i].st_shndx == pcpuindex) |
1523 | secbase = (unsigned long)mod->percpu; | 1589 | secbase = (unsigned long)mod_percpu(mod); |
1524 | else | 1590 | else |
1525 | secbase = sechdrs[sym[i].st_shndx].sh_addr; | 1591 | secbase = sechdrs[sym[i].st_shndx].sh_addr; |
1526 | sym[i].st_value += secbase; | 1592 | sym[i].st_value += secbase; |
@@ -1954,7 +2020,7 @@ static noinline struct module *load_module(void __user *umod, | |||
1954 | unsigned int modindex, versindex, infoindex, pcpuindex; | 2020 | unsigned int modindex, versindex, infoindex, pcpuindex; |
1955 | struct module *mod; | 2021 | struct module *mod; |
1956 | long err = 0; | 2022 | long err = 0; |
1957 | void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ | 2023 | void *ptr = NULL; /* Stops spurious gcc warning */ |
1958 | unsigned long symoffs, stroffs, *strmap; | 2024 | unsigned long symoffs, stroffs, *strmap; |
1959 | 2025 | ||
1960 | mm_segment_t old_fs; | 2026 | mm_segment_t old_fs; |
@@ -2094,15 +2160,11 @@ static noinline struct module *load_module(void __user *umod, | |||
2094 | 2160 | ||
2095 | if (pcpuindex) { | 2161 | if (pcpuindex) { |
2096 | /* We have a special allocation for this section. */ | 2162 | /* We have a special allocation for this section. */ |
2097 | percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, | 2163 | err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size, |
2098 | sechdrs[pcpuindex].sh_addralign, | 2164 | sechdrs[pcpuindex].sh_addralign); |
2099 | mod->name); | 2165 | if (err) |
2100 | if (!percpu) { | ||
2101 | err = -ENOMEM; | ||
2102 | goto free_mod; | 2166 | goto free_mod; |
2103 | } | ||
2104 | sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; | 2167 | sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; |
2105 | mod->percpu = percpu; | ||
2106 | } | 2168 | } |
2107 | 2169 | ||
2108 | /* Determine total sizes, and put offsets in sh_entsize. For now | 2170 | /* Determine total sizes, and put offsets in sh_entsize. For now |
@@ -2317,7 +2379,7 @@ static noinline struct module *load_module(void __user *umod, | |||
2317 | sort_extable(mod->extable, mod->extable + mod->num_exentries); | 2379 | sort_extable(mod->extable, mod->extable + mod->num_exentries); |
2318 | 2380 | ||
2319 | /* Finally, copy percpu area over. */ | 2381 | /* Finally, copy percpu area over. */ |
2320 | percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, | 2382 | percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr, |
2321 | sechdrs[pcpuindex].sh_size); | 2383 | sechdrs[pcpuindex].sh_size); |
2322 | 2384 | ||
2323 | add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, | 2385 | add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, |
@@ -2409,8 +2471,7 @@ static noinline struct module *load_module(void __user *umod, | |||
2409 | module_free(mod, mod->module_core); | 2471 | module_free(mod, mod->module_core); |
2410 | /* mod will be freed with core. Don't access it beyond this line! */ | 2472 | /* mod will be freed with core. Don't access it beyond this line! */ |
2411 | free_percpu: | 2473 | free_percpu: |
2412 | if (percpu) | 2474 | percpu_modfree(mod); |
2413 | percpu_modfree(percpu); | ||
2414 | free_mod: | 2475 | free_mod: |
2415 | kfree(args); | 2476 | kfree(args); |
2416 | kfree(strmap); | 2477 | kfree(strmap); |