diff options
| author | H. Peter Anvin <hpa@zytor.com> | 2010-05-08 17:59:58 -0400 |
|---|---|---|
| committer | H. Peter Anvin <hpa@zytor.com> | 2010-05-08 17:59:58 -0400 |
| commit | d7be0ce6afb1df60bc786f57410407ceae92b994 (patch) | |
| tree | 5e91acfc12c833531ad3320f274e0cd96a129973 /kernel/module.c | |
| parent | e08cae4181af9483b04ecfac48f01c8e5a5f27bf (diff) | |
| parent | 66f41d4c5c8a5deed66fdcc84509376c9a0bf9d8 (diff) | |
Merge commit 'v2.6.34-rc6' into x86/cpu
Diffstat (limited to 'kernel/module.c')
| -rw-r--r-- | kernel/module.c | 139 |
1 files changed, 100 insertions, 39 deletions
diff --git a/kernel/module.c b/kernel/module.c index c968d3606dca..1016b75b026a 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -370,27 +370,33 @@ EXPORT_SYMBOL_GPL(find_module); | |||
| 370 | 370 | ||
| 371 | #ifdef CONFIG_SMP | 371 | #ifdef CONFIG_SMP |
| 372 | 372 | ||
| 373 | static void *percpu_modalloc(unsigned long size, unsigned long align, | 373 | static inline void __percpu *mod_percpu(struct module *mod) |
| 374 | const char *name) | ||
| 375 | { | 374 | { |
| 376 | void *ptr; | 375 | return mod->percpu; |
| 376 | } | ||
| 377 | 377 | ||
| 378 | static int percpu_modalloc(struct module *mod, | ||
| 379 | unsigned long size, unsigned long align) | ||
| 380 | { | ||
| 378 | if (align > PAGE_SIZE) { | 381 | if (align > PAGE_SIZE) { |
| 379 | printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", | 382 | printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", |
| 380 | name, align, PAGE_SIZE); | 383 | mod->name, align, PAGE_SIZE); |
| 381 | align = PAGE_SIZE; | 384 | align = PAGE_SIZE; |
| 382 | } | 385 | } |
| 383 | 386 | ||
| 384 | ptr = __alloc_reserved_percpu(size, align); | 387 | mod->percpu = __alloc_reserved_percpu(size, align); |
| 385 | if (!ptr) | 388 | if (!mod->percpu) { |
| 386 | printk(KERN_WARNING | 389 | printk(KERN_WARNING |
| 387 | "Could not allocate %lu bytes percpu data\n", size); | 390 | "Could not allocate %lu bytes percpu data\n", size); |
| 388 | return ptr; | 391 | return -ENOMEM; |
| 392 | } | ||
| 393 | mod->percpu_size = size; | ||
| 394 | return 0; | ||
| 389 | } | 395 | } |
| 390 | 396 | ||
| 391 | static void percpu_modfree(void *freeme) | 397 | static void percpu_modfree(struct module *mod) |
| 392 | { | 398 | { |
| 393 | free_percpu(freeme); | 399 | free_percpu(mod->percpu); |
| 394 | } | 400 | } |
| 395 | 401 | ||
| 396 | static unsigned int find_pcpusec(Elf_Ehdr *hdr, | 402 | static unsigned int find_pcpusec(Elf_Ehdr *hdr, |
| @@ -400,24 +406,62 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr, | |||
| 400 | return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); | 406 | return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); |
| 401 | } | 407 | } |
| 402 | 408 | ||
| 403 | static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) | 409 | static void percpu_modcopy(struct module *mod, |
| 410 | const void *from, unsigned long size) | ||
| 404 | { | 411 | { |
| 405 | int cpu; | 412 | int cpu; |
| 406 | 413 | ||
| 407 | for_each_possible_cpu(cpu) | 414 | for_each_possible_cpu(cpu) |
| 408 | memcpy(pcpudest + per_cpu_offset(cpu), from, size); | 415 | memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); |
| 416 | } | ||
| 417 | |||
| 418 | /** | ||
| 419 | * is_module_percpu_address - test whether address is from module static percpu | ||
| 420 | * @addr: address to test | ||
| 421 | * | ||
| 422 | * Test whether @addr belongs to module static percpu area. | ||
| 423 | * | ||
| 424 | * RETURNS: | ||
| 425 | * %true if @addr is from module static percpu area | ||
| 426 | */ | ||
| 427 | bool is_module_percpu_address(unsigned long addr) | ||
| 428 | { | ||
| 429 | struct module *mod; | ||
| 430 | unsigned int cpu; | ||
| 431 | |||
| 432 | preempt_disable(); | ||
| 433 | |||
| 434 | list_for_each_entry_rcu(mod, &modules, list) { | ||
| 435 | if (!mod->percpu_size) | ||
| 436 | continue; | ||
| 437 | for_each_possible_cpu(cpu) { | ||
| 438 | void *start = per_cpu_ptr(mod->percpu, cpu); | ||
| 439 | |||
| 440 | if ((void *)addr >= start && | ||
| 441 | (void *)addr < start + mod->percpu_size) { | ||
| 442 | preempt_enable(); | ||
| 443 | return true; | ||
| 444 | } | ||
| 445 | } | ||
| 446 | } | ||
| 447 | |||
| 448 | preempt_enable(); | ||
| 449 | return false; | ||
| 409 | } | 450 | } |
| 410 | 451 | ||
| 411 | #else /* ... !CONFIG_SMP */ | 452 | #else /* ... !CONFIG_SMP */ |
| 412 | 453 | ||
| 413 | static inline void *percpu_modalloc(unsigned long size, unsigned long align, | 454 | static inline void __percpu *mod_percpu(struct module *mod) |
| 414 | const char *name) | ||
| 415 | { | 455 | { |
| 416 | return NULL; | 456 | return NULL; |
| 417 | } | 457 | } |
| 418 | static inline void percpu_modfree(void *pcpuptr) | 458 | static inline int percpu_modalloc(struct module *mod, |
| 459 | unsigned long size, unsigned long align) | ||
| 460 | { | ||
| 461 | return -ENOMEM; | ||
| 462 | } | ||
| 463 | static inline void percpu_modfree(struct module *mod) | ||
| 419 | { | 464 | { |
| 420 | BUG(); | ||
| 421 | } | 465 | } |
| 422 | static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, | 466 | static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, |
| 423 | Elf_Shdr *sechdrs, | 467 | Elf_Shdr *sechdrs, |
| @@ -425,12 +469,16 @@ static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, | |||
| 425 | { | 469 | { |
| 426 | return 0; | 470 | return 0; |
| 427 | } | 471 | } |
| 428 | static inline void percpu_modcopy(void *pcpudst, const void *src, | 472 | static inline void percpu_modcopy(struct module *mod, |
| 429 | unsigned long size) | 473 | const void *from, unsigned long size) |
| 430 | { | 474 | { |
| 431 | /* pcpusec should be 0, and size of that section should be 0. */ | 475 | /* pcpusec should be 0, and size of that section should be 0. */ |
| 432 | BUG_ON(size != 0); | 476 | BUG_ON(size != 0); |
| 433 | } | 477 | } |
| 478 | bool is_module_percpu_address(unsigned long addr) | ||
| 479 | { | ||
| 480 | return false; | ||
| 481 | } | ||
| 434 | 482 | ||
| 435 | #endif /* CONFIG_SMP */ | 483 | #endif /* CONFIG_SMP */ |
| 436 | 484 | ||
| @@ -473,11 +521,13 @@ static void module_unload_init(struct module *mod) | |||
| 473 | int cpu; | 521 | int cpu; |
| 474 | 522 | ||
| 475 | INIT_LIST_HEAD(&mod->modules_which_use_me); | 523 | INIT_LIST_HEAD(&mod->modules_which_use_me); |
| 476 | for_each_possible_cpu(cpu) | 524 | for_each_possible_cpu(cpu) { |
| 477 | per_cpu_ptr(mod->refptr, cpu)->count = 0; | 525 | per_cpu_ptr(mod->refptr, cpu)->incs = 0; |
| 526 | per_cpu_ptr(mod->refptr, cpu)->decs = 0; | ||
| 527 | } | ||
| 478 | 528 | ||
| 479 | /* Hold reference count during initialization. */ | 529 | /* Hold reference count during initialization. */ |
| 480 | __this_cpu_write(mod->refptr->count, 1); | 530 | __this_cpu_write(mod->refptr->incs, 1); |
| 481 | /* Backwards compatibility macros put refcount during init. */ | 531 | /* Backwards compatibility macros put refcount during init. */ |
| 482 | mod->waiter = current; | 532 | mod->waiter = current; |
| 483 | } | 533 | } |
| @@ -616,12 +666,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced) | |||
| 616 | 666 | ||
| 617 | unsigned int module_refcount(struct module *mod) | 667 | unsigned int module_refcount(struct module *mod) |
| 618 | { | 668 | { |
| 619 | unsigned int total = 0; | 669 | unsigned int incs = 0, decs = 0; |
| 620 | int cpu; | 670 | int cpu; |
| 621 | 671 | ||
| 622 | for_each_possible_cpu(cpu) | 672 | for_each_possible_cpu(cpu) |
| 623 | total += per_cpu_ptr(mod->refptr, cpu)->count; | 673 | decs += per_cpu_ptr(mod->refptr, cpu)->decs; |
| 624 | return total; | 674 | /* |
| 675 | * ensure the incs are added up after the decs. | ||
| 676 | * module_put ensures incs are visible before decs with smp_wmb. | ||
| 677 | * | ||
| 678 | * This 2-count scheme avoids the situation where the refcount | ||
| 679 | * for CPU0 is read, then CPU0 increments the module refcount, | ||
| 680 | * then CPU1 drops that refcount, then the refcount for CPU1 is | ||
| 681 | * read. We would record a decrement but not its corresponding | ||
| 682 | * increment so we would see a low count (disaster). | ||
| 683 | * | ||
| 684 | * Rare situation? But module_refcount can be preempted, and we | ||
| 685 | * might be tallying up 4096+ CPUs. So it is not impossible. | ||
| 686 | */ | ||
| 687 | smp_rmb(); | ||
| 688 | for_each_possible_cpu(cpu) | ||
| 689 | incs += per_cpu_ptr(mod->refptr, cpu)->incs; | ||
| 690 | return incs - decs; | ||
| 625 | } | 691 | } |
| 626 | EXPORT_SYMBOL(module_refcount); | 692 | EXPORT_SYMBOL(module_refcount); |
| 627 | 693 | ||
| @@ -798,10 +864,11 @@ void module_put(struct module *module) | |||
| 798 | { | 864 | { |
| 799 | if (module) { | 865 | if (module) { |
| 800 | preempt_disable(); | 866 | preempt_disable(); |
| 801 | __this_cpu_dec(module->refptr->count); | 867 | smp_wmb(); /* see comment in module_refcount */ |
| 868 | __this_cpu_inc(module->refptr->decs); | ||
| 802 | 869 | ||
| 803 | trace_module_put(module, _RET_IP_, | 870 | trace_module_put(module, _RET_IP_, |
| 804 | __this_cpu_read(module->refptr->count)); | 871 | __this_cpu_read(module->refptr->decs)); |
| 805 | /* Maybe they're waiting for us to drop reference? */ | 872 | /* Maybe they're waiting for us to drop reference? */ |
| 806 | if (unlikely(!module_is_live(module))) | 873 | if (unlikely(!module_is_live(module))) |
| 807 | wake_up_process(module->waiter); | 874 | wake_up_process(module->waiter); |
| @@ -1400,8 +1467,7 @@ static void free_module(struct module *mod) | |||
| 1400 | /* This may be NULL, but that's OK */ | 1467 | /* This may be NULL, but that's OK */ |
| 1401 | module_free(mod, mod->module_init); | 1468 | module_free(mod, mod->module_init); |
| 1402 | kfree(mod->args); | 1469 | kfree(mod->args); |
| 1403 | if (mod->percpu) | 1470 | percpu_modfree(mod); |
| 1404 | percpu_modfree(mod->percpu); | ||
| 1405 | #if defined(CONFIG_MODULE_UNLOAD) | 1471 | #if defined(CONFIG_MODULE_UNLOAD) |
| 1406 | if (mod->refptr) | 1472 | if (mod->refptr) |
| 1407 | free_percpu(mod->refptr); | 1473 | free_percpu(mod->refptr); |
| @@ -1520,7 +1586,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs, | |||
| 1520 | default: | 1586 | default: |
| 1521 | /* Divert to percpu allocation if a percpu var. */ | 1587 | /* Divert to percpu allocation if a percpu var. */ |
| 1522 | if (sym[i].st_shndx == pcpuindex) | 1588 | if (sym[i].st_shndx == pcpuindex) |
| 1523 | secbase = (unsigned long)mod->percpu; | 1589 | secbase = (unsigned long)mod_percpu(mod); |
| 1524 | else | 1590 | else |
| 1525 | secbase = sechdrs[sym[i].st_shndx].sh_addr; | 1591 | secbase = sechdrs[sym[i].st_shndx].sh_addr; |
| 1526 | sym[i].st_value += secbase; | 1592 | sym[i].st_value += secbase; |
| @@ -1954,7 +2020,7 @@ static noinline struct module *load_module(void __user *umod, | |||
| 1954 | unsigned int modindex, versindex, infoindex, pcpuindex; | 2020 | unsigned int modindex, versindex, infoindex, pcpuindex; |
| 1955 | struct module *mod; | 2021 | struct module *mod; |
| 1956 | long err = 0; | 2022 | long err = 0; |
| 1957 | void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ | 2023 | void *ptr = NULL; /* Stops spurious gcc warning */ |
| 1958 | unsigned long symoffs, stroffs, *strmap; | 2024 | unsigned long symoffs, stroffs, *strmap; |
| 1959 | 2025 | ||
| 1960 | mm_segment_t old_fs; | 2026 | mm_segment_t old_fs; |
| @@ -2094,15 +2160,11 @@ static noinline struct module *load_module(void __user *umod, | |||
| 2094 | 2160 | ||
| 2095 | if (pcpuindex) { | 2161 | if (pcpuindex) { |
| 2096 | /* We have a special allocation for this section. */ | 2162 | /* We have a special allocation for this section. */ |
| 2097 | percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, | 2163 | err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size, |
| 2098 | sechdrs[pcpuindex].sh_addralign, | 2164 | sechdrs[pcpuindex].sh_addralign); |
| 2099 | mod->name); | 2165 | if (err) |
| 2100 | if (!percpu) { | ||
| 2101 | err = -ENOMEM; | ||
| 2102 | goto free_mod; | 2166 | goto free_mod; |
| 2103 | } | ||
| 2104 | sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; | 2167 | sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; |
| 2105 | mod->percpu = percpu; | ||
| 2106 | } | 2168 | } |
| 2107 | 2169 | ||
| 2108 | /* Determine total sizes, and put offsets in sh_entsize. For now | 2170 | /* Determine total sizes, and put offsets in sh_entsize. For now |
| @@ -2317,7 +2379,7 @@ static noinline struct module *load_module(void __user *umod, | |||
| 2317 | sort_extable(mod->extable, mod->extable + mod->num_exentries); | 2379 | sort_extable(mod->extable, mod->extable + mod->num_exentries); |
| 2318 | 2380 | ||
| 2319 | /* Finally, copy percpu area over. */ | 2381 | /* Finally, copy percpu area over. */ |
| 2320 | percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, | 2382 | percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr, |
| 2321 | sechdrs[pcpuindex].sh_size); | 2383 | sechdrs[pcpuindex].sh_size); |
| 2322 | 2384 | ||
| 2323 | add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, | 2385 | add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, |
| @@ -2409,8 +2471,7 @@ static noinline struct module *load_module(void __user *umod, | |||
| 2409 | module_free(mod, mod->module_core); | 2471 | module_free(mod, mod->module_core); |
| 2410 | /* mod will be freed with core. Don't access it beyond this line! */ | 2472 | /* mod will be freed with core. Don't access it beyond this line! */ |
| 2411 | free_percpu: | 2473 | free_percpu: |
| 2412 | if (percpu) | 2474 | percpu_modfree(mod); |
| 2413 | percpu_modfree(percpu); | ||
| 2414 | free_mod: | 2475 | free_mod: |
| 2415 | kfree(args); | 2476 | kfree(args); |
| 2416 | kfree(strmap); | 2477 | kfree(strmap); |
