diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/cgroup.c | 2 | ||||
| -rw-r--r-- | kernel/cpu.c | 2 | ||||
| -rw-r--r-- | kernel/debug/kdb/kdb_main.c | 12 | ||||
| -rw-r--r-- | kernel/exec_domain.c | 18 | ||||
| -rw-r--r-- | kernel/futex.c | 17 | ||||
| -rw-r--r-- | kernel/irq/manage.c | 3 | ||||
| -rw-r--r-- | kernel/kexec.c | 7 | ||||
| -rw-r--r-- | kernel/module.c | 348 | ||||
| -rw-r--r-- | kernel/perf_event.c | 356 | ||||
| -rw-r--r-- | kernel/power/Kconfig | 9 | ||||
| -rw-r--r-- | kernel/power/Makefile | 2 | ||||
| -rw-r--r-- | kernel/power/nvs.c (renamed from kernel/power/hibernate_nvs.c) | 24 | ||||
| -rw-r--r-- | kernel/power/suspend.c | 6 | ||||
| -rw-r--r-- | kernel/sched.c | 161 | ||||
| -rw-r--r-- | kernel/sched_fair.c | 24 | ||||
| -rw-r--r-- | kernel/softirq.c | 2 | ||||
| -rw-r--r-- | kernel/stop_machine.c | 2 | ||||
| -rw-r--r-- | kernel/sysctl.c | 8 | ||||
| -rw-r--r-- | kernel/time/tick-sched.c | 21 | ||||
| -rw-r--r-- | kernel/timer.c | 2 | ||||
| -rw-r--r-- | kernel/trace/blktrace.c | 2 | ||||
| -rw-r--r-- | kernel/trace/trace_event_perf.c | 19 | ||||
| -rw-r--r-- | kernel/trace/trace_kprobe.c | 4 | ||||
| -rw-r--r-- | kernel/trace/trace_syscalls.c | 4 |
24 files changed, 633 insertions, 422 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 422cb19f156e..3ac6f5b0a64b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -4598,7 +4598,7 @@ static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent, | |||
| 4598 | parent_css = parent->subsys[subsys_id]; | 4598 | parent_css = parent->subsys[subsys_id]; |
| 4599 | child_css = child->subsys[subsys_id]; | 4599 | child_css = child->subsys[subsys_id]; |
| 4600 | parent_id = parent_css->id; | 4600 | parent_id = parent_css->id; |
| 4601 | depth = parent_id->depth; | 4601 | depth = parent_id->depth + 1; |
| 4602 | 4602 | ||
| 4603 | child_id = get_new_cssid(ss, depth); | 4603 | child_id = get_new_cssid(ss, depth); |
| 4604 | if (IS_ERR(child_id)) | 4604 | if (IS_ERR(child_id)) |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 8b92539b4754..97d1b426a4ac 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
| @@ -34,7 +34,7 @@ void cpu_maps_update_done(void) | |||
| 34 | mutex_unlock(&cpu_add_remove_lock); | 34 | mutex_unlock(&cpu_add_remove_lock); |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain); | 37 | static RAW_NOTIFIER_HEAD(cpu_chain); |
| 38 | 38 | ||
| 39 | /* If set, cpu_up and cpu_down will return -EBUSY and do nothing. | 39 | /* If set, cpu_up and cpu_down will return -EBUSY and do nothing. |
| 40 | * Should always be manipulated under cpu_add_remove_lock | 40 | * Should always be manipulated under cpu_add_remove_lock |
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index b724c791b6d4..184cd8209c36 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c | |||
| @@ -1857,12 +1857,6 @@ static int kdb_ef(int argc, const char **argv) | |||
| 1857 | } | 1857 | } |
| 1858 | 1858 | ||
| 1859 | #if defined(CONFIG_MODULES) | 1859 | #if defined(CONFIG_MODULES) |
| 1860 | /* modules using other modules */ | ||
| 1861 | struct module_use { | ||
| 1862 | struct list_head list; | ||
| 1863 | struct module *module_which_uses; | ||
| 1864 | }; | ||
| 1865 | |||
| 1866 | /* | 1860 | /* |
| 1867 | * kdb_lsmod - This function implements the 'lsmod' command. Lists | 1861 | * kdb_lsmod - This function implements the 'lsmod' command. Lists |
| 1868 | * currently loaded kernel modules. | 1862 | * currently loaded kernel modules. |
| @@ -1894,9 +1888,9 @@ static int kdb_lsmod(int argc, const char **argv) | |||
| 1894 | { | 1888 | { |
| 1895 | struct module_use *use; | 1889 | struct module_use *use; |
| 1896 | kdb_printf(" [ "); | 1890 | kdb_printf(" [ "); |
| 1897 | list_for_each_entry(use, &mod->modules_which_use_me, | 1891 | list_for_each_entry(use, &mod->source_list, |
| 1898 | list) | 1892 | source_list) |
| 1899 | kdb_printf("%s ", use->module_which_uses->name); | 1893 | kdb_printf("%s ", use->target->name); |
| 1900 | kdb_printf("]\n"); | 1894 | kdb_printf("]\n"); |
| 1901 | } | 1895 | } |
| 1902 | #endif | 1896 | #endif |
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index c35452cadded..dd62f8e714ca 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c | |||
| @@ -27,7 +27,7 @@ static struct exec_domain *exec_domains = &default_exec_domain; | |||
| 27 | static DEFINE_RWLOCK(exec_domains_lock); | 27 | static DEFINE_RWLOCK(exec_domains_lock); |
| 28 | 28 | ||
| 29 | 29 | ||
| 30 | static u_long ident_map[32] = { | 30 | static unsigned long ident_map[32] = { |
| 31 | 0, 1, 2, 3, 4, 5, 6, 7, | 31 | 0, 1, 2, 3, 4, 5, 6, 7, |
| 32 | 8, 9, 10, 11, 12, 13, 14, 15, | 32 | 8, 9, 10, 11, 12, 13, 14, 15, |
| 33 | 16, 17, 18, 19, 20, 21, 22, 23, | 33 | 16, 17, 18, 19, 20, 21, 22, 23, |
| @@ -56,10 +56,10 @@ default_handler(int segment, struct pt_regs *regp) | |||
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | static struct exec_domain * | 58 | static struct exec_domain * |
| 59 | lookup_exec_domain(u_long personality) | 59 | lookup_exec_domain(unsigned int personality) |
| 60 | { | 60 | { |
| 61 | struct exec_domain * ep; | 61 | unsigned int pers = personality(personality); |
| 62 | u_long pers = personality(personality); | 62 | struct exec_domain *ep; |
| 63 | 63 | ||
| 64 | read_lock(&exec_domains_lock); | 64 | read_lock(&exec_domains_lock); |
| 65 | for (ep = exec_domains; ep; ep = ep->next) { | 65 | for (ep = exec_domains; ep; ep = ep->next) { |
| @@ -70,7 +70,7 @@ lookup_exec_domain(u_long personality) | |||
| 70 | 70 | ||
| 71 | #ifdef CONFIG_MODULES | 71 | #ifdef CONFIG_MODULES |
| 72 | read_unlock(&exec_domains_lock); | 72 | read_unlock(&exec_domains_lock); |
| 73 | request_module("personality-%ld", pers); | 73 | request_module("personality-%d", pers); |
| 74 | read_lock(&exec_domains_lock); | 74 | read_lock(&exec_domains_lock); |
| 75 | 75 | ||
| 76 | for (ep = exec_domains; ep; ep = ep->next) { | 76 | for (ep = exec_domains; ep; ep = ep->next) { |
| @@ -135,7 +135,7 @@ unregister: | |||
| 135 | } | 135 | } |
| 136 | 136 | ||
| 137 | int | 137 | int |
| 138 | __set_personality(u_long personality) | 138 | __set_personality(unsigned int personality) |
| 139 | { | 139 | { |
| 140 | struct exec_domain *ep, *oep; | 140 | struct exec_domain *ep, *oep; |
| 141 | 141 | ||
| @@ -188,9 +188,9 @@ static int __init proc_execdomains_init(void) | |||
| 188 | module_init(proc_execdomains_init); | 188 | module_init(proc_execdomains_init); |
| 189 | #endif | 189 | #endif |
| 190 | 190 | ||
| 191 | SYSCALL_DEFINE1(personality, u_long, personality) | 191 | SYSCALL_DEFINE1(personality, unsigned int, personality) |
| 192 | { | 192 | { |
| 193 | u_long old = current->personality; | 193 | unsigned int old = current->personality; |
| 194 | 194 | ||
| 195 | if (personality != 0xffffffff) { | 195 | if (personality != 0xffffffff) { |
| 196 | set_personality(personality); | 196 | set_personality(personality); |
| @@ -198,7 +198,7 @@ SYSCALL_DEFINE1(personality, u_long, personality) | |||
| 198 | return -EINVAL; | 198 | return -EINVAL; |
| 199 | } | 199 | } |
| 200 | 200 | ||
| 201 | return (long)old; | 201 | return old; |
| 202 | } | 202 | } |
| 203 | 203 | ||
| 204 | 204 | ||
diff --git a/kernel/futex.c b/kernel/futex.c index e7a35f1039e7..6a3a5fa1526d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -429,20 +429,11 @@ static void free_pi_state(struct futex_pi_state *pi_state) | |||
| 429 | static struct task_struct * futex_find_get_task(pid_t pid) | 429 | static struct task_struct * futex_find_get_task(pid_t pid) |
| 430 | { | 430 | { |
| 431 | struct task_struct *p; | 431 | struct task_struct *p; |
| 432 | const struct cred *cred = current_cred(), *pcred; | ||
| 433 | 432 | ||
| 434 | rcu_read_lock(); | 433 | rcu_read_lock(); |
| 435 | p = find_task_by_vpid(pid); | 434 | p = find_task_by_vpid(pid); |
| 436 | if (!p) { | 435 | if (p) |
| 437 | p = ERR_PTR(-ESRCH); | 436 | get_task_struct(p); |
| 438 | } else { | ||
| 439 | pcred = __task_cred(p); | ||
| 440 | if (cred->euid != pcred->euid && | ||
| 441 | cred->euid != pcred->uid) | ||
| 442 | p = ERR_PTR(-ESRCH); | ||
| 443 | else | ||
| 444 | get_task_struct(p); | ||
| 445 | } | ||
| 446 | 437 | ||
| 447 | rcu_read_unlock(); | 438 | rcu_read_unlock(); |
| 448 | 439 | ||
| @@ -564,8 +555,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
| 564 | if (!pid) | 555 | if (!pid) |
| 565 | return -ESRCH; | 556 | return -ESRCH; |
| 566 | p = futex_find_get_task(pid); | 557 | p = futex_find_get_task(pid); |
| 567 | if (IS_ERR(p)) | 558 | if (!p) |
| 568 | return PTR_ERR(p); | 559 | return -ESRCH; |
| 569 | 560 | ||
| 570 | /* | 561 | /* |
| 571 | * We need to look at the task state flags to figure out, | 562 | * We need to look at the task state flags to figure out, |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 3164ba7ce151..e1497481fe8a 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
| @@ -456,6 +456,9 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
| 456 | /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */ | 456 | /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */ |
| 457 | desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK); | 457 | desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK); |
| 458 | desc->status |= flags; | 458 | desc->status |= flags; |
| 459 | |||
| 460 | if (chip != desc->chip) | ||
| 461 | irq_chip_set_defaults(desc->chip); | ||
| 459 | } | 462 | } |
| 460 | 463 | ||
| 461 | return ret; | 464 | return ret; |
diff --git a/kernel/kexec.c b/kernel/kexec.c index 474a84715eac..131b1703936f 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
| @@ -1089,9 +1089,10 @@ void crash_kexec(struct pt_regs *regs) | |||
| 1089 | 1089 | ||
| 1090 | size_t crash_get_memory_size(void) | 1090 | size_t crash_get_memory_size(void) |
| 1091 | { | 1091 | { |
| 1092 | size_t size; | 1092 | size_t size = 0; |
| 1093 | mutex_lock(&kexec_mutex); | 1093 | mutex_lock(&kexec_mutex); |
| 1094 | size = crashk_res.end - crashk_res.start + 1; | 1094 | if (crashk_res.end != crashk_res.start) |
| 1095 | size = crashk_res.end - crashk_res.start + 1; | ||
| 1095 | mutex_unlock(&kexec_mutex); | 1096 | mutex_unlock(&kexec_mutex); |
| 1096 | return size; | 1097 | return size; |
| 1097 | } | 1098 | } |
| @@ -1134,7 +1135,7 @@ int crash_shrink_memory(unsigned long new_size) | |||
| 1134 | 1135 | ||
| 1135 | free_reserved_phys_range(end, crashk_res.end); | 1136 | free_reserved_phys_range(end, crashk_res.end); |
| 1136 | 1137 | ||
| 1137 | if (start == end) | 1138 | if ((start == end) && (crashk_res.parent != NULL)) |
| 1138 | release_resource(&crashk_res); | 1139 | release_resource(&crashk_res); |
| 1139 | crashk_res.end = end - 1; | 1140 | crashk_res.end = end - 1; |
| 1140 | 1141 | ||
diff --git a/kernel/module.c b/kernel/module.c index 333fbcc96978..5d2d28197c82 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -72,7 +72,11 @@ | |||
| 72 | /* If this is set, the section belongs in the init part of the module */ | 72 | /* If this is set, the section belongs in the init part of the module */ |
| 73 | #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) | 73 | #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) |
| 74 | 74 | ||
| 75 | /* List of modules, protected by module_mutex or preempt_disable | 75 | /* |
| 76 | * Mutex protects: | ||
| 77 | * 1) List of modules (also safely readable with preempt_disable), | ||
| 78 | * 2) module_use links, | ||
| 79 | * 3) module_addr_min/module_addr_max. | ||
| 76 | * (delete uses stop_machine/add uses RCU list operations). */ | 80 | * (delete uses stop_machine/add uses RCU list operations). */ |
| 77 | DEFINE_MUTEX(module_mutex); | 81 | DEFINE_MUTEX(module_mutex); |
| 78 | EXPORT_SYMBOL_GPL(module_mutex); | 82 | EXPORT_SYMBOL_GPL(module_mutex); |
| @@ -90,7 +94,8 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq); | |||
| 90 | 94 | ||
| 91 | static BLOCKING_NOTIFIER_HEAD(module_notify_list); | 95 | static BLOCKING_NOTIFIER_HEAD(module_notify_list); |
| 92 | 96 | ||
| 93 | /* Bounds of module allocation, for speeding __module_address */ | 97 | /* Bounds of module allocation, for speeding __module_address. |
| 98 | * Protected by module_mutex. */ | ||
| 94 | static unsigned long module_addr_min = -1UL, module_addr_max = 0; | 99 | static unsigned long module_addr_min = -1UL, module_addr_max = 0; |
| 95 | 100 | ||
| 96 | int register_module_notifier(struct notifier_block * nb) | 101 | int register_module_notifier(struct notifier_block * nb) |
| @@ -329,7 +334,7 @@ static bool find_symbol_in_section(const struct symsearch *syms, | |||
| 329 | } | 334 | } |
| 330 | 335 | ||
| 331 | /* Find a symbol and return it, along with, (optional) crc and | 336 | /* Find a symbol and return it, along with, (optional) crc and |
| 332 | * (optional) module which owns it */ | 337 | * (optional) module which owns it. Needs preempt disabled or module_mutex. */ |
| 333 | const struct kernel_symbol *find_symbol(const char *name, | 338 | const struct kernel_symbol *find_symbol(const char *name, |
| 334 | struct module **owner, | 339 | struct module **owner, |
| 335 | const unsigned long **crc, | 340 | const unsigned long **crc, |
| @@ -403,7 +408,7 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr, | |||
| 403 | Elf_Shdr *sechdrs, | 408 | Elf_Shdr *sechdrs, |
| 404 | const char *secstrings) | 409 | const char *secstrings) |
| 405 | { | 410 | { |
| 406 | return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); | 411 | return find_sec(hdr, sechdrs, secstrings, ".data..percpu"); |
| 407 | } | 412 | } |
| 408 | 413 | ||
| 409 | static void percpu_modcopy(struct module *mod, | 414 | static void percpu_modcopy(struct module *mod, |
| @@ -523,7 +528,8 @@ static void module_unload_init(struct module *mod) | |||
| 523 | { | 528 | { |
| 524 | int cpu; | 529 | int cpu; |
| 525 | 530 | ||
| 526 | INIT_LIST_HEAD(&mod->modules_which_use_me); | 531 | INIT_LIST_HEAD(&mod->source_list); |
| 532 | INIT_LIST_HEAD(&mod->target_list); | ||
| 527 | for_each_possible_cpu(cpu) { | 533 | for_each_possible_cpu(cpu) { |
| 528 | per_cpu_ptr(mod->refptr, cpu)->incs = 0; | 534 | per_cpu_ptr(mod->refptr, cpu)->incs = 0; |
| 529 | per_cpu_ptr(mod->refptr, cpu)->decs = 0; | 535 | per_cpu_ptr(mod->refptr, cpu)->decs = 0; |
| @@ -535,20 +541,13 @@ static void module_unload_init(struct module *mod) | |||
| 535 | mod->waiter = current; | 541 | mod->waiter = current; |
| 536 | } | 542 | } |
| 537 | 543 | ||
| 538 | /* modules using other modules */ | ||
| 539 | struct module_use | ||
| 540 | { | ||
| 541 | struct list_head list; | ||
| 542 | struct module *module_which_uses; | ||
| 543 | }; | ||
| 544 | |||
| 545 | /* Does a already use b? */ | 544 | /* Does a already use b? */ |
| 546 | static int already_uses(struct module *a, struct module *b) | 545 | static int already_uses(struct module *a, struct module *b) |
| 547 | { | 546 | { |
| 548 | struct module_use *use; | 547 | struct module_use *use; |
| 549 | 548 | ||
| 550 | list_for_each_entry(use, &b->modules_which_use_me, list) { | 549 | list_for_each_entry(use, &b->source_list, source_list) { |
| 551 | if (use->module_which_uses == a) { | 550 | if (use->source == a) { |
| 552 | DEBUGP("%s uses %s!\n", a->name, b->name); | 551 | DEBUGP("%s uses %s!\n", a->name, b->name); |
| 553 | return 1; | 552 | return 1; |
| 554 | } | 553 | } |
| @@ -557,62 +556,68 @@ static int already_uses(struct module *a, struct module *b) | |||
| 557 | return 0; | 556 | return 0; |
| 558 | } | 557 | } |
| 559 | 558 | ||
| 560 | /* Module a uses b */ | 559 | /* |
| 561 | int use_module(struct module *a, struct module *b) | 560 | * Module a uses b |
| 561 | * - we add 'a' as a "source", 'b' as a "target" of module use | ||
| 562 | * - the module_use is added to the list of 'b' sources (so | ||
| 563 | * 'b' can walk the list to see who sourced them), and of 'a' | ||
| 564 | * targets (so 'a' can see what modules it targets). | ||
| 565 | */ | ||
| 566 | static int add_module_usage(struct module *a, struct module *b) | ||
| 562 | { | 567 | { |
| 563 | struct module_use *use; | 568 | struct module_use *use; |
| 564 | int no_warn, err; | ||
| 565 | 569 | ||
| 566 | if (b == NULL || already_uses(a, b)) return 1; | 570 | DEBUGP("Allocating new usage for %s.\n", a->name); |
| 571 | use = kmalloc(sizeof(*use), GFP_ATOMIC); | ||
| 572 | if (!use) { | ||
| 573 | printk(KERN_WARNING "%s: out of memory loading\n", a->name); | ||
| 574 | return -ENOMEM; | ||
| 575 | } | ||
| 576 | |||
| 577 | use->source = a; | ||
| 578 | use->target = b; | ||
| 579 | list_add(&use->source_list, &b->source_list); | ||
| 580 | list_add(&use->target_list, &a->target_list); | ||
| 581 | return 0; | ||
| 582 | } | ||
| 567 | 583 | ||
| 568 | /* If we're interrupted or time out, we fail. */ | 584 | /* Module a uses b: caller needs module_mutex() */ |
| 569 | if (wait_event_interruptible_timeout( | 585 | int ref_module(struct module *a, struct module *b) |
| 570 | module_wq, (err = strong_try_module_get(b)) != -EBUSY, | 586 | { |
| 571 | 30 * HZ) <= 0) { | 587 | int err; |
| 572 | printk("%s: gave up waiting for init of module %s.\n", | 588 | |
| 573 | a->name, b->name); | 589 | if (b == NULL || already_uses(a, b)) |
| 574 | return 0; | 590 | return 0; |
| 575 | } | ||
| 576 | 591 | ||
| 577 | /* If strong_try_module_get() returned a different error, we fail. */ | 592 | /* If module isn't available, we fail. */ |
| 593 | err = strong_try_module_get(b); | ||
| 578 | if (err) | 594 | if (err) |
| 579 | return 0; | 595 | return err; |
| 580 | 596 | ||
| 581 | DEBUGP("Allocating new usage for %s.\n", a->name); | 597 | err = add_module_usage(a, b); |
| 582 | use = kmalloc(sizeof(*use), GFP_ATOMIC); | 598 | if (err) { |
| 583 | if (!use) { | ||
| 584 | printk("%s: out of memory loading\n", a->name); | ||
| 585 | module_put(b); | 599 | module_put(b); |
| 586 | return 0; | 600 | return err; |
| 587 | } | 601 | } |
| 588 | 602 | return 0; | |
| 589 | use->module_which_uses = a; | ||
| 590 | list_add(&use->list, &b->modules_which_use_me); | ||
| 591 | no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); | ||
| 592 | return 1; | ||
| 593 | } | 603 | } |
| 594 | EXPORT_SYMBOL_GPL(use_module); | 604 | EXPORT_SYMBOL_GPL(ref_module); |
| 595 | 605 | ||
| 596 | /* Clear the unload stuff of the module. */ | 606 | /* Clear the unload stuff of the module. */ |
| 597 | static void module_unload_free(struct module *mod) | 607 | static void module_unload_free(struct module *mod) |
| 598 | { | 608 | { |
| 599 | struct module *i; | 609 | struct module_use *use, *tmp; |
| 600 | |||
| 601 | list_for_each_entry(i, &modules, list) { | ||
| 602 | struct module_use *use; | ||
| 603 | 610 | ||
| 604 | list_for_each_entry(use, &i->modules_which_use_me, list) { | 611 | mutex_lock(&module_mutex); |
| 605 | if (use->module_which_uses == mod) { | 612 | list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) { |
| 606 | DEBUGP("%s unusing %s\n", mod->name, i->name); | 613 | struct module *i = use->target; |
| 607 | module_put(i); | 614 | DEBUGP("%s unusing %s\n", mod->name, i->name); |
| 608 | list_del(&use->list); | 615 | module_put(i); |
| 609 | kfree(use); | 616 | list_del(&use->source_list); |
| 610 | sysfs_remove_link(i->holders_dir, mod->name); | 617 | list_del(&use->target_list); |
| 611 | /* There can be at most one match. */ | 618 | kfree(use); |
| 612 | break; | ||
| 613 | } | ||
| 614 | } | ||
| 615 | } | 619 | } |
| 620 | mutex_unlock(&module_mutex); | ||
| 616 | } | 621 | } |
| 617 | 622 | ||
| 618 | #ifdef CONFIG_MODULE_FORCE_UNLOAD | 623 | #ifdef CONFIG_MODULE_FORCE_UNLOAD |
| @@ -735,7 +740,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
| 735 | goto out; | 740 | goto out; |
| 736 | } | 741 | } |
| 737 | 742 | ||
| 738 | if (!list_empty(&mod->modules_which_use_me)) { | 743 | if (!list_empty(&mod->source_list)) { |
| 739 | /* Other modules depend on us: get rid of them first. */ | 744 | /* Other modules depend on us: get rid of them first. */ |
| 740 | ret = -EWOULDBLOCK; | 745 | ret = -EWOULDBLOCK; |
| 741 | goto out; | 746 | goto out; |
| @@ -779,13 +784,14 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
| 779 | blocking_notifier_call_chain(&module_notify_list, | 784 | blocking_notifier_call_chain(&module_notify_list, |
| 780 | MODULE_STATE_GOING, mod); | 785 | MODULE_STATE_GOING, mod); |
| 781 | async_synchronize_full(); | 786 | async_synchronize_full(); |
| 782 | mutex_lock(&module_mutex); | 787 | |
| 783 | /* Store the name of the last unloaded module for diagnostic purposes */ | 788 | /* Store the name of the last unloaded module for diagnostic purposes */ |
| 784 | strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); | 789 | strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); |
| 785 | ddebug_remove_module(mod->name); | 790 | ddebug_remove_module(mod->name); |
| 786 | free_module(mod); | ||
| 787 | 791 | ||
| 788 | out: | 792 | free_module(mod); |
| 793 | return 0; | ||
| 794 | out: | ||
| 789 | mutex_unlock(&module_mutex); | 795 | mutex_unlock(&module_mutex); |
| 790 | return ret; | 796 | return ret; |
| 791 | } | 797 | } |
| @@ -799,9 +805,9 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) | |||
| 799 | 805 | ||
| 800 | /* Always include a trailing , so userspace can differentiate | 806 | /* Always include a trailing , so userspace can differentiate |
| 801 | between this and the old multi-field proc format. */ | 807 | between this and the old multi-field proc format. */ |
| 802 | list_for_each_entry(use, &mod->modules_which_use_me, list) { | 808 | list_for_each_entry(use, &mod->source_list, source_list) { |
| 803 | printed_something = 1; | 809 | printed_something = 1; |
| 804 | seq_printf(m, "%s,", use->module_which_uses->name); | 810 | seq_printf(m, "%s,", use->source->name); |
| 805 | } | 811 | } |
| 806 | 812 | ||
| 807 | if (mod->init != NULL && mod->exit == NULL) { | 813 | if (mod->init != NULL && mod->exit == NULL) { |
| @@ -880,11 +886,11 @@ static inline void module_unload_free(struct module *mod) | |||
| 880 | { | 886 | { |
| 881 | } | 887 | } |
| 882 | 888 | ||
| 883 | int use_module(struct module *a, struct module *b) | 889 | int ref_module(struct module *a, struct module *b) |
| 884 | { | 890 | { |
| 885 | return strong_try_module_get(b) == 0; | 891 | return strong_try_module_get(b); |
| 886 | } | 892 | } |
| 887 | EXPORT_SYMBOL_GPL(use_module); | 893 | EXPORT_SYMBOL_GPL(ref_module); |
| 888 | 894 | ||
| 889 | static inline void module_unload_init(struct module *mod) | 895 | static inline void module_unload_init(struct module *mod) |
| 890 | { | 896 | { |
| @@ -1001,6 +1007,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, | |||
| 1001 | { | 1007 | { |
| 1002 | const unsigned long *crc; | 1008 | const unsigned long *crc; |
| 1003 | 1009 | ||
| 1010 | /* Since this should be found in kernel (which can't be removed), | ||
| 1011 | * no locking is necessary. */ | ||
| 1004 | if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL, | 1012 | if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL, |
| 1005 | &crc, true, false)) | 1013 | &crc, true, false)) |
| 1006 | BUG(); | 1014 | BUG(); |
| @@ -1043,29 +1051,62 @@ static inline int same_magic(const char *amagic, const char *bmagic, | |||
| 1043 | } | 1051 | } |
| 1044 | #endif /* CONFIG_MODVERSIONS */ | 1052 | #endif /* CONFIG_MODVERSIONS */ |
| 1045 | 1053 | ||
| 1046 | /* Resolve a symbol for this module. I.e. if we find one, record usage. | 1054 | /* Resolve a symbol for this module. I.e. if we find one, record usage. */ |
| 1047 | Must be holding module_mutex. */ | ||
| 1048 | static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, | 1055 | static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, |
| 1049 | unsigned int versindex, | 1056 | unsigned int versindex, |
| 1050 | const char *name, | 1057 | const char *name, |
| 1051 | struct module *mod) | 1058 | struct module *mod, |
| 1059 | char ownername[]) | ||
| 1052 | { | 1060 | { |
| 1053 | struct module *owner; | 1061 | struct module *owner; |
| 1054 | const struct kernel_symbol *sym; | 1062 | const struct kernel_symbol *sym; |
| 1055 | const unsigned long *crc; | 1063 | const unsigned long *crc; |
| 1064 | int err; | ||
| 1056 | 1065 | ||
| 1066 | mutex_lock(&module_mutex); | ||
| 1057 | sym = find_symbol(name, &owner, &crc, | 1067 | sym = find_symbol(name, &owner, &crc, |
| 1058 | !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); | 1068 | !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); |
| 1059 | /* use_module can fail due to OOM, | 1069 | if (!sym) |
| 1060 | or module initialization or unloading */ | 1070 | goto unlock; |
| 1061 | if (sym) { | 1071 | |
| 1062 | if (!check_version(sechdrs, versindex, name, mod, crc, owner) | 1072 | if (!check_version(sechdrs, versindex, name, mod, crc, owner)) { |
| 1063 | || !use_module(mod, owner)) | 1073 | sym = ERR_PTR(-EINVAL); |
| 1064 | sym = NULL; | 1074 | goto getname; |
| 1065 | } | 1075 | } |
| 1076 | |||
| 1077 | err = ref_module(mod, owner); | ||
| 1078 | if (err) { | ||
| 1079 | sym = ERR_PTR(err); | ||
| 1080 | goto getname; | ||
| 1081 | } | ||
| 1082 | |||
| 1083 | getname: | ||
| 1084 | /* We must make copy under the lock if we failed to get ref. */ | ||
| 1085 | strncpy(ownername, module_name(owner), MODULE_NAME_LEN); | ||
| 1086 | unlock: | ||
| 1087 | mutex_unlock(&module_mutex); | ||
| 1066 | return sym; | 1088 | return sym; |
| 1067 | } | 1089 | } |
| 1068 | 1090 | ||
| 1091 | static const struct kernel_symbol *resolve_symbol_wait(Elf_Shdr *sechdrs, | ||
| 1092 | unsigned int versindex, | ||
| 1093 | const char *name, | ||
| 1094 | struct module *mod) | ||
| 1095 | { | ||
| 1096 | const struct kernel_symbol *ksym; | ||
| 1097 | char ownername[MODULE_NAME_LEN]; | ||
| 1098 | |||
| 1099 | if (wait_event_interruptible_timeout(module_wq, | ||
| 1100 | !IS_ERR(ksym = resolve_symbol(sechdrs, versindex, name, | ||
| 1101 | mod, ownername)) || | ||
| 1102 | PTR_ERR(ksym) != -EBUSY, | ||
| 1103 | 30 * HZ) <= 0) { | ||
| 1104 | printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n", | ||
| 1105 | mod->name, ownername); | ||
| 1106 | } | ||
| 1107 | return ksym; | ||
| 1108 | } | ||
| 1109 | |||
| 1069 | /* | 1110 | /* |
| 1070 | * /sys/module/foo/sections stuff | 1111 | * /sys/module/foo/sections stuff |
| 1071 | * J. Corbet <corbet@lwn.net> | 1112 | * J. Corbet <corbet@lwn.net> |
| @@ -1295,7 +1336,34 @@ static inline void remove_notes_attrs(struct module *mod) | |||
| 1295 | #endif | 1336 | #endif |
| 1296 | 1337 | ||
| 1297 | #ifdef CONFIG_SYSFS | 1338 | #ifdef CONFIG_SYSFS |
| 1298 | int module_add_modinfo_attrs(struct module *mod) | 1339 | static void add_usage_links(struct module *mod) |
| 1340 | { | ||
| 1341 | #ifdef CONFIG_MODULE_UNLOAD | ||
| 1342 | struct module_use *use; | ||
| 1343 | int nowarn; | ||
| 1344 | |||
| 1345 | mutex_lock(&module_mutex); | ||
| 1346 | list_for_each_entry(use, &mod->target_list, target_list) { | ||
| 1347 | nowarn = sysfs_create_link(use->target->holders_dir, | ||
| 1348 | &mod->mkobj.kobj, mod->name); | ||
| 1349 | } | ||
| 1350 | mutex_unlock(&module_mutex); | ||
| 1351 | #endif | ||
| 1352 | } | ||
| 1353 | |||
| 1354 | static void del_usage_links(struct module *mod) | ||
| 1355 | { | ||
| 1356 | #ifdef CONFIG_MODULE_UNLOAD | ||
| 1357 | struct module_use *use; | ||
| 1358 | |||
| 1359 | mutex_lock(&module_mutex); | ||
| 1360 | list_for_each_entry(use, &mod->target_list, target_list) | ||
| 1361 | sysfs_remove_link(use->target->holders_dir, mod->name); | ||
| 1362 | mutex_unlock(&module_mutex); | ||
| 1363 | #endif | ||
| 1364 | } | ||
| 1365 | |||
| 1366 | static int module_add_modinfo_attrs(struct module *mod) | ||
| 1299 | { | 1367 | { |
| 1300 | struct module_attribute *attr; | 1368 | struct module_attribute *attr; |
| 1301 | struct module_attribute *temp_attr; | 1369 | struct module_attribute *temp_attr; |
| @@ -1321,7 +1389,7 @@ int module_add_modinfo_attrs(struct module *mod) | |||
| 1321 | return error; | 1389 | return error; |
| 1322 | } | 1390 | } |
| 1323 | 1391 | ||
| 1324 | void module_remove_modinfo_attrs(struct module *mod) | 1392 | static void module_remove_modinfo_attrs(struct module *mod) |
| 1325 | { | 1393 | { |
| 1326 | struct module_attribute *attr; | 1394 | struct module_attribute *attr; |
| 1327 | int i; | 1395 | int i; |
| @@ -1337,7 +1405,7 @@ void module_remove_modinfo_attrs(struct module *mod) | |||
| 1337 | kfree(mod->modinfo_attrs); | 1405 | kfree(mod->modinfo_attrs); |
| 1338 | } | 1406 | } |
| 1339 | 1407 | ||
| 1340 | int mod_sysfs_init(struct module *mod) | 1408 | static int mod_sysfs_init(struct module *mod) |
| 1341 | { | 1409 | { |
| 1342 | int err; | 1410 | int err; |
| 1343 | struct kobject *kobj; | 1411 | struct kobject *kobj; |
| @@ -1371,12 +1439,16 @@ out: | |||
| 1371 | return err; | 1439 | return err; |
| 1372 | } | 1440 | } |
| 1373 | 1441 | ||
| 1374 | int mod_sysfs_setup(struct module *mod, | 1442 | static int mod_sysfs_setup(struct module *mod, |
| 1375 | struct kernel_param *kparam, | 1443 | struct kernel_param *kparam, |
| 1376 | unsigned int num_params) | 1444 | unsigned int num_params) |
| 1377 | { | 1445 | { |
| 1378 | int err; | 1446 | int err; |
| 1379 | 1447 | ||
| 1448 | err = mod_sysfs_init(mod); | ||
| 1449 | if (err) | ||
| 1450 | goto out; | ||
| 1451 | |||
| 1380 | mod->holders_dir = kobject_create_and_add("holders", &mod->mkobj.kobj); | 1452 | mod->holders_dir = kobject_create_and_add("holders", &mod->mkobj.kobj); |
| 1381 | if (!mod->holders_dir) { | 1453 | if (!mod->holders_dir) { |
| 1382 | err = -ENOMEM; | 1454 | err = -ENOMEM; |
| @@ -1391,6 +1463,8 @@ int mod_sysfs_setup(struct module *mod, | |||
| 1391 | if (err) | 1463 | if (err) |
| 1392 | goto out_unreg_param; | 1464 | goto out_unreg_param; |
| 1393 | 1465 | ||
| 1466 | add_usage_links(mod); | ||
| 1467 | |||
| 1394 | kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); | 1468 | kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); |
| 1395 | return 0; | 1469 | return 0; |
| 1396 | 1470 | ||
| @@ -1400,6 +1474,7 @@ out_unreg_holders: | |||
| 1400 | kobject_put(mod->holders_dir); | 1474 | kobject_put(mod->holders_dir); |
| 1401 | out_unreg: | 1475 | out_unreg: |
| 1402 | kobject_put(&mod->mkobj.kobj); | 1476 | kobject_put(&mod->mkobj.kobj); |
| 1477 | out: | ||
| 1403 | return err; | 1478 | return err; |
| 1404 | } | 1479 | } |
| 1405 | 1480 | ||
| @@ -1410,14 +1485,40 @@ static void mod_sysfs_fini(struct module *mod) | |||
| 1410 | 1485 | ||
| 1411 | #else /* CONFIG_SYSFS */ | 1486 | #else /* CONFIG_SYSFS */ |
| 1412 | 1487 | ||
| 1488 | static inline int mod_sysfs_init(struct module *mod) | ||
| 1489 | { | ||
| 1490 | return 0; | ||
| 1491 | } | ||
| 1492 | |||
| 1493 | static inline int mod_sysfs_setup(struct module *mod, | ||
| 1494 | struct kernel_param *kparam, | ||
| 1495 | unsigned int num_params) | ||
| 1496 | { | ||
| 1497 | return 0; | ||
| 1498 | } | ||
| 1499 | |||
| 1500 | static inline int module_add_modinfo_attrs(struct module *mod) | ||
| 1501 | { | ||
| 1502 | return 0; | ||
| 1503 | } | ||
| 1504 | |||
| 1505 | static inline void module_remove_modinfo_attrs(struct module *mod) | ||
| 1506 | { | ||
| 1507 | } | ||
| 1508 | |||
| 1413 | static void mod_sysfs_fini(struct module *mod) | 1509 | static void mod_sysfs_fini(struct module *mod) |
| 1414 | { | 1510 | { |
| 1415 | } | 1511 | } |
| 1416 | 1512 | ||
| 1513 | static void del_usage_links(struct module *mod) | ||
| 1514 | { | ||
| 1515 | } | ||
| 1516 | |||
| 1417 | #endif /* CONFIG_SYSFS */ | 1517 | #endif /* CONFIG_SYSFS */ |
| 1418 | 1518 | ||
| 1419 | static void mod_kobject_remove(struct module *mod) | 1519 | static void mod_kobject_remove(struct module *mod) |
| 1420 | { | 1520 | { |
| 1521 | del_usage_links(mod); | ||
| 1421 | module_remove_modinfo_attrs(mod); | 1522 | module_remove_modinfo_attrs(mod); |
| 1422 | module_param_sysfs_remove(mod); | 1523 | module_param_sysfs_remove(mod); |
| 1423 | kobject_put(mod->mkobj.drivers_dir); | 1524 | kobject_put(mod->mkobj.drivers_dir); |
| @@ -1436,13 +1537,15 @@ static int __unlink_module(void *_mod) | |||
| 1436 | return 0; | 1537 | return 0; |
| 1437 | } | 1538 | } |
| 1438 | 1539 | ||
| 1439 | /* Free a module, remove from lists, etc (must hold module_mutex). */ | 1540 | /* Free a module, remove from lists, etc. */ |
| 1440 | static void free_module(struct module *mod) | 1541 | static void free_module(struct module *mod) |
| 1441 | { | 1542 | { |
| 1442 | trace_module_free(mod); | 1543 | trace_module_free(mod); |
| 1443 | 1544 | ||
| 1444 | /* Delete from various lists */ | 1545 | /* Delete from various lists */ |
| 1546 | mutex_lock(&module_mutex); | ||
| 1445 | stop_machine(__unlink_module, mod, NULL); | 1547 | stop_machine(__unlink_module, mod, NULL); |
| 1548 | mutex_unlock(&module_mutex); | ||
| 1446 | remove_notes_attrs(mod); | 1549 | remove_notes_attrs(mod); |
| 1447 | remove_sect_attrs(mod); | 1550 | remove_sect_attrs(mod); |
| 1448 | mod_kobject_remove(mod); | 1551 | mod_kobject_remove(mod); |
| @@ -1493,6 +1596,8 @@ EXPORT_SYMBOL_GPL(__symbol_get); | |||
| 1493 | /* | 1596 | /* |
| 1494 | * Ensure that an exported symbol [global namespace] does not already exist | 1597 | * Ensure that an exported symbol [global namespace] does not already exist |
| 1495 | * in the kernel or in some other module's exported symbol table. | 1598 | * in the kernel or in some other module's exported symbol table. |
| 1599 | * | ||
| 1600 | * You must hold the module_mutex. | ||
| 1496 | */ | 1601 | */ |
| 1497 | static int verify_export_symbols(struct module *mod) | 1602 | static int verify_export_symbols(struct module *mod) |
| 1498 | { | 1603 | { |
| @@ -1558,21 +1663,23 @@ static int simplify_symbols(Elf_Shdr *sechdrs, | |||
| 1558 | break; | 1663 | break; |
| 1559 | 1664 | ||
| 1560 | case SHN_UNDEF: | 1665 | case SHN_UNDEF: |
| 1561 | ksym = resolve_symbol(sechdrs, versindex, | 1666 | ksym = resolve_symbol_wait(sechdrs, versindex, |
| 1562 | strtab + sym[i].st_name, mod); | 1667 | strtab + sym[i].st_name, |
| 1668 | mod); | ||
| 1563 | /* Ok if resolved. */ | 1669 | /* Ok if resolved. */ |
| 1564 | if (ksym) { | 1670 | if (ksym && !IS_ERR(ksym)) { |
| 1565 | sym[i].st_value = ksym->value; | 1671 | sym[i].st_value = ksym->value; |
| 1566 | break; | 1672 | break; |
| 1567 | } | 1673 | } |
| 1568 | 1674 | ||
| 1569 | /* Ok if weak. */ | 1675 | /* Ok if weak. */ |
| 1570 | if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK) | 1676 | if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK) |
| 1571 | break; | 1677 | break; |
| 1572 | 1678 | ||
| 1573 | printk(KERN_WARNING "%s: Unknown symbol %s\n", | 1679 | printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n", |
| 1574 | mod->name, strtab + sym[i].st_name); | 1680 | mod->name, strtab + sym[i].st_name, |
| 1575 | ret = -ENOENT; | 1681 | PTR_ERR(ksym)); |
| 1682 | ret = PTR_ERR(ksym) ?: -ENOENT; | ||
| 1576 | break; | 1683 | break; |
| 1577 | 1684 | ||
| 1578 | default: | 1685 | default: |
| @@ -1955,16 +2062,24 @@ static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num) | |||
| 1955 | #endif | 2062 | #endif |
| 1956 | } | 2063 | } |
| 1957 | 2064 | ||
| 2065 | static void dynamic_debug_remove(struct _ddebug *debug) | ||
| 2066 | { | ||
| 2067 | if (debug) | ||
| 2068 | ddebug_remove_module(debug->modname); | ||
| 2069 | } | ||
| 2070 | |||
| 1958 | static void *module_alloc_update_bounds(unsigned long size) | 2071 | static void *module_alloc_update_bounds(unsigned long size) |
| 1959 | { | 2072 | { |
| 1960 | void *ret = module_alloc(size); | 2073 | void *ret = module_alloc(size); |
| 1961 | 2074 | ||
| 1962 | if (ret) { | 2075 | if (ret) { |
| 2076 | mutex_lock(&module_mutex); | ||
| 1963 | /* Update module bounds. */ | 2077 | /* Update module bounds. */ |
| 1964 | if ((unsigned long)ret < module_addr_min) | 2078 | if ((unsigned long)ret < module_addr_min) |
| 1965 | module_addr_min = (unsigned long)ret; | 2079 | module_addr_min = (unsigned long)ret; |
| 1966 | if ((unsigned long)ret + size > module_addr_max) | 2080 | if ((unsigned long)ret + size > module_addr_max) |
| 1967 | module_addr_max = (unsigned long)ret + size; | 2081 | module_addr_max = (unsigned long)ret + size; |
| 2082 | mutex_unlock(&module_mutex); | ||
| 1968 | } | 2083 | } |
| 1969 | return ret; | 2084 | return ret; |
| 1970 | } | 2085 | } |
| @@ -2014,6 +2129,9 @@ static noinline struct module *load_module(void __user *umod, | |||
| 2014 | long err = 0; | 2129 | long err = 0; |
| 2015 | void *ptr = NULL; /* Stops spurious gcc warning */ | 2130 | void *ptr = NULL; /* Stops spurious gcc warning */ |
| 2016 | unsigned long symoffs, stroffs, *strmap; | 2131 | unsigned long symoffs, stroffs, *strmap; |
| 2132 | void __percpu *percpu; | ||
| 2133 | struct _ddebug *debug = NULL; | ||
| 2134 | unsigned int num_debug = 0; | ||
| 2017 | 2135 | ||
| 2018 | mm_segment_t old_fs; | 2136 | mm_segment_t old_fs; |
| 2019 | 2137 | ||
| @@ -2138,11 +2256,6 @@ static noinline struct module *load_module(void __user *umod, | |||
| 2138 | goto free_mod; | 2256 | goto free_mod; |
| 2139 | } | 2257 | } |
| 2140 | 2258 | ||
| 2141 | if (find_module(mod->name)) { | ||
| 2142 | err = -EEXIST; | ||
| 2143 | goto free_mod; | ||
| 2144 | } | ||
| 2145 | |||
| 2146 | mod->state = MODULE_STATE_COMING; | 2259 | mod->state = MODULE_STATE_COMING; |
| 2147 | 2260 | ||
| 2148 | /* Allow arches to frob section contents and sizes. */ | 2261 | /* Allow arches to frob section contents and sizes. */ |
| @@ -2158,6 +2271,8 @@ static noinline struct module *load_module(void __user *umod, | |||
| 2158 | goto free_mod; | 2271 | goto free_mod; |
| 2159 | sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; | 2272 | sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; |
| 2160 | } | 2273 | } |
| 2274 | /* Keep this around for failure path. */ | ||
| 2275 | percpu = mod_percpu(mod); | ||
| 2161 | 2276 | ||
| 2162 | /* Determine total sizes, and put offsets in sh_entsize. For now | 2277 | /* Determine total sizes, and put offsets in sh_entsize. For now |
| 2163 | this is done generically; there doesn't appear to be any | 2278 | this is done generically; there doesn't appear to be any |
| @@ -2231,11 +2346,6 @@ static noinline struct module *load_module(void __user *umod, | |||
| 2231 | /* Now we've moved module, initialize linked lists, etc. */ | 2346 | /* Now we've moved module, initialize linked lists, etc. */ |
| 2232 | module_unload_init(mod); | 2347 | module_unload_init(mod); |
| 2233 | 2348 | ||
| 2234 | /* add kobject, so we can reference it. */ | ||
| 2235 | err = mod_sysfs_init(mod); | ||
| 2236 | if (err) | ||
| 2237 | goto free_unload; | ||
| 2238 | |||
| 2239 | /* Set up license info based on the info section */ | 2349 | /* Set up license info based on the info section */ |
| 2240 | set_license(mod, get_modinfo(sechdrs, infoindex, "license")); | 2350 | set_license(mod, get_modinfo(sechdrs, infoindex, "license")); |
| 2241 | 2351 | ||
| @@ -2360,11 +2470,6 @@ static noinline struct module *load_module(void __user *umod, | |||
| 2360 | goto cleanup; | 2470 | goto cleanup; |
| 2361 | } | 2471 | } |
| 2362 | 2472 | ||
| 2363 | /* Find duplicate symbols */ | ||
| 2364 | err = verify_export_symbols(mod); | ||
| 2365 | if (err < 0) | ||
| 2366 | goto cleanup; | ||
| 2367 | |||
| 2368 | /* Set up and sort exception table */ | 2473 | /* Set up and sort exception table */ |
| 2369 | mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table", | 2474 | mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table", |
| 2370 | sizeof(*mod->extable), &mod->num_exentries); | 2475 | sizeof(*mod->extable), &mod->num_exentries); |
| @@ -2379,15 +2484,9 @@ static noinline struct module *load_module(void __user *umod, | |||
| 2379 | kfree(strmap); | 2484 | kfree(strmap); |
| 2380 | strmap = NULL; | 2485 | strmap = NULL; |
| 2381 | 2486 | ||
| 2382 | if (!mod->taints) { | 2487 | if (!mod->taints) |
| 2383 | struct _ddebug *debug; | ||
| 2384 | unsigned int num_debug; | ||
| 2385 | |||
| 2386 | debug = section_objs(hdr, sechdrs, secstrings, "__verbose", | 2488 | debug = section_objs(hdr, sechdrs, secstrings, "__verbose", |
| 2387 | sizeof(*debug), &num_debug); | 2489 | sizeof(*debug), &num_debug); |
| 2388 | if (debug) | ||
| 2389 | dynamic_debug_setup(debug, num_debug); | ||
| 2390 | } | ||
| 2391 | 2490 | ||
| 2392 | err = module_finalize(hdr, sechdrs, mod); | 2491 | err = module_finalize(hdr, sechdrs, mod); |
| 2393 | if (err < 0) | 2492 | if (err < 0) |
| @@ -2423,7 +2522,22 @@ static noinline struct module *load_module(void __user *umod, | |||
| 2423 | * function to insert in a way safe to concurrent readers. | 2522 | * function to insert in a way safe to concurrent readers. |
| 2424 | * The mutex protects against concurrent writers. | 2523 | * The mutex protects against concurrent writers. |
| 2425 | */ | 2524 | */ |
| 2525 | mutex_lock(&module_mutex); | ||
| 2526 | if (find_module(mod->name)) { | ||
| 2527 | err = -EEXIST; | ||
| 2528 | goto unlock; | ||
| 2529 | } | ||
| 2530 | |||
| 2531 | if (debug) | ||
| 2532 | dynamic_debug_setup(debug, num_debug); | ||
| 2533 | |||
| 2534 | /* Find duplicate symbols */ | ||
| 2535 | err = verify_export_symbols(mod); | ||
| 2536 | if (err < 0) | ||
| 2537 | goto ddebug; | ||
| 2538 | |||
| 2426 | list_add_rcu(&mod->list, &modules); | 2539 | list_add_rcu(&mod->list, &modules); |
| 2540 | mutex_unlock(&module_mutex); | ||
| 2427 | 2541 | ||
| 2428 | err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL); | 2542 | err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL); |
| 2429 | if (err < 0) | 2543 | if (err < 0) |
| @@ -2432,6 +2546,7 @@ static noinline struct module *load_module(void __user *umod, | |||
| 2432 | err = mod_sysfs_setup(mod, mod->kp, mod->num_kp); | 2546 | err = mod_sysfs_setup(mod, mod->kp, mod->num_kp); |
| 2433 | if (err < 0) | 2547 | if (err < 0) |
| 2434 | goto unlink; | 2548 | goto unlink; |
| 2549 | |||
| 2435 | add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); | 2550 | add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); |
| 2436 | add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); | 2551 | add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); |
| 2437 | 2552 | ||
| @@ -2444,15 +2559,17 @@ static noinline struct module *load_module(void __user *umod, | |||
| 2444 | return mod; | 2559 | return mod; |
| 2445 | 2560 | ||
| 2446 | unlink: | 2561 | unlink: |
| 2562 | mutex_lock(&module_mutex); | ||
| 2447 | /* Unlink carefully: kallsyms could be walking list. */ | 2563 | /* Unlink carefully: kallsyms could be walking list. */ |
| 2448 | list_del_rcu(&mod->list); | 2564 | list_del_rcu(&mod->list); |
| 2565 | ddebug: | ||
| 2566 | dynamic_debug_remove(debug); | ||
| 2567 | unlock: | ||
| 2568 | mutex_unlock(&module_mutex); | ||
| 2449 | synchronize_sched(); | 2569 | synchronize_sched(); |
| 2450 | module_arch_cleanup(mod); | 2570 | module_arch_cleanup(mod); |
| 2451 | cleanup: | 2571 | cleanup: |
| 2452 | free_modinfo(mod); | 2572 | free_modinfo(mod); |
| 2453 | kobject_del(&mod->mkobj.kobj); | ||
| 2454 | kobject_put(&mod->mkobj.kobj); | ||
| 2455 | free_unload: | ||
| 2456 | module_unload_free(mod); | 2573 | module_unload_free(mod); |
| 2457 | #if defined(CONFIG_MODULE_UNLOAD) | 2574 | #if defined(CONFIG_MODULE_UNLOAD) |
| 2458 | free_percpu(mod->refptr); | 2575 | free_percpu(mod->refptr); |
| @@ -2463,7 +2580,7 @@ static noinline struct module *load_module(void __user *umod, | |||
| 2463 | module_free(mod, mod->module_core); | 2580 | module_free(mod, mod->module_core); |
| 2464 | /* mod will be freed with core. Don't access it beyond this line! */ | 2581 | /* mod will be freed with core. Don't access it beyond this line! */ |
| 2465 | free_percpu: | 2582 | free_percpu: |
| 2466 | percpu_modfree(mod); | 2583 | free_percpu(percpu); |
| 2467 | free_mod: | 2584 | free_mod: |
| 2468 | kfree(args); | 2585 | kfree(args); |
| 2469 | kfree(strmap); | 2586 | kfree(strmap); |
| @@ -2499,19 +2616,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, | |||
| 2499 | if (!capable(CAP_SYS_MODULE) || modules_disabled) | 2616 | if (!capable(CAP_SYS_MODULE) || modules_disabled) |
| 2500 | return -EPERM; | 2617 | return -EPERM; |
| 2501 | 2618 | ||
| 2502 | /* Only one module load at a time, please */ | ||
| 2503 | if (mutex_lock_interruptible(&module_mutex) != 0) | ||
| 2504 | return -EINTR; | ||
| 2505 | |||
| 2506 | /* Do all the hard work */ | 2619 | /* Do all the hard work */ |
| 2507 | mod = load_module(umod, len, uargs); | 2620 | mod = load_module(umod, len, uargs); |
| 2508 | if (IS_ERR(mod)) { | 2621 | if (IS_ERR(mod)) |
| 2509 | mutex_unlock(&module_mutex); | ||
| 2510 | return PTR_ERR(mod); | 2622 | return PTR_ERR(mod); |
| 2511 | } | ||
| 2512 | |||
| 2513 | /* Drop lock so they can recurse */ | ||
| 2514 | mutex_unlock(&module_mutex); | ||
| 2515 | 2623 | ||
| 2516 | blocking_notifier_call_chain(&module_notify_list, | 2624 | blocking_notifier_call_chain(&module_notify_list, |
| 2517 | MODULE_STATE_COMING, mod); | 2625 | MODULE_STATE_COMING, mod); |
| @@ -2528,9 +2636,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, | |||
| 2528 | module_put(mod); | 2636 | module_put(mod); |
| 2529 | blocking_notifier_call_chain(&module_notify_list, | 2637 | blocking_notifier_call_chain(&module_notify_list, |
| 2530 | MODULE_STATE_GOING, mod); | 2638 | MODULE_STATE_GOING, mod); |
| 2531 | mutex_lock(&module_mutex); | ||
| 2532 | free_module(mod); | 2639 | free_module(mod); |
| 2533 | mutex_unlock(&module_mutex); | ||
| 2534 | wake_up(&module_wq); | 2640 | wake_up(&module_wq); |
| 2535 | return ret; | 2641 | return ret; |
| 2536 | } | 2642 | } |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index bd7ce8ca5bb9..ff86c558af4c 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
| @@ -283,14 +283,15 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) | |||
| 283 | static void | 283 | static void |
| 284 | list_add_event(struct perf_event *event, struct perf_event_context *ctx) | 284 | list_add_event(struct perf_event *event, struct perf_event_context *ctx) |
| 285 | { | 285 | { |
| 286 | struct perf_event *group_leader = event->group_leader; | 286 | WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); |
| 287 | event->attach_state |= PERF_ATTACH_CONTEXT; | ||
| 287 | 288 | ||
| 288 | /* | 289 | /* |
| 289 | * Depending on whether it is a standalone or sibling event, | 290 | * If we're a stand alone event or group leader, we go to the context |
| 290 | * add it straight to the context's event list, or to the group | 291 | * list, group events are kept attached to the group so that |
| 291 | * leader's sibling list: | 292 | * perf_group_detach can, at all times, locate all siblings. |
| 292 | */ | 293 | */ |
| 293 | if (group_leader == event) { | 294 | if (event->group_leader == event) { |
| 294 | struct list_head *list; | 295 | struct list_head *list; |
| 295 | 296 | ||
| 296 | if (is_software_event(event)) | 297 | if (is_software_event(event)) |
| @@ -298,13 +299,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
| 298 | 299 | ||
| 299 | list = ctx_group_list(event, ctx); | 300 | list = ctx_group_list(event, ctx); |
| 300 | list_add_tail(&event->group_entry, list); | 301 | list_add_tail(&event->group_entry, list); |
| 301 | } else { | ||
| 302 | if (group_leader->group_flags & PERF_GROUP_SOFTWARE && | ||
| 303 | !is_software_event(event)) | ||
| 304 | group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; | ||
| 305 | |||
| 306 | list_add_tail(&event->group_entry, &group_leader->sibling_list); | ||
| 307 | group_leader->nr_siblings++; | ||
| 308 | } | 302 | } |
| 309 | 303 | ||
| 310 | list_add_rcu(&event->event_entry, &ctx->event_list); | 304 | list_add_rcu(&event->event_entry, &ctx->event_list); |
| @@ -313,6 +307,24 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
| 313 | ctx->nr_stat++; | 307 | ctx->nr_stat++; |
| 314 | } | 308 | } |
| 315 | 309 | ||
| 310 | static void perf_group_attach(struct perf_event *event) | ||
| 311 | { | ||
| 312 | struct perf_event *group_leader = event->group_leader; | ||
| 313 | |||
| 314 | WARN_ON_ONCE(event->attach_state & PERF_ATTACH_GROUP); | ||
| 315 | event->attach_state |= PERF_ATTACH_GROUP; | ||
| 316 | |||
| 317 | if (group_leader == event) | ||
| 318 | return; | ||
| 319 | |||
| 320 | if (group_leader->group_flags & PERF_GROUP_SOFTWARE && | ||
| 321 | !is_software_event(event)) | ||
| 322 | group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; | ||
| 323 | |||
| 324 | list_add_tail(&event->group_entry, &group_leader->sibling_list); | ||
| 325 | group_leader->nr_siblings++; | ||
| 326 | } | ||
| 327 | |||
| 316 | /* | 328 | /* |
| 317 | * Remove a event from the lists for its context. | 329 | * Remove a event from the lists for its context. |
| 318 | * Must be called with ctx->mutex and ctx->lock held. | 330 | * Must be called with ctx->mutex and ctx->lock held. |
| @@ -320,17 +332,22 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
| 320 | static void | 332 | static void |
| 321 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) | 333 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) |
| 322 | { | 334 | { |
| 323 | if (list_empty(&event->group_entry)) | 335 | /* |
| 336 | * We can have double detach due to exit/hot-unplug + close. | ||
| 337 | */ | ||
| 338 | if (!(event->attach_state & PERF_ATTACH_CONTEXT)) | ||
| 324 | return; | 339 | return; |
| 340 | |||
| 341 | event->attach_state &= ~PERF_ATTACH_CONTEXT; | ||
| 342 | |||
| 325 | ctx->nr_events--; | 343 | ctx->nr_events--; |
| 326 | if (event->attr.inherit_stat) | 344 | if (event->attr.inherit_stat) |
| 327 | ctx->nr_stat--; | 345 | ctx->nr_stat--; |
| 328 | 346 | ||
| 329 | list_del_init(&event->group_entry); | ||
| 330 | list_del_rcu(&event->event_entry); | 347 | list_del_rcu(&event->event_entry); |
| 331 | 348 | ||
| 332 | if (event->group_leader != event) | 349 | if (event->group_leader == event) |
| 333 | event->group_leader->nr_siblings--; | 350 | list_del_init(&event->group_entry); |
| 334 | 351 | ||
| 335 | update_group_times(event); | 352 | update_group_times(event); |
| 336 | 353 | ||
| @@ -345,21 +362,39 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
| 345 | event->state = PERF_EVENT_STATE_OFF; | 362 | event->state = PERF_EVENT_STATE_OFF; |
| 346 | } | 363 | } |
| 347 | 364 | ||
| 348 | static void | 365 | static void perf_group_detach(struct perf_event *event) |
| 349 | perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx) | ||
| 350 | { | 366 | { |
| 351 | struct perf_event *sibling, *tmp; | 367 | struct perf_event *sibling, *tmp; |
| 368 | struct list_head *list = NULL; | ||
| 369 | |||
| 370 | /* | ||
| 371 | * We can have double detach due to exit/hot-unplug + close. | ||
| 372 | */ | ||
| 373 | if (!(event->attach_state & PERF_ATTACH_GROUP)) | ||
| 374 | return; | ||
| 375 | |||
| 376 | event->attach_state &= ~PERF_ATTACH_GROUP; | ||
| 377 | |||
| 378 | /* | ||
| 379 | * If this is a sibling, remove it from its group. | ||
| 380 | */ | ||
| 381 | if (event->group_leader != event) { | ||
| 382 | list_del_init(&event->group_entry); | ||
| 383 | event->group_leader->nr_siblings--; | ||
| 384 | return; | ||
| 385 | } | ||
| 386 | |||
| 387 | if (!list_empty(&event->group_entry)) | ||
| 388 | list = &event->group_entry; | ||
| 352 | 389 | ||
| 353 | /* | 390 | /* |
| 354 | * If this was a group event with sibling events then | 391 | * If this was a group event with sibling events then |
| 355 | * upgrade the siblings to singleton events by adding them | 392 | * upgrade the siblings to singleton events by adding them |
| 356 | * to the context list directly: | 393 | * to whatever list we are on. |
| 357 | */ | 394 | */ |
| 358 | list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { | 395 | list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { |
| 359 | struct list_head *list; | 396 | if (list) |
| 360 | 397 | list_move_tail(&sibling->group_entry, list); | |
| 361 | list = ctx_group_list(event, ctx); | ||
| 362 | list_move_tail(&sibling->group_entry, list); | ||
| 363 | sibling->group_leader = sibling; | 398 | sibling->group_leader = sibling; |
| 364 | 399 | ||
| 365 | /* Inherit group flags from the previous leader */ | 400 | /* Inherit group flags from the previous leader */ |
| @@ -652,8 +687,11 @@ group_sched_in(struct perf_event *group_event, | |||
| 652 | if (txn) | 687 | if (txn) |
| 653 | pmu->start_txn(pmu); | 688 | pmu->start_txn(pmu); |
| 654 | 689 | ||
| 655 | if (event_sched_in(group_event, cpuctx, ctx)) | 690 | if (event_sched_in(group_event, cpuctx, ctx)) { |
| 691 | if (txn) | ||
| 692 | pmu->cancel_txn(pmu); | ||
| 656 | return -EAGAIN; | 693 | return -EAGAIN; |
| 694 | } | ||
| 657 | 695 | ||
| 658 | /* | 696 | /* |
| 659 | * Schedule in siblings as one group (if any): | 697 | * Schedule in siblings as one group (if any): |
| @@ -675,9 +713,6 @@ group_sched_in(struct perf_event *group_event, | |||
| 675 | } | 713 | } |
| 676 | 714 | ||
| 677 | group_error: | 715 | group_error: |
| 678 | if (txn) | ||
| 679 | pmu->cancel_txn(pmu); | ||
| 680 | |||
| 681 | /* | 716 | /* |
| 682 | * Groups can be scheduled in as one unit only, so undo any | 717 | * Groups can be scheduled in as one unit only, so undo any |
| 683 | * partial group before returning: | 718 | * partial group before returning: |
| @@ -689,6 +724,9 @@ group_error: | |||
| 689 | } | 724 | } |
| 690 | event_sched_out(group_event, cpuctx, ctx); | 725 | event_sched_out(group_event, cpuctx, ctx); |
| 691 | 726 | ||
| 727 | if (txn) | ||
| 728 | pmu->cancel_txn(pmu); | ||
| 729 | |||
| 692 | return -EAGAIN; | 730 | return -EAGAIN; |
| 693 | } | 731 | } |
| 694 | 732 | ||
| @@ -727,6 +765,7 @@ static void add_event_to_ctx(struct perf_event *event, | |||
| 727 | struct perf_event_context *ctx) | 765 | struct perf_event_context *ctx) |
| 728 | { | 766 | { |
| 729 | list_add_event(event, ctx); | 767 | list_add_event(event, ctx); |
| 768 | perf_group_attach(event); | ||
| 730 | event->tstamp_enabled = ctx->time; | 769 | event->tstamp_enabled = ctx->time; |
| 731 | event->tstamp_running = ctx->time; | 770 | event->tstamp_running = ctx->time; |
| 732 | event->tstamp_stopped = ctx->time; | 771 | event->tstamp_stopped = ctx->time; |
| @@ -1468,6 +1507,9 @@ do { \ | |||
| 1468 | divisor = nsec * frequency; | 1507 | divisor = nsec * frequency; |
| 1469 | } | 1508 | } |
| 1470 | 1509 | ||
| 1510 | if (!divisor) | ||
| 1511 | return dividend; | ||
| 1512 | |||
| 1471 | return div64_u64(dividend, divisor); | 1513 | return div64_u64(dividend, divisor); |
| 1472 | } | 1514 | } |
| 1473 | 1515 | ||
| @@ -1490,7 +1532,7 @@ static int perf_event_start(struct perf_event *event) | |||
| 1490 | static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) | 1532 | static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) |
| 1491 | { | 1533 | { |
| 1492 | struct hw_perf_event *hwc = &event->hw; | 1534 | struct hw_perf_event *hwc = &event->hw; |
| 1493 | u64 period, sample_period; | 1535 | s64 period, sample_period; |
| 1494 | s64 delta; | 1536 | s64 delta; |
| 1495 | 1537 | ||
| 1496 | period = perf_calculate_period(event, nsec, count); | 1538 | period = perf_calculate_period(event, nsec, count); |
| @@ -1841,6 +1883,7 @@ static void free_event_rcu(struct rcu_head *head) | |||
| 1841 | } | 1883 | } |
| 1842 | 1884 | ||
| 1843 | static void perf_pending_sync(struct perf_event *event); | 1885 | static void perf_pending_sync(struct perf_event *event); |
| 1886 | static void perf_mmap_data_put(struct perf_mmap_data *data); | ||
| 1844 | 1887 | ||
| 1845 | static void free_event(struct perf_event *event) | 1888 | static void free_event(struct perf_event *event) |
| 1846 | { | 1889 | { |
| @@ -1856,9 +1899,9 @@ static void free_event(struct perf_event *event) | |||
| 1856 | atomic_dec(&nr_task_events); | 1899 | atomic_dec(&nr_task_events); |
| 1857 | } | 1900 | } |
| 1858 | 1901 | ||
| 1859 | if (event->output) { | 1902 | if (event->data) { |
| 1860 | fput(event->output->filp); | 1903 | perf_mmap_data_put(event->data); |
| 1861 | event->output = NULL; | 1904 | event->data = NULL; |
| 1862 | } | 1905 | } |
| 1863 | 1906 | ||
| 1864 | if (event->destroy) | 1907 | if (event->destroy) |
| @@ -1893,8 +1936,8 @@ int perf_event_release_kernel(struct perf_event *event) | |||
| 1893 | */ | 1936 | */ |
| 1894 | mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); | 1937 | mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); |
| 1895 | raw_spin_lock_irq(&ctx->lock); | 1938 | raw_spin_lock_irq(&ctx->lock); |
| 1939 | perf_group_detach(event); | ||
| 1896 | list_del_event(event, ctx); | 1940 | list_del_event(event, ctx); |
| 1897 | perf_destroy_group(event, ctx); | ||
| 1898 | raw_spin_unlock_irq(&ctx->lock); | 1941 | raw_spin_unlock_irq(&ctx->lock); |
| 1899 | mutex_unlock(&ctx->mutex); | 1942 | mutex_unlock(&ctx->mutex); |
| 1900 | 1943 | ||
| @@ -2175,7 +2218,27 @@ unlock: | |||
| 2175 | return ret; | 2218 | return ret; |
| 2176 | } | 2219 | } |
| 2177 | 2220 | ||
| 2178 | static int perf_event_set_output(struct perf_event *event, int output_fd); | 2221 | static const struct file_operations perf_fops; |
| 2222 | |||
| 2223 | static struct perf_event *perf_fget_light(int fd, int *fput_needed) | ||
| 2224 | { | ||
| 2225 | struct file *file; | ||
| 2226 | |||
| 2227 | file = fget_light(fd, fput_needed); | ||
| 2228 | if (!file) | ||
| 2229 | return ERR_PTR(-EBADF); | ||
| 2230 | |||
| 2231 | if (file->f_op != &perf_fops) { | ||
| 2232 | fput_light(file, *fput_needed); | ||
| 2233 | *fput_needed = 0; | ||
| 2234 | return ERR_PTR(-EBADF); | ||
| 2235 | } | ||
| 2236 | |||
| 2237 | return file->private_data; | ||
| 2238 | } | ||
| 2239 | |||
| 2240 | static int perf_event_set_output(struct perf_event *event, | ||
| 2241 | struct perf_event *output_event); | ||
| 2179 | static int perf_event_set_filter(struct perf_event *event, void __user *arg); | 2242 | static int perf_event_set_filter(struct perf_event *event, void __user *arg); |
| 2180 | 2243 | ||
| 2181 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 2244 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
| @@ -2202,7 +2265,23 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
| 2202 | return perf_event_period(event, (u64 __user *)arg); | 2265 | return perf_event_period(event, (u64 __user *)arg); |
| 2203 | 2266 | ||
| 2204 | case PERF_EVENT_IOC_SET_OUTPUT: | 2267 | case PERF_EVENT_IOC_SET_OUTPUT: |
| 2205 | return perf_event_set_output(event, arg); | 2268 | { |
| 2269 | struct perf_event *output_event = NULL; | ||
| 2270 | int fput_needed = 0; | ||
| 2271 | int ret; | ||
| 2272 | |||
| 2273 | if (arg != -1) { | ||
| 2274 | output_event = perf_fget_light(arg, &fput_needed); | ||
| 2275 | if (IS_ERR(output_event)) | ||
| 2276 | return PTR_ERR(output_event); | ||
| 2277 | } | ||
| 2278 | |||
| 2279 | ret = perf_event_set_output(event, output_event); | ||
| 2280 | if (output_event) | ||
| 2281 | fput_light(output_event->filp, fput_needed); | ||
| 2282 | |||
| 2283 | return ret; | ||
| 2284 | } | ||
| 2206 | 2285 | ||
| 2207 | case PERF_EVENT_IOC_SET_FILTER: | 2286 | case PERF_EVENT_IOC_SET_FILTER: |
| 2208 | return perf_event_set_filter(event, (void __user *)arg); | 2287 | return perf_event_set_filter(event, (void __user *)arg); |
| @@ -2335,8 +2414,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages) | |||
| 2335 | unsigned long size; | 2414 | unsigned long size; |
| 2336 | int i; | 2415 | int i; |
| 2337 | 2416 | ||
| 2338 | WARN_ON(atomic_read(&event->mmap_count)); | ||
| 2339 | |||
| 2340 | size = sizeof(struct perf_mmap_data); | 2417 | size = sizeof(struct perf_mmap_data); |
| 2341 | size += nr_pages * sizeof(void *); | 2418 | size += nr_pages * sizeof(void *); |
| 2342 | 2419 | ||
| @@ -2452,8 +2529,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages) | |||
| 2452 | unsigned long size; | 2529 | unsigned long size; |
| 2453 | void *all_buf; | 2530 | void *all_buf; |
| 2454 | 2531 | ||
| 2455 | WARN_ON(atomic_read(&event->mmap_count)); | ||
| 2456 | |||
| 2457 | size = sizeof(struct perf_mmap_data); | 2532 | size = sizeof(struct perf_mmap_data); |
| 2458 | size += sizeof(void *); | 2533 | size += sizeof(void *); |
| 2459 | 2534 | ||
| @@ -2536,7 +2611,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data) | |||
| 2536 | if (!data->watermark) | 2611 | if (!data->watermark) |
| 2537 | data->watermark = max_size / 2; | 2612 | data->watermark = max_size / 2; |
| 2538 | 2613 | ||
| 2539 | 2614 | atomic_set(&data->refcount, 1); | |
| 2540 | rcu_assign_pointer(event->data, data); | 2615 | rcu_assign_pointer(event->data, data); |
| 2541 | } | 2616 | } |
| 2542 | 2617 | ||
| @@ -2548,13 +2623,26 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head) | |||
| 2548 | perf_mmap_data_free(data); | 2623 | perf_mmap_data_free(data); |
| 2549 | } | 2624 | } |
| 2550 | 2625 | ||
| 2551 | static void perf_mmap_data_release(struct perf_event *event) | 2626 | static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event) |
| 2552 | { | 2627 | { |
| 2553 | struct perf_mmap_data *data = event->data; | 2628 | struct perf_mmap_data *data; |
| 2629 | |||
| 2630 | rcu_read_lock(); | ||
| 2631 | data = rcu_dereference(event->data); | ||
| 2632 | if (data) { | ||
| 2633 | if (!atomic_inc_not_zero(&data->refcount)) | ||
| 2634 | data = NULL; | ||
| 2635 | } | ||
| 2636 | rcu_read_unlock(); | ||
| 2637 | |||
| 2638 | return data; | ||
| 2639 | } | ||
| 2554 | 2640 | ||
| 2555 | WARN_ON(atomic_read(&event->mmap_count)); | 2641 | static void perf_mmap_data_put(struct perf_mmap_data *data) |
| 2642 | { | ||
| 2643 | if (!atomic_dec_and_test(&data->refcount)) | ||
| 2644 | return; | ||
| 2556 | 2645 | ||
| 2557 | rcu_assign_pointer(event->data, NULL); | ||
| 2558 | call_rcu(&data->rcu_head, perf_mmap_data_free_rcu); | 2646 | call_rcu(&data->rcu_head, perf_mmap_data_free_rcu); |
| 2559 | } | 2647 | } |
| 2560 | 2648 | ||
| @@ -2569,15 +2657,18 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
| 2569 | { | 2657 | { |
| 2570 | struct perf_event *event = vma->vm_file->private_data; | 2658 | struct perf_event *event = vma->vm_file->private_data; |
| 2571 | 2659 | ||
| 2572 | WARN_ON_ONCE(event->ctx->parent_ctx); | ||
| 2573 | if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { | 2660 | if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { |
| 2574 | unsigned long size = perf_data_size(event->data); | 2661 | unsigned long size = perf_data_size(event->data); |
| 2575 | struct user_struct *user = current_user(); | 2662 | struct user_struct *user = event->mmap_user; |
| 2663 | struct perf_mmap_data *data = event->data; | ||
| 2576 | 2664 | ||
| 2577 | atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); | 2665 | atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); |
| 2578 | vma->vm_mm->locked_vm -= event->data->nr_locked; | 2666 | vma->vm_mm->locked_vm -= event->mmap_locked; |
| 2579 | perf_mmap_data_release(event); | 2667 | rcu_assign_pointer(event->data, NULL); |
| 2580 | mutex_unlock(&event->mmap_mutex); | 2668 | mutex_unlock(&event->mmap_mutex); |
| 2669 | |||
| 2670 | perf_mmap_data_put(data); | ||
| 2671 | free_uid(user); | ||
| 2581 | } | 2672 | } |
| 2582 | } | 2673 | } |
| 2583 | 2674 | ||
| @@ -2629,13 +2720,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 2629 | 2720 | ||
| 2630 | WARN_ON_ONCE(event->ctx->parent_ctx); | 2721 | WARN_ON_ONCE(event->ctx->parent_ctx); |
| 2631 | mutex_lock(&event->mmap_mutex); | 2722 | mutex_lock(&event->mmap_mutex); |
| 2632 | if (event->output) { | 2723 | if (event->data) { |
| 2633 | ret = -EINVAL; | 2724 | if (event->data->nr_pages == nr_pages) |
| 2634 | goto unlock; | 2725 | atomic_inc(&event->data->refcount); |
| 2635 | } | 2726 | else |
| 2636 | |||
| 2637 | if (atomic_inc_not_zero(&event->mmap_count)) { | ||
| 2638 | if (nr_pages != event->data->nr_pages) | ||
| 2639 | ret = -EINVAL; | 2727 | ret = -EINVAL; |
| 2640 | goto unlock; | 2728 | goto unlock; |
| 2641 | } | 2729 | } |
| @@ -2667,21 +2755,23 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 2667 | WARN_ON(event->data); | 2755 | WARN_ON(event->data); |
| 2668 | 2756 | ||
| 2669 | data = perf_mmap_data_alloc(event, nr_pages); | 2757 | data = perf_mmap_data_alloc(event, nr_pages); |
| 2670 | ret = -ENOMEM; | 2758 | if (!data) { |
| 2671 | if (!data) | 2759 | ret = -ENOMEM; |
| 2672 | goto unlock; | 2760 | goto unlock; |
| 2761 | } | ||
| 2673 | 2762 | ||
| 2674 | ret = 0; | ||
| 2675 | perf_mmap_data_init(event, data); | 2763 | perf_mmap_data_init(event, data); |
| 2676 | |||
| 2677 | atomic_set(&event->mmap_count, 1); | ||
| 2678 | atomic_long_add(user_extra, &user->locked_vm); | ||
| 2679 | vma->vm_mm->locked_vm += extra; | ||
| 2680 | event->data->nr_locked = extra; | ||
| 2681 | if (vma->vm_flags & VM_WRITE) | 2764 | if (vma->vm_flags & VM_WRITE) |
| 2682 | event->data->writable = 1; | 2765 | event->data->writable = 1; |
| 2683 | 2766 | ||
| 2767 | atomic_long_add(user_extra, &user->locked_vm); | ||
| 2768 | event->mmap_locked = extra; | ||
| 2769 | event->mmap_user = get_current_user(); | ||
| 2770 | vma->vm_mm->locked_vm += event->mmap_locked; | ||
| 2771 | |||
| 2684 | unlock: | 2772 | unlock: |
| 2773 | if (!ret) | ||
| 2774 | atomic_inc(&event->mmap_count); | ||
| 2685 | mutex_unlock(&event->mmap_mutex); | 2775 | mutex_unlock(&event->mmap_mutex); |
| 2686 | 2776 | ||
| 2687 | vma->vm_flags |= VM_RESERVED; | 2777 | vma->vm_flags |= VM_RESERVED; |
| @@ -2977,6 +3067,7 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle, | |||
| 2977 | 3067 | ||
| 2978 | len -= size; | 3068 | len -= size; |
| 2979 | handle->addr += size; | 3069 | handle->addr += size; |
| 3070 | buf += size; | ||
| 2980 | handle->size -= size; | 3071 | handle->size -= size; |
| 2981 | if (!handle->size) { | 3072 | if (!handle->size) { |
| 2982 | struct perf_mmap_data *data = handle->data; | 3073 | struct perf_mmap_data *data = handle->data; |
| @@ -2993,7 +3084,6 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
| 2993 | struct perf_event *event, unsigned int size, | 3084 | struct perf_event *event, unsigned int size, |
| 2994 | int nmi, int sample) | 3085 | int nmi, int sample) |
| 2995 | { | 3086 | { |
| 2996 | struct perf_event *output_event; | ||
| 2997 | struct perf_mmap_data *data; | 3087 | struct perf_mmap_data *data; |
| 2998 | unsigned long tail, offset, head; | 3088 | unsigned long tail, offset, head; |
| 2999 | int have_lost; | 3089 | int have_lost; |
| @@ -3010,10 +3100,6 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
| 3010 | if (event->parent) | 3100 | if (event->parent) |
| 3011 | event = event->parent; | 3101 | event = event->parent; |
| 3012 | 3102 | ||
| 3013 | output_event = rcu_dereference(event->output); | ||
| 3014 | if (output_event) | ||
| 3015 | event = output_event; | ||
| 3016 | |||
| 3017 | data = rcu_dereference(event->data); | 3103 | data = rcu_dereference(event->data); |
| 3018 | if (!data) | 3104 | if (!data) |
| 3019 | goto out; | 3105 | goto out; |
| @@ -3972,13 +4058,6 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, | |||
| 3972 | } | 4058 | } |
| 3973 | } | 4059 | } |
| 3974 | 4060 | ||
| 3975 | static void perf_swevent_unthrottle(struct perf_event *event) | ||
| 3976 | { | ||
| 3977 | /* | ||
| 3978 | * Nothing to do, we already reset hwc->interrupts. | ||
| 3979 | */ | ||
| 3980 | } | ||
| 3981 | |||
| 3982 | static void perf_swevent_add(struct perf_event *event, u64 nr, | 4061 | static void perf_swevent_add(struct perf_event *event, u64 nr, |
| 3983 | int nmi, struct perf_sample_data *data, | 4062 | int nmi, struct perf_sample_data *data, |
| 3984 | struct pt_regs *regs) | 4063 | struct pt_regs *regs) |
| @@ -4193,11 +4272,22 @@ static void perf_swevent_disable(struct perf_event *event) | |||
| 4193 | hlist_del_rcu(&event->hlist_entry); | 4272 | hlist_del_rcu(&event->hlist_entry); |
| 4194 | } | 4273 | } |
| 4195 | 4274 | ||
| 4275 | static void perf_swevent_void(struct perf_event *event) | ||
| 4276 | { | ||
| 4277 | } | ||
| 4278 | |||
| 4279 | static int perf_swevent_int(struct perf_event *event) | ||
| 4280 | { | ||
| 4281 | return 0; | ||
| 4282 | } | ||
| 4283 | |||
| 4196 | static const struct pmu perf_ops_generic = { | 4284 | static const struct pmu perf_ops_generic = { |
| 4197 | .enable = perf_swevent_enable, | 4285 | .enable = perf_swevent_enable, |
| 4198 | .disable = perf_swevent_disable, | 4286 | .disable = perf_swevent_disable, |
| 4287 | .start = perf_swevent_int, | ||
| 4288 | .stop = perf_swevent_void, | ||
| 4199 | .read = perf_swevent_read, | 4289 | .read = perf_swevent_read, |
| 4200 | .unthrottle = perf_swevent_unthrottle, | 4290 | .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ |
| 4201 | }; | 4291 | }; |
| 4202 | 4292 | ||
| 4203 | /* | 4293 | /* |
| @@ -4478,8 +4568,10 @@ static int swevent_hlist_get(struct perf_event *event) | |||
| 4478 | static const struct pmu perf_ops_tracepoint = { | 4568 | static const struct pmu perf_ops_tracepoint = { |
| 4479 | .enable = perf_trace_enable, | 4569 | .enable = perf_trace_enable, |
| 4480 | .disable = perf_trace_disable, | 4570 | .disable = perf_trace_disable, |
| 4571 | .start = perf_swevent_int, | ||
| 4572 | .stop = perf_swevent_void, | ||
| 4481 | .read = perf_swevent_read, | 4573 | .read = perf_swevent_read, |
| 4482 | .unthrottle = perf_swevent_unthrottle, | 4574 | .unthrottle = perf_swevent_void, |
| 4483 | }; | 4575 | }; |
| 4484 | 4576 | ||
| 4485 | static int perf_tp_filter_match(struct perf_event *event, | 4577 | static int perf_tp_filter_match(struct perf_event *event, |
| @@ -4912,39 +5004,17 @@ err_size: | |||
| 4912 | goto out; | 5004 | goto out; |
| 4913 | } | 5005 | } |
| 4914 | 5006 | ||
| 4915 | static int perf_event_set_output(struct perf_event *event, int output_fd) | 5007 | static int |
| 5008 | perf_event_set_output(struct perf_event *event, struct perf_event *output_event) | ||
| 4916 | { | 5009 | { |
| 4917 | struct perf_event *output_event = NULL; | 5010 | struct perf_mmap_data *data = NULL, *old_data = NULL; |
| 4918 | struct file *output_file = NULL; | ||
| 4919 | struct perf_event *old_output; | ||
| 4920 | int fput_needed = 0; | ||
| 4921 | int ret = -EINVAL; | 5011 | int ret = -EINVAL; |
| 4922 | 5012 | ||
| 4923 | /* | 5013 | if (!output_event) |
| 4924 | * Don't allow output of inherited per-task events. This would | ||
| 4925 | * create performance issues due to cross cpu access. | ||
| 4926 | */ | ||
| 4927 | if (event->cpu == -1 && event->attr.inherit) | ||
| 4928 | return -EINVAL; | ||
| 4929 | |||
| 4930 | if (!output_fd) | ||
| 4931 | goto set; | 5014 | goto set; |
| 4932 | 5015 | ||
| 4933 | output_file = fget_light(output_fd, &fput_needed); | 5016 | /* don't allow circular references */ |
| 4934 | if (!output_file) | 5017 | if (event == output_event) |
| 4935 | return -EBADF; | ||
| 4936 | |||
| 4937 | if (output_file->f_op != &perf_fops) | ||
| 4938 | goto out; | ||
| 4939 | |||
| 4940 | output_event = output_file->private_data; | ||
| 4941 | |||
| 4942 | /* Don't chain output fds */ | ||
| 4943 | if (output_event->output) | ||
| 4944 | goto out; | ||
| 4945 | |||
| 4946 | /* Don't set an output fd when we already have an output channel */ | ||
| 4947 | if (event->data) | ||
| 4948 | goto out; | 5018 | goto out; |
| 4949 | 5019 | ||
| 4950 | /* | 5020 | /* |
| @@ -4959,26 +5029,28 @@ static int perf_event_set_output(struct perf_event *event, int output_fd) | |||
| 4959 | if (output_event->cpu == -1 && output_event->ctx != event->ctx) | 5029 | if (output_event->cpu == -1 && output_event->ctx != event->ctx) |
| 4960 | goto out; | 5030 | goto out; |
| 4961 | 5031 | ||
| 4962 | atomic_long_inc(&output_file->f_count); | ||
| 4963 | |||
| 4964 | set: | 5032 | set: |
| 4965 | mutex_lock(&event->mmap_mutex); | 5033 | mutex_lock(&event->mmap_mutex); |
| 4966 | old_output = event->output; | 5034 | /* Can't redirect output if we've got an active mmap() */ |
| 4967 | rcu_assign_pointer(event->output, output_event); | 5035 | if (atomic_read(&event->mmap_count)) |
| 4968 | mutex_unlock(&event->mmap_mutex); | 5036 | goto unlock; |
| 4969 | 5037 | ||
| 4970 | if (old_output) { | 5038 | if (output_event) { |
| 4971 | /* | 5039 | /* get the buffer we want to redirect to */ |
| 4972 | * we need to make sure no existing perf_output_*() | 5040 | data = perf_mmap_data_get(output_event); |
| 4973 | * is still referencing this event. | 5041 | if (!data) |
| 4974 | */ | 5042 | goto unlock; |
| 4975 | synchronize_rcu(); | ||
| 4976 | fput(old_output->filp); | ||
| 4977 | } | 5043 | } |
| 4978 | 5044 | ||
| 5045 | old_data = event->data; | ||
| 5046 | rcu_assign_pointer(event->data, data); | ||
| 4979 | ret = 0; | 5047 | ret = 0; |
| 5048 | unlock: | ||
| 5049 | mutex_unlock(&event->mmap_mutex); | ||
| 5050 | |||
| 5051 | if (old_data) | ||
| 5052 | perf_mmap_data_put(old_data); | ||
| 4980 | out: | 5053 | out: |
| 4981 | fput_light(output_file, fput_needed); | ||
| 4982 | return ret; | 5054 | return ret; |
| 4983 | } | 5055 | } |
| 4984 | 5056 | ||
| @@ -4994,7 +5066,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 4994 | struct perf_event_attr __user *, attr_uptr, | 5066 | struct perf_event_attr __user *, attr_uptr, |
| 4995 | pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) | 5067 | pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) |
| 4996 | { | 5068 | { |
| 4997 | struct perf_event *event, *group_leader; | 5069 | struct perf_event *event, *group_leader = NULL, *output_event = NULL; |
| 4998 | struct perf_event_attr attr; | 5070 | struct perf_event_attr attr; |
| 4999 | struct perf_event_context *ctx; | 5071 | struct perf_event_context *ctx; |
| 5000 | struct file *event_file = NULL; | 5072 | struct file *event_file = NULL; |
| @@ -5034,19 +5106,25 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 5034 | goto err_fd; | 5106 | goto err_fd; |
| 5035 | } | 5107 | } |
| 5036 | 5108 | ||
| 5109 | if (group_fd != -1) { | ||
| 5110 | group_leader = perf_fget_light(group_fd, &fput_needed); | ||
| 5111 | if (IS_ERR(group_leader)) { | ||
| 5112 | err = PTR_ERR(group_leader); | ||
| 5113 | goto err_put_context; | ||
| 5114 | } | ||
| 5115 | group_file = group_leader->filp; | ||
| 5116 | if (flags & PERF_FLAG_FD_OUTPUT) | ||
| 5117 | output_event = group_leader; | ||
| 5118 | if (flags & PERF_FLAG_FD_NO_GROUP) | ||
| 5119 | group_leader = NULL; | ||
| 5120 | } | ||
| 5121 | |||
| 5037 | /* | 5122 | /* |
| 5038 | * Look up the group leader (we will attach this event to it): | 5123 | * Look up the group leader (we will attach this event to it): |
| 5039 | */ | 5124 | */ |
| 5040 | group_leader = NULL; | 5125 | if (group_leader) { |
| 5041 | if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) { | ||
| 5042 | err = -EINVAL; | 5126 | err = -EINVAL; |
| 5043 | group_file = fget_light(group_fd, &fput_needed); | ||
| 5044 | if (!group_file) | ||
| 5045 | goto err_put_context; | ||
| 5046 | if (group_file->f_op != &perf_fops) | ||
| 5047 | goto err_put_context; | ||
| 5048 | 5127 | ||
| 5049 | group_leader = group_file->private_data; | ||
| 5050 | /* | 5128 | /* |
| 5051 | * Do not allow a recursive hierarchy (this new sibling | 5129 | * Do not allow a recursive hierarchy (this new sibling |
| 5052 | * becoming part of another group-sibling): | 5130 | * becoming part of another group-sibling): |
| @@ -5068,9 +5146,16 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 5068 | 5146 | ||
| 5069 | event = perf_event_alloc(&attr, cpu, ctx, group_leader, | 5147 | event = perf_event_alloc(&attr, cpu, ctx, group_leader, |
| 5070 | NULL, NULL, GFP_KERNEL); | 5148 | NULL, NULL, GFP_KERNEL); |
| 5071 | err = PTR_ERR(event); | 5149 | if (IS_ERR(event)) { |
| 5072 | if (IS_ERR(event)) | 5150 | err = PTR_ERR(event); |
| 5073 | goto err_put_context; | 5151 | goto err_put_context; |
| 5152 | } | ||
| 5153 | |||
| 5154 | if (output_event) { | ||
| 5155 | err = perf_event_set_output(event, output_event); | ||
| 5156 | if (err) | ||
| 5157 | goto err_free_put_context; | ||
| 5158 | } | ||
| 5074 | 5159 | ||
| 5075 | event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); | 5160 | event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); |
| 5076 | if (IS_ERR(event_file)) { | 5161 | if (IS_ERR(event_file)) { |
| @@ -5078,12 +5163,6 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 5078 | goto err_free_put_context; | 5163 | goto err_free_put_context; |
| 5079 | } | 5164 | } |
| 5080 | 5165 | ||
| 5081 | if (flags & PERF_FLAG_FD_OUTPUT) { | ||
| 5082 | err = perf_event_set_output(event, group_fd); | ||
| 5083 | if (err) | ||
| 5084 | goto err_fput_free_put_context; | ||
| 5085 | } | ||
| 5086 | |||
| 5087 | event->filp = event_file; | 5166 | event->filp = event_file; |
| 5088 | WARN_ON_ONCE(ctx->parent_ctx); | 5167 | WARN_ON_ONCE(ctx->parent_ctx); |
| 5089 | mutex_lock(&ctx->mutex); | 5168 | mutex_lock(&ctx->mutex); |
| @@ -5097,12 +5176,16 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 5097 | list_add_tail(&event->owner_entry, &current->perf_event_list); | 5176 | list_add_tail(&event->owner_entry, &current->perf_event_list); |
| 5098 | mutex_unlock(&current->perf_event_mutex); | 5177 | mutex_unlock(&current->perf_event_mutex); |
| 5099 | 5178 | ||
| 5179 | /* | ||
| 5180 | * Drop the reference on the group_event after placing the | ||
| 5181 | * new event on the sibling_list. This ensures destruction | ||
| 5182 | * of the group leader will find the pointer to itself in | ||
| 5183 | * perf_group_detach(). | ||
| 5184 | */ | ||
| 5100 | fput_light(group_file, fput_needed); | 5185 | fput_light(group_file, fput_needed); |
| 5101 | fd_install(event_fd, event_file); | 5186 | fd_install(event_fd, event_file); |
| 5102 | return event_fd; | 5187 | return event_fd; |
| 5103 | 5188 | ||
| 5104 | err_fput_free_put_context: | ||
| 5105 | fput(event_file); | ||
| 5106 | err_free_put_context: | 5189 | err_free_put_context: |
| 5107 | free_event(event); | 5190 | free_event(event); |
| 5108 | err_put_context: | 5191 | err_put_context: |
| @@ -5420,6 +5503,7 @@ static void perf_free_event(struct perf_event *event, | |||
| 5420 | 5503 | ||
| 5421 | fput(parent->filp); | 5504 | fput(parent->filp); |
| 5422 | 5505 | ||
| 5506 | perf_group_detach(event); | ||
| 5423 | list_del_event(event, ctx); | 5507 | list_del_event(event, ctx); |
| 5424 | free_event(event); | 5508 | free_event(event); |
| 5425 | } | 5509 | } |
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 5c36ea9d55d2..ca6066a6952e 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
| @@ -99,9 +99,13 @@ config PM_SLEEP_ADVANCED_DEBUG | |||
| 99 | depends on PM_ADVANCED_DEBUG | 99 | depends on PM_ADVANCED_DEBUG |
| 100 | default n | 100 | default n |
| 101 | 101 | ||
| 102 | config SUSPEND_NVS | ||
| 103 | bool | ||
| 104 | |||
| 102 | config SUSPEND | 105 | config SUSPEND |
| 103 | bool "Suspend to RAM and standby" | 106 | bool "Suspend to RAM and standby" |
| 104 | depends on PM && ARCH_SUSPEND_POSSIBLE | 107 | depends on PM && ARCH_SUSPEND_POSSIBLE |
| 108 | select SUSPEND_NVS if HAS_IOMEM | ||
| 105 | default y | 109 | default y |
| 106 | ---help--- | 110 | ---help--- |
| 107 | Allow the system to enter sleep states in which main memory is | 111 | Allow the system to enter sleep states in which main memory is |
| @@ -130,13 +134,10 @@ config SUSPEND_FREEZER | |||
| 130 | 134 | ||
| 131 | Turning OFF this setting is NOT recommended! If in doubt, say Y. | 135 | Turning OFF this setting is NOT recommended! If in doubt, say Y. |
| 132 | 136 | ||
| 133 | config HIBERNATION_NVS | ||
| 134 | bool | ||
| 135 | |||
| 136 | config HIBERNATION | 137 | config HIBERNATION |
| 137 | bool "Hibernation (aka 'suspend to disk')" | 138 | bool "Hibernation (aka 'suspend to disk')" |
| 138 | depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE | 139 | depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE |
| 139 | select HIBERNATION_NVS if HAS_IOMEM | 140 | select SUSPEND_NVS if HAS_IOMEM |
| 140 | ---help--- | 141 | ---help--- |
| 141 | Enable the suspend to disk (STD) functionality, which is usually | 142 | Enable the suspend to disk (STD) functionality, which is usually |
| 142 | called "hibernation" in user interfaces. STD checkpoints the | 143 | called "hibernation" in user interfaces. STD checkpoints the |
diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 524e058dcf06..f9063c6b185d 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile | |||
| @@ -10,6 +10,6 @@ obj-$(CONFIG_SUSPEND) += suspend.o | |||
| 10 | obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o | 10 | obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o |
| 11 | obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ | 11 | obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ |
| 12 | block_io.o | 12 | block_io.o |
| 13 | obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o | 13 | obj-$(CONFIG_SUSPEND_NVS) += nvs.o |
| 14 | 14 | ||
| 15 | obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o | 15 | obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o |
diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/nvs.c index fdcad9ed5a7b..1836db60bbb6 100644 --- a/kernel/power/hibernate_nvs.c +++ b/kernel/power/nvs.c | |||
| @@ -15,7 +15,7 @@ | |||
| 15 | 15 | ||
| 16 | /* | 16 | /* |
| 17 | * Platforms, like ACPI, may want us to save some memory used by them during | 17 | * Platforms, like ACPI, may want us to save some memory used by them during |
| 18 | * hibernation and to restore the contents of this memory during the subsequent | 18 | * suspend and to restore the contents of this memory during the subsequent |
| 19 | * resume. The code below implements a mechanism allowing us to do that. | 19 | * resume. The code below implements a mechanism allowing us to do that. |
| 20 | */ | 20 | */ |
| 21 | 21 | ||
| @@ -30,7 +30,7 @@ struct nvs_page { | |||
| 30 | static LIST_HEAD(nvs_list); | 30 | static LIST_HEAD(nvs_list); |
| 31 | 31 | ||
| 32 | /** | 32 | /** |
| 33 | * hibernate_nvs_register - register platform NVS memory region to save | 33 | * suspend_nvs_register - register platform NVS memory region to save |
| 34 | * @start - physical address of the region | 34 | * @start - physical address of the region |
| 35 | * @size - size of the region | 35 | * @size - size of the region |
| 36 | * | 36 | * |
| @@ -38,7 +38,7 @@ static LIST_HEAD(nvs_list); | |||
| 38 | * things so that the data from page-aligned addresses in this region will | 38 | * things so that the data from page-aligned addresses in this region will |
| 39 | * be copied into separate RAM pages. | 39 | * be copied into separate RAM pages. |
| 40 | */ | 40 | */ |
| 41 | int hibernate_nvs_register(unsigned long start, unsigned long size) | 41 | int suspend_nvs_register(unsigned long start, unsigned long size) |
| 42 | { | 42 | { |
| 43 | struct nvs_page *entry, *next; | 43 | struct nvs_page *entry, *next; |
| 44 | 44 | ||
| @@ -68,9 +68,9 @@ int hibernate_nvs_register(unsigned long start, unsigned long size) | |||
| 68 | } | 68 | } |
| 69 | 69 | ||
| 70 | /** | 70 | /** |
| 71 | * hibernate_nvs_free - free data pages allocated for saving NVS regions | 71 | * suspend_nvs_free - free data pages allocated for saving NVS regions |
| 72 | */ | 72 | */ |
| 73 | void hibernate_nvs_free(void) | 73 | void suspend_nvs_free(void) |
| 74 | { | 74 | { |
| 75 | struct nvs_page *entry; | 75 | struct nvs_page *entry; |
| 76 | 76 | ||
| @@ -86,16 +86,16 @@ void hibernate_nvs_free(void) | |||
| 86 | } | 86 | } |
| 87 | 87 | ||
| 88 | /** | 88 | /** |
| 89 | * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions | 89 | * suspend_nvs_alloc - allocate memory necessary for saving NVS regions |
| 90 | */ | 90 | */ |
| 91 | int hibernate_nvs_alloc(void) | 91 | int suspend_nvs_alloc(void) |
| 92 | { | 92 | { |
| 93 | struct nvs_page *entry; | 93 | struct nvs_page *entry; |
| 94 | 94 | ||
| 95 | list_for_each_entry(entry, &nvs_list, node) { | 95 | list_for_each_entry(entry, &nvs_list, node) { |
| 96 | entry->data = (void *)__get_free_page(GFP_KERNEL); | 96 | entry->data = (void *)__get_free_page(GFP_KERNEL); |
| 97 | if (!entry->data) { | 97 | if (!entry->data) { |
| 98 | hibernate_nvs_free(); | 98 | suspend_nvs_free(); |
| 99 | return -ENOMEM; | 99 | return -ENOMEM; |
| 100 | } | 100 | } |
| 101 | } | 101 | } |
| @@ -103,9 +103,9 @@ int hibernate_nvs_alloc(void) | |||
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | /** | 105 | /** |
| 106 | * hibernate_nvs_save - save NVS memory regions | 106 | * suspend_nvs_save - save NVS memory regions |
| 107 | */ | 107 | */ |
| 108 | void hibernate_nvs_save(void) | 108 | void suspend_nvs_save(void) |
| 109 | { | 109 | { |
| 110 | struct nvs_page *entry; | 110 | struct nvs_page *entry; |
| 111 | 111 | ||
| @@ -119,12 +119,12 @@ void hibernate_nvs_save(void) | |||
| 119 | } | 119 | } |
| 120 | 120 | ||
| 121 | /** | 121 | /** |
| 122 | * hibernate_nvs_restore - restore NVS memory regions | 122 | * suspend_nvs_restore - restore NVS memory regions |
| 123 | * | 123 | * |
| 124 | * This function is going to be called with interrupts disabled, so it | 124 | * This function is going to be called with interrupts disabled, so it |
| 125 | * cannot iounmap the virtual addresses used to access the NVS region. | 125 | * cannot iounmap the virtual addresses used to access the NVS region. |
| 126 | */ | 126 | */ |
| 127 | void hibernate_nvs_restore(void) | 127 | void suspend_nvs_restore(void) |
| 128 | { | 128 | { |
| 129 | struct nvs_page *entry; | 129 | struct nvs_page *entry; |
| 130 | 130 | ||
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 56e7dbb8b996..f37cb7dd4402 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
| @@ -16,6 +16,12 @@ | |||
| 16 | #include <linux/cpu.h> | 16 | #include <linux/cpu.h> |
| 17 | #include <linux/syscalls.h> | 17 | #include <linux/syscalls.h> |
| 18 | #include <linux/gfp.h> | 18 | #include <linux/gfp.h> |
| 19 | #include <linux/io.h> | ||
| 20 | #include <linux/kernel.h> | ||
| 21 | #include <linux/list.h> | ||
| 22 | #include <linux/mm.h> | ||
| 23 | #include <linux/slab.h> | ||
| 24 | #include <linux/suspend.h> | ||
| 19 | 25 | ||
| 20 | #include "power.h" | 26 | #include "power.h" |
| 21 | 27 | ||
diff --git a/kernel/sched.c b/kernel/sched.c index d48408142503..f52a8801b7a2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -306,52 +306,6 @@ static int init_task_group_load = INIT_TASK_GROUP_LOAD; | |||
| 306 | */ | 306 | */ |
| 307 | struct task_group init_task_group; | 307 | struct task_group init_task_group; |
| 308 | 308 | ||
| 309 | /* return group to which a task belongs */ | ||
| 310 | static inline struct task_group *task_group(struct task_struct *p) | ||
| 311 | { | ||
| 312 | struct task_group *tg; | ||
| 313 | |||
| 314 | #ifdef CONFIG_CGROUP_SCHED | ||
| 315 | tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), | ||
| 316 | struct task_group, css); | ||
| 317 | #else | ||
| 318 | tg = &init_task_group; | ||
| 319 | #endif | ||
| 320 | return tg; | ||
| 321 | } | ||
| 322 | |||
| 323 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | ||
| 324 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | ||
| 325 | { | ||
| 326 | /* | ||
| 327 | * Strictly speaking this rcu_read_lock() is not needed since the | ||
| 328 | * task_group is tied to the cgroup, which in turn can never go away | ||
| 329 | * as long as there are tasks attached to it. | ||
| 330 | * | ||
| 331 | * However since task_group() uses task_subsys_state() which is an | ||
| 332 | * rcu_dereference() user, this quiets CONFIG_PROVE_RCU. | ||
| 333 | */ | ||
| 334 | rcu_read_lock(); | ||
| 335 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 336 | p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; | ||
| 337 | p->se.parent = task_group(p)->se[cpu]; | ||
| 338 | #endif | ||
| 339 | |||
| 340 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 341 | p->rt.rt_rq = task_group(p)->rt_rq[cpu]; | ||
| 342 | p->rt.parent = task_group(p)->rt_se[cpu]; | ||
| 343 | #endif | ||
| 344 | rcu_read_unlock(); | ||
| 345 | } | ||
| 346 | |||
| 347 | #else | ||
| 348 | |||
| 349 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | ||
| 350 | static inline struct task_group *task_group(struct task_struct *p) | ||
| 351 | { | ||
| 352 | return NULL; | ||
| 353 | } | ||
| 354 | |||
| 355 | #endif /* CONFIG_CGROUP_SCHED */ | 309 | #endif /* CONFIG_CGROUP_SCHED */ |
| 356 | 310 | ||
| 357 | /* CFS-related fields in a runqueue */ | 311 | /* CFS-related fields in a runqueue */ |
| @@ -544,6 +498,8 @@ struct rq { | |||
| 544 | struct root_domain *rd; | 498 | struct root_domain *rd; |
| 545 | struct sched_domain *sd; | 499 | struct sched_domain *sd; |
| 546 | 500 | ||
| 501 | unsigned long cpu_power; | ||
| 502 | |||
| 547 | unsigned char idle_at_tick; | 503 | unsigned char idle_at_tick; |
| 548 | /* For active balancing */ | 504 | /* For active balancing */ |
| 549 | int post_schedule; | 505 | int post_schedule; |
| @@ -642,6 +598,49 @@ static inline int cpu_of(struct rq *rq) | |||
| 642 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) | 598 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) |
| 643 | #define raw_rq() (&__raw_get_cpu_var(runqueues)) | 599 | #define raw_rq() (&__raw_get_cpu_var(runqueues)) |
| 644 | 600 | ||
| 601 | #ifdef CONFIG_CGROUP_SCHED | ||
| 602 | |||
| 603 | /* | ||
| 604 | * Return the group to which this tasks belongs. | ||
| 605 | * | ||
| 606 | * We use task_subsys_state_check() and extend the RCU verification | ||
| 607 | * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach() | ||
| 608 | * holds that lock for each task it moves into the cgroup. Therefore | ||
| 609 | * by holding that lock, we pin the task to the current cgroup. | ||
| 610 | */ | ||
| 611 | static inline struct task_group *task_group(struct task_struct *p) | ||
| 612 | { | ||
| 613 | struct cgroup_subsys_state *css; | ||
| 614 | |||
| 615 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | ||
| 616 | lockdep_is_held(&task_rq(p)->lock)); | ||
| 617 | return container_of(css, struct task_group, css); | ||
| 618 | } | ||
| 619 | |||
| 620 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | ||
| 621 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | ||
| 622 | { | ||
| 623 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 624 | p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; | ||
| 625 | p->se.parent = task_group(p)->se[cpu]; | ||
| 626 | #endif | ||
| 627 | |||
| 628 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 629 | p->rt.rt_rq = task_group(p)->rt_rq[cpu]; | ||
| 630 | p->rt.parent = task_group(p)->rt_se[cpu]; | ||
| 631 | #endif | ||
| 632 | } | ||
| 633 | |||
| 634 | #else /* CONFIG_CGROUP_SCHED */ | ||
| 635 | |||
| 636 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | ||
| 637 | static inline struct task_group *task_group(struct task_struct *p) | ||
| 638 | { | ||
| 639 | return NULL; | ||
| 640 | } | ||
| 641 | |||
| 642 | #endif /* CONFIG_CGROUP_SCHED */ | ||
| 643 | |||
| 645 | inline void update_rq_clock(struct rq *rq) | 644 | inline void update_rq_clock(struct rq *rq) |
| 646 | { | 645 | { |
| 647 | if (!rq->skip_clock_update) | 646 | if (!rq->skip_clock_update) |
| @@ -1255,6 +1254,12 @@ static void sched_avg_update(struct rq *rq) | |||
| 1255 | s64 period = sched_avg_period(); | 1254 | s64 period = sched_avg_period(); |
| 1256 | 1255 | ||
| 1257 | while ((s64)(rq->clock - rq->age_stamp) > period) { | 1256 | while ((s64)(rq->clock - rq->age_stamp) > period) { |
| 1257 | /* | ||
| 1258 | * Inline assembly required to prevent the compiler | ||
| 1259 | * optimising this loop into a divmod call. | ||
| 1260 | * See __iter_div_u64_rem() for another example of this. | ||
| 1261 | */ | ||
| 1262 | asm("" : "+rm" (rq->age_stamp)); | ||
| 1258 | rq->age_stamp += period; | 1263 | rq->age_stamp += period; |
| 1259 | rq->rt_avg /= 2; | 1264 | rq->rt_avg /= 2; |
| 1260 | } | 1265 | } |
| @@ -1499,24 +1504,9 @@ static unsigned long target_load(int cpu, int type) | |||
| 1499 | return max(rq->cpu_load[type-1], total); | 1504 | return max(rq->cpu_load[type-1], total); |
| 1500 | } | 1505 | } |
| 1501 | 1506 | ||
| 1502 | static struct sched_group *group_of(int cpu) | ||
| 1503 | { | ||
| 1504 | struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd); | ||
| 1505 | |||
| 1506 | if (!sd) | ||
| 1507 | return NULL; | ||
| 1508 | |||
| 1509 | return sd->groups; | ||
| 1510 | } | ||
| 1511 | |||
| 1512 | static unsigned long power_of(int cpu) | 1507 | static unsigned long power_of(int cpu) |
| 1513 | { | 1508 | { |
| 1514 | struct sched_group *group = group_of(cpu); | 1509 | return cpu_rq(cpu)->cpu_power; |
| 1515 | |||
| 1516 | if (!group) | ||
| 1517 | return SCHED_LOAD_SCALE; | ||
| 1518 | |||
| 1519 | return group->cpu_power; | ||
| 1520 | } | 1510 | } |
| 1521 | 1511 | ||
| 1522 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | 1512 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); |
| @@ -1673,9 +1663,6 @@ static void update_shares(struct sched_domain *sd) | |||
| 1673 | 1663 | ||
| 1674 | static void update_h_load(long cpu) | 1664 | static void update_h_load(long cpu) |
| 1675 | { | 1665 | { |
| 1676 | if (root_task_group_empty()) | ||
| 1677 | return; | ||
| 1678 | |||
| 1679 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); | 1666 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); |
| 1680 | } | 1667 | } |
| 1681 | 1668 | ||
| @@ -1854,8 +1841,8 @@ static void dec_nr_running(struct rq *rq) | |||
| 1854 | static void set_load_weight(struct task_struct *p) | 1841 | static void set_load_weight(struct task_struct *p) |
| 1855 | { | 1842 | { |
| 1856 | if (task_has_rt_policy(p)) { | 1843 | if (task_has_rt_policy(p)) { |
| 1857 | p->se.load.weight = prio_to_weight[0] * 2; | 1844 | p->se.load.weight = 0; |
| 1858 | p->se.load.inv_weight = prio_to_wmult[0] >> 1; | 1845 | p->se.load.inv_weight = WMULT_CONST; |
| 1859 | return; | 1846 | return; |
| 1860 | } | 1847 | } |
| 1861 | 1848 | ||
| @@ -2507,7 +2494,16 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
| 2507 | if (p->sched_class->task_fork) | 2494 | if (p->sched_class->task_fork) |
| 2508 | p->sched_class->task_fork(p); | 2495 | p->sched_class->task_fork(p); |
| 2509 | 2496 | ||
| 2497 | /* | ||
| 2498 | * The child is not yet in the pid-hash so no cgroup attach races, | ||
| 2499 | * and the cgroup is pinned to this child due to cgroup_fork() | ||
| 2500 | * is ran before sched_fork(). | ||
| 2501 | * | ||
| 2502 | * Silence PROVE_RCU. | ||
| 2503 | */ | ||
| 2504 | rcu_read_lock(); | ||
| 2510 | set_task_cpu(p, cpu); | 2505 | set_task_cpu(p, cpu); |
| 2506 | rcu_read_unlock(); | ||
| 2511 | 2507 | ||
| 2512 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2508 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
| 2513 | if (likely(sched_info_on())) | 2509 | if (likely(sched_info_on())) |
| @@ -2877,9 +2873,9 @@ unsigned long nr_iowait(void) | |||
| 2877 | return sum; | 2873 | return sum; |
| 2878 | } | 2874 | } |
| 2879 | 2875 | ||
| 2880 | unsigned long nr_iowait_cpu(void) | 2876 | unsigned long nr_iowait_cpu(int cpu) |
| 2881 | { | 2877 | { |
| 2882 | struct rq *this = this_rq(); | 2878 | struct rq *this = cpu_rq(cpu); |
| 2883 | return atomic_read(&this->nr_iowait); | 2879 | return atomic_read(&this->nr_iowait); |
| 2884 | } | 2880 | } |
| 2885 | 2881 | ||
| @@ -4478,16 +4474,6 @@ recheck: | |||
| 4478 | } | 4474 | } |
| 4479 | 4475 | ||
| 4480 | if (user) { | 4476 | if (user) { |
| 4481 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 4482 | /* | ||
| 4483 | * Do not allow realtime tasks into groups that have no runtime | ||
| 4484 | * assigned. | ||
| 4485 | */ | ||
| 4486 | if (rt_bandwidth_enabled() && rt_policy(policy) && | ||
| 4487 | task_group(p)->rt_bandwidth.rt_runtime == 0) | ||
| 4488 | return -EPERM; | ||
| 4489 | #endif | ||
| 4490 | |||
| 4491 | retval = security_task_setscheduler(p, policy, param); | 4477 | retval = security_task_setscheduler(p, policy, param); |
| 4492 | if (retval) | 4478 | if (retval) |
| 4493 | return retval; | 4479 | return retval; |
| @@ -4503,6 +4489,22 @@ recheck: | |||
| 4503 | * runqueue lock must be held. | 4489 | * runqueue lock must be held. |
| 4504 | */ | 4490 | */ |
| 4505 | rq = __task_rq_lock(p); | 4491 | rq = __task_rq_lock(p); |
| 4492 | |||
| 4493 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 4494 | if (user) { | ||
| 4495 | /* | ||
| 4496 | * Do not allow realtime tasks into groups that have no runtime | ||
| 4497 | * assigned. | ||
| 4498 | */ | ||
| 4499 | if (rt_bandwidth_enabled() && rt_policy(policy) && | ||
| 4500 | task_group(p)->rt_bandwidth.rt_runtime == 0) { | ||
| 4501 | __task_rq_unlock(rq); | ||
| 4502 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
| 4503 | return -EPERM; | ||
| 4504 | } | ||
| 4505 | } | ||
| 4506 | #endif | ||
| 4507 | |||
| 4506 | /* recheck policy now with rq lock held */ | 4508 | /* recheck policy now with rq lock held */ |
| 4507 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { | 4509 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { |
| 4508 | policy = oldpolicy = -1; | 4510 | policy = oldpolicy = -1; |
| @@ -7605,6 +7607,7 @@ void __init sched_init(void) | |||
| 7605 | #ifdef CONFIG_SMP | 7607 | #ifdef CONFIG_SMP |
| 7606 | rq->sd = NULL; | 7608 | rq->sd = NULL; |
| 7607 | rq->rd = NULL; | 7609 | rq->rd = NULL; |
| 7610 | rq->cpu_power = SCHED_LOAD_SCALE; | ||
| 7608 | rq->post_schedule = 0; | 7611 | rq->post_schedule = 0; |
| 7609 | rq->active_balance = 0; | 7612 | rq->active_balance = 0; |
| 7610 | rq->next_balance = jiffies; | 7613 | rq->next_balance = jiffies; |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 217e4a9393e4..a878b5332daa 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
| @@ -1225,7 +1225,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) | |||
| 1225 | unsigned long this_load, load; | 1225 | unsigned long this_load, load; |
| 1226 | int idx, this_cpu, prev_cpu; | 1226 | int idx, this_cpu, prev_cpu; |
| 1227 | unsigned long tl_per_task; | 1227 | unsigned long tl_per_task; |
| 1228 | unsigned int imbalance; | ||
| 1229 | struct task_group *tg; | 1228 | struct task_group *tg; |
| 1230 | unsigned long weight; | 1229 | unsigned long weight; |
| 1231 | int balanced; | 1230 | int balanced; |
| @@ -1241,6 +1240,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) | |||
| 1241 | * effect of the currently running task from the load | 1240 | * effect of the currently running task from the load |
| 1242 | * of the current CPU: | 1241 | * of the current CPU: |
| 1243 | */ | 1242 | */ |
| 1243 | rcu_read_lock(); | ||
| 1244 | if (sync) { | 1244 | if (sync) { |
| 1245 | tg = task_group(current); | 1245 | tg = task_group(current); |
| 1246 | weight = current->se.load.weight; | 1246 | weight = current->se.load.weight; |
| @@ -1252,8 +1252,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) | |||
| 1252 | tg = task_group(p); | 1252 | tg = task_group(p); |
| 1253 | weight = p->se.load.weight; | 1253 | weight = p->se.load.weight; |
| 1254 | 1254 | ||
| 1255 | imbalance = 100 + (sd->imbalance_pct - 100) / 2; | ||
| 1256 | |||
| 1257 | /* | 1255 | /* |
| 1258 | * In low-load situations, where prev_cpu is idle and this_cpu is idle | 1256 | * In low-load situations, where prev_cpu is idle and this_cpu is idle |
| 1259 | * due to the sync cause above having dropped this_load to 0, we'll | 1257 | * due to the sync cause above having dropped this_load to 0, we'll |
| @@ -1263,9 +1261,22 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) | |||
| 1263 | * Otherwise check if either cpus are near enough in load to allow this | 1261 | * Otherwise check if either cpus are near enough in load to allow this |
| 1264 | * task to be woken on this_cpu. | 1262 | * task to be woken on this_cpu. |
| 1265 | */ | 1263 | */ |
| 1266 | balanced = !this_load || | 1264 | if (this_load) { |
| 1267 | 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <= | 1265 | unsigned long this_eff_load, prev_eff_load; |
| 1268 | imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); | 1266 | |
| 1267 | this_eff_load = 100; | ||
| 1268 | this_eff_load *= power_of(prev_cpu); | ||
| 1269 | this_eff_load *= this_load + | ||
| 1270 | effective_load(tg, this_cpu, weight, weight); | ||
| 1271 | |||
| 1272 | prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2; | ||
| 1273 | prev_eff_load *= power_of(this_cpu); | ||
| 1274 | prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight); | ||
| 1275 | |||
| 1276 | balanced = this_eff_load <= prev_eff_load; | ||
| 1277 | } else | ||
| 1278 | balanced = true; | ||
| 1279 | rcu_read_unlock(); | ||
| 1269 | 1280 | ||
| 1270 | /* | 1281 | /* |
| 1271 | * If the currently running task will sleep within | 1282 | * If the currently running task will sleep within |
| @@ -2298,6 +2309,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) | |||
| 2298 | if (!power) | 2309 | if (!power) |
| 2299 | power = 1; | 2310 | power = 1; |
| 2300 | 2311 | ||
| 2312 | cpu_rq(cpu)->cpu_power = power; | ||
| 2301 | sdg->cpu_power = power; | 2313 | sdg->cpu_power = power; |
| 2302 | } | 2314 | } |
| 2303 | 2315 | ||
diff --git a/kernel/softirq.c b/kernel/softirq.c index 825e1126008f..07b4f1b1a73a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -850,7 +850,7 @@ static __init int spawn_ksoftirqd(void) | |||
| 850 | void *cpu = (void *)(long)smp_processor_id(); | 850 | void *cpu = (void *)(long)smp_processor_id(); |
| 851 | int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); | 851 | int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); |
| 852 | 852 | ||
| 853 | BUG_ON(err == NOTIFY_BAD); | 853 | BUG_ON(err != NOTIFY_OK); |
| 854 | cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); | 854 | cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); |
| 855 | register_cpu_notifier(&cpu_nfb); | 855 | register_cpu_notifier(&cpu_nfb); |
| 856 | return 0; | 856 | return 0; |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index b4e7431e7c78..70f8d90331e9 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
| @@ -321,7 +321,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | |||
| 321 | 321 | ||
| 322 | #ifdef CONFIG_HOTPLUG_CPU | 322 | #ifdef CONFIG_HOTPLUG_CPU |
| 323 | case CPU_UP_CANCELED: | 323 | case CPU_UP_CANCELED: |
| 324 | case CPU_DEAD: | 324 | case CPU_POST_DEAD: |
| 325 | { | 325 | { |
| 326 | struct cpu_stop_work *work; | 326 | struct cpu_stop_work *work; |
| 327 | 327 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 997080f00e0b..d24f761f4876 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -1471,12 +1471,12 @@ static struct ctl_table fs_table[] = { | |||
| 1471 | }, | 1471 | }, |
| 1472 | #endif | 1472 | #endif |
| 1473 | { | 1473 | { |
| 1474 | .procname = "pipe-max-pages", | 1474 | .procname = "pipe-max-size", |
| 1475 | .data = &pipe_max_pages, | 1475 | .data = &pipe_max_size, |
| 1476 | .maxlen = sizeof(int), | 1476 | .maxlen = sizeof(int), |
| 1477 | .mode = 0644, | 1477 | .mode = 0644, |
| 1478 | .proc_handler = &proc_dointvec_minmax, | 1478 | .proc_handler = &pipe_proc_fn, |
| 1479 | .extra1 = &two, | 1479 | .extra1 = &pipe_min_size, |
| 1480 | }, | 1480 | }, |
| 1481 | /* | 1481 | /* |
| 1482 | * NOTE: do not add new entries to this table unless you have read | 1482 | * NOTE: do not add new entries to this table unless you have read |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 1d7b9bc1c034..813993b5fb61 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -154,14 +154,14 @@ static void tick_nohz_update_jiffies(ktime_t now) | |||
| 154 | * Updates the per cpu time idle statistics counters | 154 | * Updates the per cpu time idle statistics counters |
| 155 | */ | 155 | */ |
| 156 | static void | 156 | static void |
| 157 | update_ts_time_stats(struct tick_sched *ts, ktime_t now, u64 *last_update_time) | 157 | update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time) |
| 158 | { | 158 | { |
| 159 | ktime_t delta; | 159 | ktime_t delta; |
| 160 | 160 | ||
| 161 | if (ts->idle_active) { | 161 | if (ts->idle_active) { |
| 162 | delta = ktime_sub(now, ts->idle_entrytime); | 162 | delta = ktime_sub(now, ts->idle_entrytime); |
| 163 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | 163 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); |
| 164 | if (nr_iowait_cpu() > 0) | 164 | if (nr_iowait_cpu(cpu) > 0) |
| 165 | ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); | 165 | ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); |
| 166 | ts->idle_entrytime = now; | 166 | ts->idle_entrytime = now; |
| 167 | } | 167 | } |
| @@ -175,19 +175,19 @@ static void tick_nohz_stop_idle(int cpu, ktime_t now) | |||
| 175 | { | 175 | { |
| 176 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 176 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
| 177 | 177 | ||
| 178 | update_ts_time_stats(ts, now, NULL); | 178 | update_ts_time_stats(cpu, ts, now, NULL); |
| 179 | ts->idle_active = 0; | 179 | ts->idle_active = 0; |
| 180 | 180 | ||
| 181 | sched_clock_idle_wakeup_event(0); | 181 | sched_clock_idle_wakeup_event(0); |
| 182 | } | 182 | } |
| 183 | 183 | ||
| 184 | static ktime_t tick_nohz_start_idle(struct tick_sched *ts) | 184 | static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) |
| 185 | { | 185 | { |
| 186 | ktime_t now; | 186 | ktime_t now; |
| 187 | 187 | ||
| 188 | now = ktime_get(); | 188 | now = ktime_get(); |
| 189 | 189 | ||
| 190 | update_ts_time_stats(ts, now, NULL); | 190 | update_ts_time_stats(cpu, ts, now, NULL); |
| 191 | 191 | ||
| 192 | ts->idle_entrytime = now; | 192 | ts->idle_entrytime = now; |
| 193 | ts->idle_active = 1; | 193 | ts->idle_active = 1; |
| @@ -216,7 +216,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) | |||
| 216 | if (!tick_nohz_enabled) | 216 | if (!tick_nohz_enabled) |
| 217 | return -1; | 217 | return -1; |
| 218 | 218 | ||
| 219 | update_ts_time_stats(ts, ktime_get(), last_update_time); | 219 | update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); |
| 220 | 220 | ||
| 221 | return ktime_to_us(ts->idle_sleeptime); | 221 | return ktime_to_us(ts->idle_sleeptime); |
| 222 | } | 222 | } |
| @@ -242,7 +242,7 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) | |||
| 242 | if (!tick_nohz_enabled) | 242 | if (!tick_nohz_enabled) |
| 243 | return -1; | 243 | return -1; |
| 244 | 244 | ||
| 245 | update_ts_time_stats(ts, ktime_get(), last_update_time); | 245 | update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); |
| 246 | 246 | ||
| 247 | return ktime_to_us(ts->iowait_sleeptime); | 247 | return ktime_to_us(ts->iowait_sleeptime); |
| 248 | } | 248 | } |
| @@ -284,7 +284,7 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
| 284 | */ | 284 | */ |
| 285 | ts->inidle = 1; | 285 | ts->inidle = 1; |
| 286 | 286 | ||
| 287 | now = tick_nohz_start_idle(ts); | 287 | now = tick_nohz_start_idle(cpu, ts); |
| 288 | 288 | ||
| 289 | /* | 289 | /* |
| 290 | * If this cpu is offline and it is the one which updates | 290 | * If this cpu is offline and it is the one which updates |
| @@ -315,9 +315,6 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
| 315 | goto end; | 315 | goto end; |
| 316 | } | 316 | } |
| 317 | 317 | ||
| 318 | if (nohz_ratelimit(cpu)) | ||
| 319 | goto end; | ||
| 320 | |||
| 321 | ts->idle_calls++; | 318 | ts->idle_calls++; |
| 322 | /* Read jiffies and the time when jiffies were updated last */ | 319 | /* Read jiffies and the time when jiffies were updated last */ |
| 323 | do { | 320 | do { |
| @@ -328,7 +325,7 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
| 328 | } while (read_seqretry(&xtime_lock, seq)); | 325 | } while (read_seqretry(&xtime_lock, seq)); |
| 329 | 326 | ||
| 330 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || | 327 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || |
| 331 | arch_needs_cpu(cpu)) { | 328 | arch_needs_cpu(cpu) || nohz_ratelimit(cpu)) { |
| 332 | next_jiffies = last_jiffies + 1; | 329 | next_jiffies = last_jiffies + 1; |
| 333 | delta_jiffies = 1; | 330 | delta_jiffies = 1; |
| 334 | } else { | 331 | } else { |
diff --git a/kernel/timer.c b/kernel/timer.c index 2454172a80d3..ee305c8d4e18 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -1717,7 +1717,7 @@ void __init init_timers(void) | |||
| 1717 | 1717 | ||
| 1718 | init_timer_stats(); | 1718 | init_timer_stats(); |
| 1719 | 1719 | ||
| 1720 | BUG_ON(err == NOTIFY_BAD); | 1720 | BUG_ON(err != NOTIFY_OK); |
| 1721 | register_cpu_notifier(&timers_nb); | 1721 | register_cpu_notifier(&timers_nb); |
| 1722 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq); | 1722 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq); |
| 1723 | } | 1723 | } |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 36ea2b65dcdc..638711c17504 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
| @@ -842,6 +842,7 @@ static void blk_add_trace_split(void *ignore, | |||
| 842 | 842 | ||
| 843 | /** | 843 | /** |
| 844 | * blk_add_trace_remap - Add a trace for a remap operation | 844 | * blk_add_trace_remap - Add a trace for a remap operation |
| 845 | * @ignore: trace callback data parameter (not used) | ||
| 845 | * @q: queue the io is for | 846 | * @q: queue the io is for |
| 846 | * @bio: the source bio | 847 | * @bio: the source bio |
| 847 | * @dev: target device | 848 | * @dev: target device |
| @@ -873,6 +874,7 @@ static void blk_add_trace_remap(void *ignore, | |||
| 873 | 874 | ||
| 874 | /** | 875 | /** |
| 875 | * blk_add_trace_rq_remap - Add a trace for a request-remap operation | 876 | * blk_add_trace_rq_remap - Add a trace for a request-remap operation |
| 877 | * @ignore: trace callback data parameter (not used) | ||
| 876 | * @q: queue the io is for | 878 | * @q: queue the io is for |
| 877 | * @rq: the source request | 879 | * @rq: the source request |
| 878 | * @dev: target device | 880 | * @dev: target device |
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index cb6f365016e4..8a2b73f7c068 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
| @@ -96,7 +96,9 @@ int perf_trace_init(struct perf_event *p_event) | |||
| 96 | mutex_lock(&event_mutex); | 96 | mutex_lock(&event_mutex); |
| 97 | list_for_each_entry(tp_event, &ftrace_events, list) { | 97 | list_for_each_entry(tp_event, &ftrace_events, list) { |
| 98 | if (tp_event->event.type == event_id && | 98 | if (tp_event->event.type == event_id && |
| 99 | tp_event->class && tp_event->class->perf_probe && | 99 | tp_event->class && |
| 100 | (tp_event->class->perf_probe || | ||
| 101 | tp_event->class->reg) && | ||
| 100 | try_module_get(tp_event->mod)) { | 102 | try_module_get(tp_event->mod)) { |
| 101 | ret = perf_trace_event_init(tp_event, p_event); | 103 | ret = perf_trace_event_init(tp_event, p_event); |
| 102 | break; | 104 | break; |
| @@ -116,7 +118,7 @@ int perf_trace_enable(struct perf_event *p_event) | |||
| 116 | if (WARN_ON_ONCE(!list)) | 118 | if (WARN_ON_ONCE(!list)) |
| 117 | return -EINVAL; | 119 | return -EINVAL; |
| 118 | 120 | ||
| 119 | list = per_cpu_ptr(list, smp_processor_id()); | 121 | list = this_cpu_ptr(list); |
| 120 | hlist_add_head_rcu(&p_event->hlist_entry, list); | 122 | hlist_add_head_rcu(&p_event->hlist_entry, list); |
| 121 | 123 | ||
| 122 | return 0; | 124 | return 0; |
| @@ -132,8 +134,9 @@ void perf_trace_destroy(struct perf_event *p_event) | |||
| 132 | struct ftrace_event_call *tp_event = p_event->tp_event; | 134 | struct ftrace_event_call *tp_event = p_event->tp_event; |
| 133 | int i; | 135 | int i; |
| 134 | 136 | ||
| 137 | mutex_lock(&event_mutex); | ||
| 135 | if (--tp_event->perf_refcount > 0) | 138 | if (--tp_event->perf_refcount > 0) |
| 136 | return; | 139 | goto out; |
| 137 | 140 | ||
| 138 | if (tp_event->class->reg) | 141 | if (tp_event->class->reg) |
| 139 | tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER); | 142 | tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER); |
| @@ -142,6 +145,12 @@ void perf_trace_destroy(struct perf_event *p_event) | |||
| 142 | tp_event->class->perf_probe, | 145 | tp_event->class->perf_probe, |
| 143 | tp_event); | 146 | tp_event); |
| 144 | 147 | ||
| 148 | /* | ||
| 149 | * Ensure our callback won't be called anymore. See | ||
| 150 | * tracepoint_probe_unregister() and __DO_TRACE(). | ||
| 151 | */ | ||
| 152 | synchronize_sched(); | ||
| 153 | |||
| 145 | free_percpu(tp_event->perf_events); | 154 | free_percpu(tp_event->perf_events); |
| 146 | tp_event->perf_events = NULL; | 155 | tp_event->perf_events = NULL; |
| 147 | 156 | ||
| @@ -151,6 +160,8 @@ void perf_trace_destroy(struct perf_event *p_event) | |||
| 151 | perf_trace_buf[i] = NULL; | 160 | perf_trace_buf[i] = NULL; |
| 152 | } | 161 | } |
| 153 | } | 162 | } |
| 163 | out: | ||
| 164 | mutex_unlock(&event_mutex); | ||
| 154 | } | 165 | } |
| 155 | 166 | ||
| 156 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | 167 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, |
| @@ -169,7 +180,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | |||
| 169 | if (*rctxp < 0) | 180 | if (*rctxp < 0) |
| 170 | return NULL; | 181 | return NULL; |
| 171 | 182 | ||
| 172 | raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id()); | 183 | raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]); |
| 173 | 184 | ||
| 174 | /* zero the dead bytes from align to not leak stack to user */ | 185 | /* zero the dead bytes from align to not leak stack to user */ |
| 175 | memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); | 186 | memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index faf7cefd15da..f52b5f50299d 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
| @@ -1359,7 +1359,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, | |||
| 1359 | for (i = 0; i < tp->nr_args; i++) | 1359 | for (i = 0; i < tp->nr_args; i++) |
| 1360 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); | 1360 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); |
| 1361 | 1361 | ||
| 1362 | head = per_cpu_ptr(call->perf_events, smp_processor_id()); | 1362 | head = this_cpu_ptr(call->perf_events); |
| 1363 | perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); | 1363 | perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); |
| 1364 | } | 1364 | } |
| 1365 | 1365 | ||
| @@ -1392,7 +1392,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
| 1392 | for (i = 0; i < tp->nr_args; i++) | 1392 | for (i = 0; i < tp->nr_args; i++) |
| 1393 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); | 1393 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); |
| 1394 | 1394 | ||
| 1395 | head = per_cpu_ptr(call->perf_events, smp_processor_id()); | 1395 | head = this_cpu_ptr(call->perf_events); |
| 1396 | perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); | 1396 | perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); |
| 1397 | } | 1397 | } |
| 1398 | 1398 | ||
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index d2c859cec9ea..34e35804304b 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
| @@ -519,7 +519,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
| 519 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | 519 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, |
| 520 | (unsigned long *)&rec->args); | 520 | (unsigned long *)&rec->args); |
| 521 | 521 | ||
| 522 | head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id()); | 522 | head = this_cpu_ptr(sys_data->enter_event->perf_events); |
| 523 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); | 523 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); |
| 524 | } | 524 | } |
| 525 | 525 | ||
| @@ -595,7 +595,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
| 595 | rec->nr = syscall_nr; | 595 | rec->nr = syscall_nr; |
| 596 | rec->ret = syscall_get_return_value(current, regs); | 596 | rec->ret = syscall_get_return_value(current, regs); |
| 597 | 597 | ||
| 598 | head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id()); | 598 | head = this_cpu_ptr(sys_data->exit_event->perf_events); |
| 599 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); | 599 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); |
| 600 | } | 600 | } |
| 601 | 601 | ||
