Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c                  |   2
-rw-r--r--  kernel/cpu.c                     |   2
-rw-r--r--  kernel/debug/kdb/kdb_main.c      |  12
-rw-r--r--  kernel/exec_domain.c             |  18
-rw-r--r--  kernel/module.c                  | 327
-rw-r--r--  kernel/perf_event.c              | 356
-rw-r--r--  kernel/sched.c                   |  24
-rw-r--r--  kernel/sched_fair.c              |  22
-rw-r--r--  kernel/softirq.c                 |   2
-rw-r--r--  kernel/stop_machine.c            |   2
-rw-r--r--  kernel/sysctl.c                  |   8
-rw-r--r--  kernel/timer.c                   |   2
-rw-r--r--  kernel/trace/blktrace.c          |   2
-rw-r--r--  kernel/trace/trace_event_perf.c  |  15
-rw-r--r--  kernel/trace/trace_kprobe.c      |   4
-rw-r--r--  kernel/trace/trace_syscalls.c    |   4
16 files changed, 494 insertions, 308 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 422cb19f156e..3ac6f5b0a64b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4598,7 +4598,7 @@ static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
4598 parent_css = parent->subsys[subsys_id]; 4598 parent_css = parent->subsys[subsys_id];
4599 child_css = child->subsys[subsys_id]; 4599 child_css = child->subsys[subsys_id];
4600 parent_id = parent_css->id; 4600 parent_id = parent_css->id;
4601 depth = parent_id->depth; 4601 depth = parent_id->depth + 1;
4602 4602
4603 child_id = get_new_cssid(ss, depth); 4603 child_id = get_new_cssid(ss, depth);
4604 if (IS_ERR(child_id)) 4604 if (IS_ERR(child_id))
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8b92539b4754..97d1b426a4ac 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -34,7 +34,7 @@ void cpu_maps_update_done(void)
34 mutex_unlock(&cpu_add_remove_lock); 34 mutex_unlock(&cpu_add_remove_lock);
35} 35}
36 36
37static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain); 37static RAW_NOTIFIER_HEAD(cpu_chain);
38 38
39/* If set, cpu_up and cpu_down will return -EBUSY and do nothing. 39/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
40 * Should always be manipulated under cpu_add_remove_lock 40 * Should always be manipulated under cpu_add_remove_lock
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index b724c791b6d4..184cd8209c36 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1857,12 +1857,6 @@ static int kdb_ef(int argc, const char **argv)
1857} 1857}
1858 1858
1859#if defined(CONFIG_MODULES) 1859#if defined(CONFIG_MODULES)
1860/* modules using other modules */
1861struct module_use {
1862 struct list_head list;
1863 struct module *module_which_uses;
1864};
1865
1866/* 1860/*
1867 * kdb_lsmod - This function implements the 'lsmod' command. Lists 1861 * kdb_lsmod - This function implements the 'lsmod' command. Lists
1868 * currently loaded kernel modules. 1862 * currently loaded kernel modules.
@@ -1894,9 +1888,9 @@ static int kdb_lsmod(int argc, const char **argv)
1894 { 1888 {
1895 struct module_use *use; 1889 struct module_use *use;
1896 kdb_printf(" [ "); 1890 kdb_printf(" [ ");
1897 list_for_each_entry(use, &mod->modules_which_use_me, 1891 list_for_each_entry(use, &mod->source_list,
1898 list) 1892 source_list)
1899 kdb_printf("%s ", use->module_which_uses->name); 1893 kdb_printf("%s ", use->target->name);
1900 kdb_printf("]\n"); 1894 kdb_printf("]\n");
1901 } 1895 }
1902#endif 1896#endif
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index c35452cadded..dd62f8e714ca 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -27,7 +27,7 @@ static struct exec_domain *exec_domains = &default_exec_domain;
27static DEFINE_RWLOCK(exec_domains_lock); 27static DEFINE_RWLOCK(exec_domains_lock);
28 28
29 29
30static u_long ident_map[32] = { 30static unsigned long ident_map[32] = {
31 0, 1, 2, 3, 4, 5, 6, 7, 31 0, 1, 2, 3, 4, 5, 6, 7,
32 8, 9, 10, 11, 12, 13, 14, 15, 32 8, 9, 10, 11, 12, 13, 14, 15,
33 16, 17, 18, 19, 20, 21, 22, 23, 33 16, 17, 18, 19, 20, 21, 22, 23,
@@ -56,10 +56,10 @@ default_handler(int segment, struct pt_regs *regp)
56} 56}
57 57
58static struct exec_domain * 58static struct exec_domain *
59lookup_exec_domain(u_long personality) 59lookup_exec_domain(unsigned int personality)
60{ 60{
61 struct exec_domain * ep; 61 unsigned int pers = personality(personality);
62 u_long pers = personality(personality); 62 struct exec_domain *ep;
63 63
64 read_lock(&exec_domains_lock); 64 read_lock(&exec_domains_lock);
65 for (ep = exec_domains; ep; ep = ep->next) { 65 for (ep = exec_domains; ep; ep = ep->next) {
@@ -70,7 +70,7 @@ lookup_exec_domain(u_long personality)
70 70
71#ifdef CONFIG_MODULES 71#ifdef CONFIG_MODULES
72 read_unlock(&exec_domains_lock); 72 read_unlock(&exec_domains_lock);
73 request_module("personality-%ld", pers); 73 request_module("personality-%d", pers);
74 read_lock(&exec_domains_lock); 74 read_lock(&exec_domains_lock);
75 75
76 for (ep = exec_domains; ep; ep = ep->next) { 76 for (ep = exec_domains; ep; ep = ep->next) {
@@ -135,7 +135,7 @@ unregister:
135} 135}
136 136
137int 137int
138__set_personality(u_long personality) 138__set_personality(unsigned int personality)
139{ 139{
140 struct exec_domain *ep, *oep; 140 struct exec_domain *ep, *oep;
141 141
@@ -188,9 +188,9 @@ static int __init proc_execdomains_init(void)
188module_init(proc_execdomains_init); 188module_init(proc_execdomains_init);
189#endif 189#endif
190 190
191SYSCALL_DEFINE1(personality, u_long, personality) 191SYSCALL_DEFINE1(personality, unsigned int, personality)
192{ 192{
193 u_long old = current->personality; 193 unsigned int old = current->personality;
194 194
195 if (personality != 0xffffffff) { 195 if (personality != 0xffffffff) {
196 set_personality(personality); 196 set_personality(personality);
@@ -198,7 +198,7 @@ SYSCALL_DEFINE1(personality, u_long, personality)
198 return -EINVAL; 198 return -EINVAL;
199 } 199 }
200 200
201 return (long)old; 201 return old;
202} 202}
203 203
204 204
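The exec_domain changes above make the personality value consistently an unsigned int (a PER_* identifier in the low byte plus feature flags above it), and sys_personality(0xffffffff) remains a pure query that never calls set_personality(). A hedged user-space sketch of that convention, using the glibc personality(2) wrapper and the ADDR_NO_RANDOMIZE flag from <sys/personality.h>:

/* Sketch only: query the current personality, then set one feature flag
 * without touching the PER_* identifier in the low byte. */
#include <stdio.h>
#include <sys/personality.h>

int main(void)
{
	/* 0xffffffff is rejected by set_personality() in the syscall above,
	 * so this call only reports the current value. */
	unsigned int old = personality(0xffffffff);

	printf("current personality: %#x\n", old);

	/* Disable address-space randomization for this process and its
	 * children; flags live above the PER_* byte. */
	if (personality(old | ADDR_NO_RANDOMIZE) == -1)
		perror("personality");
	else
		printf("new personality: %#x\n", personality(0xffffffff));
	return 0;
}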
diff --git a/kernel/module.c b/kernel/module.c
index 333fbcc96978..8c6b42840dd1 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -72,7 +72,11 @@
72/* If this is set, the section belongs in the init part of the module */ 72/* If this is set, the section belongs in the init part of the module */
73#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) 73#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
74 74
75/* List of modules, protected by module_mutex or preempt_disable 75/*
76 * Mutex protects:
77 * 1) List of modules (also safely readable with preempt_disable),
78 * 2) module_use links,
79 * 3) module_addr_min/module_addr_max.
76 * (delete uses stop_machine/add uses RCU list operations). */ 80 * (delete uses stop_machine/add uses RCU list operations). */
77DEFINE_MUTEX(module_mutex); 81DEFINE_MUTEX(module_mutex);
78EXPORT_SYMBOL_GPL(module_mutex); 82EXPORT_SYMBOL_GPL(module_mutex);
@@ -90,7 +94,8 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq);
90 94
91static BLOCKING_NOTIFIER_HEAD(module_notify_list); 95static BLOCKING_NOTIFIER_HEAD(module_notify_list);
92 96
93/* Bounds of module allocation, for speeding __module_address */ 97/* Bounds of module allocation, for speeding __module_address.
98 * Protected by module_mutex. */
94static unsigned long module_addr_min = -1UL, module_addr_max = 0; 99static unsigned long module_addr_min = -1UL, module_addr_max = 0;
95 100
96int register_module_notifier(struct notifier_block * nb) 101int register_module_notifier(struct notifier_block * nb)
@@ -329,7 +334,7 @@ static bool find_symbol_in_section(const struct symsearch *syms,
329} 334}
330 335
331/* Find a symbol and return it, along with, (optional) crc and 336/* Find a symbol and return it, along with, (optional) crc and
332 * (optional) module which owns it */ 337 * (optional) module which owns it. Needs preempt disabled or module_mutex. */
333const struct kernel_symbol *find_symbol(const char *name, 338const struct kernel_symbol *find_symbol(const char *name,
334 struct module **owner, 339 struct module **owner,
335 const unsigned long **crc, 340 const unsigned long **crc,
@@ -403,7 +408,7 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr,
403 Elf_Shdr *sechdrs, 408 Elf_Shdr *sechdrs,
404 const char *secstrings) 409 const char *secstrings)
405{ 410{
406 return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); 411 return find_sec(hdr, sechdrs, secstrings, ".data..percpu");
407} 412}
408 413
409static void percpu_modcopy(struct module *mod, 414static void percpu_modcopy(struct module *mod,
@@ -523,7 +528,8 @@ static void module_unload_init(struct module *mod)
523{ 528{
524 int cpu; 529 int cpu;
525 530
526 INIT_LIST_HEAD(&mod->modules_which_use_me); 531 INIT_LIST_HEAD(&mod->source_list);
532 INIT_LIST_HEAD(&mod->target_list);
527 for_each_possible_cpu(cpu) { 533 for_each_possible_cpu(cpu) {
528 per_cpu_ptr(mod->refptr, cpu)->incs = 0; 534 per_cpu_ptr(mod->refptr, cpu)->incs = 0;
529 per_cpu_ptr(mod->refptr, cpu)->decs = 0; 535 per_cpu_ptr(mod->refptr, cpu)->decs = 0;
@@ -535,20 +541,13 @@ static void module_unload_init(struct module *mod)
535 mod->waiter = current; 541 mod->waiter = current;
536} 542}
537 543
538/* modules using other modules */
539struct module_use
540{
541 struct list_head list;
542 struct module *module_which_uses;
543};
544
545/* Does a already use b? */ 544/* Does a already use b? */
546static int already_uses(struct module *a, struct module *b) 545static int already_uses(struct module *a, struct module *b)
547{ 546{
548 struct module_use *use; 547 struct module_use *use;
549 548
550 list_for_each_entry(use, &b->modules_which_use_me, list) { 549 list_for_each_entry(use, &b->source_list, source_list) {
551 if (use->module_which_uses == a) { 550 if (use->source == a) {
552 DEBUGP("%s uses %s!\n", a->name, b->name); 551 DEBUGP("%s uses %s!\n", a->name, b->name);
553 return 1; 552 return 1;
554 } 553 }
@@ -557,62 +556,68 @@ static int already_uses(struct module *a, struct module *b)
557 return 0; 556 return 0;
558} 557}
559 558
560/* Module a uses b */ 559/*
561int use_module(struct module *a, struct module *b) 560 * Module a uses b
561 * - we add 'a' as a "source", 'b' as a "target" of module use
562 * - the module_use is added to the list of 'b' sources (so
563 * 'b' can walk the list to see who sourced them), and of 'a'
564 * targets (so 'a' can see what modules it targets).
565 */
566static int add_module_usage(struct module *a, struct module *b)
562{ 567{
563 struct module_use *use; 568 struct module_use *use;
564 int no_warn, err;
565 569
566 if (b == NULL || already_uses(a, b)) return 1; 570 DEBUGP("Allocating new usage for %s.\n", a->name);
571 use = kmalloc(sizeof(*use), GFP_ATOMIC);
572 if (!use) {
573 printk(KERN_WARNING "%s: out of memory loading\n", a->name);
574 return -ENOMEM;
575 }
576
577 use->source = a;
578 use->target = b;
579 list_add(&use->source_list, &b->source_list);
580 list_add(&use->target_list, &a->target_list);
581 return 0;
582}
583
584/* Module a uses b: caller needs module_mutex() */
585int ref_module(struct module *a, struct module *b)
586{
587 int err;
567 588
568 /* If we're interrupted or time out, we fail. */ 589 if (b == NULL || already_uses(a, b))
569 if (wait_event_interruptible_timeout(
570 module_wq, (err = strong_try_module_get(b)) != -EBUSY,
571 30 * HZ) <= 0) {
572 printk("%s: gave up waiting for init of module %s.\n",
573 a->name, b->name);
574 return 0; 590 return 0;
575 }
576 591
577 /* If strong_try_module_get() returned a different error, we fail. */ 592 /* If module isn't available, we fail. */
593 err = strong_try_module_get(b);
578 if (err) 594 if (err)
579 return 0; 595 return err;
580 596
581 DEBUGP("Allocating new usage for %s.\n", a->name); 597 err = add_module_usage(a, b);
582 use = kmalloc(sizeof(*use), GFP_ATOMIC); 598 if (err) {
583 if (!use) {
584 printk("%s: out of memory loading\n", a->name);
585 module_put(b); 599 module_put(b);
586 return 0; 600 return err;
587 } 601 }
588 602 return 0;
589 use->module_which_uses = a;
590 list_add(&use->list, &b->modules_which_use_me);
591 no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name);
592 return 1;
593} 603}
594EXPORT_SYMBOL_GPL(use_module); 604EXPORT_SYMBOL_GPL(ref_module);
595 605
596/* Clear the unload stuff of the module. */ 606/* Clear the unload stuff of the module. */
597static void module_unload_free(struct module *mod) 607static void module_unload_free(struct module *mod)
598{ 608{
599 struct module *i; 609 struct module_use *use, *tmp;
600 610
601 list_for_each_entry(i, &modules, list) { 611 mutex_lock(&module_mutex);
602 struct module_use *use; 612 list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) {
603 613 struct module *i = use->target;
604 list_for_each_entry(use, &i->modules_which_use_me, list) { 614 DEBUGP("%s unusing %s\n", mod->name, i->name);
605 if (use->module_which_uses == mod) { 615 module_put(i);
606 DEBUGP("%s unusing %s\n", mod->name, i->name); 616 list_del(&use->source_list);
607 module_put(i); 617 list_del(&use->target_list);
608 list_del(&use->list); 618 kfree(use);
609 kfree(use);
610 sysfs_remove_link(i->holders_dir, mod->name);
611 /* There can be at most one match. */
612 break;
613 }
614 }
615 } 619 }
620 mutex_unlock(&module_mutex);
616} 621}
617 622
618#ifdef CONFIG_MODULE_FORCE_UNLOAD 623#ifdef CONFIG_MODULE_FORCE_UNLOAD
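The hunks above replace the private module_use struct (also deleted from kdb_main.c earlier in this diff) with one that lives on two lists at once: b->source_list answers "who uses b" while a->target_list answers "what does a use", so module_unload_free() can walk mod->target_list directly instead of scanning every loaded module. A minimal user-space sketch of the same two-list bookkeeping, with plain next pointers standing in for the kernel's list_head (illustrative names, not the kernel API):

/* Each "use" edge is linked into both the target's sources list and the
 * source's targets list, so either question is a single walk. */
#include <stdio.h>
#include <stdlib.h>

struct module;

struct module_use {
	struct module *source;          /* the module doing the using      */
	struct module *target;          /* the module being used           */
	struct module_use *next_source; /* next entry in target->sources   */
	struct module_use *next_target; /* next entry in source->targets   */
};

struct module {
	const char *name;
	struct module_use *sources;     /* edges where this module is the target */
	struct module_use *targets;     /* edges where this module is the source */
};

static int add_module_usage(struct module *a, struct module *b)
{
	struct module_use *use = malloc(sizeof(*use));

	if (!use)
		return -1;
	use->source = a;
	use->target = b;
	use->next_source = b->sources;  b->sources = use;  /* b: "a uses me" */
	use->next_target = a->targets;  a->targets = use;  /* a: "I use b"   */
	return 0;
}

int main(void)
{
	struct module core = { "core", NULL, NULL };
	struct module drv1 = { "drv1", NULL, NULL };
	struct module drv2 = { "drv2", NULL, NULL };
	struct module_use *use;

	add_module_usage(&drv1, &core);
	add_module_usage(&drv2, &core);

	printf("%s is used by:", core.name);   /* lsmod-style listing */
	for (use = core.sources; use; use = use->next_source)
		printf(" %s", use->source->name);
	printf("\n");

	printf("%s uses:", drv1.name);         /* unload walks this list */
	for (use = drv1.targets; use; use = use->next_target)
		printf(" %s", use->target->name);
	printf("\n");
	return 0;
}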
@@ -735,7 +740,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
735 goto out; 740 goto out;
736 } 741 }
737 742
738 if (!list_empty(&mod->modules_which_use_me)) { 743 if (!list_empty(&mod->source_list)) {
739 /* Other modules depend on us: get rid of them first. */ 744 /* Other modules depend on us: get rid of them first. */
740 ret = -EWOULDBLOCK; 745 ret = -EWOULDBLOCK;
741 goto out; 746 goto out;
@@ -779,13 +784,14 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
779 blocking_notifier_call_chain(&module_notify_list, 784 blocking_notifier_call_chain(&module_notify_list,
780 MODULE_STATE_GOING, mod); 785 MODULE_STATE_GOING, mod);
781 async_synchronize_full(); 786 async_synchronize_full();
782 mutex_lock(&module_mutex); 787
783 /* Store the name of the last unloaded module for diagnostic purposes */ 788 /* Store the name of the last unloaded module for diagnostic purposes */
784 strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); 789 strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
785 ddebug_remove_module(mod->name); 790 ddebug_remove_module(mod->name);
786 free_module(mod);
787 791
788 out: 792 free_module(mod);
793 return 0;
794out:
789 mutex_unlock(&module_mutex); 795 mutex_unlock(&module_mutex);
790 return ret; 796 return ret;
791} 797}
@@ -799,9 +805,9 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
799 805
800 /* Always include a trailing , so userspace can differentiate 806 /* Always include a trailing , so userspace can differentiate
801 between this and the old multi-field proc format. */ 807 between this and the old multi-field proc format. */
802 list_for_each_entry(use, &mod->modules_which_use_me, list) { 808 list_for_each_entry(use, &mod->source_list, source_list) {
803 printed_something = 1; 809 printed_something = 1;
804 seq_printf(m, "%s,", use->module_which_uses->name); 810 seq_printf(m, "%s,", use->source->name);
805 } 811 }
806 812
807 if (mod->init != NULL && mod->exit == NULL) { 813 if (mod->init != NULL && mod->exit == NULL) {
@@ -880,11 +886,11 @@ static inline void module_unload_free(struct module *mod)
880{ 886{
881} 887}
882 888
883int use_module(struct module *a, struct module *b) 889int ref_module(struct module *a, struct module *b)
884{ 890{
885 return strong_try_module_get(b) == 0; 891 return strong_try_module_get(b);
886} 892}
887EXPORT_SYMBOL_GPL(use_module); 893EXPORT_SYMBOL_GPL(ref_module);
888 894
889static inline void module_unload_init(struct module *mod) 895static inline void module_unload_init(struct module *mod)
890{ 896{
@@ -1001,6 +1007,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
1001{ 1007{
1002 const unsigned long *crc; 1008 const unsigned long *crc;
1003 1009
1010 /* Since this should be found in kernel (which can't be removed),
1011 * no locking is necessary. */
1004 if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL, 1012 if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL,
1005 &crc, true, false)) 1013 &crc, true, false))
1006 BUG(); 1014 BUG();
@@ -1043,29 +1051,62 @@ static inline int same_magic(const char *amagic, const char *bmagic,
1043} 1051}
1044#endif /* CONFIG_MODVERSIONS */ 1052#endif /* CONFIG_MODVERSIONS */
1045 1053
1046/* Resolve a symbol for this module. I.e. if we find one, record usage. 1054/* Resolve a symbol for this module. I.e. if we find one, record usage. */
1047 Must be holding module_mutex. */
1048static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, 1055static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
1049 unsigned int versindex, 1056 unsigned int versindex,
1050 const char *name, 1057 const char *name,
1051 struct module *mod) 1058 struct module *mod,
1059 char ownername[])
1052{ 1060{
1053 struct module *owner; 1061 struct module *owner;
1054 const struct kernel_symbol *sym; 1062 const struct kernel_symbol *sym;
1055 const unsigned long *crc; 1063 const unsigned long *crc;
1064 int err;
1056 1065
1066 mutex_lock(&module_mutex);
1057 sym = find_symbol(name, &owner, &crc, 1067 sym = find_symbol(name, &owner, &crc,
1058 !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); 1068 !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true);
1059 /* use_module can fail due to OOM, 1069 if (!sym)
1060 or module initialization or unloading */ 1070 goto unlock;
1061 if (sym) { 1071
1062 if (!check_version(sechdrs, versindex, name, mod, crc, owner) 1072 if (!check_version(sechdrs, versindex, name, mod, crc, owner)) {
1063 || !use_module(mod, owner)) 1073 sym = ERR_PTR(-EINVAL);
1064 sym = NULL; 1074 goto getname;
1075 }
1076
1077 err = ref_module(mod, owner);
1078 if (err) {
1079 sym = ERR_PTR(err);
1080 goto getname;
1065 } 1081 }
1082
1083getname:
1084 /* We must make copy under the lock if we failed to get ref. */
1085 strncpy(ownername, module_name(owner), MODULE_NAME_LEN);
1086unlock:
1087 mutex_unlock(&module_mutex);
1066 return sym; 1088 return sym;
1067} 1089}
1068 1090
1091static const struct kernel_symbol *resolve_symbol_wait(Elf_Shdr *sechdrs,
1092 unsigned int versindex,
1093 const char *name,
1094 struct module *mod)
1095{
1096 const struct kernel_symbol *ksym;
1097 char ownername[MODULE_NAME_LEN];
1098
1099 if (wait_event_interruptible_timeout(module_wq,
1100 !IS_ERR(ksym = resolve_symbol(sechdrs, versindex, name,
1101 mod, ownername)) ||
1102 PTR_ERR(ksym) != -EBUSY,
1103 30 * HZ) <= 0) {
1104 printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n",
1105 mod->name, ownername);
1106 }
1107 return ksym;
1108}
1109
1069/* 1110/*
1070 * /sys/module/foo/sections stuff 1111 * /sys/module/foo/sections stuff
1071 * J. Corbet <corbet@lwn.net> 1112 * J. Corbet <corbet@lwn.net>
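resolve_symbol() now takes module_mutex itself and reports failures through ERR_PTR() instead of collapsing everything to NULL, and resolve_symbol_wait() retries only while the owning module is still initializing (-EBUSY), for up to 30 seconds. A hedged, self-contained sketch of that three-way return convention, with the kernel's pointer-error helpers re-implemented so it compiles in user space:

/* NULL means "not found" (acceptable for weak symbols), a real pointer means
 * success, and an encoded errno (ERR_PTR) means a hard or transient failure. */
#include <errno.h>
#include <stdio.h>
#include <string.h>

#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error)      { return (void *)error; }
static inline long  PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int   IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static int owner_still_initializing = 1;
static int symbol_value = 42;

/* Stand-in for resolve_symbol(): not found -> NULL, owner busy -> -EBUSY,
 * otherwise the symbol's address. */
static void *resolve(const char *name)
{
	if (strcmp(name, "known_symbol") != 0)
		return NULL;
	if (owner_still_initializing)
		return ERR_PTR(-EBUSY);
	return &symbol_value;
}

int main(void)
{
	void *sym = NULL;
	int tries;

	/* resolve_symbol_wait() uses wait_event_interruptible_timeout(); a
	 * bounded retry loop is the closest user-space analogue. */
	for (tries = 0; tries < 3; tries++) {
		sym = resolve("known_symbol");
		if (!IS_ERR(sym) || PTR_ERR(sym) != -EBUSY)
			break;
		owner_still_initializing = 0;	/* pretend init finished */
	}

	if (!sym)
		printf("unknown symbol\n");
	else if (IS_ERR(sym))
		printf("hard failure: %ld\n", PTR_ERR(sym));
	else
		printf("resolved to %d\n", *(int *)sym);
	return 0;
}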
@@ -1295,7 +1336,34 @@ static inline void remove_notes_attrs(struct module *mod)
1295#endif 1336#endif
1296 1337
1297#ifdef CONFIG_SYSFS 1338#ifdef CONFIG_SYSFS
1298int module_add_modinfo_attrs(struct module *mod) 1339static void add_usage_links(struct module *mod)
1340{
1341#ifdef CONFIG_MODULE_UNLOAD
1342 struct module_use *use;
1343 int nowarn;
1344
1345 mutex_lock(&module_mutex);
1346 list_for_each_entry(use, &mod->target_list, target_list) {
1347 nowarn = sysfs_create_link(use->target->holders_dir,
1348 &mod->mkobj.kobj, mod->name);
1349 }
1350 mutex_unlock(&module_mutex);
1351#endif
1352}
1353
1354static void del_usage_links(struct module *mod)
1355{
1356#ifdef CONFIG_MODULE_UNLOAD
1357 struct module_use *use;
1358
1359 mutex_lock(&module_mutex);
1360 list_for_each_entry(use, &mod->target_list, target_list)
1361 sysfs_remove_link(use->target->holders_dir, mod->name);
1362 mutex_unlock(&module_mutex);
1363#endif
1364}
1365
1366static int module_add_modinfo_attrs(struct module *mod)
1299{ 1367{
1300 struct module_attribute *attr; 1368 struct module_attribute *attr;
1301 struct module_attribute *temp_attr; 1369 struct module_attribute *temp_attr;
@@ -1321,7 +1389,7 @@ int module_add_modinfo_attrs(struct module *mod)
1321 return error; 1389 return error;
1322} 1390}
1323 1391
1324void module_remove_modinfo_attrs(struct module *mod) 1392static void module_remove_modinfo_attrs(struct module *mod)
1325{ 1393{
1326 struct module_attribute *attr; 1394 struct module_attribute *attr;
1327 int i; 1395 int i;
@@ -1337,7 +1405,7 @@ void module_remove_modinfo_attrs(struct module *mod)
1337 kfree(mod->modinfo_attrs); 1405 kfree(mod->modinfo_attrs);
1338} 1406}
1339 1407
1340int mod_sysfs_init(struct module *mod) 1408static int mod_sysfs_init(struct module *mod)
1341{ 1409{
1342 int err; 1410 int err;
1343 struct kobject *kobj; 1411 struct kobject *kobj;
@@ -1371,12 +1439,16 @@ out:
1371 return err; 1439 return err;
1372} 1440}
1373 1441
1374int mod_sysfs_setup(struct module *mod, 1442static int mod_sysfs_setup(struct module *mod,
1375 struct kernel_param *kparam, 1443 struct kernel_param *kparam,
1376 unsigned int num_params) 1444 unsigned int num_params)
1377{ 1445{
1378 int err; 1446 int err;
1379 1447
1448 err = mod_sysfs_init(mod);
1449 if (err)
1450 goto out;
1451
1380 mod->holders_dir = kobject_create_and_add("holders", &mod->mkobj.kobj); 1452 mod->holders_dir = kobject_create_and_add("holders", &mod->mkobj.kobj);
1381 if (!mod->holders_dir) { 1453 if (!mod->holders_dir) {
1382 err = -ENOMEM; 1454 err = -ENOMEM;
@@ -1391,6 +1463,8 @@ int mod_sysfs_setup(struct module *mod,
1391 if (err) 1463 if (err)
1392 goto out_unreg_param; 1464 goto out_unreg_param;
1393 1465
1466 add_usage_links(mod);
1467
1394 kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); 1468 kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
1395 return 0; 1469 return 0;
1396 1470
@@ -1400,6 +1474,7 @@ out_unreg_holders:
1400 kobject_put(mod->holders_dir); 1474 kobject_put(mod->holders_dir);
1401out_unreg: 1475out_unreg:
1402 kobject_put(&mod->mkobj.kobj); 1476 kobject_put(&mod->mkobj.kobj);
1477out:
1403 return err; 1478 return err;
1404} 1479}
1405 1480
@@ -1410,14 +1485,40 @@ static void mod_sysfs_fini(struct module *mod)
1410 1485
1411#else /* CONFIG_SYSFS */ 1486#else /* CONFIG_SYSFS */
1412 1487
1488static inline int mod_sysfs_init(struct module *mod)
1489{
1490 return 0;
1491}
1492
1493static inline int mod_sysfs_setup(struct module *mod,
1494 struct kernel_param *kparam,
1495 unsigned int num_params)
1496{
1497 return 0;
1498}
1499
1500static inline int module_add_modinfo_attrs(struct module *mod)
1501{
1502 return 0;
1503}
1504
1505static inline void module_remove_modinfo_attrs(struct module *mod)
1506{
1507}
1508
1413static void mod_sysfs_fini(struct module *mod) 1509static void mod_sysfs_fini(struct module *mod)
1414{ 1510{
1415} 1511}
1416 1512
1513static void del_usage_links(struct module *mod)
1514{
1515}
1516
1417#endif /* CONFIG_SYSFS */ 1517#endif /* CONFIG_SYSFS */
1418 1518
1419static void mod_kobject_remove(struct module *mod) 1519static void mod_kobject_remove(struct module *mod)
1420{ 1520{
1521 del_usage_links(mod);
1421 module_remove_modinfo_attrs(mod); 1522 module_remove_modinfo_attrs(mod);
1422 module_param_sysfs_remove(mod); 1523 module_param_sysfs_remove(mod);
1423 kobject_put(mod->mkobj.drivers_dir); 1524 kobject_put(mod->mkobj.drivers_dir);
@@ -1436,13 +1537,15 @@ static int __unlink_module(void *_mod)
1436 return 0; 1537 return 0;
1437} 1538}
1438 1539
1439/* Free a module, remove from lists, etc (must hold module_mutex). */ 1540/* Free a module, remove from lists, etc. */
1440static void free_module(struct module *mod) 1541static void free_module(struct module *mod)
1441{ 1542{
1442 trace_module_free(mod); 1543 trace_module_free(mod);
1443 1544
1444 /* Delete from various lists */ 1545 /* Delete from various lists */
1546 mutex_lock(&module_mutex);
1445 stop_machine(__unlink_module, mod, NULL); 1547 stop_machine(__unlink_module, mod, NULL);
1548 mutex_unlock(&module_mutex);
1446 remove_notes_attrs(mod); 1549 remove_notes_attrs(mod);
1447 remove_sect_attrs(mod); 1550 remove_sect_attrs(mod);
1448 mod_kobject_remove(mod); 1551 mod_kobject_remove(mod);
@@ -1493,6 +1596,8 @@ EXPORT_SYMBOL_GPL(__symbol_get);
1493/* 1596/*
1494 * Ensure that an exported symbol [global namespace] does not already exist 1597 * Ensure that an exported symbol [global namespace] does not already exist
1495 * in the kernel or in some other module's exported symbol table. 1598 * in the kernel or in some other module's exported symbol table.
1599 *
1600 * You must hold the module_mutex.
1496 */ 1601 */
1497static int verify_export_symbols(struct module *mod) 1602static int verify_export_symbols(struct module *mod)
1498{ 1603{
@@ -1558,21 +1663,23 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
1558 break; 1663 break;
1559 1664
1560 case SHN_UNDEF: 1665 case SHN_UNDEF:
1561 ksym = resolve_symbol(sechdrs, versindex, 1666 ksym = resolve_symbol_wait(sechdrs, versindex,
1562 strtab + sym[i].st_name, mod); 1667 strtab + sym[i].st_name,
1668 mod);
1563 /* Ok if resolved. */ 1669 /* Ok if resolved. */
1564 if (ksym) { 1670 if (ksym && !IS_ERR(ksym)) {
1565 sym[i].st_value = ksym->value; 1671 sym[i].st_value = ksym->value;
1566 break; 1672 break;
1567 } 1673 }
1568 1674
1569 /* Ok if weak. */ 1675 /* Ok if weak. */
1570 if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK) 1676 if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK)
1571 break; 1677 break;
1572 1678
1573 printk(KERN_WARNING "%s: Unknown symbol %s\n", 1679 printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n",
1574 mod->name, strtab + sym[i].st_name); 1680 mod->name, strtab + sym[i].st_name,
1575 ret = -ENOENT; 1681 PTR_ERR(ksym));
1682 ret = PTR_ERR(ksym) ?: -ENOENT;
1576 break; 1683 break;
1577 1684
1578 default: 1685 default:
@@ -1960,11 +2067,13 @@ static void *module_alloc_update_bounds(unsigned long size)
1960 void *ret = module_alloc(size); 2067 void *ret = module_alloc(size);
1961 2068
1962 if (ret) { 2069 if (ret) {
2070 mutex_lock(&module_mutex);
1963 /* Update module bounds. */ 2071 /* Update module bounds. */
1964 if ((unsigned long)ret < module_addr_min) 2072 if ((unsigned long)ret < module_addr_min)
1965 module_addr_min = (unsigned long)ret; 2073 module_addr_min = (unsigned long)ret;
1966 if ((unsigned long)ret + size > module_addr_max) 2074 if ((unsigned long)ret + size > module_addr_max)
1967 module_addr_max = (unsigned long)ret + size; 2075 module_addr_max = (unsigned long)ret + size;
2076 mutex_unlock(&module_mutex);
1968 } 2077 }
1969 return ret; 2078 return ret;
1970} 2079}
@@ -2014,6 +2123,7 @@ static noinline struct module *load_module(void __user *umod,
2014 long err = 0; 2123 long err = 0;
2015 void *ptr = NULL; /* Stops spurious gcc warning */ 2124 void *ptr = NULL; /* Stops spurious gcc warning */
2016 unsigned long symoffs, stroffs, *strmap; 2125 unsigned long symoffs, stroffs, *strmap;
2126 void __percpu *percpu;
2017 2127
2018 mm_segment_t old_fs; 2128 mm_segment_t old_fs;
2019 2129
@@ -2138,11 +2248,6 @@ static noinline struct module *load_module(void __user *umod,
2138 goto free_mod; 2248 goto free_mod;
2139 } 2249 }
2140 2250
2141 if (find_module(mod->name)) {
2142 err = -EEXIST;
2143 goto free_mod;
2144 }
2145
2146 mod->state = MODULE_STATE_COMING; 2251 mod->state = MODULE_STATE_COMING;
2147 2252
2148 /* Allow arches to frob section contents and sizes. */ 2253 /* Allow arches to frob section contents and sizes. */
@@ -2158,6 +2263,8 @@ static noinline struct module *load_module(void __user *umod,
2158 goto free_mod; 2263 goto free_mod;
2159 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; 2264 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
2160 } 2265 }
2266 /* Keep this around for failure path. */
2267 percpu = mod_percpu(mod);
2161 2268
2162 /* Determine total sizes, and put offsets in sh_entsize. For now 2269 /* Determine total sizes, and put offsets in sh_entsize. For now
2163 this is done generically; there doesn't appear to be any 2270 this is done generically; there doesn't appear to be any
@@ -2231,11 +2338,6 @@ static noinline struct module *load_module(void __user *umod,
2231 /* Now we've moved module, initialize linked lists, etc. */ 2338 /* Now we've moved module, initialize linked lists, etc. */
2232 module_unload_init(mod); 2339 module_unload_init(mod);
2233 2340
2234 /* add kobject, so we can reference it. */
2235 err = mod_sysfs_init(mod);
2236 if (err)
2237 goto free_unload;
2238
2239 /* Set up license info based on the info section */ 2341 /* Set up license info based on the info section */
2240 set_license(mod, get_modinfo(sechdrs, infoindex, "license")); 2342 set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
2241 2343
@@ -2360,11 +2462,6 @@ static noinline struct module *load_module(void __user *umod,
2360 goto cleanup; 2462 goto cleanup;
2361 } 2463 }
2362 2464
2363 /* Find duplicate symbols */
2364 err = verify_export_symbols(mod);
2365 if (err < 0)
2366 goto cleanup;
2367
2368 /* Set up and sort exception table */ 2465 /* Set up and sort exception table */
2369 mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table", 2466 mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table",
2370 sizeof(*mod->extable), &mod->num_exentries); 2467 sizeof(*mod->extable), &mod->num_exentries);
@@ -2423,7 +2520,19 @@ static noinline struct module *load_module(void __user *umod,
2423 * function to insert in a way safe to concurrent readers. 2520 * function to insert in a way safe to concurrent readers.
2424 * The mutex protects against concurrent writers. 2521 * The mutex protects against concurrent writers.
2425 */ 2522 */
2523 mutex_lock(&module_mutex);
2524 if (find_module(mod->name)) {
2525 err = -EEXIST;
2526 goto unlock;
2527 }
2528
2529 /* Find duplicate symbols */
2530 err = verify_export_symbols(mod);
2531 if (err < 0)
2532 goto unlock;
2533
2426 list_add_rcu(&mod->list, &modules); 2534 list_add_rcu(&mod->list, &modules);
2535 mutex_unlock(&module_mutex);
2427 2536
2428 err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL); 2537 err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL);
2429 if (err < 0) 2538 if (err < 0)
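With init_module() no longer holding module_mutex for the whole load, the duplicate-name check and verify_export_symbols() move (per the hunk above) into the same critical section that inserts the module into the list, closing the window where two loads of the same module could both pass the check. A small sketch of that check-then-insert pattern using a pthread mutex (hypothetical registry, not the kernel API):

#include <errno.h>
#include <pthread.h>
#include <string.h>

struct entry {
	const char *name;
	struct entry *next;
};

static struct entry *registry;
static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

static int register_entry(struct entry *e)
{
	struct entry *it;
	int err = 0;

	/* Checking and inserting in separate critical sections would let two
	 * racing callers both see "not found" and both insert; holding the
	 * lock across both steps rules that out. */
	pthread_mutex_lock(&registry_lock);
	for (it = registry; it; it = it->next) {
		if (strcmp(it->name, e->name) == 0) {
			err = -EEXIST;
			goto unlock;
		}
	}
	e->next = registry;
	registry = e;
unlock:
	pthread_mutex_unlock(&registry_lock);
	return err;
}

int main(void)
{
	static struct entry a = { "snd", NULL }, b = { "snd", NULL };

	register_entry(&a);                            /* first insert succeeds  */
	return register_entry(&b) == -EEXIST ? 0 : 1;  /* duplicate is rejected  */
}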
@@ -2432,6 +2541,7 @@ static noinline struct module *load_module(void __user *umod,
2432 err = mod_sysfs_setup(mod, mod->kp, mod->num_kp); 2541 err = mod_sysfs_setup(mod, mod->kp, mod->num_kp);
2433 if (err < 0) 2542 if (err < 0)
2434 goto unlink; 2543 goto unlink;
2544
2435 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); 2545 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
2436 add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); 2546 add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
2437 2547
@@ -2444,15 +2554,15 @@ static noinline struct module *load_module(void __user *umod,
2444 return mod; 2554 return mod;
2445 2555
2446 unlink: 2556 unlink:
2557 mutex_lock(&module_mutex);
2447 /* Unlink carefully: kallsyms could be walking list. */ 2558 /* Unlink carefully: kallsyms could be walking list. */
2448 list_del_rcu(&mod->list); 2559 list_del_rcu(&mod->list);
2560 unlock:
2561 mutex_unlock(&module_mutex);
2449 synchronize_sched(); 2562 synchronize_sched();
2450 module_arch_cleanup(mod); 2563 module_arch_cleanup(mod);
2451 cleanup: 2564 cleanup:
2452 free_modinfo(mod); 2565 free_modinfo(mod);
2453 kobject_del(&mod->mkobj.kobj);
2454 kobject_put(&mod->mkobj.kobj);
2455 free_unload:
2456 module_unload_free(mod); 2566 module_unload_free(mod);
2457#if defined(CONFIG_MODULE_UNLOAD) 2567#if defined(CONFIG_MODULE_UNLOAD)
2458 free_percpu(mod->refptr); 2568 free_percpu(mod->refptr);
@@ -2463,7 +2573,7 @@ static noinline struct module *load_module(void __user *umod,
2463 module_free(mod, mod->module_core); 2573 module_free(mod, mod->module_core);
2464 /* mod will be freed with core. Don't access it beyond this line! */ 2574 /* mod will be freed with core. Don't access it beyond this line! */
2465 free_percpu: 2575 free_percpu:
2466 percpu_modfree(mod); 2576 free_percpu(percpu);
2467 free_mod: 2577 free_mod:
2468 kfree(args); 2578 kfree(args);
2469 kfree(strmap); 2579 kfree(strmap);
@@ -2499,19 +2609,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
2499 if (!capable(CAP_SYS_MODULE) || modules_disabled) 2609 if (!capable(CAP_SYS_MODULE) || modules_disabled)
2500 return -EPERM; 2610 return -EPERM;
2501 2611
2502 /* Only one module load at a time, please */
2503 if (mutex_lock_interruptible(&module_mutex) != 0)
2504 return -EINTR;
2505
2506 /* Do all the hard work */ 2612 /* Do all the hard work */
2507 mod = load_module(umod, len, uargs); 2613 mod = load_module(umod, len, uargs);
2508 if (IS_ERR(mod)) { 2614 if (IS_ERR(mod))
2509 mutex_unlock(&module_mutex);
2510 return PTR_ERR(mod); 2615 return PTR_ERR(mod);
2511 }
2512
2513 /* Drop lock so they can recurse */
2514 mutex_unlock(&module_mutex);
2515 2616
2516 blocking_notifier_call_chain(&module_notify_list, 2617 blocking_notifier_call_chain(&module_notify_list,
2517 MODULE_STATE_COMING, mod); 2618 MODULE_STATE_COMING, mod);
@@ -2528,9 +2629,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
2528 module_put(mod); 2629 module_put(mod);
2529 blocking_notifier_call_chain(&module_notify_list, 2630 blocking_notifier_call_chain(&module_notify_list,
2530 MODULE_STATE_GOING, mod); 2631 MODULE_STATE_GOING, mod);
2531 mutex_lock(&module_mutex);
2532 free_module(mod); 2632 free_module(mod);
2533 mutex_unlock(&module_mutex);
2534 wake_up(&module_wq); 2633 wake_up(&module_wq);
2535 return ret; 2634 return ret;
2536 } 2635 }
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index bd7ce8ca5bb9..ff86c558af4c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -283,14 +283,15 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
283static void 283static void
284list_add_event(struct perf_event *event, struct perf_event_context *ctx) 284list_add_event(struct perf_event *event, struct perf_event_context *ctx)
285{ 285{
286 struct perf_event *group_leader = event->group_leader; 286 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
287 event->attach_state |= PERF_ATTACH_CONTEXT;
287 288
288 /* 289 /*
289 * Depending on whether it is a standalone or sibling event, 290 * If we're a stand alone event or group leader, we go to the context
290 * add it straight to the context's event list, or to the group 291 * list, group events are kept attached to the group so that
291 * leader's sibling list: 292 * perf_group_detach can, at all times, locate all siblings.
292 */ 293 */
293 if (group_leader == event) { 294 if (event->group_leader == event) {
294 struct list_head *list; 295 struct list_head *list;
295 296
296 if (is_software_event(event)) 297 if (is_software_event(event))
@@ -298,13 +299,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
298 299
299 list = ctx_group_list(event, ctx); 300 list = ctx_group_list(event, ctx);
300 list_add_tail(&event->group_entry, list); 301 list_add_tail(&event->group_entry, list);
301 } else {
302 if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
303 !is_software_event(event))
304 group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
305
306 list_add_tail(&event->group_entry, &group_leader->sibling_list);
307 group_leader->nr_siblings++;
308 } 302 }
309 303
310 list_add_rcu(&event->event_entry, &ctx->event_list); 304 list_add_rcu(&event->event_entry, &ctx->event_list);
@@ -313,6 +307,24 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
313 ctx->nr_stat++; 307 ctx->nr_stat++;
314} 308}
315 309
310static void perf_group_attach(struct perf_event *event)
311{
312 struct perf_event *group_leader = event->group_leader;
313
314 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_GROUP);
315 event->attach_state |= PERF_ATTACH_GROUP;
316
317 if (group_leader == event)
318 return;
319
320 if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
321 !is_software_event(event))
322 group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
323
324 list_add_tail(&event->group_entry, &group_leader->sibling_list);
325 group_leader->nr_siblings++;
326}
327
316/* 328/*
317 * Remove a event from the lists for its context. 329 * Remove a event from the lists for its context.
318 * Must be called with ctx->mutex and ctx->lock held. 330 * Must be called with ctx->mutex and ctx->lock held.
@@ -320,17 +332,22 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
320static void 332static void
321list_del_event(struct perf_event *event, struct perf_event_context *ctx) 333list_del_event(struct perf_event *event, struct perf_event_context *ctx)
322{ 334{
323 if (list_empty(&event->group_entry)) 335 /*
336 * We can have double detach due to exit/hot-unplug + close.
337 */
338 if (!(event->attach_state & PERF_ATTACH_CONTEXT))
324 return; 339 return;
340
341 event->attach_state &= ~PERF_ATTACH_CONTEXT;
342
325 ctx->nr_events--; 343 ctx->nr_events--;
326 if (event->attr.inherit_stat) 344 if (event->attr.inherit_stat)
327 ctx->nr_stat--; 345 ctx->nr_stat--;
328 346
329 list_del_init(&event->group_entry);
330 list_del_rcu(&event->event_entry); 347 list_del_rcu(&event->event_entry);
331 348
332 if (event->group_leader != event) 349 if (event->group_leader == event)
333 event->group_leader->nr_siblings--; 350 list_del_init(&event->group_entry);
334 351
335 update_group_times(event); 352 update_group_times(event);
336 353
@@ -345,21 +362,39 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
345 event->state = PERF_EVENT_STATE_OFF; 362 event->state = PERF_EVENT_STATE_OFF;
346} 363}
347 364
348static void 365static void perf_group_detach(struct perf_event *event)
349perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx)
350{ 366{
351 struct perf_event *sibling, *tmp; 367 struct perf_event *sibling, *tmp;
368 struct list_head *list = NULL;
369
370 /*
371 * We can have double detach due to exit/hot-unplug + close.
372 */
373 if (!(event->attach_state & PERF_ATTACH_GROUP))
374 return;
375
376 event->attach_state &= ~PERF_ATTACH_GROUP;
377
378 /*
379 * If this is a sibling, remove it from its group.
380 */
381 if (event->group_leader != event) {
382 list_del_init(&event->group_entry);
383 event->group_leader->nr_siblings--;
384 return;
385 }
386
387 if (!list_empty(&event->group_entry))
388 list = &event->group_entry;
352 389
353 /* 390 /*
354 * If this was a group event with sibling events then 391 * If this was a group event with sibling events then
355 * upgrade the siblings to singleton events by adding them 392 * upgrade the siblings to singleton events by adding them
356 * to the context list directly: 393 * to whatever list we are on.
357 */ 394 */
358 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { 395 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
359 struct list_head *list; 396 if (list)
360 397 list_move_tail(&sibling->group_entry, list);
361 list = ctx_group_list(event, ctx);
362 list_move_tail(&sibling->group_entry, list);
363 sibling->group_leader = sibling; 398 sibling->group_leader = sibling;
364 399
365 /* Inherit group flags from the previous leader */ 400 /* Inherit group flags from the previous leader */
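Context and group membership are now tracked by explicit PERF_ATTACH_CONTEXT and PERF_ATTACH_GROUP bits in event->attach_state, so list_del_event() and perf_group_detach() can each be called twice (exit or hot-unplug followed by close) without touching the lists again. A simplified sketch of that idempotent-detach idea (hypothetical types, not the perf structures):

/* Each attach sets a bit; each detach clears it and bails out early if the
 * bit is already clear, so a second detach is a no-op. */
#include <assert.h>
#include <stdio.h>

#define ATTACH_CONTEXT	0x01
#define ATTACH_GROUP	0x02

struct event {
	int attach_state;
	int on_context_list;	/* stands in for the real list linkage */
	int on_group_list;
};

static void detach_from_context(struct event *e)
{
	if (!(e->attach_state & ATTACH_CONTEXT))
		return;			/* already detached: exit vs. close race */
	e->attach_state &= ~ATTACH_CONTEXT;
	e->on_context_list = 0;
}

static void detach_from_group(struct event *e)
{
	if (!(e->attach_state & ATTACH_GROUP))
		return;
	e->attach_state &= ~ATTACH_GROUP;
	e->on_group_list = 0;
}

int main(void)
{
	struct event e = { ATTACH_CONTEXT | ATTACH_GROUP, 1, 1 };

	detach_from_group(&e);		/* e.g. task exit */
	detach_from_context(&e);
	detach_from_group(&e);		/* e.g. later close(): harmless */
	detach_from_context(&e);
	assert(!e.on_context_list && !e.on_group_list);
	printf("double detach is a no-op\n");
	return 0;
}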
@@ -652,8 +687,11 @@ group_sched_in(struct perf_event *group_event,
652 if (txn) 687 if (txn)
653 pmu->start_txn(pmu); 688 pmu->start_txn(pmu);
654 689
655 if (event_sched_in(group_event, cpuctx, ctx)) 690 if (event_sched_in(group_event, cpuctx, ctx)) {
691 if (txn)
692 pmu->cancel_txn(pmu);
656 return -EAGAIN; 693 return -EAGAIN;
694 }
657 695
658 /* 696 /*
659 * Schedule in siblings as one group (if any): 697 * Schedule in siblings as one group (if any):
@@ -675,9 +713,6 @@ group_sched_in(struct perf_event *group_event,
675 } 713 }
676 714
677group_error: 715group_error:
678 if (txn)
679 pmu->cancel_txn(pmu);
680
681 /* 716 /*
682 * Groups can be scheduled in as one unit only, so undo any 717 * Groups can be scheduled in as one unit only, so undo any
683 * partial group before returning: 718 * partial group before returning:
@@ -689,6 +724,9 @@ group_error:
689 } 724 }
690 event_sched_out(group_event, cpuctx, ctx); 725 event_sched_out(group_event, cpuctx, ctx);
691 726
727 if (txn)
728 pmu->cancel_txn(pmu);
729
692 return -EAGAIN; 730 return -EAGAIN;
693} 731}
694 732
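The three hunks above make sure pmu->cancel_txn() is reached on every failure path of group_sched_in(): previously an early failure of the group leader returned without cancelling, and the group_error path cancelled before the partially scheduled siblings had been torn down. A compact sketch of the transactional "add all or roll back" pattern, with hypothetical pmu ops standing in for start_txn/commit_txn/cancel_txn:

#include <stdio.h>

struct pmu_txn { int open; int added; };

static int  add_event(struct pmu_txn *t, int ok)
{
	if (!ok)
		return -1;
	t->added++;
	return 0;
}
static void remove_event(struct pmu_txn *t) { t->added--; }
static void start_txn(struct pmu_txn *t)    { t->open = 1; }
static void cancel_txn(struct pmu_txn *t)   { t->open = 0; }
static int  commit_txn(struct pmu_txn *t)   { t->open = 0; return 0; }

static int group_sched_in(struct pmu_txn *t, const int *events, int n)
{
	int i, j;

	start_txn(t);
	for (i = 0; i < n; i++) {
		if (add_event(t, events[i]))
			goto group_error;
	}
	return commit_txn(t);

group_error:
	/* Undo the partial group first, then cancel the transaction --
	 * the same ordering the fix above establishes. */
	for (j = 0; j < i; j++)
		remove_event(t);
	cancel_txn(t);
	return -1;
}

int main(void)
{
	struct pmu_txn t = { 0, 0 };
	const int good[] = { 1, 1, 1 }, bad[] = { 1, 0, 1 };

	printf("good group: %d (added %d)\n", group_sched_in(&t, good, 3), t.added);
	t.added = 0;
	printf("bad group:  %d (added %d)\n", group_sched_in(&t, bad, 3), t.added);
	return 0;
}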
@@ -727,6 +765,7 @@ static void add_event_to_ctx(struct perf_event *event,
727 struct perf_event_context *ctx) 765 struct perf_event_context *ctx)
728{ 766{
729 list_add_event(event, ctx); 767 list_add_event(event, ctx);
768 perf_group_attach(event);
730 event->tstamp_enabled = ctx->time; 769 event->tstamp_enabled = ctx->time;
731 event->tstamp_running = ctx->time; 770 event->tstamp_running = ctx->time;
732 event->tstamp_stopped = ctx->time; 771 event->tstamp_stopped = ctx->time;
@@ -1468,6 +1507,9 @@ do { \
1468 divisor = nsec * frequency; 1507 divisor = nsec * frequency;
1469 } 1508 }
1470 1509
1510 if (!divisor)
1511 return dividend;
1512
1471 return div64_u64(dividend, divisor); 1513 return div64_u64(dividend, divisor);
1472} 1514}
1473 1515
@@ -1490,7 +1532,7 @@ static int perf_event_start(struct perf_event *event)
1490static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) 1532static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
1491{ 1533{
1492 struct hw_perf_event *hwc = &event->hw; 1534 struct hw_perf_event *hwc = &event->hw;
1493 u64 period, sample_period; 1535 s64 period, sample_period;
1494 s64 delta; 1536 s64 delta;
1495 1537
1496 period = perf_calculate_period(event, nsec, count); 1538 period = perf_calculate_period(event, nsec, count);
@@ -1841,6 +1883,7 @@ static void free_event_rcu(struct rcu_head *head)
1841} 1883}
1842 1884
1843static void perf_pending_sync(struct perf_event *event); 1885static void perf_pending_sync(struct perf_event *event);
1886static void perf_mmap_data_put(struct perf_mmap_data *data);
1844 1887
1845static void free_event(struct perf_event *event) 1888static void free_event(struct perf_event *event)
1846{ 1889{
@@ -1856,9 +1899,9 @@ static void free_event(struct perf_event *event)
1856 atomic_dec(&nr_task_events); 1899 atomic_dec(&nr_task_events);
1857 } 1900 }
1858 1901
1859 if (event->output) { 1902 if (event->data) {
1860 fput(event->output->filp); 1903 perf_mmap_data_put(event->data);
1861 event->output = NULL; 1904 event->data = NULL;
1862 } 1905 }
1863 1906
1864 if (event->destroy) 1907 if (event->destroy)
@@ -1893,8 +1936,8 @@ int perf_event_release_kernel(struct perf_event *event)
1893 */ 1936 */
1894 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); 1937 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
1895 raw_spin_lock_irq(&ctx->lock); 1938 raw_spin_lock_irq(&ctx->lock);
1939 perf_group_detach(event);
1896 list_del_event(event, ctx); 1940 list_del_event(event, ctx);
1897 perf_destroy_group(event, ctx);
1898 raw_spin_unlock_irq(&ctx->lock); 1941 raw_spin_unlock_irq(&ctx->lock);
1899 mutex_unlock(&ctx->mutex); 1942 mutex_unlock(&ctx->mutex);
1900 1943
@@ -2175,7 +2218,27 @@ unlock:
2175 return ret; 2218 return ret;
2176} 2219}
2177 2220
2178static int perf_event_set_output(struct perf_event *event, int output_fd); 2221static const struct file_operations perf_fops;
2222
2223static struct perf_event *perf_fget_light(int fd, int *fput_needed)
2224{
2225 struct file *file;
2226
2227 file = fget_light(fd, fput_needed);
2228 if (!file)
2229 return ERR_PTR(-EBADF);
2230
2231 if (file->f_op != &perf_fops) {
2232 fput_light(file, *fput_needed);
2233 *fput_needed = 0;
2234 return ERR_PTR(-EBADF);
2235 }
2236
2237 return file->private_data;
2238}
2239
2240static int perf_event_set_output(struct perf_event *event,
2241 struct perf_event *output_event);
2179static int perf_event_set_filter(struct perf_event *event, void __user *arg); 2242static int perf_event_set_filter(struct perf_event *event, void __user *arg);
2180 2243
2181static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 2244static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
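perf_fget_light() centralizes the "is this fd really a perf event?" check: it resolves the descriptor, verifies f_op against perf_fops before trusting private_data, and returns ERR_PTR(-EBADF) otherwise; both the SET_OUTPUT ioctl and perf_event_open() now go through it. A user-space analogue of validating a handle's type before downcasting (hypothetical handle table, not the kernel's fd machinery):

#include <stdio.h>

enum obj_type { OBJ_PERF_EVENT, OBJ_OTHER };

struct object {
	enum obj_type type;
	void *private_data;
};

struct perf_event { const char *name; };

static struct perf_event ev = { "cycles" };
static struct object table[] = {
	{ OBJ_PERF_EVENT, &ev },
	{ OBJ_OTHER,      NULL },
};

static struct perf_event *perf_from_handle(int h)
{
	if (h < 0 || h >= (int)(sizeof(table) / sizeof(table[0])))
		return NULL;			/* bad descriptor */
	if (table[h].type != OBJ_PERF_EVENT)
		return NULL;			/* valid handle, wrong kind of object */
	return table[h].private_data;
}

int main(void)
{
	struct perf_event *e = perf_from_handle(0);

	printf("handle 0: %s\n", e ? e->name : "rejected");
	printf("handle 1: %s\n", perf_from_handle(1) ? "ok" : "rejected");
	return 0;
}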
@@ -2202,7 +2265,23 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2202 return perf_event_period(event, (u64 __user *)arg); 2265 return perf_event_period(event, (u64 __user *)arg);
2203 2266
2204 case PERF_EVENT_IOC_SET_OUTPUT: 2267 case PERF_EVENT_IOC_SET_OUTPUT:
2205 return perf_event_set_output(event, arg); 2268 {
2269 struct perf_event *output_event = NULL;
2270 int fput_needed = 0;
2271 int ret;
2272
2273 if (arg != -1) {
2274 output_event = perf_fget_light(arg, &fput_needed);
2275 if (IS_ERR(output_event))
2276 return PTR_ERR(output_event);
2277 }
2278
2279 ret = perf_event_set_output(event, output_event);
2280 if (output_event)
2281 fput_light(output_event->filp, fput_needed);
2282
2283 return ret;
2284 }
2206 2285
2207 case PERF_EVENT_IOC_SET_FILTER: 2286 case PERF_EVENT_IOC_SET_FILTER:
2208 return perf_event_set_filter(event, (void __user *)arg); 2287 return perf_event_set_filter(event, (void __user *)arg);
@@ -2335,8 +2414,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
2335 unsigned long size; 2414 unsigned long size;
2336 int i; 2415 int i;
2337 2416
2338 WARN_ON(atomic_read(&event->mmap_count));
2339
2340 size = sizeof(struct perf_mmap_data); 2417 size = sizeof(struct perf_mmap_data);
2341 size += nr_pages * sizeof(void *); 2418 size += nr_pages * sizeof(void *);
2342 2419
@@ -2452,8 +2529,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
2452 unsigned long size; 2529 unsigned long size;
2453 void *all_buf; 2530 void *all_buf;
2454 2531
2455 WARN_ON(atomic_read(&event->mmap_count));
2456
2457 size = sizeof(struct perf_mmap_data); 2532 size = sizeof(struct perf_mmap_data);
2458 size += sizeof(void *); 2533 size += sizeof(void *);
2459 2534
@@ -2536,7 +2611,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
2536 if (!data->watermark) 2611 if (!data->watermark)
2537 data->watermark = max_size / 2; 2612 data->watermark = max_size / 2;
2538 2613
2539 2614 atomic_set(&data->refcount, 1);
2540 rcu_assign_pointer(event->data, data); 2615 rcu_assign_pointer(event->data, data);
2541} 2616}
2542 2617
@@ -2548,13 +2623,26 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
2548 perf_mmap_data_free(data); 2623 perf_mmap_data_free(data);
2549} 2624}
2550 2625
2551static void perf_mmap_data_release(struct perf_event *event) 2626static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event)
2552{ 2627{
2553 struct perf_mmap_data *data = event->data; 2628 struct perf_mmap_data *data;
2629
2630 rcu_read_lock();
2631 data = rcu_dereference(event->data);
2632 if (data) {
2633 if (!atomic_inc_not_zero(&data->refcount))
2634 data = NULL;
2635 }
2636 rcu_read_unlock();
2637
2638 return data;
2639}
2554 2640
2555 WARN_ON(atomic_read(&event->mmap_count)); 2641static void perf_mmap_data_put(struct perf_mmap_data *data)
2642{
2643 if (!atomic_dec_and_test(&data->refcount))
2644 return;
2556 2645
2557 rcu_assign_pointer(event->data, NULL);
2558 call_rcu(&data->rcu_head, perf_mmap_data_free_rcu); 2646 call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
2559} 2647}
2560 2648
@@ -2569,15 +2657,18 @@ static void perf_mmap_close(struct vm_area_struct *vma)
2569{ 2657{
2570 struct perf_event *event = vma->vm_file->private_data; 2658 struct perf_event *event = vma->vm_file->private_data;
2571 2659
2572 WARN_ON_ONCE(event->ctx->parent_ctx);
2573 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { 2660 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
2574 unsigned long size = perf_data_size(event->data); 2661 unsigned long size = perf_data_size(event->data);
2575 struct user_struct *user = current_user(); 2662 struct user_struct *user = event->mmap_user;
2663 struct perf_mmap_data *data = event->data;
2576 2664
2577 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); 2665 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
2578 vma->vm_mm->locked_vm -= event->data->nr_locked; 2666 vma->vm_mm->locked_vm -= event->mmap_locked;
2579 perf_mmap_data_release(event); 2667 rcu_assign_pointer(event->data, NULL);
2580 mutex_unlock(&event->mmap_mutex); 2668 mutex_unlock(&event->mmap_mutex);
2669
2670 perf_mmap_data_put(data);
2671 free_uid(user);
2581 } 2672 }
2582} 2673}
2583 2674
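The mmap buffer now carries its own refcount: perf_mmap_data_get() takes a reference only while the count is still non-zero (atomic_inc_not_zero under rcu_read_lock), and perf_mmap_data_put() frees through call_rcu when the last reference drops, which is what lets several events share one buffer safely. A hedged user-space sketch of the "get only if still live" refcount idiom using C11 atomics (RCU deferral omitted):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct buffer {
	atomic_int refcount;
	int nr_pages;
};

static struct buffer *buffer_get(struct buffer *buf)
{
	int old = atomic_load(&buf->refcount);

	/* Equivalent of atomic_inc_not_zero(): never revive a dead object. */
	while (old != 0) {
		if (atomic_compare_exchange_weak(&buf->refcount, &old, old + 1))
			return buf;
	}
	return NULL;
}

static void buffer_put(struct buffer *buf)
{
	if (atomic_fetch_sub(&buf->refcount, 1) == 1)
		free(buf);		/* the kernel defers this via call_rcu() */
}

int main(void)
{
	struct buffer *buf = malloc(sizeof(*buf));

	atomic_init(&buf->refcount, 1);	/* creator's reference */
	buf->nr_pages = 8;

	if (buffer_get(buf))		/* second user, e.g. a redirected event */
		printf("shared buffer, %d pages\n", buf->nr_pages);

	buffer_put(buf);		/* second user done */
	buffer_put(buf);		/* creator done: freed here */
	return 0;
}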
@@ -2629,13 +2720,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2629 2720
2630 WARN_ON_ONCE(event->ctx->parent_ctx); 2721 WARN_ON_ONCE(event->ctx->parent_ctx);
2631 mutex_lock(&event->mmap_mutex); 2722 mutex_lock(&event->mmap_mutex);
2632 if (event->output) { 2723 if (event->data) {
2633 ret = -EINVAL; 2724 if (event->data->nr_pages == nr_pages)
2634 goto unlock; 2725 atomic_inc(&event->data->refcount);
2635 } 2726 else
2636
2637 if (atomic_inc_not_zero(&event->mmap_count)) {
2638 if (nr_pages != event->data->nr_pages)
2639 ret = -EINVAL; 2727 ret = -EINVAL;
2640 goto unlock; 2728 goto unlock;
2641 } 2729 }
@@ -2667,21 +2755,23 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2667 WARN_ON(event->data); 2755 WARN_ON(event->data);
2668 2756
2669 data = perf_mmap_data_alloc(event, nr_pages); 2757 data = perf_mmap_data_alloc(event, nr_pages);
2670 ret = -ENOMEM; 2758 if (!data) {
2671 if (!data) 2759 ret = -ENOMEM;
2672 goto unlock; 2760 goto unlock;
2761 }
2673 2762
2674 ret = 0;
2675 perf_mmap_data_init(event, data); 2763 perf_mmap_data_init(event, data);
2676
2677 atomic_set(&event->mmap_count, 1);
2678 atomic_long_add(user_extra, &user->locked_vm);
2679 vma->vm_mm->locked_vm += extra;
2680 event->data->nr_locked = extra;
2681 if (vma->vm_flags & VM_WRITE) 2764 if (vma->vm_flags & VM_WRITE)
2682 event->data->writable = 1; 2765 event->data->writable = 1;
2683 2766
2767 atomic_long_add(user_extra, &user->locked_vm);
2768 event->mmap_locked = extra;
2769 event->mmap_user = get_current_user();
2770 vma->vm_mm->locked_vm += event->mmap_locked;
2771
2684unlock: 2772unlock:
2773 if (!ret)
2774 atomic_inc(&event->mmap_count);
2685 mutex_unlock(&event->mmap_mutex); 2775 mutex_unlock(&event->mmap_mutex);
2686 2776
2687 vma->vm_flags |= VM_RESERVED; 2777 vma->vm_flags |= VM_RESERVED;
@@ -2977,6 +3067,7 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
2977 3067
2978 len -= size; 3068 len -= size;
2979 handle->addr += size; 3069 handle->addr += size;
3070 buf += size;
2980 handle->size -= size; 3071 handle->size -= size;
2981 if (!handle->size) { 3072 if (!handle->size) {
2982 struct perf_mmap_data *data = handle->data; 3073 struct perf_mmap_data *data = handle->data;
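The one-line change above fixes perf_output_copy() for copies that span more than one chunk: the destination bookkeeping (handle->addr, handle->size) already advanced, but the source pointer did not, so every chunk after the first re-copied the start of the input. A minimal sketch of a chunked copy where both sides must advance:

#include <stdio.h>
#include <string.h>

static void copy_in_chunks(char *dst, const char *src, size_t len, size_t chunk)
{
	while (len) {
		size_t size = len < chunk ? len : chunk;

		memcpy(dst, src, size);
		dst += size;
		src += size;	/* the missing "buf += size" in the old code */
		len -= size;
	}
}

int main(void)
{
	char out[16] = { 0 };

	copy_in_chunks(out, "perf_event_data", 15, 4);
	printf("%s\n", out);	/* "perf_event_data", not a repeated prefix */
	return 0;
}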
@@ -2993,7 +3084,6 @@ int perf_output_begin(struct perf_output_handle *handle,
2993 struct perf_event *event, unsigned int size, 3084 struct perf_event *event, unsigned int size,
2994 int nmi, int sample) 3085 int nmi, int sample)
2995{ 3086{
2996 struct perf_event *output_event;
2997 struct perf_mmap_data *data; 3087 struct perf_mmap_data *data;
2998 unsigned long tail, offset, head; 3088 unsigned long tail, offset, head;
2999 int have_lost; 3089 int have_lost;
@@ -3010,10 +3100,6 @@ int perf_output_begin(struct perf_output_handle *handle,
3010 if (event->parent) 3100 if (event->parent)
3011 event = event->parent; 3101 event = event->parent;
3012 3102
3013 output_event = rcu_dereference(event->output);
3014 if (output_event)
3015 event = output_event;
3016
3017 data = rcu_dereference(event->data); 3103 data = rcu_dereference(event->data);
3018 if (!data) 3104 if (!data)
3019 goto out; 3105 goto out;
@@ -3972,13 +4058,6 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
3972 } 4058 }
3973} 4059}
3974 4060
3975static void perf_swevent_unthrottle(struct perf_event *event)
3976{
3977 /*
3978 * Nothing to do, we already reset hwc->interrupts.
3979 */
3980}
3981
3982static void perf_swevent_add(struct perf_event *event, u64 nr, 4061static void perf_swevent_add(struct perf_event *event, u64 nr,
3983 int nmi, struct perf_sample_data *data, 4062 int nmi, struct perf_sample_data *data,
3984 struct pt_regs *regs) 4063 struct pt_regs *regs)
@@ -4193,11 +4272,22 @@ static void perf_swevent_disable(struct perf_event *event)
4193 hlist_del_rcu(&event->hlist_entry); 4272 hlist_del_rcu(&event->hlist_entry);
4194} 4273}
4195 4274
4275static void perf_swevent_void(struct perf_event *event)
4276{
4277}
4278
4279static int perf_swevent_int(struct perf_event *event)
4280{
4281 return 0;
4282}
4283
4196static const struct pmu perf_ops_generic = { 4284static const struct pmu perf_ops_generic = {
4197 .enable = perf_swevent_enable, 4285 .enable = perf_swevent_enable,
4198 .disable = perf_swevent_disable, 4286 .disable = perf_swevent_disable,
4287 .start = perf_swevent_int,
4288 .stop = perf_swevent_void,
4199 .read = perf_swevent_read, 4289 .read = perf_swevent_read,
4200 .unthrottle = perf_swevent_unthrottle, 4290 .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */
4201}; 4291};
4202 4292
4203/* 4293/*
@@ -4478,8 +4568,10 @@ static int swevent_hlist_get(struct perf_event *event)
4478static const struct pmu perf_ops_tracepoint = { 4568static const struct pmu perf_ops_tracepoint = {
4479 .enable = perf_trace_enable, 4569 .enable = perf_trace_enable,
4480 .disable = perf_trace_disable, 4570 .disable = perf_trace_disable,
4571 .start = perf_swevent_int,
4572 .stop = perf_swevent_void,
4481 .read = perf_swevent_read, 4573 .read = perf_swevent_read,
4482 .unthrottle = perf_swevent_unthrottle, 4574 .unthrottle = perf_swevent_void,
4483}; 4575};
4484 4576
4485static int perf_tp_filter_match(struct perf_event *event, 4577static int perf_tp_filter_match(struct perf_event *event,
@@ -4912,39 +5004,17 @@ err_size:
4912 goto out; 5004 goto out;
4913} 5005}
4914 5006
4915static int perf_event_set_output(struct perf_event *event, int output_fd) 5007static int
5008perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
4916{ 5009{
4917 struct perf_event *output_event = NULL; 5010 struct perf_mmap_data *data = NULL, *old_data = NULL;
4918 struct file *output_file = NULL;
4919 struct perf_event *old_output;
4920 int fput_needed = 0;
4921 int ret = -EINVAL; 5011 int ret = -EINVAL;
4922 5012
4923 /* 5013 if (!output_event)
4924 * Don't allow output of inherited per-task events. This would
4925 * create performance issues due to cross cpu access.
4926 */
4927 if (event->cpu == -1 && event->attr.inherit)
4928 return -EINVAL;
4929
4930 if (!output_fd)
4931 goto set; 5014 goto set;
4932 5015
4933 output_file = fget_light(output_fd, &fput_needed); 5016 /* don't allow circular references */
4934 if (!output_file) 5017 if (event == output_event)
4935 return -EBADF;
4936
4937 if (output_file->f_op != &perf_fops)
4938 goto out;
4939
4940 output_event = output_file->private_data;
4941
4942 /* Don't chain output fds */
4943 if (output_event->output)
4944 goto out;
4945
4946 /* Don't set an output fd when we already have an output channel */
4947 if (event->data)
4948 goto out; 5018 goto out;
4949 5019
4950 /* 5020 /*
@@ -4959,26 +5029,28 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
4959 if (output_event->cpu == -1 && output_event->ctx != event->ctx) 5029 if (output_event->cpu == -1 && output_event->ctx != event->ctx)
4960 goto out; 5030 goto out;
4961 5031
4962 atomic_long_inc(&output_file->f_count);
4963
4964set: 5032set:
4965 mutex_lock(&event->mmap_mutex); 5033 mutex_lock(&event->mmap_mutex);
4966 old_output = event->output; 5034 /* Can't redirect output if we've got an active mmap() */
4967 rcu_assign_pointer(event->output, output_event); 5035 if (atomic_read(&event->mmap_count))
4968 mutex_unlock(&event->mmap_mutex); 5036 goto unlock;
4969 5037
4970 if (old_output) { 5038 if (output_event) {
4971 /* 5039 /* get the buffer we want to redirect to */
4972 * we need to make sure no existing perf_output_*() 5040 data = perf_mmap_data_get(output_event);
4973 * is still referencing this event. 5041 if (!data)
4974 */ 5042 goto unlock;
4975 synchronize_rcu();
4976 fput(old_output->filp);
4977 } 5043 }
4978 5044
5045 old_data = event->data;
5046 rcu_assign_pointer(event->data, data);
4979 ret = 0; 5047 ret = 0;
5048unlock:
5049 mutex_unlock(&event->mmap_mutex);
5050
5051 if (old_data)
5052 perf_mmap_data_put(old_data);
4980out: 5053out:
4981 fput_light(output_file, fput_needed);
4982 return ret; 5054 return ret;
4983} 5055}
4984 5056
@@ -4994,7 +5066,7 @@ SYSCALL_DEFINE5(perf_event_open,
4994 struct perf_event_attr __user *, attr_uptr, 5066 struct perf_event_attr __user *, attr_uptr,
4995 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) 5067 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
4996{ 5068{
4997 struct perf_event *event, *group_leader; 5069 struct perf_event *event, *group_leader = NULL, *output_event = NULL;
4998 struct perf_event_attr attr; 5070 struct perf_event_attr attr;
4999 struct perf_event_context *ctx; 5071 struct perf_event_context *ctx;
5000 struct file *event_file = NULL; 5072 struct file *event_file = NULL;
@@ -5034,19 +5106,25 @@ SYSCALL_DEFINE5(perf_event_open,
5034 goto err_fd; 5106 goto err_fd;
5035 } 5107 }
5036 5108
5109 if (group_fd != -1) {
5110 group_leader = perf_fget_light(group_fd, &fput_needed);
5111 if (IS_ERR(group_leader)) {
5112 err = PTR_ERR(group_leader);
5113 goto err_put_context;
5114 }
5115 group_file = group_leader->filp;
5116 if (flags & PERF_FLAG_FD_OUTPUT)
5117 output_event = group_leader;
5118 if (flags & PERF_FLAG_FD_NO_GROUP)
5119 group_leader = NULL;
5120 }
5121
5037 /* 5122 /*
5038 * Look up the group leader (we will attach this event to it): 5123 * Look up the group leader (we will attach this event to it):
5039 */ 5124 */
5040 group_leader = NULL; 5125 if (group_leader) {
5041 if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
5042 err = -EINVAL; 5126 err = -EINVAL;
5043 group_file = fget_light(group_fd, &fput_needed);
5044 if (!group_file)
5045 goto err_put_context;
5046 if (group_file->f_op != &perf_fops)
5047 goto err_put_context;
5048 5127
5049 group_leader = group_file->private_data;
5050 /* 5128 /*
5051 * Do not allow a recursive hierarchy (this new sibling 5129 * Do not allow a recursive hierarchy (this new sibling
5052 * becoming part of another group-sibling): 5130 * becoming part of another group-sibling):
@@ -5068,9 +5146,16 @@ SYSCALL_DEFINE5(perf_event_open,
5068 5146
5069 event = perf_event_alloc(&attr, cpu, ctx, group_leader, 5147 event = perf_event_alloc(&attr, cpu, ctx, group_leader,
5070 NULL, NULL, GFP_KERNEL); 5148 NULL, NULL, GFP_KERNEL);
5071 err = PTR_ERR(event); 5149 if (IS_ERR(event)) {
5072 if (IS_ERR(event)) 5150 err = PTR_ERR(event);
5073 goto err_put_context; 5151 goto err_put_context;
5152 }
5153
5154 if (output_event) {
5155 err = perf_event_set_output(event, output_event);
5156 if (err)
5157 goto err_free_put_context;
5158 }
5074 5159
5075 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); 5160 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR);
5076 if (IS_ERR(event_file)) { 5161 if (IS_ERR(event_file)) {
@@ -5078,12 +5163,6 @@ SYSCALL_DEFINE5(perf_event_open,
5078 goto err_free_put_context; 5163 goto err_free_put_context;
5079 } 5164 }
5080 5165
5081 if (flags & PERF_FLAG_FD_OUTPUT) {
5082 err = perf_event_set_output(event, group_fd);
5083 if (err)
5084 goto err_fput_free_put_context;
5085 }
5086
5087 event->filp = event_file; 5166 event->filp = event_file;
5088 WARN_ON_ONCE(ctx->parent_ctx); 5167 WARN_ON_ONCE(ctx->parent_ctx);
5089 mutex_lock(&ctx->mutex); 5168 mutex_lock(&ctx->mutex);
@@ -5097,12 +5176,16 @@ SYSCALL_DEFINE5(perf_event_open,
5097 list_add_tail(&event->owner_entry, &current->perf_event_list); 5176 list_add_tail(&event->owner_entry, &current->perf_event_list);
5098 mutex_unlock(&current->perf_event_mutex); 5177 mutex_unlock(&current->perf_event_mutex);
5099 5178
5179 /*
5180 * Drop the reference on the group_event after placing the
5181 * new event on the sibling_list. This ensures destruction
5182 * of the group leader will find the pointer to itself in
5183 * perf_group_detach().
5184 */
5100 fput_light(group_file, fput_needed); 5185 fput_light(group_file, fput_needed);
5101 fd_install(event_fd, event_file); 5186 fd_install(event_fd, event_file);
5102 return event_fd; 5187 return event_fd;
5103 5188
5104err_fput_free_put_context:
5105 fput(event_file);
5106err_free_put_context: 5189err_free_put_context:
5107 free_event(event); 5190 free_event(event);
5108err_put_context: 5191err_put_context:
@@ -5420,6 +5503,7 @@ static void perf_free_event(struct perf_event *event,
5420 5503
5421 fput(parent->filp); 5504 fput(parent->filp);
5422 5505
5506 perf_group_detach(event);
5423 list_del_event(event, ctx); 5507 list_del_event(event, ctx);
5424 free_event(event); 5508 free_event(event);
5425} 5509}
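
The reworked perf_event_set_output() above stops pinning an output file descriptor and chaining event->output; instead it takes a reference on the target event's mmap buffer with perf_mmap_data_get(), publishes it through rcu_assign_pointer(event->data, ...) while holding mmap_mutex, and releases the previous buffer with perf_mmap_data_put(). Redirection is refused for circular references and while the event's own buffer is still mmap()ed. A minimal userspace sketch of that get/swap/put discipline, with hypothetical stand-ins (struct ring_buffer, buffer_get(), buffer_put(), event_set_output()) for the kernel structures:

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

/* Stand-in for perf_mmap_data: a buffer several events may share. */
struct ring_buffer {
        atomic_int refcount;                    /* last put frees the buffer */
};

struct event {
        pthread_mutex_t mmap_mutex;             /* serializes redirects against mmap() */
        int mmap_count;                         /* non-zero while userspace has it mapped */
        struct ring_buffer *data;               /* buffer this event writes into */
};

/* Take a reference only if the buffer is still live (inc-not-zero). */
static struct ring_buffer *buffer_get(struct ring_buffer *rb)
{
        int old = rb ? atomic_load(&rb->refcount) : 0;

        while (old > 0)
                if (atomic_compare_exchange_weak(&rb->refcount, &old, old + 1))
                        return rb;
        return NULL;                            /* already on its way to being freed */
}

static void buffer_put(struct ring_buffer *rb)
{
        if (rb && atomic_fetch_sub(&rb->refcount, 1) == 1)
                free(rb);
}

/*
 * Redirect 'event' into 'target's buffer, mirroring the new
 * perf_event_set_output(): no circular references, no redirect while the
 * current buffer is mmap()ed, and the old buffer is released afterwards.
 */
static int event_set_output(struct event *event, struct event *target)
{
        struct ring_buffer *data = NULL, *old = NULL;
        int ret = -1;

        if (event == target)
                return -1;

        pthread_mutex_lock(&event->mmap_mutex);
        if (event->mmap_count)
                goto unlock;
        if (target) {
                data = buffer_get(target->data);
                if (!data)
                        goto unlock;
        }
        old = event->data;
        event->data = data;
        ret = 0;
unlock:
        pthread_mutex_unlock(&event->mmap_mutex);
        buffer_put(old);                        /* buffer_put(NULL) is a no-op */
        return ret;
}

int main(void)
{
        struct ring_buffer *rb = calloc(1, sizeof(*rb));
        struct event a = { 0 }, b = { 0 };

        pthread_mutex_init(&a.mmap_mutex, NULL);
        pthread_mutex_init(&b.mmap_mutex, NULL);
        atomic_store(&rb->refcount, 1);         /* b's own reference */
        b.data = rb;

        return event_set_output(&a, &b);        /* a now emits into b's buffer */
}
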
diff --git a/kernel/sched.c b/kernel/sched.c
index d48408142503..f8b8996228dd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -544,6 +544,8 @@ struct rq {
544 struct root_domain *rd; 544 struct root_domain *rd;
545 struct sched_domain *sd; 545 struct sched_domain *sd;
546 546
547 unsigned long cpu_power;
548
547 unsigned char idle_at_tick; 549 unsigned char idle_at_tick;
548 /* For active balancing */ 550 /* For active balancing */
549 int post_schedule; 551 int post_schedule;
@@ -1499,24 +1501,9 @@ static unsigned long target_load(int cpu, int type)
1499 return max(rq->cpu_load[type-1], total); 1501 return max(rq->cpu_load[type-1], total);
1500} 1502}
1501 1503
1502static struct sched_group *group_of(int cpu)
1503{
1504 struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd);
1505
1506 if (!sd)
1507 return NULL;
1508
1509 return sd->groups;
1510}
1511
1512static unsigned long power_of(int cpu) 1504static unsigned long power_of(int cpu)
1513{ 1505{
1514 struct sched_group *group = group_of(cpu); 1506 return cpu_rq(cpu)->cpu_power;
1515
1516 if (!group)
1517 return SCHED_LOAD_SCALE;
1518
1519 return group->cpu_power;
1520} 1507}
1521 1508
1522static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); 1509static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
@@ -1854,8 +1841,8 @@ static void dec_nr_running(struct rq *rq)
1854static void set_load_weight(struct task_struct *p) 1841static void set_load_weight(struct task_struct *p)
1855{ 1842{
1856 if (task_has_rt_policy(p)) { 1843 if (task_has_rt_policy(p)) {
1857 p->se.load.weight = prio_to_weight[0] * 2; 1844 p->se.load.weight = 0;
1858 p->se.load.inv_weight = prio_to_wmult[0] >> 1; 1845 p->se.load.inv_weight = WMULT_CONST;
1859 return; 1846 return;
1860 } 1847 }
1861 1848
@@ -7605,6 +7592,7 @@ void __init sched_init(void)
7605#ifdef CONFIG_SMP 7592#ifdef CONFIG_SMP
7606 rq->sd = NULL; 7593 rq->sd = NULL;
7607 rq->rd = NULL; 7594 rq->rd = NULL;
7595 rq->cpu_power = SCHED_LOAD_SCALE;
7608 rq->post_schedule = 0; 7596 rq->post_schedule = 0;
7609 rq->active_balance = 0; 7597 rq->active_balance = 0;
7610 rq->next_balance = jiffies; 7598 rq->next_balance = jiffies;
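
With group_of() removed, power_of() above is just a read of a value cached on the runqueue: rq->cpu_power is seeded with SCHED_LOAD_SCALE in sched_init() and refreshed by update_cpu_power() in sched_fair.c, so callers no longer chase RCU-protected sched-domain pointers that can be NULL in the middle of hotplug. A tiny sketch of the cached form, with a plain array standing in for the per-CPU runqueues (figures are made up):

#include <stdio.h>

#define SCHED_LOAD_SCALE 1024UL                 /* nominal capacity of one CPU */
#define NR_CPUS 4

/*
 * Stand-in for rq->cpu_power: seeded the way sched_init() seeds it and
 * later overwritten by the balancer (update_cpu_power() in the next file).
 */
static unsigned long cpu_power[NR_CPUS] = {
        SCHED_LOAD_SCALE, SCHED_LOAD_SCALE, SCHED_LOAD_SCALE, SCHED_LOAD_SCALE,
};

static unsigned long power_of(int cpu)
{
        return cpu_power[cpu];                  /* no domain walk, always defined */
}

int main(void)
{
        cpu_power[1] = SCHED_LOAD_SCALE / 2;    /* e.g. a CPU busy with RT work */
        printf("power: cpu0=%lu cpu1=%lu\n", power_of(0), power_of(1));
        return 0;
}
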
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 217e4a9393e4..eed35eded602 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1225,7 +1225,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
1225 unsigned long this_load, load; 1225 unsigned long this_load, load;
1226 int idx, this_cpu, prev_cpu; 1226 int idx, this_cpu, prev_cpu;
1227 unsigned long tl_per_task; 1227 unsigned long tl_per_task;
1228 unsigned int imbalance;
1229 struct task_group *tg; 1228 struct task_group *tg;
1230 unsigned long weight; 1229 unsigned long weight;
1231 int balanced; 1230 int balanced;
@@ -1252,8 +1251,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
1252 tg = task_group(p); 1251 tg = task_group(p);
1253 weight = p->se.load.weight; 1252 weight = p->se.load.weight;
1254 1253
1255 imbalance = 100 + (sd->imbalance_pct - 100) / 2;
1256
1257 /* 1254 /*
1258 * In low-load situations, where prev_cpu is idle and this_cpu is idle 1255 * In low-load situations, where prev_cpu is idle and this_cpu is idle
1259 * due to the sync cause above having dropped this_load to 0, we'll 1256 * due to the sync cause above having dropped this_load to 0, we'll
@@ -1263,9 +1260,21 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
1263 * Otherwise check if either cpus are near enough in load to allow this 1260 * Otherwise check if either cpus are near enough in load to allow this
1264 * task to be woken on this_cpu. 1261 * task to be woken on this_cpu.
1265 */ 1262 */
1266 balanced = !this_load || 1263 if (this_load) {
1267 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <= 1264 unsigned long this_eff_load, prev_eff_load;
1268 imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); 1265
1266 this_eff_load = 100;
1267 this_eff_load *= power_of(prev_cpu);
1268 this_eff_load *= this_load +
1269 effective_load(tg, this_cpu, weight, weight);
1270
1271 prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
1272 prev_eff_load *= power_of(this_cpu);
1273 prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
1274
1275 balanced = this_eff_load <= prev_eff_load;
1276 } else
1277 balanced = true;
1269 1278
1270 /* 1279 /*
1271 * If the currently running task will sleep within 1280 * If the currently running task will sleep within
@@ -2298,6 +2307,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
2298 if (!power) 2307 if (!power)
2299 power = 1; 2308 power = 1;
2300 2309
2310 cpu_rq(cpu)->cpu_power = power;
2301 sdg->cpu_power = power; 2311 sdg->cpu_power = power;
2302} 2312}
2303 2313
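
The wake_affine() change above splits the old single comparison (100 * this-side load <= imbalance * prev-side load) into two explicitly scaled sides: each side's effective load is multiplied by the other CPU's power, so waking the task onto a CPU whose capacity has been eaten by, say, RT activity is penalised instead of being treated as equal. The arithmetic is easier to see outside the kernel; a minimal sketch with made-up load and power figures, and with effective_load() folded into the load arguments for brevity:

#include <stdio.h>

/*
 * Follow the new wake_affine() comparison: scale each side's load by the
 * *other* CPU's power, and give prev_cpu the sd->imbalance_pct slack.
 */
static int balanced(unsigned long this_load, unsigned long this_power,
                    unsigned long prev_load, unsigned long prev_power,
                    unsigned int imbalance_pct)
{
        unsigned long this_eff_load, prev_eff_load;

        if (!this_load)
                return 1;                       /* idle target, always fine */

        this_eff_load  = 100;
        this_eff_load *= prev_power;
        this_eff_load *= this_load;

        prev_eff_load  = 100 + (imbalance_pct - 100) / 2;
        prev_eff_load *= this_power;
        prev_eff_load *= prev_load;

        return this_eff_load <= prev_eff_load;
}

int main(void)
{
        /* Equal power (1024): behaves like the old percentage-only check. */
        printf("equal power:   %d\n", balanced(512, 1024, 600, 1024, 125));
        /* this_cpu's power halved, e.g. by an RT hog: the wakeup is refused. */
        printf("this_cpu weak: %d\n", balanced(512,  512, 600, 1024, 125));
        return 0;
}
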
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 825e1126008f..07b4f1b1a73a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -850,7 +850,7 @@ static __init int spawn_ksoftirqd(void)
850 void *cpu = (void *)(long)smp_processor_id(); 850 void *cpu = (void *)(long)smp_processor_id();
851 int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 851 int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
852 852
853 BUG_ON(err == NOTIFY_BAD); 853 BUG_ON(err != NOTIFY_OK);
854 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); 854 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
855 register_cpu_notifier(&cpu_nfb); 855 register_cpu_notifier(&cpu_nfb);
856 return 0; 856 return 0;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index b4e7431e7c78..70f8d90331e9 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -321,7 +321,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
321 321
322#ifdef CONFIG_HOTPLUG_CPU 322#ifdef CONFIG_HOTPLUG_CPU
323 case CPU_UP_CANCELED: 323 case CPU_UP_CANCELED:
324 case CPU_DEAD: 324 case CPU_POST_DEAD:
325 { 325 {
326 struct cpu_stop_work *work; 326 struct cpu_stop_work *work;
327 327
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 997080f00e0b..d24f761f4876 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1471,12 +1471,12 @@ static struct ctl_table fs_table[] = {
1471 }, 1471 },
1472#endif 1472#endif
1473 { 1473 {
1474 .procname = "pipe-max-pages", 1474 .procname = "pipe-max-size",
1475 .data = &pipe_max_pages, 1475 .data = &pipe_max_size,
1476 .maxlen = sizeof(int), 1476 .maxlen = sizeof(int),
1477 .mode = 0644, 1477 .mode = 0644,
1478 .proc_handler = &proc_dointvec_minmax, 1478 .proc_handler = &pipe_proc_fn,
1479 .extra1 = &two, 1479 .extra1 = &pipe_min_size,
1480 }, 1480 },
1481/* 1481/*
1482 * NOTE: do not add new entries to this table unless you have read 1482 * NOTE: do not add new entries to this table unless you have read
diff --git a/kernel/timer.c b/kernel/timer.c
index 2454172a80d3..ee305c8d4e18 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1717,7 +1717,7 @@ void __init init_timers(void)
1717 1717
1718 init_timer_stats(); 1718 init_timer_stats();
1719 1719
1720 BUG_ON(err == NOTIFY_BAD); 1720 BUG_ON(err != NOTIFY_OK);
1721 register_cpu_notifier(&timers_nb); 1721 register_cpu_notifier(&timers_nb);
1722 open_softirq(TIMER_SOFTIRQ, run_timer_softirq); 1722 open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
1723} 1723}
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 36ea2b65dcdc..638711c17504 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -842,6 +842,7 @@ static void blk_add_trace_split(void *ignore,
842 842
843/** 843/**
844 * blk_add_trace_remap - Add a trace for a remap operation 844 * blk_add_trace_remap - Add a trace for a remap operation
845 * @ignore: trace callback data parameter (not used)
845 * @q: queue the io is for 846 * @q: queue the io is for
846 * @bio: the source bio 847 * @bio: the source bio
847 * @dev: target device 848 * @dev: target device
@@ -873,6 +874,7 @@ static void blk_add_trace_remap(void *ignore,
873 874
874/** 875/**
875 * blk_add_trace_rq_remap - Add a trace for a request-remap operation 876 * blk_add_trace_rq_remap - Add a trace for a request-remap operation
877 * @ignore: trace callback data parameter (not used)
876 * @q: queue the io is for 878 * @q: queue the io is for
877 * @rq: the source request 879 * @rq: the source request
878 * @dev: target device 880 * @dev: target device
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index cb6f365016e4..e6f65887842c 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -116,7 +116,7 @@ int perf_trace_enable(struct perf_event *p_event)
116 if (WARN_ON_ONCE(!list)) 116 if (WARN_ON_ONCE(!list))
117 return -EINVAL; 117 return -EINVAL;
118 118
119 list = per_cpu_ptr(list, smp_processor_id()); 119 list = this_cpu_ptr(list);
120 hlist_add_head_rcu(&p_event->hlist_entry, list); 120 hlist_add_head_rcu(&p_event->hlist_entry, list);
121 121
122 return 0; 122 return 0;
@@ -132,8 +132,9 @@ void perf_trace_destroy(struct perf_event *p_event)
132 struct ftrace_event_call *tp_event = p_event->tp_event; 132 struct ftrace_event_call *tp_event = p_event->tp_event;
133 int i; 133 int i;
134 134
135 mutex_lock(&event_mutex);
135 if (--tp_event->perf_refcount > 0) 136 if (--tp_event->perf_refcount > 0)
136 return; 137 goto out;
137 138
138 if (tp_event->class->reg) 139 if (tp_event->class->reg)
139 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER); 140 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
@@ -142,6 +143,12 @@ void perf_trace_destroy(struct perf_event *p_event)
142 tp_event->class->perf_probe, 143 tp_event->class->perf_probe,
143 tp_event); 144 tp_event);
144 145
146 /*
147 * Ensure our callback won't be called anymore. See
148 * tracepoint_probe_unregister() and __DO_TRACE().
149 */
150 synchronize_sched();
151
145 free_percpu(tp_event->perf_events); 152 free_percpu(tp_event->perf_events);
146 tp_event->perf_events = NULL; 153 tp_event->perf_events = NULL;
147 154
@@ -151,6 +158,8 @@ void perf_trace_destroy(struct perf_event *p_event)
151 perf_trace_buf[i] = NULL; 158 perf_trace_buf[i] = NULL;
152 } 159 }
153 } 160 }
161out:
162 mutex_unlock(&event_mutex);
154} 163}
155 164
156__kprobes void *perf_trace_buf_prepare(int size, unsigned short type, 165__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
@@ -169,7 +178,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
169 if (*rctxp < 0) 178 if (*rctxp < 0)
170 return NULL; 179 return NULL;
171 180
172 raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id()); 181 raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);
173 182
174 /* zero the dead bytes from align to not leak stack to user */ 183 /* zero the dead bytes from align to not leak stack to user */
175 memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); 184 memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
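
perf_trace_destroy() above gains two protections: the reference count is now dropped under event_mutex, so it cannot race with a concurrent perf_trace_init() on the same tracepoint, and synchronize_sched() lets any probe that is still running finish before the per-cpu buffers are freed. The "drop the last reference under a lock, then tear down" shape is the reusable part; a small userspace sketch with hypothetical names (struct tracepoint_ref, trace_destroy()), where a comment marks the spot the grace-period wait occupies in the kernel:

#include <pthread.h>
#include <stdlib.h>

/* Hypothetical stand-in for the per-tracepoint state torn down above. */
struct tracepoint_ref {
        pthread_mutex_t lock;                   /* plays the role of event_mutex */
        int refcount;                           /* perf users of this tracepoint */
        void **percpu_buf;                      /* freed only by the last user */
};

static void trace_destroy(struct tracepoint_ref *tp)
{
        pthread_mutex_lock(&tp->lock);
        if (--tp->refcount > 0)
                goto out;                       /* somebody else still needs it */

        /*
         * Unregister the probe here, then wait for in-flight callers (this
         * is where perf_trace_destroy() calls synchronize_sched()), and
         * only then free the shared buffers.
         */
        free(tp->percpu_buf);
        tp->percpu_buf = NULL;
out:
        pthread_mutex_unlock(&tp->lock);
}

int main(void)
{
        struct tracepoint_ref tp = { 0 };

        pthread_mutex_init(&tp.lock, NULL);
        tp.refcount = 2;
        tp.percpu_buf = calloc(4, sizeof(void *));

        trace_destroy(&tp);                     /* refcount 2 -> 1: buffers kept */
        trace_destroy(&tp);                     /* last user: buffers freed */
        return 0;
}
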
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index faf7cefd15da..f52b5f50299d 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1359,7 +1359,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1359 for (i = 0; i < tp->nr_args; i++) 1359 for (i = 0; i < tp->nr_args; i++)
1360 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); 1360 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1361 1361
1362 head = per_cpu_ptr(call->perf_events, smp_processor_id()); 1362 head = this_cpu_ptr(call->perf_events);
1363 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); 1363 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
1364} 1364}
1365 1365
@@ -1392,7 +1392,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1392 for (i = 0; i < tp->nr_args; i++) 1392 for (i = 0; i < tp->nr_args; i++)
1393 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); 1393 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1394 1394
1395 head = per_cpu_ptr(call->perf_events, smp_processor_id()); 1395 head = this_cpu_ptr(call->perf_events);
1396 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); 1396 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
1397} 1397}
1398 1398
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index d2c859cec9ea..34e35804304b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -519,7 +519,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
519 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 519 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
520 (unsigned long *)&rec->args); 520 (unsigned long *)&rec->args);
521 521
522 head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id()); 522 head = this_cpu_ptr(sys_data->enter_event->perf_events);
523 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); 523 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
524} 524}
525 525
@@ -595,7 +595,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
595 rec->nr = syscall_nr; 595 rec->nr = syscall_nr;
596 rec->ret = syscall_get_return_value(current, regs); 596 rec->ret = syscall_get_return_value(current, regs);
597 597
598 head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id()); 598 head = this_cpu_ptr(sys_data->exit_event->perf_events);
599 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); 599 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
600} 600}
601 601