path: root/kernel
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c                                         2
-rw-r--r--  kernel/cpu.c                                            2
-rw-r--r--  kernel/debug/kdb/kdb_main.c                            12
-rw-r--r--  kernel/exec_domain.c                                   18
-rw-r--r--  kernel/irq/manage.c                                     3
-rw-r--r--  kernel/module.c                                       327
-rw-r--r--  kernel/power/Kconfig                                    9
-rw-r--r--  kernel/power/Makefile                                   2
-rw-r--r--  kernel/power/nvs.c (renamed from kernel/power/hibernate_nvs.c)  24
-rw-r--r--  kernel/power/suspend.c                                  6
-rw-r--r--  kernel/sched.c                                        157
-rw-r--r--  kernel/sched_fair.c                                    24
-rw-r--r--  kernel/softirq.c                                        2
-rw-r--r--  kernel/stop_machine.c                                   2
-rw-r--r--  kernel/sysctl.c                                         8
-rw-r--r--  kernel/time/tick-sched.c                                5
-rw-r--r--  kernel/timer.c                                          2
17 files changed, 360 insertions, 245 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 422cb19f156e..3ac6f5b0a64b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4598,7 +4598,7 @@ static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
 	parent_css = parent->subsys[subsys_id];
 	child_css = child->subsys[subsys_id];
 	parent_id = parent_css->id;
-	depth = parent_id->depth;
+	depth = parent_id->depth + 1;
 
 	child_id = get_new_cssid(ss, depth);
 	if (IS_ERR(child_id))
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8b92539b4754..97d1b426a4ac 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -34,7 +34,7 @@ void cpu_maps_update_done(void)
 	mutex_unlock(&cpu_add_remove_lock);
 }
 
-static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
+static RAW_NOTIFIER_HEAD(cpu_chain);
 
 /* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
  * Should always be manipulated under cpu_add_remove_lock
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index b724c791b6d4..184cd8209c36 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1857,12 +1857,6 @@ static int kdb_ef(int argc, const char **argv)
 }
 
 #if defined(CONFIG_MODULES)
-/* modules using other modules */
-struct module_use {
-	struct list_head list;
-	struct module *module_which_uses;
-};
-
 /*
  * kdb_lsmod - This function implements the 'lsmod' command.  Lists
  *	currently loaded kernel modules.
@@ -1894,9 +1888,9 @@ static int kdb_lsmod(int argc, const char **argv)
 	{
 		struct module_use *use;
 		kdb_printf(" [ ");
-		list_for_each_entry(use, &mod->modules_which_use_me,
-				    list)
-			kdb_printf("%s ", use->module_which_uses->name);
+		list_for_each_entry(use, &mod->source_list,
+				    source_list)
+			kdb_printf("%s ", use->source->name);
 		kdb_printf("]\n");
 	}
 #endif
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index c35452cadded..dd62f8e714ca 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -27,7 +27,7 @@ static struct exec_domain *exec_domains = &default_exec_domain;
 static DEFINE_RWLOCK(exec_domains_lock);
 
 
-static u_long ident_map[32] = {
+static unsigned long ident_map[32] = {
 	0,	1,	2,	3,	4,	5,	6,	7,
 	8,	9,	10,	11,	12,	13,	14,	15,
 	16,	17,	18,	19,	20,	21,	22,	23,
@@ -56,10 +56,10 @@ default_handler(int segment, struct pt_regs *regp)
 }
 
 static struct exec_domain *
-lookup_exec_domain(u_long personality)
+lookup_exec_domain(unsigned int personality)
 {
-	struct exec_domain * ep;
-	u_long pers = personality(personality);
+	unsigned int pers = personality(personality);
+	struct exec_domain *ep;
 
 	read_lock(&exec_domains_lock);
 	for (ep = exec_domains; ep; ep = ep->next) {
@@ -70,7 +70,7 @@ lookup_exec_domain(u_long personality)
 
 #ifdef CONFIG_MODULES
 	read_unlock(&exec_domains_lock);
-	request_module("personality-%ld", pers);
+	request_module("personality-%d", pers);
 	read_lock(&exec_domains_lock);
 
 	for (ep = exec_domains; ep; ep = ep->next) {
@@ -135,7 +135,7 @@ unregister:
 }
 
 int
-__set_personality(u_long personality)
+__set_personality(unsigned int personality)
 {
 	struct exec_domain *ep, *oep;
 
@@ -188,9 +188,9 @@ static int __init proc_execdomains_init(void)
 module_init(proc_execdomains_init);
 #endif
 
-SYSCALL_DEFINE1(personality, u_long, personality)
+SYSCALL_DEFINE1(personality, unsigned int, personality)
 {
-	u_long old = current->personality;
+	unsigned int old = current->personality;
 
 	if (personality != 0xffffffff) {
 		set_personality(personality);
@@ -198,7 +198,7 @@ SYSCALL_DEFINE1(personality, u_long, personality)
 		return -EINVAL;
 	}
 
-	return (long)old;
+	return old;
 }
 
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 3164ba7ce151..e1497481fe8a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -456,6 +456,9 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		/* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */
 		desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK);
 		desc->status |= flags;
+
+		if (chip != desc->chip)
+			irq_chip_set_defaults(desc->chip);
 	}
 
 	return ret;
diff --git a/kernel/module.c b/kernel/module.c
index 333fbcc96978..8c6b42840dd1 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -72,7 +72,11 @@
 /* If this is set, the section belongs in the init part of the module */
 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
 
-/* List of modules, protected by module_mutex or preempt_disable
+/*
+ * Mutex protects:
+ * 1) List of modules (also safely readable with preempt_disable),
+ * 2) module_use links,
+ * 3) module_addr_min/module_addr_max.
  * (delete uses stop_machine/add uses RCU list operations). */
 DEFINE_MUTEX(module_mutex);
 EXPORT_SYMBOL_GPL(module_mutex);
@@ -90,7 +94,8 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq);
 
 static BLOCKING_NOTIFIER_HEAD(module_notify_list);
 
-/* Bounds of module allocation, for speeding __module_address */
+/* Bounds of module allocation, for speeding __module_address.
+ * Protected by module_mutex. */
 static unsigned long module_addr_min = -1UL, module_addr_max = 0;
 
 int register_module_notifier(struct notifier_block * nb)
@@ -329,7 +334,7 @@ static bool find_symbol_in_section(const struct symsearch *syms,
 }
 
 /* Find a symbol and return it, along with, (optional) crc and
- * (optional) module which owns it */
+ * (optional) module which owns it.  Needs preempt disabled or module_mutex. */
 const struct kernel_symbol *find_symbol(const char *name,
 					struct module **owner,
 					const unsigned long **crc,
@@ -403,7 +408,7 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr,
 				 Elf_Shdr *sechdrs,
 				 const char *secstrings)
 {
-	return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
+	return find_sec(hdr, sechdrs, secstrings, ".data..percpu");
 }
 
 static void percpu_modcopy(struct module *mod,
@@ -523,7 +528,8 @@ static void module_unload_init(struct module *mod)
 {
 	int cpu;
 
-	INIT_LIST_HEAD(&mod->modules_which_use_me);
+	INIT_LIST_HEAD(&mod->source_list);
+	INIT_LIST_HEAD(&mod->target_list);
 	for_each_possible_cpu(cpu) {
 		per_cpu_ptr(mod->refptr, cpu)->incs = 0;
 		per_cpu_ptr(mod->refptr, cpu)->decs = 0;
@@ -535,20 +541,13 @@ static void module_unload_init(struct module *mod)
 	mod->waiter = current;
 }
 
-/* modules using other modules */
-struct module_use
-{
-	struct list_head list;
-	struct module *module_which_uses;
-};
-
 /* Does a already use b? */
 static int already_uses(struct module *a, struct module *b)
 {
 	struct module_use *use;
 
-	list_for_each_entry(use, &b->modules_which_use_me, list) {
-		if (use->module_which_uses == a) {
+	list_for_each_entry(use, &b->source_list, source_list) {
+		if (use->source == a) {
 			DEBUGP("%s uses %s!\n", a->name, b->name);
 			return 1;
 		}
@@ -557,62 +556,68 @@ static int already_uses(struct module *a, struct module *b)
 	return 0;
 }
 
-/* Module a uses b */
-int use_module(struct module *a, struct module *b)
+/*
+ * Module a uses b
+ *  - we add 'a' as a "source", 'b' as a "target" of module use
+ *  - the module_use is added to the list of 'b' sources (so
+ *    'b' can walk the list to see who sourced them), and of 'a'
+ *    targets (so 'a' can see what modules it targets).
+ */
+static int add_module_usage(struct module *a, struct module *b)
 {
 	struct module_use *use;
-	int no_warn, err;
 
-	if (b == NULL || already_uses(a, b)) return 1;
+	DEBUGP("Allocating new usage for %s.\n", a->name);
+	use = kmalloc(sizeof(*use), GFP_ATOMIC);
+	if (!use) {
+		printk(KERN_WARNING "%s: out of memory loading\n", a->name);
+		return -ENOMEM;
+	}
+
+	use->source = a;
+	use->target = b;
+	list_add(&use->source_list, &b->source_list);
+	list_add(&use->target_list, &a->target_list);
+	return 0;
+}
+
+/* Module a uses b: caller needs module_mutex() */
+int ref_module(struct module *a, struct module *b)
+{
+	int err;
 
-	/* If we're interrupted or time out, we fail. */
-	if (wait_event_interruptible_timeout(
-		    module_wq, (err = strong_try_module_get(b)) != -EBUSY,
-		    30 * HZ) <= 0) {
-		printk("%s: gave up waiting for init of module %s.\n",
-		       a->name, b->name);
+	if (b == NULL || already_uses(a, b))
 		return 0;
-	}
 
-	/* If strong_try_module_get() returned a different error, we fail. */
+	/* If module isn't available, we fail. */
+	err = strong_try_module_get(b);
 	if (err)
-		return 0;
+		return err;
 
-	DEBUGP("Allocating new usage for %s.\n", a->name);
-	use = kmalloc(sizeof(*use), GFP_ATOMIC);
-	if (!use) {
-		printk("%s: out of memory loading\n", a->name);
+	err = add_module_usage(a, b);
+	if (err) {
 		module_put(b);
-		return 0;
+		return err;
 	}
-
-	use->module_which_uses = a;
-	list_add(&use->list, &b->modules_which_use_me);
-	no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name);
-	return 1;
+	return 0;
 }
-EXPORT_SYMBOL_GPL(use_module);
+EXPORT_SYMBOL_GPL(ref_module);
 
 /* Clear the unload stuff of the module. */
 static void module_unload_free(struct module *mod)
 {
-	struct module *i;
+	struct module_use *use, *tmp;
 
-	list_for_each_entry(i, &modules, list) {
-		struct module_use *use;
-
-		list_for_each_entry(use, &i->modules_which_use_me, list) {
-			if (use->module_which_uses == mod) {
-				DEBUGP("%s unusing %s\n", mod->name, i->name);
-				module_put(i);
-				list_del(&use->list);
-				kfree(use);
-				sysfs_remove_link(i->holders_dir, mod->name);
-				/* There can be at most one match. */
-				break;
-			}
-		}
+	mutex_lock(&module_mutex);
+	list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) {
+		struct module *i = use->target;
+		DEBUGP("%s unusing %s\n", mod->name, i->name);
+		module_put(i);
+		list_del(&use->source_list);
+		list_del(&use->target_list);
+		kfree(use);
 	}
+	mutex_unlock(&module_mutex);
 }
 
 #ifdef CONFIG_MODULE_FORCE_UNLOAD
@@ -735,7 +740,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 		goto out;
 	}
 
-	if (!list_empty(&mod->modules_which_use_me)) {
+	if (!list_empty(&mod->source_list)) {
 		/* Other modules depend on us: get rid of them first. */
 		ret = -EWOULDBLOCK;
 		goto out;
@@ -779,13 +784,14 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 	blocking_notifier_call_chain(&module_notify_list,
 				     MODULE_STATE_GOING, mod);
 	async_synchronize_full();
-	mutex_lock(&module_mutex);
+
 	/* Store the name of the last unloaded module for diagnostic purposes */
 	strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
 	ddebug_remove_module(mod->name);
-	free_module(mod);
 
- out:
+	free_module(mod);
+	return 0;
+out:
 	mutex_unlock(&module_mutex);
 	return ret;
 }
@@ -799,9 +805,9 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
 
 	/* Always include a trailing , so userspace can differentiate
 	   between this and the old multi-field proc format. */
-	list_for_each_entry(use, &mod->modules_which_use_me, list) {
+	list_for_each_entry(use, &mod->source_list, source_list) {
 		printed_something = 1;
-		seq_printf(m, "%s,", use->module_which_uses->name);
+		seq_printf(m, "%s,", use->source->name);
 	}
 
 	if (mod->init != NULL && mod->exit == NULL) {
@@ -880,11 +886,11 @@ static inline void module_unload_free(struct module *mod)
 {
 }
 
-int use_module(struct module *a, struct module *b)
+int ref_module(struct module *a, struct module *b)
 {
-	return strong_try_module_get(b) == 0;
+	return strong_try_module_get(b);
 }
-EXPORT_SYMBOL_GPL(use_module);
+EXPORT_SYMBOL_GPL(ref_module);
 
 static inline void module_unload_init(struct module *mod)
 {
@@ -1001,6 +1007,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
 {
 	const unsigned long *crc;
 
+	/* Since this should be found in kernel (which can't be removed),
+	 * no locking is necessary. */
 	if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL,
 			 &crc, true, false))
 		BUG();
@@ -1043,29 +1051,62 @@ static inline int same_magic(const char *amagic, const char *bmagic,
 }
 #endif /* CONFIG_MODVERSIONS */
 
-/* Resolve a symbol for this module.  I.e. if we find one, record usage.
-   Must be holding module_mutex. */
+/* Resolve a symbol for this module.  I.e. if we find one, record usage. */
 static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
 						  unsigned int versindex,
 						  const char *name,
-						  struct module *mod)
+						  struct module *mod,
+						  char ownername[])
 {
 	struct module *owner;
 	const struct kernel_symbol *sym;
 	const unsigned long *crc;
+	int err;
 
+	mutex_lock(&module_mutex);
 	sym = find_symbol(name, &owner, &crc,
 			  !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true);
-	/* use_module can fail due to OOM,
-	   or module initialization or unloading */
-	if (sym) {
-		if (!check_version(sechdrs, versindex, name, mod, crc, owner)
-		    || !use_module(mod, owner))
-			sym = NULL;
+	if (!sym)
+		goto unlock;
+
+	if (!check_version(sechdrs, versindex, name, mod, crc, owner)) {
+		sym = ERR_PTR(-EINVAL);
+		goto getname;
+	}
+
+	err = ref_module(mod, owner);
+	if (err) {
+		sym = ERR_PTR(err);
+		goto getname;
 	}
+
+getname:
+	/* We must make copy under the lock if we failed to get ref. */
+	strncpy(ownername, module_name(owner), MODULE_NAME_LEN);
+unlock:
+	mutex_unlock(&module_mutex);
 	return sym;
 }
 
+static const struct kernel_symbol *resolve_symbol_wait(Elf_Shdr *sechdrs,
+							unsigned int versindex,
+							const char *name,
+							struct module *mod)
+{
+	const struct kernel_symbol *ksym;
+	char ownername[MODULE_NAME_LEN];
+
+	if (wait_event_interruptible_timeout(module_wq,
+			!IS_ERR(ksym = resolve_symbol(sechdrs, versindex, name,
+						      mod, ownername)) ||
+			PTR_ERR(ksym) != -EBUSY,
+					     30 * HZ) <= 0) {
+		printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n",
+		       mod->name, ownername);
+	}
+	return ksym;
+}
+
 /*
  * /sys/module/foo/sections stuff
  * J. Corbet <corbet@lwn.net>
@@ -1295,7 +1336,34 @@ static inline void remove_notes_attrs(struct module *mod)
 #endif
 
 #ifdef CONFIG_SYSFS
-int module_add_modinfo_attrs(struct module *mod)
+static void add_usage_links(struct module *mod)
+{
+#ifdef CONFIG_MODULE_UNLOAD
+	struct module_use *use;
+	int nowarn;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(use, &mod->target_list, target_list) {
+		nowarn = sysfs_create_link(use->target->holders_dir,
+					   &mod->mkobj.kobj, mod->name);
+	}
+	mutex_unlock(&module_mutex);
+#endif
+}
+
+static void del_usage_links(struct module *mod)
+{
+#ifdef CONFIG_MODULE_UNLOAD
+	struct module_use *use;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(use, &mod->target_list, target_list)
+		sysfs_remove_link(use->target->holders_dir, mod->name);
+	mutex_unlock(&module_mutex);
+#endif
+}
+
+static int module_add_modinfo_attrs(struct module *mod)
 {
 	struct module_attribute *attr;
 	struct module_attribute *temp_attr;
@@ -1321,7 +1389,7 @@ int module_add_modinfo_attrs(struct module *mod)
 	return error;
 }
 
-void module_remove_modinfo_attrs(struct module *mod)
+static void module_remove_modinfo_attrs(struct module *mod)
 {
 	struct module_attribute *attr;
 	int i;
@@ -1337,7 +1405,7 @@ void module_remove_modinfo_attrs(struct module *mod)
 	kfree(mod->modinfo_attrs);
 }
 
-int mod_sysfs_init(struct module *mod)
+static int mod_sysfs_init(struct module *mod)
 {
 	int err;
 	struct kobject *kobj;
@@ -1371,12 +1439,16 @@ out:
 	return err;
 }
 
-int mod_sysfs_setup(struct module *mod,
+static int mod_sysfs_setup(struct module *mod,
 			   struct kernel_param *kparam,
 			   unsigned int num_params)
 {
 	int err;
 
+	err = mod_sysfs_init(mod);
+	if (err)
+		goto out;
+
 	mod->holders_dir = kobject_create_and_add("holders", &mod->mkobj.kobj);
 	if (!mod->holders_dir) {
 		err = -ENOMEM;
@@ -1391,6 +1463,8 @@ int mod_sysfs_setup(struct module *mod,
 	if (err)
 		goto out_unreg_param;
 
+	add_usage_links(mod);
+
 	kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
 	return 0;
 
@@ -1400,6 +1474,7 @@ out_unreg_holders:
 	kobject_put(mod->holders_dir);
 out_unreg:
 	kobject_put(&mod->mkobj.kobj);
+out:
 	return err;
 }
 
@@ -1410,14 +1485,40 @@ static void mod_sysfs_fini(struct module *mod)
 
 #else /* CONFIG_SYSFS */
 
+static inline int mod_sysfs_init(struct module *mod)
+{
+	return 0;
+}
+
+static inline int mod_sysfs_setup(struct module *mod,
+				  struct kernel_param *kparam,
+				  unsigned int num_params)
+{
+	return 0;
+}
+
+static inline int module_add_modinfo_attrs(struct module *mod)
+{
+	return 0;
+}
+
+static inline void module_remove_modinfo_attrs(struct module *mod)
+{
+}
+
 static void mod_sysfs_fini(struct module *mod)
 {
 }
 
+static void del_usage_links(struct module *mod)
+{
+}
+
 #endif /* CONFIG_SYSFS */
 
 static void mod_kobject_remove(struct module *mod)
 {
+	del_usage_links(mod);
 	module_remove_modinfo_attrs(mod);
 	module_param_sysfs_remove(mod);
 	kobject_put(mod->mkobj.drivers_dir);
@@ -1436,13 +1537,15 @@ static int __unlink_module(void *_mod)
 	return 0;
 }
 
-/* Free a module, remove from lists, etc (must hold module_mutex). */
+/* Free a module, remove from lists, etc. */
 static void free_module(struct module *mod)
 {
 	trace_module_free(mod);
 
 	/* Delete from various lists */
+	mutex_lock(&module_mutex);
 	stop_machine(__unlink_module, mod, NULL);
+	mutex_unlock(&module_mutex);
 	remove_notes_attrs(mod);
 	remove_sect_attrs(mod);
 	mod_kobject_remove(mod);
@@ -1493,6 +1596,8 @@ EXPORT_SYMBOL_GPL(__symbol_get);
 /*
  * Ensure that an exported symbol [global namespace] does not already exist
  * in the kernel or in some other module's exported symbol table.
+ *
+ * You must hold the module_mutex.
  */
 static int verify_export_symbols(struct module *mod)
 {
@@ -1558,21 +1663,23 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
 			break;
 
 		case SHN_UNDEF:
-			ksym = resolve_symbol(sechdrs, versindex,
-					      strtab + sym[i].st_name, mod);
+			ksym = resolve_symbol_wait(sechdrs, versindex,
+						   strtab + sym[i].st_name,
+						   mod);
 			/* Ok if resolved.  */
-			if (ksym) {
+			if (ksym && !IS_ERR(ksym)) {
 				sym[i].st_value = ksym->value;
 				break;
 			}
 
 			/* Ok if weak.  */
-			if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK)
+			if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK)
 				break;
 
-			printk(KERN_WARNING "%s: Unknown symbol %s\n",
-			       mod->name, strtab + sym[i].st_name);
-			ret = -ENOENT;
+			printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n",
+			       mod->name, strtab + sym[i].st_name,
+			       PTR_ERR(ksym));
+			ret = PTR_ERR(ksym) ?: -ENOENT;
 			break;
 
 		default:
@@ -1960,11 +2067,13 @@ static void *module_alloc_update_bounds(unsigned long size)
 	void *ret = module_alloc(size);
 
 	if (ret) {
+		mutex_lock(&module_mutex);
 		/* Update module bounds. */
 		if ((unsigned long)ret < module_addr_min)
 			module_addr_min = (unsigned long)ret;
 		if ((unsigned long)ret + size > module_addr_max)
 			module_addr_max = (unsigned long)ret + size;
+		mutex_unlock(&module_mutex);
 	}
 	return ret;
 }
@@ -2014,6 +2123,7 @@ static noinline struct module *load_module(void __user *umod,
 	long err = 0;
 	void *ptr = NULL; /* Stops spurious gcc warning */
 	unsigned long symoffs, stroffs, *strmap;
+	void __percpu *percpu;
 
 	mm_segment_t old_fs;
 
@@ -2138,11 +2248,6 @@ static noinline struct module *load_module(void __user *umod,
 		goto free_mod;
 	}
 
-	if (find_module(mod->name)) {
-		err = -EEXIST;
-		goto free_mod;
-	}
-
 	mod->state = MODULE_STATE_COMING;
 
 	/* Allow arches to frob section contents and sizes.  */
@@ -2158,6 +2263,8 @@ static noinline struct module *load_module(void __user *umod,
 			goto free_mod;
 		sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
 	}
+	/* Keep this around for failure path. */
+	percpu = mod_percpu(mod);
 
 	/* Determine total sizes, and put offsets in sh_entsize.  For now
 	   this is done generically; there doesn't appear to be any
@@ -2231,11 +2338,6 @@ static noinline struct module *load_module(void __user *umod,
 	/* Now we've moved module, initialize linked lists, etc. */
 	module_unload_init(mod);
 
-	/* add kobject, so we can reference it. */
-	err = mod_sysfs_init(mod);
-	if (err)
-		goto free_unload;
-
 	/* Set up license info based on the info section */
 	set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
 
@@ -2360,11 +2462,6 @@ static noinline struct module *load_module(void __user *umod,
 		goto cleanup;
 	}
 
-	/* Find duplicate symbols */
-	err = verify_export_symbols(mod);
-	if (err < 0)
-		goto cleanup;
-
 	/* Set up and sort exception table */
 	mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table",
 				    sizeof(*mod->extable), &mod->num_exentries);
@@ -2423,7 +2520,19 @@ static noinline struct module *load_module(void __user *umod,
 	 * function to insert in a way safe to concurrent readers.
 	 * The mutex protects against concurrent writers.
 	 */
+	mutex_lock(&module_mutex);
+	if (find_module(mod->name)) {
+		err = -EEXIST;
+		goto unlock;
+	}
+
+	/* Find duplicate symbols */
+	err = verify_export_symbols(mod);
+	if (err < 0)
+		goto unlock;
+
 	list_add_rcu(&mod->list, &modules);
+	mutex_unlock(&module_mutex);
 
 	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL);
 	if (err < 0)
@@ -2432,6 +2541,7 @@ static noinline struct module *load_module(void __user *umod,
 	err = mod_sysfs_setup(mod, mod->kp, mod->num_kp);
 	if (err < 0)
 		goto unlink;
+
 	add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
 	add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
 
@@ -2444,15 +2554,15 @@ static noinline struct module *load_module(void __user *umod,
 	return mod;
 
  unlink:
+	mutex_lock(&module_mutex);
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
+ unlock:
+	mutex_unlock(&module_mutex);
 	synchronize_sched();
 	module_arch_cleanup(mod);
  cleanup:
 	free_modinfo(mod);
-	kobject_del(&mod->mkobj.kobj);
-	kobject_put(&mod->mkobj.kobj);
- free_unload:
 	module_unload_free(mod);
 #if defined(CONFIG_MODULE_UNLOAD)
 	free_percpu(mod->refptr);
@@ -2463,7 +2573,7 @@ static noinline struct module *load_module(void __user *umod,
 	module_free(mod, mod->module_core);
 	/* mod will be freed with core. Don't access it beyond this line! */
  free_percpu:
-	percpu_modfree(mod);
+	free_percpu(percpu);
  free_mod:
 	kfree(args);
 	kfree(strmap);
@@ -2499,19 +2609,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
 	if (!capable(CAP_SYS_MODULE) || modules_disabled)
 		return -EPERM;
 
-	/* Only one module load at a time, please */
-	if (mutex_lock_interruptible(&module_mutex) != 0)
-		return -EINTR;
-
 	/* Do all the hard work */
 	mod = load_module(umod, len, uargs);
-	if (IS_ERR(mod)) {
-		mutex_unlock(&module_mutex);
+	if (IS_ERR(mod))
 		return PTR_ERR(mod);
-	}
-
-	/* Drop lock so they can recurse */
-	mutex_unlock(&module_mutex);
 
 	blocking_notifier_call_chain(&module_notify_list,
 				     MODULE_STATE_COMING, mod);
@@ -2528,9 +2629,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
 		module_put(mod);
 		blocking_notifier_call_chain(&module_notify_list,
 					     MODULE_STATE_GOING, mod);
-		mutex_lock(&module_mutex);
 		free_module(mod);
-		mutex_unlock(&module_mutex);
 		wake_up(&module_wq);
 		return ret;
 	}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 5c36ea9d55d2..ca6066a6952e 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -99,9 +99,13 @@ config PM_SLEEP_ADVANCED_DEBUG
 	depends on PM_ADVANCED_DEBUG
 	default n
 
+config SUSPEND_NVS
+	bool
+
 config SUSPEND
 	bool "Suspend to RAM and standby"
 	depends on PM && ARCH_SUSPEND_POSSIBLE
+	select SUSPEND_NVS if HAS_IOMEM
 	default y
 	---help---
 	  Allow the system to enter sleep states in which main memory is
@@ -130,13 +134,10 @@ config SUSPEND_FREEZER
 
 	  Turning OFF this setting is NOT recommended! If in doubt, say Y.
 
-config HIBERNATION_NVS
-	bool
-
 config HIBERNATION
 	bool "Hibernation (aka 'suspend to disk')"
 	depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
-	select HIBERNATION_NVS if HAS_IOMEM
+	select SUSPEND_NVS if HAS_IOMEM
 	---help---
 	  Enable the suspend to disk (STD) functionality, which is usually
 	  called "hibernation" in user interfaces. STD checkpoints the
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 524e058dcf06..f9063c6b185d 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -10,6 +10,6 @@ obj-$(CONFIG_SUSPEND) += suspend.o
 obj-$(CONFIG_PM_TEST_SUSPEND)	+= suspend_test.o
 obj-$(CONFIG_HIBERNATION)	+= hibernate.o snapshot.o swap.o user.o \
 				   block_io.o
-obj-$(CONFIG_HIBERNATION_NVS)	+= hibernate_nvs.o
+obj-$(CONFIG_SUSPEND_NVS)	+= nvs.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/nvs.c
index fdcad9ed5a7b..1836db60bbb6 100644
--- a/kernel/power/hibernate_nvs.c
+++ b/kernel/power/nvs.c
@@ -15,7 +15,7 @@
 
 /*
  * Platforms, like ACPI, may want us to save some memory used by them during
- * hibernation and to restore the contents of this memory during the subsequent
+ * suspend and to restore the contents of this memory during the subsequent
  * resume.  The code below implements a mechanism allowing us to do that.
  */
 
@@ -30,7 +30,7 @@ struct nvs_page {
 static LIST_HEAD(nvs_list);
 
 /**
- *	hibernate_nvs_register - register platform NVS memory region to save
+ *	suspend_nvs_register - register platform NVS memory region to save
  *	@start - physical address of the region
  *	@size - size of the region
  *
@@ -38,7 +38,7 @@ static LIST_HEAD(nvs_list);
 *	things so that the data from page-aligned addresses in this region will
 *	be copied into separate RAM pages.
 */
-int hibernate_nvs_register(unsigned long start, unsigned long size)
+int suspend_nvs_register(unsigned long start, unsigned long size)
 {
 	struct nvs_page *entry, *next;
 
@@ -68,9 +68,9 @@ int hibernate_nvs_register(unsigned long start, unsigned long size)
 }
 
 /**
- *	hibernate_nvs_free - free data pages allocated for saving NVS regions
+ *	suspend_nvs_free - free data pages allocated for saving NVS regions
  */
-void hibernate_nvs_free(void)
+void suspend_nvs_free(void)
 {
 	struct nvs_page *entry;
 
@@ -86,16 +86,16 @@ void hibernate_nvs_free(void)
 }
 
 /**
- *	hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
+ *	suspend_nvs_alloc - allocate memory necessary for saving NVS regions
  */
-int hibernate_nvs_alloc(void)
+int suspend_nvs_alloc(void)
 {
 	struct nvs_page *entry;
 
 	list_for_each_entry(entry, &nvs_list, node) {
 		entry->data = (void *)__get_free_page(GFP_KERNEL);
 		if (!entry->data) {
-			hibernate_nvs_free();
+			suspend_nvs_free();
 			return -ENOMEM;
 		}
 	}
@@ -103,9 +103,9 @@ int hibernate_nvs_alloc(void)
 }
 
 /**
- *	hibernate_nvs_save - save NVS memory regions
+ *	suspend_nvs_save - save NVS memory regions
  */
-void hibernate_nvs_save(void)
+void suspend_nvs_save(void)
 {
 	struct nvs_page *entry;
 
@@ -119,12 +119,12 @@ void hibernate_nvs_save(void)
 }
 
 /**
- *	hibernate_nvs_restore - restore NVS memory regions
+ *	suspend_nvs_restore - restore NVS memory regions
  *
  *	This function is going to be called with interrupts disabled, so it
  *	cannot iounmap the virtual addresses used to access the NVS region.
  */
-void hibernate_nvs_restore(void)
+void suspend_nvs_restore(void)
 {
 	struct nvs_page *entry;
 
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 56e7dbb8b996..f37cb7dd4402 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -16,6 +16,12 @@
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
 #include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>
 
 #include "power.h"
 
diff --git a/kernel/sched.c b/kernel/sched.c
index d48408142503..cb816e36cc8b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -306,52 +306,6 @@ static int init_task_group_load = INIT_TASK_GROUP_LOAD;
  */
 struct task_group init_task_group;
 
-/* return group to which a task belongs */
-static inline struct task_group *task_group(struct task_struct *p)
-{
-	struct task_group *tg;
-
-#ifdef CONFIG_CGROUP_SCHED
-	tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
-				struct task_group, css);
-#else
-	tg = &init_task_group;
-#endif
-	return tg;
-}
-
-/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
-{
-	/*
-	 * Strictly speaking this rcu_read_lock() is not needed since the
-	 * task_group is tied to the cgroup, which in turn can never go away
-	 * as long as there are tasks attached to it.
-	 *
-	 * However since task_group() uses task_subsys_state() which is an
-	 * rcu_dereference() user, this quiets CONFIG_PROVE_RCU.
-	 */
-	rcu_read_lock();
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
-	p->se.parent = task_group(p)->se[cpu];
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
-	p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
-	p->rt.parent = task_group(p)->rt_se[cpu];
-#endif
-	rcu_read_unlock();
-}
-
-#else
-
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline struct task_group *task_group(struct task_struct *p)
-{
-	return NULL;
-}
-
 #endif /* CONFIG_CGROUP_SCHED */
 
 /* CFS-related fields in a runqueue */
@@ -544,6 +498,8 @@ struct rq {
 	struct root_domain *rd;
 	struct sched_domain *sd;
 
+	unsigned long cpu_power;
+
 	unsigned char idle_at_tick;
 	/* For active balancing */
 	int post_schedule;
@@ -642,6 +598,49 @@ static inline int cpu_of(struct rq *rq)
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define raw_rq()		(&__raw_get_cpu_var(runqueues))
 
+#ifdef CONFIG_CGROUP_SCHED
+
+/*
+ * Return the group to which this tasks belongs.
+ *
+ * We use task_subsys_state_check() and extend the RCU verification
+ * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
+ * holds that lock for each task it moves into the cgroup. Therefore
+ * by holding that lock, we pin the task to the current cgroup.
+ */
+static inline struct task_group *task_group(struct task_struct *p)
+{
+	struct cgroup_subsys_state *css;
+
+	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
+			lockdep_is_held(&task_rq(p)->lock));
+	return container_of(css, struct task_group, css);
+}
+
+/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
+	p->se.parent = task_group(p)->se[cpu];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
+	p->rt.parent = task_group(p)->rt_se[cpu];
+#endif
+}
+
+#else /* CONFIG_CGROUP_SCHED */
+
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline struct task_group *task_group(struct task_struct *p)
+{
+	return NULL;
+}
+
+#endif /* CONFIG_CGROUP_SCHED */
+
 inline void update_rq_clock(struct rq *rq)
 {
 	if (!rq->skip_clock_update)
@@ -1255,6 +1254,12 @@ static void sched_avg_update(struct rq *rq)
 	s64 period = sched_avg_period();
 
 	while ((s64)(rq->clock - rq->age_stamp) > period) {
+		/*
+		 * Inline assembly required to prevent the compiler
+		 * optimising this loop into a divmod call.
+		 * See __iter_div_u64_rem() for another example of this.
+		 */
+		asm("" : "+rm" (rq->age_stamp));
 		rq->age_stamp += period;
 		rq->rt_avg /= 2;
 	}
@@ -1499,24 +1504,9 @@ static unsigned long target_load(int cpu, int type)
 	return max(rq->cpu_load[type-1], total);
 }
 
-static struct sched_group *group_of(int cpu)
-{
-	struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd);
-
-	if (!sd)
-		return NULL;
-
-	return sd->groups;
-}
-
 static unsigned long power_of(int cpu)
 {
-	struct sched_group *group = group_of(cpu);
-
-	if (!group)
-		return SCHED_LOAD_SCALE;
-
-	return group->cpu_power;
+	return cpu_rq(cpu)->cpu_power;
 }
 
 static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
@@ -1673,9 +1663,6 @@ static void update_shares(struct sched_domain *sd)
 
 static void update_h_load(long cpu)
 {
-	if (root_task_group_empty())
-		return;
-
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
@@ -1854,8 +1841,8 @@ static void dec_nr_running(struct rq *rq)
 static void set_load_weight(struct task_struct *p)
 {
 	if (task_has_rt_policy(p)) {
-		p->se.load.weight = prio_to_weight[0] * 2;
-		p->se.load.inv_weight = prio_to_wmult[0] >> 1;
+		p->se.load.weight = 0;
+		p->se.load.inv_weight = WMULT_CONST;
 		return;
 	}
 
@@ -2507,7 +2494,16 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	if (p->sched_class->task_fork)
 		p->sched_class->task_fork(p);
 
+	/*
+	 * The child is not yet in the pid-hash so no cgroup attach races,
+	 * and the cgroup is pinned to this child due to cgroup_fork()
+	 * is ran before sched_fork().
+	 *
+	 * Silence PROVE_RCU.
+	 */
+	rcu_read_lock();
 	set_task_cpu(p, cpu);
+	rcu_read_unlock();
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	if (likely(sched_info_on()))
@@ -4478,16 +4474,6 @@ recheck:
 	}
 
 	if (user) {
-#ifdef CONFIG_RT_GROUP_SCHED
-		/*
-		 * Do not allow realtime tasks into groups that have no runtime
-		 * assigned.
-		 */
-		if (rt_bandwidth_enabled() && rt_policy(policy) &&
-				task_group(p)->rt_bandwidth.rt_runtime == 0)
-			return -EPERM;
-#endif
-
 		retval = security_task_setscheduler(p, policy, param);
 		if (retval)
 			return retval;
@@ -4503,6 +4489,22 @@ recheck:
 	 * runqueue lock must be held.
 	 */
 	rq = __task_rq_lock(p);
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	if (user) {
+		/*
+		 * Do not allow realtime tasks into groups that have no runtime
+		 * assigned.
+		 */
+		if (rt_bandwidth_enabled() && rt_policy(policy) &&
+				task_group(p)->rt_bandwidth.rt_runtime == 0) {
+			__task_rq_unlock(rq);
+			raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+			return -EPERM;
+		}
+	}
+#endif
+
 	/* recheck policy now with rq lock held */
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
 		policy = oldpolicy = -1;
@@ -7605,6 +7607,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
 		rq->rd = NULL;
+		rq->cpu_power = SCHED_LOAD_SCALE;
 		rq->post_schedule = 0;
 		rq->active_balance = 0;
 		rq->next_balance = jiffies;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 217e4a9393e4..a878b5332daa 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1225,7 +1225,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	unsigned long this_load, load;
 	int idx, this_cpu, prev_cpu;
 	unsigned long tl_per_task;
-	unsigned int imbalance;
 	struct task_group *tg;
 	unsigned long weight;
 	int balanced;
@@ -1241,6 +1240,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	 * effect of the currently running task from the load
 	 * of the current CPU:
 	 */
+	rcu_read_lock();
 	if (sync) {
 		tg = task_group(current);
 		weight = current->se.load.weight;
@@ -1252,8 +1252,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	tg = task_group(p);
 	weight = p->se.load.weight;
 
-	imbalance = 100 + (sd->imbalance_pct - 100) / 2;
-
 	/*
 	 * In low-load situations, where prev_cpu is idle and this_cpu is idle
 	 * due to the sync cause above having dropped this_load to 0, we'll
@@ -1263,9 +1261,22 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	 * Otherwise check if either cpus are near enough in load to allow this
 	 * task to be woken on this_cpu.
 	 */
-	balanced = !this_load ||
-		100*(this_load + effective_load(tg, this_cpu, weight, weight)) <=
-		imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
+	if (this_load) {
+		unsigned long this_eff_load, prev_eff_load;
+
+		this_eff_load = 100;
+		this_eff_load *= power_of(prev_cpu);
+		this_eff_load *= this_load +
+			effective_load(tg, this_cpu, weight, weight);
+
+		prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
+		prev_eff_load *= power_of(this_cpu);
+		prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
+
+		balanced = this_eff_load <= prev_eff_load;
+	} else
+		balanced = true;
+	rcu_read_unlock();
 
 	/*
 	 * If the currently running task will sleep within
@@ -2298,6 +2309,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 	if (!power)
 		power = 1;
 
+	cpu_rq(cpu)->cpu_power = power;
 	sdg->cpu_power = power;
 }
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 825e1126008f..07b4f1b1a73a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -850,7 +850,7 @@ static __init int spawn_ksoftirqd(void)
 	void *cpu = (void *)(long)smp_processor_id();
 	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
 
-	BUG_ON(err == NOTIFY_BAD);
+	BUG_ON(err != NOTIFY_OK);
 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
 	register_cpu_notifier(&cpu_nfb);
 	return 0;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index b4e7431e7c78..70f8d90331e9 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -321,7 +321,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
 
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_UP_CANCELED:
-	case CPU_DEAD:
+	case CPU_POST_DEAD:
 	{
 		struct cpu_stop_work *work;
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 997080f00e0b..d24f761f4876 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1471,12 +1471,12 @@ static struct ctl_table fs_table[] = {
 	},
 #endif
 	{
-		.procname	= "pipe-max-pages",
-		.data		= &pipe_max_pages,
+		.procname	= "pipe-max-size",
+		.data		= &pipe_max_size,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.extra1		= &two,
+		.proc_handler	= &pipe_proc_fn,
+		.extra1		= &pipe_min_size,
 	},
 /*
  * NOTE: do not add new entries to this table unless you have read
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1d7b9bc1c034..783fbadf2202 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -315,9 +315,6 @@ void tick_nohz_stop_sched_tick(int inidle)
 		goto end;
 	}
 
-	if (nohz_ratelimit(cpu))
-		goto end;
-
 	ts->idle_calls++;
 	/* Read jiffies and the time when jiffies were updated last */
 	do {
@@ -328,7 +325,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 	} while (read_seqretry(&xtime_lock, seq));
 
 	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
-	    arch_needs_cpu(cpu)) {
+	    arch_needs_cpu(cpu) || nohz_ratelimit(cpu)) {
 		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
 	} else {
diff --git a/kernel/timer.c b/kernel/timer.c
index 2454172a80d3..ee305c8d4e18 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1717,7 +1717,7 @@ void __init init_timers(void)
 
 	init_timer_stats();
 
-	BUG_ON(err == NOTIFY_BAD);
+	BUG_ON(err != NOTIFY_OK);
 	register_cpu_notifier(&timers_nb);
 	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
 }