Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit_tree.c      |  48
-rw-r--r--  kernel/irq/chip.c        |   3
-rw-r--r--  kernel/module.c          | 284
-rw-r--r--  kernel/panic.c           |  17
-rw-r--r--  kernel/params.c          | 274
-rw-r--r--  kernel/rcupdate.c        |  19
-rw-r--r--  kernel/sched.c           |  51
-rw-r--r--  kernel/sched_fair.c      |  62
-rw-r--r--  kernel/sched_features.h  |   2
-rw-r--r--  kernel/sched_stats.h     |   2
-rw-r--r--  kernel/stop_machine.c    | 120
-rw-r--r--  kernel/sysctl.c          |  10
-rw-r--r--  kernel/time/tick-sched.c |  12
-rw-r--r--  kernel/workqueue.c       |   7
14 files changed, 449 insertions(+), 462 deletions(-)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index f7921a2ecf16..8ba0e0d934f2 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -532,7 +532,7 @@ void audit_trim_trees(void)
 	list_add(&cursor, &tree_list);
 	while (cursor.next != &tree_list) {
 		struct audit_tree *tree;
-		struct nameidata nd;
+		struct path path;
 		struct vfsmount *root_mnt;
 		struct node *node;
 		struct list_head list;
@@ -544,12 +544,12 @@ void audit_trim_trees(void)
 		list_add(&cursor, &tree->list);
 		mutex_unlock(&audit_filter_mutex);
 
-		err = path_lookup(tree->pathname, 0, &nd);
+		err = kern_path(tree->pathname, 0, &path);
 		if (err)
 			goto skip_it;
 
-		root_mnt = collect_mounts(nd.path.mnt, nd.path.dentry);
-		path_put(&nd.path);
+		root_mnt = collect_mounts(path.mnt, path.dentry);
+		path_put(&path);
 		if (!root_mnt)
 			goto skip_it;
 
@@ -580,19 +580,19 @@ skip_it:
 }
 
 static int is_under(struct vfsmount *mnt, struct dentry *dentry,
-		    struct nameidata *nd)
+		    struct path *path)
 {
-	if (mnt != nd->path.mnt) {
+	if (mnt != path->mnt) {
 		for (;;) {
 			if (mnt->mnt_parent == mnt)
 				return 0;
-			if (mnt->mnt_parent == nd->path.mnt)
+			if (mnt->mnt_parent == path->mnt)
 				break;
 			mnt = mnt->mnt_parent;
 		}
 		dentry = mnt->mnt_mountpoint;
 	}
-	return is_subdir(dentry, nd->path.dentry);
+	return is_subdir(dentry, path->dentry);
 }
 
 int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
@@ -618,7 +618,7 @@ void audit_put_tree(struct audit_tree *tree)
 int audit_add_tree_rule(struct audit_krule *rule)
 {
 	struct audit_tree *seed = rule->tree, *tree;
-	struct nameidata nd;
+	struct path path;
 	struct vfsmount *mnt, *p;
 	struct list_head list;
 	int err;
@@ -637,11 +637,11 @@ int audit_add_tree_rule(struct audit_krule *rule)
 	/* do not set rule->tree yet */
 	mutex_unlock(&audit_filter_mutex);
 
-	err = path_lookup(tree->pathname, 0, &nd);
+	err = kern_path(tree->pathname, 0, &path);
 	if (err)
 		goto Err;
-	mnt = collect_mounts(nd.path.mnt, nd.path.dentry);
-	path_put(&nd.path);
+	mnt = collect_mounts(path.mnt, path.dentry);
+	path_put(&path);
 	if (!mnt) {
 		err = -ENOMEM;
 		goto Err;
@@ -690,29 +690,29 @@ int audit_tag_tree(char *old, char *new)
 {
 	struct list_head cursor, barrier;
 	int failed = 0;
-	struct nameidata nd;
+	struct path path;
 	struct vfsmount *tagged;
 	struct list_head list;
 	struct vfsmount *mnt;
 	struct dentry *dentry;
 	int err;
 
-	err = path_lookup(new, 0, &nd);
+	err = kern_path(new, 0, &path);
 	if (err)
 		return err;
-	tagged = collect_mounts(nd.path.mnt, nd.path.dentry);
-	path_put(&nd.path);
+	tagged = collect_mounts(path.mnt, path.dentry);
+	path_put(&path);
 	if (!tagged)
 		return -ENOMEM;
 
-	err = path_lookup(old, 0, &nd);
+	err = kern_path(old, 0, &path);
 	if (err) {
 		drop_collected_mounts(tagged);
 		return err;
 	}
-	mnt = mntget(nd.path.mnt);
-	dentry = dget(nd.path.dentry);
-	path_put(&nd.path);
+	mnt = mntget(path.mnt);
+	dentry = dget(path.dentry);
+	path_put(&path);
 
 	if (dentry == tagged->mnt_root && dentry == mnt->mnt_root)
 		follow_up(&mnt, &dentry);
@@ -733,7 +733,7 @@ int audit_tag_tree(char *old, char *new)
 		list_add(&cursor, &tree->list);
 		mutex_unlock(&audit_filter_mutex);
 
-		err = path_lookup(tree->pathname, 0, &nd);
+		err = kern_path(tree->pathname, 0, &path);
 		if (err) {
 			put_tree(tree);
 			mutex_lock(&audit_filter_mutex);
@@ -741,15 +741,15 @@ int audit_tag_tree(char *old, char *new)
 		}
 
 		spin_lock(&vfsmount_lock);
-		if (!is_under(mnt, dentry, &nd)) {
+		if (!is_under(mnt, dentry, &path)) {
 			spin_unlock(&vfsmount_lock);
-			path_put(&nd.path);
+			path_put(&path);
 			put_tree(tree);
 			mutex_lock(&audit_filter_mutex);
 			continue;
 		}
 		spin_unlock(&vfsmount_lock);
-		path_put(&nd.path);
+		path_put(&path);
 
 		list_for_each_entry(p, &list, mnt_list) {
 			failed = tag_chunk(p->mnt_root->d_inode, tree);
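
The audit_tree.c conversions above all have the same shape: path_lookup() filling a struct nameidata is replaced by kern_path() filling a bare struct path, whose mnt/dentry pair feeds collect_mounts() before path_put() drops the references. A minimal sketch of the new calling pattern, with an illustrative function name (not part of this commit):

	/* assumes <linux/namei.h> (kern_path) and <linux/path.h> (path_put) */
	static int example_lookup(const char *pathname)
	{
		struct path path;
		int err;

		err = kern_path(pathname, 0, &path);	/* resolve to (mnt, dentry) */
		if (err)
			return err;
		/* use path.mnt / path.dentry, e.g. collect_mounts(path.mnt, path.dentry) */
		path_put(&path);			/* drop the references kern_path() took */
		return 0;
	}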
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 4895fde4eb93..10b5092e9bfe 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -76,6 +76,7 @@ void dynamic_irq_cleanup(unsigned int irq)
 	desc->chip_data = NULL;
 	desc->handle_irq = handle_bad_irq;
 	desc->chip = &no_irq_chip;
+	desc->name = NULL;
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
 
@@ -127,7 +128,7 @@ int set_irq_type(unsigned int irq, unsigned int type)
 		return 0;
 
 	spin_lock_irqsave(&desc->lock, flags);
-	ret = __irq_set_trigger(desc, irq, flags);
+	ret = __irq_set_trigger(desc, irq, type);
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
 }
diff --git a/kernel/module.c b/kernel/module.c
index 0d8d21ee792c..c0f1826e2d9e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -42,6 +42,7 @@
 #include <linux/string.h>
 #include <linux/mutex.h>
 #include <linux/unwind.h>
+#include <linux/rculist.h>
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <linux/license.h>
@@ -63,7 +64,7 @@
 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
 
 /* List of modules, protected by module_mutex or preempt_disable
- * (add/delete uses stop_machine). */
+ * (delete uses stop_machine/add uses RCU list operations). */
 static DEFINE_MUTEX(module_mutex);
 static LIST_HEAD(modules);
 
@@ -132,6 +133,29 @@ static unsigned int find_sec(Elf_Ehdr *hdr,
 	return 0;
 }
 
+/* Find a module section, or NULL. */
+static void *section_addr(Elf_Ehdr *hdr, Elf_Shdr *shdrs,
+			  const char *secstrings, const char *name)
+{
+	/* Section 0 has sh_addr 0. */
+	return (void *)shdrs[find_sec(hdr, shdrs, secstrings, name)].sh_addr;
+}
+
+/* Find a module section, or NULL.  Fill in number of "objects" in section. */
+static void *section_objs(Elf_Ehdr *hdr,
+			  Elf_Shdr *sechdrs,
+			  const char *secstrings,
+			  const char *name,
+			  size_t object_size,
+			  unsigned int *num)
+{
+	unsigned int sec = find_sec(hdr, sechdrs, secstrings, name);
+
+	/* Section 0 has sh_addr 0 and sh_size 0. */
+	*num = sechdrs[sec].sh_size / object_size;
+	return (void *)sechdrs[sec].sh_addr;
+}
+
 /* Provided by the linker */
 extern const struct kernel_symbol __start___ksymtab[];
 extern const struct kernel_symbol __stop___ksymtab[];
@@ -218,7 +242,7 @@ static bool each_symbol(bool (*fn)(const struct symsearch *arr,
 	if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
 		return true;
 
-	list_for_each_entry(mod, &modules, list) {
+	list_for_each_entry_rcu(mod, &modules, list) {
 		struct symsearch arr[] = {
 			{ mod->syms, mod->syms + mod->num_syms, mod->crcs,
 			  NOT_GPL_ONLY, false },
@@ -1394,17 +1418,6 @@ static void mod_kobject_remove(struct module *mod)
 }
 
 /*
- * link the module with the whole machine is stopped with interrupts off
- * - this defends against kallsyms not taking locks
- */
-static int __link_module(void *_mod)
-{
-	struct module *mod = _mod;
-	list_add(&mod->list, &modules);
-	return 0;
-}
-
-/*
  * unlink the module with the whole machine is stopped with interrupts off
  * - this defends against kallsyms not taking locks
  */
@@ -1789,32 +1802,20 @@ static inline void add_kallsyms(struct module *mod,
 }
 #endif /* CONFIG_KALLSYMS */
 
-#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
-static void dynamic_printk_setup(Elf_Shdr *sechdrs, unsigned int verboseindex)
+static void dynamic_printk_setup(struct mod_debug *debug, unsigned int num)
 {
-	struct mod_debug *debug_info;
-	unsigned long pos, end;
-	unsigned int num_verbose;
-
-	pos = sechdrs[verboseindex].sh_addr;
-	num_verbose = sechdrs[verboseindex].sh_size /
-				sizeof(struct mod_debug);
-	end = pos + (num_verbose * sizeof(struct mod_debug));
+#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
+	unsigned int i;
 
-	for (; pos < end; pos += sizeof(struct mod_debug)) {
-		debug_info = (struct mod_debug *)pos;
-		register_dynamic_debug_module(debug_info->modname,
-			debug_info->type, debug_info->logical_modname,
-			debug_info->flag_names, debug_info->hash,
-			debug_info->hash2);
+	for (i = 0; i < num; i++) {
+		register_dynamic_debug_module(debug[i].modname,
+					      debug[i].type,
+					      debug[i].logical_modname,
+					      debug[i].flag_names,
+					      debug[i].hash, debug[i].hash2);
 	}
-}
-#else
-static inline void dynamic_printk_setup(Elf_Shdr *sechdrs,
-					unsigned int verboseindex)
-{
-}
 #endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */
+}
 
 static void *module_alloc_update_bounds(unsigned long size)
 {
@@ -1843,37 +1844,14 @@ static noinline struct module *load_module(void __user *umod,
 	unsigned int i;
 	unsigned int symindex = 0;
 	unsigned int strindex = 0;
-	unsigned int setupindex;
-	unsigned int exindex;
-	unsigned int exportindex;
-	unsigned int modindex;
-	unsigned int obsparmindex;
-	unsigned int infoindex;
-	unsigned int gplindex;
-	unsigned int crcindex;
-	unsigned int gplcrcindex;
-	unsigned int versindex;
-	unsigned int pcpuindex;
-	unsigned int gplfutureindex;
-	unsigned int gplfuturecrcindex;
+	unsigned int modindex, versindex, infoindex, pcpuindex;
 	unsigned int unwindex = 0;
-#ifdef CONFIG_UNUSED_SYMBOLS
-	unsigned int unusedindex;
-	unsigned int unusedcrcindex;
-	unsigned int unusedgplindex;
-	unsigned int unusedgplcrcindex;
-#endif
-	unsigned int markersindex;
-	unsigned int markersstringsindex;
-	unsigned int verboseindex;
-	unsigned int tracepointsindex;
-	unsigned int tracepointsstringsindex;
-	unsigned int mcountindex;
+	unsigned int num_kp, num_mcount;
+	struct kernel_param *kp;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
-	void *mseg;
-	struct exception_table_entry *extable;
+	unsigned long *mseg;
 	mm_segment_t old_fs;
 
 	DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -1937,6 +1915,7 @@ static noinline struct module *load_module(void __user *umod,
 		err = -ENOEXEC;
 		goto free_hdr;
 	}
+	/* This is temporary: point mod into copy of data. */
 	mod = (void *)sechdrs[modindex].sh_addr;
 
 	if (symindex == 0) {
@@ -1946,22 +1925,6 @@ static noinline struct module *load_module(void __user *umod,
 		goto free_hdr;
 	}
 
-	/* Optional sections */
-	exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab");
-	gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl");
-	gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future");
-	crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab");
-	gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl");
-	gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future");
-#ifdef CONFIG_UNUSED_SYMBOLS
-	unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
-	unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
-	unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused");
-	unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl");
-#endif
-	setupindex = find_sec(hdr, sechdrs, secstrings, "__param");
-	exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table");
-	obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm");
 	versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
 	infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
 	pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
@@ -2117,42 +2080,57 @@ static noinline struct module *load_module(void __user *umod,
 	if (err < 0)
 		goto cleanup;
 
-	/* Set up EXPORTed & EXPORT_GPLed symbols (section 0 is 0 length) */
-	mod->num_syms = sechdrs[exportindex].sh_size / sizeof(*mod->syms);
-	mod->syms = (void *)sechdrs[exportindex].sh_addr;
-	if (crcindex)
-		mod->crcs = (void *)sechdrs[crcindex].sh_addr;
-	mod->num_gpl_syms = sechdrs[gplindex].sh_size / sizeof(*mod->gpl_syms);
-	mod->gpl_syms = (void *)sechdrs[gplindex].sh_addr;
-	if (gplcrcindex)
-		mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr;
-	mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size /
-					sizeof(*mod->gpl_future_syms);
-	mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
-	if (gplfuturecrcindex)
-		mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
+	/* Now we've got everything in the final locations, we can
+	 * find optional sections. */
+	kp = section_objs(hdr, sechdrs, secstrings, "__param", sizeof(*kp),
+			  &num_kp);
+	mod->syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab",
+				 sizeof(*mod->syms), &mod->num_syms);
+	mod->crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab");
+	mod->gpl_syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab_gpl",
+				     sizeof(*mod->gpl_syms),
+				     &mod->num_gpl_syms);
+	mod->gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_gpl");
+	mod->gpl_future_syms = section_objs(hdr, sechdrs, secstrings,
+					    "__ksymtab_gpl_future",
+					    sizeof(*mod->gpl_future_syms),
+					    &mod->num_gpl_future_syms);
+	mod->gpl_future_crcs = section_addr(hdr, sechdrs, secstrings,
+					    "__kcrctab_gpl_future");
 
 #ifdef CONFIG_UNUSED_SYMBOLS
-	mod->num_unused_syms = sechdrs[unusedindex].sh_size /
-					sizeof(*mod->unused_syms);
-	mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size /
-					sizeof(*mod->unused_gpl_syms);
-	mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
-	if (unusedcrcindex)
-		mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr;
-	mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr;
-	if (unusedgplcrcindex)
-		mod->unused_gpl_crcs
-			= (void *)sechdrs[unusedgplcrcindex].sh_addr;
+	mod->unused_syms = section_objs(hdr, sechdrs, secstrings,
+					"__ksymtab_unused",
+					sizeof(*mod->unused_syms),
+					&mod->num_unused_syms);
+	mod->unused_crcs = section_addr(hdr, sechdrs, secstrings,
+					"__kcrctab_unused");
+	mod->unused_gpl_syms = section_objs(hdr, sechdrs, secstrings,
+					    "__ksymtab_unused_gpl",
+					    sizeof(*mod->unused_gpl_syms),
+					    &mod->num_unused_gpl_syms);
+	mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings,
+					    "__kcrctab_unused_gpl");
+#endif
+
+#ifdef CONFIG_MARKERS
+	mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers",
+				    sizeof(*mod->markers), &mod->num_markers);
+#endif
+#ifdef CONFIG_TRACEPOINTS
+	mod->tracepoints = section_objs(hdr, sechdrs, secstrings,
+					"__tracepoints",
+					sizeof(*mod->tracepoints),
+					&mod->num_tracepoints);
 #endif
 
 #ifdef CONFIG_MODVERSIONS
-	if ((mod->num_syms && !crcindex)
-	    || (mod->num_gpl_syms && !gplcrcindex)
-	    || (mod->num_gpl_future_syms && !gplfuturecrcindex)
+	if ((mod->num_syms && !mod->crcs)
+	    || (mod->num_gpl_syms && !mod->gpl_crcs)
+	    || (mod->num_gpl_future_syms && !mod->gpl_future_crcs)
 #ifdef CONFIG_UNUSED_SYMBOLS
-	    || (mod->num_unused_syms && !unusedcrcindex)
-	    || (mod->num_unused_gpl_syms && !unusedgplcrcindex)
+	    || (mod->num_unused_syms && !mod->unused_crcs)
+	    || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs)
 #endif
 	    ) {
 		printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name);
@@ -2161,16 +2139,6 @@ static noinline struct module *load_module(void __user *umod,
 		goto cleanup;
 	}
 #endif
-	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
-	markersstringsindex = find_sec(hdr, sechdrs, secstrings,
-					"__markers_strings");
-	verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose");
-	tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
-	tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
-					"__tracepoints_strings");
-
-	mcountindex = find_sec(hdr, sechdrs, secstrings,
-			       "__mcount_loc");
 
 	/* Now do relocations. */
 	for (i = 1; i < hdr->e_shnum; i++) {
@@ -2193,28 +2161,16 @@ static noinline struct module *load_module(void __user *umod,
 		if (err < 0)
 			goto cleanup;
 	}
-#ifdef CONFIG_MARKERS
-	mod->markers = (void *)sechdrs[markersindex].sh_addr;
-	mod->num_markers =
-		sechdrs[markersindex].sh_size / sizeof(*mod->markers);
-#endif
-#ifdef CONFIG_TRACEPOINTS
-	mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
-	mod->num_tracepoints =
-		sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
-#endif
-
 
 	/* Find duplicate symbols */
 	err = verify_export_symbols(mod);
-
 	if (err < 0)
 		goto cleanup;
 
 	/* Set up and sort exception table */
-	mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable);
-	mod->extable = extable = (void *)sechdrs[exindex].sh_addr;
-	sort_extable(extable, extable + mod->num_exentries);
+	mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table",
+				    sizeof(*mod->extable), &mod->num_exentries);
+	sort_extable(mod->extable, mod->extable + mod->num_exentries);
 
 	/* Finally, copy percpu area over. */
 	percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
@@ -2223,11 +2179,17 @@ static noinline struct module *load_module(void __user *umod,
 	add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
 
 	if (!mod->taints) {
+		struct mod_debug *debug;
+		unsigned int num_debug;
+
 #ifdef CONFIG_MARKERS
 		marker_update_probe_range(mod->markers,
 					  mod->markers + mod->num_markers);
 #endif
-		dynamic_printk_setup(sechdrs, verboseindex);
+		debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
+				     sizeof(*debug), &num_debug);
+		dynamic_printk_setup(debug, num_debug);
+
 #ifdef CONFIG_TRACEPOINTS
 		tracepoint_update_probe_range(mod->tracepoints,
 					      mod->tracepoints + mod->num_tracepoints);
@@ -2235,8 +2197,9 @@ static noinline struct module *load_module(void __user *umod,
 	}
 
 	/* sechdrs[0].sh_size is always zero */
-	mseg = (void *)sechdrs[mcountindex].sh_addr;
-	ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size);
+	mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
+			    sizeof(*mseg), &num_mcount);
+	ftrace_init_module(mseg, mseg + num_mcount);
 
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
@@ -2261,30 +2224,24 @@ static noinline struct module *load_module(void __user *umod,
 	set_fs(old_fs);
 
 	mod->args = args;
-	if (obsparmindex)
+	if (section_addr(hdr, sechdrs, secstrings, "__obsparm"))
 		printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
 		       mod->name);
 
 	/* Now sew it into the lists so we can get lockdep and oops
 	 * info during argument parsing.  Noone should access us, since
-	 * strong_try_module_get() will fail. */
-	stop_machine(__link_module, mod, NULL);
-
-	/* Size of section 0 is 0, so this works well if no params */
-	err = parse_args(mod->name, mod->args,
-			 (struct kernel_param *)
-			 sechdrs[setupindex].sh_addr,
-			 sechdrs[setupindex].sh_size
-			 / sizeof(struct kernel_param),
-			 NULL);
+	 * strong_try_module_get() will fail.
+	 * lockdep/oops can run asynchronous, so use the RCU list insertion
+	 * function to insert in a way safe to concurrent readers.
+	 * The mutex protects against concurrent writers.
+	 */
+	list_add_rcu(&mod->list, &modules);
+
+	err = parse_args(mod->name, mod->args, kp, num_kp, NULL);
 	if (err < 0)
 		goto unlink;
 
-	err = mod_sysfs_setup(mod,
-			      (struct kernel_param *)
-			      sechdrs[setupindex].sh_addr,
-			      sechdrs[setupindex].sh_size
-			      / sizeof(struct kernel_param));
+	err = mod_sysfs_setup(mod, kp, num_kp);
 	if (err < 0)
 		goto unlink;
 	add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
@@ -2473,7 +2430,7 @@ const char *module_address_lookup(unsigned long addr,
 	const char *ret = NULL;
 
 	preempt_disable();
-	list_for_each_entry(mod, &modules, list) {
+	list_for_each_entry_rcu(mod, &modules, list) {
 		if (within(addr, mod->module_init, mod->init_size)
 		    || within(addr, mod->module_core, mod->core_size)) {
 			if (modname)
@@ -2496,7 +2453,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
 	struct module *mod;
 
 	preempt_disable();
-	list_for_each_entry(mod, &modules, list) {
+	list_for_each_entry_rcu(mod, &modules, list) {
 		if (within(addr, mod->module_init, mod->init_size) ||
 		    within(addr, mod->module_core, mod->core_size)) {
 			const char *sym;
@@ -2520,7 +2477,7 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
 	struct module *mod;
 
 	preempt_disable();
-	list_for_each_entry(mod, &modules, list) {
+	list_for_each_entry_rcu(mod, &modules, list) {
 		if (within(addr, mod->module_init, mod->init_size) ||
 		    within(addr, mod->module_core, mod->core_size)) {
 			const char *sym;
@@ -2547,7 +2504,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 	struct module *mod;
 
 	preempt_disable();
-	list_for_each_entry(mod, &modules, list) {
+	list_for_each_entry_rcu(mod, &modules, list) {
 		if (symnum < mod->num_symtab) {
 			*value = mod->symtab[symnum].st_value;
 			*type = mod->symtab[symnum].st_info;
@@ -2590,7 +2547,7 @@ unsigned long module_kallsyms_lookup_name(const char *name)
 		ret = mod_find_symname(mod, colon+1);
 		*colon = ':';
 	} else {
-		list_for_each_entry(mod, &modules, list)
+		list_for_each_entry_rcu(mod, &modules, list)
 			if ((ret = mod_find_symname(mod, name)) != 0)
 				break;
 	}
@@ -2693,7 +2650,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
 	struct module *mod;
 
 	preempt_disable();
-	list_for_each_entry(mod, &modules, list) {
+	list_for_each_entry_rcu(mod, &modules, list) {
 		if (mod->num_exentries == 0)
 			continue;
 
@@ -2719,7 +2676,7 @@ int is_module_address(unsigned long addr)
 
 	preempt_disable();
 
-	list_for_each_entry(mod, &modules, list) {
+	list_for_each_entry_rcu(mod, &modules, list) {
 		if (within(addr, mod->module_core, mod->core_size)) {
 			preempt_enable();
 			return 1;
@@ -2740,7 +2697,7 @@ struct module *__module_text_address(unsigned long addr)
 	if (addr < module_addr_min || addr > module_addr_max)
 		return NULL;
 
-	list_for_each_entry(mod, &modules, list)
+	list_for_each_entry_rcu(mod, &modules, list)
 		if (within(addr, mod->module_init, mod->init_text_size)
 		    || within(addr, mod->module_core, mod->core_text_size))
 			return mod;
@@ -2765,8 +2722,11 @@ void print_modules(void)
 	char buf[8];
 
 	printk("Modules linked in:");
-	list_for_each_entry(mod, &modules, list)
+	/* Most callers should already have preempt disabled, but make sure */
+	preempt_disable();
+	list_for_each_entry_rcu(mod, &modules, list)
 		printk(" %s%s", mod->name, module_flags(mod, buf));
+	preempt_enable();
 	if (last_unloaded_module[0])
 		printk(" [last unloaded: %s]", last_unloaded_module);
 	printk("\n");
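
All the list_for_each_entry() to list_for_each_entry_rcu() conversions above rely on the discipline the new load_module() comment spells out: writers insert with list_add_rcu() under module_mutex (delete still uses stop_machine), so readers only need preemption disabled. A generic sketch of that reader/writer pairing, with illustrative names (not from module.c):

	#include <linux/rculist.h>
	#include <linux/mutex.h>

	static LIST_HEAD(items);
	static DEFINE_MUTEX(items_mutex);

	struct item {
		struct list_head list;
		int key;
	};

	/* Writer side: serialized by the mutex, publishes with RCU semantics. */
	static void add_item(struct item *it)
	{
		mutex_lock(&items_mutex);
		list_add_rcu(&it->list, &items);
		mutex_unlock(&items_mutex);
	}

	/* Reader side: safe against a concurrent add_item() without the mutex. */
	static bool item_exists(int key)
	{
		struct item *it;
		bool found = false;

		rcu_read_lock();
		list_for_each_entry_rcu(it, &items, list) {
			if (it->key == key) {
				found = true;
				break;
			}
		}
		rcu_read_unlock();
		return found;
	}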
diff --git a/kernel/panic.c b/kernel/panic.c
index bda561ef3cdf..6513aac8e992 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -34,13 +34,6 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
 
 EXPORT_SYMBOL(panic_notifier_list);
 
-static int __init panic_setup(char *str)
-{
-	panic_timeout = simple_strtoul(str, NULL, 0);
-	return 1;
-}
-__setup("panic=", panic_setup);
-
 static long no_blink(long time)
 {
 	return 0;
@@ -218,13 +211,6 @@ void add_taint(unsigned flag)
 }
 EXPORT_SYMBOL(add_taint);
 
-static int __init pause_on_oops_setup(char *str)
-{
-	pause_on_oops = simple_strtoul(str, NULL, 0);
-	return 1;
-}
-__setup("pause_on_oops=", pause_on_oops_setup);
-
 static void spin_msec(int msecs)
 {
 	int i;
@@ -384,3 +370,6 @@ void __stack_chk_fail(void)
 }
 EXPORT_SYMBOL(__stack_chk_fail);
 #endif
+
+core_param(panic, panic_timeout, int, 0644);
+core_param(pause_on_oops, pause_on_oops, int, 0644);
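
core_param() (from <linux/moduleparam.h>) is what makes the two deletions above safe: it registers the variable as a built-in kernel parameter with no module-name prefix, so "panic=5" on the command line still works, and the non-zero mode additionally exposes the value in sysfs. Per the param_sysfs_builtin() change later in this patch, dotless names land under the "kernel" kobject:

	/* "panic=N" on the kernel command line still sets panic_timeout,
	 * and the value is now also visible (and writable, mode 0644) at
	 * /sys/module/kernel/parameters/panic. */
	core_param(panic, panic_timeout, int, 0644);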
diff --git a/kernel/params.c b/kernel/params.c
index afc46a23eb6d..b077f1b045d3 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -373,6 +373,8 @@ int param_get_string(char *buffer, struct kernel_param *kp)
 }
 
 /* sysfs output in /sys/modules/XYZ/parameters/ */
+#define to_module_attr(n) container_of(n, struct module_attribute, attr);
+#define to_module_kobject(n) container_of(n, struct module_kobject, kobj);
 
 extern struct kernel_param __start___param[], __stop___param[];
 
@@ -384,6 +386,7 @@ struct param_attribute
 
 struct module_param_attrs
 {
+	unsigned int num;
 	struct attribute_group grp;
 	struct param_attribute attrs[0];
 };
@@ -434,69 +437,84 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
 
 #ifdef CONFIG_SYSFS
 /*
- * param_sysfs_setup - setup sysfs support for one module or KBUILD_MODNAME
- * @mk: struct module_kobject (contains parent kobject)
- * @kparam: array of struct kernel_param, the actual parameter definitions
- * @num_params: number of entries in array
- * @name_skip: offset where the parameter name start in kparam[].name. Needed for built-in "modules"
+ * add_sysfs_param - add a parameter to sysfs
+ * @mk: struct module_kobject
+ * @kparam: the actual parameter definition to add to sysfs
+ * @name: name of parameter
  *
- * Create a kobject for a (per-module) group of parameters, and create files
- * in sysfs. A pointer to the param_kobject is returned on success,
- * NULL if there's no parameter to export, or other ERR_PTR(err).
+ * Create a kobject if for a (per-module) parameter if mp NULL, and
+ * create file in sysfs.  Returns an error on out of memory.  Always cleans up
+ * if there's an error.
  */
-static __modinit struct module_param_attrs *
-param_sysfs_setup(struct module_kobject *mk,
-		  struct kernel_param *kparam,
-		  unsigned int num_params,
-		  unsigned int name_skip)
+static __modinit int add_sysfs_param(struct module_kobject *mk,
+				     struct kernel_param *kp,
+				     const char *name)
 {
-	struct module_param_attrs *mp;
-	unsigned int valid_attrs = 0;
-	unsigned int i, size[2];
-	struct param_attribute *pattr;
-	struct attribute **gattr;
-	int err;
-
-	for (i=0; i<num_params; i++) {
-		if (kparam[i].perm)
-			valid_attrs++;
+	struct module_param_attrs *new;
+	struct attribute **attrs;
+	int err, num;
+
+	/* We don't bother calling this with invisible parameters. */
+	BUG_ON(!kp->perm);
+
+	if (!mk->mp) {
+		num = 0;
+		attrs = NULL;
+	} else {
+		num = mk->mp->num;
+		attrs = mk->mp->grp.attrs;
 	}
 
-	if (!valid_attrs)
-		return NULL;
-
-	size[0] = ALIGN(sizeof(*mp) +
-			valid_attrs * sizeof(mp->attrs[0]),
-			sizeof(mp->grp.attrs[0]));
-	size[1] = (valid_attrs + 1) * sizeof(mp->grp.attrs[0]);
-
-	mp = kzalloc(size[0] + size[1], GFP_KERNEL);
-	if (!mp)
-		return ERR_PTR(-ENOMEM);
+	/* Enlarge. */
+	new = krealloc(mk->mp,
+		       sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1),
+		       GFP_KERNEL);
+	if (!new) {
+		kfree(mk->mp);
+		err = -ENOMEM;
+		goto fail;
+	}
+	attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL);
+	if (!attrs) {
+		err = -ENOMEM;
+		goto fail_free_new;
+	}
 
-	mp->grp.name = "parameters";
-	mp->grp.attrs = (void *)mp + size[0];
+	/* Sysfs wants everything zeroed. */
+	memset(new, 0, sizeof(*new));
+	memset(&new->attrs[num], 0, sizeof(new->attrs[num]));
+	memset(&attrs[num], 0, sizeof(attrs[num]));
+	new->grp.name = "parameters";
+	new->grp.attrs = attrs;
+
+	/* Tack new one on the end. */
+	new->attrs[num].param = kp;
+	new->attrs[num].mattr.show = param_attr_show;
+	new->attrs[num].mattr.store = param_attr_store;
+	new->attrs[num].mattr.attr.name = (char *)name;
+	new->attrs[num].mattr.attr.mode = kp->perm;
+	new->num = num+1;
+
+	/* Fix up all the pointers, since krealloc can move us */
+	for (num = 0; num < new->num; num++)
+		new->grp.attrs[num] = &new->attrs[num].mattr.attr;
+	new->grp.attrs[num] = NULL;
+
+	mk->mp = new;
+	return 0;
 
-	pattr = &mp->attrs[0];
-	gattr = &mp->grp.attrs[0];
-	for (i = 0; i < num_params; i++) {
-		struct kernel_param *kp = &kparam[i];
-		if (kp->perm) {
-			pattr->param = kp;
-			pattr->mattr.show = param_attr_show;
-			pattr->mattr.store = param_attr_store;
-			pattr->mattr.attr.name = (char *)&kp->name[name_skip];
-			pattr->mattr.attr.mode = kp->perm;
-			*(gattr++) = &(pattr++)->mattr.attr;
-		}
-	}
-	*gattr = NULL;
+fail_free_new:
+	kfree(new);
+fail:
+	mk->mp = NULL;
+	return err;
+}
 
-	if ((err = sysfs_create_group(&mk->kobj, &mp->grp))) {
-		kfree(mp);
-		return ERR_PTR(err);
-	}
-	return mp;
+static void free_module_param_attrs(struct module_kobject *mk)
+{
+	kfree(mk->mp->grp.attrs);
+	kfree(mk->mp);
+	mk->mp = NULL;
 }
 
 #ifdef CONFIG_MODULES
@@ -506,21 +524,33 @@ param_sysfs_setup(struct module_kobject *mk,
  * @kparam: module parameters (array)
  * @num_params: number of module parameters
  *
- * Adds sysfs entries for module parameters, and creates a link from
- * /sys/module/[mod->name]/parameters to /sys/parameters/[mod->name]/
+ * Adds sysfs entries for module parameters under
+ * /sys/module/[mod->name]/parameters/
  */
 int module_param_sysfs_setup(struct module *mod,
 			     struct kernel_param *kparam,
 			     unsigned int num_params)
 {
-	struct module_param_attrs *mp;
+	int i, err;
+	bool params = false;
+
+	for (i = 0; i < num_params; i++) {
+		if (kparam[i].perm == 0)
+			continue;
+		err = add_sysfs_param(&mod->mkobj, &kparam[i], kparam[i].name);
+		if (err)
+			return err;
+		params = true;
+	}
 
-	mp = param_sysfs_setup(&mod->mkobj, kparam, num_params, 0);
-	if (IS_ERR(mp))
-		return PTR_ERR(mp);
+	if (!params)
+		return 0;
 
-	mod->param_attrs = mp;
-	return 0;
+	/* Create the param group. */
+	err = sysfs_create_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp);
+	if (err)
+		free_module_param_attrs(&mod->mkobj);
+	return err;
 }
 
 /*
@@ -532,43 +562,55 @@ int module_param_sysfs_setup(struct module *mod,
  */
 void module_param_sysfs_remove(struct module *mod)
 {
-	if (mod->param_attrs) {
-		sysfs_remove_group(&mod->mkobj.kobj,
-				   &mod->param_attrs->grp);
+	if (mod->mkobj.mp) {
+		sysfs_remove_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp);
 		/* We are positive that no one is using any param
 		 * attrs at this point.  Deallocate immediately. */
-		kfree(mod->param_attrs);
-		mod->param_attrs = NULL;
+		free_module_param_attrs(&mod->mkobj);
 	}
 }
 #endif
 
-/*
- * kernel_param_sysfs_setup - wrapper for built-in params support
- */
-static void __init kernel_param_sysfs_setup(const char *name,
-					    struct kernel_param *kparam,
-					    unsigned int num_params,
-					    unsigned int name_skip)
+static void __init kernel_add_sysfs_param(const char *name,
+					  struct kernel_param *kparam,
+					  unsigned int name_skip)
 {
 	struct module_kobject *mk;
-	int ret;
+	struct kobject *kobj;
+	int err;
 
-	mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
-	BUG_ON(!mk);
-
-	mk->mod = THIS_MODULE;
-	mk->kobj.kset = module_kset;
-	ret = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, "%s", name);
-	if (ret) {
-		kobject_put(&mk->kobj);
-		printk(KERN_ERR "Module '%s' failed to be added to sysfs, "
-		       "error number %d\n", name, ret);
-		printk(KERN_ERR "The system will be unstable now.\n");
-		return;
+	kobj = kset_find_obj(module_kset, name);
+	if (kobj) {
+		/* We already have one.  Remove params so we can add more. */
+		mk = to_module_kobject(kobj);
+		/* We need to remove it before adding parameters. */
+		sysfs_remove_group(&mk->kobj, &mk->mp->grp);
+	} else {
+		mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
+		BUG_ON(!mk);
+
+		mk->mod = THIS_MODULE;
+		mk->kobj.kset = module_kset;
+		err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL,
+					   "%s", name);
+		if (err) {
+			kobject_put(&mk->kobj);
+			printk(KERN_ERR "Module '%s' failed add to sysfs, "
+			       "error number %d\n", name, err);
+			printk(KERN_ERR "The system will be unstable now.\n");
+			return;
+		}
+		/* So that exit path is even. */
+		kobject_get(&mk->kobj);
 	}
-	param_sysfs_setup(mk, kparam, num_params, name_skip);
+
+	/* These should not fail at boot. */
+	err = add_sysfs_param(mk, kparam, kparam->name + name_skip);
+	BUG_ON(err);
+	err = sysfs_create_group(&mk->kobj, &mk->mp->grp);
+	BUG_ON(err);
 	kobject_uevent(&mk->kobj, KOBJ_ADD);
+	kobject_put(&mk->kobj);
 }
 
 /*
@@ -579,60 +621,36 @@ static void __init kernel_param_sysfs_setup(const char *name,
  * The "module" name (KBUILD_MODNAME) is stored before a dot, the
  * "parameter" name is stored behind a dot in kernel_param->name. So,
  * extract the "module" name for all built-in kernel_param-eters,
- * and for all who have the same, call kernel_param_sysfs_setup.
+ * and for all who have the same, call kernel_add_sysfs_param.
  */
 static void __init param_sysfs_builtin(void)
 {
-	struct kernel_param *kp, *kp_begin = NULL;
-	unsigned int i, name_len, count = 0;
-	char modname[MODULE_NAME_LEN + 1] = "";
+	struct kernel_param *kp;
+	unsigned int name_len;
+	char modname[MODULE_NAME_LEN];
 
-	for (i=0; i < __stop___param - __start___param; i++) {
+	for (kp = __start___param; kp < __stop___param; kp++) {
 		char *dot;
-		size_t max_name_len;
 
-		kp = &__start___param[i];
-		max_name_len =
-			min_t(size_t, MODULE_NAME_LEN, strlen(kp->name));
+		if (kp->perm == 0)
+			continue;
 
-		dot = memchr(kp->name, '.', max_name_len);
+		dot = strchr(kp->name, '.');
 		if (!dot) {
-			DEBUGP("couldn't find period in first %d characters "
-			       "of %s\n", MODULE_NAME_LEN, kp->name);
-			continue;
-		}
-		name_len = dot - kp->name;
-
-		/* new kbuild_modname? */
-		if (strlen(modname) != name_len
-		    || strncmp(modname, kp->name, name_len) != 0) {
-			/* add a new kobject for previous kernel_params. */
-			if (count)
-				kernel_param_sysfs_setup(modname,
-							 kp_begin,
-							 count,
-							 strlen(modname)+1);
-
-			strncpy(modname, kp->name, name_len);
-			modname[name_len] = '\0';
-			count = 0;
-			kp_begin = kp;
+			/* This happens for core_param() */
+			strcpy(modname, "kernel");
+			name_len = 0;
+		} else {
+			name_len = dot - kp->name + 1;
+			strlcpy(modname, kp->name, name_len);
 		}
-		count++;
+		kernel_add_sysfs_param(modname, kp, name_len);
 	}
-
-	/* last kernel_params need to be registered as well */
-	if (count)
-		kernel_param_sysfs_setup(modname, kp_begin, count,
-					 strlen(modname)+1);
 }
 
 
 /* module-related sysfs stuff */
 
-#define to_module_attr(n) container_of(n, struct module_attribute, attr);
-#define to_module_kobject(n) container_of(n, struct module_kobject, kobj);
-
 static ssize_t module_attr_show(struct kobject *kobj,
 				struct attribute *attr,
 				char *buf)
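
One subtlety in the add_sysfs_param() rewrite above deserves a note: krealloc() may move the block, so the grp.attrs pointer array (whose entries point into attrs[] inside the same allocation) has to be rebuilt after every growth, which is exactly what the "Fix up all the pointers" loop does. Reduced to its essentials with illustrative types and names (a sketch, not the params.c code):

	#include <linux/slab.h>
	#include <linux/sysfs.h>

	struct rec {
		struct attribute attr;	/* sysfs needs stable struct attribute pointers */
	};

	struct group {
		unsigned int num;
		struct attribute **ptrs;	/* NULL-terminated, points into recs[] */
		struct rec recs[];
	};

	static int group_grow(struct group **gp)
	{
		struct group *g = *gp;
		unsigned int num = g ? g->num : 0;
		struct attribute **ptrs = g ? g->ptrs : NULL;
		unsigned int i;

		/* Grow the records; krealloc() may move the whole block. */
		g = krealloc(g, sizeof(*g) + sizeof(g->recs[0]) * (num + 1),
			     GFP_KERNEL);
		if (!g) {
			kfree(*gp);	/* failed krealloc() left the old block alive */
			goto fail;
		}

		/* Grow the pointer array too (one new slot + NULL terminator). */
		ptrs = krealloc(ptrs, sizeof(*ptrs) * (num + 2), GFP_KERNEL);
		if (!ptrs) {
			kfree(g);
			goto fail;
		}

		g->num = num + 1;
		g->ptrs = ptrs;
		/* Re-fix every pointer: recs[] may live at a new address now. */
		for (i = 0; i < g->num; i++)
			g->ptrs[i] = &g->recs[i].attr;
		g->ptrs[i] = NULL;

		*gp = g;
		return 0;
	fail:
		*gp = NULL;
		return -ENOMEM;
	}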
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 467d5940f624..ad63af8b2521 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -119,18 +119,19 @@ static void _rcu_barrier(enum rcu_barrier type)
 	/* Take cpucontrol mutex to protect against CPU hotplug */
 	mutex_lock(&rcu_barrier_mutex);
 	init_completion(&rcu_barrier_completion);
-	atomic_set(&rcu_barrier_cpu_count, 0);
 	/*
-	 * The queueing of callbacks in all CPUs must be atomic with
-	 * respect to RCU, otherwise one CPU may queue a callback,
-	 * wait for a grace period, decrement barrier count and call
-	 * complete(), while other CPUs have not yet queued anything.
-	 * So, we need to make sure that grace periods cannot complete
-	 * until all the callbacks are queued.
+	 * Initialize rcu_barrier_cpu_count to 1, then invoke
+	 * rcu_barrier_func() on each CPU, so that each CPU also has
+	 * incremented rcu_barrier_cpu_count.  Only then is it safe to
+	 * decrement rcu_barrier_cpu_count -- otherwise the first CPU
+	 * might complete its grace period before all of the other CPUs
+	 * did their increment, causing this function to return too
+	 * early.
 	 */
-	rcu_read_lock();
+	atomic_set(&rcu_barrier_cpu_count, 1);
 	on_each_cpu(rcu_barrier_func, (void *)type, 1);
-	rcu_read_unlock();
+	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+		complete(&rcu_barrier_completion);
 	wait_for_completion(&rcu_barrier_completion);
 	mutex_unlock(&rcu_barrier_mutex);
 }
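
The rcu_barrier fix is an instance of a general pattern for waiting on N asynchronously-dispatched callbacks: bias the counter to 1 before dispatching anything, and drop that bias only once dispatch is complete, so the count cannot reach zero early. A generic sketch with hypothetical names (dispatch_worker() stands in for on_each_cpu() above):

	#include <linux/completion.h>
	#include <asm/atomic.h>

	static atomic_t pending;
	static struct completion done;

	/* Every dispatched callback ends by calling this. */
	static void callback_finished(void)
	{
		if (atomic_dec_and_test(&pending))
			complete(&done);
	}

	static void wait_for_all(int nr)
	{
		int i;

		init_completion(&done);
		atomic_set(&pending, 1);	/* bias: can't hit zero mid-dispatch */
		for (i = 0; i < nr; i++) {
			atomic_inc(&pending);
			dispatch_worker(i);	/* hypothetical; ends in callback_finished() */
		}
		callback_finished();		/* drop our bias */
		wait_for_completion(&done);
	}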
diff --git a/kernel/sched.c b/kernel/sched.c
index d906f72b42d2..945a97b9600d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -819,6 +819,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
 unsigned int sysctl_sched_shares_ratelimit = 250000;
 
 /*
+ * Inject some fuzzyness into changing the per-cpu group shares
+ * this avoids remote rq-locks at the expense of fairness.
+ * default: 4
+ */
+unsigned int sysctl_sched_shares_thresh = 4;
+
+/*
  * period over which we measure -rt task cpu usage in us.
  * default: 1s
  */
@@ -1454,8 +1461,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares);
  * Calculate and set the cpu's group shares.
  */
 static void
-__update_group_shares_cpu(struct task_group *tg, int cpu,
-			  unsigned long sd_shares, unsigned long sd_rq_weight)
+update_group_shares_cpu(struct task_group *tg, int cpu,
+			unsigned long sd_shares, unsigned long sd_rq_weight)
 {
 	int boost = 0;
 	unsigned long shares;
@@ -1486,19 +1493,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
 	 *
 	 */
 	shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+	shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
 
-	/*
-	 * record the actual number of shares, not the boosted amount.
-	 */
-	tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-	tg->cfs_rq[cpu]->rq_weight = rq_weight;
+	if (abs(shares - tg->se[cpu]->load.weight) >
+			sysctl_sched_shares_thresh) {
+		struct rq *rq = cpu_rq(cpu);
+		unsigned long flags;
 
-	if (shares < MIN_SHARES)
-		shares = MIN_SHARES;
-	else if (shares > MAX_SHARES)
-		shares = MAX_SHARES;
+		spin_lock_irqsave(&rq->lock, flags);
+		/*
+		 * record the actual number of shares, not the boosted amount.
+		 */
+		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
+		tg->cfs_rq[cpu]->rq_weight = rq_weight;
 
-	__set_se_shares(tg->se[cpu], shares);
+		__set_se_shares(tg->se[cpu], shares);
+		spin_unlock_irqrestore(&rq->lock, flags);
+	}
 }
 
 /*
@@ -1527,14 +1538,8 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	if (!rq_weight)
 		rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
 
-	for_each_cpu_mask(i, sd->span) {
-		struct rq *rq = cpu_rq(i);
-		unsigned long flags;
-
-		spin_lock_irqsave(&rq->lock, flags);
-		__update_group_shares_cpu(tg, i, shares, rq_weight);
-		spin_unlock_irqrestore(&rq->lock, flags);
-	}
+	for_each_cpu_mask(i, sd->span)
+		update_group_shares_cpu(tg, i, shares, rq_weight);
 
 	return 0;
 }
@@ -4443,12 +4448,8 @@ need_resched_nonpreemptible:
 	if (sched_feat(HRTICK))
 		hrtick_clear(rq);
 
-	/*
-	 * Do the rq-clock update outside the rq lock:
-	 */
-	local_irq_disable();
+	spin_lock_irq(&rq->lock);
 	update_rq_clock(rq);
-	spin_lock(&rq->lock);
 	clear_tsk_need_resched(prev);
 
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f604dae71316..9573c33688b8 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -73,6 +73,8 @@ unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
+static const struct sched_class fair_sched_class;
+
 /**************************************************************
  * CFS operations on generic schedulable entities:
  */
@@ -334,7 +336,7 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
 #endif
 
 /*
- * delta *= w / rw
+ * delta *= P[w / rw]
  */
 static inline unsigned long
 calc_delta_weight(unsigned long delta, struct sched_entity *se)
@@ -348,15 +350,13 @@ calc_delta_weight(unsigned long delta, struct sched_entity *se)
 }
 
 /*
- * delta *= rw / w
+ * delta /= w
  */
 static inline unsigned long
 calc_delta_fair(unsigned long delta, struct sched_entity *se)
 {
-	for_each_sched_entity(se) {
-		delta = calc_delta_mine(delta,
-				cfs_rq_of(se)->load.weight, &se->load);
-	}
+	if (unlikely(se->load.weight != NICE_0_LOAD))
+		delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load);
 
 	return delta;
 }
@@ -386,26 +386,26 @@ static u64 __sched_period(unsigned long nr_running)
  * We calculate the wall-time slice from the period by taking a part
  * proportional to the weight.
  *
- * s = p*w/rw
+ * s = p*P[w/rw]
  */
 static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	return calc_delta_weight(__sched_period(cfs_rq->nr_running), se);
+	unsigned long nr_running = cfs_rq->nr_running;
+
+	if (unlikely(!se->on_rq))
+		nr_running++;
+
+	return calc_delta_weight(__sched_period(nr_running), se);
 }
 
 /*
  * We calculate the vruntime slice of a to be inserted task
  *
- * vs = s*rw/w = p
+ * vs = s/w
  */
-static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	unsigned long nr_running = cfs_rq->nr_running;
-
-	if (!se->on_rq)
-		nr_running++;
-
-	return __sched_period(nr_running);
+	return calc_delta_fair(sched_slice(cfs_rq, se), se);
 }
 
 /*
@@ -628,7 +628,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	 * stays open at the end.
 	 */
 	if (initial && sched_feat(START_DEBIT))
-		vruntime += sched_vslice_add(cfs_rq, se);
+		vruntime += sched_vslice(cfs_rq, se);
 
 	if (!initial) {
 		/* sleeps upto a single latency don't count. */
@@ -748,7 +748,7 @@ pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	struct rq *rq = rq_of(cfs_rq);
 	u64 pair_slice = rq->clock - cfs_rq->pair_start;
 
-	if (!cfs_rq->next || pair_slice > sched_slice(cfs_rq, cfs_rq->next)) {
+	if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
 		cfs_rq->pair_start = rq->clock;
 		return se;
 	}
@@ -849,11 +849,31 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
 		hrtick_start(rq, delta);
 	}
 }
+
+/*
+ * called from enqueue/dequeue and updates the hrtick when the
+ * current task is from our class and nr_running is low enough
+ * to matter.
+ */
+static void hrtick_update(struct rq *rq)
+{
+	struct task_struct *curr = rq->curr;
+
+	if (curr->sched_class != &fair_sched_class)
+		return;
+
+	if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency)
+		hrtick_start_fair(rq, curr);
+}
 #else /* !CONFIG_SCHED_HRTICK */
 static inline void
 hrtick_start_fair(struct rq *rq, struct task_struct *p)
 {
 }
+
+static inline void hrtick_update(struct rq *rq)
+{
+}
 #endif
 
 /*
@@ -874,7 +894,7 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 		wakeup = 1;
 	}
 
-	hrtick_start_fair(rq, rq->curr);
+	hrtick_update(rq);
 }
 
 /*
@@ -896,7 +916,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
 		sleep = 1;
 	}
 
-	hrtick_start_fair(rq, rq->curr);
+	hrtick_update(rq);
 }
 
 /*
@@ -1002,8 +1022,6 @@ static inline int wake_idle(int cpu, struct task_struct *p)
 
 #ifdef CONFIG_SMP
 
-static const struct sched_class fair_sched_class;
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
  * effective_load() calculates the load change as seen from the root_task_group
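
A decoding note for the comment changes above: P[x] denotes the product of x taken over each level of the group-scheduling hierarchy, so on a flat runqueue s = p*P[w/rw] reduces to the old s = p*w/rw. The vruntime slice vs = s/w is expressed in NICE_0_LOAD units, since calc_delta_fair() now scales by NICE_0_LOAD/w:

	s  = p * P[w/rw]            (wall-clock slice of period p)
	vs = s * NICE_0_LOAD / w    (what calc_delta_fair() computes)

so a nice-0 entity (w == NICE_0_LOAD) gets vs == s, and heavier entities accumulate vruntime correspondingly more slowly.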
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 7c9e8f4a049f..fda016218296 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -5,7 +5,7 @@ SCHED_FEAT(START_DEBIT, 1)
 SCHED_FEAT(AFFINE_WAKEUPS, 1)
 SCHED_FEAT(CACHE_HOT_BUDDY, 1)
 SCHED_FEAT(SYNC_WAKEUPS, 1)
-SCHED_FEAT(HRTICK, 1)
+SCHED_FEAT(HRTICK, 0)
 SCHED_FEAT(DOUBLE_TICK, 0)
 SCHED_FEAT(ASYM_GRAN, 1)
 SCHED_FEAT(LB_BIAS, 1)
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index b8c156979cf2..2df9d297d292 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -9,7 +9,7 @@
 static int show_schedstat(struct seq_file *seq, void *v)
 {
 	int cpu;
-	int mask_len = NR_CPUS/32 * 9;
+	int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
 	char *mask_str = kmalloc(mask_len, GFP_KERNEL);

 	if (mask_str == NULL)
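The old sizing truncates when NR_CPUS is not a multiple of 32: each 32-bit cpumask word prints as up to 9 characters (8 hex digits plus a separator), so for example NR_CPUS = 40 needs two words but 40/32 * 9 reserves space for only one. A quick standalone check of the arithmetic:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	int nr_cpus = 40;	/* any non-multiple of 32 shows the bug */

	printf("old: %d bytes\n", nr_cpus / 32 * 9);		/* 9  */
	printf("new: %d bytes\n", DIV_ROUND_UP(nr_cpus, 32) * 9); /* 18 */
	return 0;
}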
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index af3c7cea258b..8aff79d90ddc 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -37,9 +37,13 @@ struct stop_machine_data {
 /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
 static unsigned int num_threads;
 static atomic_t thread_ack;
-static struct completion finished;
 static DEFINE_MUTEX(lock);

+static struct workqueue_struct *stop_machine_wq;
+static struct stop_machine_data active, idle;
+static const cpumask_t *active_cpus;
+static void *stop_machine_work;
+
 static void set_state(enum stopmachine_state newstate)
 {
 	/* Reset ack counter. */
@@ -51,21 +55,26 @@ static void set_state(enum stopmachine_state newstate)
 /* Last one to ack a state moves to the next state. */
 static void ack_state(void)
 {
-	if (atomic_dec_and_test(&thread_ack)) {
-		/* If we're the last one to ack the EXIT, we're finished. */
-		if (state == STOPMACHINE_EXIT)
-			complete(&finished);
-		else
-			set_state(state + 1);
-	}
+	if (atomic_dec_and_test(&thread_ack))
+		set_state(state + 1);
 }

-/* This is the actual thread which stops the CPU. It exits by itself rather
- * than waiting for kthread_stop(), because it's easier for hotplug CPU. */
-static int stop_cpu(struct stop_machine_data *smdata)
+/* This is the actual function which stops the CPU. It runs
+ * in the context of a dedicated stopmachine workqueue. */
+static void stop_cpu(struct work_struct *unused)
 {
 	enum stopmachine_state curstate = STOPMACHINE_NONE;
-
+	struct stop_machine_data *smdata = &idle;
+	int cpu = smp_processor_id();
+	int err;
+
+	if (!active_cpus) {
+		if (cpu == first_cpu(cpu_online_map))
+			smdata = &active;
+	} else {
+		if (cpu_isset(cpu, *active_cpus))
+			smdata = &active;
+	}
 	/* Simple state machine */
 	do {
 		/* Chill out and ensure we re-read stopmachine_state. */
@@ -78,9 +87,11 @@ static int stop_cpu(struct stop_machine_data *smdata)
 			hard_irq_disable();
 			break;
 		case STOPMACHINE_RUN:
-			/* |= allows error detection if functions on
-			 * multiple CPUs. */
-			smdata->fnret |= smdata->fn(smdata->data);
+			/* On multiple CPUs only a single error code
+			 * is needed to tell that something failed. */
+			err = smdata->fn(smdata->data);
+			if (err)
+				smdata->fnret = err;
 			break;
 		default:
 			break;
@@ -90,7 +101,6 @@ static int stop_cpu(struct stop_machine_data *smdata)
 	} while (curstate != STOPMACHINE_EXIT);

 	local_irq_enable();
-	do_exit(0);
 }

 /* Callback for CPUs which aren't supposed to do anything. */
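Taken together, these hunks leave stop_cpu() as a pure state-machine walker: every CPU spins until the shared state advances, ack_state() lets the last acker move everyone forward, and the loop now simply falls out after STOPMACHINE_EXIT instead of calling do_exit(), since the worker thread is reused. A single-threaded sketch of the progression (state names as in the kernel, the rest illustrative):

#include <stdio.h>

enum stopmachine_state {
	STOPMACHINE_NONE,
	STOPMACHINE_PREPARE,
	STOPMACHINE_DISABLE_IRQ,	/* every CPU hard-disables irqs */
	STOPMACHINE_RUN,		/* the active CPU(s) call fn(data) */
	STOPMACHINE_EXIT,
};

int main(void)
{
	enum stopmachine_state curstate = STOPMACHINE_NONE;

	/* With one CPU the "last acker" is always us, so ack_state()'s
	 * set_state(state + 1) degenerates to a plain increment. */
	do {
		curstate = curstate + 1;
		printf("acked state %d, advancing\n", curstate);
	} while (curstate != STOPMACHINE_EXIT);
	return 0;
}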
@@ -101,78 +111,34 @@ static int chill(void *unused)

 int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
 {
-	int i, err;
-	struct stop_machine_data active, idle;
-	struct task_struct **threads;
+	struct work_struct *sm_work;
+	int i;

+	/* Set up initial state. */
+	mutex_lock(&lock);
+	num_threads = num_online_cpus();
+	active_cpus = cpus;
 	active.fn = fn;
 	active.data = data;
 	active.fnret = 0;
 	idle.fn = chill;
 	idle.data = NULL;

-	/* This could be too big for stack on large machines. */
-	threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
-	if (!threads)
-		return -ENOMEM;
-
-	/* Set up initial state. */
-	mutex_lock(&lock);
-	init_completion(&finished);
-	num_threads = num_online_cpus();
 	set_state(STOPMACHINE_PREPARE);

-	for_each_online_cpu(i) {
-		struct stop_machine_data *smdata = &idle;
-		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-
-		if (!cpus) {
-			if (i == first_cpu(cpu_online_map))
-				smdata = &active;
-		} else {
-			if (cpu_isset(i, *cpus))
-				smdata = &active;
-		}
-
-		threads[i] = kthread_create((void *)stop_cpu, smdata, "kstop%u",
-					    i);
-		if (IS_ERR(threads[i])) {
-			err = PTR_ERR(threads[i]);
-			threads[i] = NULL;
-			goto kill_threads;
-		}
-
-		/* Place it onto correct cpu. */
-		kthread_bind(threads[i], i);
-
-		/* Make it highest prio. */
-		if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, &param))
-			BUG();
-	}
-
-	/* We've created all the threads. Wake them all: hold this CPU so one
+	/* Schedule the stop_cpu work on all cpus: hold this CPU so one
 	 * doesn't hit this CPU until we're ready. */
 	get_cpu();
-	for_each_online_cpu(i)
-		wake_up_process(threads[i]);
-
+	for_each_online_cpu(i) {
+		sm_work = percpu_ptr(stop_machine_work, i);
+		INIT_WORK(sm_work, stop_cpu);
+		queue_work_on(i, stop_machine_wq, sm_work);
+	}
 	/* This will release the thread on our CPU. */
 	put_cpu();
-	wait_for_completion(&finished);
+	flush_workqueue(stop_machine_wq);
 	mutex_unlock(&lock);
-
-	kfree(threads);
-
 	return active.fnret;
-
-kill_threads:
-	for_each_online_cpu(i)
-		if (threads[i])
-			kthread_stop(threads[i]);
-	mutex_unlock(&lock);
-
-	kfree(threads);
-	return err;
 }

 int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
@@ -187,3 +153,11 @@ int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(stop_machine);
+
+static int __init stop_machine_init(void)
+{
+	stop_machine_wq = create_rt_workqueue("kstop");
+	stop_machine_work = alloc_percpu(struct work_struct);
+	return 0;
+}
+early_initcall(stop_machine_init);
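The external interface is untouched by this conversion: callers still pass a function, an argument, and an optional cpumask, and only the machinery underneath changed from per-call SCHED_FIFO kthreads to this persistent RT workqueue. A hedged caller sketch (kernel-flavoured, names hypothetical; per the code above, a NULL mask means the first online CPU runs fn while the rest idle with irqs off):

#include <linux/stop_machine.h>

/* Hypothetical example, not from this patch. */
static int set_some_global(void *data)
{
	/* Runs on one CPU; all others spin in stop_cpu() with hard
	 * irqs disabled, so the store is fully atomic system-wide. */
	*(int *)data = 1;
	return 0;
}

static int example_caller(void)
{
	static int flag;

	/* NULL cpumask: the first online CPU executes the function. */
	return stop_machine(set_some_global, &flag, NULL);
}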
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b3cc73931d1f..a13bd4dfaeb1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -276,6 +276,16 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_shares_thresh",
+		.data		= &sysctl_sched_shares_thresh,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_child_runs_first",
 		.data		= &sysctl_sched_child_runs_first,
 		.maxlen		= sizeof(unsigned int),
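This exposes the sysctl_sched_shares_thresh tunable as /proc/sys/kernel/sched_shares_thresh, with proc_dointvec_minmax clamping writes to a minimum of zero. A hedged user-space read of the new file, assuming a kernel with this patch applied:

#include <stdio.h>

int main(void)
{
	unsigned int val;
	FILE *f = fopen("/proc/sys/kernel/sched_shares_thresh", "r");

	if (!f || fscanf(f, "%u", &val) != 1)
		return 1;
	printf("sched_shares_thresh = %u\n", val);
	fclose(f);
	return 0;
}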
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 0581c11fe6c6..727c1ae0517a 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -567,11 +567,21 @@ static void tick_nohz_switch_to_nohz(void)
 static void tick_nohz_kick_tick(int cpu)
 {
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	ktime_t delta, now;

 	if (!ts->tick_stopped)
 		return;

-	tick_nohz_restart(ts, ktime_get());
+	/*
+	 * Do not touch the tick device, when the next expiry is either
+	 * already reached or less/equal than the tick period.
+	 */
+	now = ktime_get();
+	delta = ktime_sub(ts->sched_timer.expires, now);
+	if (delta.tv64 <= tick_period.tv64)
+		return;
+
+	tick_nohz_restart(ts, now);
 }

 #else
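The kick now computes how far away the next timer expiry is and leaves the tick device untouched when it is due within one tick period anyway, avoiding a pointless reprogram. The same comparison with plain 64-bit nanoseconds standing in for ktime_t (.tv64 is the raw value the kernel compares):

#include <stdio.h>

typedef long long s64;

/* Scalar stand-in for the ktime_t logic above. */
static int should_restart(s64 expires_ns, s64 now_ns, s64 tick_period_ns)
{
	s64 delta = expires_ns - now_ns;	/* ktime_sub() */

	/* Next expiry already reached or within one tick: do nothing. */
	if (delta <= tick_period_ns)
		return 0;
	return 1;	/* worth reprogramming via tick_nohz_restart() */
}

int main(void)
{
	/* HZ=1000 -> 1 ms tick period; timer due in 0.5 ms: skip. */
	printf("%d\n", should_restart(1500000, 1000000, 1000000));	/* 0 */
	/* Timer due in 5 ms: restart the tick. */
	printf("%d\n", should_restart(6000000, 1000000, 1000000));	/* 1 */
	return 0;
}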
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 714afad46539..f928f2a87b9b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -62,6 +62,7 @@ struct workqueue_struct {
 	const char *name;
 	int singlethread;
 	int freezeable;		/* Freeze threads during suspend */
+	int rt;
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map lockdep_map;
 #endif
@@ -766,6 +767,7 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)

 static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 	struct workqueue_struct *wq = cwq->wq;
 	const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d";
 	struct task_struct *p;
@@ -781,7 +783,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 	 */
 	if (IS_ERR(p))
 		return PTR_ERR(p);
-
+	if (cwq->wq->rt)
+		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
 	cwq->thread = p;

 	return 0;
@@ -801,6 +804,7 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 struct workqueue_struct *__create_workqueue_key(const char *name,
 						int singlethread,
 						int freezeable,
+						int rt,
 						struct lock_class_key *key,
 						const char *lock_name)
 {
@@ -822,6 +826,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
 	wq->singlethread = singlethread;
 	wq->freezeable = freezeable;
+	wq->rt = rt;
 	INIT_LIST_HEAD(&wq->list);

 	if (singlethread) {
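With the rt flag plumbed through __create_workqueue_key(), every worker thread of such a queue is switched to SCHED_FIFO at MAX_RT_PRIO-1 right after creation; the stop_machine conversion above consumes this via create_rt_workqueue("kstop"). A hedged sketch of the resulting usage pattern (hypothetical names, calls as seen in this diff):

#include <linux/workqueue.h>
#include <linux/percpu.h>
#include <linux/cpu.h>

/* Sketch only, not a complete module (cf. stop_machine_init and
 * __stop_machine above). */
static struct workqueue_struct *my_rt_wq;
static DEFINE_PER_CPU(struct work_struct, my_work);

static void my_fn(struct work_struct *work)
{
	/* Runs in a SCHED_FIFO worker thread on the target CPU. */
}

static int __init my_init(void)
{
	int cpu;

	my_rt_wq = create_rt_workqueue("my_rt");
	if (!my_rt_wq)
		return -ENOMEM;

	for_each_online_cpu(cpu) {
		struct work_struct *w = &per_cpu(my_work, cpu);

		INIT_WORK(w, my_fn);
		queue_work_on(cpu, my_rt_wq, w);
	}
	flush_workqueue(my_rt_wq);
	return 0;
}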