Diffstat (limited to 'kernel')
-rw-r--r--  kernel/capability.c     |  16
-rw-r--r--  kernel/cpu.c            |   3
-rw-r--r--  kernel/cpuset.c         | 152
-rw-r--r--  kernel/exec_domain.c    |   1
-rw-r--r--  kernel/fork.c           |   1
-rw-r--r--  kernel/irq/Makefile     |   3
-rw-r--r--  kernel/irq/manage.c     |  23
-rw-r--r--  kernel/irq/migration.c  |  65
-rw-r--r--  kernel/itimer.c         | 103
-rw-r--r--  kernel/ksysfs.c         |   4
-rw-r--r--  kernel/kthread.c        |   2
-rw-r--r--  kernel/module.c         | 202
-rw-r--r--  kernel/params.c         |  12
-rw-r--r--  kernel/power/smp.c      |   4
-rw-r--r--  kernel/printk.c         |  76
-rw-r--r--  kernel/rcupdate.c       |   5
-rw-r--r--  kernel/rcutorture.c     |  33
-rw-r--r--  kernel/softlockup.c     |  55
-rw-r--r--  kernel/sys.c            |  68
-rw-r--r--  kernel/sysctl.c         |  19
-rw-r--r--  kernel/time.c           |  59
-rw-r--r--  kernel/timer.c          |  74
-rw-r--r--  kernel/user.c           |  10
23 files changed, 549 insertions, 441 deletions
diff --git a/kernel/capability.c b/kernel/capability.c
index bfa3c92e16f2..1a4d8a40d3f9 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -233,3 +233,19 @@ out:
233 233
234 return ret; 234 return ret;
235} 235}
236
237int __capable(struct task_struct *t, int cap)
238{
239 if (security_capable(t, cap) == 0) {
240 t->flags |= PF_SUPERPRIV;
241 return 1;
242 }
243 return 0;
244}
245EXPORT_SYMBOL(__capable);
246
247int capable(int cap)
248{
249 return __capable(current, cap);
250}
251EXPORT_SYMBOL(capable);
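For context, callers elsewhere in the tree are unaffected by this split: capable() keeps its old behaviour, and __capable() merely allows the same check to be applied to a task other than current. A minimal sketch of the usual call pattern follows; the ioctl wrapper itself is hypothetical, only capable() and CAP_SYS_ADMIN come from the kernel:

    #include <linux/capability.h>
    #include <linux/errno.h>
    #include <linux/fs.h>

    /* Hypothetical handler: only privileged tasks may issue this command.
     * capable() tests current and sets PF_SUPERPRIV on success, which is
     * exactly what the new __capable(current, cap) wrapper above does. */
    static long example_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
    {
            if (!capable(CAP_SYS_ADMIN))
                    return -EPERM;
            /* ... privileged work ... */
            return 0;
    }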
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e882c6babf41..8be22bd80933 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -223,8 +223,7 @@ int __devinit cpu_up(unsigned int cpu)
223 ret = __cpu_up(cpu); 223 ret = __cpu_up(cpu);
224 if (ret != 0) 224 if (ret != 0)
225 goto out_notify; 225 goto out_notify;
226 if (!cpu_online(cpu)) 226 BUG_ON(!cpu_online(cpu));
227 BUG();
228 227
229 /* Now call notifier in preparation. */ 228 /* Now call notifier in preparation. */
230 notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu); 229 notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index c86ee051b734..18aea1bd1284 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -4,15 +4,14 @@
4 * Processor and Memory placement constraints for sets of tasks. 4 * Processor and Memory placement constraints for sets of tasks.
5 * 5 *
6 * Copyright (C) 2003 BULL SA. 6 * Copyright (C) 2003 BULL SA.
7 * Copyright (C) 2004 Silicon Graphics, Inc. 7 * Copyright (C) 2004-2006 Silicon Graphics, Inc.
8 * 8 *
9 * Portions derived from Patrick Mochel's sysfs code. 9 * Portions derived from Patrick Mochel's sysfs code.
10 * sysfs is Copyright (c) 2001-3 Patrick Mochel 10 * sysfs is Copyright (c) 2001-3 Patrick Mochel
11 * Portions Copyright (c) 2004 Silicon Graphics, Inc.
12 * 11 *
13 * 2003-10-10 Written by Simon Derr <simon.derr@bull.net> 12 * 2003-10-10 Written by Simon Derr.
14 * 2003-10-22 Updates by Stephen Hemminger. 13 * 2003-10-22 Updates by Stephen Hemminger.
15 * 2004 May-July Rework by Paul Jackson <pj@sgi.com> 14 * 2004 May-July Rework by Paul Jackson.
16 * 15 *
17 * This file is subject to the terms and conditions of the GNU General Public 16 * This file is subject to the terms and conditions of the GNU General Public
18 * License. See the file COPYING in the main directory of the Linux 17 * License. See the file COPYING in the main directory of the Linux
@@ -108,37 +107,49 @@ typedef enum {
108 CS_MEM_EXCLUSIVE, 107 CS_MEM_EXCLUSIVE,
109 CS_MEMORY_MIGRATE, 108 CS_MEMORY_MIGRATE,
110 CS_REMOVED, 109 CS_REMOVED,
111 CS_NOTIFY_ON_RELEASE 110 CS_NOTIFY_ON_RELEASE,
111 CS_SPREAD_PAGE,
112 CS_SPREAD_SLAB,
112} cpuset_flagbits_t; 113} cpuset_flagbits_t;
113 114
114/* convenient tests for these bits */ 115/* convenient tests for these bits */
115static inline int is_cpu_exclusive(const struct cpuset *cs) 116static inline int is_cpu_exclusive(const struct cpuset *cs)
116{ 117{
117 return !!test_bit(CS_CPU_EXCLUSIVE, &cs->flags); 118 return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
118} 119}
119 120
120static inline int is_mem_exclusive(const struct cpuset *cs) 121static inline int is_mem_exclusive(const struct cpuset *cs)
121{ 122{
122 return !!test_bit(CS_MEM_EXCLUSIVE, &cs->flags); 123 return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
123} 124}
124 125
125static inline int is_removed(const struct cpuset *cs) 126static inline int is_removed(const struct cpuset *cs)
126{ 127{
127 return !!test_bit(CS_REMOVED, &cs->flags); 128 return test_bit(CS_REMOVED, &cs->flags);
128} 129}
129 130
130static inline int notify_on_release(const struct cpuset *cs) 131static inline int notify_on_release(const struct cpuset *cs)
131{ 132{
132 return !!test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); 133 return test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
133} 134}
134 135
135static inline int is_memory_migrate(const struct cpuset *cs) 136static inline int is_memory_migrate(const struct cpuset *cs)
136{ 137{
137 return !!test_bit(CS_MEMORY_MIGRATE, &cs->flags); 138 return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
139}
140
141static inline int is_spread_page(const struct cpuset *cs)
142{
143 return test_bit(CS_SPREAD_PAGE, &cs->flags);
144}
145
146static inline int is_spread_slab(const struct cpuset *cs)
147{
148 return test_bit(CS_SPREAD_SLAB, &cs->flags);
138} 149}
139 150
140/* 151/*
141 * Increment this atomic integer everytime any cpuset changes its 152 * Increment this integer everytime any cpuset changes its
142 * mems_allowed value. Users of cpusets can track this generation 153 * mems_allowed value. Users of cpusets can track this generation
143 * number, and avoid having to lock and reload mems_allowed unless 154 * number, and avoid having to lock and reload mems_allowed unless
144 * the cpuset they're using changes generation. 155 * the cpuset they're using changes generation.
@@ -152,8 +163,11 @@ static inline int is_memory_migrate(const struct cpuset *cs)
152 * on every visit to __alloc_pages(), to efficiently check whether 163 * on every visit to __alloc_pages(), to efficiently check whether
153 * its current->cpuset->mems_allowed has changed, requiring an update 164 * its current->cpuset->mems_allowed has changed, requiring an update
154 * of its current->mems_allowed. 165 * of its current->mems_allowed.
166 *
167 * Since cpuset_mems_generation is guarded by manage_mutex,
168 * there is no need to mark it atomic.
155 */ 169 */
156static atomic_t cpuset_mems_generation = ATOMIC_INIT(1); 170static int cpuset_mems_generation;
157 171
158static struct cpuset top_cpuset = { 172static struct cpuset top_cpuset = {
159 .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), 173 .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
@@ -657,6 +671,14 @@ void cpuset_update_task_memory_state(void)
657 cs = tsk->cpuset; /* Maybe changed when task not locked */ 671 cs = tsk->cpuset; /* Maybe changed when task not locked */
658 guarantee_online_mems(cs, &tsk->mems_allowed); 672 guarantee_online_mems(cs, &tsk->mems_allowed);
659 tsk->cpuset_mems_generation = cs->mems_generation; 673 tsk->cpuset_mems_generation = cs->mems_generation;
674 if (is_spread_page(cs))
675 tsk->flags |= PF_SPREAD_PAGE;
676 else
677 tsk->flags &= ~PF_SPREAD_PAGE;
678 if (is_spread_slab(cs))
679 tsk->flags |= PF_SPREAD_SLAB;
680 else
681 tsk->flags &= ~PF_SPREAD_SLAB;
660 task_unlock(tsk); 682 task_unlock(tsk);
661 mutex_unlock(&callback_mutex); 683 mutex_unlock(&callback_mutex);
662 mpol_rebind_task(tsk, &tsk->mems_allowed); 684 mpol_rebind_task(tsk, &tsk->mems_allowed);
@@ -858,8 +880,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
858 880
859 mutex_lock(&callback_mutex); 881 mutex_lock(&callback_mutex);
860 cs->mems_allowed = trialcs.mems_allowed; 882 cs->mems_allowed = trialcs.mems_allowed;
861 atomic_inc(&cpuset_mems_generation); 883 cs->mems_generation = cpuset_mems_generation++;
862 cs->mems_generation = atomic_read(&cpuset_mems_generation);
863 mutex_unlock(&callback_mutex); 884 mutex_unlock(&callback_mutex);
864 885
865 set_cpuset_being_rebound(cs); /* causes mpol_copy() rebind */ 886 set_cpuset_being_rebound(cs); /* causes mpol_copy() rebind */
@@ -957,7 +978,8 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
957/* 978/*
958 * update_flag - read a 0 or a 1 in a file and update associated flag 979 * update_flag - read a 0 or a 1 in a file and update associated flag
959 * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, 980 * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
960 * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE) 981 * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE,
982 * CS_SPREAD_PAGE, CS_SPREAD_SLAB)
961 * cs: the cpuset to update 983 * cs: the cpuset to update
962 * buf: the buffer where we read the 0 or 1 984 * buf: the buffer where we read the 0 or 1
963 * 985 *
@@ -1188,6 +1210,8 @@ typedef enum {
1188 FILE_NOTIFY_ON_RELEASE, 1210 FILE_NOTIFY_ON_RELEASE,
1189 FILE_MEMORY_PRESSURE_ENABLED, 1211 FILE_MEMORY_PRESSURE_ENABLED,
1190 FILE_MEMORY_PRESSURE, 1212 FILE_MEMORY_PRESSURE,
1213 FILE_SPREAD_PAGE,
1214 FILE_SPREAD_SLAB,
1191 FILE_TASKLIST, 1215 FILE_TASKLIST,
1192} cpuset_filetype_t; 1216} cpuset_filetype_t;
1193 1217
@@ -1247,6 +1271,14 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
1247 case FILE_MEMORY_PRESSURE: 1271 case FILE_MEMORY_PRESSURE:
1248 retval = -EACCES; 1272 retval = -EACCES;
1249 break; 1273 break;
1274 case FILE_SPREAD_PAGE:
1275 retval = update_flag(CS_SPREAD_PAGE, cs, buffer);
1276 cs->mems_generation = cpuset_mems_generation++;
1277 break;
1278 case FILE_SPREAD_SLAB:
1279 retval = update_flag(CS_SPREAD_SLAB, cs, buffer);
1280 cs->mems_generation = cpuset_mems_generation++;
1281 break;
1250 case FILE_TASKLIST: 1282 case FILE_TASKLIST:
1251 retval = attach_task(cs, buffer, &pathbuf); 1283 retval = attach_task(cs, buffer, &pathbuf);
1252 break; 1284 break;
@@ -1356,6 +1388,12 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
1356 case FILE_MEMORY_PRESSURE: 1388 case FILE_MEMORY_PRESSURE:
1357 s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter)); 1389 s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter));
1358 break; 1390 break;
1391 case FILE_SPREAD_PAGE:
1392 *s++ = is_spread_page(cs) ? '1' : '0';
1393 break;
1394 case FILE_SPREAD_SLAB:
1395 *s++ = is_spread_slab(cs) ? '1' : '0';
1396 break;
1359 default: 1397 default:
1360 retval = -EINVAL; 1398 retval = -EINVAL;
1361 goto out; 1399 goto out;
@@ -1719,6 +1757,16 @@ static struct cftype cft_memory_pressure = {
1719 .private = FILE_MEMORY_PRESSURE, 1757 .private = FILE_MEMORY_PRESSURE,
1720}; 1758};
1721 1759
1760static struct cftype cft_spread_page = {
1761 .name = "memory_spread_page",
1762 .private = FILE_SPREAD_PAGE,
1763};
1764
1765static struct cftype cft_spread_slab = {
1766 .name = "memory_spread_slab",
1767 .private = FILE_SPREAD_SLAB,
1768};
1769
1722static int cpuset_populate_dir(struct dentry *cs_dentry) 1770static int cpuset_populate_dir(struct dentry *cs_dentry)
1723{ 1771{
1724 int err; 1772 int err;
@@ -1737,6 +1785,10 @@ static int cpuset_populate_dir(struct dentry *cs_dentry)
1737 return err; 1785 return err;
1738 if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0) 1786 if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0)
1739 return err; 1787 return err;
1788 if ((err = cpuset_add_file(cs_dentry, &cft_spread_page)) < 0)
1789 return err;
1790 if ((err = cpuset_add_file(cs_dentry, &cft_spread_slab)) < 0)
1791 return err;
1740 if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0) 1792 if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
1741 return err; 1793 return err;
1742 return 0; 1794 return 0;
@@ -1765,13 +1817,16 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
1765 cs->flags = 0; 1817 cs->flags = 0;
1766 if (notify_on_release(parent)) 1818 if (notify_on_release(parent))
1767 set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); 1819 set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
1820 if (is_spread_page(parent))
1821 set_bit(CS_SPREAD_PAGE, &cs->flags);
1822 if (is_spread_slab(parent))
1823 set_bit(CS_SPREAD_SLAB, &cs->flags);
1768 cs->cpus_allowed = CPU_MASK_NONE; 1824 cs->cpus_allowed = CPU_MASK_NONE;
1769 cs->mems_allowed = NODE_MASK_NONE; 1825 cs->mems_allowed = NODE_MASK_NONE;
1770 atomic_set(&cs->count, 0); 1826 atomic_set(&cs->count, 0);
1771 INIT_LIST_HEAD(&cs->sibling); 1827 INIT_LIST_HEAD(&cs->sibling);
1772 INIT_LIST_HEAD(&cs->children); 1828 INIT_LIST_HEAD(&cs->children);
1773 atomic_inc(&cpuset_mems_generation); 1829 cs->mems_generation = cpuset_mems_generation++;
1774 cs->mems_generation = atomic_read(&cpuset_mems_generation);
1775 fmeter_init(&cs->fmeter); 1830 fmeter_init(&cs->fmeter);
1776 1831
1777 cs->parent = parent; 1832 cs->parent = parent;
@@ -1861,7 +1916,7 @@ int __init cpuset_init_early(void)
1861 struct task_struct *tsk = current; 1916 struct task_struct *tsk = current;
1862 1917
1863 tsk->cpuset = &top_cpuset; 1918 tsk->cpuset = &top_cpuset;
1864 tsk->cpuset->mems_generation = atomic_read(&cpuset_mems_generation); 1919 tsk->cpuset->mems_generation = cpuset_mems_generation++;
1865 return 0; 1920 return 0;
1866} 1921}
1867 1922
@@ -1880,8 +1935,7 @@ int __init cpuset_init(void)
1880 top_cpuset.mems_allowed = NODE_MASK_ALL; 1935 top_cpuset.mems_allowed = NODE_MASK_ALL;
1881 1936
1882 fmeter_init(&top_cpuset.fmeter); 1937 fmeter_init(&top_cpuset.fmeter);
1883 atomic_inc(&cpuset_mems_generation); 1938 top_cpuset.mems_generation = cpuset_mems_generation++;
1884 top_cpuset.mems_generation = atomic_read(&cpuset_mems_generation);
1885 1939
1886 init_task.cpuset = &top_cpuset; 1940 init_task.cpuset = &top_cpuset;
1887 1941
@@ -1972,7 +2026,7 @@ void cpuset_fork(struct task_struct *child)
1972 * because tsk is already marked PF_EXITING, so attach_task() won't 2026 * because tsk is already marked PF_EXITING, so attach_task() won't
1973 * mess with it, or task is a failed fork, never visible to attach_task. 2027 * mess with it, or task is a failed fork, never visible to attach_task.
1974 * 2028 *
1975 * Hack: 2029 * the_top_cpuset_hack:
1976 * 2030 *
1977 * Set the exiting tasks cpuset to the root cpuset (top_cpuset). 2031 * Set the exiting tasks cpuset to the root cpuset (top_cpuset).
1978 * 2032 *
@@ -2011,7 +2065,7 @@ void cpuset_exit(struct task_struct *tsk)
2011 struct cpuset *cs; 2065 struct cpuset *cs;
2012 2066
2013 cs = tsk->cpuset; 2067 cs = tsk->cpuset;
2014 tsk->cpuset = &top_cpuset; /* Hack - see comment above */ 2068 tsk->cpuset = &top_cpuset; /* the_top_cpuset_hack - see above */
2015 2069
2016 if (notify_on_release(cs)) { 2070 if (notify_on_release(cs)) {
2017 char *pathbuf = NULL; 2071 char *pathbuf = NULL;
@@ -2151,7 +2205,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
2151{ 2205{
2152 int node; /* node that zone z is on */ 2206 int node; /* node that zone z is on */
2153 const struct cpuset *cs; /* current cpuset ancestors */ 2207 const struct cpuset *cs; /* current cpuset ancestors */
2154 int allowed = 1; /* is allocation in zone z allowed? */ 2208 int allowed; /* is allocation in zone z allowed? */
2155 2209
2156 if (in_interrupt()) 2210 if (in_interrupt())
2157 return 1; 2211 return 1;
@@ -2204,6 +2258,44 @@ void cpuset_unlock(void)
2204} 2258}
2205 2259
2206/** 2260/**
2261 * cpuset_mem_spread_node() - On which node to begin search for a page
2262 *
2263 * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for
2264 * tasks in a cpuset with is_spread_page or is_spread_slab set),
2265 * and if the memory allocation used cpuset_mem_spread_node()
2266 * to determine on which node to start looking, as it will for
2267 * certain page cache or slab cache pages such as used for file
2268 * system buffers and inode caches, then instead of starting on the
2269 * local node to look for a free page, rather spread the starting
2270 * node around the tasks mems_allowed nodes.
2271 *
2272 * We don't have to worry about the returned node being offline
2273 * because "it can't happen", and even if it did, it would be ok.
2274 *
2275 * The routines calling guarantee_online_mems() are careful to
2276 * only set nodes in task->mems_allowed that are online. So it
2277 * should not be possible for the following code to return an
2278 * offline node. But if it did, that would be ok, as this routine
2279 * is not returning the node where the allocation must be, only
2280 * the node where the search should start. The zonelist passed to
2281 * __alloc_pages() will include all nodes. If the slab allocator
2282 * is passed an offline node, it will fall back to the local node.
2283 * See kmem_cache_alloc_node().
2284 */
2285
2286int cpuset_mem_spread_node(void)
2287{
2288 int node;
2289
2290 node = next_node(current->cpuset_mem_spread_rotor, current->mems_allowed);
2291 if (node == MAX_NUMNODES)
2292 node = first_node(current->mems_allowed);
2293 current->cpuset_mem_spread_rotor = node;
2294 return node;
2295}
2296EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
2297
2298/**
2207 * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors? 2299 * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
2208 * @p: pointer to task_struct of some other task. 2300 * @p: pointer to task_struct of some other task.
2209 * 2301 *
@@ -2284,12 +2376,12 @@ void __cpuset_memory_pressure_bump(void)
2284 * - No need to task_lock(tsk) on this tsk->cpuset reference, as it 2376 * - No need to task_lock(tsk) on this tsk->cpuset reference, as it
2285 * doesn't really matter if tsk->cpuset changes after we read it, 2377 * doesn't really matter if tsk->cpuset changes after we read it,
2286 * and we take manage_mutex, keeping attach_task() from changing it 2378 * and we take manage_mutex, keeping attach_task() from changing it
2287 * anyway. 2379 * anyway. No need to check that tsk->cpuset != NULL, thanks to
2380 * the_top_cpuset_hack in cpuset_exit(), which sets an exiting tasks
2381 * cpuset to top_cpuset.
2288 */ 2382 */
2289
2290static int proc_cpuset_show(struct seq_file *m, void *v) 2383static int proc_cpuset_show(struct seq_file *m, void *v)
2291{ 2384{
2292 struct cpuset *cs;
2293 struct task_struct *tsk; 2385 struct task_struct *tsk;
2294 char *buf; 2386 char *buf;
2295 int retval = 0; 2387 int retval = 0;
@@ -2300,13 +2392,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
2300 2392
2301 tsk = m->private; 2393 tsk = m->private;
2302 mutex_lock(&manage_mutex); 2394 mutex_lock(&manage_mutex);
2303 cs = tsk->cpuset; 2395 retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE);
2304 if (!cs) {
2305 retval = -EINVAL;
2306 goto out;
2307 }
2308
2309 retval = cpuset_path(cs, buf, PAGE_SIZE);
2310 if (retval < 0) 2396 if (retval < 0)
2311 goto out; 2397 goto out;
2312 seq_puts(m, buf); 2398 seq_puts(m, buf);
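As an aside, the rotor in cpuset_mem_spread_node() above is nothing more than a wrap-around walk of the allowed node mask. A small userspace model of the same idea, assuming a plain bitmask in place of nodemask_t (all names below are illustrative, not kernel API):

    #include <stdio.h>

    /* Return the next set bit strictly after 'rotor', wrapping to the
     * lowest set bit: the next_node()/first_node() dance from the patch. */
    static int next_spread_node(unsigned long allowed, int rotor, int max_nodes)
    {
            int n;

            for (n = rotor + 1; n < max_nodes; n++)
                    if (allowed & (1UL << n))
                            return n;
            for (n = 0; n < max_nodes; n++)
                    if (allowed & (1UL << n))
                            return n;
            return -1;      /* empty mask */
    }

    int main(void)
    {
            unsigned long mems_allowed = 0x0b;      /* nodes 0, 1 and 3 */
            int rotor = -1;
            int i;

            for (i = 0; i < 6; i++) {
                    rotor = next_spread_node(mems_allowed, rotor, 8);
                    printf("allocation %d starts on node %d\n", i, rotor);
            }
            return 0;       /* visits nodes 0 1 3 0 1 3 */
    }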
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 867d6dbeb574..c01cead2cfd6 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -140,6 +140,7 @@ __set_personality(u_long personality)
140 ep = lookup_exec_domain(personality); 140 ep = lookup_exec_domain(personality);
141 if (ep == current_thread_info()->exec_domain) { 141 if (ep == current_thread_info()->exec_domain) {
142 current->personality = personality; 142 current->personality = personality;
143 module_put(ep->module);
143 return 0; 144 return 0;
144 } 145 }
145 146
diff --git a/kernel/fork.c b/kernel/fork.c
index c21bae8c93b9..a02063903aaa 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1021,6 +1021,7 @@ static task_t *copy_process(unsigned long clone_flags,
1021 p->mempolicy = NULL; 1021 p->mempolicy = NULL;
1022 goto bad_fork_cleanup_cpuset; 1022 goto bad_fork_cleanup_cpuset;
1023 } 1023 }
1024 mpol_fix_fork_child_flag(p);
1024#endif 1025#endif
1025 1026
1026#ifdef CONFIG_DEBUG_MUTEXES 1027#ifdef CONFIG_DEBUG_MUTEXES
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 49378738ff5e..2b33f852be3e 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -1,5 +1,4 @@
1 1
2obj-y := handle.o manage.o spurious.o 2obj-y := handle.o manage.o spurious.o migration.o
3obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o 3obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
4obj-$(CONFIG_PROC_FS) += proc.o 4obj-$(CONFIG_PROC_FS) += proc.o
5
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 97d5559997d2..6edfcef291e8 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -204,10 +204,14 @@ int setup_irq(unsigned int irq, struct irqaction * new)
204 p = &desc->action; 204 p = &desc->action;
205 if ((old = *p) != NULL) { 205 if ((old = *p) != NULL) {
206 /* Can't share interrupts unless both agree to */ 206 /* Can't share interrupts unless both agree to */
207 if (!(old->flags & new->flags & SA_SHIRQ)) { 207 if (!(old->flags & new->flags & SA_SHIRQ))
208 spin_unlock_irqrestore(&desc->lock,flags); 208 goto mismatch;
209 return -EBUSY; 209
210 } 210#if defined(ARCH_HAS_IRQ_PER_CPU) && defined(SA_PERCPU_IRQ)
211 /* All handlers must agree on per-cpuness */
212 if ((old->flags & IRQ_PER_CPU) != (new->flags & IRQ_PER_CPU))
213 goto mismatch;
214#endif
211 215
212 /* add new interrupt at end of irq queue */ 216 /* add new interrupt at end of irq queue */
213 do { 217 do {
@@ -218,7 +222,10 @@ int setup_irq(unsigned int irq, struct irqaction * new)
218 } 222 }
219 223
220 *p = new; 224 *p = new;
221 225#if defined(ARCH_HAS_IRQ_PER_CPU) && defined(SA_PERCPU_IRQ)
226 if (new->flags & SA_PERCPU_IRQ)
227 desc->status |= IRQ_PER_CPU;
228#endif
222 if (!shared) { 229 if (!shared) {
223 desc->depth = 0; 230 desc->depth = 0;
224 desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | 231 desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT |
@@ -236,6 +243,12 @@ int setup_irq(unsigned int irq, struct irqaction * new)
236 register_handler_proc(irq, new); 243 register_handler_proc(irq, new);
237 244
238 return 0; 245 return 0;
246
247mismatch:
248 spin_unlock_irqrestore(&desc->lock, flags);
249 printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__);
250 dump_stack();
251 return -EBUSY;
239} 252}
240 253
241/** 254/**
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
new file mode 100644
index 000000000000..52a8655fa080
--- /dev/null
+++ b/kernel/irq/migration.c
@@ -0,0 +1,65 @@
1#include <linux/irq.h>
2
3#if defined(CONFIG_GENERIC_PENDING_IRQ)
4
5void set_pending_irq(unsigned int irq, cpumask_t mask)
6{
7 irq_desc_t *desc = irq_desc + irq;
8 unsigned long flags;
9
10 spin_lock_irqsave(&desc->lock, flags);
11 desc->move_irq = 1;
12 pending_irq_cpumask[irq] = mask;
13 spin_unlock_irqrestore(&desc->lock, flags);
14}
15
16void move_native_irq(int irq)
17{
18 cpumask_t tmp;
19 irq_desc_t *desc = irq_descp(irq);
20
21 if (likely(!desc->move_irq))
22 return;
23
24 /*
25 * Paranoia: cpu-local interrupts shouldn't be calling in here anyway.
26 */
27 if (CHECK_IRQ_PER_CPU(desc->status)) {
28 WARN_ON(1);
29 return;
30 }
31
32 desc->move_irq = 0;
33
34 if (likely(cpus_empty(pending_irq_cpumask[irq])))
35 return;
36
37 if (!desc->handler->set_affinity)
38 return;
39
40 assert_spin_locked(&desc->lock);
41
42 cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
43
44 /*
45 * If there was a valid mask to work with, please
46 * do the disable, re-program, enable sequence.
47 * This is *not* particularly important for level triggered
48 * but in a edge trigger case, we might be setting rte
49 * when an active trigger is comming in. This could
50 * cause some ioapics to mal-function.
51 * Being paranoid i guess!
52 */
53 if (unlikely(!cpus_empty(tmp))) {
54 if (likely(!(desc->status & IRQ_DISABLED)))
55 desc->handler->disable(irq);
56
57 desc->handler->set_affinity(irq,tmp);
58
59 if (likely(!(desc->status & IRQ_DISABLED)))
60 desc->handler->enable(irq);
61 }
62 cpus_clear(pending_irq_cpumask[irq]);
63}
64
65#endif
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 379be2f8c84c..680e6b70c872 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -143,6 +143,60 @@ int it_real_fn(void *data)
143 return HRTIMER_NORESTART; 143 return HRTIMER_NORESTART;
144} 144}
145 145
146/*
147 * We do not care about correctness. We just sanitize the values so
148 * the ktime_t operations which expect normalized values do not
149 * break. This converts negative values to long timeouts similar to
150 * the code in kernel versions < 2.6.16
151 *
152 * Print a limited number of warning messages when an invalid timeval
153 * is detected.
154 */
155static void fixup_timeval(struct timeval *tv, int interval)
156{
157 static int warnlimit = 10;
158 unsigned long tmp;
159
160 if (warnlimit > 0) {
161 warnlimit--;
162 printk(KERN_WARNING
163 "setitimer: %s (pid = %d) provided "
164 "invalid timeval %s: tv_sec = %ld tv_usec = %ld\n",
165 current->comm, current->pid,
166 interval ? "it_interval" : "it_value",
167 tv->tv_sec, (long) tv->tv_usec);
168 }
169
170 tmp = tv->tv_usec;
171 if (tmp >= USEC_PER_SEC) {
172 tv->tv_usec = tmp % USEC_PER_SEC;
173 tv->tv_sec += tmp / USEC_PER_SEC;
174 }
175
176 tmp = tv->tv_sec;
177 if (tmp > LONG_MAX)
178 tv->tv_sec = LONG_MAX;
179}
180
181/*
182 * Returns true if the timeval is in canonical form
183 */
184#define timeval_valid(t) \
185 (((t)->tv_sec >= 0) && (((unsigned long) (t)->tv_usec) < USEC_PER_SEC))
186
187/*
188 * Check for invalid timevals, sanitize them and print a limited
189 * number of warnings.
190 */
191static void check_itimerval(struct itimerval *value) {
192
193 if (unlikely(!timeval_valid(&value->it_value)))
194 fixup_timeval(&value->it_value, 0);
195
196 if (unlikely(!timeval_valid(&value->it_interval)))
197 fixup_timeval(&value->it_interval, 1);
198}
199
146int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) 200int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
147{ 201{
148 struct task_struct *tsk = current; 202 struct task_struct *tsk = current;
@@ -150,6 +204,18 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
150 ktime_t expires; 204 ktime_t expires;
151 cputime_t cval, cinterval, nval, ninterval; 205 cputime_t cval, cinterval, nval, ninterval;
152 206
207 /*
208 * Validate the timevals in value.
209 *
210 * Note: Although the spec requires that invalid values shall
211 * return -EINVAL, we just fixup the value and print a limited
212 * number of warnings in order not to break users of this
213 * historical misfeature.
214 *
215 * Scheduled for replacement in March 2007
216 */
217 check_itimerval(value);
218
153 switch (which) { 219 switch (which) {
154 case ITIMER_REAL: 220 case ITIMER_REAL:
155again: 221again:
@@ -226,6 +292,43 @@ again:
226 return 0; 292 return 0;
227} 293}
228 294
295/**
296 * alarm_setitimer - set alarm in seconds
297 *
298 * @seconds: number of seconds until alarm
299 * 0 disables the alarm
300 *
301 * Returns the remaining time in seconds of a pending timer or 0 when
302 * the timer is not active.
303 *
304 * On 32 bit machines the seconds value is limited to (INT_MAX/2) to avoid
305 * negative timeval settings which would cause immediate expiry.
306 */
307unsigned int alarm_setitimer(unsigned int seconds)
308{
309 struct itimerval it_new, it_old;
310
311#if BITS_PER_LONG < 64
312 if (seconds > INT_MAX)
313 seconds = INT_MAX;
314#endif
315 it_new.it_value.tv_sec = seconds;
316 it_new.it_value.tv_usec = 0;
317 it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
318
319 do_setitimer(ITIMER_REAL, &it_new, &it_old);
320
321 /*
322 * We can't return 0 if we have an alarm pending ... And we'd
323 * better return too much than too little anyway
324 */
325 if ((!it_old.it_value.tv_sec && it_old.it_value.tv_usec) ||
326 it_old.it_value.tv_usec >= 500000)
327 it_old.it_value.tv_sec++;
328
329 return it_old.it_value.tv_sec;
330}
331
229asmlinkage long sys_setitimer(int which, 332asmlinkage long sys_setitimer(int which,
230 struct itimerval __user *value, 333 struct itimerval __user *value,
231 struct itimerval __user *ovalue) 334 struct itimerval __user *ovalue)
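The rounding at the end of alarm_setitimer() above (never report zero for a still-pending timer, round half a second or more up) is easy to model in plain C; the helper name here is made up and only mirrors the kernel logic:

    #include <stdio.h>
    #include <sys/time.h>

    /* Mirror of the return-value rounding in alarm_setitimer(). */
    static unsigned int rounded_seconds(struct timeval remaining)
    {
            unsigned int secs = remaining.tv_sec;

            if ((!remaining.tv_sec && remaining.tv_usec) ||
                remaining.tv_usec >= 500000)
                    secs++;         /* round up, never return 0 while pending */
            return secs;
    }

    int main(void)
    {
            struct timeval a = { 0, 1 };        /* pending, almost expired */
            struct timeval b = { 2, 600000 };   /* 2.6s left */
            struct timeval c = { 3, 100000 };   /* 3.1s left */

            printf("%u %u %u\n", rounded_seconds(a),
                   rounded_seconds(b), rounded_seconds(c));     /* 1 3 3 */
            return 0;
    }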
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index f2690ed74530..f119e098e67b 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -22,7 +22,7 @@ static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
22static struct subsys_attribute _name##_attr = \ 22static struct subsys_attribute _name##_attr = \
23 __ATTR(_name, 0644, _name##_show, _name##_store) 23 __ATTR(_name, 0644, _name##_show, _name##_store)
24 24
25#ifdef CONFIG_HOTPLUG 25#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
26/* current uevent sequence number */ 26/* current uevent sequence number */
27static ssize_t uevent_seqnum_show(struct subsystem *subsys, char *page) 27static ssize_t uevent_seqnum_show(struct subsystem *subsys, char *page)
28{ 28{
@@ -52,7 +52,7 @@ decl_subsys(kernel, NULL, NULL);
52EXPORT_SYMBOL_GPL(kernel_subsys); 52EXPORT_SYMBOL_GPL(kernel_subsys);
53 53
54static struct attribute * kernel_attrs[] = { 54static struct attribute * kernel_attrs[] = {
55#ifdef CONFIG_HOTPLUG 55#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
56 &uevent_seqnum_attr.attr, 56 &uevent_seqnum_attr.attr,
57 &uevent_helper_attr.attr, 57 &uevent_helper_attr.attr,
58#endif 58#endif
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 6a5373868a98..c5f3c6613b6d 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -115,7 +115,9 @@ static void keventd_create_kthread(void *_create)
115 create->result = ERR_PTR(pid); 115 create->result = ERR_PTR(pid);
116 } else { 116 } else {
117 wait_for_completion(&create->started); 117 wait_for_completion(&create->started);
118 read_lock(&tasklist_lock);
118 create->result = find_task_by_pid(pid); 119 create->result = find_task_by_pid(pid);
120 read_unlock(&tasklist_lock);
119 } 121 }
120 complete(&create->done); 122 complete(&create->done);
121} 123}
diff --git a/kernel/module.c b/kernel/module.c
index fb404299082e..ddfe45ac2fd1 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -233,24 +233,6 @@ static unsigned long __find_symbol(const char *name,
233 return 0; 233 return 0;
234} 234}
235 235
236/* Find a symbol in this elf symbol table */
237static unsigned long find_local_symbol(Elf_Shdr *sechdrs,
238 unsigned int symindex,
239 const char *strtab,
240 const char *name)
241{
242 unsigned int i;
243 Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr;
244
245 /* Search (defined) internal symbols first. */
246 for (i = 1; i < sechdrs[symindex].sh_size/sizeof(*sym); i++) {
247 if (sym[i].st_shndx != SHN_UNDEF
248 && strcmp(name, strtab + sym[i].st_name) == 0)
249 return sym[i].st_value;
250 }
251 return 0;
252}
253
254/* Search for module by name: must hold module_mutex. */ 236/* Search for module by name: must hold module_mutex. */
255static struct module *find_module(const char *name) 237static struct module *find_module(const char *name)
256{ 238{
@@ -785,139 +767,6 @@ static struct module_attribute *modinfo_attrs[] = {
785 NULL, 767 NULL,
786}; 768};
787 769
788#ifdef CONFIG_OBSOLETE_MODPARM
789/* Bounds checking done below */
790static int obsparm_copy_string(const char *val, struct kernel_param *kp)
791{
792 strcpy(kp->arg, val);
793 return 0;
794}
795
796static int set_obsolete(const char *val, struct kernel_param *kp)
797{
798 unsigned int min, max;
799 unsigned int size, maxsize;
800 int dummy;
801 char *endp;
802 const char *p;
803 struct obsolete_modparm *obsparm = kp->arg;
804
805 if (!val) {
806 printk(KERN_ERR "Parameter %s needs an argument\n", kp->name);
807 return -EINVAL;
808 }
809
810 /* type is: [min[-max]]{b,h,i,l,s} */
811 p = obsparm->type;
812 min = simple_strtol(p, &endp, 10);
813 if (endp == obsparm->type)
814 min = max = 1;
815 else if (*endp == '-') {
816 p = endp+1;
817 max = simple_strtol(p, &endp, 10);
818 } else
819 max = min;
820 switch (*endp) {
821 case 'b':
822 return param_array(kp->name, val, min, max, obsparm->addr,
823 1, param_set_byte, &dummy);
824 case 'h':
825 return param_array(kp->name, val, min, max, obsparm->addr,
826 sizeof(short), param_set_short, &dummy);
827 case 'i':
828 return param_array(kp->name, val, min, max, obsparm->addr,
829 sizeof(int), param_set_int, &dummy);
830 case 'l':
831 return param_array(kp->name, val, min, max, obsparm->addr,
832 sizeof(long), param_set_long, &dummy);
833 case 's':
834 return param_array(kp->name, val, min, max, obsparm->addr,
835 sizeof(char *), param_set_charp, &dummy);
836
837 case 'c':
838 /* Undocumented: 1-5c50 means 1-5 strings of up to 49 chars,
839 and the decl is "char xxx[5][50];" */
840 p = endp+1;
841 maxsize = simple_strtol(p, &endp, 10);
842 /* We check lengths here (yes, this is a hack). */
843 p = val;
844 while (p[size = strcspn(p, ",")]) {
845 if (size >= maxsize)
846 goto oversize;
847 p += size+1;
848 }
849 if (size >= maxsize)
850 goto oversize;
851 return param_array(kp->name, val, min, max, obsparm->addr,
852 maxsize, obsparm_copy_string, &dummy);
853 }
854 printk(KERN_ERR "Unknown obsolete parameter type %s\n", obsparm->type);
855 return -EINVAL;
856 oversize:
857 printk(KERN_ERR
858 "Parameter %s doesn't fit in %u chars.\n", kp->name, maxsize);
859 return -EINVAL;
860}
861
862static int obsolete_params(const char *name,
863 char *args,
864 struct obsolete_modparm obsparm[],
865 unsigned int num,
866 Elf_Shdr *sechdrs,
867 unsigned int symindex,
868 const char *strtab)
869{
870 struct kernel_param *kp;
871 unsigned int i;
872 int ret;
873
874 kp = kmalloc(sizeof(kp[0]) * num, GFP_KERNEL);
875 if (!kp)
876 return -ENOMEM;
877
878 for (i = 0; i < num; i++) {
879 char sym_name[128 + sizeof(MODULE_SYMBOL_PREFIX)];
880
881 snprintf(sym_name, sizeof(sym_name), "%s%s",
882 MODULE_SYMBOL_PREFIX, obsparm[i].name);
883
884 kp[i].name = obsparm[i].name;
885 kp[i].perm = 000;
886 kp[i].set = set_obsolete;
887 kp[i].get = NULL;
888 obsparm[i].addr
889 = (void *)find_local_symbol(sechdrs, symindex, strtab,
890 sym_name);
891 if (!obsparm[i].addr) {
892 printk("%s: falsely claims to have parameter %s\n",
893 name, obsparm[i].name);
894 ret = -EINVAL;
895 goto out;
896 }
897 kp[i].arg = &obsparm[i];
898 }
899
900 ret = parse_args(name, args, kp, num, NULL);
901 out:
902 kfree(kp);
903 return ret;
904}
905#else
906static int obsolete_params(const char *name,
907 char *args,
908 struct obsolete_modparm obsparm[],
909 unsigned int num,
910 Elf_Shdr *sechdrs,
911 unsigned int symindex,
912 const char *strtab)
913{
914 if (num != 0)
915 printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
916 name);
917 return 0;
918}
919#endif /* CONFIG_OBSOLETE_MODPARM */
920
921static const char vermagic[] = VERMAGIC_STRING; 770static const char vermagic[] = VERMAGIC_STRING;
922 771
923#ifdef CONFIG_MODVERSIONS 772#ifdef CONFIG_MODVERSIONS
@@ -1572,7 +1421,6 @@ static struct module *load_module(void __user *umod,
1572 exportindex, modindex, obsparmindex, infoindex, gplindex, 1421 exportindex, modindex, obsparmindex, infoindex, gplindex,
1573 crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex, 1422 crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex,
1574 gplfuturecrcindex; 1423 gplfuturecrcindex;
1575 long arglen;
1576 struct module *mod; 1424 struct module *mod;
1577 long err = 0; 1425 long err = 0;
1578 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ 1426 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1691,23 +1539,11 @@ static struct module *load_module(void __user *umod,
1691 } 1539 }
1692 1540
1693 /* Now copy in args */ 1541 /* Now copy in args */
1694 arglen = strlen_user(uargs); 1542 args = strndup_user(uargs, ~0UL >> 1);
1695 if (!arglen) { 1543 if (IS_ERR(args)) {
1696 err = -EFAULT; 1544 err = PTR_ERR(args);
1697 goto free_hdr;
1698 }
1699 args = kmalloc(arglen, GFP_KERNEL);
1700 if (!args) {
1701 err = -ENOMEM;
1702 goto free_hdr; 1545 goto free_hdr;
1703 } 1546 }
1704 if (copy_from_user(args, uargs, arglen) != 0) {
1705 err = -EFAULT;
1706 goto free_mod;
1707 }
1708
1709 /* Userspace could have altered the string after the strlen_user() */
1710 args[arglen - 1] = '\0';
1711 1547
1712 if (find_module(mod->name)) { 1548 if (find_module(mod->name)) {
1713 err = -EEXIST; 1549 err = -EEXIST;
@@ -1887,27 +1723,17 @@ static struct module *load_module(void __user *umod,
1887 set_fs(old_fs); 1723 set_fs(old_fs);
1888 1724
1889 mod->args = args; 1725 mod->args = args;
1890 if (obsparmindex) { 1726 if (obsparmindex)
1891 err = obsolete_params(mod->name, mod->args, 1727 printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
1892 (struct obsolete_modparm *) 1728 mod->name);
1893 sechdrs[obsparmindex].sh_addr, 1729
1894 sechdrs[obsparmindex].sh_size 1730 /* Size of section 0 is 0, so this works well if no params */
1895 / sizeof(struct obsolete_modparm), 1731 err = parse_args(mod->name, mod->args,
1896 sechdrs, symindex, 1732 (struct kernel_param *)
1897 (char *)sechdrs[strindex].sh_addr); 1733 sechdrs[setupindex].sh_addr,
1898 if (setupindex) 1734 sechdrs[setupindex].sh_size
1899 printk(KERN_WARNING "%s: Ignoring new-style " 1735 / sizeof(struct kernel_param),
1900 "parameters in presence of obsolete ones\n", 1736 NULL);
1901 mod->name);
1902 } else {
1903 /* Size of section 0 is 0, so this works well if no params */
1904 err = parse_args(mod->name, mod->args,
1905 (struct kernel_param *)
1906 sechdrs[setupindex].sh_addr,
1907 sechdrs[setupindex].sh_size
1908 / sizeof(struct kernel_param),
1909 NULL);
1910 }
1911 if (err < 0) 1737 if (err < 0)
1912 goto arch_cleanup; 1738 goto arch_cleanup;
1913 1739
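The argument-copy rewrite in load_module() above is the generic strndup_user() idiom; a minimal sketch of the same pattern outside of module loading (the surrounding helper is hypothetical, strndup_user() and IS_ERR() are the real kernel interfaces):

    #include <linux/err.h>
    #include <linux/mm.h>
    #include <linux/string.h>

    /* Hypothetical helper: duplicate a NUL-terminated userspace string. */
    static char *copy_name_from_user(const char __user *uname)
    {
            char *name = strndup_user(uname, PAGE_SIZE);   /* kmalloc + copy + terminate */

            if (IS_ERR(name))
                    return NULL;            /* discard -EFAULT/-ENOMEM for brevity */
            return name;                    /* caller must kfree() */
    }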
diff --git a/kernel/params.c b/kernel/params.c
index a29150582310..9de637a5c8bc 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -265,12 +265,12 @@ int param_get_invbool(char *buffer, struct kernel_param *kp)
265} 265}
266 266
267/* We cheat here and temporarily mangle the string. */ 267/* We cheat here and temporarily mangle the string. */
268int param_array(const char *name, 268static int param_array(const char *name,
269 const char *val, 269 const char *val,
270 unsigned int min, unsigned int max, 270 unsigned int min, unsigned int max,
271 void *elem, int elemsize, 271 void *elem, int elemsize,
272 int (*set)(const char *, struct kernel_param *kp), 272 int (*set)(const char *, struct kernel_param *kp),
273 int *num) 273 int *num)
274{ 274{
275 int ret; 275 int ret;
276 struct kernel_param kp; 276 struct kernel_param kp;
diff --git a/kernel/power/smp.c b/kernel/power/smp.c
index 911fc62b8225..5957312b2d68 100644
--- a/kernel/power/smp.c
+++ b/kernel/power/smp.c
@@ -49,9 +49,7 @@ void enable_nonboot_cpus(void)
49 49
50 printk("Thawing cpus ...\n"); 50 printk("Thawing cpus ...\n");
51 for_each_cpu_mask(cpu, frozen_cpus) { 51 for_each_cpu_mask(cpu, frozen_cpus) {
52 error = smp_prepare_cpu(cpu); 52 error = cpu_up(cpu);
53 if (!error)
54 error = cpu_up(cpu);
55 if (!error) { 53 if (!error) {
56 printk("CPU%d is up\n", cpu); 54 printk("CPU%d is up\n", cpu);
57 continue; 55 continue;
diff --git a/kernel/printk.c b/kernel/printk.c
index 13ced0f7828f..8cc19431e74b 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -122,44 +122,6 @@ static char *log_buf = __log_buf;
122static int log_buf_len = __LOG_BUF_LEN; 122static int log_buf_len = __LOG_BUF_LEN;
123static unsigned long logged_chars; /* Number of chars produced since last read+clear operation */ 123static unsigned long logged_chars; /* Number of chars produced since last read+clear operation */
124 124
125/*
126 * Setup a list of consoles. Called from init/main.c
127 */
128static int __init console_setup(char *str)
129{
130 char name[sizeof(console_cmdline[0].name)];
131 char *s, *options;
132 int idx;
133
134 /*
135 * Decode str into name, index, options.
136 */
137 if (str[0] >= '0' && str[0] <= '9') {
138 strcpy(name, "ttyS");
139 strncpy(name + 4, str, sizeof(name) - 5);
140 } else
141 strncpy(name, str, sizeof(name) - 1);
142 name[sizeof(name) - 1] = 0;
143 if ((options = strchr(str, ',')) != NULL)
144 *(options++) = 0;
145#ifdef __sparc__
146 if (!strcmp(str, "ttya"))
147 strcpy(name, "ttyS0");
148 if (!strcmp(str, "ttyb"))
149 strcpy(name, "ttyS1");
150#endif
151 for (s = name; *s; s++)
152 if ((*s >= '0' && *s <= '9') || *s == ',')
153 break;
154 idx = simple_strtoul(s, NULL, 10);
155 *s = 0;
156
157 add_preferred_console(name, idx, options);
158 return 1;
159}
160
161__setup("console=", console_setup);
162
163static int __init log_buf_len_setup(char *str) 125static int __init log_buf_len_setup(char *str)
164{ 126{
165 unsigned long size = memparse(str, &str); 127 unsigned long size = memparse(str, &str);
@@ -659,6 +621,44 @@ static void call_console_drivers(unsigned long start, unsigned long end)
659 621
660#endif 622#endif
661 623
624/*
625 * Set up a list of consoles. Called from init/main.c
626 */
627static int __init console_setup(char *str)
628{
629 char name[sizeof(console_cmdline[0].name)];
630 char *s, *options;
631 int idx;
632
633 /*
634 * Decode str into name, index, options.
635 */
636 if (str[0] >= '0' && str[0] <= '9') {
637 strcpy(name, "ttyS");
638 strncpy(name + 4, str, sizeof(name) - 5);
639 } else {
640 strncpy(name, str, sizeof(name) - 1);
641 }
642 name[sizeof(name) - 1] = 0;
643 if ((options = strchr(str, ',')) != NULL)
644 *(options++) = 0;
645#ifdef __sparc__
646 if (!strcmp(str, "ttya"))
647 strcpy(name, "ttyS0");
648 if (!strcmp(str, "ttyb"))
649 strcpy(name, "ttyS1");
650#endif
651 for (s = name; *s; s++)
652 if ((*s >= '0' && *s <= '9') || *s == ',')
653 break;
654 idx = simple_strtoul(s, NULL, 10);
655 *s = 0;
656
657 add_preferred_console(name, idx, options);
658 return 1;
659}
660__setup("console=", console_setup);
661
662/** 662/**
663 * add_preferred_console - add a device to the list of preferred consoles. 663 * add_preferred_console - add a device to the list of preferred consoles.
664 * @name: device name 664 * @name: device name
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 6df1559b1c02..13458bbaa1be 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -416,8 +416,8 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
416 rdp->curtail = &rdp->curlist; 416 rdp->curtail = &rdp->curlist;
417 } 417 }
418 418
419 local_irq_disable();
420 if (rdp->nxtlist && !rdp->curlist) { 419 if (rdp->nxtlist && !rdp->curlist) {
420 local_irq_disable();
421 rdp->curlist = rdp->nxtlist; 421 rdp->curlist = rdp->nxtlist;
422 rdp->curtail = rdp->nxttail; 422 rdp->curtail = rdp->nxttail;
423 rdp->nxtlist = NULL; 423 rdp->nxtlist = NULL;
@@ -442,9 +442,8 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
442 rcu_start_batch(rcp); 442 rcu_start_batch(rcp);
443 spin_unlock(&rcp->lock); 443 spin_unlock(&rcp->lock);
444 } 444 }
445 } else {
446 local_irq_enable();
447 } 445 }
446
448 rcu_check_quiescent_state(rcp, rdp); 447 rcu_check_quiescent_state(rcp, rdp);
449 if (rdp->donelist) 448 if (rdp->donelist)
450 rcu_do_batch(rdp); 449 rcu_do_batch(rdp);
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 7712912dbc84..b4b362b5baf5 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -54,15 +54,15 @@ static int verbose; /* Print more debug info. */
54static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ 54static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
55static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/ 55static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/
56 56
57MODULE_PARM(nreaders, "i"); 57module_param(nreaders, int, 0);
58MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); 58MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
59MODULE_PARM(stat_interval, "i"); 59module_param(stat_interval, int, 0);
60MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); 60MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
61MODULE_PARM(verbose, "i"); 61module_param(verbose, bool, 0);
62MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); 62MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
63MODULE_PARM(test_no_idle_hz, "i"); 63module_param(test_no_idle_hz, bool, 0);
64MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs"); 64MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
65MODULE_PARM(shuffle_interval, "i"); 65module_param(shuffle_interval, int, 0);
66MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); 66MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
67#define TORTURE_FLAG "rcutorture: " 67#define TORTURE_FLAG "rcutorture: "
68#define PRINTK_STRING(s) \ 68#define PRINTK_STRING(s) \
@@ -441,6 +441,16 @@ rcu_torture_shuffle(void *arg)
441 return 0; 441 return 0;
442} 442}
443 443
444static inline void
445rcu_torture_print_module_parms(char *tag)
446{
447 printk(KERN_ALERT TORTURE_FLAG "--- %s: nreaders=%d "
448 "stat_interval=%d verbose=%d test_no_idle_hz=%d "
449 "shuffle_interval = %d\n",
450 tag, nrealreaders, stat_interval, verbose, test_no_idle_hz,
451 shuffle_interval);
452}
453
444static void 454static void
445rcu_torture_cleanup(void) 455rcu_torture_cleanup(void)
446{ 456{
@@ -483,9 +493,10 @@ rcu_torture_cleanup(void)
483 rcu_barrier(); 493 rcu_barrier();
484 494
485 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ 495 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
486 printk(KERN_ALERT TORTURE_FLAG 496 if (atomic_read(&n_rcu_torture_error))
487 "--- End of test: %s\n", 497 rcu_torture_print_module_parms("End of test: FAILURE");
488 atomic_read(&n_rcu_torture_error) == 0 ? "SUCCESS" : "FAILURE"); 498 else
499 rcu_torture_print_module_parms("End of test: SUCCESS");
489} 500}
490 501
491static int 502static int
@@ -501,11 +512,7 @@ rcu_torture_init(void)
501 nrealreaders = nreaders; 512 nrealreaders = nreaders;
502 else 513 else
503 nrealreaders = 2 * num_online_cpus(); 514 nrealreaders = 2 * num_online_cpus();
504 printk(KERN_ALERT TORTURE_FLAG "--- Start of test: nreaders=%d " 515 rcu_torture_print_module_parms("Start of test");
505 "stat_interval=%d verbose=%d test_no_idle_hz=%d "
506 "shuffle_interval = %d\n",
507 nrealreaders, stat_interval, verbose, test_no_idle_hz,
508 shuffle_interval);
509 fullstop = 0; 516 fullstop = 0;
510 517
511 /* Set up the freelist. */ 518 /* Set up the freelist. */
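The MODULE_PARM() to module_param() conversion above follows the standard 2.6 pattern; for a hypothetical out-of-tree module the new style looks like this (nloops is an invented parameter):

    #include <linux/module.h>
    #include <linux/moduleparam.h>

    static int nloops = 10;                 /* default when not set on the modprobe line */
    module_param(nloops, int, 0444);        /* type-checked, read-only via sysfs */
    MODULE_PARM_DESC(nloops, "Number of test loops");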
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index c67189a25d52..d9b3d5847ed8 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -1,12 +1,11 @@
1/* 1/*
2 * Detect Soft Lockups 2 * Detect Soft Lockups
3 * 3 *
4 * started by Ingo Molnar, (C) 2005, Red Hat 4 * started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc.
5 * 5 *
6 * this code detects soft lockups: incidents in where on a CPU 6 * this code detects soft lockups: incidents in where on a CPU
7 * the kernel does not reschedule for 10 seconds or more. 7 * the kernel does not reschedule for 10 seconds or more.
8 */ 8 */
9
10#include <linux/mm.h> 9#include <linux/mm.h>
11#include <linux/cpu.h> 10#include <linux/cpu.h>
12#include <linux/init.h> 11#include <linux/init.h>
@@ -17,13 +16,14 @@
17 16
18static DEFINE_SPINLOCK(print_lock); 17static DEFINE_SPINLOCK(print_lock);
19 18
20static DEFINE_PER_CPU(unsigned long, timestamp) = 0; 19static DEFINE_PER_CPU(unsigned long, touch_timestamp);
21static DEFINE_PER_CPU(unsigned long, print_timestamp) = 0; 20static DEFINE_PER_CPU(unsigned long, print_timestamp);
22static DEFINE_PER_CPU(struct task_struct *, watchdog_task); 21static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
23 22
24static int did_panic = 0; 23static int did_panic = 0;
25static int softlock_panic(struct notifier_block *this, unsigned long event, 24
26 void *ptr) 25static int
26softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
27{ 27{
28 did_panic = 1; 28 did_panic = 1;
29 29
@@ -36,7 +36,7 @@ static struct notifier_block panic_block = {
36 36
37void touch_softlockup_watchdog(void) 37void touch_softlockup_watchdog(void)
38{ 38{
39 per_cpu(timestamp, raw_smp_processor_id()) = jiffies; 39 per_cpu(touch_timestamp, raw_smp_processor_id()) = jiffies;
40} 40}
41EXPORT_SYMBOL(touch_softlockup_watchdog); 41EXPORT_SYMBOL(touch_softlockup_watchdog);
42 42
@@ -44,25 +44,35 @@ EXPORT_SYMBOL(touch_softlockup_watchdog);
44 * This callback runs from the timer interrupt, and checks 44 * This callback runs from the timer interrupt, and checks
45 * whether the watchdog thread has hung or not: 45 * whether the watchdog thread has hung or not:
46 */ 46 */
47void softlockup_tick(struct pt_regs *regs) 47void softlockup_tick(void)
48{ 48{
49 int this_cpu = smp_processor_id(); 49 int this_cpu = smp_processor_id();
50 unsigned long timestamp = per_cpu(timestamp, this_cpu); 50 unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu);
51 51
52 if (per_cpu(print_timestamp, this_cpu) == timestamp) 52 /* prevent double reports: */
53 if (per_cpu(print_timestamp, this_cpu) == touch_timestamp ||
54 did_panic ||
55 !per_cpu(watchdog_task, this_cpu))
53 return; 56 return;
54 57
55 /* Do not cause a second panic when there already was one */ 58 /* do not print during early bootup: */
56 if (did_panic) 59 if (unlikely(system_state != SYSTEM_RUNNING)) {
60 touch_softlockup_watchdog();
57 return; 61 return;
62 }
58 63
59 if (time_after(jiffies, timestamp + 10*HZ)) { 64 /* Wake up the high-prio watchdog task every second: */
60 per_cpu(print_timestamp, this_cpu) = timestamp; 65 if (time_after(jiffies, touch_timestamp + HZ))
66 wake_up_process(per_cpu(watchdog_task, this_cpu));
67
68 /* Warn about unreasonable 10+ seconds delays: */
69 if (time_after(jiffies, touch_timestamp + 10*HZ)) {
70 per_cpu(print_timestamp, this_cpu) = touch_timestamp;
61 71
62 spin_lock(&print_lock); 72 spin_lock(&print_lock);
63 printk(KERN_ERR "BUG: soft lockup detected on CPU#%d!\n", 73 printk(KERN_ERR "BUG: soft lockup detected on CPU#%d!\n",
64 this_cpu); 74 this_cpu);
65 show_regs(regs); 75 dump_stack();
66 spin_unlock(&print_lock); 76 spin_unlock(&print_lock);
67 } 77 }
68} 78}
@@ -77,18 +87,16 @@ static int watchdog(void * __bind_cpu)
77 sched_setscheduler(current, SCHED_FIFO, &param); 87 sched_setscheduler(current, SCHED_FIFO, &param);
78 current->flags |= PF_NOFREEZE; 88 current->flags |= PF_NOFREEZE;
79 89
80 set_current_state(TASK_INTERRUPTIBLE);
81
82 /* 90 /*
83 * Run briefly once per second - if this gets delayed for 91 * Run briefly once per second to reset the softlockup timestamp.
84 * more than 10 seconds then the debug-printout triggers 92 * If this gets delayed for more than 10 seconds then the
85 * in softlockup_tick(): 93 * debug-printout triggers in softlockup_tick().
86 */ 94 */
87 while (!kthread_should_stop()) { 95 while (!kthread_should_stop()) {
88 msleep_interruptible(1000); 96 set_current_state(TASK_INTERRUPTIBLE);
89 touch_softlockup_watchdog(); 97 touch_softlockup_watchdog();
98 schedule();
90 } 99 }
91 __set_current_state(TASK_RUNNING);
92 100
93 return 0; 101 return 0;
94} 102}
@@ -110,11 +118,11 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
110 printk("watchdog for %i failed\n", hotcpu); 118 printk("watchdog for %i failed\n", hotcpu);
111 return NOTIFY_BAD; 119 return NOTIFY_BAD;
112 } 120 }
121 per_cpu(touch_timestamp, hotcpu) = jiffies;
113 per_cpu(watchdog_task, hotcpu) = p; 122 per_cpu(watchdog_task, hotcpu) = p;
114 kthread_bind(p, hotcpu); 123 kthread_bind(p, hotcpu);
115 break; 124 break;
116 case CPU_ONLINE: 125 case CPU_ONLINE:
117
118 wake_up_process(per_cpu(watchdog_task, hotcpu)); 126 wake_up_process(per_cpu(watchdog_task, hotcpu));
119 break; 127 break;
120#ifdef CONFIG_HOTPLUG_CPU 128#ifdef CONFIG_HOTPLUG_CPU
@@ -146,4 +154,3 @@ __init void spawn_softlockup_task(void)
146 154
147 notifier_chain_register(&panic_notifier_list, &panic_block); 155 notifier_chain_register(&panic_notifier_list, &panic_block);
148} 156}
149
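At its core the reworked detector is a two-timestamp scheme: the per-CPU watchdog thread refreshes a touch timestamp whenever it gets to run, and the timer tick complains once that timestamp goes stale by more than ten seconds. A toy userspace model of that interaction, with invented names and a threshold of 10 ticks:

    #include <stdio.h>

    static unsigned long touch_timestamp;

    static void watchdog_runs(unsigned long now)    /* models touch_softlockup_watchdog() */
    {
            touch_timestamp = now;
    }

    static void timer_tick(unsigned long now)       /* models softlockup_tick() */
    {
            if (now > touch_timestamp + 10)
                    printf("tick %lu: BUG: soft lockup detected\n", now);
    }

    int main(void)
    {
            unsigned long now;

            for (now = 1; now <= 25; now++) {
                    if (now < 5 || now > 20)        /* watchdog starved from tick 5 to 20 */
                            watchdog_runs(now);
                    timer_tick(now);
            }
            return 0;
    }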
diff --git a/kernel/sys.c b/kernel/sys.c
index c0fcad9f826c..38bc73ede2ba 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -224,18 +224,6 @@ int unregister_reboot_notifier(struct notifier_block * nb)
224 224
225EXPORT_SYMBOL(unregister_reboot_notifier); 225EXPORT_SYMBOL(unregister_reboot_notifier);
226 226
227#ifndef CONFIG_SECURITY
228int capable(int cap)
229{
230 if (cap_raised(current->cap_effective, cap)) {
231 current->flags |= PF_SUPERPRIV;
232 return 1;
233 }
234 return 0;
235}
236EXPORT_SYMBOL(capable);
237#endif
238
239static int set_one_prio(struct task_struct *p, int niceval, int error) 227static int set_one_prio(struct task_struct *p, int niceval, int error)
240{ 228{
241 int no_nice; 229 int no_nice;
@@ -1375,7 +1363,7 @@ static void groups_sort(struct group_info *group_info)
1375/* a simple bsearch */ 1363/* a simple bsearch */
1376int groups_search(struct group_info *group_info, gid_t grp) 1364int groups_search(struct group_info *group_info, gid_t grp)
1377{ 1365{
1378 int left, right; 1366 unsigned int left, right;
1379 1367
1380 if (!group_info) 1368 if (!group_info)
1381 return 0; 1369 return 0;
@@ -1383,7 +1371,7 @@ int groups_search(struct group_info *group_info, gid_t grp)
1383 left = 0; 1371 left = 0;
1384 right = group_info->ngroups; 1372 right = group_info->ngroups;
1385 while (left < right) { 1373 while (left < right) {
1386 int mid = (left+right)/2; 1374 unsigned int mid = (left+right)/2;
1387 int cmp = grp - GROUP_AT(group_info, mid); 1375 int cmp = grp - GROUP_AT(group_info, mid);
1388 if (cmp > 0) 1376 if (cmp > 0)
1389 left = mid + 1; 1377 left = mid + 1;
@@ -1433,7 +1421,6 @@ asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
1433 return -EINVAL; 1421 return -EINVAL;
1434 1422
1435 /* no need to grab task_lock here; it cannot change */ 1423 /* no need to grab task_lock here; it cannot change */
1436 get_group_info(current->group_info);
1437 i = current->group_info->ngroups; 1424 i = current->group_info->ngroups;
1438 if (gidsetsize) { 1425 if (gidsetsize) {
1439 if (i > gidsetsize) { 1426 if (i > gidsetsize) {
@@ -1446,7 +1433,6 @@ asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
1446 } 1433 }
1447 } 1434 }
1448out: 1435out:
1449 put_group_info(current->group_info);
1450 return i; 1436 return i;
1451} 1437}
1452 1438
@@ -1487,9 +1473,7 @@ int in_group_p(gid_t grp)
1487{ 1473{
1488 int retval = 1; 1474 int retval = 1;
1489 if (grp != current->fsgid) { 1475 if (grp != current->fsgid) {
1490 get_group_info(current->group_info);
1491 retval = groups_search(current->group_info, grp); 1476 retval = groups_search(current->group_info, grp);
1492 put_group_info(current->group_info);
1493 } 1477 }
1494 return retval; 1478 return retval;
1495} 1479}
@@ -1500,9 +1484,7 @@ int in_egroup_p(gid_t grp)
1500{ 1484{
1501 int retval = 1; 1485 int retval = 1;
1502 if (grp != current->egid) { 1486 if (grp != current->egid) {
1503 get_group_info(current->group_info);
1504 retval = groups_search(current->group_info, grp); 1487 retval = groups_search(current->group_info, grp);
1505 put_group_info(current->group_info);
1506 } 1488 }
1507 return retval; 1489 return retval;
1508} 1490}
@@ -1630,20 +1612,21 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
1630asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) 1612asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
1631{ 1613{
1632 struct rlimit new_rlim, *old_rlim; 1614 struct rlimit new_rlim, *old_rlim;
1615 unsigned long it_prof_secs;
1633 int retval; 1616 int retval;
1634 1617
1635 if (resource >= RLIM_NLIMITS) 1618 if (resource >= RLIM_NLIMITS)
1636 return -EINVAL; 1619 return -EINVAL;
1637 if(copy_from_user(&new_rlim, rlim, sizeof(*rlim))) 1620 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1638 return -EFAULT; 1621 return -EFAULT;
1639 if (new_rlim.rlim_cur > new_rlim.rlim_max) 1622 if (new_rlim.rlim_cur > new_rlim.rlim_max)
1640 return -EINVAL; 1623 return -EINVAL;
1641 old_rlim = current->signal->rlim + resource; 1624 old_rlim = current->signal->rlim + resource;
1642 if ((new_rlim.rlim_max > old_rlim->rlim_max) && 1625 if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
1643 !capable(CAP_SYS_RESOURCE)) 1626 !capable(CAP_SYS_RESOURCE))
1644 return -EPERM; 1627 return -EPERM;
1645 if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN) 1628 if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN)
1646 return -EPERM; 1629 return -EPERM;
1647 1630
1648 retval = security_task_setrlimit(resource, &new_rlim); 1631 retval = security_task_setrlimit(resource, &new_rlim);
1649 if (retval) 1632 if (retval)
@@ -1653,19 +1636,40 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
1653 *old_rlim = new_rlim; 1636 *old_rlim = new_rlim;
1654 task_unlock(current->group_leader); 1637 task_unlock(current->group_leader);
1655 1638
1656 if (resource == RLIMIT_CPU && new_rlim.rlim_cur != RLIM_INFINITY && 1639 if (resource != RLIMIT_CPU)
1657 (cputime_eq(current->signal->it_prof_expires, cputime_zero) || 1640 goto out;
1658 new_rlim.rlim_cur <= cputime_to_secs( 1641
1659 current->signal->it_prof_expires))) { 1642 /*
1660 cputime_t cputime = secs_to_cputime(new_rlim.rlim_cur); 1643 * RLIMIT_CPU handling. Note that the kernel fails to return an error
1644 * code if it rejected the user's attempt to set RLIMIT_CPU. This is a
1645 * very long-standing error, and fixing it now risks breakage of
1646 * applications, so we live with it
1647 */
1648 if (new_rlim.rlim_cur == RLIM_INFINITY)
1649 goto out;
1650
1651 it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
1652 if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) {
1653 unsigned long rlim_cur = new_rlim.rlim_cur;
1654 cputime_t cputime;
1655
1656 if (rlim_cur == 0) {
1657 /*
1658 * The caller is asking for an immediate RLIMIT_CPU
1659 * expiry. But we use the zero value to mean "it was
1660 * never set". So let's cheat and make it one second
1661 * instead
1662 */
1663 rlim_cur = 1;
1664 }
1665 cputime = secs_to_cputime(rlim_cur);
1661 read_lock(&tasklist_lock); 1666 read_lock(&tasklist_lock);
1662 spin_lock_irq(&current->sighand->siglock); 1667 spin_lock_irq(&current->sighand->siglock);
1663 set_process_cpu_timer(current, CPUCLOCK_PROF, 1668 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
1664 &cputime, NULL);
1665 spin_unlock_irq(&current->sighand->siglock); 1669 spin_unlock_irq(&current->sighand->siglock);
1666 read_unlock(&tasklist_lock); 1670 read_unlock(&tasklist_lock);
1667 } 1671 }
1668 1672out:
1669 return 0; 1673 return 0;
1670} 1674}
1671 1675
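The reworked sys_setrlimit() applies an RLIMIT_CPU change by arming a CPUCLOCK_PROF process timer, and (per the comment above) maps a zero soft limit to one second rather than reporting an error. From user space the entry point is still setrlimit(2); a small illustrative sketch of lowering the soft CPU limit (the 5-second value here is arbitrary):

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
	struct rlimit rl;

	if (getrlimit(RLIMIT_CPU, &rl) < 0) {
		perror("getrlimit");
		return 1;
	}

	/* lower only the soft limit; raising rlim_max needs CAP_SYS_RESOURCE */
	rl.rlim_cur = 5;	/* seconds of CPU time before SIGXCPU is delivered */

	if (setrlimit(RLIMIT_CPU, &rl) < 0) {
		perror("setrlimit");
		return 1;
	}

	printf("RLIMIT_CPU soft=%lu hard=%lu\n",
	       (unsigned long)rl.rlim_cur, (unsigned long)rl.rlim_max);
	return 0;
}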
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 32b48e8ee36e..e82726faeeff 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -742,18 +742,18 @@ static ctl_table vm_table[] = {
742 { 742 {
743 .ctl_name = VM_DIRTY_WB_CS, 743 .ctl_name = VM_DIRTY_WB_CS,
744 .procname = "dirty_writeback_centisecs", 744 .procname = "dirty_writeback_centisecs",
745 .data = &dirty_writeback_centisecs, 745 .data = &dirty_writeback_interval,
746 .maxlen = sizeof(dirty_writeback_centisecs), 746 .maxlen = sizeof(dirty_writeback_interval),
747 .mode = 0644, 747 .mode = 0644,
748 .proc_handler = &dirty_writeback_centisecs_handler, 748 .proc_handler = &dirty_writeback_centisecs_handler,
749 }, 749 },
750 { 750 {
751 .ctl_name = VM_DIRTY_EXPIRE_CS, 751 .ctl_name = VM_DIRTY_EXPIRE_CS,
752 .procname = "dirty_expire_centisecs", 752 .procname = "dirty_expire_centisecs",
753 .data = &dirty_expire_centisecs, 753 .data = &dirty_expire_interval,
754 .maxlen = sizeof(dirty_expire_centisecs), 754 .maxlen = sizeof(dirty_expire_interval),
755 .mode = 0644, 755 .mode = 0644,
756 .proc_handler = &proc_dointvec, 756 .proc_handler = &proc_dointvec_userhz_jiffies,
757 }, 757 },
758 { 758 {
759 .ctl_name = VM_NR_PDFLUSH_THREADS, 759 .ctl_name = VM_NR_PDFLUSH_THREADS,
@@ -848,9 +848,8 @@ static ctl_table vm_table[] = {
848 .data = &laptop_mode, 848 .data = &laptop_mode,
849 .maxlen = sizeof(laptop_mode), 849 .maxlen = sizeof(laptop_mode),
850 .mode = 0644, 850 .mode = 0644,
851 .proc_handler = &proc_dointvec, 851 .proc_handler = &proc_dointvec_jiffies,
852 .strategy = &sysctl_intvec, 852 .strategy = &sysctl_jiffies,
853 .extra1 = &zero,
854 }, 853 },
855 { 854 {
856 .ctl_name = VM_BLOCK_DUMP, 855 .ctl_name = VM_BLOCK_DUMP,
@@ -2054,6 +2053,8 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2054 int write, void *data) 2053 int write, void *data)
2055{ 2054{
2056 if (write) { 2055 if (write) {
2056 if (*lvalp > LONG_MAX / HZ)
2057 return 1;
2057 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ); 2058 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2058 } else { 2059 } else {
2059 int val = *valp; 2060 int val = *valp;
@@ -2075,6 +2076,8 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2075 int write, void *data) 2076 int write, void *data)
2076{ 2077{
2077 if (write) { 2078 if (write) {
2079 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2080 return 1;
2078 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp); 2081 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2079 } else { 2082 } else {
2080 int val = *valp; 2083 int val = *valp;
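Both jiffies converters above gain an overflow guard before multiplying the user-supplied value by HZ: if the product cannot fit, the handler returns 1 and the write is rejected instead of wrapping. The arithmetic reduced to a stand-alone sketch (plain C, not kernel code; the HZ value and function name are illustrative only):

#include <limits.h>
#include <stdio.h>

#define HZ 250	/* illustrative; the real value is a kernel config option */

/* Convert a seconds count to jiffies, rejecting values that would overflow,
 * in the same way the proc handlers above now do before *valp = *lvalp * HZ. */
static int secs_to_jiffies_checked(unsigned long val, long *out)
{
	if (val > LONG_MAX / HZ)
		return -1;	/* corresponds to the "return 1" rejection */
	*out = (long)(val * HZ);
	return 0;
}

int main(void)
{
	long j;

	if (secs_to_jiffies_checked(10, &j) == 0)
		printf("10s -> %ld jiffies\n", j);
	if (secs_to_jiffies_checked(ULONG_MAX, &j) != 0)
		printf("rejected: would overflow\n");
	return 0;
}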
diff --git a/kernel/time.c b/kernel/time.c
index 804539165d8b..e00a97b77241 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -202,24 +202,6 @@ asmlinkage long sys_settimeofday(struct timeval __user *tv,
202 return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL); 202 return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL);
203} 203}
204 204
205long pps_offset; /* pps time offset (us) */
206long pps_jitter = MAXTIME; /* time dispersion (jitter) (us) */
207
208long pps_freq; /* frequency offset (scaled ppm) */
209long pps_stabil = MAXFREQ; /* frequency dispersion (scaled ppm) */
210
211long pps_valid = PPS_VALID; /* pps signal watchdog counter */
212
213int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */
214
215long pps_jitcnt; /* jitter limit exceeded */
216long pps_calcnt; /* calibration intervals */
217long pps_errcnt; /* calibration errors */
218long pps_stbcnt; /* stability limit exceeded */
219
220/* hook for a loadable hardpps kernel module */
221void (*hardpps_ptr)(struct timeval *);
222
223/* we call this to notify the arch when the clock is being 205/* we call this to notify the arch when the clock is being
224 * controlled. If no such arch routine, do nothing. 206 * controlled. If no such arch routine, do nothing.
225 */ 207 */
@@ -279,7 +261,7 @@ int do_adjtimex(struct timex *txc)
279 result = -EINVAL; 261 result = -EINVAL;
280 goto leave; 262 goto leave;
281 } 263 }
282 time_freq = txc->freq - pps_freq; 264 time_freq = txc->freq;
283 } 265 }
284 266
285 if (txc->modes & ADJ_MAXERROR) { 267 if (txc->modes & ADJ_MAXERROR) {
@@ -312,10 +294,8 @@ int do_adjtimex(struct timex *txc)
312 if ((time_next_adjust = txc->offset) == 0) 294 if ((time_next_adjust = txc->offset) == 0)
313 time_adjust = 0; 295 time_adjust = 0;
314 } 296 }
315 else if ( time_status & (STA_PLL | STA_PPSTIME) ) { 297 else if (time_status & STA_PLL) {
316 ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) == 298 ltemp = txc->offset;
317 (STA_PPSTIME | STA_PPSSIGNAL) ?
318 pps_offset : txc->offset;
319 299
320 /* 300 /*
321 * Scale the phase adjustment and 301 * Scale the phase adjustment and
@@ -356,23 +336,14 @@ int do_adjtimex(struct timex *txc)
356 } 336 }
357 time_freq = min(time_freq, time_tolerance); 337 time_freq = min(time_freq, time_tolerance);
358 time_freq = max(time_freq, -time_tolerance); 338 time_freq = max(time_freq, -time_tolerance);
359 } /* STA_PLL || STA_PPSTIME */ 339 } /* STA_PLL */
360 } /* txc->modes & ADJ_OFFSET */ 340 } /* txc->modes & ADJ_OFFSET */
361 if (txc->modes & ADJ_TICK) { 341 if (txc->modes & ADJ_TICK) {
362 tick_usec = txc->tick; 342 tick_usec = txc->tick;
363 tick_nsec = TICK_USEC_TO_NSEC(tick_usec); 343 tick_nsec = TICK_USEC_TO_NSEC(tick_usec);
364 } 344 }
365 } /* txc->modes */ 345 } /* txc->modes */
366leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0 346leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0)
367 || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0
368 && (time_status & STA_PPSSIGNAL) == 0)
369 /* p. 24, (b) */
370 || ((time_status & (STA_PPSTIME|STA_PPSJITTER))
371 == (STA_PPSTIME|STA_PPSJITTER))
372 /* p. 24, (c) */
373 || ((time_status & STA_PPSFREQ) != 0
374 && (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0))
375 /* p. 24, (d) */
376 result = TIME_ERROR; 347 result = TIME_ERROR;
377 348
378 if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) 349 if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
@@ -380,7 +351,7 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
380 else { 351 else {
381 txc->offset = shift_right(time_offset, SHIFT_UPDATE); 352 txc->offset = shift_right(time_offset, SHIFT_UPDATE);
382 } 353 }
383 txc->freq = time_freq + pps_freq; 354 txc->freq = time_freq;
384 txc->maxerror = time_maxerror; 355 txc->maxerror = time_maxerror;
385 txc->esterror = time_esterror; 356 txc->esterror = time_esterror;
386 txc->status = time_status; 357 txc->status = time_status;
@@ -388,14 +359,16 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
388 txc->precision = time_precision; 359 txc->precision = time_precision;
389 txc->tolerance = time_tolerance; 360 txc->tolerance = time_tolerance;
390 txc->tick = tick_usec; 361 txc->tick = tick_usec;
391 txc->ppsfreq = pps_freq; 362
392 txc->jitter = pps_jitter >> PPS_AVG; 363 /* PPS is not implemented, so these are zero */
393 txc->shift = pps_shift; 364 txc->ppsfreq = 0;
394 txc->stabil = pps_stabil; 365 txc->jitter = 0;
395 txc->jitcnt = pps_jitcnt; 366 txc->shift = 0;
396 txc->calcnt = pps_calcnt; 367 txc->stabil = 0;
397 txc->errcnt = pps_errcnt; 368 txc->jitcnt = 0;
398 txc->stbcnt = pps_stbcnt; 369 txc->calcnt = 0;
370 txc->errcnt = 0;
371 txc->stbcnt = 0;
399 write_sequnlock_irq(&xtime_lock); 372 write_sequnlock_irq(&xtime_lock);
400 do_gettimeofday(&txc->time); 373 do_gettimeofday(&txc->time);
401 notify_arch_cmos_timer(); 374 notify_arch_cmos_timer();
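With the unused PPS discipline removed, do_adjtimex() now returns zeros in the PPS statistics fields instead of the old pps_* globals; a read-only adjtimex(2) call otherwise behaves as before. A small user-space sketch (glibc wrapper assumed, values only printed, nothing adjusted):

#include <stdio.h>
#include <sys/timex.h>

int main(void)
{
	struct timex tx = { .modes = 0 };	/* modes == 0: query, do not adjust */
	int state = adjtimex(&tx);

	if (state < 0) {
		perror("adjtimex");
		return 1;
	}

	printf("freq=%ld offset=%ld status=0x%x\n",
	       tx.freq, tx.offset, (unsigned)tx.status);
	/* these are now hard-wired to zero by the kernel */
	printf("ppsfreq=%ld jitter=%ld stabil=%ld\n",
	       tx.ppsfreq, tx.jitter, tx.stabil);
	return state == TIME_ERROR;
}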
diff --git a/kernel/timer.c b/kernel/timer.c
index 2410c18dbeb1..ab189dd187cb 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -86,7 +86,8 @@ struct tvec_t_base_s {
86} ____cacheline_aligned_in_smp; 86} ____cacheline_aligned_in_smp;
87 87
88typedef struct tvec_t_base_s tvec_base_t; 88typedef struct tvec_t_base_s tvec_base_t;
89static DEFINE_PER_CPU(tvec_base_t, tvec_bases); 89static DEFINE_PER_CPU(tvec_base_t *, tvec_bases);
90static tvec_base_t boot_tvec_bases;
90 91
91static inline void set_running_timer(tvec_base_t *base, 92static inline void set_running_timer(tvec_base_t *base,
92 struct timer_list *timer) 93 struct timer_list *timer)
@@ -157,7 +158,7 @@ EXPORT_SYMBOL(__init_timer_base);
157void fastcall init_timer(struct timer_list *timer) 158void fastcall init_timer(struct timer_list *timer)
158{ 159{
159 timer->entry.next = NULL; 160 timer->entry.next = NULL;
160 timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base; 161 timer->base = &per_cpu(tvec_bases, raw_smp_processor_id())->t_base;
161} 162}
162EXPORT_SYMBOL(init_timer); 163EXPORT_SYMBOL(init_timer);
163 164
@@ -218,7 +219,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
218 ret = 1; 219 ret = 1;
219 } 220 }
220 221
221 new_base = &__get_cpu_var(tvec_bases); 222 new_base = __get_cpu_var(tvec_bases);
222 223
223 if (base != &new_base->t_base) { 224 if (base != &new_base->t_base) {
224 /* 225 /*
@@ -258,7 +259,7 @@ EXPORT_SYMBOL(__mod_timer);
258 */ 259 */
259void add_timer_on(struct timer_list *timer, int cpu) 260void add_timer_on(struct timer_list *timer, int cpu)
260{ 261{
261 tvec_base_t *base = &per_cpu(tvec_bases, cpu); 262 tvec_base_t *base = per_cpu(tvec_bases, cpu);
262 unsigned long flags; 263 unsigned long flags;
263 264
264 BUG_ON(timer_pending(timer) || !timer->function); 265 BUG_ON(timer_pending(timer) || !timer->function);
@@ -504,7 +505,7 @@ unsigned long next_timer_interrupt(void)
504 } 505 }
505 hr_expires += jiffies; 506 hr_expires += jiffies;
506 507
507 base = &__get_cpu_var(tvec_bases); 508 base = __get_cpu_var(tvec_bases);
508 spin_lock(&base->t_base.lock); 509 spin_lock(&base->t_base.lock);
509 expires = base->timer_jiffies + (LONG_MAX >> 1); 510 expires = base->timer_jiffies + (LONG_MAX >> 1);
510 list = NULL; 511 list = NULL;
@@ -696,18 +697,9 @@ static void second_overflow(void)
696 697
697 /* 698 /*
698 * Compute the frequency estimate and additional phase adjustment due 699 * Compute the frequency estimate and additional phase adjustment due
699 * to frequency error for the next second. When the PPS signal is 700 * to frequency error for the next second.
700 * engaged, gnaw on the watchdog counter and update the frequency
701 * computed by the pll and the PPS signal.
702 */ 701 */
703 pps_valid++; 702 ltemp = time_freq;
704 if (pps_valid == PPS_VALID) { /* PPS signal lost */
705 pps_jitter = MAXTIME;
706 pps_stabil = MAXFREQ;
707 time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
708 STA_PPSWANDER | STA_PPSERROR);
709 }
710 ltemp = time_freq + pps_freq;
711 time_adj += shift_right(ltemp,(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE)); 703 time_adj += shift_right(ltemp,(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE));
712 704
713#if HZ == 100 705#if HZ == 100
@@ -901,7 +893,7 @@ EXPORT_SYMBOL(xtime_lock);
901 */ 893 */
902static void run_timer_softirq(struct softirq_action *h) 894static void run_timer_softirq(struct softirq_action *h)
903{ 895{
904 tvec_base_t *base = &__get_cpu_var(tvec_bases); 896 tvec_base_t *base = __get_cpu_var(tvec_bases);
905 897
906 hrtimer_run_queues(); 898 hrtimer_run_queues();
907 if (time_after_eq(jiffies, base->timer_jiffies)) 899 if (time_after_eq(jiffies, base->timer_jiffies))
@@ -914,6 +906,7 @@ static void run_timer_softirq(struct softirq_action *h)
914void run_local_timers(void) 906void run_local_timers(void)
915{ 907{
916 raise_softirq(TIMER_SOFTIRQ); 908 raise_softirq(TIMER_SOFTIRQ);
909 softlockup_tick();
917} 910}
918 911
919/* 912/*
@@ -944,7 +937,6 @@ void do_timer(struct pt_regs *regs)
944 /* prevent loading jiffies before storing new jiffies_64 value. */ 937 /* prevent loading jiffies before storing new jiffies_64 value. */
945 barrier(); 938 barrier();
946 update_times(); 939 update_times();
947 softlockup_tick(regs);
948} 940}
949 941
950#ifdef __ARCH_WANT_SYS_ALARM 942#ifdef __ARCH_WANT_SYS_ALARM
@@ -955,19 +947,7 @@ void do_timer(struct pt_regs *regs)
955 */ 947 */
956asmlinkage unsigned long sys_alarm(unsigned int seconds) 948asmlinkage unsigned long sys_alarm(unsigned int seconds)
957{ 949{
958 struct itimerval it_new, it_old; 950 return alarm_setitimer(seconds);
959 unsigned int oldalarm;
960
961 it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
962 it_new.it_value.tv_sec = seconds;
963 it_new.it_value.tv_usec = 0;
964 do_setitimer(ITIMER_REAL, &it_new, &it_old);
965 oldalarm = it_old.it_value.tv_sec;
966 /* ehhh.. We can't return 0 if we have an alarm pending.. */
967 /* And we'd better return too much than too little anyway */
968 if ((!oldalarm && it_old.it_value.tv_usec) || it_old.it_value.tv_usec >= 500000)
969 oldalarm++;
970 return oldalarm;
971} 951}
972 952
973#endif 953#endif
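sys_alarm() now delegates to alarm_setitimer() (added to kernel/itimer.c in this same series), which carries the logic deleted above: program a one-shot ITIMER_REAL for the requested whole seconds and round the remaining time of the previous alarm up so the return value never under-reports. A user-space analogue of that rounding, reconstructed from the removed lines and built on setitimer(2) (the helper name alarm_like is illustrative only):

#include <stdio.h>
#include <sys/time.h>

/* Behaves like alarm(3): arm a one-shot ITIMER_REAL and return the whole
 * seconds left on the previous alarm, rounded up rather than down. */
static unsigned int alarm_like(unsigned int seconds)
{
	struct itimerval it_new, it_old;
	unsigned int oldalarm;

	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
	it_new.it_value.tv_sec = seconds;
	it_new.it_value.tv_usec = 0;
	setitimer(ITIMER_REAL, &it_new, &it_old);

	oldalarm = it_old.it_value.tv_sec;
	/* never return 0 while an alarm is still pending; round 500ms+ up */
	if ((!oldalarm && it_old.it_value.tv_usec) ||
	    it_old.it_value.tv_usec >= 500000)
		oldalarm++;
	return oldalarm;
}

int main(void)
{
	alarm_like(10);					/* arm a 10 second alarm */
	printf("remaining: %u\n", alarm_like(3));	/* reprogram, report leftover */
	return 0;
}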
@@ -1256,12 +1236,32 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
1256 return 0; 1236 return 0;
1257} 1237}
1258 1238
1259static void __devinit init_timers_cpu(int cpu) 1239static int __devinit init_timers_cpu(int cpu)
1260{ 1240{
1261 int j; 1241 int j;
1262 tvec_base_t *base; 1242 tvec_base_t *base;
1263 1243
1264 base = &per_cpu(tvec_bases, cpu); 1244 base = per_cpu(tvec_bases, cpu);
1245 if (!base) {
1246 static char boot_done;
1247
1248 /*
1249 * Cannot do allocation in init_timers as that runs before the
1250 * allocator initializes (and would waste memory if there are
1251 * more possible CPUs than will ever be installed/brought up).
1252 */
1253 if (boot_done) {
1254 base = kmalloc_node(sizeof(*base), GFP_KERNEL,
1255 cpu_to_node(cpu));
1256 if (!base)
1257 return -ENOMEM;
1258 memset(base, 0, sizeof(*base));
1259 } else {
1260 base = &boot_tvec_bases;
1261 boot_done = 1;
1262 }
1263 per_cpu(tvec_bases, cpu) = base;
1264 }
1265 spin_lock_init(&base->t_base.lock); 1265 spin_lock_init(&base->t_base.lock);
1266 for (j = 0; j < TVN_SIZE; j++) { 1266 for (j = 0; j < TVN_SIZE; j++) {
1267 INIT_LIST_HEAD(base->tv5.vec + j); 1267 INIT_LIST_HEAD(base->tv5.vec + j);
@@ -1273,6 +1273,7 @@ static void __devinit init_timers_cpu(int cpu)
1273 INIT_LIST_HEAD(base->tv1.vec + j); 1273 INIT_LIST_HEAD(base->tv1.vec + j);
1274 1274
1275 base->timer_jiffies = jiffies; 1275 base->timer_jiffies = jiffies;
1276 return 0;
1276} 1277}
1277 1278
1278#ifdef CONFIG_HOTPLUG_CPU 1279#ifdef CONFIG_HOTPLUG_CPU
@@ -1295,8 +1296,8 @@ static void __devinit migrate_timers(int cpu)
1295 int i; 1296 int i;
1296 1297
1297 BUG_ON(cpu_online(cpu)); 1298 BUG_ON(cpu_online(cpu));
1298 old_base = &per_cpu(tvec_bases, cpu); 1299 old_base = per_cpu(tvec_bases, cpu);
1299 new_base = &get_cpu_var(tvec_bases); 1300 new_base = get_cpu_var(tvec_bases);
1300 1301
1301 local_irq_disable(); 1302 local_irq_disable();
1302 spin_lock(&new_base->t_base.lock); 1303 spin_lock(&new_base->t_base.lock);
@@ -1326,7 +1327,8 @@ static int __devinit timer_cpu_notify(struct notifier_block *self,
1326 long cpu = (long)hcpu; 1327 long cpu = (long)hcpu;
1327 switch(action) { 1328 switch(action) {
1328 case CPU_UP_PREPARE: 1329 case CPU_UP_PREPARE:
1329 init_timers_cpu(cpu); 1330 if (init_timers_cpu(cpu) < 0)
1331 return NOTIFY_BAD;
1330 break; 1332 break;
1331#ifdef CONFIG_HOTPLUG_CPU 1333#ifdef CONFIG_HOTPLUG_CPU
1332 case CPU_DEAD: 1334 case CPU_DEAD:
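tvec_bases becomes a per-CPU pointer: the boot CPU keeps using the static boot_tvec_bases, and each later CPU gets a node-local base from kmalloc_node() during CPU_UP_PREPARE, with the notifier now able to fail the bring-up with NOTIFY_BAD on allocation error. The general "static instance for boot, allocate lazily afterwards" shape, reduced to a stand-alone sketch (plain C; struct timer_base, bases[] and NR_CPUS are illustrative stand-ins, and malloc stands in for kmalloc_node):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NR_CPUS 4

struct timer_base { long timer_jiffies; /* ... */ };

static struct timer_base boot_base;		/* usable before the allocator is up */
static struct timer_base *bases[NR_CPUS];	/* stands in for the per-CPU pointer */

static int init_timers_cpu(int cpu)
{
	struct timer_base *base = bases[cpu];

	if (!base) {
		static int boot_done;

		if (boot_done) {
			base = malloc(sizeof(*base));	/* kmalloc_node() in the kernel */
			if (!base)
				return -1;		/* -ENOMEM -> NOTIFY_BAD */
			memset(base, 0, sizeof(*base));
		} else {
			base = &boot_base;		/* first CPU: allocator not ready yet */
			boot_done = 1;
		}
		bases[cpu] = base;
	}
	base->timer_jiffies = 0;
	return 0;
}

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu%d: %s\n", cpu, init_timers_cpu(cpu) ? "failed" : "ok");
	return 0;
}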
diff --git a/kernel/user.c b/kernel/user.c
index d9deae43a9ab..2116642f42c6 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -105,15 +105,19 @@ void free_uid(struct user_struct *up)
105{ 105{
106 unsigned long flags; 106 unsigned long flags;
107 107
108 if (!up)
109 return;
110
108 local_irq_save(flags); 111 local_irq_save(flags);
109 if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) { 112 if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
110 uid_hash_remove(up); 113 uid_hash_remove(up);
114 spin_unlock_irqrestore(&uidhash_lock, flags);
111 key_put(up->uid_keyring); 115 key_put(up->uid_keyring);
112 key_put(up->session_keyring); 116 key_put(up->session_keyring);
113 kmem_cache_free(uid_cachep, up); 117 kmem_cache_free(uid_cachep, up);
114 spin_unlock(&uidhash_lock); 118 } else {
119 local_irq_restore(flags);
115 } 120 }
116 local_irq_restore(flags);
117} 121}
118 122
119struct user_struct * alloc_uid(uid_t uid) 123struct user_struct * alloc_uid(uid_t uid)
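free_uid() now bails out early on a NULL pointer and, when the count does drop to zero, releases uidhash_lock before calling key_put() and kmem_cache_free(), so only the hash removal happens under the lock. The shape is the usual atomic_dec_and_lock() teardown; a user-space analogue using C11 atomics and a pthread mutex (illustrative only, and unlike the kernel helper this simplified dec_and_lock always takes the lock rather than avoiding it in the common case):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct user {
	atomic_int count;
	/* ... per-user state ... */
};

static pthread_mutex_t hash_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for atomic_dec_and_lock(): returns 1 with hash_lock held
 * iff this call dropped the count to zero. */
static int dec_and_lock(atomic_int *count, pthread_mutex_t *lock)
{
	pthread_mutex_lock(lock);
	if (atomic_fetch_sub(count, 1) == 1)
		return 1;		/* we released the last reference */
	pthread_mutex_unlock(lock);
	return 0;
}

static void put_user_struct(struct user *up)
{
	if (!up)
		return;			/* mirrors the new NULL check */

	if (dec_and_lock(&up->count, &hash_lock)) {
		/* remove from the lookup structure while still locked ... */
		pthread_mutex_unlock(&hash_lock);
		/* ... then do the expensive teardown outside the lock */
		free(up);
	}
}

int main(void)
{
	struct user *up = calloc(1, sizeof(*up));
	atomic_init(&up->count, 2);
	put_user_struct(up);	/* count 2 -> 1, nothing freed */
	put_user_struct(up);	/* count 1 -> 0, removed and freed */
	put_user_struct(NULL);	/* harmless */
	puts("done");
	return 0;
}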