author     Ingo Molnar <mingo@elte.hu>  2009-03-26 13:49:48 -0400
committer  Ingo Molnar <mingo@elte.hu>  2009-03-26 13:49:48 -0400
commit     18ffa418aead13c56515ac74cd26105102128aca (patch)
tree       2096ea8db3b2594bd25ad39a70edc691219f669b /kernel
parent     ab76f3d771590d5c89faa3219559c5d3fc0ce0c2 (diff)
parent     8e0ee43bc2c3e19db56a4adaa9a9b04ce885cd84 (diff)
Merge commit 'v2.6.29' into x86/setup-lzma
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                    |   4
-rw-r--r--  kernel/acct.c                      |   2
-rw-r--r--  kernel/async.c                     | 115
-rw-r--r--  kernel/capability.c                |   4
-rw-r--r--  kernel/cgroup.c                    |  33
-rw-r--r--  kernel/cpuset.c                    |  15
-rw-r--r--  kernel/dma-coherent.c              |  47
-rw-r--r--  kernel/exec_domain.c               |   3
-rw-r--r--  kernel/exit.c                      |  20
-rw-r--r--  kernel/fork.c                      |  37
-rw-r--r--  kernel/futex.c                     |  70
-rw-r--r--  kernel/hrtimer.c                   |  49
-rw-r--r--  kernel/irq/chip.c                  |   2
-rw-r--r--  kernel/irq/handle.c                |  16
-rw-r--r--  kernel/irq/manage.c                |  10
-rw-r--r--  kernel/irq/numa_migrate.c          |   7
-rw-r--r--  kernel/itimer.c                    |  11
-rw-r--r--  kernel/kallsyms.c                  |  16
-rw-r--r--  kernel/kexec.c                     |  12
-rw-r--r--  kernel/kprobes.c                   |   2
-rw-r--r--  kernel/module.c                    |  45
-rw-r--r--  kernel/posix-cpu-timers.c          | 193
-rw-r--r--  kernel/posix-timers.c              |  43
-rw-r--r--  kernel/power/Makefile              |   5
-rw-r--r--  kernel/power/console.c             |   6
-rw-r--r--  kernel/power/disk.c                |  32
-rw-r--r--  kernel/power/main.c                |  34
-rw-r--r--  kernel/power/swap.c                |   5
-rw-r--r--  kernel/power/user.c                |   8
-rw-r--r--  kernel/printk.c                    |  22
-rw-r--r--  kernel/profile.c                   |   3
-rw-r--r--  kernel/ptrace.c                    |   2
-rw-r--r--  kernel/rcuclassic.c                |   6
-rw-r--r--  kernel/rcupdate.c                  |  12
-rw-r--r--  kernel/rcupreempt.c                |   3
-rw-r--r--  kernel/rcutorture.c                | 113
-rw-r--r--  kernel/rcutree.c                   |   6
-rw-r--r--  kernel/relay.c                     |   4
-rw-r--r--  kernel/resource.c                  |   1
-rw-r--r--  kernel/sched.c                     | 107
-rw-r--r--  kernel/sched_debug.c               |  21
-rw-r--r--  kernel/sched_fair.c                |  58
-rw-r--r--  kernel/sched_rt.c                  |   4
-rw-r--r--  kernel/sched_stats.h               |  48
-rw-r--r--  kernel/seccomp.c                   |   7
-rw-r--r--  kernel/signal.c                    |  77
-rw-r--r--  kernel/smp.c                       |  36
-rw-r--r--  kernel/softirq.c                   |   1
-rw-r--r--  kernel/softlockup.c                |   9
-rw-r--r--  kernel/sys.c                       | 117
-rw-r--r--  kernel/sys_ni.c                    |   1
-rw-r--r--  kernel/sysctl.c                    |  20
-rw-r--r--  kernel/time.c                      |  14
-rw-r--r--  kernel/time/tick-common.c          |  26
-rw-r--r--  kernel/time/tick-sched.c           |   2
-rw-r--r--  kernel/timer.c                     |  18
-rw-r--r--  kernel/trace/Kconfig               |  25
-rw-r--r--  kernel/trace/ftrace.c              |  38
-rw-r--r--  kernel/trace/ring_buffer.c         |  15
-rw-r--r--  kernel/trace/trace.c               |   5
-rw-r--r--  kernel/trace/trace_irqsoff.c       |   1
-rw-r--r--  kernel/trace/trace_mmiotrace.c     |  14
-rw-r--r--  kernel/trace/trace_sched_wakeup.c  |   1
-rw-r--r--  kernel/trace/trace_selftest.c      |  19
-rw-r--r--  kernel/tsacct.c                    |   6
-rw-r--r--  kernel/uid16.c                     |  39
-rw-r--r--  kernel/up.c                        |  21
-rw-r--r--  kernel/user.c                      |  35
-rw-r--r--  kernel/user_namespace.c            |  21
-rw-r--r--  kernel/wait.c                      |  59
-rw-r--r--  kernel/workqueue.c                 |  20
71 files changed, 1272 insertions(+), 631 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 2921d90ce32f..e4791b3ba55d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -41,6 +41,9 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
 obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
 obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
+ifneq ($(CONFIG_SMP),y)
+obj-y += up.o
+endif
 obj-$(CONFIG_SMP) += spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
@@ -48,6 +51,7 @@ obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_PM) += power/
+obj-$(CONFIG_FREEZER) += power/
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
diff --git a/kernel/acct.c b/kernel/acct.c
index d57b7cbb98b6..7afa31564162 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -277,7 +277,7 @@ static int acct_on(char *name)
  * should be written. If the filename is NULL, accounting will be
  * shutdown.
  */
-asmlinkage long sys_acct(const char __user *name)
+SYSCALL_DEFINE1(acct, const char __user *, name)
 {
 	int error;
 
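
The acct.c hunk above is part of the tree-wide 2.6.29 conversion of syscall
entry points to the SYSCALL_DEFINEn() wrapper macros. As a rough sketch of
the idea on a wrapper-enabled architecture (not the verbatim macro output;
the real macro uses SYSCALL_ALIAS() and varies per architecture and with
CONFIG_SYSCALL_WRAPPERS), the macro generates a stub taking full-width
arguments so that 32-bit values are always properly sign-extended:

    static inline long SYSC_acct(const char __user *name);

    asmlinkage long SyS_acct(long name)
    {
    	/* arguments arrive as full-width longs and are cast back */
    	return SYSC_acct((const char __user *)name);
    }
    asmlinkage long sys_acct(const char __user *name)
    	__attribute__((alias("SyS_acct")));	/* really SYSCALL_ALIAS() */

    static inline long SYSC_acct(const char __user *name)
    {
    	/* ... original body of sys_acct() ... */
    	return 0;
    }
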
diff --git a/kernel/async.c b/kernel/async.c
index f286e9f2b736..f565891f2c9b 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -54,6 +54,7 @@ asynchronous and synchronous parts of the kernel.
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/kthread.h>
+#include <linux/delay.h>
 #include <asm/atomic.h>
 
 static async_cookie_t next_cookie = 1;
@@ -90,12 +91,12 @@ extern int initcall_debug;
 static async_cookie_t __lowest_in_progress(struct list_head *running)
 {
 	struct async_entry *entry;
-	if (!list_empty(&async_pending)) {
-		entry = list_first_entry(&async_pending,
+	if (!list_empty(running)) {
+		entry = list_first_entry(running,
 			struct async_entry, list);
 		return entry->cookie;
-	} else if (!list_empty(running)) {
-		entry = list_first_entry(running,
+	} else if (!list_empty(&async_pending)) {
+		entry = list_first_entry(&async_pending,
 			struct async_entry, list);
 		return entry->cookie;
 	} else {
@@ -104,6 +105,17 @@ static async_cookie_t __lowest_in_progress(struct list_head *running)
 	}
 
 }
+
+static async_cookie_t lowest_in_progress(struct list_head *running)
+{
+	unsigned long flags;
+	async_cookie_t ret;
+
+	spin_lock_irqsave(&async_lock, flags);
+	ret = __lowest_in_progress(running);
+	spin_unlock_irqrestore(&async_lock, flags);
+	return ret;
+}
 /*
  * pick the first pending entry and run it
  */
@@ -121,21 +133,23 @@ static void run_one_entry(void)
 	entry = list_first_entry(&async_pending, struct async_entry, list);
 
 	/* 2) move it to the running queue */
-	list_del(&entry->list);
-	list_add_tail(&entry->list, &async_running);
+	list_move_tail(&entry->list, entry->running);
 	spin_unlock_irqrestore(&async_lock, flags);
 
 	/* 3) run it (and print duration)*/
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
-		printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current));
+		printk("calling %lli_%pF @ %i\n", (long long)entry->cookie,
+			entry->func, task_pid_nr(current));
 		calltime = ktime_get();
 	}
 	entry->func(entry->data, entry->cookie);
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
 		rettime = ktime_get();
 		delta = ktime_sub(rettime, calltime);
-		printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie,
-			entry->func, ktime_to_ns(delta) >> 10);
+		printk("initcall %lli_%pF returned 0 after %lld usecs\n",
+			(long long)entry->cookie,
+			entry->func,
+			(long long)ktime_to_ns(delta) >> 10);
 	}
 
 	/* 4) remove it from the running queue */
@@ -194,18 +208,44 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct l
 	return newcookie;
 }
 
+/**
+ * async_schedule - schedule a function for asynchronous execution
+ * @ptr: function to execute asynchronously
+ * @data: data pointer to pass to the function
+ *
+ * Returns an async_cookie_t that may be used for checkpointing later.
+ * Note: This function may be called from atomic or non-atomic contexts.
+ */
 async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
 {
-	return __async_schedule(ptr, data, &async_pending);
+	return __async_schedule(ptr, data, &async_running);
 }
 EXPORT_SYMBOL_GPL(async_schedule);
 
-async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running)
+/**
+ * async_schedule_domain - schedule a function for asynchronous execution within a certain domain
+ * @ptr: function to execute asynchronously
+ * @data: data pointer to pass to the function
+ * @running: running list for the domain
+ *
+ * Returns an async_cookie_t that may be used for checkpointing later.
+ * @running may be used in the async_synchronize_*_domain() functions
+ * to wait within a certain synchronization domain rather than globally.
+ * A synchronization domain is specified via the running queue @running to use.
+ * Note: This function may be called from atomic or non-atomic contexts.
+ */
+async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data,
+				     struct list_head *running)
 {
 	return __async_schedule(ptr, data, running);
 }
-EXPORT_SYMBOL_GPL(async_schedule_special);
+EXPORT_SYMBOL_GPL(async_schedule_domain);
 
+/**
+ * async_synchronize_full - synchronize all asynchronous function calls
+ *
+ * This function waits until all asynchronous function calls have been done.
+ */
 void async_synchronize_full(void)
 {
 	do {
@@ -214,13 +254,30 @@ void async_synchronize_full(void)
 }
 EXPORT_SYMBOL_GPL(async_synchronize_full);
 
-void async_synchronize_full_special(struct list_head *list)
+/**
+ * async_synchronize_full_domain - synchronize all asynchronous function within a certain domain
+ * @list: running list to synchronize on
+ *
+ * This function waits until all asynchronous function calls for the
+ * synchronization domain specified by the running list @list have been done.
+ */
+void async_synchronize_full_domain(struct list_head *list)
 {
-	async_synchronize_cookie_special(next_cookie, list);
+	async_synchronize_cookie_domain(next_cookie, list);
 }
-EXPORT_SYMBOL_GPL(async_synchronize_full_special);
+EXPORT_SYMBOL_GPL(async_synchronize_full_domain);
 
-void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running)
+/**
+ * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing
+ * @cookie: async_cookie_t to use as checkpoint
+ * @running: running list to synchronize on
+ *
+ * This function waits until all asynchronous function calls for the
+ * synchronization domain specified by the running list @list submitted
+ * prior to @cookie have been done.
+ */
+void async_synchronize_cookie_domain(async_cookie_t cookie,
+				     struct list_head *running)
 {
 	ktime_t starttime, delta, endtime;
 
@@ -229,21 +286,29 @@ void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *r
 		starttime = ktime_get();
 	}
 
-	wait_event(async_done, __lowest_in_progress(running) >= cookie);
+	wait_event(async_done, lowest_in_progress(running) >= cookie);
 
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
 		endtime = ktime_get();
 		delta = ktime_sub(endtime, starttime);
 
 		printk("async_continuing @ %i after %lli usec\n",
-			task_pid_nr(current), ktime_to_ns(delta) >> 10);
+			task_pid_nr(current),
+			(long long)ktime_to_ns(delta) >> 10);
 	}
 }
-EXPORT_SYMBOL_GPL(async_synchronize_cookie_special);
+EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain);
 
+/**
+ * async_synchronize_cookie - synchronize asynchronous function calls with cookie checkpointing
+ * @cookie: async_cookie_t to use as checkpoint
+ *
+ * This function waits until all asynchronous function calls prior to @cookie
+ * have been done.
+ */
 void async_synchronize_cookie(async_cookie_t cookie)
 {
 	async_synchronize_cookie_domain(cookie, &async_running);
 }
 EXPORT_SYMBOL_GPL(async_synchronize_cookie);
 
@@ -304,7 +369,11 @@ static int async_manager_thread(void *unused)
 		ec = atomic_read(&entry_count);
 
 		while (tc < ec && tc < MAX_THREADS) {
-			kthread_run(async_thread, NULL, "async/%i", tc);
+			if (IS_ERR(kthread_run(async_thread, NULL, "async/%i",
+					       tc))) {
+				msleep(100);
+				continue;
+			}
 			atomic_inc(&thread_count);
 			tc++;
 		}
@@ -319,7 +388,9 @@ static int async_manager_thread(void *unused)
 static int __init async_init(void)
 {
 	if (async_enabled)
-		kthread_run(async_manager_thread, NULL, "async/mgr");
+		if (IS_ERR(kthread_run(async_manager_thread, NULL,
+				       "async/mgr")))
+			async_enabled = 0;
 	return 0;
 }
 
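
For context on the async.c rename above (async_schedule_special() becomes
async_schedule_domain(), and so on): a synchronization domain is just a
caller-owned running list. A minimal, hypothetical driver-side sketch of the
renamed API (all names here are invented for illustration):

    #include <linux/async.h>

    static LIST_HEAD(my_domain);	/* hypothetical per-driver domain */

    static void my_probe_one(void *data, async_cookie_t cookie)
    {
    	/* slow, independent probe work runs concurrently here */
    }

    static void my_probe_all(void)
    {
    	int i;

    	for (i = 0; i < 4; i++)
    		async_schedule_domain(my_probe_one, NULL, &my_domain);

    	/* waits only for this domain, not the global async queue */
    	async_synchronize_full_domain(&my_domain);
    }
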
diff --git a/kernel/capability.c b/kernel/capability.c
index 688926e496be..4e17041963f5 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -161,7 +161,7 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
  *
  * Returns 0 on success and < 0 on error.
  */
-asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
+SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
 {
 	int ret = 0;
 	pid_t pid;
@@ -235,7 +235,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
  *
  * Returns 0 on success and < 0 on error.
  */
-asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
+SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
 {
 	struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
 	unsigned i, tocopy;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c29831076e7a..9edb5c4b79b4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1115,13 +1115,15 @@ static void cgroup_kill_sb(struct super_block *sb) {
 	}
 	write_unlock(&css_set_lock);
 
-	list_del(&root->root_list);
-	root_count--;
+	if (!list_empty(&root->root_list)) {
+		list_del(&root->root_list);
+		root_count--;
+	}
 
 	mutex_unlock(&cgroup_mutex);
 
-	kfree(root);
 	kill_litter_super(sb);
+	kfree(root);
 }
 
 static struct file_system_type cgroup_fs_type = {
@@ -2349,7 +2351,7 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
 		if (ss->root == root)
-			mutex_lock_nested(&ss->hierarchy_mutex, i);
+			mutex_lock(&ss->hierarchy_mutex);
 	}
 }
 
@@ -2434,7 +2436,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 
  err_remove:
 
+	cgroup_lock_hierarchy(root);
 	list_del(&cgrp->sibling);
+	cgroup_unlock_hierarchy(root);
 	root->number_of_cgroups--;
 
  err_destroy:
@@ -2507,7 +2511,7 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp)
 	for_each_subsys(cgrp->root, ss) {
 		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
 		int refcnt;
-		do {
+		while (1) {
 			/* We can only remove a CSS with a refcnt==1 */
 			refcnt = atomic_read(&css->refcnt);
 			if (refcnt > 1) {
@@ -2521,7 +2525,10 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp)
 			 * css_tryget() to spin until we set the
 			 * CSS_REMOVED bits or abort
 			 */
-		} while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt);
+			if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
+				break;
+			cpu_relax();
+		}
 	}
  done:
 	for_each_subsys(cgrp->root, ss) {
@@ -2630,6 +2637,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	BUG_ON(!list_empty(&init_task.tasks));
 
 	mutex_init(&ss->hierarchy_mutex);
+	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
 	ss->active = 1;
 }
 
@@ -2991,20 +2999,21 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
 		mutex_unlock(&cgroup_mutex);
 		return 0;
 	}
-	task_lock(tsk);
-	cg = tsk->cgroups;
-	parent = task_cgroup(tsk, subsys->subsys_id);
 
 	/* Pin the hierarchy */
-	if (!atomic_inc_not_zero(&parent->root->sb->s_active)) {
+	if (!atomic_inc_not_zero(&root->sb->s_active)) {
 		/* We race with the final deactivate_super() */
 		mutex_unlock(&cgroup_mutex);
 		return 0;
 	}
 
 	/* Keep the cgroup alive */
+	task_lock(tsk);
+	parent = task_cgroup(tsk, subsys->subsys_id);
+	cg = tsk->cgroups;
 	get_css_set(cg);
 	task_unlock(tsk);
+
 	mutex_unlock(&cgroup_mutex);
 
 	/* Now do the VFS work to create a cgroup */
@@ -3043,7 +3052,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
 		mutex_unlock(&inode->i_mutex);
 		put_css_set(cg);
 
-		deactivate_super(parent->root->sb);
+		deactivate_super(root->sb);
 		/* The cgroup is still accessible in the VFS, but
 		 * we're not going to try to rmdir() it at this
 		 * point. */
@@ -3069,7 +3078,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
 	mutex_lock(&cgroup_mutex);
 	put_css_set(cg);
 	mutex_unlock(&cgroup_mutex);
-	deactivate_super(parent->root->sb);
+	deactivate_super(root->sb);
 	return ret;
 }
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 647c77a88fcb..f76db9dcaa05 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -61,6 +61,14 @@
 #include <linux/cgroup.h>
 
 /*
+ * Workqueue for cpuset related tasks.
+ *
+ * Using kevent workqueue may cause deadlock when memory_migrate
+ * is set. So we create a separate workqueue thread for cpuset.
+ */
+static struct workqueue_struct *cpuset_wq;
+
+/*
  * Tracks how many cpusets are currently defined in system.
  * When there is only one cpuset (the root cpuset) we can
  * short circuit some hooks.
@@ -568,7 +576,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
 * load balancing domains (sched domains) as specified by that partial
 * partition.
 *
- * See "What is sched_load_balance" in Documentation/cpusets.txt
+ * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt
 * for a background explanation of this.
 *
 * Does not return errors, on the theory that the callers of this
@@ -831,7 +839,7 @@ static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);
 */
static void async_rebuild_sched_domains(void)
{
-	schedule_work(&rebuild_sched_domains_work);
+	queue_work(cpuset_wq, &rebuild_sched_domains_work);
}

/*
@@ -2111,6 +2119,9 @@ void __init cpuset_init_smp(void)
 
 	hotcpu_notifier(cpuset_track_online_cpus, 0);
 	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
+
+	cpuset_wq = create_singlethread_workqueue("cpuset");
+	BUG_ON(!cpuset_wq);
 }
 
 /**
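
The cpuset change above is the standard cure for a flush/queue deadlock on
the shared kevent workqueue: give the subsystem its own single-threaded
workqueue. The bare pattern, as a sketch with invented names:

    static struct workqueue_struct *my_wq;

    static void my_work_fn(struct work_struct *work)
    {
    	/* may block, may take locks that other kevent work also takes */
    }
    static DECLARE_WORK(my_work, my_work_fn);

    static int __init my_init(void)
    {
    	my_wq = create_singlethread_workqueue("my_wq");
    	if (!my_wq)
    		return -ENOMEM;
    	queue_work(my_wq, &my_work);	/* instead of schedule_work() */
    	return 0;
    }
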
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c
index 038707404b76..962a3b574f21 100644
--- a/kernel/dma-coherent.c
+++ b/kernel/dma-coherent.c
@@ -98,7 +98,7 @@ EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
 * @size:	size of requested memory area
 * @dma_handle:	This will be filled with the correct dma handle
 * @ret:	This pointer will be filled with the virtual address
- *		to allocated area.
+ *		to allocated area.
 *
 * This function should be only called from per-arch dma_alloc_coherent()
 * to support allocation from per-device coherent memory pools.
@@ -118,31 +118,32 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size,
 	mem = dev->dma_mem;
 	if (!mem)
 		return 0;
-	if (unlikely(size > mem->size))
-		return 0;
+
+	*ret = NULL;
+
+	if (unlikely(size > (mem->size << PAGE_SHIFT)))
+		goto err;
 
 	pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
-	if (pageno >= 0) {
-		/*
-		 * Memory was found in the per-device arena.
-		 */
-		*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
-		*ret = mem->virt_base + (pageno << PAGE_SHIFT);
-		memset(*ret, 0, size);
-	} else if (mem->flags & DMA_MEMORY_EXCLUSIVE) {
-		/*
-		 * The per-device arena is exhausted and we are not
-		 * permitted to fall back to generic memory.
-		 */
-		*ret = NULL;
-	} else {
-		/*
-		 * The per-device arena is exhausted and we are
-		 * permitted to fall back to generic memory.
-		 */
-		return 0;
-	}
+	if (unlikely(pageno < 0))
+		goto err;
+
+	/*
+	 * Memory was found in the per-device area.
+	 */
+	*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
+	*ret = mem->virt_base + (pageno << PAGE_SHIFT);
+	memset(*ret, 0, size);
+
 	return 1;
+
+err:
+	/*
+	 * In the case where the allocation can not be satisfied from the
+	 * per-device area, try to fall back to generic memory if the
+	 * constraints allow it.
+	 */
+	return mem->flags & DMA_MEMORY_EXCLUSIVE;
 }
 EXPORT_SYMBOL(dma_alloc_from_coherent);
 
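
The key fix in dma_alloc_from_coherent() above: mem->size is measured in
pages, not bytes, so the old bounds check compared a byte count against a
page count. A worked example of the difference, assuming PAGE_SHIFT == 12
and a 16-page coherent area:

    /* old check: size > mem->size          -> rejects any request > 16 bytes */
    /* new check: size > (mem->size << 12)  -> rejects only requests > 64 KiB  */
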
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 0511716e9424..667c841c2952 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -209,8 +209,7 @@ static int __init proc_execdomains_init(void)
 module_init(proc_execdomains_init);
 #endif
 
-asmlinkage long
-sys_personality(u_long personality)
+SYSCALL_DEFINE1(personality, u_long, personality)
 {
 	u_long old = current->personality;
 
diff --git a/kernel/exit.c b/kernel/exit.c
index c7740fa3252c..efd30ccf3858 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -118,6 +118,8 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
+		sig->utime = cputime_add(sig->utime, task_utime(tsk));
+		sig->stime = cputime_add(sig->stime, task_stime(tsk));
 		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
@@ -126,6 +128,7 @@ static void __exit_signal(struct task_struct *tsk)
 		sig->inblock += task_io_get_inblock(tsk);
 		sig->oublock += task_io_get_oublock(tsk);
 		task_io_accounting_add(&sig->ioac, &tsk->ioac);
+		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 		sig = NULL; /* Marker for below. */
 	}
 
@@ -1141,7 +1144,7 @@ NORET_TYPE void complete_and_exit(struct completion *comp, long code)
 
 EXPORT_SYMBOL(complete_and_exit);
 
-asmlinkage long sys_exit(int error_code)
+SYSCALL_DEFINE1(exit, int, error_code)
 {
 	do_exit((error_code&0xff)<<8);
 }
@@ -1182,9 +1185,11 @@ do_group_exit(int exit_code)
 * wait4()-ing process will get the correct exit code - even if this
 * thread is not the thread group leader.
 */
-asmlinkage void sys_exit_group(int error_code)
+SYSCALL_DEFINE1(exit_group, int, error_code)
 {
 	do_group_exit((error_code & 0xff) << 8);
+	/* NOTREACHED */
+	return 0;
 }
 
 static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
@@ -1752,9 +1757,8 @@ end:
 	return retval;
 }
 
-asmlinkage long sys_waitid(int which, pid_t upid,
-			   struct siginfo __user *infop, int options,
-			   struct rusage __user *ru)
+SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
+		infop, int, options, struct rusage __user *, ru)
 {
 	struct pid *pid = NULL;
 	enum pid_type type;
@@ -1793,8 +1797,8 @@ asmlinkage long sys_waitid(int which, pid_t upid,
 	return ret;
 }
 
-asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr,
-			  int options, struct rusage __user *ru)
+SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
+		int, options, struct rusage __user *, ru)
 {
 	struct pid *pid = NULL;
 	enum pid_type type;
@@ -1831,7 +1835,7 @@ asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr,
 * sys_waitpid() remains for compatibility. waitpid() should be
 * implemented by calling sys_wait4() from libc.a.
 */
-asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options)
+SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options)
 {
 	return sys_wait4(pid, stat_addr, options, NULL);
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index 1d68f1255dd8..4854c2c4a82e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -817,17 +817,17 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct signal_struct *sig;
-	int ret;
 
 	if (clone_flags & CLONE_THREAD) {
-		ret = thread_group_cputime_clone_thread(current);
-		if (likely(!ret)) {
-			atomic_inc(&current->signal->count);
-			atomic_inc(&current->signal->live);
-		}
-		return ret;
+		atomic_inc(&current->signal->count);
+		atomic_inc(&current->signal->live);
+		return 0;
 	}
 	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+
+	if (sig)
+		posix_cpu_timers_init_group(sig);
+
 	tsk->signal = sig;
 	if (!sig)
 		return -ENOMEM;
@@ -851,21 +851,20 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->tty_old_pgrp = NULL;
 	sig->tty = NULL;
 
-	sig->cutime = sig->cstime = cputime_zero;
+	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
 	sig->gtime = cputime_zero;
 	sig->cgtime = cputime_zero;
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
 	task_io_accounting_init(&sig->ioac);
+	sig->sum_sched_runtime = 0;
 	taskstats_tgid_init(sig);
 
 	task_lock(current->group_leader);
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);
 
-	posix_cpu_timers_init_group(sig);
-
 	acct_init_pacct(&sig->pacct);
 
 	tty_audit_fork(sig);
@@ -901,7 +900,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 	clear_freeze_flag(p);
 }
 
-asmlinkage long sys_set_tid_address(int __user *tidptr)
+SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 {
 	current->clear_child_tid = tidptr;
 
@@ -1007,6 +1006,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 * triggers too late. This doesn't hurt, the check is only there
 	 * to stop root fork bombs.
 	 */
+	retval = -EAGAIN;
 	if (nr_threads >= max_threads)
 		goto bad_fork_cleanup_count;
 
@@ -1095,7 +1095,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
-	if (unlikely(ptrace_reparented(current)))
+	if (unlikely(current->ptrace))
 		ptrace_fork(p, clone_flags);
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
@@ -1179,10 +1179,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
 	clear_all_latency_tracing(p);
 
-	/* Our parent execution domain becomes current domain
-	   These must match for thread signalling to apply */
-	p->parent_exec_id = p->self_exec_id;
-
 	/* ok, now we should be set up.. */
 	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
 	p->pdeath_signal = 0;
@@ -1220,10 +1216,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	set_task_cpu(p, smp_processor_id());
 
 	/* CLONE_PARENT re-uses the old parent */
-	if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
+	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
 		p->real_parent = current->real_parent;
-	else
+		p->parent_exec_id = current->parent_exec_id;
+	} else {
 		p->real_parent = current;
+		p->parent_exec_id = current->self_exec_id;
+	}
 
 	spin_lock(&current->sighand->siglock);
 
@@ -1603,7 +1602,7 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp
 * constructed. Here we are modifying the current, active,
 * task_struct.
 */
-asmlinkage long sys_unshare(unsigned long unshare_flags)
+SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 {
 	int err = 0;
 	struct fs_struct *fs, *new_fs = NULL;
diff --git a/kernel/futex.c b/kernel/futex.c
index 002aa189eb09..438701adce23 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1165,6 +1165,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
 		      u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
 {
 	struct task_struct *curr = current;
+	struct restart_block *restart;
 	DECLARE_WAITQUEUE(wait, curr);
 	struct futex_hash_bucket *hb;
 	struct futex_q q;
@@ -1216,11 +1217,13 @@ retry:
 
 		if (!ret)
 			goto retry;
-		return ret;
+		goto out;
 	}
 	ret = -EWOULDBLOCK;
-	if (uval != val)
-		goto out_unlock_put_key;
+	if (unlikely(uval != val)) {
+		queue_unlock(&q, hb);
+		goto out_put_key;
+	}
 
 	/* Only actually queue if *uaddr contained val. */
 	queue_me(&q, hb);
@@ -1284,38 +1287,38 @@ retry:
 	 */
 
 	/* If we were woken (and unqueued), we succeeded, whatever. */
+	ret = 0;
 	if (!unqueue_me(&q))
-		return 0;
+		goto out_put_key;
+	ret = -ETIMEDOUT;
 	if (rem)
-		return -ETIMEDOUT;
+		goto out_put_key;
 
 	/*
 	 * We expect signal_pending(current), but another thread may
 	 * have handled it for us already.
 	 */
+	ret = -ERESTARTSYS;
 	if (!abs_time)
-		return -ERESTARTSYS;
-	else {
-		struct restart_block *restart;
-		restart = &current_thread_info()->restart_block;
-		restart->fn = futex_wait_restart;
-		restart->futex.uaddr = (u32 *)uaddr;
-		restart->futex.val = val;
-		restart->futex.time = abs_time->tv64;
-		restart->futex.bitset = bitset;
-		restart->futex.flags = 0;
-
-		if (fshared)
-			restart->futex.flags |= FLAGS_SHARED;
-		if (clockrt)
-			restart->futex.flags |= FLAGS_CLOCKRT;
-		return -ERESTART_RESTARTBLOCK;
-	}
+		goto out_put_key;
 
-out_unlock_put_key:
-	queue_unlock(&q, hb);
-	put_futex_key(fshared, &q.key);
+	restart = &current_thread_info()->restart_block;
+	restart->fn = futex_wait_restart;
+	restart->futex.uaddr = (u32 *)uaddr;
+	restart->futex.val = val;
+	restart->futex.time = abs_time->tv64;
+	restart->futex.bitset = bitset;
+	restart->futex.flags = 0;
+
+	if (fshared)
+		restart->futex.flags |= FLAGS_SHARED;
+	if (clockrt)
+		restart->futex.flags |= FLAGS_CLOCKRT;
 
+	ret = -ERESTART_RESTARTBLOCK;
+
+out_put_key:
+	put_futex_key(fshared, &q.key);
 out:
 	return ret;
 }
@@ -1733,9 +1736,8 @@ pi_faulted:
 * @head: pointer to the list-head
 * @len: length of the list-head, as userspace expects
 */
-asmlinkage long
-sys_set_robust_list(struct robust_list_head __user *head,
-		    size_t len)
+SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
+		size_t, len)
 {
 	if (!futex_cmpxchg_enabled)
 		return -ENOSYS;
@@ -1756,9 +1758,9 @@ sys_set_robust_list(struct robust_list_head __user *head,
 * @head_ptr: pointer to a list-head pointer, the kernel fills it in
 * @len_ptr: pointer to a length field, the kernel fills in the header size
 */
-asmlinkage long
-sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
-		    size_t __user *len_ptr)
+SYSCALL_DEFINE3(get_robust_list, int, pid,
+		struct robust_list_head __user * __user *, head_ptr,
+		size_t __user *, len_ptr)
 {
 	struct robust_list_head __user *head;
 	unsigned long ret;
@@ -1978,9 +1980,9 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 }
 
 
-asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
-			  struct timespec __user *utime, u32 __user *uaddr2,
-			  u32 val3)
+SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		struct timespec __user *, utime, u32 __user *, uaddr2,
		u32, val3)
 {
 	struct timespec ts;
 	ktime_t t, *tp = NULL;
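
The futex_wait() restructuring above funnels every exit path through a
single ret/out_put_key ladder so the futex key reference is always dropped,
and hoists the restart_block setup out of the else branch. The restart_block
idiom in isolation (a sketch; my_wait() and my_wait_restart() are invented
stand-ins):

    static long my_wait_restart(struct restart_block *restart);

    static long my_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
    {
    	struct restart_block *restart;

    	/* ... interrupted by a signal while abs_time != NULL ... */
    	restart = &current_thread_info()->restart_block;
    	restart->fn = my_wait_restart;
    	restart->futex.uaddr = (u32 *)uaddr;
    	restart->futex.val = val;
    	restart->futex.time = abs_time->tv64;
    	/* the signal-return path re-enters the wait via restart->fn */
    	return -ERESTART_RESTARTBLOCK;
    }
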
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 1455b7651b6b..f394d2a42ca3 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -501,6 +501,13 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
 			continue;
 		timer = rb_entry(base->first, struct hrtimer, node);
 		expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
+		/*
+		 * clock_was_set() has changed base->offset so the
+		 * result might be negative. Fix it up to prevent a
+		 * false positive in clockevents_program_event()
+		 */
+		if (expires.tv64 < 0)
+			expires.tv64 = 0;
 		if (expires.tv64 < cpu_base->expires_next.tv64)
 			cpu_base->expires_next = expires;
 	}
@@ -614,7 +621,9 @@ void clock_was_set(void)
 */
 void hres_timers_resume(void)
 {
-	/* Retrigger the CPU local events: */
+	WARN_ONCE(!irqs_disabled(),
+		  KERN_INFO "hres_timers_resume() called with IRQs enabled!");
+
 	retrigger_next_event(NULL);
 }
 
@@ -1156,6 +1165,29 @@ static void __run_hrtimer(struct hrtimer *timer)
 
 #ifdef CONFIG_HIGH_RES_TIMERS
 
+static int force_clock_reprogram;
+
+/*
+ * After 5 iteration's attempts, we consider that hrtimer_interrupt()
+ * is hanging, which could happen with something that slows the interrupt
+ * such as the tracing. Then we force the clock reprogramming for each future
+ * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
+ * threshold that we will overwrite.
+ * The next tick event will be scheduled to 3 times we currently spend on
+ * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
+ * 1/4 of their time to process the hrtimer interrupts. This is enough to
+ * let it running without serious starvation.
+ */
+
+static inline void
+hrtimer_interrupt_hanging(struct clock_event_device *dev,
+			  ktime_t try_time)
+{
+	force_clock_reprogram = 1;
+	dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
+	printk(KERN_WARNING "hrtimer: interrupt too slow, "
+		"forcing clock min delta to %lu ns\n", dev->min_delta_ns);
+}
 /*
 * High resolution timer interrupt
 * Called with interrupts disabled
@@ -1165,6 +1197,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	struct hrtimer_clock_base *base;
 	ktime_t expires_next, now;
+	int nr_retries = 0;
 	int i;
 
 	BUG_ON(!cpu_base->hres_active);
@@ -1172,6 +1205,10 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	dev->next_event.tv64 = KTIME_MAX;
 
  retry:
+	/* 5 retries is enough to notice a hang */
+	if (!(++nr_retries % 5))
+		hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
+
 	now = ktime_get();
 
 	expires_next.tv64 = KTIME_MAX;
@@ -1224,7 +1261,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
 	/* Reprogramming necessary ? */
 	if (expires_next.tv64 != KTIME_MAX) {
-		if (tick_program_event(expires_next, 0))
+		if (tick_program_event(expires_next, force_clock_reprogram))
 			goto retry;
 	}
 }
@@ -1467,8 +1504,8 @@ out:
 	return ret;
 }
 
-asmlinkage long
-sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp)
+SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
		struct timespec __user *, rmtp)
 {
 	struct timespec tu;
 
@@ -1578,6 +1615,10 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
 		break;
 
 #ifdef CONFIG_HOTPLUG_CPU
+	case CPU_DYING:
+	case CPU_DYING_FROZEN:
+		clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
+		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 	{
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f63c706d25e1..7de11bd64dfe 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -383,6 +383,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 out_unlock:
 	spin_unlock(&desc->lock);
 }
+EXPORT_SYMBOL_GPL(handle_level_irq);
 
 /**
 * handle_fasteoi_irq - irq handler for transparent controllers
@@ -593,6 +594,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 	}
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
+EXPORT_SYMBOL_GPL(__set_irq_handler);
 
 void
 set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index c20db0be9173..3aba8d12f328 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -39,6 +39,18 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 	ack_bad_irq(irq);
 }
 
+#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
+static void __init init_irq_default_affinity(void)
+{
+	alloc_bootmem_cpumask_var(&irq_default_affinity);
+	cpumask_setall(irq_default_affinity);
+}
+#else
+static void __init init_irq_default_affinity(void)
+{
+}
+#endif
+
 /*
 * Linux has a controller-independent interrupt architecture.
 * Every controller has a 'controller-template', that is used
@@ -134,6 +146,8 @@ int __init early_irq_init(void)
 	int legacy_count;
 	int i;
 
+	init_irq_default_affinity();
+
 	desc = irq_desc_legacy;
 	legacy_count = ARRAY_SIZE(irq_desc_legacy);
 
@@ -219,6 +233,8 @@ int __init early_irq_init(void)
 	int count;
 	int i;
 
+	init_irq_default_affinity();
+
 	desc = irq_desc;
 	count = ARRAY_SIZE(irq_desc);
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index cd0cd8dcb345..291f03664552 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -15,17 +15,9 @@
 
 #include "internals.h"
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
 cpumask_var_t irq_default_affinity;
 
-static int init_irq_default_affinity(void)
-{
-	alloc_cpumask_var(&irq_default_affinity, GFP_KERNEL);
-	cpumask_setall(irq_default_affinity);
-	return 0;
-}
-core_initcall(init_irq_default_affinity);
-
 /**
 *	synchronize_irq - wait for pending IRQ handlers (on other CPUs)
 *	@irq: interrupt number to wait for
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index ecf765c6a77a..acd88356ac76 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -71,7 +71,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
 	desc = irq_desc_ptrs[irq];
 
 	if (desc && old_desc != desc)
-			goto out_unlock;
+		goto out_unlock;
 
 	node = cpu_to_node(cpu);
 	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
@@ -84,10 +84,15 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
 	init_copy_one_irq_desc(irq, old_desc, desc, cpu);
 
 	irq_desc_ptrs[irq] = desc;
+	spin_unlock_irqrestore(&sparse_irq_lock, flags);
 
 	/* free the old one */
 	free_one_irq_desc(old_desc, desc);
+	spin_unlock(&old_desc->lock);
 	kfree(old_desc);
+	spin_lock(&desc->lock);
+
+	return desc;
 
 out_unlock:
 	spin_unlock_irqrestore(&sparse_irq_lock, flags);
diff --git a/kernel/itimer.c b/kernel/itimer.c
index db7c358b9a02..58762f7077ec 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -62,7 +62,7 @@ int do_getitimer(int which, struct itimerval *value)
 			struct task_cputime cputime;
 			cputime_t utime;
 
-			thread_group_cputime(tsk, &cputime);
+			thread_group_cputimer(tsk, &cputime);
 			utime = cputime.utime;
 			if (cputime_le(cval, utime)) { /* about to fire */
 				cval = jiffies_to_cputime(1);
@@ -82,7 +82,7 @@ int do_getitimer(int which, struct itimerval *value)
 			struct task_cputime times;
 			cputime_t ptime;
 
-			thread_group_cputime(tsk, &times);
+			thread_group_cputimer(tsk, &times);
 			ptime = cputime_add(times.utime, times.stime);
 			if (cputime_le(cval, ptime)) { /* about to fire */
 				cval = jiffies_to_cputime(1);
@@ -100,7 +100,7 @@ int do_getitimer(int which, struct itimerval *value)
 	return 0;
 }
 
-asmlinkage long sys_getitimer(int which, struct itimerval __user *value)
+SYSCALL_DEFINE2(getitimer, int, which, struct itimerval __user *, value)
 {
 	int error = -EFAULT;
 	struct itimerval get_buffer;
@@ -260,9 +260,8 @@ unsigned int alarm_setitimer(unsigned int seconds)
 	return it_old.it_value.tv_sec;
 }
 
-asmlinkage long sys_setitimer(int which,
-			      struct itimerval __user *value,
-			      struct itimerval __user *ovalue)
+SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value,
		struct itimerval __user *, ovalue)
 {
 	struct itimerval set_buffer, get_buffer;
 	int error;
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index e694afa0eb8c..7b8b0f21a5b1 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -30,19 +30,20 @@
 #define all_var 0
 #endif
 
-extern const unsigned long kallsyms_addresses[];
-extern const u8 kallsyms_names[];
+/* These will be re-linked against their real values during the second link stage */
+extern const unsigned long kallsyms_addresses[] __attribute__((weak));
+extern const u8 kallsyms_names[] __attribute__((weak));
 
 /* tell the compiler that the count isn't in the small data section if the arch
 * has one (eg: FRV)
 */
 extern const unsigned long kallsyms_num_syms
-	__attribute__((__section__(".rodata")));
+__attribute__((weak, section(".rodata")));
 
-extern const u8 kallsyms_token_table[];
-extern const u16 kallsyms_token_index[];
+extern const u8 kallsyms_token_table[] __attribute__((weak));
+extern const u16 kallsyms_token_index[] __attribute__((weak));
 
-extern const unsigned long kallsyms_markers[];
+extern const unsigned long kallsyms_markers[] __attribute__((weak));
 
 static inline int is_kernel_inittext(unsigned long addr)
 {
@@ -167,6 +168,9 @@ static unsigned long get_symbol_pos(unsigned long addr,
 	unsigned long symbol_start = 0, symbol_end = 0;
 	unsigned long i, low, high, mid;
 
+	/* This kernel should never had been booted. */
+	BUG_ON(!kallsyms_addresses);
+
 	/* do a binary search on the sorted kallsyms_addresses array */
 	low = 0;
 	high = kallsyms_num_syms;
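
The kallsyms change above relies on weak symbols so that the first link pass
(run before the kallsyms tables exist) still succeeds, with the unresolved
arrays resolving to NULL instead of producing hard link errors. The trick in
isolation, as an illustrative sketch:

    extern const unsigned long my_table[] __attribute__((weak));

    static int have_table(void)
    {
    	/* a weak, undefined array has address NULL at the first link */
    	return my_table != NULL;
    }
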
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 3fb855ad6aa0..483899578259 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -934,9 +934,8 @@ struct kimage *kexec_crash_image;
 
 static DEFINE_MUTEX(kexec_mutex);
 
-asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
-				struct kexec_segment __user *segments,
-				unsigned long flags)
+SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
		struct kexec_segment __user *, segments, unsigned long, flags)
 {
 	struct kimage **dest_image, *image;
 	int result;
@@ -1466,6 +1465,11 @@ int kernel_kexec(void)
 		error = device_power_down(PMSG_FREEZE);
 		if (error)
 			goto Enable_irqs;
+
+		/* Suspend system devices */
+		error = sysdev_suspend(PMSG_FREEZE);
+		if (error)
+			goto Power_up_devices;
 	} else
 #endif
 	{
@@ -1478,6 +1482,8 @@ int kernel_kexec(void)
 
 #ifdef CONFIG_KEXEC_JUMP
 	if (kexec_image->preserve_context) {
+		sysdev_resume();
+ Power_up_devices:
 		device_power_up(PMSG_RESTORE);
  Enable_irqs:
 		local_irq_enable();
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1b9cbdc0127a..7ba8cd9845cb 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -123,7 +123,7 @@ static int collect_garbage_slots(void);
 static int __kprobes check_safety(void)
 {
 	int ret = 0;
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_FREEZER)
 	ret = freeze_processes();
 	if (ret == 0) {
 		struct task_struct *p, *q;
diff --git a/kernel/module.c b/kernel/module.c
index c9332c90d5a0..1196f5d11700 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -573,13 +573,13 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
 /* Init the unload section of the module. */
 static void module_unload_init(struct module *mod)
 {
-	unsigned int i;
+	int cpu;
 
 	INIT_LIST_HEAD(&mod->modules_which_use_me);
-	for (i = 0; i < NR_CPUS; i++)
-		local_set(&mod->ref[i].count, 0);
+	for_each_possible_cpu(cpu)
+		local_set(__module_ref_addr(mod, cpu), 0);
 	/* Hold reference count during initialization. */
-	local_set(&mod->ref[raw_smp_processor_id()].count, 1);
+	local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1);
 	/* Backwards compatibility macros put refcount during init. */
 	mod->waiter = current;
 }
@@ -717,10 +717,11 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
 
 unsigned int module_refcount(struct module *mod)
 {
-	unsigned int i, total = 0;
+	unsigned int total = 0;
+	int cpu;
 
-	for (i = 0; i < NR_CPUS; i++)
-		total += local_read(&mod->ref[i].count);
+	for_each_possible_cpu(cpu)
+		total += local_read(__module_ref_addr(mod, cpu));
 	return total;
 }
 EXPORT_SYMBOL(module_refcount);
@@ -743,8 +744,8 @@ static void wait_for_zero_refcount(struct module *mod)
 	mutex_lock(&module_mutex);
 }
 
-asmlinkage long
-sys_delete_module(const char __user *name_user, unsigned int flags)
+SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
		unsigned int, flags)
 {
 	struct module *mod;
 	char name[MODULE_NAME_LEN];
@@ -894,7 +895,7 @@ void module_put(struct module *module)
 {
 	if (module) {
 		unsigned int cpu = get_cpu();
-		local_dec(&module->ref[cpu].count);
+		local_dec(__module_ref_addr(module, cpu));
 		/* Maybe they're waiting for us to drop reference? */
 		if (unlikely(!module_is_live(module)))
 			wake_up_process(module->waiter);
@@ -1464,7 +1465,10 @@ static void free_module(struct module *mod)
 	kfree(mod->args);
 	if (mod->percpu)
 		percpu_modfree(mod->percpu);
-
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+	if (mod->refptr)
+		percpu_modfree(mod->refptr);
+#endif
 	/* Free lock-classes: */
 	lockdep_free_key_range(mod->module_core, mod->core_size);
 
@@ -2070,6 +2074,14 @@ static noinline struct module *load_module(void __user *umod,
 	/* Module has been moved. */
 	mod = (void *)sechdrs[modindex].sh_addr;
 
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+	mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
+				      mod->name);
+	if (!mod->refptr) {
+		err = -ENOMEM;
+		goto free_init;
+	}
+#endif
 	/* Now we've moved module, initialize linked lists, etc. */
 	module_unload_init(mod);
 
@@ -2276,9 +2288,14 @@ static noinline struct module *load_module(void __user *umod,
2276 ftrace_release(mod->module_core, mod->core_size); 2288 ftrace_release(mod->module_core, mod->core_size);
2277 free_unload: 2289 free_unload:
2278 module_unload_free(mod); 2290 module_unload_free(mod);
2291 free_init:
2292#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
2293 percpu_modfree(mod->refptr);
2294#endif
2279 module_free(mod, mod->module_init); 2295 module_free(mod, mod->module_init);
2280 free_core: 2296 free_core:
2281 module_free(mod, mod->module_core); 2297 module_free(mod, mod->module_core);
2298 /* mod will be freed with core. Don't access it beyond this line! */
2282 free_percpu: 2299 free_percpu:
2283 if (percpu) 2300 if (percpu)
2284 percpu_modfree(percpu); 2301 percpu_modfree(percpu);
@@ -2296,10 +2313,8 @@ static noinline struct module *load_module(void __user *umod,
2296} 2313}
2297 2314
2298/* This is where the real work happens */ 2315/* This is where the real work happens */
2299asmlinkage long 2316SYSCALL_DEFINE3(init_module, void __user *, umod,
2300sys_init_module(void __user *umod, 2317 unsigned long, len, const char __user *, uargs)
2301 unsigned long len,
2302 const char __user *uargs)
2303{ 2318{
2304 struct module *mod; 2319 struct module *mod;
2305 int ret = 0; 2320 int ret = 0;
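
The module.c conversion above drops the NR_CPUS-sized ref[] array in favour of a percpu_modalloc()'d block reached through __module_ref_addr(), and walks it with for_each_possible_cpu() so only CPUs that can ever exist are touched. The summation pattern, reduced to a generic sketch (assuming a counter block from alloc_percpu(), not the module-specific helper):

/* Sum a per-CPU local_t over possible CPUs only; NR_CPUS may be
 * far larger than anything this machine can bring online. */
static unsigned int sum_percpu_local(local_t *counts)
{
	unsigned int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += local_read(per_cpu_ptr(counts, cpu));
	return total;
}
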
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 157de3a47832..e976e505648d 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -10,76 +10,6 @@
10#include <linux/kernel_stat.h> 10#include <linux/kernel_stat.h>
11 11
12/* 12/*
13 * Allocate the thread_group_cputime structure appropriately and fill in the
14 * current values of the fields. Called from copy_signal() via
15 * thread_group_cputime_clone_thread() when adding a second or subsequent
16 * thread to a thread group. Assumes interrupts are enabled when called.
17 */
18int thread_group_cputime_alloc(struct task_struct *tsk)
19{
20 struct signal_struct *sig = tsk->signal;
21 struct task_cputime *cputime;
22
23 /*
24 * If we have multiple threads and we don't already have a
25 * per-CPU task_cputime struct (checked in the caller), allocate
26 * one and fill it in with the times accumulated so far. We may
27 * race with another thread so recheck after we pick up the sighand
28 * lock.
29 */
30 cputime = alloc_percpu(struct task_cputime);
31 if (cputime == NULL)
32 return -ENOMEM;
33 spin_lock_irq(&tsk->sighand->siglock);
34 if (sig->cputime.totals) {
35 spin_unlock_irq(&tsk->sighand->siglock);
36 free_percpu(cputime);
37 return 0;
38 }
39 sig->cputime.totals = cputime;
40 cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id());
41 cputime->utime = tsk->utime;
42 cputime->stime = tsk->stime;
43 cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
44 spin_unlock_irq(&tsk->sighand->siglock);
45 return 0;
46}
47
48/**
49 * thread_group_cputime - Sum the thread group time fields across all CPUs.
50 *
51 * @tsk: The task we use to identify the thread group.
52 * @times: task_cputime structure in which we return the summed fields.
53 *
54 * Walk the list of CPUs to sum the per-CPU time fields in the thread group
55 * time structure.
56 */
57void thread_group_cputime(
58 struct task_struct *tsk,
59 struct task_cputime *times)
60{
61 struct task_cputime *totals, *tot;
62 int i;
63
64 totals = tsk->signal->cputime.totals;
65 if (!totals) {
66 times->utime = tsk->utime;
67 times->stime = tsk->stime;
68 times->sum_exec_runtime = tsk->se.sum_exec_runtime;
69 return;
70 }
71
72 times->stime = times->utime = cputime_zero;
73 times->sum_exec_runtime = 0;
74 for_each_possible_cpu(i) {
75 tot = per_cpu_ptr(totals, i);
76 times->utime = cputime_add(times->utime, tot->utime);
77 times->stime = cputime_add(times->stime, tot->stime);
78 times->sum_exec_runtime += tot->sum_exec_runtime;
79 }
80}
81
82/*
83 * Called after updating RLIMIT_CPU to set timer expiration if necessary. 13 * Called after updating RLIMIT_CPU to set timer expiration if necessary.
84 */ 14 */
85void update_rlimit_cpu(unsigned long rlim_new) 15void update_rlimit_cpu(unsigned long rlim_new)
@@ -300,6 +230,71 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
300 return 0; 230 return 0;
301} 231}
302 232
233void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
234{
235 struct sighand_struct *sighand;
236 struct signal_struct *sig;
237 struct task_struct *t;
238
239 *times = INIT_CPUTIME;
240
241 rcu_read_lock();
242 sighand = rcu_dereference(tsk->sighand);
243 if (!sighand)
244 goto out;
245
246 sig = tsk->signal;
247
248 t = tsk;
249 do {
250 times->utime = cputime_add(times->utime, t->utime);
251 times->stime = cputime_add(times->stime, t->stime);
252 times->sum_exec_runtime += t->se.sum_exec_runtime;
253
254 t = next_thread(t);
255 } while (t != tsk);
256
257 times->utime = cputime_add(times->utime, sig->utime);
258 times->stime = cputime_add(times->stime, sig->stime);
259 times->sum_exec_runtime += sig->sum_sched_runtime;
260out:
261 rcu_read_unlock();
262}
263
264static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
265{
266 if (cputime_gt(b->utime, a->utime))
267 a->utime = b->utime;
268
269 if (cputime_gt(b->stime, a->stime))
270 a->stime = b->stime;
271
272 if (b->sum_exec_runtime > a->sum_exec_runtime)
273 a->sum_exec_runtime = b->sum_exec_runtime;
274}
275
276void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
277{
278 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
279 struct task_cputime sum;
280 unsigned long flags;
281
282 spin_lock_irqsave(&cputimer->lock, flags);
283 if (!cputimer->running) {
284 cputimer->running = 1;
285 /*
286 * The POSIX timer interface allows for absolute time expiry
287 * values through the TIMER_ABSTIME flag, therefore we have
288 * to synchronize the timer to the clock every time we start
289 * it.
290 */
291 thread_group_cputime(tsk, &sum);
292 update_gt_cputime(&cputimer->cputime, &sum);
293 }
294 *times = cputimer->cputime;
295 spin_unlock_irqrestore(&cputimer->lock, flags);
296}
297
303/* 298/*
304 * Sample a process (thread group) clock for the given group_leader task. 299 * Sample a process (thread group) clock for the given group_leader task.
305 * Must be called with tasklist_lock held for reading. 300 * Must be called with tasklist_lock held for reading.
@@ -527,7 +522,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
527{ 522{
528 struct task_cputime cputime; 523 struct task_cputime cputime;
529 524
530 thread_group_cputime(tsk, &cputime); 525 thread_group_cputimer(tsk, &cputime);
531 cleanup_timers(tsk->signal->cpu_timers, 526 cleanup_timers(tsk->signal->cpu_timers,
532 cputime.utime, cputime.stime, cputime.sum_exec_runtime); 527 cputime.utime, cputime.stime, cputime.sum_exec_runtime);
533} 528}
@@ -686,6 +681,33 @@ static void cpu_timer_fire(struct k_itimer *timer)
686} 681}
687 682
688/* 683/*
684 * Sample a process (thread group) timer for the given group_leader task.
685 * Must be called with tasklist_lock held for reading.
686 */
687static int cpu_timer_sample_group(const clockid_t which_clock,
688 struct task_struct *p,
689 union cpu_time_count *cpu)
690{
691 struct task_cputime cputime;
692
693 thread_group_cputimer(p, &cputime);
694 switch (CPUCLOCK_WHICH(which_clock)) {
695 default:
696 return -EINVAL;
697 case CPUCLOCK_PROF:
698 cpu->cpu = cputime_add(cputime.utime, cputime.stime);
699 break;
700 case CPUCLOCK_VIRT:
701 cpu->cpu = cputime.utime;
702 break;
703 case CPUCLOCK_SCHED:
704 cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
705 break;
706 }
707 return 0;
708}
709
710/*
689 * Guts of sys_timer_settime for CPU timers. 711 * Guts of sys_timer_settime for CPU timers.
690 * This is called with the timer locked and interrupts disabled. 712 * This is called with the timer locked and interrupts disabled.
691 * If we return TIMER_RETRY, it's necessary to release the timer's lock 713 * If we return TIMER_RETRY, it's necessary to release the timer's lock
@@ -746,7 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
746 if (CPUCLOCK_PERTHREAD(timer->it_clock)) { 768 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
747 cpu_clock_sample(timer->it_clock, p, &val); 769 cpu_clock_sample(timer->it_clock, p, &val);
748 } else { 770 } else {
749 cpu_clock_sample_group(timer->it_clock, p, &val); 771 cpu_timer_sample_group(timer->it_clock, p, &val);
750 } 772 }
751 773
752 if (old) { 774 if (old) {
@@ -894,7 +916,7 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
894 read_unlock(&tasklist_lock); 916 read_unlock(&tasklist_lock);
895 goto dead; 917 goto dead;
896 } else { 918 } else {
897 cpu_clock_sample_group(timer->it_clock, p, &now); 919 cpu_timer_sample_group(timer->it_clock, p, &now);
898 clear_dead = (unlikely(p->exit_state) && 920 clear_dead = (unlikely(p->exit_state) &&
899 thread_group_empty(p)); 921 thread_group_empty(p));
900 } 922 }
@@ -1034,6 +1056,19 @@ static void check_thread_timers(struct task_struct *tsk,
1034 } 1056 }
1035} 1057}
1036 1058
1059static void stop_process_timers(struct task_struct *tsk)
1060{
1061 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
1062 unsigned long flags;
1063
1064 if (!cputimer->running)
1065 return;
1066
1067 spin_lock_irqsave(&cputimer->lock, flags);
1068 cputimer->running = 0;
1069 spin_unlock_irqrestore(&cputimer->lock, flags);
1070}
1071
1037/* 1072/*
1038 * Check for any per-thread CPU timers that have fired and move them 1073 * Check for any per-thread CPU timers that have fired and move them
1039 * off the tsk->*_timers list onto the firing list. Per-thread timers 1074 * off the tsk->*_timers list onto the firing list. Per-thread timers
@@ -1057,13 +1092,15 @@ static void check_process_timers(struct task_struct *tsk,
1057 sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY && 1092 sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
1058 list_empty(&timers[CPUCLOCK_VIRT]) && 1093 list_empty(&timers[CPUCLOCK_VIRT]) &&
1059 cputime_eq(sig->it_virt_expires, cputime_zero) && 1094 cputime_eq(sig->it_virt_expires, cputime_zero) &&
1060 list_empty(&timers[CPUCLOCK_SCHED])) 1095 list_empty(&timers[CPUCLOCK_SCHED])) {
1096 stop_process_timers(tsk);
1061 return; 1097 return;
1098 }
1062 1099
1063 /* 1100 /*
1064 * Collect the current process totals. 1101 * Collect the current process totals.
1065 */ 1102 */
1066 thread_group_cputime(tsk, &cputime); 1103 thread_group_cputimer(tsk, &cputime);
1067 utime = cputime.utime; 1104 utime = cputime.utime;
1068 ptime = cputime_add(utime, cputime.stime); 1105 ptime = cputime_add(utime, cputime.stime);
1069 sum_sched_runtime = cputime.sum_exec_runtime; 1106 sum_sched_runtime = cputime.sum_exec_runtime;
@@ -1234,7 +1271,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
1234 clear_dead_task(timer, now); 1271 clear_dead_task(timer, now);
1235 goto out_unlock; 1272 goto out_unlock;
1236 } 1273 }
1237 cpu_clock_sample_group(timer->it_clock, p, &now); 1274 cpu_timer_sample_group(timer->it_clock, p, &now);
1238 bump_cpu_timer(timer, now); 1275 bump_cpu_timer(timer, now);
1239 /* Leave the tasklist_lock locked for the call below. */ 1276 /* Leave the tasklist_lock locked for the call below. */
1240 } 1277 }
@@ -1329,7 +1366,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
1329 if (!task_cputime_zero(&sig->cputime_expires)) { 1366 if (!task_cputime_zero(&sig->cputime_expires)) {
1330 struct task_cputime group_sample; 1367 struct task_cputime group_sample;
1331 1368
1332 thread_group_cputime(tsk, &group_sample); 1369 thread_group_cputimer(tsk, &group_sample);
1333 if (task_cputime_expired(&group_sample, &sig->cputime_expires)) 1370 if (task_cputime_expired(&group_sample, &sig->cputime_expires))
1334 return 1; 1371 return 1;
1335 } 1372 }
@@ -1411,7 +1448,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1411 struct list_head *head; 1448 struct list_head *head;
1412 1449
1413 BUG_ON(clock_idx == CPUCLOCK_SCHED); 1450 BUG_ON(clock_idx == CPUCLOCK_SCHED);
1414 cpu_clock_sample_group(clock_idx, tsk, &now); 1451 cpu_timer_sample_group(clock_idx, tsk, &now);
1415 1452
1416 if (oldval) { 1453 if (oldval) {
1417 if (!cputime_eq(*oldval, cputime_zero)) { 1454 if (!cputime_eq(*oldval, cputime_zero)) {
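
The posix-cpu-timers.c rework above replaces the alloc_percpu()-backed thread_group_cputime() accounting with a direct walk of the thread list, plus a cached thread_group_cputimer that only stays running while process-wide timers are armed; stop_process_timers() switches the cache off again once nothing is waiting. A caller-side sketch of sampling the group profiling clock through the new helper (mirroring the CPUCLOCK_PROF case of cpu_timer_sample_group() above):

/* Sketch: sample a process-wide profiling clock via the cached timer. */
static cputime_t sample_group_prof_clock(struct task_struct *tsk)
{
	struct task_cputime cputime;

	/* Starts the group cputimer if it was idle, then returns the
	 * cached, forward-only totals under cputimer->lock. */
	thread_group_cputimer(tsk, &cputime);
	return cputime_add(cputime.utime, cputime.stime);
}
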
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 887c63787de6..052ec4d195c7 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -477,10 +477,9 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
477 477
478/* Create a POSIX.1b interval timer. */ 478/* Create a POSIX.1b interval timer. */
479 479
480asmlinkage long 480SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
481sys_timer_create(const clockid_t which_clock, 481 struct sigevent __user *, timer_event_spec,
482 struct sigevent __user *timer_event_spec, 482 timer_t __user *, created_timer_id)
483 timer_t __user * created_timer_id)
484{ 483{
485 struct k_itimer *new_timer; 484 struct k_itimer *new_timer;
486 int error, new_timer_id; 485 int error, new_timer_id;
@@ -661,8 +660,8 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
661} 660}
662 661
663/* Get the time remaining on a POSIX.1b interval timer. */ 662/* Get the time remaining on a POSIX.1b interval timer. */
664asmlinkage long 663SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
665sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting) 664 struct itimerspec __user *, setting)
666{ 665{
667 struct k_itimer *timr; 666 struct k_itimer *timr;
668 struct itimerspec cur_setting; 667 struct itimerspec cur_setting;
@@ -691,8 +690,7 @@ sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting)
691 * the call back to do_schedule_next_timer(). So all we need to do is 690 * the call back to do_schedule_next_timer(). So all we need to do is
692 * to pick up the frozen overrun. 691 * to pick up the frozen overrun.
693 */ 692 */
694asmlinkage long 693SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
695sys_timer_getoverrun(timer_t timer_id)
696{ 694{
697 struct k_itimer *timr; 695 struct k_itimer *timr;
698 int overrun; 696 int overrun;
@@ -760,10 +758,9 @@ common_timer_set(struct k_itimer *timr, int flags,
760} 758}
761 759
762/* Set a POSIX.1b interval timer */ 760/* Set a POSIX.1b interval timer */
763asmlinkage long 761SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
764sys_timer_settime(timer_t timer_id, int flags, 762 const struct itimerspec __user *, new_setting,
765 const struct itimerspec __user *new_setting, 763 struct itimerspec __user *, old_setting)
766 struct itimerspec __user *old_setting)
767{ 764{
768 struct k_itimer *timr; 765 struct k_itimer *timr;
769 struct itimerspec new_spec, old_spec; 766 struct itimerspec new_spec, old_spec;
@@ -816,8 +813,7 @@ static inline int timer_delete_hook(struct k_itimer *timer)
816} 813}
817 814
818/* Delete a POSIX.1b interval timer. */ 815/* Delete a POSIX.1b interval timer. */
819asmlinkage long 816SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
820sys_timer_delete(timer_t timer_id)
821{ 817{
822 struct k_itimer *timer; 818 struct k_itimer *timer;
823 unsigned long flags; 819 unsigned long flags;
@@ -903,8 +899,8 @@ int do_posix_clock_nonanosleep(const clockid_t clock, int flags,
903} 899}
904EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep); 900EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep);
905 901
906asmlinkage long sys_clock_settime(const clockid_t which_clock, 902SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
907 const struct timespec __user *tp) 903 const struct timespec __user *, tp)
908{ 904{
909 struct timespec new_tp; 905 struct timespec new_tp;
910 906
@@ -916,8 +912,8 @@ asmlinkage long sys_clock_settime(const clockid_t which_clock,
916 return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp)); 912 return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp));
917} 913}
918 914
919asmlinkage long 915SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
920sys_clock_gettime(const clockid_t which_clock, struct timespec __user *tp) 916 struct timespec __user *, tp)
921{ 917{
922 struct timespec kernel_tp; 918 struct timespec kernel_tp;
923 int error; 919 int error;
@@ -933,8 +929,8 @@ sys_clock_gettime(const clockid_t which_clock, struct timespec __user *tp)
933 929
934} 930}
935 931
936asmlinkage long 932SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
937sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp) 933 struct timespec __user *, tp)
938{ 934{
939 struct timespec rtn_tp; 935 struct timespec rtn_tp;
940 int error; 936 int error;
@@ -963,10 +959,9 @@ static int common_nsleep(const clockid_t which_clock, int flags,
963 which_clock); 959 which_clock);
964} 960}
965 961
966asmlinkage long 962SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
967sys_clock_nanosleep(const clockid_t which_clock, int flags, 963 const struct timespec __user *, rqtp,
968 const struct timespec __user *rqtp, 964 struct timespec __user *, rmtp)
969 struct timespec __user *rmtp)
970{ 965{
971 struct timespec t; 966 struct timespec t;
972 967
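
For orientation, the userspace face of the posix-timers.c conversions above is unchanged; a minimal consumer of the timer_create/timer_settime pair (plain POSIX C, link with -lrt on older glibc):

/* One-shot 1s timer; the default SIGALRM action ends the process. */
#include <signal.h>
#include <time.h>
#include <unistd.h>

int main(void)
{
	timer_t id;
	struct sigevent sev = { .sigev_notify = SIGEV_SIGNAL,
				.sigev_signo  = SIGALRM };
	struct itimerspec its = { .it_value = { .tv_sec = 1 } };

	if (timer_create(CLOCK_MONOTONIC, &sev, &id))	/* sys_timer_create */
		return 1;
	if (timer_settime(id, 0, &its, NULL))		/* sys_timer_settime */
		return 1;
	pause();	/* wait for SIGALRM */
	return 0;
}
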
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 597823b5b700..720ea4f781bd 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -3,8 +3,9 @@ ifeq ($(CONFIG_PM_DEBUG),y)
3EXTRA_CFLAGS += -DDEBUG 3EXTRA_CFLAGS += -DDEBUG
4endif 4endif
5 5
6obj-y := main.o 6obj-$(CONFIG_PM) += main.o
7obj-$(CONFIG_PM_SLEEP) += process.o console.o 7obj-$(CONFIG_PM_SLEEP) += console.o
8obj-$(CONFIG_FREEZER) += process.o
8obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o 9obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o
9 10
10obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o 11obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/console.c b/kernel/power/console.c
index b8628be2a465..a3961b205de7 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -78,6 +78,12 @@ void pm_restore_console(void)
78 } 78 }
79 set_console(orig_fgconsole); 79 set_console(orig_fgconsole);
80 release_console_sem(); 80 release_console_sem();
81
82 if (vt_waitactive(orig_fgconsole)) {
83 pr_debug("Resume: Can't switch VCs.");
84 return;
85 }
86
81 kmsg_redirect = orig_kmsg; 87 kmsg_redirect = orig_kmsg;
82} 88}
83#endif 89#endif
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 45e8541ab7e3..4a4a206b1979 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -71,6 +71,14 @@ void hibernation_set_ops(struct platform_hibernation_ops *ops)
71 mutex_unlock(&pm_mutex); 71 mutex_unlock(&pm_mutex);
72} 72}
73 73
74static bool entering_platform_hibernation;
75
76bool system_entering_hibernation(void)
77{
78 return entering_platform_hibernation;
79}
80EXPORT_SYMBOL(system_entering_hibernation);
81
74#ifdef CONFIG_PM_DEBUG 82#ifdef CONFIG_PM_DEBUG
75static void hibernation_debug_sleep(void) 83static void hibernation_debug_sleep(void)
76{ 84{
@@ -219,6 +227,12 @@ static int create_image(int platform_mode)
219 "aborting hibernation\n"); 227 "aborting hibernation\n");
220 goto Enable_irqs; 228 goto Enable_irqs;
221 } 229 }
 230 error = sysdev_suspend(PMSG_FREEZE);
231 if (error) {
232 printk(KERN_ERR "PM: Some devices failed to power down, "
233 "aborting hibernation\n");
234 goto Power_up_devices;
235 }
222 236
223 if (hibernation_test(TEST_CORE)) 237 if (hibernation_test(TEST_CORE))
224 goto Power_up; 238 goto Power_up;
@@ -234,9 +248,11 @@ static int create_image(int platform_mode)
234 if (!in_suspend) 248 if (!in_suspend)
235 platform_leave(platform_mode); 249 platform_leave(platform_mode);
236 Power_up: 250 Power_up:
251 sysdev_resume();
237 /* NOTE: device_power_up() is just a resume() for devices 252 /* NOTE: device_power_up() is just a resume() for devices
238 * that suspended with irqs off ... no overall powerup. 253 * that suspended with irqs off ... no overall powerup.
239 */ 254 */
255 Power_up_devices:
240 device_power_up(in_suspend ? 256 device_power_up(in_suspend ?
241 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); 257 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
242 Enable_irqs: 258 Enable_irqs:
@@ -327,6 +343,7 @@ static int resume_target_kernel(void)
327 "aborting resume\n"); 343 "aborting resume\n");
328 goto Enable_irqs; 344 goto Enable_irqs;
329 } 345 }
346 sysdev_suspend(PMSG_QUIESCE);
330 /* We'll ignore saved state, but this gets preempt count (etc) right */ 347 /* We'll ignore saved state, but this gets preempt count (etc) right */
331 save_processor_state(); 348 save_processor_state();
332 error = restore_highmem(); 349 error = restore_highmem();
@@ -349,6 +366,7 @@ static int resume_target_kernel(void)
349 swsusp_free(); 366 swsusp_free();
350 restore_processor_state(); 367 restore_processor_state();
351 touch_softlockup_watchdog(); 368 touch_softlockup_watchdog();
369 sysdev_resume();
352 device_power_up(PMSG_RECOVER); 370 device_power_up(PMSG_RECOVER);
353 Enable_irqs: 371 Enable_irqs:
354 local_irq_enable(); 372 local_irq_enable();
@@ -411,6 +429,7 @@ int hibernation_platform_enter(void)
411 if (error) 429 if (error)
412 goto Close; 430 goto Close;
413 431
432 entering_platform_hibernation = true;
414 suspend_console(); 433 suspend_console();
415 error = device_suspend(PMSG_HIBERNATE); 434 error = device_suspend(PMSG_HIBERNATE);
416 if (error) { 435 if (error) {
@@ -431,6 +450,7 @@ int hibernation_platform_enter(void)
431 local_irq_disable(); 450 local_irq_disable();
432 error = device_power_down(PMSG_HIBERNATE); 451 error = device_power_down(PMSG_HIBERNATE);
433 if (!error) { 452 if (!error) {
453 sysdev_suspend(PMSG_HIBERNATE);
434 hibernation_ops->enter(); 454 hibernation_ops->enter();
435 /* We should never get here */ 455 /* We should never get here */
436 while (1); 456 while (1);
@@ -445,6 +465,7 @@ int hibernation_platform_enter(void)
445 Finish: 465 Finish:
446 hibernation_ops->finish(); 466 hibernation_ops->finish();
447 Resume_devices: 467 Resume_devices:
468 entering_platform_hibernation = false;
448 device_resume(PMSG_RESTORE); 469 device_resume(PMSG_RESTORE);
449 resume_console(); 470 resume_console();
450 Close: 471 Close:
@@ -585,6 +606,12 @@ static int software_resume(void)
585 unsigned int flags; 606 unsigned int flags;
586 607
587 /* 608 /*
609 * If the user said "noresume".. bail out early.
610 */
611 if (noresume)
612 return 0;
613
614 /*
588 * name_to_dev_t() below takes a sysfs buffer mutex when sysfs 615 * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
589 * is configured into the kernel. Since the regular hibernate 616 * is configured into the kernel. Since the regular hibernate
590 * trigger path is via sysfs which takes a buffer mutex before 617 * trigger path is via sysfs which takes a buffer mutex before
@@ -600,6 +627,11 @@ static int software_resume(void)
600 mutex_unlock(&pm_mutex); 627 mutex_unlock(&pm_mutex);
601 return -ENOENT; 628 return -ENOENT;
602 } 629 }
630 /*
631 * Some device discovery might still be in progress; we need
632 * to wait for this to finish.
633 */
634 wait_for_device_probe();
603 swsusp_resume_device = name_to_dev_t(resume_file); 635 swsusp_resume_device = name_to_dev_t(resume_file);
604 pr_debug("PM: Resume from partition %s\n", resume_file); 636 pr_debug("PM: Resume from partition %s\n", resume_file);
605 } else { 637 } else {
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 239988873971..c9632f841f64 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -57,16 +57,6 @@ int pm_notifier_call_chain(unsigned long val)
57#ifdef CONFIG_PM_DEBUG 57#ifdef CONFIG_PM_DEBUG
58int pm_test_level = TEST_NONE; 58int pm_test_level = TEST_NONE;
59 59
60static int suspend_test(int level)
61{
62 if (pm_test_level == level) {
63 printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
64 mdelay(5000);
65 return 1;
66 }
67 return 0;
68}
69
70static const char * const pm_tests[__TEST_AFTER_LAST] = { 60static const char * const pm_tests[__TEST_AFTER_LAST] = {
71 [TEST_NONE] = "none", 61 [TEST_NONE] = "none",
72 [TEST_CORE] = "core", 62 [TEST_CORE] = "core",
@@ -125,14 +115,24 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
125} 115}
126 116
127power_attr(pm_test); 117power_attr(pm_test);
128#else /* !CONFIG_PM_DEBUG */ 118#endif /* CONFIG_PM_DEBUG */
129static inline int suspend_test(int level) { return 0; }
130#endif /* !CONFIG_PM_DEBUG */
131 119
132#endif /* CONFIG_PM_SLEEP */ 120#endif /* CONFIG_PM_SLEEP */
133 121
134#ifdef CONFIG_SUSPEND 122#ifdef CONFIG_SUSPEND
135 123
124static int suspend_test(int level)
125{
126#ifdef CONFIG_PM_DEBUG
127 if (pm_test_level == level) {
128 printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
129 mdelay(5000);
130 return 1;
131 }
132#endif /* !CONFIG_PM_DEBUG */
133 return 0;
134}
135
136#ifdef CONFIG_PM_TEST_SUSPEND 136#ifdef CONFIG_PM_TEST_SUSPEND
137 137
138/* 138/*
@@ -298,8 +298,12 @@ static int suspend_enter(suspend_state_t state)
298 goto Done; 298 goto Done;
299 } 299 }
300 300
301 if (!suspend_test(TEST_CORE)) 301 error = sysdev_suspend(PMSG_SUSPEND);
302 error = suspend_ops->enter(state); 302 if (!error) {
303 if (!suspend_test(TEST_CORE))
304 error = suspend_ops->enter(state);
305 sysdev_resume();
306 }
303 307
304 device_power_up(PMSG_RESUME); 308 device_power_up(PMSG_RESUME);
305 Done: 309 Done:
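
suspend_enter() now brackets the platform ->enter() call with the sysdev_suspend()/sysdev_resume() pair that this release split out of device_power_down()/device_power_up(). The resulting ordering, distilled (a sketch; suspend_ops is the file's global, error handling abbreviated):

/* Core suspend ordering after this patch. */
static int enter_state_core(suspend_state_t state)
{
	int error;

	error = device_power_down(PMSG_SUSPEND);	/* irqs already off */
	if (error)
		return error;

	error = sysdev_suspend(PMSG_SUSPEND);		/* system devices last */
	if (!error) {
		if (!suspend_test(TEST_CORE))
			error = suspend_ops->enter(state);
		sysdev_resume();			/* first on the way up */
	}

	device_power_up(PMSG_RESUME);
	return error;
}
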
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 6da14358537c..505f319e489c 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -60,6 +60,7 @@ static struct block_device *resume_bdev;
60static int submit(int rw, pgoff_t page_off, struct page *page, 60static int submit(int rw, pgoff_t page_off, struct page *page,
61 struct bio **bio_chain) 61 struct bio **bio_chain)
62{ 62{
63 const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
63 struct bio *bio; 64 struct bio *bio;
64 65
65 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); 66 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
@@ -80,7 +81,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
80 bio_get(bio); 81 bio_get(bio);
81 82
82 if (bio_chain == NULL) { 83 if (bio_chain == NULL) {
83 submit_bio(rw | (1 << BIO_RW_SYNC), bio); 84 submit_bio(bio_rw, bio);
84 wait_on_page_locked(page); 85 wait_on_page_locked(page);
85 if (rw == READ) 86 if (rw == READ)
86 bio_set_pages_dirty(bio); 87 bio_set_pages_dirty(bio);
@@ -90,7 +91,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
90 get_page(page); /* These pages are freed later */ 91 get_page(page); /* These pages are freed later */
91 bio->bi_private = *bio_chain; 92 bio->bi_private = *bio_chain;
92 *bio_chain = bio; 93 *bio_chain = bio;
93 submit_bio(rw | (1 << BIO_RW_SYNC), bio); 94 submit_bio(bio_rw, bio);
94 } 95 }
95 return 0; 96 return 0;
96} 97}
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 005b93d839ba..6c85359364f2 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -95,15 +95,15 @@ static int snapshot_open(struct inode *inode, struct file *filp)
95 data->swap = swsusp_resume_device ? 95 data->swap = swsusp_resume_device ?
96 swap_type_of(swsusp_resume_device, 0, NULL) : -1; 96 swap_type_of(swsusp_resume_device, 0, NULL) : -1;
97 data->mode = O_RDONLY; 97 data->mode = O_RDONLY;
98 error = pm_notifier_call_chain(PM_RESTORE_PREPARE); 98 error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
99 if (error) 99 if (error)
100 pm_notifier_call_chain(PM_POST_RESTORE); 100 pm_notifier_call_chain(PM_POST_HIBERNATION);
101 } else { 101 } else {
102 data->swap = -1; 102 data->swap = -1;
103 data->mode = O_WRONLY; 103 data->mode = O_WRONLY;
104 error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); 104 error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
105 if (error) 105 if (error)
106 pm_notifier_call_chain(PM_POST_HIBERNATION); 106 pm_notifier_call_chain(PM_POST_RESTORE);
107 } 107 }
108 if (error) 108 if (error)
109 atomic_inc(&snapshot_device_available); 109 atomic_inc(&snapshot_device_available);
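
The user.c hunk fixes inverted notifier events: opening the snapshot device read-only means an image is about to be created, so PM_HIBERNATION_PREPARE is the correct chain, while the write-only restore path gets PM_RESTORE_PREPARE. For reference, a minimal consumer of these events (standard register_pm_notifier() usage; the names my_pm_callback/my_pm_nb are illustrative):

/* Sketch: subscribing to the PM notifier chain fired above. */
static int my_pm_callback(struct notifier_block *nb,
			  unsigned long event, void *unused)
{
	switch (event) {
	case PM_HIBERNATION_PREPARE:	/* image creation imminent */
		/* quiesce private state */
		break;
	case PM_POST_HIBERNATION:	/* finished or aborted */
		/* undo the above */
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block my_pm_nb = {
	.notifier_call = my_pm_callback,
};

/* Call register_pm_notifier(&my_pm_nb) from an init function. */
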
diff --git a/kernel/printk.c b/kernel/printk.c
index 7015733793e8..e3602d0755b0 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -73,7 +73,6 @@ EXPORT_SYMBOL(oops_in_progress);
73 * driver system. 73 * driver system.
74 */ 74 */
75static DECLARE_MUTEX(console_sem); 75static DECLARE_MUTEX(console_sem);
76static DECLARE_MUTEX(secondary_console_sem);
77struct console *console_drivers; 76struct console *console_drivers;
78EXPORT_SYMBOL_GPL(console_drivers); 77EXPORT_SYMBOL_GPL(console_drivers);
79 78
@@ -382,7 +381,7 @@ out:
382 return error; 381 return error;
383} 382}
384 383
385asmlinkage long sys_syslog(int type, char __user *buf, int len) 384SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
386{ 385{
387 return do_syslog(type, buf, len); 386 return do_syslog(type, buf, len);
388} 387}
@@ -742,11 +741,6 @@ EXPORT_SYMBOL(vprintk);
742 741
743#else 742#else
744 743
745asmlinkage long sys_syslog(int type, char __user *buf, int len)
746{
747 return -ENOSYS;
748}
749
750static void call_console_drivers(unsigned start, unsigned end) 744static void call_console_drivers(unsigned start, unsigned end)
751{ 745{
752} 746}
@@ -896,12 +890,14 @@ void suspend_console(void)
896 printk("Suspending console(s) (use no_console_suspend to debug)\n"); 890 printk("Suspending console(s) (use no_console_suspend to debug)\n");
897 acquire_console_sem(); 891 acquire_console_sem();
898 console_suspended = 1; 892 console_suspended = 1;
893 up(&console_sem);
899} 894}
900 895
901void resume_console(void) 896void resume_console(void)
902{ 897{
903 if (!console_suspend_enabled) 898 if (!console_suspend_enabled)
904 return; 899 return;
900 down(&console_sem);
905 console_suspended = 0; 901 console_suspended = 0;
906 release_console_sem(); 902 release_console_sem();
907} 903}
@@ -917,11 +913,9 @@ void resume_console(void)
917void acquire_console_sem(void) 913void acquire_console_sem(void)
918{ 914{
919 BUG_ON(in_interrupt()); 915 BUG_ON(in_interrupt());
920 if (console_suspended) {
921 down(&secondary_console_sem);
922 return;
923 }
924 down(&console_sem); 916 down(&console_sem);
917 if (console_suspended)
918 return;
925 console_locked = 1; 919 console_locked = 1;
926 console_may_schedule = 1; 920 console_may_schedule = 1;
927} 921}
@@ -931,6 +925,10 @@ int try_acquire_console_sem(void)
931{ 925{
932 if (down_trylock(&console_sem)) 926 if (down_trylock(&console_sem))
933 return -1; 927 return -1;
928 if (console_suspended) {
929 up(&console_sem);
930 return -1;
931 }
934 console_locked = 1; 932 console_locked = 1;
935 console_may_schedule = 0; 933 console_may_schedule = 0;
936 return 0; 934 return 0;
@@ -984,7 +982,7 @@ void release_console_sem(void)
984 unsigned wake_klogd = 0; 982 unsigned wake_klogd = 0;
985 983
986 if (console_suspended) { 984 if (console_suspended) {
987 up(&secondary_console_sem); 985 up(&console_sem);
988 return; 986 return;
989 } 987 }
990 988
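
The printk.c change folds secondary_console_sem into console_sem: suspend_console() marks console_suspended and drops the semaphore, and the acquire/release pair treats the suspended state as "hold the lock, but never touch the hardware". Reduced to a model of the two patched paths:

/* Reduced model of the single-semaphore protocol above. */
static void model_acquire(void)
{
	down(&console_sem);
	if (console_suspended)
		return;		/* sem held, console state untouched */
	console_locked = 1;
}

static void model_release(void)
{
	if (console_suspended) {
		up(&console_sem);
		return;		/* nothing flushed while suspended */
	}
	console_locked = 0;
	/* ... flush the log buffer to the drivers ... */
	up(&console_sem);
}
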
diff --git a/kernel/profile.c b/kernel/profile.c
index 784933acf5b8..7724e0409bae 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -114,12 +114,15 @@ int __ref profile_init(void)
114 if (!slab_is_available()) { 114 if (!slab_is_available()) {
115 prof_buffer = alloc_bootmem(buffer_bytes); 115 prof_buffer = alloc_bootmem(buffer_bytes);
116 alloc_bootmem_cpumask_var(&prof_cpu_mask); 116 alloc_bootmem_cpumask_var(&prof_cpu_mask);
117 cpumask_copy(prof_cpu_mask, cpu_possible_mask);
117 return 0; 118 return 0;
118 } 119 }
119 120
120 if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) 121 if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
121 return -ENOMEM; 122 return -ENOMEM;
122 123
124 cpumask_copy(prof_cpu_mask, cpu_possible_mask);
125
123 prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL); 126 prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL);
124 if (prof_buffer) 127 if (prof_buffer)
125 return 0; 128 return 0;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 29dc700e198c..c9cf48b21f05 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -574,7 +574,7 @@ struct task_struct *ptrace_get_task_struct(pid_t pid)
574#define arch_ptrace_attach(child) do { } while (0) 574#define arch_ptrace_attach(child) do { } while (0)
575#endif 575#endif
576 576
577asmlinkage long sys_ptrace(long request, long pid, long addr, long data) 577SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data)
578{ 578{
579 struct task_struct *child; 579 struct task_struct *child;
580 long ret; 580 long ret;
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 490934fc7ac3..654c640a6b9c 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -679,8 +679,8 @@ int rcu_needs_cpu(int cpu)
679void rcu_check_callbacks(int cpu, int user) 679void rcu_check_callbacks(int cpu, int user)
680{ 680{
681 if (user || 681 if (user ||
682 (idle_cpu(cpu) && !in_softirq() && 682 (idle_cpu(cpu) && rcu_scheduler_active &&
683 hardirq_count() <= (1 << HARDIRQ_SHIFT))) { 683 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
684 684
685 /* 685 /*
686 * Get here if this CPU took its interrupt from user 686 * Get here if this CPU took its interrupt from user
@@ -716,7 +716,7 @@ void rcu_check_callbacks(int cpu, int user)
716 raise_rcu_softirq(); 716 raise_rcu_softirq();
717} 717}
718 718
719static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, 719static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
720 struct rcu_data *rdp) 720 struct rcu_data *rdp)
721{ 721{
722 unsigned long flags; 722 unsigned long flags;
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index d92a76a881aa..cae8a059cf47 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -44,6 +44,7 @@
44#include <linux/cpu.h> 44#include <linux/cpu.h>
45#include <linux/mutex.h> 45#include <linux/mutex.h>
46#include <linux/module.h> 46#include <linux/module.h>
47#include <linux/kernel_stat.h>
47 48
48enum rcu_barrier { 49enum rcu_barrier {
49 RCU_BARRIER_STD, 50 RCU_BARRIER_STD,
@@ -55,6 +56,7 @@ static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
55static atomic_t rcu_barrier_cpu_count; 56static atomic_t rcu_barrier_cpu_count;
56static DEFINE_MUTEX(rcu_barrier_mutex); 57static DEFINE_MUTEX(rcu_barrier_mutex);
57static struct completion rcu_barrier_completion; 58static struct completion rcu_barrier_completion;
59int rcu_scheduler_active __read_mostly;
58 60
59/* 61/*
60 * Awaken the corresponding synchronize_rcu() instance now that a 62 * Awaken the corresponding synchronize_rcu() instance now that a
@@ -80,6 +82,10 @@ void wakeme_after_rcu(struct rcu_head *head)
80void synchronize_rcu(void) 82void synchronize_rcu(void)
81{ 83{
82 struct rcu_synchronize rcu; 84 struct rcu_synchronize rcu;
85
86 if (rcu_blocking_is_gp())
87 return;
88
83 init_completion(&rcu.completion); 89 init_completion(&rcu.completion);
84 /* Will wake me after RCU finished. */ 90 /* Will wake me after RCU finished. */
85 call_rcu(&rcu.head, wakeme_after_rcu); 91 call_rcu(&rcu.head, wakeme_after_rcu);
@@ -175,3 +181,9 @@ void __init rcu_init(void)
175 __rcu_init(); 181 __rcu_init();
176} 182}
177 183
184void rcu_scheduler_starting(void)
185{
186 WARN_ON(num_online_cpus() != 1);
187 WARN_ON(nr_context_switches() > 0);
188 rcu_scheduler_active = 1;
189}
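
The rcupdate.c additions teach RCU that early boot is special: until rcu_scheduler_starting() flips rcu_scheduler_active there is one CPU, no context switches, and therefore nothing to wait for, so synchronize_rcu() may return at once. The guard it calls presumably reduces to the same test the rcupreempt hunk below spells out (a sketch, not the verbatim helper):

/* Sketch of the rcu_blocking_is_gp() test used by synchronize_rcu(). */
static inline int rcu_blocking_is_gp(void)
{
	/* With one online CPU, any point where this CPU can block is
	 * already a grace period: no other CPU holds a reference. */
	return num_online_cpus() == 1;
}
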
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 33cfc50781f9..5d59e850fb71 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -1181,6 +1181,9 @@ void __synchronize_sched(void)
1181{ 1181{
1182 struct rcu_synchronize rcu; 1182 struct rcu_synchronize rcu;
1183 1183
1184 if (num_online_cpus() == 1)
1185 return; /* blocking is gp if only one CPU! */
1186
1184 init_completion(&rcu.completion); 1187 init_completion(&rcu.completion);
1185 /* Will wake me after RCU finished. */ 1188 /* Will wake me after RCU finished. */
1186 call_rcu_sched(&rcu.head, wakeme_after_rcu); 1189 call_rcu_sched(&rcu.head, wakeme_after_rcu);
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 1cff28db56b6..7c4142a79f0a 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -136,29 +136,47 @@ static int stutter_pause_test = 0;
136#endif 136#endif
137int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; 137int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
138 138
139#define FULLSTOP_SHUTDOWN 1 /* Bail due to system shutdown/panic. */ 139/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */
140#define FULLSTOP_CLEANUP 2 /* Orderly shutdown. */ 140
141static int fullstop; /* stop generating callbacks at test end. */ 141#define FULLSTOP_DONTSTOP 0 /* Normal operation. */
142DEFINE_MUTEX(fullstop_mutex); /* protect fullstop transitions and */ 142#define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */
143 /* spawning of kthreads. */ 143#define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */
144static int fullstop = FULLSTOP_RMMOD;
145DEFINE_MUTEX(fullstop_mutex); /* Protect fullstop transitions and spawning */
146 /* of kthreads. */
144 147
145/* 148/*
146 * Detect and respond to a signal-based shutdown. 149 * Detect and respond to a system shutdown.
147 */ 150 */
148static int 151static int
149rcutorture_shutdown_notify(struct notifier_block *unused1, 152rcutorture_shutdown_notify(struct notifier_block *unused1,
150 unsigned long unused2, void *unused3) 153 unsigned long unused2, void *unused3)
151{ 154{
152 if (fullstop)
153 return NOTIFY_DONE;
154 mutex_lock(&fullstop_mutex); 155 mutex_lock(&fullstop_mutex);
155 if (!fullstop) 156 if (fullstop == FULLSTOP_DONTSTOP)
156 fullstop = FULLSTOP_SHUTDOWN; 157 fullstop = FULLSTOP_SHUTDOWN;
158 else
159 printk(KERN_WARNING /* but going down anyway, so... */
160 "Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
157 mutex_unlock(&fullstop_mutex); 161 mutex_unlock(&fullstop_mutex);
158 return NOTIFY_DONE; 162 return NOTIFY_DONE;
159} 163}
160 164
161/* 165/*
166 * Absorb kthreads into a kernel function that won't return, so that
167 * they won't ever access module text or data again.
168 */
169static void rcutorture_shutdown_absorb(char *title)
170{
171 if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
172 printk(KERN_NOTICE
173 "rcutorture thread %s parking due to system shutdown\n",
174 title);
175 schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT);
176 }
177}
178
179/*
162 * Allocate an element from the rcu_tortures pool. 180 * Allocate an element from the rcu_tortures pool.
163 */ 181 */
164static struct rcu_torture * 182static struct rcu_torture *
@@ -219,13 +237,14 @@ rcu_random(struct rcu_random_state *rrsp)
219} 237}
220 238
221static void 239static void
222rcu_stutter_wait(void) 240rcu_stutter_wait(char *title)
223{ 241{
224 while ((stutter_pause_test || !rcutorture_runnable) && !fullstop) { 242 while (stutter_pause_test || !rcutorture_runnable) {
225 if (rcutorture_runnable) 243 if (rcutorture_runnable)
226 schedule_timeout_interruptible(1); 244 schedule_timeout_interruptible(1);
227 else 245 else
228 schedule_timeout_interruptible(round_jiffies_relative(HZ)); 246 schedule_timeout_interruptible(round_jiffies_relative(HZ));
247 rcutorture_shutdown_absorb(title);
229 } 248 }
230} 249}
231 250
@@ -287,7 +306,7 @@ rcu_torture_cb(struct rcu_head *p)
287 int i; 306 int i;
288 struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu); 307 struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu);
289 308
290 if (fullstop) { 309 if (fullstop != FULLSTOP_DONTSTOP) {
291 /* Test is ending, just drop callbacks on the floor. */ 310 /* Test is ending, just drop callbacks on the floor. */
292 /* The next initialization will pick up the pieces. */ 311 /* The next initialization will pick up the pieces. */
293 return; 312 return;
@@ -619,10 +638,11 @@ rcu_torture_writer(void *arg)
619 } 638 }
620 rcu_torture_current_version++; 639 rcu_torture_current_version++;
621 oldbatch = cur_ops->completed(); 640 oldbatch = cur_ops->completed();
622 rcu_stutter_wait(); 641 rcu_stutter_wait("rcu_torture_writer");
623 } while (!kthread_should_stop() && !fullstop); 642 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
624 VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping"); 643 VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping");
625 while (!kthread_should_stop() && fullstop != FULLSTOP_SHUTDOWN) 644 rcutorture_shutdown_absorb("rcu_torture_writer");
645 while (!kthread_should_stop())
626 schedule_timeout_uninterruptible(1); 646 schedule_timeout_uninterruptible(1);
627 return 0; 647 return 0;
628} 648}
@@ -643,11 +663,12 @@ rcu_torture_fakewriter(void *arg)
643 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); 663 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
644 udelay(rcu_random(&rand) & 0x3ff); 664 udelay(rcu_random(&rand) & 0x3ff);
645 cur_ops->sync(); 665 cur_ops->sync();
646 rcu_stutter_wait(); 666 rcu_stutter_wait("rcu_torture_fakewriter");
647 } while (!kthread_should_stop() && !fullstop); 667 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
648 668
649 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); 669 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping");
650 while (!kthread_should_stop() && fullstop != FULLSTOP_SHUTDOWN) 670 rcutorture_shutdown_absorb("rcu_torture_fakewriter");
671 while (!kthread_should_stop())
651 schedule_timeout_uninterruptible(1); 672 schedule_timeout_uninterruptible(1);
652 return 0; 673 return 0;
653} 674}
@@ -752,12 +773,13 @@ rcu_torture_reader(void *arg)
752 preempt_enable(); 773 preempt_enable();
753 cur_ops->readunlock(idx); 774 cur_ops->readunlock(idx);
754 schedule(); 775 schedule();
755 rcu_stutter_wait(); 776 rcu_stutter_wait("rcu_torture_reader");
756 } while (!kthread_should_stop() && !fullstop); 777 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
757 VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping"); 778 VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
779 rcutorture_shutdown_absorb("rcu_torture_reader");
758 if (irqreader && cur_ops->irqcapable) 780 if (irqreader && cur_ops->irqcapable)
759 del_timer_sync(&t); 781 del_timer_sync(&t);
760 while (!kthread_should_stop() && fullstop != FULLSTOP_SHUTDOWN) 782 while (!kthread_should_stop())
761 schedule_timeout_uninterruptible(1); 783 schedule_timeout_uninterruptible(1);
762 return 0; 784 return 0;
763} 785}
@@ -854,7 +876,8 @@ rcu_torture_stats(void *arg)
854 do { 876 do {
855 schedule_timeout_interruptible(stat_interval * HZ); 877 schedule_timeout_interruptible(stat_interval * HZ);
856 rcu_torture_stats_print(); 878 rcu_torture_stats_print();
857 } while (!kthread_should_stop() && !fullstop); 879 rcutorture_shutdown_absorb("rcu_torture_stats");
880 } while (!kthread_should_stop());
858 VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping"); 881 VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping");
859 return 0; 882 return 0;
860} 883}
@@ -866,52 +889,49 @@ static int rcu_idle_cpu; /* Force all torture tasks off this CPU */
866 */ 889 */
867static void rcu_torture_shuffle_tasks(void) 890static void rcu_torture_shuffle_tasks(void)
868{ 891{
869 cpumask_var_t tmp_mask; 892 cpumask_t tmp_mask;
870 int i; 893 int i;
871 894
872 if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL)) 895 cpus_setall(tmp_mask);
873 BUG();
874
875 cpumask_setall(tmp_mask);
876 get_online_cpus(); 896 get_online_cpus();
877 897
878 /* No point in shuffling if there is only one online CPU (ex: UP) */ 898 /* No point in shuffling if there is only one online CPU (ex: UP) */
879 if (num_online_cpus() == 1) 899 if (num_online_cpus() == 1) {
880 goto out; 900 put_online_cpus();
901 return;
902 }
881 903
882 if (rcu_idle_cpu != -1) 904 if (rcu_idle_cpu != -1)
883 cpumask_clear_cpu(rcu_idle_cpu, tmp_mask); 905 cpu_clear(rcu_idle_cpu, tmp_mask);
884 906
885 set_cpus_allowed_ptr(current, tmp_mask); 907 set_cpus_allowed_ptr(current, &tmp_mask);
886 908
887 if (reader_tasks) { 909 if (reader_tasks) {
888 for (i = 0; i < nrealreaders; i++) 910 for (i = 0; i < nrealreaders; i++)
889 if (reader_tasks[i]) 911 if (reader_tasks[i])
890 set_cpus_allowed_ptr(reader_tasks[i], 912 set_cpus_allowed_ptr(reader_tasks[i],
891 tmp_mask); 913 &tmp_mask);
892 } 914 }
893 915
894 if (fakewriter_tasks) { 916 if (fakewriter_tasks) {
895 for (i = 0; i < nfakewriters; i++) 917 for (i = 0; i < nfakewriters; i++)
896 if (fakewriter_tasks[i]) 918 if (fakewriter_tasks[i])
897 set_cpus_allowed_ptr(fakewriter_tasks[i], 919 set_cpus_allowed_ptr(fakewriter_tasks[i],
898 tmp_mask); 920 &tmp_mask);
899 } 921 }
900 922
901 if (writer_task) 923 if (writer_task)
902 set_cpus_allowed_ptr(writer_task, tmp_mask); 924 set_cpus_allowed_ptr(writer_task, &tmp_mask);
903 925
904 if (stats_task) 926 if (stats_task)
905 set_cpus_allowed_ptr(stats_task, tmp_mask); 927 set_cpus_allowed_ptr(stats_task, &tmp_mask);
906 928
907 if (rcu_idle_cpu == -1) 929 if (rcu_idle_cpu == -1)
908 rcu_idle_cpu = num_online_cpus() - 1; 930 rcu_idle_cpu = num_online_cpus() - 1;
909 else 931 else
910 rcu_idle_cpu--; 932 rcu_idle_cpu--;
911 933
912out:
913 put_online_cpus(); 934 put_online_cpus();
914 free_cpumask_var(tmp_mask);
915} 935}
916 936
917/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the 937/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the
@@ -925,7 +945,8 @@ rcu_torture_shuffle(void *arg)
925 do { 945 do {
926 schedule_timeout_interruptible(shuffle_interval * HZ); 946 schedule_timeout_interruptible(shuffle_interval * HZ);
927 rcu_torture_shuffle_tasks(); 947 rcu_torture_shuffle_tasks();
928 } while (!kthread_should_stop() && !fullstop); 948 rcutorture_shutdown_absorb("rcu_torture_shuffle");
949 } while (!kthread_should_stop());
929 VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping"); 950 VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping");
930 return 0; 951 return 0;
931} 952}
@@ -940,10 +961,11 @@ rcu_torture_stutter(void *arg)
940 do { 961 do {
941 schedule_timeout_interruptible(stutter * HZ); 962 schedule_timeout_interruptible(stutter * HZ);
942 stutter_pause_test = 1; 963 stutter_pause_test = 1;
943 if (!kthread_should_stop() && !fullstop) 964 if (!kthread_should_stop())
944 schedule_timeout_interruptible(stutter * HZ); 965 schedule_timeout_interruptible(stutter * HZ);
945 stutter_pause_test = 0; 966 stutter_pause_test = 0;
946 } while (!kthread_should_stop() && !fullstop); 967 rcutorture_shutdown_absorb("rcu_torture_stutter");
968 } while (!kthread_should_stop());
947 VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping"); 969 VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping");
948 return 0; 970 return 0;
949} 971}
@@ -970,15 +992,16 @@ rcu_torture_cleanup(void)
970 int i; 992 int i;
971 993
972 mutex_lock(&fullstop_mutex); 994 mutex_lock(&fullstop_mutex);
973 if (!fullstop) { 995 if (fullstop == FULLSTOP_SHUTDOWN) {
974 /* If being signaled, let it happen, then exit. */ 996 printk(KERN_WARNING /* but going down anyway, so... */
997 "Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
975 mutex_unlock(&fullstop_mutex); 998 mutex_unlock(&fullstop_mutex);
976 schedule_timeout_interruptible(10 * HZ); 999 schedule_timeout_uninterruptible(10);
977 if (cur_ops->cb_barrier != NULL) 1000 if (cur_ops->cb_barrier != NULL)
978 cur_ops->cb_barrier(); 1001 cur_ops->cb_barrier();
979 return; 1002 return;
980 } 1003 }
981 fullstop = FULLSTOP_CLEANUP; 1004 fullstop = FULLSTOP_RMMOD;
982 mutex_unlock(&fullstop_mutex); 1005 mutex_unlock(&fullstop_mutex);
983 unregister_reboot_notifier(&rcutorture_nb); 1006 unregister_reboot_notifier(&rcutorture_nb);
984 if (stutter_task) { 1007 if (stutter_task) {
@@ -1078,7 +1101,7 @@ rcu_torture_init(void)
1078 else 1101 else
1079 nrealreaders = 2 * num_online_cpus(); 1102 nrealreaders = 2 * num_online_cpus();
1080 rcu_torture_print_module_parms("Start of test"); 1103 rcu_torture_print_module_parms("Start of test");
1081 fullstop = 0; 1104 fullstop = FULLSTOP_DONTSTOP;
1082 1105
1083 /* Set up the freelist. */ 1106 /* Set up the freelist. */
1084 1107
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f2d8638e6c60..97ce31579ec0 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -948,8 +948,8 @@ static void rcu_do_batch(struct rcu_data *rdp)
948void rcu_check_callbacks(int cpu, int user) 948void rcu_check_callbacks(int cpu, int user)
949{ 949{
950 if (user || 950 if (user ||
951 (idle_cpu(cpu) && !in_softirq() && 951 (idle_cpu(cpu) && rcu_scheduler_active &&
952 hardirq_count() <= (1 << HARDIRQ_SHIFT))) { 952 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
953 953
954 /* 954 /*
955 * Get here if this CPU took its interrupt from user 955 * Get here if this CPU took its interrupt from user
@@ -1314,7 +1314,7 @@ int rcu_needs_cpu(int cpu)
1314 * access due to the fact that this CPU cannot possibly have any RCU 1314 * access due to the fact that this CPU cannot possibly have any RCU
1315 * callbacks in flight yet. 1315 * callbacks in flight yet.
1316 */ 1316 */
1317static void 1317static void __cpuinit
1318rcu_init_percpu_data(int cpu, struct rcu_state *rsp) 1318rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
1319{ 1319{
1320 unsigned long flags; 1320 unsigned long flags;
diff --git a/kernel/relay.c b/kernel/relay.c
index 09ac2008f77b..9d79b7854fa6 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -663,8 +663,10 @@ int relay_late_setup_files(struct rchan *chan,
663 663
664 mutex_lock(&relay_channels_mutex); 664 mutex_lock(&relay_channels_mutex);
665 /* Is chan already set up? */ 665 /* Is chan already set up? */
666 if (unlikely(chan->has_base_filename)) 666 if (unlikely(chan->has_base_filename)) {
667 mutex_unlock(&relay_channels_mutex);
667 return -EEXIST; 668 return -EEXIST;
669 }
668 chan->has_base_filename = 1; 670 chan->has_base_filename = 1;
669 chan->parent = parent; 671 chan->parent = parent;
670 curr_cpu = get_cpu(); 672 curr_cpu = get_cpu();
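
The relay.c fix plugs a textbook error-path lock leak: the early return of -EEXIST skipped the mutex_unlock(). The usual kernel defence is a single exit label so every path funnels through the unlock, sketched here on the same function:

/* Sketch: single-exit locking idiom that avoids the leak fixed above. */
static int setup_base_filename_once(struct rchan *chan)
{
	int err = 0;

	mutex_lock(&relay_channels_mutex);
	if (chan->has_base_filename) {
		err = -EEXIST;
		goto out;	/* unlock still runs below */
	}
	chan->has_base_filename = 1;
	/* ... remainder of the setup ... */
out:
	mutex_unlock(&relay_channels_mutex);
	return err;
}
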
diff --git a/kernel/resource.c b/kernel/resource.c
index ca6a1536b205..fd5d7d574bb9 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -620,6 +620,7 @@ resource_size_t resource_alignment(struct resource *res)
620 * @start: resource start address 620 * @start: resource start address
621 * @n: resource region size 621 * @n: resource region size
622 * @name: reserving caller's ID string 622 * @name: reserving caller's ID string
623 * @flags: IO resource flags
623 */ 624 */
624struct resource * __request_region(struct resource *parent, 625struct resource * __request_region(struct resource *parent,
625 resource_size_t start, resource_size_t n, 626 resource_size_t start, resource_size_t n,
diff --git a/kernel/sched.c b/kernel/sched.c
index deb5ac8c12f3..8e2558c2ba67 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -125,6 +125,9 @@ DEFINE_TRACE(sched_switch);
125DEFINE_TRACE(sched_migrate_task); 125DEFINE_TRACE(sched_migrate_task);
126 126
127#ifdef CONFIG_SMP 127#ifdef CONFIG_SMP
128
129static void double_rq_lock(struct rq *rq1, struct rq *rq2);
130
128/* 131/*
129 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) 132 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
130 * Since cpu_power is a 'constant', we can use a reciprocal divide. 133 * Since cpu_power is a 'constant', we can use a reciprocal divide.
@@ -220,7 +223,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
220{ 223{
221 ktime_t now; 224 ktime_t now;
222 225
223 if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF) 226 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
224 return; 227 return;
225 228
226 if (hrtimer_active(&rt_b->rt_period_timer)) 229 if (hrtimer_active(&rt_b->rt_period_timer))
@@ -1320,8 +1323,8 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
1320 * slice expiry etc. 1323 * slice expiry etc.
1321 */ 1324 */
1322 1325
1323#define WEIGHT_IDLEPRIO 2 1326#define WEIGHT_IDLEPRIO 3
1324#define WMULT_IDLEPRIO (1 << 31) 1327#define WMULT_IDLEPRIO 1431655765
1325 1328
1326/* 1329/*
1327 * Nice levels are multiplicative, with a gentle 10% change for every 1330 * Nice levels are multiplicative, with a gentle 10% change for every
@@ -3877,19 +3880,24 @@ int select_nohz_load_balancer(int stop_tick)
3877 int cpu = smp_processor_id(); 3880 int cpu = smp_processor_id();
3878 3881
3879 if (stop_tick) { 3882 if (stop_tick) {
3880 cpumask_set_cpu(cpu, nohz.cpu_mask);
3881 cpu_rq(cpu)->in_nohz_recently = 1; 3883 cpu_rq(cpu)->in_nohz_recently = 1;
3882 3884
3883 /* 3885 if (!cpu_active(cpu)) {
3884 * If we are going offline and still the leader, give up! 3886 if (atomic_read(&nohz.load_balancer) != cpu)
3885 */ 3887 return 0;
3886 if (!cpu_active(cpu) && 3888
3887 atomic_read(&nohz.load_balancer) == cpu) { 3889 /*
3890 * If we are going offline and still the leader,
3891 * give up!
3892 */
3888 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) 3893 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
3889 BUG(); 3894 BUG();
3895
3890 return 0; 3896 return 0;
3891 } 3897 }
3892 3898
3899 cpumask_set_cpu(cpu, nohz.cpu_mask);
3900
3893 /* time for ilb owner also to sleep */ 3901 /* time for ilb owner also to sleep */
3894 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { 3902 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
3895 if (atomic_read(&nohz.load_balancer) == cpu) 3903 if (atomic_read(&nohz.load_balancer) == cpu)
@@ -4437,7 +4445,7 @@ void __kprobes sub_preempt_count(int val)
4437 /* 4445 /*
4438 * Underflow? 4446 * Underflow?
4439 */ 4447 */
4440 if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked()))) 4448 if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
4441 return; 4449 return;
4442 /* 4450 /*
4443 * Is the spinlock portion underflowing? 4451 * Is the spinlock portion underflowing?
@@ -4684,8 +4692,8 @@ EXPORT_SYMBOL(default_wake_function);
4684 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns 4692 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
4685 * zero in this (rare) case, and we handle it by continuing to scan the queue. 4693 * zero in this (rare) case, and we handle it by continuing to scan the queue.
4686 */ 4694 */
4687static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, 4695void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
4688 int nr_exclusive, int sync, void *key) 4696 int nr_exclusive, int sync, void *key)
4689{ 4697{
4690 wait_queue_t *curr, *next; 4698 wait_queue_t *curr, *next;
4691 4699
@@ -5123,7 +5131,7 @@ int can_nice(const struct task_struct *p, const int nice)
5123 * sys_setpriority is a more generic, but much slower function that 5131 * sys_setpriority is a more generic, but much slower function that
5124 * does similar things. 5132 * does similar things.
5125 */ 5133 */
5126asmlinkage long sys_nice(int increment) 5134SYSCALL_DEFINE1(nice, int, increment)
5127{ 5135{
5128 long nice, retval; 5136 long nice, retval;
5129 5137
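
This is the first of many hunks in this merge that convert open-coded `asmlinkage long sys_foo(...)` definitions to the SYSCALL_DEFINEn() macros. A rough userspace model of the plain expansion follows; it is a sketch only, and on some 64-bit architectures the real macros in include/linux/syscalls.h additionally emit wrappers that sign-extend 32-bit arguments, which is a large part of why the conversion happened:

#include <stdio.h>

#define asmlinkage      /* kernel linkage attribute; empty in this sketch */

/* Simplified model of the non-wrapped expansion; see caveats above. */
#define SYSCALL_DEFINE1(name, t1, a1) \
        asmlinkage long sys_##name(t1 a1)

SYSCALL_DEFINE1(nice, int, increment)
{
        return increment;       /* stand-in body */
}

int main(void)
{
        printf("sys_nice(5) -> %ld\n", sys_nice(5));
        return 0;
}

The macro form also gives every syscall a uniform place to hang metadata, which later instrumentation work builds on.
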
@@ -5430,8 +5438,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
5430 * @policy: new policy. 5438 * @policy: new policy.
5431 * @param: structure containing the new RT priority. 5439 * @param: structure containing the new RT priority.
5432 */ 5440 */
5433asmlinkage long 5441SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
5434sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) 5442 struct sched_param __user *, param)
5435{ 5443{
5436 /* negative values for policy are not valid */ 5444 /* negative values for policy are not valid */
5437 if (policy < 0) 5445 if (policy < 0)
@@ -5445,7 +5453,7 @@ sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
5445 * @pid: the pid in question. 5453 * @pid: the pid in question.
5446 * @param: structure containing the new RT priority. 5454 * @param: structure containing the new RT priority.
5447 */ 5455 */
5448asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) 5456SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
5449{ 5457{
5450 return do_sched_setscheduler(pid, -1, param); 5458 return do_sched_setscheduler(pid, -1, param);
5451} 5459}
@@ -5454,7 +5462,7 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
5454 * sys_sched_getscheduler - get the policy (scheduling class) of a thread 5462 * sys_sched_getscheduler - get the policy (scheduling class) of a thread
5455 * @pid: the pid in question. 5463 * @pid: the pid in question.
5456 */ 5464 */
5457asmlinkage long sys_sched_getscheduler(pid_t pid) 5465SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
5458{ 5466{
5459 struct task_struct *p; 5467 struct task_struct *p;
5460 int retval; 5468 int retval;
@@ -5479,7 +5487,7 @@ asmlinkage long sys_sched_getscheduler(pid_t pid)
5479 * @pid: the pid in question. 5487 * @pid: the pid in question.
5480 * @param: structure containing the RT priority. 5488 * @param: structure containing the RT priority.
5481 */ 5489 */
5482asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) 5490SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
5483{ 5491{
5484 struct sched_param lp; 5492 struct sched_param lp;
5485 struct task_struct *p; 5493 struct task_struct *p;
@@ -5597,8 +5605,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
5597 * @len: length in bytes of the bitmask pointed to by user_mask_ptr 5605 * @len: length in bytes of the bitmask pointed to by user_mask_ptr
5598 * @user_mask_ptr: user-space pointer to the new cpu mask 5606 * @user_mask_ptr: user-space pointer to the new cpu mask
5599 */ 5607 */
5600asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, 5608SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
5601 unsigned long __user *user_mask_ptr) 5609 unsigned long __user *, user_mask_ptr)
5602{ 5610{
5603 cpumask_var_t new_mask; 5611 cpumask_var_t new_mask;
5604 int retval; 5612 int retval;
@@ -5645,8 +5653,8 @@ out_unlock:
5645 * @len: length in bytes of the bitmask pointed to by user_mask_ptr 5653 * @len: length in bytes of the bitmask pointed to by user_mask_ptr
5646 * @user_mask_ptr: user-space pointer to hold the current cpu mask 5654 * @user_mask_ptr: user-space pointer to hold the current cpu mask
5647 */ 5655 */
5648asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, 5656SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
5649 unsigned long __user *user_mask_ptr) 5657 unsigned long __user *, user_mask_ptr)
5650{ 5658{
5651 int ret; 5659 int ret;
5652 cpumask_var_t mask; 5660 cpumask_var_t mask;
@@ -5675,7 +5683,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
5675 * This function yields the current CPU to other tasks. If there are no 5683 * This function yields the current CPU to other tasks. If there are no
5676 * other threads running on this CPU then this function will return. 5684 * other threads running on this CPU then this function will return.
5677 */ 5685 */
5678asmlinkage long sys_sched_yield(void) 5686SYSCALL_DEFINE0(sched_yield)
5679{ 5687{
5680 struct rq *rq = this_rq_lock(); 5688 struct rq *rq = this_rq_lock();
5681 5689
@@ -5816,7 +5824,7 @@ long __sched io_schedule_timeout(long timeout)
5816 * this syscall returns the maximum rt_priority that can be used 5824 * this syscall returns the maximum rt_priority that can be used
5817 * by a given scheduling class. 5825 * by a given scheduling class.
5818 */ 5826 */
5819asmlinkage long sys_sched_get_priority_max(int policy) 5827SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
5820{ 5828{
5821 int ret = -EINVAL; 5829 int ret = -EINVAL;
5822 5830
@@ -5841,7 +5849,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
5841 * this syscall returns the minimum rt_priority that can be used 5849 * this syscall returns the minimum rt_priority that can be used
5842 * by a given scheduling class. 5850 * by a given scheduling class.
5843 */ 5851 */
5844asmlinkage long sys_sched_get_priority_min(int policy) 5852SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
5845{ 5853{
5846 int ret = -EINVAL; 5854 int ret = -EINVAL;
5847 5855
@@ -5866,8 +5874,8 @@ asmlinkage long sys_sched_get_priority_min(int policy)
5866 * this syscall writes the default timeslice value of a given process 5874 * this syscall writes the default timeslice value of a given process
5867 * into the user-space timespec buffer. A value of '0' means infinity. 5875 * into the user-space timespec buffer. A value of '0' means infinity.
5868 */ 5876 */
5869asmlinkage 5877SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
5870long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) 5878 struct timespec __user *, interval)
5871{ 5879{
5872 struct task_struct *p; 5880 struct task_struct *p;
5873 unsigned int time_slice; 5881 unsigned int time_slice;
@@ -6936,20 +6944,26 @@ static void free_rootdomain(struct root_domain *rd)
6936 6944
6937static void rq_attach_root(struct rq *rq, struct root_domain *rd) 6945static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6938{ 6946{
6947 struct root_domain *old_rd = NULL;
6939 unsigned long flags; 6948 unsigned long flags;
6940 6949
6941 spin_lock_irqsave(&rq->lock, flags); 6950 spin_lock_irqsave(&rq->lock, flags);
6942 6951
6943 if (rq->rd) { 6952 if (rq->rd) {
6944 struct root_domain *old_rd = rq->rd; 6953 old_rd = rq->rd;
6945 6954
6946 if (cpumask_test_cpu(rq->cpu, old_rd->online)) 6955 if (cpumask_test_cpu(rq->cpu, old_rd->online))
6947 set_rq_offline(rq); 6956 set_rq_offline(rq);
6948 6957
6949 cpumask_clear_cpu(rq->cpu, old_rd->span); 6958 cpumask_clear_cpu(rq->cpu, old_rd->span);
6950 6959
6951 if (atomic_dec_and_test(&old_rd->refcount)) 6960 /*
6952 free_rootdomain(old_rd); 6961 * If we don't want to free the old_rd yet then
6962 * set old_rd to NULL to skip the freeing later
6963 * in this function:
6964 */
6965 if (!atomic_dec_and_test(&old_rd->refcount))
6966 old_rd = NULL;
6953 } 6967 }
6954 6968
6955 atomic_inc(&rd->refcount); 6969 atomic_inc(&rd->refcount);
@@ -6960,6 +6974,9 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6960 set_rq_online(rq); 6974 set_rq_online(rq);
6961 6975
6962 spin_unlock_irqrestore(&rq->lock, flags); 6976 spin_unlock_irqrestore(&rq->lock, flags);
6977
6978 if (old_rd)
6979 free_rootdomain(old_rd);
6963} 6980}
6964 6981
6965static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) 6982static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
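
The rq_attach_root() rework above is a deferred-free pattern: the old root domain is detached and unreferenced under rq->lock, but actually freed only after the lock is dropped, with old_rd doubling as the "needs freeing" flag. A userspace analogue with stand-in types and a pthread mutex in place of the runqueue spinlock:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct root_domain { int refcount; };   /* stand-in type */

static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;

static void rq_attach_root(struct root_domain **rd_slot,
                           struct root_domain *rd)
{
        struct root_domain *old_rd = NULL;

        pthread_mutex_lock(&rq_lock);
        if (*rd_slot) {
                old_rd = *rd_slot;
                if (--old_rd->refcount > 0)
                        old_rd = NULL;  /* still in use: skip the free */
        }
        rd->refcount++;
        *rd_slot = rd;
        pthread_mutex_unlock(&rq_lock);

        free(old_rd);                   /* free(NULL) is a no-op */
}

int main(void)
{
        struct root_domain *slot = NULL;
        struct root_domain *a = calloc(1, sizeof(*a));
        struct root_domain *b = calloc(1, sizeof(*b));

        rq_attach_root(&slot, a);
        rq_attach_root(&slot, b);       /* frees a, after unlocking */
        printf("attached, refcount=%d\n", slot->refcount);
        free(b);
        return 0;
}

Setting old_rd back to NULL when the refcount is still positive is what lets one free call after the unlock cover both outcomes.
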
@@ -7282,10 +7299,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
7282 * groups, so roll our own. Now each node has its own list of groups which 7299 * groups, so roll our own. Now each node has its own list of groups which
7283 * gets dynamically allocated. 7300 * gets dynamically allocated.
7284 */ 7301 */
7285static DEFINE_PER_CPU(struct sched_domain, node_domains); 7302static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
7286static struct sched_group ***sched_group_nodes_bycpu; 7303static struct sched_group ***sched_group_nodes_bycpu;
7287 7304
7288static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); 7305static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
7289static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes); 7306static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
7290 7307
7291static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, 7308static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
@@ -7560,7 +7577,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7560#ifdef CONFIG_NUMA 7577#ifdef CONFIG_NUMA
7561 if (cpumask_weight(cpu_map) > 7578 if (cpumask_weight(cpu_map) >
7562 SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) { 7579 SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
7563 sd = &per_cpu(allnodes_domains, i); 7580 sd = &per_cpu(allnodes_domains, i).sd;
7564 SD_INIT(sd, ALLNODES); 7581 SD_INIT(sd, ALLNODES);
7565 set_domain_attribute(sd, attr); 7582 set_domain_attribute(sd, attr);
7566 cpumask_copy(sched_domain_span(sd), cpu_map); 7583 cpumask_copy(sched_domain_span(sd), cpu_map);
@@ -7570,7 +7587,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7570 } else 7587 } else
7571 p = NULL; 7588 p = NULL;
7572 7589
7573 sd = &per_cpu(node_domains, i); 7590 sd = &per_cpu(node_domains, i).sd;
7574 SD_INIT(sd, NODE); 7591 SD_INIT(sd, NODE);
7575 set_domain_attribute(sd, attr); 7592 set_domain_attribute(sd, attr);
7576 sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); 7593 sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
@@ -7688,7 +7705,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7688 for_each_cpu(j, nodemask) { 7705 for_each_cpu(j, nodemask) {
7689 struct sched_domain *sd; 7706 struct sched_domain *sd;
7690 7707
7691 sd = &per_cpu(node_domains, j); 7708 sd = &per_cpu(node_domains, j).sd;
7692 sd->groups = sg; 7709 sd->groups = sg;
7693 } 7710 }
7694 sg->__cpu_power = 0; 7711 sg->__cpu_power = 0;
@@ -9047,6 +9064,13 @@ static int tg_schedulable(struct task_group *tg, void *data)
9047 runtime = d->rt_runtime; 9064 runtime = d->rt_runtime;
9048 } 9065 }
9049 9066
9067#ifdef CONFIG_USER_SCHED
9068 if (tg == &root_task_group) {
9069 period = global_rt_period();
9070 runtime = global_rt_runtime();
9071 }
9072#endif
9073
9050 /* 9074 /*
9051 * Cannot have more runtime than the period. 9075 * Cannot have more runtime than the period.
9052 */ 9076 */
@@ -9200,6 +9224,16 @@ static int sched_rt_global_constraints(void)
9200 9224
9201 return ret; 9225 return ret;
9202} 9226}
9227
9228int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
9229{
9230 /* Don't accept realtime tasks when there is no way for them to run */
9231 if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
9232 return 0;
9233
9234 return 1;
9235}
9236
9203#else /* !CONFIG_RT_GROUP_SCHED */ 9237#else /* !CONFIG_RT_GROUP_SCHED */
9204static int sched_rt_global_constraints(void) 9238static int sched_rt_global_constraints(void)
9205{ 9239{
@@ -9293,8 +9327,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
9293 struct task_struct *tsk) 9327 struct task_struct *tsk)
9294{ 9328{
9295#ifdef CONFIG_RT_GROUP_SCHED 9329#ifdef CONFIG_RT_GROUP_SCHED
9296 /* Don't accept realtime tasks when there is no way for them to run */ 9330 if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
9297 if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0)
9298 return -EINVAL; 9331 return -EINVAL;
9299#else 9332#else
9300 /* We don't support RT-tasks being in separate groups */ 9333 /* We don't support RT-tasks being in separate groups */
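
sched_rt_can_attach() hoists the admission test into a named helper so the cgroup attach path in the preceding hunk (and any other caller) can share it. A direct userspace mirror with stand-in types:

#include <assert.h>
#include <stdbool.h>

struct task_group { unsigned long rt_runtime; };
struct task { bool rt; };

/* Refuse to move a realtime task into a group whose RT bandwidth is
 * zero, since it could never run there. */
static int sched_rt_can_attach(struct task_group *tg, struct task *tsk)
{
        if (tsk->rt && tg->rt_runtime == 0)
                return 0;
        return 1;
}

int main(void)
{
        struct task_group empty = { .rt_runtime = 0 };
        struct task rt_task = { .rt = true }, normal = { .rt = false };

        assert(!sched_rt_can_attach(&empty, &rt_task));
        assert(sched_rt_can_attach(&empty, &normal));
        return 0;
}
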
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 4293cfa9681d..16eeba4e4169 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -145,6 +145,19 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
145 read_unlock_irqrestore(&tasklist_lock, flags); 145 read_unlock_irqrestore(&tasklist_lock, flags);
146} 146}
147 147
148#if defined(CONFIG_CGROUP_SCHED) && \
149 (defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED))
150static void task_group_path(struct task_group *tg, char *buf, int buflen)
151{
152 /* may be NULL if the underlying cgroup isn't fully-created yet */
153 if (!tg->css.cgroup) {
154 buf[0] = '\0';
155 return;
156 }
157 cgroup_path(tg->css.cgroup, buf, buflen);
158}
159#endif
160
148void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) 161void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
149{ 162{
150 s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, 163 s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
@@ -154,10 +167,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
154 unsigned long flags; 167 unsigned long flags;
155 168
156#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) 169#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
157 char path[128] = ""; 170 char path[128];
158 struct task_group *tg = cfs_rq->tg; 171 struct task_group *tg = cfs_rq->tg;
159 172
160 cgroup_path(tg->css.cgroup, path, sizeof(path)); 173 task_group_path(tg, path, sizeof(path));
161 174
162 SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); 175 SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
163#elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) 176#elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
@@ -208,10 +221,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
208void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) 221void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
209{ 222{
210#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED) 223#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
211 char path[128] = ""; 224 char path[128];
212 struct task_group *tg = rt_rq->tg; 225 struct task_group *tg = rt_rq->tg;
213 226
214 cgroup_path(tg->css.cgroup, path, sizeof(path)); 227 task_group_path(tg, path, sizeof(path));
215 228
216 SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path); 229 SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
217#else 230#else
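
task_group_path(), added in the first sched_debug.c hunk, exists so the two printers above can tolerate a task_group whose cgroup is not fully created yet; since the helper always writes the buffer, the old `= ""` initializers become redundant and are dropped. A compact userspace mirror, with cgroup_path() stubbed out:

#include <stdio.h>

struct cgroup;                          /* opaque stand-in */
struct task_group { struct cgroup *cgroup; };

static void cgroup_path_stub(struct cgroup *cg, char *buf, int len)
{
        snprintf(buf, len, "/some/group");      /* stand-in for cgroup_path() */
}

/* Yield "" while the cgroup is half-constructed instead of handing
 * the path helper a NULL. */
static void task_group_path(struct task_group *tg, char *buf, int len)
{
        if (!tg->cgroup) {
                buf[0] = '\0';
                return;
        }
        cgroup_path_stub(tg->cgroup, buf, len);
}

int main(void)
{
        char path[128];
        struct task_group tg = { 0 };

        task_group_path(&tg, path, sizeof(path));
        printf("path='%s'\n", path);    /* empty, not a crash */
        return 0;
}
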
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 8e1352c75557..0566f2a03c42 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -283,7 +283,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
283 struct sched_entity, 283 struct sched_entity,
284 run_node); 284 run_node);
285 285
286 if (vruntime == cfs_rq->min_vruntime) 286 if (!cfs_rq->curr)
287 vruntime = se->vruntime; 287 vruntime = se->vruntime;
288 else 288 else
289 vruntime = min_vruntime(vruntime, se->vruntime); 289 vruntime = min_vruntime(vruntime, se->vruntime);
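
Keying update_min_vruntime() off !cfs_rq->curr rather than an equality test fixes which vruntime wins when a task is currently running. A userspace sketch of the whole updated function as reconstructed from this hunk (plausible shape, not a verbatim copy), including the wraparound-safe comparisons and the monotonicity guarantee:

#include <assert.h>
#include <stdint.h>

typedef uint64_t u64;
typedef int64_t s64;

/* Wraparound-safe comparisons, the same trick the scheduler uses. */
static u64 max_vruntime(u64 a, u64 b) { return (s64)(a - b) > 0 ? a : b; }
static u64 min_vruntime(u64 a, u64 b) { return (s64)(a - b) < 0 ? a : b; }

/* Prefer curr when it exists, fold in the leftmost queued entity,
 * and never let min_vruntime move backwards. */
static u64 update_min(u64 min_vr, const u64 *curr, const u64 *leftmost)
{
        u64 v = curr ? *curr : min_vr;

        if (leftmost)
                v = curr ? min_vruntime(v, *leftmost) : *leftmost;

        return max_vruntime(min_vr, v);         /* monotonic */
}

int main(void)
{
        u64 curr = 90, left = 100;

        assert(update_min(80, &curr, &left) == 90);
        assert(update_min(80, NULL, &left) == 100);
        assert(update_min(120, NULL, &left) == 120);    /* never backwards */
        return 0;
}
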
@@ -429,7 +429,10 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
429 u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq); 429 u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq);
430 430
431 for_each_sched_entity(se) { 431 for_each_sched_entity(se) {
432 struct load_weight *load = &cfs_rq->load; 432 struct load_weight *load;
433
434 cfs_rq = cfs_rq_of(se);
435 load = &cfs_rq->load;
433 436
434 if (unlikely(!se->on_rq)) { 437 if (unlikely(!se->on_rq)) {
435 struct load_weight lw = cfs_rq->load; 438 struct load_weight lw = cfs_rq->load;
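
The sched_slice() fix above repairs a hierarchy walk: `load` used to point at the innermost runqueue's load for every level, while each level's slice must be scaled by that level's own load. A toy two-level model with made-up weights shows the intended per-level scaling (the kernel does this in fixed point via calc_delta_mine()):

#include <assert.h>

struct level { unsigned long se_weight, rq_load; };

static unsigned long slice(unsigned long period,
                           const struct level *lv, int depth)
{
        for (int i = 0; i < depth; i++)
                period = period * lv[i].se_weight / lv[i].rq_load;
        return period;
}

int main(void)
{
        const struct level lv[] = {
                { .se_weight = 1024, .rq_load = 2048 }, /* task within group */
                { .se_weight = 512,  .rq_load = 1024 }, /* group on the rq   */
        };

        /* 20ms period -> 10ms at the task level -> 5ms at the group level */
        assert(slice(20, lv, 2) == 5);
        return 0;
}
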
@@ -677,9 +680,13 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
677 unsigned long thresh = sysctl_sched_latency; 680 unsigned long thresh = sysctl_sched_latency;
678 681
679 /* 682 /*
680 * convert the sleeper threshold into virtual time 683 * Convert the sleeper threshold into virtual time.
684 * SCHED_IDLE is a special sub-class. We care about
685 * fairness only relative to other SCHED_IDLE tasks,
686 * all of which have the same weight.
681 */ 687 */
682 if (sched_feat(NORMALIZED_SLEEPER)) 688 if (sched_feat(NORMALIZED_SLEEPER) &&
689 task_of(se)->policy != SCHED_IDLE)
683 thresh = calc_delta_fair(thresh, se); 690 thresh = calc_delta_fair(thresh, se);
684 691
685 vruntime -= thresh; 692 vruntime -= thresh;
@@ -712,7 +719,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
712 __enqueue_entity(cfs_rq, se); 719 __enqueue_entity(cfs_rq, se);
713} 720}
714 721
715static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) 722static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
716{ 723{
717 if (cfs_rq->last == se) 724 if (cfs_rq->last == se)
718 cfs_rq->last = NULL; 725 cfs_rq->last = NULL;
@@ -721,6 +728,12 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
721 cfs_rq->next = NULL; 728 cfs_rq->next = NULL;
722} 729}
723 730
731static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
732{
733 for_each_sched_entity(se)
734 __clear_buddies(cfs_rq_of(se), se);
735}
736
724static void 737static void
725dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) 738dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
726{ 739{
@@ -761,8 +774,14 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
761 774
762 ideal_runtime = sched_slice(cfs_rq, curr); 775 ideal_runtime = sched_slice(cfs_rq, curr);
763 delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; 776 delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
764 if (delta_exec > ideal_runtime) 777 if (delta_exec > ideal_runtime) {
765 resched_task(rq_of(cfs_rq)->curr); 778 resched_task(rq_of(cfs_rq)->curr);
779 /*
780 * The current task ran long enough, ensure it doesn't get
781 * re-elected due to buddy favours.
782 */
783 clear_buddies(cfs_rq, curr);
784 }
766} 785}
767 786
768static void 787static void
@@ -1340,14 +1359,18 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
1340 1359
1341static void set_last_buddy(struct sched_entity *se) 1360static void set_last_buddy(struct sched_entity *se)
1342{ 1361{
1343 for_each_sched_entity(se) 1362 if (likely(task_of(se)->policy != SCHED_IDLE)) {
1344 cfs_rq_of(se)->last = se; 1363 for_each_sched_entity(se)
1364 cfs_rq_of(se)->last = se;
1365 }
1345} 1366}
1346 1367
1347static void set_next_buddy(struct sched_entity *se) 1368static void set_next_buddy(struct sched_entity *se)
1348{ 1369{
1349 for_each_sched_entity(se) 1370 if (likely(task_of(se)->policy != SCHED_IDLE)) {
1350 cfs_rq_of(se)->next = se; 1371 for_each_sched_entity(se)
1372 cfs_rq_of(se)->next = se;
1373 }
1351} 1374}
1352 1375
1353/* 1376/*
@@ -1393,11 +1416,17 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
1393 return; 1416 return;
1394 1417
1395 /* 1418 /*
1396 * Batch tasks do not preempt (their preemption is driven by 1419 * Batch and idle tasks do not preempt (their preemption is driven by
1397 * the tick): 1420 * the tick):
1398 */ 1421 */
1399 if (unlikely(p->policy == SCHED_BATCH)) 1422 if (unlikely(p->policy != SCHED_NORMAL))
1423 return;
1424
1425 /* Idle tasks are by definition preempted by everybody. */
1426 if (unlikely(curr->policy == SCHED_IDLE)) {
1427 resched_task(curr);
1400 return; 1428 return;
1429 }
1401 1430
1402 if (!sched_feat(WAKEUP_PREEMPT)) 1431 if (!sched_feat(WAKEUP_PREEMPT))
1403 return; 1432 return;
@@ -1435,6 +1464,11 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
1435 1464
1436 do { 1465 do {
1437 se = pick_next_entity(cfs_rq); 1466 se = pick_next_entity(cfs_rq);
1467 /*
1468 * If se was a buddy, clear it so that it will have to earn
1469 * the favour again.
1470 */
1471 __clear_buddies(cfs_rq, se);
1438 set_next_entity(cfs_rq, se); 1472 set_next_entity(cfs_rq, se);
1439 cfs_rq = group_cfs_rq(se); 1473 cfs_rq = group_cfs_rq(se);
1440 } while (cfs_rq); 1474 } while (cfs_rq);
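
Taken together, the sched_fair.c hunks tighten the buddy heuristics: a task that exhausted its slice loses buddy status, SCHED_IDLE tasks never become buddies, and a picked buddy must earn the favour again. A condensed userspace model of those rules, ignoring the fairness re-check the real pick_next_entity() applies:

#include <assert.h>
#include <stddef.h>

struct cfs_rq { const char *next, *last; };

enum policy { NORMAL, IDLE };

static void set_next_buddy(struct cfs_rq *rq, const char *se, enum policy p)
{
        if (p != IDLE)          /* SCHED_IDLE tasks never become buddies */
                rq->next = se;
}

static const char *pick_next(struct cfs_rq *rq, const char *leftmost)
{
        const char *se = rq->next ? rq->next : leftmost;

        rq->next = rq->last = NULL;     /* favour must be earned again */
        return se;
}

int main(void)
{
        struct cfs_rq rq = { 0 };
        const char *leftmost = "leftmost", *waker = "waker", *idle = "idle";

        set_next_buddy(&rq, idle, IDLE);
        assert(pick_next(&rq, leftmost) == leftmost);   /* idle ignored */

        set_next_buddy(&rq, waker, NORMAL);
        assert(pick_next(&rq, leftmost) == waker);      /* favoured once */
        assert(pick_next(&rq, leftmost) == leftmost);   /* not twice     */
        return 0;
}
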
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 954e1a81b796..bac1061cea2f 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -968,8 +968,8 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
968 if ((this_cpu != -1) && cpu_isset(this_cpu, *mask)) 968 if ((this_cpu != -1) && cpu_isset(this_cpu, *mask))
969 return this_cpu; 969 return this_cpu;
970 970
971 first = first_cpu(*mask); 971 first = cpumask_first(mask);
972 if (first != NR_CPUS) 972 if (first < nr_cpu_ids)
973 return first; 973 return first;
974 974
975 return -1; 975 return -1;
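
pick_optimal_cpu() moves from the old first_cpu()/NR_CPUS idiom to cpumask_first() with a `< nr_cpu_ids` test, comparing against the runtime count of possible CPUs instead of the compile-time ceiling. A stand-in implementation that shows why the comparison matters:

#include <assert.h>

#define NR_CPUS 64                      /* compile-time ceiling */
static int nr_cpu_ids = 4;              /* runtime possible CPUs */

/* Stand-in cpumask_first(): lowest set bit, or NR_CPUS when empty. */
static int cpumask_first(unsigned long long mask)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                if (mask & (1ULL << cpu))
                        return cpu;
        return NR_CPUS;
}

int main(void)
{
        assert(cpumask_first(0x4ULL) == 2);              /* CPU 2       */
        assert(!(cpumask_first(0x0ULL) < nr_cpu_ids));   /* empty mask  */
        /* A bit beyond the possible range fails the new test too. */
        assert(!(cpumask_first(1ULL << 10) < nr_cpu_ids));
        return 0;
}
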
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index f2773b5d1226..a8f93dd374e1 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -296,20 +296,21 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
296static inline void account_group_user_time(struct task_struct *tsk, 296static inline void account_group_user_time(struct task_struct *tsk,
297 cputime_t cputime) 297 cputime_t cputime)
298{ 298{
299 struct signal_struct *sig; 299 struct thread_group_cputimer *cputimer;
300 300
301 /* tsk == current, ensure it is safe to use ->signal */ 301 /* tsk == current, ensure it is safe to use ->signal */
302 if (unlikely(tsk->exit_state)) 302 if (unlikely(tsk->exit_state))
303 return; 303 return;
304 304
305 sig = tsk->signal; 305 cputimer = &tsk->signal->cputimer;
306 if (sig->cputime.totals) {
307 struct task_cputime *times;
308 306
309 times = per_cpu_ptr(sig->cputime.totals, get_cpu()); 307 if (!cputimer->running)
310 times->utime = cputime_add(times->utime, cputime); 308 return;
311 put_cpu_no_resched(); 309
312 } 310 spin_lock(&cputimer->lock);
311 cputimer->cputime.utime =
312 cputime_add(cputimer->cputime.utime, cputime);
313 spin_unlock(&cputimer->lock);
313} 314}
314 315
315/** 316/**
@@ -325,20 +326,21 @@ static inline void account_group_user_time(struct task_struct *tsk,
325static inline void account_group_system_time(struct task_struct *tsk, 326static inline void account_group_system_time(struct task_struct *tsk,
326 cputime_t cputime) 327 cputime_t cputime)
327{ 328{
328 struct signal_struct *sig; 329 struct thread_group_cputimer *cputimer;
329 330
330 /* tsk == current, ensure it is safe to use ->signal */ 331 /* tsk == current, ensure it is safe to use ->signal */
331 if (unlikely(tsk->exit_state)) 332 if (unlikely(tsk->exit_state))
332 return; 333 return;
333 334
334 sig = tsk->signal; 335 cputimer = &tsk->signal->cputimer;
335 if (sig->cputime.totals) {
336 struct task_cputime *times;
337 336
338 times = per_cpu_ptr(sig->cputime.totals, get_cpu()); 337 if (!cputimer->running)
339 times->stime = cputime_add(times->stime, cputime); 338 return;
340 put_cpu_no_resched(); 339
341 } 340 spin_lock(&cputimer->lock);
341 cputimer->cputime.stime =
342 cputime_add(cputimer->cputime.stime, cputime);
343 spin_unlock(&cputimer->lock);
342} 344}
343 345
344/** 346/**
@@ -354,6 +356,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
354static inline void account_group_exec_runtime(struct task_struct *tsk, 356static inline void account_group_exec_runtime(struct task_struct *tsk,
355 unsigned long long ns) 357 unsigned long long ns)
356{ 358{
359 struct thread_group_cputimer *cputimer;
357 struct signal_struct *sig; 360 struct signal_struct *sig;
358 361
359 sig = tsk->signal; 362 sig = tsk->signal;
@@ -362,11 +365,12 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
362 if (unlikely(!sig)) 365 if (unlikely(!sig))
363 return; 366 return;
364 367
365 if (sig->cputime.totals) { 368 cputimer = &sig->cputimer;
366 struct task_cputime *times;
367 369
368 times = per_cpu_ptr(sig->cputime.totals, get_cpu()); 370 if (!cputimer->running)
369 times->sum_exec_runtime += ns; 371 return;
370 put_cpu_no_resched(); 372
371 } 373 spin_lock(&cputimer->lock);
374 cputimer->cputime.sum_exec_runtime += ns;
375 spin_unlock(&cputimer->lock);
372} 376}
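
All three accounting helpers switch from per-CPU running totals (cheap writes, expensive reads) to a single locked total in signal->cputimer that is updated only while `running` is set, i.e. only while some POSIX CPU timer actually needs it. A userspace model of the new write path, with a pthread mutex standing in for cputimer->lock:

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

struct thread_group_cputimer {
        bool running;
        unsigned long long utime;
        pthread_mutex_t lock;
};

static void account_group_user_time(struct thread_group_cputimer *ct,
                                    unsigned long long delta)
{
        if (!ct->running)       /* no timer armed: skip the shared write */
                return;

        pthread_mutex_lock(&ct->lock);
        ct->utime += delta;
        pthread_mutex_unlock(&ct->lock);
}

int main(void)
{
        struct thread_group_cputimer ct = {
                .running = false, .lock = PTHREAD_MUTEX_INITIALIZER,
        };

        account_group_user_time(&ct, 10);       /* dropped: not running */
        ct.running = true;
        account_group_user_time(&ct, 10);       /* accounted */
        assert(ct.utime == 10);
        return 0;
}

The `running` test keeps the common case (no armed timer) free of any shared-cacheline traffic on the tick path.
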
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ad64fcb731f2..57d4b13b631d 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/seccomp.h> 9#include <linux/seccomp.h>
10#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/compat.h>
11 12
12/* #define SECCOMP_DEBUG 1 */ 13/* #define SECCOMP_DEBUG 1 */
13#define NR_SECCOMP_MODES 1 14#define NR_SECCOMP_MODES 1
@@ -22,7 +23,7 @@ static int mode1_syscalls[] = {
22 0, /* null terminated */ 23 0, /* null terminated */
23}; 24};
24 25
25#ifdef TIF_32BIT 26#ifdef CONFIG_COMPAT
26static int mode1_syscalls_32[] = { 27static int mode1_syscalls_32[] = {
27 __NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32, 28 __NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
28 0, /* null terminated */ 29 0, /* null terminated */
@@ -37,8 +38,8 @@ void __secure_computing(int this_syscall)
37 switch (mode) { 38 switch (mode) {
38 case 1: 39 case 1:
39 syscall = mode1_syscalls; 40 syscall = mode1_syscalls;
40#ifdef TIF_32BIT 41#ifdef CONFIG_COMPAT
41 if (test_thread_flag(TIF_32BIT)) 42 if (is_compat_task())
42 syscall = mode1_syscalls_32; 43 syscall = mode1_syscalls_32;
43#endif 44#endif
44 do { 45 do {
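
Switching from `#ifdef TIF_32BIT` to CONFIG_COMPAT plus is_compat_task() lets seccomp mode 1 pick the 32-bit syscall table on architectures such as x86-64 that support compat tasks without defining TIF_32BIT. A userspace sketch of the dispatch; the table contents use x86 syscall numbers purely for illustration:

#include <assert.h>
#include <stdbool.h>

/* Zero-terminated allowlists, x86-64 and i386 numbers as examples. */
static const int mode1_syscalls[]    = { 0, 1, 60, 15, 0 };
static const int mode1_syscalls_32[] = { 3, 4, 1, 119, 0 };

static bool mode1_allowed(int nr, bool is_compat)
{
        const int *syscall = is_compat ? mode1_syscalls_32 : mode1_syscalls;

        do {                            /* same loop shape as the kernel */
                if (*syscall == nr)
                        return true;
        } while (*++syscall);
        return false;
}

int main(void)
{
        assert(mode1_allowed(60, false));       /* exit, 64-bit table    */
        assert(!mode1_allowed(60, true));       /* not on the 32-bit one */
        return 0;
}
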
diff --git a/kernel/signal.c b/kernel/signal.c
index 3152ac3b62e2..1c8814481a11 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -909,7 +909,9 @@ static void print_fatal_signal(struct pt_regs *regs, int signr)
909 } 909 }
910#endif 910#endif
911 printk("\n"); 911 printk("\n");
912 preempt_disable();
912 show_regs(regs); 913 show_regs(regs);
914 preempt_enable();
913} 915}
914 916
915static int __init setup_print_fatal_signals(char *str) 917static int __init setup_print_fatal_signals(char *str)
@@ -1365,7 +1367,6 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1365 struct siginfo info; 1367 struct siginfo info;
1366 unsigned long flags; 1368 unsigned long flags;
1367 struct sighand_struct *psig; 1369 struct sighand_struct *psig;
1368 struct task_cputime cputime;
1369 int ret = sig; 1370 int ret = sig;
1370 1371
1371 BUG_ON(sig == -1); 1372 BUG_ON(sig == -1);
@@ -1395,9 +1396,10 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1395 info.si_uid = __task_cred(tsk)->uid; 1396 info.si_uid = __task_cred(tsk)->uid;
1396 rcu_read_unlock(); 1397 rcu_read_unlock();
1397 1398
1398 thread_group_cputime(tsk, &cputime); 1399 info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
1399 info.si_utime = cputime_to_jiffies(cputime.utime); 1400 tsk->signal->utime));
1400 info.si_stime = cputime_to_jiffies(cputime.stime); 1401 info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
1402 tsk->signal->stime));
1401 1403
1402 info.si_status = tsk->exit_code & 0x7f; 1404 info.si_status = tsk->exit_code & 0x7f;
1403 if (tsk->exit_code & 0x80) 1405 if (tsk->exit_code & 0x80)
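
do_notify_parent() now builds si_utime/si_stime from the exiting task's own counters plus the totals already accumulated in signal_struct, avoiding the heavyweight thread_group_cputime() walk, and reports them in clock_t, the unit userspace already sees from times(2). A real userspace probe of that unit:

#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

int main(void)
{
        struct tms t;
        long tck = sysconf(_SC_CLK_TCK);        /* clock_t ticks/second */

        times(&t);
        printf("USER_HZ=%ld, utime=%ld ticks (%.2fs)\n",
               tck, (long)t.tms_utime, (double)t.tms_utime / tck);
        return 0;
}
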
@@ -1573,7 +1575,15 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
1573 read_lock(&tasklist_lock); 1575 read_lock(&tasklist_lock);
1574 if (may_ptrace_stop()) { 1576 if (may_ptrace_stop()) {
1575 do_notify_parent_cldstop(current, CLD_TRAPPED); 1577 do_notify_parent_cldstop(current, CLD_TRAPPED);
1578 /*
1579 * Don't want to allow preemption here, because
1580 * sys_ptrace() needs this task to be inactive.
1581 *
1582 * XXX: implement read_unlock_no_resched().
1583 */
1584 preempt_disable();
1576 read_unlock(&tasklist_lock); 1585 read_unlock(&tasklist_lock);
1586 preempt_enable_no_resched();
1577 schedule(); 1587 schedule();
1578 } else { 1588 } else {
1579 /* 1589 /*
@@ -1961,7 +1971,7 @@ EXPORT_SYMBOL(unblock_all_signals);
1961 * System call entry points. 1971 * System call entry points.
1962 */ 1972 */
1963 1973
1964asmlinkage long sys_restart_syscall(void) 1974SYSCALL_DEFINE0(restart_syscall)
1965{ 1975{
1966 struct restart_block *restart = &current_thread_info()->restart_block; 1976 struct restart_block *restart = &current_thread_info()->restart_block;
1967 return restart->fn(restart); 1977 return restart->fn(restart);
@@ -2014,8 +2024,8 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
2014 return error; 2024 return error;
2015} 2025}
2016 2026
2017asmlinkage long 2027SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set,
2018sys_rt_sigprocmask(int how, sigset_t __user *set, sigset_t __user *oset, size_t sigsetsize) 2028 sigset_t __user *, oset, size_t, sigsetsize)
2019{ 2029{
2020 int error = -EINVAL; 2030 int error = -EINVAL;
2021 sigset_t old_set, new_set; 2031 sigset_t old_set, new_set;
@@ -2074,8 +2084,7 @@ out:
2074 return error; 2084 return error;
2075} 2085}
2076 2086
2077asmlinkage long 2087SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize)
2078sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize)
2079{ 2088{
2080 return do_sigpending(set, sigsetsize); 2089 return do_sigpending(set, sigsetsize);
2081} 2090}
@@ -2146,11 +2155,9 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
2146 2155
2147#endif 2156#endif
2148 2157
2149asmlinkage long 2158SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
2150sys_rt_sigtimedwait(const sigset_t __user *uthese, 2159 siginfo_t __user *, uinfo, const struct timespec __user *, uts,
2151 siginfo_t __user *uinfo, 2160 size_t, sigsetsize)
2152 const struct timespec __user *uts,
2153 size_t sigsetsize)
2154{ 2161{
2155 int ret, sig; 2162 int ret, sig;
2156 sigset_t these; 2163 sigset_t these;
@@ -2223,8 +2230,7 @@ sys_rt_sigtimedwait(const sigset_t __user *uthese,
2223 return ret; 2230 return ret;
2224} 2231}
2225 2232
2226asmlinkage long 2233SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
2227sys_kill(pid_t pid, int sig)
2228{ 2234{
2229 struct siginfo info; 2235 struct siginfo info;
2230 2236
@@ -2283,7 +2289,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig)
2283 * exists but it's not belonging to the target process anymore. This 2289 * exists but it's not belonging to the target process anymore. This
2284 * method solves the problem of threads exiting and PIDs getting reused. 2290 * method solves the problem of threads exiting and PIDs getting reused.
2285 */ 2291 */
2286asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig) 2292SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig)
2287{ 2293{
2288 /* This is only valid for single tasks */ 2294 /* This is only valid for single tasks */
2289 if (pid <= 0 || tgid <= 0) 2295 if (pid <= 0 || tgid <= 0)
@@ -2295,8 +2301,7 @@ asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig)
2295/* 2301/*
2296 * Send a signal to only one task, even if it's a CLONE_THREAD task. 2302 * Send a signal to only one task, even if it's a CLONE_THREAD task.
2297 */ 2303 */
2298asmlinkage long 2304SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
2299sys_tkill(pid_t pid, int sig)
2300{ 2305{
2301 /* This is only valid for single tasks */ 2306 /* This is only valid for single tasks */
2302 if (pid <= 0) 2307 if (pid <= 0)
@@ -2305,8 +2310,8 @@ sys_tkill(pid_t pid, int sig)
2305 return do_tkill(0, pid, sig); 2310 return do_tkill(0, pid, sig);
2306} 2311}
2307 2312
2308asmlinkage long 2313SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
2309sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo) 2314 siginfo_t __user *, uinfo)
2310{ 2315{
2311 siginfo_t info; 2316 siginfo_t info;
2312 2317
@@ -2434,8 +2439,7 @@ out:
2434 2439
2435#ifdef __ARCH_WANT_SYS_SIGPENDING 2440#ifdef __ARCH_WANT_SYS_SIGPENDING
2436 2441
2437asmlinkage long 2442SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
2438sys_sigpending(old_sigset_t __user *set)
2439{ 2443{
2440 return do_sigpending(set, sizeof(*set)); 2444 return do_sigpending(set, sizeof(*set));
2441} 2445}
@@ -2446,8 +2450,8 @@ sys_sigpending(old_sigset_t __user *set)
2446/* Some platforms have their own version with special arguments others 2450/* Some platforms have their own version with special arguments others
2447 support only sys_rt_sigprocmask. */ 2451 support only sys_rt_sigprocmask. */
2448 2452
2449asmlinkage long 2453SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set,
2450sys_sigprocmask(int how, old_sigset_t __user *set, old_sigset_t __user *oset) 2454 old_sigset_t __user *, oset)
2451{ 2455{
2452 int error; 2456 int error;
2453 old_sigset_t old_set, new_set; 2457 old_sigset_t old_set, new_set;
@@ -2497,11 +2501,10 @@ out:
2497#endif /* __ARCH_WANT_SYS_SIGPROCMASK */ 2501#endif /* __ARCH_WANT_SYS_SIGPROCMASK */
2498 2502
2499#ifdef __ARCH_WANT_SYS_RT_SIGACTION 2503#ifdef __ARCH_WANT_SYS_RT_SIGACTION
2500asmlinkage long 2504SYSCALL_DEFINE4(rt_sigaction, int, sig,
2501sys_rt_sigaction(int sig, 2505 const struct sigaction __user *, act,
2502 const struct sigaction __user *act, 2506 struct sigaction __user *, oact,
2503 struct sigaction __user *oact, 2507 size_t, sigsetsize)
2504 size_t sigsetsize)
2505{ 2508{
2506 struct k_sigaction new_sa, old_sa; 2509 struct k_sigaction new_sa, old_sa;
2507 int ret = -EINVAL; 2510 int ret = -EINVAL;
@@ -2531,15 +2534,13 @@ out:
2531/* 2534/*
2532 * For backwards compatibility. Functionality superseded by sigprocmask. 2535 * For backwards compatibility. Functionality superseded by sigprocmask.
2533 */ 2536 */
2534asmlinkage long 2537SYSCALL_DEFINE0(sgetmask)
2535sys_sgetmask(void)
2536{ 2538{
2537 /* SMP safe */ 2539 /* SMP safe */
2538 return current->blocked.sig[0]; 2540 return current->blocked.sig[0];
2539} 2541}
2540 2542
2541asmlinkage long 2543SYSCALL_DEFINE1(ssetmask, int, newmask)
2542sys_ssetmask(int newmask)
2543{ 2544{
2544 int old; 2545 int old;
2545 2546
@@ -2559,8 +2560,7 @@ sys_ssetmask(int newmask)
2559/* 2560/*
2560 * For backwards compatibility. Functionality superseded by sigaction. 2561 * For backwards compatibility. Functionality superseded by sigaction.
2561 */ 2562 */
2562asmlinkage unsigned long 2563SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler)
2563sys_signal(int sig, __sighandler_t handler)
2564{ 2564{
2565 struct k_sigaction new_sa, old_sa; 2565 struct k_sigaction new_sa, old_sa;
2566 int ret; 2566 int ret;
@@ -2577,8 +2577,7 @@ sys_signal(int sig, __sighandler_t handler)
2577 2577
2578#ifdef __ARCH_WANT_SYS_PAUSE 2578#ifdef __ARCH_WANT_SYS_PAUSE
2579 2579
2580asmlinkage long 2580SYSCALL_DEFINE0(pause)
2581sys_pause(void)
2582{ 2581{
2583 current->state = TASK_INTERRUPTIBLE; 2582 current->state = TASK_INTERRUPTIBLE;
2584 schedule(); 2583 schedule();
@@ -2588,7 +2587,7 @@ sys_pause(void)
2588#endif 2587#endif
2589 2588
2590#ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND 2589#ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND
2591asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize) 2590SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize)
2592{ 2591{
2593 sigset_t newset; 2592 sigset_t newset;
2594 2593
diff --git a/kernel/smp.c b/kernel/smp.c
index 5cfa0e5e3e88..bbedbb7efe32 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -18,6 +18,7 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
18enum { 18enum {
19 CSD_FLAG_WAIT = 0x01, 19 CSD_FLAG_WAIT = 0x01,
20 CSD_FLAG_ALLOC = 0x02, 20 CSD_FLAG_ALLOC = 0x02,
21 CSD_FLAG_LOCK = 0x04,
21}; 22};
22 23
23struct call_function_data { 24struct call_function_data {
@@ -186,6 +187,9 @@ void generic_smp_call_function_single_interrupt(void)
186 if (data_flags & CSD_FLAG_WAIT) { 187 if (data_flags & CSD_FLAG_WAIT) {
187 smp_wmb(); 188 smp_wmb();
188 data->flags &= ~CSD_FLAG_WAIT; 189 data->flags &= ~CSD_FLAG_WAIT;
190 } else if (data_flags & CSD_FLAG_LOCK) {
191 smp_wmb();
192 data->flags &= ~CSD_FLAG_LOCK;
189 } else if (data_flags & CSD_FLAG_ALLOC) 193 } else if (data_flags & CSD_FLAG_ALLOC)
190 kfree(data); 194 kfree(data);
191 } 195 }
@@ -196,6 +200,8 @@ void generic_smp_call_function_single_interrupt(void)
196 } 200 }
197} 201}
198 202
203static DEFINE_PER_CPU(struct call_single_data, csd_data);
204
199/* 205/*
200 * smp_call_function_single - Run a function on a specific CPU 206 * smp_call_function_single - Run a function on a specific CPU
201 * @func: The function to run. This must be fast and non-blocking. 207 * @func: The function to run. This must be fast and non-blocking.
@@ -224,14 +230,38 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
224 func(info); 230 func(info);
225 local_irq_restore(flags); 231 local_irq_restore(flags);
226 } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { 232 } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
227 struct call_single_data *data = NULL; 233 struct call_single_data *data;
228 234
229 if (!wait) { 235 if (!wait) {
236 /*
237 * We are calling a function on a single CPU
238 * and we are not going to wait for it to finish.
239 * We first try to allocate the data, but if we
240 * fail, we fall back to use a per cpu data to pass
241 * the information to that CPU. Since all callers
242 * of this code will use the same data, we must
243 * synchronize the callers to prevent a new caller
244 * from corrupting the data before the callee
245 * can access it.
246 *
247 * The CSD_FLAG_LOCK is used to let us know when
248 * the IPI handler is done with the data.
249 * The first caller will set it, and the callee
250 * will clear it. The next caller must wait for
251 * it to clear before we set it again. This
252 * will make sure the callee is done with the
 253 * data before a new caller uses it.
254 */
230 data = kmalloc(sizeof(*data), GFP_ATOMIC); 255 data = kmalloc(sizeof(*data), GFP_ATOMIC);
231 if (data) 256 if (data)
232 data->flags = CSD_FLAG_ALLOC; 257 data->flags = CSD_FLAG_ALLOC;
233 } 258 else {
234 if (!data) { 259 data = &per_cpu(csd_data, me);
260 while (data->flags & CSD_FLAG_LOCK)
261 cpu_relax();
262 data->flags = CSD_FLAG_LOCK;
263 }
264 } else {
235 data = &d; 265 data = &d;
236 data->flags = CSD_FLAG_WAIT; 266 data->flags = CSD_FLAG_WAIT;
237 } 267 }
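
The smp.c hunks implement exactly what the long comment describes: when the kmalloc() fails, callers fall back to one static slot and hand it off with CSD_FLAG_LOCK, set by the producer and cleared by the consumer. A runnable userspace model of that handshake using C11 atomics, with a plain thread standing in for the IPI handler:

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>

#define CSD_FLAG_LOCK 0x04

struct call_single_data {
        atomic_int flags;               /* 0 or CSD_FLAG_LOCK */
        int payload;
};

static struct call_single_data csd_data;        /* per-CPU in the kernel */

static void caller(int payload)
{
        /* Wait until the previous consumer released the slot. */
        while (atomic_load(&csd_data.flags) & CSD_FLAG_LOCK)
                ;                               /* cpu_relax() in the kernel */
        csd_data.payload = payload;
        atomic_store(&csd_data.flags, CSD_FLAG_LOCK);
}

static void *consumer(void *arg)
{
        while (!(atomic_load(&csd_data.flags) & CSD_FLAG_LOCK))
                ;
        assert(csd_data.payload == 42);
        atomic_store(&csd_data.flags, 0);       /* release the slot */
        return arg;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, consumer, NULL);
        caller(42);
        pthread_join(t, NULL);
        return 0;
}
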
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bdbe9de9cd8d..9041ea7948fe 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -626,6 +626,7 @@ static int ksoftirqd(void * __bind_cpu)
626 preempt_enable_no_resched(); 626 preempt_enable_no_resched();
627 cond_resched(); 627 cond_resched();
628 preempt_disable(); 628 preempt_disable();
629 rcu_qsctr_inc((long)__bind_cpu);
629 } 630 }
630 preempt_enable(); 631 preempt_enable();
631 set_current_state(TASK_INTERRUPTIBLE); 632 set_current_state(TASK_INTERRUPTIBLE);
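
The single added line gives ksoftirqd an explicit RCU quiescent state per loop iteration, so a CPU stuck servicing a softirq storm cannot stall grace periods for everyone else. A toy model of the invariant being served (real RCU is far more involved):

#include <assert.h>

#define NCPUS 4

static int quiesced[NCPUS];

static void rcu_qsctr_inc(int cpu) { quiesced[cpu] = 1; }

/* A grace period may end only once every CPU has announced a
 * quiescent state; a CPU that loops without announcing one stalls
 * everybody. */
static int grace_period_done(void)
{
        for (int cpu = 0; cpu < NCPUS; cpu++)
                if (!quiesced[cpu])
                        return 0;
        return 1;
}

int main(void)
{
        for (int cpu = 0; cpu < NCPUS - 1; cpu++)
                rcu_qsctr_inc(cpu);
        assert(!grace_period_done());   /* one CPU still holds it up */
        rcu_qsctr_inc(NCPUS - 1);
        assert(grace_period_done());
        return 0;
}
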
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index d9188c66278a..85d5a2455103 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -16,6 +16,7 @@
16#include <linux/lockdep.h> 16#include <linux/lockdep.h>
17#include <linux/notifier.h> 17#include <linux/notifier.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/sysctl.h>
19 20
20#include <asm/irq_regs.h> 21#include <asm/irq_regs.h>
21 22
@@ -88,6 +89,14 @@ void touch_all_softlockup_watchdogs(void)
88} 89}
89EXPORT_SYMBOL(touch_all_softlockup_watchdogs); 90EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
90 91
92int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
93 struct file *filp, void __user *buffer,
94 size_t *lenp, loff_t *ppos)
95{
96 touch_all_softlockup_watchdogs();
97 return proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
98}
99
91/* 100/*
92 * This callback runs from the timer interrupt, and checks 101 * This callback runs from the timer interrupt, and checks
93 * whether the watchdog thread has hung or not: 102 * whether the watchdog thread has hung or not:
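
proc_dosoftlockup_thresh() touches every watchdog before letting proc_dointvec_minmax() store the new threshold, presumably so that lowering the threshold cannot trip a watchdog on timestamps that were stale under the old value. The "reset, then apply" shape, with the proc machinery stubbed out:

#include <assert.h>

static int watchdog_touched;
static int softlockup_thresh = 60;

static void touch_all_softlockup_watchdogs(void) { watchdog_touched = 1; }

static int proc_dointvec_minmax_stub(int *val, int newval)
{
        *val = newval;          /* stand-in for the real proc helper */
        return 0;
}

static int proc_dosoftlockup_thresh(int newval)
{
        touch_all_softlockup_watchdogs();       /* reset first ... */
        return proc_dointvec_minmax_stub(&softlockup_thresh, newval);
}

int main(void)
{
        assert(proc_dosoftlockup_thresh(10) == 0);
        assert(watchdog_touched && softlockup_thresh == 10);
        return 0;
}
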
diff --git a/kernel/sys.c b/kernel/sys.c
index 763c3c17ded3..37f458e6882a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -143,7 +143,7 @@ out:
143 return error; 143 return error;
144} 144}
145 145
146asmlinkage long sys_setpriority(int which, int who, int niceval) 146SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
147{ 147{
148 struct task_struct *g, *p; 148 struct task_struct *g, *p;
149 struct user_struct *user; 149 struct user_struct *user;
@@ -208,7 +208,7 @@ out:
208 * has been offset by 20 (ie it returns 40..1 instead of -20..19) 208 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
209 * to stay compatible. 209 * to stay compatible.
210 */ 210 */
211asmlinkage long sys_getpriority(int which, int who) 211SYSCALL_DEFINE2(getpriority, int, which, int, who)
212{ 212{
213 struct task_struct *g, *p; 213 struct task_struct *g, *p;
214 struct user_struct *user; 214 struct user_struct *user;
@@ -355,7 +355,8 @@ EXPORT_SYMBOL_GPL(kernel_power_off);
355 * 355 *
356 * reboot doesn't sync: do that yourself before calling this. 356 * reboot doesn't sync: do that yourself before calling this.
357 */ 357 */
358asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user * arg) 358SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
359 void __user *, arg)
359{ 360{
360 char buffer[256]; 361 char buffer[256];
361 362
@@ -478,7 +479,7 @@ void ctrl_alt_del(void)
478 * SMP: There are not races, the GIDs are checked only by filesystem 479 * SMP: There are not races, the GIDs are checked only by filesystem
479 * operations (as far as semantic preservation is concerned). 480 * operations (as far as semantic preservation is concerned).
480 */ 481 */
481asmlinkage long sys_setregid(gid_t rgid, gid_t egid) 482SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
482{ 483{
483 const struct cred *old; 484 const struct cred *old;
484 struct cred *new; 485 struct cred *new;
@@ -529,7 +530,7 @@ error:
529 * 530 *
530 * SMP: Same implicit races as above. 531 * SMP: Same implicit races as above.
531 */ 532 */
532asmlinkage long sys_setgid(gid_t gid) 533SYSCALL_DEFINE1(setgid, gid_t, gid)
533{ 534{
534 const struct cred *old; 535 const struct cred *old;
535 struct cred *new; 536 struct cred *new;
@@ -558,7 +559,7 @@ error:
558 abort_creds(new); 559 abort_creds(new);
559 return retval; 560 return retval;
560} 561}
561 562
562/* 563/*
563 * change the user struct in a credentials set to match the new UID 564 * change the user struct in a credentials set to match the new UID
564 */ 565 */
@@ -570,6 +571,11 @@ static int set_user(struct cred *new)
570 if (!new_user) 571 if (!new_user)
571 return -EAGAIN; 572 return -EAGAIN;
572 573
574 if (!task_can_switch_user(new_user, current)) {
575 free_uid(new_user);
576 return -EINVAL;
577 }
578
573 if (atomic_read(&new_user->processes) >= 579 if (atomic_read(&new_user->processes) >=
574 current->signal->rlim[RLIMIT_NPROC].rlim_cur && 580 current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
575 new_user != INIT_USER) { 581 new_user != INIT_USER) {
@@ -597,7 +603,7 @@ static int set_user(struct cred *new)
597 * 100% compatible with BSD. A program which uses just setuid() will be 603 * 100% compatible with BSD. A program which uses just setuid() will be
598 * 100% compatible with POSIX with saved IDs. 604 * 100% compatible with POSIX with saved IDs.
599 */ 605 */
600asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) 606SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
601{ 607{
602 const struct cred *old; 608 const struct cred *old;
603 struct cred *new; 609 struct cred *new;
@@ -630,10 +636,11 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
630 goto error; 636 goto error;
631 } 637 }
632 638
633 retval = -EAGAIN; 639 if (new->uid != old->uid) {
634 if (new->uid != old->uid && set_user(new) < 0) 640 retval = set_user(new);
635 goto error; 641 if (retval < 0)
636 642 goto error;
643 }
637 if (ruid != (uid_t) -1 || 644 if (ruid != (uid_t) -1 ||
638 (euid != (uid_t) -1 && euid != old->uid)) 645 (euid != (uid_t) -1 && euid != old->uid))
639 new->suid = new->euid; 646 new->suid = new->euid;
@@ -661,7 +668,7 @@ error:
661 * will allow a root program to temporarily drop privileges and be able to 668 * will allow a root program to temporarily drop privileges and be able to
662 * regain them by swapping the real and effective uid. 669 * regain them by swapping the real and effective uid.
663 */ 670 */
664asmlinkage long sys_setuid(uid_t uid) 671SYSCALL_DEFINE1(setuid, uid_t, uid)
665{ 672{
666 const struct cred *old; 673 const struct cred *old;
667 struct cred *new; 674 struct cred *new;
@@ -679,9 +686,10 @@ asmlinkage long sys_setuid(uid_t uid)
679 retval = -EPERM; 686 retval = -EPERM;
680 if (capable(CAP_SETUID)) { 687 if (capable(CAP_SETUID)) {
681 new->suid = new->uid = uid; 688 new->suid = new->uid = uid;
682 if (uid != old->uid && set_user(new) < 0) { 689 if (uid != old->uid) {
683 retval = -EAGAIN; 690 retval = set_user(new);
684 goto error; 691 if (retval < 0)
692 goto error;
685 } 693 }
686 } else if (uid != old->uid && uid != new->suid) { 694 } else if (uid != old->uid && uid != new->suid) {
687 goto error; 695 goto error;
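
A pattern shared by the setuid-family hunks above and below: instead of collapsing every set_user() failure into a blanket -EAGAIN, callers now propagate the callee's return value, so the new -EINVAL from task_can_switch_user() reaches userspace intact. A minimal before/after model:

#include <assert.h>
#include <errno.h>

static int set_user_stub(int ok) { return ok ? 0 : -EINVAL; }

static long old_style(int ok)
{
        if (set_user_stub(ok) < 0)
                return -EAGAIN;         /* real cause discarded */
        return 0;
}

static long new_style(int ok)
{
        int retval = set_user_stub(ok);

        if (retval < 0)
                return retval;          /* caller sees -EINVAL */
        return 0;
}

int main(void)
{
        assert(old_style(0) == -EAGAIN);
        assert(new_style(0) == -EINVAL);
        return 0;
}
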
@@ -705,7 +713,7 @@ error:
705 * This function implements a generic ability to update ruid, euid, 713 * This function implements a generic ability to update ruid, euid,
706 * and suid. This allows you to implement the 4.4 compatible seteuid(). 714 * and suid. This allows you to implement the 4.4 compatible seteuid().
707 */ 715 */
708asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) 716SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
709{ 717{
710 const struct cred *old; 718 const struct cred *old;
711 struct cred *new; 719 struct cred *new;
@@ -733,11 +741,13 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
733 goto error; 741 goto error;
734 } 742 }
735 743
736 retval = -EAGAIN;
737 if (ruid != (uid_t) -1) { 744 if (ruid != (uid_t) -1) {
738 new->uid = ruid; 745 new->uid = ruid;
739 if (ruid != old->uid && set_user(new) < 0) 746 if (ruid != old->uid) {
740 goto error; 747 retval = set_user(new);
748 if (retval < 0)
749 goto error;
750 }
741 } 751 }
742 if (euid != (uid_t) -1) 752 if (euid != (uid_t) -1)
743 new->euid = euid; 753 new->euid = euid;
@@ -756,7 +766,7 @@ error:
756 return retval; 766 return retval;
757} 767}
758 768
759asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) 769SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid)
760{ 770{
761 const struct cred *cred = current_cred(); 771 const struct cred *cred = current_cred();
762 int retval; 772 int retval;
@@ -771,7 +781,7 @@ asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __us
771/* 781/*
772 * Same as above, but for rgid, egid, sgid. 782 * Same as above, but for rgid, egid, sgid.
773 */ 783 */
774asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) 784SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
775{ 785{
776 const struct cred *old; 786 const struct cred *old;
777 struct cred *new; 787 struct cred *new;
@@ -814,7 +824,7 @@ error:
814 return retval; 824 return retval;
815} 825}
816 826
817asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) 827SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid)
818{ 828{
819 const struct cred *cred = current_cred(); 829 const struct cred *cred = current_cred();
820 int retval; 830 int retval;
@@ -833,7 +843,7 @@ asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __us
833 * whatever uid it wants to). It normally shadows "euid", except when 843 * whatever uid it wants to). It normally shadows "euid", except when
834 * explicitly set by setfsuid() or for access.. 844 * explicitly set by setfsuid() or for access..
835 */ 845 */
836asmlinkage long sys_setfsuid(uid_t uid) 846SYSCALL_DEFINE1(setfsuid, uid_t, uid)
837{ 847{
838 const struct cred *old; 848 const struct cred *old;
839 struct cred *new; 849 struct cred *new;
@@ -870,7 +880,7 @@ change_okay:
870/* 880/*
871 * Samma på svenska.. 881 * Samma på svenska..
872 */ 882 */
873asmlinkage long sys_setfsgid(gid_t gid) 883SYSCALL_DEFINE1(setfsgid, gid_t, gid)
874{ 884{
875 const struct cred *old; 885 const struct cred *old;
876 struct cred *new; 886 struct cred *new;
@@ -919,7 +929,7 @@ void do_sys_times(struct tms *tms)
919 tms->tms_cstime = cputime_to_clock_t(cstime); 929 tms->tms_cstime = cputime_to_clock_t(cstime);
920} 930}
921 931
922asmlinkage long sys_times(struct tms __user * tbuf) 932SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
923{ 933{
924 if (tbuf) { 934 if (tbuf) {
925 struct tms tmp; 935 struct tms tmp;
@@ -944,7 +954,7 @@ asmlinkage long sys_times(struct tms __user * tbuf)
944 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. 954 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
945 * LBT 04.03.94 955 * LBT 04.03.94
946 */ 956 */
947asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) 957SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
948{ 958{
949 struct task_struct *p; 959 struct task_struct *p;
950 struct task_struct *group_leader = current->group_leader; 960 struct task_struct *group_leader = current->group_leader;
@@ -1015,7 +1025,7 @@ out:
1015 return err; 1025 return err;
1016} 1026}
1017 1027
1018asmlinkage long sys_getpgid(pid_t pid) 1028SYSCALL_DEFINE1(getpgid, pid_t, pid)
1019{ 1029{
1020 struct task_struct *p; 1030 struct task_struct *p;
1021 struct pid *grp; 1031 struct pid *grp;
@@ -1045,14 +1055,14 @@ out:
1045 1055
1046#ifdef __ARCH_WANT_SYS_GETPGRP 1056#ifdef __ARCH_WANT_SYS_GETPGRP
1047 1057
1048asmlinkage long sys_getpgrp(void) 1058SYSCALL_DEFINE0(getpgrp)
1049{ 1059{
1050 return sys_getpgid(0); 1060 return sys_getpgid(0);
1051} 1061}
1052 1062
1053#endif 1063#endif
1054 1064
1055asmlinkage long sys_getsid(pid_t pid) 1065SYSCALL_DEFINE1(getsid, pid_t, pid)
1056{ 1066{
1057 struct task_struct *p; 1067 struct task_struct *p;
1058 struct pid *sid; 1068 struct pid *sid;
@@ -1080,7 +1090,7 @@ out:
1080 return retval; 1090 return retval;
1081} 1091}
1082 1092
1083asmlinkage long sys_setsid(void) 1093SYSCALL_DEFINE0(setsid)
1084{ 1094{
1085 struct task_struct *group_leader = current->group_leader; 1095 struct task_struct *group_leader = current->group_leader;
1086 struct pid *sid = task_pid(group_leader); 1096 struct pid *sid = task_pid(group_leader);
@@ -1311,7 +1321,7 @@ int set_current_groups(struct group_info *group_info)
1311 1321
1312EXPORT_SYMBOL(set_current_groups); 1322EXPORT_SYMBOL(set_current_groups);
1313 1323
1314asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) 1324SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist)
1315{ 1325{
1316 const struct cred *cred = current_cred(); 1326 const struct cred *cred = current_cred();
1317 int i; 1327 int i;
@@ -1340,7 +1350,7 @@ out:
1340 * without another task interfering. 1350 * without another task interfering.
1341 */ 1351 */
1342 1352
1343asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist) 1353SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
1344{ 1354{
1345 struct group_info *group_info; 1355 struct group_info *group_info;
1346 int retval; 1356 int retval;
@@ -1394,7 +1404,7 @@ EXPORT_SYMBOL(in_egroup_p);
1394 1404
1395DECLARE_RWSEM(uts_sem); 1405DECLARE_RWSEM(uts_sem);
1396 1406
1397asmlinkage long sys_newuname(struct new_utsname __user * name) 1407SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
1398{ 1408{
1399 int errno = 0; 1409 int errno = 0;
1400 1410
@@ -1405,7 +1415,7 @@ asmlinkage long sys_newuname(struct new_utsname __user * name)
1405 return errno; 1415 return errno;
1406} 1416}
1407 1417
1408asmlinkage long sys_sethostname(char __user *name, int len) 1418SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
1409{ 1419{
1410 int errno; 1420 int errno;
1411 char tmp[__NEW_UTS_LEN]; 1421 char tmp[__NEW_UTS_LEN];
@@ -1429,7 +1439,7 @@ asmlinkage long sys_sethostname(char __user *name, int len)
1429 1439
1430#ifdef __ARCH_WANT_SYS_GETHOSTNAME 1440#ifdef __ARCH_WANT_SYS_GETHOSTNAME
1431 1441
1432asmlinkage long sys_gethostname(char __user *name, int len) 1442SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
1433{ 1443{
1434 int i, errno; 1444 int i, errno;
1435 struct new_utsname *u; 1445 struct new_utsname *u;
@@ -1454,7 +1464,7 @@ asmlinkage long sys_gethostname(char __user *name, int len)
1454 * Only setdomainname; getdomainname can be implemented by calling 1464 * Only setdomainname; getdomainname can be implemented by calling
1455 * uname() 1465 * uname()
1456 */ 1466 */
1457asmlinkage long sys_setdomainname(char __user *name, int len) 1467SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
1458{ 1468{
1459 int errno; 1469 int errno;
1460 char tmp[__NEW_UTS_LEN]; 1470 char tmp[__NEW_UTS_LEN];
@@ -1477,7 +1487,7 @@ asmlinkage long sys_setdomainname(char __user *name, int len)
1477 return errno; 1487 return errno;
1478} 1488}
1479 1489
1480asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim) 1490SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
1481{ 1491{
1482 if (resource >= RLIM_NLIMITS) 1492 if (resource >= RLIM_NLIMITS)
1483 return -EINVAL; 1493 return -EINVAL;
@@ -1496,7 +1506,8 @@ asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim)
1496 * Back compatibility for getrlimit. Needed for some apps. 1506 * Back compatibility for getrlimit. Needed for some apps.
1497 */ 1507 */
1498 1508
1499asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim) 1509SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
1510 struct rlimit __user *, rlim)
1500{ 1511{
1501 struct rlimit x; 1512 struct rlimit x;
1502 if (resource >= RLIM_NLIMITS) 1513 if (resource >= RLIM_NLIMITS)
@@ -1514,7 +1525,7 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
1514 1525
1515#endif 1526#endif
1516 1527
1517asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) 1528SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
1518{ 1529{
1519 struct rlimit new_rlim, *old_rlim; 1530 struct rlimit new_rlim, *old_rlim;
1520 int retval; 1531 int retval;
@@ -1523,22 +1534,14 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
1523 return -EINVAL; 1534 return -EINVAL;
1524 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) 1535 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1525 return -EFAULT; 1536 return -EFAULT;
1537 if (new_rlim.rlim_cur > new_rlim.rlim_max)
1538 return -EINVAL;
1526 old_rlim = current->signal->rlim + resource; 1539 old_rlim = current->signal->rlim + resource;
1527 if ((new_rlim.rlim_max > old_rlim->rlim_max) && 1540 if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
1528 !capable(CAP_SYS_RESOURCE)) 1541 !capable(CAP_SYS_RESOURCE))
1529 return -EPERM; 1542 return -EPERM;
1530 1543 if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
1531 if (resource == RLIMIT_NOFILE) { 1544 return -EPERM;
1532 if (new_rlim.rlim_max == RLIM_INFINITY)
1533 new_rlim.rlim_max = sysctl_nr_open;
1534 if (new_rlim.rlim_cur == RLIM_INFINITY)
1535 new_rlim.rlim_cur = sysctl_nr_open;
1536 if (new_rlim.rlim_max > sysctl_nr_open)
1537 return -EPERM;
1538 }
1539
1540 if (new_rlim.rlim_cur > new_rlim.rlim_max)
1541 return -EINVAL;
1542 1545
1543 retval = security_task_setrlimit(resource, &new_rlim); 1546 retval = security_task_setrlimit(resource, &new_rlim);
1544 if (retval) 1547 if (retval)
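
The reordered sys_setrlimit() checks are a small semantic change, not just cleanup: rlim_cur <= rlim_max is validated up front for every resource, and an RLIMIT_NOFILE hard limit above sysctl_nr_open, including RLIM_INFINITY, now fails with EPERM where it used to be silently clamped. A model of the new ordering with stand-in constants:

#include <assert.h>
#include <errno.h>

struct rlimit_s { unsigned long cur, max; };

#define RLIM_INFINITY (~0UL)
static unsigned long sysctl_nr_open = 1024 * 1024;      /* stand-in value */

static int check_nofile(struct rlimit_s new_rlim)
{
        if (new_rlim.cur > new_rlim.max)
                return -EINVAL;         /* now checked for all resources */
        if (new_rlim.max > sysctl_nr_open)
                return -EPERM;          /* includes RLIM_INFINITY */
        return 0;
}

int main(void)
{
        struct rlimit_s inf = { RLIM_INFINITY, RLIM_INFINITY };
        struct rlimit_s bad = { .cur = 2, .max = 1 };

        assert(check_nofile(inf) == -EPERM);    /* was: clamped, allowed */
        assert(check_nofile(bad) == -EINVAL);
        return 0;
}
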
@@ -1687,7 +1690,7 @@ int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
1687 return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; 1690 return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1688} 1691}
1689 1692
1690asmlinkage long sys_getrusage(int who, struct rusage __user *ru) 1693SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
1691{ 1694{
1692 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && 1695 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
1693 who != RUSAGE_THREAD) 1696 who != RUSAGE_THREAD)
@@ -1695,14 +1698,14 @@ asmlinkage long sys_getrusage(int who, struct rusage __user *ru)
1695 return getrusage(current, who, ru); 1698 return getrusage(current, who, ru);
1696} 1699}
1697 1700
1698asmlinkage long sys_umask(int mask) 1701SYSCALL_DEFINE1(umask, int, mask)
1699{ 1702{
1700 mask = xchg(&current->fs->umask, mask & S_IRWXUGO); 1703 mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1701 return mask; 1704 return mask;
1702} 1705}
1703 1706
1704asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, 1707SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
1705 unsigned long arg4, unsigned long arg5) 1708 unsigned long, arg4, unsigned long, arg5)
1706{ 1709{
1707 struct task_struct *me = current; 1710 struct task_struct *me = current;
1708 unsigned char comm[sizeof(me->comm)]; 1711 unsigned char comm[sizeof(me->comm)];
@@ -1815,8 +1818,8 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1815 return error; 1818 return error;
1816} 1819}
1817 1820
1818asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, 1821SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
1819 struct getcpu_cache __user *unused) 1822 struct getcpu_cache __user *, unused)
1820{ 1823{
1821 int err = 0; 1824 int err = 0;
1822 int cpu = raw_smp_processor_id(); 1825 int cpu = raw_smp_processor_id();
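
A note on the pattern running through this file: the sweep converts the remaining kernel/sys.c entry points from open-coded asmlinkage definitions to the SYSCALL_DEFINEn() family, which gives every syscall a single macro that tracing and the sign-extension wrappers some 64-bit architectures need can hook. A minimal sketch of the non-wrapped expansion, paraphrased from include/linux/syscalls.h of this era (treat the details as an approximation):

    /* Sketch: with no architecture wrappers configured, the macros
     * reduce to the old-style definition. */
    #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
    #define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
    #define SYSCALL_DEFINEx(x, name, ...) \
            asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))
    #define __SC_DECL1(t1, a1)      t1 a1
    #define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__)

    /* So SYSCALL_DEFINE1(umask, int, mask) still defines
     *     asmlinkage long sys_umask(int mask)
     * but through one central macro that instrumentation can hook. */
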
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index e14a23281707..27dad2967387 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -131,6 +131,7 @@ cond_syscall(sys_io_destroy);
131cond_syscall(sys_io_submit); 131cond_syscall(sys_io_submit);
132cond_syscall(sys_io_cancel); 132cond_syscall(sys_io_cancel);
133cond_syscall(sys_io_getevents); 133cond_syscall(sys_io_getevents);
134cond_syscall(sys_syslog);
134 135
135/* arch-specific weak syscall entries */ 136/* arch-specific weak syscall entries */
136cond_syscall(sys_pciconfig_read); 137cond_syscall(sys_pciconfig_read);
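
The new cond_syscall(sys_syslog) line makes the syslog entry point optional: when the implementing code is configured out, the call resolves to the -ENOSYS stub instead of breaking the link. Roughly how the mechanism works, paraphrased from this file (a sketch, not the verbatim source):

    /* Each cond_syscall() entry emits a weak assembler alias, so a real
     * definition elsewhere wins at link time and the stub below is the
     * fallback. */
    asmlinkage long sys_ni_syscall(void)
    {
            return -ENOSYS;
    }

    #define cond_syscall(x) \
            asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
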
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 89d74436318c..c5ef44ff850f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -101,6 +101,7 @@ static int two = 2;
101 101
102static int zero; 102static int zero;
103static int one = 1; 103static int one = 1;
104static unsigned long one_ul = 1;
104static int one_hundred = 100; 105static int one_hundred = 100;
105 106
106/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 107/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -144,6 +145,7 @@ extern int acct_parm[];
144 145
145#ifdef CONFIG_IA64 146#ifdef CONFIG_IA64
146extern int no_unaligned_warning; 147extern int no_unaligned_warning;
148extern int unaligned_dump_stack;
147#endif 149#endif
148 150
149#ifdef CONFIG_RT_MUTEXES 151#ifdef CONFIG_RT_MUTEXES
@@ -781,6 +783,14 @@ static struct ctl_table kern_table[] = {
781 .mode = 0644, 783 .mode = 0644,
782 .proc_handler = &proc_dointvec, 784 .proc_handler = &proc_dointvec,
783 }, 785 },
786 {
787 .ctl_name = CTL_UNNUMBERED,
788 .procname = "unaligned-dump-stack",
789 .data = &unaligned_dump_stack,
790 .maxlen = sizeof (int),
791 .mode = 0644,
792 .proc_handler = &proc_dointvec,
793 },
784#endif 794#endif
785#ifdef CONFIG_DETECT_SOFTLOCKUP 795#ifdef CONFIG_DETECT_SOFTLOCKUP
786 { 796 {
@@ -800,7 +810,7 @@ static struct ctl_table kern_table[] = {
800 .data = &softlockup_thresh, 810 .data = &softlockup_thresh,
801 .maxlen = sizeof(int), 811 .maxlen = sizeof(int),
802 .mode = 0644, 812 .mode = 0644,
803 .proc_handler = &proc_dointvec_minmax, 813 .proc_handler = &proc_dosoftlockup_thresh,
804 .strategy = &sysctl_intvec, 814 .strategy = &sysctl_intvec,
805 .extra1 = &neg_one, 815 .extra1 = &neg_one,
806 .extra2 = &sixty, 816 .extra2 = &sixty,
@@ -965,7 +975,7 @@ static struct ctl_table vm_table[] = {
965 .mode = 0644, 975 .mode = 0644,
966 .proc_handler = &dirty_background_bytes_handler, 976 .proc_handler = &dirty_background_bytes_handler,
967 .strategy = &sysctl_intvec, 977 .strategy = &sysctl_intvec,
968 .extra1 = &one, 978 .extra1 = &one_ul,
969 }, 979 },
970 { 980 {
971 .ctl_name = VM_DIRTY_RATIO, 981 .ctl_name = VM_DIRTY_RATIO,
@@ -986,7 +996,7 @@ static struct ctl_table vm_table[] = {
986 .mode = 0644, 996 .mode = 0644,
987 .proc_handler = &dirty_bytes_handler, 997 .proc_handler = &dirty_bytes_handler,
988 .strategy = &sysctl_intvec, 998 .strategy = &sysctl_intvec,
989 .extra1 = &one, 999 .extra1 = &one_ul,
990 }, 1000 },
991 { 1001 {
992 .procname = "dirty_writeback_centisecs", 1002 .procname = "dirty_writeback_centisecs",
@@ -1688,7 +1698,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol
1688 return error; 1698 return error;
1689} 1699}
1690 1700
1691asmlinkage long sys_sysctl(struct __sysctl_args __user *args) 1701SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
1692{ 1702{
1693 struct __sysctl_args tmp; 1703 struct __sysctl_args tmp;
1694 int error; 1704 int error;
@@ -2989,7 +2999,7 @@ int sysctl_ms_jiffies(struct ctl_table *table,
2989#else /* CONFIG_SYSCTL_SYSCALL */ 2999#else /* CONFIG_SYSCTL_SYSCALL */
2990 3000
2991 3001
2992asmlinkage long sys_sysctl(struct __sysctl_args __user *args) 3002SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
2993{ 3003{
2994 struct __sysctl_args tmp; 3004 struct __sysctl_args tmp;
2995 int error; 3005 int error;
diff --git a/kernel/time.c b/kernel/time.c
index 4886e3ce83a4..29511943871a 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -60,7 +60,7 @@ EXPORT_SYMBOL(sys_tz);
60 * why not move it into the appropriate arch directory (for those 60 * why not move it into the appropriate arch directory (for those
61 * architectures that need it). 61 * architectures that need it).
62 */ 62 */
63asmlinkage long sys_time(time_t __user * tloc) 63SYSCALL_DEFINE1(time, time_t __user *, tloc)
64{ 64{
65 time_t i = get_seconds(); 65 time_t i = get_seconds();
66 66
@@ -79,7 +79,7 @@ asmlinkage long sys_time(time_t __user * tloc)
79 * architectures that need it). 79 * architectures that need it).
80 */ 80 */
81 81
82asmlinkage long sys_stime(time_t __user *tptr) 82SYSCALL_DEFINE1(stime, time_t __user *, tptr)
83{ 83{
84 struct timespec tv; 84 struct timespec tv;
85 int err; 85 int err;
@@ -99,8 +99,8 @@ asmlinkage long sys_stime(time_t __user *tptr)
99 99
100#endif /* __ARCH_WANT_SYS_TIME */ 100#endif /* __ARCH_WANT_SYS_TIME */
101 101
102asmlinkage long sys_gettimeofday(struct timeval __user *tv, 102SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
103 struct timezone __user *tz) 103 struct timezone __user *, tz)
104{ 104{
105 if (likely(tv != NULL)) { 105 if (likely(tv != NULL)) {
106 struct timeval ktv; 106 struct timeval ktv;
@@ -184,8 +184,8 @@ int do_sys_settimeofday(struct timespec *tv, struct timezone *tz)
184 return 0; 184 return 0;
185} 185}
186 186
187asmlinkage long sys_settimeofday(struct timeval __user *tv, 187SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv,
188 struct timezone __user *tz) 188 struct timezone __user *, tz)
189{ 189{
190 struct timeval user_tv; 190 struct timeval user_tv;
191 struct timespec new_ts; 191 struct timespec new_ts;
@@ -205,7 +205,7 @@ asmlinkage long sys_settimeofday(struct timeval __user *tv,
205 return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL); 205 return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL);
206} 206}
207 207
208asmlinkage long sys_adjtimex(struct timex __user *txc_p) 208SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
209{ 209{
210 struct timex txc; /* Local copy of parameter */ 210 struct timex txc; /* Local copy of parameter */
211 int ret; 211 int ret;
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 63e05d423a09..21a5ca849514 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -274,6 +274,21 @@ out_bc:
274} 274}
275 275
276/* 276/*
277 * Transfer the do_timer job away from a dying cpu.
278 *
279 * Called with interrupts disabled.
280 */
281static void tick_handover_do_timer(int *cpup)
282{
283 if (*cpup == tick_do_timer_cpu) {
284 int cpu = cpumask_first(cpu_online_mask);
285
286 tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
287 TICK_DO_TIMER_NONE;
288 }
289}
290
291/*
277 * Shutdown an event device on a given cpu: 292 * Shutdown an event device on a given cpu:
278 * 293 *
279 This is called on a live CPU, when a CPU is dead. So we cannot 294 This is called on a live CPU, when a CPU is dead. So we cannot
@@ -297,13 +312,6 @@ static void tick_shutdown(unsigned int *cpup)
297 clockevents_exchange_device(dev, NULL); 312 clockevents_exchange_device(dev, NULL);
298 td->evtdev = NULL; 313 td->evtdev = NULL;
299 } 314 }
300 /* Transfer the do_timer job away from this cpu */
301 if (*cpup == tick_do_timer_cpu) {
302 int cpu = cpumask_first(cpu_online_mask);
303
304 tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
305 TICK_DO_TIMER_NONE;
306 }
307 spin_unlock_irqrestore(&tick_device_lock, flags); 315 spin_unlock_irqrestore(&tick_device_lock, flags);
308} 316}
309 317
@@ -357,6 +365,10 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason,
357 tick_broadcast_oneshot_control(reason); 365 tick_broadcast_oneshot_control(reason);
358 break; 366 break;
359 367
368 case CLOCK_EVT_NOTIFY_CPU_DYING:
369 tick_handover_do_timer(dev);
370 break;
371
360 case CLOCK_EVT_NOTIFY_CPU_DEAD: 372 case CLOCK_EVT_NOTIFY_CPU_DEAD:
361 tick_shutdown_broadcast_oneshot(dev); 373 tick_shutdown_broadcast_oneshot(dev);
362 tick_shutdown_broadcast(dev); 374 tick_shutdown_broadcast(dev);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1b6c05bd0d0a..d3f1ef4d5cbe 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -134,7 +134,7 @@ __setup("nohz=", setup_tick_nohz);
134 * value. We do this unconditionally on any cpu, as we don't know whether the 134 * value. We do this unconditionally on any cpu, as we don't know whether the
135 * cpu which has the update task assigned is in a long sleep. 135 * cpu which has the update task assigned is in a long sleep.
136 */ 136 */
137void tick_nohz_update_jiffies(void) 137static void tick_nohz_update_jiffies(void)
138{ 138{
139 int cpu = smp_processor_id(); 139 int cpu = smp_processor_id();
140 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 140 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
diff --git a/kernel/timer.c b/kernel/timer.c
index dee3f641a7a7..13dd64fe143d 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1129,7 +1129,7 @@ void do_timer(unsigned long ticks)
1129 * For backwards compatibility? This can be done in libc so Alpha 1129 * For backwards compatibility? This can be done in libc so Alpha
1130 * and all newer ports shouldn't need it. 1130 * and all newer ports shouldn't need it.
1131 */ 1131 */
1132asmlinkage unsigned long sys_alarm(unsigned int seconds) 1132SYSCALL_DEFINE1(alarm, unsigned int, seconds)
1133{ 1133{
1134 return alarm_setitimer(seconds); 1134 return alarm_setitimer(seconds);
1135} 1135}
@@ -1152,7 +1152,7 @@ asmlinkage unsigned long sys_alarm(unsigned int seconds)
1152 * 1152 *
1153 * This is SMP safe as current->tgid does not change. 1153 * This is SMP safe as current->tgid does not change.
1154 */ 1154 */
1155asmlinkage long sys_getpid(void) 1155SYSCALL_DEFINE0(getpid)
1156{ 1156{
1157 return task_tgid_vnr(current); 1157 return task_tgid_vnr(current);
1158} 1158}
@@ -1163,7 +1163,7 @@ asmlinkage long sys_getpid(void)
1163 * value of ->real_parent under rcu_read_lock(), see 1163 * value of ->real_parent under rcu_read_lock(), see
1164 * release_task()->call_rcu(delayed_put_task_struct). 1164 * release_task()->call_rcu(delayed_put_task_struct).
1165 */ 1165 */
1166asmlinkage long sys_getppid(void) 1166SYSCALL_DEFINE0(getppid)
1167{ 1167{
1168 int pid; 1168 int pid;
1169 1169
@@ -1174,25 +1174,25 @@ asmlinkage long sys_getppid(void)
1174 return pid; 1174 return pid;
1175} 1175}
1176 1176
1177asmlinkage long sys_getuid(void) 1177SYSCALL_DEFINE0(getuid)
1178{ 1178{
1179 /* Only we change this so SMP safe */ 1179 /* Only we change this so SMP safe */
1180 return current_uid(); 1180 return current_uid();
1181} 1181}
1182 1182
1183asmlinkage long sys_geteuid(void) 1183SYSCALL_DEFINE0(geteuid)
1184{ 1184{
1185 /* Only we change this so SMP safe */ 1185 /* Only we change this so SMP safe */
1186 return current_euid(); 1186 return current_euid();
1187} 1187}
1188 1188
1189asmlinkage long sys_getgid(void) 1189SYSCALL_DEFINE0(getgid)
1190{ 1190{
1191 /* Only we change this so SMP safe */ 1191 /* Only we change this so SMP safe */
1192 return current_gid(); 1192 return current_gid();
1193} 1193}
1194 1194
1195asmlinkage long sys_getegid(void) 1195SYSCALL_DEFINE0(getegid)
1196{ 1196{
1197 /* Only we change this so SMP safe */ 1197 /* Only we change this so SMP safe */
1198 return current_egid(); 1198 return current_egid();
@@ -1308,7 +1308,7 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
1308EXPORT_SYMBOL(schedule_timeout_uninterruptible); 1308EXPORT_SYMBOL(schedule_timeout_uninterruptible);
1309 1309
1310/* Thread ID - the internal kernel "pid" */ 1310/* Thread ID - the internal kernel "pid" */
1311asmlinkage long sys_gettid(void) 1311SYSCALL_DEFINE0(gettid)
1312{ 1312{
1313 return task_pid_vnr(current); 1313 return task_pid_vnr(current);
1314} 1314}
@@ -1400,7 +1400,7 @@ out:
1400 return 0; 1400 return 0;
1401} 1401}
1402 1402
1403asmlinkage long sys_sysinfo(struct sysinfo __user *info) 1403SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
1404{ 1404{
1405 struct sysinfo val; 1405 struct sysinfo val;
1406 1406
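
As the implementations above encode, getpid() reports the thread-group leader's id (task_tgid_vnr) while gettid() reports the per-thread id (task_pid_vnr); the two only coincide in the group leader. A small userspace demonstration of the distinction — glibc of this era has no gettid() wrapper, hence the raw syscall:

    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            /* Single-threaded, so the two match; in a spawned thread,
             * gettid would differ while getpid stays the leader's id. */
            printf("getpid (tgid): %ld\n", (long)getpid());
            printf("gettid (pid):  %ld\n", (long)syscall(SYS_gettid));
            return 0;
    }
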
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e2a4ff6fc3a6..34e707e5ab87 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -52,6 +52,7 @@ config FUNCTION_TRACER
52 depends on HAVE_FUNCTION_TRACER 52 depends on HAVE_FUNCTION_TRACER
53 depends on DEBUG_KERNEL 53 depends on DEBUG_KERNEL
54 select FRAME_POINTER 54 select FRAME_POINTER
55 select KALLSYMS
55 select TRACING 56 select TRACING
56 select CONTEXT_SWITCH_TRACER 57 select CONTEXT_SWITCH_TRACER
57 help 58 help
@@ -238,6 +239,7 @@ config STACK_TRACER
238 depends on DEBUG_KERNEL 239 depends on DEBUG_KERNEL
239 select FUNCTION_TRACER 240 select FUNCTION_TRACER
240 select STACKTRACE 241 select STACKTRACE
242 select KALLSYMS
241 help 243 help
242 This special tracer records the maximum stack footprint of the 244 This special tracer records the maximum stack footprint of the
243 kernel and displays it in debugfs/tracing/stack_trace. 245 kernel and displays it in debugfs/tracing/stack_trace.
@@ -302,4 +304,27 @@ config FTRACE_STARTUP_TEST
302 functioning properly. It will do tests on all the configured 304 functioning properly. It will do tests on all the configured
303 tracers of ftrace. 305 tracers of ftrace.
304 306
307config MMIOTRACE
308 bool "Memory mapped IO tracing"
309 depends on HAVE_MMIOTRACE_SUPPORT && DEBUG_KERNEL && PCI
310 select TRACING
311 help
312 Mmiotrace traces Memory Mapped I/O access and is meant for
313 debugging and reverse engineering. It is called from the ioremap
314 implementation and works via page faults. Tracing is disabled by
315 default and can be enabled at run-time.
316
317 See Documentation/tracers/mmiotrace.txt.
318 If you are not helping to develop drivers, say N.
319
320config MMIOTRACE_TEST
321 tristate "Test module for mmiotrace"
322 depends on MMIOTRACE && m
323 help
324 This is a dumb module for testing mmiotrace. It is very dangerous
325 as it will write garbage to IO memory starting at a given address.
326 However, it should be safe to use on e.g. an unused portion of VRAM.
327
328 Say N, unless you absolutely know what you are doing.
329
305endmenu 330endmenu
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2f32969c09df..fdf913dfc7e8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -17,6 +17,7 @@
17#include <linux/clocksource.h> 17#include <linux/clocksource.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/suspend.h>
20#include <linux/debugfs.h> 21#include <linux/debugfs.h>
21#include <linux/hardirq.h> 22#include <linux/hardirq.h>
22#include <linux/kthread.h> 23#include <linux/kthread.h>
@@ -1736,9 +1737,12 @@ static void clear_ftrace_pid(struct pid *pid)
1736{ 1737{
1737 struct task_struct *p; 1738 struct task_struct *p;
1738 1739
1740 rcu_read_lock();
1739 do_each_pid_task(pid, PIDTYPE_PID, p) { 1741 do_each_pid_task(pid, PIDTYPE_PID, p) {
1740 clear_tsk_trace_trace(p); 1742 clear_tsk_trace_trace(p);
1741 } while_each_pid_task(pid, PIDTYPE_PID, p); 1743 } while_each_pid_task(pid, PIDTYPE_PID, p);
1744 rcu_read_unlock();
1745
1742 put_pid(pid); 1746 put_pid(pid);
1743} 1747}
1744 1748
@@ -1746,9 +1750,11 @@ static void set_ftrace_pid(struct pid *pid)
1746{ 1750{
1747 struct task_struct *p; 1751 struct task_struct *p;
1748 1752
1753 rcu_read_lock();
1749 do_each_pid_task(pid, PIDTYPE_PID, p) { 1754 do_each_pid_task(pid, PIDTYPE_PID, p) {
1750 set_tsk_trace_trace(p); 1755 set_tsk_trace_trace(p);
1751 } while_each_pid_task(pid, PIDTYPE_PID, p); 1756 } while_each_pid_task(pid, PIDTYPE_PID, p);
1757 rcu_read_unlock();
1752} 1758}
1753 1759
1754static void clear_ftrace_pid_task(struct pid **pid) 1760static void clear_ftrace_pid_task(struct pid **pid)
@@ -1965,6 +1971,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1965#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1971#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1966 1972
1967static atomic_t ftrace_graph_active; 1973static atomic_t ftrace_graph_active;
1974static struct notifier_block ftrace_suspend_notifier;
1968 1975
1969int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) 1976int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
1970{ 1977{
@@ -2026,7 +2033,7 @@ free:
2026static int start_graph_tracing(void) 2033static int start_graph_tracing(void)
2027{ 2034{
2028 struct ftrace_ret_stack **ret_stack_list; 2035 struct ftrace_ret_stack **ret_stack_list;
2029 int ret; 2036 int ret, cpu;
2030 2037
2031 ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE * 2038 ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
2032 sizeof(struct ftrace_ret_stack *), 2039 sizeof(struct ftrace_ret_stack *),
@@ -2035,6 +2042,10 @@ static int start_graph_tracing(void)
2035 if (!ret_stack_list) 2042 if (!ret_stack_list)
2036 return -ENOMEM; 2043 return -ENOMEM;
2037 2044
2045 /* The cpu_boot init_task->ret_stack will never be freed */
2046 for_each_online_cpu(cpu)
2047 ftrace_graph_init_task(idle_task(cpu));
2048
2038 do { 2049 do {
2039 ret = alloc_retstack_tasklist(ret_stack_list); 2050 ret = alloc_retstack_tasklist(ret_stack_list);
2040 } while (ret == -EAGAIN); 2051 } while (ret == -EAGAIN);
@@ -2043,6 +2054,27 @@ static int start_graph_tracing(void)
2043 return ret; 2054 return ret;
2044} 2055}
2045 2056
2057/*
2058 * Hibernation protection.
2059 * The state of the current task is too unstable during
2060 * suspend/restore to disk. We want to protect against that.
2061 */
2062static int
2063ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
2064 void *unused)
2065{
2066 switch (state) {
2067 case PM_HIBERNATION_PREPARE:
2068 pause_graph_tracing();
2069 break;
2070
2071 case PM_POST_HIBERNATION:
2072 unpause_graph_tracing();
2073 break;
2074 }
2075 return NOTIFY_DONE;
2076}
2077
2046int register_ftrace_graph(trace_func_graph_ret_t retfunc, 2078int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2047 trace_func_graph_ent_t entryfunc) 2079 trace_func_graph_ent_t entryfunc)
2048{ 2080{
@@ -2050,6 +2082,9 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2050 2082
2051 mutex_lock(&ftrace_sysctl_lock); 2083 mutex_lock(&ftrace_sysctl_lock);
2052 2084
2085 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
2086 register_pm_notifier(&ftrace_suspend_notifier);
2087
2053 atomic_inc(&ftrace_graph_active); 2088 atomic_inc(&ftrace_graph_active);
2054 ret = start_graph_tracing(); 2089 ret = start_graph_tracing();
2055 if (ret) { 2090 if (ret) {
@@ -2075,6 +2110,7 @@ void unregister_ftrace_graph(void)
2075 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; 2110 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
2076 ftrace_graph_entry = ftrace_graph_entry_stub; 2111 ftrace_graph_entry = ftrace_graph_entry_stub;
2077 ftrace_shutdown(FTRACE_STOP_FUNC_RET); 2112 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
2113 unregister_pm_notifier(&ftrace_suspend_notifier);
2078 2114
2079 mutex_unlock(&ftrace_sysctl_lock); 2115 mutex_unlock(&ftrace_sysctl_lock);
2080} 2116}
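
The hibernation hook above is ordinary register_pm_notifier() usage: graph tracing is paused before the image is written and resumed afterwards, because the return-stack patching is unsafe while task state is being snapshotted. A generic sketch of the same pattern for any subsystem (my_pm_callback and my_pm_nb are invented names):

    #include <linux/notifier.h>
    #include <linux/suspend.h>

    static int my_pm_callback(struct notifier_block *nb,
                              unsigned long state, void *unused)
    {
            switch (state) {
            case PM_HIBERNATION_PREPARE:
                    /* quiesce state the snapshot must not race with */
                    break;
            case PM_POST_HIBERNATION:
                    /* resume normal operation */
                    break;
            }
            return NOTIFY_DONE;
    }

    static struct notifier_block my_pm_nb = {
            .notifier_call = my_pm_callback,
    };

    /* register_pm_notifier(&my_pm_nb) at setup, paired with
     * unregister_pm_notifier(&my_pm_nb) at teardown. */
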
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8b0daf0662ef..bd38c5cfd8ad 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -246,7 +246,7 @@ static inline int test_time_stamp(u64 delta)
246 return 0; 246 return 0;
247} 247}
248 248
249#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page)) 249#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data))
250 250
251/* 251/*
252 * head_page == tail_page && head == tail then buffer is empty. 252 * head_page == tail_page && head == tail then buffer is empty.
@@ -1025,12 +1025,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1025 } 1025 }
1026 1026
1027 if (next_page == head_page) { 1027 if (next_page == head_page) {
1028 if (!(buffer->flags & RB_FL_OVERWRITE)) { 1028 if (!(buffer->flags & RB_FL_OVERWRITE))
1029 /* reset write */
1030 if (tail <= BUF_PAGE_SIZE)
1031 local_set(&tail_page->write, tail);
1032 goto out_unlock; 1029 goto out_unlock;
1033 }
1034 1030
1035 /* tail_page has not moved yet? */ 1031 /* tail_page has not moved yet? */
1036 if (tail_page == cpu_buffer->tail_page) { 1032 if (tail_page == cpu_buffer->tail_page) {
@@ -1105,6 +1101,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1105 return event; 1101 return event;
1106 1102
1107 out_unlock: 1103 out_unlock:
1104 /* reset write */
1105 if (tail <= BUF_PAGE_SIZE)
1106 local_set(&tail_page->write, tail);
1107
1108 __raw_spin_unlock(&cpu_buffer->lock); 1108 __raw_spin_unlock(&cpu_buffer->lock);
1109 local_irq_restore(flags); 1109 local_irq_restore(flags);
1110 return NULL; 1110 return NULL;
@@ -2174,6 +2174,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2174 2174
2175 cpu_buffer->overrun = 0; 2175 cpu_buffer->overrun = 0;
2176 cpu_buffer->entries = 0; 2176 cpu_buffer->entries = 0;
2177
2178 cpu_buffer->write_stamp = 0;
2179 cpu_buffer->read_stamp = 0;
2177} 2180}
2178 2181
2179/** 2182/**
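
The BUF_PAGE_SIZE change is easy to misread as cosmetic. buffer_data_page ends in a flexible data[] array, and sizeof() rounds a struct up to its alignment, whereas offsetof(..., data) yields the true header size; the old arithmetic therefore under-counted the usable bytes per page whenever the header had tail padding. A standalone demonstration with a stand-in struct (field layout is illustrative):

    #include <stddef.h>
    #include <stdio.h>

    struct demo_page {
            unsigned long long time_stamp; /* 8 bytes, 8-byte alignment */
            int commit;                    /* 4 bytes */
            unsigned char data[];          /* payload begins here */
    };

    int main(void)
    {
            /* Prints offsetof: 12, sizeof: 16 on common ABIs; using
             * sizeof in PAGE_SIZE - sizeof(...) wastes 4 bytes/page. */
            printf("offsetof: %zu  sizeof: %zu\n",
                   offsetof(struct demo_page, data),
                   sizeof(struct demo_page));
            return 0;
    }
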
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c580233add95..17bb88d86ac2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -40,7 +40,7 @@
40 40
41#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) 41#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
42 42
43unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 43unsigned long __read_mostly tracing_max_latency;
44unsigned long __read_mostly tracing_thresh; 44unsigned long __read_mostly tracing_thresh;
45 45
46/* 46/*
@@ -3736,7 +3736,7 @@ static struct notifier_block trace_die_notifier = {
3736 * it if we decide to change what log level the ftrace dump 3736 * it if we decide to change what log level the ftrace dump
3737 * should be at. 3737 * should be at.
3738 */ 3738 */
3739#define KERN_TRACE KERN_INFO 3739#define KERN_TRACE KERN_EMERG
3740 3740
3741static void 3741static void
3742trace_printk_seq(struct trace_seq *s) 3742trace_printk_seq(struct trace_seq *s)
@@ -3770,6 +3770,7 @@ void ftrace_dump(void)
3770 dump_ran = 1; 3770 dump_ran = 1;
3771 3771
3772 /* No turning back! */ 3772 /* No turning back! */
3773 tracing_off();
3773 ftrace_kill(); 3774 ftrace_kill();
3774 3775
3775 for_each_tracing_cpu(cpu) { 3776 for_each_tracing_cpu(cpu) {
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 7c2e326bbc8b..62a78d943534 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -380,6 +380,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr)
380 380
381static void __irqsoff_tracer_init(struct trace_array *tr) 381static void __irqsoff_tracer_init(struct trace_array *tr)
382{ 382{
383 tracing_max_latency = 0;
383 irqsoff_trace = tr; 384 irqsoff_trace = tr;
384 /* make sure that the tracer is visible */ 385 /* make sure that the tracer is visible */
385 smp_wmb(); 386 smp_wmb();
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index fffcb069f1dc..80e503ef6136 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,7 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/mmiotrace.h> 10#include <linux/mmiotrace.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <asm/atomic.h>
12 13
13#include "trace.h" 14#include "trace.h"
14 15
@@ -19,6 +20,7 @@ struct header_iter {
19static struct trace_array *mmio_trace_array; 20static struct trace_array *mmio_trace_array;
20static bool overrun_detected; 21static bool overrun_detected;
21static unsigned long prev_overruns; 22static unsigned long prev_overruns;
23static atomic_t dropped_count;
22 24
23static void mmio_reset_data(struct trace_array *tr) 25static void mmio_reset_data(struct trace_array *tr)
24{ 26{
@@ -121,11 +123,11 @@ static void mmio_close(struct trace_iterator *iter)
121 123
122static unsigned long count_overruns(struct trace_iterator *iter) 124static unsigned long count_overruns(struct trace_iterator *iter)
123{ 125{
124 unsigned long cnt = 0; 126 unsigned long cnt = atomic_xchg(&dropped_count, 0);
125 unsigned long over = ring_buffer_overruns(iter->tr->buffer); 127 unsigned long over = ring_buffer_overruns(iter->tr->buffer);
126 128
127 if (over > prev_overruns) 129 if (over > prev_overruns)
128 cnt = over - prev_overruns; 130 cnt += over - prev_overruns;
129 prev_overruns = over; 131 prev_overruns = over;
130 return cnt; 132 return cnt;
131} 133}
@@ -310,8 +312,10 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
310 312
311 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 313 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
312 &irq_flags); 314 &irq_flags);
313 if (!event) 315 if (!event) {
316 atomic_inc(&dropped_count);
314 return; 317 return;
318 }
315 entry = ring_buffer_event_data(event); 319 entry = ring_buffer_event_data(event);
316 tracing_generic_entry_update(&entry->ent, 0, preempt_count()); 320 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
317 entry->ent.type = TRACE_MMIO_RW; 321 entry->ent.type = TRACE_MMIO_RW;
@@ -338,8 +342,10 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
338 342
339 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 343 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
340 &irq_flags); 344 &irq_flags);
341 if (!event) 345 if (!event) {
346 atomic_inc(&dropped_count);
342 return; 347 return;
348 }
343 entry = ring_buffer_event_data(event); 349 entry = ring_buffer_event_data(event);
344 tracing_generic_entry_update(&entry->ent, 0, preempt_count()); 350 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
345 entry->ent.type = TRACE_MMIO_MAP; 351 entry->ent.type = TRACE_MMIO_MAP;
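
The dropped_count plumbing above is a fetch-and-clear counter: producers increment it whenever ring_buffer_lock_reserve() fails, and count_overruns() drains it atomically with atomic_xchg() so no drop is lost or reported twice. The same pattern in standalone C11 atomics (names are illustrative):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_ulong dropped_count;

    static void log_event_failed(void)
    {
            atomic_fetch_add(&dropped_count, 1); /* producer side */
    }

    static unsigned long drain_drops(void)
    {
            /* read and reset in one atomic step, as atomic_xchg() does */
            return atomic_exchange(&dropped_count, 0);
    }

    int main(void)
    {
            log_event_failed();
            log_event_failed();
            unsigned long first = drain_drops();
            unsigned long second = drain_drops();
            printf("dropped: %lu then %lu\n", first, second); /* 2 then 0 */
            return 0;
    }
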
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 43586b689e31..42ae1e77b6b3 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -333,6 +333,7 @@ static void stop_wakeup_tracer(struct trace_array *tr)
333 333
334static int wakeup_tracer_init(struct trace_array *tr) 334static int wakeup_tracer_init(struct trace_array *tr)
335{ 335{
336 tracing_max_latency = 0;
336 wakeup_trace = tr; 337 wakeup_trace = tr;
337 start_wakeup_tracer(tr); 338 start_wakeup_tracer(tr);
338 return 0; 339 return 0;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 88c8eb70f54a..bc8e80a86bca 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -23,10 +23,20 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
23{ 23{
24 struct ring_buffer_event *event; 24 struct ring_buffer_event *event;
25 struct trace_entry *entry; 25 struct trace_entry *entry;
26 unsigned int loops = 0;
26 27
27 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) { 28 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
28 entry = ring_buffer_event_data(event); 29 entry = ring_buffer_event_data(event);
29 30
31 /*
32 * The ring buffer holds trace_buf_size entries; if we
33 * loop more than that while consuming, something is wrong
34 * with the ring buffer.
35 */
36 if (loops++ > trace_buf_size) {
37 printk(KERN_CONT ".. bad ring buffer ");
38 goto failed;
39 }
30 if (!trace_valid_entry(entry)) { 40 if (!trace_valid_entry(entry)) {
31 printk(KERN_CONT ".. invalid entry %d ", 41 printk(KERN_CONT ".. invalid entry %d ",
32 entry->type); 42 entry->type);
@@ -57,11 +67,20 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
57 67
58 cnt = ring_buffer_entries(tr->buffer); 68 cnt = ring_buffer_entries(tr->buffer);
59 69
70 /*
71 * The trace_test_buffer_cpu runs a while loop to consume all data.
72 * If the calling tracer is broken, and is constantly filling
73 * the buffer, this will run forever, and hard lock the box.
74 * We disable the ring buffer while we do this test to prevent
75 * a hard lock up.
76 */
77 tracing_off();
60 for_each_possible_cpu(cpu) { 78 for_each_possible_cpu(cpu) {
61 ret = trace_test_buffer_cpu(tr, cpu); 79 ret = trace_test_buffer_cpu(tr, cpu);
62 if (ret) 80 if (ret)
63 break; 81 break;
64 } 82 }
83 tracing_on();
65 __raw_spin_unlock(&ftrace_max_lock); 84 __raw_spin_unlock(&ftrace_max_lock);
66 local_irq_restore(flags); 85 local_irq_restore(flags);
67 86
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 43f891b05a4b..00d59d048edf 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -122,8 +122,10 @@ void acct_update_integrals(struct task_struct *tsk)
122 if (likely(tsk->mm)) { 122 if (likely(tsk->mm)) {
123 cputime_t time, dtime; 123 cputime_t time, dtime;
124 struct timeval value; 124 struct timeval value;
125 unsigned long flags;
125 u64 delta; 126 u64 delta;
126 127
128 local_irq_save(flags);
127 time = tsk->stime + tsk->utime; 129 time = tsk->stime + tsk->utime;
128 dtime = cputime_sub(time, tsk->acct_timexpd); 130 dtime = cputime_sub(time, tsk->acct_timexpd);
129 jiffies_to_timeval(cputime_to_jiffies(dtime), &value); 131 jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
@@ -131,10 +133,12 @@ void acct_update_integrals(struct task_struct *tsk)
131 delta = delta * USEC_PER_SEC + value.tv_usec; 133 delta = delta * USEC_PER_SEC + value.tv_usec;
132 134
133 if (delta == 0) 135 if (delta == 0)
134 return; 136 goto out;
135 tsk->acct_timexpd = time; 137 tsk->acct_timexpd = time;
136 tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); 138 tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
137 tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; 139 tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
140 out:
141 local_irq_restore(flags);
138 } 142 }
139} 143}
140 144
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 2460c3199b5a..0314501688b9 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -17,7 +17,7 @@
17 17
18#include <asm/uaccess.h> 18#include <asm/uaccess.h>
19 19
20asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gid_t group) 20SYSCALL_DEFINE3(chown16, const char __user *, filename, old_uid_t, user, old_gid_t, group)
21{ 21{
22 long ret = sys_chown(filename, low2highuid(user), low2highgid(group)); 22 long ret = sys_chown(filename, low2highuid(user), low2highgid(group));
23 /* avoid REGPARM breakage on x86: */ 23 /* avoid REGPARM breakage on x86: */
@@ -25,7 +25,7 @@ asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gi
25 return ret; 25 return ret;
26} 26}
27 27
28asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_gid_t group) 28SYSCALL_DEFINE3(lchown16, const char __user *, filename, old_uid_t, user, old_gid_t, group)
29{ 29{
30 long ret = sys_lchown(filename, low2highuid(user), low2highgid(group)); 30 long ret = sys_lchown(filename, low2highuid(user), low2highgid(group));
31 /* avoid REGPARM breakage on x86: */ 31 /* avoid REGPARM breakage on x86: */
@@ -33,7 +33,7 @@ asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_g
33 return ret; 33 return ret;
34} 34}
35 35
36asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group) 36SYSCALL_DEFINE3(fchown16, unsigned int, fd, old_uid_t, user, old_gid_t, group)
37{ 37{
38 long ret = sys_fchown(fd, low2highuid(user), low2highgid(group)); 38 long ret = sys_fchown(fd, low2highuid(user), low2highgid(group));
39 /* avoid REGPARM breakage on x86: */ 39 /* avoid REGPARM breakage on x86: */
@@ -41,7 +41,7 @@ asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group)
41 return ret; 41 return ret;
42} 42}
43 43
44asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid) 44SYSCALL_DEFINE2(setregid16, old_gid_t, rgid, old_gid_t, egid)
45{ 45{
46 long ret = sys_setregid(low2highgid(rgid), low2highgid(egid)); 46 long ret = sys_setregid(low2highgid(rgid), low2highgid(egid));
47 /* avoid REGPARM breakage on x86: */ 47 /* avoid REGPARM breakage on x86: */
@@ -49,7 +49,7 @@ asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid)
49 return ret; 49 return ret;
50} 50}
51 51
52asmlinkage long sys_setgid16(old_gid_t gid) 52SYSCALL_DEFINE1(setgid16, old_gid_t, gid)
53{ 53{
54 long ret = sys_setgid(low2highgid(gid)); 54 long ret = sys_setgid(low2highgid(gid));
55 /* avoid REGPARM breakage on x86: */ 55 /* avoid REGPARM breakage on x86: */
@@ -57,7 +57,7 @@ asmlinkage long sys_setgid16(old_gid_t gid)
57 return ret; 57 return ret;
58} 58}
59 59
60asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid) 60SYSCALL_DEFINE2(setreuid16, old_uid_t, ruid, old_uid_t, euid)
61{ 61{
62 long ret = sys_setreuid(low2highuid(ruid), low2highuid(euid)); 62 long ret = sys_setreuid(low2highuid(ruid), low2highuid(euid));
63 /* avoid REGPARM breakage on x86: */ 63 /* avoid REGPARM breakage on x86: */
@@ -65,7 +65,7 @@ asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid)
65 return ret; 65 return ret;
66} 66}
67 67
68asmlinkage long sys_setuid16(old_uid_t uid) 68SYSCALL_DEFINE1(setuid16, old_uid_t, uid)
69{ 69{
70 long ret = sys_setuid(low2highuid(uid)); 70 long ret = sys_setuid(low2highuid(uid));
71 /* avoid REGPARM breakage on x86: */ 71 /* avoid REGPARM breakage on x86: */
@@ -73,7 +73,7 @@ asmlinkage long sys_setuid16(old_uid_t uid)
73 return ret; 73 return ret;
74} 74}
75 75
76asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) 76SYSCALL_DEFINE3(setresuid16, old_uid_t, ruid, old_uid_t, euid, old_uid_t, suid)
77{ 77{
78 long ret = sys_setresuid(low2highuid(ruid), low2highuid(euid), 78 long ret = sys_setresuid(low2highuid(ruid), low2highuid(euid),
79 low2highuid(suid)); 79 low2highuid(suid));
@@ -82,7 +82,7 @@ asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid)
82 return ret; 82 return ret;
83} 83}
84 84
85asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid) 85SYSCALL_DEFINE3(getresuid16, old_uid_t __user *, ruid, old_uid_t __user *, euid, old_uid_t __user *, suid)
86{ 86{
87 const struct cred *cred = current_cred(); 87 const struct cred *cred = current_cred();
88 int retval; 88 int retval;
@@ -94,7 +94,7 @@ asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid,
94 return retval; 94 return retval;
95} 95}
96 96
97asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) 97SYSCALL_DEFINE3(setresgid16, old_gid_t, rgid, old_gid_t, egid, old_gid_t, sgid)
98{ 98{
99 long ret = sys_setresgid(low2highgid(rgid), low2highgid(egid), 99 long ret = sys_setresgid(low2highgid(rgid), low2highgid(egid),
100 low2highgid(sgid)); 100 low2highgid(sgid));
@@ -103,7 +103,8 @@ asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid)
103 return ret; 103 return ret;
104} 104}
105 105
106asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid) 106
107SYSCALL_DEFINE3(getresgid16, old_gid_t __user *, rgid, old_gid_t __user *, egid, old_gid_t __user *, sgid)
107{ 108{
108 const struct cred *cred = current_cred(); 109 const struct cred *cred = current_cred();
109 int retval; 110 int retval;
@@ -115,7 +116,7 @@ asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid,
115 return retval; 116 return retval;
116} 117}
117 118
118asmlinkage long sys_setfsuid16(old_uid_t uid) 119SYSCALL_DEFINE1(setfsuid16, old_uid_t, uid)
119{ 120{
120 long ret = sys_setfsuid(low2highuid(uid)); 121 long ret = sys_setfsuid(low2highuid(uid));
121 /* avoid REGPARM breakage on x86: */ 122 /* avoid REGPARM breakage on x86: */
@@ -123,7 +124,7 @@ asmlinkage long sys_setfsuid16(old_uid_t uid)
123 return ret; 124 return ret;
124} 125}
125 126
126asmlinkage long sys_setfsgid16(old_gid_t gid) 127SYSCALL_DEFINE1(setfsgid16, old_gid_t, gid)
127{ 128{
128 long ret = sys_setfsgid(low2highgid(gid)); 129 long ret = sys_setfsgid(low2highgid(gid));
129 /* avoid REGPARM breakage on x86: */ 130 /* avoid REGPARM breakage on x86: */
@@ -161,7 +162,7 @@ static int groups16_from_user(struct group_info *group_info,
161 return 0; 162 return 0;
162} 163}
163 164
164asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist) 165SYSCALL_DEFINE2(getgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
165{ 166{
166 const struct cred *cred = current_cred(); 167 const struct cred *cred = current_cred();
167 int i; 168 int i;
@@ -184,7 +185,7 @@ out:
184 return i; 185 return i;
185} 186}
186 187
187asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist) 188SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
188{ 189{
189 struct group_info *group_info; 190 struct group_info *group_info;
190 int retval; 191 int retval;
@@ -209,22 +210,22 @@ asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist)
209 return retval; 210 return retval;
210} 211}
211 212
212asmlinkage long sys_getuid16(void) 213SYSCALL_DEFINE0(getuid16)
213{ 214{
214 return high2lowuid(current_uid()); 215 return high2lowuid(current_uid());
215} 216}
216 217
217asmlinkage long sys_geteuid16(void) 218SYSCALL_DEFINE0(geteuid16)
218{ 219{
219 return high2lowuid(current_euid()); 220 return high2lowuid(current_euid());
220} 221}
221 222
222asmlinkage long sys_getgid16(void) 223SYSCALL_DEFINE0(getgid16)
223{ 224{
224 return high2lowgid(current_gid()); 225 return high2lowgid(current_gid());
225} 226}
226 227
227asmlinkage long sys_getegid16(void) 228SYSCALL_DEFINE0(getegid16)
228{ 229{
229 return high2lowgid(current_egid()); 230 return high2lowgid(current_egid());
230} 231}
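
All of these 16-bit compatibility wrappers funnel ids through the low2high*/high2low* helpers, whose job is to preserve the -1 "leave unchanged" sentinel across the width change and to clamp unrepresentable ids. Roughly what they look like, paraphrased from include/linux/highuid.h (a sketch; overflowuid defaults to 65534):

    /* old_uid_t is 16-bit, uid_t is 32-bit. (old_uid_t)-1 (0xffff) means
     * "no change" and must widen to (uid_t)-1, not to uid 65535. */
    #define low2highuid(uid) \
            ((uid) == (old_uid_t)-1 ? (uid_t)-1 : (uid_t)(uid))

    /* Narrowing: ids that do not fit in 16 bits report as overflowuid. */
    #define high2lowuid(uid) \
            ((uid) & ~0xFFFF ? (old_uid_t)overflowuid : (old_uid_t)(uid))
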
diff --git a/kernel/up.c b/kernel/up.c
new file mode 100644
index 000000000000..1ff27a28bb7d
--- /dev/null
+++ b/kernel/up.c
@@ -0,0 +1,21 @@
1/*
2 * Uniprocessor-only support functions. The counterpart to kernel/smp.c
3 */
4
5#include <linux/interrupt.h>
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/smp.h>
9
10int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
11 int wait)
12{
13 WARN_ON(cpu != 0);
14
15 local_irq_disable();
16 (func)(info);
17 local_irq_enable();
18
19 return 0;
20}
21EXPORT_SYMBOL(smp_call_function_single);
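
With kernel/up.c in place, uniprocessor builds get the same smp_call_function_single() API as SMP ones; note the callback still runs with interrupts disabled, matching the IPI context it would execute in on SMP. A hypothetical caller (bump_counter and demo are invented names):

    static void bump_counter(void *info)
    {
            /* runs with IRQs off, exactly as inside an SMP IPI handler */
            ++*(int *)info;
    }

    static int demo(void)
    {
            int count = 0;

            /* cpu must be 0 on UP; wait is moot since the call is direct */
            smp_call_function_single(0, bump_counter, &count, 1);
            return count; /* 1 */
    }
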
diff --git a/kernel/user.c b/kernel/user.c
index 477b6660f447..fbb300e6191f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -72,6 +72,7 @@ static void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent)
72static void uid_hash_remove(struct user_struct *up) 72static void uid_hash_remove(struct user_struct *up)
73{ 73{
74 hlist_del_init(&up->uidhash_node); 74 hlist_del_init(&up->uidhash_node);
75 put_user_ns(up->user_ns);
75} 76}
76 77
77static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) 78static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
@@ -285,14 +286,12 @@ int __init uids_sysfs_init(void)
285/* work function to remove sysfs directory for a user and free up 286/* work function to remove sysfs directory for a user and free up
286 * corresponding structures. 287 * corresponding structures.
287 */ 288 */
288static void remove_user_sysfs_dir(struct work_struct *w) 289static void cleanup_user_struct(struct work_struct *w)
289{ 290{
290 struct user_struct *up = container_of(w, struct user_struct, work); 291 struct user_struct *up = container_of(w, struct user_struct, work);
291 unsigned long flags; 292 unsigned long flags;
292 int remove_user = 0; 293 int remove_user = 0;
293 294
294 if (up->user_ns != &init_user_ns)
295 return;
296 /* Make uid_hash_remove() + sysfs_remove_file() + kobject_del() 295 /* Make uid_hash_remove() + sysfs_remove_file() + kobject_del()
297 * atomic. 296 * atomic.
298 */ 297 */
@@ -311,9 +310,11 @@ static void remove_user_sysfs_dir(struct work_struct *w)
311 if (!remove_user) 310 if (!remove_user)
312 goto done; 311 goto done;
313 312
314 kobject_uevent(&up->kobj, KOBJ_REMOVE); 313 if (up->user_ns == &init_user_ns) {
315 kobject_del(&up->kobj); 314 kobject_uevent(&up->kobj, KOBJ_REMOVE);
316 kobject_put(&up->kobj); 315 kobject_del(&up->kobj);
316 kobject_put(&up->kobj);
317 }
317 318
318 sched_destroy_user(up); 319 sched_destroy_user(up);
319 key_put(up->uid_keyring); 320 key_put(up->uid_keyring);
@@ -334,8 +335,7 @@ static void free_user(struct user_struct *up, unsigned long flags)
334 atomic_inc(&up->__count); 335 atomic_inc(&up->__count);
335 spin_unlock_irqrestore(&uidhash_lock, flags); 336 spin_unlock_irqrestore(&uidhash_lock, flags);
336 337
337 put_user_ns(up->user_ns); 338 INIT_WORK(&up->work, cleanup_user_struct);
338 INIT_WORK(&up->work, remove_user_sysfs_dir);
339 schedule_work(&up->work); 339 schedule_work(&up->work);
340} 340}
341 341
@@ -357,12 +357,29 @@ static void free_user(struct user_struct *up, unsigned long flags)
357 sched_destroy_user(up); 357 sched_destroy_user(up);
358 key_put(up->uid_keyring); 358 key_put(up->uid_keyring);
359 key_put(up->session_keyring); 359 key_put(up->session_keyring);
360 put_user_ns(up->user_ns);
361 kmem_cache_free(uid_cachep, up); 360 kmem_cache_free(uid_cachep, up);
362} 361}
363 362
364#endif 363#endif
365 364
365#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_USER_SCHED)
366/*
367 * We need to check if a setuid can take place. This function should be called
368 * before successfully completing the setuid.
369 */
370int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
371{
372
373 return sched_rt_can_attach(up->tg, tsk);
374
375}
376#else
377int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
378{
379 return 1;
380}
381#endif
382
366/* 383/*
367 * Locate the user_struct for the passed UID. If found, take a ref on it. The 384 * Locate the user_struct for the passed UID. If found, take a ref on it. The
368 * caller must undo that ref with free_uid(). 385 * caller must undo that ref with free_uid().
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 79084311ee57..076c7c8215b0 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -60,12 +60,25 @@ int create_user_ns(struct cred *new)
60 return 0; 60 return 0;
61} 61}
62 62
63void free_user_ns(struct kref *kref) 63/*
64 * Deferred destructor for a user namespace. This is required because
65 * free_user_ns() may be called with uidhash_lock held, but we need to call
66 * back to free_uid() which will want to take the lock again.
67 */
68static void free_user_ns_work(struct work_struct *work)
64{ 69{
65 struct user_namespace *ns; 70 struct user_namespace *ns =
66 71 container_of(work, struct user_namespace, destroyer);
67 ns = container_of(kref, struct user_namespace, kref);
68 free_uid(ns->creator); 72 free_uid(ns->creator);
69 kfree(ns); 73 kfree(ns);
70} 74}
75
76void free_user_ns(struct kref *kref)
77{
78 struct user_namespace *ns =
79 container_of(kref, struct user_namespace, kref);
80
81 INIT_WORK(&ns->destroyer, free_user_ns_work);
82 schedule_work(&ns->destroyer);
83}
71EXPORT_SYMBOL(free_user_ns); 84EXPORT_SYMBOL(free_user_ns);
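
The rework above is the standard dodge for a lock-ordering trap: the kref release may fire with uidhash_lock already held, yet the teardown needs free_uid(), which takes that same lock, so destruction is bounced to a workqueue and runs later in a clean process context. A skeleton of the pattern for any refcounted object (struct my_ns and its helpers are hypothetical):

    #include <linux/kref.h>
    #include <linux/slab.h>
    #include <linux/workqueue.h>

    struct my_ns {
            struct kref kref;
            struct work_struct destroyer;
            /* ... payload ... */
    };

    static void my_ns_destroy_work(struct work_struct *work)
    {
            struct my_ns *ns = container_of(work, struct my_ns, destroyer);

            /* clean context: safe to take the locks the caller may hold */
            kfree(ns);
    }

    static void my_ns_release(struct kref *kref)
    {
            struct my_ns *ns = container_of(kref, struct my_ns, kref);

            /* possibly called under a spinlock: defer the real teardown */
            INIT_WORK(&ns->destroyer, my_ns_destroy_work);
            schedule_work(&ns->destroyer);
    }
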
diff --git a/kernel/wait.c b/kernel/wait.c
index cd87131f2fc2..42a2dbc181c8 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -91,6 +91,15 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
91} 91}
92EXPORT_SYMBOL(prepare_to_wait_exclusive); 92EXPORT_SYMBOL(prepare_to_wait_exclusive);
93 93
94/*
95 * finish_wait - clean up after waiting in a queue
96 * @q: waitqueue waited on
97 * @wait: wait descriptor
98 *
99 * Sets current thread back to running state and removes
100 * the wait descriptor from the given waitqueue if still
101 * queued.
102 */
94void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) 103void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
95{ 104{
96 unsigned long flags; 105 unsigned long flags;
@@ -117,6 +126,39 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
117} 126}
118EXPORT_SYMBOL(finish_wait); 127EXPORT_SYMBOL(finish_wait);
119 128
129/*
130 * abort_exclusive_wait - abort exclusive waiting in a queue
131 * @q: waitqueue waited on
132 * @wait: wait descriptor
133 * @mode: runstate of the waiter to be woken
134 * @key: key to identify a wait bit queue or %NULL
135 *
136 * Sets current thread back to running state and removes
137 * the wait descriptor from the given waitqueue if still
138 * queued.
139 *
140 * Wakes up the next waiter if the caller is concurrently
141 * woken up through the queue.
142 *
143 * This prevents waiter starvation where an exclusive waiter
144 * aborts and is woken up concurrently and no one wakes up
145 * the next waiter.
146 */
147void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
148 unsigned int mode, void *key)
149{
150 unsigned long flags;
151
152 __set_current_state(TASK_RUNNING);
153 spin_lock_irqsave(&q->lock, flags);
154 if (!list_empty(&wait->task_list))
155 list_del_init(&wait->task_list);
156 else if (waitqueue_active(q))
157 __wake_up_common(q, mode, 1, 0, key);
158 spin_unlock_irqrestore(&q->lock, flags);
159}
160EXPORT_SYMBOL(abort_exclusive_wait);
161
120int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) 162int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
121{ 163{
122 int ret = default_wake_function(wait, mode, sync, key); 164 int ret = default_wake_function(wait, mode, sync, key);
@@ -177,17 +219,20 @@ int __sched
177__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, 219__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
178 int (*action)(void *), unsigned mode) 220 int (*action)(void *), unsigned mode)
179{ 221{
180 int ret = 0;
181
182 do { 222 do {
223 int ret;
224
183 prepare_to_wait_exclusive(wq, &q->wait, mode); 225 prepare_to_wait_exclusive(wq, &q->wait, mode);
184 if (test_bit(q->key.bit_nr, q->key.flags)) { 226 if (!test_bit(q->key.bit_nr, q->key.flags))
185 if ((ret = (*action)(q->key.flags))) 227 continue;
186 break; 228 ret = action(q->key.flags);
187 } 229 if (!ret)
230 continue;
231 abort_exclusive_wait(wq, &q->wait, mode, &q->key);
232 return ret;
188 } while (test_and_set_bit(q->key.bit_nr, q->key.flags)); 233 } while (test_and_set_bit(q->key.bit_nr, q->key.flags));
189 finish_wait(wq, &q->wait); 234 finish_wait(wq, &q->wait);
190 return ret; 235 return 0;
191} 236}
192EXPORT_SYMBOL(__wait_on_bit_lock); 237EXPORT_SYMBOL(__wait_on_bit_lock);
193 238
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2f445833ae37..1f0c509b40d3 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -971,6 +971,8 @@ undo:
971} 971}
972 972
973#ifdef CONFIG_SMP 973#ifdef CONFIG_SMP
974static struct workqueue_struct *work_on_cpu_wq __read_mostly;
975
974struct work_for_cpu { 976struct work_for_cpu {
975 struct work_struct work; 977 struct work_struct work;
976 long (*fn)(void *); 978 long (*fn)(void *);
@@ -991,8 +993,8 @@ static void do_work_for_cpu(struct work_struct *w)
991 * @fn: the function to run 993 * @fn: the function to run
992 * @arg: the function arg 994 * @arg: the function arg
993 * 995 *
994 * This will return -EINVAL in the cpu is not online, or the return value 996 * This will return the value @fn returns.
995 * of @fn otherwise. 997 * It is up to the caller to ensure that the cpu doesn't go offline.
996 */ 998 */
997long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) 999long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
998{ 1000{
@@ -1001,14 +1003,8 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
1001 INIT_WORK(&wfc.work, do_work_for_cpu); 1003 INIT_WORK(&wfc.work, do_work_for_cpu);
1002 wfc.fn = fn; 1004 wfc.fn = fn;
1003 wfc.arg = arg; 1005 wfc.arg = arg;
1004 get_online_cpus(); 1006 queue_work_on(cpu, work_on_cpu_wq, &wfc.work);
1005 if (unlikely(!cpu_online(cpu))) 1007 flush_work(&wfc.work);
1006 wfc.ret = -EINVAL;
1007 else {
1008 schedule_work_on(cpu, &wfc.work);
1009 flush_work(&wfc.work);
1010 }
1011 put_online_cpus();
1012 1008
1013 return wfc.ret; 1009 return wfc.ret;
1014} 1010}
@@ -1025,4 +1021,8 @@ void __init init_workqueues(void)
1025 hotcpu_notifier(workqueue_cpu_callback, 0); 1021 hotcpu_notifier(workqueue_cpu_callback, 0);
1026 keventd_wq = create_workqueue("events"); 1022 keventd_wq = create_workqueue("events");
1027 BUG_ON(!keventd_wq); 1023 BUG_ON(!keventd_wq);
1024#ifdef CONFIG_SMP
1025 work_on_cpu_wq = create_workqueue("work_on_cpu");
1026 BUG_ON(!work_on_cpu_wq);
1027#endif
1028} 1028}
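
Finally, work_on_cpu() now runs on a dedicated single-purpose workqueue rather than keventd, so a slow or flushing @fn can no longer deadlock against unrelated events work; the price, per the updated comment, is that keeping the cpu online becomes the caller's job. A hypothetical use (read_counter_on and struct counter_req are invented):

    struct counter_req {
            unsigned long long value;
    };

    static long read_counter_on(void *arg)
    {
            struct counter_req *req = arg;

            /* executes on the requested cpu, in workqueue context */
            req->value = 0; /* stand-in for a real per-cpu read */
            return 0;
    }

    static long demo(unsigned int cpu, struct counter_req *req)
    {
            long ret;

            /* caller pins the cpu online for the duration */
            get_online_cpus();
            ret = cpu_online(cpu) ? work_on_cpu(cpu, read_counter_on, req)
                                  : -EINVAL;
            put_online_cpus();
            return ret;
    }
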