Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile  1
-rw-r--r--  kernel/async.c  94
-rw-r--r--  kernel/cgroup.c  5
-rw-r--r--  kernel/cpuset.c  13
-rw-r--r--  kernel/exit.c  3
-rw-r--r--  kernel/fork.c  17
-rw-r--r--  kernel/futex.c  53
-rw-r--r--  kernel/hrtimer.c  41
-rw-r--r--  kernel/irq/chip.c  2
-rw-r--r--  kernel/irq/numa_migrate.c  7
-rw-r--r--  kernel/itimer.c  4
-rw-r--r--  kernel/kexec.c  7
-rw-r--r--  kernel/module.c  35
-rw-r--r--  kernel/posix-cpu-timers.c  123
-rw-r--r--  kernel/power/Makefile  2
-rw-r--r--  kernel/power/console.c  6
-rw-r--r--  kernel/power/disk.c  22
-rw-r--r--  kernel/power/main.c  34
-rw-r--r--  kernel/power/swap.c  5
-rw-r--r--  kernel/power/user.c  8
-rw-r--r--  kernel/printk.c  15
-rw-r--r--  kernel/profile.c  3
-rw-r--r--  kernel/rcuclassic.c  4
-rw-r--r--  kernel/rcupdate.c  12
-rw-r--r--  kernel/rcupreempt.c  3
-rw-r--r--  kernel/rcutree.c  4
-rw-r--r--  kernel/sched.c  51
-rw-r--r--  kernel/sched_fair.c  21
-rw-r--r--  kernel/sched_rt.c  4
-rw-r--r--  kernel/sched_stats.h  45
-rw-r--r--  kernel/seccomp.c  7
-rw-r--r--  kernel/signal.c  18
-rw-r--r--  kernel/smp.c  36
-rw-r--r--  kernel/softirq.c  1
-rw-r--r--  kernel/sys.c  47
-rw-r--r--  kernel/sysctl.c  5
-rw-r--r--  kernel/time/tick-common.c  26
-rw-r--r--  kernel/trace/Kconfig  25
-rw-r--r--  kernel/trace/ftrace.c  38
-rw-r--r--  kernel/trace/ring_buffer.c  15
-rw-r--r--  kernel/trace/trace.c  5
-rw-r--r--  kernel/trace/trace_irqsoff.c  1
-rw-r--r--  kernel/trace/trace_mmiotrace.c  14
-rw-r--r--  kernel/trace/trace_sched_wakeup.c  1
-rw-r--r--  kernel/trace/trace_selftest.c  19
-rw-r--r--  kernel/tsacct.c  6
-rw-r--r--  kernel/user.c  35
-rw-r--r--  kernel/user_namespace.c  21
-rw-r--r--  kernel/wait.c  59
49 files changed, 804 insertions, 219 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 170a9213c1b6..e4791b3ba55d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_PM) += power/
+obj-$(CONFIG_FREEZER) += power/
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
diff --git a/kernel/async.c b/kernel/async.c
index 608b32b42812..f565891f2c9b 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -54,6 +54,7 @@ asynchronous and synchronous parts of the kernel.
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/kthread.h>
+#include <linux/delay.h>
 #include <asm/atomic.h>
 
 static async_cookie_t next_cookie = 1;
@@ -132,21 +133,23 @@ static void run_one_entry(void)
 	entry = list_first_entry(&async_pending, struct async_entry, list);
 
 	/* 2) move it to the running queue */
-	list_del(&entry->list);
-	list_add_tail(&entry->list, &async_running);
+	list_move_tail(&entry->list, entry->running);
 	spin_unlock_irqrestore(&async_lock, flags);
 
 	/* 3) run it (and print duration)*/
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
-		printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current));
+		printk("calling %lli_%pF @ %i\n", (long long)entry->cookie,
+			entry->func, task_pid_nr(current));
 		calltime = ktime_get();
 	}
 	entry->func(entry->data, entry->cookie);
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
 		rettime = ktime_get();
 		delta = ktime_sub(rettime, calltime);
-		printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie,
-			entry->func, ktime_to_ns(delta) >> 10);
+		printk("initcall %lli_%pF returned 0 after %lld usecs\n",
+			(long long)entry->cookie,
+			entry->func,
+			(long long)ktime_to_ns(delta) >> 10);
 	}
 
 	/* 4) remove it from the running queue */
@@ -205,18 +208,44 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct l
 	return newcookie;
 }
 
+/**
+ * async_schedule - schedule a function for asynchronous execution
+ * @ptr: function to execute asynchronously
+ * @data: data pointer to pass to the function
+ *
+ * Returns an async_cookie_t that may be used for checkpointing later.
+ * Note: This function may be called from atomic or non-atomic contexts.
+ */
 async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
 {
-	return __async_schedule(ptr, data, &async_pending);
+	return __async_schedule(ptr, data, &async_running);
 }
 EXPORT_SYMBOL_GPL(async_schedule);
 
-async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running)
+/**
+ * async_schedule_domain - schedule a function for asynchronous execution within a certain domain
+ * @ptr: function to execute asynchronously
+ * @data: data pointer to pass to the function
+ * @running: running list for the domain
+ *
+ * Returns an async_cookie_t that may be used for checkpointing later.
+ * @running may be used in the async_synchronize_*_domain() functions
+ * to wait within a certain synchronization domain rather than globally.
+ * A synchronization domain is specified via the running queue @running to use.
+ * Note: This function may be called from atomic or non-atomic contexts.
+ */
+async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data,
+				     struct list_head *running)
 {
 	return __async_schedule(ptr, data, running);
 }
-EXPORT_SYMBOL_GPL(async_schedule_special);
+EXPORT_SYMBOL_GPL(async_schedule_domain);
 
+/**
+ * async_synchronize_full - synchronize all asynchronous function calls
+ *
+ * This function waits until all asynchronous function calls have been done.
+ */
 void async_synchronize_full(void)
 {
 	do {
@@ -225,13 +254,30 @@ void async_synchronize_full(void)
 }
 EXPORT_SYMBOL_GPL(async_synchronize_full);
 
-void async_synchronize_full_special(struct list_head *list)
+/**
+ * async_synchronize_full_domain - synchronize all asynchronous function within a certain domain
+ * @list: running list to synchronize on
+ *
+ * This function waits until all asynchronous function calls for the
+ * synchronization domain specified by the running list @list have been done.
+ */
+void async_synchronize_full_domain(struct list_head *list)
 {
-	async_synchronize_cookie_special(next_cookie, list);
+	async_synchronize_cookie_domain(next_cookie, list);
 }
-EXPORT_SYMBOL_GPL(async_synchronize_full_special);
+EXPORT_SYMBOL_GPL(async_synchronize_full_domain);
 
-void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running)
+/**
+ * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing
+ * @cookie: async_cookie_t to use as checkpoint
+ * @running: running list to synchronize on
+ *
+ * This function waits until all asynchronous function calls for the
+ * synchronization domain specified by the running list @list submitted
+ * prior to @cookie have been done.
+ */
+void async_synchronize_cookie_domain(async_cookie_t cookie,
+				     struct list_head *running)
 {
 	ktime_t starttime, delta, endtime;
 
@@ -247,14 +293,22 @@ void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *r
 		delta = ktime_sub(endtime, starttime);
 
 		printk("async_continuing @ %i after %lli usec\n",
-			task_pid_nr(current), ktime_to_ns(delta) >> 10);
+			task_pid_nr(current),
+			(long long)ktime_to_ns(delta) >> 10);
 	}
 }
-EXPORT_SYMBOL_GPL(async_synchronize_cookie_special);
+EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain);
 
+/**
+ * async_synchronize_cookie - synchronize asynchronous function calls with cookie checkpointing
+ * @cookie: async_cookie_t to use as checkpoint
+ *
+ * This function waits until all asynchronous function calls prior to @cookie
+ * have been done.
+ */
 void async_synchronize_cookie(async_cookie_t cookie)
 {
-	async_synchronize_cookie_special(cookie, &async_running);
+	async_synchronize_cookie_domain(cookie, &async_running);
 }
 EXPORT_SYMBOL_GPL(async_synchronize_cookie);
 
@@ -315,7 +369,11 @@ static int async_manager_thread(void *unused)
 	ec = atomic_read(&entry_count);
 
 	while (tc < ec && tc < MAX_THREADS) {
-		kthread_run(async_thread, NULL, "async/%i", tc);
+		if (IS_ERR(kthread_run(async_thread, NULL, "async/%i",
+				       tc))) {
+			msleep(100);
+			continue;
+		}
 		atomic_inc(&thread_count);
 		tc++;
 	}
@@ -330,7 +388,9 @@ static int async_manager_thread(void *unused)
 static int __init async_init(void)
 {
 	if (async_enabled)
-		kthread_run(async_manager_thread, NULL, "async/mgr");
+		if (IS_ERR(kthread_run(async_manager_thread, NULL,
+				       "async/mgr")))
+			async_enabled = 0;
 	return 0;
 }
 
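For illustration only (not part of the patch): a caller that wants its own synchronization domain, as described in the kernel-doc above, passes a private list to the *_domain calls. The my_driver_* names below are invented; only async_schedule_domain(), async_synchronize_full_domain() and their signatures come from the code above, together with the async_func_ptr calling convention implied by entry->func(entry->data, entry->cookie).

#include <linux/async.h>
#include <linux/list.h>

/* private synchronization domain for this (hypothetical) driver */
static LIST_HEAD(my_driver_async_domain);

/* matches async_func_ptr: called as func(data, cookie) */
static void my_driver_probe_one(void *data, async_cookie_t cookie)
{
	/* slow, independent per-device initialization would run here */
}

static void my_driver_probe_all(void *devs[], int n)
{
	int i;

	/* queue each probe on the private domain rather than the global list */
	for (i = 0; i < n; i++)
		async_schedule_domain(my_driver_probe_one, devs[i],
				      &my_driver_async_domain);

	/* wait only for this driver's async work, not for the whole system */
	async_synchronize_full_domain(&my_driver_async_domain);
}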
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5a54ff42874e..9edb5c4b79b4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1122,8 +1122,8 @@ static void cgroup_kill_sb(struct super_block *sb) {
 
 	mutex_unlock(&cgroup_mutex);
 
-	kfree(root);
 	kill_litter_super(sb);
+	kfree(root);
 }
 
 static struct file_system_type cgroup_fs_type = {
@@ -2351,7 +2351,7 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
 		if (ss->root == root)
-			mutex_lock_nested(&ss->hierarchy_mutex, i);
+			mutex_lock(&ss->hierarchy_mutex);
 	}
 }
 
@@ -2637,6 +2637,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	BUG_ON(!list_empty(&init_task.tasks));
 
 	mutex_init(&ss->hierarchy_mutex);
+	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
 	ss->active = 1;
 }
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index a85678865c5e..f76db9dcaa05 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -61,6 +61,14 @@
 #include <linux/cgroup.h>
 
 /*
+ * Workqueue for cpuset related tasks.
+ *
+ * Using kevent workqueue may cause deadlock when memory_migrate
+ * is set. So we create a separate workqueue thread for cpuset.
+ */
+static struct workqueue_struct *cpuset_wq;
+
+/*
  * Tracks how many cpusets are currently defined in system.
  * When there is only one cpuset (the root cpuset) we can
  * short circuit some hooks.
@@ -831,7 +839,7 @@ static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);
 */
 static void async_rebuild_sched_domains(void)
 {
-	schedule_work(&rebuild_sched_domains_work);
+	queue_work(cpuset_wq, &rebuild_sched_domains_work);
 }
 
 /*
@@ -2111,6 +2119,9 @@ void __init cpuset_init_smp(void)
 
 	hotcpu_notifier(cpuset_track_online_cpus, 0);
 	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
+
+	cpuset_wq = create_singlethread_workqueue("cpuset");
+	BUG_ON(!cpuset_wq);
 }
 
 /**
diff --git a/kernel/exit.c b/kernel/exit.c
index f80dec3f1875..efd30ccf3858 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -118,6 +118,8 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
+		sig->utime = cputime_add(sig->utime, task_utime(tsk));
+		sig->stime = cputime_add(sig->stime, task_stime(tsk));
 		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
@@ -126,6 +128,7 @@ static void __exit_signal(struct task_struct *tsk)
 		sig->inblock += task_io_get_inblock(tsk);
 		sig->oublock += task_io_get_oublock(tsk);
 		task_io_accounting_add(&sig->ioac, &tsk->ioac);
+		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 		sig = NULL; /* Marker for below. */
 	}
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 242a706e7721..4854c2c4a82e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -851,13 +851,14 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->tty_old_pgrp = NULL;
 	sig->tty = NULL;
 
-	sig->cutime = sig->cstime = cputime_zero;
+	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
 	sig->gtime = cputime_zero;
 	sig->cgtime = cputime_zero;
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
 	task_io_accounting_init(&sig->ioac);
+	sig->sum_sched_runtime = 0;
 	taskstats_tgid_init(sig);
 
 	task_lock(current->group_leader);
@@ -1005,6 +1006,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 * triggers too late. This doesn't hurt, the check is only there
 	 * to stop root fork bombs.
 	 */
+	retval = -EAGAIN;
 	if (nr_threads >= max_threads)
 		goto bad_fork_cleanup_count;
 
@@ -1093,7 +1095,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
-	if (unlikely(ptrace_reparented(current)))
+	if (unlikely(current->ptrace))
 		ptrace_fork(p, clone_flags);
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
@@ -1177,10 +1179,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
 	clear_all_latency_tracing(p);
 
-	/* Our parent execution domain becomes current domain
-	   These must match for thread signalling to apply */
-	p->parent_exec_id = p->self_exec_id;
-
 	/* ok, now we should be set up.. */
 	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
 	p->pdeath_signal = 0;
@@ -1218,10 +1216,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	set_task_cpu(p, smp_processor_id());
 
 	/* CLONE_PARENT re-uses the old parent */
-	if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
+	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
 		p->real_parent = current->real_parent;
-	else
+		p->parent_exec_id = current->parent_exec_id;
+	} else {
 		p->real_parent = current;
+		p->parent_exec_id = current->self_exec_id;
+	}
 
 	spin_lock(&current->sighand->siglock);
 
diff --git a/kernel/futex.c b/kernel/futex.c
index f89d373a9c6d..438701adce23 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1165,6 +1165,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
 		      u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
 {
 	struct task_struct *curr = current;
+	struct restart_block *restart;
 	DECLARE_WAITQUEUE(wait, curr);
 	struct futex_hash_bucket *hb;
 	struct futex_q q;
@@ -1216,11 +1217,13 @@ retry:
 
 		if (!ret)
 			goto retry;
-		return ret;
+		goto out;
 	}
 	ret = -EWOULDBLOCK;
-	if (uval != val)
-		goto out_unlock_put_key;
+	if (unlikely(uval != val)) {
+		queue_unlock(&q, hb);
+		goto out_put_key;
+	}
 
 	/* Only actually queue if *uaddr contained val. */
 	queue_me(&q, hb);
@@ -1284,38 +1287,38 @@ retry:
 	 */
 
 	/* If we were woken (and unqueued), we succeeded, whatever. */
+	ret = 0;
 	if (!unqueue_me(&q))
-		return 0;
+		goto out_put_key;
+	ret = -ETIMEDOUT;
 	if (rem)
-		return -ETIMEDOUT;
+		goto out_put_key;
 
 	/*
 	 * We expect signal_pending(current), but another thread may
 	 * have handled it for us already.
 	 */
+	ret = -ERESTARTSYS;
 	if (!abs_time)
-		return -ERESTARTSYS;
-	else {
-		struct restart_block *restart;
-		restart = &current_thread_info()->restart_block;
-		restart->fn = futex_wait_restart;
-		restart->futex.uaddr = (u32 *)uaddr;
-		restart->futex.val = val;
-		restart->futex.time = abs_time->tv64;
-		restart->futex.bitset = bitset;
-		restart->futex.flags = 0;
-
-		if (fshared)
-			restart->futex.flags |= FLAGS_SHARED;
-		if (clockrt)
-			restart->futex.flags |= FLAGS_CLOCKRT;
-		return -ERESTART_RESTARTBLOCK;
-	}
+		goto out_put_key;
 
-out_unlock_put_key:
-	queue_unlock(&q, hb);
-	put_futex_key(fshared, &q.key);
+	restart = &current_thread_info()->restart_block;
+	restart->fn = futex_wait_restart;
+	restart->futex.uaddr = (u32 *)uaddr;
+	restart->futex.val = val;
+	restart->futex.time = abs_time->tv64;
+	restart->futex.bitset = bitset;
+	restart->futex.flags = 0;
+
+	if (fshared)
+		restart->futex.flags |= FLAGS_SHARED;
+	if (clockrt)
+		restart->futex.flags |= FLAGS_CLOCKRT;
 
+	ret = -ERESTART_RESTARTBLOCK;
+
+out_put_key:
+	put_futex_key(fshared, &q.key);
 out:
 	return ret;
 }
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f33afb0407bc..f394d2a42ca3 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -501,6 +501,13 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
 			continue;
 		timer = rb_entry(base->first, struct hrtimer, node);
 		expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
+		/*
+		 * clock_was_set() has changed base->offset so the
+		 * result might be negative. Fix it up to prevent a
+		 * false positive in clockevents_program_event()
+		 */
+		if (expires.tv64 < 0)
+			expires.tv64 = 0;
 		if (expires.tv64 < cpu_base->expires_next.tv64)
 			cpu_base->expires_next = expires;
 	}
@@ -1158,6 +1165,29 @@ static void __run_hrtimer(struct hrtimer *timer)
 
 #ifdef CONFIG_HIGH_RES_TIMERS
 
+static int force_clock_reprogram;
+
+/*
+ * After 5 iteration's attempts, we consider that hrtimer_interrupt()
+ * is hanging, which could happen with something that slows the interrupt
+ * such as the tracing. Then we force the clock reprogramming for each future
+ * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
+ * threshold that we will overwrite.
+ * The next tick event will be scheduled to 3 times we currently spend on
+ * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
+ * 1/4 of their time to process the hrtimer interrupts. This is enough to
+ * let it running without serious starvation.
+ */
+
+static inline void
+hrtimer_interrupt_hanging(struct clock_event_device *dev,
+			ktime_t try_time)
+{
+	force_clock_reprogram = 1;
+	dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
+	printk(KERN_WARNING "hrtimer: interrupt too slow, "
+		"forcing clock min delta to %lu ns\n", dev->min_delta_ns);
+}
 /*
  * High resolution timer interrupt
  * Called with interrupts disabled
@@ -1167,6 +1197,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	struct hrtimer_clock_base *base;
 	ktime_t expires_next, now;
+	int nr_retries = 0;
 	int i;
 
 	BUG_ON(!cpu_base->hres_active);
@@ -1174,6 +1205,10 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	dev->next_event.tv64 = KTIME_MAX;
 
  retry:
+	/* 5 retries is enough to notice a hang */
+	if (!(++nr_retries % 5))
+		hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
+
 	now = ktime_get();
 
 	expires_next.tv64 = KTIME_MAX;
@@ -1226,7 +1261,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
 	/* Reprogramming necessary ? */
 	if (expires_next.tv64 != KTIME_MAX) {
-		if (tick_program_event(expires_next, 0))
+		if (tick_program_event(expires_next, force_clock_reprogram))
 			goto retry;
 	}
 }
@@ -1580,6 +1615,10 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
 		break;
 
 #ifdef CONFIG_HOTPLUG_CPU
+	case CPU_DYING:
+	case CPU_DYING_FROZEN:
+		clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
+		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 	{
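A quick check on the 3x factor used by hrtimer_interrupt_hanging() above: if one pass through hrtimer_interrupt() takes try_time, the next event is pushed out to 3 * try_time, so the handler can claim at most try_time / (try_time + 3 * try_time) = 1/4 of the CPU, which is the bound the comment describes. A standalone sketch of that arithmetic (plain user-space C, not kernel code; the helper name is invented):

#include <stdio.h>

/* invented helper mirroring the policy above: one interrupt pass took
 * try_time_ns, so force the minimum programmable delta to three times that */
static unsigned long forced_min_delta_ns(long long try_time_ns)
{
	return (unsigned long)(try_time_ns * 3);
}

int main(void)
{
	long long try_time_ns = 500000;	/* example: one pass took 500 us */
	unsigned long min_delta = forced_min_delta_ns(try_time_ns);

	/* worst-case handler share = try_time / (try_time + min_delta) = 0.25 */
	printf("min_delta = %lu ns, handler share = %.2f\n", min_delta,
	       (double)try_time_ns / (double)(try_time_ns + min_delta));
	return 0;
}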
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f63c706d25e1..7de11bd64dfe 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -383,6 +383,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 out_unlock:
 	spin_unlock(&desc->lock);
 }
+EXPORT_SYMBOL_GPL(handle_level_irq);
 
 /**
  * handle_fasteoi_irq - irq handler for transparent controllers
@@ -593,6 +594,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 	}
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
+EXPORT_SYMBOL_GPL(__set_irq_handler);
 
 void
 set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index ecf765c6a77a..acd88356ac76 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -71,7 +71,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
 	desc = irq_desc_ptrs[irq];
 
 	if (desc && old_desc != desc)
-		goto out_unlock;
+		goto out_unlock;
 
 	node = cpu_to_node(cpu);
 	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
@@ -84,10 +84,15 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
 	init_copy_one_irq_desc(irq, old_desc, desc, cpu);
 
 	irq_desc_ptrs[irq] = desc;
+	spin_unlock_irqrestore(&sparse_irq_lock, flags);
 
 	/* free the old one */
 	free_one_irq_desc(old_desc, desc);
+	spin_unlock(&old_desc->lock);
 	kfree(old_desc);
+	spin_lock(&desc->lock);
+
+	return desc;
 
 out_unlock:
 	spin_unlock_irqrestore(&sparse_irq_lock, flags);
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 6a5fe93dd8bd..58762f7077ec 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -62,7 +62,7 @@ int do_getitimer(int which, struct itimerval *value)
 		struct task_cputime cputime;
 		cputime_t utime;
 
-		thread_group_cputime(tsk, &cputime);
+		thread_group_cputimer(tsk, &cputime);
 		utime = cputime.utime;
 		if (cputime_le(cval, utime)) { /* about to fire */
 			cval = jiffies_to_cputime(1);
@@ -82,7 +82,7 @@ int do_getitimer(int which, struct itimerval *value)
 		struct task_cputime times;
 		cputime_t ptime;
 
-		thread_group_cputime(tsk, &times);
+		thread_group_cputimer(tsk, &times);
 		ptime = cputime_add(times.utime, times.stime);
 		if (cputime_le(cval, ptime)) { /* about to fire */
 			cval = jiffies_to_cputime(1);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 8a6d7b08864e..483899578259 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1465,6 +1465,11 @@ int kernel_kexec(void)
 		error = device_power_down(PMSG_FREEZE);
 		if (error)
 			goto Enable_irqs;
+
+		/* Suspend system devices */
+		error = sysdev_suspend(PMSG_FREEZE);
+		if (error)
+			goto Power_up_devices;
 	} else
 #endif
 	{
@@ -1477,6 +1482,8 @@ int kernel_kexec(void)
 
 #ifdef CONFIG_KEXEC_JUMP
 	if (kexec_image->preserve_context) {
+		sysdev_resume();
+ Power_up_devices:
 		device_power_up(PMSG_RESTORE);
 Enable_irqs:
 		local_irq_enable();
diff --git a/kernel/module.c b/kernel/module.c
index e8b51d41dd72..1196f5d11700 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -573,13 +573,13 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
 /* Init the unload section of the module. */
 static void module_unload_init(struct module *mod)
 {
-	unsigned int i;
+	int cpu;
 
 	INIT_LIST_HEAD(&mod->modules_which_use_me);
-	for (i = 0; i < NR_CPUS; i++)
-		local_set(&mod->ref[i].count, 0);
+	for_each_possible_cpu(cpu)
+		local_set(__module_ref_addr(mod, cpu), 0);
 	/* Hold reference count during initialization. */
-	local_set(&mod->ref[raw_smp_processor_id()].count, 1);
+	local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1);
 	/* Backwards compatibility macros put refcount during init. */
 	mod->waiter = current;
 }
@@ -717,10 +717,11 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
 
 unsigned int module_refcount(struct module *mod)
 {
-	unsigned int i, total = 0;
+	unsigned int total = 0;
+	int cpu;
 
-	for (i = 0; i < NR_CPUS; i++)
-		total += local_read(&mod->ref[i].count);
+	for_each_possible_cpu(cpu)
+		total += local_read(__module_ref_addr(mod, cpu));
 	return total;
 }
 EXPORT_SYMBOL(module_refcount);
@@ -894,7 +895,7 @@ void module_put(struct module *module)
 {
 	if (module) {
 		unsigned int cpu = get_cpu();
-		local_dec(&module->ref[cpu].count);
+		local_dec(__module_ref_addr(module, cpu));
 		/* Maybe they're waiting for us to drop reference? */
 		if (unlikely(!module_is_live(module)))
 			wake_up_process(module->waiter);
@@ -1464,7 +1465,10 @@ static void free_module(struct module *mod)
 	kfree(mod->args);
 	if (mod->percpu)
 		percpu_modfree(mod->percpu);
-
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+	if (mod->refptr)
+		percpu_modfree(mod->refptr);
+#endif
 	/* Free lock-classes: */
 	lockdep_free_key_range(mod->module_core, mod->core_size);
 
@@ -2070,6 +2074,14 @@ static noinline struct module *load_module(void __user *umod,
 	/* Module has been moved. */
 	mod = (void *)sechdrs[modindex].sh_addr;
 
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+	mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
+				      mod->name);
+	if (!mod->refptr) {
+		err = -ENOMEM;
+		goto free_init;
+	}
+#endif
 	/* Now we've moved module, initialize linked lists, etc. */
 	module_unload_init(mod);
 
@@ -2276,9 +2288,14 @@ static noinline struct module *load_module(void __user *umod,
 	ftrace_release(mod->module_core, mod->core_size);
 free_unload:
 	module_unload_free(mod);
+ free_init:
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+	percpu_modfree(mod->refptr);
+#endif
 	module_free(mod, mod->module_init);
 free_core:
 	module_free(mod, mod->module_core);
+	/* mod will be freed with core. Don't access it beyond this line! */
 free_percpu:
 	if (percpu)
 		percpu_modfree(percpu);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index fa07da94d7be..e976e505648d 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -230,6 +230,71 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 	return 0;
 }
 
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+{
+	struct sighand_struct *sighand;
+	struct signal_struct *sig;
+	struct task_struct *t;
+
+	*times = INIT_CPUTIME;
+
+	rcu_read_lock();
+	sighand = rcu_dereference(tsk->sighand);
+	if (!sighand)
+		goto out;
+
+	sig = tsk->signal;
+
+	t = tsk;
+	do {
+		times->utime = cputime_add(times->utime, t->utime);
+		times->stime = cputime_add(times->stime, t->stime);
+		times->sum_exec_runtime += t->se.sum_exec_runtime;
+
+		t = next_thread(t);
+	} while (t != tsk);
+
+	times->utime = cputime_add(times->utime, sig->utime);
+	times->stime = cputime_add(times->stime, sig->stime);
+	times->sum_exec_runtime += sig->sum_sched_runtime;
+out:
+	rcu_read_unlock();
+}
+
+static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
+{
+	if (cputime_gt(b->utime, a->utime))
+		a->utime = b->utime;
+
+	if (cputime_gt(b->stime, a->stime))
+		a->stime = b->stime;
+
+	if (b->sum_exec_runtime > a->sum_exec_runtime)
+		a->sum_exec_runtime = b->sum_exec_runtime;
+}
+
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
+{
+	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+	struct task_cputime sum;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cputimer->lock, flags);
+	if (!cputimer->running) {
+		cputimer->running = 1;
+		/*
+		 * The POSIX timer interface allows for absolute time expiry
+		 * values through the TIMER_ABSTIME flag, therefore we have
+		 * to synchronize the timer to the clock every time we start
+		 * it.
+		 */
+		thread_group_cputime(tsk, &sum);
+		update_gt_cputime(&cputimer->cputime, &sum);
+	}
+	*times = cputimer->cputime;
+	spin_unlock_irqrestore(&cputimer->lock, flags);
+}
+
 /*
  * Sample a process (thread group) clock for the given group_leader task.
  * Must be called with tasklist_lock held for reading.
@@ -457,7 +522,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
 	struct task_cputime cputime;
 
-	thread_group_cputime(tsk, &cputime);
+	thread_group_cputimer(tsk, &cputime);
 	cleanup_timers(tsk->signal->cpu_timers,
 		       cputime.utime, cputime.stime, cputime.sum_exec_runtime);
 }
@@ -616,6 +681,33 @@ static void cpu_timer_fire(struct k_itimer *timer)
 }
 
 /*
+ * Sample a process (thread group) timer for the given group_leader task.
+ * Must be called with tasklist_lock held for reading.
+ */
+static int cpu_timer_sample_group(const clockid_t which_clock,
+				  struct task_struct *p,
+				  union cpu_time_count *cpu)
+{
+	struct task_cputime cputime;
+
+	thread_group_cputimer(p, &cputime);
+	switch (CPUCLOCK_WHICH(which_clock)) {
+	default:
+		return -EINVAL;
+	case CPUCLOCK_PROF:
+		cpu->cpu = cputime_add(cputime.utime, cputime.stime);
+		break;
+	case CPUCLOCK_VIRT:
+		cpu->cpu = cputime.utime;
+		break;
+	case CPUCLOCK_SCHED:
+		cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
+		break;
+	}
+	return 0;
+}
+
+/*
  * Guts of sys_timer_settime for CPU timers.
  * This is called with the timer locked and interrupts disabled.
  * If we return TIMER_RETRY, it's necessary to release the timer's lock
@@ -676,7 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 		cpu_clock_sample(timer->it_clock, p, &val);
 	} else {
-		cpu_clock_sample_group(timer->it_clock, p, &val);
+		cpu_timer_sample_group(timer->it_clock, p, &val);
 	}
 
 	if (old) {
@@ -824,7 +916,7 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 			read_unlock(&tasklist_lock);
 			goto dead;
 		} else {
-			cpu_clock_sample_group(timer->it_clock, p, &now);
+			cpu_timer_sample_group(timer->it_clock, p, &now);
 			clear_dead = (unlikely(p->exit_state) &&
 				      thread_group_empty(p));
 		}
@@ -964,6 +1056,19 @@ static void check_thread_timers(struct task_struct *tsk,
 	}
 }
 
+static void stop_process_timers(struct task_struct *tsk)
+{
+	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+	unsigned long flags;
+
+	if (!cputimer->running)
+		return;
+
+	spin_lock_irqsave(&cputimer->lock, flags);
+	cputimer->running = 0;
+	spin_unlock_irqrestore(&cputimer->lock, flags);
+}
+
 /*
  * Check for any per-thread CPU timers that have fired and move them
  * off the tsk->*_timers list onto the firing list. Per-thread timers
@@ -987,13 +1092,15 @@ static void check_process_timers(struct task_struct *tsk,
 	    sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
 	    list_empty(&timers[CPUCLOCK_VIRT]) &&
 	    cputime_eq(sig->it_virt_expires, cputime_zero) &&
-	    list_empty(&timers[CPUCLOCK_SCHED]))
+	    list_empty(&timers[CPUCLOCK_SCHED])) {
+		stop_process_timers(tsk);
 		return;
+	}
 
 	/*
 	 * Collect the current process totals.
 	 */
-	thread_group_cputime(tsk, &cputime);
+	thread_group_cputimer(tsk, &cputime);
 	utime = cputime.utime;
 	ptime = cputime_add(utime, cputime.stime);
 	sum_sched_runtime = cputime.sum_exec_runtime;
@@ -1164,7 +1271,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 			clear_dead_task(timer, now);
 			goto out_unlock;
 		}
-		cpu_clock_sample_group(timer->it_clock, p, &now);
+		cpu_timer_sample_group(timer->it_clock, p, &now);
 		bump_cpu_timer(timer, now);
 		/* Leave the tasklist_lock locked for the call below. */
 	}
@@ -1259,7 +1366,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 	if (!task_cputime_zero(&sig->cputime_expires)) {
 		struct task_cputime group_sample;
 
-		thread_group_cputime(tsk, &group_sample);
+		thread_group_cputimer(tsk, &group_sample);
 		if (task_cputime_expired(&group_sample, &sig->cputime_expires))
 			return 1;
 	}
@@ -1341,7 +1448,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 	struct list_head *head;
 
 	BUG_ON(clock_idx == CPUCLOCK_SCHED);
-	cpu_clock_sample_group(clock_idx, tsk, &now);
+	cpu_timer_sample_group(clock_idx, tsk, &now);
 
 	if (oldval) {
 		if (!cputime_eq(*oldval, cputime_zero)) {
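The new thread_group_cputimer() above is a cache-on-first-use pattern: the first sampler seeds signal->cputimer from a full thread walk, later samplers just copy the cached totals under cputimer->lock, update_gt_cputime() keeps the cache from going backwards, and stop_process_timers() drops back to the cheap path once no group-wide timer is armed. A rough user-space analogue of that pattern (pthreads; every name below is invented, only the shape of the logic is taken from the code above):

#include <pthread.h>
#include <stdint.h>

/* invented analogue of struct thread_group_cputimer */
struct group_cputimer {
	pthread_mutex_t lock;
	int running;			/* cached total currently maintained? */
	uint64_t cached_runtime_ns;	/* analogue of cputimer->cputime */
};

/* stand-in for the expensive per-thread walk (thread_group_cputime()) */
static uint64_t sum_all_threads_runtime_ns(void)
{
	return 0;	/* a real version would sum per-thread CPU clocks */
}

/* analogue of thread_group_cputimer(): cheap after the first call */
static uint64_t group_timer_sample(struct group_cputimer *t)
{
	uint64_t now;

	pthread_mutex_lock(&t->lock);
	if (!t->running) {
		uint64_t sum = sum_all_threads_runtime_ns();

		t->running = 1;
		/* like update_gt_cputime(): never let the cache go backwards */
		if (sum > t->cached_runtime_ns)
			t->cached_runtime_ns = sum;
	}
	now = t->cached_runtime_ns;
	pthread_mutex_unlock(&t->lock);
	return now;
}

/* analogue of stop_process_timers(): disable the cache when unused */
static void group_timer_stop(struct group_cputimer *t)
{
	pthread_mutex_lock(&t->lock);
	t->running = 0;
	pthread_mutex_unlock(&t->lock);
}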
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index d7a10167a25b..720ea4f781bd 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -3,7 +3,7 @@ ifeq ($(CONFIG_PM_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
 endif
 
-obj-y := main.o
+obj-$(CONFIG_PM) += main.o
 obj-$(CONFIG_PM_SLEEP) += console.o
 obj-$(CONFIG_FREEZER) += process.o
 obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o
diff --git a/kernel/power/console.c b/kernel/power/console.c
index b8628be2a465..a3961b205de7 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -78,6 +78,12 @@ void pm_restore_console(void)
 	}
 	set_console(orig_fgconsole);
 	release_console_sem();
+
+	if (vt_waitactive(orig_fgconsole)) {
+		pr_debug("Resume: Can't switch VCs.");
+		return;
+	}
+
 	kmsg_redirect = orig_kmsg;
 }
 #endif
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 432ee575c9ee..4a4a206b1979 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -227,6 +227,12 @@ static int create_image(int platform_mode)
 			"aborting hibernation\n");
 		goto Enable_irqs;
 	}
+	sysdev_suspend(PMSG_FREEZE);
+	if (error) {
+		printk(KERN_ERR "PM: Some devices failed to power down, "
+			"aborting hibernation\n");
+		goto Power_up_devices;
+	}
 
 	if (hibernation_test(TEST_CORE))
 		goto Power_up;
@@ -242,9 +248,11 @@ static int create_image(int platform_mode)
 	if (!in_suspend)
 		platform_leave(platform_mode);
 Power_up:
+	sysdev_resume();
 	/* NOTE: device_power_up() is just a resume() for devices
 	 * that suspended with irqs off ... no overall powerup.
 	 */
+ Power_up_devices:
 	device_power_up(in_suspend ?
 			(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
 Enable_irqs:
@@ -335,6 +343,7 @@ static int resume_target_kernel(void)
 			"aborting resume\n");
 		goto Enable_irqs;
 	}
+	sysdev_suspend(PMSG_QUIESCE);
 	/* We'll ignore saved state, but this gets preempt count (etc) right */
 	save_processor_state();
 	error = restore_highmem();
@@ -357,6 +366,7 @@ static int resume_target_kernel(void)
 	swsusp_free();
 	restore_processor_state();
 	touch_softlockup_watchdog();
+	sysdev_resume();
 	device_power_up(PMSG_RECOVER);
 Enable_irqs:
 	local_irq_enable();
@@ -440,6 +450,7 @@ int hibernation_platform_enter(void)
 	local_irq_disable();
 	error = device_power_down(PMSG_HIBERNATE);
 	if (!error) {
+		sysdev_suspend(PMSG_HIBERNATE);
 		hibernation_ops->enter();
 		/* We should never get here */
 		while (1);
@@ -595,6 +606,12 @@ static int software_resume(void)
 	unsigned int flags;
 
 	/*
+	 * If the user said "noresume".. bail out early.
+	 */
+	if (noresume)
+		return 0;
+
+	/*
 	 * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
 	 * is configured into the kernel. Since the regular hibernate
 	 * trigger path is via sysfs which takes a buffer mutex before
@@ -610,6 +627,11 @@ static int software_resume(void)
 			mutex_unlock(&pm_mutex);
 			return -ENOENT;
 		}
+		/*
+		 * Some device discovery might still be in progress; we need
+		 * to wait for this to finish.
+		 */
+		wait_for_device_probe();
 		swsusp_resume_device = name_to_dev_t(resume_file);
 		pr_debug("PM: Resume from partition %s\n", resume_file);
 	} else {
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 239988873971..c9632f841f64 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -57,16 +57,6 @@ int pm_notifier_call_chain(unsigned long val)
 #ifdef CONFIG_PM_DEBUG
 int pm_test_level = TEST_NONE;
 
-static int suspend_test(int level)
-{
-	if (pm_test_level == level) {
-		printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
-		mdelay(5000);
-		return 1;
-	}
-	return 0;
-}
-
 static const char * const pm_tests[__TEST_AFTER_LAST] = {
 	[TEST_NONE] = "none",
 	[TEST_CORE] = "core",
@@ -125,14 +115,24 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
 }
 
 power_attr(pm_test);
-#else /* !CONFIG_PM_DEBUG */
-static inline int suspend_test(int level) { return 0; }
-#endif /* !CONFIG_PM_DEBUG */
+#endif /* CONFIG_PM_DEBUG */
 
 #endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_SUSPEND
 
+static int suspend_test(int level)
+{
+#ifdef CONFIG_PM_DEBUG
+	if (pm_test_level == level) {
+		printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
+		mdelay(5000);
+		return 1;
+	}
+#endif /* !CONFIG_PM_DEBUG */
+	return 0;
+}
+
 #ifdef CONFIG_PM_TEST_SUSPEND
 
 /*
@@ -298,8 +298,12 @@ static int suspend_enter(suspend_state_t state)
 		goto Done;
 	}
 
-	if (!suspend_test(TEST_CORE))
-		error = suspend_ops->enter(state);
+	error = sysdev_suspend(PMSG_SUSPEND);
+	if (!error) {
+		if (!suspend_test(TEST_CORE))
+			error = suspend_ops->enter(state);
+		sysdev_resume();
+	}
 
 	device_power_up(PMSG_RESUME);
 Done:
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 6da14358537c..505f319e489c 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -60,6 +60,7 @@ static struct block_device *resume_bdev;
 static int submit(int rw, pgoff_t page_off, struct page *page,
 		struct bio **bio_chain)
 {
+	const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
 	struct bio *bio;
 
 	bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
@@ -80,7 +81,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
 	bio_get(bio);
 
 	if (bio_chain == NULL) {
-		submit_bio(rw | (1 << BIO_RW_SYNC), bio);
+		submit_bio(bio_rw, bio);
 		wait_on_page_locked(page);
 		if (rw == READ)
 			bio_set_pages_dirty(bio);
@@ -90,7 +91,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
 		get_page(page);	/* These pages are freed later */
 		bio->bi_private = *bio_chain;
 		*bio_chain = bio;
-		submit_bio(rw | (1 << BIO_RW_SYNC), bio);
+		submit_bio(bio_rw, bio);
 	}
 	return 0;
 }
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 005b93d839ba..6c85359364f2 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -95,15 +95,15 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 		data->swap = swsusp_resume_device ?
 			swap_type_of(swsusp_resume_device, 0, NULL) : -1;
 		data->mode = O_RDONLY;
-		error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+		error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
 		if (error)
-			pm_notifier_call_chain(PM_POST_RESTORE);
+			pm_notifier_call_chain(PM_POST_HIBERNATION);
 	} else {
 		data->swap = -1;
 		data->mode = O_WRONLY;
-		error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+		error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
 		if (error)
-			pm_notifier_call_chain(PM_POST_HIBERNATION);
+			pm_notifier_call_chain(PM_POST_RESTORE);
 	}
 	if (error)
 		atomic_inc(&snapshot_device_available);
diff --git a/kernel/printk.c b/kernel/printk.c
index 69188f226a93..e3602d0755b0 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -73,7 +73,6 @@ EXPORT_SYMBOL(oops_in_progress);
 * driver system.
 */
 static DECLARE_MUTEX(console_sem);
-static DECLARE_MUTEX(secondary_console_sem);
 struct console *console_drivers;
 EXPORT_SYMBOL_GPL(console_drivers);
 
@@ -891,12 +890,14 @@ void suspend_console(void)
 	printk("Suspending console(s) (use no_console_suspend to debug)\n");
 	acquire_console_sem();
 	console_suspended = 1;
+	up(&console_sem);
 }
 
 void resume_console(void)
 {
 	if (!console_suspend_enabled)
 		return;
+	down(&console_sem);
 	console_suspended = 0;
 	release_console_sem();
 }
@@ -912,11 +913,9 @@ void resume_console(void)
 void acquire_console_sem(void)
 {
 	BUG_ON(in_interrupt());
-	if (console_suspended) {
-		down(&secondary_console_sem);
-		return;
-	}
 	down(&console_sem);
+	if (console_suspended)
+		return;
 	console_locked = 1;
 	console_may_schedule = 1;
 }
@@ -926,6 +925,10 @@ int try_acquire_console_sem(void)
 {
 	if (down_trylock(&console_sem))
 		return -1;
+	if (console_suspended) {
+		up(&console_sem);
+		return -1;
+	}
 	console_locked = 1;
 	console_may_schedule = 0;
 	return 0;
@@ -979,7 +982,7 @@ void release_console_sem(void)
 	unsigned wake_klogd = 0;
 
 	if (console_suspended) {
-		up(&secondary_console_sem);
+		up(&console_sem);
 		return;
 	}
 
diff --git a/kernel/profile.c b/kernel/profile.c
index 784933acf5b8..7724e0409bae 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -114,12 +114,15 @@ int __ref profile_init(void)
 	if (!slab_is_available()) {
 		prof_buffer = alloc_bootmem(buffer_bytes);
 		alloc_bootmem_cpumask_var(&prof_cpu_mask);
+		cpumask_copy(prof_cpu_mask, cpu_possible_mask);
 		return 0;
 	}
 
 	if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
 		return -ENOMEM;
 
+	cpumask_copy(prof_cpu_mask, cpu_possible_mask);
+
 	prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL);
 	if (prof_buffer)
 		return 0;
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index bd5a9003497c..654c640a6b9c 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -679,8 +679,8 @@ int rcu_needs_cpu(int cpu)
 void rcu_check_callbacks(int cpu, int user)
 {
 	if (user ||
-	    (idle_cpu(cpu) && !in_softirq() &&
-				hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+	    (idle_cpu(cpu) && rcu_scheduler_active &&
+	     !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
 
 		/*
 		 * Get here if this CPU took its interrupt from user
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index d92a76a881aa..cae8a059cf47 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -44,6 +44,7 @@
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
+#include <linux/kernel_stat.h>
 
 enum rcu_barrier {
 	RCU_BARRIER_STD,
@@ -55,6 +56,7 @@ static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
 static atomic_t rcu_barrier_cpu_count;
 static DEFINE_MUTEX(rcu_barrier_mutex);
 static struct completion rcu_barrier_completion;
+int rcu_scheduler_active __read_mostly;
 
 /*
  * Awaken the corresponding synchronize_rcu() instance now that a
@@ -80,6 +82,10 @@ void wakeme_after_rcu(struct rcu_head *head)
 void synchronize_rcu(void)
 {
 	struct rcu_synchronize rcu;
+
+	if (rcu_blocking_is_gp())
+		return;
+
 	init_completion(&rcu.completion);
 	/* Will wake me after RCU finished. */
 	call_rcu(&rcu.head, wakeme_after_rcu);
@@ -175,3 +181,9 @@ void __init rcu_init(void)
 	__rcu_init();
 }
 
+void rcu_scheduler_starting(void)
+{
+	WARN_ON(num_online_cpus() != 1);
+	WARN_ON(nr_context_switches() > 0);
+	rcu_scheduler_active = 1;
+}
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 33cfc50781f9..5d59e850fb71 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -1181,6 +1181,9 @@ void __synchronize_sched(void)
1181{ 1181{
1182 struct rcu_synchronize rcu; 1182 struct rcu_synchronize rcu;
1183 1183
1184 if (num_online_cpus() == 1)
1185 return; /* blocking is gp if only one CPU! */
1186
1184 init_completion(&rcu.completion); 1187 init_completion(&rcu.completion);
1185 /* Will wake me after RCU finished. */ 1188 /* Will wake me after RCU finished. */
1186 call_rcu_sched(&rcu.head, wakeme_after_rcu); 1189 call_rcu_sched(&rcu.head, wakeme_after_rcu);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index b2fd602a6f6f..97ce31579ec0 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -948,8 +948,8 @@ static void rcu_do_batch(struct rcu_data *rdp)
948void rcu_check_callbacks(int cpu, int user) 948void rcu_check_callbacks(int cpu, int user)
949{ 949{
950 if (user || 950 if (user ||
951 (idle_cpu(cpu) && !in_softirq() && 951 (idle_cpu(cpu) && rcu_scheduler_active &&
952 hardirq_count() <= (1 << HARDIRQ_SHIFT))) { 952 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
953 953
954 /* 954 /*
955 * Get here if this CPU took its interrupt from user 955 * Get here if this CPU took its interrupt from user
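
The four RCU files above share one idea: very early in boot there is a single online CPU and no context switches have happened yet, so a grace period is trivially complete and synchronize_rcu()/__synchronize_sched() may return immediately; rcu_scheduler_active records when that assumption stops holding. A toy model of the shortcut (plain C, not kernel code; num_online_cpus_model is a stand-in for the real num_online_cpus()):

#include <stdbool.h>
#include <stdio.h>

static int num_online_cpus_model = 1;   /* stand-in for num_online_cpus() */

/* With one CPU, any point where we could block is itself a quiescent
 * state for every CPU, so "waiting for a grace period" is a no-op. */
static bool blocking_is_gp(void)
{
        return num_online_cpus_model == 1;
}

static void synchronize_rcu_model(void)
{
        if (blocking_is_gp())
                return;
        /* otherwise: queue a callback and sleep on a completion, as the
         * real synchronize_rcu() does via wakeme_after_rcu(). */
}

int main(void)
{
        synchronize_rcu_model();
        puts("single-CPU grace period completed without blocking");
        return 0;
}

The rcu_check_callbacks() hunks add the complementary guard: an idle CPU is only counted as quiescent once rcu_scheduler_active has been set by rcu_scheduler_starting().
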
diff --git a/kernel/sched.c b/kernel/sched.c
index 52bbf1c842a8..8e2558c2ba67 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -223,7 +223,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
223{ 223{
224 ktime_t now; 224 ktime_t now;
225 225
226 if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF) 226 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
227 return; 227 return;
228 228
229 if (hrtimer_active(&rt_b->rt_period_timer)) 229 if (hrtimer_active(&rt_b->rt_period_timer))
@@ -3880,19 +3880,24 @@ int select_nohz_load_balancer(int stop_tick)
3880 int cpu = smp_processor_id(); 3880 int cpu = smp_processor_id();
3881 3881
3882 if (stop_tick) { 3882 if (stop_tick) {
3883 cpumask_set_cpu(cpu, nohz.cpu_mask);
3884 cpu_rq(cpu)->in_nohz_recently = 1; 3883 cpu_rq(cpu)->in_nohz_recently = 1;
3885 3884
3886 /* 3885 if (!cpu_active(cpu)) {
3887 * If we are going offline and still the leader, give up! 3886 if (atomic_read(&nohz.load_balancer) != cpu)
3888 */ 3887 return 0;
3889 if (!cpu_active(cpu) && 3888
3890 atomic_read(&nohz.load_balancer) == cpu) { 3889 /*
3890 * If we are going offline and still the leader,
3891 * give up!
3892 */
3891 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) 3893 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
3892 BUG(); 3894 BUG();
3895
3893 return 0; 3896 return 0;
3894 } 3897 }
3895 3898
3899 cpumask_set_cpu(cpu, nohz.cpu_mask);
3900
3896 /* time for ilb owner also to sleep */ 3901 /* time for ilb owner also to sleep */
3897 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { 3902 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
3898 if (atomic_read(&nohz.load_balancer) == cpu) 3903 if (atomic_read(&nohz.load_balancer) == cpu)
@@ -4687,8 +4692,8 @@ EXPORT_SYMBOL(default_wake_function);
4687 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns 4692 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
4688 * zero in this (rare) case, and we handle it by continuing to scan the queue. 4693 * zero in this (rare) case, and we handle it by continuing to scan the queue.
4689 */ 4694 */
4690static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, 4695void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
4691 int nr_exclusive, int sync, void *key) 4696 int nr_exclusive, int sync, void *key)
4692{ 4697{
4693 wait_queue_t *curr, *next; 4698 wait_queue_t *curr, *next;
4694 4699
@@ -6939,20 +6944,26 @@ static void free_rootdomain(struct root_domain *rd)
6939 6944
6940static void rq_attach_root(struct rq *rq, struct root_domain *rd) 6945static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6941{ 6946{
6947 struct root_domain *old_rd = NULL;
6942 unsigned long flags; 6948 unsigned long flags;
6943 6949
6944 spin_lock_irqsave(&rq->lock, flags); 6950 spin_lock_irqsave(&rq->lock, flags);
6945 6951
6946 if (rq->rd) { 6952 if (rq->rd) {
6947 struct root_domain *old_rd = rq->rd; 6953 old_rd = rq->rd;
6948 6954
6949 if (cpumask_test_cpu(rq->cpu, old_rd->online)) 6955 if (cpumask_test_cpu(rq->cpu, old_rd->online))
6950 set_rq_offline(rq); 6956 set_rq_offline(rq);
6951 6957
6952 cpumask_clear_cpu(rq->cpu, old_rd->span); 6958 cpumask_clear_cpu(rq->cpu, old_rd->span);
6953 6959
6954 if (atomic_dec_and_test(&old_rd->refcount)) 6960 /*
6955 free_rootdomain(old_rd); 6961 * If we don't want to free the old_rd yet then
6962 * set old_rd to NULL to skip the freeing later
6963 * in this function:
6964 */
6965 if (!atomic_dec_and_test(&old_rd->refcount))
6966 old_rd = NULL;
6956 } 6967 }
6957 6968
6958 atomic_inc(&rd->refcount); 6969 atomic_inc(&rd->refcount);
@@ -6963,6 +6974,9 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6963 set_rq_online(rq); 6974 set_rq_online(rq);
6964 6975
6965 spin_unlock_irqrestore(&rq->lock, flags); 6976 spin_unlock_irqrestore(&rq->lock, flags);
6977
6978 if (old_rd)
6979 free_rootdomain(old_rd);
6966} 6980}
6967 6981
6968static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) 6982static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
@@ -9210,6 +9224,16 @@ static int sched_rt_global_constraints(void)
9210 9224
9211 return ret; 9225 return ret;
9212} 9226}
9227
9228int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
9229{
9230 /* Don't accept realtime tasks when there is no way for them to run */
9231 if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
9232 return 0;
9233
9234 return 1;
9235}
9236
9213#else /* !CONFIG_RT_GROUP_SCHED */ 9237#else /* !CONFIG_RT_GROUP_SCHED */
9214static int sched_rt_global_constraints(void) 9238static int sched_rt_global_constraints(void)
9215{ 9239{
@@ -9303,8 +9327,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
9303 struct task_struct *tsk) 9327 struct task_struct *tsk)
9304{ 9328{
9305#ifdef CONFIG_RT_GROUP_SCHED 9329#ifdef CONFIG_RT_GROUP_SCHED
9306 /* Don't accept realtime tasks when there is no way for them to run */ 9330 if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
9307 if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0)
9308 return -EINVAL; 9331 return -EINVAL;
9309#else 9332#else
9310 /* We don't support RT-tasks being in separate groups */ 9333 /* We don't support RT-tasks being in separate groups */
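
The rq_attach_root() change above is a classic lock-ordering fix: the refcount is dropped under rq->lock, but the actual free_rootdomain() call is deferred until after the unlock by remembering the pointer in old_rd. A small sketch of the same pattern (illustrative; the node/registry names are invented):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
        int refcount;           /* toy refcount, protected by reg_lock here */
        char payload[32];
};

static pthread_mutex_t reg_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *current_node;

static void attach(struct node *new_node)
{
        struct node *old = NULL;

        pthread_mutex_lock(&reg_lock);
        if (current_node) {
                old = current_node;
                /* Drop the reference under the lock, but if it was not the
                 * last one, forget the pointer so we skip the free below. */
                if (--old->refcount > 0)
                        old = NULL;
        }
        new_node->refcount++;
        current_node = new_node;
        pthread_mutex_unlock(&reg_lock);

        /* The potentially expensive teardown happens outside the lock. */
        free(old);              /* free(NULL) is a no-op */
}

int main(void)
{
        struct node *a = calloc(1, sizeof(*a));
        struct node *b = calloc(1, sizeof(*b));

        attach(a);              /* a->refcount becomes 1 */
        attach(b);              /* a is the old node, count hits 0, freed */
        printf("current node at %p\n", (void *)current_node);
        free(current_node);
        return 0;
}
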
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5cc1c162044f..0566f2a03c42 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -719,7 +719,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
719 __enqueue_entity(cfs_rq, se); 719 __enqueue_entity(cfs_rq, se);
720} 720}
721 721
722static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) 722static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
723{ 723{
724 if (cfs_rq->last == se) 724 if (cfs_rq->last == se)
725 cfs_rq->last = NULL; 725 cfs_rq->last = NULL;
@@ -728,6 +728,12 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
728 cfs_rq->next = NULL; 728 cfs_rq->next = NULL;
729} 729}
730 730
731static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
732{
733 for_each_sched_entity(se)
734 __clear_buddies(cfs_rq_of(se), se);
735}
736
731static void 737static void
732dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) 738dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
733{ 739{
@@ -768,8 +774,14 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
768 774
769 ideal_runtime = sched_slice(cfs_rq, curr); 775 ideal_runtime = sched_slice(cfs_rq, curr);
770 delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; 776 delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
771 if (delta_exec > ideal_runtime) 777 if (delta_exec > ideal_runtime) {
772 resched_task(rq_of(cfs_rq)->curr); 778 resched_task(rq_of(cfs_rq)->curr);
779 /*
780 * The current task ran long enough, ensure it doesn't get
781 * re-elected due to buddy favours.
782 */
783 clear_buddies(cfs_rq, curr);
784 }
773} 785}
774 786
775static void 787static void
@@ -1452,6 +1464,11 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
1452 1464
1453 do { 1465 do {
1454 se = pick_next_entity(cfs_rq); 1466 se = pick_next_entity(cfs_rq);
1467 /*
1468 * If se was a buddy, clear it so that it will have to earn
1469 * the favour again.
1470 */
1471 __clear_buddies(cfs_rq, se);
1455 set_next_entity(cfs_rq, se); 1472 set_next_entity(cfs_rq, se);
1456 cfs_rq = group_cfs_rq(se); 1473 cfs_rq = group_cfs_rq(se);
1457 } while (cfs_rq); 1474 } while (cfs_rq);
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 954e1a81b796..bac1061cea2f 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -968,8 +968,8 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
968 if ((this_cpu != -1) && cpu_isset(this_cpu, *mask)) 968 if ((this_cpu != -1) && cpu_isset(this_cpu, *mask))
969 return this_cpu; 969 return this_cpu;
970 970
971 first = first_cpu(*mask); 971 first = cpumask_first(mask);
972 if (first != NR_CPUS) 972 if (first < nr_cpu_ids)
973 return first; 973 return first;
974 974
975 return -1; 975 return -1;
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 8ab0cef8ecab..a8f93dd374e1 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -296,19 +296,21 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
296static inline void account_group_user_time(struct task_struct *tsk, 296static inline void account_group_user_time(struct task_struct *tsk,
297 cputime_t cputime) 297 cputime_t cputime)
298{ 298{
299 struct task_cputime *times; 299 struct thread_group_cputimer *cputimer;
300 struct signal_struct *sig;
301 300
302 /* tsk == current, ensure it is safe to use ->signal */ 301 /* tsk == current, ensure it is safe to use ->signal */
303 if (unlikely(tsk->exit_state)) 302 if (unlikely(tsk->exit_state))
304 return; 303 return;
305 304
306 sig = tsk->signal; 305 cputimer = &tsk->signal->cputimer;
307 times = &sig->cputime.totals;
308 306
309 spin_lock(&times->lock); 307 if (!cputimer->running)
310 times->utime = cputime_add(times->utime, cputime); 308 return;
311 spin_unlock(&times->lock); 309
310 spin_lock(&cputimer->lock);
311 cputimer->cputime.utime =
312 cputime_add(cputimer->cputime.utime, cputime);
313 spin_unlock(&cputimer->lock);
312} 314}
313 315
314/** 316/**
@@ -324,19 +326,21 @@ static inline void account_group_user_time(struct task_struct *tsk,
324static inline void account_group_system_time(struct task_struct *tsk, 326static inline void account_group_system_time(struct task_struct *tsk,
325 cputime_t cputime) 327 cputime_t cputime)
326{ 328{
327 struct task_cputime *times; 329 struct thread_group_cputimer *cputimer;
328 struct signal_struct *sig;
329 330
330 /* tsk == current, ensure it is safe to use ->signal */ 331 /* tsk == current, ensure it is safe to use ->signal */
331 if (unlikely(tsk->exit_state)) 332 if (unlikely(tsk->exit_state))
332 return; 333 return;
333 334
334 sig = tsk->signal; 335 cputimer = &tsk->signal->cputimer;
335 times = &sig->cputime.totals; 336
337 if (!cputimer->running)
338 return;
336 339
337 spin_lock(&times->lock); 340 spin_lock(&cputimer->lock);
338 times->stime = cputime_add(times->stime, cputime); 341 cputimer->cputime.stime =
339 spin_unlock(&times->lock); 342 cputime_add(cputimer->cputime.stime, cputime);
343 spin_unlock(&cputimer->lock);
340} 344}
341 345
342/** 346/**
@@ -352,7 +356,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
352static inline void account_group_exec_runtime(struct task_struct *tsk, 356static inline void account_group_exec_runtime(struct task_struct *tsk,
353 unsigned long long ns) 357 unsigned long long ns)
354{ 358{
355 struct task_cputime *times; 359 struct thread_group_cputimer *cputimer;
356 struct signal_struct *sig; 360 struct signal_struct *sig;
357 361
358 sig = tsk->signal; 362 sig = tsk->signal;
@@ -361,9 +365,12 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
361 if (unlikely(!sig)) 365 if (unlikely(!sig))
362 return; 366 return;
363 367
364 times = &sig->cputime.totals; 368 cputimer = &sig->cputimer;
369
370 if (!cputimer->running)
371 return;
365 372
366 spin_lock(&times->lock); 373 spin_lock(&cputimer->lock);
367 times->sum_exec_runtime += ns; 374 cputimer->cputime.sum_exec_runtime += ns;
368 spin_unlock(&times->lock); 375 spin_unlock(&cputimer->lock);
369} 376}
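
The sched_stats.h helpers now hang off a per-signal thread_group_cputimer and, crucially, bail out when cputimer->running is clear, so the shared lock is only taken while a process-wide POSIX CPU timer is actually armed. A reduced model of that fast path (userspace sketch; the struct layout here is an assumption for illustration, not the kernel definition):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct group_cputimer {
        bool running;                   /* is any group-wide timer armed? */
        unsigned long long utime;       /* totals, protected by lock */
        pthread_mutex_t lock;
};

static struct group_cputimer timer = {
        .lock = PTHREAD_MUTEX_INITIALIZER,
};

static void account_user_time(unsigned long long delta)
{
        /* Fast path: nobody is consuming the totals, skip the lock. */
        if (!timer.running)
                return;

        pthread_mutex_lock(&timer.lock);
        timer.utime += delta;
        pthread_mutex_unlock(&timer.lock);
}

int main(void)
{
        account_user_time(100);         /* dropped: no timer armed */
        timer.running = true;
        account_user_time(100);         /* accounted */
        printf("utime accumulated: %llu\n", timer.utime);
        return 0;
}
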
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ad64fcb731f2..57d4b13b631d 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/seccomp.h> 9#include <linux/seccomp.h>
10#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/compat.h>
11 12
12/* #define SECCOMP_DEBUG 1 */ 13/* #define SECCOMP_DEBUG 1 */
13#define NR_SECCOMP_MODES 1 14#define NR_SECCOMP_MODES 1
@@ -22,7 +23,7 @@ static int mode1_syscalls[] = {
22 0, /* null terminated */ 23 0, /* null terminated */
23}; 24};
24 25
25#ifdef TIF_32BIT 26#ifdef CONFIG_COMPAT
26static int mode1_syscalls_32[] = { 27static int mode1_syscalls_32[] = {
27 __NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32, 28 __NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
28 0, /* null terminated */ 29 0, /* null terminated */
@@ -37,8 +38,8 @@ void __secure_computing(int this_syscall)
37 switch (mode) { 38 switch (mode) {
38 case 1: 39 case 1:
39 syscall = mode1_syscalls; 40 syscall = mode1_syscalls;
40#ifdef TIF_32BIT 41#ifdef CONFIG_COMPAT
41 if (test_thread_flag(TIF_32BIT)) 42 if (is_compat_task())
42 syscall = mode1_syscalls_32; 43 syscall = mode1_syscalls_32;
43#endif 44#endif
44 do { 45 do {
diff --git a/kernel/signal.c b/kernel/signal.c
index e73759783dc8..1c8814481a11 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -909,7 +909,9 @@ static void print_fatal_signal(struct pt_regs *regs, int signr)
909 } 909 }
910#endif 910#endif
911 printk("\n"); 911 printk("\n");
912 preempt_disable();
912 show_regs(regs); 913 show_regs(regs);
914 preempt_enable();
913} 915}
914 916
915static int __init setup_print_fatal_signals(char *str) 917static int __init setup_print_fatal_signals(char *str)
@@ -1365,7 +1367,6 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1365 struct siginfo info; 1367 struct siginfo info;
1366 unsigned long flags; 1368 unsigned long flags;
1367 struct sighand_struct *psig; 1369 struct sighand_struct *psig;
1368 struct task_cputime cputime;
1369 int ret = sig; 1370 int ret = sig;
1370 1371
1371 BUG_ON(sig == -1); 1372 BUG_ON(sig == -1);
@@ -1395,9 +1396,10 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1395 info.si_uid = __task_cred(tsk)->uid; 1396 info.si_uid = __task_cred(tsk)->uid;
1396 rcu_read_unlock(); 1397 rcu_read_unlock();
1397 1398
1398 thread_group_cputime(tsk, &cputime); 1399 info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
1399 info.si_utime = cputime_to_jiffies(cputime.utime); 1400 tsk->signal->utime));
1400 info.si_stime = cputime_to_jiffies(cputime.stime); 1401 info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
1402 tsk->signal->stime));
1401 1403
1402 info.si_status = tsk->exit_code & 0x7f; 1404 info.si_status = tsk->exit_code & 0x7f;
1403 if (tsk->exit_code & 0x80) 1405 if (tsk->exit_code & 0x80)
@@ -1573,7 +1575,15 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
1573 read_lock(&tasklist_lock); 1575 read_lock(&tasklist_lock);
1574 if (may_ptrace_stop()) { 1576 if (may_ptrace_stop()) {
1575 do_notify_parent_cldstop(current, CLD_TRAPPED); 1577 do_notify_parent_cldstop(current, CLD_TRAPPED);
1578 /*
1579 * Don't want to allow preemption here, because
1580 * sys_ptrace() needs this task to be inactive.
1581 *
1582 * XXX: implement read_unlock_no_resched().
1583 */
1584 preempt_disable();
1576 read_unlock(&tasklist_lock); 1585 read_unlock(&tasklist_lock);
1586 preempt_enable_no_resched();
1577 schedule(); 1587 schedule();
1578 } else { 1588 } else {
1579 /* 1589 /*
diff --git a/kernel/smp.c b/kernel/smp.c
index 5cfa0e5e3e88..bbedbb7efe32 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -18,6 +18,7 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
18enum { 18enum {
19 CSD_FLAG_WAIT = 0x01, 19 CSD_FLAG_WAIT = 0x01,
20 CSD_FLAG_ALLOC = 0x02, 20 CSD_FLAG_ALLOC = 0x02,
21 CSD_FLAG_LOCK = 0x04,
21}; 22};
22 23
23struct call_function_data { 24struct call_function_data {
@@ -186,6 +187,9 @@ void generic_smp_call_function_single_interrupt(void)
186 if (data_flags & CSD_FLAG_WAIT) { 187 if (data_flags & CSD_FLAG_WAIT) {
187 smp_wmb(); 188 smp_wmb();
188 data->flags &= ~CSD_FLAG_WAIT; 189 data->flags &= ~CSD_FLAG_WAIT;
190 } else if (data_flags & CSD_FLAG_LOCK) {
191 smp_wmb();
192 data->flags &= ~CSD_FLAG_LOCK;
189 } else if (data_flags & CSD_FLAG_ALLOC) 193 } else if (data_flags & CSD_FLAG_ALLOC)
190 kfree(data); 194 kfree(data);
191 } 195 }
@@ -196,6 +200,8 @@ void generic_smp_call_function_single_interrupt(void)
196 } 200 }
197} 201}
198 202
203static DEFINE_PER_CPU(struct call_single_data, csd_data);
204
199/* 205/*
200 * smp_call_function_single - Run a function on a specific CPU 206 * smp_call_function_single - Run a function on a specific CPU
201 * @func: The function to run. This must be fast and non-blocking. 207 * @func: The function to run. This must be fast and non-blocking.
@@ -224,14 +230,38 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
224 func(info); 230 func(info);
225 local_irq_restore(flags); 231 local_irq_restore(flags);
226 } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { 232 } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
227 struct call_single_data *data = NULL; 233 struct call_single_data *data;
228 234
229 if (!wait) { 235 if (!wait) {
236 /*
237 * We are calling a function on a single CPU
238 * and we are not going to wait for it to finish.
239 * We first try to allocate the data, but if we
240 * fail, we fall back to use a per cpu data to pass
241 * the information to that CPU. Since all callers
242 * of this code will use the same data, we must
243 * synchronize the callers to prevent a new caller
244 * from corrupting the data before the callee
245 * can access it.
246 *
247 * The CSD_FLAG_LOCK is used to let us know when
248 * the IPI handler is done with the data.
249 * The first caller will set it, and the callee
250 * will clear it. The next caller must wait for
251 * it to clear before we set it again. This
252 * will make sure the callee is done with the
253 * data before a new caller will use it.
254 */
230 data = kmalloc(sizeof(*data), GFP_ATOMIC); 255 data = kmalloc(sizeof(*data), GFP_ATOMIC);
231 if (data) 256 if (data)
232 data->flags = CSD_FLAG_ALLOC; 257 data->flags = CSD_FLAG_ALLOC;
233 } 258 else {
234 if (!data) { 259 data = &per_cpu(csd_data, me);
260 while (data->flags & CSD_FLAG_LOCK)
261 cpu_relax();
262 data->flags = CSD_FLAG_LOCK;
263 }
264 } else {
235 data = &d; 265 data = &d;
236 data->flags = CSD_FLAG_WAIT; 266 data->flags = CSD_FLAG_WAIT;
237 } 267 }
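
The smp.c change adds a per-cpu fallback slot for the no-wait smp_call_function_single() path: when kmalloc() fails, the caller spins until CSD_FLAG_LOCK is clear, claims the slot, and the IPI handler clears the flag once it has consumed the data. A two-thread userspace model of that handshake using C11 atomics (the names are invented; the real code synchronises across CPUs rather than threads):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define SLOT_LOCK 0x04          /* analogue of CSD_FLAG_LOCK */

struct call_slot {
        atomic_uint flags;
        int payload;            /* the "info" handed to the remote function */
};

static struct call_slot slot;

/* "IPI handler": consume the payload, then release the slot. */
static void *remote_side(void *arg)
{
        (void)arg;
        while (!(atomic_load(&slot.flags) & SLOT_LOCK))
                ;                               /* wait for a request */
        printf("remote ran with payload %d\n", slot.payload);
        atomic_fetch_and(&slot.flags, ~(unsigned)SLOT_LOCK);
        return NULL;
}

/* "Caller": wait for the previous user to finish, then claim the slot. */
static void queue_call(int payload)
{
        while (atomic_load(&slot.flags) & SLOT_LOCK)
                ;                               /* cpu_relax() in the kernel */
        slot.payload = payload;
        atomic_store(&slot.flags, SLOT_LOCK);   /* publish after the data */
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, remote_side, NULL);
        queue_call(42);
        pthread_join(t, NULL);
        return 0;
}

The flag is only set after the payload is written and only cleared after it is read, so two back-to-back callers can never hand the handler a half-updated slot.
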
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bdbe9de9cd8d..9041ea7948fe 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -626,6 +626,7 @@ static int ksoftirqd(void * __bind_cpu)
626 preempt_enable_no_resched(); 626 preempt_enable_no_resched();
627 cond_resched(); 627 cond_resched();
628 preempt_disable(); 628 preempt_disable();
629 rcu_qsctr_inc((long)__bind_cpu);
629 } 630 }
630 preempt_enable(); 631 preempt_enable();
631 set_current_state(TASK_INTERRUPTIBLE); 632 set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/sys.c b/kernel/sys.c
index e7dc0e10a485..37f458e6882a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -559,7 +559,7 @@ error:
559 abort_creds(new); 559 abort_creds(new);
560 return retval; 560 return retval;
561} 561}
562 562
563/* 563/*
564 * change the user struct in a credentials set to match the new UID 564 * change the user struct in a credentials set to match the new UID
565 */ 565 */
@@ -571,6 +571,11 @@ static int set_user(struct cred *new)
571 if (!new_user) 571 if (!new_user)
572 return -EAGAIN; 572 return -EAGAIN;
573 573
574 if (!task_can_switch_user(new_user, current)) {
575 free_uid(new_user);
576 return -EINVAL;
577 }
578
574 if (atomic_read(&new_user->processes) >= 579 if (atomic_read(&new_user->processes) >=
575 current->signal->rlim[RLIMIT_NPROC].rlim_cur && 580 current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
576 new_user != INIT_USER) { 581 new_user != INIT_USER) {
@@ -631,10 +636,11 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
631 goto error; 636 goto error;
632 } 637 }
633 638
634 retval = -EAGAIN; 639 if (new->uid != old->uid) {
635 if (new->uid != old->uid && set_user(new) < 0) 640 retval = set_user(new);
636 goto error; 641 if (retval < 0)
637 642 goto error;
643 }
638 if (ruid != (uid_t) -1 || 644 if (ruid != (uid_t) -1 ||
639 (euid != (uid_t) -1 && euid != old->uid)) 645 (euid != (uid_t) -1 && euid != old->uid))
640 new->suid = new->euid; 646 new->suid = new->euid;
@@ -680,9 +686,10 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
680 retval = -EPERM; 686 retval = -EPERM;
681 if (capable(CAP_SETUID)) { 687 if (capable(CAP_SETUID)) {
682 new->suid = new->uid = uid; 688 new->suid = new->uid = uid;
683 if (uid != old->uid && set_user(new) < 0) { 689 if (uid != old->uid) {
684 retval = -EAGAIN; 690 retval = set_user(new);
685 goto error; 691 if (retval < 0)
692 goto error;
686 } 693 }
687 } else if (uid != old->uid && uid != new->suid) { 694 } else if (uid != old->uid && uid != new->suid) {
688 goto error; 695 goto error;
@@ -734,11 +741,13 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
734 goto error; 741 goto error;
735 } 742 }
736 743
737 retval = -EAGAIN;
738 if (ruid != (uid_t) -1) { 744 if (ruid != (uid_t) -1) {
739 new->uid = ruid; 745 new->uid = ruid;
740 if (ruid != old->uid && set_user(new) < 0) 746 if (ruid != old->uid) {
741 goto error; 747 retval = set_user(new);
748 if (retval < 0)
749 goto error;
750 }
742 } 751 }
743 if (euid != (uid_t) -1) 752 if (euid != (uid_t) -1)
744 new->euid = euid; 753 new->euid = euid;
@@ -1525,22 +1534,14 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
1525 return -EINVAL; 1534 return -EINVAL;
1526 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) 1535 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1527 return -EFAULT; 1536 return -EFAULT;
1537 if (new_rlim.rlim_cur > new_rlim.rlim_max)
1538 return -EINVAL;
1528 old_rlim = current->signal->rlim + resource; 1539 old_rlim = current->signal->rlim + resource;
1529 if ((new_rlim.rlim_max > old_rlim->rlim_max) && 1540 if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
1530 !capable(CAP_SYS_RESOURCE)) 1541 !capable(CAP_SYS_RESOURCE))
1531 return -EPERM; 1542 return -EPERM;
1532 1543 if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
1533 if (resource == RLIMIT_NOFILE) { 1544 return -EPERM;
1534 if (new_rlim.rlim_max == RLIM_INFINITY)
1535 new_rlim.rlim_max = sysctl_nr_open;
1536 if (new_rlim.rlim_cur == RLIM_INFINITY)
1537 new_rlim.rlim_cur = sysctl_nr_open;
1538 if (new_rlim.rlim_max > sysctl_nr_open)
1539 return -EPERM;
1540 }
1541
1542 if (new_rlim.rlim_cur > new_rlim.rlim_max)
1543 return -EINVAL;
1544 1545
1545 retval = security_task_setrlimit(resource, &new_rlim); 1546 retval = security_task_setrlimit(resource, &new_rlim);
1546 if (retval) 1547 if (retval)
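
Two things change in sys.c: the uid-setting paths now propagate set_user()'s own return value (which can be -EINVAL from the new task_can_switch_user() check) instead of hard-coding -EAGAIN, and setrlimit() validates in a fixed order, rejecting rlim_cur > rlim_max first, then applying the capability check, then refusing any RLIMIT_NOFILE hard limit above sysctl_nr_open rather than silently clamping RLIM_INFINITY. A standalone sketch of just that decision ladder (the values and the capability flag are parameters here; in the kernel they come from the caller's creds and signal struct):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define RLIMIT_NOFILE_ID 7              /* illustrative resource id */

static int check_new_rlimit(int resource,
                            unsigned long cur, unsigned long max,
                            unsigned long old_max, unsigned long nr_open,
                            bool cap_sys_resource)
{
        if (cur > max)
                return -EINVAL;         /* soft limit above hard limit */
        if (max > old_max && !cap_sys_resource)
                return -EPERM;          /* raising the hard limit needs a cap */
        if (resource == RLIMIT_NOFILE_ID && max > nr_open)
                return -EPERM;          /* never allow more fds than nr_open */
        return 0;
}

int main(void)
{
        /* An infinity-style request is now rejected instead of clamped. */
        int ret = check_new_rlimit(RLIMIT_NOFILE_ID, ~0UL, ~0UL,
                                   1024, 1048576, true);
        printf("RLIM_INFINITY request for NOFILE -> %d (expect -EPERM=%d)\n",
               ret, -EPERM);
        return 0;
}
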
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 790f9d785663..c5ef44ff850f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -101,6 +101,7 @@ static int two = 2;
101 101
102static int zero; 102static int zero;
103static int one = 1; 103static int one = 1;
104static unsigned long one_ul = 1;
104static int one_hundred = 100; 105static int one_hundred = 100;
105 106
106/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 107/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -974,7 +975,7 @@ static struct ctl_table vm_table[] = {
974 .mode = 0644, 975 .mode = 0644,
975 .proc_handler = &dirty_background_bytes_handler, 976 .proc_handler = &dirty_background_bytes_handler,
976 .strategy = &sysctl_intvec, 977 .strategy = &sysctl_intvec,
977 .extra1 = &one, 978 .extra1 = &one_ul,
978 }, 979 },
979 { 980 {
980 .ctl_name = VM_DIRTY_RATIO, 981 .ctl_name = VM_DIRTY_RATIO,
@@ -995,7 +996,7 @@ static struct ctl_table vm_table[] = {
995 .mode = 0644, 996 .mode = 0644,
996 .proc_handler = &dirty_bytes_handler, 997 .proc_handler = &dirty_bytes_handler,
997 .strategy = &sysctl_intvec, 998 .strategy = &sysctl_intvec,
998 .extra1 = &one, 999 .extra1 = &one_ul,
999 }, 1000 },
1000 { 1001 {
1001 .procname = "dirty_writeback_centisecs", 1002 .procname = "dirty_writeback_centisecs",
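
The one_ul addition matters because dirty_background_bytes and dirty_bytes are unsigned long sysctls, so their handler presumably dereferences extra1 as an unsigned long minimum; pointing it at the int one would read an object of the wrong width. A tiny illustration of why the width matters (userspace; it only prints the sizes, it does not reproduce the misread):

#include <stdio.h>

static int one = 1;
static unsigned long one_ul = 1;

int main(void)
{
        /* On LP64 an unsigned long is wider than an int, so a min/max
         * handler that reads extra1 as unsigned long must not be pointed
         * at the int. */
        printf("sizeof(int) = %zu, sizeof(unsigned long) = %zu\n",
               sizeof one, sizeof one_ul);
        printf("int min = %d, unsigned long min = %lu\n", one, one_ul);
        return 0;
}
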
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 63e05d423a09..21a5ca849514 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -274,6 +274,21 @@ out_bc:
274} 274}
275 275
276/* 276/*
277 * Transfer the do_timer job away from a dying cpu.
278 *
279 * Called with interrupts disabled.
280 */
281static void tick_handover_do_timer(int *cpup)
282{
283 if (*cpup == tick_do_timer_cpu) {
284 int cpu = cpumask_first(cpu_online_mask);
285
286 tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
287 TICK_DO_TIMER_NONE;
288 }
289}
290
291/*
277 * Shutdown an event device on a given cpu: 292 * Shutdown an event device on a given cpu:
278 * 293 *
279 * This is called on a live CPU, when a CPU is dead. So we cannot 294 * This is called on a live CPU, when a CPU is dead. So we cannot
@@ -297,13 +312,6 @@ static void tick_shutdown(unsigned int *cpup)
297 clockevents_exchange_device(dev, NULL); 312 clockevents_exchange_device(dev, NULL);
298 td->evtdev = NULL; 313 td->evtdev = NULL;
299 } 314 }
300 /* Transfer the do_timer job away from this cpu */
301 if (*cpup == tick_do_timer_cpu) {
302 int cpu = cpumask_first(cpu_online_mask);
303
304 tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
305 TICK_DO_TIMER_NONE;
306 }
307 spin_unlock_irqrestore(&tick_device_lock, flags); 315 spin_unlock_irqrestore(&tick_device_lock, flags);
308} 316}
309 317
@@ -357,6 +365,10 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason,
357 tick_broadcast_oneshot_control(reason); 365 tick_broadcast_oneshot_control(reason);
358 break; 366 break;
359 367
368 case CLOCK_EVT_NOTIFY_CPU_DYING:
369 tick_handover_do_timer(dev);
370 break;
371
360 case CLOCK_EVT_NOTIFY_CPU_DEAD: 372 case CLOCK_EVT_NOTIFY_CPU_DEAD:
361 tick_shutdown_broadcast_oneshot(dev); 373 tick_shutdown_broadcast_oneshot(dev);
362 tick_shutdown_broadcast(dev); 374 tick_shutdown_broadcast(dev);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e2a4ff6fc3a6..34e707e5ab87 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -52,6 +52,7 @@ config FUNCTION_TRACER
52 depends on HAVE_FUNCTION_TRACER 52 depends on HAVE_FUNCTION_TRACER
53 depends on DEBUG_KERNEL 53 depends on DEBUG_KERNEL
54 select FRAME_POINTER 54 select FRAME_POINTER
55 select KALLSYMS
55 select TRACING 56 select TRACING
56 select CONTEXT_SWITCH_TRACER 57 select CONTEXT_SWITCH_TRACER
57 help 58 help
@@ -238,6 +239,7 @@ config STACK_TRACER
238 depends on DEBUG_KERNEL 239 depends on DEBUG_KERNEL
239 select FUNCTION_TRACER 240 select FUNCTION_TRACER
240 select STACKTRACE 241 select STACKTRACE
242 select KALLSYMS
241 help 243 help
242 This special tracer records the maximum stack footprint of the 244 This special tracer records the maximum stack footprint of the
243 kernel and displays it in debugfs/tracing/stack_trace. 245 kernel and displays it in debugfs/tracing/stack_trace.
@@ -302,4 +304,27 @@ config FTRACE_STARTUP_TEST
302 functioning properly. It will do tests on all the configured 304 functioning properly. It will do tests on all the configured
303 tracers of ftrace. 305 tracers of ftrace.
304 306
307config MMIOTRACE
308 bool "Memory mapped IO tracing"
309 depends on HAVE_MMIOTRACE_SUPPORT && DEBUG_KERNEL && PCI
310 select TRACING
311 help
312 Mmiotrace traces Memory Mapped I/O access and is meant for
313 debugging and reverse engineering. It is called from the ioremap
314 implementation and works via page faults. Tracing is disabled by
315 default and can be enabled at run-time.
316
317 See Documentation/tracers/mmiotrace.txt.
318 If you are not helping to develop drivers, say N.
319
320config MMIOTRACE_TEST
321 tristate "Test module for mmiotrace"
322 depends on MMIOTRACE && m
323 help
324 This is a dumb module for testing mmiotrace. It is very dangerous
325 as it will write garbage to IO memory starting at a given address.
326 However, it should be safe to use on e.g. an unused portion of VRAM.
327
328 Say N, unless you absolutely know what you are doing.
329
305endmenu 330endmenu
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2f32969c09df..fdf913dfc7e8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -17,6 +17,7 @@
17#include <linux/clocksource.h> 17#include <linux/clocksource.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/suspend.h>
20#include <linux/debugfs.h> 21#include <linux/debugfs.h>
21#include <linux/hardirq.h> 22#include <linux/hardirq.h>
22#include <linux/kthread.h> 23#include <linux/kthread.h>
@@ -1736,9 +1737,12 @@ static void clear_ftrace_pid(struct pid *pid)
1736{ 1737{
1737 struct task_struct *p; 1738 struct task_struct *p;
1738 1739
1740 rcu_read_lock();
1739 do_each_pid_task(pid, PIDTYPE_PID, p) { 1741 do_each_pid_task(pid, PIDTYPE_PID, p) {
1740 clear_tsk_trace_trace(p); 1742 clear_tsk_trace_trace(p);
1741 } while_each_pid_task(pid, PIDTYPE_PID, p); 1743 } while_each_pid_task(pid, PIDTYPE_PID, p);
1744 rcu_read_unlock();
1745
1742 put_pid(pid); 1746 put_pid(pid);
1743} 1747}
1744 1748
@@ -1746,9 +1750,11 @@ static void set_ftrace_pid(struct pid *pid)
1746{ 1750{
1747 struct task_struct *p; 1751 struct task_struct *p;
1748 1752
1753 rcu_read_lock();
1749 do_each_pid_task(pid, PIDTYPE_PID, p) { 1754 do_each_pid_task(pid, PIDTYPE_PID, p) {
1750 set_tsk_trace_trace(p); 1755 set_tsk_trace_trace(p);
1751 } while_each_pid_task(pid, PIDTYPE_PID, p); 1756 } while_each_pid_task(pid, PIDTYPE_PID, p);
1757 rcu_read_unlock();
1752} 1758}
1753 1759
1754static void clear_ftrace_pid_task(struct pid **pid) 1760static void clear_ftrace_pid_task(struct pid **pid)
@@ -1965,6 +1971,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1965#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1971#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1966 1972
1967static atomic_t ftrace_graph_active; 1973static atomic_t ftrace_graph_active;
1974static struct notifier_block ftrace_suspend_notifier;
1968 1975
1969int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) 1976int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
1970{ 1977{
@@ -2026,7 +2033,7 @@ free:
2026static int start_graph_tracing(void) 2033static int start_graph_tracing(void)
2027{ 2034{
2028 struct ftrace_ret_stack **ret_stack_list; 2035 struct ftrace_ret_stack **ret_stack_list;
2029 int ret; 2036 int ret, cpu;
2030 2037
2031 ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE * 2038 ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
2032 sizeof(struct ftrace_ret_stack *), 2039 sizeof(struct ftrace_ret_stack *),
@@ -2035,6 +2042,10 @@ static int start_graph_tracing(void)
2035 if (!ret_stack_list) 2042 if (!ret_stack_list)
2036 return -ENOMEM; 2043 return -ENOMEM;
2037 2044
2045 /* The cpu_boot init_task->ret_stack will never be freed */
2046 for_each_online_cpu(cpu)
2047 ftrace_graph_init_task(idle_task(cpu));
2048
2038 do { 2049 do {
2039 ret = alloc_retstack_tasklist(ret_stack_list); 2050 ret = alloc_retstack_tasklist(ret_stack_list);
2040 } while (ret == -EAGAIN); 2051 } while (ret == -EAGAIN);
@@ -2043,6 +2054,27 @@ static int start_graph_tracing(void)
2043 return ret; 2054 return ret;
2044} 2055}
2045 2056
2057/*
2058 * Hibernation protection.
2059 * The state of the current task is too unstable during
2060 * suspend/restore to disk. We want to protect against that.
2061 */
2062static int
2063ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
2064 void *unused)
2065{
2066 switch (state) {
2067 case PM_HIBERNATION_PREPARE:
2068 pause_graph_tracing();
2069 break;
2070
2071 case PM_POST_HIBERNATION:
2072 unpause_graph_tracing();
2073 break;
2074 }
2075 return NOTIFY_DONE;
2076}
2077
2046int register_ftrace_graph(trace_func_graph_ret_t retfunc, 2078int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2047 trace_func_graph_ent_t entryfunc) 2079 trace_func_graph_ent_t entryfunc)
2048{ 2080{
@@ -2050,6 +2082,9 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2050 2082
2051 mutex_lock(&ftrace_sysctl_lock); 2083 mutex_lock(&ftrace_sysctl_lock);
2052 2084
2085 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
2086 register_pm_notifier(&ftrace_suspend_notifier);
2087
2053 atomic_inc(&ftrace_graph_active); 2088 atomic_inc(&ftrace_graph_active);
2054 ret = start_graph_tracing(); 2089 ret = start_graph_tracing();
2055 if (ret) { 2090 if (ret) {
@@ -2075,6 +2110,7 @@ void unregister_ftrace_graph(void)
2075 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; 2110 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
2076 ftrace_graph_entry = ftrace_graph_entry_stub; 2111 ftrace_graph_entry = ftrace_graph_entry_stub;
2077 ftrace_shutdown(FTRACE_STOP_FUNC_RET); 2112 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
2113 unregister_pm_notifier(&ftrace_suspend_notifier);
2078 2114
2079 mutex_unlock(&ftrace_sysctl_lock); 2115 mutex_unlock(&ftrace_sysctl_lock);
2080} 2116}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8b0daf0662ef..bd38c5cfd8ad 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -246,7 +246,7 @@ static inline int test_time_stamp(u64 delta)
246 return 0; 246 return 0;
247} 247}
248 248
249#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page)) 249#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data))
250 250
251/* 251/*
252 * head_page == tail_page && head == tail then buffer is empty. 252 * head_page == tail_page && head == tail then buffer is empty.
@@ -1025,12 +1025,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1025 } 1025 }
1026 1026
1027 if (next_page == head_page) { 1027 if (next_page == head_page) {
1028 if (!(buffer->flags & RB_FL_OVERWRITE)) { 1028 if (!(buffer->flags & RB_FL_OVERWRITE))
1029 /* reset write */
1030 if (tail <= BUF_PAGE_SIZE)
1031 local_set(&tail_page->write, tail);
1032 goto out_unlock; 1029 goto out_unlock;
1033 }
1034 1030
1035 /* tail_page has not moved yet? */ 1031 /* tail_page has not moved yet? */
1036 if (tail_page == cpu_buffer->tail_page) { 1032 if (tail_page == cpu_buffer->tail_page) {
@@ -1105,6 +1101,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1105 return event; 1101 return event;
1106 1102
1107 out_unlock: 1103 out_unlock:
1104 /* reset write */
1105 if (tail <= BUF_PAGE_SIZE)
1106 local_set(&tail_page->write, tail);
1107
1108 __raw_spin_unlock(&cpu_buffer->lock); 1108 __raw_spin_unlock(&cpu_buffer->lock);
1109 local_irq_restore(flags); 1109 local_irq_restore(flags);
1110 return NULL; 1110 return NULL;
@@ -2174,6 +2174,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2174 2174
2175 cpu_buffer->overrun = 0; 2175 cpu_buffer->overrun = 0;
2176 cpu_buffer->entries = 0; 2176 cpu_buffer->entries = 0;
2177
2178 cpu_buffer->write_stamp = 0;
2179 cpu_buffer->read_stamp = 0;
2177} 2180}
2178 2181
2179/** 2182/**
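
The BUF_PAGE_SIZE change swaps sizeof(struct buffer_data_page) for offsetof(..., data): with a flexible data[] member at the end, sizeof includes any tail padding the compiler adds for alignment, while offsetof gives the exact byte where payload storage begins. A self-contained illustration (the struct here is a stand-in, not the kernel's buffer_data_page):

#include <stddef.h>
#include <stdio.h>

struct data_page {
        unsigned long long time_stamp;  /* 8-byte member forces alignment */
        int commit;
        unsigned char data[];           /* payload starts here */
};

#define PAGE_SZ 4096UL

int main(void)
{
        /* sizeof rounds up to the struct's alignment (16 vs 12 here on
         * most ABIs), so computing the payload size from it does not
         * match where data[] actually begins. */
        printf("sizeof   = %zu -> payload %lu bytes\n",
               sizeof(struct data_page),
               (unsigned long)(PAGE_SZ - sizeof(struct data_page)));
        printf("offsetof = %zu -> payload %lu bytes\n",
               offsetof(struct data_page, data),
               (unsigned long)(PAGE_SZ - offsetof(struct data_page, data)));
        return 0;
}
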
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c580233add95..17bb88d86ac2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -40,7 +40,7 @@
40 40
41#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) 41#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
42 42
43unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 43unsigned long __read_mostly tracing_max_latency;
44unsigned long __read_mostly tracing_thresh; 44unsigned long __read_mostly tracing_thresh;
45 45
46/* 46/*
@@ -3736,7 +3736,7 @@ static struct notifier_block trace_die_notifier = {
3736 * it if we decide to change what log level the ftrace dump 3736 * it if we decide to change what log level the ftrace dump
3737 * should be at. 3737 * should be at.
3738 */ 3738 */
3739#define KERN_TRACE KERN_INFO 3739#define KERN_TRACE KERN_EMERG
3740 3740
3741static void 3741static void
3742trace_printk_seq(struct trace_seq *s) 3742trace_printk_seq(struct trace_seq *s)
@@ -3770,6 +3770,7 @@ void ftrace_dump(void)
3770 dump_ran = 1; 3770 dump_ran = 1;
3771 3771
3772 /* No turning back! */ 3772 /* No turning back! */
3773 tracing_off();
3773 ftrace_kill(); 3774 ftrace_kill();
3774 3775
3775 for_each_tracing_cpu(cpu) { 3776 for_each_tracing_cpu(cpu) {
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 7c2e326bbc8b..62a78d943534 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -380,6 +380,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr)
380 380
381static void __irqsoff_tracer_init(struct trace_array *tr) 381static void __irqsoff_tracer_init(struct trace_array *tr)
382{ 382{
383 tracing_max_latency = 0;
383 irqsoff_trace = tr; 384 irqsoff_trace = tr;
384 /* make sure that the tracer is visible */ 385 /* make sure that the tracer is visible */
385 smp_wmb(); 386 smp_wmb();
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index fffcb069f1dc..80e503ef6136 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,7 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/mmiotrace.h> 10#include <linux/mmiotrace.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <asm/atomic.h>
12 13
13#include "trace.h" 14#include "trace.h"
14 15
@@ -19,6 +20,7 @@ struct header_iter {
19static struct trace_array *mmio_trace_array; 20static struct trace_array *mmio_trace_array;
20static bool overrun_detected; 21static bool overrun_detected;
21static unsigned long prev_overruns; 22static unsigned long prev_overruns;
23static atomic_t dropped_count;
22 24
23static void mmio_reset_data(struct trace_array *tr) 25static void mmio_reset_data(struct trace_array *tr)
24{ 26{
@@ -121,11 +123,11 @@ static void mmio_close(struct trace_iterator *iter)
121 123
122static unsigned long count_overruns(struct trace_iterator *iter) 124static unsigned long count_overruns(struct trace_iterator *iter)
123{ 125{
124 unsigned long cnt = 0; 126 unsigned long cnt = atomic_xchg(&dropped_count, 0);
125 unsigned long over = ring_buffer_overruns(iter->tr->buffer); 127 unsigned long over = ring_buffer_overruns(iter->tr->buffer);
126 128
127 if (over > prev_overruns) 129 if (over > prev_overruns)
128 cnt = over - prev_overruns; 130 cnt += over - prev_overruns;
129 prev_overruns = over; 131 prev_overruns = over;
130 return cnt; 132 return cnt;
131} 133}
@@ -310,8 +312,10 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
310 312
311 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 313 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
312 &irq_flags); 314 &irq_flags);
313 if (!event) 315 if (!event) {
316 atomic_inc(&dropped_count);
314 return; 317 return;
318 }
315 entry = ring_buffer_event_data(event); 319 entry = ring_buffer_event_data(event);
316 tracing_generic_entry_update(&entry->ent, 0, preempt_count()); 320 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
317 entry->ent.type = TRACE_MMIO_RW; 321 entry->ent.type = TRACE_MMIO_RW;
@@ -338,8 +342,10 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
338 342
339 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 343 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
340 &irq_flags); 344 &irq_flags);
341 if (!event) 345 if (!event) {
346 atomic_inc(&dropped_count);
342 return; 347 return;
348 }
343 entry = ring_buffer_event_data(event); 349 entry = ring_buffer_event_data(event);
344 tracing_generic_entry_update(&entry->ent, 0, preempt_count()); 350 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
345 entry->ent.type = TRACE_MMIO_MAP; 351 entry->ent.type = TRACE_MMIO_MAP;
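
The mmiotrace change counts events that could not be reserved in the ring buffer with an atomic dropped_count, and the reader folds it into the overrun total with an exchange-to-zero so no increment is lost or reported twice. A userspace model with C11 atomics (the producer/reader names are invented for the sketch):

#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong dropped_count;

/* Producer side: reserving a slot failed, remember the loss. */
static void record_drop(void)
{
        atomic_fetch_add(&dropped_count, 1);
}

/* Reader side: claim the current total and reset it in one step, so a
 * drop recorded concurrently is never lost and never counted twice. */
static unsigned long collect_drops(void)
{
        return atomic_exchange(&dropped_count, 0);
}

int main(void)
{
        record_drop();
        record_drop();
        printf("overruns this read: %lu\n", collect_drops());
        printf("overruns next read: %lu\n", collect_drops());
        return 0;
}
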
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 43586b689e31..42ae1e77b6b3 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -333,6 +333,7 @@ static void stop_wakeup_tracer(struct trace_array *tr)
333 333
334static int wakeup_tracer_init(struct trace_array *tr) 334static int wakeup_tracer_init(struct trace_array *tr)
335{ 335{
336 tracing_max_latency = 0;
336 wakeup_trace = tr; 337 wakeup_trace = tr;
337 start_wakeup_tracer(tr); 338 start_wakeup_tracer(tr);
338 return 0; 339 return 0;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 88c8eb70f54a..bc8e80a86bca 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -23,10 +23,20 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
23{ 23{
24 struct ring_buffer_event *event; 24 struct ring_buffer_event *event;
25 struct trace_entry *entry; 25 struct trace_entry *entry;
26 unsigned int loops = 0;
26 27
27 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) { 28 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
28 entry = ring_buffer_event_data(event); 29 entry = ring_buffer_event_data(event);
29 30
31 /*
32 * The ring buffer is a size of trace_buf_size, if
33 * we loop more than the size, there's something wrong
34 * with the ring buffer.
35 */
36 if (loops++ > trace_buf_size) {
37 printk(KERN_CONT ".. bad ring buffer ");
38 goto failed;
39 }
30 if (!trace_valid_entry(entry)) { 40 if (!trace_valid_entry(entry)) {
31 printk(KERN_CONT ".. invalid entry %d ", 41 printk(KERN_CONT ".. invalid entry %d ",
32 entry->type); 42 entry->type);
@@ -57,11 +67,20 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
57 67
58 cnt = ring_buffer_entries(tr->buffer); 68 cnt = ring_buffer_entries(tr->buffer);
59 69
70 /*
71 * The trace_test_buffer_cpu runs a while loop to consume all data.
72 * If the calling tracer is broken, and is constantly filling
73 * the buffer, this will run forever, and hard lock the box.
74 * We disable the ring buffer while we do this test to prevent
75 * a hard lock up.
76 */
77 tracing_off();
60 for_each_possible_cpu(cpu) { 78 for_each_possible_cpu(cpu) {
61 ret = trace_test_buffer_cpu(tr, cpu); 79 ret = trace_test_buffer_cpu(tr, cpu);
62 if (ret) 80 if (ret)
63 break; 81 break;
64 } 82 }
83 tracing_on();
65 __raw_spin_unlock(&ftrace_max_lock); 84 __raw_spin_unlock(&ftrace_max_lock);
66 local_irq_restore(flags); 85 local_irq_restore(flags);
67 86
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 43f891b05a4b..00d59d048edf 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -122,8 +122,10 @@ void acct_update_integrals(struct task_struct *tsk)
122 if (likely(tsk->mm)) { 122 if (likely(tsk->mm)) {
123 cputime_t time, dtime; 123 cputime_t time, dtime;
124 struct timeval value; 124 struct timeval value;
125 unsigned long flags;
125 u64 delta; 126 u64 delta;
126 127
128 local_irq_save(flags);
127 time = tsk->stime + tsk->utime; 129 time = tsk->stime + tsk->utime;
128 dtime = cputime_sub(time, tsk->acct_timexpd); 130 dtime = cputime_sub(time, tsk->acct_timexpd);
129 jiffies_to_timeval(cputime_to_jiffies(dtime), &value); 131 jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
@@ -131,10 +133,12 @@ void acct_update_integrals(struct task_struct *tsk)
131 delta = delta * USEC_PER_SEC + value.tv_usec; 133 delta = delta * USEC_PER_SEC + value.tv_usec;
132 134
133 if (delta == 0) 135 if (delta == 0)
134 return; 136 goto out;
135 tsk->acct_timexpd = time; 137 tsk->acct_timexpd = time;
136 tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); 138 tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
137 tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; 139 tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
140 out:
141 local_irq_restore(flags);
138 } 142 }
139} 143}
140 144
diff --git a/kernel/user.c b/kernel/user.c
index 477b6660f447..fbb300e6191f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -72,6 +72,7 @@ static void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent)
72static void uid_hash_remove(struct user_struct *up) 72static void uid_hash_remove(struct user_struct *up)
73{ 73{
74 hlist_del_init(&up->uidhash_node); 74 hlist_del_init(&up->uidhash_node);
75 put_user_ns(up->user_ns);
75} 76}
76 77
77static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) 78static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
@@ -285,14 +286,12 @@ int __init uids_sysfs_init(void)
285/* work function to remove sysfs directory for a user and free up 286/* work function to remove sysfs directory for a user and free up
286 * corresponding structures. 287 * corresponding structures.
287 */ 288 */
288static void remove_user_sysfs_dir(struct work_struct *w) 289static void cleanup_user_struct(struct work_struct *w)
289{ 290{
290 struct user_struct *up = container_of(w, struct user_struct, work); 291 struct user_struct *up = container_of(w, struct user_struct, work);
291 unsigned long flags; 292 unsigned long flags;
292 int remove_user = 0; 293 int remove_user = 0;
293 294
294 if (up->user_ns != &init_user_ns)
295 return;
296 /* Make uid_hash_remove() + sysfs_remove_file() + kobject_del() 295 /* Make uid_hash_remove() + sysfs_remove_file() + kobject_del()
297 * atomic. 296 * atomic.
298 */ 297 */
@@ -311,9 +310,11 @@ static void remove_user_sysfs_dir(struct work_struct *w)
311 if (!remove_user) 310 if (!remove_user)
312 goto done; 311 goto done;
313 312
314 kobject_uevent(&up->kobj, KOBJ_REMOVE); 313 if (up->user_ns == &init_user_ns) {
315 kobject_del(&up->kobj); 314 kobject_uevent(&up->kobj, KOBJ_REMOVE);
316 kobject_put(&up->kobj); 315 kobject_del(&up->kobj);
316 kobject_put(&up->kobj);
317 }
317 318
318 sched_destroy_user(up); 319 sched_destroy_user(up);
319 key_put(up->uid_keyring); 320 key_put(up->uid_keyring);
@@ -334,8 +335,7 @@ static void free_user(struct user_struct *up, unsigned long flags)
334 atomic_inc(&up->__count); 335 atomic_inc(&up->__count);
335 spin_unlock_irqrestore(&uidhash_lock, flags); 336 spin_unlock_irqrestore(&uidhash_lock, flags);
336 337
337 put_user_ns(up->user_ns); 338 INIT_WORK(&up->work, cleanup_user_struct);
338 INIT_WORK(&up->work, remove_user_sysfs_dir);
339 schedule_work(&up->work); 339 schedule_work(&up->work);
340} 340}
341 341
@@ -357,12 +357,29 @@ static void free_user(struct user_struct *up, unsigned long flags)
357 sched_destroy_user(up); 357 sched_destroy_user(up);
358 key_put(up->uid_keyring); 358 key_put(up->uid_keyring);
359 key_put(up->session_keyring); 359 key_put(up->session_keyring);
360 put_user_ns(up->user_ns);
361 kmem_cache_free(uid_cachep, up); 360 kmem_cache_free(uid_cachep, up);
362} 361}
363 362
364#endif 363#endif
365 364
365#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_USER_SCHED)
366/*
367 * We need to check if a setuid can take place. This function should be called
368 * before successfully completing the setuid.
369 */
370int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
371{
372
373 return sched_rt_can_attach(up->tg, tsk);
374
375}
376#else
377int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
378{
379 return 1;
380}
381#endif
382
366/* 383/*
367 * Locate the user_struct for the passed UID. If found, take a ref on it. The 384 * Locate the user_struct for the passed UID. If found, take a ref on it. The
368 * caller must undo that ref with free_uid(). 385 * caller must undo that ref with free_uid().
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 79084311ee57..076c7c8215b0 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -60,12 +60,25 @@ int create_user_ns(struct cred *new)
60 return 0; 60 return 0;
61} 61}
62 62
63void free_user_ns(struct kref *kref) 63/*
64 * Deferred destructor for a user namespace. This is required because
65 * free_user_ns() may be called with uidhash_lock held, but we need to call
66 * back to free_uid() which will want to take the lock again.
67 */
68static void free_user_ns_work(struct work_struct *work)
64{ 69{
65 struct user_namespace *ns; 70 struct user_namespace *ns =
66 71 container_of(work, struct user_namespace, destroyer);
67 ns = container_of(kref, struct user_namespace, kref);
68 free_uid(ns->creator); 72 free_uid(ns->creator);
69 kfree(ns); 73 kfree(ns);
70} 74}
75
76void free_user_ns(struct kref *kref)
77{
78 struct user_namespace *ns =
79 container_of(kref, struct user_namespace, kref);
80
81 INIT_WORK(&ns->destroyer, free_user_ns_work);
82 schedule_work(&ns->destroyer);
83}
71EXPORT_SYMBOL(free_user_ns); 84EXPORT_SYMBOL(free_user_ns);
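
The free_user_ns() rewrite is another defer-to-avoid-recursion pattern: the final kref put can run with uidhash_lock held, yet tearing the namespace down needs free_uid(), which wants that same lock, so the teardown is pushed to a work item that runs later in a clean context. A simplified model using a manual cleanup list in place of the kernel workqueue (sketch only; the list and drain machinery here are invented):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct ns {
        struct ns *next_to_free;        /* link for the deferred-free list */
        char name[16];
};

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;
static struct ns *deferred;             /* drained by a "worker" later */

/* May be invoked by callers that already hold registry_lock, so it must
 * not take the lock or free anything that needs it; it only queues.
 * (A real version would protect this push with its own lock.) */
static void release_ns(struct ns *n)
{
        n->next_to_free = deferred;
        deferred = n;
}

/* Runs later, from a context that holds no locks (the work item in the
 * kernel version), and can therefore do the real teardown. */
static void drain_deferred(void)
{
        while (deferred) {
                struct ns *n = deferred;

                deferred = n->next_to_free;
                pthread_mutex_lock(&registry_lock);
                /* drop back-references kept in the registry here */
                pthread_mutex_unlock(&registry_lock);
                printf("freed namespace %s\n", n->name);
                free(n);
        }
}

int main(void)
{
        struct ns *n = calloc(1, sizeof(*n));

        snprintf(n->name, sizeof(n->name), "demo");
        pthread_mutex_lock(&registry_lock);
        release_ns(n);                  /* safe even with the lock held */
        pthread_mutex_unlock(&registry_lock);
        drain_deferred();
        return 0;
}
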
diff --git a/kernel/wait.c b/kernel/wait.c
index cd87131f2fc2..42a2dbc181c8 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -91,6 +91,15 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
91} 91}
92EXPORT_SYMBOL(prepare_to_wait_exclusive); 92EXPORT_SYMBOL(prepare_to_wait_exclusive);
93 93
94/*
95 * finish_wait - clean up after waiting in a queue
96 * @q: waitqueue waited on
97 * @wait: wait descriptor
98 *
99 * Sets current thread back to running state and removes
100 * the wait descriptor from the given waitqueue if still
101 * queued.
102 */
94void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) 103void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
95{ 104{
96 unsigned long flags; 105 unsigned long flags;
@@ -117,6 +126,39 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
117} 126}
118EXPORT_SYMBOL(finish_wait); 127EXPORT_SYMBOL(finish_wait);
119 128
129/*
130 * abort_exclusive_wait - abort exclusive waiting in a queue
131 * @q: waitqueue waited on
132 * @wait: wait descriptor
133 * @mode: runstate of the waiter to be woken
134 * @key: key to identify a wait bit queue or %NULL
135 *
136 * Sets current thread back to running state and removes
137 * the wait descriptor from the given waitqueue if still
138 * queued.
139 *
140 * Wakes up the next waiter if the caller is concurrently
141 * woken up through the queue.
142 *
143 * This prevents waiter starvation where an exclusive waiter
144 * aborts and is woken up concurrently and no one wakes up
145 * the next waiter.
146 */
147void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
148 unsigned int mode, void *key)
149{
150 unsigned long flags;
151
152 __set_current_state(TASK_RUNNING);
153 spin_lock_irqsave(&q->lock, flags);
154 if (!list_empty(&wait->task_list))
155 list_del_init(&wait->task_list);
156 else if (waitqueue_active(q))
157 __wake_up_common(q, mode, 1, 0, key);
158 spin_unlock_irqrestore(&q->lock, flags);
159}
160EXPORT_SYMBOL(abort_exclusive_wait);
161
120int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) 162int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
121{ 163{
122 int ret = default_wake_function(wait, mode, sync, key); 164 int ret = default_wake_function(wait, mode, sync, key);
@@ -177,17 +219,20 @@ int __sched
177__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, 219__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
178 int (*action)(void *), unsigned mode) 220 int (*action)(void *), unsigned mode)
179{ 221{
180 int ret = 0;
181
182 do { 222 do {
223 int ret;
224
183 prepare_to_wait_exclusive(wq, &q->wait, mode); 225 prepare_to_wait_exclusive(wq, &q->wait, mode);
184 if (test_bit(q->key.bit_nr, q->key.flags)) { 226 if (!test_bit(q->key.bit_nr, q->key.flags))
185 if ((ret = (*action)(q->key.flags))) 227 continue;
186 break; 228 ret = action(q->key.flags);
187 } 229 if (!ret)
230 continue;
231 abort_exclusive_wait(wq, &q->wait, mode, &q->key);
232 return ret;
188 } while (test_and_set_bit(q->key.bit_nr, q->key.flags)); 233 } while (test_and_set_bit(q->key.bit_nr, q->key.flags));
189 finish_wait(wq, &q->wait); 234 finish_wait(wq, &q->wait);
190 return ret; 235 return 0;
191} 236}
192EXPORT_SYMBOL(__wait_on_bit_lock); 237EXPORT_SYMBOL(__wait_on_bit_lock);
193 238
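
The wait.c changes restructure __wait_on_bit_lock() so that a failing action exits through abort_exclusive_wait() instead of finish_wait(). A compressed model of the loop shape using a C11 atomic flag (the ordering of the test and the sleep differs from the kernel, which peeks at the bit before sleeping, but the abort-on-failure structure is the point; in the kernel the action sleeps, so this is not a real spin):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_flag bit_lock = ATOMIC_FLAG_INIT;

/* Stand-in for the caller-supplied action; the real one typically sleeps
 * and may fail with -EINTR. Returning non-zero means "give up". */
static int action(void)
{
        return 0;
}

static int wait_on_bit_lock_model(void)
{
        do {
                /* prepare_to_wait_exclusive() would queue us here */
                if (atomic_flag_test_and_set(&bit_lock) == false)
                        break;                  /* got the lock */
                int ret = action();
                if (ret) {
                        /* abort_exclusive_wait() equivalent goes here: in
                         * the kernel it also wakes the next exclusive
                         * waiter so the pending wake-up is not swallowed. */
                        return ret;
                }
        } while (true);
        /* finish_wait() equivalent: we own the lock now */
        return 0;
}

int main(void)
{
        printf("lock acquired: %d\n", wait_on_bit_lock_model() == 0);
        atomic_flag_clear(&bit_lock);
        return 0;
}

The starvation case the new docbook comment describes is exactly the one the abort path closes: an exclusive waiter is chosen by __wake_up_common(), then bails out on a signal; unless it forwards the wake-up, the unlock that woke it is consumed and every remaining waiter stays asleep.
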