aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/async.c94
-rw-r--r--kernel/cgroup.c3
-rw-r--r--kernel/cpuset.c13
-rw-r--r--kernel/exit.c3
-rw-r--r--kernel/fork.c6
-rw-r--r--kernel/irq/numa_migrate.c7
-rw-r--r--kernel/itimer.c4
-rw-r--r--kernel/module.c35
-rw-r--r--kernel/posix-cpu-timers.c117
-rw-r--r--kernel/power/main.c26
-rw-r--r--kernel/profile.c3
-rw-r--r--kernel/sched.c21
-rw-r--r--kernel/sched_fair.c21
-rw-r--r--kernel/sched_rt.c4
-rw-r--r--kernel/sched_stats.h45
-rw-r--r--kernel/signal.c8
-rw-r--r--kernel/sys.c16
-rw-r--r--kernel/sysctl.c5
-rw-r--r--kernel/trace/ftrace.c5
-rw-r--r--kernel/wait.c59
20 files changed, 387 insertions, 108 deletions
diff --git a/kernel/async.c b/kernel/async.c
index 608b32b42812..f565891f2c9b 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -54,6 +54,7 @@ asynchronous and synchronous parts of the kernel.
54#include <linux/sched.h> 54#include <linux/sched.h>
55#include <linux/init.h> 55#include <linux/init.h>
56#include <linux/kthread.h> 56#include <linux/kthread.h>
57#include <linux/delay.h>
57#include <asm/atomic.h> 58#include <asm/atomic.h>
58 59
59static async_cookie_t next_cookie = 1; 60static async_cookie_t next_cookie = 1;
@@ -132,21 +133,23 @@ static void run_one_entry(void)
132 entry = list_first_entry(&async_pending, struct async_entry, list); 133 entry = list_first_entry(&async_pending, struct async_entry, list);
133 134
134 /* 2) move it to the running queue */ 135 /* 2) move it to the running queue */
135 list_del(&entry->list); 136 list_move_tail(&entry->list, entry->running);
136 list_add_tail(&entry->list, &async_running);
137 spin_unlock_irqrestore(&async_lock, flags); 137 spin_unlock_irqrestore(&async_lock, flags);
138 138
139 /* 3) run it (and print duration)*/ 139 /* 3) run it (and print duration)*/
140 if (initcall_debug && system_state == SYSTEM_BOOTING) { 140 if (initcall_debug && system_state == SYSTEM_BOOTING) {
141 printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current)); 141 printk("calling %lli_%pF @ %i\n", (long long)entry->cookie,
142 entry->func, task_pid_nr(current));
142 calltime = ktime_get(); 143 calltime = ktime_get();
143 } 144 }
144 entry->func(entry->data, entry->cookie); 145 entry->func(entry->data, entry->cookie);
145 if (initcall_debug && system_state == SYSTEM_BOOTING) { 146 if (initcall_debug && system_state == SYSTEM_BOOTING) {
146 rettime = ktime_get(); 147 rettime = ktime_get();
147 delta = ktime_sub(rettime, calltime); 148 delta = ktime_sub(rettime, calltime);
148 printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie, 149 printk("initcall %lli_%pF returned 0 after %lld usecs\n",
149 entry->func, ktime_to_ns(delta) >> 10); 150 (long long)entry->cookie,
151 entry->func,
152 (long long)ktime_to_ns(delta) >> 10);
150 } 153 }
151 154
152 /* 4) remove it from the running queue */ 155 /* 4) remove it from the running queue */
@@ -205,18 +208,44 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct l
205 return newcookie; 208 return newcookie;
206} 209}
207 210
211/**
212 * async_schedule - schedule a function for asynchronous execution
213 * @ptr: function to execute asynchronously
214 * @data: data pointer to pass to the function
215 *
216 * Returns an async_cookie_t that may be used for checkpointing later.
217 * Note: This function may be called from atomic or non-atomic contexts.
218 */
208async_cookie_t async_schedule(async_func_ptr *ptr, void *data) 219async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
209{ 220{
210 return __async_schedule(ptr, data, &async_pending); 221 return __async_schedule(ptr, data, &async_running);
211} 222}
212EXPORT_SYMBOL_GPL(async_schedule); 223EXPORT_SYMBOL_GPL(async_schedule);
213 224
214async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running) 225/**
226 * async_schedule_domain - schedule a function for asynchronous execution within a certain domain
227 * @ptr: function to execute asynchronously
228 * @data: data pointer to pass to the function
229 * @running: running list for the domain
230 *
231 * Returns an async_cookie_t that may be used for checkpointing later.
232 * @running may be used in the async_synchronize_*_domain() functions
233 * to wait within a certain synchronization domain rather than globally.
234 * A synchronization domain is specified via the running queue @running to use.
235 * Note: This function may be called from atomic or non-atomic contexts.
236 */
237async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data,
238 struct list_head *running)
215{ 239{
216 return __async_schedule(ptr, data, running); 240 return __async_schedule(ptr, data, running);
217} 241}
218EXPORT_SYMBOL_GPL(async_schedule_special); 242EXPORT_SYMBOL_GPL(async_schedule_domain);
219 243
244/**
245 * async_synchronize_full - synchronize all asynchronous function calls
246 *
247 * This function waits until all asynchronous function calls have been done.
248 */
220void async_synchronize_full(void) 249void async_synchronize_full(void)
221{ 250{
222 do { 251 do {
@@ -225,13 +254,30 @@ void async_synchronize_full(void)
225} 254}
226EXPORT_SYMBOL_GPL(async_synchronize_full); 255EXPORT_SYMBOL_GPL(async_synchronize_full);
227 256
228void async_synchronize_full_special(struct list_head *list) 257/**
258 * async_synchronize_full_domain - synchronize all asynchronous function within a certain domain
259 * @list: running list to synchronize on
260 *
261 * This function waits until all asynchronous function calls for the
262 * synchronization domain specified by the running list @list have been done.
263 */
264void async_synchronize_full_domain(struct list_head *list)
229{ 265{
230 async_synchronize_cookie_special(next_cookie, list); 266 async_synchronize_cookie_domain(next_cookie, list);
231} 267}
232EXPORT_SYMBOL_GPL(async_synchronize_full_special); 268EXPORT_SYMBOL_GPL(async_synchronize_full_domain);
233 269
234void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running) 270/**
271 * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing
272 * @cookie: async_cookie_t to use as checkpoint
273 * @running: running list to synchronize on
274 *
275 * This function waits until all asynchronous function calls for the
276 * synchronization domain specified by the running list @list submitted
277 * prior to @cookie have been done.
278 */
279void async_synchronize_cookie_domain(async_cookie_t cookie,
280 struct list_head *running)
235{ 281{
236 ktime_t starttime, delta, endtime; 282 ktime_t starttime, delta, endtime;
237 283
@@ -247,14 +293,22 @@ void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *r
247 delta = ktime_sub(endtime, starttime); 293 delta = ktime_sub(endtime, starttime);
248 294
249 printk("async_continuing @ %i after %lli usec\n", 295 printk("async_continuing @ %i after %lli usec\n",
250 task_pid_nr(current), ktime_to_ns(delta) >> 10); 296 task_pid_nr(current),
297 (long long)ktime_to_ns(delta) >> 10);
251 } 298 }
252} 299}
253EXPORT_SYMBOL_GPL(async_synchronize_cookie_special); 300EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain);
254 301
302/**
303 * async_synchronize_cookie - synchronize asynchronous function calls with cookie checkpointing
304 * @cookie: async_cookie_t to use as checkpoint
305 *
306 * This function waits until all asynchronous function calls prior to @cookie
307 * have been done.
308 */
255void async_synchronize_cookie(async_cookie_t cookie) 309void async_synchronize_cookie(async_cookie_t cookie)
256{ 310{
257 async_synchronize_cookie_special(cookie, &async_running); 311 async_synchronize_cookie_domain(cookie, &async_running);
258} 312}
259EXPORT_SYMBOL_GPL(async_synchronize_cookie); 313EXPORT_SYMBOL_GPL(async_synchronize_cookie);
260 314
@@ -315,7 +369,11 @@ static int async_manager_thread(void *unused)
315 ec = atomic_read(&entry_count); 369 ec = atomic_read(&entry_count);
316 370
317 while (tc < ec && tc < MAX_THREADS) { 371 while (tc < ec && tc < MAX_THREADS) {
318 kthread_run(async_thread, NULL, "async/%i", tc); 372 if (IS_ERR(kthread_run(async_thread, NULL, "async/%i",
373 tc))) {
374 msleep(100);
375 continue;
376 }
319 atomic_inc(&thread_count); 377 atomic_inc(&thread_count);
320 tc++; 378 tc++;
321 } 379 }
@@ -330,7 +388,9 @@ static int async_manager_thread(void *unused)
330static int __init async_init(void) 388static int __init async_init(void)
331{ 389{
332 if (async_enabled) 390 if (async_enabled)
333 kthread_run(async_manager_thread, NULL, "async/mgr"); 391 if (IS_ERR(kthread_run(async_manager_thread, NULL,
392 "async/mgr")))
393 async_enabled = 0;
334 return 0; 394 return 0;
335} 395}
336 396
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5a54ff42874e..e14db9c089b9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2351,7 +2351,7 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
2351 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 2351 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2352 struct cgroup_subsys *ss = subsys[i]; 2352 struct cgroup_subsys *ss = subsys[i];
2353 if (ss->root == root) 2353 if (ss->root == root)
2354 mutex_lock_nested(&ss->hierarchy_mutex, i); 2354 mutex_lock(&ss->hierarchy_mutex);
2355 } 2355 }
2356} 2356}
2357 2357
@@ -2637,6 +2637,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
2637 BUG_ON(!list_empty(&init_task.tasks)); 2637 BUG_ON(!list_empty(&init_task.tasks));
2638 2638
2639 mutex_init(&ss->hierarchy_mutex); 2639 mutex_init(&ss->hierarchy_mutex);
2640 lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
2640 ss->active = 1; 2641 ss->active = 1;
2641} 2642}
2642 2643
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index a85678865c5e..f76db9dcaa05 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -61,6 +61,14 @@
61#include <linux/cgroup.h> 61#include <linux/cgroup.h>
62 62
63/* 63/*
64 * Workqueue for cpuset related tasks.
65 *
66 * Using kevent workqueue may cause deadlock when memory_migrate
67 * is set. So we create a separate workqueue thread for cpuset.
68 */
69static struct workqueue_struct *cpuset_wq;
70
71/*
64 * Tracks how many cpusets are currently defined in system. 72 * Tracks how many cpusets are currently defined in system.
65 * When there is only one cpuset (the root cpuset) we can 73 * When there is only one cpuset (the root cpuset) we can
66 * short circuit some hooks. 74 * short circuit some hooks.
@@ -831,7 +839,7 @@ static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);
831 */ 839 */
832static void async_rebuild_sched_domains(void) 840static void async_rebuild_sched_domains(void)
833{ 841{
834 schedule_work(&rebuild_sched_domains_work); 842 queue_work(cpuset_wq, &rebuild_sched_domains_work);
835} 843}
836 844
837/* 845/*
@@ -2111,6 +2119,9 @@ void __init cpuset_init_smp(void)
2111 2119
2112 hotcpu_notifier(cpuset_track_online_cpus, 0); 2120 hotcpu_notifier(cpuset_track_online_cpus, 0);
2113 hotplug_memory_notifier(cpuset_track_online_nodes, 10); 2121 hotplug_memory_notifier(cpuset_track_online_nodes, 10);
2122
2123 cpuset_wq = create_singlethread_workqueue("cpuset");
2124 BUG_ON(!cpuset_wq);
2114} 2125}
2115 2126
2116/** 2127/**
diff --git a/kernel/exit.c b/kernel/exit.c
index f80dec3f1875..efd30ccf3858 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -118,6 +118,8 @@ static void __exit_signal(struct task_struct *tsk)
118 * We won't ever get here for the group leader, since it 118 * We won't ever get here for the group leader, since it
119 * will have been the last reference on the signal_struct. 119 * will have been the last reference on the signal_struct.
120 */ 120 */
121 sig->utime = cputime_add(sig->utime, task_utime(tsk));
122 sig->stime = cputime_add(sig->stime, task_stime(tsk));
121 sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); 123 sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
122 sig->min_flt += tsk->min_flt; 124 sig->min_flt += tsk->min_flt;
123 sig->maj_flt += tsk->maj_flt; 125 sig->maj_flt += tsk->maj_flt;
@@ -126,6 +128,7 @@ static void __exit_signal(struct task_struct *tsk)
126 sig->inblock += task_io_get_inblock(tsk); 128 sig->inblock += task_io_get_inblock(tsk);
127 sig->oublock += task_io_get_oublock(tsk); 129 sig->oublock += task_io_get_oublock(tsk);
128 task_io_accounting_add(&sig->ioac, &tsk->ioac); 130 task_io_accounting_add(&sig->ioac, &tsk->ioac);
131 sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
129 sig = NULL; /* Marker for below. */ 132 sig = NULL; /* Marker for below. */
130 } 133 }
131 134
diff --git a/kernel/fork.c b/kernel/fork.c
index 242a706e7721..a66fbde20715 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -851,13 +851,14 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
851 sig->tty_old_pgrp = NULL; 851 sig->tty_old_pgrp = NULL;
852 sig->tty = NULL; 852 sig->tty = NULL;
853 853
854 sig->cutime = sig->cstime = cputime_zero; 854 sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
855 sig->gtime = cputime_zero; 855 sig->gtime = cputime_zero;
856 sig->cgtime = cputime_zero; 856 sig->cgtime = cputime_zero;
857 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; 857 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
858 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; 858 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
859 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; 859 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
860 task_io_accounting_init(&sig->ioac); 860 task_io_accounting_init(&sig->ioac);
861 sig->sum_sched_runtime = 0;
861 taskstats_tgid_init(sig); 862 taskstats_tgid_init(sig);
862 863
863 task_lock(current->group_leader); 864 task_lock(current->group_leader);
@@ -1005,6 +1006,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1005 * triggers too late. This doesn't hurt, the check is only there 1006 * triggers too late. This doesn't hurt, the check is only there
1006 * to stop root fork bombs. 1007 * to stop root fork bombs.
1007 */ 1008 */
1009 retval = -EAGAIN;
1008 if (nr_threads >= max_threads) 1010 if (nr_threads >= max_threads)
1009 goto bad_fork_cleanup_count; 1011 goto bad_fork_cleanup_count;
1010 1012
@@ -1093,7 +1095,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1093#ifdef CONFIG_DEBUG_MUTEXES 1095#ifdef CONFIG_DEBUG_MUTEXES
1094 p->blocked_on = NULL; /* not blocked yet */ 1096 p->blocked_on = NULL; /* not blocked yet */
1095#endif 1097#endif
1096 if (unlikely(ptrace_reparented(current))) 1098 if (unlikely(current->ptrace))
1097 ptrace_fork(p, clone_flags); 1099 ptrace_fork(p, clone_flags);
1098 1100
1099 /* Perform scheduler related setup. Assign this task to a CPU. */ 1101 /* Perform scheduler related setup. Assign this task to a CPU. */
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index ecf765c6a77a..acd88356ac76 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -71,7 +71,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
71 desc = irq_desc_ptrs[irq]; 71 desc = irq_desc_ptrs[irq];
72 72
73 if (desc && old_desc != desc) 73 if (desc && old_desc != desc)
74 goto out_unlock; 74 goto out_unlock;
75 75
76 node = cpu_to_node(cpu); 76 node = cpu_to_node(cpu);
77 desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); 77 desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
@@ -84,10 +84,15 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
84 init_copy_one_irq_desc(irq, old_desc, desc, cpu); 84 init_copy_one_irq_desc(irq, old_desc, desc, cpu);
85 85
86 irq_desc_ptrs[irq] = desc; 86 irq_desc_ptrs[irq] = desc;
87 spin_unlock_irqrestore(&sparse_irq_lock, flags);
87 88
88 /* free the old one */ 89 /* free the old one */
89 free_one_irq_desc(old_desc, desc); 90 free_one_irq_desc(old_desc, desc);
91 spin_unlock(&old_desc->lock);
90 kfree(old_desc); 92 kfree(old_desc);
93 spin_lock(&desc->lock);
94
95 return desc;
91 96
92out_unlock: 97out_unlock:
93 spin_unlock_irqrestore(&sparse_irq_lock, flags); 98 spin_unlock_irqrestore(&sparse_irq_lock, flags);
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 6a5fe93dd8bd..58762f7077ec 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -62,7 +62,7 @@ int do_getitimer(int which, struct itimerval *value)
62 struct task_cputime cputime; 62 struct task_cputime cputime;
63 cputime_t utime; 63 cputime_t utime;
64 64
65 thread_group_cputime(tsk, &cputime); 65 thread_group_cputimer(tsk, &cputime);
66 utime = cputime.utime; 66 utime = cputime.utime;
67 if (cputime_le(cval, utime)) { /* about to fire */ 67 if (cputime_le(cval, utime)) { /* about to fire */
68 cval = jiffies_to_cputime(1); 68 cval = jiffies_to_cputime(1);
@@ -82,7 +82,7 @@ int do_getitimer(int which, struct itimerval *value)
82 struct task_cputime times; 82 struct task_cputime times;
83 cputime_t ptime; 83 cputime_t ptime;
84 84
85 thread_group_cputime(tsk, &times); 85 thread_group_cputimer(tsk, &times);
86 ptime = cputime_add(times.utime, times.stime); 86 ptime = cputime_add(times.utime, times.stime);
87 if (cputime_le(cval, ptime)) { /* about to fire */ 87 if (cputime_le(cval, ptime)) { /* about to fire */
88 cval = jiffies_to_cputime(1); 88 cval = jiffies_to_cputime(1);
diff --git a/kernel/module.c b/kernel/module.c
index e8b51d41dd72..ba22484a987e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -573,13 +573,13 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
573/* Init the unload section of the module. */ 573/* Init the unload section of the module. */
574static void module_unload_init(struct module *mod) 574static void module_unload_init(struct module *mod)
575{ 575{
576 unsigned int i; 576 int cpu;
577 577
578 INIT_LIST_HEAD(&mod->modules_which_use_me); 578 INIT_LIST_HEAD(&mod->modules_which_use_me);
579 for (i = 0; i < NR_CPUS; i++) 579 for_each_possible_cpu(cpu)
580 local_set(&mod->ref[i].count, 0); 580 local_set(__module_ref_addr(mod, cpu), 0);
581 /* Hold reference count during initialization. */ 581 /* Hold reference count during initialization. */
582 local_set(&mod->ref[raw_smp_processor_id()].count, 1); 582 local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1);
583 /* Backwards compatibility macros put refcount during init. */ 583 /* Backwards compatibility macros put refcount during init. */
584 mod->waiter = current; 584 mod->waiter = current;
585} 585}
@@ -717,10 +717,11 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
717 717
718unsigned int module_refcount(struct module *mod) 718unsigned int module_refcount(struct module *mod)
719{ 719{
720 unsigned int i, total = 0; 720 unsigned int total = 0;
721 int cpu;
721 722
722 for (i = 0; i < NR_CPUS; i++) 723 for_each_possible_cpu(cpu)
723 total += local_read(&mod->ref[i].count); 724 total += local_read(__module_ref_addr(mod, cpu));
724 return total; 725 return total;
725} 726}
726EXPORT_SYMBOL(module_refcount); 727EXPORT_SYMBOL(module_refcount);
@@ -894,7 +895,7 @@ void module_put(struct module *module)
894{ 895{
895 if (module) { 896 if (module) {
896 unsigned int cpu = get_cpu(); 897 unsigned int cpu = get_cpu();
897 local_dec(&module->ref[cpu].count); 898 local_dec(__module_ref_addr(module, cpu));
898 /* Maybe they're waiting for us to drop reference? */ 899 /* Maybe they're waiting for us to drop reference? */
899 if (unlikely(!module_is_live(module))) 900 if (unlikely(!module_is_live(module)))
900 wake_up_process(module->waiter); 901 wake_up_process(module->waiter);
@@ -1464,7 +1465,10 @@ static void free_module(struct module *mod)
1464 kfree(mod->args); 1465 kfree(mod->args);
1465 if (mod->percpu) 1466 if (mod->percpu)
1466 percpu_modfree(mod->percpu); 1467 percpu_modfree(mod->percpu);
1467 1468#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
1469 if (mod->refptr)
1470 percpu_modfree(mod->refptr);
1471#endif
1468 /* Free lock-classes: */ 1472 /* Free lock-classes: */
1469 lockdep_free_key_range(mod->module_core, mod->core_size); 1473 lockdep_free_key_range(mod->module_core, mod->core_size);
1470 1474
@@ -2011,6 +2015,14 @@ static noinline struct module *load_module(void __user *umod,
2011 if (err < 0) 2015 if (err < 0)
2012 goto free_mod; 2016 goto free_mod;
2013 2017
2018#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
2019 mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
2020 mod->name);
2021 if (!mod->refptr) {
2022 err = -ENOMEM;
2023 goto free_mod;
2024 }
2025#endif
2014 if (pcpuindex) { 2026 if (pcpuindex) {
2015 /* We have a special allocation for this section. */ 2027 /* We have a special allocation for this section. */
2016 percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, 2028 percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
@@ -2018,7 +2030,7 @@ static noinline struct module *load_module(void __user *umod,
2018 mod->name); 2030 mod->name);
2019 if (!percpu) { 2031 if (!percpu) {
2020 err = -ENOMEM; 2032 err = -ENOMEM;
2021 goto free_mod; 2033 goto free_percpu;
2022 } 2034 }
2023 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; 2035 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
2024 mod->percpu = percpu; 2036 mod->percpu = percpu;
@@ -2282,6 +2294,9 @@ static noinline struct module *load_module(void __user *umod,
2282 free_percpu: 2294 free_percpu:
2283 if (percpu) 2295 if (percpu)
2284 percpu_modfree(percpu); 2296 percpu_modfree(percpu);
2297#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
2298 percpu_modfree(mod->refptr);
2299#endif
2285 free_mod: 2300 free_mod:
2286 kfree(args); 2301 kfree(args);
2287 free_hdr: 2302 free_hdr:
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index fa07da94d7be..2313a4cc14ea 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -230,6 +230,71 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
230 return 0; 230 return 0;
231} 231}
232 232
233void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
234{
235 struct sighand_struct *sighand;
236 struct signal_struct *sig;
237 struct task_struct *t;
238
239 *times = INIT_CPUTIME;
240
241 rcu_read_lock();
242 sighand = rcu_dereference(tsk->sighand);
243 if (!sighand)
244 goto out;
245
246 sig = tsk->signal;
247
248 t = tsk;
249 do {
250 times->utime = cputime_add(times->utime, t->utime);
251 times->stime = cputime_add(times->stime, t->stime);
252 times->sum_exec_runtime += t->se.sum_exec_runtime;
253
254 t = next_thread(t);
255 } while (t != tsk);
256
257 times->utime = cputime_add(times->utime, sig->utime);
258 times->stime = cputime_add(times->stime, sig->stime);
259 times->sum_exec_runtime += sig->sum_sched_runtime;
260out:
261 rcu_read_unlock();
262}
263
264static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
265{
266 if (cputime_gt(b->utime, a->utime))
267 a->utime = b->utime;
268
269 if (cputime_gt(b->stime, a->stime))
270 a->stime = b->stime;
271
272 if (b->sum_exec_runtime > a->sum_exec_runtime)
273 a->sum_exec_runtime = b->sum_exec_runtime;
274}
275
276void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
277{
278 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
279 struct task_cputime sum;
280 unsigned long flags;
281
282 spin_lock_irqsave(&cputimer->lock, flags);
283 if (!cputimer->running) {
284 cputimer->running = 1;
285 /*
286 * The POSIX timer interface allows for absolute time expiry
287 * values through the TIMER_ABSTIME flag, therefore we have
288 * to synchronize the timer to the clock every time we start
289 * it.
290 */
291 thread_group_cputime(tsk, &sum);
292 update_gt_cputime(&cputimer->cputime, &sum);
293 }
294 *times = cputimer->cputime;
295 spin_unlock_irqrestore(&cputimer->lock, flags);
296}
297
233/* 298/*
234 * Sample a process (thread group) clock for the given group_leader task. 299 * Sample a process (thread group) clock for the given group_leader task.
235 * Must be called with tasklist_lock held for reading. 300 * Must be called with tasklist_lock held for reading.
@@ -457,7 +522,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
457{ 522{
458 struct task_cputime cputime; 523 struct task_cputime cputime;
459 524
460 thread_group_cputime(tsk, &cputime); 525 thread_group_cputimer(tsk, &cputime);
461 cleanup_timers(tsk->signal->cpu_timers, 526 cleanup_timers(tsk->signal->cpu_timers,
462 cputime.utime, cputime.stime, cputime.sum_exec_runtime); 527 cputime.utime, cputime.stime, cputime.sum_exec_runtime);
463} 528}
@@ -964,6 +1029,19 @@ static void check_thread_timers(struct task_struct *tsk,
964 } 1029 }
965} 1030}
966 1031
1032static void stop_process_timers(struct task_struct *tsk)
1033{
1034 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
1035 unsigned long flags;
1036
1037 if (!cputimer->running)
1038 return;
1039
1040 spin_lock_irqsave(&cputimer->lock, flags);
1041 cputimer->running = 0;
1042 spin_unlock_irqrestore(&cputimer->lock, flags);
1043}
1044
967/* 1045/*
968 * Check for any per-thread CPU timers that have fired and move them 1046 * Check for any per-thread CPU timers that have fired and move them
969 * off the tsk->*_timers list onto the firing list. Per-thread timers 1047 * off the tsk->*_timers list onto the firing list. Per-thread timers
@@ -987,13 +1065,15 @@ static void check_process_timers(struct task_struct *tsk,
987 sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY && 1065 sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
988 list_empty(&timers[CPUCLOCK_VIRT]) && 1066 list_empty(&timers[CPUCLOCK_VIRT]) &&
989 cputime_eq(sig->it_virt_expires, cputime_zero) && 1067 cputime_eq(sig->it_virt_expires, cputime_zero) &&
990 list_empty(&timers[CPUCLOCK_SCHED])) 1068 list_empty(&timers[CPUCLOCK_SCHED])) {
1069 stop_process_timers(tsk);
991 return; 1070 return;
1071 }
992 1072
993 /* 1073 /*
994 * Collect the current process totals. 1074 * Collect the current process totals.
995 */ 1075 */
996 thread_group_cputime(tsk, &cputime); 1076 thread_group_cputimer(tsk, &cputime);
997 utime = cputime.utime; 1077 utime = cputime.utime;
998 ptime = cputime_add(utime, cputime.stime); 1078 ptime = cputime_add(utime, cputime.stime);
999 sum_sched_runtime = cputime.sum_exec_runtime; 1079 sum_sched_runtime = cputime.sum_exec_runtime;
@@ -1259,7 +1339,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
1259 if (!task_cputime_zero(&sig->cputime_expires)) { 1339 if (!task_cputime_zero(&sig->cputime_expires)) {
1260 struct task_cputime group_sample; 1340 struct task_cputime group_sample;
1261 1341
1262 thread_group_cputime(tsk, &group_sample); 1342 thread_group_cputimer(tsk, &group_sample);
1263 if (task_cputime_expired(&group_sample, &sig->cputime_expires)) 1343 if (task_cputime_expired(&group_sample, &sig->cputime_expires))
1264 return 1; 1344 return 1;
1265 } 1345 }
@@ -1329,6 +1409,33 @@ void run_posix_cpu_timers(struct task_struct *tsk)
1329} 1409}
1330 1410
1331/* 1411/*
1412 * Sample a process (thread group) timer for the given group_leader task.
1413 * Must be called with tasklist_lock held for reading.
1414 */
1415static int cpu_timer_sample_group(const clockid_t which_clock,
1416 struct task_struct *p,
1417 union cpu_time_count *cpu)
1418{
1419 struct task_cputime cputime;
1420
1421 thread_group_cputimer(p, &cputime);
1422 switch (CPUCLOCK_WHICH(which_clock)) {
1423 default:
1424 return -EINVAL;
1425 case CPUCLOCK_PROF:
1426 cpu->cpu = cputime_add(cputime.utime, cputime.stime);
1427 break;
1428 case CPUCLOCK_VIRT:
1429 cpu->cpu = cputime.utime;
1430 break;
1431 case CPUCLOCK_SCHED:
1432 cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
1433 break;
1434 }
1435 return 0;
1436}
1437
1438/*
1332 * Set one of the process-wide special case CPU timers. 1439 * Set one of the process-wide special case CPU timers.
1333 * The tsk->sighand->siglock must be held by the caller. 1440 * The tsk->sighand->siglock must be held by the caller.
1334 * The *newval argument is relative and we update it to be absolute, *oldval 1441 * The *newval argument is relative and we update it to be absolute, *oldval
@@ -1341,7 +1448,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1341 struct list_head *head; 1448 struct list_head *head;
1342 1449
1343 BUG_ON(clock_idx == CPUCLOCK_SCHED); 1450 BUG_ON(clock_idx == CPUCLOCK_SCHED);
1344 cpu_clock_sample_group(clock_idx, tsk, &now); 1451 cpu_timer_sample_group(clock_idx, tsk, &now);
1345 1452
1346 if (oldval) { 1453 if (oldval) {
1347 if (!cputime_eq(*oldval, cputime_zero)) { 1454 if (!cputime_eq(*oldval, cputime_zero)) {
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 239988873971..b4d219016b6c 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -57,16 +57,6 @@ int pm_notifier_call_chain(unsigned long val)
57#ifdef CONFIG_PM_DEBUG 57#ifdef CONFIG_PM_DEBUG
58int pm_test_level = TEST_NONE; 58int pm_test_level = TEST_NONE;
59 59
60static int suspend_test(int level)
61{
62 if (pm_test_level == level) {
63 printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
64 mdelay(5000);
65 return 1;
66 }
67 return 0;
68}
69
70static const char * const pm_tests[__TEST_AFTER_LAST] = { 60static const char * const pm_tests[__TEST_AFTER_LAST] = {
71 [TEST_NONE] = "none", 61 [TEST_NONE] = "none",
72 [TEST_CORE] = "core", 62 [TEST_CORE] = "core",
@@ -125,14 +115,24 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
125} 115}
126 116
127power_attr(pm_test); 117power_attr(pm_test);
128#else /* !CONFIG_PM_DEBUG */ 118#endif /* CONFIG_PM_DEBUG */
129static inline int suspend_test(int level) { return 0; }
130#endif /* !CONFIG_PM_DEBUG */
131 119
132#endif /* CONFIG_PM_SLEEP */ 120#endif /* CONFIG_PM_SLEEP */
133 121
134#ifdef CONFIG_SUSPEND 122#ifdef CONFIG_SUSPEND
135 123
124static int suspend_test(int level)
125{
126#ifdef CONFIG_PM_DEBUG
127 if (pm_test_level == level) {
128 printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
129 mdelay(5000);
130 return 1;
131 }
132#endif /* !CONFIG_PM_DEBUG */
133 return 0;
134}
135
136#ifdef CONFIG_PM_TEST_SUSPEND 136#ifdef CONFIG_PM_TEST_SUSPEND
137 137
138/* 138/*
diff --git a/kernel/profile.c b/kernel/profile.c
index 784933acf5b8..7724e0409bae 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -114,12 +114,15 @@ int __ref profile_init(void)
114 if (!slab_is_available()) { 114 if (!slab_is_available()) {
115 prof_buffer = alloc_bootmem(buffer_bytes); 115 prof_buffer = alloc_bootmem(buffer_bytes);
116 alloc_bootmem_cpumask_var(&prof_cpu_mask); 116 alloc_bootmem_cpumask_var(&prof_cpu_mask);
117 cpumask_copy(prof_cpu_mask, cpu_possible_mask);
117 return 0; 118 return 0;
118 } 119 }
119 120
120 if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) 121 if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
121 return -ENOMEM; 122 return -ENOMEM;
122 123
124 cpumask_copy(prof_cpu_mask, cpu_possible_mask);
125
123 prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL); 126 prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL);
124 if (prof_buffer) 127 if (prof_buffer)
125 return 0; 128 return 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index 52bbf1c842a8..c1d0ed360088 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3880,19 +3880,24 @@ int select_nohz_load_balancer(int stop_tick)
3880 int cpu = smp_processor_id(); 3880 int cpu = smp_processor_id();
3881 3881
3882 if (stop_tick) { 3882 if (stop_tick) {
3883 cpumask_set_cpu(cpu, nohz.cpu_mask);
3884 cpu_rq(cpu)->in_nohz_recently = 1; 3883 cpu_rq(cpu)->in_nohz_recently = 1;
3885 3884
3886 /* 3885 if (!cpu_active(cpu)) {
3887 * If we are going offline and still the leader, give up! 3886 if (atomic_read(&nohz.load_balancer) != cpu)
3888 */ 3887 return 0;
3889 if (!cpu_active(cpu) && 3888
3890 atomic_read(&nohz.load_balancer) == cpu) { 3889 /*
3890 * If we are going offline and still the leader,
3891 * give up!
3892 */
3891 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) 3893 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
3892 BUG(); 3894 BUG();
3895
3893 return 0; 3896 return 0;
3894 } 3897 }
3895 3898
3899 cpumask_set_cpu(cpu, nohz.cpu_mask);
3900
3896 /* time for ilb owner also to sleep */ 3901 /* time for ilb owner also to sleep */
3897 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { 3902 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
3898 if (atomic_read(&nohz.load_balancer) == cpu) 3903 if (atomic_read(&nohz.load_balancer) == cpu)
@@ -4687,8 +4692,8 @@ EXPORT_SYMBOL(default_wake_function);
4687 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns 4692 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
4688 * zero in this (rare) case, and we handle it by continuing to scan the queue. 4693 * zero in this (rare) case, and we handle it by continuing to scan the queue.
4689 */ 4694 */
4690static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, 4695void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
4691 int nr_exclusive, int sync, void *key) 4696 int nr_exclusive, int sync, void *key)
4692{ 4697{
4693 wait_queue_t *curr, *next; 4698 wait_queue_t *curr, *next;
4694 4699
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5cc1c162044f..0566f2a03c42 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -719,7 +719,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
719 __enqueue_entity(cfs_rq, se); 719 __enqueue_entity(cfs_rq, se);
720} 720}
721 721
722static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) 722static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
723{ 723{
724 if (cfs_rq->last == se) 724 if (cfs_rq->last == se)
725 cfs_rq->last = NULL; 725 cfs_rq->last = NULL;
@@ -728,6 +728,12 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
728 cfs_rq->next = NULL; 728 cfs_rq->next = NULL;
729} 729}
730 730
731static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
732{
733 for_each_sched_entity(se)
734 __clear_buddies(cfs_rq_of(se), se);
735}
736
731static void 737static void
732dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) 738dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
733{ 739{
@@ -768,8 +774,14 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
768 774
769 ideal_runtime = sched_slice(cfs_rq, curr); 775 ideal_runtime = sched_slice(cfs_rq, curr);
770 delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; 776 delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
771 if (delta_exec > ideal_runtime) 777 if (delta_exec > ideal_runtime) {
772 resched_task(rq_of(cfs_rq)->curr); 778 resched_task(rq_of(cfs_rq)->curr);
779 /*
780 * The current task ran long enough, ensure it doesn't get
781 * re-elected due to buddy favours.
782 */
783 clear_buddies(cfs_rq, curr);
784 }
773} 785}
774 786
775static void 787static void
@@ -1452,6 +1464,11 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
1452 1464
1453 do { 1465 do {
1454 se = pick_next_entity(cfs_rq); 1466 se = pick_next_entity(cfs_rq);
1467 /*
1468 * If se was a buddy, clear it so that it will have to earn
1469 * the favour again.
1470 */
1471 __clear_buddies(cfs_rq, se);
1455 set_next_entity(cfs_rq, se); 1472 set_next_entity(cfs_rq, se);
1456 cfs_rq = group_cfs_rq(se); 1473 cfs_rq = group_cfs_rq(se);
1457 } while (cfs_rq); 1474 } while (cfs_rq);
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 954e1a81b796..bac1061cea2f 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -968,8 +968,8 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
968 if ((this_cpu != -1) && cpu_isset(this_cpu, *mask)) 968 if ((this_cpu != -1) && cpu_isset(this_cpu, *mask))
969 return this_cpu; 969 return this_cpu;
970 970
971 first = first_cpu(*mask); 971 first = cpumask_first(mask);
972 if (first != NR_CPUS) 972 if (first < nr_cpu_ids)
973 return first; 973 return first;
974 974
975 return -1; 975 return -1;
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 8ab0cef8ecab..a8f93dd374e1 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -296,19 +296,21 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
296static inline void account_group_user_time(struct task_struct *tsk, 296static inline void account_group_user_time(struct task_struct *tsk,
297 cputime_t cputime) 297 cputime_t cputime)
298{ 298{
299 struct task_cputime *times; 299 struct thread_group_cputimer *cputimer;
300 struct signal_struct *sig;
301 300
302 /* tsk == current, ensure it is safe to use ->signal */ 301 /* tsk == current, ensure it is safe to use ->signal */
303 if (unlikely(tsk->exit_state)) 302 if (unlikely(tsk->exit_state))
304 return; 303 return;
305 304
306 sig = tsk->signal; 305 cputimer = &tsk->signal->cputimer;
307 times = &sig->cputime.totals;
308 306
309 spin_lock(&times->lock); 307 if (!cputimer->running)
310 times->utime = cputime_add(times->utime, cputime); 308 return;
311 spin_unlock(&times->lock); 309
310 spin_lock(&cputimer->lock);
311 cputimer->cputime.utime =
312 cputime_add(cputimer->cputime.utime, cputime);
313 spin_unlock(&cputimer->lock);
312} 314}
313 315
314/** 316/**
@@ -324,19 +326,21 @@ static inline void account_group_user_time(struct task_struct *tsk,
324static inline void account_group_system_time(struct task_struct *tsk, 326static inline void account_group_system_time(struct task_struct *tsk,
325 cputime_t cputime) 327 cputime_t cputime)
326{ 328{
327 struct task_cputime *times; 329 struct thread_group_cputimer *cputimer;
328 struct signal_struct *sig;
329 330
330 /* tsk == current, ensure it is safe to use ->signal */ 331 /* tsk == current, ensure it is safe to use ->signal */
331 if (unlikely(tsk->exit_state)) 332 if (unlikely(tsk->exit_state))
332 return; 333 return;
333 334
334 sig = tsk->signal; 335 cputimer = &tsk->signal->cputimer;
335 times = &sig->cputime.totals; 336
337 if (!cputimer->running)
338 return;
336 339
337 spin_lock(&times->lock); 340 spin_lock(&cputimer->lock);
338 times->stime = cputime_add(times->stime, cputime); 341 cputimer->cputime.stime =
339 spin_unlock(&times->lock); 342 cputime_add(cputimer->cputime.stime, cputime);
343 spin_unlock(&cputimer->lock);
340} 344}
341 345
342/** 346/**
@@ -352,7 +356,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
352static inline void account_group_exec_runtime(struct task_struct *tsk, 356static inline void account_group_exec_runtime(struct task_struct *tsk,
353 unsigned long long ns) 357 unsigned long long ns)
354{ 358{
355 struct task_cputime *times; 359 struct thread_group_cputimer *cputimer;
356 struct signal_struct *sig; 360 struct signal_struct *sig;
357 361
358 sig = tsk->signal; 362 sig = tsk->signal;
@@ -361,9 +365,12 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
361 if (unlikely(!sig)) 365 if (unlikely(!sig))
362 return; 366 return;
363 367
364 times = &sig->cputime.totals; 368 cputimer = &sig->cputimer;
369
370 if (!cputimer->running)
371 return;
365 372
366 spin_lock(&times->lock); 373 spin_lock(&cputimer->lock);
367 times->sum_exec_runtime += ns; 374 cputimer->cputime.sum_exec_runtime += ns;
368 spin_unlock(&times->lock); 375 spin_unlock(&cputimer->lock);
369} 376}
diff --git a/kernel/signal.c b/kernel/signal.c
index b6b36768b758..2a74fe87c0dd 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1367,7 +1367,6 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1367 struct siginfo info; 1367 struct siginfo info;
1368 unsigned long flags; 1368 unsigned long flags;
1369 struct sighand_struct *psig; 1369 struct sighand_struct *psig;
1370 struct task_cputime cputime;
1371 int ret = sig; 1370 int ret = sig;
1372 1371
1373 BUG_ON(sig == -1); 1372 BUG_ON(sig == -1);
@@ -1397,9 +1396,10 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1397 info.si_uid = __task_cred(tsk)->uid; 1396 info.si_uid = __task_cred(tsk)->uid;
1398 rcu_read_unlock(); 1397 rcu_read_unlock();
1399 1398
1400 thread_group_cputime(tsk, &cputime); 1399 info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
1401 info.si_utime = cputime_to_jiffies(cputime.utime); 1400 tsk->signal->utime));
1402 info.si_stime = cputime_to_jiffies(cputime.stime); 1401 info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
1402 tsk->signal->stime));
1403 1403
1404 info.si_status = tsk->exit_code & 0x7f; 1404 info.si_status = tsk->exit_code & 0x7f;
1405 if (tsk->exit_code & 0x80) 1405 if (tsk->exit_code & 0x80)
diff --git a/kernel/sys.c b/kernel/sys.c
index e7dc0e10a485..f145c415bc16 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1525,22 +1525,14 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
1525 return -EINVAL; 1525 return -EINVAL;
1526 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) 1526 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1527 return -EFAULT; 1527 return -EFAULT;
1528 if (new_rlim.rlim_cur > new_rlim.rlim_max)
1529 return -EINVAL;
1528 old_rlim = current->signal->rlim + resource; 1530 old_rlim = current->signal->rlim + resource;
1529 if ((new_rlim.rlim_max > old_rlim->rlim_max) && 1531 if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
1530 !capable(CAP_SYS_RESOURCE)) 1532 !capable(CAP_SYS_RESOURCE))
1531 return -EPERM; 1533 return -EPERM;
1532 1534 if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
1533 if (resource == RLIMIT_NOFILE) { 1535 return -EPERM;
1534 if (new_rlim.rlim_max == RLIM_INFINITY)
1535 new_rlim.rlim_max = sysctl_nr_open;
1536 if (new_rlim.rlim_cur == RLIM_INFINITY)
1537 new_rlim.rlim_cur = sysctl_nr_open;
1538 if (new_rlim.rlim_max > sysctl_nr_open)
1539 return -EPERM;
1540 }
1541
1542 if (new_rlim.rlim_cur > new_rlim.rlim_max)
1543 return -EINVAL;
1544 1536
1545 retval = security_task_setrlimit(resource, &new_rlim); 1537 retval = security_task_setrlimit(resource, &new_rlim);
1546 if (retval) 1538 if (retval)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 790f9d785663..c5ef44ff850f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -101,6 +101,7 @@ static int two = 2;
101 101
102static int zero; 102static int zero;
103static int one = 1; 103static int one = 1;
104static unsigned long one_ul = 1;
104static int one_hundred = 100; 105static int one_hundred = 100;
105 106
106/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 107/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -974,7 +975,7 @@ static struct ctl_table vm_table[] = {
974 .mode = 0644, 975 .mode = 0644,
975 .proc_handler = &dirty_background_bytes_handler, 976 .proc_handler = &dirty_background_bytes_handler,
976 .strategy = &sysctl_intvec, 977 .strategy = &sysctl_intvec,
977 .extra1 = &one, 978 .extra1 = &one_ul,
978 }, 979 },
979 { 980 {
980 .ctl_name = VM_DIRTY_RATIO, 981 .ctl_name = VM_DIRTY_RATIO,
@@ -995,7 +996,7 @@ static struct ctl_table vm_table[] = {
995 .mode = 0644, 996 .mode = 0644,
996 .proc_handler = &dirty_bytes_handler, 997 .proc_handler = &dirty_bytes_handler,
997 .strategy = &sysctl_intvec, 998 .strategy = &sysctl_intvec,
998 .extra1 = &one, 999 .extra1 = &one_ul,
999 }, 1000 },
1000 { 1001 {
1001 .procname = "dirty_writeback_centisecs", 1002 .procname = "dirty_writeback_centisecs",
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7dcf6e9f2b04..9a236ffe2aa4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1737,9 +1737,12 @@ static void clear_ftrace_pid(struct pid *pid)
1737{ 1737{
1738 struct task_struct *p; 1738 struct task_struct *p;
1739 1739
1740 rcu_read_lock();
1740 do_each_pid_task(pid, PIDTYPE_PID, p) { 1741 do_each_pid_task(pid, PIDTYPE_PID, p) {
1741 clear_tsk_trace_trace(p); 1742 clear_tsk_trace_trace(p);
1742 } while_each_pid_task(pid, PIDTYPE_PID, p); 1743 } while_each_pid_task(pid, PIDTYPE_PID, p);
1744 rcu_read_unlock();
1745
1743 put_pid(pid); 1746 put_pid(pid);
1744} 1747}
1745 1748
@@ -1747,9 +1750,11 @@ static void set_ftrace_pid(struct pid *pid)
1747{ 1750{
1748 struct task_struct *p; 1751 struct task_struct *p;
1749 1752
1753 rcu_read_lock();
1750 do_each_pid_task(pid, PIDTYPE_PID, p) { 1754 do_each_pid_task(pid, PIDTYPE_PID, p) {
1751 set_tsk_trace_trace(p); 1755 set_tsk_trace_trace(p);
1752 } while_each_pid_task(pid, PIDTYPE_PID, p); 1756 } while_each_pid_task(pid, PIDTYPE_PID, p);
1757 rcu_read_unlock();
1753} 1758}
1754 1759
1755static void clear_ftrace_pid_task(struct pid **pid) 1760static void clear_ftrace_pid_task(struct pid **pid)
diff --git a/kernel/wait.c b/kernel/wait.c
index cd87131f2fc2..42a2dbc181c8 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -91,6 +91,15 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
91} 91}
92EXPORT_SYMBOL(prepare_to_wait_exclusive); 92EXPORT_SYMBOL(prepare_to_wait_exclusive);
93 93
94/*
95 * finish_wait - clean up after waiting in a queue
96 * @q: waitqueue waited on
97 * @wait: wait descriptor
98 *
99 * Sets current thread back to running state and removes
100 * the wait descriptor from the given waitqueue if still
101 * queued.
102 */
94void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) 103void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
95{ 104{
96 unsigned long flags; 105 unsigned long flags;
@@ -117,6 +126,39 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
117} 126}
118EXPORT_SYMBOL(finish_wait); 127EXPORT_SYMBOL(finish_wait);
119 128
129/*
130 * abort_exclusive_wait - abort exclusive waiting in a queue
131 * @q: waitqueue waited on
132 * @wait: wait descriptor
133 * @state: runstate of the waiter to be woken
134 * @key: key to identify a wait bit queue or %NULL
135 *
136 * Sets current thread back to running state and removes
137 * the wait descriptor from the given waitqueue if still
138 * queued.
139 *
140 * Wakes up the next waiter if the caller is concurrently
141 * woken up through the queue.
142 *
143 * This prevents waiter starvation where an exclusive waiter
144 * aborts and is woken up concurrently and noone wakes up
145 * the next waiter.
146 */
147void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
148 unsigned int mode, void *key)
149{
150 unsigned long flags;
151
152 __set_current_state(TASK_RUNNING);
153 spin_lock_irqsave(&q->lock, flags);
154 if (!list_empty(&wait->task_list))
155 list_del_init(&wait->task_list);
156 else if (waitqueue_active(q))
157 __wake_up_common(q, mode, 1, 0, key);
158 spin_unlock_irqrestore(&q->lock, flags);
159}
160EXPORT_SYMBOL(abort_exclusive_wait);
161
120int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) 162int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
121{ 163{
122 int ret = default_wake_function(wait, mode, sync, key); 164 int ret = default_wake_function(wait, mode, sync, key);
@@ -177,17 +219,20 @@ int __sched
177__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, 219__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
178 int (*action)(void *), unsigned mode) 220 int (*action)(void *), unsigned mode)
179{ 221{
180 int ret = 0;
181
182 do { 222 do {
223 int ret;
224
183 prepare_to_wait_exclusive(wq, &q->wait, mode); 225 prepare_to_wait_exclusive(wq, &q->wait, mode);
184 if (test_bit(q->key.bit_nr, q->key.flags)) { 226 if (!test_bit(q->key.bit_nr, q->key.flags))
185 if ((ret = (*action)(q->key.flags))) 227 continue;
186 break; 228 ret = action(q->key.flags);
187 } 229 if (!ret)
230 continue;
231 abort_exclusive_wait(wq, &q->wait, mode, &q->key);
232 return ret;
188 } while (test_and_set_bit(q->key.bit_nr, q->key.flags)); 233 } while (test_and_set_bit(q->key.bit_nr, q->key.flags));
189 finish_wait(wq, &q->wait); 234 finish_wait(wq, &q->wait);
190 return ret; 235 return 0;
191} 236}
192EXPORT_SYMBOL(__wait_on_bit_lock); 237EXPORT_SYMBOL(__wait_on_bit_lock);
193 238