about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-12-05 12:27:46 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-12-05 12:27:46 -0500
commit 7e1fb765c613298d861f80fa18af26df87a4ec19 (patch)
tree 2c6d499decaf469bcc79a4873557146ead3bef9c
parent ad658cec232771b11e95bb5f0d639d48f898a1f2 (diff)
parent cde898fa80a45bb23eab2a060fc79d0913081409 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched
* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched:
  futex: correctly return -EFAULT not -EINVAL
  lockdep: in_range() fix
  lockdep: fix debug_show_all_locks()
  sched: style cleanups
  futex: fix for futex_wait signal stack corruption
-rw-r--r-- include/linux/thread_info.h 17
-rw-r--r-- kernel/futex.c 27
-rw-r--r-- kernel/lockdep.c 29
-rw-r--r-- kernel/sched.c 132
4 files changed, 114 insertions, 91 deletions
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 1c4eb41dbd89..9c4ad755d7e5 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -7,12 +7,25 @@
7#ifndef _LINUX_THREAD_INFO_H 7#ifndef _LINUX_THREAD_INFO_H
8#define _LINUX_THREAD_INFO_H 8#define _LINUX_THREAD_INFO_H
9 9
10#include <linux/types.h>
11
10/* 12/*
11 * System call restart block. 13 * System call restart block.
12 */ 14 */
13struct restart_block { 15struct restart_block {
14 long (*fn)(struct restart_block *); 16 long (*fn)(struct restart_block *);
15 unsigned long arg0, arg1, arg2, arg3; 17 union {
18 struct {
19 unsigned long arg0, arg1, arg2, arg3;
20 };
21 /* For futex_wait */
22 struct {
23 u32 *uaddr;
24 u32 val;
25 u32 flags;
26 u64 time;
27 } futex;
28 };
16}; 29};
17 30
18extern long do_no_restart_syscall(struct restart_block *parm); 31extern long do_no_restart_syscall(struct restart_block *parm);
diff --git a/kernel/futex.c b/kernel/futex.c
index 9dc591ab681a..172a1aeeafdb 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -658,7 +658,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
658 658
659 if (curval == -EFAULT) 659 if (curval == -EFAULT)
660 ret = -EFAULT; 660 ret = -EFAULT;
661 if (curval != uval) 661 else if (curval != uval)
662 ret = -EINVAL; 662 ret = -EINVAL;
663 if (ret) { 663 if (ret) {
664 spin_unlock(&pi_state->pi_mutex.wait_lock); 664 spin_unlock(&pi_state->pi_mutex.wait_lock);
@@ -1149,9 +1149,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1149 1149
1150/* 1150/*
1151 * In case we must use restart_block to restart a futex_wait, 1151 * In case we must use restart_block to restart a futex_wait,
1152 * we encode in the 'arg3' shared capability 1152 * we encode in the 'flags' shared capability
1153 */ 1153 */
1154#define ARG3_SHARED 1 1154#define FLAGS_SHARED 1
1155 1155
1156static long futex_wait_restart(struct restart_block *restart); 1156static long futex_wait_restart(struct restart_block *restart);
1157 1157
@@ -1290,12 +1290,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1290 struct restart_block *restart; 1290 struct restart_block *restart;
1291 restart = &current_thread_info()->restart_block; 1291 restart = &current_thread_info()->restart_block;
1292 restart->fn = futex_wait_restart; 1292 restart->fn = futex_wait_restart;
1293 restart->arg0 = (unsigned long)uaddr; 1293 restart->futex.uaddr = (u32 *)uaddr;
1294 restart->arg1 = (unsigned long)val; 1294 restart->futex.val = val;
1295 restart->arg2 = (unsigned long)abs_time; 1295 restart->futex.time = abs_time->tv64;
1296 restart->arg3 = 0; 1296 restart->futex.flags = 0;
1297
1297 if (fshared) 1298 if (fshared)
1298 restart->arg3 |= ARG3_SHARED; 1299 restart->futex.flags |= FLAGS_SHARED;
1299 return -ERESTART_RESTARTBLOCK; 1300 return -ERESTART_RESTARTBLOCK;
1300 } 1301 }
1301 1302
@@ -1310,15 +1311,15 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1310 1311
1311static long futex_wait_restart(struct restart_block *restart) 1312static long futex_wait_restart(struct restart_block *restart)
1312{ 1313{
1313 u32 __user *uaddr = (u32 __user *)restart->arg0; 1314 u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
1314 u32 val = (u32)restart->arg1;
1315 ktime_t *abs_time = (ktime_t *)restart->arg2;
1316 struct rw_semaphore *fshared = NULL; 1315 struct rw_semaphore *fshared = NULL;
1316 ktime_t t;
1317 1317
1318 t.tv64 = restart->futex.time;
1318 restart->fn = do_no_restart_syscall; 1319 restart->fn = do_no_restart_syscall;
1319 if (restart->arg3 & ARG3_SHARED) 1320 if (restart->futex.flags & FLAGS_SHARED)
1320 fshared = &current->mm->mmap_sem; 1321 fshared = &current->mm->mmap_sem;
1321 return (long)futex_wait(uaddr, fshared, val, abs_time); 1322 return (long)futex_wait(uaddr, fshared, restart->futex.val, &t);
1322} 1323}
1323 1324
1324 1325
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index ed38bbfc48a3..0f389621bb6b 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3054,11 +3054,6 @@ void __init lockdep_info(void)
3054#endif 3054#endif
3055} 3055}
3056 3056
3057static inline int in_range(const void *start, const void *addr, const void *end)
3058{
3059 return addr >= start && addr <= end;
3060}
3061
3062static void 3057static void
3063print_freed_lock_bug(struct task_struct *curr, const void *mem_from, 3058print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
3064 const void *mem_to, struct held_lock *hlock) 3059 const void *mem_to, struct held_lock *hlock)
@@ -3080,6 +3075,13 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
3080 dump_stack(); 3075 dump_stack();
3081} 3076}
3082 3077
3078static inline int not_in_range(const void* mem_from, unsigned long mem_len,
3079 const void* lock_from, unsigned long lock_len)
3080{
3081 return lock_from + lock_len <= mem_from ||
3082 mem_from + mem_len <= lock_from;
3083}
3084
3083/* 3085/*
3084 * Called when kernel memory is freed (or unmapped), or if a lock 3086 * Called when kernel memory is freed (or unmapped), or if a lock
3085 * is destroyed or reinitialized - this code checks whether there is 3087 * is destroyed or reinitialized - this code checks whether there is
@@ -3087,7 +3089,6 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
3087 */ 3089 */
3088void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) 3090void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
3089{ 3091{
3090 const void *mem_to = mem_from + mem_len, *lock_from, *lock_to;
3091 struct task_struct *curr = current; 3092 struct task_struct *curr = current;
3092 struct held_lock *hlock; 3093 struct held_lock *hlock;
3093 unsigned long flags; 3094 unsigned long flags;
@@ -3100,14 +3101,11 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
3100 for (i = 0; i < curr->lockdep_depth; i++) { 3101 for (i = 0; i < curr->lockdep_depth; i++) {
3101 hlock = curr->held_locks + i; 3102 hlock = curr->held_locks + i;
3102 3103
3103 lock_from = (void *)hlock->instance; 3104 if (not_in_range(mem_from, mem_len, hlock->instance,
3104 lock_to = (void *)(hlock->instance + 1); 3105 sizeof(*hlock->instance)))
3105
3106 if (!in_range(mem_from, lock_from, mem_to) &&
3107 !in_range(mem_from, lock_to, mem_to))
3108 continue; 3106 continue;
3109 3107
3110 print_freed_lock_bug(curr, mem_from, mem_to, hlock); 3108 print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock);
3111 break; 3109 break;
3112 } 3110 }
3113 local_irq_restore(flags); 3111 local_irq_restore(flags);
@@ -3173,6 +3171,13 @@ retry:
3173 printk(" locked it.\n"); 3171 printk(" locked it.\n");
3174 3172
3175 do_each_thread(g, p) { 3173 do_each_thread(g, p) {
3174 /*
3175 * It's not reliable to print a task's held locks
3176 * if it's not sleeping (or if it's not the current
3177 * task):
3178 */
3179 if (p->state == TASK_RUNNING && p != current)
3180 continue;
3176 if (p->lockdep_depth) 3181 if (p->lockdep_depth)
3177 lockdep_print_held_locks(p); 3182 lockdep_print_held_locks(p);
3178 if (!unlock) 3183 if (!unlock)
diff --git a/kernel/sched.c b/kernel/sched.c
index b062856b946c..67d9d1799d86 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -209,9 +209,8 @@ static inline struct task_group *task_group(struct task_struct *p)
209 tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), 209 tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
210 struct task_group, css); 210 struct task_group, css);
211#else 211#else
212 tg = &init_task_group; 212 tg = &init_task_group;
213#endif 213#endif
214
215 return tg; 214 return tg;
216} 215}
217 216
@@ -249,15 +248,16 @@ struct cfs_rq {
249#ifdef CONFIG_FAIR_GROUP_SCHED 248#ifdef CONFIG_FAIR_GROUP_SCHED
250 struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ 249 struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
251 250
252 /* leaf cfs_rqs are those that hold tasks (lowest schedulable entity in 251 /*
252 * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
253 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities 253 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
254 * (like users, containers etc.) 254 * (like users, containers etc.)
255 * 255 *
256 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This 256 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
257 * list is used during load balance. 257 * list is used during load balance.
258 */ 258 */
259 struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */ 259 struct list_head leaf_cfs_rq_list;
260 struct task_group *tg; /* group that "owns" this runqueue */ 260 struct task_group *tg; /* group that "owns" this runqueue */
261#endif 261#endif
262}; 262};
263 263
@@ -300,7 +300,7 @@ struct rq {
300 /* list of leaf cfs_rq on this cpu: */ 300 /* list of leaf cfs_rq on this cpu: */
301 struct list_head leaf_cfs_rq_list; 301 struct list_head leaf_cfs_rq_list;
302#endif 302#endif
303 struct rt_rq rt; 303 struct rt_rq rt;
304 304
305 /* 305 /*
306 * This is part of a global counter where only the total sum 306 * This is part of a global counter where only the total sum
@@ -457,8 +457,8 @@ enum {
457 SCHED_FEAT_NEW_FAIR_SLEEPERS = 1, 457 SCHED_FEAT_NEW_FAIR_SLEEPERS = 1,
458 SCHED_FEAT_WAKEUP_PREEMPT = 2, 458 SCHED_FEAT_WAKEUP_PREEMPT = 2,
459 SCHED_FEAT_START_DEBIT = 4, 459 SCHED_FEAT_START_DEBIT = 4,
460 SCHED_FEAT_TREE_AVG = 8, 460 SCHED_FEAT_TREE_AVG = 8,
461 SCHED_FEAT_APPROX_AVG = 16, 461 SCHED_FEAT_APPROX_AVG = 16,
462}; 462};
463 463
464const_debug unsigned int sysctl_sched_features = 464const_debug unsigned int sysctl_sched_features =
@@ -591,7 +591,7 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
591 591
592/* 592/*
593 * task_rq_lock - lock the runqueue a given task resides on and disable 593 * task_rq_lock - lock the runqueue a given task resides on and disable
594 * interrupts. Note the ordering: we can safely lookup the task_rq without 594 * interrupts. Note the ordering: we can safely lookup the task_rq without
595 * explicitly disabling preemption. 595 * explicitly disabling preemption.
596 */ 596 */
597static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) 597static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
@@ -779,7 +779,7 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
779 * To aid in avoiding the subversion of "niceness" due to uneven distribution 779 * To aid in avoiding the subversion of "niceness" due to uneven distribution
780 * of tasks with abnormal "nice" values across CPUs the contribution that 780 * of tasks with abnormal "nice" values across CPUs the contribution that
781 * each task makes to its run queue's load is weighted according to its 781 * each task makes to its run queue's load is weighted according to its
782 * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a 782 * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
783 * scaled version of the new time slice allocation that they receive on time 783 * scaled version of the new time slice allocation that they receive on time
784 * slice expiry etc. 784 * slice expiry etc.
785 */ 785 */
@@ -1854,7 +1854,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
1854 * and do any other architecture-specific cleanup actions. 1854 * and do any other architecture-specific cleanup actions.
1855 * 1855 *
1856 * Note that we may have delayed dropping an mm in context_switch(). If 1856 * Note that we may have delayed dropping an mm in context_switch(). If
1857 * so, we finish that here outside of the runqueue lock. (Doing it 1857 * so, we finish that here outside of the runqueue lock. (Doing it
1858 * with the lock held can cause deadlocks; see schedule() for 1858 * with the lock held can cause deadlocks; see schedule() for
1859 * details.) 1859 * details.)
1860 */ 1860 */
@@ -2136,7 +2136,7 @@ static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
2136/* 2136/*
2137 * If dest_cpu is allowed for this process, migrate the task to it. 2137 * If dest_cpu is allowed for this process, migrate the task to it.
2138 * This is accomplished by forcing the cpu_allowed mask to only 2138 * This is accomplished by forcing the cpu_allowed mask to only
2139 * allow dest_cpu, which will force the cpu onto dest_cpu. Then 2139 * allow dest_cpu, which will force the cpu onto dest_cpu. Then
2140 * the cpu_allowed mask is restored. 2140 * the cpu_allowed mask is restored.
2141 */ 2141 */
2142static void sched_migrate_task(struct task_struct *p, int dest_cpu) 2142static void sched_migrate_task(struct task_struct *p, int dest_cpu)
@@ -2581,7 +2581,7 @@ group_next:
2581 * tasks around. Thus we look for the minimum possible imbalance. 2581 * tasks around. Thus we look for the minimum possible imbalance.
2582 * Negative imbalances (*we* are more loaded than anyone else) will 2582 * Negative imbalances (*we* are more loaded than anyone else) will
2583 * be counted as no imbalance for these purposes -- we can't fix that 2583 * be counted as no imbalance for these purposes -- we can't fix that
2584 * by pulling tasks to us. Be careful of negative numbers as they'll 2584 * by pulling tasks to us. Be careful of negative numbers as they'll
2585 * appear as very large values with unsigned longs. 2585 * appear as very large values with unsigned longs.
2586 */ 2586 */
2587 if (max_load <= busiest_load_per_task) 2587 if (max_load <= busiest_load_per_task)
@@ -3016,7 +3016,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
3016 3016
3017 /* 3017 /*
3018 * This condition is "impossible", if it occurs 3018 * This condition is "impossible", if it occurs
3019 * we need to fix it. Originally reported by 3019 * we need to fix it. Originally reported by
3020 * Bjorn Helgaas on a 128-cpu setup. 3020 * Bjorn Helgaas on a 128-cpu setup.
3021 */ 3021 */
3022 BUG_ON(busiest_rq == target_rq); 3022 BUG_ON(busiest_rq == target_rq);
@@ -3048,7 +3048,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
3048#ifdef CONFIG_NO_HZ 3048#ifdef CONFIG_NO_HZ
3049static struct { 3049static struct {
3050 atomic_t load_balancer; 3050 atomic_t load_balancer;
3051 cpumask_t cpu_mask; 3051 cpumask_t cpu_mask;
3052} nohz ____cacheline_aligned = { 3052} nohz ____cacheline_aligned = {
3053 .load_balancer = ATOMIC_INIT(-1), 3053 .load_balancer = ATOMIC_INIT(-1),
3054 .cpu_mask = CPU_MASK_NONE, 3054 .cpu_mask = CPU_MASK_NONE,
@@ -3552,7 +3552,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
3552static inline void schedule_debug(struct task_struct *prev) 3552static inline void schedule_debug(struct task_struct *prev)
3553{ 3553{
3554 /* 3554 /*
3555 * Test if we are atomic. Since do_exit() needs to call into 3555 * Test if we are atomic. Since do_exit() needs to call into
3556 * schedule() atomically, we ignore that path for now. 3556 * schedule() atomically, we ignore that path for now.
3557 * Otherwise, whine if we are scheduling when we should not be. 3557 * Otherwise, whine if we are scheduling when we should not be.
3558 */ 3558 */
@@ -3674,7 +3674,7 @@ EXPORT_SYMBOL(schedule);
3674#ifdef CONFIG_PREEMPT 3674#ifdef CONFIG_PREEMPT
3675/* 3675/*
3676 * this is the entry point to schedule() from in-kernel preemption 3676 * this is the entry point to schedule() from in-kernel preemption
3677 * off of preempt_enable. Kernel preemptions off return from interrupt 3677 * off of preempt_enable. Kernel preemptions off return from interrupt
3678 * occur there and call schedule directly. 3678 * occur there and call schedule directly.
3679 */ 3679 */
3680asmlinkage void __sched preempt_schedule(void) 3680asmlinkage void __sched preempt_schedule(void)
@@ -3686,7 +3686,7 @@ asmlinkage void __sched preempt_schedule(void)
3686#endif 3686#endif
3687 /* 3687 /*
3688 * If there is a non-zero preempt_count or interrupts are disabled, 3688 * If there is a non-zero preempt_count or interrupts are disabled,
3689 * we do not want to preempt the current task. Just return.. 3689 * we do not want to preempt the current task. Just return..
3690 */ 3690 */
3691 if (likely(ti->preempt_count || irqs_disabled())) 3691 if (likely(ti->preempt_count || irqs_disabled()))
3692 return; 3692 return;
@@ -3772,12 +3772,12 @@ int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
3772EXPORT_SYMBOL(default_wake_function); 3772EXPORT_SYMBOL(default_wake_function);
3773 3773
3774/* 3774/*
3775 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just 3775 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
3776 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve 3776 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
3777 * number) then we wake all the non-exclusive tasks and one exclusive task. 3777 * number) then we wake all the non-exclusive tasks and one exclusive task.
3778 * 3778 *
3779 * There are circumstances in which we can try to wake a task which has already 3779 * There are circumstances in which we can try to wake a task which has already
3780 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns 3780 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
3781 * zero in this (rare) case, and we handle it by continuing to scan the queue. 3781 * zero in this (rare) case, and we handle it by continuing to scan the queue.
3782 */ 3782 */
3783static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, 3783static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
@@ -4390,8 +4390,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
4390 * @policy: new policy. 4390 * @policy: new policy.
4391 * @param: structure containing the new RT priority. 4391 * @param: structure containing the new RT priority.
4392 */ 4392 */
4393asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, 4393asmlinkage long
4394 struct sched_param __user *param) 4394sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
4395{ 4395{
4396 /* negative values for policy are not valid */ 4396 /* negative values for policy are not valid */
4397 if (policy < 0) 4397 if (policy < 0)
@@ -4491,7 +4491,7 @@ long sched_setaffinity(pid_t pid, cpumask_t new_mask)
4491 4491
4492 /* 4492 /*
4493 * It is not safe to call set_cpus_allowed with the 4493 * It is not safe to call set_cpus_allowed with the
4494 * tasklist_lock held. We will bump the task_struct's 4494 * tasklist_lock held. We will bump the task_struct's
4495 * usage count and then drop tasklist_lock. 4495 * usage count and then drop tasklist_lock.
4496 */ 4496 */
4497 get_task_struct(p); 4497 get_task_struct(p);
@@ -4687,7 +4687,7 @@ EXPORT_SYMBOL(cond_resched);
4687 * cond_resched_lock() - if a reschedule is pending, drop the given lock, 4687 * cond_resched_lock() - if a reschedule is pending, drop the given lock,
4688 * call schedule, and on return reacquire the lock. 4688 * call schedule, and on return reacquire the lock.
4689 * 4689 *
4690 * This works OK both with and without CONFIG_PREEMPT. We do strange low-level 4690 * This works OK both with and without CONFIG_PREEMPT. We do strange low-level
4691 * operations here to prevent schedule() from being called twice (once via 4691 * operations here to prevent schedule() from being called twice (once via
4692 * spin_unlock(), once by hand). 4692 * spin_unlock(), once by hand).
4693 */ 4693 */
@@ -4741,7 +4741,7 @@ void __sched yield(void)
4741EXPORT_SYMBOL(yield); 4741EXPORT_SYMBOL(yield);
4742 4742
4743/* 4743/*
4744 * This task is about to go to sleep on IO. Increment rq->nr_iowait so 4744 * This task is about to go to sleep on IO. Increment rq->nr_iowait so
4745 * that process accounting knows that this is a task in IO wait state. 4745 * that process accounting knows that this is a task in IO wait state.
4746 * 4746 *
4747 * But don't do that if it is a deliberate, throttling IO wait (this task 4747 * But don't do that if it is a deliberate, throttling IO wait (this task
@@ -5050,7 +5050,7 @@ static inline void sched_init_granularity(void)
5050 * is removed from the allowed bitmask. 5050 * is removed from the allowed bitmask.
5051 * 5051 *
5052 * NOTE: the caller must have a valid reference to the task, the 5052 * NOTE: the caller must have a valid reference to the task, the
5053 * task must not exit() & deallocate itself prematurely. The 5053 * task must not exit() & deallocate itself prematurely. The
5054 * call is not atomic; no spinlocks may be held. 5054 * call is not atomic; no spinlocks may be held.
5055 */ 5055 */
5056int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) 5056int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
@@ -5087,7 +5087,7 @@ out:
5087EXPORT_SYMBOL_GPL(set_cpus_allowed); 5087EXPORT_SYMBOL_GPL(set_cpus_allowed);
5088 5088
5089/* 5089/*
5090 * Move (not current) task off this cpu, onto dest cpu. We're doing 5090 * Move (not current) task off this cpu, onto dest cpu. We're doing
5091 * this because either it can't run here any more (set_cpus_allowed() 5091 * this because either it can't run here any more (set_cpus_allowed()
5092 * away from this CPU, or CPU going down), or because we're 5092 * away from this CPU, or CPU going down), or because we're
5093 * attempting to rebalance this task on exec (sched_exec). 5093 * attempting to rebalance this task on exec (sched_exec).
@@ -5232,7 +5232,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
5232 * Try to stay on the same cpuset, where the 5232 * Try to stay on the same cpuset, where the
5233 * current cpuset may be a subset of all cpus. 5233 * current cpuset may be a subset of all cpus.
5234 * The cpuset_cpus_allowed_locked() variant of 5234 * The cpuset_cpus_allowed_locked() variant of
5235 * cpuset_cpus_allowed() will not block. It must be 5235 * cpuset_cpus_allowed() will not block. It must be
5236 * called within calls to cpuset_lock/cpuset_unlock. 5236 * called within calls to cpuset_lock/cpuset_unlock.
5237 */ 5237 */
5238 rq = task_rq_lock(p, &flags); 5238 rq = task_rq_lock(p, &flags);
@@ -5245,10 +5245,11 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
5245 * kernel threads (both mm NULL), since they never 5245 * kernel threads (both mm NULL), since they never
5246 * leave kernel. 5246 * leave kernel.
5247 */ 5247 */
5248 if (p->mm && printk_ratelimit()) 5248 if (p->mm && printk_ratelimit()) {
5249 printk(KERN_INFO "process %d (%s) no " 5249 printk(KERN_INFO "process %d (%s) no "
5250 "longer affine to cpu%d\n", 5250 "longer affine to cpu%d\n",
5251 task_pid_nr(p), p->comm, dead_cpu); 5251 task_pid_nr(p), p->comm, dead_cpu);
5252 }
5252 } 5253 }
5253 } while (!__migrate_task_irq(p, dead_cpu, dest_cpu)); 5254 } while (!__migrate_task_irq(p, dead_cpu, dest_cpu));
5254} 5255}
@@ -5350,7 +5351,7 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
5350 5351
5351 /* 5352 /*
5352 * Drop lock around migration; if someone else moves it, 5353 * Drop lock around migration; if someone else moves it,
5353 * that's OK. No task can be added to this CPU, so iteration is 5354 * that's OK. No task can be added to this CPU, so iteration is
5354 * fine. 5355 * fine.
5355 */ 5356 */
5356 spin_unlock_irq(&rq->lock); 5357 spin_unlock_irq(&rq->lock);
@@ -5414,7 +5415,7 @@ static void sd_free_ctl_entry(struct ctl_table **tablep)
5414 /* 5415 /*
5415 * In the intermediate directories, both the child directory and 5416 * In the intermediate directories, both the child directory and
5416 * procname are dynamically allocated and could fail but the mode 5417 * procname are dynamically allocated and could fail but the mode
5417 * will always be set. In the lowest directory the names are 5418 * will always be set. In the lowest directory the names are
5418 * static strings and all have proc handlers. 5419 * static strings and all have proc handlers.
5419 */ 5420 */
5420 for (entry = *tablep; entry->mode; entry++) { 5421 for (entry = *tablep; entry->mode; entry++) {
@@ -5585,7 +5586,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5585 case CPU_UP_CANCELED_FROZEN: 5586 case CPU_UP_CANCELED_FROZEN:
5586 if (!cpu_rq(cpu)->migration_thread) 5587 if (!cpu_rq(cpu)->migration_thread)
5587 break; 5588 break;
5588 /* Unbind it from offline cpu so it can run. Fall thru. */ 5589 /* Unbind it from offline cpu so it can run. Fall thru. */
5589 kthread_bind(cpu_rq(cpu)->migration_thread, 5590 kthread_bind(cpu_rq(cpu)->migration_thread,
5590 any_online_cpu(cpu_online_map)); 5591 any_online_cpu(cpu_online_map));
5591 kthread_stop(cpu_rq(cpu)->migration_thread); 5592 kthread_stop(cpu_rq(cpu)->migration_thread);
@@ -5612,9 +5613,11 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5612 migrate_nr_uninterruptible(rq); 5613 migrate_nr_uninterruptible(rq);
5613 BUG_ON(rq->nr_running != 0); 5614 BUG_ON(rq->nr_running != 0);
5614 5615
5615 /* No need to migrate the tasks: it was best-effort if 5616 /*
5616 * they didn't take sched_hotcpu_mutex. Just wake up 5617 * No need to migrate the tasks: it was best-effort if
5617 * the requestors. */ 5618 * they didn't take sched_hotcpu_mutex. Just wake up
5619 * the requestors.
5620 */
5618 spin_lock_irq(&rq->lock); 5621 spin_lock_irq(&rq->lock);
5619 while (!list_empty(&rq->migration_queue)) { 5622 while (!list_empty(&rq->migration_queue)) {
5620 struct migration_req *req; 5623 struct migration_req *req;
@@ -5922,7 +5925,7 @@ init_sched_build_groups(cpumask_t span, const cpumask_t *cpu_map,
5922 * @node: node whose sched_domain we're building 5925 * @node: node whose sched_domain we're building
5923 * @used_nodes: nodes already in the sched_domain 5926 * @used_nodes: nodes already in the sched_domain
5924 * 5927 *
5925 * Find the next node to include in a given scheduling domain. Simply 5928 * Find the next node to include in a given scheduling domain. Simply
5926 * finds the closest node not already in the @used_nodes map. 5929 * finds the closest node not already in the @used_nodes map.
5927 * 5930 *
5928 * Should use nodemask_t. 5931 * Should use nodemask_t.
@@ -5962,7 +5965,7 @@ static int find_next_best_node(int node, unsigned long *used_nodes)
5962 * @node: node whose cpumask we're constructing 5965 * @node: node whose cpumask we're constructing
5963 * @size: number of nodes to include in this span 5966 * @size: number of nodes to include in this span
5964 * 5967 *
5965 * Given a node, construct a good cpumask for its sched_domain to span. It 5968 * Given a node, construct a good cpumask for its sched_domain to span. It
5966 * should be one that prevents unnecessary balancing, but also spreads tasks 5969 * should be one that prevents unnecessary balancing, but also spreads tasks
5967 * out optimally. 5970 * out optimally.
5968 */ 5971 */
@@ -5999,8 +6002,8 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
5999static DEFINE_PER_CPU(struct sched_domain, cpu_domains); 6002static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
6000static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); 6003static DEFINE_PER_CPU(struct sched_group, sched_group_cpus);
6001 6004
6002static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, 6005static int
6003 struct sched_group **sg) 6006cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg)
6004{ 6007{
6005 if (sg) 6008 if (sg)
6006 *sg = &per_cpu(sched_group_cpus, cpu); 6009 *sg = &per_cpu(sched_group_cpus, cpu);
@@ -6017,8 +6020,8 @@ static DEFINE_PER_CPU(struct sched_group, sched_group_core);
6017#endif 6020#endif
6018 6021
6019#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) 6022#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
6020static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map, 6023static int
6021 struct sched_group **sg) 6024cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg)
6022{ 6025{
6023 int group; 6026 int group;
6024 cpumask_t mask = per_cpu(cpu_sibling_map, cpu); 6027 cpumask_t mask = per_cpu(cpu_sibling_map, cpu);
@@ -6029,8 +6032,8 @@ static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map,
6029 return group; 6032 return group;
6030} 6033}
6031#elif defined(CONFIG_SCHED_MC) 6034#elif defined(CONFIG_SCHED_MC)
6032static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map, 6035static int
6033 struct sched_group **sg) 6036cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg)
6034{ 6037{
6035 if (sg) 6038 if (sg)
6036 *sg = &per_cpu(sched_group_core, cpu); 6039 *sg = &per_cpu(sched_group_core, cpu);
@@ -6041,8 +6044,8 @@ static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map,
6041static DEFINE_PER_CPU(struct sched_domain, phys_domains); 6044static DEFINE_PER_CPU(struct sched_domain, phys_domains);
6042static DEFINE_PER_CPU(struct sched_group, sched_group_phys); 6045static DEFINE_PER_CPU(struct sched_group, sched_group_phys);
6043 6046
6044static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, 6047static int
6045 struct sched_group **sg) 6048cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg)
6046{ 6049{
6047 int group; 6050 int group;
6048#ifdef CONFIG_SCHED_MC 6051#ifdef CONFIG_SCHED_MC
@@ -6222,7 +6225,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6222 * Allocate the per-node list of sched groups 6225 * Allocate the per-node list of sched groups
6223 */ 6226 */
6224 sched_group_nodes = kcalloc(MAX_NUMNODES, sizeof(struct sched_group *), 6227 sched_group_nodes = kcalloc(MAX_NUMNODES, sizeof(struct sched_group *),
6225 GFP_KERNEL); 6228 GFP_KERNEL);
6226 if (!sched_group_nodes) { 6229 if (!sched_group_nodes) {
6227 printk(KERN_WARNING "Can not alloc sched group node list\n"); 6230 printk(KERN_WARNING "Can not alloc sched group node list\n");
6228 return -ENOMEM; 6231 return -ENOMEM;
@@ -6469,7 +6472,7 @@ static int ndoms_cur; /* number of sched domains in 'doms_cur' */
6469static cpumask_t fallback_doms; 6472static cpumask_t fallback_doms;
6470 6473
6471/* 6474/*
6472 * Set up scheduler domains and groups. Callers must hold the hotplug lock. 6475 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
6473 * For now this just excludes isolated cpus, but could be used to 6476 * For now this just excludes isolated cpus, but could be used to
6474 * exclude other special cases in the future. 6477 * exclude other special cases in the future.
6475 */ 6478 */
@@ -6511,19 +6514,19 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
6511 6514
6512/* 6515/*
6513 * Partition sched domains as specified by the 'ndoms_new' 6516 * Partition sched domains as specified by the 'ndoms_new'
6514 * cpumasks in the array doms_new[] of cpumasks. This compares 6517 * cpumasks in the array doms_new[] of cpumasks. This compares
6515 * doms_new[] to the current sched domain partitioning, doms_cur[]. 6518 * doms_new[] to the current sched domain partitioning, doms_cur[].
6516 * It destroys each deleted domain and builds each new domain. 6519 * It destroys each deleted domain and builds each new domain.
6517 * 6520 *
6518 * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'. 6521 * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'.
6519 * The masks don't intersect (don't overlap.) We should setup one 6522 * The masks don't intersect (don't overlap.) We should setup one
6520 * sched domain for each mask. CPUs not in any of the cpumasks will 6523 * sched domain for each mask. CPUs not in any of the cpumasks will
6521 * not be load balanced. If the same cpumask appears both in the 6524 * not be load balanced. If the same cpumask appears both in the
6522 * current 'doms_cur' domains and in the new 'doms_new', we can leave 6525 * current 'doms_cur' domains and in the new 'doms_new', we can leave
6523 * it as it is. 6526 * it as it is.
6524 * 6527 *
6525 * The passed in 'doms_new' should be kmalloc'd. This routine takes 6528 * The passed in 'doms_new' should be kmalloc'd. This routine takes
6526 * ownership of it and will kfree it when done with it. If the caller 6529 * ownership of it and will kfree it when done with it. If the caller
6527 * failed the kmalloc call, then it can pass in doms_new == NULL, 6530 * failed the kmalloc call, then it can pass in doms_new == NULL,
6528 * and partition_sched_domains() will fallback to the single partition 6531 * and partition_sched_domains() will fallback to the single partition
6529 * 'fallback_doms'. 6532 * 'fallback_doms'.
@@ -6653,7 +6656,7 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
6653#endif 6656#endif
6654 6657
6655/* 6658/*
6656 * Force a reinitialization of the sched domains hierarchy. The domains 6659 * Force a reinitialization of the sched domains hierarchy. The domains
6657 * and groups cannot be updated in place without racing with the balancing 6660 * and groups cannot be updated in place without racing with the balancing
6658 * code, so we temporarily attach all running cpus to the NULL domain 6661 * code, so we temporarily attach all running cpus to the NULL domain
6659 * which will prevent rebalancing while the sched domains are recalculated. 6662 * which will prevent rebalancing while the sched domains are recalculated.
@@ -6943,8 +6946,8 @@ struct task_struct *curr_task(int cpu)
6943 * @p: the task pointer to set. 6946 * @p: the task pointer to set.
6944 * 6947 *
6945 * Description: This function must only be used when non-maskable interrupts 6948 * Description: This function must only be used when non-maskable interrupts
6946 * are serviced on a separate stack. It allows the architecture to switch the 6949 * are serviced on a separate stack. It allows the architecture to switch the
6947 * notion of the current task on a cpu in a non-blocking manner. This function 6950 * notion of the current task on a cpu in a non-blocking manner. This function
6948 * must be called with all CPU's synchronized, and interrupts disabled, and 6951 * must be called with all CPU's synchronized, and interrupts disabled, and
6949 * the caller must save the original value of the current task (see 6952 * the caller must save the original value of the current task (see
6950 * curr_task() above) and restore that value before reenabling interrupts and 6953 * curr_task() above) and restore that value before reenabling interrupts and
@@ -7193,16 +7196,17 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
7193 return &tg->css; 7196 return &tg->css;
7194} 7197}
7195 7198
7196static void cpu_cgroup_destroy(struct cgroup_subsys *ss, 7199static void
7197 struct cgroup *cgrp) 7200cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
7198{ 7201{
7199 struct task_group *tg = cgroup_tg(cgrp); 7202 struct task_group *tg = cgroup_tg(cgrp);
7200 7203
7201 sched_destroy_group(tg); 7204 sched_destroy_group(tg);
7202} 7205}
7203 7206
7204static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, 7207static int
7205 struct cgroup *cgrp, struct task_struct *tsk) 7208cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
7209 struct task_struct *tsk)
7206{ 7210{
7207 /* We don't support RT-tasks being in separate groups */ 7211 /* We don't support RT-tasks being in separate groups */
7208 if (tsk->sched_class != &fair_sched_class) 7212 if (tsk->sched_class != &fair_sched_class)
@@ -7308,8 +7312,8 @@ static struct cgroup_subsys_state *cpuacct_create(
7308} 7312}
7309 7313
7310/* destroy an existing cpu accounting group */ 7314/* destroy an existing cpu accounting group */
7311static void cpuacct_destroy(struct cgroup_subsys *ss, 7315static void
7312 struct cgroup *cont) 7316cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
7313{ 7317{
7314 struct cpuacct *ca = cgroup_ca(cont); 7318 struct cpuacct *ca = cgroup_ca(cont);
7315 7319