diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-12-05 12:27:46 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-12-05 12:27:46 -0500 |
commit | 7e1fb765c613298d861f80fa18af26df87a4ec19 (patch) | |
tree | 2c6d499decaf469bcc79a4873557146ead3bef9c | |
parent | ad658cec232771b11e95bb5f0d639d48f898a1f2 (diff) | |
parent | cde898fa80a45bb23eab2a060fc79d0913081409 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched
* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched:
futex: correctly return -EFAULT not -EINVAL
lockdep: in_range() fix
lockdep: fix debug_show_all_locks()
sched: style cleanups
futex: fix for futex_wait signal stack corruption
-rw-r--r-- | include/linux/thread_info.h | 17 | ||||
-rw-r--r-- | kernel/futex.c | 27 | ||||
-rw-r--r-- | kernel/lockdep.c | 29 | ||||
-rw-r--r-- | kernel/sched.c | 132 |
4 files changed, 114 insertions, 91 deletions
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 1c4eb41dbd89..9c4ad755d7e5 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h | |||
@@ -7,12 +7,25 @@ | |||
7 | #ifndef _LINUX_THREAD_INFO_H | 7 | #ifndef _LINUX_THREAD_INFO_H |
8 | #define _LINUX_THREAD_INFO_H | 8 | #define _LINUX_THREAD_INFO_H |
9 | 9 | ||
10 | #include <linux/types.h> | ||
11 | |||
10 | /* | 12 | /* |
11 | * System call restart block. | 13 | * System call restart block. |
12 | */ | 14 | */ |
13 | struct restart_block { | 15 | struct restart_block { |
14 | long (*fn)(struct restart_block *); | 16 | long (*fn)(struct restart_block *); |
15 | unsigned long arg0, arg1, arg2, arg3; | 17 | union { |
18 | struct { | ||
19 | unsigned long arg0, arg1, arg2, arg3; | ||
20 | }; | ||
21 | /* For futex_wait */ | ||
22 | struct { | ||
23 | u32 *uaddr; | ||
24 | u32 val; | ||
25 | u32 flags; | ||
26 | u64 time; | ||
27 | } futex; | ||
28 | }; | ||
16 | }; | 29 | }; |
17 | 30 | ||
18 | extern long do_no_restart_syscall(struct restart_block *parm); | 31 | extern long do_no_restart_syscall(struct restart_block *parm); |
diff --git a/kernel/futex.c b/kernel/futex.c index 9dc591ab681a..172a1aeeafdb 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -658,7 +658,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | |||
658 | 658 | ||
659 | if (curval == -EFAULT) | 659 | if (curval == -EFAULT) |
660 | ret = -EFAULT; | 660 | ret = -EFAULT; |
661 | if (curval != uval) | 661 | else if (curval != uval) |
662 | ret = -EINVAL; | 662 | ret = -EINVAL; |
663 | if (ret) { | 663 | if (ret) { |
664 | spin_unlock(&pi_state->pi_mutex.wait_lock); | 664 | spin_unlock(&pi_state->pi_mutex.wait_lock); |
@@ -1149,9 +1149,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |||
1149 | 1149 | ||
1150 | /* | 1150 | /* |
1151 | * In case we must use restart_block to restart a futex_wait, | 1151 | * In case we must use restart_block to restart a futex_wait, |
1152 | * we encode in the 'arg3' shared capability | 1152 | * we encode in the 'flags' shared capability |
1153 | */ | 1153 | */ |
1154 | #define ARG3_SHARED 1 | 1154 | #define FLAGS_SHARED 1 |
1155 | 1155 | ||
1156 | static long futex_wait_restart(struct restart_block *restart); | 1156 | static long futex_wait_restart(struct restart_block *restart); |
1157 | 1157 | ||
@@ -1290,12 +1290,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1290 | struct restart_block *restart; | 1290 | struct restart_block *restart; |
1291 | restart = &current_thread_info()->restart_block; | 1291 | restart = &current_thread_info()->restart_block; |
1292 | restart->fn = futex_wait_restart; | 1292 | restart->fn = futex_wait_restart; |
1293 | restart->arg0 = (unsigned long)uaddr; | 1293 | restart->futex.uaddr = (u32 *)uaddr; |
1294 | restart->arg1 = (unsigned long)val; | 1294 | restart->futex.val = val; |
1295 | restart->arg2 = (unsigned long)abs_time; | 1295 | restart->futex.time = abs_time->tv64; |
1296 | restart->arg3 = 0; | 1296 | restart->futex.flags = 0; |
1297 | |||
1297 | if (fshared) | 1298 | if (fshared) |
1298 | restart->arg3 |= ARG3_SHARED; | 1299 | restart->futex.flags |= FLAGS_SHARED; |
1299 | return -ERESTART_RESTARTBLOCK; | 1300 | return -ERESTART_RESTARTBLOCK; |
1300 | } | 1301 | } |
1301 | 1302 | ||
@@ -1310,15 +1311,15 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1310 | 1311 | ||
1311 | static long futex_wait_restart(struct restart_block *restart) | 1312 | static long futex_wait_restart(struct restart_block *restart) |
1312 | { | 1313 | { |
1313 | u32 __user *uaddr = (u32 __user *)restart->arg0; | 1314 | u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; |
1314 | u32 val = (u32)restart->arg1; | ||
1315 | ktime_t *abs_time = (ktime_t *)restart->arg2; | ||
1316 | struct rw_semaphore *fshared = NULL; | 1315 | struct rw_semaphore *fshared = NULL; |
1316 | ktime_t t; | ||
1317 | 1317 | ||
1318 | t.tv64 = restart->futex.time; | ||
1318 | restart->fn = do_no_restart_syscall; | 1319 | restart->fn = do_no_restart_syscall; |
1319 | if (restart->arg3 & ARG3_SHARED) | 1320 | if (restart->futex.flags & FLAGS_SHARED) |
1320 | fshared = &current->mm->mmap_sem; | 1321 | fshared = &current->mm->mmap_sem; |
1321 | return (long)futex_wait(uaddr, fshared, val, abs_time); | 1322 | return (long)futex_wait(uaddr, fshared, restart->futex.val, &t); |
1322 | } | 1323 | } |
1323 | 1324 | ||
1324 | 1325 | ||
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index ed38bbfc48a3..0f389621bb6b 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -3054,11 +3054,6 @@ void __init lockdep_info(void) | |||
3054 | #endif | 3054 | #endif |
3055 | } | 3055 | } |
3056 | 3056 | ||
3057 | static inline int in_range(const void *start, const void *addr, const void *end) | ||
3058 | { | ||
3059 | return addr >= start && addr <= end; | ||
3060 | } | ||
3061 | |||
3062 | static void | 3057 | static void |
3063 | print_freed_lock_bug(struct task_struct *curr, const void *mem_from, | 3058 | print_freed_lock_bug(struct task_struct *curr, const void *mem_from, |
3064 | const void *mem_to, struct held_lock *hlock) | 3059 | const void *mem_to, struct held_lock *hlock) |
@@ -3080,6 +3075,13 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, | |||
3080 | dump_stack(); | 3075 | dump_stack(); |
3081 | } | 3076 | } |
3082 | 3077 | ||
3078 | static inline int not_in_range(const void* mem_from, unsigned long mem_len, | ||
3079 | const void* lock_from, unsigned long lock_len) | ||
3080 | { | ||
3081 | return lock_from + lock_len <= mem_from || | ||
3082 | mem_from + mem_len <= lock_from; | ||
3083 | } | ||
3084 | |||
3083 | /* | 3085 | /* |
3084 | * Called when kernel memory is freed (or unmapped), or if a lock | 3086 | * Called when kernel memory is freed (or unmapped), or if a lock |
3085 | * is destroyed or reinitialized - this code checks whether there is | 3087 | * is destroyed or reinitialized - this code checks whether there is |
@@ -3087,7 +3089,6 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, | |||
3087 | */ | 3089 | */ |
3088 | void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) | 3090 | void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) |
3089 | { | 3091 | { |
3090 | const void *mem_to = mem_from + mem_len, *lock_from, *lock_to; | ||
3091 | struct task_struct *curr = current; | 3092 | struct task_struct *curr = current; |
3092 | struct held_lock *hlock; | 3093 | struct held_lock *hlock; |
3093 | unsigned long flags; | 3094 | unsigned long flags; |
@@ -3100,14 +3101,11 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) | |||
3100 | for (i = 0; i < curr->lockdep_depth; i++) { | 3101 | for (i = 0; i < curr->lockdep_depth; i++) { |
3101 | hlock = curr->held_locks + i; | 3102 | hlock = curr->held_locks + i; |
3102 | 3103 | ||
3103 | lock_from = (void *)hlock->instance; | 3104 | if (not_in_range(mem_from, mem_len, hlock->instance, |
3104 | lock_to = (void *)(hlock->instance + 1); | 3105 | sizeof(*hlock->instance))) |
3105 | |||
3106 | if (!in_range(mem_from, lock_from, mem_to) && | ||
3107 | !in_range(mem_from, lock_to, mem_to)) | ||
3108 | continue; | 3106 | continue; |
3109 | 3107 | ||
3110 | print_freed_lock_bug(curr, mem_from, mem_to, hlock); | 3108 | print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock); |
3111 | break; | 3109 | break; |
3112 | } | 3110 | } |
3113 | local_irq_restore(flags); | 3111 | local_irq_restore(flags); |
@@ -3173,6 +3171,13 @@ retry: | |||
3173 | printk(" locked it.\n"); | 3171 | printk(" locked it.\n"); |
3174 | 3172 | ||
3175 | do_each_thread(g, p) { | 3173 | do_each_thread(g, p) { |
3174 | /* | ||
3175 | * It's not reliable to print a task's held locks | ||
3176 | * if it's not sleeping (or if it's not the current | ||
3177 | * task): | ||
3178 | */ | ||
3179 | if (p->state == TASK_RUNNING && p != current) | ||
3180 | continue; | ||
3176 | if (p->lockdep_depth) | 3181 | if (p->lockdep_depth) |
3177 | lockdep_print_held_locks(p); | 3182 | lockdep_print_held_locks(p); |
3178 | if (!unlock) | 3183 | if (!unlock) |
diff --git a/kernel/sched.c b/kernel/sched.c index b062856b946c..67d9d1799d86 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -209,9 +209,8 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
209 | tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), | 209 | tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), |
210 | struct task_group, css); | 210 | struct task_group, css); |
211 | #else | 211 | #else |
212 | tg = &init_task_group; | 212 | tg = &init_task_group; |
213 | #endif | 213 | #endif |
214 | |||
215 | return tg; | 214 | return tg; |
216 | } | 215 | } |
217 | 216 | ||
@@ -249,15 +248,16 @@ struct cfs_rq { | |||
249 | #ifdef CONFIG_FAIR_GROUP_SCHED | 248 | #ifdef CONFIG_FAIR_GROUP_SCHED |
250 | struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ | 249 | struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ |
251 | 250 | ||
252 | /* leaf cfs_rqs are those that hold tasks (lowest schedulable entity in | 251 | /* |
252 | * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in | ||
253 | * a hierarchy). Non-leaf lrqs hold other higher schedulable entities | 253 | * a hierarchy). Non-leaf lrqs hold other higher schedulable entities |
254 | * (like users, containers etc.) | 254 | * (like users, containers etc.) |
255 | * | 255 | * |
256 | * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This | 256 | * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This |
257 | * list is used during load balance. | 257 | * list is used during load balance. |
258 | */ | 258 | */ |
259 | struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */ | 259 | struct list_head leaf_cfs_rq_list; |
260 | struct task_group *tg; /* group that "owns" this runqueue */ | 260 | struct task_group *tg; /* group that "owns" this runqueue */ |
261 | #endif | 261 | #endif |
262 | }; | 262 | }; |
263 | 263 | ||
@@ -300,7 +300,7 @@ struct rq { | |||
300 | /* list of leaf cfs_rq on this cpu: */ | 300 | /* list of leaf cfs_rq on this cpu: */ |
301 | struct list_head leaf_cfs_rq_list; | 301 | struct list_head leaf_cfs_rq_list; |
302 | #endif | 302 | #endif |
303 | struct rt_rq rt; | 303 | struct rt_rq rt; |
304 | 304 | ||
305 | /* | 305 | /* |
306 | * This is part of a global counter where only the total sum | 306 | * This is part of a global counter where only the total sum |
@@ -457,8 +457,8 @@ enum { | |||
457 | SCHED_FEAT_NEW_FAIR_SLEEPERS = 1, | 457 | SCHED_FEAT_NEW_FAIR_SLEEPERS = 1, |
458 | SCHED_FEAT_WAKEUP_PREEMPT = 2, | 458 | SCHED_FEAT_WAKEUP_PREEMPT = 2, |
459 | SCHED_FEAT_START_DEBIT = 4, | 459 | SCHED_FEAT_START_DEBIT = 4, |
460 | SCHED_FEAT_TREE_AVG = 8, | 460 | SCHED_FEAT_TREE_AVG = 8, |
461 | SCHED_FEAT_APPROX_AVG = 16, | 461 | SCHED_FEAT_APPROX_AVG = 16, |
462 | }; | 462 | }; |
463 | 463 | ||
464 | const_debug unsigned int sysctl_sched_features = | 464 | const_debug unsigned int sysctl_sched_features = |
@@ -591,7 +591,7 @@ static inline struct rq *__task_rq_lock(struct task_struct *p) | |||
591 | 591 | ||
592 | /* | 592 | /* |
593 | * task_rq_lock - lock the runqueue a given task resides on and disable | 593 | * task_rq_lock - lock the runqueue a given task resides on and disable |
594 | * interrupts. Note the ordering: we can safely lookup the task_rq without | 594 | * interrupts. Note the ordering: we can safely lookup the task_rq without |
595 | * explicitly disabling preemption. | 595 | * explicitly disabling preemption. |
596 | */ | 596 | */ |
597 | static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) | 597 | static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) |
@@ -779,7 +779,7 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec) | |||
779 | * To aid in avoiding the subversion of "niceness" due to uneven distribution | 779 | * To aid in avoiding the subversion of "niceness" due to uneven distribution |
780 | * of tasks with abnormal "nice" values across CPUs the contribution that | 780 | * of tasks with abnormal "nice" values across CPUs the contribution that |
781 | * each task makes to its run queue's load is weighted according to its | 781 | * each task makes to its run queue's load is weighted according to its |
782 | * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a | 782 | * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a |
783 | * scaled version of the new time slice allocation that they receive on time | 783 | * scaled version of the new time slice allocation that they receive on time |
784 | * slice expiry etc. | 784 | * slice expiry etc. |
785 | */ | 785 | */ |
@@ -1854,7 +1854,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, | |||
1854 | * and do any other architecture-specific cleanup actions. | 1854 | * and do any other architecture-specific cleanup actions. |
1855 | * | 1855 | * |
1856 | * Note that we may have delayed dropping an mm in context_switch(). If | 1856 | * Note that we may have delayed dropping an mm in context_switch(). If |
1857 | * so, we finish that here outside of the runqueue lock. (Doing it | 1857 | * so, we finish that here outside of the runqueue lock. (Doing it |
1858 | * with the lock held can cause deadlocks; see schedule() for | 1858 | * with the lock held can cause deadlocks; see schedule() for |
1859 | * details.) | 1859 | * details.) |
1860 | */ | 1860 | */ |
@@ -2136,7 +2136,7 @@ static void double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
2136 | /* | 2136 | /* |
2137 | * If dest_cpu is allowed for this process, migrate the task to it. | 2137 | * If dest_cpu is allowed for this process, migrate the task to it. |
2138 | * This is accomplished by forcing the cpu_allowed mask to only | 2138 | * This is accomplished by forcing the cpu_allowed mask to only |
2139 | * allow dest_cpu, which will force the cpu onto dest_cpu. Then | 2139 | * allow dest_cpu, which will force the cpu onto dest_cpu. Then |
2140 | * the cpu_allowed mask is restored. | 2140 | * the cpu_allowed mask is restored. |
2141 | */ | 2141 | */ |
2142 | static void sched_migrate_task(struct task_struct *p, int dest_cpu) | 2142 | static void sched_migrate_task(struct task_struct *p, int dest_cpu) |
@@ -2581,7 +2581,7 @@ group_next: | |||
2581 | * tasks around. Thus we look for the minimum possible imbalance. | 2581 | * tasks around. Thus we look for the minimum possible imbalance. |
2582 | * Negative imbalances (*we* are more loaded than anyone else) will | 2582 | * Negative imbalances (*we* are more loaded than anyone else) will |
2583 | * be counted as no imbalance for these purposes -- we can't fix that | 2583 | * be counted as no imbalance for these purposes -- we can't fix that |
2584 | * by pulling tasks to us. Be careful of negative numbers as they'll | 2584 | * by pulling tasks to us. Be careful of negative numbers as they'll |
2585 | * appear as very large values with unsigned longs. | 2585 | * appear as very large values with unsigned longs. |
2586 | */ | 2586 | */ |
2587 | if (max_load <= busiest_load_per_task) | 2587 | if (max_load <= busiest_load_per_task) |
@@ -3016,7 +3016,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | |||
3016 | 3016 | ||
3017 | /* | 3017 | /* |
3018 | * This condition is "impossible", if it occurs | 3018 | * This condition is "impossible", if it occurs |
3019 | * we need to fix it. Originally reported by | 3019 | * we need to fix it. Originally reported by |
3020 | * Bjorn Helgaas on a 128-cpu setup. | 3020 | * Bjorn Helgaas on a 128-cpu setup. |
3021 | */ | 3021 | */ |
3022 | BUG_ON(busiest_rq == target_rq); | 3022 | BUG_ON(busiest_rq == target_rq); |
@@ -3048,7 +3048,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | |||
3048 | #ifdef CONFIG_NO_HZ | 3048 | #ifdef CONFIG_NO_HZ |
3049 | static struct { | 3049 | static struct { |
3050 | atomic_t load_balancer; | 3050 | atomic_t load_balancer; |
3051 | cpumask_t cpu_mask; | 3051 | cpumask_t cpu_mask; |
3052 | } nohz ____cacheline_aligned = { | 3052 | } nohz ____cacheline_aligned = { |
3053 | .load_balancer = ATOMIC_INIT(-1), | 3053 | .load_balancer = ATOMIC_INIT(-1), |
3054 | .cpu_mask = CPU_MASK_NONE, | 3054 | .cpu_mask = CPU_MASK_NONE, |
@@ -3552,7 +3552,7 @@ static noinline void __schedule_bug(struct task_struct *prev) | |||
3552 | static inline void schedule_debug(struct task_struct *prev) | 3552 | static inline void schedule_debug(struct task_struct *prev) |
3553 | { | 3553 | { |
3554 | /* | 3554 | /* |
3555 | * Test if we are atomic. Since do_exit() needs to call into | 3555 | * Test if we are atomic. Since do_exit() needs to call into |
3556 | * schedule() atomically, we ignore that path for now. | 3556 | * schedule() atomically, we ignore that path for now. |
3557 | * Otherwise, whine if we are scheduling when we should not be. | 3557 | * Otherwise, whine if we are scheduling when we should not be. |
3558 | */ | 3558 | */ |
@@ -3674,7 +3674,7 @@ EXPORT_SYMBOL(schedule); | |||
3674 | #ifdef CONFIG_PREEMPT | 3674 | #ifdef CONFIG_PREEMPT |
3675 | /* | 3675 | /* |
3676 | * this is the entry point to schedule() from in-kernel preemption | 3676 | * this is the entry point to schedule() from in-kernel preemption |
3677 | * off of preempt_enable. Kernel preemptions off return from interrupt | 3677 | * off of preempt_enable. Kernel preemptions off return from interrupt |
3678 | * occur there and call schedule directly. | 3678 | * occur there and call schedule directly. |
3679 | */ | 3679 | */ |
3680 | asmlinkage void __sched preempt_schedule(void) | 3680 | asmlinkage void __sched preempt_schedule(void) |
@@ -3686,7 +3686,7 @@ asmlinkage void __sched preempt_schedule(void) | |||
3686 | #endif | 3686 | #endif |
3687 | /* | 3687 | /* |
3688 | * If there is a non-zero preempt_count or interrupts are disabled, | 3688 | * If there is a non-zero preempt_count or interrupts are disabled, |
3689 | * we do not want to preempt the current task. Just return.. | 3689 | * we do not want to preempt the current task. Just return.. |
3690 | */ | 3690 | */ |
3691 | if (likely(ti->preempt_count || irqs_disabled())) | 3691 | if (likely(ti->preempt_count || irqs_disabled())) |
3692 | return; | 3692 | return; |
@@ -3772,12 +3772,12 @@ int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, | |||
3772 | EXPORT_SYMBOL(default_wake_function); | 3772 | EXPORT_SYMBOL(default_wake_function); |
3773 | 3773 | ||
3774 | /* | 3774 | /* |
3775 | * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just | 3775 | * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just |
3776 | * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve | 3776 | * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve |
3777 | * number) then we wake all the non-exclusive tasks and one exclusive task. | 3777 | * number) then we wake all the non-exclusive tasks and one exclusive task. |
3778 | * | 3778 | * |
3779 | * There are circumstances in which we can try to wake a task which has already | 3779 | * There are circumstances in which we can try to wake a task which has already |
3780 | * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns | 3780 | * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns |
3781 | * zero in this (rare) case, and we handle it by continuing to scan the queue. | 3781 | * zero in this (rare) case, and we handle it by continuing to scan the queue. |
3782 | */ | 3782 | */ |
3783 | static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, | 3783 | static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, |
@@ -4390,8 +4390,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) | |||
4390 | * @policy: new policy. | 4390 | * @policy: new policy. |
4391 | * @param: structure containing the new RT priority. | 4391 | * @param: structure containing the new RT priority. |
4392 | */ | 4392 | */ |
4393 | asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, | 4393 | asmlinkage long |
4394 | struct sched_param __user *param) | 4394 | sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) |
4395 | { | 4395 | { |
4396 | /* negative values for policy are not valid */ | 4396 | /* negative values for policy are not valid */ |
4397 | if (policy < 0) | 4397 | if (policy < 0) |
@@ -4491,7 +4491,7 @@ long sched_setaffinity(pid_t pid, cpumask_t new_mask) | |||
4491 | 4491 | ||
4492 | /* | 4492 | /* |
4493 | * It is not safe to call set_cpus_allowed with the | 4493 | * It is not safe to call set_cpus_allowed with the |
4494 | * tasklist_lock held. We will bump the task_struct's | 4494 | * tasklist_lock held. We will bump the task_struct's |
4495 | * usage count and then drop tasklist_lock. | 4495 | * usage count and then drop tasklist_lock. |
4496 | */ | 4496 | */ |
4497 | get_task_struct(p); | 4497 | get_task_struct(p); |
@@ -4687,7 +4687,7 @@ EXPORT_SYMBOL(cond_resched); | |||
4687 | * cond_resched_lock() - if a reschedule is pending, drop the given lock, | 4687 | * cond_resched_lock() - if a reschedule is pending, drop the given lock, |
4688 | * call schedule, and on return reacquire the lock. | 4688 | * call schedule, and on return reacquire the lock. |
4689 | * | 4689 | * |
4690 | * This works OK both with and without CONFIG_PREEMPT. We do strange low-level | 4690 | * This works OK both with and without CONFIG_PREEMPT. We do strange low-level |
4691 | * operations here to prevent schedule() from being called twice (once via | 4691 | * operations here to prevent schedule() from being called twice (once via |
4692 | * spin_unlock(), once by hand). | 4692 | * spin_unlock(), once by hand). |
4693 | */ | 4693 | */ |
@@ -4741,7 +4741,7 @@ void __sched yield(void) | |||
4741 | EXPORT_SYMBOL(yield); | 4741 | EXPORT_SYMBOL(yield); |
4742 | 4742 | ||
4743 | /* | 4743 | /* |
4744 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so | 4744 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so |
4745 | * that process accounting knows that this is a task in IO wait state. | 4745 | * that process accounting knows that this is a task in IO wait state. |
4746 | * | 4746 | * |
4747 | * But don't do that if it is a deliberate, throttling IO wait (this task | 4747 | * But don't do that if it is a deliberate, throttling IO wait (this task |
@@ -5050,7 +5050,7 @@ static inline void sched_init_granularity(void) | |||
5050 | * is removed from the allowed bitmask. | 5050 | * is removed from the allowed bitmask. |
5051 | * | 5051 | * |
5052 | * NOTE: the caller must have a valid reference to the task, the | 5052 | * NOTE: the caller must have a valid reference to the task, the |
5053 | * task must not exit() & deallocate itself prematurely. The | 5053 | * task must not exit() & deallocate itself prematurely. The |
5054 | * call is not atomic; no spinlocks may be held. | 5054 | * call is not atomic; no spinlocks may be held. |
5055 | */ | 5055 | */ |
5056 | int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) | 5056 | int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) |
@@ -5087,7 +5087,7 @@ out: | |||
5087 | EXPORT_SYMBOL_GPL(set_cpus_allowed); | 5087 | EXPORT_SYMBOL_GPL(set_cpus_allowed); |
5088 | 5088 | ||
5089 | /* | 5089 | /* |
5090 | * Move (not current) task off this cpu, onto dest cpu. We're doing | 5090 | * Move (not current) task off this cpu, onto dest cpu. We're doing |
5091 | * this because either it can't run here any more (set_cpus_allowed() | 5091 | * this because either it can't run here any more (set_cpus_allowed() |
5092 | * away from this CPU, or CPU going down), or because we're | 5092 | * away from this CPU, or CPU going down), or because we're |
5093 | * attempting to rebalance this task on exec (sched_exec). | 5093 | * attempting to rebalance this task on exec (sched_exec). |
@@ -5232,7 +5232,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | |||
5232 | * Try to stay on the same cpuset, where the | 5232 | * Try to stay on the same cpuset, where the |
5233 | * current cpuset may be a subset of all cpus. | 5233 | * current cpuset may be a subset of all cpus. |
5234 | * The cpuset_cpus_allowed_locked() variant of | 5234 | * The cpuset_cpus_allowed_locked() variant of |
5235 | * cpuset_cpus_allowed() will not block. It must be | 5235 | * cpuset_cpus_allowed() will not block. It must be |
5236 | * called within calls to cpuset_lock/cpuset_unlock. | 5236 | * called within calls to cpuset_lock/cpuset_unlock. |
5237 | */ | 5237 | */ |
5238 | rq = task_rq_lock(p, &flags); | 5238 | rq = task_rq_lock(p, &flags); |
@@ -5245,10 +5245,11 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | |||
5245 | * kernel threads (both mm NULL), since they never | 5245 | * kernel threads (both mm NULL), since they never |
5246 | * leave kernel. | 5246 | * leave kernel. |
5247 | */ | 5247 | */ |
5248 | if (p->mm && printk_ratelimit()) | 5248 | if (p->mm && printk_ratelimit()) { |
5249 | printk(KERN_INFO "process %d (%s) no " | 5249 | printk(KERN_INFO "process %d (%s) no " |
5250 | "longer affine to cpu%d\n", | 5250 | "longer affine to cpu%d\n", |
5251 | task_pid_nr(p), p->comm, dead_cpu); | 5251 | task_pid_nr(p), p->comm, dead_cpu); |
5252 | } | ||
5252 | } | 5253 | } |
5253 | } while (!__migrate_task_irq(p, dead_cpu, dest_cpu)); | 5254 | } while (!__migrate_task_irq(p, dead_cpu, dest_cpu)); |
5254 | } | 5255 | } |
@@ -5350,7 +5351,7 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) | |||
5350 | 5351 | ||
5351 | /* | 5352 | /* |
5352 | * Drop lock around migration; if someone else moves it, | 5353 | * Drop lock around migration; if someone else moves it, |
5353 | * that's OK. No task can be added to this CPU, so iteration is | 5354 | * that's OK. No task can be added to this CPU, so iteration is |
5354 | * fine. | 5355 | * fine. |
5355 | */ | 5356 | */ |
5356 | spin_unlock_irq(&rq->lock); | 5357 | spin_unlock_irq(&rq->lock); |
@@ -5414,7 +5415,7 @@ static void sd_free_ctl_entry(struct ctl_table **tablep) | |||
5414 | /* | 5415 | /* |
5415 | * In the intermediate directories, both the child directory and | 5416 | * In the intermediate directories, both the child directory and |
5416 | * procname are dynamically allocated and could fail but the mode | 5417 | * procname are dynamically allocated and could fail but the mode |
5417 | * will always be set. In the lowest directory the names are | 5418 | * will always be set. In the lowest directory the names are |
5418 | * static strings and all have proc handlers. | 5419 | * static strings and all have proc handlers. |
5419 | */ | 5420 | */ |
5420 | for (entry = *tablep; entry->mode; entry++) { | 5421 | for (entry = *tablep; entry->mode; entry++) { |
@@ -5585,7 +5586,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
5585 | case CPU_UP_CANCELED_FROZEN: | 5586 | case CPU_UP_CANCELED_FROZEN: |
5586 | if (!cpu_rq(cpu)->migration_thread) | 5587 | if (!cpu_rq(cpu)->migration_thread) |
5587 | break; | 5588 | break; |
5588 | /* Unbind it from offline cpu so it can run. Fall thru. */ | 5589 | /* Unbind it from offline cpu so it can run. Fall thru. */ |
5589 | kthread_bind(cpu_rq(cpu)->migration_thread, | 5590 | kthread_bind(cpu_rq(cpu)->migration_thread, |
5590 | any_online_cpu(cpu_online_map)); | 5591 | any_online_cpu(cpu_online_map)); |
5591 | kthread_stop(cpu_rq(cpu)->migration_thread); | 5592 | kthread_stop(cpu_rq(cpu)->migration_thread); |
@@ -5612,9 +5613,11 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
5612 | migrate_nr_uninterruptible(rq); | 5613 | migrate_nr_uninterruptible(rq); |
5613 | BUG_ON(rq->nr_running != 0); | 5614 | BUG_ON(rq->nr_running != 0); |
5614 | 5615 | ||
5615 | /* No need to migrate the tasks: it was best-effort if | 5616 | /* |
5616 | * they didn't take sched_hotcpu_mutex. Just wake up | 5617 | * No need to migrate the tasks: it was best-effort if |
5617 | * the requestors. */ | 5618 | * they didn't take sched_hotcpu_mutex. Just wake up |
5619 | * the requestors. | ||
5620 | */ | ||
5618 | spin_lock_irq(&rq->lock); | 5621 | spin_lock_irq(&rq->lock); |
5619 | while (!list_empty(&rq->migration_queue)) { | 5622 | while (!list_empty(&rq->migration_queue)) { |
5620 | struct migration_req *req; | 5623 | struct migration_req *req; |
@@ -5922,7 +5925,7 @@ init_sched_build_groups(cpumask_t span, const cpumask_t *cpu_map, | |||
5922 | * @node: node whose sched_domain we're building | 5925 | * @node: node whose sched_domain we're building |
5923 | * @used_nodes: nodes already in the sched_domain | 5926 | * @used_nodes: nodes already in the sched_domain |
5924 | * | 5927 | * |
5925 | * Find the next node to include in a given scheduling domain. Simply | 5928 | * Find the next node to include in a given scheduling domain. Simply |
5926 | * finds the closest node not already in the @used_nodes map. | 5929 | * finds the closest node not already in the @used_nodes map. |
5927 | * | 5930 | * |
5928 | * Should use nodemask_t. | 5931 | * Should use nodemask_t. |
@@ -5962,7 +5965,7 @@ static int find_next_best_node(int node, unsigned long *used_nodes) | |||
5962 | * @node: node whose cpumask we're constructing | 5965 | * @node: node whose cpumask we're constructing |
5963 | * @size: number of nodes to include in this span | 5966 | * @size: number of nodes to include in this span |
5964 | * | 5967 | * |
5965 | * Given a node, construct a good cpumask for its sched_domain to span. It | 5968 | * Given a node, construct a good cpumask for its sched_domain to span. It |
5966 | * should be one that prevents unnecessary balancing, but also spreads tasks | 5969 | * should be one that prevents unnecessary balancing, but also spreads tasks |
5967 | * out optimally. | 5970 | * out optimally. |
5968 | */ | 5971 | */ |
@@ -5999,8 +6002,8 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0; | |||
5999 | static DEFINE_PER_CPU(struct sched_domain, cpu_domains); | 6002 | static DEFINE_PER_CPU(struct sched_domain, cpu_domains); |
6000 | static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); | 6003 | static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); |
6001 | 6004 | ||
6002 | static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, | 6005 | static int |
6003 | struct sched_group **sg) | 6006 | cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg) |
6004 | { | 6007 | { |
6005 | if (sg) | 6008 | if (sg) |
6006 | *sg = &per_cpu(sched_group_cpus, cpu); | 6009 | *sg = &per_cpu(sched_group_cpus, cpu); |
@@ -6017,8 +6020,8 @@ static DEFINE_PER_CPU(struct sched_group, sched_group_core); | |||
6017 | #endif | 6020 | #endif |
6018 | 6021 | ||
6019 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) | 6022 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) |
6020 | static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map, | 6023 | static int |
6021 | struct sched_group **sg) | 6024 | cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg) |
6022 | { | 6025 | { |
6023 | int group; | 6026 | int group; |
6024 | cpumask_t mask = per_cpu(cpu_sibling_map, cpu); | 6027 | cpumask_t mask = per_cpu(cpu_sibling_map, cpu); |
@@ -6029,8 +6032,8 @@ static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map, | |||
6029 | return group; | 6032 | return group; |
6030 | } | 6033 | } |
6031 | #elif defined(CONFIG_SCHED_MC) | 6034 | #elif defined(CONFIG_SCHED_MC) |
6032 | static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map, | 6035 | static int |
6033 | struct sched_group **sg) | 6036 | cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg) |
6034 | { | 6037 | { |
6035 | if (sg) | 6038 | if (sg) |
6036 | *sg = &per_cpu(sched_group_core, cpu); | 6039 | *sg = &per_cpu(sched_group_core, cpu); |
@@ -6041,8 +6044,8 @@ static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map, | |||
6041 | static DEFINE_PER_CPU(struct sched_domain, phys_domains); | 6044 | static DEFINE_PER_CPU(struct sched_domain, phys_domains); |
6042 | static DEFINE_PER_CPU(struct sched_group, sched_group_phys); | 6045 | static DEFINE_PER_CPU(struct sched_group, sched_group_phys); |
6043 | 6046 | ||
6044 | static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, | 6047 | static int |
6045 | struct sched_group **sg) | 6048 | cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg) |
6046 | { | 6049 | { |
6047 | int group; | 6050 | int group; |
6048 | #ifdef CONFIG_SCHED_MC | 6051 | #ifdef CONFIG_SCHED_MC |
@@ -6222,7 +6225,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6222 | * Allocate the per-node list of sched groups | 6225 | * Allocate the per-node list of sched groups |
6223 | */ | 6226 | */ |
6224 | sched_group_nodes = kcalloc(MAX_NUMNODES, sizeof(struct sched_group *), | 6227 | sched_group_nodes = kcalloc(MAX_NUMNODES, sizeof(struct sched_group *), |
6225 | GFP_KERNEL); | 6228 | GFP_KERNEL); |
6226 | if (!sched_group_nodes) { | 6229 | if (!sched_group_nodes) { |
6227 | printk(KERN_WARNING "Can not alloc sched group node list\n"); | 6230 | printk(KERN_WARNING "Can not alloc sched group node list\n"); |
6228 | return -ENOMEM; | 6231 | return -ENOMEM; |
@@ -6469,7 +6472,7 @@ static int ndoms_cur; /* number of sched domains in 'doms_cur' */ | |||
6469 | static cpumask_t fallback_doms; | 6472 | static cpumask_t fallback_doms; |
6470 | 6473 | ||
6471 | /* | 6474 | /* |
6472 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. | 6475 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
6473 | * For now this just excludes isolated cpus, but could be used to | 6476 | * For now this just excludes isolated cpus, but could be used to |
6474 | * exclude other special cases in the future. | 6477 | * exclude other special cases in the future. |
6475 | */ | 6478 | */ |
@@ -6511,19 +6514,19 @@ static void detach_destroy_domains(const cpumask_t *cpu_map) | |||
6511 | 6514 | ||
6512 | /* | 6515 | /* |
6513 | * Partition sched domains as specified by the 'ndoms_new' | 6516 | * Partition sched domains as specified by the 'ndoms_new' |
6514 | * cpumasks in the array doms_new[] of cpumasks. This compares | 6517 | * cpumasks in the array doms_new[] of cpumasks. This compares |
6515 | * doms_new[] to the current sched domain partitioning, doms_cur[]. | 6518 | * doms_new[] to the current sched domain partitioning, doms_cur[]. |
6516 | * It destroys each deleted domain and builds each new domain. | 6519 | * It destroys each deleted domain and builds each new domain. |
6517 | * | 6520 | * |
6518 | * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'. | 6521 | * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'. |
6519 | * The masks don't intersect (don't overlap.) We should set up one | 6522 | * The masks don't intersect (don't overlap.) We should set up one |
6520 | * sched domain for each mask. CPUs not in any of the cpumasks will | 6523 | * sched domain for each mask. CPUs not in any of the cpumasks will |
6521 | * not be load balanced. If the same cpumask appears both in the | 6524 | * not be load balanced. If the same cpumask appears both in the |
6522 | * current 'doms_cur' domains and in the new 'doms_new', we can leave | 6525 | * current 'doms_cur' domains and in the new 'doms_new', we can leave |
6523 | * it as it is. | 6526 | * it as it is. |
6524 | * | 6527 | * |
6525 | * The passed in 'doms_new' should be kmalloc'd. This routine takes | 6528 | * The passed in 'doms_new' should be kmalloc'd. This routine takes |
6526 | * ownership of it and will kfree it when done with it. If the caller | 6529 | * ownership of it and will kfree it when done with it. If the caller |
6527 | * failed the kmalloc call, then it can pass in doms_new == NULL, | 6530 | * failed the kmalloc call, then it can pass in doms_new == NULL, |
6528 | * and partition_sched_domains() will fall back to the single partition | 6531 | * and partition_sched_domains() will fall back to the single partition |
6529 | * 'fallback_doms'. | 6532 | * 'fallback_doms'. |
@@ -6653,7 +6656,7 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) | |||
6653 | #endif | 6656 | #endif |
6654 | 6657 | ||
6655 | /* | 6658 | /* |
6656 | * Force a reinitialization of the sched domains hierarchy. The domains | 6659 | * Force a reinitialization of the sched domains hierarchy. The domains |
6657 | * and groups cannot be updated in place without racing with the balancing | 6660 | * and groups cannot be updated in place without racing with the balancing |
6658 | * code, so we temporarily attach all running cpus to the NULL domain | 6661 | * code, so we temporarily attach all running cpus to the NULL domain |
6659 | * which will prevent rebalancing while the sched domains are recalculated. | 6662 | * which will prevent rebalancing while the sched domains are recalculated. |
@@ -6943,8 +6946,8 @@ struct task_struct *curr_task(int cpu) | |||
6943 | * @p: the task pointer to set. | 6946 | * @p: the task pointer to set. |
6944 | * | 6947 | * |
6945 | * Description: This function must only be used when non-maskable interrupts | 6948 | * Description: This function must only be used when non-maskable interrupts |
6946 | * are serviced on a separate stack. It allows the architecture to switch the | 6949 | * are serviced on a separate stack. It allows the architecture to switch the |
6947 | * notion of the current task on a cpu in a non-blocking manner. This function | 6950 | * notion of the current task on a cpu in a non-blocking manner. This function |
6948 | * must be called with all CPUs synchronized, and interrupts disabled, and the | 6951 | * must be called with all CPUs synchronized, and interrupts disabled, and the |
6949 | * caller must save the original value of the current task (see | 6952 | * caller must save the original value of the current task (see |
6950 | * curr_task() above) and restore that value before reenabling interrupts and | 6953 | * curr_task() above) and restore that value before reenabling interrupts and |
@@ -7193,16 +7196,17 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
7193 | return &tg->css; | 7196 | return &tg->css; |
7194 | } | 7197 | } |
7195 | 7198 | ||
7196 | static void cpu_cgroup_destroy(struct cgroup_subsys *ss, | 7199 | static void |
7197 | struct cgroup *cgrp) | 7200 | cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) |
7198 | { | 7201 | { |
7199 | struct task_group *tg = cgroup_tg(cgrp); | 7202 | struct task_group *tg = cgroup_tg(cgrp); |
7200 | 7203 | ||
7201 | sched_destroy_group(tg); | 7204 | sched_destroy_group(tg); |
7202 | } | 7205 | } |
7203 | 7206 | ||
7204 | static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, | 7207 | static int |
7205 | struct cgroup *cgrp, struct task_struct *tsk) | 7208 | cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
7209 | struct task_struct *tsk) | ||
7206 | { | 7210 | { |
7207 | /* We don't support RT-tasks being in separate groups */ | 7211 | /* We don't support RT-tasks being in separate groups */ |
7208 | if (tsk->sched_class != &fair_sched_class) | 7212 | if (tsk->sched_class != &fair_sched_class) |
@@ -7308,8 +7312,8 @@ static struct cgroup_subsys_state *cpuacct_create( | |||
7308 | } | 7312 | } |
7309 | 7313 | ||
7310 | /* destroy an existing cpu accounting group */ | 7314 | /* destroy an existing cpu accounting group */ |
7311 | static void cpuacct_destroy(struct cgroup_subsys *ss, | 7315 | static void |
7312 | struct cgroup *cont) | 7316 | cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cont) |
7313 | { | 7317 | { |
7314 | struct cpuacct *ca = cgroup_ca(cont); | 7318 | struct cpuacct *ca = cgroup_ca(cont); |
7315 | 7319 | ||