path: root/kernel/sched.c
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  128
1 files changed, 85 insertions, 43 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index a234fbee1238..74f169ac0773 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -238,6 +238,7 @@ struct rq {
 	/* For active balancing */
 	int active_balance;
 	int push_cpu;
+	int cpu;		/* cpu of this runqueue */
 
 	struct task_struct *migration_thread;
 	struct list_head migration_queue;
@@ -267,6 +268,15 @@ struct rq {
 
 static DEFINE_PER_CPU(struct rq, runqueues);
 
+static inline int cpu_of(struct rq *rq)
+{
+#ifdef CONFIG_SMP
+	return rq->cpu;
+#else
+	return 0;
+#endif
+}
+
 /*
  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
  * See detach_destroy_domains: synchronize_sched for details.
@@ -1745,27 +1755,27 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	__releases(rq->lock)
 {
 	struct mm_struct *mm = rq->prev_mm;
-	unsigned long prev_task_flags;
+	long prev_state;
 
 	rq->prev_mm = NULL;
 
 	/*
 	 * A task struct has one reference for the use as "current".
-	 * If a task dies, then it sets EXIT_ZOMBIE in tsk->exit_state and
-	 * calls schedule one last time. The schedule call will never return,
-	 * and the scheduled task must drop that reference.
-	 * The test for EXIT_ZOMBIE must occur while the runqueue locks are
+	 * If a task dies, then it sets TASK_DEAD in tsk->state and calls
+	 * schedule one last time. The schedule call will never return, and
+	 * the scheduled task must drop that reference.
+	 * The test for TASK_DEAD must occur while the runqueue locks are
 	 * still held, otherwise prev could be scheduled on another cpu, die
 	 * there before we look at prev->state, and then the reference would
 	 * be dropped twice.
 	 *	Manfred Spraul <manfred@colorfullife.com>
 	 */
-	prev_task_flags = prev->flags;
+	prev_state = prev->state;
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
 	if (mm)
 		mmdrop(mm);
-	if (unlikely(prev_task_flags & PF_DEAD)) {
+	if (unlikely(prev_state == TASK_DEAD)) {
 		/*
 		 * Remove function-return probe instances associated with this
 		 * task and put them back on the free list.
@@ -2211,7 +2221,8 @@ out:
  */
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
-		   unsigned long *imbalance, enum idle_type idle, int *sd_idle)
+		   unsigned long *imbalance, enum idle_type idle, int *sd_idle,
+		   cpumask_t *cpus)
 {
 	struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
 	unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -2248,7 +2259,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		sum_weighted_load = sum_nr_running = avg_load = 0;
 
 		for_each_cpu_mask(i, group->cpumask) {
-			struct rq *rq = cpu_rq(i);
+			struct rq *rq;
+
+			if (!cpu_isset(i, *cpus))
+				continue;
+
+			rq = cpu_rq(i);
 
 			if (*sd_idle && !idle_cpu(i))
 				*sd_idle = 0;
@@ -2466,13 +2482,17 @@ ret:
  */
 static struct rq *
 find_busiest_queue(struct sched_group *group, enum idle_type idle,
-		   unsigned long imbalance)
+		   unsigned long imbalance, cpumask_t *cpus)
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
 	int i;
 
 	for_each_cpu_mask(i, group->cpumask) {
+
+		if (!cpu_isset(i, *cpus))
+			continue;
+
 		rq = cpu_rq(i);
 
 		if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
@@ -2511,6 +2531,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	struct sched_group *group;
 	unsigned long imbalance;
 	struct rq *busiest;
+	cpumask_t cpus = CPU_MASK_ALL;
 
 	if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
 	    !sched_smt_power_savings)
@@ -2518,13 +2539,15 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 
 	schedstat_inc(sd, lb_cnt[idle]);
 
-	group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle);
+redo:
+	group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
+				   &cpus);
 	if (!group) {
 		schedstat_inc(sd, lb_nobusyg[idle]);
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group, idle, imbalance);
+	busiest = find_busiest_queue(group, idle, imbalance, &cpus);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
@@ -2549,8 +2572,12 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		double_rq_unlock(this_rq, busiest);
 
 		/* All tasks on this runqueue were pinned by CPU affinity */
-		if (unlikely(all_pinned))
+		if (unlikely(all_pinned)) {
+			cpu_clear(cpu_of(busiest), cpus);
+			if (!cpus_empty(cpus))
+				goto redo;
 			goto out_balanced;
+		}
 	}
 
 	if (!nr_moved) {
@@ -2639,18 +2666,22 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 	unsigned long imbalance;
 	int nr_moved = 0;
 	int sd_idle = 0;
+	cpumask_t cpus = CPU_MASK_ALL;
 
 	if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
 		sd_idle = 1;
 
 	schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
-	group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle);
+redo:
+	group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE,
+				   &sd_idle, &cpus);
 	if (!group) {
 		schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance);
+	busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance,
+				     &cpus);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
 		goto out_balanced;
@@ -2668,6 +2699,12 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 					minus_1_or_zero(busiest->nr_running),
 					imbalance, sd, NEWLY_IDLE, NULL);
 		spin_unlock(&busiest->lock);
+
+		if (!nr_moved) {
+			cpu_clear(cpu_of(busiest), cpus);
+			if (!cpus_empty(cpus))
+				goto redo;
+		}
 	}
 
 	if (!nr_moved) {
@@ -3311,9 +3348,6 @@ need_resched_nonpreemptible:
 
 	spin_lock_irq(&rq->lock);
 
-	if (unlikely(prev->flags & PF_DEAD))
-		prev->state = EXIT_DEAD;
-
 	switch_count = &prev->nivcsw;
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
 		switch_count = &prev->nvcsw;
@@ -4043,6 +4077,8 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
  * @p: the task in question.
  * @policy: new policy.
  * @param: structure containing the new RT priority.
+ *
+ * NOTE: the task may be already dead
  */
 int sched_setscheduler(struct task_struct *p, int policy,
 		       struct sched_param *param)
@@ -4070,28 +4106,32 @@ recheck:
 	    (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
 	    (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
 		return -EINVAL;
-	if ((policy == SCHED_NORMAL || policy == SCHED_BATCH)
-					!= (param->sched_priority == 0))
+	if (is_rt_policy(policy) != (param->sched_priority != 0))
 		return -EINVAL;
 
 	/*
 	 * Allow unprivileged RT tasks to decrease priority:
 	 */
 	if (!capable(CAP_SYS_NICE)) {
-		/*
-		 * can't change policy, except between SCHED_NORMAL
-		 * and SCHED_BATCH:
-		 */
-		if (((policy != SCHED_NORMAL && p->policy != SCHED_BATCH) &&
-			(policy != SCHED_BATCH && p->policy != SCHED_NORMAL)) &&
-				!p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
-			return -EPERM;
-		/* can't increase priority */
-		if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) &&
-		    param->sched_priority > p->rt_priority &&
-		    param->sched_priority >
-				p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
-			return -EPERM;
+		if (is_rt_policy(policy)) {
+			unsigned long rlim_rtprio;
+			unsigned long flags;
+
+			if (!lock_task_sighand(p, &flags))
+				return -ESRCH;
+			rlim_rtprio = p->signal->rlim[RLIMIT_RTPRIO].rlim_cur;
+			unlock_task_sighand(p, &flags);
+
+			/* can't set/change the rt policy */
+			if (policy != p->policy && !rlim_rtprio)
+				return -EPERM;
+
+			/* can't increase priority */
+			if (param->sched_priority > p->rt_priority &&
+			    param->sched_priority > rlim_rtprio)
+				return -EPERM;
+		}
+
 		/* can't change other user's priorities */
 		if ((current->euid != p->euid) &&
 		    (current->euid != p->uid))
@@ -4156,14 +4196,13 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 		return -EINVAL;
 	if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
 		return -EFAULT;
-	read_lock_irq(&tasklist_lock);
+
+	rcu_read_lock();
+	retval = -ESRCH;
 	p = find_process_by_pid(pid);
-	if (!p) {
-		read_unlock_irq(&tasklist_lock);
-		return -ESRCH;
-	}
-	retval = sched_setscheduler(p, policy, &lparam);
-	read_unlock_irq(&tasklist_lock);
+	if (p != NULL)
+		retval = sched_setscheduler(p, policy, &lparam);
+	rcu_read_unlock();
 
 	return retval;
 }
@@ -5114,7 +5153,7 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
 	BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
 
 	/* Cannot have done final schedule yet: would have vanished. */
-	BUG_ON(p->flags & PF_DEAD);
+	BUG_ON(p->state == TASK_DEAD);
 
 	get_task_struct(p);
 
@@ -5235,9 +5274,11 @@ static struct notifier_block __cpuinitdata migration_notifier = {
 int __init migration_init(void)
 {
 	void *cpu = (void *)(long)smp_processor_id();
+	int err;
 
 	/* Start one for the boot CPU: */
-	migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
+	err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
+	BUG_ON(err == NOTIFY_BAD);
 	migration_call(&migration_notifier, CPU_ONLINE, cpu);
 	register_cpu_notifier(&migration_notifier);
 
@@ -6747,6 +6788,7 @@ void __init sched_init(void)
 		rq->cpu_load[j] = 0;
 		rq->active_balance = 0;
 		rq->push_cpu = 0;
+		rq->cpu = i;
 		rq->migration_thread = NULL;
 		INIT_LIST_HEAD(&rq->migration_queue);
 #endif