Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  128
1 file changed, 85 insertions, 43 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index a234fbee1238..74f169ac0773 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -238,6 +238,7 @@ struct rq {
 	/* For active balancing */
 	int active_balance;
 	int push_cpu;
+	int cpu;		/* cpu of this runqueue */
 
 	struct task_struct *migration_thread;
 	struct list_head migration_queue;
@@ -267,6 +268,15 @@ struct rq {
 
 static DEFINE_PER_CPU(struct rq, runqueues);
 
+static inline int cpu_of(struct rq *rq)
+{
+#ifdef CONFIG_SMP
+	return rq->cpu;
+#else
+	return 0;
+#endif
+}
+
 /*
  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
  * See detach_destroy_domains: synchronize_sched for details.
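The new rq->cpu field and the cpu_of() accessor give common code one way to ask which CPU a runqueue belongs to; on !CONFIG_SMP builds there is only one runqueue, so the helper simply returns 0 and the field is not needed. A minimal, self-contained sketch of the same pattern (the struct and the CONFIG_SMP define below are simplified stand-ins, not the kernel's definitions):

#include <stdio.h>

/* Simplified stand-ins for the kernel's config option and runqueue. */
#define CONFIG_SMP 1

struct rq {
#ifdef CONFIG_SMP
	int cpu;		/* cpu this runqueue belongs to */
#endif
	int nr_running;
};

/* Same shape as the new helper: hide the #ifdef behind one accessor. */
static inline int cpu_of(struct rq *rq)
{
#ifdef CONFIG_SMP
	return rq->cpu;
#else
	return 0;
#endif
}

int main(void)
{
	struct rq rq = { .cpu = 3, .nr_running = 0 };

	printf("runqueue belongs to cpu %d\n", cpu_of(&rq));
	return 0;
}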
@@ -1745,27 +1755,27 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	__releases(rq->lock)
 {
 	struct mm_struct *mm = rq->prev_mm;
-	unsigned long prev_task_flags;
+	long prev_state;
 
 	rq->prev_mm = NULL;
 
 	/*
 	 * A task struct has one reference for the use as "current".
-	 * If a task dies, then it sets EXIT_ZOMBIE in tsk->exit_state and
-	 * calls schedule one last time. The schedule call will never return,
-	 * and the scheduled task must drop that reference.
-	 * The test for EXIT_ZOMBIE must occur while the runqueue locks are
+	 * If a task dies, then it sets TASK_DEAD in tsk->state and calls
+	 * schedule one last time. The schedule call will never return, and
+	 * the scheduled task must drop that reference.
+	 * The test for TASK_DEAD must occur while the runqueue locks are
 	 * still held, otherwise prev could be scheduled on another cpu, die
 	 * there before we look at prev->state, and then the reference would
 	 * be dropped twice.
 	 *	Manfred Spraul <manfred@colorfullife.com>
 	 */
-	prev_task_flags = prev->flags;
+	prev_state = prev->state;
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
 	if (mm)
 		mmdrop(mm);
-	if (unlikely(prev_task_flags & PF_DEAD)) {
+	if (unlikely(prev_state == TASK_DEAD)) {
 		/*
 		 * Remove function-return probe instances associated with this
 		 * task and put them back on the free list.
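This hunk is part of a wider conversion in the patch: the PF_DEAD flag test and the EXIT_DEAD state assignment are replaced by the TASK_DEAD task state (see also the schedule() and migrate_dead() hunks further down). The state is still sampled into a local before finish_lock_switch() releases the runqueue lock, for exactly the reason in the Manfred Spraul comment: once the lock is dropped, prev may run and die on another CPU, and its reference must not be dropped twice.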
@@ -2211,7 +2221,8 @@ out:
  */
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
-		   unsigned long *imbalance, enum idle_type idle, int *sd_idle)
+		   unsigned long *imbalance, enum idle_type idle, int *sd_idle,
+		   cpumask_t *cpus)
 {
 	struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
 	unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -2248,7 +2259,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		sum_weighted_load = sum_nr_running = avg_load = 0;
 
 		for_each_cpu_mask(i, group->cpumask) {
-			struct rq *rq = cpu_rq(i);
+			struct rq *rq;
+
+			if (!cpu_isset(i, *cpus))
+				continue;
+
+			rq = cpu_rq(i);
 
 			if (*sd_idle && !idle_cpu(i))
 				*sd_idle = 0;
@@ -2466,13 +2482,17 @@ ret:
  */
 static struct rq *
 find_busiest_queue(struct sched_group *group, enum idle_type idle,
-		   unsigned long imbalance)
+		   unsigned long imbalance, cpumask_t *cpus)
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
 	int i;
 
 	for_each_cpu_mask(i, group->cpumask) {
+
+		if (!cpu_isset(i, *cpus))
+			continue;
+
 		rq = cpu_rq(i);
 
 		if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
@@ -2511,6 +2531,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	struct sched_group *group;
 	unsigned long imbalance;
 	struct rq *busiest;
+	cpumask_t cpus = CPU_MASK_ALL;
 
 	if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
 	    !sched_smt_power_savings)
@@ -2518,13 +2539,15 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 
 	schedstat_inc(sd, lb_cnt[idle]);
 
-	group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle);
+redo:
+	group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
+				   &cpus);
 	if (!group) {
 		schedstat_inc(sd, lb_nobusyg[idle]);
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group, idle, imbalance);
+	busiest = find_busiest_queue(group, idle, imbalance, &cpus);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
@@ -2549,8 +2572,12 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		double_rq_unlock(this_rq, busiest);
 
 		/* All tasks on this runqueue were pinned by CPU affinity */
-		if (unlikely(all_pinned))
+		if (unlikely(all_pinned)) {
+			cpu_clear(cpu_of(busiest), cpus);
+			if (!cpus_empty(cpus))
+				goto redo;
 			goto out_balanced;
+		}
 	}
 
 	if (!nr_moved) {
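Together with the cpus mask declared earlier in load_balance(), the new redo: path turns the balancing pass into a retry loop: if every task on the chosen busiest runqueue is pinned by CPU affinity, that CPU is cleared from the candidate mask and the search for a busiest group/queue is repeated until something moves or no candidates remain. A toy model of that control flow, using hypothetical helpers (pick_busiest() and try_pull_tasks() stand in for find_busiest_queue() and move_tasks(); this is an illustration, not kernel code):

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

/* Hypothetical stand-in for find_busiest_queue(): highest-numbered candidate. */
static int pick_busiest(const bool candidate[NR_CPUS])
{
	for (int i = NR_CPUS - 1; i >= 0; i--)
		if (candidate[i])
			return i;
	return -1;		/* no candidate CPU left */
}

/* Hypothetical stand-in for move_tasks(): pretend CPU 3's tasks are all pinned. */
static int try_pull_tasks(int cpu)
{
	return cpu == 3 ? 0 : 2;
}

int main(void)
{
	bool candidate[NR_CPUS] = { true, true, true, true };
	int moved;

redo:
	{
		int busiest = pick_busiest(candidate);

		if (busiest < 0) {
			printf("all candidates exhausted, balancing gives up\n");
			return 0;
		}
		moved = try_pull_tasks(busiest);
		if (!moved) {
			/* all tasks pinned: drop this CPU and retry,
			 * as the new cpu_clear()/goto redo path does above */
			candidate[busiest] = false;
			goto redo;
		}
		printf("pulled %d task(s) from cpu %d\n", moved, busiest);
	}
	return 0;
}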
@@ -2639,18 +2666,22 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 	unsigned long imbalance;
 	int nr_moved = 0;
 	int sd_idle = 0;
+	cpumask_t cpus = CPU_MASK_ALL;
 
 	if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
 		sd_idle = 1;
 
 	schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
-	group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle);
+redo:
+	group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE,
+				   &sd_idle, &cpus);
 	if (!group) {
 		schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance);
+	busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance,
+				     &cpus);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
 		goto out_balanced;
@@ -2668,6 +2699,12 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 					minus_1_or_zero(busiest->nr_running),
 					imbalance, sd, NEWLY_IDLE, NULL);
 		spin_unlock(&busiest->lock);
+
+		if (!nr_moved) {
+			cpu_clear(cpu_of(busiest), cpus);
+			if (!cpus_empty(cpus))
+				goto redo;
+		}
 	}
 
 	if (!nr_moved) {
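load_balance_newidle() gets the same mask-and-retry treatment; since this path passes NULL instead of an all_pinned flag to move_tasks() (visible in the unchanged call above), the retry is keyed on nr_moved being zero rather than on the all-pinned case.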
@@ -3311,9 +3348,6 @@ need_resched_nonpreemptible:
 
 	spin_lock_irq(&rq->lock);
 
-	if (unlikely(prev->flags & PF_DEAD))
-		prev->state = EXIT_DEAD;
-
 	switch_count = &prev->nivcsw;
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
 		switch_count = &prev->nvcsw;
@@ -4043,6 +4077,8 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
  * @p: the task in question.
  * @policy: new policy.
  * @param: structure containing the new RT priority.
+ *
+ * NOTE: the task may be already dead
  */
 int sched_setscheduler(struct task_struct *p, int policy,
 		       struct sched_param *param)
@@ -4070,28 +4106,32 @@ recheck:
 	    (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
 	    (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
 		return -EINVAL;
-	if ((policy == SCHED_NORMAL || policy == SCHED_BATCH)
-				!= (param->sched_priority == 0))
+	if (is_rt_policy(policy) != (param->sched_priority != 0))
 		return -EINVAL;
 
 	/*
 	 * Allow unprivileged RT tasks to decrease priority:
 	 */
 	if (!capable(CAP_SYS_NICE)) {
-		/*
-		 * can't change policy, except between SCHED_NORMAL
-		 * and SCHED_BATCH:
-		 */
-		if (((policy != SCHED_NORMAL && p->policy != SCHED_BATCH) &&
-			(policy != SCHED_BATCH && p->policy != SCHED_NORMAL)) &&
-				!p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
-			return -EPERM;
-		/* can't increase priority */
-		if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) &&
-				param->sched_priority > p->rt_priority &&
-				param->sched_priority >
-				p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
-			return -EPERM;
+		if (is_rt_policy(policy)) {
+			unsigned long rlim_rtprio;
+			unsigned long flags;
+
+			if (!lock_task_sighand(p, &flags))
+				return -ESRCH;
+			rlim_rtprio = p->signal->rlim[RLIMIT_RTPRIO].rlim_cur;
+			unlock_task_sighand(p, &flags);
+
+			/* can't set/change the rt policy */
+			if (policy != p->policy && !rlim_rtprio)
+				return -EPERM;
+
+			/* can't increase priority */
+			if (param->sched_priority > p->rt_priority &&
+			    param->sched_priority > rlim_rtprio)
+				return -EPERM;
+		}
+
 		/* can't change other user's priorities */
 		if ((current->euid != p->euid) &&
 		    (current->euid != p->uid))
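The rewritten unprivileged-caller check only consults RLIMIT_RTPRIO when a real-time policy is being requested, and it fetches the limit under lock_task_sighand() because, per the new NOTE above, the target task may already be exiting. A rough, self-contained model of just that decision logic (the enum, struct fields and helpers below are simplified stand-ins; the real code also checks euid/uid afterwards, as the following context lines show):

#include <stdbool.h>
#include <stdio.h>

#define EPERM 1

/* Simplified stand-ins for the kernel's policies and task fields. */
enum { SCHED_NORMAL, SCHED_FIFO, SCHED_RR, SCHED_BATCH };

static bool is_rt_policy(int policy)
{
	return policy == SCHED_FIFO || policy == SCHED_RR;
}

struct task {
	int policy;
	int rt_priority;
	unsigned long rlim_rtprio;	/* RLIMIT_RTPRIO soft limit */
};

/* RT-policy part of the unprivileged check, mirroring the hunk above. */
static int check_unprivileged(const struct task *p, int policy, int prio)
{
	if (!is_rt_policy(policy))
		return 0;
	/* can't set/change the rt policy without a non-zero rlimit */
	if (policy != p->policy && !p->rlim_rtprio)
		return -EPERM;
	/* can't raise priority above both the current value and the rlimit */
	if (prio > p->rt_priority && prio > p->rlim_rtprio)
		return -EPERM;
	return 0;
}

int main(void)
{
	struct task p = { .policy = SCHED_FIFO, .rt_priority = 10, .rlim_rtprio = 20 };

	printf("%d\n", check_unprivileged(&p, SCHED_FIFO, 15));	/* 0: within rlimit */
	printf("%d\n", check_unprivileged(&p, SCHED_FIFO, 40));	/* -EPERM: above rlimit */
	printf("%d\n", check_unprivileged(&p, SCHED_NORMAL, 0));	/* 0: non-RT passes here */
	return 0;
}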
@@ -4156,14 +4196,13 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 		return -EINVAL;
 	if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
 		return -EFAULT;
-	read_lock_irq(&tasklist_lock);
+
+	rcu_read_lock();
+	retval = -ESRCH;
 	p = find_process_by_pid(pid);
-	if (!p) {
-		read_unlock_irq(&tasklist_lock);
-		return -ESRCH;
-	}
-	retval = sched_setscheduler(p, policy, &lparam);
-	read_unlock_irq(&tasklist_lock);
+	if (p != NULL)
+		retval = sched_setscheduler(p, policy, &lparam);
+	rcu_read_unlock();
 
 	return retval;
 }
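Switching from read_lock_irq(&tasklist_lock) to rcu_read_lock() means the PID lookup no longer disables interrupts or excludes writers of the task list. Priming retval with -ESRCH lets a single unlock path cover both the not-found and the normal case, and the caller now relies on sched_setscheduler() itself coping with a task that may already be dead, which is what the NOTE added to its kerneldoc and the lock_task_sighand() use above are about.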
@@ -5114,7 +5153,7 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
 	BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
 
 	/* Cannot have done final schedule yet: would have vanished. */
-	BUG_ON(p->flags & PF_DEAD);
+	BUG_ON(p->state == TASK_DEAD);
 
 	get_task_struct(p);
 
@@ -5235,9 +5274,11 @@ static struct notifier_block __cpuinitdata migration_notifier = {
 int __init migration_init(void)
 {
 	void *cpu = (void *)(long)smp_processor_id();
+	int err;
 
 	/* Start one for the boot CPU: */
-	migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
+	err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
+	BUG_ON(err == NOTIFY_BAD);
 	migration_call(&migration_notifier, CPU_ONLINE, cpu);
 	register_cpu_notifier(&migration_notifier);
 
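Checking the CPU_UP_PREPARE return value against NOTIFY_BAD makes a failure to prepare the boot CPU's migration thread fatal at init time instead of being silently ignored.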
@@ -6747,6 +6788,7 @@ void __init sched_init(void)
 		rq->cpu_load[j] = 0;
 		rq->active_balance = 0;
 		rq->push_cpu = 0;
+		rq->cpu = i;
 		rq->migration_thread = NULL;
 		INIT_LIST_HEAD(&rq->migration_queue);
 #endif
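Finally, sched_init() records each runqueue's own CPU number so that cpu_of(), added near the top of this diff, returns the right value on SMP.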
