Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	327
1 file changed, 162 insertions(+), 165 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index e8051bd59acb..4c15b1726196 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -562,16 +562,13 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 static inline struct rq *__task_rq_lock(struct task_struct *p)
 	__acquires(rq->lock)
 {
-	struct rq *rq;
-
-repeat_lock_task:
-	rq = task_rq(p);
-	spin_lock(&rq->lock);
-	if (unlikely(rq != task_rq(p))) {
+	for (;;) {
+		struct rq *rq = task_rq(p);
+		spin_lock(&rq->lock);
+		if (likely(rq == task_rq(p)))
+			return rq;
 		spin_unlock(&rq->lock);
-		goto repeat_lock_task;
 	}
-	return rq;
 }
 
 /*
@@ -584,15 +581,14 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 {
 	struct rq *rq;
 
-repeat_lock_task:
-	local_irq_save(*flags);
-	rq = task_rq(p);
-	spin_lock(&rq->lock);
-	if (unlikely(rq != task_rq(p))) {
+	for (;;) {
+		local_irq_save(*flags);
+		rq = task_rq(p);
+		spin_lock(&rq->lock);
+		if (likely(rq == task_rq(p)))
+			return rq;
 		spin_unlock_irqrestore(&rq->lock, *flags);
-		goto repeat_lock_task;
 	}
-	return rq;
 }
 
 static void __task_rq_unlock(struct rq *rq)
@@ -1083,69 +1079,71 @@ void wait_task_inactive(struct task_struct *p)
 	int running, on_rq;
 	struct rq *rq;
 
-repeat:
-	/*
-	 * We do the initial early heuristics without holding
-	 * any task-queue locks at all. We'll only try to get
-	 * the runqueue lock when things look like they will
-	 * work out!
-	 */
-	rq = task_rq(p);
+	for (;;) {
+		/*
+		 * We do the initial early heuristics without holding
+		 * any task-queue locks at all. We'll only try to get
+		 * the runqueue lock when things look like they will
+		 * work out!
+		 */
+		rq = task_rq(p);
 
-	/*
-	 * If the task is actively running on another CPU
-	 * still, just relax and busy-wait without holding
-	 * any locks.
-	 *
-	 * NOTE! Since we don't hold any locks, it's not
-	 * even sure that "rq" stays as the right runqueue!
-	 * But we don't care, since "task_running()" will
-	 * return false if the runqueue has changed and p
-	 * is actually now running somewhere else!
-	 */
-	while (task_running(rq, p))
-		cpu_relax();
+		/*
+		 * If the task is actively running on another CPU
+		 * still, just relax and busy-wait without holding
+		 * any locks.
+		 *
+		 * NOTE! Since we don't hold any locks, it's not
+		 * even sure that "rq" stays as the right runqueue!
+		 * But we don't care, since "task_running()" will
+		 * return false if the runqueue has changed and p
+		 * is actually now running somewhere else!
+		 */
+		while (task_running(rq, p))
+			cpu_relax();
 
-	/*
-	 * Ok, time to look more closely! We need the rq
-	 * lock now, to be *sure*. If we're wrong, we'll
-	 * just go back and repeat.
-	 */
-	rq = task_rq_lock(p, &flags);
-	running = task_running(rq, p);
-	on_rq = p->se.on_rq;
-	task_rq_unlock(rq, &flags);
+		/*
+		 * Ok, time to look more closely! We need the rq
+		 * lock now, to be *sure*. If we're wrong, we'll
+		 * just go back and repeat.
+		 */
+		rq = task_rq_lock(p, &flags);
+		running = task_running(rq, p);
+		on_rq = p->se.on_rq;
+		task_rq_unlock(rq, &flags);
 
-	/*
-	 * Was it really running after all now that we
-	 * checked with the proper locks actually held?
-	 *
-	 * Oops. Go back and try again..
-	 */
-	if (unlikely(running)) {
-		cpu_relax();
-		goto repeat;
-	}
+		/*
+		 * Was it really running after all now that we
+		 * checked with the proper locks actually held?
+		 *
+		 * Oops. Go back and try again..
+		 */
+		if (unlikely(running)) {
+			cpu_relax();
+			continue;
+		}
 
-	/*
-	 * It's not enough that it's not actively running,
-	 * it must be off the runqueue _entirely_, and not
-	 * preempted!
-	 *
-	 * So if it wa still runnable (but just not actively
-	 * running right now), it's preempted, and we should
-	 * yield - it could be a while.
-	 */
-	if (unlikely(on_rq)) {
-		schedule_timeout_uninterruptible(1);
-		goto repeat;
-	}
+		/*
+		 * It's not enough that it's not actively running,
+		 * it must be off the runqueue _entirely_, and not
+		 * preempted!
+		 *
+		 * So if it wa still runnable (but just not actively
+		 * running right now), it's preempted, and we should
+		 * yield - it could be a while.
+		 */
+		if (unlikely(on_rq)) {
+			schedule_timeout_uninterruptible(1);
+			continue;
+		}
 
-	/*
-	 * Ahh, all good. It wasn't running, and it wasn't
-	 * runnable, which means that it will never become
-	 * running in the future either. We're all done!
-	 */
+		/*
+		 * Ahh, all good. It wasn't running, and it wasn't
+		 * runnable, which means that it will never become
+		 * running in the future either. We're all done!
+		 */
+		break;
+	}
 }
 
 /***
@@ -1236,7 +1234,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 
 		/* Skip over this group if it has no CPUs allowed */
 		if (!cpus_intersects(group->cpumask, p->cpus_allowed))
-			goto nextgroup;
+			continue;
 
 		local_group = cpu_isset(this_cpu, group->cpumask);
 
@@ -1264,9 +1262,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 			min_load = avg_load;
 			idlest = group;
 		}
-nextgroup:
-		group = group->next;
-	} while (group != sd->groups);
+	} while (group = group->next, group != sd->groups);
 
 	if (!idlest || 100*this_load < imbalance*min_load)
 		return NULL;
@@ -3517,27 +3513,30 @@ asmlinkage void __sched preempt_schedule(void)
 	if (likely(ti->preempt_count || irqs_disabled()))
 		return;
 
-need_resched:
-	add_preempt_count(PREEMPT_ACTIVE);
-	/*
-	 * We keep the big kernel semaphore locked, but we
-	 * clear ->lock_depth so that schedule() doesnt
-	 * auto-release the semaphore:
-	 */
+	do {
+		add_preempt_count(PREEMPT_ACTIVE);
+
+		/*
+		 * We keep the big kernel semaphore locked, but we
+		 * clear ->lock_depth so that schedule() doesnt
+		 * auto-release the semaphore:
+		 */
 #ifdef CONFIG_PREEMPT_BKL
-	saved_lock_depth = task->lock_depth;
-	task->lock_depth = -1;
+		saved_lock_depth = task->lock_depth;
+		task->lock_depth = -1;
 #endif
-	schedule();
+		schedule();
 #ifdef CONFIG_PREEMPT_BKL
-	task->lock_depth = saved_lock_depth;
+		task->lock_depth = saved_lock_depth;
 #endif
-	sub_preempt_count(PREEMPT_ACTIVE);
+		sub_preempt_count(PREEMPT_ACTIVE);
 
-	/* we could miss a preemption opportunity between schedule and now */
-	barrier();
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
-		goto need_resched;
+		/*
+		 * Check again in case we missed a preemption opportunity
+		 * between schedule and now.
+		 */
+		barrier();
+	} while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));
 }
 EXPORT_SYMBOL(preempt_schedule);
 
@@ -3557,29 +3556,32 @@ asmlinkage void __sched preempt_schedule_irq(void)
 	/* Catch callers which need to be fixed */
 	BUG_ON(ti->preempt_count || !irqs_disabled());
 
-need_resched:
-	add_preempt_count(PREEMPT_ACTIVE);
-	/*
-	 * We keep the big kernel semaphore locked, but we
-	 * clear ->lock_depth so that schedule() doesnt
-	 * auto-release the semaphore:
-	 */
+	do {
+		add_preempt_count(PREEMPT_ACTIVE);
+
+		/*
+		 * We keep the big kernel semaphore locked, but we
+		 * clear ->lock_depth so that schedule() doesnt
+		 * auto-release the semaphore:
+		 */
 #ifdef CONFIG_PREEMPT_BKL
-	saved_lock_depth = task->lock_depth;
-	task->lock_depth = -1;
+		saved_lock_depth = task->lock_depth;
+		task->lock_depth = -1;
 #endif
-	local_irq_enable();
-	schedule();
-	local_irq_disable();
+		local_irq_enable();
+		schedule();
+		local_irq_disable();
 #ifdef CONFIG_PREEMPT_BKL
-	task->lock_depth = saved_lock_depth;
+		task->lock_depth = saved_lock_depth;
 #endif
-	sub_preempt_count(PREEMPT_ACTIVE);
+		sub_preempt_count(PREEMPT_ACTIVE);
 
-	/* we could miss a preemption opportunity between schedule and now */
-	barrier();
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
-		goto need_resched;
+		/*
+		 * Check again in case we missed a preemption opportunity
+		 * between schedule and now.
+		 */
+		barrier();
+	} while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));
 }
 
 #endif /* CONFIG_PREEMPT */
@@ -4324,10 +4326,10 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
 asmlinkage long sys_sched_getscheduler(pid_t pid)
 {
 	struct task_struct *p;
-	int retval = -EINVAL;
+	int retval;
 
 	if (pid < 0)
-		goto out_nounlock;
+		return -EINVAL;
 
 	retval = -ESRCH;
 	read_lock(&tasklist_lock);
@@ -4338,8 +4340,6 @@ asmlinkage long sys_sched_getscheduler(pid_t pid)
 			retval = p->policy;
 	}
 	read_unlock(&tasklist_lock);
-
-out_nounlock:
 	return retval;
 }
 
@@ -4352,10 +4352,10 @@ asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
 {
 	struct sched_param lp;
 	struct task_struct *p;
-	int retval = -EINVAL;
+	int retval;
 
 	if (!param || pid < 0)
-		goto out_nounlock;
+		return -EINVAL;
 
 	read_lock(&tasklist_lock);
 	p = find_process_by_pid(pid);
@@ -4375,7 +4375,6 @@ asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
 	 */
 	retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0;
 
-out_nounlock:
 	return retval;
 
 out_unlock:
@@ -4731,11 +4730,11 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
 {
 	struct task_struct *p;
 	unsigned int time_slice;
-	int retval = -EINVAL;
+	int retval;
 	struct timespec t;
 
 	if (pid < 0)
-		goto out_nounlock;
+		return -EINVAL;
 
 	retval = -ESRCH;
 	read_lock(&tasklist_lock);
@@ -4763,8 +4762,8 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
 	read_unlock(&tasklist_lock);
 	jiffies_to_timespec(time_slice, &t);
 	retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
-out_nounlock:
 	return retval;
+
 out_unlock:
 	read_unlock(&tasklist_lock);
 	return retval;
@@ -5070,35 +5069,34 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 	struct rq *rq;
 	int dest_cpu;
 
-restart:
-	/* On same node? */
-	mask = node_to_cpumask(cpu_to_node(dead_cpu));
-	cpus_and(mask, mask, p->cpus_allowed);
-	dest_cpu = any_online_cpu(mask);
+	do {
+		/* On same node? */
+		mask = node_to_cpumask(cpu_to_node(dead_cpu));
+		cpus_and(mask, mask, p->cpus_allowed);
+		dest_cpu = any_online_cpu(mask);
 
-	/* On any allowed CPU? */
-	if (dest_cpu == NR_CPUS)
-		dest_cpu = any_online_cpu(p->cpus_allowed);
+		/* On any allowed CPU? */
+		if (dest_cpu == NR_CPUS)
+			dest_cpu = any_online_cpu(p->cpus_allowed);
 
-	/* No more Mr. Nice Guy. */
-	if (dest_cpu == NR_CPUS) {
-		rq = task_rq_lock(p, &flags);
-		cpus_setall(p->cpus_allowed);
-		dest_cpu = any_online_cpu(p->cpus_allowed);
-		task_rq_unlock(rq, &flags);
+		/* No more Mr. Nice Guy. */
+		if (dest_cpu == NR_CPUS) {
+			rq = task_rq_lock(p, &flags);
+			cpus_setall(p->cpus_allowed);
+			dest_cpu = any_online_cpu(p->cpus_allowed);
+			task_rq_unlock(rq, &flags);
 
-		/*
-		 * Don't tell them about moving exiting tasks or
-		 * kernel threads (both mm NULL), since they never
-		 * leave kernel.
-		 */
-		if (p->mm && printk_ratelimit())
-			printk(KERN_INFO "process %d (%s) no "
-			       "longer affine to cpu%d\n",
-			       p->pid, p->comm, dead_cpu);
-	}
-	if (!__migrate_task(p, dead_cpu, dest_cpu))
-		goto restart;
+			/*
+			 * Don't tell them about moving exiting tasks or
+			 * kernel threads (both mm NULL), since they never
+			 * leave kernel.
+			 */
+			if (p->mm && printk_ratelimit())
+				printk(KERN_INFO "process %d (%s) no "
+				       "longer affine to cpu%d\n",
+				       p->pid, p->comm, dead_cpu);
+		}
+	} while (!__migrate_task(p, dead_cpu, dest_cpu));
 }
 
 /*
@@ -5913,24 +5911,23 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
 
 	if (!sg)
 		return;
-next_sg:
-	for_each_cpu_mask(j, sg->cpumask) {
-		struct sched_domain *sd;
+	do {
+		for_each_cpu_mask(j, sg->cpumask) {
+			struct sched_domain *sd;
 
-		sd = &per_cpu(phys_domains, j);
-		if (j != first_cpu(sd->groups->cpumask)) {
-			/*
-			 * Only add "power" once for each
-			 * physical package.
-			 */
-			continue;
-		}
+			sd = &per_cpu(phys_domains, j);
+			if (j != first_cpu(sd->groups->cpumask)) {
+				/*
+				 * Only add "power" once for each
+				 * physical package.
+				 */
+				continue;
+			}
 
-		sg_inc_cpu_power(sg, sd->groups->__cpu_power);
-	}
-	sg = sg->next;
-	if (sg != group_head)
-		goto next_sg;
+			sg_inc_cpu_power(sg, sd->groups->__cpu_power);
+		}
+		sg = sg->next;
+	} while (sg != group_head);
 }
 #endif
 
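The transformation repeated throughout these hunks is the same one: a backward `goto` to a retry label becomes a structured `for (;;)` or `do { } while ()` loop, with the success path returning (or breaking) from inside the loop and the retry path falling through or using `continue`. Below is a minimal, self-contained sketch of that before/after shape. It deliberately uses made-up `resource_of()`/`res_lock()` helpers rather than the scheduler's real `task_rq()`/`spin_lock()` API, so it compiles on its own and only illustrates the control-flow change, not kernel locking.

```c
#include <stdio.h>

/* Hypothetical stand-ins (not kernel code): an object that can migrate
 * between "resources", and a trivial lock flag on each resource. */
struct res { int locked; };
struct obj { struct res *home; };

static struct res *resource_of(struct obj *o) { return o->home; }
static void res_lock(struct res *r)   { r->locked = 1; }
static void res_unlock(struct res *r) { r->locked = 0; }

/* Before: goto-based retry, in the style of the old __task_rq_lock(). */
static struct res *lock_resource_goto(struct obj *o)
{
	struct res *r;

repeat:
	r = resource_of(o);
	res_lock(r);
	if (r != resource_of(o)) {	/* raced: o moved while we took the lock */
		res_unlock(r);
		goto repeat;
	}
	return r;
}

/* After: the same retry expressed as an infinite loop with an early return. */
static struct res *lock_resource_loop(struct obj *o)
{
	for (;;) {
		struct res *r = resource_of(o);

		res_lock(r);
		if (r == resource_of(o))
			return r;
		res_unlock(r);
	}
}

int main(void)
{
	struct res a = { 0 };
	struct obj o = { &a };

	printf("goto variant locked: %d\n", lock_resource_goto(&o)->locked);
	res_unlock(&a);
	printf("loop variant locked: %d\n", lock_resource_loop(&o)->locked);
	return 0;
}
```

Both variants do the same work; the loop form simply makes the retry boundary explicit, which is why the patch can also replace `goto repeat` with `continue` and the fall-out-of-the-loop case with `break`, as in the wait_task_inactive() hunk above.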