Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c | 327
1 file changed, 162 insertions(+), 165 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index e8051bd59acb..4c15b1726196 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -562,16 +562,13 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 static inline struct rq *__task_rq_lock(struct task_struct *p)
         __acquires(rq->lock)
 {
-        struct rq *rq;
-
-repeat_lock_task:
-        rq = task_rq(p);
-        spin_lock(&rq->lock);
-        if (unlikely(rq != task_rq(p))) {
+        for (;;) {
+                struct rq *rq = task_rq(p);
+                spin_lock(&rq->lock);
+                if (likely(rq == task_rq(p)))
+                        return rq;
                 spin_unlock(&rq->lock);
-                goto repeat_lock_task;
         }
-        return rq;
 }
 
 /*
@@ -584,15 +581,14 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 {
         struct rq *rq;
 
-repeat_lock_task:
-        local_irq_save(*flags);
-        rq = task_rq(p);
-        spin_lock(&rq->lock);
-        if (unlikely(rq != task_rq(p))) {
+        for (;;) {
+                local_irq_save(*flags);
+                rq = task_rq(p);
+                spin_lock(&rq->lock);
+                if (likely(rq == task_rq(p)))
+                        return rq;
                 spin_unlock_irqrestore(&rq->lock, *flags);
-                goto repeat_lock_task;
         }
-        return rq;
 }
 
 static void __task_rq_unlock(struct rq *rq)
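Note: both rq-locking helpers replace the backward goto with an open-coded retry loop; the locking logic is unchanged. For reference, __task_rq_lock() as it reads after this hunk (reconstructed from the new side of the diff, comments added here):

        static inline struct rq *__task_rq_lock(struct task_struct *p)
                __acquires(rq->lock)
        {
                for (;;) {
                        /* p may migrate between task_rq() and spin_lock() */
                        struct rq *rq = task_rq(p);
                        spin_lock(&rq->lock);
                        if (likely(rq == task_rq(p)))
                                return rq;
                        /* lost the race: drop the stale lock and retry */
                        spin_unlock(&rq->lock);
                }
        }

task_rq_lock() follows the same pattern, with local_irq_save()/spin_unlock_irqrestore() wrapped around each attempt.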
@@ -1083,69 +1079,71 @@ void wait_task_inactive(struct task_struct *p)
         int running, on_rq;
         struct rq *rq;
 
-repeat:
+        for (;;) {
                 /*
                  * We do the initial early heuristics without holding
                  * any task-queue locks at all. We'll only try to get
                  * the runqueue lock when things look like they will
                  * work out!
                  */
                 rq = task_rq(p);
 
                 /*
                  * If the task is actively running on another CPU
                  * still, just relax and busy-wait without holding
                  * any locks.
                  *
                  * NOTE! Since we don't hold any locks, it's not
                  * even sure that "rq" stays as the right runqueue!
                  * But we don't care, since "task_running()" will
                  * return false if the runqueue has changed and p
                  * is actually now running somewhere else!
                  */
                 while (task_running(rq, p))
                         cpu_relax();
 
                 /*
                  * Ok, time to look more closely! We need the rq
                  * lock now, to be *sure*. If we're wrong, we'll
                  * just go back and repeat.
                  */
                 rq = task_rq_lock(p, &flags);
                 running = task_running(rq, p);
                 on_rq = p->se.on_rq;
                 task_rq_unlock(rq, &flags);
 
                 /*
                  * Was it really running after all now that we
                  * checked with the proper locks actually held?
                  *
                  * Oops. Go back and try again..
                  */
                 if (unlikely(running)) {
                         cpu_relax();
-                        goto repeat;
+                        continue;
                 }
 
                 /*
                  * It's not enough that it's not actively running,
                  * it must be off the runqueue _entirely_, and not
                  * preempted!
                  *
                  * So if it was still runnable (but just not actively
                  * running right now), it's preempted, and we should
                  * yield - it could be a while.
                  */
                 if (unlikely(on_rq)) {
                         schedule_timeout_uninterruptible(1);
-                        goto repeat;
+                        continue;
                 }
 
                 /*
                  * Ahh, all good. It wasn't running, and it wasn't
                  * runnable, which means that it will never become
                  * running in the future either. We're all done!
                  */
+                break;
+        }
 }
 
 /***
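Note: the wait_task_inactive() conversion is mechanical: each 'goto repeat' becomes 'continue' and the success path gains an explicit 'break'. The resulting control flow, sketched from the new side of the hunk above (comments condensed):

        for (;;) {
                rq = task_rq(p);

                /* busy-wait, no locks held, while p is on a CPU */
                while (task_running(rq, p))
                        cpu_relax();

                /* re-check under the rq lock to be sure */
                rq = task_rq_lock(p, &flags);
                running = task_running(rq, p);
                on_rq = p->se.on_rq;
                task_rq_unlock(rq, &flags);

                if (unlikely(running)) {        /* still on a CPU: spin again */
                        cpu_relax();
                        continue;
                }
                if (unlikely(on_rq)) {          /* runnable but preempted: yield a tick */
                        schedule_timeout_uninterruptible(1);
                        continue;
                }
                break;                          /* neither running nor queued: done */
        }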
@@ -1236,7 +1234,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 
                 /* Skip over this group if it has no CPUs allowed */
                 if (!cpus_intersects(group->cpumask, p->cpus_allowed))
-                        goto nextgroup;
+                        continue;
 
                 local_group = cpu_isset(this_cpu, group->cpumask);
 
@@ -1264,9 +1262,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
                         min_load = avg_load;
                         idlest = group;
                 }
-nextgroup:
-                group = group->next;
-        } while (group != sd->groups);
+        } while (group = group->next, group != sd->groups);
 
         if (!idlest || 100*this_load < imbalance*min_load)
                 return NULL;
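Note: the rewritten loop tail uses the comma operator, '} while (group = group->next, group != sd->groups);', so the advance that used to sit after the removed nextgroup: label now runs on every iteration (including those reached via the new 'continue'), and only the second operand decides whether to keep looping. A stand-alone illustration of the same idiom, using hypothetical names (struct node, walk) rather than scheduler types:

        #include <stdio.h>

        struct node { int val; struct node *next; };

        /* Visit every entry of a circular list exactly once, starting at head. */
        static void walk(struct node *head)
        {
                struct node *n = head;

                do {
                        printf("%d\n", n->val);
                        /* comma operator: advance first, then test against the head */
                } while (n = n->next, n != head);
        }

        int main(void)
        {
                struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };

                c.next = &a;    /* close the ring: a -> b -> c -> a */
                walk(&a);       /* prints 1, 2, 3 */
                return 0;
        }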
@@ -3517,27 +3513,30 @@ asmlinkage void __sched preempt_schedule(void)
         if (likely(ti->preempt_count || irqs_disabled()))
                 return;
 
-need_resched:
+        do {
                 add_preempt_count(PREEMPT_ACTIVE);
+
                 /*
                  * We keep the big kernel semaphore locked, but we
                  * clear ->lock_depth so that schedule() doesnt
                  * auto-release the semaphore:
                  */
 #ifdef CONFIG_PREEMPT_BKL
                 saved_lock_depth = task->lock_depth;
                 task->lock_depth = -1;
 #endif
                 schedule();
 #ifdef CONFIG_PREEMPT_BKL
                 task->lock_depth = saved_lock_depth;
 #endif
                 sub_preempt_count(PREEMPT_ACTIVE);
 
-        /* we could miss a preemption opportunity between schedule and now */
+                /*
+                 * Check again in case we missed a preemption opportunity
+                 * between schedule and now.
+                 */
                 barrier();
-        if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
-                goto need_resched;
+        } while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));
 }
 EXPORT_SYMBOL(preempt_schedule);
 
@@ -3557,29 +3556,32 @@ asmlinkage void __sched preempt_schedule_irq(void)
         /* Catch callers which need to be fixed */
         BUG_ON(ti->preempt_count || !irqs_disabled());
 
-need_resched:
+        do {
                 add_preempt_count(PREEMPT_ACTIVE);
+
                 /*
                  * We keep the big kernel semaphore locked, but we
                  * clear ->lock_depth so that schedule() doesnt
                  * auto-release the semaphore:
                  */
 #ifdef CONFIG_PREEMPT_BKL
                 saved_lock_depth = task->lock_depth;
                 task->lock_depth = -1;
 #endif
                 local_irq_enable();
                 schedule();
                 local_irq_disable();
 #ifdef CONFIG_PREEMPT_BKL
                 task->lock_depth = saved_lock_depth;
 #endif
                 sub_preempt_count(PREEMPT_ACTIVE);
 
-        /* we could miss a preemption opportunity between schedule and now */
+                /*
+                 * Check again in case we missed a preemption opportunity
+                 * between schedule and now.
+                 */
                 barrier();
-        if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
-                goto need_resched;
+        } while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));
 }
 
 #endif /* CONFIG_PREEMPT */
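Note: preempt_schedule() and preempt_schedule_irq() keep their re-check semantics; the need_resched: label and the trailing test-and-goto simply become a do/while on TIF_NEED_RESCHED. The shared skeleton, taken from the two hunks above with the BKL and irq-enable details elided:

        do {
                add_preempt_count(PREEMPT_ACTIVE);
                schedule();
                sub_preempt_count(PREEMPT_ACTIVE);

                /*
                 * Check again in case we missed a preemption opportunity
                 * between schedule and now.
                 */
                barrier();
        } while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));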
@@ -4324,10 +4326,10 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
 asmlinkage long sys_sched_getscheduler(pid_t pid)
 {
         struct task_struct *p;
-        int retval = -EINVAL;
+        int retval;
 
         if (pid < 0)
-                goto out_nounlock;
+                return -EINVAL;
 
         retval = -ESRCH;
         read_lock(&tasklist_lock);
@@ -4338,8 +4340,6 @@ asmlinkage long sys_sched_getscheduler(pid_t pid)
                         retval = p->policy;
         }
         read_unlock(&tasklist_lock);
-
-out_nounlock:
         return retval;
 }
 
@@ -4352,10 +4352,10 @@ asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
 {
         struct sched_param lp;
         struct task_struct *p;
-        int retval = -EINVAL;
+        int retval;
 
         if (!param || pid < 0)
-                goto out_nounlock;
+                return -EINVAL;
 
         read_lock(&tasklist_lock);
         p = find_process_by_pid(pid);
@@ -4375,7 +4375,6 @@ asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
          */
         retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0;
 
-out_nounlock:
         return retval;
 
 out_unlock:
@@ -4731,11 +4730,11 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
 {
         struct task_struct *p;
         unsigned int time_slice;
-        int retval = -EINVAL;
+        int retval;
         struct timespec t;
 
         if (pid < 0)
-                goto out_nounlock;
+                return -EINVAL;
 
         retval = -ESRCH;
         read_lock(&tasklist_lock);
@@ -4763,8 +4762,8 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
         read_unlock(&tasklist_lock);
         jiffies_to_timespec(time_slice, &t);
         retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
-out_nounlock:
         return retval;
+
 out_unlock:
         read_unlock(&tasklist_lock);
         return retval;
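Note: the three syscalls above (sys_sched_getscheduler(), sys_sched_getparam(), sys_sched_rr_get_interval()) drop the out_nounlock: label in favour of an early 'return -EINVAL;' taken before any lock is held, so retval no longer needs a speculative -EINVAL initializer. The shape of the change, using sys_sched_getscheduler() as reconstructed from its two hunks (the unchanged lookup in the middle is elided):

        asmlinkage long sys_sched_getscheduler(pid_t pid)
        {
                struct task_struct *p;
                int retval;

                if (pid < 0)
                        return -EINVAL;         /* nothing locked yet */

                retval = -ESRCH;
                read_lock(&tasklist_lock);
                /* ... find p by pid and, if found, read p->policy into retval ... */
                read_unlock(&tasklist_lock);
                return retval;
        }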
@@ -5070,35 +5069,34 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
         struct rq *rq;
         int dest_cpu;
 
-restart:
+        do {
                 /* On same node? */
                 mask = node_to_cpumask(cpu_to_node(dead_cpu));
                 cpus_and(mask, mask, p->cpus_allowed);
                 dest_cpu = any_online_cpu(mask);
 
                 /* On any allowed CPU? */
                 if (dest_cpu == NR_CPUS)
                         dest_cpu = any_online_cpu(p->cpus_allowed);
 
                 /* No more Mr. Nice Guy. */
                 if (dest_cpu == NR_CPUS) {
                         rq = task_rq_lock(p, &flags);
                         cpus_setall(p->cpus_allowed);
                         dest_cpu = any_online_cpu(p->cpus_allowed);
                         task_rq_unlock(rq, &flags);
 
                         /*
                          * Don't tell them about moving exiting tasks or
                          * kernel threads (both mm NULL), since they never
                          * leave kernel.
                          */
                         if (p->mm && printk_ratelimit())
                                 printk(KERN_INFO "process %d (%s) no "
                                         "longer affine to cpu%d\n",
                                         p->pid, p->comm, dead_cpu);
                 }
-        if (!__migrate_task(p, dead_cpu, dest_cpu))
-                goto restart;
+        } while (!__migrate_task(p, dead_cpu, dest_cpu));
 }
 
 /*
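Note: move_task_off_dead_cpu() becomes a retry-until-success loop: the destination CPU is recomputed on every pass and the function only returns once __migrate_task() succeeds, which is what the old 'goto restart' achieved. Condensed from the hunk above (the rate-limited printk is omitted here):

        do {
                /* 1) prefer a CPU on the dead CPU's node */
                mask = node_to_cpumask(cpu_to_node(dead_cpu));
                cpus_and(mask, mask, p->cpus_allowed);
                dest_cpu = any_online_cpu(mask);

                /* 2) otherwise any CPU the task is allowed on */
                if (dest_cpu == NR_CPUS)
                        dest_cpu = any_online_cpu(p->cpus_allowed);

                /* 3) last resort: widen cpus_allowed to all CPUs */
                if (dest_cpu == NR_CPUS) {
                        rq = task_rq_lock(p, &flags);
                        cpus_setall(p->cpus_allowed);
                        dest_cpu = any_online_cpu(p->cpus_allowed);
                        task_rq_unlock(rq, &flags);
                }
        } while (!__migrate_task(p, dead_cpu, dest_cpu));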
@@ -5913,24 +5911,23 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
 
         if (!sg)
                 return;
-next_sg:
+        do {
                 for_each_cpu_mask(j, sg->cpumask) {
                         struct sched_domain *sd;
 
                         sd = &per_cpu(phys_domains, j);
                         if (j != first_cpu(sd->groups->cpumask)) {
                                 /*
                                  * Only add "power" once for each
                                  * physical package.
                                  */
                                 continue;
                         }
 
                         sg_inc_cpu_power(sg, sd->groups->__cpu_power);
                 }
                 sg = sg->next;
-        if (sg != group_head)
-                goto next_sg;
+        } while (sg != group_head);
 }
 #endif
 