aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-11-16 18:20:05 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2010-11-16 18:20:05 -0500
commitd33fdee4d090076462cfe25473f7139c3204b16e (patch)
tree0b7cbabcd57b72baf0e5bbfa2ce97094a518adf0 /kernel
parent1e8703b2e6aefba84dd9633d90a4093ff1200b93 (diff)
parent1e5a74059f9059d330744eac84873b1b99657008 (diff)
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: Fix cross-sched-class wakeup preemption
  sched: Fix runnable condition for stoptask
  sched: Use group weight, idle cpu metrics to fix imbalances during idle
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched.c39
-rw-r--r--kernel/sched_fair.c40
-rw-r--r--kernel/sched_stoptask.c4
3 files changed, 61 insertions(+), 22 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index aa14a56f9d03..dc91a4d09ac3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -560,18 +560,8 @@ struct rq {
560 560
561static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); 561static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
562 562
563static inline
564void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
565{
566 rq->curr->sched_class->check_preempt_curr(rq, p, flags);
567 563
568 /* 564static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
569 * A queue event has occurred, and we're going to schedule. In
570 * this case, we can save a useless back to back clock update.
571 */
572 if (test_tsk_need_resched(p))
573 rq->skip_clock_update = 1;
574}
575 565
576static inline int cpu_of(struct rq *rq) 566static inline int cpu_of(struct rq *rq)
577{ 567{
@@ -2118,6 +2108,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
2118 p->sched_class->prio_changed(rq, p, oldprio, running); 2108 p->sched_class->prio_changed(rq, p, oldprio, running);
2119} 2109}
2120 2110
2111static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
2112{
2113 const struct sched_class *class;
2114
2115 if (p->sched_class == rq->curr->sched_class) {
2116 rq->curr->sched_class->check_preempt_curr(rq, p, flags);
2117 } else {
2118 for_each_class(class) {
2119 if (class == rq->curr->sched_class)
2120 break;
2121 if (class == p->sched_class) {
2122 resched_task(rq->curr);
2123 break;
2124 }
2125 }
2126 }
2127
2128 /*
2129 * A queue event has occurred, and we're going to schedule. In
2130 * this case, we can save a useless back to back clock update.
2131 */
2132 if (test_tsk_need_resched(rq->curr))
2133 rq->skip_clock_update = 1;
2134}
2135
2121#ifdef CONFIG_SMP 2136#ifdef CONFIG_SMP
2122/* 2137/*
2123 * Is this task likely cache-hot: 2138 * Is this task likely cache-hot:
@@ -6960,6 +6975,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
6960 if (cpu != group_first_cpu(sd->groups)) 6975 if (cpu != group_first_cpu(sd->groups))
6961 return; 6976 return;
6962 6977
6978 sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
6979
6963 child = sd->child; 6980 child = sd->child;
6964 6981
6965 sd->groups->cpu_power = 0; 6982 sd->groups->cpu_power = 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f4f6a8326dd0..52ab113d8bb9 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1654,12 +1654,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
1654 struct cfs_rq *cfs_rq = task_cfs_rq(curr); 1654 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1655 int scale = cfs_rq->nr_running >= sched_nr_latency; 1655 int scale = cfs_rq->nr_running >= sched_nr_latency;
1656 1656
1657 if (unlikely(rt_prio(p->prio)))
1658 goto preempt;
1659
1660 if (unlikely(p->sched_class != &fair_sched_class))
1661 return;
1662
1663 if (unlikely(se == pse)) 1657 if (unlikely(se == pse))
1664 return; 1658 return;
1665 1659
@@ -2035,13 +2029,16 @@ struct sd_lb_stats {
2035 unsigned long this_load_per_task; 2029 unsigned long this_load_per_task;
2036 unsigned long this_nr_running; 2030 unsigned long this_nr_running;
2037 unsigned long this_has_capacity; 2031 unsigned long this_has_capacity;
2032 unsigned int this_idle_cpus;
2038 2033
2039 /* Statistics of the busiest group */ 2034 /* Statistics of the busiest group */
2035 unsigned int busiest_idle_cpus;
2040 unsigned long max_load; 2036 unsigned long max_load;
2041 unsigned long busiest_load_per_task; 2037 unsigned long busiest_load_per_task;
2042 unsigned long busiest_nr_running; 2038 unsigned long busiest_nr_running;
2043 unsigned long busiest_group_capacity; 2039 unsigned long busiest_group_capacity;
2044 unsigned long busiest_has_capacity; 2040 unsigned long busiest_has_capacity;
2041 unsigned int busiest_group_weight;
2045 2042
2046 int group_imb; /* Is there imbalance in this sd */ 2043 int group_imb; /* Is there imbalance in this sd */
2047#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 2044#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -2063,6 +2060,8 @@ struct sg_lb_stats {
2063 unsigned long sum_nr_running; /* Nr tasks running in the group */ 2060 unsigned long sum_nr_running; /* Nr tasks running in the group */
2064 unsigned long sum_weighted_load; /* Weighted load of group's tasks */ 2061 unsigned long sum_weighted_load; /* Weighted load of group's tasks */
2065 unsigned long group_capacity; 2062 unsigned long group_capacity;
2063 unsigned long idle_cpus;
2064 unsigned long group_weight;
2066 int group_imb; /* Is there an imbalance in the group ? */ 2065 int group_imb; /* Is there an imbalance in the group ? */
2067 int group_has_capacity; /* Is there extra capacity in the group? */ 2066 int group_has_capacity; /* Is there extra capacity in the group? */
2068}; 2067};
@@ -2431,7 +2430,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
2431 sgs->group_load += load; 2430 sgs->group_load += load;
2432 sgs->sum_nr_running += rq->nr_running; 2431 sgs->sum_nr_running += rq->nr_running;
2433 sgs->sum_weighted_load += weighted_cpuload(i); 2432 sgs->sum_weighted_load += weighted_cpuload(i);
2434 2433 if (idle_cpu(i))
2434 sgs->idle_cpus++;
2435 } 2435 }
2436 2436
2437 /* 2437 /*
@@ -2469,6 +2469,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
2469 sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); 2469 sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
2470 if (!sgs->group_capacity) 2470 if (!sgs->group_capacity)
2471 sgs->group_capacity = fix_small_capacity(sd, group); 2471 sgs->group_capacity = fix_small_capacity(sd, group);
2472 sgs->group_weight = group->group_weight;
2472 2473
2473 if (sgs->group_capacity > sgs->sum_nr_running) 2474 if (sgs->group_capacity > sgs->sum_nr_running)
2474 sgs->group_has_capacity = 1; 2475 sgs->group_has_capacity = 1;
@@ -2576,13 +2577,16 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
2576 sds->this_nr_running = sgs.sum_nr_running; 2577 sds->this_nr_running = sgs.sum_nr_running;
2577 sds->this_load_per_task = sgs.sum_weighted_load; 2578 sds->this_load_per_task = sgs.sum_weighted_load;
2578 sds->this_has_capacity = sgs.group_has_capacity; 2579 sds->this_has_capacity = sgs.group_has_capacity;
2580 sds->this_idle_cpus = sgs.idle_cpus;
2579 } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { 2581 } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
2580 sds->max_load = sgs.avg_load; 2582 sds->max_load = sgs.avg_load;
2581 sds->busiest = sg; 2583 sds->busiest = sg;
2582 sds->busiest_nr_running = sgs.sum_nr_running; 2584 sds->busiest_nr_running = sgs.sum_nr_running;
2585 sds->busiest_idle_cpus = sgs.idle_cpus;
2583 sds->busiest_group_capacity = sgs.group_capacity; 2586 sds->busiest_group_capacity = sgs.group_capacity;
2584 sds->busiest_load_per_task = sgs.sum_weighted_load; 2587 sds->busiest_load_per_task = sgs.sum_weighted_load;
2585 sds->busiest_has_capacity = sgs.group_has_capacity; 2588 sds->busiest_has_capacity = sgs.group_has_capacity;
2589 sds->busiest_group_weight = sgs.group_weight;
2586 sds->group_imb = sgs.group_imb; 2590 sds->group_imb = sgs.group_imb;
2587 } 2591 }
2588 2592
@@ -2860,8 +2864,26 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2860 if (sds.this_load >= sds.avg_load) 2864 if (sds.this_load >= sds.avg_load)
2861 goto out_balanced; 2865 goto out_balanced;
2862 2866
2863 if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) 2867 /*
2864 goto out_balanced; 2868 * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative.
2869 * And to check for busy balance use !idle_cpu instead of
2870 * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE
2871 * even when they are idle.
2872 */
2873 if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
2874 if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
2875 goto out_balanced;
2876 } else {
2877 /*
2878 * This cpu is idle. If the busiest group load doesn't
2879 * have more tasks than the number of available cpu's and
2880 * there is no imbalance between this and busiest group
2881 * wrt to idle cpu's, it is balanced.
2882 */
2883 if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
2884 sds.busiest_nr_running <= sds.busiest_group_weight)
2885 goto out_balanced;
2886 }
2865 2887
2866force_balance: 2888force_balance:
2867 /* Looks like there is an imbalance. Compute it */ 2889 /* Looks like there is an imbalance. Compute it */
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 45bddc0c1048..2bf6b47058c1 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -19,14 +19,14 @@ select_task_rq_stop(struct rq *rq, struct task_struct *p,
19static void 19static void
20check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) 20check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
21{ 21{
22 resched_task(rq->curr); /* we preempt everything */ 22 /* we're never preempted */
23} 23}
24 24
25static struct task_struct *pick_next_task_stop(struct rq *rq) 25static struct task_struct *pick_next_task_stop(struct rq *rq)
26{ 26{
27 struct task_struct *stop = rq->stop; 27 struct task_struct *stop = rq->stop;
28 28
29 if (stop && stop->state == TASK_RUNNING) 29 if (stop && stop->se.on_rq)
30 return stop; 30 return stop;
31 31
32 return NULL; 32 return NULL;