diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-11-16 18:20:05 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-11-16 18:20:05 -0500 |
commit | d33fdee4d090076462cfe25473f7139c3204b16e (patch) | |
tree | 0b7cbabcd57b72baf0e5bbfa2ce97094a518adf0 /kernel | |
parent | 1e8703b2e6aefba84dd9633d90a4093ff1200b93 (diff) | |
parent | 1e5a74059f9059d330744eac84873b1b99657008 (diff) |
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: Fix cross-sched-class wakeup preemption
  sched: Fix runnable condition for stoptask
  sched: Use group weight, idle cpu metrics to fix imbalances during idle
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched.c | 39 | ||||
-rw-r--r-- | kernel/sched_fair.c | 40 | ||||
-rw-r--r-- | kernel/sched_stoptask.c | 4 |
3 files changed, 61 insertions, 22 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index aa14a56f9d03..dc91a4d09ac3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -560,18 +560,8 @@ struct rq { | |||
560 | 560 | ||
561 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | 561 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
562 | 562 | ||
563 | static inline | ||
564 | void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | ||
565 | { | ||
566 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); | ||
567 | 563 | ||
568 | /* | 564 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); |
569 | * A queue event has occurred, and we're going to schedule. In | ||
570 | * this case, we can save a useless back to back clock update. | ||
571 | */ | ||
572 | if (test_tsk_need_resched(p)) | ||
573 | rq->skip_clock_update = 1; | ||
574 | } | ||
575 | 565 | ||
576 | static inline int cpu_of(struct rq *rq) | 566 | static inline int cpu_of(struct rq *rq) |
577 | { | 567 | { |
@@ -2118,6 +2108,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
2118 | p->sched_class->prio_changed(rq, p, oldprio, running); | 2108 | p->sched_class->prio_changed(rq, p, oldprio, running); |
2119 | } | 2109 | } |
2120 | 2110 | ||
2111 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | ||
2112 | { | ||
2113 | const struct sched_class *class; | ||
2114 | |||
2115 | if (p->sched_class == rq->curr->sched_class) { | ||
2116 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); | ||
2117 | } else { | ||
2118 | for_each_class(class) { | ||
2119 | if (class == rq->curr->sched_class) | ||
2120 | break; | ||
2121 | if (class == p->sched_class) { | ||
2122 | resched_task(rq->curr); | ||
2123 | break; | ||
2124 | } | ||
2125 | } | ||
2126 | } | ||
2127 | |||
2128 | /* | ||
2129 | * A queue event has occurred, and we're going to schedule. In | ||
2130 | * this case, we can save a useless back to back clock update. | ||
2131 | */ | ||
2132 | if (test_tsk_need_resched(rq->curr)) | ||
2133 | rq->skip_clock_update = 1; | ||
2134 | } | ||
2135 | |||
2121 | #ifdef CONFIG_SMP | 2136 | #ifdef CONFIG_SMP |
2122 | /* | 2137 | /* |
2123 | * Is this task likely cache-hot: | 2138 | * Is this task likely cache-hot: |
@@ -6960,6 +6975,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
6960 | if (cpu != group_first_cpu(sd->groups)) | 6975 | if (cpu != group_first_cpu(sd->groups)) |
6961 | return; | 6976 | return; |
6962 | 6977 | ||
6978 | sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups)); | ||
6979 | |||
6963 | child = sd->child; | 6980 | child = sd->child; |
6964 | 6981 | ||
6965 | sd->groups->cpu_power = 0; | 6982 | sd->groups->cpu_power = 0; |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f4f6a8326dd0..52ab113d8bb9 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1654,12 +1654,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1654 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | 1654 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); |
1655 | int scale = cfs_rq->nr_running >= sched_nr_latency; | 1655 | int scale = cfs_rq->nr_running >= sched_nr_latency; |
1656 | 1656 | ||
1657 | if (unlikely(rt_prio(p->prio))) | ||
1658 | goto preempt; | ||
1659 | |||
1660 | if (unlikely(p->sched_class != &fair_sched_class)) | ||
1661 | return; | ||
1662 | |||
1663 | if (unlikely(se == pse)) | 1657 | if (unlikely(se == pse)) |
1664 | return; | 1658 | return; |
1665 | 1659 | ||
@@ -2035,13 +2029,16 @@ struct sd_lb_stats { | |||
2035 | unsigned long this_load_per_task; | 2029 | unsigned long this_load_per_task; |
2036 | unsigned long this_nr_running; | 2030 | unsigned long this_nr_running; |
2037 | unsigned long this_has_capacity; | 2031 | unsigned long this_has_capacity; |
2032 | unsigned int this_idle_cpus; | ||
2038 | 2033 | ||
2039 | /* Statistics of the busiest group */ | 2034 | /* Statistics of the busiest group */ |
2035 | unsigned int busiest_idle_cpus; | ||
2040 | unsigned long max_load; | 2036 | unsigned long max_load; |
2041 | unsigned long busiest_load_per_task; | 2037 | unsigned long busiest_load_per_task; |
2042 | unsigned long busiest_nr_running; | 2038 | unsigned long busiest_nr_running; |
2043 | unsigned long busiest_group_capacity; | 2039 | unsigned long busiest_group_capacity; |
2044 | unsigned long busiest_has_capacity; | 2040 | unsigned long busiest_has_capacity; |
2041 | unsigned int busiest_group_weight; | ||
2045 | 2042 | ||
2046 | int group_imb; /* Is there imbalance in this sd */ | 2043 | int group_imb; /* Is there imbalance in this sd */ |
2047 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | 2044 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) |
@@ -2063,6 +2060,8 @@ struct sg_lb_stats { | |||
2063 | unsigned long sum_nr_running; /* Nr tasks running in the group */ | 2060 | unsigned long sum_nr_running; /* Nr tasks running in the group */ |
2064 | unsigned long sum_weighted_load; /* Weighted load of group's tasks */ | 2061 | unsigned long sum_weighted_load; /* Weighted load of group's tasks */ |
2065 | unsigned long group_capacity; | 2062 | unsigned long group_capacity; |
2063 | unsigned long idle_cpus; | ||
2064 | unsigned long group_weight; | ||
2066 | int group_imb; /* Is there an imbalance in the group ? */ | 2065 | int group_imb; /* Is there an imbalance in the group ? */ |
2067 | int group_has_capacity; /* Is there extra capacity in the group? */ | 2066 | int group_has_capacity; /* Is there extra capacity in the group? */ |
2068 | }; | 2067 | }; |
@@ -2431,7 +2430,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
2431 | sgs->group_load += load; | 2430 | sgs->group_load += load; |
2432 | sgs->sum_nr_running += rq->nr_running; | 2431 | sgs->sum_nr_running += rq->nr_running; |
2433 | sgs->sum_weighted_load += weighted_cpuload(i); | 2432 | sgs->sum_weighted_load += weighted_cpuload(i); |
2434 | 2433 | if (idle_cpu(i)) | |
2434 | sgs->idle_cpus++; | ||
2435 | } | 2435 | } |
2436 | 2436 | ||
2437 | /* | 2437 | /* |
@@ -2469,6 +2469,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
2469 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); | 2469 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); |
2470 | if (!sgs->group_capacity) | 2470 | if (!sgs->group_capacity) |
2471 | sgs->group_capacity = fix_small_capacity(sd, group); | 2471 | sgs->group_capacity = fix_small_capacity(sd, group); |
2472 | sgs->group_weight = group->group_weight; | ||
2472 | 2473 | ||
2473 | if (sgs->group_capacity > sgs->sum_nr_running) | 2474 | if (sgs->group_capacity > sgs->sum_nr_running) |
2474 | sgs->group_has_capacity = 1; | 2475 | sgs->group_has_capacity = 1; |
@@ -2576,13 +2577,16 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, | |||
2576 | sds->this_nr_running = sgs.sum_nr_running; | 2577 | sds->this_nr_running = sgs.sum_nr_running; |
2577 | sds->this_load_per_task = sgs.sum_weighted_load; | 2578 | sds->this_load_per_task = sgs.sum_weighted_load; |
2578 | sds->this_has_capacity = sgs.group_has_capacity; | 2579 | sds->this_has_capacity = sgs.group_has_capacity; |
2580 | sds->this_idle_cpus = sgs.idle_cpus; | ||
2579 | } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { | 2581 | } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { |
2580 | sds->max_load = sgs.avg_load; | 2582 | sds->max_load = sgs.avg_load; |
2581 | sds->busiest = sg; | 2583 | sds->busiest = sg; |
2582 | sds->busiest_nr_running = sgs.sum_nr_running; | 2584 | sds->busiest_nr_running = sgs.sum_nr_running; |
2585 | sds->busiest_idle_cpus = sgs.idle_cpus; | ||
2583 | sds->busiest_group_capacity = sgs.group_capacity; | 2586 | sds->busiest_group_capacity = sgs.group_capacity; |
2584 | sds->busiest_load_per_task = sgs.sum_weighted_load; | 2587 | sds->busiest_load_per_task = sgs.sum_weighted_load; |
2585 | sds->busiest_has_capacity = sgs.group_has_capacity; | 2588 | sds->busiest_has_capacity = sgs.group_has_capacity; |
2589 | sds->busiest_group_weight = sgs.group_weight; | ||
2586 | sds->group_imb = sgs.group_imb; | 2590 | sds->group_imb = sgs.group_imb; |
2587 | } | 2591 | } |
2588 | 2592 | ||
@@ -2860,8 +2864,26 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2860 | if (sds.this_load >= sds.avg_load) | 2864 | if (sds.this_load >= sds.avg_load) |
2861 | goto out_balanced; | 2865 | goto out_balanced; |
2862 | 2866 | ||
2863 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) | 2867 | /* |
2864 | goto out_balanced; | 2868 | * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative. |
2869 | * And to check for busy balance use !idle_cpu instead of | ||
2870 | * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE | ||
2871 | * even when they are idle. | ||
2872 | */ | ||
2873 | if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) { | ||
2874 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) | ||
2875 | goto out_balanced; | ||
2876 | } else { | ||
2877 | /* | ||
2878 | * This cpu is idle. If the busiest group load doesn't | ||
2879 | * have more tasks than the number of available cpu's and | ||
2880 | * there is no imbalance between this and busiest group | ||
2881 | * wrt to idle cpu's, it is balanced. | ||
2882 | */ | ||
2883 | if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) && | ||
2884 | sds.busiest_nr_running <= sds.busiest_group_weight) | ||
2885 | goto out_balanced; | ||
2886 | } | ||
2865 | 2887 | ||
2866 | force_balance: | 2888 | force_balance: |
2867 | /* Looks like there is an imbalance. Compute it */ | 2889 | /* Looks like there is an imbalance. Compute it */ |
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 45bddc0c1048..2bf6b47058c1 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -19,14 +19,14 @@ select_task_rq_stop(struct rq *rq, struct task_struct *p, | |||
19 | static void | 19 | static void |
20 | check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) | 20 | check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) |
21 | { | 21 | { |
22 | resched_task(rq->curr); /* we preempt everything */ | 22 | /* we're never preempted */ |
23 | } | 23 | } |
24 | 24 | ||
25 | static struct task_struct *pick_next_task_stop(struct rq *rq) | 25 | static struct task_struct *pick_next_task_stop(struct rq *rq) |
26 | { | 26 | { |
27 | struct task_struct *stop = rq->stop; | 27 | struct task_struct *stop = rq->stop; |
28 | 28 | ||
29 | if (stop && stop->state == TASK_RUNNING) | 29 | if (stop && stop->se.on_rq) |
30 | return stop; | 30 | return stop; |
31 | 31 | ||
32 | return NULL; | 32 | return NULL; |