revert ("sched: fair: weight calculations")

Yanmin Zhang reported:

  Comparing with kernel 2.6.25, sysbench+mysql(oltp, readonly) has many
  regressions with 2.6.26-rc1:

    1) 8-core stoakley: 28%;
    2) 16-core tigerton: 20%;
    3) Itanium Montvale: 50%.

  Bisect located this patch:

  | 8f1bc385cfbab474db6c27b5af1e439614f3025c is first bad commit
  | commit 8f1bc385cfbab474db6c27b5af1e439614f3025c
  | Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
  | Date: Sat Apr 19 19:45:00 2008 +0200
  |
  | sched: fair: weight calculations

Revert it to the 2.6.25 state.

Bisected-by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Ingo Molnar <mingo@elte.hu> 2008-05-29 05:23:17 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-05-29 05:24:01 -0400
commit: f9305d4a0968201b2818dbed0dc8cb0d4ee7aeb3 (patch)
tree: e171e568f04bd25c7c2ff99b5ef673b917b6eae6
parent: f26a3988917913b3d11b2bd741601a2c64ab9204 (diff)
2 files changed, 39 insertions, 75 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index cfa222a91539..4aac8aa16037 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1368,9 +1368,6 @@ static void __resched_task(struct task_struct *p, int tif_bit)
 */
 #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
-/*
- * delta *= weight / lw
- */
 static unsigned long
 calc_delta_mine(unsigned long delta_exec, unsigned long weight,
                struct load_weight *lw)
@@ -1393,6 +1390,12 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
        return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
 }
+static inline unsigned long
+calc_delta_fair(unsigned long delta_exec, struct load_weight *lw)
+{
+        return calc_delta_mine(delta_exec, NICE_0_LOAD, lw);
+}
 static inline void update_load_add(struct load_weight *lw, unsigned long inc)
 {
        lw->weight += inc;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index e24ecd39c4b8..0eb0ae879542 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -334,34 +334,6 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
 #endif
 /*
- * delta *= w / rw
- */
-static inline unsigned long
-calc_delta_weight(unsigned long delta, struct sched_entity *se)
-{
-        for_each_sched_entity(se) {
-                delta = calc_delta_mine(delta,
-                                se->load.weight, &cfs_rq_of(se)->load);
-        }
-        return delta;
-}
-/*
- * delta *= rw / w
- */
-static inline unsigned long
-calc_delta_fair(unsigned long delta, struct sched_entity *se)
-{
-        for_each_sched_entity(se) {
-                delta = calc_delta_mine(delta,
-                                cfs_rq_of(se)->load.weight, &se->load);
-        }
-        return delta;
-}
-/*
 * The idea is to set a period in which each task runs once.
 *
 * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch
@@ -390,54 +362,47 @@ static u64 __sched_period(unsigned long nr_running)
 */
 static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-        return calc_delta_weight(__sched_period(cfs_rq->nr_running), se);
+        u64 slice = __sched_period(cfs_rq->nr_running);
+        for_each_sched_entity(se) {
+                cfs_rq = cfs_rq_of(se);
+                slice *= se->load.weight;
+                do_div(slice, cfs_rq->load.weight);
+        }
+        return slice;
 }
 /*
 * We calculate the vruntime slice of a to be inserted task
 *
- * vs = s*rw/w = p
+ * vs = s/w = p/rw
 */
 static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
        unsigned long nr_running = cfs_rq->nr_running;
+        unsigned long weight;
+        u64 vslice;
        if (!se->on_rq)
                nr_running++;
-        return __sched_period(nr_running);
+        vslice = __sched_period(nr_running);
-}
-/*
- * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in
- * that it favours >=0 over <0.
- *
- *   -20         |
- *               |
- *     0 --------+-------
- *             .'
- *    19     .'
- *
- */
-static unsigned long
-calc_delta_asym(unsigned long delta, struct sched_entity *se)
-{
-        struct load_weight lw = {
-                .weight = NICE_0_LOAD,
-                .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
-        };
        for_each_sched_entity(se) {
-                struct load_weight *se_lw = &se->load;
+                cfs_rq = cfs_rq_of(se);
-                if (se->load.weight < NICE_0_LOAD)
+                weight = cfs_rq->load.weight;
-                        se_lw = &lw;
+                if (!se->on_rq)
+                        weight += se->load.weight;
-                delta = calc_delta_mine(delta,
+                vslice *= NICE_0_LOAD;
-                                cfs_rq_of(se)->load.weight, se_lw);
+                do_div(vslice, weight);
        }
-        return delta;
+        return vslice;
 }
 /*
@@ -454,7 +419,11 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
        curr->sum_exec_runtime += delta_exec;
        schedstat_add(cfs_rq, exec_clock, delta_exec);
-        delta_exec_weighted = calc_delta_fair(delta_exec, curr);
+        delta_exec_weighted = delta_exec;
+        if (unlikely(curr->load.weight != NICE_0_LOAD)) {
+                delta_exec_weighted = calc_delta_fair(delta_exec_weighted,
+                                                        &curr->load);
+        }
        curr->vruntime += delta_exec_weighted;
 }
@@ -661,17 +630,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
        if (!initial) {
                /* sleeps upto a single latency don't count. */
-                if (sched_feat(NEW_FAIR_SLEEPERS)) {
+                if (sched_feat(NEW_FAIR_SLEEPERS))
-                        unsigned long thresh = sysctl_sched_latency;
+                        vruntime -= sysctl_sched_latency;
-                        /*
-                         * convert the sleeper threshold into virtual time
-                         */
-                        if (sched_feat(NORMALIZED_SLEEPER))
-                                thresh = calc_delta_fair(thresh, se);
-                        vruntime -= thresh;
-                }
                /* ensure we never gain time by being placed backwards. */
                vruntime = max_vruntime(se->vruntime, vruntime);
@@ -1169,10 +1129,11 @@ static unsigned long wakeup_gran(struct sched_entity *se)
        unsigned long gran = sysctl_sched_wakeup_granularity;
        /*
-         * More easily preempt - nice tasks, while not making it harder for
+         * More easily preempt - nice tasks, while not making
-         * + nice tasks.
+         * it harder for + nice tasks.
         */
-        gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se);
+        if (unlikely(se->load.weight > NICE_0_LOAD))
+                gran = calc_delta_fair(gran, &se->load);
        return gran;
 }
author	Ingo Molnar <mingo@elte.hu>	2008-05-29 05:23:17 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-05-29 05:24:01 -0400
commit	f9305d4a0968201b2818dbed0dc8cb0d4ee7aeb3 (patch)
tree	e171e568f04bd25c7c2ff99b5ef673b917b6eae6
parent	f26a3988917913b3d11b2bd741601a2c64ab9204 (diff)