author    Peter Zijlstra <a.p.zijlstra@chello.nl>  2008-02-25 11:34:02 -0500
committer Ingo Molnar <mingo@elte.hu>              2008-03-04 11:54:06 -0500
commit    62fb185130e4d420f71a30ff59d8b16b74ef5d2b (patch)
tree      474c0824a5bf90950b0a430a11a52b358c9e1f31 /kernel/sched_fair.c
parent    976dde010e513a9c7c3117a32b7b015f84b37430 (diff)
sched: revert load_balance_monitor() changes
The following commits cause a number of regressions:

  commit 58e2d4ca581167c2a079f4ee02be2f0bc52e8729
  Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
  Date:   Fri Jan 25 21:08:00 2008 +0100
  sched: group scheduling, change how cpu load is calculated

  commit 6b2d7700266b9402e12824e11e0099ae6a4a6a79
  Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
  Date:   Fri Jan 25 21:08:00 2008 +0100
  sched: group scheduler, fix fairness of cpu bandwidth allocation for task groups

Namely:
 - very frequent wakeups on SMP, reported by PowerTop users.
 - cacheline trashing on (large) SMP
 - some latencies larger than 500ms

While there is a mergeable patch to fix the latter, the former issues
are not fixable in a manner suitable for .25 (we're at -rc3 now).

Hence we revert them and try again in v2.6.26.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Tested-by: Alexey Zaytsev <alexey.zaytsev@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--  kernel/sched_fair.c | 115
1 file changed, 35 insertions(+), 80 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c8e6492c5925..3df4d46994ca 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -727,8 +727,6 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
         return se->parent;
 }
 
-#define GROUP_IMBALANCE_PCT     20
-
 #else   /* CONFIG_FAIR_GROUP_SCHED */
 
 #define for_each_sched_entity(se) \
@@ -819,26 +817,15 @@ hrtick_start_fair(struct rq *rq, struct task_struct *p)
 static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 {
         struct cfs_rq *cfs_rq;
-        struct sched_entity *se = &p->se,
-                            *topse = NULL;      /* Highest schedulable entity */
-        int incload = 1;
+        struct sched_entity *se = &p->se;
 
         for_each_sched_entity(se) {
-                topse = se;
-                if (se->on_rq) {
-                        incload = 0;
+                if (se->on_rq)
                         break;
-                }
                 cfs_rq = cfs_rq_of(se);
                 enqueue_entity(cfs_rq, se, wakeup);
                 wakeup = 1;
         }
-        /* Increment cpu load if we just enqueued the first task of a group on
-         * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
-         * at the highest grouping level.
-         */
-        if (incload)
-                inc_cpu_load(rq, topse->load.weight);
 
         hrtick_start_fair(rq, rq->curr);
 }
@@ -851,28 +838,16 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
 {
         struct cfs_rq *cfs_rq;
-        struct sched_entity *se = &p->se,
-                            *topse = NULL;      /* Highest schedulable entity */
-        int decload = 1;
+        struct sched_entity *se = &p->se;
 
         for_each_sched_entity(se) {
-                topse = se;
                 cfs_rq = cfs_rq_of(se);
                 dequeue_entity(cfs_rq, se, sleep);
                 /* Don't dequeue parent if it has other entities besides us */
-                if (cfs_rq->load.weight) {
-                        if (parent_entity(se))
-                                decload = 0;
+                if (cfs_rq->load.weight)
                         break;
-                }
                 sleep = 1;
         }
-        /* Decrement cpu load if we just dequeued the last task of a group on
-         * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
-         * at the highest grouping level.
-         */
-        if (decload)
-                dec_cpu_load(rq, topse->load.weight);
 
         hrtick_start_fair(rq, rq->curr);
 }
@@ -1186,6 +1161,25 @@ static struct task_struct *load_balance_next_fair(void *arg)
         return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
+{
+        struct sched_entity *curr;
+        struct task_struct *p;
+
+        if (!cfs_rq->nr_running || !first_fair(cfs_rq))
+                return MAX_PRIO;
+
+        curr = cfs_rq->curr;
+        if (!curr)
+                curr = __pick_next_entity(cfs_rq);
+
+        p = task_of(curr);
+
+        return p->prio;
+}
+#endif
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
                   unsigned long max_load_move,
@@ -1195,45 +1189,28 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
         struct cfs_rq *busy_cfs_rq;
         long rem_load_move = max_load_move;
         struct rq_iterator cfs_rq_iterator;
-        unsigned long load_moved;
 
         cfs_rq_iterator.start = load_balance_start_fair;
         cfs_rq_iterator.next = load_balance_next_fair;
 
         for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
 #ifdef CONFIG_FAIR_GROUP_SCHED
-                struct cfs_rq *this_cfs_rq = busy_cfs_rq->tg->cfs_rq[this_cpu];
-                unsigned long maxload, task_load, group_weight;
-                unsigned long thisload, per_task_load;
-                struct sched_entity *se = busy_cfs_rq->tg->se[busiest->cpu];
-
-                task_load = busy_cfs_rq->load.weight;
-                group_weight = se->load.weight;
+                struct cfs_rq *this_cfs_rq;
+                long imbalance;
+                unsigned long maxload;
 
-                /*
-                 * 'group_weight' is contributed by tasks of total weight
-                 * 'task_load'. To move 'rem_load_move' worth of weight only,
-                 * we need to move a maximum task load of:
-                 *
-                 *      maxload = (remload / group_weight) * task_load;
-                 */
-                maxload = (rem_load_move * task_load) / group_weight;
+                this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
 
-                if (!maxload || !task_load)
+                imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
+                /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
+                if (imbalance <= 0)
                         continue;
 
-                per_task_load = task_load / busy_cfs_rq->nr_running;
-                /*
-                 * balance_tasks will try to forcibly move atleast one task if
-                 * possible (because of SCHED_LOAD_SCALE_FUZZ). Avoid that if
-                 * maxload is less than GROUP_IMBALANCE_FUZZ% the per_task_load.
-                 */
-                if (100 * maxload < GROUP_IMBALANCE_PCT * per_task_load)
-                        continue;
+                /* Don't pull more than imbalance/2 */
+                imbalance /= 2;
+                maxload = min(rem_load_move, imbalance);
 
-                /* Disable priority-based load balance */
-                *this_best_prio = 0;
-                thisload = this_cfs_rq->load.weight;
+                *this_best_prio = cfs_rq_best_prio(this_cfs_rq);
 #else
 # define maxload rem_load_move
 #endif
@@ -1242,33 +1219,11 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
                  * load_balance_[start|next]_fair iterators
                  */
                 cfs_rq_iterator.arg = busy_cfs_rq;
-                load_moved = balance_tasks(this_rq, this_cpu, busiest,
+                rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
                                 maxload, sd, idle, all_pinned,
                                 this_best_prio,
                                 &cfs_rq_iterator);
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-                /*
-                 * load_moved holds the task load that was moved. The
-                 * effective (group) weight moved would be:
-                 *      load_moved_eff = load_moved/task_load * group_weight;
-                 */
-                load_moved = (group_weight * load_moved) / task_load;
-
-                /* Adjust shares on both cpus to reflect load_moved */
-                group_weight -= load_moved;
-                set_se_shares(se, group_weight);
-
-                se = busy_cfs_rq->tg->se[this_cpu];
-                if (!thisload)
-                        group_weight = load_moved;
-                else
-                        group_weight = se->load.weight + load_moved;
-                set_se_shares(se, group_weight);
-#endif
-
-                rem_load_move -= load_moved;
-
                 if (rem_load_move <= 0)
                         break;
         }
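
For reference, the group-scheduling path this revert restores caps how much weight load_balance_fair() pulls from a busy group: at most half of the load imbalance between the busiest CPU's cfs_rq and the local one, and never more than the load the caller still wants moved. A minimal user-space sketch of that arithmetic follows; the max_pull() helper and the example weights are hypothetical stand-ins for the real cfs_rq fields, not kernel code.

#include <stdio.h>

/* Hypothetical stand-in for the restored calculation in load_balance_fair():
 * busy_weight and this_weight play the role of busy_cfs_rq->load.weight and
 * this_cfs_rq->load.weight; rem_load_move is the load still left to migrate. */
static unsigned long max_pull(long busy_weight, long this_weight,
                              unsigned long rem_load_move)
{
        long imbalance = busy_weight - this_weight;

        /* Don't pull if the local cfs_rq already carries more load */
        if (imbalance <= 0)
                return 0;

        /* Don't pull more than imbalance/2, nor more than is still wanted */
        imbalance /= 2;
        if (rem_load_move < (unsigned long)imbalance)
                return rem_load_move;
        return (unsigned long)imbalance;
}

int main(void)
{
        /* Example: busiest group weight 3072, local weight 1024,
         * 1500 units of load left to move -> pull at most 1024. */
        printf("maxload = %lu\n", max_pull(3072, 1024, 1500));
        return 0;
}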