about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/sched.c
diff options
context:
space:
mode:
author: Gregory Haskins <ghaskins@novell.com> 2008-01-25 15:08:09 -0500
committer: Ingo Molnar <mingo@elte.hu> 2008-01-25 15:08:09 -0500
commit: e7693a362ec84bb5b6fd441d8a8b4b9d568a7a0c (patch)
tree: 078940540641a59aaf199695bfc6de3f062a987b /kernel/sched.c
parent: 697f0a487f294e634a342764472b79375bb3158a (diff)
sched: de-SCHED_OTHER-ize the RT path
The current wake-up code path tries to determine if it can optimize the wake-up to "this_cpu" by computing load calculations. The problem is that these calculations are only relevant to SCHED_OTHER tasks where load is king. For RT tasks, priority is king. So the load calculation is completely wasted bandwidth.

Therefore, we create a new sched_class interface to help with pre-wakeup routing decisions and move the load calculation as a function of CFS task's class.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- kernel/sched.c 167
1 files changed, 27 insertions, 140 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 66e99b419b31..3344ba776b97 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -960,6 +960,13 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
960 update_load_sub(&rq->load, load); 960 update_load_sub(&rq->load, load);
961} 961}
962 962
963#ifdef CONFIG_SMP
964static unsigned long source_load(int cpu, int type);
965static unsigned long target_load(int cpu, int type);
966static unsigned long cpu_avg_load_per_task(int cpu);
967static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
968#endif /* CONFIG_SMP */
969
963#include "sched_stats.h" 970#include "sched_stats.h"
964#include "sched_idletask.c" 971#include "sched_idletask.c"
965#include "sched_fair.c" 972#include "sched_fair.c"
@@ -1118,7 +1125,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1118/* 1125/*
1119 * Is this task likely cache-hot: 1126 * Is this task likely cache-hot:
1120 */ 1127 */
1121static inline int 1128static int
1122task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) 1129task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
1123{ 1130{
1124 s64 delta; 1131 s64 delta;
@@ -1343,7 +1350,7 @@ static unsigned long target_load(int cpu, int type)
1343/* 1350/*
1344 * Return the average load per task on the cpu's run queue 1351 * Return the average load per task on the cpu's run queue
1345 */ 1352 */
1346static inline unsigned long cpu_avg_load_per_task(int cpu) 1353static unsigned long cpu_avg_load_per_task(int cpu)
1347{ 1354{
1348 struct rq *rq = cpu_rq(cpu); 1355 struct rq *rq = cpu_rq(cpu);
1349 unsigned long total = weighted_cpuload(cpu); 1356 unsigned long total = weighted_cpuload(cpu);
@@ -1500,58 +1507,6 @@ static int sched_balance_self(int cpu, int flag)
1500 1507
1501#endif /* CONFIG_SMP */ 1508#endif /* CONFIG_SMP */
1502 1509
1503/*
1504 * wake_idle() will wake a task on an idle cpu if task->cpu is
1505 * not idle and an idle cpu is available. The span of cpus to
1506 * search starts with cpus closest then further out as needed,
1507 * so we always favor a closer, idle cpu.
1508 *
1509 * Returns the CPU we should wake onto.
1510 */
1511#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
1512static int wake_idle(int cpu, struct task_struct *p)
1513{
1514 cpumask_t tmp;
1515 struct sched_domain *sd;
1516 int i;
1517
1518 /*
1519 * If it is idle, then it is the best cpu to run this task.
1520 *
1521 * This cpu is also the best, if it has more than one task already.
1522 * Siblings must be also busy(in most cases) as they didn't already
1523 * pickup the extra load from this cpu and hence we need not check
1524 * sibling runqueue info. This will avoid the checks and cache miss
1525 * penalities associated with that.
1526 */
1527 if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1)
1528 return cpu;
1529
1530 for_each_domain(cpu, sd) {
1531 if (sd->flags & SD_WAKE_IDLE) {
1532 cpus_and(tmp, sd->span, p->cpus_allowed);
1533 for_each_cpu_mask(i, tmp) {
1534 if (idle_cpu(i)) {
1535 if (i != task_cpu(p)) {
1536 schedstat_inc(p,
1537 se.nr_wakeups_idle);
1538 }
1539 return i;
1540 }
1541 }
1542 } else {
1543 break;
1544 }
1545 }
1546 return cpu;
1547}
1548#else
1549static inline int wake_idle(int cpu, struct task_struct *p)
1550{
1551 return cpu;
1552}
1553#endif
1554
1555/*** 1510/***
1556 * try_to_wake_up - wake up a thread 1511 * try_to_wake_up - wake up a thread
1557 * @p: the to-be-woken-up thread 1512 * @p: the to-be-woken-up thread
@@ -1573,8 +1528,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
1573 long old_state; 1528 long old_state;
1574 struct rq *rq; 1529 struct rq *rq;
1575#ifdef CONFIG_SMP 1530#ifdef CONFIG_SMP
1576 struct sched_domain *sd, *this_sd = NULL;
1577 unsigned long load, this_load;
1578 int new_cpu; 1531 int new_cpu;
1579#endif 1532#endif
1580 1533
@@ -1594,90 +1547,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
1594 if (unlikely(task_running(rq, p))) 1547 if (unlikely(task_running(rq, p)))
1595 goto out_activate; 1548 goto out_activate;
1596 1549
1597 new_cpu = cpu; 1550 new_cpu = p->sched_class->select_task_rq(p, sync);
1598
1599 schedstat_inc(rq, ttwu_count);
1600 if (cpu == this_cpu) {
1601 schedstat_inc(rq, ttwu_local);
1602 goto out_set_cpu;
1603 }
1604
1605 for_each_domain(this_cpu, sd) {
1606 if (cpu_isset(cpu, sd->span)) {
1607 schedstat_inc(sd, ttwu_wake_remote);
1608 this_sd = sd;
1609 break;
1610 }
1611 }
1612
1613 if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
1614 goto out_set_cpu;
1615
1616 /*
1617 * Check for affine wakeup and passive balancing possibilities.
1618 */
1619 if (this_sd) {
1620 int idx = this_sd->wake_idx;
1621 unsigned int imbalance;
1622
1623 imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
1624
1625 load = source_load(cpu, idx);
1626 this_load = target_load(this_cpu, idx);
1627
1628 new_cpu = this_cpu; /* Wake to this CPU if we can */
1629
1630 if (this_sd->flags & SD_WAKE_AFFINE) {
1631 unsigned long tl = this_load;
1632 unsigned long tl_per_task;
1633
1634 /*
1635 * Attract cache-cold tasks on sync wakeups:
1636 */
1637 if (sync && !task_hot(p, rq->clock, this_sd))
1638 goto out_set_cpu;
1639
1640 schedstat_inc(p, se.nr_wakeups_affine_attempts);
1641 tl_per_task = cpu_avg_load_per_task(this_cpu);
1642
1643 /*
1644 * If sync wakeup then subtract the (maximum possible)
1645 * effect of the currently running task from the load
1646 * of the current CPU:
1647 */
1648 if (sync)
1649 tl -= current->se.load.weight;
1650
1651 if ((tl <= load &&
1652 tl + target_load(cpu, idx) <= tl_per_task) ||
1653 100*(tl + p->se.load.weight) <= imbalance*load) {
1654 /*
1655 * This domain has SD_WAKE_AFFINE and
1656 * p is cache cold in this domain, and
1657 * there is no bad imbalance.
1658 */
1659 schedstat_inc(this_sd, ttwu_move_affine);
1660 schedstat_inc(p, se.nr_wakeups_affine);
1661 goto out_set_cpu;
1662 }
1663 }
1664
1665 /*
1666 * Start passive balancing when half the imbalance_pct
1667 * limit is reached.
1668 */
1669 if (this_sd->flags & SD_WAKE_BALANCE) {
1670 if (imbalance*this_load <= 100*load) {
1671 schedstat_inc(this_sd, ttwu_move_balance);
1672 schedstat_inc(p, se.nr_wakeups_passive);
1673 goto out_set_cpu;
1674 }
1675 }
1676 }
1677
1678 new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
1679out_set_cpu:
1680 new_cpu = wake_idle(new_cpu, p);
1681 if (new_cpu != cpu) { 1551 if (new_cpu != cpu) {
1682 set_task_cpu(p, new_cpu); 1552 set_task_cpu(p, new_cpu);
1683 task_rq_unlock(rq, &flags); 1553 task_rq_unlock(rq, &flags);
@@ -1693,6 +1563,23 @@ out_set_cpu:
1693 cpu = task_cpu(p); 1563 cpu = task_cpu(p);
1694 } 1564 }
1695 1565
1566#ifdef CONFIG_SCHEDSTATS
1567 schedstat_inc(rq, ttwu_count);
1568 if (cpu == this_cpu)
1569 schedstat_inc(rq, ttwu_local);
1570 else {
1571 struct sched_domain *sd;
1572 for_each_domain(this_cpu, sd) {
1573 if (cpu_isset(cpu, sd->span)) {
1574 schedstat_inc(sd, ttwu_wake_remote);
1575 break;
1576 }
1577 }
1578 }
1579
1580#endif
1581
1582
1696out_activate: 1583out_activate:
1697#endif /* CONFIG_SMP */ 1584#endif /* CONFIG_SMP */
1698 schedstat_inc(p, se.nr_wakeups); 1585 schedstat_inc(p, se.nr_wakeups);