diff options
author | Gregory Haskins <ghaskins@novell.com> | 2008-01-25 15:08:09 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-01-25 15:08:09 -0500 |
commit | e7693a362ec84bb5b6fd441d8a8b4b9d568a7a0c (patch) | |
tree | 078940540641a59aaf199695bfc6de3f062a987b /kernel/sched.c | |
parent | 697f0a487f294e634a342764472b79375bb3158a (diff) |
sched: de-SCHED_OTHER-ize the RT path
The current wake-up code path tries to determine whether it can optimize the
wake-up to "this_cpu" by performing load calculations. The problem is that
these calculations are only relevant to SCHED_OTHER tasks, where load is king.
For RT tasks, priority is king, so the load calculation is completely wasted
bandwidth.
Therefore, we create a new sched_class interface to help with
pre-wakeup routing decisions, and move the load calculation into a function
of the CFS task's sched_class.
Signed-off-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 167 |
1 files changed, 27 insertions, 140 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 66e99b419b31..3344ba776b97 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -960,6 +960,13 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load) | |||
960 | update_load_sub(&rq->load, load); | 960 | update_load_sub(&rq->load, load); |
961 | } | 961 | } |
962 | 962 | ||
963 | #ifdef CONFIG_SMP | ||
964 | static unsigned long source_load(int cpu, int type); | ||
965 | static unsigned long target_load(int cpu, int type); | ||
966 | static unsigned long cpu_avg_load_per_task(int cpu); | ||
967 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | ||
968 | #endif /* CONFIG_SMP */ | ||
969 | |||
963 | #include "sched_stats.h" | 970 | #include "sched_stats.h" |
964 | #include "sched_idletask.c" | 971 | #include "sched_idletask.c" |
965 | #include "sched_fair.c" | 972 | #include "sched_fair.c" |
@@ -1118,7 +1125,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | |||
1118 | /* | 1125 | /* |
1119 | * Is this task likely cache-hot: | 1126 | * Is this task likely cache-hot: |
1120 | */ | 1127 | */ |
1121 | static inline int | 1128 | static int |
1122 | task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | 1129 | task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) |
1123 | { | 1130 | { |
1124 | s64 delta; | 1131 | s64 delta; |
@@ -1343,7 +1350,7 @@ static unsigned long target_load(int cpu, int type) | |||
1343 | /* | 1350 | /* |
1344 | * Return the average load per task on the cpu's run queue | 1351 | * Return the average load per task on the cpu's run queue |
1345 | */ | 1352 | */ |
1346 | static inline unsigned long cpu_avg_load_per_task(int cpu) | 1353 | static unsigned long cpu_avg_load_per_task(int cpu) |
1347 | { | 1354 | { |
1348 | struct rq *rq = cpu_rq(cpu); | 1355 | struct rq *rq = cpu_rq(cpu); |
1349 | unsigned long total = weighted_cpuload(cpu); | 1356 | unsigned long total = weighted_cpuload(cpu); |
@@ -1500,58 +1507,6 @@ static int sched_balance_self(int cpu, int flag) | |||
1500 | 1507 | ||
1501 | #endif /* CONFIG_SMP */ | 1508 | #endif /* CONFIG_SMP */ |
1502 | 1509 | ||
1503 | /* | ||
1504 | * wake_idle() will wake a task on an idle cpu if task->cpu is | ||
1505 | * not idle and an idle cpu is available. The span of cpus to | ||
1506 | * search starts with cpus closest then further out as needed, | ||
1507 | * so we always favor a closer, idle cpu. | ||
1508 | * | ||
1509 | * Returns the CPU we should wake onto. | ||
1510 | */ | ||
1511 | #if defined(ARCH_HAS_SCHED_WAKE_IDLE) | ||
1512 | static int wake_idle(int cpu, struct task_struct *p) | ||
1513 | { | ||
1514 | cpumask_t tmp; | ||
1515 | struct sched_domain *sd; | ||
1516 | int i; | ||
1517 | |||
1518 | /* | ||
1519 | * If it is idle, then it is the best cpu to run this task. | ||
1520 | * | ||
1521 | * This cpu is also the best, if it has more than one task already. | ||
1522 | * Siblings must be also busy(in most cases) as they didn't already | ||
1523 | * pickup the extra load from this cpu and hence we need not check | ||
1524 | * sibling runqueue info. This will avoid the checks and cache miss | ||
1525 | * penalities associated with that. | ||
1526 | */ | ||
1527 | if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1) | ||
1528 | return cpu; | ||
1529 | |||
1530 | for_each_domain(cpu, sd) { | ||
1531 | if (sd->flags & SD_WAKE_IDLE) { | ||
1532 | cpus_and(tmp, sd->span, p->cpus_allowed); | ||
1533 | for_each_cpu_mask(i, tmp) { | ||
1534 | if (idle_cpu(i)) { | ||
1535 | if (i != task_cpu(p)) { | ||
1536 | schedstat_inc(p, | ||
1537 | se.nr_wakeups_idle); | ||
1538 | } | ||
1539 | return i; | ||
1540 | } | ||
1541 | } | ||
1542 | } else { | ||
1543 | break; | ||
1544 | } | ||
1545 | } | ||
1546 | return cpu; | ||
1547 | } | ||
1548 | #else | ||
1549 | static inline int wake_idle(int cpu, struct task_struct *p) | ||
1550 | { | ||
1551 | return cpu; | ||
1552 | } | ||
1553 | #endif | ||
1554 | |||
1555 | /*** | 1510 | /*** |
1556 | * try_to_wake_up - wake up a thread | 1511 | * try_to_wake_up - wake up a thread |
1557 | * @p: the to-be-woken-up thread | 1512 | * @p: the to-be-woken-up thread |
@@ -1573,8 +1528,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
1573 | long old_state; | 1528 | long old_state; |
1574 | struct rq *rq; | 1529 | struct rq *rq; |
1575 | #ifdef CONFIG_SMP | 1530 | #ifdef CONFIG_SMP |
1576 | struct sched_domain *sd, *this_sd = NULL; | ||
1577 | unsigned long load, this_load; | ||
1578 | int new_cpu; | 1531 | int new_cpu; |
1579 | #endif | 1532 | #endif |
1580 | 1533 | ||
@@ -1594,90 +1547,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
1594 | if (unlikely(task_running(rq, p))) | 1547 | if (unlikely(task_running(rq, p))) |
1595 | goto out_activate; | 1548 | goto out_activate; |
1596 | 1549 | ||
1597 | new_cpu = cpu; | 1550 | new_cpu = p->sched_class->select_task_rq(p, sync); |
1598 | |||
1599 | schedstat_inc(rq, ttwu_count); | ||
1600 | if (cpu == this_cpu) { | ||
1601 | schedstat_inc(rq, ttwu_local); | ||
1602 | goto out_set_cpu; | ||
1603 | } | ||
1604 | |||
1605 | for_each_domain(this_cpu, sd) { | ||
1606 | if (cpu_isset(cpu, sd->span)) { | ||
1607 | schedstat_inc(sd, ttwu_wake_remote); | ||
1608 | this_sd = sd; | ||
1609 | break; | ||
1610 | } | ||
1611 | } | ||
1612 | |||
1613 | if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) | ||
1614 | goto out_set_cpu; | ||
1615 | |||
1616 | /* | ||
1617 | * Check for affine wakeup and passive balancing possibilities. | ||
1618 | */ | ||
1619 | if (this_sd) { | ||
1620 | int idx = this_sd->wake_idx; | ||
1621 | unsigned int imbalance; | ||
1622 | |||
1623 | imbalance = 100 + (this_sd->imbalance_pct - 100) / 2; | ||
1624 | |||
1625 | load = source_load(cpu, idx); | ||
1626 | this_load = target_load(this_cpu, idx); | ||
1627 | |||
1628 | new_cpu = this_cpu; /* Wake to this CPU if we can */ | ||
1629 | |||
1630 | if (this_sd->flags & SD_WAKE_AFFINE) { | ||
1631 | unsigned long tl = this_load; | ||
1632 | unsigned long tl_per_task; | ||
1633 | |||
1634 | /* | ||
1635 | * Attract cache-cold tasks on sync wakeups: | ||
1636 | */ | ||
1637 | if (sync && !task_hot(p, rq->clock, this_sd)) | ||
1638 | goto out_set_cpu; | ||
1639 | |||
1640 | schedstat_inc(p, se.nr_wakeups_affine_attempts); | ||
1641 | tl_per_task = cpu_avg_load_per_task(this_cpu); | ||
1642 | |||
1643 | /* | ||
1644 | * If sync wakeup then subtract the (maximum possible) | ||
1645 | * effect of the currently running task from the load | ||
1646 | * of the current CPU: | ||
1647 | */ | ||
1648 | if (sync) | ||
1649 | tl -= current->se.load.weight; | ||
1650 | |||
1651 | if ((tl <= load && | ||
1652 | tl + target_load(cpu, idx) <= tl_per_task) || | ||
1653 | 100*(tl + p->se.load.weight) <= imbalance*load) { | ||
1654 | /* | ||
1655 | * This domain has SD_WAKE_AFFINE and | ||
1656 | * p is cache cold in this domain, and | ||
1657 | * there is no bad imbalance. | ||
1658 | */ | ||
1659 | schedstat_inc(this_sd, ttwu_move_affine); | ||
1660 | schedstat_inc(p, se.nr_wakeups_affine); | ||
1661 | goto out_set_cpu; | ||
1662 | } | ||
1663 | } | ||
1664 | |||
1665 | /* | ||
1666 | * Start passive balancing when half the imbalance_pct | ||
1667 | * limit is reached. | ||
1668 | */ | ||
1669 | if (this_sd->flags & SD_WAKE_BALANCE) { | ||
1670 | if (imbalance*this_load <= 100*load) { | ||
1671 | schedstat_inc(this_sd, ttwu_move_balance); | ||
1672 | schedstat_inc(p, se.nr_wakeups_passive); | ||
1673 | goto out_set_cpu; | ||
1674 | } | ||
1675 | } | ||
1676 | } | ||
1677 | |||
1678 | new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */ | ||
1679 | out_set_cpu: | ||
1680 | new_cpu = wake_idle(new_cpu, p); | ||
1681 | if (new_cpu != cpu) { | 1551 | if (new_cpu != cpu) { |
1682 | set_task_cpu(p, new_cpu); | 1552 | set_task_cpu(p, new_cpu); |
1683 | task_rq_unlock(rq, &flags); | 1553 | task_rq_unlock(rq, &flags); |
@@ -1693,6 +1563,23 @@ out_set_cpu: | |||
1693 | cpu = task_cpu(p); | 1563 | cpu = task_cpu(p); |
1694 | } | 1564 | } |
1695 | 1565 | ||
1566 | #ifdef CONFIG_SCHEDSTATS | ||
1567 | schedstat_inc(rq, ttwu_count); | ||
1568 | if (cpu == this_cpu) | ||
1569 | schedstat_inc(rq, ttwu_local); | ||
1570 | else { | ||
1571 | struct sched_domain *sd; | ||
1572 | for_each_domain(this_cpu, sd) { | ||
1573 | if (cpu_isset(cpu, sd->span)) { | ||
1574 | schedstat_inc(sd, ttwu_wake_remote); | ||
1575 | break; | ||
1576 | } | ||
1577 | } | ||
1578 | } | ||
1579 | |||
1580 | #endif | ||
1581 | |||
1582 | |||
1696 | out_activate: | 1583 | out_activate: |
1697 | #endif /* CONFIG_SMP */ | 1584 | #endif /* CONFIG_SMP */ |
1698 | schedstat_inc(p, se.nr_wakeups); | 1585 | schedstat_inc(p, se.nr_wakeups); |