diff options
| author | Jack Steiner <steiner@sgi.com> | 2006-03-31 05:31:21 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-31 15:18:58 -0500 |
| commit | db1b1fefc2cecbff2e4214062fa8c680cb6e7b7d (patch) | |
| tree | ad8e68882f7c36216e16ab264101c5da96ccd5c9 /kernel | |
| parent | 3055addadbe9bfb2365006a1c13fd342a8d30d52 (diff) | |
[PATCH] sched: reduce overhead of calc_load
Currently, count_active_tasks() calls both nr_running() &
nr_uninterruptible(). Each of these functions does a "for_each_cpu" & reads
values from the runqueue of each cpu. Although this is not a lot of
instructions, each runqueue may be located on a different node. Depending on
the architecture, a unique TLB entry may be required to access each
runqueue.
Since there may be more runqueues than cpu TLB entries, a scan of all
runqueues can thrash the TLB. Each memory reference incurs a TLB miss &
refill.
In addition, the runqueue cacheline that contains nr_running &
nr_uninterruptible may be evicted from the cache between the two passes.
This causes unnecessary cache misses.
Combining nr_running() & nr_uninterruptible() into a single function
substantially reduces the TLB & cache misses on large systems. This should
have no measurable effect on smaller systems.
On a 128p IA64 system running a memory stress workload, the new function
reduced the overhead of calc_load() from 605 usec/call to 324 usec/call.
Signed-off-by: Jack Steiner <steiner@sgi.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/sched.c | 15 | ||||
| -rw-r--r-- | kernel/timer.c | 2 |
2 files changed, 16 insertions, 1 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index a9ecac398bb9..6e52e0adff80 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -1658,6 +1658,21 @@ unsigned long nr_iowait(void) | |||
| 1658 | return sum; | 1658 | return sum; |
| 1659 | } | 1659 | } |
| 1660 | 1660 | ||
| 1661 | unsigned long nr_active(void) | ||
| 1662 | { | ||
| 1663 | unsigned long i, running = 0, uninterruptible = 0; | ||
| 1664 | |||
| 1665 | for_each_online_cpu(i) { | ||
| 1666 | running += cpu_rq(i)->nr_running; | ||
| 1667 | uninterruptible += cpu_rq(i)->nr_uninterruptible; | ||
| 1668 | } | ||
| 1669 | |||
| 1670 | if (unlikely((long)uninterruptible < 0)) | ||
| 1671 | uninterruptible = 0; | ||
| 1672 | |||
| 1673 | return running + uninterruptible; | ||
| 1674 | } | ||
| 1675 | |||
| 1661 | #ifdef CONFIG_SMP | 1676 | #ifdef CONFIG_SMP |
| 1662 | 1677 | ||
| 1663 | /* | 1678 | /* |
diff --git a/kernel/timer.c b/kernel/timer.c index 9062a82ee8ec..6b812c04737b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -825,7 +825,7 @@ void update_process_times(int user_tick) | |||
| 825 | */ | 825 | */ |
| 826 | static unsigned long count_active_tasks(void) | 826 | static unsigned long count_active_tasks(void) |
| 827 | { | 827 | { |
| 828 | return (nr_running() + nr_uninterruptible()) * FIXED_1; | 828 | return nr_active() * FIXED_1; |
| 829 | } | 829 | } |
| 830 | 830 | ||
| 831 | /* | 831 | /* |
