aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Jack Steiner <steiner@sgi.com> 2006-03-31 05:31:21 -0500
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-03-31 15:18:58 -0500
commit: db1b1fefc2cecbff2e4214062fa8c680cb6e7b7d (patch)
tree: ad8e68882f7c36216e16ab264101c5da96ccd5c9 /kernel
parent: 3055addadbe9bfb2365006a1c13fd342a8d30d52 (diff)
[PATCH] sched: reduce overhead of calc_load
Currently, count_active_tasks() calls both nr_running() & nr_uninterruptible(). Each of these functions does a "for_each_cpu" & reads values from the runqueue of each cpu. Although this is not a lot of instructions, each runqueue may be located on a different node. Depending on the architecture, a unique TLB entry may be required to access each runqueue. Since there may be more runqueues than cpu TLB entries, a scan of all runqueues can thrash the TLB. Each memory reference incurs a TLB miss & refill. In addition, the runqueue cacheline that contains nr_running & nr_uninterruptible may be evicted from the cache between the two passes. This causes unnecessary cache misses. Combining nr_running() & nr_uninterruptible() into a single function substantially reduces the TLB & cache misses on large systems. This should have no measurable effect on smaller systems. On a 128p IA64 system running a memory stress workload, the new function reduced the overhead of calc_load() from 605 usec/call to 324 usec/call. Signed-off-by: Jack Steiner <steiner@sgi.com> Acked-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/sched.c 15
-rw-r--r-- kernel/timer.c 2
2 files changed, 16 insertions, 1 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index a9ecac398bb9..6e52e0adff80 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1658,6 +1658,21 @@ unsigned long nr_iowait(void)
1658 return sum; 1658 return sum;
1659} 1659}
1660 1660
/*
 * nr_active - combined count of running and uninterruptible tasks.
 *
 * Makes a single pass with for_each_online_cpu(), reading both
 * nr_running and nr_uninterruptible from each CPU's runqueue in the
 * same iteration, and returns the sum of the two totals.
 */
1661unsigned long nr_active(void)
1662{
1663 unsigned long i, running = 0, uninterruptible = 0;
1664
1665 for_each_online_cpu(i) {
1666 running += cpu_rq(i)->nr_running;
1667 uninterruptible += cpu_rq(i)->nr_uninterruptible;
1668 }
1669
/*
 * If the accumulated uninterruptible total is negative when viewed as a
 * signed long, treat it as zero before adding it to the result.
 * NOTE(review): presumably the per-runqueue counters can be individually
 * skewed so the sum may transiently wrap - confirm against the runqueue
 * accounting code.
 */
1670 if (unlikely((long)uninterruptible < 0))
1671 uninterruptible = 0;
1672
1673 return running + uninterruptible;
1674}
1675
1661#ifdef CONFIG_SMP 1676#ifdef CONFIG_SMP
1662 1677
1663/* 1678/*
diff --git a/kernel/timer.c b/kernel/timer.c
index 9062a82ee8ec..6b812c04737b 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -825,7 +825,7 @@ void update_process_times(int user_tick)
825 */ 825 */
826static unsigned long count_active_tasks(void) 826static unsigned long count_active_tasks(void)
827{ 827{
828 return (nr_running() + nr_uninterruptible()) * FIXED_1; 828 return nr_active() * FIXED_1;
829} 829}
830 830
831/* 831/*