From 39c0cbe2150cbd848a25ba6cdb271d1ad46818ad Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 11 Mar 2010 17:17:13 +0100 Subject: sched: Rate-limit nohz Entering nohz code on every micro-idle is costing ~10% throughput for netperf TCP_RR when scheduling cross-cpu. Rate limiting entry fixes this, but raises ticks a bit. On my Q6600, an idle box goes from ~85 interrupts/sec to 128. The higher the context switch rate, the more nohz entry costs. With this patch and some cycle recovery patches in my tree, max cross cpu context switch rate is improved by ~16%, a large portion of which of which is this ratelimiting. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra LKML-Reference: <1268301003.6785.28.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f992762d7f51..f25735a767af 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -262,6 +262,9 @@ void tick_nohz_stop_sched_tick(int inidle) goto end; } + if (nohz_ratelimit(cpu)) + goto end; + ts->idle_calls++; /* Read jiffies and the time when jiffies were updated last */ do { -- cgit v1.2.2 From b1f724c3055fa75a31d272222213647547a2d3d4 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 9 May 2010 08:22:08 -0700 Subject: sched: Add a comment to get_cpu_idle_time_us() The exported function get_cpu_idle_time_us() has no comment describing it; add a kerneldoc comment Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Reviewed-by: Rik van Riel Acked-by: Peter Zijlstra Cc: davej@redhat.com LKML-Reference: <20100509082208.7cb721f0@infradead.org> Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f25735a767af..358822ee32d5 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -179,6 +179,20 @@ static ktime_t tick_nohz_start_idle(struct tick_sched *ts) return now; } +/** + * get_cpu_idle_time_us - get the total idle time of a cpu + * @cpu: CPU number to query + * @last_update_time: variable to store update time in + * + * Return the cummulative idle time (since boot) for a given + * CPU, in microseconds. The idle time returned includes + * the iowait time (unlike what "top" and co report). + * + * This time is measured via accounting rather than sampling, + * and is as accurate as ktime_get() is. + * + * This function returns -1 if NOHZ is not enabled. + */ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); -- cgit v1.2.2 From 595aac488b546c7185be7e29c8ae165a588b2a9f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 9 May 2010 08:22:45 -0700 Subject: sched: Introduce a function to update the idle statistics Currently, two places update the idle statistics (and more to come later in this series). This patch creates a helper function for updating these statistics. Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Reviewed-by: Rik van Riel Acked-by: Peter Zijlstra Cc: davej@redhat.com LKML-Reference: <20100509082245.163e67ed@infradead.org> Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 358822ee32d5..59d8762c7e1d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -150,14 +150,25 @@ static void tick_nohz_update_jiffies(ktime_t now) touch_softlockup_watchdog(); } -static void tick_nohz_stop_idle(int cpu, ktime_t now) +/* + * Updates the per cpu time idle statistics counters + */ +static void update_ts_time_stats(struct tick_sched *ts, ktime_t now) { - struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); ktime_t delta; - delta = ktime_sub(now, ts->idle_entrytime); ts->idle_lastupdate = now; - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); + if (ts->idle_active) { + delta = ktime_sub(now, ts->idle_entrytime); + ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); + } +} + +static void tick_nohz_stop_idle(int cpu, ktime_t now) +{ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + + update_ts_time_stats(ts, now); ts->idle_active = 0; sched_clock_idle_wakeup_event(0); @@ -165,14 +176,12 @@ static void tick_nohz_stop_idle(int cpu, ktime_t now) static ktime_t tick_nohz_start_idle(struct tick_sched *ts) { - ktime_t now, delta; + ktime_t now; now = ktime_get(); - if (ts->idle_active) { - delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_lastupdate = now; - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); - } + + update_ts_time_stats(ts, now); + ts->idle_entrytime = now; ts->idle_active = 1; sched_clock_idle_sleep_event(); -- cgit v1.2.2 From 8c7b09f43f4bf570654bcc458ce96819a932303c Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 9 May 2010 08:23:23 -0700 Subject: sched: Update the idle statistics in get_cpu_idle_time_us() Right now, get_cpu_idle_time_us() only reports the idle statistics upto the point the CPU entered last idle; not what is valid right now. This patch adds an update of the idle statistics to get_cpu_idle_time_us(), so that calling this function always returns statistics that are accurate at the point of the call. This includes resetting the start of the idle time for accounting purposes to avoid double accounting. Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Reviewed-by: Rik van Riel Acked-by: Peter Zijlstra Cc: davej@redhat.com LKML-Reference: <20100509082323.2d2f1945@infradead.org> Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 59d8762c7e1d..f15d18d82c18 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -161,6 +161,7 @@ static void update_ts_time_stats(struct tick_sched *ts, ktime_t now) if (ts->idle_active) { delta = ktime_sub(now, ts->idle_entrytime); ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); + ts->idle_entrytime = now; } } @@ -205,14 +206,18 @@ static ktime_t tick_nohz_start_idle(struct tick_sched *ts) u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now; if (!tick_nohz_enabled) return -1; + now = ktime_get(); + update_ts_time_stats(ts, now); + if (ts->idle_active) *last_update_time = ktime_to_us(ts->idle_lastupdate); else - *last_update_time = ktime_to_us(ktime_get()); + *last_update_time = ktime_to_us(now); return ktime_to_us(ts->idle_sleeptime); } -- cgit v1.2.2 From 8d63bf949e330588b80d30ca8f0a27a45297a9e9 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 9 May 2010 08:24:03 -0700 Subject: sched: Fold updating of the last_update_time_info into update_ts_time_stats() This patch folds the updating of the last_update_time into the update_ts_time_stats() function, and updates the callers. This allows for further cleanups that are done in the next patch. Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Reviewed-by: Rik van Riel Acked-by: Peter Zijlstra Cc: davej@redhat.com LKML-Reference: <20100509082403.60072967@infradead.org> Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f15d18d82c18..e86e1c6674d1 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -153,7 +153,8 @@ static void tick_nohz_update_jiffies(ktime_t now) /* * Updates the per cpu time idle statistics counters */ -static void update_ts_time_stats(struct tick_sched *ts, ktime_t now) +static void +update_ts_time_stats(struct tick_sched *ts, ktime_t now, u64 *last_update_time) { ktime_t delta; @@ -163,13 +164,19 @@ static void update_ts_time_stats(struct tick_sched *ts, ktime_t now) ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); ts->idle_entrytime = now; } + + if (ts->idle_active && last_update_time) + *last_update_time = ktime_to_us(ts->idle_lastupdate); + else + *last_update_time = ktime_to_us(now); + } static void tick_nohz_stop_idle(int cpu, ktime_t now) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); - update_ts_time_stats(ts, now); + update_ts_time_stats(ts, now, NULL); ts->idle_active = 0; sched_clock_idle_wakeup_event(0); @@ -181,7 +188,7 @@ static ktime_t tick_nohz_start_idle(struct tick_sched *ts) now = ktime_get(); - update_ts_time_stats(ts, now); + update_ts_time_stats(ts, now, NULL); ts->idle_entrytime = now; ts->idle_active = 1; @@ -206,18 +213,11 @@ static ktime_t tick_nohz_start_idle(struct tick_sched *ts) u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); - ktime_t now; if (!tick_nohz_enabled) return -1; - now = ktime_get(); - update_ts_time_stats(ts, now); - - if (ts->idle_active) - *last_update_time = ktime_to_us(ts->idle_lastupdate); - else - *last_update_time = ktime_to_us(now); + update_ts_time_stats(ts, ktime_get(), last_update_time); return ktime_to_us(ts->idle_sleeptime); } -- cgit v1.2.2 From e0e37c200f1357db0dd986edb359c41c57d24f6e Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 9 May 2010 08:24:39 -0700 Subject: sched: Eliminate the ts->idle_lastupdate field Now that the only user of ts->idle_lastupdate is update_ts_time_stats(), the entire field can be eliminated. In update_ts_time_stats(), idle_lastupdate is first set to "now", and a few lines later, the only user is an if() statement that assigns a variable either to "now" or to ts->idle_lastupdate, which has the value of "now" at that point. Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Reviewed-by: Rik van Riel Acked-by: Peter Zijlstra Cc: davej@redhat.com LKML-Reference: <20100509082439.2fab0b4f@infradead.org> Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e86e1c6674d1..50953f4c42b2 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -158,16 +158,13 @@ update_ts_time_stats(struct tick_sched *ts, ktime_t now, u64 *last_update_time) { ktime_t delta; - ts->idle_lastupdate = now; if (ts->idle_active) { delta = ktime_sub(now, ts->idle_entrytime); ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); ts->idle_entrytime = now; } - if (ts->idle_active && last_update_time) - *last_update_time = ktime_to_us(ts->idle_lastupdate); - else + if (last_update_time) *last_update_time = ktime_to_us(now); } -- cgit v1.2.2 From 0224cf4c5ee0d7faec83956b8e21f7d89e3df3bd Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 9 May 2010 08:25:23 -0700 Subject: sched: Intoduce get_cpu_iowait_time_us() For the ondemand cpufreq governor, it is desired that the iowait time is microaccounted in a similar way as idle time is. This patch introduces the infrastructure to account and expose this information via the get_cpu_iowait_time_us() function. [akpm@linux-foundation.org: fix CONFIG_NO_HZ=n build] Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Reviewed-by: Rik van Riel Acked-by: Peter Zijlstra Cc: davej@redhat.com LKML-Reference: <20100509082523.284feab6@infradead.org> Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'kernel/time/tick-sched.c') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 50953f4c42b2..1d7b9bc1c034 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -161,6 +161,8 @@ update_ts_time_stats(struct tick_sched *ts, ktime_t now, u64 *last_update_time) if (ts->idle_active) { delta = ktime_sub(now, ts->idle_entrytime); ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); + if (nr_iowait_cpu() > 0) + ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); ts->idle_entrytime = now; } @@ -220,6 +222,32 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) } EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); +/* + * get_cpu_iowait_time_us - get the total iowait time of a cpu + * @cpu: CPU number to query + * @last_update_time: variable to store update time in + * + * Return the cummulative iowait time (since boot) for a given + * CPU, in microseconds. + * + * This time is measured via accounting rather than sampling, + * and is as accurate as ktime_get() is. + * + * This function returns -1 if NOHZ is not enabled. + */ +u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) +{ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + + if (!tick_nohz_enabled) + return -1; + + update_ts_time_stats(ts, ktime_get(), last_update_time); + + return ktime_to_us(ts->iowait_sleeptime); +} +EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); + /** * tick_nohz_stop_sched_tick - stop the idle tick from the idle task * -- cgit v1.2.2