aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2013-11-11 12:21:56 -0500
committerIngo Molnar <mingo@kernel.org>2013-11-13 07:33:54 -0500
commit911b2898b3c9fe0048e9485ad1629ed4fce330fd (patch)
treeb430d3ef975f60b4c19caf211ea025cc4fa173a1 /kernel
parent5eca82a9ac2c961cfbd26a4b6f43e6e3747a71dd (diff)
sched: Optimize task_sched_runtime()
Large multi-threaded apps like to hit this using do_sys_times() and then queue up on the rq->lock. Avoid when possible. Larry reported a ~20% performance increase in his test case. Reported-by: Larry Woodman <lwoodman@redhat.com> Suggested-by: Paul Turner <pjt@google.com> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Link: http://lkml.kernel.org/r/20131111172925.GG26898@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched/core.c14
1 files changed, 14 insertions, 0 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1deccd78be98..c1808606ee5f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2253,6 +2253,20 @@ unsigned long long task_sched_runtime(struct task_struct *p)
2253 struct rq *rq; 2253 struct rq *rq;
2254 u64 ns = 0; 2254 u64 ns = 0;
2255 2255
2256#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
2257 /*
2258 * 64-bit doesn't need locks to atomically read a 64bit value.
2259 * So we have an optimization chance when the task's delta_exec is 0.
2260 * Reading ->on_cpu is racy, but this is ok.
2261 *
2262 * If we race with it leaving cpu, we'll take a lock. So we're correct.
2263 * If we race with it entering cpu, unaccounted time is 0. This is
2264 * indistinguishable from the read occurring a few cycles earlier.
2265 */
2266 if (!p->on_cpu)
2267 return p->se.sum_exec_runtime;
2268#endif
2269
2256 rq = task_rq_lock(p, &flags); 2270 rq = task_rq_lock(p, &flags);
2257 ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); 2271 ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
2258 task_rq_unlock(rq, p, &flags); 2272 task_rq_unlock(rq, p, &flags);