author    | Peter Zijlstra <peterz@infradead.org> | 2016-04-28 10:16:33 -0400
committer | Ingo Molnar <mingo@kernel.org>        | 2016-05-05 03:23:58 -0400
commit    | 3e71a462dd483ce508a723356b293731e7d788ea (patch)
tree      | 59c0f38f491a6b2a3feac1a6d4e5dba6281cc0b2 /kernel/sched
parent    | 64b7aad5798478ffff52e110878ccaae4c3aaa34 (diff)
sched/core: Move task_rq_lock() out of line
It's a rather large function; keeping it inline doesn't seem to make much sense:
$ size defconfig-build/kernel/sched/core.o{.orig,}
   text    data     bss     dec     hex filename
  56533   21037    2320   79890   13812 defconfig-build/kernel/sched/core.o.orig
  55733   21037    2320   79090   134f2 defconfig-build/kernel/sched/core.o
The 'perf bench sched messaging' micro-benchmark shows a visible improvement
of 4-5%:
$ for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor ; do echo performance > $i ; done
$ perf stat --null --repeat 25 -- perf bench sched messaging -g 40 -l 5000
pre:
4.582798193 seconds time elapsed ( +- 1.41% )
4.733374877 seconds time elapsed ( +- 2.10% )
4.560955136 seconds time elapsed ( +- 1.43% )
4.631062303 seconds time elapsed ( +- 1.40% )
post:
4.364765213 seconds time elapsed ( +- 0.91% )
4.454442734 seconds time elapsed ( +- 1.18% )
4.448893817 seconds time elapsed ( +- 1.41% )
4.424346872 seconds time elapsed ( +- 0.97% )
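(For reference, computed from the runs above: the four 'pre' runs average ~4.627 s and the four 'post' runs ~4.423 s, a reduction of roughly 4.4% in elapsed time, consistent with the 4-5% figure quoted above.)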
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/core.c  | 65
-rw-r--r-- | kernel/sched/sched.h | 67
2 files changed, 69 insertions, 63 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c82ca6eccfec..1b609a886795 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -170,6 +170,71 @@ static struct rq *this_rq_lock(void)
         return rq;
 }
 
+/*
+ * __task_rq_lock - lock the rq @p resides on.
+ */
+struct rq *__task_rq_lock(struct task_struct *p)
+        __acquires(rq->lock)
+{
+        struct rq *rq;
+
+        lockdep_assert_held(&p->pi_lock);
+
+        for (;;) {
+                rq = task_rq(p);
+                raw_spin_lock(&rq->lock);
+                if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
+                        lockdep_pin_lock(&rq->lock);
+                        return rq;
+                }
+                raw_spin_unlock(&rq->lock);
+
+                while (unlikely(task_on_rq_migrating(p)))
+                        cpu_relax();
+        }
+}
+
+/*
+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
+ */
+struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+        __acquires(p->pi_lock)
+        __acquires(rq->lock)
+{
+        struct rq *rq;
+
+        for (;;) {
+                raw_spin_lock_irqsave(&p->pi_lock, *flags);
+                rq = task_rq(p);
+                raw_spin_lock(&rq->lock);
+                /*
+                 *      move_queued_task()              task_rq_lock()
+                 *
+                 *      ACQUIRE (rq->lock)
+                 *      [S] ->on_rq = MIGRATING         [L] rq = task_rq()
+                 *      WMB (__set_task_cpu())          ACQUIRE (rq->lock);
+                 *      [S] ->cpu = new_cpu             [L] task_rq()
+                 *                                      [L] ->on_rq
+                 *      RELEASE (rq->lock)
+                 *
+                 * If we observe the old cpu in task_rq_lock, the acquire of
+                 * the old rq->lock will fully serialize against the stores.
+                 *
+                 * If we observe the new cpu in task_rq_lock, the acquire will
+                 * pair with the WMB to ensure we must then also see migrating.
+                 */
+                if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
+                        lockdep_pin_lock(&rq->lock);
+                        return rq;
+                }
+                raw_spin_unlock(&rq->lock);
+                raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+
+                while (unlikely(task_on_rq_migrating(p)))
+                        cpu_relax();
+        }
+}
+
 #ifdef CONFIG_SCHED_HRTICK
 /*
  * Use HR-timers to deliver accurate preemption points.
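For context, here is a minimal caller sketch (not part of this patch) showing the pattern the now out-of-line helpers serve. It is written as if it lived in kernel/sched/core.c (so sched.h is already included); the function name example_inspect_task() is hypothetical, and the unlock side assumes the task_rq_unlock(rq, p, &flags) counterpart that kernel/sched/sched.h provides at this point in the tree.

/*
 * Illustrative sketch only, not part of this patch; the function name is
 * hypothetical.
 */
static void example_inspect_task(struct task_struct *p)
{
        unsigned long flags;
        struct rq *rq;

        /* Take p->pi_lock (IRQs off) plus the rq->lock of the rq p is on. */
        rq = task_rq_lock(p, &flags);

        /*
         * Here p is pinned to rq: both locks are held and the retry loop in
         * task_rq_lock() has ruled out a migration in flight.
         */

        /* Assumes the task_rq_unlock(rq, p, &flags) helper of this era. */
        task_rq_unlock(rq, p, &flags);
}

Moving the helpers out of line leaves this calling pattern unchanged; only the definitions move from sched.h into core.c.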
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 69da6fcaa0e8..aab4cf05d48a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1451,70 +1451,11 @@ static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
 static inline void sched_avg_update(struct rq *rq) { }
 #endif
 
-/*
- * __task_rq_lock - lock the rq @p resides on.
- */
-static inline struct rq *__task_rq_lock(struct task_struct *p)
-        __acquires(rq->lock)
-{
-        struct rq *rq;
-
-        lockdep_assert_held(&p->pi_lock);
-
-        for (;;) {
-                rq = task_rq(p);
-                raw_spin_lock(&rq->lock);
-                if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
-                        lockdep_pin_lock(&rq->lock);
-                        return rq;
-                }
-                raw_spin_unlock(&rq->lock);
-
-                while (unlikely(task_on_rq_migrating(p)))
-                        cpu_relax();
-        }
-}
-
-/*
- * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
- */
-static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+struct rq *__task_rq_lock(struct task_struct *p)
+        __acquires(rq->lock);
+struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
         __acquires(p->pi_lock)
-        __acquires(rq->lock)
-{
-        struct rq *rq;
-
-        for (;;) {
-                raw_spin_lock_irqsave(&p->pi_lock, *flags);
-                rq = task_rq(p);
-                raw_spin_lock(&rq->lock);
-                /*
-                 *      move_queued_task()              task_rq_lock()
-                 *
-                 *      ACQUIRE (rq->lock)
-                 *      [S] ->on_rq = MIGRATING         [L] rq = task_rq()
-                 *      WMB (__set_task_cpu())          ACQUIRE (rq->lock);
-                 *      [S] ->cpu = new_cpu             [L] task_rq()
-                 *                                      [L] ->on_rq
-                 *      RELEASE (rq->lock)
-                 *
-                 * If we observe the old cpu in task_rq_lock, the acquire of
-                 * the old rq->lock will fully serialize against the stores.
-                 *
-                 * If we observe the new cpu in task_rq_lock, the acquire will
-                 * pair with the WMB to ensure we must then also see migrating.
-                 */
-                if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
-                        lockdep_pin_lock(&rq->lock);
-                        return rq;
-                }
-                raw_spin_unlock(&rq->lock);
-                raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-
-                while (unlikely(task_on_rq_migrating(p)))
-                        cpu_relax();
-        }
-}
+        __acquires(rq->lock);
 
 static inline void __task_rq_unlock(struct rq *rq)
         __releases(rq->lock)
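Finally, a hedged sketch of the double-underscore variant, which (per the lockdep_assert_held() in the definition above) requires the caller to already hold p->pi_lock. The function name is hypothetical; __task_rq_unlock(rq) is the counterpart visible at the end of the hunk above.

/*
 * Illustrative sketch only, not part of this patch: __task_rq_lock() is for
 * callers that already hold p->pi_lock. The function name is hypothetical.
 */
static void example_with_pi_lock(struct task_struct *p)
{
        unsigned long flags;
        struct rq *rq;

        raw_spin_lock_irqsave(&p->pi_lock, flags);

        rq = __task_rq_lock(p);         /* returns p's rq, lockdep-pinned */
        /* ... operate on p/rq with both locks held ... */
        __task_rq_unlock(rq);           /* unpins and drops rq->lock */

        raw_spin_unlock_irqrestore(&p->pi_lock, flags);
}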