author     Frederic Weisbecker <fweisbec@gmail.com>    2015-01-22 12:08:04 -0500
committer  Ingo Molnar <mingo@kernel.org>              2015-01-30 13:38:51 -0500
commit     a18b5d01819235629289212ad428a5ee2b40f0d9 (patch)
tree       9e3b0c3fdf4f9e994b13ee200cf32f5b2a311e88 /kernel
parent     80e3d87b2c5582db0ab5e39610ce3707d97ba409 (diff)
sched: Fix missing preemption opportunity
If an interrupt fires in cond_resched(), between the call to __schedule()
and the PREEMPT_ACTIVE count decrement, and that interrupt sets
TIF_NEED_RESCHED, the call to preempt_schedule_irq() will be ignored
due to the PREEMPT_ACTIVE count. This kind of scenario, where irq preemption
is delayed because it interrupts a preempt-disabled area, is usually
fixed up once preemption is re-enabled, via an explicit call to
preempt_schedule().
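For illustration, here is the pre-patch __cond_resched() (removed later in
this diff) with editorial comments marking the window described above; the
comments are annotations for this write-up, not part of the original source:

static void __cond_resched(void)
{
	__preempt_count_add(PREEMPT_ACTIVE);
	__schedule();
	/*
	 * Editorial note: an interrupt firing here can set TIF_NEED_RESCHED,
	 * but the irq-exit preemption path sees a non-zero preempt_count()
	 * (PREEMPT_ACTIVE is still set) and therefore skips
	 * preempt_schedule_irq().
	 */
	__preempt_count_sub(PREEMPT_ACTIVE);
	/*
	 * Editorial note: nothing re-checks need_resched() after this raw
	 * decrement, so the wakeup waits for the next scheduling point.
	 */
}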
This is what preempt_enable() does, but a raw preempt count decrement, as
performed by __preempt_count_sub(PREEMPT_ACTIVE), doesn't perform that
delayed preemption check. So when such a race happens, rescheduling is
delayed until the next scheduler or preemption entry point.
This can be a problem for scheduler-latency-sensitive workloads.
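For comparison, a rough sketch of what preempt_enable() expands to on a
CONFIG_PREEMPT kernel of this era (simplified from include/linux/preempt.h;
the exact definition varies with architecture and configuration), next to
the raw decrement used by the code above:

/* Simplified sketch: preempt_enable() folds in the delayed preemption check. */
#define preempt_enable() \
do { \
	barrier(); \
	if (unlikely(preempt_count_dec_and_test())) \
		__preempt_schedule(); \
} while (0)

/*
 * A raw __preempt_count_sub(PREEMPT_ACTIVE), by contrast, only adjusts the
 * count and never calls back into the scheduler, which is why a preemption
 * request raised during the window is missed.
 */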
Let's fix that by consolidating cond_resched() with the preempt_schedule()
internals.
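As a usage note (an editorial sketch, not part of this patch; struct item and
the loop body are hypothetical), cond_resched() is typically placed in
long-running kernel loops as a voluntary preemption point, which is why a
missed resched inside it shows up directly as scheduling latency:

#include <linux/sched.h>	/* cond_resched() */

struct item {
	int payload;		/* hypothetical field, illustration only */
};

/* Hypothetical long-running loop; not from the kernel tree. */
static void process_many_items(struct item *items, int nr)
{
	int i;

	for (i = 0; i < nr; i++) {
		items[i].payload++;	/* stand-in for real per-item work */
		cond_resched();		/* voluntary preemption point */
	}
}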
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Reported-by: Ingo Molnar <mingo@kernel.org>
Original-patch-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1421946484-9298-1-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched/core.c  40
 1 file changed, 19 insertions(+), 21 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0b591fe67b70..54dce019c0ce 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2884,6 +2884,21 @@ void __sched schedule_preempt_disabled(void)
 	preempt_disable();
 }
 
+static void preempt_schedule_common(void)
+{
+	do {
+		__preempt_count_add(PREEMPT_ACTIVE);
+		__schedule();
+		__preempt_count_sub(PREEMPT_ACTIVE);
+
+		/*
+		 * Check again in case we missed a preemption opportunity
+		 * between schedule and now.
+		 */
+		barrier();
+	} while (need_resched());
+}
+
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption
@@ -2899,17 +2914,7 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
 	if (likely(!preemptible()))
 		return;
 
-	do {
-		__preempt_count_add(PREEMPT_ACTIVE);
-		__schedule();
-		__preempt_count_sub(PREEMPT_ACTIVE);
-
-		/*
-		 * Check again in case we missed a preemption opportunity
-		 * between schedule and now.
-		 */
-		barrier();
-	} while (need_resched());
+	preempt_schedule_common();
 }
 NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
@@ -4209,17 +4214,10 @@ SYSCALL_DEFINE0(sched_yield)
 	return 0;
 }
 
-static void __cond_resched(void)
-{
-	__preempt_count_add(PREEMPT_ACTIVE);
-	__schedule();
-	__preempt_count_sub(PREEMPT_ACTIVE);
-}
-
 int __sched _cond_resched(void)
 {
 	if (should_resched()) {
-		__cond_resched();
+		preempt_schedule_common();
 		return 1;
 	}
 	return 0;
@@ -4244,7 +4242,7 @@ int __cond_resched_lock(spinlock_t *lock)
 	if (spin_needbreak(lock) || resched) {
 		spin_unlock(lock);
 		if (resched)
-			__cond_resched();
+			preempt_schedule_common();
 		else
 			cpu_relax();
 		ret = 1;
@@ -4260,7 +4258,7 @@ int __sched __cond_resched_softirq(void)
 
 	if (should_resched()) {
 		local_bh_enable();
-		__cond_resched();
+		preempt_schedule_common();
 		local_bh_disable();
 		return 1;
 	}