aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2018-05-01 12:14:45 -0400
committerIngo Molnar <mingo@kernel.org>2018-05-03 01:38:05 -0400
commit85f1abe0019fcb3ea10df7029056cf42702283a8 (patch)
tree26e317238165c6c8389ba125a716667584358c89
parent741a76b350897604c48fb12beff1c9b77724dc96 (diff)
kthread, sched/wait: Fix kthread_parkme() completion issue
Even with the wait-loop fixed, there is a further issue with kthread_parkme(). Upon hotplug, when we do takedown_cpu(), smpboot_park_threads() can return before all those threads are in fact blocked, due to the placement of the complete() in __kthread_parkme(). When that happens, sched_cpu_dying() -> migrate_tasks() can end up migrating such a still runnable task onto another CPU. Normally the task will have hit schedule() and gone to sleep by the time we do kthread_unpark(), which will then do __kthread_bind() to re-bind the task to the correct CPU. However, when we lose the initial TASK_PARKED store to the concurrent wakeup issue described previously, do the complete(), get migrated, it is possible to either: - observe kthread_unpark()'s clearing of SHOULD_PARK and terminate the park and set TASK_RUNNING, or - __kthread_bind()'s wait_task_inactive() to observe the competing TASK_RUNNING store. Either way the WARN() in __kthread_bind() will trigger and fail to correctly set the CPU affinity. Fix this by only issuing the complete() when the kthread has scheduled out. This does away with all the icky 'still running' nonsense. The alternative is to promote TASK_PARKED to a special state, this guarantees wait_task_inactive() cannot observe a 'stale' TASK_RUNNING and we'll end up doing the right thing, but this preserves the whole icky business of potentially migrating the still runnable thing. Reported-by: Gaurav Kohli <gkohli@codeaurora.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--include/linux/kthread.h1
-rw-r--r--kernel/kthread.c43
-rw-r--r--kernel/sched/core.c32
3 files changed, 41 insertions, 35 deletions
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index c1961761311d..2803264c512f 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -62,6 +62,7 @@ void *kthread_probe_data(struct task_struct *k);
62int kthread_park(struct task_struct *k); 62int kthread_park(struct task_struct *k);
63void kthread_unpark(struct task_struct *k); 63void kthread_unpark(struct task_struct *k);
64void kthread_parkme(void); 64void kthread_parkme(void);
65void kthread_park_complete(struct task_struct *k);
65 66
66int kthreadd(void *unused); 67int kthreadd(void *unused);
67extern struct task_struct *kthreadd_task; 68extern struct task_struct *kthreadd_task;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index cbee858e5815..2017a39ab490 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -55,7 +55,6 @@ enum KTHREAD_BITS {
55 KTHREAD_IS_PER_CPU = 0, 55 KTHREAD_IS_PER_CPU = 0,
56 KTHREAD_SHOULD_STOP, 56 KTHREAD_SHOULD_STOP,
57 KTHREAD_SHOULD_PARK, 57 KTHREAD_SHOULD_PARK,
58 KTHREAD_IS_PARKED,
59}; 58};
60 59
61static inline void set_kthread_struct(void *kthread) 60static inline void set_kthread_struct(void *kthread)
@@ -181,11 +180,8 @@ static void __kthread_parkme(struct kthread *self)
181 set_current_state(TASK_PARKED); 180 set_current_state(TASK_PARKED);
182 if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags)) 181 if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
183 break; 182 break;
184 if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
185 complete(&self->parked);
186 schedule(); 183 schedule();
187 } 184 }
188 clear_bit(KTHREAD_IS_PARKED, &self->flags);
189 __set_current_state(TASK_RUNNING); 185 __set_current_state(TASK_RUNNING);
190} 186}
191 187
@@ -195,6 +191,11 @@ void kthread_parkme(void)
195} 191}
196EXPORT_SYMBOL_GPL(kthread_parkme); 192EXPORT_SYMBOL_GPL(kthread_parkme);
197 193
194void kthread_park_complete(struct task_struct *k)
195{
196 complete(&to_kthread(k)->parked);
197}
198
198static int kthread(void *_create) 199static int kthread(void *_create)
199{ 200{
200 /* Copy data: it's on kthread's stack */ 201 /* Copy data: it's on kthread's stack */
@@ -451,22 +452,15 @@ void kthread_unpark(struct task_struct *k)
451{ 452{
452 struct kthread *kthread = to_kthread(k); 453 struct kthread *kthread = to_kthread(k);
453 454
454 clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
455 /* 455 /*
456 * We clear the IS_PARKED bit here as we don't wait 456 * Newly created kthread was parked when the CPU was offline.
457 * until the task has left the park code. So if we'd 457 * The binding was lost and we need to set it again.
458 * park before that happens we'd see the IS_PARKED bit
459 * which might be about to be cleared.
460 */ 458 */
461 if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) { 459 if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
462 /* 460 __kthread_bind(k, kthread->cpu, TASK_PARKED);
463 * Newly created kthread was parked when the CPU was offline. 461
464 * The binding was lost and we need to set it again. 462 clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
465 */ 463 wake_up_state(k, TASK_PARKED);
466 if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
467 __kthread_bind(k, kthread->cpu, TASK_PARKED);
468 wake_up_state(k, TASK_PARKED);
469 }
470} 464}
471EXPORT_SYMBOL_GPL(kthread_unpark); 465EXPORT_SYMBOL_GPL(kthread_unpark);
472 466
@@ -489,12 +483,13 @@ int kthread_park(struct task_struct *k)
489 if (WARN_ON(k->flags & PF_EXITING)) 483 if (WARN_ON(k->flags & PF_EXITING))
490 return -ENOSYS; 484 return -ENOSYS;
491 485
492 if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) { 486 if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
493 set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); 487 return -EBUSY;
494 if (k != current) { 488
495 wake_up_process(k); 489 set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
496 wait_for_completion(&kthread->parked); 490 if (k != current) {
497 } 491 wake_up_process(k);
492 wait_for_completion(&kthread->parked);
498 } 493 }
499 494
500 return 0; 495 return 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5e10aaeebfcc..7ad60e00a6a8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7,6 +7,8 @@
7 */ 7 */
8#include "sched.h" 8#include "sched.h"
9 9
10#include <linux/kthread.h>
11
10#include <asm/switch_to.h> 12#include <asm/switch_to.h>
11#include <asm/tlb.h> 13#include <asm/tlb.h>
12 14
@@ -2718,20 +2720,28 @@ static struct rq *finish_task_switch(struct task_struct *prev)
2718 membarrier_mm_sync_core_before_usermode(mm); 2720 membarrier_mm_sync_core_before_usermode(mm);
2719 mmdrop(mm); 2721 mmdrop(mm);
2720 } 2722 }
2721 if (unlikely(prev_state == TASK_DEAD)) { 2723 if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
2722 if (prev->sched_class->task_dead) 2724 switch (prev_state) {
2723 prev->sched_class->task_dead(prev); 2725 case TASK_DEAD:
2726 if (prev->sched_class->task_dead)
2727 prev->sched_class->task_dead(prev);
2724 2728
2725 /* 2729 /*
2726 * Remove function-return probe instances associated with this 2730 * Remove function-return probe instances associated with this
2727 * task and put them back on the free list. 2731 * task and put them back on the free list.
2728 */ 2732 */
2729 kprobe_flush_task(prev); 2733 kprobe_flush_task(prev);
2730 2734
2731 /* Task is done with its stack. */ 2735 /* Task is done with its stack. */
2732 put_task_stack(prev); 2736 put_task_stack(prev);
2733 2737
2734 put_task_struct(prev); 2738 put_task_struct(prev);
2739 break;
2740
2741 case TASK_PARKED:
2742 kthread_park_complete(prev);
2743 break;
2744 }
2735 } 2745 }
2736 2746
2737 tick_nohz_task_switch(); 2747 tick_nohz_task_switch();