author     Oleg Nesterov <oleg@redhat.com>    2012-08-26 15:12:09 -0400
committer  Ingo Molnar <mingo@kernel.org>     2012-09-13 10:47:33 -0400
commit     ac3d0da8f3290b3d394cdb7f50604424a7cd6092
tree       c1a126ac8ab3d87f22baaa51fd786f3c56c22327
parent     15674868d6c5985466835c56dd89d39235f16302
task_work: Make task_work_add() lockless
Change task_works to use llist-like code to avoid taking pi_lock
in task_work_add(); this makes it usable under rq->lock.
task_work_cancel() and task_work_run() still use pi_lock
to synchronize with each other.
(This is in preparation for a deadlock fix.)
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20120826191209.GA4221@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  kernel/task_work.c | 95
1 file changed, 48 insertions(+), 47 deletions(-)
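For readers skimming the patch below, the heart of the change is that task_work_add() now pushes onto the singly linked task_works list with a cmpxchg() retry loop instead of taking pi_lock. The fragment below is a minimal user-space sketch of that push pattern using C11 atomics in place of the kernel's ACCESS_ONCE()/cmpxchg(); the demo_work/demo_head/demo_add names are invented for illustration and are not part of the kernel patch.

/*
 * Minimal user-space sketch of the new lockless push path.
 * demo_* names are illustrative only.
 */
#include <stdatomic.h>

struct demo_work {
	struct demo_work *next;
	void (*func)(struct demo_work *);
};

static _Atomic(struct demo_work *) demo_head;	/* starts out NULL */

/* Lock-free push, safe against concurrent demo_add() callers. */
static void demo_add(struct demo_work *work)
{
	struct demo_work *head = atomic_load(&demo_head);

	do {
		work->next = head;
		/* On failure, head is reloaded with the value just observed. */
	} while (!atomic_compare_exchange_weak(&demo_head, &head, work));
}

As in the kernel's cmpxchg() loop, a failed compare-exchange means another pusher got in first; the loop simply relinks against the freshly observed head and retries.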
diff --git a/kernel/task_work.c b/kernel/task_work.c
index d320d44903bd..f13ec0bda1d5 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -3,25 +3,18 @@
 #include <linux/tracehook.h>
 
 int
-task_work_add(struct task_struct *task, struct callback_head *twork, bool notify)
+task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
 {
-	struct callback_head *last, *first;
-	unsigned long flags;
-
+	struct callback_head *head;
 	/*
 	 * Not inserting the new work if the task has already passed
 	 * exit_task_work() is the responisbility of callers.
 	 */
-	raw_spin_lock_irqsave(&task->pi_lock, flags);
-	last = task->task_works;
-	first = last ? last->next : twork;
-	twork->next = first;
-	if (last)
-		last->next = twork;
-	task->task_works = twork;
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+	do {
+		head = ACCESS_ONCE(task->task_works);
+		work->next = head;
+	} while (cmpxchg(&task->task_works, head, work) != head);
 
-	/* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */
 	if (notify)
 		set_notify_resume(task);
 	return 0;
@@ -30,52 +23,60 @@ task_work_add(struct task_struct *task, struct callback_head *twork, bool notify
 struct callback_head *
 task_work_cancel(struct task_struct *task, task_work_func_t func)
 {
+	struct callback_head **pprev = &task->task_works;
+	struct callback_head *work = NULL;
 	unsigned long flags;
-	struct callback_head *last, *res = NULL;
-
+	/*
+	 * If cmpxchg() fails we continue without updating pprev.
+	 * Either we raced with task_work_add() which added the
+	 * new entry before this work, we will find it again. Or
+	 * we raced with task_work_run(), *pprev == NULL.
+	 */
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
-	last = task->task_works;
-	if (last) {
-		struct callback_head *q = last, *p = q->next;
-		while (1) {
-			if (p->func == func) {
-				q->next = p->next;
-				if (p == last)
-					task->task_works = q == p ? NULL : q;
-				res = p;
-				break;
-			}
-			if (p == last)
-				break;
-			q = p;
-			p = q->next;
-		}
+	while ((work = ACCESS_ONCE(*pprev))) {
+		read_barrier_depends();
+		if (work->func != func)
+			pprev = &work->next;
+		else if (cmpxchg(pprev, work, work->next) == work)
+			break;
 	}
 	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-	return res;
+
+	return work;
 }
 
 void task_work_run(void)
 {
 	struct task_struct *task = current;
-	struct callback_head *p, *q;
+	struct callback_head *work, *head, *next;
 
-	while (1) {
-		raw_spin_lock_irq(&task->pi_lock);
-		p = task->task_works;
-		task->task_works = NULL;
-		raw_spin_unlock_irq(&task->pi_lock);
+	for (;;) {
+		work = xchg(&task->task_works, NULL);
+		if (!work)
+			break;
+		/*
+		 * Synchronize with task_work_cancel(). It can't remove
+		 * the first entry == work, cmpxchg(task_works) should
+		 * fail, but it can play with *work and other entries.
+		 */
+		raw_spin_unlock_wait(&task->pi_lock);
+		smp_mb();
 
-		if (unlikely(!p))
-			return;
+		/* Reverse the list to run the works in fifo order */
+		head = NULL;
+		do {
+			next = work->next;
+			work->next = head;
+			head = work;
+			work = next;
+		} while (work);
 
-		q = p->next; /* head */
-		p->next = NULL; /* cut it */
-		while (q) {
-			p = q->next;
-			q->func(q);
-			q = p;
+		work = head;
+		do {
+			next = work->next;
+			work->func(work);
+			work = next;
 			cond_resched();
-		}
+		} while (work);
 	}
 }
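The flush side can be sketched the same way. The fragment below completes the user-space sketch started after the diffstat (it reuses the illustrative demo_work/demo_head/demo_add definitions from there, so it is not standalone) and mirrors the new task_work_run(): atomically detach the whole list, reverse it because the pushes built a LIFO stack, then run each callback in submission order.

/*
 * Sketch of the flush path, completing the demo_add() fragment above.
 */
#include <stdio.h>

static void demo_run(void)
{
	struct demo_work *work, *head, *next;

	for (;;) {
		/* Analogue of xchg(&task->task_works, NULL). */
		work = atomic_exchange(&demo_head, NULL);
		if (!work)
			break;

		/* demo_add() builds a LIFO stack; reverse it for FIFO order. */
		head = NULL;
		do {
			next = work->next;
			work->next = head;
			head = work;
			work = next;
		} while (work);

		/* Run the callbacks in the order they were added. */
		work = head;
		do {
			next = work->next;
			work->func(work);
			work = next;
		} while (work);
	}
}

static void say(struct demo_work *w)
{
	printf("running %p\n", (void *)w);
}

int main(void)
{
	struct demo_work a = { .func = say }, b = { .func = say };

	demo_add(&a);
	demo_add(&b);
	demo_run();	/* prints a's address first, then b's: FIFO order */
	return 0;
}

One thing the sketch deliberately omits: the kernel version still does raw_spin_unlock_wait(&task->pi_lock) plus smp_mb() after the xchg(), so a task_work_cancel() that is concurrently walking the list under pi_lock is guaranteed to have finished before the detached entries are reversed and run.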