diff options
author | Oleg Nesterov <oleg@redhat.com> | 2012-05-10 20:59:07 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2012-05-23 22:09:21 -0400 |
commit | e73f8959af0439d114847eab5a8a5ce48f1217c4 (patch) | |
tree | 47f056093590a5e5552e3a75f163e1f798063bda | |
parent | 62366c88b29c5a32e1531142092f98eaf49b1103 (diff) |
task_work_add: generic process-context callbacks
Provide a simple mechanism that allows running code in the (nonatomic)
context of an arbitrary task.
The caller does task_work_add(task, task_work) and this task executes
task_work->func() either from do_notify_resume() or from do_exit(). The
callback can rely on PF_EXITING to detect the latter case.
"struct task_work" can be embedded in another struct, still it has "void
*data" to handle the most common/simple case.
This allows us to kill the ->replacement_session_keyring hack, and
potentially this can have more users.
Performance-wise, this adds 2 "unlikely(!hlist_empty())" checks into
tracehook_notify_resume() and do_exit(). But at the same time we can
remove the "replacement_session_keyring != NULL" checks from
arch/*/signal.c and exit_creds().
Note: task_work_add/task_work_run abuse ->pi_lock. This is only because
this lock is already used by lookup_pi_state() to synchronize with
do_exit() setting PF_EXITING. Fortunately the scope of this lock in
task_work.c is really tiny, and these code paths are unlikely anyway.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Smith <dsmith@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | include/linux/sched.h | 2 | ||||
-rw-r--r-- | include/linux/task_work.h | 33 | ||||
-rw-r--r-- | include/linux/tracehook.h | 11 | ||||
-rw-r--r-- | kernel/Makefile | 2 | ||||
-rw-r--r-- | kernel/exit.c | 5 | ||||
-rw-r--r-- | kernel/fork.c | 1 | ||||
-rw-r--r-- | kernel/task_work.c | 84 |
7 files changed, 136 insertions, 2 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index 5ea8baea9387..7930131abc1a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -1400,6 +1400,8 @@ struct task_struct { | |||
1400 | int (*notifier)(void *priv); | 1400 | int (*notifier)(void *priv); |
1401 | void *notifier_data; | 1401 | void *notifier_data; |
1402 | sigset_t *notifier_mask; | 1402 | sigset_t *notifier_mask; |
1403 | struct hlist_head task_works; | ||
1404 | |||
1403 | struct audit_context *audit_context; | 1405 | struct audit_context *audit_context; |
1404 | #ifdef CONFIG_AUDITSYSCALL | 1406 | #ifdef CONFIG_AUDITSYSCALL |
1405 | uid_t loginuid; | 1407 | uid_t loginuid; |
diff --git a/include/linux/task_work.h b/include/linux/task_work.h new file mode 100644 index 000000000000..294d5d5e90b1 --- /dev/null +++ b/include/linux/task_work.h | |||
@@ -0,0 +1,33 @@ | |||
1 | #ifndef _LINUX_TASK_WORK_H | ||
2 | #define _LINUX_TASK_WORK_H | ||
3 | |||
4 | #include <linux/list.h> | ||
5 | #include <linux/sched.h> | ||
6 | |||
7 | struct task_work; | ||
8 | typedef void (*task_work_func_t)(struct task_work *); | ||
9 | |||
10 | struct task_work { | ||
11 | struct hlist_node hlist; | ||
12 | task_work_func_t func; | ||
13 | void *data; | ||
14 | }; | ||
15 | |||
16 | static inline void | ||
17 | init_task_work(struct task_work *twork, task_work_func_t func, void *data) | ||
18 | { | ||
19 | twork->func = func; | ||
20 | twork->data = data; | ||
21 | } | ||
22 | |||
23 | int task_work_add(struct task_struct *task, struct task_work *twork, bool); | ||
24 | struct task_work *task_work_cancel(struct task_struct *, task_work_func_t); | ||
25 | void task_work_run(void); | ||
26 | |||
27 | static inline void exit_task_work(struct task_struct *task) | ||
28 | { | ||
29 | if (unlikely(!hlist_empty(&task->task_works))) | ||
30 | task_work_run(); | ||
31 | } | ||
32 | |||
33 | #endif /* _LINUX_TASK_WORK_H */ | ||
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index b9ca903bb553..b2dd0917ca0d 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/sched.h> | 49 | #include <linux/sched.h> |
50 | #include <linux/ptrace.h> | 50 | #include <linux/ptrace.h> |
51 | #include <linux/security.h> | 51 | #include <linux/security.h> |
52 | #include <linux/task_work.h> | ||
52 | struct linux_binprm; | 53 | struct linux_binprm; |
53 | 54 | ||
54 | /* | 55 | /* |
@@ -164,8 +165,10 @@ static inline void tracehook_signal_handler(int sig, siginfo_t *info, | |||
164 | */ | 165 | */ |
165 | static inline void set_notify_resume(struct task_struct *task) | 166 | static inline void set_notify_resume(struct task_struct *task) |
166 | { | 167 | { |
168 | #ifdef TIF_NOTIFY_RESUME | ||
167 | if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME)) | 169 | if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME)) |
168 | kick_process(task); | 170 | kick_process(task); |
171 | #endif | ||
169 | } | 172 | } |
170 | 173 | ||
171 | /** | 174 | /** |
@@ -185,6 +188,14 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) | |||
185 | { | 188 | { |
186 | if (current->replacement_session_keyring) | 189 | if (current->replacement_session_keyring) |
187 | key_replace_session_keyring(); | 190 | key_replace_session_keyring(); |
191 | /* | ||
192 | * The caller just cleared TIF_NOTIFY_RESUME. This barrier | ||
193 | * pairs with task_work_add()->set_notify_resume() after | ||
194 | * hlist_add_head(task->task_works); | ||
195 | */ | ||
196 | smp_mb__after_clear_bit(); | ||
197 | if (unlikely(!hlist_empty(&current->task_works))) | ||
198 | task_work_run(); | ||
188 | } | 199 | } |
189 | 200 | ||
190 | #endif /* <linux/tracehook.h> */ | 201 | #endif /* <linux/tracehook.h> */ |
diff --git a/kernel/Makefile b/kernel/Makefile index 6c07f30fa9b7..bf1034008aca 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -5,7 +5,7 @@ | |||
5 | obj-y = fork.o exec_domain.o panic.o printk.o \ | 5 | obj-y = fork.o exec_domain.o panic.o printk.o \ |
6 | cpu.o exit.o itimer.o time.o softirq.o resource.o \ | 6 | cpu.o exit.o itimer.o time.o softirq.o resource.o \ |
7 | sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ | 7 | sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ |
8 | signal.o sys.o kmod.o workqueue.o pid.o \ | 8 | signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ |
9 | rcupdate.o extable.o params.o posix-timers.o \ | 9 | rcupdate.o extable.o params.o posix-timers.o \ |
10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ | 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ |
diff --git a/kernel/exit.c b/kernel/exit.c index 910a0716e17a..3d93325e0b1a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -946,11 +946,14 @@ void do_exit(long code) | |||
946 | exit_signals(tsk); /* sets PF_EXITING */ | 946 | exit_signals(tsk); /* sets PF_EXITING */ |
947 | /* | 947 | /* |
948 | * tsk->flags are checked in the futex code to protect against | 948 | * tsk->flags are checked in the futex code to protect against |
949 | * an exiting task cleaning up the robust pi futexes. | 949 | * an exiting task cleaning up the robust pi futexes, and in |
950 | * task_work_add() to avoid the race with exit_task_work(). | ||
950 | */ | 951 | */ |
951 | smp_mb(); | 952 | smp_mb(); |
952 | raw_spin_unlock_wait(&tsk->pi_lock); | 953 | raw_spin_unlock_wait(&tsk->pi_lock); |
953 | 954 | ||
955 | exit_task_work(tsk); | ||
956 | |||
954 | exit_irq_thread(); | 957 | exit_irq_thread(); |
955 | 958 | ||
956 | if (unlikely(in_atomic())) | 959 | if (unlikely(in_atomic())) |
diff --git a/kernel/fork.c b/kernel/fork.c index 05c813dc9ecc..a46db217a589 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1411,6 +1411,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1411 | */ | 1411 | */ |
1412 | p->group_leader = p; | 1412 | p->group_leader = p; |
1413 | INIT_LIST_HEAD(&p->thread_group); | 1413 | INIT_LIST_HEAD(&p->thread_group); |
1414 | INIT_HLIST_HEAD(&p->task_works); | ||
1414 | 1415 | ||
1415 | /* Now that the task is set up, run cgroup callbacks if | 1416 | /* Now that the task is set up, run cgroup callbacks if |
1416 | * necessary. We need to run them before the task is visible | 1417 | * necessary. We need to run them before the task is visible |
diff --git a/kernel/task_work.c b/kernel/task_work.c new file mode 100644 index 000000000000..82d1c794066d --- /dev/null +++ b/kernel/task_work.c | |||
@@ -0,0 +1,84 @@ | |||
1 | #include <linux/spinlock.h> | ||
2 | #include <linux/task_work.h> | ||
3 | #include <linux/tracehook.h> | ||
4 | |||
5 | int | ||
6 | task_work_add(struct task_struct *task, struct task_work *twork, bool notify) | ||
7 | { | ||
8 | unsigned long flags; | ||
9 | int err = -ESRCH; | ||
10 | |||
11 | #ifndef TIF_NOTIFY_RESUME | ||
12 | if (notify) | ||
13 | return -ENOTSUPP; | ||
14 | #endif | ||
15 | /* | ||
16 | * We must not insert the new work if the task has already passed | ||
17 | * exit_task_work(). We rely on do_exit()->raw_spin_unlock_wait() | ||
18 | * and check PF_EXITING under pi_lock. | ||
19 | */ | ||
20 | raw_spin_lock_irqsave(&task->pi_lock, flags); | ||
21 | if (likely(!(task->flags & PF_EXITING))) { | ||
22 | hlist_add_head(&twork->hlist, &task->task_works); | ||
23 | err = 0; | ||
24 | } | ||
25 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | ||
26 | |||
27 | /* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */ | ||
28 | if (likely(!err) && notify) | ||
29 | set_notify_resume(task); | ||
30 | return err; | ||
31 | } | ||
32 | |||
33 | struct task_work * | ||
34 | task_work_cancel(struct task_struct *task, task_work_func_t func) | ||
35 | { | ||
36 | unsigned long flags; | ||
37 | struct task_work *twork; | ||
38 | struct hlist_node *pos; | ||
39 | |||
40 | raw_spin_lock_irqsave(&task->pi_lock, flags); | ||
41 | hlist_for_each_entry(twork, pos, &task->task_works, hlist) { | ||
42 | if (twork->func == func) { | ||
43 | hlist_del(&twork->hlist); | ||
44 | goto found; | ||
45 | } | ||
46 | } | ||
47 | twork = NULL; | ||
48 | found: | ||
49 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | ||
50 | |||
51 | return twork; | ||
52 | } | ||
53 | |||
54 | void task_work_run(void) | ||
55 | { | ||
56 | struct task_struct *task = current; | ||
57 | struct hlist_head task_works; | ||
58 | struct hlist_node *pos; | ||
59 | |||
60 | raw_spin_lock_irq(&task->pi_lock); | ||
61 | hlist_move_list(&task->task_works, &task_works); | ||
62 | raw_spin_unlock_irq(&task->pi_lock); | ||
63 | |||
64 | if (unlikely(hlist_empty(&task_works))) | ||
65 | return; | ||
66 | /* | ||
67 | * We use hlist to save the space in task_struct, but we want fifo. | ||
68 | * Find the last entry, the list should be short, then process them | ||
69 | * in reverse order. | ||
70 | */ | ||
71 | for (pos = task_works.first; pos->next; pos = pos->next) | ||
72 | ; | ||
73 | |||
74 | for (;;) { | ||
75 | struct hlist_node **pprev = pos->pprev; | ||
76 | struct task_work *twork = container_of(pos, struct task_work, | ||
77 | hlist); | ||
78 | twork->func(twork); | ||
79 | |||
80 | if (pprev == &task_works.first) | ||
81 | break; | ||
82 | pos = container_of(pprev, struct hlist_node, next); | ||
83 | } | ||
84 | } | ||