aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Oleg Nesterov <oleg@redhat.com> 2012-05-10 20:59:07 -0400
committer Al Viro <viro@zeniv.linux.org.uk> 2012-05-23 22:09:21 -0400
commit e73f8959af0439d114847eab5a8a5ce48f1217c4 (patch)
tree 47f056093590a5e5552e3a75f163e1f798063bda
parent 62366c88b29c5a32e1531142092f98eaf49b1103 (diff)
task_work_add: generic process-context callbacks
Provide a simple mechanism that allows running code in the (nonatomic) context of the arbitrary task.

The caller does task_work_add(task, task_work) and this task executes task_work->func() either from do_notify_resume() or from do_exit(). The callback can rely on PF_EXITING to detect the latter case.

"struct task_work" can be embedded in another struct, still it has "void *data" to handle the most common/simple case.

This allows us to kill the ->replacement_session_keyring hack, and potentially this can have more users.

Performance-wise, this adds 2 "unlikely(!hlist_empty())" checks into tracehook_notify_resume() and do_exit(). But at the same time we can remove the "replacement_session_keyring != NULL" checks from arch/*/signal.c and exit_creds().

Note: task_work_add/task_work_run abuses ->pi_lock. This is only because this lock is already used by lookup_pi_state() to synchronize with do_exit() setting PF_EXITING. Fortunately the scope of this lock in task_work.c is really tiny, and the code is unlikely anyway.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Smith <dsmith@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- include/linux/sched.h | 2
-rw-r--r-- include/linux/task_work.h | 33
-rw-r--r-- include/linux/tracehook.h | 11
-rw-r--r-- kernel/Makefile | 2
-rw-r--r-- kernel/exit.c | 5
-rw-r--r-- kernel/fork.c | 1
-rw-r--r-- kernel/task_work.c | 84
7 files changed, 136 insertions, 2 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5ea8baea9387..7930131abc1a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1400,6 +1400,8 @@ struct task_struct {
1400 int (*notifier)(void *priv); 1400 int (*notifier)(void *priv);
1401 void *notifier_data; 1401 void *notifier_data;
1402 sigset_t *notifier_mask; 1402 sigset_t *notifier_mask;
1403 struct hlist_head task_works;
1404
1403 struct audit_context *audit_context; 1405 struct audit_context *audit_context;
1404#ifdef CONFIG_AUDITSYSCALL 1406#ifdef CONFIG_AUDITSYSCALL
1405 uid_t loginuid; 1407 uid_t loginuid;
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
new file mode 100644
index 000000000000..294d5d5e90b1
--- /dev/null
+++ b/include/linux/task_work.h
@@ -0,0 +1,33 @@
1#ifndef _LINUX_TASK_WORK_H
2#define _LINUX_TASK_WORK_H
3
4#include <linux/list.h>
5#include <linux/sched.h>
6
/*
 * Forward declaration so that the callback typedef below can name a
 * pointer to the structure before the structure itself is defined.
 */
7struct task_work;
/* Callback signature: the callback is handed its own task_work entry. */
8typedef void (*task_work_func_t)(struct task_work *);
9
/*
 * One callback queued on a task.
 *
 *   hlist - linkage on the owning task's ->task_works list
 *           (inserted by task_work_add(), removed by task_work_cancel()).
 *   func  - the callback; task_work_run() invokes it as func(twork).
 *           task_work_cancel() also uses it as the lookup key.
 *   data  - opaque pointer stored by init_task_work(); nothing in the
 *           task_work code visible here reads it.
 */
10struct task_work {
11 struct hlist_node hlist;
12 task_work_func_t func;
13 void *data;
14};
15
16static inline void
17init_task_work(struct task_work *twork, task_work_func_t func, void *data)
18{
19 twork->func = func;
20 twork->data = data;
21}
22
23int task_work_add(struct task_struct *task, struct task_work *twork, bool);
24struct task_work *task_work_cancel(struct task_struct *, task_work_func_t);
25void task_work_run(void);
26
27static inline void exit_task_work(struct task_struct *task)
28{
29 if (unlikely(!hlist_empty(&task->task_works)))
30 task_work_run();
31}
32
33#endif /* _LINUX_TASK_WORK_H */
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index b9ca903bb553..b2dd0917ca0d 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -49,6 +49,7 @@
49#include <linux/sched.h> 49#include <linux/sched.h>
50#include <linux/ptrace.h> 50#include <linux/ptrace.h>
51#include <linux/security.h> 51#include <linux/security.h>
52#include <linux/task_work.h>
52struct linux_binprm; 53struct linux_binprm;
53 54
54/* 55/*
@@ -164,8 +165,10 @@ static inline void tracehook_signal_handler(int sig, siginfo_t *info,
164 */ 165 */
165static inline void set_notify_resume(struct task_struct *task) 166static inline void set_notify_resume(struct task_struct *task)
166{ 167{
168#ifdef TIF_NOTIFY_RESUME
167 if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME)) 169 if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME))
168 kick_process(task); 170 kick_process(task);
171#endif
169} 172}
170 173
171/** 174/**
@@ -185,6 +188,14 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
185{ 188{
186 if (current->replacement_session_keyring) 189 if (current->replacement_session_keyring)
187 key_replace_session_keyring(); 190 key_replace_session_keyring();
191 /*
192 * The caller just cleared TIF_NOTIFY_RESUME. This barrier
193 * pairs with task_work_add()->set_notify_resume() after
194 * hlist_add_head(task->task_works);
195 */
196 smp_mb__after_clear_bit();
197 if (unlikely(!hlist_empty(&current->task_works)))
198 task_work_run();
188} 199}
189 200
190#endif /* <linux/tracehook.h> */ 201#endif /* <linux/tracehook.h> */
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c07f30fa9b7..bf1034008aca 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -5,7 +5,7 @@
5obj-y = fork.o exec_domain.o panic.o printk.o \ 5obj-y = fork.o exec_domain.o panic.o printk.o \
6 cpu.o exit.o itimer.o time.o softirq.o resource.o \ 6 cpu.o exit.o itimer.o time.o softirq.o resource.o \
7 sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ 7 sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
8 signal.o sys.o kmod.o workqueue.o pid.o \ 8 signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
9 rcupdate.o extable.o params.o posix-timers.o \ 9 rcupdate.o extable.o params.o posix-timers.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ 11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
diff --git a/kernel/exit.c b/kernel/exit.c
index 910a0716e17a..3d93325e0b1a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -946,11 +946,14 @@ void do_exit(long code)
946 exit_signals(tsk); /* sets PF_EXITING */ 946 exit_signals(tsk); /* sets PF_EXITING */
947 /* 947 /*
948 * tsk->flags are checked in the futex code to protect against 948 * tsk->flags are checked in the futex code to protect against
949 * an exiting task cleaning up the robust pi futexes. 949 * an exiting task cleaning up the robust pi futexes, and in
950 * task_work_add() to avoid the race with exit_task_work().
950 */ 951 */
951 smp_mb(); 952 smp_mb();
952 raw_spin_unlock_wait(&tsk->pi_lock); 953 raw_spin_unlock_wait(&tsk->pi_lock);
953 954
955 exit_task_work(tsk);
956
954 exit_irq_thread(); 957 exit_irq_thread();
955 958
956 if (unlikely(in_atomic())) 959 if (unlikely(in_atomic()))
diff --git a/kernel/fork.c b/kernel/fork.c
index 05c813dc9ecc..a46db217a589 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1411,6 +1411,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1411 */ 1411 */
1412 p->group_leader = p; 1412 p->group_leader = p;
1413 INIT_LIST_HEAD(&p->thread_group); 1413 INIT_LIST_HEAD(&p->thread_group);
1414 INIT_HLIST_HEAD(&p->task_works);
1414 1415
1415 /* Now that the task is set up, run cgroup callbacks if 1416 /* Now that the task is set up, run cgroup callbacks if
1416 * necessary. We need to run them before the task is visible 1417 * necessary. We need to run them before the task is visible
diff --git a/kernel/task_work.c b/kernel/task_work.c
new file mode 100644
index 000000000000..82d1c794066d
--- /dev/null
+++ b/kernel/task_work.c
@@ -0,0 +1,84 @@
1#include <linux/spinlock.h>
2#include <linux/task_work.h>
3#include <linux/tracehook.h>
4
5int
6task_work_add(struct task_struct *task, struct task_work *twork, bool notify)
7{
8 unsigned long flags;
9 int err = -ESRCH;
10
11#ifndef TIF_NOTIFY_RESUME
12 if (notify)
13 return -ENOTSUPP;
14#endif
15 /*
16 * We must not insert the new work if the task has already passed
17 * exit_task_work(). We rely on do_exit()->raw_spin_unlock_wait()
18 * and check PF_EXITING under pi_lock.
19 */
20 raw_spin_lock_irqsave(&task->pi_lock, flags);
21 if (likely(!(task->flags & PF_EXITING))) {
22 hlist_add_head(&twork->hlist, &task->task_works);
23 err = 0;
24 }
25 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
26
27 /* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */
28 if (likely(!err) && notify)
29 set_notify_resume(task);
30 return err;
31}
32
33struct task_work *
34task_work_cancel(struct task_struct *task, task_work_func_t func)
35{
36 unsigned long flags;
37 struct task_work *twork;
38 struct hlist_node *pos;
39
40 raw_spin_lock_irqsave(&task->pi_lock, flags);
41 hlist_for_each_entry(twork, pos, &task->task_works, hlist) {
42 if (twork->func == func) {
43 hlist_del(&twork->hlist);
44 goto found;
45 }
46 }
47 twork = NULL;
48 found:
49 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
50
51 return twork;
52}
53
54void task_work_run(void)
55{
56 struct task_struct *task = current;
57 struct hlist_head task_works;
58 struct hlist_node *pos;
59
60 raw_spin_lock_irq(&task->pi_lock);
61 hlist_move_list(&task->task_works, &task_works);
62 raw_spin_unlock_irq(&task->pi_lock);
63
64 if (unlikely(hlist_empty(&task_works)))
65 return;
66 /*
67 * We use hlist to save the space in task_struct, but we want fifo.
68 * Find the last entry, the list should be short, then process them
69 * in reverse order.
70 */
71 for (pos = task_works.first; pos->next; pos = pos->next)
72 ;
73
74 for (;;) {
75 struct hlist_node **pprev = pos->pprev;
76 struct task_work *twork = container_of(pos, struct task_work,
77 hlist);
78 twork->func(twork);
79
80 if (pprev == &task_works.first)
81 break;
82 pos = container_of(pprev, struct hlist_node, next);
83 }
84}