about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@elte.hu> 2006-01-08 04:01:37 -0500
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-01-08 23:13:40 -0500
commit: e56d090310d7625ecb43a1eeebd479f04affb48b (patch)
tree: 2f479215dff4a2d8f3a9ed85200a5bc4f51534be
parent: 4369ef3c3e9d3bd9b879580678778f558d481e90 (diff)
[PATCH] RCU signal handling
RCU tasklist_lock and RCU signal handling: send signals RCU-read-locked instead of tasklist_lock read-locked. This is a scalability improvement on SMP and a preemption-latency improvement under PREEMPT_RCU.

Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: William Irwin <wli@holomorphy.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- fs/exec.c 4
-rw-r--r-- include/linux/sched.h 32
-rw-r--r-- kernel/exit.c 1
-rw-r--r-- kernel/fork.c 10
-rw-r--r-- kernel/pid.c 22
-rw-r--r-- kernel/rcupdate.c 1
-rw-r--r-- kernel/sched.c 7
-rw-r--r-- kernel/signal.c 97
8 files changed, 143 insertions, 31 deletions
diff --git a/fs/exec.c b/fs/exec.c
index e75a9548da8e..e9650cd22a3b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -760,7 +760,7 @@ no_thread_group:
760 spin_lock(&oldsighand->siglock); 760 spin_lock(&oldsighand->siglock);
761 spin_lock(&newsighand->siglock); 761 spin_lock(&newsighand->siglock);
762 762
763 current->sighand = newsighand; 763 rcu_assign_pointer(current->sighand, newsighand);
764 recalc_sigpending(); 764 recalc_sigpending();
765 765
766 spin_unlock(&newsighand->siglock); 766 spin_unlock(&newsighand->siglock);
@@ -768,7 +768,7 @@ no_thread_group:
768 write_unlock_irq(&tasklist_lock); 768 write_unlock_irq(&tasklist_lock);
769 769
770 if (atomic_dec_and_test(&oldsighand->count)) 770 if (atomic_dec_and_test(&oldsighand->count))
771 kmem_cache_free(sighand_cachep, oldsighand); 771 sighand_free(oldsighand);
772 } 772 }
773 773
774 BUG_ON(!thread_group_leader(current)); 774 BUG_ON(!thread_group_leader(current));
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a74662077d60..a6af77e9b4cf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -34,6 +34,7 @@
34#include <linux/percpu.h> 34#include <linux/percpu.h>
35#include <linux/topology.h> 35#include <linux/topology.h>
36#include <linux/seccomp.h> 36#include <linux/seccomp.h>
37#include <linux/rcupdate.h>
37 38
38#include <linux/auxvec.h> /* For AT_VECTOR_SIZE */ 39#include <linux/auxvec.h> /* For AT_VECTOR_SIZE */
39 40
@@ -350,8 +351,16 @@ struct sighand_struct {
350 atomic_t count; 351 atomic_t count;
351 struct k_sigaction action[_NSIG]; 352 struct k_sigaction action[_NSIG];
352 spinlock_t siglock; 353 spinlock_t siglock;
354 struct rcu_head rcu;
353}; 355};
354 356
357extern void sighand_free_cb(struct rcu_head *rhp);
358
359static inline void sighand_free(struct sighand_struct *sp)
360{
361 call_rcu(&sp->rcu, sighand_free_cb);
362}
363
355/* 364/*
356 * NOTE! "signal_struct" does not have it's own 365 * NOTE! "signal_struct" does not have it's own
357 * locking, because a shared signal_struct always 366 * locking, because a shared signal_struct always
@@ -844,6 +853,7 @@ struct task_struct {
844 int cpuset_mems_generation; 853 int cpuset_mems_generation;
845#endif 854#endif
846 atomic_t fs_excl; /* holding fs exclusive resources */ 855 atomic_t fs_excl; /* holding fs exclusive resources */
856 struct rcu_head rcu;
847}; 857};
848 858
849static inline pid_t process_group(struct task_struct *tsk) 859static inline pid_t process_group(struct task_struct *tsk)
@@ -867,8 +877,26 @@ static inline int pid_alive(struct task_struct *p)
867extern void free_task(struct task_struct *tsk); 877extern void free_task(struct task_struct *tsk);
868extern void __put_task_struct(struct task_struct *tsk); 878extern void __put_task_struct(struct task_struct *tsk);
869#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) 879#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
870#define put_task_struct(tsk) \ 880
871do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) 881static inline int get_task_struct_rcu(struct task_struct *t)
882{
883 int oldusage;
884
885 do {
886 oldusage = atomic_read(&t->usage);
887 if (oldusage == 0)
888 return 0;
889 } while (cmpxchg(&t->usage.counter, oldusage, oldusage+1) != oldusage);
890 return 1;
891}
892
893extern void __put_task_struct_cb(struct rcu_head *rhp);
894
895static inline void put_task_struct(struct task_struct *t)
896{
897 if (atomic_dec_and_test(&t->usage))
898 call_rcu(&t->rcu, __put_task_struct_cb);
899}
872 900
873/* 901/*
874 * Per process flags 902 * Per process flags
diff --git a/kernel/exit.c b/kernel/exit.c
index ee515683b92d..c73a7eb26de3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,7 +72,6 @@ repeat:
72 __ptrace_unlink(p); 72 __ptrace_unlink(p);
73 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); 73 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
74 __exit_signal(p); 74 __exit_signal(p);
75 __exit_sighand(p);
76 /* 75 /*
77 * Note that the fastpath in sys_times depends on __exit_signal having 76 * Note that the fastpath in sys_times depends on __exit_signal having
78 * updated the counters before a task is removed from the tasklist of 77 * updated the counters before a task is removed from the tasklist of
diff --git a/kernel/fork.c b/kernel/fork.c
index fb8572a42297..7fe3adfa65cb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -743,6 +743,14 @@ int unshare_files(void)
743 743
744EXPORT_SYMBOL(unshare_files); 744EXPORT_SYMBOL(unshare_files);
745 745
746void sighand_free_cb(struct rcu_head *rhp)
747{
748 struct sighand_struct *sp;
749
750 sp = container_of(rhp, struct sighand_struct, rcu);
751 kmem_cache_free(sighand_cachep, sp);
752}
753
746static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) 754static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
747{ 755{
748 struct sighand_struct *sig; 756 struct sighand_struct *sig;
@@ -752,7 +760,7 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t
752 return 0; 760 return 0;
753 } 761 }
754 sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); 762 sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
755 tsk->sighand = sig; 763 rcu_assign_pointer(tsk->sighand, sig);
756 if (!sig) 764 if (!sig)
757 return -ENOMEM; 765 return -ENOMEM;
758 spin_lock_init(&sig->siglock); 766 spin_lock_init(&sig->siglock);
diff --git a/kernel/pid.c b/kernel/pid.c
index edba31c681ac..1acc07246991 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -136,7 +136,7 @@ struct pid * fastcall find_pid(enum pid_type type, int nr)
136 struct hlist_node *elem; 136 struct hlist_node *elem;
137 struct pid *pid; 137 struct pid *pid;
138 138
139 hlist_for_each_entry(pid, elem, 139 hlist_for_each_entry_rcu(pid, elem,
140 &pid_hash[type][pid_hashfn(nr)], pid_chain) { 140 &pid_hash[type][pid_hashfn(nr)], pid_chain) {
141 if (pid->nr == nr) 141 if (pid->nr == nr)
142 return pid; 142 return pid;
@@ -150,15 +150,15 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
150 150
151 task_pid = &task->pids[type]; 151 task_pid = &task->pids[type];
152 pid = find_pid(type, nr); 152 pid = find_pid(type, nr);
153 task_pid->nr = nr;
153 if (pid == NULL) { 154 if (pid == NULL) {
154 hlist_add_head(&task_pid->pid_chain,
155 &pid_hash[type][pid_hashfn(nr)]);
156 INIT_LIST_HEAD(&task_pid->pid_list); 155 INIT_LIST_HEAD(&task_pid->pid_list);
156 hlist_add_head_rcu(&task_pid->pid_chain,
157 &pid_hash[type][pid_hashfn(nr)]);
157 } else { 158 } else {
158 INIT_HLIST_NODE(&task_pid->pid_chain); 159 INIT_HLIST_NODE(&task_pid->pid_chain);
159 list_add_tail(&task_pid->pid_list, &pid->pid_list); 160 list_add_tail_rcu(&task_pid->pid_list, &pid->pid_list);
160 } 161 }
161 task_pid->nr = nr;
162 162
163 return 0; 163 return 0;
164} 164}
@@ -170,20 +170,20 @@ static fastcall int __detach_pid(task_t *task, enum pid_type type)
170 170
171 pid = &task->pids[type]; 171 pid = &task->pids[type];
172 if (!hlist_unhashed(&pid->pid_chain)) { 172 if (!hlist_unhashed(&pid->pid_chain)) {
173 hlist_del(&pid->pid_chain);
174 173
175 if (list_empty(&pid->pid_list)) 174 if (list_empty(&pid->pid_list)) {
176 nr = pid->nr; 175 nr = pid->nr;
177 else { 176 hlist_del_rcu(&pid->pid_chain);
177 } else {
178 pid_next = list_entry(pid->pid_list.next, 178 pid_next = list_entry(pid->pid_list.next,
179 struct pid, pid_list); 179 struct pid, pid_list);
180 /* insert next pid from pid_list to hash */ 180 /* insert next pid from pid_list to hash */
181 hlist_add_head(&pid_next->pid_chain, 181 hlist_replace_rcu(&pid->pid_chain,
182 &pid_hash[type][pid_hashfn(pid_next->nr)]); 182 &pid_next->pid_chain);
183 } 183 }
184 } 184 }
185 185
186 list_del(&pid->pid_list); 186 list_del_rcu(&pid->pid_list);
187 pid->nr = 0; 187 pid->nr = 0;
188 188
189 return nr; 189 return nr;
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c9afc61240e4..0a669bd2f6d1 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -35,6 +35,7 @@
35#include <linux/init.h> 35#include <linux/init.h>
36#include <linux/spinlock.h> 36#include <linux/spinlock.h>
37#include <linux/smp.h> 37#include <linux/smp.h>
38#include <linux/rcupdate.h>
38#include <linux/interrupt.h> 39#include <linux/interrupt.h>
39#include <linux/sched.h> 40#include <linux/sched.h>
40#include <asm/atomic.h> 41#include <asm/atomic.h>
diff --git a/kernel/sched.c b/kernel/sched.c
index 6f46c94cc29e..92733091154c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -176,6 +176,13 @@ static unsigned int task_timeslice(task_t *p)
176#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ 176#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \
177 < (long long) (sd)->cache_hot_time) 177 < (long long) (sd)->cache_hot_time)
178 178
179void __put_task_struct_cb(struct rcu_head *rhp)
180{
181 __put_task_struct(container_of(rhp, struct task_struct, rcu));
182}
183
184EXPORT_SYMBOL_GPL(__put_task_struct_cb);
185
179/* 186/*
180 * These are the runqueue data structures: 187 * These are the runqueue data structures:
181 */ 188 */
diff --git a/kernel/signal.c b/kernel/signal.c
index d7611f189ef7..64737c72dadd 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -329,13 +329,20 @@ void __exit_sighand(struct task_struct *tsk)
329 /* Ok, we're done with the signal handlers */ 329 /* Ok, we're done with the signal handlers */
330 tsk->sighand = NULL; 330 tsk->sighand = NULL;
331 if (atomic_dec_and_test(&sighand->count)) 331 if (atomic_dec_and_test(&sighand->count))
332 kmem_cache_free(sighand_cachep, sighand); 332 sighand_free(sighand);
333} 333}
334 334
335void exit_sighand(struct task_struct *tsk) 335void exit_sighand(struct task_struct *tsk)
336{ 336{
337 write_lock_irq(&tasklist_lock); 337 write_lock_irq(&tasklist_lock);
338 __exit_sighand(tsk); 338 rcu_read_lock();
339 if (tsk->sighand != NULL) {
340 struct sighand_struct *sighand = rcu_dereference(tsk->sighand);
341 spin_lock(&sighand->siglock);
342 __exit_sighand(tsk);
343 spin_unlock(&sighand->siglock);
344 }
345 rcu_read_unlock();
339 write_unlock_irq(&tasklist_lock); 346 write_unlock_irq(&tasklist_lock);
340} 347}
341 348
@@ -345,12 +352,14 @@ void exit_sighand(struct task_struct *tsk)
345void __exit_signal(struct task_struct *tsk) 352void __exit_signal(struct task_struct *tsk)
346{ 353{
347 struct signal_struct * sig = tsk->signal; 354 struct signal_struct * sig = tsk->signal;
348 struct sighand_struct * sighand = tsk->sighand; 355 struct sighand_struct * sighand;
349 356
350 if (!sig) 357 if (!sig)
351 BUG(); 358 BUG();
352 if (!atomic_read(&sig->count)) 359 if (!atomic_read(&sig->count))
353 BUG(); 360 BUG();
361 rcu_read_lock();
362 sighand = rcu_dereference(tsk->sighand);
354 spin_lock(&sighand->siglock); 363 spin_lock(&sighand->siglock);
355 posix_cpu_timers_exit(tsk); 364 posix_cpu_timers_exit(tsk);
356 if (atomic_dec_and_test(&sig->count)) { 365 if (atomic_dec_and_test(&sig->count)) {
@@ -358,6 +367,7 @@ void __exit_signal(struct task_struct *tsk)
358 if (tsk == sig->curr_target) 367 if (tsk == sig->curr_target)
359 sig->curr_target = next_thread(tsk); 368 sig->curr_target = next_thread(tsk);
360 tsk->signal = NULL; 369 tsk->signal = NULL;
370 __exit_sighand(tsk);
361 spin_unlock(&sighand->siglock); 371 spin_unlock(&sighand->siglock);
362 flush_sigqueue(&sig->shared_pending); 372 flush_sigqueue(&sig->shared_pending);
363 } else { 373 } else {
@@ -389,9 +399,11 @@ void __exit_signal(struct task_struct *tsk)
389 sig->nvcsw += tsk->nvcsw; 399 sig->nvcsw += tsk->nvcsw;
390 sig->nivcsw += tsk->nivcsw; 400 sig->nivcsw += tsk->nivcsw;
391 sig->sched_time += tsk->sched_time; 401 sig->sched_time += tsk->sched_time;
402 __exit_sighand(tsk);
392 spin_unlock(&sighand->siglock); 403 spin_unlock(&sighand->siglock);
393 sig = NULL; /* Marker for below. */ 404 sig = NULL; /* Marker for below. */
394 } 405 }
406 rcu_read_unlock();
395 clear_tsk_thread_flag(tsk,TIF_SIGPENDING); 407 clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
396 flush_sigqueue(&tsk->pending); 408 flush_sigqueue(&tsk->pending);
397 if (sig) { 409 if (sig) {
@@ -1080,18 +1092,28 @@ void zap_other_threads(struct task_struct *p)
1080} 1092}
1081 1093
1082/* 1094/*
1083 * Must be called with the tasklist_lock held for reading! 1095 * Must be called under rcu_read_lock() or with tasklist_lock read-held.
1084 */ 1096 */
1085int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) 1097int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1086{ 1098{
1087 unsigned long flags; 1099 unsigned long flags;
1100 struct sighand_struct *sp;
1088 int ret; 1101 int ret;
1089 1102
1103retry:
1090 ret = check_kill_permission(sig, info, p); 1104 ret = check_kill_permission(sig, info, p);
1091 if (!ret && sig && p->sighand) { 1105 if (!ret && sig && (sp = p->sighand)) {
1092 spin_lock_irqsave(&p->sighand->siglock, flags); 1106 if (!get_task_struct_rcu(p))
1107 return -ESRCH;
1108 spin_lock_irqsave(&sp->siglock, flags);
1109 if (p->sighand != sp) {
1110 spin_unlock_irqrestore(&sp->siglock, flags);
1111 put_task_struct(p);
1112 goto retry;
1113 }
1093 ret = __group_send_sig_info(sig, info, p); 1114 ret = __group_send_sig_info(sig, info, p);
1094 spin_unlock_irqrestore(&p->sighand->siglock, flags); 1115 spin_unlock_irqrestore(&sp->siglock, flags);
1116 put_task_struct(p);
1095 } 1117 }
1096 1118
1097 return ret; 1119 return ret;
@@ -1136,14 +1158,21 @@ int
1136kill_proc_info(int sig, struct siginfo *info, pid_t pid) 1158kill_proc_info(int sig, struct siginfo *info, pid_t pid)
1137{ 1159{
1138 int error; 1160 int error;
1161 int acquired_tasklist_lock = 0;
1139 struct task_struct *p; 1162 struct task_struct *p;
1140 1163
1141 read_lock(&tasklist_lock); 1164 rcu_read_lock();
1165 if (unlikely(sig_kernel_stop(sig) || sig == SIGCONT)) {
1166 read_lock(&tasklist_lock);
1167 acquired_tasklist_lock = 1;
1168 }
1142 p = find_task_by_pid(pid); 1169 p = find_task_by_pid(pid);
1143 error = -ESRCH; 1170 error = -ESRCH;
1144 if (p) 1171 if (p)
1145 error = group_send_sig_info(sig, info, p); 1172 error = group_send_sig_info(sig, info, p);
1146 read_unlock(&tasklist_lock); 1173 if (unlikely(acquired_tasklist_lock))
1174 read_unlock(&tasklist_lock);
1175 rcu_read_unlock();
1147 return error; 1176 return error;
1148} 1177}
1149 1178
@@ -1355,16 +1384,54 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
1355{ 1384{
1356 unsigned long flags; 1385 unsigned long flags;
1357 int ret = 0; 1386 int ret = 0;
1387 struct sighand_struct *sh;
1358 1388
1359 BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); 1389 BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1360 read_lock(&tasklist_lock); 1390
1391 /*
1392 * The rcu based delayed sighand destroy makes it possible to
1393 * run this without tasklist lock held. The task struct itself
1394 * cannot go away as create_timer did get_task_struct().
1395 *
1396 * We return -1, when the task is marked exiting, so
1397 * posix_timer_event can redirect it to the group leader
1398 */
1399 rcu_read_lock();
1361 1400
1362 if (unlikely(p->flags & PF_EXITING)) { 1401 if (unlikely(p->flags & PF_EXITING)) {
1363 ret = -1; 1402 ret = -1;
1364 goto out_err; 1403 goto out_err;
1365 } 1404 }
1366 1405
1367 spin_lock_irqsave(&p->sighand->siglock, flags); 1406retry:
1407 sh = rcu_dereference(p->sighand);
1408
1409 spin_lock_irqsave(&sh->siglock, flags);
1410 if (p->sighand != sh) {
1411 /* We raced with exec() in a multithreaded process... */
1412 spin_unlock_irqrestore(&sh->siglock, flags);
1413 goto retry;
1414 }
1415
1416 /*
1417 * We do the check here again to handle the following scenario:
1418 *
1419 * CPU 0 CPU 1
1420 * send_sigqueue
1421 * check PF_EXITING
1422 * interrupt exit code running
1423 * __exit_signal
1424 * lock sighand->siglock
1425 * unlock sighand->siglock
1426 * lock sh->siglock
1427 * add(tsk->pending) flush_sigqueue(tsk->pending)
1428 *
1429 */
1430
1431 if (unlikely(p->flags & PF_EXITING)) {
1432 ret = -1;
1433 goto out;
1434 }
1368 1435
1369 if (unlikely(!list_empty(&q->list))) { 1436 if (unlikely(!list_empty(&q->list))) {
1370 /* 1437 /*
@@ -1388,9 +1455,9 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
1388 signal_wake_up(p, sig == SIGKILL); 1455 signal_wake_up(p, sig == SIGKILL);
1389 1456
1390out: 1457out:
1391 spin_unlock_irqrestore(&p->sighand->siglock, flags); 1458 spin_unlock_irqrestore(&sh->siglock, flags);
1392out_err: 1459out_err:
1393 read_unlock(&tasklist_lock); 1460 rcu_read_unlock();
1394 1461
1395 return ret; 1462 return ret;
1396} 1463}
@@ -1402,7 +1469,9 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
1402 int ret = 0; 1469 int ret = 0;
1403 1470
1404 BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); 1471 BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1472
1405 read_lock(&tasklist_lock); 1473 read_lock(&tasklist_lock);
1474 /* Since it_lock is held, p->sighand cannot be NULL. */
1406 spin_lock_irqsave(&p->sighand->siglock, flags); 1475 spin_lock_irqsave(&p->sighand->siglock, flags);
1407 handle_stop_signal(sig, p); 1476 handle_stop_signal(sig, p);
1408 1477
@@ -1436,7 +1505,7 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
1436out: 1505out:
1437 spin_unlock_irqrestore(&p->sighand->siglock, flags); 1506 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1438 read_unlock(&tasklist_lock); 1507 read_unlock(&tasklist_lock);
1439 return(ret); 1508 return ret;
1440} 1509}
1441 1510
1442/* 1511/*