diff options
author | Ingo Molnar <mingo@elte.hu> | 2006-01-08 04:01:37 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-01-08 23:13:40 -0500 |
commit | e56d090310d7625ecb43a1eeebd479f04affb48b (patch) | |
tree | 2f479215dff4a2d8f3a9ed85200a5bc4f51534be /kernel | |
parent | 4369ef3c3e9d3bd9b879580678778f558d481e90 (diff) |
[PATCH] RCU signal handling
RCU tasklist_lock and RCU signal handling: send signals under rcu_read_lock()
instead of holding tasklist_lock for reading. This is a scalability improvement on
SMP and a preemption-latency improvement under PREEMPT_RCU.
Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: William Irwin <wli@holomorphy.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/exit.c | 1 | ||||
-rw-r--r-- | kernel/fork.c | 10 | ||||
-rw-r--r-- | kernel/pid.c | 22 | ||||
-rw-r--r-- | kernel/rcupdate.c | 1 | ||||
-rw-r--r-- | kernel/sched.c | 7 | ||||
-rw-r--r-- | kernel/signal.c | 97 |
6 files changed, 111 insertions, 27 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index ee515683b92d..c73a7eb26de3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -72,7 +72,6 @@ repeat: | |||
72 | __ptrace_unlink(p); | 72 | __ptrace_unlink(p); |
73 | BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); | 73 | BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); |
74 | __exit_signal(p); | 74 | __exit_signal(p); |
75 | __exit_sighand(p); | ||
76 | /* | 75 | /* |
77 | * Note that the fastpath in sys_times depends on __exit_signal having | 76 | * Note that the fastpath in sys_times depends on __exit_signal having |
78 | * updated the counters before a task is removed from the tasklist of | 77 | * updated the counters before a task is removed from the tasklist of |
diff --git a/kernel/fork.c b/kernel/fork.c index fb8572a42297..7fe3adfa65cb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -743,6 +743,14 @@ int unshare_files(void) | |||
743 | 743 | ||
744 | EXPORT_SYMBOL(unshare_files); | 744 | EXPORT_SYMBOL(unshare_files); |
745 | 745 | ||
746 | void sighand_free_cb(struct rcu_head *rhp) | ||
747 | { | ||
748 | struct sighand_struct *sp; | ||
749 | |||
750 | sp = container_of(rhp, struct sighand_struct, rcu); | ||
751 | kmem_cache_free(sighand_cachep, sp); | ||
752 | } | ||
753 | |||
746 | static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) | 754 | static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) |
747 | { | 755 | { |
748 | struct sighand_struct *sig; | 756 | struct sighand_struct *sig; |
@@ -752,7 +760,7 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t | |||
752 | return 0; | 760 | return 0; |
753 | } | 761 | } |
754 | sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); | 762 | sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); |
755 | tsk->sighand = sig; | 763 | rcu_assign_pointer(tsk->sighand, sig); |
756 | if (!sig) | 764 | if (!sig) |
757 | return -ENOMEM; | 765 | return -ENOMEM; |
758 | spin_lock_init(&sig->siglock); | 766 | spin_lock_init(&sig->siglock); |
diff --git a/kernel/pid.c b/kernel/pid.c index edba31c681ac..1acc07246991 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -136,7 +136,7 @@ struct pid * fastcall find_pid(enum pid_type type, int nr) | |||
136 | struct hlist_node *elem; | 136 | struct hlist_node *elem; |
137 | struct pid *pid; | 137 | struct pid *pid; |
138 | 138 | ||
139 | hlist_for_each_entry(pid, elem, | 139 | hlist_for_each_entry_rcu(pid, elem, |
140 | &pid_hash[type][pid_hashfn(nr)], pid_chain) { | 140 | &pid_hash[type][pid_hashfn(nr)], pid_chain) { |
141 | if (pid->nr == nr) | 141 | if (pid->nr == nr) |
142 | return pid; | 142 | return pid; |
@@ -150,15 +150,15 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr) | |||
150 | 150 | ||
151 | task_pid = &task->pids[type]; | 151 | task_pid = &task->pids[type]; |
152 | pid = find_pid(type, nr); | 152 | pid = find_pid(type, nr); |
153 | task_pid->nr = nr; | ||
153 | if (pid == NULL) { | 154 | if (pid == NULL) { |
154 | hlist_add_head(&task_pid->pid_chain, | ||
155 | &pid_hash[type][pid_hashfn(nr)]); | ||
156 | INIT_LIST_HEAD(&task_pid->pid_list); | 155 | INIT_LIST_HEAD(&task_pid->pid_list); |
156 | hlist_add_head_rcu(&task_pid->pid_chain, | ||
157 | &pid_hash[type][pid_hashfn(nr)]); | ||
157 | } else { | 158 | } else { |
158 | INIT_HLIST_NODE(&task_pid->pid_chain); | 159 | INIT_HLIST_NODE(&task_pid->pid_chain); |
159 | list_add_tail(&task_pid->pid_list, &pid->pid_list); | 160 | list_add_tail_rcu(&task_pid->pid_list, &pid->pid_list); |
160 | } | 161 | } |
161 | task_pid->nr = nr; | ||
162 | 162 | ||
163 | return 0; | 163 | return 0; |
164 | } | 164 | } |
@@ -170,20 +170,20 @@ static fastcall int __detach_pid(task_t *task, enum pid_type type) | |||
170 | 170 | ||
171 | pid = &task->pids[type]; | 171 | pid = &task->pids[type]; |
172 | if (!hlist_unhashed(&pid->pid_chain)) { | 172 | if (!hlist_unhashed(&pid->pid_chain)) { |
173 | hlist_del(&pid->pid_chain); | ||
174 | 173 | ||
175 | if (list_empty(&pid->pid_list)) | 174 | if (list_empty(&pid->pid_list)) { |
176 | nr = pid->nr; | 175 | nr = pid->nr; |
177 | else { | 176 | hlist_del_rcu(&pid->pid_chain); |
177 | } else { | ||
178 | pid_next = list_entry(pid->pid_list.next, | 178 | pid_next = list_entry(pid->pid_list.next, |
179 | struct pid, pid_list); | 179 | struct pid, pid_list); |
180 | /* insert next pid from pid_list to hash */ | 180 | /* insert next pid from pid_list to hash */ |
181 | hlist_add_head(&pid_next->pid_chain, | 181 | hlist_replace_rcu(&pid->pid_chain, |
182 | &pid_hash[type][pid_hashfn(pid_next->nr)]); | 182 | &pid_next->pid_chain); |
183 | } | 183 | } |
184 | } | 184 | } |
185 | 185 | ||
186 | list_del(&pid->pid_list); | 186 | list_del_rcu(&pid->pid_list); |
187 | pid->nr = 0; | 187 | pid->nr = 0; |
188 | 188 | ||
189 | return nr; | 189 | return nr; |
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index c9afc61240e4..0a669bd2f6d1 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/init.h> | 35 | #include <linux/init.h> |
36 | #include <linux/spinlock.h> | 36 | #include <linux/spinlock.h> |
37 | #include <linux/smp.h> | 37 | #include <linux/smp.h> |
38 | #include <linux/rcupdate.h> | ||
38 | #include <linux/interrupt.h> | 39 | #include <linux/interrupt.h> |
39 | #include <linux/sched.h> | 40 | #include <linux/sched.h> |
40 | #include <asm/atomic.h> | 41 | #include <asm/atomic.h> |
diff --git a/kernel/sched.c b/kernel/sched.c index 6f46c94cc29e..92733091154c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -176,6 +176,13 @@ static unsigned int task_timeslice(task_t *p) | |||
176 | #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ | 176 | #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ |
177 | < (long long) (sd)->cache_hot_time) | 177 | < (long long) (sd)->cache_hot_time) |
178 | 178 | ||
179 | void __put_task_struct_cb(struct rcu_head *rhp) | ||
180 | { | ||
181 | __put_task_struct(container_of(rhp, struct task_struct, rcu)); | ||
182 | } | ||
183 | |||
184 | EXPORT_SYMBOL_GPL(__put_task_struct_cb); | ||
185 | |||
179 | /* | 186 | /* |
180 | * These are the runqueue data structures: | 187 | * These are the runqueue data structures: |
181 | */ | 188 | */ |
diff --git a/kernel/signal.c b/kernel/signal.c index d7611f189ef7..64737c72dadd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -329,13 +329,20 @@ void __exit_sighand(struct task_struct *tsk) | |||
329 | /* Ok, we're done with the signal handlers */ | 329 | /* Ok, we're done with the signal handlers */ |
330 | tsk->sighand = NULL; | 330 | tsk->sighand = NULL; |
331 | if (atomic_dec_and_test(&sighand->count)) | 331 | if (atomic_dec_and_test(&sighand->count)) |
332 | kmem_cache_free(sighand_cachep, sighand); | 332 | sighand_free(sighand); |
333 | } | 333 | } |
334 | 334 | ||
335 | void exit_sighand(struct task_struct *tsk) | 335 | void exit_sighand(struct task_struct *tsk) |
336 | { | 336 | { |
337 | write_lock_irq(&tasklist_lock); | 337 | write_lock_irq(&tasklist_lock); |
338 | __exit_sighand(tsk); | 338 | rcu_read_lock(); |
339 | if (tsk->sighand != NULL) { | ||
340 | struct sighand_struct *sighand = rcu_dereference(tsk->sighand); | ||
341 | spin_lock(&sighand->siglock); | ||
342 | __exit_sighand(tsk); | ||
343 | spin_unlock(&sighand->siglock); | ||
344 | } | ||
345 | rcu_read_unlock(); | ||
339 | write_unlock_irq(&tasklist_lock); | 346 | write_unlock_irq(&tasklist_lock); |
340 | } | 347 | } |
341 | 348 | ||
@@ -345,12 +352,14 @@ void exit_sighand(struct task_struct *tsk) | |||
345 | void __exit_signal(struct task_struct *tsk) | 352 | void __exit_signal(struct task_struct *tsk) |
346 | { | 353 | { |
347 | struct signal_struct * sig = tsk->signal; | 354 | struct signal_struct * sig = tsk->signal; |
348 | struct sighand_struct * sighand = tsk->sighand; | 355 | struct sighand_struct * sighand; |
349 | 356 | ||
350 | if (!sig) | 357 | if (!sig) |
351 | BUG(); | 358 | BUG(); |
352 | if (!atomic_read(&sig->count)) | 359 | if (!atomic_read(&sig->count)) |
353 | BUG(); | 360 | BUG(); |
361 | rcu_read_lock(); | ||
362 | sighand = rcu_dereference(tsk->sighand); | ||
354 | spin_lock(&sighand->siglock); | 363 | spin_lock(&sighand->siglock); |
355 | posix_cpu_timers_exit(tsk); | 364 | posix_cpu_timers_exit(tsk); |
356 | if (atomic_dec_and_test(&sig->count)) { | 365 | if (atomic_dec_and_test(&sig->count)) { |
@@ -358,6 +367,7 @@ void __exit_signal(struct task_struct *tsk) | |||
358 | if (tsk == sig->curr_target) | 367 | if (tsk == sig->curr_target) |
359 | sig->curr_target = next_thread(tsk); | 368 | sig->curr_target = next_thread(tsk); |
360 | tsk->signal = NULL; | 369 | tsk->signal = NULL; |
370 | __exit_sighand(tsk); | ||
361 | spin_unlock(&sighand->siglock); | 371 | spin_unlock(&sighand->siglock); |
362 | flush_sigqueue(&sig->shared_pending); | 372 | flush_sigqueue(&sig->shared_pending); |
363 | } else { | 373 | } else { |
@@ -389,9 +399,11 @@ void __exit_signal(struct task_struct *tsk) | |||
389 | sig->nvcsw += tsk->nvcsw; | 399 | sig->nvcsw += tsk->nvcsw; |
390 | sig->nivcsw += tsk->nivcsw; | 400 | sig->nivcsw += tsk->nivcsw; |
391 | sig->sched_time += tsk->sched_time; | 401 | sig->sched_time += tsk->sched_time; |
402 | __exit_sighand(tsk); | ||
392 | spin_unlock(&sighand->siglock); | 403 | spin_unlock(&sighand->siglock); |
393 | sig = NULL; /* Marker for below. */ | 404 | sig = NULL; /* Marker for below. */ |
394 | } | 405 | } |
406 | rcu_read_unlock(); | ||
395 | clear_tsk_thread_flag(tsk,TIF_SIGPENDING); | 407 | clear_tsk_thread_flag(tsk,TIF_SIGPENDING); |
396 | flush_sigqueue(&tsk->pending); | 408 | flush_sigqueue(&tsk->pending); |
397 | if (sig) { | 409 | if (sig) { |
@@ -1080,18 +1092,28 @@ void zap_other_threads(struct task_struct *p) | |||
1080 | } | 1092 | } |
1081 | 1093 | ||
1082 | /* | 1094 | /* |
1083 | * Must be called with the tasklist_lock held for reading! | 1095 | * Must be called under rcu_read_lock() or with tasklist_lock read-held. |
1084 | */ | 1096 | */ |
1085 | int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) | 1097 | int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) |
1086 | { | 1098 | { |
1087 | unsigned long flags; | 1099 | unsigned long flags; |
1100 | struct sighand_struct *sp; | ||
1088 | int ret; | 1101 | int ret; |
1089 | 1102 | ||
1103 | retry: | ||
1090 | ret = check_kill_permission(sig, info, p); | 1104 | ret = check_kill_permission(sig, info, p); |
1091 | if (!ret && sig && p->sighand) { | 1105 | if (!ret && sig && (sp = p->sighand)) { |
1092 | spin_lock_irqsave(&p->sighand->siglock, flags); | 1106 | if (!get_task_struct_rcu(p)) |
1107 | return -ESRCH; | ||
1108 | spin_lock_irqsave(&sp->siglock, flags); | ||
1109 | if (p->sighand != sp) { | ||
1110 | spin_unlock_irqrestore(&sp->siglock, flags); | ||
1111 | put_task_struct(p); | ||
1112 | goto retry; | ||
1113 | } | ||
1093 | ret = __group_send_sig_info(sig, info, p); | 1114 | ret = __group_send_sig_info(sig, info, p); |
1094 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 1115 | spin_unlock_irqrestore(&sp->siglock, flags); |
1116 | put_task_struct(p); | ||
1095 | } | 1117 | } |
1096 | 1118 | ||
1097 | return ret; | 1119 | return ret; |
@@ -1136,14 +1158,21 @@ int | |||
1136 | kill_proc_info(int sig, struct siginfo *info, pid_t pid) | 1158 | kill_proc_info(int sig, struct siginfo *info, pid_t pid) |
1137 | { | 1159 | { |
1138 | int error; | 1160 | int error; |
1161 | int acquired_tasklist_lock = 0; | ||
1139 | struct task_struct *p; | 1162 | struct task_struct *p; |
1140 | 1163 | ||
1141 | read_lock(&tasklist_lock); | 1164 | rcu_read_lock(); |
1165 | if (unlikely(sig_kernel_stop(sig) || sig == SIGCONT)) { | ||
1166 | read_lock(&tasklist_lock); | ||
1167 | acquired_tasklist_lock = 1; | ||
1168 | } | ||
1142 | p = find_task_by_pid(pid); | 1169 | p = find_task_by_pid(pid); |
1143 | error = -ESRCH; | 1170 | error = -ESRCH; |
1144 | if (p) | 1171 | if (p) |
1145 | error = group_send_sig_info(sig, info, p); | 1172 | error = group_send_sig_info(sig, info, p); |
1146 | read_unlock(&tasklist_lock); | 1173 | if (unlikely(acquired_tasklist_lock)) |
1174 | read_unlock(&tasklist_lock); | ||
1175 | rcu_read_unlock(); | ||
1147 | return error; | 1176 | return error; |
1148 | } | 1177 | } |
1149 | 1178 | ||
@@ -1355,16 +1384,54 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | |||
1355 | { | 1384 | { |
1356 | unsigned long flags; | 1385 | unsigned long flags; |
1357 | int ret = 0; | 1386 | int ret = 0; |
1387 | struct sighand_struct *sh; | ||
1358 | 1388 | ||
1359 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); | 1389 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); |
1360 | read_lock(&tasklist_lock); | 1390 | |
1391 | /* | ||
1392 | * The rcu based delayed sighand destroy makes it possible to | ||
1393 | * run this without tasklist lock held. The task struct itself | ||
1394 | * cannot go away as create_timer did get_task_struct(). | ||
1395 | * | ||
1396 | * We return -1 when the task is marked exiting, so that | |
1397 | * posix_timer_event can redirect the signal to the group leader. | |
1398 | */ | ||
1399 | rcu_read_lock(); | ||
1361 | 1400 | ||
1362 | if (unlikely(p->flags & PF_EXITING)) { | 1401 | if (unlikely(p->flags & PF_EXITING)) { |
1363 | ret = -1; | 1402 | ret = -1; |
1364 | goto out_err; | 1403 | goto out_err; |
1365 | } | 1404 | } |
1366 | 1405 | ||
1367 | spin_lock_irqsave(&p->sighand->siglock, flags); | 1406 | retry: |
1407 | sh = rcu_dereference(p->sighand); | ||
1408 | |||
1409 | spin_lock_irqsave(&sh->siglock, flags); | ||
1410 | if (p->sighand != sh) { | ||
1411 | /* We raced with exec() in a multithreaded process... */ | ||
1412 | spin_unlock_irqrestore(&sh->siglock, flags); | ||
1413 | goto retry; | ||
1414 | } | ||
1415 | |||
1416 | /* | ||
1417 | * We do the check here again to handle the following scenario: | ||
1418 | * | ||
1419 | * CPU 0 CPU 1 | ||
1420 | * send_sigqueue | ||
1421 | * check PF_EXITING | ||
1422 | * interrupt exit code running | ||
1423 | * __exit_signal | ||
1424 | * lock sighand->siglock | ||
1425 | * unlock sighand->siglock | ||
1426 | * lock sh->siglock | ||
1427 | * add(tsk->pending) flush_sigqueue(tsk->pending) | ||
1428 | * | ||
1429 | */ | ||
1430 | |||
1431 | if (unlikely(p->flags & PF_EXITING)) { | ||
1432 | ret = -1; | ||
1433 | goto out; | ||
1434 | } | ||
1368 | 1435 | ||
1369 | if (unlikely(!list_empty(&q->list))) { | 1436 | if (unlikely(!list_empty(&q->list))) { |
1370 | /* | 1437 | /* |
@@ -1388,9 +1455,9 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | |||
1388 | signal_wake_up(p, sig == SIGKILL); | 1455 | signal_wake_up(p, sig == SIGKILL); |
1389 | 1456 | ||
1390 | out: | 1457 | out: |
1391 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 1458 | spin_unlock_irqrestore(&sh->siglock, flags); |
1392 | out_err: | 1459 | out_err: |
1393 | read_unlock(&tasklist_lock); | 1460 | rcu_read_unlock(); |
1394 | 1461 | ||
1395 | return ret; | 1462 | return ret; |
1396 | } | 1463 | } |
@@ -1402,7 +1469,9 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | |||
1402 | int ret = 0; | 1469 | int ret = 0; |
1403 | 1470 | ||
1404 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); | 1471 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); |
1472 | |||
1405 | read_lock(&tasklist_lock); | 1473 | read_lock(&tasklist_lock); |
1474 | /* Since it_lock is held, p->sighand cannot be NULL. */ | ||
1406 | spin_lock_irqsave(&p->sighand->siglock, flags); | 1475 | spin_lock_irqsave(&p->sighand->siglock, flags); |
1407 | handle_stop_signal(sig, p); | 1476 | handle_stop_signal(sig, p); |
1408 | 1477 | ||
@@ -1436,7 +1505,7 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | |||
1436 | out: | 1505 | out: |
1437 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 1506 | spin_unlock_irqrestore(&p->sighand->siglock, flags); |
1438 | read_unlock(&tasklist_lock); | 1507 | read_unlock(&tasklist_lock); |
1439 | return(ret); | 1508 | return ret; |
1440 | } | 1509 | } |
1441 | 1510 | ||
1442 | /* | 1511 | /* |