diff options
author | Oleg Nesterov <oleg@tv-sign.ru> | 2008-02-08 07:19:01 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-08 12:22:26 -0500 |
commit | ee7c82da830ea860b1f9274f1f0cdf99f206e7c2 (patch) | |
tree | 449477479b25ec73a1c2242723b3fb1c09cd936d /kernel/exit.c | |
parent | 6405f7f4675884b671bee66678e1c2859bdb0e56 (diff) |
wait_task_stopped: simplify and fix races with SIGCONT/SIGKILL/untrace
wait_task_stopped() has multiple races with SIGCONT/SIGKILL. tasklist_lock
does not pin the child in TASK_TRACED/TASK_STOPPED stated, almost all info
reported (including exit_code) may be wrong.
In fact, the code under write_lock_irq(tasklist_lock) is not safe. The child
may be PTRACE_DETACH'ed at this time by another subthread, in that case it is
possible we are no longer its ->parent.
Change wait_task_stopped() to take ->siglock before inspecting the task. This
guarantees that the child can't resume and (for example) clear its
->exit_code, so we don't need to use xchg(&p->exit_code) and re-check. The
only exception is ptrace_stop() which changes ->state and ->exit_code without
->siglock held during abort. But this can only happen if both the tracer and
the tracee are dying (coredump is in progress), we don't care.
With this patch wait_task_stopped() doesn't move the child to the end of
the ->parent list on success. This optimization could be restored, but
in that case we have to take write_lock(tasklist) and do some nasty
checks.
Also change the do_wait() since we don't return EAGAIN any longer.
[akpm@linux-foundation.org: fix up after Willy renamed everything]
Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/exit.c')
-rw-r--r-- | kernel/exit.c | 88 |
1 files changed, 26 insertions, 62 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index b5ff2b121093..da293ac7e379 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -1355,17 +1355,35 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader, | |||
1355 | int noreap, struct siginfo __user *infop, | 1355 | int noreap, struct siginfo __user *infop, |
1356 | int __user *stat_addr, struct rusage __user *ru) | 1356 | int __user *stat_addr, struct rusage __user *ru) |
1357 | { | 1357 | { |
1358 | int retval, exit_code; | 1358 | int retval, exit_code, why; |
1359 | uid_t uid = 0; /* unneeded, required by compiler */ | ||
1359 | pid_t pid; | 1360 | pid_t pid; |
1360 | 1361 | ||
1361 | if (!p->exit_code) | 1362 | exit_code = 0; |
1362 | return 0; | 1363 | spin_lock_irq(&p->sighand->siglock); |
1364 | |||
1365 | if (unlikely(!task_is_stopped_or_traced(p))) | ||
1366 | goto unlock_sig; | ||
1367 | |||
1363 | if (delayed_group_leader && !(p->ptrace & PT_PTRACED) && | 1368 | if (delayed_group_leader && !(p->ptrace & PT_PTRACED) && |
1364 | p->signal->group_stop_count > 0) | 1369 | p->signal->group_stop_count > 0) |
1365 | /* | 1370 | /* |
1366 | * A group stop is in progress and this is the group leader. | 1371 | * A group stop is in progress and this is the group leader. |
1367 | * We won't report until all threads have stopped. | 1372 | * We won't report until all threads have stopped. |
1368 | */ | 1373 | */ |
1374 | goto unlock_sig; | ||
1375 | |||
1376 | exit_code = p->exit_code; | ||
1377 | if (!exit_code) | ||
1378 | goto unlock_sig; | ||
1379 | |||
1380 | if (!noreap) | ||
1381 | p->exit_code = 0; | ||
1382 | |||
1383 | uid = p->uid; | ||
1384 | unlock_sig: | ||
1385 | spin_unlock_irq(&p->sighand->siglock); | ||
1386 | if (!exit_code) | ||
1369 | return 0; | 1387 | return 0; |
1370 | 1388 | ||
1371 | /* | 1389 | /* |
@@ -1375,65 +1393,15 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader, | |||
1375 | * keep holding onto the tasklist_lock while we call getrusage and | 1393 | * keep holding onto the tasklist_lock while we call getrusage and |
1376 | * possibly take page faults for user memory. | 1394 | * possibly take page faults for user memory. |
1377 | */ | 1395 | */ |
1378 | pid = task_pid_nr_ns(p, current->nsproxy->pid_ns); | ||
1379 | get_task_struct(p); | 1396 | get_task_struct(p); |
1397 | pid = task_pid_nr_ns(p, current->nsproxy->pid_ns); | ||
1398 | why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; | ||
1380 | read_unlock(&tasklist_lock); | 1399 | read_unlock(&tasklist_lock); |
1381 | 1400 | ||
1382 | if (unlikely(noreap)) { | 1401 | if (unlikely(noreap)) |
1383 | uid_t uid = p->uid; | ||
1384 | int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; | ||
1385 | |||
1386 | exit_code = p->exit_code; | ||
1387 | if (unlikely(!exit_code) || unlikely(p->exit_state)) | ||
1388 | goto bail_ref; | ||
1389 | return wait_noreap_copyout(p, pid, uid, | 1402 | return wait_noreap_copyout(p, pid, uid, |
1390 | why, exit_code, | 1403 | why, exit_code, |
1391 | infop, ru); | 1404 | infop, ru); |
1392 | } | ||
1393 | |||
1394 | write_lock_irq(&tasklist_lock); | ||
1395 | |||
1396 | /* | ||
1397 | * This uses xchg to be atomic with the thread resuming and setting | ||
1398 | * it. It must also be done with the write lock held to prevent a | ||
1399 | * race with the EXIT_ZOMBIE case. | ||
1400 | */ | ||
1401 | exit_code = xchg(&p->exit_code, 0); | ||
1402 | if (unlikely(p->exit_state)) { | ||
1403 | /* | ||
1404 | * The task resumed and then died. Let the next iteration | ||
1405 | * catch it in EXIT_ZOMBIE. Note that exit_code might | ||
1406 | * already be zero here if it resumed and did _exit(0). | ||
1407 | * The task itself is dead and won't touch exit_code again; | ||
1408 | * other processors in this function are locked out. | ||
1409 | */ | ||
1410 | p->exit_code = exit_code; | ||
1411 | exit_code = 0; | ||
1412 | } | ||
1413 | if (unlikely(exit_code == 0)) { | ||
1414 | /* | ||
1415 | * Another thread in this function got to it first, or it | ||
1416 | * resumed, or it resumed and then died. | ||
1417 | */ | ||
1418 | write_unlock_irq(&tasklist_lock); | ||
1419 | bail_ref: | ||
1420 | put_task_struct(p); | ||
1421 | /* | ||
1422 | * We are returning to the wait loop without having successfully | ||
1423 | * removed the process and having released the lock. We cannot | ||
1424 | * continue, since the "p" task pointer is potentially stale. | ||
1425 | * | ||
1426 | * Return -EAGAIN, and do_wait() will restart the loop from the | ||
1427 | * beginning. Do _not_ re-acquire the lock. | ||
1428 | */ | ||
1429 | return -EAGAIN; | ||
1430 | } | ||
1431 | |||
1432 | /* move to end of parent's list to avoid starvation */ | ||
1433 | remove_parent(p); | ||
1434 | add_parent(p); | ||
1435 | |||
1436 | write_unlock_irq(&tasklist_lock); | ||
1437 | 1405 | ||
1438 | retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; | 1406 | retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; |
1439 | if (!retval && stat_addr) | 1407 | if (!retval && stat_addr) |
@@ -1443,15 +1411,13 @@ bail_ref: | |||
1443 | if (!retval && infop) | 1411 | if (!retval && infop) |
1444 | retval = put_user(0, &infop->si_errno); | 1412 | retval = put_user(0, &infop->si_errno); |
1445 | if (!retval && infop) | 1413 | if (!retval && infop) |
1446 | retval = put_user((short)((p->ptrace & PT_PTRACED) | 1414 | retval = put_user(why, &infop->si_code); |
1447 | ? CLD_TRAPPED : CLD_STOPPED), | ||
1448 | &infop->si_code); | ||
1449 | if (!retval && infop) | 1415 | if (!retval && infop) |
1450 | retval = put_user(exit_code, &infop->si_status); | 1416 | retval = put_user(exit_code, &infop->si_status); |
1451 | if (!retval && infop) | 1417 | if (!retval && infop) |
1452 | retval = put_user(pid, &infop->si_pid); | 1418 | retval = put_user(pid, &infop->si_pid); |
1453 | if (!retval && infop) | 1419 | if (!retval && infop) |
1454 | retval = put_user(p->uid, &infop->si_uid); | 1420 | retval = put_user(uid, &infop->si_uid); |
1455 | if (!retval) | 1421 | if (!retval) |
1456 | retval = pid; | 1422 | retval = pid; |
1457 | put_task_struct(p); | 1423 | put_task_struct(p); |
@@ -1558,8 +1524,6 @@ repeat: | |||
1558 | retval = wait_task_stopped(p, ret == 2, | 1524 | retval = wait_task_stopped(p, ret == 2, |
1559 | (options & WNOWAIT), infop, | 1525 | (options & WNOWAIT), infop, |
1560 | stat_addr, ru); | 1526 | stat_addr, ru); |
1561 | if (retval == -EAGAIN) | ||
1562 | goto repeat; | ||
1563 | if (retval != 0) /* He released the lock. */ | 1527 | if (retval != 0) /* He released the lock. */ |
1564 | goto end; | 1528 | goto end; |
1565 | } else if (p->exit_state == EXIT_ZOMBIE) { | 1529 | } else if (p->exit_state == EXIT_ZOMBIE) { |