aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2008-02-08 07:19:14 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-08 12:22:27 -0500
commit161550d74c07303ffa6187ba776f62df5a906a21 (patch)
treec47ece377d76141ba48b3e7ffd2d7dbd31c906e1 /kernel
parent5dee1707dfbfc55eb7569b9ae5abaf932bd4c377 (diff)
pid: sys_wait... fixes
This modifies do_wait and eligible_child to take a pair of enum pid_type and struct pid *pid to precisely specify which set of processes is eligible to be waited for, instead of the raw pid_t value from sys_wait4. This fixes a bug in sys_waitid where you could not wait for children in just process group 1 (the pgid was negated to -1, which do_wait treated as "wait for any child"). This fixes a pid namespace crossing case in eligible_child, allowing us to wait for processes in our current process group even if our current process group == 0. This allows the "no child with this pid" case to be optimized. This allows the pid membership test in eligible_child to be optimized. This even closes a theoretical pid wraparound race: in a threaded parent, if two threads are waiting for the same child, and one thread picks up the child, and the pid numbers wrap around and generate another child with that same pid before the other thread is scheduled (terribly, insanely unlikely), we could end up waiting on the second child with the same pid# and not discover that the specific child we were waiting for has exited. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Cc: Oleg Nesterov <oleg@tv-sign.ru> Cc: Pavel Emelyanov <xemul@openvz.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/exit.c80
1 files changed, 56 insertions, 24 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 2b332d170327..2567de3487bd 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1090,20 +1090,23 @@ asmlinkage void sys_exit_group(int error_code)
1090 do_group_exit((error_code & 0xff) << 8); 1090 do_group_exit((error_code & 0xff) << 8);
1091} 1091}
1092 1092
1093static int eligible_child(pid_t pid, int options, struct task_struct *p) 1093static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
1094{
1095 struct pid *pid = NULL;
1096 if (type == PIDTYPE_PID)
1097 pid = task->pids[type].pid;
1098 else if (type < PIDTYPE_MAX)
1099 pid = task->group_leader->pids[type].pid;
1100 return pid;
1101}
1102
1103static int eligible_child(enum pid_type type, struct pid *pid, int options,
1104 struct task_struct *p)
1094{ 1105{
1095 int err; 1106 int err;
1096 struct pid_namespace *ns;
1097 1107
1098 ns = current->nsproxy->pid_ns; 1108 if (type < PIDTYPE_MAX) {
1099 if (pid > 0) { 1109 if (task_pid_type(p, type) != pid)
1100 if (task_pid_nr_ns(p, ns) != pid)
1101 return 0;
1102 } else if (!pid) {
1103 if (task_pgrp_nr_ns(p, ns) != task_pgrp_vnr(current))
1104 return 0;
1105 } else if (pid != -1) {
1106 if (task_pgrp_nr_ns(p, ns) != -pid)
1107 return 0; 1110 return 0;
1108 } 1111 }
1109 1112
@@ -1127,7 +1130,7 @@ static int eligible_child(pid_t pid, int options, struct task_struct *p)
1127 if (likely(!err)) 1130 if (likely(!err))
1128 return 1; 1131 return 1;
1129 1132
1130 if (pid <= 0) 1133 if (type != PIDTYPE_PID)
1131 return 0; 1134 return 0;
1132 /* This child was explicitly requested, abort */ 1135 /* This child was explicitly requested, abort */
1133 read_unlock(&tasklist_lock); 1136 read_unlock(&tasklist_lock);
@@ -1447,8 +1450,9 @@ static int wait_task_continued(struct task_struct *p, int noreap,
1447 return retval; 1450 return retval;
1448} 1451}
1449 1452
1450static long do_wait(pid_t pid, int options, struct siginfo __user *infop, 1453static long do_wait(enum pid_type type, struct pid *pid, int options,
1451 int __user *stat_addr, struct rusage __user *ru) 1454 struct siginfo __user *infop, int __user *stat_addr,
1455 struct rusage __user *ru)
1452{ 1456{
1453 DECLARE_WAITQUEUE(wait, current); 1457 DECLARE_WAITQUEUE(wait, current);
1454 struct task_struct *tsk; 1458 struct task_struct *tsk;
@@ -1456,6 +1460,11 @@ static long do_wait(pid_t pid, int options, struct siginfo __user *infop,
1456 1460
1457 add_wait_queue(&current->signal->wait_chldexit,&wait); 1461 add_wait_queue(&current->signal->wait_chldexit,&wait);
1458repeat: 1462repeat:
 1463 /* If there is nothing that can match our criteria, just get out */
1464 retval = -ECHILD;
1465 if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type])))
1466 goto end;
1467
1459 /* 1468 /*
1460 * We will set this flag if we see any child that might later 1469 * We will set this flag if we see any child that might later
1461 * match our criteria, even if we are not able to reap it yet. 1470 * match our criteria, even if we are not able to reap it yet.
@@ -1468,7 +1477,7 @@ repeat:
1468 struct task_struct *p; 1477 struct task_struct *p;
1469 1478
1470 list_for_each_entry(p, &tsk->children, sibling) { 1479 list_for_each_entry(p, &tsk->children, sibling) {
1471 int ret = eligible_child(pid, options, p); 1480 int ret = eligible_child(type, pid, options, p);
1472 if (!ret) 1481 if (!ret)
1473 continue; 1482 continue;
1474 1483
@@ -1515,7 +1524,7 @@ repeat:
1515 if (!flag) { 1524 if (!flag) {
1516 list_for_each_entry(p, &tsk->ptrace_children, 1525 list_for_each_entry(p, &tsk->ptrace_children,
1517 ptrace_list) { 1526 ptrace_list) {
1518 flag = eligible_child(pid, options, p); 1527 flag = eligible_child(type, pid, options, p);
1519 if (!flag) 1528 if (!flag)
1520 continue; 1529 continue;
1521 if (likely(flag > 0)) 1530 if (likely(flag > 0))
@@ -1570,10 +1579,12 @@ end:
1570 return retval; 1579 return retval;
1571} 1580}
1572 1581
1573asmlinkage long sys_waitid(int which, pid_t pid, 1582asmlinkage long sys_waitid(int which, pid_t upid,
1574 struct siginfo __user *infop, int options, 1583 struct siginfo __user *infop, int options,
1575 struct rusage __user *ru) 1584 struct rusage __user *ru)
1576{ 1585{
1586 struct pid *pid = NULL;
1587 enum pid_type type;
1577 long ret; 1588 long ret;
1578 1589
1579 if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED)) 1590 if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED))
@@ -1583,37 +1594,58 @@ asmlinkage long sys_waitid(int which, pid_t pid,
1583 1594
1584 switch (which) { 1595 switch (which) {
1585 case P_ALL: 1596 case P_ALL:
1586 pid = -1; 1597 type = PIDTYPE_MAX;
1587 break; 1598 break;
1588 case P_PID: 1599 case P_PID:
1589 if (pid <= 0) 1600 type = PIDTYPE_PID;
1601 if (upid <= 0)
1590 return -EINVAL; 1602 return -EINVAL;
1591 break; 1603 break;
1592 case P_PGID: 1604 case P_PGID:
1593 if (pid <= 0) 1605 type = PIDTYPE_PGID;
1606 if (upid <= 0)
1594 return -EINVAL; 1607 return -EINVAL;
1595 pid = -pid;
1596 break; 1608 break;
1597 default: 1609 default:
1598 return -EINVAL; 1610 return -EINVAL;
1599 } 1611 }
1600 1612
1601 ret = do_wait(pid, options, infop, NULL, ru); 1613 if (type < PIDTYPE_MAX)
1614 pid = find_get_pid(upid);
1615 ret = do_wait(type, pid, options, infop, NULL, ru);
1616 put_pid(pid);
1602 1617
1603 /* avoid REGPARM breakage on x86: */ 1618 /* avoid REGPARM breakage on x86: */
1604 prevent_tail_call(ret); 1619 prevent_tail_call(ret);
1605 return ret; 1620 return ret;
1606} 1621}
1607 1622
1608asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr, 1623asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr,
1609 int options, struct rusage __user *ru) 1624 int options, struct rusage __user *ru)
1610{ 1625{
1626 struct pid *pid = NULL;
1627 enum pid_type type;
1611 long ret; 1628 long ret;
1612 1629
1613 if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| 1630 if (options & ~(WNOHANG|WUNTRACED|WCONTINUED|
1614 __WNOTHREAD|__WCLONE|__WALL)) 1631 __WNOTHREAD|__WCLONE|__WALL))
1615 return -EINVAL; 1632 return -EINVAL;
1616 ret = do_wait(pid, options | WEXITED, NULL, stat_addr, ru); 1633
1634 if (upid == -1)
1635 type = PIDTYPE_MAX;
1636 else if (upid < 0) {
1637 type = PIDTYPE_PGID;
1638 pid = find_get_pid(-upid);
1639 } else if (upid == 0) {
1640 type = PIDTYPE_PGID;
1641 pid = get_pid(task_pgrp(current));
1642 } else /* upid > 0 */ {
1643 type = PIDTYPE_PID;
1644 pid = find_get_pid(upid);
1645 }
1646
1647 ret = do_wait(type, pid, options | WEXITED, NULL, stat_addr, ru);
1648 put_pid(pid);
1617 1649
1618 /* avoid REGPARM breakage on x86: */ 1650 /* avoid REGPARM breakage on x86: */
1619 prevent_tail_call(ret); 1651 prevent_tail_call(ret);