aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sys.c
diff options
context:
space:
mode:
author: Ravikiran G Thirumalai <kiran@scalex86.org> 2006-03-23 06:00:13 -0500
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-03-23 10:38:09 -0500
commit: 2dd0ebcd2ab7b18a50c0810ddb45a84316e4ee2e (patch)
tree: 17518ebe86c2b701aa4a74fcac025c1793a87a87 /kernel/sys.c
parent: 0c9e63fd38a2fb2181668a0cdd622a3c23cfd567 (diff)
[PATCH] Avoid taking global tasklist_lock for single threaded process at getrusage()
Avoid taking the global tasklist_lock when possible, if a process is single threaded during getrusage(). Any avoidance of tasklist_lock is good for NUMA boxes (and possibly for large SMPs). Thanks to Oleg Nesterov for review and suggestions.

Signed-off-by: Nippun Goel <nippung@calsoftinc.com>
Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Signed-off-by: Shai Fultheim <shai@scalex86.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel/sys.c')
-rw-r--r-- kernel/sys.c 42
1 files changed, 34 insertions, 8 deletions
diff --git a/kernel/sys.c b/kernel/sys.c
index f91218a5463e..4941b9b14b97 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1677,9 +1677,6 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
1677 * a lot simpler! (Which we're not doing right now because we're not 1677 * a lot simpler! (Which we're not doing right now because we're not
1678 * measuring them yet). 1678 * measuring them yet).
1679 * 1679 *
1680 * This expects to be called with tasklist_lock read-locked or better,
1681 * and the siglock not locked. It may momentarily take the siglock.
1682 *
1683 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have 1680 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
1684 * races with threads incrementing their own counters. But since word 1681 * races with threads incrementing their own counters. But since word
1685 * reads are atomic, we either get new values or old values and we don't 1682 * reads are atomic, we either get new values or old values and we don't
@@ -1687,6 +1684,25 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
1687 * the c* fields from p->signal from races with exit.c updating those 1684 * the c* fields from p->signal from races with exit.c updating those
1688 * fields when reaping, so a sample either gets all the additions of a 1685 * fields when reaping, so a sample either gets all the additions of a
1689 * given child after it's reaped, or none so this sample is before reaping. 1686 * given child after it's reaped, or none so this sample is before reaping.
1687 *
1688 * tasklist_lock locking optimisation:
1689 * If we are current and single threaded, we do not need to take the tasklist
1690 * lock or the siglock. No one else can take our signal_struct away,
1691 * no one else can reap the children to update signal->c* counters, and
1692 * no one else can race with the signal-> fields.
1693 * If we do not take the tasklist_lock, the signal-> fields could be read
1694 * out of order while another thread was just exiting. So we place a
1695 * read memory barrier when we avoid the lock. On the writer side,
1696 * write memory barrier is implied in __exit_signal as __exit_signal releases
1697 * the siglock spinlock after updating the signal-> fields.
1698 *
1699 * We don't really need the siglock when we access the non c* fields
1700 * of the signal_struct (for RUSAGE_SELF) even in multithreaded
1701 * case, since we take the tasklist lock for read and the non c* signal->
1702 * fields are updated only in __exit_signal, which is called with
1703 * tasklist_lock taken for write, hence these two threads cannot execute
1704 * concurrently.
1705 *
1690 */ 1706 */
1691 1707
1692static void k_getrusage(struct task_struct *p, int who, struct rusage *r) 1708static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
@@ -1694,13 +1710,23 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1694 struct task_struct *t; 1710 struct task_struct *t;
1695 unsigned long flags; 1711 unsigned long flags;
1696 cputime_t utime, stime; 1712 cputime_t utime, stime;
1713 int need_lock = 0;
1697 1714
1698 memset((char *) r, 0, sizeof *r); 1715 memset((char *) r, 0, sizeof *r);
1716 utime = stime = cputime_zero;
1699 1717
1700 if (unlikely(!p->signal)) 1718 if (p != current || !thread_group_empty(p))
1701 return; 1719 need_lock = 1;
1702 1720
1703 utime = stime = cputime_zero; 1721 if (need_lock) {
1722 read_lock(&tasklist_lock);
1723 if (unlikely(!p->signal)) {
1724 read_unlock(&tasklist_lock);
1725 return;
1726 }
1727 } else
1728 /* See locking comments above */
1729 smp_rmb();
1704 1730
1705 switch (who) { 1731 switch (who) {
1706 case RUSAGE_BOTH: 1732 case RUSAGE_BOTH:
@@ -1740,6 +1766,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1740 BUG(); 1766 BUG();
1741 } 1767 }
1742 1768
1769 if (need_lock)
1770 read_unlock(&tasklist_lock);
1743 cputime_to_timeval(utime, &r->ru_utime); 1771 cputime_to_timeval(utime, &r->ru_utime);
1744 cputime_to_timeval(stime, &r->ru_stime); 1772 cputime_to_timeval(stime, &r->ru_stime);
1745} 1773}
@@ -1747,9 +1775,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1747int getrusage(struct task_struct *p, int who, struct rusage __user *ru) 1775int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
1748{ 1776{
1749 struct rusage r; 1777 struct rusage r;
1750 read_lock(&tasklist_lock);
1751 k_getrusage(p, who, &r); 1778 k_getrusage(p, who, &r);
1752 read_unlock(&tasklist_lock);
1753 return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; 1779 return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1754} 1780}
1755 1781