author     Ravikiran G Thirumalai <kiran@scalex86.org>   2006-03-23 06:00:13 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>         2006-03-23 10:38:09 -0500
commit     2dd0ebcd2ab7b18a50c0810ddb45a84316e4ee2e (patch)
tree       17518ebe86c2b701aa4a74fcac025c1793a87a87
parent     0c9e63fd38a2fb2181668a0cdd622a3c23cfd567 (diff)
[PATCH] Avoid taking global tasklist_lock for single threaded process at getrusage()
Avoid taking the global tasklist_lock when a process is single threaded
during getrusage(). Any avoidance of tasklist_lock is good for NUMA boxes
(and possibly for large SMPs). Thanks to Oleg Nesterov for review and
suggestions.
Signed-off-by: Nippun Goel <nippung@calsoftinc.com>
Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Signed-off-by: Shai Fultheim <shai@scalex86.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--   kernel/sys.c | 42
1 file changed, 34 insertions(+), 8 deletions(-)
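In outline, the patch makes k_getrusage() decide up front whether tasklist_lock
is needed at all. The following condensed sketch is not the patch itself: the
wrapper name is invented for illustration, while the identifiers inside it
(need_lock, thread_group_empty(), tasklist_lock, smp_rmb()) are the ones used
by the diff below and come from the usual <linux/sched.h> declarations.

	/*
	 * Condensed illustration of the locking decision the patch adds to
	 * k_getrusage(); see the real hunks below.
	 */
	static void k_getrusage_fastpath_sketch(struct task_struct *p)
	{
		int need_lock = (p != current || !thread_group_empty(p));

		if (need_lock) {
			/* Someone else's stats, or we have siblings: take the lock. */
			read_lock(&tasklist_lock);
			/* ... sample p->signal and the thread group ... */
			read_unlock(&tasklist_lock);
		} else {
			/*
			 * Single threaded and current: lock-free.  Order our reads
			 * of the signal-> fields against updates made by a thread
			 * that just exited (pairs with the write barrier implied by
			 * the siglock release in __exit_signal()).
			 */
			smp_rmb();
			/* ... sample our own signal_struct ... */
		}
	}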
diff --git a/kernel/sys.c b/kernel/sys.c
index f91218a5463e..4941b9b14b97 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1677,9 +1677,6 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
  * a lot simpler! (Which we're not doing right now because we're not
  * measuring them yet).
  *
- * This expects to be called with tasklist_lock read-locked or better,
- * and the siglock not locked. It may momentarily take the siglock.
- *
  * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
  * races with threads incrementing their own counters. But since word
  * reads are atomic, we either get new values or old values and we don't
@@ -1687,6 +1684,25 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
  * the c* fields from p->signal from races with exit.c updating those
  * fields when reaping, so a sample either gets all the additions of a
  * given child after it's reaped, or none so this sample is before reaping.
+ *
+ * tasklist_lock locking optimisation:
+ * If we are current and single threaded, we do not need to take the tasklist
+ * lock or the siglock.  No one else can take our signal_struct away,
+ * no one else can reap the children to update signal->c* counters, and
+ * no one else can race with the signal-> fields.
+ * If we do not take the tasklist_lock, the signal-> fields could be read
+ * out of order while another thread was just exiting.  So we place a
+ * read memory barrier when we avoid the lock.  On the writer side,
+ * write memory barrier is implied in __exit_signal as __exit_signal releases
+ * the siglock spinlock after updating the signal-> fields.
+ *
+ * We don't really need the siglock when we access the non c* fields
+ * of the signal_struct (for RUSAGE_SELF) even in multithreaded
+ * case, since we take the tasklist lock for read and the non c* signal->
+ * fields are updated only in __exit_signal, which is called with
+ * tasklist_lock taken for write, hence these two threads cannot execute
+ * concurrently.
+ *
  */
 
 static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
@@ -1694,13 +1710,23 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
         struct task_struct *t;
         unsigned long flags;
         cputime_t utime, stime;
+        int need_lock = 0;
 
         memset((char *) r, 0, sizeof *r);
+        utime = stime = cputime_zero;
 
-        if (unlikely(!p->signal))
-                return;
+        if (p != current || !thread_group_empty(p))
+                need_lock = 1;
 
-        utime = stime = cputime_zero;
+        if (need_lock) {
+                read_lock(&tasklist_lock);
+                if (unlikely(!p->signal)) {
+                        read_unlock(&tasklist_lock);
+                        return;
+                }
+        } else
+                /* See locking comments above */
+                smp_rmb();
 
         switch (who) {
         case RUSAGE_BOTH:
@@ -1740,6 +1766,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
                 BUG();
         }
 
+        if (need_lock)
+                read_unlock(&tasklist_lock);
         cputime_to_timeval(utime, &r->ru_utime);
         cputime_to_timeval(stime, &r->ru_stime);
 }
@@ -1747,9 +1775,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
 {
         struct rusage r;
-        read_lock(&tasklist_lock);
         k_getrusage(p, who, &r);
-        read_unlock(&tasklist_lock);
         return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
 }
 
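For context, the path being optimised is the ordinary getrusage(2) syscall: a
single-threaded process asking about itself is exactly the RUSAGE_SELF case
that now skips tasklist_lock. A minimal userspace example follows; it uses only
the standard libc API and nothing specific to this patch.

	#include <stdio.h>
	#include <sys/resource.h>

	int main(void)
	{
		struct rusage ru;

		/*
		 * Single-threaded RUSAGE_SELF: after this patch the kernel
		 * services this call without taking tasklist_lock.
		 */
		if (getrusage(RUSAGE_SELF, &ru) != 0) {
			perror("getrusage");
			return 1;
		}

		printf("user time:   %ld.%06ld s\n",
		       (long)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec);
		printf("system time: %ld.%06ld s\n",
		       (long)ru.ru_stime.tv_sec, (long)ru.ru_stime.tv_usec);
		return 0;
	}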