| field | value | date |
|---|---|---|
| author | Ravikiran G Thirumalai <kiran@scalex86.org> | 2006-03-23 06:00:13 -0500 |
| committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-23 10:38:09 -0500 |
| commit | 2dd0ebcd2ab7b18a50c0810ddb45a84316e4ee2e | |
| tree | 17518ebe86c2b701aa4a74fcac025c1793a87a87 /kernel | |
| parent | 0c9e63fd38a2fb2181668a0cdd622a3c23cfd567 | |
[PATCH] Avoid taking global tasklist_lock for single threaded process at getrusage()
Avoid taking the global tasklist_lock during getrusage() when the process is
single threaded. Any avoidance of tasklist_lock is good for
NUMA boxes (and possibly for large SMPs). Thanks to Oleg Nesterov for
review and suggestions.
Signed-off-by: Nippun Goel <nippung@calsoftinc.com>
Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Signed-off-by: Shai Fultheim <shai@scalex86.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
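For context, the case the patch optimises is an ordinary getrusage(2) call made by a single threaded process about itself; with this change that lookup no longer takes the global tasklist_lock inside the kernel. A minimal userspace sketch of such a caller (illustrative only, not part of the patch):

```c
/* Minimal single threaded caller of getrusage(2).  With this patch,
 * the RUSAGE_SELF path for a single threaded task skips tasklist_lock. */
#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
	struct rusage ru;

	if (getrusage(RUSAGE_SELF, &ru) != 0) {
		perror("getrusage");
		return 1;
	}
	printf("user time:   %ld.%06ld s\n",
	       (long)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec);
	printf("system time: %ld.%06ld s\n",
	       (long)ru.ru_stime.tv_sec, (long)ru.ru_stime.tv_usec);
	return 0;
}
```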
Diffstat (limited to 'kernel')
| mode | path | lines |
|---|---|---|
| -rw-r--r-- | kernel/sys.c | 42 |

1 file changed, 34 insertions, 8 deletions
diff --git a/kernel/sys.c b/kernel/sys.c
index f91218a5463e..4941b9b14b97 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1677,9 +1677,6 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
  * a lot simpler! (Which we're not doing right now because we're not
  * measuring them yet).
  *
- * This expects to be called with tasklist_lock read-locked or better,
- * and the siglock not locked. It may momentarily take the siglock.
- *
  * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
  * races with threads incrementing their own counters. But since word
  * reads are atomic, we either get new values or old values and we don't
@@ -1687,6 +1684,25 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
  * the c* fields from p->signal from races with exit.c updating those
  * fields when reaping, so a sample either gets all the additions of a
  * given child after it's reaped, or none so this sample is before reaping.
+ *
+ * tasklist_lock locking optimisation:
+ * If we are current and single threaded, we do not need to take the tasklist
+ * lock or the siglock. No one else can take our signal_struct away,
+ * no one else can reap the children to update signal->c* counters, and
+ * no one else can race with the signal-> fields.
+ * If we do not take the tasklist_lock, the signal-> fields could be read
+ * out of order while another thread was just exiting. So we place a
+ * read memory barrier when we avoid the lock. On the writer side,
+ * write memory barrier is implied in __exit_signal as __exit_signal releases
+ * the siglock spinlock after updating the signal-> fields.
+ *
+ * We don't really need the siglock when we access the non c* fields
+ * of the signal_struct (for RUSAGE_SELF) even in multithreaded
+ * case, since we take the tasklist lock for read and the non c* signal->
+ * fields are updated only in __exit_signal, which is called with
+ * tasklist_lock taken for write, hence these two threads cannot execute
+ * concurrently.
+ *
  */
 
 static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
@@ -1694,13 +1710,23 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 	struct task_struct *t;
 	unsigned long flags;
 	cputime_t utime, stime;
+	int need_lock = 0;
 
 	memset((char *) r, 0, sizeof *r);
+	utime = stime = cputime_zero;
 
-	if (unlikely(!p->signal))
-		return;
+	if (p != current || !thread_group_empty(p))
+		need_lock = 1;
 
-	utime = stime = cputime_zero;
+	if (need_lock) {
+		read_lock(&tasklist_lock);
+		if (unlikely(!p->signal)) {
+			read_unlock(&tasklist_lock);
+			return;
+		}
+	} else
+		/* See locking comments above */
+		smp_rmb();
 
 	switch (who) {
 	case RUSAGE_BOTH:
@@ -1740,6 +1766,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 		BUG();
 	}
 
+	if (need_lock)
+		read_unlock(&tasklist_lock);
 	cputime_to_timeval(utime, &r->ru_utime);
 	cputime_to_timeval(stime, &r->ru_stime);
 }
@@ -1747,9 +1775,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
 {
 	struct rusage r;
-	read_lock(&tasklist_lock);
 	k_getrusage(p, who, &r);
-	read_unlock(&tasklist_lock);
 	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
 }
 
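The new comment block pairs the reader-side smp_rmb() with the write ordering implied when __exit_signal() drops the siglock after updating the signal-> fields. A rough userspace analogue of that pairing, sketched with C11 fences; the names (field_a, field_b, published, writer) are purely illustrative and not taken from the kernel:

```c
/* Rough analogue of the ordering relied on by the patch: the writer
 * publishes its updates with release semantics (as the siglock unlock
 * in __exit_signal does), and the lock-free reader issues a read
 * barrier before sampling the fields (as the smp_rmb() in k_getrusage
 * does).  Illustrative only; build with -pthread. */
#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static long field_a, field_b;	/* stand-ins for the signal-> fields   */
static atomic_int published;	/* stand-in for "writer has finished"  */

static void *writer(void *arg)
{
	field_a = 1;
	field_b = 2;
	/* Release: order the field updates before the flag, like the
	 * implied write barrier when __exit_signal drops the siglock. */
	atomic_store_explicit(&published, 1, memory_order_release);
	return arg;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, writer, NULL);

	/* Reader side: wait for the flag, then a read barrier before
	 * sampling the fields, like the smp_rmb() in k_getrusage. */
	while (!atomic_load_explicit(&published, memory_order_relaxed))
		;
	atomic_thread_fence(memory_order_acquire);

	printf("a=%ld b=%ld\n", field_a, field_b);

	pthread_join(tid, NULL);
	return 0;
}
```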