diff options
author | Ravikiran G Thirumalai <kiran@scalex86.org> | 2006-06-22 17:47:26 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-06-22 18:05:57 -0400 |
commit | de047c1bcd7f7bcfbdc29eb5b439fb332594da3f (patch) | |
tree | ff0d52044559971b8d68aaeb00fc7b6a876b3a3d /kernel | |
parent | c89681ed7d0e4a61d35bdc12c06c6733b718b2cb (diff) |
[PATCH] avoid tasklist_lock at getrusage for multithreaded case too
Avoid taking tasklist_lock at getrusage for the multithreaded case too.
We don't need to take the tasklist lock for thread traversal of a process
since Oleg's do-__unhash_process-under-siglock.patch and related work.
Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sys.c | 56 |
1 files changed, 22 insertions, 34 deletions
diff --git a/kernel/sys.c b/kernel/sys.c index 0b6ec0e7936f..fc9ebbbaba0c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1860,23 +1860,20 @@ out: | |||
1860 | * fields when reaping, so a sample either gets all the additions of a | 1860 | * fields when reaping, so a sample either gets all the additions of a |
1861 | * given child after it's reaped, or none so this sample is before reaping. | 1861 | * given child after it's reaped, or none so this sample is before reaping. |
1862 | * | 1862 | * |
1863 | * tasklist_lock locking optimisation: | 1863 | * Locking: |
1864 | * If we are current and single threaded, we do not need to take the tasklist | 1864 | * We need to take the siglock for CHILDEREN, SELF and BOTH |
1865 | * lock or the siglock. No one else can take our signal_struct away, | 1865 | * for the cases current multithreaded, non-current single threaded |
1866 | * no one else can reap the children to update signal->c* counters, and | 1866 | * non-current multithreaded. Thread traversal is now safe with |
1867 | * no one else can race with the signal-> fields. | 1867 | * the siglock held. |
1868 | * If we do not take the tasklist_lock, the signal-> fields could be read | 1868 | * Strictly speaking, we donot need to take the siglock if we are current and |
1869 | * out of order while another thread was just exiting. So we place a | 1869 | * single threaded, as no one else can take our signal_struct away, no one |
1870 | * read memory barrier when we avoid the lock. On the writer side, | 1870 | * else can reap the children to update signal->c* counters, and no one else |
1871 | * write memory barrier is implied in __exit_signal as __exit_signal releases | 1871 | * can race with the signal-> fields. If we do not take any lock, the |
1872 | * the siglock spinlock after updating the signal-> fields. | 1872 | * signal-> fields could be read out of order while another thread was just |
1873 | * | 1873 | * exiting. So we should place a read memory barrier when we avoid the lock. |
1874 | * We don't really need the siglock when we access the non c* fields | 1874 | * On the writer side, write memory barrier is implied in __exit_signal |
1875 | * of the signal_struct (for RUSAGE_SELF) even in multithreaded | 1875 | * as __exit_signal releases the siglock spinlock after updating the signal-> |
1876 | * case, since we take the tasklist lock for read and the non c* signal-> | 1876 | * fields. But we don't do this yet to keep things simple. |
1877 | * fields are updated only in __exit_signal, which is called with | ||
1878 | * tasklist_lock taken for write, hence these two threads cannot execute | ||
1879 | * concurrently. | ||
1880 | * | 1877 | * |
1881 | */ | 1878 | */ |
1882 | 1879 | ||
@@ -1885,35 +1882,25 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1885 | struct task_struct *t; | 1882 | struct task_struct *t; |
1886 | unsigned long flags; | 1883 | unsigned long flags; |
1887 | cputime_t utime, stime; | 1884 | cputime_t utime, stime; |
1888 | int need_lock = 0; | ||
1889 | 1885 | ||
1890 | memset((char *) r, 0, sizeof *r); | 1886 | memset((char *) r, 0, sizeof *r); |
1891 | utime = stime = cputime_zero; | 1887 | utime = stime = cputime_zero; |
1892 | 1888 | ||
1893 | if (p != current || !thread_group_empty(p)) | 1889 | rcu_read_lock(); |
1894 | need_lock = 1; | 1890 | if (!lock_task_sighand(p, &flags)) { |
1895 | 1891 | rcu_read_unlock(); | |
1896 | if (need_lock) { | 1892 | return; |
1897 | read_lock(&tasklist_lock); | 1893 | } |
1898 | if (unlikely(!p->signal)) { | ||
1899 | read_unlock(&tasklist_lock); | ||
1900 | return; | ||
1901 | } | ||
1902 | } else | ||
1903 | /* See locking comments above */ | ||
1904 | smp_rmb(); | ||
1905 | 1894 | ||
1906 | switch (who) { | 1895 | switch (who) { |
1907 | case RUSAGE_BOTH: | 1896 | case RUSAGE_BOTH: |
1908 | case RUSAGE_CHILDREN: | 1897 | case RUSAGE_CHILDREN: |
1909 | spin_lock_irqsave(&p->sighand->siglock, flags); | ||
1910 | utime = p->signal->cutime; | 1898 | utime = p->signal->cutime; |
1911 | stime = p->signal->cstime; | 1899 | stime = p->signal->cstime; |
1912 | r->ru_nvcsw = p->signal->cnvcsw; | 1900 | r->ru_nvcsw = p->signal->cnvcsw; |
1913 | r->ru_nivcsw = p->signal->cnivcsw; | 1901 | r->ru_nivcsw = p->signal->cnivcsw; |
1914 | r->ru_minflt = p->signal->cmin_flt; | 1902 | r->ru_minflt = p->signal->cmin_flt; |
1915 | r->ru_majflt = p->signal->cmaj_flt; | 1903 | r->ru_majflt = p->signal->cmaj_flt; |
1916 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | ||
1917 | 1904 | ||
1918 | if (who == RUSAGE_CHILDREN) | 1905 | if (who == RUSAGE_CHILDREN) |
1919 | break; | 1906 | break; |
@@ -1941,8 +1928,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1941 | BUG(); | 1928 | BUG(); |
1942 | } | 1929 | } |
1943 | 1930 | ||
1944 | if (need_lock) | 1931 | unlock_task_sighand(p, &flags); |
1945 | read_unlock(&tasklist_lock); | 1932 | rcu_read_unlock(); |
1933 | |||
1946 | cputime_to_timeval(utime, &r->ru_utime); | 1934 | cputime_to_timeval(utime, &r->ru_utime); |
1947 | cputime_to_timeval(stime, &r->ru_stime); | 1935 | cputime_to_timeval(stime, &r->ru_stime); |
1948 | } | 1936 | } |