aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Christoph Lameter <christoph@lameter.com> 2005-06-23 03:10:05 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org> 2005-06-23 12:45:30 -0400
commit 71a2224d7d1cefc23a1ac80bba421cc069cc3257 (patch)
tree 3b3bc52e592484f008b4f1035a0d2e9e16b029f3
parent b030a4dd609e167da7f73c2d1fa5af864a0aea17 (diff)
[PATCH] Optimize sys_times for a single thread process
Avoid taking the tasklist_lock in sys_times if the process is single threaded. In a NUMA system, taking the tasklist_lock may cause a bouncing cacheline if multiple independent processes continually call sys_times to measure their performance.

Signed-off-by: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Shai Fultheim <shai@scalex86.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- kernel/exit.c 5
-rw-r--r-- kernel/sys.c 86
2 files changed, 65 insertions, 26 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index c2bdf6fb61a5..3ebcd60a19c6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,6 +72,11 @@ repeat:
72 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); 72 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
73 __exit_signal(p); 73 __exit_signal(p);
74 __exit_sighand(p); 74 __exit_sighand(p);
75 /*
76 * Note that the fastpath in sys_times depends on __exit_signal having
77 * updated the counters before a task is removed from the tasklist of
78 * the process by __unhash_process.
79 */
75 __unhash_process(p); 80 __unhash_process(p);
76 81
77 /* 82 /*
diff --git a/kernel/sys.c b/kernel/sys.c
index 0a2c8cda9638..5a9d6b075016 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -894,35 +894,69 @@ asmlinkage long sys_times(struct tms __user * tbuf)
894 */ 894 */
895 if (tbuf) { 895 if (tbuf) {
896 struct tms tmp; 896 struct tms tmp;
897 struct task_struct *tsk = current;
898 struct task_struct *t;
899 cputime_t utime, stime, cutime, cstime; 897 cputime_t utime, stime, cutime, cstime;
900 898
901 read_lock(&tasklist_lock); 899#ifdef CONFIG_SMP
902 utime = tsk->signal->utime; 900 if (thread_group_empty(current)) {
903 stime = tsk->signal->stime; 901 /*
904 t = tsk; 902 * Single thread case without the use of any locks.
905 do { 903 *
906 utime = cputime_add(utime, t->utime); 904 * We may race with release_task if two threads are
907 stime = cputime_add(stime, t->stime); 905 * executing. However, release task first adds up the
908 t = next_thread(t); 906 * counters (__exit_signal) before removing the task
909 } while (t != tsk); 907 * from the process tasklist (__unhash_process).
910 908 * __exit_signal also acquires and releases the
911 /* 909 * siglock which results in the proper memory ordering
912 * While we have tasklist_lock read-locked, no dying thread 910 * so that the list modifications are always visible
913 * can be updating current->signal->[us]time. Instead, 911 * after the counters have been updated.
914 * we got their counts included in the live thread loop. 912 *
915 * However, another thread can come in right now and 913 * If the counters have been updated by the second thread
916 * do a wait call that updates current->signal->c[us]time. 914 * but the thread has not yet been removed from the list
917 * To make sure we always see that pair updated atomically, 915 * then the other branch will be executing which will
918 * we take the siglock around fetching them. 916 * block on tasklist_lock until the exit handling of the
919 */ 917 * other task is finished.
920 spin_lock_irq(&tsk->sighand->siglock); 918 *
921 cutime = tsk->signal->cutime; 919 * This also implies that the sighand->siglock cannot
922 cstime = tsk->signal->cstime; 920 * be held by another processor. So we can also
923 spin_unlock_irq(&tsk->sighand->siglock); 921 * skip acquiring that lock.
924 read_unlock(&tasklist_lock); 922 */
923 utime = cputime_add(current->signal->utime, current->utime);
924 stime = cputime_add(current->signal->utime, current->stime);
925 cutime = current->signal->cutime;
926 cstime = current->signal->cstime;
927 } else
928#endif
929 {
930
931 /* Process with multiple threads */
932 struct task_struct *tsk = current;
933 struct task_struct *t;
925 934
935 read_lock(&tasklist_lock);
936 utime = tsk->signal->utime;
937 stime = tsk->signal->stime;
938 t = tsk;
939 do {
940 utime = cputime_add(utime, t->utime);
941 stime = cputime_add(stime, t->stime);
942 t = next_thread(t);
943 } while (t != tsk);
944
945 /*
946 * While we have tasklist_lock read-locked, no dying thread
947 * can be updating current->signal->[us]time. Instead,
948 * we got their counts included in the live thread loop.
949 * However, another thread can come in right now and
950 * do a wait call that updates current->signal->c[us]time.
951 * To make sure we always see that pair updated atomically,
952 * we take the siglock around fetching them.
953 */
954 spin_lock_irq(&tsk->sighand->siglock);
955 cutime = tsk->signal->cutime;
956 cstime = tsk->signal->cstime;
957 spin_unlock_irq(&tsk->sighand->siglock);
958 read_unlock(&tasklist_lock);
959 }
926 tmp.tms_utime = cputime_to_clock_t(utime); 960 tmp.tms_utime = cputime_to_clock_t(utime);
927 tmp.tms_stime = cputime_to_clock_t(stime); 961 tmp.tms_stime = cputime_to_clock_t(stime);
928 tmp.tms_cutime = cputime_to_clock_t(cutime); 962 tmp.tms_cutime = cputime_to_clock_t(cutime);