author		Christoph Lameter <christoph@lameter.com>	2005-06-23 03:10:05 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-06-23 12:45:30 -0400
commit		71a2224d7d1cefc23a1ac80bba421cc069cc3257 (patch)
tree		3b3bc52e592484f008b4f1035a0d2e9e16b029f3
parent		b030a4dd609e167da7f73c2d1fa5af864a0aea17 (diff)
[PATCH] Optimize sys_times for a single-threaded process

Avoid taking the tasklist_lock in sys_times if the process is
single-threaded. On a NUMA system, taking the tasklist_lock causes the
lock's cacheline to bounce between nodes when multiple independent
processes continually call sys_times to measure their own performance.
Signed-off-by: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Shai Fultheim <shai@scalex86.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
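
For context, the contended workload described above looks roughly like the
sketch below: many independent, single-threaded processes, each repeatedly
calling times(2) to sample its own CPU usage. This is an illustrative
user-space program, not part of the patch; the loop count and output are
arbitrary.

#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

int main(void)
{
	struct tms t;
	long hz = sysconf(_SC_CLK_TCK);		/* clock ticks per second */
	long i;

	/*
	 * Before this patch, every times() call below took tasklist_lock
	 * in the kernel, so running many copies of this program spread
	 * across NUMA nodes bounced the lock's cacheline between nodes.
	 */
	for (i = 0; i < 1000000; i++)
		times(&t);

	printf("utime %.2fs  stime %.2fs\n",
	       (double)t.tms_utime / hz, (double)t.tms_stime / hz);
	return 0;
}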
 kernel/exit.c |  5 +++++
 kernel/sys.c  | 86 ++++++++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 65 insertions(+), 26 deletions(-)
diff --git a/kernel/exit.c b/kernel/exit.c
index c2bdf6fb61a5..3ebcd60a19c6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,6 +72,11 @@ repeat:
 	BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
 	__exit_signal(p);
 	__exit_sighand(p);
+	/*
+	 * Note that the fastpath in sys_times depends on __exit_signal having
+	 * updated the counters before a task is removed from the tasklist of
+	 * the process by __unhash_process.
+	 */
 	__unhash_process(p);
 
 	/*
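
The new comment pins down an ordering contract: __exit_signal must fold a
dying task's counters into the group totals before __unhash_process takes
the task off the list. A rough user-space analogue of the exit side of that
contract might look like the sketch below (hypothetical names: struct grp
and release_thread; a pthread mutex stands in for siglock and an atomic
counter for the thread list):

#include <pthread.h>

struct grp {				/* hypothetical stand-in for signal_struct */
	pthread_mutex_t siglock;
	unsigned long utime, stime;	/* group totals, like signal->[us]time */
	int nthreads;			/* stands in for the thread list */
};

/*
 * Exit side: fold the dying thread's counters into the group totals
 * first (as __exit_signal does), then unlink the thread (as
 * __unhash_process does). The mutex unlock plus the release store
 * order the counter updates before the "list" update.
 */
static void release_thread(struct grp *g, unsigned long ut, unsigned long st)
{
	pthread_mutex_lock(&g->siglock);
	g->utime += ut;
	g->stime += st;
	pthread_mutex_unlock(&g->siglock);

	__atomic_sub_fetch(&g->nthreads, 1, __ATOMIC_RELEASE);
}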
diff --git a/kernel/sys.c b/kernel/sys.c
index 0a2c8cda9638..5a9d6b075016 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -894,35 +894,69 @@ asmlinkage long sys_times(struct tms __user * tbuf)
 	 */
 	if (tbuf) {
 		struct tms tmp;
-		struct task_struct *tsk = current;
-		struct task_struct *t;
 		cputime_t utime, stime, cutime, cstime;
 
-		read_lock(&tasklist_lock);
-		utime = tsk->signal->utime;
-		stime = tsk->signal->stime;
-		t = tsk;
-		do {
-			utime = cputime_add(utime, t->utime);
-			stime = cputime_add(stime, t->stime);
-			t = next_thread(t);
-		} while (t != tsk);
-
-		/*
-		 * While we have tasklist_lock read-locked, no dying thread
-		 * can be updating current->signal->[us]time.  Instead,
-		 * we got their counts included in the live thread loop.
-		 * However, another thread can come in right now and
-		 * do a wait call that updates current->signal->c[us]time.
-		 * To make sure we always see that pair updated atomically,
-		 * we take the siglock around fetching them.
-		 */
-		spin_lock_irq(&tsk->sighand->siglock);
-		cutime = tsk->signal->cutime;
-		cstime = tsk->signal->cstime;
-		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
+#ifdef CONFIG_SMP
+		if (thread_group_empty(current)) {
+			/*
+			 * Single thread case without the use of any locks.
+			 *
+			 * We may race with release_task if two threads are
+			 * executing. However, release_task first adds up the
+			 * counters (__exit_signal) before removing the task
+			 * from the process tasklist (__unhash_process).
+			 * __exit_signal also acquires and releases the
+			 * siglock, which results in the proper memory ordering
+			 * so that the list modifications are always visible
+			 * after the counters have been updated.
+			 *
+			 * If the counters have been updated by the second thread
+			 * but the thread has not yet been removed from the list,
+			 * then the other branch will be executing, which will
+			 * block on tasklist_lock until the exit handling of the
+			 * other task is finished.
+			 *
+			 * This also implies that the sighand->siglock cannot
+			 * be held by another processor. So we can also
+			 * skip acquiring that lock.
+			 */
+			utime = cputime_add(current->signal->utime, current->utime);
+			stime = cputime_add(current->signal->stime, current->stime);
+			cutime = current->signal->cutime;
+			cstime = current->signal->cstime;
+		} else
+#endif
+		{
+
+			/* Process with multiple threads */
+			struct task_struct *tsk = current;
+			struct task_struct *t;
 
+			read_lock(&tasklist_lock);
+			utime = tsk->signal->utime;
+			stime = tsk->signal->stime;
+			t = tsk;
+			do {
+				utime = cputime_add(utime, t->utime);
+				stime = cputime_add(stime, t->stime);
+				t = next_thread(t);
+			} while (t != tsk);
+
+			/*
+			 * While we have tasklist_lock read-locked, no dying thread
+			 * can be updating current->signal->[us]time.  Instead,
+			 * we got their counts included in the live thread loop.
+			 * However, another thread can come in right now and
+			 * do a wait call that updates current->signal->c[us]time.
+			 * To make sure we always see that pair updated atomically,
+			 * we take the siglock around fetching them.
+			 */
+			spin_lock_irq(&tsk->sighand->siglock);
+			cutime = tsk->signal->cutime;
+			cstime = tsk->signal->cstime;
+			spin_unlock_irq(&tsk->sighand->siglock);
+			read_unlock(&tasklist_lock);
+		}
 		tmp.tms_utime = cputime_to_clock_t(utime);
 		tmp.tms_stime = cputime_to_clock_t(stime);
 		tmp.tms_cutime = cputime_to_clock_t(cutime);
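
The reader side of the same user-space sketch (continuing the hypothetical
struct grp and release_thread above) shows why a group that observes itself
to be single-threaded can skip the locks entirely, while the multi-threaded
case falls back to locking just as the second branch above falls back to
tasklist_lock and siglock:

/*
 * Reader side: if this is the only thread left, every exited thread's
 * counters were folded into the totals before that thread unlinked
 * itself, so plain reads are safe. Otherwise take the lock, as the
 * multi-threaded branch of sys_times() does.
 */
static void read_times(struct grp *g, unsigned long *ut, unsigned long *st)
{
	if (__atomic_load_n(&g->nthreads, __ATOMIC_ACQUIRE) == 1) {
		*ut = g->utime;			/* fast path: no locks */
		*st = g->stime;
	} else {
		pthread_mutex_lock(&g->siglock);
		*ut = g->utime;
		*st = g->stime;
		pthread_mutex_unlock(&g->siglock);
	}
}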