aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/exit.c
diff options
context:
space:
mode:
author: Rik van Riel <riel@redhat.com> 2014-08-16 13:40:10 -0400
committer: Ingo Molnar <mingo@kernel.org> 2014-09-08 02:17:01 -0400
commit: e78c3496790ee8a36522a838b59b388e8a709e65 (patch)
tree: 0473b9ea676754d50b19eb1a862ac16fdffacbeb /kernel/exit.c
parent: 90ed9cbe765ad358b3151a12b8bf889a3cbcd573 (diff)
time, signal: Protect resource use statistics with seqlock
Both times() and clock_gettime(CLOCK_PROCESS_CPUTIME_ID) have scalability issues on large systems, due to both functions being serialized with a lock.

The lock protects against reporting a wrong value, due to a thread in the task group exiting, its statistics reporting up to the signal struct, and that exited task's statistics being counted twice (or not at all).

Protecting that with a lock results in times() and clock_gettime() being completely serialized on large systems.

This can be fixed by using a seqlock around the events that gather and propagate statistics. As an additional benefit, the protection code can be moved into thread_group_cputime(), slightly simplifying the calling functions.

In the case of posix_cpu_clock_get_task() things can be simplified a lot, because the calling function already ensures that the task sticks around, and the rest is now taken care of in thread_group_cputime().

This way the statistics reporting code can run lockless.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alex Thorlton <athorlton@sgi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Daeseok Youn <daeseok.youn@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guillaume Morin <guillaume@morinfr.org>
Cc: Ionut Alexa <ionut.m.alexa@gmail.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Michal Schmidt <mschmidt@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: umgwanakikbuti@gmail.com
Cc: fweisbec@gmail.com
Cc: srao@redhat.com
Cc: lwoodman@redhat.com
Cc: atheurer@redhat.com
Link: http://lkml.kernel.org/r/20140816134010.26a9b572@annuminas.surriel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/exit.c')
-rw-r--r-- kernel/exit.c 4
1 files changed, 4 insertions, 0 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index b93d46dab6fc..fa09b86609db 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -127,6 +127,7 @@ static void __exit_signal(struct task_struct *tsk)
127 * the signal_struct. 127 * the signal_struct.
128 */ 128 */
129 task_cputime(tsk, &utime, &stime); 129 task_cputime(tsk, &utime, &stime);
130 write_seqlock(&sig->stats_lock);
130 sig->utime += utime; 131 sig->utime += utime;
131 sig->stime += stime; 132 sig->stime += stime;
132 sig->gtime += task_gtime(tsk); 133 sig->gtime += task_gtime(tsk);
@@ -140,6 +141,7 @@ static void __exit_signal(struct task_struct *tsk)
140 sig->sum_sched_runtime += tsk->se.sum_exec_runtime; 141 sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
141 sig->nr_threads--; 142 sig->nr_threads--;
142 __unhash_process(tsk, group_dead); 143 __unhash_process(tsk, group_dead);
144 write_sequnlock(&sig->stats_lock);
143 145
144 /* 146 /*
145 * Do this under ->siglock, we can race with another thread 147 * Do this under ->siglock, we can race with another thread
@@ -1042,6 +1044,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1042 spin_lock_irq(&p->real_parent->sighand->siglock); 1044 spin_lock_irq(&p->real_parent->sighand->siglock);
1043 psig = p->real_parent->signal; 1045 psig = p->real_parent->signal;
1044 sig = p->signal; 1046 sig = p->signal;
1047 write_seqlock(&psig->stats_lock);
1045 psig->cutime += tgutime + sig->cutime; 1048 psig->cutime += tgutime + sig->cutime;
1046 psig->cstime += tgstime + sig->cstime; 1049 psig->cstime += tgstime + sig->cstime;
1047 psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime; 1050 psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
@@ -1064,6 +1067,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1064 psig->cmaxrss = maxrss; 1067 psig->cmaxrss = maxrss;
1065 task_io_accounting_add(&psig->ioac, &p->ioac); 1068 task_io_accounting_add(&psig->ioac, &p->ioac);
1066 task_io_accounting_add(&psig->ioac, &sig->ioac); 1069 task_io_accounting_add(&psig->ioac, &sig->ioac);
1070 write_sequnlock(&psig->stats_lock);
1067 spin_unlock_irq(&p->real_parent->sighand->siglock); 1071 spin_unlock_irq(&p->real_parent->sighand->siglock);
1068 } 1072 }
1069 1073