author     Frank Mayhar <fmayhar@google.com>  2008-09-12 12:54:39 -0400
committer  Ingo Molnar <mingo@elte.hu>        2008-09-23 07:38:44 -0400
commit     bb34d92f643086d546b49cef680f6f305ed84414 (patch)
tree       275887040c96971e133fa20d99517c1fcea76415
parent     5ce73a4a5a4893a1aa4cdeed1b1a5a6de42c43b6 (diff)
timers: fix itimer/many thread hang, v2
This is the second resubmission of the posix timer rework patch, posted a few days ago. It includes the changes from the previous resubmission, which addressed Oleg Nesterov's comments, removing the RCU stuff from the patch and un-inlining the thread_group_cputime() function for SMP.

In addition, per Ingo Molnar it simplifies the UP code, consolidating much of it with the SMP version and depending on lower-level SMP/UP handling to take care of the differences. It also cleans up some UP compile errors, moves the scheduler stats-related macros into kernel/sched_stats.h, cleans up a merge error in kernel/fork.c and has a few other minor fixes and cleanups as suggested by Oleg and Ingo. Thanks for the review, guys.

Signed-off-by: Frank Mayhar <fmayhar@google.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
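The shape of the rework, reduced to its essentials: each signal_struct grows a per-CPU set of task_cputime totals, the timer tick bumps only the running CPU's slot, and readers sum the slots on demand, so the tick path no longer has to walk every thread in the group (the hang the series title refers to). A standalone C model of that pattern follows; it is an illustration only, not code from the patch: a plain array stands in for the kernel's alloc_percpu() storage and all names are invented for the sketch.

/* Illustrative model: per-CPU accounting slots, O(1) writes, sum on read. */
#include <stdio.h>

#define NR_CPUS 4

struct task_cputime {
	unsigned long long utime;		/* user time, in ticks */
	unsigned long long stime;		/* system time, in ticks */
	unsigned long long sum_exec_runtime;	/* total runtime, in ns */
};

/* One slot per CPU; the timer tick touches only the local slot. */
static struct task_cputime totals[NR_CPUS];

/* Tick path: a single local increment, no shared lock. */
static void account_user(int cpu, unsigned long long ticks)
{
	totals[cpu].utime += ticks;
}

/* Read path (e.g. timer-expiry checks): sum the slots on demand. */
static struct task_cputime thread_group_sum(void)
{
	struct task_cputime sum = { 0, 0, 0 };

	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		sum.utime += totals[cpu].utime;
		sum.stime += totals[cpu].stime;
		sum.sum_exec_runtime += totals[cpu].sum_exec_runtime;
	}
	return sum;
}

int main(void)
{
	account_user(0, 3);	/* ticks charged while running on CPU 0 */
	account_user(2, 5);	/* ticks charged while running on CPU 2 */
	printf("group utime = %llu ticks\n", thread_group_sum().utime);
	return 0;
}

The trade the patch makes is visible even in this toy: writers stay O(1) per tick, and only the comparatively rare readers pay the O(NR_CPUS) summation.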
-rw-r--r--  include/linux/kernel_stat.h |   1
-rw-r--r--  include/linux/sched.h       | 183
-rw-r--r--  kernel/fork.c               |   5
-rw-r--r--  kernel/posix-cpu-timers.c   | 153
-rw-r--r--  kernel/sched.c              |  47
-rw-r--r--  kernel/sched_stats.h        | 136
6 files changed, 214 insertions(+), 311 deletions(-)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index cf9f40a91c9c..cac3750cd65e 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -52,6 +52,7 @@ static inline int kstat_irqs(int irq)
 	return sum;
 }
 
+extern unsigned long long task_delta_exec(struct task_struct *);
 extern void account_user_time(struct task_struct *, cputime_t);
 extern void account_user_time_scaled(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7ce8d4e53565..b982fb48c8f0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -454,15 +454,9 @@ struct task_cputime {
  * This structure contains the version of task_cputime, above, that is
  * used for thread group CPU clock calculations.
  */
-#ifdef CONFIG_SMP
 struct thread_group_cputime {
 	struct task_cputime *totals;
 };
-#else
-struct thread_group_cputime {
-	struct task_cputime totals;
-};
-#endif
 
 /*
  * NOTE! "signal_struct" does not have it's own
@@ -2124,193 +2118,26 @@ static inline int spin_needbreak(spinlock_t *lock)
 /*
  * Thread group CPU time accounting.
  */
-#ifdef CONFIG_SMP
 
-extern int thread_group_cputime_alloc_smp(struct task_struct *);
-extern void thread_group_cputime_smp(struct task_struct *, struct task_cputime *);
+extern int thread_group_cputime_alloc(struct task_struct *);
+extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
 
 static inline void thread_group_cputime_init(struct signal_struct *sig)
 {
 	sig->cputime.totals = NULL;
 }
 
-static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
-						    struct task_struct *new)
+static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
 {
 	if (curr->signal->cputime.totals)
 		return 0;
-	return thread_group_cputime_alloc_smp(curr);
+	return thread_group_cputime_alloc(curr);
 }
 
-static inline void thread_group_cputime_free(struct signal_struct *sig)
-{
-	free_percpu(sig->cputime.totals);
-}
-
-/**
- * thread_group_cputime - Sum the thread group time fields across all CPUs.
- *
- * This is a wrapper for the real routine, thread_group_cputime_smp().  See
- * that routine for details.
- */
-static inline void thread_group_cputime(
-	struct task_struct *tsk,
-	struct task_cputime *times)
-{
-	thread_group_cputime_smp(tsk, times);
-}
-
-/**
- * thread_group_cputime_account_user - Maintain utime for a thread group.
- *
- * @tgtimes:	Pointer to thread_group_cputime structure.
- * @cputime:	Time value by which to increment the utime field of that
- *		structure.
- *
- * If thread group time is being maintained, get the structure for the
- * running CPU and update the utime field there.
- */
-static inline void thread_group_cputime_account_user(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	if (tgtimes->totals) {
-		struct task_cputime *times;
-
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
-		times->utime = cputime_add(times->utime, cputime);
-		put_cpu_no_resched();
-	}
-}
-
-/**
- * thread_group_cputime_account_system - Maintain stime for a thread group.
- *
- * @tgtimes:	Pointer to thread_group_cputime structure.
- * @cputime:	Time value by which to increment the stime field of that
- *		structure.
- *
- * If thread group time is being maintained, get the structure for the
- * running CPU and update the stime field there.
- */
-static inline void thread_group_cputime_account_system(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	if (tgtimes->totals) {
-		struct task_cputime *times;
-
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
-		times->stime = cputime_add(times->stime, cputime);
-		put_cpu_no_resched();
-	}
-}
-
-/**
- * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
- *					       thread group.
- *
- * @tgtimes:	Pointer to thread_group_cputime structure.
- * @ns:		Time value by which to increment the sum_exec_runtime field
- *		of that structure.
- *
- * If thread group time is being maintained, get the structure for the
- * running CPU and update the sum_exec_runtime field there.
- */
-static inline void thread_group_cputime_account_exec_runtime(
-	struct thread_group_cputime *tgtimes,
-	unsigned long long ns)
-{
-	if (tgtimes->totals) {
-		struct task_cputime *times;
-
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
-		times->sum_exec_runtime += ns;
-		put_cpu_no_resched();
-	}
-}
-
-#else /* CONFIG_SMP */
-
-static inline void thread_group_cputime_init(struct signal_struct *sig)
-{
-	sig->cputime.totals.utime = cputime_zero;
-	sig->cputime.totals.stime = cputime_zero;
-	sig->cputime.totals.sum_exec_runtime = 0;
-}
-
-static inline int thread_group_cputime_alloc(struct task_struct *tsk)
-{
-	return 0;
-}
 
 static inline void thread_group_cputime_free(struct signal_struct *sig)
 {
-}
-
-static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
-						    struct task_struct *tsk)
-{
-	return 0;
-}
-
-static inline void thread_group_cputime(struct task_struct *tsk,
-					struct task_cputime *cputime)
-{
-	*cputime = tsk->signal->cputime.totals;
-}
-
-static inline void thread_group_cputime_account_user(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	tgtimes->totals.utime = cputime_add(tgtimes->totals.utime, cputime);
-}
-
-static inline void thread_group_cputime_account_system(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	tgtimes->totals.stime = cputime_add(tgtimes->totals.stime, cputime);
-}
-
-static inline void thread_group_cputime_account_exec_runtime(
-	struct thread_group_cputime *tgtimes,
-	unsigned long long ns)
-{
-	tgtimes->totals.sum_exec_runtime += ns;
-}
-
-#endif /* CONFIG_SMP */
-
-static inline void account_group_user_time(struct task_struct *tsk,
-					   cputime_t cputime)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_user(&sig->cputime, cputime);
-}
-
-static inline void account_group_system_time(struct task_struct *tsk,
-					     cputime_t cputime)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_system(&sig->cputime, cputime);
-}
-
-static inline void account_group_exec_runtime(struct task_struct *tsk,
-					      unsigned long long ns)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
+	free_percpu(sig->cputime.totals);
 }
 
 /*
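Note what the consolidated header buys: `totals` stays NULL until a process grows a second thread, so the tick-path accounting helpers cost one predictable branch for ordinary single-threaded tasks. A small illustrative model of that guard (invented names, not the kernel code; an SMP kernel would resolve a per-CPU slot instead of dereferencing one shared structure):

/* Illustrative model: NULL totals means "single-threaded, nothing to do". */
#include <stdio.h>
#include <stdlib.h>

struct task_cputime { unsigned long long utime, stime, sum_exec_runtime; };

struct thread_group_cputime {
	struct task_cputime *totals;	/* NULL until a second thread exists */
};

/* Tick-path helper: no-op until group accounting has been switched on. */
static void group_account_utime(struct thread_group_cputime *tg,
				unsigned long long ticks)
{
	if (!tg->totals)	/* single-threaded: no group timers to feed */
		return;
	tg->totals->utime += ticks;
}

int main(void)
{
	struct thread_group_cputime tg = { .totals = NULL };

	group_account_utime(&tg, 1);		/* no-op, nothing allocated */
	tg.totals = calloc(1, sizeof(*tg.totals));	/* "second thread" */
	group_account_utime(&tg, 1);
	if (tg.totals)
		printf("group utime = %llu\n", tg.totals->utime);
	free(tg.totals);
	return 0;
}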
diff --git a/kernel/fork.c b/kernel/fork.c
index 1181b9aac48e..021ae012cc75 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -791,7 +791,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	int ret;
 
 	if (clone_flags & CLONE_THREAD) {
-		ret = thread_group_cputime_clone_thread(current, tsk);
+		ret = thread_group_cputime_clone_thread(current);
 		if (likely(!ret)) {
 			atomic_inc(&current->signal->count);
 			atomic_inc(&current->signal->live);
@@ -834,9 +834,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
 	task_io_accounting_init(&sig->ioac);
-	INIT_LIST_HEAD(&sig->cpu_timers[0]);
-	INIT_LIST_HEAD(&sig->cpu_timers[1]);
-	INIT_LIST_HEAD(&sig->cpu_timers[2]);
 	taskstats_tgid_init(sig);
 
 	task_lock(current->group_leader);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 9a7ea049fcdc..153dcb2639c3 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -7,50 +7,46 @@
 #include <linux/errno.h>
 #include <linux/math64.h>
 #include <asm/uaccess.h>
+#include <linux/kernel_stat.h>
 
-#ifdef CONFIG_SMP
 /*
- * Allocate the thread_group_cputime structure appropriately for SMP kernels
- * and fill in the current values of the fields.  Called from copy_signal()
- * via thread_group_cputime_clone_thread() when adding a second or subsequent
+ * Allocate the thread_group_cputime structure appropriately and fill in the
+ * current values of the fields.  Called from copy_signal() via
+ * thread_group_cputime_clone_thread() when adding a second or subsequent
  * thread to a thread group.  Assumes interrupts are enabled when called.
  */
-int thread_group_cputime_alloc_smp(struct task_struct *tsk)
+int thread_group_cputime_alloc(struct task_struct *tsk)
 {
 	struct signal_struct *sig = tsk->signal;
 	struct task_cputime *cputime;
 
 	/*
 	 * If we have multiple threads and we don't already have a
-	 * per-CPU task_cputime struct, allocate one and fill it in with
-	 * the times accumulated so far.
+	 * per-CPU task_cputime struct (checked in the caller), allocate
+	 * one and fill it in with the times accumulated so far.  We may
+	 * race with another thread so recheck after we pick up the sighand
+	 * lock.
 	 */
-	if (sig->cputime.totals)
-		return 0;
 	cputime = alloc_percpu(struct task_cputime);
 	if (cputime == NULL)
 		return -ENOMEM;
-	read_lock(&tasklist_lock);
 	spin_lock_irq(&tsk->sighand->siglock);
 	if (sig->cputime.totals) {
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		free_percpu(cputime);
 		return 0;
 	}
 	sig->cputime.totals = cputime;
-	cputime = per_cpu_ptr(sig->cputime.totals, get_cpu());
+	cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id());
 	cputime->utime = tsk->utime;
 	cputime->stime = tsk->stime;
 	cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
-	put_cpu_no_resched();
 	spin_unlock_irq(&tsk->sighand->siglock);
-	read_unlock(&tasklist_lock);
 	return 0;
 }
 
 /**
- * thread_group_cputime_smp - Sum the thread group time fields across all CPUs.
+ * thread_group_cputime - Sum the thread group time fields across all CPUs.
  *
  * @tsk:	The task we use to identify the thread group.
  * @times:	task_cputime structure in which we return the summed fields.
@@ -58,7 +54,7 @@ int thread_group_cputime_alloc_smp(struct task_struct *tsk)
  * Walk the list of CPUs to sum the per-CPU time fields in the thread group
  * time structure.
  */
-void thread_group_cputime_smp(
+void thread_group_cputime(
 	struct task_struct *tsk,
 	struct task_cputime *times)
 {
@@ -83,8 +79,6 @@ void thread_group_cputime_smp(
 	}
 }
 
-#endif /* CONFIG_SMP */
-
 /*
  * Called after updating RLIMIT_CPU to set timer expiration if necessary.
  */
@@ -300,7 +294,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 		cpu->cpu = virt_ticks(p);
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = task_sched_runtime(p);
+		cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p);
 		break;
 	}
 	return 0;
@@ -309,16 +303,15 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 /*
  * Sample a process (thread group) clock for the given group_leader task.
  * Must be called with tasklist_lock held for reading.
- * Must be called with tasklist_lock held for reading, and p->sighand->siglock.
  */
-static int cpu_clock_sample_group_locked(unsigned int clock_idx,
+static int cpu_clock_sample_group(const clockid_t which_clock,
 				  struct task_struct *p,
 				  union cpu_time_count *cpu)
 {
 	struct task_cputime cputime;
 
 	thread_group_cputime(p, &cputime);
-	switch (clock_idx) {
+	switch (which_clock) {
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
@@ -328,29 +321,12 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx,
 		cpu->cpu = cputime.utime;
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = thread_group_sched_runtime(p);
+		cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
 		break;
 	}
 	return 0;
 }
 
-/*
- * Sample a process (thread group) clock for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
- */
-static int cpu_clock_sample_group(const clockid_t which_clock,
-				  struct task_struct *p,
-				  union cpu_time_count *cpu)
-{
-	int ret;
-	unsigned long flags;
-	spin_lock_irqsave(&p->sighand->siglock, flags);
-	ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p,
-					    cpu);
-	spin_unlock_irqrestore(&p->sighand->siglock, flags);
-	return ret;
-}
-
 
 int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 {
@@ -1324,29 +1300,37 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
  * fastpath_timer_check - POSIX CPU timers fast path.
  *
  * @tsk:	The task (thread) being checked.
- * @sig:	The signal pointer for that task.
  *
- * If there are no timers set return false.  Otherwise snapshot the task and
- * thread group timers, then compare them with the corresponding expiration
- # times.  Returns true if a timer has expired, else returns false.
+ * Check the task and thread group timers.  If both are zero (there are no
+ * timers set) return false.  Otherwise snapshot the task and thread group
+ * timers and compare them with the corresponding expiration times.  Return
+ * true if a timer has expired, else return false.
  */
-static inline int fastpath_timer_check(struct task_struct *tsk,
-					struct signal_struct *sig)
+static inline int fastpath_timer_check(struct task_struct *tsk)
 {
-	struct task_cputime task_sample = {
-		.utime = tsk->utime,
-		.stime = tsk->stime,
-		.sum_exec_runtime = tsk->se.sum_exec_runtime
-	};
-	struct task_cputime group_sample;
+	struct signal_struct *sig = tsk->signal;
 
-	if (task_cputime_zero(&tsk->cputime_expires) &&
-	    task_cputime_zero(&sig->cputime_expires))
+	if (unlikely(!sig))
 		return 0;
-	if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
-		return 1;
-	thread_group_cputime(tsk, &group_sample);
-	return task_cputime_expired(&group_sample, &sig->cputime_expires);
+
+	if (!task_cputime_zero(&tsk->cputime_expires)) {
+		struct task_cputime task_sample = {
+			.utime = tsk->utime,
+			.stime = tsk->stime,
+			.sum_exec_runtime = tsk->se.sum_exec_runtime
+		};
+
+		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
+			return 1;
+	}
+	if (!task_cputime_zero(&sig->cputime_expires)) {
+		struct task_cputime group_sample;
+
+		thread_group_cputime(tsk, &group_sample);
+		if (task_cputime_expired(&group_sample, &sig->cputime_expires))
+			return 1;
+	}
+	return 0;
 }
 
 /*
@@ -1358,43 +1342,34 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 {
 	LIST_HEAD(firing);
 	struct k_itimer *timer, *next;
-	struct signal_struct *sig;
-	struct sighand_struct *sighand;
-	unsigned long flags;
 
 	BUG_ON(!irqs_disabled());
 
-	/* Pick up tsk->signal and make sure it's valid. */
-	sig = tsk->signal;
 	/*
 	 * The fast path checks that there are no expired thread or thread
-	 * group timers.  If that's so, just return.  Also check that
-	 * tsk->signal is non-NULL; this probably can't happen but cover the
-	 * possibility anyway.
+	 * group timers.  If that's so, just return.
 	 */
-	if (unlikely(!sig) || !fastpath_timer_check(tsk, sig))
+	if (!fastpath_timer_check(tsk))
 		return;
 
-	sighand = lock_task_sighand(tsk, &flags);
-	if (likely(sighand)) {
-		/*
-		 * Here we take off tsk->signal->cpu_timers[N] and
-		 * tsk->cpu_timers[N] all the timers that are firing, and
-		 * put them on the firing list.
-		 */
-		check_thread_timers(tsk, &firing);
-		check_process_timers(tsk, &firing);
+	spin_lock(&tsk->sighand->siglock);
+	/*
+	 * Here we take off tsk->signal->cpu_timers[N] and
+	 * tsk->cpu_timers[N] all the timers that are firing, and
+	 * put them on the firing list.
+	 */
+	check_thread_timers(tsk, &firing);
+	check_process_timers(tsk, &firing);
 
-		/*
-		 * We must release these locks before taking any timer's lock.
-		 * There is a potential race with timer deletion here, as the
-		 * siglock now protects our private firing list.  We have set
-		 * the firing flag in each timer, so that a deletion attempt
-		 * that gets the timer lock before we do will give it up and
-		 * spin until we've taken care of that timer below.
-		 */
-	}
-	unlock_task_sighand(tsk, &flags);
+	/*
+	 * We must release these locks before taking any timer's lock.
+	 * There is a potential race with timer deletion here, as the
+	 * siglock now protects our private firing list.  We have set
+	 * the firing flag in each timer, so that a deletion attempt
+	 * that gets the timer lock before we do will give it up and
+	 * spin until we've taken care of that timer below.
+	 */
+	spin_unlock(&tsk->sighand->siglock);
 
 	/*
 	 * Now that all the timers on our list have the firing flag,
@@ -1433,7 +1408,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 	struct list_head *head;
 
 	BUG_ON(clock_idx == CPUCLOCK_SCHED);
-	cpu_clock_sample_group_locked(clock_idx, tsk, &now);
+	cpu_clock_sample_group(clock_idx, tsk, &now);
 
 	if (oldval) {
 		if (!cputime_eq(*oldval, cputime_zero)) {
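One detail of thread_group_cputime_alloc() in this file is worth calling out: the per-CPU structure is allocated before taking siglock (allocation may sleep), then the pointer is rechecked under the lock and the duplicate freed if another thread won the race. Below is a user-space analogue of that pattern, with a pthread mutex standing in for siglock; it is illustrative only, and all names are invented for the sketch.

/* Illustrative analogue: allocate unlocked, recheck locked, free the loser. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct task_cputime { unsigned long long utime, stime, sum_exec_runtime; };

static struct task_cputime *totals;	/* lazily allocated, NULL initially */
static pthread_mutex_t siglock = PTHREAD_MUTEX_INITIALIZER;

static int cputime_alloc(void)
{
	struct task_cputime *cputime;

	/* Allocate with no locks held; another thread may do the same. */
	cputime = calloc(1, sizeof(*cputime));
	if (cputime == NULL)
		return -1;
	pthread_mutex_lock(&siglock);
	if (totals) {			/* lost the race: discard our copy */
		pthread_mutex_unlock(&siglock);
		free(cputime);
		return 0;
	}
	totals = cputime;		/* won the race: publish the pointer */
	pthread_mutex_unlock(&siglock);
	return 0;
}

int main(void)
{
	if (cputime_alloc() == 0)
		printf("totals allocated at %p\n", (void *)totals);
	free(totals);
	return 0;
}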
diff --git a/kernel/sched.c b/kernel/sched.c
index c51b5d276665..260c22cc530a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4039,55 +4039,22 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 /*
  * Return any ns on the sched_clock that have not yet been banked in
  * @p in case that task is currently running.
- *
- * Called with task_rq_lock() held on @rq.
  */
-static unsigned long long task_delta_exec(struct task_struct *p, struct rq *rq)
+unsigned long long task_delta_exec(struct task_struct *p)
 {
+	struct rq *rq;
+	unsigned long flags;
+	u64 ns = 0;
+
+	rq = task_rq_lock(p, &flags);
 	if (task_current(rq, p)) {
 		u64 delta_exec;
 
 		update_rq_clock(rq);
 		delta_exec = rq->clock - p->se.exec_start;
 		if ((s64)delta_exec > 0)
-			return delta_exec;
+			ns = delta_exec;
 	}
-	return 0;
-}
-
-/*
- * Return p->sum_exec_runtime plus any more ns on the sched_clock
- * that have not yet been banked in case the task is currently running.
- */
-unsigned long long task_sched_runtime(struct task_struct *p)
-{
-	unsigned long flags;
-	u64 ns;
-	struct rq *rq;
-
-	rq = task_rq_lock(p, &flags);
-	ns = p->se.sum_exec_runtime + task_delta_exec(p, rq);
-	task_rq_unlock(rq, &flags);
-
-	return ns;
-}
-
-/*
- * Return sum_exec_runtime for the thread group plus any more ns on the
- * sched_clock that have not yet been banked in case the task is currently
- * running.
- */
-unsigned long long thread_group_sched_runtime(struct task_struct *p)
-{
-	unsigned long flags;
-	u64 ns;
-	struct rq *rq;
-	struct task_cputime totals;
-
-	rq = task_rq_lock(p, &flags);
-	thread_group_cputime(p, &totals);
-	ns = totals.sum_exec_runtime + task_delta_exec(p, rq);
 	task_rq_unlock(rq, &flags);
 
 	return ns;
 }
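The surviving task_delta_exec() returns only the nanoseconds of the current, not-yet-banked slice; callers such as cpu_clock_sample() add it to a banked total like se.sum_exec_runtime. A standalone sketch of that "banked plus in-flight delta" idea follows, with clock_gettime() standing in for the runqueue clock; the names and types are invented for the sketch, not taken from the patch.

/* Illustrative sketch: banked runtime plus the in-flight, unbanked delta. */
#include <stdio.h>
#include <time.h>

struct task {
	unsigned long long sum_exec_runtime;	/* ns already banked */
	unsigned long long exec_start;		/* clock when this slice began */
	int on_cpu;				/* is the task running now? */
};

static unsigned long long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Analogue of task_delta_exec(): ns accrued in the current slice only. */
static unsigned long long task_delta_exec(const struct task *t)
{
	if (t->on_cpu) {
		unsigned long long delta = now_ns() - t->exec_start;

		if ((long long)delta > 0)	/* guard against clock skew */
			return delta;
	}
	return 0;
}

int main(void)
{
	struct task t = { .sum_exec_runtime = 1000,
			  .exec_start = now_ns(), .on_cpu = 1 };

	/* What CPUCLOCK_SCHED reports: banked time plus the in-flight delta. */
	printf("runtime = %llu ns\n", t.sum_exec_runtime + task_delta_exec(&t));
	return 0;
}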
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 8385d43987e2..d6903bd0c7a8 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -270,3 +270,139 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 #define sched_info_switch(t, next)		do { } while (0)
 #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
 
+/*
+ * The following are functions that support scheduler-internal time accounting.
+ * These functions are generally called at the timer tick.  None of this depends
+ * on CONFIG_SCHEDSTATS.
+ */
+
+#ifdef CONFIG_SMP
+
+/**
+ * thread_group_cputime_account_user - Maintain utime for a thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @cputime:	Time value by which to increment the utime field of that
+ *		structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the utime field there.
+ */
+static inline void thread_group_cputime_account_user(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->utime = cputime_add(times->utime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * thread_group_cputime_account_system - Maintain stime for a thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @cputime:	Time value by which to increment the stime field of that
+ *		structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the stime field there.
+ */
+static inline void thread_group_cputime_account_system(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->stime = cputime_add(times->stime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
+ *					       thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @ns:		Time value by which to increment the sum_exec_runtime field
+ *		of that structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the sum_exec_runtime field there.
+ */
+static inline void thread_group_cputime_account_exec_runtime(
+	struct thread_group_cputime *tgtimes,
+	unsigned long long ns)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->sum_exec_runtime += ns;
+		put_cpu_no_resched();
+	}
+}
+
+#else /* CONFIG_SMP */
+
+static inline void thread_group_cputime_account_user(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime);
+}
+
+static inline void thread_group_cputime_account_system(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime);
+}
+
+static inline void thread_group_cputime_account_exec_runtime(
+	struct thread_group_cputime *tgtimes,
+	unsigned long long ns)
+{
+	tgtimes->totals->sum_exec_runtime += ns;
+}
+
+#endif /* CONFIG_SMP */
+
+/*
+ * These are the generic time-accounting routines that use the above
+ * functions.  They are the functions actually called by the scheduler.
+ */
+static inline void account_group_user_time(struct task_struct *tsk,
+					   cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_user(&sig->cputime, cputime);
+}
+
+static inline void account_group_system_time(struct task_struct *tsk,
+					     cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_system(&sig->cputime, cputime);
+}
+
+static inline void account_group_exec_runtime(struct task_struct *tsk,
+					      unsigned long long ns)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
+}
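Putting the pieces together: the helpers moved into this file keep the tick path at a few per-CPU increments, while fastpath_timer_check() in kernel/posix-cpu-timers.c above pays the cross-CPU summation only when a group-wide expiration is actually armed. A condensed, illustrative model of that two-level check follows (invented names and a simplified expiry rule, not the kernel code):

/* Illustrative model: skip the cross-CPU sum unless a group timer is armed. */
#include <stdio.h>

#define NR_CPUS 4

struct task_cputime { unsigned long long utime, stime, sum_exec_runtime; };

static struct task_cputime per_cpu_totals[NR_CPUS];

static int cputime_zero(const struct task_cputime *c)
{
	return !c->utime && !c->stime && !c->sum_exec_runtime;
}

/* Simplified expiry rule: any nonzero limit that the sample has reached. */
static int expired(const struct task_cputime *sample,
		   const struct task_cputime *expires)
{
	return (expires->utime && sample->utime >= expires->utime) ||
	       (expires->stime && sample->stime >= expires->stime) ||
	       (expires->sum_exec_runtime &&
		sample->sum_exec_runtime >= expires->sum_exec_runtime);
}

static int fastpath_check(const struct task_cputime *group_expires)
{
	struct task_cputime sum = { 0, 0, 0 };

	if (cputime_zero(group_expires))	/* no group timers armed... */
		return 0;			/* ...so skip the summation */
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		sum.utime += per_cpu_totals[cpu].utime;
		sum.stime += per_cpu_totals[cpu].stime;
		sum.sum_exec_runtime += per_cpu_totals[cpu].sum_exec_runtime;
	}
	return expired(&sum, group_expires);
}

int main(void)
{
	struct task_cputime expires = { 0, 0, 0 };	/* nothing armed */

	printf("fired = %d\n", fastpath_check(&expires));
	return 0;
}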