-rw-r--r--  fs/binfmt_elf.c               |  19
-rw-r--r--  fs/proc/array.c               |   8
-rw-r--r--  include/linux/posix-timers.h  |   2
-rw-r--r--  include/linux/sched.h         | 257
-rw-r--r--  include/linux/time.h          |   3
-rw-r--r--  kernel/compat.c               |  53
-rw-r--r--  kernel/exit.c                 |  19
-rw-r--r--  kernel/fork.c                 |  88
-rw-r--r--  kernel/itimer.c               |  33
-rw-r--r--  kernel/posix-cpu-timers.c     | 471
-rw-r--r--  kernel/sched.c                |  53
-rw-r--r--  kernel/sched_fair.c           |   1
-rw-r--r--  kernel/sched_rt.c             |   4
-rw-r--r--  kernel/signal.c               |   8
-rw-r--r--  kernel/sys.c                  |  75
-rw-r--r--  security/selinux/hooks.c      |   9
16 files changed, 677 insertions, 426 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 655ed8d30a86..a8635f637038 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1333,20 +1333,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1333 prstatus->pr_pgrp = task_pgrp_vnr(p); 1333 prstatus->pr_pgrp = task_pgrp_vnr(p);
1334 prstatus->pr_sid = task_session_vnr(p); 1334 prstatus->pr_sid = task_session_vnr(p);
1335 if (thread_group_leader(p)) { 1335 if (thread_group_leader(p)) {
1336 struct task_cputime cputime;
1337
1336 /* 1338 /*
1337 * This is the record for the group leader. Add in the 1339 * This is the record for the group leader. It shows the
1338 * cumulative times of previous dead threads. This total 1340 * group-wide total, not its individual thread total.
1339 * won't include the time of each live thread whose state
1340 * is included in the core dump. The final total reported
1341 * to our parent process when it calls wait4 will include
1342 * those sums as well as the little bit more time it takes
1343 * this and each other thread to finish dying after the
1344 * core dump synchronization phase.
1345 */ 1341 */
1346 cputime_to_timeval(cputime_add(p->utime, p->signal->utime), 1342 thread_group_cputime(p, &cputime);
1347 &prstatus->pr_utime); 1343 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1348 cputime_to_timeval(cputime_add(p->stime, p->signal->stime), 1344 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1349 &prstatus->pr_stime);
1350 } else { 1345 } else {
1351 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1346 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1352 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1347 cputime_to_timeval(p->stime, &prstatus->pr_stime);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 71c9be59c9c2..933953c4e407 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -395,20 +395,20 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
395 395
396 /* add up live thread stats at the group level */ 396 /* add up live thread stats at the group level */
397 if (whole) { 397 if (whole) {
398 struct task_cputime cputime;
398 struct task_struct *t = task; 399 struct task_struct *t = task;
399 do { 400 do {
400 min_flt += t->min_flt; 401 min_flt += t->min_flt;
401 maj_flt += t->maj_flt; 402 maj_flt += t->maj_flt;
402 utime = cputime_add(utime, task_utime(t));
403 stime = cputime_add(stime, task_stime(t));
404 gtime = cputime_add(gtime, task_gtime(t)); 403 gtime = cputime_add(gtime, task_gtime(t));
405 t = next_thread(t); 404 t = next_thread(t);
406 } while (t != task); 405 } while (t != task);
407 406
408 min_flt += sig->min_flt; 407 min_flt += sig->min_flt;
409 maj_flt += sig->maj_flt; 408 maj_flt += sig->maj_flt;
410 utime = cputime_add(utime, sig->utime); 409 thread_group_cputime(task, &cputime);
411 stime = cputime_add(stime, sig->stime); 410 utime = cputime.utime;
411 stime = cputime.stime;
412 gtime = cputime_add(gtime, sig->gtime); 412 gtime = cputime_add(gtime, sig->gtime);
413 } 413 }
414 414
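
The whole-process utime/stime computed here are what land in fields 14 and 15 of /proc/<pid>/stat, in clock ticks. As a purely illustrative, hypothetical userspace check (not part of this patch; build with -pthread), a program can burn CPU on a second thread and read the group totals back:

/*
 * Hypothetical test, not part of the patch: burn CPU on a second thread,
 * then read the whole-process utime/stime (fields 14/15 of /proc/self/stat,
 * in clock ticks), which do_task_stat() above now fills in from
 * thread_group_cputime().
 */
#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void *burn(void *arg)
{
        volatile unsigned long x = 0;
        unsigned long i;

        for (i = 0; i < 200000000UL; i++)
                x += i;
        return NULL;
}

int main(void)
{
        unsigned long utime = 0, stime = 0;
        char buf[1024], *p;
        pthread_t t;
        FILE *f;

        pthread_create(&t, NULL, burn, NULL);
        pthread_join(t, NULL);

        f = fopen("/proc/self/stat", "r");
        if (!f || !fgets(buf, sizeof(buf), f))
                return 1;
        fclose(f);

        p = strrchr(buf, ')');  /* comm (field 2) may itself contain spaces */
        sscanf(p + 2, "%*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu %lu",
               &utime, &stime);
        printf("utime=%lu stime=%lu ticks (at %ld Hz)\n",
               utime, stime, sysconf(_SC_CLK_TCK));
        return 0;
}
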
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index a7dd38f30ade..f9d8e9e94e9b 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -115,4 +115,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
115 115
116long clock_nanosleep_restart(struct restart_block *restart_block); 116long clock_nanosleep_restart(struct restart_block *restart_block);
117 117
118void update_rlimit_cpu(unsigned long rlim_new);
119
118#endif 120#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3d9120c5ad15..26d7a5f2d0ba 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -425,6 +425,45 @@ struct pacct_struct {
425 unsigned long ac_minflt, ac_majflt; 425 unsigned long ac_minflt, ac_majflt;
426}; 426};
427 427
428/**
429 * struct task_cputime - collected CPU time counts
430 * @utime: time spent in user mode, in &cputime_t units
431 * @stime: time spent in kernel mode, in &cputime_t units
432 * @sum_exec_runtime: total time spent on the CPU, in nanoseconds
433 *
434 * This structure groups together three kinds of CPU time that are
435 * tracked for threads and thread groups. Most things considering
436 * CPU time want to group these counts together and treat all three
437 * of them in parallel.
438 */
439struct task_cputime {
440 cputime_t utime;
441 cputime_t stime;
442 unsigned long long sum_exec_runtime;
443};
444/* Alternate field names when used to cache expirations. */
445#define prof_exp stime
446#define virt_exp utime
447#define sched_exp sum_exec_runtime
448
449/**
450 * struct thread_group_cputime - thread group interval timer counts
451 * @totals: thread group interval timers; substructure for
452 * uniprocessor kernel, per-cpu for SMP kernel.
453 *
454 * This structure contains the version of task_cputime, above, that is
455 * used for thread group CPU clock calculations.
456 */
457#ifdef CONFIG_SMP
458struct thread_group_cputime {
459 struct task_cputime *totals;
460};
461#else
462struct thread_group_cputime {
463 struct task_cputime totals;
464};
465#endif
466
428/* 467/*
429 * NOTE! "signal_struct" does not have it's own 468 * NOTE! "signal_struct" does not have it's own
430 * locking, because a shared signal_struct always 469 * locking, because a shared signal_struct always
@@ -470,6 +509,17 @@ struct signal_struct {
470 cputime_t it_prof_expires, it_virt_expires; 509 cputime_t it_prof_expires, it_virt_expires;
471 cputime_t it_prof_incr, it_virt_incr; 510 cputime_t it_prof_incr, it_virt_incr;
472 511
512 /*
513 * Thread group totals for process CPU clocks.
514 * See thread_group_cputime(), et al, for details.
515 */
516 struct thread_group_cputime cputime;
517
518 /* Earliest-expiration cache. */
519 struct task_cputime cputime_expires;
520
521 struct list_head cpu_timers[3];
522
473 /* job control IDs */ 523 /* job control IDs */
474 524
475 /* 525 /*
@@ -500,7 +550,7 @@ struct signal_struct {
500 * Live threads maintain their own counters and add to these 550 * Live threads maintain their own counters and add to these
501 * in __exit_signal, except for the group leader. 551 * in __exit_signal, except for the group leader.
502 */ 552 */
503 cputime_t utime, stime, cutime, cstime; 553 cputime_t cutime, cstime;
504 cputime_t gtime; 554 cputime_t gtime;
505 cputime_t cgtime; 555 cputime_t cgtime;
506 unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; 556 unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@ -509,14 +559,6 @@ struct signal_struct {
509 struct task_io_accounting ioac; 559 struct task_io_accounting ioac;
510 560
511 /* 561 /*
512 * Cumulative ns of scheduled CPU time for dead threads in the
513 * group, not including a zombie group leader. (This only differs
514 * from jiffies_to_ns(utime + stime) if sched_clock uses something
515 * other than jiffies.)
516 */
517 unsigned long long sum_sched_runtime;
518
519 /*
520 * We don't bother to synchronize most readers of this at all, 562 * We don't bother to synchronize most readers of this at all,
521 * because there is no reader checking a limit that actually needs 563 * because there is no reader checking a limit that actually needs
522 * to get both rlim_cur and rlim_max atomically, and either one 564 * to get both rlim_cur and rlim_max atomically, and either one
@@ -527,8 +569,6 @@ struct signal_struct {
527 */ 569 */
528 struct rlimit rlim[RLIM_NLIMITS]; 570 struct rlimit rlim[RLIM_NLIMITS];
529 571
530 struct list_head cpu_timers[3];
531
532 /* keep the process-shared keyrings here so that they do the right 572 /* keep the process-shared keyrings here so that they do the right
533 * thing in threads created with CLONE_THREAD */ 573 * thing in threads created with CLONE_THREAD */
534#ifdef CONFIG_KEYS 574#ifdef CONFIG_KEYS
@@ -1134,8 +1174,7 @@ struct task_struct {
1134/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ 1174/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
1135 unsigned long min_flt, maj_flt; 1175 unsigned long min_flt, maj_flt;
1136 1176
1137 cputime_t it_prof_expires, it_virt_expires; 1177 struct task_cputime cputime_expires;
1138 unsigned long long it_sched_expires;
1139 struct list_head cpu_timers[3]; 1178 struct list_head cpu_timers[3];
1140 1179
1141/* process credentials */ 1180/* process credentials */
@@ -1585,6 +1624,7 @@ extern unsigned long long cpu_clock(int cpu);
1585 1624
1586extern unsigned long long 1625extern unsigned long long
1587task_sched_runtime(struct task_struct *task); 1626task_sched_runtime(struct task_struct *task);
1627extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
1588 1628
1589/* sched_exec is called by processes performing an exec */ 1629/* sched_exec is called by processes performing an exec */
1590#ifdef CONFIG_SMP 1630#ifdef CONFIG_SMP
@@ -2082,6 +2122,197 @@ static inline int spin_needbreak(spinlock_t *lock)
2082} 2122}
2083 2123
2084/* 2124/*
2125 * Thread group CPU time accounting.
2126 */
2127#ifdef CONFIG_SMP
2128
2129extern int thread_group_cputime_alloc_smp(struct task_struct *);
2130extern void thread_group_cputime_smp(struct task_struct *, struct task_cputime *);
2131
2132static inline void thread_group_cputime_init(struct signal_struct *sig)
2133{
2134 sig->cputime.totals = NULL;
2135}
2136
2137static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
2138 struct task_struct *new)
2139{
2140 if (curr->signal->cputime.totals)
2141 return 0;
2142 return thread_group_cputime_alloc_smp(curr);
2143}
2144
2145static inline void thread_group_cputime_free(struct signal_struct *sig)
2146{
2147 free_percpu(sig->cputime.totals);
2148}
2149
2150/**
2151 * thread_group_cputime - Sum the thread group time fields across all CPUs.
2152 *
2153 * This is a wrapper for the real routine, thread_group_cputime_smp(). See
2154 * that routine for details.
2155 */
2156static inline void thread_group_cputime(
2157 struct task_struct *tsk,
2158 struct task_cputime *times)
2159{
2160 thread_group_cputime_smp(tsk, times);
2161}
2162
2163/**
2164 * thread_group_cputime_account_user - Maintain utime for a thread group.
2165 *
2166 * @tgtimes: Pointer to thread_group_cputime structure.
2167 * @cputime: Time value by which to increment the utime field of that
2168 * structure.
2169 *
2170 * If thread group time is being maintained, get the structure for the
2171 * running CPU and update the utime field there.
2172 */
2173static inline void thread_group_cputime_account_user(
2174 struct thread_group_cputime *tgtimes,
2175 cputime_t cputime)
2176{
2177 if (tgtimes->totals) {
2178 struct task_cputime *times;
2179
2180 times = per_cpu_ptr(tgtimes->totals, get_cpu());
2181 times->utime = cputime_add(times->utime, cputime);
2182 put_cpu_no_resched();
2183 }
2184}
2185
2186/**
2187 * thread_group_cputime_account_system - Maintain stime for a thread group.
2188 *
2189 * @tgtimes: Pointer to thread_group_cputime structure.
2190 * @cputime: Time value by which to increment the stime field of that
2191 * structure.
2192 *
2193 * If thread group time is being maintained, get the structure for the
2194 * running CPU and update the stime field there.
2195 */
2196static inline void thread_group_cputime_account_system(
2197 struct thread_group_cputime *tgtimes,
2198 cputime_t cputime)
2199{
2200 if (tgtimes->totals) {
2201 struct task_cputime *times;
2202
2203 times = per_cpu_ptr(tgtimes->totals, get_cpu());
2204 times->stime = cputime_add(times->stime, cputime);
2205 put_cpu_no_resched();
2206 }
2207}
2208
2209/**
2210 * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
2211 * thread group.
2212 *
2213 * @tgtimes: Pointer to thread_group_cputime structure.
2214 * @ns: Time value by which to increment the sum_exec_runtime field
2215 * of that structure.
2216 *
2217 * If thread group time is being maintained, get the structure for the
2218 * running CPU and update the sum_exec_runtime field there.
2219 */
2220static inline void thread_group_cputime_account_exec_runtime(
2221 struct thread_group_cputime *tgtimes,
2222 unsigned long long ns)
2223{
2224 if (tgtimes->totals) {
2225 struct task_cputime *times;
2226
2227 times = per_cpu_ptr(tgtimes->totals, get_cpu());
2228 times->sum_exec_runtime += ns;
2229 put_cpu_no_resched();
2230 }
2231}
2232
2233#else /* CONFIG_SMP */
2234
2235static inline void thread_group_cputime_init(struct signal_struct *sig)
2236{
2237 sig->cputime.totals.utime = cputime_zero;
2238 sig->cputime.totals.stime = cputime_zero;
2239 sig->cputime.totals.sum_exec_runtime = 0;
2240}
2241
2242static inline int thread_group_cputime_alloc(struct task_struct *tsk)
2243{
2244 return 0;
2245}
2246
2247static inline void thread_group_cputime_free(struct signal_struct *sig)
2248{
2249}
2250
2251static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
2252 struct task_struct *tsk)
2253{
2254}
2255
2256static inline void thread_group_cputime(struct task_struct *tsk,
2257 struct task_cputime *cputime)
2258{
2259 *cputime = tsk->signal->cputime.totals;
2260}
2261
2262static inline void thread_group_cputime_account_user(
2263 struct thread_group_cputime *tgtimes,
2264 cputime_t cputime)
2265{
2266 tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime);
2267}
2268
2269static inline void thread_group_cputime_account_system(
2270 struct thread_group_cputime *tgtimes,
2271 cputime_t cputime)
2272{
2273 tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime);
2274}
2275
2276static inline void thread_group_cputime_account_exec_runtime(
2277 struct thread_group_cputime *tgtimes,
2278 unsigned long long ns)
2279{
2280 tgtimes->totals->sum_exec_runtime += ns;
2281}
2282
2283#endif /* CONFIG_SMP */
2284
2285static inline void account_group_user_time(struct task_struct *tsk,
2286 cputime_t cputime)
2287{
2288 struct signal_struct *sig;
2289
2290 sig = tsk->signal;
2291 if (likely(sig))
2292 thread_group_cputime_account_user(&sig->cputime, cputime);
2293}
2294
2295static inline void account_group_system_time(struct task_struct *tsk,
2296 cputime_t cputime)
2297{
2298 struct signal_struct *sig;
2299
2300 sig = tsk->signal;
2301 if (likely(sig))
2302 thread_group_cputime_account_system(&sig->cputime, cputime);
2303}
2304
2305static inline void account_group_exec_runtime(struct task_struct *tsk,
2306 unsigned long long ns)
2307{
2308 struct signal_struct *sig;
2309
2310 sig = tsk->signal;
2311 if (likely(sig))
2312 thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
2313}
2314
2315/*
2085 * Reevaluate whether the task has signals pending delivery. 2316 * Reevaluate whether the task has signals pending delivery.
2086 * Wake the task if so. 2317 * Wake the task if so.
2087 * This is required every time the blocked sigset_t changes. 2318 * This is required every time the blocked sigset_t changes.
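
The task_cputime/thread_group_cputime structures and the account_group_*() helpers added above are the data that back the process-wide CPU clock as seen from userspace. As a rough, hypothetical illustration (not part of the patch; build with -pthread, plus -lrt on older glibc), CLOCK_PROCESS_CPUTIME_ID keeps advancing while a sibling thread burns CPU even though the calling thread's own clock barely moves:

/*
 * Hypothetical test, not part of the patch: the per-CPU totals kept in
 * struct thread_group_cputime back CLOCK_PROCESS_CPUTIME_ID, so the process
 * clock advances while a sibling thread burns CPU even though the calling
 * thread's own clock barely moves.
 */
#include <pthread.h>
#include <stdio.h>
#include <time.h>

static void *burn(void *arg)
{
        volatile unsigned long x = 0;
        unsigned long i;

        for (i = 0; i < 300000000UL; i++)
                x += i;
        return NULL;
}

static double cpu_seconds(clockid_t id)
{
        struct timespec ts;

        clock_gettime(id, &ts);
        return ts.tv_sec + ts.tv_nsec / 1e9;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, burn, NULL);
        pthread_join(t, NULL);

        printf("process CPU clock: %.3f s\n", cpu_seconds(CLOCK_PROCESS_CPUTIME_ID));
        printf("thread  CPU clock: %.3f s\n", cpu_seconds(CLOCK_THREAD_CPUTIME_ID));
        return 0;
}
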
diff --git a/include/linux/time.h b/include/linux/time.h
index e15206a7e82e..1b70b3c293e9 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -125,6 +125,9 @@ extern int timekeeping_valid_for_hres(void);
125extern void update_wall_time(void); 125extern void update_wall_time(void);
126extern void update_xtime_cache(u64 nsec); 126extern void update_xtime_cache(u64 nsec);
127 127
128struct tms;
129extern void do_sys_times(struct tms *);
130
128/** 131/**
129 * timespec_to_ns - Convert timespec to nanoseconds 132 * timespec_to_ns - Convert timespec to nanoseconds
130 * @ts: pointer to the timespec variable to be converted 133 * @ts: pointer to the timespec variable to be converted
diff --git a/kernel/compat.c b/kernel/compat.c
index 32c254a8ab9a..72650e39b3e6 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -23,6 +23,7 @@
23#include <linux/timex.h> 23#include <linux/timex.h>
24#include <linux/migrate.h> 24#include <linux/migrate.h>
25#include <linux/posix-timers.h> 25#include <linux/posix-timers.h>
26#include <linux/times.h>
26 27
27#include <asm/uaccess.h> 28#include <asm/uaccess.h>
28 29
@@ -150,49 +151,23 @@ asmlinkage long compat_sys_setitimer(int which,
150 return 0; 151 return 0;
151} 152}
152 153
154static compat_clock_t clock_t_to_compat_clock_t(clock_t x)
155{
156 return compat_jiffies_to_clock_t(clock_t_to_jiffies(x));
157}
158
153asmlinkage long compat_sys_times(struct compat_tms __user *tbuf) 159asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
154{ 160{
155 /*
156 * In the SMP world we might just be unlucky and have one of
157 * the times increment as we use it. Since the value is an
158 * atomically safe type this is just fine. Conceptually its
159 * as if the syscall took an instant longer to occur.
160 */
161 if (tbuf) { 161 if (tbuf) {
162 struct tms tms;
162 struct compat_tms tmp; 163 struct compat_tms tmp;
163 struct task_struct *tsk = current; 164
164 struct task_struct *t; 165 do_sys_times(&tms);
165 cputime_t utime, stime, cutime, cstime; 166 /* Convert our struct tms to the compat version. */
166 167 tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime);
167 read_lock(&tasklist_lock); 168 tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime);
168 utime = tsk->signal->utime; 169 tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime);
169 stime = tsk->signal->stime; 170 tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime);
170 t = tsk;
171 do {
172 utime = cputime_add(utime, t->utime);
173 stime = cputime_add(stime, t->stime);
174 t = next_thread(t);
175 } while (t != tsk);
176
177 /*
178 * While we have tasklist_lock read-locked, no dying thread
179 * can be updating current->signal->[us]time. Instead,
180 * we got their counts included in the live thread loop.
181 * However, another thread can come in right now and
182 * do a wait call that updates current->signal->c[us]time.
183 * To make sure we always see that pair updated atomically,
184 * we take the siglock around fetching them.
185 */
186 spin_lock_irq(&tsk->sighand->siglock);
187 cutime = tsk->signal->cutime;
188 cstime = tsk->signal->cstime;
189 spin_unlock_irq(&tsk->sighand->siglock);
190 read_unlock(&tasklist_lock);
191
192 tmp.tms_utime = compat_jiffies_to_clock_t(cputime_to_jiffies(utime));
193 tmp.tms_stime = compat_jiffies_to_clock_t(cputime_to_jiffies(stime));
194 tmp.tms_cutime = compat_jiffies_to_clock_t(cputime_to_jiffies(cutime));
195 tmp.tms_cstime = compat_jiffies_to_clock_t(cputime_to_jiffies(cstime));
196 if (copy_to_user(tbuf, &tmp, sizeof(tmp))) 171 if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
197 return -EFAULT; 172 return -EFAULT;
198 } 173 }
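
Both the compat path above and the native sys_times() now go through do_sys_times(), declared in include/linux/time.h earlier in this diff and presumably implemented in kernel/sys.c (listed in the diffstat but whose hunks are not shown here). A hypothetical userspace check of the interface (not part of the patch; build with -pthread): tms_utime should cover CPU consumed by every thread in the process.

/*
 * Hypothetical test, not part of the patch: tms_utime returned by times()
 * should cover CPU consumed by every thread in the process, since both the
 * native and compat paths now read the group totals.
 */
#include <pthread.h>
#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

#define NTHREADS 4

static void *burn(void *arg)
{
        volatile unsigned long x = 0;
        unsigned long i;

        for (i = 0; i < 100000000UL; i++)
                x += i;
        return NULL;
}

int main(void)
{
        long hz = sysconf(_SC_CLK_TCK);
        pthread_t t[NTHREADS];
        struct tms tms;
        int i;

        for (i = 0; i < NTHREADS; i++)
                pthread_create(&t[i], NULL, burn, NULL);
        for (i = 0; i < NTHREADS; i++)
                pthread_join(t[i], NULL);

        times(&tms);
        printf("utime=%.2fs stime=%.2fs cutime=%.2fs cstime=%.2fs\n",
               (double)tms.tms_utime / hz, (double)tms.tms_stime / hz,
               (double)tms.tms_cutime / hz, (double)tms.tms_cstime / hz);
        return 0;
}
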
diff --git a/kernel/exit.c b/kernel/exit.c
index 16395644a98f..40036ac04271 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -112,8 +112,6 @@ static void __exit_signal(struct task_struct *tsk)
112 * We won't ever get here for the group leader, since it 112 * We won't ever get here for the group leader, since it
113 * will have been the last reference on the signal_struct. 113 * will have been the last reference on the signal_struct.
114 */ 114 */
115 sig->utime = cputime_add(sig->utime, task_utime(tsk));
116 sig->stime = cputime_add(sig->stime, task_stime(tsk));
117 sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); 115 sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
118 sig->min_flt += tsk->min_flt; 116 sig->min_flt += tsk->min_flt;
119 sig->maj_flt += tsk->maj_flt; 117 sig->maj_flt += tsk->maj_flt;
@@ -122,7 +120,6 @@ static void __exit_signal(struct task_struct *tsk)
122 sig->inblock += task_io_get_inblock(tsk); 120 sig->inblock += task_io_get_inblock(tsk);
123 sig->oublock += task_io_get_oublock(tsk); 121 sig->oublock += task_io_get_oublock(tsk);
124 task_io_accounting_add(&sig->ioac, &tsk->ioac); 122 task_io_accounting_add(&sig->ioac, &tsk->ioac);
125 sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
126 sig = NULL; /* Marker for below. */ 123 sig = NULL; /* Marker for below. */
127 } 124 }
128 125
@@ -1294,6 +1291,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
1294 if (likely(!traced)) { 1291 if (likely(!traced)) {
1295 struct signal_struct *psig; 1292 struct signal_struct *psig;
1296 struct signal_struct *sig; 1293 struct signal_struct *sig;
1294 struct task_cputime cputime;
1297 1295
1298 /* 1296 /*
1299 * The resource counters for the group leader are in its 1297 * The resource counters for the group leader are in its
@@ -1309,20 +1307,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
1309 * need to protect the access to p->parent->signal fields, 1307 * need to protect the access to p->parent->signal fields,
1310 * as other threads in the parent group can be right 1308 * as other threads in the parent group can be right
1311 * here reaping other children at the same time. 1309 * here reaping other children at the same time.
1310 *
1311 * We use thread_group_cputime() to get times for the thread
1312 * group, which consolidates times for all threads in the
1313 * group including the group leader.
1312 */ 1314 */
1313 spin_lock_irq(&p->parent->sighand->siglock); 1315 spin_lock_irq(&p->parent->sighand->siglock);
1314 psig = p->parent->signal; 1316 psig = p->parent->signal;
1315 sig = p->signal; 1317 sig = p->signal;
1318 thread_group_cputime(p, &cputime);
1316 psig->cutime = 1319 psig->cutime =
1317 cputime_add(psig->cutime, 1320 cputime_add(psig->cutime,
1318 cputime_add(p->utime, 1321 cputime_add(cputime.utime,
1319 cputime_add(sig->utime, 1322 sig->cutime));
1320 sig->cutime)));
1321 psig->cstime = 1323 psig->cstime =
1322 cputime_add(psig->cstime, 1324 cputime_add(psig->cstime,
1323 cputime_add(p->stime, 1325 cputime_add(cputime.stime,
1324 cputime_add(sig->stime, 1326 sig->cstime));
1325 sig->cstime)));
1326 psig->cgtime = 1327 psig->cgtime =
1327 cputime_add(psig->cgtime, 1328 cputime_add(psig->cgtime,
1328 cputime_add(p->gtime, 1329 cputime_add(p->gtime,
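
wait_task_zombie() now folds the reaped child's group-wide utime/stime into the parent's cutime/cstime, so the same totals are visible through getrusage(RUSAGE_CHILDREN) once the child has been waited for. A hypothetical check (not part of the patch; build with -pthread):

/*
 * Hypothetical test, not part of the patch: the children's times accumulated
 * by wait_task_zombie() above are what getrusage(RUSAGE_CHILDREN) reports,
 * and they should cover CPU burned by every thread of the reaped child.
 */
#include <pthread.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <unistd.h>

static void *burn(void *arg)
{
        volatile unsigned long x = 0;
        unsigned long i;

        for (i = 0; i < 100000000UL; i++)
                x += i;
        return NULL;
}

int main(void)
{
        struct rusage ru;
        int status, i;
        pid_t pid;

        pid = fork();
        if (pid == 0) {
                pthread_t t[3];

                for (i = 0; i < 3; i++)
                        pthread_create(&t[i], NULL, burn, NULL);
                for (i = 0; i < 3; i++)
                        pthread_join(t[i], NULL);
                _exit(0);
        }

        waitpid(pid, &status, 0);
        getrusage(RUSAGE_CHILDREN, &ru);
        printf("children: utime=%ld.%06lds stime=%ld.%06lds\n",
               (long)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec,
               (long)ru.ru_stime.tv_sec, (long)ru.ru_stime.tv_usec);
        return 0;
}
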
diff --git a/kernel/fork.c b/kernel/fork.c
index 7ce2ebe84796..a8ac2efb8e30 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -759,15 +759,44 @@ void __cleanup_sighand(struct sighand_struct *sighand)
759 kmem_cache_free(sighand_cachep, sighand); 759 kmem_cache_free(sighand_cachep, sighand);
760} 760}
761 761
762
763/*
764 * Initialize POSIX timer handling for a thread group.
765 */
766static void posix_cpu_timers_init_group(struct signal_struct *sig)
767{
768 /* Thread group counters. */
769 thread_group_cputime_init(sig);
770
771 /* Expiration times and increments. */
772 sig->it_virt_expires = cputime_zero;
773 sig->it_virt_incr = cputime_zero;
774 sig->it_prof_expires = cputime_zero;
775 sig->it_prof_incr = cputime_zero;
776
777 /* Cached expiration times. */
778 sig->cputime_expires.prof_exp = cputime_zero;
779 sig->cputime_expires.virt_exp = cputime_zero;
780 sig->cputime_expires.sched_exp = 0;
781
782 /* The timer lists. */
783 INIT_LIST_HEAD(&sig->cpu_timers[0]);
784 INIT_LIST_HEAD(&sig->cpu_timers[1]);
785 INIT_LIST_HEAD(&sig->cpu_timers[2]);
786}
787
762static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) 788static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
763{ 789{
764 struct signal_struct *sig; 790 struct signal_struct *sig;
765 int ret; 791 int ret;
766 792
767 if (clone_flags & CLONE_THREAD) { 793 if (clone_flags & CLONE_THREAD) {
768 atomic_inc(&current->signal->count); 794 ret = thread_group_cputime_clone_thread(current, tsk);
769 atomic_inc(&current->signal->live); 795 if (likely(!ret)) {
770 return 0; 796 atomic_inc(&current->signal->count);
797 atomic_inc(&current->signal->live);
798 }
799 return ret;
771 } 800 }
772 sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); 801 sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
773 tsk->signal = sig; 802 tsk->signal = sig;
@@ -795,15 +824,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
795 sig->it_real_incr.tv64 = 0; 824 sig->it_real_incr.tv64 = 0;
796 sig->real_timer.function = it_real_fn; 825 sig->real_timer.function = it_real_fn;
797 826
798 sig->it_virt_expires = cputime_zero;
799 sig->it_virt_incr = cputime_zero;
800 sig->it_prof_expires = cputime_zero;
801 sig->it_prof_incr = cputime_zero;
802
803 sig->leader = 0; /* session leadership doesn't inherit */ 827 sig->leader = 0; /* session leadership doesn't inherit */
804 sig->tty_old_pgrp = NULL; 828 sig->tty_old_pgrp = NULL;
805 829
806 sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; 830 sig->cutime = sig->cstime = cputime_zero;
807 sig->gtime = cputime_zero; 831 sig->gtime = cputime_zero;
808 sig->cgtime = cputime_zero; 832 sig->cgtime = cputime_zero;
809 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; 833 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
@@ -820,14 +844,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
820 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); 844 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
821 task_unlock(current->group_leader); 845 task_unlock(current->group_leader);
822 846
823 if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { 847 posix_cpu_timers_init_group(sig);
824 /* 848
825 * New sole thread in the process gets an expiry time
826 * of the whole CPU time limit.
827 */
828 tsk->it_prof_expires =
829 secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
830 }
831 acct_init_pacct(&sig->pacct); 849 acct_init_pacct(&sig->pacct);
832 850
833 tty_audit_fork(sig); 851 tty_audit_fork(sig);
@@ -837,6 +855,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
837 855
838void __cleanup_signal(struct signal_struct *sig) 856void __cleanup_signal(struct signal_struct *sig)
839{ 857{
858 thread_group_cputime_free(sig);
840 exit_thread_group_keys(sig); 859 exit_thread_group_keys(sig);
841 kmem_cache_free(signal_cachep, sig); 860 kmem_cache_free(signal_cachep, sig);
842} 861}
@@ -886,6 +905,19 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
886#endif /* CONFIG_MM_OWNER */ 905#endif /* CONFIG_MM_OWNER */
887 906
888/* 907/*
908 * Initialize POSIX timer handling for a single task.
909 */
910static void posix_cpu_timers_init(struct task_struct *tsk)
911{
912 tsk->cputime_expires.prof_exp = cputime_zero;
913 tsk->cputime_expires.virt_exp = cputime_zero;
914 tsk->cputime_expires.sched_exp = 0;
915 INIT_LIST_HEAD(&tsk->cpu_timers[0]);
916 INIT_LIST_HEAD(&tsk->cpu_timers[1]);
917 INIT_LIST_HEAD(&tsk->cpu_timers[2]);
918}
919
920/*
889 * This creates a new process as a copy of the old one, 921 * This creates a new process as a copy of the old one,
890 * but does not actually start it yet. 922 * but does not actually start it yet.
891 * 923 *
@@ -995,12 +1027,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
995 task_io_accounting_init(&p->ioac); 1027 task_io_accounting_init(&p->ioac);
996 acct_clear_integrals(p); 1028 acct_clear_integrals(p);
997 1029
998 p->it_virt_expires = cputime_zero; 1030 posix_cpu_timers_init(p);
999 p->it_prof_expires = cputime_zero;
1000 p->it_sched_expires = 0;
1001 INIT_LIST_HEAD(&p->cpu_timers[0]);
1002 INIT_LIST_HEAD(&p->cpu_timers[1]);
1003 INIT_LIST_HEAD(&p->cpu_timers[2]);
1004 1031
1005 p->lock_depth = -1; /* -1 = no lock */ 1032 p->lock_depth = -1; /* -1 = no lock */
1006 do_posix_clock_monotonic_gettime(&p->start_time); 1033 do_posix_clock_monotonic_gettime(&p->start_time);
@@ -1201,21 +1228,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1201 if (clone_flags & CLONE_THREAD) { 1228 if (clone_flags & CLONE_THREAD) {
1202 p->group_leader = current->group_leader; 1229 p->group_leader = current->group_leader;
1203 list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); 1230 list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
1204
1205 if (!cputime_eq(current->signal->it_virt_expires,
1206 cputime_zero) ||
1207 !cputime_eq(current->signal->it_prof_expires,
1208 cputime_zero) ||
1209 current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
1210 !list_empty(&current->signal->cpu_timers[0]) ||
1211 !list_empty(&current->signal->cpu_timers[1]) ||
1212 !list_empty(&current->signal->cpu_timers[2])) {
1213 /*
1214 * Have child wake up on its first tick to check
1215 * for process CPU timers.
1216 */
1217 p->it_prof_expires = jiffies_to_cputime(1);
1218 }
1219 } 1231 }
1220 1232
1221 if (likely(p->pid)) { 1233 if (likely(p->pid)) {
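
The code removed above used to seed a new process's first thread with an it_prof_expires derived from RLIMIT_CPU, and to force newly cloned threads to check process timers on their first tick; the cached group-wide cputime_expires makes both unnecessary. The user-visible behavior, RLIMIT_CPU enforced against the combined CPU time of all threads, is unchanged, as in this hypothetical check (not part of the patch; build with -pthread):

/*
 * Hypothetical test, not part of the patch: with two threads spinning, the
 * SIGXCPU warning for a 2-second RLIMIT_CPU soft limit should arrive after
 * roughly one second of wall-clock time, because the limit is charged
 * against the whole thread group's CPU time.
 */
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <sys/resource.h>
#include <time.h>

static volatile sig_atomic_t hit;

static void xcpu(int sig)
{
        hit = 1;
}

static void *burn(void *arg)
{
        volatile unsigned long x = 0;

        while (!hit)
                x++;
        return NULL;
}

int main(void)
{
        struct timespec start, end;
        struct rlimit rl;
        pthread_t t;

        signal(SIGXCPU, xcpu);
        getrlimit(RLIMIT_CPU, &rl);
        rl.rlim_cur = 2;                /* soft limit: 2 seconds of CPU */
        setrlimit(RLIMIT_CPU, &rl);

        clock_gettime(CLOCK_MONOTONIC, &start);
        pthread_create(&t, NULL, burn, NULL);
        burn(NULL);                     /* main thread spins too */
        pthread_join(t, NULL);
        clock_gettime(CLOCK_MONOTONIC, &end);

        printf("SIGXCPU after %.2f s wall clock (2 s CPU soft limit, 2 threads)\n",
               end.tv_sec - start.tv_sec +
               (end.tv_nsec - start.tv_nsec) / 1e9);
        return 0;
}
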
diff --git a/kernel/itimer.c b/kernel/itimer.c
index ab982747d9bd..db7c358b9a02 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -55,17 +55,15 @@ int do_getitimer(int which, struct itimerval *value)
55 spin_unlock_irq(&tsk->sighand->siglock); 55 spin_unlock_irq(&tsk->sighand->siglock);
56 break; 56 break;
57 case ITIMER_VIRTUAL: 57 case ITIMER_VIRTUAL:
58 read_lock(&tasklist_lock);
59 spin_lock_irq(&tsk->sighand->siglock); 58 spin_lock_irq(&tsk->sighand->siglock);
60 cval = tsk->signal->it_virt_expires; 59 cval = tsk->signal->it_virt_expires;
61 cinterval = tsk->signal->it_virt_incr; 60 cinterval = tsk->signal->it_virt_incr;
62 if (!cputime_eq(cval, cputime_zero)) { 61 if (!cputime_eq(cval, cputime_zero)) {
63 struct task_struct *t = tsk; 62 struct task_cputime cputime;
64 cputime_t utime = tsk->signal->utime; 63 cputime_t utime;
65 do { 64
66 utime = cputime_add(utime, t->utime); 65 thread_group_cputime(tsk, &cputime);
67 t = next_thread(t); 66 utime = cputime.utime;
68 } while (t != tsk);
69 if (cputime_le(cval, utime)) { /* about to fire */ 67 if (cputime_le(cval, utime)) { /* about to fire */
70 cval = jiffies_to_cputime(1); 68 cval = jiffies_to_cputime(1);
71 } else { 69 } else {
@@ -73,25 +71,19 @@ int do_getitimer(int which, struct itimerval *value)
73 } 71 }
74 } 72 }
75 spin_unlock_irq(&tsk->sighand->siglock); 73 spin_unlock_irq(&tsk->sighand->siglock);
76 read_unlock(&tasklist_lock);
77 cputime_to_timeval(cval, &value->it_value); 74 cputime_to_timeval(cval, &value->it_value);
78 cputime_to_timeval(cinterval, &value->it_interval); 75 cputime_to_timeval(cinterval, &value->it_interval);
79 break; 76 break;
80 case ITIMER_PROF: 77 case ITIMER_PROF:
81 read_lock(&tasklist_lock);
82 spin_lock_irq(&tsk->sighand->siglock); 78 spin_lock_irq(&tsk->sighand->siglock);
83 cval = tsk->signal->it_prof_expires; 79 cval = tsk->signal->it_prof_expires;
84 cinterval = tsk->signal->it_prof_incr; 80 cinterval = tsk->signal->it_prof_incr;
85 if (!cputime_eq(cval, cputime_zero)) { 81 if (!cputime_eq(cval, cputime_zero)) {
86 struct task_struct *t = tsk; 82 struct task_cputime times;
87 cputime_t ptime = cputime_add(tsk->signal->utime, 83 cputime_t ptime;
88 tsk->signal->stime); 84
89 do { 85 thread_group_cputime(tsk, &times);
90 ptime = cputime_add(ptime, 86 ptime = cputime_add(times.utime, times.stime);
91 cputime_add(t->utime,
92 t->stime));
93 t = next_thread(t);
94 } while (t != tsk);
95 if (cputime_le(cval, ptime)) { /* about to fire */ 87 if (cputime_le(cval, ptime)) { /* about to fire */
96 cval = jiffies_to_cputime(1); 88 cval = jiffies_to_cputime(1);
97 } else { 89 } else {
@@ -99,7 +91,6 @@ int do_getitimer(int which, struct itimerval *value)
99 } 91 }
100 } 92 }
101 spin_unlock_irq(&tsk->sighand->siglock); 93 spin_unlock_irq(&tsk->sighand->siglock);
102 read_unlock(&tasklist_lock);
103 cputime_to_timeval(cval, &value->it_value); 94 cputime_to_timeval(cval, &value->it_value);
104 cputime_to_timeval(cinterval, &value->it_interval); 95 cputime_to_timeval(cinterval, &value->it_interval);
105 break; 96 break;
@@ -185,7 +176,6 @@ again:
185 case ITIMER_VIRTUAL: 176 case ITIMER_VIRTUAL:
186 nval = timeval_to_cputime(&value->it_value); 177 nval = timeval_to_cputime(&value->it_value);
187 ninterval = timeval_to_cputime(&value->it_interval); 178 ninterval = timeval_to_cputime(&value->it_interval);
188 read_lock(&tasklist_lock);
189 spin_lock_irq(&tsk->sighand->siglock); 179 spin_lock_irq(&tsk->sighand->siglock);
190 cval = tsk->signal->it_virt_expires; 180 cval = tsk->signal->it_virt_expires;
191 cinterval = tsk->signal->it_virt_incr; 181 cinterval = tsk->signal->it_virt_incr;
@@ -200,7 +190,6 @@ again:
200 tsk->signal->it_virt_expires = nval; 190 tsk->signal->it_virt_expires = nval;
201 tsk->signal->it_virt_incr = ninterval; 191 tsk->signal->it_virt_incr = ninterval;
202 spin_unlock_irq(&tsk->sighand->siglock); 192 spin_unlock_irq(&tsk->sighand->siglock);
203 read_unlock(&tasklist_lock);
204 if (ovalue) { 193 if (ovalue) {
205 cputime_to_timeval(cval, &ovalue->it_value); 194 cputime_to_timeval(cval, &ovalue->it_value);
206 cputime_to_timeval(cinterval, &ovalue->it_interval); 195 cputime_to_timeval(cinterval, &ovalue->it_interval);
@@ -209,7 +198,6 @@ again:
209 case ITIMER_PROF: 198 case ITIMER_PROF:
210 nval = timeval_to_cputime(&value->it_value); 199 nval = timeval_to_cputime(&value->it_value);
211 ninterval = timeval_to_cputime(&value->it_interval); 200 ninterval = timeval_to_cputime(&value->it_interval);
212 read_lock(&tasklist_lock);
213 spin_lock_irq(&tsk->sighand->siglock); 201 spin_lock_irq(&tsk->sighand->siglock);
214 cval = tsk->signal->it_prof_expires; 202 cval = tsk->signal->it_prof_expires;
215 cinterval = tsk->signal->it_prof_incr; 203 cinterval = tsk->signal->it_prof_incr;
@@ -224,7 +212,6 @@ again:
224 tsk->signal->it_prof_expires = nval; 212 tsk->signal->it_prof_expires = nval;
225 tsk->signal->it_prof_incr = ninterval; 213 tsk->signal->it_prof_incr = ninterval;
226 spin_unlock_irq(&tsk->sighand->siglock); 214 spin_unlock_irq(&tsk->sighand->siglock);
227 read_unlock(&tasklist_lock);
228 if (ovalue) { 215 if (ovalue) {
229 cputime_to_timeval(cval, &ovalue->it_value); 216 cputime_to_timeval(cval, &ovalue->it_value);
230 cputime_to_timeval(cinterval, &ovalue->it_interval); 217 cputime_to_timeval(cinterval, &ovalue->it_interval);
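
do_getitimer() and do_setitimer() now sample the group clock via thread_group_cputime() under siglock alone, instead of walking every thread under tasklist_lock. The semantics of ITIMER_PROF/ITIMER_VIRTUAL stay the same: they measure CPU consumed by the whole process. A hypothetical illustration (not part of the patch; build with -pthread), in which SIGPROF keeps firing even though only a worker thread is running:

/*
 * Hypothetical test, not part of the patch: ITIMER_PROF counts process-wide
 * CPU time (the same group totals sampled in do_getitimer() above), so
 * SIGPROF keeps firing while only the worker thread is burning CPU and the
 * main thread sleeps in pthread_join().
 */
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <sys/time.h>

static volatile sig_atomic_t ticks;

static void on_prof(int sig)
{
        ticks++;
}

static void *burn(void *arg)
{
        volatile unsigned long x = 0;

        while (ticks < 10)              /* ~1s of process CPU time */
                x++;
        return NULL;
}

int main(void)
{
        struct itimerval it = {
                .it_interval = { .tv_sec = 0, .tv_usec = 100000 },
                .it_value    = { .tv_sec = 0, .tv_usec = 100000 },
        };
        pthread_t t;

        signal(SIGPROF, on_prof);
        setitimer(ITIMER_PROF, &it, NULL);

        pthread_create(&t, NULL, burn, NULL);
        pthread_join(t, NULL);

        printf("got %d SIGPROF ticks from the worker thread's CPU time\n",
               (int)ticks);
        return 0;
}
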
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index c42a03aef36f..dba1c334c3e8 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -8,6 +8,99 @@
8#include <linux/math64.h> 8#include <linux/math64.h>
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10 10
11#ifdef CONFIG_SMP
12/*
13 * Allocate the thread_group_cputime structure appropriately for SMP kernels
14 * and fill in the current values of the fields. Called from copy_signal()
15 * via thread_group_cputime_clone_thread() when adding a second or subsequent
16 * thread to a thread group. Assumes interrupts are enabled when called.
17 */
18int thread_group_cputime_alloc_smp(struct task_struct *tsk)
19{
20 struct signal_struct *sig = tsk->signal;
21 struct task_cputime *cputime;
22
23 /*
24 * If we have multiple threads and we don't already have a
25 * per-CPU task_cputime struct, allocate one and fill it in with
26 * the times accumulated so far.
27 */
28 if (sig->cputime.totals)
29 return 0;
30 cputime = alloc_percpu(struct task_cputime);
31 if (cputime == NULL)
32 return -ENOMEM;
33 read_lock(&tasklist_lock);
34 spin_lock_irq(&tsk->sighand->siglock);
35 if (sig->cputime.totals) {
36 spin_unlock_irq(&tsk->sighand->siglock);
37 read_unlock(&tasklist_lock);
38 free_percpu(cputime);
39 return 0;
40 }
41 sig->cputime.totals = cputime;
42 cputime = per_cpu_ptr(sig->cputime.totals, get_cpu());
43 cputime->utime = tsk->utime;
44 cputime->stime = tsk->stime;
45 cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
46 put_cpu_no_resched();
47 spin_unlock_irq(&tsk->sighand->siglock);
48 read_unlock(&tasklist_lock);
49 return 0;
50}
51
52/**
53 * thread_group_cputime_smp - Sum the thread group time fields across all CPUs.
54 *
55 * @tsk: The task we use to identify the thread group.
56 * @times: task_cputime structure in which we return the summed fields.
57 *
58 * Walk the list of CPUs to sum the per-CPU time fields in the thread group
59 * time structure.
60 */
61void thread_group_cputime_smp(
62 struct task_struct *tsk,
63 struct task_cputime *times)
64{
65 struct signal_struct *sig;
66 int i;
67 struct task_cputime *tot;
68
69 sig = tsk->signal;
70 if (unlikely(!sig) || !sig->cputime.totals) {
71 times->utime = tsk->utime;
72 times->stime = tsk->stime;
73 times->sum_exec_runtime = tsk->se.sum_exec_runtime;
74 return;
75 }
76 times->stime = times->utime = cputime_zero;
77 times->sum_exec_runtime = 0;
78 for_each_possible_cpu(i) {
79 tot = per_cpu_ptr(tsk->signal->cputime.totals, i);
80 times->utime = cputime_add(times->utime, tot->utime);
81 times->stime = cputime_add(times->stime, tot->stime);
82 times->sum_exec_runtime += tot->sum_exec_runtime;
83 }
84}
85
86#endif /* CONFIG_SMP */
87
88/*
89 * Called after updating RLIMIT_CPU to set timer expiration if necessary.
90 */
91void update_rlimit_cpu(unsigned long rlim_new)
92{
93 cputime_t cputime;
94
95 cputime = secs_to_cputime(rlim_new);
96 if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
97 cputime_lt(current->signal->it_prof_expires, cputime)) {
98 spin_lock_irq(&current->sighand->siglock);
99 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
100 spin_unlock_irq(&current->sighand->siglock);
101 }
102}
103
11static int check_clock(const clockid_t which_clock) 104static int check_clock(const clockid_t which_clock)
12{ 105{
13 int error = 0; 106 int error = 0;
@@ -158,10 +251,6 @@ static inline cputime_t virt_ticks(struct task_struct *p)
158{ 251{
159 return p->utime; 252 return p->utime;
160} 253}
161static inline unsigned long long sched_ns(struct task_struct *p)
162{
163 return task_sched_runtime(p);
164}
165 254
166int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) 255int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
167{ 256{
@@ -211,7 +300,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
211 cpu->cpu = virt_ticks(p); 300 cpu->cpu = virt_ticks(p);
212 break; 301 break;
213 case CPUCLOCK_SCHED: 302 case CPUCLOCK_SCHED:
214 cpu->sched = sched_ns(p); 303 cpu->sched = task_sched_runtime(p);
215 break; 304 break;
216 } 305 }
217 return 0; 306 return 0;
@@ -226,31 +315,20 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx,
226 struct task_struct *p, 315 struct task_struct *p,
227 union cpu_time_count *cpu) 316 union cpu_time_count *cpu)
228{ 317{
229 struct task_struct *t = p; 318 struct task_cputime cputime;
230 switch (clock_idx) { 319
320 thread_group_cputime(p, &cputime);
321 switch (clock_idx) {
231 default: 322 default:
232 return -EINVAL; 323 return -EINVAL;
233 case CPUCLOCK_PROF: 324 case CPUCLOCK_PROF:
234 cpu->cpu = cputime_add(p->signal->utime, p->signal->stime); 325 cpu->cpu = cputime_add(cputime.utime, cputime.stime);
235 do {
236 cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t));
237 t = next_thread(t);
238 } while (t != p);
239 break; 326 break;
240 case CPUCLOCK_VIRT: 327 case CPUCLOCK_VIRT:
241 cpu->cpu = p->signal->utime; 328 cpu->cpu = cputime.utime;
242 do {
243 cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t));
244 t = next_thread(t);
245 } while (t != p);
246 break; 329 break;
247 case CPUCLOCK_SCHED: 330 case CPUCLOCK_SCHED:
248 cpu->sched = p->signal->sum_sched_runtime; 331 cpu->sched = thread_group_sched_runtime(p);
249 /* Add in each other live thread. */
250 while ((t = next_thread(t)) != p) {
251 cpu->sched += t->se.sum_exec_runtime;
252 }
253 cpu->sched += sched_ns(p);
254 break; 332 break;
255 } 333 }
256 return 0; 334 return 0;
@@ -471,80 +549,11 @@ void posix_cpu_timers_exit(struct task_struct *tsk)
471} 549}
472void posix_cpu_timers_exit_group(struct task_struct *tsk) 550void posix_cpu_timers_exit_group(struct task_struct *tsk)
473{ 551{
474 cleanup_timers(tsk->signal->cpu_timers, 552 struct task_cputime cputime;
475 cputime_add(tsk->utime, tsk->signal->utime),
476 cputime_add(tsk->stime, tsk->signal->stime),
477 tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime);
478}
479
480
481/*
482 * Set the expiry times of all the threads in the process so one of them
483 * will go off before the process cumulative expiry total is reached.
484 */
485static void process_timer_rebalance(struct task_struct *p,
486 unsigned int clock_idx,
487 union cpu_time_count expires,
488 union cpu_time_count val)
489{
490 cputime_t ticks, left;
491 unsigned long long ns, nsleft;
492 struct task_struct *t = p;
493 unsigned int nthreads = atomic_read(&p->signal->live);
494
495 if (!nthreads)
496 return;
497 553
498 switch (clock_idx) { 554 thread_group_cputime(tsk, &cputime);
499 default: 555 cleanup_timers(tsk->signal->cpu_timers,
500 BUG(); 556 cputime.utime, cputime.stime, cputime.sum_exec_runtime);
501 break;
502 case CPUCLOCK_PROF:
503 left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
504 nthreads);
505 do {
506 if (likely(!(t->flags & PF_EXITING))) {
507 ticks = cputime_add(prof_ticks(t), left);
508 if (cputime_eq(t->it_prof_expires,
509 cputime_zero) ||
510 cputime_gt(t->it_prof_expires, ticks)) {
511 t->it_prof_expires = ticks;
512 }
513 }
514 t = next_thread(t);
515 } while (t != p);
516 break;
517 case CPUCLOCK_VIRT:
518 left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
519 nthreads);
520 do {
521 if (likely(!(t->flags & PF_EXITING))) {
522 ticks = cputime_add(virt_ticks(t), left);
523 if (cputime_eq(t->it_virt_expires,
524 cputime_zero) ||
525 cputime_gt(t->it_virt_expires, ticks)) {
526 t->it_virt_expires = ticks;
527 }
528 }
529 t = next_thread(t);
530 } while (t != p);
531 break;
532 case CPUCLOCK_SCHED:
533 nsleft = expires.sched - val.sched;
534 do_div(nsleft, nthreads);
535 nsleft = max_t(unsigned long long, nsleft, 1);
536 do {
537 if (likely(!(t->flags & PF_EXITING))) {
538 ns = t->se.sum_exec_runtime + nsleft;
539 if (t->it_sched_expires == 0 ||
540 t->it_sched_expires > ns) {
541 t->it_sched_expires = ns;
542 }
543 }
544 t = next_thread(t);
545 } while (t != p);
546 break;
547 }
548} 557}
549 558
550static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) 559static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
@@ -608,29 +617,32 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
608 default: 617 default:
609 BUG(); 618 BUG();
610 case CPUCLOCK_PROF: 619 case CPUCLOCK_PROF:
611 if (cputime_eq(p->it_prof_expires, 620 if (cputime_eq(p->cputime_expires.prof_exp,
612 cputime_zero) || 621 cputime_zero) ||
613 cputime_gt(p->it_prof_expires, 622 cputime_gt(p->cputime_expires.prof_exp,
614 nt->expires.cpu)) 623 nt->expires.cpu))
615 p->it_prof_expires = nt->expires.cpu; 624 p->cputime_expires.prof_exp =
625 nt->expires.cpu;
616 break; 626 break;
617 case CPUCLOCK_VIRT: 627 case CPUCLOCK_VIRT:
618 if (cputime_eq(p->it_virt_expires, 628 if (cputime_eq(p->cputime_expires.virt_exp,
619 cputime_zero) || 629 cputime_zero) ||
620 cputime_gt(p->it_virt_expires, 630 cputime_gt(p->cputime_expires.virt_exp,
621 nt->expires.cpu)) 631 nt->expires.cpu))
622 p->it_virt_expires = nt->expires.cpu; 632 p->cputime_expires.virt_exp =
633 nt->expires.cpu;
623 break; 634 break;
624 case CPUCLOCK_SCHED: 635 case CPUCLOCK_SCHED:
625 if (p->it_sched_expires == 0 || 636 if (p->cputime_expires.sched_exp == 0 ||
626 p->it_sched_expires > nt->expires.sched) 637 p->cputime_expires.sched_exp >
627 p->it_sched_expires = nt->expires.sched; 638 nt->expires.sched)
639 p->cputime_expires.sched_exp =
640 nt->expires.sched;
628 break; 641 break;
629 } 642 }
630 } else { 643 } else {
631 /* 644 /*
632 * For a process timer, we must balance 645 * For a process timer, set the cached expiration time.
633 * all the live threads' expirations.
634 */ 646 */
635 switch (CPUCLOCK_WHICH(timer->it_clock)) { 647 switch (CPUCLOCK_WHICH(timer->it_clock)) {
636 default: 648 default:
@@ -641,7 +653,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
641 cputime_lt(p->signal->it_virt_expires, 653 cputime_lt(p->signal->it_virt_expires,
642 timer->it.cpu.expires.cpu)) 654 timer->it.cpu.expires.cpu))
643 break; 655 break;
644 goto rebalance; 656 p->signal->cputime_expires.virt_exp =
657 timer->it.cpu.expires.cpu;
658 break;
645 case CPUCLOCK_PROF: 659 case CPUCLOCK_PROF:
646 if (!cputime_eq(p->signal->it_prof_expires, 660 if (!cputime_eq(p->signal->it_prof_expires,
647 cputime_zero) && 661 cputime_zero) &&
@@ -652,13 +666,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
652 if (i != RLIM_INFINITY && 666 if (i != RLIM_INFINITY &&
653 i <= cputime_to_secs(timer->it.cpu.expires.cpu)) 667 i <= cputime_to_secs(timer->it.cpu.expires.cpu))
654 break; 668 break;
655 goto rebalance; 669 p->signal->cputime_expires.prof_exp =
670 timer->it.cpu.expires.cpu;
671 break;
656 case CPUCLOCK_SCHED: 672 case CPUCLOCK_SCHED:
657 rebalance: 673 p->signal->cputime_expires.sched_exp =
658 process_timer_rebalance( 674 timer->it.cpu.expires.sched;
659 timer->it.cpu.task,
660 CPUCLOCK_WHICH(timer->it_clock),
661 timer->it.cpu.expires, now);
662 break; 675 break;
663 } 676 }
664 } 677 }
@@ -969,13 +982,13 @@ static void check_thread_timers(struct task_struct *tsk,
969 struct signal_struct *const sig = tsk->signal; 982 struct signal_struct *const sig = tsk->signal;
970 983
971 maxfire = 20; 984 maxfire = 20;
972 tsk->it_prof_expires = cputime_zero; 985 tsk->cputime_expires.prof_exp = cputime_zero;
973 while (!list_empty(timers)) { 986 while (!list_empty(timers)) {
974 struct cpu_timer_list *t = list_first_entry(timers, 987 struct cpu_timer_list *t = list_first_entry(timers,
975 struct cpu_timer_list, 988 struct cpu_timer_list,
976 entry); 989 entry);
977 if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) { 990 if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) {
978 tsk->it_prof_expires = t->expires.cpu; 991 tsk->cputime_expires.prof_exp = t->expires.cpu;
979 break; 992 break;
980 } 993 }
981 t->firing = 1; 994 t->firing = 1;
@@ -984,13 +997,13 @@ static void check_thread_timers(struct task_struct *tsk,
984 997
985 ++timers; 998 ++timers;
986 maxfire = 20; 999 maxfire = 20;
987 tsk->it_virt_expires = cputime_zero; 1000 tsk->cputime_expires.virt_exp = cputime_zero;
988 while (!list_empty(timers)) { 1001 while (!list_empty(timers)) {
989 struct cpu_timer_list *t = list_first_entry(timers, 1002 struct cpu_timer_list *t = list_first_entry(timers,
990 struct cpu_timer_list, 1003 struct cpu_timer_list,
991 entry); 1004 entry);
992 if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) { 1005 if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) {
993 tsk->it_virt_expires = t->expires.cpu; 1006 tsk->cputime_expires.virt_exp = t->expires.cpu;
994 break; 1007 break;
995 } 1008 }
996 t->firing = 1; 1009 t->firing = 1;
@@ -999,13 +1012,13 @@ static void check_thread_timers(struct task_struct *tsk,
999 1012
1000 ++timers; 1013 ++timers;
1001 maxfire = 20; 1014 maxfire = 20;
1002 tsk->it_sched_expires = 0; 1015 tsk->cputime_expires.sched_exp = 0;
1003 while (!list_empty(timers)) { 1016 while (!list_empty(timers)) {
1004 struct cpu_timer_list *t = list_first_entry(timers, 1017 struct cpu_timer_list *t = list_first_entry(timers,
1005 struct cpu_timer_list, 1018 struct cpu_timer_list,
1006 entry); 1019 entry);
1007 if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { 1020 if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
1008 tsk->it_sched_expires = t->expires.sched; 1021 tsk->cputime_expires.sched_exp = t->expires.sched;
1009 break; 1022 break;
1010 } 1023 }
1011 t->firing = 1; 1024 t->firing = 1;
@@ -1055,10 +1068,10 @@ static void check_process_timers(struct task_struct *tsk,
1055{ 1068{
1056 int maxfire; 1069 int maxfire;
1057 struct signal_struct *const sig = tsk->signal; 1070 struct signal_struct *const sig = tsk->signal;
1058 cputime_t utime, stime, ptime, virt_expires, prof_expires; 1071 cputime_t utime, ptime, virt_expires, prof_expires;
1059 unsigned long long sum_sched_runtime, sched_expires; 1072 unsigned long long sum_sched_runtime, sched_expires;
1060 struct task_struct *t;
1061 struct list_head *timers = sig->cpu_timers; 1073 struct list_head *timers = sig->cpu_timers;
1074 struct task_cputime cputime;
1062 1075
1063 /* 1076 /*
1064 * Don't sample the current process CPU clocks if there are no timers. 1077 * Don't sample the current process CPU clocks if there are no timers.
@@ -1074,18 +1087,10 @@ static void check_process_timers(struct task_struct *tsk,
1074 /* 1087 /*
1075 * Collect the current process totals. 1088 * Collect the current process totals.
1076 */ 1089 */
1077 utime = sig->utime; 1090 thread_group_cputime(tsk, &cputime);
1078 stime = sig->stime; 1091 utime = cputime.utime;
1079 sum_sched_runtime = sig->sum_sched_runtime; 1092 ptime = cputime_add(utime, cputime.stime);
1080 t = tsk; 1093 sum_sched_runtime = cputime.sum_exec_runtime;
1081 do {
1082 utime = cputime_add(utime, t->utime);
1083 stime = cputime_add(stime, t->stime);
1084 sum_sched_runtime += t->se.sum_exec_runtime;
1085 t = next_thread(t);
1086 } while (t != tsk);
1087 ptime = cputime_add(utime, stime);
1088
1089 maxfire = 20; 1094 maxfire = 20;
1090 prof_expires = cputime_zero; 1095 prof_expires = cputime_zero;
1091 while (!list_empty(timers)) { 1096 while (!list_empty(timers)) {
@@ -1193,60 +1198,18 @@ static void check_process_timers(struct task_struct *tsk,
1193 } 1198 }
1194 } 1199 }
1195 1200
1196 if (!cputime_eq(prof_expires, cputime_zero) || 1201 if (!cputime_eq(prof_expires, cputime_zero) &&
1197 !cputime_eq(virt_expires, cputime_zero) || 1202 (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) ||
1198 sched_expires != 0) { 1203 cputime_gt(sig->cputime_expires.prof_exp, prof_expires)))
1199 /* 1204 sig->cputime_expires.prof_exp = prof_expires;
1200 * Rebalance the threads' expiry times for the remaining 1205 if (!cputime_eq(virt_expires, cputime_zero) &&
1201 * process CPU timers. 1206 (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
1202 */ 1207 cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
1203 1208 sig->cputime_expires.virt_exp = virt_expires;
1204 cputime_t prof_left, virt_left, ticks; 1209 if (sched_expires != 0 &&
1205 unsigned long long sched_left, sched; 1210 (sig->cputime_expires.sched_exp == 0 ||
1206 const unsigned int nthreads = atomic_read(&sig->live); 1211 sig->cputime_expires.sched_exp > sched_expires))
1207 1212 sig->cputime_expires.sched_exp = sched_expires;
1208 if (!nthreads)
1209 return;
1210
1211 prof_left = cputime_sub(prof_expires, utime);
1212 prof_left = cputime_sub(prof_left, stime);
1213 prof_left = cputime_div_non_zero(prof_left, nthreads);
1214 virt_left = cputime_sub(virt_expires, utime);
1215 virt_left = cputime_div_non_zero(virt_left, nthreads);
1216 if (sched_expires) {
1217 sched_left = sched_expires - sum_sched_runtime;
1218 do_div(sched_left, nthreads);
1219 sched_left = max_t(unsigned long long, sched_left, 1);
1220 } else {
1221 sched_left = 0;
1222 }
1223 t = tsk;
1224 do {
1225 if (unlikely(t->flags & PF_EXITING))
1226 continue;
1227
1228 ticks = cputime_add(cputime_add(t->utime, t->stime),
1229 prof_left);
1230 if (!cputime_eq(prof_expires, cputime_zero) &&
1231 (cputime_eq(t->it_prof_expires, cputime_zero) ||
1232 cputime_gt(t->it_prof_expires, ticks))) {
1233 t->it_prof_expires = ticks;
1234 }
1235
1236 ticks = cputime_add(t->utime, virt_left);
1237 if (!cputime_eq(virt_expires, cputime_zero) &&
1238 (cputime_eq(t->it_virt_expires, cputime_zero) ||
1239 cputime_gt(t->it_virt_expires, ticks))) {
1240 t->it_virt_expires = ticks;
1241 }
1242
1243 sched = t->se.sum_exec_runtime + sched_left;
1244 if (sched_expires && (t->it_sched_expires == 0 ||
1245 t->it_sched_expires > sched)) {
1246 t->it_sched_expires = sched;
1247 }
1248 } while ((t = next_thread(t)) != tsk);
1249 }
1250} 1213}
1251 1214
1252/* 1215/*
@@ -1314,6 +1277,78 @@ out:
1314 ++timer->it_requeue_pending; 1277 ++timer->it_requeue_pending;
1315} 1278}
1316 1279
1280/**
1281 * task_cputime_zero - Check a task_cputime struct for all zero fields.
1282 *
1283 * @cputime: The struct to compare.
1284 *
1285 * Checks @cputime to see if all fields are zero. Returns true if all fields
1286 * are zero, false if any field is nonzero.
1287 */
1288static inline int task_cputime_zero(const struct task_cputime *cputime)
1289{
1290 if (cputime_eq(cputime->utime, cputime_zero) &&
1291 cputime_eq(cputime->stime, cputime_zero) &&
1292 cputime->sum_exec_runtime == 0)
1293 return 1;
1294 return 0;
1295}
1296
1297/**
1298 * task_cputime_expired - Compare two task_cputime entities.
1299 *
1300 * @sample: The task_cputime structure to be checked for expiration.
1301 * @expires: Expiration times, against which @sample will be checked.
1302 *
1303 * Checks @sample against @expires to see if any field of @sample has expired.
1304 * Returns true if any field of the former is greater than the corresponding
1305 * field of the latter if the latter field is set. Otherwise returns false.
1306 */
1307static inline int task_cputime_expired(const struct task_cputime *sample,
1308 const struct task_cputime *expires)
1309{
1310 if (!cputime_eq(expires->utime, cputime_zero) &&
1311 cputime_ge(sample->utime, expires->utime))
1312 return 1;
1313 if (!cputime_eq(expires->stime, cputime_zero) &&
1314 cputime_ge(cputime_add(sample->utime, sample->stime),
1315 expires->stime))
1316 return 1;
1317 if (expires->sum_exec_runtime != 0 &&
1318 sample->sum_exec_runtime >= expires->sum_exec_runtime)
1319 return 1;
1320 return 0;
1321}
1322
1323/**
1324 * fastpath_timer_check - POSIX CPU timers fast path.
1325 *
1326 * @tsk: The task (thread) being checked.
1327 * @sig: The signal pointer for that task.
1328 *
1329 * If there are no timers set return false. Otherwise snapshot the task and
1330 * thread group timers, then compare them with the corresponding expiration
1331 * times. Returns true if a timer has expired, else returns false.
1332 */
1333static inline int fastpath_timer_check(struct task_struct *tsk,
1334 struct signal_struct *sig)
1335{
1336 struct task_cputime task_sample = {
1337 .utime = tsk->utime,
1338 .stime = tsk->stime,
1339 .sum_exec_runtime = tsk->se.sum_exec_runtime
1340 };
1341 struct task_cputime group_sample;
1342
1343 if (task_cputime_zero(&tsk->cputime_expires) &&
1344 task_cputime_zero(&sig->cputime_expires))
1345 return 0;
1346 if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
1347 return 1;
1348 thread_group_cputime(tsk, &group_sample);
1349 return task_cputime_expired(&group_sample, &sig->cputime_expires);
1350}
1351
1317/* 1352/*
1318 * This is called from the timer interrupt handler. The irq handler has 1353 * This is called from the timer interrupt handler. The irq handler has
1319 * already updated our counts. We need to check if any timers fire now. 1354 * already updated our counts. We need to check if any timers fire now.
@@ -1323,30 +1358,29 @@ void run_posix_cpu_timers(struct task_struct *tsk)
1323{ 1358{
1324 LIST_HEAD(firing); 1359 LIST_HEAD(firing);
1325 struct k_itimer *timer, *next; 1360 struct k_itimer *timer, *next;
1361 struct signal_struct *sig;
1362 struct sighand_struct *sighand;
1363 unsigned long flags;
1326 1364
1327 BUG_ON(!irqs_disabled()); 1365 BUG_ON(!irqs_disabled());
1328 1366
1329#define UNEXPIRED(clock) \ 1367 /* Pick up tsk->signal and make sure it's valid. */
1330 (cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \ 1368 sig = tsk->signal;
1331 cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires))
1332
1333 if (UNEXPIRED(prof) && UNEXPIRED(virt) &&
1334 (tsk->it_sched_expires == 0 ||
1335 tsk->se.sum_exec_runtime < tsk->it_sched_expires))
1336 return;
1337
1338#undef UNEXPIRED
1339
1340 /* 1369 /*
1341 * Double-check with locks held. 1370 * The fast path checks that there are no expired thread or thread
1371 * group timers. If that's so, just return. Also check that
1372 * tsk->signal is non-NULL; this probably can't happen but cover the
1373 * possibility anyway.
1342 */ 1374 */
1343 read_lock(&tasklist_lock); 1375 if (unlikely(!sig) || !fastpath_timer_check(tsk, sig)) {
1344 if (likely(tsk->signal != NULL)) { 1376 return;
1345 spin_lock(&tsk->sighand->siglock); 1377 }
1346 1378 sighand = lock_task_sighand(tsk, &flags);
1379 if (likely(sighand)) {
1347 /* 1380 /*
1348 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] 1381 * Here we take off tsk->signal->cpu_timers[N] and
1349 * all the timers that are firing, and put them on the firing list. 1382 * tsk->cpu_timers[N] all the timers that are firing, and
1383 * put them on the firing list.
1350 */ 1384 */
1351 check_thread_timers(tsk, &firing); 1385 check_thread_timers(tsk, &firing);
1352 check_process_timers(tsk, &firing); 1386 check_process_timers(tsk, &firing);
@@ -1359,9 +1393,8 @@ void run_posix_cpu_timers(struct task_struct *tsk)
1359 * that gets the timer lock before we do will give it up and 1393 * that gets the timer lock before we do will give it up and
1360 * spin until we've taken care of that timer below. 1394 * spin until we've taken care of that timer below.
1361 */ 1395 */
1362 spin_unlock(&tsk->sighand->siglock);
1363 } 1396 }
1364 read_unlock(&tasklist_lock); 1397 unlock_task_sighand(tsk, &flags);
1365 1398
1366 /* 1399 /*
1367 * Now that all the timers on our list have the firing flag, 1400 * Now that all the timers on our list have the firing flag,
@@ -1389,10 +1422,9 @@ void run_posix_cpu_timers(struct task_struct *tsk)
1389 1422
1390/* 1423/*
1391 * Set one of the process-wide special case CPU timers. 1424 * Set one of the process-wide special case CPU timers.
1392 * The tasklist_lock and tsk->sighand->siglock must be held by the caller. 1425 * The tsk->sighand->siglock must be held by the caller.
 1393 * The oldval argument is null for the RLIMIT_CPU timer, where *newval is 1426 * The *newval argument is relative and we update it to be absolute; *oldval
1394 * absolute; non-null for ITIMER_*, where *newval is relative and we update 1427 * is absolute and we update it to be relative.
1395 * it to be absolute, *oldval is absolute and we update it to be relative.
1396 */ 1428 */
1397void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, 1429void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1398 cputime_t *newval, cputime_t *oldval) 1430 cputime_t *newval, cputime_t *oldval)
@@ -1435,13 +1467,14 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1435 cputime_ge(list_first_entry(head, 1467 cputime_ge(list_first_entry(head,
1436 struct cpu_timer_list, entry)->expires.cpu, 1468 struct cpu_timer_list, entry)->expires.cpu,
1437 *newval)) { 1469 *newval)) {
1438 /* 1470 switch (clock_idx) {
1439 * Rejigger each thread's expiry time so that one will 1471 case CPUCLOCK_PROF:
1440 * notice before we hit the process-cumulative expiry time. 1472 tsk->signal->cputime_expires.prof_exp = *newval;
1441 */ 1473 break;
1442 union cpu_time_count expires = { .sched = 0 }; 1474 case CPUCLOCK_VIRT:
1443 expires.cpu = *newval; 1475 tsk->signal->cputime_expires.virt_exp = *newval;
1444 process_timer_rebalance(tsk, clock_idx, expires, now); 1476 break;
1477 }
1445 } 1478 }
1446} 1479}
1447 1480
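set_process_cpu_timer() now arms a single group-wide expiry in signal->cputime_expires instead of rebalancing per-thread timers. The group totals it is checked against come from thread_group_cputime(), whose body is added in a hunk not shown here; below is a sketch of roughly how the totals might be assembled, assuming the per-CPU accounting buffer (signal->cputime.totals) this series introduces.

/* Sketch: sum the group's per-CPU totals, falling back to the caller's own
 * counters when group accounting has not been set up. */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
	struct signal_struct *sig = tsk->signal;
	struct task_cputime *tot;
	int i;

	if (unlikely(!sig) || !sig->cputime.totals) {
		times->utime = tsk->utime;
		times->stime = tsk->stime;
		times->sum_exec_runtime = tsk->se.sum_exec_runtime;
		return;
	}

	times->utime = times->stime = cputime_zero;
	times->sum_exec_runtime = 0;
	for_each_possible_cpu(i) {
		tot = per_cpu_ptr(sig->cputime.totals, i);
		times->utime = cputime_add(times->utime, tot->utime);
		times->stime = cputime_add(times->stime, tot->stime);
		times->sum_exec_runtime += tot->sum_exec_runtime;
	}
}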
diff --git a/kernel/sched.c b/kernel/sched.c
index cc1f81b50b82..c51b5d276665 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4037,23 +4037,56 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
4037EXPORT_PER_CPU_SYMBOL(kstat); 4037EXPORT_PER_CPU_SYMBOL(kstat);
4038 4038
4039/* 4039/*
4040 * Return any ns on the sched_clock that have not yet been banked in
4041 * @p in case that task is currently running.
4042 *
4043 * Called with task_rq_lock() held on @rq.
4044 */
4045static unsigned long long task_delta_exec(struct task_struct *p, struct rq *rq)
4046{
4047 if (task_current(rq, p)) {
4048 u64 delta_exec;
4049
4050 update_rq_clock(rq);
4051 delta_exec = rq->clock - p->se.exec_start;
4052 if ((s64)delta_exec > 0)
4053 return delta_exec;
4054 }
4055 return 0;
4056}
4057
4058/*
4040 * Return p->sum_exec_runtime plus any more ns on the sched_clock 4059 * Return p->sum_exec_runtime plus any more ns on the sched_clock
4041 * that have not yet been banked in case the task is currently running. 4060 * that have not yet been banked in case the task is currently running.
4042 */ 4061 */
4043unsigned long long task_sched_runtime(struct task_struct *p) 4062unsigned long long task_sched_runtime(struct task_struct *p)
4044{ 4063{
4045 unsigned long flags; 4064 unsigned long flags;
4046 u64 ns, delta_exec; 4065 u64 ns;
4047 struct rq *rq; 4066 struct rq *rq;
4048 4067
4049 rq = task_rq_lock(p, &flags); 4068 rq = task_rq_lock(p, &flags);
4050 ns = p->se.sum_exec_runtime; 4069 ns = p->se.sum_exec_runtime + task_delta_exec(p, rq);
4051 if (task_current(rq, p)) { 4070 task_rq_unlock(rq, &flags);
4052 update_rq_clock(rq); 4071
4053 delta_exec = rq->clock - p->se.exec_start; 4072 return ns;
4054 if ((s64)delta_exec > 0) 4073}
4055 ns += delta_exec; 4074
4056 } 4075/*
4076 * Return sum_exec_runtime for the thread group plus any more ns on the
4077 * sched_clock that have not yet been banked in case the task is currently
4078 * running.
4079 */
4080unsigned long long thread_group_sched_runtime(struct task_struct *p)
4081{
4082 unsigned long flags;
4083 u64 ns;
4084 struct rq *rq;
4085 struct task_cputime totals;
4086
4087 rq = task_rq_lock(p, &flags);
4088 thread_group_cputime(p, &totals);
4089 ns = totals.sum_exec_runtime + task_delta_exec(p, rq);
4057 task_rq_unlock(rq, &flags); 4090 task_rq_unlock(rq, &flags);
4058 4091
4059 return ns; 4092 return ns;
@@ -4070,6 +4103,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
4070 cputime64_t tmp; 4103 cputime64_t tmp;
4071 4104
4072 p->utime = cputime_add(p->utime, cputime); 4105 p->utime = cputime_add(p->utime, cputime);
4106 account_group_user_time(p, cputime);
4073 4107
4074 /* Add user time to cpustat. */ 4108 /* Add user time to cpustat. */
4075 tmp = cputime_to_cputime64(cputime); 4109 tmp = cputime_to_cputime64(cputime);
@@ -4094,6 +4128,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime)
4094 tmp = cputime_to_cputime64(cputime); 4128 tmp = cputime_to_cputime64(cputime);
4095 4129
4096 p->utime = cputime_add(p->utime, cputime); 4130 p->utime = cputime_add(p->utime, cputime);
4131 account_group_user_time(p, cputime);
4097 p->gtime = cputime_add(p->gtime, cputime); 4132 p->gtime = cputime_add(p->gtime, cputime);
4098 4133
4099 cpustat->user = cputime64_add(cpustat->user, tmp); 4134 cpustat->user = cputime64_add(cpustat->user, tmp);
@@ -4129,6 +4164,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
4129 } 4164 }
4130 4165
4131 p->stime = cputime_add(p->stime, cputime); 4166 p->stime = cputime_add(p->stime, cputime);
4167 account_group_system_time(p, cputime);
4132 4168
4133 /* Add system time to cpustat. */ 4169 /* Add system time to cpustat. */
4134 tmp = cputime_to_cputime64(cputime); 4170 tmp = cputime_to_cputime64(cputime);
@@ -4170,6 +4206,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
4170 4206
4171 if (p == rq->idle) { 4207 if (p == rq->idle) {
4172 p->stime = cputime_add(p->stime, steal); 4208 p->stime = cputime_add(p->stime, steal);
4209 account_group_system_time(p, steal);
4173 if (atomic_read(&rq->nr_iowait) > 0) 4210 if (atomic_read(&rq->nr_iowait) > 0)
4174 cpustat->iowait = cputime64_add(cpustat->iowait, tmp); 4211 cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
4175 else 4212 else
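The accounting hunks above add account_group_user_time()/account_group_system_time() calls next to the existing per-task updates. Those helpers are introduced elsewhere in the patch (kernel/sched_stats.h); the user-time variant plausibly looks like the sketch below, again assuming the per-CPU signal->cputime.totals buffer. The system-time variant would do the same for stime.

/* Sketch: charge user time to the thread group's per-CPU bucket; the caller
 * has already updated p->utime for the task itself. */
static inline void account_group_user_time(struct task_struct *tsk,
					   cputime_t cputime)
{
	struct signal_struct *sig = tsk->signal;
	struct task_cputime *times;

	if (unlikely(!sig) || !sig->cputime.totals)
		return;

	times = per_cpu_ptr(sig->cputime.totals, get_cpu());
	times->utime = cputime_add(times->utime, cputime);
	put_cpu();
}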
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index fb8994c6d4bb..99aa31acc544 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -507,6 +507,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
507 struct task_struct *curtask = task_of(curr); 507 struct task_struct *curtask = task_of(curr);
508 508
509 cpuacct_charge(curtask, delta_exec); 509 cpuacct_charge(curtask, delta_exec);
510 account_group_exec_runtime(curtask, delta_exec);
510 } 511 }
511} 512}
512 513
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 552310798dad..8375e69af36a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -483,6 +483,8 @@ static void update_curr_rt(struct rq *rq)
483 schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); 483 schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
484 484
485 curr->se.sum_exec_runtime += delta_exec; 485 curr->se.sum_exec_runtime += delta_exec;
486 account_group_exec_runtime(curr, delta_exec);
487
486 curr->se.exec_start = rq->clock; 488 curr->se.exec_start = rq->clock;
487 cpuacct_charge(curr, delta_exec); 489 cpuacct_charge(curr, delta_exec);
488 490
@@ -1412,7 +1414,7 @@ static void watchdog(struct rq *rq, struct task_struct *p)
1412 p->rt.timeout++; 1414 p->rt.timeout++;
1413 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); 1415 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
1414 if (p->rt.timeout > next) 1416 if (p->rt.timeout > next)
1415 p->it_sched_expires = p->se.sum_exec_runtime; 1417 p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
1416 } 1418 }
1417} 1419}
1418 1420
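Both scheduling classes now call account_group_exec_runtime() alongside the per-entity sum_exec_runtime update, so the group-wide runtime expiry (cputime_expires.sched_exp) can be checked without walking every thread. A sketch along the same lines as the user/system-time helpers above, with the same assumptions:

/* Sketch: fold scheduler runtime into the group's per-CPU totals. */
static inline void account_group_exec_runtime(struct task_struct *tsk,
					      unsigned long long ns)
{
	struct signal_struct *sig = tsk->signal;
	struct task_cputime *times;

	if (unlikely(!sig) || !sig->cputime.totals)
		return;

	times = per_cpu_ptr(sig->cputime.totals, get_cpu());
	times->sum_exec_runtime += ns;
	put_cpu();
}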
diff --git a/kernel/signal.c b/kernel/signal.c
index e661b01d340f..6eea5826d618 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1338,6 +1338,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1338 struct siginfo info; 1338 struct siginfo info;
1339 unsigned long flags; 1339 unsigned long flags;
1340 struct sighand_struct *psig; 1340 struct sighand_struct *psig;
1341 struct task_cputime cputime;
1341 int ret = sig; 1342 int ret = sig;
1342 1343
1343 BUG_ON(sig == -1); 1344 BUG_ON(sig == -1);
@@ -1368,10 +1369,9 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1368 1369
1369 info.si_uid = tsk->uid; 1370 info.si_uid = tsk->uid;
1370 1371
1371 info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime, 1372 thread_group_cputime(tsk, &cputime);
1372 tsk->signal->utime)); 1373 info.si_utime = cputime_to_jiffies(cputime.utime);
1373 info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime, 1374 info.si_stime = cputime_to_jiffies(cputime.stime);
1374 tsk->signal->stime));
1375 1375
1376 info.si_status = tsk->exit_code & 0x7f; 1376 info.si_status = tsk->exit_code & 0x7f;
1377 if (tsk->exit_code & 0x80) 1377 if (tsk->exit_code & 0x80)
diff --git a/kernel/sys.c b/kernel/sys.c
index 038a7bc0901d..d046a7a055c2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -853,38 +853,28 @@ asmlinkage long sys_setfsgid(gid_t gid)
853 return old_fsgid; 853 return old_fsgid;
854} 854}
855 855
856void do_sys_times(struct tms *tms)
857{
858 struct task_cputime cputime;
859 cputime_t cutime, cstime;
860
861 spin_lock_irq(&current->sighand->siglock);
862 thread_group_cputime(current, &cputime);
863 cutime = current->signal->cutime;
864 cstime = current->signal->cstime;
865 spin_unlock_irq(&current->sighand->siglock);
866 tms->tms_utime = cputime_to_clock_t(cputime.utime);
867 tms->tms_stime = cputime_to_clock_t(cputime.stime);
868 tms->tms_cutime = cputime_to_clock_t(cutime);
869 tms->tms_cstime = cputime_to_clock_t(cstime);
870}
871
856asmlinkage long sys_times(struct tms __user * tbuf) 872asmlinkage long sys_times(struct tms __user * tbuf)
857{ 873{
858 /*
859 * In the SMP world we might just be unlucky and have one of
860 * the times increment as we use it. Since the value is an
861 * atomically safe type this is just fine. Conceptually its
862 * as if the syscall took an instant longer to occur.
863 */
864 if (tbuf) { 874 if (tbuf) {
865 struct tms tmp; 875 struct tms tmp;
866 struct task_struct *tsk = current; 876
867 struct task_struct *t; 877 do_sys_times(&tmp);
868 cputime_t utime, stime, cutime, cstime;
869
870 spin_lock_irq(&tsk->sighand->siglock);
871 utime = tsk->signal->utime;
872 stime = tsk->signal->stime;
873 t = tsk;
874 do {
875 utime = cputime_add(utime, t->utime);
876 stime = cputime_add(stime, t->stime);
877 t = next_thread(t);
878 } while (t != tsk);
879
880 cutime = tsk->signal->cutime;
881 cstime = tsk->signal->cstime;
882 spin_unlock_irq(&tsk->sighand->siglock);
883
884 tmp.tms_utime = cputime_to_clock_t(utime);
885 tmp.tms_stime = cputime_to_clock_t(stime);
886 tmp.tms_cutime = cputime_to_clock_t(cutime);
887 tmp.tms_cstime = cputime_to_clock_t(cstime);
888 if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) 878 if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
889 return -EFAULT; 879 return -EFAULT;
890 } 880 }
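The refactored sys_times() reports the group-wide totals from do_sys_times() in clock ticks. A small user-space check (not part of the patch) that exercises this path and converts the ticks back to seconds:

#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

int main(void)
{
	struct tms t;
	long hz = sysconf(_SC_CLK_TCK);		/* clock ticks per second */

	/* Burn a little CPU so the group totals are visibly non-zero. */
	for (volatile unsigned long i = 0; i < 100000000UL; i++)
		;

	if (times(&t) == (clock_t)-1)
		return 1;

	printf("utime %.2fs stime %.2fs (all threads of this process)\n",
	       (double)t.tms_utime / hz, (double)t.tms_stime / hz);
	return 0;
}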
@@ -1445,7 +1435,6 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
1445asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) 1435asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
1446{ 1436{
1447 struct rlimit new_rlim, *old_rlim; 1437 struct rlimit new_rlim, *old_rlim;
1448 unsigned long it_prof_secs;
1449 int retval; 1438 int retval;
1450 1439
1451 if (resource >= RLIM_NLIMITS) 1440 if (resource >= RLIM_NLIMITS)
@@ -1491,18 +1480,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
1491 if (new_rlim.rlim_cur == RLIM_INFINITY) 1480 if (new_rlim.rlim_cur == RLIM_INFINITY)
1492 goto out; 1481 goto out;
1493 1482
1494 it_prof_secs = cputime_to_secs(current->signal->it_prof_expires); 1483 update_rlimit_cpu(new_rlim.rlim_cur);
1495 if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) {
1496 unsigned long rlim_cur = new_rlim.rlim_cur;
1497 cputime_t cputime;
1498
1499 cputime = secs_to_cputime(rlim_cur);
1500 read_lock(&tasklist_lock);
1501 spin_lock_irq(&current->sighand->siglock);
1502 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
1503 spin_unlock_irq(&current->sighand->siglock);
1504 read_unlock(&tasklist_lock);
1505 }
1506out: 1484out:
1507 return 0; 1485 return 0;
1508} 1486}
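sys_setrlimit() and the SELinux hook below now funnel RLIMIT_CPU changes through update_rlimit_cpu(), which is added in a hunk not shown here. A sketch of what it plausibly does, assuming it simply re-arms the process-wide profiling timer under siglock:

/* Sketch: re-arm the process-wide CPUCLOCK_PROF timer for a new
 * RLIMIT_CPU soft limit (rlim_new is in seconds). */
void update_rlimit_cpu(unsigned long rlim_new)
{
	cputime_t cputime = secs_to_cputime(rlim_new);

	if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
	    cputime_gt(current->signal->it_prof_expires, cputime)) {
		spin_lock_irq(&current->sighand->siglock);
		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
		spin_unlock_irq(&current->sighand->siglock);
	}
}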
@@ -1540,11 +1518,8 @@ out:
1540 * 1518 *
1541 */ 1519 */
1542 1520
1543static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r, 1521static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
1544 cputime_t *utimep, cputime_t *stimep)
1545{ 1522{
1546 *utimep = cputime_add(*utimep, t->utime);
1547 *stimep = cputime_add(*stimep, t->stime);
1548 r->ru_nvcsw += t->nvcsw; 1523 r->ru_nvcsw += t->nvcsw;
1549 r->ru_nivcsw += t->nivcsw; 1524 r->ru_nivcsw += t->nivcsw;
1550 r->ru_minflt += t->min_flt; 1525 r->ru_minflt += t->min_flt;
@@ -1558,12 +1533,13 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1558 struct task_struct *t; 1533 struct task_struct *t;
1559 unsigned long flags; 1534 unsigned long flags;
1560 cputime_t utime, stime; 1535 cputime_t utime, stime;
1536 struct task_cputime cputime;
1561 1537
1562 memset((char *) r, 0, sizeof *r); 1538 memset((char *) r, 0, sizeof *r);
1563 utime = stime = cputime_zero; 1539 utime = stime = cputime_zero;
1564 1540
1565 if (who == RUSAGE_THREAD) { 1541 if (who == RUSAGE_THREAD) {
1566 accumulate_thread_rusage(p, r, &utime, &stime); 1542 accumulate_thread_rusage(p, r);
1567 goto out; 1543 goto out;
1568 } 1544 }
1569 1545
@@ -1586,8 +1562,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1586 break; 1562 break;
1587 1563
1588 case RUSAGE_SELF: 1564 case RUSAGE_SELF:
1589 utime = cputime_add(utime, p->signal->utime); 1565 thread_group_cputime(p, &cputime);
1590 stime = cputime_add(stime, p->signal->stime); 1566 utime = cputime_add(utime, cputime.utime);
1567 stime = cputime_add(stime, cputime.stime);
1591 r->ru_nvcsw += p->signal->nvcsw; 1568 r->ru_nvcsw += p->signal->nvcsw;
1592 r->ru_nivcsw += p->signal->nivcsw; 1569 r->ru_nivcsw += p->signal->nivcsw;
1593 r->ru_minflt += p->signal->min_flt; 1570 r->ru_minflt += p->signal->min_flt;
@@ -1596,7 +1573,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1596 r->ru_oublock += p->signal->oublock; 1573 r->ru_oublock += p->signal->oublock;
1597 t = p; 1574 t = p;
1598 do { 1575 do {
1599 accumulate_thread_rusage(t, r, &utime, &stime); 1576 accumulate_thread_rusage(t, r);
1600 t = next_thread(t); 1577 t = next_thread(t);
1601 } while (t != p); 1578 } while (t != p);
1602 break; 1579 break;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 03fc6a81ae32..69649783c266 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -75,6 +75,7 @@
75#include <linux/string.h> 75#include <linux/string.h>
76#include <linux/selinux.h> 76#include <linux/selinux.h>
77#include <linux/mutex.h> 77#include <linux/mutex.h>
78#include <linux/posix-timers.h>
78 79
79#include "avc.h" 80#include "avc.h"
80#include "objsec.h" 81#include "objsec.h"
@@ -2321,13 +2322,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm)
2321 initrlim = init_task.signal->rlim+i; 2322 initrlim = init_task.signal->rlim+i;
2322 rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); 2323 rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur);
2323 } 2324 }
2324 if (current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { 2325 update_rlimit_cpu(rlim->rlim_cur);
2325 /*
2326 * This will cause RLIMIT_CPU calculations
2327 * to be refigured.
2328 */
2329 current->it_prof_expires = jiffies_to_cputime(1);
2330 }
2331 } 2326 }
2332 2327
2333 /* Wake up the parent if it is waiting so that it can 2328 /* Wake up the parent if it is waiting so that it can