author     Frank Mayhar <fmayhar@google.com>    2008-09-12 12:54:39 -0400
committer  Ingo Molnar <mingo@elte.hu>          2008-09-23 07:38:44 -0400
commit     bb34d92f643086d546b49cef680f6f305ed84414
tree       275887040c96971e133fa20d99517c1fcea76415
parent     5ce73a4a5a4893a1aa4cdeed1b1a5a6de42c43b6
timers: fix itimer/many thread hang, v2
This is the second resubmission of the posix timer rework patch, posted
a few days ago.
This includes the changes from the previous resubmission, which addressed
Oleg Nesterov's comments, removing the RCU stuff from the patch and
un-inlining the thread_group_cputime() function for SMP.
In addition, per Ingo Molnar's suggestion, it simplifies the UP code,
consolidating much of it with the SMP version and relying on the
lower-level SMP/UP handling to take care of the differences.
It also cleans up some UP compile errors, moves the scheduler stats-related
macros into kernel/sched_stats.h, cleans up a merge error in
kernel/fork.c and has a few other minor fixes and cleanups as suggested
by Oleg and Ingo. Thanks for the review, guys.
Signed-off-by: Frank Mayhar <fmayhar@google.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
 include/linux/kernel_stat.h |   1
 include/linux/sched.h       | 183
 kernel/fork.c               |   5
 kernel/posix-cpu-timers.c   | 153
 kernel/sched.c              |  47
 kernel/sched_stats.h        | 136
 6 files changed, 214 insertions(+), 311 deletions(-)
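The core of the rework: a thread group's totals live in one per-CPU task_cputime block, so the tick path touches only the local CPU's slot with no shared lock, and the read side sums across CPUs on demand. Below is a minimal userspace C model of that scheme; the fixed array stands in for the kernel's alloc_percpu() area, and all names are illustrative rather than the kernel API.

```c
#include <stdio.h>

#define NR_CPUS 4

/* Model of struct task_cputime: one slot per CPU for a thread group. */
struct task_cputime {
	unsigned long long utime;            /* user time, in ticks */
	unsigned long long stime;            /* system time, in ticks */
	unsigned long long sum_exec_runtime; /* scheduled time, in ns */
};

/* Stands in for the percpu area hung off signal_struct. */
static struct task_cputime totals[NR_CPUS];

/* Tick-side update: only the running CPU's slot is touched, no lock. */
static void account_group_user_time(int cpu, unsigned long long ticks)
{
	totals[cpu].utime += ticks;
}

/* Read side: sum every CPU's slot, as thread_group_cputime() does. */
static void thread_group_cputime(struct task_cputime *times)
{
	times->utime = times->stime = times->sum_exec_runtime = 0;
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		times->utime += totals[cpu].utime;
		times->stime += totals[cpu].stime;
		times->sum_exec_runtime += totals[cpu].sum_exec_runtime;
	}
}

int main(void)
{
	struct task_cputime sum;

	account_group_user_time(0, 3);	/* ticks charged on CPU 0 */
	account_group_user_time(2, 5);	/* ticks charged on CPU 2 */
	thread_group_cputime(&sum);
	printf("group utime = %llu ticks\n", sum.utime);
	return 0;
}
```

The trade-off is deliberate: updates happen every tick on every thread, so they must be cheap and contention-free; the summation happens only when someone actually samples a process-wide clock.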
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index cf9f40a91c9c..cac3750cd65e 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -52,6 +52,7 @@ static inline int kstat_irqs(int irq)
 	return sum;
 }
 
+extern unsigned long long task_delta_exec(struct task_struct *);
 extern void account_user_time(struct task_struct *, cputime_t);
 extern void account_user_time_scaled(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7ce8d4e53565..b982fb48c8f0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -454,15 +454,9 @@ struct task_cputime {
  * This structure contains the version of task_cputime, above, that is
  * used for thread group CPU clock calculations.
  */
-#ifdef CONFIG_SMP
 struct thread_group_cputime {
 	struct task_cputime *totals;
 };
-#else
-struct thread_group_cputime {
-	struct task_cputime totals;
-};
-#endif
 
 /*
  * NOTE! "signal_struct" does not have it's own
@@ -2124,193 +2118,26 @@ static inline int spin_needbreak(spinlock_t *lock)
 /*
  * Thread group CPU time accounting.
  */
-#ifdef CONFIG_SMP
 
-extern int thread_group_cputime_alloc_smp(struct task_struct *);
-extern void thread_group_cputime_smp(struct task_struct *, struct task_cputime *);
+extern int thread_group_cputime_alloc(struct task_struct *);
+extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
 
 static inline void thread_group_cputime_init(struct signal_struct *sig)
 {
 	sig->cputime.totals = NULL;
 }
 
-static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
-						    struct task_struct *new)
+static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
 {
 	if (curr->signal->cputime.totals)
 		return 0;
-	return thread_group_cputime_alloc_smp(curr);
+	return thread_group_cputime_alloc(curr);
 }
 
-static inline void thread_group_cputime_free(struct signal_struct *sig)
-{
-	free_percpu(sig->cputime.totals);
-}
-
-/**
- * thread_group_cputime - Sum the thread group time fields across all CPUs.
- *
- * This is a wrapper for the real routine, thread_group_cputime_smp(). See
- * that routine for details.
- */
-static inline void thread_group_cputime(
-	struct task_struct *tsk,
-	struct task_cputime *times)
-{
-	thread_group_cputime_smp(tsk, times);
-}
-
-/**
- * thread_group_cputime_account_user - Maintain utime for a thread group.
- *
- * @tgtimes: Pointer to thread_group_cputime structure.
- * @cputime: Time value by which to increment the utime field of that
- *	     structure.
- *
- * If thread group time is being maintained, get the structure for the
- * running CPU and update the utime field there.
- */
-static inline void thread_group_cputime_account_user(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	if (tgtimes->totals) {
-		struct task_cputime *times;
-
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
-		times->utime = cputime_add(times->utime, cputime);
-		put_cpu_no_resched();
-	}
-}
-
-/**
- * thread_group_cputime_account_system - Maintain stime for a thread group.
- *
- * @tgtimes: Pointer to thread_group_cputime structure.
- * @cputime: Time value by which to increment the stime field of that
- *	     structure.
- *
- * If thread group time is being maintained, get the structure for the
- * running CPU and update the stime field there.
- */
-static inline void thread_group_cputime_account_system(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	if (tgtimes->totals) {
-		struct task_cputime *times;
-
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
-		times->stime = cputime_add(times->stime, cputime);
-		put_cpu_no_resched();
-	}
-}
-
-/**
- * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
- *					       thread group.
- *
- * @tgtimes: Pointer to thread_group_cputime structure.
- * @ns:	     Time value by which to increment the sum_exec_runtime field
- *	     of that structure.
- *
- * If thread group time is being maintained, get the structure for the
- * running CPU and update the sum_exec_runtime field there.
- */
-static inline void thread_group_cputime_account_exec_runtime(
-	struct thread_group_cputime *tgtimes,
-	unsigned long long ns)
-{
-	if (tgtimes->totals) {
-		struct task_cputime *times;
-
-		times = per_cpu_ptr(tgtimes->totals, get_cpu());
-		times->sum_exec_runtime += ns;
-		put_cpu_no_resched();
-	}
-}
-
-#else /* CONFIG_SMP */
-
-static inline void thread_group_cputime_init(struct signal_struct *sig)
-{
-	sig->cputime.totals.utime = cputime_zero;
-	sig->cputime.totals.stime = cputime_zero;
-	sig->cputime.totals.sum_exec_runtime = 0;
-}
-
-static inline int thread_group_cputime_alloc(struct task_struct *tsk)
-{
-	return 0;
-}
 
 static inline void thread_group_cputime_free(struct signal_struct *sig)
 {
-}
-
-static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
-						    struct task_struct *tsk)
-{
-	return 0;
-}
-
-static inline void thread_group_cputime(struct task_struct *tsk,
-					struct task_cputime *cputime)
-{
-	*cputime = tsk->signal->cputime.totals;
-}
-
-static inline void thread_group_cputime_account_user(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	tgtimes->totals.utime = cputime_add(tgtimes->totals.utime, cputime);
-}
-
-static inline void thread_group_cputime_account_system(
-	struct thread_group_cputime *tgtimes,
-	cputime_t cputime)
-{
-	tgtimes->totals.stime = cputime_add(tgtimes->totals.stime, cputime);
-}
-
-static inline void thread_group_cputime_account_exec_runtime(
-	struct thread_group_cputime *tgtimes,
-	unsigned long long ns)
-{
-	tgtimes->totals.sum_exec_runtime += ns;
-}
-
-#endif /* CONFIG_SMP */
-
-static inline void account_group_user_time(struct task_struct *tsk,
-					   cputime_t cputime)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_user(&sig->cputime, cputime);
-}
-
-static inline void account_group_system_time(struct task_struct *tsk,
-					     cputime_t cputime)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_system(&sig->cputime, cputime);
-}
-
-static inline void account_group_exec_runtime(struct task_struct *tsk,
-					      unsigned long long ns)
-{
-	struct signal_struct *sig;
-
-	sig = tsk->signal;
-	if (likely(sig))
-		thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
+	free_percpu(sig->cputime.totals);
}
 
 /*
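The simplified clone-thread hook above only does the cheap NULL check; the racy part now lives entirely in thread_group_cputime_alloc() (see kernel/posix-cpu-timers.c below). A userspace sketch of that allocate-then-recheck pattern, with a pthread mutex standing in for siglock (illustrative only, not the kernel code):

```c
#include <pthread.h>
#include <stdlib.h>

struct task_cputime { unsigned long long utime, stime, sum_exec_runtime; };

static struct task_cputime *totals;	/* NULL until a second thread exists */
static pthread_mutex_t siglock = PTHREAD_MUTEX_INITIALIZER;

/* Allocate outside the lock, then recheck under it: another thread
 * may have installed its own block while we were allocating. */
static int group_totals_alloc(void)
{
	struct task_cputime *cputime = calloc(1, sizeof(*cputime));

	if (!cputime)
		return -1;		/* -ENOMEM in the kernel */
	pthread_mutex_lock(&siglock);
	if (totals) {			/* lost the race: discard ours */
		pthread_mutex_unlock(&siglock);
		free(cputime);
		return 0;
	}
	totals = cputime;
	pthread_mutex_unlock(&siglock);
	return 0;
}

/* Mirrors thread_group_cputime_clone_thread(): cheap check first. */
static int group_totals_clone_thread(void)
{
	if (totals)
		return 0;
	return group_totals_alloc();
}
```

The allocation happens at most once per process, when the second thread is created, so single-threaded processes never pay for the per-CPU block at all.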
diff --git a/kernel/fork.c b/kernel/fork.c
index 1181b9aac48e..021ae012cc75 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -791,7 +791,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	int ret;
 
 	if (clone_flags & CLONE_THREAD) {
-		ret = thread_group_cputime_clone_thread(current, tsk);
+		ret = thread_group_cputime_clone_thread(current);
 		if (likely(!ret)) {
 			atomic_inc(&current->signal->count);
 			atomic_inc(&current->signal->live);
@@ -834,9 +834,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
 	task_io_accounting_init(&sig->ioac);
-	INIT_LIST_HEAD(&sig->cpu_timers[0]);
-	INIT_LIST_HEAD(&sig->cpu_timers[1]);
-	INIT_LIST_HEAD(&sig->cpu_timers[2]);
 	taskstats_tgid_init(sig);
 
 	task_lock(current->group_leader);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 9a7ea049fcdc..153dcb2639c3 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -7,50 +7,46 @@
 #include <linux/errno.h>
 #include <linux/math64.h>
 #include <asm/uaccess.h>
+#include <linux/kernel_stat.h>
 
-#ifdef CONFIG_SMP
 /*
- * Allocate the thread_group_cputime structure appropriately for SMP kernels
- * and fill in the current values of the fields. Called from copy_signal()
- * via thread_group_cputime_clone_thread() when adding a second or subsequent
+ * Allocate the thread_group_cputime structure appropriately and fill in the
+ * current values of the fields. Called from copy_signal() via
+ * thread_group_cputime_clone_thread() when adding a second or subsequent
  * thread to a thread group. Assumes interrupts are enabled when called.
 */
-int thread_group_cputime_alloc_smp(struct task_struct *tsk)
+int thread_group_cputime_alloc(struct task_struct *tsk)
 {
 	struct signal_struct *sig = tsk->signal;
 	struct task_cputime *cputime;
 
 	/*
 	 * If we have multiple threads and we don't already have a
-	 * per-CPU task_cputime struct, allocate one and fill it in with
-	 * the times accumulated so far.
+	 * per-CPU task_cputime struct (checked in the caller), allocate
+	 * one and fill it in with the times accumulated so far. We may
+	 * race with another thread so recheck after we pick up the sighand
+	 * lock.
 	 */
-	if (sig->cputime.totals)
-		return 0;
 	cputime = alloc_percpu(struct task_cputime);
 	if (cputime == NULL)
 		return -ENOMEM;
-	read_lock(&tasklist_lock);
 	spin_lock_irq(&tsk->sighand->siglock);
 	if (sig->cputime.totals) {
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		free_percpu(cputime);
 		return 0;
 	}
 	sig->cputime.totals = cputime;
-	cputime = per_cpu_ptr(sig->cputime.totals, get_cpu());
+	cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id());
 	cputime->utime = tsk->utime;
 	cputime->stime = tsk->stime;
 	cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
-	put_cpu_no_resched();
 	spin_unlock_irq(&tsk->sighand->siglock);
-	read_unlock(&tasklist_lock);
 	return 0;
 }
 
 /**
- * thread_group_cputime_smp - Sum the thread group time fields across all CPUs.
+ * thread_group_cputime - Sum the thread group time fields across all CPUs.
  *
  * @tsk:	The task we use to identify the thread group.
  * @times:	task_cputime structure in which we return the summed fields.
@@ -58,7 +54,7 @@ int thread_group_cputime_alloc_smp(struct task_struct *tsk)
 * Walk the list of CPUs to sum the per-CPU time fields in the thread group
 * time structure.
 */
-void thread_group_cputime_smp(
+void thread_group_cputime(
 	struct task_struct *tsk,
 	struct task_cputime *times)
 {
@@ -83,8 +79,6 @@ void thread_group_cputime_smp(
 	}
 }
 
-#endif /* CONFIG_SMP */
-
 /*
  * Called after updating RLIMIT_CPU to set timer expiration if necessary.
 */
@@ -300,7 +294,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 		cpu->cpu = virt_ticks(p);
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = task_sched_runtime(p);
+		cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p);
 		break;
 	}
 	return 0;
@@ -309,16 +303,15 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 /*
  * Sample a process (thread group) clock for the given group_leader task.
  * Must be called with tasklist_lock held for reading.
- * Must be called with tasklist_lock held for reading, and p->sighand->siglock.
 */
-static int cpu_clock_sample_group_locked(unsigned int clock_idx,
+static int cpu_clock_sample_group(const clockid_t which_clock,
 				  struct task_struct *p,
 				  union cpu_time_count *cpu)
 {
 	struct task_cputime cputime;
 
 	thread_group_cputime(p, &cputime);
-	switch (clock_idx) {
+	switch (which_clock) {
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
@@ -328,29 +321,12 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx,
 		cpu->cpu = cputime.utime;
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = thread_group_sched_runtime(p);
+		cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
 		break;
 	}
 	return 0;
 }
 
-/*
- * Sample a process (thread group) clock for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
- */
-static int cpu_clock_sample_group(const clockid_t which_clock,
-				  struct task_struct *p,
-				  union cpu_time_count *cpu)
-{
-	int ret;
-	unsigned long flags;
-	spin_lock_irqsave(&p->sighand->siglock, flags);
-	ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p,
-					    cpu);
-	spin_unlock_irqrestore(&p->sighand->siglock, flags);
-	return ret;
-}
-
 
 int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 {
@@ -1324,29 +1300,37 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
 * fastpath_timer_check - POSIX CPU timers fast path.
 *
 * @tsk:	The task (thread) being checked.
- * @sig:	The signal pointer for that task.
 *
- * If there are no timers set return false. Otherwise snapshot the task and
- * thread group timers, then compare them with the corresponding expiration
- * times. Returns true if a timer has expired, else returns false.
+ * Check the task and thread group timers. If both are zero (there are no
+ * timers set) return false. Otherwise snapshot the task and thread group
+ * timers and compare them with the corresponding expiration times. Return
+ * true if a timer has expired, else return false.
 */
-static inline int fastpath_timer_check(struct task_struct *tsk,
-				       struct signal_struct *sig)
+static inline int fastpath_timer_check(struct task_struct *tsk)
 {
-	struct task_cputime task_sample = {
-		.utime = tsk->utime,
-		.stime = tsk->stime,
-		.sum_exec_runtime = tsk->se.sum_exec_runtime
-	};
-	struct task_cputime group_sample;
+	struct signal_struct *sig = tsk->signal;
 
-	if (task_cputime_zero(&tsk->cputime_expires) &&
-	    task_cputime_zero(&sig->cputime_expires))
+	if (unlikely(!sig))
 		return 0;
-	if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
-		return 1;
-	thread_group_cputime(tsk, &group_sample);
-	return task_cputime_expired(&group_sample, &sig->cputime_expires);
+
+	if (!task_cputime_zero(&tsk->cputime_expires)) {
+		struct task_cputime task_sample = {
+			.utime = tsk->utime,
+			.stime = tsk->stime,
+			.sum_exec_runtime = tsk->se.sum_exec_runtime
+		};
+
+		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
+			return 1;
+	}
+	if (!task_cputime_zero(&sig->cputime_expires)) {
+		struct task_cputime group_sample;
+
+		thread_group_cputime(tsk, &group_sample);
+		if (task_cputime_expired(&group_sample, &sig->cputime_expires))
+			return 1;
+	}
+	return 0;
 }
 
 /*
@@ -1358,43 +1342,34 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 {
 	LIST_HEAD(firing);
 	struct k_itimer *timer, *next;
-	struct signal_struct *sig;
-	struct sighand_struct *sighand;
-	unsigned long flags;
 
 	BUG_ON(!irqs_disabled());
 
-	/* Pick up tsk->signal and make sure it's valid. */
-	sig = tsk->signal;
 	/*
 	 * The fast path checks that there are no expired thread or thread
-	 * group timers. If that's so, just return. Also check that
-	 * tsk->signal is non-NULL; this probably can't happen but cover the
-	 * possibility anyway.
+	 * group timers. If that's so, just return.
 	 */
-	if (unlikely(!sig) || !fastpath_timer_check(tsk, sig))
+	if (!fastpath_timer_check(tsk))
 		return;
 
-	sighand = lock_task_sighand(tsk, &flags);
-	if (likely(sighand)) {
-		/*
-		 * Here we take off tsk->signal->cpu_timers[N] and
-		 * tsk->cpu_timers[N] all the timers that are firing, and
-		 * put them on the firing list.
-		 */
-		check_thread_timers(tsk, &firing);
-		check_process_timers(tsk, &firing);
+	spin_lock(&tsk->sighand->siglock);
+	/*
+	 * Here we take off tsk->signal->cpu_timers[N] and
+	 * tsk->cpu_timers[N] all the timers that are firing, and
+	 * put them on the firing list.
+	 */
+	check_thread_timers(tsk, &firing);
+	check_process_timers(tsk, &firing);
 
 	/*
	 * We must release these locks before taking any timer's lock.
	 * There is a potential race with timer deletion here, as the
	 * siglock now protects our private firing list. We have set
	 * the firing flag in each timer, so that a deletion attempt
	 * that gets the timer lock before we do will give it up and
	 * spin until we've taken care of that timer below.
	 */
-	}
-	unlock_task_sighand(tsk, &flags);
+	spin_unlock(&tsk->sighand->siglock);
 
 	/*
 	 * Now that all the timers on our list have the firing flag,
@@ -1433,7 +1408,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 	struct list_head *head;
 
 	BUG_ON(clock_idx == CPUCLOCK_SCHED);
-	cpu_clock_sample_group_locked(clock_idx, tsk, &now);
+	cpu_clock_sample_group(clock_idx, tsk, &now);
 
 	if (oldval) {
 		if (!cputime_eq(*oldval, cputime_zero)) {
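The fastpath_timer_check() restructuring above is the heart of the hang fix: the all-CPU summation now runs only when a group expiration is actually armed, so threads in very large process groups no longer pay for it on every tick. A standalone sketch of that control flow, with stub types and a stubbed-out summation (illustrative only):

```c
#include <stdbool.h>

struct task_cputime { unsigned long long utime, stime, sum_exec_runtime; };

static bool task_cputime_zero(const struct task_cputime *t)
{
	return !t->utime && !t->stime && !t->sum_exec_runtime;
}

/* A field counts as expired once it reaches a nonzero expiration. */
static bool task_cputime_expired(const struct task_cputime *sample,
				 const struct task_cputime *expires)
{
	return (expires->utime && sample->utime >= expires->utime) ||
	       (expires->stime && sample->stime >= expires->stime) ||
	       (expires->sum_exec_runtime &&
		sample->sum_exec_runtime >= expires->sum_exec_runtime);
}

/* Stand-in for the expensive per-CPU summation, thread_group_cputime(). */
static struct task_cputime sum_group_totals(void)
{
	return (struct task_cputime){ 0, 0, 0 };
}

static bool fastpath_timer_check(const struct task_cputime *task_sample,
				 const struct task_cputime *task_expires,
				 const struct task_cputime *group_expires)
{
	/* Per-thread check first: uses fields already in the task. */
	if (!task_cputime_zero(task_expires) &&
	    task_cputime_expired(task_sample, task_expires))
		return true;
	/* Only now pay for summing the per-CPU group totals. */
	if (!task_cputime_zero(group_expires)) {
		struct task_cputime group_sample = sum_group_totals();

		if (task_cputime_expired(&group_sample, group_expires))
			return true;
	}
	return false;
}
```

Both snapshots are taken without locks; only when the fast path reports an expired timer does run_posix_cpu_timers() take siglock and do the real work.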
diff --git a/kernel/sched.c b/kernel/sched.c
index c51b5d276665..260c22cc530a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4039,55 +4039,22 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 /*
  * Return any ns on the sched_clock that have not yet been banked in
  * @p in case that task is currently running.
- *
- * Called with task_rq_lock() held on @rq.
 */
-static unsigned long long task_delta_exec(struct task_struct *p, struct rq *rq)
+unsigned long long task_delta_exec(struct task_struct *p)
 {
+	struct rq *rq;
+	unsigned long flags;
+	u64 ns = 0;
+
+	rq = task_rq_lock(p, &flags);
 	if (task_current(rq, p)) {
 		u64 delta_exec;
 
 		update_rq_clock(rq);
 		delta_exec = rq->clock - p->se.exec_start;
 		if ((s64)delta_exec > 0)
-			return delta_exec;
+			ns = delta_exec;
 	}
-	return 0;
-}
-
-/*
- * Return p->sum_exec_runtime plus any more ns on the sched_clock
- * that have not yet been banked in case the task is currently running.
- */
-unsigned long long task_sched_runtime(struct task_struct *p)
-{
-	unsigned long flags;
-	u64 ns;
-	struct rq *rq;
-
-	rq = task_rq_lock(p, &flags);
-	ns = p->se.sum_exec_runtime + task_delta_exec(p, rq);
-	task_rq_unlock(rq, &flags);
-
-	return ns;
-}
-
-/*
- * Return sum_exec_runtime for the thread group plus any more ns on the
- * sched_clock that have not yet been banked in case the task is currently
- * running.
- */
-unsigned long long thread_group_sched_runtime(struct task_struct *p)
-{
-	unsigned long flags;
-	u64 ns;
-	struct rq *rq;
-	struct task_cputime totals;
-
-	rq = task_rq_lock(p, &flags);
-	thread_group_cputime(p, &totals);
-	ns = totals.sum_exec_runtime + task_delta_exec(p, rq);
 	task_rq_unlock(rq, &flags);
-
 	return ns;
 }
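With the rq lock moved inside task_delta_exec(), a CPUCLOCK_SCHED sample reduces to one addition: the runtime already banked by the scheduler plus whatever the running task has accumulated since exec_start (see the cpu_clock_sample() change above). A small sketch of that composition using plain integers (stub parameters, not the kernel signatures):

```c
/* Banked ns plus any ns still on the clock for a running task. */
static unsigned long long sample_sched_clock(unsigned long long sum_exec_runtime,
					     unsigned long long rq_clock,
					     unsigned long long exec_start,
					     int on_cpu)
{
	unsigned long long delta = 0;

	if (on_cpu && rq_clock > exec_start)
		delta = rq_clock - exec_start;	/* task_delta_exec() */
	return sum_exec_runtime + delta;	/* cpu->sched in the patch */
}
```

For a whole thread group, the same delta is added to the summed sum_exec_runtime from thread_group_cputime() instead, as in cpu_clock_sample_group() above.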
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 8385d43987e2..d6903bd0c7a8 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -270,3 +270,139 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 #define sched_info_switch(t, next) do { } while (0)
 #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
 
+/*
+ * The following are functions that support scheduler-internal time accounting.
+ * These functions are generally called at the timer tick. None of this depends
+ * on CONFIG_SCHEDSTATS.
+ */
+
+#ifdef CONFIG_SMP
+
+/**
+ * thread_group_cputime_account_user - Maintain utime for a thread group.
+ *
+ * @tgtimes: Pointer to thread_group_cputime structure.
+ * @cputime: Time value by which to increment the utime field of that
+ *	     structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the utime field there.
+ */
+static inline void thread_group_cputime_account_user(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->utime = cputime_add(times->utime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * thread_group_cputime_account_system - Maintain stime for a thread group.
+ *
+ * @tgtimes: Pointer to thread_group_cputime structure.
+ * @cputime: Time value by which to increment the stime field of that
+ *	     structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the stime field there.
+ */
+static inline void thread_group_cputime_account_system(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->stime = cputime_add(times->stime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
+ *					       thread group.
+ *
+ * @tgtimes: Pointer to thread_group_cputime structure.
+ * @ns:	     Time value by which to increment the sum_exec_runtime field
+ *	     of that structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the sum_exec_runtime field there.
+ */
+static inline void thread_group_cputime_account_exec_runtime(
+	struct thread_group_cputime *tgtimes,
+	unsigned long long ns)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->sum_exec_runtime += ns;
+		put_cpu_no_resched();
+	}
+}
+
+#else /* CONFIG_SMP */
+
+static inline void thread_group_cputime_account_user(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime);
+}
+
+static inline void thread_group_cputime_account_system(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime);
+}
+
+static inline void thread_group_cputime_account_exec_runtime(
+	struct thread_group_cputime *tgtimes,
+	unsigned long long ns)
+{
+	tgtimes->totals->sum_exec_runtime += ns;
+}
+
+#endif /* CONFIG_SMP */
+
+/*
+ * These are the generic time-accounting routines that use the above
+ * functions. They are the functions actually called by the scheduler.
+ */
+static inline void account_group_user_time(struct task_struct *tsk,
+					   cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_user(&sig->cputime, cputime);
+}
+
+static inline void account_group_system_time(struct task_struct *tsk,
+					     cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_system(&sig->cputime, cputime);
+}
+
+static inline void account_group_exec_runtime(struct task_struct *tsk,
+					      unsigned long long ns)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
+}
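These account_group_*() routines are meant to be dropped into the scheduler's existing per-task accounting paths, so group totals stay current without any extra walk over threads. A hedged sketch of the call-site shape, assuming a tick-time hook like account_user_time() (stub types and simplified bookkeeping, not the exact kernel code):

```c
typedef unsigned long long cputime_t;

struct task_cputime { cputime_t utime, stime; unsigned long long sum_exec_runtime; };

struct task {
	cputime_t utime;                    /* per-task field, as before */
	struct task_cputime *group_totals;  /* stand-in for signal->cputime */
};

/* Stand-in for account_group_user_time() above: in the kernel this
 * updates the running CPU's slot via per_cpu_ptr()/get_cpu(). */
static void account_group_user_time(struct task *p, cputime_t cputime)
{
	if (p->group_totals)
		p->group_totals->utime += cputime;
}

/* Shape of the tick hook: charge the task itself, then the group. */
void account_user_time(struct task *p, cputime_t cputime)
{
	p->utime += cputime;
	account_group_user_time(p, cputime);  /* new: group totals */
	/* ...cpustat bookkeeping elided... */
}
```

Keeping the group update inside the existing per-task hooks is what lets the UP and SMP builds share one code path here, with the #ifdef above confined to how the totals block is addressed.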