diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-02-05 06:24:16 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-02-05 07:04:33 -0500 |
commit | 4cd4c1b40d40447fb5e7ba80746c6d7ba91d7a53 (patch) | |
tree | b1e580d5284648d6884e951d995509094a92cca4 /kernel | |
parent | 32bd671d6cbeda60dc73be77fa2b9037d9a9bfa0 (diff) |
timers: split process wide cpu clocks/timers
Change the process wide cpu timers/clocks so that we:
1) don't mess up the kernel with too many threads,
2) don't have a per-cpu allocation for each process,
3) have no impact when not used.
In order to accomplish this we're going to split it into two parts:
- clocks; which can take all the time they want since they run
from user context -- ie. sys_clock_gettime(CLOCK_PROCESS_CPUTIME_ID)
- timers; which need constant time sampling but since they're
explicitly used, the user can pay the overhead.
The clock readout will go back to a full sum of the thread group, while the
timers will run off a global 'clock' that only runs when needed, so only
programs that make use of the facility pay the price.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/itimer.c | 4 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 95 | ||||
-rw-r--r-- | kernel/sched_stats.h | 45 |
3 files changed, 119 insertions, 25 deletions
diff --git a/kernel/itimer.c b/kernel/itimer.c index 6a5fe93dd8bd..58762f7077ec 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c | |||
@@ -62,7 +62,7 @@ int do_getitimer(int which, struct itimerval *value) | |||
62 | struct task_cputime cputime; | 62 | struct task_cputime cputime; |
63 | cputime_t utime; | 63 | cputime_t utime; |
64 | 64 | ||
65 | thread_group_cputime(tsk, &cputime); | 65 | thread_group_cputimer(tsk, &cputime); |
66 | utime = cputime.utime; | 66 | utime = cputime.utime; |
67 | if (cputime_le(cval, utime)) { /* about to fire */ | 67 | if (cputime_le(cval, utime)) { /* about to fire */ |
68 | cval = jiffies_to_cputime(1); | 68 | cval = jiffies_to_cputime(1); |
@@ -82,7 +82,7 @@ int do_getitimer(int which, struct itimerval *value) | |||
82 | struct task_cputime times; | 82 | struct task_cputime times; |
83 | cputime_t ptime; | 83 | cputime_t ptime; |
84 | 84 | ||
85 | thread_group_cputime(tsk, &times); | 85 | thread_group_cputimer(tsk, &times); |
86 | ptime = cputime_add(times.utime, times.stime); | 86 | ptime = cputime_add(times.utime, times.stime); |
87 | if (cputime_le(cval, ptime)) { /* about to fire */ | 87 | if (cputime_le(cval, ptime)) { /* about to fire */ |
88 | cval = jiffies_to_cputime(1); | 88 | cval = jiffies_to_cputime(1); |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index fa07da94d7be..db107c9bbc05 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -230,6 +230,37 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, | |||
230 | return 0; | 230 | return 0; |
231 | } | 231 | } |
232 | 232 | ||
233 | void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) | ||
234 | { | ||
235 | struct sighand_struct *sighand; | ||
236 | struct signal_struct *sig; | ||
237 | struct task_struct *t; | ||
238 | |||
239 | *times = INIT_CPUTIME; | ||
240 | |||
241 | rcu_read_lock(); | ||
242 | sighand = rcu_dereference(tsk->sighand); | ||
243 | if (!sighand) | ||
244 | goto out; | ||
245 | |||
246 | sig = tsk->signal; | ||
247 | |||
248 | t = tsk; | ||
249 | do { | ||
250 | times->utime = cputime_add(times->utime, t->utime); | ||
251 | times->stime = cputime_add(times->stime, t->stime); | ||
252 | times->sum_exec_runtime += t->se.sum_exec_runtime; | ||
253 | |||
254 | t = next_thread(t); | ||
255 | } while (t != tsk); | ||
256 | |||
257 | times->utime = cputime_add(times->utime, sig->utime); | ||
258 | times->stime = cputime_add(times->stime, sig->stime); | ||
259 | times->sum_exec_runtime += sig->sum_sched_runtime; | ||
260 | out: | ||
261 | rcu_read_unlock(); | ||
262 | } | ||
263 | |||
233 | /* | 264 | /* |
234 | * Sample a process (thread group) clock for the given group_leader task. | 265 | * Sample a process (thread group) clock for the given group_leader task. |
235 | * Must be called with tasklist_lock held for reading. | 266 | * Must be called with tasklist_lock held for reading. |
@@ -476,6 +507,29 @@ static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) | |||
476 | } | 507 | } |
477 | 508 | ||
478 | /* | 509 | /* |
510 | * Enable the process wide cpu timer accounting. | ||
511 | * | ||
512 | * serialized using ->sighand->siglock | ||
513 | */ | ||
514 | static void start_process_timers(struct task_struct *tsk) | ||
515 | { | ||
516 | tsk->signal->cputimer.running = 1; | ||
517 | barrier(); | ||
518 | } | ||
519 | |||
520 | /* | ||
521 | * Release the process wide timer accounting -- timer stops ticking when | ||
522 | * nobody cares about it. | ||
523 | * | ||
524 | * serialized using ->sighand->siglock | ||
525 | */ | ||
526 | static void stop_process_timers(struct task_struct *tsk) | ||
527 | { | ||
528 | tsk->signal->cputimer.running = 0; | ||
529 | barrier(); | ||
530 | } | ||
531 | |||
532 | /* | ||
479 | * Insert the timer on the appropriate list before any timers that | 533 | * Insert the timer on the appropriate list before any timers that |
480 | * expire later. This must be called with the tasklist_lock held | 534 | * expire later. This must be called with the tasklist_lock held |
481 | * for reading, and interrupts disabled. | 535 | * for reading, and interrupts disabled. |
@@ -495,6 +549,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) | |||
495 | BUG_ON(!irqs_disabled()); | 549 | BUG_ON(!irqs_disabled()); |
496 | spin_lock(&p->sighand->siglock); | 550 | spin_lock(&p->sighand->siglock); |
497 | 551 | ||
552 | if (!CPUCLOCK_PERTHREAD(timer->it_clock)) | ||
553 | start_process_timers(p); | ||
554 | |||
498 | listpos = head; | 555 | listpos = head; |
499 | if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { | 556 | if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { |
500 | list_for_each_entry(next, head, entry) { | 557 | list_for_each_entry(next, head, entry) { |
@@ -987,13 +1044,15 @@ static void check_process_timers(struct task_struct *tsk, | |||
987 | sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY && | 1044 | sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY && |
988 | list_empty(&timers[CPUCLOCK_VIRT]) && | 1045 | list_empty(&timers[CPUCLOCK_VIRT]) && |
989 | cputime_eq(sig->it_virt_expires, cputime_zero) && | 1046 | cputime_eq(sig->it_virt_expires, cputime_zero) && |
990 | list_empty(&timers[CPUCLOCK_SCHED])) | 1047 | list_empty(&timers[CPUCLOCK_SCHED])) { |
1048 | stop_process_timers(tsk); | ||
991 | return; | 1049 | return; |
1050 | } | ||
992 | 1051 | ||
993 | /* | 1052 | /* |
994 | * Collect the current process totals. | 1053 | * Collect the current process totals. |
995 | */ | 1054 | */ |
996 | thread_group_cputime(tsk, &cputime); | 1055 | thread_group_cputimer(tsk, &cputime); |
997 | utime = cputime.utime; | 1056 | utime = cputime.utime; |
998 | ptime = cputime_add(utime, cputime.stime); | 1057 | ptime = cputime_add(utime, cputime.stime); |
999 | sum_sched_runtime = cputime.sum_exec_runtime; | 1058 | sum_sched_runtime = cputime.sum_exec_runtime; |
@@ -1259,7 +1318,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk) | |||
1259 | if (!task_cputime_zero(&sig->cputime_expires)) { | 1318 | if (!task_cputime_zero(&sig->cputime_expires)) { |
1260 | struct task_cputime group_sample; | 1319 | struct task_cputime group_sample; |
1261 | 1320 | ||
1262 | thread_group_cputime(tsk, &group_sample); | 1321 | thread_group_cputimer(tsk, &group_sample); |
1263 | if (task_cputime_expired(&group_sample, &sig->cputime_expires)) | 1322 | if (task_cputime_expired(&group_sample, &sig->cputime_expires)) |
1264 | return 1; | 1323 | return 1; |
1265 | } | 1324 | } |
@@ -1329,6 +1388,33 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
1329 | } | 1388 | } |
1330 | 1389 | ||
1331 | /* | 1390 | /* |
1391 | * Sample a process (thread group) timer for the given group_leader task. | ||
1392 | * Must be called with tasklist_lock held for reading. | ||
1393 | */ | ||
1394 | static int cpu_timer_sample_group(const clockid_t which_clock, | ||
1395 | struct task_struct *p, | ||
1396 | union cpu_time_count *cpu) | ||
1397 | { | ||
1398 | struct task_cputime cputime; | ||
1399 | |||
1400 | thread_group_cputimer(p, &cputime); | ||
1401 | switch (CPUCLOCK_WHICH(which_clock)) { | ||
1402 | default: | ||
1403 | return -EINVAL; | ||
1404 | case CPUCLOCK_PROF: | ||
1405 | cpu->cpu = cputime_add(cputime.utime, cputime.stime); | ||
1406 | break; | ||
1407 | case CPUCLOCK_VIRT: | ||
1408 | cpu->cpu = cputime.utime; | ||
1409 | break; | ||
1410 | case CPUCLOCK_SCHED: | ||
1411 | cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); | ||
1412 | break; | ||
1413 | } | ||
1414 | return 0; | ||
1415 | } | ||
1416 | |||
1417 | /* | ||
1332 | * Set one of the process-wide special case CPU timers. | 1418 | * Set one of the process-wide special case CPU timers. |
1333 | * The tsk->sighand->siglock must be held by the caller. | 1419 | * The tsk->sighand->siglock must be held by the caller. |
1334 | * The *newval argument is relative and we update it to be absolute, *oldval | 1420 | * The *newval argument is relative and we update it to be absolute, *oldval |
@@ -1341,7 +1427,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | |||
1341 | struct list_head *head; | 1427 | struct list_head *head; |
1342 | 1428 | ||
1343 | BUG_ON(clock_idx == CPUCLOCK_SCHED); | 1429 | BUG_ON(clock_idx == CPUCLOCK_SCHED); |
1344 | cpu_clock_sample_group(clock_idx, tsk, &now); | 1430 | start_process_timers(tsk); |
1431 | cpu_timer_sample_group(clock_idx, tsk, &now); | ||
1345 | 1432 | ||
1346 | if (oldval) { | 1433 | if (oldval) { |
1347 | if (!cputime_eq(*oldval, cputime_zero)) { | 1434 | if (!cputime_eq(*oldval, cputime_zero)) { |
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 8ab0cef8ecab..a8f93dd374e1 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
@@ -296,19 +296,21 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) | |||
296 | static inline void account_group_user_time(struct task_struct *tsk, | 296 | static inline void account_group_user_time(struct task_struct *tsk, |
297 | cputime_t cputime) | 297 | cputime_t cputime) |
298 | { | 298 | { |
299 | struct task_cputime *times; | 299 | struct thread_group_cputimer *cputimer; |
300 | struct signal_struct *sig; | ||
301 | 300 | ||
302 | /* tsk == current, ensure it is safe to use ->signal */ | 301 | /* tsk == current, ensure it is safe to use ->signal */ |
303 | if (unlikely(tsk->exit_state)) | 302 | if (unlikely(tsk->exit_state)) |
304 | return; | 303 | return; |
305 | 304 | ||
306 | sig = tsk->signal; | 305 | cputimer = &tsk->signal->cputimer; |
307 | times = &sig->cputime.totals; | ||
308 | 306 | ||
309 | spin_lock(&times->lock); | 307 | if (!cputimer->running) |
310 | times->utime = cputime_add(times->utime, cputime); | 308 | return; |
311 | spin_unlock(&times->lock); | 309 | |
310 | spin_lock(&cputimer->lock); | ||
311 | cputimer->cputime.utime = | ||
312 | cputime_add(cputimer->cputime.utime, cputime); | ||
313 | spin_unlock(&cputimer->lock); | ||
312 | } | 314 | } |
313 | 315 | ||
314 | /** | 316 | /** |
@@ -324,19 +326,21 @@ static inline void account_group_user_time(struct task_struct *tsk, | |||
324 | static inline void account_group_system_time(struct task_struct *tsk, | 326 | static inline void account_group_system_time(struct task_struct *tsk, |
325 | cputime_t cputime) | 327 | cputime_t cputime) |
326 | { | 328 | { |
327 | struct task_cputime *times; | 329 | struct thread_group_cputimer *cputimer; |
328 | struct signal_struct *sig; | ||
329 | 330 | ||
330 | /* tsk == current, ensure it is safe to use ->signal */ | 331 | /* tsk == current, ensure it is safe to use ->signal */ |
331 | if (unlikely(tsk->exit_state)) | 332 | if (unlikely(tsk->exit_state)) |
332 | return; | 333 | return; |
333 | 334 | ||
334 | sig = tsk->signal; | 335 | cputimer = &tsk->signal->cputimer; |
335 | times = &sig->cputime.totals; | 336 | |
337 | if (!cputimer->running) | ||
338 | return; | ||
336 | 339 | ||
337 | spin_lock(&times->lock); | 340 | spin_lock(&cputimer->lock); |
338 | times->stime = cputime_add(times->stime, cputime); | 341 | cputimer->cputime.stime = |
339 | spin_unlock(&times->lock); | 342 | cputime_add(cputimer->cputime.stime, cputime); |
343 | spin_unlock(&cputimer->lock); | ||
340 | } | 344 | } |
341 | 345 | ||
342 | /** | 346 | /** |
@@ -352,7 +356,7 @@ static inline void account_group_system_time(struct task_struct *tsk, | |||
352 | static inline void account_group_exec_runtime(struct task_struct *tsk, | 356 | static inline void account_group_exec_runtime(struct task_struct *tsk, |
353 | unsigned long long ns) | 357 | unsigned long long ns) |
354 | { | 358 | { |
355 | struct task_cputime *times; | 359 | struct thread_group_cputimer *cputimer; |
356 | struct signal_struct *sig; | 360 | struct signal_struct *sig; |
357 | 361 | ||
358 | sig = tsk->signal; | 362 | sig = tsk->signal; |
@@ -361,9 +365,12 @@ static inline void account_group_exec_runtime(struct task_struct *tsk, | |||
361 | if (unlikely(!sig)) | 365 | if (unlikely(!sig)) |
362 | return; | 366 | return; |
363 | 367 | ||
364 | times = &sig->cputime.totals; | 368 | cputimer = &sig->cputimer; |
369 | |||
370 | if (!cputimer->running) | ||
371 | return; | ||
365 | 372 | ||
366 | spin_lock(&times->lock); | 373 | spin_lock(&cputimer->lock); |
367 | times->sum_exec_runtime += ns; | 374 | cputimer->cputime.sum_exec_runtime += ns; |
368 | spin_unlock(&times->lock); | 375 | spin_unlock(&cputimer->lock); |
369 | } | 376 | } |