diff options
author | Peter Zijlstra <peterz@infradead.org> | 2008-11-24 11:06:57 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-01-07 12:52:44 -0500 |
commit | 490dea45d00f01847ebebd007685d564aaf2cd98 (patch) | |
tree | a1f559fd497b10c21479b378ffb262d517cb627b | |
parent | ede6f5aea054d3fb67c78857f7abdee602302043 (diff) |
itimers: remove the per-cpu-ish-ness
Either we bounce once cacheline per cpu per tick, yielding n^2 bounces
or we just bounce a single..
Also, using per-cpu allocations for the thread-groups complicates the
per-cpu allocator in that its currently aimed to be a fixed sized
allocator and the only possible extention to that would be vmap based,
which is seriously constrained on 32 bit archs.
So making the per-cpu memory requirement depend on the number of
processes is an issue.
Lastly, it didn't deal with cpu-hotplug, although admittedly that might
be fixable.
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | include/linux/init_task.h | 6 | ||||
-rw-r--r-- | include/linux/sched.h | 29 | ||||
-rw-r--r-- | kernel/fork.c | 15 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 70 | ||||
-rw-r--r-- | kernel/sched_stats.h | 33 |
5 files changed, 46 insertions, 107 deletions
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2f3c2d4ef73b..ea0ea1a4c36f 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -48,6 +48,12 @@ extern struct fs_struct init_fs; | |||
48 | .posix_timers = LIST_HEAD_INIT(sig.posix_timers), \ | 48 | .posix_timers = LIST_HEAD_INIT(sig.posix_timers), \ |
49 | .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ | 49 | .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ |
50 | .rlim = INIT_RLIMITS, \ | 50 | .rlim = INIT_RLIMITS, \ |
51 | .cputime = { .totals = { \ | ||
52 | .utime = cputime_zero, \ | ||
53 | .stime = cputime_zero, \ | ||
54 | .sum_exec_runtime = 0, \ | ||
55 | .lock = __SPIN_LOCK_UNLOCKED(sig.cputime.totals.lock), \ | ||
56 | }, }, \ | ||
51 | } | 57 | } |
52 | 58 | ||
53 | extern struct nsproxy init_nsproxy; | 59 | extern struct nsproxy init_nsproxy; |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 4cae9b81a1f8..c20943eabb4c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -450,6 +450,7 @@ struct task_cputime { | |||
450 | cputime_t utime; | 450 | cputime_t utime; |
451 | cputime_t stime; | 451 | cputime_t stime; |
452 | unsigned long long sum_exec_runtime; | 452 | unsigned long long sum_exec_runtime; |
453 | spinlock_t lock; | ||
453 | }; | 454 | }; |
454 | /* Alternate field names when used to cache expirations. */ | 455 | /* Alternate field names when used to cache expirations. */ |
455 | #define prof_exp stime | 456 | #define prof_exp stime |
@@ -465,7 +466,7 @@ struct task_cputime { | |||
465 | * used for thread group CPU clock calculations. | 466 | * used for thread group CPU clock calculations. |
466 | */ | 467 | */ |
467 | struct thread_group_cputime { | 468 | struct thread_group_cputime { |
468 | struct task_cputime *totals; | 469 | struct task_cputime totals; |
469 | }; | 470 | }; |
470 | 471 | ||
471 | /* | 472 | /* |
@@ -2180,24 +2181,30 @@ static inline int spin_needbreak(spinlock_t *lock) | |||
2180 | * Thread group CPU time accounting. | 2181 | * Thread group CPU time accounting. |
2181 | */ | 2182 | */ |
2182 | 2183 | ||
2183 | extern int thread_group_cputime_alloc(struct task_struct *); | 2184 | static inline |
2184 | extern void thread_group_cputime(struct task_struct *, struct task_cputime *); | 2185 | void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) |
2185 | |||
2186 | static inline void thread_group_cputime_init(struct signal_struct *sig) | ||
2187 | { | 2186 | { |
2188 | sig->cputime.totals = NULL; | 2187 | struct task_cputime *totals = &tsk->signal->cputime.totals; |
2188 | unsigned long flags; | ||
2189 | |||
2190 | spin_lock_irqsave(&totals->lock, flags); | ||
2191 | *times = *totals; | ||
2192 | spin_unlock_irqrestore(&totals->lock, flags); | ||
2189 | } | 2193 | } |
2190 | 2194 | ||
2191 | static inline int thread_group_cputime_clone_thread(struct task_struct *curr) | 2195 | static inline void thread_group_cputime_init(struct signal_struct *sig) |
2192 | { | 2196 | { |
2193 | if (curr->signal->cputime.totals) | 2197 | sig->cputime.totals = (struct task_cputime){ |
2194 | return 0; | 2198 | .utime = cputime_zero, |
2195 | return thread_group_cputime_alloc(curr); | 2199 | .stime = cputime_zero, |
2200 | .sum_exec_runtime = 0, | ||
2201 | }; | ||
2202 | |||
2203 | spin_lock_init(&sig->cputime.totals.lock); | ||
2196 | } | 2204 | } |
2197 | 2205 | ||
2198 | static inline void thread_group_cputime_free(struct signal_struct *sig) | 2206 | static inline void thread_group_cputime_free(struct signal_struct *sig) |
2199 | { | 2207 | { |
2200 | free_percpu(sig->cputime.totals); | ||
2201 | } | 2208 | } |
2202 | 2209 | ||
2203 | /* | 2210 | /* |
diff --git a/kernel/fork.c b/kernel/fork.c index 7b8f2a78be3d..7087d8c0e5e2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -820,14 +820,15 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
820 | int ret; | 820 | int ret; |
821 | 821 | ||
822 | if (clone_flags & CLONE_THREAD) { | 822 | if (clone_flags & CLONE_THREAD) { |
823 | ret = thread_group_cputime_clone_thread(current); | 823 | atomic_inc(¤t->signal->count); |
824 | if (likely(!ret)) { | 824 | atomic_inc(¤t->signal->live); |
825 | atomic_inc(¤t->signal->count); | 825 | return 0; |
826 | atomic_inc(¤t->signal->live); | ||
827 | } | ||
828 | return ret; | ||
829 | } | 826 | } |
830 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); | 827 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); |
828 | |||
829 | if (sig) | ||
830 | posix_cpu_timers_init_group(sig); | ||
831 | |||
831 | tsk->signal = sig; | 832 | tsk->signal = sig; |
832 | if (!sig) | 833 | if (!sig) |
833 | return -ENOMEM; | 834 | return -ENOMEM; |
@@ -864,8 +865,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
864 | memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); | 865 | memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); |
865 | task_unlock(current->group_leader); | 866 | task_unlock(current->group_leader); |
866 | 867 | ||
867 | posix_cpu_timers_init_group(sig); | ||
868 | |||
869 | acct_init_pacct(&sig->pacct); | 868 | acct_init_pacct(&sig->pacct); |
870 | 869 | ||
871 | tty_audit_fork(sig); | 870 | tty_audit_fork(sig); |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 157de3a47832..fa07da94d7be 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -10,76 +10,6 @@ | |||
10 | #include <linux/kernel_stat.h> | 10 | #include <linux/kernel_stat.h> |
11 | 11 | ||
12 | /* | 12 | /* |
13 | * Allocate the thread_group_cputime structure appropriately and fill in the | ||
14 | * current values of the fields. Called from copy_signal() via | ||
15 | * thread_group_cputime_clone_thread() when adding a second or subsequent | ||
16 | * thread to a thread group. Assumes interrupts are enabled when called. | ||
17 | */ | ||
18 | int thread_group_cputime_alloc(struct task_struct *tsk) | ||
19 | { | ||
20 | struct signal_struct *sig = tsk->signal; | ||
21 | struct task_cputime *cputime; | ||
22 | |||
23 | /* | ||
24 | * If we have multiple threads and we don't already have a | ||
25 | * per-CPU task_cputime struct (checked in the caller), allocate | ||
26 | * one and fill it in with the times accumulated so far. We may | ||
27 | * race with another thread so recheck after we pick up the sighand | ||
28 | * lock. | ||
29 | */ | ||
30 | cputime = alloc_percpu(struct task_cputime); | ||
31 | if (cputime == NULL) | ||
32 | return -ENOMEM; | ||
33 | spin_lock_irq(&tsk->sighand->siglock); | ||
34 | if (sig->cputime.totals) { | ||
35 | spin_unlock_irq(&tsk->sighand->siglock); | ||
36 | free_percpu(cputime); | ||
37 | return 0; | ||
38 | } | ||
39 | sig->cputime.totals = cputime; | ||
40 | cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id()); | ||
41 | cputime->utime = tsk->utime; | ||
42 | cputime->stime = tsk->stime; | ||
43 | cputime->sum_exec_runtime = tsk->se.sum_exec_runtime; | ||
44 | spin_unlock_irq(&tsk->sighand->siglock); | ||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | /** | ||
49 | * thread_group_cputime - Sum the thread group time fields across all CPUs. | ||
50 | * | ||
51 | * @tsk: The task we use to identify the thread group. | ||
52 | * @times: task_cputime structure in which we return the summed fields. | ||
53 | * | ||
54 | * Walk the list of CPUs to sum the per-CPU time fields in the thread group | ||
55 | * time structure. | ||
56 | */ | ||
57 | void thread_group_cputime( | ||
58 | struct task_struct *tsk, | ||
59 | struct task_cputime *times) | ||
60 | { | ||
61 | struct task_cputime *totals, *tot; | ||
62 | int i; | ||
63 | |||
64 | totals = tsk->signal->cputime.totals; | ||
65 | if (!totals) { | ||
66 | times->utime = tsk->utime; | ||
67 | times->stime = tsk->stime; | ||
68 | times->sum_exec_runtime = tsk->se.sum_exec_runtime; | ||
69 | return; | ||
70 | } | ||
71 | |||
72 | times->stime = times->utime = cputime_zero; | ||
73 | times->sum_exec_runtime = 0; | ||
74 | for_each_possible_cpu(i) { | ||
75 | tot = per_cpu_ptr(totals, i); | ||
76 | times->utime = cputime_add(times->utime, tot->utime); | ||
77 | times->stime = cputime_add(times->stime, tot->stime); | ||
78 | times->sum_exec_runtime += tot->sum_exec_runtime; | ||
79 | } | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * Called after updating RLIMIT_CPU to set timer expiration if necessary. | 13 | * Called after updating RLIMIT_CPU to set timer expiration if necessary. |
84 | */ | 14 | */ |
85 | void update_rlimit_cpu(unsigned long rlim_new) | 15 | void update_rlimit_cpu(unsigned long rlim_new) |
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index f2773b5d1226..8ab0cef8ecab 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
@@ -296,6 +296,7 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) | |||
296 | static inline void account_group_user_time(struct task_struct *tsk, | 296 | static inline void account_group_user_time(struct task_struct *tsk, |
297 | cputime_t cputime) | 297 | cputime_t cputime) |
298 | { | 298 | { |
299 | struct task_cputime *times; | ||
299 | struct signal_struct *sig; | 300 | struct signal_struct *sig; |
300 | 301 | ||
301 | /* tsk == current, ensure it is safe to use ->signal */ | 302 | /* tsk == current, ensure it is safe to use ->signal */ |
@@ -303,13 +304,11 @@ static inline void account_group_user_time(struct task_struct *tsk, | |||
303 | return; | 304 | return; |
304 | 305 | ||
305 | sig = tsk->signal; | 306 | sig = tsk->signal; |
306 | if (sig->cputime.totals) { | 307 | times = &sig->cputime.totals; |
307 | struct task_cputime *times; | ||
308 | 308 | ||
309 | times = per_cpu_ptr(sig->cputime.totals, get_cpu()); | 309 | spin_lock(×->lock); |
310 | times->utime = cputime_add(times->utime, cputime); | 310 | times->utime = cputime_add(times->utime, cputime); |
311 | put_cpu_no_resched(); | 311 | spin_unlock(×->lock); |
312 | } | ||
313 | } | 312 | } |
314 | 313 | ||
315 | /** | 314 | /** |
@@ -325,6 +324,7 @@ static inline void account_group_user_time(struct task_struct *tsk, | |||
325 | static inline void account_group_system_time(struct task_struct *tsk, | 324 | static inline void account_group_system_time(struct task_struct *tsk, |
326 | cputime_t cputime) | 325 | cputime_t cputime) |
327 | { | 326 | { |
327 | struct task_cputime *times; | ||
328 | struct signal_struct *sig; | 328 | struct signal_struct *sig; |
329 | 329 | ||
330 | /* tsk == current, ensure it is safe to use ->signal */ | 330 | /* tsk == current, ensure it is safe to use ->signal */ |
@@ -332,13 +332,11 @@ static inline void account_group_system_time(struct task_struct *tsk, | |||
332 | return; | 332 | return; |
333 | 333 | ||
334 | sig = tsk->signal; | 334 | sig = tsk->signal; |
335 | if (sig->cputime.totals) { | 335 | times = &sig->cputime.totals; |
336 | struct task_cputime *times; | ||
337 | 336 | ||
338 | times = per_cpu_ptr(sig->cputime.totals, get_cpu()); | 337 | spin_lock(×->lock); |
339 | times->stime = cputime_add(times->stime, cputime); | 338 | times->stime = cputime_add(times->stime, cputime); |
340 | put_cpu_no_resched(); | 339 | spin_unlock(×->lock); |
341 | } | ||
342 | } | 340 | } |
343 | 341 | ||
344 | /** | 342 | /** |
@@ -354,6 +352,7 @@ static inline void account_group_system_time(struct task_struct *tsk, | |||
354 | static inline void account_group_exec_runtime(struct task_struct *tsk, | 352 | static inline void account_group_exec_runtime(struct task_struct *tsk, |
355 | unsigned long long ns) | 353 | unsigned long long ns) |
356 | { | 354 | { |
355 | struct task_cputime *times; | ||
357 | struct signal_struct *sig; | 356 | struct signal_struct *sig; |
358 | 357 | ||
359 | sig = tsk->signal; | 358 | sig = tsk->signal; |
@@ -362,11 +361,9 @@ static inline void account_group_exec_runtime(struct task_struct *tsk, | |||
362 | if (unlikely(!sig)) | 361 | if (unlikely(!sig)) |
363 | return; | 362 | return; |
364 | 363 | ||
365 | if (sig->cputime.totals) { | 364 | times = &sig->cputime.totals; |
366 | struct task_cputime *times; | ||
367 | 365 | ||
368 | times = per_cpu_ptr(sig->cputime.totals, get_cpu()); | 366 | spin_lock(×->lock); |
369 | times->sum_exec_runtime += ns; | 367 | times->sum_exec_runtime += ns; |
370 | put_cpu_no_resched(); | 368 | spin_unlock(×->lock); |
371 | } | ||
372 | } | 369 | } |