author     Frank Mayhar <fmayhar@google.com>        2008-09-12 12:54:39 -0400
committer  Ingo Molnar <mingo@elte.hu>              2008-09-23 07:38:44 -0400
commit     bb34d92f643086d546b49cef680f6f305ed84414 (patch)
tree       275887040c96971e133fa20d99517c1fcea76415 /kernel/posix-cpu-timers.c
parent     5ce73a4a5a4893a1aa4cdeed1b1a5a6de42c43b6 (diff)
timers: fix itimer/many thread hang, v2
This is the second resubmission of the posix timer rework patch, posted
a few days ago.
This includes the changes from the previous resubmission, which addressed
Oleg Nesterov's comments by removing the RCU code from the patch and
un-inlining the thread_group_cputime() function for SMP.
In addition, per Ingo Molnar's suggestion, it simplifies the UP code,
consolidating much of it with the SMP version and relying on lower-level
SMP/UP handling to take care of the differences.
It also fixes some UP compile errors, moves the scheduler stats-related
macros into kernel/sched_stats.h, resolves a merge error in
kernel/fork.c, and makes a few other minor fixes and cleanups as suggested
by Oleg and Ingo. Thanks for the review, guys.
Signed-off-by: Frank Mayhar <fmayhar@google.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/posix-cpu-timers.c')
 kernel/posix-cpu-timers.c | 153 ++++++++++++++-------------------
 1 file changed, 64 insertions(+), 89 deletions(-)
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 9a7ea049fcdc..153dcb2639c3 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -7,50 +7,46 @@
 #include <linux/errno.h>
 #include <linux/math64.h>
 #include <asm/uaccess.h>
+#include <linux/kernel_stat.h>
 
-#ifdef CONFIG_SMP
 /*
- * Allocate the thread_group_cputime structure appropriately for SMP kernels
- * and fill in the current values of the fields. Called from copy_signal()
- * via thread_group_cputime_clone_thread() when adding a second or subsequent
+ * Allocate the thread_group_cputime structure appropriately and fill in the
+ * current values of the fields. Called from copy_signal() via
+ * thread_group_cputime_clone_thread() when adding a second or subsequent
  * thread to a thread group. Assumes interrupts are enabled when called.
  */
-int thread_group_cputime_alloc_smp(struct task_struct *tsk)
+int thread_group_cputime_alloc(struct task_struct *tsk)
 {
         struct signal_struct *sig = tsk->signal;
         struct task_cputime *cputime;
 
         /*
          * If we have multiple threads and we don't already have a
-         * per-CPU task_cputime struct, allocate one and fill it in with
-         * the times accumulated so far.
+         * per-CPU task_cputime struct (checked in the caller), allocate
+         * one and fill it in with the times accumulated so far. We may
+         * race with another thread so recheck after we pick up the sighand
+         * lock.
          */
-        if (sig->cputime.totals)
-                return 0;
         cputime = alloc_percpu(struct task_cputime);
         if (cputime == NULL)
                 return -ENOMEM;
-        read_lock(&tasklist_lock);
         spin_lock_irq(&tsk->sighand->siglock);
         if (sig->cputime.totals) {
                 spin_unlock_irq(&tsk->sighand->siglock);
-                read_unlock(&tasklist_lock);
                 free_percpu(cputime);
                 return 0;
         }
         sig->cputime.totals = cputime;
-        cputime = per_cpu_ptr(sig->cputime.totals, get_cpu());
+        cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id());
         cputime->utime = tsk->utime;
         cputime->stime = tsk->stime;
         cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
-        put_cpu_no_resched();
         spin_unlock_irq(&tsk->sighand->siglock);
-        read_unlock(&tasklist_lock);
         return 0;
 }
 
 /**
- * thread_group_cputime_smp - Sum the thread group time fields across all CPUs.
+ * thread_group_cputime - Sum the thread group time fields across all CPUs.
  *
  * @tsk:    The task we use to identify the thread group.
  * @times:  task_cputime structure in which we return the summed fields.
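The "(checked in the caller)" comment in the hunk above refers to
thread_group_cputime_clone_thread(), which lives outside this file (in
<linux/sched.h>) and is not part of this diff. A minimal sketch of the
assumed split, with the cheap unlocked check in the caller and the locked
recheck in thread_group_cputime_alloc() closing the allocation race:

/*
 * Sketch only; the real helper is in <linux/sched.h>.  The unlocked
 * fast-path check lives here, and thread_group_cputime_alloc()
 * repeats it under sighand->siglock in case two threads race.
 */
static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
{
        if (curr->signal->cputime.totals)       /* already allocated */
                return 0;
        return thread_group_cputime_alloc(curr);
}

Allocating the per-CPU structure before taking siglock, and freeing it if
another thread won the race, keeps the allocation out of the critical
section.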
@@ -58,7 +54,7 @@ int thread_group_cputime_alloc_smp(struct task_struct *tsk)
  * Walk the list of CPUs to sum the per-CPU time fields in the thread group
  * time structure.
  */
-void thread_group_cputime_smp(
+void thread_group_cputime(
         struct task_struct *tsk,
         struct task_cputime *times)
 {
@@ -83,8 +79,6 @@ void thread_group_cputime_smp(
         }
 }
 
-#endif /* CONFIG_SMP */
-
 /*
  * Called after updating RLIMIT_CPU to set timer expiration if necessary.
  */
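The body of thread_group_cputime() is elided by the diff context above.
Going by its kernel-doc (walk the list of CPUs and sum the per-CPU time
fields), the loop is essentially the sketch below; the in-tree version must
also handle a group that never allocated sig->cputime.totals, which is
omitted here:

/*
 * Sketch only: sum each CPU's slice of the group's per-CPU totals.
 * The real function also needs a fallback for totals == NULL.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
        struct task_cputime *totals = tsk->signal->cputime.totals;
        struct task_cputime *tot;
        int i;

        times->utime = cputime_zero;
        times->stime = cputime_zero;
        times->sum_exec_runtime = 0;
        for_each_possible_cpu(i) {
                tot = per_cpu_ptr(totals, i);
                times->utime = cputime_add(times->utime, tot->utime);
                times->stime = cputime_add(times->stime, tot->stime);
                times->sum_exec_runtime += tot->sum_exec_runtime;
        }
}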
@@ -300,7 +294,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
                 cpu->cpu = virt_ticks(p);
                 break;
         case CPUCLOCK_SCHED:
-                cpu->sched = task_sched_runtime(p);
+                cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p);
                 break;
         }
         return 0;
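The CPUCLOCK_SCHED case now composes the sample from two parts: the runtime
the scheduler has already banked in se.sum_exec_runtime, plus the slice the
task has run since that field was last updated, which task_delta_exec() is
assumed to report. Written out as a hypothetical helper (the name is
illustrative, not from the patch):

/*
 * Illustration only: a CPUCLOCK_SCHED sample is the banked runtime
 * plus the not-yet-accounted delta from task_delta_exec().
 */
static u64 sched_clock_sample(struct task_struct *p)
{
        return p->se.sum_exec_runtime + task_delta_exec(p);
}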
@@ -309,16 +303,15 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 /*
  * Sample a process (thread group) clock for the given group_leader task.
  * Must be called with tasklist_lock held for reading.
- * Must be called with tasklist_lock held for reading, and p->sighand->siglock.
  */
-static int cpu_clock_sample_group_locked(unsigned int clock_idx,
+static int cpu_clock_sample_group(const clockid_t which_clock,
                                   struct task_struct *p,
                                   union cpu_time_count *cpu)
 {
         struct task_cputime cputime;
 
         thread_group_cputime(p, &cputime);
-        switch (clock_idx) {
+        switch (which_clock) {
         default:
                 return -EINVAL;
         case CPUCLOCK_PROF:
@@ -328,29 +321,12 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx,
                 cpu->cpu = cputime.utime;
                 break;
         case CPUCLOCK_SCHED:
-                cpu->sched = thread_group_sched_runtime(p);
+                cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
                 break;
         }
         return 0;
 }
 
-/*
- * Sample a process (thread group) clock for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
- */
-static int cpu_clock_sample_group(const clockid_t which_clock,
-                                  struct task_struct *p,
-                                  union cpu_time_count *cpu)
-{
-        int ret;
-        unsigned long flags;
-        spin_lock_irqsave(&p->sighand->siglock, flags);
-        ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p,
-                                            cpu);
-        spin_unlock_irqrestore(&p->sighand->siglock, flags);
-        return ret;
-}
-
 
 int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 {
@@ -1324,29 +1300,37 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
  * fastpath_timer_check - POSIX CPU timers fast path.
  *
  * @tsk:    The task (thread) being checked.
- * @sig:    The signal pointer for that task.
  *
- * If there are no timers set return false. Otherwise snapshot the task and
- * thread group timers, then compare them with the corresponding expiration
- # times. Returns true if a timer has expired, else returns false.
+ * Check the task and thread group timers. If both are zero (there are no
+ * timers set) return false. Otherwise snapshot the task and thread group
+ * timers and compare them with the corresponding expiration times. Return
+ * true if a timer has expired, else return false.
  */
-static inline int fastpath_timer_check(struct task_struct *tsk,
-                                       struct signal_struct *sig)
+static inline int fastpath_timer_check(struct task_struct *tsk)
 {
-        struct task_cputime task_sample = {
-                .utime = tsk->utime,
-                .stime = tsk->stime,
-                .sum_exec_runtime = tsk->se.sum_exec_runtime
-        };
-        struct task_cputime group_sample;
+        struct signal_struct *sig = tsk->signal;
 
-        if (task_cputime_zero(&tsk->cputime_expires) &&
-            task_cputime_zero(&sig->cputime_expires))
+        if (unlikely(!sig))
                 return 0;
-        if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
-                return 1;
-        thread_group_cputime(tsk, &group_sample);
-        return task_cputime_expired(&group_sample, &sig->cputime_expires);
+
+        if (!task_cputime_zero(&tsk->cputime_expires)) {
+                struct task_cputime task_sample = {
+                        .utime = tsk->utime,
+                        .stime = tsk->stime,
+                        .sum_exec_runtime = tsk->se.sum_exec_runtime
+                };
+
+                if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
+                        return 1;
+        }
+        if (!task_cputime_zero(&sig->cputime_expires)) {
+                struct task_cputime group_sample;
+
+                thread_group_cputime(tsk, &group_sample);
+                if (task_cputime_expired(&group_sample, &sig->cputime_expires))
+                        return 1;
+        }
+        return 0;
 }
 
 /*
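Both task_cputime_zero() and task_cputime_expired() sit outside the diff
context. From their use here, the first tests whether any of the three
expiration fields is armed and the second compares a snapshot against the
armed expirations; the sketches below are consistent with that use, but the
in-tree helpers may differ in detail:

/* Sketch: true when no expiration is armed in any field. */
static inline int task_cputime_zero(const struct task_cputime *cputime)
{
        return cputime_eq(cputime->utime, cputime_zero) &&
               cputime_eq(cputime->stime, cputime_zero) &&
               cputime->sum_exec_runtime == 0;
}

/* Sketch: a zero field means "not armed"; otherwise compare. */
static inline int task_cputime_expired(const struct task_cputime *sample,
                                       const struct task_cputime *expires)
{
        if (!cputime_eq(expires->utime, cputime_zero) &&
            cputime_ge(sample->utime, expires->utime))
                return 1;
        if (!cputime_eq(expires->stime, cputime_zero) &&
            cputime_ge(cputime_add(sample->utime, sample->stime),
                       expires->stime))
                return 1;
        if (expires->sum_exec_runtime != 0 &&
            sample->sum_exec_runtime >= expires->sum_exec_runtime)
                return 1;
        return 0;
}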
@@ -1358,43 +1342,34 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 {
         LIST_HEAD(firing);
         struct k_itimer *timer, *next;
-        struct signal_struct *sig;
-        struct sighand_struct *sighand;
-        unsigned long flags;
 
         BUG_ON(!irqs_disabled());
 
-        /* Pick up tsk->signal and make sure it's valid. */
-        sig = tsk->signal;
         /*
          * The fast path checks that there are no expired thread or thread
-         * group timers. If that's so, just return. Also check that
-         * tsk->signal is non-NULL; this probably can't happen but cover the
-         * possibility anyway.
+         * group timers. If that's so, just return.
          */
-        if (unlikely(!sig) || !fastpath_timer_check(tsk, sig))
+        if (!fastpath_timer_check(tsk))
                 return;
 
-        sighand = lock_task_sighand(tsk, &flags);
-        if (likely(sighand)) {
-                /*
-                 * Here we take off tsk->signal->cpu_timers[N] and
-                 * tsk->cpu_timers[N] all the timers that are firing, and
-                 * put them on the firing list.
-                 */
-                check_thread_timers(tsk, &firing);
-                check_process_timers(tsk, &firing);
+        spin_lock(&tsk->sighand->siglock);
+        /*
+         * Here we take off tsk->signal->cpu_timers[N] and
+         * tsk->cpu_timers[N] all the timers that are firing, and
+         * put them on the firing list.
+         */
+        check_thread_timers(tsk, &firing);
+        check_process_timers(tsk, &firing);
 
         /*
          * We must release these locks before taking any timer's lock.
          * There is a potential race with timer deletion here, as the
          * siglock now protects our private firing list. We have set
          * the firing flag in each timer, so that a deletion attempt
          * that gets the timer lock before we do will give it up and
          * spin until we've taken care of that timer below.
          */
-        }
-        unlock_task_sighand(tsk, &flags);
+        spin_unlock(&tsk->sighand->siglock);
 
         /*
          * Now that all the timers on our list have the firing flag,
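The loop that drains the private firing list follows below the context
shown. Because every entry already has its firing flag set, each timer's
lock can be taken in turn without holding siglock; a sketch of that drain,
assuming cpu_timer_fire() delivers the expiration and a negative firing
value marks a timer deleted out from under us:

list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
        int cpu_firing;

        spin_lock(&timer->it_lock);
        /* Our list entry is private now; deletion spun on it_lock. */
        list_del_init(&timer->it.cpu.entry);
        cpu_firing = timer->it.cpu.firing;
        timer->it.cpu.firing = 0;
        if (likely(cpu_firing >= 0))
                cpu_timer_fire(timer);
        spin_unlock(&timer->it_lock);
}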
@@ -1433,7 +1408,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
         struct list_head *head;
 
         BUG_ON(clock_idx == CPUCLOCK_SCHED);
-        cpu_clock_sample_group_locked(clock_idx, tsk, &now);
+        cpu_clock_sample_group(clock_idx, tsk, &now);
 
         if (oldval) {
                 if (!cputime_eq(*oldval, cputime_zero)) {