diff options
37 files changed, 903 insertions, 721 deletions
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt index 49378a9f2b5f..10a0263ebb3f 100644 --- a/Documentation/sysrq.txt +++ b/Documentation/sysrq.txt | |||
@@ -95,8 +95,9 @@ On all - write a character to /proc/sysrq-trigger. e.g.: | |||
95 | 95 | ||
96 | 'p' - Will dump the current registers and flags to your console. | 96 | 'p' - Will dump the current registers and flags to your console. |
97 | 97 | ||
98 | 'q' - Will dump a list of all running hrtimers. | 98 | 'q' - Will dump per CPU lists of all armed hrtimers (but NOT regular |
99 | WARNING: Does not cover any other timers | 99 | timer_list timers) and detailed information about all |
100 | clockevent devices. | ||
100 | 101 | ||
101 | 'r' - Turns off keyboard raw mode and sets it to XLATE. | 102 | 'r' - Turns off keyboard raw mode and sets it to XLATE. |
102 | 103 | ||
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index d0c0d64ed366..ce0d9da52a8a 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c | |||
@@ -168,7 +168,7 @@ static void sysrq_handle_show_timers(int key, struct tty_struct *tty) | |||
168 | static struct sysrq_key_op sysrq_show_timers_op = { | 168 | static struct sysrq_key_op sysrq_show_timers_op = { |
169 | .handler = sysrq_handle_show_timers, | 169 | .handler = sysrq_handle_show_timers, |
170 | .help_msg = "show-all-timers(Q)", | 170 | .help_msg = "show-all-timers(Q)", |
171 | .action_msg = "Show pending hrtimers (no others)", | 171 | .action_msg = "Show clockevent devices & pending hrtimers (no others)", |
172 | }; | 172 | }; |
173 | 173 | ||
174 | static void sysrq_handle_mountro(int key, struct tty_struct *tty) | 174 | static void sysrq_handle_mountro(int key, struct tty_struct *tty) |
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c index 71d2ac4e3f46..c20171078d1d 100644 --- a/drivers/clocksource/acpi_pm.c +++ b/drivers/clocksource/acpi_pm.c | |||
@@ -237,9 +237,12 @@ static int __init parse_pmtmr(char *arg) | |||
237 | 237 | ||
238 | if (strict_strtoul(arg, 16, &base)) | 238 | if (strict_strtoul(arg, 16, &base)) |
239 | return -EINVAL; | 239 | return -EINVAL; |
240 | 240 | #ifdef CONFIG_X86_64 | |
241 | if (base > UINT_MAX) | ||
242 | return -ERANGE; | ||
243 | #endif | ||
241 | printk(KERN_INFO "PMTMR IOPort override: 0x%04x -> 0x%04lx\n", | 244 | printk(KERN_INFO "PMTMR IOPort override: 0x%04x -> 0x%04lx\n", |
242 | (unsigned int)pmtmr_ioport, base); | 245 | pmtmr_ioport, base); |
243 | pmtmr_ioport = base; | 246 | pmtmr_ioport = base; |
244 | 247 | ||
245 | return 1; | 248 | return 1; |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index e2159063198a..8fcfa398d350 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -1341,20 +1341,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus, | |||
1341 | prstatus->pr_pgrp = task_pgrp_vnr(p); | 1341 | prstatus->pr_pgrp = task_pgrp_vnr(p); |
1342 | prstatus->pr_sid = task_session_vnr(p); | 1342 | prstatus->pr_sid = task_session_vnr(p); |
1343 | if (thread_group_leader(p)) { | 1343 | if (thread_group_leader(p)) { |
1344 | struct task_cputime cputime; | ||
1345 | |||
1344 | /* | 1346 | /* |
1345 | * This is the record for the group leader. Add in the | 1347 | * This is the record for the group leader. It shows the |
1346 | * cumulative times of previous dead threads. This total | 1348 | * group-wide total, not its individual thread total. |
1347 | * won't include the time of each live thread whose state | ||
1348 | * is included in the core dump. The final total reported | ||
1349 | * to our parent process when it calls wait4 will include | ||
1350 | * those sums as well as the little bit more time it takes | ||
1351 | * this and each other thread to finish dying after the | ||
1352 | * core dump synchronization phase. | ||
1353 | */ | 1349 | */ |
1354 | cputime_to_timeval(cputime_add(p->utime, p->signal->utime), | 1350 | thread_group_cputime(p, &cputime); |
1355 | &prstatus->pr_utime); | 1351 | cputime_to_timeval(cputime.utime, &prstatus->pr_utime); |
1356 | cputime_to_timeval(cputime_add(p->stime, p->signal->stime), | 1352 | cputime_to_timeval(cputime.stime, &prstatus->pr_stime); |
1357 | &prstatus->pr_stime); | ||
1358 | } else { | 1353 | } else { |
1359 | cputime_to_timeval(p->utime, &prstatus->pr_utime); | 1354 | cputime_to_timeval(p->utime, &prstatus->pr_utime); |
1360 | cputime_to_timeval(p->stime, &prstatus->pr_stime); | 1355 | cputime_to_timeval(p->stime, &prstatus->pr_stime); |
diff --git a/fs/proc/array.c b/fs/proc/array.c index f4bc0e789539..bb9f4b05703d 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -388,20 +388,20 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
388 | 388 | ||
389 | /* add up live thread stats at the group level */ | 389 | /* add up live thread stats at the group level */ |
390 | if (whole) { | 390 | if (whole) { |
391 | struct task_cputime cputime; | ||
391 | struct task_struct *t = task; | 392 | struct task_struct *t = task; |
392 | do { | 393 | do { |
393 | min_flt += t->min_flt; | 394 | min_flt += t->min_flt; |
394 | maj_flt += t->maj_flt; | 395 | maj_flt += t->maj_flt; |
395 | utime = cputime_add(utime, task_utime(t)); | ||
396 | stime = cputime_add(stime, task_stime(t)); | ||
397 | gtime = cputime_add(gtime, task_gtime(t)); | 396 | gtime = cputime_add(gtime, task_gtime(t)); |
398 | t = next_thread(t); | 397 | t = next_thread(t); |
399 | } while (t != task); | 398 | } while (t != task); |
400 | 399 | ||
401 | min_flt += sig->min_flt; | 400 | min_flt += sig->min_flt; |
402 | maj_flt += sig->maj_flt; | 401 | maj_flt += sig->maj_flt; |
403 | utime = cputime_add(utime, sig->utime); | 402 | thread_group_cputime(task, &cputime); |
404 | stime = cputime_add(stime, sig->stime); | 403 | utime = cputime.utime; |
404 | stime = cputime.stime; | ||
405 | gtime = cputime_add(gtime, sig->gtime); | 405 | gtime = cputime_add(gtime, sig->gtime); |
406 | } | 406 | } |
407 | 407 | ||
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 55e434feec99..f88d32f8ff7c 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h | |||
@@ -45,7 +45,8 @@ struct clocksource; | |||
45 | * @read: returns a cycle value | 45 | * @read: returns a cycle value |
46 | * @mask: bitmask for two's complement | 46 | * @mask: bitmask for two's complement |
47 | * subtraction of non 64 bit counters | 47 | * subtraction of non 64 bit counters |
48 | * @mult: cycle to nanosecond multiplier | 48 | * @mult: cycle to nanosecond multiplier (adjusted by NTP) |
49 | * @mult_orig: cycle to nanosecond multiplier (unadjusted by NTP) | ||
49 | * @shift: cycle to nanosecond divisor (power of two) | 50 | * @shift: cycle to nanosecond divisor (power of two) |
50 | * @flags: flags describing special properties | 51 | * @flags: flags describing special properties |
51 | * @vread: vsyscall based read | 52 | * @vread: vsyscall based read |
@@ -63,6 +64,7 @@ struct clocksource { | |||
63 | cycle_t (*read)(void); | 64 | cycle_t (*read)(void); |
64 | cycle_t mask; | 65 | cycle_t mask; |
65 | u32 mult; | 66 | u32 mult; |
67 | u32 mult_orig; | ||
66 | u32 shift; | 68 | u32 shift; |
67 | unsigned long flags; | 69 | unsigned long flags; |
68 | cycle_t (*vread)(void); | 70 | cycle_t (*vread)(void); |
@@ -77,6 +79,7 @@ struct clocksource { | |||
77 | /* timekeeping specific data, ignore */ | 79 | /* timekeeping specific data, ignore */ |
78 | cycle_t cycle_interval; | 80 | cycle_t cycle_interval; |
79 | u64 xtime_interval; | 81 | u64 xtime_interval; |
82 | u32 raw_interval; | ||
80 | /* | 83 | /* |
81 | * Second part is written at each timer interrupt | 84 | * Second part is written at each timer interrupt |
82 | * Keep it in a different cache line to dirty no | 85 | * Keep it in a different cache line to dirty no |
@@ -85,6 +88,7 @@ struct clocksource { | |||
85 | cycle_t cycle_last ____cacheline_aligned_in_smp; | 88 | cycle_t cycle_last ____cacheline_aligned_in_smp; |
86 | u64 xtime_nsec; | 89 | u64 xtime_nsec; |
87 | s64 error; | 90 | s64 error; |
91 | struct timespec raw_time; | ||
88 | 92 | ||
89 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG | 93 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG |
90 | /* Watchdog related data, used by the framework */ | 94 | /* Watchdog related data, used by the framework */ |
@@ -201,17 +205,19 @@ static inline void clocksource_calculate_interval(struct clocksource *c, | |||
201 | { | 205 | { |
202 | u64 tmp; | 206 | u64 tmp; |
203 | 207 | ||
204 | /* XXX - All of this could use a whole lot of optimization */ | 208 | /* Do the ns -> cycle conversion first, using original mult */ |
205 | tmp = length_nsec; | 209 | tmp = length_nsec; |
206 | tmp <<= c->shift; | 210 | tmp <<= c->shift; |
207 | tmp += c->mult/2; | 211 | tmp += c->mult_orig/2; |
208 | do_div(tmp, c->mult); | 212 | do_div(tmp, c->mult_orig); |
209 | 213 | ||
210 | c->cycle_interval = (cycle_t)tmp; | 214 | c->cycle_interval = (cycle_t)tmp; |
211 | if (c->cycle_interval == 0) | 215 | if (c->cycle_interval == 0) |
212 | c->cycle_interval = 1; | 216 | c->cycle_interval = 1; |
213 | 217 | ||
218 | /* Go back from cycles -> shifted ns, this time use NTP adjusted mult */ | ||
214 | c->xtime_interval = (u64)c->cycle_interval * c->mult; | 219 | c->xtime_interval = (u64)c->cycle_interval * c->mult; |
220 | c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift; | ||
215 | } | 221 | } |
216 | 222 | ||
217 | 223 | ||
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2f245fe63bda..9a4e35cd5f79 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h | |||
@@ -125,12 +125,12 @@ struct hrtimer { | |||
125 | enum hrtimer_restart (*function)(struct hrtimer *); | 125 | enum hrtimer_restart (*function)(struct hrtimer *); |
126 | struct hrtimer_clock_base *base; | 126 | struct hrtimer_clock_base *base; |
127 | unsigned long state; | 127 | unsigned long state; |
128 | enum hrtimer_cb_mode cb_mode; | ||
129 | struct list_head cb_entry; | 128 | struct list_head cb_entry; |
129 | enum hrtimer_cb_mode cb_mode; | ||
130 | #ifdef CONFIG_TIMER_STATS | 130 | #ifdef CONFIG_TIMER_STATS |
131 | int start_pid; | ||
131 | void *start_site; | 132 | void *start_site; |
132 | char start_comm[16]; | 133 | char start_comm[16]; |
133 | int start_pid; | ||
134 | #endif | 134 | #endif |
135 | }; | 135 | }; |
136 | 136 | ||
@@ -155,10 +155,8 @@ struct hrtimer_sleeper { | |||
155 | * @first: pointer to the timer node which expires first | 155 | * @first: pointer to the timer node which expires first |
156 | * @resolution: the resolution of the clock, in nanoseconds | 156 | * @resolution: the resolution of the clock, in nanoseconds |
157 | * @get_time: function to retrieve the current time of the clock | 157 | * @get_time: function to retrieve the current time of the clock |
158 | * @get_softirq_time: function to retrieve the current time from the softirq | ||
159 | * @softirq_time: the time when running the hrtimer queue in the softirq | 158 | * @softirq_time: the time when running the hrtimer queue in the softirq |
160 | * @offset: offset of this clock to the monotonic base | 159 | * @offset: offset of this clock to the monotonic base |
161 | * @reprogram: function to reprogram the timer event | ||
162 | */ | 160 | */ |
163 | struct hrtimer_clock_base { | 161 | struct hrtimer_clock_base { |
164 | struct hrtimer_cpu_base *cpu_base; | 162 | struct hrtimer_cpu_base *cpu_base; |
@@ -167,13 +165,9 @@ struct hrtimer_clock_base { | |||
167 | struct rb_node *first; | 165 | struct rb_node *first; |
168 | ktime_t resolution; | 166 | ktime_t resolution; |
169 | ktime_t (*get_time)(void); | 167 | ktime_t (*get_time)(void); |
170 | ktime_t (*get_softirq_time)(void); | ||
171 | ktime_t softirq_time; | 168 | ktime_t softirq_time; |
172 | #ifdef CONFIG_HIGH_RES_TIMERS | 169 | #ifdef CONFIG_HIGH_RES_TIMERS |
173 | ktime_t offset; | 170 | ktime_t offset; |
174 | int (*reprogram)(struct hrtimer *t, | ||
175 | struct hrtimer_clock_base *b, | ||
176 | ktime_t n); | ||
177 | #endif | 171 | #endif |
178 | }; | 172 | }; |
179 | 173 | ||
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index cf9f40a91c9c..cac3750cd65e 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h | |||
@@ -52,6 +52,7 @@ static inline int kstat_irqs(int irq) | |||
52 | return sum; | 52 | return sum; |
53 | } | 53 | } |
54 | 54 | ||
55 | extern unsigned long long task_delta_exec(struct task_struct *); | ||
55 | extern void account_user_time(struct task_struct *, cputime_t); | 56 | extern void account_user_time(struct task_struct *, cputime_t); |
56 | extern void account_user_time_scaled(struct task_struct *, cputime_t); | 57 | extern void account_user_time_scaled(struct task_struct *, cputime_t); |
57 | extern void account_system_time(struct task_struct *, int, cputime_t); | 58 | extern void account_system_time(struct task_struct *, int, cputime_t); |
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index a7dd38f30ade..a7c721355549 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h | |||
@@ -45,8 +45,6 @@ struct k_itimer { | |||
45 | int it_requeue_pending; /* waiting to requeue this timer */ | 45 | int it_requeue_pending; /* waiting to requeue this timer */ |
46 | #define REQUEUE_PENDING 1 | 46 | #define REQUEUE_PENDING 1 |
47 | int it_sigev_notify; /* notify word of sigevent struct */ | 47 | int it_sigev_notify; /* notify word of sigevent struct */ |
48 | int it_sigev_signo; /* signo word of sigevent struct */ | ||
49 | sigval_t it_sigev_value; /* value word of sigevent struct */ | ||
50 | struct task_struct *it_process; /* process to send signal to */ | 48 | struct task_struct *it_process; /* process to send signal to */ |
51 | struct sigqueue *sigq; /* signal queue entry. */ | 49 | struct sigqueue *sigq; /* signal queue entry. */ |
52 | union { | 50 | union { |
@@ -115,4 +113,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, | |||
115 | 113 | ||
116 | long clock_nanosleep_restart(struct restart_block *restart_block); | 114 | long clock_nanosleep_restart(struct restart_block *restart_block); |
117 | 115 | ||
116 | void update_rlimit_cpu(unsigned long rlim_new); | ||
117 | |||
118 | #endif | 118 | #endif |
diff --git a/include/linux/sched.h b/include/linux/sched.h index f52dbd3587a7..5c38db536e07 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -434,6 +434,39 @@ struct pacct_struct { | |||
434 | unsigned long ac_minflt, ac_majflt; | 434 | unsigned long ac_minflt, ac_majflt; |
435 | }; | 435 | }; |
436 | 436 | ||
437 | /** | ||
438 | * struct task_cputime - collected CPU time counts | ||
439 | * @utime: time spent in user mode, in &cputime_t units | ||
440 | * @stime: time spent in kernel mode, in &cputime_t units | ||
441 | * @sum_exec_runtime: total time spent on the CPU, in nanoseconds | ||
442 | * | ||
443 | * This structure groups together three kinds of CPU time that are | ||
444 | * tracked for threads and thread groups. Most things considering | ||
445 | * CPU time want to group these counts together and treat all three | ||
446 | * of them in parallel. | ||
447 | */ | ||
448 | struct task_cputime { | ||
449 | cputime_t utime; | ||
450 | cputime_t stime; | ||
451 | unsigned long long sum_exec_runtime; | ||
452 | }; | ||
453 | /* Alternate field names when used to cache expirations. */ | ||
454 | #define prof_exp stime | ||
455 | #define virt_exp utime | ||
456 | #define sched_exp sum_exec_runtime | ||
457 | |||
458 | /** | ||
459 | * struct thread_group_cputime - thread group interval timer counts | ||
460 | * @totals: thread group interval timers; substructure for | ||
461 | * uniprocessor kernel, per-cpu for SMP kernel. | ||
462 | * | ||
463 | * This structure contains the version of task_cputime, above, that is | ||
464 | * used for thread group CPU clock calculations. | ||
465 | */ | ||
466 | struct thread_group_cputime { | ||
467 | struct task_cputime *totals; | ||
468 | }; | ||
469 | |||
437 | /* | 470 | /* |
438 | * NOTE! "signal_struct" does not have its own | 471 | * NOTE! "signal_struct" does not have its own |
439 | * locking, because a shared signal_struct always | 472 | * locking, because a shared signal_struct always |
@@ -479,6 +512,17 @@ struct signal_struct { | |||
479 | cputime_t it_prof_expires, it_virt_expires; | 512 | cputime_t it_prof_expires, it_virt_expires; |
480 | cputime_t it_prof_incr, it_virt_incr; | 513 | cputime_t it_prof_incr, it_virt_incr; |
481 | 514 | ||
515 | /* | ||
516 | * Thread group totals for process CPU clocks. | ||
517 | * See thread_group_cputime(), et al, for details. | ||
518 | */ | ||
519 | struct thread_group_cputime cputime; | ||
520 | |||
521 | /* Earliest-expiration cache. */ | ||
522 | struct task_cputime cputime_expires; | ||
523 | |||
524 | struct list_head cpu_timers[3]; | ||
525 | |||
482 | /* job control IDs */ | 526 | /* job control IDs */ |
483 | 527 | ||
484 | /* | 528 | /* |
@@ -509,7 +553,7 @@ struct signal_struct { | |||
509 | * Live threads maintain their own counters and add to these | 553 | * Live threads maintain their own counters and add to these |
510 | * in __exit_signal, except for the group leader. | 554 | * in __exit_signal, except for the group leader. |
511 | */ | 555 | */ |
512 | cputime_t utime, stime, cutime, cstime; | 556 | cputime_t cutime, cstime; |
513 | cputime_t gtime; | 557 | cputime_t gtime; |
514 | cputime_t cgtime; | 558 | cputime_t cgtime; |
515 | unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; | 559 | unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; |
@@ -518,14 +562,6 @@ struct signal_struct { | |||
518 | struct task_io_accounting ioac; | 562 | struct task_io_accounting ioac; |
519 | 563 | ||
520 | /* | 564 | /* |
521 | * Cumulative ns of scheduled CPU time for dead threads in the | ||
522 | * group, not including a zombie group leader. (This only differs | ||
523 | * from jiffies_to_ns(utime + stime) if sched_clock uses something | ||
524 | * other than jiffies.) | ||
525 | */ | ||
526 | unsigned long long sum_sched_runtime; | ||
527 | |||
528 | /* | ||
529 | * We don't bother to synchronize most readers of this at all, | 565 | * We don't bother to synchronize most readers of this at all, |
530 | * because there is no reader checking a limit that actually needs | 566 | * because there is no reader checking a limit that actually needs |
531 | * to get both rlim_cur and rlim_max atomically, and either one | 567 | * to get both rlim_cur and rlim_max atomically, and either one |
@@ -536,8 +572,6 @@ struct signal_struct { | |||
536 | */ | 572 | */ |
537 | struct rlimit rlim[RLIM_NLIMITS]; | 573 | struct rlimit rlim[RLIM_NLIMITS]; |
538 | 574 | ||
539 | struct list_head cpu_timers[3]; | ||
540 | |||
541 | /* keep the process-shared keyrings here so that they do the right | 575 | /* keep the process-shared keyrings here so that they do the right |
542 | * thing in threads created with CLONE_THREAD */ | 576 | * thing in threads created with CLONE_THREAD */ |
543 | #ifdef CONFIG_KEYS | 577 | #ifdef CONFIG_KEYS |
@@ -1146,8 +1180,7 @@ struct task_struct { | |||
1146 | /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ | 1180 | /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ |
1147 | unsigned long min_flt, maj_flt; | 1181 | unsigned long min_flt, maj_flt; |
1148 | 1182 | ||
1149 | cputime_t it_prof_expires, it_virt_expires; | 1183 | struct task_cputime cputime_expires; |
1150 | unsigned long long it_sched_expires; | ||
1151 | struct list_head cpu_timers[3]; | 1184 | struct list_head cpu_timers[3]; |
1152 | 1185 | ||
1153 | /* process credentials */ | 1186 | /* process credentials */ |
@@ -1597,6 +1630,7 @@ extern unsigned long long cpu_clock(int cpu); | |||
1597 | 1630 | ||
1598 | extern unsigned long long | 1631 | extern unsigned long long |
1599 | task_sched_runtime(struct task_struct *task); | 1632 | task_sched_runtime(struct task_struct *task); |
1633 | extern unsigned long long thread_group_sched_runtime(struct task_struct *task); | ||
1600 | 1634 | ||
1601 | /* sched_exec is called by processes performing an exec */ | 1635 | /* sched_exec is called by processes performing an exec */ |
1602 | #ifdef CONFIG_SMP | 1636 | #ifdef CONFIG_SMP |
@@ -2094,6 +2128,30 @@ static inline int spin_needbreak(spinlock_t *lock) | |||
2094 | } | 2128 | } |
2095 | 2129 | ||
2096 | /* | 2130 | /* |
2131 | * Thread group CPU time accounting. | ||
2132 | */ | ||
2133 | |||
2134 | extern int thread_group_cputime_alloc(struct task_struct *); | ||
2135 | extern void thread_group_cputime(struct task_struct *, struct task_cputime *); | ||
2136 | |||
2137 | static inline void thread_group_cputime_init(struct signal_struct *sig) | ||
2138 | { | ||
2139 | sig->cputime.totals = NULL; | ||
2140 | } | ||
2141 | |||
2142 | static inline int thread_group_cputime_clone_thread(struct task_struct *curr) | ||
2143 | { | ||
2144 | if (curr->signal->cputime.totals) | ||
2145 | return 0; | ||
2146 | return thread_group_cputime_alloc(curr); | ||
2147 | } | ||
2148 | |||
2149 | static inline void thread_group_cputime_free(struct signal_struct *sig) | ||
2150 | { | ||
2151 | free_percpu(sig->cputime.totals); | ||
2152 | } | ||
2153 | |||
2154 | /* | ||
2097 | * Reevaluate whether the task has signals pending delivery. | 2155 | * Reevaluate whether the task has signals pending delivery. |
2098 | * Wake the task if so. | 2156 | * Wake the task if so. |
2099 | * This is required every time the blocked sigset_t changes. | 2157 | * This is required every time the blocked sigset_t changes. |
diff --git a/include/linux/tick.h b/include/linux/tick.h index 98921a3e1aa8..b6ec8189ac0c 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h | |||
@@ -96,9 +96,11 @@ extern cpumask_t *tick_get_broadcast_oneshot_mask(void); | |||
96 | extern void tick_clock_notify(void); | 96 | extern void tick_clock_notify(void); |
97 | extern int tick_check_oneshot_change(int allow_nohz); | 97 | extern int tick_check_oneshot_change(int allow_nohz); |
98 | extern struct tick_sched *tick_get_tick_sched(int cpu); | 98 | extern struct tick_sched *tick_get_tick_sched(int cpu); |
99 | extern void tick_check_idle(int cpu); | ||
99 | # else | 100 | # else |
100 | static inline void tick_clock_notify(void) { } | 101 | static inline void tick_clock_notify(void) { } |
101 | static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } | 102 | static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } |
103 | static inline void tick_check_idle(int cpu) { } | ||
102 | # endif | 104 | # endif |
103 | 105 | ||
104 | #else /* CONFIG_GENERIC_CLOCKEVENTS */ | 106 | #else /* CONFIG_GENERIC_CLOCKEVENTS */ |
@@ -106,26 +108,23 @@ static inline void tick_init(void) { } | |||
106 | static inline void tick_cancel_sched_timer(int cpu) { } | 108 | static inline void tick_cancel_sched_timer(int cpu) { } |
107 | static inline void tick_clock_notify(void) { } | 109 | static inline void tick_clock_notify(void) { } |
108 | static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } | 110 | static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } |
111 | static inline void tick_check_idle(int cpu) { } | ||
109 | #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ | 112 | #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ |
110 | 113 | ||
111 | # ifdef CONFIG_NO_HZ | 114 | # ifdef CONFIG_NO_HZ |
112 | extern void tick_nohz_stop_sched_tick(int inidle); | 115 | extern void tick_nohz_stop_sched_tick(int inidle); |
113 | extern void tick_nohz_restart_sched_tick(void); | 116 | extern void tick_nohz_restart_sched_tick(void); |
114 | extern void tick_nohz_update_jiffies(void); | ||
115 | extern ktime_t tick_nohz_get_sleep_length(void); | 117 | extern ktime_t tick_nohz_get_sleep_length(void); |
116 | extern void tick_nohz_stop_idle(int cpu); | ||
117 | extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); | 118 | extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); |
118 | # else | 119 | # else |
119 | static inline void tick_nohz_stop_sched_tick(int inidle) { } | 120 | static inline void tick_nohz_stop_sched_tick(int inidle) { } |
120 | static inline void tick_nohz_restart_sched_tick(void) { } | 121 | static inline void tick_nohz_restart_sched_tick(void) { } |
121 | static inline void tick_nohz_update_jiffies(void) { } | ||
122 | static inline ktime_t tick_nohz_get_sleep_length(void) | 122 | static inline ktime_t tick_nohz_get_sleep_length(void) |
123 | { | 123 | { |
124 | ktime_t len = { .tv64 = NSEC_PER_SEC/HZ }; | 124 | ktime_t len = { .tv64 = NSEC_PER_SEC/HZ }; |
125 | 125 | ||
126 | return len; | 126 | return len; |
127 | } | 127 | } |
128 | static inline void tick_nohz_stop_idle(int cpu) { } | ||
129 | static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } | 128 | static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } |
130 | # endif /* !NO_HZ */ | 129 | # endif /* !NO_HZ */ |
131 | 130 | ||
diff --git a/include/linux/time.h b/include/linux/time.h index 51e883df0fa5..4f1c9db57707 100644 --- a/include/linux/time.h +++ b/include/linux/time.h | |||
@@ -119,6 +119,7 @@ extern int do_setitimer(int which, struct itimerval *value, | |||
119 | extern unsigned int alarm_setitimer(unsigned int seconds); | 119 | extern unsigned int alarm_setitimer(unsigned int seconds); |
120 | extern int do_getitimer(int which, struct itimerval *value); | 120 | extern int do_getitimer(int which, struct itimerval *value); |
121 | extern void getnstimeofday(struct timespec *tv); | 121 | extern void getnstimeofday(struct timespec *tv); |
122 | extern void getrawmonotonic(struct timespec *ts); | ||
122 | extern void getboottime(struct timespec *ts); | 123 | extern void getboottime(struct timespec *ts); |
123 | extern void monotonic_to_bootbased(struct timespec *ts); | 124 | extern void monotonic_to_bootbased(struct timespec *ts); |
124 | 125 | ||
@@ -127,6 +128,9 @@ extern int timekeeping_valid_for_hres(void); | |||
127 | extern void update_wall_time(void); | 128 | extern void update_wall_time(void); |
128 | extern void update_xtime_cache(u64 nsec); | 129 | extern void update_xtime_cache(u64 nsec); |
129 | 130 | ||
131 | struct tms; | ||
132 | extern void do_sys_times(struct tms *); | ||
133 | |||
130 | /** | 134 | /** |
131 | * timespec_to_ns - Convert timespec to nanoseconds | 135 | * timespec_to_ns - Convert timespec to nanoseconds |
132 | * @ts: pointer to the timespec variable to be converted | 136 | * @ts: pointer to the timespec variable to be converted |
@@ -216,6 +220,7 @@ struct itimerval { | |||
216 | #define CLOCK_MONOTONIC 1 | 220 | #define CLOCK_MONOTONIC 1 |
217 | #define CLOCK_PROCESS_CPUTIME_ID 2 | 221 | #define CLOCK_PROCESS_CPUTIME_ID 2 |
218 | #define CLOCK_THREAD_CPUTIME_ID 3 | 222 | #define CLOCK_THREAD_CPUTIME_ID 3 |
223 | #define CLOCK_MONOTONIC_RAW 4 | ||
219 | 224 | ||
220 | /* | 225 | /* |
221 | * The IDs of various hardware clocks: | 226 | * The IDs of various hardware clocks: |
diff --git a/include/linux/timex.h b/include/linux/timex.h index fc6035d29d56..9007313b5b71 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h | |||
@@ -82,7 +82,7 @@ | |||
82 | */ | 82 | */ |
83 | #define SHIFT_USEC 16 /* frequency offset scale (shift) */ | 83 | #define SHIFT_USEC 16 /* frequency offset scale (shift) */ |
84 | #define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC)) | 84 | #define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC)) |
85 | #define PPM_SCALE_INV_SHIFT 20 | 85 | #define PPM_SCALE_INV_SHIFT 19 |
86 | #define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \ | 86 | #define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \ |
87 | PPM_SCALE + 1) | 87 | PPM_SCALE + 1) |
88 | 88 | ||
@@ -141,8 +141,15 @@ struct timex { | |||
141 | #define ADJ_MICRO 0x1000 /* select microsecond resolution */ | 141 | #define ADJ_MICRO 0x1000 /* select microsecond resolution */ |
142 | #define ADJ_NANO 0x2000 /* select nanosecond resolution */ | 142 | #define ADJ_NANO 0x2000 /* select nanosecond resolution */ |
143 | #define ADJ_TICK 0x4000 /* tick value */ | 143 | #define ADJ_TICK 0x4000 /* tick value */ |
144 | |||
145 | #ifdef __KERNEL__ | ||
146 | #define ADJ_ADJTIME 0x8000 /* switch between adjtime/adjtimex modes */ | ||
147 | #define ADJ_OFFSET_SINGLESHOT 0x0001 /* old-fashioned adjtime */ | ||
148 | #define ADJ_OFFSET_READONLY 0x2000 /* read-only adjtime */ | ||
149 | #else | ||
144 | #define ADJ_OFFSET_SINGLESHOT 0x8001 /* old-fashioned adjtime */ | 150 | #define ADJ_OFFSET_SINGLESHOT 0x8001 /* old-fashioned adjtime */ |
145 | #define ADJ_OFFSET_SS_READ 0xa001 /* read-only adjtime */ | 151 | #define ADJ_OFFSET_SS_READ 0xa001 /* read-only adjtime */ |
152 | #endif | ||
146 | 153 | ||
147 | /* xntp 3.4 compatibility names */ | 154 | /* xntp 3.4 compatibility names */ |
148 | #define MOD_OFFSET ADJ_OFFSET | 155 | #define MOD_OFFSET ADJ_OFFSET |
diff --git a/kernel/compat.c b/kernel/compat.c index 143990e48cb9..8eafe3eb50d9 100644 --- a/kernel/compat.c +++ b/kernel/compat.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/timex.h> | 23 | #include <linux/timex.h> |
24 | #include <linux/migrate.h> | 24 | #include <linux/migrate.h> |
25 | #include <linux/posix-timers.h> | 25 | #include <linux/posix-timers.h> |
26 | #include <linux/times.h> | ||
26 | 27 | ||
27 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
28 | 29 | ||
@@ -208,49 +209,23 @@ asmlinkage long compat_sys_setitimer(int which, | |||
208 | return 0; | 209 | return 0; |
209 | } | 210 | } |
210 | 211 | ||
212 | static compat_clock_t clock_t_to_compat_clock_t(clock_t x) | ||
213 | { | ||
214 | return compat_jiffies_to_clock_t(clock_t_to_jiffies(x)); | ||
215 | } | ||
216 | |||
211 | asmlinkage long compat_sys_times(struct compat_tms __user *tbuf) | 217 | asmlinkage long compat_sys_times(struct compat_tms __user *tbuf) |
212 | { | 218 | { |
213 | /* | ||
214 | * In the SMP world we might just be unlucky and have one of | ||
215 | * the times increment as we use it. Since the value is an | ||
216 | * atomically safe type this is just fine. Conceptually its | ||
217 | * as if the syscall took an instant longer to occur. | ||
218 | */ | ||
219 | if (tbuf) { | 219 | if (tbuf) { |
220 | struct tms tms; | ||
220 | struct compat_tms tmp; | 221 | struct compat_tms tmp; |
221 | struct task_struct *tsk = current; | 222 | |
222 | struct task_struct *t; | 223 | do_sys_times(&tms); |
223 | cputime_t utime, stime, cutime, cstime; | 224 | /* Convert our struct tms to the compat version. */ |
224 | 225 | tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime); | |
225 | read_lock(&tasklist_lock); | 226 | tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime); |
226 | utime = tsk->signal->utime; | 227 | tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime); |
227 | stime = tsk->signal->stime; | 228 | tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime); |
228 | t = tsk; | ||
229 | do { | ||
230 | utime = cputime_add(utime, t->utime); | ||
231 | stime = cputime_add(stime, t->stime); | ||
232 | t = next_thread(t); | ||
233 | } while (t != tsk); | ||
234 | |||
235 | /* | ||
236 | * While we have tasklist_lock read-locked, no dying thread | ||
237 | * can be updating current->signal->[us]time. Instead, | ||
238 | * we got their counts included in the live thread loop. | ||
239 | * However, another thread can come in right now and | ||
240 | * do a wait call that updates current->signal->c[us]time. | ||
241 | * To make sure we always see that pair updated atomically, | ||
242 | * we take the siglock around fetching them. | ||
243 | */ | ||
244 | spin_lock_irq(&tsk->sighand->siglock); | ||
245 | cutime = tsk->signal->cutime; | ||
246 | cstime = tsk->signal->cstime; | ||
247 | spin_unlock_irq(&tsk->sighand->siglock); | ||
248 | read_unlock(&tasklist_lock); | ||
249 | |||
250 | tmp.tms_utime = compat_jiffies_to_clock_t(cputime_to_jiffies(utime)); | ||
251 | tmp.tms_stime = compat_jiffies_to_clock_t(cputime_to_jiffies(stime)); | ||
252 | tmp.tms_cutime = compat_jiffies_to_clock_t(cputime_to_jiffies(cutime)); | ||
253 | tmp.tms_cstime = compat_jiffies_to_clock_t(cputime_to_jiffies(cstime)); | ||
254 | if (copy_to_user(tbuf, &tmp, sizeof(tmp))) | 229 | if (copy_to_user(tbuf, &tmp, sizeof(tmp))) |
255 | return -EFAULT; | 230 | return -EFAULT; |
256 | } | 231 | } |
diff --git a/kernel/exit.c b/kernel/exit.c index 0ef4673e351b..059b38cae384 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -112,8 +112,6 @@ static void __exit_signal(struct task_struct *tsk) | |||
112 | * We won't ever get here for the group leader, since it | 112 | * We won't ever get here for the group leader, since it |
113 | * will have been the last reference on the signal_struct. | 113 | * will have been the last reference on the signal_struct. |
114 | */ | 114 | */ |
115 | sig->utime = cputime_add(sig->utime, task_utime(tsk)); | ||
116 | sig->stime = cputime_add(sig->stime, task_stime(tsk)); | ||
117 | sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); | 115 | sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); |
118 | sig->min_flt += tsk->min_flt; | 116 | sig->min_flt += tsk->min_flt; |
119 | sig->maj_flt += tsk->maj_flt; | 117 | sig->maj_flt += tsk->maj_flt; |
@@ -122,7 +120,6 @@ static void __exit_signal(struct task_struct *tsk) | |||
122 | sig->inblock += task_io_get_inblock(tsk); | 120 | sig->inblock += task_io_get_inblock(tsk); |
123 | sig->oublock += task_io_get_oublock(tsk); | 121 | sig->oublock += task_io_get_oublock(tsk); |
124 | task_io_accounting_add(&sig->ioac, &tsk->ioac); | 122 | task_io_accounting_add(&sig->ioac, &tsk->ioac); |
125 | sig->sum_sched_runtime += tsk->se.sum_exec_runtime; | ||
126 | sig = NULL; /* Marker for below. */ | 123 | sig = NULL; /* Marker for below. */ |
127 | } | 124 | } |
128 | 125 | ||
@@ -1301,6 +1298,7 @@ static int wait_task_zombie(struct task_struct *p, int options, | |||
1301 | if (likely(!traced)) { | 1298 | if (likely(!traced)) { |
1302 | struct signal_struct *psig; | 1299 | struct signal_struct *psig; |
1303 | struct signal_struct *sig; | 1300 | struct signal_struct *sig; |
1301 | struct task_cputime cputime; | ||
1304 | 1302 | ||
1305 | /* | 1303 | /* |
1306 | * The resource counters for the group leader are in its | 1304 | * The resource counters for the group leader are in its |
@@ -1316,20 +1314,23 @@ static int wait_task_zombie(struct task_struct *p, int options, | |||
1316 | * need to protect the access to p->parent->signal fields, | 1314 | * need to protect the access to p->parent->signal fields, |
1317 | * as other threads in the parent group can be right | 1315 | * as other threads in the parent group can be right |
1318 | * here reaping other children at the same time. | 1316 | * here reaping other children at the same time. |
1317 | * | ||
1318 | * We use thread_group_cputime() to get times for the thread | ||
1319 | * group, which consolidates times for all threads in the | ||
1320 | * group including the group leader. | ||
1319 | */ | 1321 | */ |
1320 | spin_lock_irq(&p->parent->sighand->siglock); | 1322 | spin_lock_irq(&p->parent->sighand->siglock); |
1321 | psig = p->parent->signal; | 1323 | psig = p->parent->signal; |
1322 | sig = p->signal; | 1324 | sig = p->signal; |
1325 | thread_group_cputime(p, &cputime); | ||
1323 | psig->cutime = | 1326 | psig->cutime = |
1324 | cputime_add(psig->cutime, | 1327 | cputime_add(psig->cutime, |
1325 | cputime_add(p->utime, | 1328 | cputime_add(cputime.utime, |
1326 | cputime_add(sig->utime, | 1329 | sig->cutime)); |
1327 | sig->cutime))); | ||
1328 | psig->cstime = | 1330 | psig->cstime = |
1329 | cputime_add(psig->cstime, | 1331 | cputime_add(psig->cstime, |
1330 | cputime_add(p->stime, | 1332 | cputime_add(cputime.stime, |
1331 | cputime_add(sig->stime, | 1333 | sig->cstime)); |
1332 | sig->cstime))); | ||
1333 | psig->cgtime = | 1334 | psig->cgtime = |
1334 | cputime_add(psig->cgtime, | 1335 | cputime_add(psig->cgtime, |
1335 | cputime_add(p->gtime, | 1336 | cputime_add(p->gtime, |
diff --git a/kernel/fork.c b/kernel/fork.c index 30de644a40c4..44e64d7ba29b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -759,15 +759,44 @@ void __cleanup_sighand(struct sighand_struct *sighand) | |||
759 | kmem_cache_free(sighand_cachep, sighand); | 759 | kmem_cache_free(sighand_cachep, sighand); |
760 | } | 760 | } |
761 | 761 | ||
762 | |||
763 | /* | ||
764 | * Initialize POSIX timer handling for a thread group. | ||
765 | */ | ||
766 | static void posix_cpu_timers_init_group(struct signal_struct *sig) | ||
767 | { | ||
768 | /* Thread group counters. */ | ||
769 | thread_group_cputime_init(sig); | ||
770 | |||
771 | /* Expiration times and increments. */ | ||
772 | sig->it_virt_expires = cputime_zero; | ||
773 | sig->it_virt_incr = cputime_zero; | ||
774 | sig->it_prof_expires = cputime_zero; | ||
775 | sig->it_prof_incr = cputime_zero; | ||
776 | |||
777 | /* Cached expiration times. */ | ||
778 | sig->cputime_expires.prof_exp = cputime_zero; | ||
779 | sig->cputime_expires.virt_exp = cputime_zero; | ||
780 | sig->cputime_expires.sched_exp = 0; | ||
781 | |||
782 | /* The timer lists. */ | ||
783 | INIT_LIST_HEAD(&sig->cpu_timers[0]); | ||
784 | INIT_LIST_HEAD(&sig->cpu_timers[1]); | ||
785 | INIT_LIST_HEAD(&sig->cpu_timers[2]); | ||
786 | } | ||
787 | |||
762 | static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | 788 | static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) |
763 | { | 789 | { |
764 | struct signal_struct *sig; | 790 | struct signal_struct *sig; |
765 | int ret; | 791 | int ret; |
766 | 792 | ||
767 | if (clone_flags & CLONE_THREAD) { | 793 | if (clone_flags & CLONE_THREAD) { |
768 | atomic_inc(&current->signal->count); | 794 | ret = thread_group_cputime_clone_thread(current); |
769 | atomic_inc(&current->signal->live); | 795 | if (likely(!ret)) { |
770 | return 0; | 796 | atomic_inc(&current->signal->count); |
797 | atomic_inc(&current->signal->live); | |
798 | } | ||
799 | return ret; | ||
771 | } | 800 | } |
772 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); | 801 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); |
773 | tsk->signal = sig; | 802 | tsk->signal = sig; |
@@ -795,40 +824,25 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
795 | sig->it_real_incr.tv64 = 0; | 824 | sig->it_real_incr.tv64 = 0; |
796 | sig->real_timer.function = it_real_fn; | 825 | sig->real_timer.function = it_real_fn; |
797 | 826 | ||
798 | sig->it_virt_expires = cputime_zero; | ||
799 | sig->it_virt_incr = cputime_zero; | ||
800 | sig->it_prof_expires = cputime_zero; | ||
801 | sig->it_prof_incr = cputime_zero; | ||
802 | |||
803 | sig->leader = 0; /* session leadership doesn't inherit */ | 827 | sig->leader = 0; /* session leadership doesn't inherit */ |
804 | sig->tty_old_pgrp = NULL; | 828 | sig->tty_old_pgrp = NULL; |
805 | sig->tty = NULL; | 829 | sig->tty = NULL; |
806 | 830 | ||
807 | sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; | 831 | sig->cutime = sig->cstime = cputime_zero; |
808 | sig->gtime = cputime_zero; | 832 | sig->gtime = cputime_zero; |
809 | sig->cgtime = cputime_zero; | 833 | sig->cgtime = cputime_zero; |
810 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; | 834 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; |
811 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; | 835 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; |
812 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; | 836 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; |
813 | task_io_accounting_init(&sig->ioac); | 837 | task_io_accounting_init(&sig->ioac); |
814 | sig->sum_sched_runtime = 0; | ||
815 | INIT_LIST_HEAD(&sig->cpu_timers[0]); | ||
816 | INIT_LIST_HEAD(&sig->cpu_timers[1]); | ||
817 | INIT_LIST_HEAD(&sig->cpu_timers[2]); | ||
818 | taskstats_tgid_init(sig); | 838 | taskstats_tgid_init(sig); |
819 | 839 | ||
820 | task_lock(current->group_leader); | 840 | task_lock(current->group_leader); |
821 | memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); | 841 | memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); |
822 | task_unlock(current->group_leader); | 842 | task_unlock(current->group_leader); |
823 | 843 | ||
824 | if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { | 844 | posix_cpu_timers_init_group(sig); |
825 | /* | 845 | |
826 | * New sole thread in the process gets an expiry time | ||
827 | * of the whole CPU time limit. | ||
828 | */ | ||
829 | tsk->it_prof_expires = | ||
830 | secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); | ||
831 | } | ||
832 | acct_init_pacct(&sig->pacct); | 846 | acct_init_pacct(&sig->pacct); |
833 | 847 | ||
834 | tty_audit_fork(sig); | 848 | tty_audit_fork(sig); |
@@ -838,6 +852,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
838 | 852 | ||
839 | void __cleanup_signal(struct signal_struct *sig) | 853 | void __cleanup_signal(struct signal_struct *sig) |
840 | { | 854 | { |
855 | thread_group_cputime_free(sig); | ||
841 | exit_thread_group_keys(sig); | 856 | exit_thread_group_keys(sig); |
842 | tty_kref_put(sig->tty); | 857 | tty_kref_put(sig->tty); |
843 | kmem_cache_free(signal_cachep, sig); | 858 | kmem_cache_free(signal_cachep, sig); |
@@ -888,6 +903,19 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p) | |||
888 | #endif /* CONFIG_MM_OWNER */ | 903 | #endif /* CONFIG_MM_OWNER */ |
889 | 904 | ||
890 | /* | 905 | /* |
906 | * Initialize POSIX timer handling for a single task. | ||
907 | */ | ||
908 | static void posix_cpu_timers_init(struct task_struct *tsk) | ||
909 | { | ||
910 | tsk->cputime_expires.prof_exp = cputime_zero; | ||
911 | tsk->cputime_expires.virt_exp = cputime_zero; | ||
912 | tsk->cputime_expires.sched_exp = 0; | ||
913 | INIT_LIST_HEAD(&tsk->cpu_timers[0]); | ||
914 | INIT_LIST_HEAD(&tsk->cpu_timers[1]); | ||
915 | INIT_LIST_HEAD(&tsk->cpu_timers[2]); | ||
916 | } | ||
917 | |||
918 | /* | ||
891 | * This creates a new process as a copy of the old one, | 919 | * This creates a new process as a copy of the old one, |
892 | * but does not actually start it yet. | 920 | * but does not actually start it yet. |
893 | * | 921 | * |
@@ -997,12 +1025,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
997 | task_io_accounting_init(&p->ioac); | 1025 | task_io_accounting_init(&p->ioac); |
998 | acct_clear_integrals(p); | 1026 | acct_clear_integrals(p); |
999 | 1027 | ||
1000 | p->it_virt_expires = cputime_zero; | 1028 | posix_cpu_timers_init(p); |
1001 | p->it_prof_expires = cputime_zero; | ||
1002 | p->it_sched_expires = 0; | ||
1003 | INIT_LIST_HEAD(&p->cpu_timers[0]); | ||
1004 | INIT_LIST_HEAD(&p->cpu_timers[1]); | ||
1005 | INIT_LIST_HEAD(&p->cpu_timers[2]); | ||
1006 | 1029 | ||
1007 | p->lock_depth = -1; /* -1 = no lock */ | 1030 | p->lock_depth = -1; /* -1 = no lock */ |
1008 | do_posix_clock_monotonic_gettime(&p->start_time); | 1031 | do_posix_clock_monotonic_gettime(&p->start_time); |
@@ -1203,21 +1226,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1203 | if (clone_flags & CLONE_THREAD) { | 1226 | if (clone_flags & CLONE_THREAD) { |
1204 | p->group_leader = current->group_leader; | 1227 | p->group_leader = current->group_leader; |
1205 | list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); | 1228 | list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); |
1206 | |||
1207 | if (!cputime_eq(current->signal->it_virt_expires, | ||
1208 | cputime_zero) || | ||
1209 | !cputime_eq(current->signal->it_prof_expires, | ||
1210 | cputime_zero) || | ||
1211 | current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY || | ||
1212 | !list_empty(&current->signal->cpu_timers[0]) || | |
1213 | !list_empty(&current->signal->cpu_timers[1]) || | |
1214 | !list_empty(&current->signal->cpu_timers[2])) { | |
1215 | /* | ||
1216 | * Have child wake up on its first tick to check | ||
1217 | * for process CPU timers. | ||
1218 | */ | ||
1219 | p->it_prof_expires = jiffies_to_cputime(1); | ||
1220 | } | ||
1221 | } | 1229 | } |
1222 | 1230 | ||
1223 | if (likely(p->pid)) { | 1231 | if (likely(p->pid)) { |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index cdec83e722fa..95978f48e039 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -1403,9 +1403,7 @@ void hrtimer_run_queues(void) | |||
1403 | if (!base->first) | 1403 | if (!base->first) |
1404 | continue; | 1404 | continue; |
1405 | 1405 | ||
1406 | if (base->get_softirq_time) | 1406 | if (gettime) { |
1407 | base->softirq_time = base->get_softirq_time(); | ||
1408 | else if (gettime) { | ||
1409 | hrtimer_get_softirq_time(cpu_base); | 1407 | hrtimer_get_softirq_time(cpu_base); |
1410 | gettime = 0; | 1408 | gettime = 0; |
1411 | } | 1409 | } |
@@ -1688,9 +1686,11 @@ static void migrate_hrtimers(int cpu) | |||
1688 | new_base = &get_cpu_var(hrtimer_bases); | 1686 | new_base = &get_cpu_var(hrtimer_bases); |
1689 | 1687 | ||
1690 | tick_cancel_sched_timer(cpu); | 1688 | tick_cancel_sched_timer(cpu); |
1691 | 1689 | /* | |
1692 | local_irq_disable(); | 1690 | * The caller is globally serialized and nobody else |
1693 | spin_lock(&new_base->lock); | 1691 | * takes two locks at once, deadlock is not possible. |
1692 | */ | ||
1693 | spin_lock_irq(&new_base->lock); | ||
1694 | spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); | 1694 | spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); |
1695 | 1695 | ||
1696 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | 1696 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { |
@@ -1703,8 +1703,7 @@ static void migrate_hrtimers(int cpu) | |||
1703 | raise = 1; | 1703 | raise = 1; |
1704 | 1704 | ||
1705 | spin_unlock(&old_base->lock); | 1705 | spin_unlock(&old_base->lock); |
1706 | spin_unlock(&new_base->lock); | 1706 | spin_unlock_irq(&new_base->lock); |
1707 | local_irq_enable(); | ||
1708 | put_cpu_var(hrtimer_bases); | 1707 | put_cpu_var(hrtimer_bases); |
1709 | 1708 | ||
1710 | if (raise) | 1709 | if (raise) |
diff --git a/kernel/itimer.c b/kernel/itimer.c index ab982747d9bd..db7c358b9a02 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c | |||
@@ -55,17 +55,15 @@ int do_getitimer(int which, struct itimerval *value) | |||
55 | spin_unlock_irq(&tsk->sighand->siglock); | 55 | spin_unlock_irq(&tsk->sighand->siglock); |
56 | break; | 56 | break; |
57 | case ITIMER_VIRTUAL: | 57 | case ITIMER_VIRTUAL: |
58 | read_lock(&tasklist_lock); | ||
59 | spin_lock_irq(&tsk->sighand->siglock); | 58 | spin_lock_irq(&tsk->sighand->siglock); |
60 | cval = tsk->signal->it_virt_expires; | 59 | cval = tsk->signal->it_virt_expires; |
61 | cinterval = tsk->signal->it_virt_incr; | 60 | cinterval = tsk->signal->it_virt_incr; |
62 | if (!cputime_eq(cval, cputime_zero)) { | 61 | if (!cputime_eq(cval, cputime_zero)) { |
63 | struct task_struct *t = tsk; | 62 | struct task_cputime cputime; |
64 | cputime_t utime = tsk->signal->utime; | 63 | cputime_t utime; |
65 | do { | 64 | |
66 | utime = cputime_add(utime, t->utime); | 65 | thread_group_cputime(tsk, &cputime); |
67 | t = next_thread(t); | 66 | utime = cputime.utime; |
68 | } while (t != tsk); | ||
69 | if (cputime_le(cval, utime)) { /* about to fire */ | 67 | if (cputime_le(cval, utime)) { /* about to fire */ |
70 | cval = jiffies_to_cputime(1); | 68 | cval = jiffies_to_cputime(1); |
71 | } else { | 69 | } else { |
@@ -73,25 +71,19 @@ int do_getitimer(int which, struct itimerval *value) | |||
73 | } | 71 | } |
74 | } | 72 | } |
75 | spin_unlock_irq(&tsk->sighand->siglock); | 73 | spin_unlock_irq(&tsk->sighand->siglock); |
76 | read_unlock(&tasklist_lock); | ||
77 | cputime_to_timeval(cval, &value->it_value); | 74 | cputime_to_timeval(cval, &value->it_value); |
78 | cputime_to_timeval(cinterval, &value->it_interval); | 75 | cputime_to_timeval(cinterval, &value->it_interval); |
79 | break; | 76 | break; |
80 | case ITIMER_PROF: | 77 | case ITIMER_PROF: |
81 | read_lock(&tasklist_lock); | ||
82 | spin_lock_irq(&tsk->sighand->siglock); | 78 | spin_lock_irq(&tsk->sighand->siglock); |
83 | cval = tsk->signal->it_prof_expires; | 79 | cval = tsk->signal->it_prof_expires; |
84 | cinterval = tsk->signal->it_prof_incr; | 80 | cinterval = tsk->signal->it_prof_incr; |
85 | if (!cputime_eq(cval, cputime_zero)) { | 81 | if (!cputime_eq(cval, cputime_zero)) { |
86 | struct task_struct *t = tsk; | 82 | struct task_cputime times; |
87 | cputime_t ptime = cputime_add(tsk->signal->utime, | 83 | cputime_t ptime; |
88 | tsk->signal->stime); | 84 | |
89 | do { | 85 | thread_group_cputime(tsk, &times); |
90 | ptime = cputime_add(ptime, | 86 | ptime = cputime_add(times.utime, times.stime); |
91 | cputime_add(t->utime, | ||
92 | t->stime)); | ||
93 | t = next_thread(t); | ||
94 | } while (t != tsk); | ||
95 | if (cputime_le(cval, ptime)) { /* about to fire */ | 87 | if (cputime_le(cval, ptime)) { /* about to fire */ |
96 | cval = jiffies_to_cputime(1); | 88 | cval = jiffies_to_cputime(1); |
97 | } else { | 89 | } else { |
@@ -99,7 +91,6 @@ int do_getitimer(int which, struct itimerval *value) | |||
99 | } | 91 | } |
100 | } | 92 | } |
101 | spin_unlock_irq(&tsk->sighand->siglock); | 93 | spin_unlock_irq(&tsk->sighand->siglock); |
102 | read_unlock(&tasklist_lock); | ||
103 | cputime_to_timeval(cval, &value->it_value); | 94 | cputime_to_timeval(cval, &value->it_value); |
104 | cputime_to_timeval(cinterval, &value->it_interval); | 95 | cputime_to_timeval(cinterval, &value->it_interval); |
105 | break; | 96 | break; |
@@ -185,7 +176,6 @@ again: | |||
185 | case ITIMER_VIRTUAL: | 176 | case ITIMER_VIRTUAL: |
186 | nval = timeval_to_cputime(&value->it_value); | 177 | nval = timeval_to_cputime(&value->it_value); |
187 | ninterval = timeval_to_cputime(&value->it_interval); | 178 | ninterval = timeval_to_cputime(&value->it_interval); |
188 | read_lock(&tasklist_lock); | ||
189 | spin_lock_irq(&tsk->sighand->siglock); | 179 | spin_lock_irq(&tsk->sighand->siglock); |
190 | cval = tsk->signal->it_virt_expires; | 180 | cval = tsk->signal->it_virt_expires; |
191 | cinterval = tsk->signal->it_virt_incr; | 181 | cinterval = tsk->signal->it_virt_incr; |
@@ -200,7 +190,6 @@ again: | |||
200 | tsk->signal->it_virt_expires = nval; | 190 | tsk->signal->it_virt_expires = nval; |
201 | tsk->signal->it_virt_incr = ninterval; | 191 | tsk->signal->it_virt_incr = ninterval; |
202 | spin_unlock_irq(&tsk->sighand->siglock); | 192 | spin_unlock_irq(&tsk->sighand->siglock); |
203 | read_unlock(&tasklist_lock); | ||
204 | if (ovalue) { | 193 | if (ovalue) { |
205 | cputime_to_timeval(cval, &ovalue->it_value); | 194 | cputime_to_timeval(cval, &ovalue->it_value); |
206 | cputime_to_timeval(cinterval, &ovalue->it_interval); | 195 | cputime_to_timeval(cinterval, &ovalue->it_interval); |
@@ -209,7 +198,6 @@ again: | |||
209 | case ITIMER_PROF: | 198 | case ITIMER_PROF: |
210 | nval = timeval_to_cputime(&value->it_value); | 199 | nval = timeval_to_cputime(&value->it_value); |
211 | ninterval = timeval_to_cputime(&value->it_interval); | 200 | ninterval = timeval_to_cputime(&value->it_interval); |
212 | read_lock(&tasklist_lock); | ||
213 | spin_lock_irq(&tsk->sighand->siglock); | 201 | spin_lock_irq(&tsk->sighand->siglock); |
214 | cval = tsk->signal->it_prof_expires; | 202 | cval = tsk->signal->it_prof_expires; |
215 | cinterval = tsk->signal->it_prof_incr; | 203 | cinterval = tsk->signal->it_prof_incr; |
@@ -224,7 +212,6 @@ again: | |||
224 | tsk->signal->it_prof_expires = nval; | 212 | tsk->signal->it_prof_expires = nval; |
225 | tsk->signal->it_prof_incr = ninterval; | 213 | tsk->signal->it_prof_incr = ninterval; |
226 | spin_unlock_irq(&tsk->sighand->siglock); | 214 | spin_unlock_irq(&tsk->sighand->siglock); |
227 | read_unlock(&tasklist_lock); | ||
228 | if (ovalue) { | 215 | if (ovalue) { |
229 | cputime_to_timeval(cval, &ovalue->it_value); | 216 | cputime_to_timeval(cval, &ovalue->it_value); |
230 | cputime_to_timeval(cinterval, &ovalue->it_interval); | 217 | cputime_to_timeval(cinterval, &ovalue->it_interval); |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index c42a03aef36f..153dcb2639c3 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -7,6 +7,93 @@ | |||
7 | #include <linux/errno.h> | 7 | #include <linux/errno.h> |
8 | #include <linux/math64.h> | 8 | #include <linux/math64.h> |
9 | #include <asm/uaccess.h> | 9 | #include <asm/uaccess.h> |
10 | #include <linux/kernel_stat.h> | ||
11 | |||
12 | /* | ||
13 | * Allocate the thread_group_cputime structure appropriately and fill in the | ||
14 | * current values of the fields. Called from copy_signal() via | ||
15 | * thread_group_cputime_clone_thread() when adding a second or subsequent | ||
16 | * thread to a thread group. Assumes interrupts are enabled when called. | ||
17 | */ | ||
18 | int thread_group_cputime_alloc(struct task_struct *tsk) | ||
19 | { | ||
20 | struct signal_struct *sig = tsk->signal; | ||
21 | struct task_cputime *cputime; | ||
22 | |||
23 | /* | ||
24 | * If we have multiple threads and we don't already have a | ||
25 | * per-CPU task_cputime struct (checked in the caller), allocate | ||
26 | * one and fill it in with the times accumulated so far. We may | ||
27 | * race with another thread so recheck after we pick up the sighand | ||
28 | * lock. | ||
29 | */ | ||
30 | cputime = alloc_percpu(struct task_cputime); | ||
31 | if (cputime == NULL) | ||
32 | return -ENOMEM; | ||
33 | spin_lock_irq(&tsk->sighand->siglock); | ||
34 | if (sig->cputime.totals) { | ||
35 | spin_unlock_irq(&tsk->sighand->siglock); | ||
36 | free_percpu(cputime); | ||
37 | return 0; | ||
38 | } | ||
39 | sig->cputime.totals = cputime; | ||
40 | cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id()); | ||
41 | cputime->utime = tsk->utime; | ||
42 | cputime->stime = tsk->stime; | ||
43 | cputime->sum_exec_runtime = tsk->se.sum_exec_runtime; | ||
44 | spin_unlock_irq(&tsk->sighand->siglock); | ||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | /** | ||
49 | * thread_group_cputime - Sum the thread group time fields across all CPUs. | ||
50 | * | ||
51 | * @tsk: The task we use to identify the thread group. | ||
52 | * @times: task_cputime structure in which we return the summed fields. | ||
53 | * | ||
54 | * Walk the list of CPUs to sum the per-CPU time fields in the thread group | ||
55 | * time structure. | ||
56 | */ | ||
57 | void thread_group_cputime( | ||
58 | struct task_struct *tsk, | ||
59 | struct task_cputime *times) | ||
60 | { | ||
61 | struct signal_struct *sig; | ||
62 | int i; | ||
63 | struct task_cputime *tot; | ||
64 | |||
65 | sig = tsk->signal; | ||
66 | if (unlikely(!sig) || !sig->cputime.totals) { | ||
67 | times->utime = tsk->utime; | ||
68 | times->stime = tsk->stime; | ||
69 | times->sum_exec_runtime = tsk->se.sum_exec_runtime; | ||
70 | return; | ||
71 | } | ||
72 | times->stime = times->utime = cputime_zero; | ||
73 | times->sum_exec_runtime = 0; | ||
74 | for_each_possible_cpu(i) { | ||
75 | tot = per_cpu_ptr(tsk->signal->cputime.totals, i); | ||
76 | times->utime = cputime_add(times->utime, tot->utime); | ||
77 | times->stime = cputime_add(times->stime, tot->stime); | ||
78 | times->sum_exec_runtime += tot->sum_exec_runtime; | ||
79 | } | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * Called after updating RLIMIT_CPU to set timer expiration if necessary. | ||
84 | */ | ||
85 | void update_rlimit_cpu(unsigned long rlim_new) | ||
86 | { | ||
87 | cputime_t cputime; | ||
88 | |||
89 | cputime = secs_to_cputime(rlim_new); | ||
90 | if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || | ||
91 | cputime_lt(current->signal->it_prof_expires, cputime)) { | ||
92 | spin_lock_irq(&current->sighand->siglock); | |
93 | set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); | |
94 | spin_unlock_irq(&current->sighand->siglock); | |
95 | } | ||
96 | } | ||
10 | 97 | ||
11 | static int check_clock(const clockid_t which_clock) | 98 | static int check_clock(const clockid_t which_clock) |
12 | { | 99 | { |
@@ -158,10 +245,6 @@ static inline cputime_t virt_ticks(struct task_struct *p) | |||
158 | { | 245 | { |
159 | return p->utime; | 246 | return p->utime; |
160 | } | 247 | } |
161 | static inline unsigned long long sched_ns(struct task_struct *p) | ||
162 | { | ||
163 | return task_sched_runtime(p); | ||
164 | } | ||
165 | 248 | ||
166 | int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) | 249 | int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) |
167 | { | 250 | { |
@@ -211,7 +294,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, | |||
211 | cpu->cpu = virt_ticks(p); | 294 | cpu->cpu = virt_ticks(p); |
212 | break; | 295 | break; |
213 | case CPUCLOCK_SCHED: | 296 | case CPUCLOCK_SCHED: |
214 | cpu->sched = sched_ns(p); | 297 | cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p); |
215 | break; | 298 | break; |
216 | } | 299 | } |
217 | return 0; | 300 | return 0; |
@@ -220,59 +303,30 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, | |||
220 | /* | 303 | /* |
221 | * Sample a process (thread group) clock for the given group_leader task. | 304 | * Sample a process (thread group) clock for the given group_leader task. |
222 | * Must be called with tasklist_lock held for reading. | 305 | * Must be called with tasklist_lock held for reading. |
223 | * Must be called with tasklist_lock held for reading, and p->sighand->siglock. | ||
224 | */ | 306 | */ |
225 | static int cpu_clock_sample_group_locked(unsigned int clock_idx, | 307 | static int cpu_clock_sample_group(const clockid_t which_clock, |
226 | struct task_struct *p, | 308 | struct task_struct *p, |
227 | union cpu_time_count *cpu) | 309 | union cpu_time_count *cpu) |
228 | { | 310 | { |
229 | struct task_struct *t = p; | 311 | struct task_cputime cputime; |
230 | switch (clock_idx) { | 312 | |
313 | thread_group_cputime(p, &cputime); | ||
314 | switch (which_clock) { | ||
231 | default: | 315 | default: |
232 | return -EINVAL; | 316 | return -EINVAL; |
233 | case CPUCLOCK_PROF: | 317 | case CPUCLOCK_PROF: |
234 | cpu->cpu = cputime_add(p->signal->utime, p->signal->stime); | 318 | cpu->cpu = cputime_add(cputime.utime, cputime.stime); |
235 | do { | ||
236 | cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t)); | ||
237 | t = next_thread(t); | ||
238 | } while (t != p); | ||
239 | break; | 319 | break; |
240 | case CPUCLOCK_VIRT: | 320 | case CPUCLOCK_VIRT: |
241 | cpu->cpu = p->signal->utime; | 321 | cpu->cpu = cputime.utime; |
242 | do { | ||
243 | cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t)); | ||
244 | t = next_thread(t); | ||
245 | } while (t != p); | ||
246 | break; | 322 | break; |
247 | case CPUCLOCK_SCHED: | 323 | case CPUCLOCK_SCHED: |
248 | cpu->sched = p->signal->sum_sched_runtime; | 324 | cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); |
249 | /* Add in each other live thread. */ | ||
250 | while ((t = next_thread(t)) != p) { | ||
251 | cpu->sched += t->se.sum_exec_runtime; | ||
252 | } | ||
253 | cpu->sched += sched_ns(p); | ||
254 | break; | 325 | break; |
255 | } | 326 | } |
256 | return 0; | 327 | return 0; |
257 | } | 328 | } |
258 | 329 | ||
259 | /* | ||
260 | * Sample a process (thread group) clock for the given group_leader task. | ||
261 | * Must be called with tasklist_lock held for reading. | ||
262 | */ | ||
263 | static int cpu_clock_sample_group(const clockid_t which_clock, | ||
264 | struct task_struct *p, | ||
265 | union cpu_time_count *cpu) | ||
266 | { | ||
267 | int ret; | ||
268 | unsigned long flags; | ||
269 | spin_lock_irqsave(&p->sighand->siglock, flags); | ||
270 | ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p, | ||
271 | cpu); | ||
272 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | ||
273 | return ret; | ||
274 | } | ||
275 | |||
276 | 330 | ||
277 | int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) | 331 | int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) |
278 | { | 332 | { |
@@ -471,80 +525,11 @@ void posix_cpu_timers_exit(struct task_struct *tsk) | |||
471 | } | 525 | } |
472 | void posix_cpu_timers_exit_group(struct task_struct *tsk) | 526 | void posix_cpu_timers_exit_group(struct task_struct *tsk) |
473 | { | 527 | { |
474 | cleanup_timers(tsk->signal->cpu_timers, | 528 | struct task_cputime cputime; |
475 | cputime_add(tsk->utime, tsk->signal->utime), | ||
476 | cputime_add(tsk->stime, tsk->signal->stime), | ||
477 | tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime); | ||
478 | } | ||
479 | 529 | ||
480 | 530 | thread_group_cputime(tsk, &cputime); | |
481 | /* | 531 | cleanup_timers(tsk->signal->cpu_timers, |
482 | * Set the expiry times of all the threads in the process so one of them | 532 | cputime.utime, cputime.stime, cputime.sum_exec_runtime); |
483 | * will go off before the process cumulative expiry total is reached. | ||
484 | */ | ||
485 | static void process_timer_rebalance(struct task_struct *p, | ||
486 | unsigned int clock_idx, | ||
487 | union cpu_time_count expires, | ||
488 | union cpu_time_count val) | ||
489 | { | ||
490 | cputime_t ticks, left; | ||
491 | unsigned long long ns, nsleft; | ||
492 | struct task_struct *t = p; | ||
493 | unsigned int nthreads = atomic_read(&p->signal->live); | ||
494 | |||
495 | if (!nthreads) | ||
496 | return; | ||
497 | |||
498 | switch (clock_idx) { | ||
499 | default: | ||
500 | BUG(); | ||
501 | break; | ||
502 | case CPUCLOCK_PROF: | ||
503 | left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu), | ||
504 | nthreads); | ||
505 | do { | ||
506 | if (likely(!(t->flags & PF_EXITING))) { | ||
507 | ticks = cputime_add(prof_ticks(t), left); | ||
508 | if (cputime_eq(t->it_prof_expires, | ||
509 | cputime_zero) || | ||
510 | cputime_gt(t->it_prof_expires, ticks)) { | ||
511 | t->it_prof_expires = ticks; | ||
512 | } | ||
513 | } | ||
514 | t = next_thread(t); | ||
515 | } while (t != p); | ||
516 | break; | ||
517 | case CPUCLOCK_VIRT: | ||
518 | left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu), | ||
519 | nthreads); | ||
520 | do { | ||
521 | if (likely(!(t->flags & PF_EXITING))) { | ||
522 | ticks = cputime_add(virt_ticks(t), left); | ||
523 | if (cputime_eq(t->it_virt_expires, | ||
524 | cputime_zero) || | ||
525 | cputime_gt(t->it_virt_expires, ticks)) { | ||
526 | t->it_virt_expires = ticks; | ||
527 | } | ||
528 | } | ||
529 | t = next_thread(t); | ||
530 | } while (t != p); | ||
531 | break; | ||
532 | case CPUCLOCK_SCHED: | ||
533 | nsleft = expires.sched - val.sched; | ||
534 | do_div(nsleft, nthreads); | ||
535 | nsleft = max_t(unsigned long long, nsleft, 1); | ||
536 | do { | ||
537 | if (likely(!(t->flags & PF_EXITING))) { | ||
538 | ns = t->se.sum_exec_runtime + nsleft; | ||
539 | if (t->it_sched_expires == 0 || | ||
540 | t->it_sched_expires > ns) { | ||
541 | t->it_sched_expires = ns; | ||
542 | } | ||
543 | } | ||
544 | t = next_thread(t); | ||
545 | } while (t != p); | ||
546 | break; | ||
547 | } | ||
548 | } | 533 | } |
549 | 534 | ||
550 | static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) | 535 | static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) |
@@ -608,29 +593,32 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) | |||
608 | default: | 593 | default: |
609 | BUG(); | 594 | BUG(); |
610 | case CPUCLOCK_PROF: | 595 | case CPUCLOCK_PROF: |
611 | if (cputime_eq(p->it_prof_expires, | 596 | if (cputime_eq(p->cputime_expires.prof_exp, |
612 | cputime_zero) || | 597 | cputime_zero) || |
613 | cputime_gt(p->it_prof_expires, | 598 | cputime_gt(p->cputime_expires.prof_exp, |
614 | nt->expires.cpu)) | 599 | nt->expires.cpu)) |
615 | p->it_prof_expires = nt->expires.cpu; | 600 | p->cputime_expires.prof_exp = |
601 | nt->expires.cpu; | ||
616 | break; | 602 | break; |
617 | case CPUCLOCK_VIRT: | 603 | case CPUCLOCK_VIRT: |
618 | if (cputime_eq(p->it_virt_expires, | 604 | if (cputime_eq(p->cputime_expires.virt_exp, |
619 | cputime_zero) || | 605 | cputime_zero) || |
620 | cputime_gt(p->it_virt_expires, | 606 | cputime_gt(p->cputime_expires.virt_exp, |
621 | nt->expires.cpu)) | 607 | nt->expires.cpu)) |
622 | p->it_virt_expires = nt->expires.cpu; | 608 | p->cputime_expires.virt_exp = |
609 | nt->expires.cpu; | ||
623 | break; | 610 | break; |
624 | case CPUCLOCK_SCHED: | 611 | case CPUCLOCK_SCHED: |
625 | if (p->it_sched_expires == 0 || | 612 | if (p->cputime_expires.sched_exp == 0 || |
626 | p->it_sched_expires > nt->expires.sched) | 613 | p->cputime_expires.sched_exp > |
627 | p->it_sched_expires = nt->expires.sched; | 614 | nt->expires.sched) |
615 | p->cputime_expires.sched_exp = | ||
616 | nt->expires.sched; | ||
628 | break; | 617 | break; |
629 | } | 618 | } |
630 | } else { | 619 | } else { |
631 | /* | 620 | /* |
632 | * For a process timer, we must balance | 621 | * For a process timer, set the cached expiration time. |
633 | * all the live threads' expirations. | ||
634 | */ | 622 | */ |
635 | switch (CPUCLOCK_WHICH(timer->it_clock)) { | 623 | switch (CPUCLOCK_WHICH(timer->it_clock)) { |
636 | default: | 624 | default: |
@@ -641,7 +629,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) | |||
641 | cputime_lt(p->signal->it_virt_expires, | 629 | cputime_lt(p->signal->it_virt_expires, |
642 | timer->it.cpu.expires.cpu)) | 630 | timer->it.cpu.expires.cpu)) |
643 | break; | 631 | break; |
644 | goto rebalance; | 632 | p->signal->cputime_expires.virt_exp = |
633 | timer->it.cpu.expires.cpu; | ||
634 | break; | ||
645 | case CPUCLOCK_PROF: | 635 | case CPUCLOCK_PROF: |
646 | if (!cputime_eq(p->signal->it_prof_expires, | 636 | if (!cputime_eq(p->signal->it_prof_expires, |
647 | cputime_zero) && | 637 | cputime_zero) && |
@@ -652,13 +642,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) | |||
652 | if (i != RLIM_INFINITY && | 642 | if (i != RLIM_INFINITY && |
653 | i <= cputime_to_secs(timer->it.cpu.expires.cpu)) | 643 | i <= cputime_to_secs(timer->it.cpu.expires.cpu)) |
654 | break; | 644 | break; |
655 | goto rebalance; | 645 | p->signal->cputime_expires.prof_exp = |
646 | timer->it.cpu.expires.cpu; | ||
647 | break; | ||
656 | case CPUCLOCK_SCHED: | 648 | case CPUCLOCK_SCHED: |
657 | rebalance: | 649 | p->signal->cputime_expires.sched_exp = |
658 | process_timer_rebalance( | 650 | timer->it.cpu.expires.sched; |
659 | timer->it.cpu.task, | ||
660 | CPUCLOCK_WHICH(timer->it_clock), | ||
661 | timer->it.cpu.expires, now); | ||
662 | break; | 651 | break; |
663 | } | 652 | } |
664 | } | 653 | } |
@@ -969,13 +958,13 @@ static void check_thread_timers(struct task_struct *tsk, | |||
969 | struct signal_struct *const sig = tsk->signal; | 958 | struct signal_struct *const sig = tsk->signal; |
970 | 959 | ||
971 | maxfire = 20; | 960 | maxfire = 20; |
972 | tsk->it_prof_expires = cputime_zero; | 961 | tsk->cputime_expires.prof_exp = cputime_zero; |
973 | while (!list_empty(timers)) { | 962 | while (!list_empty(timers)) { |
974 | struct cpu_timer_list *t = list_first_entry(timers, | 963 | struct cpu_timer_list *t = list_first_entry(timers, |
975 | struct cpu_timer_list, | 964 | struct cpu_timer_list, |
976 | entry); | 965 | entry); |
977 | if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) { | 966 | if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) { |
978 | tsk->it_prof_expires = t->expires.cpu; | 967 | tsk->cputime_expires.prof_exp = t->expires.cpu; |
979 | break; | 968 | break; |
980 | } | 969 | } |
981 | t->firing = 1; | 970 | t->firing = 1; |
@@ -984,13 +973,13 @@ static void check_thread_timers(struct task_struct *tsk, | |||
984 | 973 | ||
985 | ++timers; | 974 | ++timers; |
986 | maxfire = 20; | 975 | maxfire = 20; |
987 | tsk->it_virt_expires = cputime_zero; | 976 | tsk->cputime_expires.virt_exp = cputime_zero; |
988 | while (!list_empty(timers)) { | 977 | while (!list_empty(timers)) { |
989 | struct cpu_timer_list *t = list_first_entry(timers, | 978 | struct cpu_timer_list *t = list_first_entry(timers, |
990 | struct cpu_timer_list, | 979 | struct cpu_timer_list, |
991 | entry); | 980 | entry); |
992 | if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) { | 981 | if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) { |
993 | tsk->it_virt_expires = t->expires.cpu; | 982 | tsk->cputime_expires.virt_exp = t->expires.cpu; |
994 | break; | 983 | break; |
995 | } | 984 | } |
996 | t->firing = 1; | 985 | t->firing = 1; |
@@ -999,13 +988,13 @@ static void check_thread_timers(struct task_struct *tsk, | |||
999 | 988 | ||
1000 | ++timers; | 989 | ++timers; |
1001 | maxfire = 20; | 990 | maxfire = 20; |
1002 | tsk->it_sched_expires = 0; | 991 | tsk->cputime_expires.sched_exp = 0; |
1003 | while (!list_empty(timers)) { | 992 | while (!list_empty(timers)) { |
1004 | struct cpu_timer_list *t = list_first_entry(timers, | 993 | struct cpu_timer_list *t = list_first_entry(timers, |
1005 | struct cpu_timer_list, | 994 | struct cpu_timer_list, |
1006 | entry); | 995 | entry); |
1007 | if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { | 996 | if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { |
1008 | tsk->it_sched_expires = t->expires.sched; | 997 | tsk->cputime_expires.sched_exp = t->expires.sched; |
1009 | break; | 998 | break; |
1010 | } | 999 | } |
1011 | t->firing = 1; | 1000 | t->firing = 1; |
@@ -1055,10 +1044,10 @@ static void check_process_timers(struct task_struct *tsk, | |||
1055 | { | 1044 | { |
1056 | int maxfire; | 1045 | int maxfire; |
1057 | struct signal_struct *const sig = tsk->signal; | 1046 | struct signal_struct *const sig = tsk->signal; |
1058 | cputime_t utime, stime, ptime, virt_expires, prof_expires; | 1047 | cputime_t utime, ptime, virt_expires, prof_expires; |
1059 | unsigned long long sum_sched_runtime, sched_expires; | 1048 | unsigned long long sum_sched_runtime, sched_expires; |
1060 | struct task_struct *t; | ||
1061 | struct list_head *timers = sig->cpu_timers; | 1049 | struct list_head *timers = sig->cpu_timers; |
1050 | struct task_cputime cputime; | ||
1062 | 1051 | ||
1063 | /* | 1052 | /* |
1064 | * Don't sample the current process CPU clocks if there are no timers. | 1053 | * Don't sample the current process CPU clocks if there are no timers. |
@@ -1074,18 +1063,10 @@ static void check_process_timers(struct task_struct *tsk, | |||
1074 | /* | 1063 | /* |
1075 | * Collect the current process totals. | 1064 | * Collect the current process totals. |
1076 | */ | 1065 | */ |
1077 | utime = sig->utime; | 1066 | thread_group_cputime(tsk, &cputime); |
1078 | stime = sig->stime; | 1067 | utime = cputime.utime; |
1079 | sum_sched_runtime = sig->sum_sched_runtime; | 1068 | ptime = cputime_add(utime, cputime.stime); |
1080 | t = tsk; | 1069 | sum_sched_runtime = cputime.sum_exec_runtime; |
1081 | do { | ||
1082 | utime = cputime_add(utime, t->utime); | ||
1083 | stime = cputime_add(stime, t->stime); | ||
1084 | sum_sched_runtime += t->se.sum_exec_runtime; | ||
1085 | t = next_thread(t); | ||
1086 | } while (t != tsk); | ||
1087 | ptime = cputime_add(utime, stime); | ||
1088 | |||
1089 | maxfire = 20; | 1070 | maxfire = 20; |
1090 | prof_expires = cputime_zero; | 1071 | prof_expires = cputime_zero; |
1091 | while (!list_empty(timers)) { | 1072 | while (!list_empty(timers)) { |
@@ -1193,60 +1174,18 @@ static void check_process_timers(struct task_struct *tsk, | |||
1193 | } | 1174 | } |
1194 | } | 1175 | } |
1195 | 1176 | ||
1196 | if (!cputime_eq(prof_expires, cputime_zero) || | 1177 | if (!cputime_eq(prof_expires, cputime_zero) && |
1197 | !cputime_eq(virt_expires, cputime_zero) || | 1178 | (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) || |
1198 | sched_expires != 0) { | 1179 | cputime_gt(sig->cputime_expires.prof_exp, prof_expires))) |
1199 | /* | 1180 | sig->cputime_expires.prof_exp = prof_expires; |
1200 | * Rebalance the threads' expiry times for the remaining | 1181 | if (!cputime_eq(virt_expires, cputime_zero) && |
1201 | * process CPU timers. | 1182 | (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) || |
1202 | */ | 1183 | cputime_gt(sig->cputime_expires.virt_exp, virt_expires))) |
1203 | 1184 | sig->cputime_expires.virt_exp = virt_expires; | |
1204 | cputime_t prof_left, virt_left, ticks; | 1185 | if (sched_expires != 0 && |
1205 | unsigned long long sched_left, sched; | 1186 | (sig->cputime_expires.sched_exp == 0 || |
1206 | const unsigned int nthreads = atomic_read(&sig->live); | 1187 | sig->cputime_expires.sched_exp > sched_expires)) |
1207 | 1188 | sig->cputime_expires.sched_exp = sched_expires; | |
1208 | if (!nthreads) | ||
1209 | return; | ||
1210 | |||
1211 | prof_left = cputime_sub(prof_expires, utime); | ||
1212 | prof_left = cputime_sub(prof_left, stime); | ||
1213 | prof_left = cputime_div_non_zero(prof_left, nthreads); | ||
1214 | virt_left = cputime_sub(virt_expires, utime); | ||
1215 | virt_left = cputime_div_non_zero(virt_left, nthreads); | ||
1216 | if (sched_expires) { | ||
1217 | sched_left = sched_expires - sum_sched_runtime; | ||
1218 | do_div(sched_left, nthreads); | ||
1219 | sched_left = max_t(unsigned long long, sched_left, 1); | ||
1220 | } else { | ||
1221 | sched_left = 0; | ||
1222 | } | ||
1223 | t = tsk; | ||
1224 | do { | ||
1225 | if (unlikely(t->flags & PF_EXITING)) | ||
1226 | continue; | ||
1227 | |||
1228 | ticks = cputime_add(cputime_add(t->utime, t->stime), | ||
1229 | prof_left); | ||
1230 | if (!cputime_eq(prof_expires, cputime_zero) && | ||
1231 | (cputime_eq(t->it_prof_expires, cputime_zero) || | ||
1232 | cputime_gt(t->it_prof_expires, ticks))) { | ||
1233 | t->it_prof_expires = ticks; | ||
1234 | } | ||
1235 | |||
1236 | ticks = cputime_add(t->utime, virt_left); | ||
1237 | if (!cputime_eq(virt_expires, cputime_zero) && | ||
1238 | (cputime_eq(t->it_virt_expires, cputime_zero) || | ||
1239 | cputime_gt(t->it_virt_expires, ticks))) { | ||
1240 | t->it_virt_expires = ticks; | ||
1241 | } | ||
1242 | |||
1243 | sched = t->se.sum_exec_runtime + sched_left; | ||
1244 | if (sched_expires && (t->it_sched_expires == 0 || | ||
1245 | t->it_sched_expires > sched)) { | ||
1246 | t->it_sched_expires = sched; | ||
1247 | } | ||
1248 | } while ((t = next_thread(t)) != tsk); | ||
1249 | } | ||
1250 | } | 1189 | } |
1251 | 1190 | ||
1252 | /* | 1191 | /* |
@@ -1314,6 +1253,86 @@ out: | |||
1314 | ++timer->it_requeue_pending; | 1253 | ++timer->it_requeue_pending; |
1315 | } | 1254 | } |
1316 | 1255 | ||
1256 | /** | ||
1257 | * task_cputime_zero - Check a task_cputime struct for all zero fields. | ||
1258 | * | ||
1259 | * @cputime: The struct to compare. | ||
1260 | * | ||
1261 | * Checks @cputime to see if all fields are zero. Returns true if all fields | ||
1262 | * are zero, false if any field is nonzero. | ||
1263 | */ | ||
1264 | static inline int task_cputime_zero(const struct task_cputime *cputime) | ||
1265 | { | ||
1266 | if (cputime_eq(cputime->utime, cputime_zero) && | ||
1267 | cputime_eq(cputime->stime, cputime_zero) && | ||
1268 | cputime->sum_exec_runtime == 0) | ||
1269 | return 1; | ||
1270 | return 0; | ||
1271 | } | ||
1272 | |||
1273 | /** | ||
1274 | * task_cputime_expired - Compare two task_cputime entities. | ||
1275 | * | ||
1276 | * @sample: The task_cputime structure to be checked for expiration. | ||
1277 | * @expires: Expiration times, against which @sample will be checked. | ||
1278 | * | ||
1279 | * Checks @sample against @expires to see if any field of @sample has expired. | ||
1280 | * Returns true if any field of the former is greater than the corresponding | ||
1281 | * field of the latter if the latter field is set. Otherwise returns false. | ||
1282 | */ | ||
1283 | static inline int task_cputime_expired(const struct task_cputime *sample, | ||
1284 | const struct task_cputime *expires) | ||
1285 | { | ||
1286 | if (!cputime_eq(expires->utime, cputime_zero) && | ||
1287 | cputime_ge(sample->utime, expires->utime)) | ||
1288 | return 1; | ||
1289 | if (!cputime_eq(expires->stime, cputime_zero) && | ||
1290 | cputime_ge(cputime_add(sample->utime, sample->stime), | ||
1291 | expires->stime)) | ||
1292 | return 1; | ||
1293 | if (expires->sum_exec_runtime != 0 && | ||
1294 | sample->sum_exec_runtime >= expires->sum_exec_runtime) | ||
1295 | return 1; | ||
1296 | return 0; | ||
1297 | } | ||
1298 | |||
1299 | /** | ||
1300 | * fastpath_timer_check - POSIX CPU timers fast path. | ||
1301 | * | ||
1302 | * @tsk: The task (thread) being checked. | ||
1303 | * | ||
1304 | * Check the task and thread group timers. If both are zero (there are no | ||
1305 | * timers set) return false. Otherwise snapshot the task and thread group | ||
1306 | * timers and compare them with the corresponding expiration times. Return | ||
1307 | * true if a timer has expired, else return false. | ||
1308 | */ | ||
1309 | static inline int fastpath_timer_check(struct task_struct *tsk) | ||
1310 | { | ||
1311 | struct signal_struct *sig = tsk->signal; | ||
1312 | |||
1313 | if (unlikely(!sig)) | ||
1314 | return 0; | ||
1315 | |||
1316 | if (!task_cputime_zero(&tsk->cputime_expires)) { | ||
1317 | struct task_cputime task_sample = { | ||
1318 | .utime = tsk->utime, | ||
1319 | .stime = tsk->stime, | ||
1320 | .sum_exec_runtime = tsk->se.sum_exec_runtime | ||
1321 | }; | ||
1322 | |||
1323 | if (task_cputime_expired(&task_sample, &tsk->cputime_expires)) | ||
1324 | return 1; | ||
1325 | } | ||
1326 | if (!task_cputime_zero(&sig->cputime_expires)) { | ||
1327 | struct task_cputime group_sample; | ||
1328 | |||
1329 | thread_group_cputime(tsk, &group_sample); | ||
1330 | if (task_cputime_expired(&group_sample, &sig->cputime_expires)) | ||
1331 | return 1; | ||
1332 | } | ||
1333 | return 0; | ||
1334 | } | ||
1335 | |||
1317 | /* | 1336 | /* |
1318 | * This is called from the timer interrupt handler. The irq handler has | 1337 | * This is called from the timer interrupt handler. The irq handler has |
1319 | * already updated our counts. We need to check if any timers fire now. | 1338 | * already updated our counts. We need to check if any timers fire now. |
@@ -1326,42 +1345,31 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
1326 | 1345 | ||
1327 | BUG_ON(!irqs_disabled()); | 1346 | BUG_ON(!irqs_disabled()); |
1328 | 1347 | ||
1329 | #define UNEXPIRED(clock) \ | 1348 | /* |
1330 | (cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \ | 1349 | * The fast path checks that there are no expired thread or thread |
1331 | cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires)) | 1350 | * group timers. If that's so, just return. |
1332 | 1351 | */ | |
1333 | if (UNEXPIRED(prof) && UNEXPIRED(virt) && | 1352 | if (!fastpath_timer_check(tsk)) |
1334 | (tsk->it_sched_expires == 0 || | ||
1335 | tsk->se.sum_exec_runtime < tsk->it_sched_expires)) | ||
1336 | return; | 1353 | return; |
1337 | 1354 | ||
1338 | #undef UNEXPIRED | 1355 | spin_lock(&tsk->sighand->siglock); |
1339 | |||
1340 | /* | 1356 | /* |
1341 | * Double-check with locks held. | 1357 | * Here we take off tsk->signal->cpu_timers[N] and |
1358 | * tsk->cpu_timers[N] all the timers that are firing, and | ||
1359 | * put them on the firing list. | ||
1342 | */ | 1360 | */ |
1343 | read_lock(&tasklist_lock); | 1361 | check_thread_timers(tsk, &firing); |
1344 | if (likely(tsk->signal != NULL)) { | 1362 | check_process_timers(tsk, &firing); |
1345 | spin_lock(&tsk->sighand->siglock); | ||
1346 | 1363 | ||
1347 | /* | 1364 | /* |
1348 | * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] | 1365 | * We must release these locks before taking any timer's lock. |
1349 | * all the timers that are firing, and put them on the firing list. | 1366 | * There is a potential race with timer deletion here, as the |
1350 | */ | 1367 | * siglock now protects our private firing list. We have set |
1351 | check_thread_timers(tsk, &firing); | 1368 | * the firing flag in each timer, so that a deletion attempt |
1352 | check_process_timers(tsk, &firing); | 1369 | * that gets the timer lock before we do will give it up and |
1353 | 1370 | * spin until we've taken care of that timer below. | |
1354 | /* | 1371 | */ |
1355 | * We must release these locks before taking any timer's lock. | 1372 | spin_unlock(&tsk->sighand->siglock); |
1356 | * There is a potential race with timer deletion here, as the | ||
1357 | * siglock now protects our private firing list. We have set | ||
1358 | * the firing flag in each timer, so that a deletion attempt | ||
1359 | * that gets the timer lock before we do will give it up and | ||
1360 | * spin until we've taken care of that timer below. | ||
1361 | */ | ||
1362 | spin_unlock(&tsk->sighand->siglock); | ||
1363 | } | ||
1364 | read_unlock(&tasklist_lock); | ||
1365 | 1373 | ||
1366 | /* | 1374 | /* |
1367 | * Now that all the timers on our list have the firing flag, | 1375 | * Now that all the timers on our list have the firing flag, |
@@ -1389,10 +1397,9 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
1389 | 1397 | ||
1390 | /* | 1398 | /* |
1391 | * Set one of the process-wide special case CPU timers. | 1399 | * Set one of the process-wide special case CPU timers. |
1392 | * The tasklist_lock and tsk->sighand->siglock must be held by the caller. | 1400 | * The tsk->sighand->siglock must be held by the caller. |
1393 | * The oldval argument is null for the RLIMIT_CPU timer, where *newval is | 1401 | * The *newval argument is relative and we update it to be absolute, *oldval |
1394 | * absolute; non-null for ITIMER_*, where *newval is relative and we update | 1402 | * is absolute and we update it to be relative. |
1395 | * it to be absolute, *oldval is absolute and we update it to be relative. | ||
1396 | */ | 1403 | */ |
1397 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | 1404 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, |
1398 | cputime_t *newval, cputime_t *oldval) | 1405 | cputime_t *newval, cputime_t *oldval) |
@@ -1401,7 +1408,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | |||
1401 | struct list_head *head; | 1408 | struct list_head *head; |
1402 | 1409 | ||
1403 | BUG_ON(clock_idx == CPUCLOCK_SCHED); | 1410 | BUG_ON(clock_idx == CPUCLOCK_SCHED); |
1404 | cpu_clock_sample_group_locked(clock_idx, tsk, &now); | 1411 | cpu_clock_sample_group(clock_idx, tsk, &now); |
1405 | 1412 | ||
1406 | if (oldval) { | 1413 | if (oldval) { |
1407 | if (!cputime_eq(*oldval, cputime_zero)) { | 1414 | if (!cputime_eq(*oldval, cputime_zero)) { |
@@ -1435,13 +1442,14 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | |||
1435 | cputime_ge(list_first_entry(head, | 1442 | cputime_ge(list_first_entry(head, |
1436 | struct cpu_timer_list, entry)->expires.cpu, | 1443 | struct cpu_timer_list, entry)->expires.cpu, |
1437 | *newval)) { | 1444 | *newval)) { |
1438 | /* | 1445 | switch (clock_idx) { |
1439 | * Rejigger each thread's expiry time so that one will | 1446 | case CPUCLOCK_PROF: |
1440 | * notice before we hit the process-cumulative expiry time. | 1447 | tsk->signal->cputime_expires.prof_exp = *newval; |
1441 | */ | 1448 | break; |
1442 | union cpu_time_count expires = { .sched = 0 }; | 1449 | case CPUCLOCK_VIRT: |
1443 | expires.cpu = *newval; | 1450 | tsk->signal->cputime_expires.virt_exp = *newval; |
1444 | process_timer_rebalance(tsk, clock_idx, expires, now); | 1451 | break; |
1452 | } | ||
1445 | } | 1453 | } |
1446 | } | 1454 | } |
1447 | 1455 | ||
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 5131e5471169..b931d7cedbfa 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -223,6 +223,15 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp) | |||
223 | } | 223 | } |
224 | 224 | ||
225 | /* | 225 | /* |
226 | * Get monotonic time for posix timers | ||
227 | */ | ||
228 | static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp) | ||
229 | { | ||
230 | getrawmonotonic(tp); | ||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | /* | ||
226 | * Initialize everything, well, just everything in Posix clocks/timers ;) | 235 | * Initialize everything, well, just everything in Posix clocks/timers ;) |
227 | */ | 236 | */ |
228 | static __init int init_posix_timers(void) | 237 | static __init int init_posix_timers(void) |
@@ -235,9 +244,15 @@ static __init int init_posix_timers(void) | |||
235 | .clock_get = posix_ktime_get_ts, | 244 | .clock_get = posix_ktime_get_ts, |
236 | .clock_set = do_posix_clock_nosettime, | 245 | .clock_set = do_posix_clock_nosettime, |
237 | }; | 246 | }; |
247 | struct k_clock clock_monotonic_raw = { | ||
248 | .clock_getres = hrtimer_get_res, | ||
249 | .clock_get = posix_get_monotonic_raw, | ||
250 | .clock_set = do_posix_clock_nosettime, | ||
251 | }; | ||
238 | 252 | ||
239 | register_posix_clock(CLOCK_REALTIME, &clock_realtime); | 253 | register_posix_clock(CLOCK_REALTIME, &clock_realtime); |
240 | register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); | 254 | register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); |
255 | register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); | ||
241 | 256 | ||
242 | posix_timers_cache = kmem_cache_create("posix_timers_cache", | 257 | posix_timers_cache = kmem_cache_create("posix_timers_cache", |
243 | sizeof (struct k_itimer), 0, SLAB_PANIC, | 258 | sizeof (struct k_itimer), 0, SLAB_PANIC, |
@@ -298,6 +313,7 @@ void do_schedule_next_timer(struct siginfo *info) | |||
298 | 313 | ||
299 | int posix_timer_event(struct k_itimer *timr, int si_private) | 314 | int posix_timer_event(struct k_itimer *timr, int si_private) |
300 | { | 315 | { |
316 | int shared, ret; | ||
301 | /* | 317 | /* |
302 | * FIXME: if ->sigq is queued we can race with | 318 | * FIXME: if ->sigq is queued we can race with |
303 | * dequeue_signal()->do_schedule_next_timer(). | 319 | * dequeue_signal()->do_schedule_next_timer(). |
@@ -311,25 +327,10 @@ int posix_timer_event(struct k_itimer *timr, int si_private) | |||
311 | */ | 327 | */ |
312 | timr->sigq->info.si_sys_private = si_private; | 328 | timr->sigq->info.si_sys_private = si_private; |
313 | 329 | ||
314 | timr->sigq->info.si_signo = timr->it_sigev_signo; | 330 | shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID); |
315 | timr->sigq->info.si_code = SI_TIMER; | 331 | ret = send_sigqueue(timr->sigq, timr->it_process, shared); |
316 | timr->sigq->info.si_tid = timr->it_id; | 332 | /* If we failed to send the signal the timer stops. */ |
317 | timr->sigq->info.si_value = timr->it_sigev_value; | 333 | return ret > 0; |
318 | |||
319 | if (timr->it_sigev_notify & SIGEV_THREAD_ID) { | ||
320 | struct task_struct *leader; | ||
321 | int ret = send_sigqueue(timr->sigq, timr->it_process, 0); | ||
322 | |||
323 | if (likely(ret >= 0)) | ||
324 | return ret; | ||
325 | |||
326 | timr->it_sigev_notify = SIGEV_SIGNAL; | ||
327 | leader = timr->it_process->group_leader; | ||
328 | put_task_struct(timr->it_process); | ||
329 | timr->it_process = leader; | ||
330 | } | ||
331 | |||
332 | return send_sigqueue(timr->sigq, timr->it_process, 1); | ||
333 | } | 334 | } |
334 | EXPORT_SYMBOL_GPL(posix_timer_event); | 335 | EXPORT_SYMBOL_GPL(posix_timer_event); |
335 | 336 | ||
@@ -468,11 +469,9 @@ sys_timer_create(const clockid_t which_clock, | |||
468 | struct sigevent __user *timer_event_spec, | 469 | struct sigevent __user *timer_event_spec, |
469 | timer_t __user * created_timer_id) | 470 | timer_t __user * created_timer_id) |
470 | { | 471 | { |
471 | int error = 0; | 472 | struct k_itimer *new_timer; |
472 | struct k_itimer *new_timer = NULL; | 473 | int error, new_timer_id; |
473 | int new_timer_id; | 474 | struct task_struct *process; |
474 | struct task_struct *process = NULL; | ||
475 | unsigned long flags; | ||
476 | sigevent_t event; | 475 | sigevent_t event; |
477 | int it_id_set = IT_ID_NOT_SET; | 476 | int it_id_set = IT_ID_NOT_SET; |
478 | 477 | ||
@@ -490,12 +489,11 @@ sys_timer_create(const clockid_t which_clock, | |||
490 | goto out; | 489 | goto out; |
491 | } | 490 | } |
492 | spin_lock_irq(&idr_lock); | 491 | spin_lock_irq(&idr_lock); |
493 | error = idr_get_new(&posix_timers_id, (void *) new_timer, | 492 | error = idr_get_new(&posix_timers_id, new_timer, &new_timer_id); |
494 | &new_timer_id); | ||
495 | spin_unlock_irq(&idr_lock); | 493 | spin_unlock_irq(&idr_lock); |
496 | if (error == -EAGAIN) | 494 | if (error) { |
497 | goto retry; | 495 | if (error == -EAGAIN) |
498 | else if (error) { | 496 | goto retry; |
499 | /* | 497 | /* |
500 | * Weird looking, but we return EAGAIN if the IDR is | 498 | * Weird looking, but we return EAGAIN if the IDR is |
501 | * full (proper POSIX return value for this) | 499 | * full (proper POSIX return value for this) |
@@ -526,67 +524,43 @@ sys_timer_create(const clockid_t which_clock, | |||
526 | error = -EFAULT; | 524 | error = -EFAULT; |
527 | goto out; | 525 | goto out; |
528 | } | 526 | } |
529 | new_timer->it_sigev_notify = event.sigev_notify; | 527 | rcu_read_lock(); |
530 | new_timer->it_sigev_signo = event.sigev_signo; | 528 | process = good_sigevent(&event); |
531 | new_timer->it_sigev_value = event.sigev_value; | 529 | if (process) |
532 | 530 | get_task_struct(process); | |
533 | read_lock(&tasklist_lock); | 531 | rcu_read_unlock(); |
534 | if ((process = good_sigevent(&event))) { | ||
535 | /* | ||
536 | * We may be setting up this process for another | ||
537 | * thread. It may be exiting. To catch this | ||
538 | * case the we check the PF_EXITING flag. If | ||
539 | * the flag is not set, the siglock will catch | ||
540 | * him before it is too late (in exit_itimers). | ||
541 | * | ||
542 | * The exec case is a bit more invloved but easy | ||
543 | * to code. If the process is in our thread | ||
544 | * group (and it must be or we would not allow | ||
545 | * it here) and is doing an exec, it will cause | ||
546 | * us to be killed. In this case it will wait | ||
547 | * for us to die which means we can finish this | ||
548 | * linkage with our last gasp. I.e. no code :) | ||
549 | */ | ||
550 | spin_lock_irqsave(&process->sighand->siglock, flags); | ||
551 | if (!(process->flags & PF_EXITING)) { | ||
552 | new_timer->it_process = process; | ||
553 | list_add(&new_timer->list, | ||
554 | &process->signal->posix_timers); | ||
555 | if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) | ||
556 | get_task_struct(process); | ||
557 | spin_unlock_irqrestore(&process->sighand->siglock, flags); | ||
558 | } else { | ||
559 | spin_unlock_irqrestore(&process->sighand->siglock, flags); | ||
560 | process = NULL; | ||
561 | } | ||
562 | } | ||
563 | read_unlock(&tasklist_lock); | ||
564 | if (!process) { | 532 | if (!process) { |
565 | error = -EINVAL; | 533 | error = -EINVAL; |
566 | goto out; | 534 | goto out; |
567 | } | 535 | } |
568 | } else { | 536 | } else { |
569 | new_timer->it_sigev_notify = SIGEV_SIGNAL; | 537 | event.sigev_notify = SIGEV_SIGNAL; |
570 | new_timer->it_sigev_signo = SIGALRM; | 538 | event.sigev_signo = SIGALRM; |
571 | new_timer->it_sigev_value.sival_int = new_timer->it_id; | 539 | event.sigev_value.sival_int = new_timer->it_id; |
572 | process = current->group_leader; | 540 | process = current->group_leader; |
573 | spin_lock_irqsave(&process->sighand->siglock, flags); | 541 | get_task_struct(process); |
574 | new_timer->it_process = process; | ||
575 | list_add(&new_timer->list, &process->signal->posix_timers); | ||
576 | spin_unlock_irqrestore(&process->sighand->siglock, flags); | ||
577 | } | 542 | } |
578 | 543 | ||
544 | new_timer->it_sigev_notify = event.sigev_notify; | ||
545 | new_timer->sigq->info.si_signo = event.sigev_signo; | ||
546 | new_timer->sigq->info.si_value = event.sigev_value; | ||
547 | new_timer->sigq->info.si_tid = new_timer->it_id; | ||
548 | new_timer->sigq->info.si_code = SI_TIMER; | ||
549 | |||
550 | spin_lock_irq(&current->sighand->siglock); | ||
551 | new_timer->it_process = process; | ||
552 | list_add(&new_timer->list, &current->signal->posix_timers); | ||
553 | spin_unlock_irq(&current->sighand->siglock); | ||
554 | |||
555 | return 0; | ||
579 | /* | 556 | /* |
580 | * In the case of the timer belonging to another task, after | 557 | * In the case of the timer belonging to another task, after |
581 | * the task is unlocked, the timer is owned by the other task | 558 | * the task is unlocked, the timer is owned by the other task |
582 | * and may cease to exist at any time. Don't use or modify | 559 | * and may cease to exist at any time. Don't use or modify |
583 | * new_timer after the unlock call. | 560 | * new_timer after the unlock call. |
584 | */ | 561 | */ |
585 | |||
586 | out: | 562 | out: |
587 | if (error) | 563 | release_posix_timer(new_timer, it_id_set); |
588 | release_posix_timer(new_timer, it_id_set); | ||
589 | |||
590 | return error; | 564 | return error; |
591 | } | 565 | } |
592 | 566 | ||
@@ -597,7 +571,7 @@ out: | |||
597 | * the find to the timer lock. To avoid a dead lock, the timer id MUST | 571 | * the find to the timer lock. To avoid a dead lock, the timer id MUST |
598 | * be release with out holding the timer lock. | 572 | * be release with out holding the timer lock. |
599 | */ | 573 | */ |
600 | static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags) | 574 | static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags) |
601 | { | 575 | { |
602 | struct k_itimer *timr; | 576 | struct k_itimer *timr; |
603 | /* | 577 | /* |
@@ -605,23 +579,20 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags) | |||
605 | * flags part over to the timer lock. Must not let interrupts in | 579 | * flags part over to the timer lock. Must not let interrupts in |
606 | * while we are moving the lock. | 580 | * while we are moving the lock. |
607 | */ | 581 | */ |
608 | |||
609 | spin_lock_irqsave(&idr_lock, *flags); | 582 | spin_lock_irqsave(&idr_lock, *flags); |
610 | timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id); | 583 | timr = idr_find(&posix_timers_id, (int)timer_id); |
611 | if (timr) { | 584 | if (timr) { |
612 | spin_lock(&timr->it_lock); | 585 | spin_lock(&timr->it_lock); |
613 | 586 | if (timr->it_process && | |
614 | if ((timr->it_id != timer_id) || !(timr->it_process) || | 587 | same_thread_group(timr->it_process, current)) { |
615 | !same_thread_group(timr->it_process, current)) { | ||
616 | spin_unlock(&timr->it_lock); | ||
617 | spin_unlock_irqrestore(&idr_lock, *flags); | ||
618 | timr = NULL; | ||
619 | } else | ||
620 | spin_unlock(&idr_lock); | 588 | spin_unlock(&idr_lock); |
621 | } else | 589 | return timr; |
622 | spin_unlock_irqrestore(&idr_lock, *flags); | 590 | } |
591 | spin_unlock(&timr->it_lock); | ||
592 | } | ||
593 | spin_unlock_irqrestore(&idr_lock, *flags); | ||
623 | 594 | ||
624 | return timr; | 595 | return NULL; |
625 | } | 596 | } |
626 | 597 | ||
627 | /* | 598 | /* |
@@ -862,8 +833,7 @@ retry_delete: | |||
862 | * This keeps any tasks waiting on the spin lock from thinking | 833 | * This keeps any tasks waiting on the spin lock from thinking |
863 | * they got something (see the lock code above). | 834 | * they got something (see the lock code above). |
864 | */ | 835 | */ |
865 | if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) | 836 | put_task_struct(timer->it_process); |
866 | put_task_struct(timer->it_process); | ||
867 | timer->it_process = NULL; | 837 | timer->it_process = NULL; |
868 | 838 | ||
869 | unlock_timer(timer, flags); | 839 | unlock_timer(timer, flags); |
@@ -890,8 +860,7 @@ retry_delete: | |||
890 | * This keeps any tasks waiting on the spin lock from thinking | 860 | * This keeps any tasks waiting on the spin lock from thinking |
891 | * they got something (see the lock code above). | 861 | * they got something (see the lock code above). |
892 | */ | 862 | */ |
893 | if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) | 863 | put_task_struct(timer->it_process); |
894 | put_task_struct(timer->it_process); | ||
895 | timer->it_process = NULL; | 864 | timer->it_process = NULL; |
896 | 865 | ||
897 | unlock_timer(timer, flags); | 866 | unlock_timer(timer, flags); |
diff --git a/kernel/sched.c b/kernel/sched.c index 6f230596bd0c..09a8c15748f1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -4052,23 +4052,26 @@ DEFINE_PER_CPU(struct kernel_stat, kstat); | |||
4052 | EXPORT_PER_CPU_SYMBOL(kstat); | 4052 | EXPORT_PER_CPU_SYMBOL(kstat); |
4053 | 4053 | ||
4054 | /* | 4054 | /* |
4055 | * Return p->sum_exec_runtime plus any more ns on the sched_clock | 4055 | * Return any ns on the sched_clock that have not yet been banked in |
4056 | * that have not yet been banked in case the task is currently running. | 4056 | * @p in case that task is currently running. |
4057 | */ | 4057 | */ |
4058 | unsigned long long task_sched_runtime(struct task_struct *p) | 4058 | unsigned long long task_delta_exec(struct task_struct *p) |
4059 | { | 4059 | { |
4060 | unsigned long flags; | 4060 | unsigned long flags; |
4061 | u64 ns, delta_exec; | ||
4062 | struct rq *rq; | 4061 | struct rq *rq; |
4062 | u64 ns = 0; | ||
4063 | 4063 | ||
4064 | rq = task_rq_lock(p, &flags); | 4064 | rq = task_rq_lock(p, &flags); |
4065 | ns = p->se.sum_exec_runtime; | 4065 | |
4066 | if (task_current(rq, p)) { | 4066 | if (task_current(rq, p)) { |
4067 | u64 delta_exec; | ||
4068 | |||
4067 | update_rq_clock(rq); | 4069 | update_rq_clock(rq); |
4068 | delta_exec = rq->clock - p->se.exec_start; | 4070 | delta_exec = rq->clock - p->se.exec_start; |
4069 | if ((s64)delta_exec > 0) | 4071 | if ((s64)delta_exec > 0) |
4070 | ns += delta_exec; | 4072 | ns = delta_exec; |
4071 | } | 4073 | } |
4074 | |||
4072 | task_rq_unlock(rq, &flags); | 4075 | task_rq_unlock(rq, &flags); |
4073 | 4076 | ||
4074 | return ns; | 4077 | return ns; |
@@ -4085,6 +4088,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime) | |||
4085 | cputime64_t tmp; | 4088 | cputime64_t tmp; |
4086 | 4089 | ||
4087 | p->utime = cputime_add(p->utime, cputime); | 4090 | p->utime = cputime_add(p->utime, cputime); |
4091 | account_group_user_time(p, cputime); | ||
4088 | 4092 | ||
4089 | /* Add user time to cpustat. */ | 4093 | /* Add user time to cpustat. */ |
4090 | tmp = cputime_to_cputime64(cputime); | 4094 | tmp = cputime_to_cputime64(cputime); |
@@ -4109,6 +4113,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime) | |||
4109 | tmp = cputime_to_cputime64(cputime); | 4113 | tmp = cputime_to_cputime64(cputime); |
4110 | 4114 | ||
4111 | p->utime = cputime_add(p->utime, cputime); | 4115 | p->utime = cputime_add(p->utime, cputime); |
4116 | account_group_user_time(p, cputime); | ||
4112 | p->gtime = cputime_add(p->gtime, cputime); | 4117 | p->gtime = cputime_add(p->gtime, cputime); |
4113 | 4118 | ||
4114 | cpustat->user = cputime64_add(cpustat->user, tmp); | 4119 | cpustat->user = cputime64_add(cpustat->user, tmp); |
@@ -4144,6 +4149,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
4144 | } | 4149 | } |
4145 | 4150 | ||
4146 | p->stime = cputime_add(p->stime, cputime); | 4151 | p->stime = cputime_add(p->stime, cputime); |
4152 | account_group_system_time(p, cputime); | ||
4147 | 4153 | ||
4148 | /* Add system time to cpustat. */ | 4154 | /* Add system time to cpustat. */ |
4149 | tmp = cputime_to_cputime64(cputime); | 4155 | tmp = cputime_to_cputime64(cputime); |
@@ -4185,6 +4191,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal) | |||
4185 | 4191 | ||
4186 | if (p == rq->idle) { | 4192 | if (p == rq->idle) { |
4187 | p->stime = cputime_add(p->stime, steal); | 4193 | p->stime = cputime_add(p->stime, steal); |
4194 | account_group_system_time(p, steal); | ||
4188 | if (atomic_read(&rq->nr_iowait) > 0) | 4195 | if (atomic_read(&rq->nr_iowait) > 0) |
4189 | cpustat->iowait = cputime64_add(cpustat->iowait, tmp); | 4196 | cpustat->iowait = cputime64_add(cpustat->iowait, tmp); |
4190 | else | 4197 | else |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 18fd17172eb6..f604dae71316 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -449,6 +449,7 @@ static void update_curr(struct cfs_rq *cfs_rq) | |||
449 | struct task_struct *curtask = task_of(curr); | 449 | struct task_struct *curtask = task_of(curr); |
450 | 450 | ||
451 | cpuacct_charge(curtask, delta_exec); | 451 | cpuacct_charge(curtask, delta_exec); |
452 | account_group_exec_runtime(curtask, delta_exec); | ||
452 | } | 453 | } |
453 | } | 454 | } |
454 | 455 | ||
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index cdf5740ab03e..b446dc87494f 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -526,6 +526,8 @@ static void update_curr_rt(struct rq *rq) | |||
526 | schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); | 526 | schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); |
527 | 527 | ||
528 | curr->se.sum_exec_runtime += delta_exec; | 528 | curr->se.sum_exec_runtime += delta_exec; |
529 | account_group_exec_runtime(curr, delta_exec); | ||
530 | |||
529 | curr->se.exec_start = rq->clock; | 531 | curr->se.exec_start = rq->clock; |
530 | cpuacct_charge(curr, delta_exec); | 532 | cpuacct_charge(curr, delta_exec); |
531 | 533 | ||
@@ -1458,7 +1460,7 @@ static void watchdog(struct rq *rq, struct task_struct *p) | |||
1458 | p->rt.timeout++; | 1460 | p->rt.timeout++; |
1459 | next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); | 1461 | next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); |
1460 | if (p->rt.timeout > next) | 1462 | if (p->rt.timeout > next) |
1461 | p->it_sched_expires = p->se.sum_exec_runtime; | 1463 | p->cputime_expires.sched_exp = p->se.sum_exec_runtime; |
1462 | } | 1464 | } |
1463 | } | 1465 | } |
1464 | 1466 | ||
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 8385d43987e2..b8c156979cf2 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
@@ -270,3 +270,89 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) | |||
270 | #define sched_info_switch(t, next) do { } while (0) | 270 | #define sched_info_switch(t, next) do { } while (0) |
271 | #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ | 271 | #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ |
272 | 272 | ||
273 | /* | ||
274 | * The following are functions that support scheduler-internal time accounting. | ||
275 | * These functions are generally called at the timer tick. None of this depends | ||
276 | * on CONFIG_SCHEDSTATS. | ||
277 | */ | ||
278 | |||
279 | /** | ||
280 | * account_group_user_time - Maintain utime for a thread group. | ||
281 | * | ||
282 | * @tsk: Pointer to task structure. | ||
283 | * @cputime: Time value by which to increment the utime field of the | ||
284 | * thread_group_cputime structure. | ||
285 | * | ||
286 | * If thread group time is being maintained, get the structure for the | ||
287 | * running CPU and update the utime field there. | ||
288 | */ | ||
289 | static inline void account_group_user_time(struct task_struct *tsk, | ||
290 | cputime_t cputime) | ||
291 | { | ||
292 | struct signal_struct *sig; | ||
293 | |||
294 | sig = tsk->signal; | ||
295 | if (unlikely(!sig)) | ||
296 | return; | ||
297 | if (sig->cputime.totals) { | ||
298 | struct task_cputime *times; | ||
299 | |||
300 | times = per_cpu_ptr(sig->cputime.totals, get_cpu()); | ||
301 | times->utime = cputime_add(times->utime, cputime); | ||
302 | put_cpu_no_resched(); | ||
303 | } | ||
304 | } | ||
305 | |||
306 | /** | ||
307 | * account_group_system_time - Maintain stime for a thread group. | ||
308 | * | ||
309 | * @tsk: Pointer to task structure. | ||
310 | * @cputime: Time value by which to increment the stime field of the | ||
311 | * thread_group_cputime structure. | ||
312 | * | ||
313 | * If thread group time is being maintained, get the structure for the | ||
314 | * running CPU and update the stime field there. | ||
315 | */ | ||
316 | static inline void account_group_system_time(struct task_struct *tsk, | ||
317 | cputime_t cputime) | ||
318 | { | ||
319 | struct signal_struct *sig; | ||
320 | |||
321 | sig = tsk->signal; | ||
322 | if (unlikely(!sig)) | ||
323 | return; | ||
324 | if (sig->cputime.totals) { | ||
325 | struct task_cputime *times; | ||
326 | |||
327 | times = per_cpu_ptr(sig->cputime.totals, get_cpu()); | ||
328 | times->stime = cputime_add(times->stime, cputime); | ||
329 | put_cpu_no_resched(); | ||
330 | } | ||
331 | } | ||
332 | |||
333 | /** | ||
334 | * account_group_exec_runtime - Maintain exec runtime for a thread group. | ||
335 | * | ||
336 | * @tsk: Pointer to task structure. | ||
337 | * @ns: Time value by which to increment the sum_exec_runtime field | ||
338 | * of the thread_group_cputime structure. | ||
339 | * | ||
340 | * If thread group time is being maintained, get the structure for the | ||
341 | * running CPU and update the sum_exec_runtime field there. | ||
342 | */ | ||
343 | static inline void account_group_exec_runtime(struct task_struct *tsk, | ||
344 | unsigned long long ns) | ||
345 | { | ||
346 | struct signal_struct *sig; | ||
347 | |||
348 | sig = tsk->signal; | ||
349 | if (unlikely(!sig)) | ||
350 | return; | ||
351 | if (sig->cputime.totals) { | ||
352 | struct task_cputime *times; | ||
353 | |||
354 | times = per_cpu_ptr(sig->cputime.totals, get_cpu()); | ||
355 | times->sum_exec_runtime += ns; | ||
356 | put_cpu_no_resched(); | ||
357 | } | ||
358 | } | ||
diff --git a/kernel/signal.c b/kernel/signal.c index e661b01d340f..6eea5826d618 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -1338,6 +1338,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) | |||
1338 | struct siginfo info; | 1338 | struct siginfo info; |
1339 | unsigned long flags; | 1339 | unsigned long flags; |
1340 | struct sighand_struct *psig; | 1340 | struct sighand_struct *psig; |
1341 | struct task_cputime cputime; | ||
1341 | int ret = sig; | 1342 | int ret = sig; |
1342 | 1343 | ||
1343 | BUG_ON(sig == -1); | 1344 | BUG_ON(sig == -1); |
@@ -1368,10 +1369,9 @@ int do_notify_parent(struct task_struct *tsk, int sig) | |||
1368 | 1369 | ||
1369 | info.si_uid = tsk->uid; | 1370 | info.si_uid = tsk->uid; |
1370 | 1371 | ||
1371 | info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime, | 1372 | thread_group_cputime(tsk, &cputime); |
1372 | tsk->signal->utime)); | 1373 | info.si_utime = cputime_to_jiffies(cputime.utime); |
1373 | info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime, | 1374 | info.si_stime = cputime_to_jiffies(cputime.stime); |
1374 | tsk->signal->stime)); | ||
1375 | 1375 | ||
1376 | info.si_status = tsk->exit_code & 0x7f; | 1376 | info.si_status = tsk->exit_code & 0x7f; |
1377 | if (tsk->exit_code & 0x80) | 1377 | if (tsk->exit_code & 0x80) |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 83ba21a13bd4..7110daeb9a90 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -267,16 +267,12 @@ asmlinkage void do_softirq(void) | |||
267 | */ | 267 | */ |
268 | void irq_enter(void) | 268 | void irq_enter(void) |
269 | { | 269 | { |
270 | #ifdef CONFIG_NO_HZ | ||
271 | int cpu = smp_processor_id(); | 270 | int cpu = smp_processor_id(); |
271 | |||
272 | if (idle_cpu(cpu) && !in_interrupt()) | 272 | if (idle_cpu(cpu) && !in_interrupt()) |
273 | tick_nohz_stop_idle(cpu); | 273 | tick_check_idle(cpu); |
274 | #endif | 274 | |
275 | __irq_enter(); | 275 | __irq_enter(); |
276 | #ifdef CONFIG_NO_HZ | ||
277 | if (idle_cpu(cpu)) | ||
278 | tick_nohz_update_jiffies(); | ||
279 | #endif | ||
280 | } | 276 | } |
281 | 277 | ||
282 | #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED | 278 | #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED |
diff --git a/kernel/sys.c b/kernel/sys.c index 0bc8fa3c2288..53879cdae483 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -853,38 +853,28 @@ asmlinkage long sys_setfsgid(gid_t gid) | |||
853 | return old_fsgid; | 853 | return old_fsgid; |
854 | } | 854 | } |
855 | 855 | ||
856 | void do_sys_times(struct tms *tms) | ||
857 | { | ||
858 | struct task_cputime cputime; | ||
859 | cputime_t cutime, cstime; | ||
860 | |||
861 | spin_lock_irq(&current->sighand->siglock); | ||
862 | thread_group_cputime(current, &cputime); | ||
863 | cutime = current->signal->cutime; | ||
864 | cstime = current->signal->cstime; | ||
865 | spin_unlock_irq(&current->sighand->siglock); | ||
866 | tms->tms_utime = cputime_to_clock_t(cputime.utime); | ||
867 | tms->tms_stime = cputime_to_clock_t(cputime.stime); | ||
868 | tms->tms_cutime = cputime_to_clock_t(cutime); | ||
869 | tms->tms_cstime = cputime_to_clock_t(cstime); | ||
870 | } | ||
871 | |||
856 | asmlinkage long sys_times(struct tms __user * tbuf) | 872 | asmlinkage long sys_times(struct tms __user * tbuf) |
857 | { | 873 | { |
858 | /* | ||
859 | * In the SMP world we might just be unlucky and have one of | ||
860 | * the times increment as we use it. Since the value is an | ||
861 | * atomically safe type this is just fine. Conceptually its | ||
862 | * as if the syscall took an instant longer to occur. | ||
863 | */ | ||
864 | if (tbuf) { | 874 | if (tbuf) { |
865 | struct tms tmp; | 875 | struct tms tmp; |
866 | struct task_struct *tsk = current; | 876 | |
867 | struct task_struct *t; | 877 | do_sys_times(&tmp); |
868 | cputime_t utime, stime, cutime, cstime; | ||
869 | |||
870 | spin_lock_irq(&tsk->sighand->siglock); | ||
871 | utime = tsk->signal->utime; | ||
872 | stime = tsk->signal->stime; | ||
873 | t = tsk; | ||
874 | do { | ||
875 | utime = cputime_add(utime, t->utime); | ||
876 | stime = cputime_add(stime, t->stime); | ||
877 | t = next_thread(t); | ||
878 | } while (t != tsk); | ||
879 | |||
880 | cutime = tsk->signal->cutime; | ||
881 | cstime = tsk->signal->cstime; | ||
882 | spin_unlock_irq(&tsk->sighand->siglock); | ||
883 | |||
884 | tmp.tms_utime = cputime_to_clock_t(utime); | ||
885 | tmp.tms_stime = cputime_to_clock_t(stime); | ||
886 | tmp.tms_cutime = cputime_to_clock_t(cutime); | ||
887 | tmp.tms_cstime = cputime_to_clock_t(cstime); | ||
888 | if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) | 878 | if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) |
889 | return -EFAULT; | 879 | return -EFAULT; |
890 | } | 880 | } |
@@ -1449,7 +1439,6 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r | |||
1449 | asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) | 1439 | asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) |
1450 | { | 1440 | { |
1451 | struct rlimit new_rlim, *old_rlim; | 1441 | struct rlimit new_rlim, *old_rlim; |
1452 | unsigned long it_prof_secs; | ||
1453 | int retval; | 1442 | int retval; |
1454 | 1443 | ||
1455 | if (resource >= RLIM_NLIMITS) | 1444 | if (resource >= RLIM_NLIMITS) |
@@ -1503,18 +1492,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) | |||
1503 | if (new_rlim.rlim_cur == RLIM_INFINITY) | 1492 | if (new_rlim.rlim_cur == RLIM_INFINITY) |
1504 | goto out; | 1493 | goto out; |
1505 | 1494 | ||
1506 | it_prof_secs = cputime_to_secs(current->signal->it_prof_expires); | 1495 | update_rlimit_cpu(new_rlim.rlim_cur); |
1507 | if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) { | ||
1508 | unsigned long rlim_cur = new_rlim.rlim_cur; | ||
1509 | cputime_t cputime; | ||
1510 | |||
1511 | cputime = secs_to_cputime(rlim_cur); | ||
1512 | read_lock(&tasklist_lock); | ||
1513 | spin_lock_irq(&current->sighand->siglock); | ||
1514 | set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); | ||
1515 | spin_unlock_irq(&current->sighand->siglock); | ||
1516 | read_unlock(&tasklist_lock); | ||
1517 | } | ||
1518 | out: | 1496 | out: |
1519 | return 0; | 1497 | return 0; |
1520 | } | 1498 | } |
@@ -1552,11 +1530,8 @@ out: | |||
1552 | * | 1530 | * |
1553 | */ | 1531 | */ |
1554 | 1532 | ||
1555 | static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r, | 1533 | static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r) |
1556 | cputime_t *utimep, cputime_t *stimep) | ||
1557 | { | 1534 | { |
1558 | *utimep = cputime_add(*utimep, t->utime); | ||
1559 | *stimep = cputime_add(*stimep, t->stime); | ||
1560 | r->ru_nvcsw += t->nvcsw; | 1535 | r->ru_nvcsw += t->nvcsw; |
1561 | r->ru_nivcsw += t->nivcsw; | 1536 | r->ru_nivcsw += t->nivcsw; |
1562 | r->ru_minflt += t->min_flt; | 1537 | r->ru_minflt += t->min_flt; |
@@ -1570,12 +1545,13 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1570 | struct task_struct *t; | 1545 | struct task_struct *t; |
1571 | unsigned long flags; | 1546 | unsigned long flags; |
1572 | cputime_t utime, stime; | 1547 | cputime_t utime, stime; |
1548 | struct task_cputime cputime; | ||
1573 | 1549 | ||
1574 | memset((char *) r, 0, sizeof *r); | 1550 | memset((char *) r, 0, sizeof *r); |
1575 | utime = stime = cputime_zero; | 1551 | utime = stime = cputime_zero; |
1576 | 1552 | ||
1577 | if (who == RUSAGE_THREAD) { | 1553 | if (who == RUSAGE_THREAD) { |
1578 | accumulate_thread_rusage(p, r, &utime, &stime); | 1554 | accumulate_thread_rusage(p, r); |
1579 | goto out; | 1555 | goto out; |
1580 | } | 1556 | } |
1581 | 1557 | ||
@@ -1598,8 +1574,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1598 | break; | 1574 | break; |
1599 | 1575 | ||
1600 | case RUSAGE_SELF: | 1576 | case RUSAGE_SELF: |
1601 | utime = cputime_add(utime, p->signal->utime); | 1577 | thread_group_cputime(p, &cputime); |
1602 | stime = cputime_add(stime, p->signal->stime); | 1578 | utime = cputime_add(utime, cputime.utime); |
1579 | stime = cputime_add(stime, cputime.stime); | ||
1603 | r->ru_nvcsw += p->signal->nvcsw; | 1580 | r->ru_nvcsw += p->signal->nvcsw; |
1604 | r->ru_nivcsw += p->signal->nivcsw; | 1581 | r->ru_nivcsw += p->signal->nivcsw; |
1605 | r->ru_minflt += p->signal->min_flt; | 1582 | r->ru_minflt += p->signal->min_flt; |
@@ -1608,7 +1585,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1608 | r->ru_oublock += p->signal->oublock; | 1585 | r->ru_oublock += p->signal->oublock; |
1609 | t = p; | 1586 | t = p; |
1610 | do { | 1587 | do { |
1611 | accumulate_thread_rusage(t, r, &utime, &stime); | 1588 | accumulate_thread_rusage(t, r); |
1612 | t = next_thread(t); | 1589 | t = next_thread(t); |
1613 | } while (t != p); | 1590 | } while (t != p); |
1614 | break; | 1591 | break; |
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 093d4acf993b..9ed2eec97526 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -325,6 +325,9 @@ int clocksource_register(struct clocksource *c) | |||
325 | unsigned long flags; | 325 | unsigned long flags; |
326 | int ret; | 326 | int ret; |
327 | 327 | ||
328 | /* save mult_orig on registration */ | ||
329 | c->mult_orig = c->mult; | ||
330 | |||
328 | spin_lock_irqsave(&clocksource_lock, flags); | 331 | spin_lock_irqsave(&clocksource_lock, flags); |
329 | ret = clocksource_enqueue(c); | 332 | ret = clocksource_enqueue(c); |
330 | if (!ret) | 333 | if (!ret) |
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 4c256fdb8875..1ca99557e929 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c | |||
@@ -61,6 +61,7 @@ struct clocksource clocksource_jiffies = { | |||
61 | .read = jiffies_read, | 61 | .read = jiffies_read, |
62 | .mask = 0xffffffff, /*32bits*/ | 62 | .mask = 0xffffffff, /*32bits*/ |
63 | .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ | 63 | .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ |
64 | .mult_orig = NSEC_PER_JIFFY << JIFFIES_SHIFT, | ||
64 | .shift = JIFFIES_SHIFT, | 65 | .shift = JIFFIES_SHIFT, |
65 | }; | 66 | }; |
66 | 67 | ||
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 1ad46f3df6e7..1a20715bfd6e 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -10,13 +10,13 @@ | |||
10 | 10 | ||
11 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
12 | #include <linux/time.h> | 12 | #include <linux/time.h> |
13 | #include <linux/timer.h> | ||
14 | #include <linux/timex.h> | 13 | #include <linux/timex.h> |
15 | #include <linux/jiffies.h> | 14 | #include <linux/jiffies.h> |
16 | #include <linux/hrtimer.h> | 15 | #include <linux/hrtimer.h> |
17 | #include <linux/capability.h> | 16 | #include <linux/capability.h> |
18 | #include <linux/math64.h> | 17 | #include <linux/math64.h> |
19 | #include <linux/clocksource.h> | 18 | #include <linux/clocksource.h> |
19 | #include <linux/workqueue.h> | ||
20 | #include <asm/timex.h> | 20 | #include <asm/timex.h> |
21 | 21 | ||
22 | /* | 22 | /* |
@@ -218,11 +218,11 @@ void second_overflow(void) | |||
218 | /* Disable the cmos update - used by virtualization and embedded */ | 218 | /* Disable the cmos update - used by virtualization and embedded */ |
219 | int no_sync_cmos_clock __read_mostly; | 219 | int no_sync_cmos_clock __read_mostly; |
220 | 220 | ||
221 | static void sync_cmos_clock(unsigned long dummy); | 221 | static void sync_cmos_clock(struct work_struct *work); |
222 | 222 | ||
223 | static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); | 223 | static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); |
224 | 224 | ||
225 | static void sync_cmos_clock(unsigned long dummy) | 225 | static void sync_cmos_clock(struct work_struct *work) |
226 | { | 226 | { |
227 | struct timespec now, next; | 227 | struct timespec now, next; |
228 | int fail = 1; | 228 | int fail = 1; |
@@ -258,13 +258,13 @@ static void sync_cmos_clock(unsigned long dummy) | |||
258 | next.tv_sec++; | 258 | next.tv_sec++; |
259 | next.tv_nsec -= NSEC_PER_SEC; | 259 | next.tv_nsec -= NSEC_PER_SEC; |
260 | } | 260 | } |
261 | mod_timer(&sync_cmos_timer, jiffies + timespec_to_jiffies(&next)); | 261 | schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next)); |
262 | } | 262 | } |
263 | 263 | ||
264 | static void notify_cmos_timer(void) | 264 | static void notify_cmos_timer(void) |
265 | { | 265 | { |
266 | if (!no_sync_cmos_clock) | 266 | if (!no_sync_cmos_clock) |
267 | mod_timer(&sync_cmos_timer, jiffies + 1); | 267 | schedule_delayed_work(&sync_cmos_work, 0); |
268 | } | 268 | } |
269 | 269 | ||
270 | #else | 270 | #else |
@@ -277,38 +277,50 @@ static inline void notify_cmos_timer(void) { } | |||
277 | int do_adjtimex(struct timex *txc) | 277 | int do_adjtimex(struct timex *txc) |
278 | { | 278 | { |
279 | struct timespec ts; | 279 | struct timespec ts; |
280 | long save_adjust, sec; | ||
281 | int result; | 280 | int result; |
282 | 281 | ||
283 | /* In order to modify anything, you gotta be super-user! */ | 282 | /* Validate the data before disabling interrupts */ |
284 | if (txc->modes && !capable(CAP_SYS_TIME)) | 283 | if (txc->modes & ADJ_ADJTIME) { |
285 | return -EPERM; | ||
286 | |||
287 | /* Now we validate the data before disabling interrupts */ | ||
288 | |||
289 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) { | ||
290 | /* singleshot must not be used with any other mode bits */ | 284 | /* singleshot must not be used with any other mode bits */ |
291 | if (txc->modes & ~ADJ_OFFSET_SS_READ) | 285 | if (!(txc->modes & ADJ_OFFSET_SINGLESHOT)) |
292 | return -EINVAL; | 286 | return -EINVAL; |
287 | if (!(txc->modes & ADJ_OFFSET_READONLY) && | ||
288 | !capable(CAP_SYS_TIME)) | ||
289 | return -EPERM; | ||
290 | } else { | ||
291 | /* In order to modify anything, you gotta be super-user! */ | ||
292 | if (txc->modes && !capable(CAP_SYS_TIME)) | ||
293 | return -EPERM; | ||
294 | |||
295 | /* if the quartz is off by more than 10% something is VERY wrong! */ | ||
296 | if (txc->modes & ADJ_TICK && | ||
297 | (txc->tick < 900000/USER_HZ || | ||
298 | txc->tick > 1100000/USER_HZ)) | ||
299 | return -EINVAL; | ||
300 | |||
301 | if (txc->modes & ADJ_STATUS && time_state != TIME_OK) | ||
302 | hrtimer_cancel(&leap_timer); | ||
293 | } | 303 | } |
294 | 304 | ||
295 | /* if the quartz is off by more than 10% something is VERY wrong ! */ | ||
296 | if (txc->modes & ADJ_TICK) | ||
297 | if (txc->tick < 900000/USER_HZ || | ||
298 | txc->tick > 1100000/USER_HZ) | ||
299 | return -EINVAL; | ||
300 | |||
301 | if (time_state != TIME_OK && txc->modes & ADJ_STATUS) | ||
302 | hrtimer_cancel(&leap_timer); | ||
303 | getnstimeofday(&ts); | 305 | getnstimeofday(&ts); |
304 | 306 | ||
305 | write_seqlock_irq(&xtime_lock); | 307 | write_seqlock_irq(&xtime_lock); |
306 | 308 | ||
307 | /* Save for later - semantics of adjtime is to return old value */ | ||
308 | save_adjust = time_adjust; | ||
309 | |||
310 | /* If there are input parameters, then process them */ | 309 | /* If there are input parameters, then process them */ |
310 | if (txc->modes & ADJ_ADJTIME) { | ||
311 | long save_adjust = time_adjust; | ||
312 | |||
313 | if (!(txc->modes & ADJ_OFFSET_READONLY)) { | ||
314 | /* adjtime() is independent from ntp_adjtime() */ | ||
315 | time_adjust = txc->offset; | ||
316 | ntp_update_frequency(); | ||
317 | } | ||
318 | txc->offset = save_adjust; | ||
319 | goto adj_done; | ||
320 | } | ||
311 | if (txc->modes) { | 321 | if (txc->modes) { |
322 | long sec; | ||
323 | |||
312 | if (txc->modes & ADJ_STATUS) { | 324 | if (txc->modes & ADJ_STATUS) { |
313 | if ((time_status & STA_PLL) && | 325 | if ((time_status & STA_PLL) && |
314 | !(txc->status & STA_PLL)) { | 326 | !(txc->status & STA_PLL)) { |
@@ -375,13 +387,8 @@ int do_adjtimex(struct timex *txc) | |||
375 | if (txc->modes & ADJ_TAI && txc->constant > 0) | 387 | if (txc->modes & ADJ_TAI && txc->constant > 0) |
376 | time_tai = txc->constant; | 388 | time_tai = txc->constant; |
377 | 389 | ||
378 | if (txc->modes & ADJ_OFFSET) { | 390 | if (txc->modes & ADJ_OFFSET) |
379 | if (txc->modes == ADJ_OFFSET_SINGLESHOT) | 391 | ntp_update_offset(txc->offset); |
380 | /* adjtime() is independent from ntp_adjtime() */ | ||
381 | time_adjust = txc->offset; | ||
382 | else | ||
383 | ntp_update_offset(txc->offset); | ||
384 | } | ||
385 | if (txc->modes & ADJ_TICK) | 392 | if (txc->modes & ADJ_TICK) |
386 | tick_usec = txc->tick; | 393 | tick_usec = txc->tick; |
387 | 394 | ||
@@ -389,22 +396,18 @@ int do_adjtimex(struct timex *txc) | |||
389 | ntp_update_frequency(); | 396 | ntp_update_frequency(); |
390 | } | 397 | } |
391 | 398 | ||
399 | txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, | ||
400 | NTP_SCALE_SHIFT); | ||
401 | if (!(time_status & STA_NANO)) | ||
402 | txc->offset /= NSEC_PER_USEC; | ||
403 | |||
404 | adj_done: | ||
392 | result = time_state; /* mostly `TIME_OK' */ | 405 | result = time_state; /* mostly `TIME_OK' */ |
393 | if (time_status & (STA_UNSYNC|STA_CLOCKERR)) | 406 | if (time_status & (STA_UNSYNC|STA_CLOCKERR)) |
394 | result = TIME_ERROR; | 407 | result = TIME_ERROR; |
395 | 408 | ||
396 | if ((txc->modes == ADJ_OFFSET_SINGLESHOT) || | 409 | txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) * |
397 | (txc->modes == ADJ_OFFSET_SS_READ)) | 410 | (s64)PPM_SCALE_INV, NTP_SCALE_SHIFT); |
398 | txc->offset = save_adjust; | ||
399 | else { | ||
400 | txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, | ||
401 | NTP_SCALE_SHIFT); | ||
402 | if (!(time_status & STA_NANO)) | ||
403 | txc->offset /= NSEC_PER_USEC; | ||
404 | } | ||
405 | txc->freq = shift_right((s32)(time_freq >> PPM_SCALE_INV_SHIFT) * | ||
406 | (s64)PPM_SCALE_INV, | ||
407 | NTP_SCALE_SHIFT); | ||
408 | txc->maxerror = time_maxerror; | 411 | txc->maxerror = time_maxerror; |
409 | txc->esterror = time_esterror; | 412 | txc->esterror = time_esterror; |
410 | txc->status = time_status; | 413 | txc->status = time_status; |
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index cb01cd8f919b..f98a1b7b16e9 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c | |||
@@ -384,6 +384,19 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc) | |||
384 | } | 384 | } |
385 | 385 | ||
386 | /* | 386 | /* |
387 | * Called from irq_enter() when idle was interrupted to reenable the | ||
388 | * per cpu device. | ||
389 | */ | ||
390 | void tick_check_oneshot_broadcast(int cpu) | ||
391 | { | ||
392 | if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) { | ||
393 | struct tick_device *td = &per_cpu(tick_cpu_device, cpu); | ||
394 | |||
395 | clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT); | ||
396 | } | ||
397 | } | ||
398 | |||
399 | /* | ||
387 | * Handle oneshot mode broadcasting | 400 | * Handle oneshot mode broadcasting |
388 | */ | 401 | */ |
389 | static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) | 402 | static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) |
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 469248782c23..b1c05bf75ee0 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h | |||
@@ -36,6 +36,7 @@ extern void tick_broadcast_switch_to_oneshot(void); | |||
36 | extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); | 36 | extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); |
37 | extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); | 37 | extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); |
38 | extern int tick_broadcast_oneshot_active(void); | 38 | extern int tick_broadcast_oneshot_active(void); |
39 | extern void tick_check_oneshot_broadcast(int cpu); | ||
39 | # else /* BROADCAST */ | 40 | # else /* BROADCAST */ |
40 | static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | 41 | static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) |
41 | { | 42 | { |
@@ -45,6 +46,7 @@ static inline void tick_broadcast_oneshot_control(unsigned long reason) { } | |||
45 | static inline void tick_broadcast_switch_to_oneshot(void) { } | 46 | static inline void tick_broadcast_switch_to_oneshot(void) { } |
46 | static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } | 47 | static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } |
47 | static inline int tick_broadcast_oneshot_active(void) { return 0; } | 48 | static inline int tick_broadcast_oneshot_active(void) { return 0; } |
49 | static inline void tick_check_oneshot_broadcast(int cpu) { } | ||
48 | # endif /* !BROADCAST */ | 50 | # endif /* !BROADCAST */ |
49 | 51 | ||
50 | #else /* !ONESHOT */ | 52 | #else /* !ONESHOT */ |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b711ffcb106c..0581c11fe6c6 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -155,7 +155,7 @@ void tick_nohz_update_jiffies(void) | |||
155 | touch_softlockup_watchdog(); | 155 | touch_softlockup_watchdog(); |
156 | } | 156 | } |
157 | 157 | ||
158 | void tick_nohz_stop_idle(int cpu) | 158 | static void tick_nohz_stop_idle(int cpu) |
159 | { | 159 | { |
160 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 160 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
161 | 161 | ||
@@ -377,6 +377,32 @@ ktime_t tick_nohz_get_sleep_length(void) | |||
377 | return ts->sleep_length; | 377 | return ts->sleep_length; |
378 | } | 378 | } |
379 | 379 | ||
380 | static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | ||
381 | { | ||
382 | hrtimer_cancel(&ts->sched_timer); | ||
383 | ts->sched_timer.expires = ts->idle_tick; | ||
384 | |||
385 | while (1) { | ||
386 | /* Forward the time to expire in the future */ | ||
387 | hrtimer_forward(&ts->sched_timer, now, tick_period); | ||
388 | |||
389 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | ||
390 | hrtimer_start(&ts->sched_timer, | ||
391 | ts->sched_timer.expires, | ||
392 | HRTIMER_MODE_ABS); | ||
393 | /* Check, if the timer was already in the past */ | ||
394 | if (hrtimer_active(&ts->sched_timer)) | ||
395 | break; | ||
396 | } else { | ||
397 | if (!tick_program_event(ts->sched_timer.expires, 0)) | ||
398 | break; | ||
399 | } | ||
400 | /* Update jiffies and reread time */ | ||
401 | tick_do_update_jiffies64(now); | ||
402 | now = ktime_get(); | ||
403 | } | ||
404 | } | ||
405 | |||
380 | /** | 406 | /** |
381 | * tick_nohz_restart_sched_tick - restart the idle tick from the idle task | 407 | * tick_nohz_restart_sched_tick - restart the idle tick from the idle task |
382 | * | 408 | * |
@@ -430,28 +456,7 @@ void tick_nohz_restart_sched_tick(void) | |||
430 | */ | 456 | */ |
431 | ts->tick_stopped = 0; | 457 | ts->tick_stopped = 0; |
432 | ts->idle_exittime = now; | 458 | ts->idle_exittime = now; |
433 | hrtimer_cancel(&ts->sched_timer); | 459 | tick_nohz_restart(ts, now); |
434 | ts->sched_timer.expires = ts->idle_tick; | ||
435 | |||
436 | while (1) { | ||
437 | /* Forward the time to expire in the future */ | ||
438 | hrtimer_forward(&ts->sched_timer, now, tick_period); | ||
439 | |||
440 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | ||
441 | hrtimer_start(&ts->sched_timer, | ||
442 | ts->sched_timer.expires, | ||
443 | HRTIMER_MODE_ABS); | ||
444 | /* Check, if the timer was already in the past */ | ||
445 | if (hrtimer_active(&ts->sched_timer)) | ||
446 | break; | ||
447 | } else { | ||
448 | if (!tick_program_event(ts->sched_timer.expires, 0)) | ||
449 | break; | ||
450 | } | ||
451 | /* Update jiffies and reread time */ | ||
452 | tick_do_update_jiffies64(now); | ||
453 | now = ktime_get(); | ||
454 | } | ||
455 | local_irq_enable(); | 460 | local_irq_enable(); |
456 | } | 461 | } |
457 | 462 | ||
@@ -503,10 +508,6 @@ static void tick_nohz_handler(struct clock_event_device *dev) | |||
503 | update_process_times(user_mode(regs)); | 508 | update_process_times(user_mode(regs)); |
504 | profile_tick(CPU_PROFILING); | 509 | profile_tick(CPU_PROFILING); |
505 | 510 | ||
506 | /* Do not restart, when we are in the idle loop */ | ||
507 | if (ts->tick_stopped) | ||
508 | return; | ||
509 | |||
510 | while (tick_nohz_reprogram(ts, now)) { | 511 | while (tick_nohz_reprogram(ts, now)) { |
511 | now = ktime_get(); | 512 | now = ktime_get(); |
512 | tick_do_update_jiffies64(now); | 513 | tick_do_update_jiffies64(now); |
@@ -552,6 +553,27 @@ static void tick_nohz_switch_to_nohz(void) | |||
552 | smp_processor_id()); | 553 | smp_processor_id()); |
553 | } | 554 | } |
554 | 555 | ||
556 | /* | ||
557 | * When NOHZ is enabled and the tick is stopped, we need to kick the | ||
558 | * tick timer from irq_enter() so that the jiffies update is kept | ||
559 | * alive during long running softirqs. That's ugly as hell, but | ||
560 | * correctness is key even if we need to fix the offending softirq in | ||
561 | * the first place. | ||
562 | * | ||
563 | * Note, this is different to tick_nohz_restart. We just kick the | ||
564 | * timer and do not touch the other magic bits which need to be done | ||
565 | * when idle is left. | ||
566 | */ | ||
567 | static void tick_nohz_kick_tick(int cpu) | ||
568 | { | ||
569 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
570 | |||
571 | if (!ts->tick_stopped) | ||
572 | return; | ||
573 | |||
574 | tick_nohz_restart(ts, ktime_get()); | ||
575 | } | ||
576 | |||
555 | #else | 577 | #else |
556 | 578 | ||
557 | static inline void tick_nohz_switch_to_nohz(void) { } | 579 | static inline void tick_nohz_switch_to_nohz(void) { } |
@@ -559,6 +581,19 @@ static inline void tick_nohz_switch_to_nohz(void) { } | |||
559 | #endif /* NO_HZ */ | 581 | #endif /* NO_HZ */ |
560 | 582 | ||
561 | /* | 583 | /* |
584 | * Called from irq_enter to notify about the possible interruption of idle() | ||
585 | */ | ||
586 | void tick_check_idle(int cpu) | ||
587 | { | ||
588 | tick_check_oneshot_broadcast(cpu); | ||
589 | #ifdef CONFIG_NO_HZ | ||
590 | tick_nohz_stop_idle(cpu); | ||
591 | tick_nohz_update_jiffies(); | ||
592 | tick_nohz_kick_tick(cpu); | ||
593 | #endif | ||
594 | } | ||
595 | |||
596 | /* | ||
562 | * High resolution timer specific code | 597 | * High resolution timer specific code |
563 | */ | 598 | */ |
564 | #ifdef CONFIG_HIGH_RES_TIMERS | 599 | #ifdef CONFIG_HIGH_RES_TIMERS |
@@ -611,10 +646,6 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | |||
611 | profile_tick(CPU_PROFILING); | 646 | profile_tick(CPU_PROFILING); |
612 | } | 647 | } |
613 | 648 | ||
614 | /* Do not restart, when we are in the idle loop */ | ||
615 | if (ts->tick_stopped) | ||
616 | return HRTIMER_NORESTART; | ||
617 | |||
618 | hrtimer_forward(timer, now, tick_period); | 649 | hrtimer_forward(timer, now, tick_period); |
619 | 650 | ||
620 | return HRTIMER_RESTART; | 651 | return HRTIMER_RESTART; |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e91c29f961c9..e7acfb482a68 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -58,27 +58,26 @@ struct clocksource *clock; | |||
58 | 58 | ||
59 | #ifdef CONFIG_GENERIC_TIME | 59 | #ifdef CONFIG_GENERIC_TIME |
60 | /** | 60 | /** |
61 | * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook | 61 | * clocksource_forward_now - update clock to the current time |
62 | * | 62 | * |
63 | * private function, must hold xtime_lock lock when being | 63 | * Forward the current clock to update its state since the last call to |
64 | * called. Returns the number of nanoseconds since the | 64 | * update_wall_time(). This is useful before significant clock changes, |
65 | * last call to update_wall_time() (adjusted by NTP scaling) | 65 | * as it avoids having to deal with this time offset explicitly. |
66 | */ | 66 | */ |
67 | static inline s64 __get_nsec_offset(void) | 67 | static void clocksource_forward_now(void) |
68 | { | 68 | { |
69 | cycle_t cycle_now, cycle_delta; | 69 | cycle_t cycle_now, cycle_delta; |
70 | s64 ns_offset; | 70 | s64 nsec; |
71 | 71 | ||
72 | /* read clocksource: */ | ||
73 | cycle_now = clocksource_read(clock); | 72 | cycle_now = clocksource_read(clock); |
74 | |||
75 | /* calculate the delta since the last update_wall_time: */ | ||
76 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | 73 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; |
74 | clock->cycle_last = cycle_now; | ||
77 | 75 | ||
78 | /* convert to nanoseconds: */ | 76 | nsec = cyc2ns(clock, cycle_delta); |
79 | ns_offset = cyc2ns(clock, cycle_delta); | 77 | timespec_add_ns(&xtime, nsec); |
80 | 78 | ||
81 | return ns_offset; | 79 | nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; |
80 | clock->raw_time.tv_nsec += nsec; | ||
82 | } | 81 | } |
83 | 82 | ||
84 | /** | 83 | /** |
@@ -89,6 +88,7 @@ static inline s64 __get_nsec_offset(void) | |||
89 | */ | 88 | */ |
90 | void getnstimeofday(struct timespec *ts) | 89 | void getnstimeofday(struct timespec *ts) |
91 | { | 90 | { |
91 | cycle_t cycle_now, cycle_delta; | ||
92 | unsigned long seq; | 92 | unsigned long seq; |
93 | s64 nsecs; | 93 | s64 nsecs; |
94 | 94 | ||
@@ -96,7 +96,15 @@ void getnstimeofday(struct timespec *ts) | |||
96 | seq = read_seqbegin(&xtime_lock); | 96 | seq = read_seqbegin(&xtime_lock); |
97 | 97 | ||
98 | *ts = xtime; | 98 | *ts = xtime; |
99 | nsecs = __get_nsec_offset(); | 99 | |
100 | /* read clocksource: */ | ||
101 | cycle_now = clocksource_read(clock); | ||
102 | |||
103 | /* calculate the delta since the last update_wall_time: */ | ||
104 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | ||
105 | |||
106 | /* convert to nanoseconds: */ | ||
107 | nsecs = cyc2ns(clock, cycle_delta); | ||
100 | 108 | ||
101 | } while (read_seqretry(&xtime_lock, seq)); | 109 | } while (read_seqretry(&xtime_lock, seq)); |
102 | 110 | ||
@@ -129,22 +137,22 @@ EXPORT_SYMBOL(do_gettimeofday); | |||
129 | */ | 137 | */ |
130 | int do_settimeofday(struct timespec *tv) | 138 | int do_settimeofday(struct timespec *tv) |
131 | { | 139 | { |
140 | struct timespec ts_delta; | ||
132 | unsigned long flags; | 141 | unsigned long flags; |
133 | time_t wtm_sec, sec = tv->tv_sec; | ||
134 | long wtm_nsec, nsec = tv->tv_nsec; | ||
135 | 142 | ||
136 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) | 143 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) |
137 | return -EINVAL; | 144 | return -EINVAL; |
138 | 145 | ||
139 | write_seqlock_irqsave(&xtime_lock, flags); | 146 | write_seqlock_irqsave(&xtime_lock, flags); |
140 | 147 | ||
141 | nsec -= __get_nsec_offset(); | 148 | clocksource_forward_now(); |
149 | |||
150 | ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec; | ||
151 | ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec; | ||
152 | wall_to_monotonic = timespec_sub(wall_to_monotonic, ts_delta); | ||
142 | 153 | ||
143 | wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); | 154 | xtime = *tv; |
144 | wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); | ||
145 | 155 | ||
146 | set_normalized_timespec(&xtime, sec, nsec); | ||
147 | set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); | ||
148 | update_xtime_cache(0); | 156 | update_xtime_cache(0); |
149 | 157 | ||
150 | clock->error = 0; | 158 | clock->error = 0; |
@@ -170,22 +178,19 @@ EXPORT_SYMBOL(do_settimeofday); | |||
170 | static void change_clocksource(void) | 178 | static void change_clocksource(void) |
171 | { | 179 | { |
172 | struct clocksource *new; | 180 | struct clocksource *new; |
173 | cycle_t now; | ||
174 | u64 nsec; | ||
175 | 181 | ||
176 | new = clocksource_get_next(); | 182 | new = clocksource_get_next(); |
177 | 183 | ||
178 | if (clock == new) | 184 | if (clock == new) |
179 | return; | 185 | return; |
180 | 186 | ||
181 | new->cycle_last = 0; | 187 | clocksource_forward_now(); |
182 | now = clocksource_read(new); | ||
183 | nsec = __get_nsec_offset(); | ||
184 | timespec_add_ns(&xtime, nsec); | ||
185 | 188 | ||
186 | clock = new; | 189 | new->raw_time = clock->raw_time; |
187 | clock->cycle_last = now; | ||
188 | 190 | ||
191 | clock = new; | ||
192 | clock->cycle_last = 0; | ||
193 | clock->cycle_last = clocksource_read(new); | ||
189 | clock->error = 0; | 194 | clock->error = 0; |
190 | clock->xtime_nsec = 0; | 195 | clock->xtime_nsec = 0; |
191 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); | 196 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); |
@@ -200,11 +205,44 @@ static void change_clocksource(void) | |||
200 | */ | 205 | */ |
201 | } | 206 | } |
202 | #else | 207 | #else |
208 | static inline void clocksource_forward_now(void) { } | ||
203 | static inline void change_clocksource(void) { } | 209 | static inline void change_clocksource(void) { } |
204 | static inline s64 __get_nsec_offset(void) { return 0; } | ||
205 | #endif | 210 | #endif |
206 | 211 | ||
207 | /** | 212 | /** |
213 | * getrawmonotonic - Returns the raw monotonic time in a timespec | ||
214 | * @ts: pointer to the timespec to be set | ||
215 | * | ||
216 | * Returns the raw monotonic time (completely un-modified by ntp) | ||
217 | */ | ||
218 | void getrawmonotonic(struct timespec *ts) | ||
219 | { | ||
220 | unsigned long seq; | ||
221 | s64 nsecs; | ||
222 | cycle_t cycle_now, cycle_delta; | ||
223 | |||
224 | do { | ||
225 | seq = read_seqbegin(&xtime_lock); | ||
226 | |||
227 | /* read clocksource: */ | ||
228 | cycle_now = clocksource_read(clock); | ||
229 | |||
230 | /* calculate the delta since the last update_wall_time: */ | ||
231 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | ||
232 | |||
233 | /* convert to nanoseconds: */ | ||
234 | nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; | ||
235 | |||
236 | *ts = clock->raw_time; | ||
237 | |||
238 | } while (read_seqretry(&xtime_lock, seq)); | ||
239 | |||
240 | timespec_add_ns(ts, nsecs); | ||
241 | } | ||
242 | EXPORT_SYMBOL(getrawmonotonic); | ||
243 | |||
244 | |||
245 | /** | ||
208 | * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres | 246 | * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres |
209 | */ | 247 | */ |
210 | int timekeeping_valid_for_hres(void) | 248 | int timekeeping_valid_for_hres(void) |
@@ -265,8 +303,6 @@ void __init timekeeping_init(void) | |||
265 | static int timekeeping_suspended; | 303 | static int timekeeping_suspended; |
266 | /* time in seconds when suspend began */ | 304 | /* time in seconds when suspend began */ |
267 | static unsigned long timekeeping_suspend_time; | 305 | static unsigned long timekeeping_suspend_time; |
268 | /* xtime offset when we went into suspend */ | ||
269 | static s64 timekeeping_suspend_nsecs; | ||
270 | 306 | ||
271 | /** | 307 | /** |
272 | * timekeeping_resume - Resumes the generic timekeeping subsystem. | 308 | * timekeeping_resume - Resumes the generic timekeeping subsystem. |
@@ -292,8 +328,6 @@ static int timekeeping_resume(struct sys_device *dev) | |||
292 | wall_to_monotonic.tv_sec -= sleep_length; | 328 | wall_to_monotonic.tv_sec -= sleep_length; |
293 | total_sleep_time += sleep_length; | 329 | total_sleep_time += sleep_length; |
294 | } | 330 | } |
295 | /* Make sure that we have the correct xtime reference */ | ||
296 | timespec_add_ns(&xtime, timekeeping_suspend_nsecs); | ||
297 | update_xtime_cache(0); | 331 | update_xtime_cache(0); |
298 | /* re-base the last cycle value */ | 332 | /* re-base the last cycle value */ |
299 | clock->cycle_last = 0; | 333 | clock->cycle_last = 0; |
@@ -319,8 +353,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) | |||
319 | timekeeping_suspend_time = read_persistent_clock(); | 353 | timekeeping_suspend_time = read_persistent_clock(); |
320 | 354 | ||
321 | write_seqlock_irqsave(&xtime_lock, flags); | 355 | write_seqlock_irqsave(&xtime_lock, flags); |
322 | /* Get the current xtime offset */ | 356 | clocksource_forward_now(); |
323 | timekeeping_suspend_nsecs = __get_nsec_offset(); | ||
324 | timekeeping_suspended = 1; | 357 | timekeeping_suspended = 1; |
325 | write_sequnlock_irqrestore(&xtime_lock, flags); | 358 | write_sequnlock_irqrestore(&xtime_lock, flags); |
326 | 359 | ||
@@ -454,23 +487,29 @@ void update_wall_time(void) | |||
454 | #else | 487 | #else |
455 | offset = clock->cycle_interval; | 488 | offset = clock->cycle_interval; |
456 | #endif | 489 | #endif |
457 | clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift; | 490 | clock->xtime_nsec = (s64)xtime.tv_nsec << clock->shift; |
458 | 491 | ||
459 | /* normally this loop will run just once, however in the | 492 | /* normally this loop will run just once, however in the |
460 | * case of lost or late ticks, it will accumulate correctly. | 493 | * case of lost or late ticks, it will accumulate correctly. |
461 | */ | 494 | */ |
462 | while (offset >= clock->cycle_interval) { | 495 | while (offset >= clock->cycle_interval) { |
463 | /* accumulate one interval */ | 496 | /* accumulate one interval */ |
464 | clock->xtime_nsec += clock->xtime_interval; | ||
465 | clock->cycle_last += clock->cycle_interval; | ||
466 | offset -= clock->cycle_interval; | 497 | offset -= clock->cycle_interval; |
498 | clock->cycle_last += clock->cycle_interval; | ||
467 | 499 | ||
500 | clock->xtime_nsec += clock->xtime_interval; | ||
468 | if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { | 501 | if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { |
469 | clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; | 502 | clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; |
470 | xtime.tv_sec++; | 503 | xtime.tv_sec++; |
471 | second_overflow(); | 504 | second_overflow(); |
472 | } | 505 | } |
473 | 506 | ||
507 | clock->raw_time.tv_nsec += clock->raw_interval; | ||
508 | if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) { | ||
509 | clock->raw_time.tv_nsec -= NSEC_PER_SEC; | ||
510 | clock->raw_time.tv_sec++; | ||
511 | } | ||
512 | |||
474 | /* accumulate error between NTP and clock interval */ | 513 | /* accumulate error between NTP and clock interval */ |
475 | clock->error += tick_length; | 514 | clock->error += tick_length; |
476 | clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift); | 515 | clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift); |
@@ -479,9 +518,12 @@ void update_wall_time(void) | |||
479 | /* correct the clock when NTP error is too big */ | 518 | /* correct the clock when NTP error is too big */ |
480 | clocksource_adjust(offset); | 519 | clocksource_adjust(offset); |
481 | 520 | ||
482 | /* store full nanoseconds into xtime */ | 521 | /* store full nanoseconds into xtime after rounding it up and |
483 | xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift; | 522 | * add the remainder to the error difference. |
523 | */ | ||
524 | xtime.tv_nsec = ((s64)clock->xtime_nsec >> clock->shift) + 1; | ||
484 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; | 525 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; |
526 | clock->error += clock->xtime_nsec << (NTP_SCALE_SHIFT - clock->shift); | ||
485 | 527 | ||
486 | update_xtime_cache(cyc2ns(clock, offset)); | 528 | update_xtime_cache(cyc2ns(clock, offset)); |
487 | 529 | ||
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index a40e20fd0001..f6426911e35a 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -47,13 +47,14 @@ static void print_name_offset(struct seq_file *m, void *sym) | |||
47 | } | 47 | } |
48 | 48 | ||
49 | static void | 49 | static void |
50 | print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now) | 50 | print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer, |
51 | int idx, u64 now) | ||
51 | { | 52 | { |
52 | #ifdef CONFIG_TIMER_STATS | 53 | #ifdef CONFIG_TIMER_STATS |
53 | char tmp[TASK_COMM_LEN + 1]; | 54 | char tmp[TASK_COMM_LEN + 1]; |
54 | #endif | 55 | #endif |
55 | SEQ_printf(m, " #%d: ", idx); | 56 | SEQ_printf(m, " #%d: ", idx); |
56 | print_name_offset(m, timer); | 57 | print_name_offset(m, taddr); |
57 | SEQ_printf(m, ", "); | 58 | SEQ_printf(m, ", "); |
58 | print_name_offset(m, timer->function); | 59 | print_name_offset(m, timer->function); |
59 | SEQ_printf(m, ", S:%02lx", timer->state); | 60 | SEQ_printf(m, ", S:%02lx", timer->state); |
@@ -99,7 +100,7 @@ next_one: | |||
99 | tmp = *timer; | 100 | tmp = *timer; |
100 | spin_unlock_irqrestore(&base->cpu_base->lock, flags); | 101 | spin_unlock_irqrestore(&base->cpu_base->lock, flags); |
101 | 102 | ||
102 | print_timer(m, &tmp, i, now); | 103 | print_timer(m, timer, &tmp, i, now); |
103 | next++; | 104 | next++; |
104 | goto next_one; | 105 | goto next_one; |
105 | } | 106 | } |
@@ -109,6 +110,7 @@ next_one: | |||
109 | static void | 110 | static void |
110 | print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) | 111 | print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) |
111 | { | 112 | { |
113 | SEQ_printf(m, " .base: %p\n", base); | ||
112 | SEQ_printf(m, " .index: %d\n", | 114 | SEQ_printf(m, " .index: %d\n", |
113 | base->index); | 115 | base->index); |
114 | SEQ_printf(m, " .resolution: %Lu nsecs\n", | 116 | SEQ_printf(m, " .resolution: %Lu nsecs\n", |
@@ -183,12 +185,16 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) | |||
183 | 185 | ||
184 | #ifdef CONFIG_GENERIC_CLOCKEVENTS | 186 | #ifdef CONFIG_GENERIC_CLOCKEVENTS |
185 | static void | 187 | static void |
186 | print_tickdevice(struct seq_file *m, struct tick_device *td) | 188 | print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) |
187 | { | 189 | { |
188 | struct clock_event_device *dev = td->evtdev; | 190 | struct clock_event_device *dev = td->evtdev; |
189 | 191 | ||
190 | SEQ_printf(m, "\n"); | 192 | SEQ_printf(m, "\n"); |
191 | SEQ_printf(m, "Tick Device: mode: %d\n", td->mode); | 193 | SEQ_printf(m, "Tick Device: mode: %d\n", td->mode); |
194 | if (cpu < 0) | ||
195 | SEQ_printf(m, "Broadcast device\n"); | ||
196 | else | ||
197 | SEQ_printf(m, "Per CPU device: %d\n", cpu); | ||
192 | 198 | ||
193 | SEQ_printf(m, "Clock Event Device: "); | 199 | SEQ_printf(m, "Clock Event Device: "); |
194 | if (!dev) { | 200 | if (!dev) { |
@@ -222,7 +228,7 @@ static void timer_list_show_tickdevices(struct seq_file *m) | |||
222 | int cpu; | 228 | int cpu; |
223 | 229 | ||
224 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | 230 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST |
225 | print_tickdevice(m, tick_get_broadcast_device()); | 231 | print_tickdevice(m, tick_get_broadcast_device(), -1); |
226 | SEQ_printf(m, "tick_broadcast_mask: %08lx\n", | 232 | SEQ_printf(m, "tick_broadcast_mask: %08lx\n", |
227 | tick_get_broadcast_mask()->bits[0]); | 233 | tick_get_broadcast_mask()->bits[0]); |
228 | #ifdef CONFIG_TICK_ONESHOT | 234 | #ifdef CONFIG_TICK_ONESHOT |
@@ -232,7 +238,7 @@ static void timer_list_show_tickdevices(struct seq_file *m) | |||
232 | SEQ_printf(m, "\n"); | 238 | SEQ_printf(m, "\n"); |
233 | #endif | 239 | #endif |
234 | for_each_online_cpu(cpu) | 240 | for_each_online_cpu(cpu) |
235 | print_tickdevice(m, tick_get_device(cpu)); | 241 | print_tickdevice(m, tick_get_device(cpu), cpu); |
236 | SEQ_printf(m, "\n"); | 242 | SEQ_printf(m, "\n"); |
237 | } | 243 | } |
238 | #else | 244 | #else |
@@ -244,7 +250,7 @@ static int timer_list_show(struct seq_file *m, void *v) | |||
244 | u64 now = ktime_to_ns(ktime_get()); | 250 | u64 now = ktime_to_ns(ktime_get()); |
245 | int cpu; | 251 | int cpu; |
246 | 252 | ||
247 | SEQ_printf(m, "Timer List Version: v0.3\n"); | 253 | SEQ_printf(m, "Timer List Version: v0.4\n"); |
248 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); | 254 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); |
249 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); | 255 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); |
250 | 256 | ||
diff --git a/kernel/timer.c b/kernel/timer.c index 510fe69351ca..56becf373c58 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -1436,9 +1436,11 @@ static void __cpuinit migrate_timers(int cpu) | |||
1436 | BUG_ON(cpu_online(cpu)); | 1436 | BUG_ON(cpu_online(cpu)); |
1437 | old_base = per_cpu(tvec_bases, cpu); | 1437 | old_base = per_cpu(tvec_bases, cpu); |
1438 | new_base = get_cpu_var(tvec_bases); | 1438 | new_base = get_cpu_var(tvec_bases); |
1439 | 1439 | /* | |
1440 | local_irq_disable(); | 1440 | * The caller is globally serialized and nobody else |
1441 | spin_lock(&new_base->lock); | 1441 | * takes two locks at once, deadlock is not possible. |
1442 | */ | ||
1443 | spin_lock_irq(&new_base->lock); | ||
1442 | spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); | 1444 | spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); |
1443 | 1445 | ||
1444 | BUG_ON(old_base->running_timer); | 1446 | BUG_ON(old_base->running_timer); |
@@ -1453,8 +1455,7 @@ static void __cpuinit migrate_timers(int cpu) | |||
1453 | } | 1455 | } |
1454 | 1456 | ||
1455 | spin_unlock(&old_base->lock); | 1457 | spin_unlock(&old_base->lock); |
1456 | spin_unlock(&new_base->lock); | 1458 | spin_unlock_irq(&new_base->lock); |
1457 | local_irq_enable(); | ||
1458 | put_cpu_var(tvec_bases); | 1459 | put_cpu_var(tvec_bases); |
1459 | } | 1460 | } |
1460 | #endif /* CONFIG_HOTPLUG_CPU */ | 1461 | #endif /* CONFIG_HOTPLUG_CPU */ |
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 576e51199079..3e3fde7c1d2b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c | |||
@@ -75,6 +75,7 @@ | |||
75 | #include <linux/string.h> | 75 | #include <linux/string.h> |
76 | #include <linux/selinux.h> | 76 | #include <linux/selinux.h> |
77 | #include <linux/mutex.h> | 77 | #include <linux/mutex.h> |
78 | #include <linux/posix-timers.h> | ||
78 | 79 | ||
79 | #include "avc.h" | 80 | #include "avc.h" |
80 | #include "objsec.h" | 81 | #include "objsec.h" |
@@ -2322,13 +2323,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm) | |||
2322 | initrlim = init_task.signal->rlim+i; | 2323 | initrlim = init_task.signal->rlim+i; |
2323 | rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); | 2324 | rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); |
2324 | } | 2325 | } |
2325 | if (current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { | 2326 | update_rlimit_cpu(rlim->rlim_cur); |
2326 | /* | ||
2327 | * This will cause RLIMIT_CPU calculations | ||
2328 | * to be refigured. | ||
2329 | */ | ||
2330 | current->it_prof_expires = jiffies_to_cputime(1); | ||
2331 | } | ||
2332 | } | 2327 | } |
2333 | 2328 | ||
2334 | /* Wake up the parent if it is waiting so that it can | 2329 | /* Wake up the parent if it is waiting so that it can |