Diffstat (limited to 'arch/powerpc/kernel/time.c')
-rw-r--r--  arch/powerpc/kernel/time.c  |  350
1 file changed, 183 insertions(+), 167 deletions(-)
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 8533b3b83f5d..f33acfd872ad 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -53,7 +53,7 @@
 #include <linux/posix-timers.h>
 #include <linux/irq.h>
 #include <linux/delay.h>
-#include <linux/perf_event.h>
+#include <linux/irq_work.h>
 #include <asm/trace.h>
 
 #include <asm/io.h>
@@ -155,16 +155,15 @@ EXPORT_SYMBOL_GPL(rtc_lock);
 
 static u64 tb_to_ns_scale __read_mostly;
 static unsigned tb_to_ns_shift __read_mostly;
-static unsigned long boot_tb __read_mostly;
+static u64 boot_tb __read_mostly;
 
 extern struct timezone sys_tz;
 static long timezone_offset;
 
 unsigned long ppc_proc_freq;
-EXPORT_SYMBOL(ppc_proc_freq);
+EXPORT_SYMBOL_GPL(ppc_proc_freq);
 unsigned long ppc_tb_freq;
-
-static DEFINE_PER_CPU(u64, last_jiffy);
+EXPORT_SYMBOL_GPL(ppc_tb_freq);
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 /*
@@ -185,6 +184,8 @@ DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta);
 
 cputime_t cputime_one_jiffy;
 
+void (*dtl_consumer)(struct dtl_entry *, u64);
+
 static void calc_cputime_factors(void)
 {
 	struct div_result res;
@@ -200,62 +201,171 @@ static void calc_cputime_factors(void)
 }
 
 /*
- * Read the PURR on systems that have it, otherwise the timebase.
+ * Read the SPURR on systems that have it, otherwise the PURR,
+ * or if that doesn't exist return the timebase value passed in.
  */
-static u64 read_purr(void)
+static u64 read_spurr(u64 tb)
 {
+	if (cpu_has_feature(CPU_FTR_SPURR))
+		return mfspr(SPRN_SPURR);
 	if (cpu_has_feature(CPU_FTR_PURR))
 		return mfspr(SPRN_PURR);
-	return mftb();
+	return tb;
 }
 
+#ifdef CONFIG_PPC_SPLPAR
+
 /*
- * Read the SPURR on systems that have it, otherwise the purr
+ * Scan the dispatch trace log and count up the stolen time.
+ * Should be called with interrupts disabled.
  */
-static u64 read_spurr(u64 purr)
+static u64 scan_dispatch_log(u64 stop_tb)
 {
-	/*
-	 * cpus without PURR won't have a SPURR
-	 * We already know the former when we use this, so tell gcc
+	u64 i = local_paca->dtl_ridx;
+	struct dtl_entry *dtl = local_paca->dtl_curr;
+	struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
+	struct lppaca *vpa = local_paca->lppaca_ptr;
+	u64 tb_delta;
+	u64 stolen = 0;
+	u64 dtb;
+
+	if (!dtl)
+		return 0;
+
+	if (i == vpa->dtl_idx)
+		return 0;
+	while (i < vpa->dtl_idx) {
+		if (dtl_consumer)
+			dtl_consumer(dtl, i);
+		dtb = dtl->timebase;
+		tb_delta = dtl->enqueue_to_dispatch_time +
+			dtl->ready_to_enqueue_time;
+		barrier();
+		if (i + N_DISPATCH_LOG < vpa->dtl_idx) {
+			/* buffer has overflowed */
+			i = vpa->dtl_idx - N_DISPATCH_LOG;
+			dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+			continue;
+		}
+		if (dtb > stop_tb)
+			break;
+		stolen += tb_delta;
+		++i;
+		++dtl;
+		if (dtl == dtl_end)
+			dtl = local_paca->dispatch_log;
+	}
+	local_paca->dtl_ridx = i;
+	local_paca->dtl_curr = dtl;
+	return stolen;
+}
+
+/*
+ * Accumulate stolen time by scanning the dispatch trace log.
+ * Called on entry from user mode.
+ */
+void accumulate_stolen_time(void)
+{
+	u64 sst, ust;
+
+	u8 save_soft_enabled = local_paca->soft_enabled;
+	u8 save_hard_enabled = local_paca->hard_enabled;
+
+	/* We are called early in the exception entry, before
+	 * soft/hard_enabled are sync'ed to the expected state
+	 * for the exception. We are hard disabled but the PACA
+	 * needs to reflect that so various debug stuff doesn't
+	 * complain
 	 */
-	if (cpu_has_feature(CPU_FTR_PURR) && cpu_has_feature(CPU_FTR_SPURR))
-		return mfspr(SPRN_SPURR);
-	return purr;
+	local_paca->soft_enabled = 0;
+	local_paca->hard_enabled = 0;
+
+	sst = scan_dispatch_log(local_paca->starttime_user);
+	ust = scan_dispatch_log(local_paca->starttime);
+	local_paca->system_time -= sst;
+	local_paca->user_time -= ust;
+	local_paca->stolen_time += ust + sst;
+
+	local_paca->soft_enabled = save_soft_enabled;
+	local_paca->hard_enabled = save_hard_enabled;
+}
+
+static inline u64 calculate_stolen_time(u64 stop_tb)
+{
+	u64 stolen = 0;
+
+	if (get_paca()->dtl_ridx != get_paca()->lppaca_ptr->dtl_idx) {
+		stolen = scan_dispatch_log(stop_tb);
+		get_paca()->system_time -= stolen;
+	}
+
+	stolen += get_paca()->stolen_time;
+	get_paca()->stolen_time = 0;
+	return stolen;
 }
 
+#else /* CONFIG_PPC_SPLPAR */
+static inline u64 calculate_stolen_time(u64 stop_tb)
+{
+	return 0;
+}
+
+#endif /* CONFIG_PPC_SPLPAR */
+
 /*
  * Account time for a transition between system, hard irq
  * or soft irq state.
  */
 void account_system_vtime(struct task_struct *tsk)
 {
-	u64 now, nowscaled, delta, deltascaled, sys_time;
+	u64 now, nowscaled, delta, deltascaled;
 	unsigned long flags;
+	u64 stolen, udelta, sys_scaled, user_scaled;
 
 	local_irq_save(flags);
-	now = read_purr();
+	now = mftb();
 	nowscaled = read_spurr(now);
-	delta = now - get_paca()->startpurr;
+	get_paca()->system_time += now - get_paca()->starttime;
+	get_paca()->starttime = now;
 	deltascaled = nowscaled - get_paca()->startspurr;
-	get_paca()->startpurr = now;
 	get_paca()->startspurr = nowscaled;
-	if (!in_interrupt()) {
-		/* deltascaled includes both user and system time.
-		 * Hence scale it based on the purr ratio to estimate
-		 * the system time */
-		sys_time = get_paca()->system_time;
-		if (get_paca()->user_time)
-			deltascaled = deltascaled * sys_time /
-				(sys_time + get_paca()->user_time);
-		delta += sys_time;
-		get_paca()->system_time = 0;
+
+	stolen = calculate_stolen_time(now);
+
+	delta = get_paca()->system_time;
+	get_paca()->system_time = 0;
+	udelta = get_paca()->user_time - get_paca()->utime_sspurr;
+	get_paca()->utime_sspurr = get_paca()->user_time;
+
+	/*
+	 * Because we don't read the SPURR on every kernel entry/exit,
+	 * deltascaled includes both user and system SPURR ticks.
+	 * Apportion these ticks to system SPURR ticks and user
+	 * SPURR ticks in the same ratio as the system time (delta)
+	 * and user time (udelta) values obtained from the timebase
+	 * over the same interval. The system ticks get accounted here;
+	 * the user ticks get saved up in paca->user_time_scaled to be
+	 * used by account_process_tick.
+	 */
+	sys_scaled = delta;
+	user_scaled = udelta;
+	if (deltascaled != delta + udelta) {
+		if (udelta) {
+			sys_scaled = deltascaled * delta / (delta + udelta);
+			user_scaled = deltascaled - sys_scaled;
+		} else {
+			sys_scaled = deltascaled;
+		}
+	}
+	get_paca()->user_time_scaled += user_scaled;
+
+	if (in_interrupt() || idle_task(smp_processor_id()) != tsk) {
+		account_system_time(tsk, 0, delta, sys_scaled);
+		if (stolen)
+			account_steal_time(stolen);
+	} else {
+		account_idle_time(delta + stolen);
 	}
-	if (in_irq() || idle_task(smp_processor_id()) != tsk)
-		account_system_time(tsk, 0, delta, deltascaled);
-	else
-		account_idle_time(delta);
-	__get_cpu_var(cputime_last_delta) = delta;
-	__get_cpu_var(cputime_scaled_last_delta) = deltascaled;
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
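
As an illustration (not part of the patch): the hunk above apportions the SPURR ticks accumulated over an interval (deltascaled) between system and user time in the same ratio as the timebase-derived system (delta) and user (udelta) ticks. The small standalone C program below, with made-up values, runs exactly that arithmetic; it prints sys_scaled=240 user_scaled=560, i.e. the 800 SPURR ticks are split 3:7 to match the 300:700 timebase ratio.

/* Standalone sketch of the apportionment arithmetic used above.
 * The values are invented; only the formula mirrors the patch. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t delta = 300;		/* system time, in timebase ticks */
	uint64_t udelta = 700;		/* user time, in timebase ticks */
	uint64_t deltascaled = 800;	/* SPURR ticks over the same interval */
	uint64_t sys_scaled = delta, user_scaled = udelta;

	if (deltascaled != delta + udelta) {
		if (udelta) {
			sys_scaled = deltascaled * delta / (delta + udelta);
			user_scaled = deltascaled - sys_scaled;
		} else {
			sys_scaled = deltascaled;
		}
	}
	printf("sys_scaled=%llu user_scaled=%llu\n",
	       (unsigned long long)sys_scaled,
	       (unsigned long long)user_scaled);
	return 0;
}
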
@@ -265,125 +375,26 @@ EXPORT_SYMBOL_GPL(account_system_vtime);
  * by the exception entry and exit code to the generic process
  * user and system time records.
  * Must be called with interrupts disabled.
+ * Assumes that account_system_vtime() has been called recently
+ * (i.e. since the last entry from usermode) so that
+ * get_paca()->user_time_scaled is up to date.
  */
 void account_process_tick(struct task_struct *tsk, int user_tick)
 {
 	cputime_t utime, utimescaled;
 
 	utime = get_paca()->user_time;
+	utimescaled = get_paca()->user_time_scaled;
 	get_paca()->user_time = 0;
-	utimescaled = cputime_to_scaled(utime);
+	get_paca()->user_time_scaled = 0;
+	get_paca()->utime_sspurr = 0;
 	account_user_time(tsk, utime, utimescaled);
 }
 
-/*
- * Stuff for accounting stolen time.
- */
-struct cpu_purr_data {
-	int	initialized;	/* thread is running */
-	u64	tb;		/* last TB value read */
-	u64	purr;		/* last PURR value read */
-	u64	spurr;		/* last SPURR value read */
-};
-
-/*
- * Each entry in the cpu_purr_data array is manipulated only by its
- * "owner" cpu -- usually in the timer interrupt but also occasionally
- * in process context for cpu online.  As long as cpus do not touch
- * each others' cpu_purr_data, disabling local interrupts is
- * sufficient to serialize accesses.
- */
-static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data);
-
-static void snapshot_tb_and_purr(void *data)
-{
-	unsigned long flags;
-	struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data);
-
-	local_irq_save(flags);
-	p->tb = get_tb_or_rtc();
-	p->purr = mfspr(SPRN_PURR);
-	wmb();
-	p->initialized = 1;
-	local_irq_restore(flags);
-}
-
-/*
- * Called during boot when all cpus have come up.
- */
-void snapshot_timebases(void)
-{
-	if (!cpu_has_feature(CPU_FTR_PURR))
-		return;
-	on_each_cpu(snapshot_tb_and_purr, NULL, 1);
-}
-
-/*
- * Must be called with interrupts disabled.
- */
-void calculate_steal_time(void)
-{
-	u64 tb, purr;
-	s64 stolen;
-	struct cpu_purr_data *pme;
-
-	pme = &__get_cpu_var(cpu_purr_data);
-	if (!pme->initialized)
-		return;		/* !CPU_FTR_PURR or early in early boot */
-	tb = mftb();
-	purr = mfspr(SPRN_PURR);
-	stolen = (tb - pme->tb) - (purr - pme->purr);
-	if (stolen > 0) {
-		if (idle_task(smp_processor_id()) != current)
-			account_steal_time(stolen);
-		else
-			account_idle_time(stolen);
-	}
-	pme->tb = tb;
-	pme->purr = purr;
-}
-
-#ifdef CONFIG_PPC_SPLPAR
-/*
- * Must be called before the cpu is added to the online map when
- * a cpu is being brought up at runtime.
- */
-static void snapshot_purr(void)
-{
-	struct cpu_purr_data *pme;
-	unsigned long flags;
-
-	if (!cpu_has_feature(CPU_FTR_PURR))
-		return;
-	local_irq_save(flags);
-	pme = &__get_cpu_var(cpu_purr_data);
-	pme->tb = mftb();
-	pme->purr = mfspr(SPRN_PURR);
-	pme->initialized = 1;
-	local_irq_restore(flags);
-}
-
-#endif /* CONFIG_PPC_SPLPAR */
-
 #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
 #define calc_cputime_factors()
-#define calculate_steal_time()	do { } while (0)
-#endif
-
-#if !(defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR))
-#define snapshot_purr()		do { } while (0)
 #endif
 
-/*
- * Called when a cpu comes up after the system has finished booting,
- * i.e. as a result of a hotplug cpu action.
- */
-void snapshot_timebase(void)
-{
-	__get_cpu_var(last_jiffy) = get_tb_or_rtc();
-	snapshot_purr();
-}
-
 void __delay(unsigned long loops)
 {
 	unsigned long start;
@@ -493,60 +504,60 @@ void __init iSeries_time_init_early(void)
 }
 #endif /* CONFIG_PPC_ISERIES */
 
-#ifdef CONFIG_PERF_EVENTS
+#ifdef CONFIG_IRQ_WORK
 
 /*
  * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
  */
 #ifdef CONFIG_PPC64
-static inline unsigned long test_perf_event_pending(void)
+static inline unsigned long test_irq_work_pending(void)
 {
 	unsigned long x;
 
 	asm volatile("lbz %0,%1(13)"
 		: "=r" (x)
-		: "i" (offsetof(struct paca_struct, perf_event_pending)));
+		: "i" (offsetof(struct paca_struct, irq_work_pending)));
 	return x;
 }
 
-static inline void set_perf_event_pending_flag(void)
+static inline void set_irq_work_pending_flag(void)
 {
 	asm volatile("stb %0,%1(13)" : :
 		"r" (1),
-		"i" (offsetof(struct paca_struct, perf_event_pending)));
+		"i" (offsetof(struct paca_struct, irq_work_pending)));
 }
 
-static inline void clear_perf_event_pending(void)
+static inline void clear_irq_work_pending(void)
 {
 	asm volatile("stb %0,%1(13)" : :
 		"r" (0),
-		"i" (offsetof(struct paca_struct, perf_event_pending)));
+		"i" (offsetof(struct paca_struct, irq_work_pending)));
 }
 
 #else /* 32-bit */
 
-DEFINE_PER_CPU(u8, perf_event_pending);
+DEFINE_PER_CPU(u8, irq_work_pending);
 
-#define set_perf_event_pending_flag()	__get_cpu_var(perf_event_pending) = 1
-#define test_perf_event_pending()	__get_cpu_var(perf_event_pending)
-#define clear_perf_event_pending()	__get_cpu_var(perf_event_pending) = 0
+#define set_irq_work_pending_flag()	__get_cpu_var(irq_work_pending) = 1
+#define test_irq_work_pending()		__get_cpu_var(irq_work_pending)
+#define clear_irq_work_pending()	__get_cpu_var(irq_work_pending) = 0
 
 #endif /* 32 vs 64 bit */
 
-void set_perf_event_pending(void)
+void set_irq_work_pending(void)
 {
 	preempt_disable();
-	set_perf_event_pending_flag();
+	set_irq_work_pending_flag();
 	set_dec(1);
 	preempt_enable();
 }
 
-#else  /* CONFIG_PERF_EVENTS */
+#else  /* CONFIG_IRQ_WORK */
 
-#define test_perf_event_pending()	0
-#define clear_perf_event_pending()
+#define test_irq_work_pending()	0
+#define clear_irq_work_pending()
 
-#endif /* CONFIG_PERF_EVENTS */
+#endif /* CONFIG_IRQ_WORK */
 
 /*
  * For iSeries shared processors, we have to let the hypervisor
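
An aside on the hunk above, not part of the patch: on 64-bit the three inline-asm helpers simply load or store one byte at a fixed offset from r13, the PACA pointer, so they cost a single lbz/stb with no per-cpu address calculation. Assuming the usual r13-based local_paca variable, they are roughly equivalent to this sketch:

/* Illustrative C equivalent of the 64-bit helpers above (a sketch, under
 * the assumption that local_paca is the r13-based PACA pointer; the real
 * code uses inline asm so exactly one lbz/stb is emitted). */
static inline unsigned long test_irq_work_pending_sketch(void)
{
	return local_paca->irq_work_pending;	/* like: lbz %0,offset(13) */
}

static inline void set_irq_work_pending_flag_sketch(void)
{
	local_paca->irq_work_pending = 1;	/* like: stb of 1 at offset(13) */
}

static inline void clear_irq_work_pending_sketch(void)
{
	local_paca->irq_work_pending = 0;	/* like: stb of 0 at offset(13) */
}
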
@@ -569,14 +580,21 @@ void timer_interrupt(struct pt_regs * regs)
 	struct clock_event_device *evt = &decrementer->event;
 	u64 now;
 
+	/* Ensure a positive value is written to the decrementer, or else
+	 * some CPUs will continue to take decrementer exceptions.
+	 */
+	set_dec(DECREMENTER_MAX);
+
+	/* Some implementations of hotplug will get timer interrupts while
+	 * offline, just ignore these
+	 */
+	if (!cpu_online(smp_processor_id()))
+		return;
+
 	trace_timer_interrupt_entry(regs);
 
 	__get_cpu_var(irq_stat).timer_irqs++;
 
-	/* Ensure a positive value is written to the decrementer, or else
-	 * some CPUs will continuue to take decrementer exceptions */
-	set_dec(DECREMENTER_MAX);
-
 #if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
 	if (atomic_read(&ppc_n_lost_interrupts) != 0)
 		do_IRQ(regs);
@@ -585,11 +603,9 @@ void timer_interrupt(struct pt_regs * regs)
 	old_regs = set_irq_regs(regs);
 	irq_enter();
 
-	calculate_steal_time();
-
-	if (test_perf_event_pending()) {
-		clear_perf_event_pending();
-		perf_event_do_pending();
+	if (test_irq_work_pending()) {
+		clear_irq_work_pending();
+		irq_work_run();
 	}
 
 #ifdef CONFIG_PPC_ISERIES