 arch/powerpc/include/asm/exception-64s.h |   3
 arch/powerpc/include/asm/lppaca.h        |  19
 arch/powerpc/include/asm/paca.h          |  10
 arch/powerpc/include/asm/ppc_asm.h       |  50
 arch/powerpc/include/asm/time.h          |   5
 arch/powerpc/kernel/asm-offsets.c        |   8
 arch/powerpc/kernel/entry_64.S           |  18
 arch/powerpc/kernel/process.c            |   1
 arch/powerpc/kernel/smp.c                |   5
 arch/powerpc/kernel/time.c               | 268
 arch/powerpc/platforms/pseries/dtl.c     |  24
 arch/powerpc/platforms/pseries/lpar.c    |  21
 arch/powerpc/platforms/pseries/setup.c   |  52
 13 files changed, 290 insertions(+), 194 deletions(-)
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 57c400071995..7778d6f0c878 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -137,7 +137,8 @@
137 li r10,0; \ 137 li r10,0; \
138 ld r11,exception_marker@toc(r2); \ 138 ld r11,exception_marker@toc(r2); \
139 std r10,RESULT(r1); /* clear regs->result */ \ 139 std r10,RESULT(r1); /* clear regs->result */ \
140 std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ 140 std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \
141 ACCOUNT_STOLEN_TIME
141 142
142/* 143/*
143 * Exception vectors. 144 * Exception vectors.
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 6d02624b622c..cfb85ec85750 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -172,6 +172,25 @@ struct slb_shadow {
172 172
173extern struct slb_shadow slb_shadow[]; 173extern struct slb_shadow slb_shadow[];
174 174
175/*
176 * Layout of entries in the hypervisor's dispatch trace log buffer.
177 */
178struct dtl_entry {
179 u8 dispatch_reason;
180 u8 preempt_reason;
181 u16 processor_id;
182 u32 enqueue_to_dispatch_time;
183 u32 ready_to_enqueue_time;
184 u32 waiting_to_ready_time;
185 u64 timebase;
186 u64 fault_addr;
187 u64 srr0;
188 u64 srr1;
189};
190
191#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */
192#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
193
175#endif /* CONFIG_PPC_BOOK3S */ 194#endif /* CONFIG_PPC_BOOK3S */
176#endif /* __KERNEL__ */ 195#endif /* __KERNEL__ */
177#endif /* _ASM_POWERPC_LPPACA_H */ 196#endif /* _ASM_POWERPC_LPPACA_H */
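
For reference, each dtl_entry above occupies 48 bytes, so each 4 KB per-cpu buffer holds N_DISPATCH_LOG = 85 entries. Below is a small stand-alone sketch (a user-space mirror of the layout, not kernel code) that checks that arithmetic and shows how a monotonically increasing log index wraps onto a buffer slot, the same modulo step used by scan_dispatch_log() in time.c further down.

#include <stdint.h>
#include <stdio.h>

/* User-space mirror of struct dtl_entry above, used here only to check
 * the size; the kernel definition in lppaca.h is authoritative. */
struct dtl_entry {
	uint8_t  dispatch_reason;
	uint8_t  preempt_reason;
	uint16_t processor_id;
	uint32_t enqueue_to_dispatch_time;
	uint32_t ready_to_enqueue_time;
	uint32_t waiting_to_ready_time;
	uint64_t timebase;
	uint64_t fault_addr;
	uint64_t srr0;
	uint64_t srr1;
};

#define DISPATCH_LOG_BYTES 4096
#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))

int main(void)
{
	/* 48-byte entries: 4096 / 48 = 85 entries per per-cpu buffer. */
	printf("entry size %zu, entries per buffer %zu\n",
	       sizeof(struct dtl_entry), N_DISPATCH_LOG);

	/* The hypervisor's dtl_idx and the paca's dtl_ridx count entries
	 * monotonically; the buffer slot is the index modulo the buffer
	 * size, which is how scan_dispatch_log() resynchronizes after an
	 * overflow. The index value here is made up. */
	uint64_t dtl_ridx = 200;
	printf("index %llu -> slot %llu\n",
	       (unsigned long long)dtl_ridx,
	       (unsigned long long)(dtl_ridx % N_DISPATCH_LOG));
	return 0;
}
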
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 1ff6662f7faf..6af6c1613409 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -85,6 +85,8 @@ struct paca_struct {
85 u8 kexec_state; /* set when kexec down has irqs off */ 85 u8 kexec_state; /* set when kexec down has irqs off */
86#ifdef CONFIG_PPC_STD_MMU_64 86#ifdef CONFIG_PPC_STD_MMU_64
87 struct slb_shadow *slb_shadow_ptr; 87 struct slb_shadow *slb_shadow_ptr;
88 struct dtl_entry *dispatch_log;
89 struct dtl_entry *dispatch_log_end;
88 90
89 /* 91 /*
90 * Now, starting in cacheline 2, the exception save areas 92 * Now, starting in cacheline 2, the exception save areas
@@ -134,8 +136,14 @@ struct paca_struct {
134 /* Stuff for accurate time accounting */ 136 /* Stuff for accurate time accounting */
135 u64 user_time; /* accumulated usermode TB ticks */ 137 u64 user_time; /* accumulated usermode TB ticks */
136 u64 system_time; /* accumulated system TB ticks */ 138 u64 system_time; /* accumulated system TB ticks */
137 u64 startpurr; /* PURR/TB value snapshot */ 139 u64 user_time_scaled; /* accumulated usermode SPURR ticks */
140 u64 starttime; /* TB value snapshot */
141 u64 starttime_user; /* TB value on exit to usermode */
138 u64 startspurr; /* SPURR value snapshot */ 142 u64 startspurr; /* SPURR value snapshot */
143 u64 utime_sspurr; /* ->user_time when ->startspurr set */
144 u64 stolen_time; /* TB ticks taken by hypervisor */
145 u64 dtl_ridx; /* read index in dispatch log */
146 struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */
139 147
140#ifdef CONFIG_KVM_BOOK3S_HANDLER 148#ifdef CONFIG_KVM_BOOK3S_HANDLER
141 /* We use this to store guest state in */ 149 /* We use this to store guest state in */
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 498fe09263d3..98210067c1cc 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -9,6 +9,7 @@
9#include <asm/asm-compat.h> 9#include <asm/asm-compat.h>
10#include <asm/processor.h> 10#include <asm/processor.h>
11#include <asm/ppc-opcode.h> 11#include <asm/ppc-opcode.h>
12#include <asm/firmware.h>
12 13
13#ifndef __ASSEMBLY__ 14#ifndef __ASSEMBLY__
14#error __FILE__ should only be used in assembler files 15#error __FILE__ should only be used in assembler files
@@ -26,17 +27,13 @@
26#ifndef CONFIG_VIRT_CPU_ACCOUNTING 27#ifndef CONFIG_VIRT_CPU_ACCOUNTING
27#define ACCOUNT_CPU_USER_ENTRY(ra, rb) 28#define ACCOUNT_CPU_USER_ENTRY(ra, rb)
28#define ACCOUNT_CPU_USER_EXIT(ra, rb) 29#define ACCOUNT_CPU_USER_EXIT(ra, rb)
30#define ACCOUNT_STOLEN_TIME
29#else 31#else
30#define ACCOUNT_CPU_USER_ENTRY(ra, rb) \ 32#define ACCOUNT_CPU_USER_ENTRY(ra, rb) \
31 beq 2f; /* if from kernel mode */ \ 33 beq 2f; /* if from kernel mode */ \
32BEGIN_FTR_SECTION; \ 34 MFTB(ra); /* get timebase */ \
33 mfspr ra,SPRN_PURR; /* get processor util. reg */ \ 35 ld rb,PACA_STARTTIME_USER(r13); \
34END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ 36 std ra,PACA_STARTTIME(r13); \
35BEGIN_FTR_SECTION; \
36 MFTB(ra); /* or get TB if no PURR */ \
37END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \
38 ld rb,PACA_STARTPURR(r13); \
39 std ra,PACA_STARTPURR(r13); \
40 subf rb,rb,ra; /* subtract start value */ \ 37 subf rb,rb,ra; /* subtract start value */ \
41 ld ra,PACA_USER_TIME(r13); \ 38 ld ra,PACA_USER_TIME(r13); \
42 add ra,ra,rb; /* add on to user time */ \ 39 add ra,ra,rb; /* add on to user time */ \
@@ -44,19 +41,34 @@ END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \
442: 412:
45 42
46#define ACCOUNT_CPU_USER_EXIT(ra, rb) \ 43#define ACCOUNT_CPU_USER_EXIT(ra, rb) \
47BEGIN_FTR_SECTION; \ 44 MFTB(ra); /* get timebase */ \
48 mfspr ra,SPRN_PURR; /* get processor util. reg */ \ 45 ld rb,PACA_STARTTIME(r13); \
49END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ 46 std ra,PACA_STARTTIME_USER(r13); \
50BEGIN_FTR_SECTION; \
51 MFTB(ra); /* or get TB if no PURR */ \
52END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \
53 ld rb,PACA_STARTPURR(r13); \
54 std ra,PACA_STARTPURR(r13); \
55 subf rb,rb,ra; /* subtract start value */ \ 47 subf rb,rb,ra; /* subtract start value */ \
56 ld ra,PACA_SYSTEM_TIME(r13); \ 48 ld ra,PACA_SYSTEM_TIME(r13); \
57 add ra,ra,rb; /* add on to user time */ \ 49 add ra,ra,rb; /* add on to system time */ \
58 std ra,PACA_SYSTEM_TIME(r13); 50 std ra,PACA_SYSTEM_TIME(r13)
59#endif 51
52#ifdef CONFIG_PPC_SPLPAR
53#define ACCOUNT_STOLEN_TIME \
54BEGIN_FW_FTR_SECTION; \
55 beq 33f; \
56 /* from user - see if there are any DTL entries to process */ \
57 ld r10,PACALPPACAPTR(r13); /* get ptr to VPA */ \
58 ld r11,PACA_DTL_RIDX(r13); /* get log read index */ \
59 ld r10,LPPACA_DTLIDX(r10); /* get log write index */ \
60 cmpd cr1,r11,r10; \
61 beq+ cr1,33f; \
62 bl .accumulate_stolen_time; \
6333: \
64END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
65
66#else /* CONFIG_PPC_SPLPAR */
67#define ACCOUNT_STOLEN_TIME
68
69#endif /* CONFIG_PPC_SPLPAR */
70
71#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
60 72
61/* 73/*
62 * Macros for storing registers into and loading registers from 74 * Macros for storing registers into and loading registers from
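
Taken together, the rewritten ACCOUNT_CPU_USER_ENTRY/EXIT macros accumulate plain timebase deltas into paca->user_time and paca->system_time, and ACCOUNT_STOLEN_TIME only drops into C when the hypervisor's dispatch-log write index has moved past the paca's read index. The following is a rough stand-alone C paraphrase of those three paths; the stub types, field subset and helper names are invented for illustration, and the assembler above is the real code.

#include <stdint.h>

/* Minimal stand-ins for the paca/lppaca fields the macros touch. */
struct lppaca_stub { volatile uint64_t dtl_idx; };	/* LPPACA_DTLIDX */
struct paca_stub {
	uint64_t user_time;			/* PACA_USER_TIME */
	uint64_t system_time;			/* PACA_SYSTEM_TIME */
	uint64_t starttime;			/* PACA_STARTTIME */
	uint64_t starttime_user;		/* PACA_STARTTIME_USER */
	uint64_t dtl_ridx;			/* PACA_DTL_RIDX */
	struct lppaca_stub *lppaca_ptr;		/* PACALPPACAPTR */
};

/* ACCOUNT_CPU_USER_ENTRY: on entry from user mode, credit the timebase
 * ticks since we last returned to user mode as user time and start the
 * kernel-time interval. */
static void cpu_user_entry(struct paca_stub *p, uint64_t tb_now)
{
	p->user_time += tb_now - p->starttime_user;
	p->starttime = tb_now;
}

/* ACCOUNT_CPU_USER_EXIT: on return to user mode, credit the interval
 * spent in the kernel as system time and remember when we left. */
static void cpu_user_exit(struct paca_stub *p, uint64_t tb_now)
{
	p->system_time += tb_now - p->starttime;
	p->starttime_user = tb_now;
}

/* Stand-in for accumulate_stolen_time() in time.c, which scans the new
 * DTL entries and moves the stolen ticks out of user/system time. */
static void accumulate_stolen_time_stub(struct paca_stub *p)
{
	p->dtl_ridx = p->lppaca_ptr->dtl_idx;
}

/* ACCOUNT_STOLEN_TIME: on entry from user mode, take the slow path only
 * if the hypervisor has appended entries since we last read the log. */
static void stolen_time_check(struct paca_stub *p)
{
	if (p->dtl_ridx != p->lppaca_ptr->dtl_idx)
		accumulate_stolen_time_stub(p);
}

int main(void)
{
	struct lppaca_stub vpa = { .dtl_idx = 0 };
	struct paca_stub paca = { .lppaca_ptr = &vpa };

	cpu_user_entry(&paca, 1000);	/* kernel entry at TB 1000 */
	stolen_time_check(&paca);	/* indexes equal: cheap path */
	cpu_user_exit(&paca, 1500);	/* 500 ticks become system_time */
	return 0;
}
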
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index dc779dfcf258..fe6f7c2c9c68 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -34,7 +34,6 @@ extern void to_tm(int tim, struct rtc_time * tm);
34extern void GregorianDay(struct rtc_time *tm); 34extern void GregorianDay(struct rtc_time *tm);
35 35
36extern void generic_calibrate_decr(void); 36extern void generic_calibrate_decr(void);
37extern void snapshot_timebase(void);
38 37
39extern void set_dec_cpu6(unsigned int val); 38extern void set_dec_cpu6(unsigned int val);
40 39
@@ -212,12 +211,8 @@ struct cpu_usage {
212DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array); 211DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array);
213 212
214#if defined(CONFIG_VIRT_CPU_ACCOUNTING) 213#if defined(CONFIG_VIRT_CPU_ACCOUNTING)
215extern void calculate_steal_time(void);
216extern void snapshot_timebases(void);
217#define account_process_vtime(tsk) account_process_tick(tsk, 0) 214#define account_process_vtime(tsk) account_process_tick(tsk, 0)
218#else 215#else
219#define calculate_steal_time() do { } while (0)
220#define snapshot_timebases() do { } while (0)
221#define account_process_vtime(tsk) do { } while (0) 216#define account_process_vtime(tsk) do { } while (0)
222#endif 217#endif
223 218
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 1c0607ddccc0..c63494090854 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -181,17 +181,19 @@ int main(void)
181 offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid)); 181 offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid));
182 DEFINE(SLBSHADOW_STACKESID, 182 DEFINE(SLBSHADOW_STACKESID,
183 offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid)); 183 offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid));
184 DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
184 DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0)); 185 DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
185 DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); 186 DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
186 DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); 187 DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
187 DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); 188 DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));
188 DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); 189 DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx));
190 DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx));
189#endif /* CONFIG_PPC_STD_MMU_64 */ 191#endif /* CONFIG_PPC_STD_MMU_64 */
190 DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); 192 DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
191 DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); 193 DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
192 DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); 194 DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
193 DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr)); 195 DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
194 DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr)); 196 DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user));
195 DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); 197 DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
196 DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); 198 DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
197 DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); 199 DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 4d5fa12ca6e8..d82878c4daa6 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -97,6 +97,24 @@ system_call_common:
97 addi r9,r1,STACK_FRAME_OVERHEAD 97 addi r9,r1,STACK_FRAME_OVERHEAD
98 ld r11,exception_marker@toc(r2) 98 ld r11,exception_marker@toc(r2)
99 std r11,-16(r9) /* "regshere" marker */ 99 std r11,-16(r9) /* "regshere" marker */
100#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)
101BEGIN_FW_FTR_SECTION
102 beq 33f
103 /* if from user, see if there are any DTL entries to process */
104 ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
105 ld r11,PACA_DTL_RIDX(r13) /* get log read index */
106 ld r10,LPPACA_DTLIDX(r10) /* get log write index */
107 cmpd cr1,r11,r10
108 beq+ cr1,33f
109 bl .accumulate_stolen_time
110 REST_GPR(0,r1)
111 REST_4GPRS(3,r1)
112 REST_2GPRS(7,r1)
113 addi r9,r1,STACK_FRAME_OVERHEAD
11433:
115END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
116#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */
117
100#ifdef CONFIG_TRACE_IRQFLAGS 118#ifdef CONFIG_TRACE_IRQFLAGS
101 bl .trace_hardirqs_on 119 bl .trace_hardirqs_on
102 REST_GPR(0,r1) 120 REST_GPR(0,r1)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 37bc8ff16cac..84906d3fc860 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -517,7 +517,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
517 517
518 account_system_vtime(current); 518 account_system_vtime(current);
519 account_process_vtime(current); 519 account_process_vtime(current);
520 calculate_steal_time();
521 520
522 /* 521 /*
523 * We can't take a PMU exception inside _switch() since there is a 522 * We can't take a PMU exception inside _switch() since there is a
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9019f0f1bb5e..68034bbf2e4f 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -508,9 +508,6 @@ int __devinit start_secondary(void *unused)
508 if (smp_ops->take_timebase) 508 if (smp_ops->take_timebase)
509 smp_ops->take_timebase(); 509 smp_ops->take_timebase();
510 510
511 if (system_state > SYSTEM_BOOTING)
512 snapshot_timebase();
513
514 secondary_cpu_time_init(); 511 secondary_cpu_time_init();
515 512
516 ipi_call_lock(); 513 ipi_call_lock();
@@ -575,8 +572,6 @@ void __init smp_cpus_done(unsigned int max_cpus)
575 572
576 free_cpumask_var(old_mask); 573 free_cpumask_var(old_mask);
577 574
578 snapshot_timebases();
579
580 dump_numa_cpu_topology(); 575 dump_numa_cpu_topology();
581} 576}
582 577
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 8533b3b83f5d..fca20643c368 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -164,8 +164,6 @@ unsigned long ppc_proc_freq;
164EXPORT_SYMBOL(ppc_proc_freq); 164EXPORT_SYMBOL(ppc_proc_freq);
165unsigned long ppc_tb_freq; 165unsigned long ppc_tb_freq;
166 166
167static DEFINE_PER_CPU(u64, last_jiffy);
168
169#ifdef CONFIG_VIRT_CPU_ACCOUNTING 167#ifdef CONFIG_VIRT_CPU_ACCOUNTING
170/* 168/*
171 * Factors for converting from cputime_t (timebase ticks) to 169 * Factors for converting from cputime_t (timebase ticks) to
@@ -200,62 +198,151 @@ static void calc_cputime_factors(void)
200} 198}
201 199
202/* 200/*
203 * Read the PURR on systems that have it, otherwise the timebase. 201 * Read the SPURR on systems that have it, otherwise the PURR,
202 * or if that doesn't exist return the timebase value passed in.
204 */ 203 */
205static u64 read_purr(void) 204static u64 read_spurr(u64 tb)
206{ 205{
206 if (cpu_has_feature(CPU_FTR_SPURR))
207 return mfspr(SPRN_SPURR);
207 if (cpu_has_feature(CPU_FTR_PURR)) 208 if (cpu_has_feature(CPU_FTR_PURR))
208 return mfspr(SPRN_PURR); 209 return mfspr(SPRN_PURR);
209 return mftb(); 210 return tb;
210} 211}
211 212
213#ifdef CONFIG_PPC_SPLPAR
214
212/* 215/*
213 * Read the SPURR on systems that have it, otherwise the purr 216 * Scan the dispatch trace log and count up the stolen time.
217 * Should be called with interrupts disabled.
214 */ 218 */
215static u64 read_spurr(u64 purr) 219static u64 scan_dispatch_log(u64 stop_tb)
216{ 220{
217 /* 221 unsigned long i = local_paca->dtl_ridx;
218 * cpus without PURR won't have a SPURR 222 struct dtl_entry *dtl = local_paca->dtl_curr;
219 * We already know the former when we use this, so tell gcc 223 struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
220 */ 224 struct lppaca *vpa = local_paca->lppaca_ptr;
221 if (cpu_has_feature(CPU_FTR_PURR) && cpu_has_feature(CPU_FTR_SPURR)) 225 u64 tb_delta;
222 return mfspr(SPRN_SPURR); 226 u64 stolen = 0;
223 return purr; 227 u64 dtb;
228
229 if (i == vpa->dtl_idx)
230 return 0;
231 while (i < vpa->dtl_idx) {
232 dtb = dtl->timebase;
233 tb_delta = dtl->enqueue_to_dispatch_time +
234 dtl->ready_to_enqueue_time;
235 barrier();
236 if (i + N_DISPATCH_LOG < vpa->dtl_idx) {
237 /* buffer has overflowed */
238 i = vpa->dtl_idx - N_DISPATCH_LOG;
239 dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
240 continue;
241 }
242 if (dtb > stop_tb)
243 break;
244 stolen += tb_delta;
245 ++i;
246 ++dtl;
247 if (dtl == dtl_end)
248 dtl = local_paca->dispatch_log;
249 }
250 local_paca->dtl_ridx = i;
251 local_paca->dtl_curr = dtl;
252 return stolen;
224} 253}
225 254
226/* 255/*
256 * Accumulate stolen time by scanning the dispatch trace log.
257 * Called on entry from user mode.
258 */
259void accumulate_stolen_time(void)
260{
261 u64 sst, ust;
262
263 sst = scan_dispatch_log(get_paca()->starttime_user);
264 ust = scan_dispatch_log(get_paca()->starttime);
265 get_paca()->system_time -= sst;
266 get_paca()->user_time -= ust;
267 get_paca()->stolen_time += ust + sst;
268}
269
270static inline u64 calculate_stolen_time(u64 stop_tb)
271{
272 u64 stolen = 0;
273
274 if (get_paca()->dtl_ridx != get_paca()->lppaca_ptr->dtl_idx) {
275 stolen = scan_dispatch_log(stop_tb);
276 get_paca()->system_time -= stolen;
277 }
278
279 stolen += get_paca()->stolen_time;
280 get_paca()->stolen_time = 0;
281 return stolen;
282}
283
284#else /* CONFIG_PPC_SPLPAR */
285static inline u64 calculate_stolen_time(u64 stop_tb)
286{
287 return 0;
288}
289
290#endif /* CONFIG_PPC_SPLPAR */
291
292/*
227 * Account time for a transition between system, hard irq 293 * Account time for a transition between system, hard irq
228 * or soft irq state. 294 * or soft irq state.
229 */ 295 */
230void account_system_vtime(struct task_struct *tsk) 296void account_system_vtime(struct task_struct *tsk)
231{ 297{
232 u64 now, nowscaled, delta, deltascaled, sys_time; 298 u64 now, nowscaled, delta, deltascaled;
233 unsigned long flags; 299 unsigned long flags;
300 u64 stolen, udelta, sys_scaled, user_scaled;
234 301
235 local_irq_save(flags); 302 local_irq_save(flags);
236 now = read_purr(); 303 now = mftb();
237 nowscaled = read_spurr(now); 304 nowscaled = read_spurr(now);
238 delta = now - get_paca()->startpurr; 305 get_paca()->system_time += now - get_paca()->starttime;
306 get_paca()->starttime = now;
239 deltascaled = nowscaled - get_paca()->startspurr; 307 deltascaled = nowscaled - get_paca()->startspurr;
240 get_paca()->startpurr = now;
241 get_paca()->startspurr = nowscaled; 308 get_paca()->startspurr = nowscaled;
242 if (!in_interrupt()) { 309
243 /* deltascaled includes both user and system time. 310 stolen = calculate_stolen_time(now);
244 * Hence scale it based on the purr ratio to estimate 311
245 * the system time */ 312 delta = get_paca()->system_time;
246 sys_time = get_paca()->system_time; 313 get_paca()->system_time = 0;
247 if (get_paca()->user_time) 314 udelta = get_paca()->user_time - get_paca()->utime_sspurr;
248 deltascaled = deltascaled * sys_time / 315 get_paca()->utime_sspurr = get_paca()->user_time;
249 (sys_time + get_paca()->user_time); 316
250 delta += sys_time; 317 /*
251 get_paca()->system_time = 0; 318 * Because we don't read the SPURR on every kernel entry/exit,
319 * deltascaled includes both user and system SPURR ticks.
320 * Apportion these ticks to system SPURR ticks and user
321 * SPURR ticks in the same ratio as the system time (delta)
322 * and user time (udelta) values obtained from the timebase
323 * over the same interval. The system ticks get accounted here;
324 * the user ticks get saved up in paca->user_time_scaled to be
325 * used by account_process_tick.
326 */
327 sys_scaled = delta;
328 user_scaled = udelta;
329 if (deltascaled != delta + udelta) {
330 if (udelta) {
331 sys_scaled = deltascaled * delta / (delta + udelta);
332 user_scaled = deltascaled - sys_scaled;
333 } else {
334 sys_scaled = deltascaled;
335 }
336 }
337 get_paca()->user_time_scaled += user_scaled;
338
339 if (in_irq() || idle_task(smp_processor_id()) != tsk) {
340 account_system_time(tsk, 0, delta, sys_scaled);
341 if (stolen)
342 account_steal_time(stolen);
343 } else {
344 account_idle_time(delta + stolen);
252 } 345 }
253 if (in_irq() || idle_task(smp_processor_id()) != tsk)
254 account_system_time(tsk, 0, delta, deltascaled);
255 else
256 account_idle_time(delta);
257 __get_cpu_var(cputime_last_delta) = delta;
258 __get_cpu_var(cputime_scaled_last_delta) = deltascaled;
259 local_irq_restore(flags); 346 local_irq_restore(flags);
260} 347}
261EXPORT_SYMBOL_GPL(account_system_vtime); 348EXPORT_SYMBOL_GPL(account_system_vtime);
@@ -265,125 +352,26 @@ EXPORT_SYMBOL_GPL(account_system_vtime);
265 * by the exception entry and exit code to the generic process 352 * by the exception entry and exit code to the generic process
266 * user and system time records. 353 * user and system time records.
267 * Must be called with interrupts disabled. 354 * Must be called with interrupts disabled.
355 * Assumes that account_system_vtime() has been called recently
356 * (i.e. since the last entry from usermode) so that
357 * get_paca()->user_time_scaled is up to date.
268 */ 358 */
269void account_process_tick(struct task_struct *tsk, int user_tick) 359void account_process_tick(struct task_struct *tsk, int user_tick)
270{ 360{
271 cputime_t utime, utimescaled; 361 cputime_t utime, utimescaled;
272 362
273 utime = get_paca()->user_time; 363 utime = get_paca()->user_time;
364 utimescaled = get_paca()->user_time_scaled;
274 get_paca()->user_time = 0; 365 get_paca()->user_time = 0;
275 utimescaled = cputime_to_scaled(utime); 366 get_paca()->user_time_scaled = 0;
367 get_paca()->utime_sspurr = 0;
276 account_user_time(tsk, utime, utimescaled); 368 account_user_time(tsk, utime, utimescaled);
277} 369}
278 370
279/*
280 * Stuff for accounting stolen time.
281 */
282struct cpu_purr_data {
283 int initialized; /* thread is running */
284 u64 tb; /* last TB value read */
285 u64 purr; /* last PURR value read */
286 u64 spurr; /* last SPURR value read */
287};
288
289/*
290 * Each entry in the cpu_purr_data array is manipulated only by its
291 * "owner" cpu -- usually in the timer interrupt but also occasionally
292 * in process context for cpu online. As long as cpus do not touch
293 * each others' cpu_purr_data, disabling local interrupts is
294 * sufficient to serialize accesses.
295 */
296static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data);
297
298static void snapshot_tb_and_purr(void *data)
299{
300 unsigned long flags;
301 struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data);
302
303 local_irq_save(flags);
304 p->tb = get_tb_or_rtc();
305 p->purr = mfspr(SPRN_PURR);
306 wmb();
307 p->initialized = 1;
308 local_irq_restore(flags);
309}
310
311/*
312 * Called during boot when all cpus have come up.
313 */
314void snapshot_timebases(void)
315{
316 if (!cpu_has_feature(CPU_FTR_PURR))
317 return;
318 on_each_cpu(snapshot_tb_and_purr, NULL, 1);
319}
320
321/*
322 * Must be called with interrupts disabled.
323 */
324void calculate_steal_time(void)
325{
326 u64 tb, purr;
327 s64 stolen;
328 struct cpu_purr_data *pme;
329
330 pme = &__get_cpu_var(cpu_purr_data);
331 if (!pme->initialized)
332 return; /* !CPU_FTR_PURR or early in early boot */
333 tb = mftb();
334 purr = mfspr(SPRN_PURR);
335 stolen = (tb - pme->tb) - (purr - pme->purr);
336 if (stolen > 0) {
337 if (idle_task(smp_processor_id()) != current)
338 account_steal_time(stolen);
339 else
340 account_idle_time(stolen);
341 }
342 pme->tb = tb;
343 pme->purr = purr;
344}
345
346#ifdef CONFIG_PPC_SPLPAR
347/*
348 * Must be called before the cpu is added to the online map when
349 * a cpu is being brought up at runtime.
350 */
351static void snapshot_purr(void)
352{
353 struct cpu_purr_data *pme;
354 unsigned long flags;
355
356 if (!cpu_has_feature(CPU_FTR_PURR))
357 return;
358 local_irq_save(flags);
359 pme = &__get_cpu_var(cpu_purr_data);
360 pme->tb = mftb();
361 pme->purr = mfspr(SPRN_PURR);
362 pme->initialized = 1;
363 local_irq_restore(flags);
364}
365
366#endif /* CONFIG_PPC_SPLPAR */
367
368#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ 371#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
369#define calc_cputime_factors() 372#define calc_cputime_factors()
370#define calculate_steal_time() do { } while (0)
371#endif 373#endif
372 374
373#if !(defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR))
374#define snapshot_purr() do { } while (0)
375#endif
376
377/*
378 * Called when a cpu comes up after the system has finished booting,
379 * i.e. as a result of a hotplug cpu action.
380 */
381void snapshot_timebase(void)
382{
383 __get_cpu_var(last_jiffy) = get_tb_or_rtc();
384 snapshot_purr();
385}
386
387void __delay(unsigned long loops) 375void __delay(unsigned long loops)
388{ 376{
389 unsigned long start; 377 unsigned long start;
@@ -585,8 +573,6 @@ void timer_interrupt(struct pt_regs * regs)
585 old_regs = set_irq_regs(regs); 573 old_regs = set_irq_regs(regs);
586 irq_enter(); 574 irq_enter();
587 575
588 calculate_steal_time();
589
590 if (test_perf_event_pending()) { 576 if (test_perf_event_pending()) {
591 clear_perf_event_pending(); 577 clear_perf_event_pending();
592 perf_event_do_pending(); 578 perf_event_do_pending();
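
The comment block added to account_system_vtime() above describes how the accumulated SPURR delta is split between system and user time in the same ratio as the timebase deltas. The stand-alone sketch below repeats just that arithmetic with invented example numbers.

#include <stdint.h>
#include <stdio.h>

/* Same split as account_system_vtime(): deltascaled (SPURR ticks) covers
 * both user and system execution since the last snapshot, so apportion it
 * in the ratio of the timebase-based system (delta) and user (udelta)
 * intervals. All three inputs below are invented example values. */
int main(void)
{
	uint64_t delta       = 3000;	/* system TB ticks this interval */
	uint64_t udelta      = 1000;	/* user TB ticks this interval */
	uint64_t deltascaled = 3600;	/* SPURR ticks over the same interval */

	uint64_t sys_scaled  = delta;
	uint64_t user_scaled = udelta;

	/* When the SPURR is not implemented, read_spurr() falls back to the
	 * timebase, so deltascaled typically equals delta + udelta and no
	 * scaling is done. */
	if (deltascaled != delta + udelta) {
		if (udelta) {
			sys_scaled  = deltascaled * delta / (delta + udelta);
			user_scaled = deltascaled - sys_scaled;
		} else {
			sys_scaled = deltascaled;
		}
	}

	/* 3600 * 3000 / 4000 = 2700 system SPURR ticks, 900 user. */
	printf("sys_scaled=%llu user_scaled=%llu\n",
	       (unsigned long long)sys_scaled,
	       (unsigned long long)user_scaled);
	return 0;
}
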
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index adfd5441b612..0357655db49d 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -27,27 +27,10 @@
27#include <asm/system.h> 27#include <asm/system.h>
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
29#include <asm/firmware.h> 29#include <asm/firmware.h>
30#include <asm/lppaca.h>
30 31
31#include "plpar_wrappers.h" 32#include "plpar_wrappers.h"
32 33
33/*
34 * Layout of entries in the hypervisor's DTL buffer. Although we don't
35 * actually access the internals of an entry (we only need to know the size),
36 * we might as well define it here for reference.
37 */
38struct dtl_entry {
39 u8 dispatch_reason;
40 u8 preempt_reason;
41 u16 processor_id;
42 u32 enqueue_to_dispatch_time;
43 u32 ready_to_enqueue_time;
44 u32 waiting_to_ready_time;
45 u64 timebase;
46 u64 fault_addr;
47 u64 srr0;
48 u64 srr1;
49};
50
51struct dtl { 34struct dtl {
52 struct dtl_entry *buf; 35 struct dtl_entry *buf;
53 struct dentry *file; 36 struct dentry *file;
@@ -237,6 +220,11 @@ static int dtl_init(void)
237 struct dentry *event_mask_file, *buf_entries_file; 220 struct dentry *event_mask_file, *buf_entries_file;
238 int rc, i; 221 int rc, i;
239 222
223#ifdef CONFIG_VIRT_CPU_ACCOUNTING
224 /* disable this for now */
225 return -ENODEV;
226#endif
227
240 if (!firmware_has_feature(FW_FEATURE_SPLPAR)) 228 if (!firmware_has_feature(FW_FEATURE_SPLPAR))
241 return -ENODEV; 229 return -ENODEV;
242 230
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index a17fe4a9059f..f129040d974c 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -248,6 +248,8 @@ void vpa_init(int cpu)
248 int hwcpu = get_hard_smp_processor_id(cpu); 248 int hwcpu = get_hard_smp_processor_id(cpu);
249 unsigned long addr; 249 unsigned long addr;
250 long ret; 250 long ret;
251 struct paca_struct *pp;
252 struct dtl_entry *dtl;
251 253
252 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 254 if (cpu_has_feature(CPU_FTR_ALTIVEC))
253 lppaca_of(cpu).vmxregs_in_use = 1; 255 lppaca_of(cpu).vmxregs_in_use = 1;
@@ -274,6 +276,25 @@ void vpa_init(int cpu)
274 "registration for cpu %d (hw %d) of area %lx " 276 "registration for cpu %d (hw %d) of area %lx "
275 "returns %ld\n", cpu, hwcpu, addr, ret); 277 "returns %ld\n", cpu, hwcpu, addr, ret);
276 } 278 }
279
280 /*
281 * Register dispatch trace log, if one has been allocated.
282 */
283 pp = &paca[cpu];
284 dtl = pp->dispatch_log;
285 if (dtl) {
286 pp->dtl_ridx = 0;
287 pp->dtl_curr = dtl;
288 lppaca_of(cpu).dtl_idx = 0;
289
290 /* hypervisor reads buffer length from this field */
291 dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES;
292 ret = register_dtl(hwcpu, __pa(dtl));
293 if (ret)
294 pr_warn("DTL registration failed for cpu %d (%ld)\n",
295 cpu, ret);
296 lppaca_of(cpu).dtl_enable_mask = 2;
297 }
277} 298}
278 299
279static long pSeries_lpar_hpte_insert(unsigned long hpte_group, 300static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index a6d19e3a505e..d345bfd56bbe 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -273,6 +273,58 @@ static struct notifier_block pci_dn_reconfig_nb = {
273 .notifier_call = pci_dn_reconfig_notifier, 273 .notifier_call = pci_dn_reconfig_notifier,
274}; 274};
275 275
276#ifdef CONFIG_VIRT_CPU_ACCOUNTING
277/*
278 * Allocate space for the dispatch trace log for all possible cpus
279 * and register the buffers with the hypervisor. This is used for
280 * computing time stolen by the hypervisor.
281 */
282static int alloc_dispatch_logs(void)
283{
284 int cpu, ret;
285 struct paca_struct *pp;
286 struct dtl_entry *dtl;
287
288 if (!firmware_has_feature(FW_FEATURE_SPLPAR))
289 return 0;
290
291 for_each_possible_cpu(cpu) {
292 pp = &paca[cpu];
293 dtl = kmalloc_node(DISPATCH_LOG_BYTES, GFP_KERNEL,
294 cpu_to_node(cpu));
295 if (!dtl) {
296 pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
297 cpu);
298 pr_warn("Stolen time statistics will be unreliable\n");
299 break;
300 }
301
302 pp->dtl_ridx = 0;
303 pp->dispatch_log = dtl;
304 pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
305 pp->dtl_curr = dtl;
306 }
307
308 /* Register the DTL for the current (boot) cpu */
309 dtl = get_paca()->dispatch_log;
310 get_paca()->dtl_ridx = 0;
311 get_paca()->dtl_curr = dtl;
312 get_paca()->lppaca_ptr->dtl_idx = 0;
313
314 /* hypervisor reads buffer length from this field */
315 dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES;
316 ret = register_dtl(hard_smp_processor_id(), __pa(dtl));
317 if (ret)
318 pr_warn("DTL registration failed for boot cpu %d (%d)\n",
319 smp_processor_id(), ret);
320 get_paca()->lppaca_ptr->dtl_enable_mask = 2;
321
322 return 0;
323}
324
325early_initcall(alloc_dispatch_logs);
326#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
327
276static void __init pSeries_setup_arch(void) 328static void __init pSeries_setup_arch(void)
277{ 329{
278 /* Discover PIC type and setup ppc_md accordingly */ 330 /* Discover PIC type and setup ppc_md accordingly */