author     Paul Mackerras <paulus@samba.org>                  2010-08-26 15:56:43 -0400
committer  Benjamin Herrenschmidt <benh@kernel.crashing.org>  2010-09-02 00:07:31 -0400
commit     cf9efce0ce3136fa076f53e53154e98455229514 (patch)
tree       0e110018b160aff4813b81e0e8c3a43a364edd48
parent     93c22703efa72c7527dbd586d1951c1f4a85fd70 (diff)
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the PURR register for measuring the user and system time used by processes, as well as other related times such as hardirq and softirq times. This turns out to be quite confusing for users because it means that a program will often be measured as taking less time when run on a multi-threaded processor (SMT2 or SMT4 mode) than it does when run on a single-threaded processor (ST mode), even though the program takes longer to finish. The discrepancy is accounted for as stolen time, which is also confusing, particularly when there are no other partitions running.

This changes the accounting to use the timebase instead, meaning that the reported user and system times are the actual number of real-time seconds that the program was executing on the processor thread, regardless of which SMT mode the processor is in. Thus a program will generally show greater user and system times when run on a multi-threaded processor than on a single-threaded processor.

On pSeries systems on POWER5 or later processors, we measure the stolen time (time when this partition wasn't running) using the hypervisor dispatch trace log. We check for new entries in the log on every entry from user mode and on every transition from kernel process context to soft or hard IRQ context (i.e. when account_system_vtime() gets called). So that we can correctly distinguish time stolen from user time and time stolen from system time, without having to check the log on every exit to user mode, we store separate timestamps for exit to user mode and entry from user mode.

On systems that have a SPURR (POWER6 and POWER7), we read the SPURR in account_system_vtime() (as before), and then apportion the SPURR ticks since the last time we read it between scaled user time and scaled system time according to the relative proportions of user time and system time over the same interval. This avoids having to read the SPURR on every kernel entry and exit. On systems that have PURR but not SPURR (i.e., POWER5), we do the same using the PURR rather than the SPURR.

This disables the DTL user interface in /sys/kernel/debug/powerpc/dtl for now since it conflicts with the use of the dispatch trace log by the time accounting code.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
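[Editor's note] To make the SPURR apportionment concrete, here is a simplified, self-contained sketch of the calculation described above (struct and function names are invented for illustration; the authoritative version is the account_system_vtime() change to arch/powerpc/kernel/time.c in the diff below): the SPURR ticks accumulated since the last read are split between system and user in the same ratio as the timebase-based system time (delta) and user time (udelta) measured over the same interval.

/*
 * Illustrative sketch only -- standalone C with invented names.
 * The real logic is the account_system_vtime() change below.
 */
#include <stdint.h>

struct vtime_sketch {
	uint64_t user_time;		/* usermode TB ticks accumulated */
	uint64_t system_time;		/* system TB ticks accumulated */
	uint64_t utime_sspurr;		/* user_time when SPURR last read */
	uint64_t user_time_scaled;	/* accumulated user SPURR ticks */
};

/*
 * Split 'deltascaled' SPURR ticks between system and user in the same
 * ratio as the TB-based system time (delta) and user time (udelta)
 * over the interval since the SPURR was last read.  Returns the system
 * share; the user share is accumulated for the next tick accounting.
 */
uint64_t apportion_spurr(struct vtime_sketch *v, uint64_t deltascaled)
{
	uint64_t delta = v->system_time;
	uint64_t udelta = v->user_time - v->utime_sspurr;
	uint64_t sys_scaled = delta;
	uint64_t user_scaled = udelta;

	v->system_time = 0;
	v->utime_sspurr = v->user_time;

	if (deltascaled != delta + udelta) {
		if (udelta) {
			sys_scaled = deltascaled * delta / (delta + udelta);
			user_scaled = deltascaled - sys_scaled;
		} else {
			sys_scaled = deltascaled;
		}
	}
	v->user_time_scaled += user_scaled;
	return sys_scaled;
}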
Diffstat:
-rw-r--r--  arch/powerpc/include/asm/exception-64s.h     3
-rw-r--r--  arch/powerpc/include/asm/lppaca.h            19
-rw-r--r--  arch/powerpc/include/asm/paca.h              10
-rw-r--r--  arch/powerpc/include/asm/ppc_asm.h           50
-rw-r--r--  arch/powerpc/include/asm/time.h              5
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c            8
-rw-r--r--  arch/powerpc/kernel/entry_64.S               18
-rw-r--r--  arch/powerpc/kernel/process.c                1
-rw-r--r--  arch/powerpc/kernel/smp.c                    5
-rw-r--r--  arch/powerpc/kernel/time.c                   268
-rw-r--r--  arch/powerpc/platforms/pseries/dtl.c         24
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c        21
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c       52
13 files changed, 290 insertions, 194 deletions
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 57c400071995..7778d6f0c878 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -137,7 +137,8 @@
 	li	r10,0;						\
 	ld	r11,exception_marker@toc(r2);			\
 	std	r10,RESULT(r1);	/* clear regs->result */	\
-	std	r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */
+	std	r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */	\
+	ACCOUNT_STOLEN_TIME
 
 /*
  * Exception vectors.
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 6d02624b622c..cfb85ec85750 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -172,6 +172,25 @@ struct slb_shadow {
 
 extern struct slb_shadow slb_shadow[];
 
+/*
+ * Layout of entries in the hypervisor's dispatch trace log buffer.
+ */
+struct dtl_entry {
+	u8	dispatch_reason;
+	u8	preempt_reason;
+	u16	processor_id;
+	u32	enqueue_to_dispatch_time;
+	u32	ready_to_enqueue_time;
+	u32	waiting_to_ready_time;
+	u64	timebase;
+	u64	fault_addr;
+	u64	srr0;
+	u64	srr1;
+};
+
+#define DISPATCH_LOG_BYTES	4096	/* bytes per cpu */
+#define N_DISPATCH_LOG		(DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
+
 #endif /* CONFIG_PPC_BOOK3S */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_LPPACA_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 1ff6662f7faf..6af6c1613409 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -85,6 +85,8 @@ struct paca_struct {
 	u8 kexec_state;		/* set when kexec down has irqs off */
 #ifdef CONFIG_PPC_STD_MMU_64
 	struct slb_shadow *slb_shadow_ptr;
+	struct dtl_entry *dispatch_log;
+	struct dtl_entry *dispatch_log_end;
 
 	/*
 	 * Now, starting in cacheline 2, the exception save areas
@@ -134,8 +136,14 @@ struct paca_struct {
 	/* Stuff for accurate time accounting */
 	u64 user_time;			/* accumulated usermode TB ticks */
 	u64 system_time;		/* accumulated system TB ticks */
-	u64 startpurr;			/* PURR/TB value snapshot */
+	u64 user_time_scaled;		/* accumulated usermode SPURR ticks */
+	u64 starttime;			/* TB value snapshot */
+	u64 starttime_user;		/* TB value on exit to usermode */
 	u64 startspurr;			/* SPURR value snapshot */
+	u64 utime_sspurr;		/* ->user_time when ->startspurr set */
+	u64 stolen_time;		/* TB ticks taken by hypervisor */
+	u64 dtl_ridx;			/* read index in dispatch log */
+	struct dtl_entry *dtl_curr;	/* pointer corresponding to dtl_ridx */
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
 	/* We use this to store guest state in */
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 498fe09263d3..98210067c1cc 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -9,6 +9,7 @@
 #include <asm/asm-compat.h>
 #include <asm/processor.h>
 #include <asm/ppc-opcode.h>
+#include <asm/firmware.h>
 
 #ifndef __ASSEMBLY__
 #error __FILE__ should only be used in assembler files
@@ -26,17 +27,13 @@
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 #define ACCOUNT_CPU_USER_ENTRY(ra, rb)
 #define ACCOUNT_CPU_USER_EXIT(ra, rb)
+#define ACCOUNT_STOLEN_TIME
 #else
 #define ACCOUNT_CPU_USER_ENTRY(ra, rb)				\
 	beq	2f;		/* if from kernel mode */	\
-BEGIN_FTR_SECTION;						\
-	mfspr	ra,SPRN_PURR;	/* get processor util. reg */	\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
-BEGIN_FTR_SECTION;						\
-	MFTB(ra);		/* or get TB if no PURR */	\
-END_FTR_SECTION_IFCLR(CPU_FTR_PURR);				\
-	ld	rb,PACA_STARTPURR(r13);				\
-	std	ra,PACA_STARTPURR(r13);				\
+	MFTB(ra);		/* get timebase */		\
+	ld	rb,PACA_STARTTIME_USER(r13);			\
+	std	ra,PACA_STARTTIME(r13);				\
 	subf	rb,rb,ra;	/* subtract start value */	\
 	ld	ra,PACA_USER_TIME(r13);				\
 	add	ra,ra,rb;	/* add on to user time */	\
@@ -44,19 +41,34 @@ END_FTR_SECTION_IFCLR(CPU_FTR_PURR);				\
 2:
 
 #define ACCOUNT_CPU_USER_EXIT(ra, rb)				\
-BEGIN_FTR_SECTION;						\
-	mfspr	ra,SPRN_PURR;	/* get processor util. reg */	\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
-BEGIN_FTR_SECTION;						\
-	MFTB(ra);		/* or get TB if no PURR */	\
-END_FTR_SECTION_IFCLR(CPU_FTR_PURR);				\
-	ld	rb,PACA_STARTPURR(r13);				\
-	std	ra,PACA_STARTPURR(r13);				\
+	MFTB(ra);		/* get timebase */		\
+	ld	rb,PACA_STARTTIME(r13);				\
+	std	ra,PACA_STARTTIME_USER(r13);			\
 	subf	rb,rb,ra;	/* subtract start value */	\
 	ld	ra,PACA_SYSTEM_TIME(r13);			\
-	add	ra,ra,rb;	/* add on to user time */	\
-	std	ra,PACA_SYSTEM_TIME(r13);
-#endif
+	add	ra,ra,rb;	/* add on to system time */	\
+	std	ra,PACA_SYSTEM_TIME(r13)
+
+#ifdef CONFIG_PPC_SPLPAR
+#define ACCOUNT_STOLEN_TIME					\
+BEGIN_FW_FTR_SECTION;						\
+	beq	33f;						\
+	/* from user - see if there are any DTL entries to process */	\
+	ld	r10,PACALPPACAPTR(r13);	/* get ptr to VPA */	\
+	ld	r11,PACA_DTL_RIDX(r13);	/* get log read index */	\
+	ld	r10,LPPACA_DTLIDX(r10);	/* get log write index */	\
+	cmpd	cr1,r11,r10;					\
+	beq+	cr1,33f;					\
+	bl	.accumulate_stolen_time;			\
+33:								\
+END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
+
+#else	/* CONFIG_PPC_SPLPAR */
+#define ACCOUNT_STOLEN_TIME
+
+#endif /* CONFIG_PPC_SPLPAR */
+
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
 /*
  * Macros for storing registers into and loading registers from
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index dc779dfcf258..fe6f7c2c9c68 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -34,7 +34,6 @@ extern void to_tm(int tim, struct rtc_time * tm);
 extern void GregorianDay(struct rtc_time *tm);
 
 extern void generic_calibrate_decr(void);
-extern void snapshot_timebase(void);
 
 extern void set_dec_cpu6(unsigned int val);
 
@@ -212,12 +211,8 @@ struct cpu_usage {
 DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array);
 
 #if defined(CONFIG_VIRT_CPU_ACCOUNTING)
-extern void calculate_steal_time(void);
-extern void snapshot_timebases(void);
 #define account_process_vtime(tsk) account_process_tick(tsk, 0)
 #else
-#define calculate_steal_time() do { } while (0)
-#define snapshot_timebases() do { } while (0)
 #define account_process_vtime(tsk) do { } while (0)
 #endif
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 1c0607ddccc0..c63494090854 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -181,17 +181,19 @@ int main(void)
 	       offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid));
 	DEFINE(SLBSHADOW_STACKESID,
 	       offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid));
+	DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
 	DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
 	DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
 	DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
 	DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));
-	DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
+	DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx));
+	DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx));
 #endif /* CONFIG_PPC_STD_MMU_64 */
 	DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
 	DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
 	DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
-	DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr));
-	DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr));
+	DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
+	DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user));
 	DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
 	DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
 	DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 4d5fa12ca6e8..d82878c4daa6 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -97,6 +97,24 @@ system_call_common:
 	addi	r9,r1,STACK_FRAME_OVERHEAD
 	ld	r11,exception_marker@toc(r2)
 	std	r11,-16(r9)		/* "regshere" marker */
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)
+BEGIN_FW_FTR_SECTION
+	beq	33f
+	/* if from user, see if there are any DTL entries to process */
+	ld	r10,PACALPPACAPTR(r13)	/* get ptr to VPA */
+	ld	r11,PACA_DTL_RIDX(r13)	/* get log read index */
+	ld	r10,LPPACA_DTLIDX(r10)	/* get log write index */
+	cmpd	cr1,r11,r10
+	beq+	cr1,33f
+	bl	.accumulate_stolen_time
+	REST_GPR(0,r1)
+	REST_4GPRS(3,r1)
+	REST_2GPRS(7,r1)
+	addi	r9,r1,STACK_FRAME_OVERHEAD
+33:
+END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */
+
 #ifdef CONFIG_TRACE_IRQFLAGS
 	bl	.trace_hardirqs_on
 	REST_GPR(0,r1)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 37bc8ff16cac..84906d3fc860 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -517,7 +517,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
 
 	account_system_vtime(current);
 	account_process_vtime(current);
-	calculate_steal_time();
 
 	/*
 	 * We can't take a PMU exception inside _switch() since there is a
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9019f0f1bb5e..68034bbf2e4f 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -508,9 +508,6 @@ int __devinit start_secondary(void *unused)
 	if (smp_ops->take_timebase)
 		smp_ops->take_timebase();
 
-	if (system_state > SYSTEM_BOOTING)
-		snapshot_timebase();
-
 	secondary_cpu_time_init();
 
 	ipi_call_lock();
@@ -575,8 +572,6 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 	free_cpumask_var(old_mask);
 
-	snapshot_timebases();
-
 	dump_numa_cpu_topology();
 }
 
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 8533b3b83f5d..fca20643c368 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -164,8 +164,6 @@ unsigned long ppc_proc_freq;
 EXPORT_SYMBOL(ppc_proc_freq);
 unsigned long ppc_tb_freq;
 
-static DEFINE_PER_CPU(u64, last_jiffy);
-
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 /*
  * Factors for converting from cputime_t (timebase ticks) to
@@ -200,62 +198,151 @@ static void calc_cputime_factors(void)
 }
 
 /*
- * Read the PURR on systems that have it, otherwise the timebase.
+ * Read the SPURR on systems that have it, otherwise the PURR,
+ * or if that doesn't exist return the timebase value passed in.
  */
-static u64 read_purr(void)
+static u64 read_spurr(u64 tb)
 {
+	if (cpu_has_feature(CPU_FTR_SPURR))
+		return mfspr(SPRN_SPURR);
 	if (cpu_has_feature(CPU_FTR_PURR))
 		return mfspr(SPRN_PURR);
-	return mftb();
+	return tb;
 }
 
+#ifdef CONFIG_PPC_SPLPAR
+
 /*
- * Read the SPURR on systems that have it, otherwise the purr
+ * Scan the dispatch trace log and count up the stolen time.
+ * Should be called with interrupts disabled.
  */
-static u64 read_spurr(u64 purr)
+static u64 scan_dispatch_log(u64 stop_tb)
 {
-	/*
-	 * cpus without PURR won't have a SPURR
-	 * We already know the former when we use this, so tell gcc
-	 */
-	if (cpu_has_feature(CPU_FTR_PURR) && cpu_has_feature(CPU_FTR_SPURR))
-		return mfspr(SPRN_SPURR);
-	return purr;
+	unsigned long i = local_paca->dtl_ridx;
+	struct dtl_entry *dtl = local_paca->dtl_curr;
+	struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
+	struct lppaca *vpa = local_paca->lppaca_ptr;
+	u64 tb_delta;
+	u64 stolen = 0;
+	u64 dtb;
+
+	if (i == vpa->dtl_idx)
+		return 0;
+	while (i < vpa->dtl_idx) {
+		dtb = dtl->timebase;
+		tb_delta = dtl->enqueue_to_dispatch_time +
+			dtl->ready_to_enqueue_time;
+		barrier();
+		if (i + N_DISPATCH_LOG < vpa->dtl_idx) {
+			/* buffer has overflowed */
+			i = vpa->dtl_idx - N_DISPATCH_LOG;
+			dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+			continue;
+		}
+		if (dtb > stop_tb)
+			break;
+		stolen += tb_delta;
+		++i;
+		++dtl;
+		if (dtl == dtl_end)
+			dtl = local_paca->dispatch_log;
+	}
+	local_paca->dtl_ridx = i;
+	local_paca->dtl_curr = dtl;
+	return stolen;
 }
 
 /*
+ * Accumulate stolen time by scanning the dispatch trace log.
+ * Called on entry from user mode.
+ */
+void accumulate_stolen_time(void)
+{
+	u64 sst, ust;
+
+	sst = scan_dispatch_log(get_paca()->starttime_user);
+	ust = scan_dispatch_log(get_paca()->starttime);
+	get_paca()->system_time -= sst;
+	get_paca()->user_time -= ust;
+	get_paca()->stolen_time += ust + sst;
+}
+
+static inline u64 calculate_stolen_time(u64 stop_tb)
+{
+	u64 stolen = 0;
+
+	if (get_paca()->dtl_ridx != get_paca()->lppaca_ptr->dtl_idx) {
+		stolen = scan_dispatch_log(stop_tb);
+		get_paca()->system_time -= stolen;
+	}
+
+	stolen += get_paca()->stolen_time;
+	get_paca()->stolen_time = 0;
+	return stolen;
+}
+
+#else /* CONFIG_PPC_SPLPAR */
+static inline u64 calculate_stolen_time(u64 stop_tb)
+{
+	return 0;
+}
+
+#endif /* CONFIG_PPC_SPLPAR */
+
+/*
  * Account time for a transition between system, hard irq
  * or soft irq state.
  */
 void account_system_vtime(struct task_struct *tsk)
 {
-	u64 now, nowscaled, delta, deltascaled, sys_time;
+	u64 now, nowscaled, delta, deltascaled;
 	unsigned long flags;
+	u64 stolen, udelta, sys_scaled, user_scaled;
 
 	local_irq_save(flags);
-	now = read_purr();
+	now = mftb();
 	nowscaled = read_spurr(now);
-	delta = now - get_paca()->startpurr;
+	get_paca()->system_time += now - get_paca()->starttime;
+	get_paca()->starttime = now;
 	deltascaled = nowscaled - get_paca()->startspurr;
-	get_paca()->startpurr = now;
 	get_paca()->startspurr = nowscaled;
-	if (!in_interrupt()) {
-		/* deltascaled includes both user and system time.
-		 * Hence scale it based on the purr ratio to estimate
-		 * the system time */
-		sys_time = get_paca()->system_time;
-		if (get_paca()->user_time)
-			deltascaled = deltascaled * sys_time /
-				(sys_time + get_paca()->user_time);
-		delta += sys_time;
-		get_paca()->system_time = 0;
+
+	stolen = calculate_stolen_time(now);
+
+	delta = get_paca()->system_time;
+	get_paca()->system_time = 0;
+	udelta = get_paca()->user_time - get_paca()->utime_sspurr;
+	get_paca()->utime_sspurr = get_paca()->user_time;
+
+	/*
+	 * Because we don't read the SPURR on every kernel entry/exit,
+	 * deltascaled includes both user and system SPURR ticks.
+	 * Apportion these ticks to system SPURR ticks and user
+	 * SPURR ticks in the same ratio as the system time (delta)
+	 * and user time (udelta) values obtained from the timebase
+	 * over the same interval. The system ticks get accounted here;
+	 * the user ticks get saved up in paca->user_time_scaled to be
+	 * used by account_process_tick.
+	 */
+	sys_scaled = delta;
+	user_scaled = udelta;
+	if (deltascaled != delta + udelta) {
+		if (udelta) {
+			sys_scaled = deltascaled * delta / (delta + udelta);
+			user_scaled = deltascaled - sys_scaled;
+		} else {
+			sys_scaled = deltascaled;
+		}
+	}
+	get_paca()->user_time_scaled += user_scaled;
+
+	if (in_irq() || idle_task(smp_processor_id()) != tsk) {
+		account_system_time(tsk, 0, delta, sys_scaled);
+		if (stolen)
+			account_steal_time(stolen);
+	} else {
+		account_idle_time(delta + stolen);
 	}
-	if (in_irq() || idle_task(smp_processor_id()) != tsk)
-		account_system_time(tsk, 0, delta, deltascaled);
-	else
-		account_idle_time(delta);
-	__get_cpu_var(cputime_last_delta) = delta;
-	__get_cpu_var(cputime_scaled_last_delta) = deltascaled;
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
@@ -265,125 +352,26 @@ EXPORT_SYMBOL_GPL(account_system_vtime);
  * by the exception entry and exit code to the generic process
  * user and system time records.
  * Must be called with interrupts disabled.
+ * Assumes that account_system_vtime() has been called recently
+ * (i.e. since the last entry from usermode) so that
+ * get_paca()->user_time_scaled is up to date.
  */
 void account_process_tick(struct task_struct *tsk, int user_tick)
 {
 	cputime_t utime, utimescaled;
 
 	utime = get_paca()->user_time;
+	utimescaled = get_paca()->user_time_scaled;
 	get_paca()->user_time = 0;
-	utimescaled = cputime_to_scaled(utime);
+	get_paca()->user_time_scaled = 0;
+	get_paca()->utime_sspurr = 0;
 	account_user_time(tsk, utime, utimescaled);
 }
 
-/*
- * Stuff for accounting stolen time.
- */
-struct cpu_purr_data {
-	int	initialized;		/* thread is running */
-	u64	tb;			/* last TB value read */
-	u64	purr;			/* last PURR value read */
-	u64	spurr;			/* last SPURR value read */
-};
-
-/*
- * Each entry in the cpu_purr_data array is manipulated only by its
- * "owner" cpu -- usually in the timer interrupt but also occasionally
- * in process context for cpu online. As long as cpus do not touch
- * each others' cpu_purr_data, disabling local interrupts is
- * sufficient to serialize accesses.
- */
-static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data);
-
-static void snapshot_tb_and_purr(void *data)
-{
-	unsigned long flags;
-	struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data);
-
-	local_irq_save(flags);
-	p->tb = get_tb_or_rtc();
-	p->purr = mfspr(SPRN_PURR);
-	wmb();
-	p->initialized = 1;
-	local_irq_restore(flags);
-}
-
-/*
- * Called during boot when all cpus have come up.
- */
-void snapshot_timebases(void)
-{
-	if (!cpu_has_feature(CPU_FTR_PURR))
-		return;
-	on_each_cpu(snapshot_tb_and_purr, NULL, 1);
-}
-
-/*
- * Must be called with interrupts disabled.
- */
-void calculate_steal_time(void)
-{
-	u64 tb, purr;
-	s64 stolen;
-	struct cpu_purr_data *pme;
-
-	pme = &__get_cpu_var(cpu_purr_data);
-	if (!pme->initialized)
-		return;		/* !CPU_FTR_PURR or early in early boot */
-	tb = mftb();
-	purr = mfspr(SPRN_PURR);
-	stolen = (tb - pme->tb) - (purr - pme->purr);
-	if (stolen > 0) {
-		if (idle_task(smp_processor_id()) != current)
-			account_steal_time(stolen);
-		else
-			account_idle_time(stolen);
-	}
-	pme->tb = tb;
-	pme->purr = purr;
-}
-
-#ifdef CONFIG_PPC_SPLPAR
-/*
- * Must be called before the cpu is added to the online map when
- * a cpu is being brought up at runtime.
- */
-static void snapshot_purr(void)
-{
-	struct cpu_purr_data *pme;
-	unsigned long flags;
-
-	if (!cpu_has_feature(CPU_FTR_PURR))
-		return;
-	local_irq_save(flags);
-	pme = &__get_cpu_var(cpu_purr_data);
-	pme->tb = mftb();
-	pme->purr = mfspr(SPRN_PURR);
-	pme->initialized = 1;
-	local_irq_restore(flags);
-}
-
-#endif /* CONFIG_PPC_SPLPAR */
-
 #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
 #define calc_cputime_factors()
-#define calculate_steal_time() do { } while (0)
 #endif
 
-#if !(defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR))
-#define snapshot_purr() do { } while (0)
-#endif
-
-/*
- * Called when a cpu comes up after the system has finished booting,
- * i.e. as a result of a hotplug cpu action.
- */
-void snapshot_timebase(void)
-{
-	__get_cpu_var(last_jiffy) = get_tb_or_rtc();
-	snapshot_purr();
-}
-
 void __delay(unsigned long loops)
 {
 	unsigned long start;
@@ -585,8 +573,6 @@ void timer_interrupt(struct pt_regs * regs)
 	old_regs = set_irq_regs(regs);
 	irq_enter();
 
-	calculate_steal_time();
-
 	if (test_perf_event_pending()) {
 		clear_perf_event_pending();
 		perf_event_do_pending();
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index adfd5441b612..0357655db49d 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -27,27 +27,10 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/firmware.h>
+#include <asm/lppaca.h>
 
 #include "plpar_wrappers.h"
 
-/*
- * Layout of entries in the hypervisor's DTL buffer. Although we don't
- * actually access the internals of an entry (we only need to know the size),
- * we might as well define it here for reference.
- */
-struct dtl_entry {
-	u8	dispatch_reason;
-	u8	preempt_reason;
-	u16	processor_id;
-	u32	enqueue_to_dispatch_time;
-	u32	ready_to_enqueue_time;
-	u32	waiting_to_ready_time;
-	u64	timebase;
-	u64	fault_addr;
-	u64	srr0;
-	u64	srr1;
-};
-
 struct dtl {
 	struct dtl_entry	*buf;
 	struct dentry		*file;
@@ -237,6 +220,11 @@ static int dtl_init(void)
 	struct dentry *event_mask_file, *buf_entries_file;
 	int rc, i;
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	/* disable this for now */
+	return -ENODEV;
+#endif
+
 	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
 		return -ENODEV;
 
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index a17fe4a9059f..f129040d974c 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -248,6 +248,8 @@ void vpa_init(int cpu)
 	int hwcpu = get_hard_smp_processor_id(cpu);
 	unsigned long addr;
 	long ret;
+	struct paca_struct *pp;
+	struct dtl_entry *dtl;
 
 	if (cpu_has_feature(CPU_FTR_ALTIVEC))
 		lppaca_of(cpu).vmxregs_in_use = 1;
@@ -274,6 +276,25 @@ void vpa_init(int cpu)
 			"registration for cpu %d (hw %d) of area %lx "
 			"returns %ld\n", cpu, hwcpu, addr, ret);
 	}
+
+	/*
+	 * Register dispatch trace log, if one has been allocated.
+	 */
+	pp = &paca[cpu];
+	dtl = pp->dispatch_log;
+	if (dtl) {
+		pp->dtl_ridx = 0;
+		pp->dtl_curr = dtl;
+		lppaca_of(cpu).dtl_idx = 0;
+
+		/* hypervisor reads buffer length from this field */
+		dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES;
+		ret = register_dtl(hwcpu, __pa(dtl));
+		if (ret)
+			pr_warn("DTL registration failed for cpu %d (%ld)\n",
+				cpu, ret);
+		lppaca_of(cpu).dtl_enable_mask = 2;
+	}
 }
 
 static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index a6d19e3a505e..d345bfd56bbe 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -273,6 +273,58 @@ static struct notifier_block pci_dn_reconfig_nb = {
 	.notifier_call = pci_dn_reconfig_notifier,
 };
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/*
+ * Allocate space for the dispatch trace log for all possible cpus
+ * and register the buffers with the hypervisor. This is used for
+ * computing time stolen by the hypervisor.
+ */
+static int alloc_dispatch_logs(void)
+{
+	int cpu, ret;
+	struct paca_struct *pp;
+	struct dtl_entry *dtl;
+
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return 0;
+
+	for_each_possible_cpu(cpu) {
+		pp = &paca[cpu];
+		dtl = kmalloc_node(DISPATCH_LOG_BYTES, GFP_KERNEL,
+				   cpu_to_node(cpu));
+		if (!dtl) {
+			pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
+				cpu);
+			pr_warn("Stolen time statistics will be unreliable\n");
+			break;
+		}
+
+		pp->dtl_ridx = 0;
+		pp->dispatch_log = dtl;
+		pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
+		pp->dtl_curr = dtl;
+	}
+
+	/* Register the DTL for the current (boot) cpu */
+	dtl = get_paca()->dispatch_log;
+	get_paca()->dtl_ridx = 0;
+	get_paca()->dtl_curr = dtl;
+	get_paca()->lppaca_ptr->dtl_idx = 0;
+
+	/* hypervisor reads buffer length from this field */
+	dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES;
+	ret = register_dtl(hard_smp_processor_id(), __pa(dtl));
+	if (ret)
+		pr_warn("DTL registration failed for boot cpu %d (%d)\n",
+			smp_processor_id(), ret);
+	get_paca()->lppaca_ptr->dtl_enable_mask = 2;
+
+	return 0;
+}
+
+early_initcall(alloc_dispatch_logs);
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
 static void __init pSeries_setup_arch(void)
 {
 	/* Discover PIC type and setup ppc_md accordingly */