aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/pseries
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2010-08-26 15:56:43 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2010-09-02 00:07:31 -0400
commitcf9efce0ce3136fa076f53e53154e98455229514 (patch)
tree0e110018b160aff4813b81e0e8c3a43a364edd48 /arch/powerpc/platforms/pseries
parent93c22703efa72c7527dbd586d1951c1f4a85fd70 (diff)
powerpc: Account time using timebase rather than PURR
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the PURR register for measuring the user and system time used by processes, as well as other related times such as hardirq and softirq times. This turns out to be quite confusing for users because it means that a program will often be measured as taking less time when run on a multi-threaded processor (SMT2 or SMT4 mode) than it does when run on a single-threaded processor (ST mode), even though the program takes longer to finish. The discrepancy is accounted for as stolen time, which is also confusing, particularly when there are no other partitions running. This changes the accounting to use the timebase instead, meaning that the reported user and system times are the actual number of real-time seconds that the program was executing on the processor thread, regardless of which SMT mode the processor is in. Thus a program will generally show greater user and system times when run on a multi-threaded processor than on a single-threaded processor. On pSeries systems on POWER5 or later processors, we measure the stolen time (time when this partition wasn't running) using the hypervisor dispatch trace log. We check for new entries in the log on every entry from user mode and on every transition from kernel process context to soft or hard IRQ context (i.e. when account_system_vtime() gets called). So that we can correctly distinguish time stolen from user time and time stolen from system time, without having to check the log on every exit to user mode, we store separate timestamps for exit to user mode and entry from user mode. On systems that have a SPURR (POWER6 and POWER7), we read the SPURR in account_system_vtime() (as before), and then apportion the SPURR ticks since the last time we read it between scaled user time and scaled system time according to the relative proportions of user time and system time over the same interval. This avoids having to read the SPURR on every kernel entry and exit. On systems that have PURR but not SPURR (i.e., POWER5), we do the same using the PURR rather than the SPURR. This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl for now since it conflicts with the use of the dispatch trace log by the time accounting code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r--arch/powerpc/platforms/pseries/dtl.c24
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c21
-rw-r--r--arch/powerpc/platforms/pseries/setup.c52
3 files changed, 79 insertions, 18 deletions
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index adfd5441b61..0357655db49 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -27,27 +27,10 @@
27#include <asm/system.h> 27#include <asm/system.h>
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
29#include <asm/firmware.h> 29#include <asm/firmware.h>
30#include <asm/lppaca.h>
30 31
31#include "plpar_wrappers.h" 32#include "plpar_wrappers.h"
32 33
33/*
34 * Layout of entries in the hypervisor's DTL buffer. Although we don't
35 * actually access the internals of an entry (we only need to know the size),
36 * we might as well define it here for reference.
37 */
38struct dtl_entry {
39 u8 dispatch_reason;
40 u8 preempt_reason;
41 u16 processor_id;
42 u32 enqueue_to_dispatch_time;
43 u32 ready_to_enqueue_time;
44 u32 waiting_to_ready_time;
45 u64 timebase;
46 u64 fault_addr;
47 u64 srr0;
48 u64 srr1;
49};
50
51struct dtl { 34struct dtl {
52 struct dtl_entry *buf; 35 struct dtl_entry *buf;
53 struct dentry *file; 36 struct dentry *file;
@@ -237,6 +220,11 @@ static int dtl_init(void)
237 struct dentry *event_mask_file, *buf_entries_file; 220 struct dentry *event_mask_file, *buf_entries_file;
238 int rc, i; 221 int rc, i;
239 222
223#ifdef CONFIG_VIRT_CPU_ACCOUNTING
224 /* disable this for now */
225 return -ENODEV;
226#endif
227
240 if (!firmware_has_feature(FW_FEATURE_SPLPAR)) 228 if (!firmware_has_feature(FW_FEATURE_SPLPAR))
241 return -ENODEV; 229 return -ENODEV;
242 230
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index a17fe4a9059..f129040d974 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -248,6 +248,8 @@ void vpa_init(int cpu)
248 int hwcpu = get_hard_smp_processor_id(cpu); 248 int hwcpu = get_hard_smp_processor_id(cpu);
249 unsigned long addr; 249 unsigned long addr;
250 long ret; 250 long ret;
251 struct paca_struct *pp;
252 struct dtl_entry *dtl;
251 253
252 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 254 if (cpu_has_feature(CPU_FTR_ALTIVEC))
253 lppaca_of(cpu).vmxregs_in_use = 1; 255 lppaca_of(cpu).vmxregs_in_use = 1;
@@ -274,6 +276,25 @@ void vpa_init(int cpu)
274 "registration for cpu %d (hw %d) of area %lx " 276 "registration for cpu %d (hw %d) of area %lx "
275 "returns %ld\n", cpu, hwcpu, addr, ret); 277 "returns %ld\n", cpu, hwcpu, addr, ret);
276 } 278 }
279
280 /*
281 * Register dispatch trace log, if one has been allocated.
282 */
283 pp = &paca[cpu];
284 dtl = pp->dispatch_log;
285 if (dtl) {
286 pp->dtl_ridx = 0;
287 pp->dtl_curr = dtl;
288 lppaca_of(cpu).dtl_idx = 0;
289
290 /* hypervisor reads buffer length from this field */
291 dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES;
292 ret = register_dtl(hwcpu, __pa(dtl));
293 if (ret)
294 pr_warn("DTL registration failed for cpu %d (%ld)\n",
295 cpu, ret);
296 lppaca_of(cpu).dtl_enable_mask = 2;
297 }
277} 298}
278 299
279static long pSeries_lpar_hpte_insert(unsigned long hpte_group, 300static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index a6d19e3a505..d345bfd56bb 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -273,6 +273,58 @@ static struct notifier_block pci_dn_reconfig_nb = {
273 .notifier_call = pci_dn_reconfig_notifier, 273 .notifier_call = pci_dn_reconfig_notifier,
274}; 274};
275 275
276#ifdef CONFIG_VIRT_CPU_ACCOUNTING
277/*
278 * Allocate space for the dispatch trace log for all possible cpus
279 * and register the buffers with the hypervisor. This is used for
280 * computing time stolen by the hypervisor.
281 */
282static int alloc_dispatch_logs(void)
283{
284 int cpu, ret;
285 struct paca_struct *pp;
286 struct dtl_entry *dtl;
287
288 if (!firmware_has_feature(FW_FEATURE_SPLPAR))
289 return 0;
290
291 for_each_possible_cpu(cpu) {
292 pp = &paca[cpu];
293 dtl = kmalloc_node(DISPATCH_LOG_BYTES, GFP_KERNEL,
294 cpu_to_node(cpu));
295 if (!dtl) {
296 pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
297 cpu);
298 pr_warn("Stolen time statistics will be unreliable\n");
299 break;
300 }
301
302 pp->dtl_ridx = 0;
303 pp->dispatch_log = dtl;
304 pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
305 pp->dtl_curr = dtl;
306 }
307
308 /* Register the DTL for the current (boot) cpu */
309 dtl = get_paca()->dispatch_log;
310 get_paca()->dtl_ridx = 0;
311 get_paca()->dtl_curr = dtl;
312 get_paca()->lppaca_ptr->dtl_idx = 0;
313
314 /* hypervisor reads buffer length from this field */
315 dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES;
316 ret = register_dtl(hard_smp_processor_id(), __pa(dtl));
317 if (ret)
318 pr_warn("DTL registration failed for boot cpu %d (%d)\n",
319 smp_processor_id(), ret);
320 get_paca()->lppaca_ptr->dtl_enable_mask = 2;
321
322 return 0;
323}
324
325early_initcall(alloc_dispatch_logs);
326#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
327
276static void __init pSeries_setup_arch(void) 328static void __init pSeries_setup_arch(void)
277{ 329{
278 /* Discover PIC type and setup ppc_md accordingly */ 330 /* Discover PIC type and setup ppc_md accordingly */